0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.sql.catalyst.expressions;
0019
0020 import java.nio.ByteOrder;
0021 import java.nio.charset.StandardCharsets;
0022 import java.util.HashSet;
0023 import java.util.Random;
0024 import java.util.Set;
0025
0026 import org.apache.spark.unsafe.Platform;
0027 import org.junit.Assert;
0028 import org.junit.Test;
0029
0030
0031
0032
0033
0034
0035 public class XXH64Suite {
0036
0037 private static final XXH64 hasher = new XXH64(0);
0038
0039 private static final int SIZE = 101;
0040 private static final long PRIME = 2654435761L;
0041 private static final byte[] BUFFER = new byte[SIZE];
0042 private static final int TEST_INT = 0x4B1FFF9E;
0043 private static final long TEST_LONG = 0xDD2F535E4B1FFF9EL;
0044
0045
0046 static {
0047 long seed = PRIME;
0048 for (int i = 0; i < SIZE; i++) {
0049 BUFFER[i] = (byte) (seed >> 24);
0050 seed *= seed;
0051 }
0052 }
0053
0054 @Test
0055 public void testKnownIntegerInputs() {
0056 Assert.assertEquals(0x9256E58AA397AEF1L, hasher.hashInt(TEST_INT));
0057 Assert.assertEquals(0x9D5FFDFB928AB4BL, XXH64.hashInt(TEST_INT, PRIME));
0058 }
0059
0060 @Test
0061 public void testKnownLongInputs() {
0062 Assert.assertEquals(0xF74CB1451B32B8CFL, hasher.hashLong(TEST_LONG));
0063 Assert.assertEquals(0x9C44B77FBCC302C5L, XXH64.hashLong(TEST_LONG, PRIME));
0064 }
0065
0066 @Test
0067 public void testKnownByteArrayInputs() {
0068 Assert.assertEquals(0xEF46DB3751D8E999L,
0069 hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 0));
0070 Assert.assertEquals(0xAC75FDA2929B17EFL,
0071 XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 0, PRIME));
0072 Assert.assertEquals(0x4FCE394CC88952D8L,
0073 hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 1));
0074 Assert.assertEquals(0x739840CB819FA723L,
0075 XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 1, PRIME));
0076
0077 if (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN) {
0078 Assert.assertEquals(0x9256E58AA397AEF1L,
0079 hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 4));
0080 Assert.assertEquals(0x9D5FFDFB928AB4BL,
0081 XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 4, PRIME));
0082 Assert.assertEquals(0xF74CB1451B32B8CFL,
0083 hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 8));
0084 Assert.assertEquals(0x9C44B77FBCC302C5L,
0085 XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 8, PRIME));
0086 Assert.assertEquals(0xCFFA8DB881BC3A3DL,
0087 hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 14));
0088 Assert.assertEquals(0x5B9611585EFCC9CBL,
0089 XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 14, PRIME));
0090 Assert.assertEquals(0x0EAB543384F878ADL,
0091 hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, SIZE));
0092 Assert.assertEquals(0xCAA65939306F1E21L,
0093 XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, SIZE, PRIME));
0094 } else {
0095 Assert.assertEquals(0x7F875412350ADDDCL,
0096 hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 4));
0097 Assert.assertEquals(0x564D279F524D8516L,
0098 XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 4, PRIME));
0099 Assert.assertEquals(0x7D9F07E27E0EB006L,
0100 hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 8));
0101 Assert.assertEquals(0x893CEF564CB7858L,
0102 XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 8, PRIME));
0103 Assert.assertEquals(0xC6198C4C9CC49E17L,
0104 hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 14));
0105 Assert.assertEquals(0x4E21BEF7164D4BBL,
0106 XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 14, PRIME));
0107 Assert.assertEquals(0xBCF5FAEDEE1F2B5AL,
0108 hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, SIZE));
0109 Assert.assertEquals(0x6F680C877A358FE5L,
0110 XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, SIZE, PRIME));
0111 }
0112 }
0113
0114 @Test
0115 public void randomizedStressTest() {
0116 int size = 65536;
0117 Random rand = new Random();
0118
0119
0120 Set<Long> hashcodes = new HashSet<>();
0121 for (int i = 0; i < size; i++) {
0122 int vint = rand.nextInt();
0123 long lint = rand.nextLong();
0124 Assert.assertEquals(hasher.hashInt(vint), hasher.hashInt(vint));
0125 Assert.assertEquals(hasher.hashLong(lint), hasher.hashLong(lint));
0126
0127 hashcodes.add(hasher.hashLong(lint));
0128 }
0129
0130
0131 Assert.assertTrue(hashcodes.size() > size * 0.95d);
0132 }
0133
0134 @Test
0135 public void randomizedStressTestBytes() {
0136 int size = 65536;
0137 Random rand = new Random();
0138
0139
0140 Set<Long> hashcodes = new HashSet<>();
0141 for (int i = 0; i < size; i++) {
0142 int byteArrSize = rand.nextInt(100) * 8;
0143 byte[] bytes = new byte[byteArrSize];
0144 rand.nextBytes(bytes);
0145
0146 Assert.assertEquals(
0147 hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
0148 hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0149
0150 hashcodes.add(hasher.hashUnsafeWords(
0151 bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0152 }
0153
0154
0155 Assert.assertTrue(hashcodes.size() > size * 0.95d);
0156 }
0157
0158 @Test
0159 public void randomizedStressTestPaddedStrings() {
0160 int size = 64000;
0161
0162 Set<Long> hashcodes = new HashSet<>();
0163 for (int i = 0; i < size; i++) {
0164 int byteArrSize = 8;
0165 byte[] strBytes = String.valueOf(i).getBytes(StandardCharsets.UTF_8);
0166 byte[] paddedBytes = new byte[byteArrSize];
0167 System.arraycopy(strBytes, 0, paddedBytes, 0, strBytes.length);
0168
0169 Assert.assertEquals(
0170 hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
0171 hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0172
0173 hashcodes.add(hasher.hashUnsafeWords(
0174 paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0175 }
0176
0177
0178 Assert.assertTrue(hashcodes.size() > size * 0.95d);
0179 }
0180 }