0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.sql.catalyst.expressions;
0019
0020 import org.apache.spark.unsafe.Platform;
0021 import org.apache.spark.unsafe.types.UTF8String;
0022 import org.junit.Assert;
0023 import org.junit.Test;
0024
0025 import java.nio.charset.StandardCharsets;
0026 import java.util.HashSet;
0027 import java.util.Random;
0028 import java.util.Set;
0029
0030 public class HiveHasherSuite {
0031
0032 @Test
0033 public void testKnownIntegerInputs() {
0034 int[] inputs = {0, Integer.MIN_VALUE, Integer.MAX_VALUE, 593689054, -189366624};
0035 for (int input : inputs) {
0036 Assert.assertEquals(input, HiveHasher.hashInt(input));
0037 }
0038 }
0039
0040 @Test
0041 public void testKnownLongInputs() {
0042 Assert.assertEquals(0, HiveHasher.hashLong(0L));
0043 Assert.assertEquals(41, HiveHasher.hashLong(-42L));
0044 Assert.assertEquals(42, HiveHasher.hashLong(42L));
0045 Assert.assertEquals(-2147483648, HiveHasher.hashLong(Long.MIN_VALUE));
0046 Assert.assertEquals(-2147483648, HiveHasher.hashLong(Long.MAX_VALUE));
0047 }
0048
0049 @Test
0050 public void testKnownStringAndIntInputs() {
0051 int[] inputs = {84, 19, 8};
0052 int[] expected = {-823832826, -823835053, 111972242};
0053
0054 for (int i = 0; i < inputs.length; i++) {
0055 UTF8String s = UTF8String.fromString("val_" + inputs[i]);
0056 int hash = HiveHasher.hashUnsafeBytes(s.getBaseObject(), s.getBaseOffset(), s.numBytes());
0057 Assert.assertEquals(expected[i], ((31 * inputs[i]) + hash));
0058 }
0059 }
0060
0061 @Test
0062 public void randomizedStressTest() {
0063 int size = 65536;
0064 Random rand = new Random();
0065
0066
0067 Set<Integer> hashcodes = new HashSet<>();
0068 for (int i = 0; i < size; i++) {
0069 int vint = rand.nextInt();
0070 long lint = rand.nextLong();
0071 Assert.assertEquals(HiveHasher.hashInt(vint), HiveHasher.hashInt(vint));
0072 Assert.assertEquals(HiveHasher.hashLong(lint), HiveHasher.hashLong(lint));
0073
0074 hashcodes.add(HiveHasher.hashLong(lint));
0075 }
0076
0077
0078 Assert.assertTrue(hashcodes.size() > size * 0.95);
0079 }
0080
0081 @Test
0082 public void randomizedStressTestBytes() {
0083 int size = 65536;
0084 Random rand = new Random();
0085
0086
0087 Set<Integer> hashcodes = new HashSet<>();
0088 for (int i = 0; i < size; i++) {
0089 int byteArrSize = rand.nextInt(100) * 8;
0090 byte[] bytes = new byte[byteArrSize];
0091 rand.nextBytes(bytes);
0092
0093 Assert.assertEquals(
0094 HiveHasher.hashUnsafeBytes(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
0095 HiveHasher.hashUnsafeBytes(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0096
0097 hashcodes.add(HiveHasher.hashUnsafeBytes(
0098 bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0099 }
0100
0101
0102 Assert.assertTrue(hashcodes.size() > size * 0.95);
0103 }
0104
0105 @Test
0106 public void randomizedStressTestPaddedStrings() {
0107 int size = 64000;
0108
0109 Set<Integer> hashcodes = new HashSet<>();
0110 for (int i = 0; i < size; i++) {
0111 int byteArrSize = 8;
0112 byte[] strBytes = String.valueOf(i).getBytes(StandardCharsets.UTF_8);
0113 byte[] paddedBytes = new byte[byteArrSize];
0114 System.arraycopy(strBytes, 0, paddedBytes, 0, strBytes.length);
0115
0116 Assert.assertEquals(
0117 HiveHasher.hashUnsafeBytes(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
0118 HiveHasher.hashUnsafeBytes(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0119
0120 hashcodes.add(HiveHasher.hashUnsafeBytes(
0121 paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0122 }
0123
0124
0125 Assert.assertTrue(hashcodes.size() > size * 0.95);
0126 }
0127 }