0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.unsafe.hash;
0019
0020 import java.nio.charset.StandardCharsets;
0021 import java.util.HashSet;
0022 import java.util.Random;
0023 import java.util.Set;
0024
0025 import scala.util.hashing.MurmurHash3$;
0026
0027 import org.apache.spark.unsafe.Platform;
0028 import org.junit.Assert;
0029 import org.junit.Test;
0030
0031
0032
0033
0034 public class Murmur3_x86_32Suite {
0035
0036 private static final Murmur3_x86_32 hasher = new Murmur3_x86_32(0);
0037
0038 @Test
0039 public void testKnownIntegerInputs() {
0040 Assert.assertEquals(593689054, hasher.hashInt(0));
0041 Assert.assertEquals(-189366624, hasher.hashInt(-42));
0042 Assert.assertEquals(-1134849565, hasher.hashInt(42));
0043 Assert.assertEquals(-1718298732, hasher.hashInt(Integer.MIN_VALUE));
0044 Assert.assertEquals(-1653689534, hasher.hashInt(Integer.MAX_VALUE));
0045 }
0046
0047 @Test
0048 public void testKnownLongInputs() {
0049 Assert.assertEquals(1669671676, hasher.hashLong(0L));
0050 Assert.assertEquals(-846261623, hasher.hashLong(-42L));
0051 Assert.assertEquals(1871679806, hasher.hashLong(42L));
0052 Assert.assertEquals(1366273829, hasher.hashLong(Long.MIN_VALUE));
0053 Assert.assertEquals(-2106506049, hasher.hashLong(Long.MAX_VALUE));
0054 }
0055
0056
0057 @Test
0058 public void testKnownBytesInputs() {
0059 byte[] test = "test".getBytes(StandardCharsets.UTF_8);
0060 Assert.assertEquals(MurmurHash3$.MODULE$.bytesHash(test, 0),
0061 Murmur3_x86_32.hashUnsafeBytes2(test, Platform.BYTE_ARRAY_OFFSET, test.length, 0));
0062 byte[] test1 = "test1".getBytes(StandardCharsets.UTF_8);
0063 Assert.assertEquals(MurmurHash3$.MODULE$.bytesHash(test1, 0),
0064 Murmur3_x86_32.hashUnsafeBytes2(test1, Platform.BYTE_ARRAY_OFFSET, test1.length, 0));
0065 byte[] te = "te".getBytes(StandardCharsets.UTF_8);
0066 Assert.assertEquals(MurmurHash3$.MODULE$.bytesHash(te, 0),
0067 Murmur3_x86_32.hashUnsafeBytes2(te, Platform.BYTE_ARRAY_OFFSET, te.length, 0));
0068 byte[] tes = "tes".getBytes(StandardCharsets.UTF_8);
0069 Assert.assertEquals(MurmurHash3$.MODULE$.bytesHash(tes, 0),
0070 Murmur3_x86_32.hashUnsafeBytes2(tes, Platform.BYTE_ARRAY_OFFSET, tes.length, 0));
0071 }
0072
0073 @Test
0074 public void randomizedStressTest() {
0075 int size = 65536;
0076 Random rand = new Random();
0077
0078
0079 Set<Integer> hashcodes = new HashSet<>();
0080 for (int i = 0; i < size; i++) {
0081 int vint = rand.nextInt();
0082 long lint = rand.nextLong();
0083 Assert.assertEquals(hasher.hashInt(vint), hasher.hashInt(vint));
0084 Assert.assertEquals(hasher.hashLong(lint), hasher.hashLong(lint));
0085
0086 hashcodes.add(hasher.hashLong(lint));
0087 }
0088
0089
0090 Assert.assertTrue(hashcodes.size() > size * 0.95);
0091 }
0092
0093 @Test
0094 public void randomizedStressTestBytes() {
0095 int size = 65536;
0096 Random rand = new Random();
0097
0098
0099 Set<Integer> hashcodes = new HashSet<>();
0100 for (int i = 0; i < size; i++) {
0101 int byteArrSize = rand.nextInt(100) * 8;
0102 byte[] bytes = new byte[byteArrSize];
0103 rand.nextBytes(bytes);
0104
0105 Assert.assertEquals(
0106 hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
0107 hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0108
0109 hashcodes.add(hasher.hashUnsafeWords(
0110 bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0111 }
0112
0113
0114 Assert.assertTrue(hashcodes.size() > size * 0.95);
0115 }
0116
0117 @Test
0118 public void randomizedStressTestPaddedStrings() {
0119 int size = 64000;
0120
0121 Set<Integer> hashcodes = new HashSet<>();
0122 for (int i = 0; i < size; i++) {
0123 int byteArrSize = 8;
0124 byte[] strBytes = String.valueOf(i).getBytes(StandardCharsets.UTF_8);
0125 byte[] paddedBytes = new byte[byteArrSize];
0126 System.arraycopy(strBytes, 0, paddedBytes, 0, strBytes.length);
0127
0128 Assert.assertEquals(
0129 hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
0130 hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0131
0132 hashcodes.add(hasher.hashUnsafeWords(
0133 paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0134 }
0135
0136
0137 Assert.assertTrue(hashcodes.size() > size * 0.95);
0138 }
0139 }