Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 package org.apache.spark.unsafe.hash;
0019 
0020 import java.nio.charset.StandardCharsets;
0021 import java.util.HashSet;
0022 import java.util.Random;
0023 import java.util.Set;
0024 
0025 import scala.util.hashing.MurmurHash3$;
0026 
0027 import org.apache.spark.unsafe.Platform;
0028 import org.junit.Assert;
0029 import org.junit.Test;
0030 
0031 /**
0032  * Test file based on Guava's Murmur3Hash32Test.
0033  */
0034 public class Murmur3_x86_32Suite {
0035 
0036   private static final Murmur3_x86_32 hasher = new Murmur3_x86_32(0);
0037 
0038   @Test
0039   public void testKnownIntegerInputs() {
0040     Assert.assertEquals(593689054, hasher.hashInt(0));
0041     Assert.assertEquals(-189366624, hasher.hashInt(-42));
0042     Assert.assertEquals(-1134849565, hasher.hashInt(42));
0043     Assert.assertEquals(-1718298732, hasher.hashInt(Integer.MIN_VALUE));
0044     Assert.assertEquals(-1653689534, hasher.hashInt(Integer.MAX_VALUE));
0045   }
0046 
0047   @Test
0048   public void testKnownLongInputs() {
0049     Assert.assertEquals(1669671676, hasher.hashLong(0L));
0050     Assert.assertEquals(-846261623, hasher.hashLong(-42L));
0051     Assert.assertEquals(1871679806, hasher.hashLong(42L));
0052     Assert.assertEquals(1366273829, hasher.hashLong(Long.MIN_VALUE));
0053     Assert.assertEquals(-2106506049, hasher.hashLong(Long.MAX_VALUE));
0054   }
0055 
0056   // SPARK-23381 Check whether the hash of the byte array is the same as another implementations
0057   @Test
0058   public void testKnownBytesInputs() {
0059     byte[] test = "test".getBytes(StandardCharsets.UTF_8);
0060     Assert.assertEquals(MurmurHash3$.MODULE$.bytesHash(test, 0),
0061       Murmur3_x86_32.hashUnsafeBytes2(test, Platform.BYTE_ARRAY_OFFSET, test.length, 0));
0062     byte[] test1 = "test1".getBytes(StandardCharsets.UTF_8);
0063     Assert.assertEquals(MurmurHash3$.MODULE$.bytesHash(test1, 0),
0064       Murmur3_x86_32.hashUnsafeBytes2(test1, Platform.BYTE_ARRAY_OFFSET, test1.length, 0));
0065     byte[] te = "te".getBytes(StandardCharsets.UTF_8);
0066     Assert.assertEquals(MurmurHash3$.MODULE$.bytesHash(te, 0),
0067       Murmur3_x86_32.hashUnsafeBytes2(te, Platform.BYTE_ARRAY_OFFSET, te.length, 0));
0068     byte[] tes = "tes".getBytes(StandardCharsets.UTF_8);
0069     Assert.assertEquals(MurmurHash3$.MODULE$.bytesHash(tes, 0),
0070       Murmur3_x86_32.hashUnsafeBytes2(tes, Platform.BYTE_ARRAY_OFFSET, tes.length, 0));
0071   }
0072 
0073   @Test
0074   public void randomizedStressTest() {
0075     int size = 65536;
0076     Random rand = new Random();
0077 
0078     // A set used to track collision rate.
0079     Set<Integer> hashcodes = new HashSet<>();
0080     for (int i = 0; i < size; i++) {
0081       int vint = rand.nextInt();
0082       long lint = rand.nextLong();
0083       Assert.assertEquals(hasher.hashInt(vint), hasher.hashInt(vint));
0084       Assert.assertEquals(hasher.hashLong(lint), hasher.hashLong(lint));
0085 
0086       hashcodes.add(hasher.hashLong(lint));
0087     }
0088 
0089     // A very loose bound.
0090     Assert.assertTrue(hashcodes.size() > size * 0.95);
0091   }
0092 
0093   @Test
0094   public void randomizedStressTestBytes() {
0095     int size = 65536;
0096     Random rand = new Random();
0097 
0098     // A set used to track collision rate.
0099     Set<Integer> hashcodes = new HashSet<>();
0100     for (int i = 0; i < size; i++) {
0101       int byteArrSize = rand.nextInt(100) * 8;
0102       byte[] bytes = new byte[byteArrSize];
0103       rand.nextBytes(bytes);
0104 
0105       Assert.assertEquals(
0106         hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
0107         hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0108 
0109       hashcodes.add(hasher.hashUnsafeWords(
0110         bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0111     }
0112 
0113     // A very loose bound.
0114     Assert.assertTrue(hashcodes.size() > size * 0.95);
0115   }
0116 
0117   @Test
0118   public void randomizedStressTestPaddedStrings() {
0119     int size = 64000;
0120     // A set used to track collision rate.
0121     Set<Integer> hashcodes = new HashSet<>();
0122     for (int i = 0; i < size; i++) {
0123       int byteArrSize = 8;
0124       byte[] strBytes = String.valueOf(i).getBytes(StandardCharsets.UTF_8);
0125       byte[] paddedBytes = new byte[byteArrSize];
0126       System.arraycopy(strBytes, 0, paddedBytes, 0, strBytes.length);
0127 
0128       Assert.assertEquals(
0129         hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
0130         hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0131 
0132       hashcodes.add(hasher.hashUnsafeWords(
0133         paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0134     }
0135 
0136     // A very loose bound.
0137     Assert.assertTrue(hashcodes.size() > size * 0.95);
0138   }
0139 }