Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 package org.apache.spark.sql.catalyst.expressions;
0019 
0020 import java.nio.ByteOrder;
0021 import java.nio.charset.StandardCharsets;
0022 import java.util.HashSet;
0023 import java.util.Random;
0024 import java.util.Set;
0025 
0026 import org.apache.spark.unsafe.Platform;
0027 import org.junit.Assert;
0028 import org.junit.Test;
0029 
0030 /**
0031  * Test the XXH64 function.
0032  * <p/>
0033  * Test constants were taken from the original implementation and the airlift/slice implementation.
0034  */
0035 public class XXH64Suite {
0036 
0037   private static final XXH64 hasher = new XXH64(0);
0038 
0039   private static final int SIZE = 101;
0040   private static final long PRIME = 2654435761L;
0041   private static final byte[] BUFFER = new byte[SIZE];
0042   private static final int TEST_INT = 0x4B1FFF9E; // First 4 bytes in the buffer
0043   private static final long TEST_LONG = 0xDD2F535E4B1FFF9EL; // First 8 bytes in the buffer
0044 
0045   /* Create the test data. */
0046   static {
0047     long seed = PRIME;
0048     for (int i = 0; i < SIZE; i++) {
0049       BUFFER[i] = (byte) (seed >> 24);
0050       seed *= seed;
0051     }
0052   }
0053 
0054   @Test
0055   public void testKnownIntegerInputs() {
0056     Assert.assertEquals(0x9256E58AA397AEF1L, hasher.hashInt(TEST_INT));
0057     Assert.assertEquals(0x9D5FFDFB928AB4BL, XXH64.hashInt(TEST_INT, PRIME));
0058   }
0059 
0060   @Test
0061   public void testKnownLongInputs() {
0062     Assert.assertEquals(0xF74CB1451B32B8CFL, hasher.hashLong(TEST_LONG));
0063     Assert.assertEquals(0x9C44B77FBCC302C5L, XXH64.hashLong(TEST_LONG, PRIME));
0064   }
0065 
0066   @Test
0067   public void testKnownByteArrayInputs() {
0068     Assert.assertEquals(0xEF46DB3751D8E999L,
0069             hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 0));
0070     Assert.assertEquals(0xAC75FDA2929B17EFL,
0071             XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 0, PRIME));
0072     Assert.assertEquals(0x4FCE394CC88952D8L,
0073             hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 1));
0074     Assert.assertEquals(0x739840CB819FA723L,
0075             XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 1, PRIME));
0076 
0077     if (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN) {
0078       Assert.assertEquals(0x9256E58AA397AEF1L,
0079               hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 4));
0080       Assert.assertEquals(0x9D5FFDFB928AB4BL,
0081               XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 4, PRIME));
0082       Assert.assertEquals(0xF74CB1451B32B8CFL,
0083               hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 8));
0084       Assert.assertEquals(0x9C44B77FBCC302C5L,
0085               XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 8, PRIME));
0086       Assert.assertEquals(0xCFFA8DB881BC3A3DL,
0087               hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 14));
0088       Assert.assertEquals(0x5B9611585EFCC9CBL,
0089               XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 14, PRIME));
0090       Assert.assertEquals(0x0EAB543384F878ADL,
0091               hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, SIZE));
0092       Assert.assertEquals(0xCAA65939306F1E21L,
0093               XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, SIZE, PRIME));
0094     } else {
0095       Assert.assertEquals(0x7F875412350ADDDCL,
0096               hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 4));
0097       Assert.assertEquals(0x564D279F524D8516L,
0098               XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 4, PRIME));
0099       Assert.assertEquals(0x7D9F07E27E0EB006L,
0100               hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 8));
0101       Assert.assertEquals(0x893CEF564CB7858L,
0102               XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 8, PRIME));
0103       Assert.assertEquals(0xC6198C4C9CC49E17L,
0104               hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 14));
0105       Assert.assertEquals(0x4E21BEF7164D4BBL,
0106               XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, 14, PRIME));
0107       Assert.assertEquals(0xBCF5FAEDEE1F2B5AL,
0108               hasher.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, SIZE));
0109       Assert.assertEquals(0x6F680C877A358FE5L,
0110               XXH64.hashUnsafeBytes(BUFFER, Platform.BYTE_ARRAY_OFFSET, SIZE, PRIME));
0111     }
0112   }
0113 
0114   @Test
0115   public void randomizedStressTest() {
0116     int size = 65536;
0117     Random rand = new Random();
0118 
0119     // A set used to track collision rate.
0120     Set<Long> hashcodes = new HashSet<>();
0121     for (int i = 0; i < size; i++) {
0122       int vint = rand.nextInt();
0123       long lint = rand.nextLong();
0124       Assert.assertEquals(hasher.hashInt(vint), hasher.hashInt(vint));
0125       Assert.assertEquals(hasher.hashLong(lint), hasher.hashLong(lint));
0126 
0127       hashcodes.add(hasher.hashLong(lint));
0128     }
0129 
0130     // A very loose bound.
0131     Assert.assertTrue(hashcodes.size() > size * 0.95d);
0132   }
0133 
0134   @Test
0135   public void randomizedStressTestBytes() {
0136     int size = 65536;
0137     Random rand = new Random();
0138 
0139     // A set used to track collision rate.
0140     Set<Long> hashcodes = new HashSet<>();
0141     for (int i = 0; i < size; i++) {
0142       int byteArrSize = rand.nextInt(100) * 8;
0143       byte[] bytes = new byte[byteArrSize];
0144       rand.nextBytes(bytes);
0145 
0146       Assert.assertEquals(
0147               hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
0148               hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0149 
0150       hashcodes.add(hasher.hashUnsafeWords(
0151               bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0152     }
0153 
0154     // A very loose bound.
0155     Assert.assertTrue(hashcodes.size() > size * 0.95d);
0156   }
0157 
0158   @Test
0159   public void randomizedStressTestPaddedStrings() {
0160     int size = 64000;
0161     // A set used to track collision rate.
0162     Set<Long> hashcodes = new HashSet<>();
0163     for (int i = 0; i < size; i++) {
0164       int byteArrSize = 8;
0165       byte[] strBytes = String.valueOf(i).getBytes(StandardCharsets.UTF_8);
0166       byte[] paddedBytes = new byte[byteArrSize];
0167       System.arraycopy(strBytes, 0, paddedBytes, 0, strBytes.length);
0168 
0169       Assert.assertEquals(
0170               hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
0171               hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0172 
0173       hashcodes.add(hasher.hashUnsafeWords(
0174               paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
0175     }
0176 
0177     // A very loose bound.
0178     Assert.assertTrue(hashcodes.size() > size * 0.95d);
0179   }
0180 }