Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 package org.apache.spark.sql.catalyst.expressions;
0018 
0019 import org.apache.spark.unsafe.Platform;
0020 import org.apache.spark.unsafe.types.UTF8String;
0021 
0022 // scalastyle: off
0023 /**
0024  * xxHash64. A high quality and fast 64 bit hash code by Yann Colet and Mathias Westerdahl. The
0025  * class below is modelled like its Murmur3_x86_32 cousin.
0026  * <p/>
0027  * This was largely based on the following (original) C and Java implementations:
0028  * https://github.com/Cyan4973/xxHash/blob/master/xxhash.c
0029  * https://github.com/OpenHFT/Zero-Allocation-Hashing/blob/master/src/main/java/net/openhft/hashing/XxHash_r39.java
0030  * https://github.com/airlift/slice/blob/master/src/main/java/io/airlift/slice/XxHash64.java
0031  */
0032 // scalastyle: on
0033 public final class XXH64 {
0034 
0035   private static final long PRIME64_1 = 0x9E3779B185EBCA87L;
0036   private static final long PRIME64_2 = 0xC2B2AE3D27D4EB4FL;
0037   private static final long PRIME64_3 = 0x165667B19E3779F9L;
0038   private static final long PRIME64_4 = 0x85EBCA77C2B2AE63L;
0039   private static final long PRIME64_5 = 0x27D4EB2F165667C5L;
0040 
0041   private final long seed;
0042 
0043   public XXH64(long seed) {
0044     super();
0045     this.seed = seed;
0046   }
0047 
0048   @Override
0049   public String toString() {
0050     return "xxHash64(seed=" + seed + ")";
0051   }
0052 
0053   public long hashInt(int input) {
0054     return hashInt(input, seed);
0055   }
0056 
0057   public static long hashInt(int input, long seed) {
0058     long hash = seed + PRIME64_5 + 4L;
0059     hash ^= (input & 0xFFFFFFFFL) * PRIME64_1;
0060     hash = Long.rotateLeft(hash, 23) * PRIME64_2 + PRIME64_3;
0061     return fmix(hash);
0062   }
0063 
0064   public long hashLong(long input) {
0065     return hashLong(input, seed);
0066   }
0067 
0068   public static long hashLong(long input, long seed) {
0069     long hash = seed + PRIME64_5 + 8L;
0070     hash ^= Long.rotateLeft(input * PRIME64_2, 31) * PRIME64_1;
0071     hash = Long.rotateLeft(hash, 27) * PRIME64_1 + PRIME64_4;
0072     return fmix(hash);
0073   }
0074 
0075   public long hashUnsafeWords(Object base, long offset, int length) {
0076     return hashUnsafeWords(base, offset, length, seed);
0077   }
0078 
0079   public static long hashUnsafeWords(Object base, long offset, int length, long seed) {
0080     assert (length % 8 == 0) : "lengthInBytes must be a multiple of 8 (word-aligned)";
0081     long hash = hashBytesByWords(base, offset, length, seed);
0082     return fmix(hash);
0083   }
0084 
0085   public long hashUnsafeBytes(Object base, long offset, int length) {
0086     return hashUnsafeBytes(base, offset, length, seed);
0087   }
0088 
0089   public static long hashUnsafeBytes(Object base, long offset, int length, long seed) {
0090     assert (length >= 0) : "lengthInBytes cannot be negative";
0091     long hash = hashBytesByWords(base, offset, length, seed);
0092     long end = offset + length;
0093     offset += length & -8;
0094 
0095     if (offset + 4L <= end) {
0096       hash ^= (Platform.getInt(base, offset) & 0xFFFFFFFFL) * PRIME64_1;
0097       hash = Long.rotateLeft(hash, 23) * PRIME64_2 + PRIME64_3;
0098       offset += 4L;
0099     }
0100 
0101     while (offset < end) {
0102       hash ^= (Platform.getByte(base, offset) & 0xFFL) * PRIME64_5;
0103       hash = Long.rotateLeft(hash, 11) * PRIME64_1;
0104       offset++;
0105     }
0106     return fmix(hash);
0107   }
0108 
0109   public static long hashUTF8String(UTF8String str, long seed) {
0110     return hashUnsafeBytes(str.getBaseObject(), str.getBaseOffset(), str.numBytes(), seed);
0111   }
0112 
0113   private static long fmix(long hash) {
0114     hash ^= hash >>> 33;
0115     hash *= PRIME64_2;
0116     hash ^= hash >>> 29;
0117     hash *= PRIME64_3;
0118     hash ^= hash >>> 32;
0119     return hash;
0120   }
0121 
0122   private static long hashBytesByWords(Object base, long offset, int length, long seed) {
0123     long end = offset + length;
0124     long hash;
0125     if (length >= 32) {
0126       long limit = end - 32;
0127       long v1 = seed + PRIME64_1 + PRIME64_2;
0128       long v2 = seed + PRIME64_2;
0129       long v3 = seed;
0130       long v4 = seed - PRIME64_1;
0131 
0132       do {
0133         v1 += Platform.getLong(base, offset) * PRIME64_2;
0134         v1 = Long.rotateLeft(v1, 31);
0135         v1 *= PRIME64_1;
0136 
0137         v2 += Platform.getLong(base, offset + 8) * PRIME64_2;
0138         v2 = Long.rotateLeft(v2, 31);
0139         v2 *= PRIME64_1;
0140 
0141         v3 += Platform.getLong(base, offset + 16) * PRIME64_2;
0142         v3 = Long.rotateLeft(v3, 31);
0143         v3 *= PRIME64_1;
0144 
0145         v4 += Platform.getLong(base, offset + 24) * PRIME64_2;
0146         v4 = Long.rotateLeft(v4, 31);
0147         v4 *= PRIME64_1;
0148 
0149         offset += 32L;
0150       } while (offset <= limit);
0151 
0152       hash = Long.rotateLeft(v1, 1)
0153               + Long.rotateLeft(v2, 7)
0154               + Long.rotateLeft(v3, 12)
0155               + Long.rotateLeft(v4, 18);
0156 
0157       v1 *= PRIME64_2;
0158       v1 = Long.rotateLeft(v1, 31);
0159       v1 *= PRIME64_1;
0160       hash ^= v1;
0161       hash = hash * PRIME64_1 + PRIME64_4;
0162 
0163       v2 *= PRIME64_2;
0164       v2 = Long.rotateLeft(v2, 31);
0165       v2 *= PRIME64_1;
0166       hash ^= v2;
0167       hash = hash * PRIME64_1 + PRIME64_4;
0168 
0169       v3 *= PRIME64_2;
0170       v3 = Long.rotateLeft(v3, 31);
0171       v3 *= PRIME64_1;
0172       hash ^= v3;
0173       hash = hash * PRIME64_1 + PRIME64_4;
0174 
0175       v4 *= PRIME64_2;
0176       v4 = Long.rotateLeft(v4, 31);
0177       v4 *= PRIME64_1;
0178       hash ^= v4;
0179       hash = hash * PRIME64_1 + PRIME64_4;
0180     } else {
0181       hash = seed + PRIME64_5;
0182     }
0183 
0184     hash += length;
0185 
0186     long limit = end - 8;
0187     while (offset <= limit) {
0188       long k1 = Platform.getLong(base, offset);
0189       hash ^= Long.rotateLeft(k1 * PRIME64_2, 31) * PRIME64_1;
0190       hash = Long.rotateLeft(hash, 27) * PRIME64_1 + PRIME64_4;
0191       offset += 8L;
0192     }
0193     return hash;
0194   }
0195 }