0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 package org.apache.spark.sql.catalyst.expressions;
0018
0019 import org.apache.spark.unsafe.Platform;
0020 import org.apache.spark.unsafe.types.UTF8String;
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033 public final class XXH64 {
0034
0035 private static final long PRIME64_1 = 0x9E3779B185EBCA87L;
0036 private static final long PRIME64_2 = 0xC2B2AE3D27D4EB4FL;
0037 private static final long PRIME64_3 = 0x165667B19E3779F9L;
0038 private static final long PRIME64_4 = 0x85EBCA77C2B2AE63L;
0039 private static final long PRIME64_5 = 0x27D4EB2F165667C5L;
0040
0041 private final long seed;
0042
0043 public XXH64(long seed) {
0044 super();
0045 this.seed = seed;
0046 }
0047
0048 @Override
0049 public String toString() {
0050 return "xxHash64(seed=" + seed + ")";
0051 }
0052
0053 public long hashInt(int input) {
0054 return hashInt(input, seed);
0055 }
0056
0057 public static long hashInt(int input, long seed) {
0058 long hash = seed + PRIME64_5 + 4L;
0059 hash ^= (input & 0xFFFFFFFFL) * PRIME64_1;
0060 hash = Long.rotateLeft(hash, 23) * PRIME64_2 + PRIME64_3;
0061 return fmix(hash);
0062 }
0063
0064 public long hashLong(long input) {
0065 return hashLong(input, seed);
0066 }
0067
0068 public static long hashLong(long input, long seed) {
0069 long hash = seed + PRIME64_5 + 8L;
0070 hash ^= Long.rotateLeft(input * PRIME64_2, 31) * PRIME64_1;
0071 hash = Long.rotateLeft(hash, 27) * PRIME64_1 + PRIME64_4;
0072 return fmix(hash);
0073 }
0074
0075 public long hashUnsafeWords(Object base, long offset, int length) {
0076 return hashUnsafeWords(base, offset, length, seed);
0077 }
0078
0079 public static long hashUnsafeWords(Object base, long offset, int length, long seed) {
0080 assert (length % 8 == 0) : "lengthInBytes must be a multiple of 8 (word-aligned)";
0081 long hash = hashBytesByWords(base, offset, length, seed);
0082 return fmix(hash);
0083 }
0084
0085 public long hashUnsafeBytes(Object base, long offset, int length) {
0086 return hashUnsafeBytes(base, offset, length, seed);
0087 }
0088
0089 public static long hashUnsafeBytes(Object base, long offset, int length, long seed) {
0090 assert (length >= 0) : "lengthInBytes cannot be negative";
0091 long hash = hashBytesByWords(base, offset, length, seed);
0092 long end = offset + length;
0093 offset += length & -8;
0094
0095 if (offset + 4L <= end) {
0096 hash ^= (Platform.getInt(base, offset) & 0xFFFFFFFFL) * PRIME64_1;
0097 hash = Long.rotateLeft(hash, 23) * PRIME64_2 + PRIME64_3;
0098 offset += 4L;
0099 }
0100
0101 while (offset < end) {
0102 hash ^= (Platform.getByte(base, offset) & 0xFFL) * PRIME64_5;
0103 hash = Long.rotateLeft(hash, 11) * PRIME64_1;
0104 offset++;
0105 }
0106 return fmix(hash);
0107 }
0108
0109 public static long hashUTF8String(UTF8String str, long seed) {
0110 return hashUnsafeBytes(str.getBaseObject(), str.getBaseOffset(), str.numBytes(), seed);
0111 }
0112
0113 private static long fmix(long hash) {
0114 hash ^= hash >>> 33;
0115 hash *= PRIME64_2;
0116 hash ^= hash >>> 29;
0117 hash *= PRIME64_3;
0118 hash ^= hash >>> 32;
0119 return hash;
0120 }
0121
0122 private static long hashBytesByWords(Object base, long offset, int length, long seed) {
0123 long end = offset + length;
0124 long hash;
0125 if (length >= 32) {
0126 long limit = end - 32;
0127 long v1 = seed + PRIME64_1 + PRIME64_2;
0128 long v2 = seed + PRIME64_2;
0129 long v3 = seed;
0130 long v4 = seed - PRIME64_1;
0131
0132 do {
0133 v1 += Platform.getLong(base, offset) * PRIME64_2;
0134 v1 = Long.rotateLeft(v1, 31);
0135 v1 *= PRIME64_1;
0136
0137 v2 += Platform.getLong(base, offset + 8) * PRIME64_2;
0138 v2 = Long.rotateLeft(v2, 31);
0139 v2 *= PRIME64_1;
0140
0141 v3 += Platform.getLong(base, offset + 16) * PRIME64_2;
0142 v3 = Long.rotateLeft(v3, 31);
0143 v3 *= PRIME64_1;
0144
0145 v4 += Platform.getLong(base, offset + 24) * PRIME64_2;
0146 v4 = Long.rotateLeft(v4, 31);
0147 v4 *= PRIME64_1;
0148
0149 offset += 32L;
0150 } while (offset <= limit);
0151
0152 hash = Long.rotateLeft(v1, 1)
0153 + Long.rotateLeft(v2, 7)
0154 + Long.rotateLeft(v3, 12)
0155 + Long.rotateLeft(v4, 18);
0156
0157 v1 *= PRIME64_2;
0158 v1 = Long.rotateLeft(v1, 31);
0159 v1 *= PRIME64_1;
0160 hash ^= v1;
0161 hash = hash * PRIME64_1 + PRIME64_4;
0162
0163 v2 *= PRIME64_2;
0164 v2 = Long.rotateLeft(v2, 31);
0165 v2 *= PRIME64_1;
0166 hash ^= v2;
0167 hash = hash * PRIME64_1 + PRIME64_4;
0168
0169 v3 *= PRIME64_2;
0170 v3 = Long.rotateLeft(v3, 31);
0171 v3 *= PRIME64_1;
0172 hash ^= v3;
0173 hash = hash * PRIME64_1 + PRIME64_4;
0174
0175 v4 *= PRIME64_2;
0176 v4 = Long.rotateLeft(v4, 31);
0177 v4 *= PRIME64_1;
0178 hash ^= v4;
0179 hash = hash * PRIME64_1 + PRIME64_4;
0180 } else {
0181 hash = seed + PRIME64_5;
0182 }
0183
0184 hash += length;
0185
0186 long limit = end - 8;
0187 while (offset <= limit) {
0188 long k1 = Platform.getLong(base, offset);
0189 hash ^= Long.rotateLeft(k1 * PRIME64_2, 31) * PRIME64_1;
0190 hash = Long.rotateLeft(hash, 27) * PRIME64_1 + PRIME64_4;
0191 offset += 8L;
0192 }
0193 return hash;
0194 }
0195 }