0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.util.sketch;
0019
0020
0021
0022
0023
0024
0025 final class Murmur3_x86_32 {
0026 private static final int C1 = 0xcc9e2d51;
0027 private static final int C2 = 0x1b873593;
0028
0029 private final int seed;
0030
0031 Murmur3_x86_32(int seed) {
0032 this.seed = seed;
0033 }
0034
0035 @Override
0036 public String toString() {
0037 return "Murmur3_32(seed=" + seed + ")";
0038 }
0039
0040 public int hashInt(int input) {
0041 return hashInt(input, seed);
0042 }
0043
0044 public static int hashInt(int input, int seed) {
0045 int k1 = mixK1(input);
0046 int h1 = mixH1(seed, k1);
0047
0048 return fmix(h1, 4);
0049 }
0050
0051 public int hashUnsafeWords(Object base, long offset, int lengthInBytes) {
0052 return hashUnsafeWords(base, offset, lengthInBytes, seed);
0053 }
0054
0055 public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) {
0056
0057 assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 8 (word-aligned)";
0058 int h1 = hashBytesByInt(base, offset, lengthInBytes, seed);
0059 return fmix(h1, lengthInBytes);
0060 }
0061
0062 public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
0063
0064
0065 assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
0066 int lengthAligned = lengthInBytes - lengthInBytes % 4;
0067 int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
0068 for (int i = lengthAligned; i < lengthInBytes; i++) {
0069 int halfWord = Platform.getByte(base, offset + i);
0070 int k1 = mixK1(halfWord);
0071 h1 = mixH1(h1, k1);
0072 }
0073 return fmix(h1, lengthInBytes);
0074 }
0075
0076 public static int hashUnsafeBytes2(Object base, long offset, int lengthInBytes, int seed) {
0077
0078
0079 assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
0080 int lengthAligned = lengthInBytes - lengthInBytes % 4;
0081 int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
0082 int k1 = 0;
0083 for (int i = lengthAligned, shift = 0; i < lengthInBytes; i++, shift += 8) {
0084 k1 ^= (Platform.getByte(base, offset + i) & 0xFF) << shift;
0085 }
0086 h1 ^= mixK1(k1);
0087 return fmix(h1, lengthInBytes);
0088 }
0089
0090 private static int hashBytesByInt(Object base, long offset, int lengthInBytes, int seed) {
0091 assert (lengthInBytes % 4 == 0);
0092 int h1 = seed;
0093 for (int i = 0; i < lengthInBytes; i += 4) {
0094 int halfWord = Platform.getInt(base, offset + i);
0095 int k1 = mixK1(halfWord);
0096 h1 = mixH1(h1, k1);
0097 }
0098 return h1;
0099 }
0100
0101 public int hashLong(long input) {
0102 return hashLong(input, seed);
0103 }
0104
0105 public static int hashLong(long input, int seed) {
0106 int low = (int) input;
0107 int high = (int) (input >>> 32);
0108
0109 int k1 = mixK1(low);
0110 int h1 = mixH1(seed, k1);
0111
0112 k1 = mixK1(high);
0113 h1 = mixH1(h1, k1);
0114
0115 return fmix(h1, 8);
0116 }
0117
0118 private static int mixK1(int k1) {
0119 k1 *= C1;
0120 k1 = Integer.rotateLeft(k1, 15);
0121 k1 *= C2;
0122 return k1;
0123 }
0124
0125 private static int mixH1(int h1, int k1) {
0126 h1 ^= k1;
0127 h1 = Integer.rotateLeft(h1, 13);
0128 h1 = h1 * 5 + 0xe6546b64;
0129 return h1;
0130 }
0131
0132
0133 private static int fmix(int h1, int length) {
0134 h1 ^= length;
0135 h1 ^= h1 >>> 16;
0136 h1 *= 0x85ebca6b;
0137 h1 ^= h1 >>> 13;
0138 h1 *= 0xc2b2ae35;
0139 h1 ^= h1 >>> 16;
0140 return h1;
0141 }
0142 }