Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 package org.apache.spark.unsafe.hash;
0019 
0020 import org.apache.spark.unsafe.Platform;
0021 
0022 /**
0023  * 32-bit Murmur3 hasher.  This is based on Guava's Murmur3_32HashFunction.
0024  */
0025 public final class Murmur3_x86_32 {
0026   private static final int C1 = 0xcc9e2d51;
0027   private static final int C2 = 0x1b873593;
0028 
0029   private final int seed;
0030 
0031   public Murmur3_x86_32(int seed) {
0032     this.seed = seed;
0033   }
0034 
0035   @Override
0036   public String toString() {
0037     return "Murmur3_32(seed=" + seed + ")";
0038   }
0039 
0040   public int hashInt(int input) {
0041     return hashInt(input, seed);
0042   }
0043 
0044   public static int hashInt(int input, int seed) {
0045     int k1 = mixK1(input);
0046     int h1 = mixH1(seed, k1);
0047 
0048     return fmix(h1, 4);
0049   }
0050 
0051   public int hashUnsafeWords(Object base, long offset, int lengthInBytes) {
0052     return hashUnsafeWords(base, offset, lengthInBytes, seed);
0053   }
0054 
0055   public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) {
0056     // This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method.
0057     assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 8 (word-aligned)";
0058     int h1 = hashBytesByInt(base, offset, lengthInBytes, seed);
0059     return fmix(h1, lengthInBytes);
0060   }
0061 
0062   public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
0063     // This is not compatible with original and another implementations.
0064     // But remain it for backward compatibility for the components existing before 2.3.
0065     assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
0066     int lengthAligned = lengthInBytes - lengthInBytes % 4;
0067     int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
0068     for (int i = lengthAligned; i < lengthInBytes; i++) {
0069       int halfWord = Platform.getByte(base, offset + i);
0070       int k1 = mixK1(halfWord);
0071       h1 = mixH1(h1, k1);
0072     }
0073     return fmix(h1, lengthInBytes);
0074   }
0075 
0076   public static int hashUnsafeBytes2(Object base, long offset, int lengthInBytes, int seed) {
0077     // This is compatible with original and another implementations.
0078     // Use this method for new components after Spark 2.3.
0079     assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
0080     int lengthAligned = lengthInBytes - lengthInBytes % 4;
0081     int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
0082     int k1 = 0;
0083     for (int i = lengthAligned, shift = 0; i < lengthInBytes; i++, shift += 8) {
0084       k1 ^= (Platform.getByte(base, offset + i) & 0xFF) << shift;
0085     }
0086     h1 ^= mixK1(k1);
0087     return fmix(h1, lengthInBytes);
0088   }
0089 
0090   private static int hashBytesByInt(Object base, long offset, int lengthInBytes, int seed) {
0091     assert (lengthInBytes % 4 == 0);
0092     int h1 = seed;
0093     for (int i = 0; i < lengthInBytes; i += 4) {
0094       int halfWord = Platform.getInt(base, offset + i);
0095       int k1 = mixK1(halfWord);
0096       h1 = mixH1(h1, k1);
0097     }
0098     return h1;
0099   }
0100 
0101   public int hashLong(long input) {
0102     return hashLong(input, seed);
0103   }
0104 
0105   public static int hashLong(long input, int seed) {
0106     int low = (int) input;
0107     int high = (int) (input >>> 32);
0108 
0109     int k1 = mixK1(low);
0110     int h1 = mixH1(seed, k1);
0111 
0112     k1 = mixK1(high);
0113     h1 = mixH1(h1, k1);
0114 
0115     return fmix(h1, 8);
0116   }
0117 
0118   private static int mixK1(int k1) {
0119     k1 *= C1;
0120     k1 = Integer.rotateLeft(k1, 15);
0121     k1 *= C2;
0122     return k1;
0123   }
0124 
0125   private static int mixH1(int h1, int k1) {
0126     h1 ^= k1;
0127     h1 = Integer.rotateLeft(h1, 13);
0128     h1 = h1 * 5 + 0xe6546b64;
0129     return h1;
0130   }
0131 
0132   // Finalization mix - force all bits of a hash block to avalanche
0133   private static int fmix(int h1, int length) {
0134     h1 ^= length;
0135     h1 ^= h1 >>> 16;
0136     h1 *= 0x85ebca6b;
0137     h1 ^= h1 >>> 13;
0138     h1 *= 0xc2b2ae35;
0139     h1 ^= h1 >>> 16;
0140     return h1;
0141   }
0142 }