Home | History | Annotate | Download | only in hash
      1 /*
      2  * Copyright (C) 2011 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
      5  * in compliance with the License. You may obtain a copy of the License at
      6  *
      7  * http://www.apache.org/licenses/LICENSE-2.0
      8  *
      9  * Unless required by applicable law or agreed to in writing, software distributed under the License
     10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
     11  * or implied. See the License for the specific language governing permissions and limitations under
     12  * the License.
     13  */
     14 
     15 package com.google.common.hash;
     16 
     17 import static com.google.common.base.Preconditions.checkArgument;
     18 
     19 import com.google.common.annotations.Beta;
     20 import com.google.common.annotations.VisibleForTesting;
     21 import com.google.common.base.Supplier;
     22 
     23 import java.security.MessageDigest;
     24 import java.util.Iterator;
     25 import java.util.zip.Adler32;
     26 import java.util.zip.CRC32;
     27 import java.util.zip.Checksum;
     28 
     29 import javax.annotation.Nullable;
     30 
     31 /**
     32  * Static methods to obtain {@link HashFunction} instances, and other static hashing-related
     33  * utilities.
     34  *
     35  * <p>A comparison of the various hash functions can be found
     36  * <a href="http://goo.gl/jS7HH">here</a>.
     37  *
     38  * @author Kevin Bourrillion
     39  * @author Dimitris Andreou
     40  * @author Kurt Alfred Kluever
     41  * @since 11.0
     42  */
     43 @Beta
     44 public final class Hashing {
     45   /**
     46    * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash function. The algorithm
     47    * the returned function implements is unspecified and subject to change without notice.
     48    *
     49    * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code
     50    * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current
     51    * process in any way, for example being sent over RPC, or saved to disk.
     52    *
     53    * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value
     54    * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances.
     55    *
     56    * @param minimumBits a positive integer (can be arbitrarily large)
     57    * @return a hash function, described above, that produces hash codes of length {@code
     58    *     minimumBits} or greater
     59    */
     60   public static HashFunction goodFastHash(int minimumBits) {
     61     int bits = checkPositiveAndMakeMultipleOf32(minimumBits);
     62 
     63     if (bits == 32) {
     64       return Murmur3_32Holder.GOOD_FAST_HASH_FUNCTION_32;
     65     }
     66     if (bits <= 128) {
     67       return Murmur3_128Holder.GOOD_FAST_HASH_FUNCTION_128;
     68     }
     69 
     70     // Otherwise, join together some 128-bit murmur3s
     71     int hashFunctionsNeeded = (bits + 127) / 128;
     72     HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded];
     73     hashFunctions[0] = Murmur3_128Holder.GOOD_FAST_HASH_FUNCTION_128;
     74     int seed = GOOD_FAST_HASH_SEED;
     75     for (int i = 1; i < hashFunctionsNeeded; i++) {
     76       seed += 1500450271; // a prime; shouldn't matter
     77       hashFunctions[i] = murmur3_128(seed);
     78     }
     79     return new ConcatenatedHashFunction(hashFunctions);
     80   }
     81 
     82   /**
     83    * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything
     84    * dependent on the hash codes they produce will fail sooner.
     85    */
     86   private static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis();
     87 
     88   /**
     89    * Returns a hash function implementing the
     90    * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
     91    * 32-bit murmur3 algorithm, x86 variant</a> (little-endian variant),
     92    * using the given seed value.
     93    *
     94    * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
     95    */
     96   public static HashFunction murmur3_32(int seed) {
     97     return new Murmur3_32HashFunction(seed);
     98   }
     99 
    100   /**
    101    * Returns a hash function implementing the
    102    * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
    103    * 32-bit murmur3 algorithm, x86 variant</a> (little-endian variant),
    104    * using a seed value of zero.
    105    *
    106    * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
    107    */
    108   public static HashFunction murmur3_32() {
    109     return Murmur3_32Holder.MURMUR3_32;
    110   }
    111 
    112   private static class Murmur3_32Holder {
    113     static final HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0);
    114 
    115     /** Returned by {@link #goodFastHash} when {@code minimumBits <= 32}. */
    116     static final HashFunction GOOD_FAST_HASH_FUNCTION_32 = murmur3_32(GOOD_FAST_HASH_SEED);
    117   }
    118 
    119   /**
    120    * Returns a hash function implementing the
    121    * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
    122    * 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant),
    123    * using the given seed value.
    124    *
    125    * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
    126    */
    127   public static HashFunction murmur3_128(int seed) {
    128     return new Murmur3_128HashFunction(seed);
    129   }
    130 
    131   /**
    132    * Returns a hash function implementing the
    133    * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
    134    * 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant),
    135    * using a seed value of zero.
    136    *
    137    * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
    138    */
    139   public static HashFunction murmur3_128() {
    140     return Murmur3_128Holder.MURMUR3_128;
    141   }
    142 
    143   private static class Murmur3_128Holder {
    144     static final HashFunction MURMUR3_128 = new Murmur3_128HashFunction(0);
    145 
    146     /** Returned by {@link #goodFastHash} when {@code 32 < minimumBits <= 128}. */
    147     static final HashFunction GOOD_FAST_HASH_FUNCTION_128 = murmur3_128(GOOD_FAST_HASH_SEED);
    148   }
    149 
    150   /**
    151    * Returns a hash function implementing the
    152    * <a href="https://131002.net/siphash/">64-bit SipHash-2-4 algorithm</a>
    153    * using a seed value of {@code k = 00 01 02 ...}.
    154    *
    155    * @since 15.0
    156    */
    157   public static HashFunction sipHash24() {
    158     return SipHash24Holder.SIP_HASH_24;
    159   }
    160 
    161   private static class SipHash24Holder {
    162     static final HashFunction SIP_HASH_24 =
    163         new SipHashFunction(2, 4, 0x0706050403020100L, 0x0f0e0d0c0b0a0908L);
    164   }
    165 
    166   /**
    167    * Returns a hash function implementing the
    168    * <a href="https://131002.net/siphash/">64-bit SipHash-2-4 algorithm</a>
    169    * using the given seed.
    170    *
    171    * @since 15.0
    172    */
    173   public static HashFunction sipHash24(long k0, long k1) {
    174     return new SipHashFunction(2, 4, k0, k1);
    175   }
    176 
    177   /**
    178    * Returns a hash function implementing the MD5 hash algorithm (128 hash bits) by delegating to
    179    * the MD5 {@link MessageDigest}.
    180    */
    181   public static HashFunction md5() {
    182     return Md5Holder.MD5;
    183   }
    184 
    185   private static class Md5Holder {
    186     static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()");
    187   }
    188 
    189   /**
    190    * Returns a hash function implementing the SHA-1 algorithm (160 hash bits) by delegating to the
    191    * SHA-1 {@link MessageDigest}.
    192    */
    193   public static HashFunction sha1() {
    194     return Sha1Holder.SHA_1;
    195   }
    196 
    197   private static class Sha1Holder {
    198     static final HashFunction SHA_1 =
    199         new MessageDigestHashFunction("SHA-1", "Hashing.sha1()");
    200   }
    201 
    202   /**
    203    * Returns a hash function implementing the SHA-256 algorithm (256 hash bits) by delegating to
    204    * the SHA-256 {@link MessageDigest}.
    205    */
    206   public static HashFunction sha256() {
    207     return Sha256Holder.SHA_256;
    208   }
    209 
    210   private static class Sha256Holder {
    211     static final HashFunction SHA_256 =
    212         new MessageDigestHashFunction("SHA-256", "Hashing.sha256()");
    213   }
    214 
    215   /**
    216    * Returns a hash function implementing the SHA-512 algorithm (512 hash bits) by delegating to the
    217    * SHA-512 {@link MessageDigest}.
    218    */
    219   public static HashFunction sha512() {
    220     return Sha512Holder.SHA_512;
    221   }
    222 
    223   private static class Sha512Holder {
    224     static final HashFunction SHA_512 =
    225         new MessageDigestHashFunction("SHA-512", "Hashing.sha512()");
    226   }
    227 
    228   /**
    229    * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described
    230    * by RFC 3720, Section 12.1.
    231    *
    232    * @since 18.0
    233    */
    234   public static HashFunction crc32c() {
    235     return Crc32cHolder.CRC_32_C;
    236   }
    237 
    238   private static final class Crc32cHolder {
    239     static final HashFunction CRC_32_C = new Crc32cHashFunction();
    240   }
    241 
    242   /**
    243    * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits) by delegating
    244    * to the {@link CRC32} {@link Checksum}.
    245    *
    246    * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a
    247    * {@code HashCode} produced by this function, use {@link HashCode#padToLong()}.
    248    *
    249    * @since 14.0
    250    */
    251   public static HashFunction crc32() {
    252     return Crc32Holder.CRC_32;
    253   }
    254 
    255   private static class Crc32Holder {
    256     static final HashFunction CRC_32 =
    257         checksumHashFunction(ChecksumType.CRC_32, "Hashing.crc32()");
    258   }
    259 
    260   /**
    261    * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits) by
    262    * delegating to the {@link Adler32} {@link Checksum}.
    263    *
    264    * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a
    265    * {@code HashCode} produced by this function, use {@link HashCode#padToLong()}.
    266    *
    267    * @since 14.0
    268    */
    269   public static HashFunction adler32() {
    270     return Adler32Holder.ADLER_32;
    271   }
    272 
    273   private static class Adler32Holder {
    274     static final HashFunction ADLER_32 =
    275         checksumHashFunction(ChecksumType.ADLER_32, "Hashing.adler32()");
    276   }
    277 
    278   private static HashFunction checksumHashFunction(ChecksumType type, String toString) {
    279     return new ChecksumHashFunction(type, type.bits, toString);
    280   }
    281 
    282   enum ChecksumType implements Supplier<Checksum> {
    283     CRC_32(32) {
    284       @Override
    285       public Checksum get() {
    286         return new CRC32();
    287       }
    288     },
    289     ADLER_32(32) {
    290       @Override
    291       public Checksum get() {
    292         return new Adler32();
    293       }
    294     };
    295 
    296     private final int bits;
    297 
    298     ChecksumType(int bits) {
    299       this.bits = bits;
    300     }
    301 
    302     @Override
    303     public abstract Checksum get();
    304   }
    305 
    306   /**
    307    * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform
    308    * manner that minimizes the need for remapping as {@code buckets} grows. That is,
    309    * {@code consistentHash(h, n)} equals:
    310    *
    311    * <ul>
    312    * <li>{@code n - 1}, with approximate probability {@code 1/n}
    313    * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
    314    * </ul>
    315    *
    316    * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
    317    * article on consistent hashing</a> for more information.
    318    */
    319   public static int consistentHash(HashCode hashCode, int buckets) {
    320     return consistentHash(hashCode.padToLong(), buckets);
    321   }
    322 
    323   /**
    324    * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform
    325    * manner that minimizes the need for remapping as {@code buckets} grows. That is,
    326    * {@code consistentHash(h, n)} equals:
    327    *
    328    * <ul>
    329    * <li>{@code n - 1}, with approximate probability {@code 1/n}
    330    * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
    331    * </ul>
    332    *
    333    * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
    334    * article on consistent hashing</a> for more information.
    335    */
    336   public static int consistentHash(long input, int buckets) {
    337     checkArgument(buckets > 0, "buckets must be positive: %s", buckets);
    338     LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input);
    339     int candidate = 0;
    340     int next;
    341 
    342     // Jump from bucket to bucket until we go out of range
    343     while (true) {
    344       next = (int) ((candidate + 1) / generator.nextDouble());
    345       if (next >= 0 && next < buckets) {
    346         candidate = next;
    347       } else {
    348         return candidate;
    349       }
    350     }
    351   }
    352 
    353   /**
    354    * Returns a hash code, having the same bit length as each of the input hash codes,
    355    * that combines the information of these hash codes in an ordered fashion. That
    356    * is, whenever two equal hash codes are produced by two calls to this method, it
    357    * is <i>as likely as possible</i> that each was computed from the <i>same</i>
    358    * input hash codes in the <i>same</i> order.
    359    *
    360    * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes
    361    *     do not all have the same bit length
    362    */
    363   public static HashCode combineOrdered(Iterable<HashCode> hashCodes) {
    364     Iterator<HashCode> iterator = hashCodes.iterator();
    365     checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
    366     int bits = iterator.next().bits();
    367     byte[] resultBytes = new byte[bits / 8];
    368     for (HashCode hashCode : hashCodes) {
    369       byte[] nextBytes = hashCode.asBytes();
    370       checkArgument(nextBytes.length == resultBytes.length,
    371           "All hashcodes must have the same bit length.");
    372       for (int i = 0; i < nextBytes.length; i++) {
    373         resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]);
    374       }
    375     }
    376     return HashCode.fromBytesNoCopy(resultBytes);
    377   }
    378 
    379   /**
    380    * Returns a hash code, having the same bit length as each of the input hash codes,
    381    * that combines the information of these hash codes in an unordered fashion. That
    382    * is, whenever two equal hash codes are produced by two calls to this method, it
    383    * is <i>as likely as possible</i> that each was computed from the <i>same</i>
    384    * input hash codes in <i>some</i> order.
    385    *
    386    * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes
    387    *     do not all have the same bit length
    388    */
    389   public static HashCode combineUnordered(Iterable<HashCode> hashCodes) {
    390     Iterator<HashCode> iterator = hashCodes.iterator();
    391     checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
    392     byte[] resultBytes = new byte[iterator.next().bits() / 8];
    393     for (HashCode hashCode : hashCodes) {
    394       byte[] nextBytes = hashCode.asBytes();
    395       checkArgument(nextBytes.length == resultBytes.length,
    396           "All hashcodes must have the same bit length.");
    397       for (int i = 0; i < nextBytes.length; i++) {
    398         resultBytes[i] += nextBytes[i];
    399       }
    400     }
    401     return HashCode.fromBytesNoCopy(resultBytes);
    402   }
    403 
    404   /**
    405    * Checks that the passed argument is positive, and ceils it to a multiple of 32.
    406    */
    407   static int checkPositiveAndMakeMultipleOf32(int bits) {
    408     checkArgument(bits > 0, "Number of bits must be positive");
    409     return (bits + 31) & ~31;
    410   }
    411 
    412   // TODO(kevinb): Maybe expose this class via a static Hashing method?
    413   @VisibleForTesting
    414   static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction {
    415     private final int bits;
    416 
    417     ConcatenatedHashFunction(HashFunction... functions) {
    418       super(functions);
    419       int bitSum = 0;
    420       for (HashFunction function : functions) {
    421         bitSum += function.bits();
    422       }
    423       this.bits = bitSum;
    424     }
    425 
    426     @Override
    427     HashCode makeHash(Hasher[] hashers) {
    428       byte[] bytes = new byte[bits / 8];
    429       int i = 0;
    430       for (Hasher hasher : hashers) {
    431         HashCode newHash = hasher.hash();
    432         i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8);
    433       }
    434       return HashCode.fromBytesNoCopy(bytes);
    435     }
    436 
    437     @Override
    438     public int bits() {
    439       return bits;
    440     }
    441 
    442     @Override
    443     public boolean equals(@Nullable Object object) {
    444       if (object instanceof ConcatenatedHashFunction) {
    445         ConcatenatedHashFunction other = (ConcatenatedHashFunction) object;
    446         if (bits != other.bits || functions.length != other.functions.length) {
    447           return false;
    448         }
    449         for (int i = 0; i < functions.length; i++) {
    450           if (!functions[i].equals(other.functions[i])) {
    451             return false;
    452           }
    453         }
    454         return true;
    455       }
    456       return false;
    457     }
    458 
    459     @Override
    460     public int hashCode() {
    461       int hash = bits;
    462       for (HashFunction function : functions) {
    463         hash ^= function.hashCode();
    464       }
    465       return hash;
    466     }
    467   }
    468 
    469   /**
    470    * Linear CongruentialGenerator to use for consistent hashing.
    471    * See http://en.wikipedia.org/wiki/Linear_congruential_generator
    472    */
    473   private static final class LinearCongruentialGenerator {
    474     private long state;
    475 
    476     public LinearCongruentialGenerator(long seed) {
    477       this.state = seed;
    478     }
    479 
    480     public double nextDouble() {
    481       state = 2862933555777941757L * state + 1;
    482       return ((double) ((int) (state >>> 33) + 1)) / (0x1.0p31);
    483     }
    484   }
    485 
    486   private Hashing() {}
    487 }
    488