Home | History | Annotate | Download | only in hash
      1 // Copyright 2011 Google Inc. All Rights Reserved.
      2 
      3 package com.google.common.hash;
      4 
      5 import static com.google.common.base.Preconditions.checkArgument;
      6 
      7 import com.google.common.math.IntMath;
      8 
      9 import java.math.RoundingMode;
     10 
     11 /**
     12  * Collections of strategies of generating the {@code k * log(M)} bits required for an element to
     13  * be mapped to a {@link BloomFilter} of {@code M} bits and {@code k} hash functions. These
     14  * strategies are part of the serialized form of the Bloom filters that use them, thus they must be
     15  * preserved as is (no updates allowed, only introduction of new versions).
     16  *
     17  * @author andreou (at) google.com (Dimitris Andreou)
     18  */
     19 enum BloomFilterStrategies implements BloomFilter.Strategy {
     20   /**
     21    * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and
     22    * Michael Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the
     23    * performance of a Bloom filter (yet only needs two 32bit hash functions).
     24    */
     25   MURMUR128_MITZ_32() {
     26     @Override public <T> void put(T object, Funnel<? super T> funnel,
     27         int numHashFunctions, BitArray bits) {
     28       // TODO(user): when the murmur's shortcuts are implemented, update this code
     29       long hash64 = Hashing.murmur3_128().newHasher().putObject(object, funnel).hash().asLong();
     30       int hash1 = (int) hash64;
     31       int hash2 = (int) (hash64 >>> 32);
     32       for (int i = 1; i <= numHashFunctions; i++) {
     33         int nextHash = hash1 + i * hash2;
     34         if (nextHash < 0) {
     35           nextHash = ~nextHash;
     36         }
     37         // up to here, the code is identical with the next method
     38         bits.set(nextHash % bits.size());
     39       }
     40     }
     41 
     42     @Override public <T> boolean mightContain(T object, Funnel<? super T> funnel,
     43         int numHashFunctions, BitArray bits) {
     44       long hash64 = Hashing.murmur3_128().newHasher().putObject(object, funnel).hash().asLong();
     45       int hash1 = (int) hash64;
     46       int hash2 = (int) (hash64 >>> 32);
     47       for (int i = 1; i <= numHashFunctions; i++) {
     48         int nextHash = hash1 + i * hash2;
     49         if (nextHash < 0) {
     50           nextHash = ~nextHash;
     51         }
     52         // up to here, the code is identical with the previous method
     53         if (!bits.get(nextHash % bits.size())) {
     54           return false;
     55         }
     56       }
     57       return true;
     58     }
     59   };
     60 
     61   static class BitArray {
     62     final long[] data;
     63 
     64     BitArray(int bits) {
     65       this(new long[IntMath.divide(bits, 64, RoundingMode.CEILING)]);
     66     }
     67 
     68     // Used by serialization
     69     BitArray(long[] data) {
     70       checkArgument(data.length > 0, "data length is zero!");
     71       this.data = data;
     72     }
     73 
     74     void set(int index) {
     75       data[index >> 6] |= (1L << index);
     76     }
     77 
     78     boolean get(int index) {
     79       return (data[index >> 6] & (1L << index)) != 0;
     80     }
     81 
     82     /** Number of bits */
     83     int size() {
     84       return data.length * Long.SIZE;
     85     }
     86   }
     87 }
     88