Home | History | Annotate | Download | only in util
      1 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
      4 
      5 #include "leveldb/filter_policy.h"
      6 
      7 #include "leveldb/slice.h"
      8 #include "util/hash.h"
      9 
     10 namespace leveldb {
     11 
     12 namespace {
     13 static uint32_t BloomHash(const Slice& key) {
     14   return Hash(key.data(), key.size(), 0xbc9f1d34);
     15 }
     16 
     17 class BloomFilterPolicy : public FilterPolicy {
     18  private:
     19   size_t bits_per_key_;
     20   size_t k_;
     21 
     22  public:
     23   explicit BloomFilterPolicy(int bits_per_key)
     24       : bits_per_key_(bits_per_key) {
     25     // We intentionally round down to reduce probing cost a little bit
     26     k_ = static_cast<size_t>(bits_per_key * 0.69);  // 0.69 =~ ln(2)
     27     if (k_ < 1) k_ = 1;
     28     if (k_ > 30) k_ = 30;
     29   }
     30 
     31   virtual const char* Name() const {
     32     return "leveldb.BuiltinBloomFilter";
     33   }
     34 
     35   virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
     36     // Compute bloom filter size (in both bits and bytes)
     37     size_t bits = n * bits_per_key_;
     38 
     39     // For small n, we can see a very high false positive rate.  Fix it
     40     // by enforcing a minimum bloom filter length.
     41     if (bits < 64) bits = 64;
     42 
     43     size_t bytes = (bits + 7) / 8;
     44     bits = bytes * 8;
     45 
     46     const size_t init_size = dst->size();
     47     dst->resize(init_size + bytes, 0);
     48     dst->push_back(static_cast<char>(k_));  // Remember # of probes in filter
     49     char* array = &(*dst)[init_size];
     50     for (size_t i = 0; i < n; i++) {
     51       // Use double-hashing to generate a sequence of hash values.
     52       // See analysis in [Kirsch,Mitzenmacher 2006].
     53       uint32_t h = BloomHash(keys[i]);
     54       const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
     55       for (size_t j = 0; j < k_; j++) {
     56         const uint32_t bitpos = h % bits;
     57         array[bitpos/8] |= (1 << (bitpos % 8));
     58         h += delta;
     59       }
     60     }
     61   }
     62 
     63   virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
     64     const size_t len = bloom_filter.size();
     65     if (len < 2) return false;
     66 
     67     const char* array = bloom_filter.data();
     68     const size_t bits = (len - 1) * 8;
     69 
     70     // Use the encoded k so that we can read filters generated by
     71     // bloom filters created using different parameters.
     72     const size_t k = array[len-1];
     73     if (k > 30) {
     74       // Reserved for potentially new encodings for short bloom filters.
     75       // Consider it a match.
     76       return true;
     77     }
     78 
     79     uint32_t h = BloomHash(key);
     80     const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
     81     for (size_t j = 0; j < k; j++) {
     82       const uint32_t bitpos = h % bits;
     83       if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
     84       h += delta;
     85     }
     86     return true;
     87   }
     88 };
     89 }
     90 
     91 const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
     92   return new BloomFilterPolicy(bits_per_key);
     93 }
     94 
     95 }  // namespace leveldb
     96