1 // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 5 #include "leveldb/filter_policy.h" 6 7 #include "leveldb/slice.h" 8 #include "util/hash.h" 9 10 namespace leveldb { 11 12 namespace { 13 static uint32_t BloomHash(const Slice& key) { 14 return Hash(key.data(), key.size(), 0xbc9f1d34); 15 } 16 17 class BloomFilterPolicy : public FilterPolicy { 18 private: 19 size_t bits_per_key_; 20 size_t k_; 21 22 public: 23 explicit BloomFilterPolicy(int bits_per_key) 24 : bits_per_key_(bits_per_key) { 25 // We intentionally round down to reduce probing cost a little bit 26 k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2) 27 if (k_ < 1) k_ = 1; 28 if (k_ > 30) k_ = 30; 29 } 30 31 virtual const char* Name() const { 32 return "leveldb.BuiltinBloomFilter"; 33 } 34 35 virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { 36 // Compute bloom filter size (in both bits and bytes) 37 size_t bits = n * bits_per_key_; 38 39 // For small n, we can see a very high false positive rate. Fix it 40 // by enforcing a minimum bloom filter length. 41 if (bits < 64) bits = 64; 42 43 size_t bytes = (bits + 7) / 8; 44 bits = bytes * 8; 45 46 const size_t init_size = dst->size(); 47 dst->resize(init_size + bytes, 0); 48 dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter 49 char* array = &(*dst)[init_size]; 50 for (size_t i = 0; i < n; i++) { 51 // Use double-hashing to generate a sequence of hash values. 52 // See analysis in [Kirsch,Mitzenmacher 2006]. 53 uint32_t h = BloomHash(keys[i]); 54 const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits 55 for (size_t j = 0; j < k_; j++) { 56 const uint32_t bitpos = h % bits; 57 array[bitpos/8] |= (1 << (bitpos % 8)); 58 h += delta; 59 } 60 } 61 } 62 63 virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const { 64 const size_t len = bloom_filter.size(); 65 if (len < 2) return false; 66 67 const char* array = bloom_filter.data(); 68 const size_t bits = (len - 1) * 8; 69 70 // Use the encoded k so that we can read filters generated by 71 // bloom filters created using different parameters. 72 const size_t k = array[len-1]; 73 if (k > 30) { 74 // Reserved for potentially new encodings for short bloom filters. 75 // Consider it a match. 76 return true; 77 } 78 79 uint32_t h = BloomHash(key); 80 const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits 81 for (size_t j = 0; j < k; j++) { 82 const uint32_t bitpos = h % bits; 83 if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false; 84 h += delta; 85 } 86 return true; 87 } 88 }; 89 } 90 91 const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) { 92 return new BloomFilterPolicy(bits_per_key); 93 } 94 95 } // namespace leveldb 96