Home | History | Annotate | Download | only in util
      1 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
      4 
      5 #include "leveldb/filter_policy.h"
      6 
      7 #include "util/coding.h"
      8 #include "util/logging.h"
      9 #include "util/testharness.h"
     10 #include "util/testutil.h"
     11 
     12 namespace leveldb {
     13 
     14 static const int kVerbose = 1;
     15 
     16 static Slice Key(int i, char* buffer) {
     17   EncodeFixed32(buffer, i);
     18   return Slice(buffer, sizeof(uint32_t));
     19 }
     20 
     21 class BloomTest {
     22  private:
     23   const FilterPolicy* policy_;
     24   std::string filter_;
     25   std::vector<std::string> keys_;
     26 
     27  public:
     28   BloomTest() : policy_(NewBloomFilterPolicy(10)) { }
     29 
     30   ~BloomTest() {
     31     delete policy_;
     32   }
     33 
     34   void Reset() {
     35     keys_.clear();
     36     filter_.clear();
     37   }
     38 
     39   void Add(const Slice& s) {
     40     keys_.push_back(s.ToString());
     41   }
     42 
     43   void Build() {
     44     std::vector<Slice> key_slices;
     45     for (size_t i = 0; i < keys_.size(); i++) {
     46       key_slices.push_back(Slice(keys_[i]));
     47     }
     48     filter_.clear();
     49     policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_);
     50     keys_.clear();
     51     if (kVerbose >= 2) DumpFilter();
     52   }
     53 
     54   size_t FilterSize() const {
     55     return filter_.size();
     56   }
     57 
     58   void DumpFilter() {
     59     fprintf(stderr, "F(");
     60     for (size_t i = 0; i+1 < filter_.size(); i++) {
     61       const unsigned int c = static_cast<unsigned int>(filter_[i]);
     62       for (int j = 0; j < 8; j++) {
     63         fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
     64       }
     65     }
     66     fprintf(stderr, ")\n");
     67   }
     68 
     69   bool Matches(const Slice& s) {
     70     if (!keys_.empty()) {
     71       Build();
     72     }
     73     return policy_->KeyMayMatch(s, filter_);
     74   }
     75 
     76   double FalsePositiveRate() {
     77     char buffer[sizeof(int)];
     78     int result = 0;
     79     for (int i = 0; i < 10000; i++) {
     80       if (Matches(Key(i + 1000000000, buffer))) {
     81         result++;
     82       }
     83     }
     84     return result / 10000.0;
     85   }
     86 };
     87 
     88 TEST(BloomTest, EmptyFilter) {
     89   ASSERT_TRUE(! Matches("hello"));
     90   ASSERT_TRUE(! Matches("world"));
     91 }
     92 
     93 TEST(BloomTest, Small) {
     94   Add("hello");
     95   Add("world");
     96   ASSERT_TRUE(Matches("hello"));
     97   ASSERT_TRUE(Matches("world"));
     98   ASSERT_TRUE(! Matches("x"));
     99   ASSERT_TRUE(! Matches("foo"));
    100 }
    101 
    102 static int NextLength(int length) {
    103   if (length < 10) {
    104     length += 1;
    105   } else if (length < 100) {
    106     length += 10;
    107   } else if (length < 1000) {
    108     length += 100;
    109   } else {
    110     length += 1000;
    111   }
    112   return length;
    113 }
    114 
    115 TEST(BloomTest, VaryingLengths) {
    116   char buffer[sizeof(int)];
    117 
    118   // Count number of filters that significantly exceed the false positive rate
    119   int mediocre_filters = 0;
    120   int good_filters = 0;
    121 
    122   for (int length = 1; length <= 10000; length = NextLength(length)) {
    123     Reset();
    124     for (int i = 0; i < length; i++) {
    125       Add(Key(i, buffer));
    126     }
    127     Build();
    128 
    129     ASSERT_LE(FilterSize(), (length * 10 / 8) + 40) << length;
    130 
    131     // All added keys must match
    132     for (int i = 0; i < length; i++) {
    133       ASSERT_TRUE(Matches(Key(i, buffer)))
    134           << "Length " << length << "; key " << i;
    135     }
    136 
    137     // Check false positive rate
    138     double rate = FalsePositiveRate();
    139     if (kVerbose >= 1) {
    140       fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
    141               rate*100.0, length, static_cast<int>(FilterSize()));
    142     }
    143     ASSERT_LE(rate, 0.02);   // Must not be over 2%
    144     if (rate > 0.0125) mediocre_filters++;  // Allowed, but not too often
    145     else good_filters++;
    146   }
    147   if (kVerbose >= 1) {
    148     fprintf(stderr, "Filters: %d good, %d mediocre\n",
    149             good_filters, mediocre_filters);
    150   }
    151   ASSERT_LE(mediocre_filters, good_filters/5);
    152 }
    153 
    154 // Different bits-per-byte
    155 
    156 }  // namespace leveldb
    157 
    158 int main(int argc, char** argv) {
    159   return leveldb::test::RunAllTests();
    160 }
    161