Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // A simple bloom filter. It uses a large number (20) of hashes to reduce the
      6 // possibility of false positives. The bloom filter's hashing uses random keys
      7 // in order to minimize the chance that a false positive for one user is a false
      8 // positive for all.
      9 //
     10 // The bloom filter manages it serialization to disk with the following file
     11 // format:
     12 //         4 byte version number
     13 //         4 byte number of hash keys (n)
     14 //     n * 8 bytes of hash keys
     15 // Remaining bytes are the filter data.
     16 
     17 #ifndef CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_
     18 #define CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_
     19 #pragma once
     20 
     21 #include <vector>
     22 
     23 #include "base/gtest_prod_util.h"
     24 #include "base/memory/ref_counted.h"
     25 #include "base/memory/scoped_ptr.h"
     26 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
     27 
     28 class FilePath;
     29 
     30 class BloomFilter : public base::RefCountedThreadSafe<BloomFilter> {
     31  public:
     32   typedef uint64 HashKey;
     33   typedef std::vector<HashKey> HashKeys;
     34 
     35   // Constructs an empty filter with the given size.
     36   explicit BloomFilter(int bit_size);
     37 
     38   // Constructs a filter from serialized data. This object owns the memory and
     39   // will delete it on destruction.
     40   BloomFilter(char* data, int size, const HashKeys& keys);
     41 
     42   void Insert(SBPrefix hash);
     43   bool Exists(SBPrefix hash) const;
     44 
     45   const char* data() const { return data_.get(); }
     46   int size() const { return byte_size_; }
     47 
     48   // Loading and storing the filter from / to disk.
     49   static BloomFilter* LoadFile(const FilePath& filter_name);
     50   bool WriteFile(const FilePath& filter_name) const;
     51 
     52   // How many bits to use per item. See the design doc for more information.
     53   static const int kBloomFilterSizeRatio = 25;
     54 
     55   // Force a minimum size on the bloom filter to prevent a high false positive
     56   // hash request rate (in bytes).
     57   static const int kBloomFilterMinSize = 250000;
     58 
     59   // Force a maximum size on the bloom filter to avoid using too much memory
     60   // (in bytes).
     61   static const int kBloomFilterMaxSize = 3 * 1024 * 1024;
     62 
     63   // Use the above constants to calculate an appropriate size to pass
     64   // to the BloomFilter constructor based on the intended |key_count|.
     65   // TODO(shess): This is very clunky.  It would be cleaner to have
     66   // the constructor manage this, but at this time the unit and perf
     67   // tests wish to make their own calculations.
     68   static int FilterSizeForKeyCount(int key_count);
     69 
     70  private:
     71   friend class base::RefCountedThreadSafe<BloomFilter>;
     72   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingBloomFilter, BloomFilterUse);
     73   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingBloomFilter, BloomFilterFile);
     74 
     75   static const int kNumHashKeys = 20;
     76   static const int kFileVersion = 1;
     77 
     78   // Enumerate failures for histogramming purposes.  DO NOT CHANGE THE
     79   // ORDERING OF THESE VALUES.
     80   enum FailureType {
     81     FAILURE_FILTER_READ_OPEN,
     82     FAILURE_FILTER_READ_VERSION,
     83     FAILURE_FILTER_READ_NUM_KEYS,
     84     FAILURE_FILTER_READ_KEY,
     85     FAILURE_FILTER_READ_DATA_MINSIZE,
     86     FAILURE_FILTER_READ_DATA_MAXSIZE,
     87     FAILURE_FILTER_READ_DATA_SHORT,
     88     FAILURE_FILTER_READ_DATA,
     89 
     90     // Memory space for histograms is determined by the max.  ALWAYS
     91     // ADD NEW VALUES BEFORE THIS ONE.
     92     FAILURE_FILTER_MAX
     93   };
     94 
     95   static void RecordFailure(FailureType failure_type);
     96 
     97   ~BloomFilter();
     98 
     99   int byte_size_;  // size in bytes
    100   int bit_size_;   // size in bits
    101   scoped_array<char> data_;
    102 
    103   // Random keys used for hashing.
    104   HashKeys hash_keys_;
    105 
    106   DISALLOW_COPY_AND_ASSIGN(BloomFilter);
    107 };
    108 
    109 #endif  // CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_
    110