1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // A simple bloom filter. It uses a large number (20) of hashes to reduce the 6 // possibility of false positives. The bloom filter's hashing uses random keys 7 // in order to minimize the chance that a false positive for one user is a false 8 // positive for all. 9 // 10 // The bloom filter manages it serialization to disk with the following file 11 // format: 12 // 4 byte version number 13 // 4 byte number of hash keys (n) 14 // n * 8 bytes of hash keys 15 // Remaining bytes are the filter data. 16 17 #ifndef CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_ 18 #define CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_ 19 #pragma once 20 21 #include <vector> 22 23 #include "base/gtest_prod_util.h" 24 #include "base/memory/ref_counted.h" 25 #include "base/memory/scoped_ptr.h" 26 #include "chrome/browser/safe_browsing/safe_browsing_util.h" 27 28 class FilePath; 29 30 class BloomFilter : public base::RefCountedThreadSafe<BloomFilter> { 31 public: 32 typedef uint64 HashKey; 33 typedef std::vector<HashKey> HashKeys; 34 35 // Constructs an empty filter with the given size. 36 explicit BloomFilter(int bit_size); 37 38 // Constructs a filter from serialized data. This object owns the memory and 39 // will delete it on destruction. 40 BloomFilter(char* data, int size, const HashKeys& keys); 41 42 void Insert(SBPrefix hash); 43 bool Exists(SBPrefix hash) const; 44 45 const char* data() const { return data_.get(); } 46 int size() const { return byte_size_; } 47 48 // Loading and storing the filter from / to disk. 49 static BloomFilter* LoadFile(const FilePath& filter_name); 50 bool WriteFile(const FilePath& filter_name) const; 51 52 // How many bits to use per item. See the design doc for more information. 53 static const int kBloomFilterSizeRatio = 25; 54 55 // Force a minimum size on the bloom filter to prevent a high false positive 56 // hash request rate (in bytes). 57 static const int kBloomFilterMinSize = 250000; 58 59 // Force a maximum size on the bloom filter to avoid using too much memory 60 // (in bytes). 61 static const int kBloomFilterMaxSize = 3 * 1024 * 1024; 62 63 // Use the above constants to calculate an appropriate size to pass 64 // to the BloomFilter constructor based on the intended |key_count|. 65 // TODO(shess): This is very clunky. It would be cleaner to have 66 // the constructor manage this, but at this time the unit and perf 67 // tests wish to make their own calculations. 68 static int FilterSizeForKeyCount(int key_count); 69 70 private: 71 friend class base::RefCountedThreadSafe<BloomFilter>; 72 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingBloomFilter, BloomFilterUse); 73 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingBloomFilter, BloomFilterFile); 74 75 static const int kNumHashKeys = 20; 76 static const int kFileVersion = 1; 77 78 // Enumerate failures for histogramming purposes. DO NOT CHANGE THE 79 // ORDERING OF THESE VALUES. 80 enum FailureType { 81 FAILURE_FILTER_READ_OPEN, 82 FAILURE_FILTER_READ_VERSION, 83 FAILURE_FILTER_READ_NUM_KEYS, 84 FAILURE_FILTER_READ_KEY, 85 FAILURE_FILTER_READ_DATA_MINSIZE, 86 FAILURE_FILTER_READ_DATA_MAXSIZE, 87 FAILURE_FILTER_READ_DATA_SHORT, 88 FAILURE_FILTER_READ_DATA, 89 90 // Memory space for histograms is determined by the max. ALWAYS 91 // ADD NEW VALUES BEFORE THIS ONE. 92 FAILURE_FILTER_MAX 93 }; 94 95 static void RecordFailure(FailureType failure_type); 96 97 ~BloomFilter(); 98 99 int byte_size_; // size in bytes 100 int bit_size_; // size in bits 101 scoped_array<char> data_; 102 103 // Random keys used for hashing. 104 HashKeys hash_keys_; 105 106 DISALLOW_COPY_AND_ASSIGN(BloomFilter); 107 }; 108 109 #endif // CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_ 110