Home | History | Annotate | Download | only in wtf
      1 /*
      2  * Copyright (C) 2011 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
     14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
     15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
     17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
     23  * THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef BloomFilter_h
     27 #define BloomFilter_h
     28 
     29 #include "wtf/Compiler.h"
     30 #include "wtf/text/AtomicString.h"
     31 
     32 namespace WTF {
     33 
     34 // Counting bloom filter with k=2 and 8 bit counters. Uses 2^keyBits bytes of memory.
     35 // False positive rate is approximately (1-e^(-2n/m))^2, where n is the number of unique
     36 // keys and m is the table size (==2^keyBits).
     37 template <unsigned keyBits>
     38 class BloomFilter {
     39 public:
     40     COMPILE_ASSERT(keyBits <= 16, bloom_filter_key_size);
     41 
     42     static const size_t tableSize = 1 << keyBits;
     43     static const unsigned keyMask = (1 << keyBits) - 1;
     44     static uint8_t maximumCount() { return std::numeric_limits<uint8_t>::max(); }
     45 
     46     BloomFilter() { clear(); }
     47 
     48     void add(unsigned hash);
     49     void remove(unsigned hash);
     50 
     51     // The filter may give false positives (claim it may contain a key it doesn't)
     52     // but never false negatives (claim it doesn't contain a key it does).
     53     bool mayContain(unsigned hash) const { return firstSlot(hash) && secondSlot(hash); }
     54 
     55     // The filter must be cleared before reuse even if all keys are removed.
     56     // Otherwise overflowed keys will stick around.
     57     void clear();
     58 
     59     void add(const AtomicString& string) { add(string.impl()->existingHash()); }
     60     void add(const String& string) { add(string.impl()->hash()); }
     61     void remove(const AtomicString& string) { remove(string.impl()->existingHash()); }
     62     void remove(const String& string) { remove(string.impl()->hash()); }
     63 
     64     bool mayContain(const AtomicString& string) const { return mayContain(string.impl()->existingHash()); }
     65     bool mayContain(const String& string) const { return mayContain(string.impl()->hash()); }
     66 
     67 #if !ASSERT_DISABLED
     68     // Slow.
     69     bool likelyEmpty() const;
     70     bool isClear() const;
     71 #endif
     72 
     73 private:
     74     uint8_t& firstSlot(unsigned hash) { return m_table[hash & keyMask]; }
     75     uint8_t& secondSlot(unsigned hash) { return m_table[(hash >> 16) & keyMask]; }
     76     const uint8_t& firstSlot(unsigned hash) const { return m_table[hash & keyMask]; }
     77     const uint8_t& secondSlot(unsigned hash) const { return m_table[(hash >> 16) & keyMask]; }
     78 
     79     uint8_t m_table[tableSize];
     80 };
     81 
     82 template <unsigned keyBits>
     83 inline void BloomFilter<keyBits>::add(unsigned hash)
     84 {
     85     uint8_t& first = firstSlot(hash);
     86     uint8_t& second = secondSlot(hash);
     87     if (LIKELY(first < maximumCount()))
     88         ++first;
     89     if (LIKELY(second < maximumCount()))
     90         ++second;
     91 }
     92 
     93 template <unsigned keyBits>
     94 inline void BloomFilter<keyBits>::remove(unsigned hash)
     95 {
     96     uint8_t& first = firstSlot(hash);
     97     uint8_t& second = secondSlot(hash);
     98     ASSERT(first);
     99     ASSERT(second);
    100     // In case of an overflow, the slot sticks in the table until clear().
    101     if (LIKELY(first < maximumCount()))
    102         --first;
    103     if (LIKELY(second < maximumCount()))
    104         --second;
    105 }
    106 
    107 template <unsigned keyBits>
    108 inline void BloomFilter<keyBits>::clear()
    109 {
    110     memset(m_table, 0, tableSize);
    111 }
    112 
    113 #if !ASSERT_DISABLED
    114 template <unsigned keyBits>
    115 bool BloomFilter<keyBits>::likelyEmpty() const
    116 {
    117     for (size_t n = 0; n < tableSize; ++n) {
    118         if (m_table[n] && m_table[n] != maximumCount())
    119             return false;
    120     }
    121     return true;
    122 }
    123 
    124 template <unsigned keyBits>
    125 bool BloomFilter<keyBits>::isClear() const
    126 {
    127     for (size_t n = 0; n < tableSize; ++n) {
    128         if (m_table[n])
    129             return false;
    130     }
    131     return true;
    132 }
    133 #endif
    134 
    135 }
    136 
    137 using WTF::BloomFilter;
    138 
    139 #endif
    140