Home | History | Annotate | Download | only in hash
      1 /*
      2  * Copyright (C) 2011 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.common.hash;
     18 
     19 import static com.google.common.base.Charsets.UTF_8;
     20 import static com.google.common.hash.BloomFilterStrategies.BitArray;
     21 
     22 import com.google.common.collect.ImmutableSet;
     23 import com.google.common.math.LongMath;
     24 import com.google.common.primitives.Ints;
     25 import com.google.common.testing.EqualsTester;
     26 import com.google.common.testing.NullPointerTester;
     27 import com.google.common.testing.SerializableTester;
     28 
     29 import junit.framework.TestCase;
     30 
     31 import java.io.ByteArrayInputStream;
     32 import java.io.ByteArrayOutputStream;
     33 import java.math.RoundingMode;
     34 import java.util.Random;
     35 
     36 import javax.annotation.Nullable;
     37 
     38 /**
     39  * Tests for SimpleGenericBloomFilter and derived BloomFilter views.
     40  *
     41  * @author Dimitris Andreou
     42  */
     43 public class BloomFilterTest extends TestCase {
     44   public void testLargeBloomFilterDoesntOverflow() {
     45     long numBits = Integer.MAX_VALUE;
     46     numBits++;
     47 
     48     BitArray bitArray = new BitArray(numBits);
     49     assertTrue(
     50         "BitArray.bitSize() must return a positive number, but was " + bitArray.bitSize(),
     51         bitArray.bitSize() > 0);
     52 
     53     // Ideally we would also test the bitSize() overflow of this BF, but it runs out of heap space
     54     // BloomFilter.create(Funnels.unencodedCharsFunnel(), 244412641, 1e-11);
     55   }
     56 
     57   public void testCreateAndCheckMitz32BloomFilterWithKnownFalsePositives() {
     58     int numInsertions = 1000000;
     59     BloomFilter<String> bf = BloomFilter.create(
     60         Funnels.unencodedCharsFunnel(), numInsertions, 0.03,
     61         BloomFilterStrategies.MURMUR128_MITZ_32);
     62 
     63     // Insert "numInsertions" even numbers into the BF.
     64     for (int i = 0; i < numInsertions * 2; i += 2) {
     65       bf.put(Integer.toString(i));
     66     }
     67 
     68     // Assert that the BF "might" have all of the even numbers.
     69     for (int i = 0; i < numInsertions * 2; i += 2) {
     70       assertTrue(bf.mightContain(Integer.toString(i)));
     71     }
     72 
     73     // Now we check for known false positives using a set of known false positives.
     74     // (These are all of the false positives under 900.)
     75     ImmutableSet<Integer> falsePositives = ImmutableSet.of(
     76         49, 51, 59, 163, 199, 321, 325, 363, 367, 469, 545, 561, 727, 769, 773, 781);
     77     for (int i = 1; i < 900; i += 2) {
     78       if (!falsePositives.contains(i)) {
     79         assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i)));
     80       }
     81     }
     82 
     83     // Check that there are exactly 29824 false positives for this BF.
     84     int knownNumberOfFalsePositives = 29824;
     85     int numFpp = 0;
     86     for (int i = 1; i < numInsertions * 2; i += 2) {
     87       if (bf.mightContain(Integer.toString(i))) {
     88         numFpp++;
     89       }
     90     }
     91     assertEquals(knownNumberOfFalsePositives, numFpp);
     92     double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
     93     double expectedFpp = bf.expectedFpp();
     94     // The normal order of (expected, actual) is reversed here on purpose.
     95     assertEquals(actualFpp, expectedFpp, 0.00015);
     96   }
     97 
     98   public void testCreateAndCheckBloomFilterWithKnownFalsePositives64() {
     99     int numInsertions = 1000000;
    100     BloomFilter<String> bf = BloomFilter.create(
    101         Funnels.unencodedCharsFunnel(), numInsertions, 0.03,
    102         BloomFilterStrategies.MURMUR128_MITZ_64);
    103 
    104     // Insert "numInsertions" even numbers into the BF.
    105     for (int i = 0; i < numInsertions * 2; i += 2) {
    106       bf.put(Integer.toString(i));
    107     }
    108 
    109     // Assert that the BF "might" have all of the even numbers.
    110     for (int i = 0; i < numInsertions * 2; i += 2) {
    111       assertTrue(bf.mightContain(Integer.toString(i)));
    112     }
    113 
    114     // Now we check for known false positives using a set of known false positives.
    115     // (These are all of the false positives under 900.)
    116     ImmutableSet<Integer> falsePositives = ImmutableSet.of(
    117         15, 25, 287, 319, 381, 399, 421, 465, 529, 697, 767, 857);
    118     for (int i = 1; i < 900; i += 2) {
    119       if (!falsePositives.contains(i)) {
    120         assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i)));
    121       }
    122     }
    123 
    124     // Check that there are exactly 30104 false positives for this BF.
    125     int knownNumberOfFalsePositives = 30104;
    126     int numFpp = 0;
    127     for (int i = 1; i < numInsertions * 2; i += 2) {
    128       if (bf.mightContain(Integer.toString(i))) {
    129         numFpp++;
    130       }
    131     }
    132     assertEquals(knownNumberOfFalsePositives, numFpp);
    133     double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
    134     double expectedFpp = bf.expectedFpp();
    135     // The normal order of (expected, actual) is reversed here on purpose.
    136     assertEquals(actualFpp, expectedFpp, 0.00033);
    137   }
    138 
    139   public void testCreateAndCheckBloomFilterWithKnownUtf8FalsePositives64() {
    140     int numInsertions = 1000000;
    141     BloomFilter<String> bf = BloomFilter.create(
    142         Funnels.stringFunnel(UTF_8), numInsertions, 0.03,
    143         BloomFilterStrategies.MURMUR128_MITZ_64);
    144 
    145     // Insert "numInsertions" even numbers into the BF.
    146     for (int i = 0; i < numInsertions * 2; i += 2) {
    147       bf.put(Integer.toString(i));
    148     }
    149 
    150     // Assert that the BF "might" have all of the even numbers.
    151     for (int i = 0; i < numInsertions * 2; i += 2) {
    152       assertTrue(bf.mightContain(Integer.toString(i)));
    153     }
    154 
    155     // Now we check for known false positives using a set of known false positives.
    156     // (These are all of the false positives under 900.)
    157     ImmutableSet<Integer> falsePositives =
    158         ImmutableSet.of(129, 471, 723, 89, 751, 835, 871);
    159     for (int i = 1; i < 900; i += 2) {
    160       if (!falsePositives.contains(i)) {
    161         assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i)));
    162       }
    163     }
    164 
    165     // Check that there are exactly 29763 false positives for this BF.
    166     int knownNumberOfFalsePositives = 29763;
    167     int numFpp = 0;
    168     for (int i = 1; i < numInsertions * 2; i += 2) {
    169       if (bf.mightContain(Integer.toString(i))) {
    170         numFpp++;
    171       }
    172     }
    173     assertEquals(knownNumberOfFalsePositives, numFpp);
    174     double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
    175     double expectedFpp = bf.expectedFpp();
    176     // The normal order of (expected, actual) is reversed here on purpose.
    177     assertEquals(actualFpp, expectedFpp, 0.00033);
    178   }
    179 
    180   /**
    181    * Sanity checking with many combinations of false positive rates and expected insertions
    182    */
    183   public void testBasic() {
    184     for (double fpr = 0.0000001; fpr < 0.1; fpr *= 10) {
    185       for (int expectedInsertions = 1; expectedInsertions <= 10000; expectedInsertions *= 10) {
    186         checkSanity(BloomFilter.create(HashTestUtils.BAD_FUNNEL, expectedInsertions, fpr));
    187       }
    188     }
    189   }
    190 
    191   public void testPreconditions() {
    192     try {
    193       BloomFilter.create(Funnels.unencodedCharsFunnel(), -1);
    194       fail();
    195     } catch (IllegalArgumentException expected) {}
    196     try {
    197       BloomFilter.create(Funnels.unencodedCharsFunnel(), -1, 0.03);
    198       fail();
    199     } catch (IllegalArgumentException expected) {}
    200     try {
    201       BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 0.0);
    202       fail();
    203     } catch (IllegalArgumentException expected) {}
    204     try {
    205       BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 1.0);
    206       fail();
    207     } catch (IllegalArgumentException expected) {}
    208   }
    209 
    210   public void testFailureWhenMoreThan255HashFunctionsAreNeeded() {
    211     try {
    212       int n = 1000;
    213       double p = 0.00000000000000000000000000000000000000000000000000000000000000000000000000000001;
    214       BloomFilter.create(Funnels.unencodedCharsFunnel(), n, p);
    215       fail();
    216     } catch (IllegalArgumentException expected) {}
    217   }
    218 
    219   public void testNullPointers() {
    220     NullPointerTester tester = new NullPointerTester();
    221     tester.testAllPublicInstanceMethods(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100));
    222     tester.testAllPublicStaticMethods(BloomFilter.class);
    223   }
    224 
    225   /**
    226    * Tests that we never get an optimal hashes number of zero.
    227    */
    228   public void testOptimalHashes() {
    229     for (int n = 1; n < 1000; n++) {
    230       for (int m = 0; m < 1000; m++) {
    231         assertTrue(BloomFilter.optimalNumOfHashFunctions(n, m) > 0);
    232       }
    233     }
    234   }
    235 
    236   // https://code.google.com/p/guava-libraries/issues/detail?id=1781
    237   public void testOptimalNumOfHashFunctionsRounding() {
    238     assertEquals(7, BloomFilter.optimalNumOfHashFunctions(319, 3072));
    239   }
    240 
    241   /**
    242    * Tests that we always get a non-negative optimal size.
    243    */
    244   public void testOptimalSize() {
    245     for (int n = 1; n < 1000; n++) {
    246       for (double fpp = Double.MIN_VALUE; fpp < 1.0; fpp += 0.001) {
    247         assertTrue(BloomFilter.optimalNumOfBits(n, fpp) >= 0);
    248       }
    249     }
    250 
    251     // some random values
    252     Random random = new Random(0);
    253     for (int repeats = 0; repeats < 10000; repeats++) {
    254       assertTrue(BloomFilter.optimalNumOfBits(random.nextInt(1 << 16), random.nextDouble()) >= 0);
    255     }
    256 
    257     // and some crazy values (this used to be capped to Integer.MAX_VALUE, now it can go bigger
    258     assertEquals(3327428144502L, BloomFilter.optimalNumOfBits(
    259         Integer.MAX_VALUE, Double.MIN_VALUE));
    260     try {
    261       BloomFilter.create(HashTestUtils.BAD_FUNNEL, Integer.MAX_VALUE, Double.MIN_VALUE);
    262       fail("we can't represent such a large BF!");
    263     } catch (IllegalArgumentException expected) {
    264       assertEquals("Could not create BloomFilter of 3327428144502 bits", expected.getMessage());
    265     }
    266   }
    267 
    268   private void checkSanity(BloomFilter<Object> bf) {
    269     assertFalse(bf.mightContain(new Object()));
    270     assertFalse(bf.apply(new Object()));
    271     for (int i = 0; i < 100; i++) {
    272       Object o = new Object();
    273       bf.put(o);
    274       assertTrue(bf.mightContain(o));
    275       assertTrue(bf.apply(o));
    276     }
    277   }
    278 
    279   public void testCopy() {
    280     BloomFilter<String> original = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
    281     BloomFilter<String> copy = original.copy();
    282     assertNotSame(original, copy);
    283     assertEquals(original, copy);
    284   }
    285 
    286   public void testExpectedFpp() {
    287     BloomFilter<Object> bf = BloomFilter.create(HashTestUtils.BAD_FUNNEL, 10, 0.03);
    288     double fpp = bf.expectedFpp();
    289     assertEquals(0.0, fpp);
    290     // usually completed in less than 200 iterations
    291     while (fpp != 1.0) {
    292       boolean changed = bf.put(new Object());
    293       double newFpp = bf.expectedFpp();
    294       // if changed, the new fpp is strictly higher, otherwise it is the same
    295       assertTrue(changed ? newFpp > fpp : newFpp == fpp);
    296       fpp = newFpp;
    297     }
    298   }
    299 
    300   public void testBitSize() {
    301     double fpp = 0.03;
    302     for (int i = 1; i < 10000; i++) {
    303       long numBits = BloomFilter.optimalNumOfBits(i, fpp);
    304       int arraySize = Ints.checkedCast(LongMath.divide(numBits, 64, RoundingMode.CEILING));
    305       assertEquals(
    306           arraySize * Long.SIZE,
    307           BloomFilter.create(Funnels.unencodedCharsFunnel(), i, fpp).bitSize());
    308     }
    309   }
    310 
    311   public void testEquals_empty() {
    312     new EqualsTester()
    313         .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.01))
    314         .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.02))
    315         .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.01))
    316         .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.02))
    317         .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.01))
    318         .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.02))
    319         .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.01))
    320         .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.02))
    321         .testEquals();
    322   }
    323 
    324   public void testEquals() {
    325     BloomFilter<String> bf1 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
    326     bf1.put("1");
    327     bf1.put("2");
    328 
    329     BloomFilter<String> bf2 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
    330     bf2.put("1");
    331     bf2.put("2");
    332 
    333     new EqualsTester()
    334         .addEqualityGroup(bf1, bf2)
    335         .testEquals();
    336 
    337     bf2.put("3");
    338 
    339     new EqualsTester()
    340         .addEqualityGroup(bf1)
    341         .addEqualityGroup(bf2)
    342         .testEquals();
    343   }
    344 
    345   public void testEqualsWithCustomFunnel() {
    346     BloomFilter<Long> bf1 = BloomFilter.create(new CustomFunnel(), 100);
    347     BloomFilter<Long> bf2 = BloomFilter.create(new CustomFunnel(), 100);
    348     assertEquals(bf1, bf2);
    349   }
    350 
    351   public void testSerializationWithCustomFunnel() {
    352     SerializableTester.reserializeAndAssert(BloomFilter.create(new CustomFunnel(), 100));
    353   }
    354 
    355   private static final class CustomFunnel implements Funnel<Long> {
    356     @Override
    357     public void funnel(Long value, PrimitiveSink into) {
    358       into.putLong(value);
    359     }
    360     @Override
    361     public boolean equals(@Nullable Object object) {
    362       return (object instanceof CustomFunnel);
    363     }
    364     @Override
    365     public int hashCode() {
    366       return 42;
    367     }
    368   }
    369 
    370   public void testPutReturnValue() {
    371     for (int i = 0; i < 10; i++) {
    372       BloomFilter<String> bf = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
    373       for (int j = 0; j < 10; j++) {
    374         String value = new Object().toString();
    375         boolean mightContain = bf.mightContain(value);
    376         boolean put = bf.put(value);
    377         assertTrue(mightContain != put);
    378       }
    379     }
    380   }
    381 
    382   public void testPutAll() {
    383     int element1 = 1;
    384     int element2 = 2;
    385 
    386     BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 100);
    387     bf1.put(element1);
    388     assertTrue(bf1.mightContain(element1));
    389     assertFalse(bf1.mightContain(element2));
    390 
    391     BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 100);
    392     bf2.put(element2);
    393     assertFalse(bf2.mightContain(element1));
    394     assertTrue(bf2.mightContain(element2));
    395 
    396     assertTrue(bf1.isCompatible(bf2));
    397     bf1.putAll(bf2);
    398     assertTrue(bf1.mightContain(element1));
    399     assertTrue(bf1.mightContain(element2));
    400     assertFalse(bf2.mightContain(element1));
    401     assertTrue(bf2.mightContain(element2));
    402   }
    403 
    404   public void testPutAllDifferentSizes() {
    405     BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1);
    406     BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 10);
    407 
    408     try {
    409       assertFalse(bf1.isCompatible(bf2));
    410       bf1.putAll(bf2);
    411       fail();
    412     } catch (IllegalArgumentException expected) {
    413     }
    414 
    415     try {
    416       assertFalse(bf2.isCompatible(bf1));
    417       bf2.putAll(bf1);
    418       fail();
    419     } catch (IllegalArgumentException expected) {
    420     }
    421   }
    422 
    423   public void testPutAllWithSelf() {
    424     BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1);
    425     try {
    426       assertFalse(bf1.isCompatible(bf1));
    427       bf1.putAll(bf1);
    428       fail();
    429     } catch (IllegalArgumentException expected) {
    430     }
    431   }
    432 
    433   public void testJavaSerialization() {
    434     BloomFilter<byte[]> bf = BloomFilter.create(Funnels.byteArrayFunnel(), 100);
    435     for (int i = 0; i < 10; i++) {
    436       bf.put(Ints.toByteArray(i));
    437     }
    438 
    439     BloomFilter<byte[]> copy = SerializableTester.reserialize(bf);
    440     for (int i = 0; i < 10; i++) {
    441       assertTrue(copy.mightContain(Ints.toByteArray(i)));
    442     }
    443     assertEquals(bf.expectedFpp(), copy.expectedFpp());
    444 
    445     SerializableTester.reserializeAndAssert(bf);
    446   }
    447 
    448   public void testCustomSerialization() throws Exception {
    449     Funnel<byte[]> funnel = Funnels.byteArrayFunnel();
    450     BloomFilter<byte[]> bf = BloomFilter.create(funnel, 100);
    451     for (int i = 0; i < 100; i++) {
    452       bf.put(Ints.toByteArray(i));
    453     }
    454 
    455     ByteArrayOutputStream out = new ByteArrayOutputStream();
    456     bf.writeTo(out);
    457 
    458     assertEquals(bf, BloomFilter.readFrom(new ByteArrayInputStream(out.toByteArray()), funnel));
    459   }
    460 
    461   /**
    462    * This test will fail whenever someone updates/reorders the BloomFilterStrategies constants.
    463    * Only appending a new constant is allowed.
    464    */
    465   public void testBloomFilterStrategies() {
    466     assertEquals(2, BloomFilterStrategies.values().length);
    467     assertEquals(BloomFilterStrategies.MURMUR128_MITZ_32, BloomFilterStrategies.values()[0]);
    468     assertEquals(BloomFilterStrategies.MURMUR128_MITZ_64, BloomFilterStrategies.values()[1]);
    469   }
    470 }
    471