1 /* 2 * Copyright (C) 2011 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.common.hash; 18 19 import static com.google.common.base.Charsets.UTF_8; 20 import static com.google.common.hash.BloomFilterStrategies.BitArray; 21 22 import com.google.common.collect.ImmutableSet; 23 import com.google.common.math.LongMath; 24 import com.google.common.primitives.Ints; 25 import com.google.common.testing.EqualsTester; 26 import com.google.common.testing.NullPointerTester; 27 import com.google.common.testing.SerializableTester; 28 29 import junit.framework.TestCase; 30 31 import java.io.ByteArrayInputStream; 32 import java.io.ByteArrayOutputStream; 33 import java.math.RoundingMode; 34 import java.util.Random; 35 36 import javax.annotation.Nullable; 37 38 /** 39 * Tests for SimpleGenericBloomFilter and derived BloomFilter views. 40 * 41 * @author Dimitris Andreou 42 */ 43 public class BloomFilterTest extends TestCase { 44 public void testLargeBloomFilterDoesntOverflow() { 45 long numBits = Integer.MAX_VALUE; 46 numBits++; 47 48 BitArray bitArray = new BitArray(numBits); 49 assertTrue( 50 "BitArray.bitSize() must return a positive number, but was " + bitArray.bitSize(), 51 bitArray.bitSize() > 0); 52 53 // Ideally we would also test the bitSize() overflow of this BF, but it runs out of heap space 54 // BloomFilter.create(Funnels.unencodedCharsFunnel(), 244412641, 1e-11); 55 } 56 57 public void testCreateAndCheckMitz32BloomFilterWithKnownFalsePositives() { 58 int numInsertions = 1000000; 59 BloomFilter<String> bf = BloomFilter.create( 60 Funnels.unencodedCharsFunnel(), numInsertions, 0.03, 61 BloomFilterStrategies.MURMUR128_MITZ_32); 62 63 // Insert "numInsertions" even numbers into the BF. 64 for (int i = 0; i < numInsertions * 2; i += 2) { 65 bf.put(Integer.toString(i)); 66 } 67 68 // Assert that the BF "might" have all of the even numbers. 69 for (int i = 0; i < numInsertions * 2; i += 2) { 70 assertTrue(bf.mightContain(Integer.toString(i))); 71 } 72 73 // Now we check for known false positives using a set of known false positives. 74 // (These are all of the false positives under 900.) 75 ImmutableSet<Integer> falsePositives = ImmutableSet.of( 76 49, 51, 59, 163, 199, 321, 325, 363, 367, 469, 545, 561, 727, 769, 773, 781); 77 for (int i = 1; i < 900; i += 2) { 78 if (!falsePositives.contains(i)) { 79 assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i))); 80 } 81 } 82 83 // Check that there are exactly 29824 false positives for this BF. 84 int knownNumberOfFalsePositives = 29824; 85 int numFpp = 0; 86 for (int i = 1; i < numInsertions * 2; i += 2) { 87 if (bf.mightContain(Integer.toString(i))) { 88 numFpp++; 89 } 90 } 91 assertEquals(knownNumberOfFalsePositives, numFpp); 92 double actualFpp = (double) knownNumberOfFalsePositives / numInsertions; 93 double expectedFpp = bf.expectedFpp(); 94 // The normal order of (expected, actual) is reversed here on purpose. 95 assertEquals(actualFpp, expectedFpp, 0.00015); 96 } 97 98 public void testCreateAndCheckBloomFilterWithKnownFalsePositives64() { 99 int numInsertions = 1000000; 100 BloomFilter<String> bf = BloomFilter.create( 101 Funnels.unencodedCharsFunnel(), numInsertions, 0.03, 102 BloomFilterStrategies.MURMUR128_MITZ_64); 103 104 // Insert "numInsertions" even numbers into the BF. 105 for (int i = 0; i < numInsertions * 2; i += 2) { 106 bf.put(Integer.toString(i)); 107 } 108 109 // Assert that the BF "might" have all of the even numbers. 110 for (int i = 0; i < numInsertions * 2; i += 2) { 111 assertTrue(bf.mightContain(Integer.toString(i))); 112 } 113 114 // Now we check for known false positives using a set of known false positives. 115 // (These are all of the false positives under 900.) 116 ImmutableSet<Integer> falsePositives = ImmutableSet.of( 117 15, 25, 287, 319, 381, 399, 421, 465, 529, 697, 767, 857); 118 for (int i = 1; i < 900; i += 2) { 119 if (!falsePositives.contains(i)) { 120 assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i))); 121 } 122 } 123 124 // Check that there are exactly 30104 false positives for this BF. 125 int knownNumberOfFalsePositives = 30104; 126 int numFpp = 0; 127 for (int i = 1; i < numInsertions * 2; i += 2) { 128 if (bf.mightContain(Integer.toString(i))) { 129 numFpp++; 130 } 131 } 132 assertEquals(knownNumberOfFalsePositives, numFpp); 133 double actualFpp = (double) knownNumberOfFalsePositives / numInsertions; 134 double expectedFpp = bf.expectedFpp(); 135 // The normal order of (expected, actual) is reversed here on purpose. 136 assertEquals(actualFpp, expectedFpp, 0.00033); 137 } 138 139 public void testCreateAndCheckBloomFilterWithKnownUtf8FalsePositives64() { 140 int numInsertions = 1000000; 141 BloomFilter<String> bf = BloomFilter.create( 142 Funnels.stringFunnel(UTF_8), numInsertions, 0.03, 143 BloomFilterStrategies.MURMUR128_MITZ_64); 144 145 // Insert "numInsertions" even numbers into the BF. 146 for (int i = 0; i < numInsertions * 2; i += 2) { 147 bf.put(Integer.toString(i)); 148 } 149 150 // Assert that the BF "might" have all of the even numbers. 151 for (int i = 0; i < numInsertions * 2; i += 2) { 152 assertTrue(bf.mightContain(Integer.toString(i))); 153 } 154 155 // Now we check for known false positives using a set of known false positives. 156 // (These are all of the false positives under 900.) 157 ImmutableSet<Integer> falsePositives = 158 ImmutableSet.of(129, 471, 723, 89, 751, 835, 871); 159 for (int i = 1; i < 900; i += 2) { 160 if (!falsePositives.contains(i)) { 161 assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i))); 162 } 163 } 164 165 // Check that there are exactly 29763 false positives for this BF. 166 int knownNumberOfFalsePositives = 29763; 167 int numFpp = 0; 168 for (int i = 1; i < numInsertions * 2; i += 2) { 169 if (bf.mightContain(Integer.toString(i))) { 170 numFpp++; 171 } 172 } 173 assertEquals(knownNumberOfFalsePositives, numFpp); 174 double actualFpp = (double) knownNumberOfFalsePositives / numInsertions; 175 double expectedFpp = bf.expectedFpp(); 176 // The normal order of (expected, actual) is reversed here on purpose. 177 assertEquals(actualFpp, expectedFpp, 0.00033); 178 } 179 180 /** 181 * Sanity checking with many combinations of false positive rates and expected insertions 182 */ 183 public void testBasic() { 184 for (double fpr = 0.0000001; fpr < 0.1; fpr *= 10) { 185 for (int expectedInsertions = 1; expectedInsertions <= 10000; expectedInsertions *= 10) { 186 checkSanity(BloomFilter.create(HashTestUtils.BAD_FUNNEL, expectedInsertions, fpr)); 187 } 188 } 189 } 190 191 public void testPreconditions() { 192 try { 193 BloomFilter.create(Funnels.unencodedCharsFunnel(), -1); 194 fail(); 195 } catch (IllegalArgumentException expected) {} 196 try { 197 BloomFilter.create(Funnels.unencodedCharsFunnel(), -1, 0.03); 198 fail(); 199 } catch (IllegalArgumentException expected) {} 200 try { 201 BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 0.0); 202 fail(); 203 } catch (IllegalArgumentException expected) {} 204 try { 205 BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 1.0); 206 fail(); 207 } catch (IllegalArgumentException expected) {} 208 } 209 210 public void testFailureWhenMoreThan255HashFunctionsAreNeeded() { 211 try { 212 int n = 1000; 213 double p = 0.00000000000000000000000000000000000000000000000000000000000000000000000000000001; 214 BloomFilter.create(Funnels.unencodedCharsFunnel(), n, p); 215 fail(); 216 } catch (IllegalArgumentException expected) {} 217 } 218 219 public void testNullPointers() { 220 NullPointerTester tester = new NullPointerTester(); 221 tester.testAllPublicInstanceMethods(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100)); 222 tester.testAllPublicStaticMethods(BloomFilter.class); 223 } 224 225 /** 226 * Tests that we never get an optimal hashes number of zero. 227 */ 228 public void testOptimalHashes() { 229 for (int n = 1; n < 1000; n++) { 230 for (int m = 0; m < 1000; m++) { 231 assertTrue(BloomFilter.optimalNumOfHashFunctions(n, m) > 0); 232 } 233 } 234 } 235 236 // https://code.google.com/p/guava-libraries/issues/detail?id=1781 237 public void testOptimalNumOfHashFunctionsRounding() { 238 assertEquals(7, BloomFilter.optimalNumOfHashFunctions(319, 3072)); 239 } 240 241 /** 242 * Tests that we always get a non-negative optimal size. 243 */ 244 public void testOptimalSize() { 245 for (int n = 1; n < 1000; n++) { 246 for (double fpp = Double.MIN_VALUE; fpp < 1.0; fpp += 0.001) { 247 assertTrue(BloomFilter.optimalNumOfBits(n, fpp) >= 0); 248 } 249 } 250 251 // some random values 252 Random random = new Random(0); 253 for (int repeats = 0; repeats < 10000; repeats++) { 254 assertTrue(BloomFilter.optimalNumOfBits(random.nextInt(1 << 16), random.nextDouble()) >= 0); 255 } 256 257 // and some crazy values (this used to be capped to Integer.MAX_VALUE, now it can go bigger 258 assertEquals(3327428144502L, BloomFilter.optimalNumOfBits( 259 Integer.MAX_VALUE, Double.MIN_VALUE)); 260 try { 261 BloomFilter.create(HashTestUtils.BAD_FUNNEL, Integer.MAX_VALUE, Double.MIN_VALUE); 262 fail("we can't represent such a large BF!"); 263 } catch (IllegalArgumentException expected) { 264 assertEquals("Could not create BloomFilter of 3327428144502 bits", expected.getMessage()); 265 } 266 } 267 268 private void checkSanity(BloomFilter<Object> bf) { 269 assertFalse(bf.mightContain(new Object())); 270 assertFalse(bf.apply(new Object())); 271 for (int i = 0; i < 100; i++) { 272 Object o = new Object(); 273 bf.put(o); 274 assertTrue(bf.mightContain(o)); 275 assertTrue(bf.apply(o)); 276 } 277 } 278 279 public void testCopy() { 280 BloomFilter<String> original = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100); 281 BloomFilter<String> copy = original.copy(); 282 assertNotSame(original, copy); 283 assertEquals(original, copy); 284 } 285 286 public void testExpectedFpp() { 287 BloomFilter<Object> bf = BloomFilter.create(HashTestUtils.BAD_FUNNEL, 10, 0.03); 288 double fpp = bf.expectedFpp(); 289 assertEquals(0.0, fpp); 290 // usually completed in less than 200 iterations 291 while (fpp != 1.0) { 292 boolean changed = bf.put(new Object()); 293 double newFpp = bf.expectedFpp(); 294 // if changed, the new fpp is strictly higher, otherwise it is the same 295 assertTrue(changed ? newFpp > fpp : newFpp == fpp); 296 fpp = newFpp; 297 } 298 } 299 300 public void testBitSize() { 301 double fpp = 0.03; 302 for (int i = 1; i < 10000; i++) { 303 long numBits = BloomFilter.optimalNumOfBits(i, fpp); 304 int arraySize = Ints.checkedCast(LongMath.divide(numBits, 64, RoundingMode.CEILING)); 305 assertEquals( 306 arraySize * Long.SIZE, 307 BloomFilter.create(Funnels.unencodedCharsFunnel(), i, fpp).bitSize()); 308 } 309 } 310 311 public void testEquals_empty() { 312 new EqualsTester() 313 .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.01)) 314 .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.02)) 315 .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.01)) 316 .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.02)) 317 .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.01)) 318 .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.02)) 319 .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.01)) 320 .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.02)) 321 .testEquals(); 322 } 323 324 public void testEquals() { 325 BloomFilter<String> bf1 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100); 326 bf1.put("1"); 327 bf1.put("2"); 328 329 BloomFilter<String> bf2 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100); 330 bf2.put("1"); 331 bf2.put("2"); 332 333 new EqualsTester() 334 .addEqualityGroup(bf1, bf2) 335 .testEquals(); 336 337 bf2.put("3"); 338 339 new EqualsTester() 340 .addEqualityGroup(bf1) 341 .addEqualityGroup(bf2) 342 .testEquals(); 343 } 344 345 public void testEqualsWithCustomFunnel() { 346 BloomFilter<Long> bf1 = BloomFilter.create(new CustomFunnel(), 100); 347 BloomFilter<Long> bf2 = BloomFilter.create(new CustomFunnel(), 100); 348 assertEquals(bf1, bf2); 349 } 350 351 public void testSerializationWithCustomFunnel() { 352 SerializableTester.reserializeAndAssert(BloomFilter.create(new CustomFunnel(), 100)); 353 } 354 355 private static final class CustomFunnel implements Funnel<Long> { 356 @Override 357 public void funnel(Long value, PrimitiveSink into) { 358 into.putLong(value); 359 } 360 @Override 361 public boolean equals(@Nullable Object object) { 362 return (object instanceof CustomFunnel); 363 } 364 @Override 365 public int hashCode() { 366 return 42; 367 } 368 } 369 370 public void testPutReturnValue() { 371 for (int i = 0; i < 10; i++) { 372 BloomFilter<String> bf = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100); 373 for (int j = 0; j < 10; j++) { 374 String value = new Object().toString(); 375 boolean mightContain = bf.mightContain(value); 376 boolean put = bf.put(value); 377 assertTrue(mightContain != put); 378 } 379 } 380 } 381 382 public void testPutAll() { 383 int element1 = 1; 384 int element2 = 2; 385 386 BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 100); 387 bf1.put(element1); 388 assertTrue(bf1.mightContain(element1)); 389 assertFalse(bf1.mightContain(element2)); 390 391 BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 100); 392 bf2.put(element2); 393 assertFalse(bf2.mightContain(element1)); 394 assertTrue(bf2.mightContain(element2)); 395 396 assertTrue(bf1.isCompatible(bf2)); 397 bf1.putAll(bf2); 398 assertTrue(bf1.mightContain(element1)); 399 assertTrue(bf1.mightContain(element2)); 400 assertFalse(bf2.mightContain(element1)); 401 assertTrue(bf2.mightContain(element2)); 402 } 403 404 public void testPutAllDifferentSizes() { 405 BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1); 406 BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 10); 407 408 try { 409 assertFalse(bf1.isCompatible(bf2)); 410 bf1.putAll(bf2); 411 fail(); 412 } catch (IllegalArgumentException expected) { 413 } 414 415 try { 416 assertFalse(bf2.isCompatible(bf1)); 417 bf2.putAll(bf1); 418 fail(); 419 } catch (IllegalArgumentException expected) { 420 } 421 } 422 423 public void testPutAllWithSelf() { 424 BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1); 425 try { 426 assertFalse(bf1.isCompatible(bf1)); 427 bf1.putAll(bf1); 428 fail(); 429 } catch (IllegalArgumentException expected) { 430 } 431 } 432 433 public void testJavaSerialization() { 434 BloomFilter<byte[]> bf = BloomFilter.create(Funnels.byteArrayFunnel(), 100); 435 for (int i = 0; i < 10; i++) { 436 bf.put(Ints.toByteArray(i)); 437 } 438 439 BloomFilter<byte[]> copy = SerializableTester.reserialize(bf); 440 for (int i = 0; i < 10; i++) { 441 assertTrue(copy.mightContain(Ints.toByteArray(i))); 442 } 443 assertEquals(bf.expectedFpp(), copy.expectedFpp()); 444 445 SerializableTester.reserializeAndAssert(bf); 446 } 447 448 public void testCustomSerialization() throws Exception { 449 Funnel<byte[]> funnel = Funnels.byteArrayFunnel(); 450 BloomFilter<byte[]> bf = BloomFilter.create(funnel, 100); 451 for (int i = 0; i < 100; i++) { 452 bf.put(Ints.toByteArray(i)); 453 } 454 455 ByteArrayOutputStream out = new ByteArrayOutputStream(); 456 bf.writeTo(out); 457 458 assertEquals(bf, BloomFilter.readFrom(new ByteArrayInputStream(out.toByteArray()), funnel)); 459 } 460 461 /** 462 * This test will fail whenever someone updates/reorders the BloomFilterStrategies constants. 463 * Only appending a new constant is allowed. 464 */ 465 public void testBloomFilterStrategies() { 466 assertEquals(2, BloomFilterStrategies.values().length); 467 assertEquals(BloomFilterStrategies.MURMUR128_MITZ_32, BloomFilterStrategies.values()[0]); 468 assertEquals(BloomFilterStrategies.MURMUR128_MITZ_64, BloomFilterStrategies.values()[1]); 469 } 470 } 471