1 #include <stdlib.h> 2 #include <inttypes.h> 3 4 #include "bloom.h" 5 #include "../hash.h" 6 #include "../minmax.h" 7 #include "../crc/xxhash.h" 8 #include "../crc/murmur3.h" 9 #include "../crc/crc32c.h" 10 #include "../crc/fnv.h" 11 12 struct bloom { 13 uint64_t nentries; 14 15 uint32_t *map; 16 }; 17 18 #define BITS_PER_INDEX (sizeof(uint32_t) * 8) 19 #define BITS_INDEX_MASK (BITS_PER_INDEX - 1) 20 21 struct bloom_hash { 22 unsigned int seed; 23 uint32_t (*fn)(const void *, uint32_t, uint32_t); 24 }; 25 26 static uint32_t bloom_crc32c(const void *buf, uint32_t len, uint32_t seed) 27 { 28 return fio_crc32c(buf, len); 29 } 30 31 static uint32_t bloom_fnv(const void *buf, uint32_t len, uint32_t seed) 32 { 33 return fnv(buf, len, seed); 34 } 35 36 #define BLOOM_SEED 0x8989 37 38 struct bloom_hash hashes[] = { 39 { 40 .seed = BLOOM_SEED, 41 .fn = jhash, 42 }, 43 { 44 .seed = BLOOM_SEED, 45 .fn = XXH32, 46 }, 47 { 48 .seed = BLOOM_SEED, 49 .fn = murmurhash3, 50 }, 51 { 52 .seed = BLOOM_SEED, 53 .fn = bloom_crc32c, 54 }, 55 { 56 .seed = BLOOM_SEED, 57 .fn = bloom_fnv, 58 }, 59 }; 60 61 #define N_HASHES 5 62 63 #define MIN_ENTRIES 1073741824UL 64 65 struct bloom *bloom_new(uint64_t entries) 66 { 67 struct bloom *b; 68 size_t no_uints; 69 70 crc32c_intel_probe(); 71 72 b = malloc(sizeof(*b)); 73 b->nentries = entries; 74 no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX; 75 no_uints = max((unsigned long) no_uints, MIN_ENTRIES); 76 b->map = calloc(no_uints, sizeof(uint32_t)); 77 if (!b->map) { 78 free(b); 79 return NULL; 80 } 81 82 return b; 83 } 84 85 void bloom_free(struct bloom *b) 86 { 87 free(b->map); 88 free(b); 89 } 90 91 static int __bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords, 92 int set) 93 { 94 uint32_t hash[N_HASHES]; 95 int i, was_set; 96 97 for (i = 0; i < N_HASHES; i++) { 98 hash[i] = hashes[i].fn(data, nwords, hashes[i].seed); 99 hash[i] = hash[i] % b->nentries; 100 } 101 102 was_set = 0; 103 for (i = 0; i < N_HASHES; i++) { 104 const unsigned int index = hash[i] / BITS_PER_INDEX; 105 const unsigned int bit = hash[i] & BITS_INDEX_MASK; 106 107 if (b->map[index] & (1U << bit)) 108 was_set++; 109 if (set) 110 b->map[index] |= 1U << bit; 111 } 112 113 return was_set == N_HASHES; 114 } 115 116 int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords) 117 { 118 return __bloom_check(b, data, nwords, 1); 119 } 120