1 #include <stdlib.h> 2 #include <inttypes.h> 3 4 #include "bloom.h" 5 #include "../hash.h" 6 #include "../minmax.h" 7 #include "../crc/xxhash.h" 8 #include "../crc/murmur3.h" 9 #include "../crc/crc32c.h" 10 #include "../crc/fnv.h" 11 12 struct bloom { 13 uint64_t nentries; 14 15 uint32_t *map; 16 }; 17 18 #define BITS_PER_INDEX (sizeof(uint32_t) * 8) 19 #define BITS_INDEX_MASK (BITS_PER_INDEX - 1) 20 21 struct bloom_hash { 22 unsigned int seed; 23 uint32_t (*fn)(const void *, uint32_t, uint32_t); 24 }; 25 26 static uint32_t bloom_crc32c(const void *buf, uint32_t len, uint32_t seed) 27 { 28 return fio_crc32c(buf, len); 29 } 30 31 static uint32_t bloom_fnv(const void *buf, uint32_t len, uint32_t seed) 32 { 33 return fnv(buf, len, seed); 34 } 35 36 #define BLOOM_SEED 0x8989 37 38 static struct bloom_hash hashes[] = { 39 { 40 .seed = BLOOM_SEED, 41 .fn = jhash, 42 }, 43 { 44 .seed = BLOOM_SEED, 45 .fn = XXH32, 46 }, 47 { 48 .seed = BLOOM_SEED, 49 .fn = murmurhash3, 50 }, 51 { 52 .seed = BLOOM_SEED, 53 .fn = bloom_crc32c, 54 }, 55 { 56 .seed = BLOOM_SEED, 57 .fn = bloom_fnv, 58 }, 59 }; 60 61 #define N_HASHES 5 62 63 struct bloom *bloom_new(uint64_t entries) 64 { 65 struct bloom *b; 66 size_t no_uints; 67 68 crc32c_arm64_probe(); 69 crc32c_intel_probe(); 70 71 b = malloc(sizeof(*b)); 72 b->nentries = entries; 73 no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX; 74 b->map = calloc(no_uints, sizeof(uint32_t)); 75 if (!b->map) { 76 free(b); 77 return NULL; 78 } 79 80 return b; 81 } 82 83 void bloom_free(struct bloom *b) 84 { 85 free(b->map); 86 free(b); 87 } 88 89 static bool __bloom_check(struct bloom *b, const void *data, unsigned int len, 90 bool set) 91 { 92 uint32_t hash[N_HASHES]; 93 int i, was_set; 94 95 for (i = 0; i < N_HASHES; i++) { 96 hash[i] = hashes[i].fn(data, len, hashes[i].seed); 97 hash[i] = hash[i] % b->nentries; 98 } 99 100 was_set = 0; 101 for (i = 0; i < N_HASHES; i++) { 102 const unsigned int index = hash[i] / BITS_PER_INDEX; 103 const unsigned int bit = hash[i] & BITS_INDEX_MASK; 104 105 if (b->map[index] & (1U << bit)) 106 was_set++; 107 else if (set) 108 b->map[index] |= 1U << bit; 109 else 110 break; 111 } 112 113 return was_set == N_HASHES; 114 } 115 116 bool bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords) 117 { 118 return __bloom_check(b, data, nwords * sizeof(uint32_t), true); 119 } 120 121 bool bloom_string(struct bloom *b, const char *data, unsigned int len, 122 bool set) 123 { 124 return __bloom_check(b, data, len, set); 125 } 126