Home | History | Annotate | Download | only in lib
      1 #include <stdlib.h>
      2 #include <inttypes.h>
      3 
      4 #include "bloom.h"
      5 #include "../hash.h"
      6 #include "../minmax.h"
      7 #include "../crc/xxhash.h"
      8 #include "../crc/murmur3.h"
      9 #include "../crc/crc32c.h"
     10 #include "../crc/fnv.h"
     11 
     12 struct bloom {
     13 	uint64_t nentries;
     14 
     15 	uint32_t *map;
     16 };
     17 
     18 #define BITS_PER_INDEX	(sizeof(uint32_t) * 8)
     19 #define BITS_INDEX_MASK	(BITS_PER_INDEX - 1)
     20 
     21 struct bloom_hash {
     22 	unsigned int seed;
     23 	uint32_t (*fn)(const void *, uint32_t, uint32_t);
     24 };
     25 
     26 static uint32_t bloom_crc32c(const void *buf, uint32_t len, uint32_t seed)
     27 {
     28 	return fio_crc32c(buf, len);
     29 }
     30 
     31 static uint32_t bloom_fnv(const void *buf, uint32_t len, uint32_t seed)
     32 {
     33 	return fnv(buf, len, seed);
     34 }
     35 
     36 #define BLOOM_SEED	0x8989
     37 
     38 static struct bloom_hash hashes[] = {
     39 	{
     40 		.seed = BLOOM_SEED,
     41 		.fn = jhash,
     42 	},
     43 	{
     44 		.seed = BLOOM_SEED,
     45 		.fn = XXH32,
     46 	},
     47 	{
     48 		.seed = BLOOM_SEED,
     49 		.fn = murmurhash3,
     50 	},
     51 	{
     52 		.seed = BLOOM_SEED,
     53 		.fn = bloom_crc32c,
     54 	},
     55 	{
     56 		.seed = BLOOM_SEED,
     57 		.fn = bloom_fnv,
     58 	},
     59 };
     60 
     61 #define N_HASHES	5
     62 
     63 struct bloom *bloom_new(uint64_t entries)
     64 {
     65 	struct bloom *b;
     66 	size_t no_uints;
     67 
     68 	crc32c_arm64_probe();
     69 	crc32c_intel_probe();
     70 
     71 	b = malloc(sizeof(*b));
     72 	b->nentries = entries;
     73 	no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX;
     74 	b->map = calloc(no_uints, sizeof(uint32_t));
     75 	if (!b->map) {
     76 		free(b);
     77 		return NULL;
     78 	}
     79 
     80 	return b;
     81 }
     82 
     83 void bloom_free(struct bloom *b)
     84 {
     85 	free(b->map);
     86 	free(b);
     87 }
     88 
     89 static bool __bloom_check(struct bloom *b, const void *data, unsigned int len,
     90 			  bool set)
     91 {
     92 	uint32_t hash[N_HASHES];
     93 	int i, was_set;
     94 
     95 	for (i = 0; i < N_HASHES; i++) {
     96 		hash[i] = hashes[i].fn(data, len, hashes[i].seed);
     97 		hash[i] = hash[i] % b->nentries;
     98 	}
     99 
    100 	was_set = 0;
    101 	for (i = 0; i < N_HASHES; i++) {
    102 		const unsigned int index = hash[i] / BITS_PER_INDEX;
    103 		const unsigned int bit = hash[i] & BITS_INDEX_MASK;
    104 
    105 		if (b->map[index] & (1U << bit))
    106 			was_set++;
    107 		else if (set)
    108 			b->map[index] |= 1U << bit;
    109 		else
    110 			break;
    111 	}
    112 
    113 	return was_set == N_HASHES;
    114 }
    115 
    116 bool bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords)
    117 {
    118 	return __bloom_check(b, data, nwords * sizeof(uint32_t), true);
    119 }
    120 
    121 bool bloom_string(struct bloom *b, const char *data, unsigned int len,
    122 		  bool set)
    123 {
    124 	return __bloom_check(b, data, len, set);
    125 }
    126