1 /* 2 ------------------------------------------------------------------------------ 3 perfect.h: code to generate code for a hash for perfect hashing. 4 (c) Bob Jenkins, September 1996 5 You may use this code in any way you wish, and it is free. No warranty. 6 I hereby place this in the public domain. 7 Source is http://burtleburtle.net/bob/c/perfect.h 8 ------------------------------------------------------------------------------ 9 */ 10 11 #ifndef STANDARD 12 #include "standard.h" 13 #endif 14 15 #ifndef PERFECT 16 #define PERFECT 17 18 #define MAXKEYLEN 30 /* maximum length of a key */ 19 #define USE_SCRAMBLE 4096 /* use scramble if blen >= USE_SCRAMBLE */ 20 #define SCRAMBLE_LEN ((ub4)1<<16) /* length of *scramble* */ 21 #define RETRY_INITKEY 2048 /* number of times to try to find distinct (a,b) */ 22 #define RETRY_PERFECT 1 /* number of times to try to make a perfect hash */ 23 #define RETRY_HEX 200 /* RETRY_PERFECT when hex keys given */ 24 25 /* the generated code for the final hash, assumes initial hash is done */ 26 struct gencode 27 { 28 char **line; /* array of text lines, 80 bytes apiece */ 29 /* 30 * The code placed here must declare "ub4 rsl" 31 * and assign it the value of the perfect hash using the function inputs. 32 * Later code will be tacked on which returns rsl or manipulates it according 33 * to the user directives. 34 * 35 * This code is at the top of the routine; it may and must declare any 36 * local variables it needs. 37 * 38 * Each way of filling in **line should be given a comment that is a unique 39 * tag. A testcase named with that tag should also be found which tests 40 * the generated code. 41 */ 42 ub4 len; /* number of lines available for final hash */ 43 ub4 used; /* number of lines used by final hash */ 44 45 ub4 lowbit; /* for HEX, lowest interesting bit */ 46 ub4 highbit; /* for HEX, highest interesting bit */ 47 ub4 diffbits; /* bits which differ for some key */ 48 ub4 i,j,k,l,m,n,o; /* state machine used in hexn() */ 49 }; 50 typedef struct gencode gencode; 51 52 /* user directives: perfect hash? minimal perfect hash? input is an int? */ 53 struct hashform 54 { 55 enum { 56 NORMAL_HM, /* key is a string */ 57 INLINE_HM, /* user will do initial hash, we must choose salt for them */ 58 HEX_HM, /* key to be hashed is a hexidecimal 4-byte integer */ 59 DECIMAL_HM, /* key to be hashed is a decimal 4-byte integer */ 60 AB_HM, /* key to be hashed is "A B", where A and B are (A,B) in hex */ 61 ABDEC_HM /* like AB_HM, but in decimal */ 62 } mode; 63 enum { 64 STRING_HT, /* key is a string */ 65 INT_HT, /* key is an integer */ 66 AB_HT /* dunno what key is, but input is distinct (A,B) pair */ 67 } hashtype; 68 enum { 69 NORMAL_HP, /* just find a perfect hash */ 70 MINIMAL_HP /* find a minimal perfect hash */ 71 } perfect; 72 enum { 73 FAST_HS, /* fast mode */ 74 SLOW_HS /* slow mode */ 75 } speed; 76 }; 77 typedef struct hashform hashform; 78 79 /* representation of a key */ 80 struct key 81 { 82 char *name_k; /* the actual key */ 83 ub4 len_k; /* the length of the actual key */ 84 ub4 hash_k; /* the initial hash value for this key */ 85 struct key *next_k; /* next key */ 86 /* beyond this point is mapping-dependent */ 87 ub4 a_k; /* a, of the key maps to (a,b) */ 88 ub4 b_k; /* b, of the key maps to (a,b) */ 89 struct key *nextb_k; /* next key with this b */ 90 }; 91 typedef struct key key; 92 93 /* things indexed by b of original (a,b) pair */ 94 struct bstuff 95 { 96 ub2 val_b; /* hash=a^tabb[b].val_b */ 97 key *list_b; /* tabb[i].list_b is list of keys with b==i */ 98 ub4 listlen_b; /* length of list_b */ 99 ub4 water_b; /* high watermark of who has visited this map node */ 100 }; 101 typedef struct bstuff bstuff; 102 103 /* things indexed by final hash value */ 104 struct hstuff 105 { 106 key *key_h; /* tabh[i].key_h is the key with a hash of i */ 107 }; 108 typedef struct hstuff hstuff; 109 110 /* things indexed by queue position */ 111 struct qstuff 112 { 113 bstuff *b_q; /* b that currently occupies this hash */ 114 ub4 parent_q; /* queue position of parent that could use this hash */ 115 ub2 newval_q; /* what to change parent tab[b] to to use this hash */ 116 ub2 oldval_q; /* original value of tab[b] */ 117 }; 118 typedef struct qstuff qstuff; 119 120 /* return ceiling(log based 2 of x) */ 121 ub4 phash_log2(ub4 x); 122 123 /* Given the keys, scramble[], and hash mode, find the perfect hash */ 124 void findhash(bstuff **tabb, hstuff **tabh, ub4 *alen, ub4 *blen, ub4 *salt, 125 gencode *final, ub4 *scramble, ub4 *smax, key *keys, ub4 nkeys, 126 hashform *form); 127 128 /* private, but in a different file because it's excessively verbose */ 129 int inithex(key *keys, ub4 nkeys, ub4 alen, ub4 blen, ub4 smax, ub4 salt, 130 gencode *final, hashform *form); 131 132 #endif /* PERFECT */ 133