Home | History | Annotate | Download | only in libyasm
      1 /*
      2  * Hash Array Mapped Trie (HAMT) implementation
      3  *
      4  *  Copyright (C) 2001-2007  Peter Johnson
      5  *
      6  *  Based on the paper "Ideal Hash Tries" by Phil Bagwell [2000].
      7  *  One algorithmic change from that described in the paper: we use the LSB's
      8  *  of the key to index the root table and move upward in the key rather than
      9  *  use the MSBs as described in the paper.  The LSBs have more entropy.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
     21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
     24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30  * POSSIBILITY OF SUCH DAMAGE.
     31  */
     32 #include "util.h"
     33 
     34 #include <ctype.h>
     35 
     36 #include "libyasm-stdint.h"
     37 #include "coretype.h"
     38 #include "hamt.h"
     39 
     40 struct HAMTEntry {
     41     STAILQ_ENTRY(HAMTEntry) next;       /* next hash table entry */
     42     /*@dependent@*/ const char *str;    /* string being hashed */
     43     /*@owned@*/ void *data;             /* data pointer being stored */
     44 };
     45 
     46 typedef struct HAMTNode {
     47     unsigned long BitMapKey;            /* 32 bits, bitmap or hash key */
     48     uintptr_t BaseValue;                /* Base of HAMTNode list or value */
     49 } HAMTNode;
     50 
     51 struct HAMT {
     52     STAILQ_HEAD(HAMTEntryHead, HAMTEntry) entries;
     53     HAMTNode *root;
     54     /*@exits@*/ void (*error_func) (const char *file, unsigned int line,
     55                                     const char *message);
     56     unsigned long (*HashKey) (const char *key);
     57     unsigned long (*ReHashKey) (const char *key, int Level);
     58     int (*CmpKey) (const char *s1, const char *s2);
     59 };
     60 
     61 /* XXX make a portable version of this.  This depends on the pointer being
     62  * 4 or 2-byte aligned (as it uses the LSB of the pointer variable to store
     63  * the subtrie flag!
     64  */
     65 #define IsSubTrie(n)            ((n)->BaseValue & 1)
     66 #define SetSubTrie(h, n, v)     do {                            \
     67         if ((uintptr_t)(v) & 1)                                 \
     68             h->error_func(__FILE__, __LINE__,                   \
     69                           N_("Subtrie is seen as subtrie before flag is set (misaligned?)"));   \
     70         (n)->BaseValue = (uintptr_t)(v) | 1;    \
     71     } while (0)
     72 #define SetValue(h, n, v)       do {                            \
     73         if ((uintptr_t)(v) & 1)                                 \
     74             h->error_func(__FILE__, __LINE__,                   \
     75                           N_("Value is seen as subtrie (misaligned?)")); \
     76         (n)->BaseValue = (uintptr_t)(v);        \
     77     } while (0)
     78 #define GetSubTrie(n)           (HAMTNode *)(((n)->BaseValue | 1) ^ 1)
     79 
     80 static unsigned long
     81 HashKey(const char *key)
     82 {
     83     unsigned long a=31415, b=27183, vHash;
     84     for (vHash=0; *key; key++, a*=b)
     85         vHash = a*vHash + *key;
     86     return vHash;
     87 }
     88 
     89 static unsigned long
     90 ReHashKey(const char *key, int Level)
     91 {
     92     unsigned long a=31415, b=27183, vHash;
     93     for (vHash=0; *key; key++, a*=b)
     94         vHash = a*vHash*(unsigned long)Level + *key;
     95     return vHash;
     96 }
     97 
     98 static unsigned long
     99 HashKey_nocase(const char *key)
    100 {
    101     unsigned long a=31415, b=27183, vHash;
    102     for (vHash=0; *key; key++, a*=b)
    103         vHash = a*vHash + tolower(*key);
    104     return vHash;
    105 }
    106 
    107 static unsigned long
    108 ReHashKey_nocase(const char *key, int Level)
    109 {
    110     unsigned long a=31415, b=27183, vHash;
    111     for (vHash=0; *key; key++, a*=b)
    112         vHash = a*vHash*(unsigned long)Level + tolower(*key);
    113     return vHash;
    114 }
    115 
    116 HAMT *
    117 HAMT_create(int nocase, /*@exits@*/ void (*error_func)
    118     (const char *file, unsigned int line, const char *message))
    119 {
    120     /*@out@*/ HAMT *hamt = yasm_xmalloc(sizeof(HAMT));
    121     int i;
    122 
    123     STAILQ_INIT(&hamt->entries);
    124     hamt->root = yasm_xmalloc(32*sizeof(HAMTNode));
    125 
    126     for (i=0; i<32; i++) {
    127         hamt->root[i].BitMapKey = 0;
    128         hamt->root[i].BaseValue = 0;
    129     }
    130 
    131     hamt->error_func = error_func;
    132     if (nocase) {
    133         hamt->HashKey = HashKey_nocase;
    134         hamt->ReHashKey = ReHashKey_nocase;
    135         hamt->CmpKey = yasm__strcasecmp;
    136     } else {
    137         hamt->HashKey = HashKey;
    138         hamt->ReHashKey = ReHashKey;
    139         hamt->CmpKey = strcmp;
    140     }
    141 
    142     return hamt;
    143 }
    144 
    145 static void
    146 HAMT_delete_trie(HAMTNode *node)
    147 {
    148     if (IsSubTrie(node)) {
    149         unsigned long i, Size;
    150 
    151         /* Count total number of bits in bitmap to determine size */
    152         BitCount(Size, node->BitMapKey);
    153         Size &= 0x1F;
    154         if (Size == 0)
    155             Size = 32;
    156 
    157         for (i=0; i<Size; i++)
    158             HAMT_delete_trie(&(GetSubTrie(node))[i]);
    159         yasm_xfree(GetSubTrie(node));
    160     }
    161 }
    162 
    163 void
    164 HAMT_destroy(HAMT *hamt, void (*deletefunc) (/*@only@*/ void *data))
    165 {
    166     int i;
    167 
    168     /* delete entries */
    169     while (!STAILQ_EMPTY(&hamt->entries)) {
    170         HAMTEntry *entry;
    171         entry = STAILQ_FIRST(&hamt->entries);
    172         STAILQ_REMOVE_HEAD(&hamt->entries, next);
    173         deletefunc(entry->data);
    174         yasm_xfree(entry);
    175     }
    176 
    177     /* delete trie */
    178     for (i=0; i<32; i++)
    179         HAMT_delete_trie(&hamt->root[i]);
    180 
    181     yasm_xfree(hamt->root);
    182     yasm_xfree(hamt);
    183 }
    184 
    185 int
    186 HAMT_traverse(HAMT *hamt, void *d,
    187               int (*func) (/*@dependent@*/ /*@null@*/ void *node,
    188                             /*@null@*/ void *d))
    189 {
    190     HAMTEntry *entry;
    191     STAILQ_FOREACH(entry, &hamt->entries, next) {
    192         int retval = func(entry->data, d);
    193         if (retval != 0)
    194             return retval;
    195     }
    196     return 0;
    197 }
    198 
    199 const HAMTEntry *
    200 HAMT_first(const HAMT *hamt)
    201 {
    202     return STAILQ_FIRST(&hamt->entries);
    203 }
    204 
    205 const HAMTEntry *
    206 HAMT_next(const HAMTEntry *prev)
    207 {
    208     return STAILQ_NEXT(prev, next);
    209 }
    210 
    211 void *
    212 HAMTEntry_get_data(const HAMTEntry *entry)
    213 {
    214     return entry->data;
    215 }
    216 
    217 /*@-temptrans -kepttrans -mustfree@*/
    218 void *
    219 HAMT_insert(HAMT *hamt, const char *str, void *data, int *replace,
    220             void (*deletefunc) (/*@only@*/ void *data))
    221 {
    222     HAMTNode *node, *newnodes;
    223     HAMTEntry *entry;
    224     unsigned long key, keypart, Map;
    225     int keypartbits = 0;
    226     int level = 0;
    227 
    228     key = hamt->HashKey(str);
    229     keypart = key & 0x1F;
    230     node = &hamt->root[keypart];
    231 
    232     if (!node->BaseValue) {
    233         node->BitMapKey = key;
    234         entry = yasm_xmalloc(sizeof(HAMTEntry));
    235         entry->str = str;
    236         entry->data = data;
    237         STAILQ_INSERT_TAIL(&hamt->entries, entry, next);
    238         SetValue(hamt, node, entry);
    239         if (IsSubTrie(node))
    240             hamt->error_func(__FILE__, __LINE__,
    241                              N_("Data is seen as subtrie (misaligned?)"));
    242         *replace = 1;
    243         return data;
    244     }
    245 
    246     for (;;) {
    247         if (!(IsSubTrie(node))) {
    248             if (node->BitMapKey == key
    249                 && hamt->CmpKey(((HAMTEntry *)(node->BaseValue))->str,
    250                                 str) == 0) {
    251                 /*@-branchstate@*/
    252                 if (*replace) {
    253                     deletefunc(((HAMTEntry *)(node->BaseValue))->data);
    254                     ((HAMTEntry *)(node->BaseValue))->str = str;
    255                     ((HAMTEntry *)(node->BaseValue))->data = data;
    256                 } else
    257                     deletefunc(data);
    258                 /*@=branchstate@*/
    259                 return ((HAMTEntry *)(node->BaseValue))->data;
    260             } else {
    261                 unsigned long key2 = node->BitMapKey;
    262                 /* build tree downward until keys differ */
    263                 for (;;) {
    264                     unsigned long keypart2;
    265 
    266                     /* replace node with subtrie */
    267                     keypartbits += 5;
    268                     if (keypartbits > 30) {
    269                         /* Exceeded 32 bits: rehash */
    270                         key = hamt->ReHashKey(str, level);
    271                         key2 = hamt->ReHashKey(
    272                             ((HAMTEntry *)(node->BaseValue))->str, level);
    273                         keypartbits = 0;
    274                     }
    275                     keypart = (key >> keypartbits) & 0x1F;
    276                     keypart2 = (key2 >> keypartbits) & 0x1F;
    277 
    278                     if (keypart == keypart2) {
    279                         /* Still equal, build one-node subtrie and continue
    280                          * downward.
    281                          */
    282                         newnodes = yasm_xmalloc(sizeof(HAMTNode));
    283                         newnodes[0].BitMapKey = key2;
    284                         newnodes[0].BaseValue = node->BaseValue;
    285                         node->BitMapKey = 1<<keypart;
    286                         SetSubTrie(hamt, node, newnodes);
    287                         node = &newnodes[0];
    288                         level++;
    289                     } else {
    290                         /* partitioned: allocate two-node subtrie */
    291                         newnodes = yasm_xmalloc(2*sizeof(HAMTNode));
    292 
    293                         entry = yasm_xmalloc(sizeof(HAMTEntry));
    294                         entry->str = str;
    295                         entry->data = data;
    296                         STAILQ_INSERT_TAIL(&hamt->entries, entry, next);
    297 
    298                         /* Copy nodes into subtrie based on order */
    299                         if (keypart2 < keypart) {
    300                             newnodes[0].BitMapKey = key2;
    301                             newnodes[0].BaseValue = node->BaseValue;
    302                             newnodes[1].BitMapKey = key;
    303                             SetValue(hamt, &newnodes[1], entry);
    304                         } else {
    305                             newnodes[0].BitMapKey = key;
    306                             SetValue(hamt, &newnodes[0], entry);
    307                             newnodes[1].BitMapKey = key2;
    308                             newnodes[1].BaseValue = node->BaseValue;
    309                         }
    310 
    311                         /* Set bits in bitmap corresponding to keys */
    312                         node->BitMapKey = (1UL<<keypart) | (1UL<<keypart2);
    313                         SetSubTrie(hamt, node, newnodes);
    314                         *replace = 1;
    315                         return data;
    316                     }
    317                 }
    318             }
    319         }
    320 
    321         /* Subtrie: look up in bitmap */
    322         keypartbits += 5;
    323         if (keypartbits > 30) {
    324             /* Exceeded 32 bits of current key: rehash */
    325             key = hamt->ReHashKey(str, level);
    326             keypartbits = 0;
    327         }
    328         keypart = (key >> keypartbits) & 0x1F;
    329         if (!(node->BitMapKey & (1<<keypart))) {
    330             /* bit is 0 in bitmap -> add node to table */
    331             unsigned long Size;
    332 
    333             /* set bit to 1 */
    334             node->BitMapKey |= 1<<keypart;
    335 
    336             /* Count total number of bits in bitmap to determine new size */
    337             BitCount(Size, node->BitMapKey);
    338             Size &= 0x1F;
    339             if (Size == 0)
    340                 Size = 32;
    341             newnodes = yasm_xmalloc(Size*sizeof(HAMTNode));
    342 
    343             /* Count bits below to find where to insert new node at */
    344             BitCount(Map, node->BitMapKey & ~((~0UL)<<keypart));
    345             Map &= 0x1F;        /* Clamp to <32 */
    346             /* Copy existing nodes leaving gap for new node */
    347             memcpy(newnodes, GetSubTrie(node), Map*sizeof(HAMTNode));
    348             memcpy(&newnodes[Map+1], &(GetSubTrie(node))[Map],
    349                    (Size-Map-1)*sizeof(HAMTNode));
    350             /* Delete old subtrie */
    351             yasm_xfree(GetSubTrie(node));
    352             /* Set up new node */
    353             newnodes[Map].BitMapKey = key;
    354             entry = yasm_xmalloc(sizeof(HAMTEntry));
    355             entry->str = str;
    356             entry->data = data;
    357             STAILQ_INSERT_TAIL(&hamt->entries, entry, next);
    358             SetValue(hamt, &newnodes[Map], entry);
    359             SetSubTrie(hamt, node, newnodes);
    360 
    361             *replace = 1;
    362             return data;
    363         }
    364 
    365         /* Count bits below */
    366         BitCount(Map, node->BitMapKey & ~((~0UL)<<keypart));
    367         Map &= 0x1F;    /* Clamp to <32 */
    368 
    369         /* Go down a level */
    370         level++;
    371         node = &(GetSubTrie(node))[Map];
    372     }
    373 }
    374 /*@=temptrans =kepttrans =mustfree@*/
    375 
    376 void *
    377 HAMT_search(HAMT *hamt, const char *str)
    378 {
    379     HAMTNode *node;
    380     unsigned long key, keypart, Map;
    381     int keypartbits = 0;
    382     int level = 0;
    383 
    384     key = hamt->HashKey(str);
    385     keypart = key & 0x1F;
    386     node = &hamt->root[keypart];
    387 
    388     if (!node->BaseValue)
    389         return NULL;
    390 
    391     for (;;) {
    392         if (!(IsSubTrie(node))) {
    393             if (node->BitMapKey == key
    394                 && hamt->CmpKey(((HAMTEntry *)(node->BaseValue))->str,
    395                                 str) == 0)
    396                 return ((HAMTEntry *)(node->BaseValue))->data;
    397             else
    398                 return NULL;
    399         }
    400 
    401         /* Subtree: look up in bitmap */
    402         keypartbits += 5;
    403         if (keypartbits > 30) {
    404             /* Exceeded 32 bits of current key: rehash */
    405             key = hamt->ReHashKey(str, level);
    406             keypartbits = 0;
    407         }
    408         keypart = (key >> keypartbits) & 0x1F;
    409         if (!(node->BitMapKey & (1<<keypart)))
    410             return NULL;        /* bit is 0 in bitmap -> no match */
    411 
    412         /* Count bits below */
    413         BitCount(Map, node->BitMapKey & ~((~0UL)<<keypart));
    414         Map &= 0x1F;    /* Clamp to <32 */
    415 
    416         /* Go down a level */
    417         level++;
    418         node = &(GetSubTrie(node))[Map];
    419     }
    420 }
    421 
    422