Home | History | Annotate | Download | only in coregrind
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- A separately-chained hash table.               m_hashtable.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2005-2017 Nicholas Nethercote
     11       njn (at) valgrind.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "pub_core_basics.h"
     32 #include "pub_core_debuglog.h"
     33 #include "pub_core_hashtable.h"
     34 #include "pub_core_libcassert.h"
     35 #include "pub_core_libcbase.h"
     36 #include "pub_core_libcprint.h"
     37 #include "pub_core_mallocfree.h"
     38 
     39 /*--------------------------------------------------------------------*/
     40 /*--- Declarations                                                 ---*/
     41 /*--------------------------------------------------------------------*/
     42 
     43 #define CHAIN_NO(key,tbl) (((UWord)(key)) % tbl->n_chains)
     44 
     45 struct _VgHashTable {
     46    UInt         n_chains;   // should be prime
     47    UInt         n_elements;
     48    VgHashNode*  iterNode;   // current iterator node
     49    UInt         iterChain;  // next chain to be traversed by the iterator
     50    VgHashNode** chains;     // expanding array of hash chains
     51    Bool         iterOK;     // table safe to iterate over?
     52    const HChar* name;       // name of table (for debugging only)
     53 };
     54 
     55 #define N_HASH_PRIMES 20
     56 
     57 static const SizeT primes[N_HASH_PRIMES] = {
     58          769UL,         1543UL,         3079UL,          6151UL,
     59        12289UL,        24593UL,        49157UL,         98317UL,
     60       196613UL,       393241UL,       786433UL,       1572869UL,
     61      3145739UL,      6291469UL,     12582917UL,      25165843UL,
     62     50331653UL,    100663319UL,    201326611UL,     402653189UL
     63 };
     64 
     65 /*--------------------------------------------------------------------*/
     66 /*--- Functions                                                    ---*/
     67 /*--------------------------------------------------------------------*/
     68 
     69 VgHashTable *VG_(HT_construct) ( const HChar* name )
     70 {
     71    /* Initialises to zero, ie. all entries NULL */
     72    SizeT       n_chains = primes[0];
     73    SizeT       sz       = n_chains * sizeof(VgHashNode*);
     74    VgHashTable *table   = VG_(calloc)("hashtable.Hc.1",
     75                                       1, sizeof(struct _VgHashTable));
     76    table->chains        = VG_(calloc)("hashtable.Hc.2", 1, sz);
     77    table->n_chains      = n_chains;
     78    table->n_elements    = 0;
     79    table->iterOK        = True;
     80    table->name          = name;
     81    vg_assert(name);
     82    return table;
     83 }
     84 
     85 UInt VG_(HT_count_nodes) ( const VgHashTable *table )
     86 {
     87    return table->n_elements;
     88 }
     89 
     90 static void resize ( VgHashTable *table )
     91 {
     92    Int          i;
     93    SizeT        sz;
     94    SizeT        old_chains = table->n_chains;
     95    SizeT        new_chains = old_chains + 1;
     96    VgHashNode** chains;
     97    VgHashNode * node;
     98 
     99    /* If we've run out of primes, do nothing. */
    100    if (old_chains == primes[N_HASH_PRIMES-1])
    101       return;
    102 
    103    vg_assert(old_chains >= primes[0]
    104              && old_chains < primes[N_HASH_PRIMES-1]);
    105 
    106    for (i = 0; i < N_HASH_PRIMES; i++) {
    107       if (primes[i] > new_chains) {
    108          new_chains = primes[i];
    109          break;
    110       }
    111    }
    112 
    113    vg_assert(new_chains > old_chains);
    114    vg_assert(new_chains > primes[0]
    115              && new_chains <= primes[N_HASH_PRIMES-1]);
    116 
    117    VG_(debugLog)(
    118       1, "hashtable",
    119          "resizing table `%s' from %lu to %lu (total elems %lu)\n",
    120          table->name, (UWord)old_chains, (UWord)new_chains,
    121          (UWord)table->n_elements );
    122 
    123    table->n_chains = new_chains;
    124    sz = new_chains * sizeof(VgHashNode*);
    125    chains = VG_(calloc)("hashtable.resize.1", 1, sz);
    126 
    127    for (i = 0; i < old_chains; i++) {
    128       node = table->chains[i];
    129       while (node != NULL) {
    130          VgHashNode* next = node->next;
    131          UWord chain = CHAIN_NO(node->key, table);
    132          node->next = chains[chain];
    133          chains[chain] = node;
    134          node = next;
    135       }
    136    }
    137 
    138    VG_(free)(table->chains);
    139    table->chains = chains;
    140 }
    141 
    142 /* Puts a new, heap allocated VgHashNode, into the VgHashTable.  Prepends
    143    the node to the appropriate chain.  No duplicate key detection is done. */
    144 void VG_(HT_add_node) ( VgHashTable *table, void* vnode )
    145 {
    146    VgHashNode* node     = (VgHashNode*)vnode;
    147    UWord chain          = CHAIN_NO(node->key, table);
    148    node->next           = table->chains[chain];
    149    table->chains[chain] = node;
    150    table->n_elements++;
    151    if ( (1 * (ULong)table->n_elements) > (1 * (ULong)table->n_chains) ) {
    152       resize(table);
    153    }
    154 
    155    /* Table has been modified; hence HT_Next should assert. */
    156    table->iterOK = False;
    157 }
    158 
    159 /* Looks up a VgHashNode by key in the table.  Returns NULL if not found. */
    160 void* VG_(HT_lookup) ( const VgHashTable *table, UWord key )
    161 {
    162    VgHashNode* curr = table->chains[ CHAIN_NO(key, table) ];
    163 
    164    while (curr) {
    165       if (key == curr->key) {
    166          return curr;
    167       }
    168       curr = curr->next;
    169    }
    170    return NULL;
    171 }
    172 
    173 /* Looks up a VgHashNode by node in the table.  Returns NULL if not found.
    174    GEN!!! marks the lines that differs from VG_(HT_lookup). */
    175 void* VG_(HT_gen_lookup) ( const VgHashTable *table, const void* node,
    176                            HT_Cmp_t cmp )
    177 {
    178    const VgHashNode* hnode = node; // GEN!!!
    179    VgHashNode* curr = table->chains[ CHAIN_NO(hnode->key, table) ]; // GEN!!!
    180 
    181    while (curr) {
    182       if (hnode->key == curr->key && cmp (hnode, curr) == 0) { // GEN!!!
    183          return curr;
    184       }
    185       curr = curr->next;
    186    }
    187    return NULL;
    188 }
    189 
    190 /* Removes a VgHashNode from the table.  Returns NULL if not found. */
    191 void* VG_(HT_remove) ( VgHashTable *table, UWord key )
    192 {
    193    UWord        chain         = CHAIN_NO(key, table);
    194    VgHashNode*  curr          =   table->chains[chain];
    195    VgHashNode** prev_next_ptr = &(table->chains[chain]);
    196 
    197    /* Table has been modified; hence HT_Next should assert. */
    198    table->iterOK = False;
    199 
    200    while (curr) {
    201       if (key == curr->key) {
    202          *prev_next_ptr = curr->next;
    203          table->n_elements--;
    204          return curr;
    205       }
    206       prev_next_ptr = &(curr->next);
    207       curr = curr->next;
    208    }
    209    return NULL;
    210 }
    211 
    212 /* Removes a VgHashNode by node from the table.  Returns NULL if not found.
    213    GEN!!! marks the lines that differs from VG_(HT_remove). */
    214 void* VG_(HT_gen_remove) ( VgHashTable *table, const void* node, HT_Cmp_t cmp  )
    215 {
    216    const VgHashNode* hnode    = node; // GEN!!!
    217    UWord        chain         = CHAIN_NO(hnode->key, table); // GEN!!!
    218    VgHashNode*  curr          =   table->chains[chain];
    219    VgHashNode** prev_next_ptr = &(table->chains[chain]);
    220 
    221    /* Table has been modified; hence HT_Next should assert. */
    222    table->iterOK = False;
    223 
    224    while (curr) {
    225       if (hnode->key == curr->key && cmp(hnode, curr) == 0) { // GEN!!!
    226          *prev_next_ptr = curr->next;
    227          table->n_elements--;
    228          return curr;
    229       }
    230       prev_next_ptr = &(curr->next);
    231       curr = curr->next;
    232    }
    233    return NULL;
    234 }
    235 
    236 void VG_(HT_print_stats) ( const VgHashTable *table, HT_Cmp_t cmp )
    237 {
    238    #define MAXOCCUR 20
    239    UInt elt_occurences[MAXOCCUR+1];
    240    UInt key_occurences[MAXOCCUR+1];
    241    UInt cno_occurences[MAXOCCUR+1];
    242    /* Key occurence  : how many ht elements have the same key.
    243       elt_occurences : how many elements are inserted multiple time.
    244       cno_occurences : how many chains have that length.
    245       The last entry in these arrays collects all occurences >= MAXOCCUR. */
    246    #define INCOCCUR(occur,n) (n >= MAXOCCUR ? occur[MAXOCCUR]++ : occur[n]++)
    247    UInt i;
    248    UInt nkey, nelt, ncno;
    249    VgHashNode *cnode, *node;
    250 
    251    VG_(memset)(key_occurences, 0, sizeof(key_occurences));
    252    VG_(memset)(elt_occurences, 0, sizeof(elt_occurences));
    253    VG_(memset)(cno_occurences, 0, sizeof(cno_occurences));
    254 
    255    // Note that the below algorithm is quadractic in nr of elements in a chain
    256    // but if that happens, the hash table/function is really bad and that
    257    // should be fixed.
    258    for (i = 0; i < table->n_chains; i++) {
    259       ncno = 0;
    260       for (cnode = table->chains[i]; cnode != NULL; cnode = cnode->next) {
    261          ncno++;
    262 
    263          nkey = 0;
    264          // Is the same cnode->key existing before cnode ?
    265          for (node = table->chains[i]; node != cnode; node = node->next) {
    266             if (node->key == cnode->key)
    267                nkey++;
    268          }
    269          // If cnode->key not in a previous node, count occurences of key.
    270          if (nkey == 0) {
    271             for (node = cnode; node != NULL; node = node->next) {
    272                if (node->key == cnode->key)
    273                   nkey++;
    274             }
    275             INCOCCUR(key_occurences, nkey);
    276          }
    277 
    278          nelt = 0;
    279          // Is the same cnode element existing before cnode ?
    280          for (node = table->chains[i]; node != cnode; node = node->next) {
    281             if (node->key == cnode->key
    282                 && (cmp == NULL || cmp (node, cnode) == 0)) {
    283                nelt++;
    284             }
    285          }
    286          // If cnode element not in a previous node, count occurences of elt.
    287          if (nelt == 0) {
    288             for (node = cnode; node != NULL; node = node->next) {
    289                if (node->key == cnode->key
    290                    && (cmp == NULL || cmp (node, cnode) == 0)) {
    291                   nelt++;
    292                }
    293             }
    294             INCOCCUR(elt_occurences, nelt);
    295          }
    296       }
    297       INCOCCUR(cno_occurences, ncno);
    298    }
    299 
    300    VG_(message)(Vg_DebugMsg,
    301                 "nr occurences of"
    302                 " chains of len N,"
    303                 " N-plicated keys,"
    304                 " N-plicated elts\n");
    305    nkey = nelt = ncno = 0;
    306    for (i = 0; i <= MAXOCCUR; i++) {
    307       if (elt_occurences[i] > 0
    308           || key_occurences[i] > 0
    309           || cno_occurences[i] > 0)
    310          VG_(message)(Vg_DebugMsg,
    311                       "%s=%2u : nr chain %6u, nr keys %6u, nr elts %6u\n",
    312                       i == MAXOCCUR ? ">" : "N", i,
    313                       cno_occurences[i], key_occurences[i], elt_occurences[i]);
    314       nkey += key_occurences[i];
    315       nelt += elt_occurences[i];
    316       ncno += cno_occurences[i];
    317    }
    318    VG_(message)(Vg_DebugMsg,
    319                 "total nr of unique   slots: %6u, keys %6u, elts %6u."
    320                 " Avg chain len %3.1f\n",
    321                 ncno, nkey, nelt,
    322                 (Double)nelt/(Double)(ncno == cno_occurences[0] ?
    323                                       1 : ncno - cno_occurences[0]));
    324 }
    325 
    326 
    327 /* Allocates a suitably-sized array, copies pointers to all the hashtable
    328    elements into it, then returns both the array and the size of it.  The
    329    array must be freed with VG_(free).
    330 */
    331 VgHashNode** VG_(HT_to_array) (const VgHashTable *table, /*OUT*/ UInt* n_elems)
    332 {
    333    UInt       i, j;
    334    VgHashNode** arr;
    335    VgHashNode*  node;
    336 
    337    *n_elems = table->n_elements;
    338    if (*n_elems == 0)
    339       return NULL;
    340 
    341    arr = VG_(malloc)( "hashtable.Hta.1", *n_elems * sizeof(VgHashNode*) );
    342 
    343    j = 0;
    344    for (i = 0; i < table->n_chains; i++) {
    345       for (node = table->chains[i]; node != NULL; node = node->next) {
    346          arr[j++] = node;
    347       }
    348    }
    349    vg_assert(j == *n_elems);
    350 
    351    return arr;
    352 }
    353 
    354 void VG_(HT_ResetIter)(VgHashTable *table)
    355 {
    356    vg_assert(table);
    357    table->iterNode  = NULL;
    358    table->iterChain = 0;
    359    table->iterOK    = True;
    360 }
    361 
    362 void* VG_(HT_Next)(VgHashTable *table)
    363 {
    364    Int i;
    365    vg_assert(table);
    366    /* See long comment on HT_Next prototype in pub_tool_hashtable.h.
    367       In short if this fails, it means the caller tried to modify the
    368       table whilst iterating over it, which is a bug.
    369       One exception: HT_remove_at_Iter can remove the current entry and
    370       leave the iterator in a valid state for HT_Next. */
    371    vg_assert(table->iterOK);
    372 
    373    if (table->iterNode && table->iterNode->next) {
    374       table->iterNode = table->iterNode->next;
    375       return table->iterNode;
    376    }
    377 
    378    for (i = table->iterChain; i < table->n_chains; i++) {
    379       if (table->chains[i]) {
    380          table->iterNode  = table->chains[i];
    381          table->iterChain = i + 1;  // Next chain to be traversed
    382          return table->iterNode;
    383       }
    384    }
    385    return NULL;
    386 }
    387 
    388 void VG_(HT_remove_at_Iter)(VgHashTable *table)
    389 {
    390    vg_assert(table);
    391    vg_assert(table->iterOK);
    392    vg_assert(table->iterNode);
    393 
    394    const UInt curChain = table->iterChain - 1; // chain of iterNode.
    395 
    396 
    397    if (table->chains[curChain] == table->iterNode) {
    398       /* iterNode is the first of its chain -> remove it from the chain. */
    399       table->chains[curChain] = table->iterNode->next;
    400       /* Setup the iterator to visit first node of curChain: */
    401       table->iterNode  = NULL;
    402       table->iterChain = curChain;
    403    } else {
    404       /* iterNode is somewhere inside curChain chain */
    405       VgHashNode* prev = table->chains[curChain];
    406 
    407       while (prev->next != table->iterNode)
    408          prev = prev->next;
    409       /* Remove iterNode from the chain. */
    410       prev->next = table->iterNode->next;
    411       /* Setup the iterator to visit prev->next, which is the node
    412          that was after the deleted node. */
    413       table->iterNode = prev;
    414    }
    415 
    416    table->n_elements--;
    417 }
    418 
    419 void VG_(HT_destruct)(VgHashTable *table, void(*freenode_fn)(void*))
    420 {
    421    UInt       i;
    422    VgHashNode *node, *node_next;
    423 
    424    for (i = 0; i < table->n_chains; i++) {
    425       for (node = table->chains[i]; node != NULL; node = node_next) {
    426          node_next = node->next;
    427          freenode_fn(node);
    428       }
    429    }
    430    VG_(free)(table->chains);
    431    VG_(free)(table);
    432 }
    433 
    434 /*--------------------------------------------------------------------*/
    435 /*--- end                                                          ---*/
    436 /*--------------------------------------------------------------------*/
    437