1 2 /*--------------------------------------------------------------------*/ 3 /*--- A separately-chained hash table. m_hashtable.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2005-2015 Nicholas Nethercote 11 njn (at) valgrind.org 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 */ 30 31 #include "pub_core_basics.h" 32 #include "pub_core_debuglog.h" 33 #include "pub_core_hashtable.h" 34 #include "pub_core_libcassert.h" 35 #include "pub_core_libcbase.h" 36 #include "pub_core_libcprint.h" 37 #include "pub_core_mallocfree.h" 38 39 /*--------------------------------------------------------------------*/ 40 /*--- Declarations ---*/ 41 /*--------------------------------------------------------------------*/ 42 43 #define CHAIN_NO(key,tbl) (((UWord)(key)) % tbl->n_chains) 44 45 struct _VgHashTable { 46 UInt n_chains; // should be prime 47 UInt n_elements; 48 VgHashNode* iterNode; // current iterator node 49 UInt iterChain; // next chain to be traversed by the iterator 50 VgHashNode** chains; // expanding array of hash chains 51 Bool iterOK; // table safe to iterate over? 52 const HChar* name; // name of table (for debugging only) 53 }; 54 55 #define N_HASH_PRIMES 20 56 57 static const SizeT primes[N_HASH_PRIMES] = { 58 769UL, 1543UL, 3079UL, 6151UL, 59 12289UL, 24593UL, 49157UL, 98317UL, 60 196613UL, 393241UL, 786433UL, 1572869UL, 61 3145739UL, 6291469UL, 12582917UL, 25165843UL, 62 50331653UL, 100663319UL, 201326611UL, 402653189UL 63 }; 64 65 /*--------------------------------------------------------------------*/ 66 /*--- Functions ---*/ 67 /*--------------------------------------------------------------------*/ 68 69 VgHashTable *VG_(HT_construct) ( const HChar* name ) 70 { 71 /* Initialises to zero, ie. all entries NULL */ 72 SizeT n_chains = primes[0]; 73 SizeT sz = n_chains * sizeof(VgHashNode*); 74 VgHashTable *table = VG_(calloc)("hashtable.Hc.1", 75 1, sizeof(struct _VgHashTable)); 76 table->chains = VG_(calloc)("hashtable.Hc.2", 1, sz); 77 table->n_chains = n_chains; 78 table->n_elements = 0; 79 table->iterOK = True; 80 table->name = name; 81 vg_assert(name); 82 return table; 83 } 84 85 UInt VG_(HT_count_nodes) ( const VgHashTable *table ) 86 { 87 return table->n_elements; 88 } 89 90 static void resize ( VgHashTable *table ) 91 { 92 Int i; 93 SizeT sz; 94 SizeT old_chains = table->n_chains; 95 SizeT new_chains = old_chains + 1; 96 VgHashNode** chains; 97 VgHashNode * node; 98 99 /* If we've run out of primes, do nothing. */ 100 if (old_chains == primes[N_HASH_PRIMES-1]) 101 return; 102 103 vg_assert(old_chains >= primes[0] 104 && old_chains < primes[N_HASH_PRIMES-1]); 105 106 for (i = 0; i < N_HASH_PRIMES; i++) { 107 if (primes[i] > new_chains) { 108 new_chains = primes[i]; 109 break; 110 } 111 } 112 113 vg_assert(new_chains > old_chains); 114 vg_assert(new_chains > primes[0] 115 && new_chains <= primes[N_HASH_PRIMES-1]); 116 117 VG_(debugLog)( 118 1, "hashtable", 119 "resizing table `%s' from %lu to %lu (total elems %lu)\n", 120 table->name, (UWord)old_chains, (UWord)new_chains, 121 (UWord)table->n_elements ); 122 123 table->n_chains = new_chains; 124 sz = new_chains * sizeof(VgHashNode*); 125 chains = VG_(calloc)("hashtable.resize.1", 1, sz); 126 127 for (i = 0; i < old_chains; i++) { 128 node = table->chains[i]; 129 while (node != NULL) { 130 VgHashNode* next = node->next; 131 UWord chain = CHAIN_NO(node->key, table); 132 node->next = chains[chain]; 133 chains[chain] = node; 134 node = next; 135 } 136 } 137 138 VG_(free)(table->chains); 139 table->chains = chains; 140 } 141 142 /* Puts a new, heap allocated VgHashNode, into the VgHashTable. Prepends 143 the node to the appropriate chain. No duplicate key detection is done. */ 144 void VG_(HT_add_node) ( VgHashTable *table, void* vnode ) 145 { 146 VgHashNode* node = (VgHashNode*)vnode; 147 UWord chain = CHAIN_NO(node->key, table); 148 node->next = table->chains[chain]; 149 table->chains[chain] = node; 150 table->n_elements++; 151 if ( (1 * (ULong)table->n_elements) > (1 * (ULong)table->n_chains) ) { 152 resize(table); 153 } 154 155 /* Table has been modified; hence HT_Next should assert. */ 156 table->iterOK = False; 157 } 158 159 /* Looks up a VgHashNode by key in the table. Returns NULL if not found. */ 160 void* VG_(HT_lookup) ( const VgHashTable *table, UWord key ) 161 { 162 VgHashNode* curr = table->chains[ CHAIN_NO(key, table) ]; 163 164 while (curr) { 165 if (key == curr->key) { 166 return curr; 167 } 168 curr = curr->next; 169 } 170 return NULL; 171 } 172 173 /* Looks up a VgHashNode by node in the table. Returns NULL if not found. 174 GEN!!! marks the lines that differs from VG_(HT_lookup). */ 175 void* VG_(HT_gen_lookup) ( const VgHashTable *table, const void* node, 176 HT_Cmp_t cmp ) 177 { 178 const VgHashNode* hnode = node; // GEN!!! 179 VgHashNode* curr = table->chains[ CHAIN_NO(hnode->key, table) ]; // GEN!!! 180 181 while (curr) { 182 if (hnode->key == curr->key && cmp (hnode, curr) == 0) { // GEN!!! 183 return curr; 184 } 185 curr = curr->next; 186 } 187 return NULL; 188 } 189 190 /* Removes a VgHashNode from the table. Returns NULL if not found. */ 191 void* VG_(HT_remove) ( VgHashTable *table, UWord key ) 192 { 193 UWord chain = CHAIN_NO(key, table); 194 VgHashNode* curr = table->chains[chain]; 195 VgHashNode** prev_next_ptr = &(table->chains[chain]); 196 197 /* Table has been modified; hence HT_Next should assert. */ 198 table->iterOK = False; 199 200 while (curr) { 201 if (key == curr->key) { 202 *prev_next_ptr = curr->next; 203 table->n_elements--; 204 return curr; 205 } 206 prev_next_ptr = &(curr->next); 207 curr = curr->next; 208 } 209 return NULL; 210 } 211 212 /* Removes a VgHashNode by node from the table. Returns NULL if not found. 213 GEN!!! marks the lines that differs from VG_(HT_remove). */ 214 void* VG_(HT_gen_remove) ( VgHashTable *table, const void* node, HT_Cmp_t cmp ) 215 { 216 const VgHashNode* hnode = node; // GEN!!! 217 UWord chain = CHAIN_NO(hnode->key, table); // GEN!!! 218 VgHashNode* curr = table->chains[chain]; 219 VgHashNode** prev_next_ptr = &(table->chains[chain]); 220 221 /* Table has been modified; hence HT_Next should assert. */ 222 table->iterOK = False; 223 224 while (curr) { 225 if (hnode->key == curr->key && cmp(hnode, curr) == 0) { // GEN!!! 226 *prev_next_ptr = curr->next; 227 table->n_elements--; 228 return curr; 229 } 230 prev_next_ptr = &(curr->next); 231 curr = curr->next; 232 } 233 return NULL; 234 } 235 236 void VG_(HT_print_stats) ( const VgHashTable *table, HT_Cmp_t cmp ) 237 { 238 #define MAXOCCUR 20 239 UInt elt_occurences[MAXOCCUR+1]; 240 UInt key_occurences[MAXOCCUR+1]; 241 UInt cno_occurences[MAXOCCUR+1]; 242 /* Key occurence : how many ht elements have the same key. 243 elt_occurences : how many elements are inserted multiple time. 244 cno_occurences : how many chains have that length. 245 The last entry in these arrays collects all occurences >= MAXOCCUR. */ 246 #define INCOCCUR(occur,n) (n >= MAXOCCUR ? occur[MAXOCCUR]++ : occur[n]++) 247 UInt i; 248 UInt nkey, nelt, ncno; 249 VgHashNode *cnode, *node; 250 251 VG_(memset)(key_occurences, 0, sizeof(key_occurences)); 252 VG_(memset)(elt_occurences, 0, sizeof(elt_occurences)); 253 VG_(memset)(cno_occurences, 0, sizeof(cno_occurences)); 254 255 // Note that the below algorithm is quadractic in nr of elements in a chain 256 // but if that happens, the hash table/function is really bad and that 257 // should be fixed. 258 for (i = 0; i < table->n_chains; i++) { 259 ncno = 0; 260 for (cnode = table->chains[i]; cnode != NULL; cnode = cnode->next) { 261 ncno++; 262 263 nkey = 0; 264 // Is the same cnode->key existing before cnode ? 265 for (node = table->chains[i]; node != cnode; node = node->next) { 266 if (node->key == cnode->key) 267 nkey++; 268 } 269 // If cnode->key not in a previous node, count occurences of key. 270 if (nkey == 0) { 271 for (node = cnode; node != NULL; node = node->next) { 272 if (node->key == cnode->key) 273 nkey++; 274 } 275 INCOCCUR(key_occurences, nkey); 276 } 277 278 nelt = 0; 279 // Is the same cnode element existing before cnode ? 280 for (node = table->chains[i]; node != cnode; node = node->next) { 281 if (node->key == cnode->key 282 && (cmp == NULL || cmp (node, cnode) == 0)) { 283 nelt++; 284 } 285 } 286 // If cnode element not in a previous node, count occurences of elt. 287 if (nelt == 0) { 288 for (node = cnode; node != NULL; node = node->next) { 289 if (node->key == cnode->key 290 && (cmp == NULL || cmp (node, cnode) == 0)) { 291 nelt++; 292 } 293 } 294 INCOCCUR(elt_occurences, nelt); 295 } 296 } 297 INCOCCUR(cno_occurences, ncno); 298 } 299 300 VG_(message)(Vg_DebugMsg, 301 "nr occurences of" 302 " chains of len N," 303 " N-plicated keys," 304 " N-plicated elts\n"); 305 nkey = nelt = ncno = 0; 306 for (i = 0; i <= MAXOCCUR; i++) { 307 if (elt_occurences[i] > 0 308 || key_occurences[i] > 0 309 || cno_occurences[i] > 0) 310 VG_(message)(Vg_DebugMsg, 311 "%s=%2u : nr chain %6u, nr keys %6u, nr elts %6u\n", 312 i == MAXOCCUR ? ">" : "N", i, 313 cno_occurences[i], key_occurences[i], elt_occurences[i]); 314 nkey += key_occurences[i]; 315 nelt += elt_occurences[i]; 316 ncno += cno_occurences[i]; 317 } 318 VG_(message)(Vg_DebugMsg, 319 "total nr of unique slots: %6u, keys %6u, elts %6u." 320 " Avg chain len %3.1f\n", 321 ncno, nkey, nelt, 322 (Double)nelt/(Double)(ncno == cno_occurences[0] ? 323 1 : ncno - cno_occurences[0])); 324 } 325 326 327 /* Allocates a suitably-sized array, copies pointers to all the hashtable 328 elements into it, then returns both the array and the size of it. The 329 array must be freed with VG_(free). 330 */ 331 VgHashNode** VG_(HT_to_array) (const VgHashTable *table, /*OUT*/ UInt* n_elems) 332 { 333 UInt i, j; 334 VgHashNode** arr; 335 VgHashNode* node; 336 337 *n_elems = table->n_elements; 338 if (*n_elems == 0) 339 return NULL; 340 341 arr = VG_(malloc)( "hashtable.Hta.1", *n_elems * sizeof(VgHashNode*) ); 342 343 j = 0; 344 for (i = 0; i < table->n_chains; i++) { 345 for (node = table->chains[i]; node != NULL; node = node->next) { 346 arr[j++] = node; 347 } 348 } 349 vg_assert(j == *n_elems); 350 351 return arr; 352 } 353 354 void VG_(HT_ResetIter)(VgHashTable *table) 355 { 356 vg_assert(table); 357 table->iterNode = NULL; 358 table->iterChain = 0; 359 table->iterOK = True; 360 } 361 362 void* VG_(HT_Next)(VgHashTable *table) 363 { 364 Int i; 365 vg_assert(table); 366 /* See long comment on HT_Next prototype in pub_tool_hashtable.h. 367 In short if this fails, it means the caller tried to modify the 368 table whilst iterating over it, which is a bug. */ 369 vg_assert(table->iterOK); 370 371 if (table->iterNode && table->iterNode->next) { 372 table->iterNode = table->iterNode->next; 373 return table->iterNode; 374 } 375 376 for (i = table->iterChain; i < table->n_chains; i++) { 377 if (table->chains[i]) { 378 table->iterNode = table->chains[i]; 379 table->iterChain = i + 1; // Next chain to be traversed 380 return table->iterNode; 381 } 382 } 383 return NULL; 384 } 385 386 void VG_(HT_destruct)(VgHashTable *table, void(*freenode_fn)(void*)) 387 { 388 UInt i; 389 VgHashNode *node, *node_next; 390 391 for (i = 0; i < table->n_chains; i++) { 392 for (node = table->chains[i]; node != NULL; node = node_next) { 393 node_next = node->next; 394 freenode_fn(node); 395 } 396 } 397 VG_(free)(table->chains); 398 VG_(free)(table); 399 } 400 401 /*--------------------------------------------------------------------*/ 402 /*--- end ---*/ 403 /*--------------------------------------------------------------------*/ 404