1 /* The implementation of the hash table (_Py_hashtable_t) is based on the 2 cfuhash project: 3 http://sourceforge.net/projects/libcfu/ 4 5 Copyright of cfuhash: 6 ---------------------------------- 7 Creation date: 2005-06-24 21:22:40 8 Authors: Don 9 Change log: 10 11 Copyright (c) 2005 Don Owens 12 All rights reserved. 13 14 This code is released under the BSD license: 15 16 Redistribution and use in source and binary forms, with or without 17 modification, are permitted provided that the following conditions 18 are met: 19 20 * Redistributions of source code must retain the above copyright 21 notice, this list of conditions and the following disclaimer. 22 23 * Redistributions in binary form must reproduce the above 24 copyright notice, this list of conditions and the following 25 disclaimer in the documentation and/or other materials provided 26 with the distribution. 27 28 * Neither the name of the author nor the names of its 29 contributors may be used to endorse or promote products derived 30 from this software without specific prior written permission. 31 32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 33 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 34 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 35 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 36 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 37 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 39 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 40 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 41 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 43 OF THE POSSIBILITY OF SUCH DAMAGE. 44 ---------------------------------- 45 */ 46 47 #include "Python.h" 48 #include "hashtable.h" 49 50 #define HASHTABLE_MIN_SIZE 16 51 #define HASHTABLE_HIGH 0.50 52 #define HASHTABLE_LOW 0.10 53 #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH) 54 55 #define BUCKETS_HEAD(SLIST) \ 56 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST))) 57 #define TABLE_HEAD(HT, BUCKET) \ 58 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET])) 59 #define ENTRY_NEXT(ENTRY) \ 60 ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) 61 #define HASHTABLE_ITEM_SIZE(HT) \ 62 (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size) 63 64 #define ENTRY_READ_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \ 65 do { \ 66 assert((DATA_SIZE) == (TABLE)->data_size); \ 67 memcpy((PDATA), _Py_HASHTABLE_ENTRY_PDATA(TABLE, (ENTRY)), \ 68 (DATA_SIZE)); \ 69 } while (0) 70 71 #define ENTRY_WRITE_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \ 72 do { \ 73 assert((DATA_SIZE) == (TABLE)->data_size); \ 74 memcpy((void *)_Py_HASHTABLE_ENTRY_PDATA((TABLE), (ENTRY)), \ 75 (PDATA), (DATA_SIZE)); \ 76 } while (0) 77 78 /* Forward declaration */ 79 static void hashtable_rehash(_Py_hashtable_t *ht); 80 81 static void 82 _Py_slist_init(_Py_slist_t *list) 83 { 84 list->head = NULL; 85 } 86 87 88 static void 89 _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) 90 { 91 item->next = list->head; 92 list->head = item; 93 } 94 95 96 static void 97 _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, 98 _Py_slist_item_t *item) 99 { 100 if (previous != NULL) 101 previous->next = item->next; 102 else 103 list->head = item->next; 104 } 105 106 107 Py_uhash_t 108 _Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey) 109 { 110 void *key; 111 112 _Py_HASHTABLE_READ_KEY(ht, pkey, key); 113 return (Py_uhash_t)_Py_HashPointer(key); 114 } 115 116 117 int 118 _Py_hashtable_compare_direct(_Py_hashtable_t *ht, const void *pkey, 119 const _Py_hashtable_entry_t *entry) 120 { 121 const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry); 122 return (memcmp(pkey, pkey2, ht->key_size) == 0); 123 } 124 125 126 /* makes sure the real size of the buckets array is a power of 2 */ 127 static size_t 128 round_size(size_t s) 129 { 130 size_t i; 131 if (s < HASHTABLE_MIN_SIZE) 132 return HASHTABLE_MIN_SIZE; 133 i = 1; 134 while (i < s) 135 i <<= 1; 136 return i; 137 } 138 139 140 _Py_hashtable_t * 141 _Py_hashtable_new_full(size_t key_size, size_t data_size, 142 size_t init_size, 143 _Py_hashtable_hash_func hash_func, 144 _Py_hashtable_compare_func compare_func, 145 _Py_hashtable_allocator_t *allocator) 146 { 147 _Py_hashtable_t *ht; 148 size_t buckets_size; 149 _Py_hashtable_allocator_t alloc; 150 151 if (allocator == NULL) { 152 alloc.malloc = PyMem_RawMalloc; 153 alloc.free = PyMem_RawFree; 154 } 155 else 156 alloc = *allocator; 157 158 ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t)); 159 if (ht == NULL) 160 return ht; 161 162 ht->num_buckets = round_size(init_size); 163 ht->entries = 0; 164 ht->key_size = key_size; 165 ht->data_size = data_size; 166 167 buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); 168 ht->buckets = alloc.malloc(buckets_size); 169 if (ht->buckets == NULL) { 170 alloc.free(ht); 171 return NULL; 172 } 173 memset(ht->buckets, 0, buckets_size); 174 175 ht->hash_func = hash_func; 176 ht->compare_func = compare_func; 177 ht->alloc = alloc; 178 return ht; 179 } 180 181 182 _Py_hashtable_t * 183 _Py_hashtable_new(size_t key_size, size_t data_size, 184 _Py_hashtable_hash_func hash_func, 185 _Py_hashtable_compare_func compare_func) 186 { 187 return _Py_hashtable_new_full(key_size, data_size, 188 HASHTABLE_MIN_SIZE, 189 hash_func, compare_func, 190 NULL); 191 } 192 193 194 size_t 195 _Py_hashtable_size(_Py_hashtable_t *ht) 196 { 197 size_t size; 198 199 size = sizeof(_Py_hashtable_t); 200 201 /* buckets */ 202 size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *); 203 204 /* entries */ 205 size += ht->entries * HASHTABLE_ITEM_SIZE(ht); 206 207 return size; 208 } 209 210 211 #ifdef Py_DEBUG 212 void 213 _Py_hashtable_print_stats(_Py_hashtable_t *ht) 214 { 215 size_t size; 216 size_t chain_len, max_chain_len, total_chain_len, nchains; 217 _Py_hashtable_entry_t *entry; 218 size_t hv; 219 double load; 220 221 size = _Py_hashtable_size(ht); 222 223 load = (double)ht->entries / ht->num_buckets; 224 225 max_chain_len = 0; 226 total_chain_len = 0; 227 nchains = 0; 228 for (hv = 0; hv < ht->num_buckets; hv++) { 229 entry = TABLE_HEAD(ht, hv); 230 if (entry != NULL) { 231 chain_len = 0; 232 for (; entry; entry = ENTRY_NEXT(entry)) { 233 chain_len++; 234 } 235 if (chain_len > max_chain_len) 236 max_chain_len = chain_len; 237 total_chain_len += chain_len; 238 nchains++; 239 } 240 } 241 printf("hash table %p: entries=%" 242 PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ", 243 ht, ht->entries, ht->num_buckets, load * 100.0); 244 if (nchains) 245 printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains); 246 printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u kB\n", 247 max_chain_len, size / 1024); 248 } 249 #endif 250 251 252 _Py_hashtable_entry_t * 253 _Py_hashtable_get_entry(_Py_hashtable_t *ht, 254 size_t key_size, const void *pkey) 255 { 256 Py_uhash_t key_hash; 257 size_t index; 258 _Py_hashtable_entry_t *entry; 259 260 assert(key_size == ht->key_size); 261 262 key_hash = ht->hash_func(ht, pkey); 263 index = key_hash & (ht->num_buckets - 1); 264 265 for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { 266 if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) 267 break; 268 } 269 270 return entry; 271 } 272 273 274 static int 275 _Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey, 276 void *data, size_t data_size) 277 { 278 Py_uhash_t key_hash; 279 size_t index; 280 _Py_hashtable_entry_t *entry, *previous; 281 282 assert(key_size == ht->key_size); 283 284 key_hash = ht->hash_func(ht, pkey); 285 index = key_hash & (ht->num_buckets - 1); 286 287 previous = NULL; 288 for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { 289 if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) 290 break; 291 previous = entry; 292 } 293 294 if (entry == NULL) 295 return 0; 296 297 _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous, 298 (_Py_slist_item_t *)entry); 299 ht->entries--; 300 301 if (data != NULL) 302 ENTRY_READ_PDATA(ht, entry, data_size, data); 303 ht->alloc.free(entry); 304 305 if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) 306 hashtable_rehash(ht); 307 return 1; 308 } 309 310 311 int 312 _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey, 313 size_t data_size, const void *data) 314 { 315 Py_uhash_t key_hash; 316 size_t index; 317 _Py_hashtable_entry_t *entry; 318 319 assert(key_size == ht->key_size); 320 321 assert(data != NULL || data_size == 0); 322 #ifndef NDEBUG 323 /* Don't write the assertion on a single line because it is interesting 324 to know the duplicated entry if the assertion failed. The entry can 325 be read using a debugger. */ 326 entry = _Py_hashtable_get_entry(ht, key_size, pkey); 327 assert(entry == NULL); 328 #endif 329 330 key_hash = ht->hash_func(ht, pkey); 331 index = key_hash & (ht->num_buckets - 1); 332 333 entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht)); 334 if (entry == NULL) { 335 /* memory allocation failed */ 336 return -1; 337 } 338 339 entry->key_hash = key_hash; 340 memcpy((void *)_Py_HASHTABLE_ENTRY_PKEY(entry), pkey, ht->key_size); 341 if (data) 342 ENTRY_WRITE_PDATA(ht, entry, data_size, data); 343 344 _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); 345 ht->entries++; 346 347 if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH) 348 hashtable_rehash(ht); 349 return 0; 350 } 351 352 353 int 354 _Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey, 355 size_t data_size, void *data) 356 { 357 _Py_hashtable_entry_t *entry; 358 359 assert(data != NULL); 360 361 entry = _Py_hashtable_get_entry(ht, key_size, pkey); 362 if (entry == NULL) 363 return 0; 364 ENTRY_READ_PDATA(ht, entry, data_size, data); 365 return 1; 366 } 367 368 369 int 370 _Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey, 371 size_t data_size, void *data) 372 { 373 assert(data != NULL); 374 return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size); 375 } 376 377 378 /* Code commented since the function is not needed in Python */ 379 #if 0 380 void 381 _Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey) 382 { 383 #ifndef NDEBUG 384 int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0); 385 assert(found); 386 #else 387 (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0); 388 #endif 389 } 390 #endif 391 392 393 int 394 _Py_hashtable_foreach(_Py_hashtable_t *ht, 395 _Py_hashtable_foreach_func func, 396 void *arg) 397 { 398 _Py_hashtable_entry_t *entry; 399 size_t hv; 400 401 for (hv = 0; hv < ht->num_buckets; hv++) { 402 for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { 403 int res = func(ht, entry, arg); 404 if (res) 405 return res; 406 } 407 } 408 return 0; 409 } 410 411 412 static void 413 hashtable_rehash(_Py_hashtable_t *ht) 414 { 415 size_t buckets_size, new_size, bucket; 416 _Py_slist_t *old_buckets = NULL; 417 size_t old_num_buckets; 418 419 new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR)); 420 if (new_size == ht->num_buckets) 421 return; 422 423 old_num_buckets = ht->num_buckets; 424 425 buckets_size = new_size * sizeof(ht->buckets[0]); 426 old_buckets = ht->buckets; 427 ht->buckets = ht->alloc.malloc(buckets_size); 428 if (ht->buckets == NULL) { 429 /* cancel rehash on memory allocation failure */ 430 ht->buckets = old_buckets ; 431 /* memory allocation failed */ 432 return; 433 } 434 memset(ht->buckets, 0, buckets_size); 435 436 ht->num_buckets = new_size; 437 438 for (bucket = 0; bucket < old_num_buckets; bucket++) { 439 _Py_hashtable_entry_t *entry, *next; 440 for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) { 441 size_t entry_index; 442 443 444 assert(ht->hash_func(ht, _Py_HASHTABLE_ENTRY_PKEY(entry)) == entry->key_hash); 445 next = ENTRY_NEXT(entry); 446 entry_index = entry->key_hash & (new_size - 1); 447 448 _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry); 449 } 450 } 451 452 ht->alloc.free(old_buckets); 453 } 454 455 456 void 457 _Py_hashtable_clear(_Py_hashtable_t *ht) 458 { 459 _Py_hashtable_entry_t *entry, *next; 460 size_t i; 461 462 for (i=0; i < ht->num_buckets; i++) { 463 for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) { 464 next = ENTRY_NEXT(entry); 465 ht->alloc.free(entry); 466 } 467 _Py_slist_init(&ht->buckets[i]); 468 } 469 ht->entries = 0; 470 hashtable_rehash(ht); 471 } 472 473 474 void 475 _Py_hashtable_destroy(_Py_hashtable_t *ht) 476 { 477 size_t i; 478 479 for (i = 0; i < ht->num_buckets; i++) { 480 _Py_slist_item_t *entry = ht->buckets[i].head; 481 while (entry) { 482 _Py_slist_item_t *entry_next = entry->next; 483 ht->alloc.free(entry); 484 entry = entry_next; 485 } 486 } 487 488 ht->alloc.free(ht->buckets); 489 ht->alloc.free(ht); 490 } 491 492 493 _Py_hashtable_t * 494 _Py_hashtable_copy(_Py_hashtable_t *src) 495 { 496 const size_t key_size = src->key_size; 497 const size_t data_size = src->data_size; 498 _Py_hashtable_t *dst; 499 _Py_hashtable_entry_t *entry; 500 size_t bucket; 501 int err; 502 503 dst = _Py_hashtable_new_full(key_size, data_size, 504 src->num_buckets, 505 src->hash_func, 506 src->compare_func, 507 &src->alloc); 508 if (dst == NULL) 509 return NULL; 510 511 for (bucket=0; bucket < src->num_buckets; bucket++) { 512 entry = TABLE_HEAD(src, bucket); 513 for (; entry; entry = ENTRY_NEXT(entry)) { 514 const void *pkey = _Py_HASHTABLE_ENTRY_PKEY(entry); 515 const void *pdata = _Py_HASHTABLE_ENTRY_PDATA(src, entry); 516 err = _Py_hashtable_set(dst, key_size, pkey, data_size, pdata); 517 if (err) { 518 _Py_hashtable_destroy(dst); 519 return NULL; 520 } 521 } 522 } 523 return dst; 524 } 525