Home | History | Annotate | Download | only in Modules
      1 /* The implementation of the hash table (_Py_hashtable_t) is based on the
      2    cfuhash project:
      3    http://sourceforge.net/projects/libcfu/
      4 
      5    Copyright of cfuhash:
      6    ----------------------------------
      7    Creation date: 2005-06-24 21:22:40
      8    Authors: Don
      9    Change log:
     10 
     11    Copyright (c) 2005 Don Owens
     12    All rights reserved.
     13 
     14    This code is released under the BSD license:
     15 
     16    Redistribution and use in source and binary forms, with or without
     17    modification, are permitted provided that the following conditions
     18    are met:
     19 
     20      * Redistributions of source code must retain the above copyright
     21        notice, this list of conditions and the following disclaimer.
     22 
     23      * Redistributions in binary form must reproduce the above
     24        copyright notice, this list of conditions and the following
     25        disclaimer in the documentation and/or other materials provided
     26        with the distribution.
     27 
     28      * Neither the name of the author nor the names of its
     29        contributors may be used to endorse or promote products derived
     30        from this software without specific prior written permission.
     31 
     32    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     33    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     34    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     35    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     36    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     37    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     38    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     39    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     40    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     41    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     42    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
     43    OF THE POSSIBILITY OF SUCH DAMAGE.
     44    ----------------------------------
     45 */
     46 
     47 #include "Python.h"
     48 #include "hashtable.h"
     49 
     50 #define HASHTABLE_MIN_SIZE 16
     51 #define HASHTABLE_HIGH 0.50
     52 #define HASHTABLE_LOW 0.10
     53 #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
     54 
     55 #define BUCKETS_HEAD(SLIST) \
     56         ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
     57 #define TABLE_HEAD(HT, BUCKET) \
     58         ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
     59 #define ENTRY_NEXT(ENTRY) \
     60         ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
     61 #define HASHTABLE_ITEM_SIZE(HT) \
     62         (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size)
     63 
     64 #define ENTRY_READ_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \
     65     do { \
     66         assert((DATA_SIZE) == (TABLE)->data_size); \
     67         memcpy((PDATA), _Py_HASHTABLE_ENTRY_PDATA(TABLE, (ENTRY)), \
     68                   (DATA_SIZE)); \
     69     } while (0)
     70 
     71 #define ENTRY_WRITE_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \
     72     do { \
     73         assert((DATA_SIZE) == (TABLE)->data_size); \
     74         memcpy((void *)_Py_HASHTABLE_ENTRY_PDATA((TABLE), (ENTRY)), \
     75                   (PDATA), (DATA_SIZE)); \
     76     } while (0)
     77 
     78 /* Forward declaration */
     79 static void hashtable_rehash(_Py_hashtable_t *ht);
     80 
     81 static void
     82 _Py_slist_init(_Py_slist_t *list)
     83 {
     84     list->head = NULL;
     85 }
     86 
     87 
     88 static void
     89 _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
     90 {
     91     item->next = list->head;
     92     list->head = item;
     93 }
     94 
     95 
     96 static void
     97 _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
     98                  _Py_slist_item_t *item)
     99 {
    100     if (previous != NULL)
    101         previous->next = item->next;
    102     else
    103         list->head = item->next;
    104 }
    105 
    106 
    107 Py_uhash_t
    108 _Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey)
    109 {
    110     void *key;
    111 
    112     _Py_HASHTABLE_READ_KEY(ht, pkey, key);
    113     return (Py_uhash_t)_Py_HashPointer(key);
    114 }
    115 
    116 
    117 int
    118 _Py_hashtable_compare_direct(_Py_hashtable_t *ht, const void *pkey,
    119                              const _Py_hashtable_entry_t *entry)
    120 {
    121     const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry);
    122     return (memcmp(pkey, pkey2, ht->key_size) == 0);
    123 }
    124 
    125 
    126 /* makes sure the real size of the buckets array is a power of 2 */
    127 static size_t
    128 round_size(size_t s)
    129 {
    130     size_t i;
    131     if (s < HASHTABLE_MIN_SIZE)
    132         return HASHTABLE_MIN_SIZE;
    133     i = 1;
    134     while (i < s)
    135         i <<= 1;
    136     return i;
    137 }
    138 
    139 
    140 _Py_hashtable_t *
    141 _Py_hashtable_new_full(size_t key_size, size_t data_size,
    142                        size_t init_size,
    143                        _Py_hashtable_hash_func hash_func,
    144                        _Py_hashtable_compare_func compare_func,
    145                        _Py_hashtable_allocator_t *allocator)
    146 {
    147     _Py_hashtable_t *ht;
    148     size_t buckets_size;
    149     _Py_hashtable_allocator_t alloc;
    150 
    151     if (allocator == NULL) {
    152         alloc.malloc = PyMem_RawMalloc;
    153         alloc.free = PyMem_RawFree;
    154     }
    155     else
    156         alloc = *allocator;
    157 
    158     ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
    159     if (ht == NULL)
    160         return ht;
    161 
    162     ht->num_buckets = round_size(init_size);
    163     ht->entries = 0;
    164     ht->key_size = key_size;
    165     ht->data_size = data_size;
    166 
    167     buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
    168     ht->buckets = alloc.malloc(buckets_size);
    169     if (ht->buckets == NULL) {
    170         alloc.free(ht);
    171         return NULL;
    172     }
    173     memset(ht->buckets, 0, buckets_size);
    174 
    175     ht->hash_func = hash_func;
    176     ht->compare_func = compare_func;
    177     ht->alloc = alloc;
    178     return ht;
    179 }
    180 
    181 
    182 _Py_hashtable_t *
    183 _Py_hashtable_new(size_t key_size, size_t data_size,
    184                   _Py_hashtable_hash_func hash_func,
    185                   _Py_hashtable_compare_func compare_func)
    186 {
    187     return _Py_hashtable_new_full(key_size, data_size,
    188                                   HASHTABLE_MIN_SIZE,
    189                                   hash_func, compare_func,
    190                                   NULL);
    191 }
    192 
    193 
    194 size_t
    195 _Py_hashtable_size(_Py_hashtable_t *ht)
    196 {
    197     size_t size;
    198 
    199     size = sizeof(_Py_hashtable_t);
    200 
    201     /* buckets */
    202     size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
    203 
    204     /* entries */
    205     size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
    206 
    207     return size;
    208 }
    209 
    210 
    211 #ifdef Py_DEBUG
    212 void
    213 _Py_hashtable_print_stats(_Py_hashtable_t *ht)
    214 {
    215     size_t size;
    216     size_t chain_len, max_chain_len, total_chain_len, nchains;
    217     _Py_hashtable_entry_t *entry;
    218     size_t hv;
    219     double load;
    220 
    221     size = _Py_hashtable_size(ht);
    222 
    223     load = (double)ht->entries / ht->num_buckets;
    224 
    225     max_chain_len = 0;
    226     total_chain_len = 0;
    227     nchains = 0;
    228     for (hv = 0; hv < ht->num_buckets; hv++) {
    229         entry = TABLE_HEAD(ht, hv);
    230         if (entry != NULL) {
    231             chain_len = 0;
    232             for (; entry; entry = ENTRY_NEXT(entry)) {
    233                 chain_len++;
    234             }
    235             if (chain_len > max_chain_len)
    236                 max_chain_len = chain_len;
    237             total_chain_len += chain_len;
    238             nchains++;
    239         }
    240     }
    241     printf("hash table %p: entries=%"
    242            PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ",
    243            ht, ht->entries, ht->num_buckets, load * 100.0);
    244     if (nchains)
    245         printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
    246     printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u kB\n",
    247            max_chain_len, size / 1024);
    248 }
    249 #endif
    250 
    251 
    252 _Py_hashtable_entry_t *
    253 _Py_hashtable_get_entry(_Py_hashtable_t *ht,
    254                         size_t key_size, const void *pkey)
    255 {
    256     Py_uhash_t key_hash;
    257     size_t index;
    258     _Py_hashtable_entry_t *entry;
    259 
    260     assert(key_size == ht->key_size);
    261 
    262     key_hash = ht->hash_func(ht, pkey);
    263     index = key_hash & (ht->num_buckets - 1);
    264 
    265     for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
    266         if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
    267             break;
    268     }
    269 
    270     return entry;
    271 }
    272 
    273 
    274 static int
    275 _Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
    276                         void *data, size_t data_size)
    277 {
    278     Py_uhash_t key_hash;
    279     size_t index;
    280     _Py_hashtable_entry_t *entry, *previous;
    281 
    282     assert(key_size == ht->key_size);
    283 
    284     key_hash = ht->hash_func(ht, pkey);
    285     index = key_hash & (ht->num_buckets - 1);
    286 
    287     previous = NULL;
    288     for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
    289         if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
    290             break;
    291         previous = entry;
    292     }
    293 
    294     if (entry == NULL)
    295         return 0;
    296 
    297     _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
    298                      (_Py_slist_item_t *)entry);
    299     ht->entries--;
    300 
    301     if (data != NULL)
    302         ENTRY_READ_PDATA(ht, entry, data_size, data);
    303     ht->alloc.free(entry);
    304 
    305     if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
    306         hashtable_rehash(ht);
    307     return 1;
    308 }
    309 
    310 
    311 int
    312 _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
    313                   size_t data_size, const void *data)
    314 {
    315     Py_uhash_t key_hash;
    316     size_t index;
    317     _Py_hashtable_entry_t *entry;
    318 
    319     assert(key_size == ht->key_size);
    320 
    321     assert(data != NULL || data_size == 0);
    322 #ifndef NDEBUG
    323     /* Don't write the assertion on a single line because it is interesting
    324        to know the duplicated entry if the assertion failed. The entry can
    325        be read using a debugger. */
    326     entry = _Py_hashtable_get_entry(ht, key_size, pkey);
    327     assert(entry == NULL);
    328 #endif
    329 
    330     key_hash = ht->hash_func(ht, pkey);
    331     index = key_hash & (ht->num_buckets - 1);
    332 
    333     entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
    334     if (entry == NULL) {
    335         /* memory allocation failed */
    336         return -1;
    337     }
    338 
    339     entry->key_hash = key_hash;
    340     memcpy((void *)_Py_HASHTABLE_ENTRY_PKEY(entry), pkey, ht->key_size);
    341     if (data)
    342         ENTRY_WRITE_PDATA(ht, entry, data_size, data);
    343 
    344     _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
    345     ht->entries++;
    346 
    347     if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
    348         hashtable_rehash(ht);
    349     return 0;
    350 }
    351 
    352 
    353 int
    354 _Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
    355                   size_t data_size, void *data)
    356 {
    357     _Py_hashtable_entry_t *entry;
    358 
    359     assert(data != NULL);
    360 
    361     entry = _Py_hashtable_get_entry(ht, key_size, pkey);
    362     if (entry == NULL)
    363         return 0;
    364     ENTRY_READ_PDATA(ht, entry, data_size, data);
    365     return 1;
    366 }
    367 
    368 
    369 int
    370 _Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
    371                   size_t data_size, void *data)
    372 {
    373     assert(data != NULL);
    374     return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size);
    375 }
    376 
    377 
    378 /* Code commented since the function is not needed in Python */
    379 #if 0
    380 void
    381 _Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
    382 {
    383 #ifndef NDEBUG
    384     int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
    385     assert(found);
    386 #else
    387     (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
    388 #endif
    389 }
    390 #endif
    391 
    392 
    393 int
    394 _Py_hashtable_foreach(_Py_hashtable_t *ht,
    395                       _Py_hashtable_foreach_func func,
    396                       void *arg)
    397 {
    398     _Py_hashtable_entry_t *entry;
    399     size_t hv;
    400 
    401     for (hv = 0; hv < ht->num_buckets; hv++) {
    402         for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
    403             int res = func(ht, entry, arg);
    404             if (res)
    405                 return res;
    406         }
    407     }
    408     return 0;
    409 }
    410 
    411 
    412 static void
    413 hashtable_rehash(_Py_hashtable_t *ht)
    414 {
    415     size_t buckets_size, new_size, bucket;
    416     _Py_slist_t *old_buckets = NULL;
    417     size_t old_num_buckets;
    418 
    419     new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
    420     if (new_size == ht->num_buckets)
    421         return;
    422 
    423     old_num_buckets = ht->num_buckets;
    424 
    425     buckets_size = new_size * sizeof(ht->buckets[0]);
    426     old_buckets = ht->buckets;
    427     ht->buckets = ht->alloc.malloc(buckets_size);
    428     if (ht->buckets == NULL) {
    429         /* cancel rehash on memory allocation failure */
    430         ht->buckets = old_buckets ;
    431         /* memory allocation failed */
    432         return;
    433     }
    434     memset(ht->buckets, 0, buckets_size);
    435 
    436     ht->num_buckets = new_size;
    437 
    438     for (bucket = 0; bucket < old_num_buckets; bucket++) {
    439         _Py_hashtable_entry_t *entry, *next;
    440         for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
    441             size_t entry_index;
    442 
    443 
    444             assert(ht->hash_func(ht, _Py_HASHTABLE_ENTRY_PKEY(entry)) == entry->key_hash);
    445             next = ENTRY_NEXT(entry);
    446             entry_index = entry->key_hash & (new_size - 1);
    447 
    448             _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
    449         }
    450     }
    451 
    452     ht->alloc.free(old_buckets);
    453 }
    454 
    455 
    456 void
    457 _Py_hashtable_clear(_Py_hashtable_t *ht)
    458 {
    459     _Py_hashtable_entry_t *entry, *next;
    460     size_t i;
    461 
    462     for (i=0; i < ht->num_buckets; i++) {
    463         for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
    464             next = ENTRY_NEXT(entry);
    465             ht->alloc.free(entry);
    466         }
    467         _Py_slist_init(&ht->buckets[i]);
    468     }
    469     ht->entries = 0;
    470     hashtable_rehash(ht);
    471 }
    472 
    473 
    474 void
    475 _Py_hashtable_destroy(_Py_hashtable_t *ht)
    476 {
    477     size_t i;
    478 
    479     for (i = 0; i < ht->num_buckets; i++) {
    480         _Py_slist_item_t *entry = ht->buckets[i].head;
    481         while (entry) {
    482             _Py_slist_item_t *entry_next = entry->next;
    483             ht->alloc.free(entry);
    484             entry = entry_next;
    485         }
    486     }
    487 
    488     ht->alloc.free(ht->buckets);
    489     ht->alloc.free(ht);
    490 }
    491 
    492 
    493 _Py_hashtable_t *
    494 _Py_hashtable_copy(_Py_hashtable_t *src)
    495 {
    496     const size_t key_size = src->key_size;
    497     const size_t data_size = src->data_size;
    498     _Py_hashtable_t *dst;
    499     _Py_hashtable_entry_t *entry;
    500     size_t bucket;
    501     int err;
    502 
    503     dst = _Py_hashtable_new_full(key_size, data_size,
    504                                  src->num_buckets,
    505                                  src->hash_func,
    506                                  src->compare_func,
    507                                  &src->alloc);
    508     if (dst == NULL)
    509         return NULL;
    510 
    511     for (bucket=0; bucket < src->num_buckets; bucket++) {
    512         entry = TABLE_HEAD(src, bucket);
    513         for (; entry; entry = ENTRY_NEXT(entry)) {
    514             const void *pkey = _Py_HASHTABLE_ENTRY_PKEY(entry);
    515             const void *pdata = _Py_HASHTABLE_ENTRY_PDATA(src, entry);
    516             err = _Py_hashtable_set(dst, key_size, pkey, data_size, pdata);
    517             if (err) {
    518                 _Py_hashtable_destroy(dst);
    519                 return NULL;
    520             }
    521         }
    522     }
    523     return dst;
    524 }
    525