1 /* 2 * This file is part of ltrace. 3 * Copyright (C) 2012, 2013 Petr Machata, Red Hat Inc. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation; either version 2 of the 8 * License, or (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 18 * 02110-1301 USA 19 */ 20 21 #include <string.h> 22 #include <stdlib.h> 23 #include <stdio.h> 24 #include "dict.h" 25 26 struct status_bits { 27 unsigned char taken : 1; 28 unsigned char erased : 1; 29 }; 30 31 static struct status_bits * 32 bitp(struct dict *dict, size_t n) 33 { 34 return VECT_ELEMENT(&dict->status, struct status_bits, n); 35 } 36 37 void 38 dict_init(struct dict *dict, 39 size_t key_size, size_t value_size, 40 size_t (*hash1)(const void *), 41 int (*eq)(const void *, const void *), 42 size_t (*hash2)(size_t)) 43 { 44 assert(hash1 != NULL); 45 assert(eq != NULL); 46 47 vect_init(&dict->keys, key_size); 48 vect_init(&dict->values, value_size); 49 VECT_INIT(&dict->status, struct status_bits); 50 dict->size = 0; 51 52 dict->hash1 = hash1; 53 dict->hash2 = hash2; 54 dict->eq = eq; 55 } 56 57 struct clone_data { 58 struct dict *target; 59 int (*clone_key)(void *tgt, const void *src, void *data); 60 int (*clone_value)(void *tgt, const void *src, void *data); 61 void (*dtor_key)(void *tgt, void *data); 62 void (*dtor_value)(void *tgt, void *data); 63 void *data; 64 }; 65 66 static enum callback_status 67 clone_cb(void *key, void *value, void *data) 68 { 69 struct clone_data *clone_data = data; 70 71 char nkey[clone_data->target->keys.elt_size]; 72 if (clone_data->clone_key == NULL) 73 memmove(nkey, key, sizeof(nkey)); 74 else if (clone_data->clone_key(&nkey, key, clone_data->data) < 0) 75 return CBS_STOP; 76 77 char nvalue[clone_data->target->values.elt_size]; 78 if (clone_data->clone_value == NULL) { 79 memmove(nvalue, value, sizeof(nvalue)); 80 } else if (clone_data->clone_value(&nvalue, value, 81 clone_data->data) < 0) { 82 fail: 83 if (clone_data->clone_key != NULL) 84 clone_data->dtor_key(&nkey, clone_data->data); 85 return CBS_STOP; 86 } 87 88 if (dict_insert(clone_data->target, nkey, nvalue) < 0) { 89 if (clone_data->clone_value != NULL) 90 clone_data->dtor_value(&nvalue, clone_data->data); 91 goto fail; 92 } 93 94 return CBS_CONT; 95 } 96 97 int 98 dict_clone(struct dict *target, const struct dict *source, 99 int (*clone_key)(void *tgt, const void *src, void *data), 100 void (*dtor_key)(void *tgt, void *data), 101 int (*clone_value)(void *tgt, const void *src, void *data), 102 void (*dtor_value)(void *tgt, void *data), 103 void *data) 104 { 105 assert((clone_key != NULL) == (dtor_key != NULL)); 106 assert((clone_value != NULL) == (dtor_value != NULL)); 107 108 dict_init(target, source->keys.elt_size, source->values.elt_size, 109 source->hash1, source->eq, source->hash2); 110 struct clone_data clone_data = { 111 target, clone_key, clone_value, dtor_key, dtor_value, data 112 }; 113 if (dict_each((struct dict *)source, NULL, 114 clone_cb, &clone_data) != NULL) { 115 dict_destroy(target, dtor_key, dtor_value, data); 116 return -1; 117 } 118 return 0; 119 } 120 121 size_t 122 dict_size(const struct dict *dict) 123 { 124 return dict->size; 125 } 126 127 int 128 dict_empty(const struct dict *dict) 129 { 130 return dict->size == 0; 131 } 132 133 struct destroy_data { 134 void (*dtor_key)(void *tgt, void *data); 135 void (*dtor_value)(void *tgt, void *data); 136 void *data; 137 }; 138 139 static enum callback_status 140 destroy_cb(void *key, void *value, void *data) 141 { 142 struct destroy_data *destroy_data = data; 143 if (destroy_data->dtor_key) 144 destroy_data->dtor_key(key, destroy_data->data); 145 if (destroy_data->dtor_value) 146 destroy_data->dtor_value(value, destroy_data->data); 147 return CBS_CONT; 148 } 149 150 void 151 dict_destroy(struct dict *dict, 152 void (*dtor_key)(void *tgt, void *data), 153 void (*dtor_value)(void *tgt, void *data), 154 void *data) 155 { 156 /* Some slots are unused (the corresponding keys and values 157 * are uninitialized), so we can't call dtors for them. 158 * Iterate DICT instead. */ 159 if (dtor_key != NULL || dtor_value != NULL) { 160 struct destroy_data destroy_data = { 161 dtor_key, dtor_value, data 162 }; 163 dict_each(dict, NULL, destroy_cb, &destroy_data); 164 } 165 166 vect_destroy(&dict->keys, NULL, NULL); 167 vect_destroy(&dict->values, NULL, NULL); 168 vect_destroy(&dict->status, NULL, NULL); 169 } 170 171 static size_t 172 default_secondary_hash(size_t pos) 173 { 174 return pos % 97 + 1; 175 } 176 177 static size_t 178 small_secondary_hash(size_t pos) 179 { 180 return 1; 181 } 182 183 static inline size_t 184 n(struct dict *dict) 185 { 186 return vect_size(&dict->keys); 187 } 188 189 static inline size_t (* 190 hash2(struct dict *dict))(size_t) 191 { 192 if (dict->hash2 != NULL) 193 return dict->hash2; 194 else if (n(dict) < 100) 195 return small_secondary_hash; 196 else 197 return default_secondary_hash; 198 } 199 200 static void * 201 getkey(struct dict *dict, size_t pos) 202 { 203 return ((unsigned char *)dict->keys.data) 204 + dict->keys.elt_size * pos; 205 } 206 207 static void * 208 getvalue(struct dict *dict, size_t pos) 209 { 210 return ((unsigned char *)dict->values.data) 211 + dict->values.elt_size * pos; 212 } 213 214 static size_t 215 find_slot(struct dict *dict, const void *key, 216 int *foundp, int *should_rehash, size_t *pi) 217 { 218 assert(n(dict) > 0); 219 size_t pos = dict->hash1(key) % n(dict); 220 size_t pos0 = -1; 221 size_t d = hash2(dict)(pos); 222 size_t i = 0; 223 *foundp = 0; 224 225 /* We skip over any taken or erased slots. But we remember 226 * the first erased that we find, and if we don't find the key 227 * later, we return that position. */ 228 for (; bitp(dict, pos)->taken || bitp(dict, pos)->erased; 229 pos = (pos + d) % n(dict)) { 230 if (pos0 == (size_t)-1 && bitp(dict, pos)->erased) 231 pos0 = pos; 232 233 /* If there is a loop, but we've seen an erased 234 * element, take that one. Otherwise give up. */ 235 if (++i > dict->size) { 236 if (pos0 != (size_t)-1) 237 break; 238 return (size_t)-1; 239 } 240 241 if (bitp(dict, pos)->taken 242 && dict->eq(getkey(dict, pos), key)) { 243 *foundp = 1; 244 break; 245 } 246 } 247 248 if (!*foundp && pos0 != (size_t)-1) 249 pos = pos0; 250 251 /* If the hash table degraded into a linked list, request a 252 * rehash. */ 253 if (should_rehash != NULL) 254 *should_rehash = i > 10 && i > n(dict) / 10; 255 256 if (pi != NULL) 257 *pi = i; 258 return pos; 259 } 260 261 static enum callback_status 262 rehash_move(void *key, void *value, void *data) 263 { 264 if (dict_insert(data, key, value) < 0) 265 return CBS_STOP; 266 else 267 return CBS_CONT; 268 } 269 270 static int 271 rehash(struct dict *dict, size_t nn) 272 { 273 assert(nn != n(dict)); 274 int ret = -1; 275 276 struct dict tmp; 277 dict_init(&tmp, dict->keys.elt_size, dict->values.elt_size, 278 dict->hash1, dict->eq, dict->hash2); 279 280 /* To honor all invariants (so that we can safely call 281 * dict_destroy), we first make a request to _reserve_ enough 282 * room in all vectors. This has no observable effect on 283 * contents of vectors. */ 284 if (vect_reserve(&tmp.keys, nn) < 0 285 || vect_reserve(&tmp.values, nn) < 0 286 || vect_reserve(&tmp.status, nn) < 0) 287 goto done; 288 289 /* Now that we know that there is enough size in vectors, we 290 * simply bump the size. */ 291 tmp.keys.size = nn; 292 tmp.values.size = nn; 293 size_t old_size = tmp.status.size; 294 tmp.status.size = nn; 295 memset(VECT_ELEMENT(&tmp.status, struct status_bits, old_size), 296 0, (tmp.status.size - old_size) * tmp.status.elt_size); 297 298 /* At this point, TMP is once more an empty dictionary with NN 299 * slots. Now move stuff from DICT to TMP. */ 300 if (dict_each(dict, NULL, rehash_move, &tmp) != NULL) 301 goto done; 302 303 /* And now swap contents of DICT and TMP, and we are done. */ 304 { 305 struct dict tmp2 = *dict; 306 *dict = tmp; 307 tmp = tmp2; 308 } 309 310 ret = 0; 311 312 done: 313 /* We only want to release the containers, not the actual data 314 * that they hold, so it's fine if we don't pass any dtor. */ 315 dict_destroy(&tmp, NULL, NULL, NULL); 316 return ret; 317 318 } 319 320 static const size_t primes[] = { 321 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 322 8191, 16381, 32749, 65521, 130981, 0 323 }; 324 325 static size_t 326 larger_size(size_t current) 327 { 328 if (current == 0) 329 return primes[0]; 330 331 if (current < primes[sizeof(primes)/sizeof(*primes) - 2]) { 332 size_t i; 333 for (i = 0; primes[i] != 0; ++i) 334 if (primes[i] > current) 335 return primes[i]; 336 abort(); 337 } 338 339 /* We ran out of primes, so invent a new one. The following 340 * gives primes until about 17M elements (and then some more 341 * later). */ 342 return 2 * current + 6585; 343 } 344 345 static size_t 346 smaller_size(size_t current) 347 { 348 if (current <= primes[0]) 349 return primes[0]; 350 351 if (current <= primes[sizeof(primes)/sizeof(*primes) - 2]) { 352 size_t i; 353 size_t prev = 0; 354 for (i = 0; primes[i] != 0; ++i) { 355 if (primes[i] >= current) 356 return prev; 357 prev = primes[i]; 358 } 359 abort(); 360 } 361 362 return (current - 6585) / 2; 363 } 364 365 int 366 dict_insert(struct dict *dict, void *key, void *value) 367 { 368 if (n(dict) == 0 || dict->size > 0.7 * n(dict)) 369 rehash: 370 if (rehash(dict, larger_size(n(dict))) < 0) 371 return -1; 372 373 int found; 374 int should_rehash; 375 size_t slot_n = find_slot(dict, key, &found, &should_rehash, NULL); 376 if (slot_n == (size_t)-1) 377 return -1; 378 if (found) 379 return 1; 380 assert(!bitp(dict, slot_n)->taken); 381 382 /* If rehash was requested, do that, and retry. But just live 383 * with it for apparently sparse tables. No resizing can fix 384 * a rubbish hash. */ 385 if (should_rehash && dict->size > 0.3 * n(dict)) 386 goto rehash; 387 388 memmove(getkey(dict, slot_n), key, dict->keys.elt_size); 389 memmove(getvalue(dict, slot_n), value, dict->values.elt_size); 390 391 bitp(dict, slot_n)->taken = 1; 392 bitp(dict, slot_n)->erased = 0; 393 ++dict->size; 394 395 return 0; 396 } 397 398 void * 399 dict_find(struct dict *dict, const void *key) 400 { 401 if (dict->size == 0) 402 return NULL; 403 assert(n(dict) > 0); 404 405 int found; 406 size_t slot_n = find_slot(dict, key, &found, NULL, NULL); 407 if (found) 408 return getvalue(dict, slot_n); 409 else 410 return NULL; 411 } 412 413 int 414 dict_erase(struct dict *dict, const void *key, 415 void (*dtor_key)(void *tgt, void *data), 416 void (*dtor_value)(void *tgt, void *data), 417 void *data) 418 { 419 int found; 420 size_t i; 421 size_t slot_n = find_slot(dict, key, &found, NULL, &i); 422 if (!found) 423 return -1; 424 425 if (dtor_key != NULL) 426 dtor_key(getkey(dict, slot_n), data); 427 if (dtor_value != NULL) 428 dtor_value(getvalue(dict, slot_n), data); 429 430 bitp(dict, slot_n)->taken = 0; 431 bitp(dict, slot_n)->erased = 1; 432 --dict->size; 433 434 if (dict->size < 0.3 * n(dict)) { 435 size_t smaller = smaller_size(n(dict)); 436 if (smaller != n(dict)) 437 /* Don't mind if it fails when shrinking. */ 438 rehash(dict, smaller); 439 } 440 441 return 0; 442 } 443 444 void * 445 dict_each(struct dict *dict, void *start_after, 446 enum callback_status (*cb)(void *, void *, void *), void *data) 447 { 448 size_t i; 449 if (start_after != NULL) 450 i = ((start_after - dict->keys.data) / dict->keys.elt_size) + 1; 451 else 452 i = 0; 453 454 for (; i < dict->keys.size; ++i) 455 if (bitp(dict, i)->taken && !bitp(dict, i)->erased) { 456 void *key = getkey(dict, i); 457 if (cb(key, getvalue(dict, i), data) != CBS_CONT) 458 return key; 459 } 460 461 return NULL; 462 } 463 464 size_t 465 dict_hash_int(const int *key) 466 { 467 return (size_t)(*key * 2654435761U); 468 } 469 470 int 471 dict_eq_int(const int *key1, const int *key2) 472 { 473 return *key1 == *key2; 474 } 475 476 size_t 477 dict_hash_string(const char **key) 478 { 479 size_t h = 5381; 480 const char *str = *key; 481 while (*str != 0) 482 h = h * 33 ^ *str++; 483 return h; 484 } 485 486 int 487 dict_eq_string(const char **key1, const char **key2) 488 { 489 return strcmp(*key1, *key2) == 0; 490 } 491 492 void 493 dict_dtor_string(const char **key, void *data) 494 { 495 free((char *)*key); 496 } 497 498 int 499 dict_clone_string(const char **tgt, const char **src, void *data) 500 { 501 *tgt = strdup(*src); 502 return *tgt != NULL ? 0 : -1; 503 } 504 505 #ifdef TEST 506 static enum callback_status 507 dump(int *key, int *value, void *data) 508 { 509 char *seen = data; 510 assert(seen[*key] == 0); 511 seen[*key] = 1; 512 assert(*value == *key * 2 + 1); 513 return CBS_STOP; 514 } 515 516 static size_t 517 dict_hash_int_silly(const int *key) 518 { 519 return *key % 10; 520 } 521 522 static void 523 verify(struct dict *di, size_t len, char *seen) 524 { 525 size_t ct = 0; 526 int *it; 527 for (it = NULL; (it = DICT_EACH(di, int, int, it, dump, seen)) != NULL;) 528 ct++; 529 assert(ct == len); 530 memset(seen, 0, len); 531 } 532 533 static enum callback_status 534 fill_keys(int *key, int *value, void *data) 535 { 536 int *array = data; 537 array[++array[0]] = *key; 538 return CBS_CONT; 539 } 540 541 static void 542 test1(void) 543 { 544 struct dict di; 545 DICT_INIT(&di, int, int, dict_hash_int, dict_eq_int, NULL); 546 547 char seen[100000] = {}; 548 size_t i; 549 for (i = 0; i < sizeof(seen); ++i) { 550 int key = i; 551 int value = 2 * i + 1; 552 DICT_INSERT(&di, &key, &value); 553 int *valp = DICT_FIND_REF(&di, &key, int); 554 assert(valp != NULL); 555 assert(*valp == value); 556 assert(dict_size(&di) == i + 1); 557 } 558 559 verify(&di, sizeof(seen), seen); 560 561 struct dict d2; 562 DICT_CLONE(&d2, &di, int, int, NULL, NULL, NULL, NULL, NULL); 563 DICT_DESTROY(&di, int, int, NULL, NULL, NULL); 564 verify(&d2, sizeof(seen), seen); 565 566 /* Now we try to gradually erase all elements. We can't erase 567 * inside a DICT_EACH call, so copy first keys to a separate 568 * memory area first. */ 569 int keys[d2.size + 1]; 570 size_t ct = 0; 571 keys[0] = 0; 572 DICT_EACH(&d2, int, int, NULL, fill_keys, keys); 573 for (i = 0; i < (size_t)keys[0]; ++i) { 574 assert(DICT_ERASE(&d2, &keys[i + 1], int, 575 NULL, NULL, NULL) == 0); 576 ++ct; 577 } 578 assert(ct == sizeof(seen)); 579 DICT_DESTROY(&d2, int, int, NULL, NULL, NULL); 580 } 581 582 static void 583 test_erase(void) 584 { 585 int i; 586 587 /* To test erase, we need a relatively bad hash function, so 588 * that there are some overlapping chains in the table. */ 589 struct dict d2; 590 DICT_INIT(&d2, int, int, dict_hash_int_silly, dict_eq_int, NULL); 591 const int limit = 500; 592 for (i = 0; i < limit; ++i) { 593 int key = 2 * i + 1; 594 int value = 2 * key + 1; 595 DICT_INSERT(&d2, &key, &value); 596 } 597 598 /* Now we try to delete each of the keys, and verify that none 599 * of the chains was broken. */ 600 for (i = 0; i < limit; ++i) { 601 struct dict copy; 602 DICT_CLONE(©, &d2, int, int, NULL, NULL, NULL, NULL, NULL); 603 int key = 2 * i + 1; 604 DICT_ERASE(©, &key, int, NULL, NULL, NULL); 605 assert(dict_size(©) == dict_size(&d2) - 1); 606 607 int j; 608 for (j = 0; j < limit; ++j) { 609 key = 2 * j + 1; 610 int *valp = DICT_FIND_REF(©, &key, int); 611 if (i != j) { 612 assert(valp != NULL); 613 assert(*valp == 2 * key + 1); 614 } else { 615 assert(valp == NULL); 616 } 617 } 618 619 DICT_DESTROY(©, int, int, NULL, NULL, NULL); 620 } 621 DICT_DESTROY(&d2, int, int, NULL, NULL, NULL); 622 } 623 624 int main(int argc, char *argv[]) 625 { 626 test1(); 627 test_erase(); 628 return 0; 629 } 630 631 #endif 632