Home | History | Annotate | Download | only in ltrace
      1 /*
      2  * This file is part of ltrace.
      3  * Copyright (C) 2012, 2013 Petr Machata, Red Hat Inc.
      4  *
      5  * This program is free software; you can redistribute it and/or
      6  * modify it under the terms of the GNU General Public License as
      7  * published by the Free Software Foundation; either version 2 of the
      8  * License, or (at your option) any later version.
      9  *
     10  * This program is distributed in the hope that it will be useful, but
     11  * WITHOUT ANY WARRANTY; without even the implied warranty of
     12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  * General Public License for more details.
     14  *
     15  * You should have received a copy of the GNU General Public License
     16  * along with this program; if not, write to the Free Software
     17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
     18  * 02110-1301 USA
     19  */
     20 
     21 #include <string.h>
     22 #include <stdlib.h>
     23 #include <stdio.h>
     24 #include "dict.h"
     25 
     26 struct status_bits {
     27 	unsigned char taken : 1;
     28 	unsigned char erased : 1;
     29 };
     30 
     31 static struct status_bits *
     32 bitp(struct dict *dict, size_t n)
     33 {
     34 	return VECT_ELEMENT(&dict->status, struct status_bits, n);
     35 }
     36 
     37 void
     38 dict_init(struct dict *dict,
     39 	  size_t key_size, size_t value_size,
     40 	  size_t (*hash1)(const void *),
     41 	  int (*eq)(const void *, const void *),
     42 	  size_t (*hash2)(size_t))
     43 {
     44 	assert(hash1 != NULL);
     45 	assert(eq != NULL);
     46 
     47 	vect_init(&dict->keys, key_size);
     48 	vect_init(&dict->values, value_size);
     49 	VECT_INIT(&dict->status, struct status_bits);
     50 	dict->size = 0;
     51 
     52 	dict->hash1 = hash1;
     53 	dict->hash2 = hash2;
     54 	dict->eq = eq;
     55 }
     56 
     57 struct clone_data {
     58 	struct dict *target;
     59 	int (*clone_key)(void *tgt, const void *src, void *data);
     60 	int (*clone_value)(void *tgt, const void *src, void *data);
     61 	void (*dtor_key)(void *tgt, void *data);
     62 	void (*dtor_value)(void *tgt, void *data);
     63 	void *data;
     64 };
     65 
     66 static enum callback_status
     67 clone_cb(void *key, void *value, void *data)
     68 {
     69 	struct clone_data *clone_data = data;
     70 
     71 	char nkey[clone_data->target->keys.elt_size];
     72 	if (clone_data->clone_key == NULL)
     73 		memmove(nkey, key, sizeof(nkey));
     74 	else if (clone_data->clone_key(&nkey, key, clone_data->data) < 0)
     75 		return CBS_STOP;
     76 
     77 	char nvalue[clone_data->target->values.elt_size];
     78 	if (clone_data->clone_value == NULL) {
     79 		memmove(nvalue, value, sizeof(nvalue));
     80 	} else if (clone_data->clone_value(&nvalue, value,
     81 					 clone_data->data) < 0) {
     82 	fail:
     83 		if (clone_data->clone_key != NULL)
     84 			clone_data->dtor_key(&nkey, clone_data->data);
     85 		return CBS_STOP;
     86 	}
     87 
     88 	if (dict_insert(clone_data->target, nkey, nvalue) < 0) {
     89 		if (clone_data->clone_value != NULL)
     90 			clone_data->dtor_value(&nvalue, clone_data->data);
     91 		goto fail;
     92 	}
     93 
     94 	return CBS_CONT;
     95 }
     96 
     97 int
     98 dict_clone(struct dict *target, const struct dict *source,
     99 	   int (*clone_key)(void *tgt, const void *src, void *data),
    100 	   void (*dtor_key)(void *tgt, void *data),
    101 	   int (*clone_value)(void *tgt, const void *src, void *data),
    102 	   void (*dtor_value)(void *tgt, void *data),
    103 	   void *data)
    104 {
    105 	assert((clone_key != NULL) == (dtor_key != NULL));
    106 	assert((clone_value != NULL) == (dtor_value != NULL));
    107 
    108 	dict_init(target, source->keys.elt_size, source->values.elt_size,
    109 		  source->hash1, source->eq, source->hash2);
    110 	struct clone_data clone_data = {
    111 		target, clone_key, clone_value, dtor_key, dtor_value, data
    112 	};
    113 	if (dict_each((struct dict *)source, NULL,
    114 		      clone_cb, &clone_data) != NULL) {
    115 		dict_destroy(target, dtor_key, dtor_value, data);
    116 		return -1;
    117 	}
    118 	return 0;
    119 }
    120 
    121 size_t
    122 dict_size(const struct dict *dict)
    123 {
    124 	return dict->size;
    125 }
    126 
    127 int
    128 dict_empty(const struct dict *dict)
    129 {
    130 	return dict->size == 0;
    131 }
    132 
    133 struct destroy_data {
    134 	void (*dtor_key)(void *tgt, void *data);
    135 	void (*dtor_value)(void *tgt, void *data);
    136 	void *data;
    137 };
    138 
    139 static enum callback_status
    140 destroy_cb(void *key, void *value, void *data)
    141 {
    142 	struct destroy_data *destroy_data = data;
    143 	if (destroy_data->dtor_key)
    144 		destroy_data->dtor_key(key, destroy_data->data);
    145 	if (destroy_data->dtor_value)
    146 		destroy_data->dtor_value(value, destroy_data->data);
    147 	return CBS_CONT;
    148 }
    149 
    150 void
    151 dict_destroy(struct dict *dict,
    152 	     void (*dtor_key)(void *tgt, void *data),
    153 	     void (*dtor_value)(void *tgt, void *data),
    154 	     void *data)
    155 {
    156 	/* Some slots are unused (the corresponding keys and values
    157 	 * are uninitialized), so we can't call dtors for them.
    158 	 * Iterate DICT instead.  */
    159 	if (dtor_key != NULL || dtor_value != NULL) {
    160 		struct destroy_data destroy_data = {
    161 			dtor_key, dtor_value, data
    162 		};
    163 		dict_each(dict, NULL, destroy_cb, &destroy_data);
    164 	}
    165 
    166 	vect_destroy(&dict->keys, NULL, NULL);
    167 	vect_destroy(&dict->values, NULL, NULL);
    168 	vect_destroy(&dict->status, NULL, NULL);
    169 }
    170 
    171 static size_t
    172 default_secondary_hash(size_t pos)
    173 {
    174 	return pos % 97 + 1;
    175 }
    176 
    177 static size_t
    178 small_secondary_hash(size_t pos)
    179 {
    180 	return 1;
    181 }
    182 
    183 static inline size_t
    184 n(struct dict *dict)
    185 {
    186 	return vect_size(&dict->keys);
    187 }
    188 
    189 static inline size_t (*
    190 hash2(struct dict *dict))(size_t)
    191 {
    192 	if (dict->hash2 != NULL)
    193 		return dict->hash2;
    194 	else if (n(dict) < 100)
    195 		return small_secondary_hash;
    196 	else
    197 		return default_secondary_hash;
    198 }
    199 
    200 static void *
    201 getkey(struct dict *dict, size_t pos)
    202 {
    203 	return ((unsigned char *)dict->keys.data)
    204 		+ dict->keys.elt_size * pos;
    205 }
    206 
    207 static void *
    208 getvalue(struct dict *dict, size_t pos)
    209 {
    210 	return ((unsigned char *)dict->values.data)
    211 		+ dict->values.elt_size * pos;
    212 }
    213 
    214 static size_t
    215 find_slot(struct dict *dict, const void *key,
    216 	  int *foundp, int *should_rehash, size_t *pi)
    217 {
    218 	assert(n(dict) > 0);
    219 	size_t pos = dict->hash1(key) % n(dict);
    220 	size_t pos0 = -1;
    221 	size_t d = hash2(dict)(pos);
    222 	size_t i = 0;
    223 	*foundp = 0;
    224 
    225 	/* We skip over any taken or erased slots.  But we remember
    226 	 * the first erased that we find, and if we don't find the key
    227 	 * later, we return that position.  */
    228 	for (; bitp(dict, pos)->taken || bitp(dict, pos)->erased;
    229 	     pos = (pos + d) % n(dict)) {
    230 		if (pos0 == (size_t)-1 && bitp(dict, pos)->erased)
    231 			pos0 = pos;
    232 
    233 		/* If there is a loop, but we've seen an erased
    234 		 * element, take that one.  Otherwise give up.  */
    235 		if (++i > dict->size) {
    236 			if (pos0 != (size_t)-1)
    237 				break;
    238 			return (size_t)-1;
    239 		}
    240 
    241 		if (bitp(dict, pos)->taken
    242 		    && dict->eq(getkey(dict, pos), key)) {
    243 			*foundp = 1;
    244 			break;
    245 		}
    246 	}
    247 
    248 	if (!*foundp && pos0 != (size_t)-1)
    249 		pos = pos0;
    250 
    251 	/* If the hash table degraded into a linked list, request a
    252 	 * rehash.  */
    253 	if (should_rehash != NULL)
    254 		*should_rehash = i > 10 && i > n(dict) / 10;
    255 
    256 	if (pi != NULL)
    257 		*pi = i;
    258 	return pos;
    259 }
    260 
    261 static enum callback_status
    262 rehash_move(void *key, void *value, void *data)
    263 {
    264 	if (dict_insert(data, key, value) < 0)
    265 		return CBS_STOP;
    266 	else
    267 		return CBS_CONT;
    268 }
    269 
    270 static int
    271 rehash(struct dict *dict, size_t nn)
    272 {
    273 	assert(nn != n(dict));
    274 	int ret = -1;
    275 
    276 	struct dict tmp;
    277 	dict_init(&tmp, dict->keys.elt_size, dict->values.elt_size,
    278 		  dict->hash1, dict->eq, dict->hash2);
    279 
    280 	/* To honor all invariants (so that we can safely call
    281 	 * dict_destroy), we first make a request to _reserve_ enough
    282 	 * room in all vectors.  This has no observable effect on
    283 	 * contents of vectors.  */
    284 	if (vect_reserve(&tmp.keys, nn) < 0
    285 	    || vect_reserve(&tmp.values, nn) < 0
    286 	    || vect_reserve(&tmp.status, nn) < 0)
    287 		goto done;
    288 
    289 	/* Now that we know that there is enough size in vectors, we
    290 	 * simply bump the size.  */
    291 	tmp.keys.size = nn;
    292 	tmp.values.size = nn;
    293 	size_t old_size = tmp.status.size;
    294 	tmp.status.size = nn;
    295 	memset(VECT_ELEMENT(&tmp.status, struct status_bits, old_size),
    296 	       0, (tmp.status.size - old_size) * tmp.status.elt_size);
    297 
    298 	/* At this point, TMP is once more an empty dictionary with NN
    299 	 * slots.  Now move stuff from DICT to TMP.  */
    300 	if (dict_each(dict, NULL, rehash_move, &tmp) != NULL)
    301 		goto done;
    302 
    303 	/* And now swap contents of DICT and TMP, and we are done.  */
    304 	{
    305 		struct dict tmp2 = *dict;
    306 		*dict = tmp;
    307 		tmp = tmp2;
    308 	}
    309 
    310 	ret = 0;
    311 
    312 done:
    313 	/* We only want to release the containers, not the actual data
    314 	 * that they hold, so it's fine if we don't pass any dtor.  */
    315 	dict_destroy(&tmp, NULL, NULL, NULL);
    316 	return ret;
    317 
    318 }
    319 
    320 static const size_t primes[] = {
    321 	13, 31, 61, 127, 251, 509, 1021, 2039, 4093,
    322 	8191, 16381, 32749, 65521, 130981, 0
    323 };
    324 
    325 static size_t
    326 larger_size(size_t current)
    327 {
    328 	if (current == 0)
    329 		return primes[0];
    330 
    331 	if (current < primes[sizeof(primes)/sizeof(*primes) - 2]) {
    332 		size_t i;
    333 		for (i = 0; primes[i] != 0; ++i)
    334 			if (primes[i] > current)
    335 				return primes[i];
    336 		abort();
    337 	}
    338 
    339 	/* We ran out of primes, so invent a new one.  The following
    340 	 * gives primes until about 17M elements (and then some more
    341 	 * later).  */
    342 	return 2 * current + 6585;
    343 }
    344 
    345 static size_t
    346 smaller_size(size_t current)
    347 {
    348 	if (current <= primes[0])
    349 		return primes[0];
    350 
    351 	if (current <= primes[sizeof(primes)/sizeof(*primes) - 2]) {
    352 		size_t i;
    353 		size_t prev = 0;
    354 		for (i = 0; primes[i] != 0; ++i) {
    355 			if (primes[i] >= current)
    356 				return prev;
    357 			prev = primes[i];
    358 		}
    359 		abort();
    360 	}
    361 
    362 	return (current - 6585) / 2;
    363 }
    364 
    365 int
    366 dict_insert(struct dict *dict, void *key, void *value)
    367 {
    368 	if (n(dict) == 0 || dict->size > 0.7 * n(dict))
    369 	rehash:
    370 		if (rehash(dict, larger_size(n(dict))) < 0)
    371 			return -1;
    372 
    373 	int found;
    374 	int should_rehash;
    375 	size_t slot_n = find_slot(dict, key, &found, &should_rehash, NULL);
    376 	if (slot_n == (size_t)-1)
    377 		return -1;
    378 	if (found)
    379 		return 1;
    380 	assert(!bitp(dict, slot_n)->taken);
    381 
    382 	/* If rehash was requested, do that, and retry.  But just live
    383 	 * with it for apparently sparse tables.  No resizing can fix
    384 	 * a rubbish hash.  */
    385 	if (should_rehash && dict->size > 0.3 * n(dict))
    386 		goto rehash;
    387 
    388 	memmove(getkey(dict, slot_n), key, dict->keys.elt_size);
    389 	memmove(getvalue(dict, slot_n), value, dict->values.elt_size);
    390 
    391 	bitp(dict, slot_n)->taken = 1;
    392 	bitp(dict, slot_n)->erased = 0;
    393 	++dict->size;
    394 
    395 	return 0;
    396 }
    397 
    398 void *
    399 dict_find(struct dict *dict, const void *key)
    400 {
    401 	if (dict->size == 0)
    402 		return NULL;
    403 	assert(n(dict) > 0);
    404 
    405 	int found;
    406 	size_t slot_n = find_slot(dict, key, &found, NULL, NULL);
    407 	if (found)
    408 		return getvalue(dict, slot_n);
    409 	else
    410 		return NULL;
    411 }
    412 
    413 int
    414 dict_erase(struct dict *dict, const void *key,
    415 	   void (*dtor_key)(void *tgt, void *data),
    416 	   void (*dtor_value)(void *tgt, void *data),
    417 	   void *data)
    418 {
    419 	int found;
    420 	size_t i;
    421 	size_t slot_n = find_slot(dict, key, &found, NULL, &i);
    422 	if (!found)
    423 		return -1;
    424 
    425 	if (dtor_key != NULL)
    426 		dtor_key(getkey(dict, slot_n), data);
    427 	if (dtor_value != NULL)
    428 		dtor_value(getvalue(dict, slot_n), data);
    429 
    430 	bitp(dict, slot_n)->taken = 0;
    431 	bitp(dict, slot_n)->erased = 1;
    432 	--dict->size;
    433 
    434 	if (dict->size < 0.3 * n(dict)) {
    435 		size_t smaller = smaller_size(n(dict));
    436 		if (smaller != n(dict))
    437 			/* Don't mind if it fails when shrinking.  */
    438 			rehash(dict, smaller);
    439 	}
    440 
    441 	return 0;
    442 }
    443 
    444 void *
    445 dict_each(struct dict *dict, void *start_after,
    446 	  enum callback_status (*cb)(void *, void *, void *), void *data)
    447 {
    448 	size_t i;
    449 	if (start_after != NULL)
    450 		i = ((start_after - dict->keys.data) / dict->keys.elt_size) + 1;
    451 	else
    452 		i = 0;
    453 
    454 	for (; i < dict->keys.size; ++i)
    455 		if (bitp(dict, i)->taken && !bitp(dict, i)->erased) {
    456 			void *key = getkey(dict, i);
    457 			if (cb(key, getvalue(dict, i), data) != CBS_CONT)
    458 				return key;
    459 		}
    460 
    461 	return NULL;
    462 }
    463 
    464 size_t
    465 dict_hash_int(const int *key)
    466 {
    467 	return (size_t)(*key * 2654435761U);
    468 }
    469 
    470 int
    471 dict_eq_int(const int *key1, const int *key2)
    472 {
    473 	return *key1 == *key2;
    474 }
    475 
    476 size_t
    477 dict_hash_string(const char **key)
    478 {
    479 	size_t h = 5381;
    480 	const char *str = *key;
    481 	while (*str != 0)
    482 		h = h * 33 ^ *str++;
    483 	return h;
    484 }
    485 
    486 int
    487 dict_eq_string(const char **key1, const char **key2)
    488 {
    489 	return strcmp(*key1, *key2) == 0;
    490 }
    491 
    492 void
    493 dict_dtor_string(const char **key, void *data)
    494 {
    495 	free((char *)*key);
    496 }
    497 
    498 int
    499 dict_clone_string(const char **tgt, const char **src, void *data)
    500 {
    501 	*tgt = strdup(*src);
    502 	return *tgt != NULL ? 0 : -1;
    503 }
    504 
    505 #ifdef TEST
    506 static enum callback_status
    507 dump(int *key, int *value, void *data)
    508 {
    509 	char *seen = data;
    510 	assert(seen[*key] == 0);
    511 	seen[*key] = 1;
    512 	assert(*value == *key * 2 + 1);
    513 	return CBS_STOP;
    514 }
    515 
    516 static size_t
    517 dict_hash_int_silly(const int *key)
    518 {
    519 	return *key % 10;
    520 }
    521 
    522 static void
    523 verify(struct dict *di, size_t len, char *seen)
    524 {
    525 	size_t ct = 0;
    526 	int *it;
    527 	for (it = NULL; (it = DICT_EACH(di, int, int, it, dump, seen)) != NULL;)
    528 		ct++;
    529 	assert(ct == len);
    530 	memset(seen, 0, len);
    531 }
    532 
    533 static enum callback_status
    534 fill_keys(int *key, int *value, void *data)
    535 {
    536 	int *array = data;
    537 	array[++array[0]] = *key;
    538 	return CBS_CONT;
    539 }
    540 
    541 static void
    542 test1(void)
    543 {
    544 	struct dict di;
    545 	DICT_INIT(&di, int, int, dict_hash_int, dict_eq_int, NULL);
    546 
    547 	char seen[100000] = {};
    548 	size_t i;
    549 	for (i = 0; i < sizeof(seen); ++i) {
    550 		int key = i;
    551 		int value = 2 * i + 1;
    552 		DICT_INSERT(&di, &key, &value);
    553 		int *valp = DICT_FIND_REF(&di, &key, int);
    554 		assert(valp != NULL);
    555 		assert(*valp == value);
    556 		assert(dict_size(&di) == i + 1);
    557 	}
    558 
    559 	verify(&di, sizeof(seen), seen);
    560 
    561 	struct dict d2;
    562 	DICT_CLONE(&d2, &di, int, int, NULL, NULL, NULL, NULL, NULL);
    563 	DICT_DESTROY(&di, int, int, NULL, NULL, NULL);
    564 	verify(&d2, sizeof(seen), seen);
    565 
    566 	/* Now we try to gradually erase all elements.  We can't erase
    567 	 * inside a DICT_EACH call, so copy first keys to a separate
    568 	 * memory area first.  */
    569 	int keys[d2.size + 1];
    570 	size_t ct = 0;
    571 	keys[0] = 0;
    572 	DICT_EACH(&d2, int, int, NULL, fill_keys, keys);
    573 	for (i = 0; i < (size_t)keys[0]; ++i) {
    574 		assert(DICT_ERASE(&d2, &keys[i + 1], int,
    575 				  NULL, NULL, NULL) == 0);
    576 		++ct;
    577 	}
    578 	assert(ct == sizeof(seen));
    579 	DICT_DESTROY(&d2, int, int, NULL, NULL, NULL);
    580 }
    581 
    582 static void
    583 test_erase(void)
    584 {
    585 	int i;
    586 
    587 	/* To test erase, we need a relatively bad hash function, so
    588 	 * that there are some overlapping chains in the table.  */
    589 	struct dict d2;
    590 	DICT_INIT(&d2, int, int, dict_hash_int_silly, dict_eq_int, NULL);
    591 	const int limit = 500;
    592 	for (i = 0; i < limit; ++i) {
    593 		int key = 2 * i + 1;
    594 		int value = 2 * key + 1;
    595 		DICT_INSERT(&d2, &key, &value);
    596 	}
    597 
    598 	/* Now we try to delete each of the keys, and verify that none
    599 	 * of the chains was broken.  */
    600 	for (i = 0; i < limit; ++i) {
    601 		struct dict copy;
    602 		DICT_CLONE(&copy, &d2, int, int, NULL, NULL, NULL, NULL, NULL);
    603 		int key = 2 * i + 1;
    604 		DICT_ERASE(&copy, &key, int, NULL, NULL, NULL);
    605 		assert(dict_size(&copy) == dict_size(&d2) - 1);
    606 
    607 		int j;
    608 		for (j = 0; j < limit; ++j) {
    609 			key = 2 * j + 1;
    610 			int *valp = DICT_FIND_REF(&copy, &key, int);
    611 			if (i != j) {
    612 				assert(valp != NULL);
    613 				assert(*valp == 2 * key + 1);
    614 			} else {
    615 				assert(valp == NULL);
    616 			}
    617 		}
    618 
    619 		DICT_DESTROY(&copy, int, int, NULL, NULL, NULL);
    620 	}
    621 	DICT_DESTROY(&d2, int, int, NULL, NULL, NULL);
    622 }
    623 
    624 int main(int argc, char *argv[])
    625 {
    626 	test1();
    627 	test_erase();
    628 	return 0;
    629 }
    630 
    631 #endif
    632