1 /* 2 * $Id: linkhash.c,v 1.4 2006/01/26 02:16:28 mclark Exp $ 3 * 4 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd. 5 * Michael Clark <michael (at) metaparadigm.com> 6 * Copyright (c) 2009 Hewlett-Packard Development Company, L.P. 7 * 8 * This library is free software; you can redistribute it and/or modify 9 * it under the terms of the MIT license. See COPYING for details. 10 * 11 */ 12 13 #include <stdio.h> 14 #include <string.h> 15 #include <stdlib.h> 16 #include <stdarg.h> 17 #include <stddef.h> 18 #include <limits.h> 19 20 #ifdef HAVE_ENDIAN_H 21 # include <endian.h> /* attempt to define endianness */ 22 #endif 23 24 #include "random_seed.h" 25 #include "linkhash.h" 26 27 void lh_abort(const char *msg, ...) 28 { 29 va_list ap; 30 va_start(ap, msg); 31 vprintf(msg, ap); 32 va_end(ap); 33 exit(1); 34 } 35 36 unsigned long lh_ptr_hash(const void *k) 37 { 38 /* CAW: refactored to be 64bit nice */ 39 return (unsigned long)((((ptrdiff_t)k * LH_PRIME) >> 4) & ULONG_MAX); 40 } 41 42 int lh_ptr_equal(const void *k1, const void *k2) 43 { 44 return (k1 == k2); 45 } 46 47 /* 48 * hashlittle from lookup3.c, by Bob Jenkins, May 2006, Public Domain. 49 * http://burtleburtle.net/bob/c/lookup3.c 50 * minor modifications to make functions static so no symbols are exported 51 * minor mofifications to compile with -Werror 52 */ 53 54 /* 55 ------------------------------------------------------------------------------- 56 lookup3.c, by Bob Jenkins, May 2006, Public Domain. 57 58 These are functions for producing 32-bit hashes for hash table lookup. 59 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() 60 are externally useful functions. Routines to test the hash are included 61 if SELF_TEST is defined. You can use this free for any purpose. It's in 62 the public domain. It has no warranty. 63 64 You probably want to use hashlittle(). hashlittle() and hashbig() 65 hash byte arrays. hashlittle() is is faster than hashbig() on 66 little-endian machines. Intel and AMD are little-endian machines. 67 On second thought, you probably want hashlittle2(), which is identical to 68 hashlittle() except it returns two 32-bit hashes for the price of one. 69 You could implement hashbig2() if you wanted but I haven't bothered here. 70 71 If you want to find a hash of, say, exactly 7 integers, do 72 a = i1; b = i2; c = i3; 73 mix(a,b,c); 74 a += i4; b += i5; c += i6; 75 mix(a,b,c); 76 a += i7; 77 final(a,b,c); 78 then use c as the hash value. If you have a variable length array of 79 4-byte integers to hash, use hashword(). If you have a byte array (like 80 a character string), use hashlittle(). If you have several byte arrays, or 81 a mix of things, see the comments above hashlittle(). 82 83 Why is this so big? I read 12 bytes at a time into 3 4-byte integers, 84 then mix those integers. This is fast (you can do a lot more thorough 85 mixing with 12*3 instructions on 3 integers than you can with 3 instructions 86 on 1 byte), but shoehorning those bytes into integers efficiently is messy. 87 ------------------------------------------------------------------------------- 88 */ 89 90 /* 91 * My best guess at if you are big-endian or little-endian. This may 92 * need adjustment. 93 */ 94 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ 95 __BYTE_ORDER == __LITTLE_ENDIAN) || \ 96 (defined(i386) || defined(__i386__) || defined(__i486__) || \ 97 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL)) 98 # define HASH_LITTLE_ENDIAN 1 99 # define HASH_BIG_ENDIAN 0 100 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ 101 __BYTE_ORDER == __BIG_ENDIAN) || \ 102 (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) 103 # define HASH_LITTLE_ENDIAN 0 104 # define HASH_BIG_ENDIAN 1 105 #else 106 # define HASH_LITTLE_ENDIAN 0 107 # define HASH_BIG_ENDIAN 0 108 #endif 109 110 #define hashsize(n) ((uint32_t)1<<(n)) 111 #define hashmask(n) (hashsize(n)-1) 112 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) 113 114 /* 115 ------------------------------------------------------------------------------- 116 mix -- mix 3 32-bit values reversibly. 117 118 This is reversible, so any information in (a,b,c) before mix() is 119 still in (a,b,c) after mix(). 120 121 If four pairs of (a,b,c) inputs are run through mix(), or through 122 mix() in reverse, there are at least 32 bits of the output that 123 are sometimes the same for one pair and different for another pair. 124 This was tested for: 125 * pairs that differed by one bit, by two bits, in any combination 126 of top bits of (a,b,c), or in any combination of bottom bits of 127 (a,b,c). 128 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 129 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 130 is commonly produced by subtraction) look like a single 1-bit 131 difference. 132 * the base values were pseudorandom, all zero but one bit set, or 133 all zero plus a counter that starts at zero. 134 135 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that 136 satisfy this are 137 4 6 8 16 19 4 138 9 15 3 18 27 15 139 14 9 3 7 17 3 140 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing 141 for "differ" defined as + with a one-bit base and a two-bit delta. I 142 used http://burtleburtle.net/bob/hash/avalanche.html to choose 143 the operations, constants, and arrangements of the variables. 144 145 This does not achieve avalanche. There are input bits of (a,b,c) 146 that fail to affect some output bits of (a,b,c), especially of a. The 147 most thoroughly mixed value is c, but it doesn't really even achieve 148 avalanche in c. 149 150 This allows some parallelism. Read-after-writes are good at doubling 151 the number of bits affected, so the goal of mixing pulls in the opposite 152 direction as the goal of parallelism. I did what I could. Rotates 153 seem to cost as much as shifts on every machine I could lay my hands 154 on, and rotates are much kinder to the top and bottom bits, so I used 155 rotates. 156 ------------------------------------------------------------------------------- 157 */ 158 #define mix(a,b,c) \ 159 { \ 160 a -= c; a ^= rot(c, 4); c += b; \ 161 b -= a; b ^= rot(a, 6); a += c; \ 162 c -= b; c ^= rot(b, 8); b += a; \ 163 a -= c; a ^= rot(c,16); c += b; \ 164 b -= a; b ^= rot(a,19); a += c; \ 165 c -= b; c ^= rot(b, 4); b += a; \ 166 } 167 168 /* 169 ------------------------------------------------------------------------------- 170 final -- final mixing of 3 32-bit values (a,b,c) into c 171 172 Pairs of (a,b,c) values differing in only a few bits will usually 173 produce values of c that look totally different. This was tested for 174 * pairs that differed by one bit, by two bits, in any combination 175 of top bits of (a,b,c), or in any combination of bottom bits of 176 (a,b,c). 177 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 178 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 179 is commonly produced by subtraction) look like a single 1-bit 180 difference. 181 * the base values were pseudorandom, all zero but one bit set, or 182 all zero plus a counter that starts at zero. 183 184 These constants passed: 185 14 11 25 16 4 14 24 186 12 14 25 16 4 14 24 187 and these came close: 188 4 8 15 26 3 22 24 189 10 8 15 26 3 22 24 190 11 8 15 26 3 22 24 191 ------------------------------------------------------------------------------- 192 */ 193 #define final(a,b,c) \ 194 { \ 195 c ^= b; c -= rot(b,14); \ 196 a ^= c; a -= rot(c,11); \ 197 b ^= a; b -= rot(a,25); \ 198 c ^= b; c -= rot(b,16); \ 199 a ^= c; a -= rot(c,4); \ 200 b ^= a; b -= rot(a,14); \ 201 c ^= b; c -= rot(b,24); \ 202 } 203 204 205 /* 206 ------------------------------------------------------------------------------- 207 hashlittle() -- hash a variable-length key into a 32-bit value 208 k : the key (the unaligned variable-length array of bytes) 209 length : the length of the key, counting by bytes 210 initval : can be any 4-byte value 211 Returns a 32-bit value. Every bit of the key affects every bit of 212 the return value. Two keys differing by one or two bits will have 213 totally different hash values. 214 215 The best hash table sizes are powers of 2. There is no need to do 216 mod a prime (mod is sooo slow!). If you need less than 32 bits, 217 use a bitmask. For example, if you need only 10 bits, do 218 h = (h & hashmask(10)); 219 In which case, the hash table should have hashsize(10) elements. 220 221 If you are hashing n strings (uint8_t **)k, do it like this: 222 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h); 223 224 By Bob Jenkins, 2006. bob_jenkins (at) burtleburtle.net. You may use this 225 code any way you wish, private, educational, or commercial. It's free. 226 227 Use for hash table lookup, or anything where one collision in 2^^32 is 228 acceptable. Do NOT use for cryptographic purposes. 229 ------------------------------------------------------------------------------- 230 */ 231 232 static uint32_t hashlittle( const void *key, size_t length, uint32_t initval) 233 { 234 uint32_t a,b,c; /* internal state */ 235 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ 236 237 /* Set up the internal state */ 238 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; 239 240 u.ptr = key; 241 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { 242 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ 243 244 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ 245 while (length > 12) 246 { 247 a += k[0]; 248 b += k[1]; 249 c += k[2]; 250 mix(a,b,c); 251 length -= 12; 252 k += 3; 253 } 254 255 /*----------------------------- handle the last (probably partial) block */ 256 /* 257 * "k[2]&0xffffff" actually reads beyond the end of the string, but 258 * then masks off the part it's not allowed to read. Because the 259 * string is aligned, the masked-off tail is in the same word as the 260 * rest of the string. Every machine with memory protection I've seen 261 * does it on word boundaries, so is OK with this. But VALGRIND will 262 * still catch it and complain. The masking trick does make the hash 263 * noticably faster for short strings (like English words). 264 */ 265 #ifndef VALGRIND 266 267 switch(length) 268 { 269 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 270 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; 271 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; 272 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; 273 case 8 : b+=k[1]; a+=k[0]; break; 274 case 7 : b+=k[1]&0xffffff; a+=k[0]; break; 275 case 6 : b+=k[1]&0xffff; a+=k[0]; break; 276 case 5 : b+=k[1]&0xff; a+=k[0]; break; 277 case 4 : a+=k[0]; break; 278 case 3 : a+=k[0]&0xffffff; break; 279 case 2 : a+=k[0]&0xffff; break; 280 case 1 : a+=k[0]&0xff; break; 281 case 0 : return c; /* zero length strings require no mixing */ 282 } 283 284 #else /* make valgrind happy */ 285 286 const uint8_t *k8 = (const uint8_t *)k; 287 switch(length) 288 { 289 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 290 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 291 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ 292 case 9 : c+=k8[8]; /* fall through */ 293 case 8 : b+=k[1]; a+=k[0]; break; 294 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 295 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ 296 case 5 : b+=k8[4]; /* fall through */ 297 case 4 : a+=k[0]; break; 298 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ 299 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ 300 case 1 : a+=k8[0]; break; 301 case 0 : return c; 302 } 303 304 #endif /* !valgrind */ 305 306 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { 307 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ 308 const uint8_t *k8; 309 310 /*--------------- all but last block: aligned reads and different mixing */ 311 while (length > 12) 312 { 313 a += k[0] + (((uint32_t)k[1])<<16); 314 b += k[2] + (((uint32_t)k[3])<<16); 315 c += k[4] + (((uint32_t)k[5])<<16); 316 mix(a,b,c); 317 length -= 12; 318 k += 6; 319 } 320 321 /*----------------------------- handle the last (probably partial) block */ 322 k8 = (const uint8_t *)k; 323 switch(length) 324 { 325 case 12: c+=k[4]+(((uint32_t)k[5])<<16); 326 b+=k[2]+(((uint32_t)k[3])<<16); 327 a+=k[0]+(((uint32_t)k[1])<<16); 328 break; 329 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 330 case 10: c+=k[4]; 331 b+=k[2]+(((uint32_t)k[3])<<16); 332 a+=k[0]+(((uint32_t)k[1])<<16); 333 break; 334 case 9 : c+=k8[8]; /* fall through */ 335 case 8 : b+=k[2]+(((uint32_t)k[3])<<16); 336 a+=k[0]+(((uint32_t)k[1])<<16); 337 break; 338 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 339 case 6 : b+=k[2]; 340 a+=k[0]+(((uint32_t)k[1])<<16); 341 break; 342 case 5 : b+=k8[4]; /* fall through */ 343 case 4 : a+=k[0]+(((uint32_t)k[1])<<16); 344 break; 345 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ 346 case 2 : a+=k[0]; 347 break; 348 case 1 : a+=k8[0]; 349 break; 350 case 0 : return c; /* zero length requires no mixing */ 351 } 352 353 } else { /* need to read the key one byte at a time */ 354 const uint8_t *k = (const uint8_t *)key; 355 356 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ 357 while (length > 12) 358 { 359 a += k[0]; 360 a += ((uint32_t)k[1])<<8; 361 a += ((uint32_t)k[2])<<16; 362 a += ((uint32_t)k[3])<<24; 363 b += k[4]; 364 b += ((uint32_t)k[5])<<8; 365 b += ((uint32_t)k[6])<<16; 366 b += ((uint32_t)k[7])<<24; 367 c += k[8]; 368 c += ((uint32_t)k[9])<<8; 369 c += ((uint32_t)k[10])<<16; 370 c += ((uint32_t)k[11])<<24; 371 mix(a,b,c); 372 length -= 12; 373 k += 12; 374 } 375 376 /*-------------------------------- last block: affect all 32 bits of (c) */ 377 switch(length) /* all the case statements fall through */ 378 { 379 case 12: c+=((uint32_t)k[11])<<24; 380 case 11: c+=((uint32_t)k[10])<<16; 381 case 10: c+=((uint32_t)k[9])<<8; 382 case 9 : c+=k[8]; 383 case 8 : b+=((uint32_t)k[7])<<24; 384 case 7 : b+=((uint32_t)k[6])<<16; 385 case 6 : b+=((uint32_t)k[5])<<8; 386 case 5 : b+=k[4]; 387 case 4 : a+=((uint32_t)k[3])<<24; 388 case 3 : a+=((uint32_t)k[2])<<16; 389 case 2 : a+=((uint32_t)k[1])<<8; 390 case 1 : a+=k[0]; 391 break; 392 case 0 : return c; 393 } 394 } 395 396 final(a,b,c); 397 return c; 398 } 399 400 unsigned long lh_char_hash(const void *k) 401 { 402 static volatile int random_seed = -1; 403 404 if (random_seed == -1) { 405 int seed; 406 /* we can't use -1 as it is the unitialized sentinel */ 407 while ((seed = json_c_get_random_seed()) == -1); 408 #if defined __GNUC__ 409 __sync_val_compare_and_swap(&random_seed, -1, seed); 410 #elif defined _MSC_VER 411 InterlockedCompareExchange(&random_seed, seed, -1); 412 #else 413 #warning "racy random seed initializtion if used by multiple threads" 414 random_seed = seed; /* potentially racy */ 415 #endif 416 } 417 418 return hashlittle((const char*)k, strlen((const char*)k), random_seed); 419 } 420 421 int lh_char_equal(const void *k1, const void *k2) 422 { 423 return (strcmp((const char*)k1, (const char*)k2) == 0); 424 } 425 426 struct lh_table* lh_table_new(int size, const char *name, 427 lh_entry_free_fn *free_fn, 428 lh_hash_fn *hash_fn, 429 lh_equal_fn *equal_fn) 430 { 431 int i; 432 struct lh_table *t; 433 434 t = (struct lh_table*)calloc(1, sizeof(struct lh_table)); 435 if(!t) lh_abort("lh_table_new: calloc failed\n"); 436 t->count = 0; 437 t->size = size; 438 t->name = name; 439 t->table = (struct lh_entry*)calloc(size, sizeof(struct lh_entry)); 440 if(!t->table) lh_abort("lh_table_new: calloc failed\n"); 441 t->free_fn = free_fn; 442 t->hash_fn = hash_fn; 443 t->equal_fn = equal_fn; 444 for(i = 0; i < size; i++) t->table[i].k = LH_EMPTY; 445 return t; 446 } 447 448 struct lh_table* lh_kchar_table_new(int size, const char *name, 449 lh_entry_free_fn *free_fn) 450 { 451 return lh_table_new(size, name, free_fn, lh_char_hash, lh_char_equal); 452 } 453 454 struct lh_table* lh_kptr_table_new(int size, const char *name, 455 lh_entry_free_fn *free_fn) 456 { 457 return lh_table_new(size, name, free_fn, lh_ptr_hash, lh_ptr_equal); 458 } 459 460 void lh_table_resize(struct lh_table *t, int new_size) 461 { 462 struct lh_table *new_t; 463 struct lh_entry *ent; 464 465 new_t = lh_table_new(new_size, t->name, NULL, t->hash_fn, t->equal_fn); 466 ent = t->head; 467 while(ent) { 468 lh_table_insert(new_t, ent->k, ent->v); 469 ent = ent->next; 470 } 471 free(t->table); 472 t->table = new_t->table; 473 t->size = new_size; 474 t->head = new_t->head; 475 t->tail = new_t->tail; 476 t->resizes++; 477 free(new_t); 478 } 479 480 void lh_table_free(struct lh_table *t) 481 { 482 struct lh_entry *c; 483 for(c = t->head; c != NULL; c = c->next) { 484 if(t->free_fn) { 485 t->free_fn(c); 486 } 487 } 488 free(t->table); 489 free(t); 490 } 491 492 493 int lh_table_insert(struct lh_table *t, void *k, const void *v) 494 { 495 unsigned long h, n; 496 497 t->inserts++; 498 if(t->count >= t->size * LH_LOAD_FACTOR) lh_table_resize(t, t->size * 2); 499 500 h = t->hash_fn(k); 501 n = h % t->size; 502 503 while( 1 ) { 504 if(t->table[n].k == LH_EMPTY || t->table[n].k == LH_FREED) break; 505 t->collisions++; 506 if ((int)++n == t->size) n = 0; 507 } 508 509 t->table[n].k = k; 510 t->table[n].v = v; 511 t->count++; 512 513 if(t->head == NULL) { 514 t->head = t->tail = &t->table[n]; 515 t->table[n].next = t->table[n].prev = NULL; 516 } else { 517 t->tail->next = &t->table[n]; 518 t->table[n].prev = t->tail; 519 t->table[n].next = NULL; 520 t->tail = &t->table[n]; 521 } 522 523 return 0; 524 } 525 526 527 struct lh_entry* lh_table_lookup_entry(struct lh_table *t, const void *k) 528 { 529 unsigned long h = t->hash_fn(k); 530 unsigned long n = h % t->size; 531 int count = 0; 532 533 t->lookups++; 534 while( count < t->size ) { 535 if(t->table[n].k == LH_EMPTY) return NULL; 536 if(t->table[n].k != LH_FREED && 537 t->equal_fn(t->table[n].k, k)) return &t->table[n]; 538 if ((int)++n == t->size) n = 0; 539 count++; 540 } 541 return NULL; 542 } 543 544 545 const void* lh_table_lookup(struct lh_table *t, const void *k) 546 { 547 void *result; 548 lh_table_lookup_ex(t, k, &result); 549 return result; 550 } 551 552 json_bool lh_table_lookup_ex(struct lh_table* t, const void* k, void **v) 553 { 554 struct lh_entry *e = lh_table_lookup_entry(t, k); 555 if (e != NULL) { 556 if (v != NULL) *v = (void *)e->v; 557 return TRUE; /* key found */ 558 } 559 if (v != NULL) *v = NULL; 560 return FALSE; /* key not found */ 561 } 562 563 int lh_table_delete_entry(struct lh_table *t, struct lh_entry *e) 564 { 565 ptrdiff_t n = (ptrdiff_t)(e - t->table); /* CAW: fixed to be 64bit nice, still need the crazy negative case... */ 566 567 /* CAW: this is bad, really bad, maybe stack goes other direction on this machine... */ 568 if(n < 0) { return -2; } 569 570 if(t->table[n].k == LH_EMPTY || t->table[n].k == LH_FREED) return -1; 571 t->count--; 572 if(t->free_fn) t->free_fn(e); 573 t->table[n].v = NULL; 574 t->table[n].k = LH_FREED; 575 if(t->tail == &t->table[n] && t->head == &t->table[n]) { 576 t->head = t->tail = NULL; 577 } else if (t->head == &t->table[n]) { 578 t->head->next->prev = NULL; 579 t->head = t->head->next; 580 } else if (t->tail == &t->table[n]) { 581 t->tail->prev->next = NULL; 582 t->tail = t->tail->prev; 583 } else { 584 t->table[n].prev->next = t->table[n].next; 585 t->table[n].next->prev = t->table[n].prev; 586 } 587 t->table[n].next = t->table[n].prev = NULL; 588 return 0; 589 } 590 591 592 int lh_table_delete(struct lh_table *t, const void *k) 593 { 594 struct lh_entry *e = lh_table_lookup_entry(t, k); 595 if(!e) return -1; 596 return lh_table_delete_entry(t, e); 597 } 598 599 int lh_table_length(struct lh_table *t) 600 { 601 return t->count; 602 } 603