1 /* 2 ** 2001 September 22 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** This is the implementation of generic hash-tables used in SQLite. 13 ** We've modified it slightly to serve as a standalone hash table 14 ** implementation for the full-text indexing module. 15 */ 16 #include <assert.h> 17 #include <stdlib.h> 18 #include <string.h> 19 20 #include "ft_hash.h" 21 22 void *malloc_and_zero(int n){ 23 void *p = malloc(n); 24 if( p ){ 25 memset(p, 0, n); 26 } 27 return p; 28 } 29 30 /* Turn bulk memory into a hash table object by initializing the 31 ** fields of the Hash structure. 32 ** 33 ** "pNew" is a pointer to the hash table that is to be initialized. 34 ** keyClass is one of the constants HASH_INT, HASH_POINTER, 35 ** HASH_BINARY, or HASH_STRING. The value of keyClass 36 ** determines what kind of key the hash table will use. "copyKey" is 37 ** true if the hash table should make its own private copy of keys and 38 ** false if it should just use the supplied pointer. CopyKey only makes 39 ** sense for HASH_STRING and HASH_BINARY and is ignored 40 ** for other key classes. 41 */ 42 void HashInit(Hash *pNew, int keyClass, int copyKey){ 43 assert( pNew!=0 ); 44 assert( keyClass>=HASH_STRING && keyClass<=HASH_BINARY ); 45 pNew->keyClass = keyClass; 46 #if 0 47 if( keyClass==HASH_POINTER || keyClass==HASH_INT ) copyKey = 0; 48 #endif 49 pNew->copyKey = copyKey; 50 pNew->first = 0; 51 pNew->count = 0; 52 pNew->htsize = 0; 53 pNew->ht = 0; 54 pNew->xMalloc = malloc_and_zero; 55 pNew->xFree = free; 56 } 57 58 /* Remove all entries from a hash table. Reclaim all memory. 59 ** Call this routine to delete a hash table or to reset a hash table 60 ** to the empty state. 61 */ 62 void HashClear(Hash *pH){ 63 HashElem *elem; /* For looping over all elements of the table */ 64 65 assert( pH!=0 ); 66 elem = pH->first; 67 pH->first = 0; 68 if( pH->ht ) pH->xFree(pH->ht); 69 pH->ht = 0; 70 pH->htsize = 0; 71 while( elem ){ 72 HashElem *next_elem = elem->next; 73 if( pH->copyKey && elem->pKey ){ 74 pH->xFree(elem->pKey); 75 } 76 pH->xFree(elem); 77 elem = next_elem; 78 } 79 pH->count = 0; 80 } 81 82 #if 0 /* NOT USED */ 83 /* 84 ** Hash and comparison functions when the mode is HASH_INT 85 */ 86 static int intHash(const void *pKey, int nKey){ 87 return nKey ^ (nKey<<8) ^ (nKey>>8); 88 } 89 static int intCompare(const void *pKey1, int n1, const void *pKey2, int n2){ 90 return n2 - n1; 91 } 92 #endif 93 94 #if 0 /* NOT USED */ 95 /* 96 ** Hash and comparison functions when the mode is HASH_POINTER 97 */ 98 static int ptrHash(const void *pKey, int nKey){ 99 uptr x = Addr(pKey); 100 return x ^ (x<<8) ^ (x>>8); 101 } 102 static int ptrCompare(const void *pKey1, int n1, const void *pKey2, int n2){ 103 if( pKey1==pKey2 ) return 0; 104 if( pKey1<pKey2 ) return -1; 105 return 1; 106 } 107 #endif 108 109 /* 110 ** Hash and comparison functions when the mode is HASH_STRING 111 */ 112 static int strHash(const void *pKey, int nKey){ 113 const char *z = (const char *)pKey; 114 int h = 0; 115 if( nKey<=0 ) nKey = (int) strlen(z); 116 while( nKey > 0 ){ 117 h = (h<<3) ^ h ^ *z++; 118 nKey--; 119 } 120 return h & 0x7fffffff; 121 } 122 static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){ 123 if( n1!=n2 ) return 1; 124 return strncmp((const char*)pKey1,(const char*)pKey2,n1); 125 } 126 127 /* 128 ** Hash and comparison functions when the mode is HASH_BINARY 129 */ 130 static int binHash(const void *pKey, int nKey){ 131 int h = 0; 132 const char *z = (const char *)pKey; 133 while( nKey-- > 0 ){ 134 h = (h<<3) ^ h ^ *(z++); 135 } 136 return h & 0x7fffffff; 137 } 138 static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){ 139 if( n1!=n2 ) return 1; 140 return memcmp(pKey1,pKey2,n1); 141 } 142 143 /* 144 ** Return a pointer to the appropriate hash function given the key class. 145 ** 146 ** The C syntax in this function definition may be unfamilar to some 147 ** programmers, so we provide the following additional explanation: 148 ** 149 ** The name of the function is "hashFunction". The function takes a 150 ** single parameter "keyClass". The return value of hashFunction() 151 ** is a pointer to another function. Specifically, the return value 152 ** of hashFunction() is a pointer to a function that takes two parameters 153 ** with types "const void*" and "int" and returns an "int". 154 */ 155 static int (*hashFunction(int keyClass))(const void*,int){ 156 #if 0 /* HASH_INT and HASH_POINTER are never used */ 157 switch( keyClass ){ 158 case HASH_INT: return &intHash; 159 case HASH_POINTER: return &ptrHash; 160 case HASH_STRING: return &strHash; 161 case HASH_BINARY: return &binHash;; 162 default: break; 163 } 164 return 0; 165 #else 166 if( keyClass==HASH_STRING ){ 167 return &strHash; 168 }else{ 169 assert( keyClass==HASH_BINARY ); 170 return &binHash; 171 } 172 #endif 173 } 174 175 /* 176 ** Return a pointer to the appropriate hash function given the key class. 177 ** 178 ** For help in interpreted the obscure C code in the function definition, 179 ** see the header comment on the previous function. 180 */ 181 static int (*compareFunction(int keyClass))(const void*,int,const void*,int){ 182 #if 0 /* HASH_INT and HASH_POINTER are never used */ 183 switch( keyClass ){ 184 case HASH_INT: return &intCompare; 185 case HASH_POINTER: return &ptrCompare; 186 case HASH_STRING: return &strCompare; 187 case HASH_BINARY: return &binCompare; 188 default: break; 189 } 190 return 0; 191 #else 192 if( keyClass==HASH_STRING ){ 193 return &strCompare; 194 }else{ 195 assert( keyClass==HASH_BINARY ); 196 return &binCompare; 197 } 198 #endif 199 } 200 201 /* Link an element into the hash table 202 */ 203 static void insertElement( 204 Hash *pH, /* The complete hash table */ 205 struct _ht *pEntry, /* The entry into which pNew is inserted */ 206 HashElem *pNew /* The element to be inserted */ 207 ){ 208 HashElem *pHead; /* First element already in pEntry */ 209 pHead = pEntry->chain; 210 if( pHead ){ 211 pNew->next = pHead; 212 pNew->prev = pHead->prev; 213 if( pHead->prev ){ pHead->prev->next = pNew; } 214 else { pH->first = pNew; } 215 pHead->prev = pNew; 216 }else{ 217 pNew->next = pH->first; 218 if( pH->first ){ pH->first->prev = pNew; } 219 pNew->prev = 0; 220 pH->first = pNew; 221 } 222 pEntry->count++; 223 pEntry->chain = pNew; 224 } 225 226 227 /* Resize the hash table so that it cantains "new_size" buckets. 228 ** "new_size" must be a power of 2. The hash table might fail 229 ** to resize if sqliteMalloc() fails. 230 */ 231 static void rehash(Hash *pH, int new_size){ 232 struct _ht *new_ht; /* The new hash table */ 233 HashElem *elem, *next_elem; /* For looping over existing elements */ 234 int (*xHash)(const void*,int); /* The hash function */ 235 236 assert( (new_size & (new_size-1))==0 ); 237 new_ht = (struct _ht *)pH->xMalloc( new_size*sizeof(struct _ht) ); 238 if( new_ht==0 ) return; 239 if( pH->ht ) pH->xFree(pH->ht); 240 pH->ht = new_ht; 241 pH->htsize = new_size; 242 xHash = hashFunction(pH->keyClass); 243 for(elem=pH->first, pH->first=0; elem; elem = next_elem){ 244 int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); 245 next_elem = elem->next; 246 insertElement(pH, &new_ht[h], elem); 247 } 248 } 249 250 /* This function (for internal use only) locates an element in an 251 ** hash table that matches the given key. The hash for this key has 252 ** already been computed and is passed as the 4th parameter. 253 */ 254 static HashElem *findElementGivenHash( 255 const Hash *pH, /* The pH to be searched */ 256 const void *pKey, /* The key we are searching for */ 257 int nKey, 258 int h /* The hash for this key. */ 259 ){ 260 HashElem *elem; /* Used to loop thru the element list */ 261 int count; /* Number of elements left to test */ 262 int (*xCompare)(const void*,int,const void*,int); /* comparison function */ 263 264 if( pH->ht ){ 265 struct _ht *pEntry = &pH->ht[h]; 266 elem = pEntry->chain; 267 count = pEntry->count; 268 xCompare = compareFunction(pH->keyClass); 269 while( count-- && elem ){ 270 if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ 271 return elem; 272 } 273 elem = elem->next; 274 } 275 } 276 return 0; 277 } 278 279 /* Remove a single entry from the hash table given a pointer to that 280 ** element and a hash on the element's key. 281 */ 282 static void removeElementGivenHash( 283 Hash *pH, /* The pH containing "elem" */ 284 HashElem* elem, /* The element to be removed from the pH */ 285 int h /* Hash value for the element */ 286 ){ 287 struct _ht *pEntry; 288 if( elem->prev ){ 289 elem->prev->next = elem->next; 290 }else{ 291 pH->first = elem->next; 292 } 293 if( elem->next ){ 294 elem->next->prev = elem->prev; 295 } 296 pEntry = &pH->ht[h]; 297 if( pEntry->chain==elem ){ 298 pEntry->chain = elem->next; 299 } 300 pEntry->count--; 301 if( pEntry->count<=0 ){ 302 pEntry->chain = 0; 303 } 304 if( pH->copyKey && elem->pKey ){ 305 pH->xFree(elem->pKey); 306 } 307 pH->xFree( elem ); 308 pH->count--; 309 if( pH->count<=0 ){ 310 assert( pH->first==0 ); 311 assert( pH->count==0 ); 312 HashClear(pH); 313 } 314 } 315 316 /* Attempt to locate an element of the hash table pH with a key 317 ** that matches pKey,nKey. Return the data for this element if it is 318 ** found, or NULL if there is no match. 319 */ 320 void *HashFind(const Hash *pH, const void *pKey, int nKey){ 321 int h; /* A hash on key */ 322 HashElem *elem; /* The element that matches key */ 323 int (*xHash)(const void*,int); /* The hash function */ 324 325 if( pH==0 || pH->ht==0 ) return 0; 326 xHash = hashFunction(pH->keyClass); 327 assert( xHash!=0 ); 328 h = (*xHash)(pKey,nKey); 329 assert( (pH->htsize & (pH->htsize-1))==0 ); 330 elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1)); 331 return elem ? elem->data : 0; 332 } 333 334 /* Insert an element into the hash table pH. The key is pKey,nKey 335 ** and the data is "data". 336 ** 337 ** If no element exists with a matching key, then a new 338 ** element is created. A copy of the key is made if the copyKey 339 ** flag is set. NULL is returned. 340 ** 341 ** If another element already exists with the same key, then the 342 ** new data replaces the old data and the old data is returned. 343 ** The key is not copied in this instance. If a malloc fails, then 344 ** the new data is returned and the hash table is unchanged. 345 ** 346 ** If the "data" parameter to this function is NULL, then the 347 ** element corresponding to "key" is removed from the hash table. 348 */ 349 void *HashInsert(Hash *pH, const void *pKey, int nKey, void *data){ 350 int hraw; /* Raw hash value of the key */ 351 int h; /* the hash of the key modulo hash table size */ 352 HashElem *elem; /* Used to loop thru the element list */ 353 HashElem *new_elem; /* New element added to the pH */ 354 int (*xHash)(const void*,int); /* The hash function */ 355 356 assert( pH!=0 ); 357 xHash = hashFunction(pH->keyClass); 358 assert( xHash!=0 ); 359 hraw = (*xHash)(pKey, nKey); 360 assert( (pH->htsize & (pH->htsize-1))==0 ); 361 h = hraw & (pH->htsize-1); 362 elem = findElementGivenHash(pH,pKey,nKey,h); 363 if( elem ){ 364 void *old_data = elem->data; 365 if( data==0 ){ 366 removeElementGivenHash(pH,elem,h); 367 }else{ 368 elem->data = data; 369 } 370 return old_data; 371 } 372 if( data==0 ) return 0; 373 new_elem = (HashElem*)pH->xMalloc( sizeof(HashElem) ); 374 if( new_elem==0 ) return data; 375 if( pH->copyKey && pKey!=0 ){ 376 new_elem->pKey = pH->xMalloc( nKey ); 377 if( new_elem->pKey==0 ){ 378 pH->xFree(new_elem); 379 return data; 380 } 381 memcpy((void*)new_elem->pKey, pKey, nKey); 382 }else{ 383 new_elem->pKey = (void*)pKey; 384 } 385 new_elem->nKey = nKey; 386 pH->count++; 387 if( pH->htsize==0 ){ 388 rehash(pH,8); 389 if( pH->htsize==0 ){ 390 pH->count = 0; 391 pH->xFree(new_elem); 392 return data; 393 } 394 } 395 if( pH->count > pH->htsize ){ 396 rehash(pH,pH->htsize*2); 397 } 398 assert( pH->htsize>0 ); 399 assert( (pH->htsize & (pH->htsize-1))==0 ); 400 h = hraw & (pH->htsize-1); 401 insertElement(pH, &pH->ht[h], new_elem); 402 new_elem->data = data; 403 return 0; 404 } 405