Home | History | Annotate | Download | only in fts1
      1 /*
      2 ** 2001 September 22
      3 **
      4 ** The author disclaims copyright to this source code.  In place of
      5 ** a legal notice, here is a blessing:
      6 **
      7 **    May you do good and not evil.
      8 **    May you find forgiveness for yourself and forgive others.
      9 **    May you share freely, never taking more than you give.
     10 **
     11 *************************************************************************
     12 ** This is the implementation of generic hash-tables used in SQLite.
     13 ** We've modified it slightly to serve as a standalone hash table
     14 ** implementation for the full-text indexing module.
     15 */
     16 #include <assert.h>
     17 #include <stdlib.h>
     18 #include <string.h>
     19 
     20 #include "ft_hash.h"
     21 
     22 void *malloc_and_zero(int n){
     23   void *p = malloc(n);
     24   if( p ){
     25     memset(p, 0, n);
     26   }
     27   return p;
     28 }
     29 
     30 /* Turn bulk memory into a hash table object by initializing the
     31 ** fields of the Hash structure.
     32 **
     33 ** "pNew" is a pointer to the hash table that is to be initialized.
     34 ** keyClass is one of the constants HASH_INT, HASH_POINTER,
     35 ** HASH_BINARY, or HASH_STRING.  The value of keyClass
     36 ** determines what kind of key the hash table will use.  "copyKey" is
     37 ** true if the hash table should make its own private copy of keys and
     38 ** false if it should just use the supplied pointer.  CopyKey only makes
     39 ** sense for HASH_STRING and HASH_BINARY and is ignored
     40 ** for other key classes.
     41 */
     42 void HashInit(Hash *pNew, int keyClass, int copyKey){
     43   assert( pNew!=0 );
     44   assert( keyClass>=HASH_STRING && keyClass<=HASH_BINARY );
     45   pNew->keyClass = keyClass;
     46 #if 0
     47   if( keyClass==HASH_POINTER || keyClass==HASH_INT ) copyKey = 0;
     48 #endif
     49   pNew->copyKey = copyKey;
     50   pNew->first = 0;
     51   pNew->count = 0;
     52   pNew->htsize = 0;
     53   pNew->ht = 0;
     54   pNew->xMalloc = malloc_and_zero;
     55   pNew->xFree = free;
     56 }
     57 
     58 /* Remove all entries from a hash table.  Reclaim all memory.
     59 ** Call this routine to delete a hash table or to reset a hash table
     60 ** to the empty state.
     61 */
     62 void HashClear(Hash *pH){
     63   HashElem *elem;         /* For looping over all elements of the table */
     64 
     65   assert( pH!=0 );
     66   elem = pH->first;
     67   pH->first = 0;
     68   if( pH->ht ) pH->xFree(pH->ht);
     69   pH->ht = 0;
     70   pH->htsize = 0;
     71   while( elem ){
     72     HashElem *next_elem = elem->next;
     73     if( pH->copyKey && elem->pKey ){
     74       pH->xFree(elem->pKey);
     75     }
     76     pH->xFree(elem);
     77     elem = next_elem;
     78   }
     79   pH->count = 0;
     80 }
     81 
     82 #if 0 /* NOT USED */
     83 /*
     84 ** Hash and comparison functions when the mode is HASH_INT
     85 */
     86 static int intHash(const void *pKey, int nKey){
     87   return nKey ^ (nKey<<8) ^ (nKey>>8);
     88 }
     89 static int intCompare(const void *pKey1, int n1, const void *pKey2, int n2){
     90   return n2 - n1;
     91 }
     92 #endif
     93 
     94 #if 0 /* NOT USED */
     95 /*
     96 ** Hash and comparison functions when the mode is HASH_POINTER
     97 */
     98 static int ptrHash(const void *pKey, int nKey){
     99   uptr x = Addr(pKey);
    100   return x ^ (x<<8) ^ (x>>8);
    101 }
    102 static int ptrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
    103   if( pKey1==pKey2 ) return 0;
    104   if( pKey1<pKey2 ) return -1;
    105   return 1;
    106 }
    107 #endif
    108 
    109 /*
    110 ** Hash and comparison functions when the mode is HASH_STRING
    111 */
    112 static int strHash(const void *pKey, int nKey){
    113   const char *z = (const char *)pKey;
    114   int h = 0;
    115   if( nKey<=0 ) nKey = (int) strlen(z);
    116   while( nKey > 0  ){
    117     h = (h<<3) ^ h ^ *z++;
    118     nKey--;
    119   }
    120   return h & 0x7fffffff;
    121 }
    122 static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
    123   if( n1!=n2 ) return 1;
    124   return strncmp((const char*)pKey1,(const char*)pKey2,n1);
    125 }
    126 
    127 /*
    128 ** Hash and comparison functions when the mode is HASH_BINARY
    129 */
    130 static int binHash(const void *pKey, int nKey){
    131   int h = 0;
    132   const char *z = (const char *)pKey;
    133   while( nKey-- > 0 ){
    134     h = (h<<3) ^ h ^ *(z++);
    135   }
    136   return h & 0x7fffffff;
    137 }
    138 static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
    139   if( n1!=n2 ) return 1;
    140   return memcmp(pKey1,pKey2,n1);
    141 }
    142 
    143 /*
    144 ** Return a pointer to the appropriate hash function given the key class.
    145 **
    146 ** The C syntax in this function definition may be unfamilar to some
    147 ** programmers, so we provide the following additional explanation:
    148 **
    149 ** The name of the function is "hashFunction".  The function takes a
    150 ** single parameter "keyClass".  The return value of hashFunction()
    151 ** is a pointer to another function.  Specifically, the return value
    152 ** of hashFunction() is a pointer to a function that takes two parameters
    153 ** with types "const void*" and "int" and returns an "int".
    154 */
    155 static int (*hashFunction(int keyClass))(const void*,int){
    156 #if 0  /* HASH_INT and HASH_POINTER are never used */
    157   switch( keyClass ){
    158     case HASH_INT:     return &intHash;
    159     case HASH_POINTER: return &ptrHash;
    160     case HASH_STRING:  return &strHash;
    161     case HASH_BINARY:  return &binHash;;
    162     default: break;
    163   }
    164   return 0;
    165 #else
    166   if( keyClass==HASH_STRING ){
    167     return &strHash;
    168   }else{
    169     assert( keyClass==HASH_BINARY );
    170     return &binHash;
    171   }
    172 #endif
    173 }
    174 
    175 /*
    176 ** Return a pointer to the appropriate hash function given the key class.
    177 **
    178 ** For help in interpreted the obscure C code in the function definition,
    179 ** see the header comment on the previous function.
    180 */
    181 static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
    182 #if 0 /* HASH_INT and HASH_POINTER are never used */
    183   switch( keyClass ){
    184     case HASH_INT:     return &intCompare;
    185     case HASH_POINTER: return &ptrCompare;
    186     case HASH_STRING:  return &strCompare;
    187     case HASH_BINARY:  return &binCompare;
    188     default: break;
    189   }
    190   return 0;
    191 #else
    192   if( keyClass==HASH_STRING ){
    193     return &strCompare;
    194   }else{
    195     assert( keyClass==HASH_BINARY );
    196     return &binCompare;
    197   }
    198 #endif
    199 }
    200 
    201 /* Link an element into the hash table
    202 */
    203 static void insertElement(
    204   Hash *pH,              /* The complete hash table */
    205   struct _ht *pEntry,    /* The entry into which pNew is inserted */
    206   HashElem *pNew         /* The element to be inserted */
    207 ){
    208   HashElem *pHead;       /* First element already in pEntry */
    209   pHead = pEntry->chain;
    210   if( pHead ){
    211     pNew->next = pHead;
    212     pNew->prev = pHead->prev;
    213     if( pHead->prev ){ pHead->prev->next = pNew; }
    214     else             { pH->first = pNew; }
    215     pHead->prev = pNew;
    216   }else{
    217     pNew->next = pH->first;
    218     if( pH->first ){ pH->first->prev = pNew; }
    219     pNew->prev = 0;
    220     pH->first = pNew;
    221   }
    222   pEntry->count++;
    223   pEntry->chain = pNew;
    224 }
    225 
    226 
    227 /* Resize the hash table so that it cantains "new_size" buckets.
    228 ** "new_size" must be a power of 2.  The hash table might fail
    229 ** to resize if sqliteMalloc() fails.
    230 */
    231 static void rehash(Hash *pH, int new_size){
    232   struct _ht *new_ht;            /* The new hash table */
    233   HashElem *elem, *next_elem;    /* For looping over existing elements */
    234   int (*xHash)(const void*,int); /* The hash function */
    235 
    236   assert( (new_size & (new_size-1))==0 );
    237   new_ht = (struct _ht *)pH->xMalloc( new_size*sizeof(struct _ht) );
    238   if( new_ht==0 ) return;
    239   if( pH->ht ) pH->xFree(pH->ht);
    240   pH->ht = new_ht;
    241   pH->htsize = new_size;
    242   xHash = hashFunction(pH->keyClass);
    243   for(elem=pH->first, pH->first=0; elem; elem = next_elem){
    244     int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
    245     next_elem = elem->next;
    246     insertElement(pH, &new_ht[h], elem);
    247   }
    248 }
    249 
    250 /* This function (for internal use only) locates an element in an
    251 ** hash table that matches the given key.  The hash for this key has
    252 ** already been computed and is passed as the 4th parameter.
    253 */
    254 static HashElem *findElementGivenHash(
    255   const Hash *pH,     /* The pH to be searched */
    256   const void *pKey,   /* The key we are searching for */
    257   int nKey,
    258   int h               /* The hash for this key. */
    259 ){
    260   HashElem *elem;                /* Used to loop thru the element list */
    261   int count;                     /* Number of elements left to test */
    262   int (*xCompare)(const void*,int,const void*,int);  /* comparison function */
    263 
    264   if( pH->ht ){
    265     struct _ht *pEntry = &pH->ht[h];
    266     elem = pEntry->chain;
    267     count = pEntry->count;
    268     xCompare = compareFunction(pH->keyClass);
    269     while( count-- && elem ){
    270       if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
    271         return elem;
    272       }
    273       elem = elem->next;
    274     }
    275   }
    276   return 0;
    277 }
    278 
    279 /* Remove a single entry from the hash table given a pointer to that
    280 ** element and a hash on the element's key.
    281 */
    282 static void removeElementGivenHash(
    283   Hash *pH,         /* The pH containing "elem" */
    284   HashElem* elem,   /* The element to be removed from the pH */
    285   int h             /* Hash value for the element */
    286 ){
    287   struct _ht *pEntry;
    288   if( elem->prev ){
    289     elem->prev->next = elem->next;
    290   }else{
    291     pH->first = elem->next;
    292   }
    293   if( elem->next ){
    294     elem->next->prev = elem->prev;
    295   }
    296   pEntry = &pH->ht[h];
    297   if( pEntry->chain==elem ){
    298     pEntry->chain = elem->next;
    299   }
    300   pEntry->count--;
    301   if( pEntry->count<=0 ){
    302     pEntry->chain = 0;
    303   }
    304   if( pH->copyKey && elem->pKey ){
    305     pH->xFree(elem->pKey);
    306   }
    307   pH->xFree( elem );
    308   pH->count--;
    309   if( pH->count<=0 ){
    310     assert( pH->first==0 );
    311     assert( pH->count==0 );
    312     HashClear(pH);
    313   }
    314 }
    315 
    316 /* Attempt to locate an element of the hash table pH with a key
    317 ** that matches pKey,nKey.  Return the data for this element if it is
    318 ** found, or NULL if there is no match.
    319 */
    320 void *HashFind(const Hash *pH, const void *pKey, int nKey){
    321   int h;             /* A hash on key */
    322   HashElem *elem;    /* The element that matches key */
    323   int (*xHash)(const void*,int);  /* The hash function */
    324 
    325   if( pH==0 || pH->ht==0 ) return 0;
    326   xHash = hashFunction(pH->keyClass);
    327   assert( xHash!=0 );
    328   h = (*xHash)(pKey,nKey);
    329   assert( (pH->htsize & (pH->htsize-1))==0 );
    330   elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
    331   return elem ? elem->data : 0;
    332 }
    333 
    334 /* Insert an element into the hash table pH.  The key is pKey,nKey
    335 ** and the data is "data".
    336 **
    337 ** If no element exists with a matching key, then a new
    338 ** element is created.  A copy of the key is made if the copyKey
    339 ** flag is set.  NULL is returned.
    340 **
    341 ** If another element already exists with the same key, then the
    342 ** new data replaces the old data and the old data is returned.
    343 ** The key is not copied in this instance.  If a malloc fails, then
    344 ** the new data is returned and the hash table is unchanged.
    345 **
    346 ** If the "data" parameter to this function is NULL, then the
    347 ** element corresponding to "key" is removed from the hash table.
    348 */
    349 void *HashInsert(Hash *pH, const void *pKey, int nKey, void *data){
    350   int hraw;             /* Raw hash value of the key */
    351   int h;                /* the hash of the key modulo hash table size */
    352   HashElem *elem;       /* Used to loop thru the element list */
    353   HashElem *new_elem;   /* New element added to the pH */
    354   int (*xHash)(const void*,int);  /* The hash function */
    355 
    356   assert( pH!=0 );
    357   xHash = hashFunction(pH->keyClass);
    358   assert( xHash!=0 );
    359   hraw = (*xHash)(pKey, nKey);
    360   assert( (pH->htsize & (pH->htsize-1))==0 );
    361   h = hraw & (pH->htsize-1);
    362   elem = findElementGivenHash(pH,pKey,nKey,h);
    363   if( elem ){
    364     void *old_data = elem->data;
    365     if( data==0 ){
    366       removeElementGivenHash(pH,elem,h);
    367     }else{
    368       elem->data = data;
    369     }
    370     return old_data;
    371   }
    372   if( data==0 ) return 0;
    373   new_elem = (HashElem*)pH->xMalloc( sizeof(HashElem) );
    374   if( new_elem==0 ) return data;
    375   if( pH->copyKey && pKey!=0 ){
    376     new_elem->pKey = pH->xMalloc( nKey );
    377     if( new_elem->pKey==0 ){
    378       pH->xFree(new_elem);
    379       return data;
    380     }
    381     memcpy((void*)new_elem->pKey, pKey, nKey);
    382   }else{
    383     new_elem->pKey = (void*)pKey;
    384   }
    385   new_elem->nKey = nKey;
    386   pH->count++;
    387   if( pH->htsize==0 ){
    388     rehash(pH,8);
    389     if( pH->htsize==0 ){
    390       pH->count = 0;
    391       pH->xFree(new_elem);
    392       return data;
    393     }
    394   }
    395   if( pH->count > pH->htsize ){
    396     rehash(pH,pH->htsize*2);
    397   }
    398   assert( pH->htsize>0 );
    399   assert( (pH->htsize & (pH->htsize-1))==0 );
    400   h = hraw & (pH->htsize-1);
    401   insertElement(pH, &pH->ht[h], new_elem);
    402   new_elem->data = data;
    403   return 0;
    404 }
    405