Home | History | Annotate | Download | only in src
      1 /*
      2 ** 2008 February 16
      3 **
      4 ** The author disclaims copyright to this source code.  In place of
      5 ** a legal notice, here is a blessing:
      6 **
      7 **    May you do good and not evil.
      8 **    May you find forgiveness for yourself and forgive others.
      9 **    May you share freely, never taking more than you give.
     10 **
     11 *************************************************************************
     12 ** This file implements an object that represents a fixed-length
     13 ** bitmap.  Bits are numbered starting with 1.
     14 **
     15 ** A bitmap is used to record which pages of a database file have been
     16 ** journalled during a transaction, or which pages have the "dont-write"
     17 ** property.  Usually only a few pages are meet either condition.
     18 ** So the bitmap is usually sparse and has low cardinality.
     19 ** But sometimes (for example when during a DROP of a large table) most
     20 ** or all of the pages in a database can get journalled.  In those cases,
     21 ** the bitmap becomes dense with high cardinality.  The algorithm needs
     22 ** to handle both cases well.
     23 **
     24 ** The size of the bitmap is fixed when the object is created.
     25 **
     26 ** All bits are clear when the bitmap is created.  Individual bits
     27 ** may be set or cleared one at a time.
     28 **
     29 ** Test operations are about 100 times more common that set operations.
     30 ** Clear operations are exceedingly rare.  There are usually between
     31 ** 5 and 500 set operations per Bitvec object, though the number of sets can
     32 ** sometimes grow into tens of thousands or larger.  The size of the
     33 ** Bitvec object is the number of pages in the database file at the
     34 ** start of a transaction, and is thus usually less than a few thousand,
     35 ** but can be as large as 2 billion for a really big database.
     36 */
     37 #include "sqliteInt.h"
     38 
     39 /* Size of the Bitvec structure in bytes. */
     40 #define BITVEC_SZ        512
     41 
     42 /* Round the union size down to the nearest pointer boundary, since that's how
     43 ** it will be aligned within the Bitvec struct. */
     44 #define BITVEC_USIZE     (((BITVEC_SZ-(3*sizeof(u32)))/sizeof(Bitvec*))*sizeof(Bitvec*))
     45 
     46 /* Type of the array "element" for the bitmap representation.
     47 ** Should be a power of 2, and ideally, evenly divide into BITVEC_USIZE.
     48 ** Setting this to the "natural word" size of your CPU may improve
     49 ** performance. */
     50 #define BITVEC_TELEM     u8
     51 /* Size, in bits, of the bitmap element. */
     52 #define BITVEC_SZELEM    8
     53 /* Number of elements in a bitmap array. */
     54 #define BITVEC_NELEM     (BITVEC_USIZE/sizeof(BITVEC_TELEM))
     55 /* Number of bits in the bitmap array. */
     56 #define BITVEC_NBIT      (BITVEC_NELEM*BITVEC_SZELEM)
     57 
     58 /* Number of u32 values in hash table. */
     59 #define BITVEC_NINT      (BITVEC_USIZE/sizeof(u32))
     60 /* Maximum number of entries in hash table before
     61 ** sub-dividing and re-hashing. */
     62 #define BITVEC_MXHASH    (BITVEC_NINT/2)
     63 /* Hashing function for the aHash representation.
     64 ** Empirical testing showed that the *37 multiplier
     65 ** (an arbitrary prime)in the hash function provided
     66 ** no fewer collisions than the no-op *1. */
     67 #define BITVEC_HASH(X)   (((X)*1)%BITVEC_NINT)
     68 
     69 #define BITVEC_NPTR      (BITVEC_USIZE/sizeof(Bitvec *))
     70 
     71 
     72 /*
     73 ** A bitmap is an instance of the following structure.
     74 **
     75 ** This bitmap records the existance of zero or more bits
     76 ** with values between 1 and iSize, inclusive.
     77 **
     78 ** There are three possible representations of the bitmap.
     79 ** If iSize<=BITVEC_NBIT, then Bitvec.u.aBitmap[] is a straight
     80 ** bitmap.  The least significant bit is bit 1.
     81 **
     82 ** If iSize>BITVEC_NBIT and iDivisor==0 then Bitvec.u.aHash[] is
     83 ** a hash table that will hold up to BITVEC_MXHASH distinct values.
     84 **
     85 ** Otherwise, the value i is redirected into one of BITVEC_NPTR
     86 ** sub-bitmaps pointed to by Bitvec.u.apSub[].  Each subbitmap
     87 ** handles up to iDivisor separate values of i.  apSub[0] holds
     88 ** values between 1 and iDivisor.  apSub[1] holds values between
     89 ** iDivisor+1 and 2*iDivisor.  apSub[N] holds values between
     90 ** N*iDivisor+1 and (N+1)*iDivisor.  Each subbitmap is normalized
     91 ** to hold deal with values between 1 and iDivisor.
     92 */
     93 struct Bitvec {
     94   u32 iSize;      /* Maximum bit index.  Max iSize is 4,294,967,296. */
     95   u32 nSet;       /* Number of bits that are set - only valid for aHash
     96                   ** element.  Max is BITVEC_NINT.  For BITVEC_SZ of 512,
     97                   ** this would be 125. */
     98   u32 iDivisor;   /* Number of bits handled by each apSub[] entry. */
     99                   /* Should >=0 for apSub element. */
    100                   /* Max iDivisor is max(u32) / BITVEC_NPTR + 1.  */
    101                   /* For a BITVEC_SZ of 512, this would be 34,359,739. */
    102   union {
    103     BITVEC_TELEM aBitmap[BITVEC_NELEM];    /* Bitmap representation */
    104     u32 aHash[BITVEC_NINT];      /* Hash table representation */
    105     Bitvec *apSub[BITVEC_NPTR];  /* Recursive representation */
    106   } u;
    107 };
    108 
    109 /*
    110 ** Create a new bitmap object able to handle bits between 0 and iSize,
    111 ** inclusive.  Return a pointer to the new object.  Return NULL if
    112 ** malloc fails.
    113 */
    114 Bitvec *sqlite3BitvecCreate(u32 iSize){
    115   Bitvec *p;
    116   assert( sizeof(*p)==BITVEC_SZ );
    117   p = sqlite3MallocZero( sizeof(*p) );
    118   if( p ){
    119     p->iSize = iSize;
    120   }
    121   return p;
    122 }
    123 
    124 /*
    125 ** Check to see if the i-th bit is set.  Return true or false.
    126 ** If p is NULL (if the bitmap has not been created) or if
    127 ** i is out of range, then return false.
    128 */
    129 int sqlite3BitvecTest(Bitvec *p, u32 i){
    130   if( p==0 ) return 0;
    131   if( i>p->iSize || i==0 ) return 0;
    132   i--;
    133   while( p->iDivisor ){
    134     u32 bin = i/p->iDivisor;
    135     i = i%p->iDivisor;
    136     p = p->u.apSub[bin];
    137     if (!p) {
    138       return 0;
    139     }
    140   }
    141   if( p->iSize<=BITVEC_NBIT ){
    142     return (p->u.aBitmap[i/BITVEC_SZELEM] & (1<<(i&(BITVEC_SZELEM-1))))!=0;
    143   } else{
    144     u32 h = BITVEC_HASH(i++);
    145     while( p->u.aHash[h] ){
    146       if( p->u.aHash[h]==i ) return 1;
    147       h = (h+1) % BITVEC_NINT;
    148     }
    149     return 0;
    150   }
    151 }
    152 
    153 /*
    154 ** Set the i-th bit.  Return 0 on success and an error code if
    155 ** anything goes wrong.
    156 **
    157 ** This routine might cause sub-bitmaps to be allocated.  Failing
    158 ** to get the memory needed to hold the sub-bitmap is the only
    159 ** that can go wrong with an insert, assuming p and i are valid.
    160 **
    161 ** The calling function must ensure that p is a valid Bitvec object
    162 ** and that the value for "i" is within range of the Bitvec object.
    163 ** Otherwise the behavior is undefined.
    164 */
    165 int sqlite3BitvecSet(Bitvec *p, u32 i){
    166   u32 h;
    167   if( p==0 ) return SQLITE_OK;
    168   assert( i>0 );
    169   assert( i<=p->iSize );
    170   i--;
    171   while((p->iSize > BITVEC_NBIT) && p->iDivisor) {
    172     u32 bin = i/p->iDivisor;
    173     i = i%p->iDivisor;
    174     if( p->u.apSub[bin]==0 ){
    175       p->u.apSub[bin] = sqlite3BitvecCreate( p->iDivisor );
    176       if( p->u.apSub[bin]==0 ) return SQLITE_NOMEM;
    177     }
    178     p = p->u.apSub[bin];
    179   }
    180   if( p->iSize<=BITVEC_NBIT ){
    181     p->u.aBitmap[i/BITVEC_SZELEM] |= 1 << (i&(BITVEC_SZELEM-1));
    182     return SQLITE_OK;
    183   }
    184   h = BITVEC_HASH(i++);
    185   /* if there wasn't a hash collision, and this doesn't */
    186   /* completely fill the hash, then just add it without */
    187   /* worring about sub-dividing and re-hashing. */
    188   if( !p->u.aHash[h] ){
    189     if (p->nSet<(BITVEC_NINT-1)) {
    190       goto bitvec_set_end;
    191     } else {
    192       goto bitvec_set_rehash;
    193     }
    194   }
    195   /* there was a collision, check to see if it's already */
    196   /* in hash, if not, try to find a spot for it */
    197   do {
    198     if( p->u.aHash[h]==i ) return SQLITE_OK;
    199     h++;
    200     if( h>=BITVEC_NINT ) h = 0;
    201   } while( p->u.aHash[h] );
    202   /* we didn't find it in the hash.  h points to the first */
    203   /* available free spot. check to see if this is going to */
    204   /* make our hash too "full".  */
    205 bitvec_set_rehash:
    206   if( p->nSet>=BITVEC_MXHASH ){
    207     unsigned int j;
    208     int rc;
    209     u32 *aiValues = sqlite3StackAllocRaw(0, sizeof(p->u.aHash));
    210     if( aiValues==0 ){
    211       return SQLITE_NOMEM;
    212     }else{
    213       memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash));
    214       memset(p->u.apSub, 0, sizeof(p->u.apSub));
    215       p->iDivisor = (p->iSize + BITVEC_NPTR - 1)/BITVEC_NPTR;
    216       rc = sqlite3BitvecSet(p, i);
    217       for(j=0; j<BITVEC_NINT; j++){
    218         if( aiValues[j] ) rc |= sqlite3BitvecSet(p, aiValues[j]);
    219       }
    220       sqlite3StackFree(0, aiValues);
    221       return rc;
    222     }
    223   }
    224 bitvec_set_end:
    225   p->nSet++;
    226   p->u.aHash[h] = i;
    227   return SQLITE_OK;
    228 }
    229 
    230 /*
    231 ** Clear the i-th bit.
    232 **
    233 ** pBuf must be a pointer to at least BITVEC_SZ bytes of temporary storage
    234 ** that BitvecClear can use to rebuilt its hash table.
    235 */
    236 void sqlite3BitvecClear(Bitvec *p, u32 i, void *pBuf){
    237   if( p==0 ) return;
    238   assert( i>0 );
    239   i--;
    240   while( p->iDivisor ){
    241     u32 bin = i/p->iDivisor;
    242     i = i%p->iDivisor;
    243     p = p->u.apSub[bin];
    244     if (!p) {
    245       return;
    246     }
    247   }
    248   if( p->iSize<=BITVEC_NBIT ){
    249     p->u.aBitmap[i/BITVEC_SZELEM] &= ~(1 << (i&(BITVEC_SZELEM-1)));
    250   }else{
    251     unsigned int j;
    252     u32 *aiValues = pBuf;
    253     memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash));
    254     memset(p->u.aHash, 0, sizeof(p->u.aHash));
    255     p->nSet = 0;
    256     for(j=0; j<BITVEC_NINT; j++){
    257       if( aiValues[j] && aiValues[j]!=(i+1) ){
    258         u32 h = BITVEC_HASH(aiValues[j]-1);
    259         p->nSet++;
    260         while( p->u.aHash[h] ){
    261           h++;
    262           if( h>=BITVEC_NINT ) h = 0;
    263         }
    264         p->u.aHash[h] = aiValues[j];
    265       }
    266     }
    267   }
    268 }
    269 
    270 /*
    271 ** Destroy a bitmap object.  Reclaim all memory used.
    272 */
    273 void sqlite3BitvecDestroy(Bitvec *p){
    274   if( p==0 ) return;
    275   if( p->iDivisor ){
    276     unsigned int i;
    277     for(i=0; i<BITVEC_NPTR; i++){
    278       sqlite3BitvecDestroy(p->u.apSub[i]);
    279     }
    280   }
    281   sqlite3_free(p);
    282 }
    283 
    284 /*
    285 ** Return the value of the iSize parameter specified when Bitvec *p
    286 ** was created.
    287 */
    288 u32 sqlite3BitvecSize(Bitvec *p){
    289   return p->iSize;
    290 }
    291 
    292 #ifndef SQLITE_OMIT_BUILTIN_TEST
    293 /*
    294 ** Let V[] be an array of unsigned characters sufficient to hold
    295 ** up to N bits.  Let I be an integer between 0 and N.  0<=I<N.
    296 ** Then the following macros can be used to set, clear, or test
    297 ** individual bits within V.
    298 */
    299 #define SETBIT(V,I)      V[I>>3] |= (1<<(I&7))
    300 #define CLEARBIT(V,I)    V[I>>3] &= ~(1<<(I&7))
    301 #define TESTBIT(V,I)     (V[I>>3]&(1<<(I&7)))!=0
    302 
    303 /*
    304 ** This routine runs an extensive test of the Bitvec code.
    305 **
    306 ** The input is an array of integers that acts as a program
    307 ** to test the Bitvec.  The integers are opcodes followed
    308 ** by 0, 1, or 3 operands, depending on the opcode.  Another
    309 ** opcode follows immediately after the last operand.
    310 **
    311 ** There are 6 opcodes numbered from 0 through 5.  0 is the
    312 ** "halt" opcode and causes the test to end.
    313 **
    314 **    0          Halt and return the number of errors
    315 **    1 N S X    Set N bits beginning with S and incrementing by X
    316 **    2 N S X    Clear N bits beginning with S and incrementing by X
    317 **    3 N        Set N randomly chosen bits
    318 **    4 N        Clear N randomly chosen bits
    319 **    5 N S X    Set N bits from S increment X in array only, not in bitvec
    320 **
    321 ** The opcodes 1 through 4 perform set and clear operations are performed
    322 ** on both a Bitvec object and on a linear array of bits obtained from malloc.
    323 ** Opcode 5 works on the linear array only, not on the Bitvec.
    324 ** Opcode 5 is used to deliberately induce a fault in order to
    325 ** confirm that error detection works.
    326 **
    327 ** At the conclusion of the test the linear array is compared
    328 ** against the Bitvec object.  If there are any differences,
    329 ** an error is returned.  If they are the same, zero is returned.
    330 **
    331 ** If a memory allocation error occurs, return -1.
    332 */
    333 int sqlite3BitvecBuiltinTest(int sz, int *aOp){
    334   Bitvec *pBitvec = 0;
    335   unsigned char *pV = 0;
    336   int rc = -1;
    337   int i, nx, pc, op;
    338   void *pTmpSpace;
    339 
    340   /* Allocate the Bitvec to be tested and a linear array of
    341   ** bits to act as the reference */
    342   pBitvec = sqlite3BitvecCreate( sz );
    343   pV = sqlite3_malloc( (sz+7)/8 + 1 );
    344   pTmpSpace = sqlite3_malloc(BITVEC_SZ);
    345   if( pBitvec==0 || pV==0 || pTmpSpace==0  ) goto bitvec_end;
    346   memset(pV, 0, (sz+7)/8 + 1);
    347 
    348   /* NULL pBitvec tests */
    349   sqlite3BitvecSet(0, 1);
    350   sqlite3BitvecClear(0, 1, pTmpSpace);
    351 
    352   /* Run the program */
    353   pc = 0;
    354   while( (op = aOp[pc])!=0 ){
    355     switch( op ){
    356       case 1:
    357       case 2:
    358       case 5: {
    359         nx = 4;
    360         i = aOp[pc+2] - 1;
    361         aOp[pc+2] += aOp[pc+3];
    362         break;
    363       }
    364       case 3:
    365       case 4:
    366       default: {
    367         nx = 2;
    368         sqlite3_randomness(sizeof(i), &i);
    369         break;
    370       }
    371     }
    372     if( (--aOp[pc+1]) > 0 ) nx = 0;
    373     pc += nx;
    374     i = (i & 0x7fffffff)%sz;
    375     if( (op & 1)!=0 ){
    376       SETBIT(pV, (i+1));
    377       if( op!=5 ){
    378         if( sqlite3BitvecSet(pBitvec, i+1) ) goto bitvec_end;
    379       }
    380     }else{
    381       CLEARBIT(pV, (i+1));
    382       sqlite3BitvecClear(pBitvec, i+1, pTmpSpace);
    383     }
    384   }
    385 
    386   /* Test to make sure the linear array exactly matches the
    387   ** Bitvec object.  Start with the assumption that they do
    388   ** match (rc==0).  Change rc to non-zero if a discrepancy
    389   ** is found.
    390   */
    391   rc = sqlite3BitvecTest(0,0) + sqlite3BitvecTest(pBitvec, sz+1)
    392           + sqlite3BitvecTest(pBitvec, 0)
    393           + (sqlite3BitvecSize(pBitvec) - sz);
    394   for(i=1; i<=sz; i++){
    395     if(  (TESTBIT(pV,i))!=sqlite3BitvecTest(pBitvec,i) ){
    396       rc = i;
    397       break;
    398     }
    399   }
    400 
    401   /* Free allocated structure */
    402 bitvec_end:
    403   sqlite3_free(pTmpSpace);
    404   sqlite3_free(pV);
    405   sqlite3BitvecDestroy(pBitvec);
    406   return rc;
    407 }
    408 #endif /* SQLITE_OMIT_BUILTIN_TEST */
    409