Home | History | Annotate | Download | only in src
      1 /*
      2 ** 2011 March 24
      3 **
      4 ** The author disclaims copyright to this source code.  In place of
      5 ** a legal notice, here is a blessing:
      6 **
      7 **    May you do good and not evil.
      8 **    May you find forgiveness for yourself and forgive others.
      9 **    May you share freely, never taking more than you give.
     10 **
     11 *************************************************************************
     12 **
     13 ** Code for demonstartion virtual table that generates variations
     14 ** on an input word at increasing edit distances from the original.
     15 **
     16 ** A fuzzer virtual table is created like this:
     17 **
     18 **     CREATE VIRTUAL TABLE temp.f USING fuzzer;
     19 **
     20 ** The name of the new virtual table in the example above is "f".
     21 ** Note that all fuzzer virtual tables must be TEMP tables.  The
     22 ** "temp." prefix in front of the table name is required when the
     23 ** table is being created.  The "temp." prefix can be omitted when
     24 ** using the table as long as the name is unambiguous.
     25 **
     26 ** Before being used, the fuzzer needs to be programmed by giving it
     27 ** character transformations and a cost associated with each transformation.
     28 ** Examples:
     29 **
     30 **    INSERT INTO f(cFrom,cTo,Cost) VALUES('','a',100);
     31 **
     32 ** The above statement says that the cost of inserting a letter 'a' is
     33 ** 100.  (All costs are integers.  We recommend that costs be scaled so
     34 ** that the average cost is around 100.)
     35 **
     36 **    INSERT INTO f(cFrom,cTo,Cost) VALUES('b','',87);
     37 **
     38 ** The above statement says that the cost of deleting a single letter
     39 ** 'b' is 87.
     40 **
     41 **    INSERT INTO f(cFrom,cTo,Cost) VALUES('o','oe',38);
     42 **    INSERT INTO f(cFrom,cTo,Cost) VALUES('oe','o',40);
     43 **
     44 ** This third example says that the cost of transforming the single
     45 ** letter "o" into the two-letter sequence "oe" is 38 and that the
     46 ** cost of transforming "oe" back into "o" is 40.
     47 **
     48 ** After all the transformation costs have been set, the fuzzer table
     49 ** can be queried as follows:
     50 **
     51 **    SELECT word, distance FROM f
     52 **     WHERE word MATCH 'abcdefg'
     53 **       AND distance<200;
     54 **
     55 ** This first query outputs the string "abcdefg" and all strings that
     56 ** can be derived from that string by appling the specified transformations.
     57 ** The strings are output together with their total transformation cost
     58 ** (called "distance") and appear in order of increasing cost.  No string
     59 ** is output more than once.  If there are multiple ways to transform the
     60 ** target string into the output string then the lowest cost transform is
     61 ** the one that is returned.  In the example, the search is limited to
     62 ** strings with a total distance of less than 200.
     63 **
     64 ** It is important to put some kind of a limit on the fuzzer output.  This
     65 ** can be either in the form of a LIMIT clause at the end of the query,
     66 ** or better, a "distance<NNN" constraint where NNN is some number.  The
     67 ** running time and memory requirement is exponential in the value of NNN
     68 ** so you want to make sure that NNN is not too big.  A value of NNN that
     69 ** is about twice the average transformation cost seems to give good results.
     70 **
     71 ** The fuzzer table can be useful for tasks such as spelling correction.
     72 ** Suppose there is a second table vocabulary(w) where the w column contains
     73 ** all correctly spelled words.   Let $word be a word you want to look up.
     74 **
     75 **   SELECT vocabulary.w FROM f, vocabulary
     76 **    WHERE f.word MATCH $word
     77 **      AND f.distance<=200
     78 **      AND f.word=vocabulary.w
     79 **    LIMIT 20
     80 **
     81 ** The query above gives the 20 closest words to the $word being tested.
     82 ** (Note that for good performance, the vocubulary.w column should be
     83 ** indexed.)
     84 **
     85 ** A similar query can be used to find all words in the dictionary that
     86 ** begin with some prefix $prefix:
     87 **
     88 **   SELECT vocabulary.w FROM f, vocabulary
     89 **    WHERE f.word MATCH $prefix
     90 **      AND f.distance<=200
     91 **      AND vocabulary.w BETWEEN f.word AND (f.word || x'F7BFBFBF')
     92 **    LIMIT 50
     93 **
     94 ** This last query will show up to 50 words out of the vocabulary that
     95 ** match or nearly match the $prefix.
     96 */
     97 #include "sqlite3.h"
     98 #include <stdlib.h>
     99 #include <string.h>
    100 #include <assert.h>
    101 #include <stdio.h>
    102 
    103 #ifndef SQLITE_OMIT_VIRTUALTABLE
    104 
    105 /*
    106 ** Forward declaration of objects used by this implementation
    107 */
    108 typedef struct fuzzer_vtab fuzzer_vtab;
    109 typedef struct fuzzer_cursor fuzzer_cursor;
    110 typedef struct fuzzer_rule fuzzer_rule;
    111 typedef struct fuzzer_seen fuzzer_seen;
    112 typedef struct fuzzer_stem fuzzer_stem;
    113 
    114 /*
    115 ** Type of the "cost" of an edit operation.  Might be changed to
    116 ** "float" or "double" or "sqlite3_int64" in the future.
    117 */
    118 typedef int fuzzer_cost;
    119 
    120 
    121 /*
    122 ** Each transformation rule is stored as an instance of this object.
    123 ** All rules are kept on a linked list sorted by rCost.
    124 */
    125 struct fuzzer_rule {
    126   fuzzer_rule *pNext;        /* Next rule in order of increasing rCost */
    127   fuzzer_cost rCost;         /* Cost of this transformation */
    128   int nFrom, nTo;            /* Length of the zFrom and zTo strings */
    129   char *zFrom;               /* Transform from */
    130   char zTo[4];               /* Transform to (extra space appended) */
    131 };
    132 
    133 /*
    134 ** A stem object is used to generate variants.  It is also used to record
    135 ** previously generated outputs.
    136 **
    137 ** Every stem is added to a hash table as it is output.  Generation of
    138 ** duplicate stems is suppressed.
    139 **
    140 ** Active stems (those that might generate new outputs) are kepts on a linked
    141 ** list sorted by increasing cost.  The cost is the sum of rBaseCost and
    142 ** pRule->rCost.
    143 */
    144 struct fuzzer_stem {
    145   char *zBasis;              /* Word being fuzzed */
    146   int nBasis;                /* Length of the zBasis string */
    147   const fuzzer_rule *pRule;  /* Current rule to apply */
    148   int n;                     /* Apply pRule at this character offset */
    149   fuzzer_cost rBaseCost;     /* Base cost of getting to zBasis */
    150   fuzzer_cost rCostX;        /* Precomputed rBaseCost + pRule->rCost */
    151   fuzzer_stem *pNext;        /* Next stem in rCost order */
    152   fuzzer_stem *pHash;        /* Next stem with same hash on zBasis */
    153 };
    154 
    155 /*
    156 ** A fuzzer virtual-table object
    157 */
    158 struct fuzzer_vtab {
    159   sqlite3_vtab base;         /* Base class - must be first */
    160   char *zClassName;          /* Name of this class.  Default: "fuzzer" */
    161   fuzzer_rule *pRule;        /* All active rules in this fuzzer */
    162   fuzzer_rule *pNewRule;     /* New rules to add when last cursor expires */
    163   int nCursor;               /* Number of active cursors */
    164 };
    165 
    166 #define FUZZER_HASH  4001    /* Hash table size */
    167 #define FUZZER_NQUEUE  20    /* Number of slots on the stem queue */
    168 
    169 /* A fuzzer cursor object */
    170 struct fuzzer_cursor {
    171   sqlite3_vtab_cursor base;  /* Base class - must be first */
    172   sqlite3_int64 iRowid;      /* The rowid of the current word */
    173   fuzzer_vtab *pVtab;        /* The virtual table this cursor belongs to */
    174   fuzzer_cost rLimit;        /* Maximum cost of any term */
    175   fuzzer_stem *pStem;        /* Stem with smallest rCostX */
    176   fuzzer_stem *pDone;        /* Stems already processed to completion */
    177   fuzzer_stem *aQueue[FUZZER_NQUEUE];  /* Queue of stems with higher rCostX */
    178   int mxQueue;               /* Largest used index in aQueue[] */
    179   char *zBuf;                /* Temporary use buffer */
    180   int nBuf;                  /* Bytes allocated for zBuf */
    181   int nStem;                 /* Number of stems allocated */
    182   fuzzer_rule nullRule;      /* Null rule used first */
    183   fuzzer_stem *apHash[FUZZER_HASH]; /* Hash of previously generated terms */
    184 };
    185 
    186 /* Methods for the fuzzer module */
    187 static int fuzzerConnect(
    188   sqlite3 *db,
    189   void *pAux,
    190   int argc, const char *const*argv,
    191   sqlite3_vtab **ppVtab,
    192   char **pzErr
    193 ){
    194   fuzzer_vtab *pNew;
    195   int n;
    196   if( strcmp(argv[1],"temp")!=0 ){
    197     *pzErr = sqlite3_mprintf("%s virtual tables must be TEMP", argv[0]);
    198     return SQLITE_ERROR;
    199   }
    200   n = strlen(argv[0]) + 1;
    201   pNew = sqlite3_malloc( sizeof(*pNew) + n );
    202   if( pNew==0 ) return SQLITE_NOMEM;
    203   pNew->zClassName = (char*)&pNew[1];
    204   memcpy(pNew->zClassName, argv[0], n);
    205   sqlite3_declare_vtab(db, "CREATE TABLE x(word,distance,cFrom,cTo,cost)");
    206   memset(pNew, 0, sizeof(*pNew));
    207   *ppVtab = &pNew->base;
    208   return SQLITE_OK;
    209 }
    210 /* Note that for this virtual table, the xCreate and xConnect
    211 ** methods are identical. */
    212 
    213 static int fuzzerDisconnect(sqlite3_vtab *pVtab){
    214   fuzzer_vtab *p = (fuzzer_vtab*)pVtab;
    215   assert( p->nCursor==0 );
    216   do{
    217     while( p->pRule ){
    218       fuzzer_rule *pRule = p->pRule;
    219       p->pRule = pRule->pNext;
    220       sqlite3_free(pRule);
    221     }
    222     p->pRule = p->pNewRule;
    223     p->pNewRule = 0;
    224   }while( p->pRule );
    225   sqlite3_free(p);
    226   return SQLITE_OK;
    227 }
    228 /* The xDisconnect and xDestroy methods are also the same */
    229 
    230 /*
    231 ** The two input rule lists are both sorted in order of increasing
    232 ** cost.  Merge them together into a single list, sorted by cost, and
    233 ** return a pointer to the head of that list.
    234 */
    235 static fuzzer_rule *fuzzerMergeRules(fuzzer_rule *pA, fuzzer_rule *pB){
    236   fuzzer_rule head;
    237   fuzzer_rule *pTail;
    238 
    239   pTail =  &head;
    240   while( pA && pB ){
    241     if( pA->rCost<=pB->rCost ){
    242       pTail->pNext = pA;
    243       pTail = pA;
    244       pA = pA->pNext;
    245     }else{
    246       pTail->pNext = pB;
    247       pTail = pB;
    248       pB = pB->pNext;
    249     }
    250   }
    251   if( pA==0 ){
    252     pTail->pNext = pB;
    253   }else{
    254     pTail->pNext = pA;
    255   }
    256   return head.pNext;
    257 }
    258 
    259 
    260 /*
    261 ** Open a new fuzzer cursor.
    262 */
    263 static int fuzzerOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
    264   fuzzer_vtab *p = (fuzzer_vtab*)pVTab;
    265   fuzzer_cursor *pCur;
    266   pCur = sqlite3_malloc( sizeof(*pCur) );
    267   if( pCur==0 ) return SQLITE_NOMEM;
    268   memset(pCur, 0, sizeof(*pCur));
    269   pCur->pVtab = p;
    270   *ppCursor = &pCur->base;
    271   if( p->nCursor==0 && p->pNewRule ){
    272     unsigned int i;
    273     fuzzer_rule *pX;
    274     fuzzer_rule *a[15];
    275     for(i=0; i<sizeof(a)/sizeof(a[0]); i++) a[i] = 0;
    276     while( (pX = p->pNewRule)!=0 ){
    277       p->pNewRule = pX->pNext;
    278       pX->pNext = 0;
    279       for(i=0; a[i] && i<sizeof(a)/sizeof(a[0])-1; i++){
    280         pX = fuzzerMergeRules(a[i], pX);
    281         a[i] = 0;
    282       }
    283       a[i] = fuzzerMergeRules(a[i], pX);
    284     }
    285     for(pX=a[0], i=1; i<sizeof(a)/sizeof(a[0]); i++){
    286       pX = fuzzerMergeRules(a[i], pX);
    287     }
    288     p->pRule = fuzzerMergeRules(p->pRule, pX);
    289   }
    290   p->nCursor++;
    291   return SQLITE_OK;
    292 }
    293 
    294 /*
    295 ** Free all stems in a list.
    296 */
    297 static void fuzzerClearStemList(fuzzer_stem *pStem){
    298   while( pStem ){
    299     fuzzer_stem *pNext = pStem->pNext;
    300     sqlite3_free(pStem);
    301     pStem = pNext;
    302   }
    303 }
    304 
    305 /*
    306 ** Free up all the memory allocated by a cursor.  Set it rLimit to 0
    307 ** to indicate that it is at EOF.
    308 */
    309 static void fuzzerClearCursor(fuzzer_cursor *pCur, int clearHash){
    310   int i;
    311   fuzzerClearStemList(pCur->pStem);
    312   fuzzerClearStemList(pCur->pDone);
    313   for(i=0; i<FUZZER_NQUEUE; i++) fuzzerClearStemList(pCur->aQueue[i]);
    314   pCur->rLimit = (fuzzer_cost)0;
    315   if( clearHash && pCur->nStem ){
    316     pCur->mxQueue = 0;
    317     pCur->pStem = 0;
    318     pCur->pDone = 0;
    319     memset(pCur->aQueue, 0, sizeof(pCur->aQueue));
    320     memset(pCur->apHash, 0, sizeof(pCur->apHash));
    321   }
    322   pCur->nStem = 0;
    323 }
    324 
    325 /*
    326 ** Close a fuzzer cursor.
    327 */
    328 static int fuzzerClose(sqlite3_vtab_cursor *cur){
    329   fuzzer_cursor *pCur = (fuzzer_cursor *)cur;
    330   fuzzerClearCursor(pCur, 0);
    331   sqlite3_free(pCur->zBuf);
    332   pCur->pVtab->nCursor--;
    333   sqlite3_free(pCur);
    334   return SQLITE_OK;
    335 }
    336 
    337 /*
    338 ** Compute the current output term for a fuzzer_stem.
    339 */
    340 static int fuzzerRender(
    341   fuzzer_stem *pStem,   /* The stem to be rendered */
    342   char **pzBuf,         /* Write results into this buffer.  realloc if needed */
    343   int *pnBuf            /* Size of the buffer */
    344 ){
    345   const fuzzer_rule *pRule = pStem->pRule;
    346   int n;
    347   char *z;
    348 
    349   n = pStem->nBasis + pRule->nTo - pRule->nFrom;
    350   if( (*pnBuf)<n+1 ){
    351     (*pzBuf) = sqlite3_realloc((*pzBuf), n+100);
    352     if( (*pzBuf)==0 ) return SQLITE_NOMEM;
    353     (*pnBuf) = n+100;
    354   }
    355   n = pStem->n;
    356   z = *pzBuf;
    357   if( n<0 ){
    358     memcpy(z, pStem->zBasis, pStem->nBasis+1);
    359   }else{
    360     memcpy(z, pStem->zBasis, n);
    361     memcpy(&z[n], pRule->zTo, pRule->nTo);
    362     memcpy(&z[n+pRule->nTo], &pStem->zBasis[n+pRule->nFrom],
    363            pStem->nBasis-n-pRule->nFrom+1);
    364   }
    365   return SQLITE_OK;
    366 }
    367 
    368 /*
    369 ** Compute a hash on zBasis.
    370 */
    371 static unsigned int fuzzerHash(const char *z){
    372   unsigned int h = 0;
    373   while( *z ){ h = (h<<3) ^ (h>>29) ^ *(z++); }
    374   return h % FUZZER_HASH;
    375 }
    376 
    377 /*
    378 ** Current cost of a stem
    379 */
    380 static fuzzer_cost fuzzerCost(fuzzer_stem *pStem){
    381   return pStem->rCostX = pStem->rBaseCost + pStem->pRule->rCost;
    382 }
    383 
    384 #if 0
    385 /*
    386 ** Print a description of a fuzzer_stem on stderr.
    387 */
    388 static void fuzzerStemPrint(
    389   const char *zPrefix,
    390   fuzzer_stem *pStem,
    391   const char *zSuffix
    392 ){
    393   if( pStem->n<0 ){
    394     fprintf(stderr, "%s[%s](%d)-->self%s",
    395        zPrefix,
    396        pStem->zBasis, pStem->rBaseCost,
    397        zSuffix
    398     );
    399   }else{
    400     char *zBuf = 0;
    401     int nBuf = 0;
    402     if( fuzzerRender(pStem, &zBuf, &nBuf)!=SQLITE_OK ) return;
    403     fprintf(stderr, "%s[%s](%d)-->{%s}(%d)%s",
    404       zPrefix,
    405       pStem->zBasis, pStem->rBaseCost, zBuf, pStem->,
    406       zSuffix
    407     );
    408     sqlite3_free(zBuf);
    409   }
    410 }
    411 #endif
    412 
    413 /*
    414 ** Return 1 if the string to which the cursor is point has already
    415 ** been emitted.  Return 0 if not.  Return -1 on a memory allocation
    416 ** failures.
    417 */
    418 static int fuzzerSeen(fuzzer_cursor *pCur, fuzzer_stem *pStem){
    419   unsigned int h;
    420   fuzzer_stem *pLookup;
    421 
    422   if( fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){
    423     return -1;
    424   }
    425   h = fuzzerHash(pCur->zBuf);
    426   pLookup = pCur->apHash[h];
    427     while( pLookup && strcmp(pLookup->zBasis, pCur->zBuf)!=0 ){
    428     pLookup = pLookup->pHash;
    429   }
    430   return pLookup!=0;
    431 }
    432 
    433 /*
    434 ** Advance a fuzzer_stem to its next value.   Return 0 if there are
    435 ** no more values that can be generated by this fuzzer_stem.  Return
    436 ** -1 on a memory allocation failure.
    437 */
    438 static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){
    439   const fuzzer_rule *pRule;
    440   while( (pRule = pStem->pRule)!=0 ){
    441     while( pStem->n < pStem->nBasis - pRule->nFrom ){
    442       pStem->n++;
    443       if( pRule->nFrom==0
    444        || memcmp(&pStem->zBasis[pStem->n], pRule->zFrom, pRule->nFrom)==0
    445       ){
    446         /* Found a rewrite case.  Make sure it is not a duplicate */
    447         int rc = fuzzerSeen(pCur, pStem);
    448         if( rc<0 ) return -1;
    449         if( rc==0 ){
    450           fuzzerCost(pStem);
    451           return 1;
    452         }
    453       }
    454     }
    455     pStem->n = -1;
    456     pStem->pRule = pRule->pNext;
    457     if( pStem->pRule && fuzzerCost(pStem)>pCur->rLimit ) pStem->pRule = 0;
    458   }
    459   return 0;
    460 }
    461 
    462 /*
    463 ** The two input stem lists are both sorted in order of increasing
    464 ** rCostX.  Merge them together into a single list, sorted by rCostX, and
    465 ** return a pointer to the head of that new list.
    466 */
    467 static fuzzer_stem *fuzzerMergeStems(fuzzer_stem *pA, fuzzer_stem *pB){
    468   fuzzer_stem head;
    469   fuzzer_stem *pTail;
    470 
    471   pTail =  &head;
    472   while( pA && pB ){
    473     if( pA->rCostX<=pB->rCostX ){
    474       pTail->pNext = pA;
    475       pTail = pA;
    476       pA = pA->pNext;
    477     }else{
    478       pTail->pNext = pB;
    479       pTail = pB;
    480       pB = pB->pNext;
    481     }
    482   }
    483   if( pA==0 ){
    484     pTail->pNext = pB;
    485   }else{
    486     pTail->pNext = pA;
    487   }
    488   return head.pNext;
    489 }
    490 
    491 /*
    492 ** Load pCur->pStem with the lowest-cost stem.  Return a pointer
    493 ** to the lowest-cost stem.
    494 */
    495 static fuzzer_stem *fuzzerLowestCostStem(fuzzer_cursor *pCur){
    496   fuzzer_stem *pBest, *pX;
    497   int iBest;
    498   int i;
    499 
    500   if( pCur->pStem==0 ){
    501     iBest = -1;
    502     pBest = 0;
    503     for(i=0; i<=pCur->mxQueue; i++){
    504       pX = pCur->aQueue[i];
    505       if( pX==0 ) continue;
    506       if( pBest==0 || pBest->rCostX>pX->rCostX ){
    507         pBest = pX;
    508         iBest = i;
    509       }
    510     }
    511     if( pBest ){
    512       pCur->aQueue[iBest] = pBest->pNext;
    513       pBest->pNext = 0;
    514       pCur->pStem = pBest;
    515     }
    516   }
    517   return pCur->pStem;
    518 }
    519 
    520 /*
    521 ** Insert pNew into queue of pending stems.  Then find the stem
    522 ** with the lowest rCostX and move it into pCur->pStem.
    523 ** list.  The insert is done such the pNew is in the correct order
    524 ** according to fuzzer_stem.zBaseCost+fuzzer_stem.pRule->rCost.
    525 */
    526 static fuzzer_stem *fuzzerInsert(fuzzer_cursor *pCur, fuzzer_stem *pNew){
    527   fuzzer_stem *pX;
    528   int i;
    529 
    530   /* If pCur->pStem exists and is greater than pNew, then make pNew
    531   ** the new pCur->pStem and insert the old pCur->pStem instead.
    532   */
    533   if( (pX = pCur->pStem)!=0 && pX->rCostX>pNew->rCostX ){
    534     pNew->pNext = 0;
    535     pCur->pStem = pNew;
    536     pNew = pX;
    537   }
    538 
    539   /* Insert the new value */
    540   pNew->pNext = 0;
    541   pX = pNew;
    542   for(i=0; i<=pCur->mxQueue; i++){
    543     if( pCur->aQueue[i] ){
    544       pX = fuzzerMergeStems(pX, pCur->aQueue[i]);
    545       pCur->aQueue[i] = 0;
    546     }else{
    547       pCur->aQueue[i] = pX;
    548       break;
    549     }
    550   }
    551   if( i>pCur->mxQueue ){
    552     if( i<FUZZER_NQUEUE ){
    553       pCur->mxQueue = i;
    554       pCur->aQueue[i] = pX;
    555     }else{
    556       assert( pCur->mxQueue==FUZZER_NQUEUE-1 );
    557       pX = fuzzerMergeStems(pX, pCur->aQueue[FUZZER_NQUEUE-1]);
    558       pCur->aQueue[FUZZER_NQUEUE-1] = pX;
    559     }
    560   }
    561 
    562   return fuzzerLowestCostStem(pCur);
    563 }
    564 
    565 /*
    566 ** Allocate a new fuzzer_stem.  Add it to the hash table but do not
    567 ** link it into either the pCur->pStem or pCur->pDone lists.
    568 */
    569 static fuzzer_stem *fuzzerNewStem(
    570   fuzzer_cursor *pCur,
    571   const char *zWord,
    572   fuzzer_cost rBaseCost
    573 ){
    574   fuzzer_stem *pNew;
    575   unsigned int h;
    576 
    577   pNew = sqlite3_malloc( sizeof(*pNew) + strlen(zWord) + 1 );
    578   if( pNew==0 ) return 0;
    579   memset(pNew, 0, sizeof(*pNew));
    580   pNew->zBasis = (char*)&pNew[1];
    581   pNew->nBasis = strlen(zWord);
    582   memcpy(pNew->zBasis, zWord, pNew->nBasis+1);
    583   pNew->pRule = pCur->pVtab->pRule;
    584   pNew->n = -1;
    585   pNew->rBaseCost = pNew->rCostX = rBaseCost;
    586   h = fuzzerHash(pNew->zBasis);
    587   pNew->pHash = pCur->apHash[h];
    588   pCur->apHash[h] = pNew;
    589   pCur->nStem++;
    590   return pNew;
    591 }
    592 
    593 
    594 /*
    595 ** Advance a cursor to its next row of output
    596 */
    597 static int fuzzerNext(sqlite3_vtab_cursor *cur){
    598   fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
    599   int rc;
    600   fuzzer_stem *pStem, *pNew;
    601 
    602   pCur->iRowid++;
    603 
    604   /* Use the element the cursor is currently point to to create
    605   ** a new stem and insert the new stem into the priority queue.
    606   */
    607   pStem = pCur->pStem;
    608   if( pStem->rCostX>0 ){
    609     rc = fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf);
    610     if( rc==SQLITE_NOMEM ) return SQLITE_NOMEM;
    611     pNew = fuzzerNewStem(pCur, pCur->zBuf, pStem->rCostX);
    612     if( pNew ){
    613       if( fuzzerAdvance(pCur, pNew)==0 ){
    614         pNew->pNext = pCur->pDone;
    615         pCur->pDone = pNew;
    616       }else{
    617         if( fuzzerInsert(pCur, pNew)==pNew ){
    618           return SQLITE_OK;
    619         }
    620       }
    621     }else{
    622       return SQLITE_NOMEM;
    623     }
    624   }
    625 
    626   /* Adjust the priority queue so that the first element of the
    627   ** stem list is the next lowest cost word.
    628   */
    629   while( (pStem = pCur->pStem)!=0 ){
    630     if( fuzzerAdvance(pCur, pStem) ){
    631       pCur->pStem = 0;
    632       pStem = fuzzerInsert(pCur, pStem);
    633       if( (rc = fuzzerSeen(pCur, pStem))!=0 ){
    634         if( rc<0 ) return SQLITE_NOMEM;
    635         continue;
    636       }
    637       return SQLITE_OK;  /* New word found */
    638     }
    639     pCur->pStem = 0;
    640     pStem->pNext = pCur->pDone;
    641     pCur->pDone = pStem;
    642     if( fuzzerLowestCostStem(pCur) ){
    643       rc = fuzzerSeen(pCur, pCur->pStem);
    644       if( rc<0 ) return SQLITE_NOMEM;
    645       if( rc==0 ){
    646         return SQLITE_OK;
    647       }
    648     }
    649   }
    650 
    651   /* Reach this point only if queue has been exhausted and there is
    652   ** nothing left to be output. */
    653   pCur->rLimit = (fuzzer_cost)0;
    654   return SQLITE_OK;
    655 }
    656 
    657 /*
    658 ** Called to "rewind" a cursor back to the beginning so that
    659 ** it starts its output over again.  Always called at least once
    660 ** prior to any fuzzerColumn, fuzzerRowid, or fuzzerEof call.
    661 */
    662 static int fuzzerFilter(
    663   sqlite3_vtab_cursor *pVtabCursor,
    664   int idxNum, const char *idxStr,
    665   int argc, sqlite3_value **argv
    666 ){
    667   fuzzer_cursor *pCur = (fuzzer_cursor *)pVtabCursor;
    668   const char *zWord = 0;
    669   fuzzer_stem *pStem;
    670 
    671   fuzzerClearCursor(pCur, 1);
    672   pCur->rLimit = 2147483647;
    673   if( idxNum==1 ){
    674     zWord = (const char*)sqlite3_value_text(argv[0]);
    675   }else if( idxNum==2 ){
    676     pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[0]);
    677   }else if( idxNum==3 ){
    678     zWord = (const char*)sqlite3_value_text(argv[0]);
    679     pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[1]);
    680   }
    681   if( zWord==0 ) zWord = "";
    682   pCur->pStem = pStem = fuzzerNewStem(pCur, zWord, (fuzzer_cost)0);
    683   if( pStem==0 ) return SQLITE_NOMEM;
    684   pCur->nullRule.pNext = pCur->pVtab->pRule;
    685   pCur->nullRule.rCost = 0;
    686   pCur->nullRule.nFrom = 0;
    687   pCur->nullRule.nTo = 0;
    688   pCur->nullRule.zFrom = "";
    689   pStem->pRule = &pCur->nullRule;
    690   pStem->n = pStem->nBasis;
    691   pCur->iRowid = 1;
    692   return SQLITE_OK;
    693 }
    694 
    695 /*
    696 ** Only the word and distance columns have values.  All other columns
    697 ** return NULL
    698 */
    699 static int fuzzerColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){
    700   fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
    701   if( i==0 ){
    702     /* the "word" column */
    703     if( fuzzerRender(pCur->pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){
    704       return SQLITE_NOMEM;
    705     }
    706     sqlite3_result_text(ctx, pCur->zBuf, -1, SQLITE_TRANSIENT);
    707   }else if( i==1 ){
    708     /* the "distance" column */
    709     sqlite3_result_int(ctx, pCur->pStem->rCostX);
    710   }else{
    711     /* All other columns are NULL */
    712     sqlite3_result_null(ctx);
    713   }
    714   return SQLITE_OK;
    715 }
    716 
    717 /*
    718 ** The rowid.
    719 */
    720 static int fuzzerRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
    721   fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
    722   *pRowid = pCur->iRowid;
    723   return SQLITE_OK;
    724 }
    725 
    726 /*
    727 ** When the fuzzer_cursor.rLimit value is 0 or less, that is a signal
    728 ** that the cursor has nothing more to output.
    729 */
    730 static int fuzzerEof(sqlite3_vtab_cursor *cur){
    731   fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
    732   return pCur->rLimit<=(fuzzer_cost)0;
    733 }
    734 
    735 /*
    736 ** Search for terms of these forms:
    737 **
    738 **       word MATCH $str
    739 **       distance < $value
    740 **       distance <= $value
    741 **
    742 ** The distance< and distance<= are both treated as distance<=.
    743 ** The query plan number is as follows:
    744 **
    745 **   0:    None of the terms above are found
    746 **   1:    There is a "word MATCH" term with $str in filter.argv[0].
    747 **   2:    There is a "distance<" term with $value in filter.argv[0].
    748 **   3:    Both "word MATCH" and "distance<" with $str in argv[0] and
    749 **         $value in argv[1].
    750 */
    751 static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
    752   int iPlan = 0;
    753   int iDistTerm = -1;
    754   int i;
    755   const struct sqlite3_index_constraint *pConstraint;
    756   pConstraint = pIdxInfo->aConstraint;
    757   for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
    758     if( pConstraint->usable==0 ) continue;
    759     if( (iPlan & 1)==0
    760      && pConstraint->iColumn==0
    761      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
    762     ){
    763       iPlan |= 1;
    764       pIdxInfo->aConstraintUsage[i].argvIndex = 1;
    765       pIdxInfo->aConstraintUsage[i].omit = 1;
    766     }
    767     if( (iPlan & 2)==0
    768      && pConstraint->iColumn==1
    769      && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
    770            || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
    771     ){
    772       iPlan |= 2;
    773       iDistTerm = i;
    774     }
    775   }
    776   if( iPlan==2 ){
    777     pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1;
    778   }else if( iPlan==3 ){
    779     pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 2;
    780   }
    781   pIdxInfo->idxNum = iPlan;
    782   if( pIdxInfo->nOrderBy==1
    783    && pIdxInfo->aOrderBy[0].iColumn==1
    784    && pIdxInfo->aOrderBy[0].desc==0
    785   ){
    786     pIdxInfo->orderByConsumed = 1;
    787   }
    788   pIdxInfo->estimatedCost = (double)10000;
    789 
    790   return SQLITE_OK;
    791 }
    792 
    793 /*
    794 ** Disallow all attempts to DELETE or UPDATE.  Only INSERTs are allowed.
    795 **
    796 ** On an insert, the cFrom, cTo, and cost columns are used to construct
    797 ** a new rule.   All other columns are ignored.  The rule is ignored
    798 ** if cFrom and cTo are identical.  A NULL value for cFrom or cTo is
    799 ** interpreted as an empty string.  The cost must be positive.
    800 */
    801 static int fuzzerUpdate(
    802   sqlite3_vtab *pVTab,
    803   int argc,
    804   sqlite3_value **argv,
    805   sqlite_int64 *pRowid
    806 ){
    807   fuzzer_vtab *p = (fuzzer_vtab*)pVTab;
    808   fuzzer_rule *pRule;
    809   const char *zFrom;
    810   int nFrom;
    811   const char *zTo;
    812   int nTo;
    813   fuzzer_cost rCost;
    814   if( argc!=7 ){
    815     sqlite3_free(pVTab->zErrMsg);
    816     pVTab->zErrMsg = sqlite3_mprintf("cannot delete from a %s virtual table",
    817                                      p->zClassName);
    818     return SQLITE_CONSTRAINT;
    819   }
    820   if( sqlite3_value_type(argv[0])!=SQLITE_NULL ){
    821     sqlite3_free(pVTab->zErrMsg);
    822     pVTab->zErrMsg = sqlite3_mprintf("cannot update a %s virtual table",
    823                                      p->zClassName);
    824     return SQLITE_CONSTRAINT;
    825   }
    826   zFrom = (char*)sqlite3_value_text(argv[4]);
    827   if( zFrom==0 ) zFrom = "";
    828   zTo = (char*)sqlite3_value_text(argv[5]);
    829   if( zTo==0 ) zTo = "";
    830   if( strcmp(zFrom,zTo)==0 ){
    831     /* Silently ignore null transformations */
    832     return SQLITE_OK;
    833   }
    834   rCost = sqlite3_value_int(argv[6]);
    835   if( rCost<=0 ){
    836     sqlite3_free(pVTab->zErrMsg);
    837     pVTab->zErrMsg = sqlite3_mprintf("cost must be positive");
    838     return SQLITE_CONSTRAINT;
    839   }
    840   nFrom = strlen(zFrom);
    841   nTo = strlen(zTo);
    842   pRule = sqlite3_malloc( sizeof(*pRule) + nFrom + nTo );
    843   if( pRule==0 ){
    844     return SQLITE_NOMEM;
    845   }
    846   pRule->zFrom = &pRule->zTo[nTo+1];
    847   pRule->nFrom = nFrom;
    848   memcpy(pRule->zFrom, zFrom, nFrom+1);
    849   memcpy(pRule->zTo, zTo, nTo+1);
    850   pRule->nTo = nTo;
    851   pRule->rCost = rCost;
    852   pRule->pNext = p->pNewRule;
    853   p->pNewRule = pRule;
    854   return SQLITE_OK;
    855 }
    856 
    857 /*
    858 ** A virtual table module that provides read-only access to a
    859 ** Tcl global variable namespace.
    860 */
    861 static sqlite3_module fuzzerModule = {
    862   0,                           /* iVersion */
    863   fuzzerConnect,
    864   fuzzerConnect,
    865   fuzzerBestIndex,
    866   fuzzerDisconnect,
    867   fuzzerDisconnect,
    868   fuzzerOpen,                  /* xOpen - open a cursor */
    869   fuzzerClose,                 /* xClose - close a cursor */
    870   fuzzerFilter,                /* xFilter - configure scan constraints */
    871   fuzzerNext,                  /* xNext - advance a cursor */
    872   fuzzerEof,                   /* xEof - check for end of scan */
    873   fuzzerColumn,                /* xColumn - read data */
    874   fuzzerRowid,                 /* xRowid - read data */
    875   fuzzerUpdate,                /* xUpdate - INSERT */
    876   0,                           /* xBegin */
    877   0,                           /* xSync */
    878   0,                           /* xCommit */
    879   0,                           /* xRollback */
    880   0,                           /* xFindMethod */
    881   0,                           /* xRename */
    882 };
    883 
    884 #endif /* SQLITE_OMIT_VIRTUALTABLE */
    885 
    886 
    887 /*
    888 ** Register the fuzzer virtual table
    889 */
    890 int fuzzer_register(sqlite3 *db){
    891   int rc = SQLITE_OK;
    892 #ifndef SQLITE_OMIT_VIRTUALTABLE
    893   rc = sqlite3_create_module(db, "fuzzer", &fuzzerModule, 0);
    894 #endif
    895   return rc;
    896 }
    897 
    898 #ifdef SQLITE_TEST
    899 #include <tcl.h>
    900 /*
    901 ** Decode a pointer to an sqlite3 object.
    902 */
    903 extern int getDbPointer(Tcl_Interp *interp, const char *zA, sqlite3 **ppDb);
    904 
    905 /*
    906 ** Register the echo virtual table module.
    907 */
    908 static int register_fuzzer_module(
    909   ClientData clientData, /* Pointer to sqlite3_enable_XXX function */
    910   Tcl_Interp *interp,    /* The TCL interpreter that invoked this command */
    911   int objc,              /* Number of arguments */
    912   Tcl_Obj *CONST objv[]  /* Command arguments */
    913 ){
    914   sqlite3 *db;
    915   if( objc!=2 ){
    916     Tcl_WrongNumArgs(interp, 1, objv, "DB");
    917     return TCL_ERROR;
    918   }
    919   if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;
    920   fuzzer_register(db);
    921   return TCL_OK;
    922 }
    923 
    924 
    925 /*
    926 ** Register commands with the TCL interpreter.
    927 */
    928 int Sqlitetestfuzzer_Init(Tcl_Interp *interp){
    929   static struct {
    930      char *zName;
    931      Tcl_ObjCmdProc *xProc;
    932      void *clientData;
    933   } aObjCmd[] = {
    934      { "register_fuzzer_module",   register_fuzzer_module, 0 },
    935   };
    936   int i;
    937   for(i=0; i<sizeof(aObjCmd)/sizeof(aObjCmd[0]); i++){
    938     Tcl_CreateObjCommand(interp, aObjCmd[i].zName,
    939         aObjCmd[i].xProc, aObjCmd[i].clientData, 0);
    940   }
    941   return TCL_OK;
    942 }
    943 
    944 #endif /* SQLITE_TEST */
    945