Home | History | Annotate | Download | only in fts3
      1 /*
      2 ** 2008 Nov 28
      3 **
      4 ** The author disclaims copyright to this source code.  In place of
      5 ** a legal notice, here is a blessing:
      6 **
      7 **    May you do good and not evil.
      8 **    May you find forgiveness for yourself and forgive others.
      9 **    May you share freely, never taking more than you give.
     10 **
     11 ******************************************************************************
     12 **
     13 ** This module contains code that implements a parser for fts3 query strings
     14 ** (the right-hand argument to the MATCH operator). Because the supported
     15 ** syntax is relatively simple, the whole tokenizer/parser system is
     16 ** hand-coded.
     17 */
     18 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
     19 
     20 /*
     21 ** By default, this module parses the legacy syntax that has been
     22 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
     23 ** is defined, then it uses the new syntax. The differences between
     24 ** the new and the old syntaxes are:
     25 **
     26 **  a) The new syntax supports parenthesis. The old does not.
     27 **
     28 **  b) The new syntax supports the AND and NOT operators. The old does not.
     29 **
     30 **  c) The old syntax supports the "-" token qualifier. This is not
     31 **     supported by the new syntax (it is replaced by the NOT operator).
     32 **
     33 **  d) When using the old syntax, the OR operator has a greater precedence
     34 **     than an implicit AND. When using the new, both implicit and explicit
     35 **     AND operators have a higher precedence than OR.
     36 **
     37 ** If compiled with SQLITE_TEST defined, then this module exports the
     38 ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
     39 ** to zero causes the module to use the old syntax. If it is set to
     40 ** non-zero the new syntax is activated. This is so both syntaxes can
     41 ** be tested using a single build of testfixture.
     42 **
     43 ** The following describes the syntax supported by the fts3 MATCH
     44 ** operator in a similar format to that used by the lemon parser
     45 ** generator. This module does not actually use lemon; it uses a
     46 ** custom parser.
     47 **
     48 **   query ::= andexpr (OR andexpr)*.
     49 **
     50 **   andexpr ::= notexpr (AND? notexpr)*.
     51 **
     52 **   notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
     53 **   notexpr ::= LP query RP.
     54 **
     55 **   nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
     56 **
     57 **   distance_opt ::= .
     58 **   distance_opt ::= / INTEGER.
     59 **
     60 **   phrase ::= TOKEN.
     61 **   phrase ::= COLUMN:TOKEN.
     62 **   phrase ::= "TOKEN TOKEN TOKEN...".
     63 */
     64 
/*
** Syntax selector. In SQLITE_TEST builds this is a real global variable
** (initially 0, i.e. legacy syntax) so that it can be changed at run time.
** In all other builds it is a compile-time constant: 1 if
** SQLITE_ENABLE_FTS3_PARENTHESIS is defined, 0 otherwise.
*/
#ifdef SQLITE_TEST
int sqlite3_fts3_enable_parentheses = 0;
#else
# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
#  define sqlite3_fts3_enable_parentheses 1
# else
#  define sqlite3_fts3_enable_parentheses 0
# endif
#endif

/*
** Default span for NEAR operators. This is the value stored in
** Fts3Expr.nNear when a NEAR keyword is not followed by an explicit
** "/<digits>" distance.
*/
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
     79 
     80 #include "fts3Int.h"
     81 #include <string.h>
     82 #include <assert.h>
     83 
/*
** State shared by all of the parsing routines in this file while a single
** query string is being parsed. One instance is set up on the stack by
** sqlite3Fts3ExprParse() and passed down by pointer.
*/
typedef struct ParseContext ParseContext;
struct ParseContext {
  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
  const char **azCol;                 /* Array of column names for fts3 table */
  int nCol;                           /* Number of entries in azCol[] */
  int iDefaultCol;                    /* Default column to query */
  sqlite3_context *pCtx;              /* Write error message here */
  int nNest;                          /* Number of nested brackets: +1 for
                                      ** each '(' seen, -1 for each ')' */
};
     93 
     94 /*
     95 ** This function is equivalent to the standard isspace() function.
     96 **
     97 ** The standard isspace() can be awkward to use safely, because although it
     98 ** is defined to accept an argument of type int, its behaviour when passed
     99 ** an integer that falls outside of the range of the unsigned char type
    100 ** is undefined (and sometimes, "undefined" means segfault). This wrapper
    101 ** is defined to accept an argument of type char, and always returns 0 for
    102 ** any values that fall outside of the range of the unsigned char type (i.e.
    103 ** negative values).
    104 */
    105 static int fts3isspace(char c){
    106   return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
    107 }
    108 
    109 /*
    110 ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
    111 ** zero the memory before returning a pointer to it. If unsuccessful,
    112 ** return NULL.
    113 */
    114 static void *fts3MallocZero(int nByte){
    115   void *pRet = sqlite3_malloc(nByte);
    116   if( pRet ) memset(pRet, 0, nByte);
    117   return pRet;
    118 }
    119 
    120 
    121 /*
    122 ** Extract the next token from buffer z (length n) using the tokenizer
    123 ** and other information (column names etc.) in pParse. Create an Fts3Expr
    124 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
    125 ** single token and set *ppExpr to point to it. If the end of the buffer is
    126 ** reached before a token is found, set *ppExpr to zero. It is the
    127 ** responsibility of the caller to eventually deallocate the allocated
    128 ** Fts3Expr structure (if any) by passing it to sqlite3_free().
    129 **
    130 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
    131 ** fails.
    132 */
    133 static int getNextToken(
    134   ParseContext *pParse,                   /* fts3 query parse context */
    135   int iCol,                               /* Value for Fts3Phrase.iColumn */
    136   const char *z, int n,                   /* Input string */
    137   Fts3Expr **ppExpr,                      /* OUT: expression */
    138   int *pnConsumed                         /* OUT: Number of bytes consumed */
    139 ){
    140   sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
    141   sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
    142   int rc;
    143   sqlite3_tokenizer_cursor *pCursor;
    144   Fts3Expr *pRet = 0;
    145   int nConsumed = 0;
    146 
    147   rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
    148   if( rc==SQLITE_OK ){
    149     const char *zToken;
    150     int nToken, iStart, iEnd, iPosition;
    151     int nByte;                               /* total space to allocate */
    152 
    153     pCursor->pTokenizer = pTokenizer;
    154     rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
    155 
    156     if( rc==SQLITE_OK ){
    157       nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
    158       pRet = (Fts3Expr *)fts3MallocZero(nByte);
    159       if( !pRet ){
    160         rc = SQLITE_NOMEM;
    161       }else{
    162         pRet->eType = FTSQUERY_PHRASE;
    163         pRet->pPhrase = (Fts3Phrase *)&pRet[1];
    164         pRet->pPhrase->nToken = 1;
    165         pRet->pPhrase->iColumn = iCol;
    166         pRet->pPhrase->aToken[0].n = nToken;
    167         pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
    168         memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
    169 
    170         if( iEnd<n && z[iEnd]=='*' ){
    171           pRet->pPhrase->aToken[0].isPrefix = 1;
    172           iEnd++;
    173         }
    174         if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){
    175           pRet->pPhrase->isNot = 1;
    176         }
    177       }
    178       nConsumed = iEnd;
    179     }
    180 
    181     pModule->xClose(pCursor);
    182   }
    183 
    184   *pnConsumed = nConsumed;
    185   *ppExpr = pRet;
    186   return rc;
    187 }
    188 
    189 
    190 /*
    191 ** Enlarge a memory allocation.  If an out-of-memory allocation occurs,
    192 ** then free the old allocation.
    193 */
    194 static void *fts3ReallocOrFree(void *pOrig, int nNew){
    195   void *pRet = sqlite3_realloc(pOrig, nNew);
    196   if( !pRet ){
    197     sqlite3_free(pOrig);
    198   }
    199   return pRet;
    200 }
    201 
    202 /*
    203 ** Buffer zInput, length nInput, contains the contents of a quoted string
    204 ** that appeared as part of an fts3 query expression. Neither quote character
    205 ** is included in the buffer. This function attempts to tokenize the entire
    206 ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE
    207 ** containing the results.
    208 **
    209 ** If successful, SQLITE_OK is returned and *ppExpr set to point at the
    210 ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
    211 ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
    212 ** to 0.
    213 */
    214 static int getNextString(
    215   ParseContext *pParse,                   /* fts3 query parse context */
    216   const char *zInput, int nInput,         /* Input string */
    217   Fts3Expr **ppExpr                       /* OUT: expression */
    218 ){
    219   sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
    220   sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
    221   int rc;
    222   Fts3Expr *p = 0;
    223   sqlite3_tokenizer_cursor *pCursor = 0;
    224   char *zTemp = 0;
    225   int nTemp = 0;
    226 
    227   rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
    228   if( rc==SQLITE_OK ){
    229     int ii;
    230     pCursor->pTokenizer = pTokenizer;
    231     for(ii=0; rc==SQLITE_OK; ii++){
    232       const char *zToken;
    233       int nToken, iBegin, iEnd, iPos;
    234       rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
    235       if( rc==SQLITE_OK ){
    236         int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
    237         p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken));
    238         zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken);
    239         if( !p || !zTemp ){
    240           goto no_mem;
    241         }
    242         if( ii==0 ){
    243           memset(p, 0, nByte);
    244           p->pPhrase = (Fts3Phrase *)&p[1];
    245         }
    246         p->pPhrase = (Fts3Phrase *)&p[1];
    247         memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken));
    248         p->pPhrase->nToken = ii+1;
    249         p->pPhrase->aToken[ii].n = nToken;
    250         memcpy(&zTemp[nTemp], zToken, nToken);
    251         nTemp += nToken;
    252         if( iEnd<nInput && zInput[iEnd]=='*' ){
    253           p->pPhrase->aToken[ii].isPrefix = 1;
    254         }else{
    255           p->pPhrase->aToken[ii].isPrefix = 0;
    256         }
    257       }
    258     }
    259 
    260     pModule->xClose(pCursor);
    261     pCursor = 0;
    262   }
    263 
    264   if( rc==SQLITE_DONE ){
    265     int jj;
    266     char *zNew = NULL;
    267     int nNew = 0;
    268     int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
    269     nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken);
    270     p = fts3ReallocOrFree(p, nByte + nTemp);
    271     if( !p ){
    272       goto no_mem;
    273     }
    274     if( zTemp ){
    275       zNew = &(((char *)p)[nByte]);
    276       memcpy(zNew, zTemp, nTemp);
    277     }else{
    278       memset(p, 0, nByte+nTemp);
    279     }
    280     p->pPhrase = (Fts3Phrase *)&p[1];
    281     for(jj=0; jj<p->pPhrase->nToken; jj++){
    282       p->pPhrase->aToken[jj].z = &zNew[nNew];
    283       nNew += p->pPhrase->aToken[jj].n;
    284     }
    285     sqlite3_free(zTemp);
    286     p->eType = FTSQUERY_PHRASE;
    287     p->pPhrase->iColumn = pParse->iDefaultCol;
    288     rc = SQLITE_OK;
    289   }
    290 
    291   *ppExpr = p;
    292   return rc;
    293 no_mem:
    294 
    295   if( pCursor ){
    296     pModule->xClose(pCursor);
    297   }
    298   sqlite3_free(zTemp);
    299   sqlite3_free(p);
    300   *ppExpr = 0;
    301   return SQLITE_NOMEM;
    302 }
    303 
    304 /*
    305 ** Function getNextNode(), which is called by fts3ExprParse(), may itself
    306 ** call fts3ExprParse(). So this forward declaration is required.
    307 */
    308 static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
    309 
    310 /*
    311 ** The output variable *ppExpr is populated with an allocated Fts3Expr
    312 ** structure, or set to 0 if the end of the input buffer is reached.
    313 **
    314 ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
    315 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
    316 ** If SQLITE_ERROR is returned, pContext is populated with an error message.
    317 */
    318 static int getNextNode(
    319   ParseContext *pParse,                   /* fts3 query parse context */
    320   const char *z, int n,                   /* Input string */
    321   Fts3Expr **ppExpr,                      /* OUT: expression */
    322   int *pnConsumed                         /* OUT: Number of bytes consumed */
    323 ){
    324   static const struct Fts3Keyword {
    325     char *z;                              /* Keyword text */
    326     unsigned char n;                      /* Length of the keyword */
    327     unsigned char parenOnly;              /* Only valid in paren mode */
    328     unsigned char eType;                  /* Keyword code */
    329   } aKeyword[] = {
    330     { "OR" ,  2, 0, FTSQUERY_OR   },
    331     { "AND",  3, 1, FTSQUERY_AND  },
    332     { "NOT",  3, 1, FTSQUERY_NOT  },
    333     { "NEAR", 4, 0, FTSQUERY_NEAR }
    334   };
    335   int ii;
    336   int iCol;
    337   int iColLen;
    338   int rc;
    339   Fts3Expr *pRet = 0;
    340 
    341   const char *zInput = z;
    342   int nInput = n;
    343 
    344   /* Skip over any whitespace before checking for a keyword, an open or
    345   ** close bracket, or a quoted string.
    346   */
    347   while( nInput>0 && fts3isspace(*zInput) ){
    348     nInput--;
    349     zInput++;
    350   }
    351   if( nInput==0 ){
    352     return SQLITE_DONE;
    353   }
    354 
    355   /* See if we are dealing with a keyword. */
    356   for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
    357     const struct Fts3Keyword *pKey = &aKeyword[ii];
    358 
    359     if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){
    360       continue;
    361     }
    362 
    363     if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
    364       int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
    365       int nKey = pKey->n;
    366       char cNext;
    367 
    368       /* If this is a "NEAR" keyword, check for an explicit nearness. */
    369       if( pKey->eType==FTSQUERY_NEAR ){
    370         assert( nKey==4 );
    371         if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
    372           nNear = 0;
    373           for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){
    374             nNear = nNear * 10 + (zInput[nKey] - '0');
    375           }
    376         }
    377       }
    378 
    379       /* At this point this is probably a keyword. But for that to be true,
    380       ** the next byte must contain either whitespace, an open or close
    381       ** parenthesis, a quote character, or EOF.
    382       */
    383       cNext = zInput[nKey];
    384       if( fts3isspace(cNext)
    385        || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
    386       ){
    387         pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr));
    388         if( !pRet ){
    389           return SQLITE_NOMEM;
    390         }
    391         pRet->eType = pKey->eType;
    392         pRet->nNear = nNear;
    393         *ppExpr = pRet;
    394         *pnConsumed = (int)((zInput - z) + nKey);
    395         return SQLITE_OK;
    396       }
    397 
    398       /* Turns out that wasn't a keyword after all. This happens if the
    399       ** user has supplied a token such as "ORacle". Continue.
    400       */
    401     }
    402   }
    403 
    404   /* Check for an open bracket. */
    405   if( sqlite3_fts3_enable_parentheses ){
    406     if( *zInput=='(' ){
    407       int nConsumed;
    408       pParse->nNest++;
    409       rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
    410       if( rc==SQLITE_OK && !*ppExpr ){
    411         rc = SQLITE_DONE;
    412       }
    413       *pnConsumed = (int)((zInput - z) + 1 + nConsumed);
    414       return rc;
    415     }
    416 
    417     /* Check for a close bracket. */
    418     if( *zInput==')' ){
    419       pParse->nNest--;
    420       *pnConsumed = (int)((zInput - z) + 1);
    421       return SQLITE_DONE;
    422     }
    423   }
    424 
    425   /* See if we are dealing with a quoted phrase. If this is the case, then
    426   ** search for the closing quote and pass the whole string to getNextString()
    427   ** for processing. This is easy to do, as fts3 has no syntax for escaping
    428   ** a quote character embedded in a string.
    429   */
    430   if( *zInput=='"' ){
    431     for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
    432     *pnConsumed = (int)((zInput - z) + ii + 1);
    433     if( ii==nInput ){
    434       return SQLITE_ERROR;
    435     }
    436     return getNextString(pParse, &zInput[1], ii-1, ppExpr);
    437   }
    438 
    439 
    440   /* If control flows to this point, this must be a regular token, or
    441   ** the end of the input. Read a regular token using the sqlite3_tokenizer
    442   ** interface. Before doing so, figure out if there is an explicit
    443   ** column specifier for the token.
    444   **
    445   ** TODO: Strangely, it is not possible to associate a column specifier
    446   ** with a quoted phrase, only with a single token. Not sure if this was
    447   ** an implementation artifact or an intentional decision when fts3 was
    448   ** first implemented. Whichever it was, this module duplicates the
    449   ** limitation.
    450   */
    451   iCol = pParse->iDefaultCol;
    452   iColLen = 0;
    453   for(ii=0; ii<pParse->nCol; ii++){
    454     const char *zStr = pParse->azCol[ii];
    455     int nStr = (int)strlen(zStr);
    456     if( nInput>nStr && zInput[nStr]==':'
    457      && sqlite3_strnicmp(zStr, zInput, nStr)==0
    458     ){
    459       iCol = ii;
    460       iColLen = (int)((zInput - z) + nStr + 1);
    461       break;
    462     }
    463   }
    464   rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
    465   *pnConsumed += iColLen;
    466   return rc;
    467 }
    468 
    469 /*
    470 ** The argument is an Fts3Expr structure for a binary operator (any type
    471 ** except an FTSQUERY_PHRASE). Return an integer value representing the
    472 ** precedence of the operator. Lower values have a higher precedence (i.e.
    473 ** group more tightly). For example, in the C language, the == operator
    474 ** groups more tightly than ||, and would therefore have a higher precedence.
    475 **
    476 ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
    477 ** is defined), the order of the operators in precedence from highest to
    478 ** lowest is:
    479 **
    480 **   NEAR
    481 **   NOT
    482 **   AND (including implicit ANDs)
    483 **   OR
    484 **
    485 ** Note that when using the old query syntax, the OR operator has a higher
    486 ** precedence than the AND operator.
    487 */
    488 static int opPrecedence(Fts3Expr *p){
    489   assert( p->eType!=FTSQUERY_PHRASE );
    490   if( sqlite3_fts3_enable_parentheses ){
    491     return p->eType;
    492   }else if( p->eType==FTSQUERY_NEAR ){
    493     return 1;
    494   }else if( p->eType==FTSQUERY_OR ){
    495     return 2;
    496   }
    497   assert( p->eType==FTSQUERY_AND );
    498   return 3;
    499 }
    500 
    501 /*
    502 ** Argument ppHead contains a pointer to the current head of a query
    503 ** expression tree being parsed. pPrev is the expression node most recently
    504 ** inserted into the tree. This function adds pNew, which is always a binary
    505 ** operator node, into the expression tree based on the relative precedence
    506 ** of pNew and the existing nodes of the tree. This may result in the head
    507 ** of the tree changing, in which case *ppHead is set to the new root node.
    508 */
    509 static void insertBinaryOperator(
    510   Fts3Expr **ppHead,       /* Pointer to the root node of a tree */
    511   Fts3Expr *pPrev,         /* Node most recently inserted into the tree */
    512   Fts3Expr *pNew           /* New binary node to insert into expression tree */
    513 ){
    514   Fts3Expr *pSplit = pPrev;
    515   while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){
    516     pSplit = pSplit->pParent;
    517   }
    518 
    519   if( pSplit->pParent ){
    520     assert( pSplit->pParent->pRight==pSplit );
    521     pSplit->pParent->pRight = pNew;
    522     pNew->pParent = pSplit->pParent;
    523   }else{
    524     *ppHead = pNew;
    525   }
    526   pNew->pLeft = pSplit;
    527   pSplit->pParent = pNew;
    528 }
    529 
    530 /*
    531 ** Parse the fts3 query expression found in buffer z, length n. This function
    532 ** returns either when the end of the buffer is reached or an unmatched
    533 ** closing bracket - ')' - is encountered.
    534 **
    535 ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
    536 ** parsed form of the expression and *pnConsumed is set to the number of
    537 ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
    538 ** (out of memory error) or SQLITE_ERROR (parse error) is returned.
    539 */
    540 static int fts3ExprParse(
    541   ParseContext *pParse,                   /* fts3 query parse context */
    542   const char *z, int n,                   /* Text of MATCH query */
    543   Fts3Expr **ppExpr,                      /* OUT: Parsed query structure */
    544   int *pnConsumed                         /* OUT: Number of bytes consumed */
    545 ){
    546   Fts3Expr *pRet = 0;
    547   Fts3Expr *pPrev = 0;
    548   Fts3Expr *pNotBranch = 0;               /* Only used in legacy parse mode */
    549   int nIn = n;
    550   const char *zIn = z;
    551   int rc = SQLITE_OK;
    552   int isRequirePhrase = 1;
    553 
    554   while( rc==SQLITE_OK ){
    555     Fts3Expr *p = 0;
    556     int nByte = 0;
    557     rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
    558     if( rc==SQLITE_OK ){
    559       int isPhrase;
    560 
    561       if( !sqlite3_fts3_enable_parentheses
    562        && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot
    563       ){
    564         /* Create an implicit NOT operator. */
    565         Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
    566         if( !pNot ){
    567           sqlite3Fts3ExprFree(p);
    568           rc = SQLITE_NOMEM;
    569           goto exprparse_out;
    570         }
    571         pNot->eType = FTSQUERY_NOT;
    572         pNot->pRight = p;
    573         if( pNotBranch ){
    574           pNot->pLeft = pNotBranch;
    575         }
    576         pNotBranch = pNot;
    577         p = pPrev;
    578       }else{
    579         int eType = p->eType;
    580         assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot );
    581         isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
    582 
    583         /* The isRequirePhrase variable is set to true if a phrase or
    584         ** an expression contained in parenthesis is required. If a
    585         ** binary operator (AND, OR, NOT or NEAR) is encounted when
    586         ** isRequirePhrase is set, this is a syntax error.
    587         */
    588         if( !isPhrase && isRequirePhrase ){
    589           sqlite3Fts3ExprFree(p);
    590           rc = SQLITE_ERROR;
    591           goto exprparse_out;
    592         }
    593 
    594         if( isPhrase && !isRequirePhrase ){
    595           /* Insert an implicit AND operator. */
    596           Fts3Expr *pAnd;
    597           assert( pRet && pPrev );
    598           pAnd = fts3MallocZero(sizeof(Fts3Expr));
    599           if( !pAnd ){
    600             sqlite3Fts3ExprFree(p);
    601             rc = SQLITE_NOMEM;
    602             goto exprparse_out;
    603           }
    604           pAnd->eType = FTSQUERY_AND;
    605           insertBinaryOperator(&pRet, pPrev, pAnd);
    606           pPrev = pAnd;
    607         }
    608 
    609         /* This test catches attempts to make either operand of a NEAR
    610         ** operator something other than a phrase. For example, either of
    611         ** the following:
    612         **
    613         **    (bracketed expression) NEAR phrase
    614         **    phrase NEAR (bracketed expression)
    615         **
    616         ** Return an error in either case.
    617         */
    618         if( pPrev && (
    619             (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
    620          || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
    621         )){
    622           sqlite3Fts3ExprFree(p);
    623           rc = SQLITE_ERROR;
    624           goto exprparse_out;
    625         }
    626 
    627         if( isPhrase ){
    628           if( pRet ){
    629             assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
    630             pPrev->pRight = p;
    631             p->pParent = pPrev;
    632           }else{
    633             pRet = p;
    634           }
    635         }else{
    636           insertBinaryOperator(&pRet, pPrev, p);
    637         }
    638         isRequirePhrase = !isPhrase;
    639       }
    640       assert( nByte>0 );
    641     }
    642     assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
    643     nIn -= nByte;
    644     zIn += nByte;
    645     pPrev = p;
    646   }
    647 
    648   if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
    649     rc = SQLITE_ERROR;
    650   }
    651 
    652   if( rc==SQLITE_DONE ){
    653     rc = SQLITE_OK;
    654     if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
    655       if( !pRet ){
    656         rc = SQLITE_ERROR;
    657       }else{
    658         Fts3Expr *pIter = pNotBranch;
    659         while( pIter->pLeft ){
    660           pIter = pIter->pLeft;
    661         }
    662         pIter->pLeft = pRet;
    663         pRet = pNotBranch;
    664       }
    665     }
    666   }
    667   *pnConsumed = n - nIn;
    668 
    669 exprparse_out:
    670   if( rc!=SQLITE_OK ){
    671     sqlite3Fts3ExprFree(pRet);
    672     sqlite3Fts3ExprFree(pNotBranch);
    673     pRet = 0;
    674   }
    675   *ppExpr = pRet;
    676   return rc;
    677 }
    678 
    679 /*
    680 ** Parameters z and n contain a pointer to and length of a buffer containing
    681 ** an fts3 query expression, respectively. This function attempts to parse the
    682 ** query expression and create a tree of Fts3Expr structures representing the
    683 ** parsed expression. If successful, *ppExpr is set to point to the head
    684 ** of the parsed expression tree and SQLITE_OK is returned. If an error
    685 ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
    686 ** error) is returned and *ppExpr is set to 0.
    687 **
    688 ** If parameter n is a negative number, then z is assumed to point to a
    689 ** nul-terminated string and the length is determined using strlen().
    690 **
    691 ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
    692 ** use to normalize query tokens while parsing the expression. The azCol[]
    693 ** array, which is assumed to contain nCol entries, should contain the names
    694 ** of each column in the target fts3 table, in order from left to right.
    695 ** Column names must be nul-terminated strings.
    696 **
    697 ** The iDefaultCol parameter should be passed the index of the table column
    698 ** that appears on the left-hand-side of the MATCH operator (the default
    699 ** column to match against for tokens for which a column name is not explicitly
    700 ** specified as part of the query string), or -1 if tokens may by default
    701 ** match any table column.
    702 */
    703 int sqlite3Fts3ExprParse(
    704   sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
    705   char **azCol,                       /* Array of column names for fts3 table */
    706   int nCol,                           /* Number of entries in azCol[] */
    707   int iDefaultCol,                    /* Default column to query */
    708   const char *z, int n,               /* Text of MATCH query */
    709   Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
    710 ){
    711   int nParsed;
    712   int rc;
    713   ParseContext sParse;
    714   sParse.pTokenizer = pTokenizer;
    715   sParse.azCol = (const char **)azCol;
    716   sParse.nCol = nCol;
    717   sParse.iDefaultCol = iDefaultCol;
    718   sParse.nNest = 0;
    719   if( z==0 ){
    720     *ppExpr = 0;
    721     return SQLITE_OK;
    722   }
    723   if( n<0 ){
    724     n = (int)strlen(z);
    725   }
    726   rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
    727 
    728   /* Check for mismatched parenthesis */
    729   if( rc==SQLITE_OK && sParse.nNest ){
    730     rc = SQLITE_ERROR;
    731     sqlite3Fts3ExprFree(*ppExpr);
    732     *ppExpr = 0;
    733   }
    734 
    735   return rc;
    736 }
    737 
    738 /*
    739 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
    740 */
    741 void sqlite3Fts3ExprFree(Fts3Expr *p){
    742   if( p ){
    743     sqlite3Fts3ExprFree(p->pLeft);
    744     sqlite3Fts3ExprFree(p->pRight);
    745     sqlite3_free(p->aDoclist);
    746     sqlite3_free(p);
    747   }
    748 }
    749 
    750 /****************************************************************************
    751 *****************************************************************************
    752 ** Everything after this point is just test code.
    753 */
    754 
    755 #ifdef SQLITE_TEST
    756 
    757 #include <stdio.h>
    758 
    759 /*
    760 ** Function to query the hash-table of tokenizers (see README.tokenizers).
    761 */
    762 static int queryTestTokenizer(
    763   sqlite3 *db,
    764   const char *zName,
    765   const sqlite3_tokenizer_module **pp
    766 ){
    767   int rc;
    768   sqlite3_stmt *pStmt;
    769   const char zSql[] = "SELECT fts3_tokenizer(?)";
    770 
    771   *pp = 0;
    772   rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
    773   if( rc!=SQLITE_OK ){
    774     return rc;
    775   }
    776 
    777   sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
    778   if( SQLITE_ROW==sqlite3_step(pStmt) ){
    779     if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
    780       memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
    781     }
    782   }
    783 
    784   return sqlite3_finalize(pStmt);
    785 }
    786 
    787 /*
    788 ** Return a pointer to a buffer containing a text representation of the
    789 ** expression passed as the first argument. The buffer is obtained from
    790 ** sqlite3_malloc(). It is the responsibility of the caller to use
    791 ** sqlite3_free() to release the memory. If an OOM condition is encountered,
    792 ** NULL is returned.
    793 **
    794 ** If the second argument is not NULL, then its contents are prepended to
    795 ** the returned expression text and then freed using sqlite3_free().
    796 */
    797 static char *exprToString(Fts3Expr *pExpr, char *zBuf){
    798   switch( pExpr->eType ){
    799     case FTSQUERY_PHRASE: {
    800       Fts3Phrase *pPhrase = pExpr->pPhrase;
    801       int i;
    802       zBuf = sqlite3_mprintf(
    803           "%zPHRASE %d %d", zBuf, pPhrase->iColumn, pPhrase->isNot);
    804       for(i=0; zBuf && i<pPhrase->nToken; i++){
    805         zBuf = sqlite3_mprintf("%z %.*s%s", zBuf,
    806             pPhrase->aToken[i].n, pPhrase->aToken[i].z,
    807             (pPhrase->aToken[i].isPrefix?"+":"")
    808         );
    809       }
    810       return zBuf;
    811     }
    812 
    813     case FTSQUERY_NEAR:
    814       zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear);
    815       break;
    816     case FTSQUERY_NOT:
    817       zBuf = sqlite3_mprintf("%zNOT ", zBuf);
    818       break;
    819     case FTSQUERY_AND:
    820       zBuf = sqlite3_mprintf("%zAND ", zBuf);
    821       break;
    822     case FTSQUERY_OR:
    823       zBuf = sqlite3_mprintf("%zOR ", zBuf);
    824       break;
    825   }
    826 
    827   if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf);
    828   if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf);
    829   if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf);
    830 
    831   if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf);
    832   if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf);
    833 
    834   return zBuf;
    835 }
    836 
    837 /*
    838 ** This is the implementation of a scalar SQL function used to test the
    839 ** expression parser. It should be called as follows:
    840 **
    841 **   fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
    842 **
    843 ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
    844 ** to parse the query expression (see README.tokenizers). The second argument
    845 ** is the query expression to parse. Each subsequent argument is the name
    846 ** of a column of the fts3 table that the query expression may refer to.
    847 ** For example:
    848 **
    849 **   SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
    850 */
    851 static void fts3ExprTest(
    852   sqlite3_context *context,
    853   int argc,
    854   sqlite3_value **argv
    855 ){
    856   sqlite3_tokenizer_module const *pModule = 0;
    857   sqlite3_tokenizer *pTokenizer = 0;
    858   int rc;
    859   char **azCol = 0;
    860   const char *zExpr;
    861   int nExpr;
    862   int nCol;
    863   int ii;
    864   Fts3Expr *pExpr;
    865   char *zBuf = 0;
    866   sqlite3 *db = sqlite3_context_db_handle(context);
    867 
    868   if( argc<3 ){
    869     sqlite3_result_error(context,
    870         "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
    871     );
    872     return;
    873   }
    874 
    875   rc = queryTestTokenizer(db,
    876                           (const char *)sqlite3_value_text(argv[0]), &pModule);
    877   if( rc==SQLITE_NOMEM ){
    878     sqlite3_result_error_nomem(context);
    879     goto exprtest_out;
    880   }else if( !pModule ){
    881     sqlite3_result_error(context, "No such tokenizer module", -1);
    882     goto exprtest_out;
    883   }
    884 
    885   rc = pModule->xCreate(0, 0, &pTokenizer);
    886   assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
    887   if( rc==SQLITE_NOMEM ){
    888     sqlite3_result_error_nomem(context);
    889     goto exprtest_out;
    890   }
    891   pTokenizer->pModule = pModule;
    892 
    893   zExpr = (const char *)sqlite3_value_text(argv[1]);
    894   nExpr = sqlite3_value_bytes(argv[1]);
    895   nCol = argc-2;
    896   azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
    897   if( !azCol ){
    898     sqlite3_result_error_nomem(context);
    899     goto exprtest_out;
    900   }
    901   for(ii=0; ii<nCol; ii++){
    902     azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
    903   }
    904 
    905   rc = sqlite3Fts3ExprParse(
    906       pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr
    907   );
    908   if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
    909     sqlite3_result_error(context, "Error parsing expression", -1);
    910   }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
    911     sqlite3_result_error_nomem(context);
    912   }else{
    913     sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
    914     sqlite3_free(zBuf);
    915   }
    916 
    917   sqlite3Fts3ExprFree(pExpr);
    918 
    919 exprtest_out:
    920   if( pModule && pTokenizer ){
    921     rc = pModule->xDestroy(pTokenizer);
    922   }
    923   sqlite3_free(azCol);
    924 }
    925 
    926 /*
    927 ** Register the query expression parser test function fts3_exprtest()
    928 ** with database connection db.
    929 */
    930 int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
    931   return sqlite3_create_function(
    932       db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
    933   );
    934 }
    935 
    936 #endif
    937 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
    938