/*
** 2008 Nov 28
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This module contains code that implements a parser for fts3 query strings
** (the right-hand argument to the MATCH operator). Because the supported
** syntax is relatively simple, the whole tokenizer/parser system is
** hand-coded.
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)

/*
** By default, this module parses the legacy syntax that has been
** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
** is defined, then it uses the new syntax. The differences between
** the new and the old syntaxes are:
**
**  a) The new syntax supports parentheses. The old does not.
**
**  b) The new syntax supports the AND and NOT operators. The old does not.
**
**  c) The old syntax supports the "-" token qualifier. This is not
**     supported by the new syntax (it is replaced by the NOT operator).
**
**  d) When using the old syntax, the OR operator has a greater precedence
**     than an implicit AND. When using the new, both implicit and explicit
**     AND operators have a higher precedence than OR.
**
** If compiled with SQLITE_TEST defined, then this module exports the
** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
** to zero causes the module to use the old syntax. If it is set to
** non-zero the new syntax is activated. This is so both syntaxes can
** be tested using a single build of testfixture.
**
** The following describes the syntax supported by the fts3 MATCH
** operator in a similar format to that used by the lemon parser
** generator. This module does not actually use lemon, it uses a
** custom parser.
**
**   query ::= andexpr (OR andexpr)*.
**
**   andexpr ::= notexpr (AND? notexpr)*.
**
**   notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
**   notexpr ::= LP query RP.
**
**   nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
**
**   distance_opt ::= .
**   distance_opt ::= / INTEGER.
**
**   phrase ::= TOKEN.
**   phrase ::= COLUMN:TOKEN.
**   phrase ::= "TOKEN TOKEN TOKEN...".
*/

/*
** Syntax selector. In SQLITE_TEST builds this is a run-time variable that
** starts out as 0 (legacy syntax); otherwise it is a compile-time constant
** that is 1 only when SQLITE_ENABLE_FTS3_PARENTHESIS is defined.
*/
#ifdef SQLITE_TEST
int sqlite3_fts3_enable_parentheses = 0;
#else
# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
# define sqlite3_fts3_enable_parentheses 1
# else
# define sqlite3_fts3_enable_parentheses 0
# endif
#endif

/*
** Default span for NEAR operators. This is the distance used when a NEAR
** keyword is not followed by an explicit "/<integer>" suffix.
*/
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10

#include "fts3Int.h"
#include <string.h>
#include <assert.h>

/*
** State shared by the parsing routines in this file while a single query
** string is being parsed.
*/
typedef struct ParseContext ParseContext;
struct ParseContext {
  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
  const char **azCol;                 /* Array of column names for fts3 table */
  int nCol;                           /* Number of entries in azCol[] */
  int iDefaultCol;                    /* Default column to query */
  /* NOTE(review): nothing in the code reviewed here assigns or reads pCtx;
  ** confirm whether it is still needed. */
  sqlite3_context *pCtx;              /* Write error message here */
  /* Incremented for each '(' and decremented for each ')' that getNextNode()
  ** sees; a non-zero value after parsing means unbalanced parentheses. */
  int nNest;                          /* Number of nested brackets */
};

/*
** This function is equivalent to the standard isspace() function.
**
** The standard isspace() can be awkward to use safely, because although it
** is defined to accept an argument of type int, its behaviour when passed
** an integer that falls outside of the range of the unsigned char type
** is undefined (and sometimes, "undefined" means segfault). This wrapper
** is defined to accept an argument of type char, and always returns 0 for
** any values that fall outside of the range of the unsigned char type (i.e.
** negative values).
104 */ 105 static int fts3isspace(char c){ 106 return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; 107 } 108 109 /* 110 ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful, 111 ** zero the memory before returning a pointer to it. If unsuccessful, 112 ** return NULL. 113 */ 114 static void *fts3MallocZero(int nByte){ 115 void *pRet = sqlite3_malloc(nByte); 116 if( pRet ) memset(pRet, 0, nByte); 117 return pRet; 118 } 119 120 121 /* 122 ** Extract the next token from buffer z (length n) using the tokenizer 123 ** and other information (column names etc.) in pParse. Create an Fts3Expr 124 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this 125 ** single token and set *ppExpr to point to it. If the end of the buffer is 126 ** reached before a token is found, set *ppExpr to zero. It is the 127 ** responsibility of the caller to eventually deallocate the allocated 128 ** Fts3Expr structure (if any) by passing it to sqlite3_free(). 129 ** 130 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation 131 ** fails. 
132 */ 133 static int getNextToken( 134 ParseContext *pParse, /* fts3 query parse context */ 135 int iCol, /* Value for Fts3Phrase.iColumn */ 136 const char *z, int n, /* Input string */ 137 Fts3Expr **ppExpr, /* OUT: expression */ 138 int *pnConsumed /* OUT: Number of bytes consumed */ 139 ){ 140 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; 141 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; 142 int rc; 143 sqlite3_tokenizer_cursor *pCursor; 144 Fts3Expr *pRet = 0; 145 int nConsumed = 0; 146 147 rc = pModule->xOpen(pTokenizer, z, n, &pCursor); 148 if( rc==SQLITE_OK ){ 149 const char *zToken; 150 int nToken, iStart, iEnd, iPosition; 151 int nByte; /* total space to allocate */ 152 153 pCursor->pTokenizer = pTokenizer; 154 rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); 155 156 if( rc==SQLITE_OK ){ 157 nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; 158 pRet = (Fts3Expr *)fts3MallocZero(nByte); 159 if( !pRet ){ 160 rc = SQLITE_NOMEM; 161 }else{ 162 pRet->eType = FTSQUERY_PHRASE; 163 pRet->pPhrase = (Fts3Phrase *)&pRet[1]; 164 pRet->pPhrase->nToken = 1; 165 pRet->pPhrase->iColumn = iCol; 166 pRet->pPhrase->aToken[0].n = nToken; 167 pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; 168 memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); 169 170 if( iEnd<n && z[iEnd]=='*' ){ 171 pRet->pPhrase->aToken[0].isPrefix = 1; 172 iEnd++; 173 } 174 if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){ 175 pRet->pPhrase->isNot = 1; 176 } 177 } 178 nConsumed = iEnd; 179 } 180 181 pModule->xClose(pCursor); 182 } 183 184 *pnConsumed = nConsumed; 185 *ppExpr = pRet; 186 return rc; 187 } 188 189 190 /* 191 ** Enlarge a memory allocation. If an out-of-memory allocation occurs, 192 ** then free the old allocation. 
193 */ 194 static void *fts3ReallocOrFree(void *pOrig, int nNew){ 195 void *pRet = sqlite3_realloc(pOrig, nNew); 196 if( !pRet ){ 197 sqlite3_free(pOrig); 198 } 199 return pRet; 200 } 201 202 /* 203 ** Buffer zInput, length nInput, contains the contents of a quoted string 204 ** that appeared as part of an fts3 query expression. Neither quote character 205 ** is included in the buffer. This function attempts to tokenize the entire 206 ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE 207 ** containing the results. 208 ** 209 ** If successful, SQLITE_OK is returned and *ppExpr set to point at the 210 ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory 211 ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set 212 ** to 0. 213 */ 214 static int getNextString( 215 ParseContext *pParse, /* fts3 query parse context */ 216 const char *zInput, int nInput, /* Input string */ 217 Fts3Expr **ppExpr /* OUT: expression */ 218 ){ 219 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; 220 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; 221 int rc; 222 Fts3Expr *p = 0; 223 sqlite3_tokenizer_cursor *pCursor = 0; 224 char *zTemp = 0; 225 int nTemp = 0; 226 227 rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor); 228 if( rc==SQLITE_OK ){ 229 int ii; 230 pCursor->pTokenizer = pTokenizer; 231 for(ii=0; rc==SQLITE_OK; ii++){ 232 const char *zToken; 233 int nToken, iBegin, iEnd, iPos; 234 rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); 235 if( rc==SQLITE_OK ){ 236 int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); 237 p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken)); 238 zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken); 239 if( !p || !zTemp ){ 240 goto no_mem; 241 } 242 if( ii==0 ){ 243 memset(p, 0, nByte); 244 p->pPhrase = (Fts3Phrase *)&p[1]; 245 } 246 p->pPhrase = (Fts3Phrase *)&p[1]; 247 memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken)); 248 
p->pPhrase->nToken = ii+1; 249 p->pPhrase->aToken[ii].n = nToken; 250 memcpy(&zTemp[nTemp], zToken, nToken); 251 nTemp += nToken; 252 if( iEnd<nInput && zInput[iEnd]=='*' ){ 253 p->pPhrase->aToken[ii].isPrefix = 1; 254 }else{ 255 p->pPhrase->aToken[ii].isPrefix = 0; 256 } 257 } 258 } 259 260 pModule->xClose(pCursor); 261 pCursor = 0; 262 } 263 264 if( rc==SQLITE_DONE ){ 265 int jj; 266 char *zNew = NULL; 267 int nNew = 0; 268 int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); 269 nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken); 270 p = fts3ReallocOrFree(p, nByte + nTemp); 271 if( !p ){ 272 goto no_mem; 273 } 274 if( zTemp ){ 275 zNew = &(((char *)p)[nByte]); 276 memcpy(zNew, zTemp, nTemp); 277 }else{ 278 memset(p, 0, nByte+nTemp); 279 } 280 p->pPhrase = (Fts3Phrase *)&p[1]; 281 for(jj=0; jj<p->pPhrase->nToken; jj++){ 282 p->pPhrase->aToken[jj].z = &zNew[nNew]; 283 nNew += p->pPhrase->aToken[jj].n; 284 } 285 sqlite3_free(zTemp); 286 p->eType = FTSQUERY_PHRASE; 287 p->pPhrase->iColumn = pParse->iDefaultCol; 288 rc = SQLITE_OK; 289 } 290 291 *ppExpr = p; 292 return rc; 293 no_mem: 294 295 if( pCursor ){ 296 pModule->xClose(pCursor); 297 } 298 sqlite3_free(zTemp); 299 sqlite3_free(p); 300 *ppExpr = 0; 301 return SQLITE_NOMEM; 302 } 303 304 /* 305 ** Function getNextNode(), which is called by fts3ExprParse(), may itself 306 ** call fts3ExprParse(). So this forward declaration is required. 307 */ 308 static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); 309 310 /* 311 ** The output variable *ppExpr is populated with an allocated Fts3Expr 312 ** structure, or set to 0 if the end of the input buffer is reached. 313 ** 314 ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM 315 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. 316 ** If SQLITE_ERROR is returned, pContext is populated with an error message. 
317 */ 318 static int getNextNode( 319 ParseContext *pParse, /* fts3 query parse context */ 320 const char *z, int n, /* Input string */ 321 Fts3Expr **ppExpr, /* OUT: expression */ 322 int *pnConsumed /* OUT: Number of bytes consumed */ 323 ){ 324 static const struct Fts3Keyword { 325 char *z; /* Keyword text */ 326 unsigned char n; /* Length of the keyword */ 327 unsigned char parenOnly; /* Only valid in paren mode */ 328 unsigned char eType; /* Keyword code */ 329 } aKeyword[] = { 330 { "OR" , 2, 0, FTSQUERY_OR }, 331 { "AND", 3, 1, FTSQUERY_AND }, 332 { "NOT", 3, 1, FTSQUERY_NOT }, 333 { "NEAR", 4, 0, FTSQUERY_NEAR } 334 }; 335 int ii; 336 int iCol; 337 int iColLen; 338 int rc; 339 Fts3Expr *pRet = 0; 340 341 const char *zInput = z; 342 int nInput = n; 343 344 /* Skip over any whitespace before checking for a keyword, an open or 345 ** close bracket, or a quoted string. 346 */ 347 while( nInput>0 && fts3isspace(*zInput) ){ 348 nInput--; 349 zInput++; 350 } 351 if( nInput==0 ){ 352 return SQLITE_DONE; 353 } 354 355 /* See if we are dealing with a keyword. */ 356 for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){ 357 const struct Fts3Keyword *pKey = &aKeyword[ii]; 358 359 if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){ 360 continue; 361 } 362 363 if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){ 364 int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM; 365 int nKey = pKey->n; 366 char cNext; 367 368 /* If this is a "NEAR" keyword, check for an explicit nearness. */ 369 if( pKey->eType==FTSQUERY_NEAR ){ 370 assert( nKey==4 ); 371 if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){ 372 nNear = 0; 373 for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){ 374 nNear = nNear * 10 + (zInput[nKey] - '0'); 375 } 376 } 377 } 378 379 /* At this point this is probably a keyword. But for that to be true, 380 ** the next byte must contain either whitespace, an open or close 381 ** parenthesis, a quote character, or EOF. 
382 */ 383 cNext = zInput[nKey]; 384 if( fts3isspace(cNext) 385 || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 386 ){ 387 pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr)); 388 if( !pRet ){ 389 return SQLITE_NOMEM; 390 } 391 pRet->eType = pKey->eType; 392 pRet->nNear = nNear; 393 *ppExpr = pRet; 394 *pnConsumed = (int)((zInput - z) + nKey); 395 return SQLITE_OK; 396 } 397 398 /* Turns out that wasn't a keyword after all. This happens if the 399 ** user has supplied a token such as "ORacle". Continue. 400 */ 401 } 402 } 403 404 /* Check for an open bracket. */ 405 if( sqlite3_fts3_enable_parentheses ){ 406 if( *zInput=='(' ){ 407 int nConsumed; 408 pParse->nNest++; 409 rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed); 410 if( rc==SQLITE_OK && !*ppExpr ){ 411 rc = SQLITE_DONE; 412 } 413 *pnConsumed = (int)((zInput - z) + 1 + nConsumed); 414 return rc; 415 } 416 417 /* Check for a close bracket. */ 418 if( *zInput==')' ){ 419 pParse->nNest--; 420 *pnConsumed = (int)((zInput - z) + 1); 421 return SQLITE_DONE; 422 } 423 } 424 425 /* See if we are dealing with a quoted phrase. If this is the case, then 426 ** search for the closing quote and pass the whole string to getNextString() 427 ** for processing. This is easy to do, as fts3 has no syntax for escaping 428 ** a quote character embedded in a string. 429 */ 430 if( *zInput=='"' ){ 431 for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); 432 *pnConsumed = (int)((zInput - z) + ii + 1); 433 if( ii==nInput ){ 434 return SQLITE_ERROR; 435 } 436 return getNextString(pParse, &zInput[1], ii-1, ppExpr); 437 } 438 439 440 /* If control flows to this point, this must be a regular token, or 441 ** the end of the input. Read a regular token using the sqlite3_tokenizer 442 ** interface. Before doing so, figure out if there is an explicit 443 ** column specifier for the token. 
444 ** 445 ** TODO: Strangely, it is not possible to associate a column specifier 446 ** with a quoted phrase, only with a single token. Not sure if this was 447 ** an implementation artifact or an intentional decision when fts3 was 448 ** first implemented. Whichever it was, this module duplicates the 449 ** limitation. 450 */ 451 iCol = pParse->iDefaultCol; 452 iColLen = 0; 453 for(ii=0; ii<pParse->nCol; ii++){ 454 const char *zStr = pParse->azCol[ii]; 455 int nStr = (int)strlen(zStr); 456 if( nInput>nStr && zInput[nStr]==':' 457 && sqlite3_strnicmp(zStr, zInput, nStr)==0 458 ){ 459 iCol = ii; 460 iColLen = (int)((zInput - z) + nStr + 1); 461 break; 462 } 463 } 464 rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); 465 *pnConsumed += iColLen; 466 return rc; 467 } 468 469 /* 470 ** The argument is an Fts3Expr structure for a binary operator (any type 471 ** except an FTSQUERY_PHRASE). Return an integer value representing the 472 ** precedence of the operator. Lower values have a higher precedence (i.e. 473 ** group more tightly). For example, in the C language, the == operator 474 ** groups more tightly than ||, and would therefore have a higher precedence. 475 ** 476 ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS 477 ** is defined), the order of the operators in precedence from highest to 478 ** lowest is: 479 ** 480 ** NEAR 481 ** NOT 482 ** AND (including implicit ANDs) 483 ** OR 484 ** 485 ** Note that when using the old query syntax, the OR operator has a higher 486 ** precedence than the AND operator. 
487 */ 488 static int opPrecedence(Fts3Expr *p){ 489 assert( p->eType!=FTSQUERY_PHRASE ); 490 if( sqlite3_fts3_enable_parentheses ){ 491 return p->eType; 492 }else if( p->eType==FTSQUERY_NEAR ){ 493 return 1; 494 }else if( p->eType==FTSQUERY_OR ){ 495 return 2; 496 } 497 assert( p->eType==FTSQUERY_AND ); 498 return 3; 499 } 500 501 /* 502 ** Argument ppHead contains a pointer to the current head of a query 503 ** expression tree being parsed. pPrev is the expression node most recently 504 ** inserted into the tree. This function adds pNew, which is always a binary 505 ** operator node, into the expression tree based on the relative precedence 506 ** of pNew and the existing nodes of the tree. This may result in the head 507 ** of the tree changing, in which case *ppHead is set to the new root node. 508 */ 509 static void insertBinaryOperator( 510 Fts3Expr **ppHead, /* Pointer to the root node of a tree */ 511 Fts3Expr *pPrev, /* Node most recently inserted into the tree */ 512 Fts3Expr *pNew /* New binary node to insert into expression tree */ 513 ){ 514 Fts3Expr *pSplit = pPrev; 515 while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){ 516 pSplit = pSplit->pParent; 517 } 518 519 if( pSplit->pParent ){ 520 assert( pSplit->pParent->pRight==pSplit ); 521 pSplit->pParent->pRight = pNew; 522 pNew->pParent = pSplit->pParent; 523 }else{ 524 *ppHead = pNew; 525 } 526 pNew->pLeft = pSplit; 527 pSplit->pParent = pNew; 528 } 529 530 /* 531 ** Parse the fts3 query expression found in buffer z, length n. This function 532 ** returns either when the end of the buffer is reached or an unmatched 533 ** closing bracket - ')' - is encountered. 534 ** 535 ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the 536 ** parsed form of the expression and *pnConsumed is set to the number of 537 ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM 538 ** (out of memory error) or SQLITE_ERROR (parse error) is returned. 
539 */ 540 static int fts3ExprParse( 541 ParseContext *pParse, /* fts3 query parse context */ 542 const char *z, int n, /* Text of MATCH query */ 543 Fts3Expr **ppExpr, /* OUT: Parsed query structure */ 544 int *pnConsumed /* OUT: Number of bytes consumed */ 545 ){ 546 Fts3Expr *pRet = 0; 547 Fts3Expr *pPrev = 0; 548 Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ 549 int nIn = n; 550 const char *zIn = z; 551 int rc = SQLITE_OK; 552 int isRequirePhrase = 1; 553 554 while( rc==SQLITE_OK ){ 555 Fts3Expr *p = 0; 556 int nByte = 0; 557 rc = getNextNode(pParse, zIn, nIn, &p, &nByte); 558 if( rc==SQLITE_OK ){ 559 int isPhrase; 560 561 if( !sqlite3_fts3_enable_parentheses 562 && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot 563 ){ 564 /* Create an implicit NOT operator. */ 565 Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); 566 if( !pNot ){ 567 sqlite3Fts3ExprFree(p); 568 rc = SQLITE_NOMEM; 569 goto exprparse_out; 570 } 571 pNot->eType = FTSQUERY_NOT; 572 pNot->pRight = p; 573 if( pNotBranch ){ 574 pNot->pLeft = pNotBranch; 575 } 576 pNotBranch = pNot; 577 p = pPrev; 578 }else{ 579 int eType = p->eType; 580 assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot ); 581 isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); 582 583 /* The isRequirePhrase variable is set to true if a phrase or 584 ** an expression contained in parenthesis is required. If a 585 ** binary operator (AND, OR, NOT or NEAR) is encounted when 586 ** isRequirePhrase is set, this is a syntax error. 587 */ 588 if( !isPhrase && isRequirePhrase ){ 589 sqlite3Fts3ExprFree(p); 590 rc = SQLITE_ERROR; 591 goto exprparse_out; 592 } 593 594 if( isPhrase && !isRequirePhrase ){ 595 /* Insert an implicit AND operator. 
*/ 596 Fts3Expr *pAnd; 597 assert( pRet && pPrev ); 598 pAnd = fts3MallocZero(sizeof(Fts3Expr)); 599 if( !pAnd ){ 600 sqlite3Fts3ExprFree(p); 601 rc = SQLITE_NOMEM; 602 goto exprparse_out; 603 } 604 pAnd->eType = FTSQUERY_AND; 605 insertBinaryOperator(&pRet, pPrev, pAnd); 606 pPrev = pAnd; 607 } 608 609 /* This test catches attempts to make either operand of a NEAR 610 ** operator something other than a phrase. For example, either of 611 ** the following: 612 ** 613 ** (bracketed expression) NEAR phrase 614 ** phrase NEAR (bracketed expression) 615 ** 616 ** Return an error in either case. 617 */ 618 if( pPrev && ( 619 (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) 620 || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) 621 )){ 622 sqlite3Fts3ExprFree(p); 623 rc = SQLITE_ERROR; 624 goto exprparse_out; 625 } 626 627 if( isPhrase ){ 628 if( pRet ){ 629 assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); 630 pPrev->pRight = p; 631 p->pParent = pPrev; 632 }else{ 633 pRet = p; 634 } 635 }else{ 636 insertBinaryOperator(&pRet, pPrev, p); 637 } 638 isRequirePhrase = !isPhrase; 639 } 640 assert( nByte>0 ); 641 } 642 assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); 643 nIn -= nByte; 644 zIn += nByte; 645 pPrev = p; 646 } 647 648 if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ 649 rc = SQLITE_ERROR; 650 } 651 652 if( rc==SQLITE_DONE ){ 653 rc = SQLITE_OK; 654 if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ 655 if( !pRet ){ 656 rc = SQLITE_ERROR; 657 }else{ 658 Fts3Expr *pIter = pNotBranch; 659 while( pIter->pLeft ){ 660 pIter = pIter->pLeft; 661 } 662 pIter->pLeft = pRet; 663 pRet = pNotBranch; 664 } 665 } 666 } 667 *pnConsumed = n - nIn; 668 669 exprparse_out: 670 if( rc!=SQLITE_OK ){ 671 sqlite3Fts3ExprFree(pRet); 672 sqlite3Fts3ExprFree(pNotBranch); 673 pRet = 0; 674 } 675 *ppExpr = pRet; 676 return rc; 677 } 678 679 /* 680 ** Parameters z and n contain a pointer to and length of a buffer containing 681 ** an fts3 query 
** expression, respectively. This function attempts to parse the
** query expression and create a tree of Fts3Expr structures representing the
** parsed expression. If successful, *ppExpr is set to point to the head
** of the parsed expression tree and SQLITE_OK is returned. If an error
** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
** error) is returned and *ppExpr is set to 0.
**
** If parameter n is a negative number, then z is assumed to point to a
** nul-terminated string and the length is determined using strlen().
**
** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
** use to normalize query tokens while parsing the expression. The azCol[]
** array, which is assumed to contain nCol entries, should contain the names
** of each column in the target fts3 table, in order from left to right.
** Column names must be nul-terminated strings.
**
** The iDefaultCol parameter should be passed the index of the table column
** that appears on the left-hand-side of the MATCH operator (the default
** column to match against for tokens for which a column name is not explicitly
** specified as part of the query string), or -1 if tokens may by default
** match any table column.
702 */ 703 int sqlite3Fts3ExprParse( 704 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ 705 char **azCol, /* Array of column names for fts3 table */ 706 int nCol, /* Number of entries in azCol[] */ 707 int iDefaultCol, /* Default column to query */ 708 const char *z, int n, /* Text of MATCH query */ 709 Fts3Expr **ppExpr /* OUT: Parsed query structure */ 710 ){ 711 int nParsed; 712 int rc; 713 ParseContext sParse; 714 sParse.pTokenizer = pTokenizer; 715 sParse.azCol = (const char **)azCol; 716 sParse.nCol = nCol; 717 sParse.iDefaultCol = iDefaultCol; 718 sParse.nNest = 0; 719 if( z==0 ){ 720 *ppExpr = 0; 721 return SQLITE_OK; 722 } 723 if( n<0 ){ 724 n = (int)strlen(z); 725 } 726 rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); 727 728 /* Check for mismatched parenthesis */ 729 if( rc==SQLITE_OK && sParse.nNest ){ 730 rc = SQLITE_ERROR; 731 sqlite3Fts3ExprFree(*ppExpr); 732 *ppExpr = 0; 733 } 734 735 return rc; 736 } 737 738 /* 739 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). 740 */ 741 void sqlite3Fts3ExprFree(Fts3Expr *p){ 742 if( p ){ 743 sqlite3Fts3ExprFree(p->pLeft); 744 sqlite3Fts3ExprFree(p->pRight); 745 sqlite3_free(p->aDoclist); 746 sqlite3_free(p); 747 } 748 } 749 750 /**************************************************************************** 751 ***************************************************************************** 752 ** Everything after this point is just test code. 753 */ 754 755 #ifdef SQLITE_TEST 756 757 #include <stdio.h> 758 759 /* 760 ** Function to query the hash-table of tokenizers (see README.tokenizers). 
761 */ 762 static int queryTestTokenizer( 763 sqlite3 *db, 764 const char *zName, 765 const sqlite3_tokenizer_module **pp 766 ){ 767 int rc; 768 sqlite3_stmt *pStmt; 769 const char zSql[] = "SELECT fts3_tokenizer(?)"; 770 771 *pp = 0; 772 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); 773 if( rc!=SQLITE_OK ){ 774 return rc; 775 } 776 777 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); 778 if( SQLITE_ROW==sqlite3_step(pStmt) ){ 779 if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ 780 memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); 781 } 782 } 783 784 return sqlite3_finalize(pStmt); 785 } 786 787 /* 788 ** Return a pointer to a buffer containing a text representation of the 789 ** expression passed as the first argument. The buffer is obtained from 790 ** sqlite3_malloc(). It is the responsibility of the caller to use 791 ** sqlite3_free() to release the memory. If an OOM condition is encountered, 792 ** NULL is returned. 793 ** 794 ** If the second argument is not NULL, then its contents are prepended to 795 ** the returned expression text and then freed using sqlite3_free(). 
796 */ 797 static char *exprToString(Fts3Expr *pExpr, char *zBuf){ 798 switch( pExpr->eType ){ 799 case FTSQUERY_PHRASE: { 800 Fts3Phrase *pPhrase = pExpr->pPhrase; 801 int i; 802 zBuf = sqlite3_mprintf( 803 "%zPHRASE %d %d", zBuf, pPhrase->iColumn, pPhrase->isNot); 804 for(i=0; zBuf && i<pPhrase->nToken; i++){ 805 zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, 806 pPhrase->aToken[i].n, pPhrase->aToken[i].z, 807 (pPhrase->aToken[i].isPrefix?"+":"") 808 ); 809 } 810 return zBuf; 811 } 812 813 case FTSQUERY_NEAR: 814 zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear); 815 break; 816 case FTSQUERY_NOT: 817 zBuf = sqlite3_mprintf("%zNOT ", zBuf); 818 break; 819 case FTSQUERY_AND: 820 zBuf = sqlite3_mprintf("%zAND ", zBuf); 821 break; 822 case FTSQUERY_OR: 823 zBuf = sqlite3_mprintf("%zOR ", zBuf); 824 break; 825 } 826 827 if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); 828 if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf); 829 if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf); 830 831 if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf); 832 if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf); 833 834 return zBuf; 835 } 836 837 /* 838 ** This is the implementation of a scalar SQL function used to test the 839 ** expression parser. It should be called as follows: 840 ** 841 ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...); 842 ** 843 ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used 844 ** to parse the query expression (see README.tokenizers). The second argument 845 ** is the query expression to parse. Each subsequent argument is the name 846 ** of a column of the fts3 table that the query expression may refer to. 
847 ** For example: 848 ** 849 ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); 850 */ 851 static void fts3ExprTest( 852 sqlite3_context *context, 853 int argc, 854 sqlite3_value **argv 855 ){ 856 sqlite3_tokenizer_module const *pModule = 0; 857 sqlite3_tokenizer *pTokenizer = 0; 858 int rc; 859 char **azCol = 0; 860 const char *zExpr; 861 int nExpr; 862 int nCol; 863 int ii; 864 Fts3Expr *pExpr; 865 char *zBuf = 0; 866 sqlite3 *db = sqlite3_context_db_handle(context); 867 868 if( argc<3 ){ 869 sqlite3_result_error(context, 870 "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 871 ); 872 return; 873 } 874 875 rc = queryTestTokenizer(db, 876 (const char *)sqlite3_value_text(argv[0]), &pModule); 877 if( rc==SQLITE_NOMEM ){ 878 sqlite3_result_error_nomem(context); 879 goto exprtest_out; 880 }else if( !pModule ){ 881 sqlite3_result_error(context, "No such tokenizer module", -1); 882 goto exprtest_out; 883 } 884 885 rc = pModule->xCreate(0, 0, &pTokenizer); 886 assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); 887 if( rc==SQLITE_NOMEM ){ 888 sqlite3_result_error_nomem(context); 889 goto exprtest_out; 890 } 891 pTokenizer->pModule = pModule; 892 893 zExpr = (const char *)sqlite3_value_text(argv[1]); 894 nExpr = sqlite3_value_bytes(argv[1]); 895 nCol = argc-2; 896 azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); 897 if( !azCol ){ 898 sqlite3_result_error_nomem(context); 899 goto exprtest_out; 900 } 901 for(ii=0; ii<nCol; ii++){ 902 azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); 903 } 904 905 rc = sqlite3Fts3ExprParse( 906 pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr 907 ); 908 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ 909 sqlite3_result_error(context, "Error parsing expression", -1); 910 }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ 911 sqlite3_result_error_nomem(context); 912 }else{ 913 sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); 914 sqlite3_free(zBuf); 915 } 916 917 sqlite3Fts3ExprFree(pExpr); 918 
919 exprtest_out: 920 if( pModule && pTokenizer ){ 921 rc = pModule->xDestroy(pTokenizer); 922 } 923 sqlite3_free(azCol); 924 } 925 926 /* 927 ** Register the query expression parser test function fts3_exprtest() 928 ** with database connection db. 929 */ 930 int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ 931 return sqlite3_create_function( 932 db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 933 ); 934 } 935 936 #endif 937 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ 938