1 diff -ru ext-orig/fts2/fts2.c ext/fts2/fts2.c 2 --- ext-orig/fts2/fts2.c 2009-09-04 13:37:41.000000000 -0700 3 +++ ext/fts2/fts2.c 2009-09-30 14:48:14.000000000 -0700 4 @@ -37,6 +37,20 @@ 5 ** This is an SQLite module implementing full-text search. 6 */ 7 8 +/* TODO(shess): To make it easier to spot changes without groveling 9 +** through changelogs, I've defined GEARS_FTS2_CHANGES to call them 10 +** out, and I will document them here. On imports, these changes 11 +** should be reviewed to make sure they are still present, or are 12 +** dropped as appropriate. 13 +** 14 +** SQLite core adds the custom function fts2_tokenizer() to be used 15 +** for defining new tokenizers. The second parameter is a vtable 16 +** pointer encoded as a blob. Obviously this cannot be exposed to 17 +** Gears callers for security reasons. It could be suppressed in the 18 +** authorizer, but for now I have simply commented the definition out. 19 +*/ 20 +#define GEARS_FTS2_CHANGES 1 21 + 22 /* 23 ** The code in this file is only compiled if: 24 ** 25 @@ -326,8 +326,10 @@ 26 #include "fts2_hash.h" 27 #include "fts2_tokenizer.h" 28 #include "sqlite3.h" 29 -#include "sqlite3ext.h" 30 -SQLITE_EXTENSION_INIT1 31 +#ifndef SQLITE_CORE 32 +# include "sqlite3ext.h" 33 + SQLITE_EXTENSION_INIT1 34 +#endif 35 36 37 /* TODO(shess) MAN, this thing needs some refactoring. At minimum, it 38 @@ -335,6 +349,16 @@ 39 # define TRACE(A) 40 #endif 41 42 +#if 0 43 +/* Useful to set breakpoints. See main.c sqlite3Corrupt(). */ 44 +static int fts2Corrupt(void){ 45 + return SQLITE_CORRUPT; 46 +} 47 +# define SQLITE_CORRUPT_BKPT fts2Corrupt() 48 +#else 49 +# define SQLITE_CORRUPT_BKPT SQLITE_CORRUPT 50 +#endif 51 + 52 /* It is not safe to call isspace(), tolower(), or isalnum() on 53 ** hi-bit-set characters. This is the same solution used in the 54 ** tokenizer. 55 @@ -423,30 +447,41 @@ 56 /* Read a 64-bit variable-length integer from memory starting at p[0]. 57 * Return the number of bytes read, or 0 on error. 
58 * The value is stored in *v. */ 59 -static int getVarint(const char *p, sqlite_int64 *v){ 60 +static int getVarintSafe(const char *p, sqlite_int64 *v, int max){ 61 const unsigned char *q = (const unsigned char *) p; 62 sqlite_uint64 x = 0, y = 1; 63 - while( (*q & 0x80) == 0x80 ){ 64 + if( max>VARINT_MAX ) max = VARINT_MAX; 65 + while( max && (*q & 0x80) == 0x80 ){ 66 + max--; 67 x += y * (*q++ & 0x7f); 68 y <<= 7; 69 - if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */ 70 - assert( 0 ); 71 - return 0; 72 - } 73 + } 74 + if ( !max ){ 75 + assert( 0 ); 76 + return 0; /* tried to read too much; bad data */ 77 } 78 x += y * (*q++); 79 *v = (sqlite_int64) x; 80 return (int) (q - (unsigned char *)p); 81 } 82 83 -static int getVarint32(const char *p, int *pi){ 84 +static int getVarint(const char *p, sqlite_int64 *v){ 85 + return getVarintSafe(p, v, VARINT_MAX); 86 +} 87 + 88 +static int getVarint32Safe(const char *p, int *pi, int max){ 89 sqlite_int64 i; 90 - int ret = getVarint(p, &i); 91 + int ret = getVarintSafe(p, &i, max); 92 + if( !ret ) return ret; 93 *pi = (int) i; 94 assert( *pi==i ); 95 return ret; 96 } 97 98 +static int getVarint32(const char* p, int *pi){ 99 + return getVarint32Safe(p, pi, VARINT_MAX); 100 +} 101 + 102 /*******************************************************************/ 103 /* DataBuffer is used to collect data into a buffer in piecemeal 104 ** fashion. 
It implements the usual distinction between amount of 105 @@ -615,7 +650,7 @@ 106 107 static int dlrAtEnd(DLReader *pReader){ 108 assert( pReader->nData>=0 ); 109 - return pReader->nData==0; 110 + return pReader->nData<=0; 111 } 112 static sqlite_int64 dlrDocid(DLReader *pReader){ 113 assert( !dlrAtEnd(pReader) ); 114 @@ -639,7 +674,8 @@ 115 */ 116 static const char *dlrPosData(DLReader *pReader){ 117 sqlite_int64 iDummy; 118 - int n = getVarint(pReader->pData, &iDummy); 119 + int n = getVarintSafe(pReader->pData, &iDummy, pReader->nElement); 120 + if( !n ) return NULL; 121 assert( !dlrAtEnd(pReader) ); 122 return pReader->pData+n; 123 } 124 @@ -649,7 +685,7 @@ 125 assert( !dlrAtEnd(pReader) ); 126 return pReader->nElement-n; 127 } 128 -static void dlrStep(DLReader *pReader){ 129 +static int dlrStep(DLReader *pReader){ 130 assert( !dlrAtEnd(pReader) ); 131 132 /* Skip past current doclist element. */ 133 @@ -658,32 +694,48 @@ 134 pReader->nData -= pReader->nElement; 135 136 /* If there is more data, read the next doclist element. 
*/ 137 - if( pReader->nData!=0 ){ 138 + if( pReader->nData>0 ){ 139 sqlite_int64 iDocidDelta; 140 - int iDummy, n = getVarint(pReader->pData, &iDocidDelta); 141 + int nTotal = 0; 142 + int iDummy, n = getVarintSafe(pReader->pData, &iDocidDelta, pReader->nData); 143 + if( !n ) return SQLITE_CORRUPT_BKPT; 144 + nTotal += n; 145 pReader->iDocid += iDocidDelta; 146 if( pReader->iType>=DL_POSITIONS ){ 147 - assert( n<pReader->nData ); 148 while( 1 ){ 149 - n += getVarint32(pReader->pData+n, &iDummy); 150 - assert( n<=pReader->nData ); 151 + n = getVarint32Safe(pReader->pData+nTotal, &iDummy, 152 + pReader->nData-nTotal); 153 + if( !n ) return SQLITE_CORRUPT_BKPT; 154 + nTotal += n; 155 if( iDummy==POS_END ) break; 156 if( iDummy==POS_COLUMN ){ 157 - n += getVarint32(pReader->pData+n, &iDummy); 158 - assert( n<pReader->nData ); 159 + n = getVarint32Safe(pReader->pData+nTotal, &iDummy, 160 + pReader->nData-nTotal); 161 + if( !n ) return SQLITE_CORRUPT_BKPT; 162 + nTotal += n; 163 }else if( pReader->iType==DL_POSITIONS_OFFSETS ){ 164 - n += getVarint32(pReader->pData+n, &iDummy); 165 - n += getVarint32(pReader->pData+n, &iDummy); 166 - assert( n<pReader->nData ); 167 + n = getVarint32Safe(pReader->pData+nTotal, &iDummy, 168 + pReader->nData-nTotal); 169 + if( !n ) return SQLITE_CORRUPT_BKPT; 170 + nTotal += n; 171 + n = getVarint32Safe(pReader->pData+nTotal, &iDummy, 172 + pReader->nData-nTotal); 173 + if( !n ) return SQLITE_CORRUPT_BKPT; 174 + nTotal += n; 175 } 176 } 177 } 178 - pReader->nElement = n; 179 + pReader->nElement = nTotal; 180 assert( pReader->nElement<=pReader->nData ); 181 } 182 + return SQLITE_OK; 183 } 184 -static void dlrInit(DLReader *pReader, DocListType iType, 185 - const char *pData, int nData){ 186 +static void dlrDestroy(DLReader *pReader){ 187 + SCRAMBLE(pReader); 188 +} 189 +static int dlrInit(DLReader *pReader, DocListType iType, 190 + const char *pData, int nData){ 191 + int rc; 192 assert( pData!=NULL && nData!=0 ); 193 pReader->iType = iType; 
194 pReader->pData = pData; 195 @@ -692,10 +744,9 @@ 196 pReader->iDocid = 0; 197 198 /* Load the first element's data. There must be a first element. */ 199 - dlrStep(pReader); 200 -} 201 -static void dlrDestroy(DLReader *pReader){ 202 - SCRAMBLE(pReader); 203 + rc = dlrStep(pReader); 204 + if( rc!=SQLITE_OK ) dlrDestroy(pReader); 205 + return rc; 206 } 207 208 #ifndef NDEBUG 209 @@ -782,9 +833,9 @@ 210 /* TODO(shess) This has become just a helper for docListMerge. 211 ** Consider a refactor to make this cleaner. 212 */ 213 -static void dlwAppend(DLWriter *pWriter, 214 - const char *pData, int nData, 215 - sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){ 216 +static int dlwAppend(DLWriter *pWriter, 217 + const char *pData, int nData, 218 + sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){ 219 sqlite_int64 iDocid = 0; 220 char c[VARINT_MAX]; 221 int nFirstOld, nFirstNew; /* Old and new varint len of first docid. */ 222 @@ -793,7 +844,8 @@ 223 #endif 224 225 /* Recode the initial docid as delta from iPrevDocid. 
*/ 226 - nFirstOld = getVarint(pData, &iDocid); 227 + nFirstOld = getVarintSafe(pData, &iDocid, nData); 228 + if( !nFirstOld ) return SQLITE_CORRUPT_BKPT; 229 assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) ); 230 nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid); 231 232 @@ -814,10 +866,11 @@ 233 dataBufferAppend(pWriter->b, c, nFirstNew); 234 } 235 pWriter->iPrevDocid = iLastDocid; 236 + return SQLITE_OK; 237 } 238 -static void dlwCopy(DLWriter *pWriter, DLReader *pReader){ 239 - dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader), 240 - dlrDocid(pReader), dlrDocid(pReader)); 241 +static int dlwCopy(DLWriter *pWriter, DLReader *pReader){ 242 + return dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader), 243 + dlrDocid(pReader), dlrDocid(pReader)); 244 } 245 static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){ 246 char c[VARINT_MAX]; 247 @@ -878,45 +931,63 @@ 248 assert( !plrAtEnd(pReader) ); 249 return pReader->iEndOffset; 250 } 251 -static void plrStep(PLReader *pReader){ 252 - int i, n; 253 +static int plrStep(PLReader *pReader){ 254 + int i, n, nTotal = 0; 255 256 assert( !plrAtEnd(pReader) ); 257 258 - if( pReader->nData==0 ){ 259 + if( pReader->nData<=0 ){ 260 pReader->pData = NULL; 261 - return; 262 + return SQLITE_OK; 263 } 264 265 - n = getVarint32(pReader->pData, &i); 266 + n = getVarint32Safe(pReader->pData, &i, pReader->nData); 267 + if( !n ) return SQLITE_CORRUPT_BKPT; 268 + nTotal += n; 269 if( i==POS_COLUMN ){ 270 - n += getVarint32(pReader->pData+n, &pReader->iColumn); 271 + n = getVarint32Safe(pReader->pData+nTotal, &pReader->iColumn, 272 + pReader->nData-nTotal); 273 + if( !n ) return SQLITE_CORRUPT_BKPT; 274 + nTotal += n; 275 pReader->iPosition = 0; 276 pReader->iStartOffset = 0; 277 - n += getVarint32(pReader->pData+n, &i); 278 + n = getVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); 279 + if( !n ) return SQLITE_CORRUPT_BKPT; 280 + nTotal += n; 281 } 282 /* 
Should never see adjacent column changes. */ 283 assert( i!=POS_COLUMN ); 284 285 if( i==POS_END ){ 286 + assert( nTotal<=pReader->nData ); 287 pReader->nData = 0; 288 pReader->pData = NULL; 289 - return; 290 + return SQLITE_OK; 291 } 292 293 pReader->iPosition += i-POS_BASE; 294 if( pReader->iType==DL_POSITIONS_OFFSETS ){ 295 - n += getVarint32(pReader->pData+n, &i); 296 + n = getVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); 297 + if( !n ) return SQLITE_CORRUPT_BKPT; 298 + nTotal += n; 299 pReader->iStartOffset += i; 300 - n += getVarint32(pReader->pData+n, &i); 301 + n = getVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); 302 + if( !n ) return SQLITE_CORRUPT_BKPT; 303 + nTotal += n; 304 pReader->iEndOffset = pReader->iStartOffset+i; 305 } 306 - assert( n<=pReader->nData ); 307 - pReader->pData += n; 308 - pReader->nData -= n; 309 + assert( nTotal<=pReader->nData ); 310 + pReader->pData += nTotal; 311 + pReader->nData -= nTotal; 312 + return SQLITE_OK; 313 } 314 315 -static void plrInit(PLReader *pReader, DLReader *pDLReader){ 316 +static void plrDestroy(PLReader *pReader){ 317 + SCRAMBLE(pReader); 318 +} 319 + 320 +static int plrInit(PLReader *pReader, DLReader *pDLReader){ 321 + int rc; 322 pReader->pData = dlrPosData(pDLReader); 323 pReader->nData = dlrPosDataLen(pDLReader); 324 pReader->iType = pDLReader->iType; 325 @@ -924,10 +995,9 @@ 326 pReader->iPosition = 0; 327 pReader->iStartOffset = 0; 328 pReader->iEndOffset = 0; 329 - plrStep(pReader); 330 -} 331 -static void plrDestroy(PLReader *pReader){ 332 - SCRAMBLE(pReader); 333 + rc = plrStep(pReader); 334 + if( rc!=SQLITE_OK ) plrDestroy(pReader); 335 + return rc; 336 } 337 338 /*******************************************************************/ 339 @@ -1113,14 +1183,16 @@ 340 ** deletion will be trimmed, and will thus not effect a deletion 341 ** during the merge. 
342 */ 343 -static void docListTrim(DocListType iType, const char *pData, int nData, 344 - int iColumn, DocListType iOutType, DataBuffer *out){ 345 +static int docListTrim(DocListType iType, const char *pData, int nData, 346 + int iColumn, DocListType iOutType, DataBuffer *out){ 347 DLReader dlReader; 348 DLWriter dlWriter; 349 + int rc; 350 351 assert( iOutType<=iType ); 352 353 - dlrInit(&dlReader, iType, pData, nData); 354 + rc = dlrInit(&dlReader, iType, pData, nData); 355 + if( rc!=SQLITE_OK ) return rc; 356 dlwInit(&dlWriter, iOutType, out); 357 358 while( !dlrAtEnd(&dlReader) ){ 359 @@ -1128,7 +1200,8 @@ 360 PLWriter plWriter; 361 int match = 0; 362 363 - plrInit(&plReader, &dlReader); 364 + rc = plrInit(&plReader, &dlReader); 365 + if( rc!=SQLITE_OK ) break; 366 367 while( !plrAtEnd(&plReader) ){ 368 if( iColumn==-1 || plrColumn(&plReader)==iColumn ){ 369 @@ -1139,7 +1212,11 @@ 370 plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader), 371 plrStartOffset(&plReader), plrEndOffset(&plReader)); 372 } 373 - plrStep(&plReader); 374 + rc = plrStep(&plReader); 375 + if( rc!=SQLITE_OK ){ 376 + plrDestroy(&plReader); 377 + goto err; 378 + } 379 } 380 if( match ){ 381 plwTerminate(&plWriter); 382 @@ -1147,10 +1224,13 @@ 383 } 384 385 plrDestroy(&plReader); 386 - dlrStep(&dlReader); 387 + rc = dlrStep(&dlReader); 388 + if( rc!=SQLITE_OK ) break; 389 } 390 +err: 391 dlwDestroy(&dlWriter); 392 dlrDestroy(&dlReader); 393 + return rc; 394 } 395 396 /* Used by docListMerge() to keep doclists in the ascending order by 397 @@ -1207,19 +1287,20 @@ 398 /* TODO(shess) nReaders must be <= MERGE_COUNT. This should probably 399 ** be fixed. 
400 */ 401 -static void docListMerge(DataBuffer *out, 402 - DLReader *pReaders, int nReaders){ 403 +static int docListMerge(DataBuffer *out, 404 + DLReader *pReaders, int nReaders){ 405 OrderedDLReader readers[MERGE_COUNT]; 406 DLWriter writer; 407 int i, n; 408 const char *pStart = 0; 409 int nStart = 0; 410 sqlite_int64 iFirstDocid = 0, iLastDocid = 0; 411 + int rc = SQLITE_OK; 412 413 assert( nReaders>0 ); 414 if( nReaders==1 ){ 415 dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders)); 416 - return; 417 + return SQLITE_OK; 418 } 419 420 assert( nReaders<=MERGE_COUNT ); 421 @@ -1252,20 +1333,23 @@ 422 nStart += dlrDocDataBytes(readers[0].pReader); 423 }else{ 424 if( pStart!=0 ){ 425 - dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); 426 + rc = dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); 427 + if( rc!=SQLITE_OK ) goto err; 428 } 429 pStart = dlrDocData(readers[0].pReader); 430 nStart = dlrDocDataBytes(readers[0].pReader); 431 iFirstDocid = iDocid; 432 } 433 iLastDocid = iDocid; 434 - dlrStep(readers[0].pReader); 435 + rc = dlrStep(readers[0].pReader); 436 + if( rc!=SQLITE_OK ) goto err; 437 438 /* Drop all of the older elements with the same docid. */ 439 for(i=1; i<nReaders && 440 !dlrAtEnd(readers[i].pReader) && 441 dlrDocid(readers[i].pReader)==iDocid; i++){ 442 - dlrStep(readers[i].pReader); 443 + rc = dlrStep(readers[i].pReader); 444 + if( rc!=SQLITE_OK ) goto err; 445 } 446 447 /* Get the readers back into order. */ 448 @@ -1275,8 +1359,11 @@ 449 } 450 451 /* Copy over any remaining elements. */ 452 - if( nStart>0 ) dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); 453 + if( nStart>0 ) 454 + rc = dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); 455 +err: 456 dlwDestroy(&writer); 457 + return rc; 458 } 459 460 /* Helper function for posListUnion(). 
Compares the current position 461 @@ -1312,30 +1399,40 @@ 462 ** work with any doclist type, though both inputs and the output 463 ** should be the same type. 464 */ 465 -static void posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){ 466 +static int posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){ 467 PLReader left, right; 468 PLWriter writer; 469 + int rc; 470 471 assert( dlrDocid(pLeft)==dlrDocid(pRight) ); 472 assert( pLeft->iType==pRight->iType ); 473 assert( pLeft->iType==pOut->iType ); 474 475 - plrInit(&left, pLeft); 476 - plrInit(&right, pRight); 477 + rc = plrInit(&left, pLeft); 478 + if( rc != SQLITE_OK ) return rc; 479 + rc = plrInit(&right, pRight); 480 + if( rc != SQLITE_OK ){ 481 + plrDestroy(&left); 482 + return rc; 483 + } 484 plwInit(&writer, pOut, dlrDocid(pLeft)); 485 486 while( !plrAtEnd(&left) || !plrAtEnd(&right) ){ 487 int c = posListCmp(&left, &right); 488 if( c<0 ){ 489 plwCopy(&writer, &left); 490 - plrStep(&left); 491 + rc = plrStep(&left); 492 + if( rc != SQLITE_OK ) break; 493 }else if( c>0 ){ 494 plwCopy(&writer, &right); 495 - plrStep(&right); 496 + rc = plrStep(&right); 497 + if( rc != SQLITE_OK ) break; 498 }else{ 499 plwCopy(&writer, &left); 500 - plrStep(&left); 501 - plrStep(&right); 502 + rc = plrStep(&left); 503 + if( rc != SQLITE_OK ) break; 504 + rc = plrStep(&right); 505 + if( rc != SQLITE_OK ) break; 506 } 507 } 508 509 @@ -1343,56 +1440,75 @@ 510 plwDestroy(&writer); 511 plrDestroy(&left); 512 plrDestroy(&right); 513 + return rc; 514 } 515 516 /* Write the union of doclists in pLeft and pRight to pOut. For 517 ** docids in common between the inputs, the union of the position 518 ** lists is written. Inputs and outputs are always type DL_DEFAULT. 
519 */ 520 -static void docListUnion( 521 +static int docListUnion( 522 const char *pLeft, int nLeft, 523 const char *pRight, int nRight, 524 DataBuffer *pOut /* Write the combined doclist here */ 525 ){ 526 DLReader left, right; 527 DLWriter writer; 528 + int rc; 529 530 if( nLeft==0 ){ 531 if( nRight!=0) dataBufferAppend(pOut, pRight, nRight); 532 - return; 533 + return SQLITE_OK; 534 } 535 if( nRight==0 ){ 536 dataBufferAppend(pOut, pLeft, nLeft); 537 - return; 538 + return SQLITE_OK; 539 } 540 541 - dlrInit(&left, DL_DEFAULT, pLeft, nLeft); 542 - dlrInit(&right, DL_DEFAULT, pRight, nRight); 543 + rc = dlrInit(&left, DL_DEFAULT, pLeft, nLeft); 544 + if( rc!=SQLITE_OK ) return rc; 545 + rc = dlrInit(&right, DL_DEFAULT, pRight, nRight); 546 + if( rc!=SQLITE_OK ){ 547 + dlrDestroy(&left); 548 + return rc; 549 + } 550 dlwInit(&writer, DL_DEFAULT, pOut); 551 552 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ 553 if( dlrAtEnd(&right) ){ 554 - dlwCopy(&writer, &left); 555 - dlrStep(&left); 556 + rc = dlwCopy(&writer, &left); 557 + if( rc!=SQLITE_OK ) break; 558 + rc = dlrStep(&left); 559 + if( rc!=SQLITE_OK ) break; 560 }else if( dlrAtEnd(&left) ){ 561 - dlwCopy(&writer, &right); 562 - dlrStep(&right); 563 + rc = dlwCopy(&writer, &right); 564 + if( rc!=SQLITE_OK ) break; 565 + rc = dlrStep(&right); 566 + if( rc!=SQLITE_OK ) break; 567 }else if( dlrDocid(&left)<dlrDocid(&right) ){ 568 - dlwCopy(&writer, &left); 569 - dlrStep(&left); 570 + rc = dlwCopy(&writer, &left); 571 + if( rc!=SQLITE_OK ) break; 572 + rc = dlrStep(&left); 573 + if( rc!=SQLITE_OK ) break; 574 }else if( dlrDocid(&left)>dlrDocid(&right) ){ 575 - dlwCopy(&writer, &right); 576 - dlrStep(&right); 577 + rc = dlwCopy(&writer, &right); 578 + if( rc!=SQLITE_OK ) break; 579 + rc = dlrStep(&right); 580 + if( rc!=SQLITE_OK ) break; 581 }else{ 582 - posListUnion(&left, &right, &writer); 583 - dlrStep(&left); 584 - dlrStep(&right); 585 + rc = posListUnion(&left, &right, &writer); 586 + if( rc!=SQLITE_OK ) break; 
587 + rc = dlrStep(&left); 588 + if( rc!=SQLITE_OK ) break; 589 + rc = dlrStep(&right); 590 + if( rc!=SQLITE_OK ) break; 591 } 592 } 593 594 dlrDestroy(&left); 595 dlrDestroy(&right); 596 dlwDestroy(&writer); 597 + return rc; 598 } 599 600 /* pLeft and pRight are DLReaders positioned to the same docid. 601 @@ -1407,35 +1523,47 @@ 602 ** include the positions from pRight that are one more than a 603 ** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1. 604 */ 605 -static void posListPhraseMerge(DLReader *pLeft, DLReader *pRight, 606 - DLWriter *pOut){ 607 +static int posListPhraseMerge(DLReader *pLeft, DLReader *pRight, 608 + DLWriter *pOut){ 609 PLReader left, right; 610 PLWriter writer; 611 int match = 0; 612 + int rc; 613 614 assert( dlrDocid(pLeft)==dlrDocid(pRight) ); 615 assert( pOut->iType!=DL_POSITIONS_OFFSETS ); 616 617 - plrInit(&left, pLeft); 618 - plrInit(&right, pRight); 619 + rc = plrInit(&left, pLeft); 620 + if( rc!=SQLITE_OK ) return rc; 621 + rc = plrInit(&right, pRight); 622 + if( rc!=SQLITE_OK ){ 623 + plrDestroy(&left); 624 + return rc; 625 + } 626 627 while( !plrAtEnd(&left) && !plrAtEnd(&right) ){ 628 if( plrColumn(&left)<plrColumn(&right) ){ 629 - plrStep(&left); 630 + rc = plrStep(&left); 631 + if( rc!=SQLITE_OK ) break; 632 }else if( plrColumn(&left)>plrColumn(&right) ){ 633 - plrStep(&right); 634 + rc = plrStep(&right); 635 + if( rc!=SQLITE_OK ) break; 636 }else if( plrPosition(&left)+1<plrPosition(&right) ){ 637 - plrStep(&left); 638 + rc = plrStep(&left); 639 + if( rc!=SQLITE_OK ) break; 640 }else if( plrPosition(&left)+1>plrPosition(&right) ){ 641 - plrStep(&right); 642 + rc = plrStep(&right); 643 + if( rc!=SQLITE_OK ) break; 644 }else{ 645 if( !match ){ 646 plwInit(&writer, pOut, dlrDocid(pLeft)); 647 match = 1; 648 } 649 plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0); 650 - plrStep(&left); 651 - plrStep(&right); 652 + rc = plrStep(&left); 653 + if( rc!=SQLITE_OK ) break; 654 + rc = plrStep(&right); 655 + if( 
rc!=SQLITE_OK ) break; 656 } 657 } 658 659 @@ -1446,6 +1574,7 @@ 660 661 plrDestroy(&left); 662 plrDestroy(&right); 663 + return rc; 664 } 665 666 /* We have two doclists with positions: pLeft and pRight. 667 @@ -1457,7 +1586,7 @@ 668 ** iType controls the type of data written to pOut. If iType is 669 ** DL_POSITIONS, the positions are those from pRight. 670 */ 671 -static void docListPhraseMerge( 672 +static int docListPhraseMerge( 673 const char *pLeft, int nLeft, 674 const char *pRight, int nRight, 675 DocListType iType, 676 @@ -1465,152 +1594,198 @@ 677 ){ 678 DLReader left, right; 679 DLWriter writer; 680 + int rc; 681 682 - if( nLeft==0 || nRight==0 ) return; 683 + if( nLeft==0 || nRight==0 ) return SQLITE_OK; 684 685 assert( iType!=DL_POSITIONS_OFFSETS ); 686 687 - dlrInit(&left, DL_POSITIONS, pLeft, nLeft); 688 - dlrInit(&right, DL_POSITIONS, pRight, nRight); 689 + rc = dlrInit(&left, DL_POSITIONS, pLeft, nLeft); 690 + if( rc!=SQLITE_OK ) return rc; 691 + rc = dlrInit(&right, DL_POSITIONS, pRight, nRight); 692 + if( rc!=SQLITE_OK ){ 693 + dlrDestroy(&left); 694 + return rc; 695 + } 696 dlwInit(&writer, iType, pOut); 697 698 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ 699 if( dlrDocid(&left)<dlrDocid(&right) ){ 700 - dlrStep(&left); 701 + rc = dlrStep(&left); 702 + if( rc!=SQLITE_OK ) break; 703 }else if( dlrDocid(&right)<dlrDocid(&left) ){ 704 - dlrStep(&right); 705 + rc = dlrStep(&right); 706 + if( rc!=SQLITE_OK ) break; 707 }else{ 708 - posListPhraseMerge(&left, &right, &writer); 709 - dlrStep(&left); 710 - dlrStep(&right); 711 + rc = posListPhraseMerge(&left, &right, &writer); 712 + if( rc!=SQLITE_OK ) break; 713 + rc = dlrStep(&left); 714 + if( rc!=SQLITE_OK ) break; 715 + rc = dlrStep(&right); 716 + if( rc!=SQLITE_OK ) break; 717 } 718 } 719 720 dlrDestroy(&left); 721 dlrDestroy(&right); 722 dlwDestroy(&writer); 723 + return rc; 724 } 725 726 /* We have two DL_DOCIDS doclists: pLeft and pRight. 
727 ** Write the intersection of these two doclists into pOut as a 728 ** DL_DOCIDS doclist. 729 */ 730 -static void docListAndMerge( 731 +static int docListAndMerge( 732 const char *pLeft, int nLeft, 733 const char *pRight, int nRight, 734 DataBuffer *pOut /* Write the combined doclist here */ 735 ){ 736 DLReader left, right; 737 DLWriter writer; 738 + int rc; 739 740 - if( nLeft==0 || nRight==0 ) return; 741 + if( nLeft==0 || nRight==0 ) return SQLITE_OK; 742 743 - dlrInit(&left, DL_DOCIDS, pLeft, nLeft); 744 - dlrInit(&right, DL_DOCIDS, pRight, nRight); 745 + rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); 746 + if( rc!=SQLITE_OK ) return rc; 747 + rc = dlrInit(&right, DL_DOCIDS, pRight, nRight); 748 + if( rc!=SQLITE_OK ){ 749 + dlrDestroy(&left); 750 + return rc; 751 + } 752 dlwInit(&writer, DL_DOCIDS, pOut); 753 754 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ 755 if( dlrDocid(&left)<dlrDocid(&right) ){ 756 - dlrStep(&left); 757 + rc = dlrStep(&left); 758 + if( rc!=SQLITE_OK ) break; 759 }else if( dlrDocid(&right)<dlrDocid(&left) ){ 760 - dlrStep(&right); 761 + rc = dlrStep(&right); 762 + if( rc!=SQLITE_OK ) break; 763 }else{ 764 dlwAdd(&writer, dlrDocid(&left)); 765 - dlrStep(&left); 766 - dlrStep(&right); 767 + rc = dlrStep(&left); 768 + if( rc!=SQLITE_OK ) break; 769 + rc = dlrStep(&right); 770 + if( rc!=SQLITE_OK ) break; 771 } 772 } 773 774 dlrDestroy(&left); 775 dlrDestroy(&right); 776 dlwDestroy(&writer); 777 + return rc; 778 } 779 780 /* We have two DL_DOCIDS doclists: pLeft and pRight. 781 ** Write the union of these two doclists into pOut as a 782 ** DL_DOCIDS doclist. 
783 */ 784 -static void docListOrMerge( 785 +static int docListOrMerge( 786 const char *pLeft, int nLeft, 787 const char *pRight, int nRight, 788 DataBuffer *pOut /* Write the combined doclist here */ 789 ){ 790 DLReader left, right; 791 DLWriter writer; 792 + int rc; 793 794 if( nLeft==0 ){ 795 if( nRight!=0 ) dataBufferAppend(pOut, pRight, nRight); 796 - return; 797 + return SQLITE_OK; 798 } 799 if( nRight==0 ){ 800 dataBufferAppend(pOut, pLeft, nLeft); 801 - return; 802 + return SQLITE_OK; 803 } 804 805 - dlrInit(&left, DL_DOCIDS, pLeft, nLeft); 806 - dlrInit(&right, DL_DOCIDS, pRight, nRight); 807 + rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); 808 + if( rc!=SQLITE_OK ) return rc; 809 + rc = dlrInit(&right, DL_DOCIDS, pRight, nRight); 810 + if( rc!=SQLITE_OK ){ 811 + dlrDestroy(&left); 812 + return rc; 813 + } 814 dlwInit(&writer, DL_DOCIDS, pOut); 815 816 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ 817 if( dlrAtEnd(&right) ){ 818 dlwAdd(&writer, dlrDocid(&left)); 819 - dlrStep(&left); 820 + rc = dlrStep(&left); 821 + if( rc!=SQLITE_OK ) break; 822 }else if( dlrAtEnd(&left) ){ 823 dlwAdd(&writer, dlrDocid(&right)); 824 - dlrStep(&right); 825 + rc = dlrStep(&right); 826 + if( rc!=SQLITE_OK ) break; 827 }else if( dlrDocid(&left)<dlrDocid(&right) ){ 828 dlwAdd(&writer, dlrDocid(&left)); 829 - dlrStep(&left); 830 + rc = dlrStep(&left); 831 + if( rc!=SQLITE_OK ) break; 832 }else if( dlrDocid(&right)<dlrDocid(&left) ){ 833 dlwAdd(&writer, dlrDocid(&right)); 834 - dlrStep(&right); 835 + rc = dlrStep(&right); 836 + if( rc!=SQLITE_OK ) break; 837 }else{ 838 dlwAdd(&writer, dlrDocid(&left)); 839 - dlrStep(&left); 840 - dlrStep(&right); 841 + rc = dlrStep(&left); 842 + if( rc!=SQLITE_OK ) break; 843 + rc = dlrStep(&right); 844 + if( rc!=SQLITE_OK ) break; 845 } 846 } 847 848 dlrDestroy(&left); 849 dlrDestroy(&right); 850 dlwDestroy(&writer); 851 + return rc; 852 } 853 854 /* We have two DL_DOCIDS doclists: pLeft and pRight. 
855 ** Write into pOut as DL_DOCIDS doclist containing all documents that 856 ** occur in pLeft but not in pRight. 857 */ 858 -static void docListExceptMerge( 859 +static int docListExceptMerge( 860 const char *pLeft, int nLeft, 861 const char *pRight, int nRight, 862 DataBuffer *pOut /* Write the combined doclist here */ 863 ){ 864 DLReader left, right; 865 DLWriter writer; 866 + int rc; 867 868 - if( nLeft==0 ) return; 869 + if( nLeft==0 ) return SQLITE_OK; 870 if( nRight==0 ){ 871 dataBufferAppend(pOut, pLeft, nLeft); 872 - return; 873 + return SQLITE_OK; 874 } 875 876 - dlrInit(&left, DL_DOCIDS, pLeft, nLeft); 877 - dlrInit(&right, DL_DOCIDS, pRight, nRight); 878 + rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); 879 + if( rc!=SQLITE_OK ) return rc; 880 + rc = dlrInit(&right, DL_DOCIDS, pRight, nRight); 881 + if( rc!=SQLITE_OK ){ 882 + dlrDestroy(&left); 883 + return rc; 884 + } 885 dlwInit(&writer, DL_DOCIDS, pOut); 886 887 while( !dlrAtEnd(&left) ){ 888 while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){ 889 - dlrStep(&right); 890 + rc = dlrStep(&right); 891 + if( rc!=SQLITE_OK ) goto err; 892 } 893 if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){ 894 dlwAdd(&writer, dlrDocid(&left)); 895 } 896 - dlrStep(&left); 897 + rc = dlrStep(&left); 898 + if( rc!=SQLITE_OK ) break; 899 } 900 901 +err: 902 dlrDestroy(&left); 903 dlrDestroy(&right); 904 dlwDestroy(&writer); 905 + return rc; 906 } 907 908 static char *string_dup_n(const char *s, int n){ 909 @@ -1814,7 +1989,7 @@ 910 /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", 911 /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", 912 /* SEGDIR_SELECT_LEVEL */ 913 - "select start_block, leaves_end_block, root from %_segdir " 914 + "select start_block, leaves_end_block, root, idx from %_segdir " 915 " where level = ? 
order by idx", 916 /* SEGDIR_SPAN */ 917 "select min(start_block), max(end_block) from %_segdir " 918 @@ -3413,7 +3588,8 @@ 919 return SQLITE_OK; 920 } 921 rc = sqlite3_bind_int64(c->pStmt, 1, dlrDocid(&c->reader)); 922 - dlrStep(&c->reader); 923 + if( rc!=SQLITE_OK ) return rc; 924 + rc = dlrStep(&c->reader); 925 if( rc!=SQLITE_OK ) return rc; 926 /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ 927 rc = sqlite3_step(c->pStmt); 928 @@ -3421,8 +3597,11 @@ 929 c->eof = 0; 930 return SQLITE_OK; 931 } 932 - /* an error occurred; abort */ 933 - return rc==SQLITE_DONE ? SQLITE_ERROR : rc; 934 + 935 + /* Corrupt if the index refers to a missing document. */ 936 + if( rc==SQLITE_DONE ) return SQLITE_CORRUPT_BKPT; 937 + 938 + return rc; 939 } 940 } 941 942 @@ -3470,14 +3649,18 @@ 943 return rc; 944 } 945 dataBufferInit(&new, 0); 946 - docListPhraseMerge(left.pData, left.nData, right.pData, right.nData, 947 - i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &new); 948 + rc = docListPhraseMerge(left.pData, left.nData, right.pData, right.nData, 949 + i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &new); 950 dataBufferDestroy(&left); 951 dataBufferDestroy(&right); 952 + if( rc!=SQLITE_OK ){ 953 + dataBufferDestroy(&new); 954 + return rc; 955 + } 956 left = new; 957 } 958 *pResult = left; 959 - return SQLITE_OK; 960 + return rc; 961 } 962 963 /* Add a new term pTerm[0..nTerm-1] to the query *q. 964 @@ -3544,6 +3727,7 @@ 965 int firstIndex = pQuery->nTerms; 966 int iCol; 967 int nTerm = 1; 968 + int iEndLast = -1; 969 970 int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); 971 if( rc!=SQLITE_OK ) return rc; 972 @@ -3568,6 +3752,20 @@ 973 pQuery->nextIsOr = 1; 974 continue; 975 } 976 + 977 + /* 978 + * The ICU tokenizer considers '*' a break character, so the code below 979 + * sets isPrefix correctly, but since that code doesn't eat the '*', the 980 + * ICU tokenizer returns it as the next token. So eat it here until a 981 + * better solution presents itself.
982 + */ 983 + if( pQuery->nTerms>0 && nToken==1 && pSegment[iBegin]=='*' && 984 + iEndLast==iBegin){ 985 + pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; 986 + continue; 987 + } 988 + iEndLast = iEnd; 989 + 990 queryAdd(pQuery, pToken, nToken); 991 if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ 992 pQuery->pTerms[pQuery->nTerms-1].isNot = 1; 993 @@ -3707,18 +3905,30 @@ 994 return rc; 995 } 996 dataBufferInit(&new, 0); 997 - docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new); 998 + rc = docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new); 999 dataBufferDestroy(&right); 1000 dataBufferDestroy(&or); 1001 + if( rc!=SQLITE_OK ){ 1002 + if( i!=nNot ) dataBufferDestroy(&left); 1003 + queryClear(pQuery); 1004 + dataBufferDestroy(&new); 1005 + return rc; 1006 + } 1007 right = new; 1008 } 1009 if( i==nNot ){ /* first term processed. */ 1010 left = right; 1011 }else{ 1012 dataBufferInit(&new, 0); 1013 - docListAndMerge(left.pData, left.nData, right.pData, right.nData, &new); 1014 + rc = docListAndMerge(left.pData, left.nData, 1015 + right.pData, right.nData, &new); 1016 dataBufferDestroy(&right); 1017 dataBufferDestroy(&left); 1018 + if( rc!=SQLITE_OK ){ 1019 + queryClear(pQuery); 1020 + dataBufferDestroy(&new); 1021 + return rc; 1022 + } 1023 left = new; 1024 } 1025 } 1026 @@ -3738,9 +3948,15 @@ 1027 return rc; 1028 } 1029 dataBufferInit(&new, 0); 1030 - docListExceptMerge(left.pData, left.nData, right.pData, right.nData, &new); 1031 + rc = docListExceptMerge(left.pData, left.nData, 1032 + right.pData, right.nData, &new); 1033 dataBufferDestroy(&right); 1034 dataBufferDestroy(&left); 1035 + if( rc!=SQLITE_OK ){ 1036 + queryClear(pQuery); 1037 + dataBufferDestroy(&new); 1038 + return rc; 1039 + } 1040 left = new; 1041 } 1042 1043 @@ -3834,7 +4050,8 @@ 1044 rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q); 1045 if( rc!=SQLITE_OK ) return rc; 1046 if( c->result.nData!=0 ){ 1047 - dlrInit(&c->reader, DL_DOCIDS, 
c->result.pData, c->result.nData); 1048 + rc = dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData); 1049 + if( rc!=SQLITE_OK ) return rc; 1050 } 1051 break; 1052 } 1053 @@ -4335,22 +4552,19 @@ 1054 SCRAMBLE(pReader); 1055 } 1056 1057 -/* TODO(shess) The assertions are great, but what if we're in NDEBUG 1058 -** and the blob is empty or otherwise contains suspect data? 1059 -*/ 1060 -static void interiorReaderInit(const char *pData, int nData, 1061 - InteriorReader *pReader){ 1062 +static int interiorReaderInit(const char *pData, int nData, 1063 + InteriorReader *pReader){ 1064 int n, nTerm; 1065 1066 - /* Require at least the leading flag byte */ 1067 + /* These conditions are checked and met by the callers. */ 1068 assert( nData>0 ); 1069 assert( pData[0]!='\0' ); 1070 1071 CLEAR(pReader); 1072 1073 /* Decode the base blockid, and set the cursor to the first term. */ 1074 - n = getVarint(pData+1, &pReader->iBlockid); 1075 - assert( 1+n<=nData ); 1076 + n = getVarintSafe(pData+1, &pReader->iBlockid, nData-1); 1077 + if( !n ) return SQLITE_CORRUPT_BKPT; 1078 pReader->pData = pData+1+n; 1079 pReader->nData = nData-(1+n); 1080 1081 @@ -4361,17 +4575,18 @@ 1082 if( pReader->nData==0 ){ 1083 dataBufferInit(&pReader->term, 0); 1084 }else{ 1085 - n = getVarint32(pReader->pData, &nTerm); 1086 + n = getVarint32Safe(pReader->pData, &nTerm, pReader->nData); 1087 + if( !n || nTerm<0 || nTerm>pReader->nData-n) return SQLITE_CORRUPT_BKPT; 1088 dataBufferInit(&pReader->term, nTerm); 1089 dataBufferReplace(&pReader->term, pReader->pData+n, nTerm); 1090 - assert( n+nTerm<=pReader->nData ); 1091 pReader->pData += n+nTerm; 1092 pReader->nData -= n+nTerm; 1093 } 1094 + return SQLITE_OK; 1095 } 1096 1097 static int interiorReaderAtEnd(InteriorReader *pReader){ 1098 - return pReader->term.nData==0; 1099 + return pReader->term.nData<=0; 1100 } 1101 1102 static sqlite_int64 interiorReaderCurrentBlockid(InteriorReader *pReader){ 1103 @@ -4388,7 +4603,7 @@ 1104 } 1105 1106 /* 
Step forward to the next term in the node. */ 1107 -static void interiorReaderStep(InteriorReader *pReader){ 1108 +static int interiorReaderStep(InteriorReader *pReader){ 1109 assert( !interiorReaderAtEnd(pReader) ); 1110 1111 /* If the last term has been read, signal eof, else construct the 1112 @@ -4399,18 +4614,26 @@ 1113 }else{ 1114 int n, nPrefix, nSuffix; 1115 1116 - n = getVarint32(pReader->pData, &nPrefix); 1117 - n += getVarint32(pReader->pData+n, &nSuffix); 1118 + n = getVarint32Safe(pReader->pData, &nPrefix, pReader->nData); 1119 + if( !n ) return SQLITE_CORRUPT_BKPT; 1120 + pReader->nData -= n; 1121 + pReader->pData += n; 1122 + n = getVarint32Safe(pReader->pData, &nSuffix, pReader->nData); 1123 + if( !n ) return SQLITE_CORRUPT_BKPT; 1124 + pReader->nData -= n; 1125 + pReader->pData += n; 1126 + if( nSuffix<0 || nSuffix>pReader->nData ) return SQLITE_CORRUPT_BKPT; 1127 + if( nPrefix<0 || nPrefix>pReader->term.nData ) return SQLITE_CORRUPT_BKPT; 1128 1129 /* Truncate the current term and append suffix data. 
*/ 1130 pReader->term.nData = nPrefix; 1131 - dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix); 1132 + dataBufferAppend(&pReader->term, pReader->pData, nSuffix); 1133 1134 - assert( n+nSuffix<=pReader->nData ); 1135 - pReader->pData += n+nSuffix; 1136 - pReader->nData -= n+nSuffix; 1137 + pReader->pData += nSuffix; 1138 + pReader->nData -= nSuffix; 1139 } 1140 pReader->iBlockid++; 1141 + return SQLITE_OK; 1142 } 1143 1144 /* Compare the current term to pTerm[nTerm], returning strcmp-style 1145 @@ -4782,7 +5005,8 @@ 1146 n = putVarint(c, nData); 1147 dataBufferAppend(&pWriter->data, c, n); 1148 1149 - docListMerge(&pWriter->data, pReaders, nReaders); 1150 + rc = docListMerge(&pWriter->data, pReaders, nReaders); 1151 + if( rc!= SQLITE_OK ) return rc; 1152 ASSERT_VALID_DOCLIST(DL_DEFAULT, 1153 pWriter->data.pData+iDoclistData+n, 1154 pWriter->data.nData-iDoclistData-n, NULL); 1155 @@ -4892,7 +5116,8 @@ 1156 int rc; 1157 DLReader reader; 1158 1159 - dlrInit(&reader, DL_DEFAULT, pData, nData); 1160 + rc = dlrInit(&reader, DL_DEFAULT, pData, nData); 1161 + if( rc!=SQLITE_OK ) return rc; 1162 rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1); 1163 dlrDestroy(&reader); 1164 1165 @@ -4937,38 +5162,40 @@ 1166 static const char *leafReaderData(LeafReader *pReader){ 1167 int n, nData; 1168 assert( pReader->term.nData>0 ); 1169 - n = getVarint32(pReader->pData, &nData); 1170 + n = getVarint32Safe(pReader->pData, &nData, pReader->nData); 1171 + if( !n || nData>pReader->nData-n ) return NULL; 1172 return pReader->pData+n; 1173 } 1174 1175 -static void leafReaderInit(const char *pData, int nData, 1176 - LeafReader *pReader){ 1177 +static int leafReaderInit(const char *pData, int nData, LeafReader *pReader){ 1178 int nTerm, n; 1179 1180 + /* All callers check this precondition. */ 1181 assert( nData>0 ); 1182 assert( pData[0]=='\0' ); 1183 1184 CLEAR(pReader); 1185 1186 /* Read the first term, skipping the header byte. 
*/ 1187 - n = getVarint32(pData+1, &nTerm); 1188 + n = getVarint32Safe(pData+1, &nTerm, nData-1); 1189 + if( !n || nTerm<0 || nTerm>nData-1-n ) return SQLITE_CORRUPT_BKPT; 1190 dataBufferInit(&pReader->term, nTerm); 1191 dataBufferReplace(&pReader->term, pData+1+n, nTerm); 1192 1193 /* Position after the first term. */ 1194 - assert( 1+n+nTerm<nData ); 1195 pReader->pData = pData+1+n+nTerm; 1196 pReader->nData = nData-1-n-nTerm; 1197 + return SQLITE_OK; 1198 } 1199 1200 /* Step the reader forward to the next term. */ 1201 -static void leafReaderStep(LeafReader *pReader){ 1202 +static int leafReaderStep(LeafReader *pReader){ 1203 int n, nData, nPrefix, nSuffix; 1204 assert( !leafReaderAtEnd(pReader) ); 1205 1206 /* Skip previous entry's data block. */ 1207 - n = getVarint32(pReader->pData, &nData); 1208 - assert( n+nData<=pReader->nData ); 1209 + n = getVarint32Safe(pReader->pData, &nData, pReader->nData); 1210 + if( !n || nData<0 || nData>pReader->nData-n ) return SQLITE_CORRUPT_BKPT; 1211 pReader->pData += n+nData; 1212 pReader->nData -= n+nData; 1213 1214 @@ -4976,15 +5203,23 @@ 1215 /* Construct the new term using a prefix from the old term plus a 1216 ** suffix from the leaf data. 
1217 */ 1218 - n = getVarint32(pReader->pData, &nPrefix); 1219 - n += getVarint32(pReader->pData+n, &nSuffix); 1220 - assert( n+nSuffix<pReader->nData ); 1221 + n = getVarint32Safe(pReader->pData, &nPrefix, pReader->nData); 1222 + if( !n ) return SQLITE_CORRUPT_BKPT; 1223 + pReader->nData -= n; 1224 + pReader->pData += n; 1225 + n = getVarint32Safe(pReader->pData, &nSuffix, pReader->nData); 1226 + if( !n ) return SQLITE_CORRUPT_BKPT; 1227 + pReader->nData -= n; 1228 + pReader->pData += n; 1229 + if( nSuffix<0 || nSuffix>pReader->nData ) return SQLITE_CORRUPT_BKPT; 1230 + if( nPrefix<0 || nPrefix>pReader->term.nData ) return SQLITE_CORRUPT_BKPT; 1231 pReader->term.nData = nPrefix; 1232 - dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix); 1233 + dataBufferAppend(&pReader->term, pReader->pData, nSuffix); 1234 1235 - pReader->pData += n+nSuffix; 1236 - pReader->nData -= n+nSuffix; 1237 + pReader->pData += nSuffix; 1238 + pReader->nData -= nSuffix; 1239 } 1240 + return SQLITE_OK; 1241 } 1242 1243 /* strcmp-style comparison of pReader's current term against pTerm. 1244 @@ -5077,6 +5312,9 @@ 1245 ** the leaf data was entirely contained in the root), or from the 1246 ** stream of blocks between iStartBlockid and iEndBlockid, inclusive. 1247 */ 1248 +/* TODO(shess): Figure out a means of indicating how many leaves are 1249 +** expected, for purposes of detecting corruption. 1250 +*/ 1251 static int leavesReaderInit(fulltext_vtab *v, 1252 int idx, 1253 sqlite_int64 iStartBlockid, 1254 @@ -5088,32 +5326,67 @@ 1255 1256 dataBufferInit(&pReader->rootData, 0); 1257 if( iStartBlockid==0 ){ 1258 + int rc; 1259 + /* Corrupt if this can't be a leaf node. */ 1260 + if( pRootData==NULL || nRootData<1 || pRootData[0]!='\0' ){ 1261 + return SQLITE_CORRUPT_BKPT; 1262 + } 1263 /* Entire leaf level fit in root data. 
*/ 1264 dataBufferReplace(&pReader->rootData, pRootData, nRootData); 1265 - leafReaderInit(pReader->rootData.pData, pReader->rootData.nData, 1266 - &pReader->leafReader); 1267 + rc = leafReaderInit(pReader->rootData.pData, pReader->rootData.nData, 1268 + &pReader->leafReader); 1269 + if( rc!=SQLITE_OK ){ 1270 + dataBufferDestroy(&pReader->rootData); 1271 + return rc; 1272 + } 1273 }else{ 1274 sqlite3_stmt *s; 1275 int rc = sql_get_leaf_statement(v, idx, &s); 1276 if( rc!=SQLITE_OK ) return rc; 1277 1278 rc = sqlite3_bind_int64(s, 1, iStartBlockid); 1279 - if( rc!=SQLITE_OK ) return rc; 1280 + if( rc!=SQLITE_OK ) goto err; 1281 1282 rc = sqlite3_bind_int64(s, 2, iEndBlockid); 1283 - if( rc!=SQLITE_OK ) return rc; 1284 + if( rc!=SQLITE_OK ) goto err; 1285 1286 rc = sqlite3_step(s); 1287 + 1288 + /* Corrupt if interior node referenced missing leaf node. */ 1289 if( rc==SQLITE_DONE ){ 1290 - pReader->eof = 1; 1291 - return SQLITE_OK; 1292 + rc = SQLITE_CORRUPT_BKPT; 1293 + goto err; 1294 + } 1295 + 1296 + if( rc!=SQLITE_ROW ) goto err; 1297 + rc = SQLITE_OK; 1298 + 1299 + /* Corrupt if leaf data isn't a blob. */ 1300 + if( sqlite3_column_type(s, 0)!=SQLITE_BLOB ){ 1301 + rc = SQLITE_CORRUPT_BKPT; 1302 + }else{ 1303 + const char *pLeafData = sqlite3_column_blob(s, 0); 1304 + int nLeafData = sqlite3_column_bytes(s, 0); 1305 + 1306 + /* Corrupt if this can't be a leaf node. 
*/ 1307 + if( pLeafData==NULL || nLeafData<1 || pLeafData[0]!='\0' ){ 1308 + rc = SQLITE_CORRUPT_BKPT; 1309 + }else{ 1310 + rc = leafReaderInit(pLeafData, nLeafData, &pReader->leafReader); 1311 + } 1312 + } 1313 + 1314 + err: 1315 + if( rc!=SQLITE_OK ){ 1316 + if( idx==-1 ){ 1317 + sqlite3_finalize(s); 1318 + }else{ 1319 + sqlite3_reset(s); 1320 + } 1321 + return rc; 1322 } 1323 - if( rc!=SQLITE_ROW ) return rc; 1324 1325 pReader->pStmt = s; 1326 - leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0), 1327 - sqlite3_column_bytes(pReader->pStmt, 0), 1328 - &pReader->leafReader); 1329 } 1330 return SQLITE_OK; 1331 } 1332 @@ -5122,11 +5395,12 @@ 1333 ** end of the current leaf, step forward to the next leaf block. 1334 */ 1335 static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){ 1336 + int rc; 1337 assert( !leavesReaderAtEnd(pReader) ); 1338 - leafReaderStep(&pReader->leafReader); 1339 + rc = leafReaderStep(&pReader->leafReader); 1340 + if( rc!=SQLITE_OK ) return rc; 1341 1342 if( leafReaderAtEnd(&pReader->leafReader) ){ 1343 - int rc; 1344 if( pReader->rootData.pData ){ 1345 pReader->eof = 1; 1346 return SQLITE_OK; 1347 @@ -5136,10 +5410,25 @@ 1348 pReader->eof = 1; 1349 return rc==SQLITE_DONE ? SQLITE_OK : rc; 1350 } 1351 - leafReaderDestroy(&pReader->leafReader); 1352 - leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0), 1353 - sqlite3_column_bytes(pReader->pStmt, 0), 1354 - &pReader->leafReader); 1355 + 1356 + /* Corrupt if leaf data isn't a blob. */ 1357 + if( sqlite3_column_type(pReader->pStmt, 0)!=SQLITE_BLOB ){ 1358 + return SQLITE_CORRUPT_BKPT; 1359 + }else{ 1360 + LeafReader tmp; 1361 + const char *pLeafData = sqlite3_column_blob(pReader->pStmt, 0); 1362 + int nLeafData = sqlite3_column_bytes(pReader->pStmt, 0); 1363 + 1364 + /* Corrupt if this can't be a leaf node. 
*/ 1365 + if( pLeafData==NULL || nLeafData<1 || pLeafData[0]!='\0' ){ 1366 + return SQLITE_CORRUPT_BKPT; 1367 + } 1368 + 1369 + rc = leafReaderInit(pLeafData, nLeafData, &tmp); 1370 + if( rc!=SQLITE_OK ) return rc; 1371 + leafReaderDestroy(&pReader->leafReader); 1372 + pReader->leafReader = tmp; 1373 + } 1374 } 1375 return SQLITE_OK; 1376 } 1377 @@ -5200,8 +5489,19 @@ 1378 sqlite_int64 iEnd = sqlite3_column_int64(s, 1); 1379 const char *pRootData = sqlite3_column_blob(s, 2); 1380 int nRootData = sqlite3_column_bytes(s, 2); 1381 + sqlite_int64 iIndex = sqlite3_column_int64(s, 3); 1382 + 1383 + /* Corrupt if we get back different types than we stored. */ 1384 + /* Also corrupt if the index is not sequential starting at 0. */ 1385 + if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER || 1386 + sqlite3_column_type(s, 1)!=SQLITE_INTEGER || 1387 + sqlite3_column_type(s, 2)!=SQLITE_BLOB || 1388 + i!=iIndex || 1389 + i>=MERGE_COUNT ){ 1390 + rc = SQLITE_CORRUPT_BKPT; 1391 + break; 1392 + } 1393 1394 - assert( i<MERGE_COUNT ); 1395 rc = leavesReaderInit(v, i, iStart, iEnd, pRootData, nRootData, 1396 &pReaders[i]); 1397 if( rc!=SQLITE_OK ) break; 1398 @@ -5212,6 +5512,7 @@ 1399 while( i-->0 ){ 1400 leavesReaderDestroy(&pReaders[i]); 1401 } 1402 + sqlite3_reset(s); /* So we don't leave a lock. 
*/ 1403 return rc; 1404 } 1405 1406 @@ -5235,13 +5536,26 @@ 1407 DLReader dlReaders[MERGE_COUNT]; 1408 const char *pTerm = leavesReaderTerm(pReaders); 1409 int i, nTerm = leavesReaderTermBytes(pReaders); 1410 + int rc; 1411 1412 assert( nReaders<=MERGE_COUNT ); 1413 1414 for(i=0; i<nReaders; i++){ 1415 - dlrInit(&dlReaders[i], DL_DEFAULT, 1416 - leavesReaderData(pReaders+i), 1417 - leavesReaderDataBytes(pReaders+i)); 1418 + const char *pData = leavesReaderData(pReaders+i); 1419 + if( pData==NULL ){ 1420 + rc = SQLITE_CORRUPT_BKPT; 1421 + break; 1422 + } 1423 + rc = dlrInit(&dlReaders[i], DL_DEFAULT, 1424 + pData, 1425 + leavesReaderDataBytes(pReaders+i)); 1426 + if( rc!=SQLITE_OK ) break; 1427 + } 1428 + if( rc!=SQLITE_OK ){ 1429 + while( i-->0 ){ 1430 + dlrDestroy(&dlReaders[i]); 1431 + } 1432 + return rc; 1433 } 1434 1435 return leafWriterStepMerge(v, pWriter, pTerm, nTerm, dlReaders, nReaders); 1436 @@ -5295,10 +5609,14 @@ 1437 memset(&lrs, '\0', sizeof(lrs)); 1438 rc = leavesReadersInit(v, iLevel, lrs, &i); 1439 if( rc!=SQLITE_OK ) return rc; 1440 - assert( i==MERGE_COUNT ); 1441 1442 leafWriterInit(iLevel+1, idx, &writer); 1443 1444 + if( i!=MERGE_COUNT ){ 1445 + rc = SQLITE_CORRUPT_BKPT; 1446 + goto err; 1447 + } 1448 + 1449 /* Since leavesReaderReorder() pushes readers at eof to the end, 1450 ** when the first reader is empty, all will be empty. 1451 */ 1452 @@ -5341,12 +5659,14 @@ 1453 } 1454 1455 /* Accumulate the union of *acc and *pData into *acc. 
*/ 1456 -static void docListAccumulateUnion(DataBuffer *acc, 1457 - const char *pData, int nData) { 1458 +static int docListAccumulateUnion(DataBuffer *acc, 1459 + const char *pData, int nData) { 1460 DataBuffer tmp = *acc; 1461 + int rc; 1462 dataBufferInit(acc, tmp.nData+nData); 1463 - docListUnion(tmp.pData, tmp.nData, pData, nData, acc); 1464 + rc = docListUnion(tmp.pData, tmp.nData, pData, nData, acc); 1465 dataBufferDestroy(&tmp); 1466 + return rc; 1467 } 1468 1469 /* TODO(shess) It might be interesting to explore different merge 1470 @@ -5388,8 +5708,13 @@ 1471 int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix); 1472 if( c>0 ) break; /* Past any possible matches. */ 1473 if( c==0 ){ 1474 + int iBuffer, nData; 1475 const char *pData = leavesReaderData(pReader); 1476 - int iBuffer, nData = leavesReaderDataBytes(pReader); 1477 + if( pData==NULL ){ 1478 + rc = SQLITE_CORRUPT_BKPT; 1479 + break; 1480 + } 1481 + nData = leavesReaderDataBytes(pReader); 1482 1483 /* Find the first empty buffer. */ 1484 for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ 1485 @@ -5435,11 +5760,13 @@ 1486 ** with pData/nData. 1487 */ 1488 dataBufferSwap(p, pAcc); 1489 - docListAccumulateUnion(pAcc, pData, nData); 1490 + rc = docListAccumulateUnion(pAcc, pData, nData); 1491 + if( rc!=SQLITE_OK ) goto err; 1492 1493 /* Accumulate remaining doclists into pAcc. */ 1494 for(++p; p<pAcc; ++p){ 1495 - docListAccumulateUnion(pAcc, p->pData, p->nData); 1496 + rc = docListAccumulateUnion(pAcc, p->pData, p->nData); 1497 + if( rc!=SQLITE_OK ) goto err; 1498 1499 /* dataBufferReset() could allow a large doclist to blow up 1500 ** our memory requirements. 
1501 @@ -5464,13 +5791,15 @@ 1502 if( out->nData==0 ){ 1503 dataBufferSwap(out, &(pBuffers[iBuffer])); 1504 }else{ 1505 - docListAccumulateUnion(out, pBuffers[iBuffer].pData, 1506 - pBuffers[iBuffer].nData); 1507 + rc = docListAccumulateUnion(out, pBuffers[iBuffer].pData, 1508 + pBuffers[iBuffer].nData); 1509 + if( rc!=SQLITE_OK ) break; 1510 } 1511 } 1512 } 1513 } 1514 1515 +err: 1516 while( nBuffers-- ){ 1517 dataBufferDestroy(&(pBuffers[nBuffers])); 1518 } 1519 @@ -5529,20 +5858,26 @@ 1520 ** node. Consider whether breaking symmetry is worthwhile. I suspect 1521 ** it is not worthwhile. 1522 */ 1523 -static void getChildrenContaining(const char *pData, int nData, 1524 - const char *pTerm, int nTerm, int isPrefix, 1525 - sqlite_int64 *piStartChild, 1526 - sqlite_int64 *piEndChild){ 1527 +static int getChildrenContaining(const char *pData, int nData, 1528 + const char *pTerm, int nTerm, int isPrefix, 1529 + sqlite_int64 *piStartChild, 1530 + sqlite_int64 *piEndChild){ 1531 InteriorReader reader; 1532 + int rc; 1533 1534 assert( nData>1 ); 1535 assert( *pData!='\0' ); 1536 - interiorReaderInit(pData, nData, &reader); 1537 + rc = interiorReaderInit(pData, nData, &reader); 1538 + if( rc!=SQLITE_OK ) return rc; 1539 1540 /* Scan for the first child which could contain pTerm/nTerm. 
*/ 1541 while( !interiorReaderAtEnd(&reader) ){ 1542 if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break; 1543 - interiorReaderStep(&reader); 1544 + rc = interiorReaderStep(&reader); 1545 + if( rc!=SQLITE_OK ){ 1546 + interiorReaderDestroy(&reader); 1547 + return rc; 1548 + } 1549 } 1550 *piStartChild = interiorReaderCurrentBlockid(&reader); 1551 1552 @@ -5552,7 +5887,11 @@ 1553 */ 1554 while( !interiorReaderAtEnd(&reader) ){ 1555 if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break; 1556 - interiorReaderStep(&reader); 1557 + rc = interiorReaderStep(&reader); 1558 + if( rc!=SQLITE_OK ){ 1559 + interiorReaderDestroy(&reader); 1560 + return rc; 1561 + } 1562 } 1563 *piEndChild = interiorReaderCurrentBlockid(&reader); 1564 1565 @@ -5561,6 +5900,7 @@ 1566 /* Children must ascend, and if !prefix, both must be the same. */ 1567 assert( *piEndChild>=*piStartChild ); 1568 assert( isPrefix || *piStartChild==*piEndChild ); 1569 + return rc; 1570 } 1571 1572 /* Read block at iBlockid and pass it with other params to 1573 @@ -5588,11 +5928,31 @@ 1574 if( rc!=SQLITE_OK ) return rc; 1575 1576 rc = sqlite3_step(s); 1577 - if( rc==SQLITE_DONE ) return SQLITE_ERROR; 1578 + /* Corrupt if interior node references missing child node. */ 1579 + if( rc==SQLITE_DONE ) return SQLITE_CORRUPT_BKPT; 1580 if( rc!=SQLITE_ROW ) return rc; 1581 1582 - getChildrenContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0), 1583 - pTerm, nTerm, isPrefix, piStartChild, piEndChild); 1584 + /* Corrupt if child node isn't a blob. */ 1585 + if( sqlite3_column_type(s, 0)!=SQLITE_BLOB ){ 1586 + sqlite3_reset(s); /* So we don't leave a lock. */ 1587 + return SQLITE_CORRUPT_BKPT; 1588 + }else{ 1589 + const char *pData = sqlite3_column_blob(s, 0); 1590 + int nData = sqlite3_column_bytes(s, 0); 1591 + 1592 + /* Corrupt if child is not a valid interior node. */ 1593 + if( pData==NULL || nData<1 || pData[0]=='\0' ){ 1594 + sqlite3_reset(s); /* So we don't leave a lock. 
*/ 1595 + return SQLITE_CORRUPT_BKPT; 1596 + } 1597 + 1598 + rc = getChildrenContaining(pData, nData, pTerm, nTerm, 1599 + isPrefix, piStartChild, piEndChild); 1600 + if( rc!=SQLITE_OK ){ 1601 + sqlite3_reset(s); 1602 + return rc; 1603 + } 1604 + } 1605 1606 /* We expect only one row. We must execute another sqlite3_step() 1607 * to complete the iteration; otherwise the table will remain 1608 @@ -5622,8 +5982,9 @@ 1609 /* Process pData as an interior node, then loop down the tree 1610 ** until we find the set of leaf nodes to scan for the term. 1611 */ 1612 - getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix, 1613 - &iStartChild, &iEndChild); 1614 + rc = getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix, 1615 + &iStartChild, &iEndChild); 1616 + if( rc!=SQLITE_OK ) return rc; 1617 while( iStartChild>iLeavesEnd ){ 1618 sqlite_int64 iNextStart, iNextEnd; 1619 rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix, 1620 @@ -5675,7 +6036,8 @@ 1621 DataBuffer result; 1622 int rc; 1623 1624 - assert( nData>1 ); 1625 + /* Corrupt if segment root can't be valid. */ 1626 + if( pData==NULL || nData<1 ) return SQLITE_CORRUPT_BKPT; 1627 1628 /* This code should never be called with buffered updates. 
*/ 1629 assert( v->nPendingData<0 ); 1630 @@ -5692,16 +6054,21 @@ 1631 DataBuffer merged; 1632 DLReader readers[2]; 1633 1634 - dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData); 1635 - dlrInit(&readers[1], DL_DEFAULT, result.pData, result.nData); 1636 - dataBufferInit(&merged, out->nData+result.nData); 1637 - docListMerge(&merged, readers, 2); 1638 - dataBufferDestroy(out); 1639 - *out = merged; 1640 - dlrDestroy(&readers[0]); 1641 - dlrDestroy(&readers[1]); 1642 + rc = dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData); 1643 + if( rc==SQLITE_OK ){ 1644 + rc = dlrInit(&readers[1], DL_DEFAULT, result.pData, result.nData); 1645 + if( rc==SQLITE_OK ){ 1646 + dataBufferInit(&merged, out->nData+result.nData); 1647 + rc = docListMerge(&merged, readers, 2); 1648 + dataBufferDestroy(out); 1649 + *out = merged; 1650 + dlrDestroy(&readers[1]); 1651 + } 1652 + dlrDestroy(&readers[0]); 1653 + } 1654 } 1655 } 1656 + 1657 dataBufferDestroy(&result); 1658 return rc; 1659 } 1660 @@ -5729,11 +6096,20 @@ 1661 const char *pData = sqlite3_column_blob(s, 2); 1662 const int nData = sqlite3_column_bytes(s, 2); 1663 const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); 1664 + 1665 + /* Corrupt if we get back different types than we stored. */ 1666 + if( sqlite3_column_type(s, 1)!=SQLITE_INTEGER || 1667 + sqlite3_column_type(s, 2)!=SQLITE_BLOB ){ 1668 + rc = SQLITE_CORRUPT_BKPT; 1669 + goto err; 1670 + } 1671 + 1672 rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix, 1673 &doclist); 1674 if( rc!=SQLITE_OK ) goto err; 1675 } 1676 if( rc==SQLITE_DONE ){ 1677 + rc = SQLITE_OK; 1678 if( doclist.nData!=0 ){ 1679 /* TODO(shess) The old term_select_all() code applied the column 1680 ** restrict as we merged segments, leading to smaller buffers. 1681 @@ -5741,13 +6117,13 @@ 1682 ** system is checked in. 
1683 */ 1684 if( iColumn==v->nColumn) iColumn = -1; 1685 - docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, 1686 - iColumn, iType, out); 1687 + rc = docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, 1688 + iColumn, iType, out); 1689 } 1690 - rc = SQLITE_OK; 1691 } 1692 1693 err: 1694 + sqlite3_reset(s); /* So we don't leave a lock. */ 1695 dataBufferDestroy(&doclist); 1696 return rc; 1697 } 1698 @@ -6089,6 +6465,7 @@ 1699 LeafWriter *pWriter){ 1700 int i, rc = SQLITE_OK; 1701 DataBuffer doclist, merged, tmp; 1702 + const char *pData; 1703 1704 /* Order the readers. */ 1705 i = nReaders; 1706 @@ -6109,14 +6486,21 @@ 1707 if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break; 1708 } 1709 1710 + pData = optLeavesReaderData(&readers[0]); 1711 + if( pData==NULL ){ 1712 + rc = SQLITE_CORRUPT_BKPT; 1713 + break; 1714 + } 1715 + 1716 /* Special-case for no merge. */ 1717 if( i==1 ){ 1718 /* Trim deletions from the doclist. */ 1719 dataBufferReset(&merged); 1720 - docListTrim(DL_DEFAULT, 1721 - optLeavesReaderData(&readers[0]), 1722 - optLeavesReaderDataBytes(&readers[0]), 1723 - -1, DL_DEFAULT, &merged); 1724 + rc = docListTrim(DL_DEFAULT, 1725 + pData, 1726 + optLeavesReaderDataBytes(&readers[0]), 1727 + -1, DL_DEFAULT, &merged); 1728 + if( rc!= SQLITE_OK ) break; 1729 }else{ 1730 DLReader dlReaders[MERGE_COUNT]; 1731 int iReader, nReaders; 1732 @@ -6124,9 +6508,10 @@ 1733 /* Prime the pipeline with the first reader's doclist. After 1734 ** one pass index 0 will reference the accumulated doclist. 1735 */ 1736 - dlrInit(&dlReaders[0], DL_DEFAULT, 1737 - optLeavesReaderData(&readers[0]), 1738 - optLeavesReaderDataBytes(&readers[0])); 1739 + rc = dlrInit(&dlReaders[0], DL_DEFAULT, 1740 + pData, 1741 + optLeavesReaderDataBytes(&readers[0])); 1742 + if( rc!=SQLITE_OK ) break; 1743 iReader = 1; 1744 1745 assert( iReader<i ); /* Must execute the loop at least once. */ 1746 @@ -6134,24 +6519,35 @@ 1747 /* Merge 16 inputs per pass. 
*/ 1748 for( nReaders=1; iReader<i && nReaders<MERGE_COUNT; 1749 iReader++, nReaders++ ){ 1750 - dlrInit(&dlReaders[nReaders], DL_DEFAULT, 1751 - optLeavesReaderData(&readers[iReader]), 1752 - optLeavesReaderDataBytes(&readers[iReader])); 1753 + pData = optLeavesReaderData(&readers[iReader]); 1754 + if( pData == NULL ){ 1755 + rc = SQLITE_CORRUPT_BKPT; 1756 + break; 1757 + } 1758 + rc = dlrInit(&dlReaders[nReaders], DL_DEFAULT, 1759 + pData, 1760 + optLeavesReaderDataBytes(&readers[iReader])); 1761 + if( rc != SQLITE_OK ) break; 1762 } 1763 1764 /* Merge doclists and swap result into accumulator. */ 1765 - dataBufferReset(&merged); 1766 - docListMerge(&merged, dlReaders, nReaders); 1767 - tmp = merged; 1768 - merged = doclist; 1769 - doclist = tmp; 1770 + if( rc==SQLITE_OK ){ 1771 + dataBufferReset(&merged); 1772 + rc = docListMerge(&merged, dlReaders, nReaders); 1773 + tmp = merged; 1774 + merged = doclist; 1775 + doclist = tmp; 1776 + } 1777 1778 while( nReaders-- > 0 ){ 1779 dlrDestroy(&dlReaders[nReaders]); 1780 } 1781 1782 + if( rc!=SQLITE_OK ) goto err; 1783 + 1784 /* Accumulated doclist to reader 0 for next pass. */ 1785 - dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData); 1786 + rc = dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData); 1787 + if( rc!=SQLITE_OK ) goto err; 1788 } 1789 1790 /* Destroy reader that was left in the pipeline. */ 1791 @@ -6159,8 +6555,9 @@ 1792 1793 /* Trim deletions from the doclist. */ 1794 dataBufferReset(&merged); 1795 - docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, 1796 - -1, DL_DEFAULT, &merged); 1797 + rc = docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, 1798 + -1, DL_DEFAULT, &merged); 1799 + if( rc!=SQLITE_OK ) goto err; 1800 } 1801 1802 /* Only pass doclists with hits (skip if all hits deleted). 
*/ 1803 @@ -6240,6 +6637,14 @@ 1804 const char *pRootData = sqlite3_column_blob(s, 2); 1805 int nRootData = sqlite3_column_bytes(s, 2); 1806 1807 + /* Corrupt if we get back different types than we stored. */ 1808 + if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER || 1809 + sqlite3_column_type(s, 1)!=SQLITE_INTEGER || 1810 + sqlite3_column_type(s, 2)!=SQLITE_BLOB ){ 1811 + rc = SQLITE_CORRUPT_BKPT; 1812 + break; 1813 + } 1814 + 1815 assert( i<nReaders ); 1816 rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData, 1817 &readers[i].reader); 1818 @@ -6253,6 +6658,8 @@ 1819 if( rc==SQLITE_DONE ){ 1820 assert( i==nReaders ); 1821 rc = optimizeInternal(v, readers, nReaders, &writer); 1822 + }else{ 1823 + sqlite3_reset(s); /* So we don't leave a lock. */ 1824 } 1825 1826 while( i-- > 0 ){ 1827 @@ -6316,9 +6723,18 @@ 1828 const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1); 1829 const char *pRootData = sqlite3_column_blob(s, 2); 1830 const int nRootData = sqlite3_column_bytes(s, 2); 1831 + int rc; 1832 LeavesReader reader; 1833 - int rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid, 1834 - pRootData, nRootData, &reader); 1835 + 1836 + /* Corrupt if we get back different types than we stored. 
*/ 1837 + if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER || 1838 + sqlite3_column_type(s, 1)!=SQLITE_INTEGER || 1839 + sqlite3_column_type(s, 2)!=SQLITE_BLOB ){ 1840 + return SQLITE_CORRUPT_BKPT; 1841 + } 1842 + 1843 + rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid, 1844 + pRootData, nRootData, &reader); 1845 if( rc!=SQLITE_OK ) return rc; 1846 1847 while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){ 1848 @@ -6480,16 +6896,19 @@ 1849 const char *pData, int nData){ 1850 DataBuffer dump; 1851 DLReader dlReader; 1852 + int rc; 1853 1854 assert( pData!=NULL && nData>0 ); 1855 1856 + rc = dlrInit(&dlReader, DL_DEFAULT, pData, nData); 1857 + if( rc!=SQLITE_OK ) return rc; 1858 dataBufferInit(&dump, 0); 1859 - dlrInit(&dlReader, DL_DEFAULT, pData, nData); 1860 - for( ; !dlrAtEnd(&dlReader); dlrStep(&dlReader) ){ 1861 + for( ; rc==SQLITE_OK && !dlrAtEnd(&dlReader); rc = dlrStep(&dlReader) ){ 1862 char buf[256]; 1863 PLReader plReader; 1864 1865 - plrInit(&plReader, &dlReader); 1866 + rc = plrInit(&plReader, &dlReader); 1867 + if( rc!=SQLITE_OK ) break; 1868 if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){ 1869 sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader)); 1870 dataBufferAppend(&dump, buf, strlen(buf)); 1871 @@ -6500,7 +6919,8 @@ 1872 dlrDocid(&dlReader), iColumn); 1873 dataBufferAppend(&dump, buf, strlen(buf)); 1874 1875 - for( ; !plrAtEnd(&plReader); plrStep(&plReader) ){ 1876 + for( ; !plrAtEnd(&plReader); rc = plrStep(&plReader) ){ 1877 + if( rc!=SQLITE_OK ) break; 1878 if( plrColumn(&plReader)!=iColumn ){ 1879 iColumn = plrColumn(&plReader); 1880 sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn); 1881 @@ -6521,6 +6941,7 @@ 1882 dataBufferAppend(&dump, buf, strlen(buf)); 1883 } 1884 plrDestroy(&plReader); 1885 + if( rc!= SQLITE_OK ) break; 1886 1887 assert( dump.nData>0 ); 1888 dump.nData--; /* Overwrite trailing space. 
*/ 1889 @@ -6529,6 +6950,10 @@ 1890 } 1891 } 1892 dlrDestroy(&dlReader); 1893 + if( rc!=SQLITE_OK ){ 1894 + dataBufferDestroy(&dump); 1895 + return rc; 1896 + } 1897 1898 assert( dump.nData>0 ); 1899 dump.nData--; /* Overwrite trailing space. */ 1900 @@ -6540,6 +6965,7 @@ 1901 sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free); 1902 dump.pData = NULL; 1903 dump.nData = dump.nCapacity = 0; 1904 + return SQLITE_OK; 1905 } 1906 1907 /* Implements dump_doclist() for use in inspecting the fts2 index from 1908 @@ -6822,7 +7248,11 @@ 1909 ** module with sqlite. 1910 */ 1911 if( SQLITE_OK==rc 1912 +#if GEARS_FTS2_CHANGES && !SQLITE_TEST 1913 + /* fts2_tokenizer() disabled for security reasons. */ 1914 +#else 1915 && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer")) 1916 +#endif 1917 && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) 1918 && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) 1919 && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1)) 1920 diff -ru ext-orig/fts2/fts2_icu.c ext/fts2/fts2_icu.c 1921 --- ext-orig/fts2/fts2_icu.c 2009-09-03 13:32:06.000000000 -0700 1922 +++ ext/fts2/fts2_icu.c 2009-09-18 14:39:41.000000000 -0700 1923 @@ -198,7 +198,7 @@ 1924 1925 while( iStart<iEnd ){ 1926 int iWhite = iStart; 1927 - U8_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c); 1928 + U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c); 1929 if( u_isspace(c) ){ 1930 iStart = iWhite; 1931 }else{ 1932 diff -ru ext-orig/fts2/fts2_tokenizer.c ext/fts2/fts2_tokenizer.c 1933 --- ext-orig/fts2/fts2_tokenizer.c 2009-09-03 13:32:06.000000000 -0700 1934 +++ ext/fts2/fts2_tokenizer.c 2009-09-18 14:39:41.000000000 -0700 1935 @@ -28,11 +28,14 @@ 1936 1937 #include "sqlite3.h" 1938 #include "sqlite3ext.h" 1939 -SQLITE_EXTENSION_INIT1 1940 +#ifndef SQLITE_CORE 1941 + SQLITE_EXTENSION_INIT1 1942 +#endif 1943 1944 #include "fts2_hash.h" 1945 #include "fts2_tokenizer.h" 1946 #include <assert.h> 1947 +#include <stddef.h> 1948 
1949 /* 1950 ** Implementation of the SQL scalar function for accessing the underlying 1951