1 /* 2 ** 2009 Oct 23 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ****************************************************************************** 12 ** 13 ** This file is part of the SQLite FTS3 extension module. Specifically, 14 ** this file contains code to insert, update and delete rows from FTS3 15 ** tables. It also contains code to merge FTS3 b-tree segments. Some 16 ** of the sub-routines used to merge segments are also used by the query 17 ** code in fts3.c. 18 */ 19 20 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) 21 22 #include "fts3Int.h" 23 #include <string.h> 24 #include <assert.h> 25 #include <stdlib.h> 26 27 /* 28 ** When full-text index nodes are loaded from disk, the buffer that they 29 ** are loaded into has the following number of bytes of padding at the end 30 ** of it. i.e. if a full-text index node is 900 bytes in size, then a buffer 31 ** of 920 bytes is allocated for it. 32 ** 33 ** This means that if we have a pointer into a buffer containing node data, 34 ** it is always safe to read up to two varints from it without risking an 35 ** overread, even if the node data is corrupted. 36 */ 37 #define FTS3_NODE_PADDING (FTS3_VARINT_MAX*2) 38 39 typedef struct PendingList PendingList; 40 typedef struct SegmentNode SegmentNode; 41 typedef struct SegmentWriter SegmentWriter; 42 43 /* 44 ** Data structure used while accumulating terms in the pending-terms hash 45 ** table. The hash table entry maps from term (a string) to a malloc'd 46 ** instance of this structure. 47 */ 48 struct PendingList { 49 int nData; 50 char *aData; 51 int nSpace; 52 sqlite3_int64 iLastDocid; 53 sqlite3_int64 iLastCol; 54 sqlite3_int64 iLastPos; 55 }; 56 57 58 /* 59 ** Each cursor has a (possibly empty) linked list of the following objects. 60 */ 61 struct Fts3DeferredToken { 62 Fts3PhraseToken *pToken; /* Pointer to corresponding expr token */ 63 int iCol; /* Column token must occur in */ 64 Fts3DeferredToken *pNext; /* Next in list of deferred tokens */ 65 PendingList *pList; /* Doclist is assembled here */ 66 }; 67 68 /* 69 ** An instance of this structure is used to iterate through the terms on 70 ** a contiguous set of segment b-tree leaf nodes. Although the details of 71 ** this structure are only manipulated by code in this file, opaque handles 72 ** of type Fts3SegReader* are also used by code in fts3.c to iterate through 73 ** terms when querying the full-text index. See functions: 74 ** 75 ** sqlite3Fts3SegReaderNew() 76 ** sqlite3Fts3SegReaderFree() 77 ** sqlite3Fts3SegReaderCost() 78 ** sqlite3Fts3SegReaderIterate() 79 ** 80 ** Methods used to manipulate Fts3SegReader structures: 81 ** 82 ** fts3SegReaderNext() 83 ** fts3SegReaderFirstDocid() 84 ** fts3SegReaderNextDocid() 85 */ 86 struct Fts3SegReader { 87 int iIdx; /* Index within level, or 0x7FFFFFFF for PT */ 88 89 sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */ 90 sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */ 91 sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */ 92 sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */ 93 94 char *aNode; /* Pointer to node data (or NULL) */ 95 int nNode; /* Size of buffer at aNode (or 0) */ 96 Fts3HashElem **ppNextElem; 97 98 /* Variables set by fts3SegReaderNext(). These may be read directly 99 ** by the caller. They are valid from the time SegmentReaderNew() returns 100 ** until SegmentReaderNext() returns something other than SQLITE_OK 101 ** (i.e. SQLITE_DONE). 102 */ 103 int nTerm; /* Number of bytes in current term */ 104 char *zTerm; /* Pointer to current term */ 105 int nTermAlloc; /* Allocated size of zTerm buffer */ 106 char *aDoclist; /* Pointer to doclist of current entry */ 107 int nDoclist; /* Size of doclist in current entry */ 108 109 /* The following variables are used to iterate through the current doclist */ 110 char *pOffsetList; 111 sqlite3_int64 iDocid; 112 }; 113 114 #define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0) 115 #define fts3SegReaderIsRootOnly(p) ((p)->aNode==(char *)&(p)[1]) 116 117 /* 118 ** An instance of this structure is used to create a segment b-tree in the 119 ** database. The internal details of this type are only accessed by the 120 ** following functions: 121 ** 122 ** fts3SegWriterAdd() 123 ** fts3SegWriterFlush() 124 ** fts3SegWriterFree() 125 */ 126 struct SegmentWriter { 127 SegmentNode *pTree; /* Pointer to interior tree structure */ 128 sqlite3_int64 iFirst; /* First slot in %_segments written */ 129 sqlite3_int64 iFree; /* Next free slot in %_segments */ 130 char *zTerm; /* Pointer to previous term buffer */ 131 int nTerm; /* Number of bytes in zTerm */ 132 int nMalloc; /* Size of malloc'd buffer at zMalloc */ 133 char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ 134 int nSize; /* Size of allocation at aData */ 135 int nData; /* Bytes of data in aData */ 136 char *aData; /* Pointer to block from malloc() */ 137 }; 138 139 /* 140 ** Type SegmentNode is used by the following three functions to create 141 ** the interior part of the segment b+-tree structures (everything except 142 ** the leaf nodes). These functions and type are only ever used by code 143 ** within the fts3SegWriterXXX() family of functions described above. 144 ** 145 ** fts3NodeAddTerm() 146 ** fts3NodeWrite() 147 ** fts3NodeFree() 148 */ 149 struct SegmentNode { 150 SegmentNode *pParent; /* Parent node (or NULL for root node) */ 151 SegmentNode *pRight; /* Pointer to right-sibling */ 152 SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */ 153 int nEntry; /* Number of terms written to node so far */ 154 char *zTerm; /* Pointer to previous term buffer */ 155 int nTerm; /* Number of bytes in zTerm */ 156 int nMalloc; /* Size of malloc'd buffer at zMalloc */ 157 char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ 158 int nData; /* Bytes of valid data so far */ 159 char *aData; /* Node data */ 160 }; 161 162 /* 163 ** Valid values for the second argument to fts3SqlStmt(). 164 */ 165 #define SQL_DELETE_CONTENT 0 166 #define SQL_IS_EMPTY 1 167 #define SQL_DELETE_ALL_CONTENT 2 168 #define SQL_DELETE_ALL_SEGMENTS 3 169 #define SQL_DELETE_ALL_SEGDIR 4 170 #define SQL_DELETE_ALL_DOCSIZE 5 171 #define SQL_DELETE_ALL_STAT 6 172 #define SQL_SELECT_CONTENT_BY_ROWID 7 173 #define SQL_NEXT_SEGMENT_INDEX 8 174 #define SQL_INSERT_SEGMENTS 9 175 #define SQL_NEXT_SEGMENTS_ID 10 176 #define SQL_INSERT_SEGDIR 11 177 #define SQL_SELECT_LEVEL 12 178 #define SQL_SELECT_ALL_LEVEL 13 179 #define SQL_SELECT_LEVEL_COUNT 14 180 #define SQL_SELECT_SEGDIR_COUNT_MAX 15 181 #define SQL_DELETE_SEGDIR_BY_LEVEL 16 182 #define SQL_DELETE_SEGMENTS_RANGE 17 183 #define SQL_CONTENT_INSERT 18 184 #define SQL_DELETE_DOCSIZE 19 185 #define SQL_REPLACE_DOCSIZE 20 186 #define SQL_SELECT_DOCSIZE 21 187 #define SQL_SELECT_DOCTOTAL 22 188 #define SQL_REPLACE_DOCTOTAL 23 189 190 /* 191 ** This function is used to obtain an SQLite prepared statement handle 192 ** for the statement identified by the second argument. If successful, 193 ** *pp is set to the requested statement handle and SQLITE_OK returned. 194 ** Otherwise, an SQLite error code is returned and *pp is set to 0. 195 ** 196 ** If argument apVal is not NULL, then it must point to an array with 197 ** at least as many entries as the requested statement has bound 198 ** parameters. The values are bound to the statements parameters before 199 ** returning. 200 */ 201 static int fts3SqlStmt( 202 Fts3Table *p, /* Virtual table handle */ 203 int eStmt, /* One of the SQL_XXX constants above */ 204 sqlite3_stmt **pp, /* OUT: Statement handle */ 205 sqlite3_value **apVal /* Values to bind to statement */ 206 ){ 207 const char *azSql[] = { 208 /* 0 */ "DELETE FROM %Q.'%q_content' WHERE rowid = ?", 209 /* 1 */ "SELECT NOT EXISTS(SELECT docid FROM %Q.'%q_content' WHERE rowid!=?)", 210 /* 2 */ "DELETE FROM %Q.'%q_content'", 211 /* 3 */ "DELETE FROM %Q.'%q_segments'", 212 /* 4 */ "DELETE FROM %Q.'%q_segdir'", 213 /* 5 */ "DELETE FROM %Q.'%q_docsize'", 214 /* 6 */ "DELETE FROM %Q.'%q_stat'", 215 /* 7 */ "SELECT %s FROM %Q.'%q_content' AS x WHERE rowid=?", 216 /* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1", 217 /* 9 */ "INSERT INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)", 218 /* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)", 219 /* 11 */ "INSERT INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)", 220 221 /* Return segments in order from oldest to newest.*/ 222 /* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root " 223 "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC", 224 /* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root " 225 "FROM %Q.'%q_segdir' ORDER BY level DESC, idx ASC", 226 227 /* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?", 228 /* 15 */ "SELECT count(*), max(level) FROM %Q.'%q_segdir'", 229 230 /* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?", 231 /* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?", 232 /* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)", 233 /* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?", 234 /* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", 235 /* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?", 236 /* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=0", 237 /* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(0,?)", 238 }; 239 int rc = SQLITE_OK; 240 sqlite3_stmt *pStmt; 241 242 assert( SizeofArray(azSql)==SizeofArray(p->aStmt) ); 243 assert( eStmt<SizeofArray(azSql) && eStmt>=0 ); 244 245 pStmt = p->aStmt[eStmt]; 246 if( !pStmt ){ 247 char *zSql; 248 if( eStmt==SQL_CONTENT_INSERT ){ 249 zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist); 250 }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){ 251 zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist, p->zDb, p->zName); 252 }else{ 253 zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName); 254 } 255 if( !zSql ){ 256 rc = SQLITE_NOMEM; 257 }else{ 258 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, NULL); 259 sqlite3_free(zSql); 260 assert( rc==SQLITE_OK || pStmt==0 ); 261 p->aStmt[eStmt] = pStmt; 262 } 263 } 264 if( apVal ){ 265 int i; 266 int nParam = sqlite3_bind_parameter_count(pStmt); 267 for(i=0; rc==SQLITE_OK && i<nParam; i++){ 268 rc = sqlite3_bind_value(pStmt, i+1, apVal[i]); 269 } 270 } 271 *pp = pStmt; 272 return rc; 273 } 274 275 static int fts3SelectDocsize( 276 Fts3Table *pTab, /* FTS3 table handle */ 277 int eStmt, /* Either SQL_SELECT_DOCSIZE or DOCTOTAL */ 278 sqlite3_int64 iDocid, /* Docid to bind for SQL_SELECT_DOCSIZE */ 279 sqlite3_stmt **ppStmt /* OUT: Statement handle */ 280 ){ 281 sqlite3_stmt *pStmt = 0; /* Statement requested from fts3SqlStmt() */ 282 int rc; /* Return code */ 283 284 assert( eStmt==SQL_SELECT_DOCSIZE || eStmt==SQL_SELECT_DOCTOTAL ); 285 286 rc = fts3SqlStmt(pTab, eStmt, &pStmt, 0); 287 if( rc==SQLITE_OK ){ 288 if( eStmt==SQL_SELECT_DOCSIZE ){ 289 sqlite3_bind_int64(pStmt, 1, iDocid); 290 } 291 rc = sqlite3_step(pStmt); 292 if( rc!=SQLITE_ROW || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){ 293 rc = sqlite3_reset(pStmt); 294 if( rc==SQLITE_OK ) rc = SQLITE_CORRUPT; 295 pStmt = 0; 296 }else{ 297 rc = SQLITE_OK; 298 } 299 } 300 301 *ppStmt = pStmt; 302 return rc; 303 } 304 305 int sqlite3Fts3SelectDoctotal( 306 Fts3Table *pTab, /* Fts3 table handle */ 307 sqlite3_stmt **ppStmt /* OUT: Statement handle */ 308 ){ 309 return fts3SelectDocsize(pTab, SQL_SELECT_DOCTOTAL, 0, ppStmt); 310 } 311 312 int sqlite3Fts3SelectDocsize( 313 Fts3Table *pTab, /* Fts3 table handle */ 314 sqlite3_int64 iDocid, /* Docid to read size data for */ 315 sqlite3_stmt **ppStmt /* OUT: Statement handle */ 316 ){ 317 return fts3SelectDocsize(pTab, SQL_SELECT_DOCSIZE, iDocid, ppStmt); 318 } 319 320 /* 321 ** Similar to fts3SqlStmt(). Except, after binding the parameters in 322 ** array apVal[] to the SQL statement identified by eStmt, the statement 323 ** is executed. 324 ** 325 ** Returns SQLITE_OK if the statement is successfully executed, or an 326 ** SQLite error code otherwise. 327 */ 328 static void fts3SqlExec( 329 int *pRC, /* Result code */ 330 Fts3Table *p, /* The FTS3 table */ 331 int eStmt, /* Index of statement to evaluate */ 332 sqlite3_value **apVal /* Parameters to bind */ 333 ){ 334 sqlite3_stmt *pStmt; 335 int rc; 336 if( *pRC ) return; 337 rc = fts3SqlStmt(p, eStmt, &pStmt, apVal); 338 if( rc==SQLITE_OK ){ 339 sqlite3_step(pStmt); 340 rc = sqlite3_reset(pStmt); 341 } 342 *pRC = rc; 343 } 344 345 346 /* 347 ** This function ensures that the caller has obtained a shared-cache 348 ** table-lock on the %_content table. This is required before reading 349 ** data from the fts3 table. If this lock is not acquired first, then 350 ** the caller may end up holding read-locks on the %_segments and %_segdir 351 ** tables, but no read-lock on the %_content table. If this happens 352 ** a second connection will be able to write to the fts3 table, but 353 ** attempting to commit those writes might return SQLITE_LOCKED or 354 ** SQLITE_LOCKED_SHAREDCACHE (because the commit attempts to obtain 355 ** write-locks on the %_segments and %_segdir ** tables). 356 ** 357 ** We try to avoid this because if FTS3 returns any error when committing 358 ** a transaction, the whole transaction will be rolled back. And this is 359 ** not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. It can 360 ** still happen if the user reads data directly from the %_segments or 361 ** %_segdir tables instead of going through FTS3 though. 362 */ 363 int sqlite3Fts3ReadLock(Fts3Table *p){ 364 int rc; /* Return code */ 365 sqlite3_stmt *pStmt; /* Statement used to obtain lock */ 366 367 rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pStmt, 0); 368 if( rc==SQLITE_OK ){ 369 sqlite3_bind_null(pStmt, 1); 370 sqlite3_step(pStmt); 371 rc = sqlite3_reset(pStmt); 372 } 373 return rc; 374 } 375 376 /* 377 ** Set *ppStmt to a statement handle that may be used to iterate through 378 ** all rows in the %_segdir table, from oldest to newest. If successful, 379 ** return SQLITE_OK. If an error occurs while preparing the statement, 380 ** return an SQLite error code. 381 ** 382 ** There is only ever one instance of this SQL statement compiled for 383 ** each FTS3 table. 384 ** 385 ** The statement returns the following columns from the %_segdir table: 386 ** 387 ** 0: idx 388 ** 1: start_block 389 ** 2: leaves_end_block 390 ** 3: end_block 391 ** 4: root 392 */ 393 int sqlite3Fts3AllSegdirs(Fts3Table *p, int iLevel, sqlite3_stmt **ppStmt){ 394 int rc; 395 sqlite3_stmt *pStmt = 0; 396 if( iLevel<0 ){ 397 rc = fts3SqlStmt(p, SQL_SELECT_ALL_LEVEL, &pStmt, 0); 398 }else{ 399 rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); 400 if( rc==SQLITE_OK ) sqlite3_bind_int(pStmt, 1, iLevel); 401 } 402 *ppStmt = pStmt; 403 return rc; 404 } 405 406 407 /* 408 ** Append a single varint to a PendingList buffer. SQLITE_OK is returned 409 ** if successful, or an SQLite error code otherwise. 410 ** 411 ** This function also serves to allocate the PendingList structure itself. 412 ** For example, to create a new PendingList structure containing two 413 ** varints: 414 ** 415 ** PendingList *p = 0; 416 ** fts3PendingListAppendVarint(&p, 1); 417 ** fts3PendingListAppendVarint(&p, 2); 418 */ 419 static int fts3PendingListAppendVarint( 420 PendingList **pp, /* IN/OUT: Pointer to PendingList struct */ 421 sqlite3_int64 i /* Value to append to data */ 422 ){ 423 PendingList *p = *pp; 424 425 /* Allocate or grow the PendingList as required. */ 426 if( !p ){ 427 p = sqlite3_malloc(sizeof(*p) + 100); 428 if( !p ){ 429 return SQLITE_NOMEM; 430 } 431 p->nSpace = 100; 432 p->aData = (char *)&p[1]; 433 p->nData = 0; 434 } 435 else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){ 436 int nNew = p->nSpace * 2; 437 p = sqlite3_realloc(p, sizeof(*p) + nNew); 438 if( !p ){ 439 sqlite3_free(*pp); 440 *pp = 0; 441 return SQLITE_NOMEM; 442 } 443 p->nSpace = nNew; 444 p->aData = (char *)&p[1]; 445 } 446 447 /* Append the new serialized varint to the end of the list. */ 448 p->nData += sqlite3Fts3PutVarint(&p->aData[p->nData], i); 449 p->aData[p->nData] = '\0'; 450 *pp = p; 451 return SQLITE_OK; 452 } 453 454 /* 455 ** Add a docid/column/position entry to a PendingList structure. Non-zero 456 ** is returned if the structure is sqlite3_realloced as part of adding 457 ** the entry. Otherwise, zero. 458 ** 459 ** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning. 460 ** Zero is always returned in this case. Otherwise, if no OOM error occurs, 461 ** it is set to SQLITE_OK. 462 */ 463 static int fts3PendingListAppend( 464 PendingList **pp, /* IN/OUT: PendingList structure */ 465 sqlite3_int64 iDocid, /* Docid for entry to add */ 466 sqlite3_int64 iCol, /* Column for entry to add */ 467 sqlite3_int64 iPos, /* Position of term for entry to add */ 468 int *pRc /* OUT: Return code */ 469 ){ 470 PendingList *p = *pp; 471 int rc = SQLITE_OK; 472 473 assert( !p || p->iLastDocid<=iDocid ); 474 475 if( !p || p->iLastDocid!=iDocid ){ 476 sqlite3_int64 iDelta = iDocid - (p ? p->iLastDocid : 0); 477 if( p ){ 478 assert( p->nData<p->nSpace ); 479 assert( p->aData[p->nData]==0 ); 480 p->nData++; 481 } 482 if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iDelta)) ){ 483 goto pendinglistappend_out; 484 } 485 p->iLastCol = -1; 486 p->iLastPos = 0; 487 p->iLastDocid = iDocid; 488 } 489 if( iCol>0 && p->iLastCol!=iCol ){ 490 if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, 1)) 491 || SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iCol)) 492 ){ 493 goto pendinglistappend_out; 494 } 495 p->iLastCol = iCol; 496 p->iLastPos = 0; 497 } 498 if( iCol>=0 ){ 499 assert( iPos>p->iLastPos || (iPos==0 && p->iLastPos==0) ); 500 rc = fts3PendingListAppendVarint(&p, 2+iPos-p->iLastPos); 501 if( rc==SQLITE_OK ){ 502 p->iLastPos = iPos; 503 } 504 } 505 506 pendinglistappend_out: 507 *pRc = rc; 508 if( p!=*pp ){ 509 *pp = p; 510 return 1; 511 } 512 return 0; 513 } 514 515 /* 516 ** Tokenize the nul-terminated string zText and add all tokens to the 517 ** pending-terms hash-table. The docid used is that currently stored in 518 ** p->iPrevDocid, and the column is specified by argument iCol. 519 ** 520 ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. 521 */ 522 static int fts3PendingTermsAdd( 523 Fts3Table *p, /* Table into which text will be inserted */ 524 const char *zText, /* Text of document to be inserted */ 525 int iCol, /* Column into which text is being inserted */ 526 u32 *pnWord /* OUT: Number of tokens inserted */ 527 ){ 528 int rc; 529 int iStart; 530 int iEnd; 531 int iPos; 532 int nWord = 0; 533 534 char const *zToken; 535 int nToken; 536 537 sqlite3_tokenizer *pTokenizer = p->pTokenizer; 538 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; 539 sqlite3_tokenizer_cursor *pCsr; 540 int (*xNext)(sqlite3_tokenizer_cursor *pCursor, 541 const char**,int*,int*,int*,int*); 542 543 assert( pTokenizer && pModule ); 544 545 rc = pModule->xOpen(pTokenizer, zText, -1, &pCsr); 546 if( rc!=SQLITE_OK ){ 547 return rc; 548 } 549 pCsr->pTokenizer = pTokenizer; 550 551 xNext = pModule->xNext; 552 while( SQLITE_OK==rc 553 && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)) 554 ){ 555 PendingList *pList; 556 557 if( iPos>=nWord ) nWord = iPos+1; 558 559 /* Positions cannot be negative; we use -1 as a terminator internally. 560 ** Tokens must have a non-zero length. 561 */ 562 if( iPos<0 || !zToken || nToken<=0 ){ 563 rc = SQLITE_ERROR; 564 break; 565 } 566 567 pList = (PendingList *)fts3HashFind(&p->pendingTerms, zToken, nToken); 568 if( pList ){ 569 p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem)); 570 } 571 if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){ 572 if( pList==fts3HashInsert(&p->pendingTerms, zToken, nToken, pList) ){ 573 /* Malloc failed while inserting the new entry. This can only 574 ** happen if there was no previous entry for this token. 575 */ 576 assert( 0==fts3HashFind(&p->pendingTerms, zToken, nToken) ); 577 sqlite3_free(pList); 578 rc = SQLITE_NOMEM; 579 } 580 } 581 if( rc==SQLITE_OK ){ 582 p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem)); 583 } 584 } 585 586 pModule->xClose(pCsr); 587 *pnWord = nWord; 588 return (rc==SQLITE_DONE ? SQLITE_OK : rc); 589 } 590 591 /* 592 ** Calling this function indicates that subsequent calls to 593 ** fts3PendingTermsAdd() are to add term/position-list pairs for the 594 ** contents of the document with docid iDocid. 595 */ 596 static int fts3PendingTermsDocid(Fts3Table *p, sqlite_int64 iDocid){ 597 /* TODO(shess) Explore whether partially flushing the buffer on 598 ** forced-flush would provide better performance. I suspect that if 599 ** we ordered the doclists by size and flushed the largest until the 600 ** buffer was half empty, that would let the less frequent terms 601 ** generate longer doclists. 602 */ 603 if( iDocid<=p->iPrevDocid || p->nPendingData>p->nMaxPendingData ){ 604 int rc = sqlite3Fts3PendingTermsFlush(p); 605 if( rc!=SQLITE_OK ) return rc; 606 } 607 p->iPrevDocid = iDocid; 608 return SQLITE_OK; 609 } 610 611 /* 612 ** Discard the contents of the pending-terms hash table. 613 */ 614 void sqlite3Fts3PendingTermsClear(Fts3Table *p){ 615 Fts3HashElem *pElem; 616 for(pElem=fts3HashFirst(&p->pendingTerms); pElem; pElem=fts3HashNext(pElem)){ 617 sqlite3_free(fts3HashData(pElem)); 618 } 619 fts3HashClear(&p->pendingTerms); 620 p->nPendingData = 0; 621 } 622 623 /* 624 ** This function is called by the xUpdate() method as part of an INSERT 625 ** operation. It adds entries for each term in the new record to the 626 ** pendingTerms hash table. 627 ** 628 ** Argument apVal is the same as the similarly named argument passed to 629 ** fts3InsertData(). Parameter iDocid is the docid of the new row. 630 */ 631 static int fts3InsertTerms(Fts3Table *p, sqlite3_value **apVal, u32 *aSz){ 632 int i; /* Iterator variable */ 633 for(i=2; i<p->nColumn+2; i++){ 634 const char *zText = (const char *)sqlite3_value_text(apVal[i]); 635 if( zText ){ 636 int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]); 637 if( rc!=SQLITE_OK ){ 638 return rc; 639 } 640 } 641 aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]); 642 } 643 return SQLITE_OK; 644 } 645 646 /* 647 ** This function is called by the xUpdate() method for an INSERT operation. 648 ** The apVal parameter is passed a copy of the apVal argument passed by 649 ** SQLite to the xUpdate() method. i.e: 650 ** 651 ** apVal[0] Not used for INSERT. 652 ** apVal[1] rowid 653 ** apVal[2] Left-most user-defined column 654 ** ... 655 ** apVal[p->nColumn+1] Right-most user-defined column 656 ** apVal[p->nColumn+2] Hidden column with same name as table 657 ** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid) 658 */ 659 static int fts3InsertData( 660 Fts3Table *p, /* Full-text table */ 661 sqlite3_value **apVal, /* Array of values to insert */ 662 sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */ 663 ){ 664 int rc; /* Return code */ 665 sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */ 666 667 /* Locate the statement handle used to insert data into the %_content 668 ** table. The SQL for this statement is: 669 ** 670 ** INSERT INTO %_content VALUES(?, ?, ?, ...) 671 ** 672 ** The statement features N '?' variables, where N is the number of user 673 ** defined columns in the FTS3 table, plus one for the docid field. 674 */ 675 rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]); 676 if( rc!=SQLITE_OK ){ 677 return rc; 678 } 679 680 /* There is a quirk here. The users INSERT statement may have specified 681 ** a value for the "rowid" field, for the "docid" field, or for both. 682 ** Which is a problem, since "rowid" and "docid" are aliases for the 683 ** same value. For example: 684 ** 685 ** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2); 686 ** 687 ** In FTS3, this is an error. It is an error to specify non-NULL values 688 ** for both docid and some other rowid alias. 689 */ 690 if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){ 691 if( SQLITE_NULL==sqlite3_value_type(apVal[0]) 692 && SQLITE_NULL!=sqlite3_value_type(apVal[1]) 693 ){ 694 /* A rowid/docid conflict. */ 695 return SQLITE_ERROR; 696 } 697 rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]); 698 if( rc!=SQLITE_OK ) return rc; 699 } 700 701 /* Execute the statement to insert the record. Set *piDocid to the 702 ** new docid value. 703 */ 704 sqlite3_step(pContentInsert); 705 rc = sqlite3_reset(pContentInsert); 706 707 *piDocid = sqlite3_last_insert_rowid(p->db); 708 return rc; 709 } 710 711 712 713 /* 714 ** Remove all data from the FTS3 table. Clear the hash table containing 715 ** pending terms. 716 */ 717 static int fts3DeleteAll(Fts3Table *p){ 718 int rc = SQLITE_OK; /* Return code */ 719 720 /* Discard the contents of the pending-terms hash table. */ 721 sqlite3Fts3PendingTermsClear(p); 722 723 /* Delete everything from the %_content, %_segments and %_segdir tables. */ 724 fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0); 725 fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0); 726 fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); 727 if( p->bHasDocsize ){ 728 fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0); 729 } 730 if( p->bHasStat ){ 731 fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0); 732 } 733 return rc; 734 } 735 736 /* 737 ** The first element in the apVal[] array is assumed to contain the docid 738 ** (an integer) of a row about to be deleted. Remove all terms from the 739 ** full-text index. 740 */ 741 static void fts3DeleteTerms( 742 int *pRC, /* Result code */ 743 Fts3Table *p, /* The FTS table to delete from */ 744 sqlite3_value **apVal, /* apVal[] contains the docid to be deleted */ 745 u32 *aSz /* Sizes of deleted document written here */ 746 ){ 747 int rc; 748 sqlite3_stmt *pSelect; 749 750 if( *pRC ) return; 751 rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, apVal); 752 if( rc==SQLITE_OK ){ 753 if( SQLITE_ROW==sqlite3_step(pSelect) ){ 754 int i; 755 for(i=1; i<=p->nColumn; i++){ 756 const char *zText = (const char *)sqlite3_column_text(pSelect, i); 757 rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]); 758 if( rc!=SQLITE_OK ){ 759 sqlite3_reset(pSelect); 760 *pRC = rc; 761 return; 762 } 763 aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); 764 } 765 } 766 rc = sqlite3_reset(pSelect); 767 }else{ 768 sqlite3_reset(pSelect); 769 } 770 *pRC = rc; 771 } 772 773 /* 774 ** Forward declaration to account for the circular dependency between 775 ** functions fts3SegmentMerge() and fts3AllocateSegdirIdx(). 776 */ 777 static int fts3SegmentMerge(Fts3Table *, int); 778 779 /* 780 ** This function allocates a new level iLevel index in the segdir table. 781 ** Usually, indexes are allocated within a level sequentially starting 782 ** with 0, so the allocated index is one greater than the value returned 783 ** by: 784 ** 785 ** SELECT max(idx) FROM %_segdir WHERE level = :iLevel 786 ** 787 ** However, if there are already FTS3_MERGE_COUNT indexes at the requested 788 ** level, they are merged into a single level (iLevel+1) segment and the 789 ** allocated index is 0. 790 ** 791 ** If successful, *piIdx is set to the allocated index slot and SQLITE_OK 792 ** returned. Otherwise, an SQLite error code is returned. 793 */ 794 static int fts3AllocateSegdirIdx(Fts3Table *p, int iLevel, int *piIdx){ 795 int rc; /* Return Code */ 796 sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */ 797 int iNext = 0; /* Result of query pNextIdx */ 798 799 /* Set variable iNext to the next available segdir index at level iLevel. */ 800 rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0); 801 if( rc==SQLITE_OK ){ 802 sqlite3_bind_int(pNextIdx, 1, iLevel); 803 if( SQLITE_ROW==sqlite3_step(pNextIdx) ){ 804 iNext = sqlite3_column_int(pNextIdx, 0); 805 } 806 rc = sqlite3_reset(pNextIdx); 807 } 808 809 if( rc==SQLITE_OK ){ 810 /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already 811 ** full, merge all segments in level iLevel into a single iLevel+1 812 ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise, 813 ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext. 814 */ 815 if( iNext>=FTS3_MERGE_COUNT ){ 816 rc = fts3SegmentMerge(p, iLevel); 817 *piIdx = 0; 818 }else{ 819 *piIdx = iNext; 820 } 821 } 822 823 return rc; 824 } 825 826 /* 827 ** The %_segments table is declared as follows: 828 ** 829 ** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB) 830 ** 831 ** This function reads data from a single row of the %_segments table. The 832 ** specific row is identified by the iBlockid parameter. If paBlob is not 833 ** NULL, then a buffer is allocated using sqlite3_malloc() and populated 834 ** with the contents of the blob stored in the "block" column of the 835 ** identified table row is. Whether or not paBlob is NULL, *pnBlob is set 836 ** to the size of the blob in bytes before returning. 837 ** 838 ** If an error occurs, or the table does not contain the specified row, 839 ** an SQLite error code is returned. Otherwise, SQLITE_OK is returned. If 840 ** paBlob is non-NULL, then it is the responsibility of the caller to 841 ** eventually free the returned buffer. 842 ** 843 ** This function may leave an open sqlite3_blob* handle in the 844 ** Fts3Table.pSegments variable. This handle is reused by subsequent calls 845 ** to this function. The handle may be closed by calling the 846 ** sqlite3Fts3SegmentsClose() function. Reusing a blob handle is a handy 847 ** performance improvement, but the blob handle should always be closed 848 ** before control is returned to the user (to prevent a lock being held 849 ** on the database file for longer than necessary). Thus, any virtual table 850 ** method (xFilter etc.) that may directly or indirectly call this function 851 ** must call sqlite3Fts3SegmentsClose() before returning. 852 */ 853 int sqlite3Fts3ReadBlock( 854 Fts3Table *p, /* FTS3 table handle */ 855 sqlite3_int64 iBlockid, /* Access the row with blockid=$iBlockid */ 856 char **paBlob, /* OUT: Blob data in malloc'd buffer */ 857 int *pnBlob /* OUT: Size of blob data */ 858 ){ 859 int rc; /* Return code */ 860 861 /* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */ 862 assert( pnBlob); 863 864 if( p->pSegments ){ 865 rc = sqlite3_blob_reopen(p->pSegments, iBlockid); 866 }else{ 867 if( 0==p->zSegmentsTbl ){ 868 p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName); 869 if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM; 870 } 871 rc = sqlite3_blob_open( 872 p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments 873 ); 874 } 875 876 if( rc==SQLITE_OK ){ 877 int nByte = sqlite3_blob_bytes(p->pSegments); 878 if( paBlob ){ 879 char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING); 880 if( !aByte ){ 881 rc = SQLITE_NOMEM; 882 }else{ 883 rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0); 884 memset(&aByte[nByte], 0, FTS3_NODE_PADDING); 885 if( rc!=SQLITE_OK ){ 886 sqlite3_free(aByte); 887 aByte = 0; 888 } 889 } 890 *paBlob = aByte; 891 } 892 *pnBlob = nByte; 893 } 894 895 return rc; 896 } 897 898 /* 899 ** Close the blob handle at p->pSegments, if it is open. See comments above 900 ** the sqlite3Fts3ReadBlock() function for details. 901 */ 902 void sqlite3Fts3SegmentsClose(Fts3Table *p){ 903 sqlite3_blob_close(p->pSegments); 904 p->pSegments = 0; 905 } 906 907 /* 908 ** Move the iterator passed as the first argument to the next term in the 909 ** segment. If successful, SQLITE_OK is returned. If there is no next term, 910 ** SQLITE_DONE. Otherwise, an SQLite error code. 911 */ 912 static int fts3SegReaderNext(Fts3Table *p, Fts3SegReader *pReader){ 913 char *pNext; /* Cursor variable */ 914 int nPrefix; /* Number of bytes in term prefix */ 915 int nSuffix; /* Number of bytes in term suffix */ 916 917 if( !pReader->aDoclist ){ 918 pNext = pReader->aNode; 919 }else{ 920 pNext = &pReader->aDoclist[pReader->nDoclist]; 921 } 922 923 if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){ 924 int rc; /* Return code from Fts3ReadBlock() */ 925 926 if( fts3SegReaderIsPending(pReader) ){ 927 Fts3HashElem *pElem = *(pReader->ppNextElem); 928 if( pElem==0 ){ 929 pReader->aNode = 0; 930 }else{ 931 PendingList *pList = (PendingList *)fts3HashData(pElem); 932 pReader->zTerm = (char *)fts3HashKey(pElem); 933 pReader->nTerm = fts3HashKeysize(pElem); 934 pReader->nNode = pReader->nDoclist = pList->nData + 1; 935 pReader->aNode = pReader->aDoclist = pList->aData; 936 pReader->ppNextElem++; 937 assert( pReader->aNode ); 938 } 939 return SQLITE_OK; 940 } 941 942 if( !fts3SegReaderIsRootOnly(pReader) ){ 943 sqlite3_free(pReader->aNode); 944 } 945 pReader->aNode = 0; 946 947 /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf 948 ** blocks have already been traversed. */ 949 assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock ); 950 if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){ 951 return SQLITE_OK; 952 } 953 954 rc = sqlite3Fts3ReadBlock( 955 p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode 956 ); 957 if( rc!=SQLITE_OK ) return rc; 958 pNext = pReader->aNode; 959 } 960 961 /* Because of the FTS3_NODE_PADDING bytes of padding, the following is 962 ** safe (no risk of overread) even if the node data is corrupted. 963 */ 964 pNext += sqlite3Fts3GetVarint32(pNext, &nPrefix); 965 pNext += sqlite3Fts3GetVarint32(pNext, &nSuffix); 966 if( nPrefix<0 || nSuffix<=0 967 || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] 968 ){ 969 return SQLITE_CORRUPT; 970 } 971 972 if( nPrefix+nSuffix>pReader->nTermAlloc ){ 973 int nNew = (nPrefix+nSuffix)*2; 974 char *zNew = sqlite3_realloc(pReader->zTerm, nNew); 975 if( !zNew ){ 976 return SQLITE_NOMEM; 977 } 978 pReader->zTerm = zNew; 979 pReader->nTermAlloc = nNew; 980 } 981 memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix); 982 pReader->nTerm = nPrefix+nSuffix; 983 pNext += nSuffix; 984 pNext += sqlite3Fts3GetVarint32(pNext, &pReader->nDoclist); 985 pReader->aDoclist = pNext; 986 pReader->pOffsetList = 0; 987 988 /* Check that the doclist does not appear to extend past the end of the 989 ** b-tree node. And that the final byte of the doclist is 0x00. If either 990 ** of these statements is untrue, then the data structure is corrupt. 991 */ 992 if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] 993 || pReader->aDoclist[pReader->nDoclist-1] 994 ){ 995 return SQLITE_CORRUPT; 996 } 997 return SQLITE_OK; 998 } 999 1000 /* 1001 ** Set the SegReader to point to the first docid in the doclist associated 1002 ** with the current term. 1003 */ 1004 static void fts3SegReaderFirstDocid(Fts3SegReader *pReader){ 1005 int n; 1006 assert( pReader->aDoclist ); 1007 assert( !pReader->pOffsetList ); 1008 n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid); 1009 pReader->pOffsetList = &pReader->aDoclist[n]; 1010 } 1011 1012 /* 1013 ** Advance the SegReader to point to the next docid in the doclist 1014 ** associated with the current term. 1015 ** 1016 ** If arguments ppOffsetList and pnOffsetList are not NULL, then 1017 ** *ppOffsetList is set to point to the first column-offset list 1018 ** in the doclist entry (i.e. immediately past the docid varint). 1019 ** *pnOffsetList is set to the length of the set of column-offset 1020 ** lists, not including the nul-terminator byte. For example: 1021 */ 1022 static void fts3SegReaderNextDocid( 1023 Fts3SegReader *pReader, 1024 char **ppOffsetList, 1025 int *pnOffsetList 1026 ){ 1027 char *p = pReader->pOffsetList; 1028 char c = 0; 1029 1030 /* Pointer p currently points at the first byte of an offset list. The 1031 ** following two lines advance it to point one byte past the end of 1032 ** the same offset list. 1033 */ 1034 while( *p | c ) c = *p++ & 0x80; 1035 p++; 1036 1037 /* If required, populate the output variables with a pointer to and the 1038 ** size of the previous offset-list. 1039 */ 1040 if( ppOffsetList ){ 1041 *ppOffsetList = pReader->pOffsetList; 1042 *pnOffsetList = (int)(p - pReader->pOffsetList - 1); 1043 } 1044 1045 /* If there are no more entries in the doclist, set pOffsetList to 1046 ** NULL. Otherwise, set Fts3SegReader.iDocid to the next docid and 1047 ** Fts3SegReader.pOffsetList to point to the next offset list before 1048 ** returning. 1049 */ 1050 if( p>=&pReader->aDoclist[pReader->nDoclist] ){ 1051 pReader->pOffsetList = 0; 1052 }else{ 1053 sqlite3_int64 iDelta; 1054 pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta); 1055 pReader->iDocid += iDelta; 1056 } 1057 } 1058 1059 /* 1060 ** This function is called to estimate the amount of data that will be 1061 ** loaded from the disk If SegReaderIterate() is called on this seg-reader, 1062 ** in units of average document size. 1063 ** 1064 ** This can be used as follows: If the caller has a small doclist that 1065 ** contains references to N documents, and is considering merging it with 1066 ** a large doclist (size X "average documents"), it may opt not to load 1067 ** the large doclist if X>N. 1068 */ 1069 int sqlite3Fts3SegReaderCost( 1070 Fts3Cursor *pCsr, /* FTS3 cursor handle */ 1071 Fts3SegReader *pReader, /* Segment-reader handle */ 1072 int *pnCost /* IN/OUT: Number of bytes read */ 1073 ){ 1074 Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; 1075 int rc = SQLITE_OK; /* Return code */ 1076 int nCost = 0; /* Cost in bytes to return */ 1077 int pgsz = p->nPgsz; /* Database page size */ 1078 1079 /* If this seg-reader is reading the pending-terms table, or if all data 1080 ** for the segment is stored on the root page of the b-tree, then the cost 1081 ** is zero. In this case all required data is already in main memory. 1082 */ 1083 if( p->bHasStat 1084 && !fts3SegReaderIsPending(pReader) 1085 && !fts3SegReaderIsRootOnly(pReader) 1086 ){ 1087 int nBlob = 0; 1088 sqlite3_int64 iBlock; 1089 1090 if( pCsr->nRowAvg==0 ){ 1091 /* The average document size, which is required to calculate the cost 1092 ** of each doclist, has not yet been determined. Read the required 1093 ** data from the %_stat table to calculate it. 1094 ** 1095 ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 1096 ** varints, where nCol is the number of columns in the FTS3 table. 1097 ** The first varint is the number of documents currently stored in 1098 ** the table. The following nCol varints contain the total amount of 1099 ** data stored in all rows of each column of the table, from left 1100 ** to right. 1101 */ 1102 sqlite3_stmt *pStmt; 1103 sqlite3_int64 nDoc = 0; 1104 sqlite3_int64 nByte = 0; 1105 const char *pEnd; 1106 const char *a; 1107 1108 rc = sqlite3Fts3SelectDoctotal(p, &pStmt); 1109 if( rc!=SQLITE_OK ) return rc; 1110 a = sqlite3_column_blob(pStmt, 0); 1111 assert( a ); 1112 1113 pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; 1114 a += sqlite3Fts3GetVarint(a, &nDoc); 1115 while( a<pEnd ){ 1116 a += sqlite3Fts3GetVarint(a, &nByte); 1117 } 1118 if( nDoc==0 || nByte==0 ){ 1119 sqlite3_reset(pStmt); 1120 return SQLITE_CORRUPT; 1121 } 1122 1123 pCsr->nRowAvg = (int)(((nByte / nDoc) + pgsz) / pgsz); 1124 assert( pCsr->nRowAvg>0 ); 1125 rc = sqlite3_reset(pStmt); 1126 if( rc!=SQLITE_OK ) return rc; 1127 } 1128 1129 /* Assume that a blob flows over onto overflow pages if it is larger 1130 ** than (pgsz-35) bytes in size (the file-format documentation 1131 ** confirms this). 1132 */ 1133 for(iBlock=pReader->iStartBlock; iBlock<=pReader->iLeafEndBlock; iBlock++){ 1134 rc = sqlite3Fts3ReadBlock(p, iBlock, 0, &nBlob); 1135 if( rc!=SQLITE_OK ) break; 1136 if( (nBlob+35)>pgsz ){ 1137 int nOvfl = (nBlob + 34)/pgsz; 1138 nCost += ((nOvfl + pCsr->nRowAvg - 1)/pCsr->nRowAvg); 1139 } 1140 } 1141 } 1142 1143 *pnCost += nCost; 1144 return rc; 1145 } 1146 1147 /* 1148 ** Free all allocations associated with the iterator passed as the 1149 ** second argument. 1150 */ 1151 void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){ 1152 if( pReader && !fts3SegReaderIsPending(pReader) ){ 1153 sqlite3_free(pReader->zTerm); 1154 if( !fts3SegReaderIsRootOnly(pReader) ){ 1155 sqlite3_free(pReader->aNode); 1156 } 1157 } 1158 sqlite3_free(pReader); 1159 } 1160 1161 /* 1162 ** Allocate a new SegReader object. 1163 */ 1164 int sqlite3Fts3SegReaderNew( 1165 int iAge, /* Segment "age". */ 1166 sqlite3_int64 iStartLeaf, /* First leaf to traverse */ 1167 sqlite3_int64 iEndLeaf, /* Final leaf to traverse */ 1168 sqlite3_int64 iEndBlock, /* Final block of segment */ 1169 const char *zRoot, /* Buffer containing root node */ 1170 int nRoot, /* Size of buffer containing root node */ 1171 Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */ 1172 ){ 1173 int rc = SQLITE_OK; /* Return code */ 1174 Fts3SegReader *pReader; /* Newly allocated SegReader object */ 1175 int nExtra = 0; /* Bytes to allocate segment root node */ 1176 1177 assert( iStartLeaf<=iEndLeaf ); 1178 if( iStartLeaf==0 ){ 1179 nExtra = nRoot + FTS3_NODE_PADDING; 1180 } 1181 1182 pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra); 1183 if( !pReader ){ 1184 return SQLITE_NOMEM; 1185 } 1186 memset(pReader, 0, sizeof(Fts3SegReader)); 1187 pReader->iIdx = iAge; 1188 pReader->iStartBlock = iStartLeaf; 1189 pReader->iLeafEndBlock = iEndLeaf; 1190 pReader->iEndBlock = iEndBlock; 1191 1192 if( nExtra ){ 1193 /* The entire segment is stored in the root node. */ 1194 pReader->aNode = (char *)&pReader[1]; 1195 pReader->nNode = nRoot; 1196 memcpy(pReader->aNode, zRoot, nRoot); 1197 memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING); 1198 }else{ 1199 pReader->iCurrentBlock = iStartLeaf-1; 1200 } 1201 1202 if( rc==SQLITE_OK ){ 1203 *ppReader = pReader; 1204 }else{ 1205 sqlite3Fts3SegReaderFree(pReader); 1206 } 1207 return rc; 1208 } 1209 1210 /* 1211 ** This is a comparison function used as a qsort() callback when sorting 1212 ** an array of pending terms by term. This occurs as part of flushing 1213 ** the contents of the pending-terms hash table to the database. 1214 */ 1215 static int fts3CompareElemByTerm(const void *lhs, const void *rhs){ 1216 char *z1 = fts3HashKey(*(Fts3HashElem **)lhs); 1217 char *z2 = fts3HashKey(*(Fts3HashElem **)rhs); 1218 int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs); 1219 int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs); 1220 1221 int n = (n1<n2 ? n1 : n2); 1222 int c = memcmp(z1, z2, n); 1223 if( c==0 ){ 1224 c = n1 - n2; 1225 } 1226 return c; 1227 } 1228 1229 /* 1230 ** This function is used to allocate an Fts3SegReader that iterates through 1231 ** a subset of the terms stored in the Fts3Table.pendingTerms array. 1232 */ 1233 int sqlite3Fts3SegReaderPending( 1234 Fts3Table *p, /* Virtual table handle */ 1235 const char *zTerm, /* Term to search for */ 1236 int nTerm, /* Size of buffer zTerm */ 1237 int isPrefix, /* True for a term-prefix query */ 1238 Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */ 1239 ){ 1240 Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */ 1241 Fts3HashElem *pE; /* Iterator variable */ 1242 Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */ 1243 int nElem = 0; /* Size of array at aElem */ 1244 int rc = SQLITE_OK; /* Return Code */ 1245 1246 if( isPrefix ){ 1247 int nAlloc = 0; /* Size of allocated array at aElem */ 1248 1249 for(pE=fts3HashFirst(&p->pendingTerms); pE; pE=fts3HashNext(pE)){ 1250 char *zKey = (char *)fts3HashKey(pE); 1251 int nKey = fts3HashKeysize(pE); 1252 if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){ 1253 if( nElem==nAlloc ){ 1254 Fts3HashElem **aElem2; 1255 nAlloc += 16; 1256 aElem2 = (Fts3HashElem **)sqlite3_realloc( 1257 aElem, nAlloc*sizeof(Fts3HashElem *) 1258 ); 1259 if( !aElem2 ){ 1260 rc = SQLITE_NOMEM; 1261 nElem = 0; 1262 break; 1263 } 1264 aElem = aElem2; 1265 } 1266 aElem[nElem++] = pE; 1267 } 1268 } 1269 1270 /* If more than one term matches the prefix, sort the Fts3HashElem 1271 ** objects in term order using qsort(). This uses the same comparison 1272 ** callback as is used when flushing terms to disk. 1273 */ 1274 if( nElem>1 ){ 1275 qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm); 1276 } 1277 1278 }else{ 1279 pE = fts3HashFindElem(&p->pendingTerms, zTerm, nTerm); 1280 if( pE ){ 1281 aElem = &pE; 1282 nElem = 1; 1283 } 1284 } 1285 1286 if( nElem>0 ){ 1287 int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *); 1288 pReader = (Fts3SegReader *)sqlite3_malloc(nByte); 1289 if( !pReader ){ 1290 rc = SQLITE_NOMEM; 1291 }else{ 1292 memset(pReader, 0, nByte); 1293 pReader->iIdx = 0x7FFFFFFF; 1294 pReader->ppNextElem = (Fts3HashElem **)&pReader[1]; 1295 memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *)); 1296 } 1297 } 1298 1299 if( isPrefix ){ 1300 sqlite3_free(aElem); 1301 } 1302 *ppReader = pReader; 1303 return rc; 1304 } 1305 1306 /* 1307 ** Compare the entries pointed to by two Fts3SegReader structures. 1308 ** Comparison is as follows: 1309 ** 1310 ** 1) EOF is greater than not EOF. 1311 ** 1312 ** 2) The current terms (if any) are compared using memcmp(). If one 1313 ** term is a prefix of another, the longer term is considered the 1314 ** larger. 1315 ** 1316 ** 3) By segment age. An older segment is considered larger. 1317 */ 1318 static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ 1319 int rc; 1320 if( pLhs->aNode && pRhs->aNode ){ 1321 int rc2 = pLhs->nTerm - pRhs->nTerm; 1322 if( rc2<0 ){ 1323 rc = memcmp(pLhs->zTerm, pRhs->zTerm, pLhs->nTerm); 1324 }else{ 1325 rc = memcmp(pLhs->zTerm, pRhs->zTerm, pRhs->nTerm); 1326 } 1327 if( rc==0 ){ 1328 rc = rc2; 1329 } 1330 }else{ 1331 rc = (pLhs->aNode==0) - (pRhs->aNode==0); 1332 } 1333 if( rc==0 ){ 1334 rc = pRhs->iIdx - pLhs->iIdx; 1335 } 1336 assert( rc!=0 ); 1337 return rc; 1338 } 1339 1340 /* 1341 ** A different comparison function for SegReader structures. In this 1342 ** version, it is assumed that each SegReader points to an entry in 1343 ** a doclist for identical terms. Comparison is made as follows: 1344 ** 1345 ** 1) EOF (end of doclist in this case) is greater than not EOF. 1346 ** 1347 ** 2) By current docid. 1348 ** 1349 ** 3) By segment age. An older segment is considered larger. 1350 */ 1351 static int fts3SegReaderDoclistCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ 1352 int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); 1353 if( rc==0 ){ 1354 if( pLhs->iDocid==pRhs->iDocid ){ 1355 rc = pRhs->iIdx - pLhs->iIdx; 1356 }else{ 1357 rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1; 1358 } 1359 } 1360 assert( pLhs->aNode && pRhs->aNode ); 1361 return rc; 1362 } 1363 1364 /* 1365 ** Compare the term that the Fts3SegReader object passed as the first argument 1366 ** points to with the term specified by arguments zTerm and nTerm. 1367 ** 1368 ** If the pSeg iterator is already at EOF, return 0. Otherwise, return 1369 ** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are 1370 ** equal, or +ve if the pSeg term is greater than zTerm/nTerm. 1371 */ 1372 static int fts3SegReaderTermCmp( 1373 Fts3SegReader *pSeg, /* Segment reader object */ 1374 const char *zTerm, /* Term to compare to */ 1375 int nTerm /* Size of term zTerm in bytes */ 1376 ){ 1377 int res = 0; 1378 if( pSeg->aNode ){ 1379 if( pSeg->nTerm>nTerm ){ 1380 res = memcmp(pSeg->zTerm, zTerm, nTerm); 1381 }else{ 1382 res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm); 1383 } 1384 if( res==0 ){ 1385 res = pSeg->nTerm-nTerm; 1386 } 1387 } 1388 return res; 1389 } 1390 1391 /* 1392 ** Argument apSegment is an array of nSegment elements. It is known that 1393 ** the final (nSegment-nSuspect) members are already in sorted order 1394 ** (according to the comparison function provided). This function shuffles 1395 ** the array around until all entries are in sorted order. 1396 */ 1397 static void fts3SegReaderSort( 1398 Fts3SegReader **apSegment, /* Array to sort entries of */ 1399 int nSegment, /* Size of apSegment array */ 1400 int nSuspect, /* Unsorted entry count */ 1401 int (*xCmp)(Fts3SegReader *, Fts3SegReader *) /* Comparison function */ 1402 ){ 1403 int i; /* Iterator variable */ 1404 1405 assert( nSuspect<=nSegment ); 1406 1407 if( nSuspect==nSegment ) nSuspect--; 1408 for(i=nSuspect-1; i>=0; i--){ 1409 int j; 1410 for(j=i; j<(nSegment-1); j++){ 1411 Fts3SegReader *pTmp; 1412 if( xCmp(apSegment[j], apSegment[j+1])<0 ) break; 1413 pTmp = apSegment[j+1]; 1414 apSegment[j+1] = apSegment[j]; 1415 apSegment[j] = pTmp; 1416 } 1417 } 1418 1419 #ifndef NDEBUG 1420 /* Check that the list really is sorted now. */ 1421 for(i=0; i<(nSuspect-1); i++){ 1422 assert( xCmp(apSegment[i], apSegment[i+1])<0 ); 1423 } 1424 #endif 1425 } 1426 1427 /* 1428 ** Insert a record into the %_segments table. 1429 */ 1430 static int fts3WriteSegment( 1431 Fts3Table *p, /* Virtual table handle */ 1432 sqlite3_int64 iBlock, /* Block id for new block */ 1433 char *z, /* Pointer to buffer containing block data */ 1434 int n /* Size of buffer z in bytes */ 1435 ){ 1436 sqlite3_stmt *pStmt; 1437 int rc = fts3SqlStmt(p, SQL_INSERT_SEGMENTS, &pStmt, 0); 1438 if( rc==SQLITE_OK ){ 1439 sqlite3_bind_int64(pStmt, 1, iBlock); 1440 sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC); 1441 sqlite3_step(pStmt); 1442 rc = sqlite3_reset(pStmt); 1443 } 1444 return rc; 1445 } 1446 1447 /* 1448 ** Insert a record into the %_segdir table. 1449 */ 1450 static int fts3WriteSegdir( 1451 Fts3Table *p, /* Virtual table handle */ 1452 int iLevel, /* Value for "level" field */ 1453 int iIdx, /* Value for "idx" field */ 1454 sqlite3_int64 iStartBlock, /* Value for "start_block" field */ 1455 sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */ 1456 sqlite3_int64 iEndBlock, /* Value for "end_block" field */ 1457 char *zRoot, /* Blob value for "root" field */ 1458 int nRoot /* Number of bytes in buffer zRoot */ 1459 ){ 1460 sqlite3_stmt *pStmt; 1461 int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0); 1462 if( rc==SQLITE_OK ){ 1463 sqlite3_bind_int(pStmt, 1, iLevel); 1464 sqlite3_bind_int(pStmt, 2, iIdx); 1465 sqlite3_bind_int64(pStmt, 3, iStartBlock); 1466 sqlite3_bind_int64(pStmt, 4, iLeafEndBlock); 1467 sqlite3_bind_int64(pStmt, 5, iEndBlock); 1468 sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC); 1469 sqlite3_step(pStmt); 1470 rc = sqlite3_reset(pStmt); 1471 } 1472 return rc; 1473 } 1474 1475 /* 1476 ** Return the size of the common prefix (if any) shared by zPrev and 1477 ** zNext, in bytes. For example, 1478 ** 1479 ** fts3PrefixCompress("abc", 3, "abcdef", 6) // returns 3 1480 ** fts3PrefixCompress("abX", 3, "abcdef", 6) // returns 2 1481 ** fts3PrefixCompress("abX", 3, "Xbcdef", 6) // returns 0 1482 */ 1483 static int fts3PrefixCompress( 1484 const char *zPrev, /* Buffer containing previous term */ 1485 int nPrev, /* Size of buffer zPrev in bytes */ 1486 const char *zNext, /* Buffer containing next term */ 1487 int nNext /* Size of buffer zNext in bytes */ 1488 ){ 1489 int n; 1490 UNUSED_PARAMETER(nNext); 1491 for(n=0; n<nPrev && zPrev[n]==zNext[n]; n++); 1492 return n; 1493 } 1494 1495 /* 1496 ** Add term zTerm to the SegmentNode. It is guaranteed that zTerm is larger 1497 ** (according to memcmp) than the previous term. 1498 */ 1499 static int fts3NodeAddTerm( 1500 Fts3Table *p, /* Virtual table handle */ 1501 SegmentNode **ppTree, /* IN/OUT: SegmentNode handle */ 1502 int isCopyTerm, /* True if zTerm/nTerm is transient */ 1503 const char *zTerm, /* Pointer to buffer containing term */ 1504 int nTerm /* Size of term in bytes */ 1505 ){ 1506 SegmentNode *pTree = *ppTree; 1507 int rc; 1508 SegmentNode *pNew; 1509 1510 /* First try to append the term to the current node. Return early if 1511 ** this is possible. 1512 */ 1513 if( pTree ){ 1514 int nData = pTree->nData; /* Current size of node in bytes */ 1515 int nReq = nData; /* Required space after adding zTerm */ 1516 int nPrefix; /* Number of bytes of prefix compression */ 1517 int nSuffix; /* Suffix length */ 1518 1519 nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm); 1520 nSuffix = nTerm-nPrefix; 1521 1522 nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix; 1523 if( nReq<=p->nNodeSize || !pTree->zTerm ){ 1524 1525 if( nReq>p->nNodeSize ){ 1526 /* An unusual case: this is the first term to be added to the node 1527 ** and the static node buffer (p->nNodeSize bytes) is not large 1528 ** enough. Use a separately malloced buffer instead This wastes 1529 ** p->nNodeSize bytes, but since this scenario only comes about when 1530 ** the database contain two terms that share a prefix of almost 2KB, 1531 ** this is not expected to be a serious problem. 1532 */ 1533 assert( pTree->aData==(char *)&pTree[1] ); 1534 pTree->aData = (char *)sqlite3_malloc(nReq); 1535 if( !pTree->aData ){ 1536 return SQLITE_NOMEM; 1537 } 1538 } 1539 1540 if( pTree->zTerm ){ 1541 /* There is no prefix-length field for first term in a node */ 1542 nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nPrefix); 1543 } 1544 1545 nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nSuffix); 1546 memcpy(&pTree->aData[nData], &zTerm[nPrefix], nSuffix); 1547 pTree->nData = nData + nSuffix; 1548 pTree->nEntry++; 1549 1550 if( isCopyTerm ){ 1551 if( pTree->nMalloc<nTerm ){ 1552 char *zNew = sqlite3_realloc(pTree->zMalloc, nTerm*2); 1553 if( !zNew ){ 1554 return SQLITE_NOMEM; 1555 } 1556 pTree->nMalloc = nTerm*2; 1557 pTree->zMalloc = zNew; 1558 } 1559 pTree->zTerm = pTree->zMalloc; 1560 memcpy(pTree->zTerm, zTerm, nTerm); 1561 pTree->nTerm = nTerm; 1562 }else{ 1563 pTree->zTerm = (char *)zTerm; 1564 pTree->nTerm = nTerm; 1565 } 1566 return SQLITE_OK; 1567 } 1568 } 1569 1570 /* If control flows to here, it was not possible to append zTerm to the 1571 ** current node. Create a new node (a right-sibling of the current node). 1572 ** If this is the first node in the tree, the term is added to it. 1573 ** 1574 ** Otherwise, the term is not added to the new node, it is left empty for 1575 ** now. Instead, the term is inserted into the parent of pTree. If pTree 1576 ** has no parent, one is created here. 1577 */ 1578 pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize); 1579 if( !pNew ){ 1580 return SQLITE_NOMEM; 1581 } 1582 memset(pNew, 0, sizeof(SegmentNode)); 1583 pNew->nData = 1 + FTS3_VARINT_MAX; 1584 pNew->aData = (char *)&pNew[1]; 1585 1586 if( pTree ){ 1587 SegmentNode *pParent = pTree->pParent; 1588 rc = fts3NodeAddTerm(p, &pParent, isCopyTerm, zTerm, nTerm); 1589 if( pTree->pParent==0 ){ 1590 pTree->pParent = pParent; 1591 } 1592 pTree->pRight = pNew; 1593 pNew->pLeftmost = pTree->pLeftmost; 1594 pNew->pParent = pParent; 1595 pNew->zMalloc = pTree->zMalloc; 1596 pNew->nMalloc = pTree->nMalloc; 1597 pTree->zMalloc = 0; 1598 }else{ 1599 pNew->pLeftmost = pNew; 1600 rc = fts3NodeAddTerm(p, &pNew, isCopyTerm, zTerm, nTerm); 1601 } 1602 1603 *ppTree = pNew; 1604 return rc; 1605 } 1606 1607 /* 1608 ** Helper function for fts3NodeWrite(). 1609 */ 1610 static int fts3TreeFinishNode( 1611 SegmentNode *pTree, 1612 int iHeight, 1613 sqlite3_int64 iLeftChild 1614 ){ 1615 int nStart; 1616 assert( iHeight>=1 && iHeight<128 ); 1617 nStart = FTS3_VARINT_MAX - sqlite3Fts3VarintLen(iLeftChild); 1618 pTree->aData[nStart] = (char)iHeight; 1619 sqlite3Fts3PutVarint(&pTree->aData[nStart+1], iLeftChild); 1620 return nStart; 1621 } 1622 1623 /* 1624 ** Write the buffer for the segment node pTree and all of its peers to the 1625 ** database. Then call this function recursively to write the parent of 1626 ** pTree and its peers to the database. 1627 ** 1628 ** Except, if pTree is a root node, do not write it to the database. Instead, 1629 ** set output variables *paRoot and *pnRoot to contain the root node. 1630 ** 1631 ** If successful, SQLITE_OK is returned and output variable *piLast is 1632 ** set to the largest blockid written to the database (or zero if no 1633 ** blocks were written to the db). Otherwise, an SQLite error code is 1634 ** returned. 1635 */ 1636 static int fts3NodeWrite( 1637 Fts3Table *p, /* Virtual table handle */ 1638 SegmentNode *pTree, /* SegmentNode handle */ 1639 int iHeight, /* Height of this node in tree */ 1640 sqlite3_int64 iLeaf, /* Block id of first leaf node */ 1641 sqlite3_int64 iFree, /* Block id of next free slot in %_segments */ 1642 sqlite3_int64 *piLast, /* OUT: Block id of last entry written */ 1643 char **paRoot, /* OUT: Data for root node */ 1644 int *pnRoot /* OUT: Size of root node in bytes */ 1645 ){ 1646 int rc = SQLITE_OK; 1647 1648 if( !pTree->pParent ){ 1649 /* Root node of the tree. */ 1650 int nStart = fts3TreeFinishNode(pTree, iHeight, iLeaf); 1651 *piLast = iFree-1; 1652 *pnRoot = pTree->nData - nStart; 1653 *paRoot = &pTree->aData[nStart]; 1654 }else{ 1655 SegmentNode *pIter; 1656 sqlite3_int64 iNextFree = iFree; 1657 sqlite3_int64 iNextLeaf = iLeaf; 1658 for(pIter=pTree->pLeftmost; pIter && rc==SQLITE_OK; pIter=pIter->pRight){ 1659 int nStart = fts3TreeFinishNode(pIter, iHeight, iNextLeaf); 1660 int nWrite = pIter->nData - nStart; 1661 1662 rc = fts3WriteSegment(p, iNextFree, &pIter->aData[nStart], nWrite); 1663 iNextFree++; 1664 iNextLeaf += (pIter->nEntry+1); 1665 } 1666 if( rc==SQLITE_OK ){ 1667 assert( iNextLeaf==iFree ); 1668 rc = fts3NodeWrite( 1669 p, pTree->pParent, iHeight+1, iFree, iNextFree, piLast, paRoot, pnRoot 1670 ); 1671 } 1672 } 1673 1674 return rc; 1675 } 1676 1677 /* 1678 ** Free all memory allocations associated with the tree pTree. 1679 */ 1680 static void fts3NodeFree(SegmentNode *pTree){ 1681 if( pTree ){ 1682 SegmentNode *p = pTree->pLeftmost; 1683 fts3NodeFree(p->pParent); 1684 while( p ){ 1685 SegmentNode *pRight = p->pRight; 1686 if( p->aData!=(char *)&p[1] ){ 1687 sqlite3_free(p->aData); 1688 } 1689 assert( pRight==0 || p->zMalloc==0 ); 1690 sqlite3_free(p->zMalloc); 1691 sqlite3_free(p); 1692 p = pRight; 1693 } 1694 } 1695 } 1696 1697 /* 1698 ** Add a term to the segment being constructed by the SegmentWriter object 1699 ** *ppWriter. When adding the first term to a segment, *ppWriter should 1700 ** be passed NULL. This function will allocate a new SegmentWriter object 1701 ** and return it via the input/output variable *ppWriter in this case. 1702 ** 1703 ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. 1704 */ 1705 static int fts3SegWriterAdd( 1706 Fts3Table *p, /* Virtual table handle */ 1707 SegmentWriter **ppWriter, /* IN/OUT: SegmentWriter handle */ 1708 int isCopyTerm, /* True if buffer zTerm must be copied */ 1709 const char *zTerm, /* Pointer to buffer containing term */ 1710 int nTerm, /* Size of term in bytes */ 1711 const char *aDoclist, /* Pointer to buffer containing doclist */ 1712 int nDoclist /* Size of doclist in bytes */ 1713 ){ 1714 int nPrefix; /* Size of term prefix in bytes */ 1715 int nSuffix; /* Size of term suffix in bytes */ 1716 int nReq; /* Number of bytes required on leaf page */ 1717 int nData; 1718 SegmentWriter *pWriter = *ppWriter; 1719 1720 if( !pWriter ){ 1721 int rc; 1722 sqlite3_stmt *pStmt; 1723 1724 /* Allocate the SegmentWriter structure */ 1725 pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter)); 1726 if( !pWriter ) return SQLITE_NOMEM; 1727 memset(pWriter, 0, sizeof(SegmentWriter)); 1728 *ppWriter = pWriter; 1729 1730 /* Allocate a buffer in which to accumulate data */ 1731 pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize); 1732 if( !pWriter->aData ) return SQLITE_NOMEM; 1733 pWriter->nSize = p->nNodeSize; 1734 1735 /* Find the next free blockid in the %_segments table */ 1736 rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pStmt, 0); 1737 if( rc!=SQLITE_OK ) return rc; 1738 if( SQLITE_ROW==sqlite3_step(pStmt) ){ 1739 pWriter->iFree = sqlite3_column_int64(pStmt, 0); 1740 pWriter->iFirst = pWriter->iFree; 1741 } 1742 rc = sqlite3_reset(pStmt); 1743 if( rc!=SQLITE_OK ) return rc; 1744 } 1745 nData = pWriter->nData; 1746 1747 nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm); 1748 nSuffix = nTerm-nPrefix; 1749 1750 /* Figure out how many bytes are required by this new entry */ 1751 nReq = sqlite3Fts3VarintLen(nPrefix) + /* varint containing prefix size */ 1752 sqlite3Fts3VarintLen(nSuffix) + /* varint containing suffix size */ 1753 nSuffix + /* Term suffix */ 1754 sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ 1755 nDoclist; /* Doclist data */ 1756 1757 if( nData>0 && nData+nReq>p->nNodeSize ){ 1758 int rc; 1759 1760 /* The current leaf node is full. Write it out to the database. */ 1761 rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData); 1762 if( rc!=SQLITE_OK ) return rc; 1763 1764 /* Add the current term to the interior node tree. The term added to 1765 ** the interior tree must: 1766 ** 1767 ** a) be greater than the largest term on the leaf node just written 1768 ** to the database (still available in pWriter->zTerm), and 1769 ** 1770 ** b) be less than or equal to the term about to be added to the new 1771 ** leaf node (zTerm/nTerm). 1772 ** 1773 ** In other words, it must be the prefix of zTerm 1 byte longer than 1774 ** the common prefix (if any) of zTerm and pWriter->zTerm. 1775 */ 1776 assert( nPrefix<nTerm ); 1777 rc = fts3NodeAddTerm(p, &pWriter->pTree, isCopyTerm, zTerm, nPrefix+1); 1778 if( rc!=SQLITE_OK ) return rc; 1779 1780 nData = 0; 1781 pWriter->nTerm = 0; 1782 1783 nPrefix = 0; 1784 nSuffix = nTerm; 1785 nReq = 1 + /* varint containing prefix size */ 1786 sqlite3Fts3VarintLen(nTerm) + /* varint containing suffix size */ 1787 nTerm + /* Term suffix */ 1788 sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ 1789 nDoclist; /* Doclist data */ 1790 } 1791 1792 /* If the buffer currently allocated is too small for this entry, realloc 1793 ** the buffer to make it large enough. 1794 */ 1795 if( nReq>pWriter->nSize ){ 1796 char *aNew = sqlite3_realloc(pWriter->aData, nReq); 1797 if( !aNew ) return SQLITE_NOMEM; 1798 pWriter->aData = aNew; 1799 pWriter->nSize = nReq; 1800 } 1801 assert( nData+nReq<=pWriter->nSize ); 1802 1803 /* Append the prefix-compressed term and doclist to the buffer. */ 1804 nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix); 1805 nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix); 1806 memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix); 1807 nData += nSuffix; 1808 nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist); 1809 memcpy(&pWriter->aData[nData], aDoclist, nDoclist); 1810 pWriter->nData = nData + nDoclist; 1811 1812 /* Save the current term so that it can be used to prefix-compress the next. 1813 ** If the isCopyTerm parameter is true, then the buffer pointed to by 1814 ** zTerm is transient, so take a copy of the term data. Otherwise, just 1815 ** store a copy of the pointer. 1816 */ 1817 if( isCopyTerm ){ 1818 if( nTerm>pWriter->nMalloc ){ 1819 char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2); 1820 if( !zNew ){ 1821 return SQLITE_NOMEM; 1822 } 1823 pWriter->nMalloc = nTerm*2; 1824 pWriter->zMalloc = zNew; 1825 pWriter->zTerm = zNew; 1826 } 1827 assert( pWriter->zTerm==pWriter->zMalloc ); 1828 memcpy(pWriter->zTerm, zTerm, nTerm); 1829 }else{ 1830 pWriter->zTerm = (char *)zTerm; 1831 } 1832 pWriter->nTerm = nTerm; 1833 1834 return SQLITE_OK; 1835 } 1836 1837 /* 1838 ** Flush all data associated with the SegmentWriter object pWriter to the 1839 ** database. This function must be called after all terms have been added 1840 ** to the segment using fts3SegWriterAdd(). If successful, SQLITE_OK is 1841 ** returned. Otherwise, an SQLite error code. 1842 */ 1843 static int fts3SegWriterFlush( 1844 Fts3Table *p, /* Virtual table handle */ 1845 SegmentWriter *pWriter, /* SegmentWriter to flush to the db */ 1846 int iLevel, /* Value for 'level' column of %_segdir */ 1847 int iIdx /* Value for 'idx' column of %_segdir */ 1848 ){ 1849 int rc; /* Return code */ 1850 if( pWriter->pTree ){ 1851 sqlite3_int64 iLast = 0; /* Largest block id written to database */ 1852 sqlite3_int64 iLastLeaf; /* Largest leaf block id written to db */ 1853 char *zRoot = NULL; /* Pointer to buffer containing root node */ 1854 int nRoot = 0; /* Size of buffer zRoot */ 1855 1856 iLastLeaf = pWriter->iFree; 1857 rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, pWriter->nData); 1858 if( rc==SQLITE_OK ){ 1859 rc = fts3NodeWrite(p, pWriter->pTree, 1, 1860 pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot); 1861 } 1862 if( rc==SQLITE_OK ){ 1863 rc = fts3WriteSegdir( 1864 p, iLevel, iIdx, pWriter->iFirst, iLastLeaf, iLast, zRoot, nRoot); 1865 } 1866 }else{ 1867 /* The entire tree fits on the root node. Write it to the segdir table. */ 1868 rc = fts3WriteSegdir( 1869 p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData); 1870 } 1871 return rc; 1872 } 1873 1874 /* 1875 ** Release all memory held by the SegmentWriter object passed as the 1876 ** first argument. 1877 */ 1878 static void fts3SegWriterFree(SegmentWriter *pWriter){ 1879 if( pWriter ){ 1880 sqlite3_free(pWriter->aData); 1881 sqlite3_free(pWriter->zMalloc); 1882 fts3NodeFree(pWriter->pTree); 1883 sqlite3_free(pWriter); 1884 } 1885 } 1886 1887 /* 1888 ** The first value in the apVal[] array is assumed to contain an integer. 1889 ** This function tests if there exist any documents with docid values that 1890 ** are different from that integer. i.e. if deleting the document with docid 1891 ** apVal[0] would mean the FTS3 table were empty. 1892 ** 1893 ** If successful, *pisEmpty is set to true if the table is empty except for 1894 ** document apVal[0], or false otherwise, and SQLITE_OK is returned. If an 1895 ** error occurs, an SQLite error code is returned. 1896 */ 1897 static int fts3IsEmpty(Fts3Table *p, sqlite3_value **apVal, int *pisEmpty){ 1898 sqlite3_stmt *pStmt; 1899 int rc; 1900 rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, apVal); 1901 if( rc==SQLITE_OK ){ 1902 if( SQLITE_ROW==sqlite3_step(pStmt) ){ 1903 *pisEmpty = sqlite3_column_int(pStmt, 0); 1904 } 1905 rc = sqlite3_reset(pStmt); 1906 } 1907 return rc; 1908 } 1909 1910 /* 1911 ** Set *pnSegment to the total number of segments in the database. Set 1912 ** *pnMax to the largest segment level in the database (segment levels 1913 ** are stored in the 'level' column of the %_segdir table). 1914 ** 1915 ** Return SQLITE_OK if successful, or an SQLite error code if not. 1916 */ 1917 static int fts3SegmentCountMax(Fts3Table *p, int *pnSegment, int *pnMax){ 1918 sqlite3_stmt *pStmt; 1919 int rc; 1920 1921 rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_COUNT_MAX, &pStmt, 0); 1922 if( rc!=SQLITE_OK ) return rc; 1923 if( SQLITE_ROW==sqlite3_step(pStmt) ){ 1924 *pnSegment = sqlite3_column_int(pStmt, 0); 1925 *pnMax = sqlite3_column_int(pStmt, 1); 1926 } 1927 return sqlite3_reset(pStmt); 1928 } 1929 1930 /* 1931 ** This function is used after merging multiple segments into a single large 1932 ** segment to delete the old, now redundant, segment b-trees. Specifically, 1933 ** it: 1934 ** 1935 ** 1) Deletes all %_segments entries for the segments associated with 1936 ** each of the SegReader objects in the array passed as the third 1937 ** argument, and 1938 ** 1939 ** 2) deletes all %_segdir entries with level iLevel, or all %_segdir 1940 ** entries regardless of level if (iLevel<0). 1941 ** 1942 ** SQLITE_OK is returned if successful, otherwise an SQLite error code. 1943 */ 1944 static int fts3DeleteSegdir( 1945 Fts3Table *p, /* Virtual table handle */ 1946 int iLevel, /* Level of %_segdir entries to delete */ 1947 Fts3SegReader **apSegment, /* Array of SegReader objects */ 1948 int nReader /* Size of array apSegment */ 1949 ){ 1950 int rc; /* Return Code */ 1951 int i; /* Iterator variable */ 1952 sqlite3_stmt *pDelete; /* SQL statement to delete rows */ 1953 1954 rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0); 1955 for(i=0; rc==SQLITE_OK && i<nReader; i++){ 1956 Fts3SegReader *pSegment = apSegment[i]; 1957 if( pSegment->iStartBlock ){ 1958 sqlite3_bind_int64(pDelete, 1, pSegment->iStartBlock); 1959 sqlite3_bind_int64(pDelete, 2, pSegment->iEndBlock); 1960 sqlite3_step(pDelete); 1961 rc = sqlite3_reset(pDelete); 1962 } 1963 } 1964 if( rc!=SQLITE_OK ){ 1965 return rc; 1966 } 1967 1968 if( iLevel==FTS3_SEGCURSOR_ALL ){ 1969 fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); 1970 }else if( iLevel==FTS3_SEGCURSOR_PENDING ){ 1971 sqlite3Fts3PendingTermsClear(p); 1972 }else{ 1973 assert( iLevel>=0 ); 1974 rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_BY_LEVEL, &pDelete, 0); 1975 if( rc==SQLITE_OK ){ 1976 sqlite3_bind_int(pDelete, 1, iLevel); 1977 sqlite3_step(pDelete); 1978 rc = sqlite3_reset(pDelete); 1979 } 1980 } 1981 1982 return rc; 1983 } 1984 1985 /* 1986 ** When this function is called, buffer *ppList (size *pnList bytes) contains 1987 ** a position list that may (or may not) feature multiple columns. This 1988 ** function adjusts the pointer *ppList and the length *pnList so that they 1989 ** identify the subset of the position list that corresponds to column iCol. 1990 ** 1991 ** If there are no entries in the input position list for column iCol, then 1992 ** *pnList is set to zero before returning. 1993 */ 1994 static void fts3ColumnFilter( 1995 int iCol, /* Column to filter on */ 1996 char **ppList, /* IN/OUT: Pointer to position list */ 1997 int *pnList /* IN/OUT: Size of buffer *ppList in bytes */ 1998 ){ 1999 char *pList = *ppList; 2000 int nList = *pnList; 2001 char *pEnd = &pList[nList]; 2002 int iCurrent = 0; 2003 char *p = pList; 2004 2005 assert( iCol>=0 ); 2006 while( 1 ){ 2007 char c = 0; 2008 while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80; 2009 2010 if( iCol==iCurrent ){ 2011 nList = (int)(p - pList); 2012 break; 2013 } 2014 2015 nList -= (int)(p - pList); 2016 pList = p; 2017 if( nList==0 ){ 2018 break; 2019 } 2020 p = &pList[1]; 2021 p += sqlite3Fts3GetVarint32(p, &iCurrent); 2022 } 2023 2024 *ppList = pList; 2025 *pnList = nList; 2026 } 2027 2028 int sqlite3Fts3SegReaderStart( 2029 Fts3Table *p, /* Virtual table handle */ 2030 Fts3SegReaderCursor *pCsr, /* Cursor object */ 2031 Fts3SegFilter *pFilter /* Restrictions on range of iteration */ 2032 ){ 2033 int i; 2034 2035 /* Initialize the cursor object */ 2036 pCsr->pFilter = pFilter; 2037 2038 /* If the Fts3SegFilter defines a specific term (or term prefix) to search 2039 ** for, then advance each segment iterator until it points to a term of 2040 ** equal or greater value than the specified term. This prevents many 2041 ** unnecessary merge/sort operations for the case where single segment 2042 ** b-tree leaf nodes contain more than one term. 2043 */ 2044 for(i=0; i<pCsr->nSegment; i++){ 2045 int nTerm = pFilter->nTerm; 2046 const char *zTerm = pFilter->zTerm; 2047 Fts3SegReader *pSeg = pCsr->apSegment[i]; 2048 do { 2049 int rc = fts3SegReaderNext(p, pSeg); 2050 if( rc!=SQLITE_OK ) return rc; 2051 }while( zTerm && fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ); 2052 } 2053 fts3SegReaderSort( 2054 pCsr->apSegment, pCsr->nSegment, pCsr->nSegment, fts3SegReaderCmp); 2055 2056 return SQLITE_OK; 2057 } 2058 2059 int sqlite3Fts3SegReaderStep( 2060 Fts3Table *p, /* Virtual table handle */ 2061 Fts3SegReaderCursor *pCsr /* Cursor object */ 2062 ){ 2063 int rc = SQLITE_OK; 2064 2065 int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); 2066 int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); 2067 int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); 2068 int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); 2069 int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); 2070 2071 Fts3SegReader **apSegment = pCsr->apSegment; 2072 int nSegment = pCsr->nSegment; 2073 Fts3SegFilter *pFilter = pCsr->pFilter; 2074 2075 if( pCsr->nSegment==0 ) return SQLITE_OK; 2076 2077 do { 2078 int nMerge; 2079 int i; 2080 2081 /* Advance the first pCsr->nAdvance entries in the apSegment[] array 2082 ** forward. Then sort the list in order of current term again. 2083 */ 2084 for(i=0; i<pCsr->nAdvance; i++){ 2085 rc = fts3SegReaderNext(p, apSegment[i]); 2086 if( rc!=SQLITE_OK ) return rc; 2087 } 2088 fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp); 2089 pCsr->nAdvance = 0; 2090 2091 /* If all the seg-readers are at EOF, we're finished. return SQLITE_OK. */ 2092 assert( rc==SQLITE_OK ); 2093 if( apSegment[0]->aNode==0 ) break; 2094 2095 pCsr->nTerm = apSegment[0]->nTerm; 2096 pCsr->zTerm = apSegment[0]->zTerm; 2097 2098 /* If this is a prefix-search, and if the term that apSegment[0] points 2099 ** to does not share a suffix with pFilter->zTerm/nTerm, then all 2100 ** required callbacks have been made. In this case exit early. 2101 ** 2102 ** Similarly, if this is a search for an exact match, and the first term 2103 ** of segment apSegment[0] is not a match, exit early. 2104 */ 2105 if( pFilter->zTerm && !isScan ){ 2106 if( pCsr->nTerm<pFilter->nTerm 2107 || (!isPrefix && pCsr->nTerm>pFilter->nTerm) 2108 || memcmp(pCsr->zTerm, pFilter->zTerm, pFilter->nTerm) 2109 ){ 2110 break; 2111 } 2112 } 2113 2114 nMerge = 1; 2115 while( nMerge<nSegment 2116 && apSegment[nMerge]->aNode 2117 && apSegment[nMerge]->nTerm==pCsr->nTerm 2118 && 0==memcmp(pCsr->zTerm, apSegment[nMerge]->zTerm, pCsr->nTerm) 2119 ){ 2120 nMerge++; 2121 } 2122 2123 assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); 2124 if( nMerge==1 && !isIgnoreEmpty ){ 2125 pCsr->aDoclist = apSegment[0]->aDoclist; 2126 pCsr->nDoclist = apSegment[0]->nDoclist; 2127 rc = SQLITE_ROW; 2128 }else{ 2129 int nDoclist = 0; /* Size of doclist */ 2130 sqlite3_int64 iPrev = 0; /* Previous docid stored in doclist */ 2131 2132 /* The current term of the first nMerge entries in the array 2133 ** of Fts3SegReader objects is the same. The doclists must be merged 2134 ** and a single term returned with the merged doclist. 2135 */ 2136 for(i=0; i<nMerge; i++){ 2137 fts3SegReaderFirstDocid(apSegment[i]); 2138 } 2139 fts3SegReaderSort(apSegment, nMerge, nMerge, fts3SegReaderDoclistCmp); 2140 while( apSegment[0]->pOffsetList ){ 2141 int j; /* Number of segments that share a docid */ 2142 char *pList; 2143 int nList; 2144 int nByte; 2145 sqlite3_int64 iDocid = apSegment[0]->iDocid; 2146 fts3SegReaderNextDocid(apSegment[0], &pList, &nList); 2147 j = 1; 2148 while( j<nMerge 2149 && apSegment[j]->pOffsetList 2150 && apSegment[j]->iDocid==iDocid 2151 ){ 2152 fts3SegReaderNextDocid(apSegment[j], 0, 0); 2153 j++; 2154 } 2155 2156 if( isColFilter ){ 2157 fts3ColumnFilter(pFilter->iCol, &pList, &nList); 2158 } 2159 2160 if( !isIgnoreEmpty || nList>0 ){ 2161 nByte = sqlite3Fts3VarintLen(iDocid-iPrev) + (isRequirePos?nList+1:0); 2162 if( nDoclist+nByte>pCsr->nBuffer ){ 2163 char *aNew; 2164 pCsr->nBuffer = (nDoclist+nByte)*2; 2165 aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); 2166 if( !aNew ){ 2167 return SQLITE_NOMEM; 2168 } 2169 pCsr->aBuffer = aNew; 2170 } 2171 nDoclist += sqlite3Fts3PutVarint( 2172 &pCsr->aBuffer[nDoclist], iDocid-iPrev 2173 ); 2174 iPrev = iDocid; 2175 if( isRequirePos ){ 2176 memcpy(&pCsr->aBuffer[nDoclist], pList, nList); 2177 nDoclist += nList; 2178 pCsr->aBuffer[nDoclist++] = '\0'; 2179 } 2180 } 2181 2182 fts3SegReaderSort(apSegment, nMerge, j, fts3SegReaderDoclistCmp); 2183 } 2184 if( nDoclist>0 ){ 2185 pCsr->aDoclist = pCsr->aBuffer; 2186 pCsr->nDoclist = nDoclist; 2187 rc = SQLITE_ROW; 2188 } 2189 } 2190 pCsr->nAdvance = nMerge; 2191 }while( rc==SQLITE_OK ); 2192 2193 return rc; 2194 } 2195 2196 void sqlite3Fts3SegReaderFinish( 2197 Fts3SegReaderCursor *pCsr /* Cursor object */ 2198 ){ 2199 if( pCsr ){ 2200 int i; 2201 for(i=0; i<pCsr->nSegment; i++){ 2202 sqlite3Fts3SegReaderFree(pCsr->apSegment[i]); 2203 } 2204 sqlite3_free(pCsr->apSegment); 2205 sqlite3_free(pCsr->aBuffer); 2206 2207 pCsr->nSegment = 0; 2208 pCsr->apSegment = 0; 2209 pCsr->aBuffer = 0; 2210 } 2211 } 2212 2213 /* 2214 ** Merge all level iLevel segments in the database into a single 2215 ** iLevel+1 segment. Or, if iLevel<0, merge all segments into a 2216 ** single segment with a level equal to the numerically largest level 2217 ** currently present in the database. 2218 ** 2219 ** If this function is called with iLevel<0, but there is only one 2220 ** segment in the database, SQLITE_DONE is returned immediately. 2221 ** Otherwise, if successful, SQLITE_OK is returned. If an error occurs, 2222 ** an SQLite error code is returned. 2223 */ 2224 static int fts3SegmentMerge(Fts3Table *p, int iLevel){ 2225 int rc; /* Return code */ 2226 int iIdx = 0; /* Index of new segment */ 2227 int iNewLevel = 0; /* Level to create new segment at */ 2228 SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ 2229 Fts3SegFilter filter; /* Segment term filter condition */ 2230 Fts3SegReaderCursor csr; /* Cursor to iterate through level(s) */ 2231 2232 rc = sqlite3Fts3SegReaderCursor(p, iLevel, 0, 0, 1, 0, &csr); 2233 if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; 2234 2235 if( iLevel==FTS3_SEGCURSOR_ALL ){ 2236 /* This call is to merge all segments in the database to a single 2237 ** segment. The level of the new segment is equal to the the numerically 2238 ** greatest segment level currently present in the database. The index 2239 ** of the new segment is always 0. */ 2240 int nDummy; /* TODO: Remove this */ 2241 if( csr.nSegment==1 ){ 2242 rc = SQLITE_DONE; 2243 goto finished; 2244 } 2245 rc = fts3SegmentCountMax(p, &nDummy, &iNewLevel); 2246 }else{ 2247 /* This call is to merge all segments at level iLevel. Find the next 2248 ** available segment index at level iLevel+1. The call to 2249 ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to 2250 ** a single iLevel+2 segment if necessary. */ 2251 iNewLevel = iLevel+1; 2252 rc = fts3AllocateSegdirIdx(p, iNewLevel, &iIdx); 2253 } 2254 if( rc!=SQLITE_OK ) goto finished; 2255 assert( csr.nSegment>0 ); 2256 assert( iNewLevel>=0 ); 2257 2258 memset(&filter, 0, sizeof(Fts3SegFilter)); 2259 filter.flags = FTS3_SEGMENT_REQUIRE_POS; 2260 filter.flags |= (iLevel==FTS3_SEGCURSOR_ALL ? FTS3_SEGMENT_IGNORE_EMPTY : 0); 2261 2262 rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); 2263 while( SQLITE_OK==rc ){ 2264 rc = sqlite3Fts3SegReaderStep(p, &csr); 2265 if( rc!=SQLITE_ROW ) break; 2266 rc = fts3SegWriterAdd(p, &pWriter, 1, 2267 csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist); 2268 } 2269 if( rc!=SQLITE_OK ) goto finished; 2270 assert( pWriter ); 2271 2272 rc = fts3DeleteSegdir(p, iLevel, csr.apSegment, csr.nSegment); 2273 if( rc!=SQLITE_OK ) goto finished; 2274 rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); 2275 2276 finished: 2277 fts3SegWriterFree(pWriter); 2278 sqlite3Fts3SegReaderFinish(&csr); 2279 return rc; 2280 } 2281 2282 2283 /* 2284 ** Flush the contents of pendingTerms to a level 0 segment. 2285 */ 2286 int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ 2287 return fts3SegmentMerge(p, FTS3_SEGCURSOR_PENDING); 2288 } 2289 2290 /* 2291 ** Encode N integers as varints into a blob. 2292 */ 2293 static void fts3EncodeIntArray( 2294 int N, /* The number of integers to encode */ 2295 u32 *a, /* The integer values */ 2296 char *zBuf, /* Write the BLOB here */ 2297 int *pNBuf /* Write number of bytes if zBuf[] used here */ 2298 ){ 2299 int i, j; 2300 for(i=j=0; i<N; i++){ 2301 j += sqlite3Fts3PutVarint(&zBuf[j], (sqlite3_int64)a[i]); 2302 } 2303 *pNBuf = j; 2304 } 2305 2306 /* 2307 ** Decode a blob of varints into N integers 2308 */ 2309 static void fts3DecodeIntArray( 2310 int N, /* The number of integers to decode */ 2311 u32 *a, /* Write the integer values */ 2312 const char *zBuf, /* The BLOB containing the varints */ 2313 int nBuf /* size of the BLOB */ 2314 ){ 2315 int i, j; 2316 UNUSED_PARAMETER(nBuf); 2317 for(i=j=0; i<N; i++){ 2318 sqlite3_int64 x; 2319 j += sqlite3Fts3GetVarint(&zBuf[j], &x); 2320 assert(j<=nBuf); 2321 a[i] = (u32)(x & 0xffffffff); 2322 } 2323 } 2324 2325 /* 2326 ** Insert the sizes (in tokens) for each column of the document 2327 ** with docid equal to p->iPrevDocid. The sizes are encoded as 2328 ** a blob of varints. 2329 */ 2330 static void fts3InsertDocsize( 2331 int *pRC, /* Result code */ 2332 Fts3Table *p, /* Table into which to insert */ 2333 u32 *aSz /* Sizes of each column */ 2334 ){ 2335 char *pBlob; /* The BLOB encoding of the document size */ 2336 int nBlob; /* Number of bytes in the BLOB */ 2337 sqlite3_stmt *pStmt; /* Statement used to insert the encoding */ 2338 int rc; /* Result code from subfunctions */ 2339 2340 if( *pRC ) return; 2341 pBlob = sqlite3_malloc( 10*p->nColumn ); 2342 if( pBlob==0 ){ 2343 *pRC = SQLITE_NOMEM; 2344 return; 2345 } 2346 fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob); 2347 rc = fts3SqlStmt(p, SQL_REPLACE_DOCSIZE, &pStmt, 0); 2348 if( rc ){ 2349 sqlite3_free(pBlob); 2350 *pRC = rc; 2351 return; 2352 } 2353 sqlite3_bind_int64(pStmt, 1, p->iPrevDocid); 2354 sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free); 2355 sqlite3_step(pStmt); 2356 *pRC = sqlite3_reset(pStmt); 2357 } 2358 2359 /* 2360 ** Record 0 of the %_stat table contains a blob consisting of N varints, 2361 ** where N is the number of user defined columns in the fts3 table plus 2362 ** two. If nCol is the number of user defined columns, then values of the 2363 ** varints are set as follows: 2364 ** 2365 ** Varint 0: Total number of rows in the table. 2366 ** 2367 ** Varint 1..nCol: For each column, the total number of tokens stored in 2368 ** the column for all rows of the table. 2369 ** 2370 ** Varint 1+nCol: The total size, in bytes, of all text values in all 2371 ** columns of all rows of the table. 2372 ** 2373 */ 2374 static void fts3UpdateDocTotals( 2375 int *pRC, /* The result code */ 2376 Fts3Table *p, /* Table being updated */ 2377 u32 *aSzIns, /* Size increases */ 2378 u32 *aSzDel, /* Size decreases */ 2379 int nChng /* Change in the number of documents */ 2380 ){ 2381 char *pBlob; /* Storage for BLOB written into %_stat */ 2382 int nBlob; /* Size of BLOB written into %_stat */ 2383 u32 *a; /* Array of integers that becomes the BLOB */ 2384 sqlite3_stmt *pStmt; /* Statement for reading and writing */ 2385 int i; /* Loop counter */ 2386 int rc; /* Result code from subfunctions */ 2387 2388 const int nStat = p->nColumn+2; 2389 2390 if( *pRC ) return; 2391 a = sqlite3_malloc( (sizeof(u32)+10)*nStat ); 2392 if( a==0 ){ 2393 *pRC = SQLITE_NOMEM; 2394 return; 2395 } 2396 pBlob = (char*)&a[nStat]; 2397 rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0); 2398 if( rc ){ 2399 sqlite3_free(a); 2400 *pRC = rc; 2401 return; 2402 } 2403 if( sqlite3_step(pStmt)==SQLITE_ROW ){ 2404 fts3DecodeIntArray(nStat, a, 2405 sqlite3_column_blob(pStmt, 0), 2406 sqlite3_column_bytes(pStmt, 0)); 2407 }else{ 2408 memset(a, 0, sizeof(u32)*(nStat) ); 2409 } 2410 sqlite3_reset(pStmt); 2411 if( nChng<0 && a[0]<(u32)(-nChng) ){ 2412 a[0] = 0; 2413 }else{ 2414 a[0] += nChng; 2415 } 2416 for(i=0; i<p->nColumn+1; i++){ 2417 u32 x = a[i+1]; 2418 if( x+aSzIns[i] < aSzDel[i] ){ 2419 x = 0; 2420 }else{ 2421 x = x + aSzIns[i] - aSzDel[i]; 2422 } 2423 a[i+1] = x; 2424 } 2425 fts3EncodeIntArray(nStat, a, pBlob, &nBlob); 2426 rc = fts3SqlStmt(p, SQL_REPLACE_DOCTOTAL, &pStmt, 0); 2427 if( rc ){ 2428 sqlite3_free(a); 2429 *pRC = rc; 2430 return; 2431 } 2432 sqlite3_bind_blob(pStmt, 1, pBlob, nBlob, SQLITE_STATIC); 2433 sqlite3_step(pStmt); 2434 *pRC = sqlite3_reset(pStmt); 2435 sqlite3_free(a); 2436 } 2437 2438 /* 2439 ** Handle a 'special' INSERT of the form: 2440 ** 2441 ** "INSERT INTO tbl(tbl) VALUES(<expr>)" 2442 ** 2443 ** Argument pVal contains the result of <expr>. Currently the only 2444 ** meaningful value to insert is the text 'optimize'. 2445 */ 2446 static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ 2447 int rc; /* Return Code */ 2448 const char *zVal = (const char *)sqlite3_value_text(pVal); 2449 int nVal = sqlite3_value_bytes(pVal); 2450 2451 if( !zVal ){ 2452 return SQLITE_NOMEM; 2453 }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){ 2454 rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL); 2455 if( rc==SQLITE_DONE ){ 2456 rc = SQLITE_OK; 2457 }else{ 2458 sqlite3Fts3PendingTermsClear(p); 2459 } 2460 #ifdef SQLITE_TEST 2461 }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ 2462 p->nNodeSize = atoi(&zVal[9]); 2463 rc = SQLITE_OK; 2464 }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ 2465 p->nMaxPendingData = atoi(&zVal[11]); 2466 rc = SQLITE_OK; 2467 #endif 2468 }else{ 2469 rc = SQLITE_ERROR; 2470 } 2471 2472 sqlite3Fts3SegmentsClose(p); 2473 return rc; 2474 } 2475 2476 /* 2477 ** Return the deferred doclist associated with deferred token pDeferred. 2478 ** This function assumes that sqlite3Fts3CacheDeferredDoclists() has already 2479 ** been called to allocate and populate the doclist. 2480 */ 2481 char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *pDeferred, int *pnByte){ 2482 if( pDeferred->pList ){ 2483 *pnByte = pDeferred->pList->nData; 2484 return pDeferred->pList->aData; 2485 } 2486 *pnByte = 0; 2487 return 0; 2488 } 2489 2490 /* 2491 ** Helper fucntion for FreeDeferredDoclists(). This function removes all 2492 ** references to deferred doclists from within the tree of Fts3Expr 2493 ** structures headed by 2494 */ 2495 static void fts3DeferredDoclistClear(Fts3Expr *pExpr){ 2496 if( pExpr ){ 2497 fts3DeferredDoclistClear(pExpr->pLeft); 2498 fts3DeferredDoclistClear(pExpr->pRight); 2499 if( pExpr->isLoaded ){ 2500 sqlite3_free(pExpr->aDoclist); 2501 pExpr->isLoaded = 0; 2502 pExpr->aDoclist = 0; 2503 pExpr->nDoclist = 0; 2504 pExpr->pCurrent = 0; 2505 pExpr->iCurrent = 0; 2506 } 2507 } 2508 } 2509 2510 /* 2511 ** Delete all cached deferred doclists. Deferred doclists are cached 2512 ** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function. 2513 */ 2514 void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){ 2515 Fts3DeferredToken *pDef; 2516 for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){ 2517 sqlite3_free(pDef->pList); 2518 pDef->pList = 0; 2519 } 2520 if( pCsr->pDeferred ){ 2521 fts3DeferredDoclistClear(pCsr->pExpr); 2522 } 2523 } 2524 2525 /* 2526 ** Free all entries in the pCsr->pDeffered list. Entries are added to 2527 ** this list using sqlite3Fts3DeferToken(). 2528 */ 2529 void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){ 2530 Fts3DeferredToken *pDef; 2531 Fts3DeferredToken *pNext; 2532 for(pDef=pCsr->pDeferred; pDef; pDef=pNext){ 2533 pNext = pDef->pNext; 2534 sqlite3_free(pDef->pList); 2535 sqlite3_free(pDef); 2536 } 2537 pCsr->pDeferred = 0; 2538 } 2539 2540 /* 2541 ** Generate deferred-doclists for all tokens in the pCsr->pDeferred list 2542 ** based on the row that pCsr currently points to. 2543 ** 2544 ** A deferred-doclist is like any other doclist with position information 2545 ** included, except that it only contains entries for a single row of the 2546 ** table, not for all rows. 2547 */ 2548 int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ 2549 int rc = SQLITE_OK; /* Return code */ 2550 if( pCsr->pDeferred ){ 2551 int i; /* Used to iterate through table columns */ 2552 sqlite3_int64 iDocid; /* Docid of the row pCsr points to */ 2553 Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */ 2554 2555 Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; 2556 sqlite3_tokenizer *pT = p->pTokenizer; 2557 sqlite3_tokenizer_module const *pModule = pT->pModule; 2558 2559 assert( pCsr->isRequireSeek==0 ); 2560 iDocid = sqlite3_column_int64(pCsr->pStmt, 0); 2561 2562 for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){ 2563 const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); 2564 sqlite3_tokenizer_cursor *pTC = 0; 2565 2566 rc = pModule->xOpen(pT, zText, -1, &pTC); 2567 while( rc==SQLITE_OK ){ 2568 char const *zToken; /* Buffer containing token */ 2569 int nToken; /* Number of bytes in token */ 2570 int iDum1, iDum2; /* Dummy variables */ 2571 int iPos; /* Position of token in zText */ 2572 2573 pTC->pTokenizer = pT; 2574 rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); 2575 for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ 2576 Fts3PhraseToken *pPT = pDef->pToken; 2577 if( (pDef->iCol>=p->nColumn || pDef->iCol==i) 2578 && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken)) 2579 && (0==memcmp(zToken, pPT->z, pPT->n)) 2580 ){ 2581 fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); 2582 } 2583 } 2584 } 2585 if( pTC ) pModule->xClose(pTC); 2586 if( rc==SQLITE_DONE ) rc = SQLITE_OK; 2587 } 2588 2589 for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ 2590 if( pDef->pList ){ 2591 rc = fts3PendingListAppendVarint(&pDef->pList, 0); 2592 } 2593 } 2594 } 2595 2596 return rc; 2597 } 2598 2599 /* 2600 ** Add an entry for token pToken to the pCsr->pDeferred list. 2601 */ 2602 int sqlite3Fts3DeferToken( 2603 Fts3Cursor *pCsr, /* Fts3 table cursor */ 2604 Fts3PhraseToken *pToken, /* Token to defer */ 2605 int iCol /* Column that token must appear in (or -1) */ 2606 ){ 2607 Fts3DeferredToken *pDeferred; 2608 pDeferred = sqlite3_malloc(sizeof(*pDeferred)); 2609 if( !pDeferred ){ 2610 return SQLITE_NOMEM; 2611 } 2612 memset(pDeferred, 0, sizeof(*pDeferred)); 2613 pDeferred->pToken = pToken; 2614 pDeferred->pNext = pCsr->pDeferred; 2615 pDeferred->iCol = iCol; 2616 pCsr->pDeferred = pDeferred; 2617 2618 assert( pToken->pDeferred==0 ); 2619 pToken->pDeferred = pDeferred; 2620 2621 return SQLITE_OK; 2622 } 2623 2624 2625 /* 2626 ** This function does the work for the xUpdate method of FTS3 virtual 2627 ** tables. 2628 */ 2629 int sqlite3Fts3UpdateMethod( 2630 sqlite3_vtab *pVtab, /* FTS3 vtab object */ 2631 int nArg, /* Size of argument array */ 2632 sqlite3_value **apVal, /* Array of arguments */ 2633 sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ 2634 ){ 2635 Fts3Table *p = (Fts3Table *)pVtab; 2636 int rc = SQLITE_OK; /* Return Code */ 2637 int isRemove = 0; /* True for an UPDATE or DELETE */ 2638 sqlite3_int64 iRemove = 0; /* Rowid removed by UPDATE or DELETE */ 2639 u32 *aSzIns; /* Sizes of inserted documents */ 2640 u32 *aSzDel; /* Sizes of deleted documents */ 2641 int nChng = 0; /* Net change in number of documents */ 2642 2643 assert( p->pSegments==0 ); 2644 2645 /* Allocate space to hold the change in document sizes */ 2646 aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 ); 2647 if( aSzIns==0 ) return SQLITE_NOMEM; 2648 aSzDel = &aSzIns[p->nColumn+1]; 2649 memset(aSzIns, 0, sizeof(aSzIns[0])*(p->nColumn+1)*2); 2650 2651 /* If this is a DELETE or UPDATE operation, remove the old record. */ 2652 if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ 2653 int isEmpty = 0; 2654 rc = fts3IsEmpty(p, apVal, &isEmpty); 2655 if( rc==SQLITE_OK ){ 2656 if( isEmpty ){ 2657 /* Deleting this row means the whole table is empty. In this case 2658 ** delete the contents of all three tables and throw away any 2659 ** data in the pendingTerms hash table. 2660 */ 2661 rc = fts3DeleteAll(p); 2662 }else{ 2663 isRemove = 1; 2664 iRemove = sqlite3_value_int64(apVal[0]); 2665 rc = fts3PendingTermsDocid(p, iRemove); 2666 fts3DeleteTerms(&rc, p, apVal, aSzDel); 2667 fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, apVal); 2668 if( p->bHasDocsize ){ 2669 fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, apVal); 2670 } 2671 nChng--; 2672 } 2673 } 2674 }else if( sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL ){ 2675 sqlite3_free(aSzIns); 2676 return fts3SpecialInsert(p, apVal[p->nColumn+2]); 2677 } 2678 2679 /* If this is an INSERT or UPDATE operation, insert the new record. */ 2680 if( nArg>1 && rc==SQLITE_OK ){ 2681 rc = fts3InsertData(p, apVal, pRowid); 2682 if( rc==SQLITE_OK && (!isRemove || *pRowid!=iRemove) ){ 2683 rc = fts3PendingTermsDocid(p, *pRowid); 2684 } 2685 if( rc==SQLITE_OK ){ 2686 rc = fts3InsertTerms(p, apVal, aSzIns); 2687 } 2688 if( p->bHasDocsize ){ 2689 fts3InsertDocsize(&rc, p, aSzIns); 2690 } 2691 nChng++; 2692 } 2693 2694 if( p->bHasStat ){ 2695 fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng); 2696 } 2697 2698 sqlite3_free(aSzIns); 2699 sqlite3Fts3SegmentsClose(p); 2700 return rc; 2701 } 2702 2703 /* 2704 ** Flush any data in the pending-terms hash table to disk. If successful, 2705 ** merge all segments in the database (including the new segment, if 2706 ** there was any data to flush) into a single segment. 2707 */ 2708 int sqlite3Fts3Optimize(Fts3Table *p){ 2709 int rc; 2710 rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0); 2711 if( rc==SQLITE_OK ){ 2712 rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL); 2713 if( rc==SQLITE_OK ){ 2714 rc = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); 2715 if( rc==SQLITE_OK ){ 2716 sqlite3Fts3PendingTermsClear(p); 2717 } 2718 }else{ 2719 sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0); 2720 sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); 2721 } 2722 } 2723 sqlite3Fts3SegmentsClose(p); 2724 return rc; 2725 } 2726 2727 #endif 2728