1 /* 2 ** 2012 Jan 11 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 */ 11 /* TODO(shess): THIS MODULE IS STILL EXPERIMENTAL. DO NOT USE IT. */ 12 /* Implements a virtual table "recover" which can be used to recover 13 * data from a corrupt table. The table is walked manually, with 14 * corrupt items skipped. Additionally, any errors while reading will 15 * be skipped. 16 * 17 * Given a table with this definition: 18 * 19 * CREATE TABLE Stuff ( 20 * name TEXT PRIMARY KEY, 21 * value TEXT NOT NULL 22 * ); 23 * 24 * to recover the data from teh table, you could do something like: 25 * 26 * -- Attach another database, the original is not trustworthy. 27 * ATTACH DATABASE '/tmp/db.db' AS rdb; 28 * -- Create a new version of the table. 29 * CREATE TABLE rdb.Stuff ( 30 * name TEXT PRIMARY KEY, 31 * value TEXT NOT NULL 32 * ); 33 * -- This will read the original table's data. 34 * CREATE VIRTUAL TABLE temp.recover_Stuff using recover( 35 * main.Stuff, 36 * name TEXT STRICT NOT NULL, -- only real TEXT data allowed 37 * value TEXT STRICT NOT NULL 38 * ); 39 * -- Corruption means the UNIQUE constraint may no longer hold for 40 * -- Stuff, so either OR REPLACE or OR IGNORE must be used. 41 * INSERT OR REPLACE INTO rdb.Stuff (rowid, name, value ) 42 * SELECT rowid, name, value FROM temp.recover_Stuff; 43 * DROP TABLE temp.recover_Stuff; 44 * DETACH DATABASE rdb; 45 * -- Move db.db to replace original db in filesystem. 46 * 47 * 48 * Usage 49 * 50 * Given the goal of dealing with corruption, it would not be safe to 51 * create a recovery table in the database being recovered. So 52 * recovery tables must be created in the temp database. They are not 53 * appropriate to persist, in any case. [As a bonus, sqlite_master 54 * tables can be recovered. Perhaps more cute than useful, though.] 55 * 56 * The parameters are a specifier for the table to read, and a column 57 * definition for each bit of data stored in that table. The named 58 * table must be convertable to a root page number by reading the 59 * sqlite_master table. Bare table names are assumed to be in 60 * database 0 ("main"), other databases can be specified in db.table 61 * fashion. 62 * 63 * Column definitions are similar to BUT NOT THE SAME AS those 64 * provided to CREATE statements: 65 * column-def: column-name [type-name [STRICT] [NOT NULL]] 66 * type-name: (ANY|ROWID|INTEGER|FLOAT|NUMERIC|TEXT|BLOB) 67 * 68 * Only those exact type names are accepted, there is no type 69 * intuition. The only constraints accepted are STRICT (see below) 70 * and NOT NULL. Anything unexpected will cause the create to fail. 71 * 72 * ANY is a convenience to indicate that manifest typing is desired. 73 * It is equivalent to not specifying a type at all. The results for 74 * such columns will have the type of the data's storage. The exposed 75 * schema will contain no type for that column. 76 * 77 * ROWID is used for columns representing aliases to the rowid 78 * (INTEGER PRIMARY KEY, with or without AUTOINCREMENT), to make the 79 * concept explicit. Such columns are actually stored as NULL, so 80 * they cannot be simply ignored. The exposed schema will be INTEGER 81 * for that column. 82 * 83 * NOT NULL causes rows with a NULL in that column to be skipped. It 84 * also adds NOT NULL to the column in the exposed schema. If the 85 * table has ever had columns added using ALTER TABLE, then those 86 * columns implicitly contain NULL for rows which have not been 87 * updated. [Workaround using COALESCE() in your SELECT statement.] 88 * 89 * The created table is read-only, with no indices. Any SELECT will 90 * be a full-table scan, returning each valid row read from the 91 * storage of the backing table. The rowid will be the rowid of the 92 * row from the backing table. "Valid" means: 93 * - The cell metadata for the row is well-formed. Mainly this means that 94 * the cell header info describes a payload of the size indicated by 95 * the cell's payload size. 96 * - The cell does not run off the page. 97 * - The cell does not overlap any other cell on the page. 98 * - The cell contains doesn't contain too many columns. 99 * - The types of the serialized data match the indicated types (see below). 100 * 101 * 102 * Type affinity versus type storage. 103 * 104 * http://www.sqlite.org/datatype3.html describes SQLite's type 105 * affinity system. The system provides for automated coercion of 106 * types in certain cases, transparently enough that many developers 107 * do not realize that it is happening. Importantly, it implies that 108 * the raw data stored in the database may not have the obvious type. 109 * 110 * Differences between the stored data types and the expected data 111 * types may be a signal of corruption. This module makes some 112 * allowances for automatic coercion. It is important to be concious 113 * of the difference between the schema exposed by the module, and the 114 * data types read from storage. The following table describes how 115 * the module interprets things: 116 * 117 * type schema data STRICT 118 * ---- ------ ---- ------ 119 * ANY <none> any any 120 * ROWID INTEGER n/a n/a 121 * INTEGER INTEGER integer integer 122 * FLOAT FLOAT integer or float float 123 * NUMERIC NUMERIC integer, float, or text integer or float 124 * TEXT TEXT text or blob text 125 * BLOB BLOB blob blob 126 * 127 * type is the type provided to the recover module, schema is the 128 * schema exposed by the module, data is the acceptable types of data 129 * decoded from storage, and STRICT is a modification of that. 130 * 131 * A very loose recovery system might use ANY for all columns, then 132 * use the appropriate sqlite3_column_*() calls to coerce to expected 133 * types. This doesn't provide much protection if a page from a 134 * different table with the same column count is linked into an 135 * inappropriate btree. 136 * 137 * A very tight recovery system might use STRICT to enforce typing on 138 * all columns, preferring to skip rows which are valid at the storage 139 * level but don't contain the right types. Note that FLOAT STRICT is 140 * almost certainly not appropriate, since integral values are 141 * transparently stored as integers, when that is more efficient. 142 * 143 * Another option is to use ANY for all columns and inspect each 144 * result manually (using sqlite3_column_*). This should only be 145 * necessary in cases where developers have used manifest typing (test 146 * to make sure before you decide that you aren't using manifest 147 * typing!). 148 * 149 * 150 * Caveats 151 * 152 * Leaf pages not referenced by interior nodes will not be found. 153 * 154 * Leaf pages referenced from interior nodes of other tables will not 155 * be resolved. 156 * 157 * Rows referencing invalid overflow pages will be skipped. 158 * 159 * SQlite rows have a header which describes how to interpret the rest 160 * of the payload. The header can be valid in cases where the rest of 161 * the record is actually corrupt (in the sense that the data is not 162 * the intended data). This can especially happen WRT overflow pages, 163 * as lack of atomic updates between pages is the primary form of 164 * corruption I have seen in the wild. 165 */ 166 /* The implementation is via a series of cursors. The cursor 167 * implementations follow the pattern: 168 * 169 * // Creates the cursor using various initialization info. 170 * int cursorCreate(...); 171 * 172 * // Returns 1 if there is no more data, 0 otherwise. 173 * int cursorEOF(Cursor *pCursor); 174 * 175 * // Various accessors can be used if not at EOF. 176 * 177 * // Move to the next item. 178 * int cursorNext(Cursor *pCursor); 179 * 180 * // Destroy the memory associated with the cursor. 181 * void cursorDestroy(Cursor *pCursor); 182 * 183 * References in the following are to sections at 184 * http://www.sqlite.org/fileformat2.html . 185 * 186 * RecoverLeafCursor iterates the records in a leaf table node 187 * described in section 1.5 "B-tree Pages". When the node is 188 * exhausted, an interior cursor is used to get the next leaf node, 189 * and iteration continues there. 190 * 191 * RecoverInteriorCursor iterates the child pages in an interior table 192 * node described in section 1.5 "B-tree Pages". When the node is 193 * exhausted, a parent interior cursor is used to get the next 194 * interior node at the same level, and iteration continues there. 195 * 196 * Together these record the path from the leaf level to the root of 197 * the tree. Iteration happens from the leaves rather than the root 198 * both for efficiency and putting the special case at the front of 199 * the list is easier to implement. 200 * 201 * RecoverCursor uses a RecoverLeafCursor to iterate the rows of a 202 * table, returning results via the SQLite virtual table interface. 203 */ 204 /* TODO(shess): It might be useful to allow DEFAULT in types to 205 * specify what to do for NULL when an ALTER TABLE case comes up. 206 * Unfortunately, simply adding it to the exposed schema and using 207 * sqlite3_result_null() does not cause the default to be generate. 208 * Handling it ourselves seems hard, unfortunately. 209 */ 210 211 #include <assert.h> 212 #include <ctype.h> 213 #include <stdio.h> 214 #include <string.h> 215 216 /* Internal SQLite things that are used: 217 * u32, u64, i64 types. 218 * Btree, Pager, and DbPage structs. 219 * DbPage.pData, .pPager, and .pgno 220 * sqlite3 struct. 221 * sqlite3BtreePager() and sqlite3BtreeGetPageSize() 222 * sqlite3PagerAcquire() and sqlite3PagerUnref() 223 * getVarint(). 224 */ 225 #include "sqliteInt.h" 226 227 /* For debugging. */ 228 #if 0 229 #define FNENTRY() fprintf(stderr, "In %s\n", __FUNCTION__) 230 #else 231 #define FNENTRY() 232 #endif 233 234 /* Generic constants and helper functions. */ 235 236 static const unsigned char kTableLeafPage = 0x0D; 237 static const unsigned char kTableInteriorPage = 0x05; 238 239 /* From section 1.5. */ 240 static const unsigned kiPageTypeOffset = 0; 241 static const unsigned kiPageFreeBlockOffset = 1; 242 static const unsigned kiPageCellCountOffset = 3; 243 static const unsigned kiPageCellContentOffset = 5; 244 static const unsigned kiPageFragmentedBytesOffset = 7; 245 static const unsigned knPageLeafHeaderBytes = 8; 246 /* Interior pages contain an additional field. */ 247 static const unsigned kiPageRightChildOffset = 8; 248 static const unsigned kiPageInteriorHeaderBytes = 12; 249 250 /* Accepted types are specified by a mask. */ 251 #define MASK_ROWID (1<<0) 252 #define MASK_INTEGER (1<<1) 253 #define MASK_FLOAT (1<<2) 254 #define MASK_TEXT (1<<3) 255 #define MASK_BLOB (1<<4) 256 #define MASK_NULL (1<<5) 257 258 /* Helpers to decode fixed-size fields. */ 259 static u32 decodeUnsigned16(const unsigned char *pData){ 260 return (pData[0]<<8) + pData[1]; 261 } 262 static u32 decodeUnsigned32(const unsigned char *pData){ 263 return (decodeUnsigned16(pData)<<16) + decodeUnsigned16(pData+2); 264 } 265 static i64 decodeSigned(const unsigned char *pData, unsigned nBytes){ 266 i64 r = (char)(*pData); 267 while( --nBytes ){ 268 r <<= 8; 269 r += *(++pData); 270 } 271 return r; 272 } 273 /* Derived from vdbeaux.c, sqlite3VdbeSerialGet(), case 7. */ 274 /* TODO(shess): Determine if swapMixedEndianFloat() applies. */ 275 static double decodeFloat64(const unsigned char *pData){ 276 #if !defined(NDEBUG) 277 static const u64 t1 = ((u64)0x3ff00000)<<32; 278 static const double r1 = 1.0; 279 u64 t2 = t1; 280 assert( sizeof(r1)==sizeof(t2) && memcmp(&r1, &t2, sizeof(r1))==0 ); 281 #endif 282 i64 x = decodeSigned(pData, 8); 283 double d; 284 memcpy(&d, &x, sizeof(x)); 285 return d; 286 } 287 288 /* Return true if a varint can safely be read from pData/nData. */ 289 /* TODO(shess): DbPage points into the middle of a buffer which 290 * contains the page data before DbPage. So code should always be 291 * able to read a small number of varints safely. Consider whether to 292 * trust that or not. 293 */ 294 static int checkVarint(const unsigned char *pData, unsigned nData){ 295 unsigned i; 296 297 /* In the worst case the decoder takes all 8 bits of the 9th byte. */ 298 if( nData>=9 ){ 299 return 1; 300 } 301 302 /* Look for a high-bit-clear byte in what's left. */ 303 for( i=0; i<nData; ++i ){ 304 if( !(pData[i]&0x80) ){ 305 return 1; 306 } 307 } 308 309 /* Cannot decode in the space given. */ 310 return 0; 311 } 312 313 /* Return 1 if n varints can be read from pData/nData. */ 314 static int checkVarints(const unsigned char *pData, unsigned nData, 315 unsigned n){ 316 unsigned nCur = 0; /* Byte offset within current varint. */ 317 unsigned nFound = 0; /* Number of varints found. */ 318 unsigned i; 319 320 /* In the worst case the decoder takes all 8 bits of the 9th byte. */ 321 if( nData>=9*n ){ 322 return 1; 323 } 324 325 for( i=0; nFound<n && i<nData; ++i ){ 326 nCur++; 327 if( nCur==9 || !(pData[i]&0x80) ){ 328 nFound++; 329 nCur = 0; 330 } 331 } 332 333 return nFound==n; 334 } 335 336 /* ctype and str[n]casecmp() can be affected by locale (eg, tr_TR). 337 * These versions consider only the ASCII space. 338 */ 339 /* TODO(shess): It may be reasonable to just remove the need for these 340 * entirely. The module could require "TEXT STRICT NOT NULL", not 341 * "Text Strict Not Null" or whatever the developer felt like typing 342 * that day. Handling corrupt data is a PERFECT place to be pedantic. 343 */ 344 static int ascii_isspace(char c){ 345 /* From fts3_expr.c */ 346 return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; 347 } 348 static int ascii_isalnum(int x){ 349 /* From fts3_tokenizer1.c */ 350 return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z'); 351 } 352 static int ascii_tolower(int x){ 353 /* From fts3_tokenizer1.c */ 354 return (x>='A' && x<='Z') ? x-'A'+'a' : x; 355 } 356 /* TODO(shess): Consider sqlite3_strnicmp() */ 357 static int ascii_strncasecmp(const char *s1, const char *s2, size_t n){ 358 const unsigned char *us1 = (const unsigned char *)s1; 359 const unsigned char *us2 = (const unsigned char *)s2; 360 while( *us1 && *us2 && n && ascii_tolower(*us1)==ascii_tolower(*us2) ){ 361 us1++, us2++, n--; 362 } 363 return n ? ascii_tolower(*us1)-ascii_tolower(*us2) : 0; 364 } 365 static int ascii_strcasecmp(const char *s1, const char *s2){ 366 /* If s2 is equal through strlen(s1), will exit while() due to s1's 367 * trailing NUL, and return NUL-s2[strlen(s1)]. 368 */ 369 return ascii_strncasecmp(s1, s2, strlen(s1)+1); 370 } 371 372 /* For some reason I kept making mistakes with offset calculations. */ 373 static const unsigned char *PageData(DbPage *pPage, unsigned iOffset){ 374 assert( iOffset<=pPage->nPageSize ); 375 return (unsigned char *)pPage->pData + iOffset; 376 } 377 378 /* The first page in the file contains a file header in the first 100 379 * bytes. The page's header information comes after that. Note that 380 * the offsets in the page's header information are relative to the 381 * beginning of the page, NOT the end of the page header. 382 */ 383 static const unsigned char *PageHeader(DbPage *pPage){ 384 if( pPage->pgno==1 ){ 385 const unsigned nDatabaseHeader = 100; 386 return PageData(pPage, nDatabaseHeader); 387 }else{ 388 return PageData(pPage, 0); 389 } 390 } 391 392 /* Helper to fetch the pager and page size for the named database. */ 393 static int GetPager(sqlite3 *db, const char *zName, 394 Pager **pPager, unsigned *pnPageSize){ 395 Btree *pBt = NULL; 396 int i; 397 for( i=0; i<db->nDb; ++i ){ 398 if( ascii_strcasecmp(db->aDb[i].zName, zName)==0 ){ 399 pBt = db->aDb[i].pBt; 400 break; 401 } 402 } 403 if( !pBt ){ 404 return SQLITE_ERROR; 405 } 406 407 *pPager = sqlite3BtreePager(pBt); 408 *pnPageSize = sqlite3BtreeGetPageSize(pBt) - sqlite3BtreeGetReserve(pBt); 409 return SQLITE_OK; 410 } 411 412 /* iSerialType is a type read from a record header. See "2.1 Record Format". 413 */ 414 415 /* Storage size of iSerialType in bytes. My interpretation of SQLite 416 * documentation is that text and blob fields can have 32-bit length. 417 * Values past 2^31-12 will need more than 32 bits to encode, which is 418 * why iSerialType is u64. 419 */ 420 static u32 SerialTypeLength(u64 iSerialType){ 421 switch( iSerialType ){ 422 case 0 : return 0; /* NULL */ 423 case 1 : return 1; /* Various integers. */ 424 case 2 : return 2; 425 case 3 : return 3; 426 case 4 : return 4; 427 case 5 : return 6; 428 case 6 : return 8; 429 case 7 : return 8; /* 64-bit float. */ 430 case 8 : return 0; /* Constant 0. */ 431 case 9 : return 0; /* Constant 1. */ 432 case 10 : case 11 : assert( !"RESERVED TYPE"); return 0; 433 } 434 return (u32)((iSerialType>>1) - 6); 435 } 436 437 /* True if iSerialType refers to a blob. */ 438 static int SerialTypeIsBlob(u64 iSerialType){ 439 assert( iSerialType>=12 ); 440 return (iSerialType%2)==0; 441 } 442 443 /* Returns true if the serialized type represented by iSerialType is 444 * compatible with the given type mask. 445 */ 446 static int SerialTypeIsCompatible(u64 iSerialType, unsigned char mask){ 447 switch( iSerialType ){ 448 case 0 : return (mask&MASK_NULL)!=0; 449 case 1 : return (mask&MASK_INTEGER)!=0; 450 case 2 : return (mask&MASK_INTEGER)!=0; 451 case 3 : return (mask&MASK_INTEGER)!=0; 452 case 4 : return (mask&MASK_INTEGER)!=0; 453 case 5 : return (mask&MASK_INTEGER)!=0; 454 case 6 : return (mask&MASK_INTEGER)!=0; 455 case 7 : return (mask&MASK_FLOAT)!=0; 456 case 8 : return (mask&MASK_INTEGER)!=0; 457 case 9 : return (mask&MASK_INTEGER)!=0; 458 case 10 : assert( !"RESERVED TYPE"); return 0; 459 case 11 : assert( !"RESERVED TYPE"); return 0; 460 } 461 return (mask&(SerialTypeIsBlob(iSerialType) ? MASK_BLOB : MASK_TEXT)); 462 } 463 464 /* Versions of strdup() with return values appropriate for 465 * sqlite3_free(). malloc.c has sqlite3DbStrDup()/NDup(), but those 466 * need sqlite3DbFree(), which seems intrusive. 467 */ 468 static char *sqlite3_strndup(const char *z, unsigned n){ 469 char *zNew; 470 471 if( z==NULL ){ 472 return NULL; 473 } 474 475 zNew = sqlite3_malloc(n+1); 476 if( zNew!=NULL ){ 477 memcpy(zNew, z, n); 478 zNew[n] = '\0'; 479 } 480 return zNew; 481 } 482 static char *sqlite3_strdup(const char *z){ 483 if( z==NULL ){ 484 return NULL; 485 } 486 return sqlite3_strndup(z, strlen(z)); 487 } 488 489 /* Fetch the page number of zTable in zDb from sqlite_master in zDb, 490 * and put it in *piRootPage. 491 */ 492 static int getRootPage(sqlite3 *db, const char *zDb, const char *zTable, 493 u32 *piRootPage){ 494 char *zSql; /* SQL selecting root page of named element. */ 495 sqlite3_stmt *pStmt; 496 int rc; 497 498 if( strcmp(zTable, "sqlite_master")==0 ){ 499 *piRootPage = 1; 500 return SQLITE_OK; 501 } 502 503 zSql = sqlite3_mprintf("SELECT rootpage FROM %s.sqlite_master " 504 "WHERE type = 'table' AND tbl_name = %Q", 505 zDb, zTable); 506 if( !zSql ){ 507 return SQLITE_NOMEM; 508 } 509 510 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); 511 sqlite3_free(zSql); 512 if( rc!=SQLITE_OK ){ 513 return rc; 514 } 515 516 /* Require a result. */ 517 rc = sqlite3_step(pStmt); 518 if( rc==SQLITE_DONE ){ 519 rc = SQLITE_CORRUPT; 520 }else if( rc==SQLITE_ROW ){ 521 *piRootPage = sqlite3_column_int(pStmt, 0); 522 523 /* Require only one result. */ 524 rc = sqlite3_step(pStmt); 525 if( rc==SQLITE_DONE ){ 526 rc = SQLITE_OK; 527 }else if( rc==SQLITE_ROW ){ 528 rc = SQLITE_CORRUPT; 529 } 530 } 531 sqlite3_finalize(pStmt); 532 return rc; 533 } 534 535 static int getEncoding(sqlite3 *db, const char *zDb, int* piEncoding){ 536 sqlite3_stmt *pStmt; 537 int rc; 538 char *zSql = sqlite3_mprintf("PRAGMA %s.encoding", zDb); 539 if( !zSql ){ 540 return SQLITE_NOMEM; 541 } 542 543 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); 544 sqlite3_free(zSql); 545 if( rc!=SQLITE_OK ){ 546 return rc; 547 } 548 549 /* Require a result. */ 550 rc = sqlite3_step(pStmt); 551 if( rc==SQLITE_DONE ){ 552 /* This case should not be possible. */ 553 rc = SQLITE_CORRUPT; 554 }else if( rc==SQLITE_ROW ){ 555 if( sqlite3_column_type(pStmt, 0)==SQLITE_TEXT ){ 556 const char* z = (const char *)sqlite3_column_text(pStmt, 0); 557 /* These strings match the literals in pragma.c. */ 558 if( !strcmp(z, "UTF-16le") ){ 559 *piEncoding = SQLITE_UTF16LE; 560 }else if( !strcmp(z, "UTF-16be") ){ 561 *piEncoding = SQLITE_UTF16BE; 562 }else if( !strcmp(z, "UTF-8") ){ 563 *piEncoding = SQLITE_UTF8; 564 }else{ 565 /* This case should not be possible. */ 566 *piEncoding = SQLITE_UTF8; 567 } 568 }else{ 569 /* This case should not be possible. */ 570 *piEncoding = SQLITE_UTF8; 571 } 572 573 /* Require only one result. */ 574 rc = sqlite3_step(pStmt); 575 if( rc==SQLITE_DONE ){ 576 rc = SQLITE_OK; 577 }else if( rc==SQLITE_ROW ){ 578 /* This case should not be possible. */ 579 rc = SQLITE_CORRUPT; 580 } 581 } 582 sqlite3_finalize(pStmt); 583 return rc; 584 } 585 586 /* Cursor for iterating interior nodes. Interior page cells contain a 587 * child page number and a rowid. The child page contains items left 588 * of the rowid (less than). The rightmost page of the subtree is 589 * stored in the page header. 590 * 591 * interiorCursorDestroy - release all resources associated with the 592 * cursor and any parent cursors. 593 * interiorCursorCreate - create a cursor with the given parent and page. 594 * interiorCursorEOF - returns true if neither the cursor nor the 595 * parent cursors can return any more data. 596 * interiorCursorNextPage - fetch the next child page from the cursor. 597 * 598 * Logically, interiorCursorNextPage() returns the next child page 599 * number from the page the cursor is currently reading, calling the 600 * parent cursor as necessary to get new pages to read, until done. 601 * SQLITE_ROW if a page is returned, SQLITE_DONE if out of pages, 602 * error otherwise. Unfortunately, if the table is corrupted 603 * unexpected pages can be returned. If any unexpected page is found, 604 * leaf or otherwise, it is returned to the caller for processing, 605 * with the interior cursor left empty. The next call to 606 * interiorCursorNextPage() will recurse to the parent cursor until an 607 * interior page to iterate is returned. 608 * 609 * Note that while interiorCursorNextPage() will refuse to follow 610 * loops, it does not keep track of pages returned for purposes of 611 * preventing duplication. 612 * 613 * Note that interiorCursorEOF() could return false (not at EOF), and 614 * interiorCursorNextPage() could still return SQLITE_DONE. This 615 * could happen if there are more cells to iterate in an interior 616 * page, but those cells refer to invalid pages. 617 */ 618 typedef struct RecoverInteriorCursor RecoverInteriorCursor; 619 struct RecoverInteriorCursor { 620 RecoverInteriorCursor *pParent; /* Parent node to this node. */ 621 DbPage *pPage; /* Reference to leaf page. */ 622 unsigned nPageSize; /* Size of page. */ 623 unsigned nChildren; /* Number of children on the page. */ 624 unsigned iChild; /* Index of next child to return. */ 625 }; 626 627 static void interiorCursorDestroy(RecoverInteriorCursor *pCursor){ 628 /* Destroy all the cursors to the root. */ 629 while( pCursor ){ 630 RecoverInteriorCursor *p = pCursor; 631 pCursor = pCursor->pParent; 632 633 if( p->pPage ){ 634 sqlite3PagerUnref(p->pPage); 635 p->pPage = NULL; 636 } 637 638 memset(p, 0xA5, sizeof(*p)); 639 sqlite3_free(p); 640 } 641 } 642 643 /* Internal helper. Reset storage in preparation for iterating pPage. */ 644 static void interiorCursorSetPage(RecoverInteriorCursor *pCursor, 645 DbPage *pPage){ 646 assert( PageHeader(pPage)[kiPageTypeOffset]==kTableInteriorPage ); 647 648 if( pCursor->pPage ){ 649 sqlite3PagerUnref(pCursor->pPage); 650 pCursor->pPage = NULL; 651 } 652 pCursor->pPage = pPage; 653 pCursor->iChild = 0; 654 655 /* A child for each cell, plus one in the header. */ 656 /* TODO(shess): Sanity-check the count? Page header plus per-cell 657 * cost of 16-bit offset, 32-bit page number, and one varint 658 * (minimum 1 byte). 659 */ 660 pCursor->nChildren = decodeUnsigned16(PageHeader(pPage) + 661 kiPageCellCountOffset) + 1; 662 } 663 664 static int interiorCursorCreate(RecoverInteriorCursor *pParent, 665 DbPage *pPage, int nPageSize, 666 RecoverInteriorCursor **ppCursor){ 667 RecoverInteriorCursor *pCursor = 668 sqlite3_malloc(sizeof(RecoverInteriorCursor)); 669 if( !pCursor ){ 670 return SQLITE_NOMEM; 671 } 672 673 memset(pCursor, 0, sizeof(*pCursor)); 674 pCursor->pParent = pParent; 675 pCursor->nPageSize = nPageSize; 676 interiorCursorSetPage(pCursor, pPage); 677 *ppCursor = pCursor; 678 return SQLITE_OK; 679 } 680 681 /* Internal helper. Return the child page number at iChild. */ 682 static unsigned interiorCursorChildPage(RecoverInteriorCursor *pCursor){ 683 const unsigned char *pPageHeader; /* Header of the current page. */ 684 const unsigned char *pCellOffsets; /* Offset to page's cell offsets. */ 685 unsigned iCellOffset; /* Offset of target cell. */ 686 687 assert( pCursor->iChild<pCursor->nChildren ); 688 689 /* Rightmost child is in the header. */ 690 pPageHeader = PageHeader(pCursor->pPage); 691 if( pCursor->iChild==pCursor->nChildren-1 ){ 692 return decodeUnsigned32(pPageHeader + kiPageRightChildOffset); 693 } 694 695 /* Each cell is a 4-byte integer page number and a varint rowid 696 * which is greater than the rowid of items in that sub-tree (this 697 * module ignores ordering). The offset is from the beginning of the 698 * page, not from the page header. 699 */ 700 pCellOffsets = pPageHeader + kiPageInteriorHeaderBytes; 701 iCellOffset = decodeUnsigned16(pCellOffsets + pCursor->iChild*2); 702 if( iCellOffset<=pCursor->nPageSize-4 ){ 703 return decodeUnsigned32(PageData(pCursor->pPage, iCellOffset)); 704 } 705 706 /* TODO(shess): Check for cell overlaps? Cells require 4 bytes plus 707 * a varint. Check could be identical to leaf check (or even a 708 * shared helper testing for "Cells starting in this range"?). 709 */ 710 711 /* If the offset is broken, return an invalid page number. */ 712 return 0; 713 } 714 715 static int interiorCursorEOF(RecoverInteriorCursor *pCursor){ 716 /* Find a parent with remaining children. EOF if none found. */ 717 while( pCursor && pCursor->iChild>=pCursor->nChildren ){ 718 pCursor = pCursor->pParent; 719 } 720 return pCursor==NULL; 721 } 722 723 /* Internal helper. Used to detect if iPage would cause a loop. */ 724 static int interiorCursorPageInUse(RecoverInteriorCursor *pCursor, 725 unsigned iPage){ 726 /* Find any parent using the indicated page. */ 727 while( pCursor && pCursor->pPage->pgno!=iPage ){ 728 pCursor = pCursor->pParent; 729 } 730 return pCursor!=NULL; 731 } 732 733 /* Get the next page from the interior cursor at *ppCursor. Returns 734 * SQLITE_ROW with the page in *ppPage, or SQLITE_DONE if out of 735 * pages, or the error SQLite returned. 736 * 737 * If the tree is uneven, then when the cursor attempts to get a new 738 * interior page from the parent cursor, it may get a non-interior 739 * page. In that case, the new page is returned, and *ppCursor is 740 * updated to point to the parent cursor (this cursor is freed). 741 */ 742 /* TODO(shess): I've tried to avoid recursion in most of this code, 743 * but this case is more challenging because the recursive call is in 744 * the middle of operation. One option for converting it without 745 * adding memory management would be to retain the head pointer and 746 * use a helper to "back up" as needed. Another option would be to 747 * reverse the list during traversal. 748 */ 749 static int interiorCursorNextPage(RecoverInteriorCursor **ppCursor, 750 DbPage **ppPage){ 751 RecoverInteriorCursor *pCursor = *ppCursor; 752 while( 1 ){ 753 int rc; 754 const unsigned char *pPageHeader; /* Header of found page. */ 755 756 /* Find a valid child page which isn't on the stack. */ 757 while( pCursor->iChild<pCursor->nChildren ){ 758 const unsigned iPage = interiorCursorChildPage(pCursor); 759 pCursor->iChild++; 760 if( interiorCursorPageInUse(pCursor, iPage) ){ 761 fprintf(stderr, "Loop detected at %d\n", iPage); 762 }else{ 763 int rc = sqlite3PagerAcquire(pCursor->pPage->pPager, iPage, ppPage, 0); 764 if( rc==SQLITE_OK ){ 765 return SQLITE_ROW; 766 } 767 } 768 } 769 770 /* This page has no more children. Get next page from parent. */ 771 if( !pCursor->pParent ){ 772 return SQLITE_DONE; 773 } 774 rc = interiorCursorNextPage(&pCursor->pParent, ppPage); 775 if( rc!=SQLITE_ROW ){ 776 return rc; 777 } 778 779 /* If a non-interior page is received, that either means that the 780 * tree is uneven, or that a child was re-used (say as an overflow 781 * page). Remove this cursor and let the caller handle the page. 782 */ 783 pPageHeader = PageHeader(*ppPage); 784 if( pPageHeader[kiPageTypeOffset]!=kTableInteriorPage ){ 785 *ppCursor = pCursor->pParent; 786 pCursor->pParent = NULL; 787 interiorCursorDestroy(pCursor); 788 return SQLITE_ROW; 789 } 790 791 /* Iterate the new page. */ 792 interiorCursorSetPage(pCursor, *ppPage); 793 *ppPage = NULL; 794 } 795 796 assert(NULL); /* NOTREACHED() */ 797 return SQLITE_CORRUPT; 798 } 799 800 /* Large rows are spilled to overflow pages. The row's main page 801 * stores the overflow page number after the local payload, with a 802 * linked list forward from there as necessary. overflowMaybeCreate() 803 * and overflowGetSegment() provide an abstraction for accessing such 804 * data while centralizing the code. 805 * 806 * overflowDestroy - releases all resources associated with the structure. 807 * overflowMaybeCreate - create the overflow structure if it is needed 808 * to represent the given record. See function comment. 809 * overflowGetSegment - fetch a segment from the record, accounting 810 * for overflow pages. Segments which are not 811 * entirely contained with a page are constructed 812 * into a buffer which is returned. See function comment. 813 */ 814 typedef struct RecoverOverflow RecoverOverflow; 815 struct RecoverOverflow { 816 RecoverOverflow *pNextOverflow; 817 DbPage *pPage; 818 unsigned nPageSize; 819 }; 820 821 static void overflowDestroy(RecoverOverflow *pOverflow){ 822 while( pOverflow ){ 823 RecoverOverflow *p = pOverflow; 824 pOverflow = p->pNextOverflow; 825 826 if( p->pPage ){ 827 sqlite3PagerUnref(p->pPage); 828 p->pPage = NULL; 829 } 830 831 memset(p, 0xA5, sizeof(*p)); 832 sqlite3_free(p); 833 } 834 } 835 836 /* Internal helper. Used to detect if iPage would cause a loop. */ 837 static int overflowPageInUse(RecoverOverflow *pOverflow, unsigned iPage){ 838 while( pOverflow && pOverflow->pPage->pgno!=iPage ){ 839 pOverflow = pOverflow->pNextOverflow; 840 } 841 return pOverflow!=NULL; 842 } 843 844 /* Setup to access an nRecordBytes record beginning at iRecordOffset 845 * in pPage. If nRecordBytes can be satisfied entirely from pPage, 846 * then no overflow pages are needed an *pnLocalRecordBytes is set to 847 * nRecordBytes. Otherwise, *ppOverflow is set to the head of a list 848 * of overflow pages, and *pnLocalRecordBytes is set to the number of 849 * bytes local to pPage. 850 * 851 * overflowGetSegment() will do the right thing regardless of whether 852 * those values are set to be in-page or not. 853 */ 854 static int overflowMaybeCreate(DbPage *pPage, unsigned nPageSize, 855 unsigned iRecordOffset, unsigned nRecordBytes, 856 unsigned *pnLocalRecordBytes, 857 RecoverOverflow **ppOverflow){ 858 unsigned nLocalRecordBytes; /* Record bytes in the leaf page. */ 859 unsigned iNextPage; /* Next page number for record data. */ 860 unsigned nBytes; /* Maximum record bytes as of current page. */ 861 int rc; 862 RecoverOverflow *pFirstOverflow; /* First in linked list of pages. */ 863 RecoverOverflow *pLastOverflow; /* End of linked list. */ 864 865 /* Calculations from the "Table B-Tree Leaf Cell" part of section 866 * 1.5 of http://www.sqlite.org/fileformat2.html . maxLocal and 867 * minLocal to match naming in btree.c. 868 */ 869 const unsigned maxLocal = nPageSize - 35; 870 const unsigned minLocal = ((nPageSize-12)*32/255)-23; /* m */ 871 872 /* Always fit anything smaller than maxLocal. */ 873 if( nRecordBytes<=maxLocal ){ 874 *pnLocalRecordBytes = nRecordBytes; 875 *ppOverflow = NULL; 876 return SQLITE_OK; 877 } 878 879 /* Calculate the remainder after accounting for minLocal on the leaf 880 * page and what packs evenly into overflow pages. If the remainder 881 * does not fit into maxLocal, then a partially-full overflow page 882 * will be required in any case, so store as little as possible locally. 883 */ 884 nLocalRecordBytes = minLocal+((nRecordBytes-minLocal)%(nPageSize-4)); 885 if( maxLocal<nLocalRecordBytes ){ 886 nLocalRecordBytes = minLocal; 887 } 888 889 /* Don't read off the end of the page. */ 890 if( iRecordOffset+nLocalRecordBytes+4>nPageSize ){ 891 return SQLITE_CORRUPT; 892 } 893 894 /* First overflow page number is after the local bytes. */ 895 iNextPage = 896 decodeUnsigned32(PageData(pPage, iRecordOffset + nLocalRecordBytes)); 897 nBytes = nLocalRecordBytes; 898 899 /* While there are more pages to read, and more bytes are needed, 900 * get another page. 901 */ 902 pFirstOverflow = pLastOverflow = NULL; 903 rc = SQLITE_OK; 904 while( iNextPage && nBytes<nRecordBytes ){ 905 RecoverOverflow *pOverflow; /* New overflow page for the list. */ 906 907 rc = sqlite3PagerAcquire(pPage->pPager, iNextPage, &pPage, 0); 908 if( rc!=SQLITE_OK ){ 909 break; 910 } 911 912 pOverflow = sqlite3_malloc(sizeof(RecoverOverflow)); 913 if( !pOverflow ){ 914 sqlite3PagerUnref(pPage); 915 rc = SQLITE_NOMEM; 916 break; 917 } 918 memset(pOverflow, 0, sizeof(*pOverflow)); 919 pOverflow->pPage = pPage; 920 pOverflow->nPageSize = nPageSize; 921 922 if( !pFirstOverflow ){ 923 pFirstOverflow = pOverflow; 924 }else{ 925 pLastOverflow->pNextOverflow = pOverflow; 926 } 927 pLastOverflow = pOverflow; 928 929 iNextPage = decodeUnsigned32(pPage->pData); 930 nBytes += nPageSize-4; 931 932 /* Avoid loops. */ 933 if( overflowPageInUse(pFirstOverflow, iNextPage) ){ 934 fprintf(stderr, "Overflow loop detected at %d\n", iNextPage); 935 rc = SQLITE_CORRUPT; 936 break; 937 } 938 } 939 940 /* If there were not enough pages, or too many, things are corrupt. 941 * Not having enough pages is an obvious problem, all the data 942 * cannot be read. Too many pages means that the contents of the 943 * row between the main page and the overflow page(s) is 944 * inconsistent (most likely one or more of the overflow pages does 945 * not really belong to this row). 946 */ 947 if( rc==SQLITE_OK && (nBytes<nRecordBytes || iNextPage) ){ 948 rc = SQLITE_CORRUPT; 949 } 950 951 if( rc==SQLITE_OK ){ 952 *ppOverflow = pFirstOverflow; 953 *pnLocalRecordBytes = nLocalRecordBytes; 954 }else if( pFirstOverflow ){ 955 overflowDestroy(pFirstOverflow); 956 } 957 return rc; 958 } 959 960 /* Use in concert with overflowMaybeCreate() to efficiently read parts 961 * of a potentially-overflowing record. pPage and iRecordOffset are 962 * the values passed into overflowMaybeCreate(), nLocalRecordBytes and 963 * pOverflow are the values returned by that call. 964 * 965 * On SQLITE_OK, *ppBase points to nRequestBytes of data at 966 * iRequestOffset within the record. If the data exists contiguously 967 * in a page, a direct pointer is returned, otherwise a buffer from 968 * sqlite3_malloc() is returned with the data. *pbFree is set true if 969 * sqlite3_free() should be called on *ppBase. 970 */ 971 /* Operation of this function is subtle. At any time, pPage is the 972 * current page, with iRecordOffset and nLocalRecordBytes being record 973 * data within pPage, and pOverflow being the overflow page after 974 * pPage. This allows the code to handle both the initial leaf page 975 * and overflow pages consistently by adjusting the values 976 * appropriately. 977 */ 978 static int overflowGetSegment(DbPage *pPage, unsigned iRecordOffset, 979 unsigned nLocalRecordBytes, 980 RecoverOverflow *pOverflow, 981 unsigned iRequestOffset, unsigned nRequestBytes, 982 unsigned char **ppBase, int *pbFree){ 983 unsigned nBase; /* Amount of data currently collected. */ 984 unsigned char *pBase; /* Buffer to collect record data into. */ 985 986 /* Skip to the page containing the start of the data. */ 987 while( iRequestOffset>=nLocalRecordBytes && pOverflow ){ 988 /* Factor out current page's contribution. */ 989 iRequestOffset -= nLocalRecordBytes; 990 991 /* Move forward to the next page in the list. */ 992 pPage = pOverflow->pPage; 993 iRecordOffset = 4; 994 nLocalRecordBytes = pOverflow->nPageSize - iRecordOffset; 995 pOverflow = pOverflow->pNextOverflow; 996 } 997 998 /* If the requested data is entirely within this page, return a 999 * pointer into the page. 1000 */ 1001 if( iRequestOffset+nRequestBytes<=nLocalRecordBytes ){ 1002 /* TODO(shess): "assignment discards qualifiers from pointer target type" 1003 * Having ppBase be const makes sense, but sqlite3_free() takes non-const. 1004 */ 1005 *ppBase = (unsigned char *)PageData(pPage, iRecordOffset + iRequestOffset); 1006 *pbFree = 0; 1007 return SQLITE_OK; 1008 } 1009 1010 /* The data range would require additional pages. */ 1011 if( !pOverflow ){ 1012 /* Should never happen, the range is outside the nRecordBytes 1013 * passed to overflowMaybeCreate(). 1014 */ 1015 assert(NULL); /* NOTREACHED */ 1016 return SQLITE_ERROR; 1017 } 1018 1019 /* Get a buffer to construct into. */ 1020 nBase = 0; 1021 pBase = sqlite3_malloc(nRequestBytes); 1022 if( !pBase ){ 1023 return SQLITE_NOMEM; 1024 } 1025 while( nBase<nRequestBytes ){ 1026 /* Copy over data present on this page. */ 1027 unsigned nCopyBytes = nRequestBytes - nBase; 1028 if( nLocalRecordBytes-iRequestOffset<nCopyBytes ){ 1029 nCopyBytes = nLocalRecordBytes - iRequestOffset; 1030 } 1031 memcpy(pBase + nBase, PageData(pPage, iRecordOffset + iRequestOffset), 1032 nCopyBytes); 1033 nBase += nCopyBytes; 1034 1035 if( pOverflow ){ 1036 /* Copy from start of record data in future pages. */ 1037 iRequestOffset = 0; 1038 1039 /* Move forward to the next page in the list. Should match 1040 * first while() loop. 1041 */ 1042 pPage = pOverflow->pPage; 1043 iRecordOffset = 4; 1044 nLocalRecordBytes = pOverflow->nPageSize - iRecordOffset; 1045 pOverflow = pOverflow->pNextOverflow; 1046 }else if( nBase<nRequestBytes ){ 1047 /* Ran out of overflow pages with data left to deliver. Not 1048 * possible if the requested range fits within nRecordBytes 1049 * passed to overflowMaybeCreate() when creating pOverflow. 1050 */ 1051 assert(NULL); /* NOTREACHED */ 1052 sqlite3_free(pBase); 1053 return SQLITE_ERROR; 1054 } 1055 } 1056 assert( nBase==nRequestBytes ); 1057 *ppBase = pBase; 1058 *pbFree = 1; 1059 return SQLITE_OK; 1060 } 1061 1062 /* Primary structure for iterating the contents of a table. 1063 * 1064 * leafCursorDestroy - release all resources associated with the cursor. 1065 * leafCursorCreate - create a cursor to iterate items from tree at 1066 * the provided root page. 1067 * leafCursorNextValidCell - get the cursor ready to access data from 1068 * the next valid cell in the table. 1069 * leafCursorCellRowid - get the current cell's rowid. 1070 * leafCursorCellColumns - get current cell's column count. 1071 * leafCursorCellColInfo - get type and data for a column in current cell. 1072 * 1073 * leafCursorNextValidCell skips cells which fail simple integrity 1074 * checks, such as overlapping other cells, or being located at 1075 * impossible offsets, or where header data doesn't correctly describe 1076 * payload data. Returns SQLITE_ROW if a valid cell is found, 1077 * SQLITE_DONE if all pages in the tree were exhausted. 1078 * 1079 * leafCursorCellColInfo() accounts for overflow pages in the style of 1080 * overflowGetSegment(). 1081 */ 1082 typedef struct RecoverLeafCursor RecoverLeafCursor; 1083 struct RecoverLeafCursor { 1084 RecoverInteriorCursor *pParent; /* Parent node to this node. */ 1085 DbPage *pPage; /* Reference to leaf page. */ 1086 unsigned nPageSize; /* Size of pPage. */ 1087 unsigned nCells; /* Number of cells in pPage. */ 1088 unsigned iCell; /* Current cell. */ 1089 1090 /* Info parsed from data in iCell. */ 1091 i64 iRowid; /* rowid parsed. */ 1092 unsigned nRecordCols; /* how many items in the record. */ 1093 u64 iRecordOffset; /* offset to record data. */ 1094 /* TODO(shess): nRecordBytes and nRecordHeaderBytes are used in 1095 * leafCursorCellColInfo() to prevent buffer overruns. 1096 * leafCursorCellDecode() already verified that the cell is valid, so 1097 * those checks should be redundant. 1098 */ 1099 u64 nRecordBytes; /* Size of record data. */ 1100 unsigned nLocalRecordBytes; /* Amount of record data in-page. */ 1101 unsigned nRecordHeaderBytes; /* Size of record header data. */ 1102 unsigned char *pRecordHeader; /* Pointer to record header data. */ 1103 int bFreeRecordHeader; /* True if record header requires free. */ 1104 RecoverOverflow *pOverflow; /* Cell overflow info, if needed. */ 1105 }; 1106 1107 /* Internal helper shared between next-page and create-cursor. If 1108 * pPage is a leaf page, it will be stored in the cursor and state 1109 * initialized for reading cells. 1110 * 1111 * If pPage is an interior page, a new parent cursor is created and 1112 * injected on the stack. This is necessary to handle trees with 1113 * uneven depth, but also is used during initial setup. 1114 * 1115 * If pPage is not a table page at all, it is discarded. 1116 * 1117 * If SQLITE_OK is returned, the caller no longer owns pPage, 1118 * otherwise the caller is responsible for discarding it. 1119 */ 1120 static int leafCursorLoadPage(RecoverLeafCursor *pCursor, DbPage *pPage){ 1121 const unsigned char *pPageHeader; /* Header of *pPage */ 1122 1123 /* Release the current page. */ 1124 if( pCursor->pPage ){ 1125 sqlite3PagerUnref(pCursor->pPage); 1126 pCursor->pPage = NULL; 1127 pCursor->iCell = pCursor->nCells = 0; 1128 } 1129 1130 /* If the page is an unexpected interior node, inject a new stack 1131 * layer and try again from there. 1132 */ 1133 pPageHeader = PageHeader(pPage); 1134 if( pPageHeader[kiPageTypeOffset]==kTableInteriorPage ){ 1135 RecoverInteriorCursor *pParent; 1136 int rc = interiorCursorCreate(pCursor->pParent, pPage, pCursor->nPageSize, 1137 &pParent); 1138 if( rc!=SQLITE_OK ){ 1139 return rc; 1140 } 1141 pCursor->pParent = pParent; 1142 return SQLITE_OK; 1143 } 1144 1145 /* Not a leaf page, skip it. */ 1146 if( pPageHeader[kiPageTypeOffset]!=kTableLeafPage ){ 1147 sqlite3PagerUnref(pPage); 1148 return SQLITE_OK; 1149 } 1150 1151 /* Take ownership of the page and start decoding. */ 1152 pCursor->pPage = pPage; 1153 pCursor->iCell = 0; 1154 pCursor->nCells = decodeUnsigned16(pPageHeader + kiPageCellCountOffset); 1155 return SQLITE_OK; 1156 } 1157 1158 /* Get the next leaf-level page in the tree. Returns SQLITE_ROW when 1159 * a leaf page is found, SQLITE_DONE when no more leaves exist, or any 1160 * error which occurred. 1161 */ 1162 static int leafCursorNextPage(RecoverLeafCursor *pCursor){ 1163 if( !pCursor->pParent ){ 1164 return SQLITE_DONE; 1165 } 1166 1167 /* Repeatedly load the parent's next child page until a leaf is found. */ 1168 do { 1169 DbPage *pNextPage; 1170 int rc = interiorCursorNextPage(&pCursor->pParent, &pNextPage); 1171 if( rc!=SQLITE_ROW ){ 1172 assert( rc==SQLITE_DONE ); 1173 return rc; 1174 } 1175 1176 rc = leafCursorLoadPage(pCursor, pNextPage); 1177 if( rc!=SQLITE_OK ){ 1178 sqlite3PagerUnref(pNextPage); 1179 return rc; 1180 } 1181 } while( !pCursor->pPage ); 1182 1183 return SQLITE_ROW; 1184 } 1185 1186 static void leafCursorDestroyCellData(RecoverLeafCursor *pCursor){ 1187 if( pCursor->bFreeRecordHeader ){ 1188 sqlite3_free(pCursor->pRecordHeader); 1189 } 1190 pCursor->bFreeRecordHeader = 0; 1191 pCursor->pRecordHeader = NULL; 1192 1193 if( pCursor->pOverflow ){ 1194 overflowDestroy(pCursor->pOverflow); 1195 pCursor->pOverflow = NULL; 1196 } 1197 } 1198 1199 static void leafCursorDestroy(RecoverLeafCursor *pCursor){ 1200 leafCursorDestroyCellData(pCursor); 1201 1202 if( pCursor->pParent ){ 1203 interiorCursorDestroy(pCursor->pParent); 1204 pCursor->pParent = NULL; 1205 } 1206 1207 if( pCursor->pPage ){ 1208 sqlite3PagerUnref(pCursor->pPage); 1209 pCursor->pPage = NULL; 1210 } 1211 1212 memset(pCursor, 0xA5, sizeof(*pCursor)); 1213 sqlite3_free(pCursor); 1214 } 1215 1216 /* Create a cursor to iterate the rows from the leaf pages of a table 1217 * rooted at iRootPage. 1218 */ 1219 /* TODO(shess): recoverOpen() calls this to setup the cursor, and I 1220 * think that recoverFilter() may make a hard assumption that the 1221 * cursor returned will turn up at least one valid cell. 1222 * 1223 * The cases I can think of which break this assumption are: 1224 * - pPage is a valid leaf page with no valid cells. 1225 * - pPage is a valid interior page with no valid leaves. 1226 * - pPage is a valid interior page who's leaves contain no valid cells. 1227 * - pPage is not a valid leaf or interior page. 1228 */ 1229 static int leafCursorCreate(Pager *pPager, unsigned nPageSize, 1230 u32 iRootPage, RecoverLeafCursor **ppCursor){ 1231 DbPage *pPage; /* Reference to page at iRootPage. */ 1232 RecoverLeafCursor *pCursor; /* Leaf cursor being constructed. */ 1233 int rc; 1234 1235 /* Start out with the root page. */ 1236 rc = sqlite3PagerAcquire(pPager, iRootPage, &pPage, 0); 1237 if( rc!=SQLITE_OK ){ 1238 return rc; 1239 } 1240 1241 pCursor = sqlite3_malloc(sizeof(RecoverLeafCursor)); 1242 if( !pCursor ){ 1243 sqlite3PagerUnref(pPage); 1244 return SQLITE_NOMEM; 1245 } 1246 memset(pCursor, 0, sizeof(*pCursor)); 1247 1248 pCursor->nPageSize = nPageSize; 1249 1250 rc = leafCursorLoadPage(pCursor, pPage); 1251 if( rc!=SQLITE_OK ){ 1252 sqlite3PagerUnref(pPage); 1253 leafCursorDestroy(pCursor); 1254 return rc; 1255 } 1256 1257 /* pPage wasn't a leaf page, find the next leaf page. */ 1258 if( !pCursor->pPage ){ 1259 rc = leafCursorNextPage(pCursor); 1260 if( rc!=SQLITE_DONE && rc!=SQLITE_ROW ){ 1261 leafCursorDestroy(pCursor); 1262 return rc; 1263 } 1264 } 1265 1266 *ppCursor = pCursor; 1267 return SQLITE_OK; 1268 } 1269 1270 /* Useful for setting breakpoints. */ 1271 static int ValidateError(){ 1272 return SQLITE_ERROR; 1273 } 1274 1275 /* Setup the cursor for reading the information from cell iCell. */ 1276 static int leafCursorCellDecode(RecoverLeafCursor *pCursor){ 1277 const unsigned char *pPageHeader; /* Header of current page. */ 1278 const unsigned char *pCellOffsets; /* Pointer to page's cell offsets. */ 1279 unsigned iCellOffset; /* Offset of current cell (iCell). */ 1280 const unsigned char *pCell; /* Pointer to data at iCellOffset. */ 1281 unsigned nCellMaxBytes; /* Maximum local size of iCell. */ 1282 unsigned iEndOffset; /* End of iCell's in-page data. */ 1283 u64 nRecordBytes; /* Expected size of cell, w/overflow. */ 1284 u64 iRowid; /* iCell's rowid (in table). */ 1285 unsigned nRead; /* Amount of cell read. */ 1286 unsigned nRecordHeaderRead; /* Header data read. */ 1287 u64 nRecordHeaderBytes; /* Header size expected. */ 1288 unsigned nRecordCols; /* Columns read from header. */ 1289 u64 nRecordColBytes; /* Bytes in payload for those columns. */ 1290 unsigned i; 1291 int rc; 1292 1293 assert( pCursor->iCell<pCursor->nCells ); 1294 1295 leafCursorDestroyCellData(pCursor); 1296 1297 /* Find the offset to the row. */ 1298 pPageHeader = PageHeader(pCursor->pPage); 1299 pCellOffsets = pPageHeader + knPageLeafHeaderBytes; 1300 iCellOffset = decodeUnsigned16(pCellOffsets + pCursor->iCell*2); 1301 if( iCellOffset>=pCursor->nPageSize ){ 1302 return ValidateError(); 1303 } 1304 1305 pCell = PageData(pCursor->pPage, iCellOffset); 1306 nCellMaxBytes = pCursor->nPageSize - iCellOffset; 1307 1308 /* B-tree leaf cells lead with varint record size, varint rowid and 1309 * varint header size. 1310 */ 1311 /* TODO(shess): The smallest page size is 512 bytes, which has an m 1312 * of 39. Three varints need at most 27 bytes to encode. I think. 1313 */ 1314 if( !checkVarints(pCell, nCellMaxBytes, 3) ){ 1315 return ValidateError(); 1316 } 1317 1318 nRead = getVarint(pCell, &nRecordBytes); 1319 assert( iCellOffset+nRead<=pCursor->nPageSize ); 1320 pCursor->nRecordBytes = nRecordBytes; 1321 1322 nRead += getVarint(pCell + nRead, &iRowid); 1323 assert( iCellOffset+nRead<=pCursor->nPageSize ); 1324 pCursor->iRowid = (i64)iRowid; 1325 1326 pCursor->iRecordOffset = iCellOffset + nRead; 1327 1328 /* Start overflow setup here because nLocalRecordBytes is needed to 1329 * check cell overlap. 1330 */ 1331 rc = overflowMaybeCreate(pCursor->pPage, pCursor->nPageSize, 1332 pCursor->iRecordOffset, pCursor->nRecordBytes, 1333 &pCursor->nLocalRecordBytes, 1334 &pCursor->pOverflow); 1335 if( rc!=SQLITE_OK ){ 1336 return ValidateError(); 1337 } 1338 1339 /* Check that no other cell starts within this cell. */ 1340 iEndOffset = pCursor->iRecordOffset + pCursor->nLocalRecordBytes; 1341 for( i=0; i<pCursor->nCells; ++i ){ 1342 const unsigned iOtherOffset = decodeUnsigned16(pCellOffsets + i*2); 1343 if( iOtherOffset>iCellOffset && iOtherOffset<iEndOffset ){ 1344 return ValidateError(); 1345 } 1346 } 1347 1348 nRecordHeaderRead = getVarint(pCell + nRead, &nRecordHeaderBytes); 1349 assert( nRecordHeaderBytes<=nRecordBytes ); 1350 pCursor->nRecordHeaderBytes = nRecordHeaderBytes; 1351 1352 /* Large headers could overflow if pages are small. */ 1353 rc = overflowGetSegment(pCursor->pPage, 1354 pCursor->iRecordOffset, pCursor->nLocalRecordBytes, 1355 pCursor->pOverflow, 0, nRecordHeaderBytes, 1356 &pCursor->pRecordHeader, &pCursor->bFreeRecordHeader); 1357 if( rc!=SQLITE_OK ){ 1358 return ValidateError(); 1359 } 1360 1361 /* Tally up the column count and size of data. */ 1362 nRecordCols = 0; 1363 nRecordColBytes = 0; 1364 while( nRecordHeaderRead<nRecordHeaderBytes ){ 1365 u64 iSerialType; /* Type descriptor for current column. */ 1366 if( !checkVarint(pCursor->pRecordHeader + nRecordHeaderRead, 1367 nRecordHeaderBytes - nRecordHeaderRead) ){ 1368 return ValidateError(); 1369 } 1370 nRecordHeaderRead += getVarint(pCursor->pRecordHeader + nRecordHeaderRead, 1371 &iSerialType); 1372 if( iSerialType==10 || iSerialType==11 ){ 1373 return ValidateError(); 1374 } 1375 nRecordColBytes += SerialTypeLength(iSerialType); 1376 nRecordCols++; 1377 } 1378 pCursor->nRecordCols = nRecordCols; 1379 1380 /* Parsing the header used as many bytes as expected. */ 1381 if( nRecordHeaderRead!=nRecordHeaderBytes ){ 1382 return ValidateError(); 1383 } 1384 1385 /* Calculated record is size of expected record. */ 1386 if( nRecordHeaderBytes+nRecordColBytes!=nRecordBytes ){ 1387 return ValidateError(); 1388 } 1389 1390 return SQLITE_OK; 1391 } 1392 1393 static i64 leafCursorCellRowid(RecoverLeafCursor *pCursor){ 1394 return pCursor->iRowid; 1395 } 1396 1397 static unsigned leafCursorCellColumns(RecoverLeafCursor *pCursor){ 1398 return pCursor->nRecordCols; 1399 } 1400 1401 /* Get the column info for the cell. Pass NULL for ppBase to prevent 1402 * retrieving the data segment. If *pbFree is true, *ppBase must be 1403 * freed by the caller using sqlite3_free(). 1404 */ 1405 static int leafCursorCellColInfo(RecoverLeafCursor *pCursor, 1406 unsigned iCol, u64 *piColType, 1407 unsigned char **ppBase, int *pbFree){ 1408 const unsigned char *pRecordHeader; /* Current cell's header. */ 1409 u64 nRecordHeaderBytes; /* Bytes in pRecordHeader. */ 1410 unsigned nRead; /* Bytes read from header. */ 1411 u64 iColEndOffset; /* Offset to end of column in cell. */ 1412 unsigned nColsSkipped; /* Count columns as procesed. */ 1413 u64 iSerialType; /* Type descriptor for current column. */ 1414 1415 /* Implicit NULL for columns past the end. This case happens when 1416 * rows have not been updated since an ALTER TABLE added columns. 1417 * It is more convenient to address here than in callers. 1418 */ 1419 if( iCol>=pCursor->nRecordCols ){ 1420 *piColType = 0; 1421 if( ppBase ){ 1422 *ppBase = 0; 1423 *pbFree = 0; 1424 } 1425 return SQLITE_OK; 1426 } 1427 1428 /* Must be able to decode header size. */ 1429 pRecordHeader = pCursor->pRecordHeader; 1430 if( !checkVarint(pRecordHeader, pCursor->nRecordHeaderBytes) ){ 1431 return SQLITE_CORRUPT; 1432 } 1433 1434 /* Rather than caching the header size and how many bytes it took, 1435 * decode it every time. 1436 */ 1437 nRead = getVarint(pRecordHeader, &nRecordHeaderBytes); 1438 assert( nRecordHeaderBytes==pCursor->nRecordHeaderBytes ); 1439 1440 /* Scan forward to the indicated column. Scans to _after_ column 1441 * for later range checking. 1442 */ 1443 /* TODO(shess): This could get expensive for very wide tables. An 1444 * array of iSerialType could be built in leafCursorCellDecode(), but 1445 * the number of columns is dynamic per row, so it would add memory 1446 * management complexity. Enough info to efficiently forward 1447 * iterate could be kept, if all clients forward iterate 1448 * (recoverColumn() may not). 1449 */ 1450 iColEndOffset = 0; 1451 nColsSkipped = 0; 1452 while( nColsSkipped<=iCol && nRead<nRecordHeaderBytes ){ 1453 if( !checkVarint(pRecordHeader + nRead, nRecordHeaderBytes - nRead) ){ 1454 return SQLITE_CORRUPT; 1455 } 1456 nRead += getVarint(pRecordHeader + nRead, &iSerialType); 1457 iColEndOffset += SerialTypeLength(iSerialType); 1458 nColsSkipped++; 1459 } 1460 1461 /* Column's data extends past record's end. */ 1462 if( nRecordHeaderBytes+iColEndOffset>pCursor->nRecordBytes ){ 1463 return SQLITE_CORRUPT; 1464 } 1465 1466 *piColType = iSerialType; 1467 if( ppBase ){ 1468 const u32 nColBytes = SerialTypeLength(iSerialType); 1469 1470 /* Offset from start of record to beginning of column. */ 1471 const unsigned iColOffset = nRecordHeaderBytes+iColEndOffset-nColBytes; 1472 1473 return overflowGetSegment(pCursor->pPage, pCursor->iRecordOffset, 1474 pCursor->nLocalRecordBytes, pCursor->pOverflow, 1475 iColOffset, nColBytes, ppBase, pbFree); 1476 } 1477 return SQLITE_OK; 1478 } 1479 1480 static int leafCursorNextValidCell(RecoverLeafCursor *pCursor){ 1481 while( 1 ){ 1482 int rc; 1483 1484 /* Move to the next cell. */ 1485 pCursor->iCell++; 1486 1487 /* No more cells, get the next leaf. */ 1488 if( pCursor->iCell>=pCursor->nCells ){ 1489 rc = leafCursorNextPage(pCursor); 1490 if( rc!=SQLITE_ROW ){ 1491 return rc; 1492 } 1493 assert( pCursor->iCell==0 ); 1494 } 1495 1496 /* If the cell is valid, indicate that a row is available. */ 1497 rc = leafCursorCellDecode(pCursor); 1498 if( rc==SQLITE_OK ){ 1499 return SQLITE_ROW; 1500 } 1501 1502 /* Iterate until done or a valid row is found. */ 1503 /* TODO(shess): Remove debugging output. */ 1504 fprintf(stderr, "Skipping invalid cell\n"); 1505 } 1506 return SQLITE_ERROR; 1507 } 1508 1509 typedef struct Recover Recover; 1510 struct Recover { 1511 sqlite3_vtab base; 1512 sqlite3 *db; /* Host database connection */ 1513 char *zDb; /* Database containing target table */ 1514 char *zTable; /* Target table */ 1515 unsigned nCols; /* Number of columns in target table */ 1516 unsigned char *pTypes; /* Types of columns in target table */ 1517 }; 1518 1519 /* Internal helper for deleting the module. */ 1520 static void recoverRelease(Recover *pRecover){ 1521 sqlite3_free(pRecover->zDb); 1522 sqlite3_free(pRecover->zTable); 1523 sqlite3_free(pRecover->pTypes); 1524 memset(pRecover, 0xA5, sizeof(*pRecover)); 1525 sqlite3_free(pRecover); 1526 } 1527 1528 /* Helper function for initializing the module. Forward-declared so 1529 * recoverCreate() and recoverConnect() can see it. 1530 */ 1531 static int recoverInit( 1532 sqlite3 *, void *, int, const char *const*, sqlite3_vtab **, char ** 1533 ); 1534 1535 static int recoverCreate( 1536 sqlite3 *db, 1537 void *pAux, 1538 int argc, const char *const*argv, 1539 sqlite3_vtab **ppVtab, 1540 char **pzErr 1541 ){ 1542 FNENTRY(); 1543 return recoverInit(db, pAux, argc, argv, ppVtab, pzErr); 1544 } 1545 1546 /* This should never be called. */ 1547 static int recoverConnect( 1548 sqlite3 *db, 1549 void *pAux, 1550 int argc, const char *const*argv, 1551 sqlite3_vtab **ppVtab, 1552 char **pzErr 1553 ){ 1554 FNENTRY(); 1555 return recoverInit(db, pAux, argc, argv, ppVtab, pzErr); 1556 } 1557 1558 /* No indices supported. */ 1559 static int recoverBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ 1560 FNENTRY(); 1561 return SQLITE_OK; 1562 } 1563 1564 /* Logically, this should never be called. */ 1565 static int recoverDisconnect(sqlite3_vtab *pVtab){ 1566 FNENTRY(); 1567 recoverRelease((Recover*)pVtab); 1568 return SQLITE_OK; 1569 } 1570 1571 static int recoverDestroy(sqlite3_vtab *pVtab){ 1572 FNENTRY(); 1573 recoverRelease((Recover*)pVtab); 1574 return SQLITE_OK; 1575 } 1576 1577 typedef struct RecoverCursor RecoverCursor; 1578 struct RecoverCursor { 1579 sqlite3_vtab_cursor base; 1580 RecoverLeafCursor *pLeafCursor; 1581 int iEncoding; 1582 int bEOF; 1583 }; 1584 1585 static int recoverOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ 1586 Recover *pRecover = (Recover*)pVTab; 1587 u32 iRootPage; /* Root page of the backing table. */ 1588 int iEncoding; /* UTF encoding for backing database. */ 1589 unsigned nPageSize; /* Size of pages in backing database. */ 1590 Pager *pPager; /* Backing database pager. */ 1591 RecoverLeafCursor *pLeafCursor; /* Cursor to read table's leaf pages. */ 1592 RecoverCursor *pCursor; /* Cursor to read rows from leaves. */ 1593 int rc; 1594 1595 FNENTRY(); 1596 1597 iRootPage = 0; 1598 rc = getRootPage(pRecover->db, pRecover->zDb, pRecover->zTable, 1599 &iRootPage); 1600 if( rc!=SQLITE_OK ){ 1601 return rc; 1602 } 1603 1604 iEncoding = 0; 1605 rc = getEncoding(pRecover->db, pRecover->zDb, &iEncoding); 1606 if( rc!=SQLITE_OK ){ 1607 return rc; 1608 } 1609 1610 rc = GetPager(pRecover->db, pRecover->zDb, &pPager, &nPageSize); 1611 if( rc!=SQLITE_OK ){ 1612 return rc; 1613 } 1614 1615 rc = leafCursorCreate(pPager, nPageSize, iRootPage, &pLeafCursor); 1616 if( rc!=SQLITE_OK ){ 1617 return rc; 1618 } 1619 1620 pCursor = sqlite3_malloc(sizeof(RecoverCursor)); 1621 if( !pCursor ){ 1622 leafCursorDestroy(pLeafCursor); 1623 return SQLITE_NOMEM; 1624 } 1625 memset(pCursor, 0, sizeof(*pCursor)); 1626 pCursor->base.pVtab = pVTab; 1627 pCursor->pLeafCursor = pLeafCursor; 1628 pCursor->iEncoding = iEncoding; 1629 1630 /* If no leaf pages were found, empty result set. */ 1631 /* TODO(shess): leafCursorNextValidCell() would return SQLITE_ROW or 1632 * SQLITE_DONE to indicate whether there is further data to consider. 1633 */ 1634 pCursor->bEOF = (pLeafCursor->pPage==NULL); 1635 1636 *ppCursor = (sqlite3_vtab_cursor*)pCursor; 1637 return SQLITE_OK; 1638 } 1639 1640 static int recoverClose(sqlite3_vtab_cursor *cur){ 1641 RecoverCursor *pCursor = (RecoverCursor*)cur; 1642 FNENTRY(); 1643 if( pCursor->pLeafCursor ){ 1644 leafCursorDestroy(pCursor->pLeafCursor); 1645 pCursor->pLeafCursor = NULL; 1646 } 1647 memset(pCursor, 0xA5, sizeof(*pCursor)); 1648 sqlite3_free(cur); 1649 return SQLITE_OK; 1650 } 1651 1652 /* Helpful place to set a breakpoint. */ 1653 static int RecoverInvalidCell(){ 1654 return SQLITE_ERROR; 1655 } 1656 1657 /* Returns SQLITE_OK if the cell has an appropriate number of columns 1658 * with the appropriate types of data. 1659 */ 1660 static int recoverValidateLeafCell(Recover *pRecover, RecoverCursor *pCursor){ 1661 unsigned i; 1662 1663 /* If the row's storage has too many columns, skip it. */ 1664 if( leafCursorCellColumns(pCursor->pLeafCursor)>pRecover->nCols ){ 1665 return RecoverInvalidCell(); 1666 } 1667 1668 /* Skip rows with unexpected types. */ 1669 for( i=0; i<pRecover->nCols; ++i ){ 1670 u64 iType; /* Storage type of column i. */ 1671 int rc; 1672 1673 /* ROWID alias. */ 1674 if( (pRecover->pTypes[i]&MASK_ROWID) ){ 1675 continue; 1676 } 1677 1678 rc = leafCursorCellColInfo(pCursor->pLeafCursor, i, &iType, NULL, NULL); 1679 assert( rc==SQLITE_OK ); 1680 if( rc!=SQLITE_OK || !SerialTypeIsCompatible(iType, pRecover->pTypes[i]) ){ 1681 return RecoverInvalidCell(); 1682 } 1683 } 1684 1685 return SQLITE_OK; 1686 } 1687 1688 static int recoverNext(sqlite3_vtab_cursor *pVtabCursor){ 1689 RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor; 1690 Recover *pRecover = (Recover*)pCursor->base.pVtab; 1691 int rc; 1692 1693 FNENTRY(); 1694 1695 /* Scan forward to the next cell with valid storage, then check that 1696 * the stored data matches the schema. 1697 */ 1698 while( (rc = leafCursorNextValidCell(pCursor->pLeafCursor))==SQLITE_ROW ){ 1699 if( recoverValidateLeafCell(pRecover, pCursor)==SQLITE_OK ){ 1700 return SQLITE_OK; 1701 } 1702 } 1703 1704 if( rc==SQLITE_DONE ){ 1705 pCursor->bEOF = 1; 1706 return SQLITE_OK; 1707 } 1708 1709 assert( rc!=SQLITE_OK ); 1710 return rc; 1711 } 1712 1713 static int recoverFilter( 1714 sqlite3_vtab_cursor *pVtabCursor, 1715 int idxNum, const char *idxStr, 1716 int argc, sqlite3_value **argv 1717 ){ 1718 RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor; 1719 Recover *pRecover = (Recover*)pCursor->base.pVtab; 1720 int rc; 1721 1722 FNENTRY(); 1723 1724 /* There were no valid leaf pages in the table. */ 1725 if( pCursor->bEOF ){ 1726 return SQLITE_OK; 1727 } 1728 1729 /* Load the first cell, and iterate forward if it's not valid. If no cells at 1730 * all are valid, recoverNext() sets bEOF and returns appropriately. 1731 */ 1732 rc = leafCursorCellDecode(pCursor->pLeafCursor); 1733 if( rc!=SQLITE_OK || recoverValidateLeafCell(pRecover, pCursor)!=SQLITE_OK ){ 1734 return recoverNext(pVtabCursor); 1735 } 1736 1737 return SQLITE_OK; 1738 } 1739 1740 static int recoverEof(sqlite3_vtab_cursor *pVtabCursor){ 1741 RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor; 1742 FNENTRY(); 1743 return pCursor->bEOF; 1744 } 1745 1746 static int recoverColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ 1747 RecoverCursor *pCursor = (RecoverCursor*)cur; 1748 Recover *pRecover = (Recover*)pCursor->base.pVtab; 1749 u64 iColType; /* Storage type of column i. */ 1750 unsigned char *pColData; /* Column i's data. */ 1751 int shouldFree; /* Non-zero if pColData should be freed. */ 1752 int rc; 1753 1754 FNENTRY(); 1755 1756 if( i>=pRecover->nCols ){ 1757 return SQLITE_ERROR; 1758 } 1759 1760 /* ROWID alias. */ 1761 if( (pRecover->pTypes[i]&MASK_ROWID) ){ 1762 sqlite3_result_int64(ctx, leafCursorCellRowid(pCursor->pLeafCursor)); 1763 return SQLITE_OK; 1764 } 1765 1766 pColData = NULL; 1767 shouldFree = 0; 1768 rc = leafCursorCellColInfo(pCursor->pLeafCursor, i, &iColType, 1769 &pColData, &shouldFree); 1770 if( rc!=SQLITE_OK ){ 1771 return rc; 1772 } 1773 /* recoverValidateLeafCell() should guarantee that this will never 1774 * occur. 1775 */ 1776 if( !SerialTypeIsCompatible(iColType, pRecover->pTypes[i]) ){ 1777 if( shouldFree ){ 1778 sqlite3_free(pColData); 1779 } 1780 return SQLITE_ERROR; 1781 } 1782 1783 switch( iColType ){ 1784 case 0 : sqlite3_result_null(ctx); break; 1785 case 1 : sqlite3_result_int64(ctx, decodeSigned(pColData, 1)); break; 1786 case 2 : sqlite3_result_int64(ctx, decodeSigned(pColData, 2)); break; 1787 case 3 : sqlite3_result_int64(ctx, decodeSigned(pColData, 3)); break; 1788 case 4 : sqlite3_result_int64(ctx, decodeSigned(pColData, 4)); break; 1789 case 5 : sqlite3_result_int64(ctx, decodeSigned(pColData, 6)); break; 1790 case 6 : sqlite3_result_int64(ctx, decodeSigned(pColData, 8)); break; 1791 case 7 : sqlite3_result_double(ctx, decodeFloat64(pColData)); break; 1792 case 8 : sqlite3_result_int(ctx, 0); break; 1793 case 9 : sqlite3_result_int(ctx, 1); break; 1794 case 10 : assert( iColType!=10 ); break; 1795 case 11 : assert( iColType!=11 ); break; 1796 1797 default : { 1798 u32 l = SerialTypeLength(iColType); 1799 1800 /* If pColData was already allocated, arrange to pass ownership. */ 1801 sqlite3_destructor_type pFn = SQLITE_TRANSIENT; 1802 if( shouldFree ){ 1803 pFn = sqlite3_free; 1804 shouldFree = 0; 1805 } 1806 1807 if( SerialTypeIsBlob(iColType) ){ 1808 sqlite3_result_blob(ctx, pColData, l, pFn); 1809 }else{ 1810 if( pCursor->iEncoding==SQLITE_UTF16LE ){ 1811 sqlite3_result_text16le(ctx, (const void*)pColData, l, pFn); 1812 }else if( pCursor->iEncoding==SQLITE_UTF16BE ){ 1813 sqlite3_result_text16be(ctx, (const void*)pColData, l, pFn); 1814 }else{ 1815 sqlite3_result_text(ctx, (const char*)pColData, l, pFn); 1816 } 1817 } 1818 } break; 1819 } 1820 if( shouldFree ){ 1821 sqlite3_free(pColData); 1822 } 1823 return SQLITE_OK; 1824 } 1825 1826 static int recoverRowid(sqlite3_vtab_cursor *pVtabCursor, sqlite_int64 *pRowid){ 1827 RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor; 1828 FNENTRY(); 1829 *pRowid = leafCursorCellRowid(pCursor->pLeafCursor); 1830 return SQLITE_OK; 1831 } 1832 1833 static sqlite3_module recoverModule = { 1834 0, /* iVersion */ 1835 recoverCreate, /* xCreate - create a table */ 1836 recoverConnect, /* xConnect - connect to an existing table */ 1837 recoverBestIndex, /* xBestIndex - Determine search strategy */ 1838 recoverDisconnect, /* xDisconnect - Disconnect from a table */ 1839 recoverDestroy, /* xDestroy - Drop a table */ 1840 recoverOpen, /* xOpen - open a cursor */ 1841 recoverClose, /* xClose - close a cursor */ 1842 recoverFilter, /* xFilter - configure scan constraints */ 1843 recoverNext, /* xNext - advance a cursor */ 1844 recoverEof, /* xEof */ 1845 recoverColumn, /* xColumn - read data */ 1846 recoverRowid, /* xRowid - read data */ 1847 0, /* xUpdate - write data */ 1848 0, /* xBegin - begin transaction */ 1849 0, /* xSync - sync transaction */ 1850 0, /* xCommit - commit transaction */ 1851 0, /* xRollback - rollback transaction */ 1852 0, /* xFindFunction - function overloading */ 1853 0, /* xRename - rename the table */ 1854 }; 1855 1856 int recoverVtableInit(sqlite3 *db){ 1857 return sqlite3_create_module_v2(db, "recover", &recoverModule, NULL, 0); 1858 } 1859 1860 /* This section of code is for parsing the create input and 1861 * initializing the module. 1862 */ 1863 1864 /* Find the next word in zText and place the endpoints in pzWord*. 1865 * Returns true if the word is non-empty. "Word" is defined as 1866 * ASCII alphanumeric plus '_' at this time. 1867 */ 1868 static int findWord(const char *zText, 1869 const char **pzWordStart, const char **pzWordEnd){ 1870 int r; 1871 while( ascii_isspace(*zText) ){ 1872 zText++; 1873 } 1874 *pzWordStart = zText; 1875 while( ascii_isalnum(*zText) || *zText=='_' ){ 1876 zText++; 1877 } 1878 r = zText>*pzWordStart; /* In case pzWordStart==pzWordEnd */ 1879 *pzWordEnd = zText; 1880 return r; 1881 } 1882 1883 /* Return true if the next word in zText is zWord, also setting 1884 * *pzContinue to the character after the word. 1885 */ 1886 static int expectWord(const char *zText, const char *zWord, 1887 const char **pzContinue){ 1888 const char *zWordStart, *zWordEnd; 1889 if( findWord(zText, &zWordStart, &zWordEnd) && 1890 ascii_strncasecmp(zWord, zWordStart, zWordEnd - zWordStart)==0 ){ 1891 *pzContinue = zWordEnd; 1892 return 1; 1893 } 1894 return 0; 1895 } 1896 1897 /* Parse the name and type information out of parameter. In case of 1898 * success, *pzNameStart/End contain the name of the column, 1899 * *pzTypeStart/End contain the top-level type, and *pTypeMask has the 1900 * type mask to use for the column. 1901 */ 1902 static int findNameAndType(const char *parameter, 1903 const char **pzNameStart, const char **pzNameEnd, 1904 const char **pzTypeStart, const char **pzTypeEnd, 1905 unsigned char *pTypeMask){ 1906 unsigned nNameLen; /* Length of found name. */ 1907 const char *zEnd; /* Current end of parsed column information. */ 1908 int bNotNull; /* Non-zero if NULL is not allowed for name. */ 1909 int bStrict; /* Non-zero if column requires exact type match. */ 1910 const char *zDummy; /* Dummy parameter, result unused. */ 1911 unsigned i; 1912 1913 /* strictMask is used for STRICT, strictMask|otherMask if STRICT is 1914 * not supplied. zReplace provides an alternate type to expose to 1915 * the caller. 1916 */ 1917 static struct { 1918 const char *zName; 1919 unsigned char strictMask; 1920 unsigned char otherMask; 1921 const char *zReplace; 1922 } kTypeInfo[] = { 1923 { "ANY", 1924 MASK_INTEGER | MASK_FLOAT | MASK_BLOB | MASK_TEXT | MASK_NULL, 1925 0, "", 1926 }, 1927 { "ROWID", MASK_INTEGER | MASK_ROWID, 0, "INTEGER", }, 1928 { "INTEGER", MASK_INTEGER | MASK_NULL, 0, NULL, }, 1929 { "FLOAT", MASK_FLOAT | MASK_NULL, MASK_INTEGER, NULL, }, 1930 { "NUMERIC", MASK_INTEGER | MASK_FLOAT | MASK_NULL, MASK_TEXT, NULL, }, 1931 { "TEXT", MASK_TEXT | MASK_NULL, MASK_BLOB, NULL, }, 1932 { "BLOB", MASK_BLOB | MASK_NULL, 0, NULL, }, 1933 }; 1934 1935 if( !findWord(parameter, pzNameStart, pzNameEnd) ){ 1936 return SQLITE_MISUSE; 1937 } 1938 1939 /* Manifest typing, accept any storage type. */ 1940 if( !findWord(*pzNameEnd, pzTypeStart, pzTypeEnd) ){ 1941 *pzTypeEnd = *pzTypeStart = ""; 1942 *pTypeMask = MASK_INTEGER | MASK_FLOAT | MASK_BLOB | MASK_TEXT | MASK_NULL; 1943 return SQLITE_OK; 1944 } 1945 1946 nNameLen = *pzTypeEnd - *pzTypeStart; 1947 for( i=0; i<ArraySize(kTypeInfo); ++i ){ 1948 if( ascii_strncasecmp(kTypeInfo[i].zName, *pzTypeStart, nNameLen)==0 ){ 1949 break; 1950 } 1951 } 1952 if( i==ArraySize(kTypeInfo) ){ 1953 return SQLITE_MISUSE; 1954 } 1955 1956 zEnd = *pzTypeEnd; 1957 bStrict = 0; 1958 if( expectWord(zEnd, "STRICT", &zEnd) ){ 1959 /* TODO(shess): Ick. But I don't want another single-purpose 1960 * flag, either. 1961 */ 1962 if( kTypeInfo[i].zReplace && !kTypeInfo[i].zReplace[0] ){ 1963 return SQLITE_MISUSE; 1964 } 1965 bStrict = 1; 1966 } 1967 1968 bNotNull = 0; 1969 if( expectWord(zEnd, "NOT", &zEnd) ){ 1970 if( expectWord(zEnd, "NULL", &zEnd) ){ 1971 bNotNull = 1; 1972 }else{ 1973 /* Anything other than NULL after NOT is an error. */ 1974 return SQLITE_MISUSE; 1975 } 1976 } 1977 1978 /* Anything else is an error. */ 1979 if( findWord(zEnd, &zDummy, &zDummy) ){ 1980 return SQLITE_MISUSE; 1981 } 1982 1983 *pTypeMask = kTypeInfo[i].strictMask; 1984 if( !bStrict ){ 1985 *pTypeMask |= kTypeInfo[i].otherMask; 1986 } 1987 if( bNotNull ){ 1988 *pTypeMask &= ~MASK_NULL; 1989 } 1990 if( kTypeInfo[i].zReplace ){ 1991 *pzTypeStart = kTypeInfo[i].zReplace; 1992 *pzTypeEnd = *pzTypeStart + strlen(*pzTypeStart); 1993 } 1994 return SQLITE_OK; 1995 } 1996 1997 /* Parse the arguments, placing type masks in *pTypes and the exposed 1998 * schema in *pzCreateSql (for sqlite3_declare_vtab). 1999 */ 2000 static int ParseColumnsAndGenerateCreate(unsigned nCols, 2001 const char *const *pCols, 2002 char **pzCreateSql, 2003 unsigned char *pTypes, 2004 char **pzErr){ 2005 unsigned i; 2006 char *zCreateSql = sqlite3_mprintf("CREATE TABLE x("); 2007 if( !zCreateSql ){ 2008 return SQLITE_NOMEM; 2009 } 2010 2011 for( i=0; i<nCols; i++ ){ 2012 const char *zSep = (i < nCols - 1 ? ", " : ")"); 2013 const char *zNotNull = ""; 2014 const char *zNameStart, *zNameEnd; 2015 const char *zTypeStart, *zTypeEnd; 2016 int rc = findNameAndType(pCols[i], 2017 &zNameStart, &zNameEnd, 2018 &zTypeStart, &zTypeEnd, 2019 &pTypes[i]); 2020 if( rc!=SQLITE_OK ){ 2021 *pzErr = sqlite3_mprintf("unable to parse column %d", i); 2022 sqlite3_free(zCreateSql); 2023 return rc; 2024 } 2025 2026 if( !(pTypes[i]&MASK_NULL) ){ 2027 zNotNull = " NOT NULL"; 2028 } 2029 2030 /* Add name and type to the create statement. */ 2031 zCreateSql = sqlite3_mprintf("%z%.*s %.*s%s%s", 2032 zCreateSql, 2033 zNameEnd - zNameStart, zNameStart, 2034 zTypeEnd - zTypeStart, zTypeStart, 2035 zNotNull, zSep); 2036 if( !zCreateSql ){ 2037 return SQLITE_NOMEM; 2038 } 2039 } 2040 2041 *pzCreateSql = zCreateSql; 2042 return SQLITE_OK; 2043 } 2044 2045 /* Helper function for initializing the module. */ 2046 /* argv[0] module name 2047 * argv[1] db name for virtual table 2048 * argv[2] virtual table name 2049 * argv[3] backing table name 2050 * argv[4] columns 2051 */ 2052 /* TODO(shess): Since connect isn't supported, could inline into 2053 * recoverCreate(). 2054 */ 2055 /* TODO(shess): Explore cases where it would make sense to set *pzErr. */ 2056 static int recoverInit( 2057 sqlite3 *db, /* Database connection */ 2058 void *pAux, /* unused */ 2059 int argc, const char *const*argv, /* Parameters to CREATE TABLE statement */ 2060 sqlite3_vtab **ppVtab, /* OUT: New virtual table */ 2061 char **pzErr /* OUT: Error message, if any */ 2062 ){ 2063 const unsigned kTypeCol = 4; /* First argument with column type info. */ 2064 Recover *pRecover; /* Virtual table structure being created. */ 2065 char *zDot; /* Any dot found in "db.table" backing. */ 2066 u32 iRootPage; /* Root page of backing table. */ 2067 char *zCreateSql; /* Schema of created virtual table. */ 2068 int rc; 2069 2070 /* Require to be in the temp database. */ 2071 if( ascii_strcasecmp(argv[1], "temp")!=0 ){ 2072 *pzErr = sqlite3_mprintf("recover table must be in temp database"); 2073 return SQLITE_MISUSE; 2074 } 2075 2076 /* Need the backing table and at least one column. */ 2077 if( argc<=kTypeCol ){ 2078 *pzErr = sqlite3_mprintf("no columns specified"); 2079 return SQLITE_MISUSE; 2080 } 2081 2082 pRecover = sqlite3_malloc(sizeof(Recover)); 2083 if( !pRecover ){ 2084 return SQLITE_NOMEM; 2085 } 2086 memset(pRecover, 0, sizeof(*pRecover)); 2087 pRecover->base.pModule = &recoverModule; 2088 pRecover->db = db; 2089 2090 /* Parse out db.table, assuming main if no dot. */ 2091 zDot = strchr(argv[3], '.'); 2092 if( !zDot ){ 2093 pRecover->zDb = sqlite3_strdup(db->aDb[0].zName); 2094 pRecover->zTable = sqlite3_strdup(argv[3]); 2095 }else if( zDot>argv[3] && zDot[1]!='\0' ){ 2096 pRecover->zDb = sqlite3_strndup(argv[3], zDot - argv[3]); 2097 pRecover->zTable = sqlite3_strdup(zDot + 1); 2098 }else{ 2099 /* ".table" or "db." not allowed. */ 2100 *pzErr = sqlite3_mprintf("ill-formed table specifier"); 2101 recoverRelease(pRecover); 2102 return SQLITE_ERROR; 2103 } 2104 2105 pRecover->nCols = argc - kTypeCol; 2106 pRecover->pTypes = sqlite3_malloc(pRecover->nCols); 2107 if( !pRecover->zDb || !pRecover->zTable || !pRecover->pTypes ){ 2108 recoverRelease(pRecover); 2109 return SQLITE_NOMEM; 2110 } 2111 2112 /* Require the backing table to exist. */ 2113 /* TODO(shess): Be more pedantic about the form of the descriptor 2114 * string. This already fails for poorly-formed strings, simply 2115 * because there won't be a root page, but it would make more sense 2116 * to be explicit. 2117 */ 2118 rc = getRootPage(pRecover->db, pRecover->zDb, pRecover->zTable, &iRootPage); 2119 if( rc!=SQLITE_OK ){ 2120 *pzErr = sqlite3_mprintf("unable to find backing table"); 2121 recoverRelease(pRecover); 2122 return rc; 2123 } 2124 2125 /* Parse the column definitions. */ 2126 rc = ParseColumnsAndGenerateCreate(pRecover->nCols, argv + kTypeCol, 2127 &zCreateSql, pRecover->pTypes, pzErr); 2128 if( rc!=SQLITE_OK ){ 2129 recoverRelease(pRecover); 2130 return rc; 2131 } 2132 2133 rc = sqlite3_declare_vtab(db, zCreateSql); 2134 sqlite3_free(zCreateSql); 2135 if( rc!=SQLITE_OK ){ 2136 recoverRelease(pRecover); 2137 return rc; 2138 } 2139 2140 *ppVtab = (sqlite3_vtab *)pRecover; 2141 return SQLITE_OK; 2142 } 2143