Home | History | Annotate | Download | only in sqlite
      1 Add new virtual table 'recover' to src/ and the amalgamation.
      2 
      3 Since recover.c is in somewhat active development, it is possible that
      4 the patch below will not reliably re-create the file.
      5 
      6 shess (a] chromium.org
      7 
      8 Generated with:
      9 git diff --cached --relative=third_party/sqlite/src --src-prefix='' --dst-prefix='' > third_party/sqlite/recover.patch
     10 [--cached because otherwise the diff adding recover.c wasn't generated.]
     11 
     12 diff --git Makefile.in Makefile.in
     13 index f3239f3..216742c 100644
     14 --- Makefile.in
     15 +++ Makefile.in
     16 @@ -251,6 +251,7 @@ SRC = \
     17    $(TOP)/src/prepare.c \
     18    $(TOP)/src/printf.c \
     19    $(TOP)/src/random.c \
     20 +  $(TOP)/src/recover.c \
     21    $(TOP)/src/resolve.c \
     22    $(TOP)/src/rowset.c \
     23    $(TOP)/src/select.c \
     24 diff --git src/sqlite.h.in src/sqlite.h.in
     25 index 62b9326..fb76659 100644
     26 --- src/sqlite.h.in
     27 +++ src/sqlite.h.in
     28 @@ -6403,6 +6403,17 @@ int sqlite3_wal_checkpoint_v2(
     29  #define SQLITE_CHECKPOINT_RESTART 2
     30  
     31  
     32 +/* Begin recover.patch for Chromium */
     33 +/*
     34 +** Call to initialize the recover virtual-table modules (see recover.c).
     35 +**
     36 +** This could be loaded by default in main.c, but that would make the
     37 +** virtual table available to Web SQL.  Breaking it out allows only
     38 +** selected users to enable it (currently sql/recovery.cc).
     39 +*/
     40 +int recoverVtableInit(sqlite3 *db);
     41 +/* End recover.patch for Chromium */
     42 +
     43  /*
     44  ** Undo the hack that converts floating point types to integer for
     45  ** builds on processors without floating point support.
     46 diff --git tool/mksqlite3c.tcl tool/mksqlite3c.tcl
     47 index fa99f2d..df2df07 100644
     48 --- tool/mksqlite3c.tcl
     49 +++ tool/mksqlite3c.tcl
     50 @@ -293,6 +293,8 @@ foreach file {
     51     main.c
     52     notify.c
     53  
     54 +   recover.c
     55 +
     56     fts3.c
     57     fts3_aux.c
     58     fts3_expr.c
     59 diff --git src/recover.c src/recover.c
     60 new file mode 100644
     61 index 0000000..6430c8b
     62 --- /dev/null
     63 +++ src/recover.c
     64 @@ -0,0 +1,2130 @@
     65 +/*
     66 +** 2012 Jan 11
     67 +**
     68 +** The author disclaims copyright to this source code.  In place of
     69 +** a legal notice, here is a blessing:
     70 +**
     71 +**    May you do good and not evil.
     72 +**    May you find forgiveness for yourself and forgive others.
     73 +**    May you share freely, never taking more than you give.
     74 +*/
     75 +/* TODO(shess): THIS MODULE IS STILL EXPERIMENTAL.  DO NOT USE IT. */
     76 +/* Implements a virtual table "recover" which can be used to recover
     77 + * data from a corrupt table.  The table is walked manually, with
     78 + * corrupt items skipped.  Additionally, any errors while reading will
     79 + * be skipped.
     80 + *
     81 + * Given a table with this definition:
     82 + *
     83 + * CREATE TABLE Stuff (
     84 + *   name TEXT PRIMARY KEY,
     85 + *   value TEXT NOT NULL
     86 + * );
     87 + *
     88 + * to recover the data from the table, you could do something like:
     89 + *
     90 + * -- Attach another database, the original is not trustworthy.
     91 + * ATTACH DATABASE '/tmp/db.db' AS rdb;
     92 + * -- Create a new version of the table.
     93 + * CREATE TABLE rdb.Stuff (
     94 + *   name TEXT PRIMARY KEY,
     95 + *   value TEXT NOT NULL
     96 + * );
     97 + * -- This will read the original table's data.
     98 + * CREATE VIRTUAL TABLE temp.recover_Stuff using recover(
     99 + *   main.Stuff,
    100 + *   name TEXT STRICT NOT NULL,  -- only real TEXT data allowed
    101 + *   value TEXT STRICT NOT NULL
    102 + * );
    103 + * -- Corruption means the UNIQUE constraint may no longer hold for
    104 + * -- Stuff, so either OR REPLACE or OR IGNORE must be used.
    105 + * INSERT OR REPLACE INTO rdb.Stuff (rowid, name, value )
    106 + *   SELECT rowid, name, value FROM temp.recover_Stuff;
    107 + * DROP TABLE temp.recover_Stuff;
    108 + * DETACH DATABASE rdb;
    109 + * -- Move db.db to replace original db in filesystem.
    110 + *
    111 + *
    112 + * Usage
    113 + *
    114 + * Given the goal of dealing with corruption, it would not be safe to
    115 + * create a recovery table in the database being recovered.  So
    116 + * recovery tables must be created in the temp database.  They are not
    117 + * appropriate to persist, in any case.  [As a bonus, sqlite_master
    118 + * tables can be recovered.  Perhaps more cute than useful, though.]
    119 + *
    120 + * The parameters are a specifier for the table to read, and a column
    121 + * definition for each bit of data stored in that table.  The named
    122 + * table must be convertible to a root page number by reading the
    123 + * sqlite_master table.  Bare table names are assumed to be in
    124 + * database 0 ("main"), other databases can be specified in db.table
    125 + * fashion.
    126 + *
    127 + * Column definitions are similar to BUT NOT THE SAME AS those
    128 + * provided to CREATE statements:
    129 + *  column-def: column-name [type-name [STRICT] [NOT NULL]]
    130 + *  type-name: (ANY|ROWID|INTEGER|FLOAT|NUMERIC|TEXT|BLOB)
    131 + *
    132 + * Only those exact type names are accepted, there is no type
    133 + * intuition.  The only constraints accepted are STRICT (see below)
    134 + * and NOT NULL.  Anything unexpected will cause the create to fail.
    135 + *
    136 + * ANY is a convenience to indicate that manifest typing is desired.
    137 + * It is equivalent to not specifying a type at all.  The results for
    138 + * such columns will have the type of the data's storage.  The exposed
    139 + * schema will contain no type for that column.
    140 + *
    141 + * ROWID is used for columns representing aliases to the rowid
    142 + * (INTEGER PRIMARY KEY, with or without AUTOINCREMENT), to make the
    143 + * concept explicit.  Such columns are actually stored as NULL, so
    144 + * they cannot be simply ignored.  The exposed schema will be INTEGER
    145 + * for that column.
    146 + *
    147 + * NOT NULL causes rows with a NULL in that column to be skipped.  It
    148 + * also adds NOT NULL to the column in the exposed schema.  If the
    149 + * table has ever had columns added using ALTER TABLE, then those
    150 + * columns implicitly contain NULL for rows which have not been
    151 + * updated.  [Workaround using COALESCE() in your SELECT statement.]
    152 + *
    153 + * The created table is read-only, with no indices.  Any SELECT will
    154 + * be a full-table scan, returning each valid row read from the
    155 + * storage of the backing table.  The rowid will be the rowid of the
    156 + * row from the backing table.  "Valid" means:
    157 + * - The cell metadata for the row is well-formed.  Mainly this means that
    158 + *   the cell header info describes a payload of the size indicated by
    159 + *   the cell's payload size.
    160 + * - The cell does not run off the page.
    161 + * - The cell does not overlap any other cell on the page.
    162 + * - The cell doesn't contain too many columns.
    163 + * - The types of the serialized data match the indicated types (see below).
    164 + *
    165 + *
    166 + * Type affinity versus type storage.
    167 + *
    168 + * http://www.sqlite.org/datatype3.html describes SQLite's type
    169 + * affinity system.  The system provides for automated coercion of
    170 + * types in certain cases, transparently enough that many developers
    171 + * do not realize that it is happening.  Importantly, it implies that
    172 + * the raw data stored in the database may not have the obvious type.
    173 + *
    174 + * Differences between the stored data types and the expected data
    175 + * types may be a signal of corruption.  This module makes some
    176 + * allowances for automatic coercion.  It is important to be conscious
    177 + * of the difference between the schema exposed by the module, and the
    178 + * data types read from storage.  The following table describes how
    179 + * the module interprets things:
    180 + *
    181 + * type     schema   data                     STRICT
    182 + * ----     ------   ----                     ------
    183 + * ANY      <none>   any                      any
    184 + * ROWID    INTEGER  n/a                      n/a
    185 + * INTEGER  INTEGER  integer                  integer
    186 + * FLOAT    FLOAT    integer or float         float
    187 + * NUMERIC  NUMERIC  integer, float, or text  integer or float
    188 + * TEXT     TEXT     text or blob             text
    189 + * BLOB     BLOB     blob                     blob
    190 + *
    191 + * type is the type provided to the recover module, schema is the
    192 + * schema exposed by the module, data is the acceptable types of data
    193 + * decoded from storage, and STRICT is a modification of that.
    194 + *
    195 + * A very loose recovery system might use ANY for all columns, then
    196 + * use the appropriate sqlite3_column_*() calls to coerce to expected
    197 + * types.  This doesn't provide much protection if a page from a
    198 + * different table with the same column count is linked into an
    199 + * inappropriate btree.
    200 + *
    201 + * A very tight recovery system might use STRICT to enforce typing on
    202 + * all columns, preferring to skip rows which are valid at the storage
    203 + * level but don't contain the right types.  Note that FLOAT STRICT is
    204 + * almost certainly not appropriate, since integral values are
    205 + * transparently stored as integers, when that is more efficient.
    206 + *
    207 + * Another option is to use ANY for all columns and inspect each
    208 + * result manually (using sqlite3_column_*).  This should only be
    209 + * necessary in cases where developers have used manifest typing (test
    210 + * to make sure before you decide that you aren't using manifest
    211 + * typing!).
    212 + *
    213 + *
    214 + * Caveats
    215 + *
    216 + * Leaf pages not referenced by interior nodes will not be found.
    217 + *
    218 + * Leaf pages referenced from interior nodes of other tables will not
    219 + * be resolved.
    220 + *
    221 + * Rows referencing invalid overflow pages will be skipped.
    222 + *
    223 + * SQLite rows have a header which describes how to interpret the rest
    224 + * of the payload.  The header can be valid in cases where the rest of
    225 + * the record is actually corrupt (in the sense that the data is not
    226 + * the intended data).  This can especially happen WRT overflow pages,
    227 + * as lack of atomic updates between pages is the primary form of
    228 + * corruption I have seen in the wild.
    229 + */
    230 +/* The implementation is via a series of cursors.  The cursor
    231 + * implementations follow the pattern:
    232 + *
    233 + * // Creates the cursor using various initialization info.
    234 + * int cursorCreate(...);
    235 + *
    236 + * // Returns 1 if there is no more data, 0 otherwise.
    237 + * int cursorEOF(Cursor *pCursor);
    238 + *
    239 + * // Various accessors can be used if not at EOF.
    240 + *
    241 + * // Move to the next item.
    242 + * int cursorNext(Cursor *pCursor);
    243 + *
    244 + * // Destroy the memory associated with the cursor.
    245 + * void cursorDestroy(Cursor *pCursor);
    246 + *
    247 + * References in the following are to sections at
    248 + * http://www.sqlite.org/fileformat2.html .
    249 + *
    250 + * RecoverLeafCursor iterates the records in a leaf table node
    251 + * described in section 1.5 "B-tree Pages".  When the node is
    252 + * exhausted, an interior cursor is used to get the next leaf node,
    253 + * and iteration continues there.
    254 + *
    255 + * RecoverInteriorCursor iterates the child pages in an interior table
    256 + * node described in section 1.5 "B-tree Pages".  When the node is
    257 + * exhausted, a parent interior cursor is used to get the next
    258 + * interior node at the same level, and iteration continues there.
    259 + *
    260 + * Together these record the path from the leaf level to the root of
    261 + * the tree.  Iteration happens from the leaves rather than the root
    262 + * both for efficiency and because putting the special case at the
    263 + * front of the list is easier to implement.
    264 + *
    265 + * RecoverCursor uses a RecoverLeafCursor to iterate the rows of a
    266 + * table, returning results via the SQLite virtual table interface.
    267 + */
    268 +/* TODO(shess): It might be useful to allow DEFAULT in types to
    269 + * specify what to do for NULL when an ALTER TABLE case comes up.
    270 + * Unfortunately, simply adding it to the exposed schema and using
    271 + * sqlite3_result_null() does not cause the default to be generated.
    272 + * Handling it ourselves seems hard, unfortunately.
    273 + */
    274 +
    275 +#include <assert.h>
    276 +#include <ctype.h>
    277 +#include <stdio.h>
    278 +#include <string.h>
    279 +
    280 +/* Internal SQLite things that are used:
    281 + * u32, u64, i64 types.
    282 + * Btree, Pager, and DbPage structs.
    283 + * DbPage.pData, .pPager, and .pgno
    284 + * sqlite3 struct.
    285 + * sqlite3BtreePager() and sqlite3BtreeGetPageSize()
    286 + * sqlite3PagerAcquire() and sqlite3PagerUnref()
    287 + * getVarint().
    288 + */
    289 +#include "sqliteInt.h"
    290 +
    291 +/* For debugging: change "#if 0" to "#if 1" to make FNENTRY() print the enclosing function's name to stderr. */
    292 +#if 0
    293 +#define FNENTRY() fprintf(stderr, "In %s\n", __FUNCTION__)
    294 +#else
    295 +#define FNENTRY()  /* Tracing disabled: expands to nothing. */
    296 +#endif
    297 +
    298 +/* Generic constants and helper functions. */
    299 +
    300 +static const unsigned char kTableLeafPage = 0x0D;      /* Page-type byte identifying a table leaf page. */
    301 +static const unsigned char kTableInteriorPage = 0x05;  /* Page-type byte identifying a table interior page. */
    302 +
    303 +/* Byte offsets of fields within a b-tree page header, from section 1.5 of the file-format document. */
    304 +static const unsigned kiPageTypeOffset = 0;             /* One byte, compared against the kTable*Page values. */
    305 +static const unsigned kiPageFreeBlockOffset = 1;
    306 +static const unsigned kiPageCellCountOffset = 3;        /* Read with decodeUnsigned16(). */
    307 +static const unsigned kiPageCellContentOffset = 5;
    308 +static const unsigned kiPageFragmentedBytesOffset = 7;
    309 +static const unsigned knPageLeafHeaderBytes = 8;        /* Total size of a leaf page header. */
    310 +/* Interior pages contain an additional field: the right-most child page number. */
    311 +static const unsigned kiPageRightChildOffset = 8;       /* Read with decodeUnsigned32(). */
    312 +static const unsigned kiPageInteriorHeaderBytes = 12;   /* A size, not an offset, despite the "ki" prefix. */
    313 +
    314 +/* Accepted types are specified by a mask: one bit per type, combined into an unsigned char (see SerialTypeIsCompatible()). */
    315 +#define MASK_ROWID (1<<0)
    316 +#define MASK_INTEGER (1<<1)
    317 +#define MASK_FLOAT (1<<2)
    318 +#define MASK_TEXT (1<<3)
    319 +#define MASK_BLOB (1<<4)
    320 +#define MASK_NULL (1<<5)
    321 +
    322 +/* Helpers to decode fixed-size fields stored most-significant byte first.  None of them bounds-check pData. */
    323 +static u32 decodeUnsigned16(const unsigned char *pData){
    324 +  return (pData[0]<<8) + pData[1];  /* Two bytes; pData[0] is the high byte. */
    325 +}
    326 +static u32 decodeUnsigned32(const unsigned char *pData){  /* Four bytes, built from two 16-bit halves, high half first. */
    327 +  return (decodeUnsigned16(pData)<<16) + decodeUnsigned16(pData+2);
    328 +}
    329 +static i64 decodeSigned(const unsigned char *pData, unsigned nBytes){  /* Decode nBytes bytes, high byte first; the sign comes from the first byte. */
    330 +  i64 r = (char)(*pData);  /* NOTE(review): sign-extends only where plain char is signed; (signed char) would be portable. */
    331 +  while( --nBytes ){  /* Requires nBytes>=1: passing 0 would wrap the unsigned counter. */
    332 +    r <<= 8;  /* NOTE(review): left-shifting a negative i64 is undefined behavior in ISO C. */
    333 +    r += *(++pData);  /* Remaining bytes are added as unsigned values. */
    334 +  }
    335 +  return r;
    336 +}
    337 +/* Decode an 8-byte stored float.  Derived from vdbeaux.c, sqlite3VdbeSerialGet(), case 7. */
    338 +/* TODO(shess): Determine if swapMixedEndianFloat() applies. */
    339 +static double decodeFloat64(const unsigned char *pData){
    340 +#if !defined(NDEBUG)
    341 +  static const u64 t1 = ((u64)0x3ff00000)<<32;  /* Bit pattern the host is expected to use for 1.0. */
    342 +  static const double r1 = 1.0;
    343 +  u64 t2 = t1;
    344 +  assert( sizeof(r1)==sizeof(t2) && memcmp(&r1, &t2, sizeof(r1))==0 );  /* Debug-only check of the host double layout. */
    345 +#endif
    346 +  i64 x = decodeSigned(pData, 8);  /* Collect the 8 stored bytes into a 64-bit integer. */
    347 +  double d;
    348 +  memcpy(&d, &x, sizeof(x));  /* Reinterpret those bits as a double by copying rather than pointer-casting. */
    349 +  return d;
    350 +}
    351 +
    352 +/* Return 1 if a complete varint fits within the nData bytes at pData, otherwise 0. */
    353 +/* TODO(shess): DbPage points into the middle of a buffer which
    354 + * contains the page data before DbPage.  So code should always be
    355 + * able to read a small number of varints safely.  Consider whether to
    356 + * trust that or not.
    357 + */
    358 +static int checkVarint(const unsigned char *pData, unsigned nData){
    359 +  unsigned i;
    360 +
    361 +  /* The decoder never consumes more than 9 bytes, so 9 available bytes are always enough. */
    362 +  if( nData>=9 ){
    363 +    return 1;
    364 +  }
    365 +
    366 +  /* With fewer than 9 bytes, the varint must end on a byte whose high bit is clear. */
    367 +  for( i=0; i<nData; ++i ){
    368 +    if( !(pData[i]&0x80) ){
    369 +      return 1;
    370 +    }
    371 +  }
    372 +
    373 +  /* No terminating byte in the space given (this is also the result for nData==0). */
    374 +  return 0;
    375 +}
    376 +
    377 +/* Return 1 if n consecutive varints fit within the nData bytes at pData, otherwise 0. */
    378 +static int checkVarints(const unsigned char *pData, unsigned nData,
    379 +                        unsigned n){
    380 +  unsigned nCur = 0;   /* Byte offset within current varint. */
    381 +  unsigned nFound = 0; /* Number of varints found. */
    382 +  unsigned i;
    383 +
    384 +  /* Each varint takes at most 9 bytes, so 9*n available bytes always suffice (this also covers n==0). */
    385 +  if( nData>=9*n ){
    386 +    return 1;
    387 +  }
    388 +
    389 +  for( i=0; nFound<n && i<nData; ++i ){
    390 +    nCur++;
    391 +    if( nCur==9 || !(pData[i]&0x80) ){  /* A varint ends at its 9th byte or at the first high-bit-clear byte. */
    392 +      nFound++;
    393 +      nCur = 0;
    394 +    }
    395 +  }
    396 +
    397 +  return nFound==n;  /* False if the data ran out before n varints were completed. */
    398 +}
    399 +
    400 +/* ctype and str[n]casecmp() can be affected by locale (eg, tr_TR).
    401 + * These versions consider only the ASCII space.
    402 + */
    403 +/* TODO(shess): It may be reasonable to just remove the need for these
    404 + * entirely.  The module could require "TEXT STRICT NOT NULL", not
    405 + * "Text Strict Not Null" or whatever the developer felt like typing
    406 + * that day.  Handling corrupt data is a PERFECT place to be pedantic.
    407 + */
    408 +static int ascii_isspace(char c){
    409 +  /* From fts3_expr.c: true for space, tab, newline, carriage return, vertical tab and form feed. */
    410 +  return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
    411 +}
    412 +static int ascii_isalnum(int x){
    413 +  /* From fts3_tokenizer1.c: true only for the ASCII ranges 0-9, A-Z and a-z. */
    414 +  return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z');
    415 +}
    416 +static int ascii_tolower(int x){
    417 +  /* From fts3_tokenizer1.c: maps A-Z to a-z and returns every other value unchanged. */
    418 +  return (x>='A' && x<='Z') ? x-'A'+'a' : x;
    419 +}
    420 +/* Compare at most n bytes of s1 and s2, ignoring ASCII case.  TODO(shess): Consider sqlite3_strnicmp() */
    421 +static int ascii_strncasecmp(const char *s1, const char *s2, size_t n){
    422 +  const unsigned char *us1 = (const unsigned char *)s1;  /* Compare as unsigned bytes. */
    423 +  const unsigned char *us2 = (const unsigned char *)s2;
    424 +  while( *us1 && *us2 && n && ascii_tolower(*us1)==ascii_tolower(*us2) ){  /* Advance while both strings continue and match. */
    425 +    us1++, us2++, n--;
    426 +  }
    427 +  return n ? ascii_tolower(*us1)-ascii_tolower(*us2) : 0;  /* 0 once n bytes matched, else the difference at the first mismatch. */
    428 +}
    429 +static int ascii_strcasecmp(const char *s1, const char *s2){
    430 +  /* Passing strlen(s1)+1 includes s1's terminator in the comparison: if
    431 +   * s2 matches through strlen(s1), the loop stops at s1's NUL and the
    432 +   * result is NUL-s2[strlen(s1)], which is 0 only when s2 also ends there. */
    433 +  return ascii_strncasecmp(s1, s2, strlen(s1)+1);
    434 +}
    435 +
    436 +/* Return a pointer iOffset bytes into pPage's data.  Centralized because offset calculations kept going wrong. */
    437 +static const unsigned char *PageData(DbPage *pPage, unsigned iOffset){
    438 +  assert( iOffset<=pPage->nPageSize );  /* NOTE(review): DbPage.nPageSize is not among the internals listed near the sqliteInt.h include; confirm. */
    439 +  return (unsigned char *)pPage->pData + iOffset;
    440 +}
    441 +
    442 +/* The first page in the file contains a file header in the first 100
    443 + * bytes.  The page's header information comes after that.  Note that
    444 + * the offsets in the page's header information are relative to the
    445 + * beginning of the page, NOT the end of the page header.
    446 + */
    447 +static const unsigned char *PageHeader(DbPage *pPage){  /* Return a pointer to the b-tree page header within pPage. */
    448 +  if( pPage->pgno==1 ){
    449 +    const unsigned nDatabaseHeader = 100;  /* Size of the file header that precedes page 1's own header. */
    450 +    return PageData(pPage, nDatabaseHeader);
    451 +  }else{
    452 +    return PageData(pPage, 0);  /* On every other page the header starts at offset 0. */
    453 +  }
    454 +}
    455 +
    456 +/* Fetch the pager and page size for the database named zName; returns SQLITE_OK or SQLITE_ERROR. */
    457 +static int GetPager(sqlite3 *db, const char *zName,
    458 +                    Pager **pPager, unsigned *pnPageSize){
    459 +  Btree *pBt = NULL;
    460 +  int i;
    461 +  for( i=0; i<db->nDb; ++i ){  /* Search the connection's databases by name, ignoring ASCII case. */
    462 +    if( ascii_strcasecmp(db->aDb[i].zName, zName)==0 ){
    463 +      pBt = db->aDb[i].pBt;
    464 +      break;
    465 +    }
    466 +  }
    467 +  if( !pBt ){  /* No database has that name, or the matching entry has no Btree. */
    468 +    return SQLITE_ERROR;  /* The output parameters are left untouched. */
    469 +  }
    470 +
    471 +  *pPager = sqlite3BtreePager(pBt);
    472 +  *pnPageSize = sqlite3BtreeGetPageSize(pBt) - sqlite3BtreeGetReserve(pBt);  /* Page size minus the reserved bytes. */
    473 +  return SQLITE_OK;
    474 +}
    475 +
    476 +/* iSerialType is a type read from a record header.  See "2.1 Record Format".
    477 + */
    478 +
    479 +/* Storage size of iSerialType in bytes.  My interpretation of SQLite
    480 + * documentation is that text and blob fields can have 32-bit length.
    481 + * Values past 2^31-12 will need more than 32 bits to encode, which is
    482 + * why iSerialType is u64.
    483 + */
    484 +static u32 SerialTypeLength(u64 iSerialType){
    485 +  switch( iSerialType ){
    486 +    case 0 : return 0;  /* NULL */
    487 +    case 1 : return 1;  /* Various integers. */
    488 +    case 2 : return 2;
    489 +    case 3 : return 3;
    490 +    case 4 : return 4;
    491 +    case 5 : return 6;
    492 +    case 6 : return 8;
    493 +    case 7 : return 8;  /* 64-bit float. */
    494 +    case 8 : return 0;  /* Constant 0. */
    495 +    case 9 : return 0;  /* Constant 1. */
    496 +    case 10 : case 11 : assert( !"RESERVED TYPE"); return 0;  /* Asserts in debug builds; returns 0 when asserts are compiled out. */
    497 +  }
    498 +  return (u32)((iSerialType>>1) - 6);  /* Types >=12 (text/blob): (N-12)/2 for even N, (N-13)/2 for odd N, truncated to u32. */
    499 +}
    500 +
    501 +/* True if iSerialType refers to a blob.  Only meaningful for the variable-length types (>=12). */
    502 +static int SerialTypeIsBlob(u64 iSerialType){
    503 +  assert( iSerialType>=12 );
    504 +  return (iSerialType%2)==0;  /* Even values are blobs; callers treat odd values as text. */
    505 +}
    506 +
    507 +/* Returns true if the serialized type represented by iSerialType is
    508 + * compatible with the given type mask (a combination of MASK_* bits).
    509 + */
    510 +static int SerialTypeIsCompatible(u64 iSerialType, unsigned char mask){
    511 +  switch( iSerialType ){
    512 +    case 0  : return (mask&MASK_NULL)!=0;
    513 +    case 1  : return (mask&MASK_INTEGER)!=0;  /* Types 1-6 are the fixed-width integers. */
    514 +    case 2  : return (mask&MASK_INTEGER)!=0;
    515 +    case 3  : return (mask&MASK_INTEGER)!=0;
    516 +    case 4  : return (mask&MASK_INTEGER)!=0;
    517 +    case 5  : return (mask&MASK_INTEGER)!=0;
    518 +    case 6  : return (mask&MASK_INTEGER)!=0;
    519 +    case 7  : return (mask&MASK_FLOAT)!=0;
    520 +    case 8  : return (mask&MASK_INTEGER)!=0;  /* Types 8 and 9 are the constants 0 and 1, treated as integers. */
    521 +    case 9  : return (mask&MASK_INTEGER)!=0;
    522 +    case 10 : assert( !"RESERVED TYPE"); return 0;  /* NOTE(review): if corrupt input can reach here, debug builds will abort; confirm callers filter 10/11. */
    523 +    case 11 : assert( !"RESERVED TYPE"); return 0;
    524 +  }
    525 +  return (mask&(SerialTypeIsBlob(iSerialType) ? MASK_BLOB : MASK_TEXT));  /* Nonzero rather than exactly 1, unlike the cases above. */
    526 +}
    527 +
    528 +/* Versions of strdup() with return values appropriate for
    529 + * sqlite3_free().  malloc.c has sqlite3DbStrDup()/NDup(), but those
    530 + * need sqlite3DbFree(), which seems intrusive.  Both return NULL when
    531 + * z is NULL or when allocation fails. */
    532 +static char *sqlite3_strndup(const char *z, unsigned n){
    533 +  char *zNew;
    534 +
    535 +  if( z==NULL ){
    536 +    return NULL;
    537 +  }
    538 +
    539 +  zNew = sqlite3_malloc(n+1);  /* One extra byte for the terminator. */
    540 +  if( zNew!=NULL ){
    541 +    memcpy(zNew, z, n);  /* Copies exactly n bytes without looking for a NUL, so z must provide at least n bytes. */
    542 +    zNew[n] = '\0';
    543 +  }
    544 +  return zNew;
    545 +}
    546 +static char *sqlite3_strdup(const char *z){  /* Duplicate a NUL-terminated string; release the result with sqlite3_free(). */
    547 +  if( z==NULL ){
    548 +    return NULL;
    549 +  }
    550 +  return sqlite3_strndup(z, strlen(z));
    551 +}
    552 +
    553 +/* Fetch the page number of zTable in zDb from sqlite_master in zDb,
    554 + * and put it in *piRootPage.  Returns SQLITE_OK only when exactly one row
    555 + * matches; no row or several rows give SQLITE_CORRUPT; other errors pass through. */
    556 +static int getRootPage(sqlite3 *db, const char *zDb, const char *zTable,
    557 +                       u32 *piRootPage){
    558 +  char *zSql;  /* SQL selecting root page of named element. */
    559 +  sqlite3_stmt *pStmt;
    560 +  int rc;
    561 +
    562 +  if( strcmp(zTable, "sqlite_master")==0 ){  /* Special case (exact, case-sensitive match): answered without a query. */
    563 +    *piRootPage = 1;
    564 +    return SQLITE_OK;
    565 +  }
    566 +
    567 +  zSql = sqlite3_mprintf("SELECT rootpage FROM %s.sqlite_master "  /* NOTE(review): zDb is inserted unquoted via %s; only zTable is escaped with %Q. */
    568 +                         "WHERE type = 'table' AND tbl_name = %Q",
    569 +                         zDb, zTable);
    570 +  if( !zSql ){
    571 +    return SQLITE_NOMEM;
    572 +  }
    573 +
    574 +  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
    575 +  sqlite3_free(zSql);  /* The SQL text is no longer needed once the statement has been prepared. */
    576 +  if( rc!=SQLITE_OK ){
    577 +    return rc;
    578 +  }
    579 +
    580 +  /* Require a result: no matching row is reported as corruption. */
    581 +  rc = sqlite3_step(pStmt);
    582 +  if( rc==SQLITE_DONE ){
    583 +    rc = SQLITE_CORRUPT;
    584 +  }else if( rc==SQLITE_ROW ){
    585 +    *piRootPage = sqlite3_column_int(pStmt, 0);  /* Written even if the uniqueness check below then fails. */
    586 +
    587 +    /* Require only one result: a second matching row is also reported as corruption. */
    588 +    rc = sqlite3_step(pStmt);
    589 +    if( rc==SQLITE_DONE ){
    590 +      rc = SQLITE_OK;
    591 +    }else if( rc==SQLITE_ROW ){
    592 +      rc = SQLITE_CORRUPT;
    593 +    }
    594 +  }
    595 +  sqlite3_finalize(pStmt);  /* Reached on every path after a successful prepare. */
    596 +  return rc;
    597 +}
    598 +
    599 +static int getEncoding(sqlite3 *db, const char *zDb, int* piEncoding){  /* Run "PRAGMA zDb.encoding" and store the matching SQLITE_UTF* constant in *piEncoding. */
    600 +  sqlite3_stmt *pStmt;
    601 +  int rc;
    602 +  char *zSql = sqlite3_mprintf("PRAGMA %s.encoding", zDb);
    603 +  if( !zSql ){
    604 +    return SQLITE_NOMEM;
    605 +  }
    606 +
    607 +  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
    608 +  sqlite3_free(zSql);
    609 +  if( rc!=SQLITE_OK ){
    610 +    return rc;
    611 +  }
    612 +
    613 +  /* Require a result. */
    614 +  rc = sqlite3_step(pStmt);
    615 +  if( rc==SQLITE_DONE ){
    616 +    /* This case should not be possible. */
    617 +    rc = SQLITE_CORRUPT;
    618 +  }else if( rc==SQLITE_ROW ){
    619 +    if( sqlite3_column_type(pStmt, 0)==SQLITE_TEXT ){
    620 +      const char* z = (const char *)sqlite3_column_text(pStmt, 0);  /* NOTE(review): sqlite3_column_text() may return NULL on allocation failure; z is not checked. */
    621 +      /* These strings match the literals in pragma.c. */
    622 +      if( !strcmp(z, "UTF-16le") ){
    623 +        *piEncoding = SQLITE_UTF16LE;
    624 +      }else if( !strcmp(z, "UTF-16be") ){
    625 +        *piEncoding = SQLITE_UTF16BE;
    626 +      }else if( !strcmp(z, "UTF-8") ){
    627 +        *piEncoding = SQLITE_UTF8;
    628 +      }else{
    629 +        /* This case should not be possible; an unrecognized name falls back to UTF-8. */
    630 +        *piEncoding = SQLITE_UTF8;
    631 +      }
    632 +    }else{
    633 +      /* This case should not be possible; a non-text result falls back to UTF-8. */
    634 +      *piEncoding = SQLITE_UTF8;
    635 +    }
    636 +
    637 +    /* Require only one result. */
    638 +    rc = sqlite3_step(pStmt);
    639 +    if( rc==SQLITE_DONE ){
    640 +      rc = SQLITE_OK;
    641 +    }else if( rc==SQLITE_ROW ){
    642 +      /* This case should not be possible. */
    643 +      rc = SQLITE_CORRUPT;
    644 +    }
    645 +  }
    646 +  sqlite3_finalize(pStmt);
    647 +  return rc;  /* SQLITE_OK only when exactly one row was returned; other step errors pass through unchanged. */
    648 +}
    649 +
    650 +/* Cursor for iterating interior nodes.  Interior page cells contain a
    651 + * child page number and a rowid.  The child page contains items left
    652 + * of the rowid (less than).  The rightmost page of the subtree is
    653 + * stored in the page header.
    654 + *
    655 + * interiorCursorDestroy - release all resources associated with the
    656 + *                         cursor and any parent cursors.
    657 + * interiorCursorCreate - create a cursor with the given parent and page.
    658 + * interiorCursorEOF - returns true if neither the cursor nor the
    659 + *                     parent cursors can return any more data.
    660 + * interiorCursorNextPage - fetch the next child page from the cursor.
    661 + *
    662 + * Logically, interiorCursorNextPage() returns the next child page
    663 + * number from the page the cursor is currently reading, calling the
    664 + * parent cursor as necessary to get new pages to read, until done.
    665 + * SQLITE_ROW if a page is returned, SQLITE_DONE if out of pages,
    666 + * error otherwise.  Unfortunately, if the table is corrupted
    667 + * unexpected pages can be returned.  If any unexpected page is found,
    668 + * leaf or otherwise, it is returned to the caller for processing,
    669 + * with the interior cursor left empty.  The next call to
    670 + * interiorCursorNextPage() will recurse to the parent cursor until an
    671 + * interior page to iterate is returned.
    672 + *
    673 + * Note that while interiorCursorNextPage() will refuse to follow
    674 + * loops, it does not keep track of pages returned for purposes of
    675 + * preventing duplication.
    676 + *
    677 + * Note that interiorCursorEOF() could return false (not at EOF), and
    678 + * interiorCursorNextPage() could still return SQLITE_DONE.  This
    679 + * could happen if there are more cells to iterate in an interior
    680 + * page, but those cells refer to invalid pages.
    681 + */
    682 +typedef struct RecoverInteriorCursor RecoverInteriorCursor;
    683 +struct RecoverInteriorCursor {
    684 +  RecoverInteriorCursor *pParent; /* Parent node to this node; NULL ends the chain. */
    685 +  DbPage *pPage;                  /* Reference to the interior page being iterated. */
    686 +  unsigned nPageSize;             /* Size of page, in bytes. */
    687 +  unsigned nChildren;             /* Number of children on the page (cell count plus one). */
    688 +  unsigned iChild;                /* Index of next child to return. */
    689 +};
    690 +
    691 +static void interiorCursorDestroy(RecoverInteriorCursor *pCursor){  /* Accepts NULL, in which case nothing happens. */
    692 +  /* Destroy this cursor and every ancestor by following the pParent links. */
    693 +  while( pCursor ){
    694 +    RecoverInteriorCursor *p = pCursor;
    695 +    pCursor = pCursor->pParent;  /* Save the parent link before p is overwritten and freed. */
    696 +
    697 +    if( p->pPage ){
    698 +      sqlite3PagerUnref(p->pPage);  /* Release the page reference held by this cursor. */
    699 +      p->pPage = NULL;
    700 +    }
    701 +
    702 +    memset(p, 0xA5, sizeof(*p));  /* Presumably poisons the struct so use-after-free stands out. */
    703 +    sqlite3_free(p);
    704 +  }
    705 +}
    706 +
    707 +/* Internal helper.  Point pCursor at pPage, releasing any page it held, and restart iteration at the first child. */
    708 +static void interiorCursorSetPage(RecoverInteriorCursor *pCursor,
    709 +                                  DbPage *pPage){
    710 +  assert( PageHeader(pPage)[kiPageTypeOffset]==kTableInteriorPage );  /* Debug-only: the page type is not checked when asserts are off. */
    711 +
    712 +  if( pCursor->pPage ){
    713 +    sqlite3PagerUnref(pCursor->pPage);
    714 +    pCursor->pPage = NULL;
    715 +  }
    716 +  pCursor->pPage = pPage;  /* The cursor now holds this reference; interiorCursorDestroy() or the next SetPage releases it. */
    717 +  pCursor->iChild = 0;
    718 +
    719 +  /* A child for each cell, plus one in the header. */
    720 +  /* TODO(shess): Sanity-check the count?  Page header plus per-cell
    721 +   * cost of 16-bit offset, 32-bit page number, and one varint
    722 +   * (minimum 1 byte).
    723 +   */
    724 +  pCursor->nChildren = decodeUnsigned16(PageHeader(pPage) +
    725 +                                        kiPageCellCountOffset) + 1;
    726 +}
    727 +
    728 +static int interiorCursorCreate(RecoverInteriorCursor *pParent,  /* Allocate a cursor over interior page pPage, chained to pParent. */
    729 +                                DbPage *pPage, int nPageSize,
    730 +                                RecoverInteriorCursor **ppCursor){
    731 +  RecoverInteriorCursor *pCursor =
    732 +    sqlite3_malloc(sizeof(RecoverInteriorCursor));
    733 +  if( !pCursor ){
    734 +    return SQLITE_NOMEM;  /* pPage, pParent and *ppCursor are untouched, so the caller still holds the page reference. */
    735 +  }
    736 +
    737 +  memset(pCursor, 0, sizeof(*pCursor));  /* Start with pPage==NULL so interiorCursorSetPage() has nothing to release. */
    738 +  pCursor->pParent = pParent;
    739 +  pCursor->nPageSize = nPageSize;  /* Converted from int to the unsigned field. */
    740 +  interiorCursorSetPage(pCursor, pPage);
    741 +  *ppCursor = pCursor;  /* Destroy with interiorCursorDestroy(), which also destroys the parents. */
    742 +  return SQLITE_OK;
    743 +}
    744 +
     745 +/* Internal helper.  Return the child page number at iChild, or 0 if bad. */
     746 +static unsigned interiorCursorChildPage(RecoverInteriorCursor *pCursor){
     747 +  const unsigned char *pPageHeader;  /* Header of the current page. */
     748 +  const unsigned char *pCellOffsets; /* Array of 16-bit cell offsets. */
     749 +  unsigned iCellOffset;              /* Offset of target cell. */
     750 +
     751 +  assert( pCursor->iChild<pCursor->nChildren );
     752 +
     753 +  /* Rightmost (last) child is stored in the page header, not in a cell. */
     754 +  pPageHeader = PageHeader(pCursor->pPage);
     755 +  if( pCursor->iChild==pCursor->nChildren-1 ){
     756 +    return decodeUnsigned32(pPageHeader + kiPageRightChildOffset);
     757 +  }
     758 +
     759 +  /* Each cell is a 4-byte integer page number and a varint rowid
     760 +   * which is greater than the rowid of items in that sub-tree (this
     761 +   * module ignores ordering). The offset is from the beginning of the
     762 +   * page, not from the page header.
     763 +   */
     764 +  pCellOffsets = pPageHeader + kiPageInteriorHeaderBytes;
     765 +  iCellOffset = decodeUnsigned16(pCellOffsets + pCursor->iChild*2);
     766 +  if( iCellOffset<=pCursor->nPageSize-4 ){  /* 4-byte read stays in-page. */
     767 +    return decodeUnsigned32(PageData(pCursor->pPage, iCellOffset));
     768 +  }
     769 +
     770 +  /* TODO(shess): Check for cell overlaps?  Cells require 4 bytes plus
     771 +   * a varint.  Check could be identical to leaf check (or even a
     772 +   * shared helper testing for "Cells starting in this range"?).
     773 +   */
     774 +
     775 +  /* If the offset is broken, return an invalid page number. */
     776 +  return 0;
     777 +}
    778 +
    779 +static int interiorCursorEOF(RecoverInteriorCursor *pCursor){
    780 +  /* Find a parent with remaining children.  EOF if none found. */
    781 +  while( pCursor && pCursor->iChild>=pCursor->nChildren ){
    782 +    pCursor = pCursor->pParent;
    783 +  }
    784 +  return pCursor==NULL;
    785 +}
    786 +
    787 +/* Internal helper.  Used to detect if iPage would cause a loop. */
    788 +static int interiorCursorPageInUse(RecoverInteriorCursor *pCursor,
    789 +                                   unsigned iPage){
    790 +  /* Find any parent using the indicated page. */
    791 +  while( pCursor && pCursor->pPage->pgno!=iPage ){
    792 +    pCursor = pCursor->pParent;
    793 +  }
    794 +  return pCursor!=NULL;
    795 +}
    796 +
    797 +/* Get the next page from the interior cursor at *ppCursor.  Returns
    798 + * SQLITE_ROW with the page in *ppPage, or SQLITE_DONE if out of
    799 + * pages, or the error SQLite returned.
    800 + *
    801 + * If the tree is uneven, then when the cursor attempts to get a new
    802 + * interior page from the parent cursor, it may get a non-interior
    803 + * page.  In that case, the new page is returned, and *ppCursor is
    804 + * updated to point to the parent cursor (this cursor is freed).
    805 + */
    806 +/* TODO(shess): I've tried to avoid recursion in most of this code,
    807 + * but this case is more challenging because the recursive call is in
    808 + * the middle of operation.  One option for converting it without
    809 + * adding memory management would be to retain the head pointer and
    810 + * use a helper to "back up" as needed.  Another option would be to
    811 + * reverse the list during traversal.
    812 + */
    813 +static int interiorCursorNextPage(RecoverInteriorCursor **ppCursor,
    814 +                                  DbPage **ppPage){
    815 +  RecoverInteriorCursor *pCursor = *ppCursor;
    816 +  while( 1 ){
    817 +    int rc;
    818 +    const unsigned char *pPageHeader;  /* Header of found page. */
    819 +
    820 +    /* Find a valid child page which isn't on the stack. */
    821 +    while( pCursor->iChild<pCursor->nChildren ){
    822 +      const unsigned iPage = interiorCursorChildPage(pCursor);
    823 +      pCursor->iChild++;
    824 +      if( interiorCursorPageInUse(pCursor, iPage) ){
    825 +        fprintf(stderr, "Loop detected at %d\n", iPage);
    826 +      }else{
    827 +        int rc = sqlite3PagerAcquire(pCursor->pPage->pPager, iPage, ppPage, 0);
    828 +        if( rc==SQLITE_OK ){
    829 +          return SQLITE_ROW;
    830 +        }
    831 +      }
    832 +    }
    833 +
    834 +    /* This page has no more children.  Get next page from parent. */
    835 +    if( !pCursor->pParent ){
    836 +      return SQLITE_DONE;
    837 +    }
    838 +    rc = interiorCursorNextPage(&pCursor->pParent, ppPage);
    839 +    if( rc!=SQLITE_ROW ){
    840 +      return rc;
    841 +    }
    842 +
    843 +    /* If a non-interior page is received, that either means that the
    844 +     * tree is uneven, or that a child was re-used (say as an overflow
    845 +     * page).  Remove this cursor and let the caller handle the page.
    846 +     */
    847 +    pPageHeader = PageHeader(*ppPage);
    848 +    if( pPageHeader[kiPageTypeOffset]!=kTableInteriorPage ){
    849 +      *ppCursor = pCursor->pParent;
    850 +      pCursor->pParent = NULL;
    851 +      interiorCursorDestroy(pCursor);
    852 +      return SQLITE_ROW;
    853 +    }
    854 +
    855 +    /* Iterate the new page. */
    856 +    interiorCursorSetPage(pCursor, *ppPage);
    857 +    *ppPage = NULL;
    858 +  }
    859 +
    860 +  assert(NULL);  /* NOTREACHED() */
    861 +  return SQLITE_CORRUPT;
    862 +}
    863 +
     864 +/* Large rows are spilled to overflow pages.  The row's main page
     865 + * stores the overflow page number after the local payload, with a
     866 + * linked list forward from there as necessary.  overflowMaybeCreate()
     867 + * and overflowGetSegment() provide an abstraction for accessing such
     868 + * data while centralizing the code.
     869 + *
     870 + * overflowDestroy - releases all resources associated with the structure.
     871 + * overflowMaybeCreate - create the overflow structure if it is needed
     872 + *                       to represent the given record.  See function comment.
     873 + * overflowGetSegment - fetch a segment from the record, accounting
     874 + *                      for overflow pages.  Segments which are not
     875 + *                      entirely contained within a page are constructed
     876 + *                      into a buffer which is returned.  See function comment.
     877 + */
     878 +typedef struct RecoverOverflow RecoverOverflow;
     879 +struct RecoverOverflow {
     880 +  RecoverOverflow *pNextOverflow;  /* Next overflow page in the list. */
     881 +  DbPage *pPage;                   /* Page ref; overflowDestroy() unrefs. */
     882 +  unsigned nPageSize;              /* Size of pPage. */
     883 +};
    884 +
    885 +static void overflowDestroy(RecoverOverflow *pOverflow){
    886 +  while( pOverflow ){
    887 +    RecoverOverflow *p = pOverflow;
    888 +    pOverflow = p->pNextOverflow;
    889 +
    890 +    if( p->pPage ){
    891 +      sqlite3PagerUnref(p->pPage);
    892 +      p->pPage = NULL;
    893 +    }
    894 +
    895 +    memset(p, 0xA5, sizeof(*p));
    896 +    sqlite3_free(p);
    897 +  }
    898 +}
    899 +
    900 +/* Internal helper.  Used to detect if iPage would cause a loop. */
    901 +static int overflowPageInUse(RecoverOverflow *pOverflow, unsigned iPage){
    902 +  while( pOverflow && pOverflow->pPage->pgno!=iPage ){
    903 +    pOverflow = pOverflow->pNextOverflow;
    904 +  }
    905 +  return pOverflow!=NULL;
    906 +}
    907 +
    908 +/* Setup to access an nRecordBytes record beginning at iRecordOffset
    909 + * in pPage.  If nRecordBytes can be satisfied entirely from pPage,
    910 + * then no overflow pages are needed an *pnLocalRecordBytes is set to
    911 + * nRecordBytes.  Otherwise, *ppOverflow is set to the head of a list
    912 + * of overflow pages, and *pnLocalRecordBytes is set to the number of
    913 + * bytes local to pPage.
    914 + *
    915 + * overflowGetSegment() will do the right thing regardless of whether
    916 + * those values are set to be in-page or not.
    917 + */
    918 +static int overflowMaybeCreate(DbPage *pPage, unsigned nPageSize,
    919 +                               unsigned iRecordOffset, unsigned nRecordBytes,
    920 +                               unsigned *pnLocalRecordBytes,
    921 +                               RecoverOverflow **ppOverflow){
    922 +  unsigned nLocalRecordBytes;  /* Record bytes in the leaf page. */
    923 +  unsigned iNextPage;          /* Next page number for record data. */
    924 +  unsigned nBytes;             /* Maximum record bytes as of current page. */
    925 +  int rc;
    926 +  RecoverOverflow *pFirstOverflow;  /* First in linked list of pages. */
    927 +  RecoverOverflow *pLastOverflow;   /* End of linked list. */
    928 +
    929 +  /* Calculations from the "Table B-Tree Leaf Cell" part of section
    930 +   * 1.5 of http://www.sqlite.org/fileformat2.html .  maxLocal and
    931 +   * minLocal to match naming in btree.c.
    932 +   */
    933 +  const unsigned maxLocal = nPageSize - 35;
    934 +  const unsigned minLocal = ((nPageSize-12)*32/255)-23;  /* m */
    935 +
    936 +  /* Always fit anything smaller than maxLocal. */
    937 +  if( nRecordBytes<=maxLocal ){
    938 +    *pnLocalRecordBytes = nRecordBytes;
    939 +    *ppOverflow = NULL;
    940 +    return SQLITE_OK;
    941 +  }
    942 +
    943 +  /* Calculate the remainder after accounting for minLocal on the leaf
    944 +   * page and what packs evenly into overflow pages.  If the remainder
    945 +   * does not fit into maxLocal, then a partially-full overflow page
    946 +   * will be required in any case, so store as little as possible locally.
    947 +   */
    948 +  nLocalRecordBytes = minLocal+((nRecordBytes-minLocal)%(nPageSize-4));
    949 +  if( maxLocal<nLocalRecordBytes ){
    950 +    nLocalRecordBytes = minLocal;
    951 +  }
    952 +
    953 +  /* Don't read off the end of the page. */
    954 +  if( iRecordOffset+nLocalRecordBytes+4>nPageSize ){
    955 +    return SQLITE_CORRUPT;
    956 +  }
    957 +
    958 +  /* First overflow page number is after the local bytes. */
    959 +  iNextPage =
    960 +      decodeUnsigned32(PageData(pPage, iRecordOffset + nLocalRecordBytes));
    961 +  nBytes = nLocalRecordBytes;
    962 +
    963 +  /* While there are more pages to read, and more bytes are needed,
    964 +   * get another page.
    965 +   */
    966 +  pFirstOverflow = pLastOverflow = NULL;
    967 +  rc = SQLITE_OK;
    968 +  while( iNextPage && nBytes<nRecordBytes ){
    969 +    RecoverOverflow *pOverflow;  /* New overflow page for the list. */
    970 +
    971 +    rc = sqlite3PagerAcquire(pPage->pPager, iNextPage, &pPage, 0);
    972 +    if( rc!=SQLITE_OK ){
    973 +      break;
    974 +    }
    975 +
    976 +    pOverflow = sqlite3_malloc(sizeof(RecoverOverflow));
    977 +    if( !pOverflow ){
    978 +      sqlite3PagerUnref(pPage);
    979 +      rc = SQLITE_NOMEM;
    980 +      break;
    981 +    }
    982 +    memset(pOverflow, 0, sizeof(*pOverflow));
    983 +    pOverflow->pPage = pPage;
    984 +    pOverflow->nPageSize = nPageSize;
    985 +
    986 +    if( !pFirstOverflow ){
    987 +      pFirstOverflow = pOverflow;
    988 +    }else{
    989 +      pLastOverflow->pNextOverflow = pOverflow;
    990 +    }
    991 +    pLastOverflow = pOverflow;
    992 +
    993 +    iNextPage = decodeUnsigned32(pPage->pData);
    994 +    nBytes += nPageSize-4;
    995 +
    996 +    /* Avoid loops. */
    997 +    if( overflowPageInUse(pFirstOverflow, iNextPage) ){
    998 +      fprintf(stderr, "Overflow loop detected at %d\n", iNextPage);
    999 +      rc = SQLITE_CORRUPT;
   1000 +      break;
   1001 +    }
   1002 +  }
   1003 +
   1004 +  /* If there were not enough pages, or too many, things are corrupt.
   1005 +   * Not having enough pages is an obvious problem, all the data
   1006 +   * cannot be read.  Too many pages means that the contents of the
   1007 +   * row between the main page and the overflow page(s) is
   1008 +   * inconsistent (most likely one or more of the overflow pages does
   1009 +   * not really belong to this row).
   1010 +   */
   1011 +  if( rc==SQLITE_OK && (nBytes<nRecordBytes || iNextPage) ){
   1012 +    rc = SQLITE_CORRUPT;
   1013 +  }
   1014 +
   1015 +  if( rc==SQLITE_OK ){
   1016 +    *ppOverflow = pFirstOverflow;
   1017 +    *pnLocalRecordBytes = nLocalRecordBytes;
   1018 +  }else if( pFirstOverflow ){
   1019 +    overflowDestroy(pFirstOverflow);
   1020 +  }
   1021 +  return rc;
   1022 +}
   1023 +
    1024 +/* Use in concert with overflowMaybeCreate() to efficiently read parts
    1025 + * of a potentially-overflowing record.  pPage and iRecordOffset are
    1026 + * the values passed into overflowMaybeCreate(), nLocalRecordBytes and
    1027 + * pOverflow are the values returned by that call.
    1028 + *
    1029 + * On SQLITE_OK, *ppBase points to nRequestBytes of data at
    1030 + * iRequestOffset within the record.  If the data exists contiguously
    1031 + * in a page, a direct pointer is returned, otherwise a buffer from
    1032 + * sqlite3_malloc() is returned with the data.  *pbFree is set true if
    1033 + * sqlite3_free() should be called on *ppBase.
    1034 + */
    1035 +/* Operation of this function is subtle.  At any time, pPage is the
    1036 + * current page, with iRecordOffset and nLocalRecordBytes being record
    1037 + * data within pPage, and pOverflow being the overflow page after
    1038 + * pPage.  This allows the code to handle both the initial leaf page
    1039 + * and overflow pages consistently by adjusting the values
    1040 + * appropriately.
    1041 + */
    1042 +static int overflowGetSegment(DbPage *pPage, unsigned iRecordOffset,
    1043 +                              unsigned nLocalRecordBytes,
    1044 +                              RecoverOverflow *pOverflow,
    1045 +                              unsigned iRequestOffset, unsigned nRequestBytes,
    1046 +                              unsigned char **ppBase, int *pbFree){
    1047 +  unsigned nBase;         /* Amount of data currently collected. */
    1048 +  unsigned char *pBase;   /* Buffer to collect record data into. */
    1049 +
    1050 +  /* Skip to the page containing the start of the data. */
    1051 +  while( iRequestOffset>=nLocalRecordBytes && pOverflow ){
    1052 +    /* Factor out current page's contribution. */
    1053 +    iRequestOffset -= nLocalRecordBytes;
    1054 +
    1055 +    /* Move forward to the next page in the list. */
    1056 +    pPage = pOverflow->pPage;
    1057 +    iRecordOffset = 4;
    1058 +    nLocalRecordBytes = pOverflow->nPageSize - iRecordOffset;
    1059 +    pOverflow = pOverflow->pNextOverflow;
    1060 +  }
    1061 +
    1062 +  /* If the requested data is entirely within this page, return a
    1063 +   * pointer into the page.
    1064 +   */
    1065 +  if( iRequestOffset+nRequestBytes<=nLocalRecordBytes ){
    1066 +    /* TODO(shess): "assignment discards qualifiers from pointer target type"
    1067 +     * Having ppBase be const makes sense, but sqlite3_free() takes non-const.
    1068 +     */
    1069 +    *ppBase = (unsigned char *)PageData(pPage, iRecordOffset + iRequestOffset);
    1070 +    *pbFree = 0;
    1071 +    return SQLITE_OK;
    1072 +  }
    1073 +
    1074 +  /* The data range would require additional pages. */
    1075 +  if( !pOverflow ){
    1076 +    /* Should never happen, the range is outside the nRecordBytes
    1077 +     * passed to overflowMaybeCreate().
    1078 +     */
    1079 +    assert(NULL);  /* NOTREACHED */
    1080 +    return SQLITE_ERROR;
    1081 +  }
    1082 +
    1083 +  /* The range spans pages: get a buffer to construct it into. */
    1084 +  nBase = 0;
    1085 +  pBase = sqlite3_malloc(nRequestBytes);
    1086 +  if( !pBase ){
    1087 +    return SQLITE_NOMEM;
    1088 +  }
    1089 +  while( nBase<nRequestBytes ){
    1090 +    /* Copy over data present on this page. */
    1091 +    unsigned nCopyBytes = nRequestBytes - nBase;
    1092 +    if( nLocalRecordBytes-iRequestOffset<nCopyBytes ){
    1093 +      nCopyBytes = nLocalRecordBytes - iRequestOffset;
    1094 +    }
    1095 +    memcpy(pBase + nBase, PageData(pPage, iRecordOffset + iRequestOffset),
    1096 +           nCopyBytes);
    1097 +    nBase += nCopyBytes;
    1098 +
    1099 +    if( pOverflow ){
    1100 +      /* Copy from start of record data in future pages. */
    1101 +      iRequestOffset = 0;
    1102 +
    1103 +      /* Move forward to the next page in the list.  Should match
    1104 +       * first while() loop.
    1105 +       */
    1106 +      pPage = pOverflow->pPage;
    1107 +      iRecordOffset = 4;
    1108 +      nLocalRecordBytes = pOverflow->nPageSize - iRecordOffset;
    1109 +      pOverflow = pOverflow->pNextOverflow;
    1110 +    }else if( nBase<nRequestBytes ){
    1111 +      /* Ran out of overflow pages with data left to deliver.  Not
    1112 +       * possible if the requested range fits within nRecordBytes
    1113 +       * passed to overflowMaybeCreate() when creating pOverflow.
    1114 +       */
    1115 +      assert(NULL);  /* NOTREACHED */
    1116 +      sqlite3_free(pBase);
    1117 +      return SQLITE_ERROR;
    1118 +    }
    1119 +  }
    1120 +  assert( nBase==nRequestBytes );
    1121 +  *ppBase = pBase;
    1122 +  *pbFree = 1;
    1123 +  return SQLITE_OK;
    1124 +}
   1125 +
    1126 +/* Primary structure for iterating the contents of a table.
    1127 + *
    1128 + * leafCursorDestroy - release all resources associated with the cursor.
    1129 + * leafCursorCreate - create a cursor to iterate items from tree at
    1130 + *                    the provided root page.
    1131 + * leafCursorNextValidCell - get the cursor ready to access data from
    1132 + *                           the next valid cell in the table.
    1133 + * leafCursorCellRowid - get the current cell's rowid.
    1134 + * leafCursorCellColumns - get current cell's column count.
    1135 + * leafCursorCellColInfo - get type and data for a column in current cell.
    1136 + *
    1137 + * leafCursorNextValidCell skips cells which fail simple integrity
    1138 + * checks, such as overlapping other cells, or being located at
    1139 + * impossible offsets, or where header data doesn't correctly describe
    1140 + * payload data.  Returns SQLITE_ROW if a valid cell is found,
    1141 + * SQLITE_DONE if all pages in the tree were exhausted.
    1142 + *
    1143 + * leafCursorCellColInfo() accounts for overflow pages in the style of
    1144 + * overflowGetSegment().
    1145 + */
    1146 +typedef struct RecoverLeafCursor RecoverLeafCursor;
    1147 +struct RecoverLeafCursor {
    1148 +  RecoverInteriorCursor *pParent;  /* Parent node to this node. */
    1149 +  DbPage *pPage;                   /* Reference to leaf page. */
    1150 +  unsigned nPageSize;              /* Size of pPage. */
    1151 +  unsigned nCells;                 /* Number of cells in pPage. */
    1152 +  unsigned iCell;                  /* Current cell. */
    1153 +
    1154 +  /* Info parsed from data in iCell. */
    1155 +  i64 iRowid;                      /* Rowid parsed from the cell. */
    1156 +  unsigned nRecordCols;            /* Number of columns in the record. */
    1157 +  u64 iRecordOffset;               /* Offset of record data within pPage. */
    1158 +  /* TODO(shess): nRecordBytes and nRecordHeaderBytes are used in
    1159 +   * leafCursorCellColInfo() to prevent buffer overruns.
    1160 +   * leafCursorCellDecode() already verified that the cell is valid, so
    1161 +   * those checks should be redundant.
    1162 +   */
    1163 +  u64 nRecordBytes;                /* Size of record data. */
    1164 +  unsigned nLocalRecordBytes;      /* Amount of record data in-page. */
    1165 +  unsigned nRecordHeaderBytes;     /* Size of record header data. */
    1166 +  unsigned char *pRecordHeader;    /* Pointer to record header data. */
    1167 +  int bFreeRecordHeader;           /* True if record header requires free. */
    1168 +  RecoverOverflow *pOverflow;      /* Cell overflow info, if needed. */
    1169 +};
   1170 +
    1171 +/* Internal helper shared between next-page and create-cursor.  If
    1172 + * pPage is a leaf page, it will be stored in the cursor and state
    1173 + * initialized for reading cells.
    1174 + *
    1175 + * If pPage is an interior page, a new parent cursor is created and
    1176 + * injected on the stack.  This is necessary to handle trees with
    1177 + * uneven depth, but also is used during initial setup.
    1178 + *
    1179 + * If pPage is not a table page at all, it is discarded.
    1180 + *
    1181 + * If SQLITE_OK is returned, the caller no longer owns pPage,
    1182 + * otherwise the caller is responsible for discarding it.
    1183 + */
    1184 +static int leafCursorLoadPage(RecoverLeafCursor *pCursor, DbPage *pPage){
    1185 +  const unsigned char *pPageHeader;  /* Header of *pPage */
    1186 +
    1187 +  /* Release the current page and forget its cell position. */
    1188 +  if( pCursor->pPage ){
    1189 +    sqlite3PagerUnref(pCursor->pPage);
    1190 +    pCursor->pPage = NULL;
    1191 +    pCursor->iCell = pCursor->nCells = 0;
    1192 +  }
    1193 +
    1194 +  /* If the page is an unexpected interior node, inject a new stack
    1195 +   * layer and try again from there.
    1196 +   */
    1197 +  pPageHeader = PageHeader(pPage);
    1198 +  if( pPageHeader[kiPageTypeOffset]==kTableInteriorPage ){
    1199 +    RecoverInteriorCursor *pParent;
    1200 +    int rc = interiorCursorCreate(pCursor->pParent, pPage, pCursor->nPageSize,
    1201 +                                  &pParent);
    1202 +    if( rc!=SQLITE_OK ){
    1203 +      return rc;
    1204 +    }
    1205 +    pCursor->pParent = pParent;
    1206 +    return SQLITE_OK;
    1207 +  }
    1208 +
    1209 +  /* Not a table page at all: discard it, leaving pCursor->pPage NULL. */
    1210 +  if( pPageHeader[kiPageTypeOffset]!=kTableLeafPage ){
    1211 +    sqlite3PagerUnref(pPage);
    1212 +    return SQLITE_OK;
    1213 +  }
    1214 +
    1215 +  /* Take ownership of the page and start decoding. */
    1216 +  pCursor->pPage = pPage;
    1217 +  pCursor->iCell = 0;
    1218 +  pCursor->nCells = decodeUnsigned16(pPageHeader + kiPageCellCountOffset);
    1219 +  return SQLITE_OK;
    1220 +}
   1221 +
   1222 +/* Get the next leaf-level page in the tree.  Returns SQLITE_ROW when
   1223 + * a leaf page is found, SQLITE_DONE when no more leaves exist, or any
   1224 + * error which occurred.
   1225 + */
   1226 +static int leafCursorNextPage(RecoverLeafCursor *pCursor){
   1227 +  if( !pCursor->pParent ){
   1228 +    return SQLITE_DONE;
   1229 +  }
   1230 +
   1231 +  /* Repeatedly load the parent's next child page until a leaf is found. */
   1232 +  do {
   1233 +    DbPage *pNextPage;
   1234 +    int rc = interiorCursorNextPage(&pCursor->pParent, &pNextPage);
   1235 +    if( rc!=SQLITE_ROW ){
   1236 +      assert( rc==SQLITE_DONE );
   1237 +      return rc;
   1238 +    }
   1239 +
   1240 +    rc = leafCursorLoadPage(pCursor, pNextPage);
   1241 +    if( rc!=SQLITE_OK ){
   1242 +      sqlite3PagerUnref(pNextPage);
   1243 +      return rc;
   1244 +    }
   1245 +  } while( !pCursor->pPage );
   1246 +
   1247 +  return SQLITE_ROW;
   1248 +}
   1249 +
   1250 +static void leafCursorDestroyCellData(RecoverLeafCursor *pCursor){
   1251 +  if( pCursor->bFreeRecordHeader ){
   1252 +    sqlite3_free(pCursor->pRecordHeader);
   1253 +  }
   1254 +  pCursor->bFreeRecordHeader = 0;
   1255 +  pCursor->pRecordHeader = NULL;
   1256 +
   1257 +  if( pCursor->pOverflow ){
   1258 +    overflowDestroy(pCursor->pOverflow);
   1259 +    pCursor->pOverflow = NULL;
   1260 +  }
   1261 +}
   1262 +
   1263 +static void leafCursorDestroy(RecoverLeafCursor *pCursor){
   1264 +  leafCursorDestroyCellData(pCursor);
   1265 +
   1266 +  if( pCursor->pParent ){
   1267 +    interiorCursorDestroy(pCursor->pParent);
   1268 +    pCursor->pParent = NULL;
   1269 +  }
   1270 +
   1271 +  if( pCursor->pPage ){
   1272 +    sqlite3PagerUnref(pCursor->pPage);
   1273 +    pCursor->pPage = NULL;
   1274 +  }
   1275 +
   1276 +  memset(pCursor, 0xA5, sizeof(*pCursor));
   1277 +  sqlite3_free(pCursor);
   1278 +}
   1279 +
    1280 +/* Create a cursor to iterate the rows from the leaf pages of a table
    1281 + * rooted at iRootPage.
    1282 + */
    1283 +/* TODO(shess): recoverOpen() calls this to setup the cursor, and I
    1284 + * think that recoverFilter() may make a hard assumption that the
    1285 + * cursor returned will turn up at least one valid cell.
    1286 + *
    1287 + * The cases I can think of which break this assumption are:
    1288 + * - pPage is a valid leaf page with no valid cells.
    1289 + * - pPage is a valid interior page with no valid leaves.
    1290 + * - pPage is a valid interior page whose leaves contain no valid cells.
    1291 + * - pPage is not a valid leaf or interior page.
    1292 + */
    1293 +static int leafCursorCreate(Pager *pPager, unsigned nPageSize,
    1294 +                            u32 iRootPage, RecoverLeafCursor **ppCursor){
    1295 +  DbPage *pPage;               /* Reference to page at iRootPage. */
    1296 +  RecoverLeafCursor *pCursor;  /* Leaf cursor being constructed. */
    1297 +  int rc;
    1298 +
    1299 +  /* Start out with the root page. */
    1300 +  rc = sqlite3PagerAcquire(pPager, iRootPage, &pPage, 0);
    1301 +  if( rc!=SQLITE_OK ){
    1302 +    return rc;
    1303 +  }
    1304 +
    1305 +  pCursor = sqlite3_malloc(sizeof(RecoverLeafCursor));
    1306 +  if( !pCursor ){
    1307 +    sqlite3PagerUnref(pPage);
    1308 +    return SQLITE_NOMEM;
    1309 +  }
    1310 +  memset(pCursor, 0, sizeof(*pCursor));
    1311 +
    1312 +  pCursor->nPageSize = nPageSize;
    1313 +
    1314 +  rc = leafCursorLoadPage(pCursor, pPage);
    1315 +  if( rc!=SQLITE_OK ){
    1316 +    sqlite3PagerUnref(pPage);
    1317 +    leafCursorDestroy(pCursor);
    1318 +    return rc;
    1319 +  }
    1320 +
    1321 +  /* pPage wasn't a leaf; find the next leaf page.  DONE is not an error. */
    1322 +  if( !pCursor->pPage ){
    1323 +    rc = leafCursorNextPage(pCursor);
    1324 +    if( rc!=SQLITE_DONE && rc!=SQLITE_ROW ){
    1325 +      leafCursorDestroy(pCursor);
    1326 +      return rc;
    1327 +    }
    1328 +  }
    1329 +
    1330 +  *ppCursor = pCursor;
    1331 +  return SQLITE_OK;
    1332 +}
   1333 +
   1334 +/* Useful for setting breakpoints. */
   1335 +static int ValidateError(){
   1336 +  return SQLITE_ERROR;
   1337 +}
   1338 +
   1339 +/* Setup the cursor for reading the information from cell iCell. */
   1340 +static int leafCursorCellDecode(RecoverLeafCursor *pCursor){
   1341 +  const unsigned char *pPageHeader;  /* Header of current page. */
   1342 +  const unsigned char *pCellOffsets; /* Pointer to page's cell offsets. */
   1343 +  unsigned iCellOffset;              /* Offset of current cell (iCell). */
   1344 +  const unsigned char *pCell;        /* Pointer to data at iCellOffset. */
   1345 +  unsigned nCellMaxBytes;            /* Maximum local size of iCell. */
   1346 +  unsigned iEndOffset;               /* End of iCell's in-page data. */
   1347 +  u64 nRecordBytes;                  /* Expected size of cell, w/overflow. */
   1348 +  u64 iRowid;                        /* iCell's rowid (in table). */
   1349 +  unsigned nRead;                    /* Amount of cell read. */
   1350 +  unsigned nRecordHeaderRead;        /* Header data read. */
   1351 +  u64 nRecordHeaderBytes;            /* Header size expected. */
   1352 +  unsigned nRecordCols;              /* Columns read from header. */
   1353 +  u64 nRecordColBytes;               /* Bytes in payload for those columns. */
   1354 +  unsigned i;
   1355 +  int rc;
   1356 +
   1357 +  assert( pCursor->iCell<pCursor->nCells );
   1358 +
   1359 +  leafCursorDestroyCellData(pCursor);
   1360 +
   1361 +  /* Find the offset to the row. */
   1362 +  pPageHeader = PageHeader(pCursor->pPage);
   1363 +  pCellOffsets = pPageHeader + knPageLeafHeaderBytes;
   1364 +  iCellOffset = decodeUnsigned16(pCellOffsets + pCursor->iCell*2);
   1365 +  if( iCellOffset>=pCursor->nPageSize ){
   1366 +    return ValidateError();
   1367 +  }
   1368 +
   1369 +  pCell = PageData(pCursor->pPage, iCellOffset);
   1370 +  nCellMaxBytes = pCursor->nPageSize - iCellOffset;
   1371 +
   1372 +  /* B-tree leaf cells lead with varint record size, varint rowid and
   1373 +   * varint header size.
   1374 +   */
   1375 +  /* TODO(shess): The smallest page size is 512 bytes, which has an m
   1376 +   * of 39.  Three varints need at most 27 bytes to encode.  I think.
   1377 +   */
   1378 +  if( !checkVarints(pCell, nCellMaxBytes, 3) ){
   1379 +    return ValidateError();
   1380 +  }
   1381 +
   1382 +  nRead = getVarint(pCell, &nRecordBytes);
   1383 +  assert( iCellOffset+nRead<=pCursor->nPageSize );
   1384 +  pCursor->nRecordBytes = nRecordBytes;
   1385 +
   1386 +  nRead += getVarint(pCell + nRead, &iRowid);
   1387 +  assert( iCellOffset+nRead<=pCursor->nPageSize );
   1388 +  pCursor->iRowid = (i64)iRowid;
   1389 +
   1390 +  pCursor->iRecordOffset = iCellOffset + nRead;
   1391 +
   1392 +  /* Start overflow setup here because nLocalRecordBytes is needed to
   1393 +   * check cell overlap.
   1394 +   */
   1395 +  rc = overflowMaybeCreate(pCursor->pPage, pCursor->nPageSize,
   1396 +                           pCursor->iRecordOffset, pCursor->nRecordBytes,
   1397 +                           &pCursor->nLocalRecordBytes,
   1398 +                           &pCursor->pOverflow);
   1399 +  if( rc!=SQLITE_OK ){
   1400 +    return ValidateError();
   1401 +  }
   1402 +
   1403 +  /* Check that no other cell starts within this cell. */
   1404 +  iEndOffset = pCursor->iRecordOffset + pCursor->nLocalRecordBytes;
   1405 +  for( i=0; i<pCursor->nCells; ++i ){
   1406 +    const unsigned iOtherOffset = decodeUnsigned16(pCellOffsets + i*2);
   1407 +    if( iOtherOffset>iCellOffset && iOtherOffset<iEndOffset ){
   1408 +      return ValidateError();
   1409 +    }
   1410 +  }
   1411 +
   1412 +  nRecordHeaderRead = getVarint(pCell + nRead, &nRecordHeaderBytes);
   1413 +  assert( nRecordHeaderBytes<=nRecordBytes );
   1414 +  pCursor->nRecordHeaderBytes = nRecordHeaderBytes;
   1415 +
   1416 +  /* Large headers could overflow if pages are small. */
   1417 +  rc = overflowGetSegment(pCursor->pPage,
   1418 +                          pCursor->iRecordOffset, pCursor->nLocalRecordBytes,
   1419 +                          pCursor->pOverflow, 0, nRecordHeaderBytes,
   1420 +                          &pCursor->pRecordHeader, &pCursor->bFreeRecordHeader);
   1421 +  if( rc!=SQLITE_OK ){
   1422 +    return ValidateError();
   1423 +  }
   1424 +
   1425 +  /* Tally up the column count and size of data. */
   1426 +  nRecordCols = 0;
   1427 +  nRecordColBytes = 0;
   1428 +  while( nRecordHeaderRead<nRecordHeaderBytes ){
   1429 +    u64 iSerialType;  /* Type descriptor for current column. */
   1430 +    if( !checkVarint(pCursor->pRecordHeader + nRecordHeaderRead,
   1431 +                     nRecordHeaderBytes - nRecordHeaderRead) ){
   1432 +      return ValidateError();
   1433 +    }
   1434 +    nRecordHeaderRead += getVarint(pCursor->pRecordHeader + nRecordHeaderRead,
   1435 +                                   &iSerialType);
   1436 +    if( iSerialType==10 || iSerialType==11 ){
   1437 +      return ValidateError();
   1438 +    }
   1439 +    nRecordColBytes += SerialTypeLength(iSerialType);
   1440 +    nRecordCols++;
   1441 +  }
   1442 +  pCursor->nRecordCols = nRecordCols;
   1443 +
   1444 +  /* Parsing the header used as many bytes as expected. */
   1445 +  if( nRecordHeaderRead!=nRecordHeaderBytes ){
   1446 +    return ValidateError();
   1447 +  }
   1448 +
   1449 +  /* Calculated record is size of expected record. */
   1450 +  if( nRecordHeaderBytes+nRecordColBytes!=nRecordBytes ){
   1451 +    return ValidateError();
   1452 +  }
   1453 +
   1454 +  return SQLITE_OK;
   1455 +}
   1456 +
   1457 +static i64 leafCursorCellRowid(RecoverLeafCursor *pCursor){
   1458 +  return pCursor->iRowid;
   1459 +}
   1460 +
   1461 +static unsigned leafCursorCellColumns(RecoverLeafCursor *pCursor){
   1462 +  return pCursor->nRecordCols;
   1463 +}
   1464 +
   1465 +/* Get the column info for the cell.  Pass NULL for ppBase to prevent
   1466 + * retrieving the data segment.  If *pbFree is true, *ppBase must be
   1467 + * freed by the caller using sqlite3_free().
   1468 + */
   1469 +static int leafCursorCellColInfo(RecoverLeafCursor *pCursor,
   1470 +                                 unsigned iCol, u64 *piColType,
   1471 +                                 unsigned char **ppBase, int *pbFree){
   1472 +  const unsigned char *pRecordHeader;  /* Current cell's header. */
   1473 +  u64 nRecordHeaderBytes;              /* Bytes in pRecordHeader. */
   1474 +  unsigned nRead;                      /* Bytes read from header. */
   1475 +  u64 iColEndOffset;                   /* Offset to end of column in cell. */
   1476 +  unsigned nColsSkipped;               /* Count columns as procesed. */
   1477 +  u64 iSerialType;                     /* Type descriptor for current column. */
   1478 +
   1479 +  /* Implicit NULL for columns past the end.  This case happens when
   1480 +   * rows have not been updated since an ALTER TABLE added columns.
   1481 +   * It is more convenient to address here than in callers.
   1482 +   */
   1483 +  if( iCol>=pCursor->nRecordCols ){
   1484 +    *piColType = 0;
   1485 +    if( ppBase ){
   1486 +      *ppBase = 0;
   1487 +      *pbFree = 0;
   1488 +    }
   1489 +    return SQLITE_OK;
   1490 +  }
   1491 +
   1492 +  /* Must be able to decode header size. */
   1493 +  pRecordHeader = pCursor->pRecordHeader;
   1494 +  if( !checkVarint(pRecordHeader, pCursor->nRecordHeaderBytes) ){
   1495 +    return SQLITE_CORRUPT;
   1496 +  }
   1497 +
   1498 +  /* Rather than caching the header size and how many bytes it took,
   1499 +   * decode it every time.
   1500 +   */
   1501 +  nRead = getVarint(pRecordHeader, &nRecordHeaderBytes);
   1502 +  assert( nRecordHeaderBytes==pCursor->nRecordHeaderBytes );
   1503 +
   1504 +  /* Scan forward to the indicated column.  Scans to _after_ column
   1505 +   * for later range checking.
   1506 +   */
   1507 +  /* TODO(shess): This could get expensive for very wide tables.  An
   1508 +   * array of iSerialType could be built in leafCursorCellDecode(), but
   1509 +   * the number of columns is dynamic per row, so it would add memory
   1510 +   * management complexity.  Enough info to efficiently forward
   1511 +   * iterate could be kept, if all clients forward iterate
   1512 +   * (recoverColumn() may not).
   1513 +   */
   1514 +  iColEndOffset = 0;
   1515 +  nColsSkipped = 0;
   1516 +  while( nColsSkipped<=iCol && nRead<nRecordHeaderBytes ){
   1517 +    if( !checkVarint(pRecordHeader + nRead, nRecordHeaderBytes - nRead) ){
   1518 +      return SQLITE_CORRUPT;
   1519 +    }
   1520 +    nRead += getVarint(pRecordHeader + nRead, &iSerialType);
   1521 +    iColEndOffset += SerialTypeLength(iSerialType);
   1522 +    nColsSkipped++;
   1523 +  }
   1524 +
   1525 +  /* Column's data extends past record's end. */
   1526 +  if( nRecordHeaderBytes+iColEndOffset>pCursor->nRecordBytes ){
   1527 +    return SQLITE_CORRUPT;
   1528 +  }
   1529 +
   1530 +  *piColType = iSerialType;
   1531 +  if( ppBase ){
   1532 +    const u32 nColBytes = SerialTypeLength(iSerialType);
   1533 +
   1534 +    /* Offset from start of record to beginning of column. */
   1535 +    const unsigned iColOffset = nRecordHeaderBytes+iColEndOffset-nColBytes;
   1536 +
   1537 +    return overflowGetSegment(pCursor->pPage, pCursor->iRecordOffset,
   1538 +                              pCursor->nLocalRecordBytes, pCursor->pOverflow,
   1539 +                              iColOffset, nColBytes, ppBase, pbFree);
   1540 +  }
   1541 +  return SQLITE_OK;
   1542 +}
   1543 +
   1544 +static int leafCursorNextValidCell(RecoverLeafCursor *pCursor){
   1545 +  while( 1 ){
   1546 +    int rc;
   1547 +
   1548 +    /* Move to the next cell. */
   1549 +    pCursor->iCell++;
   1550 +
   1551 +    /* No more cells, get the next leaf. */
   1552 +    if( pCursor->iCell>=pCursor->nCells ){
   1553 +      rc = leafCursorNextPage(pCursor);
   1554 +      if( rc!=SQLITE_ROW ){
   1555 +        return rc;
   1556 +      }
   1557 +      assert( pCursor->iCell==0 );
   1558 +    }
   1559 +
   1560 +    /* If the cell is valid, indicate that a row is available. */
   1561 +    rc = leafCursorCellDecode(pCursor);
   1562 +    if( rc==SQLITE_OK ){
   1563 +      return SQLITE_ROW;
   1564 +    }
   1565 +
   1566 +    /* Iterate until done or a valid row is found. */
   1567 +    /* TODO(shess): Remove debugging output. */
   1568 +    fprintf(stderr, "Skipping invalid cell\n");
   1569 +  }
   1570 +  return SQLITE_ERROR;
   1571 +}
   1572 +
   1573 +typedef struct Recover Recover;
   1574 +struct Recover {
   1575 +  sqlite3_vtab base;
   1576 +  sqlite3 *db;                /* Host database connection */
   1577 +  char *zDb;                  /* Database containing target table */
   1578 +  char *zTable;               /* Target table */
   1579 +  unsigned nCols;             /* Number of columns in target table */
   1580 +  unsigned char *pTypes;      /* Types of columns in target table */
   1581 +};
   1582 +
   1583 +/* Internal helper for deleting the module. */
   1584 +static void recoverRelease(Recover *pRecover){
   1585 +  sqlite3_free(pRecover->zDb);
   1586 +  sqlite3_free(pRecover->zTable);
   1587 +  sqlite3_free(pRecover->pTypes);
   1588 +  memset(pRecover, 0xA5, sizeof(*pRecover));
   1589 +  sqlite3_free(pRecover);
   1590 +}
   1591 +
   1592 +/* Helper function for initializing the module.  Forward-declared so
   1593 + * recoverCreate() and recoverConnect() can see it.
   1594 + */
   1595 +static int recoverInit(
   1596 +  sqlite3 *, void *, int, const char *const*, sqlite3_vtab **, char **
   1597 +);
   1598 +
   1599 +static int recoverCreate(
   1600 +  sqlite3 *db,
   1601 +  void *pAux,
   1602 +  int argc, const char *const*argv,
   1603 +  sqlite3_vtab **ppVtab,
   1604 +  char **pzErr
   1605 +){
   1606 +  FNENTRY();
   1607 +  return recoverInit(db, pAux, argc, argv, ppVtab, pzErr);
   1608 +}
   1609 +
   1610 +/* This should never be called. */
   1611 +static int recoverConnect(
   1612 +  sqlite3 *db,
   1613 +  void *pAux,
   1614 +  int argc, const char *const*argv,
   1615 +  sqlite3_vtab **ppVtab,
   1616 +  char **pzErr
   1617 +){
   1618 +  FNENTRY();
   1619 +  return recoverInit(db, pAux, argc, argv, ppVtab, pzErr);
   1620 +}
   1621 +
   1622 +/* No indices supported. */
   1623 +static int recoverBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
   1624 +  FNENTRY();
   1625 +  return SQLITE_OK;
   1626 +}
   1627 +
   1628 +/* Logically, this should never be called. */
   1629 +static int recoverDisconnect(sqlite3_vtab *pVtab){
   1630 +  FNENTRY();
   1631 +  recoverRelease((Recover*)pVtab);
   1632 +  return SQLITE_OK;
   1633 +}
   1634 +
   1635 +static int recoverDestroy(sqlite3_vtab *pVtab){
   1636 +  FNENTRY();
   1637 +  recoverRelease((Recover*)pVtab);
   1638 +  return SQLITE_OK;
   1639 +}
   1640 +
   1641 +typedef struct RecoverCursor RecoverCursor;
   1642 +struct RecoverCursor {
   1643 +  sqlite3_vtab_cursor base;
   1644 +  RecoverLeafCursor *pLeafCursor;
   1645 +  int iEncoding;
   1646 +  int bEOF;
   1647 +};
   1648 +
   1649 +static int recoverOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
   1650 +  Recover *pRecover = (Recover*)pVTab;
   1651 +  u32 iRootPage;                   /* Root page of the backing table. */
   1652 +  int iEncoding;                   /* UTF encoding for backing database. */
   1653 +  unsigned nPageSize;              /* Size of pages in backing database. */
   1654 +  Pager *pPager;                   /* Backing database pager. */
   1655 +  RecoverLeafCursor *pLeafCursor;  /* Cursor to read table's leaf pages. */
   1656 +  RecoverCursor *pCursor;          /* Cursor to read rows from leaves. */
   1657 +  int rc;
   1658 +
   1659 +  FNENTRY();
   1660 +
   1661 +  iRootPage = 0;
   1662 +  rc = getRootPage(pRecover->db, pRecover->zDb, pRecover->zTable,
   1663 +                   &iRootPage);
   1664 +  if( rc!=SQLITE_OK ){
   1665 +    return rc;
   1666 +  }
   1667 +
   1668 +  iEncoding = 0;
   1669 +  rc = getEncoding(pRecover->db, pRecover->zDb, &iEncoding);
   1670 +  if( rc!=SQLITE_OK ){
   1671 +    return rc;
   1672 +  }
   1673 +
   1674 +  rc = GetPager(pRecover->db, pRecover->zDb, &pPager, &nPageSize);
   1675 +  if( rc!=SQLITE_OK ){
   1676 +    return rc;
   1677 +  }
   1678 +
   1679 +  rc = leafCursorCreate(pPager, nPageSize, iRootPage, &pLeafCursor);
   1680 +  if( rc!=SQLITE_OK ){
   1681 +    return rc;
   1682 +  }
   1683 +
   1684 +  pCursor = sqlite3_malloc(sizeof(RecoverCursor));
   1685 +  if( !pCursor ){
   1686 +    leafCursorDestroy(pLeafCursor);
   1687 +    return SQLITE_NOMEM;
   1688 +  }
   1689 +  memset(pCursor, 0, sizeof(*pCursor));
   1690 +  pCursor->base.pVtab = pVTab;
   1691 +  pCursor->pLeafCursor = pLeafCursor;
   1692 +  pCursor->iEncoding = iEncoding;
   1693 +
   1694 +  *ppCursor = (sqlite3_vtab_cursor*)pCursor;
   1695 +  return SQLITE_OK;
   1696 +}
   1697 +
   1698 +static int recoverClose(sqlite3_vtab_cursor *cur){
   1699 +  RecoverCursor *pCursor = (RecoverCursor*)cur;
   1700 +  FNENTRY();
   1701 +  if( pCursor->pLeafCursor ){
   1702 +    leafCursorDestroy(pCursor->pLeafCursor);
   1703 +    pCursor->pLeafCursor = NULL;
   1704 +  }
   1705 +  memset(pCursor, 0xA5, sizeof(*pCursor));
   1706 +  sqlite3_free(cur);
   1707 +  return SQLITE_OK;
   1708 +}
   1709 +
   1710 +/* Helpful place to set a breakpoint. */
   1711 +static int RecoverInvalidCell(){
   1712 +  return SQLITE_ERROR;
   1713 +}
   1714 +
   1715 +/* Returns SQLITE_OK if the cell has an appropriate number of columns
   1716 + * with the appropriate types of data.
   1717 + */
   1718 +static int recoverValidateLeafCell(Recover *pRecover, RecoverCursor *pCursor){
   1719 +  unsigned i;
   1720 +
   1721 +  /* If the row's storage has too many columns, skip it. */
   1722 +  if( leafCursorCellColumns(pCursor->pLeafCursor)>pRecover->nCols ){
   1723 +    return RecoverInvalidCell();
   1724 +  }
   1725 +
   1726 +  /* Skip rows with unexpected types. */
   1727 +  for( i=0; i<pRecover->nCols; ++i ){
   1728 +    u64 iType;  /* Storage type of column i. */
   1729 +    int rc;
   1730 +
   1731 +    /* ROWID alias. */
   1732 +    if( (pRecover->pTypes[i]&MASK_ROWID) ){
   1733 +      continue;
   1734 +    }
   1735 +
   1736 +    rc = leafCursorCellColInfo(pCursor->pLeafCursor, i, &iType, NULL, NULL);
   1737 +    assert( rc==SQLITE_OK );
   1738 +    if( rc!=SQLITE_OK || !SerialTypeIsCompatible(iType, pRecover->pTypes[i]) ){
   1739 +      return RecoverInvalidCell();
   1740 +    }
   1741 +  }
   1742 +
   1743 +  return SQLITE_OK;
   1744 +}
   1745 +
   1746 +static int recoverNext(sqlite3_vtab_cursor *pVtabCursor){
   1747 +  RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor;
   1748 +  Recover *pRecover = (Recover*)pCursor->base.pVtab;
   1749 +  int rc;
   1750 +
   1751 +  FNENTRY();
   1752 +
   1753 +  /* Scan forward to the next cell with valid storage, then check that
   1754 +   * the stored data matches the schema.
   1755 +   */
   1756 +  while( (rc = leafCursorNextValidCell(pCursor->pLeafCursor))==SQLITE_ROW ){
   1757 +    if( recoverValidateLeafCell(pRecover, pCursor)==SQLITE_OK ){
   1758 +      return SQLITE_OK;
   1759 +    }
   1760 +  }
   1761 +
   1762 +  if( rc==SQLITE_DONE ){
   1763 +    pCursor->bEOF = 1;
   1764 +    return SQLITE_OK;
   1765 +  }
   1766 +
   1767 +  assert( rc!=SQLITE_OK );
   1768 +  return rc;
   1769 +}
   1770 +
   1771 +static int recoverFilter(
   1772 +  sqlite3_vtab_cursor *pVtabCursor,
   1773 +  int idxNum, const char *idxStr,
   1774 +  int argc, sqlite3_value **argv
   1775 +){
   1776 +  RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor;
   1777 +  Recover *pRecover = (Recover*)pCursor->base.pVtab;
   1778 +  int rc;
   1779 +
   1780 +  FNENTRY();
   1781 +
   1782 +  /* Load the first cell, and iterate forward if it's not valid. */
   1783 +  /* TODO(shess): What happens if no cells at all are valid? */
   1784 +  rc = leafCursorCellDecode(pCursor->pLeafCursor);
   1785 +  if( rc!=SQLITE_OK || recoverValidateLeafCell(pRecover, pCursor)!=SQLITE_OK ){
   1786 +    return recoverNext(pVtabCursor);
   1787 +  }
   1788 +
   1789 +  return SQLITE_OK;
   1790 +}
   1791 +
   1792 +static int recoverEof(sqlite3_vtab_cursor *pVtabCursor){
   1793 +  RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor;
   1794 +  FNENTRY();
   1795 +  return pCursor->bEOF;
   1796 +}
   1797 +
   1798 +static int recoverColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){
   1799 +  RecoverCursor *pCursor = (RecoverCursor*)cur;
   1800 +  Recover *pRecover = (Recover*)pCursor->base.pVtab;
   1801 +  u64 iColType;             /* Storage type of column i. */
   1802 +  unsigned char *pColData;  /* Column i's data. */
   1803 +  int shouldFree;           /* Non-zero if pColData should be freed. */
   1804 +  int rc;
   1805 +
   1806 +  FNENTRY();
   1807 +
   1808 +  if( i>=pRecover->nCols ){
   1809 +    return SQLITE_ERROR;
   1810 +  }
   1811 +
   1812 +  /* ROWID alias. */
   1813 +  if( (pRecover->pTypes[i]&MASK_ROWID) ){
   1814 +    sqlite3_result_int64(ctx, leafCursorCellRowid(pCursor->pLeafCursor));
   1815 +    return SQLITE_OK;
   1816 +  }
   1817 +
   1818 +  pColData = NULL;
   1819 +  shouldFree = 0;
   1820 +  rc = leafCursorCellColInfo(pCursor->pLeafCursor, i, &iColType,
   1821 +                             &pColData, &shouldFree);
   1822 +  if( rc!=SQLITE_OK ){
   1823 +    return rc;
   1824 +  }
   1825 +  /* recoverValidateLeafCell() should guarantee that this will never
   1826 +   * occur.
   1827 +   */
   1828 +  if( !SerialTypeIsCompatible(iColType, pRecover->pTypes[i]) ){
   1829 +    if( shouldFree ){
   1830 +      sqlite3_free(pColData);
   1831 +    }
   1832 +    return SQLITE_ERROR;
   1833 +  }
   1834 +
   1835 +  switch( iColType ){
   1836 +    case 0 : sqlite3_result_null(ctx); break;
   1837 +    case 1 : sqlite3_result_int64(ctx, decodeSigned(pColData, 1)); break;
   1838 +    case 2 : sqlite3_result_int64(ctx, decodeSigned(pColData, 2)); break;
   1839 +    case 3 : sqlite3_result_int64(ctx, decodeSigned(pColData, 3)); break;
   1840 +    case 4 : sqlite3_result_int64(ctx, decodeSigned(pColData, 4)); break;
   1841 +    case 5 : sqlite3_result_int64(ctx, decodeSigned(pColData, 6)); break;
   1842 +    case 6 : sqlite3_result_int64(ctx, decodeSigned(pColData, 8)); break;
   1843 +    case 7 : sqlite3_result_double(ctx, decodeFloat64(pColData)); break;
   1844 +    case 8 : sqlite3_result_int(ctx, 0); break;
   1845 +    case 9 : sqlite3_result_int(ctx, 1); break;
   1846 +    case 10 : assert( iColType!=10 ); break;
   1847 +    case 11 : assert( iColType!=11 ); break;
   1848 +
   1849 +    default : {
   1850 +      u32 l = SerialTypeLength(iColType);
   1851 +
   1852 +      /* If pColData was already allocated, arrange to pass ownership. */
   1853 +      sqlite3_destructor_type pFn = SQLITE_TRANSIENT;
   1854 +      if( shouldFree ){
   1855 +        pFn = sqlite3_free;
   1856 +        shouldFree = 0;
   1857 +      }
   1858 +
   1859 +      if( SerialTypeIsBlob(iColType) ){
   1860 +        sqlite3_result_blob(ctx, pColData, l, pFn);
   1861 +      }else{
   1862 +        if( pCursor->iEncoding==SQLITE_UTF16LE ){
   1863 +          sqlite3_result_text16le(ctx, (const void*)pColData, l, pFn);
   1864 +        }else if( pCursor->iEncoding==SQLITE_UTF16BE ){
   1865 +          sqlite3_result_text16be(ctx, (const void*)pColData, l, pFn);
   1866 +        }else{
   1867 +          sqlite3_result_text(ctx, (const char*)pColData, l, pFn);
   1868 +        }
   1869 +      }
   1870 +    } break;
   1871 +  }
   1872 +  if( shouldFree ){
   1873 +    sqlite3_free(pColData);
   1874 +  }
   1875 +  return SQLITE_OK;
   1876 +}
   1877 +
   1878 +static int recoverRowid(sqlite3_vtab_cursor *pVtabCursor, sqlite_int64 *pRowid){
   1879 +  RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor;
   1880 +  FNENTRY();
   1881 +  *pRowid = leafCursorCellRowid(pCursor->pLeafCursor);
   1882 +  return SQLITE_OK;
   1883 +}
   1884 +
   1885 +static sqlite3_module recoverModule = {
   1886 +  0,                         /* iVersion */
   1887 +  recoverCreate,             /* xCreate - create a table */
   1888 +  recoverConnect,            /* xConnect - connect to an existing table */
   1889 +  recoverBestIndex,          /* xBestIndex - Determine search strategy */
   1890 +  recoverDisconnect,         /* xDisconnect - Disconnect from a table */
   1891 +  recoverDestroy,            /* xDestroy - Drop a table */
   1892 +  recoverOpen,               /* xOpen - open a cursor */
   1893 +  recoverClose,              /* xClose - close a cursor */
   1894 +  recoverFilter,             /* xFilter - configure scan constraints */
   1895 +  recoverNext,               /* xNext - advance a cursor */
   1896 +  recoverEof,                /* xEof */
   1897 +  recoverColumn,             /* xColumn - read data */
   1898 +  recoverRowid,              /* xRowid - read data */
   1899 +  0,                         /* xUpdate - write data */
   1900 +  0,                         /* xBegin - begin transaction */
   1901 +  0,                         /* xSync - sync transaction */
   1902 +  0,                         /* xCommit - commit transaction */
   1903 +  0,                         /* xRollback - rollback transaction */
   1904 +  0,                         /* xFindFunction - function overloading */
   1905 +  0,                         /* xRename - rename the table */
   1906 +};
   1907 +
   1908 +int recoverVtableInit(sqlite3 *db){
   1909 +  return sqlite3_create_module_v2(db, "recover", &recoverModule, NULL, 0);
   1910 +}
   1911 +
   1912 +/* This section of code is for parsing the create input and
   1913 + * initializing the module.
   1914 + */
   1915 +
   1916 +/* Find the next word in zText and place the endpoints in pzWord*.
   1917 + * Returns true if the word is non-empty.  "Word" is defined as
   1918 + * ASCII alphanumeric plus '_' at this time.
   1919 + */
   1920 +static int findWord(const char *zText,
   1921 +                    const char **pzWordStart, const char **pzWordEnd){
   1922 +  int r;
   1923 +  while( ascii_isspace(*zText) ){
   1924 +    zText++;
   1925 +  }
   1926 +  *pzWordStart = zText;
   1927 +  while( ascii_isalnum(*zText) || *zText=='_' ){
   1928 +    zText++;
   1929 +  }
   1930 +  r = zText>*pzWordStart;  /* In case pzWordStart==pzWordEnd */
   1931 +  *pzWordEnd = zText;
   1932 +  return r;
   1933 +}
   1934 +
   1935 +/* Return true if the next word in zText is zWord, also setting
   1936 + * *pzContinue to the character after the word.
   1937 + */
   1938 +static int expectWord(const char *zText, const char *zWord,
   1939 +                      const char **pzContinue){
   1940 +  const char *zWordStart, *zWordEnd;
   1941 +  if( findWord(zText, &zWordStart, &zWordEnd) &&
   1942 +      ascii_strncasecmp(zWord, zWordStart, zWordEnd - zWordStart)==0 ){
   1943 +    *pzContinue = zWordEnd;
   1944 +    return 1;
   1945 +  }
   1946 +  return 0;
   1947 +}
   1948 +
   1949 +/* Parse the name and type information out of parameter.  In case of
   1950 + * success, *pzNameStart/End contain the name of the column,
   1951 + * *pzTypeStart/End contain the top-level type, and *pTypeMask has the
   1952 + * type mask to use for the column.
   1953 + */
   1954 +static int findNameAndType(const char *parameter,
   1955 +                           const char **pzNameStart, const char **pzNameEnd,
   1956 +                           const char **pzTypeStart, const char **pzTypeEnd,
   1957 +                           unsigned char *pTypeMask){
   1958 +  unsigned nNameLen;   /* Length of found name. */
   1959 +  const char *zEnd;    /* Current end of parsed column information. */
   1960 +  int bNotNull;        /* Non-zero if NULL is not allowed for name. */
   1961 +  int bStrict;         /* Non-zero if column requires exact type match. */
   1962 +  const char *zDummy;  /* Dummy parameter, result unused. */
   1963 +  unsigned i;
   1964 +
   1965 +  /* strictMask is used for STRICT, strictMask|otherMask if STRICT is
   1966 +   * not supplied.  zReplace provides an alternate type to expose to
   1967 +   * the caller.
   1968 +   */
   1969 +  static struct {
   1970 +    const char *zName;
   1971 +    unsigned char strictMask;
   1972 +    unsigned char otherMask;
   1973 +    const char *zReplace;
   1974 +  } kTypeInfo[] = {
   1975 +    { "ANY",
   1976 +      MASK_INTEGER | MASK_FLOAT | MASK_BLOB | MASK_TEXT | MASK_NULL,
   1977 +      0, "",
   1978 +    },
   1979 +    { "ROWID",   MASK_INTEGER | MASK_ROWID,             0, "INTEGER", },
   1980 +    { "INTEGER", MASK_INTEGER | MASK_NULL,              0, NULL, },
   1981 +    { "FLOAT",   MASK_FLOAT | MASK_NULL,                MASK_INTEGER, NULL, },
   1982 +    { "NUMERIC", MASK_INTEGER | MASK_FLOAT | MASK_NULL, MASK_TEXT, NULL, },
   1983 +    { "TEXT",    MASK_TEXT | MASK_NULL,                 MASK_BLOB, NULL, },
   1984 +    { "BLOB",    MASK_BLOB | MASK_NULL,                 0, NULL, },
   1985 +  };
   1986 +
   1987 +  if( !findWord(parameter, pzNameStart, pzNameEnd) ){
   1988 +    return SQLITE_MISUSE;
   1989 +  }
   1990 +
   1991 +  /* Manifest typing, accept any storage type. */
   1992 +  if( !findWord(*pzNameEnd, pzTypeStart, pzTypeEnd) ){
   1993 +    *pzTypeEnd = *pzTypeStart = "";
   1994 +    *pTypeMask = MASK_INTEGER | MASK_FLOAT | MASK_BLOB | MASK_TEXT | MASK_NULL;
   1995 +    return SQLITE_OK;
   1996 +  }
   1997 +
   1998 +  nNameLen = *pzTypeEnd - *pzTypeStart;
   1999 +  for( i=0; i<ArraySize(kTypeInfo); ++i ){
   2000 +    if( ascii_strncasecmp(kTypeInfo[i].zName, *pzTypeStart, nNameLen)==0 ){
   2001 +      break;
   2002 +    }
   2003 +  }
   2004 +  if( i==ArraySize(kTypeInfo) ){
   2005 +    return SQLITE_MISUSE;
   2006 +  }
   2007 +
   2008 +  zEnd = *pzTypeEnd;
   2009 +  bStrict = 0;
   2010 +  if( expectWord(zEnd, "STRICT", &zEnd) ){
   2011 +    /* TODO(shess): Ick.  But I don't want another single-purpose
   2012 +     * flag, either.
   2013 +     */
   2014 +    if( kTypeInfo[i].zReplace && !kTypeInfo[i].zReplace[0] ){
   2015 +      return SQLITE_MISUSE;
   2016 +    }
   2017 +    bStrict = 1;
   2018 +  }
   2019 +
   2020 +  bNotNull = 0;
   2021 +  if( expectWord(zEnd, "NOT", &zEnd) ){
   2022 +    if( expectWord(zEnd, "NULL", &zEnd) ){
   2023 +      bNotNull = 1;
   2024 +    }else{
   2025 +      /* Anything other than NULL after NOT is an error. */
   2026 +      return SQLITE_MISUSE;
   2027 +    }
   2028 +  }
   2029 +
   2030 +  /* Anything else is an error. */
   2031 +  if( findWord(zEnd, &zDummy, &zDummy) ){
   2032 +    return SQLITE_MISUSE;
   2033 +  }
   2034 +
   2035 +  *pTypeMask = kTypeInfo[i].strictMask;
   2036 +  if( !bStrict ){
   2037 +    *pTypeMask |= kTypeInfo[i].otherMask;
   2038 +  }
   2039 +  if( bNotNull ){
   2040 +    *pTypeMask &= ~MASK_NULL;
   2041 +  }
   2042 +  if( kTypeInfo[i].zReplace ){
   2043 +    *pzTypeStart = kTypeInfo[i].zReplace;
   2044 +    *pzTypeEnd = *pzTypeStart + strlen(*pzTypeStart);
   2045 +  }
   2046 +  return SQLITE_OK;
   2047 +}
   2048 +
   2049 +/* Parse the arguments, placing type masks in *pTypes and the exposed
   2050 + * schema in *pzCreateSql (for sqlite3_declare_vtab).
   2051 + */
   2052 +static int ParseColumnsAndGenerateCreate(unsigned nCols,
   2053 +                                         const char *const *pCols,
   2054 +                                         char **pzCreateSql,
   2055 +                                         unsigned char *pTypes,
   2056 +                                         char **pzErr){
   2057 +  unsigned i;
   2058 +  char *zCreateSql = sqlite3_mprintf("CREATE TABLE x(");
   2059 +  if( !zCreateSql ){
   2060 +    return SQLITE_NOMEM;
   2061 +  }
   2062 +
   2063 +  for( i=0; i<nCols; i++ ){
   2064 +    const char *zSep = (i < nCols - 1 ? ", " : ")");
   2065 +    const char *zNotNull = "";
   2066 +    const char *zNameStart, *zNameEnd;
   2067 +    const char *zTypeStart, *zTypeEnd;
   2068 +    int rc = findNameAndType(pCols[i],
   2069 +                             &zNameStart, &zNameEnd,
   2070 +                             &zTypeStart, &zTypeEnd,
   2071 +                             &pTypes[i]);
   2072 +    if( rc!=SQLITE_OK ){
   2073 +      *pzErr = sqlite3_mprintf("unable to parse column %d", i);
   2074 +      sqlite3_free(zCreateSql);
   2075 +      return rc;
   2076 +    }
   2077 +
   2078 +    if( !(pTypes[i]&MASK_NULL) ){
   2079 +      zNotNull = " NOT NULL";
   2080 +    }
   2081 +
   2082 +    /* Add name and type to the create statement. */
   2083 +    zCreateSql = sqlite3_mprintf("%z%.*s %.*s%s%s",
   2084 +                                 zCreateSql,
   2085 +                                 zNameEnd - zNameStart, zNameStart,
   2086 +                                 zTypeEnd - zTypeStart, zTypeStart,
   2087 +                                 zNotNull, zSep);
   2088 +    if( !zCreateSql ){
   2089 +      return SQLITE_NOMEM;
   2090 +    }
   2091 +  }
   2092 +
   2093 +  *pzCreateSql = zCreateSql;
   2094 +  return SQLITE_OK;
   2095 +}
   2096 +
   2097 +/* Helper function for initializing the module. */
   2098 +/* argv[0] module name
   2099 + * argv[1] db name for virtual table
   2100 + * argv[2] virtual table name
   2101 + * argv[3] backing table name
   2102 + * argv[4] columns
   2103 + */
   2104 +/* TODO(shess): Since connect isn't supported, could inline into
   2105 + * recoverCreate().
   2106 + */
   2107 +/* TODO(shess): Explore cases where it would make sense to set *pzErr. */
   2108 +static int recoverInit(
   2109 +  sqlite3 *db,                        /* Database connection */
   2110 +  void *pAux,                         /* unused */
   2111 +  int argc, const char *const*argv,   /* Parameters to CREATE TABLE statement */
   2112 +  sqlite3_vtab **ppVtab,              /* OUT: New virtual table */
   2113 +  char **pzErr                        /* OUT: Error message, if any */
   2114 +){
   2115 +  const unsigned kTypeCol = 4;  /* First argument with column type info. */
   2116 +  Recover *pRecover;            /* Virtual table structure being created. */
   2117 +  char *zDot;                   /* Any dot found in "db.table" backing. */
   2118 +  u32 iRootPage;                /* Root page of backing table. */
   2119 +  char *zCreateSql;             /* Schema of created virtual table. */
   2120 +  int rc;
   2121 +
   2122 +  /* Require to be in the temp database. */
   2123 +  if( ascii_strcasecmp(argv[1], "temp")!=0 ){
   2124 +    *pzErr = sqlite3_mprintf("recover table must be in temp database");
   2125 +    return SQLITE_MISUSE;
   2126 +  }
   2127 +
   2128 +  /* Need the backing table and at least one column. */
   2129 +  if( argc<=kTypeCol ){
   2130 +    *pzErr = sqlite3_mprintf("no columns specified");
   2131 +    return SQLITE_MISUSE;
   2132 +  }
   2133 +
   2134 +  pRecover = sqlite3_malloc(sizeof(Recover));
   2135 +  if( !pRecover ){
   2136 +    return SQLITE_NOMEM;
   2137 +  }
   2138 +  memset(pRecover, 0, sizeof(*pRecover));
   2139 +  pRecover->base.pModule = &recoverModule;
   2140 +  pRecover->db = db;
   2141 +
   2142 +  /* Parse out db.table, assuming main if no dot. */
   2143 +  zDot = strchr(argv[3], '.');
   2144 +  if( !zDot ){
   2145 +    pRecover->zDb = sqlite3_strdup(db->aDb[0].zName);
   2146 +    pRecover->zTable = sqlite3_strdup(argv[3]);
   2147 +  }else if( zDot>argv[3] && zDot[1]!='\0' ){
   2148 +    pRecover->zDb = sqlite3_strndup(argv[3], zDot - argv[3]);
   2149 +    pRecover->zTable = sqlite3_strdup(zDot + 1);
   2150 +  }else{
   2151 +    /* ".table" or "db." not allowed. */
   2152 +    *pzErr = sqlite3_mprintf("ill-formed table specifier");
   2153 +    recoverRelease(pRecover);
   2154 +    return SQLITE_ERROR;
   2155 +  }
   2156 +
   2157 +  pRecover->nCols = argc - kTypeCol;
   2158 +  pRecover->pTypes = sqlite3_malloc(pRecover->nCols);
   2159 +  if( !pRecover->zDb || !pRecover->zTable || !pRecover->pTypes ){
   2160 +    recoverRelease(pRecover);
   2161 +    return SQLITE_NOMEM;
   2162 +  }
   2163 +
   2164 +  /* Require the backing table to exist. */
   2165 +  /* TODO(shess): Be more pedantic about the form of the descriptor
   2166 +   * string.  This already fails for poorly-formed strings, simply
   2167 +   * because there won't be a root page, but it would make more sense
   2168 +   * to be explicit.
   2169 +   */
   2170 +  rc = getRootPage(pRecover->db, pRecover->zDb, pRecover->zTable, &iRootPage);
   2171 +  if( rc!=SQLITE_OK ){
   2172 +    *pzErr = sqlite3_mprintf("unable to find backing table");
   2173 +    recoverRelease(pRecover);
   2174 +    return rc;
   2175 +  }
   2176 +
   2177 +  /* Parse the column definitions. */
   2178 +  rc = ParseColumnsAndGenerateCreate(pRecover->nCols, argv + kTypeCol,
   2179 +                                     &zCreateSql, pRecover->pTypes, pzErr);
   2180 +  if( rc!=SQLITE_OK ){
   2181 +    recoverRelease(pRecover);
   2182 +    return rc;
   2183 +  }
   2184 +
   2185 +  rc = sqlite3_declare_vtab(db, zCreateSql);
   2186 +  sqlite3_free(zCreateSql);
   2187 +  if( rc!=SQLITE_OK ){
   2188 +    recoverRelease(pRecover);
   2189 +    return rc;
   2190 +  }
   2191 +
   2192 +  *ppVtab = (sqlite3_vtab *)pRecover;
   2193 +  return SQLITE_OK;
   2194 +}
   2195