Home | History | Annotate | Download | only in async
      1 /*
      2 ** 2005 December 14
      3 **
      4 ** The author disclaims copyright to this source code.  In place of
      5 ** a legal notice, here is a blessing:
      6 **
      7 **    May you do good and not evil.
      8 **    May you find forgiveness for yourself and forgive others.
      9 **    May you share freely, never taking more than you give.
     10 **
     11 *************************************************************************
     12 **
     13 ** $Id: sqlite3async.c,v 1.7 2009/07/18 11:52:04 danielk1977 Exp $
     14 **
     15 ** This file contains the implementation of an asynchronous IO backend
     16 ** for SQLite.
     17 */
     18 
     19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO)
     20 
     21 #include "sqlite3async.h"
     22 #include "sqlite3.h"
     23 #include <stdarg.h>
     24 #include <string.h>
     25 #include <assert.h>
     26 
/* Useful macros used in several places.
**
** MIN() and MAX() expand each argument more than once, so arguments
** must be free of side effects.
*/
#define MIN(x,y) ((x)<(y)?(x):(y))
#define MAX(x,y) ((x)>(y)?(x):(y))

#ifndef SQLITE_AMALGAMATION
/* Macro to mark parameters as unused and silence compiler warnings. */
#define UNUSED_PARAMETER(x) (void)(x)
#endif
     35 
/* Forward references. The structures are defined further down. */
typedef struct AsyncWrite AsyncWrite;        /* One entry on the write-op queue */
typedef struct AsyncFile AsyncFile;          /* sqlite3_file subclass for async IO */
typedef struct AsyncFileData AsyncFileData;  /* Data referenced by AsyncFile.pData */
typedef struct AsyncFileLock AsyncFileLock;  /* Lock state of one open handle */
typedef struct AsyncLock AsyncLock;          /* Lock state of one distinct open file */
     42 
     43 /* Enable for debugging */
     44 #ifndef NDEBUG
     45 #include <stdio.h>
     46 static int sqlite3async_trace = 0;
     47 # define ASYNC_TRACE(X) if( sqlite3async_trace ) asyncTrace X
     48 static void asyncTrace(const char *zFormat, ...){
     49   char *z;
     50   va_list ap;
     51   va_start(ap, zFormat);
     52   z = sqlite3_vmprintf(zFormat, ap);
     53   va_end(ap);
     54   fprintf(stderr, "[%d] %s", 0 /* (int)pthread_self() */, z);
     55   sqlite3_free(z);
     56 }
     57 #else
     58 # define ASYNC_TRACE(X)
     59 #endif
     60 
     61 /*
     62 ** THREAD SAFETY NOTES
     63 **
     64 ** Basic rules:
     65 **
     66 **     * Both read and write access to the global write-op queue must be
     67 **       protected by the async.queueMutex. As are the async.ioError and
     68 **       async.nFile variables.
     69 **
     70 **     * The async.pLock list and all AsyncLock and AsyncFileLock
     71 **       structures must be protected by the async.lockMutex mutex.
     72 **
     73 **     * The file handles from the underlying system are not assumed to
     74 **       be thread safe.
     75 **
     76 **     * See the last two paragraphs under "The Writer Thread" for
     77 **       an assumption to do with file-handle synchronization by the Os.
     78 **
     79 ** Deadlock prevention:
     80 **
**     There are three mutexes used by the system: the "writer" mutex,
**     the "queue" mutex and the "lock" mutex. Rules are:
**
**     * It is illegal to block on the writer mutex when any other mutex
**       is held, and
**
**     * It is illegal to block on the queue mutex when the lock mutex
**       is held.
**
**     i.e. mutexes must be grabbed in the order "writer", "queue", "lock".
     91 **
     92 ** File system operations (invoked by SQLite thread):
     93 **
     94 **     xOpen
     95 **     xDelete
     96 **     xFileExists
     97 **
     98 ** File handle operations (invoked by SQLite thread):
     99 **
    100 **         asyncWrite, asyncClose, asyncTruncate, asyncSync
    101 **
    102 **     The operations above add an entry to the global write-op list. They
    103 **     prepare the entry, acquire the async.queueMutex momentarily while
    104 **     list pointers are  manipulated to insert the new entry, then release
    105 **     the mutex and signal the writer thread to wake up in case it happens
    106 **     to be asleep.
    107 **
    108 **
    109 **         asyncRead, asyncFileSize.
    110 **
**     Read operations. Both of these read from the underlying file
**     first, then adjust their result based on pending writes in the
**     write-op queue. So async.queueMutex is held for the duration
**     of these operations to prevent other threads from changing the
**     queue in mid operation.
    116 **
    117 **
    118 **         asyncLock, asyncUnlock, asyncCheckReservedLock
    119 **
    120 **     These primitives implement in-process locking using a hash table
    121 **     on the file name.  Files are locked correctly for connections coming
    122 **     from the same process.  But other processes cannot see these locks
    123 **     and will therefore not honor them.
    124 **
    125 **
    126 ** The writer thread:
    127 **
**     The async.writerMutex is used to make sure that there is only
**     a single writer thread running at a time.
    130 **
    131 **     Inside the writer thread is a loop that works like this:
    132 **
    133 **         WHILE (write-op list is not empty)
    134 **             Do IO operation at head of write-op list
    135 **             Remove entry from head of write-op list
    136 **         END WHILE
    137 **
    138 **     The async.queueMutex is always held during the <write-op list is
    139 **     not empty> test, and when the entry is removed from the head
    140 **     of the write-op list. Sometimes it is held for the interim
    141 **     period (while the IO is performed), and sometimes it is
    142 **     relinquished. It is relinquished if (a) the IO op is an
    143 **     ASYNC_CLOSE or (b) when the file handle was opened, two of
    144 **     the underlying systems handles were opened on the same
    145 **     file-system entry.
    146 **
    147 **     If condition (b) above is true, then one file-handle
    148 **     (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the
    149 **     file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush()
    150 **     threads to perform write() operations. This means that read
    151 **     operations are not blocked by asynchronous writes (although
    152 **     asynchronous writes may still be blocked by reads).
    153 **
    154 **     This assumes that the OS keeps two handles open on the same file
    155 **     properly in sync. That is, any read operation that starts after a
    156 **     write operation on the same file system entry has completed returns
    157 **     data consistent with the write. We also assume that if one thread
    158 **     reads a file while another is writing it all bytes other than the
    159 **     ones actually being written contain valid data.
    160 **
    161 **     If the above assumptions are not true, set the preprocessor symbol
    162 **     SQLITE_ASYNC_TWO_FILEHANDLES to 0.
    163 */
    164 
    165 
/*
** TESTONLY(X) expands to X when assert() is enabled and to nothing when
** NDEBUG is defined. It wraps bookkeeping statements whose only purpose
** is to support assert() checks.
*/
#ifndef NDEBUG
# define TESTONLY( X ) X
#else
# define TESTONLY( X )
#endif
    171 
    172 /*
    173 ** PORTING FUNCTIONS
    174 **
    175 ** There are two definitions of the following functions. One for pthreads
    176 ** compatible systems and one for Win32. These functions isolate the OS
    177 ** specific code required by each platform.
    178 **
    179 ** The system uses three mutexes and a single condition variable. To
    180 ** block on a mutex, async_mutex_enter() is called. The parameter passed
    181 ** to async_mutex_enter(), which must be one of ASYNC_MUTEX_LOCK,
    182 ** ASYNC_MUTEX_QUEUE or ASYNC_MUTEX_WRITER, identifies which of the three
    183 ** mutexes to lock. Similarly, to unlock a mutex, async_mutex_leave() is
    184 ** called with a parameter identifying the mutex being unlocked. Mutexes
    185 ** are not recursive - it is an error to call async_mutex_enter() to
    186 ** lock a mutex that is already locked, or to call async_mutex_leave()
    187 ** to unlock a mutex that is not currently locked.
    188 **
    189 ** The async_cond_wait() and async_cond_signal() functions are modelled
    190 ** on the pthreads functions with similar names. The first parameter to
    191 ** both functions is always ASYNC_COND_QUEUE. When async_cond_wait()
    192 ** is called the mutex identified by the second parameter must be held.
    193 ** The mutex is unlocked, and the calling thread simultaneously begins
    194 ** waiting for the condition variable to be signalled by another thread.
    195 ** After another thread signals the condition variable, the calling
    196 ** thread stops waiting, locks mutex eMutex and returns. The
    197 ** async_cond_signal() function is used to signal the condition variable.
    198 ** It is assumed that the mutex used by the thread calling async_cond_wait()
    199 ** is held by the caller of async_cond_signal() (otherwise there would be
    200 ** a race condition).
    201 **
    202 ** It is guaranteed that no other thread will call async_cond_wait() when
    203 ** there is already a thread waiting on the condition variable.
    204 **
    205 ** The async_sched_yield() function is called to suggest to the operating
    206 ** system that it would be a good time to shift the current thread off the
    207 ** CPU. The system will still work if this function is not implemented
    208 ** (it is not currently implemented for win32), but it might be marginally
    209 ** more efficient if it is.
    210 */
    211 static void async_mutex_enter(int eMutex);
    212 static void async_mutex_leave(int eMutex);
    213 static void async_cond_wait(int eCond, int eMutex);
    214 static void async_cond_signal(int eCond);
    215 static void async_sched_yield(void);
    216 
    217 /*
** There are also two definitions of the following. async_os_initialize()
** is called when the asynchronous VFS is first installed, and
** async_os_shutdown() is called when it is uninstalled (from within
** sqlite3async_shutdown()).
    221 **
    222 ** For pthreads builds, both of these functions are no-ops. For win32,
    223 ** they provide an opportunity to initialize and finalize the required
    224 ** mutex and condition variables.
    225 **
    226 ** If async_os_initialize() returns other than zero, then the initialization
    227 ** fails and SQLITE_ERROR is returned to the user.
    228 */
    229 static int async_os_initialize(void);
    230 static void async_os_shutdown(void);
    231 
/* Values for use as the 'eMutex' argument of the above functions. The
** integer values assigned to these constants are important for assert()
** statements that verify that mutexes are locked in the correct order.
** Specifically, it is unsafe to try to lock mutex N while holding a lock
** on mutex M if (M<=N). Put another way, a thread that needs several
** mutexes takes them in order of decreasing value: WRITER, QUEUE, LOCK.
*/
#define ASYNC_MUTEX_LOCK    0   /* Guards async.pLock and the lock structures */
#define ASYNC_MUTEX_QUEUE   1   /* Guards the write-op queue, ioError and nFile */
#define ASYNC_MUTEX_WRITER  2   /* Ensures only one writer thread runs at a time */

/* Values for use as the 'eCond' argument of the above functions. */
#define ASYNC_COND_QUEUE    0
    244 
    245 /*************************************************************************
    246 ** Start of OS specific code.
    247 */
    248 #if SQLITE_OS_WIN || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__)
    249 
    250 #include <windows.h>
    251 
/* The following block contains the win32 specific code. */

/*
** True if the calling thread is recorded as the holder of mutex X. The
** aHolder[] slots are only written inside TESTONLY(), so this macro is
** only meaningful inside assert() expressions.
*/
#define mutex_held(X) (GetCurrentThreadId()==primitives.aHolder[X])

/*
** Synchronization objects used by the win32 port. They are created by
** async_os_initialize() and destroyed by async_os_shutdown().
*/
static struct AsyncPrimitives {
  int isInit;                   /* True after async_os_initialize() succeeds */
  DWORD aHolder[3];             /* Thread id holding each mutex (asserts only) */
  CRITICAL_SECTION aMutex[3];   /* Indexed by ASYNC_MUTEX_XXX */
  HANDLE aCond[1];              /* Indexed by ASYNC_COND_XXX */
} primitives = { 0 };
    262 
    263 static int async_os_initialize(void){
    264   if( !primitives.isInit ){
    265     primitives.aCond[0] = CreateEvent(NULL, TRUE, FALSE, 0);
    266     if( primitives.aCond[0]==NULL ){
    267       return 1;
    268     }
    269     InitializeCriticalSection(&primitives.aMutex[0]);
    270     InitializeCriticalSection(&primitives.aMutex[1]);
    271     InitializeCriticalSection(&primitives.aMutex[2]);
    272     primitives.isInit = 1;
    273   }
    274   return 0;
    275 }
    276 static void async_os_shutdown(void){
    277   if( primitives.isInit ){
    278     DeleteCriticalSection(&primitives.aMutex[0]);
    279     DeleteCriticalSection(&primitives.aMutex[1]);
    280     DeleteCriticalSection(&primitives.aMutex[2]);
    281     CloseHandle(primitives.aCond[0]);
    282     primitives.isInit = 0;
    283   }
    284 }
    285 
    286 /* The following block contains the Win32 specific code. */
    287 static void async_mutex_enter(int eMutex){
    288   assert( eMutex==0 || eMutex==1 || eMutex==2 );
    289   assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
    290   assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
    291   assert( eMutex!=0 || (!mutex_held(0)) );
    292   EnterCriticalSection(&primitives.aMutex[eMutex]);
    293   TESTONLY( primitives.aHolder[eMutex] = GetCurrentThreadId(); )
    294 }
    295 static void async_mutex_leave(int eMutex){
    296   assert( eMutex==0 || eMutex==1 || eMutex==2 );
    297   assert( mutex_held(eMutex) );
    298   TESTONLY( primitives.aHolder[eMutex] = 0; )
    299   LeaveCriticalSection(&primitives.aMutex[eMutex]);
    300 }
    301 static void async_cond_wait(int eCond, int eMutex){
    302   ResetEvent(primitives.aCond[eCond]);
    303   async_mutex_leave(eMutex);
    304   WaitForSingleObject(primitives.aCond[eCond], INFINITE);
    305   async_mutex_enter(eMutex);
    306 }
/*
** Signal condition eCond by setting its event object. The caller must
** hold the queue mutex (checked by assert()).
*/
static void async_cond_signal(int eCond){
  assert( mutex_held(ASYNC_MUTEX_QUEUE) );
  SetEvent(primitives.aCond[eCond]);
}
/*
** Hint to the operating system that this would be a good moment to run
** some other thread.
*/
static void async_sched_yield(void){
  Sleep(0);
}
    314 #else
    315 
    316 /* The following block contains the pthreads specific code. */
    317 #include <pthread.h>
    318 #include <sched.h>
    319 
/*
** True if the calling thread is recorded as the holder of mutex X. The
** aHolder[] slots are only written inside TESTONLY(), so this macro is
** only meaningful inside assert() expressions.
*/
#define mutex_held(X) pthread_equal(primitives.aHolder[X], pthread_self())
    321 
/* The pthreads mutexes and condition variable are statically initialized,
** so there is nothing to set up or tear down. */
static int  async_os_initialize(void) {return 0;}
static void async_os_shutdown(void) {}
    324 
/*
** Synchronization objects used by the pthreads port. All of them are
** initialized statically.
*/
static struct AsyncPrimitives {
  pthread_mutex_t aMutex[3];    /* Indexed by ASYNC_MUTEX_XXX */
  pthread_cond_t aCond[1];      /* Indexed by ASYNC_COND_XXX */
  pthread_t aHolder[3];         /* Thread holding each mutex (asserts only) */
} primitives = {
  { PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER
  } , {
    PTHREAD_COND_INITIALIZER
  } , { 0, 0, 0 }
};
    337 
    338 static void async_mutex_enter(int eMutex){
    339   assert( eMutex==0 || eMutex==1 || eMutex==2 );
    340   assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
    341   assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
    342   assert( eMutex!=0 || (!mutex_held(0)) );
    343   pthread_mutex_lock(&primitives.aMutex[eMutex]);
    344   TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
    345 }
    346 static void async_mutex_leave(int eMutex){
    347   assert( eMutex==0 || eMutex==1 || eMutex==2 );
    348   assert( mutex_held(eMutex) );
    349   TESTONLY( primitives.aHolder[eMutex] = 0; )
    350   pthread_mutex_unlock(&primitives.aMutex[eMutex]);
    351 }
    352 static void async_cond_wait(int eCond, int eMutex){
    353   assert( eMutex==0 || eMutex==1 || eMutex==2 );
    354   assert( mutex_held(eMutex) );
    355   TESTONLY( primitives.aHolder[eMutex] = 0; )
    356   pthread_cond_wait(&primitives.aCond[eCond], &primitives.aMutex[eMutex]);
    357   TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
    358 }
/*
** Signal condition variable eCond. The caller must hold the queue mutex
** (checked by assert()).
*/
static void async_cond_signal(int eCond){
  assert( mutex_held(ASYNC_MUTEX_QUEUE) );
  pthread_cond_signal(&primitives.aCond[eCond]);
}
/*
** Hint to the operating system that this would be a good moment to run
** some other thread.
*/
static void async_sched_yield(void){
  sched_yield();
}
    366 #endif
    367 /*
    368 ** End of OS specific code.
    369 *************************************************************************/
    370 
/* Assert that the calling thread holds mutex X (an ASYNC_MUTEX_XXX value). */
#define assert_mutex_is_held(X) assert( mutex_held(X) )
    372 
    373 
/*
** Defaults to 1 unless the build defines it first. See the notes under
** "The writer thread" near the top of this file for the assumptions about
** the OS that the two-handle mode depends on.
*/
#ifndef SQLITE_ASYNC_TWO_FILEHANDLES
/* #define SQLITE_ASYNC_TWO_FILEHANDLES 0 */
#define SQLITE_ASYNC_TWO_FILEHANDLES 1
#endif
    378 
/*
** State information is held in the static variable "async" defined
** as the following structure.
**
** Both async.ioError and async.nFile are protected by async.queueMutex.
** The queue pointers are protected by the same mutex; async.pLock is
** protected by the lock mutex.
*/
static struct TestAsyncStaticData {
  AsyncWrite *pQueueFirst;     /* Next write operation to be processed */
  AsyncWrite *pQueueLast;      /* Last write operation on the list */
  AsyncLock *pLock;            /* Linked list of all AsyncLock structures */
  volatile int ioDelay;        /* Extra delay between write operations */
  volatile int eHalt;          /* One of the SQLITEASYNC_HALT_XXX values */
  volatile int bLockFiles;     /* Current value of "lockfiles" parameter */
  int ioError;                 /* True if an IO error has occurred */
  int nFile;                   /* Number of open files (from sqlite pov) */
} async = { 0,0,0,0,0,1,0,0 }; /* Everything zero except bLockFiles, which is 1 */
    395 
/* Possible values of AsyncWrite.op. The values are also used as indexes
** into azOpcodeName[], so the two lists must be kept in step. */
#define ASYNC_NOOP          0
#define ASYNC_WRITE         1
#define ASYNC_SYNC          2
#define ASYNC_TRUNCATE      3
#define ASYNC_CLOSE         4
#define ASYNC_DELETE        5
#define ASYNC_OPENEXCLUSIVE 6
#define ASYNC_UNLOCK        7
    405 
/* Names of opcodes.  Used for debugging only.
** Make sure these stay in sync with the macros above!
** The array is indexed by the ASYNC_XXX opcode value.
*/
static const char *azOpcodeName[] = {
  "NOOP", "WRITE", "SYNC", "TRUNCATE", "CLOSE", "DELETE", "OPENEX", "UNLOCK"
};
    412 
    413 /*
    414 ** Entries on the write-op queue are instances of the AsyncWrite
    415 ** structure, defined here.
    416 **
    417 ** The interpretation of the iOffset and nByte variables varies depending
    418 ** on the value of AsyncWrite.op:
    419 **
    420 ** ASYNC_NOOP:
    421 **     No values used.
    422 **
    423 ** ASYNC_WRITE:
    424 **     iOffset -> Offset in file to write to.
    425 **     nByte   -> Number of bytes of data to write (pointed to by zBuf).
    426 **
    427 ** ASYNC_SYNC:
    428 **     nByte   -> flags to pass to sqlite3OsSync().
    429 **
    430 ** ASYNC_TRUNCATE:
    431 **     iOffset -> Size to truncate file to.
    432 **     nByte   -> Unused.
    433 **
    434 ** ASYNC_CLOSE:
    435 **     iOffset -> Unused.
    436 **     nByte   -> Unused.
    437 **
    438 ** ASYNC_DELETE:
    439 **     iOffset -> Contains the "syncDir" flag.
    440 **     nByte   -> Number of bytes of zBuf points to (file name).
    441 **
    442 ** ASYNC_OPENEXCLUSIVE:
    443 **     iOffset -> Value of "delflag".
    444 **     nByte   -> Number of bytes of zBuf points to (file name).
    445 **
    446 ** ASYNC_UNLOCK:
    447 **     nByte   -> Argument to sqlite3OsUnlock().
    448 **
    449 **
    450 ** For an ASYNC_WRITE operation, zBuf points to the data to write to the file.
    451 ** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
    452 ** single blob, so is deleted when sqlite3_free() is called on the parent
    453 ** structure.
    454 */
/* One entry on the global write-op queue. */
struct AsyncWrite {
  AsyncFileData *pFileData;    /* File to write data to or sync */
  int op;                      /* One of ASYNC_xxx etc. */
  sqlite_int64 iOffset;        /* See above */
  int nByte;          /* See above */
  char *zBuf;         /* Data to write to file (or NULL if op!=ASYNC_WRITE) */
  AsyncWrite *pNext;  /* Next write operation (to any file) */
};
    463 
/*
** An instance of this structure is created for each distinct open file
** (i.e. if two handles are opened on the one file, only one of these
** structures is allocated) and linked into the list headed by
** async.pLock via AsyncLock.pNext.
**
** AsyncLock.pList points to the head of a linked list of AsyncFileLock
** structures, one for each handle currently open on the file.
**
** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
** not passed to the sqlite3OsOpen() call), or if async.bLockFiles is
** false, variables AsyncLock.pFile and AsyncLock.eLock are never used.
** Otherwise, pFile is a file handle opened on the file in question and
** used to obtain the file-system locks required by database connections
** within this process.
**
** See comments above the asyncLock() function for more details on
** the implementation of database locking used by this backend.
*/
struct AsyncLock {
  char *zFile;                /* Name identifying the file (presumably the full path) */
  int nFile;                  /* Presumably the length of zFile in bytes - confirm */
  sqlite3_file *pFile;        /* Handle used to take file-system locks */
  int eLock;                  /* Presumably the lock currently held on pFile - confirm */
  AsyncFileLock *pList;       /* One AsyncFileLock per handle open on the file */
  AsyncLock *pNext;           /* Next in linked list headed by async.pLock */
};
    491 
/*
** An instance of the following structure is allocated along with each
** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
** file was opened with the SQLITE_OPEN_MAIN_DB.
*/
struct AsyncFileLock {
  int eLock;                /* Internally visible lock state (sqlite pov) */
  int eAsyncLock;           /* Lock-state with write-queue unlock */
  AsyncFileLock *pNext;     /* Next entry on the AsyncLock.pList list */
};
    502 
/*
** The AsyncFile structure is a subclass of sqlite3_file used for
** asynchronous IO.
**
** All of the actual data for the structure is stored in the structure
** pointed to by AsyncFile.pData, which is allocated as part of the
** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
** lifetime of the AsyncFile structure is ended by the caller after OsClose()
** is called, but the data in AsyncFileData may be required by the
** writer thread after that point.
*/
struct AsyncFile {
  sqlite3_io_methods *pMethod;   /* IO method table for this handle */
  AsyncFileData *pData;          /* State that may outlive this structure */
};
/*
** Per-handle state referenced by AsyncFile.pData. The embedded closeOp
** entry lets asyncClose() queue the close without allocating memory.
*/
struct AsyncFileData {
  char *zName;               /* Underlying OS filename - used for debugging */
  int nName;                 /* Number of characters in zName */
  sqlite3_file *pBaseRead;   /* Read handle to the underlying Os file */
  sqlite3_file *pBaseWrite;  /* Write handle to the underlying Os file */
  AsyncFileLock lock;        /* Lock state for this handle */
  AsyncLock *pLock;          /* AsyncLock object for this file system entry */
  AsyncWrite closeOp;        /* Preallocated close operation */
};
    527 
    528 /*
    529 ** Add an entry to the end of the global write-op list. pWrite should point
    530 ** to an AsyncWrite structure allocated using sqlite3_malloc().  The writer
    531 ** thread will call sqlite3_free() to free the structure after the specified
    532 ** operation has been completed.
    533 **
    534 ** Once an AsyncWrite structure has been added to the list, it becomes the
    535 ** property of the writer thread and must not be read or modified by the
    536 ** caller.
    537 */
    538 static void addAsyncWrite(AsyncWrite *pWrite){
    539   /* We must hold the queue mutex in order to modify the queue pointers */
    540   if( pWrite->op!=ASYNC_UNLOCK ){
    541     async_mutex_enter(ASYNC_MUTEX_QUEUE);
    542   }
    543 
    544   /* Add the record to the end of the write-op queue */
    545   assert( !pWrite->pNext );
    546   if( async.pQueueLast ){
    547     assert( async.pQueueFirst );
    548     async.pQueueLast->pNext = pWrite;
    549   }else{
    550     async.pQueueFirst = pWrite;
    551   }
    552   async.pQueueLast = pWrite;
    553   ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op],
    554          pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset));
    555 
    556   if( pWrite->op==ASYNC_CLOSE ){
    557     async.nFile--;
    558   }
    559 
    560   /* The writer thread might have been idle because there was nothing
    561   ** on the write-op queue for it to do.  So wake it up. */
    562   async_cond_signal(ASYNC_COND_QUEUE);
    563 
    564   /* Drop the queue mutex */
    565   if( pWrite->op!=ASYNC_UNLOCK ){
    566     async_mutex_leave(ASYNC_MUTEX_QUEUE);
    567   }
    568 }
    569 
    570 /*
    571 ** Increment async.nFile in a thread-safe manner.
    572 */
    573 static void incrOpenFileCount(void){
    574   /* We must hold the queue mutex in order to modify async.nFile */
    575   async_mutex_enter(ASYNC_MUTEX_QUEUE);
    576   if( async.nFile==0 ){
    577     async.ioError = SQLITE_OK;
    578   }
    579   async.nFile++;
    580   async_mutex_leave(ASYNC_MUTEX_QUEUE);
    581 }
    582 
    583 /*
    584 ** This is a utility function to allocate and populate a new AsyncWrite
    585 ** structure and insert it (via addAsyncWrite() ) into the global list.
    586 */
    587 static int addNewAsyncWrite(
    588   AsyncFileData *pFileData,
    589   int op,
    590   sqlite3_int64 iOffset,
    591   int nByte,
    592   const char *zByte
    593 ){
    594   AsyncWrite *p;
    595   if( op!=ASYNC_CLOSE && async.ioError ){
    596     return async.ioError;
    597   }
    598   p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0));
    599   if( !p ){
    600     /* The upper layer does not expect operations like OsWrite() to
    601     ** return SQLITE_NOMEM. This is partly because under normal conditions
    602     ** SQLite is required to do rollback without calling malloc(). So
    603     ** if malloc() fails here, treat it as an I/O error. The above
    604     ** layer knows how to handle that.
    605     */
    606     return SQLITE_IOERR;
    607   }
    608   p->op = op;
    609   p->iOffset = iOffset;
    610   p->nByte = nByte;
    611   p->pFileData = pFileData;
    612   p->pNext = 0;
    613   if( zByte ){
    614     p->zBuf = (char *)&p[1];
    615     memcpy(p->zBuf, zByte, nByte);
    616   }else{
    617     p->zBuf = 0;
    618   }
    619   addAsyncWrite(p);
    620   return SQLITE_OK;
    621 }
    622 
    623 /*
    624 ** Close the file. This just adds an entry to the write-op list, the file is
    625 ** not actually closed.
    626 */
    627 static int asyncClose(sqlite3_file *pFile){
    628   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
    629 
    630   /* Unlock the file, if it is locked */
    631   async_mutex_enter(ASYNC_MUTEX_LOCK);
    632   p->lock.eLock = 0;
    633   async_mutex_leave(ASYNC_MUTEX_LOCK);
    634 
    635   addAsyncWrite(&p->closeOp);
    636   return SQLITE_OK;
    637 }
    638 
/*
** Implementation of sqlite3OsWrite() for asynchronous files. Instead of
** writing to the underlying file, this function adds an entry to the end of
** the global AsyncWrite list, with a private copy of the amt bytes at pBuf.
**
** Returns whatever addNewAsyncWrite() returns: SQLITE_OK on success,
** the previously recorded async.ioError if there is one, or SQLITE_IOERR
** if memory for the entry cannot be allocated.
*/
static int asyncWrite(
  sqlite3_file *pFile,
  const void *pBuf,
  int amt,
  sqlite3_int64 iOff
){
  AsyncFileData *p = ((AsyncFile *)pFile)->pData;
  return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf);
}
    654 
    655 /*
    656 ** Read data from the file. First we read from the filesystem, then adjust
    657 ** the contents of the buffer based on ASYNC_WRITE operations in the
    658 ** write-op queue.
    659 **
    660 ** This method holds the mutex from start to finish.
    661 */
    662 static int asyncRead(
    663   sqlite3_file *pFile,
    664   void *zOut,
    665   int iAmt,
    666   sqlite3_int64 iOffset
    667 ){
    668   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
    669   int rc = SQLITE_OK;
    670   sqlite3_int64 filesize = 0;
    671   sqlite3_file *pBase = p->pBaseRead;
    672   sqlite3_int64 iAmt64 = (sqlite3_int64)iAmt;
    673 
    674   /* Grab the write queue mutex for the duration of the call */
    675   async_mutex_enter(ASYNC_MUTEX_QUEUE);
    676 
    677   /* If an I/O error has previously occurred in this virtual file
    678   ** system, then all subsequent operations fail.
    679   */
    680   if( async.ioError!=SQLITE_OK ){
    681     rc = async.ioError;
    682     goto asyncread_out;
    683   }
    684 
    685   if( pBase->pMethods ){
    686     sqlite3_int64 nRead;
    687     rc = pBase->pMethods->xFileSize(pBase, &filesize);
    688     if( rc!=SQLITE_OK ){
    689       goto asyncread_out;
    690     }
    691     nRead = MIN(filesize - iOffset, iAmt64);
    692     if( nRead>0 ){
    693       rc = pBase->pMethods->xRead(pBase, zOut, (int)nRead, iOffset);
    694       ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset));
    695     }
    696   }
    697 
    698   if( rc==SQLITE_OK ){
    699     AsyncWrite *pWrite;
    700     char *zName = p->zName;
    701 
    702     for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
    703       if( pWrite->op==ASYNC_WRITE && (
    704         (pWrite->pFileData==p) ||
    705         (zName && pWrite->pFileData->zName==zName)
    706       )){
    707         sqlite3_int64 nCopy;
    708         sqlite3_int64 nByte64 = (sqlite3_int64)pWrite->nByte;
    709 
    710         /* Set variable iBeginIn to the offset in buffer pWrite->zBuf[] from
    711         ** which data should be copied. Set iBeginOut to the offset within
    712         ** the output buffer to which data should be copied. If either of
    713         ** these offsets is a negative number, set them to 0.
    714         */
    715         sqlite3_int64 iBeginOut = (pWrite->iOffset-iOffset);
    716         sqlite3_int64 iBeginIn = -iBeginOut;
    717         if( iBeginIn<0 ) iBeginIn = 0;
    718         if( iBeginOut<0 ) iBeginOut = 0;
    719 
    720         filesize = MAX(filesize, pWrite->iOffset+nByte64);
    721 
    722         nCopy = MIN(nByte64-iBeginIn, iAmt64-iBeginOut);
    723         if( nCopy>0 ){
    724           memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], (size_t)nCopy);
    725           ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset));
    726         }
    727       }
    728     }
    729   }
    730 
    731 asyncread_out:
    732   async_mutex_leave(ASYNC_MUTEX_QUEUE);
    733   if( rc==SQLITE_OK && filesize<(iOffset+iAmt) ){
    734     rc = SQLITE_IOERR_SHORT_READ;
    735   }
    736   return rc;
    737 }
    738 
/*
** Truncate the file to nByte bytes in length. This just adds an
** ASYNC_TRUNCATE entry to the write-op list, no IO actually takes place.
** Returns the result of addNewAsyncWrite().
*/
static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){
  AsyncFileData *p = ((AsyncFile *)pFile)->pData;
  /* For ASYNC_TRUNCATE the new size travels in the iOffset field */
  return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0);
}
    747 
/*
** Sync the file. This just adds an ASYNC_SYNC entry to the write-op list,
** the sync() is done later by sqlite3_async_flush(). Returns the result
** of addNewAsyncWrite().
*/
static int asyncSync(sqlite3_file *pFile, int flags){
  AsyncFileData *p = ((AsyncFile *)pFile)->pData;
  /* For ASYNC_SYNC the sync flags travel in the nByte field */
  return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0);
}
    756 
    757 /*
    758 ** Read the size of the file. First we read the size of the file system
    759 ** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations
    760 ** currently in the write-op list.
    761 **
    762 ** This method holds the mutex from start to finish.
    763 */
    764 int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){
    765   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
    766   int rc = SQLITE_OK;
    767   sqlite3_int64 s = 0;
    768   sqlite3_file *pBase;
    769 
    770   async_mutex_enter(ASYNC_MUTEX_QUEUE);
    771 
    772   /* Read the filesystem size from the base file. If pMethods is NULL, this
    773   ** means the file hasn't been opened yet. In this case all relevant data
    774   ** must be in the write-op queue anyway, so we can omit reading from the
    775   ** file-system.
    776   */
    777   pBase = p->pBaseRead;
    778   if( pBase->pMethods ){
    779     rc = pBase->pMethods->xFileSize(pBase, &s);
    780   }
    781 
    782   if( rc==SQLITE_OK ){
    783     AsyncWrite *pWrite;
    784     for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
    785       if( pWrite->op==ASYNC_DELETE
    786        && p->zName
    787        && strcmp(p->zName, pWrite->zBuf)==0
    788       ){
    789         s = 0;
    790       }else if( pWrite->pFileData && (
    791           (pWrite->pFileData==p)
    792        || (p->zName && pWrite->pFileData->zName==p->zName)
    793       )){
    794         switch( pWrite->op ){
    795           case ASYNC_WRITE:
    796             s = MAX(pWrite->iOffset + (sqlite3_int64)(pWrite->nByte), s);
    797             break;
    798           case ASYNC_TRUNCATE:
    799             s = MIN(s, pWrite->iOffset);
    800             break;
    801         }
    802       }
    803     }
    804     *piSize = s;
    805   }
    806   async_mutex_leave(ASYNC_MUTEX_QUEUE);
    807   return rc;
    808 }
    809 
    810 /*
    811 ** Lock or unlock the actual file-system entry.
    812 */
    813 static int getFileLock(AsyncLock *pLock){
    814   int rc = SQLITE_OK;
    815   AsyncFileLock *pIter;
    816   int eRequired = 0;
    817 
    818   if( pLock->pFile ){
    819     for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
    820       assert(pIter->eAsyncLock>=pIter->eLock);
    821       if( pIter->eAsyncLock>eRequired ){
    822         eRequired = pIter->eAsyncLock;
    823         assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE);
    824       }
    825     }
    826 
    827     if( eRequired>pLock->eLock ){
    828       rc = pLock->pFile->pMethods->xLock(pLock->pFile, eRequired);
    829       if( rc==SQLITE_OK ){
    830         pLock->eLock = eRequired;
    831       }
    832     }
    833     else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){
    834       rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eRequired);
    835       if( rc==SQLITE_OK ){
    836         pLock->eLock = eRequired;
    837       }
    838     }
    839   }
    840 
    841   return rc;
    842 }
    843 
    844 /*
    845 ** Return the AsyncLock structure from the global async.pLock list
    846 ** associated with the file-system entry identified by path zName
    847 ** (a string of nName bytes). If no such structure exists, return 0.
    848 */
    849 static AsyncLock *findLock(const char *zName, int nName){
    850   AsyncLock *p = async.pLock;
    851   while( p && (p->nFile!=nName || memcmp(p->zFile, zName, nName)) ){
    852     p = p->pNext;
    853   }
    854   return p;
    855 }
    856 
    857 /*
    858 ** The following two methods - asyncLock() and asyncUnlock() - are used
    859 ** to obtain and release locks on database files opened with the
    860 ** asynchronous backend.
    861 */
    862 static int asyncLock(sqlite3_file *pFile, int eLock){
    863   int rc = SQLITE_OK;
    864   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
    865 
    866   if( p->zName ){
    867     async_mutex_enter(ASYNC_MUTEX_LOCK);
    868     if( p->lock.eLock<eLock ){
    869       AsyncLock *pLock = p->pLock;
    870       AsyncFileLock *pIter;
    871       assert(pLock && pLock->pList);
    872       for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
    873         if( pIter!=&p->lock && (
    874           (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) ||
    875           (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
    876           (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
    877           (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING)
    878         )){
    879           rc = SQLITE_BUSY;
    880         }
    881       }
    882       if( rc==SQLITE_OK ){
    883         p->lock.eLock = eLock;
    884         p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock);
    885       }
    886       assert(p->lock.eAsyncLock>=p->lock.eLock);
    887       if( rc==SQLITE_OK ){
    888         rc = getFileLock(pLock);
    889       }
    890     }
    891     async_mutex_leave(ASYNC_MUTEX_LOCK);
    892   }
    893 
    894   ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc));
    895   return rc;
    896 }
    897 static int asyncUnlock(sqlite3_file *pFile, int eLock){
    898   int rc = SQLITE_OK;
    899   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
    900   if( p->zName ){
    901     AsyncFileLock *pLock = &p->lock;
    902     async_mutex_enter(ASYNC_MUTEX_QUEUE);
    903     async_mutex_enter(ASYNC_MUTEX_LOCK);
    904     pLock->eLock = MIN(pLock->eLock, eLock);
    905     rc = addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0);
    906     async_mutex_leave(ASYNC_MUTEX_LOCK);
    907     async_mutex_leave(ASYNC_MUTEX_QUEUE);
    908   }
    909   return rc;
    910 }
    911 
    912 /*
    913 ** This function is called when the pager layer first opens a database file
    914 ** and is checking for a hot-journal.
    915 */
    916 static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){
    917   int ret = 0;
    918   AsyncFileLock *pIter;
    919   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
    920 
    921   async_mutex_enter(ASYNC_MUTEX_LOCK);
    922   for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){
    923     if( pIter->eLock>=SQLITE_LOCK_RESERVED ){
    924       ret = 1;
    925       break;
    926     }
    927   }
    928   async_mutex_leave(ASYNC_MUTEX_LOCK);
    929 
    930   ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName));
    931   *pResOut = ret;
    932   return SQLITE_OK;
    933 }
    934 
    935 /*
    936 ** sqlite3_file_control() implementation.
    937 */
    938 static int asyncFileControl(sqlite3_file *id, int op, void *pArg){
    939   switch( op ){
    940     case SQLITE_FCNTL_LOCKSTATE: {
    941       async_mutex_enter(ASYNC_MUTEX_LOCK);
    942       *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock;
    943       async_mutex_leave(ASYNC_MUTEX_LOCK);
    944       return SQLITE_OK;
    945     }
    946   }
    947   return SQLITE_ERROR;
    948 }
    949 
    950 /*
    951 ** Return the device characteristics and sector-size of the device. It
    952 ** is tricky to implement these correctly, as this backend might
    953 ** not have an open file handle at this point.
    954 */
    955 static int asyncSectorSize(sqlite3_file *pFile){
    956   UNUSED_PARAMETER(pFile);
    957   return 512;
    958 }
    959 static int asyncDeviceCharacteristics(sqlite3_file *pFile){
    960   UNUSED_PARAMETER(pFile);
    961   return 0;
    962 }
    963 
    964 static int unlinkAsyncFile(AsyncFileData *pData){
    965   AsyncFileLock **ppIter;
    966   int rc = SQLITE_OK;
    967 
    968   if( pData->zName ){
    969     AsyncLock *pLock = pData->pLock;
    970     for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){
    971       if( (*ppIter)==&pData->lock ){
    972         *ppIter = pData->lock.pNext;
    973         break;
    974       }
    975     }
    976     if( !pLock->pList ){
    977       AsyncLock **pp;
    978       if( pLock->pFile ){
    979         pLock->pFile->pMethods->xClose(pLock->pFile);
    980       }
    981       for(pp=&async.pLock; *pp!=pLock; pp=&((*pp)->pNext));
    982       *pp = pLock->pNext;
    983       sqlite3_free(pLock);
    984     }else{
    985       rc = getFileLock(pLock);
    986     }
    987   }
    988 
    989   return rc;
    990 }
    991 
    992 /*
    993 ** The parameter passed to this function is a copy of a 'flags' parameter
    994 ** passed to this modules xOpen() method. This function returns true
    995 ** if the file should be opened asynchronously, or false if it should
    996 ** be opened immediately.
    997 **
    998 ** If the file is to be opened asynchronously, then asyncOpen() will add
    999 ** an entry to the event queue and the file will not actually be opened
   1000 ** until the event is processed. Otherwise, the file is opened directly
   1001 ** by the caller.
   1002 */
   1003 static int doAsynchronousOpen(int flags){
   1004   return (flags&SQLITE_OPEN_CREATE) && (
   1005       (flags&SQLITE_OPEN_MAIN_JOURNAL) ||
   1006       (flags&SQLITE_OPEN_TEMP_JOURNAL) ||
   1007       (flags&SQLITE_OPEN_DELETEONCLOSE)
   1008   );
   1009 }
   1010 
   1011 /*
   1012 ** Open a file.
   1013 */
   1014 static int asyncOpen(
   1015   sqlite3_vfs *pAsyncVfs,
   1016   const char *zName,
   1017   sqlite3_file *pFile,
   1018   int flags,
   1019   int *pOutFlags
   1020 ){
   1021   static sqlite3_io_methods async_methods = {
   1022     1,                               /* iVersion */
   1023     asyncClose,                      /* xClose */
   1024     asyncRead,                       /* xRead */
   1025     asyncWrite,                      /* xWrite */
   1026     asyncTruncate,                   /* xTruncate */
   1027     asyncSync,                       /* xSync */
   1028     asyncFileSize,                   /* xFileSize */
   1029     asyncLock,                       /* xLock */
   1030     asyncUnlock,                     /* xUnlock */
   1031     asyncCheckReservedLock,          /* xCheckReservedLock */
   1032     asyncFileControl,                /* xFileControl */
   1033     asyncSectorSize,                 /* xSectorSize */
   1034     asyncDeviceCharacteristics       /* xDeviceCharacteristics */
   1035   };
   1036 
   1037   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
   1038   AsyncFile *p = (AsyncFile *)pFile;
   1039   int nName = 0;
   1040   int rc = SQLITE_OK;
   1041   int nByte;
   1042   AsyncFileData *pData;
   1043   AsyncLock *pLock = 0;
   1044   char *z;
   1045   int isAsyncOpen = doAsynchronousOpen(flags);
   1046 
   1047   /* If zName is NULL, then the upper layer is requesting an anonymous file */
   1048   if( zName ){
   1049     nName = (int)strlen(zName)+1;
   1050   }
   1051 
   1052   nByte = (
   1053     sizeof(AsyncFileData) +        /* AsyncFileData structure */
   1054     2 * pVfs->szOsFile +           /* AsyncFileData.pBaseRead and pBaseWrite */
   1055     nName                          /* AsyncFileData.zName */
   1056   );
   1057   z = sqlite3_malloc(nByte);
   1058   if( !z ){
   1059     return SQLITE_NOMEM;
   1060   }
   1061   memset(z, 0, nByte);
   1062   pData = (AsyncFileData*)z;
   1063   z += sizeof(pData[0]);
   1064   pData->pBaseRead = (sqlite3_file*)z;
   1065   z += pVfs->szOsFile;
   1066   pData->pBaseWrite = (sqlite3_file*)z;
   1067   pData->closeOp.pFileData = pData;
   1068   pData->closeOp.op = ASYNC_CLOSE;
   1069 
   1070   if( zName ){
   1071     z += pVfs->szOsFile;
   1072     pData->zName = z;
   1073     pData->nName = nName;
   1074     memcpy(pData->zName, zName, nName);
   1075   }
   1076 
   1077   if( !isAsyncOpen ){
   1078     int flagsout;
   1079     rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, &flagsout);
   1080     if( rc==SQLITE_OK
   1081      && (flagsout&SQLITE_OPEN_READWRITE)
   1082      && (flags&SQLITE_OPEN_EXCLUSIVE)==0
   1083     ){
   1084       rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseWrite, flags, 0);
   1085     }
   1086     if( pOutFlags ){
   1087       *pOutFlags = flagsout;
   1088     }
   1089   }
   1090 
   1091   async_mutex_enter(ASYNC_MUTEX_LOCK);
   1092 
   1093   if( zName && rc==SQLITE_OK ){
   1094     pLock = findLock(pData->zName, pData->nName);
   1095     if( !pLock ){
   1096       int nByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1;
   1097       pLock = (AsyncLock *)sqlite3_malloc(nByte);
   1098       if( pLock ){
   1099         memset(pLock, 0, nByte);
   1100         if( async.bLockFiles && (flags&SQLITE_OPEN_MAIN_DB) ){
   1101           pLock->pFile = (sqlite3_file *)&pLock[1];
   1102           rc = pVfs->xOpen(pVfs, pData->zName, pLock->pFile, flags, 0);
   1103           if( rc!=SQLITE_OK ){
   1104             sqlite3_free(pLock);
   1105             pLock = 0;
   1106           }
   1107         }
   1108         if( pLock ){
   1109           pLock->nFile = pData->nName;
   1110           pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile];
   1111           memcpy(pLock->zFile, pData->zName, pLock->nFile);
   1112           pLock->pNext = async.pLock;
   1113           async.pLock = pLock;
   1114         }
   1115       }else{
   1116         rc = SQLITE_NOMEM;
   1117       }
   1118     }
   1119   }
   1120 
   1121   if( rc==SQLITE_OK ){
   1122     p->pMethod = &async_methods;
   1123     p->pData = pData;
   1124 
   1125     /* Link AsyncFileData.lock into the linked list of
   1126     ** AsyncFileLock structures for this file.
   1127     */
   1128     if( zName ){
   1129       pData->lock.pNext = pLock->pList;
   1130       pLock->pList = &pData->lock;
   1131       pData->zName = pLock->zFile;
   1132     }
   1133   }else{
   1134     if( pData->pBaseRead->pMethods ){
   1135       pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
   1136     }
   1137     if( pData->pBaseWrite->pMethods ){
   1138       pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
   1139     }
   1140     sqlite3_free(pData);
   1141   }
   1142 
   1143   async_mutex_leave(ASYNC_MUTEX_LOCK);
   1144 
   1145   if( rc==SQLITE_OK ){
   1146     pData->pLock = pLock;
   1147   }
   1148 
   1149   if( rc==SQLITE_OK && isAsyncOpen ){
   1150     rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0);
   1151     if( rc==SQLITE_OK ){
   1152       if( pOutFlags ) *pOutFlags = flags;
   1153     }else{
   1154       async_mutex_enter(ASYNC_MUTEX_LOCK);
   1155       unlinkAsyncFile(pData);
   1156       async_mutex_leave(ASYNC_MUTEX_LOCK);
   1157       sqlite3_free(pData);
   1158     }
   1159   }
   1160   if( rc!=SQLITE_OK ){
   1161     p->pMethod = 0;
   1162   }else{
   1163     incrOpenFileCount();
   1164   }
   1165 
   1166   return rc;
   1167 }
   1168 
   1169 /*
   1170 ** Implementation of sqlite3OsDelete. Add an entry to the end of the
   1171 ** write-op queue to perform the delete.
   1172 */
   1173 static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){
   1174   UNUSED_PARAMETER(pAsyncVfs);
   1175   return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, (int)strlen(z)+1, z);
   1176 }
   1177 
   1178 /*
   1179 ** Implementation of sqlite3OsAccess. This method holds the mutex from
   1180 ** start to finish.
   1181 */
   1182 static int asyncAccess(
   1183   sqlite3_vfs *pAsyncVfs,
   1184   const char *zName,
   1185   int flags,
   1186   int *pResOut
   1187 ){
   1188   int rc;
   1189   int ret;
   1190   AsyncWrite *p;
   1191   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
   1192 
   1193   assert(flags==SQLITE_ACCESS_READWRITE
   1194       || flags==SQLITE_ACCESS_READ
   1195       || flags==SQLITE_ACCESS_EXISTS
   1196   );
   1197 
   1198   async_mutex_enter(ASYNC_MUTEX_QUEUE);
   1199   rc = pVfs->xAccess(pVfs, zName, flags, &ret);
   1200   if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){
   1201     for(p=async.pQueueFirst; p; p = p->pNext){
   1202       if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){
   1203         ret = 0;
   1204       }else if( p->op==ASYNC_OPENEXCLUSIVE
   1205              && p->pFileData->zName
   1206              && 0==strcmp(p->pFileData->zName, zName)
   1207       ){
   1208         ret = 1;
   1209       }
   1210     }
   1211   }
   1212   ASYNC_TRACE(("ACCESS(%s): %s = %d\n",
   1213     flags==SQLITE_ACCESS_READWRITE?"read-write":
   1214     flags==SQLITE_ACCESS_READ?"read":"exists"
   1215     , zName, ret)
   1216   );
   1217   async_mutex_leave(ASYNC_MUTEX_QUEUE);
   1218   *pResOut = ret;
   1219   return rc;
   1220 }
   1221 
   1222 /*
   1223 ** Fill in zPathOut with the full path to the file identified by zPath.
   1224 */
   1225 static int asyncFullPathname(
   1226   sqlite3_vfs *pAsyncVfs,
   1227   const char *zPath,
   1228   int nPathOut,
   1229   char *zPathOut
   1230 ){
   1231   int rc;
   1232   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
   1233   rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut);
   1234 
   1235   /* Because of the way intra-process file locking works, this backend
   1236   ** needs to return a canonical path. The following block assumes the
   1237   ** file-system uses unix style paths.
   1238   */
   1239   if( rc==SQLITE_OK ){
   1240     int i, j;
   1241     char *z = zPathOut;
   1242     int n = (int)strlen(z);
   1243     while( n>1 && z[n-1]=='/' ){ n--; }
   1244     for(i=j=0; i<n; i++){
   1245       if( z[i]=='/' ){
   1246         if( z[i+1]=='/' ) continue;
   1247         if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
   1248           i += 1;
   1249           continue;
   1250         }
   1251         if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
   1252           while( j>0 && z[j-1]!='/' ){ j--; }
   1253           if( j>0 ){ j--; }
   1254           i += 2;
   1255           continue;
   1256         }
   1257       }
   1258       z[j++] = z[i];
   1259     }
   1260     z[j] = 0;
   1261   }
   1262 
   1263   return rc;
   1264 }
   1265 static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){
   1266   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
   1267   return pVfs->xDlOpen(pVfs, zPath);
   1268 }
   1269 static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){
   1270   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
   1271   pVfs->xDlError(pVfs, nByte, zErrMsg);
   1272 }
   1273 static void (*asyncDlSym(
   1274   sqlite3_vfs *pAsyncVfs,
   1275   void *pHandle,
   1276   const char *zSymbol
   1277 ))(void){
   1278   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
   1279   return pVfs->xDlSym(pVfs, pHandle, zSymbol);
   1280 }
   1281 static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){
   1282   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
   1283   pVfs->xDlClose(pVfs, pHandle);
   1284 }
   1285 static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){
   1286   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
   1287   return pVfs->xRandomness(pVfs, nByte, zBufOut);
   1288 }
   1289 static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){
   1290   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
   1291   return pVfs->xSleep(pVfs, nMicro);
   1292 }
   1293 static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){
   1294   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
   1295   return pVfs->xCurrentTime(pVfs, pTimeOut);
   1296 }
   1297 
   1298 static sqlite3_vfs async_vfs = {
   1299   1,                    /* iVersion */
   1300   sizeof(AsyncFile),    /* szOsFile */
   1301   0,                    /* mxPathname (copied from the parent VFS by sqlite3async_initialize) */
   1302   0,                    /* pNext */
   1303   SQLITEASYNC_VFSNAME,  /* zName */
   1304   0,                    /* pAppData (set to the parent VFS by sqlite3async_initialize) */
   1305   asyncOpen,            /* xOpen */
   1306   asyncDelete,          /* xDelete */
   1307   asyncAccess,          /* xAccess */
   1308   asyncFullPathname,    /* xFullPathname */
   1309   asyncDlOpen,          /* xDlOpen */
   1310   asyncDlError,         /* xDlError */
   1311   asyncDlSym,           /* xDlSym */
   1312   asyncDlClose,         /* xDlClose */
   1313   asyncRandomness,      /* xRandomness */
   1314   asyncSleep,           /* xSleep */
   1315   asyncCurrentTime      /* xCurrentTime */
   1316 };
   1317 
   1318 /*
   1319 ** This procedure runs in a separate thread, reading messages off of the
   1320 ** write queue and processing them one by one.
   1321 **
   1322 ** If async.writerHaltNow is true, then this procedure exits
   1323 ** after processing a single message.
   1324 **
   1325 ** If async.writerHaltWhenIdle is true, then this procedure exits when
   1326 ** the write queue is empty.
   1327 **
   1328 ** If both of the above variables are false, this procedure runs
   1329 ** indefinately, waiting for operations to be added to the write queue
   1330 ** and processing them in the order in which they arrive.
   1331 **
   1332 ** An artifical delay of async.ioDelay milliseconds is inserted before
   1333 ** each write operation in order to simulate the effect of a slow disk.
   1334 **
   1335 ** Only one instance of this procedure may be running at a time.
   1336 */
   1337 static void asyncWriterThread(void){
   1338   sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData);
   1339   AsyncWrite *p = 0;
   1340   int rc = SQLITE_OK;
   1341   int holdingMutex = 0;
   1342 
   1343   async_mutex_enter(ASYNC_MUTEX_WRITER);
   1344 
   1345   while( async.eHalt!=SQLITEASYNC_HALT_NOW ){
   1346     int doNotFree = 0;
   1347     sqlite3_file *pBase = 0;
   1348 
   1349     if( !holdingMutex ){
   1350       async_mutex_enter(ASYNC_MUTEX_QUEUE);
   1351     }
   1352     while( (p = async.pQueueFirst)==0 ){
   1353       if( async.eHalt!=SQLITEASYNC_HALT_NEVER ){
   1354         async_mutex_leave(ASYNC_MUTEX_QUEUE);
   1355         break;
   1356       }else{
   1357         ASYNC_TRACE(("IDLE\n"));
   1358         async_cond_wait(ASYNC_COND_QUEUE, ASYNC_MUTEX_QUEUE);
   1359         ASYNC_TRACE(("WAKEUP\n"));
   1360       }
   1361     }
   1362     if( p==0 ) break;
   1363     holdingMutex = 1;
   1364 
   1365     /* Right now this thread is holding the mutex on the write-op queue.
   1366     ** Variable 'p' points to the first entry in the write-op queue. In
   1367     ** the general case, we hold on to the mutex for the entire body of
   1368     ** the loop.
   1369     **
   1370     ** However in the cases enumerated below, we relinquish the mutex,
   1371     ** perform the IO, and then re-request the mutex before removing 'p' from
   1372     ** the head of the write-op queue. The idea is to increase concurrency with
   1373     ** sqlite threads.
   1374     **
   1375     **     * An ASYNC_CLOSE operation.
   1376     **     * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish
   1377     **       the mutex, call the underlying xOpenExclusive() function, then
   1378     **       re-aquire the mutex before seting the AsyncFile.pBaseRead
   1379     **       variable.
   1380     **     * ASYNC_SYNC and ASYNC_WRITE operations, if
   1381     **       SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two
   1382     **       file-handles are open for the particular file being "synced".
   1383     */
   1384     if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){
   1385       p->op = ASYNC_NOOP;
   1386     }
   1387     if( p->pFileData ){
   1388       pBase = p->pFileData->pBaseWrite;
   1389       if(
   1390         p->op==ASYNC_CLOSE ||
   1391         p->op==ASYNC_OPENEXCLUSIVE ||
   1392         (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) )
   1393       ){
   1394         async_mutex_leave(ASYNC_MUTEX_QUEUE);
   1395         holdingMutex = 0;
   1396       }
   1397       if( !pBase->pMethods ){
   1398         pBase = p->pFileData->pBaseRead;
   1399       }
   1400     }
   1401 
   1402     switch( p->op ){
   1403       case ASYNC_NOOP:
   1404         break;
   1405 
   1406       case ASYNC_WRITE:
   1407         assert( pBase );
   1408         ASYNC_TRACE(("WRITE %s %d bytes at %d\n",
   1409                 p->pFileData->zName, p->nByte, p->iOffset));
   1410         rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset);
   1411         break;
   1412 
   1413       case ASYNC_SYNC:
   1414         assert( pBase );
   1415         ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName));
   1416         rc = pBase->pMethods->xSync(pBase, p->nByte);
   1417         break;
   1418 
   1419       case ASYNC_TRUNCATE:
   1420         assert( pBase );
   1421         ASYNC_TRACE(("TRUNCATE %s to %d bytes\n",
   1422                 p->pFileData->zName, p->iOffset));
   1423         rc = pBase->pMethods->xTruncate(pBase, p->iOffset);
   1424         break;
   1425 
   1426       case ASYNC_CLOSE: {
   1427         AsyncFileData *pData = p->pFileData;
   1428         ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName));
   1429         if( pData->pBaseWrite->pMethods ){
   1430           pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
   1431         }
   1432         if( pData->pBaseRead->pMethods ){
   1433           pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
   1434         }
   1435 
   1436         /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock
   1437         ** structures for this file. Obtain the async.lockMutex mutex
   1438         ** before doing so.
   1439         */
   1440         async_mutex_enter(ASYNC_MUTEX_LOCK);
   1441         rc = unlinkAsyncFile(pData);
   1442         async_mutex_leave(ASYNC_MUTEX_LOCK);
   1443 
   1444         if( !holdingMutex ){
   1445           async_mutex_enter(ASYNC_MUTEX_QUEUE);
   1446           holdingMutex = 1;
   1447         }
   1448         assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
   1449         async.pQueueFirst = p->pNext;
   1450         sqlite3_free(pData);
   1451         doNotFree = 1;
   1452         break;
   1453       }
   1454 
   1455       case ASYNC_UNLOCK: {
   1456         AsyncWrite *pIter;
   1457         AsyncFileData *pData = p->pFileData;
   1458         int eLock = p->nByte;
   1459 
   1460         /* When a file is locked by SQLite using the async backend, it is
   1461         ** locked within the 'real' file-system synchronously. When it is
   1462         ** unlocked, an ASYNC_UNLOCK event is added to the write-queue to
   1463         ** unlock the file asynchronously. The design of the async backend
   1464         ** requires that the 'real' file-system file be locked from the
   1465         ** time that SQLite first locks it (and probably reads from it)
   1466         ** until all asynchronous write events that were scheduled before
   1467         ** SQLite unlocked the file have been processed.
   1468         **
   1469         ** This is more complex if SQLite locks and unlocks the file multiple
   1470         ** times in quick succession. For example, if SQLite does:
   1471         **
   1472         **   lock, write, unlock, lock, write, unlock
   1473         **
   1474         ** Each "lock" operation locks the file immediately. Each "write"
   1475         ** and "unlock" operation adds an event to the event queue. If the
   1476         ** second "lock" operation is performed before the first "unlock"
   1477         ** operation has been processed asynchronously, then the first
   1478         ** "unlock" cannot be safely processed as is, since this would mean
   1479         ** the file was unlocked when the second "write" operation is
   1480         ** processed. To work around this, when processing an ASYNC_UNLOCK
   1481         ** operation, SQLite:
   1482         **
   1483         **   1) Unlocks the file to the minimum of the argument passed to
   1484         **      the xUnlock() call and the current lock from SQLite's point
   1485         **      of view, and
   1486         **
   1487         **   2) Only unlocks the file at all if this event is the last
   1488         **      ASYNC_UNLOCK event on this file in the write-queue.
   1489         */
   1490         assert( holdingMutex==1 );
   1491         assert( async.pQueueFirst==p );
   1492         for(pIter=async.pQueueFirst->pNext; pIter; pIter=pIter->pNext){
   1493           if( pIter->pFileData==pData && pIter->op==ASYNC_UNLOCK ) break;
   1494         }
   1495         if( !pIter ){
   1496           async_mutex_enter(ASYNC_MUTEX_LOCK);
   1497           pData->lock.eAsyncLock = MIN(
   1498               pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock)
   1499           );
   1500           assert(pData->lock.eAsyncLock>=pData->lock.eLock);
   1501           rc = getFileLock(pData->pLock);
   1502           async_mutex_leave(ASYNC_MUTEX_LOCK);
   1503         }
   1504         break;
   1505       }
   1506 
   1507       case ASYNC_DELETE:
   1508         ASYNC_TRACE(("DELETE %s\n", p->zBuf));
   1509         rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset);
   1510         break;
   1511 
   1512       case ASYNC_OPENEXCLUSIVE: {
   1513         int flags = (int)p->iOffset;
   1514         AsyncFileData *pData = p->pFileData;
   1515         ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset));
   1516         assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0);
   1517         rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0);
   1518         assert( holdingMutex==0 );
   1519         async_mutex_enter(ASYNC_MUTEX_QUEUE);
   1520         holdingMutex = 1;
   1521         break;
   1522       }
   1523 
   1524       default: assert(!"Illegal value for AsyncWrite.op");
   1525     }
   1526 
   1527     /* If we didn't hang on to the mutex during the IO op, obtain it now
   1528     ** so that the AsyncWrite structure can be safely removed from the
   1529     ** global write-op queue.
   1530     */
   1531     if( !holdingMutex ){
   1532       async_mutex_enter(ASYNC_MUTEX_QUEUE);
   1533       holdingMutex = 1;
   1534     }
   1535     /* ASYNC_TRACE(("UNLINK %p\n", p)); */
   1536     if( p==async.pQueueLast ){
   1537       async.pQueueLast = 0;
   1538     }
   1539     if( !doNotFree ){
   1540       assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
   1541       async.pQueueFirst = p->pNext;
   1542       sqlite3_free(p);
   1543     }
   1544     assert( holdingMutex );
   1545 
   1546     /* An IO error has occurred. We cannot report the error back to the
   1547     ** connection that requested the I/O since the error happened
   1548     ** asynchronously.  The connection has already moved on.  There
   1549     ** really is nobody to report the error to.
   1550     **
   1551     ** The file for which the error occurred may have been a database or
   1552     ** journal file. Regardless, none of the currently queued operations
   1553     ** associated with the same database should now be performed. Nor should
   1554     ** any subsequently requested IO on either a database or journal file
   1555     ** handle for the same database be accepted until the main database
   1556     ** file handle has been closed and reopened.
   1557     **
   1558     ** Furthermore, no further IO should be queued or performed on any file
   1559     ** handle associated with a database that may have been part of a
   1560     ** multi-file transaction that included the database associated with
   1561     ** the IO error (i.e. a database ATTACHed to the same handle at some
   1562     ** point in time).
   1563     */
   1564     if( rc!=SQLITE_OK ){
   1565       async.ioError = rc;
   1566     }
   1567 
   1568     if( async.ioError && !async.pQueueFirst ){
   1569       async_mutex_enter(ASYNC_MUTEX_LOCK);
   1570       if( 0==async.pLock ){
   1571         async.ioError = SQLITE_OK;
   1572       }
   1573       async_mutex_leave(ASYNC_MUTEX_LOCK);
   1574     }
   1575 
   1576     /* Drop the queue mutex before continuing to the next write operation
   1577     ** in order to give other threads a chance to work with the write queue.
   1578     */
   1579     if( !async.pQueueFirst || !async.ioError ){
   1580       async_mutex_leave(ASYNC_MUTEX_QUEUE);
   1581       holdingMutex = 0;
   1582       if( async.ioDelay>0 ){
   1583         pVfs->xSleep(pVfs, async.ioDelay*1000);
   1584       }else{
   1585         async_sched_yield();
   1586       }
   1587     }
   1588   }
   1589 
   1590   async_mutex_leave(ASYNC_MUTEX_WRITER);
   1591   return;
   1592 }
   1593 
   1594 /*
   1595 ** Install the asynchronous VFS.
   1596 */
   1597 int sqlite3async_initialize(const char *zParent, int isDefault){
   1598   int rc = SQLITE_OK;
   1599   if( async_vfs.pAppData==0 ){
   1600     sqlite3_vfs *pParent = sqlite3_vfs_find(zParent);
   1601     if( !pParent || async_os_initialize() ){
   1602       rc = SQLITE_ERROR;
   1603     }else if( SQLITE_OK!=(rc = sqlite3_vfs_register(&async_vfs, isDefault)) ){
   1604       async_os_shutdown();
   1605     }else{
   1606       async_vfs.pAppData = (void *)pParent;
   1607       async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname;
   1608     }
   1609   }
   1610   return rc;
   1611 }
   1612 
   1613 /*
   1614 ** Uninstall the asynchronous VFS.
   1615 */
   1616 void sqlite3async_shutdown(void){
   1617   if( async_vfs.pAppData ){
   1618     async_os_shutdown();
   1619     sqlite3_vfs_unregister((sqlite3_vfs *)&async_vfs);
   1620     async_vfs.pAppData = 0;
   1621   }
   1622 }
   1623 
   /*
   ** Process events on the write-queue.
   **
   ** This is the public entry point for the writer loop: it calls
   ** asyncWriterThread() directly in the calling thread and returns when
   ** that function returns.
   */
   void sqlite3async_run(void){
     asyncWriterThread();
     return;
   }
   1630 
   1631 /*
   1632 ** Control/configure the asynchronous IO system.
   1633 */
   1634 int sqlite3async_control(int op, ...){
   1635   va_list ap;
   1636   va_start(ap, op);
   1637   switch( op ){
   1638     case SQLITEASYNC_HALT: {
   1639       int eWhen = va_arg(ap, int);
   1640       if( eWhen!=SQLITEASYNC_HALT_NEVER
   1641        && eWhen!=SQLITEASYNC_HALT_NOW
   1642        && eWhen!=SQLITEASYNC_HALT_IDLE
   1643       ){
   1644         return SQLITE_MISUSE;
   1645       }
   1646       async.eHalt = eWhen;
   1647       async_mutex_enter(ASYNC_MUTEX_QUEUE);
   1648       async_cond_signal(ASYNC_COND_QUEUE);
   1649       async_mutex_leave(ASYNC_MUTEX_QUEUE);
   1650       break;
   1651     }
   1652 
   1653     case SQLITEASYNC_DELAY: {
   1654       int iDelay = va_arg(ap, int);
   1655       if( iDelay<0 ){
   1656         return SQLITE_MISUSE;
   1657       }
   1658       async.ioDelay = iDelay;
   1659       break;
   1660     }
   1661 
   1662     case SQLITEASYNC_LOCKFILES: {
   1663       int bLock = va_arg(ap, int);
   1664       async_mutex_enter(ASYNC_MUTEX_QUEUE);
   1665       if( async.nFile || async.pQueueFirst ){
   1666         async_mutex_leave(ASYNC_MUTEX_QUEUE);
   1667         return SQLITE_MISUSE;
   1668       }
   1669       async.bLockFiles = bLock;
   1670       async_mutex_leave(ASYNC_MUTEX_QUEUE);
   1671       break;
   1672     }
   1673 
   1674     case SQLITEASYNC_GET_HALT: {
   1675       int *peWhen = va_arg(ap, int *);
   1676       *peWhen = async.eHalt;
   1677       break;
   1678     }
   1679     case SQLITEASYNC_GET_DELAY: {
   1680       int *piDelay = va_arg(ap, int *);
   1681       *piDelay = async.ioDelay;
   1682       break;
   1683     }
   1684     case SQLITEASYNC_GET_LOCKFILES: {
   1685       int *piDelay = va_arg(ap, int *);
   1686       *piDelay = async.bLockFiles;
   1687       break;
   1688     }
   1689 
   1690     default:
   1691       return SQLITE_ERROR;
   1692   }
   1693   return SQLITE_OK;
   1694 }
   1695 
   1696 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO) */
   1697 
   1698