u32 minFrame; /* Ignore wal frames before this one */
u32 iReCksum; /* On commit, recalculate checksums from here */
const char *zWalName; /* Name of WAL file */
+ const char *zWalName2; /* Name of second WAL file */
u32 nCkpt; /* Checkpoint sequence counter in the wal-header */
+ #ifdef SQLITE_USE_SEH
+ u32 lockMask; /* Mask of locks held */
+ void *pFree; /* Pointer to sqlite3_free() if exception thrown */
+ int iSysErrno; /* System error code following exception */
+ #endif
#ifdef SQLITE_DEBUG
+ int nSehTry; /* Number of nested SEH_TRY{} blocks */
u8 lockError; /* True if a locking error has occurred */
#endif
#ifdef SQLITE_ENABLE_SNAPSHOT
sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \
)
-
++
+ /*
+ ** Structured Exception Handling (SEH) is a Windows-specific technique
+ ** for catching exceptions raised while accessing memory-mapped files.
+ **
+ ** The -DSQLITE_USE_SEH compile-time option means to use SEH to catch and
+ ** deal with system-level errors that arise during WAL -shm file processing.
+ ** Without this compile-time option, any system-level faults that appear
+ ** while accessing the memory-mapped -shm file will cause a process-wide
+ ** signal to be delivered, which will more than likely cause the entire
+ ** process to exit.
+ */
+ #ifdef SQLITE_USE_SEH
+ #include <Windows.h>
+
+ /* Beginning of a block of code in which an exception might occur.
+ ** Opens a "__try {" scope that must be closed by a matching SEH_EXCEPT().
+ ** Expects a Wal pointer named pWal to be in scope at the point of use, and
+ ** asserts that no other SEH_TRY block is already active for that handle
+ ** (pWal->nSehTry==0) before incrementing the debug-only counter. */
+ # define SEH_TRY __try { \
+ assert( walAssertLockmask(pWal) && pWal->nSehTry==0 ); \
+ VVA_ONLY(pWal->nSehTry++);
+
+ /* The end of a block of code in which an exception might occur.
+ ** Closes the scope opened by SEH_TRY. Argument X is the code placed in the
+ ** __except handler body; whether that handler runs is decided by
+ ** sehExceptionFilter(). The counter decrement and assert() below are inside
+ ** the __try scope, i.e. they belong to the no-exception path. */
+ # define SEH_EXCEPT(X) \
+ VVA_ONLY(pWal->nSehTry--); \
+ assert( pWal->nSehTry==0 ); \
+ } __except( sehExceptionFilter(pWal, GetExceptionCode(), GetExceptionInformation() ) ){ X }
+
+ /* Simulate a memory-mapping fault in the -shm file for testing purposes */
+ # define SEH_INJECT_FAULT sehInjectFault(pWal)
+
+ /*
+ ** The second argument is the return value of GetExceptionCode() for the
+ ** current exception. Return EXCEPTION_EXECUTE_HANDLER if the exception code
+ ** indicates that the exception may have been caused by accessing the *-shm
+ ** file mapping. Or EXCEPTION_CONTINUE_SEARCH otherwise.
+ **
+ ** Only EXCEPTION_IN_PAGE_ERROR is treated as a *-shm access failure. For
+ ** that code, if the exception record reachable through the third argument
+ ** carries at least three parameters, the third one is stored in
+ ** pWal->iSysErrno. The third argument, or its ExceptionRecord member, may
+ ** be NULL, in which case iSysErrno is left unchanged.
+ **
+ ** The debug-only nSehTry counter is decremented here, whatever the
+ ** exception code, because the decrement in SEH_EXCEPT() sits inside the
+ ** __try scope and so is presumably not reached once an exception has been
+ ** raised.
+ */
+ static int sehExceptionFilter(Wal *pWal, int eCode, EXCEPTION_POINTERS *p){
+ VVA_ONLY(pWal->nSehTry--);
+ if( eCode==EXCEPTION_IN_PAGE_ERROR ){
+ if( p && p->ExceptionRecord && p->ExceptionRecord->NumberParameters>=3 ){
+ /* From MSDN: For this type of exception, the first element of the
+ ** ExceptionInformation[] array is a read-write flag - 0 if the exception
+ ** was thrown while reading, 1 if while writing. The second element is
+ ** the virtual address being accessed. The "third array element specifies
+ ** the underlying NTSTATUS code that resulted in the exception". */
+ pWal->iSysErrno = (int)p->ExceptionRecord->ExceptionInformation[2];
+ }
+ return EXCEPTION_EXECUTE_HANDLER;
+ }
+ return EXCEPTION_CONTINUE_SEARCH;
+ }
+
+ /*
+ ** If one is configured, invoke the xTestCallback callback with 650 as
+ ** the argument. If it returns true, throw the same exception that is
+ ** thrown by the system if the *-shm file mapping is accessed after it
+ ** has been invalidated.
+ **
+ ** The callback is reached through sqlite3FaultSim(650). A non-zero result
+ ** is passed as the third exception parameter of the raised
+ ** EXCEPTION_IN_PAGE_ERROR, which is the slot that sehExceptionFilter()
+ ** copies into pWal->iSysErrno; the first two parameters are set to zero.
+ **
+ ** Must only be called while an SEH_TRY block is active; this is asserted
+ ** through pWal->nSehTry.
+ */
+ static void sehInjectFault(Wal *pWal){
+ int res;
+ assert( pWal->nSehTry>0 );
+
+ res = sqlite3FaultSim(650);
+ if( res!=0 ){
+ ULONG_PTR aArg[3];
+ aArg[0] = 0;
+ aArg[1] = 0;
+ aArg[2] = (ULONG_PTR)res;
+ RaiseException(EXCEPTION_IN_PAGE_ERROR, 0, 3, (const ULONG_PTR*)aArg);
+ }
+ }
+
+ /*
+ ** There are two ways to use this macro. To set a pointer to be freed
+ ** if an exception is thrown:
+ **
+ ** SEH_FREE_ON_ERROR(0, pPtr);
+ **
+ ** and to cancel the same:
+ **
+ ** SEH_FREE_ON_ERROR(pPtr, 0);
+ **
+ ** In the first case, there must not already be a pointer registered to
+ ** be freed. In the second case, pPtr must be the registered pointer.
+ **
+ ** Usage notes: the macro refers to a Wal pointer named pWal that must be
+ ** in scope where it is used. It expands to two statements (an assert()
+ ** followed by an assignment, with no trailing semicolon) that are not
+ ** wrapped in a block, so it must not be used as the sole body of an
+ ** unbraced if/else/loop. The arguments are not parenthesized in the
+ ** expansion and X is evaluated more than once, so pass simple expressions
+ ** without side effects.
+ */
+ #define SEH_FREE_ON_ERROR(X,Y) \
+ assert( (X==0 || Y==0) && pWal->pFree==X ); pWal->pFree = Y
+
+ #else /* SQLITE_USE_SEH is not defined: no exceptions are caught. SEH_TRY,
+ ** SEH_EXCEPT() and SEH_INJECT_FAULT reduce to bookkeeping and checks on
+ ** the pWal->nSehTry counter, and SEH_FREE_ON_ERROR() expands to nothing.
+ ** Note that the X argument of SEH_EXCEPT() is discarded in this
+ ** configuration, so handler code passed to it never runs. */
+ # define SEH_TRY VVA_ONLY(pWal->nSehTry++);
+ # define SEH_EXCEPT(X) VVA_ONLY(pWal->nSehTry--); assert( pWal->nSehTry==0 );
+ # define SEH_INJECT_FAULT assert( pWal->nSehTry>0 );
+ # define SEH_FREE_ON_ERROR(X,Y)
+ #endif /* ifdef SQLITE_USE_SEH */
+
/*
** Obtain a pointer to the iPage'th page of the wal-index. The wal-index
** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are
return rc;
}
+/*
+** Recover a single wal file - *-wal if iWal==0, or *-wal2 if iWal==1.
+**
+** pWal->hdr is first zeroed and given a random salt. If the file is no
+** larger than a wal header and pbZero is not NULL, *pbZero is set to 1.
+** Otherwise the wal header is read and checked (magic, page size,
+** checksum, format version) and frames are then read one at a time for as
+** long as they decode as valid, each one being added to the wal-index via
+** walIndexAppend(). Every commit frame encountered updates
+** pWal->hdr.mxFrame, nPage and szPage.
+**
+** *pnCkpt is set to the value stored at byte offset 12 of the wal header,
+** but only once the header magic, page size and checksum have been
+** accepted. On return pWal->hdr.aFrameCksum[] holds the checksum as of the
+** last commit frame seen, except on the early-return paths taken before the
+** frame buffer is allocated.
+**
+** Return values visible in this function: SQLITE_OK if the header is
+** rejected as invalid (the file is treated as containing no valid data),
+** SQLITE_CANTOPEN_BKPT for an unrecognized format version,
+** SQLITE_NOMEM_BKPT if the frame buffer cannot be allocated, or otherwise
+** the result of the last file or wal-index call made.
+*/
+static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){
+ i64 nSize; /* Size of log file */
+ u32 aFrameCksum[2] = {0, 0}; /* Checksum as of last commit frame seen */
+ int rc;
+ sqlite3_file *pWalFd = pWal->apWalFd[iWal];
+
+ assert( iWal==0 || iWal==1 );
+
+ memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
+ sqlite3_randomness(8, pWal->hdr.aSalt);
+
+ rc = sqlite3OsFileSize(pWalFd, &nSize);
+ if( rc==SQLITE_OK ){
+ if( nSize>WAL_HDRSIZE ){
+ u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */
+ u32 *aPrivate = 0; /* Heap copy of *-shm pg being populated */
+ u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */
+ int szFrame; /* Number of bytes in buffer aFrame[] */
+ u8 *aData; /* Pointer to data part of aFrame buffer */
+ int szPage; /* Page size according to the log */
+ u32 magic; /* Magic value read from WAL header */
+ u32 version; /* Version value read from WAL header */
+ int isValid; /* True if this frame is valid */
+ int iPg; /* Current 32KB wal-index page */
+ int iLastFrame; /* Last frame in wal, based on size alone */
+ int iLastPg; /* Last shm page used by this wal */
+
+ /* Read in the WAL header. */
+ rc = sqlite3OsRead(pWalFd, aBuf, WAL_HDRSIZE, 0);
+ if( rc!=SQLITE_OK ){
+ return rc;
+ }
+
+ /* If the database page size is not a power of two, or is greater than
+ ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid
+ ** data. Similarly, if the 'magic' value is invalid, ignore the whole
+ ** WAL file.
+ */
+ magic = sqlite3Get4byte(&aBuf[0]);
+ szPage = sqlite3Get4byte(&aBuf[8]);
+ if( (magic&0xFFFFFFFE)!=WAL_MAGIC
+ || szPage&(szPage-1)
+ || szPage>SQLITE_MAX_PAGE_SIZE
+ || szPage<512
+ ){
+ return SQLITE_OK;
+ }
+ pWal->hdr.bigEndCksum = (u8)(magic&0x00000001);
+ pWal->szPage = szPage;
+
+ /* Verify that the WAL header checksum is correct */
+ walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN,
+ aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum
+ );
+ if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24])
+ || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28])
+ ){
+ return SQLITE_OK;
+ }
+
+ memcpy(&pWal->hdr.aSalt, &aBuf[16], 8);
+ *pnCkpt = sqlite3Get4byte(&aBuf[12]);
+
+ /* Verify that the version number on the WAL format is one that
+ ** we are able to understand */
+ version = sqlite3Get4byte(&aBuf[4]);
+ if( version!=WAL_VERSION1 && version!=WAL_VERSION2 ){
+ return SQLITE_CANTOPEN_BKPT;
+ }
+ pWal->hdr.iVersion = version;
+
+ /* Malloc a buffer to read frames into. The same allocation also
+ ** provides WALINDEX_PGSZ bytes of scratch space, used as aPrivate. */
+ szFrame = szPage + WAL_FRAME_HDRSIZE;
+ aFrame = (u8 *)sqlite3_malloc64(szFrame + WALINDEX_PGSZ);
++ SEH_FREE_ON_ERROR(0, aFrame);
+ if( !aFrame ){
+ return SQLITE_NOMEM_BKPT;
+ }
+ aData = &aFrame[WAL_FRAME_HDRSIZE];
+ aPrivate = (u32*)&aData[szPage]; /* Tail of the aFrame allocation */
+
+ /* Read all frames from the log file. */
+ iLastFrame = (nSize - WAL_HDRSIZE) / szFrame;
+ if( version==WAL_VERSION2 ){
+ iLastPg = walFramePage2(iWal, iLastFrame);
+ }else{
+ iLastPg = walFramePage(iLastFrame);
+ }
+ for(iPg=iWal; iPg<=iLastPg; iPg+=(version==WAL_VERSION2 ? 2 : 1)){
+ u32 *aShare;
+ int iFrame; /* Index of last frame read */
+ int iLast;
+ int iFirst;
+ int nHdr, nHdr32;
+
+ rc = walIndexPage(pWal, iPg, (volatile u32**)&aShare);
+ assert( aShare!=0 || rc!=SQLITE_OK );
+ if( aShare==0 ) break;
+ pWal->apWiData[iPg] = aPrivate; /* Populate the private copy first */
+
+ if( iWal ){
+ assert( version==WAL_VERSION2 );
+ iFirst = 1 + (iPg/2)*HASHTABLE_NPAGE;
+ iLast = iFirst + HASHTABLE_NPAGE - 1;
+ }else{
+ int i2 = (version==WAL_VERSION2) ? (iPg/2) : iPg;
+ iLast = HASHTABLE_NPAGE_ONE+i2*HASHTABLE_NPAGE;
+ iFirst = 1 + (i2==0?0:HASHTABLE_NPAGE_ONE+(i2-1)*HASHTABLE_NPAGE);
+ }
+ iLast = MIN(iLast, iLastFrame);
+
+ for(iFrame=iFirst; iFrame<=iLast; iFrame++){
+ i64 iOffset = walFrameOffset(iFrame, szPage);
+ u32 pgno; /* Database page number for frame */
+ u32 nTruncate; /* dbsize field from frame header */
+
+ /* Read and decode the next log frame. */
+ rc = sqlite3OsRead(pWalFd, aFrame, szFrame, iOffset);
+ if( rc!=SQLITE_OK ) break;
+ isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame);
+ if( !isValid ) break;
+ rc = walIndexAppend(pWal, iWal, iFrame, pgno);
+ if( NEVER(rc!=SQLITE_OK) ) break;
+
+ /* If nTruncate is non-zero, this is a commit record. */
+ if( nTruncate ){
+ pWal->hdr.mxFrame = iFrame;
+ pWal->hdr.nPage = nTruncate;
+ pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
+ testcase( szPage<=32768 );
+ testcase( szPage>=65536 );
+ aFrameCksum[0] = pWal->hdr.aFrameCksum[0];
+ aFrameCksum[1] = pWal->hdr.aFrameCksum[1];
+ }
+ }
+ pWal->apWiData[iPg] = aShare; /* Switch back to the shared page */
+ nHdr = (iPg==0 ? WALINDEX_HDR_SIZE : 0); /* Page 0: skip the header area */
+ nHdr32 = nHdr / sizeof(u32);
+#ifndef SQLITE_SAFER_WALINDEX_RECOVERY
+ /* Memcpy() should work fine here, on all reasonable implementations.
+ ** Technically, memcpy() might change the destination to some
+ ** intermediate value before setting to the final value, and that might
+ ** cause a concurrent reader to malfunction. Memcpy() is allowed to
+ ** do that, according to the spec, but no memcpy() implementation that
+ ** we know of actually does that, which is why we say that memcpy()
+ ** is safe for this. Memcpy() is certainly a lot faster.
+ */
+ memcpy(&aShare[nHdr32], &aPrivate[nHdr32], WALINDEX_PGSZ-nHdr);
+#else
+ /* In the event that some platform is found for which memcpy()
+ ** changes the destination to some intermediate value before
+ ** setting the final value, this alternative copy routine is
+ ** provided.
+ */
+ {
+ int i;
+ for(i=nHdr32; i<WALINDEX_PGSZ/sizeof(u32); i++){
+ if( aShare[i]!=aPrivate[i] ){
+ /* Atomic memory operations are not required here because if
+ ** the value needs to be changed, that means it is not being
+ ** accessed concurrently. */
+ aShare[i] = aPrivate[i];
+ }
+ }
+ }
+#endif
++ SEH_INJECT_FAULT;
+ if( iFrame<=iLast ) break; /* Frame loop ended early: error or bad frame */
+ }
+
++ SEH_FREE_ON_ERROR(aFrame, 0);
+ sqlite3_free(aFrame);
+ }else if( pbZero ){
+ *pbZero = 1;
+ }
+ }
+
+ /* Use the checksum as of the last commit frame seen (zero if none). */
+ pWal->hdr.aFrameCksum[0] = aFrameCksum[0];
+ pWal->hdr.aFrameCksum[1] = aFrameCksum[1];
+
+ return rc;
+}
+/*
+** Open the second wal file, named pWal->zWalName2, on file handle
+** pWal->apWalFd[1], unless that handle is already open. The file is opened
+** with the READWRITE, CREATE and WAL open flags.
+**
+** Returns SQLITE_OK if the handle was already open, otherwise the result
+** of sqlite3OsOpen().
+*/
+static int walOpenWal2(Wal *pWal){
+ int rc = SQLITE_OK;
+ if( !isOpen(pWal->apWalFd[1]) ){
+ int f = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL);
+ rc = sqlite3OsOpen(pWal->pVfs, pWal->zWalName2, pWal->apWalFd[1], f, &f);
+ }
+ return rc;
+}
+/*
+** If a file named pWal->zWalName2 exists, open it, truncate it to zero
+** bytes and close it again. If no such file exists this is a no-op.
+**
+** The second wal file handle must not be open when this is called
+** (asserted), and it is closed again before returning whenever the open
+** succeeded. The result of sqlite3OsClose() is not checked.
+**
+** Returns the result of the access check, the open or the truncate,
+** whichever was the last of those to run.
+*/
+static int walTruncateWal2(Wal *pWal){
+ int bIs; /* Set by sqlite3OsAccess(): true if file exists */
+ int rc;
+ assert( !isOpen(pWal->apWalFd[1]) );
+ rc = sqlite3OsAccess(pWal->pVfs, pWal->zWalName2, SQLITE_ACCESS_EXISTS, &bIs);
+ if( rc==SQLITE_OK && bIs ){
+ rc = walOpenWal2(pWal);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3OsTruncate(pWal->apWalFd[1], 0);
+ sqlite3OsClose(pWal->apWalFd[1]);
+ }
+ }
+ return rc;
+}
/*
** Recover the wal-index by reading the write-ahead log file.
memset(p, 0, nByte);
p->nSegment = nSegment;
aTmp = (ht_slot*)&(((u8*)p)[nByte]);
- for(i=walFramePage(nBackfill+1); rc==SQLITE_OK && i<nSegment; i++){
+ SEH_FREE_ON_ERROR(0, p);
+ i = iMode==2 ? iWal : walFramePage(nBackfill+1);
+ for(; rc==SQLITE_OK && i<=iLastSeg; i+=iMode){
WalHashLoc sLoc;
rc = walHashGet(pWal, i, &sLoc);
** in the SQLITE_CHECKPOINT_PASSIVE mode. */
assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 );
- /* Compute in mxSafeFrame the index of the last frame of the WAL that is
- ** safe to write into the database. Frames beyond mxSafeFrame might
- ** overwrite database pages that are in use by active readers and thus
- ** cannot be backfilled from the WAL.
+ /* If this is a wal system (not wal2), compute in mxSafeFrame the index
+ ** of the last frame of the WAL that is safe to write into the database.
+ ** Frames beyond mxSafeFrame might overwrite database pages that are in
+ ** use by active readers and thus cannot be backfilled from the WAL.
*/
- mxSafeFrame = pWal->hdr.mxFrame;
- mxPage = pWal->hdr.nPage;
- for(i=1; i<WAL_NREADER; i++){
- u32 y = AtomicLoad(pInfo->aReadMark+i); SEH_INJECT_FAULT;
- if( mxSafeFrame>y ){
- assert( y<=pWal->hdr.mxFrame );
- rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1);
- if( rc==SQLITE_OK ){
- u32 iMark = (i==1 ? mxSafeFrame : READMARK_NOT_USED);
- AtomicStore(pInfo->aReadMark+i, iMark); SEH_INJECT_FAULT;
- walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
- }else if( rc==SQLITE_BUSY ){
- mxSafeFrame = y;
- xBusy = 0;
- }else{
- goto walcheckpoint_out;
+ if( bWal2==0 ){
+ mxSafeFrame = pWal->hdr.mxFrame;
+ mxPage = pWal->hdr.nPage;
+ for(i=1; i<WAL_NREADER; i++){
- u32 y = AtomicLoad(pInfo->aReadMark+i);
++ u32 y = AtomicLoad(pInfo->aReadMark+i); SEH_INJECT_FAULT;
+ if( mxSafeFrame>y ){
+ assert( y<=pWal->hdr.mxFrame );
+ rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1);
+ if( rc==SQLITE_OK ){
+ u32 iMark = (i==1 ? mxSafeFrame : READMARK_NOT_USED);
- AtomicStore(pInfo->aReadMark+i, iMark);
++ AtomicStore(pInfo->aReadMark+i, iMark); SEH_INJECT_FAULT;
+ walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
+ }else if( rc==SQLITE_BUSY ){
+ mxSafeFrame = y;
+ xBusy = 0;
+ }else{
+ goto walcheckpoint_out;
+ }
}
}
}
assert( rc==SQLITE_OK || pIter==0 );
}
- if( pIter
- && (rc = walBusyLock(pWal,xBusy,pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK
- ){
+ if( pIter && (bWal2
+ || (rc = walBusyLock(pWal, xBusy, pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK
+ )){
u32 nBackfill = pInfo->nBackfill;
- pInfo->nBackfillAttempted = mxSafeFrame;
+
+ assert( bWal2==0 || nBackfill==0 );
+ pInfo->nBackfillAttempted = mxSafeFrame; SEH_INJECT_FAULT;
- /* Sync the WAL to disk */
- rc = sqlite3OsSync(pWal->pWalFd, CKPT_SYNC_FLAGS(sync_flags));
+ /* Sync the wal file being checkpointed to disk */
+ rc = sqlite3OsSync(pWalFd, CKPT_SYNC_FLAGS(sync_flags));
/* If the database may grow as a result of this checkpoint, hint
- ** about the eventual size of the db file to the VFS layer.
- */
+ ** about the eventual size of the db file to the VFS layer. */
if( rc==SQLITE_OK ){
i64 nReq = ((i64)mxPage * szPage);
i64 nSize; /* Current size of database file */
/* Iterate through the contents of the WAL, copying data to the db file */
while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
i64 iOffset;
- assert( walFramePgno(pWal, iFrame)==iDbpage );
+
+ assert( bWal2==1 || walFramePgno(pWal, iFrame)==iDbpage );
+ assert( bWal2==0 || walFramePgno2(pWal, iCkpt, iFrame)==iDbpage );
+
+ SEH_INJECT_FAULT;
if( AtomicLoad(&db->u1.isInterrupted) ){
rc = db->mallocFailed ? SQLITE_NOMEM_BKPT : SQLITE_INTERRUPT;
break;
i64 szDb = pWal->hdr.nPage*(i64)szPage;
testcase( IS_BIG_INT(szDb) );
rc = sqlite3OsTruncate(pWal->pDbFd, szDb);
- if( rc==SQLITE_OK ){
- rc = sqlite3OsSync(pWal->pDbFd, CKPT_SYNC_FLAGS(sync_flags));
- }
}
if( rc==SQLITE_OK ){
- AtomicStore(&pInfo->nBackfill, mxSafeFrame); SEH_INJECT_FAULT;
+ rc = sqlite3OsSync(pWal->pDbFd, CKPT_SYNC_FLAGS(sync_flags));
}
}
+ if( rc==SQLITE_OK ){
+ AtomicStore(&pInfo->nBackfill, (bWal2 ? 1 : mxSafeFrame));
++ SEH_INJECT_FAULT;
+ }
/* Release the reader lock held while backfilling */
- walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);
+ if( bWal2==0 ){
+ walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);
+ }
}
if( rc==SQLITE_BUSY ){
** until all readers have finished using the wal file. This ensures that
** the next process to write to the database restarts the wal file.
*/
- if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){
+ if( bWal2==0 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){
assert( pWal->writeLock );
+ SEH_INJECT_FAULT;
if( pInfo->nBackfill<pWal->hdr.mxFrame ){
rc = SQLITE_BUSY;
}else if( eMode>=SQLITE_CHECKPOINT_RESTART ){
int rc = SQLITE_OK;
if( pWal ){
int isDelete = 0; /* True to unlink wal and wal-index files */
+
+ assert( walAssertLockmask(pWal) );
+ pWal->bClosing = 1;
/* If an EXCLUSIVE lock can be obtained on the database file (using the
** ordinary, rollback-mode locking methods, this guarantees that the
if( pWal->exclusiveMode==WAL_NORMAL_MODE ){
pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
}
- rc = sqlite3WalCheckpoint(pWal, db,
- SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0
- );
- if( rc==SQLITE_OK ){
- int bPersist = -1;
- sqlite3OsFileControlHint(
- pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist
+ for(i=0; rc==SQLITE_OK && i<2; i++){
+ rc = sqlite3WalCheckpoint(pWal, db,
+ SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0
);
- if( bPersist!=1 ){
- /* Try to delete the WAL file if the checkpoint completed and
- ** fsynced (rc==SQLITE_OK) and if we are not in persistent-wal
- ** mode (!bPersist) */
- isDelete = 1;
- }else if( pWal->mxWalSize>=0 ){
- /* Try to truncate the WAL file to zero bytes if the checkpoint
- ** completed and fsynced (rc==SQLITE_OK) and we are in persistent
- ** WAL mode (bPersist) and if the PRAGMA journal_size_limit is a
- ** non-negative value (pWal->mxWalSize>=0). Note that we truncate
- ** to zero bytes as truncating to the journal_size_limit might
- ** leave a corrupt WAL file on disk. */
- walLimitSize(pWal, 0);
+ if( rc==SQLITE_OK ){
+ int bPersist = -1;
+ sqlite3OsFileControlHint(
+ pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist
+ );
+ if( bPersist!=1 ){
+ /* Try to delete the WAL file if the checkpoint completed and
+ ** fsynced (rc==SQLITE_OK) and if we are not in persistent-wal
+ ** mode (!bPersist) */
+ isDelete = 1;
+ }else if( pWal->mxWalSize>=0 ){
+ /* Try to truncate the WAL file to zero bytes if the checkpoint
+ ** completed and fsynced (rc==SQLITE_OK) and we are in persistent
+ ** WAL mode (bPersist) and if the PRAGMA journal_size_limit is a
+ ** non-negative value (pWal->mxWalSize>=0). Note that we truncate
+ ** to zero bytes as truncating to the journal_size_limit might
+ ** leave a corrupt WAL file on disk. */
+ walLimitSize(pWal, 0);
+ }
}
- walCkptInfo(pWal)->nBackfill = 0;
- walidxSetFile(&pWal->hdr, !walidxGetFile(&pWal->hdr));
- pWal->writeLock = 1;
- walIndexWriteHdr(pWal);
- pWal->writeLock = 0;
-
+
+ if( isWalMode2(pWal)==0 ) break;
+
++ SEH_TRY {
++ walCkptInfo(pWal)->nBackfill = 0;
++ walidxSetFile(&pWal->hdr, !walidxGetFile(&pWal->hdr));
++ pWal->writeLock = 1;
++ walIndexWriteHdr(pWal);
++ pWal->writeLock = 0;
++ }
++ SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; )
}
}
assert( pWal->nWiData>0 );
assert( pWal->apWiData[0]!=0 );
pInfo = walCkptInfo(pWal);
- if( !useWal && AtomicLoad(&pInfo->nBackfill)==pWal->hdr.mxFrame
-#ifdef SQLITE_ENABLE_SNAPSHOT
- && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0)
-#endif
- ){
- /* The WAL has been completely backfilled (or it is empty).
- ** and can be safely ignored.
- */
- rc = walLockShared(pWal, WAL_READ_LOCK(0));
+ SEH_INJECT_FAULT;
+ if( isWalMode2(pWal) ){
+ /* This connection needs a "part" lock on the current wal file and,
+ ** unless pInfo->nBackfill is set to indicate that it has already been
+ ** checkpointed, a "full" lock on the other wal file. */
+ int iWal = walidxGetFile(&pWal->hdr);
+ int nBackfill = pInfo->nBackfill || walidxGetMxFrame(&pWal->hdr, !iWal)==0;
+ int eLock = 1 + (iWal*2) + (nBackfill==iWal);
+
+ assert( nBackfill==0 || nBackfill==1 );
+ assert( iWal==0 || iWal==1 );
+ assert( iWal!=0 || nBackfill!=1 || eLock==WAL_LOCK_PART1 );
+ assert( iWal!=0 || nBackfill!=0 || eLock==WAL_LOCK_PART1_FULL2 );
+ assert( iWal!=1 || nBackfill!=1 || eLock==WAL_LOCK_PART2 );
+ assert( iWal!=1 || nBackfill!=0 || eLock==WAL_LOCK_PART2_FULL1 );
+
+ rc = walLockShared(pWal, WAL_READ_LOCK(eLock));
+ if( rc!=SQLITE_OK ){
+ return (rc==SQLITE_BUSY ? WAL_RETRY : rc);
+ }
walShmBarrier(pWal);
- if( rc==SQLITE_OK ){
- if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){
- /* It is not safe to allow the reader to continue here if frames
- ** may have been appended to the log before READ_LOCK(0) was obtained.
- ** When holding READ_LOCK(0), the reader ignores the entire log file,
- ** which implies that the database file contains a trustworthy
- ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from
- ** happening, this is usually correct.
- **
- ** However, if frames have been appended to the log (or if the log
- ** is wrapped and written for that matter) before the READ_LOCK(0)
- ** is obtained, that is not necessarily true. A checkpointer may
- ** have started to backfill the appended frames but crashed before
- ** it finished. Leaving a corrupt image in the database file.
- */
- walUnlockShared(pWal, WAL_READ_LOCK(0));
- return WAL_RETRY;
+ if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){
+ walUnlockShared(pWal, WAL_READ_LOCK(eLock));
+ return WAL_RETRY;
+ }else{
+ pWal->readLock = eLock;
+ }
+ assert( pWal->minFrame==0 && walFramePage(pWal->minFrame)==0 );
+ }else{
+ u32 mxReadMark; /* Largest aReadMark[] value */
+ int mxI; /* Index of largest aReadMark[] value */
+ int i; /* Loop counter */
+ u32 mxFrame; /* Wal frame to lock to */
+ if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame
+ #ifdef SQLITE_ENABLE_SNAPSHOT
+ && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0)
+ #endif
+ ){
+ /* The WAL has been completely backfilled (or it is empty)
+ ** and can be safely ignored.
+ */
+ rc = walLockShared(pWal, WAL_READ_LOCK(0));
+ walShmBarrier(pWal);
+ if( rc==SQLITE_OK ){
+ if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr,sizeof(WalIndexHdr)) ){
+ /* It is not safe to allow the reader to continue here if frames
+ ** may have been appended to the log before READ_LOCK(0) was obtained.
+ ** When holding READ_LOCK(0), the reader ignores the entire log file,
+ ** which implies that the database file contains a trustworthy
+ ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from
+ ** happening, this is usually correct.
+ **
+ ** However, if frames have been appended to the log (or if the log
+ ** is wrapped and written for that matter) before the READ_LOCK(0)
+ ** is obtained, that is not necessarily true. A checkpointer may
+ ** have started to backfill the appended frames but crashed before
+ ** it finished. Leaving a corrupt image in the database file.
+ */
+ walUnlockShared(pWal, WAL_READ_LOCK(0));
+ return WAL_RETRY;
+ }
+ pWal->readLock = 0;
+ return SQLITE_OK;
+ }else if( rc!=SQLITE_BUSY ){
+ return rc;
}
- pWal->readLock = 0;
- return SQLITE_OK;
- }else if( rc!=SQLITE_BUSY ){
- return rc;
}
- }
-
- /* If we get this far, it means that the reader will want to use
- ** the WAL to get at content from recent commits. The job now is
- ** to select one of the aReadMark[] entries that is closest to
- ** but not exceeding pWal->hdr.mxFrame and lock that entry.
- */
- mxReadMark = 0;
- mxI = 0;
- mxFrame = pWal->hdr.mxFrame;
-#ifdef SQLITE_ENABLE_SNAPSHOT
- if( pWal->pSnapshot && pWal->pSnapshot->mxFrame<mxFrame ){
- mxFrame = pWal->pSnapshot->mxFrame;
- }
-#endif
- for(i=1; i<WAL_NREADER; i++){
- u32 thisMark = AtomicLoad(pInfo->aReadMark+i); SEH_INJECT_FAULT;
- if( mxReadMark<=thisMark && thisMark<=mxFrame ){
- assert( thisMark!=READMARK_NOT_USED );
- mxReadMark = thisMark;
- mxI = i;
+
+ /* If we get this far, it means that the reader will want to use
+ ** the WAL to get at content from recent commits. The job now is
+ ** to select one of the aReadMark[] entries that is closest to
+ ** but not exceeding pWal->hdr.mxFrame and lock that entry.
+ */
+ mxReadMark = 0;
+ mxI = 0;
+ mxFrame = pWal->hdr.mxFrame;
+ #ifdef SQLITE_ENABLE_SNAPSHOT
+ if( pWal->pSnapshot && pWal->pSnapshot->mxFrame<mxFrame ){
+ mxFrame = pWal->pSnapshot->mxFrame;
}
- }
- if( (pWal->readOnly & WAL_SHM_RDONLY)==0
- && (mxReadMark<mxFrame || mxI==0)
- ){
+ #endif
for(i=1; i<WAL_NREADER; i++){
- u32 thisMark = AtomicLoad(pInfo->aReadMark+i);
- rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
- if( rc==SQLITE_OK ){
- AtomicStore(pInfo->aReadMark+i,mxFrame);
- mxReadMark = mxFrame;
++ u32 thisMark = AtomicLoad(pInfo->aReadMark+i); SEH_INJECT_FAULT;
+ if( mxReadMark<=thisMark && thisMark<=mxFrame ){
+ assert( thisMark!=READMARK_NOT_USED );
+ mxReadMark = thisMark;
mxI = i;
- walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
- break;
- }else if( rc!=SQLITE_BUSY ){
- return rc;
}
}
- }
- if( mxI==0 ){
- assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
- return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTINIT;
- }
+ if( (pWal->readOnly & WAL_SHM_RDONLY)==0
+ && (mxReadMark<mxFrame || mxI==0)
+ ){
+ for(i=1; i<WAL_NREADER; i++){
+ rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
+ if( rc==SQLITE_OK ){
+ AtomicStore(pInfo->aReadMark+i,mxFrame);
+ mxReadMark = mxFrame;
+ mxI = i;
+ walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
+ break;
+ }else if( rc!=SQLITE_BUSY ){
+ return rc;
+ }
+ }
+ }
+ if( mxI==0 ){
+ assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
+ return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTINIT;
+ }
+
+ rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
+ if( rc ){
+ return rc==SQLITE_BUSY ? WAL_RETRY : rc;
+ }
+ /* Now that the read-lock has been obtained, check that neither the
+ ** value in the aReadMark[] array or the contents of the wal-index
+ ** header have changed.
+ **
+ ** It is necessary to check that the wal-index header did not change
+ ** between the time it was read and when the shared-lock was obtained
+ ** on WAL_READ_LOCK(mxI), to account for the possibility
+ ** that the log file may have been wrapped by a writer, or that frames
+ ** that occur later in the log than pWal->hdr.mxFrame may have been
+ ** copied into the database by a checkpointer. If either of these things
+ ** happened, then reading the database with the current value of
+ ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry
+ ** instead.
+ **
+ ** Before checking that the live wal-index header has not changed
+ ** since it was read, set Wal.minFrame to the first frame in the wal
+ ** file that has not yet been checkpointed. This client will not need
+ ** to read any frames earlier than minFrame from the wal file - they
+ ** can be safely read directly from the database file.
+ **
+ ** Because a ShmBarrier() call is made between taking the copy of
+ ** nBackfill and checking that the wal-header in shared-memory still
+ ** matches the one cached in pWal->hdr, it is guaranteed that the
+ ** checkpointer that set nBackfill was not working with a wal-index
+ ** header newer than that cached in pWal->hdr. If it were, that could
+ ** cause a problem. The checkpointer could omit to checkpoint
+ ** a version of page X that lies before pWal->minFrame (call that version
+ ** A) on the basis that there is a newer version (version B) of the same
+ ** page later in the wal file. But if version B happens to lie past
+ ** frame pWal->hdr.mxFrame - then the client would incorrectly assume
+ ** that it can read version A from the database file. However, since
+ ** we can guarantee that the checkpointer that set nBackfill could not
+ ** see any pages past pWal->hdr.mxFrame, this problem does not come up.
+ */
- pWal->minFrame = AtomicLoad(&pInfo->nBackfill)+1;
++ pWal->minFrame = AtomicLoad(&pInfo->nBackfill)+1; SEH_INJECT_FAULT;
+ walShmBarrier(pWal);
+ if( AtomicLoad(pInfo->aReadMark+mxI)!=mxReadMark
+ || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr))
+ ){
+ walUnlockShared(pWal, WAL_READ_LOCK(mxI));
+ return WAL_RETRY;
+ }else{
+ assert( mxReadMark<=pWal->hdr.mxFrame );
+ pWal->readLock = (i16)mxI;
+ }
- rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
- if( rc ){
- return rc==SQLITE_BUSY ? WAL_RETRY : rc;
- }
- /* Now that the read-lock has been obtained, check that neither the
- ** value in the aReadMark[] array or the contents of the wal-index
- ** header have changed.
- **
- ** It is necessary to check that the wal-index header did not change
- ** between the time it was read and when the shared-lock was obtained
- ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility
- ** that the log file may have been wrapped by a writer, or that frames
- ** that occur later in the log than pWal->hdr.mxFrame may have been
- ** copied into the database by a checkpointer. If either of these things
- ** happened, then reading the database with the current value of
- ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry
- ** instead.
- **
- ** Before checking that the live wal-index header has not changed
- ** since it was read, set Wal.minFrame to the first frame in the wal
- ** file that has not yet been checkpointed. This client will not need
- ** to read any frames earlier than minFrame from the wal file - they
- ** can be safely read directly from the database file.
- **
- ** Because a ShmBarrier() call is made between taking the copy of
- ** nBackfill and checking that the wal-header in shared-memory still
- ** matches the one cached in pWal->hdr, it is guaranteed that the
- ** checkpointer that set nBackfill was not working with a wal-index
- ** header newer than that cached in pWal->hdr. If it were, that could
- ** cause a problem. The checkpointer could omit to checkpoint
- ** a version of page X that lies before pWal->minFrame (call that version
- ** A) on the basis that there is a newer version (version B) of the same
- ** page later in the wal file. But if version B happens to like past
- ** frame pWal->hdr.mxFrame - then the client would incorrectly assume
- ** that it can read version A from the database file. However, since
- ** we can guarantee that the checkpointer that set nBackfill could not
- ** see any pages past pWal->hdr.mxFrame, this problem does not come up.
- */
- pWal->minFrame = AtomicLoad(&pInfo->nBackfill)+1; SEH_INJECT_FAULT;
- walShmBarrier(pWal);
- if( AtomicLoad(pInfo->aReadMark+mxI)!=mxReadMark
- || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr))
- ){
- walUnlockShared(pWal, WAL_READ_LOCK(mxI));
- return WAL_RETRY;
- }else{
- assert( mxReadMark<=pWal->hdr.mxFrame );
- pWal->readLock = (i16)mxI;
}
return rc;
}
#ifdef SQLITE_ENABLE_SNAPSHOT
- rc = sqlite3OsRead(pWal->pWalFd, pBuf1, szPage, iWalOff);
+ /*
+ ** This function does the work of sqlite3WalSnapshotRecover().
+ **
+ ** Frames are visited from WalCkptInfo.nBackfillAttempted downwards for as
+ ** long as the frame number is greater than WalCkptInfo.nBackfill. For each
+ ** frame, the page image stored in the wal file is compared with the page
+ ** at the corresponding offset of the database file. If the two differ, or
+ ** if the page lies beyond the current end of the database file,
+ ** nBackfillAttempted is lowered to the preceding frame. The scan stops at
+ ** the first frame whose image matches the database file, or on the first
+ ** error.
+ **
+ ** Returns SQLITE_OK, or the error code from sqlite3OsFileSize(),
+ ** walHashGet() or sqlite3OsRead() that ended the scan.
+ */
+ static int walSnapshotRecover(
+ Wal *pWal, /* WAL handle */
+ void *pBuf1, /* Temp buffer pWal->szPage bytes in size */
+ void *pBuf2 /* Temp buffer pWal->szPage bytes in size */
+ ){
+ int szPage = (int)pWal->szPage;
+ int rc;
+ i64 szDb; /* Size of db file in bytes */
+
+ rc = sqlite3OsFileSize(pWal->pDbFd, &szDb);
+ if( rc==SQLITE_OK ){
+ volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
+ u32 i = pInfo->nBackfillAttempted;
+ for(i=pInfo->nBackfillAttempted; i>AtomicLoad(&pInfo->nBackfill); i--){
+ WalHashLoc sLoc; /* Hash table location */
+ u32 pgno; /* Page number in db file */
+ i64 iDbOff; /* Offset of db file entry */
+ i64 iWalOff; /* Offset of wal file entry */
+
+ rc = walHashGet(pWal, walFramePage(i), &sLoc);
+ if( rc!=SQLITE_OK ) break;
+ /* Frame i is entry (i-sLoc.iZero) of this hash table. As in
+ ** walSearchHash(), which reads aPgno[iH-1] for frame iH+sLoc.iZero,
+ ** the page number of entry N is stored at aPgno[N-1]. */
+ assert( i>sLoc.iZero );
+ pgno = sLoc.aPgno[i-sLoc.iZero-1];
+ iDbOff = (i64)(pgno-1) * szPage;
+
+ if( iDbOff+szPage<=szDb ){
+ iWalOff = walFrameOffset(i, szPage) + WAL_FRAME_HDRSIZE;
++ rc = sqlite3OsRead(pWal->apWalFd[0], pBuf1, szPage, iWalOff);
+
+ if( rc==SQLITE_OK ){
+ rc = sqlite3OsRead(pWal->pDbFd, pBuf2, szPage, iDbOff);
+ }
+
+ /* Stop on an I/O error, or once the wal and db images agree. */
+ if( rc!=SQLITE_OK || 0==memcmp(pBuf1, pBuf2, szPage) ){
+ break;
+ }
+ }
+
+ pInfo->nBackfillAttempted = i-1;
+ }
+ }
+
+ return rc;
+ }
+
/*
** Attempt to reduce the value of the WalCkptInfo.nBackfillAttempted
** variable so that older snapshots can be accessed. To do this, loop
return rc;
}
+ /*
+ ** Begin a read transaction on the database.
+ **
+ ** This routine used to be called sqlite3OpenSnapshot() and with good reason:
+ ** it takes a snapshot of the state of the WAL and wal-index for the current
+ ** instant in time. The current thread will continue to use this snapshot.
+ ** Other threads might append new content to the WAL and wal-index but
+ ** that extra content is ignored by the current thread.
+ **
+ ** If the database contents have changed since the previous read
+ ** transaction, then *pChanged is set to 1 before returning. The
+ ** Pager layer will use this to know that its cache is stale and
+ ** needs to be flushed.
+ **
+ ** The work itself is done by walBeginReadTransaction(), which is run
+ ** inside an SEH_TRY block here. The return value is whatever
+ ** walBeginReadTransaction() returns or, if an exception is caught by the
+ ** SEH_EXCEPT handler, the value returned by walHandleException().
+ */
+ int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
+ int rc;
+ SEH_TRY {
+ rc = walBeginReadTransaction(pWal, pChanged);
+ }
+ SEH_EXCEPT( rc = walHandleException(pWal); )
+ return rc;
+ }
+
++
/*
** Finish with a read transaction. All this does is release the
** read-lock.
}
}
- for(iKey=walHash(pgno); sLoc.aHash[iKey]; iKey=walNextHash(iKey)){
- u32 iFrame = sLoc.aHash[iKey] + sLoc.iZero;
- if( iFrame<=iLast
- && iFrame>=pWal->minFrame
- && sLoc.aPgno[sLoc.aHash[iKey]-1]==pgno
- ){
+/* Search hash table iHash for an entry matching page number
+** pgno. Each call to this function searches a single hash table
+** (each hash table indexes up to HASHTABLE_NPAGE frames).
+**
+** This code might run concurrently with the code in walIndexAppend()
+** that adds entries to the wal-index (and possibly to this hash
+** table). This means the value just read from the hash
+** slot (aHash[iKey]) may have been added before or after the
+** current read transaction was opened. Values added after the
+** read transaction was opened may have been written incorrectly -
+** i.e. these slots may contain garbage data. However, we assume
+** that any slots written before the current read transaction was
+** opened remain unmodified.
+**
+** For the reasons above, the if(...) condition featured in the inner
+** loop of the following block is more stringent than would be required
+** if we had exclusive access to the hash-table:
+**
+** (aPgno[iH-1]==pgno):
+** This condition filters out normal hash-table collisions.
+**
+** (iFrame<=iLast):
+** This condition filters out entries that were added to the hash
+** table after the current read-transaction had started.
+**
+** (iFrame>=pWal->minFrame):
+** This condition filters out frames that lie before Wal.minFrame,
+** which this connection ignores.
+**
+** Each time a slot satisfies all three conditions its frame number is
+** written to *piRead, so the last match in probe order is the one left
+** behind. If nothing matches, *piRead is left unchanged.
+**
+** Returns SQLITE_OK, the error code returned by walHashGet(), or
+** SQLITE_CORRUPT_BKPT if the probe sequence visits more than
+** HASHTABLE_NSLOT occupied slots without reaching an empty one.
+*/
+static int walSearchHash(
+ Wal *pWal, /* WAL handle */
+ u32 iLast, /* Ignore frames with numbers larger than this */
+ int iHash, /* Index of the hash table to search */
+ Pgno pgno, /* Page number to search for */
+ u32 *piRead /* IN/OUT: frame number of the match, if any */
+){
+ WalHashLoc sLoc; /* Hash table location */
+ int iKey; /* Hash slot index */
+ int nCollide; /* Number of hash collisions remaining */
+ int rc; /* Error code */
++ u32 iH; /* Value loaded from hash slot aHash[iKey] */
+
+ rc = walHashGet(pWal, iHash, &sLoc);
+ if( rc!=SQLITE_OK ){
+ return rc;
+ }
+ nCollide = HASHTABLE_NSLOT;
++ iKey = walHash(pgno);
++ SEH_INJECT_FAULT;
++ while( (iH = AtomicLoad(&sLoc.aHash[iKey]))!=0 ){
++ u32 iFrame = iH + sLoc.iZero;
++ if( iFrame<=iLast && iFrame>=pWal->minFrame && sLoc.aPgno[iH-1]==pgno ){
+ assert( iFrame>*piRead || CORRUPT_DB );
+ *piRead = iFrame;
+ }
+ if( (nCollide--)==0 ){
+ return SQLITE_CORRUPT_BKPT;
+ }
++ iKey = walNextHash(iKey);
+ }
+
+ return SQLITE_OK;
+}
+/*
+** Search wal file iWal for a frame containing page pgno. If one is found
+** its frame number is stored in *piRead by walSearchHash().
+**
+** The last frame of the file is taken from walidxGetMxFrame(); if that
+** is zero nothing is searched. Otherwise hash tables are visited from
+** the one containing the last frame backwards, stopping at the table
+** containing pWal->minFrame, as soon as *piRead becomes non-zero, or on
+** the first error. Because the loop only runs while *piRead is zero, a
+** non-zero value passed in by the caller suppresses the search.
+**
+** In wal2 mode (isWalMode2()) the last frame number is first converted
+** with walExternalEncode() and the hash table index is stepped by two
+** instead of one.
+** NOTE(review): presumably the hash tables of the two wal files are
+** interleaved - confirm against walExternalEncode().
+**
+** Returns SQLITE_OK, or the error code returned by walSearchHash().
+*/
+static int walSearchWal(
+ Wal *pWal, /* WAL handle */
+ int iWal, /* Which wal file to search */
+ Pgno pgno, /* Page number to search for */
+ u32 *piRead /* IN/OUT: frame number of the match, if any */
+){
+ int rc = SQLITE_OK;
+ int bWal2 = isWalMode2(pWal);
+ u32 iLast = walidxGetMxFrame(&pWal->hdr, iWal);
+ if( iLast ){
+ int iHash;
+ int iMinHash = walFramePage(pWal->minFrame);
+ u32 iExternal = bWal2 ? walExternalEncode(iWal, iLast) : iLast;
+ assert( bWal2==0 || pWal->minFrame==0 );
+ for(iHash=walFramePage(iExternal);
+ iHash>=iMinHash && *piRead==0;
+ iHash-=(1+bWal2)
+ ){
+ rc = walSearchHash(pWal, iExternal, iHash, pgno, piRead);
+ if( rc!=SQLITE_OK ) break;
+ }
+ }
+ return rc;
+}
+
/*
** Search the wal file for page pgno. If found, set *piRead to the frame that
** contains the page. Otherwise, if pgno is not in the wal file, set *piRead
** time the read transaction on this connection was started, then
** the write is disallowed.
*/
- if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){
+ SEH_TRY {
+ if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){
+ rc = SQLITE_BUSY_SNAPSHOT;
+ }
- }
++ }
+ SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; )
+
+ if( rc!=SQLITE_OK ){
walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
pWal->writeLock = 0;
- rc = SQLITE_BUSY_SNAPSHOT;
}
+
return rc;
}
/*
** Restore the cached wal-index header (pWal->hdr) from the copy returned
** by walIndexHdr(), then invoke xUndo(pUndoCtx, pgno) once for the page
** number of every frame after the restored last-frame value, up to and
** including the last-frame value that was cached on entry. If the two
** values differ, walCleanupHash() is called afterwards.
**
** Nothing is done unless pWal->writeLock is set. Returns SQLITE_OK, the
** first non-OK value returned by xUndo(), or SQLITE_IOERR_IN_PAGE if an
** exception is caught by the SEH_EXCEPT handler.
**
** NOTE(review): this description follows the lines marked '+' (the
** post-image of the patch); lines marked '-' are the superseded version.
*/
int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
int rc = SQLITE_OK;
if( ALWAYS(pWal->writeLock) ){
- Pgno iMax = pWal->hdr.mxFrame;
+ int iWal = walidxGetFile(&pWal->hdr); /* File index from cached header */
+ Pgno iMax = walidxGetMxFrame(&pWal->hdr, iWal); /* Last frame on entry */
+ Pgno iNew; /* Last frame according to the restored header */
Pgno iFrame;
- /* Restore the clients cache of the wal-index header to the state it
- ** was in before the client began writing to the database.
- */
- memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr));
- assert( walidxGetFile(&pWal->hdr)==iWal );
- iNew = walidxGetMxFrame(&pWal->hdr, walidxGetFile(&pWal->hdr));
-
- for(iFrame=iNew+1; ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; iFrame++){
- /* This call cannot fail. Unless the page for which the page number
- ** is passed as the second argument is (a) in the cache and
- ** (b) has an outstanding reference, then xUndo is either a no-op
- ** (if (a) is false) or simply expels the page from the cache (if (b)
- ** is false).
- **
- ** If the upper layer is doing a rollback, it is guaranteed that there
- ** are no outstanding references to any page other than page 1. And
- ** page 1 is never written to the log until the transaction is
- ** committed. As a result, the call to xUndo may not fail.
+ assert( isWalMode2(pWal) || iWal==0 );
+
- ** was in before the client began writing to the database.
+ SEH_TRY {
+ /* Restore the client's cache of the wal-index header to the state it
++ ** was in before the client began writing to the database.
*/
- Pgno pgno;
- if( isWalMode2(pWal) ){
- pgno = walFramePgno2(pWal, iWal, iFrame);
- }else{
- pgno = walFramePgno(pWal, iFrame);
+ memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr));
-
- for(iFrame=pWal->hdr.mxFrame+1;
- ALWAYS(rc==SQLITE_OK) && iFrame<=iMax;
- iFrame++
- ){
++ assert( walidxGetFile(&pWal->hdr)==iWal );
++ iNew = walidxGetMxFrame(&pWal->hdr, walidxGetFile(&pWal->hdr));
++
++ for(iFrame=iNew+1; ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; iFrame++){
+ /* This call cannot fail. Unless the page for which the page number
+ ** is passed as the second argument is (a) in the cache and
+ ** (b) has an outstanding reference, then xUndo is either a no-op
+ ** (if (a) is false) or simply expels the page from the cache (if (b)
+ ** is false).
+ **
+ ** If the upper layer is doing a rollback, it is guaranteed that there
+ ** are no outstanding references to any page other than page 1. And
+ ** page 1 is never written to the log until the transaction is
+ ** committed. As a result, the call to xUndo may not fail.
+ */
- assert( walFramePgno(pWal, iFrame)!=1 );
- rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame));
++ Pgno pgno;
++ if( isWalMode2(pWal) ){
++ pgno = walFramePgno2(pWal, iWal, iFrame);
++ }else{
++ pgno = walFramePgno(pWal, iFrame);
++ }
++ assert( pgno!=1 );
++ rc = xUndo(pUndoCtx, pgno);
}
- assert( pgno!=1 );
- rc = xUndo(pUndoCtx, pgno);
- if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal);
++ if( iMax!=iNew ) walCleanupHash(pWal); /* Only if frames were dropped */
}
- if( iMax!=iNew ) walCleanupHash(pWal);
+ SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) /* Exception caught in the block above */
}
return rc;
}
** to the start of the log. Update the savepoint values to match.
*/
aWalData[0] = 0;
- aWalData[3] = pWal->nCkpt;
+ aWalData[3] = iCmp;
}
- if( aWalData[0]<pWal->hdr.mxFrame ){
- pWal->hdr.mxFrame = aWalData[0];
+ if( aWalData[0]<walidxGetMxFrame(&pWal->hdr, iWal) ){
+ walidxSetMxFrame(&pWal->hdr, iWal, aWalData[0]);
pWal->hdr.aFrameCksum[0] = aWalData[1];
pWal->hdr.aFrameCksum[1] = aWalData[2];
- walCleanupHash(pWal);
+ SEH_TRY {
+ walCleanupHash(pWal);
+ }
+ SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; )
}
return rc;
return rc;
}
++
+ /*
+ ** Write a set of frames to the log. The caller must hold the write-lock
+ ** on the log file (obtained using sqlite3WalBeginWriteTransaction()).
+ **
+ ** The difference between this function and walFrames() is that this
+ ** function wraps walFrames() in an SEH_TRY{...} block.
+ **
+ ** All six arguments are passed through to walFrames() unchanged. The
+ ** return value is whatever walFrames() returns or, if an exception is
+ ** caught by the SEH_EXCEPT handler, the value returned by
+ ** walHandleException().
+ */
+ int sqlite3WalFrames(
+ Wal *pWal, /* Wal handle to write to */
+ int szPage, /* Database page-size in bytes */
+ PgHdr *pList, /* List of dirty pages to write */
+ Pgno nTruncate, /* Database size after this commit */
+ int isCommit, /* True if this is a commit */
+ int sync_flags /* Flags to pass to OsSync() (or 0) */
+ ){
+ int rc;
+ SEH_TRY {
+ rc = walFrames(pWal, szPage, pList, nTruncate, isCommit, sync_flags);
+ }
+ SEH_EXCEPT( rc = walHandleException(pWal); )
+ return rc;
+ }
+
/*
** This routine is called to implement sqlite3_wal_checkpoint() and
** related interfaces.
/* Read the wal-index header. */
- if( rc==SQLITE_OK ){
- walDisableBlocking(pWal);
- rc = walIndexReadHdr(pWal, &isChanged);
- (void)walEnableBlocking(pWal);
- if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){
- sqlite3OsUnfetch(pWal->pDbFd, 0, 0);
- }
- }
-
- /* Copy data from the log to the database file. */
- if( rc==SQLITE_OK ){
- int iCkpt = walidxGetFile(&pWal->hdr);
-
- if( (walPagesize(pWal)!=nBuf)
- && ((pWal->hdr.mxFrame2 & 0x7FFFFFFF) || pWal->hdr.mxFrame)
- ){
- rc = SQLITE_CORRUPT_BKPT;
- }else{
- rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags, zBuf);
+ SEH_TRY {
+ if( rc==SQLITE_OK ){
+ walDisableBlocking(pWal);
+ rc = walIndexReadHdr(pWal, &isChanged);
+ (void)walEnableBlocking(pWal);
+ if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){
+ sqlite3OsUnfetch(pWal->pDbFd, 0, 0);
+ }
}
-
- /* If no error occurred, set the output variables. */
- if( rc==SQLITE_OK || rc==SQLITE_BUSY ){
- if( pnLog ){
- *pnLog = walidxGetMxFrame(&pWal->hdr,0)+walidxGetMxFrame(&pWal->hdr,1);
+
+ /* Copy data from the log to the database file. */
+ if( rc==SQLITE_OK ){
- if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){
++ int iCkpt = walidxGetFile(&pWal->hdr);
++
++ if( (walPagesize(pWal)!=nBuf)
++ && ((pWal->hdr.mxFrame2 & 0x7FFFFFFF) || pWal->hdr.mxFrame)
++ ){
+ rc = SQLITE_CORRUPT_BKPT;
+ }else{
+ rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags,zBuf);
}
- if( pnCkpt ){
- if( isWalMode2(pWal) ){
- if( (int)(walCkptInfo(pWal)->nBackfill) ){
- *pnCkpt = walidxGetMxFrame(&pWal->hdr, iCkpt);
-
++
+ /* If no error occurred, set the output variables. */
+ if( rc==SQLITE_OK || rc==SQLITE_BUSY ){
- if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame;
++ if( pnLog ){
++ WalIndexHdr *pHdr = &pWal->hdr;
++ *pnLog = walidxGetMxFrame(pHdr, 0) + walidxGetMxFrame(pHdr, 1);
++ }
+ SEH_INJECT_FAULT;
- if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill);
++ if( pnCkpt ){
++ if( isWalMode2(pWal) ){
++ if( (int)(walCkptInfo(pWal)->nBackfill) ){
++ *pnCkpt = walidxGetMxFrame(&pWal->hdr, iCkpt);
++ }else{
++ *pnCkpt = 0;
++ }
+ }else{
- *pnCkpt = 0;
++ *pnCkpt = walCkptInfo(pWal)->nBackfill;
+ }
- }else{
- *pnCkpt = walCkptInfo(pWal)->nBackfill;
+ }
}
}
}
+ SEH_EXCEPT( rc = walHandleException(pWal); )
- if( isChanged ){
+ if( isChanged && pWal->bClosing==0 ){
/* If a new wal-index header was loaded before the checkpoint was
** performed, then the pager-cache associated with pWal is now
** out of date. So zero the cached wal-index header to ensure that
** locks are taken in this case). Nor should the pager attempt to
** upgrade to exclusive-mode following such an error.
*/
- assert( pWal->readLock>=0 || pWal->lockError );
+ #ifndef SQLITE_USE_SEH
- assert( pWal->readLock>=0 || (op<=0 && pWal->exclusiveMode==0) );
+ assert( pWal->readLock!=WAL_LOCK_NONE || pWal->lockError );
+ #endif
+ assert( pWal->readLock!=WAL_LOCK_NONE || (op<=0 && pWal->exclusiveMode==0) );
if( op==0 ){
- if( pWal->exclusiveMode!=WAL_NORMAL_MODE ){
+ if( pWal->exclusiveMode ){
pWal->exclusiveMode = WAL_NORMAL_MODE;
- if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){
+ rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock));
+ if( rc!=SQLITE_OK ){
pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
}
rc = pWal->exclusiveMode==WAL_NORMAL_MODE;
*/
int sqlite3WalSnapshotCheck(Wal *pWal, sqlite3_snapshot *pSnapshot){
int rc;
- rc = walLockShared(pWal, WAL_CKPT_LOCK);
- if( rc==SQLITE_OK ){
- WalIndexHdr *pNew = (WalIndexHdr*)pSnapshot;
- if( memcmp(pNew->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt))
- || pNew->mxFrame<walCkptInfo(pWal)->nBackfillAttempted
- ){
- rc = SQLITE_ERROR_SNAPSHOT;
- walUnlockShared(pWal, WAL_CKPT_LOCK);
+
+ /* Snapshots may not be used with wal2 mode databases. */
+ if( isWalMode2(pWal) ) return SQLITE_ERROR;
+
+ /* Take a shared WAL_CKPT_LOCK, then check that the snapshot's salt
+ ** matches the cached wal-index header and that its mxFrame is not below
+ ** WalCkptInfo.nBackfillAttempted. If both checks pass, SQLITE_OK is
+ ** returned with the lock still held. If either fails, the lock is
+ ** released and SQLITE_ERROR_SNAPSHOT is returned. If the lock cannot be
+ ** obtained, the error from walLockShared() is returned. */
+ SEH_TRY {
+ rc = walLockShared(pWal, WAL_CKPT_LOCK);
+ if( rc==SQLITE_OK ){
+ WalIndexHdr *pNew = (WalIndexHdr*)pSnapshot;
+ if( memcmp(pNew->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt))
+ || pNew->mxFrame<walCkptInfo(pWal)->nBackfillAttempted
+ ){
+ rc = SQLITE_ERROR_SNAPSHOT;
+ walUnlockShared(pWal, WAL_CKPT_LOCK);
+ }
}
}
- SEH_EXCEPT( rc = walHandleException(pWal); )
++ /* The ';' belongs inside the parentheses: SEH_EXCEPT(X) pastes X between
++ ** the braces of the __except handler, so X must be a complete statement. */
++ SEH_EXCEPT( rc = walHandleException(pWal); )
return rc;
}