From: drh <> Date: Fri, 13 Dec 2024 16:37:09 +0000 (+0000) Subject: Try to reduce incidental divergence between trunk and wal2. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Fwal2-conflict-reduction;p=thirdparty%2Fsqlite.git Try to reduce incidental divergence between trunk and wal2. FossilOrigin-Name: 01c8ebfb1f154ef8a15103e13fba0cffa5008898a5d5def32d131ad0e081fc87 --- diff --git a/manifest b/manifest index 37a5ca6607..7be202cf97 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sharmless\scompiler\swarning\sthat\scomes\sup\swhen\susing\sSQLITE_DEBUG\sin\nseparate\scompilation\smode. -D 2024-12-13T01:29:22.397 +C Try\sto\sreduce\sincidental\sdivergence\sbetween\strunk\sand\swal2. +D 2024-12-13T16:37:09.919 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md e108e1e69ae8e8a59e93c455654b8ac9356a11720d3345df2a4743e9590fb20d @@ -764,8 +764,8 @@ F src/os_setup.h 6011ad7af5db4e05155f385eb3a9b4470688de6f65d6166b8956e58a3d87210 F src/os_unix.c d2edbd92b07a3f778c2defa8a2e9d75acceb6267bda56948c41e8cdda65224d6 F src/os_win.c 49c7725b500f5867e8360e75eeb30f9d70b62fa1f05c8a101da627210578df32 F src/os_win.h 7b073010f1451abe501be30d12f6bc599824944a -F src/pager.c 9656ad4e8331efb8a4f94f7a0c6440b98caea073950a367ea0c728a53b8e62c9 -F src/pager.h 4b1140d691860de0be1347474c51fee07d5420bd7f802d38cbab8ea4ab9f538a +F src/pager.c f7bbc582ce772e6e0dfe44a165e433c35d6ca8f6477c5c60f1102ac1c26e7e18 +F src/pager.h 6137149346e6c8a3ddc1eeb40aee46381e9bc8b0fcc6dda8a1efde993c2275b8 F src/parse.y dcf45a81b61223ac93e61fdfe9b22d635dd371c446e8222634d90aa37e25e5f6 F src/pcache.c 588cc3c5ccaaadde689ed35ce5c5c891a1f7b1f4d1f56f6cf0143b74d8ee6484 F src/pcache.h 1497ce1b823cf00094bb0cf3bac37b345937e6f910890c626b16512316d3abf5 @@ -846,7 +846,7 @@ F src/upsert.c 215328c3f91623c520ec8672c44323553f12caeb4f01b1090ebdca99fdf7b4f1 F src/utf.c 8b29d9a5956569ea2700f869669b8ef67a9662ee5e724ff77ab3c387e27094ba F src/util.c e5f6a5eeaa26b69054a43bbd0048cfe3d2851f6961052b35aed8f695df922850 F src/vacuum.c b763b6457bd058d2072ef9364832351fd8d11e8abf70cbb349657360f7d55c40 -F src/vdbe.c 8a6eb02823b424b273614bae41579392a5c495424592b60423dd2c443a583df0 +F src/vdbe.c 174011c8cf323895f0ec776e048576422a3ea0d2e9168fbd2aafba93694e77ca F src/vdbe.h 9676348d342bd04e21e384c63b57224171ce84fac77853357334ef94c4d33cf4 F src/vdbeInt.h bf294a0c8fc4cc80779e74b04b8bd82c6e1197b3137cefe0b16cdf002fc7dfd6 F src/vdbeapi.c 38c252a202d70b56cfb734460bc888ddbd581afec1a10cd4d6c894c9e0b5baea @@ -858,7 +858,7 @@ F src/vdbetrace.c fe0bc29ebd4e02c8bc5c1945f1d2e6be5927ec12c06d89b03ef2a4def34bf8 F src/vdbevtab.c fc46b9cbd759dc013f0b3724549cc0d71379183c667df3a5988f7e2f1bd485f3 F src/vtab.c 316cd48e9320660db3047cd306cd056e4361180cebb4d0f10a39244e10c11422 F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 -F src/wal.c 8b7e309a8012659ac9275ad8cdcc6acaf73fa04b1090e38a01335f230fd10681 +F src/wal.c 88772db313628564c19b6248d919373ae493c82de429b1b4fce1312f92be8cd4 F src/wal.h ba252daaa94f889f4b2c17c027e823d9be47ce39da1d3799886bbd51f0490452 F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014 F src/where.c 9ad3dea8003a8913da6a4ca8322e2fe30773f46e88a0d4fbf9db13bdb999efa2 @@ -2202,8 +2202,8 @@ F tool/version-info.c 3b36468a90faf1bbd59c65fd0eb66522d9f941eedd364fabccd7227350 F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7 F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P e8d7d68ba0bb0bc2f948db5d9966990a5d23597fc3658b7cd0bc99d53c7353a9 -R 5cbe0e5d51e97ea31f8e1a24559f27b0 +P 52e0f8cab9852538da0778d5f57dd85b0774e764157692111a007aecd963f10a +R 9d530ae53a73ebf9f791dadc86ff5f9d U drh -Z 0752bc133a053f7fece0328823dfa9c8 +Z 08fccf053e2d79ae9443a9dc323c44e6 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 4e1eaec06a..9ba34ea2c5 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -52e0f8cab9852538da0778d5f57dd85b0774e764157692111a007aecd963f10a +01c8ebfb1f154ef8a15103e13fba0cffa5008898a5d5def32d131ad0e081fc87 diff --git a/src/pager.c b/src/pager.c index 4f616f0c7f..94aa0f6519 100644 --- a/src/pager.c +++ b/src/pager.c @@ -789,20 +789,6 @@ static const unsigned char aJournalMagic[] = { # define USEFETCH(x) 0 #endif -/* -** The argument to this macro is a file descriptor (type sqlite3_file*). -** Return 0 if it is not open, or non-zero (but not 1) if it is. -** -** This is so that expressions can be written as: -** -** if( isOpen(pPager->jfd) ){ ... -** -** instead of -** -** if( pPager->jfd->pMethods ){ ... -*/ -#define isOpen(pFd) ((pFd)->pMethods!=0) - #ifdef SQLITE_DIRECT_OVERFLOW_READ /* ** Return true if page pgno can be read directly from the database file diff --git a/src/pager.h b/src/pager.h index 7ef9a237ae..9b2cfc0bcf 100644 --- a/src/pager.h +++ b/src/pager.h @@ -83,6 +83,22 @@ typedef struct PgHdr DbPage; #define PAGER_JOURNALMODE_MEMORY 4 /* In-memory journal file */ #define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */ +#define isWalMode(x) ((x)==PAGER_JOURNALMODE_WAL) + +/* +** The argument to this macro is a file descriptor (type sqlite3_file*). +** Return 0 if it is not open, or non-zero (but not 1) if it is. +** +** This is so that expressions can be written as: +** +** if( isOpen(pPager->jfd) ){ ... +** +** instead of +** +** if( pPager->jfd->pMethods ){ ... +*/ +#define isOpen(pFd) ((pFd)->pMethods!=0) + /* ** Flags that make up the mask passed to sqlite3PagerGet(). */ diff --git a/src/vdbe.c b/src/vdbe.c index 558970ed95..7cac5536f4 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -7932,16 +7932,14 @@ case OP_JournalMode: { /* out2 */ /* Do not allow a transition to journal_mode=WAL for a database ** in temporary storage or if the VFS does not support shared memory */ - if( eNew==PAGER_JOURNALMODE_WAL + if( isWalMode(eNew) && (sqlite3Strlen30(zFilename)==0 /* Temp file */ || !sqlite3PagerWalSupported(pPager)) /* No shared-memory support */ ){ eNew = eOld; } - if( (eNew!=eOld) - && (eOld==PAGER_JOURNALMODE_WAL || eNew==PAGER_JOURNALMODE_WAL) - ){ + if( (eNew!=eOld) && (isWalMode(eNew) || isWalMode(eOld)) ){ if( !db->autoCommit || db->nVdbeRead>1 ){ rc = SQLITE_ERROR; sqlite3VdbeError(p, @@ -7951,7 +7949,7 @@ case OP_JournalMode: { /* out2 */ goto abort_due_to_error; }else{ - if( eOld==PAGER_JOURNALMODE_WAL ){ + if( isWalMode(eOld) ){ /* If leaving WAL mode, close the log file. If successful, the call ** to PagerCloseWal() checkpoints and deletes the write-ahead-log ** file. An EXCLUSIVE lock may still be held on the database file diff --git a/src/wal.c b/src/wal.c index ea9bc4df88..730d5d26e9 100644 --- a/src/wal.c +++ b/src/wal.c @@ -1708,7 +1708,7 @@ int sqlite3WalOpen( pRet->padToSectorBoundary = 1; pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); - /* Open file handle on the write-ahead log file. */ + /* Open a file handle on the write-ahead log file. */ flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags); if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){ @@ -1931,14 +1931,18 @@ static void walIteratorFree(WalIterator *p){ ** nBackfill or earlier may be included - excluding them is an optimization ** only. The caller must hold the checkpoint lock. ** -** On success, make *pp point to the newly allocated WalInterator object -** return SQLITE_OK. Otherwise, return an error code. If this routine -** returns an error, the value of *pp is undefined. +** On success, make *pp point to the newly allocated WalIterator object +** and return SQLITE_OK. Otherwise, return an error code. If this routine +** returns an error, the final value of *pp is undefined. ** ** The calling routine should invoke walIteratorFree() to destroy the ** WalIterator object when it has finished with it. */ -static int walIteratorInit(Wal *pWal, u32 nBackfill, WalIterator **pp){ +static int walIteratorInit( + Wal *pWal, + u32 nBackfill, + WalIterator **pp +){ WalIterator *p; /* Return value */ int nSegment; /* Number of segments to merge */ u32 iLast; /* Last frame in log */ @@ -2710,7 +2714,9 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ ** sure the wal-index was not constructed with some future format that ** this version of SQLite cannot understand. */ - if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){ + if( badHdr==0 + && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION + ){ rc = SQLITE_CANTOPEN_BKPT; } if( pWal->bShmUnreliable ){ @@ -2986,11 +2992,7 @@ static int walBeginShmUnreliable(Wal *pWal, int *pChanged){ */ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int *pCnt){ volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */ - u32 mxReadMark; /* Largest aReadMark[] value */ - int mxI; /* Index of largest aReadMark[] value */ - int i; /* Loop counter */ int rc = SQLITE_OK; /* Return code */ - u32 mxFrame; /* Wal frame to lock to */ #ifdef SQLITE_ENABLE_SETLK_TIMEOUT int nBlockTmout = 0; #endif @@ -3047,190 +3049,198 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int *pCnt){ *pCnt &= ~WAL_RETRY_BLOCKED_MASK; } - if( !useWal ){ - assert( rc==SQLITE_OK ); - if( pWal->bShmUnreliable==0 ){ - rc = walIndexReadHdr(pWal, pChanged); - } -#ifdef SQLITE_ENABLE_SETLK_TIMEOUT - walDisableBlocking(pWal); - if( rc==SQLITE_BUSY_TIMEOUT ){ - rc = SQLITE_BUSY; - *pCnt |= WAL_RETRY_BLOCKED_MASK; + { + u32 mxReadMark; /* Largest aReadMark[] value */ + int mxI; /* Index of largest aReadMark[] value */ + int i; /* Loop counter */ + u32 mxFrame; /* Wal frame to lock to */ + if( !useWal ){ + assert( rc==SQLITE_OK ); + if( pWal->bShmUnreliable==0 ){ + rc = walIndexReadHdr(pWal, pChanged); + } + #ifdef SQLITE_ENABLE_SETLK_TIMEOUT + walDisableBlocking(pWal); + if( rc==SQLITE_BUSY_TIMEOUT ){ + rc = SQLITE_BUSY; + *pCnt |= WAL_RETRY_BLOCKED_MASK; + } + #endif + if( rc==SQLITE_BUSY ){ + /* If there is not a recovery running in another thread or process + ** then convert BUSY errors to WAL_RETRY. If recovery is known to + ** be running, convert BUSY to BUSY_RECOVERY. There is a race here + ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY + ** would be technically correct. But the race is benign since with + ** WAL_RETRY this routine will be called again and will probably be + ** right on the second iteration. + */ + if( pWal->apWiData[0]==0 ){ + /* This branch is taken when the xShmMap() method returns SQLITE_BUSY. + ** We assume this is a transient condition, so return WAL_RETRY. The + ** xShmMap() implementation used by the default unix and win32 VFS + ** modules may return SQLITE_BUSY due to a race condition in the + ** code that determines whether or not the shared-memory region + ** must be zeroed before the requested page is returned. + */ + rc = WAL_RETRY; + }else if( SQLITE_OK==(rc = walLockShared(pWal, WAL_RECOVER_LOCK)) ){ + walUnlockShared(pWal, WAL_RECOVER_LOCK); + rc = WAL_RETRY; + }else if( rc==SQLITE_BUSY ){ + rc = SQLITE_BUSY_RECOVERY; + } + } + if( rc!=SQLITE_OK ){ + return rc; + } + else if( pWal->bShmUnreliable ){ + return walBeginShmUnreliable(pWal, pChanged); + } } -#endif - if( rc==SQLITE_BUSY ){ - /* If there is not a recovery running in another thread or process - ** then convert BUSY errors to WAL_RETRY. If recovery is known to - ** be running, convert BUSY to BUSY_RECOVERY. There is a race here - ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY - ** would be technically correct. But the race is benign since with - ** WAL_RETRY this routine will be called again and will probably be - ** right on the second iteration. + + assert( pWal->nWiData>0 ); + assert( pWal->apWiData[0]!=0 ); + pInfo = walCkptInfo(pWal); + SEH_INJECT_FAULT; + if( !useWal && AtomicLoad(&pInfo->nBackfill)==pWal->hdr.mxFrame + #ifdef SQLITE_ENABLE_SNAPSHOT + && ((pWal->bGetSnapshot==0 && pWal->pSnapshot==0) || pWal->hdr.mxFrame==0) + #endif + ){ + /* The WAL has been completely backfilled (or it is empty). + ** and can be safely ignored. */ - if( pWal->apWiData[0]==0 ){ - /* This branch is taken when the xShmMap() method returns SQLITE_BUSY. - ** We assume this is a transient condition, so return WAL_RETRY. The - ** xShmMap() implementation used by the default unix and win32 VFS - ** modules may return SQLITE_BUSY due to a race condition in the - ** code that determines whether or not the shared-memory region - ** must be zeroed before the requested page is returned. - */ - rc = WAL_RETRY; - }else if( SQLITE_OK==(rc = walLockShared(pWal, WAL_RECOVER_LOCK)) ){ - walUnlockShared(pWal, WAL_RECOVER_LOCK); - rc = WAL_RETRY; - }else if( rc==SQLITE_BUSY ){ - rc = SQLITE_BUSY_RECOVERY; + rc = walLockShared(pWal, WAL_READ_LOCK(0)); + walShmBarrier(pWal); + if( rc==SQLITE_OK ){ + if( memcmp((void*)walIndexHdr(pWal),&pWal->hdr,sizeof(WalIndexHdr)) ){ + /* It is not safe to allow the reader to continue here if frames + ** may have been appended to the log before READ_LOCK(0) was obtained. + ** When holding READ_LOCK(0), the reader ignores the entire log file, + ** which implies that the database file contains a trustworthy + ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from + ** happening, this is usually correct. + ** + ** However, if frames have been appended to the log (or if the log + ** is wrapped and written for that matter) before the READ_LOCK(0) + ** is obtained, that is not necessarily true. A checkpointer may + ** have started to backfill the appended frames but crashed before + ** it finished. Leaving a corrupt image in the database file. + */ + walUnlockShared(pWal, WAL_READ_LOCK(0)); + return WAL_RETRY; + } + pWal->readLock = 0; + return SQLITE_OK; + }else if( rc!=SQLITE_BUSY ){ + return rc; } } - if( rc!=SQLITE_OK ){ - return rc; + + /* If we get this far, it means that the reader will want to use + ** the WAL to get at content from recent commits. The job now is + ** to select one of the aReadMark[] entries that is closest to + ** but not exceeding pWal->hdr.mxFrame and lock that entry. + */ + mxReadMark = 0; + mxI = 0; + mxFrame = pWal->hdr.mxFrame; + #ifdef SQLITE_ENABLE_SNAPSHOT + if( pWal->pSnapshot && pWal->pSnapshot->mxFramepSnapshot->mxFrame; } - else if( pWal->bShmUnreliable ){ - return walBeginShmUnreliable(pWal, pChanged); + #endif + for(i=1; iaReadMark+i); SEH_INJECT_FAULT; + if( mxReadMark<=thisMark && thisMark<=mxFrame ){ + assert( thisMark!=READMARK_NOT_USED ); + mxReadMark = thisMark; + mxI = i; + } } - } - - assert( pWal->nWiData>0 ); - assert( pWal->apWiData[0]!=0 ); - pInfo = walCkptInfo(pWal); - SEH_INJECT_FAULT; - if( !useWal && AtomicLoad(&pInfo->nBackfill)==pWal->hdr.mxFrame -#ifdef SQLITE_ENABLE_SNAPSHOT - && ((pWal->bGetSnapshot==0 && pWal->pSnapshot==0) || pWal->hdr.mxFrame==0) -#endif - ){ - /* The WAL has been completely backfilled (or it is empty). - ** and can be safely ignored. - */ - rc = walLockShared(pWal, WAL_READ_LOCK(0)); - walShmBarrier(pWal); - if( rc==SQLITE_OK ){ - if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ - /* It is not safe to allow the reader to continue here if frames - ** may have been appended to the log before READ_LOCK(0) was obtained. - ** When holding READ_LOCK(0), the reader ignores the entire log file, - ** which implies that the database file contains a trustworthy - ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from - ** happening, this is usually correct. - ** - ** However, if frames have been appended to the log (or if the log - ** is wrapped and written for that matter) before the READ_LOCK(0) - ** is obtained, that is not necessarily true. A checkpointer may - ** have started to backfill the appended frames but crashed before - ** it finished. Leaving a corrupt image in the database file. - */ - walUnlockShared(pWal, WAL_READ_LOCK(0)); - return WAL_RETRY; + if( (pWal->readOnly & WAL_SHM_RDONLY)==0 + && (mxReadMarkaReadMark+i,mxFrame); + mxReadMark = mxFrame; + mxI = i; + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + break; + }else if( rc!=SQLITE_BUSY ){ + return rc; + } } - pWal->readLock = 0; - return SQLITE_OK; - }else if( rc!=SQLITE_BUSY ){ - return rc; } - } - - /* If we get this far, it means that the reader will want to use - ** the WAL to get at content from recent commits. The job now is - ** to select one of the aReadMark[] entries that is closest to - ** but not exceeding pWal->hdr.mxFrame and lock that entry. - */ - mxReadMark = 0; - mxI = 0; - mxFrame = pWal->hdr.mxFrame; -#ifdef SQLITE_ENABLE_SNAPSHOT - if( pWal->pSnapshot && pWal->pSnapshot->mxFramepSnapshot->mxFrame; - } -#endif - for(i=1; iaReadMark+i); SEH_INJECT_FAULT; - if( mxReadMark<=thisMark && thisMark<=mxFrame ){ - assert( thisMark!=READMARK_NOT_USED ); - mxReadMark = thisMark; - mxI = i; + if( mxI==0 ){ + assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 ); + return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTINIT; } - } - if( (pWal->readOnly & WAL_SHM_RDONLY)==0 - && (mxReadMarkaReadMark+i,mxFrame); - mxReadMark = mxFrame; - mxI = i; - walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); - break; - }else if( rc!=SQLITE_BUSY ){ - return rc; + + (void)walEnableBlockingMs(pWal, nBlockTmout); + rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); + walDisableBlocking(pWal); + if( rc ){ + #ifdef SQLITE_ENABLE_SETLK_TIMEOUT + if( rc==SQLITE_BUSY_TIMEOUT ){ + *pCnt |= WAL_RETRY_BLOCKED_MASK; } + #else + assert( rc!=SQLITE_BUSY_TIMEOUT ); + #endif + assert( (rc&0xFF)!=SQLITE_BUSY + || rc==SQLITE_BUSY + || rc==SQLITE_BUSY_TIMEOUT ); + return (rc&0xFF)==SQLITE_BUSY ? WAL_RETRY : rc; } - } - if( mxI==0 ){ - assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 ); - return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTINIT; - } - - (void)walEnableBlockingMs(pWal, nBlockTmout); - rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); - walDisableBlocking(pWal); - if( rc ){ -#ifdef SQLITE_ENABLE_SETLK_TIMEOUT - if( rc==SQLITE_BUSY_TIMEOUT ){ - *pCnt |= WAL_RETRY_BLOCKED_MASK; + /* Now that the read-lock has been obtained, check that neither the + ** value in the aReadMark[] array or the contents of the wal-index + ** header have changed. + ** + ** It is necessary to check that the wal-index header did not change + ** between the time it was read and when the shared-lock was obtained + ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility + ** that the log file may have been wrapped by a writer, or that frames + ** that occur later in the log than pWal->hdr.mxFrame may have been + ** copied into the database by a checkpointer. If either of these things + ** happened, then reading the database with the current value of + ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry + ** instead. + ** + ** Before checking that the live wal-index header has not changed + ** since it was read, set Wal.minFrame to the first frame in the wal + ** file that has not yet been checkpointed. This client will not need + ** to read any frames earlier than minFrame from the wal file - they + ** can be safely read directly from the database file. + ** + ** Because a ShmBarrier() call is made between taking the copy of + ** nBackfill and checking that the wal-header in shared-memory still + ** matches the one cached in pWal->hdr, it is guaranteed that the + ** checkpointer that set nBackfill was not working with a wal-index + ** header newer than that cached in pWal->hdr. If it were, that could + ** cause a problem. The checkpointer could omit to checkpoint + ** a version of page X that lies before pWal->minFrame (call that version + ** A) on the basis that there is a newer version (version B) of the same + ** page later in the wal file. But if version B happens to like past + ** frame pWal->hdr.mxFrame - then the client would incorrectly assume + ** that it can read version A from the database file. However, since + ** we can guarantee that the checkpointer that set nBackfill could not + ** see any pages past pWal->hdr.mxFrame, this problem does not come up. + */ + pWal->minFrame = AtomicLoad(&pInfo->nBackfill)+1; SEH_INJECT_FAULT; + walShmBarrier(pWal); + if( AtomicLoad(pInfo->aReadMark+mxI)!=mxReadMark + || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) + ){ + walUnlockShared(pWal, WAL_READ_LOCK(mxI)); + return WAL_RETRY; + }else{ + assert( mxReadMark<=pWal->hdr.mxFrame ); + pWal->readLock = (i16)mxI; } -#else - assert( rc!=SQLITE_BUSY_TIMEOUT ); -#endif - assert( (rc&0xFF)!=SQLITE_BUSY||rc==SQLITE_BUSY||rc==SQLITE_BUSY_TIMEOUT ); - return (rc&0xFF)==SQLITE_BUSY ? WAL_RETRY : rc; - } - /* Now that the read-lock has been obtained, check that neither the - ** value in the aReadMark[] array or the contents of the wal-index - ** header have changed. - ** - ** It is necessary to check that the wal-index header did not change - ** between the time it was read and when the shared-lock was obtained - ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility - ** that the log file may have been wrapped by a writer, or that frames - ** that occur later in the log than pWal->hdr.mxFrame may have been - ** copied into the database by a checkpointer. If either of these things - ** happened, then reading the database with the current value of - ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry - ** instead. - ** - ** Before checking that the live wal-index header has not changed - ** since it was read, set Wal.minFrame to the first frame in the wal - ** file that has not yet been checkpointed. This client will not need - ** to read any frames earlier than minFrame from the wal file - they - ** can be safely read directly from the database file. - ** - ** Because a ShmBarrier() call is made between taking the copy of - ** nBackfill and checking that the wal-header in shared-memory still - ** matches the one cached in pWal->hdr, it is guaranteed that the - ** checkpointer that set nBackfill was not working with a wal-index - ** header newer than that cached in pWal->hdr. If it were, that could - ** cause a problem. The checkpointer could omit to checkpoint - ** a version of page X that lies before pWal->minFrame (call that version - ** A) on the basis that there is a newer version (version B) of the same - ** page later in the wal file. But if version B happens to like past - ** frame pWal->hdr.mxFrame - then the client would incorrectly assume - ** that it can read version A from the database file. However, since - ** we can guarantee that the checkpointer that set nBackfill could not - ** see any pages past pWal->hdr.mxFrame, this problem does not come up. - */ - pWal->minFrame = AtomicLoad(&pInfo->nBackfill)+1; SEH_INJECT_FAULT; - walShmBarrier(pWal); - if( AtomicLoad(pInfo->aReadMark+mxI)!=mxReadMark - || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) - ){ - walUnlockShared(pWal, WAL_READ_LOCK(mxI)); - return WAL_RETRY; - }else{ - assert( mxReadMark<=pWal->hdr.mxFrame ); - pWal->readLock = (i16)mxI; } return rc; } @@ -3941,12 +3951,13 @@ static int walWriteOneFrame( ** SQLITE_OK is returned if successful, or an SQLite error code otherwise. */ static int walRewriteChecksums(Wal *pWal, u32 iLast){ - const int szPage = pWal->szPage;/* Database page size */ int rc = SQLITE_OK; /* Return code */ + const int szPage = pWal->szPage;/* Database page size */ u8 *aBuf; /* Buffer to load data from wal file into */ u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-headers in */ u32 iRead; /* Next frame to read from wal file */ i64 iCksumOff; + sqlite3_file *pWalFd = pWal->pWalFd; aBuf = sqlite3_malloc(szPage + WAL_FRAME_HDRSIZE); if( aBuf==0 ) return SQLITE_NOMEM_BKPT; @@ -3962,7 +3973,7 @@ static int walRewriteChecksums(Wal *pWal, u32 iLast){ }else{ iCksumOff = walFrameOffset(pWal->iReCksum-1, szPage) + 16; } - rc = sqlite3OsRead(pWal->pWalFd, aBuf, sizeof(u32)*2, iCksumOff); + rc = sqlite3OsRead(pWalFd, aBuf, sizeof(u32)*2, iCksumOff); pWal->hdr.aFrameCksum[0] = sqlite3Get4byte(aBuf); pWal->hdr.aFrameCksum[1] = sqlite3Get4byte(&aBuf[sizeof(u32)]); @@ -3970,14 +3981,14 @@ static int walRewriteChecksums(Wal *pWal, u32 iLast){ pWal->iReCksum = 0; for(; rc==SQLITE_OK && iRead<=iLast; iRead++){ i64 iOff = walFrameOffset(iRead, szPage); - rc = sqlite3OsRead(pWal->pWalFd, aBuf, szPage+WAL_FRAME_HDRSIZE, iOff); + rc = sqlite3OsRead(pWalFd, aBuf, szPage+WAL_FRAME_HDRSIZE, iOff); if( rc==SQLITE_OK ){ u32 iPgno, nDbSize; iPgno = sqlite3Get4byte(aBuf); nDbSize = sqlite3Get4byte(&aBuf[4]); walEncodeFrame(pWal, iPgno, nDbSize, &aBuf[WAL_FRAME_HDRSIZE], aFrame); - rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOff); + rc = sqlite3OsWrite(pWalFd, aFrame, sizeof(aFrame), iOff); } }