From: dan Date: Thu, 10 Aug 2023 18:58:07 +0000 (+0000) Subject: Experimental merge of the wal-shm-exceptions and wal2 branches. X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=31b064944a5505c46e7244f84846ffcaa8687981;p=thirdparty%2Fsqlite.git Experimental merge of the wal-shm-exceptions and wal2 branches. FossilOrigin-Name: fd58c616244dc70a2d6c17b62830398cea8177366828260882df5ab1f361a347 --- 31b064944a5505c46e7244f84846ffcaa8687981 diff --cc manifest index 7f9b3beb95,bcbb0a69ea..7664ce83c8 --- a/manifest +++ b/manifest @@@ -1,11 -1,11 +1,11 @@@ - C Mark\stest\sscript\swal2big.test\sas\s"TESTRUNNER:\sslow". - D 2023-08-10T17:04:29.698 -C Merge\slatest\strunk\schanges\sinto\sthis\sbranch. -D 2023-08-10T17:07:34.643 ++C Experimental\smerge\sof\sthe\swal-shm-exceptions\sand\swal2\sbranches. ++D 2023-08-10T18:58:07.618 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 -F Makefile.in 0f4cb3955aaff8a40ec3857ba1784bd98b69802e51eff979f874b65713b627b2 +F Makefile.in 3a780c12cb464f43e680a4f36f59615d45acf5a6762b56c205b6ef51ebd26c6d F Makefile.linux-gcc f3842a0b1efbfbb74ac0ef60e56b301836d05b4d867d014f714fa750048f1ab6 - F Makefile.msc 97f9b14ef0418943c315db08f4961932f2fc839603c13ab646079840b0d83329 -F Makefile.msc daad4a19e0b3c3c3b79b64d4ddbf75e3f506405e8d3f3f604d6f48b26043c51f ++F Makefile.msc 0b57ab2867b1fdc90c4e35a6777a3b24b81006cc7ba947c8db13929b4edd3800 F README.md c1c4218efcc4071a6e26db2b517fdbc1035696a29b370edd655faddbef02b224 F VERSION c6366dc72582d3144ce87b013cc35fe48d62f6d07d5be0c9716ea33c862144aa F aclocal.m4 a5c22d164aff7ed549d53a90fa56d56955281f50 @@@ -633,8 -631,8 +633,8 @@@ F src/os_setup.h 6011ad7af5db4e05155f38 F src/os_unix.c 2e8b12107f75d1bd16412f312b4c5d5103191807a37836d3b81beb26436ad81b F src/os_win.c 
7038223a1cda0a47e2ab4db47f63bf1833fe53ba0542f0f283a062ea13894103 F src/os_win.h 7b073010f1451abe501be30d12f6bc599824944a - F src/pager.c 0a7a695950183a745ec9b2cf0aa3cbc3803dea7d674a1a5a0b0080ab4aa7df9a - F src/pager.h c49ff262186a78bc5f27e3891edefb900afa769b9e2eaeca0322c7f3553536d4 -F src/pager.c 993445a19b611d473ca007542ab3149840661a4c7e9f2d9e1ec008b7cc2abe78 -F src/pager.h 6e326bd05970a24dd28d41d3980b6964fbaa37b4da54a2c0d4e0c5bdb06ff187 ++F src/pager.c e7af170f5799a5edd77fc267e81cb0292e1a1cb7b488b62c6e2fff78aedc2efb ++F src/pager.h ca3b415a3f03359d5692b71ec0ba068a58ce447fac7312f89293280d95ee38da F src/parse.y aeb7760d41cfa86465e3adba506500c021597049fd55f82a30e5b7045862c28c F src/pcache.c 4cd4a0043167da9ba7e19b4d179a0e6354e7fe32c16f781ecf9bf0a5ff63b40b F src/pcache.h 1497ce1b823cf00094bb0cf3bac37b345937e6f910890c626b16512316d3abf5 @@@ -714,9 -712,9 +714,9 @@@ F src/trigger.c ad6ab9452715fa9a8075442 F src/update.c 0bb9171afaa4d0b100ad946873bccda7aef90ffe083ef5c63668fce08c4df9da F src/upsert.c 5303dc6c518fa7d4b280ec65170f465c7a70b7ac2b22491598f6d0b4875b3145 F src/utf.c ee39565f0843775cc2c81135751ddd93eceb91a673ea2c57f61c76f288b041a0 - F src/util.c b3532a95ad56db67b3acd3955e688e4cb80ebec6fd1f459a8eb51cceedd6de69 + F src/util.c a40062117e705eb3339201842717a022092816b92479eead6397cde28af32ff9 F src/vacuum.c 604fcdaebe76f3497c855afcbf91b8fa5046b32de3045bab89cc008d68e40104 -F src/vdbe.c 346d848a0bf8128e3e3722c5406f4bde6c32d7093b93402c6f8e0718d19305c3 +F src/vdbe.c 11f1a00443991bd12bd0c97050b2614f7fd3e82b9cc0ffdb13d14d424d733e77 F src/vdbe.h 41485521f68e9437fdb7ec4a90f9d86ab294e9bb8281e33b235915e29122cfc0 F src/vdbeInt.h 949669dfd8a41550d27dcb905b494f2ccde9a2e6c1b0b04daa1227e2e74c2b2c F src/vdbeapi.c f37822f215740ede2a8fcae99bc13f2cc3a72dd0e1d22b81b9298c5ca67dbc38 @@@ -728,8 -726,8 +728,8 @@@ F src/vdbetrace.c fe0bc29ebd4e02c8bc5c1 F src/vdbevtab.c 57fa8f56478e5b5cb558cb425e7878515e0a105c54f96f1d1bbf4b9433529254 F src/vtab.c 
1ecf8c3745d29275688d583e12822fa984d421e0286b5ef50c137bc3bf6d7a64 F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 - F src/wal.c bb8a96b6b9607d9e4a6f89b385e002507e6eac6ba3d5797fefed3f13e9d49f32 - F src/wal.h d01234e828943e002040c22a7e017642962f9fd9b2dc142fa599769ae4e459e9 -F src/wal.c 02e10f033a6972bc7d50122b400318003199c504cda48f61ad404564505f4e89 -F src/wal.h 04a9e53121d5076f2a173b0f2facb39d33047093fee71bd3bbe6b1f6f1f5fd4b ++F src/wal.c 15cf63e763e2613c89f5dd20e150e715f4dcc836a0488342a1ab60939ff4951e ++F src/wal.h a3e64f463acf027f041c2081841e15e8c81c56d0daa45e308df423d9d66fd5a7 F src/walker.c 7c7ea0115345851c3da4e04e2e239a29983b61fb5b038b94eede6aba462640e2 F src/where.c b8917792f1e0dbfa28fb29e6cd3d560060d69667be0ba4c491cbc772363264f5 F src/whereInt.h c7d19902863beadec1d04e66aca39c0bcd60b74f05f0eaa7422c7005dfc5d51a @@@ -2064,8 -2051,8 +2065,11 @@@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a9 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 - P 72f7807b34eee1fcfd94f72d27535e5273ed1f7ff3d464179a774b7c7241f5f5 - R 26af008ae6d036cbeb33f554ac503b68 -P c44fb4d06591ef4872f015aa9e81ce8b93a38bd9143cf20db38cbe5c77a0d71f aa6de539c09faa320b68c63659e602107145c4263fa680d5b40fe4d7d7ac4534 -R 306d4b3f8aa93e07edcdfd262b597d0d ++P ae7f3bcb5babfa50084f5b4d3c5663489dbf306440706152642b7f0fb47fa1f5 3ed89c344fcb3b7ee8b764d95144643e42e053e1116150d6eda8355fbd6669df ++R 6d5e54ac94c2b0ca3b4f8e1bb783c1f1 ++T *branch * wal2-shm-exceptions ++T *sym-wal2-shm-exceptions * ++T -sym-wal2 * U dan - Z 4fd2c88d6b68e9363568fe5cf8860077 -Z d6eddbca55d948f45c2bde6455133439 ++Z 906ed34078bfe0eb7b259e1913b75925 # Remove this line to create a well-formed Fossil manifest. 
diff --cc manifest.uuid index 8aceda7a74,90cc22ad97..c62ef76cad --- a/manifest.uuid +++ b/manifest.uuid @@@ -1,1 -1,1 +1,1 @@@ - ae7f3bcb5babfa50084f5b4d3c5663489dbf306440706152642b7f0fb47fa1f5 -3ed89c344fcb3b7ee8b764d95144643e42e053e1116150d6eda8355fbd6669df ++fd58c616244dc70a2d6c17b62830398cea8177366828260882df5ab1f361a347 diff --cc src/wal.c index 2c0f4fe19d,c9d8eaebe5..3e7207a5c6 --- a/src/wal.c +++ b/src/wal.c @@@ -787,9 -527,14 +787,15 @@@ struct Wal u32 minFrame; /* Ignore wal frames before this one */ u32 iReCksum; /* On commit, recalculate checksums from here */ const char *zWalName; /* Name of WAL file */ + const char *zWalName2; /* Name of second WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ + #ifdef SQLITE_USE_SEH + u32 lockMask; /* Mask of locks held */ + void *pFree; /* Pointer to sqlite3_free() if exception thrown */ + int iSysErrno; /* System error code following exception */ + #endif #ifdef SQLITE_DEBUG + int nSehTry; /* Number of nested SEH_TRY{} blocks */ u8 lockError; /* True if a locking error has occurred */ #endif #ifdef SQLITE_ENABLE_SNAPSHOT @@@ -873,6 -616,100 +879,100 @@@ struct WalIterator sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \ ) ++ + /* + ** Structured Exception Handling (SEH) is a Windows-specific technique + ** for catching exceptions raised while accessing memory-mapped files. + ** + ** The -DSQLITE_USE_SEH compile-time option means to use SEH to catch and + ** deal with system-level errors that arise during WAL -shm file processing. + ** Without this compile-time option, any system-level faults that appear + ** while accessing the memory-mapped -shm file will cause a process-wide + ** signal to be deliver, which will more than likely cause the entire + ** process to exit. 
+ */ + #ifdef SQLITE_USE_SEH + #include + + /* Beginning of a block of code in which an exception might occur */ + # define SEH_TRY __try { \ + assert( walAssertLockmask(pWal) && pWal->nSehTry==0 ); \ + VVA_ONLY(pWal->nSehTry++); + + /* The end of a block of code in which an exception might occur */ + # define SEH_EXCEPT(X) \ + VVA_ONLY(pWal->nSehTry--); \ + assert( pWal->nSehTry==0 ); \ + } __except( sehExceptionFilter(pWal, GetExceptionCode(), GetExceptionInformation() ) ){ X } + + /* Simulate a memory-mapping fault in the -shm file for testing purposes */ + # define SEH_INJECT_FAULT sehInjectFault(pWal) + + /* + ** The second argument is the return value of GetExceptionCode() for the + ** current exception. Return EXCEPTION_EXECUTE_HANDLER if the exception code + ** indicates that the exception may have been caused by accessing the *-shm + ** file mapping. Or EXCEPTION_CONTINUE_SEARCH otherwise. + */ + static int sehExceptionFilter(Wal *pWal, int eCode, EXCEPTION_POINTERS *p){ + VVA_ONLY(pWal->nSehTry--); + if( eCode==EXCEPTION_IN_PAGE_ERROR ){ + if( p && p->ExceptionRecord && p->ExceptionRecord->NumberParameters>=3 ){ + /* From MSDN: For this type of exception, the first element of the + ** ExceptionInformation[] array is a read-write flag - 0 if the exception + ** was thrown while reading, 1 if while writing. The second element is + ** the virtual address being accessed. The "third array element specifies + ** the underlying NTSTATUS code that resulted in the exception". */ + pWal->iSysErrno = (int)p->ExceptionRecord->ExceptionInformation[2]; + } + return EXCEPTION_EXECUTE_HANDLER; + } + return EXCEPTION_CONTINUE_SEARCH; + } + + /* + ** If one is configured, invoke the xTestCallback callback with 650 as + ** the argument. If it returns true, throw the same exception that is + ** thrown by the system if the *-shm file mapping is accessed after it + ** has been invalidated. 
+ */ + static void sehInjectFault(Wal *pWal){ + int res; + assert( pWal->nSehTry>0 ); + + res = sqlite3FaultSim(650); + if( res!=0 ){ + ULONG_PTR aArg[3]; + aArg[0] = 0; + aArg[1] = 0; + aArg[2] = (ULONG_PTR)res; + RaiseException(EXCEPTION_IN_PAGE_ERROR, 0, 3, (const ULONG_PTR*)aArg); + } + } + + /* + ** There are two ways to use this macro. To set a pointer to be freed + ** if an exception is thrown: + ** + ** SEH_FREE_ON_ERROR(0, pPtr); + ** + ** and to cancel the same: + ** + ** SEH_FREE_ON_ERROR(pPtr, 0); + ** + ** In the first case, there must not already be a pointer registered to + ** be freed. In the second case, pPtr must be the registered pointer. + */ + #define SEH_FREE_ON_ERROR(X,Y) \ + assert( (X==0 || Y==0) && pWal->pFree==X ); pWal->pFree = Y + + #else + # define SEH_TRY VVA_ONLY(pWal->nSehTry++); + # define SEH_EXCEPT(X) VVA_ONLY(pWal->nSehTry--); assert( pWal->nSehTry==0 ); + # define SEH_INJECT_FAULT assert( pWal->nSehTry>0 ); + # define SEH_FREE_ON_ERROR(X,Y) + #endif /* ifdef SQLITE_USE_SEH */ + - /* ** Obtain a pointer to the iPage'th page of the wal-index. The wal-index ** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are @@@ -1561,212 -1347,6 +1678,215 @@@ static int walIndexAppend(Wal *pWal, in return rc; } +/* +** Recover a single wal file - *-wal if iWal==0, or *-wal2 if iWal==1. 
+*/ +static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){ + i64 nSize; /* Size of log file */ + u32 aFrameCksum[2] = {0, 0}; + int rc; + sqlite3_file *pWalFd = pWal->apWalFd[iWal]; + + assert( iWal==0 || iWal==1 ); + + memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); + sqlite3_randomness(8, pWal->hdr.aSalt); + + rc = sqlite3OsFileSize(pWalFd, &nSize); + if( rc==SQLITE_OK ){ + if( nSize>WAL_HDRSIZE ){ + u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ + u32 *aPrivate = 0; /* Heap copy of *-shm pg being populated */ + u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ + int szFrame; /* Number of bytes in buffer aFrame[] */ + u8 *aData; /* Pointer to data part of aFrame buffer */ + int szPage; /* Page size according to the log */ + u32 magic; /* Magic value read from WAL header */ + u32 version; /* Magic value read from WAL header */ + int isValid; /* True if this frame is valid */ + int iPg; /* Current 32KB wal-index page */ + int iLastFrame; /* Last frame in wal, based on size alone */ + int iLastPg; /* Last shm page used by this wal */ + + /* Read in the WAL header. */ + rc = sqlite3OsRead(pWalFd, aBuf, WAL_HDRSIZE, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* If the database page size is not a power of two, or is greater than + ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid + ** data. Similarly, if the 'magic' value is invalid, ignore the whole + ** WAL file. 
+ */ + magic = sqlite3Get4byte(&aBuf[0]); + szPage = sqlite3Get4byte(&aBuf[8]); + if( (magic&0xFFFFFFFE)!=WAL_MAGIC + || szPage&(szPage-1) + || szPage>SQLITE_MAX_PAGE_SIZE + || szPage<512 + ){ + return SQLITE_OK; + } + pWal->hdr.bigEndCksum = (u8)(magic&0x00000001); + pWal->szPage = szPage; + + /* Verify that the WAL header checksum is correct */ + walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN, + aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum + ); + if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24]) + || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28]) + ){ + return SQLITE_OK; + } + + memcpy(&pWal->hdr.aSalt, &aBuf[16], 8); + *pnCkpt = sqlite3Get4byte(&aBuf[12]); + + /* Verify that the version number on the WAL format is one that + ** are able to understand */ + version = sqlite3Get4byte(&aBuf[4]); + if( version!=WAL_VERSION1 && version!=WAL_VERSION2 ){ + return SQLITE_CANTOPEN_BKPT; + } + pWal->hdr.iVersion = version; + + /* Malloc a buffer to read frames into. */ + szFrame = szPage + WAL_FRAME_HDRSIZE; + aFrame = (u8 *)sqlite3_malloc64(szFrame + WALINDEX_PGSZ); ++ SEH_FREE_ON_ERROR(0, aFrame); + if( !aFrame ){ + return SQLITE_NOMEM_BKPT; + } + aData = &aFrame[WAL_FRAME_HDRSIZE]; + aPrivate = (u32*)&aData[szPage]; + + /* Read all frames from the log file. */ + iLastFrame = (nSize - WAL_HDRSIZE) / szFrame; + if( version==WAL_VERSION2 ){ + iLastPg = walFramePage2(iWal, iLastFrame); + }else{ + iLastPg = walFramePage(iLastFrame); + } + for(iPg=iWal; iPg<=iLastPg; iPg+=(version==WAL_VERSION2 ? 2 : 1)){ + u32 *aShare; + int iFrame; /* Index of last frame read */ + int iLast; + int iFirst; + int nHdr, nHdr32; + + rc = walIndexPage(pWal, iPg, (volatile u32**)&aShare); + assert( aShare!=0 || rc!=SQLITE_OK ); + if( aShare==0 ) break; + pWal->apWiData[iPg] = aPrivate; + + if( iWal ){ + assert( version==WAL_VERSION2 ); + iFirst = 1 + (iPg/2)*HASHTABLE_NPAGE; + iLast = iFirst + HASHTABLE_NPAGE - 1; + }else{ + int i2 = (version==WAL_VERSION2) ? 
(iPg/2) : iPg; + iLast = HASHTABLE_NPAGE_ONE+i2*HASHTABLE_NPAGE; + iFirst = 1 + (i2==0?0:HASHTABLE_NPAGE_ONE+(i2-1)*HASHTABLE_NPAGE); + } + iLast = MIN(iLast, iLastFrame); + + for(iFrame=iFirst; iFrame<=iLast; iFrame++){ + i64 iOffset = walFrameOffset(iFrame, szPage); + u32 pgno; /* Database page number for frame */ + u32 nTruncate; /* dbsize field from frame header */ + + /* Read and decode the next log frame. */ + rc = sqlite3OsRead(pWalFd, aFrame, szFrame, iOffset); + if( rc!=SQLITE_OK ) break; + isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); + if( !isValid ) break; + rc = walIndexAppend(pWal, iWal, iFrame, pgno); + if( NEVER(rc!=SQLITE_OK) ) break; + + /* If nTruncate is non-zero, this is a commit record. */ + if( nTruncate ){ + pWal->hdr.mxFrame = iFrame; + pWal->hdr.nPage = nTruncate; + pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); + testcase( szPage<=32768 ); + testcase( szPage>=65536 ); + aFrameCksum[0] = pWal->hdr.aFrameCksum[0]; + aFrameCksum[1] = pWal->hdr.aFrameCksum[1]; + } + } + pWal->apWiData[iPg] = aShare; + nHdr = (iPg==0 ? WALINDEX_HDR_SIZE : 0); + nHdr32 = nHdr / sizeof(u32); +#ifndef SQLITE_SAFER_WALINDEX_RECOVERY + /* Memcpy() should work fine here, on all reasonable implementations. + ** Technically, memcpy() might change the destination to some + ** intermediate value before setting to the final value, and that might + ** cause a concurrent reader to malfunction. Memcpy() is allowed to + ** do that, according to the spec, but no memcpy() implementation that + ** we know of actually does that, which is why we say that memcpy() + ** is safe for this. Memcpy() is certainly a lot faster. + */ + memcpy(&aShare[nHdr32], &aPrivate[nHdr32], WALINDEX_PGSZ-nHdr); +#else + /* In the event that some platform is found for which memcpy() + ** changes the destination to some intermediate value before + ** setting the final value, this alternative copy routine is + ** provided. 
+ */ + { + int i; + for(i=nHdr32; ihdr.aFrameCksum[0] = aFrameCksum[0]; + pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; + + return rc; +} + +static int walOpenWal2(Wal *pWal){ + int rc = SQLITE_OK; + if( !isOpen(pWal->apWalFd[1]) ){ + int f = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); + rc = sqlite3OsOpen(pWal->pVfs, pWal->zWalName2, pWal->apWalFd[1], f, &f); + } + return rc; +} + +static int walTruncateWal2(Wal *pWal){ + int bIs; + int rc; + assert( !isOpen(pWal->apWalFd[1]) ); + rc = sqlite3OsAccess(pWal->pVfs, pWal->zWalName2, SQLITE_ACCESS_EXISTS, &bIs); + if( rc==SQLITE_OK && bIs ){ + rc = walOpenWal2(pWal); + if( rc==SQLITE_OK ){ + rc = sqlite3OsTruncate(pWal->apWalFd[1], 0); + sqlite3OsClose(pWal->apWalFd[1]); + } + } + return rc; +} /* ** Recover the wal-index by reading the write-ahead log file. @@@ -2332,8 -1949,8 +2452,9 @@@ static int walIteratorInit memset(p, 0, nByte); p->nSegment = nSegment; aTmp = (ht_slot*)&(((u8*)p)[nByte]); + SEH_FREE_ON_ERROR(0, p); - for(i=walFramePage(nBackfill+1); rc==SQLITE_OK && ihdr.mxFrame; - mxPage = pWal->hdr.nPage; - for(i=1; iaReadMark+i); SEH_INJECT_FAULT; - if( mxSafeFrame>y ){ - assert( y<=pWal->hdr.mxFrame ); - rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); - if( rc==SQLITE_OK ){ - u32 iMark = (i==1 ? mxSafeFrame : READMARK_NOT_USED); - AtomicStore(pInfo->aReadMark+i, iMark); SEH_INJECT_FAULT; - walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); - }else if( rc==SQLITE_BUSY ){ - mxSafeFrame = y; - xBusy = 0; - }else{ - goto walcheckpoint_out; + if( bWal2==0 ){ + mxSafeFrame = pWal->hdr.mxFrame; + mxPage = pWal->hdr.nPage; + for(i=1; iaReadMark+i); ++ u32 y = AtomicLoad(pInfo->aReadMark+i); SEH_INJECT_FAULT; + if( mxSafeFrame>y ){ + assert( y<=pWal->hdr.mxFrame ); + rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); + if( rc==SQLITE_OK ){ + u32 iMark = (i==1 ? 
mxSafeFrame : READMARK_NOT_USED); - AtomicStore(pInfo->aReadMark+i, iMark); ++ AtomicStore(pInfo->aReadMark+i, iMark); SEH_INJECT_FAULT; + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + }else if( rc==SQLITE_BUSY ){ + mxSafeFrame = y; + xBusy = 0; + }else{ + goto walcheckpoint_out; + } } } } @@@ -2699,19 -2227,18 +2821,19 @@@ assert( rc==SQLITE_OK || pIter==0 ); } - if( pIter - && (rc = walBusyLock(pWal,xBusy,pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK - ){ + if( pIter && (bWal2 + || (rc = walBusyLock(pWal, xBusy, pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK + )){ u32 nBackfill = pInfo->nBackfill; + + assert( bWal2==0 || nBackfill==0 ); - pInfo->nBackfillAttempted = mxSafeFrame; + pInfo->nBackfillAttempted = mxSafeFrame; SEH_INJECT_FAULT; - /* Sync the WAL to disk */ - rc = sqlite3OsSync(pWal->pWalFd, CKPT_SYNC_FLAGS(sync_flags)); + /* Sync the wal file being checkpointed to disk */ + rc = sqlite3OsSync(pWalFd, CKPT_SYNC_FLAGS(sync_flags)); /* If the database may grow as a result of this checkpoint, hint - ** about the eventual size of the db file to the VFS layer. - */ + ** about the eventual size of the db file to the VFS layer. */ if( rc==SQLITE_OK ){ i64 nReq = ((i64)mxPage * szPage); i64 nSize; /* Current size of database file */ @@@ -2735,10 -2261,8 +2857,11 @@@ /* Iterate through the contents of the WAL, copying data to the db file */ while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ i64 iOffset; - assert( walFramePgno(pWal, iFrame)==iDbpage ); + + assert( bWal2==1 || walFramePgno(pWal, iFrame)==iDbpage ); + assert( bWal2==0 || walFramePgno2(pWal, iCkpt, iFrame)==iDbpage ); + + SEH_INJECT_FAULT; if( AtomicLoad(&db->u1.isInterrupted) ){ rc = db->mallocFailed ? 
SQLITE_NOMEM_BKPT : SQLITE_INTERRUPT; break; @@@ -2768,19 -2287,17 +2891,20 @@@ i64 szDb = pWal->hdr.nPage*(i64)szPage; testcase( IS_BIG_INT(szDb) ); rc = sqlite3OsTruncate(pWal->pDbFd, szDb); - if( rc==SQLITE_OK ){ - rc = sqlite3OsSync(pWal->pDbFd, CKPT_SYNC_FLAGS(sync_flags)); - } } if( rc==SQLITE_OK ){ - AtomicStore(&pInfo->nBackfill, mxSafeFrame); SEH_INJECT_FAULT; + rc = sqlite3OsSync(pWal->pDbFd, CKPT_SYNC_FLAGS(sync_flags)); } } + if( rc==SQLITE_OK ){ + AtomicStore(&pInfo->nBackfill, (bWal2 ? 1 : mxSafeFrame)); ++ SEH_INJECT_FAULT; + } /* Release the reader lock held while backfilling */ - walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); + if( bWal2==0 ){ + walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); + } } if( rc==SQLITE_BUSY ){ @@@ -2796,8 -2312,9 +2920,9 @@@ ** until all readers have finished using the wal file. This ensures that ** the next process to write to the database restarts the wal file. */ - if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + if( bWal2==0 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ assert( pWal->writeLock ); + SEH_INJECT_FAULT; if( pInfo->nBackfillhdr.mxFrame ){ rc = SQLITE_BUSY; }else if( eMode>=SQLITE_CHECKPOINT_RESTART ){ @@@ -2866,7 -2463,8 +3073,9 @@@ int sqlite3WalClose int rc = SQLITE_OK; if( pWal ){ int isDelete = 0; /* True to unlink wal and wal-index files */ + + assert( walAssertLockmask(pWal) ); + pWal->bClosing = 1; /* If an EXCLUSIVE lock can be obtained on the database file (using the ** ordinary, rollback-mode locking methods, this guarantees that the @@@ -3420,170 -3008,184 +3629,218 @@@ static int walTryBeginRead(Wal *pWal, i assert( pWal->nWiData>0 ); assert( pWal->apWiData[0]!=0 ); pInfo = walCkptInfo(pWal); + SEH_INJECT_FAULT; - if( !useWal && AtomicLoad(&pInfo->nBackfill)==pWal->hdr.mxFrame -#ifdef SQLITE_ENABLE_SNAPSHOT - && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0) -#endif - ){ - /* The WAL has been completely backfilled (or it is empty). - ** and can be safely ignored. 
- */ - rc = walLockShared(pWal, WAL_READ_LOCK(0)); + if( isWalMode2(pWal) ){ + /* This connection needs a "part" lock on the current wal file and, + ** unless pInfo->nBackfill is set to indicate that it has already been + ** checkpointed, a "full" lock on the other wal file. */ + int iWal = walidxGetFile(&pWal->hdr); + int nBackfill = pInfo->nBackfill || walidxGetMxFrame(&pWal->hdr, !iWal)==0; + int eLock = 1 + (iWal*2) + (nBackfill==iWal); + + assert( nBackfill==0 || nBackfill==1 ); + assert( iWal==0 || iWal==1 ); + assert( iWal!=0 || nBackfill!=1 || eLock==WAL_LOCK_PART1 ); + assert( iWal!=0 || nBackfill!=0 || eLock==WAL_LOCK_PART1_FULL2 ); + assert( iWal!=1 || nBackfill!=1 || eLock==WAL_LOCK_PART2 ); + assert( iWal!=1 || nBackfill!=0 || eLock==WAL_LOCK_PART2_FULL1 ); + + rc = walLockShared(pWal, WAL_READ_LOCK(eLock)); + if( rc!=SQLITE_OK ){ + return (rc==SQLITE_BUSY ? WAL_RETRY : rc); + } walShmBarrier(pWal); - if( rc==SQLITE_OK ){ - if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ - /* It is not safe to allow the reader to continue here if frames - ** may have been appended to the log before READ_LOCK(0) was obtained. - ** When holding READ_LOCK(0), the reader ignores the entire log file, - ** which implies that the database file contains a trustworthy - ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from - ** happening, this is usually correct. - ** - ** However, if frames have been appended to the log (or if the log - ** is wrapped and written for that matter) before the READ_LOCK(0) - ** is obtained, that is not necessarily true. A checkpointer may - ** have started to backfill the appended frames but crashed before - ** it finished. Leaving a corrupt image in the database file. 
- */ - walUnlockShared(pWal, WAL_READ_LOCK(0)); - return WAL_RETRY; + if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ + walUnlockShared(pWal, WAL_READ_LOCK(eLock)); + return WAL_RETRY; + }else{ + pWal->readLock = eLock; + } + assert( pWal->minFrame==0 && walFramePage(pWal->minFrame)==0 ); + }else{ + u32 mxReadMark; /* Largest aReadMark[] value */ + int mxI; /* Index of largest aReadMark[] value */ + int i; /* Loop counter */ + u32 mxFrame; /* Wal frame to lock to */ + if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame + #ifdef SQLITE_ENABLE_SNAPSHOT + && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0) + #endif + ){ + /* The WAL has been completely backfilled (or it is empty). + ** and can be safely ignored. + */ + rc = walLockShared(pWal, WAL_READ_LOCK(0)); + walShmBarrier(pWal); + if( rc==SQLITE_OK ){ + if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr,sizeof(WalIndexHdr)) ){ + /* It is not safe to allow the reader to continue here if frames + ** may have been appended to the log before READ_LOCK(0) was obtained. + ** When holding READ_LOCK(0), the reader ignores the entire log file, + ** which implies that the database file contains a trustworthy + ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from + ** happening, this is usually correct. + ** + ** However, if frames have been appended to the log (or if the log + ** is wrapped and written for that matter) before the READ_LOCK(0) + ** is obtained, that is not necessarily true. A checkpointer may + ** have started to backfill the appended frames but crashed before + ** it finished. Leaving a corrupt image in the database file. 
+ */ + walUnlockShared(pWal, WAL_READ_LOCK(0)); + return WAL_RETRY; + } + pWal->readLock = 0; + return SQLITE_OK; + }else if( rc!=SQLITE_BUSY ){ + return rc; } - pWal->readLock = 0; - return SQLITE_OK; - }else if( rc!=SQLITE_BUSY ){ - return rc; } - } - - /* If we get this far, it means that the reader will want to use - ** the WAL to get at content from recent commits. The job now is - ** to select one of the aReadMark[] entries that is closest to - ** but not exceeding pWal->hdr.mxFrame and lock that entry. - */ - mxReadMark = 0; - mxI = 0; - mxFrame = pWal->hdr.mxFrame; -#ifdef SQLITE_ENABLE_SNAPSHOT - if( pWal->pSnapshot && pWal->pSnapshot->mxFramepSnapshot->mxFrame; - } -#endif - for(i=1; iaReadMark+i); SEH_INJECT_FAULT; - if( mxReadMark<=thisMark && thisMark<=mxFrame ){ - assert( thisMark!=READMARK_NOT_USED ); - mxReadMark = thisMark; - mxI = i; + + /* If we get this far, it means that the reader will want to use + ** the WAL to get at content from recent commits. The job now is + ** to select one of the aReadMark[] entries that is closest to + ** but not exceeding pWal->hdr.mxFrame and lock that entry. + */ + mxReadMark = 0; + mxI = 0; + mxFrame = pWal->hdr.mxFrame; + #ifdef SQLITE_ENABLE_SNAPSHOT + if( pWal->pSnapshot && pWal->pSnapshot->mxFramepSnapshot->mxFrame; } - } - if( (pWal->readOnly & WAL_SHM_RDONLY)==0 - && (mxReadMarkaReadMark+i); - rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); - if( rc==SQLITE_OK ){ - AtomicStore(pInfo->aReadMark+i,mxFrame); - mxReadMark = mxFrame; ++ u32 thisMark = AtomicLoad(pInfo->aReadMark+i); SEH_INJECT_FAULT; + if( mxReadMark<=thisMark && thisMark<=mxFrame ){ + assert( thisMark!=READMARK_NOT_USED ); + mxReadMark = thisMark; mxI = i; - walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); - break; - }else if( rc!=SQLITE_BUSY ){ - return rc; } } - } - if( mxI==0 ){ - assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 ); - return rc==SQLITE_BUSY ? 
WAL_RETRY : SQLITE_READONLY_CANTINIT; - } + if( (pWal->readOnly & WAL_SHM_RDONLY)==0 + && (mxReadMarkaReadMark+i,mxFrame); + mxReadMark = mxFrame; + mxI = i; + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + break; + }else if( rc!=SQLITE_BUSY ){ + return rc; + } + } + } + if( mxI==0 ){ + assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 ); + return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTINIT; + } + + rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); + if( rc ){ + return rc==SQLITE_BUSY ? WAL_RETRY : rc; + } + /* Now that the read-lock has been obtained, check that neither the + ** value in the aReadMark[] array or the contents of the wal-index + ** header have changed. + ** + ** It is necessary to check that the wal-index header did not change + ** between the time it was read and when the shared-lock was obtained + ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility + ** that the log file may have been wrapped by a writer, or that frames + ** that occur later in the log than pWal->hdr.mxFrame may have been + ** copied into the database by a checkpointer. If either of these things + ** happened, then reading the database with the current value of + ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry + ** instead. + ** + ** Before checking that the live wal-index header has not changed + ** since it was read, set Wal.minFrame to the first frame in the wal + ** file that has not yet been checkpointed. This client will not need + ** to read any frames earlier than minFrame from the wal file - they + ** can be safely read directly from the database file. + ** + ** Because a ShmBarrier() call is made between taking the copy of + ** nBackfill and checking that the wal-header in shared-memory still + ** matches the one cached in pWal->hdr, it is guaranteed that the + ** checkpointer that set nBackfill was not working with a wal-index + ** header newer than that cached in pWal->hdr. 
If it were, that could + ** cause a problem. The checkpointer could omit to checkpoint + ** a version of page X that lies before pWal->minFrame (call that version + ** A) on the basis that there is a newer version (version B) of the same + ** page later in the wal file. But if version B happens to like past + ** frame pWal->hdr.mxFrame - then the client would incorrectly assume + ** that it can read version A from the database file. However, since + ** we can guarantee that the checkpointer that set nBackfill could not + ** see any pages past pWal->hdr.mxFrame, this problem does not come up. + */ - pWal->minFrame = AtomicLoad(&pInfo->nBackfill)+1; ++ pWal->minFrame = AtomicLoad(&pInfo->nBackfill)+1; SEH_INJECT_FAULT; + walShmBarrier(pWal); + if( AtomicLoad(pInfo->aReadMark+mxI)!=mxReadMark + || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) + ){ + walUnlockShared(pWal, WAL_READ_LOCK(mxI)); + return WAL_RETRY; + }else{ + assert( mxReadMark<=pWal->hdr.mxFrame ); + pWal->readLock = (i16)mxI; + } - rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); - if( rc ){ - return rc==SQLITE_BUSY ? WAL_RETRY : rc; - } - /* Now that the read-lock has been obtained, check that neither the - ** value in the aReadMark[] array or the contents of the wal-index - ** header have changed. - ** - ** It is necessary to check that the wal-index header did not change - ** between the time it was read and when the shared-lock was obtained - ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility - ** that the log file may have been wrapped by a writer, or that frames - ** that occur later in the log than pWal->hdr.mxFrame may have been - ** copied into the database by a checkpointer. If either of these things - ** happened, then reading the database with the current value of - ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry - ** instead. 
- ** - ** Before checking that the live wal-index header has not changed - ** since it was read, set Wal.minFrame to the first frame in the wal - ** file that has not yet been checkpointed. This client will not need - ** to read any frames earlier than minFrame from the wal file - they - ** can be safely read directly from the database file. - ** - ** Because a ShmBarrier() call is made between taking the copy of - ** nBackfill and checking that the wal-header in shared-memory still - ** matches the one cached in pWal->hdr, it is guaranteed that the - ** checkpointer that set nBackfill was not working with a wal-index - ** header newer than that cached in pWal->hdr. If it were, that could - ** cause a problem. The checkpointer could omit to checkpoint - ** a version of page X that lies before pWal->minFrame (call that version - ** A) on the basis that there is a newer version (version B) of the same - ** page later in the wal file. But if version B happens to like past - ** frame pWal->hdr.mxFrame - then the client would incorrectly assume - ** that it can read version A from the database file. However, since - ** we can guarantee that the checkpointer that set nBackfill could not - ** see any pages past pWal->hdr.mxFrame, this problem does not come up. - */ - pWal->minFrame = AtomicLoad(&pInfo->nBackfill)+1; SEH_INJECT_FAULT; - walShmBarrier(pWal); - if( AtomicLoad(pInfo->aReadMark+mxI)!=mxReadMark - || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) - ){ - walUnlockShared(pWal, WAL_READ_LOCK(mxI)); - return WAL_RETRY; - }else{ - assert( mxReadMark<=pWal->hdr.mxFrame ); - pWal->readLock = (i16)mxI; } return rc; } #ifdef SQLITE_ENABLE_SNAPSHOT + /* + ** This function does the work of sqlite3WalSnapshotRecover(). 
+ */ + static int walSnapshotRecover( + Wal *pWal, /* WAL handle */ + void *pBuf1, /* Temp buffer pWal->szPage bytes in size */ + void *pBuf2 /* Temp buffer pWal->szPage bytes in size */ + ){ + int szPage = (int)pWal->szPage; + int rc; + i64 szDb; /* Size of db file in bytes */ + + rc = sqlite3OsFileSize(pWal->pDbFd, &szDb); + if( rc==SQLITE_OK ){ + volatile WalCkptInfo *pInfo = walCkptInfo(pWal); + u32 i = pInfo->nBackfillAttempted; + for(i=pInfo->nBackfillAttempted; i>AtomicLoad(&pInfo->nBackfill); i--){ + WalHashLoc sLoc; /* Hash table location */ + u32 pgno; /* Page number in db file */ + i64 iDbOff; /* Offset of db file entry */ + i64 iWalOff; /* Offset of wal file entry */ + + rc = walHashGet(pWal, walFramePage(i), &sLoc); + if( rc!=SQLITE_OK ) break; + pgno = sLoc.aPgno[i-sLoc.iZero]; + iDbOff = (i64)(pgno-1) * szPage; + + if( iDbOff+szPage<=szDb ){ + iWalOff = walFrameOffset(i, szPage) + WAL_FRAME_HDRSIZE; - rc = sqlite3OsRead(pWal->pWalFd, pBuf1, szPage, iWalOff); ++ rc = sqlite3OsRead(pWal->apWalFd[0], pBuf1, szPage, iWalOff); + + if( rc==SQLITE_OK ){ + rc = sqlite3OsRead(pWal->pDbFd, pBuf2, szPage, iDbOff); + } + + if( rc!=SQLITE_OK || 0==memcmp(pBuf1, pBuf2, szPage) ){ + break; + } + } + + pInfo->nBackfillAttempted = i-1; + } + } + + return rc; + } + /* ** Attempt to reduce the value of the WalCkptInfo.nBackfillAttempted ** variable so that older snapshots can be accessed. To do this, loop @@@ -3786,100 -3342,39 +4005,125 @@@ static int walBeginReadTransaction(Wal return rc; } + /* + ** Begin a read transaction on the database. + ** + ** This routine used to be called sqlite3OpenSnapshot() and with good reason: + ** it takes a snapshot of the state of the WAL and wal-index for the current + ** instant in time. The current thread will continue to use this snapshot. + ** Other threads might append new content to the WAL and wal-index but + ** that extra content is ignored by the current thread. 
+ ** + ** If the database contents have changed since the previous read + ** transaction, then *pChanged is set to 1 before returning. The + ** Pager layer will use this to know that its cache is stale and + ** needs to be flushed. + */ + int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ + int rc; + SEH_TRY { + rc = walBeginReadTransaction(pWal, pChanged); + } + SEH_EXCEPT( rc = walHandleException(pWal); ) + return rc; + } + ++ /* ** Finish with a read transaction. All this does is release the ** read-lock. */ void sqlite3WalEndReadTransaction(Wal *pWal){ sqlite3WalEndWriteTransaction(pWal); - if( pWal->readLock>=0 ){ + if( pWal->readLock!=WAL_LOCK_NONE ){ walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); - pWal->readLock = -1; + pWal->readLock = WAL_LOCK_NONE; + } +} + +/* Search hash table iHash for an entry matching page number +** pgno. Each call to this function searches a single hash table +** (each hash table indexes up to HASHTABLE_NPAGE frames). +** +** This code might run concurrently to the code in walIndexAppend() +** that adds entries to the wal-index (and possibly to this hash +** table). This means the value just read from the hash +** slot (aHash[iKey]) may have been added before or after the +** current read transaction was opened. Values added after the +** read transaction was opened may have been written incorrectly - +** i.e. these slots may contain garbage data. However, we assume +** that any slots written before the current read transaction was +** opened remain unmodified. +** +** For the reasons above, the if(...) condition featured in the inner +** loop of the following block is more stringent than would be required +** if we had exclusive access to the hash-table: +** +** (aPgno[iFrame]==pgno): +** This condition filters out normal hash-table collisions. +** +** (iFrame<=iLast): +** This condition filters out entries that were added to the hash +** table after the current read-transaction had started.
+*/ +static int walSearchHash( + Wal *pWal, + u32 iLast, + int iHash, + Pgno pgno, + u32 *piRead +){ + WalHashLoc sLoc; /* Hash table location */ + int iKey; /* Hash slot index */ + int nCollide; /* Number of hash collisions remaining */ + int rc; /* Error code */ ++ u32 iH; + + rc = walHashGet(pWal, iHash, &sLoc); + if( rc!=SQLITE_OK ){ + return rc; + } + nCollide = HASHTABLE_NSLOT; - for(iKey=walHash(pgno); sLoc.aHash[iKey]; iKey=walNextHash(iKey)){ - u32 iFrame = sLoc.aHash[iKey] + sLoc.iZero; - if( iFrame<=iLast - && iFrame>=pWal->minFrame - && sLoc.aPgno[sLoc.aHash[iKey]-1]==pgno - ){ ++ iKey = walHash(pgno); ++ SEH_INJECT_FAULT; ++ while( (iH = AtomicLoad(&sLoc.aHash[iKey]))!=0 ){ ++ u32 iFrame = iH + sLoc.iZero; ++ if( iFrame<=iLast && iFrame>=pWal->minFrame && sLoc.aPgno[iH-1]==pgno ){ + assert( iFrame>*piRead || CORRUPT_DB ); + *piRead = iFrame; + } + if( (nCollide--)==0 ){ + return SQLITE_CORRUPT_BKPT; + } ++ iKey = walNextHash(iKey); + } + + return SQLITE_OK; +} + +static int walSearchWal( + Wal *pWal, + int iWal, + Pgno pgno, + u32 *piRead +){ + int rc = SQLITE_OK; + int bWal2 = isWalMode2(pWal); + u32 iLast = walidxGetMxFrame(&pWal->hdr, iWal); + if( iLast ){ + int iHash; + int iMinHash = walFramePage(pWal->minFrame); + u32 iExternal = bWal2 ? walExternalEncode(iWal, iLast) : iLast; + assert( bWal2==0 || pWal->minFrame==0 ); + for(iHash=walFramePage(iExternal); + iHash>=iMinHash && *piRead==0; + iHash-=(1+bWal2) + ){ + rc = walSearchHash(pWal, iExternal, iHash, pgno, piRead); + if( rc!=SQLITE_OK ) break; + } } + return rc; } /* @@@ -4065,12 -3590,17 +4333,18 @@@ int sqlite3WalBeginWriteTransaction(Wa ** time the read transaction on this connection was started, then ** the write is disallowed. 
*/ - if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ + SEH_TRY { + if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ + rc = SQLITE_BUSY_SNAPSHOT; + } - } ++ } + SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) + + if( rc!=SQLITE_OK ){ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); pWal->writeLock = 0; - rc = SQLITE_BUSY_SNAPSHOT; } + return rc; } @@@ -4103,42 -3633,36 +4377,45 @@@ int sqlite3WalEndWriteTransaction(Wal * int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ int rc = SQLITE_OK; if( ALWAYS(pWal->writeLock) ){ - Pgno iMax = pWal->hdr.mxFrame; + int iWal = walidxGetFile(&pWal->hdr); + Pgno iMax = walidxGetMxFrame(&pWal->hdr, iWal); + Pgno iNew; Pgno iFrame; + assert( isWalMode2(pWal) || iWal==0 ); + - /* Restore the clients cache of the wal-index header to the state it - ** was in before the client began writing to the database. - */ - memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr)); - assert( walidxGetFile(&pWal->hdr)==iWal ); - iNew = walidxGetMxFrame(&pWal->hdr, walidxGetFile(&pWal->hdr)); - - for(iFrame=iNew+1; ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; iFrame++){ - /* This call cannot fail. Unless the page for which the page number - ** is passed as the second argument is (a) in the cache and - ** (b) has an outstanding reference, then xUndo is either a no-op - ** (if (a) is false) or simply expels the page from the cache (if (b) - ** is false). - ** - ** If the upper layer is doing a rollback, it is guaranteed that there - ** are no outstanding references to any page other than page 1. And - ** page 1 is never written to the log until the transaction is - ** committed. As a result, the call to xUndo may not fail. + SEH_TRY { + /* Restore the clients cache of the wal-index header to the state it - ** was in before the client began writing to the database. ++ ** was in before the client began writing to the database. 
*/ - Pgno pgno; - if( isWalMode2(pWal) ){ - pgno = walFramePgno2(pWal, iWal, iFrame); - }else{ - pgno = walFramePgno(pWal, iFrame); + memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr)); - - for(iFrame=pWal->hdr.mxFrame+1; - ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; - iFrame++ - ){ ++ assert( walidxGetFile(&pWal->hdr)==iWal ); ++ iNew = walidxGetMxFrame(&pWal->hdr, walidxGetFile(&pWal->hdr)); ++ ++ for(iFrame=iNew+1; ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; iFrame++){ + /* This call cannot fail. Unless the page for which the page number + ** is passed as the second argument is (a) in the cache and + ** (b) has an outstanding reference, then xUndo is either a no-op + ** (if (a) is false) or simply expels the page from the cache (if (b) + ** is false). + ** + ** If the upper layer is doing a rollback, it is guaranteed that there + ** are no outstanding references to any page other than page 1. And + ** page 1 is never written to the log until the transaction is + ** committed. As a result, the call to xUndo may not fail. + */ - assert( walFramePgno(pWal, iFrame)!=1 ); - rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); ++ Pgno pgno; ++ if( isWalMode2(pWal) ){ ++ pgno = walFramePgno2(pWal, iWal, iFrame); ++ }else{ ++ pgno = walFramePgno(pWal, iFrame); ++ } ++ assert( pgno!=1 ); ++ rc = xUndo(pUndoCtx, pgno); } - assert( pgno!=1 ); - rc = xUndo(pUndoCtx, pgno); - if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal); ++ if( iMax!=iNew ) walCleanupHash(pWal); } - if( iMax!=iNew ) walCleanupHash(pWal); + SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) } return rc; } @@@ -4180,14 -3699,17 +4457,17 @@@ int sqlite3WalSavepointUndo(Wal *pWal, ** to the start of the log. Update the savepoint values to match. 
*/ aWalData[0] = 0; - aWalData[3] = pWal->nCkpt; + aWalData[3] = iCmp; } - if( aWalData[0]hdr.mxFrame ){ - pWal->hdr.mxFrame = aWalData[0]; + if( aWalData[0]hdr, iWal) ){ + walidxSetMxFrame(&pWal->hdr, iWal, aWalData[0]); pWal->hdr.aFrameCksum[0] = aWalData[1]; pWal->hdr.aFrameCksum[1] = aWalData[2]; - walCleanupHash(pWal); + SEH_TRY { + walCleanupHash(pWal); + } + SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) } return rc; @@@ -4675,6 -4120,29 +4955,30 @@@ static int walFrames return rc; } ++ + /* + ** Write a set of frames to the log. The caller must hold the write-lock + ** on the log file (obtained using sqlite3WalBeginWriteTransaction()). + ** + ** The difference between this function and walFrames() is that this + ** function wraps walFrames() in an SEH_TRY{...} block. + */ + int sqlite3WalFrames( + Wal *pWal, /* Wal handle to write to */ + int szPage, /* Database page-size in bytes */ + PgHdr *pList, /* List of dirty pages to write */ + Pgno nTruncate, /* Database size after this commit */ + int isCommit, /* True if this is a commit */ + int sync_flags /* Flags to pass to OsSync() (or 0) */ + ){ + int rc; + SEH_TRY { + rc = walFrames(pWal, szPage, pList, nTruncate, isCommit, sync_flags); + } + SEH_EXCEPT( rc = walHandleException(pWal); ) + return rc; + } + /* ** This routine is called to implement sqlite3_wal_checkpoint() and ** related interfaces. @@@ -4754,47 -4222,35 +5058,52 @@@ int sqlite3WalCheckpoint /* Read the wal-index header. */ - if( rc==SQLITE_OK ){ - walDisableBlocking(pWal); - rc = walIndexReadHdr(pWal, &isChanged); - (void)walEnableBlocking(pWal); - if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ - sqlite3OsUnfetch(pWal->pDbFd, 0, 0); - } - } - - /* Copy data from the log to the database file. 
*/ - if( rc==SQLITE_OK ){ - int iCkpt = walidxGetFile(&pWal->hdr); - - if( (walPagesize(pWal)!=nBuf) - && ((pWal->hdr.mxFrame2 & 0x7FFFFFFF) || pWal->hdr.mxFrame) - ){ - rc = SQLITE_CORRUPT_BKPT; - }else{ - rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags, zBuf); + SEH_TRY { + if( rc==SQLITE_OK ){ + walDisableBlocking(pWal); + rc = walIndexReadHdr(pWal, &isChanged); + (void)walEnableBlocking(pWal); + if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ + sqlite3OsUnfetch(pWal->pDbFd, 0, 0); + } } - - /* If no error occurred, set the output variables. */ - if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ - if( pnLog ){ - *pnLog = walidxGetMxFrame(&pWal->hdr,0)+walidxGetMxFrame(&pWal->hdr,1); + + /* Copy data from the log to the database file. */ + if( rc==SQLITE_OK ){ - if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ ++ int iCkpt = walidxGetFile(&pWal->hdr); ++ ++ if( (walPagesize(pWal)!=nBuf) ++ && ((pWal->hdr.mxFrame2 & 0x7FFFFFFF) || pWal->hdr.mxFrame) ++ ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags,zBuf); } - if( pnCkpt ){ - if( isWalMode2(pWal) ){ - if( (int)(walCkptInfo(pWal)->nBackfill) ){ - *pnCkpt = walidxGetMxFrame(&pWal->hdr, iCkpt); - ++ + /* If no error occurred, set the output variables. 
*/ + if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ - if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame; ++ if( pnLog ){ ++ WalIndexHdr *pHdr = &pWal->hdr; ++ *pnLog = walidxGetMxFrame(pHdr, 0) + walidxGetMxFrame(pHdr, 1); ++ } + SEH_INJECT_FAULT; - if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill); ++ if( pnCkpt ){ ++ if( isWalMode2(pWal) ){ ++ if( (int)(walCkptInfo(pWal)->nBackfill) ){ ++ *pnCkpt = walidxGetMxFrame(&pWal->hdr, iCkpt); ++ }else{ ++ *pnCkpt = 0; ++ } + }else{ - *pnCkpt = 0; ++ *pnCkpt = walCkptInfo(pWal)->nBackfill; + } - }else{ - *pnCkpt = walCkptInfo(pWal)->nBackfill; + } } } } + SEH_EXCEPT( rc = walHandleException(pWal); ) - if( isChanged ){ + if( isChanged && pWal->bClosing==0 ){ /* If a new wal-index header was loaded before the checkpoint was ** performed, then the pager-cache associated with pWal is now ** out of date. So zero the cached wal-index header to ensure that @@@ -4874,14 -4325,15 +5183,16 @@@ int sqlite3WalExclusiveMode(Wal *pWal, ** locks are taken in this case). Nor should the pager attempt to ** upgrade to exclusive-mode following such an error. */ + #ifndef SQLITE_USE_SEH - assert( pWal->readLock>=0 || pWal->lockError ); + assert( pWal->readLock!=WAL_LOCK_NONE || pWal->lockError ); + #endif - assert( pWal->readLock>=0 || (op<=0 && pWal->exclusiveMode==0) ); + assert( pWal->readLock!=WAL_LOCK_NONE || (op<=0 && pWal->exclusiveMode==0) ); if( op==0 ){ - if( pWal->exclusiveMode!=WAL_NORMAL_MODE ){ + if( pWal->exclusiveMode ){ pWal->exclusiveMode = WAL_NORMAL_MODE; - if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){ + rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock)); + if( rc!=SQLITE_OK ){ pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; } rc = pWal->exclusiveMode==WAL_NORMAL_MODE; @@@ -4979,20 -4428,19 +5290,23 @@@ int sqlite3_snapshot_cmp(sqlite3_snapsh */ int sqlite3WalSnapshotCheck(Wal *pWal, sqlite3_snapshot *pSnapshot){ int rc; + + /* Snapshots may not be used with wal2 mode databases. 
*/ + if( isWalMode2(pWal) ) return SQLITE_ERROR; + - rc = walLockShared(pWal, WAL_CKPT_LOCK); - if( rc==SQLITE_OK ){ - WalIndexHdr *pNew = (WalIndexHdr*)pSnapshot; - if( memcmp(pNew->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt)) - || pNew->mxFrame<walCkptInfo(pWal)->nBackfillAttempted - ){ - rc = SQLITE_ERROR_SNAPSHOT; - walUnlockShared(pWal, WAL_CKPT_LOCK); + SEH_TRY { + rc = walLockShared(pWal, WAL_CKPT_LOCK); + if( rc==SQLITE_OK ){ + WalIndexHdr *pNew = (WalIndexHdr*)pSnapshot; + if( memcmp(pNew->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt)) + || pNew->mxFrame<walCkptInfo(pWal)->nBackfillAttempted + ){ + rc = SQLITE_ERROR_SNAPSHOT; + walUnlockShared(pWal, WAL_CKPT_LOCK); + } } } - SEH_EXCEPT( rc = walHandleException(pWal); ) ++ /* Keep the ';' inside SEH_EXCEPT(...), matching the other SEH_EXCEPT() call sites; NOTE(review): presumably the macro expands its argument into the handler body - confirm against its definition. */ ++ SEH_EXCEPT( rc = walHandleException(pWal); ) return rc; }