From: dan Date: Sat, 25 Jul 2020 20:16:27 +0000 (+0000) Subject: Allow a wal mode recovery to proceed even if there are readers. X-Git-Tag: version-3.33.0~37^2~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d3e38b7c0ea40acedc5fc3d95bb86baebd77cb82;p=thirdparty%2Fsqlite.git Allow a wal mode recovery to proceed even if there are readers. FossilOrigin-Name: 74374aebf9abf3d6b6a3920967a079ceaa4c6276dc6c177682742c2be405b7b7 --- diff --git a/manifest b/manifest index 80b02e2e4b..78768b9243 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Remove\sa\ssurplus\sspace\sfrom\sa\scomment -D 2020-07-24T11:01:29.118 +C Allow\sa\swal\smode\srecovery\sto\sproceed\seven\sif\sthere\sare\sreaders. +D 2020-07-25T20:16:27.306 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -619,7 +619,7 @@ F src/vdbetrace.c fa3bf238002f0bbbdfb66cc8afb0cea284ff9f148d6439bc1f6f2b4c3b7143 F src/vdbevtab.c ee5b4c902fdda2230f9503ac7b84c6d614c91e8f6f4dc1633e2e8dfef8ffb144 F src/vtab.c 5f5fc793092f53bbdfde296c50f563fb7bda58cf48e9cf6a8bdfbc5abd409845 F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 -F src/wal.c 231044ecf7d5d78bc705af9dcec6c10ec59e891366362b6be54bb6a0bc7c17db +F src/wal.c 477491b1996746fb197ac44cb7e5e2bdc14645e0663c6422445534254bee37fe F src/wal.h c3aa7825bfa2fe0d85bef2db94655f99870a285778baa36307c0a16da32b226a F src/walker.c 3df26a33dc4f54e8771600fb7fdebe1ece0896c2ad68c30ab40b017aa4395049 F src/where.c 2ea911238674e9baaeddf105dddabed92692a01996073c4d4983f9a7efe481f9 @@ -1878,7 +1878,10 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 270ac1a0f232d75537be40abae559004e950b992cb2c7e94cd6de66e96ae17bd -R 0212ebf857b7a36fab6e1708725a20d6 -U drh -Z 7c8911d20a18c22808c4eb3363f6ff12 +P 73fecc688ab5c459245c9bb89432139a083ef378404b4dae3397a7f4e253f72a +R 7245f53be8944a8b99a3f69058418f3e +T *branch * unlocked-recovery +T *sym-unlocked-recovery * +T -sym-trunk * +U dan +Z c489f8aaf2222395682cc1ddd5e61e86 diff --git a/manifest.uuid b/manifest.uuid index ef6483f2d6..6e841e436f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -73fecc688ab5c459245c9bb89432139a083ef378404b4dae3397a7f4e253f72a \ No newline at end of file +74374aebf9abf3d6b6a3920967a079ceaa4c6276dc6c177682742c2be405b7b7 \ No newline at end of file diff --git a/src/wal.c b/src/wal.c index c6d4476e7b..9ad0b496e2 100644 --- a/src/wal.c +++ b/src/wal.c @@ -1161,12 +1161,6 @@ static int walIndexRecover(Wal *pWal){ assert( pWal->writeLock ); iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock; rc = walLockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock); - if( rc==SQLITE_OK ){ - rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); - if( rc!=SQLITE_OK ){ - walUnlockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock); - } - } if( rc ){ return rc; } @@ -1182,15 +1176,16 @@ static int walIndexRecover(Wal *pWal){ if( nSize>WAL_HDRSIZE ){ u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ + u32 *aPrivate = 0; /* Heap copy of *-shm hash being populated */ u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ int szFrame; /* Number of bytes in buffer aFrame[] */ u8 *aData; /* Pointer to data part of aFrame buffer */ - int iFrame; /* Index of last frame read */ - i64 iOffset; /* Next offset to read from log file */ int szPage; /* Page size according to the log */ u32 magic; /* Magic value read from WAL header */ u32 version; /* Magic value read from WAL header */ int isValid; /* True if this frame is valid */ + int iPg; /* Current 32KB wal-index page */ + int iLastFrame; /* Last frame in wal, based on nSize alone */ /* Read in the WAL header. */ rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); @@ -1237,38 +1232,59 @@ static int walIndexRecover(Wal *pWal){ /* Malloc a buffer to read frames into. */ szFrame = szPage + WAL_FRAME_HDRSIZE; - aFrame = (u8 *)sqlite3_malloc64(szFrame); + aFrame = (u8 *)sqlite3_malloc64(szFrame + WALINDEX_PGSZ); if( !aFrame ){ rc = SQLITE_NOMEM_BKPT; goto recovery_error; } aData = &aFrame[WAL_FRAME_HDRSIZE]; + aPrivate = (u32*)&aData[szPage]; /* Read all frames from the log file. */ - iFrame = 0; - for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ - u32 pgno; /* Database page number for frame */ - u32 nTruncate; /* dbsize field from frame header */ - - /* Read and decode the next log frame. */ - iFrame++; - rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); - if( rc!=SQLITE_OK ) break; - isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); - if( !isValid ) break; - rc = walIndexAppend(pWal, iFrame, pgno); - if( rc!=SQLITE_OK ) break; - - /* If nTruncate is non-zero, this is a commit record. */ - if( nTruncate ){ - pWal->hdr.mxFrame = iFrame; - pWal->hdr.nPage = nTruncate; - pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); - testcase( szPage<=32768 ); - testcase( szPage>=65536 ); - aFrameCksum[0] = pWal->hdr.aFrameCksum[0]; - aFrameCksum[1] = pWal->hdr.aFrameCksum[1]; + iLastFrame = (nSize - WAL_HDRSIZE) / szFrame; + for(iPg=0; iPg<=walFramePage(iLastFrame); iPg++){ + u32 *aShare; + int iFrame; /* Index of last frame read */ + int iLast = MIN(iLastFrame, HASHTABLE_NPAGE_ONE+iPg*HASHTABLE_NPAGE); + int iFirst = 1 + (iPg==0?0:HASHTABLE_NPAGE_ONE+(iPg-1)*HASHTABLE_NPAGE); + rc = walIndexPage(pWal, iPg, (volatile u32**)&aShare); + if( rc ) break; + pWal->apWiData[iPg] = aPrivate; + + for(iFrame=iFirst; iFrame<=iLast; iFrame++){ + i64 iOffset = walFrameOffset(iFrame, szPage); + u32 pgno; /* Database page number for frame */ + u32 nTruncate; /* dbsize field from frame header */ + + /* Read and decode the next log frame. */ + rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); + if( rc!=SQLITE_OK ) break; + isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); + if( !isValid ) break; + rc = walIndexAppend(pWal, iFrame, pgno); + if( rc!=SQLITE_OK ) break; + + /* If nTruncate is non-zero, this is a commit record. */ + if( nTruncate ){ + pWal->hdr.mxFrame = iFrame; + pWal->hdr.nPage = nTruncate; + pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); + testcase( szPage<=32768 ); + testcase( szPage>=65536 ); + aFrameCksum[0] = pWal->hdr.aFrameCksum[0]; + aFrameCksum[1] = pWal->hdr.aFrameCksum[1]; + } } + pWal->apWiData[iPg] = aShare; + + { + int nHdr = (iPg==0 ? WALINDEX_HDR_SIZE : 0); + int nHdr32 = nHdr / sizeof(u32); + if( memcpy(&aShare[nHdr32], &aPrivate[nHdr32], WALINDEX_PGSZ-nHdr) ){ + memcpy(&aShare[nHdr32], &aPrivate[nHdr32], WALINDEX_PGSZ-nHdr); + } + } + if( iFrame<=iLast ) break; } sqlite3_free(aFrame); @@ -1283,15 +1299,24 @@ finished: walIndexWriteHdr(pWal); /* Reset the checkpoint-header. This is safe because this thread is - ** currently holding locks that exclude all other readers, writers and - ** checkpointers. + ** currently holding locks that exclude all other writers and + ** checkpointers. Then set the values of read-mark slots 1 through N. */ pInfo = walCkptInfo(pWal); pInfo->nBackfill = 0; pInfo->nBackfillAttempted = pWal->hdr.mxFrame; pInfo->aReadMark[0] = 0; - for(i=1; iaReadMark[i] = READMARK_NOT_USED; - if( pWal->hdr.mxFrame ) pInfo->aReadMark[1] = pWal->hdr.mxFrame; + for(i=1; ihdr.mxFrame ){ + pInfo->aReadMark[i] = pWal->hdr.mxFrame; + }else{ + pInfo->aReadMark[i] = READMARK_NOT_USED; + } + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + } + } /* If more than one frame was recovered from the log file, report an ** event via sqlite3_log(). This is to help with identifying performance @@ -1309,7 +1334,6 @@ finished: recovery_error: WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok")); walUnlockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock); - walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); return rc; }