From: dan Date: Tue, 11 Dec 2018 17:56:23 +0000 (+0000) Subject: Change the way wal2 locks work to ensure a reader only ever has to lock a X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f17e510ee599c7f5521ab8f2ea49563b0010c17e;p=thirdparty%2Fsqlite.git Change the way wal2 locks work to ensure a reader only ever has to lock a single slot. FossilOrigin-Name: 18b2c23ac53d985ccc5798ea2d92fb75644b857c373fb490e0d04d5d0194a3d5 --- diff --git a/manifest b/manifest index 1e2d309518..b182fb73af 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\schanges\sinto\sthis\sbranch. -D 2018-12-11T13:44:59.901 +C Change\sthe\sway\swal2\slocks\swork\sto\sensure\sa\sreader\sonly\sever\shas\sto\slock\sa\nsingle\sslot. +D 2018-12-11T17:56:23.255 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F Makefile.in 68d0ba0f0b533d5bc84c78c13a6ce84ee81183a67014caa47a969e67f028fa1c @@ -590,7 +590,7 @@ F src/vdbesort.c 90aad5a92608f2dd771c96749beabdb562c9d881131a860a7a5bccf66dc3be7 F src/vdbetrace.c 79d6dbbc479267b255a7de8080eee6e729928a0ef93ed9b0bfa5618875b48392 F src/vtab.c 70188a745dc4e57d26e942681ff4b2912b7c8249ad5de3f60f0677b4337bcfaa F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 -F src/wal.c c01f62932ae8458e77aed8846e819e167a52224c0a13f972c9e7d90354205911 +F src/wal.c 22850fcd2efa9b13e237f6097f931fe7dbd9e50f401a659288d70ae3951396b8 F src/wal.h d2a69695c84137f76e19a247a342cb02ab0131001b6f58153d94b71195bbd84d F src/walker.c fb94aadc9099ff9c6506d0a8b88d51266005bcaa265403f3d7caf732a562eb66 F src/where.c 3818e8a736a05d2cb194e64399af707e367fbcc5c251d785804d02eaf121288e @@ -1597,7 +1597,7 @@ F test/vtab_shared.test 5253bff2355a9a3f014c15337da7e177ab0ef8ad F test/wal.test 613efec03e517e1775d86b993a54877d2e29a477 F test/wal2.test 155b9efa999bdb38ce1cd729b9a4fcdbffd6b88be27f039bad1d2929d287d918 F test/wal2rewrite.test 6ca6f631ffcf871240beab5f02608913fd075c6d0d31310b026c8383c65c9f9c -F test/wal2simple.test 8c9dfb8f1bca01a0deb57f7074cdb83865c2292e89b13f7a51a1c160dca3f5f4 +F test/wal2simple.test ce30899ab9c396d0d91905f401bb9a591f12bdf0b6b6bd5d1a62b6f560eac6e3 F test/wal2snapshot.test 95a919e1c73dee0e0212d10931d03cc1116f68a0ff603163e551aaa5ac7025c1 F test/wal3.test 2a93004bc0fb2b5c29888964024695bade278ab2 F test/wal4.test 4744e155cd6299c6bd99d3eab1c82f77db9cdb3c @@ -1787,7 +1787,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P f6baa7e1163ed5f61375b0554337030fec23e8a9f60c6412e1b5d626961e93f9 d1db8d5894450b24bb0335983503d9bbf6cc48a0ae4b83291283fb2d32b6b25b -R 01aa34cbfc00cc4424502da843942732 +P d8dd98a39ea061dea264a7d81153f7b1be2f81b554b30d0ce289897c802209bd +R 7649521e36559a5f6fcaed801161c713 U dan -Z d3ebc7a4959beab3fb6b05d6d91630aa +Z 45541997f2aea4861e4215e9e1638ee2 diff --git a/manifest.uuid b/manifest.uuid index 6216c305ae..5aa435a1c6 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d8dd98a39ea061dea264a7d81153f7b1be2f81b554b30d0ce289897c802209bd \ No newline at end of file +18b2c23ac53d985ccc5798ea2d92fb75644b857c373fb490e0d04d5d0194a3d5 \ No newline at end of file diff --git a/src/wal.c b/src/wal.c index ee5d2e1779..634de7ec6f 100644 --- a/src/wal.c +++ b/src/wal.c @@ -362,7 +362,6 @@ ** recovery procedure still takes the same exclusive lock on the entire ** range of SQLITE_SHM_NLOCK shm-locks. This works because the read-locks ** above use four of the six read-locking slots used by legacy wal mode. -** See the header comment for function walLockReader() for details. ** ** STARTUP/RECOVERY ** @@ -473,16 +472,20 @@ int sqlite3WalTrace = 0; ** is held, or else is the index of the read-mark on which a lock is ** held. ** -** In wal2 mode, Wal.readLock must be set to one of the following values. -** A value of -1 still indicates that no read-lock is held, but the other -** values are symbolic. See the implementation of walLockReader() for -** details of how the symbols map to OS level locks. +** In wal2 mode, a value of -1 still indicates that no read-lock is held. +** And a non-zero value still represents the index of the read-mark on +** which a lock is held. There are two differences: +** +** 1. wal2 mode never uses read-mark 0. +** +** 2. locks on each read-mark have a different interpretation, as +** indicated by the symbolic names below. */ #define WAL_LOCK_NONE -1 #define WAL_LOCK_PART1 1 #define WAL_LOCK_PART1_FULL2 2 -#define WAL_LOCK_PART2 3 -#define WAL_LOCK_PART2_FULL1 4 +#define WAL_LOCK_PART2_FULL1 3 +#define WAL_LOCK_PART2 4 /* ** This constant is used in wal2 mode only. @@ -1111,36 +1114,6 @@ static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){ walLockName(lockIdx), n)); } -/* -** This function is used to take and release read-locks in wal2 mode. -** -** Use of WAL_READ_LOCK(x) slots for (1<=x<=4). -** -** 1) Partial read of *-wal-1 (blocks checkpointer from checkpointing) -** 2) Full read of *-wal-2 (blocks writer from writing) -** 3) Partial read of *-wal-2 (blocks checkpointer from checkpointing) -** 4) Full read of *-wal-1 (blocks writer from writing) -*/ -static int walLockReader(Wal *pWal, int eLock, int bLock){ - int i; /* Index of first readmark to lock */ - int n; /* Number of readmarks to lock */ - - assert( pWal->hdr.iVersion==WAL_VERSION2 ); - if( pWal->exclusiveMode ) return SQLITE_OK; - - switch( eLock ){ - case WAL_LOCK_PART1 : i = 1; n = 1; break; - case WAL_LOCK_PART1_FULL2: i = 1; n = 2; break; - case WAL_LOCK_PART2 : i = 3; n = 1; break; - case WAL_LOCK_PART2_FULL1: i = 3; n = 2; break; - default: assert( !"cannot happen" ); - } - - return sqlite3OsShmLock(pWal->pDbFd, WAL_READ_LOCK(i), n, - SQLITE_SHM_SHARED | (bLock ? SQLITE_SHM_LOCK : SQLITE_SHM_UNLOCK) - ); -} - /* ** Compute a hash on a page number. The resulting hash value must land ** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances @@ -2251,6 +2224,68 @@ static void walRestartHdr(Wal *pWal, u32 salt1){ assert( pInfo->aReadMark[0]==0 ); } +/* +** This function is used in wal2 mode. +** +** This function is called when writer pWal is just about to start +** writing out frames. Parameter iApp is the current wal file. The "other" wal +** file (wal file !iApp) has been fully checkpointed. This function returns +** SQLITE_OK if there are no readers preventing the writer from switching to +** the other wal file. Or SQLITE_BUSY if there are. +*/ +static int wal2RestartOk(Wal *pWal, int iApp){ + /* The other wal file (wal file !iApp) can be overwritten if there + ** are no readers reading from it - no "full" or "partial" locks. + ** Technically speaking it is not possible for any reader to hold + ** a "part" lock, as this would have prevented the file from being + ** checkpointed. But checking anyway doesn't hurt. The following + ** is equivalent to: + ** + ** if( iApp==0 ) eLock = WAL_LOCK_PART1_FULL2; + ** if( iApp==1 ) eLock = WAL_LOCK_PART1; + */ + int eLock = 1 + (iApp==0); + + assert( WAL_LOCK_PART1==1 ); + assert( WAL_LOCK_PART1_FULL2==2 ); + assert( WAL_LOCK_PART2_FULL1==3 ); + assert( WAL_LOCK_PART2==4 ); + + assert( iApp!=0 || eLock==WAL_LOCK_PART1_FULL2 ); + assert( iApp!=1 || eLock==WAL_LOCK_PART1 ); + + return walLockExclusive(pWal, WAL_READ_LOCK(eLock), 3); +} +static void wal2RestartFinished(Wal *pWal, int iApp){ + walUnlockExclusive(pWal, WAL_READ_LOCK(1 + (iApp==0)), 3); +} + +/* +** This function is used in wal2 mode. +** +** This function is called when a checkpointer wishes to checkpoint wal +** file iCkpt. It takes the required lock and, if successful, returns +** SQLITE_OK. Otherwise, an SQLite error code (e.g. SQLITE_BUSY). If this +** function returns SQLITE_OK, it is the responsibility of the caller +** to invoke wal2CheckpointFinished() to release the lock. +*/ +static int wal2CheckpointOk(Wal *pWal, int iCkpt){ + int eLock = 1 + (iCkpt*2); + + assert( WAL_LOCK_PART1==1 ); + assert( WAL_LOCK_PART1_FULL2==2 ); + assert( WAL_LOCK_PART2_FULL1==3 ); + assert( WAL_LOCK_PART2==4 ); + + assert( iCkpt!=0 || eLock==WAL_LOCK_PART1 ); + assert( iCkpt!=1 || eLock==WAL_LOCK_PART2_FULL1 ); + + return walLockExclusive(pWal, WAL_READ_LOCK(eLock), 2); +} +static void wal2CheckpointFinished(Wal *pWal, int iCkpt){ + walUnlockExclusive(pWal, WAL_READ_LOCK(1 + (iCkpt*2)), 2); +} + /* ** Copy as much content as we can from the WAL back into the database file ** in response to an sqlite3_wal_checkpoint() request or the equivalent. @@ -2318,7 +2353,7 @@ static int walCheckpoint( ** preventing this checkpoint operation. If one is found, return ** early. */ if( bWal2 ){ - rc = walLockExclusive(pWal, WAL_READ_LOCK(1 + iCkpt*2), 1); + rc = wal2CheckpointOk(pWal, iCkpt); if( rc!=SQLITE_OK ) return rc; } @@ -2367,9 +2402,9 @@ static int walCheckpoint( assert( rc==SQLITE_OK || pIter==0 ); } - if( pIter - && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0),1))==SQLITE_OK - ){ + if( pIter && (bWal2 + || (rc = walBusyLock(pWal, xBusy, pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK + )){ u32 nBackfill = pInfo->nBackfill; assert( bWal2==0 || nBackfill==0 ); @@ -2434,7 +2469,9 @@ static int walCheckpoint( } /* Release the reader lock held while backfilling */ - walUnlockExclusive(pWal, WAL_READ_LOCK(bWal2 ? 1 + iCkpt*2 : 0), 1); + if( bWal2==0 ){ + walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); + } } if( rc==SQLITE_BUSY ){ @@ -2442,6 +2479,7 @@ static int walCheckpoint( ** just because there are active readers. */ rc = SQLITE_OK; } + if( bWal2 ) wal2CheckpointFinished(pWal, iCkpt); } /* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the @@ -3060,18 +3098,27 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ assert( pWal->apWiData[0]!=0 ); pInfo = walCkptInfo(pWal); if( isWalMode2(pWal) ){ - int eLock = 1 + (walidxGetFile(&pWal->hdr)*2); - if( pInfo->nBackfill==0 ){ - eLock += walidxGetMxFrame(&pWal->hdr, !walidxGetFile(&pWal->hdr))>0; - } - rc = walLockReader(pWal, eLock, 1); + /* This connection needs a "part" lock on the current wal file and, + ** unless pInfo->nBackfill is set to indicate that it has already been + ** checkpointed, a "full" lock on the other wal file. */ + int iWal = walidxGetFile(&pWal->hdr); + int nBackfill = pInfo->nBackfill || walidxGetMxFrame(&pWal->hdr, !iWal)==0; + int eLock = 1 + (iWal*2) + (nBackfill==iWal); + + assert( nBackfill==0 || nBackfill==1 ); + assert( iWal==0 || iWal==1 ); + assert( iWal!=0 || nBackfill!=1 || eLock==WAL_LOCK_PART1 ); + assert( iWal!=0 || nBackfill!=0 || eLock==WAL_LOCK_PART1_FULL2 ); + assert( iWal!=1 || nBackfill!=1 || eLock==WAL_LOCK_PART2 ); + assert( iWal!=1 || nBackfill!=0 || eLock==WAL_LOCK_PART2_FULL1 ); + + rc = walLockShared(pWal, WAL_READ_LOCK(eLock)); if( rc!=SQLITE_OK ){ return rc; } - walShmBarrier(pWal); if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ - walLockReader(pWal, eLock, 0); + walUnlockShared(pWal, WAL_READ_LOCK(eLock)); return WAL_RETRY; }else{ pWal->readLock = eLock; @@ -3403,11 +3450,7 @@ int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ void sqlite3WalEndReadTransaction(Wal *pWal){ sqlite3WalEndWriteTransaction(pWal); if( pWal->readLock!=WAL_LOCK_NONE ){ - if( isWalMode2(pWal) ){ - (void)walLockReader(pWal, pWal->readLock, 0); - }else{ - walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); - } + walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); pWal->readLock = WAL_LOCK_NONE; } } @@ -3794,33 +3837,6 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ return rc; } -/* -** This function is used in wal2 mode. -** -** This function is called when writer pWal is just about to start -** writing out frames. The "other" wal file (wal file !pWal->hdr.iAppend) -** has been fully checkpointed. This function returns SQLITE_OK if there -** are no readers preventing the writer from switching to the other wal -** file. Or SQLITE_BUSY if there are. -*/ -static int walRestartOk(Wal *pWal){ - int rc; /* Return code */ - int iApp = walidxGetFile(&pWal->hdr); /* Current WAL file */ - - /* No reader can be doing a "partial" read of wal file !iApp - in that - ** case it would not have been possible to checkpoint the file. So - ** it is only necessary to test for "full" readers. See the comment - ** above walLockReader() function for exactly what this means in terms - ** of locks. */ - int i = (iApp==0) ? 2 : 4; - - rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); - if( rc==SQLITE_OK ){ - walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); - } - return rc; -} - /* ** This function is called just before writing a set of frames to the log ** file (see sqlite3WalFrames()). It checks to see if, instead of appending @@ -3848,19 +3864,20 @@ static int walRestartLog(Wal *pWal){ if( walidxGetMxFrame(&pWal->hdr, iApp)>=nWalSize ){ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); if( walidxGetMxFrame(&pWal->hdr, !iApp)==0 || pInfo->nBackfill ){ - rc = walRestartOk(pWal); + rc = wal2RestartOk(pWal, iApp); if( rc==SQLITE_OK ){ - iApp = !iApp; + int iNew = !iApp; pWal->nCkpt++; - walidxSetFile(&pWal->hdr, iApp); - walidxSetMxFrame(&pWal->hdr, iApp, 0); + walidxSetFile(&pWal->hdr, iNew); + walidxSetMxFrame(&pWal->hdr, iNew, 0); sqlite3Put4byte((u8*)&pWal->hdr.aSalt[0], pWal->hdr.aFrameCksum[0]); sqlite3Put4byte((u8*)&pWal->hdr.aSalt[1], pWal->hdr.aFrameCksum[1]); walIndexWriteHdr(pWal); pInfo->nBackfill = 0; - walLockReader(pWal, pWal->readLock, 0); - pWal->readLock = iApp ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2; - rc = walLockReader(pWal, pWal->readLock, 1); + wal2RestartFinished(pWal, iApp); + walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); + pWal->readLock = iNew ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2; + rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock)); }else if( rc==SQLITE_BUSY ){ rc = SQLITE_OK; } @@ -4490,11 +4507,7 @@ int sqlite3WalExclusiveMode(Wal *pWal, int op){ if( op==0 ){ if( pWal->exclusiveMode ){ pWal->exclusiveMode = WAL_NORMAL_MODE; - if( isWalMode2(pWal) ){ - rc = walLockReader(pWal, pWal->readLock, 1); - }else{ - rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock)); - } + rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock)); if( rc!=SQLITE_OK ){ pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; } @@ -4506,11 +4519,7 @@ int sqlite3WalExclusiveMode(Wal *pWal, int op){ }else if( op>0 ){ assert( pWal->exclusiveMode==WAL_NORMAL_MODE ); assert( pWal->readLock>=0 ); - if( isWalMode2(pWal) ){ - walLockReader(pWal, pWal->readLock, 0); - }else{ - walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); - } + walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; rc = 1; }else{ diff --git a/test/wal2simple.test b/test/wal2simple.test index becbb232fc..28e13bf162 100644 --- a/test/wal2simple.test +++ b/test/wal2simple.test @@ -243,8 +243,6 @@ do_test 6.1 { } } {} -puts "[file size test.db-wal] [file size test.db-wal2]" - do_test 6.2.1 { foreach f [glob -nocomplain test.db2*] { forcedelete $f } forcecopy test.db-wal2 test.db2-wal2