From 11caf4f4b73736f6b209d591d652c7e0398aa024 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 4 Nov 2017 18:10:03 +0000 Subject: [PATCH] In cases where a readonly_shm client cannot take the DMS lock on the *-shm file, have it parse the wal file and create a wal-index to access it in heap memory. FossilOrigin-Name: 18b268433d739486eac1b04947bd418655e4bc56e8dc63ffa558aa4552a32e30 --- manifest | 16 ++--- manifest.uuid | 2 +- src/wal.c | 176 +++++++++++++++++++++++++++++++++++++++++++---- test/walro.test | 10 +-- test/walro2.test | 19 +++-- 5 files changed, 192 insertions(+), 31 deletions(-) diff --git a/manifest b/manifest index 5ad595579c..7e5b9e5a86 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\stest\scases\sin\swal2.test\sbroken\sby\sthe\slocking\schange\sin\sthe\sprevious\ncommit. -D 2017-11-02T18:57:46.036 +C In\scases\swhere\sa\sreadonly_shm\sclient\scannot\stake\sthe\sDMS\slock\son\sthe\s*-shm\nfile,\shave\sit\sparse\sthe\swal\sfile\sand\screate\sa\swal-index\sto\saccess\sit\sin\sheap\nmemory. +D 2017-11-04T18:10:03.528 F Makefile.in 5bae3f2f3d42f2ad52b141562d74872c97ac0fca6c54953c91bb150a0e6427a8 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 3a5cb477ec3ce5274663b693164e349db63348667cd45bad78cc13d580b691e2 @@ -543,7 +543,7 @@ F src/vdbesort.c 731a09e5cb9e96b70c394c1b7cf3860fbe84acca7682e178615eb941a3a0ef2 F src/vdbetrace.c 48e11ebe040c6b41d146abed2602e3d00d621d7ebe4eb29b0a0f1617fd3c2f6c F src/vtab.c 0e4885495172e1bdf54b12cce23b395ac74ef5729031f15e1bc1e3e6b360ed1a F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 -F src/wal.c 38480e7bdc697cf88a13a22ffe60f7bd761bc02b45f7a323f1bb9e61a136b3ae +F src/wal.c 2b287b5250e89d548c6bbd1d204d0db41046bb3984b9b4a79fc84e22359f1beb F src/wal.h 8de5d2d3de0956d6f6cb48c83a4012d5f227b8fe940f3a349a4b7e85ebcb492a F src/walker.c d591e8a9ccf60abb010966b354fcea4aa08eba4d83675c2b281a8764c76cc22f F src/where.c b7a075f5fb3d912a891dcc3257f538372bb4a1622dd8ca7d752ad95ce8949ba4 @@ -1526,8 +1526,8 @@ F test/walnoshm.test 84ca10c544632a756467336b7c3b864d493ee496 F test/waloverwrite.test dad2f26567f1b45174e54fbf9a8dc1cb876a7f03 F test/walpersist.test 8c6b7e3ec1ba91b5e4dc4e0921d6d3f87cd356a6 F test/walprotocol.test 0b92feb132ccebd855494d917d3f6c2d717ace20 -F test/walro.test e492598baa8cd7777fef6203f6fe922c20cd691cc19e60ccd0dd0dbc68394d0a -F test/walro2.test 23fea1e7abae13072b0640ef846d32080b2fc435658d4c4eb9db266b07b33776 +F test/walro.test 906586c3ae7a991d8c840ceed92400aee21a0a3e4155ce7c4220399777311552 +F test/walro2.test 611ceebd190edeca9bf39e5068cbc864f15294371b4acf9ee837db477840af54 F test/walshared.test 0befc811dcf0b287efae21612304d15576e35417 F test/walslow.test c05c68d4dc2700a982f89133ce103a1a84cc285f F test/walthread.test de8dbaf6d9e41481c460ba31ca61e163d7348f8e @@ -1668,7 +1668,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 5190d84a296b7cf716ef43bf7b6d4d351ef1a4d650de37dc01a5ab333da7c05d -R 30693e487c23aaa3c080ebb269e4e38f +P f569c3517234881f9425075aab65a32ffd0deb8e793f421a241d8cca881da33f +R 10fc0a1645ce290a2a283e11360fe887 U dan -Z a5242a9fca30f5b5641106b18cdb01ae +Z 79ea7f3203670e23f752126d91be88fd diff --git a/manifest.uuid b/manifest.uuid index a8bcfdb1fe..e8b4f82532 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f569c3517234881f9425075aab65a32ffd0deb8e793f421a241d8cca881da33f \ No newline at end of file +18b268433d739486eac1b04947bd418655e4bc56e8dc63ffa558aa4552a32e30 \ No newline at end of file diff --git a/src/wal.c b/src/wal.c index 28fade96b0..17e253a8b6 100644 --- a/src/wal.c +++ b/src/wal.c @@ -455,6 +455,7 @@ struct Wal { u8 truncateOnCommit; /* True to truncate WAL file on commit */ u8 syncHeader; /* Fsync the WAL header if true */ u8 padToSectorBoundary; /* Pad transactions out to the next sector */ + u8 bUnlocked; WalIndexHdr hdr; /* Wal-index header for current transaction */ u32 minFrame; /* Ignore wal frames before this one */ u32 iReCksum; /* On commit, recalculate checksums from here */ @@ -1270,13 +1271,14 @@ recovery_error: ** Close an open wal-index. */ static void walIndexClose(Wal *pWal, int isDelete){ - if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){ + if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE || pWal->bUnlocked ){ int i; for(i=0; inWiData; i++){ sqlite3_free((void *)pWal->apWiData[i]); pWal->apWiData[i] = 0; } - }else{ + } + if( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE ){ sqlite3OsShmUnmap(pWal->pDbFd, isDelete); } } @@ -2091,15 +2093,13 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ */ assert( pChanged ); rc = walIndexPage(pWal, 0, &page0); + if( rc==SQLITE_READONLY_CANTLOCK ){ + assert( page0==0 && pWal->writeLock==0 ); + pWal->bUnlocked = 1; + pWal->exclusiveMode = WAL_HEAPMEMORY_MODE; + *pChanged = 1; + }else if( rc!=SQLITE_OK ){ - if( rc==SQLITE_READONLY_CANTLOCK -#ifdef SQLITE_ENABLE_SNAPSHOT - && pWal->pSnapshot==0 -#endif - ){ - memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); - rc = SQLITE_OK; - } return rc; }; assert( page0 || pWal->writeLock==0 ); @@ -2116,7 +2116,7 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ */ assert( badHdr==0 || pWal->writeLock==0 ); if( badHdr ){ - if( pWal->readOnly & WAL_SHM_RDONLY ){ + if( pWal->bUnlocked==0 && (pWal->readOnly & WAL_SHM_RDONLY) ){ if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ walUnlockShared(pWal, WAL_WRITE_LOCK); rc = SQLITE_READONLY_RECOVERY; @@ -2146,6 +2146,12 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){ rc = SQLITE_CANTOPEN_BKPT; } + if( pWal->bUnlocked ){ + if( rc!=SQLITE_OK ){ + walIndexClose(pWal, 0); + } + pWal->exclusiveMode = WAL_NORMAL_MODE; + } return rc; } @@ -2156,6 +2162,144 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ */ #define WAL_RETRY (-1) +/* +** Open an "unlocked" transaction. An unlocked transaction is a read +** transaction used by a read-only client in cases where the *-shm +** file cannot be mapped and its contents cannot be trusted. It is +** assumed that the *-wal file has been read and that a wal-index +** constructed in heap memory is currently available in Wal.apWiData[]. +** +** If this function returns SQLITE_OK, then the read transaction has +** been successfully opened. In this case output variable (*pChanged) +** is set to true before returning if the caller should discard the +** contents of the page cache before proceeding. Or, if it returns +** WAL_RETRY, then the heap memory wal-index has been discarded and +** the caller should retry opening the read transaction from the +** beginning (including attempting to map the *-shm file). +** +** If an error occurs, an SQLite error code is returned. +*/ +static int walBeginUnlocked(Wal *pWal, int *pChanged){ + i64 szWal; /* Size of wal file on disk in bytes */ + i64 iOffset; /* Current offset when reading wal file */ + u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ + u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ + int szFrame; /* Number of bytes in buffer aFrame[] */ + u8 *aData; /* Pointer to data part of aFrame buffer */ + volatile void *pDummy; /* Dummy argument for xShmMap */ + int rc; /* Return code */ + u32 aSaveCksum[2]; /* Saved copy of pWal->hdr.aFrameCksum */ + + assert( pWal->bUnlocked ); + assert( pWal->readOnly & WAL_SHM_RDONLY ); + assert( pWal->nWiData>0 && pWal->apWiData[0] ); + + /* Take WAL_READ_LOCK(0). This has the effect of preventing any + ** live clients from running a checkpoint, but does not stop them + ** from running recovery. */ + rc = walLockShared(pWal, WAL_READ_LOCK(0)); + if( rc!=SQLITE_OK ){ + return (rc==SQLITE_BUSY ? WAL_RETRY : rc); + } + pWal->readLock = 0; + + /* Try to map the *-shm file again. If it succeeds this time, then + ** a non-readonly_shm connection has already connected to the database. + ** In this case, start over with opening the transaction. + ** + ** The WAL_READ_LOCK(0) lock held by this client prevents a checkpoint + ** from taking place. But it does not prevent the wal from being wrapped + ** if a checkpoint has already taken place. This means that if another + ** client is connected at this point, it may have already checkpointed + ** the entire wal. In that case it would not be safe to continue with + ** the unlocked transaction, as the other client may overwrite wal + ** frames that this client is still using. */ + rc = sqlite3OsShmMap(pWal->pDbFd, 0, WALINDEX_PGSZ, 0, &pDummy); + if( rc!=SQLITE_READONLY_CANTLOCK ){ + assert( rc!=SQLITE_OK ); + rc = (rc==SQLITE_READONLY ? WAL_RETRY : rc); + goto begin_unlocked_out; + } + + memcpy(&pWal->hdr, (void*)walIndexHdr(pWal), sizeof(WalIndexHdr)); + rc = sqlite3OsFileSize(pWal->pWalFd, &szWal); + if( rc!=SQLITE_OK || (szWalhdr.mxFrame==0) ){ + /* If the wal file is too small to contain a wal-header and the + ** wal-index header has mxFrame==0, then it must be safe to proceed + ** reading the database file only. However, the page cache cannot + ** be trusted, as a read/write connection may have connected, written + ** the db, run a checkpoint, truncated the wal file and disconnected + ** since this client's last read transaction. */ + *pChanged = 1; + goto begin_unlocked_out; + } + + /* Check the salt keys at the start of the wal file still match. */ + rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); + if( rc!=SQLITE_OK ){ + goto begin_unlocked_out; + } + if( memcmp(&pWal->hdr.aSalt, &aBuf[16], 8) ){ + rc = WAL_RETRY; + goto begin_unlocked_out; + } + + /* Allocate a buffer to read frames into */ + szFrame = pWal->hdr.szPage + WAL_FRAME_HDRSIZE; + aFrame = (u8 *)sqlite3_malloc64(szFrame); + if( aFrame==0 ){ + rc = SQLITE_NOMEM_BKPT; + goto begin_unlocked_out; + } + aData = &aFrame[WAL_FRAME_HDRSIZE]; + + aSaveCksum[0] = pWal->hdr.aFrameCksum[0]; + aSaveCksum[1] = pWal->hdr.aFrameCksum[1]; + for(iOffset=walFrameOffset(pWal->hdr.mxFrame+1, pWal->hdr.szPage); + iOffset+szFrame<=szWal; + iOffset+=szFrame + ){ + u32 pgno; /* Database page number for frame */ + u32 nTruncate; /* dbsize field from frame header */ + + /* Read and decode the next log frame. */ + rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_IOERR_SHORT_READ ){ + /* If this branch is taken, some other client has truncated the + ** *-wal file since the call to sqlite3OsFileSize() above. This + ** indicates that a read-write client has connected to the system. + ** So retry opening this read transaction. */ + rc = WAL_RETRY; + } + break; + } + if( !walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame) ) break; + + /* If nTruncate is non-zero, this is a commit record. */ + if( nTruncate ){ + rc = WAL_RETRY; + break; + } + } + pWal->hdr.aFrameCksum[0] = aSaveCksum[0]; + pWal->hdr.aFrameCksum[1] = aSaveCksum[1]; + + begin_unlocked_out: + sqlite3_free(aFrame); + if( rc!=SQLITE_OK ){ + int i; + for(i=0; inWiData; i++){ + sqlite3_free((void*)pWal->apWiData[i]); + pWal->apWiData[i] = 0; + } + pWal->bUnlocked = 0; + sqlite3WalEndReadTransaction(pWal); + *pChanged = 1; + } + return rc; +} + /* ** Attempt to start a read transaction. This might fail due to a race or ** other transient condition. When that happens, it returns WAL_RETRY to @@ -2244,7 +2388,10 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ } if( !useWal ){ - rc = walIndexReadHdr(pWal, pChanged); + assert( rc==SQLITE_OK ); + if( pWal->bUnlocked==0 ){ + rc = walIndexReadHdr(pWal, pChanged); + } if( rc==SQLITE_BUSY ){ /* If there is not a recovery running in another thread or process ** then convert BUSY errors to WAL_RETRY. If recovery is known to @@ -2273,6 +2420,9 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ if( rc!=SQLITE_OK ){ return rc; } + else if( pWal->bUnlocked ){ + return walBeginUnlocked(pWal, pChanged); + } } assert( pWal->nWiData>0 ); @@ -2626,7 +2776,7 @@ int sqlite3WalFindFrame( ** then the WAL is ignored by the reader so return early, as if the ** WAL were empty. */ - if( iLast==0 || pWal->readLock==0 ){ + if( iLast==0 || (pWal->readLock==0 && pWal->bUnlocked==0) ){ *piRead = 0; return SQLITE_OK; } diff --git a/test/walro.test b/test/walro.test index 09a33bbd3e..150344e151 100644 --- a/test/walro.test +++ b/test/walro.test @@ -105,7 +105,7 @@ do_multiclient_test tn { do_test 1.2.2 { code1 { sqlite3 db file:test.db?readonly_shm=1 } list [catch { sql1 { SELECT * FROM t1 } } msg] $msg - } {1 {unable to open database file}} + } {0 {a b c d e f g h i j}} do_test 1.2.3 { code1 { db close } @@ -114,10 +114,10 @@ do_multiclient_test tn { file attributes test.db-shm -permissions r--r--r-- code1 { sqlite3 db file:test.db?readonly_shm=1 } csql1 { SELECT * FROM t1 } - } {1 {unable to open database file}} + } {0 {a b c d e f g h i j}} do_test 1.2.4 { code1 { sqlite3_extended_errcode db } - } {SQLITE_CANTOPEN} + } {SQLITE_OK} do_test 1.2.5 { file attributes test.db-shm -permissions rw-r--r-- @@ -162,10 +162,10 @@ do_multiclient_test tn { file attributes test.db-shm -permissions r--r--r-- code1 { sqlite3 db file:test.db?readonly_shm=1 } csql1 { SELECT * FROM t1 } - } {1 {unable to open database file}} + } {0 {a b c d e f g h i j k l}} do_test 1.3.2.4 { code1 { sqlite3_extended_errcode db } - } {SQLITE_CANTOPEN} + } {SQLITE_OK} #----------------------------------------------------------------------- # Test cases 1.4.* check that checkpoints and log wraps don't prevent diff --git a/test/walro2.test b/test/walro2.test index fb41d17f79..ee4a341e24 100644 --- a/test/walro2.test +++ b/test/walro2.test @@ -62,7 +62,7 @@ do_multiclient_test tn { file exists test.db-shm } {1} - do_test 1.2 { + do_test 1.2.1 { forcecopy test.db test.db2 forcecopy test.db-wal test.db2-wal forcecopy test.db-shm test.db2-shm @@ -71,7 +71,10 @@ do_multiclient_test tn { } sql1 { SELECT * FROM t1 } - } {} + } {a b c d} + do_test 1.2.2 { + sql1 { SELECT * FROM t1 } + } {a b c d} do_test 1.3.1 { code3 { sqlite3 db3 test.db2 } @@ -106,13 +109,21 @@ do_multiclient_test tn { BEGIN; SELECT * FROM t1; } - } {a b c d} + } {a b c d e f g h} do_test 2.3.1 { code3 { sqlite3 db3 test.db2 } sql3 { SELECT * FROM t1 } } {a b c d e f g h} - + do_test 2.3.2 { + sql3 { INSERT INTO t1 VALUES('i', 'j') } + code3 { db3 close } + sql1 { COMMIT } + } {} + breakpoint + do_test 2.3.3 { + sql1 { SELECT * FROM t1 } + } {a b c d e f g h i j} } finish_test -- 2.47.2