From: dan Date: Fri, 3 Nov 2017 17:17:44 +0000 (+0000) Subject: Allow readonly_shm connections to access the *-shm file using read() even if X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=891e024ec6106e2a7626ca788db8e2741316f7da;p=thirdparty%2Fsqlite.git Allow readonly_shm connections to access the *-shm file using read() even if it is unable to take a DMS lock. FossilOrigin-Name: 9b0d5c4ff707f58b07ae28cfe0a7de62fbd22fa20678e446350d1472391bc0b9 --- diff --git a/manifest b/manifest index 5ad595579c..aaea49591f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\stest\scases\sin\swal2.test\sbroken\sby\sthe\slocking\schange\sin\sthe\sprevious\ncommit. -D 2017-11-02T18:57:46.036 +C Allow\sreadonly_shm\sconnections\sto\saccess\sthe\s*-shm\sfile\susing\sread()\seven\sif\nit\sis\sunable\sto\stake\sa\sDMS\slock. +D 2017-11-03T17:17:44.636 F Makefile.in 5bae3f2f3d42f2ad52b141562d74872c97ac0fca6c54953c91bb150a0e6427a8 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 3a5cb477ec3ce5274663b693164e349db63348667cd45bad78cc13d580b691e2 @@ -447,7 +447,7 @@ F src/os.c 22d31db3ca5a96a408fbf1ceeaaebcaf64c87024d2ff9fe1cf2ddbec3e75c104 F src/os.h 48388821692e87da174ea198bf96b1b2d9d83be5dfc908f673ee21fafbe0d432 F src/os_common.h b2f4707a603e36811d9b1a13278bffd757857b85 F src/os_setup.h 0dbaea40a7d36bf311613d31342e0b99e2536586 -F src/os_unix.c e376adf6014df7d1a73faaaa6c87e6eb9b299b157a58cccff02fad8abc943fe7 +F src/os_unix.c 7cc36e2b1c3bc45cab3f4af4b9ef498a5ba14c60edc71bdce8c712ce78abdc24 F src/os_win.c 6892c3ff23b7886577e47f13d827ca220c0831bae3ce00eea8c258352692f8c6 F src/os_win.h 7b073010f1451abe501be30d12f6bc599824944a F src/pager.c 07cf850241667874fcce9d7d924c814305e499b26c804322e2261247b5921903 @@ -465,7 +465,7 @@ F src/resolve.c 5a461643f294ec510ca615b67256fc3861e4c8eff5f29e5940491e70553b1955 F src/rowset.c 7b7e7e479212e65b723bf40128c7b36dc5afdfac F src/select.c 660ef7977841fb462f24c8561e4212615bb6e5c9835fd3556257ce8316c50fee F src/shell.c.in 08cbffc31900359fea85896342a46147e9772c370d8a5079b7be26e3a1f50e8a -F src/sqlite.h.in ba9029e71a605bc5f236cd693abeb7920285785f2e1a92313ecf8fb1c9f52a86 +F src/sqlite.h.in bdd51182dd37e5583f9a201e933a20c9f0c779ceabe500de7e67d57a5fb55ab4 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h c02d628cca67f3889c689d82d25c3eb45e2c155db08e4c6089b5840d64687d34 F src/sqliteInt.h f5377febf86654c975e1d4e4353a5ad2fbaa5bc86b584ba3761ed33e24ce2c0e @@ -543,7 +543,7 @@ F src/vdbesort.c 731a09e5cb9e96b70c394c1b7cf3860fbe84acca7682e178615eb941a3a0ef2 F src/vdbetrace.c 48e11ebe040c6b41d146abed2602e3d00d621d7ebe4eb29b0a0f1617fd3c2f6c F src/vtab.c 0e4885495172e1bdf54b12cce23b395ac74ef5729031f15e1bc1e3e6b360ed1a F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 -F src/wal.c 38480e7bdc697cf88a13a22ffe60f7bd761bc02b45f7a323f1bb9e61a136b3ae +F src/wal.c 84172f8556012866db16aa2c385f972d112accbb37c1df6d709ccb30b7533063 F src/wal.h 8de5d2d3de0956d6f6cb48c83a4012d5f227b8fe940f3a349a4b7e85ebcb492a F src/walker.c d591e8a9ccf60abb010966b354fcea4aa08eba4d83675c2b281a8764c76cc22f F src/where.c b7a075f5fb3d912a891dcc3257f538372bb4a1622dd8ca7d752ad95ce8949ba4 @@ -1526,8 +1526,8 @@ F test/walnoshm.test 84ca10c544632a756467336b7c3b864d493ee496 F test/waloverwrite.test dad2f26567f1b45174e54fbf9a8dc1cb876a7f03 F test/walpersist.test 8c6b7e3ec1ba91b5e4dc4e0921d6d3f87cd356a6 F test/walprotocol.test 0b92feb132ccebd855494d917d3f6c2d717ace20 -F test/walro.test e492598baa8cd7777fef6203f6fe922c20cd691cc19e60ccd0dd0dbc68394d0a -F test/walro2.test 23fea1e7abae13072b0640ef846d32080b2fc435658d4c4eb9db266b07b33776 +F test/walro.test 04f52c23ecd18acb493c9c12798d7ef1618c5f547558fe6767cca8f613591e62 +F test/walro2.test 216e952db319eec60b33579e4436cbe97319c3c7c4769ff1a907d1ccde241f66 F test/walshared.test 0befc811dcf0b287efae21612304d15576e35417 F test/walslow.test c05c68d4dc2700a982f89133ce103a1a84cc285f F test/walthread.test de8dbaf6d9e41481c460ba31ca61e163d7348f8e @@ -1668,7 +1668,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 5190d84a296b7cf716ef43bf7b6d4d351ef1a4d650de37dc01a5ab333da7c05d -R 30693e487c23aaa3c080ebb269e4e38f +P f569c3517234881f9425075aab65a32ffd0deb8e793f421a241d8cca881da33f +R 20903fe541f5da42c2200a9eba3c1e5d U dan -Z a5242a9fca30f5b5641106b18cdb01ae +Z 111bc60b3d02df6a9da28fff434dc070 diff --git a/manifest.uuid b/manifest.uuid index a8bcfdb1fe..910f081e0e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f569c3517234881f9425075aab65a32ffd0deb8e793f421a241d8cca881da33f \ No newline at end of file +9b0d5c4ff707f58b07ae28cfe0a7de62fbd22fa20678e446350d1472391bc0b9 \ No newline at end of file diff --git a/src/os_unix.c b/src/os_unix.c index e90e335cb7..a09ba92817 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -3795,6 +3795,7 @@ static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ /* Forward declaration */ static int unixGetTempname(int nBuf, char *zBuf); +static int fcntlReadShm(unixFile*,void*); /* ** Information and control of an open file handle. @@ -3817,6 +3818,10 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ } #endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ + case SQLITE_FCNTL_READ_SHM: { + return fcntlReadShm(pFile, pArg); + } + case SQLITE_FCNTL_LOCKSTATE: { *(int*)pArg = pFile->eFileLock; return SQLITE_OK; @@ -4147,6 +4152,51 @@ struct unixShm { #define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ #define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ +/* +** Implementation of file-control SQLITE_FCNTL_READ_SHM. +*/ +static int fcntlReadShm(unixFile *pFile, void *pArg){ + struct ReadShmParam { + void *pBuf; + int nBuf; + } *pParam = (struct ReadShmParam*)pArg; + unixShmNode *pShmNode = pFile->pShm->pShmNode; + int rc = SQLITE_OK; + sqlite3_mutex_enter( pShmNode->mutex ); + if( pShmNode->isUnlocked==1 ){ + struct stat buf; /* Used to hold return values of fstat() */ + if( osFstat(pShmNode->h, &buf) ){ + rc = SQLITE_IOERR_FSTAT; + }else if( 0!=lseek(pShmNode->h, 0, SEEK_SET) ){ + rc = SQLITE_IOERR_READ; + }else{ + int nRead = buf.st_size; + if( nRead==0 ){ + pParam->nBuf = 0; + pParam->pBuf = 0; + }else{ + pParam->pBuf = sqlite3_malloc(nRead); + if( pParam->pBuf ){ + int res = osRead(pShmNode->h, pParam->pBuf, nRead); + if( res!=nRead ){ + sqlite3_free(pParam->pBuf); + pParam->pBuf = 0; + rc = SQLITE_IOERR_READ; + }else{ + pParam->nBuf = nRead; + } + }else{ + rc = SQLITE_NOMEM_BKPT; + } + } + } + }else{ + rc = SQLITE_BUSY; + } + sqlite3_mutex_leave( pShmNode->mutex ); + return rc; +} + /* ** Apply posix advisory locks for all bytes from ofst through ofst+n-1. ** diff --git a/src/sqlite.h.in b/src/sqlite.h.in index 0c0ed25b30..0ae0fc5fd1 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -1095,6 +1095,7 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_BEGIN_ATOMIC_WRITE 31 #define SQLITE_FCNTL_COMMIT_ATOMIC_WRITE 32 #define SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE 33 +#define SQLITE_FCNTL_READ_SHM 34 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE diff --git a/src/wal.c b/src/wal.c index 28fade96b0..397a42f8a9 100644 --- a/src/wal.c +++ b/src/wal.c @@ -455,6 +455,7 @@ struct Wal { u8 truncateOnCommit; /* True to truncate WAL file on commit */ u8 syncHeader; /* Fsync the WAL header if true */ u8 padToSectorBoundary; /* Pad transactions out to the next sector */ + u8 bUnlocked; /* A readonly_shm connection without DMS lock */ WalIndexHdr hdr; /* Wal-index header for current transaction */ u32 minFrame; /* Ignore wal frames before this one */ u32 iReCksum; /* On commit, recalculate checksums from here */ @@ -539,6 +540,27 @@ struct WalIterator { sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \ ) +/* +** Grow the pWal->apWiData[] array so that it is at least nPage entries +** in size. Return SQLITE_NOMEM if an OOM error occurs, or SQLITE_OK +** otherwise. +*/ +static int walIndexGrowArray(Wal *pWal, int nPage){ + if( nPage>pWal->nWiData ){ + int nByte = sizeof(u32*)*nPage; + volatile u32 **apNew; + apNew = (volatile u32 **)sqlite3_realloc64((void *)pWal->apWiData, nByte); + if( !apNew ){ + return SQLITE_NOMEM_BKPT; + } + memset((void*)&apNew[pWal->nWiData], 0, + sizeof(u32*)*(nPage-pWal->nWiData)); + pWal->apWiData = apNew; + pWal->nWiData = nPage; + } + return SQLITE_OK; +} + /* ** Obtain a pointer to the iPage'th page of the wal-index. The wal-index ** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are @@ -552,18 +574,9 @@ static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){ int rc = SQLITE_OK; /* Enlarge the pWal->apWiData[] array if required */ - if( pWal->nWiData<=iPage ){ - int nByte = sizeof(u32*)*(iPage+1); - volatile u32 **apNew; - apNew = (volatile u32 **)sqlite3_realloc64((void *)pWal->apWiData, nByte); - if( !apNew ){ - *ppPage = 0; - return SQLITE_NOMEM_BKPT; - } - memset((void*)&apNew[pWal->nWiData], 0, - sizeof(u32*)*(iPage+1-pWal->nWiData)); - pWal->apWiData = apNew; - pWal->nWiData = iPage+1; + if( walIndexGrowArray(pWal, iPage+1) ){ + *ppPage = 0; + return SQLITE_NOMEM; } /* Request a pointer to the required page from the VFS */ @@ -1102,12 +1115,11 @@ static int walIndexRecover(Wal *pWal){ u32 aFrameCksum[2] = {0, 0}; int iLock; /* Lock offset to lock for checkpoint */ - /* Obtain an exclusive lock on all byte in the locking range not already - ** locked by the caller. The caller is guaranteed to have locked the - ** WAL_WRITE_LOCK byte, and may have also locked the WAL_CKPT_LOCK byte. - ** If successful, the same bytes that are locked here are unlocked before - ** this function returns. - */ + /* Obtain an exclusive lock on all bytes in the locking range except + ** WAL_READ_LOCK(0) not already locked by the caller. The caller is + ** guaranteed to have locked the WAL_WRITE_LOCK byte, and may have also + ** locked the WAL_CKPT_LOCK byte. If successful, the same bytes that are + ** locked here are unlocked before this function returns. */ assert( pWal->ckptLock==1 || pWal->ckptLock==0 ); assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 ); assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE ); @@ -2090,16 +2102,9 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ ** wal-index header) is mapped. Return early if an error occurs here. */ assert( pChanged ); + assert( pWal->bUnlocked==0 ); rc = walIndexPage(pWal, 0, &page0); if( rc!=SQLITE_OK ){ - if( rc==SQLITE_READONLY_CANTLOCK -#ifdef SQLITE_ENABLE_SNAPSHOT - && pWal->pSnapshot==0 -#endif - ){ - memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); - rc = SQLITE_OK; - } return rc; }; assert( page0 || pWal->writeLock==0 ); @@ -2117,7 +2122,10 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ assert( badHdr==0 || pWal->writeLock==0 ); if( badHdr ){ if( pWal->readOnly & WAL_SHM_RDONLY ){ - if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ + if( pWal->bUnlocked ){ + rc = SQLITE_READONLY_RECOVERY; + } + else if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ walUnlockShared(pWal, WAL_WRITE_LOCK); rc = SQLITE_READONLY_RECOVERY; } @@ -2156,6 +2164,95 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ */ #define WAL_RETRY (-1) +/* +** When this function is called, a call to xShmMap has just returned +** SQLITE_READONLY_CANTLOCK, indicating that the *-shm file was opened +** read-only and that there were no other connections to the database +** at that point. +*/ +static int walBeginUnlocked(Wal *pWal, int *pChanged){ + struct ReadShmParam { + void *pBuf; + int nBuf; + } buf = {0, 0}; + i64 nSize; + int rc; + volatile void *pDummy; + + assert( pWal->nWiData>0 && pWal->apWiData[0]==0 ); + assert( pWal->readOnly & WAL_SHM_RDONLY ); + + /* Take WAL_READ_LOCK(0). This has the effect of preventing any + ** live clients from running a checkpoint, but does not stop them + ** from running recovery. */ + rc = walLockShared(pWal, WAL_READ_LOCK(0)); + if( rc!=SQLITE_OK ){ + return (rc==SQLITE_BUSY ? WAL_RETRY : rc); + } + + /* Try to map the *-shm file again. If it succeeds this time, then + ** a non-readonly_shm connection has already connected to the database. + ** In this case, start over with opening the transaction. + ** + ** The WAL_READ_LOCK(0) lock held by this client prevents a checkpoint + ** from taking place. But it does not prevent the wal from being wrapped + ** if a checkpoint has already taken place. This means that if another + ** client is connected at this point, it may have already checkpointed + ** the entire wal. In that case it would not be safe to continue with + ** the unlocked transaction, as the other client may overwrite wal + ** frames that this client is still using. */ + rc = sqlite3OsShmMap(pWal->pDbFd, 0, WALINDEX_PGSZ, 0, &pDummy); + if( rc!=SQLITE_READONLY_CANTLOCK ){ + rc = (rc==SQLITE_OK ? WAL_RETRY : rc); + goto begin_unlocked_out; + } + pWal->readLock = 0; + + /* Figure out how large the wal file is. If it is zero bytes in size, + ** do not bother loading the *-shm file. Just read from the db file + ** exclusively like any other Wal.readLock==0 client. Otherwise, + ** use SQLITE_FCNTL_READ_SHM to load the contents of the *-shm file + ** into heap memory. */ + rc = sqlite3OsFileSize(pWal->pWalFd, &nSize); + if( rc!=SQLITE_OK ) return rc; + + memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); + if( nSize>0 ){ + rc = sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_READ_SHM, (void*)&buf); + if( rc!=SQLITE_OK ){ + return rc==SQLITE_BUSY ? WAL_RETRY : rc; + } + if( buf.nBufapWiData[i] = (volatile u32*)&aBuf[i*WALINDEX_PGSZ]; + } + if( walIndexTryHdr(pWal, pChanged) ){ + rc = SQLITE_READONLY_RECOVERY; + } + } + } + } + + begin_unlocked_out: + if( rc!=SQLITE_OK ){ + sqlite3_free(buf.pBuf); + memset(pWal->apWiData, 0, pWal->nWiData*sizeof(u32*)); + walUnlockShared(pWal, WAL_READ_LOCK(0)); + }else{ + pWal->bUnlocked = 1; + *pChanged = 1; + } + return rc; +} + /* ** Attempt to start a read transaction. This might fail due to a race or ** other transient condition. When that happens, it returns WAL_RETRY to @@ -2245,6 +2342,14 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ if( !useWal ){ rc = walIndexReadHdr(pWal, pChanged); + if( rc==SQLITE_READONLY_CANTLOCK ){ + /* This is a readonly_shm connection and there are no other connections + ** to the database. So the *-shm file may not be accessed using mmap. + ** Take WAL_READ_LOCK(0) before proceding. */ + assert( pWal->nWiData>0 && pWal->apWiData[0]==0 ); + assert( pWal->readOnly & WAL_SHM_RDONLY ); + return walBeginUnlocked(pWal, pChanged); + } if( rc==SQLITE_BUSY ){ /* If there is not a recovery running in another thread or process ** then convert BUSY errors to WAL_RETRY. If recovery is known to @@ -2596,6 +2701,11 @@ void sqlite3WalEndReadTransaction(Wal *pWal){ if( pWal->readLock>=0 ){ walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); pWal->readLock = -1; + if( pWal->bUnlocked ){ + sqlite3_free((void*)pWal->apWiData[0]); + memset(pWal->apWiData, 0, sizeof(u32*)*pWal->nWiData); + pWal->bUnlocked = 0; + } } } @@ -2626,7 +2736,7 @@ int sqlite3WalFindFrame( ** then the WAL is ignored by the reader so return early, as if the ** WAL were empty. */ - if( iLast==0 || pWal->readLock==0 ){ + if( iLast==0 || (pWal->readLock==0 && !pWal->bUnlocked) ){ *piRead = 0; return SQLITE_OK; } diff --git a/test/walro.test b/test/walro.test index 09a33bbd3e..948228a465 100644 --- a/test/walro.test +++ b/test/walro.test @@ -101,11 +101,10 @@ do_multiclient_test tn { code1 { db close } list [file exists test.db-wal] [file exists test.db-shm] } {1 1} - do_test 1.2.2 { code1 { sqlite3 db file:test.db?readonly_shm=1 } - list [catch { sql1 { SELECT * FROM t1 } } msg] $msg - } {1 {unable to open database file}} + sql1 { SELECT * FROM t1 } + } {a b c d e f g h i j} do_test 1.2.3 { code1 { db close } @@ -114,10 +113,10 @@ do_multiclient_test tn { file attributes test.db-shm -permissions r--r--r-- code1 { sqlite3 db file:test.db?readonly_shm=1 } csql1 { SELECT * FROM t1 } - } {1 {unable to open database file}} + } {1 {attempt to write a readonly database}} do_test 1.2.4 { code1 { sqlite3_extended_errcode db } - } {SQLITE_CANTOPEN} + } {SQLITE_READONLY_RECOVERY} do_test 1.2.5 { file attributes test.db-shm -permissions rw-r--r-- @@ -156,16 +155,21 @@ do_multiclient_test tn { code1 { sqlite3 db file:test.db?readonly_shm=1 } csql1 { SELECT * FROM sqlite_master } } {1 {unable to open database file}} + + # Update: This test used to fail (because a 0 byte *-shm file does not + # contain a valid shm-header) but now succeeds (because a readonly_shm + # connection ignores the *-shm file completely if the wal file is also + # zero bytes in size). do_test 1.3.2.3 { code1 { db close } close [open test.db-shm w] file attributes test.db-shm -permissions r--r--r-- code1 { sqlite3 db file:test.db?readonly_shm=1 } csql1 { SELECT * FROM t1 } - } {1 {unable to open database file}} + } {0 {a b c d e f g h i j k l}} do_test 1.3.2.4 { code1 { sqlite3_extended_errcode db } - } {SQLITE_CANTOPEN} + } {SQLITE_OK} #----------------------------------------------------------------------- # Test cases 1.4.* check that checkpoints and log wraps don't prevent diff --git a/test/walro2.test b/test/walro2.test index fb41d17f79..c81514ab46 100644 --- a/test/walro2.test +++ b/test/walro2.test @@ -62,16 +62,17 @@ do_multiclient_test tn { file exists test.db-shm } {1} - do_test 1.2 { + do_test 1.2.1 { forcecopy test.db test.db2 forcecopy test.db-wal test.db2-wal forcecopy test.db-shm test.db2-shm code1 { sqlite3 db file:test.db2?readonly_shm=1 } - - sql1 { SELECT * FROM t1 } } {} + do_test 1.2.2 { + sql1 { SELECT * FROM t1 } + } {a b c d} do_test 1.3.1 { code3 { sqlite3 db3 test.db2 } @@ -106,7 +107,7 @@ do_multiclient_test tn { BEGIN; SELECT * FROM t1; } - } {a b c d} + } {a b c d e f g h} do_test 2.3.1 { code3 { sqlite3 db3 test.db2 }