From: dan Date: Sat, 9 Jan 2016 16:39:29 +0000 (+0000) Subject: If a single page is written to the wal file more than once, have each subsequent... X-Git-Tag: version-3.11.0~169^2~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d6f7c979523446f8aa8706bb7a335a5eebf63966;p=thirdparty%2Fsqlite.git If a single page is written to the wal file more than once, have each subsequent copy overwrite the original frame. FossilOrigin-Name: 5d113aef2c7d746e8eda88d4e36c04a39b0a11be --- diff --git a/manifest b/manifest index 45e5e88c6f..929a82b2bd 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Typo\sfix\sin\sthe\sprevious\scommit. -D 2016-01-08T22:31:00.176 +C If\sa\ssingle\spage\sis\swritten\sto\sthe\swal\sfile\smore\sthan\sonce,\shave\seach\ssubsequent\scopy\soverwrite\sthe\soriginal\sframe. +D 2016-01-09T16:39:29.213 F Makefile.in 7c8cc4c2f0179efc6fa9492141d1fb65f4807054 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc e45d8b9b56dfa3f2cd860b2c28bd9d304513b042 @@ -320,11 +320,11 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa F src/os_unix.c 82986e1e75782b54da7822dca42d36d974fc2948 F src/os_win.c 386fba30419e8458b13209781c2af5590eab2811 F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca -F src/pager.c 58d2593612acb6b542de6715b4af397ea1fa0a35 -F src/pager.h bf25005b4656cd805af43487c3139fca9678d0cc +F src/pager.c c241cace996e19248ddc60f4b8c2e654177e4d3b +F src/pager.h bbbfe27618ac2df8aba22ee0dace08f013351c40 F src/parse.y caad1e98edeca6960493d0c60d31b76820dd7776 F src/pcache.c 73895411fa6b7bd6f0091212feabbe833b358d23 -F src/pcache.h 1ff11adce609ba7de139b6abfabaf9a2bac947b5 +F src/pcache.h 4d0ccaad264d360981ec5e6a2b596d6e85242545 F src/pcache1.c 72f644dc9e1468c72922eff5904048427b817051 F src/pragma.c f3e7147299ca05ef4304a36f1fd6e002729c72c6 F src/pragma.h 64c78a648751b9f4f297276c4eb7507b14b4628c @@ -409,7 +409,7 @@ F src/vdbesort.c a7ec02da4494c59dfd071126dd3726be5a11459d F src/vdbetrace.c 8befe829faff6d9e6f6e4dee5a7d3f85cc85f1a0 F src/vtab.c 2a8b44aa372c33f6154208e7a7f6c44254549806 F src/vxworks.h c18586c8edc1bddbc15c004fa16aeb1e1342b4fb -F src/wal.c 974928c988681c5157202c79dd9f26afaa7b5086 +F src/wal.c 88661c24c86d88e40560c0be5df39d902502a29a F src/wal.h 907943dfdef10b583e81906679a347e0ec6f1b1b F src/walker.c 2e14d17f592d176b6dc879c33fbdec4fbccaa2ba F src/where.c c6d3d2f6af57d574a7365ee2b225a5024f2a6bec @@ -1279,7 +1279,7 @@ F test/vtabI.test 751b07636700dbdea328e4265b6077ccd6811a3f F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5 F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8 F test/vtab_shared.test ea8778d5b0df200adef2ca7c00c3c37d4375f772 -F test/wal.test dbfc482e10c7263298833bb1fc60b3ac9d6340a1 +F test/wal.test 351bd83d33d41aa1a32f48f74bfebeb8cfcfc23b F test/wal2.test 1f841d2048080d32f552942e333fd99ce541dada F test/wal3.test b1d425f68a1f61d12563f0fa1ee6fca7d5afabf4 F test/wal4.test 4744e155cd6299c6bd99d3eab1c82f77db9cdb3c @@ -1301,6 +1301,7 @@ F test/walfault.test 1f8389f7709877e9b4cc679033d71d6fe529056b F test/walhook.test ed00a40ba7255da22d6b66433ab61fab16a63483 F test/walmode.test 4022fe03ae6e830583672caa101f046438a0473c F test/walnoshm.test 84ca10c544632a756467336b7c3b864d493ee496 +F test/waloverwrite.test 59476a2cf0638a057ced738b62dc719ed4e32e53 F test/walpersist.test 8c6b7e3ec1ba91b5e4dc4e0921d6d3f87cd356a6 F test/walro.test 34422d1d95aaff0388f0791ec20edb34e2a3ed57 F test/walshared.test 0befc811dcf0b287efae21612304d15576e35417 @@ -1406,7 +1407,10 @@ F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 122c111e360761de8166e16ca151cb7260ee9bf8 -R 459dbc63e3c56e9f1bc36ac4dabf3593 -U drh -Z 6b54e5c713520cd21bd942bd06142b38 +P 52c166039831cc8423e2252019ef64a21b9d7c2a +R 7ec438cc7bd0e0a87d33dca3e9bb783a +T *branch * wal-overwrite-frames +T *sym-wal-overwrite-frames * +T -sym-trunk * +U dan +Z eba50524221f3f22640b286e867693f1 diff --git a/manifest.uuid b/manifest.uuid index 50a7a74425..2b25ead966 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -52c166039831cc8423e2252019ef64a21b9d7c2a \ No newline at end of file +5d113aef2c7d746e8eda88d4e36c04a39b0a11be \ No newline at end of file diff --git a/src/pager.c b/src/pager.c index 2c8dceb750..b333a746e4 100644 --- a/src/pager.c +++ b/src/pager.c @@ -7301,6 +7301,13 @@ int sqlite3PagerCloseWal(Pager *pPager){ return rc; } +/* +** Return the number of open savepoints. +**/ +int sqlite3PagerSavepointCount(Pager *pPager){ + return pPager->nSavepoint; +} + #ifdef SQLITE_ENABLE_SNAPSHOT /* ** If this is a WAL database, obtain a snapshot handle for the snapshot diff --git a/src/pager.h b/src/pager.h index ba4eec438d..c9b766a55c 100644 --- a/src/pager.h +++ b/src/pager.h @@ -168,6 +168,7 @@ int sqlite3PagerSharedLock(Pager *pPager); int sqlite3PagerWalCallback(Pager *pPager); int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen); int sqlite3PagerCloseWal(Pager *pPager); + int sqlite3PagerSavepointCount(Pager *pPager); # ifdef SQLITE_ENABLE_SNAPSHOT int sqlite3PagerSnapshotGet(Pager *pPager, sqlite3_snapshot **ppSnapshot); int sqlite3PagerSnapshotOpen(Pager *pPager, sqlite3_snapshot *pSnapshot); diff --git a/src/pcache.h b/src/pcache.h index 42c44cf7ba..475c04c061 100644 --- a/src/pcache.h +++ b/src/pcache.h @@ -55,6 +55,8 @@ struct PgHdr { #define PGHDR_DONT_WRITE 0x020 /* Do not write content to disk */ #define PGHDR_MMAP 0x040 /* This is an mmap page object */ +#define PGHDR_WAL_APPEND 0x080 /* Appended to wal file */ + /* Initialize and shutdown the page cache subsystem */ int sqlite3PcacheInitialize(void); void sqlite3PcacheShutdown(void); diff --git a/src/wal.c b/src/wal.c index 4238ea483b..c900879d12 100644 --- a/src/wal.c +++ b/src/wal.c @@ -462,6 +462,13 @@ struct Wal { #define WAL_EXCLUSIVE_MODE 1 #define WAL_HEAPMEMORY_MODE 2 +/* +** Values for Wal.writeLock. +*/ +#define WAL_WRITELOCK_UNLOCKED 0 +#define WAL_WRITELOCK_LOCKED 1 +#define WAL_WRITELOCK_RECKSUM 2 + /* ** Possible values for WAL.readOnly */ @@ -2885,6 +2892,60 @@ static int walWriteOneFrame( return rc; } +/* +** This function is called as part of committing a transaction within which +** one or more frames have been overwritten. It updates the checksums for +** all frames written to the wal file by the current transaction. +** +** Argument pLive is a pointer to the first wal-index header in shared +** memory (the copy readers will see if they open a read-transaction now, +** before the current commit is finished). This is safe to use because the +** caller holds the WRITER lock. The first frame to update the checksum +** for is (pLive->mxFrame+1). The last is argument iLast. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +static int walRewriteChecksums(Wal *pWal, WalIndexHdr *pLive, u32 iLast){ + const int szPage = pWal->szPage;/* Database page size */ + int rc = SQLITE_OK; /* Return code */ + u8 *aBuf; /* Buffer to load data from wal file into */ + u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-headers in */ + u32 iRead; /* Next frame to read from wal file */ + + aBuf = sqlite3_malloc(szPage + WAL_FRAME_HDRSIZE); + if( aBuf==0 ) return SQLITE_NOMEM; + + /* Find the checksum values to use as input for the checksum of the + ** first frame written by this transaction. If that frame is frame 1 + ** (implying that the current transaction restarted the wal file), + ** these values must be read from the wal-file header. If the first + ** frame to update the checksum of is not frame 1, then the initial + ** checksum values can be copied from pLive. */ + if( pLive->mxFrame==0 ){ + rc = sqlite3OsRead(pWal->pWalFd, aBuf, sizeof(u32)*2, 24); + pWal->hdr.aFrameCksum[0] = sqlite3Get4byte(aBuf); + pWal->hdr.aFrameCksum[1] = sqlite3Get4byte(&aBuf[sizeof(u32)]); + }else{ + memcpy(pWal->hdr.aFrameCksum, pLive->aFrameCksum, sizeof(u32)*2); + } + + for(iRead=pLive->mxFrame+1; rc==SQLITE_OK && iRead<=iLast; iRead++){ + i64 iOff = walFrameOffset(iRead, szPage); + rc = sqlite3OsRead(pWal->pWalFd, aBuf, szPage+WAL_FRAME_HDRSIZE, iOff); + if( rc==SQLITE_OK ){ + u32 iPgno, nDbSize; + iPgno = sqlite3Get4byte(aBuf); + nDbSize = sqlite3Get4byte(&aBuf[4]); + + walEncodeFrame(pWal, iPgno, nDbSize, &aBuf[WAL_FRAME_HDRSIZE], aFrame); + rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOff); + } + } + + sqlite3_free(aBuf); + return rc; +} + /* ** Write a set of frames to the log. The caller must hold the write-lock ** on the log file (obtained using sqlite3WalBeginWriteTransaction()). @@ -2905,6 +2966,8 @@ int sqlite3WalFrames( int szFrame; /* The size of a single frame */ i64 iOffset; /* Next byte to write in WAL file */ WalWriter w; /* The writer */ + u32 iFirst = 0; /* First frame that may be overwritten */ + WalIndexHdr *pLive; /* Pointer to shared header */ assert( pList ); assert( pWal->writeLock ); @@ -2920,6 +2983,13 @@ int sqlite3WalFrames( } #endif + pLive = (WalIndexHdr*)walIndexHdr(pWal); + if( memcmp(&pWal->hdr, (void *)pLive, sizeof(WalIndexHdr))!=0 + && (isCommit || sqlite3PagerSavepointCount(pList->pPager)==0) + ){ + iFirst = pLive->mxFrame+1; + } + /* See if it is possible to write these frames into the start of the ** log file, instead of appending to it at pWal->hdr.mxFrame. */ @@ -2984,6 +3054,25 @@ int sqlite3WalFrames( /* Write all frames into the log file exactly once */ for(p=pList; p; p=p->pDirty){ int nDbSize; /* 0 normally. Positive == commit flag */ + + /* Check if this page has already been written into the wal file by + ** the current transaction. If so, overwrite the existing frame and + ** set Wal.writeLock to WAL_WRITELOCK_RECKSUM - indicating that + ** checksums must be recomputed when the transaction is committed. */ + if( iFirst && (p->pDirty || isCommit==0) ){ + u32 iWrite = 0; + rc = sqlite3WalFindFrame(pWal, p->pgno, &iWrite); + if( rc ) return rc; + if( iWrite>=iFirst ){ + i64 iOff = walFrameOffset(iWrite, szPage) + WAL_FRAME_HDRSIZE; + pWal->writeLock = WAL_WRITELOCK_RECKSUM; + rc = sqlite3OsWrite(pWal->pWalFd, p->pData, szPage, iOff); + if( rc ) return rc; + p->flags &= ~PGHDR_WAL_APPEND; + continue; + } + } + iFrame++; assert( iOffset==walFrameOffset(iFrame, szPage) ); nDbSize = (isCommit && p->pDirty==0) ? nTruncate : 0; @@ -2991,6 +3080,13 @@ int sqlite3WalFrames( if( rc ) return rc; pLast = p; iOffset += szFrame; + p->flags |= PGHDR_WAL_APPEND; + } + + /* Recalculate checksums within the wal file if required. */ + if( isCommit && pWal->writeLock==WAL_WRITELOCK_RECKSUM ){ + rc = walRewriteChecksums(pWal, pLive, iFrame); + if( rc ) return rc; } /* If this is the end of a transaction, then we might need to pad @@ -3042,6 +3138,7 @@ int sqlite3WalFrames( */ iFrame = pWal->hdr.mxFrame; for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){ + if( (p->flags & PGHDR_WAL_APPEND)==0 ) continue; iFrame++; rc = walIndexAppend(pWal, iFrame, p->pgno); } @@ -3154,6 +3251,7 @@ int sqlite3WalCheckpoint( /* Copy data from the log to the database file. */ if( rc==SQLITE_OK ){ + if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ rc = SQLITE_CORRUPT_BKPT; }else{ diff --git a/test/wal.test b/test/wal.test index bfe3634577..3646b46c17 100644 --- a/test/wal.test +++ b/test/wal.test @@ -712,7 +712,7 @@ do_test wal-11.5 { do_test wal-11.6 { execsql COMMIT list [expr [file size test.db]/1024] [file size test.db-wal] -} [list 3 [wal_file_size 41 1024]] +} [list 3 [wal_file_size 40 1024]] do_test wal-11.7 { execsql { SELECT count(*) FROM t1; @@ -722,7 +722,7 @@ do_test wal-11.7 { do_test wal-11.8 { execsql { PRAGMA wal_checkpoint } list [expr [file size test.db]/1024] [file size test.db-wal] -} [list 37 [wal_file_size 41 1024]] +} [list 37 [wal_file_size 40 1024]] do_test wal-11.9 { db close list [expr [file size test.db]/1024] [log_deleted test.db-wal] diff --git a/test/waloverwrite.test b/test/waloverwrite.test new file mode 100644 index 0000000000..02fe4c1ccf --- /dev/null +++ b/test/waloverwrite.test @@ -0,0 +1,104 @@ +# 2010 May 5 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL" mode. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/wal_common.tcl +set testprefix waloverwrite + +ifcapable !wal {finish_test ; return } + +# Simple test: +# +# 1. Create a database of blobs roughly 50 pages in size. +# +# 2. Set the db cache size to something much smaller than this (5 pages) +# +# 3. Within a transaction, loop through the set of blobs 5 times. Update +# each blob as it is visited. +# +# 4. Test that the wal file is roughly 50 pages in size - even though many +# database pages have been written to it multiple times. +# +# 5. Take a copy of the database and wal file. Test that recovery can +# be run on it. +# +# The above is run twice - once where the wal file is empty at the start of +# step 3 (tn==1) and once where it already contains a transaction (tn==2). +# +foreach {tn xtra} { + 1 {} + 2 { UPDATE t1 SET y = randomblob(799) WHERE x=4 } +} { + reset_db + do_execsql_test 1.$tn.0 { + CREATE TABLE t1(x, y); + CREATE INDEX i1y ON t1(y); + + WITH cnt(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM cnt WHERE i<20 + ) + INSERT INTO t1 SELECT i, randomblob(800) FROM cnt; + } {} + + do_test 1.$tn.1 { + set nPg [db one { PRAGMA page_count } ] + expr $nPg>40 && $nPg<50 + } {1} + + do_test 1.$tn.2 { + db close + sqlite3 db test.db + + execsql {PRAGMA journal_mode = wal} + execsql {PRAGMA cache_size = 5} + execsql $xtra + + db transaction { + for {set i 0} {$i < 5} {incr i} { + foreach x [db eval {SELECT x FROM t1}] { + execsql { UPDATE t1 SET y = randomblob(799) WHERE x=$x } + } + } + } + + set nPg [wal_frame_count test.db-wal 1024] + expr $nPg>40 && $nPg<60 + } {1} + + do_execsql_test 1.$tn.3 { PRAGMA integrity_check } ok + + do_test 1.$tn.4 { + forcedelete test.db2 test.db2-wal + forcecopy test.db test.db2 + sqlite3 db2 test.db2 + execsql { SELECT sum(length(y)) FROM t1 } db2 + } [expr 20*800] + + do_test 1.$tn.5 { + db2 close + forcecopy test.db test.db2 + forcecopy test.db-wal test.db2-wal + sqlite3 db2 test.db2 + execsql { SELECT sum(length(y)) FROM t1 } db2 + } [expr 20*799] + + do_test 1.$tn.6 { + execsql { PRAGMA integrity_check } db2 + } ok +} + +finish_test +