From: dan Date: Thu, 28 Nov 2024 18:58:54 +0000 (+0000) Subject: In wal2 mode, attempt to zero old *-shm hash table pages within non-PASSIVE checkpoin... X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3a5e45a243602cb10cad70747b55962a4ae8dcb0;p=thirdparty%2Fsqlite.git In wal2 mode, attempt to zero old *-shm hash table pages within non-PASSIVE checkpoints. This saves writers from having to do it. FossilOrigin-Name: 40aafd458593c17ad19cd9b464edc92d914fca20bc1aa38bf7014bc9beb0e9d5 --- diff --git a/manifest b/manifest index 80b7c985ac..90c93e5462 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Avoid\sever\slogging\stimes\sfor\sROLLBACK\scommands. -D 2024-11-26T17:30:15.871 +C In\swal2\smode,\sattempt\sto\szero\sold\s*-shm\shash\stable\spages\swithin\snon-PASSIVE\scheckpoints.\sThis\ssaves\swriters\sfrom\shaving\sto\sdo\sit. +D 2024-11-28T18:58:54.286 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -857,7 +857,7 @@ F src/vdbetrace.c fe0bc29ebd4e02c8bc5c1945f1d2e6be5927ec12c06d89b03ef2a4def34bf8 F src/vdbevtab.c fc46b9cbd759dc013f0b3724549cc0d71379183c667df3a5988f7e2f1bd485f3 F src/vtab.c 5fb499d20494b7eecaadb7584634af9afcb374cb0524912b475fcb1712458a1b F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 -F src/wal.c 7468f953990f1fd22a64a0127e1492d8d7b422b38c3655a7bafe20f83a68ce74 +F src/wal.c 46aa25616c21ea10366ec23934ea8623598c83bdc8116553492402a855f22664 F src/wal.h 8c59ee7a835574396d7cbd04626d11fd849613e361a46e7e9519091ab03bdb29 F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014 F src/where.c c046dd58c3410f7b7528e1e6317cb876398557bad346d568ed8562321a7d002d @@ -2008,6 +2008,7 @@ F test/vtabrhs1.test 9b5ecbc74a689500c33a4b2b36761f9bcc22fcc4e3f9d21066ee0c9c74c F test/wal.test a5d6c7f4bd79251ed344229d96d44ecdfb896bdbd32b7e65f118756699c7e473 F test/wal2.test e89ca97593b5e92849039f6b68ce1719a853ef20fa22c669ec1ac452fbc31cab F test/wal2big.test 829141cbecdda4329db8fa38705424c4a73db72a06b9540b06811a825d330409 +F test/wal2big2.test d000b0de61388bdf2da39f747c070a05a8995b994f52490674605923d79f65f3 F test/wal2concurrent.test 7fc3e570073683a2a28f42bda46ecf516f5bc82afd703c1fbf4aa38e18fb3361 F test/wal2fault.test 2e8e60cacd5bcd451618aeffd05f676894d17202d3e2986e288d36e2c5993249 F test/wal2lock.test 0ef98d72dc6bcf7711dedd684760488400d9a9a6eec0dc5d3822060437793552 @@ -2249,8 +2250,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937 -R 69f1f4cdbed55beee441559e50f1f550 +P bc7f7986ec115b3b89c48742da1bd1bb5fc5e7a4f9b7f35623404d546437d687 +R 55a54d0ca25a8ebd4474604466ee1527 U dan -Z 21397c067de85450c8a331914ec4a3ea +Z 3c73c71cb637afc22474f4e8be3a5be1 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index f4a0f05bce..767d709fad 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -bc7f7986ec115b3b89c48742da1bd1bb5fc5e7a4f9b7f35623404d546437d687 +40aafd458593c17ad19cd9b464edc92d914fca20bc1aa38bf7014bc9beb0e9d5 diff --git a/src/wal.c b/src/wal.c index d764fec427..c0a2312870 100644 --- a/src/wal.c +++ b/src/wal.c @@ -1669,7 +1669,7 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ ** entire hash table and aPgno[] array before proceeding. */ if( pWal->aCommitTime ) t = sqlite3STimeNow(); - if( idx==1 ){ + if( idx==1 && sLoc.aPgno[0]!=0 ){ int nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); assert( nByte>=0 ); memset((void*)sLoc.aPgno, 0, nByte); @@ -1842,6 +1842,7 @@ static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){ if( aShare==0 ) break; SEH_SET_ON_ERROR(iPg, aShare); pWal->apWiData[iPg] = aPrivate; + memset(aPrivate, 0, WALINDEX_PGSZ); if( iWal ){ assert( version==WAL_VERSION2 ); @@ -2970,6 +2971,39 @@ static int walCheckpoint( } } + if( bWal2 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + /* In wal2 mode, a non-passive checkpoint waits for all readers of + ** the wal file just checkpointed to finish, then zeroes the hash + ** tables associated with that wal file. This is because in some + ** deployments, zeroing the hash tables as they are overwritten within + ** COMMIT commands is a significant performance hit. + ** + ** Currently, both of the "PART" locks are held for the wal file + ** being checkpointed. i.e. if iCkpt==0, then we already hold both + ** WAL_LOCK_PART1 and WAL_LOCK_PART1_FULL2. If we now also take an + ** exclusive lock on WAL_LOCK_PART2_FULL1, then it is guaranteed that + ** there are no remaining readers of the (iCkpt==0) wal file. Similar + ** logic, with different locks, is used for (iCkpt==1). + */ + int lockIdx = WAL_READ_LOCK( + iCkpt==0 ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2 + ); + assert( iCkpt==0 || iCkpt==1 ); + rc = walBusyLock(pWal, xBusy, pBusyArg, lockIdx, 1); + if( rc==SQLITE_OK ){ + int iHash; + for(iHash = walFramePage2(iCkpt, mxSafeFrame); iHash>=0; iHash-=2){ + WalHashLoc sLoc; + int nByte; + memset(&sLoc, 0, sizeof(sLoc)); + walHashGet(pWal, iHash, &sLoc); + nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); + memset((void*)sLoc.aPgno, 0, nByte); + } + walUnlockExclusive(pWal, lockIdx, 1); + } + } + if( rc==SQLITE_BUSY ){ /* Reset the return code so as not to report a checkpoint failure ** just because there are active readers. */ diff --git a/test/wal2big2.test b/test/wal2big2.test new file mode 100644 index 0000000000..b87f92d69f --- /dev/null +++ b/test/wal2big2.test @@ -0,0 +1,199 @@ +# 2024 November 28 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# TESTRUNNER: slow +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +source $testdir/malloc_common.tcl +source $testdir/wal_common.tcl + +set testprefix wal2big2 +ifcapable !wal {finish_test ; return } + +do_execsql_test 1.0 { + PRAGMA journal_mode = wal2; + CREATE TABLE t1(a INTEGER PRIMARY KEY, b); + CREATE INDEX i1 ON t1(b); + + PRAGMA wal_autocheckpoint = 0; + PRAGMA journal_size_limit = 100000; + PRAGMA synchronous = off; +} {wal2 0 100000} + +do_execsql_test 1.1 { + INSERT INTO t1 VALUES(1, 'one'); +} + +do_test 1.2 { + list [file size test.db-wal] [file size test.db-wal2] +} {6320 0} + +do_execsql_test 1.3 { + PRAGMA wal_checkpoint; + INSERT INTO t1 VALUES(2, 'two'); +} {0 6 0} + +do_test 1.4 { + list [file size test.db-wal] [file size test.db-wal2] +} {8416 0} + +do_test 1.5 { + for {set ii 3} {$ii < 100} {incr ii} { + execsql { + INSERT INTO t1 VALUES($ii, hex(randomblob(40))); + } + } + list [file size test.db-wal] [file size test.db-wal2] +} {103784 223256} + +do_execsql_test 1.6 { + PRAGMA integrity_check; +} {ok} + +do_execsql_test 1.7 { + PRAGMA wal_checkpoint = RESTART; +} {0 312 99} + +do_execsql_test 1.8 { + PRAGMA integrity_check; +} {ok} + +sqlite3 db2 test.db + +do_execsql_test -db db2 1.9 { + PRAGMA integrity_check; +} {ok} + +do_execsql_test 1.10 { + PRAGMA journal_size_limit = 10000000; +} {10000000} + +do_test 1.11 { + for {set ii 0} {$ii < 8000} {incr ii} { + execsql { + INSERT INTO t1 VALUES(nULL, hex(randomblob(40))); + } + } + list [expr [file size test.db-wal]>10000000] \ + [expr [file size test.db-wal2]>10000000] \ +} {1 1} + +do_execsql_test -db db2 1.12 { + PRAGMA integrity_check; +} {ok} + +do_test 1.13 { + db eval { PRAGMA wal_checkpoint = RESTART } + set {} {} +} {} + +do_execsql_test -db db2 1.14 { + PRAGMA integrity_check; +} {ok} + +do_execsql_test 1.15 { + INSERT INTO t1 VALUES(nULL, hex(randomblob(40))); +} + +#------------------------------------------------------------------------- +do_multiclient_test tn { + do_test 1.$tn.0 { + sql1 { + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES('A', 'B'); + CREATE INDEX i1 ON t1(a, b); + PRAGMA journal_mode = wal2; + PRAGMA journal_size_limit = 100000; + } + } {wal2 100000} + + do_test 1.$tn.1 { + sql2 { + PRAGMA cache_size = 5; + } + } {} + + do_test 1.$tn.2 { + for {set ii 0} {$ii < 500} {incr ii} { + sql1 { + INSERT INTO t1 VALUES(hex(randomblob(20)), hex(randomblob(20))); + } + } + } {} + + do_test 1.$tn.3 { + list [expr [file size test.db-wal]>100000] \ + [expr [file size test.db-wal2]>100000] + } {1 1} + + do_test 1.$tn.4 { + sql2 { + BEGIN; + PRAGMA integrity_check; + } + } {ok} + + do_test 1.$tn.5 { + sql1 { PRAGMA wal_checkpoint = RESTART; } + set {} {} + } {} + + do_test 1.$tn.6 { + sql2 { + PRAGMA integrity_check; + COMMIT; + } + } {ok} + + do_test 1.$tn.7 { + sql1 { + INSERT INTO t1 VALUES(hex(randomblob(20)), hex(randomblob(20))); + } + code1 { + set ::n_handler 0 + proc handler {nTry} { + incr ::n_handler + if {$nTry>10} { + sql2 { + COMMIT; + BEGIN; + PRAGMA integrity_check; + } + } + return 0 + } + db busy handler + } + } {} + + do_test 1.$tn.8 { + sql2 { + BEGIN; + PRAGMA integrity_check; + } + } {ok} + + do_test 1.$tn.9 { + sql1 { + PRAGMA wal_checkpoint = RESTART; + } + code1 { set ::n_handler } + } {12} + + do_test 1.$tn.10 { + sql2 COMMIT + } {} +} + +finish_test +