From: dan Date: Sat, 23 Feb 2013 16:40:46 +0000 (+0000) Subject: If a rollback mode transaction reduces the size of the database file, avoid actually... X-Git-Tag: version-3.7.16~34^2~7 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bc1a3c6ce2dcb1864285b5e2a3d8ce12c075f2a5;p=thirdparty%2Fsqlite.git If a rollback mode transaction reduces the size of the database file, avoid actually truncating the file until after the transaction has been committed (but before the db has been unlocked). This means pages that are removed from the database by truncating the file need not be journalled. FossilOrigin-Name: b73847f17b7ae1298dfc52de6c4c4bc809bc77a3 --- diff --git a/manifest b/manifest index ff7dff0ba5..a4147e0d00 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sproblem\swith\sthe\sprevious\scommit. -D 2013-02-22T20:57:47.269 +C If\sa\srollback\smode\stransaction\sreduces\sthe\ssize\sof\sthe\sdatabase\sfile,\savoid\sactually\struncating\sthe\sfile\suntil\safter\sthe\stransaction\shas\sbeen\scommitted\s(but\sbefore\sthe\sdb\shas\sbeen\sunlocked).\sThis\smeans\spages\sthat\sare\sremoved\sfrom\sthe\sdatabase\sby\struncating\sthe\sfile\sneed\snot\sbe\sjournalled. +D 2013-02-23T16:40:46.526 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in a48faa9e7dd7d556d84f5456eabe5825dd8a6282 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -118,12 +118,12 @@ F src/alter.c f8db986c03eb0bfb221523fc9bbb9d0b70de3168 F src/analyze.c 7553068d21e32a57fc33ab6b2393fc8c1ba41410 F src/attach.c ea5247f240e2c08afd608e9beb380814b86655e1 F src/auth.c 523da7fb4979469955d822ff9298352d6b31de34 -F src/backup.c 32e35a3a4ea55b45c0e5f74eeb793aec71491517 +F src/backup.c 0b8d2108a10ada00e642525921960b0c27451955 F src/bitvec.c 26675fe8e431dc555e6f2d0e11e651d172234aa1 F src/btmutex.c 976f45a12e37293e32cae0281b15a21d48a8aaa7 -F src/btree.c 76f063ffd70aba11b3f4701d216c157e1a81b51d +F src/btree.c f021a7f59fd340577c2f9b7d6e951576af335cfb F src/btree.h 3ad7964d6c5b1c7bff569aab6adfa075f8bf06cd -F src/btreeInt.h 4e5c2bd0f9b36b2a815a6d84f771a61a65830621 +F src/btreeInt.h eecc84f02375b2bb7a44abbcbbe3747dde73edb2 F src/build.c 73ca65f32938e4e0d94e831b61b5749b211b79be F src/callback.c d7e46f40c3cf53c43550b7da7a1d0479910b62cc F src/complete.c dc1d136c0feee03c2f7550bafc0d29075e36deac @@ -162,7 +162,7 @@ F src/os.h 027491c77d2404c0a678bb3fb06286f331eb9b57 F src/os_common.h 92815ed65f805560b66166e3583470ff94478f04 F src/os_unix.c dfdc04b126f7b05dcb2e2cc5c1262f98acbb49d9 F src/os_win.c eabd00b813577d36bd66271cb08dd64ea0589dac -F src/pager.c 4092c907222cfd451c74fe6bd2fd64b342f7190f +F src/pager.c 0dbf5ff5d5d7d3a21fcab82e9e4d129b6fe6314f F src/pager.h 1109a06578ec5574dc2c74cf8d9f69daf36fe3e0 F src/parse.y 5d5e12772845805fdfeb889163516b84fbb9ae95 F src/pcache.c f8043b433a57aba85384a531e3937a804432a346 @@ -871,7 +871,7 @@ F test/tkt3718.test 3b59dcb5c4e7754dacd91e7fd353a61492cc402a F test/tkt3731.test 0c5f4cbffe102d43c3b2188af91a9e36348f974b F test/tkt3757.test 10cd679a88675c880533083fc79ac04324525595 F test/tkt3761.test b95ea9c98f21cf91325f18a984887e62caceab33 -F test/tkt3762.test 2a9f3b03df44ec49ec0cfa8d5da6574c2a7853df +F test/tkt3762.test 4d439ff7abdc8d9323150269d182c37c2d514576 F test/tkt3773.test 7bca904d2a647a6a4a291bd86d7fd7c73855b789 F test/tkt3791.test a6624b9a80b216a26cf473607f42f3e51898c267 F test/tkt3793.test d90ffd75c52413908d15e1c44fc2ea9c80fcc449 @@ -1034,7 +1034,7 @@ F tool/vdbe-compress.tcl f12c884766bd14277f4fcedcae07078011717381 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 F tool/win/sqlite.vsix 97894c2790eda7b5bce3cc79cb2a8ec2fde9b3ac -P c3939d249119b47bd57baa11a5ed7cc6014fc795 -R f58494bafc2bb0d7737cf9b62d925506 +P 720a3ceafc35b81936ed2eb1f07a7187d104f0a0 +R 2fcb8a3a4c948e70b98e89248d0d9b21 U dan -Z 7eeaf98ba8f2d987bdc515914ba34037 +Z 61c42a3e0f1600d37d1de1cf170b2233 diff --git a/manifest.uuid b/manifest.uuid index 6accd36d88..5875324cc6 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -720a3ceafc35b81936ed2eb1f07a7187d104f0a0 \ No newline at end of file +b73847f17b7ae1298dfc52de6c4c4bc809bc77a3 \ No newline at end of file diff --git a/src/backup.c b/src/backup.c index d3614b88c3..89e0f00e66 100644 --- a/src/backup.c +++ b/src/backup.c @@ -462,7 +462,6 @@ int sqlite3_backup_step(sqlite3_backup *p, int nPage){ nDestTruncate = nSrcPage * (pgszSrc/pgszDest); } assert( nDestTruncate>0 ); - sqlite3PagerTruncateImage(pDestPager, nDestTruncate); if( pgszSrc=PENDING_BYTE && iSize<=PENDING_BYTE+pgszDest )); - /* This call ensures that all data required to recreate the original + /* This block ensures that all data required to recreate the original ** database has been stored in the journal for pDestPager and the ** journal synced to disk. So at this point we may safely modify ** the database file in any way, knowing that if a power failure ** occurs, the original database will be reconstructed from the ** journal file. */ + sqlite3PagerPagecount(pDestPager, &nDstPage); + for(iPg=nDestTruncate; rc==SQLITE_OK && iPg<=(Pgno)nDstPage; iPg++){ + if( iPg!=PENDING_BYTE_PAGE(p->pDest->pBt) ){ + DbPage *pPg; + rc = sqlite3PagerGet(pDestPager, iPg, &pPg); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pPg); + sqlite3PagerUnref(pPg); + } + } + } rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 1); /* Write the extra pages and truncate the database file as required */ @@ -519,6 +531,7 @@ int sqlite3_backup_step(sqlite3_backup *p, int nPage){ rc = sqlite3PagerSync(pDestPager); } }else{ + sqlite3PagerTruncateImage(pDestPager, nDestTruncate); rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 0); } diff --git a/src/btree.c b/src/btree.c index b3a907c5ab..89f7acdb5e 100644 --- a/src/btree.c +++ b/src/btree.c @@ -2595,6 +2595,7 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag){ if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){ goto trans_begun; } + assert( pBt->bDoTruncate==0 ); /* Write transactions are not possible on a read-only database */ if( (pBt->btsFlags & BTS_READ_ONLY)!=0 && wrflag ){ @@ -3003,10 +3004,7 @@ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){ }while( bCommit && iFreePg>nFin ); assert( iFreePgpDbPage); - if( rc==SQLITE_OK ){ - rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, nFin!=0); - } + rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, nFin!=0); releasePage(pLastPg); if( rc!=SQLITE_OK ){ return rc; @@ -3015,23 +3013,10 @@ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){ } if( bCommit==0 ){ - iLastPg--; - while( iLastPg==PENDING_BYTE_PAGE(pBt)||PTRMAP_ISPAGE(pBt, iLastPg) ){ - if( PTRMAP_ISPAGE(pBt, iLastPg) ){ - MemPage *pPg; - rc = btreeGetPage(pBt, iLastPg, &pPg, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - rc = sqlite3PagerWrite(pPg->pDbPage); - releasePage(pPg); - if( rc!=SQLITE_OK ){ - return rc; - } - } + do { iLastPg--; - } - sqlite3PagerTruncateImage(pBt->pPager, iLastPg); + }while( iLastPg==PENDING_BYTE_PAGE(pBt) || PTRMAP_ISPAGE(pBt, iLastPg) ); + pBt->bDoTruncate = 1; pBt->nPage = iLastPg; } return SQLITE_OK; @@ -3141,7 +3126,7 @@ static int autoVacuumCommit(BtShared *pBt){ put4byte(&pBt->pPage1->aData[32], 0); put4byte(&pBt->pPage1->aData[36], 0); put4byte(&pBt->pPage1->aData[28], nFin); - sqlite3PagerTruncateImage(pBt->pPager, nFin); + pBt->bDoTruncate = 1; pBt->nPage = nFin; } if( rc!=SQLITE_OK ){ @@ -3196,6 +3181,9 @@ int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){ return rc; } } + if( pBt->bDoTruncate ){ + sqlite3PagerTruncateImage(pBt->pPager, pBt->nPage); + } #endif rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, 0); sqlite3BtreeLeave(p); @@ -3211,6 +3199,9 @@ static void btreeEndTransaction(Btree *p){ BtShared *pBt = p->pBt; assert( sqlite3BtreeHoldsMutex(p) ); +#ifndef SQLITE_OMIT_AUTOVACUUM + pBt->bDoTruncate = 0; +#endif btreeClearHasContent(pBt); if( p->inTrans>TRANS_NONE && p->db->activeVdbeCnt>1 ){ /* If there are other active statements that belong to this database @@ -5121,8 +5112,26 @@ static int allocateBtreePage( pPrevTrunk = 0; }while( searchList ); }else{ - /* There are no pages on the freelist, so create a new page at the - ** end of the file */ + /* There are no pages on the freelist, so append a new page to the + ** database image. + ** + ** Normally, new pages allocated by this block can be requested from the + ** pager layer with the 'no-content' flag set. This prevents the pager + ** from trying to read the pages content from disk. However, if the + ** current transaction has already run one or more incremental-vacuum + ** steps, then the page we are about to allocate may contain content + ** that is required in the event of a rollback. In this case, do + ** not set the no-content flag. This causes the pager to load and journal + ** the current page content before overwriting it. + ** + ** Note that the pager will not actually attempt to load or journal + ** content for any page that really does lie past the end of the database + ** file on disk. So the effects of disabling the no-content optimization + ** here are confined to those pages that lie between the end of the + ** database image and the end of the database file. + */ + int bNoContent = (0==pBt->bDoTruncate); + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); if( rc ) return rc; pBt->nPage++; @@ -5137,7 +5146,7 @@ static int allocateBtreePage( MemPage *pPg = 0; TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, pBt->nPage, &pPg, 1); + rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pPg->pDbPage); releasePage(pPg); @@ -5151,7 +5160,7 @@ static int allocateBtreePage( *pPgno = pBt->nPage; assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, *pPgno, ppPage, 1); + rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent); if( rc ) return rc; rc = sqlite3PagerWrite((*ppPage)->pDbPage); if( rc!=SQLITE_OK ){ diff --git a/src/btreeInt.h b/src/btreeInt.h index b157decec7..ce3c5493f8 100644 --- a/src/btreeInt.h +++ b/src/btreeInt.h @@ -411,6 +411,7 @@ struct BtShared { #ifndef SQLITE_OMIT_AUTOVACUUM u8 autoVacuum; /* True if auto-vacuum is enabled */ u8 incrVacuum; /* True if incr-vacuum is enabled */ + u8 bDoTruncate; /* True to truncate db on commit */ #endif u8 inTransaction; /* Transaction state */ u8 max1bytePayload; /* Maximum first byte of cell for a 1-byte payload */ diff --git a/src/pager.c b/src/pager.c index 5879cf760a..863368f591 100644 --- a/src/pager.c +++ b/src/pager.c @@ -1838,6 +1838,8 @@ static int pager_error(Pager *pPager, int rc){ return rc; } +static int pager_truncate(Pager *pPager, Pgno nPage); + /* ** This routine ends a transaction. A transaction is usually ended by ** either a COMMIT or a ROLLBACK operation. This routine may be called @@ -1891,7 +1893,7 @@ static int pager_error(Pager *pPager, int rc){ ** to the first error encountered (the journal finalization one) is ** returned. */ -static int pager_end_transaction(Pager *pPager, int hasMaster){ +static int pager_end_transaction(Pager *pPager, int hasMaster, int bCommit){ int rc = SQLITE_OK; /* Error code from journal finalization operation */ int rc2 = SQLITE_OK; /* Error code from db file unlock operation */ @@ -1977,7 +1979,17 @@ static int pager_end_transaction(Pager *pPager, int hasMaster){ */ rc2 = sqlite3WalEndWriteTransaction(pPager->pWal); assert( rc2==SQLITE_OK ); + }else if( rc==SQLITE_OK && bCommit && pPager->dbFileSize>pPager->dbSize ){ + /* This branch is taken when committing a transaction in rollback-journal + ** mode if the database file on disk is larger than the database image. + ** At this point the journal has been finalized and the transaction + ** successfully committed, but the EXCLUSIVE lock is still held on the + ** file. So it is safe to truncate the database file to its minimum + ** required size. */ + assert( pPager->eLock==EXCLUSIVE_LOCK ); + rc = pager_truncate(pPager, pPager->dbSize); } + if( !pPager->exclusiveMode && (!pagerUseWal(pPager) || sqlite3WalExclusiveMode(pPager->pWal, 0)) ){ @@ -2016,7 +2028,7 @@ static void pagerUnlockAndRollback(Pager *pPager){ sqlite3EndBenignMalloc(); }else if( !pPager->exclusiveMode ){ assert( pPager->eState==PAGER_READER ); - pager_end_transaction(pPager, 0); + pager_end_transaction(pPager, 0, 0); } } pager_unlock(pPager); @@ -2791,7 +2803,7 @@ end_playback: rc = sqlite3PagerSync(pPager); } if( rc==SQLITE_OK ){ - rc = pager_end_transaction(pPager, zMaster[0]!='\0'); + rc = pager_end_transaction(pPager, zMaster[0]!='\0', 0); testcase( rc!=SQLITE_OK ); } if( rc==SQLITE_OK && zMaster[0] && res ){ @@ -5885,36 +5897,6 @@ int sqlite3PagerCommitPhaseOne( #endif if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - /* If this transaction has made the database smaller, then all pages - ** being discarded by the truncation must be written to the journal - ** file. - ** - ** Before reading the pages with page numbers larger than the - ** current value of Pager.dbSize, set dbSize back to the value - ** that it took at the start of the transaction. Otherwise, the - ** calls to sqlite3PagerGet() return zeroed pages instead of - ** reading data from the database file. - */ - if( pPager->dbSizedbOrigSize - && pPager->journalMode!=PAGER_JOURNALMODE_OFF - ){ - Pgno i; /* Iterator variable */ - const Pgno iSkip = PAGER_MJ_PGNO(pPager); /* Pending lock page */ - const Pgno dbSize = pPager->dbSize; /* Database image size */ - pPager->dbSize = pPager->dbOrigSize; - for( i=dbSize+1; i<=pPager->dbOrigSize; i++ ){ - if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){ - PgHdr *pPage; /* Page to journal */ - rc = sqlite3PagerGet(pPager, i, &pPage); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - rc = sqlite3PagerWrite(pPage); - sqlite3PagerUnref(pPage); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - } - } - pPager->dbSize = dbSize; - } - /* Write the master journal name into the journal file. If a master ** journal file name has already been written to the journal file, ** or if zMaster is NULL (no master journal), then this call is a no-op. @@ -5942,11 +5924,14 @@ int sqlite3PagerCommitPhaseOne( goto commit_phase_one_exit; } sqlite3PcacheCleanAll(pPager->pPCache); - - /* If the file on disk is not the same size as the database image, - ** then use pager_truncate to grow or shrink the file here. - */ - if( pPager->dbSize!=pPager->dbFileSize ){ + + /* If the file on disk is smaller than the database image, use + ** pager_truncate to grow the file here. This can happen if the database + ** image was extended as part of the current transaction and then the + ** last page in the db image moved to the free-list. In this case the + ** last page is never written out to disk, leaving the database file + ** undersized. Fix this now if it is the case. */ + if( pPager->dbSize>pPager->dbFileSize ){ Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager)); assert( pPager->eState==PAGER_WRITER_DBMOD ); rc = pager_truncate(pPager, nNew); @@ -6019,7 +6004,7 @@ int sqlite3PagerCommitPhaseTwo(Pager *pPager){ } PAGERTRACE(("COMMIT %d\n", PAGERID(pPager))); - rc = pager_end_transaction(pPager, pPager->setMaster); + rc = pager_end_transaction(pPager, pPager->setMaster, 1); return pager_error(pPager, rc); } @@ -6064,11 +6049,11 @@ int sqlite3PagerRollback(Pager *pPager){ if( pagerUseWal(pPager) ){ int rc2; rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1); - rc2 = pager_end_transaction(pPager, pPager->setMaster); + rc2 = pager_end_transaction(pPager, pPager->setMaster, 0); if( rc==SQLITE_OK ) rc = rc2; }else if( !isOpen(pPager->jfd) || pPager->eState==PAGER_WRITER_LOCKED ){ int eState = pPager->eState; - rc = pager_end_transaction(pPager, 0); + rc = pager_end_transaction(pPager, 0, 0); if( !MEMDB && eState>PAGER_WRITER_LOCKED ){ /* This can happen using journal_mode=off. Move the pager to the error ** state to indicate that the contents of the cache may not be trusted. diff --git a/test/tkt3762.test b/test/tkt3762.test index c24041c96e..424b1e6487 100644 --- a/test/tkt3762.test +++ b/test/tkt3762.test @@ -10,8 +10,8 @@ #*********************************************************************** # # Ticket #3762: Make sure that an incremental vacuum that reduces the -# size of the database file such that a pointer-map page is elemented -# can be correctly rolled back. +# size of the database file such that if a pointer-map page is eliminated +# it can be correctly rolled back. # # That ticket #3762 has been fixed has already been verified by the # savepoint6.test test script. But this script is simplier and a