** May you share freely, never taking more than you give.
**
*************************************************************************
-** $Id: btree.c,v 1.552 2008/12/23 15:58:06 drh Exp $
+** $Id: btree.c,v 1.553 2008/12/27 15:23:13 danielk1977 Exp $
**
** This file implements a external (disk-based) database using BTrees.
** See the header comment on "btreeInt.h" for additional information.
** number of pages the database file will contain after this
** process is complete.
*/
-static int incrVacuumStep(BtShared *pBt, Pgno nFin){
- Pgno iLastPg; /* Last page in the database */
+static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg){
Pgno nFreeList; /* Number of pages still on the free-list */
assert( sqlite3_mutex_held(pBt->mutex) );
- iLastPg = pBt->nTrunc;
- if( iLastPg==0 ){
- iLastPg = pagerPagecount(pBt);
- }
if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){
int rc;
}
}
- pBt->nTrunc = iLastPg - 1;
- while( pBt->nTrunc==PENDING_BYTE_PAGE(pBt)||PTRMAP_ISPAGE(pBt, pBt->nTrunc) ){
- pBt->nTrunc--;
+ if( nFin==0 ){
+ iLastPg--;
+ while( iLastPg==PENDING_BYTE_PAGE(pBt)||PTRMAP_ISPAGE(pBt, iLastPg) ){
+ iLastPg--;
+ }
+ sqlite3PagerTruncateImage(pBt->pPager, iLastPg);
}
return SQLITE_OK;
}
rc = SQLITE_DONE;
}else{
invalidateAllOverflowCache(pBt);
- rc = incrVacuumStep(pBt, 0);
+ rc = incrVacuumStep(pBt, 0, sqlite3PagerImageSize(pBt->pPager));
}
sqlite3BtreeLeave(p);
return rc;
** i.e. the database has been reorganized so that only the first *pnTrunc
** pages are in use.
*/
-static int autoVacuumCommit(BtShared *pBt, Pgno *pnTrunc){
+static int autoVacuumCommit(BtShared *pBt){  /* new form: the pnTrunc out-parameter is gone */
int rc = SQLITE_OK;
Pager *pPager = pBt->pPager;
VVA_ONLY( int nRef = sqlite3PagerRefcount(pPager) );
invalidateAllOverflowCache(pBt);
assert(pBt->autoVacuum);
if( !pBt->incrVacuum ){
- Pgno nFin = 0;
-
- if( pBt->nTrunc==0 ){
- Pgno nFree;
- Pgno nPtrmap;
- const int pgsz = pBt->pageSize;
- Pgno nOrig = pagerPagecount(pBt);
-
- if( PTRMAP_ISPAGE(pBt, nOrig) ){
- return SQLITE_CORRUPT_BKPT;
- }
- if( nOrig==PENDING_BYTE_PAGE(pBt) ){
- nOrig--;
- }
- nFree = get4byte(&pBt->pPage1->aData[36]);
- nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+pgsz/5)/(pgsz/5);
- nFin = nOrig - nFree - nPtrmap;
- if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<=PENDING_BYTE_PAGE(pBt) ){
- nFin--;
- }
- while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){
- nFin--;
- }
+ Pgno nFin;      /* Computed below; passed to incrVacuumStep() and TruncateImage() */
+ Pgno nFree;     /* 4-byte value read from offset 36 of page 1 */
+ Pgno nPtrmap;   /* Subtracted from nOrig together with nFree to obtain nFin */
+ Pgno iFree;     /* Loop counter: third argument to incrVacuumStep() */
+ const int pgsz = pBt->pageSize;
+ Pgno nOrig = pagerPagecount(pBt);  /* Page count before vacuuming */
+
+ if( PTRMAP_ISPAGE(pBt, nOrig) ){  /* Last page is a pointer-map page: report corruption */
+ return SQLITE_CORRUPT_BKPT;
+ }
+ if( nOrig==PENDING_BYTE_PAGE(pBt) ){  /* Step back over the pending-byte page */
+ nOrig--;
+ }
+ nFree = get4byte(&pBt->pPage1->aData[36]);
+ nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+pgsz/5)/(pgsz/5);
+ nFin = nOrig - nFree - nPtrmap;
+ if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<=PENDING_BYTE_PAGE(pBt) ){
+ nFin--;
+ }
+ while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){  /* nFin may not be a ptrmap or pending-byte page */
+ nFin--;
}
- while( rc==SQLITE_OK ){
- rc = incrVacuumStep(pBt, nFin);
+ for(iFree=nOrig; iFree>nFin && rc==SQLITE_OK; iFree--){  /* One step per page from nOrig down to nFin+1; stops on first non-OK rc */
+ rc = incrVacuumStep(pBt, nFin, iFree);
}
- if( rc==SQLITE_DONE ){
- assert(nFin==0 || pBt->nTrunc==0 || nFin<=pBt->nTrunc);
+ if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){
rc = SQLITE_OK;
- if( pBt->nTrunc && nFin ){
- rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
- put4byte(&pBt->pPage1->aData[32], 0);
- put4byte(&pBt->pPage1->aData[36], 0);
- pBt->nTrunc = nFin;
- }
+ rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);  /* NOTE(review): overwrites the rc = SQLITE_OK assigned just above */
+ put4byte(&pBt->pPage1->aData[32], 0);          /* NOTE(review): executed even if sqlite3PagerWrite() failed - confirm intended */
+ put4byte(&pBt->pPage1->aData[36], 0);          /* Zero the same page-1 field that nFree was read from */
+ sqlite3PagerTruncateImage(pBt->pPager, nFin);  /* Shrink the pager image to nFin pages */
}
if( rc!=SQLITE_OK ){
sqlite3PagerRollback(pPager);
}
}
- if( rc==SQLITE_OK ){
- *pnTrunc = pBt->nTrunc;
- pBt->nTrunc = 0;
- }
assert( nRef==sqlite3PagerRefcount(pPager) );
return rc;
}
pBt->db = p->db;
#ifndef SQLITE_OMIT_AUTOVACUUM
if( pBt->autoVacuum ){
- rc = autoVacuumCommit(pBt, &nTrunc);
+ rc = autoVacuumCommit(pBt);
if( rc!=SQLITE_OK ){
sqlite3BtreeLeave(p);
return rc;
if( p->inTrans==TRANS_WRITE ){
int rc2;
-#ifndef SQLITE_OMIT_AUTOVACUUM
- pBt->nTrunc = 0;
-#endif
-
assert( TRANS_WRITE==pBt->inTransaction );
rc2 = sqlite3PagerRollback(pBt->pPager);
if( rc2!=SQLITE_OK ){
*pPgno = nPage + 1;
#ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->nTrunc ){
- /* An incr-vacuum has already run within this transaction. So the
- ** page to allocate is not from the physical end of the file, but
- ** at pBt->nTrunc.
- */
- *pPgno = pBt->nTrunc+1;
- if( *pPgno==PENDING_BYTE_PAGE(pBt) ){
- (*pPgno)++;
- }
- }
if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, *pPgno) ){
/* If *pPgno refers to a pointer-map page, allocate two new pages
** at the end of the file instead of one. The first allocated page
(*pPgno)++;
if( *pPgno==PENDING_BYTE_PAGE(pBt) ){ (*pPgno)++; }
}
- if( pBt->nTrunc ){
- pBt->nTrunc = *pPgno;
- }
#endif
assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
sCheck.nErr = 0;
sCheck.mallocFailed = 0;
*pnErr = 0;
-#ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->nTrunc!=0 ){
- sCheck.nPage = pBt->nTrunc;
- }
-#endif
if( sCheck.nPage==0 ){
unlockBtreeIfUnused(pBt);
sqlite3BtreeLeave(p);
** file simultaneously, or one process from reading the database while
** another is writing.
**
-** @(#) $Id: pager.c,v 1.523 2008/12/23 19:15:57 danielk1977 Exp $
+** @(#) $Id: pager.c,v 1.524 2008/12/27 15:23:13 danielk1977 Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"
** next successful rollback is performed on the pager cache. Also,
** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup()
** APIs, they may still be used successfully.
+**
+** Managing the size of the database file in pages is a little complicated.
+** The variable Pager.dbSize contains the number of pages that the database
+** image currently contains. As the database image grows or shrinks this
+** variable is updated. The variable Pager.dbFileSize contains the number
+** of pages in the database file. This may be different from Pager.dbSize
+** if some pages have been appended to the database image but not yet written
+** out from the cache to the actual file on disk. Or if the image has been
+** truncated by an incremental-vacuum operation. The Pager.dbOrigSize variable
+** contains the number of pages in the database image when the current
+** transaction was opened. The contents of all three of these variables are
+** only guaranteed to be correct if the boolean Pager.dbSizeValid is true.
*/
struct Pager {
sqlite3_vfs *pVfs; /* OS functions to use for IO */
u8 dbModified; /* True if there are any changes to the Db */
u8 changeCountDone; /* Set after incrementing the change-counter */
u8 dbSizeValid; /* Set when dbSize is correct */
+ Pgno dbSize; /* Number of pages in the database */
+ Pgno dbOrigSize; /* dbSize before the current transaction */
+ Pgno dbFileSize; /* Number of pages in the database file */
u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */
int errCode; /* One of several kinds of errors */
- Pgno dbSize; /* Number of pages in the file */
- Pgno origDbSize; /* dbSize before the current change */
int nRec; /* Number of pages written to the journal */
u32 cksumInit; /* Quasi-random value added to every checksum */
int stmtNRec; /* Number of records in stmt subjournal */
#define PAGER_MAX_PGNO 2147483647
/*
-** Return false if it is necessary to write page *pPg into the sub-journal.
-** More accurately, true is returned if either:
-**
-** * No savepoints are open, or
-** * The page has been saved to the sub-journal since the most recent
-** savepoint was opened.
+** Return true if it is necessary to write page *pPg into the sub-journal.
+** A page needs to be written into the sub-journal if there exists one
+** or more open savepoints for which:
**
-** TODO: There's a bug here. To do with PagerSavepoint.nOrig. Also consider
-** the idea that the page may not be required by the outermost savepoint
-** but may be required by some earlier savepoint, due to an incremental
-** vacuum operation.
+** * The page-number is less than or equal to PagerSavepoint.nOrig, and
+** * The bit corresponding to the page-number is not set in
+** PagerSavepoint.pInSavepoint.
*/
-static int pageInSavepoint(PgHdr *pPg){
+static int subjRequiresPage(PgHdr *pPg){
+ Pgno pgno = pPg->pgno;
Pager *pPager = pPg->pPager;
- if( pPager->nSavepoint==0 ){
- return 1;
+ int i;
+ for(i=0; i<pPager->nSavepoint; i++){  /* Check every entry of aSavepoint[], not just the last one */
+ PagerSavepoint *p = &pPager->aSavepoint[i];
+ if( p->nOrig>=pgno && 0==sqlite3BitvecTest(p->pInSavepoint, pgno) ){  /* pgno within nOrig and its bit not yet set */
+ return 1;
+ }
}
- return sqlite3BitvecTest(
- pPager->aSavepoint[pPager->nSavepoint-1].pInSavepoint, pPg->pgno
- );
+ return 0;  /* No savepoint matched (also the result when nSavepoint is 0) */
}
+/*
+** Return true if the page is already in the journal file.
+**
+** The test looks up pPg->pgno in the Pager.pInJournal bitvec of the
+** pager that owns the page. The value returned by sqlite3BitvecTest()
+** is passed back to the caller unchanged.
+*/
static int pageInJournal(PgHdr *pPg){
return sqlite3BitvecTest(pPg->pPager->pInJournal, pPg->pgno);
}
sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
/* The initial database size */
- put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
+ put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbOrigSize);
/* The assumed sector size for this process */
put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
releaseAllSavepoint(pPager);
pPager->journalOff = 0;
pPager->journalStarted = 0;
- pPager->origDbSize = 0;
+ pPager->dbOrigSize = 0;
}
pPager->state = PAGER_UNLOCK;
}else if( pPager->state==PAGER_SYNCED ){
pPager->state = PAGER_EXCLUSIVE;
}
- pPager->origDbSize = 0;
+ pPager->dbOrigSize = 0;
pPager->setMaster = 0;
pPager->needSync = 0;
/* lruListSetFirstSynced(pPager); */
){
i64 ofst = (pgno-1)*(i64)pPager->pageSize;
rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize, ofst);
+ if( pgno>pPager->dbFileSize ){
+ pPager->dbFileSize = pgno;
+ }
}
if( pPg ){
/* No page should ever be explicitly rolled back that is in use, except
}else{
rc = sqlite3OsWrite(pPager->fd, "", 1, newSize-1);
}
+ if( rc==SQLITE_OK ){
+ pPager->dbFileSize = nPage;
+ }
}
}
if( rc==SQLITE_OK ){
/* Truncate the database back to the size it was before the
** savepoint being reverted was opened.
*/
- rc = pager_truncate(pPager, pSavepoint?pSavepoint->nOrig:pPager->origDbSize);
+ rc = pager_truncate(pPager, pSavepoint?pSavepoint->nOrig:pPager->dbOrigSize);
assert( pPager->state>=PAGER_SHARED );
/* Now roll back all main journal file records that occur after byte
n /= pPager->pageSize;
}
if( pPager->state!=PAGER_UNLOCK ){
- pPager->dbSize = (int)n;
+ pPager->dbSize = (Pgno)n;
+ pPager->dbFileSize = (Pgno)n;
pPager->dbSizeValid = 1;
}
}
}
/*
-** Truncate the file to the number of pages specified.
+** Truncate the file to the number of pages specified.
+**
+** Unless an IO error occurs, this function is guaranteed to modify the
+** database file itself. If an exclusive lock is not held when this function
+** is called, one is obtained before truncating the file.
*/
int sqlite3PagerTruncate(Pager *pPager, Pgno nPage){
int rc = SQLITE_OK;
sqlite3PagerPagecount(pPager, 0);
if( pPager->errCode ){
rc = pPager->errCode;
- }else if( nPage<pPager->dbSize ){
+ }else if( nPage<pPager->dbFileSize ){
rc = syncJournal(pPager);
if( rc==SQLITE_OK ){
/* Get an exclusive lock on the database before truncating. */
return rc;
}
+#ifndef SQLITE_OMIT_AUTOVACUUM
+/*
+** Truncate the in-memory database file image to nPage pages. Unlike
+** sqlite3PagerTruncate(), this function does not actually modify the
+** database file on disk. It just sets the internal state of the pager
+** object so that the truncation will be done when the current
+** transaction is committed.
+**
+** The only state changed here is Pager.dbSize. The caller must ensure
+** that Pager.dbSizeValid is set and that nPage does not exceed the
+** current Pager.dbSize; both conditions are checked with assert() only.
+*/
+void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){
+ assert( pPager->dbSizeValid );
+ assert( pPager->dbSize>=nPage );
+ pPager->dbSize = nPage;
+}
+
+/*
+** Return the current size of the database file image in pages. This
+** function differs from sqlite3PagerPagecount() in two ways:
+**
+** a) It may only be called when at least one reference to a database
+** page is held. This guarantees that the database size is already
+** known and a call to sqlite3OsFileSize() is not required.
+**
+** b) The return value is not adjusted for the locking page.
+**
+** The value returned is Pager.dbSize. Pager.dbSizeValid must already be
+** set when this function is called; that is checked with assert() only.
+*/
+Pgno sqlite3PagerImageSize(Pager *pPager){
+ assert( pPager->dbSizeValid );
+ return pPager->dbSize;
+}
+#endif /* ifndef SQLITE_OMIT_AUTOVACUUM */
+
/*
** Shutdown the page cache. Free all memory and close all files.
**
if( pList->pgno==1 ){
memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
}
+ if( pList->pgno>pPager->dbFileSize ){
+ pPager->dbFileSize = pList->pgno;
+ }
}
#ifndef NDEBUG
else{
rc = pPager->errCode;
goto failed_to_open_journal;
}
- pPager->origDbSize = pPager->dbSize;
+ pPager->dbOrigSize = pPager->dbSize;
rc = writeJournalHdr(pPager);
** overwritten with zeros.
*/
assert( pPager->nRec==0 );
- assert( pPager->origDbSize==0 );
+ assert( pPager->dbOrigSize==0 );
assert( pPager->pInJournal==0 );
sqlite3PagerPagecount(pPager, 0);
pPager->pInJournal = sqlite3BitvecCreate( pPager->dbSize );
if( !pPager->pInJournal ){
rc = SQLITE_NOMEM;
}else{
- pPager->origDbSize = pPager->dbSize;
+ pPager->dbOrigSize = pPager->dbSize;
rc = writeJournalHdr(pPager);
}
}
** to the journal then we can return right away.
*/
sqlite3PcacheMakeDirty(pPg);
- if( pageInJournal(pPg) && pageInSavepoint(pPg) ){
+ if( pageInJournal(pPg) && !subjRequiresPage(pPg) ){
pPager->dirtyCache = 1;
pPager->dbModified = 1;
}else{
** the transaction journal if it is not there already.
*/
if( !pageInJournal(pPg) && pPager->journalOpen ){
- if( pPg->pgno<=pPager->origDbSize ){
+ if( pPg->pgno<=pPager->dbOrigSize ){
u32 cksum;
char *pData2;
** the statement journal format differs from the standard journal format
** in that it omits the checksums and the header.
*/
- if( !pageInSavepoint(pPg) ){
+ if( subjRequiresPage(pPg) ){
i64 offset = pPager->stmtNRec*(4+pPager->pageSize);
char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
- assert( pageInJournal(pPg) || pPg->pgno>pPager->origDbSize );
+ assert( pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize );
rc = write32bits(pPager->sjfd, offset, pPg->pgno);
if( rc==SQLITE_OK ){
rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4);
Pager *pPager = pPg->pPager;
int rc;
- if( pPg->pgno>pPager->origDbSize ){
+ if( pPg->pgno>pPager->dbOrigSize ){
return SQLITE_OK;
}
if( pPager->pAlwaysRollback==0 ){
assert( pPager->pInJournal );
- pPager->pAlwaysRollback = sqlite3BitvecCreate(pPager->origDbSize);
+ pPager->pAlwaysRollback = sqlite3BitvecCreate(pPager->dbOrigSize);
if( !pPager->pAlwaysRollback ){
return SQLITE_NOMEM;
}
if( rc==SQLITE_OK && (pPg->flags&PGHDR_DIRTY) && pPager->nSavepoint==0 ){
assert( pPager->state>=PAGER_SHARED );
- if( pPager->dbSize==pPg->pgno && pPager->origDbSize<pPager->dbSize ){
+ if( pPager->dbSize==pPg->pgno && pPager->dbOrigSize<pPager->dbSize ){
/* If this pages is the last page in the file and the file has grown
** during the current transaction, then do NOT mark the page as clean.
** When the database file grows, we must make sure that the last page
*/
if( pPager->journalOpen==0
|| sqlite3BitvecTest(pPager->pAlwaysRollback, pPg->pgno)
- || pPg->pgno>pPager->origDbSize
+ || pPg->pgno>pPager->dbOrigSize
){
return;
}
#ifdef SQLITE_SECURE_DELETE
if( sqlite3BitvecTest(pPager->pInJournal, pPg->pgno)!=0
- || pPg->pgno>pPager->origDbSize ){
+ || pPg->pgno>pPager->dbOrigSize ){
return;
}
#endif
** pages on the freelist (ex: corrupt9.test) then the following is not
** necessarily true:
*/
- /* assert( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ); */
+ /* assert( !pPg->inJournal && (int)pPg->pgno <= pPager->dbOrigSize ); */
assert( pPager->pInJournal!=0 );
sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
if( isDirect && pPager->fd->pMethods ){
const void *zBuf = pPgHdr->pData;
+ assert( pPager->dbFileSize>0 );
rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);
}
#endif
return SQLITE_OK;
}
- PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n",
- pPager->zFilename, zMaster, nTrunc);
+ PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nSize=%d\n",
+ pPager->zFilename, zMaster, pPager->dbSize);
/* If this is an in-memory db, or no pages have been written to, or this
** function has already been called, it is a no-op.
!zMaster &&
pPager->journalOpen &&
pPager->journalOff==jrnlBufferSize(pPager) &&
- nTrunc==0 &&
+ pPager->dbSize>=pPager->dbFileSize &&
(pPg==0 || pPg->pDirty==0)
);
assert( pPager->journalOpen || pPager->journalMode==PAGER_JOURNALMODE_OFF );
if( rc!=SQLITE_OK ) goto sync_exit;
if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
#ifndef SQLITE_OMIT_AUTOVACUUM
- if( nTrunc!=0 ){
+ if( pPager->dbSize<pPager->dbOrigSize ){
/* If this transaction has made the database smaller, then all pages
** being discarded by the truncation must be written to the journal
** file.
*/
Pgno i;
Pgno iSkip = PAGER_MJ_PGNO(pPager);
- for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
+ Pgno dbSize = pPager->dbSize;
+ for( i=pPager->dbSize+1; i<=pPager->dbOrigSize; i++ ){
if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){
rc = sqlite3PagerGet(pPager, i, &pPg);
if( rc!=SQLITE_OK ) goto sync_exit;
if( rc!=SQLITE_OK ) goto sync_exit;
}
}
+ pPager->dbSize = dbSize;
}
#endif
rc = writeMasterJournal(pPager, zMaster);
if( rc!=SQLITE_OK ) goto sync_exit;
#ifndef SQLITE_OMIT_AUTOVACUUM
- if( nTrunc!=0 ){
- rc = sqlite3PagerTruncate(pPager, nTrunc);
+ if( pPager->dbSize<pPager->dbFileSize ){
+ rc = sqlite3PagerTruncate(pPager, pPager->dbSize);
if( rc!=SQLITE_OK ) goto sync_exit;
}
#endif
IOTRACE(("DBSYNC %p\n", pPager))
pPager->state = PAGER_SYNCED;
- }else if( MEMDB && nTrunc!=0 ){
- rc = sqlite3PagerTruncate(pPager, nTrunc);
+ }else if( MEMDB && pPager->dbSize<pPager->dbFileSize ){
+ rc = sqlite3PagerTruncate(pPager, pPager->dbSize);
}
sync_exit:
*/
if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){
needSyncPgno = pPg->pgno;
- assert( pageInJournal(pPg) || pPg->pgno>pPager->origDbSize );
+ assert( pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize );
assert( pPg->flags&PGHDR_DIRTY );
assert( pPager->needSync );
}
assert( pPager->needSync );
rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
if( rc!=SQLITE_OK ){
- if( pPager->pInJournal && needSyncPgno<=pPager->origDbSize ){
+ if( pPager->pInJournal && needSyncPgno<=pPager->dbOrigSize ){
sqlite3BitvecClear(pPager->pInJournal, needSyncPgno);
}
return rc;