** May you share freely, never taking more than you give.
**
*************************************************************************
-** $Id: btree.c,v 1.346 2007/03/30 11:12:08 drh Exp $
+** $Id: btree.c,v 1.347 2007/03/30 14:06:34 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
}
#endif
+/*
+** This routine does the first phase of a two-phase commit. This routine
+** causes a rollback journal to be created (if it does not already exist)
+** and populated with enough information so that if a power loss occurs
+** the database can be restored to its original state by playing back
+** the journal. Then the contents of the journal are flushed out to
+** the disk. After the journal is safely on oxide, the changes to the
+** database are written into the database file and flushed to oxide.
+** At the end of this call, the rollback journal still exists on the
+** disk and we are still holding all locks, so the transaction has not
+** committed. See sqlite3BtreeCommitPhaseTwo() for the second phase of the
+** commit process.
+**
+** This call is a no-op if no write-transaction is currently active on pBt.
+**
+** Otherwise, sync the database file for the btree pBt. zMaster points to
+** the name of a master journal file that should be written into the
+** individual journal file, or is NULL, indicating no master journal file
+** (single database transaction).
+**
+** When this is called, the master journal should already have been
+** created, populated with this journal pointer and synced to disk.
+**
+** Once this routine has returned, the only thing required to commit
+** the write-transaction for this database file is to delete the journal.
+*/
+int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){
+  BtShared *pBt;
+  Pgno nTrunc = 0;   /* Page count to truncate to; 0 means no truncation */
+
+  /* Nothing to do unless this handle holds a write-transaction. */
+  if( p->inTrans!=TRANS_WRITE ){
+    return SQLITE_OK;
+  }
+  pBt = p->pBt;
+
+#ifndef SQLITE_OMIT_AUTOVACUUM
+  /* For auto-vacuum databases, autoVacuumCommit() may set nTrunc; any
+  ** error from it is returned before the pager is involved.
+  */
+  if( pBt->autoVacuum ){
+    int rc = autoVacuumCommit(pBt, &nTrunc);
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+  }
+#endif
+
+  /* Hand the master journal name and truncation size to the pager. */
+  return sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, nTrunc);
+}
+
/*
** Commit the transaction currently in progress.
**
** This will release the write lock on the database file. If there
** are no active cursors, it also releases the read lock.
*/
-int sqlite3BtreeCommit(Btree *p){
+int sqlite3BtreeCommitPhaseTwo(Btree *p){
BtShared *pBt = p->pBt;
btreeIntegrity(p);
int rc;
assert( pBt->inTransaction==TRANS_WRITE );
assert( pBt->nTransaction>0 );
- rc = sqlite3PagerCommit(pBt->pPager);
+ rc = sqlite3PagerCommitPhaseTwo(pBt->pPager);
if( rc!=SQLITE_OK ){
return rc;
}
return SQLITE_OK;
}
+/*
+** Convenience wrapper that runs a complete commit: phase one with no
+** master journal (zMaster==0) and then, only if phase one succeeded,
+** phase two. The first error encountered is returned.
+*/
+int sqlite3BtreeCommit(Btree *p){
+  int rc = sqlite3BtreeCommitPhaseOne(p, 0);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+  return sqlite3BtreeCommitPhaseTwo(p);
+}
+
#ifndef NDEBUG
/*
** Return the number of write-cursors open on this handle. This is for use
return (p && (p->inTrans!=TRANS_NONE));
}
-/*
-** This routine does the first phase of a 2-phase commit. This routine
-** causes a rollback journal to be created (if it does not already exist)
-** and populated with enough information so that if a power loss occurs
-** the database can be restored to its original state by playing back
-** the journal. Then the contents of the journal are flushed out to
-** the disk. After the journal is safely on oxide, the changes to the
-** database are written into the database file and flushed to oxide.
-** At the end of this call, the rollback journal still exists on the
-** disk and we are still holding all locks, so the transaction has not
-** committed. See sqlite3BtreeCommit() for the second phase of the
-** commit process.
-**
-** This call is a no-op if no write-transaction is currently active on pBt.
-**
-** Otherwise, sync the database file for the btree pBt. zMaster points to
-** the name of a master journal file that should be written into the
-** individual journal file, or is NULL, indicating no master journal file
-** (single database transaction).
-**
-** When this is called, the master journal should already have been
-** created, populated with this journal pointer and synced to disk.
-**
-** Once this is routine has returned, the only thing required to commit
-** the write-transaction for this database file is to delete the journal.
-*/
-int sqlite3BtreeSync(Btree *p, const char *zMaster){
- int rc = SQLITE_OK;
- if( p->inTrans==TRANS_WRITE ){
- BtShared *pBt = p->pBt;
- Pgno nTrunc = 0;
-#ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum ){
- rc = autoVacuumCommit(pBt, &nTrunc);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- }
-#endif
- rc = sqlite3PagerSync(pBt->pPager, zMaster, nTrunc);
- }
- return rc;
-}
-
/*
** This function returns a pointer to a blob of memory associated with
** a single shared-btree. The memory is used by client code for it's own
** file simultaneously, or one process from reading the database while
** another is writing.
**
-** @(#) $Id: pager.c,v 1.306 2007/03/29 18:19:52 drh Exp $
+** @(#) $Id: pager.c,v 1.307 2007/03/30 14:06:34 drh Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"
u8 inStmt; /* TRUE if in the statement subjournal */
u8 dirty; /* TRUE if we need to write back changes */
u8 needSync; /* Sync journal before writing this page */
- u8 alwaysRollback; /* Disable dont_rollback() for this page */
+ u8 alwaysRollback; /* Disable DontRollback() for this page */
short int nRef; /* Number of users of this page */
PgHdr *pDirty, *pPrevDirty; /* Dirty pages */
u32 notUsed; /* Buffer space */
u8 readOnly; /* True for a read-only database */
u8 needSync; /* True if an fsync() is needed on the journal */
u8 dirtyCache; /* True if cached pages have changed */
- u8 alwaysRollback; /* Disable dont_rollback() for all pages */
+ u8 alwaysRollback; /* Disable DontRollback() for all pages */
u8 memDb; /* True to inhibit all file I/O */
u8 setMaster; /* True if a m-j name has been written to jrnl */
+ u8 doNotSync; /* Boolean. While true, do not spill the cache */
+ u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */
+ u8 changeCountDone; /* Set after incrementing the change-counter */
int errCode; /* One of several kinds of errors */
int dbSize; /* Number of pages in the file */
int origDbSize; /* dbSize before the current change */
#endif
char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */
u32 iChangeCount; /* Db change-counter for which cache is valid */
- u8 doNotSync; /* Boolean. While true, do not spill the cache */
- u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */
- u8 changeCountDone; /* Set after incrementing the change-counter */
};
/*
}
/*
+** This routine ends a transaction. A transaction is ended by either
+** a COMMIT or a ROLLBACK.
+**
** When this routine is called, the pager has the journal file open and
-** a RESERVED or EXCLUSIVE lock on the database. This routine releases
-** the database lock and acquires a SHARED lock in its place. The journal
-** file is deleted and closed.
+** a RESERVED or EXCLUSIVE lock on the database. This routine will release
+** the database lock and acquire a SHARED lock in its place if that is
+** the appropriate thing to do. Releasing locks is usually appropriate,
+** unless we are in exclusive access mode or unless this is a
+** COMMIT AND BEGIN or ROLLBACK AND BEGIN operation.
+**
+** The journal file is either deleted or truncated.
**
** TODO: Consider keeping the journal file open for temporary databases.
** This might give a performance improvement on windows where opening
** a file is an expensive operation.
*/
-static int pager_unwritelock(Pager *pPager){
+static int pager_end_transaction(Pager *pPager){
PgHdr *pPg;
int rc = SQLITE_OK;
int rc2 = SQLITE_OK;
end_playback:
if( rc==SQLITE_OK ){
- rc = pager_unwritelock(pPager);
+ rc = pager_end_transaction(pPager);
}
if( zMaster ){
/* If there was a master journal and this routine will return success,
/* If the page we are recycling is marked as alwaysRollback, then
** set the global alwaysRollback flag, thus disabling the
- ** sqlite_dont_rollback() optimization for the rest of this transaction.
+ ** sqlite3PagerDontRollback() optimization for the rest of this transaction.
** It is necessary to do this because the page marked alwaysRollback
** might be reloaded at a later time but at that point we won't remember
** that is was marked alwaysRollback. This means that all pages must
rc = sqlite3PagerStmtBegin(pPager);
}
if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
- rc = pager_unwritelock(pPager);
+ rc = pager_end_transaction(pPager);
if( rc==SQLITE_OK ){
rc = SQLITE_FULL;
}
** Acquire a write-lock on the database. The lock is removed when
** the any of the following happen:
**
-** * sqlite3PagerCommit() is called.
+** * sqlite3PagerCommitPhaseTwo() is called.
** * sqlite3PagerRollback() is called.
** * sqlite3PagerClose() is called.
** * sqlite3PagerUnref() is called to on every outstanding page.
** a transaction then removed from the freelist during a later part
** of the same transaction and reused for some other purpose. When it
** is first added to the freelist, this routine is called. When reused,
-** the dont_rollback() routine is called. But because the page contains
-** critical data, we still need to be sure it gets rolled back in spite
-** of the dont_rollback() call.
+** the sqlite3PagerDontRollback() routine is called. But because the
+** page contains critical data, we still need to be sure it gets
+** rolled back in spite of the sqlite3PagerDontRollback() call.
*/
void sqlite3PagerDontWrite(Pager *pPager, Pgno pgno){
PgHdr *pPg;
}
+/*
+** This routine is called to increment the database file change-counter,
+** stored at byte 24 of the pager file.
+**
+** If pPager->changeCountDone is already set this is a no-op that returns
+** SQLITE_OK. Otherwise page 1 is fetched and marked writable, the 32-bit
+** counter at byte 24 is incremented in place, the new value is cached in
+** pPager->iChangeCount and changeCountDone is set.
+**
+** Returns SQLITE_OK on success, or the error code reported by
+** sqlite3PagerGet() or sqlite3PagerWrite(). On every path the reference
+** to page 1 obtained here is released before returning.
+*/
+static int pager_incr_changecounter(Pager *pPager){
+  PgHdr *pPgHdr;
+  u32 change_counter;
+  int rc;
+
+  if( !pPager->changeCountDone ){
+    /* Open page 1 of the file for writing. */
+    rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
+    if( rc!=SQLITE_OK ) return rc;
+    rc = sqlite3PagerWrite(pPgHdr);
+    if( rc!=SQLITE_OK ){
+      /* Do not leak the reference taken by sqlite3PagerGet() above. */
+      sqlite3PagerUnref(pPgHdr);
+      return rc;
+    }
+
+    /* Read the current value at byte 24. */
+    change_counter = retrieve32bits(pPgHdr, 24);
+
+    /* Increment the value just read and write it back to byte 24. */
+    change_counter++;
+    put32bits(((char*)PGHDR_TO_DATA(pPgHdr))+24, change_counter);
+    pPager->iChangeCount = change_counter;
+
+    /* Release the page reference. */
+    sqlite3PagerUnref(pPgHdr);
+    pPager->changeCountDone = 1;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Sync the database file for the pager pPager. zMaster points to the name
+** of a master journal file that should be written into the individual
+** journal file. zMaster may be NULL, which is interpreted as no master
+** journal (a single database transaction).
+**
+** This routine ensures that the journal is synced, all dirty pages written
+** to the database file and the database file synced. The only thing that
+** remains to commit the transaction is to delete the journal file (or
+** master journal file if specified).
+**
+** Note that if zMaster==NULL, this does not overwrite a previous value
+** passed to an sqlite3PagerCommitPhaseOne() call.
+**
+** If parameter nTrunc is non-zero, then the pager file is truncated to
+** nTrunc pages (this is used by auto-vacuum databases).
+*/
+int sqlite3PagerCommitPhaseOne(Pager *pPager, const char *zMaster, Pgno nTrunc){
+ int rc = SQLITE_OK;
+
+ PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n",
+ pPager->zFilename, zMaster, nTrunc);
+
+ /* If this is an in-memory db, or no pages have been written to, or this
+ ** function has already been called, it is a no-op.
+ */
+ if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
+ PgHdr *pPg;
+ assert( pPager->journalOpen );
+
+ /* If a master journal file name has already been written to the
+ ** journal file, then no sync is required. This happens when it is
+ ** written, then the process fails to upgrade from a RESERVED to an
+ ** EXCLUSIVE lock. The next time the process tries to commit the
+ ** transaction the m-j name will have already been written.
+ */
+ if( !pPager->setMaster ){
+ /* Bump the change counter on page 1 before the journal is finalized;
+ ** pager_incr_changecounter() marks page 1 writable via
+ ** sqlite3PagerWrite().
+ */
+ rc = pager_incr_changecounter(pPager);
+ if( rc!=SQLITE_OK ) goto sync_exit;
+#ifndef SQLITE_OMIT_AUTOVACUUM
+ if( nTrunc!=0 ){
+ /* If this transaction has made the database smaller, then all pages
+ ** being discarded by the truncation must be written to the journal
+ ** file.
+ */
+ Pgno i;
+ int iSkip = PAGER_MJ_PGNO(pPager);
+ for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
+ /* Skip pages whose bit is already set in the aInJournal bitmap, and
+ ** the page numbered PAGER_MJ_PGNO (presumably a reserved page that
+ ** holds no data -- confirm against the macro definition).
+ */
+ if( !(pPager->aInJournal[i/8] & (1<<(i&7))) && i!=iSkip ){
+ rc = sqlite3PagerGet(pPager, i, &pPg);
+ if( rc!=SQLITE_OK ) goto sync_exit;
+ rc = sqlite3PagerWrite(pPg);
+ /* Drop the reference before testing rc so that the page is released
+ ** on both the success and the failure path.
+ */
+ sqlite3PagerUnref(pPg);
+ if( rc!=SQLITE_OK ) goto sync_exit;
+ }
+ }
+ }
+#endif
+ rc = writeMasterJournal(pPager, zMaster);
+ if( rc!=SQLITE_OK ) goto sync_exit;
+ rc = syncJournal(pPager);
+ if( rc!=SQLITE_OK ) goto sync_exit;
+ }
+
+#ifndef SQLITE_OMIT_AUTOVACUUM
+ if( nTrunc!=0 ){
+ rc = sqlite3PagerTruncate(pPager, nTrunc);
+ if( rc!=SQLITE_OK ) goto sync_exit;
+ }
+#endif
+
+ /* Write all dirty pages to the database file */
+ pPg = pager_get_all_dirty_pages(pPager);
+ rc = pager_write_pagelist(pPg);
+ if( rc!=SQLITE_OK ) goto sync_exit;
+
+ /* Sync the database file. */
+ if( !pPager->noSync ){
+ rc = sqlite3OsSync(pPager->fd, 0);
+ }
+ IOTRACE(("DBSYNC %p\n", pPager))
+
+ /* NOTE(review): rc from sqlite3OsSync() is not tested before the state
+ ** changes, so the pager becomes PAGER_SYNCED even if the sync failed.
+ ** The error code is still returned to the caller below.
+ */
+ pPager->state = PAGER_SYNCED;
+ }else if( MEMDB && nTrunc!=0 ){
+ /* In-memory database: nothing is journalled or synced by this routine;
+ ** only the requested truncation is applied.
+ */
+ rc = sqlite3PagerTruncate(pPager, nTrunc);
+ }
+
+ /* Single exit point: every failure above jumps here with rc set. */
+sync_exit:
+ return rc;
+}
+
+
/*
** Commit all changes to the database and release the write lock.
**
** and an error code is returned. If the commit worked, SQLITE_OK
** is returned.
*/
-int sqlite3PagerCommit(Pager *pPager){
+int sqlite3PagerCommitPhaseTwo(Pager *pPager){
int rc;
PgHdr *pPg;
/* Exit early (without doing the time-consuming sqlite3OsSync() calls)
** if there have been no changes to the database file. */
assert( pPager->needSync==0 );
- rc = pager_unwritelock(pPager);
+ rc = pager_end_transaction(pPager);
}else{
assert( pPager->journalOpen );
- rc = sqlite3PagerSync(pPager, 0, 0);
+ rc = sqlite3PagerCommitPhaseOne(pPager, 0, 0);
if( rc==SQLITE_OK ){
- rc = pager_unwritelock(pPager);
+ rc = pager_end_transaction(pPager);
}
}
return pager_error(pPager, rc);
}
if( !pPager->dirtyCache || !pPager->journalOpen ){
- rc = pager_unwritelock(pPager);
+ rc = pager_end_transaction(pPager);
return rc;
}
if( pPager->state==PAGER_RESERVED ){
int rc2;
rc = pager_playback(pPager, 0);
- rc2 = pager_unwritelock(pPager);
+ rc2 = pager_end_transaction(pPager);
if( rc==SQLITE_OK ){
rc = rc2;
}
pPager->pCodecArg = pCodecArg;
}
-/*
-** This routine is called to increment the database file change-counter,
-** stored at byte 24 of the pager file.
-*/
-static int pager_incr_changecounter(Pager *pPager){
- PgHdr *pPgHdr;
- u32 change_counter;
- int rc;
-
- if( !pPager->changeCountDone ){
- /* Open page 1 of the file for writing. */
- rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
- if( rc!=SQLITE_OK ) return rc;
- rc = sqlite3PagerWrite(pPgHdr);
- if( rc!=SQLITE_OK ) return rc;
-
- /* Read the current value at byte 24. */
- change_counter = retrieve32bits(pPgHdr, 24);
-
- /* Increment the value just read and write it back to byte 24. */
- change_counter++;
- put32bits(((char*)PGHDR_TO_DATA(pPgHdr))+24, change_counter);
- pPager->iChangeCount = change_counter;
-
- /* Release the page reference. */
- sqlite3PagerUnref(pPgHdr);
- pPager->changeCountDone = 1;
- }
- return SQLITE_OK;
-}
-
-/*
-** Sync the database file for the pager pPager. zMaster points to the name
-** of a master journal file that should be written into the individual
-** journal file. zMaster may be NULL, which is interpreted as no master
-** journal (a single database transaction).
-**
-** This routine ensures that the journal is synced, all dirty pages written
-** to the database file and the database file synced. The only thing that
-** remains to commit the transaction is to delete the journal file (or
-** master journal file if specified).
-**
-** Note that if zMaster==NULL, this does not overwrite a previous value
-** passed to an sqlite3PagerSync() call.
-**
-** If parameter nTrunc is non-zero, then the pager file is truncated to
-** nTrunc pages (this is used by auto-vacuum databases).
-*/
-int sqlite3PagerSync(Pager *pPager, const char *zMaster, Pgno nTrunc){
- int rc = SQLITE_OK;
-
- PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n",
- pPager->zFilename, zMaster, nTrunc);
-
- /* If this is an in-memory db, or no pages have been written to, or this
- ** function has already been called, it is a no-op.
- */
- if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
- PgHdr *pPg;
- assert( pPager->journalOpen );
-
- /* If a master journal file name has already been written to the
- ** journal file, then no sync is required. This happens when it is
- ** written, then the process fails to upgrade from a RESERVED to an
- ** EXCLUSIVE lock. The next time the process tries to commit the
- ** transaction the m-j name will have already been written.
- */
- if( !pPager->setMaster ){
- rc = pager_incr_changecounter(pPager);
- if( rc!=SQLITE_OK ) goto sync_exit;
-#ifndef SQLITE_OMIT_AUTOVACUUM
- if( nTrunc!=0 ){
- /* If this transaction has made the database smaller, then all pages
- ** being discarded by the truncation must be written to the journal
- ** file.
- */
- Pgno i;
- int iSkip = PAGER_MJ_PGNO(pPager);
- for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
- if( !(pPager->aInJournal[i/8] & (1<<(i&7))) && i!=iSkip ){
- rc = sqlite3PagerGet(pPager, i, &pPg);
- if( rc!=SQLITE_OK ) goto sync_exit;
- rc = sqlite3PagerWrite(pPg);
- sqlite3PagerUnref(pPg);
- if( rc!=SQLITE_OK ) goto sync_exit;
- }
- }
- }
-#endif
- rc = writeMasterJournal(pPager, zMaster);
- if( rc!=SQLITE_OK ) goto sync_exit;
- rc = syncJournal(pPager);
- if( rc!=SQLITE_OK ) goto sync_exit;
- }
-
-#ifndef SQLITE_OMIT_AUTOVACUUM
- if( nTrunc!=0 ){
- rc = sqlite3PagerTruncate(pPager, nTrunc);
- if( rc!=SQLITE_OK ) goto sync_exit;
- }
-#endif
-
- /* Write all dirty pages to the database file */
- pPg = pager_get_all_dirty_pages(pPager);
- rc = pager_write_pagelist(pPg);
- if( rc!=SQLITE_OK ) goto sync_exit;
-
- /* Sync the database file. */
- if( !pPager->noSync ){
- rc = sqlite3OsSync(pPager->fd, 0);
- }
- IOTRACE(("DBSYNC %p\n", pPager))
-
- pPager->state = PAGER_SYNCED;
- }else if( MEMDB && nTrunc!=0 ){
- rc = sqlite3PagerTruncate(pPager, nTrunc);
- }
-
-sync_exit:
- return rc;
-}
-
#ifndef SQLITE_OMIT_AUTOVACUUM
/*
** Move the page identified by pData to location pgno in the file.
for(i=0; rc==SQLITE_OK && i<db->nDb; i++){
Btree *pBt = db->aDb[i].pBt;
if( pBt ){
- rc = sqlite3BtreeSync(pBt, 0);
+ rc = sqlite3BtreeCommitPhaseOne(pBt, 0);
}
}
- /* Do the commit only if all databases successfully synced.
- ** If one of the BtreeCommit() calls fails, this indicates an IO error
- ** while deleting or truncating a journal file. It is unlikely, but
- ** could happen. In this case abandon processing and return the error.
+ /* Do the commit only if all databases successfully complete phase 1.
+ ** If one of the BtreeCommitPhaseTwo() calls fails, this indicates an
+ ** IO error while deleting or truncating a journal file. It is unlikely,
+ ** but could happen. In this case abandon processing and return the error.
*/
for(i=0; rc==SQLITE_OK && i<db->nDb; i++){
Btree *pBt = db->aDb[i].pBt;
if( pBt ){
- rc = sqlite3BtreeCommit(pBt);
+ rc = sqlite3BtreeCommitPhaseTwo(pBt);
}
}
if( rc==SQLITE_OK ){
** sets the master journal pointer in each individual journal. If
** an error occurs here, do not delete the master journal file.
**
- ** If the error occurs during the first call to sqlite3BtreeSync(),
- ** then there is a chance that the master journal file will be
- ** orphaned. But we cannot delete it, in case the master journal
- ** file name was written into the journal file before the failure
- ** occured.
+ ** If the error occurs during the first call to
+ ** sqlite3BtreeCommitPhaseOne(), then there is a chance that the
+ ** master journal file will be orphaned. But we cannot delete it,
+ ** in case the master journal file name was written into the journal
+ ** file before the failure occurred.
*/
for(i=0; rc==SQLITE_OK && i<db->nDb; i++){
Btree *pBt = db->aDb[i].pBt;
if( pBt && sqlite3BtreeIsInTrans(pBt) ){
- rc = sqlite3BtreeSync(pBt, zMaster);
+ rc = sqlite3BtreeCommitPhaseOne(pBt, zMaster);
}
}
sqlite3OsClose(&master);
}
/* All files and directories have already been synced, so the following
- ** calls to sqlite3BtreeCommit() are only closing files and deleting
- ** journals. If something goes wrong while this is happening we don't
- ** really care. The integrity of the transaction is already guaranteed,
- ** but some stray 'cold' journals may be lying around. Returning an
- ** error code won't help matters.
+ ** calls to sqlite3BtreeCommitPhaseTwo() are only closing files and
+ ** deleting or truncating journals. If something goes wrong while
+ ** this is happening we don't really care. The integrity of the
+ ** transaction is already guaranteed, but some stray 'cold' journals
+ ** may be lying around. Returning an error code won't help matters.
*/
disable_simulated_io_errors();
for(i=0; i<db->nDb; i++){
Btree *pBt = db->aDb[i].pBt;
if( pBt ){
- sqlite3BtreeCommit(pBt);
+ sqlite3BtreeCommitPhaseTwo(pBt);
}
}
enable_simulated_io_errors();