){
int rc;
id->dirfd = -1;
- id->fd = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY, 0644);
- if( id->fd<0 ){
- id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
- if( id->fd<0 ){
+ id->h = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY, 0644);
+ if( id->h<0 ){
+ id->h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
+ if( id->h<0 ){
return SQLITE_CANTOPEN;
}
*pReadonly = 1;
*pReadonly = 0;
}
sqlite3OsEnterMutex();
- rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
+ rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
sqlite3OsLeaveMutex();
if( rc ){
- close(id->fd);
+ close(id->h);
return SQLITE_NOMEM;
}
id->locktype = 0;
- TRACE3("OPEN %-3d %s\n", id->fd, zFilename);
+ TRACE3("OPEN %-3d %s\n", id->h, zFilename);
OpenCounter(+1);
return SQLITE_OK;
}
return SQLITE_CANTOPEN;
}
id->dirfd = -1;
- id->fd = open(zFilename,
+ id->h = open(zFilename,
O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY, 0600);
- if( id->fd<0 ){
+ if( id->h<0 ){
return SQLITE_CANTOPEN;
}
sqlite3OsEnterMutex();
- rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
+ rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
sqlite3OsLeaveMutex();
if( rc ){
- close(id->fd);
+ close(id->h);
unlink(zFilename);
return SQLITE_NOMEM;
}
if( delFlag ){
unlink(zFilename);
}
- TRACE3("OPEN-EX %-3d %s\n", id->fd, zFilename);
+ TRACE3("OPEN-EX %-3d %s\n", id->h, zFilename);
OpenCounter(+1);
return SQLITE_OK;
}
int sqlite3OsOpenReadOnly(const char *zFilename, OsFile *id){
int rc;
id->dirfd = -1;
- id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
- if( id->fd<0 ){
+ id->h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
+ if( id->h<0 ){
return SQLITE_CANTOPEN;
}
sqlite3OsEnterMutex();
- rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
+ rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
sqlite3OsLeaveMutex();
if( rc ){
- close(id->fd);
+ close(id->h);
return SQLITE_NOMEM;
}
id->locktype = 0;
- TRACE3("OPEN-RO %-3d %s\n", id->fd, zFilename);
+ TRACE3("OPEN-RO %-3d %s\n", id->h, zFilename);
OpenCounter(+1);
return SQLITE_OK;
}
const char *zDirname,
OsFile *id
){
- if( id->fd<0 ){
+ if( id->h<0 ){
/* Do not open the directory if the corresponding file is not already
** open. */
return SQLITE_CANTOPEN;
** Close a file.
*/
int sqlite3OsClose(OsFile *id){
- sqlite3OsUnlock(id);
+ sqlite3OsUnlock(id, NO_LOCK);
if( id->dirfd>=0 ) close(id->dirfd);
id->dirfd = -1;
sqlite3OsEnterMutex();
/* If a malloc fails, just leak the file descriptor */
}else{
pOpen->aPending = aNew;
- pOpen->aPending[pOpen->nPending-1] = id->fd;
+ pOpen->aPending[pOpen->nPending-1] = id->h;
}
}else{
/* There are no outstanding locks so we can close the file immediately */
- close(id->fd);
+ close(id->h);
}
releaseLockInfo(id->pLock);
releaseOpenCnt(id->pOpen);
sqlite3OsLeaveMutex();
- TRACE2("CLOSE %-3d\n", id->fd);
+ TRACE2("CLOSE %-3d\n", id->h);
OpenCounter(-1);
return SQLITE_OK;
}
int got;
SimulateIOError(SQLITE_IOERR);
TIMER_START;
- got = read(id->fd, pBuf, amt);
+ got = read(id->h, pBuf, amt);
TIMER_END;
- TRACE4("READ %-3d %7d %d\n", id->fd, last_page, elapse);
+ TRACE4("READ %-3d %7d %d\n", id->h, last_page, elapse);
SEEK(0);
/* if( got<0 ) got = 0; */
if( got==amt ){
int wrote = 0;
SimulateIOError(SQLITE_IOERR);
TIMER_START;
- while( amt>0 && (wrote = write(id->fd, pBuf, amt))>0 ){
+ while( amt>0 && (wrote = write(id->h, pBuf, amt))>0 ){
amt -= wrote;
pBuf = &((char*)pBuf)[wrote];
}
TIMER_END;
- TRACE4("WRITE %-3d %7d %d\n", id->fd, last_page, elapse);
+ TRACE4("WRITE %-3d %7d %d\n", id->h, last_page, elapse);
SEEK(0);
if( amt>0 ){
return SQLITE_FULL;
*/
int sqlite3OsSeek(OsFile *id, off_t offset){
SEEK(offset/1024 + 1);
- lseek(id->fd, offset, SEEK_SET);
+ lseek(id->h, offset, SEEK_SET);
return SQLITE_OK;
}
*/
int sqlite3OsSync(OsFile *id){
SimulateIOError(SQLITE_IOERR);
- TRACE2("SYNC %-3d\n", id->fd);
- if( fsync(id->fd) ){
+ TRACE2("SYNC %-3d\n", id->h);
+ if( fsync(id->h) ){
return SQLITE_IOERR;
}else{
if( id->dirfd>=0 ){
*/
int sqlite3OsTruncate(OsFile *id, off_t nByte){
SimulateIOError(SQLITE_IOERR);
- return ftruncate(id->fd, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
+ return ftruncate(id->h, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
}
/*
int sqlite3OsFileSize(OsFile *id, off_t *pSize){
struct stat buf;
SimulateIOError(SQLITE_IOERR);
- if( fstat(id->fd, &buf)!=0 ){
+ if( fstat(id->h, &buf)!=0 ){
return SQLITE_IOERR;
}
*pSize = buf.st_size;
** non-zero. If the file is unlocked or holds only SHARED locks, then
** return zero.
*/
-int sqlite3OsCheckWriteLock(OsFile *id){
+int sqlite3OsCheckReservedLock(OsFile *id){
int r = 0;
sqlite3OsEnterMutex(); /* Needed because id->pLock is shared across threads */
lock.l_start = RESERVED_BYTE;
lock.l_len = 1;
lock.l_type = F_WRLCK;
- fcntl(id->fd, F_GETLK, &lock);
+ fcntl(id->h, F_GETLK, &lock);
if( lock.l_type!=F_UNLCK ){
r = 1;
}
}
sqlite3OsLeaveMutex();
- TRACE3("TEST WR-LOCK %d %d\n", id->fd, r);
+ TRACE3("TEST WR-LOCK %d %d\n", id->h, r);
return r;
}
** RESERVED -> (PENDING) -> EXCLUSIVE
** PENDING -> EXCLUSIVE
**
-** This routine will only increase a lock. The sqlite3OsUnlock() routine
-** erases all locks at once and returns us immediately to locking level 0.
-** It is not possible to lower the locking level one step at a time. You
-** must go straight to locking level 0.
+** This routine will only increase a lock. Use the sqlite3OsUnlock()
+** routine to lower a locking level.
*/
int sqlite3OsLock(OsFile *id, int locktype){
int rc = SQLITE_OK;
struct flock lock;
int s;
- TRACE5("LOCK %d %d was %d(%d)\n",
- id->fd, locktype, id->locktype, pLock->locktype);
+ TRACE6("LOCK %d %d was %d(%d,%d)\n",
+ id->h, locktype, id->locktype, pLock->locktype, pLock->cnt);
/* If there is already a lock of this type or more restrictive on the
** OsFile, do nothing. Don't use the end_lock: exit path, as
*/
lock.l_type = F_RDLCK;
lock.l_start = PENDING_BYTE;
- s = fcntl(id->fd, F_SETLK, &lock);
+ s = fcntl(id->h, F_SETLK, &lock);
if( s ){
rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
goto end_lock;
/* Now get the read-lock */
lock.l_start = SHARED_FIRST;
lock.l_len = SHARED_SIZE;
- s = fcntl(id->fd, F_SETLK, &lock);
+ s = fcntl(id->h, F_SETLK, &lock);
/* Drop the temporary PENDING lock */
lock.l_start = PENDING_BYTE;
lock.l_len = 1L;
lock.l_type = F_UNLCK;
- fcntl(id->fd, F_SETLK, &lock);
+ fcntl(id->h, F_SETLK, &lock);
if( s ){
rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
}else{
default:
assert(0);
}
- s = fcntl(id->fd, F_SETLK, &lock);
+ s = fcntl(id->h, F_SETLK, &lock);
if( s ){
rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
}
end_lock:
sqlite3OsLeaveMutex();
- TRACE4("LOCK %d %d %s\n", id->fd, locktype, rc==SQLITE_OK ? "ok" : "failed");
+ TRACE4("LOCK %d %d %s\n", id->h, locktype, rc==SQLITE_OK ? "ok" : "failed");
return rc;
}
/*
-** Unlock the given file descriptor. If the file descriptor was
-** not previously locked, then this routine is a no-op. If this
-** library was compiled with large file support (LFS) but LFS is not
-** available on the host, then an SQLITE_NOLFS is returned.
+** Lower the locking level on file descriptor id to locktype. locktype
+** must be either NO_LOCK or SHARED_LOCK.
+**
+** If the locking level of the file descriptor is already at or below
+** the requested locking level, this routine is a no-op.
+**
+** It is not possible for this routine to fail.
*/
-int sqlite3OsUnlock(OsFile *id){
- int rc;
- if( !id->locktype ) return SQLITE_OK;
- id->locktype = 0;
+int sqlite3OsUnlock(OsFile *id, int locktype){
+ struct lockInfo *pLock;
+ struct flock lock;
+
+ TRACE6("UNLOCK %d %d was %d(%d,%d)\n",
+ id->h, locktype, id->locktype, id->pLock->locktype, id->pLock->cnt);
+
+ assert( locktype<=SHARED_LOCK );
+ if( id->locktype<=locktype ){
+ return SQLITE_OK;
+ }
sqlite3OsEnterMutex();
- assert( id->pLock->cnt!=0 );
- if( id->pLock->cnt>1 ){
- id->pLock->cnt--;
- rc = SQLITE_OK;
- }else{
- struct flock lock;
- int s;
+ pLock = id->pLock;
+ assert( pLock->cnt!=0 );
+ if( id->locktype>SHARED_LOCK ){
+ assert( pLock->locktype==id->locktype );
lock.l_type = F_UNLCK;
lock.l_whence = SEEK_SET;
- lock.l_start = lock.l_len = 0L;
- s = fcntl(id->fd, F_SETLK, &lock);
- if( s!=0 ){
- rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
- }else{
- rc = SQLITE_OK;
- id->pLock->cnt = 0;
- id->pLock->locktype = 0;
- }
+ lock.l_start = PENDING_BYTE;
+ lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
+ fcntl(id->h, F_SETLK, &lock);
+ pLock->locktype = SHARED_LOCK;
}
+ if( locktype==NO_LOCK ){
+ struct openCnt *pOpen;
+
+ /* Decrement the shared lock counter. Release the lock using an
+ ** OS call only when all threads in this same process have released
+ ** the lock.
+ */
+ pLock->cnt--;
+ if( pLock->cnt==0 ){
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = lock.l_len = 0L;
+ fcntl(id->h, F_SETLK, &lock);
+ pLock->locktype = NO_LOCK;
+ }
- if( rc==SQLITE_OK ){
/* Decrement the count of locks against this same file. When the
** count reaches zero, close any other file descriptors whose close
** was deferred because of outstanding locks.
*/
- struct openCnt *pOpen = id->pOpen;
+ pOpen = id->pOpen;
pOpen->nLock--;
assert( pOpen->nLock>=0 );
if( pOpen->nLock==0 && pOpen->nPending>0 ){
}
}
sqlite3OsLeaveMutex();
- id->locktype = 0;
- return rc;
+ id->locktype = locktype;
+ return SQLITE_OK;
}
/*
** file simultaneously, or one process from reading the database while
** another is writing.
**
-** @(#) $Id: pager.c,v 1.114 2004/06/09 14:17:21 drh Exp $
+** @(#) $Id: pager.c,v 1.115 2004/06/09 17:37:28 drh Exp $
*/
#include "os.h" /* Must be first to enable large file support */
#include "sqliteInt.h"
** The page cache as a whole is always in one of the following
** states:
**
-** SQLITE_UNLOCK The page cache is not currently reading or
+** PAGER_UNLOCK The page cache is not currently reading or
** writing the database file. There is no
** data held in memory. This is the initial
** state.
**
-** SQLITE_READLOCK The page cache is reading the database.
+** PAGER_SHARED The page cache is reading the database.
** Writing is not permitted. There can be
** multiple readers accessing the same database
** file at the same time.
**
-** SQLITE_WRITELOCK The page cache is writing the database.
+** PAGER_RESERVED Writing is permitted to the page cache only.
+** The original database file has not been modified.
+** Other processes may still be reading the on-disk
+** database file.
+**
+** PAGER_EXCLUSIVE The page cache is writing the database.
** Access is exclusive. No other processes or
** threads can be reading or writing while one
** process is writing.
**
-** The page cache comes up in SQLITE_UNLOCK. The first time a
-** sqlite_page_get() occurs, the state transitions to SQLITE_READLOCK.
+** The page cache comes up in PAGER_UNLOCK. The first time a
+** sqlite_page_get() occurs, the state transitions to PAGER_SHARED.
** After all pages have been released using sqlite_page_unref(),
-** the state transitions back to SQLITE_UNLOCK. The first time
+** the state transitions back to PAGER_UNLOCK. The first time
** that sqlite_page_write() is called, the state transitions to
-** SQLITE_WRITELOCK. (Note that sqlite_page_write() can only be
+** PAGER_RESERVED. (Note that sqlite_page_write() can only be
** called on an outstanding page which means that the pager must
-** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
+** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
** The sqlite_page_rollback() and sqlite_page_commit() functions
-** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
+** transition the state from PAGER_RESERVED to PAGER_EXCLUSIVE to
+** PAGER_SHARED.
*/
-#define SQLITE_UNLOCK 0
-#define SQLITE_READLOCK 1
-#define SQLITE_WRITELOCK 2
+#define PAGER_UNLOCK 0
+#define PAGER_SHARED 1
+#define PAGER_RESERVED 2
+#define PAGER_EXCLUSIVE 3
/*
int stmtNRec; /* Number of records in stmt subjournal */
int nExtra; /* Add this many bytes to each in-memory page */
void (*xDestructor)(void*,int); /* Call this routine when freeing pages */
+ void (*xReiniter)(void*,int); /* Call this routine when reloading pages */
int pageSize; /* Number of bytes in a page */
int nPage; /* Total number of in-memory pages */
int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
u8 stmtAutoopen; /* Open stmt journal when main journal is opened*/
u8 noSync; /* Do not sync the journal if true */
u8 fullSync; /* Do extra syncs of the journal for robustness */
- u8 state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
+ u8 state; /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
u8 errMask; /* One of several kinds of errors */
u8 tempFile; /* zFilename is a temporary file */
u8 readOnly; /* True for a read-only database */
u8 needSync; /* True if an fsync() is needed on the journal */
- u8 dirtyFile; /* True if database file has changed in any way */
+ u8 dirtyCache; /* True if cached pages have changed */
u8 alwaysRollback; /* Disable dont_rollback() for all pages */
u8 memDb; /* True to inhibit all file I/O */
u8 *aInJournal; /* One bit for each page in the database file */
pPager->pAll = 0;
memset(pPager->aHash, 0, sizeof(pPager->aHash));
pPager->nPage = 0;
- if( pPager->state>=SQLITE_WRITELOCK ){
+ if( pPager->state>=PAGER_RESERVED ){
sqlite3pager_rollback(pPager);
}
- sqlite3OsUnlock(&pPager->fd);
- pPager->state = SQLITE_UNLOCK;
+ sqlite3OsUnlock(&pPager->fd, NO_LOCK);
+ pPager->state = PAGER_UNLOCK;
pPager->dbSize = -1;
pPager->nRef = 0;
assert( pPager->journalOpen==0 );
/*
** When this routine is called, the pager has the journal file open and
-** a write lock on the database. This routine releases the database
-** write lock and acquires a read lock in its place. The journal file
-** is deleted and closed.
+** a RESERVED or EXCLUSIVE lock on the database. This routine releases
+** the database lock and acquires a SHARED lock in its place. The journal
+** file is deleted and closed.
**
** TODO: Consider keeping the journal file open for temporary databases.
** This might give a performance improvement on windows where opening
** a file is an expensive operation.
*/
static int pager_unwritelock(Pager *pPager){
- int rc;
PgHdr *pPg;
- if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK;
+ if( pPager->state<PAGER_RESERVED ){
+ return SQLITE_OK;
+ }
sqlite3pager_stmt_commit(pPager);
if( pPager->stmtOpen ){
sqlite3OsClose(&pPager->stfd);
pPg->needSync = 0;
}
}else{
- assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
- }
- rc = sqlite3OsLock(&pPager->fd, SHARED_LOCK);
- if( rc==SQLITE_OK ){
- pPager->state = SQLITE_READLOCK;
- }else{
- /* This can only happen if a process does a BEGIN, then forks and the
- ** child process does the COMMIT. Because of the semantics of unix
- ** file locking, the unlock will fail.
- */
- pPager->state = SQLITE_UNLOCK;
+ assert( pPager->dirtyCache==0 || pPager->useJournal==0 );
}
- return rc;
+ sqlite3OsUnlock(&pPager->fd, SHARED_LOCK);
+ pPager->state = PAGER_SHARED;
+ return SQLITE_OK;
}
/*
** at the same time, if there is one.
*/
pPg = pager_lookup(pPager, pgRec.pgno);
- TRACE2("PLAYBACK %d\n", pgRec.pgno);
+ TRACE2("PLAYBACK page %d\n", pgRec.pgno);
sqlite3OsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
rc = sqlite3OsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
if( pPg ){
return rc;
}
+/*
+** Make every page in the cache agree with what is on disk. In other words,
+** reread the disk to reset the state of the cache.
+**
+** This routine is called after a rollback in which some of the dirty cache
+** pages had never been written out to disk. We need to roll back the
+** cache content and the easiest way to do that is to reread the old content
+** back from the disk.
+*/
+static int pager_reload_cache(Pager *pPager){  /* returns SQLITE_OK or the first non-zero sqlite3OsRead() result */
+ PgHdr *pPg;
+ int rc = SQLITE_OK;
+ for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){  /* walk every page on the pAll list */
+ char zBuf[SQLITE_PAGE_SIZE];  /* scratch copy of the page image to restore */
+ if( !pPg->dirty ) continue;  /* only pages flagged dirty are reloaded */
+ if( (int)pPg->pgno <= pPager->origDbSize ){  /* page number lies within origDbSize: reread it from the file */
+ sqlite3OsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
+ rc = sqlite3OsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE);
+ TRACE2("REFETCH page %d\n", pPg->pgno);
+ CODEC(pPager, zBuf, pPg->pgno, 2);  /* NOTE(review): runs on zBuf even when the read failed; rc is only tested on the next line */
+ if( rc ) break;  /* stop at the first read error; this page keeps its dirty flag */
+ }else{  /* page number is past origDbSize: use an all-zero image instead of reading */
+ memset(zBuf, 0, SQLITE_PAGE_SIZE);
+ }
+ if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){  /* overwrite if unreferenced or if the cached bytes differ from zBuf */
+ memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE);
+ if( pPager->xReiniter ){  /* a registered callback receives the page data pointer and pageSize */
+ pPager->xReiniter(PGHDR_TO_DATA(pPg), pPager->pageSize);
+ }else{  /* no callback registered: zero the nExtra bytes of the extra area */
+ memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
+ }
+ }
+ pPg->needSync = 0;  /* both flags are cleared for every dirty page processed, overwritten or not */
+ pPg->dirty = 0;
+ }
+ return rc;
+}
+
+
/*
** Playback the journal and thus restore the database file to
** the state it was in before we started making changes.
** pages by reading them back from the original database.
*/
if( rc==SQLITE_OK ){
- PgHdr *pPg;
- for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
- char zBuf[SQLITE_PAGE_SIZE];
- if( !pPg->dirty ) continue;
- if( (int)pPg->pgno <= pPager->origDbSize ){
- sqlite3OsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
- rc = sqlite3OsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE);
- TRACE2("REFETCH %d\n", pPg->pgno);
- CODEC(pPager, zBuf, pPg->pgno, 2);
- if( rc ) break;
- }else{
- memset(zBuf, 0, SQLITE_PAGE_SIZE);
- }
- if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){
- memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE);
- memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
- }
- pPg->needSync = 0;
- pPg->dirty = 0;
- }
+ pager_reload_cache(pPager);
}
end_playback:
pPager->stmtJSize = 0;
pPager->nPage = 0;
pPager->mxPage = mxPage>5 ? mxPage : 10;
- pPager->state = SQLITE_UNLOCK;
+ pPager->state = PAGER_UNLOCK;
pPager->errMask = 0;
pPager->tempFile = tempFile;
pPager->memDb = memDb;
pPager->xDestructor = xDesc;
}
+/*
+** Set the reinitializer for this pager. If not NULL, the reinitializer
+** is called when the content of a page in cache is restored to its original
+** value as a result of a rollback. The callback gives higher-level code
+** an opportunity to restore the EXTRA section to agree with the restored
+** page data.
+*/
+void sqlite3pager_set_reiniter(Pager *pPager, void (*xReinit)(void*,int)){
+ pPager->xReiniter = xReinit;  /* stored as given, replacing any previously stored pointer */
+}
+
/*
** Return the total number of pages in the disk file associated with
** pPager.
return 0;
}
n /= SQLITE_PAGE_SIZE;
- if( pPager->state!=SQLITE_UNLOCK ){
+ if( pPager->state!=PAGER_UNLOCK ){
pPager->dbSize = n;
}
return n;
int sqlite3pager_close(Pager *pPager){
PgHdr *pPg, *pNext;
switch( pPager->state ){
- case SQLITE_WRITELOCK: {
+ case PAGER_RESERVED:
+ case PAGER_EXCLUSIVE: {
sqlite3pager_rollback(pPager);
if( !pPager->memDb ){
- sqlite3OsUnlock(&pPager->fd);
+ sqlite3OsUnlock(&pPager->fd, NO_LOCK);
}
assert( pPager->journalOpen==0 );
break;
}
- case SQLITE_READLOCK: {
+ case PAGER_SHARED: {
if( !pPager->memDb ){
- sqlite3OsUnlock(&pPager->fd);
+ sqlite3OsUnlock(&pPager->fd, NO_LOCK);
}
break;
}
/* Write the nRec value into the journal file header */
off_t szJ;
if( pPager->fullSync ){
- TRACE1("SYNC\n");
+ TRACE2("SYNC journal of %d\n", pPager->fd.h);
rc = sqlite3OsSync(&pPager->jfd);
if( rc!=0 ) return rc;
}
pPager->nRec*JOURNAL_PG_SZ(journal_format);
sqlite3OsSeek(&pPager->jfd, szJ);
}
- TRACE1("SYNC\n");
+ TRACE2("SYNC journal of %d\n", pPager->fd.h);
rc = sqlite3OsSync(&pPager->jfd);
if( rc!=0 ) return rc;
pPager->journalStarted = 1;
** database file. If there is already an EXCLUSIVE lock, the following
** calls to sqlite3OsLock() are no-ops.
**
- ** The upgrade from a RESERVED to PENDING might return SQLITE_BUSY on
- ** windows because the windows locking mechanism acquires a transient
- ** PENDING lock during its attempts to get a SHARED lock. So if another
- ** process were trying to get a SHARED lock at the same time this process
- ** is upgrading from RESERVED to PENDING, the two could collide.
+ ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
+ ** through an intermediate state PENDING. A PENDING lock prevents new
+ ** readers from attaching to the database but is insufficient for us to
+ ** write. The idea of a PENDING lock is to prevent new readers from
+ ** coming in while we wait for existing readers to clear.
**
- ** The upgrade from PENDING to EXCLUSIVE can return SQLITE_BUSY if there
- ** are still active readers that were created before the PENDING lock
- ** was acquired.
+ ** While the pager is in the RESERVED state, the original database file
+ ** is unchanged and we can rollback without having to playback the
+ ** journal into the original database file. Once we transition to
+ ** EXCLUSIVE, it means the database file has been changed and any rollback
+ ** will require a journal playback.
*/
do {
rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK);
if( rc!=SQLITE_OK ){
return rc;
}
+ pPager->state = PAGER_EXCLUSIVE;
while( pList ){
assert( pList->dirty );
sqlite3OsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
- TRACE2("STORE %d\n", pList->pgno);
+ TRACE2("STORE page %d\n", pList->pgno);
rc = sqlite3OsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_PAGE_SIZE);
CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 0);
if( rc ) return rc;
if( rc!=SQLITE_OK ){
return rc;
}
- pPager->state = SQLITE_READLOCK;
+ pPager->state = PAGER_SHARED;
/* If a journal file exists, and there is no RESERVED lock on the
** database file, then it either needs to be played back or deleted.
*/
if( pPager->useJournal &&
sqlite3OsFileExists(pPager->zJournal) &&
- !sqlite3OsCheckWriteLock(&pPager->fd)
+ !sqlite3OsCheckReservedLock(&pPager->fd)
){
int rc;
/* Get an EXCLUSIVE lock on the database file. */
rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK);
if( rc!=SQLITE_OK ){
- if( sqlite3OsUnlock(&pPager->fd)!=SQLITE_OK ){
- /* This should never happen! */
- rc = SQLITE_INTERNAL;
- }
+ sqlite3OsUnlock(&pPager->fd, NO_LOCK);
+ pPager->state = PAGER_UNLOCK;
return rc;
}
- pPager->state = SQLITE_WRITELOCK;
+ pPager->state = PAGER_EXCLUSIVE;
/* Open the journal for reading only. Return SQLITE_BUSY if
** we are unable to open the journal file.
*/
rc = sqlite3OsOpenReadOnly(pPager->zJournal, &pPager->jfd);
if( rc!=SQLITE_OK ){
- rc = sqlite3OsUnlock(&pPager->fd);
- assert( rc==SQLITE_OK );
+ sqlite3OsUnlock(&pPager->fd, NO_LOCK);
+ pPager->state = PAGER_UNLOCK;
return SQLITE_BUSY;
}
pPager->journalOpen = 1;
}else{
/* Search for page in cache */
pPg = pager_lookup(pPager, pgno);
- if( pPager->memDb && pPager->state==SQLITE_UNLOCK ){
- pPager->state = SQLITE_READLOCK;
+ if( pPager->memDb && pPager->state==PAGER_UNLOCK ){
+ pPager->state = PAGER_SHARED;
}
}
if( pPg==0 ){
assert( pPager->memDb==0 );
sqlite3OsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE);
rc = sqlite3OsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
- TRACE2("FETCH %d\n", pPg->pgno);
+ TRACE2("FETCH page %d\n", pPg->pgno);
CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
if( rc!=SQLITE_OK ){
off_t fileSize;
}
/*
-** Create a journal file for pPager. There should already be a write
-** lock on the database file when this routine is called.
+** Create a journal file for pPager. There should already be a RESERVED
+** or EXCLUSIVE lock on the database file when this routine is called.
**
** Return SQLITE_OK if everything. Return an error code and release the
** write lock if anything goes wrong.
*/
static int pager_open_journal(Pager *pPager){
int rc;
- assert( pPager->state==SQLITE_WRITELOCK );
+ assert( pPager->state>=PAGER_RESERVED );
assert( pPager->journalOpen==0 );
assert( pPager->useJournal );
sqlite3pager_pagecount(pPager);
pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
if( pPager->aInJournal==0 ){
- sqlite3OsLock(&pPager->fd, SHARED_LOCK);
- pPager->state = SQLITE_READLOCK;
+ sqlite3OsUnlock(&pPager->fd, SHARED_LOCK);
+ pPager->state = PAGER_SHARED;
return SQLITE_NOMEM;
}
rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
if( rc!=SQLITE_OK ){
sqliteFree(pPager->aInJournal);
pPager->aInJournal = 0;
- sqlite3OsLock(&pPager->fd, SHARED_LOCK);
- pPager->state = SQLITE_READLOCK;
+ sqlite3OsUnlock(&pPager->fd, SHARED_LOCK);
+ pPager->state = PAGER_SHARED;
return SQLITE_CANTOPEN;
}
sqlite3OsOpenDirectory(pPager->zDirectory, &pPager->jfd);
** files, the opening of the journal file is deferred until there is an
** actual need to write to the journal.
**
-** If the database is already write-locked, this routine is a no-op.
+** If the database is already reserved for writing, this routine is a no-op.
*/
int sqlite3pager_begin(void *pData, int nMaster){
PgHdr *pPg = DATA_TO_PGHDR(pData);
int rc = SQLITE_OK;
assert( pPg->nRef>0 );
assert( nMaster>=0 );
- assert( pPager->state!=SQLITE_UNLOCK );
- if( pPager->state==SQLITE_READLOCK ){
+ assert( pPager->state!=PAGER_UNLOCK );
+ if( pPager->state==PAGER_SHARED ){
assert( pPager->aInJournal==0 );
if( pPager->memDb ){
- pPager->state = SQLITE_WRITELOCK;
+ pPager->state = PAGER_EXCLUSIVE;
pPager->origDbSize = pPager->dbSize;
}else{
int busy = 1;
do {
- /* If the library grabs an EXCLUSIVE lock here, as in the commented
- ** out line, then it exhibits the old locking behaviour - a writer
- ** excludes all readers, not just other writers.
- */
- /* rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK); */
rc = sqlite3OsLock(&pPager->fd, RESERVED_LOCK);
}while( rc==SQLITE_BUSY &&
pPager->pBusyHandler &&
return rc;
}
pPager->nMaster = nMaster;
- pPager->state = SQLITE_WRITELOCK;
- pPager->dirtyFile = 0;
- TRACE1("TRANSACTION\n");
+ pPager->state = PAGER_RESERVED;
+ pPager->dirtyCache = 0;
+ TRACE3("TRANSACTION %d nMaster=%d\n", pPager->fd.h, nMaster);
if( pPager->useJournal && !pPager->tempFile ){
rc = pager_open_journal(pPager);
}
** changes to a page.
**
** The first time this routine is called, the pager creates a new
-** journal and acquires a write lock on the database. If the write
+** journal and acquires a RESERVED lock on the database. If the RESERVED
** lock could not be acquired, this routine returns SQLITE_BUSY. The
** calling routine must check for that return value and be careful not to
** change any page data until this routine returns SQLITE_OK.
*/
pPg->dirty = 1;
if( pPg->inJournal && (pPg->inStmt || pPager->stmtInUse==0) ){
- pPager->dirtyFile = 1;
+ pPager->dirtyCache = 1;
return SQLITE_OK;
}
** First check to see that the transaction journal exists and
** create it if it does not.
*/
- assert( pPager->state!=SQLITE_UNLOCK );
+ assert( pPager->state!=PAGER_UNLOCK );
rc = sqlite3pager_begin(pData, 0);
if( rc!=SQLITE_OK ){
return rc;
}
- assert( pPager->state==SQLITE_WRITELOCK );
+ assert( pPager->state>=PAGER_RESERVED );
if( !pPager->journalOpen && pPager->useJournal ){
rc = pager_open_journal(pPager);
if( rc!=SQLITE_OK ) return rc;
}
assert( pPager->journalOpen || !pPager->useJournal );
- pPager->dirtyFile = 1;
+ pPager->dirtyCache = 1;
- /* The transaction journal now exists and we have a write lock on the
- ** main database file. Write the current page to the transaction
- ** journal if it is not there already.
+ /* The transaction journal now exists and we have a RESERVED or an
+ ** EXCLUSIVE lock on the main database file. Write the current page to
+ ** the transaction journal if it is not there already.
*/
if( !pPg->inJournal && (pPager->useJournal || pPager->memDb) ){
if( (int)pPg->pgno <= pPager->origDbSize ){
u32 saved;
if( pPager->memDb ){
PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
- TRACE2("JOURNAL %d\n", pPg->pgno);
+ TRACE2("JOURNAL page %d\n", pPg->pgno);
assert( pHist->pOrig==0 );
pHist->pOrig = sqliteMallocRaw( pPager->pageSize );
if( pHist->pOrig ){
store32bits(pPg->pgno, pPg, -4);
CODEC(pPager, pData, pPg->pgno, 7);
rc = sqlite3OsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
- TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync);
+ TRACE3("JOURNAL page %d needSync=%d\n", pPg->pgno, pPg->needSync);
CODEC(pPager, pData, pPg->pgno, 0);
if( journal_format>=JOURNAL_FORMAT_3 ){
*(u32*)PGHDR_TO_EXTRA(pPg) = saved;
}
}else{
pPg->needSync = !pPager->journalStarted && !pPager->noSync;
- TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync);
+ TRACE3("APPEND page %d needSync=%d\n", pPg->pgno, pPg->needSync);
}
if( pPg->needSync ){
pPager->needSync = 1;
if( pHist->pStmt ){
memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize);
}
- TRACE2("STMT-JOURNAL %d\n", pPg->pgno);
+ TRACE2("STMT-JOURNAL page %d\n", pPg->pgno);
}else{
store32bits(pPg->pgno, pPg, -4);
CODEC(pPager, pData, pPg->pgno, 7);
rc = sqlite3OsWrite(&pPager->stfd, ((char*)pData)-4, SQLITE_PAGE_SIZE+4);
- TRACE2("STMT-JOURNAL %d\n", pPg->pgno);
+ TRACE2("STMT-JOURNAL page %d\n", pPg->pgno);
CODEC(pPager, pData, pPg->pgno, 0);
if( rc!=SQLITE_OK ){
sqlite3pager_rollback(pPager);
** corruption during the next transaction.
*/
}else{
- TRACE2("DONT_WRITE %d\n", pgno);
+ TRACE3("DONT_WRITE page %d of %d\n", pgno, pPager->fd.h);
pPg->dirty = 0;
}
}
PgHdr *pPg = DATA_TO_PGHDR(pData);
Pager *pPager = pPg->pPager;
- if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
+ if( pPager->state!=PAGER_EXCLUSIVE || pPager->journalOpen==0 ) return;
if( pPg->alwaysRollback || pPager->alwaysRollback || pPager->memDb ) return;
if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
assert( pPager->aInJournal!=0 );
pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
page_add_to_stmt_list(pPg);
}
- TRACE2("DONT_ROLLBACK %d\n", pPg->pgno);
+ TRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, pPager->fd.h);
}
if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){
assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
rc = pager_errcode(pPager);
return rc;
}
- if( pPager->state!=SQLITE_WRITELOCK ){
+ if( pPager->state<PAGER_RESERVED ){
return SQLITE_ERROR;
}
- TRACE1("COMMIT\n");
+ TRACE2("COMMIT %d\n", pPager->fd.h);
if( pPager->memDb ){
pPg = pager_get_all_dirty_pages(pPager);
while( pPg ){
pPg = pPg->pDirty;
}
pPager->pStmt = 0;
- pPager->state = SQLITE_READLOCK;
+ pPager->state = PAGER_SHARED;
return SQLITE_OK;
}
-#if 0
- if( pPager->dirtyFile==0 ){
+ if( pPager->dirtyCache==0 ){
/* Exit early (without doing the time-consuming sqlite3OsSync() calls)
** if there have been no changes to the database file. */
assert( pPager->needSync==0 );
return rc;
}
assert( pPager->journalOpen );
+#if 0
rc = syncJournal(pPager, 0);
if( rc!=SQLITE_OK ){
goto commit_abort;
}
#endif
rc = sqlite3pager_sync(pPager, 0);
- if( rc!=SQLITE_OK ) goto commit_abort;
-
+ if( rc!=SQLITE_OK ){
+ goto commit_abort;
+ }
rc = pager_unwritelock(pPager);
pPager->dbSize = -1;
return rc;
}
/*
-** Rollback all changes. The database falls back to read-only mode.
+** Rollback all changes. The database falls back to PAGER_SHARED mode.
** All in-memory cache pages revert to their original data contents.
** The journal is deleted.
**
*/
int sqlite3pager_rollback(Pager *pPager){
int rc;
- TRACE1("ROLLBACK\n");
+ TRACE2("ROLLBACK %d\n", pPager->fd.h);
if( pPager->memDb ){
PgHdr *p;
for(p=pPager->pAll; p; p=p->pNextAll){
pPager->dbSize = pPager->origDbSize;
memoryTruncate(pPager);
pPager->stmtInUse = 0;
- pPager->state = SQLITE_READLOCK;
+ pPager->state = PAGER_SHARED;
return SQLITE_OK;
}
- if( !pPager->dirtyFile || !pPager->journalOpen ){
+ if( !pPager->dirtyCache || !pPager->journalOpen ){
rc = pager_unwritelock(pPager);
pPager->dbSize = -1;
return rc;
}
if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
- if( pPager->state>=SQLITE_WRITELOCK ){
+ if( pPager->state>=PAGER_EXCLUSIVE ){
pager_playback(pPager, 1);
}
return pager_errcode(pPager);
}
- if( pPager->state!=SQLITE_WRITELOCK ){
- return SQLITE_OK;
+ if( pPager->state==PAGER_RESERVED ){
+ int rc2;
+ rc = pager_reload_cache(pPager);
+ rc2 = pager_unwritelock(pPager);
+ if( rc==SQLITE_OK ){
+ rc = rc2;
+ }
+ }else{
+ rc = pager_playback(pPager, 1);
}
- rc = pager_playback(pPager, 1);
if( rc!=SQLITE_OK ){
rc = SQLITE_CORRUPT;
pPager->errMask |= PAGER_ERR_CORRUPT;
int rc;
char zTemp[SQLITE_TEMPNAME_SIZE];
assert( !pPager->stmtInUse );
- TRACE1("STMT-BEGIN\n");
+ TRACE2("STMT-BEGIN %d\n", pPager->fd.h);
if( pPager->memDb ){
pPager->stmtInUse = 1;
pPager->stmtSize = pPager->dbSize;
int sqlite3pager_stmt_commit(Pager *pPager){
if( pPager->stmtInUse ){
PgHdr *pPg, *pNext;
- TRACE1("STMT-COMMIT\n");
+ TRACE2("STMT-COMMIT %d\n", pPager->fd.h);
if( !pPager->memDb ){
sqlite3OsSeek(&pPager->stfd, 0);
/* sqlite3OsTruncate(&pPager->stfd, 0); */
int sqlite3pager_stmt_rollback(Pager *pPager){
int rc;
if( pPager->stmtInUse ){
- TRACE1("STMT-ROLLBACK\n");
+ TRACE2("STMT-ROLLBACK %d\n", pPager->fd.h);
if( pPager->memDb ){
PgHdr *pPg;
for(pPg=pPager->pStmt; pPg; pPg=pPg->pNextStmt){
/* If this is an in-memory db, or no pages have been written to, this
** function is a no-op.
*/
- if( !pPager->memDb && pPager->dirtyFile ){
+ if( !pPager->memDb && pPager->dirtyCache ){
PgHdr *pPg;
assert( pPager->journalOpen );