From: dan Date: Tue, 13 Apr 2010 19:27:31 +0000 (+0000) Subject: Add experimental locking scheme. X-Git-Tag: version-3.7.2~455^2~85 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=64d039e512c8a2cf63dab37616b4fb362790d3b3;p=thirdparty%2Fsqlite.git Add experimental locking scheme. FossilOrigin-Name: 3f958e87c33d667d299b03ffdef58db5dc6363f4 --- diff --git a/manifest b/manifest index 8eff149f3c..faab67f851 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,5 @@ ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA1 - -C Fix\san\suninitialized\svariable\sin\sreadDbPage\sof\spager.c. -D 2010-04-13T15:30:53 +C Add\sexperimental\slocking\sscheme. +D 2010-04-13T19:27:31 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in 4f2f967b7e58a35bb74fb7ec8ae90e0f4ca7868b F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -134,8 +131,8 @@ F src/journal.c b0ea6b70b532961118ab70301c00a33089f9315c F src/legacy.c a199d7683d60cef73089e892409113e69c23a99f F src/lempar.c 7f026423f4d71d989e719a743f98a1cbd4e6d99e F src/loadext.c 1c7a61ce1281041f437333f366a96aa0d29bb581 -F src/log.c 6e8f296f6c566a297cd074c4165f1695fd1df5b7 -F src/log.h e691f7d935d6a8ad63b9de2e6014627056f01e1a +F src/log.c d89988bb26a3cd414858c97642a612b4ce6e540f +F src/log.h a2654af46ce7b5732f4d5a731abfdd180f0a06d9 F src/main.c c0e7192bad5b90544508b241eb2487ac661de890 F src/malloc.c a08f16d134f0bfab6b20c3cd142ebf3e58235a6a F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 @@ -157,7 +154,7 @@ F src/os_common.h 240c88b163b02c21a9f21f87d49678a0aa21ff30 F src/os_os2.c 75a8c7b9a00a2cf1a65f9fa4afbc27d46634bb2f F src/os_unix.c 5bf0015cebe2f21635da2af983c348eb88b3b4c1 F src/os_win.c 1c7453c2df4dab26d90ff6f91272aea18bcf7053 -F src/pager.c c4937e7175f0aa66b9122d05cc163c039f854855 +F src/pager.c 9e9ee38c923fd225d73127751b7959bd826d0686 F src/pager.h ce5d076f3860a5f2d7460c582cd68383343b33cf F src/parse.y ace5c7a125d9f2a410e431ee3209034105045f7e F src/pcache.c 
ace8f6a5ecd4711cc66a1b23053be7109bd437cf @@ -806,14 +803,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 27dc5977c19e717afd65d3805557e38dec7bedcb -R ca7988a67487ccd526ae5248710a4503 -U drh -Z c2421237a2b7de8b8aa2d4d5608541bc ------BEGIN PGP SIGNATURE----- -Version: GnuPG v1.4.6 (GNU/Linux) - -iD8DBQFLxI4woxKgR168RlERAp8sAJ9x+oJYnqOMgUNsC198WiuQpy6qyQCfcaVa -jjZp98/Y+EUb/i8plNNLbiM= -=2Ctn ------END PGP SIGNATURE----- +P f4e1150fed2c520c7c52612cb1019429d78dc32a +R da172d13acb33c194abd32bf4ed20cc9 +U dan +Z cd10eeca0187a805032040fdd27191f0 diff --git a/manifest.uuid b/manifest.uuid index 95954aef22..43b6c91f02 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f4e1150fed2c520c7c52612cb1019429d78dc32a \ No newline at end of file +3f958e87c33d667d299b03ffdef58db5dc6363f4 \ No newline at end of file diff --git a/src/log.c b/src/log.c index 7d37dfa1de..2b2764f361 100644 --- a/src/log.c +++ b/src/log.c @@ -13,6 +13,7 @@ typedef struct LogSummaryHdr LogSummaryHdr; typedef struct LogSummary LogSummary; typedef struct LogCheckpoint LogCheckpoint; +typedef struct LogLock LogLock; /* @@ -55,29 +56,51 @@ struct LogSummary { int nRef; /* Number of pointers to this structure */ int fd; /* File descriptor open on log-summary */ char *zPath; /* Path to associated WAL file */ + LogLock *pLock; /* Linked list of locks on this object */ LogSummary *pNext; /* Next in global list */ int nData; /* Size of aData allocation/mapping */ u32 *aData; /* File body */ }; + /* -** List of all LogSummary objects created by this process. Protected by -** static mutex LOG_SUMMARY_MUTEX. TODO: Should have a dedicated mutex -** here instead of borrowing the LRU mutex. +** The four lockable regions associated with each log-summary. 
A connection +** may take either a SHARED or EXCLUSIVE lock on each. */ -#define LOG_SUMMARY_MUTEX SQLITE_MUTEX_STATIC_LRU -static LogSummary *pLogSummary = 0; +#define LOG_REGION_A 0x01 +#define LOG_REGION_B 0x02 +#define LOG_REGION_C 0x04 +#define LOG_REGION_D 0x08 + +/* +** A single instance of this structure is allocated as part of each +** connection to a database log. All structures associated with the +** same log file are linked together into a list using LogLock.pNext +** starting at LogSummary.pLock. +** +** The mLock field of the structure describes the locks (if any) +** currently held by the connection. If a SHARED lock is held on +** any of the four locking regions, then the associated LOG_REGION_X +** bit (see above) is set. If an EXCLUSIVE lock is held on the region, +** then the (LOG_REGION_X << 8) bit is set. +*/ +struct LogLock { + LogLock *pNext; /* Next lock on the same log */ + u32 mLock; /* Mask of locks */ +}; struct Log { LogSummary *pSummary; /* Log file summary data */ sqlite3_vfs *pVfs; /* The VFS used to create pFd */ sqlite3_file *pFd; /* File handle for log file */ int sync_flags; /* Flags to use with OsSync() */ - int isLocked; /* True if a snapshot is held open */ + int isLocked; /* Non-zero if a snapshot is held open */ int isWriteLocked; /* True if this is the writer connection */ LogSummaryHdr hdr; /* Log summary header for current snapshot */ + LogLock lock; /* Lock held by this connection (if any) */ }; + /* ** This structure is used to implement an iterator that iterates through ** all frames in the log in database page order. Where two or more frames @@ -102,6 +125,15 @@ struct LogCheckpoint { } aSegment[1]; }; + +/* +** List of all LogSummary objects created by this process. Protected by +** static mutex LOG_SUMMARY_MUTEX. TODO: Should have a dedicated mutex +** here instead of borrowing the LRU mutex. 
+*/ +#define LOG_SUMMARY_MUTEX SQLITE_MUTEX_STATIC_LRU +static LogSummary *pLogSummary = 0; + /* ** Generate an 8 byte checksum based on the data in array aByte[] and the ** initial values of aCksum[0] and aCksum[1]. The checksum is written into @@ -664,6 +696,9 @@ int sqlite3LogOpen( rc = sqlite3OsLock(pRet->pFd, SQLITE_LOCK_SHARED); } + pRet->lock.pNext = pSummary->pLock; + pSummary->pLock = &pRet->lock; + out: sqlite3_mutex_leave(mutex); sqlite3_free(zWal); @@ -838,9 +873,15 @@ int sqlite3LogClose( ){ int rc = SQLITE_OK; if( pLog ){ + LogLock **ppL; LogSummary *pSummary = pLog->pSummary; sqlite3_mutex *mutex = 0; + sqlite3_mutex_enter(pSummary->mutex); + for(ppL=&pSummary->pLock; *ppL!=&pLog->lock; ppL=&(*ppL)->pNext); + *ppL = pLog->lock.pNext; + sqlite3_mutex_leave(pSummary->mutex); + if( sqlite3GlobalConfig.bCoreMutex ){ mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX); } @@ -939,7 +980,108 @@ static void logLeaveMutex(Log *pLog){ } /* -** The caller must hold a SHARED lock on the database file. +** Values for the third parameter to logLockRegion(). 
+*/ +#define LOG_UNLOCK 0 +#define LOG_RDLOCK 1 +#define LOG_WRLOCK 2 + +static int logLockRegion(Log *pLog, u32 mRegion, int op){ + LogSummary *pSummary = pLog->pSummary; + LogLock *p; /* Used to iterate through in-process locks */ + u32 mNew; /* New locks on file */ + u32 mOld; /* Old locks on file */ + u32 mNewLock; /* New locks held by pLog */ + + assert( + /* Writer lock operations */ + (op==LOG_WRLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D)) + + /* Reader lock operations */ + || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B)) + || (op==LOG_RDLOCK && mRegion==(LOG_REGION_D)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_D)) + + /* Checkpointer lock operations */ + || (op==LOG_WRLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C)) + || (op==LOG_WRLOCK && mRegion==(LOG_REGION_A)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B|LOG_REGION_C)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C)) + ); + + sqlite3_mutex_enter(pSummary->mutex); + + /* If obtaining (not releasing) a lock, check if there exist any + ** conflicting locks in process. Return SQLITE_BUSY in this case. + */ + if( op ){ + u32 mConflict = (mRegion<<8) | ((op==LOG_WRLOCK) ? mRegion : 0); + for(p=pSummary->pLock; p; p=p->pNext){ + if( p!=&pLog->lock && (p->mLock & mConflict) ){ + sqlite3_mutex_leave(pSummary->mutex); + return SQLITE_BUSY; + } + } + } + + /* Determine the new lock mask for this log connection */ + switch( op ){ + case LOG_UNLOCK: + mNewLock = (pLog->lock.mLock & ~(mRegion|(mRegion<<8))); + break; + case LOG_RDLOCK: + mNewLock = ((pLog->lock.mLock & ~(mRegion<<8)) | mRegion); + break; + default: + assert( op==LOG_WRLOCK ); + mNewLock = (pLog->lock.mLock | (mRegion<<8) | mRegion); + break; + } + + /* Determine the current and desired sets of locks at the file level. 
*/ + mNew = 0; + for(p=pSummary->pLock; p; p=p->pNext){ + assert( (p->mLock & (p->mLock<<8))==(p->mLock & 0x00000F00) ); + if( p!=&pLog->lock ) mNew |= p->mLock; + } + mOld = mNew | pLog->lock.mLock; + mNew = mNew | mNewLock; + + if( mNew!=mOld ){ + int rc; + u32 mChange = (mNew^mOld) | ((mNew^mOld)>>8); + struct flock f; + memset(&f, 0, sizeof(f)); + f.l_type = (op==LOG_WRLOCK?F_WRLCK:(op==LOG_RDLOCK?F_RDLCK:F_UNLCK)); + f.l_whence = SEEK_SET; + + if( mChange & LOG_REGION_A ) f.l_start = 12; + else if( mChange & LOG_REGION_B ) f.l_start = 13; + else if( mChange & LOG_REGION_C ) f.l_start = 14; + else if( mChange & LOG_REGION_D ) f.l_start = 15; + + if( mChange & LOG_REGION_D ) f.l_len = 16 - f.l_start; + else if( mChange & LOG_REGION_C ) f.l_len = 15 - f.l_start; + else if( mChange & LOG_REGION_B ) f.l_len = 14 - f.l_start; + else if( mChange & LOG_REGION_A ) f.l_len = 13 - f.l_start; + + rc = fcntl(pSummary->fd, F_SETLK, &f); + if( rc!=0 ){ + sqlite3_mutex_leave(pSummary->mutex); + return SQLITE_BUSY; + } + } + + pLog->lock.mLock = mNewLock; + sqlite3_mutex_leave(pSummary->mutex); + return SQLITE_OK; +} + +/* +** Lock a snapshot. ** ** If this call obtains a new read-lock and the database contents have been ** modified since the most recent call to LogCloseSnapshot() on this Log @@ -950,6 +1092,36 @@ static void logLeaveMutex(Log *pLog){ int sqlite3LogOpenSnapshot(Log *pLog, int *pChanged){ int rc = SQLITE_OK; if( pLog->isLocked==0 ){ + int nAttempt; + + /* Obtain a snapshot-lock on the log-summary file. The procedure + ** for obtaining the snapshot lock is: + ** + ** 1. Attempt a SHARED lock on regions A and B. + ** 2a. If step 1 is successful, drop the lock on region B. + ** 2b. If step 1 is unsuccessful, attempt a SHARED lock on region D. + ** 3. Repeat the above until the lock attempt in step 1 or 2b is + ** successful. + ** + ** If neither of the locks can be obtained after 5 tries, presumably + ** something is wrong (i.e. 
a process not following the locking protocol). + ** Return an error code in this case. + */ + rc = SQLITE_BUSY; + for(nAttempt=0; nAttempt<5 && rc==SQLITE_BUSY; nAttempt++){ + rc = logLockRegion(pLog, LOG_REGION_A|LOG_REGION_B, LOG_RDLOCK); + if( rc==SQLITE_BUSY ){ + rc = logLockRegion(pLog, LOG_REGION_D, LOG_RDLOCK); + if( rc==SQLITE_OK ) pLog->isLocked = LOG_REGION_D; + }else{ + logLockRegion(pLog, LOG_REGION_B, LOG_UNLOCK); + pLog->isLocked = LOG_REGION_A; + } + } + if( rc!=SQLITE_OK ){ + return rc; + } + if( SQLITE_OK==(rc = logEnterMutex(pLog)) ){ u32 aCksum[2] = {1, 1}; u32 aHdr[LOGSUMMARY_HDR_NFIELD+2]; @@ -967,7 +1139,6 @@ int sqlite3LogOpenSnapshot(Log *pLog, int *pChanged){ *pChanged = 1; } if( rc==SQLITE_OK ){ - pLog->isLocked = 1; if( memcmp(&pLog->hdr, aHdr, sizeof(LogSummaryHdr)) ){ *pChanged = 1; memcpy(&pLog->hdr, aHdr, LOGSUMMARY_HDR_NFIELD*sizeof(u32)); @@ -975,6 +1146,11 @@ int sqlite3LogOpenSnapshot(Log *pLog, int *pChanged){ } logLeaveMutex(pLog); } + + if( rc!=SQLITE_OK ){ + /* An error occurred while attempting log recovery. */ + sqlite3LogCloseSnapshot(pLog); + } } return rc; } @@ -983,6 +1159,10 @@ int sqlite3LogOpenSnapshot(Log *pLog, int *pChanged){ ** Unlock the current snapshot. 
*/ void sqlite3LogCloseSnapshot(Log *pLog){ + if( pLog->isLocked ){ + assert( pLog->isLocked==LOG_REGION_A || pLog->isLocked==LOG_REGION_D ); + logLockRegion(pLog, pLog->isLocked, LOG_UNLOCK); + } pLog->isLocked = 0; } @@ -1072,11 +1252,20 @@ void sqlite3LogMaxpgno(Log *pLog, Pgno *pPgno){ int sqlite3LogWriteLock(Log *pLog, int op){ assert( pLog->isLocked ); if( op ){ + + /* Obtain the writer lock */ + int rc = logLockRegion(pLog, LOG_REGION_C|LOG_REGION_D, LOG_WRLOCK); + if( rc!=SQLITE_OK ){ + return rc; + } + if( memcmp(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr)) ){ return SQLITE_BUSY; } pLog->isWriteLocked = 1; + }else if( pLog->isWriteLocked ){ + logLockRegion(pLog, LOG_REGION_C|LOG_REGION_D, LOG_UNLOCK); memcpy(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr)); pLog->isWriteLocked = 0; } @@ -1226,18 +1415,25 @@ int sqlite3LogFrames( int sqlite3LogCheckpoint( Log *pLog, /* Log connection */ sqlite3_file *pFd, /* File descriptor open on db file */ - u8 *zBuf /* Temporary buffer to use */ + u8 *zBuf, /* Temporary buffer to use */ + int (*xBusyHandler)(void *), /* Pointer to busy-handler function */ + void *pBusyHandlerArg /* Argument to pass to xBusyHandler */ ){ + int rc; - /* Assert() that the caller is holding an EXCLUSIVE lock on the - ** database file. 
- */ -#ifdef SQLITE_DEBUG - int lock; - sqlite3OsFileControl(pFd, SQLITE_FCNTL_LOCKSTATE, &lock); - assert( lock>=4 ); -#endif + do { + rc = logLockRegion(pLog, LOG_REGION_B|LOG_REGION_C, LOG_WRLOCK); + }while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) ); + if( rc!=SQLITE_OK ) return rc; + + do { + rc = logLockRegion(pLog, LOG_REGION_A, LOG_WRLOCK); + }while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) ); + if( rc!=SQLITE_OK ) return rc; - return logCheckpoint(pLog, pFd, zBuf); + rc = logCheckpoint(pLog, pFd, zBuf); + + logLockRegion(pLog, LOG_REGION_A|LOG_REGION_B|LOG_REGION_C, LOG_UNLOCK); + return rc; } diff --git a/src/log.h b/src/log.h index 63fa8ae433..816f9354eb 100644 --- a/src/log.h +++ b/src/log.h @@ -55,7 +55,9 @@ int sqlite3LogFrames(Log *pLog, int, PgHdr *, Pgno, int, int); int sqlite3LogCheckpoint( Log *pLog, /* Log connection */ sqlite3_file *pFd, /* File descriptor open on db file */ - u8 *zBuf /* Temporary buffer to use */ + u8 *zBuf, /* Temporary buffer to use */ + int (*xBusyHandler)(void *), /* Pointer to busy-handler function */ + void *pBusyHandlerArg /* Argument to pass to xBusyHandler */ ); #endif /* _LOG_H_ */ diff --git a/src/pager.c b/src/pager.c index 4e8213cf9b..cd310ab887 100644 --- a/src/pager.c +++ b/src/pager.c @@ -3120,6 +3120,7 @@ static int pager_write_pagelist(PgHdr *pList){ ** EXCLUSIVE, it means the database file has been changed and any rollback ** will require a journal playback. */ + assert( !pagerUseLog(pList->pPager) ); assert( pPager->state>=PAGER_RESERVED ); rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); @@ -3785,21 +3786,13 @@ int sqlite3PagerSharedLock(Pager *pPager){ if( pagerUseLog(pPager) ){ int changed = 0; - /* TODO: Change the following block to grab a WAL read-lock. Or, - ** combine obtaining the read-lock with LogOpenSnapshot()? 
*/ - rc = pager_wait_on_lock(pPager, SHARED_LOCK); - if( rc!=SQLITE_OK ){ - assert( pPager->state==PAGER_UNLOCK ); - return pager_error(pPager, rc); - } - rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed); if( rc==SQLITE_OK ){ if( changed ){ pager_reset(pPager); assert( pPager->errCode || pPager->dbSizeValid==0 ); } - pPager->state = PAGER_SHARED; + pPager->state = PAGER_SHARED; /* TODO: Is this right? */ rc = sqlite3PagerPagecount(pPager, &changed); } }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){ @@ -4330,20 +4323,7 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ assert( pPager->pInJournal==0 ); assert( !MEMDB && !pPager->tempFile ); - /* Obtain a RESERVED lock on the database file. If the exFlag parameter - ** is true, then immediately upgrade this to an EXCLUSIVE lock. The - ** busy-handler callback can be used when upgrading to the EXCLUSIVE - ** lock, but not when obtaining the RESERVED lock. - */ - rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK); - if( rc==SQLITE_OK ){ - pPager->state = PAGER_RESERVED; - if( exFlag ){ - rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); - } - } - - if( rc==SQLITE_OK && pagerUseLog(pPager) ){ + if( pagerUseLog(pPager) ){ /* Grab the write lock on the log file. If successful, upgrade to ** PAGER_EXCLUSIVE state. Otherwise, return an error code to the caller. ** The busy-handler is not invoked if another connection already @@ -4352,6 +4332,20 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ rc = sqlite3LogWriteLock(pPager->pLog, 1); if( rc==SQLITE_OK ){ pPager->dbOrigSize = pPager->dbSize; + pPager->state = PAGER_RESERVED; + } + }else{ + /* Obtain a RESERVED lock on the database file. If the exFlag parameter + ** is true, then immediately upgrade this to an EXCLUSIVE lock. The + ** busy-handler callback can be used when upgrading to the EXCLUSIVE + ** lock, but not when obtaining the RESERVED lock. 
+ */ + rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK); + if( rc==SQLITE_OK ){ + pPager->state = PAGER_RESERVED; + if( exFlag ){ + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); + } } } @@ -5657,11 +5651,10 @@ sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){ int sqlite3PagerCheckpoint(Pager *pPager){ int rc = SQLITE_OK; if( pPager->pLog ){ - rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); - if( rc==SQLITE_OK ){ - u8 *zBuf = (u8 *)pPager->pTmpSpace; - rc = sqlite3LogCheckpoint(pPager->pLog, pPager->fd, zBuf); - } + u8 *zBuf = (u8 *)pPager->pTmpSpace; + rc = sqlite3LogCheckpoint(pPager->pLog, pPager->fd, + zBuf, pPager->xBusyHandler, pPager->pBusyHandlerArg + ); } return rc; }