From: dan Date: Sat, 17 Apr 2010 12:31:37 +0000 (+0000) Subject: Enhancements to wal-mode locking scheme. X-Git-Tag: version-3.7.2~455^2~74 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3de777fd8cb66d710c02783e9786c756fbd91db8;p=thirdparty%2Fsqlite.git Enhancements to wal-mode locking scheme. FossilOrigin-Name: 8549c286497f3d2cd118be1334fce00d6f8a26c4 --- diff --git a/manifest b/manifest index ba84322467..dacfee46f5 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sthe\slog\sfile\sformat\sto\sinclude\sa\ssmall\s(12\sbyte)\sheader\sat\sthe\sstart\sof\sthe\sfile. -D 2010-04-16T13:59:31 +C Enhancements\sto\swal-mode\slocking\sscheme. +D 2010-04-17T12:31:37 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in 4f2f967b7e58a35bb74fb7ec8ae90e0f4ca7868b F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -131,7 +131,7 @@ F src/journal.c b0ea6b70b532961118ab70301c00a33089f9315c F src/legacy.c a199d7683d60cef73089e892409113e69c23a99f F src/lempar.c 7f026423f4d71d989e719a743f98a1cbd4e6d99e F src/loadext.c 1c7a61ce1281041f437333f366a96aa0d29bb581 -F src/log.c 11f683a3429319fb2731aa357717bf9117cdcba4 +F src/log.c 6ac96c84ca4564f156de564ceddb3c1a2319ac6e F src/log.h a2654af46ce7b5732f4d5a731abfdd180f0a06d9 F src/main.c c0e7192bad5b90544508b241eb2487ac661de890 F src/malloc.c a08f16d134f0bfab6b20c3cd142ebf3e58235a6a @@ -154,7 +154,7 @@ F src/os_common.h 240c88b163b02c21a9f21f87d49678a0aa21ff30 F src/os_os2.c 75a8c7b9a00a2cf1a65f9fa4afbc27d46634bb2f F src/os_unix.c 5bf0015cebe2f21635da2af983c348eb88b3b4c1 F src/os_win.c 1c7453c2df4dab26d90ff6f91272aea18bcf7053 -F src/pager.c 35c7e3b5bbad76f04e7143d2d4676a269a8ba9fc +F src/pager.c 674d6558a618ca7714ba28741e6dc681692bffe2 F src/pager.h ce5d076f3860a5f2d7460c582cd68383343b33cf F src/parse.y ace5c7a125d9f2a410e431ee3209034105045f7e F src/pcache.c ace8f6a5ecd4711cc66a1b23053be7109bd437cf @@ -757,10 +757,10 @@ F test/vtabE.test 7c4693638d7797ce2eda17af74292b97e705cc61 F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5 F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8 F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d -F test/wal.test bb1fa35fc03353a1a154f583f01e5093e25ba001 +F test/wal.test 5fa3cdf2e93e79b0891af6cf7fab8ef9e07a23c6 F test/walcrash.test 45cfbab30bb7cbe0b2e9d5cabe90dbcad10cb89b F test/walslow.test 38076d5fad49e3678027be0f8110e6a32d531dc2 -F test/walthread.test 58cd64b06f186251f09f64e4918fb74a7e52c963 +F test/walthread.test 27e44ee6fd02f1f494a24f999c97086af3ab739d F test/where.test de337a3fe0a459ec7c93db16a519657a90552330 F test/where2.test 45eacc126aabb37959a387aa83e59ce1f1f03820 F test/where3.test aa44a9b29e8c9f3d7bb94a3bb3a95b31627d520d @@ -804,7 +804,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 67d2a89ec2d593a077eb19a6ea2b06cb1c2e9ba8 -R 2c35134540abe427220a5914099a1c09 +P 9865d14d6041874bc1239ce7a061d5c75f2d33c9 +R f6bb150cf1f3c0e2bfeac5bd76d233a7 U dan -Z a25fb37e86e611cc50a9ddc963124e5a +Z ea6a1c06d626e2f5d8edb2ab802e8ea3 diff --git a/manifest.uuid b/manifest.uuid index e6f912dc82..8375084062 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9865d14d6041874bc1239ce7a061d5c75f2d33c9 \ No newline at end of file +8549c286497f3d2cd118be1334fce00d6f8a26c4 \ No newline at end of file diff --git a/src/log.c b/src/log.c index 73b44863d0..f0b3a73ebf 100644 --- a/src/log.c +++ b/src/log.c @@ -11,19 +11,19 @@ ** The log header is 12 bytes in size and consists of the following three ** big-endian 32-bit unsigned integer values: ** -** 0: Database page size, -** 4: Randomly selected salt value 1, -** 8: Randomly selected salt value 2. +** 0: Database page size, +** 4: Randomly selected salt value 1, +** 8: Randomly selected salt value 2. ** ** Immediately following the log header are zero or more log frames. Each ** frame itself consists of a 16-byte header followed by a bytes ** of page data. The header is broken into 4 big-endian 32-bit unsigned ** integer values, as follows: ** -** 0: Page number. -** 4: For commit records, the size of the database image in pages +** 0: Page number. +** 4: For commit records, the size of the database image in pages ** after the commit. For all other records, zero. -** 8: Checksum value 1. +** 8: Checksum value 1. ** 12: Checksum value 2. */ @@ -106,13 +106,19 @@ struct LogSummary { /* ** The four lockable regions associated with each log-summary. A connection -** may take either a SHARED or EXCLUSIVE lock on each. +** may take either a SHARED or EXCLUSIVE lock on each. An ORed combination +** of the following bitmasks is passed as the second argument to the +** logLockRegion() function. */ #define LOG_REGION_A 0x01 #define LOG_REGION_B 0x02 #define LOG_REGION_C 0x04 #define LOG_REGION_D 0x08 +#define LOG_LOCK_MUTEX 12 +#define LOG_LOCK_DMH 13 +#define LOG_LOCK_REGION 14 + /* ** A single instance of this structure is allocated as part of each ** connection to a database log. All structures associated with the @@ -316,14 +322,19 @@ static int logSummaryMap(LogSummary *pSummary, int nByte){ ** Regardless of the value of isTruncate, close the file-descriptor ** opened on the log-summary file. */ -static int logSummaryUnmap(LogSummary *pSummary, int isTruncate){ +static int logSummaryUnmap(LogSummary *pSummary, int isUnlink){ int rc = SQLITE_OK; if( pSummary->aData ){ assert( pSummary->fd>0 ); munmap(pSummary->aData, pSummary->nData); pSummary->aData = 0; - if( isTruncate ){ - rc = (ftruncate(pSummary->fd, 0) ? SQLITE_IOERR : SQLITE_OK); + if( isUnlink ){ + char *zFile = sqlite3_mprintf("%s-summary", pSummary->zPath); + if( !zFile ){ + rc = SQLITE_NOMEM; + } + unlink(zFile); + sqlite3_free(zFile); } } if( pSummary->fd>0 ){ @@ -589,12 +600,197 @@ finished: return rc; } +/* +** Values for the third parameter to logLockRegion(). +*/ +#define LOG_UNLOCK 0 +#define LOG_RDLOCK 1 +#define LOG_WRLOCK 2 +#define LOG_WRLOCKW 3 + +static int logLockFd(LogSummary *pSummary, int iStart, int nByte, int op){ + int aType[4] = { + F_UNLCK, /* LOG_UNLOCK */ + F_RDLCK, /* LOG_RDLOCK */ + F_WRLCK, /* LOG_WRLOCK */ + F_WRLCK /* LOG_WRLOCKW */ + }; + int aOp[4] = { + F_SETLK, /* LOG_UNLOCK */ + F_SETLK, /* LOG_RDLOCK */ + F_SETLK, /* LOG_WRLOCK */ + F_SETLKW /* LOG_WRLOCKW */ + }; + + struct flock f; /* Locking operation */ + int rc; /* Value returned by fcntl() */ + + assert( ArraySize(aType)==ArraySize(aOp) ); + assert( op>=0 && opfd, aOp[op], &f); + return (rc==0) ? SQLITE_OK : SQLITE_BUSY; +} + +static int logLockRegion(Log *pLog, u32 mRegion, int op){ + LogSummary *pSummary = pLog->pSummary; + LogLock *p; /* Used to iterate through in-process locks */ + u32 mOther; /* Locks held by other connections */ + u32 mNew; /* New mask for pLog */ + + assert( + /* Writer lock operations */ + (op==LOG_WRLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D)) + + /* Normal reader lock operations */ + || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B)) + + /* Region D reader lock operations */ + || (op==LOG_RDLOCK && mRegion==(LOG_REGION_D)) + || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_D)) + + /* Checkpointer lock operations */ + || (op==LOG_WRLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C)) + || (op==LOG_WRLOCK && mRegion==(LOG_REGION_A)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B|LOG_REGION_C)) + ); + + /* Assert that a connection never tries to go from an EXCLUSIVE to a + ** SHARED lock on a region. Moving from SHARED to EXCLUSIVE sometimes + ** happens though (when a region D reader upgrades to a writer). + */ + assert( op!=LOG_RDLOCK || 0==(pLog->lock.mLock & (mRegion<<8)) ); + + sqlite3_mutex_enter(pSummary->mutex); + + /* Calculate a mask of logs held by all connections in this process apart + ** from this one. The least significant byte of the mask contains a mask + ** of the SHARED logs held. The next least significant byte of the mask + ** indicates the EXCLUSIVE locks held. For example, to test if some other + ** connection is holding a SHARED lock on region A, or an EXCLUSIVE lock + ** on region C, do: + ** + ** hasSharedOnA = (mOther & (LOG_REGION_A<<0)); + ** hasExclusiveOnC = (mOther & (LOG_REGION_C<<8)); + ** + ** In all masks, if the bit in the EXCLUSIVE byte mask is set, so is the + ** corresponding bit in the SHARED mask. + */ + mOther = 0; + for(p=pSummary->pLock; p; p=p->pNext){ + assert( (p->mLock & (p->mLock<<8))==(p->mLock&0x0000FF00) ); + if( p!=&pLog->lock ){ + mOther |= p->mLock; + } + } + + /* If this call is to lock a region (not to unlock one), test if locks held + ** by any other connection in this process prevent the new locks from + ** begin granted. If so, exit the summary mutex and return SQLITE_BUSY. + */ + if( op && (mOther & (mRegion << (op==LOG_RDLOCK ? 8 : 0))) ){ + sqlite3_mutex_leave(pSummary->mutex); + return SQLITE_BUSY; + } + + /* Figure out the new log mask for this connection. */ + switch( op ){ + case LOG_UNLOCK: + mNew = (pLog->lock.mLock & ~(mRegion|(mRegion<<8))); + break; + case LOG_RDLOCK: + mNew = (pLog->lock.mLock | mRegion); + break; + default: + assert( op==LOG_WRLOCK ); + mNew = (pLog->lock.mLock | (mRegion<<8) | mRegion); + break; + } + + /* Now modify the locks held on the log-summary file descriptor. This + ** file descriptor is shared by all log connections in this process. + ** Therefore: + ** + ** + If one or more log connections in this process hold a SHARED lock + ** on a region, the file-descriptor should hold a SHARED lock on + ** the file region. + ** + ** + If a log connection in this process holds an EXCLUSIVE lock on a + ** region, the file-descriptor should also hold an EXCLUSIVE lock on + ** the region in question. + ** + ** If this is an LOG_UNLOCK operation, only regions for which no other + ** connection holds a lock should actually be unlocked. And if this + ** is a LOG_RDLOCK operation and other connections already hold all + ** the required SHARED locks, then no system call is required. + */ + if( op==LOG_UNLOCK ){ + mRegion = (mRegion & ~mOther); + } + if( (op==LOG_WRLOCK) + || (op==LOG_UNLOCK && mRegion) + || (op==LOG_RDLOCK && (mOther&mRegion)!=mRegion) + ){ + struct LockMap { + int iStart; /* Byte offset to start locking operation */ + int iLen; /* Length field for locking operation */ + } aMap[] = { + /* 0000 */ {0, 0}, /* 0001 */ {4+LOG_LOCK_REGION, 1}, + /* 0010 */ {3+LOG_LOCK_REGION, 1}, /* 0011 */ {3+LOG_LOCK_REGION, 2}, + /* 0100 */ {2+LOG_LOCK_REGION, 1}, /* 0101 */ {0, 0}, + /* 0110 */ {2+LOG_LOCK_REGION, 2}, /* 0111 */ {2+LOG_LOCK_REGION, 3}, + /* 1000 */ {1+LOG_LOCK_REGION, 1}, /* 1001 */ {0, 0}, + /* 1010 */ {0, 0}, /* 1011 */ {0, 0}, + /* 1100 */ {1+LOG_LOCK_REGION, 2}, /* 1101 */ {0, 0}, + /* 1110 */ {0, 0}, /* 1111 */ {0, 0} + }; + int rc; /* Return code of logLockFd() */ + + assert( mRegionmutex); + return rc; + } + } + + pLog->lock.mLock = mNew; + sqlite3_mutex_leave(pSummary->mutex); + return SQLITE_OK; +} + +static int logLockDMH(LogSummary *pSummary, int eLock){ + assert( eLock==LOG_RDLOCK || eLock==LOG_WRLOCK ); + return logLockFd(pSummary, LOG_LOCK_DMH, 1, eLock); +} + +static int logLockMutex(LogSummary *pSummary, int eLock){ + assert( eLock==LOG_WRLOCKW || eLock==LOG_UNLOCK ); + logLockFd(pSummary, LOG_LOCK_MUTEX, 1, eLock); + return SQLITE_OK; +} + + /* ** This function intializes the connection to the log-summary identified ** by struct pSummary. */ -static int logSummaryInit(LogSummary *pSummary, sqlite3_file *pFd){ +static int logSummaryInit( + LogSummary *pSummary, /* Log summary object to initialize */ + sqlite3_file *pFd /* File descriptor open on log file */ +){ int rc; /* Return Code */ char *zFile; /* File name for summary file */ @@ -614,36 +810,35 @@ static int logSummaryInit(LogSummary *pSummary, sqlite3_file *pFd){ return SQLITE_IOERR; } - /* Grab an exclusive lock the summary file. Then mmap() it. TODO: This - ** code needs to be enhanced to support a growable mapping. For now, just - ** make the mapping very large to start with. + /* Grab an exclusive lock the summary file. Then mmap() it. + ** + ** TODO: This code needs to be enhanced to support a growable mapping. + ** For now, just make the mapping very large to start with. The + ** pages should not be allocated until they are first accessed anyhow, + ** so using a large mapping consumes no more resources than a smaller + ** one would. */ - rc = logSummaryLock(pSummary); + assert( sqlite3_mutex_held(pSummary->mutex) ); + rc = logLockMutex(pSummary, LOG_WRLOCKW); if( rc!=SQLITE_OK ) return rc; rc = logSummaryMap(pSummary, 512*1024); if( rc!=SQLITE_OK ) goto out; - /* Grab a SHARED lock on the log file. Then try to upgrade to an EXCLUSIVE - ** lock. If successful, then this is the first (and only) connection to - ** the database. In this case assume the contents of the log-summary - ** cannot be trusted. Zero the log-summary header to make sure. - ** - ** The SHARED lock on the log file is not released until the connection - ** to the database is closed. + /* Try to obtain an EXCLUSIVE lock on the dead-mans-hand region. If this + ** is possible, the contents of the log-summary file (if any) may not + ** be trusted. Zero the log-summary header before continuing. */ - rc = sqlite3OsLock(pFd, SQLITE_LOCK_SHARED); - if( rc!=SQLITE_OK ) goto out; - rc = sqlite3OsLock(pFd, SQLITE_LOCK_EXCLUSIVE); + rc = logLockDMH(pSummary, LOG_WRLOCK); if( rc==SQLITE_OK ){ - /* This is the first and only connection. */ memset(pSummary->aData, 0, (LOGSUMMARY_HDR_NFIELD+2)*sizeof(u32) ); - rc = sqlite3OsUnlock(pFd, SQLITE_LOCK_SHARED); - }else if( rc==SQLITE_BUSY ){ - rc = SQLITE_OK; + } + rc = logLockDMH(pSummary, LOG_RDLOCK); + if( rc!=SQLITE_OK ){ + return SQLITE_IOERR; } out: - logSummaryUnlock(pSummary); + logLockMutex(pSummary, LOG_UNLOCK); return rc; } @@ -652,6 +847,12 @@ static int logSummaryInit(LogSummary *pSummary, sqlite3_file *pFd){ ** database file does not actually have to exist. zDb is used only to ** figure out the name of the log file to open. If the log file does not ** exist it is created by this call. +** +** A SHARED lock should be held on the database file when this function +** is called. The purpose of this SHARED lock is to prevent any other +** client from unlinking the log or log-summary file. If another process +** were to do this just after this client opened one of these files, the +** system would be badly broken. */ int sqlite3LogOpen( sqlite3_vfs *pVfs, /* vfs module to open log file with */ @@ -666,11 +867,10 @@ int sqlite3LogOpen( char *zWal = 0; /* Path to WAL file */ int nWal; /* Length of zWal in bytes */ - /* Zero output variables */ assert( zDb ); - *ppLog = 0; /* Allocate an instance of struct Log to return. */ + *ppLog = 0; pRet = (Log *)sqlite3MallocZero(sizeof(Log) + pVfs->szOsFile); if( !pRet ) goto out; pRet->pVfs = pVfs; @@ -726,15 +926,12 @@ int sqlite3LogOpen( /* Object pSummary is shared between all connections to the database made ** by this process. So at this point it may or may not be connected to - ** the log-summary. If it is not, connect it. Otherwise, just take the - ** SHARED lock on the log file. + ** the log-summary. If it is not, connect it. */ sqlite3_mutex_enter(pSummary->mutex); mutex = pSummary->mutex; if( pSummary->fd<0 ){ rc = logSummaryInit(pSummary, pRet->pFd); - }else{ - rc = sqlite3OsLock(pRet->pFd, SQLITE_LOCK_SHARED); } pRet->lock.pNext = pSummary->pLock; @@ -940,45 +1137,43 @@ int sqlite3LogClose( **/ pSummary->nRef--; if( pSummary->nRef==0 ){ + int rc; LogSummary **pp; + for(pp=&pLogSummary; *pp!=pSummary; pp=&(*pp)->pNext); + *pp = (*pp)->pNext; + + sqlite3_mutex_leave(mutex); - rc = logSummaryLock(pSummary); + rc = sqlite3OsLock(pFd, SQLITE_LOCK_EXCLUSIVE); if( rc==SQLITE_OK ){ - int isTruncate = 0; - int rc2 = sqlite3OsLock(pLog->pFd, SQLITE_LOCK_EXCLUSIVE); - if( rc2==SQLITE_OK ){ - /* This is the last connection to the database (including other - ** processes). Do three things: - ** - ** 1. Checkpoint the db. - ** 2. Truncate the log file to zero bytes. - ** 3. Truncate the log-summary file to zero bytes. - */ - rc2 = logCheckpoint(pLog, pFd, zBuf); - if( rc2==SQLITE_OK ){ - rc2 = sqlite3OsTruncate(pLog->pFd, 0); - } - isTruncate = 1; - }else if( rc2==SQLITE_BUSY ){ - rc2 = SQLITE_OK; + + /* This is the last connection to the database (including other + ** processes). Do three things: + ** + ** 1. Checkpoint the db. + ** 2. Truncate the log file. + ** 3. Unlink the log-summary file. + */ + rc = logCheckpoint(pLog, pFd, zBuf); + if( rc==SQLITE_OK ){ + rc = sqlite3OsDelete(pLog->pVfs, pSummary->zPath, 0); } - logSummaryUnmap(pSummary, isTruncate); - sqlite3OsUnlock(pLog->pFd, SQLITE_LOCK_NONE); - rc = logSummaryUnlock(pSummary); - if( rc2!=SQLITE_OK ) rc = rc2; + + logSummaryUnmap(pSummary, 1); + }else{ + if( rc==SQLITE_BUSY ){ + rc = SQLITE_OK; + } + logSummaryUnmap(pSummary, 0); } + sqlite3OsUnlock(pFd, SQLITE_LOCK_NONE); - /* Remove the LogSummary object from the global list. Then free the - ** mutex and the object itself. - */ - for(pp=&pLogSummary; *pp!=pSummary; pp=&(*pp)->pNext); - *pp = (*pp)->pNext; sqlite3_mutex_free(pSummary->mutex); sqlite3_free(pSummary); + }else{ + sqlite3_mutex_leave(mutex); } - sqlite3_mutex_leave(mutex); - /* Close the connection to the log file and free the Log handle. */ sqlite3OsClose(pLog->pFd); sqlite3_free(pLog); @@ -1012,7 +1207,7 @@ static int logEnterMutex(Log *pLog){ int rc; sqlite3_mutex_enter(pSummary->mutex); - rc = logSummaryLock(pSummary); + rc = logLockMutex(pSummary, LOG_WRLOCKW); if( rc!=SQLITE_OK ){ sqlite3_mutex_leave(pSummary->mutex); } @@ -1020,157 +1215,10 @@ static int logEnterMutex(Log *pLog){ } static void logLeaveMutex(Log *pLog){ LogSummary *pSummary = pLog->pSummary; - logSummaryUnlock(pSummary); + logLockMutex(pSummary, LOG_UNLOCK); sqlite3_mutex_leave(pSummary->mutex); } -/* -** Values for the second parameter to logLockRegion(). -*/ -#define LOG_UNLOCK 0 -#define LOG_RDLOCK 1 -#define LOG_WRLOCK 2 - -static int logLockRegion(Log *pLog, u32 mRegion, int op){ - LogSummary *pSummary = pLog->pSummary; - LogLock *p; /* Used to iterate through in-process locks */ - u32 mOther; /* Locks held by other connections */ - u32 mNew; /* New mask for pLog */ - - assert( - /* Writer lock operations */ - (op==LOG_WRLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D)) - || (op==LOG_UNLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D)) - - /* Normal reader lock operations */ - || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B)) - || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A)) - || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B)) - - /* Region D reader lock operations */ - || (op==LOG_RDLOCK && mRegion==(LOG_REGION_D)) - || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A)) - || (op==LOG_UNLOCK && mRegion==(LOG_REGION_D)) - - /* Checkpointer lock operations */ - || (op==LOG_WRLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C)) - || (op==LOG_WRLOCK && mRegion==(LOG_REGION_A)) - || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C)) - || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B|LOG_REGION_C)) - ); - - /* Assert that a connection never tries to go from an EXCLUSIVE to a - ** SHARED lock on a region. Moving from SHARED to EXCLUSIVE sometimes - ** happens though (when a region D reader upgrades to a writer). - */ - assert( op!=LOG_RDLOCK || 0==(pLog->lock.mLock & (mRegion<<8)) ); - - sqlite3_mutex_enter(pSummary->mutex); - - /* Calculate a mask of logs held by all connections in this process apart - ** from this one. The least significant byte of the mask contains a mask - ** of the SHARED logs held. The next least significant byte of the mask - ** indicates the EXCLUSIVE locks held. For example, to test if some other - ** connection is holding a SHARED lock on region A, or an EXCLUSIVE lock - ** on region C, do: - ** - ** hasSharedOnA = (mOther & (LOG_REGION_A<<0)); - ** hasExclusiveOnC = (mOther & (LOG_REGION_C<<8)); - ** - ** In all masks, if the bit in the EXCLUSIVE byte mask is set, so is the - ** corresponding bit in the SHARED mask. - */ - mOther = 0; - for(p=pSummary->pLock; p; p=p->pNext){ - assert( (p->mLock & (p->mLock<<8))==(p->mLock&0x0000FF00) ); - if( p!=&pLog->lock ){ - mOther |= p->mLock; - } - } - - /* If this call is to lock a region (not to unlock one), test if locks held - ** by any other connection in this process prevent the new locks from - ** begin granted. If so, exit the summary mutex and return SQLITE_BUSY. - */ - if( op && (mOther & (mRegion << (op==LOG_RDLOCK ? 8 : 0))) ){ - sqlite3_mutex_leave(pSummary->mutex); - return SQLITE_BUSY; - } - - /* Figure out the new log mask for this connection. */ - switch( op ){ - case LOG_UNLOCK: - mNew = (pLog->lock.mLock & ~(mRegion|(mRegion<<8))); - break; - case LOG_RDLOCK: - mNew = (pLog->lock.mLock | mRegion); - break; - default: - assert( op==LOG_WRLOCK ); - mNew = (pLog->lock.mLock | (mRegion<<8) | mRegion); - break; - } - - /* Now modify the locks held on the log-summary file descriptor. This - ** file descriptor is shared by all log connections in this process. - ** Therefore: - ** - ** + If one or more log connections in this process hold a SHARED lock - ** on a region, the file-descriptor should hold a SHARED lock on - ** the file region. - ** - ** + If a log connection in this process holds an EXCLUSIVE lock on a - ** region, the file-descriptor should also hold an EXCLUSIVE lock on - ** the region in question. - ** - ** If this is an LOG_UNLOCK operation, only regions for which no other - ** connection holds a lock should actually be unlocked. And if this - ** is a LOG_RDLOCK operation and other connections already hold all - ** the required SHARED locks, then no system call is required. - */ - if( op==LOG_UNLOCK ){ - mRegion = (mRegion & ~mOther); - } - if( (op==LOG_WRLOCK) - || (op==LOG_UNLOCK && mRegion) - || (op==LOG_RDLOCK && (mOther&mRegion)!=mRegion) - ){ - struct LockMap { - int iStart; /* Byte offset to start locking operation */ - int iLen; /* Length field for locking operation */ - } aMap[] = { - /* 0000 */ {0, 0}, /* 0001 */ {4, 1}, - /* 0010 */ {3, 1}, /* 0011 */ {3, 2}, - /* 0100 */ {2, 1}, /* 0101 */ {0, 0}, - /* 0110 */ {2, 2}, /* 0111 */ {2, 3}, - /* 1000 */ {1, 1}, /* 1001 */ {0, 0}, - /* 1010 */ {0, 0}, /* 1011 */ {0, 0}, - /* 1100 */ {1, 2}, /* 1101 */ {0, 0}, - /* 1110 */ {0, 0}, /* 1111 */ {0, 0} - }; - int rc; /* Return code of fcntl() */ - struct flock f; /* Locking operation */ - - assert( mRegionfd, F_SETLK, &f); - if( rc!=0 ){ - sqlite3_mutex_leave(pSummary->mutex); - return SQLITE_BUSY; - } - } - - pLog->lock.mLock = mNew; - sqlite3_mutex_leave(pSummary->mutex); - return SQLITE_OK; -} - /* ** Try to read the log-summary header. Attempt to verify the header ** checksum. If the checksum can be verified, copy the log-summary diff --git a/src/pager.c b/src/pager.c index 3b16b86dd8..c2ed8601aa 100644 --- a/src/pager.c +++ b/src/pager.c @@ -491,6 +491,7 @@ static int assert_pager_state(Pager *pPager){ } #endif + /* ** Return true if it is necessary to write page *pPg into the sub-journal. ** A page needs to be written into the sub-journal if there exists one @@ -1187,19 +1188,6 @@ static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){ return rc; } -/* -** Open a connection to the write-ahead log file for pager pPager. -*/ -static int pagerOpenLog(Pager *pPager){ - if( !pPager->pLog ){ - int rc; /* Return code from LogOpen() */ - - rc = sqlite3LogOpen(pPager->pVfs, pPager->zFilename, &pPager->pLog); - if( rc!=SQLITE_OK ) return rc; - } - return SQLITE_OK; -} - /* ** Return true if this pager uses a write-ahead log instead of the usual ** rollback journal. Otherwise false. @@ -1241,8 +1229,9 @@ static void pager_unlock(Pager *pPager){ if( pagerUseLog(pPager) ){ sqlite3LogCloseSnapshot(pPager->pLog); + }else{ + rc = osUnlock(pPager->fd, NO_LOCK); } - rc = osUnlock(pPager->fd, NO_LOCK); if( rc ){ pPager->errCode = rc; } @@ -3733,6 +3722,54 @@ static int hasHotJournal(Pager *pPager, int *pExists){ return rc; } +/* +** Open a connection to the write-ahead log file for pager pPager. If +** the log connection is already open, this function is a no-op. +*/ +static int pagerOpenLog(Pager *pPager){ + if( !pPager->pLog ){ + int rc; /* Return code */ + + /* Before opening the log file, obtain a SHARED lock on the database + ** file. This lock will not be released until after the log file + ** connection has been closed. The purpose of this lock is to stop + ** any other process from unlinking the log or log-summary files while + ** this connection still has them open. An EXCLUSIVE lock on the + ** database file is required to unlink either of those two files. + */ + assert( pPager->state==PAGER_UNLOCK ); + rc = pager_wait_on_lock(pPager, SHARED_LOCK); + if( rc!=SQLITE_OK ){ + assert( pPager->state==PAGER_UNLOCK ); + return pager_error(pPager, rc); + } + assert( pPager->state>=SHARED_LOCK ); + + /* Open the connection to the log file. If this operation fails, + ** (e.g. due to malloc() failure), unlock the database file and + ** return an error code. + */ + rc = sqlite3LogOpen(pPager->pVfs, pPager->zFilename, &pPager->pLog); + if( rc!=SQLITE_OK ){ + osUnlock(pPager->fd, SQLITE_LOCK_NONE); + pPager->state = PAGER_UNLOCK; + return rc; + } + }else{ + /* If the log file was already open, check that the pager is still holding + ** the required SHARED lock on the database file. + */ +#ifdef SQLITE_DEBUG + int locktype; + sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_LOCKSTATE, &locktype); + assert( locktype==SQLITE_LOCK_SHARED ); +#endif + } + + return SQLITE_OK; +} + + /* ** This function is called to obtain a shared lock on the database file. ** It is illegal to call sqlite3PagerAcquire() until after this function @@ -3786,17 +3823,25 @@ int sqlite3PagerSharedLock(Pager *pPager){ pager_reset(pPager); } - if( pagerUseLog(pPager) ){ - int changed = 0; + if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){ + int changed = 0; /* True if the cache must be flushed */ + + /* Open the log file, if it is not already open. */ + rc = pagerOpenLog(pPager); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Open a log snapshot to read from. */ rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed); if( rc==SQLITE_OK ){ + int dummy; if( changed ){ pager_reset(pPager); assert( pPager->errCode || pPager->dbSizeValid==0 ); } - pPager->state = PAGER_SHARED; /* TODO: Is this right? */ - rc = sqlite3PagerPagecount(pPager, &changed); + rc = sqlite3PagerPagecount(pPager, &dummy); } }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){ sqlite3_vfs * const pVfs = pPager->pVfs; @@ -5611,15 +5656,13 @@ int sqlite3PagerJournalMode(Pager *pPager, int eMode){ sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); } - if( eMode==PAGER_JOURNALMODE_WAL ){ - int rc = pagerOpenLog(pPager); - if( rc!=SQLITE_OK ){ - /* TODO: The error code should not just get dropped here. Change - ** this to set a flag to force the log to be opened the first time - ** it is actually required. */ - return (int)pPager->journalMode; - } + /* Switching into WAL mode can only take place when no + ** locks are held on the database file. + */ + if( eMode==PAGER_JOURNALMODE_WAL && pPager->state!=PAGER_UNLOCK ){ + return (int)pPager->journalMode; } + pPager->journalMode = (u8)eMode; } return (int)pPager->journalMode; diff --git a/test/wal.test b/test/wal.test index a41b263c1b..5484d1eafa 100644 --- a/test/wal.test +++ b/test/wal.test @@ -32,6 +32,10 @@ proc log_file_size {nFrame pgsz} { expr {12 + ($pgsz+16)*$nFrame} } +proc log_deleted {logfile} { + return [expr [file exists $logfile]==0] +} + # # These are 'warm-body' tests used while developing the WAL code. They # serve to prove that a few really simple cases work: @@ -179,7 +183,6 @@ do_test wal-5.5 { } {1 2 3 4} db close - foreach sector {512 4096} { sqlite3_simulate_device -sectorsize $sector foreach pgsz {512 1024 2048 4096} { @@ -198,8 +201,8 @@ foreach sector {512 4096} { } [expr $pgsz*2] do_test wal-6.$sector.$pgsz.2 { - file size test.db-wal - } {0} + log_deleted test.db-wal + } {1} } } @@ -591,8 +594,8 @@ do_test wal-11.8 { do_test wal-11.9 { db close sqlite3_wal db test.db - list [expr [file size test.db]/1024] [file size test.db-wal] -} {37 0} + list [expr [file size test.db]/1024] [log_deleted test.db-wal] +} {37 1} do_test wal-11.10 { execsql { diff --git a/test/walthread.test b/test/walthread.test index fbd6c653cc..08219a7f28 100644 --- a/test/walthread.test +++ b/test/walthread.test @@ -21,12 +21,14 @@ if {[run_thread_tests]==0} { finish_test ; return } do_test walthread-1.1 { execsql { PRAGMA journal_mode = WAL; + PRAGMA lock_status; CREATE TABLE t1(x PRIMARY KEY); + PRAGMA lock_status; INSERT INTO t1 VALUES(randomblob(100)); INSERT INTO t1 VALUES(randomblob(100)); INSERT INTO t1 SELECT md5sum(x) FROM t1; } -} {wal} +} {wal main unlocked temp closed main shared temp closed} do_test walthread-1.2 { execsql { SELECT (SELECT count(*) FROM t1), ( @@ -41,7 +43,7 @@ do_test walthread-1.3 { } {ok} do_test walthread-1.4 { execsql { PRAGMA lock_status } -} {main unlocked temp unknown} +} {main shared temp unknown} #-------------------------------------------------------------------------- # Start N threads. Each thread performs both read and write transactions.