From 52800df9fcc2946ba507f64cafade052f62d7b7b Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 14 Feb 2015 18:58:22 +0000 Subject: [PATCH] Change the way the "incremental checkpoint" function of OTA works in order to reduce the effect on the SQLite core code. FossilOrigin-Name: b64a11a754dc56f3406d3b703531ebe9e4af4908 --- ext/ota/ota1.test | 2 +- ext/ota/ota6.test | 24 +- ext/ota/sqlite3ota.c | 444 +++++++++++++++++++++++++--------- manifest | 28 +-- manifest.uuid | 2 +- src/main.c | 27 --- src/pager.c | 23 -- src/pager.h | 2 - src/sqlite.h.in | 37 --- src/wal.c | 556 ++++++++++++------------------------------- src/wal.h | 11 - 11 files changed, 493 insertions(+), 663 deletions(-) diff --git a/ext/ota/ota1.test b/ext/ota/ota1.test index dec5f14a99..addcde9446 100644 --- a/ext/ota/ota1.test +++ b/ext/ota/ota1.test @@ -198,7 +198,6 @@ foreach {tn3 create_vfs destroy_vfs} { do_test $tn3.1.$tn2.$tn.1 { create_ota1 ota.db - breakpoint $cmd test.db ota.db } {SQLITE_DONE} @@ -475,6 +474,7 @@ foreach {tn3 create_vfs destroy_vfs} { } } + catch { db close } eval $destroy_vfs } diff --git a/ext/ota/ota6.test b/ext/ota/ota6.test index 8027f36c5f..3b794ee77c 100644 --- a/ext/ota/ota6.test +++ b/ext/ota/ota6.test @@ -68,34 +68,12 @@ for {set nStep 1} {$nStep < 7} {incr nStep} { } {1 {SQLITE_BUSY - database modified during ota update}} } -for {set nStep 7} {$nStep < 8} {incr nStep} { - do_test 1.$nStep.1 { - setup_test - sqlite3ota ota test.db ota.db - for {set i 0} {$i<$nStep} {incr i} {ota step} - ota close - sqlite3 db test.db - execsql { INSERT INTO t1 VALUES(5, 'hello') } - sqlite3ota ota test.db ota.db - ota step - } {SQLITE_OK} - do_test 1.$nStep.2 { - ota step - } {SQLITE_OK} - do_test 1.$nStep.3 { - list [file exists test.db-oal] [file exists test.db-wal] - } {0 1} - do_test 1.$nStep.4 { - list [catch { ota close } msg] $msg - } {0 SQLITE_OK} -} - # Test the outcome of some other client writing the db after the *-oal # file has been copied to the *-wal path. 
Once this has happened, any # other client writing to the db causes OTA to consider its job finished. # -for {set nStep 8} {$nStep < 20} {incr nStep} { +for {set nStep 7} {$nStep < 20} {incr nStep} { do_test 1.$nStep.1 { setup_test sqlite3ota ota test.db ota.db diff --git a/ext/ota/sqlite3ota.c b/ext/ota/sqlite3ota.c index de7f1250c5..652c21a5f5 100644 --- a/ext/ota/sqlite3ota.c +++ b/ext/ota/sqlite3ota.c @@ -75,6 +75,7 @@ #define OTA_STATE_COOKIE 7 #define OTA_STAGE_OAL 1 +#define OTA_STAGE_CAPTURE 2 #define OTA_STAGE_CKPT 3 #define OTA_STAGE_DONE 4 @@ -87,6 +88,21 @@ typedef struct OtaObjIter OtaObjIter; typedef struct ota_vfs ota_vfs; typedef struct ota_file ota_file; +#if !defined(SQLITE_AMALGAMATION) +typedef unsigned int u32; +typedef unsigned char u8; +typedef sqlite3_int64 i64; +#endif + +/* +** These values must match the values defined in wal.c for the equivalent +** locks. These are not magic numbers as they are part of the SQLite file +** format. +*/ +#define WAL_LOCK_WRITE 0 +#define WAL_LOCK_CKPT 1 +#define WAL_LOCK_READ0 3 + /* ** A structure to store values read from the ota_state table in memory. */ @@ -94,10 +110,9 @@ struct OtaState { int eStage; char *zTbl; char *zIdx; - unsigned char *pCkptState; - int nCkptState; + i64 iWalCksum; int nRow; - sqlite3_int64 nProgress; + i64 nProgress; }; /* @@ -116,8 +131,8 @@ struct OtaObjIter { char **azTblCol; /* Array of unquoted target column names */ char **azTblType; /* Array of target column types */ int *aiSrcOrder; /* src table col -> target table col */ - unsigned char *abTblPk; /* Array of flags, set on target PK columns */ - unsigned char *abNotNull; /* Array of flags, set on NOT NULL columns */ + u8 *abTblPk; /* Array of flags, set on target PK columns */ + u8 *abNotNull; /* Array of flags, set on NOT NULL columns */ int eType; /* Table type - an OTA_PK_XXX value */ /* Output variables. zTbl==0 implies EOF. 
*/ @@ -158,6 +173,12 @@ struct OtaObjIter { #define OTA_PK_VTAB 5 +typedef struct OtaFrame OtaFrame; +struct OtaFrame { + u32 iDbPage; + u32 iWalFrame; +}; + /* ** OTA handle. */ @@ -171,16 +192,27 @@ struct sqlite3ota { int nStep; /* Rows processed for current object */ int nProgress; /* Rows processed for all objects */ OtaObjIter objiter; /* Iterator for skipping through tbl/idx */ - sqlite3_ckpt *pCkpt; /* Incr-checkpoint handle */ - ota_file *pTargetFd; /* File handle open on target db */ const char *zVfsName; /* Name of automatically created ota vfs */ + ota_file *pTargetFd; /* File handle open on target db */ + + /* The following state variables are used as part of the incremental + ** checkpoint stage (eStage==OTA_STAGE_CKPT). See function otaSetupCkpt() + ** for details. */ + u32 iMaxFrame; /* Largest iWalFrame value in aFrame[] */ + u32 mLock; + int nFrame; /* Entries in aFrame[] array */ + int nFrameAlloc; /* Allocated size of aFrame[] array */ + OtaFrame *aFrame; + int pgsz; + u8 *aBuf; + i64 iWalCksum; }; struct ota_vfs { - sqlite3_vfs base; /* ota VFS shim methods */ - sqlite3_vfs *pRealVfs; /* Underlying VFS */ - sqlite3_mutex *mutex; - const char *zOtaWal; + sqlite3_vfs base; /* ota VFS shim methods */ + sqlite3_vfs *pRealVfs; /* Underlying VFS */ + sqlite3_mutex *mutex; /* Mutex to protect pMain */ + ota_file *pMain; /* Linked list of main db files */ }; struct ota_file { @@ -190,13 +222,16 @@ struct ota_file { sqlite3ota *pOta; /* Pointer to ota object (ota target only) */ int openFlags; /* Flags this file was opened with */ - unsigned int iCookie; /* Cookie value for main db files */ - unsigned char iWriteVer; /* "write-version" value for main db files */ + u32 iCookie; /* Cookie value for main db files */ + u8 iWriteVer; /* "write-version" value for main db files */ int nShm; /* Number of entries in apShm[] array */ char **apShm; /* Array of mmap'd *-shm regions */ - const char *zWal; /* Wal filename for this db file */ char *zDel; /* Delete 
this when closing file */ + + const char *zWal; /* Wal filename for this main db file */ + ota_file *pWalFd; /* Wal file descriptor for this main db */ + ota_file *pMainNext; /* Next MAIN_DB file */ }; @@ -468,7 +503,7 @@ static void *otaMalloc(sqlite3ota *p, int nByte){ ** error code in the OTA handle passed as the first argument. */ static void otaAllocateIterArrays(sqlite3ota *p, OtaObjIter *pIter, int nCol){ - int nByte = (2*sizeof(char*) + sizeof(int) + 2*sizeof(unsigned char)) * nCol; + int nByte = (2*sizeof(char*) + sizeof(int) + 2*sizeof(u8)) * nCol; char **azNew; azNew = (char**)otaMalloc(p, nByte); @@ -476,8 +511,8 @@ static void otaAllocateIterArrays(sqlite3ota *p, OtaObjIter *pIter, int nCol){ pIter->azTblCol = azNew; pIter->azTblType = &azNew[nCol]; pIter->aiSrcOrder = (int*)&pIter->azTblType[nCol]; - pIter->abTblPk = (unsigned char*)&pIter->aiSrcOrder[nCol]; - pIter->abNotNull = (unsigned char*)&pIter->abTblPk[nCol]; + pIter->abTblPk = (u8*)&pIter->aiSrcOrder[nCol]; + pIter->abNotNull = (u8*)&pIter->abTblPk[nCol]; } } @@ -576,7 +611,7 @@ static int otaTableType( zSql = 0; if( pStmt==0 ) goto otaTableType_end; while( sqlite3_step(pStmt)==SQLITE_ROW ){ - const unsigned char *zOrig = sqlite3_column_text(pStmt,3); + const u8 *zOrig = sqlite3_column_text(pStmt,3); if( zOrig && zOrig[0]=='p' ){ zSql = sqlite3_mprintf("SELECT rootpage FROM main.sqlite_master" " WHERE name=%Q", sqlite3_column_text(pStmt,1)); @@ -713,7 +748,7 @@ static int otaObjIterCacheTableInfo(sqlite3ota *p, OtaObjIter *pIter){ pIter->azTblType[iOrder] = otaStrndup(zType, -1, &p->rc); pIter->abTblPk[iOrder] = (iPk!=0); - pIter->abNotNull[iOrder] = (unsigned char)bNotNull || (iPk!=0); + pIter->abNotNull[iOrder] = (u8)bNotNull || (iPk!=0); iOrder++; } } @@ -1480,21 +1515,26 @@ static void otaOpenDatabase(sqlite3ota *p){ assert( p->rc==SQLITE_OK ); assert( p->db==0 ); + p->eStage = 0; p->rc = sqlite3_open_v2(p->zTarget, &p->db, flags, p->zVfsName); if( p->rc ){ p->zErrmsg = 
sqlite3_mprintf("%s", sqlite3_errmsg(p->db)); }else{ - otaMPrintfExec(p, "ATTACH %Q AS ota", p->zOta); + p->rc = sqlite3_file_control(p->db, "main", SQLITE_FCNTL_OTA, (void*)p); + if( p->rc==SQLITE_OK ){ + otaMPrintfExec(p, "ATTACH %Q AS ota", p->zOta); + } /* Mark the database file just opened as an OTA target database. If ** this call returns SQLITE_NOTFOUND, then the OTA vfs is not in use. ** This is an error. */ if( p->rc==SQLITE_OK ){ p->rc = sqlite3_file_control(p->db, "main", SQLITE_FCNTL_OTA, (void*)p); - if( p->rc==SQLITE_NOTFOUND ){ - p->rc = SQLITE_ERROR; - p->zErrmsg = sqlite3_mprintf("ota vfs not found"); - } + } + + if( p->rc==SQLITE_NOTFOUND ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("ota vfs not found"); } } } @@ -1533,6 +1573,101 @@ static void otaFileSuffix3(const char *zBase, char *z){ #endif } +/* +** Return the current wal-index header checksum for the target database +** as a 64-bit integer. +** +** The checksum is stored in the first page of xShmMap memory as an 8-byte +** blob starting at byte offset 40. 
+*/ +static i64 otaShmChecksum(sqlite3ota *p){ + i64 iRet = 0; /* Stays 0 if p->rc is already set or xShmMap() fails */ + if( p->rc==SQLITE_OK ){ + sqlite3_file *pDb = p->pTargetFd->pReal; + u32 volatile *ptr; + p->rc = pDb->pMethods->xShmMap(pDb, 0, 32*1024, 0, (void volatile**)&ptr); + if( p->rc==SQLITE_OK ){ + iRet = ((i64)ptr[10] << 32) + ptr[11]; + } + } + return iRet; +} + +static void otaSetupCheckpoint(sqlite3ota *p, OtaState *pState){ + + if( pState==0 ){ + p->eStage = 0; + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_exec(p->db, "SELECT * FROM sqlite_master", 0, 0, 0); + } + } + + if( p->rc==SQLITE_OK ){ + int rc2; + p->eStage = OTA_STAGE_CAPTURE; + rc2 = sqlite3_exec(p->db, "PRAGMA main.wal_checkpoint=restart", 0, 0, 0); + if( rc2!=SQLITE_INTERNAL ) p->rc = rc2; + } + + if( p->rc==SQLITE_OK ){ + p->eStage = OTA_STAGE_CKPT; + p->nStep = 0; + p->aBuf = otaMalloc(p, p->pgsz); + p->iWalCksum = otaShmChecksum(p); + } + + if( p->rc==SQLITE_OK && pState && pState->iWalCksum!=p->iWalCksum ){ + p->rc = SQLITE_DONE; + p->eStage = OTA_STAGE_DONE; + } +} + +static int otaCaptureWalRead(sqlite3ota *pOta, i64 iOff, int iAmt){ + const u32 mReq = (1<mLock!=mReq ){ + return SQLITE_BUSY; + } + + pOta->pgsz = iAmt; + if( pOta->nFrame==pOta->nFrameAlloc ){ + int nNew = (pOta->nFrameAlloc ? pOta->nFrameAlloc : 64) * 2; + OtaFrame *aNew; + aNew = (OtaFrame*)sqlite3_realloc(pOta->aFrame, nNew * sizeof(OtaFrame)); + if( aNew==0 ) return SQLITE_NOMEM; + pOta->aFrame = aNew; + pOta->nFrameAlloc = nNew; + } + + iFrame = (u32)((iOff-32) / (i64)(iAmt+24)) + 1; + if( pOta->iMaxFrame