From: dan Date: Fri, 16 Apr 2010 13:59:31 +0000 (+0000) Subject: Change the log file format to include a small (12 byte) header at the start of the... X-Git-Tag: version-3.7.2~455^2~75 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=97a313554d2452fcdfe60e443b1eac7f1a4e9f87;p=thirdparty%2Fsqlite.git Change the log file format to include a small (12 byte) header at the start of the file. FossilOrigin-Name: 9865d14d6041874bc1239ce7a061d5c75f2d33c9 --- diff --git a/manifest b/manifest index 547b763339..ba84322467 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sbug\sin\slog\srecovery\s(last\sframe\sin\slog\swas\sbeing\signored).\sAlso\sremove\san\sincorrect\sassert\sstatement. -D 2010-04-16T11:30:18 +C Change\sthe\slog\sfile\sformat\sto\sinclude\sa\ssmall\s(12\sbyte)\sheader\sat\sthe\sstart\sof\sthe\sfile. +D 2010-04-16T13:59:31 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in 4f2f967b7e58a35bb74fb7ec8ae90e0f4ca7868b F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -131,7 +131,7 @@ F src/journal.c b0ea6b70b532961118ab70301c00a33089f9315c F src/legacy.c a199d7683d60cef73089e892409113e69c23a99f F src/lempar.c 7f026423f4d71d989e719a743f98a1cbd4e6d99e F src/loadext.c 1c7a61ce1281041f437333f366a96aa0d29bb581 -F src/log.c 40e0c5780e7a33556f421142562edbf77f8e6639 +F src/log.c 11f683a3429319fb2731aa357717bf9117cdcba4 F src/log.h a2654af46ce7b5732f4d5a731abfdd180f0a06d9 F src/main.c c0e7192bad5b90544508b241eb2487ac661de890 F src/malloc.c a08f16d134f0bfab6b20c3cd142ebf3e58235a6a @@ -757,7 +757,7 @@ F test/vtabE.test 7c4693638d7797ce2eda17af74292b97e705cc61 F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5 F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8 F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d -F test/wal.test ff3c101453044e1f9930d0421a164e09619a678c +F test/wal.test bb1fa35fc03353a1a154f583f01e5093e25ba001 F test/walcrash.test 
45cfbab30bb7cbe0b2e9d5cabe90dbcad10cb89b F test/walslow.test 38076d5fad49e3678027be0f8110e6a32d531dc2 F test/walthread.test 58cd64b06f186251f09f64e4918fb74a7e52c963 @@ -804,7 +804,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P ecd828f96909895535d7dc744e5a8530e234e04d -R 6491b6f312e37bd3163c20e935da6a05 +P 67d2a89ec2d593a077eb19a6ea2b06cb1c2e9ba8 +R 2c35134540abe427220a5914099a1c09 U dan -Z 46ab8b741bc2f3b7a6a7fee62de40d58 +Z a25fb37e86e611cc50a9ddc963124e5a diff --git a/manifest.uuid b/manifest.uuid index c682fa9a6a..e6f912dc82 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -67d2a89ec2d593a077eb19a6ea2b06cb1c2e9ba8 \ No newline at end of file +9865d14d6041874bc1239ce7a061d5c75f2d33c9 \ No newline at end of file diff --git a/src/log.c b/src/log.c index 67bf9c0841..73b44863d0 100644 --- a/src/log.c +++ b/src/log.c @@ -4,6 +4,35 @@ ** "journal_mode=wal" mode. */ +/* +** LOG FILE FORMAT +** +** A log file consists of a header followed by zero or more log frames. +** The log header is 12 bytes in size and consists of the following three +** big-endian 32-bit unsigned integer values: +** +** 0: Database page size, +** 4: Randomly selected salt value 1, +** 8: Randomly selected salt value 2. +** +** Immediately following the log header are zero or more log frames. Each +** frame itself consists of a 16-byte header followed by page-size bytes +** of page data. The header is broken into 4 big-endian 32-bit unsigned +** integer values, as follows: +** +** 0: Page number. +** 4: For commit records, the size of the database image in pages +** after the commit. For all other records, zero. +** 8: Checksum value 1. +** 12: Checksum value 2. +*/ + +/* +** LOG SUMMARY FORMAT +** +** TODO.
+*/ + #include "log.h" #include @@ -39,8 +68,20 @@ struct LogSummaryHdr { #define LOGSUMMARY_FRAME_OFFSET \ (LOGSUMMARY_HDR_NFIELD + LOG_CKSM_BYTES/sizeof(u32)) + + /* Size of frame header */ -#define LOG_FRAME_HDRSIZE 20 +#define LOG_FRAME_HDRSIZE 16 +#define LOG_HDRSIZE 12 + +/* +** Return the offset of frame iFrame in the log file, assuming a database +** page size of pgsz bytes. The offset returned is to the start of the +** log frame-header. +*/ +#define logFrameOffset(iFrame, pgsz) ( \ + LOG_HDRSIZE + ((iFrame)-1)*((pgsz)+LOG_FRAME_HDRSIZE) \ +) /* ** There is one instance of this structure for each log-summary object @@ -126,6 +167,7 @@ struct LogIterator { }; + /* ** List of all LogSummary objects created by this process. Protected by ** static mutex LOG_SUMMARY_MUTEX. TODO: Should have a dedicated mutex @@ -291,7 +333,6 @@ static int logSummaryUnmap(LogSummary *pSummary, int isTruncate){ return rc; } - static void logSummaryWriteHdr(LogSummary *pSummary, LogSummaryHdr *pHdr){ u32 *aData = pSummary->aData; memcpy(aData, pHdr, sizeof(LogSummaryHdr)); @@ -321,17 +362,16 @@ static void logEncodeFrame( u8 *aData, /* Pointer to page data (for checksum) */ u8 *aFrame /* OUT: Write encoded frame here */ ){ - assert( LOG_FRAME_HDRSIZE==20 ); + assert( LOG_FRAME_HDRSIZE==16 ); - sqlite3Put4byte(&aFrame[0], nData); - sqlite3Put4byte(&aFrame[4], iPage); - sqlite3Put4byte(&aFrame[8], nTruncate); + sqlite3Put4byte(&aFrame[0], iPage); + sqlite3Put4byte(&aFrame[4], nTruncate); - logChecksumBytes(aFrame, 12, aCksum); + logChecksumBytes(aFrame, 8, aCksum); logChecksumBytes(aData, nData, aCksum); - sqlite3Put4byte(&aFrame[12], aCksum[0]); - sqlite3Put4byte(&aFrame[16], aCksum[1]); + sqlite3Put4byte(&aFrame[8], aCksum[0]); + sqlite3Put4byte(&aFrame[12], aCksum[1]); } /* @@ -346,20 +386,20 @@ static int logDecodeFrame( u8 *aData, /* Pointer to page data (for checksum) */ u8 *aFrame /* Frame data */ ){ - assert( LOG_FRAME_HDRSIZE==20 ); + assert( LOG_FRAME_HDRSIZE==16 ); - 
logChecksumBytes(aFrame, 12, aCksum); + logChecksumBytes(aFrame, 8, aCksum); logChecksumBytes(aData, nData, aCksum); - if( aCksum[0]!=sqlite3Get4byte(&aFrame[12]) - || aCksum[1]!=sqlite3Get4byte(&aFrame[16]) + if( aCksum[0]!=sqlite3Get4byte(&aFrame[8]) + || aCksum[1]!=sqlite3Get4byte(&aFrame[12]) ){ /* Checksum failed. */ return 0; } - *piPage = sqlite3Get4byte(&aFrame[4]); - *pnTruncate = sqlite3Get4byte(&aFrame[8]); + *piPage = sqlite3Get4byte(&aFrame[0]); + *pnTruncate = sqlite3Get4byte(&aFrame[4]); return 1; } @@ -486,12 +526,12 @@ static int logSummaryRecover(LogSummary *pSummary, sqlite3_file *pFd){ int iFrame; /* Index of last frame read */ i64 iOffset; /* Next offset to read from log file */ int nPgsz; /* Page size according to the log */ - u32 aCksum[2] = {2, 3}; /* Running checksum */ + u32 aCksum[2]; /* Running checksum */ /* Read in the first frame header in the file (to determine the ** database page size). */ - rc = sqlite3OsRead(pFd, aBuf, LOG_FRAME_HDRSIZE, 0); + rc = sqlite3OsRead(pFd, aBuf, LOG_HDRSIZE, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -503,6 +543,8 @@ static int logSummaryRecover(LogSummary *pSummary, sqlite3_file *pFd){ if( nPgsz&(nPgsz-1) || nPgsz>SQLITE_MAX_PAGE_SIZE ){ goto finished; } + aCksum[0] = sqlite3Get4byte(&aBuf[4]); + aCksum[1] = sqlite3Get4byte(&aBuf[8]); /* Malloc a buffer to read frames into. */ nFrame = nPgsz + LOG_FRAME_HDRSIZE; @@ -514,8 +556,7 @@ static int logSummaryRecover(LogSummary *pSummary, sqlite3_file *pFd){ /* Read all frames from the log file. */ iFrame = 0; - iOffset = 0; - for(iOffset=0; (iOffset+nFrame)<=nSize; iOffset+=nFrame){ + for(iOffset=LOG_HDRSIZE; (iOffset+nFrame)<=nSize; iOffset+=nFrame){ u32 pgno; /* Database page number for frame */ u32 nTruncate; /* dbsize field from frame header */ int isValid; /* True if this frame is valid */ @@ -823,7 +864,7 @@ static int logCheckpoint( /* Iterate through the contents of the log, copying data to the db file. 
*/ while( 0==logIteratorNext(pIter, &iDbpage, &iFrame) ){ rc = sqlite3OsRead(pLog->pFd, zBuf, pgsz, - (iFrame-1) * (pgsz+LOG_FRAME_HDRSIZE) + LOG_FRAME_HDRSIZE + logFrameOffset(iFrame, pgsz) + LOG_FRAME_HDRSIZE ); if( rc!=SQLITE_OK ) goto out; rc = sqlite3OsWrite(pFd, zBuf, pgsz, (iDbpage-1)*pgsz); @@ -1319,8 +1360,7 @@ int sqlite3LogRead(Log *pLog, Pgno pgno, int *pInLog, u8 *pOut){ ** required page. Read and return data from the log file. */ if( iRead ){ - i64 iOffset = (iRead-1) * (pLog->hdr.pgsz+LOG_FRAME_HDRSIZE); - iOffset += LOG_FRAME_HDRSIZE; + i64 iOffset = logFrameOffset(iRead, pLog->hdr.pgsz) + LOG_FRAME_HDRSIZE; *pInLog = 1; return sqlite3OsRead(pLog->pFd, pOut, pLog->hdr.pgsz, iOffset); } @@ -1401,45 +1441,43 @@ int sqlite3LogFrames( int isCommit, /* True if this is a commit */ int isSync /* True to sync the log file */ ){ - /* Each frame has a 20 byte header, as follows: - ** - ** + Pseudo-random salt (4 bytes) - ** + Page number (4 bytes) - ** + New database size, or 0 if not a commit frame (4 bytes) - ** + Checksum (CHECKSUM_BYTES bytes); - ** - ** The checksum is computed based on the following: - ** - ** + The previous checksum, or {2, 3} for the first frame in the log. - ** + The non-checksum fields of the frame header, and - ** + The frame contents (page data). - ** - ** This format must also be understood by the code in logSummaryRecover(). - ** The size of the frame header is used by LogRead() and LogCheckpoint(). - */ int rc; /* Used to catch return codes */ u32 iFrame; /* Next frame address */ - u8 aFrame[LOG_FRAME_HDRSIZE]; + u8 aFrame[LOG_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ PgHdr *p; /* Iterator to run through pList with. 
*/ - u32 aCksum[2]; - + u32 aCksum[2]; /* Checksums */ PgHdr *pLast; /* Last frame in list */ int nLast = 0; /* Number of extra copies of last page */ - assert( LOG_FRAME_HDRSIZE==(4 * 3 + LOG_CKSM_BYTES) ); + assert( LOG_FRAME_HDRSIZE==(4 * 2 + LOG_CKSM_BYTES) ); assert( pList ); + /* If this is the first frame written into the log, write the log + ** header to the start of the log file. See comments at the top of + ** this file for a description of the log-header format. + */ + assert( LOG_FRAME_HDRSIZE>=LOG_HDRSIZE ); + iFrame = pLog->hdr.iLastPg; + if( iFrame==0 ){ + sqlite3Put4byte(aFrame, nPgsz); + sqlite3_randomness(8, &aFrame[4]); + pLog->hdr.iCheck1 = sqlite3Get4byte(&aFrame[4]); + pLog->hdr.iCheck2 = sqlite3Get4byte(&aFrame[8]); + rc = sqlite3OsWrite(pLog->pFd, aFrame, LOG_HDRSIZE, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + } + aCksum[0] = pLog->hdr.iCheck1; aCksum[1] = pLog->hdr.iCheck2; /* Write the log file. */ - iFrame = pLog->hdr.iLastPg; for(p=pList; p; p=p->pDirty){ u32 nDbsize; /* Db-size field for frame header */ i64 iOffset; /* Write offset in log file */ - iFrame++; - iOffset = (iFrame-1) * (nPgsz+sizeof(aFrame)); + iOffset = logFrameOffset(++iFrame, nPgsz); /* Populate and write the frame header */ nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0; diff --git a/test/wal.test b/test/wal.test index 79b3b52d50..a41b263c1b 100644 --- a/test/wal.test +++ b/test/wal.test @@ -28,6 +28,10 @@ proc sqlite3_wal {args} { [lindex $args 0] eval { PRAGMA journal_mode = wal } } +proc log_file_size {nFrame pgsz} { + expr {12 + ($pgsz+16)*$nFrame} +} + # # These are 'warm-body' tests used while developing the WAL code. They # serve to prove that a few really simple cases work: @@ -58,7 +62,7 @@ do_test wal-1.1 { do_test wal-1.2 { # There are now two pages in the log. 
file size test.db-wal -} [expr (20+1024)*2] +} [log_file_size 2 1024] do_test wal-1.3 { execsql { SELECT * FROM sqlite_master } @@ -208,11 +212,11 @@ do_test wal-7.1 { INSERT INTO t1 VALUES(1, 2); } list [file size test.db] [file size test.db-wal] -} [list 0 [expr (1024+20)*3]] +} [list 0 [log_file_size 3 1024]] do_test wal-7.2 { execsql { PRAGMA checkpoint } list [file size test.db] [file size test.db-wal] -} [list 2048 [expr (1024+20)*3]] +} [list 2048 [log_file_size 3 1024]] # Execute some transactions in auto-vacuum mode to test database file # truncation. @@ -547,12 +551,12 @@ do_test wal-11.1 { } {0 3} do_test wal-11.2 { execsql { PRAGMA checkpoint } - list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] -} {3 3} + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 3 [log_file_size 3 1024]] do_test wal-11.3 { execsql { INSERT INTO t1 VALUES( randomblob(900) ) } - list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] -} {3 4} + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 3 [log_file_size 4 1024]] do_test wal-11.4 { execsql { @@ -562,8 +566,8 @@ do_test wal-11.4 { INSERT INTO t1 SELECT randomblob(900) FROM t1; -- 8 INSERT INTO t1 SELECT randomblob(900) FROM t1; -- 16 } - list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] -} {3 33} + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 3 [log_file_size 33 1024]] do_test wal-11.5 { execsql { SELECT count(*) FROM t1; @@ -572,8 +576,8 @@ do_test wal-11.5 { } {16 ok} do_test wal-11.6 { execsql COMMIT - list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] -} {3 42} + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 3 [log_file_size 42 1024]] do_test wal-11.7 { execsql { SELECT count(*) FROM t1; @@ -582,12 +586,12 @@ do_test wal-11.7 { } {16 ok} do_test wal-11.8 { execsql { PRAGMA checkpoint } - list [expr [file size test.db]/1024] [expr [file size 
test.db-wal]/1044] -} {37 42} + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 37 [log_file_size 42 1024]] do_test wal-11.9 { db close sqlite3_wal db test.db - list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] + list [expr [file size test.db]/1024] [file size test.db-wal] } {37 0} do_test wal-11.10 { @@ -597,8 +601,8 @@ do_test wal-11.10 { INSERT INTO t1 SELECT randomblob(900) FROM t1; -- 32 SELECT count(*) FROM t1; } - list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] -} {37 38} + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 37 [log_file_size 40 1024]] do_test wal-11.11 { execsql { SELECT count(*) FROM t1; @@ -607,8 +611,8 @@ do_test wal-11.11 { } } {32 16} do_test wal-11.12 { - list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] -} {37 38} + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 37 [log_file_size 40 1024]] do_test wal-11.13 { execsql { INSERT INTO t1 VALUES( randomblob(900) ); @@ -617,12 +621,14 @@ do_test wal-11.13 { } } {17 ok} do_test wal-11.14 { - list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] -} {37 38} + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 37 [log_file_size 40 1024]] #------------------------------------------------------------------------- -# This block of tests, wal-12.*, tests a problem... +# This block of tests, wal-12.*, tests the fix for a problem that +# could occur if a log that is a prefix of an older log is written +# into a reused log file. # reopen_db do_test wal-12.1 { @@ -632,8 +638,8 @@ do_test wal-12.1 { CREATE TABLE t2(x, y); INSERT INTO t1 VALUES('A', 1); } - list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] -} {0 5} + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 0 [log_file_size 5 1024]] do_test wal-12.2 { db close sqlite3_wal db test.db