}
/* Zero the entries in the aPgno array that correspond to frames with
- ** frame numbers greater than pWal->hdr.mxFrame.
- */
+ ** frame numbers greater than pWal->hdr.mxFrame. */
- nByte = (int)((char *)sLoc.aHash - (char *)&sLoc.aPgno[iLimit+1]);
- memset((void *)&sLoc.aPgno[iLimit+1], 0, nByte);
+ nByte = (int)((char *)sLoc.aHash - (char *)&sLoc.aPgno[iLimit]);
+ assert( nByte>=0 );
+ memset((void *)&sLoc.aPgno[iLimit], 0, nByte);
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
/* Verify that the every entry in the mapping region is still reachable
return rc;
}
- if( rc ) break;
+/*
+** Recover a single wal file - *-wal if iWal==0, or *-wal2 if iWal==1.
+**
+** pWal->hdr is zeroed and given a random salt before the file is examined.
+** If the file is larger than WAL_HDRSIZE, its header is read and checked
+** and each valid frame is added to the wal-index with walIndexAppend().
+** pWal->hdr.mxFrame, nPage and szPage are updated at each commit frame.
+**
+** Parameters:
+**   pWal    - Wal handle; pWal->apWalFd[iWal] is the file to recover.
+**   iWal    - 0 or 1, selecting which wal file to recover.
+**   pnCkpt  - OUT: 4-byte value read from offset 12 of the WAL header
+**             (written only once the header checksum has been verified).
+**   pbZero  - OUT (may be NULL): set to 1 if the file size was read
+**             successfully and is no larger than WAL_HDRSIZE.
+**
+** Returns SQLITE_OK early, with no frames recovered, if the header magic,
+** page size or checksum is invalid. Returns SQLITE_CANTOPEN_BKPT if the
+** header version is neither WAL_VERSION1 nor WAL_VERSION2, and
+** SQLITE_NOMEM_BKPT if the frame buffer cannot be allocated. Otherwise
+** returns the result code of the most recent sqlite3Os*() or walIndex*()
+** call.
+*/
+static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){
+ i64 nSize; /* Size of log file */
+ u32 aFrameCksum[2] = {0, 0}; /* hdr.aFrameCksum as of last commit frame */
+ int rc; /* Return code */
+ sqlite3_file *pWalFd = pWal->apWalFd[iWal]; /* File being recovered */
+
+ assert( iWal==0 || iWal==1 );
+
+ memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
+ sqlite3_randomness(8, pWal->hdr.aSalt);
+
+ rc = sqlite3OsFileSize(pWalFd, &nSize);
+ if( rc==SQLITE_OK ){
+ if( nSize>WAL_HDRSIZE ){
+ u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */
+ u32 *aPrivate = 0; /* Heap copy of *-shm pg being populated */
+ u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */
+ int szFrame; /* Number of bytes in buffer aFrame[] */
+ u8 *aData; /* Pointer to data part of aFrame buffer */
+ int szPage; /* Page size according to the log */
+ u32 magic; /* Magic value read from WAL header */
+ u32 version; /* Version number read from WAL header */
+ int isValid; /* True if this frame is valid */
+ int iPg; /* Current 32KB wal-index page */
+ int iLastFrame; /* Last frame in wal, based on size alone */
+ int iLastPg; /* Last shm page used by this wal */
+
+ /* Read in the WAL header. */
+ rc = sqlite3OsRead(pWalFd, aBuf, WAL_HDRSIZE, 0);
+ if( rc!=SQLITE_OK ){
+ return rc;
+ }
+
+ /* If the database page size is not a power of two, or is greater than
+ ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid
+ ** data. Similarly, if the 'magic' value is invalid, ignore the whole
+ ** WAL file.
+ */
+ magic = sqlite3Get4byte(&aBuf[0]);
+ szPage = sqlite3Get4byte(&aBuf[8]);
+ if( (magic&0xFFFFFFFE)!=WAL_MAGIC
+ || szPage&(szPage-1)
+ || szPage>SQLITE_MAX_PAGE_SIZE
+ || szPage<512
+ ){
+ return SQLITE_OK;
+ }
+ pWal->hdr.bigEndCksum = (u8)(magic&0x00000001);
+ pWal->szPage = szPage;
+
+ /* Verify that the WAL header checksum is correct */
+ walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN,
+ aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum
+ );
+ if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24])
+ || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28])
+ ){
+ return SQLITE_OK;
+ }
+
+ memcpy(&pWal->hdr.aSalt, &aBuf[16], 8);
+ *pnCkpt = sqlite3Get4byte(&aBuf[12]);
+
+ /* Verify that the version number on the WAL format is one that we
+ ** are able to understand */
+ version = sqlite3Get4byte(&aBuf[4]);
+ if( version!=WAL_VERSION1 && version!=WAL_VERSION2 ){
+ return SQLITE_CANTOPEN_BKPT;
+ }
+ pWal->hdr.iVersion = version;
+
+ /* Malloc a buffer to read frames into. The same allocation also holds
+ ** the private copy (aPrivate) of the wal-index page being built, placed
+ ** directly after the frame data. */
+ szFrame = szPage + WAL_FRAME_HDRSIZE;
+ aFrame = (u8 *)sqlite3_malloc64(szFrame + WALINDEX_PGSZ);
+ if( !aFrame ){
+ return SQLITE_NOMEM_BKPT;
+ }
+ aData = &aFrame[WAL_FRAME_HDRSIZE];
+ aPrivate = (u32*)&aData[szPage];
+
+ /* Read all frames from the log file. */
+ iLastFrame = (nSize - WAL_HDRSIZE) / szFrame;
+ if( version==WAL_VERSION2 ){
+ iLastPg = walFramePage2(iWal, iLastFrame);
+ }else{
+ iLastPg = walFramePage(iLastFrame);
+ }
+ for(iPg=iWal; iPg<=iLastPg; iPg+=(version==WAL_VERSION2 ? 2 : 1)){
+ u32 *aShare; /* Page iPg as returned by walIndexPage() */
+ int iFrame; /* Index of last frame read */
+ int iLast; /* Last frame to read for this wal-index page */
+ int iFirst; /* First frame to read for this wal-index page */
+ int nHdr, nHdr32; /* Bytes / u32 words skipped by the copy below */
+
+ rc = walIndexPage(pWal, iPg, (volatile u32**)&aShare);
++ assert( aShare!=0 || rc!=SQLITE_OK );
++ if( aShare==0 ) break;
+ pWal->apWiData[iPg] = aPrivate; /* Populate the heap copy, not aShare */
+
+ if( iWal ){
+ assert( version==WAL_VERSION2 );
+ iFirst = 1 + (iPg/2)*HASHTABLE_NPAGE;
+ iLast = iFirst + HASHTABLE_NPAGE - 1;
+ }else{
+ int i2 = (version==WAL_VERSION2) ? (iPg/2) : iPg;
+ iLast = HASHTABLE_NPAGE_ONE+i2*HASHTABLE_NPAGE;
+ iFirst = 1 + (i2==0?0:HASHTABLE_NPAGE_ONE+(i2-1)*HASHTABLE_NPAGE);
+ }
+ iLast = MIN(iLast, iLastFrame);
+
+ for(iFrame=iFirst; iFrame<=iLast; iFrame++){
+ i64 iOffset = walFrameOffset(iFrame, szPage);
+ u32 pgno; /* Database page number for frame */
+ u32 nTruncate; /* dbsize field from frame header */
+
+ /* Read and decode the next log frame. */
+ rc = sqlite3OsRead(pWalFd, aFrame, szFrame, iOffset);
+ if( rc!=SQLITE_OK ) break;
+ isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame);
+ if( !isValid ) break;
+ rc = walIndexAppend(pWal, iWal, iFrame, pgno);
+ if( NEVER(rc!=SQLITE_OK) ) break;
+
+ /* If nTruncate is non-zero, this is a commit record. */
+ if( nTruncate ){
+ pWal->hdr.mxFrame = iFrame;
+ pWal->hdr.nPage = nTruncate;
+ pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); /* 65536 is stored as 1 */
+ testcase( szPage<=32768 );
+ testcase( szPage>=65536 );
+ aFrameCksum[0] = pWal->hdr.aFrameCksum[0];
+ aFrameCksum[1] = pWal->hdr.aFrameCksum[1];
+ }
+ }
+ pWal->apWiData[iPg] = aShare; /* Restore the walIndexPage() pointer */
+ nHdr = (iPg==0 ? WALINDEX_HDR_SIZE : 0);
+ nHdr32 = nHdr / sizeof(u32);
+#ifndef SQLITE_SAFER_WALINDEX_RECOVERY
+ /* Memcpy() should work fine here, on all reasonable implementations.
+ ** Technically, memcpy() might change the destination to some
+ ** intermediate value before setting to the final value, and that might
+ ** cause a concurrent reader to malfunction. Memcpy() is allowed to
+ ** do that, according to the spec, but no memcpy() implementation that
+ ** we know of actually does that, which is why we say that memcpy()
+ ** is safe for this. Memcpy() is certainly a lot faster.
+ */
+ memcpy(&aShare[nHdr32], &aPrivate[nHdr32], WALINDEX_PGSZ-nHdr);
+#else
+ /* In the event that some platform is found for which memcpy()
+ ** changes the destination to some intermediate value before
+ ** setting the final value, this alternative copy routine is
+ ** provided.
+ */
+ {
+ int i;
+ for(i=nHdr32; i<WALINDEX_PGSZ/sizeof(u32); i++){
+ if( aShare[i]!=aPrivate[i] ){
+ /* Atomic memory operations are not required here because if
+ ** the value needs to be changed, that means it is not being
+ ** accessed concurrently. */
+ aShare[i] = aPrivate[i];
+ }
+ }
+ }
+#endif
+ if( iFrame<=iLast ) break; /* Frame loop exited early: stop recovery */
+ }
+
+ sqlite3_free(aFrame);
+ }else if( pbZero ){
+ *pbZero = 1;
+ }
+ }
+
+ /* Publish the checksum captured at the last commit frame, or {0,0} if
+ ** no commit frame was seen. */
+ pWal->hdr.aFrameCksum[0] = aFrameCksum[0];
+ pWal->hdr.aFrameCksum[1] = aFrameCksum[1];
+
+ return rc;
+}
+
+static int walOpenWal2(Wal *pWal){ /* Open pWal->zWalName2 on apWalFd[1]
+                                   ** unless that handle is already open.
+                                   ** Returns SQLITE_OK or the error code
+                                   ** from sqlite3OsOpen(). */
+ int rc = SQLITE_OK; /* Stays SQLITE_OK when the file is already open */
+ if( !isOpen(pWal->apWalFd[1]) ){
+ int f = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); /* Open flags; also receives the output flags */
+ rc = sqlite3OsOpen(pWal->pVfs, pWal->zWalName2, pWal->apWalFd[1], f, &f);
+ }
+ return rc;
+}
+
+static int walTruncateWal2(Wal *pWal){ /* If the file pWal->zWalName2
+                                       ** exists, open it, truncate it to
+                                       ** zero bytes and close it again.
+                                       ** Must be called with apWalFd[1]
+                                       ** closed. Returns an SQLite result
+                                       ** code. */
+ int bIs; /* Set by sqlite3OsAccess(): true if the file exists */
+ int rc; /* Return code */
+ assert( !isOpen(pWal->apWalFd[1]) );
+ rc = sqlite3OsAccess(pWal->pVfs, pWal->zWalName2, SQLITE_ACCESS_EXISTS, &bIs);
+ if( rc==SQLITE_OK && bIs ){
+ rc = walOpenWal2(pWal);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3OsTruncate(pWal->apWalFd[1], 0);
+ sqlite3OsClose(pWal->apWalFd[1]); /* Closed whether or not truncate succeeded */
+ }
+ }
+ return rc;
+}
/*
** Recover the wal-index by reading the write-ahead log file.
int j; /* Counter variable */
int nEntry; /* Number of entries in this segment */
ht_slot *aIndex; /* Sorted index for this segment */
+ u32 iZero;
- if( (i+1)==nSegment ){
- nEntry = (int)(iLast - sLoc.iZero);
+ if( iMode==2 ){
+ walExternalDecode(sLoc.iZero+1, &iZero);
+ iZero--;
+ assert( iZero==0 || i>=2 );
+ }else{
+ iZero = sLoc.iZero;
+ }
+
- sLoc.aPgno++;
+ if( i==iLastSeg ){
+ nEntry = (int)(iLast - iZero);
}else{
nEntry = (int)((u32*)sLoc.aHash - (u32*)sLoc.aPgno);
}
*/
void sqlite3WalEndReadTransaction(Wal *pWal){
sqlite3WalEndWriteTransaction(pWal);
- if( pWal->readLock>=0 ){
+ if( pWal->readLock!=WAL_LOCK_NONE ){ /* Nothing to release when readLock is WAL_LOCK_NONE */
walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
- pWal->readLock = -1;
+ pWal->readLock = WAL_LOCK_NONE; /* Record that the shared lock has been dropped */
+ }
+}
+
+/* Search hash table iHash for an entry matching page number
+** pgno. Each call to this function searches a single hash table
+** (each hash table indexes up to HASHTABLE_NPAGE frames).
+**
+** If one or more matching entries are found, *piRead is set to the frame
+** number of the last match encountered while walking the collision chain.
+** If no entry matches, *piRead is left unchanged.
+**
+** Returns SQLITE_OK on success, the error code from walHashGet() if the
+** hash table cannot be obtained, or SQLITE_CORRUPT_BKPT if more than
+** HASHTABLE_NSLOT occupied slots are visited without reaching an empty one.
+**
+** This code might run concurrently to the code in walIndexAppend()
+** that adds entries to the wal-index (and possibly to this hash
+** table). This means the value just read from the hash
+** slot (aHash[iKey]) may have been added before or after the
+** current read transaction was opened. Values added after the
+** read transaction was opened may have been written incorrectly -
+** i.e. these slots may contain garbage data. However, we assume
+** that any slots written before the current read transaction was
+** opened remain unmodified.
+**
+** For the reasons above, the if(...) condition featured in the inner
+** loop of the following block is more stringent than would be required
+** if we had exclusive access to the hash-table:
+**
+** (aPgno[iFrame]==pgno):
+** This condition filters out normal hash-table collisions.
+**
+** (iFrame<=iLast):
+** This condition filters out entries that were added to the hash
+** table after the current read-transaction had started.
+**
+** (iFrame>=pWal->minFrame):
+** This condition filters out frames numbered below pWal->minFrame.
+*/
+static int walSearchHash(
+ Wal *pWal, /* WAL handle */
+ u32 iLast, /* Ignore frames numbered greater than this */
+ int iHash, /* Index of the hash table to search */
+ Pgno pgno, /* Database page number to look up */
+ u32 *piRead /* IN/OUT: set to the matching frame number, if any */
+){
+ WalHashLoc sLoc; /* Hash table location */
+ int iKey; /* Hash slot index */
+ int nCollide; /* Number of hash collisions remaining */
+ int rc; /* Error code */
+
+ rc = walHashGet(pWal, iHash, &sLoc);
+ if( rc!=SQLITE_OK ){
+ return rc;
}
- && sLoc.aPgno[sLoc.aHash[iKey]]==pgno
+ nCollide = HASHTABLE_NSLOT;
+ for(iKey=walHash(pgno); sLoc.aHash[iKey]; iKey=walNextHash(iKey)){
+ u32 iFrame = sLoc.aHash[iKey] + sLoc.iZero; /* Slot value offset by sLoc.iZero */
+ if( iFrame<=iLast
+ && iFrame>=pWal->minFrame
++ && sLoc.aPgno[sLoc.aHash[iKey]-1]==pgno
+ ){
+ assert( iFrame>*piRead || CORRUPT_DB );
+ *piRead = iFrame;
+ }
+ if( (nCollide--)==0 ){
+ return SQLITE_CORRUPT_BKPT;
+ }
+ }
+
+ return SQLITE_OK;
+}
+
+static int walSearchWal( /* Search the hash tables of wal file iWal for pgno */
+ Wal *pWal, /* WAL handle */
+ int iWal, /* Which wal file to search; passed to walidxGetMxFrame() */
+ Pgno pgno, /* Database page number to look up */
+ u32 *piRead /* IN/OUT: frame number found; the search runs only while 0 */
+){
+ int rc = SQLITE_OK; /* Return code */
+ int bWal2 = isWalMode2(pWal); /* Result of isWalMode2(): 0 or 1 */
+ u32 iLast = walidxGetMxFrame(&pWal->hdr, iWal); /* Zero means nothing to search */
+ if( iLast ){
+ int iHash; /* Hash table currently being searched */
+ int iMinHash = walFramePage(pWal->minFrame); /* Lowest hash table to visit */
+ u32 iExternal = bWal2 ? walExternalEncode(iWal, iLast) : iLast; /* Frame number used for hash lookups */
+ assert( bWal2==0 || pWal->minFrame==0 );
+ /* Walk downwards from the hash table that holds iExternal, stopping as
+ ** soon as a match is stored in *piRead. When bWal2 is set the stride is
+ ** 2 - presumably because the two wal files use alternating hash tables
+ ** (TODO confirm against walExternalEncode()). */
+ for(iHash=walFramePage(iExternal);
+ iHash>=iMinHash && *piRead==0;
+ iHash-=(1+bWal2)
+ ){
+ rc = walSearchHash(pWal, iExternal, iHash, pgno, piRead);
+ if( rc!=SQLITE_OK ) break;
+ }
+ }
+ return rc;
}
/*