From: drh Date: Sat, 9 Feb 2019 21:06:40 +0000 (+0000) Subject: Defer computing the MemPage.nFree value of an in-memory btree page X-Git-Tag: version-3.28.0~188^2~6 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b0ea9432a12c01597951967a18e530df1b4bffd1;p=thirdparty%2Fsqlite.git Defer computing the MemPage.nFree value of an in-memory btree page until it is actually needed, since for many pages it is never needed. This checkin works sufficiently to prove the concept, but still has issues with exception handling. FossilOrigin-Name: 1d43ee4000b71f5c6d49244dee96358c567f09ba3451b9d22895a796d3f61ad6 --- diff --git a/manifest b/manifest index 01727ffac6..dc017b8c18 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sa\sfew\sassert()\sstatements\sin\sfts3\sthat\smight\sfail\sif\sthe\sdatabase\sis\scorrupt. -D 2019-02-09T19:23:54.418 +C Defer\scomputing\sthe\sMemPage.nFree\svalue\sof\san\sin-memory\sbtree\spage\nuntil\sit\sis\sactually\sneeded,\ssince\sfor\smany\spages\sit\sis\snever\sneeded.\nThis\scheckin\sworks\ssufficiently\sto\sprove\sthe\sconcept,\sbut\sstill\shas\sissues\nwith\sexception\shandling. +D 2019-02-09T21:06:40.308 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F Makefile.in 178d8eb6840771149cee40b322d1b3be30d330198c522c903c1b66fb5a1bfca4 @@ -455,9 +455,9 @@ F src/auth.c 0fac71038875693a937e506bceb492c5f136dd7b1249fbd4ae70b4e8da14f9df F src/backup.c 78d3cecfbe28230a3a9a1793e2ead609f469be43e8f486ca996006be551857ab F src/bitvec.c 17ea48eff8ba979f1f5b04cc484c7bb2be632f33 F src/btmutex.c 8acc2f464ee76324bf13310df5692a262b801808984c1b79defb2503bbafadb6 -F src/btree.c 18046bf14f0e3fa294ef3f7c2dc30ca7e95f3ac11ec222ad906e40b150051bde +F src/btree.c b2bb677fa5d2f18b7c38869761e0d7d1d66ff3a7bdac509865e0606b281d32bc F src/btree.h 63b94fb38ce571c15eb6a3661815561b501d23d5948b2d1e951fbd7a2d04e8d3 -F src/btreeInt.h cd82f0f08886078bf99b29e1a7045960b1ca5d9d5829c38607e1299c508eaf00 +F src/btreeInt.h d7520b98e72f9a7e2a3140cc476df461fa8a34a3d56258184f8c26f70248cef9 F src/build.c 906ca6663b9dcd413e72ae9c44dd51e596d8336b04d52e678a7501e71c20cab2 F src/callback.c 25dda5e1c2334a367b94a64077b1d06b2553369f616261ca6783c48bcb6bda73 F src/complete.c a3634ab1e687055cd002e11b8f43eb75c17da23e @@ -1804,7 +1804,10 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 1969372ac72d25cc642a0268f4bb0ae4b59f2dca568c119ef61b67183b3a8bd9 -R 337fdf56fb6120be7b5570f5a6b13c97 -U dan -Z 3ee700c86d7982301bd59c67543d08c0 +P db74a56af73d92b7a9d43ceda7e4540915c580c68a0266b4ddefb9e0d5cbcbeb +R 6351a98578ba74f863fbe86b96b0dce9 +T *branch * deferred-free-space +T *sym-deferred-free-space * +T -sym-trunk * +U drh +Z 38c6e8e54a38cca2a22ac1c0553f905f diff --git a/manifest.uuid b/manifest.uuid index 63a146afab..db390c0838 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -db74a56af73d92b7a9d43ceda7e4540915c580c68a0266b4ddefb9e0d5cbcbeb \ No newline at end of file +1d43ee4000b71f5c6d49244dee96358c567f09ba3451b9d22895a796d3f61ad6 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index e4086ed0f5..375d891898 100644 --- a/src/btree.c +++ b/src/btree.c @@ -1506,6 +1506,7 @@ static int defragmentPage(MemPage *pPage, int nMaxFrag){ data[hdr+7] = 0; defragment_out: + assert( pPage->nFree>=0 ); if( data[hdr+7]+cbrk-iCellFirst!=pPage->nFree ){ return SQLITE_CORRUPT_PAGE(pPage); } @@ -1657,6 +1658,7 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ testcase( gap+2+nByte==top ); if( gap+2+nByte>top ){ assert( pPage->nCell>0 || CORRUPT_DB ); + assert( pPage->nFree>=0 ); rc = defragmentPage(pPage, MIN(4, pPage->nFree - (2+nByte))); if( rc ) return rc; top = get2byteNotZero(&data[hdr+5]); @@ -1845,6 +1847,88 @@ static int decodeFlags(MemPage *pPage, int flagByte){ return SQLITE_OK; } +/* +** Compute the amount of freespace on the page. In other words, fill +** in the pPage->nFree field. +*/ +static int btreeComputeFreeSpace(MemPage *pPage){ + int pc; /* Address of a freeblock within pPage->aData[] */ + u8 hdr; /* Offset to beginning of page header */ + u8 *data; /* Equal to pPage->aData */ + int usableSize; /* Amount of usable space on each page */ + int nFree; /* Number of unused bytes on the page */ + int top; /* First byte of the cell content area */ + int iCellFirst; /* First allowable cell or freeblock offset */ + int iCellLast; /* Last possible cell or freeblock offset */ + + assert( pPage->pBt!=0 ); + assert( pPage->pBt->db!=0 ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); + assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) ); + assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) ); + assert( pPage->isInit==1 ); + assert( pPage->nFree<0 ); + + usableSize = pPage->pBt->usableSize; + hdr = pPage->hdrOffset; + data = pPage->aData; + /* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates + ** the start of the cell content area. A zero value for this integer is + ** interpreted as 65536. */ + top = get2byteNotZero(&data[hdr+5]); + iCellFirst = hdr + 8 + pPage->childPtrSize + 2*pPage->nCell; + iCellLast = usableSize - 4; + + /* Compute the total free space on the page + ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the + ** start of the first freeblock on the page, or is zero if there are no + ** freeblocks. */ + pc = get2byte(&data[hdr+1]); + nFree = data[hdr+7] + top; /* Init nFree to non-freeblock free space */ + if( pc>0 ){ + u32 next, size; + if( pciCellLast ){ + /* Freeblock off the end of the page */ + return SQLITE_CORRUPT_PAGE(pPage); + } + next = get2byte(&data[pc]); + size = get2byte(&data[pc+2]); + nFree = nFree + size; + if( next<=pc+size+3 ) break; + pc = next; + } + if( next>0 ){ + /* Freeblock not in ascending order */ + return SQLITE_CORRUPT_PAGE(pPage); + } + if( pc+size>(unsigned int)usableSize ){ + /* Last freeblock extends past page end */ + return SQLITE_CORRUPT_PAGE(pPage); + } + } + + /* At this point, nFree contains the sum of the offset to the start + ** of the cell-content area plus the number of free bytes within + ** the cell-content area. If this is greater than the usable-size + ** of the page, then the page must be corrupted. This check also + ** serves to verify that the offset to the start of the cell-content + ** area, according to the page header, lies within the page. + */ + if( nFree>usableSize ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + pPage->nFree = (u16)(nFree - iCellFirst); + return SQLITE_OK; +} + /* ** Initialize the auxiliary information for a disk block. ** @@ -1861,8 +1945,6 @@ static int btreeInitPage(MemPage *pPage){ BtShared *pBt; /* The main btree structure */ int usableSize; /* Amount of usable space on each page */ u16 cellOffset; /* Offset from start of page to first cell pointer */ - int nFree; /* Number of unused bytes on the page */ - int top; /* First byte of the cell content area */ int iCellFirst; /* First allowable cell or freeblock offset */ int iCellLast; /* Last possible cell or freeblock offset */ @@ -1890,10 +1972,6 @@ static int btreeInitPage(MemPage *pPage){ pPage->aDataEnd = &data[usableSize]; pPage->aCellIdx = &data[cellOffset]; pPage->aDataOfst = &data[pPage->childPtrSize]; - /* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates - ** the start of the cell content area. A zero value for this integer is - ** interpreted as 65536. */ - top = get2byteNotZero(&data[hdr+5]); /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the ** number of cells on the page. */ pPage->nCell = get2byte(&data[hdr+3]); @@ -1906,7 +1984,9 @@ static int btreeInitPage(MemPage *pPage){ ** possible for a root page of a table that contains no rows) then the ** offset to the cell content area will equal the page size minus the ** bytes of reserved space. */ - assert( pPage->nCell>0 || top==usableSize || CORRUPT_DB ); + assert( pPage->nCell>0 + || get2byteNotZero(&data[hdr+5])==usableSize + || CORRUPT_DB ); /* A malformed database page might cause us to read past the end ** of page when parsing a cell. @@ -1937,53 +2017,7 @@ static int btreeInitPage(MemPage *pPage){ } if( !pPage->leaf ) iCellLast++; } - - /* Compute the total free space on the page - ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the - ** start of the first freeblock on the page, or is zero if there are no - ** freeblocks. */ - pc = get2byte(&data[hdr+1]); - nFree = data[hdr+7] + top; /* Init nFree to non-freeblock free space */ - if( pc>0 ){ - u32 next, size; - if( pciCellLast ){ - /* Freeblock off the end of the page */ - return SQLITE_CORRUPT_PAGE(pPage); - } - next = get2byte(&data[pc]); - size = get2byte(&data[pc+2]); - nFree = nFree + size; - if( next<=pc+size+3 ) break; - pc = next; - } - if( next>0 ){ - /* Freeblock not in ascending order */ - return SQLITE_CORRUPT_PAGE(pPage); - } - if( pc+size>(unsigned int)usableSize ){ - /* Last freeblock extends past page end */ - return SQLITE_CORRUPT_PAGE(pPage); - } - } - - /* At this point, nFree contains the sum of the offset to the start - ** of the cell-content area plus the number of free bytes within - ** the cell-content area. If this is greater than the usable-size - ** of the page, then the page must be corrupted. This check also - ** serves to verify that the offset to the start of the cell-content - ** area, according to the page header, lies within the page. - */ - if( nFree>usableSize ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - pPage->nFree = (u16)(nFree - iCellFirst); + pPage->nFree = -1; /* Indicate that this value is yet uncomputed */ pPage->isInit = 1; return SQLITE_OK; } @@ -2127,19 +2161,18 @@ static int getAndInitPage( if( pgno>btreePagecount(pBt) ){ rc = SQLITE_CORRUPT_BKPT; - goto getAndInitPage_error; + goto getAndInitPage_error1; } rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly); if( rc ){ - goto getAndInitPage_error; + goto getAndInitPage_error1; } *ppPage = (MemPage*)sqlite3PagerGetExtra(pDbPage); if( (*ppPage)->isInit==0 ){ btreePageFromDbPage(pDbPage, pgno, pBt); rc = btreeInitPage(*ppPage); if( rc!=SQLITE_OK ){ - releasePage(*ppPage); - goto getAndInitPage_error; + goto getAndInitPage_error2; } } assert( (*ppPage)->pgno==pgno ); @@ -2149,12 +2182,13 @@ static int getAndInitPage( ** compatible with the root page. */ if( pCur && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) ){ rc = SQLITE_CORRUPT_PGNO(pgno); - releasePage(*ppPage); - goto getAndInitPage_error; + goto getAndInitPage_error2; } return SQLITE_OK; -getAndInitPage_error: +getAndInitPage_error2: + releasePage(*ppPage); +getAndInitPage_error1: if( pCur ){ pCur->iPage--; pCur->pPage = pCur->apPage[pCur->iPage]; @@ -6566,6 +6600,7 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ assert( CORRUPT_DB || sz==cellSize(pPage, idx) ); assert( sqlite3PagerIswriteable(pPage->pDbPage) ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->nFree>=0 ); data = pPage->aData; ptr = &pPage->aCellIdx[2*idx]; pc = get2byte(ptr); @@ -6636,6 +6671,7 @@ static void insertCell( ** might be less than 8 (leaf-size + pointer) on the interior node. Hence ** the term after the || in the following assert(). */ assert( sz==pPage->xCellSize(pPage, pCell) || (sz==8 && iChild>0) ); + assert( pPage->nFree>=0 ); if( pPage->nOverflow || sz+2>pPage->nFree ){ if( pTemp ){ memcpy(pTemp, pCell, sz); @@ -7187,8 +7223,17 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){ assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( sqlite3PagerIswriteable(pParent->pDbPage) ); assert( pPage->nOverflow==1 ); - + if( pPage->nCell==0 ) return SQLITE_CORRUPT_BKPT; /* dbfuzz001.test */ + if( pPage->nFree<0 ){ + rc = btreeComputeFreeSpace(pPage); + if( rc ) return rc; + } + if( pParent->nFree<0 ){ + rc = btreeComputeFreeSpace(pParent); + if( rc ) return rc; + } + /* Allocate a new page. This page will become the right-sibling of ** pPage. Make the parent page writable, so that the new divider cell @@ -7466,6 +7511,10 @@ static int balance_nonroot( if( !aOvflSpace ){ return SQLITE_NOMEM_BKPT; } + if( pParent->nFree<0 ){ + rc = btreeComputeFreeSpace(pParent); + if( rc ) return rc; + } /* Find the sibling pages to balance. Also locate the cells in pParent ** that divide the siblings. An attempt is made to find NN siblings on @@ -7501,6 +7550,9 @@ static int balance_nonroot( pgno = get4byte(pRight); while( 1 ){ rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0); + if( rc==0 && apOld[i]->nFree<0 ){ + rc = btreeComputeFreeSpace(apOld[i]); + } if( rc ){ memset(apOld, 0, (i+1)*sizeof(MemPage*)); goto balance_cleanup; @@ -7704,6 +7756,7 @@ static int balance_nonroot( b.apEnd[k] = pParent->aDataEnd; b.ixNx[k] = cntOld[i]+1; } + assert( p->nFree>=0 ); szNew[i] = usableSpace - p->nFree; for(j=0; jnOverflow; j++){ szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]); @@ -8247,6 +8300,10 @@ static int balance(BtCursor *pCur){ int iPage = pCur->iPage; MemPage *pPage = pCur->pPage; + if( pPage->nFree<0 ){ + rc = btreeComputeFreeSpace(pPage); + if( rc ) break; + } if( iPage==0 ){ if( pPage->nOverflow ){ /* The root page of the b-tree is overfull. In this case call the @@ -8621,6 +8678,10 @@ int sqlite3BtreeInsert( pPage = pCur->pPage; assert( pPage->intKey || pX->nKey>=0 ); assert( pPage->leaf || !pPage->intKey ); + if( pPage->nFree<0 ){ + rc = btreeComputeFreeSpace(pPage); + if( rc ) return rc; + } TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n", pCur->pgnoRoot, pX->nKey, pX->nData, pPage->pgno, @@ -8771,6 +8832,7 @@ int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){ iCellIdx = pCur->ix; pPage = pCur->pPage; pCell = findCell(pPage, iCellIdx); + if( pPage->nFree<0 && btreeComputeFreeSpace(pPage) ) return SQLITE_CORRUPT; /* If the bPreserve flag is set to true, then the cursor position must ** be preserved following this delete operation. If the current delete @@ -8841,6 +8903,10 @@ int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){ Pgno n; unsigned char *pTmp; + if( pLeaf->nFree<0 ){ + rc = btreeComputeFreeSpace(pLeaf); + if( rc ) return rc; + } if( iCellDepthiPage-1 ){ n = pCur->apPage[iCellDepth+1]->pgno; }else{ @@ -9732,6 +9798,11 @@ static int checkTreePage( "btreeInitPage() returns error code %d", rc); goto end_of_check; } + if( (rc = btreeComputeFreeSpace(pPage))!=0 ){ + assert( rc==SQLITE_CORRUPT ); + checkAppendMsg(pCheck, "free space corruption", rc); + goto end_of_check; + } data = pPage->aData; hdr = pPage->hdrOffset; diff --git a/src/btreeInt.h b/src/btreeInt.h index ea5e0815a1..1f7a436b3e 100644 --- a/src/btreeInt.h +++ b/src/btreeInt.h @@ -286,7 +286,7 @@ struct MemPage { u16 maxLocal; /* Copy of BtShared.maxLocal or BtShared.maxLeaf */ u16 minLocal; /* Copy of BtShared.minLocal or BtShared.minLeaf */ u16 cellOffset; /* Index in aData of first cell pointer */ - u16 nFree; /* Number of free bytes on the page */ + int nFree; /* Number of free bytes on the page */ u16 nCell; /* Number of cells on this page, local and ovfl */ u16 maskPage; /* Mask for page offset */ u16 aiOvfl[4]; /* Insert the i-th overflow cell before the aiOvfl-th