From 1ffd247c0c9e01226a6803d130d00a2321daeb75 Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 23 Jun 2015 02:37:30 +0000 Subject: [PATCH] Avoid computing cell sizes in balance_nonroot() until they are really needed. This gives an overall 1.7% performance gain for about 1000 extra bytes of code space. FossilOrigin-Name: 43844537e8a372953386663f8177202901ba7566 --- manifest | 15 ++-- manifest.uuid | 2 +- src/btree.c | 229 ++++++++++++++++++++++++++++++++------------------ 3 files changed, 154 insertions(+), 92 deletions(-) diff --git a/manifest b/manifest index 0d513dcffa..2e7c75d3cc 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sthe\sway\sthat\sbalance_nonroot()\spartitions\scells\sbetween\sthe\ssibling\npages\ssuch\sthat\sa\sscan\sof\sthe\scell\ssize\sarray\sis\snot\srequired. -D 2015-06-22T20:02:04.079 +C Avoid\scomputing\scell\ssizes\sin\sbalance_nonroot()\suntil\sthey\sare\sreally\sneeded.\nThis\sgives\san\soverall\s1.7%\sperformance\sgain\sfor\sabout\s1000\sextra\sbytes\sof\ncode\sspace. +D 2015-06-23T02:37:30.513 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 1063c58075b7400d93326b0eb332b48a54f53025 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -192,7 +192,7 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c 8f65725c7c138aecb1a6339ef2b276b4b3632a20 +F src/btree.c e6ac65de1742116fb5abbb106a26eb5bf48cd79e F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 F src/btreeInt.h 6ece2dd9c8e2eac05f0a8ded8772a44e96486c65 F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 @@ -1286,10 +1286,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b97f9cf73e503c7285ba3a801e1f932f222d96b2 -R d9f18f053d77ccfca6ce052159dd7203 -T *branch * btree-opt2 -T *sym-btree-opt2 * -T -sym-trunk * +P 168728715156d756ac8c0df45710d054eee027ec +R 9b9baae7202760bc8528aa5e0a31b7cf U drh -Z ffa77b78cbb43ac17ffd24e89fe96b6a +Z 9735e4632bc637c34becd65ed89566cb diff --git a/manifest.uuid b/manifest.uuid index 49c14f7000..e09e14ca9e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -168728715156d756ac8c0df45710d054eee027ec \ No newline at end of file +43844537e8a372953386663f8177202901ba7566 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index ad86c36e78..9693b0d393 100644 --- a/src/btree.c +++ b/src/btree.c @@ -6283,6 +6283,52 @@ static void insertCell( } } +/* +** A CellArray object contains a cache of pointers and sizes for a +** consecutive sequence of cells that might be held multiple pages. +*/ +typedef struct CellArray CellArray; +struct CellArray { + int nCell; /* Number of cells in apCell[] */ + MemPage *pRef; /* Reference page */ + u8 **apCell; /* All cells begin balanced */ + u16 *szCell; /* Local size of all cells in apCell[] */ +}; + +/* +** Make sure the cell sizes at idx, idx+1, ..., idx+N-1 have been +** computed. +*/ +static void populateCellCache(CellArray *p, int idx, int N){ + assert( idx>=0 && idx+N<=p->nCell ); + while( N>0 ){ + assert( p->apCell[idx]!=0 ); + if( p->szCell[idx]==0 ){ + p->szCell[idx] = p->pRef->xCellSize(p->pRef, p->apCell[idx]); + }else{ + assert( CORRUPT_DB || + p->szCell[idx]==p->pRef->xCellSize(p->pRef, p->apCell[idx]) ); + } + idx++; + N--; + } +} + +/* +** Return the size of the Nth element of the cell array +*/ +static SQLITE_NOINLINE u16 computeCellSize(CellArray *p, int N){ + assert( N>=0 && NnCell ); + assert( p->szCell[N]==0 ); + p->szCell[N] = p->pRef->xCellSize(p->pRef, p->apCell[N]); + return p->szCell[N]; +} +static u16 cachedCellSize(CellArray *p, int N){ + assert( N>=0 && NnCell ); + if( p->szCell[N] ) return p->szCell[N]; + return computeCellSize(p, N); +} + /* ** Array apCell[] contains pointers to nCell b-tree page cells. The ** szCell[] array contains the size in bytes of each cell. This function @@ -6462,8 +6508,7 @@ static int editPage( int iOld, /* Index of first cell currently on page */ int iNew, /* Index of new first cell on page */ int nNew, /* Final number of cells on page */ - u8 **apCell, /* Array of cells */ - u16 *szCell /* Array of cell sizes */ + CellArray *pCArray /* Array of cells and sizes */ ){ u8 * const aData = pPg->aData; const int hdr = pPg->hdrOffset; @@ -6482,15 +6527,19 @@ static int editPage( /* Remove cells from the start and end of the page */ if( iOldapCell[iOld], &pCArray->szCell[iOld] ); memmove(pPg->aCellIdx, &pPg->aCellIdx[nShift*2], nCell*2); nCell -= nShift; } if( iNewEnd < iOldEnd ){ + populateCellCache(pCArray, iNewEnd, iOldEnd-iNewEnd); nCell -= pageFreeArray( - pPg, iOldEnd-iNewEnd, &apCell[iNewEnd], &szCell[iNewEnd] + pPg, iOldEnd-iNewEnd, + &pCArray->apCell[iNewEnd], &pCArray->szCell[iNewEnd] ); } @@ -6503,9 +6552,10 @@ static int editPage( assert( (iOld-iNew)aCellIdx; memmove(&pCellptr[nAdd*2], pCellptr, nCell*2); + populateCellCache(pCArray, iNew, nAdd); if( pageInsertArray( pPg, pBegin, &pData, pCellptr, - nAdd, &apCell[iNew], &szCell[iNew] + nAdd, &pCArray->apCell[iNew], &pCArray->szCell[iNew] ) ) goto editpage_fail; nCell += nAdd; } @@ -6517,18 +6567,20 @@ static int editPage( pCellptr = &pPg->aCellIdx[iCell * 2]; memmove(&pCellptr[2], pCellptr, (nCell - iCell) * 2); nCell++; + (void)cachedCellSize(pCArray, iCell + iNew); if( pageInsertArray( pPg, pBegin, &pData, pCellptr, - 1, &apCell[iCell + iNew], &szCell[iCell + iNew] + 1, &pCArray->apCell[iCell + iNew], &pCArray->szCell[iCell + iNew] ) ) goto editpage_fail; } } /* Append cells to the end of the page */ pCellptr = &pPg->aCellIdx[nCell*2]; + populateCellCache(pCArray, iNew+nCell, nNew-nCell); if( pageInsertArray( pPg, pBegin, &pData, pCellptr, - nNew-nCell, &apCell[iNew+nCell], &szCell[iNew+nCell] + nNew-nCell, &pCArray->apCell[iNew+nCell], &pCArray->szCell[iNew+nCell] ) ) goto editpage_fail; pPg->nCell = nNew; @@ -6539,19 +6591,21 @@ static int editPage( #ifdef SQLITE_DEBUG for(i=0; iapCell[i+iNew]; int iOff = get2byte(&pPg->aCellIdx[i*2]); if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){ pCell = &pTmp[pCell - aData]; } - assert( 0==memcmp(pCell, &aData[iOff], szCell[i+iNew]) ); + assert( 0==memcmp(pCell, &aData[iOff], + pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) ); } #endif return SQLITE_OK; editpage_fail: /* Unable to edit this page. Rebuild it from scratch instead. */ - return rebuildPage(pPg, nNew, &apCell[iNew], &szCell[iNew]); + populateCellCache(pCArray, iNew, nNew); + return rebuildPage(pPg, nNew, &pCArray->apCell[iNew], &pCArray->szCell[iNew]); } /* @@ -6828,7 +6882,6 @@ static int balance_nonroot( int bBulk /* True if this call is part of a bulk load */ ){ BtShared *pBt; /* The whole database */ - int nCell = 0; /* Number of cells in apCell[] */ int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */ int nNew = 0; /* Number of pages in apNew[] */ int nOld; /* Number of pages in apOld[] */ @@ -6846,19 +6899,20 @@ static int balance_nonroot( MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */ u8 *pRight; /* Location in parent of right-sibling pointer */ u8 *apDiv[NB-1]; /* Divider cells in pParent */ - int cntNew[NB+2]; /* Index in aCell[] of cell after i-th page */ - int cntOld[NB+2]; /* Old index in aCell[] after i-th page */ + int cntNew[NB+2]; /* Index in b.paCell[] of cell after i-th page */ + int cntOld[NB+2]; /* Old index in b.apCell[] */ int szNew[NB+2]; /* Combined size of cells placed on i-th page */ - u8 **apCell = 0; /* All cells begin balanced */ - u16 *szCell; /* Local size of all cells in apCell[] */ u8 *aSpace1; /* Space for copies of dividers cells */ Pgno pgno; /* Temp var to store a page number in */ u8 abDone[NB+2]; /* True after i'th new page is populated */ Pgno aPgno[NB+2]; /* Page numbers of new pages before shuffling */ Pgno aPgOrder[NB+2]; /* Copy of aPgno[] used for sorting pages */ u16 aPgFlags[NB+2]; /* flags field of new pages before shuffling */ + CellArray b; /* Parsed information on cells being balanced */ memset(abDone, 0, sizeof(abDone)); + b.nCell = 0; + b.apCell = 0; pBt = pParent->pBt; assert( sqlite3_mutex_held(pBt->mutex) ); assert( sqlite3PagerIswriteable(pParent->pDbPage) ); @@ -6967,40 +7021,41 @@ static int balance_nonroot( ** Allocate space for memory structures */ szScratch = - nMaxCells*sizeof(u8*) /* apCell */ - + nMaxCells*sizeof(u16) /* szCell */ + nMaxCells*sizeof(u8*) /* b.apCell */ + + nMaxCells*sizeof(u16) /* b.szCell */ + pBt->pageSize; /* aSpace1 */ /* EVIDENCE-OF: R-28375-38319 SQLite will never request a scratch buffer ** that is more than 6 times the database page size. */ assert( szScratch<=6*(int)pBt->pageSize ); - apCell = sqlite3ScratchMalloc( szScratch ); - if( apCell==0 ){ + b.apCell = sqlite3ScratchMalloc( szScratch ); + if( b.apCell==0 ){ rc = SQLITE_NOMEM; goto balance_cleanup; } - szCell = (u16*)&apCell[nMaxCells]; - aSpace1 = (u8*)&szCell[nMaxCells]; + b.szCell = (u16*)&b.apCell[nMaxCells]; + aSpace1 = (u8*)&b.szCell[nMaxCells]; assert( EIGHT_BYTE_ALIGNMENT(aSpace1) ); /* ** Load pointers to all cells on sibling pages and the divider cells - ** into the local apCell[] array. Make copies of the divider cells + ** into the local b.apCell[] array. Make copies of the divider cells ** into space obtained from aSpace1[]. The divider cells have already ** been removed from pParent. ** ** If the siblings are on leaf pages, then the child pointers of the ** divider cells are stripped from the cells before they are copied - ** into aSpace1[]. In this way, all cells in apCell[] are without + ** into aSpace1[]. In this way, all cells in b.apCell[] are without ** child pointers. If siblings are not leaves, then all cell in - ** apCell[] include child pointers. Either way, all cells in apCell[] + ** b.apCell[] include child pointers. Either way, all cells in b.apCell[] ** are alike. ** ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf. ** leafData: 1 if pPage holds key+data and pParent holds only keys. */ - leafCorrection = apOld[0]->leaf*4; - leafData = apOld[0]->intKeyLeaf; + b.pRef = apOld[0]; + leafCorrection = b.pRef->leaf*4; + leafData = b.pRef->intKeyLeaf; for(i=0; inCell+pOld->nOverflow; + memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*limit); if( pOld->nOverflow>0 ){ for(j=0; jxCellSize(pOld, apCell[nCell]); - nCell++; + assert( b.nCellaData; u16 maskPage = pOld->maskPage; u16 cellOffset = pOld->cellOffset; for(j=0; jxCellSize(pOld, apCell[nCell]); - nCell++; + assert( b.nCellmaxLocal+23 ); assert( iSpace1 <= (int)pBt->pageSize ); memcpy(pTemp, apDiv[i], sz); - apCell[nCell] = pTemp+leafCorrection; + b.apCell[b.nCell] = pTemp+leafCorrection; assert( leafCorrection==0 || leafCorrection==4 ); - szCell[nCell] = szCell[nCell] - leafCorrection; + b.szCell[b.nCell] = b.szCell[b.nCell] - leafCorrection; if( !pOld->leaf ){ assert( leafCorrection==0 ); assert( pOld->hdrOffset==0 ); /* The right pointer of the child page pOld becomes the left ** pointer of the divider cell */ - memcpy(apCell[nCell], &pOld->aData[8], 4); + memcpy(b.apCell[b.nCell], &pOld->aData[8], 4); }else{ assert( leafCorrection==4 ); - while( szCell[nCell]<4 ){ + while( b.szCell[b.nCell]<4 ){ /* Do not allow any cells smaller than 4 bytes. If a smaller cell ** does exist, pad it with 0x00 bytes. */ - assert( szCell[nCell]==3 || CORRUPT_DB ); - assert( apCell[nCell]==&aSpace1[iSpace1-3] || CORRUPT_DB ); + assert( b.szCell[b.nCell]==3 || CORRUPT_DB ); + assert( b.apCell[b.nCell]==&aSpace1[iSpace1-3] || CORRUPT_DB ); aSpace1[iSpace1++] = 0x00; - szCell[nCell]++; + b.szCell[b.nCell]++; } } - nCell++; + b.nCell++; } } /* - ** Figure out the number of pages needed to hold all nCell cells. + ** Figure out the number of pages needed to hold all b.nCell cells. ** Store this number in "k". Also compute szNew[] which is the total ** size of all cells on the i-th page and cntNew[] which is the index - ** in apCell[] of the cell that divides page i from page i+1. - ** cntNew[k] should equal nCell. + ** in b.apCell[] of the cell that divides page i from page i+1. + ** cntNew[k] should equal b.nCell. ** ** Values computed by this block: ** ** k: The total number of sibling pages ** szNew[i]: Spaced used on the i-th sibling page. - ** cntNew[i]: Index in apCell[] and szCell[] for the first cell to + ** cntNew[i]: Index in b.apCell[] and b.szCell[] for the first cell to ** the right of the i-th sibling page. ** usableSpace: Number of bytes of space available on each sibling. ** @@ -7101,27 +7155,35 @@ static int balance_nonroot( k = i+2; if( k>NB+2 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } szNew[k-1] = 0; - cntNew[k-1] = nCell; + cntNew[k-1] = b.nCell; } - sz = 2+szCell[cntNew[i]-1]; + sz = 2 + cachedCellSize(&b, cntNew[i]-1); szNew[i] -= sz; if( !leafData ){ - sz = cntNew[i]usableSpace ) break; szNew[i] += sz; cntNew[i]++; if( !leafData ){ - sz = cntNew[i]=nCell ){ + if( cntNew[i]>=b.nCell ){ k = i+1; }else if( cntNew[i] - (i>0 ? cntNew[i-1] : 0) <= 0 ){ rc = SQLITE_CORRUPT_BKPT; @@ -7146,22 +7208,24 @@ static int balance_nonroot( int r; /* Index of right-most cell in left sibling */ int d; /* Index of first cell to the left of right sibling */ - r = cntNew[i-1] - 1; - d = r + 1 - leafData; - assert( d szLeft-(b.szCell[r]+2)) ){ + break; + } + szRight += b.szCell[d] + 2; + szLeft -= b.szCell[r] + 2; cntNew[i-1]--; if( cntNew[i-1] <= 0 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } - r = cntNew[i-1] - 1; - d = r + 1 - leafData; } szNew[i] = szRight; szNew[i-1] = szLeft; @@ -7200,7 +7264,7 @@ static int balance_nonroot( zeroPage(pNew, pageFlags); apNew[i] = pNew; nNew++; - cntOld[i] = nCell; + cntOld[i] = b.nCell; /* Set the pointer-map entry for the new sibling page. */ if( ISAUTOVACUUM ){ @@ -7305,8 +7369,8 @@ static int balance_nonroot( int iNew = 0; int iOld = 0; - for(i=0; inCell + pOld->nOverflow + !leafData; @@ -7332,7 +7396,7 @@ static int balance_nonroot( ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc); if( rc ) goto balance_cleanup; } - if( szCell[i]>pNew->minLocal ){ + if( cachedCellSize(&b,i)>pNew->minLocal ){ ptrmapPutOvflPtr(pNew, pCell, &rc); if( rc ) goto balance_cleanup; } @@ -7349,20 +7413,21 @@ static int balance_nonroot( j = cntNew[i]; assert( jleaf ){ memcpy(&pNew->aData[8], pCell, 4); }else if( leafData ){ /* If the tree is a leaf-data tree, and the siblings are leaves, - ** then there is no divider cell in apCell[]. Instead, the divider + ** then there is no divider cell in b.apCell[]. Instead, the divider ** cell consists of the integer key for the right-most cell of ** the sibling-page assembled above only. */ CellInfo info; j--; - pNew->xParseCell(pNew, apCell[j], &info); + pNew->xParseCell(pNew, b.apCell[j], &info); pCell = pTemp; sz = 4 + putVarint(&pCell[4], info.nKey); pTemp = 0; @@ -7379,7 +7444,7 @@ static int balance_nonroot( ** cells are at least 4 bytes. It only happens in b-trees used ** to evaluate "IN (SELECT ...)" and similar clauses. */ - if( szCell[j]==4 ){ + if( b.szCell[j]==4 ){ assert(leafCorrection==4); sz = pParent->xCellSize(pParent, pCell); } @@ -7437,12 +7502,12 @@ static int balance_nonroot( iNew = iOld = 0; nNewCell = cntNew[0]; }else{ - iOld = iPgnFree = usableSpace-szNew[iPg]; @@ -7494,7 +7559,7 @@ static int balance_nonroot( assert( pParent->isInit ); TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n", - nOld, nNew, nCell)); + nOld, nNew, b.nCell)); /* Free any old pages that were not reused as new pages. */ @@ -7517,7 +7582,7 @@ static int balance_nonroot( ** Cleanup before returning. */ balance_cleanup: - sqlite3ScratchFree(apCell); + sqlite3ScratchFree(b.apCell); for(i=0; i