From: drh Date: Sat, 29 May 2004 21:46:49 +0000 (+0000) Subject: A file format change for btree.c makes it between 10 and 20% faster. (CVS 1493) X-Git-Tag: version-3.6.10~4581 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=436051582f4c8bf418d8f6702047560e8a686202;p=thirdparty%2Fsqlite.git A file format change for btree.c makes it between 10 and 20% faster. (CVS 1493) FossilOrigin-Name: cbcaece7f45a0bc994e6c54a996afa4e6529da6a --- diff --git a/manifest b/manifest index 14c75d3651..d97887ae3e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Transform\sOP_HexBlob\sand\sOP_String8\sto\sOP_Blob\sand\sOP_String\sthe\sfirst\stime\sthey\sare\sexecuted.\s(CVS\s1492) -D 2004-05-29T11:24:50 +C A\sfile\sformat\schange\sfor\sbtree.c\smakes\sit\sbetween\s10\sand\s20%\sfaster.\s(CVS\s1493) +D 2004-05-29T21:46:49 F Makefile.in ab7b0d5118e2da97bac66be8684a1034e3500f5a F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906 F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd @@ -24,7 +24,7 @@ F sqlite.def fc4f5734786fe4743cfe2aa98eb2da4b089edb5f F sqlite.pc.in 30552343140c53304c2a658c080fbe810cd09ca2 F src/attach.c c315c58cb16fd6e913b3bfa6412aedecb4567fa5 F src/auth.c 5c2f0bea4729c98c2be3b69d6b466fc51448fe79 -F src/btree.c 7832e4247f0d14dfe25dcf85647ddde71b614170 +F src/btree.c 3287b359b0dbfef9ffcddfa776e136505f01cd77 F src/btree.h b65140b5ae891f30d2a39e64b9f0343225553545 F src/build.c fd36c4a603e23df35aa7f57772d965e1865e39e0 F src/date.c 0eb922af5c5f5e2455f8dc2f98023ed3e04a857e @@ -204,7 +204,7 @@ F www/sqlite.tcl 3c83b08cf9f18aa2d69453ff441a36c40e431604 F www/tclsqlite.tcl b9271d44dcf147a93c98f8ecf28c927307abd6da F www/vdbe.tcl 9b9095d4495f37697fd1935d10e14c6015e80aa1 F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4 -P 0a6689be843e695902fbfef863f128915545499e -R 1d952e495024dd4407d89b7b9acbc34c -U danielk1977 -Z 654311e7b543692762f3cae151122ce2 +P 3225de8902b0e7e60c4d9f3a5ef51bc74f57d364 +R 702efc72950b18adf9baf0d005935d93 +U drh +Z 
e0fe70a1953600e894856c5d74936688 diff --git a/manifest.uuid b/manifest.uuid index 05c6a5bd28..5f97a8ca8e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3225de8902b0e7e60c4d9f3a5ef51bc74f57d364 \ No newline at end of file +cbcaece7f45a0bc994e6c54a996afa4e6529da6a \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 21676365c7..6252b30ca8 100644 --- a/src/btree.c +++ b/src/btree.c @@ -9,7 +9,7 @@ ** May you share freely, never taking more than you give. ** ************************************************************************* -** $Id: btree.c,v 1.148 2004/05/29 10:23:19 danielk1977 Exp $ +** $Id: btree.c,v 1.149 2004/05/29 21:46:49 drh Exp $ ** ** This file implements a external (disk-based) database using BTrees. ** For a detailed discussion of BTrees, refer to @@ -95,23 +95,56 @@ ** payload fraction for a LEAFDATA tree is always 100% (or 255) and it ** not specified in the header. ** -** Each btree page begins with a header described below. Note that the -** header for page one begins at byte 100. For all other btree pages, the -** header begins on byte zero. +** Each btree page is divided into three sections: The header, the +** cell pointer array, and the cell content area. Page 1 also has a 100-byte +** file header that occurs before the page header. The 100-byte file +** header occurs on page 1 only. +** +** The page header looks like this: ** ** OFFSET SIZE DESCRIPTION ** 0 1 Flags. 1: intkey, 2: zerodata, 4: leafdata, 8: leaf ** 1 2 byte offset to the first freeblock -** 3 2 byte offset to the first cell -** 5 1 number of fragmented free bytes -** 6 4 Right child (the Ptr(N+1) value). Omitted if leaf +** 3 2 number of cells on this page +** 5 2 first byte past the cell array area +** 7 1 number of fragmented free bytes +** 8 4 Right child (the Ptr(N+1) value). Omitted if leaf ** ** The flags define the format of this btree page. The leaf flag means that ** this page has no children. 
The zerodata flag means that this page carries ** only keys and no data. The intkey flag means that the key is a single ** variable length integer at the beginning of the payload. ** -** A variable-length integer is 1 to 9 bytes where the lower 7 bits of each +** The cell pointer array begins on the first byte after the page header. +** The cell pointer array contains zero or more 2-byte numbers which are +** offsets from the beginning of the page to the cell content in the cell +** content area. The cell pointers occur in sorted order. The system strives +** to keep free space after the last cell pointer so that new cells can +** be easily added without having to defragment the page. +** +** Cell content is stored at the very end of the page and grows toward the +** beginning of the page. +** +** Unused space within the cell content area is collected into a linked list of +** freeblocks. Each freeblock is at least 4 bytes in size. The byte offset +** to the first freeblock is given in the header. Freeblocks occur in +** increasing order. Because a freeblock must be at least 4 bytes in size, +** any group of 3 or fewer unused bytes in the cell content area cannot +** exist on the freeblock chain. A group of 3 or fewer free bytes is called +** a fragment. The total number of bytes in all fragments is recorded +** in the page header at offset 7. +** +** SIZE DESCRIPTION +** 2 Byte offset of the next freeblock +** 2 Bytes in this freeblock +** +** Cells are of variable length. Cells are stored in the cell content area at +** the end of the page. Pointers to the cells are in the cell pointer array +** that immediately follows the page header. Cells are not necessarily +** contiguous or in order, but cell pointers are contiguous and in order. +** +** Cell content makes use of variable length integers. A variable +** length integer is 1 to 9 bytes where the lower 7 bits of each ** byte are used. 
The integer consists of all bytes that have bit 8 set and ** the first byte with bit 8 clear. The most significant byte of the integer ** appears first. A variable-length integer may not be more than 9 bytes long. @@ -129,26 +162,9 @@ ** Variable length integers are used for rowids and to hold the number of ** bytes of key and data in a btree cell. ** -** Unused space within a btree page is collected into a linked list of -** freeblocks. Each freeblock is at least 4 bytes in size. The byte offset -** to the first freeblock is given in the header. Freeblocks occur in -** increasing order. Because a freeblock is 4 bytes in size, the minimum -** size allocation on a btree page is 4 bytes. Because a freeblock must be -** at least 4 bytes in size, any group of 3 or fewer unused bytes cannot -** exist on the freeblock chain. A group of 3 or fewer free bytes is called -** a fragment. The total number of bytes in all fragments is recorded. -** in the page header at offset 5. -** -** SIZE DESCRIPTION -** 2 Byte offset of the next freeblock -** 2 Bytes in this freeblock -** -** Cells are of variable length. The first cell begins on the byte defined -** in the page header. Cells do not necessarily occur in order - they can -** skip around on the page. +** The content of a cell looks like this: ** ** SIZE DESCRIPTION -** 2 Byte offset of the next cell. 0 if this is the last cell ** 4 Page number of the left child. Omitted if leaf flag is set. ** var Number of bytes of data. Omitted if the zerodata flag is set. ** var Number of bytes of key. Or the key itself if intkey flag is set. @@ -180,7 +196,7 @@ /* Maximum page size. The upper bound on this value is 65536 (a limit -** imposed by the 2-byte offset at the beginning of each cell.) The +** imposed by the 2-byte size of cell array pointers.) The ** maximum page size determines the amount of stack space allocated ** by many of the routines in this module. 
On embedded architectures ** or any machine where memory and especially stack memory is limited, @@ -193,13 +209,13 @@ /* The following value is the maximum cell size assuming a maximum page ** size give above. */ -#define MX_CELL_SIZE (MX_PAGE_SIZE-6) +#define MX_CELL_SIZE (MX_PAGE_SIZE-8) /* The maximum number of cells on a single page of the database. This ** assumes a minimum cell size of 3 bytes. Such small cells will be ** exceedingly rare, but they are possible. */ -#define MX_CELL ((MX_PAGE_SIZE-6)/3) +#define MX_CELL ((MX_PAGE_SIZE-8)/3) /* Forward declarations */ typedef struct MemPage MemPage; @@ -230,29 +246,27 @@ static const char zMagicHeader[] = "SQLite format 3"; ** The pageDestructor() routine handles that chore. */ struct MemPage { - u8 isInit; /* True if previously initialized */ - u8 idxShift; /* True if Cell indices have changed */ - u8 isOverfull; /* Some aCell[] do not fit on page */ - u8 intKey; /* True if intkey flag is set */ - u8 leaf; /* True if leaf flag is set */ - u8 zeroData; /* True if table stores keys only */ - u8 leafData; /* True if tables stores data on leaves only */ - u8 hasData; /* True if this page stores data */ - u8 hdrOffset; /* 100 for page 1. 0 otherwise */ - u8 needRelink; /* True if cell not linked properly in aData */ - int idxParent; /* Index in pParent->aCell[] of this node */ - int nFree; /* Number of free bytes on the page */ - int nCell; /* Number of entries on this page */ - int nCellAlloc; /* Number of slots allocated in aCell[] */ - unsigned char **aCell; /* Pointer to start of each cell */ - struct Btree *pBt; /* Pointer back to BTree structure */ - - /* When page content is move from one page to the other (by the movePage() - ** subroutine) only the information about is moved. The information below - ** is fixed. */ - unsigned char *aData; /* Pointer back to the start of the page */ - Pgno pgno; /* Page number for this page */ - MemPage *pParent; /* The parent of this page. 
NULL for root */ + u8 isInit; /* True if previously initialized */ + u8 idxShift; /* True if Cell indices have changed */ + u8 nOverflow; /* Number of overflow cell bodies in aOvfl[] */ + u8 intKey; /* True if intkey flag is set */ + u8 leaf; /* True if leaf flag is set */ + u8 zeroData; /* True if table stores keys only */ + u8 leafData; /* True if table stores data on leaves only */ + u8 hasData; /* True if this page stores data */ + u8 hdrOffset; /* 100 for page 1. 0 otherwise */ + u16 cellOffset; /* Index in aData of first cell pointer */ + u16 idxParent; /* Index in parent of this node */ + u16 nFree; /* Number of free bytes on the page */ + u16 nCell; /* Number of cells on this page, local and ovfl */ + struct _OvflCell { /* Cells that will not fit on aData[] */ + u8 *pCell; /* Pointers to the body of the overflow cell */ + u16 idx; /* Insert this cell before idx-th non-overflow cell */ + } aOvfl[3]; + struct Btree *pBt; /* Pointer back to BTree structure */ + u8 *aData; /* Pointer back to the start of the page */ + Pgno pgno; /* Page number for this page */ + MemPage *pParent; /* The parent of this page. NULL for root */ }; /* @@ -291,12 +305,13 @@ typedef Btree Bt; */ typedef struct CellInfo CellInfo; struct CellInfo { + u8 *pCell; /* Pointer to the start of cell content */ i64 nKey; /* The key for INTKEY tables, or number of bytes in key */ u32 nData; /* Number of bytes of data */ u16 nHeader; /* Size of the cell header in bytes */ u16 nLocal; /* Amount of payload held locally */ u16 iOverflow; /* Offset to overflow page number. Zero if no overflow */ - u16 nSize; /* Total size of the cell (on the main b-tree page) */ + u16 nSize; /* Total size of the cell content (on the main b-tree page) */ }; /* @@ -350,19 +365,53 @@ static void put4byte(unsigned char *p, u32 v){ #define putVarint sqlite3PutVarint /* -** Parse a cell header and fill in the CellInfo structure. +** Return a pointer to the start of cell content for the given +** cell of a page. 
This routine works only for pages that +** do not contain overflow cells. */ -static void parseCell( +static u8 *findCell(MemPage *pPage, int iCell){ + u8 *data = pPage->aData; + assert( iCell>=0 ); + assert( iCell<get2byte(&data[pPage->hdrOffset+3]) ); + return data + get2byte(&data[pPage->cellOffset+2*iCell]); +} + +/* +** This is a more complex version of findCell() that works for +** pages that do contain overflow cells. See insertCell(). +*/ +static u8 *findOverflowCell(MemPage *pPage, int iCell){ + int i; + for(i=pPage->nOverflow-1; i>=0; i--){ + if( pPage->aOvfl[i].idx<=iCell ){ + if( pPage->aOvfl[i].idx==iCell ){ + return pPage->aOvfl[i].pCell; + } + iCell--; + } + } + return findCell(pPage, iCell); +} + +/* +** Parse a cell content block and fill in the CellInfo structure. There +** are two versions of this function. parseCell() takes a cell index +** as the second argument and parseCellPtr() takes a pointer to the +** body of the cell as its second argument. +*/ +static void parseCellPtr( MemPage *pPage, /* Page containing the cell */ - unsigned char *pCell, /* Pointer to the first byte of the cell */ + u8 *pCell, /* Pointer to the cell text. */ CellInfo *pInfo /* Fill in this structure */ ){ int n; int nPayload; Btree *pBt; int minLocal, maxLocal; + + pInfo->pCell = pCell; assert( pPage->leaf==0 || pPage->leaf==1 ); - n = 6 - 4*pPage->leaf; + n = 4 - 4*pPage->leaf; if( pPage->hasData ){ n += getVarint32(&pCell[n], &pInfo->nData); }else{ @@ -386,6 +435,9 @@ static void parseCell( pInfo->nLocal = nPayload; pInfo->iOverflow = 0; pInfo->nSize = nPayload + n; + if( pInfo->nSize<4 ){ + pInfo->nSize = 4; /* Minimum cell size is 4 */ + } }else{ int surplus = minLocal + (nPayload - minLocal)%(pBt->usableSize - 4); if( surplus <= maxLocal ){ @@ -397,18 +449,28 @@ static void parseCell( pInfo->nSize = pInfo->iOverflow + 4; } } +static void parseCell( + MemPage *pPage, /* Page containing the cell */ + int iCell, /* The cell index. 
First cell is 0 */ + CellInfo *pInfo /* Fill in this structure */ +){ + parseCellPtr(pPage, findCell(pPage, iCell), pInfo); +} /* -** Compute the total number of bytes that a Cell needs on the main -** database page. The number returned includes the Cell header, -** local payload storage, and the pointer to overflow pages (if -** applicable). Additional space allocated on overflow pages -** is NOT included in the value returned from this routine. +** Compute the total number of bytes that a Cell needs in the cell +** data area of the btree-page. The return number includes the cell +** data header and the local payload, but not any overflow page or +** the space used by the cell pointer. */ -static int cellSize(MemPage *pPage, unsigned char *pCell){ +static int cellSize(MemPage *pPage, int iCell){ CellInfo info; - - parseCell(pPage, pCell, &info); + parseCell(pPage, iCell, &info); + return info.nSize; +} +static int cellSizePtr(MemPage *pPage, u8 *pCell){ + CellInfo info; + parseCellPtr(pPage, pCell, &info); return info.nSize; } @@ -423,7 +485,9 @@ static int cellSize(MemPage *pPage, unsigned char *pCell){ static void _pageIntegrity(MemPage *pPage){ int usableSize; u8 *data; - int i, idx, c, pc, hdr, nFree; + int i, j, idx, c, pc, hdr, nFree; + int cellOffset; + int nCell, cellLimit; u8 used[MX_PAGE_SIZE]; usableSize = pPage->pBt->usableSize; @@ -439,6 +503,8 @@ static void _pageIntegrity(MemPage *pPage){ assert( pPage->intKey == ((c & (PTF_INTKEY|PTF_LEAFDATA))!=0) ); assert( pPage->hasData == !(pPage->zeroData || (!pPage->leaf && pPage->leafData)) ); + assert( pPage->cellOffset==pPage->hdrOffset+12-4*pPage->leaf ); + assert( pPage->nCell = get2byte(&pPage->aData[hdr+3]) ); } data = pPage->aData; memset(used, 0, usableSize); @@ -457,30 +523,33 @@ static void _pageIntegrity(MemPage *pPage){ } pc = get2byte(&data[pc]); } - assert( pPage->isInit==0 || pPage->nFree==nFree+data[hdr+5] ); idx = 0; - pc = get2byte(&data[hdr+3]); - while( pc ){ + nCell = 
get2byte(&data[hdr+3]); + cellLimit = get2byte(&data[hdr+5]); + assert( pPage->isInit==0 + || pPage->nFree==nFree+data[hdr+7]+cellLimit-(cellOffset+2*nCell) ); + cellOffset = pPage->cellOffset; + for(i=0; iisInit==0 || idxnCell ); + pc = get2byte(&data[cellOffset+2*i]); assert( pc>0 && pcisInit==0 || pPage->aCell[idx]==&data[pc] ); size = cellSize(pPage, &data[pc]); assert( pc+size<=usableSize ); - for(i=pc; inCell ); + for(i=cellOffset+2*nCell; iaData) ); assert( pPage->pBt!=0 ); assert( pPage->pBt->usableSize <= MX_PAGE_SIZE ); - assert( !pPage->needRelink ); - assert( !pPage->isOverfull ); - oldPage = pPage->aData; + assert( pPage->nOverflow==0 ); + data = pPage->aData; hdr = pPage->hdrOffset; - addr = 3+hdr; - n = 6+hdr; - if( !pPage->leaf ){ - n += 4; - } - memcpy(&newPage[hdr], &oldPage[hdr], n-hdr); - start = n; - pc = get2byte(&oldPage[addr]); - i = 0; - while( pc>0 ){ - assert( npBt->usableSize ); - size = cellSize(pPage, &oldPage[pc]); - memcpy(&newPage[n], &oldPage[pc], size); - put2byte(&newPage[addr],n); - assert( pPage->aCell[i]==&oldPage[pc] ); - pPage->aCell[i++] = &oldPage[n]; - addr = n; - n += size; - pc = get2byte(&oldPage[pc]); - } - assert( i==pPage->nCell ); - leftover = pPage->pBt->usableSize - n; - assert( leftover>=0 ); - assert( pPage->nFree==leftover ); - if( leftover<4 ){ - oldPage[hdr+5] = leftover; - leftover = 0; - n = pPage->pBt->usableSize; - } - memcpy(&oldPage[hdr], &newPage[hdr], n-hdr); - if( leftover==0 ){ - put2byte(&oldPage[hdr+1], 0); - }else if( leftover>=4 ){ - put2byte(&oldPage[hdr+1], n); - put2byte(&oldPage[n], 0); - put2byte(&oldPage[n+2], leftover); - memset(&oldPage[n+4], 0, leftover-4); - } - oldPage[hdr+5] = 0; + cellOffset = pPage->cellOffset; + nCell = pPage->nCell; + assert( nCell==get2byte(&data[hdr+3]) ); + usableSize = pPage->pBt->usableSize; + brk = get2byte(&data[hdr+5]); + memcpy(&temp[brk], &data[brk], usableSize - brk); + brk = usableSize; + for(i=0; ipBt->usableSize ); + size = cellSizePtr(pPage, 
&temp[pc]); + brk -= size; + memcpy(&data[brk], &temp[pc], size); + put2byte(pAddr, brk); + } + assert( brk>=cellOffset+2*nCell ); + put2byte(&data[hdr+5], brk); + data[hdr+1] = 0; + data[hdr+2] = 0; + data[hdr+7] = 0; + addr = cellOffset+2*nCell; + memset(&data[addr], 0, brk-addr); } /* -** Allocate nByte bytes of space on a page. If nByte is less than -** 4 it is rounded up to 4. +** Allocate nByte bytes of space on a page. ** ** Return the index into pPage->aData[] of the first byte of ** the new allocation. Or return 0 if there is not enough free @@ -559,60 +617,59 @@ static void defragmentPage(MemPage *pPage){ ** nBytes of contiguous free space, then this routine automatically ** calls defragementPage() to consolidate all free space before ** allocating the new chunk. -** -** Algorithm: Carve a piece off of the first freeblock that is -** nByte in size or larger. */ static int allocateSpace(MemPage *pPage, int nByte){ int addr, pc, hdr; int size; int nFrag; + int top; + int nCell; + int cellOffset; unsigned char *data; -#ifndef NDEBUG - int cnt = 0; -#endif - + data = pPage->aData; assert( sqlite3pager_iswriteable(data) ); assert( pPage->pBt ); if( nByte<4 ) nByte = 4; - if( pPage->nFreeisOverfull ) return 0; + if( pPage->nFreenOverflow>0 ) return 0; + pPage->nFree -= nByte; hdr = pPage->hdrOffset; - nFrag = data[hdr+5]; - if( nFrag>=60 || nFrag>pPage->nFree-nByte ){ - defragmentPage(pPage); - } - addr = hdr+1; - pc = get2byte(&data[addr]); - assert( addrpBt->usableSize-4 ); - while( (size = get2byte(&data[pc+2]))pBt->usableSize-4 ); - assert( pc>=addr+size+4 || pc==0 ); - if( pc==0 ){ - assert( (cnt++)==0 ); - defragmentPage(pPage); - assert( data[hdr+5]==0 ); - addr = pPage->hdrOffset+1; - pc = get2byte(&data[addr]); + + nFrag = data[hdr+7]; + if( nFrag<60 ){ + /* Search the freelist looking for a slot big enough to satisfy the + ** space request. 
*/ + addr = hdr+1; + while( (pc = get2byte(&data[addr]))>0 ){ + size = get2byte(&data[pc+2]); + if( size>=nByte ){ + if( size0 && size>=nByte ); - assert( pc+size<=pPage->pBt->usableSize ); - if( size>nByte+4 ){ - int newStart = pc+nByte; - put2byte(&data[addr], newStart); - put2byte(&data[newStart], get2byte(&data[pc])); - put2byte(&data[newStart+2], size-nByte); - }else{ - put2byte(&data[addr], get2byte(&data[pc])); - data[hdr+5] += size-nByte; + + /* Allocate memory from the gap in between the cell pointer array + ** and the cell content area. + */ + top = get2byte(&data[hdr+5]); + nCell = get2byte(&data[hdr+3]); + cellOffset = pPage->cellOffset; + if( nFrag>=60 || cellOffset + 2*nCell > top - nByte ){ + defragmentPage(pPage); + top = get2byte(&data[hdr+5]); } - pPage->nFree -= nByte; - assert( pPage->nFree>=0 ); - return pc; + top -= nByte; + assert( cellOffset + 2*nCell <= top ); + put2byte(&data[hdr+5], top); + return top; } /* @@ -625,10 +682,7 @@ static int allocateSpace(MemPage *pPage, int nByte){ */ static void freeSpace(MemPage *pPage, int start, int size){ int end = start + size; /* End of the segment being freed */ - int addr, pbegin; -#ifndef NDEBUG - int tsize = 0; /* Total size of all freeblocks */ -#endif + int addr, pbegin, hdr; unsigned char *data = pPage->aData; assert( pPage->pBt!=0 ); @@ -638,7 +692,8 @@ static void freeSpace(MemPage *pPage, int start, int size){ if( size<4 ) size = 4; /* Add the space back into the linked list of freeblocks */ - addr = pPage->hdrOffset + 1; + hdr = pPage->hdrOffset; + addr = hdr + 1; while( (pbegin = get2byte(&data[addr]))0 ){ assert( pbegin<=pPage->pBt->usableSize-4 ); assert( pbegin>addr ); @@ -656,41 +711,28 @@ static void freeSpace(MemPage *pPage, int start, int size){ while( (pbegin = get2byte(&data[addr]))>0 ){ int pnext, psize; assert( pbegin>addr ); - assert( pbeginpBt->usableSize-4 ); + assert( pbegin<=pPage->pBt->usableSize-4 ); pnext = get2byte(&data[pbegin]); psize = get2byte(&data[pbegin+2]); if( 
pbegin + psize + 3 >= pnext && pnext>0 ){ int frag = pnext - (pbegin+psize); - assert( frag<=data[pPage->hdrOffset+5] ); - data[pPage->hdrOffset+5] -= frag; + assert( frag<=data[pPage->hdrOffset+7] ); + data[pPage->hdrOffset+7] -= frag; put2byte(&data[pbegin], get2byte(&data[pnext])); put2byte(&data[pbegin+2], pnext+get2byte(&data[pnext+2])-pbegin); }else{ - assert( (tsize += psize)>0 ); addr = pbegin; } } - assert( tsize+data[pPage->hdrOffset+5]==pPage->nFree ); -} -/* -** Resize the aCell[] array of the given page so that it is able to -** hold at least nNewSz entries. -** -** Return SQLITE_OK or SQLITE_NOMEM. -*/ -static int resizeCellArray(MemPage *pPage, int nNewSz){ - if( pPage->nCellAllocaCell[0]); - if( pPage->aCell==0 ){ - pPage->aCell = sqliteMallocRaw( n ); - }else{ - pPage->aCell = sqliteRealloc(pPage->aCell, n); - } - if( sqlite3_malloc_failed ) return SQLITE_NOMEM; - pPage->nCellAlloc = nNewSz; + /* If the cell content area begins with a freeblock, remove it. */ + if( data[hdr+1]==data[hdr+5] && data[hdr+2]==data[hdr+6] ){ + int top; + pbegin = get2byte(&data[hdr+1]); + memcpy(&data[hdr+1], &data[pbegin], 2); + top = get2byte(&data[hdr+5]); + put2byte(&data[hdr+5], top + get2byte(&data[pbegin+2])); } - return SQLITE_OK; } /* @@ -712,10 +754,9 @@ static int initPage( ){ int c, pc, i, hdr; unsigned char *data; - int usableSize; - int nCell, nFree; - u8 *aCell[MX_PAGE_SIZE/2]; - + int usableSize, cellOffset; + int nFree; + int top; assert( pPage->pBt!=0 ); assert( pParent==0 || pParent->pBt==pPage->pBt ); @@ -728,40 +769,25 @@ static int initPage( pPage->pParent = pParent; sqlite3pager_ref(pParent->aData); } - pPage->nCell = pPage->nCellAlloc = 0; - assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) ); hdr = pPage->hdrOffset; data = pPage->aData; c = data[hdr]; + assert( pPage->hdrOffset==(pPage->pgno==1 ? 
100 : 0) ); pPage->intKey = (c & (PTF_INTKEY|PTF_LEAFDATA))!=0; pPage->zeroData = (c & PTF_ZERODATA)!=0; pPage->leafData = (c & PTF_LEAFDATA)!=0; pPage->leaf = (c & PTF_LEAF)!=0; pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData)); - pPage->isOverfull = 0; - pPage->needRelink = 0; + pPage->nOverflow = 0; pPage->idxShift = 0; usableSize = pPage->pBt->usableSize; - - /* Initialize the cell count and cell pointers */ - i = 0; - pc = get2byte(&data[hdr+3]); - nCell = 0; - while( pc>0 ){ - if( pc>=usableSize ) return SQLITE_CORRUPT; - if( nCell>sizeof(aCell)/sizeof(aCell[0]) ) return SQLITE_CORRUPT; - aCell[nCell++] = &data[pc]; - pc = get2byte(&data[pc]); - } - if( resizeCellArray(pPage, nCell) ){ - return SQLITE_NOMEM; - } - pPage->nCell = nCell; - memcpy(pPage->aCell, aCell, nCell*sizeof(aCell[0])); + pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf; + top = get2byte(&data[hdr+5]); + pPage->nCell = get2byte(&data[hdr+3]); /* Compute the total free space on the page */ pc = get2byte(&data[hdr+1]); - nFree = data[hdr+5]; + nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell); i = 0; while( pc>0 ){ int next, size; @@ -796,13 +822,10 @@ static void zeroPage(MemPage *pPage, int flags){ assert( sqlite3pager_iswriteable(data) ); memset(&data[hdr], 0, pBt->usableSize - hdr); data[hdr] = flags; - first = hdr + 6 + 4*((flags&PTF_LEAF)==0); - put2byte(&data[hdr+1], first); - put2byte(&data[first+2], pBt->usableSize - first); - sqliteFree(pPage->aCell); - pPage->aCell = 0; - pPage->nCell = 0; - pPage->nCellAlloc = 0; + first = hdr + 8 + 4*((flags&PTF_LEAF)==0); + memset(&data[hdr+1], 0, 4); + data[hdr+7] = 0; + put2byte(&data[hdr+5], pBt->usableSize); pPage->nFree = pBt->usableSize - first; pPage->intKey = (flags & (PTF_INTKEY|PTF_LEAFDATA))!=0; pPage->zeroData = (flags & PTF_ZERODATA)!=0; @@ -810,9 +833,10 @@ static void zeroPage(MemPage *pPage, int flags){ pPage->leaf = (flags & PTF_LEAF)!=0; pPage->hasData = !(pPage->zeroData || 
(!pPage->leaf && pPage->leafData)); pPage->hdrOffset = hdr; - pPage->isOverfull = 0; - pPage->needRelink = 0; + pPage->cellOffset = first; + pPage->nOverflow = 0; pPage->idxShift = 0; + pPage->nCell = 0; pPage->isInit = 1; pageIntegrity(pPage); } @@ -875,14 +899,11 @@ static void releasePage(MemPage *pPage){ */ static void pageDestructor(void *pData, int pageSize){ MemPage *pPage = (MemPage*)&((char*)pData)[pageSize]; - assert( pPage->isInit==0 || pPage->needRelink==0 ); if( pPage->pParent ){ MemPage *pParent = pPage->pParent; pPage->pParent = 0; releasePage(pParent); } - sqliteFree(pPage->aCell); - pPage->aCell = 0; pPage->isInit = 0; } @@ -1036,18 +1057,19 @@ static int lockBtree(Btree *pBt){ ** a cell. Make sure it is small enough so that at least minFanout ** cells will fit on one page. We assume a 12-byte page header. ** Besides the payload, the cell must store: - ** 2-byte pointer to next cell + ** 2-byte pointer to the cell ** 4-byte child pointer ** 9-byte nKey value ** 4-byte nData value ** 4-byte overflow page pointer - ** So a cell consists of a header which is as much as 19 bytes long, - ** 0 to N bytes of payload, and an optional 4 byte overflow page pointer. + ** So a cell consists of a 2-byte pointer, a header which is as much as + ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow + ** page pointer. 
*/ - pBt->maxLocal = (pBt->usableSize-10)*pBt->maxEmbedFrac/255 - 23; - pBt->minLocal = (pBt->usableSize-10)*pBt->minEmbedFrac/255 - 23; - pBt->maxLeaf = pBt->usableSize - 33; - pBt->minLeaf = (pBt->usableSize-10)*pBt->minLeafFrac/255 - 23; + pBt->maxLocal = (pBt->usableSize-12)*pBt->maxEmbedFrac/255 - 23; + pBt->minLocal = (pBt->usableSize-12)*pBt->minEmbedFrac/255 - 23; + pBt->maxLeaf = pBt->usableSize - 35; + pBt->minLeaf = (pBt->usableSize-12)*pBt->minLeafFrac/255 - 23; if( pBt->minLocal>pBt->maxLocal || pBt->maxLocal<0 ){ goto page1_init_failed; } @@ -1485,12 +1507,12 @@ static void releaseTempCursor(BtCursor *pCur){ static void getCellInfo(BtCursor *pCur){ MemPage *pPage = pCur->pPage; if( !pCur->infoValid ){ - parseCell(pPage, pPage->aCell[pCur->idx], &pCur->info); + parseCell(pPage, pCur->idx, &pCur->info); pCur->infoValid = 1; }else{ #ifndef NDEBUG CellInfo info; - parseCell(pPage, pPage->aCell[pCur->idx], &info); + parseCell(pPage, pCur->idx, &info); assert( memcmp(&info, &pCur->info, sizeof(info))==0 ); #endif } @@ -1562,8 +1584,8 @@ static int getPayload( pPage = pCur->pPage; pageIntegrity(pPage); assert( pCur->idx>=0 && pCur->idxnCell ); - aPayload = pPage->aCell[pCur->idx]; getCellInfo(pCur); + aPayload = pCur->info.pCell; aPayload += pCur->info.nHeader; if( pPage->intKey ){ nKey = 0; @@ -1701,8 +1723,8 @@ static const unsigned char *fetchPayload( pPage = pCur->pPage; pageIntegrity(pPage); assert( pCur->idx>=0 && pCur->idxnCell ); - aPayload = pPage->aCell[pCur->idx]; getCellInfo(pCur); + aPayload = pCur->info.pCell; aPayload += pCur->info.nHeader; if( pPage->intKey ){ nKey = 0; @@ -1825,33 +1847,7 @@ static void moveToParent(BtCursor *pCur){ pCur->pPage = pParent; pCur->infoValid = 0; assert( pParent->idxShift==0 ); - if( pParent->idxShift==0 ){ - pCur->idx = idxParent; -#ifndef NDEBUG - /* Verify that pCur->idx is the correct index to point back to the child - ** page we just came from - */ - if( pCur->idxnCell ){ - assert( 
get4byte(&pParent->aCell[idxParent][2])==oldPgno ); - }else{ - assert( get4byte(&pParent->aData[pParent->hdrOffset+6])==oldPgno ); - } -#endif - }else{ - /* The MemPage.idxShift flag indicates that cell indices might have - ** changed since idxParent was set and hence idxParent might be out - ** of date. So recompute the parent cell index by scanning all cells - ** and locating the one that points to the child we just came from. - */ - int i; - pCur->idx = pParent->nCell; - for(i=0; inCell; i++){ - if( get4byte(&pParent->aCell[i][2])==oldPgno ){ - pCur->idx = i; - break; - } - } - } + pCur->idx = idxParent; } /* @@ -1875,7 +1871,7 @@ static int moveToRoot(BtCursor *pCur){ if( pRoot->nCell==0 && !pRoot->leaf ){ Pgno subpage; assert( pRoot->pgno==1 ); - subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+6]); + subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]); assert( subpage>0 ); pCur->isValid = 1; rc = moveToChild(pCur, subpage); @@ -1896,7 +1892,7 @@ static int moveToLeftmost(BtCursor *pCur){ assert( pCur->isValid ); while( !(pPage = pCur->pPage)->leaf ){ assert( pCur->idx>=0 && pCur->idxnCell ); - pgno = get4byte(&pPage->aCell[pCur->idx][2]); + pgno = get4byte(findCell(pPage, pCur->idx)); rc = moveToChild(pCur, pgno); if( rc ) return rc; } @@ -1917,7 +1913,7 @@ static int moveToRightmost(BtCursor *pCur){ assert( pCur->isValid ); while( !(pPage = pCur->pPage)->leaf ){ - pgno = get4byte(&pPage->aData[pPage->hdrOffset+6]); + pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); pCur->idx = pPage->nCell; rc = moveToChild(pCur, pgno); if( rc ) return rc; @@ -2066,9 +2062,9 @@ int sqlite3BtreeMoveto(BtCursor *pCur, const void *pKey, i64 nKey, int *pRes){ if( pPage->leaf ){ chldPg = 0; }else if( lwr>=pPage->nCell ){ - chldPg = get4byte(&pPage->aData[pPage->hdrOffset+6]); + chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); }else{ - chldPg = get4byte(&pPage->aCell[lwr][2]); + chldPg = get4byte(findCell(pPage, lwr)); } if( chldPg==0 ){ assert( pCur->idx>=0 && 
pCur->idxpPage->nCell ); @@ -2117,7 +2113,7 @@ int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ pCur->infoValid = 0; if( pCur->idx>=pPage->nCell ){ if( !pPage->leaf ){ - rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+6])); + rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); if( rc ) return rc; rc = moveToLeftmost(pCur); *pRes = 0; @@ -2166,7 +2162,7 @@ int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ assert( pPage->isInit ); assert( pCur->idx>=0 ); if( !pPage->leaf ){ - pgno = get4byte(&pPage->aCell[pCur->idx][2]); + pgno = get4byte( findCell(pPage, pCur->idx) ); rc = moveToChild(pCur, pgno); if( rc ) return rc; rc = moveToRightmost(pCur); @@ -2362,7 +2358,7 @@ static int clearCell(MemPage *pPage, unsigned char *pCell){ Pgno ovflPgno; int rc; - parseCell(pPage, pCell, &info); + parseCellPtr(pPage, pCell, &info); if( info.iOverflow==0 ){ return SQLITE_OK; /* No overflow pages. Return without doing anything */ } @@ -2412,7 +2408,7 @@ static int fillInCell( CellInfo info; /* Fill in the header. */ - nHeader = 2; + nHeader = 0; if( !pPage->leaf ){ nHeader += 4; } @@ -2422,7 +2418,7 @@ static int fillInCell( nData = 0; } nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey); - parseCell(pPage, pCell, &info); + parseCellPtr(pPage, pCell, &info); assert( info.nHeader==nHeader ); assert( info.nKey==nKey ); assert( info.nData==nData ); @@ -2520,9 +2516,9 @@ static void reparentChildPages(MemPage *pPage){ if( pPage->leaf ) return; pBt = pPage->pBt; for(i=0; inCell; i++){ - reparentPage(pBt, get4byte(&pPage->aCell[i][2]), pPage, i); + reparentPage(pBt, get4byte(findCell(pPage,i)), pPage, i); } - reparentPage(pBt, get4byte(&pPage->aData[pPage->hdrOffset+6]), pPage, i); + reparentPage(pBt, get4byte(&pPage->aData[pPage->hdrOffset+8]), pPage, i); pPage->idxShift = 0; } @@ -2533,46 +2529,28 @@ static void reparentChildPages(MemPage *pPage){ ** removes the reference to the cell from pPage. ** ** "sz" must be the number of bytes in the cell. 
-** -** Try to maintain the integrity of the linked list of cells. But if -** the cell being inserted does not fit on the page, this will not be -** possible. If the linked list is not maintained, then just update -** pPage->aCell[] and set the pPage->needRelink flag so that we will -** know to rebuild the linked list later. */ static void dropCell(MemPage *pPage, int idx, int sz){ - int j, pc; - u8 *data; + int i; /* Loop counter */ + int pc; /* Offset to cell content of cell being deleted */ + u8 *data; /* pPage->aData */ + u8 *ptr; /* Used to move bytes around within data[] */ + assert( idx>=0 && idxnCell ); - assert( sz==cellSize(pPage, pPage->aCell[idx]) ); + assert( sz==cellSize(pPage, idx) ); assert( sqlite3pager_iswriteable(pPage->aData) ); - assert( pPage->aCell[idx]>=pPage->aData ); - assert( pPage->aCell[idx]<=&pPage->aData[pPage->pBt->usableSize-sz] ); data = pPage->aData; - pc = Addr(pPage->aCell[idx]) - Addr(data); - assert( pc>pPage->hdrOffset && pc+sz<=pPage->pBt->usableSize ); + ptr = &data[pPage->cellOffset + 2*idx]; + pc = get2byte(ptr); + assert( pc>10 && pc+sz<=pPage->pBt->usableSize ); freeSpace(pPage, pc, sz); - for(j=idx; jnCell-1; j++){ - pPage->aCell[j] = pPage->aCell[j+1]; + for(i=idx+1; inCell; i++, ptr+=2){ + ptr[0] = ptr[2]; + ptr[1] = ptr[3]; } pPage->nCell--; - if( !pPage->isOverfull && !pPage->needRelink ){ - u8 *pPrev; - if( idx==0 ){ - pPrev = &data[pPage->hdrOffset+3]; - }else{ - pPrev = pPage->aCell[idx-1]; - } - if( idxnCell ){ - pc = Addr(pPage->aCell[idx]) - Addr(data); - }else{ - pc = 0; - } - put2byte(pPrev, pc); - pageIntegrity(pPage); - }else{ - pPage->needRelink = 1; - } + put2byte(&data[pPage->hdrOffset+3], pPage->nCell); + pPage->nFree += 2; pPage->idxShift = 1; } @@ -2581,63 +2559,70 @@ static void dropCell(MemPage *pPage, int idx, int sz){ ** content of the cell. ** ** If the cell content will fit on the page, then put it there. 
If it -** will not fit and pTemp is not NULL, then make a copy of the content -** into pTemp, set pPage->aCell[i] point to pTemp, and set pPage->isOverfull. -** If the content will not fit and pTemp is NULL, then make pPage->aCell[i] -** point to pCell and set pPage->isOverfull. -** -** Try to maintain the integrity of the linked list of cells. But if -** the cell being inserted does not fit on the page, this will not be -** possible. If the linked list is not maintained, then just update -** pPage->aCell[] and set the pPage->needRelink flag so that we will -** know to rebuild the linked list later. +** will not fit, then make a copy of the cell content into pTemp if +** pTemp is not null. Regardless of pTemp, allocate a new entry +** in pPage->aOvfl[] and make it point to the cell content (either +** in pTemp or the original pCell) and also record its index. +** Allocating a new entry in pPage->aCell[] implies that +** pPage->nOverflow is incremented. */ static void insertCell( MemPage *pPage, /* Page into which we are copying */ - int i, /* Which cell on pPage to insert after */ - u8 *pCell, /* Text of the new cell to insert */ - int sz, /* Bytes of data in pCell */ + int i, /* New cell becomes the i-th cell of the page */ + u8 *pCell, /* Content of the new cell */ + int sz, /* Bytes of content in pCell */ u8 *pTemp /* Temp storage space for pCell, if needed */ ){ - int idx, j; - assert( i>=0 && i<=pPage->nCell ); - assert( sz==cellSize(pPage, pCell) ); + int idx; /* Where to write new cell content in data[] */ + int j; /* Loop counter */ + int top; /* First byte of content for any cell in data[] */ + int end; /* First byte past the last cell pointer in data[] */ + int ins; /* Index in data[] where new cell pointer is inserted */ + int hdr; /* Offset into data[] of the page header */ + int cellOffset; /* Address of first cell pointer in data[] */ + u8 *data; /* The content of the whole page */ + u8 *ptr; /* Used for moving information around in data[] */ + + 
assert( i>=0 && i<=pPage->nCell+pPage->nOverflow ); + assert( sz==cellSizePtr(pPage, pCell) ); assert( sqlite3pager_iswriteable(pPage->aData) ); - idx = pPage->needRelink ? 0 : allocateSpace(pPage, sz); - resizeCellArray(pPage, pPage->nCell+1); - for(j=pPage->nCell; j>i; j--){ - pPage->aCell[j] = pPage->aCell[j-1]; - } - pPage->nCell++; - if( idx<=0 ){ - pPage->isOverfull = 1; + if( pPage->nOverflow || sz+2>pPage->nFree ){ if( pTemp ){ memcpy(pTemp, pCell, sz); - }else{ - pTemp = pCell; + pCell = pTemp; } - pPage->aCell[i] = pTemp; + j = pPage->nOverflow++; + assert( jaOvfl)/sizeof(pPage->aOvfl[0]) ); + pPage->aOvfl[j].pCell = pCell; + pPage->aOvfl[j].idx = i; + pPage->nFree = 0; }else{ - u8 *data = pPage->aData; + data = pPage->aData; + hdr = pPage->hdrOffset; + top = get2byte(&data[hdr+5]); + cellOffset = pPage->cellOffset; + end = cellOffset + 2*pPage->nCell + 2; + ins = cellOffset + 2*i; + if( end > top - sz ){ + defragmentPage(pPage); + top = get2byte(&data[hdr+5]); + assert( end + sz <= top ); + } + idx = allocateSpace(pPage, sz); + assert( idx>0 ); + assert( end <= get2byte(&data[hdr+5]) ); + pPage->nCell++; + pPage->nFree -= 2; memcpy(&data[idx], pCell, sz); - pPage->aCell[i] = &data[idx]; - } - if( !pPage->isOverfull && !pPage->needRelink ){ - u8 *pPrev; - int pc; - if( i==0 ){ - pPrev = &pPage->aData[pPage->hdrOffset+3]; - }else{ - pPrev = pPage->aCell[i-1]; + for(j=end-2, ptr=&data[j]; j>ins; j-=2, ptr-=2){ + ptr[0] = ptr[-2]; + ptr[1] = ptr[-1]; } - pc = get2byte(pPrev); - put2byte(pPrev, idx); - put2byte(pPage->aCell[i], pc); + put2byte(&data[ins], idx); + put2byte(&data[hdr+3], pPage->nCell); + pPage->idxShift = 1; pageIntegrity(pPage); - }else{ - pPage->needRelink = 1; } - pPage->idxShift = 1; } /* @@ -2647,63 +2632,41 @@ static void insertCell( static void assemblePage( MemPage *pPage, /* The page to be assemblied */ int nCell, /* The number of cells to add to this page */ - u8 **apCell, /* Pointers to cell text */ + u8 **apCell, /* Pointers to cell 
bodies */ int *aSize /* Sizes of the cells */ ){ int i; /* Loop counter */ int totalSize; /* Total size of all cells */ int hdr; /* Index of page header */ - int pc, prevpc; /* Addresses of cells being inserted */ + int cellptr; /* Address of next cell pointer */ + int cellbody; /* Address of next cell body */ u8 *data; /* Data for the page */ - assert( pPage->needRelink==0 ); - assert( pPage->isOverfull==0 ); + assert( pPage->nOverflow==0 ); totalSize = 0; for(i=0; inFree ); + assert( totalSize+2*nCell<=pPage->nFree ); assert( pPage->nCell==0 ); - resizeCellArray(pPage, nCell); - pc = allocateSpace(pPage, totalSize); + cellptr = pPage->cellOffset; data = pPage->aData; hdr = pPage->hdrOffset; - prevpc = hdr+3; + put2byte(&data[hdr+3], nCell); + cellbody = allocateSpace(pPage, totalSize); + assert( cellbody>0 ); + assert( pPage->nFree >= 2*nCell ); + pPage->nFree -= 2*nCell; for(i=0; iaCell[i] = data+pc; - prevpc = pc; - pc += aSize[i]; - assert( pc<=pPage->pBt->usableSize ); + put2byte(&data[cellptr], cellbody); + memcpy(&data[cellbody], apCell[i], aSize[i]); + cellptr += 2; + cellbody += aSize[i]; } + assert( cellbody==pPage->pBt->usableSize ); pPage->nCell = nCell; - put2byte(data+prevpc, 0); } -#if 0 /* Never Used */ -/* -** Rebuild the linked list of cells on a page so that the cells -** occur in the order specified by the pPage->aCell[] array. -** Invoke this routine once to repair damage after one or more -** invocations of either insertCell() or dropCell(). 
-*/ -static void relinkCellList(MemPage *pPage){ - int i, idxFrom; - assert( sqlite3pager_iswriteable(pPage->aData) ); - if( !pPage->needRelink ) return; - idxFrom = pPage->hdrOffset+3; - for(i=0; inCell; i++){ - int idx = Addr(pPage->aCell[i]) - Addr(pPage->aData); - assert( idx>pPage->hdrOffset && idxpBt->usableSize ); - put2byte(&pPage->aData[idxFrom], idx); - idxFrom = idx; - } - put2byte(&pPage->aData[idxFrom], 0); - pPage->needRelink = 0; -} -#endif - /* ** GCC does not define the offsetof() macro so we'll have to do it ** ourselves. @@ -2712,42 +2675,6 @@ static void relinkCellList(MemPage *pPage){ #define offsetof(STRUCTURE,FIELD) ((int)((char*)&((STRUCTURE*)0)->FIELD)) #endif -/* -** Move the content of the page at pFrom over to pTo. The pFrom->aCell[] -** pointers that point into pFrom->aData[] must be adjusted to point -** into pTo->aData[] instead. But some pFrom->aCell[] entries might -** not point to pFrom->aData[]. Those are unchanged. -** -** Over this operation completes, the meta data for pFrom is zeroed. -*/ -static void movePage(MemPage *pTo, MemPage *pFrom){ - uptr from, to; - int i; - int usableSize; - int ofst; - - assert( pTo->hdrOffset==0 ); - assert( pFrom->isInit ); - ofst = pFrom->hdrOffset; - usableSize = pFrom->pBt->usableSize; - sqliteFree(pTo->aCell); - memcpy(pTo->aData, &pFrom->aData[ofst], usableSize - ofst); - memcpy(pTo, pFrom, offsetof(MemPage, aData)); - pFrom->isInit = 0; - pFrom->aCell = 0; - assert( pTo->aData[5]<155 ); - pTo->aData[5] += ofst; - pTo->isOverfull = pFrom->isOverfull; - to = Addr(pTo->aData); - from = Addr(&pFrom->aData[ofst]); - for(i=0; inCell; i++){ - uptr x = Addr(pTo->aCell[i]); - if( x>from && xaCell[i]) = x + to - from; - } - } -} - /* ** The following parameters determine how many adjacent pages get involved ** in a balancing operation. 
NN is the number of neighbors on either side @@ -2763,6 +2690,9 @@ static void movePage(MemPage *pTo, MemPage *pFrom){ #define NN 1 /* Number of neighbors on either side of pPage */ #define NB (NN*2+1) /* Total pages involved in the balance */ +/* Forward reference */ +static int balance(MemPage*); + /* ** This routine redistributes Cells on pPage and up to NN*2 siblings ** of pPage so that all pages have about the same amount of free space. @@ -2792,7 +2722,7 @@ static void movePage(MemPage *pTo, MemPage *pFrom){ ** in a corrupted state. So if this routine fails, the database should ** be rolled back. */ -static int balance(MemPage *pPage){ +static int balance_nonroot(MemPage *pPage){ MemPage *pParent; /* The parent of pPage */ Btree *pBt; /* The whole database */ int nCell; /* Number of cells in aCell[] */ @@ -2809,7 +2739,6 @@ static int balance(MemPage *pPage){ int pageFlags; /* Value of pPage->aData[0] */ int subtotal; /* Subtotal of bytes in cells on one page */ int iSpace = 0; /* First unused byte of aSpace[] */ - MemPage *extraUnref = 0; /* Unref this page if not zero */ MemPage *apOld[NB]; /* pPage and up to two siblings */ Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */ MemPage *apCopy[NB]; /* Private copies of apOld[] pages */ @@ -2825,120 +2754,15 @@ static int balance(MemPage *pPage){ u8 aSpace[MX_PAGE_SIZE*4]; /* Space to copies of divider cells */ /* - ** Return without doing any work if pPage is neither overfull nor - ** underfull. + ** Find the parent page. */ assert( pPage->isInit ); assert( sqlite3pager_iswriteable(pPage->aData) ); pBt = pPage->pBt; - if( !pPage->isOverfull && pPage->nFreeusableSize*2/3 - && pPage->nCell>=2){ - assert( pPage->needRelink==0 ); - return SQLITE_OK; - } - - /* - ** Find the parent of the page to be balanced. If there is no parent, - ** it means this page is the root page and special rules apply. 
- */ pParent = pPage->pParent; - if( pParent==0 ){ - Pgno pgnoChild; - MemPage *pChild; - assert( pPage->isInit ); - if( pPage->nCell==0 ){ - if( pPage->leaf ){ - /* The table is completely empty */ - assert( pPage->needRelink==0 ); - TRACE(("BALANCE: empty table %d\n", pPage->pgno)); - }else{ - /* The root page is empty but has one child. Transfer the - ** information from that one child into the root page if it - ** will fit. This reduces the depth of the tree by one. - ** - ** If the root page is page 1, it has less space available than - ** its child (due to the 100 byte header that occurs at the beginning - ** of the database fle), so it might not be able to hold all of the - ** information currently contained in the child. If this is the - ** case, then do not do the transfer. Leave page 1 empty except - ** for the right-pointer to the child page. The child page becomes - ** the virtual root of the tree. - */ - pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+6]); - assert( pgnoChild>0 && pgnoChild<=sqlite3pager_pagecount(pBt->pPager) ); - rc = getPage(pBt, pgnoChild, &pChild); - if( rc ) return rc; - if( pPage->pgno==1 ){ - rc = initPage(pChild, pPage); - if( rc ) return rc; - if( pChild->nFree>=100 ){ - /* The child information will fit on the root page, so do the - ** copy */ - zeroPage(pPage, pChild->aData[0]); - for(i=0; inCell; i++){ - szCell[i] = cellSize(pChild, pChild->aCell[i]); - } - assemblePage(pPage, pChild->nCell, pChild->aCell, szCell); - freePage(pChild); - TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno)); - }else{ - /* The child has more information that will fit on the root. - ** The tree is already balanced. Do nothing. 
*/ - TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno)); - } - }else{ - memcpy(pPage->aData, pChild->aData, pBt->usableSize); - pPage->isInit = 0; - pPage->pParent = 0; - rc = initPage(pPage, 0); - assert( rc==SQLITE_OK ); - freePage(pChild); - TRACE(("BALANCE: transfer child %d into root %d\n", - pChild->pgno, pPage->pgno)); - } - reparentChildPages(pPage); - releasePage(pChild); - } - return SQLITE_OK; - } - if( !pPage->isOverfull ){ - /* It is OK for the root page to be less than half full. - */ - assert( pPage->needRelink==0 ); - TRACE(("BALANCE: root page %d is low - no changes\n", pPage->pgno)); - return SQLITE_OK; - } - /* - ** If we get to here, it means the root page is overfull. - ** When this happens, Create a new child page and copy the - ** contents of the root into the child. Then make the root - ** page an empty page with rightChild pointing to the new - ** child. Then fall thru to the code below which will cause - ** the overfull child page to be split. - */ - rc = allocatePage(pBt, &pChild, &pgnoChild, pPage->pgno); - if( rc ) return rc; - assert( sqlite3pager_iswriteable(pChild->aData) ); - movePage(pChild, pPage); - assert( pChild->aData[0]==pPage->aData[pPage->hdrOffset] ); - pChild->pParent = pPage; - sqlite3pager_ref(pPage->aData); - pChild->idxParent = 0; - pChild->isOverfull = 1; - zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF); - put4byte(&pPage->aData[pPage->hdrOffset+6], pChild->pgno); - pParent = pPage; - pPage = pChild; - extraUnref = pChild; - TRACE(("BALANCE: copy root %d into %d and balance %d\n", - pParent->pgno, pPage->pgno, pPage->pgno)); - }else{ - TRACE(("BALANCE: begin page %d child of %d\n", - pPage->pgno, pParent->pgno)); - } - rc = sqlite3pager_write(pParent->aData); - if( rc ) return rc; - assert( pParent->isInit ); + sqlite3pager_write(pParent->aData); + assert( pParent ); + TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno)); /* ** Find the cell in the parent page whose left child 
points back @@ -2950,12 +2774,12 @@ static int balance(MemPage *pPage){ pgno = pPage->pgno; assert( pgno==sqlite3pager_pagenumber(pPage->aData) ); for(idx=0; idxnCell; idx++){ - if( get4byte(&pParent->aCell[idx][2])==pgno ){ + if( get4byte(findCell(pParent, idx))==pgno ){ break; } } assert( idxnCell - || get4byte(&pParent->aData[pParent->hdrOffset+6])==pgno ); + || get4byte(&pParent->aData[pParent->hdrOffset+8])==pgno ); }else{ idx = pPage->idxParent; } @@ -2985,12 +2809,12 @@ static int balance(MemPage *pPage){ for(i=0, k=nxDiv; inCell ){ idxDiv[i] = k; - apDiv[i] = pParent->aCell[k]; + apDiv[i] = findCell(pParent, k); nDiv++; assert( !pParent->leaf ); - pgnoOld[i] = get4byte(&apDiv[i][2]); + pgnoOld[i] = get4byte(apDiv[i]); }else if( k==pParent->nCell ){ - pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+6]); + pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+8]); }else{ break; } @@ -3010,10 +2834,9 @@ static int balance(MemPage *pPage){ */ for(i=0; iaData = &((u8*)p)[-pBt->usableSize]; - p->aCell = 0; - p->hdrOffset = 0; - movePage(p, apOld[i]); + p->aData = &((u8*)p)[-pBt->pageSize]; + memcpy(p->aData, apOld[i]->aData, pBt->pageSize + sizeof(MemPage)); + p->aData = &((u8*)p)[-pBt->pageSize]; } /* @@ -3037,13 +2860,14 @@ static int balance(MemPage *pPage){ leafData = pPage->leafData && pPage->leaf; for(i=0; inCell; j++){ - apCell[nCell] = pOld->aCell[j]; - szCell[nCell] = cellSize(pOld, apCell[nCell]); + int limit = pOld->nCell+pOld->nOverflow; + for(j=0; jleaf ){ assert( leafCorrection==0 ); /* The right pointer of the child page pOld becomes the left ** pointer of the divider cell */ - memcpy(&apCell[nCell][2], &pOld->aData[pOld->hdrOffset+6], 4); + memcpy(apCell[nCell], &pOld->aData[pOld->hdrOffset+8], 4); }else{ assert( leafCorrection==4 ); } @@ -3091,9 +2915,9 @@ static int balance(MemPage *pPage){ ** usableSpace: Number of bytes of space available on each sibling. 
** */ - usableSpace = pBt->usableSize - 10 + leafCorrection; + usableSpace = pBt->usableSize - 12 + leafCorrection; for(subtotal=k=i=0; i usableSpace ){ szNew[k] = subtotal - szCell[i]; cntNew[k] = i; @@ -3124,9 +2948,9 @@ static int balance(MemPage *pPage){ r = cntNew[i-1] - 1; d = r + 1 - leafData; - while( szRight==0 || szRight+szCell[d]<=szLeft-szCell[r] ){ - szRight += szCell[d]; - szLeft -= szCell[r]; + while( szRight==0 || szRight+szCell[d]+2<=szLeft-(szCell[r]+2) ){ + szRight += szCell[d] + 2; + szLeft -= szCell[r] + 2; cntNew[i-1]--; r = cntNew[i-1] - 1; d = r + 1 - leafData; @@ -3219,12 +3043,10 @@ static int balance(MemPage *pPage){ for(i=0; ipgno==pgnoNew[i] ); - resizeCellArray(pNew, cntNew[i] - j); assemblePage(pNew, cntNew[i]-j, &apCell[j], &szCell[j]); j = cntNew[i]; assert( pNew->nCell>0 ); - assert( !pNew->isOverfull ); - assert( pNew->needRelink==0 ); + assert( pNew->nOverflow==0 ); if( ileaf ){ - memcpy(&pNew->aData[6], pCell+2, 4); + memcpy(&pNew->aData[8], pCell, 4); pTemp = 0; }else if( leafData ){ CellInfo info; j--; - parseCell(pNew, apCell[j], &info); + parseCellPtr(pNew, apCell[j], &info); pCell = &aSpace[iSpace]; fillInCell(pParent, pCell, 0, info.nKey, 0, 0, &sz); iSpace += sz; @@ -3250,22 +3072,22 @@ static int balance(MemPage *pPage){ assert( iSpace<=sizeof(aSpace) ); } insertCell(pParent, nxDiv, pCell, sz, pTemp); - put4byte(&pParent->aCell[nxDiv][2], pNew->pgno); + put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno); j++; nxDiv++; } } assert( j==nCell ); if( (pageFlags & PTF_LEAF)==0 ){ - memcpy(&apNew[nNew-1]->aData[6], &apCopy[nOld-1]->aData[6], 4); + memcpy(&apNew[nNew-1]->aData[8], &apCopy[nOld-1]->aData[8], 4); } - if( nxDiv==pParent->nCell ){ + if( nxDiv==pParent->nCell+pParent->nOverflow ){ /* Right-most sibling is the right-most child of pParent */ - put4byte(&pParent->aData[pParent->hdrOffset+6], pgnoNew[nNew-1]); + put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew[nNew-1]); }else{ /* Right-most sibling is the left 
child of the first entry in pParent ** past the right-most divider entry */ - put4byte(&pParent->aCell[nxDiv][2], pgnoNew[nNew-1]); + put4byte(findOverflowCell(pParent, nxDiv), pgnoNew[nNew-1]); } /* @@ -3292,20 +3114,159 @@ static int balance(MemPage *pPage){ balance_cleanup: for(i=0; iaCell); - } } for(i=0; ipgno, nOld, nNew, nCell)); return rc; } +/* +** This routine is called for the root page of a btree when the root +** page contains no cells. This is an opportunity to make the tree +** shallower by one level. +*/ +static int balance_shallower(MemPage *pPage){ + MemPage *pChild; /* The only child page of pPage */ + Pgno pgnoChild; /* Page number for pChild */ + int rc; /* Return code from subprocedures */ + u8 *apCell[(MX_CELL+2)*NB]; /* All cells from pages being balanced */ + int szCell[(MX_CELL+2)*NB]; /* Local size of all cells */ + + assert( pPage->pParent==0 ); + assert( pPage->nCell==0 ); + if( pPage->leaf ){ + /* The table is completely empty */ + TRACE(("BALANCE: empty table %d\n", pPage->pgno)); + }else{ + /* The root page is empty but has one child. Transfer the + ** information from that one child into the root page if it + ** will fit. This reduces the depth of the tree by one. + ** + ** If the root page is page 1, it has less space available than + ** its child (due to the 100 byte header that occurs at the beginning + ** of the database fle), so it might not be able to hold all of the + ** information currently contained in the child. If this is the + ** case, then do not do the transfer. Leave page 1 empty except + ** for the right-pointer to the child page. The child page becomes + ** the virtual root of the tree. 
+ */ + pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]); + assert( pgnoChild>0 ); + assert( pgnoChild<=sqlite3pager_pagecount(pPage->pBt->pPager) ); + rc = getPage(pPage->pBt, pgnoChild, &pChild); + if( rc ) return rc; + if( pPage->pgno==1 ){ + rc = initPage(pChild, pPage); + if( rc ) return rc; + assert( pChild->nOverflow==0 ); + if( pChild->nFree>=100 ){ + /* The child information will fit on the root page, so do the + ** copy */ + int i; + zeroPage(pPage, pChild->aData[0]); + for(i=0; inCell; i++){ + apCell[i] = findCell(pChild,i); + szCell[i] = cellSizePtr(pChild, apCell[i]); + } + assemblePage(pPage, pChild->nCell, apCell, szCell); + freePage(pChild); + TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno)); + }else{ + /* The child has more information that will fit on the root. + ** The tree is already balanced. Do nothing. */ + TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno)); + } + }else{ + memcpy(pPage->aData, pChild->aData, pPage->pBt->usableSize); + pPage->isInit = 0; + pPage->pParent = 0; + rc = initPage(pPage, 0); + assert( rc==SQLITE_OK ); + freePage(pChild); + TRACE(("BALANCE: transfer child %d into root %d\n", + pChild->pgno, pPage->pgno)); + } + reparentChildPages(pPage); + releasePage(pChild); + } + return SQLITE_OK; +} + + +/* +** The root page is overfull +** +** When this happens, Create a new child page and copy the +** contents of the root into the child. Then make the root +** page an empty page with rightChild pointing to the new +** child. Finally, call balance_internal() on the new child +** to cause it to split. 
+*/ +static int balance_deeper(MemPage *pPage){ + int rc; /* Return value from subprocedures */ + MemPage *pChild; /* Pointer to a new child page */ + Pgno pgnoChild; /* Page number of the new child page */ + Btree *pBt; /* The BTree */ + int usableSize; /* Total usable size of a page */ + u8 *data; /* Content of the parent page */ + u8 *cdata; /* Content of the child page */ + int hdr; /* Offset to page header in parent */ + int brk; /* Offset to content of first cell in parent */ + + assert( pPage->pParent==0 ); + assert( pPage->nOverflow>0 ); + pBt = pPage->pBt; + rc = allocatePage(pBt, &pChild, &pgnoChild, pPage->pgno); + if( rc ) return rc; + assert( sqlite3pager_iswriteable(pChild->aData) ); + usableSize = pBt->usableSize; + data = pPage->aData; + hdr = pPage->hdrOffset; + brk = get2byte(&data[hdr+5]); + cdata = pChild->aData; + memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr); + memcpy(&cdata[brk], &data[brk], usableSize-brk); + rc = initPage(pChild, pPage); + if( rc ) return rc; + memcpy(pChild->aOvfl, pPage->aOvfl, pPage->nOverflow*sizeof(pPage->aOvfl[0])); + pChild->nOverflow = pPage->nOverflow; + if( pChild->nOverflow ){ + pChild->nFree = 0; + } + assert( pChild->nCell==pPage->nCell ); + zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF); + put4byte(&pPage->aData[pPage->hdrOffset+8], pgnoChild); + TRACE(("BALANCE: copy root %d into %d\n", pPage->pgno, pChild->pgno)); + rc = balance_nonroot(pChild); + releasePage(pChild); + return rc; +} + +/* +** Decide if the page pPage needs to be balanced. If balancing is +** required, call the appropriate balancing routine. 
+*/ +static int balance(MemPage *pPage){ + int rc = SQLITE_OK; + if( pPage->pParent==0 ){ + if( pPage->nOverflow>0 ){ + rc = balance_deeper(pPage); + } + if( pPage->nCell==0 ){ + rc = balance_shallower(pPage); + } + }else{ + if( pPage->nOverflow>0 || pPage->nFree>pPage->pBt->usableSize*2/3 ){ + rc = balance_nonroot(pPage); + } + } + return rc; +} + /* ** This routine checks all cursors that point to the same table ** as pCur points to. If any of those cursors were opened with @@ -3385,16 +3346,16 @@ int sqlite3BtreeInsert( if( rc ) return rc; rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, &szNew); if( rc ) return rc; - assert( szNew==cellSize(pPage, newCell) ); + assert( szNew==cellSizePtr(pPage, newCell) ); assert( szNew<=sizeof(newCell) ); if( loc==0 && pCur->isValid ){ int szOld; assert( pCur->idx>=0 && pCur->idxnCell ); - oldCell = pPage->aCell[pCur->idx]; + oldCell = findCell(pPage, pCur->idx); if( !pPage->leaf ){ - memcpy(&newCell[2], &oldCell[2], 4); + memcpy(newCell, oldCell, 4); } - szOld = cellSize(pPage, oldCell); + szOld = cellSizePtr(pPage, oldCell); rc = clearCell(pPage, oldCell); if( rc ) return rc; dropCell(pPage, pCur->idx, szOld); @@ -3444,9 +3405,9 @@ int sqlite3BtreeDelete(BtCursor *pCur){ } rc = sqlite3pager_write(pPage->aData); if( rc ) return rc; - pCell = pPage->aCell[pCur->idx]; + pCell = findCell(pPage, pCur->idx); if( !pPage->leaf ){ - pgnoChild = get4byte(&pCell[2]); + pgnoChild = get4byte(pCell); } clearCell(pPage, pCell); if( !pPage->leaf ){ @@ -3473,12 +3434,12 @@ int sqlite3BtreeDelete(BtCursor *pCur){ if( rc ) return rc; TRACE(("DELETE: table=%d delete internal from %d replace from leaf %d\n", pCur->pgnoRoot, pPage->pgno, leafCur.pPage->pgno)); - dropCell(pPage, pCur->idx, cellSize(pPage, pCell)); - pNext = leafCur.pPage->aCell[leafCur.idx]; - szNext = cellSize(leafCur.pPage, pNext); + dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell)); + pNext = findCell(leafCur.pPage, leafCur.idx); + szNext = 
cellSizePtr(leafCur.pPage, pNext); assert( sizeof(tempCell)>=szNext+4 ); insertCell(pPage, pCur->idx, pNext-4, szNext+4, tempCell); - put4byte(pPage->aCell[pCur->idx]+2, pgnoChild); + put4byte(findOverflowCell(pPage, pCur->idx), pgnoChild); rc = balance(pPage); if( rc ) return rc; dropCell(leafCur.pPage, leafCur.idx, szNext); @@ -3487,7 +3448,7 @@ int sqlite3BtreeDelete(BtCursor *pCur){ }else{ TRACE(("DELETE: table=%d delete from leaf %d\n", pCur->pgnoRoot, pPage->pgno)); - dropCell(pPage, pCur->idx, cellSize(pPage, pCell)); + dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell)); rc = balance(pPage); } moveToRoot(pCur); @@ -3545,16 +3506,16 @@ static int clearDatabasePage( rc = sqlite3pager_write(pPage->aData); if( rc ) return rc; for(i=0; inCell; i++){ - pCell = pPage->aCell[i]; + pCell = findCell(pPage, i); if( !pPage->leaf ){ - rc = clearDatabasePage(pBt, get4byte(&pCell[2]), pPage->pParent, 1); + rc = clearDatabasePage(pBt, get4byte(pCell), pPage->pParent, 1); if( rc ) return rc; } rc = clearCell(pPage, pCell); if( rc ) return rc; } if( !pPage->leaf ){ - rc = clearDatabasePage(pBt, get4byte(&pPage->aData[6]), pPage->pParent, 1); + rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), pPage->pParent, 1); if( rc ) return rc; } if( freePageFlag ){ @@ -3696,6 +3657,7 @@ int sqlite3BtreePageDump(Btree *pBt, int pgno, int recursive){ int nFree; u16 idx; int hdr; + int nCell; unsigned char *data; char range[20]; unsigned char payload[20]; @@ -3712,26 +3674,28 @@ int sqlite3BtreePageDump(Btree *pBt, int pgno, int recursive){ pPage->leafData = (c & PTF_LEAFDATA)!=0; pPage->leaf = (c & PTF_LEAF)!=0; pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData)); + nCell = get2byte(&data[hdr+3]); printf("PAGE %d: flags=0x%02x frag=%d parent=%d\n", pgno, - data[hdr], data[hdr+5], + data[hdr], data[hdr+7], (pPage->isInit && pPage->pParent) ? pPage->pParent->pgno : 0); - i = 0; assert( hdr == (pgno==1 ? 
100 : 0) ); - idx = get2byte(&data[hdr+3]); - while( idx>0 && idx<=pBt->usableSize ){ + idx = hdr + 12 - pPage->leaf*4; + for(i=0; ileaf ){ child = 0; }else{ - child = get4byte(&pCell[2]); + child = get4byte(pCell); } sz = info.nData; if( !pPage->intKey ) sz += info.nKey; @@ -3745,17 +3709,9 @@ int sqlite3BtreePageDump(Btree *pBt, int pgno, int recursive){ "cell %2d: i=%-10s chld=%-4d nk=%-4lld nd=%-4d payload=%s\n", i, range, child, info.nKey, info.nData, payload ); - if( pPage->isInit && pPage->aCell[i]!=pCell ){ - printf("**** aCell[%d] does not match on prior entry ****\n", i); - } - i++; - idx = get2byte(pCell); - } - if( idx!=0 ){ - printf("ERROR: next cell index out of range: %d\n", idx); } if( !pPage->leaf ){ - printf("right_child: %d\n", get4byte(&data[hdr+6])); + printf("right_child: %d\n", get4byte(&data[hdr+8])); } nFree = 0; i = 0; @@ -3773,13 +3729,12 @@ int sqlite3BtreePageDump(Btree *pBt, int pgno, int recursive){ printf("ERROR: next freeblock index out of range: %d\n", idx); } if( recursive && !pPage->leaf ){ - idx = get2byte(&data[hdr+3]); - while( idx>0 && idxusableSize ){ - unsigned char *pCell = &data[idx]; - sqlite3BtreePageDump(pBt, get4byte(&pCell[2]), 1); + for(i=0; iidx; aResult[2] = pPage->nCell; if( pCur->idx>=0 && pCur->idxnCell ){ - aResult[3] = cellSize(pPage, pPage->aCell[pCur->idx]); - aResult[6] = pPage->leaf ? 0 : get4byte(&pPage->aCell[pCur->idx][2]); + u8 *pCell = findCell(pPage, pCur->idx); + aResult[3] = cellSizePtr(pPage, pCell); + aResult[6] = pPage->leaf ? 0 : get4byte(pCell); }else{ aResult[3] = 0; aResult[6] = 0; @@ -3828,7 +3784,7 @@ int sqlite3BtreeCursorInfo(BtCursor *pCur, int *aResult){ idx = get2byte(&pPage->aData[idx]); } aResult[5] = cnt; - aResult[7] = pPage->leaf ? 0 : get4byte(&pPage->aData[pPage->hdrOffset+6]); + aResult[7] = pPage->leaf ? 
0 : get4byte(&pPage->aData[pPage->hdrOffset+8]); return SQLITE_OK; } #endif @@ -3964,7 +3920,8 @@ static int checkTreePage( ){ MemPage *pPage; int i, rc, depth, d2, pgno, cnt; - int hdr; + int hdr, cellStart; + int nCell; u8 *data; BtCursor cur; Btree *pBt; @@ -4004,8 +3961,8 @@ static int checkTreePage( /* Check payload overflow pages */ sprintf(zContext, "On tree page %d cell %d: ", iPage, i); - pCell = pPage->aCell[i]; - parseCell(pPage, pCell, &info); + pCell = findCell(pPage,i); + parseCellPtr(pPage, pCell, &info); sz = info.nData; if( !pPage->intKey ) sz += info.nKey; if( sz>info.nLocal ){ @@ -4016,7 +3973,7 @@ static int checkTreePage( /* Check sanity of left child page. */ if( !pPage->leaf ){ - pgno = get4byte(&pCell[2]); + pgno = get4byte(pCell); d2 = checkTreePage(pCheck,pgno,pPage,zContext,0,0,0,0); if( i>0 && d2!=depth ){ checkAppendMsg(pCheck, zContext, "Child page depth differs"); @@ -4025,22 +3982,24 @@ static int checkTreePage( } } if( !pPage->leaf ){ - pgno = get4byte(&pPage->aData[pPage->hdrOffset+6]); + pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); sprintf(zContext, "On page %d at right child: ", iPage); checkTreePage(pCheck, pgno, pPage, zContext,0,0,0,0); } /* Check for complete coverage of the page */ - memset(hit, 0, usableSize); - memset(hit, 1, pPage->hdrOffset+10-4*(pPage->leaf)); data = pPage->aData; hdr = pPage->hdrOffset; - for(cnt=0, i=get2byte(&data[hdr+3]); i>0 && ileaf; + for(i=0; i=i; j--) hit[j]++; - i = get2byte(&data[i]); + for(j=pc+size-1; j>=pc; j--) hit[j]++; } for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i