** May you share freely, never taking more than you give.
**
*************************************************************************
-** $Id: btree.c,v 1.148 2004/05/29 10:23:19 danielk1977 Exp $
+** $Id: btree.c,v 1.149 2004/05/29 21:46:49 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
** payload fraction for a LEAFDATA tree is always 100% (or 255) and it
** not specified in the header.
**
-** Each btree page begins with a header described below. Note that the
-** header for page one begins at byte 100. For all other btree pages, the
-** header begins on byte zero.
+** Each btree page is divided into three sections: The header, the
+** cell pointer array, and the cell content area. Page 1 also has a 100-byte
+** file header that occurs before the page header. The 100-byte file
+** header occurs on page 1 only.
+**
+** The page header looks like this:
**
** OFFSET SIZE DESCRIPTION
** 0 1 Flags. 1: intkey, 2: zerodata, 4: leafdata, 8: leaf
** 1 2 byte offset to the first freeblock
-** 3 2 byte offset to the first cell
-** 5 1 number of fragmented free bytes
-** 6 4 Right child (the Ptr(N+1) value). Omitted if leaf
+** 3 2 number of cells on this page
+** 5 2 first byte of the cell content area
+** 7 1 number of fragmented free bytes
+** 8 4 Right child (the Ptr(N+1) value). Omitted if leaf
**
** The flags define the format of this btree page. The leaf flag means that
** this page has no children. The zerodata flag means that this page carries
** only keys and no data. The intkey flag means that the key is a single
** variable length integer at the beginning of the payload.
**
-** A variable-length integer is 1 to 9 bytes where the lower 7 bits of each
+** The cell pointer array begins on the first byte after the page header.
+** The cell pointer array contains zero or more 2-byte numbers which are
+** offsets from the beginning of the page to the cell content in the cell
+** content area. The cell pointers occur in sorted order. The system strives
+** to keep free space after the last cell pointer so that new cells can
+** be easily added without having to defragment the page.
+**
+** Cell content is stored at the very end of the page and grows toward the
+** beginning of the page.
+**
+** Unused space within the cell content area is collected into a linked list of
+** freeblocks. Each freeblock is at least 4 bytes in size. The byte offset
+** to the first freeblock is given in the header. Freeblocks occur in
+** increasing order. Because a freeblock must be at least 4 bytes in size,
+** any group of 3 or fewer unused bytes in the cell content area cannot
+** exist on the freeblock chain. A group of 3 or fewer free bytes is called
+** a fragment. The total number of bytes in all fragments is recorded
+** in the page header at offset 7.
+**
+** SIZE DESCRIPTION
+** 2 Byte offset of the next freeblock
+** 2 Bytes in this freeblock
+**
+** Cells are of variable length. Cells are stored in the cell content area at
+** the end of the page. Pointers to the cells are in the cell pointer array
+** that immediately follows the page header. Cells are not necessarily
+** contiguous or in order, but cell pointers are contiguous and in order.
+**
+** Cell content makes use of variable length integers. A variable
+** length integer is 1 to 9 bytes where the lower 7 bits of each
** byte are used. The integer consists of all bytes that have bit 8 set and
** the first byte with bit 8 clear. The most significant byte of the integer
** appears first. A variable-length integer may not be more than 9 bytes long.
** Variable length integers are used for rowids and to hold the number of
** bytes of key and data in a btree cell.
**
-** Unused space within a btree page is collected into a linked list of
-** freeblocks. Each freeblock is at least 4 bytes in size. The byte offset
-** to the first freeblock is given in the header. Freeblocks occur in
-** increasing order. Because a freeblock is 4 bytes in size, the minimum
-** size allocation on a btree page is 4 bytes. Because a freeblock must be
-** at least 4 bytes in size, any group of 3 or fewer unused bytes cannot
-** exist on the freeblock chain. A group of 3 or fewer free bytes is called
-** a fragment. The total number of bytes in all fragments is recorded.
-** in the page header at offset 5.
-**
-** SIZE DESCRIPTION
-** 2 Byte offset of the next freeblock
-** 2 Bytes in this freeblock
-**
-** Cells are of variable length. The first cell begins on the byte defined
-** in the page header. Cells do not necessarily occur in order - they can
-** skip around on the page.
+** The content of a cell looks like this:
**
** SIZE DESCRIPTION
-** 2 Byte offset of the next cell. 0 if this is the last cell
** 4 Page number of the left child. Omitted if leaf flag is set.
** var Number of bytes of data. Omitted if the zerodata flag is set.
** var Number of bytes of key. Or the key itself if intkey flag is set.
/* Maximum page size. The upper bound on this value is 65536 (a limit
-** imposed by the 2-byte offset at the beginning of each cell.) The
+** imposed by the 2-byte size of cell array pointers.) The
** maximum page size determines the amount of stack space allocated
** by many of the routines in this module. On embedded architectures
** or any machine where memory and especially stack memory is limited,
/* The following value is the maximum cell size assuming a maximum page
** size give above.
*/
-#define MX_CELL_SIZE (MX_PAGE_SIZE-6)
+#define MX_CELL_SIZE (MX_PAGE_SIZE-8)
/* The maximum number of cells on a single page of the database. This
** assumes a minimum cell size of 3 bytes. Such small cells will be
** exceedingly rare, but they are possible.
*/
-#define MX_CELL ((MX_PAGE_SIZE-6)/3)
+#define MX_CELL ((MX_PAGE_SIZE-8)/3)
/* Forward declarations */
typedef struct MemPage MemPage;
** The pageDestructor() routine handles that chore.
*/
struct MemPage {
- u8 isInit; /* True if previously initialized */
- u8 idxShift; /* True if Cell indices have changed */
- u8 isOverfull; /* Some aCell[] do not fit on page */
- u8 intKey; /* True if intkey flag is set */
- u8 leaf; /* True if leaf flag is set */
- u8 zeroData; /* True if table stores keys only */
- u8 leafData; /* True if tables stores data on leaves only */
- u8 hasData; /* True if this page stores data */
- u8 hdrOffset; /* 100 for page 1. 0 otherwise */
- u8 needRelink; /* True if cell not linked properly in aData */
- int idxParent; /* Index in pParent->aCell[] of this node */
- int nFree; /* Number of free bytes on the page */
- int nCell; /* Number of entries on this page */
- int nCellAlloc; /* Number of slots allocated in aCell[] */
- unsigned char **aCell; /* Pointer to start of each cell */
- struct Btree *pBt; /* Pointer back to BTree structure */
-
- /* When page content is move from one page to the other (by the movePage()
- ** subroutine) only the information about is moved. The information below
- ** is fixed. */
- unsigned char *aData; /* Pointer back to the start of the page */
- Pgno pgno; /* Page number for this page */
- MemPage *pParent; /* The parent of this page. NULL for root */
+ u8 isInit; /* True if previously initialized */
+ u8 idxShift; /* True if Cell indices have changed */
+ u8 nOverflow; /* Number of overflow cell bodies in aCell[] */
+ u8 intKey; /* True if intkey flag is set */
+ u8 leaf; /* True if leaf flag is set */
+ u8 zeroData; /* True if table stores keys only */
+ u8 leafData; /* True if tables stores data on leaves only */
+ u8 hasData; /* True if this page stores data */
+ u8 hdrOffset; /* 100 for page 1. 0 otherwise */
+ u16 cellOffset; /* Index in aData of first cell pointer */
+ u16 idxParent; /* Index in parent of this node */
+ u16 nFree; /* Number of free bytes on the page */
+ u16 nCell; /* Number of cells on this page, local and ovfl */
+ struct _OvflCell { /* Cells that will not fit on aData[] */
+ u8 *pCell; /* Pointers to the body of the overflow cell */
+ u16 idx; /* Insert this cell before idx-th non-overflow cell */
+ } aOvfl[3];
+ struct Btree *pBt; /* Pointer back to BTree structure */
+ u8 *aData; /* Pointer back to the start of the page */
+ Pgno pgno; /* Page number for this page */
+ MemPage *pParent; /* The parent of this page. NULL for root */
};
/*
*/
typedef struct CellInfo CellInfo;
struct CellInfo {
+ u8 *pCell; /* Pointer to the start of cell content */
i64 nKey; /* The key for INTKEY tables, or number of bytes in key */
u32 nData; /* Number of bytes of data */
u16 nHeader; /* Size of the cell header in bytes */
u16 nLocal; /* Amount of payload held locally */
u16 iOverflow; /* Offset to overflow page number. Zero if no overflow */
- u16 nSize; /* Total size of the cell (on the main b-tree page) */
+ u16 nSize; /* Total size of the cell content (on the main b-tree page) */
};
/*
#define putVarint sqlite3PutVarint
/*
-** Parse a cell header and fill in the CellInfo structure.
+** Return a pointer to the start of cell content for the given
+** cell of a page. This routine works only for pages that
+** do not contain overflow cells.
*/
-static void parseCell(
+static u8 *findCell(MemPage *pPage, int iCell){
+ u8 *data = pPage->aData;
+ assert( iCell>=0 );
+ assert( iCell<get2byte(&data[pPage->hdrOffset+3]) );
+ return data + get2byte(&data[pPage->cellOffset+2*iCell]);
+}
+
+/*
+** This is a more complex version of findCell() that works for
+** pages that do contain overflow cells. See insertCell().
+*/
+static u8 *findOverflowCell(MemPage *pPage, int iCell){
+ int i;
+ for(i=pPage->nOverflow-1; i>=0; i--){
+ if( pPage->aOvfl[i].idx<=iCell ){
+ if( pPage->aOvfl[i].idx==iCell ){
+ return pPage->aOvfl[i].pCell;
+ }
+ iCell--;
+ }
+ }
+ return findCell(pPage, iCell);
+}
+
+/*
+** Parse a cell content block and fill in the CellInfo structure. There
+** are two versions of this function. parseCell() takes a cell index
+** as the second argument and parseCellPtr() takes a pointer to the
+** body of the cell as its second argument.
+*/
+static void parseCellPtr(
MemPage *pPage, /* Page containing the cell */
- unsigned char *pCell, /* Pointer to the first byte of the cell */
+ u8 *pCell, /* Pointer to the cell text. */
CellInfo *pInfo /* Fill in this structure */
){
int n;
int nPayload;
Btree *pBt;
int minLocal, maxLocal;
+
+ pInfo->pCell = pCell;
assert( pPage->leaf==0 || pPage->leaf==1 );
- n = 6 - 4*pPage->leaf;
+ n = 4 - 4*pPage->leaf;
if( pPage->hasData ){
n += getVarint32(&pCell[n], &pInfo->nData);
}else{
pInfo->nLocal = nPayload;
pInfo->iOverflow = 0;
pInfo->nSize = nPayload + n;
+ if( pInfo->nSize<4 ){
+ pInfo->nSize = 4; /* Minimum cell size is 4 */
+ }
}else{
int surplus = minLocal + (nPayload - minLocal)%(pBt->usableSize - 4);
if( surplus <= maxLocal ){
pInfo->nSize = pInfo->iOverflow + 4;
}
}
+static void parseCell(
+ MemPage *pPage, /* Page containing the cell */
+ int iCell, /* The cell index. First cell is 0 */
+ CellInfo *pInfo /* Fill in this structure */
+){
+ parseCellPtr(pPage, findCell(pPage, iCell), pInfo);
+}
/*
-** Compute the total number of bytes that a Cell needs on the main
-** database page. The number returned includes the Cell header,
-** local payload storage, and the pointer to overflow pages (if
-** applicable). Additional space allocated on overflow pages
-** is NOT included in the value returned from this routine.
+** Compute the total number of bytes that a Cell needs in the cell
+** data area of the btree-page. The returned number includes the cell
+** data header and the local payload, but not any overflow page or
+** the space used by the cell pointer.
*/
-static int cellSize(MemPage *pPage, unsigned char *pCell){
+static int cellSize(MemPage *pPage, int iCell){
CellInfo info;
-
- parseCell(pPage, pCell, &info);
+ parseCell(pPage, iCell, &info);
+ return info.nSize;
+}
+static int cellSizePtr(MemPage *pPage, u8 *pCell){
+ CellInfo info;
+ parseCellPtr(pPage, pCell, &info);
return info.nSize;
}
static void _pageIntegrity(MemPage *pPage){
int usableSize;
u8 *data;
- int i, idx, c, pc, hdr, nFree;
+ int i, j, idx, c, pc, hdr, nFree;
+ int cellOffset;
+ int nCell, cellLimit;
+ /* Cross-check the MemPage fields against the page image in aData:
+ ** every byte claimed by a cell, or by the gap between the cell pointer
+ ** array and the cell content area, is marked in used[]; a byte claimed
+ ** twice trips an assert, and the bytes left unmarked must equal the
+ ** fragment count stored at hdr+7. */
u8 used[MX_PAGE_SIZE];
usableSize = pPage->pBt->usableSize;
assert( pPage->intKey == ((c & (PTF_INTKEY|PTF_LEAFDATA))!=0) );
assert( pPage->hasData ==
!(pPage->zeroData || (!pPage->leaf && pPage->leafData)) );
+ assert( pPage->cellOffset==pPage->hdrOffset+12-4*pPage->leaf );
+ /* Compare, do not assign: "=" here would overwrite pPage->nCell and
+ ** would also fail on every page that holds zero cells. */
+ assert( pPage->nCell==get2byte(&pPage->aData[hdr+3]) );
}
data = pPage->aData;
memset(used, 0, usableSize);
}
pc = get2byte(&data[pc]);
}
- assert( pPage->isInit==0 || pPage->nFree==nFree+data[hdr+5] );
idx = 0;
- pc = get2byte(&data[hdr+3]);
- while( pc ){
+ nCell = get2byte(&data[hdr+3]);
+ cellLimit = get2byte(&data[hdr+5]);
+ /* cellOffset must be loaded before the free-space cross-check reads it */
+ cellOffset = pPage->cellOffset;
+ assert( pPage->isInit==0
+ || pPage->nFree==nFree+data[hdr+7]+cellLimit-(cellOffset+2*nCell) );
+ for(i=0; i<nCell; i++){
int size;
- assert( pPage->isInit==0 || idx<pPage->nCell );
+ pc = get2byte(&data[cellOffset+2*i]);
assert( pc>0 && pc<usableSize-4 );
- assert( pPage->isInit==0 || pPage->aCell[idx]==&data[pc] );
-size = cellSize(pPage, &data[pc]);
+ /* cellSize() now takes a cell index; a pointer needs cellSizePtr() */
+ size = cellSizePtr(pPage, &data[pc]);
assert( pc+size<=usableSize );
- for(i=pc; i<pc+size; i++){
- assert( used[i]==0 );
- used[i] = 1;
+ for(j=pc; j<pc+size; j++){
+ assert( used[j]==0 );
+ used[j] = 1;
}
- pc = get2byte(&data[pc]);
- idx++;
}
- assert( idx==pPage->nCell );
+ /* The gap between the end of the cell pointer array and the start of
+ ** the cell content area is counted in nFree but is not a fragment. */
+ for(i=cellOffset+2*nCell; i<cellLimit; i++){
+ assert( used[i]==0 );
+ used[i] = 1;
+ }
nFree = 0;
for(i=0; i<usableSize; i++){
assert( used[i]<=1 );
if( used[i]==0 ) nFree++;
}
- assert( nFree==data[hdr+5] );
+ assert( nFree==data[hdr+7] );
}
#define pageIntegrity(X) _pageIntegrity(X)
#else
** into one big FreeBlk at the end of the page.
*/
static void defragmentPage(MemPage *pPage){
- int pc, i, n, addr;
- int start, hdr, size;
- int leftover;
- unsigned char *oldPage;
- unsigned char newPage[MX_PAGE_SIZE];
+ int i; /* Loop counter */
+ int pc; /* Address of a i-th cell */
+ int addr; /* Offset of first byte after cell pointer array */
+ int hdr; /* Offset to the page header */
+ int size; /* Size of a cell */
+ int usableSize; /* Number of usable bytes on a page */
+ int cellOffset; /* Offset to the cell pointer array */
+ int brk; /* Offset to the cell content area */
+ int nCell; /* Number of cells on the page */
+ unsigned char *data; /* The page data */
+ unsigned char temp[MX_PAGE_SIZE]; /* Temp holding area for cell content */
assert( sqlite3pager_iswriteable(pPage->aData) );
assert( pPage->pBt!=0 );
assert( pPage->pBt->usableSize <= MX_PAGE_SIZE );
- assert( !pPage->needRelink );
- assert( !pPage->isOverfull );
- oldPage = pPage->aData;
+ assert( pPage->nOverflow==0 );
+ data = pPage->aData;
hdr = pPage->hdrOffset;
- addr = 3+hdr;
- n = 6+hdr;
- if( !pPage->leaf ){
- n += 4;
- }
- memcpy(&newPage[hdr], &oldPage[hdr], n-hdr);
- start = n;
- pc = get2byte(&oldPage[addr]);
- i = 0;
- while( pc>0 ){
- assert( n<pPage->pBt->usableSize );
- size = cellSize(pPage, &oldPage[pc]);
- memcpy(&newPage[n], &oldPage[pc], size);
- put2byte(&newPage[addr],n);
- assert( pPage->aCell[i]==&oldPage[pc] );
- pPage->aCell[i++] = &oldPage[n];
- addr = n;
- n += size;
- pc = get2byte(&oldPage[pc]);
- }
- assert( i==pPage->nCell );
- leftover = pPage->pBt->usableSize - n;
- assert( leftover>=0 );
- assert( pPage->nFree==leftover );
- if( leftover<4 ){
- oldPage[hdr+5] = leftover;
- leftover = 0;
- n = pPage->pBt->usableSize;
- }
- memcpy(&oldPage[hdr], &newPage[hdr], n-hdr);
- if( leftover==0 ){
- put2byte(&oldPage[hdr+1], 0);
- }else if( leftover>=4 ){
- put2byte(&oldPage[hdr+1], n);
- put2byte(&oldPage[n], 0);
- put2byte(&oldPage[n+2], leftover);
- memset(&oldPage[n+4], 0, leftover-4);
- }
- oldPage[hdr+5] = 0;
+ cellOffset = pPage->cellOffset;
+ nCell = pPage->nCell;
+ assert( nCell==get2byte(&data[hdr+3]) );
+ usableSize = pPage->pBt->usableSize;
+ brk = get2byte(&data[hdr+5]);
+ memcpy(&temp[brk], &data[brk], usableSize - brk);
+ brk = usableSize;
+ for(i=0; i<nCell; i++){
+ u8 *pAddr; /* The i-th cell pointer */
+ pAddr = &data[cellOffset + i*2];
+ pc = get2byte(pAddr);
+ assert( pc<pPage->pBt->usableSize );
+ size = cellSizePtr(pPage, &temp[pc]);
+ brk -= size;
+ memcpy(&data[brk], &temp[pc], size);
+ put2byte(pAddr, brk);
+ }
+ assert( brk>=cellOffset+2*nCell );
+ put2byte(&data[hdr+5], brk);
+ data[hdr+1] = 0;
+ data[hdr+2] = 0;
+ data[hdr+7] = 0;
+ addr = cellOffset+2*nCell;
+ memset(&data[addr], 0, brk-addr);
}
/*
-** Allocate nByte bytes of space on a page. If nByte is less than
-** 4 it is rounded up to 4.
+** Allocate nByte bytes of space on a page.
**
** Return the index into pPage->aData[] of the first byte of
** the new allocation. Or return 0 if there is not enough free
** nBytes of contiguous free space, then this routine automatically
** calls defragementPage() to consolidate all free space before
** allocating the new chunk.
-**
-** Algorithm: Carve a piece off of the first freeblock that is
-** nByte in size or larger.
*/
static int allocateSpace(MemPage *pPage, int nByte){
int addr, pc, hdr;
int size;
int nFrag;
+ int top;
+ int nCell;
+ int cellOffset;
unsigned char *data;
-#ifndef NDEBUG
- int cnt = 0;
-#endif
-
+
data = pPage->aData;
assert( sqlite3pager_iswriteable(data) );
assert( pPage->pBt );
if( nByte<4 ) nByte = 4;
- if( pPage->nFree<nByte || pPage->isOverfull ) return 0;
+ if( pPage->nFree<nByte || pPage->nOverflow>0 ) return 0;
+ pPage->nFree -= nByte;
hdr = pPage->hdrOffset;
- nFrag = data[hdr+5];
- if( nFrag>=60 || nFrag>pPage->nFree-nByte ){
- defragmentPage(pPage);
- }
- addr = hdr+1;
- pc = get2byte(&data[addr]);
- assert( addr<pc );
- assert( pc<=pPage->pBt->usableSize-4 );
- while( (size = get2byte(&data[pc+2]))<nByte ){
- addr = pc;
- pc = get2byte(&data[addr]);
- assert( pc<=pPage->pBt->usableSize-4 );
- assert( pc>=addr+size+4 || pc==0 );
- if( pc==0 ){
- assert( (cnt++)==0 );
- defragmentPage(pPage);
- assert( data[hdr+5]==0 );
- addr = pPage->hdrOffset+1;
- pc = get2byte(&data[addr]);
+
+ nFrag = data[hdr+7];
+ if( nFrag<60 ){
+ /* Search the freelist looking for a slot big enough to satisfy the
+ ** space request. */
+ addr = hdr+1;
+ while( (pc = get2byte(&data[addr]))>0 ){
+ size = get2byte(&data[pc+2]);
+ if( size>=nByte ){
+ if( size<nByte+4 ){
+ memcpy(&data[addr], &data[pc], 2);
+ data[hdr+7] = nFrag + size - nByte;
+ return pc;
+ }else{
+ put2byte(&data[pc+2], size-nByte);
+ return pc + size - nByte;
+ }
+ }
+ addr = pc;
}
}
- assert( pc>0 && size>=nByte );
- assert( pc+size<=pPage->pBt->usableSize );
- if( size>nByte+4 ){
- int newStart = pc+nByte;
- put2byte(&data[addr], newStart);
- put2byte(&data[newStart], get2byte(&data[pc]));
- put2byte(&data[newStart+2], size-nByte);
- }else{
- put2byte(&data[addr], get2byte(&data[pc]));
- data[hdr+5] += size-nByte;
+
+ /* Allocate memory from the gap in between the cell pointer array
+ ** and the cell content area.
+ */
+ top = get2byte(&data[hdr+5]);
+ nCell = get2byte(&data[hdr+3]);
+ cellOffset = pPage->cellOffset;
+ if( nFrag>=60 || cellOffset + 2*nCell > top - nByte ){
+ defragmentPage(pPage);
+ top = get2byte(&data[hdr+5]);
}
- pPage->nFree -= nByte;
- assert( pPage->nFree>=0 );
- return pc;
+ top -= nByte;
+ assert( cellOffset + 2*nCell <= top );
+ put2byte(&data[hdr+5], top);
+ return top;
}
/*
*/
static void freeSpace(MemPage *pPage, int start, int size){
int end = start + size; /* End of the segment being freed */
- int addr, pbegin;
-#ifndef NDEBUG
- int tsize = 0; /* Total size of all freeblocks */
-#endif
+ int addr, pbegin, hdr;
unsigned char *data = pPage->aData;
assert( pPage->pBt!=0 );
if( size<4 ) size = 4;
/* Add the space back into the linked list of freeblocks */
- addr = pPage->hdrOffset + 1;
+ hdr = pPage->hdrOffset;
+ addr = hdr + 1;
while( (pbegin = get2byte(&data[addr]))<start && pbegin>0 ){
assert( pbegin<=pPage->pBt->usableSize-4 );
assert( pbegin>addr );
while( (pbegin = get2byte(&data[addr]))>0 ){
int pnext, psize;
assert( pbegin>addr );
- assert( pbegin<pPage->pBt->usableSize-4 );
+ assert( pbegin<=pPage->pBt->usableSize-4 );
pnext = get2byte(&data[pbegin]);
psize = get2byte(&data[pbegin+2]);
if( pbegin + psize + 3 >= pnext && pnext>0 ){
int frag = pnext - (pbegin+psize);
- assert( frag<=data[pPage->hdrOffset+5] );
- data[pPage->hdrOffset+5] -= frag;
+ assert( frag<=data[pPage->hdrOffset+7] );
+ data[pPage->hdrOffset+7] -= frag;
put2byte(&data[pbegin], get2byte(&data[pnext]));
put2byte(&data[pbegin+2], pnext+get2byte(&data[pnext+2])-pbegin);
}else{
- assert( (tsize += psize)>0 );
addr = pbegin;
}
}
- assert( tsize+data[pPage->hdrOffset+5]==pPage->nFree );
-}
-/*
-** Resize the aCell[] array of the given page so that it is able to
-** hold at least nNewSz entries.
-**
-** Return SQLITE_OK or SQLITE_NOMEM.
-*/
-static int resizeCellArray(MemPage *pPage, int nNewSz){
- if( pPage->nCellAlloc<nNewSz ){
- int n = nNewSz*sizeof(pPage->aCell[0]);
- if( pPage->aCell==0 ){
- pPage->aCell = sqliteMallocRaw( n );
- }else{
- pPage->aCell = sqliteRealloc(pPage->aCell, n);
- }
- if( sqlite3_malloc_failed ) return SQLITE_NOMEM;
- pPage->nCellAlloc = nNewSz;
+ /* If the cell content area begins with a freeblock, remove it. */
+ if( data[hdr+1]==data[hdr+5] && data[hdr+2]==data[hdr+6] ){
+ int top;
+ pbegin = get2byte(&data[hdr+1]);
+ memcpy(&data[hdr+1], &data[pbegin], 2);
+ top = get2byte(&data[hdr+5]);
+ put2byte(&data[hdr+5], top + get2byte(&data[pbegin+2]));
}
- return SQLITE_OK;
}
/*
){
int c, pc, i, hdr;
unsigned char *data;
- int usableSize;
- int nCell, nFree;
- u8 *aCell[MX_PAGE_SIZE/2];
-
+ int usableSize, cellOffset;
+ int nFree;
+ int top;
assert( pPage->pBt!=0 );
assert( pParent==0 || pParent->pBt==pPage->pBt );
pPage->pParent = pParent;
sqlite3pager_ref(pParent->aData);
}
- pPage->nCell = pPage->nCellAlloc = 0;
- assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
hdr = pPage->hdrOffset;
data = pPage->aData;
c = data[hdr];
+ assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
pPage->intKey = (c & (PTF_INTKEY|PTF_LEAFDATA))!=0;
pPage->zeroData = (c & PTF_ZERODATA)!=0;
pPage->leafData = (c & PTF_LEAFDATA)!=0;
pPage->leaf = (c & PTF_LEAF)!=0;
pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
- pPage->isOverfull = 0;
- pPage->needRelink = 0;
+ pPage->nOverflow = 0;
pPage->idxShift = 0;
usableSize = pPage->pBt->usableSize;
-
- /* Initialize the cell count and cell pointers */
- i = 0;
- pc = get2byte(&data[hdr+3]);
- nCell = 0;
- while( pc>0 ){
- if( pc>=usableSize ) return SQLITE_CORRUPT;
- if( nCell>sizeof(aCell)/sizeof(aCell[0]) ) return SQLITE_CORRUPT;
- aCell[nCell++] = &data[pc];
- pc = get2byte(&data[pc]);
- }
- if( resizeCellArray(pPage, nCell) ){
- return SQLITE_NOMEM;
- }
- pPage->nCell = nCell;
- memcpy(pPage->aCell, aCell, nCell*sizeof(aCell[0]));
+ pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
+ top = get2byte(&data[hdr+5]);
+ pPage->nCell = get2byte(&data[hdr+3]);
/* Compute the total free space on the page */
pc = get2byte(&data[hdr+1]);
- nFree = data[hdr+5];
+ nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell);
i = 0;
while( pc>0 ){
int next, size;
assert( sqlite3pager_iswriteable(data) );
memset(&data[hdr], 0, pBt->usableSize - hdr);
data[hdr] = flags;
- first = hdr + 6 + 4*((flags&PTF_LEAF)==0);
- put2byte(&data[hdr+1], first);
- put2byte(&data[first+2], pBt->usableSize - first);
- sqliteFree(pPage->aCell);
- pPage->aCell = 0;
- pPage->nCell = 0;
- pPage->nCellAlloc = 0;
+ first = hdr + 8 + 4*((flags&PTF_LEAF)==0);
+ memset(&data[hdr+1], 0, 4);
+ data[hdr+7] = 0;
+ put2byte(&data[hdr+5], pBt->usableSize);
pPage->nFree = pBt->usableSize - first;
pPage->intKey = (flags & (PTF_INTKEY|PTF_LEAFDATA))!=0;
pPage->zeroData = (flags & PTF_ZERODATA)!=0;
pPage->leaf = (flags & PTF_LEAF)!=0;
pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
pPage->hdrOffset = hdr;
- pPage->isOverfull = 0;
- pPage->needRelink = 0;
+ pPage->cellOffset = first;
+ pPage->nOverflow = 0;
pPage->idxShift = 0;
+ pPage->nCell = 0;
pPage->isInit = 1;
pageIntegrity(pPage);
}
*/
static void pageDestructor(void *pData, int pageSize){
MemPage *pPage = (MemPage*)&((char*)pData)[pageSize];
- assert( pPage->isInit==0 || pPage->needRelink==0 );
if( pPage->pParent ){
MemPage *pParent = pPage->pParent;
pPage->pParent = 0;
releasePage(pParent);
}
- sqliteFree(pPage->aCell);
- pPage->aCell = 0;
pPage->isInit = 0;
}
** a cell. Make sure it is small enough so that at least minFanout
** cells can will fit on one page. We assume a 10-byte page header.
** Besides the payload, the cell must store:
- ** 2-byte pointer to next cell
+ ** 2-byte pointer to the cell
** 4-byte child pointer
** 9-byte nKey value
** 4-byte nData value
** 4-byte overflow page pointer
- ** So a cell consists of a header which is as much as 19 bytes long,
- ** 0 to N bytes of payload, and an optional 4 byte overflow page pointer.
+ ** So a cell consists of a 2-byte pointer, a header which is as much as
+ ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow
+ ** page pointer.
*/
- pBt->maxLocal = (pBt->usableSize-10)*pBt->maxEmbedFrac/255 - 23;
- pBt->minLocal = (pBt->usableSize-10)*pBt->minEmbedFrac/255 - 23;
- pBt->maxLeaf = pBt->usableSize - 33;
- pBt->minLeaf = (pBt->usableSize-10)*pBt->minLeafFrac/255 - 23;
+ pBt->maxLocal = (pBt->usableSize-12)*pBt->maxEmbedFrac/255 - 23;
+ pBt->minLocal = (pBt->usableSize-12)*pBt->minEmbedFrac/255 - 23;
+ pBt->maxLeaf = pBt->usableSize - 35;
+ pBt->minLeaf = (pBt->usableSize-12)*pBt->minLeafFrac/255 - 23;
if( pBt->minLocal>pBt->maxLocal || pBt->maxLocal<0 ){
goto page1_init_failed;
}
static void getCellInfo(BtCursor *pCur){
MemPage *pPage = pCur->pPage;
if( !pCur->infoValid ){
- parseCell(pPage, pPage->aCell[pCur->idx], &pCur->info);
+ parseCell(pPage, pCur->idx, &pCur->info);
pCur->infoValid = 1;
}else{
#ifndef NDEBUG
CellInfo info;
- parseCell(pPage, pPage->aCell[pCur->idx], &info);
+ parseCell(pPage, pCur->idx, &info);
assert( memcmp(&info, &pCur->info, sizeof(info))==0 );
#endif
}
pPage = pCur->pPage;
pageIntegrity(pPage);
assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
- aPayload = pPage->aCell[pCur->idx];
getCellInfo(pCur);
+ aPayload = pCur->info.pCell;
aPayload += pCur->info.nHeader;
if( pPage->intKey ){
nKey = 0;
pPage = pCur->pPage;
pageIntegrity(pPage);
assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
- aPayload = pPage->aCell[pCur->idx];
getCellInfo(pCur);
+ aPayload = pCur->info.pCell;
aPayload += pCur->info.nHeader;
if( pPage->intKey ){
nKey = 0;
pCur->pPage = pParent;
pCur->infoValid = 0;
assert( pParent->idxShift==0 );
- if( pParent->idxShift==0 ){
- pCur->idx = idxParent;
-#ifndef NDEBUG
- /* Verify that pCur->idx is the correct index to point back to the child
- ** page we just came from
- */
- if( pCur->idx<pParent->nCell ){
- assert( get4byte(&pParent->aCell[idxParent][2])==oldPgno );
- }else{
- assert( get4byte(&pParent->aData[pParent->hdrOffset+6])==oldPgno );
- }
-#endif
- }else{
- /* The MemPage.idxShift flag indicates that cell indices might have
- ** changed since idxParent was set and hence idxParent might be out
- ** of date. So recompute the parent cell index by scanning all cells
- ** and locating the one that points to the child we just came from.
- */
- int i;
- pCur->idx = pParent->nCell;
- for(i=0; i<pParent->nCell; i++){
- if( get4byte(&pParent->aCell[i][2])==oldPgno ){
- pCur->idx = i;
- break;
- }
- }
- }
+ pCur->idx = idxParent;
}
/*
if( pRoot->nCell==0 && !pRoot->leaf ){
Pgno subpage;
assert( pRoot->pgno==1 );
- subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+6]);
+ subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]);
assert( subpage>0 );
pCur->isValid = 1;
rc = moveToChild(pCur, subpage);
assert( pCur->isValid );
while( !(pPage = pCur->pPage)->leaf ){
assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
- pgno = get4byte(&pPage->aCell[pCur->idx][2]);
+ pgno = get4byte(findCell(pPage, pCur->idx));
rc = moveToChild(pCur, pgno);
if( rc ) return rc;
}
assert( pCur->isValid );
while( !(pPage = pCur->pPage)->leaf ){
- pgno = get4byte(&pPage->aData[pPage->hdrOffset+6]);
+ pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
pCur->idx = pPage->nCell;
rc = moveToChild(pCur, pgno);
if( rc ) return rc;
if( pPage->leaf ){
chldPg = 0;
}else if( lwr>=pPage->nCell ){
- chldPg = get4byte(&pPage->aData[pPage->hdrOffset+6]);
+ chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
}else{
- chldPg = get4byte(&pPage->aCell[lwr][2]);
+ chldPg = get4byte(findCell(pPage, lwr));
}
if( chldPg==0 ){
assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
pCur->infoValid = 0;
if( pCur->idx>=pPage->nCell ){
if( !pPage->leaf ){
- rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+6]));
+ rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
if( rc ) return rc;
rc = moveToLeftmost(pCur);
*pRes = 0;
assert( pPage->isInit );
assert( pCur->idx>=0 );
if( !pPage->leaf ){
- pgno = get4byte(&pPage->aCell[pCur->idx][2]);
+ pgno = get4byte( findCell(pPage, pCur->idx) );
rc = moveToChild(pCur, pgno);
if( rc ) return rc;
rc = moveToRightmost(pCur);
Pgno ovflPgno;
int rc;
- parseCell(pPage, pCell, &info);
+ parseCellPtr(pPage, pCell, &info);
if( info.iOverflow==0 ){
return SQLITE_OK; /* No overflow pages. Return without doing anything */
}
CellInfo info;
/* Fill in the header. */
- nHeader = 2;
+ nHeader = 0;
if( !pPage->leaf ){
nHeader += 4;
}
nData = 0;
}
nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey);
- parseCell(pPage, pCell, &info);
+ parseCellPtr(pPage, pCell, &info);
assert( info.nHeader==nHeader );
assert( info.nKey==nKey );
assert( info.nData==nData );
if( pPage->leaf ) return;
pBt = pPage->pBt;
for(i=0; i<pPage->nCell; i++){
- reparentPage(pBt, get4byte(&pPage->aCell[i][2]), pPage, i);
+ reparentPage(pBt, get4byte(findCell(pPage,i)), pPage, i);
}
- reparentPage(pBt, get4byte(&pPage->aData[pPage->hdrOffset+6]), pPage, i);
+ reparentPage(pBt, get4byte(&pPage->aData[pPage->hdrOffset+8]), pPage, i);
pPage->idxShift = 0;
}
** removes the reference to the cell from pPage.
**
** "sz" must be the number of bytes in the cell.
-**
-** Try to maintain the integrity of the linked list of cells. But if
-** the cell being inserted does not fit on the page, this will not be
-** possible. If the linked list is not maintained, then just update
-** pPage->aCell[] and set the pPage->needRelink flag so that we will
-** know to rebuild the linked list later.
*/
static void dropCell(MemPage *pPage, int idx, int sz){
- int j, pc;
- u8 *data;
+ int i; /* Loop counter */
+ int pc; /* Offset to cell content of cell being deleted */
+ u8 *data; /* pPage->aData */
+ u8 *ptr; /* Used to move bytes around within data[] */
+
assert( idx>=0 && idx<pPage->nCell );
- assert( sz==cellSize(pPage, pPage->aCell[idx]) );
+ assert( sz==cellSize(pPage, idx) );
assert( sqlite3pager_iswriteable(pPage->aData) );
- assert( pPage->aCell[idx]>=pPage->aData );
- assert( pPage->aCell[idx]<=&pPage->aData[pPage->pBt->usableSize-sz] );
data = pPage->aData;
- pc = Addr(pPage->aCell[idx]) - Addr(data);
- assert( pc>pPage->hdrOffset && pc+sz<=pPage->pBt->usableSize );
+ ptr = &data[pPage->cellOffset + 2*idx];
+ pc = get2byte(ptr);
+ assert( pc>10 && pc+sz<=pPage->pBt->usableSize );
freeSpace(pPage, pc, sz);
- for(j=idx; j<pPage->nCell-1; j++){
- pPage->aCell[j] = pPage->aCell[j+1];
+ for(i=idx+1; i<pPage->nCell; i++, ptr+=2){
+ ptr[0] = ptr[2];
+ ptr[1] = ptr[3];
}
pPage->nCell--;
- if( !pPage->isOverfull && !pPage->needRelink ){
- u8 *pPrev;
- if( idx==0 ){
- pPrev = &data[pPage->hdrOffset+3];
- }else{
- pPrev = pPage->aCell[idx-1];
- }
- if( idx<pPage->nCell ){
- pc = Addr(pPage->aCell[idx]) - Addr(data);
- }else{
- pc = 0;
- }
- put2byte(pPrev, pc);
- pageIntegrity(pPage);
- }else{
- pPage->needRelink = 1;
- }
+ put2byte(&data[pPage->hdrOffset+3], pPage->nCell);
+ pPage->nFree += 2;
pPage->idxShift = 1;
}
** content of the cell.
**
** If the cell content will fit on the page, then put it there. If it
-** will not fit and pTemp is not NULL, then make a copy of the content
-** into pTemp, set pPage->aCell[i] point to pTemp, and set pPage->isOverfull.
-** If the content will not fit and pTemp is NULL, then make pPage->aCell[i]
-** point to pCell and set pPage->isOverfull.
-**
-** Try to maintain the integrity of the linked list of cells. But if
-** the cell being inserted does not fit on the page, this will not be
-** possible. If the linked list is not maintained, then just update
-** pPage->aCell[] and set the pPage->needRelink flag so that we will
-** know to rebuild the linked list later.
+** will not fit, then make a copy of the cell content into pTemp if
+** pTemp is not null. Regardless of pTemp, allocate a new entry
+** in pPage->aOvfl[] and make it point to the cell content (either
+** in pTemp or the original pCell) and also record its index.
+** Allocating a new entry in pPage->aOvfl[] implies that
+** pPage->nOverflow is incremented.
*/
static void insertCell(
MemPage *pPage, /* Page into which we are copying */
- int i, /* Which cell on pPage to insert after */
- u8 *pCell, /* Text of the new cell to insert */
- int sz, /* Bytes of data in pCell */
+ int i, /* New cell becomes the i-th cell of the page */
+ u8 *pCell, /* Content of the new cell */
+ int sz, /* Bytes of content in pCell */
u8 *pTemp /* Temp storage space for pCell, if needed */
){
- int idx, j;
- assert( i>=0 && i<=pPage->nCell );
- assert( sz==cellSize(pPage, pCell) );
+ int idx; /* Where to write new cell content in data[] */
+ int j; /* Loop counter */
+ int top; /* First byte of content for any cell in data[] */
+ int end; /* First byte past the last cell pointer in data[] */
+ int ins; /* Index in data[] where new cell pointer is inserted */
+ int hdr; /* Offset into data[] of the page header */
+ int cellOffset; /* Address of first cell pointer in data[] */
+ u8 *data; /* The content of the whole page */
+ u8 *ptr; /* Used for moving information around in data[] */
+
+ assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
+ assert( sz==cellSizePtr(pPage, pCell) );
assert( sqlite3pager_iswriteable(pPage->aData) );
- idx = pPage->needRelink ? 0 : allocateSpace(pPage, sz);
- resizeCellArray(pPage, pPage->nCell+1);
- for(j=pPage->nCell; j>i; j--){
- pPage->aCell[j] = pPage->aCell[j-1];
- }
- pPage->nCell++;
- if( idx<=0 ){
- pPage->isOverfull = 1;
+ /* The new cell needs sz bytes of content plus a 2-byte cell pointer.
+ ** If that does not fit in the free space, or if the page already has
+ ** overflow cells, record the cell in aOvfl[] instead of in the page. */
+ if( pPage->nOverflow || sz+2>pPage->nFree ){
if( pTemp ){
memcpy(pTemp, pCell, sz);
- }else{
- pTemp = pCell;
+ pCell = pTemp;
}
- pPage->aCell[i] = pTemp;
+ /* Claim the next aOvfl[] slot; it remembers both the cell content
+ ** pointer (pTemp copy or the caller's pCell) and the index i. */
+ j = pPage->nOverflow++;
+ assert( j<sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0]) );
+ pPage->aOvfl[j].pCell = pCell;
+ pPage->aOvfl[j].idx = i;
+ pPage->nFree = 0;
}else{
- u8 *data = pPage->aData;
+ data = pPage->aData;
+ hdr = pPage->hdrOffset;
+ top = get2byte(&data[hdr+5]);
+ cellOffset = pPage->cellOffset;
+ end = cellOffset + 2*pPage->nCell + 2;
+ ins = cellOffset + 2*i;
+ /* "end" already includes the 2-byte pointer being added. If the
+ ** pointer array plus sz bytes of new content would run past "top",
+ ** call defragmentPage() and re-read the value stored at hdr+5. */
+ if( end > top - sz ){
+ defragmentPage(pPage);
+ top = get2byte(&data[hdr+5]);
+ assert( end + sz <= top );
+ }
+ idx = allocateSpace(pPage, sz);
+ assert( idx>0 );
+ assert( end <= get2byte(&data[hdr+5]) );
+ pPage->nCell++;
+ pPage->nFree -= 2;
memcpy(&data[idx], pCell, sz);
- pPage->aCell[i] = &data[idx];
- }
- if( !pPage->isOverfull && !pPage->needRelink ){
- u8 *pPrev;
- int pc;
- if( i==0 ){
- pPrev = &pPage->aData[pPage->hdrOffset+3];
- }else{
- pPrev = pPage->aCell[i-1];
+ /* Slide the cell pointers in slots i and above up by one slot
+ ** (2 bytes), working from the top down, to open a gap at "ins". */
+ for(j=end-2, ptr=&data[j]; j>ins; j-=2, ptr-=2){
+ ptr[0] = ptr[-2];
+ ptr[1] = ptr[-1];
}
- pc = get2byte(pPrev);
- put2byte(pPrev, idx);
- put2byte(pPage->aCell[i], pc);
+ /* Store the new cell's pointer and the updated cell count. */
+ put2byte(&data[ins], idx);
+ put2byte(&data[hdr+3], pPage->nCell);
+ pPage->idxShift = 1;
pageIntegrity(pPage);
- }else{
- pPage->needRelink = 1;
}
- pPage->idxShift = 1;
}
/*
static void assemblePage(
MemPage *pPage, /* The page to be assemblied */
int nCell, /* The number of cells to add to this page */
- u8 **apCell, /* Pointers to cell text */
+ u8 **apCell, /* Pointers to cell bodies */
int *aSize /* Sizes of the cells */
){
int i; /* Loop counter */
int totalSize; /* Total size of all cells */
int hdr; /* Index of page header */
- int pc, prevpc; /* Addresses of cells being inserted */
+ int cellptr; /* Address of next cell pointer */
+ int cellbody; /* Address of next cell body */
u8 *data; /* Data for the page */
- assert( pPage->needRelink==0 );
- assert( pPage->isOverfull==0 );
+ assert( pPage->nOverflow==0 );
totalSize = 0;
for(i=0; i<nCell; i++){
totalSize += aSize[i];
}
- assert( totalSize<=pPage->nFree );
+ /* Each cell costs its body size plus one 2-byte cell pointer. */
+ assert( totalSize+2*nCell<=pPage->nFree );
assert( pPage->nCell==0 );
- resizeCellArray(pPage, nCell);
- pc = allocateSpace(pPage, totalSize);
+ cellptr = pPage->cellOffset;
data = pPage->aData;
hdr = pPage->hdrOffset;
- prevpc = hdr+3;
+ /* Record the cell count in the page header, then reserve a single
+ ** region for all of the cell bodies. The space used by the 2-byte
+ ** cell pointers is subtracted from nFree separately. */
+ put2byte(&data[hdr+3], nCell);
+ cellbody = allocateSpace(pPage, totalSize);
+ assert( cellbody>0 );
+ assert( pPage->nFree >= 2*nCell );
+ pPage->nFree -= 2*nCell;
for(i=0; i<nCell; i++){
- memcpy(data+pc, apCell[i], aSize[i]);
- put2byte(data+prevpc, pc);
- pPage->aCell[i] = data+pc;
- prevpc = pc;
- pc += aSize[i];
- assert( pc<=pPage->pBt->usableSize );
+ /* Write the pointer for cell i, then copy its body. Bodies are
+ ** laid out back-to-back in apCell[] order. */
+ put2byte(&data[cellptr], cellbody);
+ memcpy(&data[cellbody], apCell[i], aSize[i]);
+ cellptr += 2;
+ cellbody += aSize[i];
}
+ /* The bodies are expected to end exactly at the end of the usable
+ ** area of the page. */
+ assert( cellbody==pPage->pBt->usableSize );
pPage->nCell = nCell;
- put2byte(data+prevpc, 0);
}
-#if 0 /* Never Used */
-/*
-** Rebuild the linked list of cells on a page so that the cells
-** occur in the order specified by the pPage->aCell[] array.
-** Invoke this routine once to repair damage after one or more
-** invocations of either insertCell() or dropCell().
-*/
-static void relinkCellList(MemPage *pPage){
- int i, idxFrom;
- assert( sqlite3pager_iswriteable(pPage->aData) );
- if( !pPage->needRelink ) return;
- idxFrom = pPage->hdrOffset+3;
- for(i=0; i<pPage->nCell; i++){
- int idx = Addr(pPage->aCell[i]) - Addr(pPage->aData);
- assert( idx>pPage->hdrOffset && idx<pPage->pBt->usableSize );
- put2byte(&pPage->aData[idxFrom], idx);
- idxFrom = idx;
- }
- put2byte(&pPage->aData[idxFrom], 0);
- pPage->needRelink = 0;
-}
-#endif
-
/*
** GCC does not define the offsetof() macro so we'll have to do it
** ourselves.
#define offsetof(STRUCTURE,FIELD) ((int)((char*)&((STRUCTURE*)0)->FIELD))
#endif
-/*
-** Move the content of the page at pFrom over to pTo. The pFrom->aCell[]
-** pointers that point into pFrom->aData[] must be adjusted to point
-** into pTo->aData[] instead. But some pFrom->aCell[] entries might
-** not point to pFrom->aData[]. Those are unchanged.
-**
-** Over this operation completes, the meta data for pFrom is zeroed.
-*/
-static void movePage(MemPage *pTo, MemPage *pFrom){
- uptr from, to;
- int i;
- int usableSize;
- int ofst;
-
- assert( pTo->hdrOffset==0 );
- assert( pFrom->isInit );
- ofst = pFrom->hdrOffset;
- usableSize = pFrom->pBt->usableSize;
- sqliteFree(pTo->aCell);
- memcpy(pTo->aData, &pFrom->aData[ofst], usableSize - ofst);
- memcpy(pTo, pFrom, offsetof(MemPage, aData));
- pFrom->isInit = 0;
- pFrom->aCell = 0;
- assert( pTo->aData[5]<155 );
- pTo->aData[5] += ofst;
- pTo->isOverfull = pFrom->isOverfull;
- to = Addr(pTo->aData);
- from = Addr(&pFrom->aData[ofst]);
- for(i=0; i<pTo->nCell; i++){
- uptr x = Addr(pTo->aCell[i]);
- if( x>from && x<from+usableSize-ofst ){
- *((uptr*)&pTo->aCell[i]) = x + to - from;
- }
- }
-}
-
/*
** The following parameters determine how many adjacent pages get involved
** in a balancing operation. NN is the number of neighbors on either side
#define NN 1 /* Number of neighbors on either side of pPage */
#define NB (NN*2+1) /* Total pages involved in the balance */
+/* Forward reference */
+static int balance(MemPage*);
+
/*
** This routine redistributes Cells on pPage and up to NN*2 siblings
** of pPage so that all pages have about the same amount of free space.
** in a corrupted state. So if this routine fails, the database should
** be rolled back.
*/
-static int balance(MemPage *pPage){
+static int balance_nonroot(MemPage *pPage){
MemPage *pParent; /* The parent of pPage */
Btree *pBt; /* The whole database */
int nCell; /* Number of cells in aCell[] */
int pageFlags; /* Value of pPage->aData[0] */
int subtotal; /* Subtotal of bytes in cells on one page */
int iSpace = 0; /* First unused byte of aSpace[] */
- MemPage *extraUnref = 0; /* Unref this page if not zero */
MemPage *apOld[NB]; /* pPage and up to two siblings */
Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */
MemPage *apCopy[NB]; /* Private copies of apOld[] pages */
u8 aSpace[MX_PAGE_SIZE*4]; /* Space to copies of divider cells */
/*
- ** Return without doing any work if pPage is neither overfull nor
- ** underfull.
+ ** Find the parent page.
*/
assert( pPage->isInit );
assert( sqlite3pager_iswriteable(pPage->aData) );
pBt = pPage->pBt;
- if( !pPage->isOverfull && pPage->nFree<pBt->usableSize*2/3
- && pPage->nCell>=2){
- assert( pPage->needRelink==0 );
- return SQLITE_OK;
- }
-
- /*
- ** Find the parent of the page to be balanced. If there is no parent,
- ** it means this page is the root page and special rules apply.
- */
pParent = pPage->pParent;
- if( pParent==0 ){
- Pgno pgnoChild;
- MemPage *pChild;
- assert( pPage->isInit );
- if( pPage->nCell==0 ){
- if( pPage->leaf ){
- /* The table is completely empty */
- assert( pPage->needRelink==0 );
- TRACE(("BALANCE: empty table %d\n", pPage->pgno));
- }else{
- /* The root page is empty but has one child. Transfer the
- ** information from that one child into the root page if it
- ** will fit. This reduces the depth of the tree by one.
- **
- ** If the root page is page 1, it has less space available than
- ** its child (due to the 100 byte header that occurs at the beginning
- ** of the database fle), so it might not be able to hold all of the
- ** information currently contained in the child. If this is the
- ** case, then do not do the transfer. Leave page 1 empty except
- ** for the right-pointer to the child page. The child page becomes
- ** the virtual root of the tree.
- */
- pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+6]);
- assert( pgnoChild>0 && pgnoChild<=sqlite3pager_pagecount(pBt->pPager) );
- rc = getPage(pBt, pgnoChild, &pChild);
- if( rc ) return rc;
- if( pPage->pgno==1 ){
- rc = initPage(pChild, pPage);
- if( rc ) return rc;
- if( pChild->nFree>=100 ){
- /* The child information will fit on the root page, so do the
- ** copy */
- zeroPage(pPage, pChild->aData[0]);
- for(i=0; i<pChild->nCell; i++){
- szCell[i] = cellSize(pChild, pChild->aCell[i]);
- }
- assemblePage(pPage, pChild->nCell, pChild->aCell, szCell);
- freePage(pChild);
- TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno));
- }else{
- /* The child has more information that will fit on the root.
- ** The tree is already balanced. Do nothing. */
- TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno));
- }
- }else{
- memcpy(pPage->aData, pChild->aData, pBt->usableSize);
- pPage->isInit = 0;
- pPage->pParent = 0;
- rc = initPage(pPage, 0);
- assert( rc==SQLITE_OK );
- freePage(pChild);
- TRACE(("BALANCE: transfer child %d into root %d\n",
- pChild->pgno, pPage->pgno));
- }
- reparentChildPages(pPage);
- releasePage(pChild);
- }
- return SQLITE_OK;
- }
- if( !pPage->isOverfull ){
- /* It is OK for the root page to be less than half full.
- */
- assert( pPage->needRelink==0 );
- TRACE(("BALANCE: root page %d is low - no changes\n", pPage->pgno));
- return SQLITE_OK;
- }
- /*
- ** If we get to here, it means the root page is overfull.
- ** When this happens, Create a new child page and copy the
- ** contents of the root into the child. Then make the root
- ** page an empty page with rightChild pointing to the new
- ** child. Then fall thru to the code below which will cause
- ** the overfull child page to be split.
- */
- rc = allocatePage(pBt, &pChild, &pgnoChild, pPage->pgno);
- if( rc ) return rc;
- assert( sqlite3pager_iswriteable(pChild->aData) );
- movePage(pChild, pPage);
- assert( pChild->aData[0]==pPage->aData[pPage->hdrOffset] );
- pChild->pParent = pPage;
- sqlite3pager_ref(pPage->aData);
- pChild->idxParent = 0;
- pChild->isOverfull = 1;
- zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF);
- put4byte(&pPage->aData[pPage->hdrOffset+6], pChild->pgno);
- pParent = pPage;
- pPage = pChild;
- extraUnref = pChild;
- TRACE(("BALANCE: copy root %d into %d and balance %d\n",
- pParent->pgno, pPage->pgno, pPage->pgno));
- }else{
- TRACE(("BALANCE: begin page %d child of %d\n",
- pPage->pgno, pParent->pgno));
- }
- rc = sqlite3pager_write(pParent->aData);
- if( rc ) return rc;
- assert( pParent->isInit );
+ sqlite3pager_write(pParent->aData);
+ assert( pParent );
+ TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));
/*
** Find the cell in the parent page whose left child points back
pgno = pPage->pgno;
assert( pgno==sqlite3pager_pagenumber(pPage->aData) );
for(idx=0; idx<pParent->nCell; idx++){
- if( get4byte(&pParent->aCell[idx][2])==pgno ){
+ if( get4byte(findCell(pParent, idx))==pgno ){
break;
}
}
assert( idx<pParent->nCell
- || get4byte(&pParent->aData[pParent->hdrOffset+6])==pgno );
+ || get4byte(&pParent->aData[pParent->hdrOffset+8])==pgno );
}else{
idx = pPage->idxParent;
}
for(i=0, k=nxDiv; i<NB; i++, k++){
if( k<pParent->nCell ){
idxDiv[i] = k;
- apDiv[i] = pParent->aCell[k];
+ apDiv[i] = findCell(pParent, k);
nDiv++;
assert( !pParent->leaf );
- pgnoOld[i] = get4byte(&apDiv[i][2]);
+ pgnoOld[i] = get4byte(apDiv[i]);
}else if( k==pParent->nCell ){
- pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+6]);
+ pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+8]);
}else{
break;
}
*/
for(i=0; i<nOld; i++){
MemPage *p = apCopy[i] = (MemPage*)&aCopy[i+1][-sizeof(MemPage)];
- p->aData = &((u8*)p)[-pBt->usableSize];
- p->aCell = 0;
- p->hdrOffset = 0;
- movePage(p, apOld[i]);
+ p->aData = &((u8*)p)[-pBt->pageSize];
+ memcpy(p->aData, apOld[i]->aData, pBt->pageSize + sizeof(MemPage));
+ p->aData = &((u8*)p)[-pBt->pageSize];
}
/*
leafData = pPage->leafData && pPage->leaf;
for(i=0; i<nOld; i++){
MemPage *pOld = apCopy[i];
- for(j=0; j<pOld->nCell; j++){
- apCell[nCell] = pOld->aCell[j];
- szCell[nCell] = cellSize(pOld, apCell[nCell]);
+ int limit = pOld->nCell+pOld->nOverflow;
+ for(j=0; j<limit; j++){
+ apCell[nCell] = findOverflowCell(pOld, j);
+ szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);
nCell++;
}
if( i<nOld-1 ){
- int sz = cellSize(pParent, apDiv[i]);
+ int sz = cellSizePtr(pParent, apDiv[i]);
if( leafData ){
/* With the LEAFDATA flag, pParent cells hold only INTKEYs that
** are duplicates of keys on the child pages. We need to remove
apCell[nCell] = pTemp+leafCorrection;
dropCell(pParent, nxDiv, sz);
szCell[nCell] -= leafCorrection;
- assert( get4byte(pTemp+2)==pgnoOld[i] );
+ assert( get4byte(pTemp)==pgnoOld[i] );
if( !pOld->leaf ){
assert( leafCorrection==0 );
/* The right pointer of the child page pOld becomes the left
** pointer of the divider cell */
- memcpy(&apCell[nCell][2], &pOld->aData[pOld->hdrOffset+6], 4);
+ memcpy(apCell[nCell], &pOld->aData[pOld->hdrOffset+8], 4);
}else{
assert( leafCorrection==4 );
}
** usableSpace: Number of bytes of space available on each sibling.
**
*/
- usableSpace = pBt->usableSize - 10 + leafCorrection;
+ usableSpace = pBt->usableSize - 12 + leafCorrection;
for(subtotal=k=i=0; i<nCell; i++){
- subtotal += szCell[i];
+ subtotal += szCell[i] + 2;
if( subtotal > usableSpace ){
szNew[k] = subtotal - szCell[i];
cntNew[k] = i;
r = cntNew[i-1] - 1;
d = r + 1 - leafData;
- while( szRight==0 || szRight+szCell[d]<=szLeft-szCell[r] ){
- szRight += szCell[d];
- szLeft -= szCell[r];
+ while( szRight==0 || szRight+szCell[d]+2<=szLeft-(szCell[r]+2) ){
+ szRight += szCell[d] + 2;
+ szLeft -= szCell[r] + 2;
cntNew[i-1]--;
r = cntNew[i-1] - 1;
d = r + 1 - leafData;
for(i=0; i<nNew; i++){
MemPage *pNew = apNew[i];
assert( pNew->pgno==pgnoNew[i] );
- resizeCellArray(pNew, cntNew[i] - j);
assemblePage(pNew, cntNew[i]-j, &apCell[j], &szCell[j]);
j = cntNew[i];
assert( pNew->nCell>0 );
- assert( !pNew->isOverfull );
- assert( pNew->needRelink==0 );
+ assert( pNew->nOverflow==0 );
if( i<nNew-1 && j<nCell ){
u8 *pCell;
u8 *pTemp;
pCell = apCell[j];
sz = szCell[j] + leafCorrection;
if( !pNew->leaf ){
- memcpy(&pNew->aData[6], pCell+2, 4);
+ memcpy(&pNew->aData[8], pCell, 4);
pTemp = 0;
}else if( leafData ){
CellInfo info;
j--;
- parseCell(pNew, apCell[j], &info);
+ parseCellPtr(pNew, apCell[j], &info);
pCell = &aSpace[iSpace];
fillInCell(pParent, pCell, 0, info.nKey, 0, 0, &sz);
iSpace += sz;
assert( iSpace<=sizeof(aSpace) );
}
insertCell(pParent, nxDiv, pCell, sz, pTemp);
- put4byte(&pParent->aCell[nxDiv][2], pNew->pgno);
+ put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno);
j++;
nxDiv++;
}
}
assert( j==nCell );
if( (pageFlags & PTF_LEAF)==0 ){
- memcpy(&apNew[nNew-1]->aData[6], &apCopy[nOld-1]->aData[6], 4);
+ memcpy(&apNew[nNew-1]->aData[8], &apCopy[nOld-1]->aData[8], 4);
}
- if( nxDiv==pParent->nCell ){
+ if( nxDiv==pParent->nCell+pParent->nOverflow ){
/* Right-most sibling is the right-most child of pParent */
- put4byte(&pParent->aData[pParent->hdrOffset+6], pgnoNew[nNew-1]);
+ put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew[nNew-1]);
}else{
/* Right-most sibling is the left child of the first entry in pParent
** past the right-most divider entry */
- put4byte(&pParent->aCell[nxDiv][2], pgnoNew[nNew-1]);
+ put4byte(findOverflowCell(pParent, nxDiv), pgnoNew[nNew-1]);
}
/*
balance_cleanup:
for(i=0; i<nOld; i++){
releasePage(apOld[i]);
- if( apCopy[i] ){
- sqliteFree(apCopy[i]->aCell);
- }
}
for(i=0; i<nNew; i++){
releasePage(apNew[i]);
}
releasePage(pParent);
- releasePage(extraUnref);
TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%d\n",
pPage->pgno, nOld, nNew, nCell));
return rc;
}
+/*
+** This routine is called for the root page of a btree when the root
+** page contains no cells.  This is an opportunity to make the tree
+** shallower by one level.
+**
+** If the root is a leaf the table is simply empty and nothing is done.
+** Otherwise the single child (the right-child pointer stored at header
+** offset 8) is loaded and, when possible, its content is moved into
+** the root and the child page is freed.
+**
+** Returns SQLITE_OK on success, or the error code from getPage() or
+** initPage() if the child page cannot be loaded.  The reference to
+** the child page taken here is released on every path that acquired it.
+*/
+static int balance_shallower(MemPage *pPage){
+  MemPage *pChild;             /* The only child page of pPage */
+  Pgno pgnoChild;              /* Page number for pChild */
+  int rc;                      /* Return code from subprocedures */
+  u8 *apCell[(MX_CELL+2)*NB];  /* Pointers to the cells of the child page */
+  int szCell[(MX_CELL+2)*NB];  /* Local size of each cell in apCell[] */
+
+  assert( pPage->pParent==0 );
+  assert( pPage->nCell==0 );
+  if( pPage->leaf ){
+    /* The table is completely empty */
+    TRACE(("BALANCE: empty table %d\n", pPage->pgno));
+  }else{
+    /* The root page is empty but has one child.  Transfer the
+    ** information from that one child into the root page if it
+    ** will fit.  This reduces the depth of the tree by one.
+    **
+    ** If the root page is page 1, it has less space available than
+    ** its child (due to the 100 byte header that occurs at the beginning
+    ** of the database file), so it might not be able to hold all of the
+    ** information currently contained in the child.  If this is the
+    ** case, then do not do the transfer.  Leave page 1 empty except
+    ** for the right-pointer to the child page.  The child page becomes
+    ** the virtual root of the tree.
+    */
+    pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
+    assert( pgnoChild>0 );
+    assert( pgnoChild<=sqlite3pager_pagecount(pPage->pBt->pPager) );
+    rc = getPage(pPage->pBt, pgnoChild, &pChild);
+    if( rc ) return rc;
+    if( pPage->pgno==1 ){
+      rc = initPage(pChild, pPage);
+      if( rc ){
+        /* Do not leak the reference obtained by getPage() above. */
+        releasePage(pChild);
+        return rc;
+      }
+      assert( pChild->nOverflow==0 );
+      if( pChild->nFree>=100 ){
+        /* The child information will fit on the root page, so do the
+        ** copy */
+        int i;
+        zeroPage(pPage, pChild->aData[0]);
+        for(i=0; i<pChild->nCell; i++){
+          apCell[i] = findCell(pChild,i);
+          szCell[i] = cellSizePtr(pChild, apCell[i]);
+        }
+        assemblePage(pPage, pChild->nCell, apCell, szCell);
+        freePage(pChild);
+        TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno));
+      }else{
+        /* The child has more information that will fit on the root.
+        ** The tree is already balanced.  Do nothing. */
+        TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno));
+      }
+    }else{
+      /* Root and child have the same layout here, so the child image
+      ** is copied over the root and the root is re-initialized from it. */
+      memcpy(pPage->aData, pChild->aData, pPage->pBt->usableSize);
+      pPage->isInit = 0;
+      pPage->pParent = 0;
+      rc = initPage(pPage, 0);
+      assert( rc==SQLITE_OK );
+      freePage(pChild);
+      TRACE(("BALANCE: transfer child %d into root %d\n",
+              pChild->pgno, pPage->pgno));
+    }
+    reparentChildPages(pPage);
+    releasePage(pChild);
+  }
+  return SQLITE_OK;
+}
+
+
+/*
+** The root page is overfull
+**
+** When this happens, Create a new child page and copy the
+** contents of the root into the child.  Then make the root
+** page an empty page with rightChild pointing to the new
+** child.  Finally, call balance_nonroot() on the new child
+** to cause it to split.
+**
+** Returns SQLITE_OK on success or the error code of the first
+** subprocedure that fails.  The reference to the new child page is
+** released before returning, including on the initPage() error path.
+*/
+static int balance_deeper(MemPage *pPage){
+  int rc;             /* Return value from subprocedures */
+  MemPage *pChild;    /* Pointer to a new child page */
+  Pgno pgnoChild;     /* Page number of the new child page */
+  Btree *pBt;         /* The BTree */
+  int usableSize;     /* Total usable size of a page */
+  u8 *data;           /* Content of the parent page */
+  u8 *cdata;          /* Content of the child page */
+  int hdr;            /* Offset to page header in parent */
+  int brk;            /* Offset to content of first cell in parent */
+
+  assert( pPage->pParent==0 );
+  assert( pPage->nOverflow>0 );
+  pBt = pPage->pBt;
+  rc = allocatePage(pBt, &pChild, &pgnoChild, pPage->pgno);
+  if( rc ) return rc;
+  assert( sqlite3pager_iswriteable(pChild->aData) );
+  usableSize = pBt->usableSize;
+  data = pPage->aData;
+  hdr = pPage->hdrOffset;
+  brk = get2byte(&data[hdr+5]);
+  cdata = pChild->aData;
+  /* Copy the page header and cell pointer array (which start at offset
+  ** hdr in the root) to offset 0 of the child, then copy the cell
+  ** content area at the same offsets it occupies in the root. */
+  memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr);
+  memcpy(&cdata[brk], &data[brk], usableSize-brk);
+  rc = initPage(pChild, pPage);
+  if( rc ){
+    /* Do not leak the reference obtained by allocatePage() above. */
+    releasePage(pChild);
+    return rc;
+  }
+  /* The overflow cells are not stored in the page image, so hand them
+  ** to the child explicitly. */
+  memcpy(pChild->aOvfl, pPage->aOvfl, pPage->nOverflow*sizeof(pPage->aOvfl[0]));
+  pChild->nOverflow = pPage->nOverflow;
+  if( pChild->nOverflow ){
+    pChild->nFree = 0;
+  }
+  assert( pChild->nCell==pPage->nCell );
+  /* The root becomes an empty interior page whose right child (header
+  ** offset 8) is the new page. */
+  zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF);
+  put4byte(&pPage->aData[pPage->hdrOffset+8], pgnoChild);
+  TRACE(("BALANCE: copy root %d into %d\n", pPage->pgno, pChild->pgno));
+  rc = balance_nonroot(pChild);
+  releasePage(pChild);
+  return rc;
+}
+
+/*
+** Decide if the page pPage needs to be balanced.  If balancing is
+** required, call the appropriate balancing routine.
+**
+** For a root page (pParent==0): an overfull root is pushed down one
+** level with balance_deeper(); a root with no cells is handed to
+** balance_shallower().  For any other page, balance_nonroot() is run
+** when the page has overflow cells or when more than two thirds of
+** its usable space is free.
+**
+** Returns SQLITE_OK if no balancing was needed, otherwise the result
+** of the balancing routine.  An error from balance_deeper() is returned
+** as-is and is never masked by a subsequent balance_shallower() call.
+*/
+static int balance(MemPage *pPage){
+  int rc = SQLITE_OK;
+  if( pPage->pParent==0 ){
+    if( pPage->nOverflow>0 ){
+      rc = balance_deeper(pPage);
+    }
+    /* Only try to make the tree shallower if the previous step did not
+    ** fail; otherwise its error code would be overwritten. */
+    if( rc==SQLITE_OK && pPage->nCell==0 ){
+      rc = balance_shallower(pPage);
+    }
+  }else{
+    if( pPage->nOverflow>0 || pPage->nFree>pPage->pBt->usableSize*2/3 ){
+      rc = balance_nonroot(pPage);
+    }
+  }
+  return rc;
+}
+
/*
** This routine checks all cursors that point to the same table
** as pCur points to. If any of those cursors were opened with
if( rc ) return rc;
rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, &szNew);
if( rc ) return rc;
- assert( szNew==cellSize(pPage, newCell) );
+ assert( szNew==cellSizePtr(pPage, newCell) );
assert( szNew<=sizeof(newCell) );
if( loc==0 && pCur->isValid ){
int szOld;
assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
- oldCell = pPage->aCell[pCur->idx];
+ oldCell = findCell(pPage, pCur->idx);
if( !pPage->leaf ){
- memcpy(&newCell[2], &oldCell[2], 4);
+ memcpy(newCell, oldCell, 4);
}
- szOld = cellSize(pPage, oldCell);
+ szOld = cellSizePtr(pPage, oldCell);
rc = clearCell(pPage, oldCell);
if( rc ) return rc;
dropCell(pPage, pCur->idx, szOld);
}
rc = sqlite3pager_write(pPage->aData);
if( rc ) return rc;
- pCell = pPage->aCell[pCur->idx];
+ pCell = findCell(pPage, pCur->idx);
if( !pPage->leaf ){
- pgnoChild = get4byte(&pCell[2]);
+ pgnoChild = get4byte(pCell);
}
clearCell(pPage, pCell);
if( !pPage->leaf ){
if( rc ) return rc;
TRACE(("DELETE: table=%d delete internal from %d replace from leaf %d\n",
pCur->pgnoRoot, pPage->pgno, leafCur.pPage->pgno));
- dropCell(pPage, pCur->idx, cellSize(pPage, pCell));
- pNext = leafCur.pPage->aCell[leafCur.idx];
- szNext = cellSize(leafCur.pPage, pNext);
+ dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
+ pNext = findCell(leafCur.pPage, leafCur.idx);
+ szNext = cellSizePtr(leafCur.pPage, pNext);
assert( sizeof(tempCell)>=szNext+4 );
insertCell(pPage, pCur->idx, pNext-4, szNext+4, tempCell);
- put4byte(pPage->aCell[pCur->idx]+2, pgnoChild);
+ put4byte(findOverflowCell(pPage, pCur->idx), pgnoChild);
rc = balance(pPage);
if( rc ) return rc;
dropCell(leafCur.pPage, leafCur.idx, szNext);
}else{
TRACE(("DELETE: table=%d delete from leaf %d\n",
pCur->pgnoRoot, pPage->pgno));
- dropCell(pPage, pCur->idx, cellSize(pPage, pCell));
+ dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
rc = balance(pPage);
}
moveToRoot(pCur);
rc = sqlite3pager_write(pPage->aData);
if( rc ) return rc;
for(i=0; i<pPage->nCell; i++){
- pCell = pPage->aCell[i];
+ pCell = findCell(pPage, i);
if( !pPage->leaf ){
- rc = clearDatabasePage(pBt, get4byte(&pCell[2]), pPage->pParent, 1);
+ rc = clearDatabasePage(pBt, get4byte(pCell), pPage->pParent, 1);
if( rc ) return rc;
}
rc = clearCell(pPage, pCell);
if( rc ) return rc;
}
if( !pPage->leaf ){
- rc = clearDatabasePage(pBt, get4byte(&pPage->aData[6]), pPage->pParent, 1);
+ rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), pPage->pParent, 1);
if( rc ) return rc;
}
if( freePageFlag ){
int nFree;
u16 idx;
int hdr;
+ int nCell;
unsigned char *data;
char range[20];
unsigned char payload[20];
pPage->leafData = (c & PTF_LEAFDATA)!=0;
pPage->leaf = (c & PTF_LEAF)!=0;
pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
+ nCell = get2byte(&data[hdr+3]);
printf("PAGE %d: flags=0x%02x frag=%d parent=%d\n", pgno,
- data[hdr], data[hdr+5],
+ data[hdr], data[hdr+7],
(pPage->isInit && pPage->pParent) ? pPage->pParent->pgno : 0);
- i = 0;
assert( hdr == (pgno==1 ? 100 : 0) );
- idx = get2byte(&data[hdr+3]);
- while( idx>0 && idx<=pBt->usableSize ){
+ idx = hdr + 12 - pPage->leaf*4;
+ for(i=0; i<nCell; i++){
CellInfo info;
Pgno child;
- unsigned char *pCell = &data[idx];
+ unsigned char *pCell;
int sz;
+ int addr;
- pCell = &data[idx];
- parseCell(pPage, pCell, &info);
+ addr = get2byte(&data[idx + 2*i]);
+ pCell = &data[addr];
+ parseCellPtr(pPage, pCell, &info);
sz = info.nSize;
- sprintf(range,"%d..%d", idx, idx+sz-1);
+ sprintf(range,"%d..%d", addr, addr+sz-1);
if( pPage->leaf ){
child = 0;
}else{
- child = get4byte(&pCell[2]);
+ child = get4byte(pCell);
}
sz = info.nData;
if( !pPage->intKey ) sz += info.nKey;
"cell %2d: i=%-10s chld=%-4d nk=%-4lld nd=%-4d payload=%s\n",
i, range, child, info.nKey, info.nData, payload
);
- if( pPage->isInit && pPage->aCell[i]!=pCell ){
- printf("**** aCell[%d] does not match on prior entry ****\n", i);
- }
- i++;
- idx = get2byte(pCell);
- }
- if( idx!=0 ){
- printf("ERROR: next cell index out of range: %d\n", idx);
}
if( !pPage->leaf ){
- printf("right_child: %d\n", get4byte(&data[hdr+6]));
+ printf("right_child: %d\n", get4byte(&data[hdr+8]));
}
nFree = 0;
i = 0;
printf("ERROR: next freeblock index out of range: %d\n", idx);
}
if( recursive && !pPage->leaf ){
- idx = get2byte(&data[hdr+3]);
- while( idx>0 && idx<pBt->usableSize ){
- unsigned char *pCell = &data[idx];
- sqlite3BtreePageDump(pBt, get4byte(&pCell[2]), 1);
+ for(i=0; i<nCell; i++){
+ unsigned char *pCell = findCell(pPage, i);
+ sqlite3BtreePageDump(pBt, get4byte(pCell), 1);
idx = get2byte(pCell);
}
- sqlite3BtreePageDump(pBt, get4byte(&data[hdr+6]), 1);
+ sqlite3BtreePageDump(pBt, get4byte(&data[hdr+8]), 1);
}
sqlite3pager_unref(data);
fflush(stdout);
aResult[1] = pCur->idx;
aResult[2] = pPage->nCell;
if( pCur->idx>=0 && pCur->idx<pPage->nCell ){
- aResult[3] = cellSize(pPage, pPage->aCell[pCur->idx]);
- aResult[6] = pPage->leaf ? 0 : get4byte(&pPage->aCell[pCur->idx][2]);
+ u8 *pCell = findCell(pPage, pCur->idx);
+ aResult[3] = cellSizePtr(pPage, pCell);
+ aResult[6] = pPage->leaf ? 0 : get4byte(pCell);
}else{
aResult[3] = 0;
aResult[6] = 0;
idx = get2byte(&pPage->aData[idx]);
}
aResult[5] = cnt;
- aResult[7] = pPage->leaf ? 0 : get4byte(&pPage->aData[pPage->hdrOffset+6]);
+ aResult[7] = pPage->leaf ? 0 : get4byte(&pPage->aData[pPage->hdrOffset+8]);
return SQLITE_OK;
}
#endif
){
MemPage *pPage;
int i, rc, depth, d2, pgno, cnt;
- int hdr;
+ int hdr, cellStart;
+ int nCell;
u8 *data;
BtCursor cur;
Btree *pBt;
/* Check payload overflow pages
*/
sprintf(zContext, "On tree page %d cell %d: ", iPage, i);
- pCell = pPage->aCell[i];
- parseCell(pPage, pCell, &info);
+ pCell = findCell(pPage,i);
+ parseCellPtr(pPage, pCell, &info);
sz = info.nData;
if( !pPage->intKey ) sz += info.nKey;
if( sz>info.nLocal ){
/* Check sanity of left child page.
*/
if( !pPage->leaf ){
- pgno = get4byte(&pCell[2]);
+ pgno = get4byte(pCell);
d2 = checkTreePage(pCheck,pgno,pPage,zContext,0,0,0,0);
if( i>0 && d2!=depth ){
checkAppendMsg(pCheck, zContext, "Child page depth differs");
}
}
if( !pPage->leaf ){
- pgno = get4byte(&pPage->aData[pPage->hdrOffset+6]);
+ pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
sprintf(zContext, "On page %d at right child: ", iPage);
checkTreePage(pCheck, pgno, pPage, zContext,0,0,0,0);
}
/* Check for complete coverage of the page
*/
- memset(hit, 0, usableSize);
- memset(hit, 1, pPage->hdrOffset+10-4*(pPage->leaf));
data = pPage->aData;
hdr = pPage->hdrOffset;
- for(cnt=0, i=get2byte(&data[hdr+3]); i>0 && i<usableSize && cnt<10000; cnt++){
- int size = cellSize(pPage, &data[i]);
+ memset(hit, 0, usableSize);
+ memset(hit, 1, get2byte(&data[hdr+5]));
+ nCell = get2byte(&data[hdr+3]);
+ cellStart = hdr + 12 - 4*pPage->leaf;
+ for(i=0; i<nCell; i++){
+ int pc = get2byte(&data[cellStart+i*2]);
+ int size = cellSizePtr(pPage, &data[pc]);
int j;
- for(j=i+size-1; j>=i; j--) hit[j]++;
- i = get2byte(&data[i]);
+ for(j=pc+size-1; j>=pc; j--) hit[j]++;
}
for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i<usableSize && cnt<10000; cnt++){
int size = get2byte(&data[i+2]);
break;
}
}
- if( cnt!=data[hdr+5] ){
+ if( cnt!=data[hdr+7] ){
sprintf(zMsg, "Fragmented space is %d byte reported as %d on page %d",
- cnt, data[hdr+5], iPage);
+ cnt, data[hdr+7], iPage);
checkAppendMsg(pCheck, zMsg, 0);
}
** must be active for both files.
**
** The size of file pBtFrom may be reduced by this operation.
-** If anything goes wrong, the transaction on pBtTo is rolled back.
+** If anything goes wrong, the transaction on pBtFrom is rolled back.
*/
int sqlite3BtreeCopyFile(Btree *pBtTo, Btree *pBtFrom){
int rc = SQLITE_OK;