** May you share freely, never taking more than you give.
**
*************************************************************************
-** $Id: btree.c,v 1.117 2004/05/09 00:40:52 drh Exp $
+** $Id: btree.c,v 1.118 2004/05/09 01:35:06 drh Exp $
**
** This file implements an external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
u8 leaf; /* True if leaf flag is set */
u8 zeroData; /* True if zero data flag is set */
u8 hdrOffset; /* 100 for page 1. 0 otherwise */
- u8 needRelink; /* True if need to run relinkCellList() */
int idxParent; /* Index in pParent->aCell[] of this node */
int nFree; /* Number of free bytes on the page */
int nCell; /* Number of entries on this page */
return n + nPayload;
}
-/*
-** Do sanity checking on a page. Throw an exception if anything is
-** not right.
-**
-** This routine is used for internal error checking only. It is omitted
-** from most builds.
-*/
-#if defined(BTREE_DEBUG) && !defined(NDEBUG) && 0
-static void _pageIntegrity(MemPage *pPage){
- int pageSize;
- u8 *data;
- int i, idx, c, pc, hdr, nFree;
- u8 used[MX_PAGE_SIZE];
-
- pageSize = pPage->pBt->pageSize;
- assert( pPage->aData==&((unsigned char*)pPage)[-pageSize] );
- hdr = pPage->hdrOffset;
- assert( hdr==(pPage->pgno==1 ? 100 : 0) );
- assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) );
- c = pPage->aData[hdr];
- if( pPage->isInit ){
- assert( pPage->leaf == ((c & PTF_LEAF)!=0) );
- assert( pPage->zeroData == ((c & PTF_ZERODATA)!=0) );
- assert( pPage->intKey == ((c & PTF_INTKEY)!=0) );
- }
- data = pPage->aData;
- memset(used, 0, pageSize);
- for(i=0; i<hdr+10-pPage->leaf*4; i++) used[i] = 1;
- nFree = 0;
- pc = get2byte(&data[hdr+1]);
- while( pc ){
- int size;
- assert( pc>0 && pc<pageSize-4 );
- size = get2byte(&data[pc+2]);
- assert( pc+size<=pageSize );
- nFree += size;
- for(i=pc; i<pc+size; i++){
- assert( used[i]==0 );
- used[i] = 1;
- }
- pc = get2byte(&data[pc]);
- }
- assert( pPage->isInit==0 || pPage->nFree==nFree+data[hdr+5] );
- idx = 0;
- pc = get2byte(&data[hdr+3]);
- while( pc ){
- int size;
- assert( pPage->isInit==0 || idx<pPage->nCell );
- assert( pc>0 && pc<pageSize-4 );
- assert( pPage->isInit==0 || pPage->aCell[idx]==&data[pc] );
- size = cellSize(pPage, &data[pc]);
- assert( pc+size<=pageSize );
- for(i=pc; i<pc+size; i++){
- assert( used[i]==0 );
- used[i] = 1;
- }
- pc = get2byte(&data[pc]);
- idx++;
- }
- assert( idx==pPage->nCell );
- nFree = 0;
- for(i=0; i<pageSize; i++){
- assert( used[i]<=1 );
- if( used[i]==0 ) nFree++;
- }
- assert( nFree==data[hdr+5] );
-}
-#define pageIntegrity(X) _pageIntegrity(X)
-#else
-# define pageIntegrity(X)
-#endif
-
/*
** Defragment the page given. All Cells are moved to the
** beginning of the page and all free space is collected
assert( sqlite3pager_iswriteable(pPage->aData) );
assert( pPage->pBt!=0 );
assert( pPage->pBt->pageSize <= MX_PAGE_SIZE );
- assert( !pPage->needRelink );
- assert( !pPage->isOverfull );
oldPage = pPage->aData;
hdr = pPage->hdrOffset;
addr = 3+hdr;
size = cellSize(pPage, &oldPage[pc]);
memcpy(&newPage[n], &oldPage[pc], size);
put2byte(&newPage[addr],n);
- assert( pPage->aCell[i]==&oldPage[pc] );
pPage->aCell[i++] = &oldPage[n];
- addr = n;
n += size;
+ addr = pc;
pc = get2byte(&oldPage[pc]);
}
assert( i==pPage->nCell );
assert( pParent==0 || pParent->pBt==pPage->pBt );
assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) );
assert( pPage->aData == &((unsigned char*)pPage)[-pPage->pBt->pageSize] );
- assert( pPage->pParent==0 || pPage->pParent==pParent );
- if( pPage->pParent==0 && pParent!=0 ){
- pPage->pParent = pParent;
+ assert( pPage->isInit==0 || pPage->pParent==pParent );
+ if( pPage->isInit ) return SQLITE_OK;
+ assert( pPage->pParent==0 );
+ pPage->pParent = pParent;
+ if( pParent ){
sqlite3pager_ref(pParent->aData);
}
- if( pPage->isInit ) return SQLITE_OK;
pPage->nCell = pPage->nCellAlloc = 0;
assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
hdr = pPage->hdrOffset;
pPage->zeroData = (c & PTF_ZERODATA)!=0;
pPage->leaf = (c & PTF_LEAF)!=0;
pPage->isOverfull = 0;
- pPage->needRelink = 0;
pPage->idxShift = 0;
pageSize = pPage->pBt->pageSize;
}
pPage->isInit = 1;
- pageIntegrity(pPage);
return SQLITE_OK;
}
int hdr = pPage->hdrOffset;
int first;
- assert( sqlite3pager_pagenumber(data)==pPage->pgno );
- assert( &data[pBt->pageSize] == (unsigned char*)pPage );
assert( sqlite3pager_iswriteable(data) );
memset(&data[hdr], 0, pBt->pageSize - hdr);
data[hdr] = flags;
pPage->leaf = (flags & PTF_LEAF)!=0;
pPage->zeroData = (flags & PTF_ZERODATA)!=0;
pPage->hdrOffset = hdr;
- pPage->isOverfull = 0;
- pPage->needRelink = 0;
- pPage->idxShift = 0;
- pPage->isInit = 1;
- pageIntegrity(pPage);
}
/*
*/
static void pageDestructor(void *pData){
MemPage *pPage = (MemPage*)&((char*)pData)[SQLITE_PAGE_SIZE];
- assert( pPage->isInit==0 || pPage->needRelink==0 );
if( pPage->pParent ){
MemPage *pParent = pPage->pParent;
pPage->pParent = 0;
BtCursor *pCur;
for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
MemPage *pPage = pCur->pPage;
- if( pPage /* && !pPage->isInit */ ){
- pageIntegrity(pPage);
+ if( pPage && !pPage->isInit ){
releasePage(pPage);
pCur->pPage = 0;
pCur->isValid = 0;
}
}
-#ifdef SQLITE_TEST
-/*
-** Print debugging information about all cursors to standard output.
-*/
-void sqlite3BtreeCursorList(Btree *pBt){
- BtCursor *pCur;
- for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
- MemPage *pPage = pCur->pPage;
- char *zMode = pCur->wrFlag ? "rw" : "ro";
- printf("CURSOR %08x rooted at %4d(%s) currently at %d.%d%s\n",
- (int)pCur, pCur->pgnoRoot, zMode,
- pPage ? pPage->pgno : 0, pCur->idx,
- pCur->isValid ? "" : " eof"
- );
- }
-}
-#endif
-
/*
** Rollback the transaction in progress. All cursors will be
** invalidated by this operation. Any attempt to use a cursor
*pSize = 0;
}else{
pPage = pCur->pPage;
- pageIntegrity(pPage);
assert( pPage!=0 );
assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
cell = pPage->aCell[pCur->idx];
assert( pCur->isValid );
pBt = pCur->pBt;
pPage = pCur->pPage;
- pageIntegrity(pPage);
assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
aPayload = pPage->aCell[pCur->idx];
aPayload += 2; /* Skip the next cell index */
assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
pBt = pCur->pBt;
pPage = pCur->pPage;
- pageIntegrity(pPage);
assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
assert( pPage->intKey==0 );
aPayload = pPage->aCell[pCur->idx];
pPage = pCur->pPage;
assert( pPage!=0 );
assert( pPage->isInit );
- pageIntegrity(pPage);
if( pPage->zeroData ){
*pSize = 0;
}else{
assert( pCur->isValid );
rc = getAndInitPage(pBt, newPgno, &pNewPage, pCur->pPage);
if( rc ) return rc;
- pageIntegrity(pNewPage);
pNewPage->idxParent = pCur->idx;
pOldPage = pCur->pPage;
pOldPage->idxShift = 0;
*/
static int isRootPage(MemPage *pPage){
MemPage *pParent = pPage->pParent;
- if( pParent==0 ) return 1;
- if( pParent->pgno>1 ) return 0;
- if( get2byte(&pParent->aData[pParent->hdrOffset+3])==0 ) return 1;
+ assert( pParent==0 || pParent->isInit ); /* the nCell test on the next line reads the parent's in-memory state; presumably only meaningful once the parent is initialized */
+ if( pParent==0 || (pParent->pgno==1 && pParent->nCell==0) ) return 1; /* root if there is no parent, or the parent is page 1 holding zero cells */
return 0;
}
pPage = pCur->pPage;
assert( pPage!=0 );
assert( !isRootPage(pPage) );
- pageIntegrity(pPage);
pParent = pPage->pParent;
assert( pParent!=0 );
- pageIntegrity(pParent);
idxParent = pPage->idxParent;
sqlite3pager_ref(pParent->aData);
oldPgno = pPage->pgno;
return rc;
}
releasePage(pCur->pPage);
- pageIntegrity(pRoot);
pCur->pPage = pRoot;
pCur->idx = 0;
if( pRoot->nCell==0 && !pRoot->leaf ){
int c = -1; /* pRes return if table is empty must be -1 */
lwr = 0;
upr = pPage->nCell-1;
- pageIntegrity(pPage);
while( lwr<=upr ){
void *pCellKey;
u64 nCellKey;
if( pgno==0 ) return;
assert( pBt->pPager!=0 );
aData = sqlite3pager_lookup(pBt->pPager, pgno);
- if( aData ){
- pThis = (MemPage*)&aData[pBt->pageSize];
- if( pThis->isInit ){
- if( pThis->pParent!=pNewParent ){
- if( pThis->pParent ) sqlite3pager_unref(pThis->pParent->aData);
- pThis->pParent = pNewParent;
- if( pNewParent ) sqlite3pager_ref(pNewParent->aData);
- }
- pThis->idxParent = idx;
- }
+ pThis = (MemPage*)&aData[pBt->pageSize];
+ if( pThis && pThis->isInit ){
+ if( pThis->pParent!=pNewParent ){
+ if( pThis->pParent ) sqlite3pager_unref(pThis->pParent->aData);
+ pThis->pParent = pNewParent;
+ if( pNewParent ) sqlite3pager_ref(pNewParent->aData);
+ }
+ pThis->idxParent = idx;
sqlite3pager_unref(aData);
}
}
**
** "sz" must be the number of bytes in the cell.
**
-** Try to maintain the integrity of the linked list of cells. But if
-** the cell being inserted does not fit on the page, this will not be
-** possible. If the linked list is not maintained, then just update
-** pPage->aCell[] and set the pPage->needRelink flag so that we will
-** know to rebuild the linked list later.
+** Do not bother maintaining the integrity of the linked list of Cells.
+** Only the pPage->aCell[] array is important. The relinkCellList()
+** routine will be called soon after this routine in order to rebuild
+** the linked list.
*/
static void dropCell(MemPage *pPage, int idx, int sz){
int j, pc;
- u8 *data;
assert( idx>=0 && idx<pPage->nCell );
assert( sz==cellSize(pPage, pPage->aCell[idx]) );
assert( sqlite3pager_iswriteable(pPage->aData) );
assert( pPage->aCell[idx]>=pPage->aData );
assert( pPage->aCell[idx]<&pPage->aData[pPage->pBt->pageSize-sz] );
- data = pPage->aData;
- pc = Addr(pPage->aCell[idx]) - Addr(data);
+ pc = Addr(pPage->aCell[idx]) - Addr(pPage->aData); /* offset of the cell from the start of the page image */
assert( pc>pPage->hdrOffset && pc+sz<=pPage->pBt->pageSize );
freeSpace(pPage, pc, sz); /* hand the sz bytes starting at offset pc to freeSpace() */
for(j=idx; j<pPage->nCell-1; j++){ /* slide the later aCell[] entries down over slot idx */
pPage->aCell[j] = pPage->aCell[j+1];
}
pPage->nCell--;
- if( !pPage->isOverfull && !pPage->needRelink ){
- u8 *pPrev;
- if( idx==0 ){
- pPrev = &data[pPage->hdrOffset+3];
- }else{
- pPrev = pPage->aCell[idx-1];
- }
- if( idx<pPage->nCell ){
- pc = Addr(pPage->aCell[idx]) - Addr(data);
- }else{
- pc = 0;
- }
- put2byte(pPrev, pc);
- pageIntegrity(pPage);
- }else{
- pPage->needRelink = 1;
- }
pPage->idxShift = 1; /* NOTE(review): presumably marks that aCell[] indices on this page have moved -- confirm against the readers of idxShift */
}
** will not fit, then just make pPage->aCell[i] point to the content
** and set pPage->isOverfull.
**
-** Try to maintain the integrity of the linked list of cells. But if
-** the cell being inserted does not fit on the page, this will not be
-** possible. If the linked list is not maintained, then just update
-** pPage->aCell[] and set the pPage->needRelink flag so that we will
-** know to rebuild the linked list later.
+** Do not bother maintaining the integrity of the linked list of Cells.
+** Only the pPage->aCell[] array is important. The relinkCellList()
+** routine will be called soon after this routine in order to rebuild
+** the linked list.
*/
static void insertCell(MemPage *pPage, int i, unsigned char *pCell, int sz){
int idx, j;
assert( i>=0 && i<=pPage->nCell );
assert( sz==cellSize(pPage, pCell) );
assert( sqlite3pager_iswriteable(pPage->aData) );
- idx = pPage->needRelink ? 0 : allocateSpace(pPage, sz);
+ idx = allocateSpace(pPage, sz);
resizeCellArray(pPage, pPage->nCell+1);
for(j=pPage->nCell; j>i; j--){
pPage->aCell[j] = pPage->aCell[j-1];
pPage->isOverfull = 1;
pPage->aCell[i] = pCell;
}else{
- u8 *data = pPage->aData;
- memcpy(&data[idx], pCell, sz);
- pPage->aCell[i] = &data[idx];
- }
- if( !pPage->isOverfull && !pPage->needRelink ){
- u8 *pPrev;
- int pc;
- if( i==0 ){
- pPrev = &pPage->aData[pPage->hdrOffset+3];
- }else{
- pPrev = pPage->aCell[i-1];
- }
- pc = get2byte(pPrev);
- put2byte(pPrev, idx);
- put2byte(pPage->aCell[i], pc);
- pageIntegrity(pPage);
- }else{
- pPage->needRelink = 1;
+ memcpy(&pPage->aData[idx], pCell, sz);
+ pPage->aCell[i] = &pPage->aData[idx];
}
pPage->idxShift = 1;
}
static void relinkCellList(MemPage *pPage){
int i, idxFrom;
assert( sqlite3pager_iswriteable(pPage->aData) );
- if( !pPage->needRelink ) return;
idxFrom = pPage->hdrOffset+3;
for(i=0; i<pPage->nCell; i++){
int idx = Addr(pPage->aCell[i]) - Addr(pPage->aData);
idxFrom = idx;
}
put2byte(&pPage->aData[idxFrom], 0);
- pPage->needRelink = 0;
}
/*
**
** After this operation completes, the meta data for pFrom is zeroed.
*/
-static void movePage(MemPage *pTo, MemPage *pFrom){
+static void copyPage(MemPage *pTo, MemPage *pFrom){
uptr from, to;
int i;
int pageSize;
}
}
-/*
-** For debugging...
-*/
-#if 1
-# define TRACE(X) if( pager3_refinfo_enable ) printf X
-#else
-# define TRACE(X)
-#endif
-
/*
** The following parameters determine how many adjacent pages get involved
** in a balancing operation. NN is the number of neighbors on either side
int usableSpace; /* Bytes in pPage beyond the header */
int pageFlags; /* Value of pPage->aData[0] */
int subtotal; /* Subtotal of bytes in cells on one page */
- MemPage *extraUnref = 0; /* Unref this page if not zero */
MemPage *apOld[NB]; /* pPage and up to two siblings */
Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */
MemPage *apCopy[NB]; /* Private copies of apOld[] pages */
** it means this page is the root page and special rules apply.
*/
pParent = pPage->pParent;
- TRACE(("BALANCE: begin page %d\n", pPage->pgno));
if( pParent==0 ){
Pgno pgnoChild;
MemPage *pChild;
if( pPage->leaf ){
/* The table is completely empty */
relinkCellList(pPage);
- TRACE(("BALANCE: empty table\n"));
}else{
/* The root page is empty but has one child. Transfer the
** information from that one child into the root page if it
cellSize(pChild, pChild->aCell[i]));
}
freePage(pChild);
- TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno));
}else{
/* The child has more information than will fit on the root.
** The tree is already balanced. Do nothing. */
- TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno));
}
}else{
memcpy(pPage, pChild, pBt->pageSize);
rc = initPage(pPage, 0);
assert( rc==SQLITE_OK );
freePage(pChild);
- TRACE(("BALANCE: transfer child %d into root\n", pChild->pgno));
}
reparentChildPages(pPage);
releasePage(pChild);
/* It is OK for the root page to be less than half full.
*/
relinkCellList(pPage);
- TRACE(("BALANCE: Root page is underfull but that is ok\n"));
return SQLITE_OK;
}
/*
rc = allocatePage(pBt, &pChild, &pgnoChild, pPage->pgno);
if( rc ) return rc;
assert( sqlite3pager_iswriteable(pChild->aData) );
- movePage(pChild, pPage);
+ copyPage(pChild, pPage);
assert( pChild->aData[0]==pPage->aData[pPage->hdrOffset] );
pChild->pParent = pPage;
- sqlite3pager_ref(pPage->aData);
pChild->idxParent = 0;
+ sqlite3pager_ref(pPage->aData);
pChild->isOverfull = 1;
zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF);
put4byte(&pPage->aData[pPage->hdrOffset+6], pChild->pgno);
pParent = pPage;
pPage = pChild;
- extraUnref = pChild;
- TRACE(("BALANCE: Copy root into %d and blance\n", pPage->pgno));
+ initPage(pParent, 0);
}
rc = sqlite3pager_write(pParent->aData);
if( rc ) return rc;
for(i=0; i<nOld; i++){
MemPage *p = apCopy[i] = (MemPage*)&aCopy[i+1][-sizeof(MemPage)];
p->aData = &((u8*)p)[-pBt->pageSize];
- p->aCell = 0;
- p->hdrOffset = 0;
- movePage(p, apOld[i]);
+ copyPage(p, apOld[i]);
}
/*
nCell++;
}
if( i<nOld-1 ){
- szCell[nCell] = cellSize(pParent, apDiv[i]);
- memcpy(aTemp[i], apDiv[i], szCell[nCell]);
+ szCell[nCell] = cellSize(pParent, apDiv[i]) - leafCorrection;
+ memcpy(aTemp[i], apDiv[i], szCell[nCell] + leafCorrection);
apCell[nCell] = &aTemp[i][leafCorrection];
dropCell(pParent, nxDiv, szCell[nCell]);
- szCell[nCell] -= leafCorrection;
- assert( get4byte(&aTemp[i][2])==pgnoOld[i] );
+ assert( get4byte(&apCell[nCell][2])==pgnoOld[i] );
if( !pOld->leaf ){
assert( leafCorrection==0 );
/* The right pointer of the child page pOld becomes the left
assert( pPage->pgno>1 );
pageFlags = pPage->aData[0];
for(i=0; i<k; i++){
- MemPage *pNew;
if( i<nOld ){
- pNew = apNew[i] = apOld[i];
+ apNew[i] = apOld[i];
pgnoNew[i] = pgnoOld[i];
apOld[i] = 0;
- sqlite3pager_write(pNew->aData);
+ sqlite3pager_write(apNew[i]->aData);
}else{
- rc = allocatePage(pBt, &pNew, &pgnoNew[i], pgnoNew[i-1]);
+ rc = allocatePage(pBt, &apNew[i], &pgnoNew[i], pgnoNew[i-1]);
if( rc ) goto balance_cleanup;
- apNew[i] = pNew;
}
nNew++;
- zeroPage(pNew, pageFlags);
+ zeroPage(apNew[i], pageFlags);
+ apNew[i]->isInit = 1;
}
/* Free any old pages that were not reused as new pages.
while( i<nOld ){
rc = freePage(apOld[i]);
if( rc ) goto balance_cleanup;
- releasePage(apOld[i]);
+ sqlite3pager_unref(apOld[i]->aData);
apOld[i] = 0;
i++;
}
/*
** balance the parent page.
*/
- assert( pPage->isInit );
- assert( pParent->isInit );
- pageIntegrity(pPage);
rc = balance(pParent);
-
/*
** Cleanup before returning.
for(i=0; i<nOld; i++){
releasePage(apOld[i]);
if( apCopy[i] ){
+ releasePage(apCopy[i]->pParent);
sqliteFree(apCopy[i]->aCell);
}
}
releasePage(apNew[i]);
}
releasePage(pParent);
- releasePage(extraUnref);
- TRACE(("BALANCE: Finished with %d\n", pPage->pgno));
return rc;
}
unsigned char *pNext;
int szNext;
int notUsed;
- unsigned char tempbuf[4];
getTempCursor(pCur, &leafCur);
rc = sqlite3BtreeNext(&leafCur, ¬Used);
if( rc!=SQLITE_OK ){
dropCell(pPage, pCur->idx, cellSize(pPage, pCell));
pNext = leafCur.pPage->aCell[leafCur.idx];
szNext = cellSize(leafCur.pPage, pNext);
- memcpy(tempbuf, &pNext[-2], 4);
- put4byte(&pNext[-2], pgnoChild);
insertCell(pPage, pCur->idx, &pNext[-4], szNext+4);
+ put4byte(&pNext[-2], pgnoChild);
rc = balance(pPage);
if( rc ) return rc;
- memcpy(&pNext[-2], tempbuf, 4);
dropCell(leafCur.pPage, leafCur.idx, szNext);
rc = balance(leafCur.pPage);
releaseTempCursor(&leafCur);
pPage->intKey = (c & PTF_INTKEY)!=0;
pPage->zeroData = (c & PTF_ZERODATA)!=0;
pPage->leaf = (c & PTF_LEAF)!=0;
- printf("PAGE %d: flags=0x%02x frag=%d parent=%d\n", pgno,
- data[hdr], data[hdr+5],
- (pPage->isInit && pPage->pParent) ? pPage->pParent->pgno : 0);
+ printf("PAGE %d: flags=0x%02x frag=%d\n", pgno,
+ data[hdr], data[hdr+5]);
i = 0;
assert( hdr == (pgno==1 ? 100 : 0) );
idx = get2byte(&data[hdr+3]);
**
** This routine is used for testing and debugging only.
*/
-int sqlite3BtreeCursorInfo(BtCursor *pCur, int *aResult){
+int sqlite3BtreeCursorDump(BtCursor *pCur, int *aResult){
int cnt, idx;
MemPage *pPage = pCur->pPage;
-
- pageIntegrity(pPage);
assert( pPage->isInit );
aResult[0] = sqlite3pager_pagenumber(pPage->aData);
assert( aResult[0]==pPage->pgno );