*/
struct VdbeSorter {
int nWorking; /* Start a new b-tree after this many pages */
+ int nBtree; /* Bytes the current b-tree contents occupy as a PMA (excluding the size varint) */
int nTree; /* Used size of aTree/aIter (power of 2) */
VdbeSorterIter *aIter; /* Array of iterators to merge */
int *aTree; /* Current state of incremental merge */
-
i64 iWriteOff; /* Current write offset within file pTemp1 */
+ i64 iReadOff; /* Current read offset within file pTemp1 (start of next PMA to open) */
sqlite3_file *pTemp1; /* PMA file 1 */
- i64 *aOffset; /* Array of PMA offsets for file 1 */
- int nOffset; /* Size of aOffset[] array */
+ int nPMA; /* Number of PMAs stored in pTemp1 */
};
/*
/* Minimum allowable value for the VdbeSorter.nWorking variable */
#define SORTER_MIN_SEGMENT_SIZE 10
-/* Maximum number of segments to merge in a single go */
+/* Maximum number of segments to merge in a single pass. */
#define SORTER_MAX_MERGE_COUNT 16
-/*
-** Append integer iOff to the VdbeSorter.aOffset[] array of the sorter object
-** passed as the second argument. SQLITE_NOMEM is returned if an OOM error
-** is encountered, or SQLITE_OK if no error occurs.
-**
-** TODO: The aOffset[] array may grow indefinitely. Fix this.
-*/
-static int vdbeSorterAppendOffset(sqlite3 *db, VdbeSorter *p, i64 iOff){
- p->aOffset = sqlite3DbReallocOrFree(
- db, p->aOffset, (p->nOffset+1)*sizeof(i64)
- );
- if( !p->aOffset ) return SQLITE_NOMEM;
- p->aOffset[p->nOffset++] = iOff;
- return SQLITE_OK;
-}
-
/*
** Free all memory belonging to the VdbeSorterIter object passed as the second
** argument. All structure fields are set to zero before returning.
int nRec;
int iOff;
- assert( pIter->nAlloc>5 );
nRead = pIter->iEof - pIter->iReadOff;
if( nRead>5 ) nRead = 5;
-
if( nRead<=0 ){
vdbeSorterIterZero(db, pIter);
return SQLITE_OK;
return rc;
}
+static int vdbeSorterWriteVarint( /* Write iVal to pFile encoded as a varint */
+ sqlite3_file *pFile, /* File to write to */
+ i64 iVal, /* Value to encode and write */
+ i64 *piOffset /* IN/OUT: Write offset in pFile */
+){
+ u8 aVarint[9]; /* Buffer large enough for a varint */
+ int nVarint; /* Number of used bytes in varint */
+ int rc; /* Result of write() call */
+
+ nVarint = sqlite3PutVarint(aVarint, iVal);
+ rc = sqlite3OsWrite(pFile, aVarint, nVarint, *piOffset);
+ *piOffset += nVarint; /* Advanced unconditionally, even if the write failed */
+
+ return rc;
+}
+
+static int vdbeSorterReadVarint( /* Read one varint from pFile at *piOffset */
+ sqlite3_file *pFile, /* File to read from */
+ i64 iEof, /* Total number of bytes in file */
+ i64 *piOffset, /* IN/OUT: Read offset */
+ i64 *piVal /* OUT: Value read from file */
+){
+ u8 aVarint[9]; /* Buffer large enough for a varint */
+ i64 iOff = *piOffset; /* Offset in file to read from */
+ int nRead = 9; /* Number of bytes to read from file */
+ int rc; /* Return code */
+
+ assert( iEof>iOff );
+ if( (iEof-iOff)<nRead ){ /* Fewer than 9 bytes remain: do not read past iEof */
+ nRead = iEof-iOff;
+ }
+
+ rc = sqlite3OsRead(pFile, aVarint, nRead, iOff);
+ if( rc==SQLITE_OK ){ /* On error, *piOffset and *piVal are left unmodified */
+ *piOffset += getVarint(aVarint, (u64 *)piVal);
+ }
+
+ return rc;
+}
+
/*
** Initialize iterator pIter to scan through the PMA stored in file pFile
** starting at offset iStart and ending at offset iEof-1. This function
*/
static int vdbeSorterIterInit(
sqlite3 *db, /* Database handle */
- sqlite3_file *pFile, /* File that the PMA is stored in */
+ VdbeSorter *pSorter, /* Sorter object */
i64 iStart, /* Start offset in pFile */
- i64 iEof, /* 1 byte past the end of the PMA in pFile */
- VdbeSorterIter *pIter /* Iterator to populate */
+ VdbeSorterIter *pIter, /* Iterator to populate */
+ i64 *pnByte /* IN/OUT: Increment this value by PMA size */
){
+ int rc; /* Return code */
+ i64 iEof = pSorter->iWriteOff; /* End of data written to pTemp1 so far */
+
assert( iEof>iStart );
assert( pIter->aAlloc==0 );
- pIter->pFile = pFile;
- pIter->iEof = iEof;
+ pIter->pFile = pSorter->pTemp1;
pIter->iReadOff = iStart;
pIter->nAlloc = 128;
pIter->aAlloc = (u8 *)sqlite3DbMallocRaw(db, pIter->nAlloc);
- if( !pIter->aAlloc ) return SQLITE_NOMEM;
- return vdbeSorterIterNext(db, pIter);
+ if( !pIter->aAlloc ){
+ rc = SQLITE_NOMEM;
+ }else{
+ i64 nByte = 0; /* PMA size in bytes, read from its varint header */
+
+ /* The PMA begins with a varint holding its size. Reading it advances
+ ** pIter->iReadOff past the header to the first record. */
+ rc = vdbeSorterReadVarint(pSorter->pTemp1, iEof, &pIter->iReadOff, &nByte);
+ if( rc==SQLITE_OK ){
+ /* vdbeSorterReadVarint() leaves nByte unwritten on failure, so only
+ ** fold it into *pnByte and pIter->iEof once the read has succeeded. */
+ *pnByte += nByte;
+ pIter->iEof = pIter->iReadOff + nByte;
+ }
+ }
+ if( rc==SQLITE_OK ){
+ /* Position the iterator on the first record of the PMA. */
+ rc = vdbeSorterIterNext(db, pIter);
+ }
+ return rc;
}
/*
if( pSorter->pTemp1 ){
sqlite3OsCloseFree(pSorter->pTemp1);
}
- sqlite3DbFree(db, pSorter->aOffset);
sqlite3DbFree(db, pSorter);
pCsr->pSorter = 0;
}
);
}
+
/*
** Write the current contents of the b-tree to a PMA. Return SQLITE_OK
** if successful, or an SQLite error code otherwise.
*/
-static int sorterBtreeToPma(sqlite3 *db, VdbeCursor *pCsr){
+static int vdbeSorterBtreeToPMA(sqlite3 *db, VdbeCursor *pCsr){
int rc = SQLITE_OK; /* Return code */
VdbeSorter *pSorter = pCsr->pSorter;
i64 iWriteOff = pSorter->iWriteOff;
rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1);
assert( rc!=SQLITE_OK || pSorter->pTemp1 );
assert( pSorter->iWriteOff==0 );
- assert( pSorter->nOffset==0 );
- assert( pSorter->aOffset==0 );
+ assert( pSorter->nPMA==0 );
}
if( rc==SQLITE_OK ){
+ pSorter->nPMA++;
+
+ /* Write a varint containing the size of the PMA in bytes into the file. */
+ assert( pSorter->nBtree>0 );
+
for(
- rc = vdbeSorterAppendOffset(db, pSorter, iWriteOff);
+ rc = vdbeSorterWriteVarint(pSorter->pTemp1, pSorter->nBtree, &iWriteOff);
rc==SQLITE_OK && res==0;
rc = sqlite3BtreeNext(pCsr->pCursor, &res)
){
i64 nKey; /* Size of this key in bytes */
- u8 aVarint[9]; /* Buffer containing varint(nKey) */
- int nVar; /* Number of bytes in aVarint[] used */
- (void)sqlite3BtreeKeySize(pCsr->pCursor, &nKey);
- nVar = sqlite3PutVarint(aVarint, nKey);
-
/* Write the size of the record in bytes to the output file */
- rc = sqlite3OsWrite(pSorter->pTemp1, aVarint, nVar, iWriteOff);
- iWriteOff += nVar;
+ (void)sqlite3BtreeKeySize(pCsr->pCursor, &nKey);
+ rc = vdbeSorterWriteVarint(pSorter->pTemp1, nKey, &iWriteOff);
/* Make sure the aMalloc[] buffer is large enough for the record */
if( rc==SQLITE_OK && nKey>nMalloc ){
}
}
- if( rc!=SQLITE_OK ) break;
}
+ assert( pSorter->nBtree==(
+ iWriteOff-pSorter->iWriteOff-sqlite3VarintLen(pSorter->nBtree)
+ ));
pSorter->iWriteOff = iWriteOff;
sqlite3DbFree(db, aMalloc);
}
+ pSorter->nBtree = 0;
return rc;
}
** If the current b-tree being constructed is already considered "full",
** a new tree is started.
*/
-int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr){
+int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr, int nKey){
int rc = SQLITE_OK; /* Return code */
VdbeSorter *pSorter = pCsr->pSorter;
if( pSorter ){
/* Copy the current contents of the b-tree into a PMA in sorted order.
** Close the currently open b-tree cursor. */
- rc = sorterBtreeToPma(db, pCsr);
+ rc = vdbeSorterBtreeToPMA(db, pCsr);
sqlite3BtreeCloseCursor(p);
if( rc==SQLITE_OK ){
rc = sqlite3BtreeCursor(pCsr->pBt, iRoot, 1, pCsr->pKeyInfo, p);
}
}
+
+ pSorter->nBtree += sqlite3VarintLen(nKey) + nKey;
}
return rc;
}
sqlite3 *db,
VdbeCursor *pCsr,
int iFirst,
- int *piNext
+ i64 *pnByte /* Sum of bytes in all opened PMAs */
){
VdbeSorter *pSorter = pCsr->pSorter;
int rc = SQLITE_OK;
int i;
- int N = 2;
- int nIter; /* Number of iterators to initialize. */
-
- nIter = pSorter->nOffset - iFirst;
- if( nIter>SORTER_MAX_MERGE_COUNT ){
- nIter = SORTER_MAX_MERGE_COUNT;
- }
- assert( nIter>0 );
- while( N<nIter ) N += N;
-
- /* Allocate aIter[] and aTree[], if required. */
- if( pSorter->aIter==0 ){
- int nByte = N * (sizeof(int) + sizeof(VdbeSorterIter));
- pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte);
- if( !pSorter->aIter ) return SQLITE_NOMEM;
- pSorter->aTree = (int *)&pSorter->aIter[N];
- }
+ i64 nByte = 0;
/* Initialize as many iterators as possible. */
for(i=iFirst;
- rc==SQLITE_OK && i<pSorter->nOffset && (i-iFirst)<SORTER_MAX_MERGE_COUNT;
+ rc==SQLITE_OK && i<pSorter->nPMA && (i-iFirst)<SORTER_MAX_MERGE_COUNT;
i++
){
- int iIter = i - iFirst;
-
- if( rc==SQLITE_OK ){
- VdbeSorterIter *pIter = &pSorter->aIter[iIter];
- i64 iStart = pSorter->aOffset[i];
- i64 iEof;
- if( i==(pSorter->nOffset-1) ){
- iEof = pSorter->iWriteOff;
- }else{
- iEof = pSorter->aOffset[i+1];
- }
- rc = vdbeSorterIterInit(db, pSorter->pTemp1, iStart, iEof, pIter);
- }
+ VdbeSorterIter *pIter = &pSorter->aIter[i - iFirst];
+ rc = vdbeSorterIterInit(db, pSorter, pSorter->iReadOff, pIter, &nByte);
+ pSorter->iReadOff = pIter->iEof;
}
- *piNext = i;
-
assert( i>iFirst );
- pSorter->nTree = N;
/* Populate the aTree[] array. */
- for(i=N-1; rc==SQLITE_OK && i>0; i--){
+ for(i=pSorter->nTree-1; rc==SQLITE_OK && i>0; i--){
rc = vdbeSorterDoCompare(pCsr, i);
}
+ *pnByte = nByte;
return rc;
}
int rc; /* Return code */
sqlite3_file *pTemp2 = 0; /* Second temp file to use */
i64 iWrite2 = 0; /* Write offset for pTemp2 */
+ int nIter; /* Number of iterators used */
+ int nByte; /* Bytes of space required for aIter/aTree */
+ int N = 2; /* Power of 2 >= nIter */
assert( pSorter );
/* Write the current b-tree to a PMA. Close the b-tree cursor. */
- rc = sorterBtreeToPma(db, pCsr);
+ rc = vdbeSorterBtreeToPMA(db, pCsr);
sqlite3BtreeCloseCursor(pCsr->pCursor);
if( rc!=SQLITE_OK ) return rc;
- if( pSorter->nOffset==0 ){
+ if( pSorter->nPMA==0 ){
*pbEof = 1;
return SQLITE_OK;
}
- while( rc==SQLITE_OK ){
- int iNext = 0; /* Index of next segment to open */
- int iNew = 0; /* Index of new, merged, PMA */
+ /* Allocate space for aIter[] and aTree[]. */
+ nIter = pSorter->nPMA;
+ if( nIter>SORTER_MAX_MERGE_COUNT ) nIter = SORTER_MAX_MERGE_COUNT;
+ assert( nIter>0 );
+ while( N<nIter ) N += N;
+ nByte = N * (sizeof(int) + sizeof(VdbeSorterIter));
+ pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte);
+ if( !pSorter->aIter ) return SQLITE_NOMEM;
+ pSorter->aTree = (int *)&pSorter->aIter[N];
+ pSorter->nTree = N;
- do {
+ do {
+ int iNew = 0; /* Index of new, merged, PMA */
- /* This call configures iterators for merging. */
- rc = vdbeSorterInitMerge(db, pCsr, iNext, &iNext);
- assert( iNext>0 );
+ for(iNew=0; rc==SQLITE_OK; iNew++){
+ i64 nWrite; /* Number of bytes in new PMA */
+
+ /* If there are SORTER_MAX_MERGE_COUNT or fewer PMAs in file pTemp1,
+ ** initialize an iterator for each of them and break out of the loop.
+ ** These iterators will be incrementally merged as the VDBE layer calls
+ ** sqlite3VdbeSorterNext().
+ **
+ ** Otherwise, if pTemp1 contains more than SORTER_MAX_MERGE_COUNT PMAs,
+ ** initialize iterators for SORTER_MAX_MERGE_COUNT of them. These PMAs
+ ** are merged into a single PMA that is written to file pTemp2.
+ */
+ rc = vdbeSorterInitMerge(db, pCsr, iNew*SORTER_MAX_MERGE_COUNT, &nWrite);
assert( rc!=SQLITE_OK || pSorter->aIter[ pSorter->aTree[1] ].pFile );
+ if( rc!=SQLITE_OK || pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
+ break;
+ }
- if( rc==SQLITE_OK && (iNew>0 || iNext<pSorter->nOffset) ){
- int bEof = 0;
+ /* Open the second temp file, if it is not already open. */
+ if( pTemp2==0 ){
+ assert( iWrite2==0 );
+ rc = vdbeSorterOpenTempFile(db, &pTemp2);
+ }
- if( pTemp2==0 ){
- rc = vdbeSorterOpenTempFile(db, &pTemp2);
- }
- if( rc==SQLITE_OK ){
- pSorter->aOffset[iNew] = iWrite2;
- }
+ if( rc==SQLITE_OK ){
+ rc = vdbeSorterWriteVarint(pTemp2, nWrite, &iWrite2);
+ }
+ if( rc==SQLITE_OK ){
+ int bEof = 0;
while( rc==SQLITE_OK && bEof==0 ){
int nByte;
VdbeSorterIter *pIter = &pSorter->aIter[ pSorter->aTree[1] ];
rc = sqlite3VdbeSorterNext(db, pCsr, &bEof);
}
}
- iNew++;
}
- }while( rc==SQLITE_OK && iNext<pSorter->nOffset );
+ }
- if( iNew==0 ){
+ if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
break;
}else{
sqlite3_file *pTmp = pSorter->pTemp1;
- pSorter->nOffset = iNew;
+ pSorter->nPMA = iNew;
pSorter->pTemp1 = pTemp2;
pTemp2 = pTmp;
pSorter->iWriteOff = iWrite2;
+ pSorter->iReadOff = 0;
iWrite2 = 0;
}
- }
+ }while( rc==SQLITE_OK );
if( pTemp2 ){
sqlite3OsCloseFree(pTemp2);
}
-
*pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
return rc;
}