From: dan Date: Sat, 6 Aug 2011 12:01:58 +0000 (+0000) Subject: In temp files used for merge sorting, store the size of each packed-memory-array... X-Git-Tag: version-3.7.8~76^2~8 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1e74e602ec7ed82b3076d48e758e4aff9fe58a7b;p=thirdparty%2Fsqlite.git In temp files used for merge sorting, store the size of each packed-memory-array at the start of the array itself. This is to avoid having to store the offsets of all arrays in the (potentially very large) file in main-memory. FossilOrigin-Name: 8051c1767c4386b0f14a66742d9fac41e001eb07 --- diff --git a/manifest b/manifest index 6d21d1d0ea..04da70e7ef 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Minor\sinternal\schanges\sto\svdbesort.c.\sAlso,\sdefault\sto\smerging\slists\stogether\s16\sat\sa\stime. -D 2011-08-05T11:49:12.597 +C In\stemp\sfiles\sused\sfor\smerge\ssorting,\sstore\sthe\ssize\sof\seach\spacked-memory-array\sat\sthe\sstart\sof\sthe\sarray\sitself.\sThis\sis\sto\savoid\shaving\sto\sstore\sthe\soffsets\sof\sall\sarrays\sin\sthe\s(potentially\svery\slarge)\sfile\sin\smain-memory. 
+D 2011-08-06T12:01:58.831 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in c1d7a7f4fd8da6b1815032efca950e3d5125407e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -238,14 +238,14 @@ F src/update.c 74a6cfb34e9732c1e2a86278b229913b4b51eeec F src/utf.c c53eb7404b3eb5c1cbb5655c6a7a0e0ce6bd50f0 F src/util.c 0f33bbbdfcc4a2d8cf20c3b2a16ffc3b57c58a70 F src/vacuum.c 05513dca036a1e7848fe18d5ed1265ac0b32365e -F src/vdbe.c 379ccaa6e03797e08aadb1ae6b0495cedff69209 +F src/vdbe.c ec7b04557d0849d835c4b1b95b463c2c470b60f8 F src/vdbe.h 5cf09e7ee8a3f7d93bc51f196a96550786afe7a1 -F src/vdbeInt.h 9e38e4f866faa9b25e30a1712c3ec1f489097ca1 +F src/vdbeInt.h de75338edfafb812f5bf7f1b3881cbc7256b3c17 F src/vdbeapi.c 11dc47987abacb76ad016dcf5abc0dc422482a98 F src/vdbeaux.c 8fb978eb73a97b34d352dd3ef3bff35b1b3fa7e9 F src/vdbeblob.c f024f0bf420f36b070143c32b15cc7287341ffd3 F src/vdbemem.c 0498796b6ffbe45e32960d6a1f5adfb6e419883b -F src/vdbesort.c f17fa625dbe19bfb8f0a0cb728cf9d73cab6ed1e +F src/vdbesort.c d7739da903c6eb41b864939b2e4a34288167f031 F src/vdbetrace.c 5d0dc3d5fd54878cc8d6d28eb41deb8d5885b114 F src/vtab.c 901791a47318c0562cd0c676a2c6ff1bc530e582 F src/wal.c 0c70ad7b1cac6005fa5e2cbefd23ee05e391c290 @@ -954,7 +954,7 @@ F tool/symbols.sh caaf6ccc7300fd43353318b44524853e222557d5 F tool/tostr.awk 11760e1b94a5d3dcd42378f3cc18544c06cfa576 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings.sh 2ebae31e1eb352696f3c2f7706a34c084b28c262 -P db8518cab8e329b1dbe4cd6c81b21ef3ea69fcb1 -R 82f4652664dbb6f6efbe2830f0e7593b +P 9ddc324a34dbf97acef92eef21f8a35f63db4c5b +R fa1c073fe4f821491ae9f0a1c071e6ef U dan -Z 838a8014a1d3a0c9d59ff3654d53daf0 +Z 428b5e1af6502133e8ab01b3dcc7e84b diff --git a/manifest.uuid b/manifest.uuid index 7f82740716..c613abfb70 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9ddc324a34dbf97acef92eef21f8a35f63db4c5b \ No newline at end of file +8051c1767c4386b0f14a66742d9fac41e001eb07 \ 
No newline at end of file diff --git a/src/vdbe.c b/src/vdbe.c index c4a0ca2519..97edf90f05 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -4373,8 +4373,6 @@ case OP_IdxInsert: { /* in2 */ assert( pOp->p1>=0 && pOp->p1<p->nCursor ); pC = p->apCsr[pOp->p1]; assert( pC!=0 ); - rc = sqlite3VdbeSorterWrite(db, pC); - if( rc!=SQLITE_OK ) goto abort_due_to_error; pIn2 = &aMem[pOp->p2]; assert( pIn2->flags & MEM_Blob ); pCrsr = pC->pCursor; @@ -4384,10 +4382,13 @@ case OP_IdxInsert: { /* in2 */ if( rc==SQLITE_OK ){ nKey = pIn2->n; zKey = pIn2->z; - rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3, - ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0) - ); - assert( pC->deferredMoveto==0 ); + rc = sqlite3VdbeSorterWrite(db, pC, nKey); + if( rc==SQLITE_OK ){ + rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3, + ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0) + ); + assert( pC->deferredMoveto==0 ); + } pC->cacheStatus = CACHE_STALE; } } diff --git a/src/vdbeInt.h b/src/vdbeInt.h index 7f53acde9d..b2f5ef661f 100644 --- a/src/vdbeInt.h +++ b/src/vdbeInt.h @@ -393,7 +393,7 @@ int sqlite3VdbeFrameRestore(VdbeFrame *); void sqlite3VdbeMemStoreType(Mem *pMem); int sqlite3VdbeSorterInit(sqlite3 *, VdbeCursor *); -int sqlite3VdbeSorterWrite(sqlite3 *, VdbeCursor *); +int sqlite3VdbeSorterWrite(sqlite3 *, VdbeCursor *, int); void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *); int sqlite3VdbeSorterRowkey(sqlite3 *, VdbeCursor *, Mem *); diff --git a/src/vdbesort.c b/src/vdbesort.c index fb22e24c04..6f16b02bd0 100644 --- a/src/vdbesort.c +++ b/src/vdbesort.c @@ -89,14 +89,14 @@ typedef struct VdbeSorterIter VdbeSorterIter; */ struct VdbeSorter { int nWorking; /* Start a new b-tree after this many pages */ + int nBtree; /* Current size of b-tree contents as PMA */ int nTree; /* Used size of aTree/aIter (power of 2) */ VdbeSorterIter *aIter; /* Array of iterators to merge */ int *aTree; /* Current state of incremental merge */ - i64 iWriteOff; /* Current 
write offset within file pTemp1 */ + i64 iReadOff; /* Current read offset within file pTemp1 */ sqlite3_file *pTemp1; /* PMA file 1 */ - i64 *aOffset; /* Array of PMA offsets for file 1 */ - int nOffset; /* Size of aOffset[] array */ + int nPMA; /* Number of PMAs stored in pTemp1 */ }; /* @@ -116,25 +116,9 @@ struct VdbeSorterIter { /* Minimum allowable value for the VdbeSorter.nWorking variable */ #define SORTER_MIN_SEGMENT_SIZE 10 -/* Maximum number of segments to merge in a single go */ +/* Maximum number of segments to merge in a single pass. */ #define SORTER_MAX_MERGE_COUNT 16 -/* -** Append integer iOff to the VdbeSorter.aOffset[] array of the sorter object -** passed as the second argument. SQLITE_NOMEM is returned if an OOM error -** is encountered, or SQLITE_OK if no error occurs. -** -** TODO: The aOffset[] array may grow indefinitely. Fix this. -*/ -static int vdbeSorterAppendOffset(sqlite3 *db, VdbeSorter *p, i64 iOff){ - p->aOffset = sqlite3DbReallocOrFree( - db, p->aOffset, (p->nOffset+1)*sizeof(i64) - ); - if( !p->aOffset ) return SQLITE_NOMEM; - p->aOffset[p->nOffset++] = iOff; - return SQLITE_OK; -} - /* ** Free all memory belonging to the VdbeSorterIter object passed as the second ** argument. All structure fields are set to zero before returning. 
@@ -156,10 +140,8 @@ static int vdbeSorterIterNext( int nRec; int iOff; - assert( pIter->nAlloc>5 ); nRead = pIter->iEof - pIter->iReadOff; if( nRead>5 ) nRead = 5; - if( nRead<=0 ){ vdbeSorterIterZero(db, pIter); return SQLITE_OK; @@ -192,6 +174,46 @@ static int vdbeSorterIterNext( return rc; } +static int vdbeSorterWriteVarint( + sqlite3_file *pFile, + i64 iVal, + i64 *piOffset +){ + u8 aVarint[9]; /* Buffer large enough for a varint */ + int nVarint; /* Number of used bytes in varint */ + int rc; /* Result of write() call */ + + nVarint = sqlite3PutVarint(aVarint, iVal); + rc = sqlite3OsWrite(pFile, aVarint, nVarint, *piOffset); + *piOffset += nVarint; + + return rc; +} + +static int vdbeSorterReadVarint( + sqlite3_file *pFile, + i64 iEof, /* Total number of bytes in file */ + i64 *piOffset, /* IN/OUT: Read offset */ + i64 *piVal /* OUT: Value read from file */ +){ + u8 aVarint[9]; /* Buffer large enough for a varint */ + i64 iOff = *piOffset; /* Offset in file to read from */ + int nRead = 9; /* Number of bytes to read from file */ + int rc; /* Return code */ + + assert( iEof>iOff ); + if( (iEof-iOff)iWriteOff; + assert( iEof>iStart ); assert( pIter->aAlloc==0 ); - pIter->pFile = pFile; - pIter->iEof = iEof; + pIter->pFile = pSorter->pTemp1; pIter->iReadOff = iStart; pIter->nAlloc = 128; pIter->aAlloc = (u8 *)sqlite3DbMallocRaw(db, pIter->nAlloc); - if( !pIter->aAlloc ) return SQLITE_NOMEM; - return vdbeSorterIterNext(db, pIter); + if( !pIter->aAlloc ){ + rc = SQLITE_NOMEM; + }else{ + i64 nByte; + rc = vdbeSorterReadVarint(pSorter->pTemp1, iEof, &pIter->iReadOff, &nByte); + *pnByte += nByte; + pIter->iEof = pIter->iReadOff + nByte; + } + if( rc==SQLITE_OK ){ + rc = vdbeSorterIterNext(db, pIter); + } + return rc; } /* @@ -298,7 +332,6 @@ void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){ if( pSorter->pTemp1 ){ sqlite3OsCloseFree(pSorter->pTemp1); } - sqlite3DbFree(db, pSorter->aOffset); sqlite3DbFree(db, pSorter); pCsr->pSorter = 0; } @@ -318,11 +351,12 
@@ static int vdbeSorterOpenTempFile(sqlite3 *db, sqlite3_file **ppFile){ ); } + /* ** Write the current contents of the b-tree to a PMA. Return SQLITE_OK ** if successful, or an SQLite error code otherwise. */ -static int sorterBtreeToPma(sqlite3 *db, VdbeCursor *pCsr){ +static int vdbeSorterBtreeToPMA(sqlite3 *db, VdbeCursor *pCsr){ int rc = SQLITE_OK; /* Return code */ VdbeSorter *pSorter = pCsr->pSorter; i64 iWriteOff = pSorter->iWriteOff; @@ -338,27 +372,26 @@ static int sorterBtreeToPma(sqlite3 *db, VdbeCursor *pCsr){ rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1); assert( rc!=SQLITE_OK || pSorter->pTemp1 ); assert( pSorter->iWriteOff==0 ); - assert( pSorter->nOffset==0 ); - assert( pSorter->aOffset==0 ); + assert( pSorter->nPMA==0 ); } if( rc==SQLITE_OK ){ + pSorter->nPMA++; + + /* Write a varint containg the size of the PMA in bytes into the file. */ + assert( pSorter->nBtree>0 ); + for( - rc = vdbeSorterAppendOffset(db, pSorter, iWriteOff); + rc = vdbeSorterWriteVarint(pSorter->pTemp1, pSorter->nBtree, &iWriteOff); rc==SQLITE_OK && res==0; rc = sqlite3BtreeNext(pCsr->pCursor, &res) ){ i64 nKey; /* Size of this key in bytes */ - u8 aVarint[9]; /* Buffer containing varint(nKey) */ - int nVar; /* Number of bytes in aVarint[] used */ - (void)sqlite3BtreeKeySize(pCsr->pCursor, &nKey); - nVar = sqlite3PutVarint(aVarint, nKey); - /* Write the size of the record in bytes to the output file */ - rc = sqlite3OsWrite(pSorter->pTemp1, aVarint, nVar, iWriteOff); - iWriteOff += nVar; + (void)sqlite3BtreeKeySize(pCsr->pCursor, &nKey); + rc = vdbeSorterWriteVarint(pSorter->pTemp1, nKey, &iWriteOff); /* Make sure the aMalloc[] buffer is large enough for the record */ if( rc==SQLITE_OK && nKey>nMalloc ){ @@ -377,13 +410,16 @@ static int sorterBtreeToPma(sqlite3 *db, VdbeCursor *pCsr){ } } - if( rc!=SQLITE_OK ) break; } + assert( pSorter->nBtree==( + iWriteOff-pSorter->iWriteOff-sqlite3VarintLen(pSorter->nBtree) + )); pSorter->iWriteOff = iWriteOff; sqlite3DbFree(db, 
aMalloc); } + pSorter->nBtree = 0; return rc; } @@ -392,7 +428,7 @@ static int sorterBtreeToPma(sqlite3 *db, VdbeCursor *pCsr){ ** If the current b-tree being constructed is already considered "full", ** a new tree is started. */ -int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr){ +int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr, int nKey){ int rc = SQLITE_OK; /* Return code */ VdbeSorter *pSorter = pCsr->pSorter; if( pSorter ){ @@ -423,7 +459,7 @@ int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr){ /* Copy the current contents of the b-tree into a PMA in sorted order. ** Close the currently open b-tree cursor. */ - rc = sorterBtreeToPma(db, pCsr); + rc = vdbeSorterBtreeToPMA(db, pCsr); sqlite3BtreeCloseCursor(p); if( rc==SQLITE_OK ){ @@ -441,6 +477,8 @@ int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr){ rc = sqlite3BtreeCursor(pCsr->pBt, iRoot, 1, pCsr->pKeyInfo, p); } } + + pSorter->nBtree += sqlite3VarintLen(nKey) + nKey; } return rc; } @@ -452,58 +490,30 @@ static int vdbeSorterInitMerge( sqlite3 *db, VdbeCursor *pCsr, int iFirst, - int *piNext + i64 *pnByte /* Sum of bytes in all opened PMAs */ ){ VdbeSorter *pSorter = pCsr->pSorter; int rc = SQLITE_OK; int i; - int N = 2; - int nIter; /* Number of iterators to initialize. */ - - nIter = pSorter->nOffset - iFirst; - if( nIter>SORTER_MAX_MERGE_COUNT ){ - nIter = SORTER_MAX_MERGE_COUNT; - } - assert( nIter>0 ); - while( NaIter==0 ){ - int nByte = N * (sizeof(int) + sizeof(VdbeSorterIter)); - pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte); - if( !pSorter->aIter ) return SQLITE_NOMEM; - pSorter->aTree = (int *)&pSorter->aIter[N]; - } + i64 nByte = 0; /* Initialize as many iterators as possible. 
*/ for(i=iFirst; - rc==SQLITE_OK && inOffset && (i-iFirst)nPMA && (i-iFirst)aIter[iIter]; - i64 iStart = pSorter->aOffset[i]; - i64 iEof; - if( i==(pSorter->nOffset-1) ){ - iEof = pSorter->iWriteOff; - }else{ - iEof = pSorter->aOffset[i+1]; - } - rc = vdbeSorterIterInit(db, pSorter->pTemp1, iStart, iEof, pIter); - } + VdbeSorterIter *pIter = &pSorter->aIter[i - iFirst]; + rc = vdbeSorterIterInit(db, pSorter, pSorter->iReadOff, pIter, &nByte); + pSorter->iReadOff = pIter->iEof; } - *piNext = i; - assert( i>iFirst ); - pSorter->nTree = N; /* Populate the aTree[] array. */ - for(i=N-1; rc==SQLITE_OK && i>0; i--){ + for(i=pSorter->nTree-1; rc==SQLITE_OK && i>0; i--){ rc = vdbeSorterDoCompare(pCsr, i); } + *pnByte = nByte; return rc; } @@ -516,39 +526,65 @@ int sqlite3VdbeSorterRewind(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){ int rc; /* Return code */ sqlite3_file *pTemp2 = 0; /* Second temp file to use */ i64 iWrite2 = 0; /* Write offset for pTemp2 */ + int nIter; /* Number of iterators used */ + int nByte; /* Bytes of space required for aIter/aTree */ + int N = 2; /* Power of 2 >= nIter */ assert( pSorter ); /* Write the current b-tree to a PMA. Close the b-tree cursor. */ - rc = sorterBtreeToPma(db, pCsr); + rc = vdbeSorterBtreeToPMA(db, pCsr); sqlite3BtreeCloseCursor(pCsr->pCursor); if( rc!=SQLITE_OK ) return rc; - if( pSorter->nOffset==0 ){ + if( pSorter->nPMA==0 ){ *pbEof = 1; return SQLITE_OK; } - while( rc==SQLITE_OK ){ - int iNext = 0; /* Index of next segment to open */ - int iNew = 0; /* Index of new, merged, PMA */ + /* Allocate space for aIter[] and aTree[]. 
*/ + nIter = pSorter->nPMA; + if( nIter>SORTER_MAX_MERGE_COUNT ) nIter = SORTER_MAX_MERGE_COUNT; + assert( nIter>0 ); + while( N<nIter ) N += N; + nByte = N * (sizeof(int) + sizeof(VdbeSorterIter)); + pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte); + if( !pSorter->aIter ) return SQLITE_NOMEM; + pSorter->aTree = (int *)&pSorter->aIter[N]; + pSorter->nTree = N; - do { + do { + int iNew = 0; /* Index of new, merged, PMA */ - /* This call configures iterators for merging. */ - rc = vdbeSorterInitMerge(db, pCsr, iNext, &iNext); - assert( iNext>0 ); + for(iNew=0; rc==SQLITE_OK; iNew++){ + i64 nWrite; /* Number of bytes in new PMA */ + + /* If there are SORTER_MAX_MERGE_COUNT or less PMAs in file pTemp1, + ** initialize an iterator for each of them and break out of the loop. + ** These iterators will be incrementally merged as the VDBE layer calls + ** sqlite3VdbeSorterNext(). + ** + ** Otherwise, if pTemp1 contains more than SORTER_MAX_MERGE_COUNT PMAs, + ** initialize interators for SORTER_MAX_MERGE_COUNT of them. These PMAs + ** are merged into a single PMA that is written to file pTemp2. + */ + rc = vdbeSorterInitMerge(db, pCsr, iNew*SORTER_MAX_MERGE_COUNT, &nWrite); assert( rc!=SQLITE_OK || pSorter->aIter[ pSorter->aTree[1] ].pFile ); + if( rc!=SQLITE_OK || pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){ + break; + } - if( rc==SQLITE_OK && (iNew>0 || iNext<pSorter->nOffset) ){ - int bEof = 0; + /* Open the second temp file, if it is not already open. 
*/ + if( pTemp2==0 ){ + assert( iWrite2==0 ); + rc = vdbeSorterOpenTempFile(db, &pTemp2); + } - if( pTemp2==0 ){ - rc = vdbeSorterOpenTempFile(db, &pTemp2); - } - if( rc==SQLITE_OK ){ - pSorter->aOffset[iNew] = iWrite2; - } + if( rc==SQLITE_OK ){ + rc = vdbeSorterWriteVarint(pTemp2, nWrite, &iWrite2); + } + if( rc==SQLITE_OK ){ + int bEof = 0; while( rc==SQLITE_OK && bEof==0 ){ int nByte; VdbeSorterIter *pIter = &pSorter->aIter[ pSorter->aTree[1] ]; @@ -560,26 +596,25 @@ int sqlite3VdbeSorterRewind(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){ rc = sqlite3VdbeSorterNext(db, pCsr, &bEof); } } - iNew++; } - }while( rc==SQLITE_OK && iNext<pSorter->nOffset ); + } - if( iNew==0 ){ + if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){ break; }else{ sqlite3_file *pTmp = pSorter->pTemp1; - pSorter->nOffset = iNew; + pSorter->nPMA = iNew; pSorter->pTemp1 = pTemp2; pTemp2 = pTmp; pSorter->iWriteOff = iWrite2; + pSorter->iReadOff = 0; iWrite2 = 0; } - } + }while( rc==SQLITE_OK ); if( pTemp2 ){ sqlite3OsCloseFree(pTemp2); } - *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0); return rc; }