From: dan Date: Thu, 26 Feb 2015 20:49:09 +0000 (+0000) Subject: Optimize copying data from fts5 in-memory hash tables to top level segments. X-Git-Tag: version-3.8.11~114^2~84 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=dfdc4b46134c22550e48cf32224f6b990cbeabf5;p=thirdparty%2Fsqlite.git Optimize copying data from fts5 in-memory hash tables to top level segments. FossilOrigin-Name: 8e3ca6323a2beab5f04250e24ae15b159d2aa0ac --- diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 4b85b2af23..eb1f3d1b1a 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -393,55 +393,6 @@ static int fts5HashEntrySort( return SQLITE_OK; } -int sqlite3Fts5HashIterate( - Fts5Hash *pHash, - void *pCtx, - int (*xTerm)(void*, const char*, int), - int (*xEntry)(void*, i64, const u8*, int), - int (*xTermDone)(void*) -){ - Fts5HashEntry *pList; - int rc; - - rc = fts5HashEntrySort(pHash, 0, 0, &pList); - if( rc==SQLITE_OK ){ - memset(pHash->aSlot, 0, sizeof(Fts5HashEntry*) * pHash->nSlot); - while( pList ){ - Fts5HashEntry *pNext = pList->pScanNext; - if( rc==SQLITE_OK ){ - const int nKey = strlen(pList->zKey); - i64 iRowid = 0; - u8 *pPtr = (u8*)pList; - int iOff = sizeof(Fts5HashEntry) + nKey + 1; - - /* Fill in the final poslist size field */ - fts5HashAddPoslistSize(pList); - - /* Issue the new-term callback */ - rc = xTerm(pCtx, pList->zKey, nKey); - - /* Issue the xEntry callbacks */ - while( rc==SQLITE_OK && iOff<pList->nData ){ - i64 iDelta; /* Rowid delta value */ - int nPoslist; /* Size of position list in bytes */ - int nVarint; - iOff += getVarint(&pPtr[iOff], (u64*)&iDelta); - iRowid += iDelta; - nVarint = fts5GetVarint32(&pPtr[iOff], nPoslist); - rc = xEntry(pCtx, iRowid, &pPtr[iOff], nPoslist+nVarint); - iOff += nVarint+nPoslist; - } - - /* Issue the term-done callback */ - if( rc==SQLITE_OK ) rc = xTermDone(pCtx); - } - sqlite3_free(pList); - pList = pNext; - } - } - return rc; -} - /* ** Query the hash table for a doclist associated with term 
pTerm/nTerm. */ @@ -478,9 +429,8 @@ void sqlite3Fts5HashScanInit( } void sqlite3Fts5HashScanNext(Fts5Hash *p){ - if( p->pScan ){ - p->pScan = p->pScan->pScanNext; - } + Fts5HashEntry *pScan = p->pScan; + if( pScan ) p->pScan = pScan->pScanNext; } int sqlite3Fts5HashScanEof(Fts5Hash *p){ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index f24be525d9..481cd75fa7 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -113,7 +113,7 @@ ** poslist: first poslist ** zero-or-more { ** varint: rowid delta (always > 0) -** poslist: first poslist +** poslist: next poslist ** } ** 0x00 byte ** @@ -2677,7 +2677,7 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ int bFlag = 0; Fts5PageWriter *pPg; pPg = &pWriter->aWriter[1]; - if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ + if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE && pWriter->cdlidx.n ){ i64 iKey = FTS5_DOCLIST_IDX_ROWID( pWriter->iIdx, pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty @@ -3004,12 +3004,15 @@ static void fts5WriteFinish( ){ int i; if( p->rc==SQLITE_OK ){ - *pnLeaf = pWriter->aWriter[0].pgno; - if( *pnLeaf==1 && pWriter->aWriter[0].buf.n==0 ){ + Fts5PageWriter *pLeaf = &pWriter->aWriter[0]; + if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){ *pnLeaf = 0; *pnHeight = 0; }else{ - fts5WriteFlushLeaf(p, pWriter); + if( pLeaf->buf.n>4 ){ + fts5WriteFlushLeaf(p, pWriter); + } + *pnLeaf = pLeaf->pgno-1; if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ fts5WriteBtreeGrow(p, pWriter); } @@ -3381,44 +3384,20 @@ struct Fts5FlushCtx { Fts5SegWriter writer; }; -static int fts5FlushNewTerm(void *pCtx, const char *zTerm, int nTerm){ - Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; - int rc = SQLITE_OK; - fts5WriteAppendTerm(p->pIdx, &p->writer, nTerm, (const u8*)zTerm); - return rc; -} - -static int fts5FlushTermDone(void *pCtx){ - Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; - int rc = SQLITE_OK; - /* Write the doclist terminator */ - fts5WriteAppendZerobyte(p->pIdx, 
&p->writer); - return rc; -} - -static int fts5FlushNewEntry( - void *pCtx, - i64 iRowid, - const u8 *aPoslist, - int nPoslist -){ - Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; - Fts5Index *pIdx = p->pIdx; - -#ifdef SQLITE_DEBUG - /* The poslist-size varint should already be at the start of the - ** aPoslist/nPoslist buffer. This assert verifies that. */ - int n, i; - i = fts5GetVarint32(aPoslist, n); - assert( nPoslist==(n+i) ); -#endif - - /* Append the rowid itself */ - fts5WriteAppendRowid(pIdx, &p->writer, iRowid); - - /* And the poslist data */ - fts5WriteAppendPoslistData(pIdx, &p->writer, aPoslist, nPoslist); - return pIdx->rc; +/* +** Buffer aBuf[] contains a list of varints, all small enough to fit +** in a 32-bit integer. Return the size of the largest prefix of this +** list nMax bytes or less in size. +*/ +static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ + int ret = 0; + while( 1 ){ + u32 dummy; + int i = fts5GetVarint32(&aBuf[ret], dummy); + if( (ret + i) > nMax ) break; + ret += i; + } + return ret; } /* @@ -3429,6 +3408,7 @@ static int fts5FlushNewEntry( ** already occurred, this function is a no-op. 
*/ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ + Fts5Hash *pHash = p->apHash[iHash]; Fts5Structure *pStruct; int iSegid; int pgnoLast = 0; /* Last leaf page number in segment */ @@ -3439,19 +3419,127 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ iSegid = fts5AllocateSegid(p, pStruct); if( iSegid ){ + const int pgsz = p->pConfig->pgsz; + Fts5StructureSegment *pSeg; /* New segment within pStruct */ int nHeight; /* Height of new segment b-tree */ - int rc; - Fts5FlushCtx ctx; + Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ - fts5WriteInit(p, &ctx.writer, iHash, iSegid); - ctx.pIdx = p; + Fts5SegWriter writer; + fts5WriteInit(p, &writer, iHash, iSegid); - rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx, - fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone - ); - if( p->rc==SQLITE_OK ) p->rc = rc; - fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast); + /* Pre-allocate the buffer used to assemble leaf pages to the target + ** page size. */ + assert( pgsz>0 ); + pBuf = &writer.aWriter[0].buf; + fts5BufferGrow(&p->rc, pBuf, pgsz + 20); + + /* Begin scanning through hash table entries. */ + if( p->rc==SQLITE_OK ){ + memset(pBuf->p, 0, 4); + pBuf->n = 4; + sqlite3Fts5HashScanInit(pHash, 0, 0); + } + + while( 0==sqlite3Fts5HashScanEof(pHash) ){ + const char *zTerm; + int nTerm; + const u8 *pDoclist; + int nDoclist; + + sqlite3Fts5HashScanEntry(pHash, &zTerm,(const char**)&pDoclist,&nDoclist); + nTerm = strlen(zTerm); + + /* Decide if the term fits on the current leaf. If not, flush it + ** to disk. */ + if( (pBuf->n + nTerm + 2) > pgsz ){ + fts5WriteFlushLeaf(p, &writer); + pBuf = &writer.aWriter[0].buf; + if( (nTerm + 32) > pBuf->nSpace ){ + fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); + } + } + + /* Write the term to the leaf. 
And push it up into the b-tree hierarchy */ + if( writer.bFirstTermInPage==0 ){ + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], 0); + }else{ + fts5PutU16(&pBuf->p[2], pBuf->n); + writer.bFirstTermInPage = 0; + if( writer.aWriter[0].pgno!=1 ){ + fts5WriteBtreeTerm(p, &writer, nTerm, (const u8*)zTerm); + pBuf = &writer.aWriter[0].buf; + } + } + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nTerm); + fts5BufferAppendBlob(&p->rc, pBuf, nTerm, (const u8*)zTerm); + + if( pgsz>=(pBuf->n + nDoclist + 1) ){ + /* The entire doclist will fit on the current leaf. */ + fts5BufferAppendBlob(&p->rc, pBuf, nDoclist, pDoclist); + }else{ + i64 iRowid = 0; + i64 iDelta = 0; + int iOff = 0; + int bFirstDocid = 0; + + /* The entire doclist will not fit on this leaf. The following + ** loop iterates through the poslists that make up the current + ** doclist. */ + while( iOff<nDoclist ){ + u32 nPos; + int nCopy; + iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta); + nCopy = fts5GetVarint32(&pDoclist[iOff], nPos); + nCopy += nPos; + iRowid += iDelta; + + if( bFirstDocid ){ + fts5PutU16(&pBuf->p[0], pBuf->n); /* first docid on page */ + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid); + bFirstDocid = 0; + }else{ + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta); + } + assert( pBuf->n<=pBuf->nSpace ); + + if( (pBuf->n + nCopy) <= pgsz ){ + /* The entire poslist will fit on the current leaf. So copy + ** it in one go. */ + fts5BufferAppendBlob(&p->rc, pBuf, nCopy, &pDoclist[iOff]); + }else{ + /* The entire poslist will not fit on this leaf. So it needs + ** to be broken into sections. The only qualification being + ** that each varint must be stored contiguously. 
*/ + const u8 *pPoslist = &pDoclist[iOff]; + int iPos = 0; + while( 1 ){ + int nSpace = pgsz - pBuf->n; + int n; + if( (nCopy - iPos)<=nSpace ){ + n = nCopy - iPos; + }else{ + n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); + } + fts5BufferAppendBlob(&p->rc, pBuf, n, &pPoslist[iPos]); + iPos += n; + if( iPos>=nCopy ) break; + fts5WriteFlushLeaf(p, &writer); + pBuf = &writer.aWriter[0].buf; + } + bFirstDocid = 1; + } + assert( pBuf->n<=pgsz ); + iOff += nCopy; + } + } + + pBuf->p[pBuf->n++] = '\0'; + assert( pBuf->n<=pBuf->nSpace ); + sqlite3Fts5HashScanNext(pHash); + } + sqlite3Fts5HashClear(pHash); + fts5WriteFinish(p, &writer, &nHeight, &pgnoLast); /* Update the Fts5Structure. It is written back to the database by the ** fts5StructureRelease() call below. */ diff --git a/manifest b/manifest index 94a134ea1d..c27cd5f2c0 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\san\sfts5\sbug\sin\slarge\sincremental\smerges. -D 2015-02-26T14:54:03.688 +C Optimize\scopying\sdata\sfrom\sfts5\sin-memory\shash\stables\sto\stop\slevel\ssegments. 
+D 2015-02-26T20:49:09.566 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -111,8 +111,8 @@ F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c b92ba0eb67532d174934087f93716caf9a2168c7 F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 F ext/fts5/fts5_expr.c eee52c9df84eade48eaa3f50c8876f44b552ff9b -F ext/fts5/fts5_hash.c 9032dd35bf8da6f9d4fc8c955c348dd6d229d8e4 -F ext/fts5/fts5_index.c 97ce5c919be5a70b623f89c66c60bda15408d577 +F ext/fts5/fts5_hash.c 323099a445bf8f608af069e2d8ff4bb93db9904c +F ext/fts5/fts5_index.c 7a9de0c033a8f702f8e3659a23c2ea31bbbb789b F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 0d108148c26132448487926fe683425002aee369 @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 7eb022d7e5fdb180af823c82c47c938e4a7a355f -R a31df37a1e652372bc458cec184c4145 +P 208e3cb6b6dc8c7d824b64dec2034004c9fcbba5 +R 2ae35fa8ad1fd2b74f86acf33cc74d9f U dan -Z ba5b725699c58a1a7e341156a5c76451 +Z 84061e5477aeb4ed2552f0a9aa275eda diff --git a/manifest.uuid b/manifest.uuid index 32c499775f..3538b76cd7 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -208e3cb6b6dc8c7d824b64dec2034004c9fcbba5 \ No newline at end of file +8e3ca6323a2beab5f04250e24ae15b159d2aa0ac \ No newline at end of file