From: dan Date: Tue, 12 Jan 2016 19:28:51 +0000 (+0000) Subject: Improve performance of fts5 low level iterators. X-Git-Tag: version-3.11.0~157^2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fd7601a125dc1e16f7220540104d9de2be29474e;p=thirdparty%2Fsqlite.git Improve performance of fts5 low level iterators. FossilOrigin-Name: bc5118f40a11f64ffb4e1c086277fa80b9764745 --- diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index ef673085c6..2488cd7ca9 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -433,6 +433,9 @@ struct Fts5SegIter { Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ int iLeafOffset; /* Byte offset within current leaf */ + /* Next method */ + void (*xNext)(Fts5Index*, Fts5SegIter*, int*); + /* The page and offset from which the current term was read. The offset ** is the offset of the first rowid in the current doclist. */ int iTermLeafPgno; @@ -453,7 +456,6 @@ struct Fts5SegIter { i64 iRowid; /* Current rowid */ int nPos; /* Number of bytes in current position list */ u8 bDel; /* True if the delete flag is set */ - // u8 bContent; /* True if has content (detail=none mode) */ }; /* @@ -1575,6 +1577,20 @@ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ fts5SegIterLoadRowid(p, pIter); } +static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*); +static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*); +static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*); + +static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){ + if( pIter->flags & FTS5_SEGITER_REVERSE ){ + pIter->xNext = fts5SegIterNext_Reverse; + }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ + pIter->xNext = fts5SegIterNext_None; + }else{ + pIter->xNext = fts5SegIterNext; + } +} + /* ** Initialize the iterator object pIter to iterate through the entries in ** segment pSeg. The iterator is left pointing to the first entry when @@ -1600,6 +1616,7 @@ static void fts5SegIterInit( if( p->rc==SQLITE_OK ){ memset(pIter, 0, sizeof(*pIter)); + fts5SegIterSetNext(p, pIter); pIter->pSeg = pSeg; pIter->iLeafPgno = pSeg->pgnoFirst-1; fts5SegIterNextPage(p, pIter); @@ -1740,6 +1757,110 @@ static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5IndexIter *pIter){ return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); } +/* +** Advance iterator pIter to the next entry. +** +** This version of fts5SegIterNext() is only used by reverse iterators. +*/ +static void fts5SegIterNext_Reverse( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter, /* Iterator to advance */ + int *pbNewTerm /* OUT: Set for new term */ +){ + assert( pIter->flags & FTS5_SEGITER_REVERSE ); + assert( pIter->pNextLeaf==0 ); + if( pIter->iRowidOffset>0 ){ + u8 *a = pIter->pLeaf->p; + int iOff; + int nPos; + int bDummy; + i64 iDelta; + + pIter->iRowidOffset--; + pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset]; + fts5SegIterLoadNPos(p, pIter); + iOff = pIter->iLeafOffset; + if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){ + iOff += pIter->nPos; + } + fts5GetVarint(&a[iOff], (u64*)&iDelta); + pIter->iRowid -= iDelta; + }else{ + fts5SegIterReverseNewPage(p, pIter); + } +} + +/* +** Advance iterator pIter to the next entry. +** +** This version of fts5SegIterNext() is only used if detail=none and the +** iterator is not a reverse direction iterator. +*/ +static void fts5SegIterNext_None( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter, /* Iterator to advance */ + int *pbNewTerm /* OUT: Set for new term */ +){ + int iOff; + + assert( p->rc==SQLITE_OK ); + assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 ); + assert( p->pConfig->eDetail==FTS5_DETAIL_NONE ); + + ASSERT_SZLEAF_OK(pIter->pLeaf); + iOff = pIter->iLeafOffset; + + /* Next entry is on the next page */ + if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){ + fts5SegIterNextPage(p, pIter); + if( p->rc || pIter->pLeaf==0 ) return; + pIter->iRowid = 0; + iOff = 4; + } + + if( iOffiEndofDoclist ){ + /* Next entry is on the current page */ + i64 iDelta; + iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], &iDelta); + pIter->iLeafOffset = iOff; + pIter->iRowid += iDelta; + }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){ + if( pIter->pSeg ){ + int nKeep = 0; + if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){ + iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep); + } + pIter->iLeafOffset = iOff; + fts5SegIterLoadTerm(p, pIter, nKeep); + }else{ + const u8 *pList = 0; + const char *zTerm = 0; + int nList; + sqlite3Fts5HashScanNext(p->pHash); + sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); + if( pList==0 ) goto next_none_eof; + pIter->pLeaf->p = (u8*)pList; + pIter->pLeaf->nn = nList; + pIter->pLeaf->szLeaf = nList; + pIter->iEndofDoclist = nList; + sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm); + pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); + } + + if( pbNewTerm ) *pbNewTerm = 1; + }else{ + goto next_none_eof; + } + + fts5SegIterLoadNPos(p, pIter); + + return; + next_none_eof: + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; +} + + /* ** Advance iterator pIter to the next entry. ** @@ -1752,151 +1873,119 @@ static void fts5SegIterNext( Fts5SegIter *pIter, /* Iterator to advance */ int *pbNewTerm /* OUT: Set for new term */ ){ + Fts5Data *pLeaf = pIter->pLeaf; + int iOff; + int bNewTerm = 0; + int nKeep = 0; + assert( pbNewTerm==0 || *pbNewTerm==0 ); - if( p->rc==SQLITE_OK ){ - if( pIter->flags & FTS5_SEGITER_REVERSE ){ - assert( pIter->pNextLeaf==0 ); - if( pIter->iRowidOffset>0 ){ - u8 *a = pIter->pLeaf->p; - int iOff; - int nPos; - int bDummy; - i64 iDelta; + assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); - pIter->iRowidOffset--; - pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset]; - fts5SegIterLoadNPos(p, pIter); - iOff = pIter->iLeafOffset; - if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){ - iOff += pIter->nPos; - } - fts5GetVarint(&a[iOff], (u64*)&iDelta); - pIter->iRowid -= iDelta; - }else{ - fts5SegIterReverseNewPage(p, pIter); - } - }else{ - Fts5Data *pLeaf = pIter->pLeaf; - int iOff; - int bNewTerm = 0; - int nKeep = 0; + /* Search for the end of the position list within the current page. */ + u8 *a = pLeaf->p; + int n = pLeaf->szLeaf; - /* Search for the end of the position list within the current page. */ - u8 *a = pLeaf->p; - int n = pLeaf->szLeaf; + ASSERT_SZLEAF_OK(pLeaf); + iOff = pIter->iLeafOffset + pIter->nPos; - ASSERT_SZLEAF_OK(pLeaf); - if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ - iOff = pIter->iLeafOffset; - }else{ - iOff = pIter->iLeafOffset + pIter->nPos; + if( iOffiEndofDoclist ); + if( iOff>=pIter->iEndofDoclist ){ + bNewTerm = 1; + if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ + iOff += fts5GetVarint32(&a[iOff], nKeep); } + }else{ + u64 iDelta; + iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); + pIter->iRowid += iDelta; + assert_nc( iDelta>0 ); + } + pIter->iLeafOffset = iOff; - if( iOffiEndofDoclist ); - if( iOff>=pIter->iEndofDoclist ){ - bNewTerm = 1; - if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ - iOff += fts5GetVarint32(&a[iOff], nKeep); - } - }else{ - u64 iDelta; - iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); - pIter->iRowid += iDelta; - assert_nc( iDelta>0 ); - } + }else if( pIter->pSeg==0 ){ + const u8 *pList = 0; + const char *zTerm = 0; + int nList = 0; + assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm ); + if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ + sqlite3Fts5HashScanNext(p->pHash); + sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); + } + if( pList==0 ){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + }else{ + pIter->pLeaf->p = (u8*)pList; + pIter->pLeaf->nn = nList; + pIter->pLeaf->szLeaf = nList; + pIter->iEndofDoclist = nList+1; + sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm), + (u8*)zTerm); + pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); + *pbNewTerm = 1; + } + }else{ + iOff = 0; + /* Next entry is not on the current page */ + while( iOff==0 ){ + fts5SegIterNextPage(p, pIter); + pLeaf = pIter->pLeaf; + if( pLeaf==0 ) break; + ASSERT_SZLEAF_OK(pLeaf); + if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOffszLeaf ){ + iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; - }else if( pIter->pSeg==0 ){ - const u8 *pList = 0; - const char *zTerm = 0; - int nList = 0; - assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm ); - if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ - sqlite3Fts5HashScanNext(p->pHash); - sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); - } - if( pList==0 ){ - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; - }else{ - pIter->pLeaf->p = (u8*)pList; - pIter->pLeaf->nn = nList; - pIter->pLeaf->szLeaf = nList; - pIter->iEndofDoclist = nList+1; - sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm), - (u8*)zTerm); - pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); - *pbNewTerm = 1; - } - }else{ - iOff = 0; - /* Next entry is not on the current page */ - while( iOff==0 ){ - fts5SegIterNextPage(p, pIter); - pLeaf = pIter->pLeaf; - if( pLeaf==0 ) break; - ASSERT_SZLEAF_OK(pLeaf); - if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOffszLeaf ){ - iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); - pIter->iLeafOffset = iOff; - - if( pLeaf->nn>pLeaf->szLeaf ){ - pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( - &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist + if( pLeaf->nn>pLeaf->szLeaf ){ + pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( + &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist ); - } + } - } - else if( pLeaf->nn>pLeaf->szLeaf ){ - pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( - &pLeaf->p[pLeaf->szLeaf], iOff + } + else if( pLeaf->nn>pLeaf->szLeaf ){ + pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( + &pLeaf->p[pLeaf->szLeaf], iOff ); - pIter->iLeafOffset = iOff; - pIter->iEndofDoclist = iOff; - bNewTerm = 1; - } - assert_nc( iOffszLeaf - || p->pConfig->eDetail==FTS5_DETAIL_NONE - ); - if( iOff>pLeaf->szLeaf ){ - p->rc = FTS5_CORRUPT; - return; - } - } + pIter->iLeafOffset = iOff; + pIter->iEndofDoclist = iOff; + bNewTerm = 1; + } + assert_nc( iOffszLeaf ); + if( iOff>pLeaf->szLeaf ){ + p->rc = FTS5_CORRUPT; + return; } + } + } - /* Check if the iterator is now at EOF. If so, return early. */ - if( pIter->pLeaf ){ - if( bNewTerm ){ - if( pIter->flags & FTS5_SEGITER_ONETERM ){ - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; - }else{ - fts5SegIterLoadTerm(p, pIter, nKeep); - fts5SegIterLoadNPos(p, pIter); - if( pbNewTerm ) *pbNewTerm = 1; - } - }else{ - /* The following could be done by calling fts5SegIterLoadNPos(). But - ** this block is particularly performance critical, so equivalent - ** code is inlined. - ** - ** Later: Switched back to fts5SegIterLoadNPos() because it supports - ** detail=none mode. Not ideal. - */ -#if 0 - int nSz; - assert( p->rc==SQLITE_OK ); - fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz); - pIter->bDel = (nSz & 0x0001); - pIter->nPos = nSz>>1; - assert_nc( pIter->nPos>=0 ); -#endif - fts5SegIterLoadNPos(p, pIter); - } + /* Check if the iterator is now at EOF. If so, return early. */ + if( pIter->pLeaf ){ + if( bNewTerm ){ + if( pIter->flags & FTS5_SEGITER_ONETERM ){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + }else{ + fts5SegIterLoadTerm(p, pIter, nKeep); + fts5SegIterLoadNPos(p, pIter); + if( pbNewTerm ) *pbNewTerm = 1; } + }else{ + /* The following could be done by calling fts5SegIterLoadNPos(). But + ** this block is particularly performance critical, so equivalent + ** code is inlined. + ** + ** Later: Switched back to fts5SegIterLoadNPos() because it supports + ** detail=none mode. Not ideal. + */ + int nSz; + assert( p->rc==SQLITE_OK ); + fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz); + pIter->bDel = (nSz & 0x0001); + pIter->nPos = nSz>>1; + assert_nc( pIter->nPos>=0 ); } } } @@ -2231,6 +2320,8 @@ static void fts5SegIterSeekInit( } } + fts5SegIterSetNext(p, pIter); + /* Either: ** ** 1) an error has occurred, or @@ -2288,7 +2379,7 @@ static void fts5SegIterHashInit( pLeaf->nn = pLeaf->szLeaf = nList; pIter->pLeaf = pLeaf; pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); - pIter->iEndofDoclist = pLeaf->nn+1; + pIter->iEndofDoclist = pLeaf->nn; if( flags & FTS5INDEX_QUERY_DESC ){ pIter->flags |= FTS5_SEGITER_REVERSE; @@ -2297,6 +2388,8 @@ static void fts5SegIterHashInit( fts5SegIterLoadNPos(p, pIter); } } + + fts5SegIterSetNext(p, pIter); } /* @@ -2540,7 +2633,7 @@ static void fts5SegIterNextFrom( } do{ - if( bMove ) fts5SegIterNext(p, pIter, 0); + if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0); if( pIter->pLeaf==0 ) break; if( bRev==0 && pIter->iRowid>=iMatch ) break; if( bRev!=0 && pIter->iRowid<=iMatch ) break; @@ -2574,7 +2667,9 @@ static void fts5MultiIterAdvanced( for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ int iEq; if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ - fts5SegIterNext(p, &pIter->aSeg[iEq], 0); + Fts5SegIter *pSeg = &pIter->aSeg[iEq]; + assert( p->rc==SQLITE_OK ); + pSeg->xNext(p, pSeg, 0); i = pIter->nSeg + iEq; } } @@ -2661,7 +2756,7 @@ static void fts5MultiIterNext( if( bUseFrom && pSeg->pDlidx ){ fts5SegIterNextFrom(p, pSeg, iFrom); }else{ - fts5SegIterNext(p, pSeg, &bNewTerm); + pSeg->xNext(p, pSeg, &bNewTerm); } if( pSeg->pLeaf==0 || bNewTerm @@ -2689,7 +2784,8 @@ static void fts5MultiIterNext2( Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; int bNewTerm = 0; - fts5SegIterNext(p, pSeg, &bNewTerm); + assert( p->rc==SQLITE_OK ); + pSeg->xNext(p, pSeg, &bNewTerm); if( pSeg->pLeaf==0 || bNewTerm || fts5MultiIterAdvanceRowid(p, pIter, iFirst) ){ @@ -2809,7 +2905,8 @@ static void fts5MultiIterNew( for(iIter=pNew->nSeg-1; iIter>0; iIter--){ int iEq; if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ - fts5SegIterNext(p, &pNew->aSeg[iEq], 0); + Fts5SegIter *pSeg = &pNew->aSeg[iEq]; + if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0); fts5MultiIterAdvanced(p, pNew, iEq, iIter); } } @@ -2859,6 +2956,7 @@ static void fts5MultiIterNew2( }else{ pNew->bEof = 1; } + fts5SegIterSetNext(p, pIter); *ppOut = pNew; } diff --git a/ext/fts5/test/fts5ad.test b/ext/fts5/test/fts5ad.test index b875354534..c6b09d8121 100644 --- a/ext/fts5/test/fts5ad.test +++ b/ext/fts5/test/fts5ad.test @@ -24,8 +24,6 @@ ifcapable !fts5 { foreach_detail_mode $testprefix { -if {[detail_is_none]==0} continue - do_execsql_test 1.0 { CREATE VIRTUAL TABLE yy USING fts5(x, y, detail=%DETAIL%); INSERT INTO yy VALUES('Changes the result to be', 'the list of all matching'); diff --git a/manifest b/manifest index 5a749962b4..bd4318635f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\strunk\swith\sthis\sbranch. -D 2016-01-11T18:30:34.152 +C Improve\sperformance\sof\sfts5\slow\slevel\siterators. +D 2016-01-12T19:28:51.507 F Makefile.in 7c8cc4c2f0179efc6fa9492141d1fb65f4807054 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc e45d8b9b56dfa3f2cd860b2c28bd9d304513b042 @@ -103,7 +103,7 @@ F ext/fts5/fts5_buffer.c 87204c8b3b8bc62b27376eab09b74d6d5acc41f1 F ext/fts5/fts5_config.c b0ed7b0ddd785fb4d4e6f9037d357f8aa95918e6 F ext/fts5/fts5_expr.c 6eba2220747ea1b20a358fb3b34b2ab78323e285 F ext/fts5/fts5_hash.c 1b113977296cf4212c6ec667d5e3f2bd18036955 -F ext/fts5/fts5_index.c 04fe098ffab66d9424ba4e7c0b88ce7c98748cb1 +F ext/fts5/fts5_index.c 5ab044a67919e2c9f42f0288a39778136154511e F ext/fts5/fts5_main.c 03bd44e4bd0ba16213ca9259ad5df1d4d743fd7e F ext/fts5/fts5_storage.c f7b2d330dd7b29a9f4da09f6d85879ca8c41b2e8 F ext/fts5/fts5_tcl.c 18e9382d8cdad4c05b49559c68494968b9b4a4fb @@ -118,7 +118,7 @@ F ext/fts5/test/fts5_common.tcl 393882afb225a21edf033043bbf936951e9198c1 F ext/fts5/test/fts5aa.test 7e814df4a0e6c22a6fe2d84f210fdc0b5068a084 F ext/fts5/test/fts5ab.test 30325a89453280160106be411bba3acf138e6d1b F ext/fts5/test/fts5ac.test d5073ca7bd2d9fe8aab0c82c6c75a7e4b0d70ced -F ext/fts5/test/fts5ad.test 049f7511a79c155d2d8dfd2ddcfeb640c50ad0dc +F ext/fts5/test/fts5ad.test 0ddaa5b692ff220100ee396228838f4331399eaa F ext/fts5/test/fts5ae.test 612dcb51f4069226791ff14c17dbfb3138c56f20 F ext/fts5/test/fts5af.test be858a96b1f5de66ba6d64f0021bd8b2408e126c F ext/fts5/test/fts5ag.test 27180de76c03036be75ee80b93d8c5f540014071 @@ -1412,7 +1412,7 @@ F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2c0b93553ebb00a70c611eb8dac021877933f088 ecc98bef43c2fd07d64e4efddf340929a875ebac -R fac95fe8d4249da246cad11b14ac9297 +P a73d245f2e523a57163bc93d9fb6a74c4cdf21e7 +R 1ce1920656146c835e2df297e09b2c78 U dan -Z 0acdab0d7c32bec7117a883df7ebebf7 +Z 25b8dbc71e1c6971b424b6f8a3c7183e diff --git a/manifest.uuid b/manifest.uuid index 0ab6f5ff10..0aaeafe188 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a73d245f2e523a57163bc93d9fb6a74c4cdf21e7 \ No newline at end of file +bc5118f40a11f64ffb4e1c086277fa80b9764745 \ No newline at end of file