From: dan Date: Mon, 30 Sep 2013 18:14:45 +0000 (+0000) Subject: Fix a performance problem in the FTS4 auxiliary functions triggered by an OR clause... X-Git-Tag: version-3.8.1~57 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1f03e609ff2e02493bfa274872837d47e47ee4a8;p=thirdparty%2Fsqlite.git Fix a performance problem in the FTS4 auxiliary functions triggered by an OR clause in the full-text query. FossilOrigin-Name: fa0f2f0e3e79ae653118b901e1cca7725dfaf249 --- diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 0f7c38e571..4bd80db8b1 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -5268,15 +5268,23 @@ int sqlite3Fts3EvalPhrasePoslist( pIter = pPhrase->doclist.pList; if( iDocid!=pCsr->iPrevId || pExpr->bEof ){ int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */ + int iMul; /* +1 if csr dir matches index dir, else -1 */ int bOr = 0; u8 bEof = 0; - Fts3Expr *p; + u8 bTreeEof = 0; + Fts3Expr *p; /* Used to iterate from pExpr to root */ + Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */ /* Check if this phrase descends from an OR expression node. If not, ** return NULL. Otherwise, the entry that corresponds to docid - ** pCsr->iPrevId may lie earlier in the doclist buffer. */ + ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the + ** tree that the node is part of has been marked as EOF, but the node + ** itself is not EOF, then it may point to an earlier entry. */ + pNear = pExpr; for(p=pExpr->pParent; p; p=p->pParent){ if( p->eType==FTSQUERY_OR ) bOr = 1; + if( p->eType==FTSQUERY_NEAR ) pNear = p; + if( p->bEof ) bTreeEof = 1; } if( bOr==0 ) return SQLITE_OK; @@ -5295,29 +5303,59 @@ int sqlite3Fts3EvalPhrasePoslist( assert( rc!=SQLITE_OK || pPhrase->bIncr==0 ); if( rc!=SQLITE_OK ) return rc; } - - if( pExpr->bEof ){ - pIter = 0; - iDocid = 0; + + iMul = ((pCsr->bDesc==bDescDoclist) ? 
1 : -1); + while( bTreeEof==1 + && pNear->bEof==0 + && (DOCID_CMP(pNear->iDocid, pCsr->iPrevId) * iMul)<0 + ){ + int rc = SQLITE_OK; + fts3EvalNextRow(pCsr, pExpr, &rc); + if( rc!=SQLITE_OK ) return rc; + iDocid = pExpr->iDocid; + pIter = pPhrase->doclist.pList; } + bEof = (pPhrase->doclist.nAll==0); assert( bDescDoclist==0 || bDescDoclist==1 ); assert( pCsr->bDesc==0 || pCsr->bDesc==1 ); - if( pCsr->bDesc==bDescDoclist ){ - int dummy; - while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ - sqlite3Fts3DoclistPrev( - bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, - &pIter, &iDocid, &dummy, &bEof - ); - } - }else{ - while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ - sqlite3Fts3DoclistNext( - bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, - &pIter, &iDocid, &bEof - ); + if( bEof==0 ){ + if( pCsr->bDesc==bDescDoclist ){ + int dummy; + if( pNear->bEof ){ + /* This expression is already at EOF. So position it to point to the + ** last entry in the doclist at pPhrase->doclist.aAll[]. Variable + ** iDocid is already set for this entry, so all that is required is + ** to set pIter to point to the first byte of the last position-list + ** in the doclist. + ** + ** It would also be correct to set pIter and iDocid to zero. In + ** this case, the first call to sqlite3Fts3DoclistPrev() below + ** would also move the iterator to point to the last entry in the + ** doclist. However, this is expensive, as to do so it has to + ** iterate through the entire doclist from start to finish (since + ** it does not know the docid for the last entry). 
*/ + pIter = &pPhrase->doclist.aAll[pPhrase->doclist.nAll-1]; + fts3ReversePoslist(pPhrase->doclist.aAll, &pIter); + } + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ + sqlite3Fts3DoclistPrev( + bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, + &pIter, &iDocid, &dummy, &bEof + ); + } + }else{ + if( pNear->bEof ){ + pIter = 0; + iDocid = 0; + } + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ + sqlite3Fts3DoclistNext( + bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, + &pIter, &iDocid, &bEof + ); + } } } diff --git a/manifest b/manifest index eb27458602..1acf1cc13a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\smemory\sleaks\sin\sthe\samatch\sextension.\s\sAdd\sa\sfew\ssimple\stest\scases. -D 2013-09-30T17:37:15.455 +C Fix\sa\sperformance\sproblem\sin\sthe\sFTS4\sauxiliary\sfunctions\striggered\sby\san\sOR\sclause\sin\sthe\sfull-text\squery. +D 2013-09-30T18:14:45.059 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5e41da95d92656a5004b03d3576e8b226858a28e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -78,7 +78,7 @@ F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers e0a8b81383ea60d0334d274fadf305ea14a8c314 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 0d6311cd433ea30c9e941b93bfeac2f9e6937980 +F ext/fts3/fts3.c e1240ab6f5999174309a41ffac63b94ed1233098 F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe F ext/fts3/fts3Int.h c7a451661c2d9b2440b2008c3f63ce06f13181d6 F ext/fts3/fts3_aux.c b02632f6dd0e375ce97870206d914ea6d8df5ccd @@ -548,7 +548,7 @@ F test/fts3prefix2.test e1f0a822ca661dced7f12ce392e14eaf65609dce F test/fts3query.test 4fefd43ff24993bc2c9b2778f2bec0cc7629e7ed F test/fts3rnd.test 1320d8826a845e38a96e769562bf83d7a92a15d0 F test/fts3shared.test 
57e26a801f21027b7530da77db54286a6fe4997e -F test/fts3snippet.test 24d6ff1920a70fd970c401a8525834b4ad12cece +F test/fts3snippet.test d524af6bcef4714e059ef559113dbdc924cd33d1 F test/fts3sort.test ed34c716a11cc2009a35210e84ad5f9c102362ca F test/fts3tok1.test b10d0a12a0ab5f905cea1200b745de233f37443f F test/fts3tok_err.test 52273cd193b9036282f7bacb43da78c6be87418d @@ -1117,7 +1117,7 @@ F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 F tool/wherecosttest.c f407dc4c79786982a475261866a161cd007947ae F tool/win/sqlite.vsix 030f3eeaf2cb811a3692ab9c14d021a75ce41fff -P 94c914e3fa632f88a0d0c14537f81aa46759e2be -R 365ba09c87d852154921d1b9e7f8c208 -U drh -Z 32444bec405923a830b81223af98656e +P 604134732e309a738e21bcb4240b9ff285d2bfe4 +R 6a5e216c5c51e185221e1c2d8a375388 +U dan +Z 83a6f701584057aa6ad5500f631da9c2 diff --git a/manifest.uuid b/manifest.uuid index 3aed65bfb3..3f571c4238 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -604134732e309a738e21bcb4240b9ff285d2bfe4 \ No newline at end of file +fa0f2f0e3e79ae653118b901e1cca7725dfaf249 \ No newline at end of file diff --git a/test/fts3snippet.test b/test/fts3snippet.test index f19db925ba..415251dcce 100644 --- a/test/fts3snippet.test +++ b/test/fts3snippet.test @@ -16,6 +16,7 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl +set testprefix fts3snippet # If SQLITE_ENABLE_FTS3 is not defined, omit this file. ifcapable !fts3 { finish_test ; return } @@ -138,7 +139,7 @@ foreach {DO_MALLOC_TEST enc} { # Set variable $T to the test name prefix for this iteration of the loop. # - set T "fts3snippet-$enc" + set T "fts3snippet-1.$enc" ########################################################################## # Test the offset function. 
@@ -459,5 +460,65 @@ foreach {DO_MALLOC_TEST enc} { } {0 blob} } +#------------------------------------------------------------------------- +# Test an interaction between the snippet() function and OR clauses. +# +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE t2 USING fts4; + INSERT INTO t2 VALUES('one two three four five'); + INSERT INTO t2 VALUES('two three four five one'); + INSERT INTO t2 VALUES('three four five one two'); + INSERT INTO t2 VALUES('four five one two three'); + INSERT INTO t2 VALUES('five one two three four'); +} + +do_execsql_test 2.2 { + SELECT snippet(t2, '[', ']') FROM t2 WHERE t2 MATCH 'one OR (four AND six)' +} { + {[one] two three [four] five} + {two three [four] five [one]} + {three [four] five [one] two} + {[four] five [one] two three} + {five [one] two three [four]} +} + +do_execsql_test 2.3 { + SELECT snippet(t2, '[', ']') FROM t2 + WHERE t2 MATCH 'one OR (four AND six)' + ORDER BY docid DESC +} { + {five [one] two three [four]} + {[four] five [one] two three} + {three [four] five [one] two} + {two three [four] five [one]} + {[one] two three [four] five} +} + +do_execsql_test 2.4 { + INSERT INTO t2 VALUES('six'); +} + +do_execsql_test 2.5 { + SELECT snippet(t2, '[', ']') FROM t2 WHERE t2 MATCH 'one OR (four AND six)' +} { + {[one] two three [four] five} + {two three [four] five [one]} + {three [four] five [one] two} + {[four] five [one] two three} + {five [one] two three [four]} +} + +do_execsql_test 2.6 { + SELECT snippet(t2, '[', ']') FROM t2 + WHERE t2 MATCH 'one OR (four AND six)' + ORDER BY docid DESC +} { + {five [one] two three [four]} + {[four] five [one] two three} + {three [four] five [one] two} + {two three [four] five [one]} + {[one] two three [four] five} +} + set sqlite_fts3_enable_parentheses 0 finish_test