From: dan Date: Wed, 4 May 2011 12:52:59 +0000 (+0000) Subject: Optimize "ORDER BY rowid/docid DESC/ASC" clauses on FTS tables. X-Git-Tag: version-3.7.7~138^2~2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0f599faa294d3a7ee0316a5a249c87a3c94351a0;p=thirdparty%2Fsqlite.git Optimize "ORDER BY rowid/docid DESC/ASC" clauses on FTS tables. FossilOrigin-Name: 13395121e3d17ab6581dc5f6736ea324321a374c --- diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 52b853a9a2..faf497d9a0 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -419,6 +419,28 @@ static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){ *pVal += iVal; } +/* +** +*/ +static void fts3GetReverseDeltaVarint( + char **pp, + char *pStart, + sqlite3_int64 *pVal +){ + sqlite3_int64 iVal; + char *p = *pp; + + /* Pointer p now points at the first byte past the varint we are + ** interested in. So, unless the doclist is corrupt, the 0x80 bit is + ** clear on character p[-1]. */ + for(p = (*pp)-2; p>=pStart && *p&0x80; p--); + p++; + *pp = p; + + sqlite3Fts3GetVarint(p, &iVal); + *pVal -= iVal; +} + /* ** As long as *pp has not reached its end (pEnd), then do the same ** as fts3GetDeltaVarint(): read a single varint and add it to *pVal. @@ -1094,6 +1116,22 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ pInfo->aConstraintUsage[iCons].argvIndex = 1; pInfo->aConstraintUsage[iCons].omit = 1; } + + /* Regardless of the strategy selected, FTS can deliver rows in rowid (or + ** docid) order. Both ascending and descending are possible. 
+ */ + if( pInfo->nOrderBy==1 ){ + struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0]; + if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){ + if( pOrder->desc ){ + pInfo->idxStr = "DESC"; + }else{ + pInfo->idxStr = "ASC"; + } + } + pInfo->orderByConsumed = 1; + } + return SQLITE_OK; } @@ -2998,12 +3036,20 @@ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ } pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); }else{ - if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){ - pCsr->isEof = 1; - break; + if( pCsr->desc==0 ){ + if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){ + pCsr->isEof = 1; + break; + } + fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId); + }else{ + fts3GetReverseDeltaVarint(&pCsr->pNextId,pCsr->aDoclist,&pCsr->iPrevId); + if( pCsr->pNextId<=pCsr->aDoclist ){ + pCsr->isEof = 1; + break; + } } sqlite3_reset(pCsr->pStmt); - fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId); pCsr->isRequireSeek = 1; pCsr->isMatchinfoNeeded = 1; } @@ -3036,8 +3082,8 @@ static int fts3FilterMethod( sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ const char *azSql[] = { - "SELECT %s FROM %Q.'%q_content' AS x WHERE docid = ?", /* non-full-scan */ - "SELECT %s FROM %Q.'%q_content' AS x ", /* full-scan */ + "SELECT %s FROM %Q.'%q_content' AS x WHERE docid = ?", /* non-full-scan */ + "SELECT %s FROM %Q.'%q_content' AS x ORDER BY docid %s", /* full-scan */ }; int rc; /* Return code */ char *zSql; /* SQL statement used to access %_content */ @@ -3093,7 +3139,9 @@ static int fts3FilterMethod( ** row by docid. */ zSql = (char *)azSql[idxNum==FTS3_FULLSCAN_SEARCH]; - zSql = sqlite3_mprintf(zSql, p->zReadExprlist, p->zDb, p->zName); + zSql = sqlite3_mprintf( + zSql, p->zReadExprlist, p->zDb, p->zName, (idxStr ? 
idxStr : "ASC") + ); if( !zSql ){ rc = SQLITE_NOMEM; }else{ @@ -3105,7 +3153,22 @@ static int fts3FilterMethod( } pCsr->eSearch = (i16)idxNum; + assert( pCsr->desc==0 ); if( rc!=SQLITE_OK ) return rc; + if( rc==SQLITE_OK && pCsr->nDoclist>0 && idxStr && idxStr[0]=='D' ){ + sqlite3_int64 iDocid = 0; + char *csr = pCsr->aDoclist; + while( csr<&pCsr->aDoclist[pCsr->nDoclist] ){ + fts3GetDeltaVarint(&csr, &iDocid); + } + pCsr->pNextId = csr; + pCsr->iPrevId = iDocid; + pCsr->desc = 1; + pCsr->isRequireSeek = 1; + pCsr->isMatchinfoNeeded = 1; + pCsr->eEvalmode = FTS3_EVAL_NEXT; + return SQLITE_OK; + } return fts3NextMethod(pCursor); } @@ -3264,6 +3327,7 @@ int sqlite3Fts3ExprLoadFtDoclist( ** stored in pExpr->aDoclist. */ char *sqlite3Fts3FindPositions( + Fts3Cursor *pCursor, /* Associate FTS3 cursor */ Fts3Expr *pExpr, /* Access this expressions doclist */ sqlite3_int64 iDocid, /* Docid associated with requested pos-list */ int iCol /* Column of requested pos-list */ @@ -3273,7 +3337,7 @@ char *sqlite3Fts3FindPositions( char *pEnd = &pExpr->aDoclist[pExpr->nDoclist]; char *pCsr; - if( pExpr->pCurrent==0 ){ + if( pExpr->pCurrent==0 || pCursor->desc ){ pExpr->pCurrent = pExpr->aDoclist; pExpr->iCurrent = 0; pExpr->pCurrent += sqlite3Fts3GetVarint(pExpr->pCurrent,&pExpr->iCurrent); diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index b3f1ab55b1..b843fe9dc6 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -171,6 +171,7 @@ struct Fts3Cursor { char *pNextId; /* Pointer into the body of aDoclist */ char *aDoclist; /* List of docids for full-text queries */ int nDoclist; /* Size of buffer at aDoclist */ + int desc; /* True to sort in descending order */ int eEvalmode; /* An FTS3_EVAL_XX constant */ int nRowAvg; /* Average size of database rows, in pages */ @@ -353,7 +354,7 @@ int sqlite3Fts3GetVarint32(const char *, int *); int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); -char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, 
int); +char *sqlite3Fts3FindPositions(Fts3Cursor *, Fts3Expr *, sqlite3_int64, int); int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *); int sqlite3Fts3ExprLoadFtDoclist(Fts3Cursor *, Fts3Expr *, char **, int *); int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int); diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c index 6b74535079..99f9dd7174 100644 --- a/ext/fts3/fts3_snippet.c +++ b/ext/fts3/fts3_snippet.c @@ -415,7 +415,7 @@ static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ pPhrase->nToken = pExpr->pPhrase->nToken; - pCsr = sqlite3Fts3FindPositions(pExpr, p->pCsr->iPrevId, p->iCol); + pCsr = sqlite3Fts3FindPositions(p->pCsr, pExpr, p->pCsr->iPrevId, p->iCol); if( pCsr ){ int iFirst = 0; pPhrase->pList = pCsr; @@ -888,7 +888,7 @@ static int fts3ExprLocalHitsCb( if( pExpr->aDoclist ){ char *pCsr; - pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1); + pCsr = sqlite3Fts3FindPositions(p->pCursor, pExpr, p->pCursor->iPrevId, -1); if( pCsr ){ fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0); } @@ -1055,7 +1055,7 @@ static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ LcsIterator *pIter = &aIter[i]; nToken -= pIter->pExpr->pPhrase->nToken; pIter->iPosOffset = nToken; - pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1); + pIter->pRead = sqlite3Fts3FindPositions(pCsr,pIter->pExpr,pCsr->iPrevId,-1); if( pIter->pRead ){ pIter->iPos = pIter->iPosOffset; fts3LcsIteratorAdvance(&aIter[i]); @@ -1408,6 +1408,7 @@ struct TermOffset { }; struct TermOffsetCtx { + Fts3Cursor *pCsr; int iCol; /* Column of table to populate aTerm for */ int iTerm; sqlite3_int64 iDocid; @@ -1425,7 +1426,7 @@ static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ int iPos = 0; /* First position in position-list */ UNUSED_PARAMETER(iPhrase); - pList = sqlite3Fts3FindPositions(pExpr, p->iDocid, p->iCol); + pList = sqlite3Fts3FindPositions(p->pCsr, pExpr, p->iDocid, p->iCol); 
nTerm = pExpr->pPhrase->nToken; if( pList ){ fts3GetDeltaPosition(&pList, &iPos); @@ -1478,6 +1479,7 @@ void sqlite3Fts3Offsets( goto offsets_out; } sCtx.iDocid = pCsr->iPrevId; + sCtx.pCsr = pCsr; /* Loop through the table columns, appending offset information to ** string-buffer res for each column. diff --git a/ext/fts3/fts3_term.c b/ext/fts3/fts3_term.c index ce581633e1..e207ff870d 100644 --- a/ext/fts3/fts3_term.c +++ b/ext/fts3/fts3_term.c @@ -10,6 +10,9 @@ ** ****************************************************************************** ** +** This file is not part of the production FTS code. It is only used for +** testing. It contains a virtual table implementation that provides direct +** access to the full-text index of an FTS table. */ #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) @@ -134,7 +137,18 @@ static int fts3termBestIndexMethod( sqlite3_index_info *pInfo ){ UNUSED_PARAMETER(pVTab); - UNUSED_PARAMETER(pInfo); + + /* This vtab naturally does "ORDER BY term, docid, col, pos". */ + if( pInfo->nOrderBy ){ + int i; + for(i=0; i<pInfo->nOrderBy; i++){ + if( pInfo->aOrderBy[i].iColumn!=i || pInfo->aOrderBy[i].desc ) break; + } + if( i==pInfo->nOrderBy ){ + pInfo->orderByConsumed = 1; + } + } + return SQLITE_OK; } diff --git a/manifest b/manifest index bd7a00eb95..fde6974ea3 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Have\sr-tree\svirtual\stables\ssupport\son-conflict\sclauses. -D 2011-04-28T18:46:46.861 +C Optimize\s"ORDER\sBY\srowid/docid\sDESC/ASC"\sclauses\son\sFTS\stables. 
+D 2011-05-04T12:52:59.896 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7a4d9524721d40ef9ee26f93f9bd6a51dba106f2 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -61,17 +61,17 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c ce37973c86f15711a020fa629d8f95cfd642ebc3 +F ext/fts3/fts3.c 47e4f4da599e0ccd7b7fea08aaf2c77544e278e3 F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h 945926ea4b6a686c3e9834640a252d9870b7191e +F ext/fts3/fts3Int.h 8c2ac39ee17362571c58ab2c4f0667324c31f738 F ext/fts3/fts3_aux.c 9e931f55eed8498dafe7bc1160f10cbb1a652fdf F ext/fts3/fts3_expr.c 5f49e0deaf723724b08100bb3ff40aab02ad0c93 F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295 F ext/fts3/fts3_porter.c d61cfd81fb0fd8fbcb25adcaee0ba671aefaa5c2 -F ext/fts3/fts3_snippet.c e857c6a89d81d3b89df59f3b44b35c68d8ed5c62 -F ext/fts3/fts3_term.c c1dbc904ab1c2d687b97643c671795456228ab22 +F ext/fts3/fts3_snippet.c a4a3c7d2ab15ca9188e2d9b51a5e3927bf76580d +F ext/fts3/fts3_term.c f115f5a5f4298303d3b22fc6c524b8d565c7b950 F ext/fts3/fts3_tokenizer.c 055f3dc7369585350b28db1ee0f3b214dca6724d F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d @@ -475,6 +475,7 @@ F test/fts3query.test ef79d31fdb355d094baec1c1b24b60439a1fb8a2 F test/fts3rnd.test 2b1a579be557ab8ac54a51b39caa4aa8043cc4ad F test/fts3shared.test 8bb266521d7c5495c0ae522bb4d376ad5387d4a2 F test/fts3snippet.test a12f22a3ba4dd59751a57c79b031d07ab5f51ddd +F test/fts3sort.test 
b33d4650e8d4bff2dc00d14359a29cd1c25769f8 F test/fts4aa.test eadf85621c0a113d4c7ad3ccbf8441130e007b8f F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f @@ -932,7 +933,7 @@ F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/split-sqlite3c.tcl d9be87f1c340285a3e081eb19b4a247981ed290c F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P abdd70ae0424ccadb7edaf16e970c78b5257d23c -R c8f4a330c2633adfb2862fa8f9efbab2 +P 822ab52f1023b1c4973c806cc75454acd4e95fd0 +R bb599d547c2452f152daafa893566415 U dan -Z e17a424fc4dea753d2cbffe0969a3e63 +Z 8f8ca68f3fd8a51bab340e5fb86e9e14 diff --git a/manifest.uuid b/manifest.uuid index e2827a6e32..12102a8f27 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -822ab52f1023b1c4973c806cc75454acd4e95fd0 \ No newline at end of file +13395121e3d17ab6581dc5f6736ea324321a374c \ No newline at end of file diff --git a/test/fts3sort.test b/test/fts3sort.test new file mode 100644 index 0000000000..3f833a65cb --- /dev/null +++ b/test/fts3sort.test @@ -0,0 +1,107 @@ +# 2011 May 04 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS3 module. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. 
+ifcapable !fts3 { + finish_test + return +} + +set testprefix fts3sort + +proc build_database {nRow} { + db close + forcedelete test.db + sqlite3 db test.db + + set vocab [list aa ab ac ba bb bc ca cb cc da] + expr srand(0) + + execsql { CREATE VIRTUAL TABLE t1 USING fts4 } + for {set i 0} {$i < $nRow} {incr i} { + set v [expr int(rand()*1000000)] + set doc [list] + for {set div 1} {$div < 1000000} {set div [expr $div*10]} { + lappend doc [lindex $vocab [expr ($v/$div) % 10]] + } + execsql { INSERT INTO t1 VALUES($doc) } + } +} + +set nRow 1000 +do_test 1.0 { + build_database $nRow + execsql { SELECT count(*) FROM t1 } +} $nRow + +foreach {tn query} { + 1 "SELECT docid, * FROM t1" + 2 "SELECT docid, * FROM t1 WHERE t1 MATCH 'aa'" + 3 "SELECT docid, * FROM t1 WHERE t1 MATCH 'a*'" + 4 "SELECT docid, quote(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'a*'" + 5 "SELECT docid, quote(matchinfo(t1,'pcnxals')) FROM t1 WHERE t1 MATCH 'b*'" + 6 "SELECT docid, * FROM t1 WHERE t1 MATCH 'a* b* c*'" + 7 "SELECT docid, * FROM t1 WHERE t1 MATCH 'aa OR da'" + 8 "SELECT docid, * FROM t1 WHERE t1 MATCH 'nosuchtoken'" + 9 "SELECT docid, snippet(t1) FROM t1 WHERE t1 MATCH 'aa OR da'" +} { + + unset -nocomplain A B C D + set A_list [list] + set B_list [list] + set C_list [list] + set D_list [list] + + unset -nocomplain X + db eval "$query ORDER BY rowid ASC" X { + set A($X(docid)) [array get X] + lappend A_list $X(docid) + } + unset -nocomplain X + db eval "$query ORDER BY rowid DESC" X { + set B($X(docid)) [array get X] + lappend B_list $X(docid) + } + unset -nocomplain X + db eval "$query ORDER BY docid ASC" X { + set C($X(docid)) [array get X] + lappend C_list $X(docid) + } + unset -nocomplain X + db eval "$query ORDER BY docid DESC" X { + set D($X(docid)) [array get X] + lappend D_list $X(docid) + } + + do_test 1.$tn.1 { set A_list } [lsort -integer -increasing $A_list] + do_test 1.$tn.2 { set B_list } [lsort -integer -decreasing $B_list] + do_test 1.$tn.3 { set C_list } [lsort -integer 
-increasing $C_list] + do_test 1.$tn.4 { set D_list } [lsort -integer -decreasing $D_list] + + unset -nocomplain DATA + unset -nocomplain X + db eval "$query" X { + set DATA($X(docid)) [array get X] + } + + do_test 1.$tn.5 { lsort [array get A] } [lsort [array get DATA]] + do_test 1.$tn.6 { lsort [array get B] } [lsort [array get DATA]] + do_test 1.$tn.7 { lsort [array get C] } [lsort [array get DATA]] + do_test 1.$tn.8 { lsort [array get D] } [lsort [array get DATA]] +} + +finish_test