From: dan Date: Tue, 28 Jun 2011 11:58:09 +0000 (+0000) Subject: Add a fix and tests for the FTS deferred token logic. X-Git-Tag: version-3.7.8~38^2~35^2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=da7b45615d2ddd4b528f18fdeabab1d263dba228;p=thirdparty%2Fsqlite.git Add a fix and tests for the FTS deferred token logic. FossilOrigin-Name: 91daea7d2ec41f014fb30c6371aae447cc07f287 --- diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 16067aacc7..448a5a403a 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -3864,12 +3864,14 @@ static int fts3EvalSelectDeferred( ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int nDocSize = 0; /* Number of pages per doc loaded */ - int nDocEst = 0; /* Est. docs if all other tokens deferred */ int rc = SQLITE_OK; /* Return code */ int ii; /* Iterator variable for various purposes */ int nOvfl = 0; /* Total overflow pages used by doclists */ int nToken = 0; /* Total number of tokens in cluster */ + int nMinEst = 0; /* The minimum count for any phrase so far. */ + int nLoad4 = 1; /* (Phrases that will be loaded)^4. */ + /* Count the tokens in this AND/NEAR cluster. If none of the doclists ** associated with the tokens spill onto overflow pages, or if there is ** only 1 token, exit early. No tokens to defer in this case. */ @@ -3885,6 +3887,29 @@ static int fts3EvalSelectDeferred( rc = fts3EvalAverageDocsize(pCsr, &nDocSize); assert( rc!=SQLITE_OK || nDocSize>0 ); + + /* Iterate through all tokens in this AND/NEAR cluster, in ascending order + ** of the number of overflow pages that will be loaded by the pager layer + ** to retrieve the entire doclist for the token from the full-text index. + ** Load the doclists for tokens that are either: + ** + ** a. The cheapest token in the entire query (i.e. the one visited by the + ** first iteration of this loop), or + ** + ** b. Part of a multi-token phrase. + ** + ** After each token doclist is loaded, merge it with the others from the + ** same phrase and count the number of documents that the merged doclist + ** contains. Set variable "nMinEst" to the smallest number of documents in + ** any phrase doclist for which 1 or more token doclists have been loaded. + ** Let nOther be the number of other phrases for which it is certain that + ** one or more tokens will not be deferred. + ** + ** Then, for each token, defer it if loading the doclist would result in + ** loading N or more overflow pages into memory, where N is computed as: + ** + ** (nMinEst + 4^nOther - 1) / (4^nOther) + */ for(ii=0; iinOvfl>=(nDocEst*nDocSize) ){ + if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){ /* The number of overflow pages to load for this (and therefore all ** subsequent) tokens is greater than the estimated number of pages ** that will be loaded if all subsequent tokens are deferred. @@ -3915,26 +3933,26 @@ static int fts3EvalSelectDeferred( rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); fts3SegReaderCursorFree(pToken->pSegcsr); pToken->pSegcsr = 0; - }else if( ii==0 || pTC->pPhrase->nToken>1 ){ - /* Either this is the cheapest token in the entire query, or it is - ** part of a multi-token phrase. Either way, the entire doclist will - ** (eventually) be loaded into memory. It may as well be now. */ - Fts3PhraseToken *pToken = pTC->pToken; - int nList = 0; - char *pList = 0; - rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList); - assert( rc==SQLITE_OK || pList==0 ); - if( rc==SQLITE_OK ){ - fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList); - nDocEst = fts3DoclistCountDocids( - pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll - ); + }else{ + nLoad4 = nLoad4*4; + if( ii==0 || pTC->pPhrase->nToken>1 ){ + /* Either this is the cheapest token in the entire query, or it is + ** part of a multi-token phrase. Either way, the entire doclist will + ** (eventually) be loaded into memory. It may as well be now. */ + Fts3PhraseToken *pToken = pTC->pToken; + int nList = 0; + char *pList = 0; + rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList); + assert( rc==SQLITE_OK || pList==0 ); + if( rc==SQLITE_OK ){ + int nCount; + fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList); + nCount = fts3DoclistCountDocids( + pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll + ); + if( ii==0 || nCountpToken = 0; } diff --git a/manifest b/manifest index 57c9073015..f8bc9380fb 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\schanges\swith\sfts3-changes\sbranch. -D 2011-06-28T09:51:22.803 +C Add\sa\sfix\sand\stests\sfor\sthe\sFTS\sdeferred\stoken\slogic. +D 2011-06-28T11:58:09.194 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in c1d7a7f4fd8da6b1815032efca950e3d5125407e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -62,7 +62,7 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 70127bf57c54c45545a4ddd73161caf2d9bd1abe +F ext/fts3/fts3.c f45ad45053a587ad1c005459b704b7ade8bd504e F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe F ext/fts3/fts3Int.h e670e2c58b3dca76e44f7e7d35e61113d079db58 F ext/fts3/fts3_aux.c 0ebfa7b86cf8ff6a0861605fcc63b83ec1b70691 @@ -456,7 +456,7 @@ F test/fts3am.test 218aa6ba0dfc50c7c16b2022aac5c6be593d08d8 F test/fts3an.test a49ccadc07a2f7d646ec1b81bc09da2d85a85b18 F test/fts3ao.test b83f99f70e9eec85f27d75801a974b3f820e01f9 F test/fts3atoken.test 402ef2f7c2fb4b3d4fa0587df6441c1447e799b3 -F test/fts3auto.test f1cb0a55130897013ca5850dbee2945c2908a45a +F test/fts3auto.test c1a30b37002b7c764a96937fbc71065b73d69494 F test/fts3aux1.test 0b02743955d56fc0d4d66236a26177bd1b726de0 F test/fts3b.test e93bbb653e52afde110ad53bbd793f14fe7a8984 F test/fts3c.test fc723a9cf10b397fdfc2b32e73c53c8b1ec02958 @@ -949,7 +949,7 @@ F tool/symbols.sh bc2a3709940d47c8ac8e0a1fdf17ec801f015a00 F tool/tostr.awk 11760e1b94a5d3dcd42378f3cc18544c06cfa576 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings.sh 2ebae31e1eb352696f3c2f7706a34c084b28c262 -P 11ea98e454ad35b2606e81dec9ac866e4dd9f7e1 29e69f389cb0078e125ba5814a68b3fe8cf634c6 -R 99f54668f5b612c34e36e4280019aae3 +P 226686475c28cdc87e86d2d3f10014436a9a2b8f +R 26c661c69e309ad434e75bfd241a096d U dan -Z c4e55cbac13868c8cb421abab69674f2 +Z 1b1eba0343676ae88643e1d9f2f985a9 diff --git a/manifest.uuid b/manifest.uuid index 8ca57f2fa7..0e9f22faf1 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -226686475c28cdc87e86d2d3f10014436a9a2b8f \ No newline at end of file +91daea7d2ec41f014fb30c6371aae447cc07f287 \ No newline at end of file diff --git a/test/fts3auto.test b/test/fts3auto.test index 484fd6ace2..1c58a17204 100644 --- a/test/fts3auto.test +++ b/test/fts3auto.test @@ -107,15 +107,17 @@ proc do_fts3query_test {tn args} { " $matchinfo_asc } -# fts3_make_deferrable TABLE TOKEN +# fts3_make_deferrable TABLE TOKEN ?NROW? # -proc fts3_make_deferrable {tbl token} { +proc fts3_make_deferrable {tbl token {nRow 0}} { set stmt [sqlite3_prepare db "SELECT * FROM $tbl" -1 dummy] set name [sqlite3_column_name $stmt 0] sqlite3_finalize $stmt - set nRow [db one "SELECT count(*) FROM $tbl"] + if {$nRow==0} { + set nRow [db one "SELECT count(*) FROM $tbl"] + } set pgsz [db one "PRAGMA page_size"] execsql BEGIN for {set i 0} {$i < ($nRow * $pgsz * 1.2)/100} {incr i} { @@ -653,6 +655,50 @@ foreach {tn pending create} { catchsql { COMMIT } } +foreach {tn create} { + 1 "fts4(x)" + 2 "fts4(x, order=DESC)" +} { + execsql [subst { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING $create; + }] + + foreach {x} { + "F E N O T K X V A X I E X A P G Q V H U" + "R V A E T C V Q N I E L O N U G J K L U" + "U Y I G W M V F J L X I D C H F P J Q B" + "S G D Z X R P G S S Y B K A S G A I L L" + "L S I C H T Z S R Q P R N K J X L F M J" + "C C C D P X B Z C M A D A C X S B T X V" + "W Y J M D R G V R K B X S A W R I T N C" + "P K L W T M S P O Y Y V V O E H Q A I R" + "C D Y I C Z F H J C O Y A Q F L S B D K" + "P G S C Y C Y V I M B D S Z D D Y W I E" + "Z K Z U E E S F Y X T U A L W O U J C Q" + "P A T Z S W L P L Q V Y Y I P W U X S S" + "I U I H U O F Z F R H R F T N D X A G M" + "N A B M S H K X S O Y D T X S B R Y H Z" + "L U D A S K I L S V Z J P U B E B Y H M" + } { + execsql { INSERT INTO t1 VALUES($x) } + } + + # Add extra documents to the database such that token "B" will be considered + # deferrable if considering the other tokens means that 2 or fewer documents + # will be loaded into memory. + # + fts3_make_deferrable t1 B 2 + + # B is not deferred in either of the first two tests below, since filtering + # on "M" or "D" returns 10 documents or so. But filtering on "M * D" only + # returns 2, so B is deferred in this case. + # + do_fts3query_test 7.$tn.1 t1 {"M B"} + do_fts3query_test 7.$tn.2 t1 {"B D"} + do_fts3query_test 7.$tn.3 -deferred B t1 {"M B D"} +} + set sqlite_fts3_enable_parentheses $sfep finish_test