From: dan Date: Thu, 19 Mar 2026 17:56:10 +0000 (+0000) Subject: Update fts5 scan costs to take into account that a prefix query combined with a rowid... X-Git-Tag: major-release~69 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=0cb7603d4ce8bd89b03eb542051e6e70fbfd788a;p=thirdparty%2Fsqlite.git Update fts5 scan costs to take into account that a prefix query combined with a rowid equality constraint is still quite expensive. FossilOrigin-Name: 539769b63e4044018be2993894b1962df6a6ae58636682b7fb7a2dd5aaf4075c --- diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index cf033ab5de..2e3b5b3af5 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -517,7 +517,7 @@ static void fts5SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ if( sqlite3_libversion_number()>=3008002 ) #endif { - pIdxInfo->estimatedRows = nRow; + pIdxInfo->estimatedRows = MAX(1, nRow); } #endif } @@ -586,19 +586,30 @@ static int fts5UsePatternMatch( ** a) If a MATCH operator is present, the cost depends on the other ** constraints also present. As follows: ** -** * No other constraints: cost=1000.0 -** * One rowid range constraint: cost=750.0 -** * Both rowid range constraints: cost=500.0 -** * An == rowid constraint: cost=100.0 +** * No other constraints: cost=50000.0 +** * One rowid range constraint: cost=37500.0 +** * Both rowid range constraints: cost=30000.0 +** * An == rowid constraint: cost=25000.0 ** ** b) Otherwise, if there is no MATCH: ** -** * No other constraints: cost=1000000.0 -** * One rowid range constraint: cost=750000.0 -** * Both rowid range constraints: cost=250000.0 -** * An == rowid constraint: cost=10.0 +** * No other constraints: cost=3000000.0 +** * One rowid range constraint: cost=2250000.0 +** * Both rowid range constraints: cost=750000.0 +** * An == rowid constraint: cost=25.0 ** ** Costs are not modified by the ORDER BY clause. 
+** +** The ratios used in case (a) are based on informal results obtained from +** the tool/fts5cost.tcl script. The "MATCH and ==" combination has the +** cost set quite high because the query may be a prefix query. Unless +** there is a prefix index, prefix queries with rowid constraints are much +** more expensive than non-prefix queries with rowid constraints. +** +** The estimated rows returned is set to the cost/40. For simple queries, +** experimental results show that cost/4 might be about right. But for +** more complex queries that use multiple terms the number of rows might +** be far fewer than this. So we compromise and use cost/40. */ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ Fts5Table *pTab = (Fts5Table*)pVTab; @@ -724,21 +735,35 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ /* Calculate the estimated cost based on the flags set in idxFlags. */ if( bSeenEq ){ - pInfo->estimatedCost = nSeenMatch ? 1000.0 : 25.0; - fts5SetUniqueFlag(pInfo); + pInfo->estimatedCost = nSeenMatch ? 25000.0 : 25.0; fts5SetEstimatedRows(pInfo, 1); + fts5SetUniqueFlag(pInfo); }else{ - if( bSeenLt && bSeenGt ){ - pInfo->estimatedCost = nSeenMatch ? 5000.0 : 750000.0; - }else if( bSeenLt || bSeenGt ){ - pInfo->estimatedCost = nSeenMatch ? 7500.0 : 2250000.0; + i64 nEstRows; + if( nSeenMatch ){ + if( bSeenLt && bSeenGt ){ + pInfo->estimatedCost = 30000.0; + }else if( bSeenLt || bSeenGt ){ + pInfo->estimatedCost = 37500.0; + }else{ + pInfo->estimatedCost = 50000.0; + } + nEstRows = (i64)(pInfo->estimatedCost / 40.0); + for(i=1; i<nSeenMatch; i++){ + pInfo->estimatedCost *= 2.5; + nEstRows = nEstRows / 2; + } }else{ - pInfo->estimatedCost = nSeenMatch ? 
10000.0 : 3000000.0; - } - for(i=1; i<nSeenMatch; i++){ - pInfo->estimatedCost *= 0.4; + if( bSeenLt && bSeenGt ){ + pInfo->estimatedCost = 750000.0; + }else if( bSeenLt || bSeenGt ){ + pInfo->estimatedCost = 2250000.0; + }else{ + pInfo->estimatedCost = 3000000.0; + } + nEstRows = (i64)(pInfo->estimatedCost / 4.0); } - fts5SetEstimatedRows(pInfo, (i64)(pInfo->estimatedCost / 4.0)); + fts5SetEstimatedRows(pInfo, nEstRows); } pInfo->idxNum = idxFlags; diff --git a/ext/fts5/test/fts5join.test b/ext/fts5/test/fts5join.test index e4d3b69b79..2b9945a6f1 100644 --- a/ext/fts5/test/fts5join.test +++ b/ext/fts5/test/fts5join.test @@ -65,5 +65,14 @@ do_eqp_test 1.4 { `--SCAN vt VIRTUAL TABLE INDEX 0:= } +do_eqp_test 1.5 { + SELECT * FROM vt, t1 + WHERE vt.rowid = t1.rowid AND vt MATCH ? AND b = ? +} { + QUERY PLAN + |--SCAN vt VIRTUAL TABLE INDEX 0:M1 + `--SEARCH t1 USING INTEGER PRIMARY KEY (rowid=?) +} + finish_test diff --git a/ext/fts5/tool/fts5cost.tcl b/ext/fts5/tool/fts5cost.tcl new file mode 100644 index 0000000000..4f53d29eb6 --- /dev/null +++ b/ext/fts5/tool/fts5cost.tcl @@ -0,0 +1,153 @@ +# +# 2026 March 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#-------------------------------------------------------------------------- +# +# This script measures the relative cost of various queries on fts5 and +# ordinary tables. It builds test tables in fts5cost.db, times each kind of +# query and prints the time taken per query on stdout. +# + + +sqlite3 db fts5cost.db + +# Create an IPK table with 1,000,000 entries. Short records. 
+# +set res [list [catch { db eval {SELECT count(*) FROM t1} } msg] $msg] +if {$res!="0 1000000"} { + db eval { + PRAGMA mmap_size = 1000000000; -- 1GB + DROP TABLE IF EXISTS t1; + CREATE TABLE t1(a INTEGER PRIMARY KEY, b TEXT); + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<1_000_000 + ) + INSERT INTO t1 SELECT i, hex(randomblob(8)) FROM s; + } +} + +# Create an FTS5 table with 1,000,000 entries. Each row contains a single +# column containing a document of 100 terms chosen pseudo-randomly from +# a vocabulary of 2000. +set res [list [catch { db eval {SELECT count(*) FROM f1} } msg] $msg] +if {$res!="0 1000000"} { + set nVocab 2000 + set nTerm 100 + db eval { + BEGIN; + DROP TABLE IF EXISTS vocab1; + CREATE TABLE vocab1(w); + } + for {set ii 0} {$ii<$nVocab} {incr ii} { + set word [format %06x [expr {int(abs(rand()) * 0xFFFFFF)}]] + db eval { INSERT INTO vocab1 VALUES($word) } + lappend lVocab $word + } + db func doc doc + proc doc {} { + for {set ii 0} {$ii<$::nTerm} {incr ii} { + lappend ret [lindex $::lVocab [expr int(abs(rand())*$::nVocab)]] + } + set ret + } + db eval { + DROP TABLE IF EXISTS f1; + CREATE VIRTUAL TABLE f1 USING fts5(x); + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<1_000_000 + ) + INSERT INTO f1(rowid, x) SELECT i, doc() FROM s; + COMMIT; + } +} else { + set lVocab [db eval { SELECT * FROM vocab1 }] + set nVocab [llength $lVocab] +} + +proc rowid_query {n} { + set rowid 654 + for {set ii 0} {$ii<$n} {incr ii} { + db eval { SELECT b FROM t1 WHERE a = $rowid } + set rowid [expr {($rowid + 7717) % 1000000}] + } +} + +proc rowid_query_fts {n} { + set rowid 654 + for {set ii 0} {$ii<$n} {incr ii} { + db eval { SELECT * FROM f1 WHERE rowid = $rowid } + set rowid [expr {($rowid + 7717) % 1000000}] + } +} + +proc match_query_fts {n} { + set idx 654 + for {set ii 0} {$ii<$n} {incr ii} { + set match [lrange $::lVocab $idx $idx+1] + db eval { SELECT * FROM f1($match) } + set idx [expr {($idx + 7717) % $::nVocab}] + } 
+} + +proc prefix_query_fts {n} { + set idx 654 + for {set ii 0} {$ii<$n} {incr ii} { + set match "[lindex $::lVocab $idx]*" + db eval { SELECT * FROM f1($match) } + set idx [expr {($idx + 7717) % $::nVocab}] + } +} + +proc match_rowid_query_fts {n} { + set idx 654 + for {set ii 0} {$ii<$n} {incr ii} { + set match "[lindex $::lVocab $idx]" + db eval { SELECT * FROM f1($match) WHERE rowid=500000 } + set idx [expr {($idx + 7717) % $::nVocab}] + } +} + +proc prefix_rowid_query_fts {n} { + set idx 654 + for {set ii 0} {$ii<$n} {incr ii} { + set match "[lindex $::lVocab $idx]*" + db eval { SELECT * FROM f1($match) WHERE rowid=500000 } + set idx [expr {($idx + 7717) % $::nVocab}] + } +} + + +proc mytime {cmd div} { + set tm [time $cmd] + expr {[lindex $tm 0] / $div} +} + +#set us [mytime { match_rowid_query_fts 1000 } 1000] +#puts "1000 match/rowid queries on fts5 table: ${us} per query" + +set us [mytime { prefix_rowid_query_fts 1000 } 1000] +puts "1000 prefix/rowid queries on fts5 table: ${us} per query" + +set us [mytime { match_query_fts 10 } 10] +puts "10 match queries on fts5 table: ${us} per query" + +set us [mytime { prefix_query_fts 10 } 10] +puts "10 prefix queries on fts5 table: ${us} per query" + +set us [mytime { prefix_rowid_query_fts 1000 } 1000] +puts "1000 prefix/rowid queries on fts5 table: ${us} per query" + +set us [mytime { rowid_query 10000 } 10000] +puts "10000 by-rowid queries on normal table: ${us} per query" + +set us [mytime { rowid_query_fts 10000 } 10000] +puts "10000 by-rowid queries on fts5 table: ${us} per query" + + diff --git a/manifest b/manifest index ff7b5101a3..2fe275dae0 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Restructure\ssome\sinternal\ssubroutine\ssignatures\sfor\simproved\nperformance\sof\saffinity\stype\sconversions. -D 2026-03-19T17:08:08.454 +C Update\sfts5\sscan\scosts\sto\stake\sinto\saccount\sthat\sa\sprefix\squery\scombined\swith\sa\srowid\sequality\sconstraint\sis\sstill\squite\sexpensive. 
+D 2026-03-19T17:56:10.586 F .fossil-settings/binary-glob 61195414528fb3ea9693577e1980230d78a1f8b0a54c78cf1b9b24d0a409ed6a x F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea @@ -114,7 +114,7 @@ F ext/fts5/fts5_config.c bfba970fe1e4eed18ee57c8d51458e226db9a960ddf775c5e50e3d7 F ext/fts5/fts5_expr.c 71d48e8cf0358deace4949276647d317ff7665db6db09f40b81e2e7fe6664c7c F ext/fts5/fts5_hash.c d5871df92ce3fa210a650cf419ee916b87c29977e86084d06612edf772bff6f5 F ext/fts5/fts5_index.c f8cfa37bb7397e5ede20242e4c9cb030bc8b4584ce3f23a5e2495038c0ae64bd -F ext/fts5/fts5_main.c 6889f1373c469d515e792fb3d783c2218e63c560433ebd66edc0f740ab086c1b +F ext/fts5/fts5_main.c b0fed47b3b4420ba6810373480a75bc28a9c0b7d16478d19a396436fb3ff17d7 F ext/fts5/fts5_storage.c 19bc7c4cbe1e6a2dd9849ef7d84b5ca1fcbf194cefc3e386b901e00e08bf05c2 F ext/fts5/fts5_tcl.c 2be6cc14f9448f720fd4418339cd202961a0801ea9424cb3d9de946f8f5a051c F ext/fts5/fts5_test_mi.c 4308d5658cb1f5eee5998dcbaac7d5bdf7a2ef43c8192ca6e0c843f856ccee26 @@ -202,7 +202,7 @@ F ext/fts5/test/fts5hash.test fd3e0367fbf0b0944d6936fdb22696350f57b9871069c67662 F ext/fts5/test/fts5integrity.test 613efcebe16b2d7a4096f03bcfb164f79a000b3354420ceda4a6f3e035090789 F ext/fts5/test/fts5integrity2.test 4c3636615c0201232c44a8105d5cb14fd5499fd0ee3014d7ffd7e83aac76ece8 F ext/fts5/test/fts5interrupt.test af7834ac6c2e71c05aea42d92f272eef3655e89b7a14a5620a2cd9de35e2e8ea -F ext/fts5/test/fts5join.test 48b7ed36956948c5b8456c8bcaa5b087808d99000675918a43c4f51a925f1514 +F ext/fts5/test/fts5join.test 3791e30d034050281191cd8141d969849f6c060135f1e3938fea3eea955f9da9 F ext/fts5/test/fts5lastrowid.test f36298a1fb9f988bde060a274a7ce638faa9c38a31400f8d2d27ea9373e0c4a1 F ext/fts5/test/fts5leftjoin.test 1c14b51f4d1344a89e488160882f05a2246dd7e70c5cf077c8fb473e03c66338 F ext/fts5/test/fts5limits.test 
8ab67cf5d311c124b6ceb0062d0297767176df4572d955fce79fa43004dff01c @@ -271,6 +271,7 @@ F ext/fts5/test/fts5update2.test c5baa76799ac605ebb8e5e21035db2014b396cef25c903e F ext/fts5/test/fts5version.test 44ab35566267b7618c090443de2d9ad84f633df5d20bf72e9bad199ae5fced84 F ext/fts5/test/fts5vocab.test 2a2bdb60d0998fa3124d541b6d30b019504918dc43a6584645b63a24be72f992 F ext/fts5/test/fts5vocab2.test 4265137a3747b27deb1e2e2bde5654120c6de72bfed3238e67806d85af60fc4c +F ext/fts5/tool/fts5cost.tcl 188a802e69422619c526698b92f0e5935f7d00b964e155bf4d5b4d4094989f60 F ext/fts5/tool/fts5speed.tcl b0056f91a55b2d1a3684ec05729de92b042e2f85 F ext/fts5/tool/fts5txt2db.tcl c0d43c8590656f8240e622b00957b3a0facc49482411a9fdc2870b45c0c82f9f F ext/fts5/tool/loadfts5.tcl 95b03429ee6b138645703c6ca192c3ac96eaf093 @@ -2194,8 +2195,8 @@ F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee F tool/warnings.sh a554d13f6e5cf3760f041b87939e3d616ec6961859c3245e8ef701d1eafc2ca2 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f F tool/winmain.c 00c8fb88e365c9017db14c73d3c78af62194d9644feaf60e220ab0f411f3604c -P 3c64f31392328f51e205fde52f19cf70c193e90897ccb7e93c685bad4f342cac -R 72f078867d2671a9f24c867ea87f8152 -U drh -Z b5c58da09518d5fa628ac92460368943 +P 2bc6ce87b39c6ae324c3641c5af60e000df7d7c6d5fd2dd1a318c94cbb4aef84 +R 6c35fdaa0bdeaaa060b11671c8849a23 +U dan +Z f5066f6b4172e70d6c3c82bea527816b # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index e862e1a8c5..4c8d07eff5 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2bc6ce87b39c6ae324c3641c5af60e000df7d7c6d5fd2dd1a318c94cbb4aef84 +539769b63e4044018be2993894b1962df6a6ae58636682b7fb7a2dd5aaf4075c