-C Fix\sa\sduplicate\sassert()\scaused\sby\sthe\ssecond\scherrypick\sin\sthe\sprevious\ncheck-in.
-D 2023-11-29T16:26:31.103
+C Add\sa\sheuristic\sin\sbetween\sthe\stwo\ssolver()\spasses\sof\sthe\squery\splanner\sthat\ntries\sto\sprevent\sa\svery\sslow\squery\splan\sin\scases\swhere\sthe\soutput\srow\scount\nestimate\sis\simprecise.
+D 2024-04-02T11:54:13.919
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
F src/wal.c 9eccc7ebb532a7b0fd3cabc16cff576b9afa763472272db67d84fb8cec96f5c0
F src/wal.h 606292549f5a7be50b6227bd685fa76e3a4affad71bb8ac5ce4cb5c79f6a176a
F src/walker.c 7607f1a68130c028255d8d56094ea602fc402c79e1e35a46e6282849d90d5fe4
-F src/where.c 9adf9d9e55a17d6e89d7286c85836738efcb081c8630d214156542b4f6ea6ae6
+F src/where.c 9c4a3b64e633941b5dc4e3020f3b53fd2430374dde4d32547c6402b57eeff7d6
F src/whereInt.h 064a1508edcc9af400a3b79211ba55e553fab6451b797a31334c0d11cbb7debb
F src/wherecode.c cf67460973119c7b2141ad67daf8368dfb4871f225e2489f95effaa139007bfd
F src/whereexpr.c ca55a11c2443700fe084a1e039660688d7733c594a37697ee4bd99462e2c2f6a
F test/whereJ.test 88287550f6ee604422403b053455b1ad894eeaa5c35d348532dfa1439286cb9a
F test/whereK.test f8e3cf26a8513ecc7f514f54df9f0572c046c42b
F test/whereL.test 0a19fc44cd1122040f56c934f1b14d0ca85bde28f270268a428dd9796ea0634c
+F test/whereN.test 63a3584b71acfb6963416de82f26c6b1644abc5ca6080c76546b9246734c8803
F test/wherefault.test 1374c3aa198388925246475f84ad4cd5f9528864
F test/wherelfault.test 9012e4ef5259058b771606616bd007af5d154e64cc25fa9fd4170f6411db44e3
F test/wherelimit.test 592081800806d297dd7449b1030c863d2883d6d42901837ccd2e5a9bd962edb0
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P f9c6e6a7102689b2a5cf32996a02853e346bcebaed6af0265a7422260fa3358c
-R b7dddf28b8f8cdcdb7b64db0061d6a57
+P f10d4fc4a8fe439e58f35b94322229394229c8f3e15b572c2928402df7d30d90
+Q +8018417b0143ea11535f2457bf3e4b3755717c554a17df1076425b4251b5f2c6
+R c34b6a95461d27a98a446f4c4db8e82b
U drh
-Z 1ce388907a6964ca9edb1d789f771e7a
+Z 60ce6d3f716eec6a9eb866abf26289ff
# Remove this line to create a well-formed Fossil manifest.
}
}
-
pWInfo->nRowOut = pFrom->nRow;
/* Free temporary memory and return success */
return SQLITE_OK;
}
+/*
+** This routine implements a heuristic designed to improve query planning.
+** This routine is called in between the first and second call to
+** wherePathSolver(). Hence the name "Interstage" "Heuristic".
+**
+** The first call to wherePathSolver() (hereafter just "solver()") computes
+** the best path without regard to the order of the outputs. The second call
+** to the solver() builds upon the first call to try to find an alternative
+** path that satisfies the ORDER BY clause.
+**
+** This routine looks at the results of the first solver() run, and for
+** every FROM clause term in the resulting query plan that uses an equality
+** constraint against an index, disable other WhereLoops for that same
+** FROM clause term that would try to do a full-table scan. This prevents
+** an index search from being converted into a full-table scan in order to
+** satisfy an ORDER BY clause, since even though we might get slightly better
+** performance using the full-scan without sorting if the output size
+** estimates are very precise, we might also get severe performance
+** degradation using the full-scan if the output size estimate is too large.
+** It is better to err on the side of caution.
+**
+** Except, if the first solver() call generated a full-table scan in an outer
+** loop then stop this analysis at the first full-scan, since the second
+** solver() run might try to swap that full-scan for another in order to
+** get the output into the correct order. In other words, we do *not* want
+** to inhibit a rewrites like this:
+**
+** First Solver() Second Solver()
+** |-- SCAN t1 |-- SCAN t2
+** |-- SEARCH t2 `-- SEARCH t1
+** `-- SORT USING B-TREE
+**
+** Rather, the purpose of this routine is to inhibit rewrites such as:
+**
+** First Solver() Second Solver()
+** |-- SEARCH t1 |-- SCAN t2 <--- bad!
+** |-- SEARCH t2 `-- SEARCH t1
+** `-- SORT USING B-TREE
+**
+** See test cases in test/whereN.test for the real-world query that
+** originally provoked this heuristic.
+*/
+static SQLITE_NOINLINE void whereInterstageHeuristic(WhereInfo *pWInfo){
+ int i;
+ for(i=0; i<pWInfo->nLevel; i++){
+ WhereLoop *p = pWInfo->a[i].pWLoop;
+ if( p==0 ) break;
+ if( (p->wsFlags & WHERE_VIRTUALTABLE)!=0 ) continue;
+ if( (p->wsFlags & (WHERE_COLUMN_EQ|WHERE_COLUMN_NULL|WHERE_COLUMN_IN))!=0 ){
+ u8 iTab = p->iTab;
+ WhereLoop *pLoop;
+ for(pLoop=pWInfo->pLoops; pLoop; pLoop=pLoop->pNextLoop){
+ if( pLoop->iTab!=iTab ) continue;
+ if( (pLoop->wsFlags & WHERE_CONSTRAINT)!=0 ) continue;
+ pLoop->prereq = ALLBITS; /* Prevent 2nd solver() from using this one */
+ }
+ }else{
+ break;
+ }
+ }
+}
+
/*
** Most queries use only a single table (they are not joins) and have
** simple == constraints against indexed fields. This routine attempts
wherePathSolver(pWInfo, 0);
if( db->mallocFailed ) goto whereBeginError;
if( pWInfo->pOrderBy ){
+ whereInterstageHeuristic(pWInfo);
wherePathSolver(pWInfo, pWInfo->nRowOut+1);
if( db->mallocFailed ) goto whereBeginError;
}
--- /dev/null
+# 2024-04-02
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+# Tests for the whereInterstageHeuristic() routine in the query planner.
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+set testprefix whereN
+
+# The following is a simplified and "sanitized" version of the original
+# real-world query that brought the problem to light.
+#
+# The issue is a slow query. The answer is correct, but it was taking too
+# much time, because it was doing a full table scan rather than an indexed
+# lookup.
+#
+# The problem was that the query planner was overestimating the number of
+# output rows. The estimated number of output rows is accurate if the
+# DSNAME parameter is "ds-one". In that case, a large fraction of the rows
+# in "violation" end up being output. The query planner correctly deduces
+# that it is faster to do a full table scan of the large "violation" table
+# to avoid the after-query sort that implements the ORDER BY clause. However,
+# if the DSNAME is "ds-two", then only a few rows (about 6) are generated,
+# and it is much much faster to do an indexed lookup of "violation" followed
+# by a sort operation to implement ORDER BY
+#
+# The problem, of course, is that the query planner has no way of knowing
+# in advance how many rows will be generated. The query planner tries to
+# estimate a worst case, which is a large number of output rows, and it picks
+# the best plan for that case. However, the plan choosen is very inefficient
+# when the number of output rows is small.
+#
+# The whereInterstageHeuristic() routine in the query planner attempts to
+# correct this by adjusting the query plan such that it avoids the very bad
+# query plan for a small number of rows, at the expense of a slightly less
+# efficient plan for a large number of rows. The large number of rows case
+# is perhaps 5% slower with the revised plan, but the small number of
+# rows case is around 100 times faster. That seems like a good tradeoff.
+#
+do_execsql_test 1.0 {
+ CREATE TABLE datasource(dsid INT, name TEXT);
+ INSERT INTO datasource VALUES(1,'ds-one'),(2,'ds-two'),(3,'ds-three');
+ CREATE INDEX ds1 ON datasource(name, dsid);
+
+ CREATE TABLE rule(rid INT, team_id INT, dsid INT);
+ WITH RECURSIVE c(n) AS (VALUES(1) UNION ALL SELECT n+1 FROM c WHERE n<9)
+ INSERT INTO rule(rid,team_id,dsid) SELECT n, 1, 1 FROM c;
+ WITH RECURSIVE c(n) AS (VALUES(10) UNION ALL SELECT n+1 FROM c WHERE n<24)
+ INSERT INTO rule(rid,team_id,dsid) SELECT n, 2, 2 FROM c;
+ CREATE INDEX rule2 ON rule(dsid, rid);
+
+ CREATE TABLE violation(vid INT, rid INT, vx BLOB);
+ /*** Uncomment to insert actual data
+ WITH src(rid, cnt) AS (VALUES(1,3586),(2,1343),(3,6505),(5,76230),
+ (6,740),(7,287794),(8,457),(12,1),
+ (14,1),(16,1),(17,1),(18,1),(19,1))
+ INSERT INTO violation(vid, rid, vx)
+ SELECT rid*1000000+value, rid, randomblob(15)
+ FROM src, generate_series(1,cnt);
+ ***/
+ CREATE INDEX v1 ON violation(rid, vid);
+ CREATE INDEX v2 ON violation(vid);
+ ANALYZE;
+ DELETE FROM sqlite_stat1;
+ DROP TABLE IF EXISTS sqlite_stat4;
+ INSERT INTO sqlite_stat1 VALUES
+ ('violation','v2','376661 1'),
+ ('violation','v1','376661 28974 1'),
+ ('rule','rule2','24 12 1'),
+ ('datasource','ds1','3 1 1');
+ ANALYZE sqlite_schema;
+}
+set DSNAME ds-two ;# Only a few rows. Change to "ds-one" for many rows.
+do_eqp_test 1.1 {
+ SELECT count(*), length(group_concat(vx)) FROM (
+ SELECT V.*
+ FROM datasource DS, rule R, violation V
+ WHERE V.rid=R.rid
+ AND R.dsid=DS.dsid
+ AND DS.name=$DSNAME
+ ORDER BY V.vid desc
+ );
+} {
+ QUERY PLAN
+ |--CO-ROUTINE (subquery-xxxxxx)
+ | |--SEARCH DS USING COVERING INDEX ds1 (name=?)
+ | |--SEARCH R USING COVERING INDEX rule2 (dsid=?)
+ | |--SEARCH V USING INDEX v1 (rid=?)
+ | `--USE TEMP B-TREE FOR ORDER BY
+ `--SCAN (subquery-xxxxxx)
+}
+# ^^^^---- We want to see three SEARCH terms. No SCAN terms.
+# The ORDER BY is implemented by a separate sorter pass.
+
+finish_test