-C Revise\sthe\sstrategy\sused\sby\sthe\sstar-query\sheuristic:\s\sInstead\sof\sdecreasing\nthe\scost\sof\sall\sfact-table\sWhereLoops,\sincrease\sthe\srun-cost\sof\sWhereLoops\sthat\nare\sSCANs\sof\sdimension\stables.
-D 2025-01-25T23:04:05.646
+C Small\ssize\sand\scomplexity\sreduction\son\sthe\sstar-query\sheuristic.\s\sImproved\ncomments\sfor\sthe\sstar-query\sheuristic.
+D 2025-01-26T17:29:33.018
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md e108e1e69ae8e8a59e93c455654b8ac9356a11720d3345df2a4743e9590fb20d
F src/sqlite.h.in 135c4479d03d5dcff7f580c1997f351d94954cf37346db560948c7add6159d9a
F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8
F src/sqlite3ext.h 3f046c04ea3595d6bfda99b781926b17e672fd6d27da2ba6d8d8fc39981dcb54
-F src/sqliteInt.h e28d3ad85c671effec0892f156e1e0f949733fb0995335406b5ec3803faba3a4
+F src/sqliteInt.h 5c859fc5b1b9fa489bc7a39d37606472b70f808f965eab11c24f14d8d7654330
F src/sqliteLimit.h 1bbdbf72bd0411d003267ffebc59a262f061df5653027a75627d03f48ca30523
F src/status.c cb11f8589a6912af2da3bb1ec509a94dd8ef27df4d4c1a97e0bcf2309ece972b
F src/table.c 0f141b58a16de7e2fbe81c308379e7279f4c6b50eb08efeec5892794a0ba30d1
F src/wal.c 4e6181d8780ab0af2e1388d0754cbe6f2f04593d2b1ab6c41699a89942fd8997
F src/wal.h ba252daaa94f889f4b2c17c027e823d9be47ce39da1d3799886bbd51f0490452
F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014
-F src/where.c 3917a56c85951be386057ee4e9af8a6746427f6f26e8ffce293a61e334b76498
-F src/whereInt.h 510b12f85a763602a1b1e70a9cc643a73082a63cda2ca204dd912daaa540115d
+F src/where.c adda3afc98a4f58713973997c699be2a8515c591701092e8380695f1a0a7b220
+F src/whereInt.h d20cddddb1d61b18d5cb1fcfa9b77fbeebbc4afe44d996e603452a23b3009ee1
F src/wherecode.c 0c3d3199a2b769a5e2bb70feb5003dc85b3d86842ecaf903a47f2b4205ca5dab
F src/whereexpr.c 0f93a29cabd3a338d09a1f5c6770620a1ac51ec1157f3229502a7e7767c60b6f
F src/window.c 2bf01f9941a64fbcead61a0e3cb5db3fca5094b30d2ff0d23274c2a81d2e2385
F test/sqldiff1.test 1b7ab4f312442c5cc6b3a5f299fa8ca051416d1dd173cb1126fd51bf64f2c3fb
F test/sqllimits1.test 408131e4975d61868711c83f101a56d4602313cc5cae88d3eee81c1da364fd89
F test/sqllog.test 6af6cb0b09f4e44e1917e06ce85be7670302517a
-F test/starschema1.test a84205f97fe278a015ac39546c86b97228d22043af28f3a2ef809e8d5637ce1d
+F test/starschema1.test f5388cd32527ab18d3f98f9e3402ec780f6a186e04e0d9c8531d7568ee734e11
F test/startup.c 1beb5ca66fcc0fce95c3444db9d1674f90fc605499a574ae2434dcfc10d22805
F test/stat.test 123212a20ceb496893d5254a5f6c76442ce549fdc08d1702d8288a2bbaac8408
F test/statfault.test 064f43379e4992b5221b7d9ac887c313b3191f85cce605d78e416fc4045da64e
F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7
F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P b3ebeb0682a2c837987acf4ed92f06cf91aea235830c5a0f9dd1ce64afe16e84
-R a3d46d3d0d804ef5b7f52b211e4f3855
+P 1bc09c9e8bd77ac41ecbe510c7e003757fc11d0f586da6cdf3584315aa9d6407
+R c0f75689e724495cafe203db572d7b59
U drh
-Z 01bfb692a509c7bcf6c556a6b5d760a5
+Z 5884f43bd01d477b04985ecdd4572f42
# Remove this line to create a well-formed Fossil manifest.
** For the purposes of this heuristic, a star-query is defined as a query
** with a large central table that is joined using an INNER JOIN,
** not CROSS or OUTER JOINs, against four or more smaller tables.
-* The central table is called the "fact" table. The smaller tables
+** The central table is called the "fact" table. The smaller tables
** that get joined are "dimension tables". Also, any table that is
** self-joined cannot be a dimension table; we assume that dimension
** tables may only be joined against fact tables.
**
** If pWInfo describes a star-query, then the cost for SCANs of dimension
** WhereLoops is increased to be slightly larger than the cost of a SCAN
-** in the fact table. This heuristic helps keep fact tables in
-** outer loops. Without this heuristic, paths with fact tables in outer
-** loops tend to get pruned by the mxChoice limit on the number of paths,
-** resulting in poor query plans. The cost adjustment for each WhereLoop
-** is stored in its rStarDelta field.
+** in the fact table. Only SCAN costs are increased. SEARCH costs are
+** unchanged. This heuristic helps keep fact tables in outer loops. Without
+** this heuristic, paths with fact tables in outer loops tend to get pruned
+** by the mxChoice limit on the number of paths, resulting in poor query
+** plans. See the starschema1.test test module for examples of queries
+** that need this heuristic to find good query plans.
**
** This heuristic can be completely disabled, so that no query is
** considered a star-query, using SQLITE_TESTCTRL_OPTIMIZATION to
** disable the SQLITE_StarQuery optimization. In the CLI, the command
** to do that is: ".testctrl opt -starquery".
+**
+** HISTORICAL NOTES:
+**
+** This optimization was first added on 2024-05-09 by check-in 38db9b5c83d.
+** The original optimization reduced the cost and output size estimate for
+** fact tables to help them move to outer loops. But months later (as people
+** started upgrading) performance regression reports started caming in,
+** including:
+**
+** forum post b18ef983e68d06d1 (2024-12-21)
+** forum post 0025389d0860af82 (2025-01-14)
+** forum post d87570a145599033 (2025-01-17)
+**
+** To address these, the criteria for a star-query was tightened to exclude
+** cases where the fact and dimensions are separated by an outer join, and
+** the affect of star-schema detection was changed to increase the rRun cost
+** on just full table scans of dimension tables, rather than reducing costs
+** in the all access methods of the fact table.
*/
static int computeMxChoice(WhereInfo *pWInfo){
int nLoop = pWInfo->nLevel; /* Number of terms in the join */
#ifdef SQLITE_DEBUG
/* The star-query detection code below makes use of the following
- ** properties of the WhereLoop list, so verifying them before
+ ** properties of the WhereLoop list, so verify them before
** continuing:
** (1) .maskSelf is the bitmask corresponding to .iTab
** (2) The WhereLoop list is in ascending .iTab order
pWInfo->bStarDone = 1; /* Only do this computation once */
- /* Check to see if we are dealing with a star schema and if so, reduce
- ** the cost of fact tables relative to dimension tables, as a heuristic
- ** to help keep the fact tables in outer loops.
+ /* Check to see if we are dealing with a star schema and if so, adjust
+ ** SCAN cost of dimensino tables so that they are as large as the SCAN
+ ** cost of the fact table. This is a heuristic that helps keep the
+ ** fact tables in outer loops.
*/
assert( !pWInfo->bStarUsed );
aFromTabs = pWInfo->pTabList->a;
}
}
if( nDep<=3 ) continue;
-
- /* Compute the maximum cost of any WhereLoop for the iFromIdx-th term */
- mxRun = -32767;
- for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
- if( pWLoop->iTab<iFromIdx ) continue;
- if( pWLoop->iTab>iFromIdx ) break;
- if( pWLoop->rRun>mxRun ) mxRun = pWLoop->rRun;
- }
-
+ /* If we reach this point, it means that pFactTab is a fact table */
+
+#ifdef WHERETRACE_ENABLED
/* Make sure rStarDelta values are initialized */
if( !pWInfo->bStarUsed ){
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
pWLoop->rStarDelta = 0;
}
- pWInfo->bStarUsed = 1;
}
+#endif
+ pWInfo->bStarUsed = 1;
+
+ /* Compute one more than the maximum cost of any WhereLoop for the
+ ** fact table */
+ mxRun = LOGEST_MIN;
+ for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
+ if( pWLoop->iTab<iFromIdx ) continue;
+ if( pWLoop->iTab>iFromIdx ) break;
+ if( pWLoop->rRun>mxRun ) mxRun = pWLoop->rRun;
+ }
+ if( ALWAYS(mxRun<LOGEST_MAX) ) mxRun++;
/* Increase the cost of table scans for dimension tables to be slightly
** more than the maximum cost of fact table */
for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
if( (pWLoop->maskSelf & mSeen)==0 ) continue;
if( pWLoop->nLTerm ) continue;
- if( pWLoop->rRun<=mxRun ){
+ if( pWLoop->rRun<mxRun ){
#ifdef WHERETRACE_ENABLED /* 0x80000 */
if( sqlite3WhereTrace & 0x80000 ){
SrcItem *pDim = aFromTabs + pWLoop->iTab;
"Increase SCAN cost of dimension %s(%d) of fact %s(%d) to %d\n",
pDim->zAlias ? pDim->zAlias: pDim->pSTab->zName, pWLoop->iTab,
pFactTab->zAlias ? pFactTab->zAlias : pFactTab->pSTab->zName,
- iFromIdx, mxRun+1
+ iFromIdx, mxRun
);
}
+ pWLoop->rStarDelta = mxRun - pWLoop->rRun;
#endif /* WHERETRACE_ENABLED */
- pWLoop->rStarDelta = mxRun+1 - pWLoop->rRun;
- pWLoop->rRun = mxRun+1;
+ pWLoop->rRun = mxRun;
}
}
}
/**** whereLoopXfer() copies fields above ***********************/
# define WHERE_LOOP_XFER_SZ offsetof(WhereLoop,nLSlot)
u16 nLSlot; /* Number of slots allocated for aLTerm[] */
+#ifdef WHERETRACE_ENABLED
LogEst rStarDelta; /* Cost delta due to star-schema heuristic. Not
** initialized unless pWInfo->bStarUsed */
+#endif
WhereTerm **aLTerm; /* WhereTerms used */
WhereLoop *pNextLoop; /* Next WhereLoop object in the WhereClause */
WhereTerm *aLTermSpace[3]; /* Initial aLTerm[] space */
unsigned bDeferredSeek :1; /* Uses OP_DeferredSeek */
unsigned untestedTerms :1; /* Not all WHERE terms resolved by outer loop */
unsigned bOrderedInnerLoop:1;/* True if only the inner-most loop is ordered */
- unsigned sorted :1; /* True if really sorted (not just grouped) */
+ unsigned sorted :1; /* True if really sorted (not just grouped) */
unsigned bStarDone :1; /* True if check for star-query is complete */
- unsigned bStarUsed :1; /* True if cost adjustments for star-query */
+ unsigned bStarUsed :1; /* True if star-query heuristic is used */
LogEst nRowOut; /* Estimated number of output rows */
#ifdef WHERETRACE_ENABLED
LogEst rTotalCost; /* Total cost of the solution */