From: drh <> Date: Sun, 26 Jan 2025 17:29:33 +0000 (+0000) Subject: Small size and complexity reduction on the star-query heuristic. Improved X-Git-Tag: version-3.49.0~53 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fe54b7a1f442a4c5b1a23fe83821adbe04b782cf;p=thirdparty%2Fsqlite.git Small size and complexity reduction on the star-query heuristic. Improved comments for the star-query heuristic. FossilOrigin-Name: a7ecb2f4b7eee78b88f1b2e026dffed2007ca4ffeb152632624ab2582839b250 --- diff --git a/manifest b/manifest index c6113e84a4..e2dc85219e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Revise\sthe\sstrategy\sused\sby\sthe\sstar-query\sheuristic:\s\sInstead\sof\sdecreasing\nthe\scost\sof\sall\sfact-table\sWhereLoops,\sincrease\sthe\srun-cost\sof\sWhereLoops\sthat\nare\sSCANs\sof\sdimension\stables. -D 2025-01-25T23:04:05.646 +C Small\ssize\sand\scomplexity\sreduction\son\sthe\sstar-query\sheuristic.\s\sImproved\ncomments\sfor\sthe\sstar-query\sheuristic. +D 2025-01-26T17:29:33.018 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md e108e1e69ae8e8a59e93c455654b8ac9356a11720d3345df2a4743e9590fb20d @@ -784,7 +784,7 @@ F src/shell.c.in 9ade75afa953c5c2ded38d076533eaa6c9b2ab1977ef6cce0bc773adac178c5 F src/sqlite.h.in 135c4479d03d5dcff7f580c1997f351d94954cf37346db560948c7add6159d9a F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h 3f046c04ea3595d6bfda99b781926b17e672fd6d27da2ba6d8d8fc39981dcb54 -F src/sqliteInt.h e28d3ad85c671effec0892f156e1e0f949733fb0995335406b5ec3803faba3a4 +F src/sqliteInt.h 5c859fc5b1b9fa489bc7a39d37606472b70f808f965eab11c24f14d8d7654330 F src/sqliteLimit.h 1bbdbf72bd0411d003267ffebc59a262f061df5653027a75627d03f48ca30523 F src/status.c cb11f8589a6912af2da3bb1ec509a94dd8ef27df4d4c1a97e0bcf2309ece972b F src/table.c 
0f141b58a16de7e2fbe81c308379e7279f4c6b50eb08efeec5892794a0ba30d1 @@ -863,8 +863,8 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c 4e6181d8780ab0af2e1388d0754cbe6f2f04593d2b1ab6c41699a89942fd8997 F src/wal.h ba252daaa94f889f4b2c17c027e823d9be47ce39da1d3799886bbd51f0490452 F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014 -F src/where.c 3917a56c85951be386057ee4e9af8a6746427f6f26e8ffce293a61e334b76498 -F src/whereInt.h 510b12f85a763602a1b1e70a9cc643a73082a63cda2ca204dd912daaa540115d +F src/where.c adda3afc98a4f58713973997c699be2a8515c591701092e8380695f1a0a7b220 +F src/whereInt.h d20cddddb1d61b18d5cb1fcfa9b77fbeebbc4afe44d996e603452a23b3009ee1 F src/wherecode.c 0c3d3199a2b769a5e2bb70feb5003dc85b3d86842ecaf903a47f2b4205ca5dab F src/whereexpr.c 0f93a29cabd3a338d09a1f5c6770620a1ac51ec1157f3229502a7e7767c60b6f F src/window.c 2bf01f9941a64fbcead61a0e3cb5db3fca5094b30d2ff0d23274c2a81d2e2385 @@ -1688,7 +1688,7 @@ F test/spellfix4.test 51c7c26514ade169855c66bcf130bd5acfb4d7fd090cc624645ab275ae F test/sqldiff1.test 1b7ab4f312442c5cc6b3a5f299fa8ca051416d1dd173cb1126fd51bf64f2c3fb F test/sqllimits1.test 408131e4975d61868711c83f101a56d4602313cc5cae88d3eee81c1da364fd89 F test/sqllog.test 6af6cb0b09f4e44e1917e06ce85be7670302517a -F test/starschema1.test a84205f97fe278a015ac39546c86b97228d22043af28f3a2ef809e8d5637ce1d +F test/starschema1.test f5388cd32527ab18d3f98f9e3402ec780f6a186e04e0d9c8531d7568ee734e11 F test/startup.c 1beb5ca66fcc0fce95c3444db9d1674f90fc605499a574ae2434dcfc10d22805 F test/stat.test 123212a20ceb496893d5254a5f6c76442ce549fdc08d1702d8288a2bbaac8408 F test/statfault.test 064f43379e4992b5221b7d9ac887c313b3191f85cce605d78e416fc4045da64e @@ -2208,8 +2208,8 @@ F tool/version-info.c 3b36468a90faf1bbd59c65fd0eb66522d9f941eedd364fabccd7227350 F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7 F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139 F 
tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b3ebeb0682a2c837987acf4ed92f06cf91aea235830c5a0f9dd1ce64afe16e84 -R a3d46d3d0d804ef5b7f52b211e4f3855 +P 1bc09c9e8bd77ac41ecbe510c7e003757fc11d0f586da6cdf3584315aa9d6407 +R c0f75689e724495cafe203db572d7b59 U drh -Z 01bfb692a509c7bcf6c556a6b5d760a5 +Z 5884f43bd01d477b04985ecdd4572f42 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 29b013c9fc..22688c5390 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1bc09c9e8bd77ac41ecbe510c7e003757fc11d0f586da6cdf3584315aa9d6407 +a7ecb2f4b7eee78b88f1b2e026dffed2007ca4ffeb152632624ab2582839b250 diff --git a/src/sqliteInt.h b/src/sqliteInt.h index d29b2e3930..205f8f3e0e 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -881,6 +881,8 @@ typedef u64 tRowcnt; ** 0.5 -> -10 0.1 -> -33 0.0625 -> -40 */ typedef INT16_TYPE LogEst; +#define LOGEST_MIN (-32768) +#define LOGEST_MAX (32767) /* ** Set the SQLITE_PTRSIZE macro to the number of bytes in a pointer diff --git a/src/where.c b/src/where.c index 0db7cbc1e2..01f8ff0d9a 100644 --- a/src/where.c +++ b/src/where.c @@ -5451,7 +5451,7 @@ static LogEst whereSortingCost( ** For the purposes of this heuristic, a star-query is defined as a query ** with a large central table that is joined using an INNER JOIN, ** not CROSS or OUTER JOINs, against four or more smaller tables. -* The central table is called the "fact" table. The smaller tables +** The central table is called the "fact" table. The smaller tables ** that get joined are "dimension tables". Also, any table that is ** self-joined cannot be a dimension table; we assume that dimension ** tables may only be joined against fact tables. @@ -5460,16 +5460,35 @@ static LogEst whereSortingCost( ** ** If pWInfo describes a star-query, then the cost for SCANs of dimension ** WhereLoops is increased to be slightly larger than the cost of a SCAN -** in the fact table. 
This heuristic helps keep fact tables in -** outer loops. Without this heuristic, paths with fact tables in outer -** loops tend to get pruned by the mxChoice limit on the number of paths, -** resulting in poor query plans. The cost adjustment for each WhereLoop -** is stored in its rStarDelta field. +** in the fact table. Only SCAN costs are increased. SEARCH costs are +** unchanged. This heuristic helps keep fact tables in outer loops. Without +** this heuristic, paths with fact tables in outer loops tend to get pruned +** by the mxChoice limit on the number of paths, resulting in poor query +** plans. See the starschema1.test test module for examples of queries +** that need this heuristic to find good query plans. ** ** This heuristic can be completely disabled, so that no query is ** considered a star-query, using SQLITE_TESTCTRL_OPTIMIZATION to ** disable the SQLITE_StarQuery optimization. In the CLI, the command ** to do that is: ".testctrl opt -starquery". +** +** HISTORICAL NOTES: +** +** This optimization was first added on 2024-05-09 by check-in 38db9b5c83d. +** The original optimization reduced the cost and output size estimate for +** fact tables to help them move to outer loops. But months later (as people +** started upgrading) performance regression reports started coming in, +** including: +** +** forum post b18ef983e68d06d1 (2024-12-21) +** forum post 0025389d0860af82 (2025-01-14) +** forum post d87570a145599033 (2025-01-17) +** +** To address these, the criteria for a star-query were tightened to exclude +** cases where the fact and dimensions are separated by an outer join, and +** the effect of star-schema detection was changed to increase the rRun cost +** on just full table scans of dimension tables, rather than reducing costs +** in all the access methods of the fact table. 
*/ static int computeMxChoice(WhereInfo *pWInfo){ int nLoop = pWInfo->nLevel; /* Number of terms in the join */ @@ -5477,7 +5496,7 @@ static int computeMxChoice(WhereInfo *pWInfo){ #ifdef SQLITE_DEBUG /* The star-query detection code below makes use of the following - ** properties of the WhereLoop list, so verifying them before + ** properties of the WhereLoop list, so verify them before ** continuing: ** (1) .maskSelf is the bitmask corresponding to .iTab ** (2) The WhereLoop list is in ascending .iTab order @@ -5500,9 +5519,10 @@ static int computeMxChoice(WhereInfo *pWInfo){ pWInfo->bStarDone = 1; /* Only do this computation once */ - /* Check to see if we are dealing with a star schema and if so, reduce - ** the cost of fact tables relative to dimension tables, as a heuristic - ** to help keep the fact tables in outer loops. + /* Check to see if we are dealing with a star schema and if so, adjust + ** SCAN cost of dimension tables so that they are as large as the SCAN + ** cost of the fact table. This is a heuristic that helps keep the + ** fact tables in outer loops. 
*/ assert( !pWInfo->bStarUsed ); aFromTabs = pWInfo->pTabList->a; @@ -5543,29 +5563,34 @@ static int computeMxChoice(WhereInfo *pWInfo){ } } if( nDep<=3 ) continue; - - /* Compute the maximum cost of any WhereLoop for the iFromIdx-th term */ - mxRun = -32767; - for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){ - if( pWLoop->iTabiTab>iFromIdx ) break; - if( pWLoop->rRun>mxRun ) mxRun = pWLoop->rRun; - } - + /* If we reach this point, it means that pFactTab is a fact table */ + +#ifdef WHERETRACE_ENABLED /* Make sure rStarDelta values are initialized */ if( !pWInfo->bStarUsed ){ for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ pWLoop->rStarDelta = 0; } - pWInfo->bStarUsed = 1; } +#endif + pWInfo->bStarUsed = 1; + + /* Compute one more than the maximum cost of any WhereLoop for the + ** fact table */ + mxRun = LOGEST_MIN; + for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){ + if( pWLoop->iTabiTab>iFromIdx ) break; + if( pWLoop->rRun>mxRun ) mxRun = pWLoop->rRun; + } + if( ALWAYS(mxRunpNextLoop){ if( (pWLoop->maskSelf & mSeen)==0 ) continue; if( pWLoop->nLTerm ) continue; - if( pWLoop->rRun<=mxRun ){ + if( pWLoop->rRuniTab; @@ -5573,12 +5598,12 @@ static int computeMxChoice(WhereInfo *pWInfo){ "Increase SCAN cost of dimension %s(%d) of fact %s(%d) to %d\n", pDim->zAlias ? pDim->zAlias: pDim->pSTab->zName, pWLoop->iTab, pFactTab->zAlias ? 
pFactTab->zAlias : pFactTab->pSTab->zName, - iFromIdx, mxRun+1 + iFromIdx, mxRun ); } + pWLoop->rStarDelta = mxRun - pWLoop->rRun; #endif /* WHERETRACE_ENABLED */ - pWLoop->rStarDelta = mxRun+1 - pWLoop->rRun; - pWLoop->rRun = mxRun+1; + pWLoop->rRun = mxRun; } } } diff --git a/src/whereInt.h b/src/whereInt.h index 35bfe581dd..8ba8a7072d 100644 --- a/src/whereInt.h +++ b/src/whereInt.h @@ -162,8 +162,10 @@ struct WhereLoop { /**** whereLoopXfer() copies fields above ***********************/ # define WHERE_LOOP_XFER_SZ offsetof(WhereLoop,nLSlot) u16 nLSlot; /* Number of slots allocated for aLTerm[] */ +#ifdef WHERETRACE_ENABLED LogEst rStarDelta; /* Cost delta due to star-schema heuristic. Not ** initialized unless pWInfo->bStarUsed */ +#endif WhereTerm **aLTerm; /* WhereTerms used */ WhereLoop *pNextLoop; /* Next WhereLoop object in the WhereClause */ WhereTerm *aLTermSpace[3]; /* Initial aLTerm[] space */ @@ -485,9 +487,9 @@ struct WhereInfo { unsigned bDeferredSeek :1; /* Uses OP_DeferredSeek */ unsigned untestedTerms :1; /* Not all WHERE terms resolved by outer loop */ unsigned bOrderedInnerLoop:1;/* True if only the inner-most loop is ordered */ - unsigned sorted :1; /* True if really sorted (not just grouped) */ + unsigned sorted :1; /* True if really sorted (not just grouped) */ unsigned bStarDone :1; /* True if check for star-query is complete */ - unsigned bStarUsed :1; /* True if cost adjustments for star-query */ + unsigned bStarUsed :1; /* True if star-query heuristic is used */ LogEst nRowOut; /* Estimated number of output rows */ #ifdef WHERETRACE_ENABLED LogEst rTotalCost; /* Total cost of the solution */ diff --git a/test/starschema1.test b/test/starschema1.test index af8168b510..bb7d8aa79b 100644 --- a/test/starschema1.test +++ b/test/starschema1.test @@ -10,7 +10,7 @@ #*********************************************************************** # # Test cases for the ability of the query planner to cope with -# star-schema queries on databases with 
goofy indexes. +# star-schema queries. # set testdir [file dirname $argv0]