From: drh <> Date: Sat, 25 Jan 2025 23:04:05 +0000 (+0000) Subject: Revise the strategy used by the star-query heuristic: Instead of decreasing X-Git-Tag: version-3.49.0~54 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fb9e8e48fd70b463fb7ba6d99e00f2be54df749e;p=thirdparty%2Fsqlite.git Revise the strategy used by the star-query heuristic: Instead of decreasing the cost of all fact-table WhereLoops, increase the run-cost of WhereLoops that are SCANs of dimension tables. FossilOrigin-Name: 1bc09c9e8bd77ac41ecbe510c7e003757fc11d0f586da6cdf3584315aa9d6407 --- diff --git a/manifest b/manifest index b407f7242e..c6113e84a4 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improvments\sto\sdebug\soutput\son\sthe\sstar-query\sheuristic. -D 2025-01-25T14:30:36.057 +C Revise\sthe\sstrategy\sused\sby\sthe\sstar-query\sheuristic:\s\sInstead\sof\sdecreasing\nthe\scost\sof\sall\sfact-table\sWhereLoops,\sincrease\sthe\srun-cost\sof\sWhereLoops\sthat\nare\sSCANs\sof\sdimension\stables. +D 2025-01-25T23:04:05.646 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md e108e1e69ae8e8a59e93c455654b8ac9356a11720d3345df2a4743e9590fb20d @@ -863,8 +863,8 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c 4e6181d8780ab0af2e1388d0754cbe6f2f04593d2b1ab6c41699a89942fd8997 F src/wal.h ba252daaa94f889f4b2c17c027e823d9be47ce39da1d3799886bbd51f0490452 F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014 -F src/where.c 8fb5ce22e5fbc82f8e53a059e76ff492113f3de8ff26c3639d52c75e94088596 -F src/whereInt.h 3b2ef4617758174d00c6940850e4988c1b195d39bcc20f2965319a1f7bffc714 +F src/where.c 3917a56c85951be386057ee4e9af8a6746427f6f26e8ffce293a61e334b76498 +F src/whereInt.h 510b12f85a763602a1b1e70a9cc643a73082a63cda2ca204dd912daaa540115d F src/wherecode.c 0c3d3199a2b769a5e2bb70feb5003dc85b3d86842ecaf903a47f2b4205ca5dab F src/whereexpr.c 0f93a29cabd3a338d09a1f5c6770620a1ac51ec1157f3229502a7e7767c60b6f F src/window.c 2bf01f9941a64fbcead61a0e3cb5db3fca5094b30d2ff0d23274c2a81d2e2385 @@ -2208,8 +2208,8 @@ F tool/version-info.c 3b36468a90faf1bbd59c65fd0eb66522d9f941eedd364fabccd7227350 F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7 F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 8b9e621dbf599d7e75e07d75d4c400247d693ea76e00eba0919f4e33e3e10bdd -R e6dd9f24405ca06acecad76b774b4e57 +P b3ebeb0682a2c837987acf4ed92f06cf91aea235830c5a0f9dd1ce64afe16e84 +R a3d46d3d0d804ef5b7f52b211e4f3855 U drh -Z 5c57dd71c02ea0fa33cecfe1d8ec4eed +Z 01bfb692a509c7bcf6c556a6b5d760a5 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index a25e3e3f14..29b013c9fc 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b3ebeb0682a2c837987acf4ed92f06cf91aea235830c5a0f9dd1ce64afe16e84 +1bc09c9e8bd77ac41ecbe510c7e003757fc11d0f586da6cdf3584315aa9d6407 diff --git a/src/where.c b/src/where.c index d25a1723d3..0db7cbc1e2 100644 --- a/src/where.c +++ b/src/where.c @@ -2475,9 +2475,9 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){ }else{ sqlite3DebugPrintf(" f %06x N %d", p->wsFlags, p->nLTerm); } - if( pWInfo && pWInfo->nOutStarDelta>0 && p->rStarDelta!=0 ){ + if( pWInfo && pWInfo->bStarUsed && p->rStarDelta!=0 ){ sqlite3DebugPrintf(" cost %d,%d,%d delta=%d\n", - p->rSetup, p->rRun, p->nOut, -p->rStarDelta); + p->rSetup, p->rRun, p->nOut, p->rStarDelta); }else{ sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut); } @@ -5458,13 +5458,13 @@ static LogEst whereSortingCost( ** ** SIDE EFFECT: (and really the whole point of this subroutine) ** -** If pWInfo describes a star-query, then the cost on WhereLoops for the -** fact table is reduced. This heuristic helps keep fact tables in +** If pWInfo describes a star-query, then the cost for SCANs of dimension +** WhereLoops is increased to be slightly larger than the cost of a SCAN +** in the fact table. This heuristic helps keep fact tables in ** outer loops. Without this heuristic, paths with fact tables in outer ** loops tend to get pruned by the mxChoice limit on the number of paths, -** resulting in poor query plans. The total amount of heuristic cost -** adjustment is stored in pWInfo->nOutStarDelta and the cost adjustment -** for each WhereLoop is stored in its rStarDelta field. +** resulting in poor query plans. The cost adjustment for each WhereLoop +** is stored in its rStarDelta field. ** ** This heuristic can be completely disabled, so that no query is ** considered a star-query, using SQLITE_TESTCTRL_OPTIMIZATION to @@ -5488,7 +5488,7 @@ static int computeMxChoice(WhereInfo *pWInfo){ } #endif /* SQLITE_DEBUG */ - if( nLoop>=5 + if( nLoop>=5 && !pWInfo->bStarDone && OptimizationEnabled(pWInfo->pParse->db, SQLITE_StarQuery) ){ @@ -5504,12 +5504,12 @@ static int computeMxChoice(WhereInfo *pWInfo){ ** the cost of fact tables relative to dimension tables, as a heuristic ** to help keep the fact tables in outer loops. */ - assert( pWInfo->nOutStarDelta==0 ); + assert( !pWInfo->bStarUsed ); aFromTabs = pWInfo->pTabList->a; pStart = pWInfo->pLoops; for(iFromIdx=0, m=1; iFromIdxzAlias ? pFactTab->zAlias : pFactTab->pSTab->zName, - iFromIdx, rDelta, nDep - ); - for(ii=0, x=1; iizAlias ? pDim->zAlias : pDim->pSTab->zName, ii - ); - } - } - sqlite3DebugPrintf("\n"); + + /* Compute the maximum cost of any WhereLoop for the iFromIdx-th term */ + mxRun = -32767; + for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){ + if( pWLoop->iTabiTab>iFromIdx ) break; + if( pWLoop->rRun>mxRun ) mxRun = pWLoop->rRun; } -#endif - if( pWInfo->nOutStarDelta==0 ){ + + /* Make sure rStarDelta values are initialized */ + if( !pWInfo->bStarUsed ){ for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ pWLoop->rStarDelta = 0; } + pWInfo->bStarUsed = 1; } - pWInfo->nOutStarDelta += rDelta; - for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ - if( pWLoop->maskSelf==m ){ - pWLoop->rRun -= rDelta; - pWLoop->nOut -= rDelta; - pWLoop->rStarDelta = rDelta; + + /* Increase the cost of table scans for dimension tables to be slightly + ** more than the maximum cost of fact table */ + for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){ + if( (pWLoop->maskSelf & mSeen)==0 ) continue; + if( pWLoop->nLTerm ) continue; + if( pWLoop->rRun<=mxRun ){ +#ifdef WHERETRACE_ENABLED /* 0x80000 */ + if( sqlite3WhereTrace & 0x80000 ){ + SrcItem *pDim = aFromTabs + pWLoop->iTab; + sqlite3DebugPrintf( + "Increase SCAN cost of dimension %s(%d) of fact %s(%d) to %d\n", + pDim->zAlias ? pDim->zAlias: pDim->pSTab->zName, pWLoop->iTab, + pFactTab->zAlias ? pFactTab->zAlias : pFactTab->pSTab->zName, + iFromIdx, mxRun+1 + ); + } +#endif /* WHERETRACE_ENABLED */ + pWLoop->rStarDelta = mxRun+1 - pWLoop->rRun; + pWLoop->rRun = mxRun+1; } } } #ifdef WHERETRACE_ENABLED /* 0x80000 */ - if( (sqlite3WhereTrace & 0x80000)!=0 && pWInfo->nOutStarDelta ){ + if( (sqlite3WhereTrace & 0x80000)!=0 && pWInfo->bStarUsed ){ sqlite3DebugPrintf("WhereLoops changed by star-query heuristic:\n"); for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ if( pWLoop->rStarDelta ){ @@ -5589,7 +5593,7 @@ static int computeMxChoice(WhereInfo *pWInfo){ } #endif } - return pWInfo->nOutStarDelta>0 ? 18 : 12; + return pWInfo->bStarUsed ? 18 : 12; } /* @@ -6035,9 +6039,9 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ } } - pWInfo->nRowOut = pFrom->nRow + pWInfo->nOutStarDelta; + pWInfo->nRowOut = pFrom->nRow; #ifdef WHERETRACE_ENABLED - pWInfo->rTotalCost = pFrom->rCost + pWInfo->nOutStarDelta; + pWInfo->rTotalCost = pFrom->rCost; #endif /* Free temporary memory and return success */ @@ -6436,7 +6440,6 @@ static SQLITE_NOINLINE void whereCheckIfBloomFilterIsUseful( } } nSearch += pLoop->nOut; - if( pWInfo->nOutStarDelta ) nSearch += pLoop->rStarDelta; } } diff --git a/src/whereInt.h b/src/whereInt.h index 5bea70ba9f..35bfe581dd 100644 --- a/src/whereInt.h +++ b/src/whereInt.h @@ -163,7 +163,7 @@ struct WhereLoop { # define WHERE_LOOP_XFER_SZ offsetof(WhereLoop,nLSlot) u16 nLSlot; /* Number of slots allocated for aLTerm[] */ LogEst rStarDelta; /* Cost delta due to star-schema heuristic. Not - ** initialized unless pWInfo->nOutStarDelta>0 */ + ** initialized unless pWInfo->bStarUsed */ WhereTerm **aLTerm; /* WhereTerms used */ WhereLoop *pNextLoop; /* Next WhereLoop object in the WhereClause */ WhereTerm *aLTermSpace[3]; /* Initial aLTerm[] space */ @@ -487,7 +487,7 @@ struct WhereInfo { unsigned bOrderedInnerLoop:1;/* True if only the inner-most loop is ordered */ unsigned sorted :1; /* True if really sorted (not just grouped) */ unsigned bStarDone :1; /* True if check for star-query is complete */ - LogEst nOutStarDelta; /* Artifical nOut reduction for star-query */ + unsigned bStarUsed :1; /* True if cost adjustments for star-query */ LogEst nRowOut; /* Estimated number of output rows */ #ifdef WHERETRACE_ENABLED LogEst rTotalCost; /* Total cost of the solution */