From: drh <> Date: Fri, 29 Dec 2023 19:03:01 +0000 (+0000) Subject: Attempt to improve the ANALYZE command so that it does a better job of X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6fa09f6120477d9ff8537f769115e4ca0938068c;p=thirdparty%2Fsqlite.git Attempt to improve the ANALYZE command so that it does a better job of detecting lopsided indexes and makes appropriate changes to the sqlite_stat1 table. FossilOrigin-Name: 4b70b94616ef37bac969051eee3ea6913a28f30520cdd4fc3a19e848f2cf12b7 --- diff --git a/manifest b/manifest index adb892f498..c875ee5dda 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Minor\sdoc\stouchup\sin\sthe\sJS\sbits. -D 2023-12-29T04:29:07.096 +C Attempt\sto\simprove\sthe\sANALYZE\scommand\sso\sthat\sit\sdoes\sa\sbetter\sjob\sof\ndetecting\slopsided\sindexes\sand\smakes\sappropriate\schanges\sto\sthe\ssqlite_stat1\ntable. +D 2023-12-29T19:03:01.857 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -670,7 +670,7 @@ F sqlite3.1 acdff36db796e2d00225b911d3047d580cd136547298435426ce9d40347973cc F sqlite3.pc.in 48fed132e7cb71ab676105d2a4dc77127d8c1f3a F sqlite_cfg.h.in baf2e409c63d4e7a765e17769b6ff17c5a82bbd9cbf1e284fd2e4cefaff3fcf2 F src/alter.c 30c2333b8bb3af71e4eb9adeadee8aa20edb15917ed44b8422e5cd15f3dfcddc -F src/analyze.c d4cc28738c29e009640ec20ebb6936ba6fcefff0d11aa93398d9bb9a5ead6c1f +F src/analyze.c dd9f25aa96ab1ca1a52743a433fbc53888f41059d909a4360382835414733f82 F src/attach.c cc9d00d30da916ff656038211410ccf04ed784b7564639b9b61d1839ed69fd39 F src/auth.c 19b7ccacae3dfba23fc6f1d0af68134fa216e9040e53b0681b4715445ea030b4 F src/backup.c 5c97e8023aab1ce14a42387eb3ae00ba5a0644569e3476f38661fa6f824c3523 @@ -2156,8 +2156,11 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F 
vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 630604a4e604bfb36c31602917bfa8d42c10c82966d0819932bf8f827b9158b8 -R 0182e48a4eb943a57927e95defc2f847 -U stephan -Z 19ca2e7863b05c35a59e90f21d5378b3 +P 8d2120c35425081e2158d6a8a6b083c4adf8d694046b2d98f5fd235520920432 +R b58bf97977b7e8d817610de3abc3e459 +T *branch * enhanced-stat1 +T *sym-enhanced-stat1 * +T -sym-trunk * +U drh +Z 87510e89218beaa47ade1953d2d2ed80 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index c9aa0e6dea..7b89cb32bf 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -8d2120c35425081e2158d6a8a6b083c4adf8d694046b2d98f5fd235520920432 \ No newline at end of file +4b70b94616ef37bac969051eee3ea6913a28f30520cdd4fc3a19e848f2cf12b7 \ No newline at end of file diff --git a/src/analyze.c b/src/analyze.c index a7a8b6d665..2c1fbdaac6 100644 --- a/src/analyze.c +++ b/src/analyze.c @@ -266,6 +266,7 @@ typedef struct StatSample StatSample; struct StatSample { tRowcnt *anEq; /* sqlite_stat4.nEq */ tRowcnt *anDLt; /* sqlite_stat4.nDLt */ + tRowcnt *amxEq; /* Maximum length run of equal values */ #ifdef SQLITE_ENABLE_STAT4 tRowcnt *anLt; /* sqlite_stat4.nLt */ union { @@ -425,6 +426,7 @@ static void statInit( /* Allocate the space required for the StatAccum object */ n = sizeof(*p) + sizeof(tRowcnt)*nColUp /* StatAccum.anEq */ + + sizeof(tRowcnt)*nColUp /* StatAccum.amxEq */ + sizeof(tRowcnt)*nColUp; /* StatAccum.anDLt */ #ifdef SQLITE_ENABLE_STAT4 if( mxSample ){ @@ -447,7 +449,8 @@ static void statInit( p->nKeyCol = nKeyCol; p->nSkipAhead = 0; p->current.anDLt = (tRowcnt*)&p[1]; - p->current.anEq = &p->current.anDLt[nColUp]; + p->current.amxEq = &p->current.anDLt[nColUp]; + p->current.anEq = &p->current.amxEq[nColUp]; #ifdef SQLITE_ENABLE_STAT4 p->mxSample = p->nLimit==0 ? 
mxSample : 0; @@ -716,7 +719,10 @@ static void statPush( if( p->nRow==0 ){ /* This is the first call to this function. Do initialization. */ - for(i=0; i<p->nCol; i++) p->current.anEq[i] = 1; + for(i=0; i<p->nCol; i++){ + p->current.anEq[i] = 1; + p->current.amxEq[i] = 1; + } }else{ /* Second and subsequent calls get processed here */ #ifdef SQLITE_ENABLE_STAT4 @@ -733,6 +739,9 @@ static void statPush( #ifdef SQLITE_ENABLE_STAT4 if( p->mxSample ) p->current.anLt[i] += p->current.anEq[i]; #endif + if( p->current.amxEq[i]<p->current.anEq[i] ){ + p->current.amxEq[i] = p->current.anEq[i]; + } p->current.anEq[i] = 1; } } @@ -841,20 +850,18 @@ static void statGet( ** a key with the corresponding number of fields. In other words, ** if the index is on columns (a,b) and the sqlite_stat1 value is ** "100 10 2", then SQLite estimates that: - ** - ** * the index contains 100 rows, - ** * "WHERE a=?" matches 10 rows, and - ** * "WHERE a=? AND b=?" matches 2 rows. + ** | | | + ** | | `-- "WHERE a=? AND b=?" matches approximately 2 rows + ** | `---- "WHERE a=?" matches approximately 10 rows + ** `-------- There are approximately 100 rows in the index total ** ** If D is the count of distinct values and K is the total number of ** rows, then each estimate is usually computed as: ** ** I = (K+D-1)/D ** - ** In other words, I is K/D rounded up to the next whole integer. - ** However, if I is between 1.0 and 1.1 (in other words if I is - ** close to 1.0 but just a little larger) then do not round up but - ** instead keep the I value at 1.0. + ** Adjustments to the I value are made in some cases. See comments + ** in-line below. */ sqlite3_str sStat; /* Text of the constructed "stat" line */ int i; /* Loop counter */ @@ -863,9 +870,23 @@ static void statGet( sqlite3_str_appendf(&sStat, "%llu", p->nSkipAhead ? 
(u64)p->nEst : (u64)p->nRow); for(i=0; i<p->nKeyCol; i++){ - u64 nDistinct = p->current.anDLt[i] + 1; - u64 iVal = (p->nRow + nDistinct - 1) / nDistinct; - if( iVal==2 && p->nRow*10 <= nDistinct*11 ) iVal = 1; + u64 nDistinct, iVal, mx; + nDistinct = p->current.anDLt[i] + 1; + iVal = (p->nRow + nDistinct - 1) / nDistinct; + mx = p->current.amxEq[i]; + if( nDistinct==1 && p->nLimit>0 ){ + /* If we never saw more than a single value in a PRAGMA analysis_limit + ** search, then set the estimated number of matching rows to the + ** estimated number of rows in the index. */ + iVal = p->nEst; + }else if( iVal<mx/8 ){ + /* Never let the estimated number of matching rows be less than + ** 1/8th the greatest number of identical rows */ + iVal = mx/8; + }else if( iVal==2 && p->nRow*10 <= nDistinct*11 ){ + /* If the value is less than or equal to 1.1, round it down to 1.0 */ + iVal = 1; + } sqlite3_str_appendf(&sStat, " %llu", iVal); assert( p->current.anEq[i] ); }