From: drh Date: Thu, 20 Jan 2011 16:52:09 +0000 (+0000) Subject: Use histogram data to improve the row-count estimates on equality constraints. X-Git-Tag: version-3.7.6~166^2~9 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=82759754ce452f4ef821df656d8391017754b9b3;p=thirdparty%2Fsqlite.git Use histogram data to improve the row-count estimates on equality constraints. FossilOrigin-Name: 6bfc5c69eb22938972bbf4e60179952dc215f770 --- diff --git a/manifest b/manifest index 00b3208ed7..cfa47d73c3 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C The\sfirst\sof\sa\splanned\sseries\sof\senhancements\sto\sthe\squery\splanner\sthat\nenable\sit\sto\smake\sbetter\suse\sof\ssqlite_stat2\shistograms\swhen\sthe\stable\nhas\smany\srepeated\svalues. -D 2011-01-20T02:56:37.736 +C Use\shistogram\sdata\sto\simprove\sthe\srow-count\sestimates\son\sequality\sconstraints. +D 2011-01-20T16:52:09.439 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -243,7 +243,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30 F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7 F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c 5cd6b88d57bfc816ba7f753a3cdf03686d954b8a +F src/where.c 068ecc5195578aed083a4314e572de9979a489e5 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125 @@ -256,7 +256,7 @@ F test/analyze.test c1eb87067fc16ece7c07e823d6395fd831b270c5 F test/analyze2.test f45ac8d54bdad822139e53fc6307fc6b5ee41c69 F test/analyze3.test 820ddfb7591b49607fbaf77240c7955ac3cabb04 F test/analyze4.test 757b37875cf9bb528d46f74497bc789c88365045 -F test/analyze5.test 18987796646efdf009ca0b8c8f060874a8fe57fb +F test/analyze5.test b6bd2e4fbbe2ffcf2a22250b28fb1f2a2ca507de F test/async.test ad4ba51b77cd118911a3fe1356b0809da9c108c3 F test/async2.test bf5e2ca2c96763b4cba3d016249ad7259a5603b6 F test/async3.test 93edaa9122f498e56ea98c36c72abc407f4fb11e @@ -900,18 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 9660a0a22547656cc3765b673d0cee9e1dd829ef -R 9ff1bb21abd03a28e074b829beb25c52 -T *bgcolor * #a8c7d3 -T *branch * stat2-enhancement -T *sym-stat2-enhancement * -T -sym-trunk * +P 2cd374cd23fa2fd38f49090d6eeb9b1e521d51d5 +R b298fdc0ea4be4264436dd1639cb2644 U drh -Z f191562671825ddb731a0f83c41674dd +Z 10e84b987d0709faa336295530a6ebae -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD8DBQFNN6RpoxKgR168RlERAtbyAJ4tlGP5CKHBEdaaRtF9LD6pnMJo9QCaAnxY -oc508+oZBxzr/UoIZL3o+G4= -=2eyE +iD8DBQFNOGg8oxKgR168RlERAhVwAJ9ZyON4XSbnksAxlSedyp9Kpxx2QACfe+y0 +6eKoyLZRueuMI2F5tREdfw4= +=Hhmt -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index a32e6ed42f..cbf97eefea 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2cd374cd23fa2fd38f49090d6eeb9b1e521d51d5 \ No newline at end of file +6bfc5c69eb22938972bbf4e60179952dc215f770 \ No newline at end of file diff --git a/src/where.c b/src/where.c index 734f24019f..048ab940a3 100644 --- a/src/where.c +++ b/src/where.c @@ -2461,6 +2461,51 @@ range_est_fallback: return rc; } +#ifdef SQLITE_ENABLE_STAT2 +/* +** Estimate the number of rows that will be returned based on +** an equality constraint x=VALUE and where that VALUE occurs in +** the histogram data. This only works when x is the left-most +** column of an index and sqlite_stat2 histogram data is available +** for that index. +** +** Write the estimated row count into *pnRow. If unable to make +** an estimate, leave *pnRow unchanged. +*/ +void whereEqScanEst( + Parse *pParse, /* Parsing & code generating context */ + Index *p, /* The index whose left-most column is pTerm */ + WhereTerm *pTerm, /* The x=VALUE constraint */ + double *pnRow /* Write the revised row estimate here */ +){ + sqlite3_value *pRhs = 0; /* VALUE on right-hand side of pTerm */ + int iLower, iUpper; /* Range of histogram regions containing pRhs */ + u8 aff; /* Column affinity */ + int rc; /* Subfunction return code */ + double nRowEst; /* New estimate of the number of rows */ + + assert( p->aSample!=0 ); + assert( pTerm->eOperator==WO_EQ ); + aff = p->pTable->aCol[p->aiColumn[0]].affinity; + rc = valueFromExpr(pParse, pTerm->pExpr->pRight, aff, &pRhs); + if( rc ) goto whereEqScanEst_cancel; + rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower); + if( rc ) goto whereEqScanEst_cancel; + rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper); + if( rc ) goto whereEqScanEst_cancel; + if( iLower>=iUpper ){ + nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2); + if( nRowEst<*pnRow ) *pnRow = nRowEst; + }else{ + nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES; + *pnRow = nRowEst; + } + +whereEqScanEst_cancel: + sqlite3ValueFree(pRhs); +} +#endif /* defined(SQLITE_ENABLE_STAT2) */ + /* ** Find the query plan for accessing a particular table. Write the @@ -2624,10 +2669,13 @@ static void bestBtreeIndex( int bInEst = 0; int nInMul = 1; int estBound = 100; - int nBound = 0; /* Number of range constraints seen */ + int nBound = 0; /* Number of range constraints seen */ int bSort = 0; int bLookup = 0; - WhereTerm *pTerm; /* A single term of the WHERE clause */ + WhereTerm *pTerm; /* A single term of the WHERE clause */ +#ifdef SQLITE_ENABLE_STAT2 + WhereTerm *pFirstEqTerm = 0; /* First WO_EQ term */ +#endif /* Determine the values of nEq and nInMul */ for(nEq=0; nEqnColumn; nEq++){ @@ -2647,6 +2695,11 @@ static void bestBtreeIndex( }else if( pTerm->eOperator & WO_ISNULL ){ wsFlags |= WHERE_COLUMN_NULL; } +#ifdef SQLITE_ENABLE_STAT2 + else if( nEq==0 && pProbe->aSample ){ + pFirstEqTerm = pTerm; + } +#endif used |= pTerm->prereqRight; } @@ -2723,6 +2776,17 @@ static void bestBtreeIndex( nInMul = (int)(nRow / aiRowEst[nEq]); } +#ifdef SQLITE_ENABLE_STAT2 + /* If the constraint is of the form x=VALUE and histogram + ** data is available for column x, then it might be possible + ** to get a better estimate on the number of rows based on + ** VALUE and how common that value is according to the histogram. + */ + if( nRow>(double)1 && nEq==1 && pFirstEqTerm!=0 ){ + whereEqScanEst(pParse, pProbe, pFirstEqTerm, &nRow); + } +#endif /* SQLITE_ENABLE_STAT2 */ + /* Assume constant cost to access a row and logarithmic cost to ** do a binary search. Hence, the initial cost is the number of output ** rows plus log2(table-size) times the number of binary searches. diff --git a/test/analyze5.test b/test/analyze5.test index 233e7ff9e7..da0dbe3ead 100644 --- a/test/analyze5.test +++ b/test/analyze5.test @@ -214,6 +214,14 @@ foreach {testid where rows} { 20 {y>=0 AND y<'charlie'} 700 21 {y>=0 AND y<='charlie'} 900 22 {y>=0 AND y<'delta'} 900 + 23 {y>'alpha' AND y='bravo' AND y'bravo' AND y='charlie' AND y'charlie' AND y='delta' AND y'delta' AND y='echo' AND y