From: dan Date: Thu, 8 Aug 2013 11:48:57 +0000 (+0000) Subject: Fix a bug in using stat4 data to estimate the number of rows selected by a range... X-Git-Tag: version-3.8.1~132^2~27 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6cb8d76ccb7ae39b3a62ca6f407bdcc7124536aa;p=thirdparty%2Fsqlite.git Fix a bug in using stat4 data to estimate the number of rows selected by a range constraint. FossilOrigin-Name: f783938ea999731ea073cd2c78e278095f7bea6d --- diff --git a/manifest b/manifest index d23d9a5f36..44a7e1f774 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Replace\svariable\sIndex.avgEq\s(average\snumber\sof\srows\sin\skeys\sfor\swhich\sthere\sis\sno\ssample\sin\ssqlite_stat4)\swith\svector\sIndex.aAvgEq. -D 2013-08-07T19:46:15.623 +C Fix\sa\sbug\sin\susing\sstat4\sdata\sto\sestimate\sthe\snumber\sof\srows\sselected\sby\sa\srange\sconstraint. +D 2013-08-08T11:48:57.819 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5e41da95d92656a5004b03d3576e8b226858a28e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -290,7 +290,7 @@ F src/vtab.c 2e8b489db47e20ae36cd247932dc671c9ded0624 F src/wal.c 7dc3966ef98b74422267e7e6e46e07ff6c6eb1b4 F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4 F src/walker.c 4fa43583d0a84b48f93b1e88f11adf2065be4e73 -F src/where.c c973297fc29c5dae03a07d6deb479af432d24005 +F src/where.c 5ea698bd91c8c264bd00fb9c6aafc30043a3873b F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/aggnested.test 45c0201e28045ad38a530b5a144b73cd4aa2cfd6 @@ -307,7 +307,7 @@ F test/analyze4.test eff2df19b8dd84529966420f29ea52edc6b56213 F test/analyze5.test e3eece09761c935ec0b85dc4ed70dbf6cac1ed77 F test/analyze6.test 3c01e084309706a1033f850330ea24f6f7846297 F test/analyze7.test c0af22c5e0140e2e4ac556a21c2b6fff58229c98 -F test/analyze8.test 092425439c12f62f9d5c3127e2b4f6e7b3e170cc +F test/analyze8.test 8d1f76ff1e47c4093bb7be3971ba08fa56dc470d F test/analyze9.test 1ed4e7d95d8e1e1923766281b20870d61730450c F test/async.test 1d0e056ba1bb9729283a0f22718d3a25e82c277b F test/async2.test c0a9bd20816d7d6a2ceca7b8c03d3d69c28ffb8b @@ -1106,7 +1106,7 @@ F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 F tool/wherecosttest.c f407dc4c79786982a475261866a161cd007947ae F tool/win/sqlite.vsix 97894c2790eda7b5bce3cc79cb2a8ec2fde9b3ac -P 08f74c45ecf711a2373af578d44470add9082377 -R 4d9cb44bdbdf4b1edd0f4ec9caccbe56 +P 7b70b419c43b2c3b2daf11d833a1d60245bfaef5 +R 57059d9c69ace9dccd9c4ceedb6afa1e U dan -Z a128130e52d0e5080bab7e97885213b4 +Z 9ec2673c689c33df3101a910ffd90aa8 diff --git a/manifest.uuid b/manifest.uuid index 843b9772c2..5cea5a9336 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -7b70b419c43b2c3b2daf11d833a1d60245bfaef5 \ No newline at end of file +f783938ea999731ea073cd2c78e278095f7bea6d \ No newline at end of file diff --git a/src/where.c b/src/where.c index 0a300ca2f4..d230758ed2 100644 --- a/src/where.c +++ b/src/where.c @@ -394,7 +394,6 @@ struct WhereLoopBuilder { #ifdef SQLITE_ENABLE_STAT4 UnpackedRecord *pRec; /* Probe for stat4 (if required) */ int nRecValid; /* Number of valid fields currently in pRec */ - tRowcnt nMaxRowcnt; /* If !=0, the maximum estimated row count */ #endif }; @@ -2478,38 +2477,40 @@ static int whereKeyStats( ** If either of the upper or lower bound is not present, then NULL is passed in ** place of the corresponding WhereTerm. ** -** The nEq parameter is passed the index of the index column subject to the -** range constraint. Or, equivalently, the number of equality constraints -** optimized by the proposed index scan. For example, assuming index p is -** on t1(a, b), and the SQL query is: +** The value in (pBuilder->pNew->u.btree.nEq) is the index of the index +** column subject to the range constraint. Or, equivalently, the number of +** equality constraints optimized by the proposed index scan. For example, +** assuming index p is on t1(a, b), and the SQL query is: ** ** ... FROM t1 WHERE a = ? AND b > ? AND b < ? ... ** -** then nEq should be passed the value 1 (as the range restricted column, -** b, is the second left-most column of the index). Or, if the query is: +** then nEq is set to 1 (as the range restricted column, b, is the second +** left-most column of the index). Or, if the query is: ** ** ... FROM t1 WHERE a > ? AND a < ? ... ** -** then nEq should be passed 0. +** then nEq is set to 0. ** -** The returned value is an integer divisor to reduce the estimated -** search space. A return value of 1 means that range constraints are -** no help at all. A return value of 2 means range constraints are -** expected to reduce the search space by half. And so forth... -** -** In the absence of sqlite_stat3 ANALYZE data, each range inequality -** reduces the search space by a factor of 4. Hence a single constraint (x>?) -** results in a return of 4 and a range constraint (x>? AND x? AND x123" Might be NULL */ WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ - WhereCost *pRangeDiv /* OUT: Reduce search space by this divisor */ + WhereCost *pnOut /* IN/OUT: Number of rows visited */ ){ int rc = SQLITE_OK; + int nOut = (int)*pnOut; #ifdef SQLITE_ENABLE_STAT4 Index *p = pBuilder->pNew->u.btree.pIndex; @@ -2550,17 +2551,18 @@ static int whereRangeScanEst( } pBuilder->pRec = pRec; if( rc==SQLITE_OK ){ - WhereCost iBase = whereCost(p->aiRowEst[0]); + WhereCost nNew; if( iUpper>iLower ){ - iBase -= whereCost(iUpper - iLower); - } - if( pBuilder->nMaxRowcnt && iBasenMaxRowcnt ){ - *pRangeDiv = pBuilder->nMaxRowcnt; + nNew = whereCost(iUpper - iLower); }else{ - *pRangeDiv = iBase; + nNew = whereCost(2); /* Small number */ + } + if( nNewwtFlags & TERM_VNULL)==0 ){ - *pRangeDiv += 20; assert( 20==whereCost(4) ); + nOut -= 20; assert( 20==whereCost(4) ); } if( pUpper ){ - *pRangeDiv += 20; assert( 20==whereCost(4) ); + nOut -= 20; assert( 20==whereCost(4) ); } + if( nOut<10 ) nOut = 10; + *pnOut = (WhereCost)nOut; return rc; } @@ -2641,9 +2644,6 @@ static int whereEqualScanEst( if( rc==SQLITE_OK ){ WHERETRACE(0x100,("equality scan regions: %d\n", (int)a[1])); *pnRow = a[1]; - if( pBuilder->nMaxRowcnt && *pnRow>pBuilder->nMaxRowcnt ){ - *pnRow = pBuilder->nMaxRowcnt; - } } return rc; @@ -2690,11 +2690,7 @@ static int whereInScanEst( if( rc==SQLITE_OK ){ if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0]; - if( pBuilder->nMaxRowcnt && nRowEst>pBuilder->nMaxRowcnt ){ - *pnRow = pBuilder->nMaxRowcnt; - }else{ - *pnRow = nRowEst; - } + *pnRow = nRowEst; WHERETRACE(0x100,("IN row estimate: est=%g\n", nRowEst)); } assert( pBuilder->nRecValid==nRecValid ); @@ -4246,7 +4242,6 @@ static int whereLoopAddBtreeIndex( int nIn = 0; #ifdef SQLITE_ENABLE_STAT4 int nRecValid = pBuilder->nRecValid; - int nMaxRowcnt = pBuilder->nMaxRowcnt; if( (pTerm->wtFlags & TERM_VNULL)!=0 && pSrc->pTab->aCol[iCol].notNull ){ continue; /* skip IS NOT NULL constraints on a NOT NULL column */ } @@ -4309,9 +4304,8 @@ static int whereLoopAddBtreeIndex( } if( pNew->wsFlags & WHERE_COLUMN_RANGE ){ /* Adjust nOut and rRun for STAT3 range values */ - WhereCost rDiv; - whereRangeScanEst(pParse, pBuilder, pBtm, pTop, &rDiv); - pNew->nOut = saved_nOut>rDiv+10 ? saved_nOut - rDiv : 10; + assert( pNew->nOut==saved_nOut ); + whereRangeScanEst(pParse, pBuilder, pBtm, pTop, &pNew->nOut); } #ifdef SQLITE_ENABLE_STAT4 if( nInMul==0 && pProbe->nSample && OptimizationEnabled(db, SQLITE_Stat3) ){ @@ -4321,14 +4315,15 @@ static int whereLoopAddBtreeIndex( testcase( pTerm->eOperator & WO_EQ ); testcase( pTerm->eOperator & WO_ISNULL ); rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut); - assert( nOut==0||pBuilder->nMaxRowcnt==0||nOut<=pBuilder->nMaxRowcnt); - if( nOut ) pBuilder->nMaxRowcnt = nOut; }else if( (pTerm->eOperator & WO_IN) && !ExprHasProperty(pExpr, EP_xIsSelect) ){ rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut); } assert( nOut==0 || rc==SQLITE_OK ); - if( nOut ) pNew->nOut = whereCost(nOut); + if( nOut ){ + nOut = whereCost(nOut); + pNew->nOut = MIN(nOut, saved_nOut); + } } #endif if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){ @@ -4347,7 +4342,7 @@ static int whereLoopAddBtreeIndex( } #ifdef SQLITE_ENABLE_STAT4 pBuilder->nRecValid = nRecValid; - pBuilder->nMaxRowcnt = nMaxRowcnt; + pNew->nOut = saved_nOut; #endif } pNew->prereq = saved_prereq; diff --git a/test/analyze8.test b/test/analyze8.test index f059424ce2..3a00e69e53 100644 --- a/test/analyze8.test +++ b/test/analyze8.test @@ -84,14 +84,27 @@ do_test 2.1 { # There are many more values of c between 0 and 100000 than there are # between 800000 and 900000. So t1c is more selective for the latter # range. +# +# Test 3.2 is a little unstable. It depends on the planner estimating +# that (b BETWEEN 40 AND 44) will match more rows than (c BETWEEN +# 800000 AND 900000). Which is a pretty close call (50 vs. 32), so +# the planner could get it wrong with an unlucky set of samples. This +# case happens to work, but others ("b BETWEEN 50 AND 54" for example) +# will fail. # +do_execsql_test 3.0 { + SELECT count(*) FROM t1 WHERE b BETWEEN 40 AND 44; + SELECT count(*) FROM t1 WHERE c BETWEEN 0 AND 100000; + SELECT count(*) FROM t1 WHERE c BETWEEN 800000 AND 900000; +} {50 376 32} do_test 3.1 { - eqp {SELECT * FROM t1 WHERE b BETWEEN 50 AND 54 AND c BETWEEN 0 AND 100000} + eqp {SELECT * FROM t1 WHERE b BETWEEN 40 AND 44 AND c BETWEEN 0 AND 100000} } {0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b>? AND b? AND c