From: dan Date: Thu, 24 Apr 2014 20:04:49 +0000 (+0000) Subject: Changes to the way the planner calculates the costs of various table and index scans... X-Git-Tag: version-3.8.5~65^2~19 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=aa9933c11529cabf659615d9c94277846dacbd15;p=thirdparty%2Fsqlite.git Changes to the way the planner calculates the costs of various table and index scans. Some test cases still failing. FossilOrigin-Name: c5a6ec0a880652dc8f4593d9f7acd58ddc3dc5f3 --- diff --git a/manifest b/manifest index 7f4e9189fd..2b621bd738 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Comment\stweaks\son\sthe\stest\scase\sfor\sthe\s[b75a9ca6b0]\sbug\sfix. -D 2014-04-21T13:36:54.639 +C Changes\sto\sthe\sway\sthe\splanner\scalculates\sthe\scosts\sof\svarious\stable\sand\sindex\sscans.\sSome\stest\scases\sstill\sfailing. +D 2014-04-24T20:04:49.939 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2ef13430cd359f7b361bb863504e227b25cc7f81 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -291,7 +291,7 @@ F src/vtab.c 21b932841e51ebd7d075e2d0ad1415dce8d2d5fd F src/wal.c 76e7fc6de229bea8b30bb2539110f03a494dc3a8 F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4 F src/walker.c 11edb74d587bc87b33ca96a5173e3ec1b8389e45 -F src/where.c 3b127bdc24b7aa84ffa69729170be11555cd7733 +F src/where.c c12bc20cd649bcae39de3e452bfc1a3f164454ee F src/whereInt.h 929c1349b5355fd44f22cee5c14d72b3329c58a6 F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 @@ -311,7 +311,7 @@ F test/analyze5.test 765c4e284aa69ca172772aa940946f55629bc8c4 F test/analyze6.test d31defa011a561b938b4608d3538c1b4e0b5e92c F test/analyze7.test bb1409afc9e8629e414387ef048b8e0e3e0bdc4f F test/analyze8.test 093d15c1c888eed5034304a98c992f7360130b88 -F test/analyze9.test e072a5172d55afcba98d6ca6a219ce8878c2f5c9 +F test/analyze9.test e219daa58fd8677c6a43d771798cf37d68f51d3e F test/analyzeA.test 1a5c40079894847976d983ca39c707aaa44b6944 F test/analyzeB.test 8bf35ee0a548aea831bf56762cb8e7fdb1db083d F test/async.test 1d0e056ba1bb9729283a0f22718d3a25e82c277b @@ -329,7 +329,7 @@ F test/auth.test 5bdf154eb28c0e4bbc0473f335858c0d96171768 F test/auth2.test c3b415b76c033bedb81292118fb7c01f5f10cbcd F test/auth3.test a4755e6a2a2fea547ffe63c874eb569e60a28eb5 F test/autoinc.test c58912526998a39e11f66b533e23cfabea7f25b7 -F test/autoindex1.test d4dfe14001dfcb74cfbd7107f45a79fc1ab6183e +F test/autoindex1.test 762ff3f8e25d852aae55c6462ca166a80c0cde61 F test/autovacuum.test 941892505d2c0f410a0cb5970dfa1c7c4e5f6e74 F test/autovacuum_ioerr2.test 8a367b224183ad801e0e24dcb7d1501f45f244b4 F test/avtrans.test 0252654f4295ddda3b2cce0e894812259e655a85 @@ -1079,7 +1079,7 @@ F test/walslow.test e7be6d9888f83aa5d3d3c7c08aa9b5c28b93609a F test/walthread.test de8dbaf6d9e41481c460ba31ca61e163d7348f8e F test/where.test 28b64e93428961b07b0d486778d63fd672948f6b F test/where2.test 455a2eb2666e66c1e84e2cb5815173a85e6237db -F test/where3.test d28c51f257e60be30f74308fa385ceeddfb54a6e +F test/where3.test 1ad55ba900bd7747f98b6082e65bd3e442c5004e F test/where4.test d8420ceeb8323a41ceff1f1841fc528e824e1ecf F test/where5.test fdf66f96d29a064b63eb543e28da4dfdccd81ad2 F test/where6.test 5da5a98cec820d488e82708301b96cb8c18a258b @@ -1093,7 +1093,7 @@ F test/whereC.test d6f4ecd4fa2d9429681a5b22a25d2bda8e86ab8a F test/whereD.test fd9120e262f9da3c45940f52aefeef4d15b904e5 F test/whereE.test b3a055eef928c992b0a33198a7b8dc10eea5ad2f F test/whereF.test 5b2ba0dbe8074aa13e416b37c753991f0a2492d7 -F test/whereG.test 2533b72ed4a31fd1687230a499b557b911525344 +F test/whereG.test 8189fedf3b98ab581bb70f830175e403a0ef1722 F test/whereH.test e4b07f7a3c2f5d31195cd33710054c78667573b2 F test/wherelimit.test 5e9fd41e79bb2b2d588ed999d641d9c965619b31 F test/wild001.test bca33f499866f04c24510d74baf1e578d4e44b1c @@ -1161,7 +1161,10 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01 F tool/win/sqlite.vsix 030f3eeaf2cb811a3692ab9c14d021a75ce41fff -P de9a490f594183f337a2ec9e0f87792eac83548b -R ce888b84132e0cad3bcca115a32951d3 -U drh -Z cf9f241149456ab1fa24984e95a412d2 +P 65d2544af9adc1e2f1d193e57f8be0422fb0d5eb +R 6cc54703275bf2ed6708c34ae52cf7ea +T *branch * experimental-costs +T *sym-experimental-costs * +T -sym-trunk * +U dan +Z 868aa60f36dda291ae018583501645e5 diff --git a/manifest.uuid b/manifest.uuid index a2761a597b..4c2b675cff 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -65d2544af9adc1e2f1d193e57f8be0422fb0d5eb \ No newline at end of file +c5a6ec0a880652dc8f4593d9f7acd58ddc3dc5f3 \ No newline at end of file diff --git a/src/where.c b/src/where.c index 9bde27e52f..6a75840525 100644 --- a/src/where.c +++ b/src/where.c @@ -227,7 +227,7 @@ static int whereClauseInsert(WhereClause *pWC, Expr *p, u8 wtFlags){ if( p && ExprHasProperty(p, EP_Unlikely) ){ pTerm->truthProb = sqlite3LogEst(p->iTable) - 99; }else{ - pTerm->truthProb = -1; + pTerm->truthProb = 1; } pTerm->pExpr = sqlite3ExprSkipCollate(p); pTerm->wtFlags = wtFlags; @@ -1975,6 +1975,31 @@ static void whereKeyStats( } #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ +/* +** If it is not NULL, pTerm is a term that provides an upper or lower +** bound on a range scan. Without considering pTerm, it is estimated +** that the scan will visit nNew rows. This function returns the number +** estimated to be visited after taking pTerm into account. +** +** If the user explicitly specified a likelihood() value for this term, +** then the return value is the likelihood multiplied by the number of +** input rows. Otherwise, this function assumes that an "IS NOT NULL" term +** has a likelihood of 0.50, and any other term a likelihood of 0.25. +*/ +static LogEst whereRangeAdjust(WhereTerm *pTerm, LogEst nNew){ + LogEst nRet = nNew; + if( pTerm ){ + if( pTerm->truthProb<=0 ){ + nRet += pTerm->truthProb; + }else if( pTerm->wtFlags & TERM_VNULL ){ + nRet -= 10; assert( 10==sqlite3LogEst(2) ); + }else{ + nRet -= 20; assert( 20==sqlite3LogEst(4) ); + } + } + return nRet; +} + /* ** This function is used to estimate the number of rows that will be visited ** by scanning an index for a range of values. The range may have an upper @@ -2127,17 +2152,9 @@ static int whereRangeScanEst( UNUSED_PARAMETER(pBuilder); #endif assert( pLower || pUpper ); - /* TUNING: Each inequality constraint reduces the search space 4-fold. - ** A BETWEEN operator, therefore, reduces the search space 16-fold */ - nNew = nOut; - if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ){ - nNew -= 20; assert( 20==sqlite3LogEst(4) ); - nOut--; - } - if( pUpper ){ - nNew -= 20; assert( 20==sqlite3LogEst(4) ); - nOut--; - } + nNew = whereRangeAdjust(pLower, nOut); + nNew = whereRangeAdjust(pUpper, nNew); + nOut -= (pLower!=0) + (pUpper!=0); if( nNew<10 ) nNew = 10; if( nNewnOut = (LogEst)nOut; @@ -3987,7 +4004,9 @@ static void whereLoopOutputAdjust(WhereClause *pWC, WhereLoop *pLoop){ if( pX==pTerm ) break; if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break; } - if( j<0 ) pLoop->nOut += pTerm->truthProb; + if( j<0 ){ + pLoop->nOut += (pTerm->truthProb<=0 ? pTerm->truthProb : -1); + } } } @@ -4081,6 +4100,7 @@ static int whereLoopAddBtreeIndex( pNew->nOut = saved_nOut; } for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){ + LogEst rCostIdx; int nIn = 0; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 int nRecValid = pBuilder->nRecValid; @@ -4154,7 +4174,8 @@ static int whereLoopAddBtreeIndex( pNew->aLTerm[pNew->nLTerm-2] : 0; } if( pNew->wsFlags & WHERE_COLUMN_RANGE ){ - /* Adjust nOut and rRun for STAT3 range values */ + /* Adjust nOut using stat3/stat4 data. Or, if there is no stat3/stat4 + ** data, using some other estimate. */ assert( pNew->nOut==saved_nOut ); whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew); } @@ -4181,13 +4202,16 @@ static int whereLoopAddBtreeIndex( } } #endif + /* Set rCostIdx to the cost of visiting selected rows in index. Add + ** it to pNew->rRun, which is currently set to the cost of the index + ** seek only. Then, if this is a non-covering index, add the cost of + ** visiting the rows in the main table. */ + rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow; + pNew->rRun = sqlite3LogEstAdd(pNew->rRun, rCostIdx); if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){ - /* Each row involves a step of the index, then a binary search of - ** the main table */ - pNew->rRun = sqlite3LogEstAdd(pNew->rRun,rLogSize>27 ? rLogSize-17 : 10); + pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut + 16); } - /* Step cost for each output row */ - pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut); + whereLoopOutputAdjust(pBuilder->pWC, pNew); rc = whereLoopInsert(pBuilder, pNew); if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0 @@ -4319,6 +4343,7 @@ static int whereLoopAddBtree( sPk.aiRowEst = aiRowEstPk; sPk.onError = OE_Replace; sPk.pTable = pTab; + sPk.szIdxRow = pTab->szTabRow; aiRowEstPk[0] = pTab->nRowEst; aiRowEstPk[1] = 1; pFirst = pSrc->pTab->pIndex; @@ -4396,10 +4421,8 @@ static int whereLoopAddBtree( /* Full table scan */ pNew->iSortIdx = b ? iSortIdx : 0; - /* TUNING: Cost of full table scan is 3*(N + log2(N)). - ** + The extra 3 factor is to encourage the use of indexed lookups - ** over full scans. FIXME */ - pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 16; + /* TUNING: Cost of full table scan is (N*3.0). */ + pNew->rRun = rSize + 16; whereLoopOutputAdjust(pWC, pNew); rc = whereLoopInsert(pBuilder, pNew); pNew->nOut = rSize; @@ -4426,35 +4449,16 @@ static int whereLoopAddBtree( ) ){ pNew->iSortIdx = b ? iSortIdx : 0; - /* TUNING: The base cost of an index scan is N + log2(N). - ** The log2(N) is for the initial seek to the beginning and the N - ** is for the scan itself. */ - pNew->rRun = sqlite3LogEstAdd(rSize, rLogSize); - if( m==0 ){ - /* TUNING: Cost of a covering index scan is K*(N + log2(N)). - ** + The extra factor K of between 1.1 and 3.0 that depends - ** on the relative sizes of the table and the index. K - ** is smaller for smaller indices, thus favoring them. - ** The upper bound on K (3.0) matches the penalty factor - ** on a full table scan that tries to encourage the use of - ** indexed lookups over full scans. - */ - pNew->rRun += 1 + (15*pProbe->szIdxRow)/pTab->szTabRow; - }else{ - /* TUNING: The cost of scanning a non-covering index is multiplied - ** by log2(N) to account for the binary search of the main table - ** that must happen for each row of the index. - ** TODO: Should there be a multiplier here, analogous to the 3x - ** multiplier for a fulltable scan or covering index scan, to - ** further discourage the use of an index scan? Or is the log2(N) - ** term sufficient discouragement? - ** TODO: What if some or all of the WHERE clause terms can be - ** computed without reference to the original table. Then the - ** penality should reduce to logK where K is the number of output - ** rows. - */ - pNew->rRun += rLogSize; + + /* The cost of visiting the index rows is N*K, where K is + ** between 1.1 and 3.0, depending on the relative sizes of the + ** index and table rows. If this is a non-covering index scan, + ** also add the cost of visiting table rows (N*3.0). */ + pNew->rRun = rSize + 1 + (15*pProbe->szIdxRow)/pTab->szTabRow; + if( m!=0 ){ + pNew->rRun = sqlite3LogEstAdd(pNew->rRun, rSize+16); } + whereLoopOutputAdjust(pWC, pNew); rc = whereLoopInsert(pBuilder, pNew); pNew->nOut = rSize; @@ -4732,8 +4736,7 @@ static int whereLoopAddOr(WhereLoopBuilder *pBuilder, Bitmask mExtra){ pNew->iSortIdx = 0; memset(&pNew->u, 0, sizeof(pNew->u)); for(i=0; rc==SQLITE_OK && irRun = sSum.a[i].rRun + 18; + pNew->rRun = sSum.a[i].rRun; pNew->nOut = sSum.a[i].nOut; pNew->prereq = sSum.a[i].prereq; rc = whereLoopInsert(pBuilder, pNew); diff --git a/test/analyze9.test b/test/analyze9.test index 820bcdb0e7..125cecf182 100644 --- a/test/analyze9.test +++ b/test/analyze9.test @@ -577,16 +577,16 @@ do_test 13.1 { execsql ANALYZE } {} do_eqp_test 13.2.1 { - SELECT * FROM t1 WHERE a='abc' AND rowid<15 AND b<20 + SELECT * FROM t1 WHERE a='abc' AND rowid<15 AND b<12 } {/SEARCH TABLE t1 USING INDEX i1/} do_eqp_test 13.2.2 { - SELECT * FROM t1 WHERE a='abc' AND rowid<'15' AND b<20 + SELECT * FROM t1 WHERE a='abc' AND rowid<'15' AND b<12 } {/SEARCH TABLE t1 USING INDEX i1/} do_eqp_test 13.3.1 { - SELECT * FROM t1 WHERE a='abc' AND rowid<100 AND b<20 + SELECT * FROM t1 WHERE a='abc' AND rowid<100 AND b<12 } {/SEARCH TABLE t1 USING INDEX i2/} do_eqp_test 13.3.2 { - SELECT * FROM t1 WHERE a='abc' AND rowid<'100' AND b<20 + SELECT * FROM t1 WHERE a='abc' AND rowid<'100' AND b<12 } {/SEARCH TABLE t1 USING INDEX i2/} #------------------------------------------------------------------------- diff --git a/test/autoindex1.test b/test/autoindex1.test index 0e5032b9b9..6cb0ab146a 100644 --- a/test/autoindex1.test +++ b/test/autoindex1.test @@ -97,6 +97,8 @@ do_test autoindex1-210 { PRAGMA automatic_index=ON; ANALYZE; UPDATE sqlite_stat1 SET stat='10000' WHERE tbl='t1'; + -- Table t2 actually contains 8 rows. + UPDATE sqlite_stat1 SET stat='16' WHERE tbl='t2'; ANALYZE sqlite_master; SELECT b, (SELECT d FROM t2 WHERE c=a) FROM t1; } diff --git a/test/where3.test b/test/where3.test index 8fa9fa7840..c2804b5579 100644 --- a/test/where3.test +++ b/test/where3.test @@ -231,6 +231,7 @@ do_execsql_test where3-3.0 { CREATE TABLE t301(a INTEGER PRIMARY KEY,b,c); CREATE INDEX t301c ON t301(c); INSERT INTO t301 VALUES(1,2,3); + INSERT INTO t301 VALUES(2,2,3); CREATE TABLE t302(x, y); INSERT INTO t302 VALUES(4,5); ANALYZE; @@ -251,7 +252,7 @@ do_execsql_test where3-3.2 { } {} do_execsql_test where3-3.3 { SELECT * FROM t301 WHERE c=3 AND a IS NOT NULL; -} {1 2 3} +} {1 2 3 2 2 3} if 0 { # Query planner no longer does this # Verify that when there are multiple tables in a join which must be diff --git a/test/whereG.test b/test/whereG.test index 17d5653223..6274213491 100644 --- a/test/whereG.test +++ b/test/whereG.test @@ -14,6 +14,7 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl +set testprefix whereG do_execsql_test whereG-1.0 { CREATE TABLE composer( @@ -179,5 +180,36 @@ do_execsql_test whereG-4.0 { ORDER BY x; } {right} +#------------------------------------------------------------------------- +# + +reset_db +do_execsql_test 5.1 { + CREATE TABLE t1(a, b, c); + CREATE INDEX i1 ON t1(a, b); +} +do_eqp_test 5.1.2 { + SELECT * FROM t1 WHERE a>? +} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?)}} +do_eqp_test 5.1.3 { + SELECT * FROM t1 WHERE likelihood(a>?, 0.9) +} {0 0 0 {SCAN TABLE t1}} + +do_test 5.2 { + for {set i 0} {$i < 100} {incr i} { + execsql { INSERT INTO t1 VALUES('abc', $i, $i); } + } + execsql { INSERT INTO t1 SELECT 'def', b, c FROM t1; } + execsql { ANALYZE } +} {} + +do_eqp_test 5.2.2 { + SELECT * FROM t1 WHERE likelihood(b>?, 0.01) +} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (ANY(a) AND b>?)}} + +do_eqp_test 5.2.3 { + SELECT * FROM t1 WHERE likelihood(b>?, 0.9) +} {0 0 0 {SCAN TABLE t1}} finish_test +