From: dan Date: Fri, 25 Apr 2014 20:22:45 +0000 (+0000) Subject: If the user provides likelihood() data for a WHERE clause term used as part of an... X-Git-Tag: version-3.8.5~65^2~17 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8ad1d8ba38ef4bf965ab1a657892b0e57be00a97;p=thirdparty%2Fsqlite.git If the user provides likelihood() data for a WHERE clause term used as part of an index key, have the planner use it when calculating the expected number of rows visited by the loop. FossilOrigin-Name: c51efaa5d29ee0a91b9e6a83a8dd82530670811a --- diff --git a/manifest b/manifest index 0c855c53e6..afdc8889fe 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Store\svalues\sloaded\sfrom\sthe\sstat1\stable\sas\slogarithmic\svalues\sin\smemory. -D 2014-04-25T15:01:01.691 +C If\sthe\suser\sprovides\slikelihood()\sdata\sfor\sa\sWHERE\sclause\sterm\sused\sas\spart\sof\san\sindex\skey,\shave\sthe\splanner\suse\sit\swhen\scalculating\sthe\sexpected\snumber\sof\srows\svisited\sby\sthe\sloop. +D 2014-04-25T20:22:45.291 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2ef13430cd359f7b361bb863504e227b25cc7f81 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -222,7 +222,7 @@ F src/shell.c 2afe7a7154e97be0c74c5feacf09626bda8493be F src/sqlite.h.in bde98816e1ba0c9ffef50afe7b32f4e5a8f54fe0 F src/sqlite3.rc 11094cc6a157a028b301a9f06b3d03089ea37c3e F src/sqlite3ext.h 886f5a34de171002ad46fae8c36a7d8051c190fc -F src/sqliteInt.h bad694fd6b91b10a7a5aafa16fd05b504bad6b6e +F src/sqliteInt.h 63656cfa5a8221c3eb1a182e97d61b1fe2dfd7da F src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d F src/status.c 7ac05a5c7017d0b9f0b4bcd701228b784f987158 F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e @@ -291,8 +291,8 @@ F src/vtab.c 21b932841e51ebd7d075e2d0ad1415dce8d2d5fd F src/wal.c 76e7fc6de229bea8b30bb2539110f03a494dc3a8 F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4 F src/walker.c 11edb74d587bc87b33ca96a5173e3ec1b8389e45 -F src/where.c 15a5c94c8c93500e141c6cb25af600615dc196d8 -F src/whereInt.h 929c1349b5355fd44f22cee5c14d72b3329c58a6 +F src/where.c 2b3f47801939c2853c03bba3d5aa83abdd51211e +F src/whereInt.h 6804c2e5010378568c2bb1350477537755296a46 F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/aggnested.test 45c0201e28045ad38a530b5a144b73cd4aa2cfd6 @@ -1093,7 +1093,7 @@ F test/whereC.test d6f4ecd4fa2d9429681a5b22a25d2bda8e86ab8a F test/whereD.test fd9120e262f9da3c45940f52aefeef4d15b904e5 F test/whereE.test b3a055eef928c992b0a33198a7b8dc10eea5ad2f F test/whereF.test 5b2ba0dbe8074aa13e416b37c753991f0a2492d7 -F test/whereG.test 8189fedf3b98ab581bb70f830175e403a0ef1722 +F test/whereG.test 0ac23e5e8311b69d87245f4a85112de321031658 F test/whereH.test e4b07f7a3c2f5d31195cd33710054c78667573b2 F test/wherelimit.test 5e9fd41e79bb2b2d588ed999d641d9c965619b31 F test/wild001.test bca33f499866f04c24510d74baf1e578d4e44b1c @@ -1161,7 +1161,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01 F tool/win/sqlite.vsix 030f3eeaf2cb811a3692ab9c14d021a75ce41fff -P c5a6ec0a880652dc8f4593d9f7acd58ddc3dc5f3 -R 63fad85eb66cf540e6aa11923b167cbf +P 1bd74c49ddab6f53bb6eaa57907eff44c2580dd6 +R 804d3aff8175381c892eb4eebbb86307 U dan -Z 04f4e3645ebd9e33526df3bd26c04a76 +Z 6dd1c2a2c429274ab108441094bd0963 diff --git a/manifest.uuid b/manifest.uuid index 5436721392..71ddf4b2d3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1bd74c49ddab6f53bb6eaa57907eff44c2580dd6 \ No newline at end of file +c51efaa5d29ee0a91b9e6a83a8dd82530670811a \ No newline at end of file diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 3ddcc8610f..b584ff401d 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -1680,9 +1680,6 @@ struct UnpackedRecord { struct Index { char *zName; /* Name of this index */ i16 *aiColumn; /* Which columns are used by this index. 1st is 0 */ -#if 0 - tRowcnt *aiRowEst; /* From ANALYZE: Est. rows selected by each column */ -#endif LogEst *aiRowLogEst; /* From ANALYZE: Est. rows selected by each column */ Table *pTable; /* The SQL table being indexed */ char *zColAff; /* String defining the affinity of each column */ diff --git a/src/where.c b/src/where.c index 909129f666..73d3a22b7c 100644 --- a/src/where.c +++ b/src/where.c @@ -4040,7 +4040,6 @@ static int whereLoopAddBtreeIndex( LogEst saved_nOut; /* Original value of pNew->nOut */ int iCol; /* Index of the column in the table */ int rc = SQLITE_OK; /* Return code */ - LogEst nRowEst; /* Estimated index selectivity */ LogEst rLogSize; /* Logarithm of table size */ WhereTerm *pTop = 0, *pBtm = 0; /* Top and bottom range constraints */ @@ -4061,10 +4060,8 @@ static int whereLoopAddBtreeIndex( assert( pNew->u.btree.nEq<=pProbe->nKeyCol ); if( pNew->u.btree.nEq < pProbe->nKeyCol ){ iCol = pProbe->aiColumn[pNew->u.btree.nEq]; - nRowEst = pProbe->aiRowLogEst[pNew->u.btree.nEq+1]; }else{ iCol = -1; - nRowEst = 0; } pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, iCol, opMask, pProbe); @@ -4095,35 +4092,40 @@ static int whereLoopAddBtreeIndex( pNew->u.btree.nSkip++; pNew->aLTerm[pNew->nLTerm++] = 0; pNew->wsFlags |= WHERE_SKIPSCAN; - nIter = pProbe->aiRowLogEst[0] - pProbe->aiRowLogEst[saved_nEq+1]; - pNew->rRun = rLogSize + nIter; - pNew->nOut += nIter; - whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter); + nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1]; + pNew->nOut -= nIter; + whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul); pNew->nOut = saved_nOut; } for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){ + u16 eOp = pTerm->eOperator; /* Shorthand for pTerm->eOperator */ LogEst rCostIdx; + LogEst nOutUnadjusted; /* nOut before IN() and WHERE adjustments */ int nIn = 0; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 int nRecValid = pBuilder->nRecValid; #endif - if( (pTerm->eOperator==WO_ISNULL || (pTerm->wtFlags&TERM_VNULL)!=0) + if( (eOp==WO_ISNULL || (pTerm->wtFlags&TERM_VNULL)!=0) && (iCol<0 || pSrc->pTab->aCol[iCol].notNull) ){ continue; /* ignore IS [NOT] NULL constraints on NOT NULL columns */ } if( pTerm->prereqRight & pNew->maskSelf ) continue; - assert( pNew->nOut==saved_nOut ); - pNew->wsFlags = saved_wsFlags; pNew->u.btree.nEq = saved_nEq; pNew->nLTerm = saved_nLTerm; if( whereLoopResize(db, pNew, pNew->nLTerm+1) ) break; /* OOM */ pNew->aLTerm[pNew->nLTerm++] = pTerm; pNew->prereq = (saved_prereq | pTerm->prereqRight) & ~pNew->maskSelf; - pNew->rRun = rLogSize; /* Baseline cost is log2(N). Adjustments below */ - if( pTerm->eOperator & WO_IN ){ + + assert( nInMul==0 + || (pNew->wsFlags & WHERE_COLUMN_NULL)!=0 + || (pNew->wsFlags & WHERE_COLUMN_IN)!=0 + || (pNew->wsFlags & WHERE_SKIPSCAN)!=0 + ); + + if( eOp & WO_IN ){ Expr *pExpr = pTerm->pExpr; pNew->wsFlags |= WHERE_COLUMN_IN; if( ExprHasProperty(pExpr, EP_xIsSelect) ){ @@ -4135,87 +4137,111 @@ static int whereLoopAddBtreeIndex( } assert( nIn>0 ); /* RHS always has 2 or more terms... The parser ** changes "x IN (?)" into "x=?". */ - pNew->rRun += nIn; - pNew->u.btree.nEq++; - pNew->nOut = nRowEst + nInMul + nIn; - }else if( pTerm->eOperator & (WO_EQ) ){ - assert( - (pNew->wsFlags & (WHERE_COLUMN_NULL|WHERE_COLUMN_IN|WHERE_SKIPSCAN))!=0 - || nInMul==0 - ); + + }else if( eOp & (WO_EQ) ){ pNew->wsFlags |= WHERE_COLUMN_EQ; - if( iCol<0 || (nInMul==0 && pNew->u.btree.nEq==pProbe->nKeyCol-1)){ - assert( (pNew->wsFlags & WHERE_COLUMN_IN)==0 || iCol<0 ); + if( iCol<0 || (nInMul==0 && pNew->u.btree.nEq==pProbe->nKeyCol-1) ){ if( iCol>=0 && pProbe->onError==OE_None ){ pNew->wsFlags |= WHERE_UNQ_WANTED; }else{ pNew->wsFlags |= WHERE_ONEROW; } } - pNew->u.btree.nEq++; - pNew->nOut = nRowEst + nInMul; - }else if( pTerm->eOperator & (WO_ISNULL) ){ - pNew->wsFlags |= WHERE_COLUMN_NULL; - pNew->u.btree.nEq++; - /* TUNING: IS NULL selects 2 rows */ - nIn = 10; assert( 10==sqlite3LogEst(2) ); - pNew->nOut = nRowEst + nInMul + nIn; - }else if( pTerm->eOperator & (WO_GT|WO_GE) ){ - testcase( pTerm->eOperator & WO_GT ); - testcase( pTerm->eOperator & WO_GE ); + + }else if( eOp & (WO_GT|WO_GE) ){ + testcase( eOp & WO_GT ); + testcase( eOp & WO_GE ); pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_BTM_LIMIT; pBtm = pTerm; pTop = 0; - }else{ - assert( pTerm->eOperator & (WO_LT|WO_LE) ); - testcase( pTerm->eOperator & WO_LT ); - testcase( pTerm->eOperator & WO_LE ); + }else if( (eOp & WO_ISNULL)==0 ){ + assert( eOp & (WO_LT|WO_LE) ); + testcase( eOp & WO_LT ); + testcase( eOp & WO_LE ); pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_TOP_LIMIT; pTop = pTerm; pBtm = (pNew->wsFlags & WHERE_BTM_LIMIT)!=0 ? pNew->aLTerm[pNew->nLTerm-2] : 0; } + + /* At this point pNew->nOut is set to the number of rows expected to + ** be visited by the index scan before considering term pTerm, or the + ** values of nIn and nInMul. In other words, assuming that all + ** "x IN(...)" terms are replaced with "x = ?". This block updates + ** the value of pNew->nOut to account for pTerm (but not nIn/nInMul). */ + assert( pNew->nOut==saved_nOut ); if( pNew->wsFlags & WHERE_COLUMN_RANGE ){ /* Adjust nOut using stat3/stat4 data. Or, if there is no stat3/stat4 ** data, using some other estimate. */ - assert( pNew->nOut==saved_nOut ); whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew); - } + }else{ + int nEq = ++pNew->u.btree.nEq; + assert( eOp & (WO_ISNULL|WO_EQ|WO_IN) ); + + assert( pNew->nOut==saved_nOut ); + if( pTerm->truthProb<=0 ){ + assert( (eOp & WO_IN) || nIn==0 ); + pNew->nOut += pTerm->truthProb; + pNew->nOut -= nIn; + pNew->wsFlags |= WHERE_LIKELIHOOD; + }else{ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 - if( nInMul==0 - && pProbe->nSample - && pNew->u.btree.nEq<=pProbe->nSampleCol - && OptimizationEnabled(db, SQLITE_Stat3) - ){ - Expr *pExpr = pTerm->pExpr; - tRowcnt nOut = 0; - if( (pTerm->eOperator & (WO_EQ|WO_ISNULL))!=0 ){ - testcase( pTerm->eOperator & WO_EQ ); - testcase( pTerm->eOperator & WO_ISNULL ); - rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut); - }else if( (pTerm->eOperator & WO_IN) - && !ExprHasProperty(pExpr, EP_xIsSelect) ){ - rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut); - } - assert( nOut==0 || rc==SQLITE_OK ); - if( nOut ){ - pNew->nOut = sqlite3LogEst(nOut); - if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut; + tRowcnt nOut = 0; + if( nInMul==0 + && pProbe->nSample + && pNew->u.btree.nEq<=pProbe->nSampleCol + && OptimizationEnabled(db, SQLITE_Stat3) + && ((eOp & WO_IN)==0 || !ExprHasProperty(pTerm->pExpr, EP_xIsSelect)) + && (pNew->wsFlags & WHERE_LIKELIHOOD)==0 + ){ + Expr *pExpr = pTerm->pExpr; + if( (eOp & (WO_EQ|WO_ISNULL))!=0 ){ + testcase( eOp & WO_EQ ); + testcase( eOp & WO_ISNULL ); + rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut); + }else{ + rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut); + } + assert( rc!=SQLITE_OK || nOut>0 ); + if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; + if( rc!=SQLITE_OK ) break; /* Jump out of the pTerm loop */ + if( nOut ){ + pNew->nOut = sqlite3LogEst(nOut); + if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut; + pNew->nOut -= nIn; + } + } + if( nOut==0 ) +#endif + { + pNew->nOut += (pProbe->aiRowLogEst[nEq] - pProbe->aiRowLogEst[nEq-1]); + if( eOp & WO_ISNULL ){ + /* TUNING: If there is no likelihood() value, assume that a + ** "col IS NULL" expression matches twice as many rows + ** as (col=?). */ + pNew->nOut += 10; + } + } } } -#endif + /* Set rCostIdx to the cost of visiting selected rows in index. Add ** it to pNew->rRun, which is currently set to the cost of the index ** seek only. Then, if this is a non-covering index, add the cost of ** visiting the rows in the main table. */ rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow; - pNew->rRun = sqlite3LogEstAdd(pNew->rRun, rCostIdx); + pNew->rRun = sqlite3LogEstAdd(rLogSize, rCostIdx); if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){ pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut + 16); } + nOutUnadjusted = pNew->nOut; + pNew->rRun += nInMul + nIn; + pNew->nOut += nInMul + nIn; whereLoopOutputAdjust(pBuilder->pWC, pNew); rc = whereLoopInsert(pBuilder, pNew); + pNew->nOut = nOutUnadjusted; + if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0 && pNew->u.btree.nEq<(pProbe->nKeyCol + (pProbe->zName!=0)) ){ diff --git a/src/whereInt.h b/src/whereInt.h index 72e7530db9..010cd6e8ae 100644 --- a/src/whereInt.h +++ b/src/whereInt.h @@ -458,3 +458,4 @@ struct WhereInfo { #define WHERE_AUTO_INDEX 0x00004000 /* Uses an ephemeral index */ #define WHERE_SKIPSCAN 0x00008000 /* Uses the skip-scan algorithm */ #define WHERE_UNQ_WANTED 0x00010000 /* WHERE_ONEROW would have been helpful*/ +#define WHERE_LIKELIHOOD 0x00020000 /* A likelihood() is affecting nOut */ diff --git a/test/whereG.test b/test/whereG.test index 6274213491..66918a35fa 100644 --- a/test/whereG.test +++ b/test/whereG.test @@ -181,9 +181,14 @@ do_execsql_test whereG-4.0 { } {right} #------------------------------------------------------------------------- -# - +# Test that likelihood() specifications on indexed terms are taken into +# account by various forms of loops. +# +# 5.1.*: open ended range scans +# 5.2.*: skip-scans +# reset_db + do_execsql_test 5.1 { CREATE TABLE t1(a, b, c); CREATE INDEX i1 ON t1(a, b); @@ -202,14 +207,19 @@ do_test 5.2 { execsql { INSERT INTO t1 SELECT 'def', b, c FROM t1; } execsql { ANALYZE } } {} - do_eqp_test 5.2.2 { SELECT * FROM t1 WHERE likelihood(b>?, 0.01) } {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (ANY(a) AND b>?)}} - do_eqp_test 5.2.3 { SELECT * FROM t1 WHERE likelihood(b>?, 0.9) } {0 0 0 {SCAN TABLE t1}} +do_eqp_test 5.3.1 { + SELECT * FROM t1 WHERE a=? +} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)}} +do_eqp_test 5.3.2 { + SELECT * FROM t1 WHERE likelihood(a=?, 0.9) +} {0 0 0 {SCAN TABLE t1}} + finish_test