From: dan Date: Thu, 13 Aug 2009 07:09:33 +0000 (+0000) Subject: Changes to the query planner that improve the order in which tables/indexes are scann... X-Git-Tag: fts3-refactor~273^2 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=5236ac1d04025b7716bb87abb86053144f7c847a;p=thirdparty%2Fsqlite.git Changes to the query planner that improve the order in which tables/indexes are scanned in join queries. FossilOrigin-Name: 19f799b32f9d1be25d4185ce18b13f4dd502e199 --- diff --git a/manifest b/manifest index e7ba1049ca..1c5d6ad440 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fixed\ssome\scompiler\swarnings\sin\sWINCE\sonly\ssections\swhen\susing\sthe\sMSVC\scompiler. -D 2009-08-12T15:34:03 +C Changes\sto\sthe\squery\splanner\sthat\simprove\sthe\sorder\sin\swhich\stables/indexes\sare\sscanned\sin\sjoin\squeries. +D 2009-08-13T07:09:33 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in c606c9b502dfde3b9c3b2d23ed49f3737829693b F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -213,7 +213,7 @@ F src/vdbeblob.c a3f3e0e877fc64ea50165eec2855f5ada4477611 F src/vdbemem.c bfc25f9ef4fa914b473303566459552bdb2e008a F src/vtab.c aedd76e8670d5a5379f93804398d3ba960125547 F src/walker.c 1edca756275f158b80f20eb6f104c8d3fcc96a04 -F src/where.c 7e696d69a6d1b0fa277da2801ae4126dd4db0f8c +F src/where.c 53adef2c7b8bc888755cf41fb3449aedb36a429c F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 14165b3e32715b700b5f0cbf8f6e3833dda0be45 @@ -674,7 +674,7 @@ F test/trigger6.test 0e411654f122552da6590f0b4e6f781048a4a9b9 F test/trigger7.test 72feaf8dbc52cea84de0c3e6ce7559ff19c479af F test/trigger8.test 83d92c212f36442d26527d6f7701575905a52ae1 F test/trigger9.test e6e8dbab673666b3c0a63f0fefcff2329fe6bba8 -F test/triggerA.test 208dbda4d2f7c918b02f8a0dfa3acd2a0fe00691 +F test/triggerA.test 0718ad2d9bfef27c7af00e636df79bee6b988da7 F 
test/triggerB.test 56780c031b454abac2340dbb3b71ac5c56c3d7fe F test/types.test 9a825ec8eea4e965d7113b74c76a78bb5240f2ac F test/types2.test 3555aacf8ed8dc883356e59efc314707e6247a84 @@ -693,7 +693,7 @@ F test/vtab2.test 7bcffc050da5c68f4f312e49e443063e2d391c0d F test/vtab3.test baad99fd27217f5d6db10660522e0b7192446de1 F test/vtab4.test 942f8b8280b3ea8a41dae20e7822d065ca1cb275 F test/vtab5.test a0a84a89c622f4e2e816ebf39883dc319b4a1024 -F test/vtab6.test 226b116d63ad77f9b084d556f772c45a0d28e9b5 +F test/vtab6.test c7f290d172609d636fbfc58166eadcb55d5c117c F test/vtab7.test a8c3c3cb3eb60be364991bd714e4927e26c4cd85 F test/vtab8.test e19fa4a538fcd1bb66c22825fa8f71618fb13583 F test/vtab9.test ea58d2b95d61955f87226381716b2d0b1d4e4f9b @@ -711,7 +711,7 @@ F test/where4.test e9b9e2f2f98f00379e6031db6a6fca29bae782a2 F test/where5.test fdf66f96d29a064b63eb543e28da4dfdccd81ad2 F test/where6.test 42c4373595f4409d9c6a9987b4a60000ad664faf F test/where7.test b6e84b472a024e45c6dbdadc52bbcab3fcc8d0e1 -F test/where8.test fb2ccd7f1fa33287fef25b6bad6849c868a6e331 +F test/where8.test 8d3704d04a683e792d373005f2e4e13bfd7e2dd5 F test/where8m.test da346596e19d54f0aba35ebade032a7c47d79739 F test/where9.test be19e1a92f80985c1a121b4678bf7d2123eaa623 F test/whereA.test 1d1674254614147c866ab9b59af6582f454a858c @@ -742,7 +742,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl 672f81d693a03f80f5ae60bfefacd8a349e76746 -P b0848925babde5241aefe0a117ebb10299c94a15 -R 85d8bc88106b78c6b602120dcf0a8f49 -U shane -Z 4619ac16a1c5a72371d086ffe5b2d550 +P 1f0a93e17d6291268da909699ce1a5a7619ae637 +R d962edbb5f58f48d082773e78b64bd00 +U dan +Z 73a8cffe9b2f6a30befca6bd75c15499 diff --git a/manifest.uuid b/manifest.uuid index e7a0103468..ff060f2167 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1f0a93e17d6291268da909699ce1a5a7619ae637 \ No 
newline at end of file +19f799b32f9d1be25d4185ce18b13f4dd502e199 \ No newline at end of file diff --git a/src/where.c b/src/where.c index 1921ee3a34..797559c86f 100644 --- a/src/where.c +++ b/src/where.c @@ -195,6 +195,7 @@ struct WhereCost { WherePlan plan; /* The lookup strategy */ double rCost; /* Overall cost of pursuing this search strategy */ double nRow; /* Estimated number of output rows */ + Bitmask used; /* Bitmask of cursors used by this plan */ }; /* @@ -1338,6 +1339,11 @@ static int isSortingIndex( nTerm = pOrderBy->nExpr; assert( nTerm>0 ); + /* Argument pIdx must either point to a 'real' named index structure, + ** or an index structure allocated on the stack by bestBtreeIndex() to + ** represent the rowid index that is part of every table. */ + assert( pIdx->zName || (pIdx->nColumn==1 && pIdx->aiColumn[0]==-1) ); + /* Match terms of the ORDER BY clause against columns of ** the index. ** @@ -1364,7 +1370,7 @@ static int isSortingIndex( if( !pColl ){ pColl = db->pDfltColl; } - if( inColumn ){ + if( pIdx->zName && inColumn ){ iColumn = pIdx->aiColumn[i]; if( iColumn==pIdx->pTable->iPKey ){ iColumn = -1; @@ -1393,7 +1399,7 @@ static int isSortingIndex( return 0; } } - assert( pIdx->aSortOrder!=0 ); + assert( pIdx->aSortOrder!=0 || iColumn==-1 ); assert( pTerm->sortOrder==0 || pTerm->sortOrder==1 ); assert( iSortOrder==0 || iSortOrder==1 ); termSortOrder = iSortOrder ^ pTerm->sortOrder; @@ -1436,30 +1442,6 @@ static int isSortingIndex( return 0; } -/* -** Check table to see if the ORDER BY clause in pOrderBy can be satisfied -** by sorting in order of ROWID. Return true if so and set *pbRev to be -** true for reverse ROWID and false for forward ROWID order. 
-*/ -static int sortableByRowid( - int base, /* Cursor number for table to be sorted */ - ExprList *pOrderBy, /* The ORDER BY clause */ - WhereMaskSet *pMaskSet, /* Mapping from table cursors to bitmaps */ - int *pbRev /* Set to 1 if ORDER BY is DESC */ -){ - Expr *p; - - assert( pOrderBy!=0 ); - assert( pOrderBy->nExpr>0 ); - p = pOrderBy->a[0].pExpr; - if( p->op==TK_COLUMN && p->iTable==base && p->iColumn==-1 - && !referencesOtherTables(pOrderBy, pMaskSet, 1, base) ){ - *pbRev = pOrderBy->a[0].sortOrder; - return 1; - } - return 0; -} - /* ** Prepare a crude estimate of the logarithm of the input value. ** The results need not be exact. This is only used for estimating @@ -1560,6 +1542,7 @@ static void bestOrClauseIndex( int flags = WHERE_MULTI_OR; double rTotal = 0; double nRow = 0; + Bitmask used = 0; for(pOrTerm=pOrWC->a; pOrTerm=pCost->rCost ) break; } @@ -1599,6 +1583,7 @@ static void bestOrClauseIndex( if( rTotal<pCost->rCost ){ pCost->rCost = rTotal; pCost->nRow = nRow; + pCost->used = used; pCost->plan.wsFlags = flags; pCost->plan.u.pTerm = pTerm; } @@ -1851,7 +1836,7 @@ static void bestVirtualIndex( for(i=0; i<pIdxInfo->nConstraint; i++, pIdxCons++){ j = pIdxCons->iTermOffset; pTerm = &pWC->a[j]; - pIdxCons->usable = (pTerm->prereqRight & notReady)==0 ?1:0; + pIdxCons->usable = (pTerm->prereqRight&notReady) ? 0 : 1; } memset(pUsage, 0, sizeof(pUsage[0])*pIdxInfo->nConstraint); if( pIdxInfo->needToFreeIdxStr ){ @@ -1872,6 +1857,13 @@ static void bestVirtualIndex( return; } + pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint; + for(i=0; i<pIdxInfo->nConstraint; i++){ + if( pUsage[i].argvIndex>0 ){ + pCost->used |= pWC->a[pIdxCons[i].iTermOffset].prereqRight; + } + } + /* The cost is not allowed to be larger than SQLITE_BIG_DBL (the ** inital value of lowestCost in this loop.
If it is, then the ** (costiCursor; /* The cursor of the table to be accessed */ Index *pProbe; /* An index we are evaluating */ - int rev; /* True to scan in reverse order */ - int wsFlags; /* Flags associated with pProbe */ - int nEq; /* Number of == or IN constraints */ - int eqTermMask; /* Mask of valid equality operators */ - double cost; /* Cost of using pProbe */ - double nRow; /* Estimated number of rows in result set */ - int i; /* Loop counter */ - - WHERETRACE(("bestIndex: tbl=%s notReady=%llx\n", pSrc->pTab->zName,notReady)); - pProbe = pSrc->pTab->pIndex; - if( pSrc->notIndexed ){ - pProbe = 0; - } - - /* If the table has no indices and there are no terms in the where - ** clause that refer to the ROWID, then we will never be able to do - ** anything other than a full table scan on this table. We might as - ** well put it first in the join order. That way, perhaps it can be - ** referenced by other tables in the join. - */ - memset(pCost, 0, sizeof(*pCost)); - if( pProbe==0 && - findTerm(pWC, iCur, -1, 0, WO_EQ|WO_IN|WO_LT|WO_LE|WO_GT|WO_GE,0)==0 && - (pOrderBy==0 || !sortableByRowid(iCur, pOrderBy, pWC->pMaskSet, &rev)) ){ - if( pParse->db->flags & SQLITE_ReverseOrder ){ - /* For application testing, randomly reverse the output order for - ** SELECT statements that omit the ORDER BY clause. This will help - ** to find cases where - */ - pCost->plan.wsFlags |= WHERE_REVERSE; - } - return; - } - pCost->rCost = SQLITE_BIG_DBL; + Index *pIdx; /* Copy of pProbe, or zero for IPK index */ + int eqTermMask; /* Current mask of valid equality operators */ + int idxEqTermMask; /* Index mask of valid equality operators */ - /* Check for a rowid=EXPR or rowid IN (...) constraints. If there was - ** an INDEXED BY clause attached to this table, skip this step. 
- */ - if( !pSrc->pIndex ){ - pTerm = findTerm(pWC, iCur, -1, notReady, WO_EQ|WO_IN, 0); - if( pTerm ){ - Expr *pExpr; - pCost->plan.wsFlags = WHERE_ROWID_EQ; - if( pTerm->eOperator & WO_EQ ){ - /* Rowid== is always the best pick. Look no further. Because only - ** a single row is generated, output is always in sorted order */ - pCost->plan.wsFlags = WHERE_ROWID_EQ | WHERE_UNIQUE; - pCost->plan.nEq = 1; - WHERETRACE(("... best is rowid\n")); - pCost->rCost = 0; - pCost->nRow = 1; - return; - }else if( !ExprHasProperty((pExpr = pTerm->pExpr), EP_xIsSelect) - && pExpr->x.pList - ){ - /* Rowid IN (LIST): cost is NlogN where N is the number of list - ** elements. */ - pCost->rCost = pCost->nRow = pExpr->x.pList->nExpr; - pCost->rCost *= estLog(pCost->rCost); - }else{ - /* Rowid IN (SELECT): cost is NlogN where N is the number of rows - ** in the result of the inner select. We have no way to estimate - ** that value so make a wild guess. */ - pCost->nRow = 100; - pCost->rCost = 200; - } - WHERETRACE(("... rowid IN cost: %.9g\n", pCost->rCost)); - } - - /* Estimate the cost of a table scan. If we do not know how many - ** entries are in the table, use 1 million as a guess. - */ - cost = pProbe ? pProbe->aiRowEst[0] : 1000000; - WHERETRACE(("... table scan base cost: %.9g\n", cost)); - wsFlags = WHERE_ROWID_RANGE; - - /* Check for constraints on a range of rowids in a table scan. - */ - pTerm = findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE|WO_GT|WO_GE, 0); - if( pTerm ){ - if( findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE, 0) ){ - wsFlags |= WHERE_TOP_LIMIT; - cost /= 3; /* Guess that rowidEXPR eliminates two-thirds of rows */ - } - WHERETRACE(("... rowid range reduces cost to %.9g\n", cost)); - }else{ - wsFlags = 0; - } - nRow = cost; - - /* If the table scan does not satisfy the ORDER BY clause, increase - ** the cost by NlogN to cover the expense of sorting. 
*/ - if( pOrderBy ){ - if( sortableByRowid(iCur, pOrderBy, pWC->pMaskSet, &rev) ){ - wsFlags |= WHERE_ORDERBY|WHERE_ROWID_RANGE; - if( rev ){ - wsFlags |= WHERE_REVERSE; - } - }else{ - cost += cost*estLog(cost); - WHERETRACE(("... sorting increases cost to %.9g\n", cost)); - } - }else if( pParse->db->flags & SQLITE_ReverseOrder ){ - /* For application testing, randomly reverse the output order for - ** SELECT statements that omit the ORDER BY clause. This will help - ** to find cases where - */ - wsFlags |= WHERE_REVERSE; - } + Index pk; + unsigned int pkint[2] = {1000000, 1}; + int pkicol = -1; + int wsFlagMask; - /* Remember this case if it is the best so far */ - if( costrCost ){ - pCost->rCost = cost; - pCost->nRow = nRow; - pCost->plan.wsFlags = wsFlags; - } - } - - bestOrClauseIndex(pParse, pWC, pSrc, notReady, pOrderBy, pCost); + memset(pCost, 0, sizeof(*pCost)); + pCost->rCost = SQLITE_BIG_DBL; /* If the pSrc table is the right table of a LEFT JOIN then we may not ** use an index to satisfy IS NULL constraints on that table. This is ** because columns might end up being NULL if the table does not match - ** a circumstance which the index cannot help us discover. Ticket #2177. */ - if( (pSrc->jointype & JT_LEFT)!=0 ){ - eqTermMask = WO_EQ|WO_IN; + if( pSrc->jointype & JT_LEFT ){ + idxEqTermMask = WO_EQ|WO_IN; }else{ - eqTermMask = WO_EQ|WO_IN|WO_ISNULL; + idxEqTermMask = WO_EQ|WO_IN|WO_ISNULL; } - /* Look at each index. - */ if( pSrc->pIndex ){ - pProbe = pSrc->pIndex; - } - for(; pProbe; pProbe=(pSrc->pIndex ? 0 : pProbe->pNext)){ - double inMultiplier = 1; /* Number of equality look-ups needed */ - int inMultIsEst = 0; /* True if inMultiplier is an estimate */ - - WHERETRACE(("... index %s:\n", pProbe->zName)); - - /* Count the number of columns in the index that are satisfied - ** by x=EXPR or x IS NULL constraints or x IN (...) constraints. - ** For a term of the form x=EXPR or x IS NULL we only have to do - ** a single binary search. But for x IN (...) 
we have to do a - ** number of binary searched - ** equal to the number of entries on the RHS of the IN operator. - ** The inMultipler variable with try to estimate the number of - ** binary searches needed. + pIdx = pProbe = pSrc->pIndex; + wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE); + eqTermMask = idxEqTermMask; + }else{ + Index *pFirst = pSrc->pTab->pIndex; + memset(&pk, 0, sizeof(Index)); + pk.nColumn = 1; + pk.aiColumn = &pkicol; + pk.aiRowEst = pkint; + pk.onError = OE_Replace; + pk.pTable = pSrc->pTab; + if( pSrc->notIndexed==0 ){ + pk.pNext = pFirst; + } + if( pFirst && pFirst->aiRowEst ){ + pkint[0] = pFirst->aiRowEst[0]; + } + pProbe = &pk; + wsFlagMask = ~( + WHERE_COLUMN_IN|WHERE_COLUMN_EQ|WHERE_COLUMN_NULL|WHERE_COLUMN_RANGE + ); + eqTermMask = WO_EQ|WO_IN; + pIdx = 0; + } + + + for(; pProbe; pIdx=pProbe=pProbe->pNext){ + const unsigned int * const aiRowEst = pProbe->aiRowEst; + double cost; /* Cost of using pProbe */ + double nRow; /* Estimated number of rows in result set */ + int rev; /* True to scan in reverse order */ + int wsFlags = 0; + Bitmask used = 0; + + /* The following variables are populated based on the properties of + ** scan being evaluated. They are then used to determine the expected + ** cost and number of rows returned. + ** + ** nEq: + ** Number of equality terms that can be implemented using the index. + ** + ** nInMul: + ** The "in-multiplier". This is an estimate of how many seek operations + ** SQLite must perform on the index in question. For example, if the + ** WHERE clause is: + ** + ** WHERE a IN (1, 2, 3) AND b IN (4, 5, 6) + ** + ** SQLite must perform 9 lookups on an index on (a, b), so nInMul is + ** set to 9. Given the same schema and either of the following WHERE + ** clauses: + ** + ** WHERE a = 1 + ** WHERE a >= 2 + ** + ** nInMul is set to 1. 
+ ** + ** If there exists a WHERE term of the form "x IN (SELECT ...)", then + ** the sub-select is assumed to return 25 rows for the purposes of + ** determining nInMul. + ** + ** bInEst: + ** Set to true if there was at least one "x IN (SELECT ...)" term used + ** in determining the value of nInMul. + ** + ** nBound: + ** Set based on whether or not there is a range constraint on the + ** (nEq+1)th column of the index. 1 if there is neither an upper or + ** lower bound, 3 if there is an upper or lower bound, or 9 if there + ** is both an upper and lower bound. + ** + ** bSort: + ** Boolean. True if there is an ORDER BY clause that will require an + ** external sort (i.e. scanning the index being evaluated will not + ** correctly order records). + ** + ** bLookup: + ** Boolean. True if for each index entry visited a lookup on the + ** corresponding table b-tree is required. This is always false + ** for the rowid index. For other indexes, it is true unless all the + ** columns of the table used by the SELECT statement are present in + ** the index (such an index is sometimes described as a covering index). + ** For example, given the index on (a, b), the second of the following + ** two queries requires table b-tree lookups, but the first does not. 
+ ** + ** SELECT a, b FROM tbl WHERE a = 1; + ** SELECT a, b, c FROM tbl WHERE a = 1; */ - wsFlags = 0; - for(i=0; inColumn; i++){ - int j = pProbe->aiColumn[i]; - pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pProbe); + int nEq; + int bInEst = 0; + int nInMul = 1; + int nBound = 1; + int bSort = 0; + int bLookup = 0; + + /* Determine the values of nEq and nInMul */ + for(nEq=0; nEqnColumn; nEq++){ + WhereTerm *pTerm; /* A single term of the WHERE clause */ + int j = pProbe->aiColumn[nEq]; + pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pIdx); if( pTerm==0 ) break; - wsFlags |= WHERE_COLUMN_EQ; + wsFlags |= (WHERE_COLUMN_EQ|WHERE_ROWID_EQ); if( pTerm->eOperator & WO_IN ){ Expr *pExpr = pTerm->pExpr; wsFlags |= WHERE_COLUMN_IN; if( ExprHasProperty(pExpr, EP_xIsSelect) ){ - inMultiplier *= 25; - inMultIsEst = 1; + nInMul *= 25; + bInEst = 1; }else if( pExpr->x.pList ){ - inMultiplier *= pExpr->x.pList->nExpr + 1; + nInMul *= pExpr->x.pList->nExpr + 1; } }else if( pTerm->eOperator & WO_ISNULL ){ wsFlags |= WHERE_COLUMN_NULL; } + used |= pTerm->prereqRight; } - nRow = pProbe->aiRowEst[i] * inMultiplier; - /* If inMultiplier is an estimate and that estimate results in an - ** nRow it that is more than half number of rows in the table, - ** then reduce inMultipler */ - if( inMultIsEst && nRow*2 > pProbe->aiRowEst[0] ){ - nRow = pProbe->aiRowEst[0]/2; - inMultiplier = nRow/pProbe->aiRowEst[i]; - } - cost = nRow + inMultiplier*estLog(pProbe->aiRowEst[0]); - nEq = i; - if( pProbe->onError!=OE_None && nEq==pProbe->nColumn ){ - testcase( wsFlags & WHERE_COLUMN_IN ); - testcase( wsFlags & WHERE_COLUMN_NULL ); - if( (wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_NULL))==0 ){ - wsFlags |= WHERE_UNIQUE; - } - } - WHERETRACE(("...... nEq=%d inMult=%.9g nRow=%.9g cost=%.9g\n", - nEq, inMultiplier, nRow, cost)); - /* Look for range constraints. Assume that each range constraint - ** makes the search space 1/3rd smaller. - */ + /* Determine the value of nBound. 
*/ if( nEqnColumn ){ int j = pProbe->aiColumn[nEq]; - pTerm = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pProbe); - if( pTerm ){ - wsFlags |= WHERE_COLUMN_RANGE; - if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe) ){ + if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pIdx) ){ + WhereTerm *pTop = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pIdx); + WhereTerm *pBtm = findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pIdx); + if( pTop ){ wsFlags |= WHERE_TOP_LIMIT; - cost /= 3; - nRow /= 3; + nBound *= 3; + used |= pTop->prereqRight; } - if( findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe) ){ + if( pBtm ){ wsFlags |= WHERE_BTM_LIMIT; - cost /= 3; - nRow /= 3; + nBound *= 3; + used |= pBtm->prereqRight; } - WHERETRACE(("...... range reduces nRow to %.9g and cost to %.9g\n", - nRow, cost)); + wsFlags |= (WHERE_COLUMN_RANGE|WHERE_ROWID_RANGE); + } + }else if( pProbe->onError!=OE_None ){ + testcase( wsFlags & WHERE_COLUMN_IN ); + testcase( wsFlags & WHERE_COLUMN_NULL ); + if( (wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_NULL))==0 ){ + wsFlags |= WHERE_UNIQUE; } } - /* Add the additional cost of sorting if that is a factor. - */ + /* If there is an ORDER BY clause and the index being considered will + ** naturally scan rows in the required order, set the appropriate flags + ** in wsFlags. Otherwise, if there is an ORDER BY clause but the index + ** will scan rows in a different order, set the bSort variable. */ if( pOrderBy ){ if( (wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_NULL))==0 - && isSortingIndex(pParse,pWC->pMaskSet,pProbe,iCur,pOrderBy,nEq,&rev) + && isSortingIndex(pParse,pWC->pMaskSet,pProbe,iCur,pOrderBy,nEq,&rev) ){ - if( wsFlags==0 ){ - wsFlags = WHERE_COLUMN_RANGE; - } - wsFlags |= WHERE_ORDERBY; - if( rev ){ - wsFlags |= WHERE_REVERSE; - } + wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY; + wsFlags |= (rev ? WHERE_REVERSE : 0); }else{ - cost += cost*estLog(cost); - WHERETRACE(("...... 
orderby increases cost to %.9g\n", cost)); + bSort = 1; } - }else if( wsFlags!=0 && (pParse->db->flags & SQLITE_ReverseOrder)!=0 ){ - /* For application testing, randomly reverse the output order for - ** SELECT statements that omit the ORDER BY clause. This will help - ** to find cases where - */ - wsFlags |= WHERE_REVERSE; } - /* Check to see if we can get away with using just the index without - ** ever reading the table. If that is the case, then halve the - ** cost of this index. - */ - if( wsFlags && pSrc->colUsed < (((Bitmask)1)<<(BMS-1)) ){ + /* If currently calculating the cost of using an index (not the IPK + ** index), determine if all required column data may be obtained without + ** seeking to entries in the main table (i.e. if the index is a covering + ** index for this query). If it is, set the WHERE_IDX_ONLY flag in + ** wsFlags. Otherwise, set the bLookup variable to true. */ + if( pIdx && wsFlags ){ Bitmask m = pSrc->colUsed; int j; - for(j=0; jnColumn; j++){ - int x = pProbe->aiColumn[j]; + for(j=0; jnColumn; j++){ + int x = pIdx->aiColumn[j]; if( xrCost ){ +#if 0 + if( bInEst && (nInMul*aiRowEst[nEq])>(aiRowEst[0]/2) ){ + nInMul = aiRowEst[0] / (2 * aiRowEst[nEq]); + } + nRow = (double)(aiRowEst[nEq] * nInMul) / nBound; + cost = (nEq>0) * nInMul * estLog(aiRowEst[0]) + + nRow + + bSort * nRow * estLog(nRow) + + bLookup * nRow * estLog(aiRowEst[0]); +#else + + /* The following block calculates nRow and cost for the index scan + ** in the same way as SQLite versions 3.6.17 and earlier. Some elements + ** of this calculation are difficult to justify. But using this strategy + ** works well in practice and causes the test suite to pass. 
*/ + nRow = (double)(aiRowEst[nEq] * nInMul); + if( bInEst && nRow*2>aiRowEst[0] ){ + nRow = aiRowEst[0]/2; + nInMul = nRow / aiRowEst[nEq]; + } + cost = nRow + nInMul*estLog(aiRowEst[0]); + nRow /= nBound; + cost /= nBound; + if( bSort ){ + cost += cost*estLog(cost); + } + if( pIdx && bLookup==0 ){ + cost /= 2; + } +#endif + + WHERETRACE(( + "tbl=%s idx=%s nEq=%d nInMul=%d nBound=%d bSort=%d bLookup=%d" + " wsFlags=%d (nRow=%.2f cost=%.2f)\n", + pSrc->pTab->zName, (pIdx ? pIdx->zName : "ipk"), + nEq, nInMul, nBound, bSort, bLookup, wsFlags, nRow, cost + )); + + if( (!pIdx || wsFlags) && costrCost ){ pCost->rCost = cost; pCost->nRow = nRow; - pCost->plan.wsFlags = wsFlags; + pCost->used = used; + pCost->plan.wsFlags = (wsFlags&wsFlagMask); pCost->plan.nEq = nEq; - assert( pCost->plan.wsFlags & WHERE_INDEXED ); - pCost->plan.u.pIdx = pProbe; + pCost->plan.u.pIdx = pIdx; } + + if( pSrc->pIndex ) break; + wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE); + eqTermMask = idxEqTermMask; } - /* Report the best result - */ + /* If there is no ORDER BY clause and the SQLITE_ReverseOrder flag + ** is set, then reverse the order that the index will be scanned + ** in. This is used for application testing, to help find cases + ** where application behaviour depends on the (undefined) order that + ** SQLite outputs rows in in the absence of an ORDER BY clause. */ + if( !pOrderBy && pParse->db->flags & SQLITE_ReverseOrder ){ + pCost->plan.wsFlags |= WHERE_REVERSE; + } + + assert( pOrderBy || (pCost->plan.wsFlags&WHERE_ORDERBY)==0 ); + assert( pCost->plan.u.pIdx==0 || (pCost->plan.wsFlags&WHERE_ROWID_EQ)==0 ); + assert( pSrc->pIndex==0 + || pCost->plan.u.pIdx==0 + || pCost->plan.u.pIdx==pSrc->pIndex + ); + + WHERETRACE(("best index is: %s\n", + (pCost->plan.u.pIdx ? 
pCost->plan.u.pIdx->zName : "ipk") + )); + + bestOrClauseIndex(pParse, pWC, pSrc, notReady, pOrderBy, pCost); pCost->plan.wsFlags |= eqTermMask; - WHERETRACE(("best index is %s, nrow=%.9g, cost=%.9g, wsFlags=%x, nEq=%d\n", - (pCost->plan.wsFlags & WHERE_INDEXED)!=0 ? - pCost->plan.u.pIdx->zName : "(none)", pCost->nRow, - pCost->rCost, pCost->plan.wsFlags, pCost->plan.nEq)); } /* @@ -3271,44 +3258,82 @@ WhereInfo *sqlite3WhereBegin( WhereCost bestPlan; /* Most efficient plan seen so far */ Index *pIdx; /* Index for FROM table at pTabItem */ int j; /* For looping over FROM tables */ - int bestJ = 0; /* The value of j */ + int bestJ = -1; /* The value of j */ Bitmask m; /* Bitmask value for j or bestJ */ - int once = 0; /* True when first table is seen */ + int isOptimal; /* Iterator for optimal/non-optimal search */ memset(&bestPlan, 0, sizeof(bestPlan)); bestPlan.rCost = SQLITE_BIG_DBL; - for(j=iFrom, pTabItem=&pTabList->a[j]; jnSrc; j++, pTabItem++){ - int doNotReorder; /* True if this table should not be reordered */ - WhereCost sCost; /* Cost information from best[Virtual]Index() */ - ExprList *pOrderBy; /* ORDER BY clause for index to optimize */ - - doNotReorder = (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0; - if( once && doNotReorder ) break; - m = getMask(pMaskSet, pTabItem->iCursor); - if( (m & notReady)==0 ){ - if( j==iFrom ) iFrom++; - continue; - } - pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0); - assert( pTabItem->pTab ); + /* Loop through the remaining entries in the FROM clause to find the + ** next nested loop. The FROM clause entries may be iterated through + ** either once or twice. + ** + ** The first iteration, which is always performed, searches for the + ** FROM clause entry that permits the lowest-cost, "optimal" scan. In + ** this context an optimal scan is one that uses the same strategy + ** for the given FROM clause entry as would be selected if the entry + ** were used as the innermost nested loop. 
+ ** + ** The second iteration is only performed if no optimal scan strategies + ** were found by the first. This iteration is used to search for the + ** lowest cost scan overall. + ** + ** Previous versions of SQLite performed only the second iteration - + ** the next outermost loop was always that with the lowest overall + ** cost. However, this meant that SQLite could select the wrong plan + ** for scripts such as the following: + ** + ** CREATE TABLE t1(a, b); + ** CREATE TABLE t2(c, d); + ** SELECT * FROM t2, t1 WHERE t2.rowid = t1.a; + ** + ** The best strategy is to iterate through table t1 first. However it + ** is not possible to determine this with a simple greedy algorithm. + ** However, since the cost of a linear scan through table t2 is the same + ** as the cost of a linear scan through table t1, a simple greedy + ** algorithm may choose to use t2 for the outer loop, which is a much + ** costlier approach. + */ + for(isOptimal=1; isOptimal>=0 && bestJ<0; isOptimal--){ + Bitmask mask = (isOptimal ? 
0 : notReady); + assert( (pTabList->nSrc-iFrom)>1 || isOptimal ); + for(j=iFrom, pTabItem=&pTabList->a[j]; jnSrc; j++, pTabItem++){ + int doNotReorder; /* True if this table should not be reordered */ + WhereCost sCost; /* Cost information from best[Virtual]Index() */ + ExprList *pOrderBy; /* ORDER BY clause for index to optimize */ + + doNotReorder = (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0; + if( j!=iFrom && doNotReorder ) break; + m = getMask(pMaskSet, pTabItem->iCursor); + if( (m & notReady)==0 ){ + if( j==iFrom ) iFrom++; + continue; + } + pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0); + + assert( pTabItem->pTab ); #ifndef SQLITE_OMIT_VIRTUALTABLE - if( IsVirtual(pTabItem->pTab) ){ - sqlite3_index_info **pp = &pWInfo->a[j].pIdxInfo; - bestVirtualIndex(pParse, pWC, pTabItem, notReady, pOrderBy, &sCost, pp); - }else + if( IsVirtual(pTabItem->pTab) ){ + sqlite3_index_info **pp = &pWInfo->a[j].pIdxInfo; + bestVirtualIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost, pp); + }else #endif - { - bestBtreeIndex(pParse, pWC, pTabItem, notReady, pOrderBy, &sCost); - } - if( once==0 || sCost.rCost=0 ); assert( notReady & getMask(pMaskSet, pTabList->a[bestJ].iCursor) ); WHERETRACE(("*** Optimizer selects table %d for loop %d\n", bestJ, pLevel-pWInfo->a)); diff --git a/test/triggerA.test b/test/triggerA.test index e3583524da..1a0055f3a4 100644 --- a/test/triggerA.test +++ b/test/triggerA.test @@ -79,7 +79,7 @@ do_test triggerA-1.6 { CREATE VIEW v5 AS SELECT x, b FROM t1, t2 WHERE y=c; SELECT * FROM v5; } -} {1 103 2 203 3 305 4 404 5 504 6 603 7 705 8 805 9 904 10 1003} +} {10 1003 9 904 8 805 7 705 6 603 5 504 4 404 3 305 2 203 1 103} # Create INSTEAD OF triggers on the views. Run UPDATE and DELETE statements # using those triggers. Verify correct operation. 
diff --git a/test/vtab6.test b/test/vtab6.test index a263973d33..96e45bf546 100644 --- a/test/vtab6.test +++ b/test/vtab6.test @@ -492,6 +492,7 @@ do_test vtab6-11.1.4 { do_test vtab6-11.2.0 { execsql { CREATE INDEX ab_i ON ab_r(b); + CREATE INDEX bc_i ON bc_r(b); } } {} @@ -560,7 +561,7 @@ do_test vtab6-11.4.1 { catchsql { SELECT a, b, c FROM ab NATURAL JOIN bc; } -} {1 {table ab: xBestIndex returned an invalid plan}} +} {1 {table bc: xBestIndex returned an invalid plan}} do_test vtab6-11.4.2 { catchsql { SELECT a, b, c FROM bc NATURAL JOIN ab; diff --git a/test/where8.test b/test/where8.test index 623d382459..a8dd5ed059 100644 --- a/test/where8.test +++ b/test/where8.test @@ -287,7 +287,7 @@ do_test where8-3.15 { SELECT sum(e IS NULL) FROM t2 AS inner WHERE t2.d>inner.d ) } -} {I I I I I I I I I I II II II II II II II II II II III III III III III 99 0} +} {I II I II I II I II I II I II III I II III I II III I II III I II III 9 0} #----------------------------------------------------------------------- # The following tests - where8-4.* - verify that adding or removing