Changes to the query planner that improve the order in which tables/indexes are scanned in join queries.

author dan <dan@noemail.net>

Thu, 13 Aug 2009 07:09:33 +0000 (07:09 +0000)

committer dan <dan@noemail.net>

Thu, 13 Aug 2009 07:09:33 +0000 (07:09 +0000)
author dan <dan@noemail.net>
Thu, 13 Aug 2009 07:09:33 +0000 (07:09 +0000)
committer dan <dan@noemail.net>
Thu, 13 Aug 2009 07:09:33 +0000 (07:09 +0000)
diff --git a/manifest b/manifest

index e7ba1049ca6d1c1c810f8491c2a7e4cf28802b2a..1c5d6ad4406b5c673241930cc32e7acb6dd306b3 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Fixed\ssome\scompiler\swarnings\sin\sWINCE\sonly\ssections\swhen\susing\sthe\sMSVC\scompiler.
-D 2009-08-12T15:34:03
+C Changes\sto\sthe\squery\splanner\sthat\simprove\sthe\sorder\sin\swhich\stables/indexes\sare\sscanned\sin\sjoin\squeries.
+D 2009-08-13T07:09:33
  F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
  F Makefile.in c606c9b502dfde3b9c3b2d23ed49f3737829693b
  F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@@ -213,7 +213,7 @@ F src/vdbeblob.c a3f3e0e877fc64ea50165eec2855f5ada4477611
  F src/vdbemem.c bfc25f9ef4fa914b473303566459552bdb2e008a
  F src/vtab.c aedd76e8670d5a5379f93804398d3ba960125547
  F src/walker.c 1edca756275f158b80f20eb6f104c8d3fcc96a04
-F src/where.c 7e696d69a6d1b0fa277da2801ae4126dd4db0f8c
+F src/where.c 53adef2c7b8bc888755cf41fb3449aedb36a429c
  F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
  F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87
  F test/all.test 14165b3e32715b700b5f0cbf8f6e3833dda0be45
@@ -674,7 +674,7 @@ F test/trigger6.test 0e411654f122552da6590f0b4e6f781048a4a9b9
  F test/trigger7.test 72feaf8dbc52cea84de0c3e6ce7559ff19c479af
  F test/trigger8.test 83d92c212f36442d26527d6f7701575905a52ae1
  F test/trigger9.test e6e8dbab673666b3c0a63f0fefcff2329fe6bba8
-F test/triggerA.test 208dbda4d2f7c918b02f8a0dfa3acd2a0fe00691
+F test/triggerA.test 0718ad2d9bfef27c7af00e636df79bee6b988da7
  F test/triggerB.test 56780c031b454abac2340dbb3b71ac5c56c3d7fe
  F test/types.test 9a825ec8eea4e965d7113b74c76a78bb5240f2ac
  F test/types2.test 3555aacf8ed8dc883356e59efc314707e6247a84
@@ -693,7 +693,7 @@ F test/vtab2.test 7bcffc050da5c68f4f312e49e443063e2d391c0d
  F test/vtab3.test baad99fd27217f5d6db10660522e0b7192446de1
  F test/vtab4.test 942f8b8280b3ea8a41dae20e7822d065ca1cb275
  F test/vtab5.test a0a84a89c622f4e2e816ebf39883dc319b4a1024
-F test/vtab6.test 226b116d63ad77f9b084d556f772c45a0d28e9b5
+F test/vtab6.test c7f290d172609d636fbfc58166eadcb55d5c117c
  F test/vtab7.test a8c3c3cb3eb60be364991bd714e4927e26c4cd85
  F test/vtab8.test e19fa4a538fcd1bb66c22825fa8f71618fb13583
  F test/vtab9.test ea58d2b95d61955f87226381716b2d0b1d4e4f9b
@@ -711,7 +711,7 @@ F test/where4.test e9b9e2f2f98f00379e6031db6a6fca29bae782a2
  F test/where5.test fdf66f96d29a064b63eb543e28da4dfdccd81ad2
  F test/where6.test 42c4373595f4409d9c6a9987b4a60000ad664faf
  F test/where7.test b6e84b472a024e45c6dbdadc52bbcab3fcc8d0e1
-F test/where8.test fb2ccd7f1fa33287fef25b6bad6849c868a6e331
+F test/where8.test 8d3704d04a683e792d373005f2e4e13bfd7e2dd5
  F test/where8m.test da346596e19d54f0aba35ebade032a7c47d79739
  F test/where9.test be19e1a92f80985c1a121b4678bf7d2123eaa623
  F test/whereA.test 1d1674254614147c866ab9b59af6582f454a858c
@@ -742,7 +742,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
  F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
  F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
  F tool/vdbe-compress.tcl 672f81d693a03f80f5ae60bfefacd8a349e76746
-P b0848925babde5241aefe0a117ebb10299c94a15
-R 85d8bc88106b78c6b602120dcf0a8f49
-U shane
-Z 4619ac16a1c5a72371d086ffe5b2d550
+P 1f0a93e17d6291268da909699ce1a5a7619ae637
+R d962edbb5f58f48d082773e78b64bd00
+U dan
+Z 73a8cffe9b2f6a30befca6bd75c15499
diff --git a/manifest.uuid b/manifest.uuid

index e7a01034680e9ab6b1d4d85b8dd440dceb7411f2..ff060f216730ed8c45f13fcdc07e41f98da11756 100644 (file)
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-1f0a93e17d6291268da909699ce1a5a7619ae637
-\ No newline at end of file
+19f799b32f9d1be25d4185ce18b13f4dd502e199
+\ No newline at end of file
diff --git a/src/where.c b/src/where.c

index 1921ee3a34e8e16a155c0d5ba09bfdcf68efc11d..797559c86f6f6db3462f1c16f6d0b4e3c4caa4e5 100644 (file)
--- a/src/where.c
+++ b/src/where.c
@@ -195,6 +195,7 @@ struct WhereCost {
    WherePlan plan;    /* The lookup strategy */
    double rCost;      /* Overall cost of pursuing this search strategy */
    double nRow;       /* Estimated number of output rows */
+  Bitmask used;      /* Bitmask of cursors used by this plan */
  };
  
  /*
@@ -1338,6 +1339,11 @@ static int isSortingIndex(
    nTerm = pOrderBy->nExpr;
    assert( nTerm>0 );
  
+  /* Argument pIdx must either point to a 'real' named index structure, 
+  ** or an index structure allocated on the stack by bestBtreeIndex() to
+  ** represent the rowid index that is part of every table.  */
+  assert( pIdx->zName || (pIdx->nColumn==1 && pIdx->aiColumn[0]==-1) );
+
    /* Match terms of the ORDER BY clause against columns of
    ** the index.
    **
@@ -1364,7 +1370,7 @@ static int isSortingIndex(
      if( !pColl ){
        pColl = db->pDfltColl;
      }
-    if( i<pIdx->nColumn ){
+    if( pIdx->zName && i<pIdx->nColumn ){
        iColumn = pIdx->aiColumn[i];
        if( iColumn==pIdx->pTable->iPKey ){
          iColumn = -1;
@@ -1393,7 +1399,7 @@ static int isSortingIndex(
          return 0;
        }
      }
-    assert( pIdx->aSortOrder!=0 );
+    assert( pIdx->aSortOrder!=0 || iColumn==-1 );
      assert( pTerm->sortOrder==0 || pTerm->sortOrder==1 );
      assert( iSortOrder==0 || iSortOrder==1 );
      termSortOrder = iSortOrder ^ pTerm->sortOrder;
@@ -1436,30 +1442,6 @@ static int isSortingIndex(
    return 0;
  }
  
-/*
-** Check table to see if the ORDER BY clause in pOrderBy can be satisfied
-** by sorting in order of ROWID.  Return true if so and set *pbRev to be
-** true for reverse ROWID and false for forward ROWID order.
-*/
-static int sortableByRowid(
-  int base,               /* Cursor number for table to be sorted */
-  ExprList *pOrderBy,     /* The ORDER BY clause */
-  WhereMaskSet *pMaskSet, /* Mapping from table cursors to bitmaps */
-  int *pbRev              /* Set to 1 if ORDER BY is DESC */
-){
-  Expr *p;
-
-  assert( pOrderBy!=0 );
-  assert( pOrderBy->nExpr>0 );
-  p = pOrderBy->a[0].pExpr;
-  if( p->op==TK_COLUMN && p->iTable==base && p->iColumn==-1
-    && !referencesOtherTables(pOrderBy, pMaskSet, 1, base) ){
-    *pbRev = pOrderBy->a[0].sortOrder;
-    return 1;
-  }
-  return 0;
-}
-
  /*
  ** Prepare a crude estimate of the logarithm of the input value.
  ** The results need not be exact.  This is only used for estimating
@@ -1560,6 +1542,7 @@ static void bestOrClauseIndex(
        int flags = WHERE_MULTI_OR;
        double rTotal = 0;
        double nRow = 0;
+      Bitmask used = 0;
  
        for(pOrTerm=pOrWC->a; pOrTerm<pOrWCEnd; pOrTerm++){
          WhereCost sTermCost;
@@ -1582,6 +1565,7 @@ static void bestOrClauseIndex(
          }
          rTotal += sTermCost.rCost;
          nRow += sTermCost.nRow;
+        used |= sTermCost.used;
          if( rTotal>=pCost->rCost ) break;
        }
  
@@ -1599,6 +1583,7 @@ static void bestOrClauseIndex(
        if( rTotal<pCost->rCost ){
          pCost->rCost = rTotal;
          pCost->nRow = nRow;
+        pCost->used = used;
          pCost->plan.wsFlags = flags;
          pCost->plan.u.pTerm = pTerm;
        }
@@ -1851,7 +1836,7 @@ static void bestVirtualIndex(
    for(i=0; i<pIdxInfo->nConstraint; i++, pIdxCons++){
      j = pIdxCons->iTermOffset;
      pTerm = &pWC->a[j];
-    pIdxCons->usable =  (pTerm->prereqRight & notReady)==0 ?1:0;
+    pIdxCons->usable = (pTerm->prereqRight&notReady) ? 0 : 1;
    }
    memset(pUsage, 0, sizeof(pUsage[0])*pIdxInfo->nConstraint);
    if( pIdxInfo->needToFreeIdxStr ){
@@ -1872,6 +1857,13 @@ static void bestVirtualIndex(
      return;
    }
  
+  pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint;
+  for(i=0; i<pIdxInfo->nConstraint; i++){
+    if( pUsage[i].argvIndex>0 ){
+      pCost->used |= pWC->a[pIdxCons[i].iTermOffset].prereqRight;
+    }
+  }
+
    /* The cost is not allowed to be larger than SQLITE_BIG_DBL (the
    ** inital value of lowestCost in this loop. If it is, then the
    ** (cost<lowestCost) test below will never be true.
@@ -1934,290 +1926,285 @@ static void bestBtreeIndex(
    ExprList *pOrderBy,         /* The ORDER BY clause */
    WhereCost *pCost            /* Lowest cost query plan */
  ){
-  WhereTerm *pTerm;           /* A single term of the WHERE clause */
    int iCur = pSrc->iCursor;   /* The cursor of the table to be accessed */
    Index *pProbe;              /* An index we are evaluating */
-  int rev;                    /* True to scan in reverse order */
-  int wsFlags;                /* Flags associated with pProbe */
-  int nEq;                    /* Number of == or IN constraints */
-  int eqTermMask;             /* Mask of valid equality operators */
-  double cost;                /* Cost of using pProbe */
-  double nRow;                /* Estimated number of rows in result set */
-  int i;                      /* Loop counter */
-
-  WHERETRACE(("bestIndex: tbl=%s notReady=%llx\n", pSrc->pTab->zName,notReady));
-  pProbe = pSrc->pTab->pIndex;
-  if( pSrc->notIndexed ){
-    pProbe = 0;
-  }
-
-  /* If the table has no indices and there are no terms in the where
-  ** clause that refer to the ROWID, then we will never be able to do
-  ** anything other than a full table scan on this table.  We might as
-  ** well put it first in the join order.  That way, perhaps it can be
-  ** referenced by other tables in the join.
-  */
-  memset(pCost, 0, sizeof(*pCost));
-  if( pProbe==0 &&
-     findTerm(pWC, iCur, -1, 0, WO_EQ|WO_IN|WO_LT|WO_LE|WO_GT|WO_GE,0)==0 &&
-     (pOrderBy==0 || !sortableByRowid(iCur, pOrderBy, pWC->pMaskSet, &rev)) ){
-     if( pParse->db->flags & SQLITE_ReverseOrder ){
-      /* For application testing, randomly reverse the output order for
-      ** SELECT statements that omit the ORDER BY clause.  This will help
-      ** to find cases where
-      */
-      pCost->plan.wsFlags |= WHERE_REVERSE;
-    }
-    return;
-  }
-  pCost->rCost = SQLITE_BIG_DBL;
+  Index *pIdx;                /* Copy of pProbe, or zero for IPK index */
+  int eqTermMask;             /* Current mask of valid equality operators */
+  int idxEqTermMask;          /* Index mask of valid equality operators */
  
-  /* Check for a rowid=EXPR or rowid IN (...) constraints. If there was
-  ** an INDEXED BY clause attached to this table, skip this step.
-  */
-  if( !pSrc->pIndex ){
-    pTerm = findTerm(pWC, iCur, -1, notReady, WO_EQ|WO_IN, 0);
-    if( pTerm ){
-      Expr *pExpr;
-      pCost->plan.wsFlags = WHERE_ROWID_EQ;
-      if( pTerm->eOperator & WO_EQ ){
-        /* Rowid== is always the best pick.  Look no further.  Because only
-        ** a single row is generated, output is always in sorted order */
-        pCost->plan.wsFlags = WHERE_ROWID_EQ | WHERE_UNIQUE;
-        pCost->plan.nEq = 1;
-        WHERETRACE(("... best is rowid\n"));
-        pCost->rCost = 0;
-        pCost->nRow = 1;
-        return;
-      }else if( !ExprHasProperty((pExpr = pTerm->pExpr), EP_xIsSelect) 
-             && pExpr->x.pList 
-      ){
-        /* Rowid IN (LIST): cost is NlogN where N is the number of list
-        ** elements.  */
-        pCost->rCost = pCost->nRow = pExpr->x.pList->nExpr;
-        pCost->rCost *= estLog(pCost->rCost);
-      }else{
-        /* Rowid IN (SELECT): cost is NlogN where N is the number of rows
-        ** in the result of the inner select.  We have no way to estimate
-        ** that value so make a wild guess. */
-        pCost->nRow = 100;
-        pCost->rCost = 200;
-      }
-      WHERETRACE(("... rowid IN cost: %.9g\n", pCost->rCost));
-    }
-  
-    /* Estimate the cost of a table scan.  If we do not know how many
-    ** entries are in the table, use 1 million as a guess.
-    */
-    cost = pProbe ? pProbe->aiRowEst[0] : 1000000;
-    WHERETRACE(("... table scan base cost: %.9g\n", cost));
-    wsFlags = WHERE_ROWID_RANGE;
-  
-    /* Check for constraints on a range of rowids in a table scan.
-    */
-    pTerm = findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE|WO_GT|WO_GE, 0);
-    if( pTerm ){
-      if( findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE, 0) ){
-        wsFlags |= WHERE_TOP_LIMIT;
-        cost /= 3;  /* Guess that rowid<EXPR eliminates two-thirds of rows */
-      }
-      if( findTerm(pWC, iCur, -1, notReady, WO_GT|WO_GE, 0) ){
-        wsFlags |= WHERE_BTM_LIMIT;
-        cost /= 3;  /* Guess that rowid>EXPR eliminates two-thirds of rows */
-      }
-      WHERETRACE(("... rowid range reduces cost to %.9g\n", cost));
-    }else{
-      wsFlags = 0;
-    }
-    nRow = cost;
-  
-    /* If the table scan does not satisfy the ORDER BY clause, increase
-    ** the cost by NlogN to cover the expense of sorting. */
-    if( pOrderBy ){
-      if( sortableByRowid(iCur, pOrderBy, pWC->pMaskSet, &rev) ){
-        wsFlags |= WHERE_ORDERBY|WHERE_ROWID_RANGE;
-        if( rev ){
-          wsFlags |= WHERE_REVERSE;
-        }
-      }else{
-        cost += cost*estLog(cost);
-        WHERETRACE(("... sorting increases cost to %.9g\n", cost));
-      }
-    }else if( pParse->db->flags & SQLITE_ReverseOrder ){
-      /* For application testing, randomly reverse the output order for
-      ** SELECT statements that omit the ORDER BY clause.  This will help
-      ** to find cases where
-      */
-      wsFlags |= WHERE_REVERSE;
-    }
+  Index pk;
+  unsigned int pkint[2] = {1000000, 1};
+  int pkicol = -1;
+  int wsFlagMask;
  
-    /* Remember this case if it is the best so far */
-    if( cost<pCost->rCost ){
-      pCost->rCost = cost;
-      pCost->nRow = nRow;
-      pCost->plan.wsFlags = wsFlags;
-    }
-  }
-
-  bestOrClauseIndex(pParse, pWC, pSrc, notReady, pOrderBy, pCost);
+  memset(pCost, 0, sizeof(*pCost));
+  pCost->rCost = SQLITE_BIG_DBL;
  
    /* If the pSrc table is the right table of a LEFT JOIN then we may not
    ** use an index to satisfy IS NULL constraints on that table.  This is
    ** because columns might end up being NULL if the table does not match -
    ** a circumstance which the index cannot help us discover.  Ticket #2177.
    */
-  if( (pSrc->jointype & JT_LEFT)!=0 ){
-    eqTermMask = WO_EQ|WO_IN;
+  if( pSrc->jointype & JT_LEFT ){
+    idxEqTermMask = WO_EQ|WO_IN;
    }else{
-    eqTermMask = WO_EQ|WO_IN|WO_ISNULL;
+    idxEqTermMask = WO_EQ|WO_IN|WO_ISNULL;
    }
  
-  /* Look at each index.
-  */
    if( pSrc->pIndex ){
-    pProbe = pSrc->pIndex;
-  }
-  for(; pProbe; pProbe=(pSrc->pIndex ? 0 : pProbe->pNext)){
-    double inMultiplier = 1;  /* Number of equality look-ups needed */
-    int inMultIsEst = 0;      /* True if inMultiplier is an estimate */
-
-    WHERETRACE(("... index %s:\n", pProbe->zName));
-
-    /* Count the number of columns in the index that are satisfied
-    ** by x=EXPR or x IS NULL constraints or x IN (...) constraints.
-    ** For a term of the form x=EXPR or x IS NULL we only have to do 
-    ** a single binary search.  But for x IN (...) we have to do a
-    ** number of binary searched
-    ** equal to the number of entries on the RHS of the IN operator.
-    ** The inMultipler variable with try to estimate the number of
-    ** binary searches needed.
+    pIdx = pProbe = pSrc->pIndex;
+    wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE);
+    eqTermMask = idxEqTermMask;
+  }else{
+    Index *pFirst = pSrc->pTab->pIndex;
+    memset(&pk, 0, sizeof(Index));
+    pk.nColumn = 1;
+    pk.aiColumn = &pkicol;
+    pk.aiRowEst = pkint;
+    pk.onError = OE_Replace;
+    pk.pTable = pSrc->pTab;
+    if( pSrc->notIndexed==0 ){
+      pk.pNext = pFirst;
+    }
+    if( pFirst && pFirst->aiRowEst ){
+      pkint[0] = pFirst->aiRowEst[0];
+    }
+    pProbe = &pk;
+    wsFlagMask = ~(
+        WHERE_COLUMN_IN|WHERE_COLUMN_EQ|WHERE_COLUMN_NULL|WHERE_COLUMN_RANGE
+    );
+    eqTermMask = WO_EQ|WO_IN;
+    pIdx = 0;
+  }
+
+
+  for(; pProbe; pIdx=pProbe=pProbe->pNext){
+    const unsigned int * const aiRowEst = pProbe->aiRowEst;
+    double cost;                /* Cost of using pProbe */
+    double nRow;                /* Estimated number of rows in result set */
+    int rev;                    /* True to scan in reverse order */
+    int wsFlags = 0;
+    Bitmask used = 0;
+
+    /* The following variables are populated based on the properties of
+    ** scan being evaluated. They are then used to determine the expected
+    ** cost and number of rows returned.
+    **
+    **  nEq: 
+    **    Number of equality terms that can be implemented using the index.
+    **
+    **  nInMul:  
+    **    The "in-multiplier". This is an estimate of how many seek operations 
+    **    SQLite must perform on the index in question. For example, if the 
+    **    WHERE clause is:
+    **
+    **      WHERE a IN (1, 2, 3) AND b IN (4, 5, 6)
+    **
+    **    SQLite must perform 9 lookups on an index on (a, b), so nInMul is 
+    **    set to 9. Given the same schema and either of the following WHERE 
+    **    clauses:
+    **
+    **      WHERE a =  1
+    **      WHERE a >= 2
+    **
+    **    nInMul is set to 1.
+    **
+    **    If there exists a WHERE term of the form "x IN (SELECT ...)", then 
+    **    the sub-select is assumed to return 25 rows for the purposes of 
+    **    determining nInMul.
+    **
+    **  bInEst:  
+    **    Set to true if there was at least one "x IN (SELECT ...)" term used 
+    **    in determining the value of nInMul.
+    **
+    **  nBound:  
+    **    Set based on whether or not there is a range constraint on the 
+    **    (nEq+1)th column of the index. 1 if there is neither an upper or 
+    **    lower bound, 3 if there is an upper or lower bound, or 9 if there 
+    **    is both an upper and lower bound.
+    **
+    **  bSort:   
+    **    Boolean. True if there is an ORDER BY clause that will require an 
+    **    external sort (i.e. scanning the index being evaluated will not 
+    **    correctly order records).
+    **
+    **  bLookup: 
+    **    Boolean. True if for each index entry visited a lookup on the 
+    **    corresponding table b-tree is required. This is always false 
+    **    for the rowid index. For other indexes, it is true unless all the 
+    **    columns of the table used by the SELECT statement are present in 
+    **    the index (such an index is sometimes described as a covering index).
+    **    For example, given the index on (a, b), the second of the following 
+    **    two queries requires table b-tree lookups, but the first does not.
+    **
+    **             SELECT a, b    FROM tbl WHERE a = 1;
+    **             SELECT a, b, c FROM tbl WHERE a = 1;
      */
-    wsFlags = 0;
-    for(i=0; i<pProbe->nColumn; i++){
-      int j = pProbe->aiColumn[i];
-      pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pProbe);
+    int nEq;
+    int bInEst = 0;
+    int nInMul = 1;
+    int nBound = 1;
+    int bSort = 0;
+    int bLookup = 0;
+
+    /* Determine the values of nEq and nInMul */
+    for(nEq=0; nEq<pProbe->nColumn; nEq++){
+      WhereTerm *pTerm;           /* A single term of the WHERE clause */
+      int j = pProbe->aiColumn[nEq];
+      pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pIdx);
        if( pTerm==0 ) break;
-      wsFlags |= WHERE_COLUMN_EQ;
+      wsFlags |= (WHERE_COLUMN_EQ|WHERE_ROWID_EQ);
        if( pTerm->eOperator & WO_IN ){
          Expr *pExpr = pTerm->pExpr;
          wsFlags |= WHERE_COLUMN_IN;
          if( ExprHasProperty(pExpr, EP_xIsSelect) ){
-          inMultiplier *= 25;
-          inMultIsEst = 1;
+          nInMul *= 25;
+          bInEst = 1;
          }else if( pExpr->x.pList ){
-          inMultiplier *= pExpr->x.pList->nExpr + 1;
+          nInMul *= pExpr->x.pList->nExpr + 1;
          }
        }else if( pTerm->eOperator & WO_ISNULL ){
          wsFlags |= WHERE_COLUMN_NULL;
        }
+      used |= pTerm->prereqRight;
      }
-    nRow = pProbe->aiRowEst[i] * inMultiplier;
-    /* If inMultiplier is an estimate and that estimate results in an
-    ** nRow it that is more than half number of rows in the table,
-    ** then reduce inMultipler */
-    if( inMultIsEst && nRow*2 > pProbe->aiRowEst[0] ){
-      nRow = pProbe->aiRowEst[0]/2;
-      inMultiplier = nRow/pProbe->aiRowEst[i];
-    }
-    cost = nRow + inMultiplier*estLog(pProbe->aiRowEst[0]);
-    nEq = i;
-    if( pProbe->onError!=OE_None && nEq==pProbe->nColumn ){
-      testcase( wsFlags & WHERE_COLUMN_IN );
-      testcase( wsFlags & WHERE_COLUMN_NULL );
-      if( (wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_NULL))==0 ){
-        wsFlags |= WHERE_UNIQUE;
-      }
-    }
-    WHERETRACE(("...... nEq=%d inMult=%.9g nRow=%.9g cost=%.9g\n",
-                nEq, inMultiplier, nRow, cost));
  
-    /* Look for range constraints.  Assume that each range constraint
-    ** makes the search space 1/3rd smaller.
-    */
+    /* Determine the value of nBound. */
      if( nEq<pProbe->nColumn ){
        int j = pProbe->aiColumn[nEq];
-      pTerm = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pProbe);
-      if( pTerm ){
-        wsFlags |= WHERE_COLUMN_RANGE;
-        if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe) ){
+      if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pIdx) ){
+        WhereTerm *pTop = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pIdx);
+        WhereTerm *pBtm = findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pIdx);
+        if( pTop ){
            wsFlags |= WHERE_TOP_LIMIT;
-          cost /= 3;
-          nRow /= 3;
+          nBound *= 3;
+          used |= pTop->prereqRight;
          }
-        if( findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe) ){
+        if( pBtm ){
            wsFlags |= WHERE_BTM_LIMIT;
-          cost /= 3;
-          nRow /= 3;
+          nBound *= 3;
+          used |= pBtm->prereqRight;
          }
-        WHERETRACE(("...... range reduces nRow to %.9g and cost to %.9g\n",
-                    nRow, cost));
+        wsFlags |= (WHERE_COLUMN_RANGE|WHERE_ROWID_RANGE);
+      }
+    }else if( pProbe->onError!=OE_None ){
+      testcase( wsFlags & WHERE_COLUMN_IN );
+      testcase( wsFlags & WHERE_COLUMN_NULL );
+      if( (wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_NULL))==0 ){
+        wsFlags |= WHERE_UNIQUE;
        }
      }
  
-    /* Add the additional cost of sorting if that is a factor.
-    */
+    /* If there is an ORDER BY clause and the index being considered will
+    ** naturally scan rows in the required order, set the appropriate flags
+    ** in wsFlags. Otherwise, if there is an ORDER BY clause but the index
+    ** will scan rows in a different order, set the bSort variable.  */
      if( pOrderBy ){
        if( (wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_NULL))==0
-       && isSortingIndex(pParse,pWC->pMaskSet,pProbe,iCur,pOrderBy,nEq,&rev)
+        && isSortingIndex(pParse,pWC->pMaskSet,pProbe,iCur,pOrderBy,nEq,&rev)
        ){
-        if( wsFlags==0 ){
-          wsFlags = WHERE_COLUMN_RANGE;
-        }
-        wsFlags |= WHERE_ORDERBY;
-        if( rev ){
-          wsFlags |= WHERE_REVERSE;
-        }
+        wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY;
+        wsFlags |= (rev ? WHERE_REVERSE : 0);
        }else{
-        cost += cost*estLog(cost);
-        WHERETRACE(("...... orderby increases cost to %.9g\n", cost));
+        bSort = 1;
        }
-    }else if( wsFlags!=0 && (pParse->db->flags & SQLITE_ReverseOrder)!=0 ){
-      /* For application testing, randomly reverse the output order for
-      ** SELECT statements that omit the ORDER BY clause.  This will help
-      ** to find cases where
-      */
-      wsFlags |= WHERE_REVERSE;
      }
  
-    /* Check to see if we can get away with using just the index without
-    ** ever reading the table.  If that is the case, then halve the
-    ** cost of this index.
-    */
-    if( wsFlags && pSrc->colUsed < (((Bitmask)1)<<(BMS-1)) ){
+    /* If currently calculating the cost of using an index (not the IPK
+    ** index), determine if all required column data may be obtained without 
+    ** seeking to entries in the main table (i.e. if the index is a covering
+    ** index for this query). If it is, set the WHERE_IDX_ONLY flag in
+    ** wsFlags. Otherwise, set the bLookup variable to true.  */
+    if( pIdx && wsFlags ){
        Bitmask m = pSrc->colUsed;
        int j;
-      for(j=0; j<pProbe->nColumn; j++){
-        int x = pProbe->aiColumn[j];
+      for(j=0; j<pIdx->nColumn; j++){
+        int x = pIdx->aiColumn[j];
          if( x<BMS-1 ){
            m &= ~(((Bitmask)1)<<x);
          }
        }
        if( m==0 ){
          wsFlags |= WHERE_IDX_ONLY;
-        cost /= 2;
-        WHERETRACE(("...... idx-only reduces cost to %.9g\n", cost));
+      }else{
+        bLookup = 1;
        }
      }
  
-    /* If this index has achieved the lowest cost so far, then use it.
-    */
-    if( wsFlags!=0 && cost < pCost->rCost ){
+#if 0
+    if( bInEst && (nInMul*aiRowEst[nEq])>(aiRowEst[0]/2) ){
+      nInMul = aiRowEst[0] / (2 * aiRowEst[nEq]);
+    }
+    nRow = (double)(aiRowEst[nEq] * nInMul) / nBound;
+    cost = (nEq>0) * nInMul * estLog(aiRowEst[0])
+         + nRow
+         + bSort * nRow * estLog(nRow)
+         + bLookup * nRow * estLog(aiRowEst[0]);
+#else
+
+    /* The following block calculates nRow and cost for the index scan
+    ** in the same way as SQLite versions 3.6.17 and earlier. Some elements
+    ** of this calculation are difficult to justify. But using this strategy
+    ** works well in practice and causes the test suite to pass.  */
+    nRow = (double)(aiRowEst[nEq] * nInMul);
+    if( bInEst && nRow*2>aiRowEst[0] ){
+      nRow = aiRowEst[0]/2;
+      nInMul = nRow / aiRowEst[nEq];
+    }
+    cost = nRow + nInMul*estLog(aiRowEst[0]);
+    nRow /= nBound;
+    cost /= nBound;
+    if( bSort ){
+      cost += cost*estLog(cost);
+    }
+    if( pIdx && bLookup==0 ){
+      cost /= 2;
+    }
+#endif
+
+    WHERETRACE((
+      "tbl=%s idx=%s nEq=%d nInMul=%d nBound=%d bSort=%d bLookup=%d"
+      " wsFlags=%d   (nRow=%.2f cost=%.2f)\n",
+      pSrc->pTab->zName, (pIdx ? pIdx->zName : "ipk"), 
+      nEq, nInMul, nBound, bSort, bLookup, wsFlags, nRow, cost
+    ));
+
+    if( (!pIdx || wsFlags) && cost<pCost->rCost ){
        pCost->rCost = cost;
        pCost->nRow = nRow;
-      pCost->plan.wsFlags = wsFlags;
+      pCost->used = used;
+      pCost->plan.wsFlags = (wsFlags&wsFlagMask);
        pCost->plan.nEq = nEq;
-      assert( pCost->plan.wsFlags & WHERE_INDEXED );
-      pCost->plan.u.pIdx = pProbe;
+      pCost->plan.u.pIdx = pIdx;
      }
+
+    if( pSrc->pIndex ) break;
+    wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE);
+    eqTermMask = idxEqTermMask;
    }
  
-  /* Report the best result
-  */
+  /* If there is no ORDER BY clause and the SQLITE_ReverseOrder flag
+  ** is set, then reverse the order that the index will be scanned
+  ** in. This is used for application testing, to help find cases
+  ** where application behaviour depends on the (undefined) order that
+  ** SQLite outputs rows in in the absence of an ORDER BY clause.  */
+  if( !pOrderBy && pParse->db->flags & SQLITE_ReverseOrder ){
+    pCost->plan.wsFlags |= WHERE_REVERSE;
+  }
+
+  assert( pOrderBy || (pCost->plan.wsFlags&WHERE_ORDERBY)==0 );
+  assert( pCost->plan.u.pIdx==0 || (pCost->plan.wsFlags&WHERE_ROWID_EQ)==0 );
+  assert( pSrc->pIndex==0 
+       || pCost->plan.u.pIdx==0 
+       || pCost->plan.u.pIdx==pSrc->pIndex 
+  );
+
+  WHERETRACE(("best index is: %s\n", 
+    (pCost->plan.u.pIdx ? pCost->plan.u.pIdx->zName : "ipk")
+  ));
+  
+  bestOrClauseIndex(pParse, pWC, pSrc, notReady, pOrderBy, pCost);
    pCost->plan.wsFlags |= eqTermMask;
-  WHERETRACE(("best index is %s, nrow=%.9g, cost=%.9g, wsFlags=%x, nEq=%d\n",
-        (pCost->plan.wsFlags & WHERE_INDEXED)!=0 ?
-             pCost->plan.u.pIdx->zName : "(none)", pCost->nRow,
-        pCost->rCost, pCost->plan.wsFlags, pCost->plan.nEq));
  }
  
  /*
@@ -3271,44 +3258,82 @@ WhereInfo *sqlite3WhereBegin(
      WhereCost bestPlan;         /* Most efficient plan seen so far */
      Index *pIdx;                /* Index for FROM table at pTabItem */
      int j;                      /* For looping over FROM tables */
-    int bestJ = 0;              /* The value of j */
+    int bestJ = -1;             /* The value of j */
      Bitmask m;                  /* Bitmask value for j or bestJ */
-    int once = 0;               /* True when first table is seen */
+    int isOptimal;              /* Iterator for optimal/non-optimal search */
  
      memset(&bestPlan, 0, sizeof(bestPlan));
      bestPlan.rCost = SQLITE_BIG_DBL;
-    for(j=iFrom, pTabItem=&pTabList->a[j]; j<pTabList->nSrc; j++, pTabItem++){
-      int doNotReorder;    /* True if this table should not be reordered */
-      WhereCost sCost;     /* Cost information from best[Virtual]Index() */
-      ExprList *pOrderBy;  /* ORDER BY clause for index to optimize */
-
-      doNotReorder =  (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0;
-      if( once && doNotReorder ) break;
-      m = getMask(pMaskSet, pTabItem->iCursor);
-      if( (m & notReady)==0 ){
-        if( j==iFrom ) iFrom++;
-        continue;
-      }
-      pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0);
  
-      assert( pTabItem->pTab );
+    /* Loop through the remaining entries in the FROM clause to find the
+    ** next nested loop. The FROM clause entries may be iterated through
+    ** either once or twice. 
+    **
+    ** The first iteration, which is always performed, searches for the
+    ** FROM clause entry that permits the lowest-cost, "optimal" scan. In
+    ** this context an optimal scan is one that uses the same strategy
+    ** for the given FROM clause entry as would be selected if the entry
+    ** were used as the innermost nested loop.
+    **
+    ** The second iteration is only performed if no optimal scan strategies
+    ** were found by the first. This iteration is used to search for the
+    ** lowest cost scan overall.
+    **
+    ** Previous versions of SQLite performed only the second iteration -
+    ** the next outermost loop was always that with the lowest overall
+    ** cost. However, this meant that SQLite could select the wrong plan
+    ** for scripts such as the following:
+    **   
+    **   CREATE TABLE t1(a, b); 
+    **   CREATE TABLE t2(c, d);
+    **   SELECT * FROM t2, t1 WHERE t2.rowid = t1.a;
+    **
+    ** The best strategy is to iterate through table t1 first. However it
+    ** is not possible to determine this with a simple greedy algorithm.
+    ** However, since the cost of a linear scan through table t2 is the same 
+    ** as the cost of a linear scan through table t1, a simple greedy 
+    ** algorithm may choose to use t2 for the outer loop, which is a much
+    ** costlier approach.
+    */
+    for(isOptimal=1; isOptimal>=0 && bestJ<0; isOptimal--){
+      Bitmask mask = (isOptimal ? 0 : notReady);
+      assert( (pTabList->nSrc-iFrom)>1 || isOptimal );
+      for(j=iFrom, pTabItem=&pTabList->a[j]; j<pTabList->nSrc; j++, pTabItem++){
+        int doNotReorder;    /* True if this table should not be reordered */
+        WhereCost sCost;     /* Cost information from best[Virtual]Index() */
+        ExprList *pOrderBy;  /* ORDER BY clause for index to optimize */
+  
+        doNotReorder =  (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0;
+        if( j!=iFrom && doNotReorder ) break;
+        m = getMask(pMaskSet, pTabItem->iCursor);
+        if( (m & notReady)==0 ){
+          if( j==iFrom ) iFrom++;
+          continue;
+        }
+        pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0);
+  
+        assert( pTabItem->pTab );
  #ifndef SQLITE_OMIT_VIRTUALTABLE
-      if( IsVirtual(pTabItem->pTab) ){
-        sqlite3_index_info **pp = &pWInfo->a[j].pIdxInfo;
-        bestVirtualIndex(pParse, pWC, pTabItem, notReady, pOrderBy, &sCost, pp);
-      }else 
+        if( IsVirtual(pTabItem->pTab) ){
+          sqlite3_index_info **pp = &pWInfo->a[j].pIdxInfo;
+          bestVirtualIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost, pp);
+        }else 
  #endif
-      {
-        bestBtreeIndex(pParse, pWC, pTabItem, notReady, pOrderBy, &sCost);
-      }
-      if( once==0 || sCost.rCost<bestPlan.rCost ){
-        once = 1;
-        bestPlan = sCost;
-        bestJ = j;
+        {
+          bestBtreeIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost);
+        }
+        assert( isOptimal || (sCost.used&notReady)==0 );
+
+        if( (sCost.used&notReady)==0
+         && (j==iFrom || sCost.rCost<bestPlan.rCost) 
+        ){
+          bestPlan = sCost;
+          bestJ = j;
+        }
+        if( doNotReorder ) break;
        }
-      if( doNotReorder ) break;
      }
-    assert( once );
+    assert( bestJ>=0 );
      assert( notReady & getMask(pMaskSet, pTabList->a[bestJ].iCursor) );
      WHERETRACE(("*** Optimizer selects table %d for loop %d\n", bestJ,
             pLevel-pWInfo->a));
diff --git a/test/triggerA.test b/test/triggerA.test

index e3583524da308ca599250f26eb79cf83e0366fec..1a0055f3a41970f16104ce62bea176e29509e5af 100644 (file)
--- a/test/triggerA.test
+++ b/test/triggerA.test
@@ -79,7 +79,7 @@ do_test triggerA-1.6 {
       CREATE VIEW v5 AS SELECT x, b FROM t1, t2 WHERE y=c;
       SELECT * FROM v5;
    }
-} {1 103 2 203 3 305 4 404 5 504 6 603 7 705 8 805 9 904 10 1003}
+} {10 1003 9 904 8 805 7 705 6 603 5 504 4 404 3 305 2 203 1 103}
  
  # Create INSTEAD OF triggers on the views.  Run UPDATE and DELETE statements
  # using those triggers.  Verify correct operation.
diff --git a/test/vtab6.test b/test/vtab6.test

index a263973d33b8a0f7429e5c373c35a12fcaac7881..96e45bf5463dbb2b453bb804f0546e09bd6a9918 100644 (file)
--- a/test/vtab6.test
+++ b/test/vtab6.test
@@ -492,6 +492,7 @@ do_test vtab6-11.1.4 {
  do_test vtab6-11.2.0 {
    execsql {
      CREATE INDEX ab_i ON ab_r(b);
+    CREATE INDEX bc_i ON bc_r(b);
    }
  } {}
  
@@ -560,7 +561,7 @@ do_test vtab6-11.4.1 {
    catchsql {
      SELECT a, b, c FROM ab NATURAL JOIN bc;
    }
-} {1 {table ab: xBestIndex returned an invalid plan}}
+} {1 {table bc: xBestIndex returned an invalid plan}}
  do_test vtab6-11.4.2 {
    catchsql {
      SELECT a, b, c FROM bc NATURAL JOIN ab;
diff --git a/test/where8.test b/test/where8.test

index 623d38245973274bec525444061ebf2807d62c65..a8dd5ed059e6c2f0eb921b980cdb9d9fd3d5ec05 100644 (file)
--- a/test/where8.test
+++ b/test/where8.test
@@ -287,7 +287,7 @@ do_test where8-3.15 {
        SELECT sum(e IS NULL) FROM t2 AS inner WHERE t2.d>inner.d
      )
    }
-} {I I I I I I I I I I II II II II II II II II II II III III III III III 99 0}
+} {I II I II I II I II I II I II III I II III I II III I II III I II III 9 0}
  
  #-----------------------------------------------------------------------
  # The following tests - where8-4.* - verify that adding or removing
author	dan <dan@noemail.net>
	Thu, 13 Aug 2009 07:09:33 +0000 (07:09 +0000)
committer	dan <dan@noemail.net>
	Thu, 13 Aug 2009 07:09:33 +0000 (07:09 +0000)
manifest		patch \| blob \| blame \| history
manifest.uuid		patch \| blob \| blame \| history
src/where.c		patch \| blob \| blame \| history
test/triggerA.test		patch \| blob \| blame \| history
test/vtab6.test		patch \| blob \| blame \| history
test/where8.test		patch \| blob \| blame \| history