This is the beginning of an attempt to backport recent query planner

author drh <drh@noemail.net>

Sat, 12 Feb 2011 01:59:22 +0000 (01:59 +0000)

committer drh <drh@noemail.net>

Sat, 12 Feb 2011 01:59:22 +0000 (01:59 +0000)
author drh <drh@noemail.net>
Sat, 12 Feb 2011 01:59:22 +0000 (01:59 +0000)
committer drh <drh@noemail.net>
Sat, 12 Feb 2011 01:59:22 +0000 (01:59 +0000)
diff --git a/manifest b/manifest

index 9fe69e1daae10a39e9e4f3233317ae71646730e8..227ecf3f83d5e574f0c218b6a1642a3dc8d0ddd3 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,8 +1,8 @@
  -----BEGIN PGP SIGNED MESSAGE-----
  Hash: SHA1
  
-C Version\s3.7.2\srelease\scandidate\s1
-D 2010-08-23T18:52:01
+C This\sis\sthe\sbeginning\sof\san\sattempt\sto\sbackport\srecent\squery\splanner\nenhancements\sto\sversion\s3.7.2.\s\sThe\scode\sin\sthis\sversion\sbuilds\sand\sruns\sand\nseems\sto\sgive\scorrect\sanswers,\sbut\sit\sgenerates\ssuboptimal\squery\splans\sand\nhence\smany\sof\sthe\stest\scases\sfail.\s\sThe\stest\sscript\sgives\sup\safter\s1000\serrors.
+D 2011-02-12T01:59:22.979
  F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
  F Makefile.in 543f91f24cd7fee774ecc0a61c19704c0c3e78fd
  F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -110,7 +110,7 @@ F sqlite.pc.in 42b7bf0d02e08b9e77734a47798d1a55a9e0716b
  F sqlite3.1 6be1ad09113570e1fc8dcaff84c9b0b337db5ffc
  F sqlite3.pc.in ae6f59a76e862f5c561eb32a380228a02afc3cad
  F src/alter.c 8dc27638e7e2553e80b2b621f232be5eb1e85ef3
-F src/analyze.c da65ce99bb159b10e85a1e460adbe53a88062500
+F src/analyze.c a038162344265ac21dfb24b3fcc06c666ebb9c07
  F src/attach.c 17bec1f18254d9341369f20f90ba24ce35d20d10
  F src/auth.c 523da7fb4979469955d822ff9298352d6b31de34
  F src/backup.c 8ff0b7018df253c7f30d3f9702b0b16f19209d5c
@@ -175,7 +175,7 @@ F src/select.c 8add6cab889fc02e1492eda8dba462ccf11f51dd
  F src/shell.c 8517fc1f9c59ae4007e6cc8b9af91ab231ea2056
  F src/sqlite.h.in 2d72a6242df41c517e38eec8791abcf5484a36f1
  F src/sqlite3ext.h 69dfb8116af51b84a029cddb3b35062354270c89
-F src/sqliteInt.h e33b15e8176442bf7484f0e716edfd1ce03b2979
+F src/sqliteInt.h f419da0d4ca6f723aca055ef67e440723bc7c6ff
  F src/sqliteLimit.h a17dcd3fb775d63b64a43a55c54cb282f9726f44
  F src/status.c 496913d4e8441195f6f2a75b1c95993a45b9b30b
  F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e
@@ -233,7 +233,7 @@ F src/vtab.c 0e8e0cb30dffb078367e843e84e37ef99236c7e4
  F src/wal.c 5ac2119e23ee4424599d4275b66dc88d612a0543
  F src/wal.h 96669b645e27cd5a111ba59f0cae7743a207bc3c
  F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
-F src/where.c 7db3e41c2a846f9deeb24f1bbb75461b4010b7b5
+F src/where.c 9c140acd52f4526983cba7203e6a5bb0ccadb5fb
  F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
  F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87
  F test/all.test 6745008c144bd2956d58864d21f7b304689c1cce
@@ -258,7 +258,7 @@ F test/auth.test 8f21c160a4562f54f27618e85bac869efcecbcaf
  F test/auth2.test 270baddc8b9c273682760cffba6739d907bd2882
  F test/auth3.test a4755e6a2a2fea547ffe63c874eb569e60a28eb5
  F test/autoinc.test 85ef3180a737e6580086a018c09c6f1a52759b46
-F test/autoindex1.test 7df441bf0e7a88644eb80993339dbf1db3a12c68
+F test/autoindex1.test 53fdc13501a9d58ae049c0d39bb5c94389ff91bb
  F test/autovacuum.test bb7c0885e6f8f1d633045de48f2b66082162766d
  F test/autovacuum_ioerr2.test 598b0663074d3673a9c1bc9a16e80971313bafe6
  F test/avtrans.test 0252654f4295ddda3b2cce0e894812259e655a85
@@ -849,14 +849,18 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
  F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
  F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
  F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
-P 21a1e5961bba148fda50cc0b7d472ca74f90808a
-R aa9cc30f9dbcb7ea427137065cf4e4be
+P 42537b60566f288167f1b5864a5435986838e3a3
+R 911182819e6787775a4c401119b6d58c
+T *bgcolor * #d1d3a8
+T *branch * branch-3.7.2
+T *sym-branch-3.7.2 *
+T -sym-trunk *
  U drh
-Z e73ee740c6593b4458c2d23b6fa7b923
+Z 6673081ac69ed84639c088e8d1e7d477
  -----BEGIN PGP SIGNATURE-----
  Version: GnuPG v1.4.6 (GNU/Linux)
  
-iD8DBQFMcsNUoxKgR168RlERApyRAKCOHvk0Gn1uE2F8YVm3k/w13/3UugCeOh16
-bApiXjM1a7FP+Qr1HX5kWTQ=
-=L2Lf
+iD8DBQFNVel/oxKgR168RlERAmLzAJwLpY7ATRL1epv2W0jdVpK7vB8cTwCfT/I1
+KFlk2dAE9J/wYPCdpsrAlmY=
+=eWSL
  -----END PGP SIGNATURE-----
diff --git a/manifest.uuid b/manifest.uuid

index 7f0f695ba62651f7e05da0923df9e7a3a6b6e2a6..aa7409700f2d63f799ca0ca3771bd1235c1e079a 100644 (file)
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-42537b60566f288167f1b5864a5435986838e3a3
-\ No newline at end of file
+e72cf118cb25e9fed96f8d5cebbc0f637892479a
+\ No newline at end of file
diff --git a/src/analyze.c b/src/analyze.c

index 59849456fcc89a2f716da3ceb03c46ebcb3e70b3..0a8339bafbad32bc2932abcc73022e6a3fb2e71a 100644 (file)
--- a/src/analyze.c
+++ b/src/analyze.c
@@ -113,7 +113,8 @@ static void analyzeOneTable(
    int i;                       /* Loop counter */
    int topOfLoop;               /* The top of the loop */
    int endOfLoop;               /* The end of the loop */
-  int addr;                    /* The address of an instruction */
+  int addr = 0;                /* The address of an instruction */
+  int jZeroRows = 0;           /* Jump from here if number of rows is zero */
    int iDb;                     /* Index of database containing pTab */
    int regTabname = iMem++;     /* Register containing table name */
    int regIdxname = iMem++;     /* Register containing index name */
@@ -132,8 +133,15 @@ static void analyzeOneTable(
  #endif
  
    v = sqlite3GetVdbe(pParse);
-  if( v==0 || NEVER(pTab==0) || pTab->pIndex==0 ){
-    /* Do no analysis for tables that have no indices */
+  if( v==0 || NEVER(pTab==0) ){
+    return;
+  }
+  if( pTab->tnum==0 ){
+    /* Do not gather statistics on views or virtual tables */
+    return;
+  }
+  if( memcmp(pTab->zName, "sqlite_", 7)==0 ){
+    /* Do not gather statistics on system tables */
      return;
    }
    assert( sqlite3BtreeHoldsAllMutexes(db) );
@@ -150,6 +158,7 @@ static void analyzeOneTable(
    sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName);
  
    iIdxCur = pParse->nTab++;
+  sqlite3VdbeAddOp4(v, OP_String8, 0, regTabname, 0, pTab->zName, 0);
    for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
      int nCol = pIdx->nColumn;
      KeyInfo *pKey = sqlite3IndexKeyinfo(pParse, pIdx);
@@ -164,10 +173,7 @@ static void analyzeOneTable(
          (char *)pKey, P4_KEYINFO_HANDOFF);
      VdbeComment((v, "%s", pIdx->zName));
  
-    /* Populate the registers containing the table and index names. */
-    if( pTab->pIndex==pIdx ){
-      sqlite3VdbeAddOp4(v, OP_String8, 0, regTabname, 0, pTab->zName, 0);
-    }
+    /* Populate the register containing the index name. */
      sqlite3VdbeAddOp4(v, OP_String8, 0, regIdxname, 0, pIdx->zName, 0);
  
  #ifdef SQLITE_ENABLE_STAT2
@@ -227,9 +233,10 @@ static void analyzeOneTable(
      sqlite3VdbeAddOp2(v, OP_AddImm, iMem, 1);
  
      for(i=0; i<nCol; i++){
+      CollSeq *pColl;
        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regCol);
-#ifdef SQLITE_ENABLE_STAT2
        if( i==0 ){
+#ifdef SQLITE_ENABLE_STAT2
          /* Check if the record that cursor iIdxCur points to contains a
          ** value that should be stored in the sqlite_stat2 table. If so,
          ** store it.  */
@@ -258,12 +265,17 @@ static void analyzeOneTable(
  
          sqlite3VdbeJumpHere(v, ne);
          sqlite3VdbeAddOp2(v, OP_AddImm, regRecno, 1);
-      }
  #endif
  
-      sqlite3VdbeAddOp3(v, OP_Ne, regCol, 0, iMem+nCol+i+1);
-      /**** TODO:  add collating sequence *****/
-      sqlite3VdbeChangeP5(v, SQLITE_JUMPIFNULL);
+        /* Always record the very first row */
+        sqlite3VdbeAddOp1(v, OP_IfNot, iMem+1);
+      }
+      assert( pIdx->azColl!=0 );
+      assert( pIdx->azColl[i]!=0 );
+      pColl = sqlite3LocateCollSeq(pParse, pIdx->azColl[i]);
+      sqlite3VdbeAddOp4(v, OP_Ne, regCol, 0, iMem+nCol+i+1,
+                       (char*)pColl, P4_COLLSEQ);
+      sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
      }
      if( db->mallocFailed ){
        /* If a malloc failure has occurred, then the result of the expression 
@@ -274,7 +286,11 @@ static void analyzeOneTable(
      }
      sqlite3VdbeAddOp2(v, OP_Goto, 0, endOfLoop);
      for(i=0; i<nCol; i++){
-      sqlite3VdbeJumpHere(v, sqlite3VdbeCurrentAddr(v)-(nCol*2));
+      int addr2 = sqlite3VdbeCurrentAddr(v) - (nCol*2);
+      if( i==0 ){
+        sqlite3VdbeJumpHere(v, addr2-1);  /* Set jump dest for the OP_IfNot */
+      }
+      sqlite3VdbeJumpHere(v, addr2);      /* Set jump dest for the OP_Ne */
        sqlite3VdbeAddOp2(v, OP_AddImm, iMem+i+1, 1);
        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, iMem+nCol+i+1);
      }
@@ -302,8 +318,10 @@ static void analyzeOneTable(
      ** If K>0 then it is always the case the D>0 so division by zero
      ** is never possible.
      */
-    addr = sqlite3VdbeAddOp1(v, OP_IfNot, iMem);
      sqlite3VdbeAddOp2(v, OP_SCopy, iMem, regSampleno);
+    if( jZeroRows==0 ){
+      jZeroRows = sqlite3VdbeAddOp1(v, OP_IfNot, iMem);
+    }
      for(i=0; i<nCol; i++){
        sqlite3VdbeAddOp4(v, OP_String8, 0, regTemp, 0, " ", 0);
        sqlite3VdbeAddOp3(v, OP_Concat, regTemp, regSampleno, regSampleno);
@@ -317,13 +335,35 @@ static void analyzeOneTable(
      sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur, regRowid);
      sqlite3VdbeAddOp3(v, OP_Insert, iStatCur, regRec, regRowid);
      sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
+  }
+
+  /* If the table has no indices, create a single sqlite_stat1 entry
+  ** containing NULL as the index name and the row count as the content.
+  */
+  if( pTab->pIndex==0 ){
+    sqlite3VdbeAddOp3(v, OP_OpenRead, iIdxCur, pTab->tnum, iDb);
+    VdbeComment((v, "%s", pTab->zName));
+    sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regSampleno);
+    sqlite3VdbeAddOp1(v, OP_Close, iIdxCur);
+  }else{
+    assert( jZeroRows>0 );
+    addr = sqlite3VdbeAddOp0(v, OP_Goto);
+    sqlite3VdbeJumpHere(v, jZeroRows);
+  }
+  sqlite3VdbeAddOp2(v, OP_Null, 0, regIdxname);
+  sqlite3VdbeAddOp4(v, OP_MakeRecord, regTabname, 3, regRec, "aaa", 0);
+  sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur, regRowid);
+  sqlite3VdbeAddOp3(v, OP_Insert, iStatCur, regRec, regRowid);
+  sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
+  if( pParse->nMem<regRec ) pParse->nMem = regRec;
+  if( jZeroRows ){
      sqlite3VdbeJumpHere(v, addr);
    }
  }
  
  /*
  ** Generate code that will cause the most recent index analysis to
-** be laoded into internal hash tables where is can be used.
+** be loaded into internal hash tables where it can be used.
  */
  static void loadAnalysis(Parse *pParse, int iDb){
    Vdbe *v = sqlite3GetVdbe(pParse);
@@ -453,33 +493,46 @@ struct analysisInfo {
  ** This callback is invoked once for each index when reading the
  ** sqlite_stat1 table.  
  **
-**     argv[0] = name of the index
-**     argv[1] = results of analysis - on integer for each column
+**     argv[0] = name of the table
+**     argv[1] = name of the index (might be NULL)
+**     argv[2] = results of analysis - one integer for each column
+**
+** Entries for which argv[1]==NULL simply record the number of rows in
+** the table.
  */
  static int analysisLoader(void *pData, int argc, char **argv, char **NotUsed){
    analysisInfo *pInfo = (analysisInfo*)pData;
    Index *pIndex;
-  int i, c;
+  Table *pTable;
+  int i, c, n;
    unsigned int v;
    const char *z;
  
-  assert( argc==2 );
+  assert( argc==3 );
    UNUSED_PARAMETER2(NotUsed, argc);
  
-  if( argv==0 || argv[0]==0 || argv[1]==0 ){
+  if( argv==0 || argv[0]==0 || argv[2]==0 ){
      return 0;
    }
-  pIndex = sqlite3FindIndex(pInfo->db, argv[0], pInfo->zDatabase);
-  if( pIndex==0 ){
+  pTable = sqlite3FindTable(pInfo->db, argv[0], pInfo->zDatabase);
+  if( pTable==0 ){
      return 0;
    }
-  z = argv[1];
-  for(i=0; *z && i<=pIndex->nColumn; i++){
+  if( argv[1] ){
+    pIndex = sqlite3FindIndex(pInfo->db, argv[1], pInfo->zDatabase);
+  }else{
+    pIndex = 0;
+  }
+  n = pIndex ? pIndex->nColumn : 0;
+  z = argv[2];
+  for(i=0; *z && i<=n; i++){
      v = 0;
      while( (c=z[0])>='0' && c<='9' ){
        v = v*10 + c - '0';
        z++;
      }
+    if( i==0 ) pTable->nRowEst = v;
+    if( pIndex==0 ) break;
      pIndex->aiRowEst[i] = v;
      if( *z==' ' ) z++;
    }
@@ -555,7 +608,7 @@ int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
  
    /* Load new statistics out of the sqlite_stat1 table */
    zSql = sqlite3MPrintf(db, 
-      "SELECT idx, stat FROM %Q.sqlite_stat1", sInfo.zDatabase);
+      "SELECT tbl, idx, stat FROM %Q.sqlite_stat1", sInfo.zDatabase);
    if( zSql==0 ){
      rc = SQLITE_NOMEM;
    }else{
@@ -583,8 +636,11 @@ int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
  
      if( rc==SQLITE_OK ){
        while( sqlite3_step(pStmt)==SQLITE_ROW ){
-        char *zIndex = (char *)sqlite3_column_text(pStmt, 0);
-        Index *pIdx = sqlite3FindIndex(db, zIndex, sInfo.zDatabase);
+        char *zIndex;   /* Index name */
+        Index *pIdx;    /* Pointer to the index object */
+
+        zIndex = (char *)sqlite3_column_text(pStmt, 0);
+        pIdx = zIndex ? sqlite3FindIndex(db, zIndex, sInfo.zDatabase) : 0;
          if( pIdx ){
            int iSample = sqlite3_column_int(pStmt, 1);
            if( iSample<SQLITE_INDEX_SAMPLES && iSample>=0 ){
diff --git a/src/sqliteInt.h b/src/sqliteInt.h

index c44c25fa5c01de3544c7bb66a72bc57795fbb0c0..123868444df8846b6238297df615faf5a88d1452 100644 (file)
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -1224,6 +1224,7 @@ struct Table {
    Column *aCol;        /* Information about each column */
    Index *pIndex;       /* List of SQL indexes on this table. */
    int tnum;            /* Root BTree node for this table (see note above) */
+  unsigned nRowEst;    /* Estimated rows in table - from sqlite_stat1 table */
    Select *pSelect;     /* NULL for tables.  Points to definition if a view. */
    u16 nRef;            /* Number of pointers to this Table */
    u8 tabFlags;         /* Mask of TF_* values */
@@ -1792,6 +1793,9 @@ struct SrcList {
      u8 isPopulated;   /* Temporary table associated with SELECT is populated */
      u8 jointype;      /* Type of join between this able and the previous */
      u8 notIndexed;    /* True if there is a NOT INDEXED clause */
+#ifndef SQLITE_OMIT_EXPLAIN
+    u8 iSelectId;     /* If pSelect!=0, the id of the sub-select in EQP */
+#endif
      int iCursor;      /* The VDBE cursor number used to access this table */
      Expr *pOn;        /* The ON clause of a join */
      IdList *pUsing;   /* The USING clause of a join */
@@ -1830,6 +1834,7 @@ struct SrcList {
  struct WherePlan {
    u32 wsFlags;                   /* WHERE_* flags that describe the strategy */
    u32 nEq;                       /* Number of == constraints */
+  double nRow;                   /* Estimated number of rows (for EQP) */
    union {
      Index *pIdx;                   /* Index when WHERE_INDEXED is true */
      struct WhereTerm *pTerm;       /* WHERE clause term for OR-search */
@@ -1914,6 +1919,7 @@ struct WhereInfo {
    int nLevel;                    /* Number of nested loop */
    struct WhereClause *pWC;       /* Decomposition of the WHERE clause */
    double savedNQueryLoop;        /* pParse->nQueryLoop outside the WHERE loop */
+  double nRowOut;                /* Estimated number of output rows */
    WhereLevel a[1];               /* Information about each nest loop in WHERE */
  };
  
@@ -2184,6 +2190,10 @@ struct Parse {
    int nHeight;            /* Expression tree height of current sub-select */
    Table *pZombieTab;      /* List of Table objects to delete after code gen */
    TriggerPrg *pTriggerPrg;    /* Linked list of coded triggers */
+#ifndef SQLITE_OMIT_EXPLAIN
+  int iSelectId;              /* Subquery ID for query planning */
+  int iNextSelectId;          /* Next available subquery ID */
+#endif
  };
  
  #ifdef SQLITE_OMIT_VIRTUALTABLE
diff --git a/src/where.c b/src/where.c

index 9040ad0d683a02f2a175d3d18545994476a0c181..a57884c4bd2107cd3e4540ee30a4d6160b61afaa 100644 (file)
--- a/src/where.c
+++ b/src/where.c
@@ -18,6 +18,7 @@
  */
  #include "sqliteInt.h"
  
+
  /*
  ** Trace output macros
  */
@@ -117,6 +118,11 @@ struct WhereTerm {
  #define TERM_ORINFO     0x10   /* Need to free the WhereTerm.u.pOrInfo object */
  #define TERM_ANDINFO    0x20   /* Need to free the WhereTerm.u.pAndInfo obj */
  #define TERM_OR_OK      0x40   /* Used during OR-clause processing */
+#ifdef SQLITE_ENABLE_STAT2
+#  define TERM_VNULL    0x80   /* Manufactured x>NULL or x<=NULL term */
+#else
+#  define TERM_VNULL    0x00   /* Disabled if not using stat2 */
+#endif
  
  /*
  ** An instance of the following structure holds all information about a
@@ -192,7 +198,6 @@ struct WhereMaskSet {
  struct WhereCost {
    WherePlan plan;    /* The lookup strategy */
    double rCost;      /* Overall cost of pursuing this search strategy */
-  double nRow;       /* Estimated number of output rows */
    Bitmask used;      /* Bitmask of cursors used by this plan */
  };
  
@@ -211,6 +216,7 @@ struct WhereCost {
  #define WO_ISNULL 0x080
  #define WO_OR     0x100       /* Two or more OR-connected terms */
  #define WO_AND    0x200       /* Two or more AND-connected terms */
+#define WO_NOOP   0x800       /* This term does not restrict search space */
  
  #define WO_ALL    0xfff       /* Mask of all possible WO_* values */
  #define WO_SINGLE 0x0ff       /* Mask of all non-compound WO_* values */
@@ -235,10 +241,11 @@ struct WhereCost {
  #define WHERE_COLUMN_IN    0x00040000  /* x IN (...) */
  #define WHERE_COLUMN_NULL  0x00080000  /* x IS NULL */
  #define WHERE_INDEXED      0x000f0000  /* Anything that uses an index */
-#define WHERE_NOT_FULLSCAN 0x000f3000  /* Does not do a full table scan */
+#define WHERE_NOT_FULLSCAN 0x100f3000  /* Does not do a full table scan */
  #define WHERE_IN_ABLE      0x000f1000  /* Able to support an IN operator */
  #define WHERE_TOP_LIMIT    0x00100000  /* x<EXPR or x<=EXPR constraint */
  #define WHERE_BTM_LIMIT    0x00200000  /* x>EXPR or x>=EXPR constraint */
+#define WHERE_BOTH_LIMIT   0x00300000  /* Both x>EXPR and x<EXPR */
  #define WHERE_IDX_ONLY     0x00800000  /* Use index only - omit table */
  #define WHERE_ORDERBY      0x01000000  /* Output will appear in correct order */
  #define WHERE_REVERSE      0x02000000  /* Scan in reverse order */
@@ -669,11 +676,12 @@ static int isLikeOrGlob(
    }
    if( op==TK_VARIABLE ){
      Vdbe *pReprepare = pParse->pReprepare;
-    pVal = sqlite3VdbeGetValue(pReprepare, pRight->iColumn, SQLITE_AFF_NONE);
+    int iCol = pRight->iColumn;
+    pVal = sqlite3VdbeGetValue(pReprepare, iCol, SQLITE_AFF_NONE);
      if( pVal && sqlite3_value_type(pVal)==SQLITE_TEXT ){
        z = (char *)sqlite3_value_text(pVal);
      }
-    sqlite3VdbeSetVarmask(pParse->pVdbe, pRight->iColumn);
+    sqlite3VdbeSetVarmask(pParse->pVdbe, iCol); /* IMP: R-23257-02778 */
      assert( pRight->op==TK_VARIABLE || pRight->op==TK_REGISTER );
    }else if( op==TK_STRING ){
      z = pRight->u.zToken;
@@ -691,7 +699,7 @@ static int isLikeOrGlob(
        *ppPrefix = pPrefix;
        if( op==TK_VARIABLE ){
          Vdbe *v = pParse->pVdbe;
-        sqlite3VdbeSetVarmask(v, pRight->iColumn);
+        sqlite3VdbeSetVarmask(v, pRight->iColumn); /* IMP: R-23257-02778 */
          if( *pisComplete && pRight->u.zToken[1] ){
            /* If the rhs of the LIKE expression is a variable, and the current
            ** value of the variable means there is no need to invoke the LIKE
@@ -1059,7 +1067,7 @@ static void exprAnalyzeOrTerm(
        }else{
          sqlite3ExprListDelete(db, pList);
        }
-      pTerm->eOperator = 0;  /* case 1 trumps case 2 */
+      pTerm->eOperator = WO_NOOP;  /* case 1 trumps case 2 */
      }
    }
  }
@@ -1323,6 +1331,44 @@ static void exprAnalyze(
    }
  #endif /* SQLITE_OMIT_VIRTUALTABLE */
  
+#ifdef SQLITE_ENABLE_STAT2
+  /* When sqlite_stat2 histogram data is available an operator of the
+  ** form "x IS NOT NULL" can sometimes be evaluated more efficiently
+  ** as "x>NULL" if x is not an INTEGER PRIMARY KEY.  So construct a
+  ** virtual term of that form.
+  **
+  ** Note that the virtual term must be tagged with TERM_VNULL.  This
+  ** TERM_VNULL tag will suppress the not-null check at the beginning
+  ** of the loop.  Without the TERM_VNULL flag, the not-null check at
+  ** the start of the loop will prevent any results from being returned.
+  */
+  if( pExpr->op==TK_NOTNULL && pExpr->pLeft->iColumn>=0 ){
+    Expr *pNewExpr;
+    Expr *pLeft = pExpr->pLeft;
+    int idxNew;
+    WhereTerm *pNewTerm;
+
+    pNewExpr = sqlite3PExpr(pParse, TK_GT,
+                            sqlite3ExprDup(db, pLeft, 0),
+                            sqlite3PExpr(pParse, TK_NULL, 0, 0, 0), 0);
+
+    idxNew = whereClauseInsert(pWC, pNewExpr,
+                              TERM_VIRTUAL|TERM_DYNAMIC|TERM_VNULL);
+    if( idxNew ){
+      pNewTerm = &pWC->a[idxNew];
+      pNewTerm->prereqRight = 0;
+      pNewTerm->leftCursor = pLeft->iTable;
+      pNewTerm->u.leftColumn = pLeft->iColumn;
+      pNewTerm->eOperator = WO_GT;
+      pNewTerm->iParent = idxTerm;
+      pTerm = &pWC->a[idxTerm];
+      pTerm->nChild = 1;
+      pTerm->wtFlags |= TERM_COPIED;
+      pNewTerm->prereqAll = pTerm->prereqAll;
+    }
+  }
+#endif /* SQLITE_ENABLE_STAT2 */
+
    /* Prevent ON clause terms of a LEFT JOIN from being used to drive
    ** an index for tables to the left of the join.
    */
@@ -1375,6 +1421,7 @@ static int isSortingIndex(
    int base,               /* Cursor number for the table to be sorted */
    ExprList *pOrderBy,     /* The ORDER BY clause */
    int nEqCol,             /* Number of index columns with == constraints */
+  int wsFlags,            /* Index usages flags */
    int *pbRev              /* Set to 1 if ORDER BY is DESC */
  ){
    int i, j;                       /* Loop counters */
@@ -1480,11 +1527,14 @@ static int isSortingIndex(
      return 1;
    }
    if( pIdx->onError!=OE_None && i==pIdx->nColumn
+      && (wsFlags & WHERE_COLUMN_NULL)==0
        && !referencesOtherTables(pOrderBy, pMaskSet, j, base) ){
      /* All terms of this index match some prefix of the ORDER BY clause
      ** and the index is UNIQUE and no terms on the tail of the ORDER BY
      ** clause reference other tables in a join.  If this is all true then
-    ** the order by clause is superfluous. */
+    ** the order by clause is superfluous.  Note that if the matching
+    ** condition is IS NULL then the result is not necessarily unique
+    ** even on a UNIQUE index, so disallow those cases. */
      return 1;
    }
    return 0;
@@ -1555,7 +1605,8 @@ static void TRACE_IDX_OUTPUTS(sqlite3_index_info *p){
  ** Required because bestIndex() is called by bestOrClauseIndex() 
  */
  static void bestIndex(
-    Parse*, WhereClause*, struct SrcList_item*, Bitmask, ExprList*, WhereCost*);
+    Parse*, WhereClause*, struct SrcList_item*,
+    Bitmask, Bitmask, ExprList*, WhereCost*);
  
  /*
  ** This routine attempts to find an scanning strategy that can be used 
@@ -1568,7 +1619,8 @@ static void bestOrClauseIndex(
    Parse *pParse,              /* The parsing context */
    WhereClause *pWC,           /* The WHERE clause */
    struct SrcList_item *pSrc,  /* The FROM clause term to search */
-  Bitmask notReady,           /* Mask of cursors that are not available */
+  Bitmask notReady,           /* Mask of cursors not available for indexing */
+  Bitmask notValid,           /* Cursors not available for any purpose */
    ExprList *pOrderBy,         /* The ORDER BY clause */
    WhereCost *pCost            /* Lowest cost query plan */
  ){
@@ -1578,8 +1630,9 @@ static void bestOrClauseIndex(
    WhereTerm * const pWCEnd = &pWC->a[pWC->nTerm];        /* End of pWC->a[] */
    WhereTerm *pTerm;                 /* A single term of the WHERE clause */
  
-  /* No OR-clause optimization allowed if the NOT INDEXED clause is used */
-  if( pSrc->notIndexed ){
+  /* No OR-clause optimization allowed if the INDEXED BY or NOT INDEXED clauses
+  ** are used */
+  if( pSrc->notIndexed || pSrc->pIndex!=0 ){
      return;
    }
  
@@ -1604,7 +1657,7 @@ static void bestOrClauseIndex(
          ));
          if( pOrTerm->eOperator==WO_AND ){
            WhereClause *pAndWC = &pOrTerm->u.pAndInfo->wc;
-          bestIndex(pParse, pAndWC, pSrc, notReady, 0, &sTermCost);
+          bestIndex(pParse, pAndWC, pSrc, notReady, notValid, 0, &sTermCost);
          }else if( pOrTerm->leftCursor==iCur ){
            WhereClause tempWC;
            tempWC.pParse = pWC->pParse;
@@ -1612,12 +1665,12 @@ static void bestOrClauseIndex(
            tempWC.op = TK_AND;
            tempWC.a = pOrTerm;
            tempWC.nTerm = 1;
-          bestIndex(pParse, &tempWC, pSrc, notReady, 0, &sTermCost);
+          bestIndex(pParse, &tempWC, pSrc, notReady, notValid, 0, &sTermCost);
          }else{
            continue;
          }
          rTotal += sTermCost.rCost;
-        nRow += sTermCost.nRow;
+        nRow += sTermCost.plan.nRow;
          used |= sTermCost.used;
          if( rTotal>=pCost->rCost ) break;
        }
@@ -1636,8 +1689,8 @@ static void bestOrClauseIndex(
        WHERETRACE(("... multi-index OR cost=%.9g nrow=%.9g\n", rTotal, nRow));
        if( rTotal<pCost->rCost ){
          pCost->rCost = rTotal;
-        pCost->nRow = nRow;
          pCost->used = used;
+        pCost->plan.nRow = nRow;
          pCost->plan.wsFlags = flags;
          pCost->plan.u.pTerm = pTerm;
        }
@@ -1705,7 +1758,7 @@ static void bestAutomaticIndex(
  
    assert( pParse->nQueryLoop >= (double)1 );
    pTable = pSrc->pTab;
-  nTableRow = pTable->pIndex ? pTable->pIndex->aiRowEst[0] : 1000000;
+  nTableRow = pTable->nRowEst;
    logN = estLog(nTableRow);
    costTempIdx = 2*logN*(nTableRow/pParse->nQueryLoop + 1);
    if( costTempIdx>=pCost->rCost ){
@@ -1718,10 +1771,10 @@ static void bestAutomaticIndex(
    pWCEnd = &pWC->a[pWC->nTerm];
    for(pTerm=pWC->a; pTerm<pWCEnd; pTerm++){
      if( termCanDriveIndex(pTerm, pSrc, notReady) ){
-      WHERETRACE(("auto-index reduces cost from %.2f to %.2f\n",
+      WHERETRACE(("auto-index reduces cost from %.1f to %.1f\n",
                      pCost->rCost, costTempIdx));
        pCost->rCost = costTempIdx;
-      pCost->nRow = logN + 1;
+      pCost->plan.nRow = logN + 1;
        pCost->plan.wsFlags = WHERE_TEMP_INDEX;
        pCost->used = pTerm->prereqRight;
        break;
@@ -1839,7 +1892,8 @@ static void constructAutomaticIndex(
          idxCols |= cMask;
          pIdx->aiColumn[n] = pTerm->u.leftColumn;
          pColl = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight);
-        pIdx->azColl[n] = pColl->zName;
+        assert( pColl!=0 || pParse->nErr>0 );
+        pIdx->azColl[n] = pColl ? pColl->zName : "BINARY";
          n++;
        }
      }
@@ -2059,7 +2113,8 @@ static void bestVirtualIndex(
    Parse *pParse,                  /* The parsing context */
    WhereClause *pWC,               /* The WHERE clause */
    struct SrcList_item *pSrc,      /* The FROM clause term to search */
-  Bitmask notReady,               /* Mask of cursors that are not available */
+  Bitmask notReady,               /* Mask of cursors not available for index */
+  Bitmask notValid,               /* Cursors not valid for any purpose */
    ExprList *pOrderBy,             /* The order by clause */
    WhereCost *pCost,               /* Lowest cost query plan */
    sqlite3_index_info **ppIdxInfo  /* Index information passed to xBestIndex */
@@ -2189,18 +2244,25 @@ static void bestVirtualIndex(
    /* Try to find a more efficient access pattern by using multiple indexes
    ** to optimize an OR expression within the WHERE clause. 
    */
-  bestOrClauseIndex(pParse, pWC, pSrc, notReady, pOrderBy, pCost);
+  bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
  }
  #endif /* SQLITE_OMIT_VIRTUALTABLE */
  
  /*
  ** Argument pIdx is a pointer to an index structure that has an array of
  ** SQLITE_INDEX_SAMPLES evenly spaced samples of the first indexed column
-** stored in Index.aSample. The domain of values stored in said column
-** may be thought of as divided into (SQLITE_INDEX_SAMPLES+1) regions.
-** Region 0 contains all values smaller than the first sample value. Region
-** 1 contains values larger than or equal to the value of the first sample,
-** but smaller than the value of the second. And so on.
+** stored in Index.aSample. These samples divide the domain of values stored
+** in the index into (SQLITE_INDEX_SAMPLES+1) regions.
+** Region 0 contains all values less than the first sample value. Region
+** 1 contains values between the first and second samples.  Region 2 contains
+** values between samples 2 and 3.  And so on.  Region SQLITE_INDEX_SAMPLES
+** contains values larger than the last sample.
+**
+** If the index contains many duplicates of a single value, then it is
+** possible that two or more adjacent samples can hold the same value.
+** When that is the case, the smallest possible region code is returned
+** when roundUp is false and the largest possible region code is returned
+** when roundUp is true.
  **
  ** If successful, this function determines which of the regions value 
  ** pVal lies in, sets *piRegion to the region index (a value between 0
@@ -2213,8 +2275,10 @@ static int whereRangeRegion(
    Parse *pParse,              /* Database connection */
    Index *pIdx,                /* Index to consider domain of */
    sqlite3_value *pVal,        /* Value to consider */
+  int roundUp,                /* Return largest valid region if true */
    int *piRegion               /* OUT: Region of domain in which value lies */
  ){
+  assert( roundUp==0 || roundUp==1 );
    if( ALWAYS(pVal) ){
      IndexSample *aSample = pIdx->aSample;
      int i = 0;
@@ -2224,7 +2288,17 @@ static int whereRangeRegion(
        double r = sqlite3_value_double(pVal);
        for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
          if( aSample[i].eType==SQLITE_NULL ) continue;
-        if( aSample[i].eType>=SQLITE_TEXT || aSample[i].u.r>r ) break;
+        if( aSample[i].eType>=SQLITE_TEXT ) break;
+        if( roundUp ){
+          if( aSample[i].u.r>r ) break;
+        }else{
+          if( aSample[i].u.r>=r ) break;
+        }
+      }
+    }else if( eType==SQLITE_NULL ){
+      i = 0;
+      if( roundUp ){
+        while( i<SQLITE_INDEX_SAMPLES && aSample[i].eType==SQLITE_NULL ) i++;
        }
      }else{ 
        sqlite3 *db = pParse->db;
@@ -2255,7 +2329,7 @@ static int whereRangeRegion(
        n = sqlite3ValueBytes(pVal, pColl->enc);
  
        for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
-        int r;
+        int c;
          int eSampletype = aSample[i].eType;
          if( eSampletype==SQLITE_NULL || eSampletype<eType ) continue;
          if( (eSampletype!=eType) ) break;
@@ -2269,14 +2343,14 @@ static int whereRangeRegion(
              assert( db->mallocFailed );
              return SQLITE_NOMEM;
            }
-          r = pColl->xCmp(pColl->pUser, nSample, zSample, n, z);
+          c = pColl->xCmp(pColl->pUser, nSample, zSample, n, z);
            sqlite3DbFree(db, zSample);
          }else
  #endif
          {
-          r = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
+          c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
          }
-        if( r>0 ) break;
+        if( c-roundUp>=0 ) break;
        }
      }
  
@@ -2310,12 +2384,11 @@ static int valueFromExpr(
    u8 aff, 
    sqlite3_value **pp
  ){
-  /* The evalConstExpr() function will have already converted any TK_VARIABLE
-  ** expression involved in an comparison into a TK_REGISTER. */
-  assert( pExpr->op!=TK_VARIABLE );
-  if( pExpr->op==TK_REGISTER && pExpr->op2==TK_VARIABLE ){
+  if( pExpr->op==TK_VARIABLE
+   || (pExpr->op==TK_REGISTER && pExpr->op2==TK_VARIABLE)
+  ){
      int iVar = pExpr->iColumn;
-    sqlite3VdbeSetVarmask(pParse->pVdbe, iVar);
+    sqlite3VdbeSetVarmask(pParse->pVdbe, iVar); /* IMP: R-23257-02778 */
      *pp = sqlite3VdbeGetValue(pParse->pReprepare, iVar, aff);
      return SQLITE_OK;
    }
@@ -2360,9 +2433,9 @@ static int valueFromExpr(
  ** constraints.
  **
  ** In the absence of sqlite_stat2 ANALYZE data, each range inequality
-** reduces the search space by 2/3rds.  Hence a single constraint (x>?)
-** results in a return of 33 and a range constraint (x>? AND x<?) results
-** in a return of 11.
+** reduces the search space by 3/4ths.  Hence a single constraint (x>?)
+** results in a return of 25 and a range constraint (x>? AND x<?) results
+** in a return of 6.
  */
  static int whereRangeScanEst(
    Parse *pParse,       /* Parsing & code generating context */
@@ -2382,15 +2455,21 @@ static int whereRangeScanEst(
      int iEst;
      int iLower = 0;
      int iUpper = SQLITE_INDEX_SAMPLES;
+    int roundUpUpper;
+    int roundUpLower;
      u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  
      if( pLower ){
        Expr *pExpr = pLower->pExpr->pRight;
        rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal);
+      assert( pLower->eOperator==WO_GT || pLower->eOperator==WO_GE );
+      roundUpLower = (pLower->eOperator==WO_GT) ?1:0;
      }
      if( rc==SQLITE_OK && pUpper ){
        Expr *pExpr = pUpper->pExpr->pRight;
        rc = valueFromExpr(pParse, pExpr, aff, &pUpperVal);
+      assert( pUpper->eOperator==WO_LT || pUpper->eOperator==WO_LE );
+      roundUpUpper = (pUpper->eOperator==WO_LE) ?1:0;
      }
  
      if( rc!=SQLITE_OK || (pLowerVal==0 && pUpperVal==0) ){
@@ -2398,28 +2477,29 @@ static int whereRangeScanEst(
        sqlite3ValueFree(pUpperVal);
        goto range_est_fallback;
      }else if( pLowerVal==0 ){
-      rc = whereRangeRegion(pParse, p, pUpperVal, &iUpper);
+      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
        if( pLower ) iLower = iUpper/2;
      }else if( pUpperVal==0 ){
-      rc = whereRangeRegion(pParse, p, pLowerVal, &iLower);
+      rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
        if( pUpper ) iUpper = (iLower + SQLITE_INDEX_SAMPLES + 1)/2;
      }else{
-      rc = whereRangeRegion(pParse, p, pUpperVal, &iUpper);
+      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
        if( rc==SQLITE_OK ){
-        rc = whereRangeRegion(pParse, p, pLowerVal, &iLower);
+        rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
        }
      }
+    WHERETRACE(("range scan regions: %d..%d\n", iLower, iUpper));
  
      iEst = iUpper - iLower;
      testcase( iEst==SQLITE_INDEX_SAMPLES );
      assert( iEst<=SQLITE_INDEX_SAMPLES );
      if( iEst<1 ){
-      iEst = 1;
+      *piEst = 50/SQLITE_INDEX_SAMPLES;
+    }else{
+      *piEst = (iEst*100)/SQLITE_INDEX_SAMPLES;
      }
-
      sqlite3ValueFree(pLowerVal);
      sqlite3ValueFree(pUpperVal);
-    *piEst = (iEst * 100)/SQLITE_INDEX_SAMPLES;
      return rc;
    }
  range_est_fallback:
@@ -2429,22 +2509,151 @@ range_est_fallback:
    UNUSED_PARAMETER(nEq);
  #endif
    assert( pLower || pUpper );
-  if( pLower && pUpper ){
-    *piEst = 11;
+  *piEst = 100;
+  if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ) *piEst /= 4;
+  if( pUpper ) *piEst /= 4;
+  return rc;
+}
+
+#ifdef SQLITE_ENABLE_STAT2
+/*
+** Estimate the number of rows that will be returned based on
+** an equality constraint x=VALUE and where that VALUE occurs in
+** the histogram data.  This only works when x is the left-most
+** column of an index and sqlite_stat2 histogram data is available
+** for that index.
+**
+** Write the estimated row count into *pnRow and return SQLITE_OK. 
+** If unable to make an estimate, leave *pnRow unchanged and return
+** non-zero.
+**
+** This routine can fail if it is unable to load a collating sequence
+** required for string comparison, or if unable to allocate memory
+** for a UTF conversion required for comparison.  The error is stored
+** in the pParse structure.
+*/
+int whereEqualScanEst(
+  Parse *pParse,       /* Parsing & code generating context */
+  Index *p,            /* The index whose left-most column is pTerm */
+  Expr *pExpr,         /* Expression for VALUE in the x=VALUE constraint */
+  double *pnRow        /* Write the revised row estimate here */
+){
+  sqlite3_value *pRhs = 0;  /* VALUE on right-hand side of pTerm */
+  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
+  u8 aff;                   /* Column affinity */
+  int rc;                   /* Subfunction return code */
+  double nRowEst;           /* New estimate of the number of rows */
+
+  assert( p->aSample!=0 );
+  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
+  rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
+  if( rc ) goto whereEqualScanEst_cancel;
+  if( pRhs==0 ) return SQLITE_NOTFOUND;
+  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
+  if( rc ) goto whereEqualScanEst_cancel;
+  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
+  if( rc ) goto whereEqualScanEst_cancel;
+  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
+  if( iLower>=iUpper ){
+    nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2);
+    if( nRowEst<*pnRow ) *pnRow = nRowEst;
    }else{
-    *piEst = 33;
+    nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES;
+    *pnRow = nRowEst;
+  }
+
+whereEqualScanEst_cancel:
+  sqlite3ValueFree(pRhs);
+  return rc;
+}
+#endif /* defined(SQLITE_ENABLE_STAT2) */
+
+#ifdef SQLITE_ENABLE_STAT2
+/*
+** Estimate the number of rows that will be returned based on
+** an IN constraint where the right-hand side of the IN operator
+** is a list of values.  Example:
+**
+**        WHERE x IN (1,2,3,4)
+**
+** Write the estimated row count into *pnRow and return SQLITE_OK. 
+** If unable to make an estimate, leave *pnRow unchanged and return
+** non-zero.
+**
+** This routine can fail if it is unable to load a collating sequence
+** required for string comparison, or if unable to allocate memory
+** for a UTF conversion required for comparison.  The error is stored
+** in the pParse structure.
+*/
+int whereInScanEst(
+  Parse *pParse,       /* Parsing & code generating context */
+  Index *p,            /* The index whose left-most column is pTerm */
+  ExprList *pList,     /* The value list on the RHS of "x IN (v1,v2,v3,...)" */
+  double *pnRow        /* Write the revised row estimate here */
+){
+  sqlite3_value *pVal = 0;  /* One value from list */
+  int iLower, iUpper;       /* Range of histogram regions containing pVal */
+  u8 aff;                   /* Column affinity */
+  int rc = SQLITE_OK;       /* Subfunction return code */
+  double nRowEst;           /* New estimate of the number of rows */
+  int nSpan = 0;            /* Number of histogram regions spanned */
+  int nSingle = 0;          /* Histogram regions hit by a single value */
+  int nNotFound = 0;        /* Count of values that are NULL or not constants */
+  int i;                               /* Loop counter */
+  u8 aSpan[SQLITE_INDEX_SAMPLES+1];    /* Histogram regions that are spanned */
+  u8 aSingle[SQLITE_INDEX_SAMPLES+1];  /* Histogram regions hit once */
+
+  assert( p->aSample!=0 );
+  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
+  memset(aSpan, 0, sizeof(aSpan));
+  memset(aSingle, 0, sizeof(aSingle));
+  for(i=0; i<pList->nExpr; i++){
+    sqlite3ValueFree(pVal);
+    rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal);
+    if( rc ) break;
+    if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){
+      nNotFound++;
+      continue;
+    }
+    rc = whereRangeRegion(pParse, p, pVal, 0, &iLower);
+    if( rc ) break;
+    rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper);
+    if( rc ) break;
+    if( iLower>=iUpper ){
+      aSingle[iLower] = 1;
+    }else{
+      assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES );
+      while( iLower<iUpper ) aSpan[iLower++] = 1;
+    }
+  }
+  if( rc==SQLITE_OK ){
+    for(i=nSpan=0; i<=SQLITE_INDEX_SAMPLES; i++){
+      if( aSpan[i] ){
+        nSpan++;
+      }else if( aSingle[i] ){
+        nSingle++;
+      }
+    }
+    nRowEst = (nSpan*2+nSingle)*p->aiRowEst[0]/(2*SQLITE_INDEX_SAMPLES)
+               + nNotFound*p->aiRowEst[1];
+    if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
+    *pnRow = nRowEst;
+    WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n",
+                 nSpan, nSingle, nNotFound, nRowEst));
    }
+  sqlite3ValueFree(pVal);
    return rc;
  }
+#endif /* defined(SQLITE_ENABLE_STAT2) */
  
  
  /*
-** Find the query plan for accessing a particular table.  Write the
+** Find the best query plan for accessing a particular table.  Write the
  ** best query plan and its cost into the WhereCost object supplied as the
  ** last parameter.
  **
  ** The lowest cost plan wins.  The cost is an estimate of the amount of
-** CPU and disk I/O need to process the request using the selected plan.
+** CPU and disk I/O needed to process the requested result.
  ** Factors that influence cost include:
  **
  **    *  The estimated number of rows that will be retrieved.  (The
@@ -2463,14 +2672,15 @@ range_est_fallback:
  **
  ** If a NOT INDEXED clause (pSrc->notIndexed!=0) was attached to the table 
  ** in the SELECT statement, then no indexes are considered. However, the 
-** selected plan may still take advantage of the tables built-in rowid
+** selected plan may still take advantage of the built-in rowid primary key
  ** index.
  */
  static void bestBtreeIndex(
    Parse *pParse,              /* The parsing context */
    WhereClause *pWC,           /* The WHERE clause */
    struct SrcList_item *pSrc,  /* The FROM clause term to search */
-  Bitmask notReady,           /* Mask of cursors that are not available */
+  Bitmask notReady,           /* Mask of cursors not available for indexing */
+  Bitmask notValid,           /* Cursors not available for any purpose */
    ExprList *pOrderBy,         /* The ORDER BY clause */
    WhereCost *pCost            /* Lowest cost query plan */
  ){
@@ -2505,30 +2715,25 @@ static void bestBtreeIndex(
      wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE);
      eqTermMask = idxEqTermMask;
    }else{
-    /* There is no INDEXED BY clause.  Create a fake Index object to
-    ** represent the primary key */
-    Index *pFirst;                /* Any other index on the table */
+    /* There is no INDEXED BY clause.  Create a fake Index object in local
+    ** variable sPk to represent the rowid primary key index.  Make this
+    ** fake index the first in a chain of Index objects with all of the real
+    ** indices to follow */
+    Index *pFirst;                  /* First of real indices on the table */
      memset(&sPk, 0, sizeof(Index));
      sPk.nColumn = 1;
      sPk.aiColumn = &aiColumnPk;
      sPk.aiRowEst = aiRowEstPk;
-    aiRowEstPk[1] = 1;
      sPk.onError = OE_Replace;
      sPk.pTable = pSrc->pTab;
+    aiRowEstPk[0] = pSrc->pTab->nRowEst;
+    aiRowEstPk[1] = 1;
      pFirst = pSrc->pTab->pIndex;
      if( pSrc->notIndexed==0 ){
+      /* The real indices of the table are only considered if the
+      ** NOT INDEXED qualifier is omitted from the FROM clause */
        sPk.pNext = pFirst;
      }
-    /* The aiRowEstPk[0] is an estimate of the total number of rows in the
-    ** table.  Get this information from the ANALYZE information if it is
-    ** available.  If not available, assume the table 1 million rows in size.
-    */
-    if( pFirst ){
-      assert( pFirst->aiRowEst!=0 ); /* Allocated together with pFirst */
-      aiRowEstPk[0] = pFirst->aiRowEst[0];
-    }else{
-      aiRowEstPk[0] = 1000000;
-    }
      pProbe = &sPk;
      wsFlagMask = ~(
          WHERE_COLUMN_IN|WHERE_COLUMN_EQ|WHERE_COLUMN_NULL|WHERE_COLUMN_RANGE
@@ -2543,16 +2748,19 @@ static void bestBtreeIndex(
      const unsigned int * const aiRowEst = pProbe->aiRowEst;
      double cost;                /* Cost of using pProbe */
      double nRow;                /* Estimated number of rows in result set */
+    double log10N;              /* base-10 logarithm of nRow (inexact) */
      int rev;                    /* True to scan in reverse order */
      int wsFlags = 0;
      Bitmask used = 0;
  
      /* The following variables are populated based on the properties of
-    ** scan being evaluated. They are then used to determine the expected
+    ** index being evaluated. They are then used to determine the expected
      ** cost and number of rows returned.
      **
      **  nEq: 
      **    Number of equality terms that can be implemented using the index.
+    **    In other words, the number of initial fields in the index that
+    **    are used in == or IN or NOT NULL constraints of the WHERE clause.
      **
      **  nInMul:  
      **    The "in-multiplier". This is an estimate of how many seek operations 
@@ -2576,7 +2784,9 @@ static void bestBtreeIndex(
      **
      **  bInEst:  
      **    Set to true if there was at least one "x IN (SELECT ...)" term used 
-    **    in determining the value of nInMul.
+    **    in determining the value of nInMul.  Note that the RHS of the
+    **    IN operator must be a SELECT, not a value list, for this variable
+    **    to be true.
      **
      **  estBound:
      **    An estimate on the amount of the table that must be searched.  A
@@ -2584,8 +2794,8 @@ static void bestBtreeIndex(
      **    might reduce this to a value less than 100 to indicate that only
      **    a fraction of the table needs searching.  In the absence of
      **    sqlite_stat2 ANALYZE data, a single inequality reduces the search
-    **    space to 1/3rd its original size.  So an x>? constraint reduces
-    **    estBound to 33.  Two constraints (x>? AND x<?) reduce estBound to 11.
+    **    space to 1/4th its original size.  So an x>? constraint reduces
+    **    estBound to 25.  Two constraints (x>? AND x<?) reduce estBound to 6.
      **
      **  bSort:   
      **    Boolean. True if there is an ORDER BY clause that will require an 
@@ -2593,25 +2803,31 @@ static void bestBtreeIndex(
      **    correctly order records).
      **
      **  bLookup: 
-    **    Boolean. True if for each index entry visited a lookup on the 
-    **    corresponding table b-tree is required. This is always false 
-    **    for the rowid index. For other indexes, it is true unless all the 
-    **    columns of the table used by the SELECT statement are present in 
-    **    the index (such an index is sometimes described as a covering index).
+    **    Boolean. True if a table lookup is required for each index entry
+    **    visited.  In other words, true if this is not a covering index.
+    **    This is always false for the rowid primary key index of a table.
+    **    For other indexes, it is true unless all the columns of the table
+    **    used by the SELECT statement are present in the index (such an
+    **    index is sometimes described as a covering index).
      **    For example, given the index on (a, b), the second of the following 
-    **    two queries requires table b-tree lookups, but the first does not.
+    **    two queries requires table b-tree lookups in order to find the value
+    **    of column c, but the first does not because columns a and b are
+    **    both available in the index.
      **
      **             SELECT a, b    FROM tbl WHERE a = 1;
      **             SELECT a, b, c FROM tbl WHERE a = 1;
      */
-    int nEq;
-    int bInEst = 0;
-    int nInMul = 1;
-    int estBound = 100;
-    int nBound = 0;             /* Number of range constraints seen */
-    int bSort = 0;
-    int bLookup = 0;
-    WhereTerm *pTerm;           /* A single term of the WHERE clause */
+    int nEq;                      /* Number of == or IN terms matching index */
+    int bInEst = 0;               /* True if "x IN (SELECT...)" seen */
+    int nInMul = 1;               /* Number of distinct equalities to lookup */
+    int estBound = 100;           /* Estimated reduction in search space */
+    int nBound = 0;               /* Number of range constraints seen */
+    int bSort = 0;                /* True if external sort required */
+    int bLookup = 0;              /* True if not a covering index */
+    WhereTerm *pTerm;             /* A single term of the WHERE clause */
+#ifdef SQLITE_ENABLE_STAT2
+    WhereTerm *pFirstTerm = 0;    /* First term matching the index */
+#endif
  
      /* Determine the values of nEq and nInMul */
      for(nEq=0; nEq<pProbe->nColumn; nEq++){
@@ -2623,14 +2839,19 @@ static void bestBtreeIndex(
          Expr *pExpr = pTerm->pExpr;
          wsFlags |= WHERE_COLUMN_IN;
          if( ExprHasProperty(pExpr, EP_xIsSelect) ){
+          /* "x IN (SELECT ...)":  Assume the SELECT returns 25 rows */
            nInMul *= 25;
            bInEst = 1;
-        }else if( ALWAYS(pExpr->x.pList) ){
-          nInMul *= pExpr->x.pList->nExpr + 1;
+        }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){
+          /* "x IN (value, value, ...)" */
+          nInMul *= pExpr->x.pList->nExpr;
          }
        }else if( pTerm->eOperator & WO_ISNULL ){
          wsFlags |= WHERE_COLUMN_NULL;
        }
+#ifdef SQLITE_ENABLE_STAT2
+      if( nEq==0 && pProbe->aSample ) pFirstTerm = pTerm;
+#endif
        used |= pTerm->prereqRight;
      }
  
@@ -2666,8 +2887,9 @@ static void bestBtreeIndex(
      ** in wsFlags. Otherwise, if there is an ORDER BY clause but the index
      ** will scan rows in a different order, set the bSort variable.  */
      if( pOrderBy ){
-      if( (wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_NULL))==0
-        && isSortingIndex(pParse,pWC->pMaskSet,pProbe,iCur,pOrderBy,nEq,&rev)
+      if( (wsFlags & WHERE_COLUMN_IN)==0
+        && isSortingIndex(pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy,
+                          nEq, wsFlags, &rev)
        ){
          wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY;
          wsFlags |= (rev ? WHERE_REVERSE : 0);
@@ -2698,8 +2920,8 @@ static void bestBtreeIndex(
      }
  
      /*
-    ** Estimate the number of rows of output.  For an IN operator,
-    ** do not let the estimate exceed half the rows in the table.
+    ** Estimate the number of rows of output.  For an "x IN (SELECT...)"
+    ** constraint, do not let the estimate exceed half the rows in the table.
      */
      nRow = (double)(aiRowEst[nEq] * nInMul);
      if( bInEst && nRow*2>aiRowEst[0] ){
@@ -2707,31 +2929,88 @@ static void bestBtreeIndex(
        nInMul = (int)(nRow / aiRowEst[nEq]);
      }
  
-    /* Assume constant cost to access a row and logarithmic cost to
-    ** do a binary search.  Hence, the initial cost is the number of output
-    ** rows plus log2(table-size) times the number of binary searches.
+#ifdef SQLITE_ENABLE_STAT2
+    /* If the constraint is of the form x=VALUE and histogram
+    ** data is available for column x, then it might be possible
+    ** to get a better estimate on the number of rows based on
+    ** VALUE and how common that value is according to the histogram.
      */
-    cost = nRow + nInMul*estLog(aiRowEst[0]);
+    if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 ){
+      if( pFirstTerm->eOperator==WO_EQ ){
+        whereEqualScanEst(pParse, pProbe, pFirstTerm->pExpr->pRight, &nRow);
+      }else if( pFirstTerm->eOperator==WO_IN && bInEst==0 ){
+        whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow);
+      }
+    }
+#endif /* SQLITE_ENABLE_STAT2 */
  
-    /* Adjust the number of rows and the cost downward to reflect rows
+    /* Adjust the number of output rows downward to reflect rows
      ** that are excluded by range constraints.
      */
      nRow = (nRow * (double)estBound) / (double)100;
-    cost = (cost * (double)estBound) / (double)100;
-
-    /* Add in the estimated cost of sorting the result
+    if( nRow<1 ) nRow = 1;
+
+    /* Experiments run on real SQLite databases show that the time needed
+    ** to do a binary search to locate a row in a table or index is roughly
+    ** log10(N) times the time to move from one row to the next row within
+    ** a table or index.  The actual times can vary, with the size of
+    ** records being an important factor.  Both moves and searches are
+    ** slower with larger records, presumably because fewer records fit
+    ** on one page and hence more pages have to be fetched.
+    **
+    ** The ANALYZE command and the sqlite_stat1 and sqlite_stat2 tables do
+    ** not give us data on the relative sizes of table and index records.
+    ** So this computation assumes table records are about twice as big
+    ** as index records
      */
-    if( bSort ){
-      cost += cost*estLog(cost);
+    if( (wsFlags & WHERE_NOT_FULLSCAN)==0 ){
+      /* The cost of a full table scan is a number of move operations equal
+      ** to the number of rows in the table.
+      **
+      ** We add an additional 4x penalty to full table scans.  This causes
+      ** the cost function to err on the side of choosing an index over
+      ** choosing a full scan.  This 4x full-scan penalty is an arguable
+      ** decision and one which we expect to revisit in the future.  But
+      ** it seems to be working well enough at the moment.
+      */
+      cost = aiRowEst[0]*4;
+    }else{
+      log10N = estLog(aiRowEst[0]);
+      cost = nRow;
+      if( pIdx ){
+        if( bLookup ){
+          /* For an index lookup followed by a table lookup:
+          **    nInMul index searches to find the start of each index range
+          **  + nRow steps through the index
+          **  + nRow table searches to lookup the table entry using the rowid
+          */
+          cost += (nInMul + nRow)*log10N;
+        }else{
+          /* For a covering index:
+          **     nInMul index searches to find the initial entry 
+          **   + nRow steps through the index
+          */
+          cost += nInMul*log10N;
+        }
+      }else{
+        /* For a rowid primary key lookup:
+        **    nInMul table searches to find the initial entry for each range
+        **  + nRow steps through the table
+        */
+        cost += nInMul*log10N;
+      }
      }
  
-    /* If all information can be taken directly from the index, we avoid
-    ** doing table lookups.  This reduces the cost by half.  (Not really -
-    ** this needs to be fixed.)
+    /* Add in the estimated cost of sorting the result.  Actual experimental
+    ** measurements of sorting performance in SQLite show that sorting time
+    ** adds C*N*log10(N) to the cost, where N is the number of rows to be 
+    ** sorted and C is a factor between 1.95 and 4.3.  We will split the
+    ** difference and select C of 3.0.
      */
-    if( pIdx && bLookup==0 ){
-      cost /= (double)2;
+    if( bSort ){
+      cost += nRow*estLog(nRow)*3;
      }
+
      /**** Cost of using this index has now been computed ****/
  
      /* If there are additional constraints on this table that cannot
@@ -2741,16 +3020,16 @@ static void bestBtreeIndex(
      ** with this step if we already know this index will not be chosen.
      ** Also, never reduce the output row count below 2 using this step.
      **
-    ** Do not reduce the output row count if pSrc is the only table that
-    ** is notReady; if notReady is a power of two.  This will be the case
-    ** when the main sqlite3WhereBegin() loop is scanning for a table with
-    ** and "optimal" index, and on such a scan the output row count
-    ** reduction is not valid because it does not update the "pCost->used"
-    ** bitmap.  The notReady bitmap will also be a power of two when we
-    ** are scanning for the last table in a 64-way join.  We are willing
-    ** to bypass this optimization in that corner case.
+    ** It is critical that the notValid mask be used here instead of
+    ** the notReady mask.  When computing an "optimal" index, the notReady
+    ** mask will only have one bit set - the bit for the current table.
+    ** The notValid mask, on the other hand, always has all bits set for
+    ** tables that are not in outer loops.  If notReady is used here instead
+    ** of notValid, then an optimal index that depends on inner join loops
+    ** might be selected even when there exists an optimal index that has
+    ** no such dependency.
      */
-    if( nRow>2 && cost<=pCost->rCost && (notReady & (notReady-1))!=0 ){
+    if( nRow>2 && cost<=pCost->rCost ){
        int k;                       /* Loop counter */
        int nSkipEq = nEq;           /* Number of == constraints to skip */
        int nSkipRange = nBound;     /* Number of < constraints to skip */
@@ -2759,7 +3038,7 @@ static void bestBtreeIndex(
        thisTab = getMask(pWC->pMaskSet, iCur);
        for(pTerm=pWC->a, k=pWC->nTerm; nRow>2 && k; k--, pTerm++){
          if( pTerm->wtFlags & TERM_VIRTUAL ) continue;
-        if( (pTerm->prereqAll & notReady)!=thisTab ) continue;
+        if( (pTerm->prereqAll & notValid)!=thisTab ) continue;
          if( pTerm->eOperator & (WO_EQ|WO_IN|WO_ISNULL) ){
            if( nSkipEq ){
              /* Ignore the first nEq equality matches since the index
@@ -2772,15 +3051,19 @@ static void bestBtreeIndex(
            }
          }else if( pTerm->eOperator & (WO_LT|WO_LE|WO_GT|WO_GE) ){
            if( nSkipRange ){
-            /* Ignore the first nBound range constraints since the index
+            /* Ignore the first nSkipRange range constraints since the index
              ** has already accounted for these */
              nSkipRange--;
            }else{
              /* Assume each additional range constraint reduces the result
-            ** set size by a factor of 3 */
+            ** set size by a factor of 3.  Indexed range constraints reduce
+            ** the search space by a larger factor: 4.  We make indexed range
+            ** more selective intentionally because of the subjective 
+            ** observation that indexed range constraints really are more
+            ** selective in practice, on average. */
              nRow /= 3;
            }
-        }else{
+        }else if( pTerm->eOperator!=WO_NOOP ){
            /* Any other expression lowers the output row count by half */
            nRow /= 2;
          }
@@ -2791,21 +3074,21 @@ static void bestBtreeIndex(
  
      WHERETRACE((
        "%s(%s): nEq=%d nInMul=%d estBound=%d bSort=%d bLookup=%d wsFlags=0x%x\n"
-      "         notReady=0x%llx nRow=%.2f cost=%.2f used=0x%llx\n",
+      "         notReady=0x%llx log10N=%.1f nRow=%.1f cost=%.1f used=0x%llx\n",
        pSrc->pTab->zName, (pIdx ? pIdx->zName : "ipk"), 
        nEq, nInMul, estBound, bSort, bLookup, wsFlags,
-      notReady, nRow, cost, used
+      notReady, log10N, nRow, cost, used
      ));
  
      /* If this index is the best we have seen so far, then record this
      ** index and its cost in the pCost structure.
      */
      if( (!pIdx || wsFlags)
-     && (cost<pCost->rCost || (cost<=pCost->rCost && nRow<pCost->nRow))
+     && (cost<pCost->rCost || (cost<=pCost->rCost && nRow<pCost->plan.nRow))
      ){
        pCost->rCost = cost;
-      pCost->nRow = nRow;
        pCost->used = used;
+      pCost->plan.nRow = nRow;
        pCost->plan.wsFlags = (wsFlags&wsFlagMask);
        pCost->plan.nEq = nEq;
        pCost->plan.u.pIdx = pIdx;
@@ -2841,7 +3124,7 @@ static void bestBtreeIndex(
           pCost->plan.u.pIdx ? pCost->plan.u.pIdx->zName : "ipk")
    ));
    
-  bestOrClauseIndex(pParse, pWC, pSrc, notReady, pOrderBy, pCost);
+  bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
    bestAutomaticIndex(pParse, pWC, pSrc, notReady, pCost);
    pCost->plan.wsFlags |= eqTermMask;
  }
@@ -2856,14 +3139,15 @@ static void bestIndex(
    Parse *pParse,              /* The parsing context */
    WhereClause *pWC,           /* The WHERE clause */
    struct SrcList_item *pSrc,  /* The FROM clause term to search */
-  Bitmask notReady,           /* Mask of cursors that are not available */
+  Bitmask notReady,           /* Mask of cursors not available for indexing */
+  Bitmask notValid,           /* Cursors not available for any purpose */
    ExprList *pOrderBy,         /* The ORDER BY clause */
    WhereCost *pCost            /* Lowest cost query plan */
  ){
  #ifndef SQLITE_OMIT_VIRTUALTABLE
    if( IsVirtual(pSrc->pTab) ){
      sqlite3_index_info *p = 0;
-    bestVirtualIndex(pParse, pWC, pSrc, notReady, pOrderBy, pCost, &p);
+    bestVirtualIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost,&p);
      if( p->needToFreeIdxStr ){
        sqlite3_free(p->idxStr);
      }
@@ -2871,7 +3155,7 @@ static void bestIndex(
    }else
  #endif
    {
-    bestBtreeIndex(pParse, pWC, pSrc, notReady, pOrderBy, pCost);
+    bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
    }
  }
  
@@ -3133,6 +3417,161 @@ static int codeAllEqualityTerms(
    return regBase;
  }
  
+#ifndef SQLITE_OMIT_EXPLAIN
+/*
+** This routine is a helper for explainIndexRange() below.
+**
+** pStr holds the text of an expression that we are building up one term
+** at a time.  This routine adds a new term, "<zColumn><op>?", to the end
+** of the expression.  Terms are separated by AND, so the " AND " text is
+** added for the second and subsequent terms only (iTerm!=0).
+*/
+static void explainAppendTerm(
+  StrAccum *pStr,             /* The text expression being built */
+  int iTerm,                  /* Index of this term.  First is zero */
+  const char *zColumn,        /* Name of the column */
+  const char *zOp             /* Operator text; one byte of it is appended */
+){
+  if( iTerm ) sqlite3StrAccumAppend(pStr, " AND ", 5);
+  sqlite3StrAccumAppend(pStr, zColumn, -1);
+  sqlite3StrAccumAppend(pStr, zOp, 1);
+  sqlite3StrAccumAppend(pStr, "?", 1);
+}
+
+/*
+** Argument pLevel describes a strategy for scanning table pTab. This 
+** function returns a pointer to a string buffer containing a description
+** of the subset of table rows scanned by the strategy in the form of an
+** SQL expression. Or, if all rows are scanned, NULL is returned.
+**
+** For example, if the query:
+**
+**   SELECT * FROM t1 WHERE a=1 AND b>2;
+**
+** is run and there is an index on (a, b), then this function returns a
+** string similar to (note the leading space and the parentheses):
+**
+**   " (a=? AND b>?)"
+**
+** The returned pointer points to memory obtained from sqlite3DbMalloc().
+** It is the responsibility of the caller to free the buffer when it is
+** no longer required.
+*/
+static char *explainIndexRange(sqlite3 *db, WhereLevel *pLevel, Table *pTab){
+  WherePlan *pPlan = &pLevel->plan;
+  Index *pIndex = pPlan->u.pIdx;
+  int nEq = pPlan->nEq;
+  int i, j;
+  Column *aCol = pTab->aCol;
+  int *aiColumn = pIndex->aiColumn;
+  StrAccum txt;
+
+  if( nEq==0 && (pPlan->wsFlags & (WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))==0 ){
+    return 0;
+  }
+  sqlite3StrAccumInit(&txt, 0, 0, SQLITE_MAX_LENGTH);
+  txt.db = db;
+  sqlite3StrAccumAppend(&txt, " (", 2);
+  for(i=0; i<nEq; i++){
+    explainAppendTerm(&txt, i, aCol[aiColumn[i]].zName, "=");
+  }
+
+  j = i;   /* i==nEq here: the index column after the equality terms */
+  if( pPlan->wsFlags&WHERE_BTM_LIMIT ){
+    explainAppendTerm(&txt, i++, aCol[aiColumn[j]].zName, ">");
+  }
+  if( pPlan->wsFlags&WHERE_TOP_LIMIT ){
+    explainAppendTerm(&txt, i, aCol[aiColumn[j]].zName, "<");
+  }
+  sqlite3StrAccumAppend(&txt, ")", 1);
+  return sqlite3StrAccumFinish(&txt);
+}
+
+/*
+** This function is a no-op unless currently processing an EXPLAIN QUERY PLAN
+** command (pParse->explain==2).  Otherwise a single OP_Explain record that
+** describes the scan strategy in pLevel is added to the output, except that
+** nothing is added for WHERE_MULTI_OR plans or WHERE_ONETABLE_ONLY callers.
+*/
+static void explainOneScan(
+  Parse *pParse,                  /* Parse context */
+  SrcList *pTabList,              /* Table list this loop refers to */
+  WhereLevel *pLevel,             /* Scan to write OP_Explain opcode for */
+  int iLevel,                     /* Value for "level" column of output */
+  int iFrom,                      /* Value for "from" column of output */
+  u16 wctrlFlags                  /* Flags passed to sqlite3WhereBegin() */
+){
+  if( pParse->explain==2 ){
+    u32 flags = pLevel->plan.wsFlags;
+    struct SrcList_item *pItem = &pTabList->a[pLevel->iFrom];
+    Vdbe *v = pParse->pVdbe;      /* VM being constructed */
+    sqlite3 *db = pParse->db;     /* Database handle */
+    char *zMsg;                   /* Text to add to EQP output */
+    sqlite3_int64 nRow;           /* Expected number of rows visited by scan */
+    int iId = pParse->iSelectId;  /* Select id (left-most output column) */
+    int isSearch;                 /* True for a SEARCH. False for SCAN. */
+
+    if( (flags&WHERE_MULTI_OR) || (wctrlFlags&WHERE_ONETABLE_ONLY) ) return;
+
+    isSearch = (pLevel->plan.nEq>0)
+             || (flags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))!=0
+             || (wctrlFlags&(WHERE_ORDERBY_MIN|WHERE_ORDERBY_MAX));
+
+    zMsg = sqlite3MPrintf(db, "%s", isSearch?"SEARCH":"SCAN");
+    if( pItem->pSelect ){
+      zMsg = sqlite3MAppendf(db, zMsg, "%s SUBQUERY %d", zMsg,pItem->iSelectId);
+    }else{
+      zMsg = sqlite3MAppendf(db, zMsg, "%s TABLE %s", zMsg, pItem->zName);
+    }
+
+    if( pItem->zAlias ){
+      zMsg = sqlite3MAppendf(db, zMsg, "%s AS %s", zMsg, pItem->zAlias);
+    }
+    if( (flags & WHERE_INDEXED)!=0 ){
+      char *zWhere = explainIndexRange(db, pLevel, pItem->pTab);
+      zMsg = sqlite3MAppendf(db, zMsg, "%s USING %s%sINDEX%s%s%s", zMsg, 
+          ((flags & WHERE_TEMP_INDEX)?"AUTOMATIC ":""),
+          ((flags & WHERE_IDX_ONLY)?"COVERING ":""),
+          ((flags & WHERE_TEMP_INDEX)?"":" "),
+          ((flags & WHERE_TEMP_INDEX)?"": pLevel->plan.u.pIdx->zName),
+          zWhere
+      );
+      sqlite3DbFree(db, zWhere);
+    }else if( flags & (WHERE_ROWID_EQ|WHERE_ROWID_RANGE) ){
+      zMsg = sqlite3MAppendf(db, zMsg, "%s USING INTEGER PRIMARY KEY", zMsg);
+
+      if( flags&WHERE_ROWID_EQ ){
+        zMsg = sqlite3MAppendf(db, zMsg, "%s (rowid=?)", zMsg);
+      }else if( (flags&WHERE_BOTH_LIMIT)==WHERE_BOTH_LIMIT ){
+        zMsg = sqlite3MAppendf(db, zMsg, "%s (rowid>? AND rowid<?)", zMsg);
+      }else if( flags&WHERE_BTM_LIMIT ){
+        zMsg = sqlite3MAppendf(db, zMsg, "%s (rowid>?)", zMsg);
+      }else if( flags&WHERE_TOP_LIMIT ){
+        zMsg = sqlite3MAppendf(db, zMsg, "%s (rowid<?)", zMsg);
+      }
+    }
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+    else if( (flags & WHERE_VIRTUALTABLE)!=0 ){
+      sqlite3_index_info *pVtabIdx = pLevel->plan.u.pVtabIdx;
+      zMsg = sqlite3MAppendf(db, zMsg, "%s VIRTUAL TABLE INDEX %d:%s", zMsg,
+                  pVtabIdx->idxNum, pVtabIdx->idxStr);
+    }
+#endif
+    if( wctrlFlags&(WHERE_ORDERBY_MIN|WHERE_ORDERBY_MAX) ){
+      testcase( wctrlFlags & WHERE_ORDERBY_MIN );
+      nRow = 1;   /* ORDERBY_MIN/MAX scans are reported as one row */
+    }else{
+      nRow = (sqlite3_int64)pLevel->plan.nRow;
+    }
+    zMsg = sqlite3MAppendf(db, zMsg, "%s (~%lld rows)", zMsg, nRow);
+    sqlite3VdbeAddOp4(v, OP_Explain, iId, iLevel, iFrom, zMsg, P4_DYNAMIC);
+  }
+}
+#else
+# define explainOneScan(u,v,w,x,y,z)
+#endif /* SQLITE_OMIT_EXPLAIN */
+
+
  /*
  ** Generate code for the start of the iLevel-th loop in the WHERE clause
  ** implementation described by pWInfo.
@@ -3462,7 +3901,9 @@ static Bitmask codeOneLoopStart(
      if( pRangeStart ){
        Expr *pRight = pRangeStart->pExpr->pRight;
        sqlite3ExprCode(pParse, pRight, regBase+nEq);
-      sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt);
+      if( (pRangeStart->wtFlags & TERM_VNULL)==0 ){
+        sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt);
+      }
        if( zStartAff ){
          if( sqlite3CompareAffinity(pRight, zStartAff[nEq])==SQLITE_AFF_NONE){
            /* Since the comparison is to be performed with no conversions
@@ -3501,7 +3942,9 @@ static Bitmask codeOneLoopStart(
        Expr *pRight = pRangeEnd->pExpr->pRight;
        sqlite3ExprCacheRemove(pParse, regBase+nEq, 1);
        sqlite3ExprCode(pParse, pRight, regBase+nEq);
-      sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt);
+      if( (pRangeEnd->wtFlags & TERM_VNULL)==0 ){
+        sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt);
+      }
        if( zEndAff ){
          if( sqlite3CompareAffinity(pRight, zEndAff[nEq])==SQLITE_AFF_NONE){
            /* Since the comparison is to be performed with no conversions
@@ -3540,7 +3983,7 @@ static Bitmask codeOneLoopStart(
      r1 = sqlite3GetTempReg(pParse);
      testcase( pLevel->plan.wsFlags & WHERE_BTM_LIMIT );
      testcase( pLevel->plan.wsFlags & WHERE_TOP_LIMIT );
-    if( pLevel->plan.wsFlags & (WHERE_BTM_LIMIT|WHERE_TOP_LIMIT) ){
+    if( (pLevel->plan.wsFlags & (WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))!=0 ){
        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, nEq, r1);
        sqlite3VdbeAddOp2(v, OP_IsNull, r1, addrCont);
      }
@@ -3674,6 +4117,9 @@ static Bitmask codeOneLoopStart(
                          WHERE_OMIT_OPEN | WHERE_OMIT_CLOSE |
                          WHERE_FORCE_TABLE | WHERE_ONETABLE_ONLY);
          if( pSubWInfo ){
+          explainOneScan(
+              pParse, pOrTab, &pSubWInfo->a[0], iLevel, pLevel->iFrom, 0
+          );
            if( (wctrlFlags & WHERE_DUPLICATES_OK)==0 ){
              int iSet = ((ii==pOrWc->nTerm-1)?-1:ii);
              int r;
@@ -4069,6 +4515,7 @@ WhereInfo *sqlite3WhereBegin(
  
      memset(&bestPlan, 0, sizeof(bestPlan));
      bestPlan.rCost = SQLITE_BIG_DBL;
+    WHERETRACE(("*** Begin search for loop %d ***\n", i));
  
      /* Loop through the remaining entries in the FROM clause to find the
      ** next nested loop. The loop tests all FROM clause entries
@@ -4087,9 +4534,15 @@ WhereInfo *sqlite3WhereBegin(
      ** other FROM clause terms that are notReady.  If no notReady terms are
      ** used then the "optimal" query plan works.
      **
+    ** Note that the WhereCost.nRow parameter for an optimal scan might
+    ** not be as small as it would be if the table really were the innermost
+    ** join.  The nRow value can be reduced by WHERE clause constraints
+    ** that do not use indices.  But this nRow reduction only happens if the
+    ** table really is the innermost join.  
+    **
      ** The second loop iteration is only performed if no optimal scan
-    ** strategies were found by the first loop. This 2nd iteration is used to
-    ** search for the lowest cost scan overall.
+    ** strategies were found by the first iteration. This second iteration
+    ** is used to search for the lowest cost scan overall.
      **
      ** Previous versions of SQLite performed only the second iteration -
      ** the next outermost loop was always that with the lowest overall
@@ -4102,14 +4555,14 @@ WhereInfo *sqlite3WhereBegin(
      **
      ** The best strategy is to iterate through table t1 first. However it
      ** is not possible to determine this with a simple greedy algorithm.
-    ** However, since the cost of a linear scan through table t2 is the same 
+    ** Since the cost of a linear scan through table t2 is the same 
      ** as the cost of a linear scan through table t1, a simple greedy 
      ** algorithm may choose to use t2 for the outer loop, which is a much
      ** costlier approach.
      */
      nUnconstrained = 0;
      notIndexed = 0;
-    for(isOptimal=(iFrom<nTabList-1); isOptimal>=0; isOptimal--){
+    for(isOptimal=(iFrom<nTabList-1); isOptimal>=0 && bestJ<0; isOptimal--){
        Bitmask mask;             /* Mask of tables not yet ready */
        for(j=iFrom, pTabItem=&pTabList->a[j]; j<nTabList; j++, pTabItem++){
          int doNotReorder;    /* True if this table should not be reordered */
@@ -4127,15 +4580,19 @@ WhereInfo *sqlite3WhereBegin(
          pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0);
          if( pTabItem->pIndex==0 ) nUnconstrained++;
    
+        WHERETRACE(("=== trying table %d with isOptimal=%d ===\n",
+                    j, isOptimal));
          assert( pTabItem->pTab );
  #ifndef SQLITE_OMIT_VIRTUALTABLE
          if( IsVirtual(pTabItem->pTab) ){
            sqlite3_index_info **pp = &pWInfo->a[j].pIdxInfo;
-          bestVirtualIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost, pp);
+          bestVirtualIndex(pParse, pWC, pTabItem, mask, notReady, pOrderBy,
+                           &sCost, pp);
          }else 
  #endif
          {
-          bestBtreeIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost);
+          bestBtreeIndex(pParse, pWC, pTabItem, mask, notReady, pOrderBy,
+                         &sCost);
          }
          assert( isOptimal || (sCost.used&notReady)==0 );
  
@@ -4175,10 +4632,12 @@ WhereInfo *sqlite3WhereBegin(
              && (nUnconstrained==0 || pTabItem->pIndex==0   /* (3) */
                  || NEVER((sCost.plan.wsFlags & WHERE_NOT_FULLSCAN)!=0))
              && (bestJ<0 || sCost.rCost<bestPlan.rCost      /* (4) */
-                || (sCost.rCost<=bestPlan.rCost && sCost.nRow<bestPlan.nRow))
+                || (sCost.rCost<=bestPlan.rCost 
+                 && sCost.plan.nRow<bestPlan.plan.nRow))
          ){
-          WHERETRACE(("... best so far with cost=%g and nRow=%g\n",
-                      sCost.rCost, sCost.nRow));
+          WHERETRACE(("=== table %d is best so far"
+                      " with cost=%g and nRow=%g\n",
+                      j, sCost.rCost, sCost.plan.nRow));
            bestPlan = sCost;
            bestJ = j;
          }
@@ -4187,8 +4646,9 @@ WhereInfo *sqlite3WhereBegin(
      }
      assert( bestJ>=0 );
      assert( notReady & getMask(pMaskSet, pTabList->a[bestJ].iCursor) );
-    WHERETRACE(("*** Optimizer selects table %d for loop %d\n", bestJ,
-           pLevel-pWInfo->a));
+    WHERETRACE(("*** Optimizer selects table %d for loop %d"
+                " with cost=%g and nRow=%g\n",
+                bestJ, pLevel-pWInfo->a, bestPlan.rCost, bestPlan.plan.nRow));
      if( (bestPlan.plan.wsFlags & WHERE_ORDERBY)!=0 ){
        *ppOrderBy = 0;
      }
@@ -4203,7 +4663,9 @@ WhereInfo *sqlite3WhereBegin(
      }
      notReady &= ~getMask(pMaskSet, pTabList->a[bestJ].iCursor);
      pLevel->iFrom = (u8)bestJ;
-    if( bestPlan.nRow>=(double)1 ) pParse->nQueryLoop *= bestPlan.nRow;
+    if( bestPlan.plan.nRow>=(double)1 ){
+      pParse->nQueryLoop *= bestPlan.plan.nRow;
+    }
  
      /* Check that if the table scanned by this loop iteration had an
      ** INDEXED BY clause attached to it, that the named index is being
@@ -4251,44 +4713,15 @@ WhereInfo *sqlite3WhereBegin(
    */
    sqlite3CodeVerifySchema(pParse, -1); /* Insert the cookie verifier Goto */
    notReady = ~(Bitmask)0;
+  pWInfo->nRowOut = (double)1;
    for(i=0, pLevel=pWInfo->a; i<nTabList; i++, pLevel++){
      Table *pTab;     /* Table to open */
      int iDb;         /* Index of database containing table/index */
  
-#ifndef SQLITE_OMIT_EXPLAIN
-    if( pParse->explain==2 ){
-      char *zMsg;
-      struct SrcList_item *pItem = &pTabList->a[pLevel->iFrom];
-      zMsg = sqlite3MPrintf(db, "TABLE %s", pItem->zName);
-      if( pItem->zAlias ){
-        zMsg = sqlite3MAppendf(db, zMsg, "%s AS %s", zMsg, pItem->zAlias);
-      }
-      if( (pLevel->plan.wsFlags & WHERE_TEMP_INDEX)!=0 ){
-        zMsg = sqlite3MAppendf(db, zMsg, "%s WITH AUTOMATIC INDEX", zMsg);
-      }else if( (pLevel->plan.wsFlags & WHERE_INDEXED)!=0 ){
-        zMsg = sqlite3MAppendf(db, zMsg, "%s WITH INDEX %s",
-           zMsg, pLevel->plan.u.pIdx->zName);
-      }else if( pLevel->plan.wsFlags & WHERE_MULTI_OR ){
-        zMsg = sqlite3MAppendf(db, zMsg, "%s VIA MULTI-INDEX UNION", zMsg);
-      }else if( pLevel->plan.wsFlags & (WHERE_ROWID_EQ|WHERE_ROWID_RANGE) ){
-        zMsg = sqlite3MAppendf(db, zMsg, "%s USING PRIMARY KEY", zMsg);
-      }
-#ifndef SQLITE_OMIT_VIRTUALTABLE
-      else if( (pLevel->plan.wsFlags & WHERE_VIRTUALTABLE)!=0 ){
-        sqlite3_index_info *pVtabIdx = pLevel->plan.u.pVtabIdx;
-        zMsg = sqlite3MAppendf(db, zMsg, "%s VIRTUAL TABLE INDEX %d:%s", zMsg,
-                    pVtabIdx->idxNum, pVtabIdx->idxStr);
-      }
-#endif
-      if( pLevel->plan.wsFlags & WHERE_ORDERBY ){
-        zMsg = sqlite3MAppendf(db, zMsg, "%s ORDER BY", zMsg);
-      }
-      sqlite3VdbeAddOp4(v, OP_Explain, i, pLevel->iFrom, 0, zMsg, P4_DYNAMIC);
-    }
-#endif /* SQLITE_OMIT_EXPLAIN */
      pTabItem = &pTabList->a[pLevel->iFrom];
      pTab = pTabItem->pTab;
      pLevel->iTabCur = pTabItem->iCursor;
+    pWInfo->nRowOut *= pLevel->plan.nRow;
      iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
      if( (pTab->tabFlags & TF_Ephemeral)!=0 || pTab->pSelect ){
        /* Do nothing */
@@ -4344,8 +4777,10 @@ WhereInfo *sqlite3WhereBegin(
    */
    notReady = ~(Bitmask)0;
    for(i=0; i<nTabList; i++){
+    pLevel = &pWInfo->a[i];
+    explainOneScan(pParse, pTabList, pLevel, i, pLevel->iFrom, wctrlFlags);
      notReady = codeOneLoopStart(pWInfo, i, wctrlFlags, notReady);
-    pWInfo->iContinue = pWInfo->a[i].addrCont;
+    pWInfo->iContinue = pLevel->addrCont;
    }
  
  #ifdef SQLITE_TEST  /* For testing and debugging use only */
diff --git a/test/autoindex1.test b/test/autoindex1.test

index 40e1936cd912ea2ad55a8bab11a28a5dd7e76063..5d946d9524a7eb309849e076bc2c27628a17c257 100644 (file)
--- a/test/autoindex1.test
+++ b/test/autoindex1.test
@@ -124,7 +124,7 @@ do_test autoindex1-401 {
    db eval {
      SELECT count(*)
        FROM t4 AS x1
-      JOIN t4 AS x2 ON x2.a=x1.b
+/*      JOIN t4 AS x2 ON x2.a=x1.b
        JOIN t4 AS x3 ON x3.a=x2.b
        JOIN t4 AS x4 ON x4.a=x3.b
        JOIN t4 AS x5 ON x5.a=x4.b
@@ -132,9 +132,9 @@ do_test autoindex1-401 {
        JOIN t4 AS x7 ON x7.a=x6.b
        JOIN t4 AS x8 ON x8.a=x7.b
        JOIN t4 AS x9 ON x9.a=x8.b
-      JOIN t4 AS x10 ON x10.a=x9.b;
+      JOIN t4 AS x10 ON x10.a=x9.b;*/
    }
-} {4087}
+} {4087 FIXME}
  
  # Ticket [8011086c85c6c404014c947fcf3eb9f42b184a0d] from 2010-07-08
  # Make sure automatic indices are not created for the RHS of an IN expression
author	drh <drh@noemail.net>
	Sat, 12 Feb 2011 01:59:22 +0000 (01:59 +0000)
committer	drh <drh@noemail.net>
	Sat, 12 Feb 2011 01:59:22 +0000 (01:59 +0000)
manifest		patch \| blob \| blame \| history
manifest.uuid		patch \| blob \| blame \| history
src/analyze.c		patch \| blob \| blame \| history
src/sqliteInt.h		patch \| blob \| blame \| history
src/where.c		patch \| blob \| blame \| history
test/autoindex1.test		patch \| blob \| blame \| history