Optimisations for expressions of the form "<value> IN (SELECT <column> FROM <table...

author danielk1977 <danielk1977@noemail.net>

Thu, 29 Nov 2007 17:05:18 +0000 (17:05 +0000)

committer danielk1977 <danielk1977@noemail.net>

Thu, 29 Nov 2007 17:05:18 +0000 (17:05 +0000)
author danielk1977 <danielk1977@noemail.net>
Thu, 29 Nov 2007 17:05:18 +0000 (17:05 +0000)
committer danielk1977 <danielk1977@noemail.net>
Thu, 29 Nov 2007 17:05:18 +0000 (17:05 +0000)
diff --git a/manifest b/manifest

index e7e04da2cb96ba20000d22c0286f150aa0fdbed1..6b48852469934207aadf237e66902e3cbfdb31f2 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Add\sthe\s{quote:\sStrAccum}\sobject\r\n\sfor\saccumulating\sstrings.\s\sRevamp\sxprintf\sto\suse\r\nthe\snew\sobject.\s\sRewrite\sthe\sgroup_concat()\sfunction\sto\suse\sthe\snew\sobject.\r\nProductize\sand\stest\sthe\sgroup_concat()\sfunction.\s(CVS\s4578)
-D 2007-11-28T22:36:41
+C Optimisations\sfor\sexpressions\sof\sthe\sform\s"<value>\sIN\s(SELECT\s<column>\sFROM\s<table>)".\s(CVS\s4579)
+D 2007-11-29T17:05:18
  F Makefile.arm-wince-mingw32ce-gcc ac5f7b2cef0cd850d6f755ba6ee4ab961b1fadf7
  F Makefile.in 35396fd58890420b29edcf27b6c0e2d054862a6b
  F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@@ -92,7 +92,7 @@ F src/complete.c 4cf68fd75d60257524cbe74f87351b9848399131
  F src/date.c 49c5a6d2de6c12000905b4d36868b07d3011bbf6
  F src/delete.c 034b87768c4135a22038a86a205f9d2d5f68a143
  F src/experimental.c 1b2d1a6cd62ecc39610e97670332ca073c50792b
-F src/expr.c 7977bb9680ebeeabfa3214d936778baaa26dcc0c
+F src/expr.c 8c32309dedd13b8ab220348eb90dc221ee0a5174
  F src/func.c 49f98cfe26b9ab507b96a34404295c4d89dfc620
  F src/hash.c 45a7005aac044b6c86bd7e49c44bc15d30006d6c
  F src/hash.h 031cd9f915aff27e12262cb9eb570ac1b8326b53
@@ -135,11 +135,11 @@ F src/server.c 087b92a39d883e3fa113cae259d64e4c7438bc96
  F src/shell.c c97be281cfc3dcb14902f45e4b16f20038eb83ff
  F src/sqlite.h.in 75ae0863db3a0b074868a6157e34b646dbe143dd
  F src/sqlite3ext.h a93f59cdee3638dc0c9c086f80df743a4e68c3cb
-F src/sqliteInt.h c724e97dcc7d9c3881ce6443f243afccfe6b10a8
+F src/sqliteInt.h f5d5b5f0dcfcd2098131c57c08ece6e314e47697
  F src/sqliteLimit.h 15ffe2116746c27ace2b428a26a4fcd6dba6fa65
  F src/table.c 1aeb9eab57b4235db86fe15a35dec76fb445a9c4
  F src/tclsqlite.c 9923abeffc9b3d7dad58e92b319661521f60debf
-F src/test1.c 0e6fe5449ef9e7289bcaf904e9e3a2ea20650b32
+F src/test1.c ba01f4b8bb742ec5869167bbc87491eaafb7da3f
  F src/test2.c 77b34303883b9d722c65a6879bb0163a400e3789
  F src/test3.c 73c1fd55d1ece61f295a6b9204fd97a139de86ae
  F src/test4.c c2c0f5dc907f1346f5d4b65eb5799f11eb9e4071
@@ -167,16 +167,16 @@ F src/update.c 2add92a6159fa73128653706574afbcd8fd1dd80
  F src/utf.c ef4b7d83bae533b76c3e1bf635b113fdad86a736
  F src/util.c 05f31144bbd3f1a24f4139ae029c42545cb72624
  F src/vacuum.c a5e51c77370c1a6445e86d42abfc43867cdd482d
-F src/vdbe.c 791d056da2c264c2cfed6e2150852926845875e5
+F src/vdbe.c 9859467b3762a1c05f956b5f556fb922e139ff9f
  F src/vdbe.h 79e09ff13b85457abe437d9814454534ebbc1fe3
  F src/vdbeInt.h 630145b9bfaa19190ab491f52658a7db550f2247
  F src/vdbeapi.c dd2c43317294e0a013e9f634ee4209a3ea459b43
-F src/vdbeaux.c ecda6d92276c61aa9d8d4466444a980b7163827b
+F src/vdbeaux.c 285f113364134f7c1571456723dcb84be89e6c3f
  F src/vdbeblob.c 82f51cdf9b0c0af729732fde48c824e498c0a1ca
  F src/vdbefifo.c 334c838c8f42d61a94813d136019ee566b5dc2f6
  F src/vdbemem.c 123994fcd344993d2fb050a83b91b341bbbd08b4
  F src/vtab.c f819d55ef638d45e09ce00009d435da8bf16f528
-F src/where.c 82e2cb76d7e15d79a5b19eaef23284c31868d4ee
+F src/where.c a03cd9da865f9ae9d750b01b45ad0bd82bd456f1
  F tclinstaller.tcl 4356d9d94d2b5ed5e68f9f0c80c4df3048dd7617
  F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
  F test/all.test ee350b9ab15b175fc0a8fb51bf2141ed3a3b9cba
@@ -321,6 +321,7 @@ F test/hook.test 3870abead2ee75d2c03585c916256ca8b5584679
  F test/icu.test e6bfae7f625c88fd14df6f540fe835bdfc1e4329
  F test/in.test 369cb2aa1eab02296b4ec470732fe8c131260b1d
  F test/in2.test b1f447f4f0f67e9f83ff931e7e2e30873f9ea055
+F test/in3.test ec1b3a9f478835954a0ecab1facd9198c62459bf
  F test/incrblob.test 602dbfa956904d6e58c45635b58850ad0f02d927
  F test/incrblob_err.test 5273097dc7c97f9b7008423a6ffd5c80d21923cb
  F test/incrvacuum.test 985bc94a7ff544db09aec3a0c93c779280da9415
@@ -593,7 +594,7 @@ F www/tclsqlite.tcl 8be95ee6dba05eabcd27a9d91331c803f2ce2130
  F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
  F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
  F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
-P 98960132dc082da61652201f4bd2b559725350c0
-R 099384e3de985a880eea8f610db48bc7
-U drh
-Z 25fbe708392dfcc01a46aeda08531786
+P 221aee72be040769e8026b91648f03c6366a8821
+R 17bc1550023a2d1b2bcf36adaf1ceda3
+U danielk1977
+Z 90a3ecb3d9566cd903183cf31905697c
diff --git a/manifest.uuid b/manifest.uuid

index 93f868d506129a5907d054e3e17d04c0ed279db5..9c13383ea243b738d379437c520a710dc46b21eb 100644 (file)
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-221aee72be040769e8026b91648f03c6366a8821
-\ No newline at end of file
+56d0e32677744df8570b519fae1c04da4ea4984d
+\ No newline at end of file
diff --git a/src/expr.c b/src/expr.c

index a2ba3f8ba63623ecf42430621bfe7a58ddb80052..39441f4c319a9240ff8373516b1489e98bd366eb 100644 (file)
--- a/src/expr.c
+++ b/src/expr.c
@@ -12,7 +12,7 @@
  ** This file contains routines used for analyzing expressions and
  ** for generating VDBE code that evaluates expressions in SQLite.
  **
-** $Id: expr.c,v 1.316 2007/11/12 09:50:26 danielk1977 Exp $
+** $Id: expr.c,v 1.317 2007/11/29 17:05:18 danielk1977 Exp $
  */
  #include "sqliteInt.h"
  #include <ctype.h>
@@ -1522,6 +1522,143 @@ struct QueryCoder {
    NameContext *pNC;    /* Namespace of first enclosing query */
  };
  
+#ifdef SQLITE_TEST
+  int sqlite3_enable_in_opt = 1;
+#else
+  #define sqlite3_enable_in_opt 1
+#endif
+
+/*
+** This function is used by the implementation of the IN (...) operator.
+** It's job is to find or create a b-tree structure that may be used
+** either to test for membership of the (...) set or to iterate through
+** it's members, skipping duplicates.
+**
+** The cursor opened on the structure (database table, database index 
+** or ephermal table) is stored in pX->iTable before this function returns.
+** The returned value indicates the structure type, as follows:
+**
+**   IN_INDEX_ROWID - The cursor was opened on a database table.
+**   IN_INDEX_INDEX - The cursor was opened on a database indec.
+**   IN_INDEX_EPH -   The cursor was opened on a specially created and
+**                    populated epheremal table.
+**
+** An existing structure may only be used if the SELECT is of the simple
+** form:
+**
+**     SELECT <column> FROM <table>
+**
+** If the mustBeUnique parameter is false, the structure will be used 
+** for fast set membership tests. In this case an epheremal table must 
+** be used unless <column> is an INTEGER PRIMARY KEY or an index can 
+** be found with <column> as it's left-most column.
+**
+** If mustBeUnique is true, then the structure will be used to iterate
+** through the set members, skipping any duplicates. In this case an
+** epheremal table must be used unless the selected <column> is guaranteed
+** to be unique - either because it is an INTEGER PRIMARY KEY or it
+** is unique by virtue of a constraint or implicit index.
+*/
+int sqlite3FindInIndex(Parse *pParse, Expr *pX, int mustBeUnique){
+  Select *p;
+  int eType = 0;
+  int iTab = pParse->nTab++;
+
+  /* The follwing if(...) expression is true if the SELECT is of the 
+  ** simple form:
+  **
+  **     SELECT <column> FROM <table>
+  **
+  ** If this is the case, it may be possible to use an existing table
+  ** or index instead of generating an epheremal table.
+  */
+  if( sqlite3_enable_in_opt
+   && (p=pX->pSelect) && !p->pPrior
+   && !p->isDistinct && !p->isAgg && !p->pGroupBy
+   && p->pSrc && p->pSrc->nSrc==1 && !p->pSrc->a[0].pSelect
+   && !p->pSrc->a[0].pTab->pSelect                                  
+   && p->pEList->nExpr==1 && p->pEList->a[0].pExpr->op==TK_COLUMN
+   && !p->pLimit && !p->pOffset && !p->pWhere
+  ){
+    sqlite3 *db = pParse->db;
+    Index *pIdx;
+    Expr *pExpr = p->pEList->a[0].pExpr;
+    int iCol = pExpr->iColumn;
+    Vdbe *v = sqlite3GetVdbe(pParse);
+
+    /* This function is only called from two places. In both cases the vdbe
+    ** has already been allocated. So assume sqlite3GetVdbe() is always
+    ** successful here.
+    */
+    assert(v);
+    if( iCol<0 ){
+      int iMem = pParse->nMem++;
+      int iAddr;
+      Table *pTab = p->pSrc->a[0].pTab;
+      int iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+      sqlite3VdbeUsesBtree(v, iDb);
+
+      sqlite3VdbeAddOp(v, OP_MemLoad, iMem, 0);
+      iAddr = sqlite3VdbeAddOp(v, OP_If, 0, iMem);
+      sqlite3VdbeAddOp(v, OP_MemInt, 1, iMem);
+
+      sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead);
+      eType = IN_INDEX_ROWID;
+
+      sqlite3VdbeJumpHere(v, iAddr);
+    }else{
+      /* The collation sequence used by the comparison. If an index is to 
+      ** be used in place of a temp-table, it must be ordered according
+      ** to this collation sequence.
+      */
+      CollSeq *pReq = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pExpr);
+
+      /* Check that the affinity that will be used to perform the 
+      ** comparison is the same as the affinity of the column. If
+      ** it is not, it is not possible to use any index.
+      */
+      Table *pTab = p->pSrc->a[0].pTab;
+      char aff = comparisonAffinity(pX);
+      int affinity_ok = (pTab->aCol[iCol].affinity==aff||aff==SQLITE_AFF_NONE);
+
+      for(pIdx=pTab->pIndex; pIdx && eType==0 && affinity_ok; pIdx=pIdx->pNext){
+        if( (pIdx->aiColumn[0]==iCol)
+         && (pReq==sqlite3FindCollSeq(db, ENC(db), pIdx->azColl[0], -1, 0))
+         && (!mustBeUnique || (pIdx->nColumn==1 && pIdx->onError!=OE_None))
+        ){
+          int iDb;
+          int iMem = pParse->nMem++;
+          int iAddr;
+          char *pKey;
+  
+          pKey = (char *)sqlite3IndexKeyinfo(pParse, pIdx);
+          iDb = sqlite3SchemaToIndex(db, pIdx->pSchema);
+          sqlite3VdbeUsesBtree(v, iDb);
+
+          sqlite3VdbeAddOp(v, OP_MemLoad, iMem, 0);
+          iAddr = sqlite3VdbeAddOp(v, OP_If, 0, iMem);
+          sqlite3VdbeAddOp(v, OP_MemInt, 1, iMem);
+  
+          sqlite3VdbeAddOp(v, OP_Integer, iDb, 0);
+          VdbeComment((v, "# %s", pIdx->zName));
+          sqlite3VdbeOp3(v,OP_OpenRead,iTab,pIdx->tnum,pKey,P3_KEYINFO_HANDOFF);
+          eType = IN_INDEX_INDEX;
+          sqlite3VdbeAddOp(v, OP_SetNumColumns, iTab, pIdx->nColumn);
+
+          sqlite3VdbeJumpHere(v, iAddr);
+        }
+      }
+    }
+  }
+
+  if( eType==0 ){
+    sqlite3CodeSubselect(pParse, pX);
+    eType = IN_INDEX_EPH;
+  }else{
+    pX->iTable = iTab;
+  }
+  return eType;
+}
  
  /*
  ** Generate code for scalar subqueries used as an expression
@@ -2036,7 +2173,10 @@ void sqlite3ExprCode(Parse *pParse, Expr *pExpr){
        int addr;
        char affinity;
        int ckOffset = pParse->ckOffset;
-      sqlite3CodeSubselect(pParse, pExpr);
+      int eType;
+      int iLabel = sqlite3VdbeMakeLabel(v);
+
+      eType = sqlite3FindInIndex(pParse, pExpr, 0);
  
        /* Figure out the affinity to use to create a key from the results
        ** of the expression. affinityStr stores a static string suitable for
@@ -2055,10 +2195,18 @@ void sqlite3ExprCode(Parse *pParse, Expr *pExpr){
        sqlite3VdbeAddOp(v, OP_NotNull, -1, addr+4);            /* addr + 0 */
        sqlite3VdbeAddOp(v, OP_Pop, 2, 0);
        sqlite3VdbeAddOp(v, OP_Null, 0, 0);
-      sqlite3VdbeAddOp(v, OP_Goto, 0, addr+7);
-      sqlite3VdbeOp3(v, OP_MakeRecord, 1, 0, &affinity, 1);   /* addr + 4 */
-      sqlite3VdbeAddOp(v, OP_Found, pExpr->iTable, addr+7);
+      sqlite3VdbeAddOp(v, OP_Goto, 0, iLabel);
+      if( eType==IN_INDEX_ROWID ){
+        int iAddr = sqlite3VdbeCurrentAddr(v)+3;
+        sqlite3VdbeAddOp(v, OP_MustBeInt, 1, iAddr);
+        sqlite3VdbeAddOp(v, OP_NotExists, pExpr->iTable, iAddr);
+        sqlite3VdbeAddOp(v, OP_Goto, pExpr->iTable, iLabel);
+      }else{
+        sqlite3VdbeOp3(v, OP_MakeRecord, 1, 0, &affinity, 1);   /* addr + 4 */
+        sqlite3VdbeAddOp(v, OP_Found, pExpr->iTable, iLabel);
+      }
        sqlite3VdbeAddOp(v, OP_AddImm, -1, 0);                  /* addr + 6 */
+      sqlite3VdbeResolveLabel(v, iLabel);
  
        break;
      }
diff --git a/src/sqliteInt.h b/src/sqliteInt.h

index f1142493e85274a07450f689141ecda431a3decb..3dadec3ce15de7335dd1fe26a0b98c57aa96834f 100644 (file)
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -11,7 +11,7 @@
  *************************************************************************
  ** Internal interface definitions for SQLite.
  **
-** @(#) $Id: sqliteInt.h,v 1.621 2007/11/28 22:36:41 drh Exp $
+** @(#) $Id: sqliteInt.h,v 1.622 2007/11/29 17:05:18 danielk1977 Exp $
  */
  #ifndef _SQLITEINT_H_
  #define _SQLITEINT_H_
@@ -884,6 +884,7 @@ struct KeyInfo {
    sqlite3 *db;        /* The database connection */
    u8 enc;             /* Text encoding - one of the TEXT_Utf* values */
    u8 incrKey;         /* Increase 2nd key by epsilon before comparison */
+  u8 prefixIsEqual;   /* Treat a prefix as equal */
    int nField;         /* Number of entries in aColl[] */
    u8 *aSortOrder;     /* If defined an aSortOrder[i] is true, sort DESC */
    CollSeq *aColl[1];  /* Collating sequence for each term of the key */
@@ -1971,6 +1972,11 @@ int sqlite3Reprepare(Vdbe*);
  void sqlite3ExprListCheckLength(Parse*, ExprList*, int, const char*);
  CollSeq *sqlite3BinaryCompareCollSeq(Parse *, Expr *, Expr *);
  
+#define IN_INDEX_ROWID           1
+#define IN_INDEX_EPH             2
+#define IN_INDEX_INDEX           3
+int sqlite3FindInIndex(Parse *, Expr *, int);
+
  #ifdef SQLITE_ENABLE_ATOMIC_WRITE
    int sqlite3JournalOpen(sqlite3_vfs *, const char *, sqlite3_file *, int, int);
    int sqlite3JournalSize(sqlite3_vfs *);
diff --git a/src/test1.c b/src/test1.c

index f7abaf9a5539b617de0065ff23220874c890ca72..9adac2914027caaf9b88afd8e203c52daa5729a4 100644 (file)
--- a/src/test1.c
+++ b/src/test1.c
@@ -13,7 +13,7 @@
  ** is not included in the SQLite library.  It is used for automated
  ** testing of the SQLite library.
  **
-** $Id: test1.c,v 1.279 2007/10/23 15:39:45 drh Exp $
+** $Id: test1.c,v 1.280 2007/11/29 17:05:18 danielk1977 Exp $
  */
  #include "sqliteInt.h"
  #include "tcl.h"
@@ -4433,6 +4433,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp){
    extern int sqlite3_vdbe_addop_trace;
  #endif
  #ifdef SQLITE_TEST
+  extern int sqlite3_enable_in_opt;
    extern char sqlite3_query_plan[];
    static char *query_plan = sqlite3_query_plan;
  #endif
@@ -4512,5 +4513,9 @@ int Sqlitetest1_Init(Tcl_Interp *interp){
        (char*)&sqlite3_sync_count, TCL_LINK_INT);
    Tcl_LinkVar(interp, "sqlite_fullsync_count",
        (char*)&sqlite3_fullsync_count, TCL_LINK_INT);
+#ifdef SQLITE_TEST
+  Tcl_LinkVar(interp, "sqlite_enable_in_opt",
+      (char*)&sqlite3_enable_in_opt, TCL_LINK_INT);
+#endif
    return TCL_OK;
  }
diff --git a/src/vdbe.c b/src/vdbe.c

index 0c5efd9cf3c74fbce0a3aef6a5bf8ac8240c4d6b..c79e6efd530a9cbd6c802a813be92a4e15f2ef3a 100644 (file)
--- a/src/vdbe.c
+++ b/src/vdbe.c
@@ -43,7 +43,7 @@
  ** in this file for details.  If in doubt, do not deviate from existing
  ** commenting and indentation practices when changing or adding code.
  **
-** $Id: vdbe.c,v 1.654 2007/11/12 08:09:35 danielk1977 Exp $
+** $Id: vdbe.c,v 1.655 2007/11/29 17:05:18 danielk1977 Exp $
  */
  #include "sqliteInt.h"
  #include <ctype.h>
@@ -1436,7 +1436,7 @@ case OP_ForceInt: {            /* no-push */
  ** 
  ** Force the top of the stack to be an integer.  If the top of the
  ** stack is not an integer and cannot be converted into an integer
-** with out data loss, then jump immediately to P2, or if P2==0
+** without data loss, then jump immediately to P2, or if P2==0
  ** raise an SQLITE_MISMATCH exception.
  **
  ** If the top of the stack is not an integer and P2 is not zero and
@@ -3056,11 +3056,14 @@ case OP_MoveGt: {       /* no-push */
  ** if it exists.  The blob is popped off the top of the stack.
  **
  ** This instruction is used to implement the IN operator where the
-** left-hand side is a SELECT statement.  P1 is not a true index but
-** is instead a temporary index that holds the results of the SELECT
-** statement.  This instruction just checks to see if the left-hand side
-** of the IN operator (stored on the top of the stack) exists in the
-** result of the SELECT statement.
+** left-hand side is a SELECT statement.  P1 may be a true index, or it
+** may be a temporary index that holds the results of the SELECT
+** statement. 
+**
+** This instruction checks if index P1 contains a record for which 
+** the first N serialised values exactly match the N serialised values
+** in the record on the stack, where N is the total number of values in
+** the stack record (stack record is a prefix of the P1 record). 
  **
  ** See also: Distinct, NotFound, MoveTo, IsUnique, NotExists
  */
@@ -3090,7 +3093,11 @@ case OP_Found: {        /* no-push */
      assert( pC->isTable==0 );
      assert( pTos->flags & MEM_Blob );
      Stringify(pTos, encoding);
+    if( pOp->opcode==OP_Found ){
+      pC->pKeyInfo->prefixIsEqual = 1;
+    }
      rc = sqlite3BtreeMoveto(pC->pCursor, pTos->z, pTos->n, 0, &res);
+    pC->pKeyInfo->prefixIsEqual = 0;
      if( rc!=SQLITE_OK ){
        break;
      }
diff --git a/src/vdbeaux.c b/src/vdbeaux.c

index 080e8c2ef2d6628d740e978ca86847dff251cd21..3f651baa051c77e56b66317cdcc7ce776ae31c7e 100644 (file)
--- a/src/vdbeaux.c
+++ b/src/vdbeaux.c
@@ -2111,10 +2111,12 @@ int sqlite3VdbeRecordCompare(
    if( rc==0 ){
      if( pKeyInfo->incrKey ){
        rc = -1;
-    }else if( d1<nKey1 ){
-      rc = 1;
-    }else if( d2<nKey2 ){
-      rc = -1;
+    }else if( !pKeyInfo->prefixIsEqual ){
+      if( d1<nKey1 ){
+        rc = 1;
+      }else if( d2<nKey2 ){
+        rc = -1;
+      }
      }
    }else if( pKeyInfo->aSortOrder && i<pKeyInfo->nField
                 && pKeyInfo->aSortOrder[i] ){
diff --git a/src/where.c b/src/where.c

index dc918f60880f300e13e61a7d6b95c7bccf2a91c6..853af403ff4baf9bd53eea0d7352b2b64c0fedd7 100644 (file)
--- a/src/where.c
+++ b/src/where.c
@@ -16,7 +16,7 @@
  ** so is applicable.  Because this module is responsible for selecting
  ** indices, you might also think of this module as the "query optimizer".
  **
-** $Id: where.c,v 1.263 2007/11/26 13:36:00 drh Exp $
+** $Id: where.c,v 1.264 2007/11/29 17:05:18 danielk1977 Exp $
  */
  #include "sqliteInt.h"
  
@@ -1736,11 +1736,12 @@ static void codeEqualityTerm(
      sqlite3VdbeAddOp(v, OP_Null, 0, 0);
  #ifndef SQLITE_OMIT_SUBQUERY
    }else{
+    int eType;
      int iTab;
      struct InLoop *pIn;
  
      assert( pX->op==TK_IN );
-    sqlite3CodeSubselect(pParse, pX);
+    eType = sqlite3FindInIndex(pParse, pX, 1);
      iTab = pX->iTable;
      sqlite3VdbeAddOp(v, OP_Rewind, iTab, 0);
      VdbeComment((v, "# %.*s", pX->span.n, pX->span.z));
@@ -1752,9 +1753,10 @@ static void codeEqualityTerm(
                                      sizeof(pLevel->aInLoop[0])*pLevel->nIn);
      pIn = pLevel->aInLoop;
      if( pIn ){
+      int op = ((eType==IN_INDEX_ROWID)?OP_Rowid:OP_Column);
        pIn += pLevel->nIn - 1;
        pIn->iCur = iTab;
-      pIn->topAddr = sqlite3VdbeAddOp(v, OP_Column, iTab, 0);
+      pIn->topAddr = sqlite3VdbeAddOp(v, op, iTab, 0);
        sqlite3VdbeAddOp(v, OP_IsNull, -1, 0);
      }else{
        pLevel->nIn = 0;
diff --git a/test/in3.test b/test/in3.test

new file mode 100644 (file)

index 0000000..ff92172
--- /dev/null
+++ b/test/in3.test
@@ -0,0 +1,264 @@
+# 2007 November 29
+#
+# The author disclaims copyright to this source code.  In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#***********************************************************************
+# This file tests the optimisations made in November 2007 of expressions 
+# of the following form:
+#
+#     <value> IN (SELECT <column> FROM <table>)
+#
+# $Id: in3.test,v 1.1 2007/11/29 17:05:18 danielk1977 Exp $
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+# Return the number of OpenEphemeral instructions used in the
+# implementation of the sql statement passed as a an argument.
+#
+proc nEphemeral {sql} {
+  set nEph 0
+  foreach op [execsql "EXPLAIN $sql"] {
+    if {$op eq "OpenEphemeral"} {incr nEph}
+  }
+  set nEph
+}
+
+# This proc works the same way as execsql, except that the number
+# of OpenEphemeral instructions used in the implementation of the
+# statement is inserted into the start of the returned list.
+#
+proc exec_neph {sql} {
+  return [concat [nEphemeral $sql] [execsql $sql]]
+}
+
+do_test in3-1.1 {
+  execsql {
+    CREATE TABLE t1(a PRIMARY KEY, b);
+    INSERT INTO t1 VALUES(1, 2);
+    INSERT INTO t1 VALUES(3, 4);
+    INSERT INTO t1 VALUES(5, 6);
+  }
+} {}
+
+# All of these queries should avoid using a temp-table:
+#
+do_test in3-1.2 {
+  exec_neph { SELECT rowid FROM t1 WHERE rowid IN (SELECT rowid FROM t1); }
+} {0 1 2 3}
+do_test in3-1.3 {
+  exec_neph { SELECT a FROM t1 WHERE a IN (SELECT a FROM t1); }
+} {0 1 3 5}
+do_test in3-1.4 {
+  exec_neph { SELECT rowid FROM t1 WHERE rowid+0 IN (SELECT rowid FROM t1); }
+} {0 1 2 3}
+do_test in3-1.5 {
+  exec_neph { SELECT a FROM t1 WHERE a+0 IN (SELECT a FROM t1); }
+} {0 1 3 5}
+
+# Because none of the sub-select queries in the following statements
+# match the pattern ("SELECT <column> FROM <table>"), the following do 
+# require a temp table.
+#
+do_test in3-1.6 {
+  exec_neph { SELECT rowid FROM t1 WHERE rowid IN (SELECT rowid+0 FROM t1); }
+} {1 1 2 3}
+do_test in3-1.7 {
+  exec_neph { SELECT a FROM t1 WHERE a IN (SELECT a+0 FROM t1); }
+} {1 1 3 5}
+do_test in3-1.8 {
+  exec_neph { SELECT a FROM t1 WHERE a IN (SELECT a FROM t1 WHERE 1); }
+} {1 1 3 5}
+do_test in3-1.9 {
+  exec_neph { SELECT a FROM t1 WHERE a IN (SELECT a FROM t1 GROUP BY a); }
+} {1 1 3 5}
+
+# This should not use a temp-table. Even though the sub-select does
+# not exactly match the pattern "SELECT <column> FROM <table>", in
+# this case the ORDER BY is a no-op and can be ignored.
+do_test in3-1.10 {
+  exec_neph { SELECT a FROM t1 WHERE a IN (SELECT a FROM t1 ORDER BY a); }
+} {0 1 3 5}
+
+# These do use the temp-table. Adding the LIMIT clause means the 
+# ORDER BY cannot be ignored.
+do_test in3-1.11 {
+  exec_neph {SELECT a FROM t1 WHERE a IN (SELECT a FROM t1 ORDER BY a LIMIT 1)}
+} {1 1}
+do_test in3-1.12 {
+  exec_neph {
+    SELECT a FROM t1 WHERE a IN (SELECT a FROM t1 ORDER BY a LIMIT 1 OFFSET 1)
+  }
+} {1 3}
+
+# Has to use a temp-table because of the compound sub-select.
+#
+do_test in3-1.13 {
+  exec_neph {
+    SELECT a FROM t1 WHERE a IN (
+      SELECT a FROM t1 UNION ALL SELECT a FROM t1
+    )
+  }
+} {1 1 3 5}
+
+# The first of these queries has to use the temp-table, because the 
+# collation sequence used for the index on "t1.a" does not match the
+# collation sequence used by the "IN" comparison. The second does not
+# require a temp-table, because the collation sequences match.
+#
+do_test in3-1.14 {
+  exec_neph { SELECT a FROM t1 WHERE a COLLATE nocase IN (SELECT a FROM t1) }
+} {1 1 3 5}
+do_test in3-1.15 {
+  exec_neph { SELECT a FROM t1 WHERE a COLLATE binary IN (SELECT a FROM t1) }
+} {0 1 3 5}
+
+# Neither of these queries require a temp-table. The collation sequence
+# makes no difference when using a rowid.
+#
+do_test in3-1.16 {
+  exec_neph {SELECT a FROM t1 WHERE a COLLATE nocase IN (SELECT rowid FROM t1)}
+} {0 1 3}
+do_test in3-1.17 {
+  exec_neph {SELECT a FROM t1 WHERE a COLLATE binary IN (SELECT rowid FROM t1)}
+} {0 1 3}
+
+# The following tests - in3.2.* - test a bug that was difficult to track
+# down during development. They are not particularly well focused.
+#
+do_test in3-2.1 {
+  execsql {
+    DROP TABLE IF EXISTS t1;
+    CREATE TABLE t1(w int, x int, y int);
+    CREATE TABLE t2(p int, q int, r int, s int);
+  }
+  for {set i 1} {$i<=100} {incr i} {
+    set w $i
+    set x [expr {int(log($i)/log(2))}]
+    set y [expr {$i*$i + 2*$i + 1}]
+    execsql "INSERT INTO t1 VALUES($w,$x,$y)"
+  }
+  set maxy [execsql {select max(y) from t1}]
+  db eval { INSERT INTO t2 SELECT 101-w, x, $maxy+1-y, y FROM t1 }
+} {}
+do_test in3-2.2 {
+  execsql {
+    SELECT rowid 
+    FROM t1 
+    WHERE rowid IN (SELECT rowid FROM t1 WHERE rowid IN (1, 2));
+  }
+} {1 2}
+do_test in3-2.3 {
+  execsql {
+    select rowid from t1 where rowid IN (-1,2,4)
+  }
+} {2 4}
+do_test in3-2.4 {
+  execsql {
+    SELECT rowid FROM t1 WHERE rowid IN 
+       (select rowid from t1 where rowid IN (-1,2,4))
+  }
+} {2 4}
+
+#-------------------------------------------------------------------------
+# This next block of tests - in3-3.* - verify that column affinity is
+# correctly handled in cases where an index might be used to optimise
+# an IN (SELECT) expression.
+#
+do_test in3-3.1 {
+  catch {execsql {
+    DROP TABLE t1;
+    DROP TABLE t2;
+  }}
+
+  execsql {
+
+    CREATE TABLE t1(a BLOB, b NUMBER ,c TEXT);
+    CREATE UNIQUE INDEX t1_i1 ON t1(a);        /* no affinity */
+    CREATE UNIQUE INDEX t1_i2 ON t1(b);        /* numeric affinity */
+    CREATE UNIQUE INDEX t1_i3 ON t1(c);        /* text affinity */
+
+    CREATE TABLE t2(x BLOB, y NUMBER, z TEXT);
+    CREATE UNIQUE INDEX t2_i1 ON t2(x);        /* no affinity */
+    CREATE UNIQUE INDEX t2_i2 ON t2(y);        /* numeric affinity */
+    CREATE UNIQUE INDEX t2_i3 ON t2(z);        /* text affinity */
+
+    INSERT INTO t1 VALUES(1, 1, 1);
+    INSERT INTO t2 VALUES('1', '1', '1');
+  }
+} {}
+
+do_test in3-3.2 {
+  # No affinity is applied before comparing "x" and "a". Therefore
+  # the index can be used (the comparison is false, text!=number).
+  exec_neph { SELECT x IN (SELECT a FROM t1) FROM t2 }
+} {0 0}
+do_test in3-3.3 {
+  # Logically, numeric affinity is applied to both sides before 
+  # the comparison.  Therefore it is possible to use index t1_i2.
+  exec_neph { SELECT x IN (SELECT b FROM t1) FROM t2 }
+} {0 1}
+do_test in3-3.4 {
+  # No affinity is applied before the comparison takes place. Making
+  # it possible to use index t1_i3.
+  exec_neph { SELECT x IN (SELECT c FROM t1) FROM t2 }
+} {0 1}
+
+do_test in3-3.5 {
+  # Numeric affinity should be applied to each side before the comparison
+  # takes place. Therefore we cannot use index t1_i1, which has no affinity.
+  exec_neph { SELECT y IN (SELECT a FROM t1) FROM t2 }
+} {1 1}
+do_test in3-3.6 {
+  # Numeric affinity is applied to both sides before 
+  # the comparison.  Therefore it is possible to use index t1_i2.
+  exec_neph { SELECT y IN (SELECT b FROM t1) FROM t2 }
+} {0 1}
+do_test in3-3.7 {
+  # Numeric affinity is applied before the comparison takes place. 
+  # Making it impossible to use index t1_i3.
+  exec_neph { SELECT y IN (SELECT c FROM t1) FROM t2 }
+} {1 1}
+
+#---------------------------------------------------------------------
+#
+# Test using a multi-column index.
+#
+do_test in3-4.1 {
+  execsql {
+    CREATE TABLE t3(a, b, c);
+    CREATE UNIQUE INDEX t3_i ON t3(b, a);
+  }
+
+  execsql {
+    INSERT INTO t3 VALUES(1, 'numeric', 2);
+    INSERT INTO t3 VALUES(2, 'text', 2);
+    INSERT INTO t3 VALUES(3, 'real', 2);
+    INSERT INTO t3 VALUES(4, 'none', 2);
+  }
+} {}
+do_test in3-4.2 {
+  exec_neph { SELECT 'text' IN (SELECT b FROM t3) }
+} {0 1}
+do_test in3-4.3 {
+  exec_neph { SELECT 'TEXT' COLLATE nocase IN (SELECT b FROM t3) }
+} {1 1}
+do_test in3-4.4 {
+  # A temp table must be used because t3_i.b is not guaranteed to be unique.
+  exec_neph { SELECT b FROM t3 WHERE b IN (SELECT b FROM t3) }
+} {1 none numeric real text}
+do_test in3-4.5 {
+  execsql { CREATE UNIQUE INDEX t3_i2 ON t3(b) }
+  exec_neph { SELECT b FROM t3 WHERE b IN (SELECT b FROM t3) }
+} {0 none numeric real text}
+do_test in3-4.6 {
+  execsql { DROP INDEX t3_i2 }
+} {}
+
+finish_test
+
author	danielk1977 <danielk1977@noemail.net>
	Thu, 29 Nov 2007 17:05:18 +0000 (17:05 +0000)
committer	danielk1977 <danielk1977@noemail.net>
	Thu, 29 Nov 2007 17:05:18 +0000 (17:05 +0000)
manifest		patch \| blob \| blame \| history
manifest.uuid		patch \| blob \| blame \| history
src/expr.c		patch \| blob \| blame \| history
src/sqliteInt.h		patch \| blob \| blame \| history
src/test1.c		patch \| blob \| blame \| history
src/vdbe.c		patch \| blob \| blame \| history
src/vdbeaux.c		patch \| blob \| blame \| history
src/where.c		patch \| blob \| blame \| history
test/in3.test	[new file with mode: 0644]	patch \| blob