From: drh Date: Fri, 1 Aug 2014 14:46:57 +0000 (+0000) Subject: Begin making changes to the IN operator in an attempt to make it run faster X-Git-Tag: version-3.8.6~41^2~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3a85625d87c2b5d0f4cd504cb4fdc5d8fc5ee8c4;p=thirdparty%2Fsqlite.git Begin making changes to the IN operator in an attempt to make it run faster and to make the code easier to understand. FossilOrigin-Name: ee0fd6aaf94cda1dce3fe752bfe3b0f83e0043f1 --- diff --git a/manifest b/manifest index f13081bfc1..3fae6c98bd 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Enhance\sthe\sPRAGMA\sintegrity_check\scommand\sto\sdetect\sUNIQUE\sand\sNOT\sNULL\nconstraint\sviolations. -D 2014-08-01T01:40:33.869 +C Begin\smaking\schanges\sto\sthe\sIN\soperator\sin\san\sattempt\sto\smake\sit\srun\sfaster\nand\sto\smake\sthe\scode\seasier\sto\sunderstand. +D 2014-08-01T14:46:57.155 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5eb79e334a5de69c87740edd56af6527dd219308 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -176,7 +176,7 @@ F src/complete.c dc1d136c0feee03c2f7550bafc0d29075e36deac F src/ctime.c 0231df905e2c4abba4483ee18ffc05adc321df2a F src/date.c 593c744b2623971e45affd0bde347631bdfa4625 F src/delete.c bcf8f72126cea80fc3d5bc5494cf19b3f8935aaf -F src/expr.c 3be0e9f90bb1c475a99a821a11eecde53ecefc1d +F src/expr.c 7c52ea8b322992a91a241c0092a5bf97b141d353 F src/fault.c 160a0c015b6c2629d3899ed2daf63d75754a32bb F src/fkey.c 8545f3b36da47473e10800ea4fb0810fd4062514 F src/func.c 3bc223ea36cd29a91c481485343d0ee4257ab8dc @@ -227,7 +227,7 @@ F src/shell.c 191129c3f7a9cf241aea90ff6a6be3e74d3767f0 F src/sqlite.h.in 9bbc5815c73b0e77e68b5275481a5e3e7814a804 F src/sqlite3.rc 11094cc6a157a028b301a9f06b3d03089ea37c3e F src/sqlite3ext.h 886f5a34de171002ad46fae8c36a7d8051c190fc -F src/sqliteInt.h 3b17ba74eec22781f51e7b3e4c73d2cbd458f89b +F src/sqliteInt.h 17ece600d3c9d36cc0ee2b74a30507507f3e0937 F 
src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d F src/status.c 7ac05a5c7017d0b9f0b4bcd701228b784f987158 F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e @@ -296,7 +296,7 @@ F src/vtab.c 21b932841e51ebd7d075e2d0ad1415dce8d2d5fd F src/wal.c 264df50a1b33124130b23180ded2e2c5663c652a F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4 F src/walker.c 11edb74d587bc87b33ca96a5173e3ec1b8389e45 -F src/where.c a7d0434de56d6a4b60cc98ec661969d521201d39 +F src/where.c ce1b9a3a2573033cd15e0882719db7f211f21cdd F src/whereInt.h 929c1349b5355fd44f22cee5c14d72b3329c58a6 F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 @@ -1185,7 +1185,10 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P e75b26ee357bb3d3c1a539b05d633ebf314726d7 -R 50d518176c08d9fef67243df4085599b +P 9abcf2698c09f4f6a44a68e74f9f6b538f3253d6 +R 1cc42777f2b661f6d0caf162eadc6d85 +T *branch * IN-operator-improvements +T *sym-IN-operator-improvements * +T -sym-trunk * U drh -Z ae546b77e53655a859301e9ce422b440 +Z 08adfdc6013d75458685b1e962a583ca diff --git a/manifest.uuid b/manifest.uuid index 087d6ad801..1b3e9a1eeb 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9abcf2698c09f4f6a44a68e74f9f6b538f3253d6 \ No newline at end of file +ee0fd6aaf94cda1dce3fe752bfe3b0f83e0043f1 \ No newline at end of file diff --git a/src/expr.c b/src/expr.c index 3b254f3d3e..dd7b7c3de1 100644 --- a/src/expr.c +++ b/src/expr.c @@ -1484,7 +1484,7 @@ int sqlite3CodeOnce(Parse *pParse){ ** be used either to test for membership in the RHS set or to iterate through ** all members of the RHS set, skipping duplicates. 
** -** A cursor is opened on the b-tree object that the RHS of the IN operator +** A cursor is opened on the b-tree object that is the RHS of the IN operator ** and pX->iTable is set to the index of that cursor. ** ** The returned value of this function indicates the b-tree type, as follows: @@ -1494,6 +1494,8 @@ int sqlite3CodeOnce(Parse *pParse){ ** IN_INDEX_INDEX_DESC - The cursor was opened on a descending index. ** IN_INDEX_EPH - The cursor was opened on a specially created and ** populated epheremal table. +** IN_INDEX_NOOP - No cursor was allocated. The in operator must be +** implemented as a sequence of comparisons. ** ** An existing b-tree might be used if the RHS expression pX is a simple ** subquery such as: @@ -1503,23 +1505,37 @@ int sqlite3CodeOnce(Parse *pParse){ ** If the RHS of the IN operator is a list or a more complex subquery, then ** an ephemeral table might need to be generated from the RHS and then ** pX->iTable made to point to the ephermeral table instead of an -** existing table. +** existing table. ** -** If the prNotFound parameter is 0, then the b-tree will be used to iterate -** through the set members, skipping any duplicates. In this case an -** epheremal table must be used unless the selected <column> is guaranteed +** The inFlags parameter must contain exactly one of the bits +** IN_INDEX_MEMBERSHIP or IN_INDEX_LOOP. If inFlags contains +** IN_INDEX_MEMBERSHIP, then the generated table will be used for a +** fast membership test. When the IN_INDEX_LOOP bit is set, the +** IN index will be used to loop over all values of the RHS of the +** IN operator. +** +** When IN_INDEX_LOOP is used (and the b-tree will be used to iterate +** through the set members) then the b-tree must not contain duplicates. +** An epheremal table must be used unless the selected <column> is guaranteed ** to be unique - either because it is an INTEGER PRIMARY KEY or it ** has a UNIQUE constraint or UNIQUE index.
** -** If the prNotFound parameter is not 0, then the b-tree will be used -** for fast set membership tests. In this case an epheremal table must +** When IN_INDEX_MEMBERSHIP is used (and the b-tree will be used +** for fast set membership tests) then an epheremal table must ** be used unless <column> is an INTEGER PRIMARY KEY or an index can ** be found with <column> as its left-most column. ** +** If the IN_INDEX_NOOP_OK and IN_INDEX_MEMBERSHIP are both set and +** if the RHS of the IN operator is a list (not a subquery) then this +** routine might decide that creating an ephemeral b-tree for membership +** testing is too expensive and return IN_INDEX_NOOP. IN that case, the +** calling routine should implement the IN operator using a sequence +** of Eq or Ne comparison operations. +** ** When the b-tree is being used for membership tests, the calling function -** needs to know whether or not the structure contains an SQL NULL -** value in order to correctly evaluate expressions like "X IN (Y, Z)". -** If there is any chance that the (...) might contain a NULL value at +** might need to know whether or not the RHS side of the IN operator +** contains a NULL. If prNotFound is not NULL and +** if there is any chance that the (...) might contain a NULL value at ** runtime, then a register is allocated and the register number written ** to *prNotFound. If there is no chance that the (...) contains a ** NULL value, then *prNotFound is left unchanged. @@ -1540,14 +1556,15 @@ int sqlite3CodeOnce(Parse *pParse){ ** test more often than is necessary. */ #ifndef SQLITE_OMIT_SUBQUERY -int sqlite3FindInIndex(Parse *pParse, Expr *pX, int *prNotFound){ +int sqlite3FindInIndex(Parse *pParse, Expr *pX, u32 inFlags, int *prNotFound){ Select *p; /* SELECT to the right of IN operator */ int eType = 0; /* Type of RHS table.
IN_INDEX_* */ int iTab = pParse->nTab++; /* Cursor of the RHS table */ - int mustBeUnique = (prNotFound==0); /* True if RHS must be unique */ + int mustBeUnique; /* True if RHS must be unique */ Vdbe *v = sqlite3GetVdbe(pParse); /* Virtual machine being coded */ assert( pX->op==TK_IN ); + mustBeUnique = (inFlags & IN_INDEX_LOOP)!=0; /* Check to see if an existing table or index can be used to ** satisfy the query. This is preferable to generating a new @@ -1630,14 +1647,14 @@ int sqlite3FindInIndex(Parse *pParse, Expr *pX, int *prNotFound){ u32 savedNQueryLoop = pParse->nQueryLoop; int rMayHaveNull = 0; eType = IN_INDEX_EPH; - if( prNotFound ){ - *prNotFound = rMayHaveNull = ++pParse->nMem; - sqlite3VdbeAddOp2(v, OP_Null, 0, *prNotFound); - }else{ + if( inFlags & IN_INDEX_LOOP ){ pParse->nQueryLoop = 0; if( pX->pLeft->iColumn<0 && !ExprHasProperty(pX, EP_xIsSelect) ){ eType = IN_INDEX_ROWID; } + }else if( prNotFound ){ + *prNotFound = rMayHaveNull = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Null, 0, rMayHaveNull); } sqlite3CodeSubselect(pParse, pX, rMayHaveNull, eType==IN_INDEX_ROWID); pParse->nQueryLoop = savedNQueryLoop; @@ -1668,15 +1685,9 @@ int sqlite3FindInIndex(Parse *pParse, Expr *pX, int *prNotFound){ ** ** If rMayHaveNull is non-zero, that means that the operation is an IN ** (not a SELECT or EXISTS) and that the RHS might contains NULLs. -** Furthermore, the IN is in a WHERE clause and that we really want -** to iterate over the RHS of the IN operator in order to quickly locate -** all corresponding LHS elements. All this routine does is initialize -** the register given by rMayHaveNull to NULL. Calling routines will take -** care of changing this register value to non-NULL if the RHS is NULL-free. -** -** If rMayHaveNull is zero, that means that the subquery is being used -** for membership testing only. There is no need to initialize any -** registers to indicate the presence or absence of NULLs on the RHS. 
+** All this routine does is initialize the register given by rMayHaveNull +** to NULL. Calling routines will take care of changing this register +** value to non-NULL if the RHS is NULL-free. ** ** For a SELECT or EXISTS operator, return the register that holds the ** result. For IN operators or if an error occurs, the return value is 0. @@ -1928,7 +1939,8 @@ static void sqlite3ExprCodeIN( v = pParse->pVdbe; assert( v!=0 ); /* OOM detected prior to this routine */ VdbeNoopComment((v, "begin IN expr")); - eType = sqlite3FindInIndex(pParse, pExpr, &rRhsHasNull); + eType = sqlite3FindInIndex(pParse, pExpr, 0, + destIfFalse==destIfNull ? 0 : &rRhsHasNull); /* Figure out the affinity to use to create a key from the results ** of the expression. affinityStr stores a static string suitable for diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 1aede95c14..784fd0fd93 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -3591,11 +3591,21 @@ const char *sqlite3JournalModename(int); #define sqlite3EndBenignMalloc() #endif -#define IN_INDEX_ROWID 1 -#define IN_INDEX_EPH 2 -#define IN_INDEX_INDEX_ASC 3 -#define IN_INDEX_INDEX_DESC 4 -int sqlite3FindInIndex(Parse *, Expr *, int*); +/* +** Allowed return values from sqlite3FindInIndex() +*/ +#define IN_INDEX_ROWID 1 /* Search the rowid of the table */ +#define IN_INDEX_EPH 2 /* Search an ephemeral b-tree */ +#define IN_INDEX_INDEX_ASC 3 /* Existing index ASCENDING */ +#define IN_INDEX_INDEX_DESC 4 /* Existing index DESCENDING */ +#define IN_INDEX_NOOP 5 /* No table available. Use comparisons */ +/* +** Allowed flags for the 3rd parameter to sqlite3FindInIndex(). 
+*/ +#define IN_INDEX_NOOP_OK 0x0001 /* OK to return IN_INDEX_NOOP */ +#define IN_INDEX_MEMBERSHIP 0x0002 /* IN operator used for membership test */ +#define IN_INDEX_LOOP 0x0004 /* IN operator used as a loop */ +int sqlite3FindInIndex(Parse *, Expr *, u32, int*); #ifdef SQLITE_ENABLE_ATOMIC_WRITE int sqlite3JournalOpen(sqlite3_vfs *, const char *, sqlite3_file *, int, int); diff --git a/src/where.c b/src/where.c index 3cc66a34b1..20823046f7 100644 --- a/src/where.c +++ b/src/where.c @@ -2522,7 +2522,7 @@ static int codeEqualityTerm( } assert( pX->op==TK_IN ); iReg = iTarget; - eType = sqlite3FindInIndex(pParse, pX, 0); + eType = sqlite3FindInIndex(pParse, pX, IN_INDEX_LOOP, 0); if( eType==IN_INDEX_INDEX_DESC ){ testcase( bRev ); bRev = !bRev;