From: drh Date: Thu, 25 Aug 2016 21:14:34 +0000 (+0000) Subject: Refactor the sqlite3ExprCodeIN() routine for improved maintainability. X-Git-Tag: version-3.15.0~110^2~36 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e347d3e8132c56ef59b12889605a4f3f0ad082ec;p=thirdparty%2Fsqlite.git Refactor the sqlite3ExprCodeIN() routine for improved maintainability. FossilOrigin-Name: b56705ae6374db9db82613ef89faa1a1e6b00a18 --- diff --git a/manifest b/manifest index 6410765471..5805cde823 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Another\sfix\sin\sthe\sIN-operator\salgorithm\sdescription. -D 2016-08-25T17:47:36.711 +C Refactor\sthe\ssqlite3ExprCodeIN()\sroutine\sfor\simproved\smaintainability. +D 2016-08-25T21:14:34.728 F Makefile.in cfd8fb987cd7a6af046daa87daa146d5aad0e088 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc d66d0395c38571aab3804f8db0fa20707ae4609a @@ -338,7 +338,7 @@ F src/ctime.c e77f3dc297b4b65c96da78b4ae4272fdfae863d7 F src/date.c 95c9a8d00767e7221a8e9a31f4e913fc8029bf6b F src/dbstat.c 19ee7a4e89979d4df8e44cfac7a8f905ec89b77d F src/delete.c 76c084f0265f4a3cd1ecf17eee112a94f1ccbc05 -F src/expr.c 1c003fcb9c5a6f8c54f6392378225d578be2a086 +F src/expr.c 45f814e52d2279914bd7a9141d92a26cb71b1533 F src/fault.c 160a0c015b6c2629d3899ed2daf63d75754a32bb F src/fkey.c e2be0968c1adc679c87e467aa5b4f167588f38a8 F src/func.c 29cc9acb170ec1387b9f63eb52cd85f8de96c771 @@ -346,7 +346,7 @@ F src/global.c c45ea22aff29334f6a9ec549235ac3357c970015 F src/hash.c 55b5fb474100cee0b901edaf203e26c970940f36 F src/hash.h ab34c5c54a9e9de2e790b24349ba5aab3dbb4fd4 F src/hwtime.h 747c1bbe9df21a92e9c50f3bbec1de841dc5e5da -F src/in-operator.md e74c3dbd32b765c22c0bfc023f3b867e841a292b +F src/in-operator.md 10cd8f4bcd225a32518407c2fb2484089112fd71 F src/insert.c a255eb795cf475e7a0659297144fc80f70eb4e30 F src/legacy.c 75d3023be8f0d2b99d60f905090341a03358c58e F src/loadext.c dd7a2b77902cc66c22555aef02e1a682554b7aec @@ -1521,7 +1521,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P df0648373a50006ca18d692e12552d1d53d549e3 -R 5563c2646a49e89461278dffc729d682 +P f474aeac4fa62d87e73189868d7c7a295ffb7265 +R 015c67366edd8d396f4d5202834c4796 U drh -Z f4cbbe9a35f7af77488dc4e4d3d948a7 +Z ec6ee7d7de4c92376a0d667aef9d24cc diff --git a/manifest.uuid b/manifest.uuid index 53efcbce1d..b8c92ac025 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f474aeac4fa62d87e73189868d7c7a295ffb7265 \ No newline at end of file +b56705ae6374db9db82613ef89faa1a1e6b00a18 \ No newline at end of file diff --git a/src/expr.c b/src/expr.c index 385e808917..ab0349dcb7 100644 --- a/src/expr.c +++ b/src/expr.c @@ -2638,10 +2638,15 @@ int sqlite3ExprCheckIN(Parse *pParse, Expr *pIn){ ** x IN (value, value, ...) ** ** The left-hand side (LHS) is a scalar or vector expression. The -** right-hand side (RHS) is an array of zero or more values. The IN operator -** is true if the LHS is contained within the RHS. The result is false -** if the LHS is definitely not in the RHS. The result is NULL if the presence -** of the LHS in the RHS cannot be determined due to NULLs. +** right-hand side (RHS) is an array of zero or more scalar values, or a +** subquery. If the RHS is a subquery, the number of result columns must +** match the number of columns in the vector on the LHS. If the RHS is +** a list of values, the LHS must be a scalar. +** +** The IN operator is true if the LHS value is contained within the RHS. +** The result is false if the LHS is definitely not in the RHS. The +** result is NULL if the presence of the LHS in the RHS cannot be +** determined due to NULLs. ** ** This routine generates code that jumps to destIfFalse if the LHS is not ** contained within the RHS. If due to NULLs we cannot determine if the LHS @@ -2659,22 +2664,29 @@ static void sqlite3ExprCodeIN( ){ int rRhsHasNull = 0; /* Register that is true if RHS contains NULL values */ int eType; /* Type of the RHS */ - int r1, r2; /* Temporary use registers */ + int rLhs; /* Register(s) holding the LHS values */ + int rLhsOrig; /* LHS values prior to reordering by aiMap[] */ Vdbe *v; /* Statement under construction */ int *aiMap = 0; /* Map from vector field to index column */ char *zAff = 0; /* Affinity string for comparisons */ - int nVector; /* Size of vectors for this IN operator */ - int iDummy; /* Dummy parameter to exprCodeVector() */ - Expr *pLeft = pExpr->pLeft; /* The LHS of the IN operator */ - int i; /* loop counter */ - + int nVector; /* Size of vectors for this IN operator */ + int iDummy; /* Dummy parameter to exprCodeVector() */ + Expr *pLeft; /* The LHS of the IN operator */ + int i; /* loop counter */ + int destStep2; /* Where to jump when NULLs seen in step 2 */ + int destStep6 = 0; /* Start of code for Step 6 */ + int addrTruthOp; /* Address of opcode that determines the IN is true */ + int destNotNull; /* Jump here if a comparison is not true in step 6 */ + int addrTop; /* Top of the step-6 loop */ + + pLeft = pExpr->pLeft; if( sqlite3ExprCheckIN(pParse, pExpr) ) return; zAff = exprINAffinity(pParse, pExpr); nVector = sqlite3ExprVectorSize(pExpr->pLeft); aiMap = (int*)sqlite3DbMallocZero( pParse->db, nVector*(sizeof(int) + sizeof(char)) + 1 ); - if( pParse->db->mallocFailed ) goto end_code_IN_op; + if( pParse->db->mallocFailed ) goto sqlite3ExprCodeIN_oom_error; /* Attempt to compute the RHS. After this step, if anything other than ** IN_INDEX_NOOP is returned, the table opened ith cursor pExpr->iTable @@ -2703,24 +2715,31 @@ static void sqlite3ExprCodeIN( /* Code the LHS, the from " IN (...)". If the LHS is a ** vector, then it is stored in an array of nVector registers starting ** at r1. + ** + ** sqlite3FindInIndex() might have reordered the fields of the LHS vector + ** so that the fields are in the same order as an existing index. The + ** aiMap[] array contains a mapping from the original LHS field order to + ** the field order that matches the RHS index. */ sqlite3ExprCachePush(pParse); - r2 = exprCodeVector(pParse, pLeft, &iDummy); - for(i=0; ix.pList; @@ -2732,7 +2751,7 @@ static void sqlite3ExprCodeIN( assert( !ExprHasProperty(pExpr, EP_xIsSelect) ); if( destIfNull!=destIfFalse ){ regCkNull = sqlite3GetTempReg(pParse); - sqlite3VdbeAddOp3(v, OP_BitAnd, r1, r1, regCkNull); + sqlite3VdbeAddOp3(v, OP_BitAnd, rLhs, rLhs, regCkNull); } for(ii=0; iinExpr; ii++){ r2 = sqlite3ExprCodeTemp(pParse, pList->a[ii].pExpr, ®ToFree); @@ -2740,14 +2759,14 @@ static void sqlite3ExprCodeIN( sqlite3VdbeAddOp3(v, OP_BitAnd, regCkNull, r2, regCkNull); } if( iinExpr-1 || destIfNull!=destIfFalse ){ - sqlite3VdbeAddOp4(v, OP_Eq, r1, labelOk, r2, + sqlite3VdbeAddOp4(v, OP_Eq, rLhs, labelOk, r2, (void*)pColl, P4_COLLSEQ); VdbeCoverageIf(v, iinExpr-1); VdbeCoverageIf(v, ii==pList->nExpr-1); sqlite3VdbeChangeP5(v, zAff[0]); }else{ assert( destIfNull==destIfFalse ); - sqlite3VdbeAddOp4(v, OP_Ne, r1, destIfFalse, r2, + sqlite3VdbeAddOp4(v, OP_Ne, rLhs, destIfFalse, r2, (void*)pColl, P4_COLLSEQ); VdbeCoverage(v); sqlite3VdbeChangeP5(v, zAff[0] | SQLITE_JUMPIFNULL); } @@ -2759,127 +2778,111 @@ static void sqlite3ExprCodeIN( } sqlite3VdbeResolveLabel(v, labelOk); sqlite3ReleaseTempReg(pParse, regCkNull); + goto sqlite3ExprCodeIN_finished; + } + + /* Step 2: Check to see if the LHS contains any NULL columns. If the + ** LHS does contain NULLs then the result must be either FALSE or NULL. + ** We will then skip the binary search of the RHS. + */ + if( destIfNull==destIfFalse ){ + destStep2 = destIfFalse; }else{ - - /* If any value on the LHS is NULL, the result of the IN(...) operator - ** must be either false or NULL. If these two are handled identically, - ** test the LHS for NULLs and jump directly to destIfNull if any are - ** found. - ** - ** Otherwise, if NULL and false are handled differently, and the - ** IN(...) operation is not a vector operation, and the LHS of the - ** operator is NULL, then the result is false if the index is - ** completely empty, or NULL otherwise. */ - if( destIfNull==destIfFalse ){ - for(i=0; ipLeft, i); - if( sqlite3ExprCanBeNull(p) ){ - sqlite3VdbeAddOp2(v, OP_IsNull, r1+aiMap[i], destIfNull); - VdbeCoverage(v); - } - } - }else if( nVector==1 && sqlite3ExprCanBeNull(pExpr->pLeft) ){ - int addr1 = sqlite3VdbeAddOp1(v, OP_NotNull, r1); VdbeCoverage(v); - sqlite3VdbeAddOp2(v, OP_Rewind, pExpr->iTable, destIfFalse); + destStep2 = destStep6 = sqlite3VdbeMakeLabel(v); + } + for(i=0; ipLeft, i); + if( sqlite3ExprCanBeNull(p) ){ + sqlite3VdbeAddOp2(v, OP_IsNull, rLhs+i, destStep2); VdbeCoverage(v); - sqlite3VdbeGoto(v, destIfNull); - sqlite3VdbeJumpHere(v, addr1); } - - if( eType==IN_INDEX_ROWID ){ - /* In this case, the RHS is the ROWID of table b-tree */ - sqlite3VdbeAddOp3(v, OP_SeekRowid, pExpr->iTable, destIfFalse, r1); - VdbeCoverage(v); - }else{ - /* In this case, the RHS is an index b-tree. Apply the comparison - ** affinities to each value on the LHS of the operator. */ - sqlite3VdbeAddOp4(v, OP_Affinity, r1, nVector, 0, zAff, nVector); - - if( nVector>1 && destIfNull!=destIfFalse ){ - int iIdx = pExpr->iTable; - int addrTop; - int addrNext; - int addrFound; - - /* Search the index for the key. */ - addrFound = sqlite3VdbeAddOp4Int(v, OP_Found, iIdx, 0, r1, nVector); - VdbeCoverage(v); + } - /* At this point the specified key is not present in the index, - ** so the result of the IN(..) operator must be either NULL or - ** 0. The vdbe code generated below figures out which. */ - addrTop = 1+sqlite3VdbeAddOp2(v, OP_Rewind, iIdx, destIfFalse); - VdbeCoverage(v); - addrNext = sqlite3VdbeMakeLabel(v); - - for(i=0; ipLeft, i); - if( sqlite3ExprCanBeNull(p) ){ - sqlite3VdbeAddOp2(v, OP_IsNull, r1+aiMap[i], destIfNull); - VdbeCoverage(v); - } - } + /* Step 3. The LHS is now known to be non-NULL. Do the binary search + ** of the RHS using the LHS as a probe. If found, the result is + ** true. + */ + if( eType==IN_INDEX_ROWID ){ + /* In this case, the RHS is the ROWID of table b-tree and so we also + ** know that the RHS is non-NULL. Hence, we combine steps 3 and 4 + ** into a single opcode. */ + sqlite3VdbeAddOp3(v, OP_SeekRowid, pExpr->iTable, destIfFalse, rLhs); + VdbeCoverage(v); + addrTruthOp = sqlite3VdbeAddOp0(v, OP_Goto); /* Return True */ + }else{ + sqlite3VdbeAddOp4(v, OP_Affinity, rLhs, nVector, 0, zAff, nVector); + if( destIfFalse==destIfNull ){ + /* Combine Step 3 and Step 5 into a single opcode */ + sqlite3VdbeAddOp4Int(v, OP_NotFound, pExpr->iTable, destIfFalse, + rLhs, nVector); VdbeCoverage(v); + goto sqlite3ExprCodeIN_finished; + } + /* Ordinary Step 3, for the case where FALSE and NULL are distinct */ + addrTruthOp = sqlite3VdbeAddOp4Int(v, OP_Found, pExpr->iTable, 0, + rLhs, nVector); VdbeCoverage(v); + } - }else if( rRhsHasNull==0 ){ - /* This branch runs if it is known at compile time that the RHS - ** cannot contain NULL values. This happens as a result - ** of "NOT NULL" constraints in the database schema. - ** - ** Also run this branch if NULL is equivalent to FALSE - ** for this particular IN operator. */ - sqlite3VdbeAddOp4Int( - v, OP_NotFound, pExpr->iTable, destIfFalse, r1, nVector - ); - VdbeCoverage(v); - }else{ - /* In this branch, the RHS of the IN might contain a NULL and - ** the presence of a NULL on the RHS makes a difference in the - ** outcome. - */ - int addr1; + /* Step 4. If the RHS is known to be non-NULL and we did not find + ** an match on the search above, then the result must be FALSE. + */ + if( rRhsHasNull && nVector==1 ){ + sqlite3VdbeAddOp2(v, OP_NotNull, rRhsHasNull, destIfFalse); + VdbeCoverage(v); + } - /* First check to see if the LHS is contained in the RHS. If so, - ** then the answer is TRUE the presence of NULLs in the RHS does - ** not matter. If the LHS is not contained in the RHS, then the - ** answer is NULL if the RHS contains NULLs and the answer is - ** FALSE if the RHS is NULL-free. - */ - addr1 = sqlite3VdbeAddOp4Int(v, OP_Found, pExpr->iTable, 0, r1, 1); - VdbeCoverage(v); - sqlite3VdbeAddOp2(v, OP_IsNull, rRhsHasNull, destIfNull); - VdbeCoverage(v); - sqlite3VdbeGoto(v, destIfFalse); - sqlite3VdbeJumpHere(v, addr1); - } - } + /* Step 5. If we do not care about the difference between NULL and + ** FALSE, then just return false. + */ + if( destIfFalse==destIfNull ) sqlite3VdbeGoto(v, destIfFalse); + + /* Step 6: Loop through rows of the RHS. Compare each row to the LHS. + ** If any comparison is NULL, then the result is NULL. If all + ** comparisons are FALSE then the final result is FALSE. + ** + ** For a scalar LHS, it is sufficient to check just the first row + ** of the RHS. + */ + if( destStep6 ) sqlite3VdbeResolveLabel(v, destStep6); + addrTop = sqlite3VdbeAddOp2(v, OP_Rewind, pExpr->iTable, destIfFalse); + VdbeCoverage(v); + if( nVector>1 ){ + destNotNull = sqlite3VdbeMakeLabel(v); + }else{ + /* For nVector==1, combine steps 6 and 7 by immediately returning + ** FALSE if the first comparison is not NULL */ + destNotNull = destIfFalse; } - if( r2!=r1 ) sqlite3ReleaseTempReg(pParse, r1); + for(i=0; iiTable, i, r3); + sqlite3VdbeAddOp4(v, OP_Ne, rLhs+i, destNotNull, r3, + (void*)pColl, P4_COLLSEQ); + VdbeCoverage(v); + sqlite3ReleaseTempReg(pParse, r3); + } + sqlite3VdbeAddOp2(v, OP_Goto, 0, destIfNull); + if( nVector>1 ){ + sqlite3VdbeResolveLabel(v, destNotNull); + sqlite3VdbeAddOp2(v, OP_Next, pExpr->iTable, addrTop+1); + VdbeCoverage(v); + + /* Step 7: If we reach this point, we know that the result must + ** be false. */ + sqlite3VdbeAddOp2(v, OP_Goto, 0, destIfFalse); + } + + /* Jumps here in order to return true. */ + sqlite3VdbeJumpHere(v, addrTruthOp); + +sqlite3ExprCodeIN_finished: + if( rLhs!=rLhsOrig ) sqlite3ReleaseTempReg(pParse, rLhs); sqlite3ExprCachePop(pParse); VdbeComment((v, "end IN expr")); -end_code_IN_op: +sqlite3ExprCodeIN_oom_error: sqlite3DbFree(pParse->db, aiMap); sqlite3DbFree(pParse->db, zAff); } diff --git a/src/in-operator.md b/src/in-operator.md index ff691864b0..e9ad2101aa 100644 --- a/src/in-operator.md +++ b/src/in-operator.md @@ -6,7 +6,7 @@ IN-Operator Implementation Notes An IN operator has one of the following formats: > - x IN (list) + x IN (y1,y2,y3,...,yN) x IN (subquery) The "x" is referred to as the LHS (left-hand side). The list or subquery @@ -14,9 +14,20 @@ on the right is called the RHS (right-hand side). If the RHS is a list it must be a non-empty list. But if the RHS is a subquery, it can be an empty set. -Both the LHS and RHS can be scalars or vectors. The two must match. -In other words, they must both be scalar or else they must both be -vectors of the same length. +The LHS can be a scalar (a single quantity) or a vector (a list of +two or or more values) or a subquery that returns one or more columns. +We use the term "vector" to mean an actually list of values or a +subquery that returns two or more columns. An isolated value or +a subquery that returns a single columns is called a scalar. + +The RHS can be a subquery that returns a single column, a subquery +that returns two or more columns, or a list of scalars. It is not +currently support for the RHS to be a list of vectors. + +The number of columns for LHS must match the number of columns for +the RHS. If the RHS is a list of values, then the LHS must be a +scalar. If the RHS is a subquery returning N columns, then the LHS +must be a vector of size N. NULL values can occur in either or both of the LHS and RHS. If the LHS contains only @@ -84,7 +95,7 @@ is the algorithm that is implemented in SQLite. 4. If the RHS is non-NULL then return FALSE. - 5. If we do not need to distingish between FALSE and NULL, + 5. If we do not need to distinguish between FALSE and NULL, then return FALSE. 6. For each row in the RHS, compare that row against the LHS and