From 274baaa927be1d5b0f44677167badd7dc8ae4e83 Mon Sep 17 00:00:00 2001 From: drh Date: Wed, 30 Sep 2020 18:22:24 +0000 Subject: [PATCH] Improved query optimization for multi-column indexes where the second or later columns are constrained by an IN operator and the earlier index columns limit the search to a small number of rows. Use the new OP_SeekScan opcode which does scanning of the relevant range of the index but gives up and falls back to doing a seek if the number of rows scanned grows to large, in order to guard against pathological cases where the estimated number of rows to be scanned is far too small. FossilOrigin-Name: f07ac3fb388630808c6299cc5b061426eb3a4832bf781a5cd4aa5ed2be7f8bf9 --- manifest | 22 +++---- manifest.uuid | 2 +- src/btree.c | 2 +- src/vdbe.c | 166 +++++++++++++++++++++++++++++++++++++++++++++++- src/where.c | 17 +++-- src/whereInt.h | 1 + src/wherecode.c | 19 +++++- 7 files changed, 204 insertions(+), 25 deletions(-) diff --git a/manifest b/manifest index bae9513a03..2305566ac7 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improvements\sto\sthe\sIN-early-out\soptimization\sso\sthat\sit\sworks\smore\nefficiently\swhen\sthere\sare\stwo\sor\smore\sindexed\sIN\sclauses\son\sa\ssingle\stable. -D 2020-09-01T02:02:16.434 +C Improved\squery\soptimization\sfor\smulti-column\sindexes\swhere\sthe\ssecond\sor\nlater\scolumns\sare\sconstrained\sby\san\sIN\soperator\sand\sthe\searlier\sindex\scolumns\nlimit\sthe\ssearch\sto\sa\ssmall\snumber\sof\srows.\s\sUse\sthe\snew\sOP_SeekScan\sopcode\nwhich\sdoes\sscanning\sof\sthe\srelevant\srange\sof\sthe\sindex\sbut\sgives\sup\sand\nfalls\sback\sto\sdoing\sa\sseek\sif\sthe\snumber\sof\srows\sscanned\sgrows\sto\slarge,\nin\sorder\sto\sguard\sagainst\spathological\scases\swhere\sthe\sestimated\snumber\nof\srows\sto\sbe\sscanned\sis\sfar\stoo\ssmall. +D 2020-09-30T18:22:24.218 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -459,7 +459,7 @@ F src/auth.c 0fac71038875693a937e506bceb492c5f136dd7b1249fbd4ae70b4e8da14f9df F src/backup.c 78d3cecfbe28230a3a9a1793e2ead609f469be43e8f486ca996006be551857ab F src/bitvec.c 17ea48eff8ba979f1f5b04cc484c7bb2be632f33 F src/btmutex.c 8acc2f464ee76324bf13310df5692a262b801808984c1b79defb2503bbafadb6 -F src/btree.c 958939f608e351a36756e3749596472baa0e5aae54eebd14e6beffe7a68aafc7 +F src/btree.c 47276c5c150c3d99d4b72e3d296a4c16e38a8531a3e8b3ece59fdf48208950d9 F src/btree.h c11446f07ec0e9dc85af8041cb0855c52f5359c8b2a43e47e02a685282504d89 F src/btreeInt.h 6111c15868b90669f79081039d19e7ea8674013f907710baa3c814dc3f8bfd3f F src/build.c 04bc5a6b6331a30348e59222ab132ecde7cf5dc04c0915a2182b0609d1ab3df0 @@ -590,7 +590,7 @@ F src/upsert.c 0dd81b40206841814d46942a7337786932475f085716042d0cb2fc7791bf8ca4 F src/utf.c 2f0fac345c7660d5c5bd3df9e9d8d33d4c27f366bcfb09e07443064d751a0507 F src/util.c e12939405e77906d06ab0b78c5f513dcd2b7cec2fbb553877b0abfece6067141 F src/vacuum.c 72690ccb6877a88f8473a893cf9f6d7592236f3eebfebfa840b19c708acde574 -F src/vdbe.c 2263fce9d8bf32dbbc26876dd148b6aaa4ae4af7dc0d8811ff7a7c56664475cf +F src/vdbe.c 8b2799c514c70fd0c8d71d6e50b5c720d18db7c23b47157dfecc0fe34823aa00 F src/vdbe.h 712bca562eaed1c25506b9faf9680bdc75fc42e2f4a1cd518d883fa79c7a4237 F src/vdbeInt.h 1a928793190675799194bdee2778f13b10ee42bc20bc03b6635687e4d3d80874 F src/vdbeapi.c 2ddd60f4a351f15ee98d841e346af16111ad59dfa4d25d2dd4012e9875bf7d92 @@ -604,9 +604,9 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c 9eccc7ebb532a7b0fd3cabc16cff576b9afa763472272db67d84fb8cec96f5c0 F src/wal.h 606292549f5a7be50b6227bd685fa76e3a4affad71bb8ac5ce4cb5c79f6a176a F src/walker.c 7607f1a68130c028255d8d56094ea602fc402c79e1e35a46e6282849d90d5fe4 -F src/where.c 3a727c5265843abe32b67c2ee4e19f5bfab39206f5d1d68e359d58e7448a77dc -F src/whereInt.h 7bf2fc8f036784f8d75780a38b44c4b772876651fe16cbdebbfc5018ccf5c5e9 -F src/wherecode.c 8e6776e9a1cc4b33f06b0020feb00585eb871f28e5146f18cd19f849f6d9376e +F src/where.c 4604336992184fa80fe9a8b176d18cb5c6b6e78f70b21f5d826d50275ed34791 +F src/whereInt.h bcbba483d0cd72c17ab9af97061dce3c00eb3695cd17a5d41cdfec2cb5a6e8ce +F src/wherecode.c 3e46ac6aad7ac2a260f5e12375586ec652f467a0b31e56d6b72c22f9a017e16d F src/whereexpr.c 90859652920f153d2c03f075488744be2926625ebd36911bcbcb17d0d29c891c F src/window.c 038c248267e74ff70a2bb9b1884d40fd145c5183b017823ecb6cbb14bc781478 F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 @@ -1819,8 +1819,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 0ecda433718f0bc973078099b19955dcfb0dcd15b68455e53156ac708e9d92e5 -Q +35505c68c1945c35babd2496e02bc4907a15c8e7b8d77f05f230bd0e9d4891d7 -R 34e80ec769980ce3f758c494a6190f92 +P 49b7631e86988d913b9bf926868a5d7c5db557c5a78ecfb8f25fe2d3ba17557a +Q +4a43430fd23f88352c33b29c4c105b72f6dc821f94bf362040c41a1648c402e5 +R d293953ed3d6717eecae576fd45d3a21 U drh -Z e2ad73430dd32cf44379c565112b498e +Z d13fdf54c676de3eeadb15809f5f40b1 diff --git a/manifest.uuid b/manifest.uuid index 535e0841db..156ac6cefc 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -49b7631e86988d913b9bf926868a5d7c5db557c5a78ecfb8f25fe2d3ba17557a \ No newline at end of file +f07ac3fb388630808c6299cc5b061426eb3a4832bf781a5cd4aa5ed2be7f8bf9 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index b31f74e794..dc6b77f0bb 100644 --- a/src/btree.c +++ b/src/btree.c @@ -5671,7 +5671,7 @@ static SQLITE_NOINLINE int btreeNext(BtCursor *pCur){ pPage = pCur->pPage; idx = ++pCur->ix; - if( !pPage->isInit ){ + if( !pPage->isInit || sqlite3FaultSim(412) ){ /* The only known way for this to happen is for there to be a ** recursive SQL function that does a DELETE operation as part of a ** SELECT which deletes content out from under an active cursor diff --git a/src/vdbe.c b/src/vdbe.c index 528307f75b..0e48c2a73c 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -4106,6 +4106,143 @@ seek_not_found: break; } + +/* Opcode: SeekScan P1 * * * * +** Synopsis: Scan-ahead up to P1 rows +** +** This opcode is a prefix opcode to OP_SeekGE. In other words, this +** opcode must be immediately followed by OP_SeekGE. Furthermore, the +** OP_SeekGE must be followed by OP_IdxGT. These constraints are +** checked by assert() statements. +** +** This opcode uses the P1 through P4 operands of the subsequent +** OP_SeekGE. In the text that follows, the operands of the subsequent +** OP_SeekGE opcode are denoted as SeekOP.P1 through SeekOP.P4. Only +** the P1 operand of this opcode is used, and it is denoted as This.P1. +** +** This opcode helps to optimize IN operators on a multi-column index +** where the IN operator is on the later terms of the index by avoiding +** unnecessary seeks on the btree, substituting steps to the next row +** of the b-tree instead. A correct answer is obtained if this opcode +** is omitted or is a no-op. +** +** The SeekGE.P3 and SeekGE.P4 operands identify an unpacked key which +** is the desired entry that we want the cursor SeekGE.P1 to be pointing +** to. Call this SeekGE.P4/P5 row the "target". +** +** If the SeekGE.P1 cursor is not currently pointing to a valid row, +** then this opcode is a no-op and control passes through into the OP_SeekGE. +** +** If the SeekGE.P1 cursor is pointing to a valid row, then that row +** might be the target row, or it might be near and slightly before the +** target row. This opcode attempts to position the cursor on the target +** row by, perhaps stepping by invoking sqlite3BtreeStep() on the cursor +** between 0 and This.P1 times. +** +** There are three possible outcomes from this opcode:
    +** +**
  1. If after This.P1 steps, the cursor is still point to a place that +** is earlier in the btree than the target row, +** then fall through into the subsquence OP_SeekGE opcode. +** +**
  2. If the cursor is successfully moved to the target row by 0 or more +** sqlite3BtreeNext() calls, then jump to the first instruction after the +** OP_IdxGT opcode - or in other words, skip the next two opcodes. +** +**
  3. If the cursor ends up past the target row (indicating the the target +** row does not exist in the btree) then jump to SeekOP.P2. +**
+*/ +case OP_SeekScan: { + VdbeCursor *pC; + int res; + int n; + UnpackedRecord r; + + assert( pOp[1].opcode==OP_SeekGE ); + assert( pOp[2].opcode==OP_IdxGT ); + assert( pOp[1].p1==pOp[2].p1 ); + assert( pOp[1].p2==pOp[2].p2 ); + assert( pOp[1].p3==pOp[2].p3 ); + assert( pOp[1].p4.i==pOp[2].p4.i ); + assert( pOp->p1>0 ); + pC = p->apCsr[pOp[1].p1]; + assert( pC!=0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + assert( !pC->isTable ); + if( !sqlite3BtreeCursorIsValidNN(pC->uc.pCursor) ){ +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("... cursor not valid - fall through\n"); + } +#endif + break; + } + n = pOp->p1; + assert( n>=1 ); + r.pKeyInfo = pC->pKeyInfo; + r.nField = (u16)pOp[1].p4.i; + r.default_rc = 0; + r.aMem = &aMem[pOp[1].p3]; +#ifdef SQLITE_DEBUG + { + int i; + for(i=0; i0 ){ + seekscan_search_fail: +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("... %d steps and then skip\n", pOp->p1 - n); + } +#endif + VdbeBranchTaken(1,3); + pOp++; + goto jump_to_p2; + } + if( res==0 ){ +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("... %d steps and then success\n", pOp->p1 - n); + } +#endif + VdbeBranchTaken(2,3); + pOp += 2; + break; + } + if( n<=0 ){ +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("... fall through after %d steps\n", pOp->p1); + } +#endif + VdbeBranchTaken(0,3); + break; + } + n--; + rc = sqlite3BtreeNext(pC->uc.pCursor, 0); + if( rc ){ + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; + goto seekscan_search_fail; + }else{ + goto abort_due_to_error; + } + } + } + + break; +} + + /* Opcode: SeekHit P1 P2 P3 * * ** Synopsis: set P2<=seekHit<=P3 ** @@ -5592,8 +5729,31 @@ case OP_IdxGE: { /* jump */ } } #endif - res = 0; /* Not needed. Only used to silence a warning. */ - rc = sqlite3VdbeIdxKeyCompare(db, pC, &r, &res); + + /* Inlined version of sqlite3VdbeIdxKeyCompare() */ + { + i64 nCellKey = 0; + BtCursor *pCur; + Mem m; + + assert( pC->eCurType==CURTYPE_BTREE ); + pCur = pC->uc.pCursor; + assert( sqlite3BtreeCursorIsValid(pCur) ); + nCellKey = sqlite3BtreePayloadSize(pCur); + /* nCellKey will always be between 0 and 0xffffffff because of the way + ** that btreeParseCellPtr() and sqlite3GetVarint32() are implemented */ + if( nCellKey<=0 || nCellKey>0x7fffffff ){ + rc = SQLITE_CORRUPT_BKPT; + goto abort_due_to_error; + } + sqlite3VdbeMemInit(&m, db, 0); + rc = sqlite3VdbeMemFromBtree(pCur, 0, (u32)nCellKey, &m); + if( rc ) goto abort_due_to_error; + res = sqlite3VdbeRecordCompareWithSkip(m.n, m.z, &r, 0); + sqlite3VdbeMemRelease(&m); + } + /* End of inlined sqlite3VdbeIdxKeyCompare() */ + assert( (OP_IdxLE&1)==(OP_IdxLT&1) && (OP_IdxGE&1)==(OP_IdxGT&1) ); if( (pOp->opcode&1)==(OP_IdxLT&1) ){ assert( pOp->opcode==OP_IdxLE || pOp->opcode==OP_IdxLT ); @@ -5603,7 +5763,7 @@ case OP_IdxGE: { /* jump */ res++; } VdbeBranchTaken(res>0,2); - if( rc ) goto abort_due_to_error; + assert( rc==SQLITE_OK ); if( res>0 ) goto jump_to_p2; break; } diff --git a/src/where.c b/src/where.c index ce025e11d4..8efcb475c1 100644 --- a/src/where.c +++ b/src/where.c @@ -2554,7 +2554,7 @@ static int whereLoopAddBtreeIndex( WHERETRACE(0x40, ("Scan preferred over IN operator on column %d of \"%s\" (%d<%d)\n", saved_nEq, pProbe->zName, M+logK+10, nIn+rLogSize)); - continue; + pNew->wsFlags |= WHERE_IN_SEEKSCAN; }else{ WHERETRACE(0x40, ("IN operator preferred on column %d of \"%s\" (%d>=%d)\n", @@ -5050,6 +5050,7 @@ WhereInfo *sqlite3WhereBegin( sqlite3VdbeSetP4KeyInfo(pParse, pIx); if( (pLoop->wsFlags & WHERE_CONSTRAINT)!=0 && (pLoop->wsFlags & (WHERE_COLUMN_RANGE|WHERE_SKIPSCAN))==0 + && (pLoop->wsFlags & WHERE_IN_SEEKSCAN)==0 && (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)==0 && pWInfo->eDistinct!=WHERE_DISTINCT_ORDERED ){ @@ -5207,11 +5208,15 @@ void sqlite3WhereEnd(WhereInfo *pWInfo){ sqlite3VdbeJumpHere(v, pIn->addrInTop+1); if( pIn->eEndLoopOp!=OP_Noop ){ if( pIn->nPrefix ){ - assert( pLoop->wsFlags & WHERE_IN_EARLYOUT ); - sqlite3VdbeAddOp4Int(v, OP_IfNoHope, pLevel->iIdxCur, - sqlite3VdbeCurrentAddr(v)+2, - pIn->iBase, pIn->nPrefix); - VdbeCoverage(v); + int bEarlyOut = + (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 + && (pLoop->wsFlags & WHERE_IN_EARLYOUT)!=0; + if( bEarlyOut ){ + sqlite3VdbeAddOp4Int(v, OP_IfNoHope, pLevel->iIdxCur, + sqlite3VdbeCurrentAddr(v)+2, + pIn->iBase, pIn->nPrefix); + VdbeCoverage(v); + } } sqlite3VdbeAddOp2(v, pIn->eEndLoopOp, pIn->iCur, pIn->addrInTop); VdbeCoverage(v); diff --git a/src/whereInt.h b/src/whereInt.h index 60f41df910..69342b79a6 100644 --- a/src/whereInt.h +++ b/src/whereInt.h @@ -586,3 +586,4 @@ void sqlite3WhereTabFuncArgs(Parse*, struct SrcList_item*, WhereClause*); #define WHERE_UNQ_WANTED 0x00010000 /* WHERE_ONEROW would have been helpful*/ #define WHERE_PARTIALIDX 0x00020000 /* The automatic index is partial */ #define WHERE_IN_EARLYOUT 0x00040000 /* Perhaps quit IN loops early */ +#define WHERE_IN_SEEKSCAN 0x00100000 /* Seek-scan optimization for IN */ diff --git a/src/wherecode.c b/src/wherecode.c index 7315d3f6fa..bf304eefc0 100644 --- a/src/wherecode.c +++ b/src/wherecode.c @@ -568,7 +568,7 @@ static int codeEqualityTerm( if( pLevel->u.in.nIn==0 ){ pLevel->addrNxt = sqlite3VdbeMakeLabel(pParse); } - if( iEq>0 ){ + if( iEq>0 && (pLoop->wsFlags & WHERE_IN_SEEKSCAN)==0 ){ pLoop->wsFlags |= WHERE_IN_EARLYOUT; } @@ -606,7 +606,7 @@ static int codeEqualityTerm( pIn++; } } - if( iEq>0 ){ + if( iEq>0 && (pLoop->wsFlags & WHERE_IN_SEEKSCAN)==0 ){ sqlite3VdbeAddOp3(v, OP_SeekHit, pLevel->iIdxCur, 0, iEq); } }else{ @@ -1689,6 +1689,19 @@ Bitmask sqlite3WhereCodeOneLoopStart( }else{ op = aStartOp[(start_constraints<<2) + (startEq<<1) + bRev]; assert( op!=0 ); + if( (pLoop->wsFlags & WHERE_IN_SEEKSCAN)!=0 ){ + assert( op==OP_SeekGE ); + /* TUNING: The OP_SeekScan opcode seeks to reduce the number + ** of expensive seek operations by replacing a single seek with + ** 1 or more step operations. The question is, how many steps + ** should we try before giving up and going with a seek. The cost + ** of a seek is proportional to the logarithm of the of the number + ** of entries in the tree, so basing the number of steps to try + ** on the estimated number of rows in the btree seems like a good + ** guess. */ + sqlite3VdbeAddOp1(v, OP_SeekScan, (pIdx->aiRowLogEst[0]+9)/10); + VdbeCoverage(v); + } sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); VdbeCoverage(v); VdbeCoverageIf(v, op==OP_Rewind); testcase( op==OP_Rewind ); @@ -1748,7 +1761,7 @@ Bitmask sqlite3WhereCodeOneLoopStart( testcase( op==OP_IdxLE ); VdbeCoverageIf(v, op==OP_IdxLE ); } - if( pLoop->wsFlags & WHERE_IN_EARLYOUT ){ + if( (pLoop->wsFlags & WHERE_IN_EARLYOUT)!=0 ){ sqlite3VdbeAddOp3(v, OP_SeekHit, iIdxCur, nEq, nEq); } -- 2.47.3