From: drh Date: Mon, 28 Sep 2020 19:51:54 +0000 (+0000) Subject: Revisiting the IN-scan optimization to try to fix it for the corner case X-Git-Tag: version-3.34.0~82^2~9 X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=68cf0ace3d160c8b3c12ee692c337f0d47e079d7;p=thirdparty%2Fsqlite.git Revisiting the IN-scan optimization to try to fix it for the corner case where the statistics deceive the query planner into using a scan when an indexed lookup would be better. This check-in changes the code generation to do the IN-scan using a new OP_SeekScan opcode. That new opcode is designed to abandon the scan and fall back to a seek if it doesn't find a match quickly enough. For this work-in-progress check-in, OP_SeekScan is still a no-op and OP_SeekGE still ends up doing all the work. FossilOrigin-Name: d720b6981eeb0ffdb14494ca63eca298ee724ae4ad4863c7c7cbfdad7fa52519 --- diff --git a/manifest b/manifest index 96eacb42ae..4e0a60c882 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Small\sperformance\simprovement\sand\ssize\sreduction\sby\sreducing\sthe\ssize\sof\nthe\sWhereTerm\sobject. -D 2020-09-28T15:49:43.021 +C Revisiting\sthe\sIN-scan\soptimization\sto\stry\sto\sfix\sit\sfor\sthe\scorner\scase\nwhere\sthe\sstatistics\sdeceive\sthe\squery\splanner\sinto\susing\sa\sscan\swhen\nan\sindexed\slookup\swould\sbe\sbetter.\s\sThis\scheck-in\schanges\sthe\scode\ngeneration\sto\sdo\sthe\sIN-scan\susing\sa\snew\sOP_SeekScan\sopcode.\s\sThat\snew\nopcode\sis\sdesigned\sto\sabandon\sthe\sscan\sand\sfall\sback\sto\sa\sseek\sif\nit\sdoesn't\sfind\sa\smatch\squickly\senough.\s\sFor\sthis\swork-in-progress\scheck-in,\s\nOP_SeekScan\sis\sstill\sa\sno-op\sand\sOP_SeekGE\sstill\sends\sup\sdoing\sall\sthe\swork. 
+D 2020-09-28T19:51:54.673 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -607,7 +607,7 @@ F src/upsert.c 2920de71b20f04fe25eb00b655d086f0ba60ea133c59d7fa3325c49838818e78 F src/utf.c ee39565f0843775cc2c81135751ddd93eceb91a673ea2c57f61c76f288b041a0 F src/util.c c0c7977de7ef9b8cb10f6c85f2d0557889a658f817b0455909a49179ba4c8002 F src/vacuum.c 492422c1463c076473bae1858799c7a0a5fe87a133d1223239447c422cd26286 -F src/vdbe.c 6430a540012b8b4c81076565804fcb979040e1b1a43ce76d2381863884155d84 +F src/vdbe.c 17584029ae2c2acc955f3dc19858fad1a3d509f31d42ddd2ff7be3552d4c77d3 F src/vdbe.h 83603854bfa5851af601fc0947671eb260f4363e62e960e8a994fb9bbcd2aaa1 F src/vdbeInt.h 3ca5e9fd6e095a8b6cf6bc3587a46fc93499503b2fe48951e1034ba9e2ce2f6e F src/vdbeapi.c c5e7cb2ab89a24d7f723e87b508f21bfb1359a04db5277d8a99fd1e015c12eb9 @@ -622,9 +622,9 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c 69e770e96fd56cc21608992bf2c6f1f3dc5cf2572d0495c6a643b06c3a679f14 F src/wal.h c3aa7825bfa2fe0d85bef2db94655f99870a285778baa36307c0a16da32b226a F src/walker.c 3df26a33dc4f54e8771600fb7fdebe1ece0896c2ad68c30ab40b017aa4395049 -F src/where.c 7ed6512e73e679231ebdeba470055e8d8e871a020b7f3f8dc75e993aaebdcbcd -F src/whereInt.h db7472b6eb617b5853ae74bbd755383e2275be72ae03ff07cc8ea141bb146dc8 -F src/wherecode.c 895ff782a62370a823c99dc7e1bca09ffd90392c9fafc007b6d3df4811e88b4f +F src/where.c c628a6850b023cfacfdbf6060481eae5e538ccb3c3464a700b501e08d4cce74b +F src/whereInt.h 59077fbd0b3d01bc8715e746c86a99ebf4c85bde8a57077ec04d2a23e59666ec +F src/wherecode.c 4096498d05f0c1bfff435ef48679e774a345bb2c700215da1383902e14877d5c F src/whereexpr.c 2a05552e808047a93845278c98c6ca64a265fa8e9ffd087c161bb11bfe339866 F src/window.c edd6f5e25a1e8f2b6f5305b7f5f7da7bb35f07f0d432b255b1d4c2fcab4205aa F 
test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 @@ -1880,7 +1880,10 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P d3d13df31a97648f952beb2e1a783f947a80ec843227985ad9ebd14452d2f654 -R 7b8c3cb46dfe068597bdde1b75906671 +P 43f7ddad800acf40917c5cc3d926640dbec17c34d5f1cbbb74bd80f44eeed0a5 +R 91153af0a9416b3fbb7b5420545d1fbb +T *branch * in-scan-vs-index +T *sym-in-scan-vs-index * +T -sym-trunk * U drh -Z d72bcf93f915317cb95885e250422e3c +Z 22384fb99540c2e7915964a3edd63b6d diff --git a/manifest.uuid b/manifest.uuid index f071ea53f6..0ba7bb60dc 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -43f7ddad800acf40917c5cc3d926640dbec17c34d5f1cbbb74bd80f44eeed0a5 \ No newline at end of file +d720b6981eeb0ffdb14494ca63eca298ee724ae4ad4863c7c7cbfdad7fa52519 \ No newline at end of file diff --git a/src/vdbe.c b/src/vdbe.c index a579b4ea11..d14838e4f1 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -4383,6 +4383,59 @@ seek_not_found: break; } + +/* Opcode: SeekScan +** Synopsis: Scan-ahead up to P1 rows +** +** This opcode is a prefix. It must be followed immediately by +** OP_SeekGE and then OP_IdxGT. This opcode should occur in no other +** context. That constraint is verified using assert() statements in +** the code. +** +** This opcode helps to optimize IN operators on a multi-column index +** where the IN operator is on the later terms of the index. +** +** The P3 and P4 operations of the OP_SeekGE opcode that follows this +** opcode identify an unpacked key which is the desired entry that +** we want to advance the cursor to. Call this the "target". 
+** +** If the OP_SeekGE opcode that immediately follows this opcode has +** never run before, then this opcode is a no-op and control passes +** through into the OP_SeekGE. +** +** If the subsequent OP_SeekGE opcode has run before, then that prior +** OP_SeekGE might have left the cursor pointing at an entry that +** is close to the target. This routine checks, and if possible +** bypasses the OP_SeekGE. +** +** If the cursor is past the target, jump immediately to the +** P2 of the subsequent OP_SeekGE. +** +** If the cursor is less than the target, then step forward up to P1 +** times trying to find a match. If during these steps, the +** cursor moves past the target, then jump immediately to +** the P2 of the subsequent OP_SeekGE. If a match is found, jump +** to the first instruction past the OP_IdxGT that follows the +** OP_SeekGE. (In other words, skip over the next two opcodes). +** If P1 steps are performed and the cursor is still less than the +** target, then fall through into the OP_SeekGE opcode. +** +** This opcode is an optimization. This opcode can be a no-op and +** the correct answer should still be obtained. The purpose of this +** opcode is to bypass unnecessary OP_SeekGE operations. +*/ +case OP_SeekScan: { + assert( pOp[1].opcode==OP_SeekGE ); + assert( pOp[2].opcode==OP_IdxGT ); + assert( pOp[1].p1==pOp[2].p1 ); + assert( pOp[1].p2==pOp[2].p2 ); + assert( pOp[1].p3==pOp[2].p3 ); + assert( pOp[1].p4.i==pOp[2].p4.i ); + assert( pOp->p1>0 ); + break; /* No-op for now. FIX ME. 
*/ +} + + /* Opcode: SeekHit P1 P2 P3 * * ** Synopsis: set P2<=seekHit<=P3 ** diff --git a/src/where.c b/src/where.c index 9049a59f28..0ab5adee8f 100644 --- a/src/where.c +++ b/src/where.c @@ -2564,7 +2564,7 @@ static int whereLoopAddBtreeIndex( WHERETRACE(0x40, ("Scan preferred over IN operator on column %d of \"%s\" (%d<%d)\n", saved_nEq, pProbe->zName, M+logK+10, nIn+rLogSize)); - continue; + pNew->wsFlags |= WHERE_IN_SEEKSCAN; }else{ WHERETRACE(0x40, ("IN operator preferred on column %d of \"%s\" (%d>=%d)\n", @@ -5197,6 +5197,7 @@ WhereInfo *sqlite3WhereBegin( if( (pLoop->wsFlags & WHERE_CONSTRAINT)!=0 && (pLoop->wsFlags & (WHERE_COLUMN_RANGE|WHERE_SKIPSCAN))==0 && (pLoop->wsFlags & WHERE_BIGNULL_SORT)==0 + && (pLoop->wsFlags & WHERE_IN_SEEKSCAN)==0 && (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)==0 && pWInfo->eDistinct!=WHERE_DISTINCT_ORDERED ){ diff --git a/src/whereInt.h b/src/whereInt.h index 0a2b5c5cef..6c969af9c2 100644 --- a/src/whereInt.h +++ b/src/whereInt.h @@ -619,5 +619,6 @@ void sqlite3WhereTabFuncArgs(Parse*, struct SrcList_item*, WhereClause*); #define WHERE_PARTIALIDX 0x00020000 /* The automatic index is partial */ #define WHERE_IN_EARLYOUT 0x00040000 /* Perhaps quit IN loops early */ #define WHERE_BIGNULL_SORT 0x00080000 /* Column nEq of index is BIGNULL */ +#define WHERE_IN_SEEKSCAN 0x00100000 /* Seek-scan optimization for IN */ #endif /* !defined(SQLITE_WHEREINT_H) */ diff --git a/src/wherecode.c b/src/wherecode.c index e3310b314e..8227d0d7d0 100644 --- a/src/wherecode.c +++ b/src/wherecode.c @@ -1804,6 +1804,11 @@ Bitmask sqlite3WhereCodeOneLoopStart( op = aStartOp[(start_constraints<<2) + (startEq<<1) + bRev]; assert( op!=0 ); + if( (pLoop->wsFlags & WHERE_IN_SEEKSCAN)!=0 ){ + assert( op==OP_SeekGE ); + assert( regBignull==0 ); + sqlite3VdbeAddOp1(v, OP_SeekScan, 10); + } sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); VdbeCoverage(v); VdbeCoverageIf(v, op==OP_Rewind); testcase( op==OP_Rewind );