From 5525ac1049ad73e6ddb1ad444993cc32d9980e4e Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 7 Jun 2024 21:00:42 +0000 Subject: [PATCH] Experimental optimization to rewrite a SELECT with an EXISTS(...) expression in the WHERE clause as a join. FossilOrigin-Name: 972a33db0b0e924b78d5309d222d8ea298bd59c72da14ea2d14e8e2caaad1e0a --- manifest | 24 ++++--- manifest.uuid | 2 +- src/build.c | 9 ++- src/resolve.c | 1 + src/select.c | 153 +++++++++++++++++++++++++++++++++++++++++++ src/sqliteInt.h | 1 + test/existsexpr.test | 116 ++++++++++++++++++++++++++++++++ 7 files changed, 292 insertions(+), 14 deletions(-) create mode 100644 test/existsexpr.test diff --git a/manifest b/manifest index f28f11b60c..6f4efd9bdc 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Allow\sthe\squery\splanner\saccess\sto\sthe\sargument\sof\sLIMIT\seven\sif\sthat\nargument\sis\sa\sbound\sparameter. -D 2024-06-06T23:56:36.923 +C Experimental\soptimization\sto\srewrite\sa\sSELECT\swith\san\sEXISTS(...)\sexpression\sin\sthe\sWHERE\sclause\sas\sa\sjoin. +D 2024-06-07T21:00:42.970 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -697,7 +697,7 @@ F src/btmutex.c 79a43670447eacc651519a429f6ece9fd638563cf95b469d6891185ddae2b522 F src/btree.c 8b42fc7d9efdb2df05c30e8f91ff6cfbd979724ae24bf90269028468b7a13333 F src/btree.h 55066f513eb095db935169dab1dc2f7c7a747ef223c533f5d4ad4dfed346cbd0 F src/btreeInt.h 98aadb6dcb77b012cab2574d6a728fad56b337fc946839b9898c4b4c969e30b6 -F src/build.c 237ccc0290d131d646be722f418e92ee0a38043aee25e7dfdc75f8ce5b3abe4e +F src/build.c 9e6a971156db6285f726fb03ddd9d47cb0a3648198b611f462021ac96fa24135 F src/callback.c db3a45e376deff6a16c0058163fe0ae2b73a2945f3f408ca32cf74960b28d490 F src/complete.c a3634ab1e687055cd002e11b8f43eb75c17da23e F src/ctime.c 64e4b1227b4ed123146f0aa2989131d1fbd9b927b11e80c9d58c6a68f9cd5ce3 @@ -753,14 +753,14 @@ F src/pragma.h e690a356c18e98414d2e870ea791c1be1545a714ba623719deb63f7f226d8bb7 F src/prepare.c d99931f45416652895e502328ca49fe782cfc4e1ebdcda13b3736d991ebf42ce F src/printf.c 8b250972305e14b365561be5117ed0fd364e4fd58968776df1ce64c6280b90f9 F src/random.c 606b00941a1d7dd09c381d3279a058d771f406c5213c9932bbd93d5587be4b9c -F src/resolve.c 7e8d23ce7cdbfedf351a47e759f2722e8182ca10fd7580be43f4ce1f1a228145 +F src/resolve.c 9c7786f032dea81487e7d94cb17849936f0e9b8891bfc91a6ac24ab193762804 F src/rowset.c 8432130e6c344b3401a8874c3cb49fefe6873fec593294de077afea2dce5ec97 -F src/select.c ea0b75fce45e1f2c22f50ed2b6e2ddd7f66640948d0fc79a397917b4236a74af +F src/select.c 669cfc0392c8a0bd43e5a199cba5f796aead3423e2c529d09148adfa57ae1152 F src/shell.c.in 77d12a0dab8724819e64a14d5fbaad91a934be2b22ad329708fba9ba78993f04 F src/sqlite.h.in cbd3e4177791a61c056fd81e37a5b21bb6c8cb2ea8cac558c625974673f50acf F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h 3f046c04ea3595d6bfda99b781926b17e672fd6d27da2ba6d8d8fc39981dcb54 -F src/sqliteInt.h 0fec1cf17d9e4c93baf29bf74b7d03de9425299443d4abd6989a004d6eb53b60 +F src/sqliteInt.h b81970d56dcb4ed5fc53841ef8b75d42bf511cb9533584546113bc131de832eb F src/sqliteLimit.h 6878ab64bdeb8c24a1d762d45635e34b96da21132179023338c93f820eee6728 F src/status.c cb11f8589a6912af2da3bb1ec509a94dd8ef27df4d4c1a97e0bcf2309ece972b F src/table.c 0f141b58a16de7e2fbe81c308379e7279f4c6b50eb08efeec5892794a0ba30d1 @@ -1114,6 +1114,7 @@ F test/exclusive.test 7ff63be7503990921838d5c9f77f6e33e68e48ed1a9d48cd28745bf650 F test/exclusive2.test 984090e8e9d1b331d2e8111daf6e5d61dda0bef7 F test/exec.test e949714dc127eaa5ecc7d723efec1ec27118fdd7 F test/exists.test 79a75323c78f02bbe9c251ea502a092f9ef63dac +F test/existsexpr.test a86e8300a2e0cd26109ba5b0d552dbc598b9ad4b79dfd26cac255839c9d5cf6a F test/expr.test 5c06696478212e5a04e04b043f993373f6f8e5ce5a80f5548a84703b123b6caa F test/expr2.test c27327ae9c017a7ff6280123f67aff496f912da74d78c888926d68b46ec75fd8 F test/exprfault.test da33606d799718e2f8e34efd0e5858884a1ad87f608774c552a7f5517cc27181 @@ -2195,8 +2196,11 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P f66608bd356efe492d1003663c2e1ccd7cfbf2d40393d256f8720149904ad2d5 e94dfe9928750dd98145d4d9920b298f7b0868703b487f86e0db77a41d53ccf9 -R f3d6aba1cc68a2224dd6f7df935a2c23 -U drh -Z b92cbf17475d91065d48b9fdd285839c +P c4a9dda2809c6e0e3d928e11e5553ead82cd9df551bcd35b11a7d869ef80ab8e +R 7e4221204288c4c4593aaccf38fb6e40 +T *branch * exists-to-join +T *sym-exists-to-join * +T -sym-trunk * +U dan +Z 8ebacdbf5bd3ee682966a7aab4962dc0 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index a310670ce9..7a08d86957 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c4a9dda2809c6e0e3d928e11e5553ead82cd9df551bcd35b11a7d869ef80ab8e \ No newline at end of file +972a33db0b0e924b78d5309d222d8ea298bd59c72da14ea2d14e8e2caaad1e0a \ No newline at end of file diff --git a/src/build.c b/src/build.c index 9747810e82..e6de79e775 100644 --- a/src/build.c +++ b/src/build.c @@ -5049,14 +5049,17 @@ void sqlite3SrcListIndexedBy(Parse *pParse, SrcList *p, Token *pIndexedBy){ ** are deleted by this function. */ SrcList *sqlite3SrcListAppendList(Parse *pParse, SrcList *p1, SrcList *p2){ - assert( p1 && p1->nSrc==1 ); + assert( p1 ); if( p2 ){ - SrcList *pNew = sqlite3SrcListEnlarge(pParse, p1, p2->nSrc, 1); + int nOld = p1->nSrc; + SrcList *pNew = sqlite3SrcListEnlarge(pParse, p1, p2->nSrc, nOld); if( pNew==0 ){ sqlite3SrcListDelete(pParse->db, p2); }else{ p1 = pNew; - memcpy(&p1->a[1], p2->a, p2->nSrc*sizeof(SrcItem)); + memcpy(&p1->a[nOld], p2->a, p2->nSrc*sizeof(SrcItem)); + assert( nOld==1 || (p2->nSrc==1 && (p2->a[0].fg.jointype&JT_LTORJ)==0) ); + assert( p1->nSrc>=2 ); sqlite3DbFree(pParse->db, p2); p1->a[0].fg.jointype |= (JT_LTORJ & p1->a[1].fg.jointype); } diff --git a/src/resolve.c b/src/resolve.c index d5c1515a74..9cb3662625 100644 --- a/src/resolve.c +++ b/src/resolve.c @@ -1367,6 +1367,7 @@ static int resolveExprStep(Walker *pWalker, Expr *pExpr){ if( nRef!=pNC->nRef ){ ExprSetProperty(pExpr, EP_VarSelect); pExpr->x.pSelect->selFlags |= SF_Correlated; + if( pExpr->op==TK_EXISTS ) pParse->bHasExists = 1; } pNC->ncFlags |= NC_Subquery; } diff --git a/src/select.c b/src/select.c index 9a0f2e40f5..332fba3b85 100644 --- a/src/select.c +++ b/src/select.c @@ -7283,6 +7283,152 @@ static int fromClauseTermCanBeCoroutine( return 1; } +/* +** sqlite3WalkExpr() callback used by exprReferencesTable(). +*/ +static int exprReferencesTableExprCb(Walker *pWalker, Expr *pExpr){ + if( pExpr->op==TK_COLUMN && pExpr->iTable==pWalker->u.iCur ){ + pWalker->eCode = 1; + } + return WRC_Continue; +} + +/* +** Return true if the expression passed as the first argument refers +** to cursor number iCur. Otherwise return false. +*/ +static int exprReferencesTable(Expr *pExpr, int iCur){ + Walker w; + memset(&w, 0, sizeof(w)); + w.u.iCur = iCur; + w.xExprCallback = exprReferencesTableExprCb; + w.xSelectCallback = sqlite3SelectWalkNoop; + sqlite3WalkExpr(&w, pExpr); + return w.eCode; +} + +/* +** Index pIdx is a UNIQUE index on the table accessed by cursor number +** iCsr. This function returns a mask of the index columns that are +** constrained to match a single, non-NULL value by the WHERE clause +** passed as the 4th argument. For example, if the index is: +** +** CREATE UNIQUE INDEX idx ON tbl(a, b, c); +** +** and pWhere: +** +** WHERE a=? AND c=? +** +** then this function returns 5. +*/ +static u64 findConstIdxTerms( + Parse *pParse, + int iCsr, + Index *pIdx, + Expr *pWhere +){ + u64 m = 0; + if( pWhere->op==TK_AND ){ + m = findConstIdxTerms(pParse, iCsr, pIdx, pWhere->pLeft); + m |= findConstIdxTerms(pParse, iCsr, pIdx, pWhere->pRight); + }else if( pWhere->op==TK_EQ ){ + Expr *pLeft = pWhere->pLeft; + Expr *pRight = pWhere->pRight; + if( pRight->op==TK_COLUMN && pRight->iTable==iCsr ){ + SWAP(Expr*, pLeft, pRight); + } + if( pLeft->op==TK_COLUMN + && pLeft->iTable==iCsr + && exprReferencesTable(pRight, iCsr)==0 + ){ + if( pIdx ){ + int ii; + for(ii=0; iinKeyCol; ii++){ + assert( pIdx->azColl[ii] ); + if( pLeft->iColumn==pIdx->aiColumn[ii] ){ + CollSeq *pColl = sqlite3ExprCompareCollSeq(pParse, pWhere); + if( sqlite3StrICmp(pColl->zName, pIdx->azColl[ii])==0 ){ + m |= ((u64)1 << ii); + break; + } + } + } + }else{ + if( pLeft->iColumn<0 ) m = 1; + } + } + } + return m; +} + +/* +** Argument pWhere is the WHERE clause belonging to SELECT statement p. This +** function attempts to transform expressions of the form: +** +** EXISTS (SELECT ...) +** +** into joins. For example, given +** +** CREATE TABLE sailors(sid INTEGER PRIMARY KEY, name TEXT); +** CREATE TABLE reserves(sid INT, day DATE, PRIMARY KEY(sid, day)); +** +** SELECT name FROM sailors AS S WHERE EXISTS ( +** SELECT * FROM reserves AS R WHERE S.sid = R.sid AND R.day = '2022-10-25' +** ); +** +** the SELECT statement may be transformed as follows: +** +** SELECT name FROM sailors AS S, reserves AS R +** WHERE S.sid = R.sid AND R.day = '2022-10-25'; +*/ +static void existsToJoin(Parse *pParse, Select *p, Expr *pWhere){ + if( pWhere && p->pSrc->nSrc>0 ){ + if( pWhere->op==TK_AND ){ + existsToJoin(pParse, p, pWhere->pLeft); + existsToJoin(pParse, p, pWhere->pRight); + } + else if( pWhere->op==TK_EXISTS && (pWhere->flags & EP_xIsSelect) ){ + Select *pSub = pWhere->x.pSelect; + if( pSub->pSrc->nSrc==1 + && (pSub->selFlags & (SF_Aggregate|SF_Correlated))==SF_Correlated + && pSub->pWhere + ){ + int bTransform = 0; /* True if EXISTS can be made into join */ + Table *pTab = pSub->pSrc->a[0].pTab; + int iCsr = pSub->pSrc->a[0].iCursor; + Index *pIdx; + if( HasRowid(pTab) && findConstIdxTerms(pParse, iCsr, 0,pSub->pWhere) ){ + bTransform = 1; + } + for(pIdx=pTab->pIndex; pIdx && bTransform==0; pIdx=pIdx->pNext){ + if( pIdx->onError && pIdx->nKeyCol<64 ){ + u64 c = findConstIdxTerms(pParse, iCsr, pIdx, pSub->pWhere); + if( c==(1 << pIdx->nKeyCol)-1 ){ + bTransform = 1; + } + } + } + if( bTransform ){ + p->pSrc = sqlite3SrcListAppendList(pParse, p->pSrc, pSub->pSrc); + pSub->pSrc = 0; + if( p->pWhere ){ + p->pWhere = sqlite3PExpr(pParse, TK_AND, p->pWhere, pSub->pWhere); + }else{ + p->pWhere = pSub->pWhere; + } + pSub->pWhere = 0; + + sqlite3SelectDelete(pParse->db, pSub); + memset(pWhere, 0, sizeof(*pWhere)); + pWhere->op = TK_INTEGER; + pWhere->u.iValue = 1; + ExprSetProperty(pWhere, EP_IntValue); + } + } + } + } +} + /* ** Generate code for the SELECT statement given in the p argument. ** @@ -7609,6 +7755,13 @@ int sqlite3Select( } #endif + /* If there may be an "EXISTS (SELECT ...)" in the WHERE clause, attempt + ** to change it into a join. */ + if( pParse->bHasExists ){ + existsToJoin(pParse, p, p->pWhere); + pTabList = p->pSrc; + } + /* Do the WHERE-clause constant propagation optimization if this is ** a join. No need to speed time on this operation for non-join queries ** as the equivalent optimization will be handled by query planner in diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 8866b69ea5..8d104bc253 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -3827,6 +3827,7 @@ struct Parse { u8 prepFlags; /* SQLITE_PREPARE_* flags */ u8 withinRJSubrtn; /* Nesting level for RIGHT JOIN body subroutines */ u8 bHasWith; /* True if statement contains WITH */ + u8 bHasExists; /* Has a correlated "EXISTS (SELECT ....)" expression */ #if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) u8 earlyCleanup; /* OOM inside sqlite3ParserAddCleanup() */ #endif diff --git a/test/existsexpr.test b/test/existsexpr.test new file mode 100644 index 0000000000..4bd55db52d --- /dev/null +++ b/test/existsexpr.test @@ -0,0 +1,116 @@ +# 2024 May 25 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +set testprefix existsexpr + + +do_execsql_test 1.0 { + CREATE TABLE x1(a, b); + INSERT INTO x1 VALUES(1, 2), (3, 4), (5, 6); + CREATE UNIQUE INDEX x1a ON x1(a); + CREATE INDEX x1b ON x1(b); + + CREATE TABLE x2(x, y); + INSERT INTO x2 VALUES(1, 2), (3, 4), (5, 6); +} + +do_execsql_test 1.1 { + SELECT 1 WHERE EXISTS (SELECT 1 FROM x1 WHERE a=5) +} {1} + +do_execsql_test 1.2 { + SELECT * FROM x2 WHERE EXISTS (SELECT 1 FROM x1 WHERE a=x) +} {1 2 3 4 5 6} + +# With "a=x", the UNIQUE index means the EXIST can be transformed to a join. +# So no "SUBQUERY". With "b=x", the index is not UNIQUE and so there is a +# "SUBQUERY". +do_execsql_test 1.3.1 { + EXPLAIN QUERY PLAN + SELECT * FROM x2 WHERE EXISTS (SELECT 1 FROM x1 WHERE a=x) +} {~/SUBQUERY/} +do_execsql_test 1.3.2 { + EXPLAIN QUERY PLAN + SELECT * FROM x2 WHERE EXISTS (SELECT 1 FROM x1 WHERE b=x) +} {/SUBQUERY/} + +do_execsql_test 1.4.1 { + EXPLAIN QUERY PLAN + SELECT * FROM x2 WHERE x=1 AND EXISTS (SELECT 1 FROM x1 WHERE a=x) +} {~/SUBQUERY/} +do_execsql_test 1.4.2 { + EXPLAIN QUERY PLAN + SELECT * FROM x2 WHERE EXISTS (SELECT 1 FROM x1 WHERE a=x) AND y=2 +} {~/SUBQUERY/} + +do_execsql_test 1.5 { + SELECT count(*) FROM x2 WHERE EXISTS (SELECT 1 FROM x1 WHERE a=x) +} {3} + +#------------------------------------------------------------------------- +do_execsql_test 2.0 { + CREATE TABLE t1(a, b); + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<1000 + ) INSERT INTO t1 SELECT i, i FROM s; + + CREATE TABLE t2(c, d); + WITH s(i) AS ( + SELECT 10 UNION ALL SELECT i+10 FROM s WHERE i<1000 + ) INSERT INTO t2 SELECT i, i FROM s; +} + +do_execsql_test 2.1 { + SELECT count(*) FROM t1; + SELECT count(*) FROM t2; +} {1000 100} + +do_execsql_test 2.2 { + SELECT count(*) FROM t1, t2 WHERE a=c; +} {100} + +do_execsql_test 2.3 { + SELECT count(*) FROM t1 WHERE EXISTS (SELECT 1 FROM t2 WHERE c=a) +} {100} +do_eqp_test 2.4 { + SELECT count(*) FROM t1 WHERE EXISTS (SELECT 1 FROM t2 WHERE c=a) +} {SCAN t1} + +do_execsql_test 2.5 { + CREATE UNIQUE INDEX t2c ON t2(c); + CREATE UNIQUE INDEX t1a ON t1(a); +} + +do_eqp_test 2.4.1 { + SELECT count(*) FROM t1 WHERE EXISTS (SELECT 1 FROM t2 WHERE c=a); +} {SCAN t1*SEARCH t2} +do_execsql_test 2.4.2 { + ANALYZE; +} +do_eqp_test 2.4.3 { + SELECT count(*) FROM t1 WHERE EXISTS (SELECT 1 FROM t2 WHERE c=a); +} {SCAN t2*SEARCH t1} +do_execsql_test 2.4.4 { + SELECT count(*) FROM t1 WHERE EXISTS (SELECT 1 FROM t2 WHERE c=a); +} {100} + +do_execsql_test 2.5.1 { + EXPLAIN QUERY PLAN + SELECT count(*) FROM t1 WHERE EXISTS (SELECT 1 FROM t2 WHERE t2.rowid=a); +} {~/SUBQUERY/} + +finish_test + + -- 2.47.2