]> git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
Use a Bloom filter to improve performance of IN operators when the RHS of
authordrh <>
Wed, 3 Jul 2024 17:51:48 +0000 (17:51 +0000)
committerdrh <>
Wed, 3 Jul 2024 17:51:48 +0000 (17:51 +0000)
the IN operator is a subquery.

FossilOrigin-Name: 1933496539c19cbf429a39d6b0b1c6b1b2af50733a3c4aea4920990ced652f6a

manifest
manifest.uuid
src/expr.c
src/select.c
src/sqliteInt.h

index d24ae82c7919498a8ac279096a9aaf547230faa1..054fab72aa0255daeabc3b05a0b575a4f67ed3fd 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Add\sassert()\sstatements\sto\sFTS5\sto\shush-up\swarnings\sfrom\sscan-build.
-D 2024-07-02T13:54:46.151
+C Use\sa\sBloom\sfilter\sto\simprove\sperformance\sof\sIN\soperators\swhen\sthe\sRHS\sof\nthe\sIN\soperator\sis\sa\ssubquery.
+D 2024-07-03T17:51:48.915
 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
 F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@@ -705,7 +705,7 @@ F src/date.c 13dd752847afb32ed70510ad7345a5b9c841f51ad904dba5d010f1fa3a6a324e
 F src/dbpage.c 80e46e1df623ec40486da7a5086cb723b0275a6e2a7b01d9f9b5da0f04ba2782
 F src/dbstat.c 3b677254d512fcafd4d0b341bf267b38b235ccfddbef24f9154e19360fa22e43
 F src/delete.c cb766727c78e715f9fb7ec8a7d03658ed2a3016343ca687acfcec9083cdca500
-F src/expr.c 2b72d352a2917fe8d2cd8f84e0acd0140c6ba769b0cd7bcfd86eb9ba0c9c50e8
+F src/expr.c d6f614d0c90cf8d72bbd54dbae96040e7feb495a77991fdc1b1c67db68eeb326
 F src/fault.c 460f3e55994363812d9d60844b2a6de88826e007
 F src/fkey.c 852f93c0ef995e0c2b8983059a2b97151c194cc8259e21f5bc2b7ac508348c2a
 F src/func.c 1f61e32e7a357e615b5d2e774bee563761fce4f2fd97ecb0f72c33e62a2ada5f
@@ -755,12 +755,12 @@ F src/printf.c 8b250972305e14b365561be5117ed0fd364e4fd58968776df1ce64c6280b90f9
 F src/random.c 606b00941a1d7dd09c381d3279a058d771f406c5213c9932bbd93d5587be4b9c
 F src/resolve.c 7e8d23ce7cdbfedf351a47e759f2722e8182ca10fd7580be43f4ce1f1a228145
 F src/rowset.c 8432130e6c344b3401a8874c3cb49fefe6873fec593294de077afea2dce5ec97
-F src/select.c a1c8fadd45d0843b14793db2af49809a5327db5cca9d87d507b57aa748ee6ee2
+F src/select.c ca249242cf6c8762f505b30d93915ee61a21de30fd69fbfcf04a7ea8dda6b334
 F src/shell.c.in b7d435c137eb323981adff814f172dbaabb9ba504fef17cb11d4681c1633ee13
 F src/sqlite.h.in 6c884a87bbf8828562b49272025a1e66e3801a196a58b0bdec87edcd2c9c8fc1
 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8
 F src/sqlite3ext.h 3f046c04ea3595d6bfda99b781926b17e672fd6d27da2ba6d8d8fc39981dcb54
-F src/sqliteInt.h 86cbef26e285786dfd6dc7b33cfed0210095eb3ce9b38047229b03eb795b2edd
+F src/sqliteInt.h 039b5309b49399340f260ad3d1e6b6fa13e04a7b587b9daa1e694ce0f13a04a3
 F src/sqliteLimit.h 6878ab64bdeb8c24a1d762d45635e34b96da21132179023338c93f820eee6728
 F src/status.c cb11f8589a6912af2da3bb1ec509a94dd8ef27df4d4c1a97e0bcf2309ece972b
 F src/table.c 0f141b58a16de7e2fbe81c308379e7279f4c6b50eb08efeec5892794a0ba30d1
@@ -2195,8 +2195,11 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
 F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
 F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P fc248a4a0a232a95a79e24e57faedb5d824c3bf0aa62054b72339257dc9c18b3
-R f55a5b83388b86278f89f11080405f1e
+P 77a76654e64c05f2c67be120f579fc60924ff51a9d0fa2cf9eb158f40171b5f5
+R 4c7ee9fc9c8b7335da98fdebc409f26a
+T *branch * in-bloom
+T *sym-in-bloom *
+T -sym-trunk *
 U drh
-Z dda9ed678e9e469573614b965067e0d5
+Z e24cefbcb7cf0e7de9d7a748b98704bf
 # Remove this line to create a well-formed Fossil manifest.
index 632f9e6c5451c37f3777e11a27c497fbf776aa6c..bee2a32214912a86db5a5bb5a8c77848b77608b3 100644 (file)
@@ -1 +1 @@
-77a76654e64c05f2c67be120f579fc60924ff51a9d0fa2cf9eb158f40171b5f5
+1933496539c19cbf429a39d6b0b1c6b1b2af50733a3c4aea4920990ced652f6a
index 6d2b6d1339097e57556c6361e7bf70b4a4365b3d..6d67d77f68d68c24a61d1b3b05745525be48f904 100644 (file)
@@ -3536,19 +3536,34 @@ void sqlite3CodeRhsOfIN(
       SelectDest dest;
       int i;
       int rc;
+      int addrBloom = 0;
       sqlite3SelectDestInit(&dest, SRT_Set, iTab);
       dest.zAffSdst = exprINAffinity(pParse, pExpr);
       pSelect->iLimit = 0;
+      if( addrOnce && OptimizationEnabled(pParse->db, SQLITE_BloomFilter) ){
+        int regBloom = ++pParse->nMem;
+        addrBloom = sqlite3VdbeAddOp2(v, OP_Blob, 10000, regBloom);
+        VdbeComment((v, "Bloom filter"));
+        dest.iSDParm2 = regBloom;
+      }
       testcase( pSelect->selFlags & SF_Distinct );
       testcase( pKeyInfo==0 ); /* Caused by OOM in sqlite3KeyInfoAlloc() */
       pCopy = sqlite3SelectDup(pParse->db, pSelect, 0);
       rc = pParse->db->mallocFailed ? 1 :sqlite3Select(pParse, pCopy, &dest);
       sqlite3SelectDelete(pParse->db, pCopy);
       sqlite3DbFree(pParse->db, dest.zAffSdst);
+      if( addrBloom ){
+        sqlite3VdbeGetOp(v, addrOnce)->p3 = dest.iSDParm2;
+        if( dest.iSDParm2==0 ){
+          sqlite3VdbeChangeToNoop(v, addrBloom);
+        }else{
+          sqlite3VdbeGetOp(v, addrOnce)->p3 = dest.iSDParm2;
+        }
+      }
       if( rc ){
         sqlite3KeyInfoUnref(pKeyInfo);
         return;
-      }     
+      }
       assert( pKeyInfo!=0 ); /* OOM will cause exit after sqlite3Select() */
       assert( pEList!=0 );
       assert( pEList->nExpr>0 );
@@ -3987,6 +4002,15 @@ static void sqlite3ExprCodeIN(
     sqlite3VdbeAddOp4(v, OP_Affinity, rLhs, nVector, 0, zAff, nVector);
     if( destIfFalse==destIfNull ){
       /* Combine Step 3 and Step 5 into a single opcode */
+      if( ExprHasProperty(pExpr, EP_Subrtn) ){
+        const VdbeOp *pOp = sqlite3VdbeGetOp(v, pExpr->y.sub.iAddr);
+        assert( pOp->opcode==OP_Once || pParse->nErr );
+        if( pOp->opcode==OP_Once && pOp->p3>0 ){
+          assert( OptimizationEnabled(pParse->db, SQLITE_BloomFilter) );
+          sqlite3VdbeAddOp4Int(v, OP_Filter, pOp->p3, destIfFalse,
+                               rLhs, nVector); VdbeCoverage(v);
+        }
+      }
       sqlite3VdbeAddOp4Int(v, OP_NotFound, iTab, destIfFalse,
                            rLhs, nVector); VdbeCoverage(v);
       goto sqlite3ExprCodeIN_finished;
index b43861cef2f5b4f237135b43986a65acf2d7014c..a1da3f51beae6b7e0ec89f6bda0126f9c6a45e4f 100644 (file)
@@ -1377,12 +1377,17 @@ static void selectInnerLoop(
         ** case the order does matter */
         pushOntoSorter(
             pParse, pSort, p, regResult, regOrig, nResultCol, nPrefixReg);
+        pDest->iSDParm2 = 0; /* Signal that any Bloom filter is unpopulated */
       }else{
         int r1 = sqlite3GetTempReg(pParse);
         assert( sqlite3Strlen30(pDest->zAffSdst)==nResultCol );
         sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult, nResultCol,
             r1, pDest->zAffSdst, nResultCol);
         sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, regResult, nResultCol);
+        if( pDest->iSDParm2 ){
+          sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pDest->iSDParm2, 0,
+                               regResult, nResultCol);
+        }
         sqlite3ReleaseTempReg(pParse, r1);
       }
       break;
@@ -3316,6 +3321,10 @@ static int generateOutputSubroutine(
           r1, pDest->zAffSdst, pIn->nSdst);
       sqlite3VdbeAddOp4Int(v, OP_IdxInsert, pDest->iSDParm, r1,
                            pIn->iSdst, pIn->nSdst);
+      if( pDest->iSDParm2>0 ){
+        sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pDest->iSDParm2, 0,
+                             pIn->iSdst, pIn->nSdst);
+      }
       sqlite3ReleaseTempReg(pParse, r1);
       break;
     }
index fd9faf6b43b859da6d255cd2f96dc96aad6795b2..df6774aefdeab5e05880ca2c849f9f851ec7d011 100644 (file)
@@ -3622,7 +3622,11 @@ struct Select {
 **     SRT_Set         The result must be a single column.  Store each
 **                     row of result as the key in table pDest->iSDParm.
 **                     Apply the affinity pDest->affSdst before storing
-**                     results.  Used to implement "IN (SELECT ...)".
+**                     results.  If pDest->iSDParm2 is positive, then it is
+**                     a register holding a Bloom filter for the IN operator
+**                     that should be populated in addition to the
+**                     pDest->iSDParm table.  This SRT is used to
+**                     implement "IN (SELECT ...)".
 **
 **     SRT_EphemTab    Create an temporary table pDest->iSDParm and store
 **                     the result there. The cursor is left open after