From 2db144c33b39985b159ec9210cc13bdb53c00e1c Mon Sep 17 00:00:00 2001 From: drh <> Date: Wed, 1 Dec 2021 16:31:02 +0000 Subject: [PATCH] Add a Bloom filter to the automatic-index mechanism. FossilOrigin-Name: 50ac4de1d7cbb586ea7969e1ae80ea8b021e194edc2fa7db19374b4ee9369bee --- manifest | 27 ++++++------ manifest.uuid | 2 +- src/sqliteInt.h | 1 + src/vdbe.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++ src/vdbeInt.h | 2 +- src/vdbemem.c | 4 +- src/where.c | 8 ++++ src/whereInt.h | 1 + src/wherecode.c | 8 ++++ 9 files changed, 143 insertions(+), 16 deletions(-) diff --git a/manifest b/manifest index 1799086100..8fae6a4e21 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C In\sthe\sautomatic\sindex\sgenerator\slogic,\sbe\smore\sprecise\sabout\swhen\sa\npartial\sautomatic\sindex\sis\sallowed\sin\sorder\sto\scapture\smore\scases\swhere\sit\nis\slegal\sto\suse\sa\spartial\sautomatic\sindex. -D 2021-11-30T14:07:58.372 +C Add\sa\sBloom\sfilter\sto\sthe\sautomatic-index\smechanism. +D 2021-12-01T16:31:02.486 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -555,7 +555,7 @@ F src/shell.c.in 975f268ef261773fcbed1e519dfa10c4f33e8b1cffc12120563e61857fff07c F src/sqlite.h.in 5cd209ac7dc4180f0e19292846f40440b8488015849ca0110c70b906b57d68f0 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h 8ff2fd2c166150b2e48639f5e506fb44e29f1a3f65031710b9e89d1c126ac839 -F src/sqliteInt.h 193e716a67c877a6054a8c261c932bdc64f8c8be9b66388c74f21d94a259b7ce +F src/sqliteInt.h a44a3474b2247a82decc17644c68e00db5ba1bc239a6c005f6c08f6599083c01 F src/sqliteLimit.h d7323ffea5208c6af2734574bae933ca8ed2ab728083caa117c9738581a31657 F src/status.c 4b8bc2a6905163a38b739854a35b826c737333fab5b1f8e03fa7eb9a4799c4c1 F src/table.c 0f141b58a16de7e2fbe81c308379e7279f4c6b50eb08efeec5892794a0ba30d1 @@ -622,13 +622,13 @@ F src/upsert.c 8789047a8f0a601ea42fa0256d1ba3190c13746b6ba940fe2d25643a7e991937 F src/utf.c ee39565f0843775cc2c81135751ddd93eceb91a673ea2c57f61c76f288b041a0 F src/util.c 30df8356e231dad33be10bb27897655002668343280004ba28c734489414a167 F src/vacuum.c 6c38ddc52f0619865c91dae9c441d4d48bf3040d7dc1bc5b22da1e45547ed0b3 -F src/vdbe.c e98f1baf54a00db2c4669dbd04f8bbc89b5909a5b43e76fbbbf1a97007adba2b +F src/vdbe.c a77525ddf690771b13db4114dce5305657efe6b7393920f2cbaa2447e8b83129 F src/vdbe.h 25dabb25c7e157b84e59260cfb5b466c3ac103ede9f36f4db371332c47601abe -F src/vdbeInt.h 31fbabdc1ed61d9695337dfe5269ea94e1cf615c17f5cafeaa1bb01066820bab +F src/vdbeInt.h fd1103c7ecec8c84164038c8eacaa4a633cb3c10a2f725aae7bd865d4a4fcceb F src/vdbeapi.c 22c79072ae7d8a01e9bcae8ba16e918d60d202eaa9553b5fda38f99f7464d99a F src/vdbeaux.c 21db442d159fd745a7693d157b5f998260b6af4ca60de559fa3b7b68c7405af2 F src/vdbeblob.c 29c4118f7ee615cdee829e8401f6ead1b96b95d545b4de0042f6de39c962c652 -F src/vdbemem.c a3d91dc9bb9ef725db77e4e9de7e1acef43192c9f8406c307665d503e3c2837c +F src/vdbemem.c da4d594084d581be6436582bb44bb128feeb138a3e6c313eda6749ebdc3a65ec F src/vdbesort.c 513b481c8bab4a6578c92194a60cf3bc3b48736e4a53f8d2d7918121c5b594e7 F src/vdbetrace.c fe0bc29ebd4e02c8bc5c1945f1d2e6be5927ec12c06d89b03ef2a4def34bf823 F src/vdbevtab.c f99b275366c5fc5e2d99f734729880994ab9500bdafde7fae3b02d562b9d323c @@ -637,9 +637,9 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c ed0398a7adf02c31e34aada42cc86c58f413a7afe5f741a5d373ad087abde028 F src/wal.h c3aa7825bfa2fe0d85bef2db94655f99870a285778baa36307c0a16da32b226a F src/walker.c f890a3298418d7cba3b69b8803594fdc484ea241206a8dfa99db6dd36f8cbb3b -F src/where.c 4b276017881185d0e1dc984df66c835cde6faf03834ef9ff34f48653f9144dec -F src/whereInt.h 83877a75a1bce056ea44aff02f1dfa958ad1d6038c213ddadb8652003b45151d -F src/wherecode.c 1f5b62f46d284c8886945eb7438415bc27e23e87bb60b9ee468fa6bd31268f33 +F src/where.c 1b8a6c53c34c59c765190484d245c457bedc211325de4c75a2070432fd67e314 +F src/whereInt.h 23f990791dc428f0bbb31f8ef5026b78b6290272de86185f4822c85f4c774803 +F src/wherecode.c 620e81077588a727876a86f87aab310c02fc4e4960691e48153d8a87ca1fa453 F src/whereexpr.c 17bdbf4f5b490e70a18635498f0b910a558f953a9bf80af7f19cbde6e60e6825 F src/window.c 5d3b397b0c026d0ff5890244ac41359e524c01ae31e78782e1ff418c3e271a9e F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 @@ -1933,7 +1933,10 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 19c51b46e4095ee28badb10f4e08bbd330bda320c9a8806e93b8fc60ba211a2e -R 0fbfb4a13255ae0bad405a99d2bdf9ec +P 664b461bb5063d98047fc2e51a3827235cd9f55ca2e23cb66e719eac53fb5437 +R 157be6898a36d49d7a017b78b4cdbad6 +T *branch * bloom-filter +T *sym-bloom-filter * +T -sym-trunk * U drh -Z 511f59b4656ae7d0825d672340080bad +Z 5c7d4c8ea12d463e648cb44a868717dc diff --git a/manifest.uuid b/manifest.uuid index 8a15f64b8a..2cbf640244 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -664b461bb5063d98047fc2e51a3827235cd9f55ca2e23cb66e719eac53fb5437 \ No newline at end of file +50ac4de1d7cbb586ea7969e1ae80ea8b021e194edc2fa7db19374b4ee9369bee \ No newline at end of file diff --git a/src/sqliteInt.h b/src/sqliteInt.h index b45151104a..d404b7b4fc 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -1761,6 +1761,7 @@ struct sqlite3 { #define SQLITE_SeekScan 0x00020000 /* The OP_SeekScan optimization */ #define SQLITE_OmitOrderBy 0x00040000 /* Omit pointless ORDER BY */ /* TH3 expects this value ^^^^^^^^^^ to be 0x40000. Coordinate any change */ +#define SQLITE_BloomFilter 0x00080000 /* Use a Bloom filter on searches */ #define SQLITE_AllOpts 0xffffffff /* All optimizations */ /* diff --git a/src/vdbe.c b/src/vdbe.c index 3476c02daa..e38527356c 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -671,6 +671,35 @@ static Mem *out2Prerelease(Vdbe *p, VdbeOp *pOp){ } } +/* +** Default size of a bloom filter, in bytes +*/ +#define SQLITE_BLOOM_SZ 10000 + +/* +** Compute a bloom filter hash using pOp->p4.i registers from aMem[] beginning +** with pOp->p3. Return the hash. +*/ +static unsigned int filterHash(const Mem *aMem, const Op *pOp){ + int i, mx; + u32 h = 0; + + i = pOp->p3; + assert( pOp->p4type==P4_INT32 ); + mx = i + pOp->p4.i; + for(i=pOp->p3, mx=i+pOp->p4.i; iflags & (MEM_Int|MEM_IntReal) ){ + h += (u32)(p->u.i&0xffffffff); + }else if( p->flags & MEM_Real ){ + h += (u32)(sqlite3VdbeIntValue(p)&0xffffffff); + }else if( p->flags & (MEM_Str|MEM_Blob) ){ + h += p->n; + } + } + return h % (SQLITE_BLOOM_SZ*8); +} + /* ** Return the symbolic name for the data type of a pMem */ @@ -8129,6 +8158,83 @@ case OP_Function: { /* group */ break; } +/* Opcode: FilterInit P1 * * * * +** Synopsis: filter(P1) = empty +** +** Initialize register P1 so that is an empty bloom filter. +*/ +case OP_FilterInit: { + assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); + pIn1 = &aMem[pOp->p1]; + sqlite3VdbeMemSetZeroBlob(pIn1, SQLITE_BLOOM_SZ); + if( sqlite3VdbeMemExpandBlob(pIn1) ) goto no_mem; + break; +} + +/* Opcode: FilterAdd P1 * P3 P4 * +** Synopsis: filter(P1) += key(P3@P4) +** +** Compute a hash on the P4 registers starting with r[P3] and +** add that hash to the bloom filter contained in r[P1]. +*/ +case OP_FilterAdd: { + u32 h; + + assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); + pIn1 = &aMem[pOp->p1]; + assert( pIn1->flags & MEM_Blob ); + assert( pIn1->n==SQLITE_BLOOM_SZ ); + h = filterHash(aMem, pOp); +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + int ii; + for(ii=pOp->p3; iip3+pOp->p4.i; ii++){ + registerTrace(ii, &aMem[ii]); + } + printf("hash = %u\n", h); + } +#endif + assert( h>=0 && hz[h/8] |= 1<<(h&7); + break; +} + +/* Opcode: Filter P1 P2 P3 P4 * +** Synopsis: if key(P3@P4) not in filter(P1) goto P2 +** +** Compute a hash on the key contained in the P4 registers starting +** with r[P3]. Check to see if that hash is found in the +** bloom filter hosted by register P1. If it is not present then +** maybe jump to P2. Otherwise fall through. +** +** False negatives are harmless. It is always safe to fall through, +** even if the value is in the bloom filter. A false negative causes +** more CPU cycles to be used, but it should still yield the correct +** answer. However, an incorrect answer may well arise from a +** false positive - if the jump is taken when it should fall through. +*/ +case OP_Filter: { /* jump */ + u32 h; + + assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); + pIn1 = &aMem[pOp->p1]; + assert( pIn1->flags & MEM_Blob ); + assert( pIn1->n==SQLITE_BLOOM_SZ ); + h = filterHash(aMem, pOp); +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + int ii; + for(ii=pOp->p3; iip3+pOp->p4.i; ii++){ + registerTrace(ii, &aMem[ii]); + } + printf("hash = %u\n", h); + } +#endif + assert( h>=0 && hz[h/8] & (1<<(h&7)))==0 ) goto jump_to_p2; + break; +} + /* Opcode: Trace P1 P2 * P4 * ** ** Write P4 on the statement trace output if statement tracing is diff --git a/src/vdbeInt.h b/src/vdbeInt.h index 599d064165..c76cdbfdbc 100644 --- a/src/vdbeInt.h +++ b/src/vdbeInt.h @@ -538,7 +538,7 @@ int sqlite3VdbeMemSetRowSet(Mem*); int sqlite3VdbeMemMakeWriteable(Mem*); int sqlite3VdbeMemStringify(Mem*, u8, u8); int sqlite3IntFloatCompare(i64,double); -i64 sqlite3VdbeIntValue(Mem*); +i64 sqlite3VdbeIntValue(const Mem*); int sqlite3VdbeMemIntegerify(Mem*); double sqlite3VdbeRealValue(Mem*); int sqlite3VdbeBooleanValue(Mem*, int ifNull); diff --git a/src/vdbemem.c b/src/vdbemem.c index 570a2eb38c..5a9d15f465 100644 --- a/src/vdbemem.c +++ b/src/vdbemem.c @@ -596,12 +596,12 @@ static SQLITE_NOINLINE i64 doubleToInt64(double r){ ** ** If pMem represents a string value, its encoding might be changed. */ -static SQLITE_NOINLINE i64 memIntValue(Mem *pMem){ +static SQLITE_NOINLINE i64 memIntValue(const Mem *pMem){ i64 value = 0; sqlite3Atoi64(pMem->z, &value, pMem->n, pMem->enc); return value; } -i64 sqlite3VdbeIntValue(Mem *pMem){ +i64 sqlite3VdbeIntValue(const Mem *pMem){ int flags; assert( pMem!=0 ); assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); diff --git a/src/where.c b/src/where.c index 54e4ea8196..eaa45b0198 100644 --- a/src/where.c +++ b/src/where.c @@ -904,6 +904,10 @@ static void constructAutomaticIndex( sqlite3VdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol+1); sqlite3VdbeSetP4KeyInfo(pParse, pIdx); VdbeComment((v, "for %s", pTable->zName)); + if( OptimizationEnabled(pParse->db, SQLITE_BloomFilter) ){ + pLevel->regFilter = ++pParse->nMem; + sqlite3VdbeAddOp1(v, OP_FilterInit, pLevel->regFilter); + } /* Fill the automatic index with content */ pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom]; @@ -926,6 +930,10 @@ static void constructAutomaticIndex( regBase = sqlite3GenerateIndexKey( pParse, pIdx, pLevel->iTabCur, regRecord, 0, 0, 0, 0 ); + if( pLevel->regFilter ){ + sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pLevel->regFilter, 0, + regBase, pLoop->u.btree.nEq); + } sqlite3VdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord); sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); if( pPartial ) sqlite3VdbeResolveLabel(v, iContinue); diff --git a/src/whereInt.h b/src/whereInt.h index f651e790cc..5b076e40e2 100644 --- a/src/whereInt.h +++ b/src/whereInt.h @@ -64,6 +64,7 @@ struct WhereLevel { u32 iLikeRepCntr; /* LIKE range processing counter register (times 2) */ int addrLikeRep; /* LIKE range processing address */ #endif + int regFilter; /* Bloom filter */ u8 iFrom; /* Which entry in the FROM clause */ u8 op, p3, p5; /* Opcode, P3 & P5 of the opcode that ends the loop */ int p1, p2; /* Operands of the opcode used to end the loop */ diff --git a/src/wherecode.c b/src/wherecode.c index 460ac4fe30..637db432ea 100644 --- a/src/wherecode.c +++ b/src/wherecode.c @@ -1511,6 +1511,10 @@ Bitmask sqlite3WhereCodeOneLoopStart( iRowidReg = codeEqualityTerm(pParse, pTerm, pLevel, 0, bRev, iReleaseReg); if( iRowidReg!=iReleaseReg ) sqlite3ReleaseTempReg(pParse, iReleaseReg); addrNxt = pLevel->addrNxt; + if( pLevel->regFilter ){ + sqlite3VdbeAddOp4Int(v, OP_Filter, pLevel->regFilter, addrNxt, + iRowidReg, 1); + } sqlite3VdbeAddOp3(v, OP_SeekRowid, iCur, addrNxt, iRowidReg); VdbeCoverage(v); pLevel->op = OP_Noop; @@ -1836,6 +1840,10 @@ Bitmask sqlite3WhereCodeOneLoopStart( sqlite3VdbeAddOp2(v, OP_Integer, 1, regBignull); VdbeComment((v, "NULL-scan pass ctr")); } + if( pLevel->regFilter ){ + sqlite3VdbeAddOp4Int(v, OP_Filter, pLevel->regFilter, addrNxt, + regBase, nConstraint); + } op = aStartOp[(start_constraints<<2) + (startEq<<1) + bRev]; assert( op!=0 ); -- 2.47.2