From 5baaf40af1898bdcfcdb5c30405b6da1342e2011 Mon Sep 17 00:00:00 2001 From: drh <> Date: Mon, 6 Dec 2021 13:07:28 +0000 Subject: [PATCH] Attempt to vary the size of Bloom filters based on an estimate of how many keys the filter will hold. FossilOrigin-Name: a7adcf69088cba4b86cc5731a45c9a5263af4355bc0a38f5225cab421c915f7f --- manifest | 18 +++++------ manifest.uuid | 2 +- src/malloc.c | 19 ++++++++++++ src/sqliteInt.h | 1 + src/vdbe.c | 81 +++++++++++++++++++++++++++++++++++++------------ src/where.c | 5 +-- 6 files changed, 95 insertions(+), 31 deletions(-) diff --git a/manifest b/manifest index 47992d2a2e..648c9a9161 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Run\sas\smany\sBloom\sfilters\sas\spossible\sbefore\sindex\slookups. -D 2021-12-05T20:19:47.744 +C Attempt\sto\svary\sthe\ssize\sof\sBloom\sfilters\sbased\son\san\sestimate\sof\show\smany\nkeys\sthe\sfilter\swill\shold. +D 2021-12-06T13:07:28.112 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -515,7 +515,7 @@ F src/insert.c e0293a6f686e18cb2c9dd0619a731518e0109d7e1f1db1932974659e7843cfd1 F src/legacy.c d7874bc885906868cd51e6c2156698f2754f02d9eee1bae2d687323c3ca8e5aa F src/loadext.c e1dcff1c916bf6834e150b492eddda5d9792453182d2ad64294d2266b6e93c4c F src/main.c 7bd4fdc41ef53535271a1816ff043ba153cda03842b444b6e2f57b27b2cb9090 -F src/malloc.c ef796bcc0e81d845d59a469f1cf235056caf9024172fd524e32136e65593647b +F src/malloc.c 183c2bf45cee1589254e4047e220f1ffbcc0a3bc8e4fe46fe64ba5db447a79af F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 F src/mem1.c c12a42539b1ba105e3707d0e628ad70e611040d8f5e38cf942cee30c867083de F src/mem2.c c8bfc9446fd0798bddd495eb5d9dbafa7d4b7287d8c22d50a83ac9daa26d8a75 @@ -555,7 +555,7 @@ F src/shell.c.in 
e7ee6517544d075d9f06ee2571567026b89cf9fbeef16a74918019b1cb42576 F src/sqlite.h.in 5cd209ac7dc4180f0e19292846f40440b8488015849ca0110c70b906b57d68f0 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h 8ff2fd2c166150b2e48639f5e506fb44e29f1a3f65031710b9e89d1c126ac839 -F src/sqliteInt.h 178eb899c1edc08dcddf37e79dfaa39404a1f5d44a1d512509cd5d41867aa836 +F src/sqliteInt.h ab40ea9c294c656e0d6ab14e67d58f10b015a77e962dd075fdbe3ea3cc1a976b F src/sqliteLimit.h d7323ffea5208c6af2734574bae933ca8ed2ab728083caa117c9738581a31657 F src/status.c 4b8bc2a6905163a38b739854a35b826c737333fab5b1f8e03fa7eb9a4799c4c1 F src/table.c 0f141b58a16de7e2fbe81c308379e7279f4c6b50eb08efeec5892794a0ba30d1 @@ -622,7 +622,7 @@ F src/upsert.c 8789047a8f0a601ea42fa0256d1ba3190c13746b6ba940fe2d25643a7e991937 F src/utf.c ee39565f0843775cc2c81135751ddd93eceb91a673ea2c57f61c76f288b041a0 F src/util.c 30df8356e231dad33be10bb27897655002668343280004ba28c734489414a167 F src/vacuum.c 6c38ddc52f0619865c91dae9c441d4d48bf3040d7dc1bc5b22da1e45547ed0b3 -F src/vdbe.c 94af4eba93ad9ca7dd929cd19792ce2a5feb4797a7a64ec3cb3b2277e1467a8b +F src/vdbe.c 6176125ea038f593597b5897898328142b5253201d321369df74e187b2b1abaa F src/vdbe.h 25dabb25c7e157b84e59260cfb5b466c3ac103ede9f36f4db371332c47601abe F src/vdbeInt.h fd1103c7ecec8c84164038c8eacaa4a633cb3c10a2f725aae7bd865d4a4fcceb F src/vdbeapi.c 22c79072ae7d8a01e9bcae8ba16e918d60d202eaa9553b5fda38f99f7464d99a @@ -637,7 +637,7 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c ed0398a7adf02c31e34aada42cc86c58f413a7afe5f741a5d373ad087abde028 F src/wal.h c3aa7825bfa2fe0d85bef2db94655f99870a285778baa36307c0a16da32b226a F src/walker.c f890a3298418d7cba3b69b8803594fdc484ea241206a8dfa99db6dd36f8cbb3b -F src/where.c b07c5eefecffa1b69b91c366a83c69d01a83f1c900b9d9b1ffb6eb5ab59902a1 +F src/where.c 04ead529a272341a4cae3ef0dcd2f7675d433627acc5fb87fed1407e7b3d8614 F src/whereInt.h 5c6601d6d0b7b8936482506d2d835981cc6efcd8e106a829893a27a14cfb10b8 F 
src/wherecode.c fa667db48db1077b42731bfd97e9181b39409ffdc7051162ecae6895ca71ad2c F src/whereexpr.c 19394cb463003e9cc9305730b1508b8817a22bb7247170d81234b691a7f05b89 @@ -1933,7 +1933,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 5be2470f8755ef454f813c880e659bdbf82f2396be9320cf3079cd4ca8e81a19 -R da3cb867f9ab99abba060de93457c8d9 +P 06f6fefd67086896bc49272c6319545ff6c6792f18babe23aced27b60b032119 +R 5f90145148ed31800619d7b25d77beb7 U drh -Z d87a509afa829d1cf21b5a6dcadef441 +Z 3013773c2d9a03770ac02d1fdd22abd3 diff --git a/manifest.uuid b/manifest.uuid index 669a897f37..7894b82c00 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -06f6fefd67086896bc49272c6319545ff6c6792f18babe23aced27b60b032119 \ No newline at end of file +a7adcf69088cba4b86cc5731a45c9a5263af4355bc0a38f5225cab421c915f7f \ No newline at end of file diff --git a/src/malloc.c b/src/malloc.c index 932cecc210..ab9b37ddad 100644 --- a/src/malloc.c +++ b/src/malloc.c @@ -211,6 +211,25 @@ sqlite3_int64 sqlite3_memory_highwater(int resetFlag){ return mx; } +/* +** Return an estimate of the amount of unallocated memory. +** +** This the hard heap limit minus the current memory usage. It might +** not be possible to allocate this much memory all at once. This is +** only an estimate. 
+*/ +sqlite3_int64 sqlite3EstMemoryAvailable(void){ + sqlite3_int64 n; + sqlite3_mutex_enter(mem0.mutex); + n = mem0.alarmThreshold; + if( n<=0 ) n = mem0.hardLimit; + sqlite3_mutex_leave(mem0.mutex); + if( n<=0 ) n = LARGEST_INT64; + n -= sqlite3_memory_used(); + if( n<0 ) n = 0; + return n; +} + /* ** Trigger the alarm */ diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 896b2aa422..9091f02115 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -4293,6 +4293,7 @@ void sqlite3MemSetDefault(void); void sqlite3BenignMallocHooks(void (*)(void), void (*)(void)); #endif int sqlite3HeapNearlyFull(void); +sqlite3_int64 sqlite3EstMemoryAvailable(void); /* ** On systems with ample stack space and that support alloca(), make diff --git a/src/vdbe.c b/src/vdbe.c index acbbee892f..487d731af3 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -672,17 +672,29 @@ static Mem *out2Prerelease(Vdbe *p, VdbeOp *pOp){ } /* -** Default size of a bloom filter, in bytes +** The minimum size (in bytes) for a Bloom filter. +** +** No Bloom filter will be smaller than this many bytes. But they +** may be larger. +*/ +#ifndef SQLITE_BLOOM_MIN +# define SQLITE_BLOOM_MIN 10000 +#endif + +/* +** The maximum size (in bytes) for a Bloom filter. */ -#define SQLITE_BLOOM_SZ 10000 +#ifndef SQLITE_BLOOM_MAX +# define SQLITE_BLOOM_MAX 1000000 +#endif /* ** Compute a bloom filter hash using pOp->p4.i registers from aMem[] beginning ** with pOp->p3. Return the hash. 
*/ -static unsigned int filterHash(const Mem *aMem, const Op *pOp){ +static u64 filterHash(const Mem *aMem, const Op *pOp){ int i, mx; - u32 h = 0; + u64 h = 0; i = pOp->p3; assert( pOp->p4type==P4_INT32 ); @@ -690,15 +702,15 @@ static unsigned int filterHash(const Mem *aMem, const Op *pOp){ for(i=pOp->p3, mx=i+pOp->p4.i; i<mx; i++){ const Mem *p = &aMem[i]; if( p->flags & (MEM_Int|MEM_IntReal) ){ - h += (u32)(p->u.i&0xffffffff); + h += p->u.i; }else if( p->flags & MEM_Real ){ - h += (u32)(sqlite3VdbeIntValue(p)&0xffffffff); + h += sqlite3VdbeIntValue(p); }else if( p->flags & (MEM_Str|MEM_Blob) ){ h += p->n; if( p->flags & MEM_Zero ) h += p->u.nZero; } } - return h % (SQLITE_BLOOM_SZ*8); + return h; } /* @@ -8157,15 +8169,44 @@ case OP_Function: { /* group */ break; } -/* Opcode: FilterInit P1 * * * * -** Synopsis: filter(P1) = empty +/* Opcode: FilterInit P1 P2 * * * ** ** Initialize register P1 so that is an empty bloom filter. +** +** If P2 is positive, it is a register that holds an estimate on +** the number of entries to be added to the Bloom filter. The +** Bloom filter is sized accordingly. If P2 is zero or negative, +** then a default-size Bloom filter is created. +** +** It is ok for P1 and P2 to be the same register. In that case the +** integer value originally in that register will be overwritten +** with the new empty bloom filter.
*/ case OP_FilterInit: { + i64 n, mx; assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); pIn1 = &aMem[pOp->p1]; - sqlite3VdbeMemSetZeroBlob(pIn1, SQLITE_BLOOM_SZ); + if( pOp->p2>0 ){ + assert( pOp->p2<=(p->nMem+1 - p->nCursor) ); + n = sqlite3VdbeIntValue(&aMem[pOp->p2]); + if( n<SQLITE_BLOOM_MIN ){ + n = SQLITE_BLOOM_MIN; + }else if( n>SQLITE_BLOOM_MAX ){ + n = SQLITE_BLOOM_MAX; + } + }else{ + n = SQLITE_BLOOM_MIN; + } + mx = sqlite3EstMemoryAvailable()/2; + if( n>mx && mx>SQLITE_BLOOM_MIN ){ + n = mx; + } +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("Bloom-filter size: %llu bytes\n", n); + } +#endif + sqlite3VdbeMemSetZeroBlob(pIn1, n); if( sqlite3VdbeMemExpandBlob(pIn1) ) goto no_mem; break; } @@ -8177,12 +8218,12 @@ case OP_FilterInit: { ** add that hash to the bloom filter contained in r[P1]. */ case OP_FilterAdd: { - u32 h; + u64 h; assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); pIn1 = &aMem[pOp->p1]; assert( pIn1->flags & MEM_Blob ); - assert( pIn1->n==SQLITE_BLOOM_SZ ); + assert( pIn1->n>0 ); h = filterHash(aMem, pOp); #ifdef SQLITE_DEBUG if( db->flags&SQLITE_VdbeTrace ){ @@ -8190,10 +8231,10 @@ case OP_FilterAdd: { for(ii=pOp->p3; ii<pOp->p3+pOp->p4.i; ii++){ registerTrace(ii, &aMem[ii]); } - printf("hash = %u\n", h); + printf("hash: %llu modulo %d -> %u\n", h, pIn1->n, (int)(h%pIn1->n)); } #endif - assert( h>=0 && h<SQLITE_BLOOM_SZ*8 ); + h %= pIn1->n; pIn1->z[h/8] |= 1<<(h&7); break; } @@ -8213,12 +8254,14 @@ case OP_FilterAdd: { ** false positive - if the jump is taken when it should fall through.
*/ case OP_Filter: { /* jump */ - u32 h; + u64 h; assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); pIn1 = &aMem[pOp->p1]; - assert( pIn1->flags & MEM_Blob ); - assert( pIn1->n==SQLITE_BLOOM_SZ ); + if( (pIn1->flags & MEM_Blob)==0 || NEVER(pIn1->n<=0) ){ + VdbeBranchTaken(0, 2); + break; + } h = filterHash(aMem, pOp); #ifdef SQLITE_DEBUG if( db->flags&SQLITE_VdbeTrace ){ @@ -8226,10 +8269,10 @@ case OP_Filter: { /* jump */ for(ii=pOp->p3; ii<pOp->p3+pOp->p4.i; ii++){ registerTrace(ii, &aMem[ii]); } - printf("hash = %u\n", h); + printf("hash: %llu modulo %d -> %u\n", h, pIn1->n, (int)(h%pIn1->n)); } #endif - assert( h>=0 && h<SQLITE_BLOOM_SZ*8 ); + h %= pIn1->n; if( (pIn1->z[h/8] & (1<<(h&7)))==0 ){ VdbeBranchTaken(1, 2); goto jump_to_p2; diff --git a/src/where.c b/src/where.c index 1d88c3ed30..233e77b6a7 100644 --- a/src/where.c +++ b/src/where.c @@ -1010,8 +1010,9 @@ static SQLITE_NOINLINE void constructBloomFilter( addrCont = sqlite3VdbeMakeLabel(pParse); iCur = pLevel->iTabCur; pLevel->regFilter = ++pParse->nMem; - sqlite3VdbeAddOp1(v, OP_FilterInit, pLevel->regFilter); addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, iCur); VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_Count, iCur, pLevel->regFilter, 1); + sqlite3VdbeAddOp2(v, OP_FilterInit, pLevel->regFilter, pLevel->regFilter); pWCEnd = &pWInfo->sWC.a[pWInfo->sWC.nTerm]; for(pTerm=pWInfo->sWC.a; pTerm<pWCEnd; pTerm++){ Expr *pExpr = pTerm->pExpr; @@ -1039,7 +1040,7 @@ static SQLITE_NOINLINE void constructBloomFilter( sqlite3ReleaseTempRange(pParse, r1, n); } sqlite3VdbeResolveLabel(v, addrCont); - sqlite3VdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop+1); + sqlite3VdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop+3); VdbeCoverage(v); sqlite3VdbeJumpHere(v, addrTop); pLoop->wsFlags &= ~WHERE_BLOOMFILTER; -- 2.47.2