git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
In the Bloom filter optimization, hash all strings and blobs into the same
author drh <>
Tue, 28 Feb 2023 14:28:54 +0000 (14:28 +0000)
committer drh <>
Tue, 28 Feb 2023 14:28:54 +0000 (14:28 +0000)
In the Bloom filter optimization, hash all strings and blobs into the same
value, because we do not know if two different strings might compare equal
even if they have different byte sequences, due to collating functions.
Formerly, the hash of a string or blob was just its length.  This could
all be improved.  Fix for the issue reported by
[forum:/forumpost/0846211821|forum post 0846211821].

FossilOrigin-Name: 090304b870419acb5b05205a07fc75830b556928149f76a843cda526f77a6fc0

manifest
manifest.uuid
src/vdbe.c
test/bloom1.test

index e118369f8fd985a59176f962cbe6f0fdf5ddd36e..3aaadcecc8fe6802ff812ddc6fae0079fe0114b4 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C When\san\sautomatic\sindex\screates\sa\sBloom\sfilter,\sshow\sthat\sin\sthe\nEXPLAIN\sQUERY\sPLAN\soutput.
-D 2023-02-28T13:46:01.925
+C In\sthe\sBloom\sfilter\soptimization,\shash\sall\sstrings\sand\sblobs\sinto\sthe\ssame\nvalue,\sbecause\swe\sdo\snot\sknow\sif\stwo\sdifferent\sstrings\smight\scompare\sequal\neven\sif\sthey\shave\sdifferent\sbyte\ssequences,\sdue\sto\scollating\sfunctions.\nFormerly,\sthe\shash\sof\sa\sstring\sor\sblob\swas\sjust\sits\slength.\s\sThis\scould\nall\sbe\simproved.\s\sFix\sfor\sthe\sissue\sreported\sby\n[forum:/forumpost/0846211821|forum\spost\s0846211821].
+D 2023-02-28T14:28:54.933
 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
 F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@@ -693,7 +693,7 @@ F src/upsert.c 5303dc6c518fa7d4b280ec65170f465c7a70b7ac2b22491598f6d0b4875b3145
 F src/utf.c ee39565f0843775cc2c81135751ddd93eceb91a673ea2c57f61c76f288b041a0
 F src/util.c 3ff7bc2b48dd425b1448304bb86273b05da1621f136d51dbb9789f8803559a1f
 F src/vacuum.c 84ce7f01f8a7a08748e107a441db83bcec13970190ddcb0c9ff522adbc1c23fd
-F src/vdbe.c 0bf4804f6e9cae872502aa4533a62ba3f6e97ee28523302deb27f9c990b5f44f
+F src/vdbe.c b3fd04b0643edd7e0a4356aff6d2cf50f04d0e182e292c3a330d1afffe3100e1
 F src/vdbe.h 73b904a6b3bb27f308c6cc287a5751ebc7f1f89456be0ed068a12b92844c6e8c
 F src/vdbeInt.h a4147a4ddf613cb1bcb555ace9e9e74a9c099d65facd88155f191b1fb4d74cfb
 F src/vdbeapi.c 40c47b1528d308a322203de21d2e0d711753257ed9771771b6129214b1d65932
@@ -816,7 +816,7 @@ F test/bind2.test 918bc35135f4141809ead7585909cde57d44db90a7a62aef540127148f91aa
 F test/bindxfer.test efecd12c580c14df5f4ad3b3e83c667744a4f7e0
 F test/bitvec.test 75894a880520164d73b1305c1c3f96882615e142
 F test/blob.test e7ac6c7d3a985cc4678c64f325292529a69ae252
-F test/bloom1.test 2785a190fcc2a5e170e5d38b08aca8ff0f3e3b4a74d47453d6ac1bd355180a6a
+F test/bloom1.test 589361c1f20158a8583863738c883f0e73e82d18422c9b4ed9c7068c13c2d310
 F test/boundary1.tcl 6421b2d920d8b09539503a8673339d32f7609eb1
 F test/boundary1.test 66d7f4706ccdb42d58eafdb081de07b0eb42d77b
 F test/boundary2.tcl e34ef4e930cf1083150d4d2c603e146bd3b76bcb
@@ -2048,8 +2048,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
 F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
 F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P b2b91c7cb40f1efe800059614e34823411016a3ece3f988e1574aecadd4c3114
-R ad671f91f6548ed5a58a53003a52602c
+P d7b2ac1c1a31fa4285cf6df0995db7e7705bb6a1bc94850c14c94cc4e3eb239a
+R 2983937b0d34422b7617ca6b30588a0e
 U drh
-Z 2bfb99217fbdf0c560b29d9c5fd7f671
+Z c3cca21d42699bd3e59fea902b259dbf
 # Remove this line to create a well-formed Fossil manifest.
index 4cb695987dea6af6220cd9228dd0dbd30f1a5e48..e66a052f86b08826b6b251ee3cea474d0ce36e9a 100644 (file)
@@ -1 +1 @@
-d7b2ac1c1a31fa4285cf6df0995db7e7705bb6a1bc94850c14c94cc4e3eb239a
\ No newline at end of file
+090304b870419acb5b05205a07fc75830b556928149f76a843cda526f77a6fc0
\ No newline at end of file
index f3853e7fd2f8ac02e55c814ab7660afa3a23b010..a9febbb6e2ff722feceb916a061f0002cbe53f51 100644 (file)
@@ -683,8 +683,7 @@ static u64 filterHash(const Mem *aMem, const Op *pOp){
     }else if( p->flags & MEM_Real ){
       h += sqlite3VdbeIntValue(p);
     }else if( p->flags & (MEM_Str|MEM_Blob) ){
-      h += p->n;
-      if( p->flags & MEM_Zero ) h += p->u.nZero;
+      /* no-op */
     }
   }
   return h;
index 1846e4d63f78ebd392b07c61d83d804794f1a53e..87a7c8632c43dce1c5d450cbe7efc932e9793e25 100644 (file)
@@ -99,6 +99,20 @@ do_eqp_test 2.1 {
   |     |--SCAN transit
   |     `--SEARCH objs USING COVERING INDEX objs_cspo (o=? AND p=?)
   `--SCAN transit
-} 
+}
+
+# 2023-02-28
+# https://sqlite.org/forum/forumpost/0846211821
+#
+# Bloom filter gives an incorrect result if the collating sequence is
+# anything other than binary.
+#
+reset_db
+do_execsql_test 3.1 {
+  CREATE TABLE t0(x TEXT COLLATE rtrim);
+  INSERT INTO t0(x) VALUES ('a'), ('b'), ('c');
+  CREATE VIEW v0(y) AS SELECT DISTINCT x FROM t0;
+  SELECT count(*) FROM t0, v0 WHERE x='b ';
+} 3
 
 finish_test