From: drh <> Date: Sun, 3 Jul 2022 14:32:18 +0000 (+0000) Subject: Fix the initial-prefix optimization for the REGEXP extension such that it X-Git-Tag: version-3.39.1~10 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f14c4c91e140c61f0f98a445f37c9326132ca0d6;p=thirdparty%2Fsqlite.git Fix the initial-prefix optimization for the REGEXP extension such that it works even if the prefix contains characters that require a 3-byte UTF8 encoding. This should fix the problem reported by [forum:/forumpost/96692f8ba5|forum post 96692f8ba5]. FossilOrigin-Name: 7a32ccccef7712a40aba86c2204457f6772403befabe0aab19407399df46f096 --- diff --git a/ext/misc/regexp.c b/ext/misc/regexp.c index b626ca424a..52973cc73f 100644 --- a/ext/misc/regexp.c +++ b/ext/misc/regexp.c @@ -685,7 +685,7 @@ static const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){ pRe->zInit[j++] = (unsigned char)(0xc0 | (x>>6)); pRe->zInit[j++] = 0x80 | (x&0x3f); }else if( x<=0xffff ){ - pRe->zInit[j++] = (unsigned char)(0xd0 | (x>>12)); + pRe->zInit[j++] = (unsigned char)(0xe0 | (x>>12)); pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f); pRe->zInit[j++] = 0x80 | (x&0x3f); }else{ diff --git a/manifest b/manifest index 9401105526..0be838b7bf 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sdocumentation\sand\stest-script\stypos\sand\sa\sdependency\sproblem\son\sa\sMakefile.\sFix\sthe\ssqlite_stmt\sextension\svirtual\stable\sso\sthat\sit\sshows\sthe\sstate\sof\sall\nprepared\sstatements\sfor\sa\ssingle\sinstant\sin\stime. -D 2022-06-30T14:19:38.025 +C Fix\sthe\sinitial-prefix\soptimization\sfor\sthe\sREGEXP\sextension\ssuch\sthat\sit\nworks\seven\sif\sthe\sprefix\scontains\scharacters\sthat\srequire\sa\s3-byte\sUTF8\nencoding.\s\sThis\sshould\sfix\sthe\sproblem\sreported\sby\n[forum:/forumpost/96692f8ba5|forum\spost\s96692f8ba5]. +D 2022-07-03T14:32:18.936 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -334,7 +334,7 @@ F ext/misc/normalize.c bd84355c118e297522aba74de34a4fd286fc775524e0499b14473918d F ext/misc/percentile.c b9086e223d583bdaf8cb73c98a6539d501a2fc4282654adbfea576453d82e691 F ext/misc/prefixes.c 0f4f8cff5aebc00a7e3ac4021fd59cfe1a8e17c800ceaf592859ecb9cbc38196 F ext/misc/qpvtab.c 09738419e25f603a35c0ac8bd0a04daab794f48d08a9bc07a6085b9057b99009 -F ext/misc/regexp.c b267fd05ff8d38b22f4c2809d7b7a2c61d522e9faf2feb928dbb9662e4a3a386 +F ext/misc/regexp.c 03e483711534c437b2e29648d2a4b7730f5cb781a434ac8150907376bc4489f6 F ext/misc/remember.c add730f0f7e7436cd15ea3fd6a90fd83c3f706ab44169f7f048438b7d6baa69c F ext/misc/rot13.c 51ac5f51e9d5fd811db58a9c23c628ad5f333c173f1fc53c8491a3603d38556c F ext/misc/scrub.c 2a44b0d44c69584c0580ad2553f6290a307a49df4668941d2812135bfb96a946 @@ -1342,7 +1342,7 @@ F test/randexpr1.test eda062a97e60f9c38ae8d806b03b0ddf23d796df F test/rbu.test 168573d353cd0fd10196b87b0caa322c144ef736 F test/rdonly.test 64e2696c322e3538df0b1ed624e21f9a23ed9ff8 F test/recover.test ccb8c2623902a92ebb76770edd075cb4f75a4760bb7afde38026572c6e79070d -F test/regexp1.test 0c3ff80f66b0eff80e623eb5db7a3dad512095c573d78ac23009785f6d8f51ce +F test/regexp1.test 4a44e014664a109bbb1c37d29d9b61ca5aa5a7f49cc564c95208a80f818e3377 F test/regexp2.test 55ed41da802b0e284ac7e2fe944be3948f93ff25abbca0361a609acfed1368b5 F test/reindex.test cd9d6021729910ece82267b4f5e1b5ac2911a7566c43b43c176a6a4732e2118d F test/releasetest_data.tcl 11ba48a21ed1c808147b0e77c6e93d204577f4327ffe6d7c3b34cd3c01eac3a2 @@ -1978,15 +1978,9 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 14e166f40dbfa6e055543f8301525f2ca2e96a02a57269818b9e69e162e98918 -Q +13cb3f1e63ed1e906f820655645a4966f0cae140ac442177b6685637dcfd365a -Q +65930a5c069e7274b945ce1aed0abb0edba3d4ab4e63916cc38c11cdef998926 -Q +84a91c255e3d77728820561f16bdd9a87b7ff42b5430a9e13f404dfc3365c716 -Q +869061f18d2f2f500451c87ab62d3ca71a5321d5246b2e2c7bf960e48c6b5250 -R daa03d2b9a733363aa4f82ad99fc3d72 -T *branch * branch-3.39 -T *sym-branch-3.39 * -T -sym-trunk * +P 0a9e08be6d06e571ef9dc688317271de8054179a0458e196d3bc2dc6262efecc +Q +c94595a6e15490b432f099fefbe2429fa19287f7bdc86332cba0fd1e08f65bd6 +R f00d1bfbb463fa5a19a7d29d082da2c3 U drh -Z cdc378951e2612eeb48ac4d1a5d70c78 +Z c8c769f5766b47d3e0174aafc839859f # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 53386049ac..2f484f9abe 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -0a9e08be6d06e571ef9dc688317271de8054179a0458e196d3bc2dc6262efecc \ No newline at end of file +7a32ccccef7712a40aba86c2204457f6772403befabe0aab19407399df46f096 \ No newline at end of file diff --git a/test/regexp1.test b/test/regexp1.test index 1eb56c672c..569dd66c2a 100644 --- a/test/regexp1.test +++ b/test/regexp1.test @@ -239,4 +239,26 @@ do_execsql_test regexp1-2.22 { SELECT 'abc$¢€xyz' REGEXP '^abc[^\u0025-X][^ -\u007f][^\u20ab]xyz$' } {1} +# 2022-07-03 +# https://sqlite.org/forum/forumpost/96692f8ba5 +# The REGEXP extension mishandles the prefix search optimization when +# the prefix contains 3-byte UTF8 characters. +# +reset_db +load_static_extension db regexp +do_execsql_test regexp1-3.1 { + CREATE TABLE t1(id INTEGER PRIMARY KEY, a TEXT); + INSERT INTO t1(id, a) VALUES(1, '日本語'); + SELECT a, hex(a), length(a) FROM t1; +} {日本語 E697A5E69CACE8AA9E 3} +do_execsql_test regexp1-3.2 { + SELECT * FROM t1 WHERE a='日本語'; +} {1 日本語} +do_execsql_test regexp1-3.3 { + SELECT * FROM t1 WHERE a LIKE '日本語'; +} {1 日本語} +do_execsql_test regexp1-3.4 { + SELECT * FROM t1 wHERE a REGEXP '日本語'; +} {1 日本語} + finish_test