From: drh <> Date: Mon, 26 Dec 2022 16:03:34 +0000 (+0000) Subject: Fix corner cases in UTF8 handling in the REGEXP extension. X-Git-Tag: version-3.40.1~12 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2de62873d8a22e1cf1e08d6f439f7f6d2927596c;p=thirdparty%2Fsqlite.git Fix corner cases in UTF8 handling in the REGEXP extension. [forum:/forumpost/3ffe058b04|Forum post 3ffe058b04]. FossilOrigin-Name: 93e68b398329dcdd25758065c5f45b0e8c43368ab7a034201a244c7b3ac5b3dd --- diff --git a/ext/misc/regexp.c b/ext/misc/regexp.c index d0c8ee5cfe..086ef564cb 100644 --- a/ext/misc/regexp.c +++ b/ext/misc/regexp.c @@ -185,7 +185,7 @@ static unsigned re_next_char(ReInput *p){ c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f); p->i += 2; if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; - }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80 + }else if( (c&0xf8)==0xf0 && p->i+2<p->mx && (p->z[p->i]&0xc0)==0x80 && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){ c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6) | (p->z[p->i+2]&0x3f); @@ -712,15 +712,15 @@ static const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){ ** one or more matching characters, enter those matching characters into ** zInit[]. The re_match() routine can then search ahead in the input ** string looking for the initial match without having to run the whole - ** regex engine over the string. Do not worry able trying to match + ** regex engine over the string. Do not worry about trying to match ** unicode characters beyond plane 0 - those are very rare and this is ** just an optimization. 
*/ if( pRe->aOp[0]==RE_OP_ANYSTAR && !noCase ){ for(j=0, i=1; j<(int)sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){ unsigned x = pRe->aArg[i]; - if( x<=127 ){ + if( x<=0x7f ){ pRe->zInit[j++] = (unsigned char)x; - }else if( x<=0xfff ){ + }else if( x<=0x7ff ){ pRe->zInit[j++] = (unsigned char)(0xc0 | (x>>6)); pRe->zInit[j++] = 0x80 | (x&0x3f); }else if( x<=0xffff ){ diff --git a/manifest b/manifest index d2c75e3309..3c8dd3fcaf 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\soff-by-one\serror\sin\scode\sgeneration\sthat\scomes\sup\swhen\ndoing\sa\sDISTINCT\squery\sagainst\sa\svirtual\stable\swith\san\sOR\sterm\sin\sthe\nWHERE\sclause\sand\swhere\sthe\sORDER\sBY\sclause\shas\s64\sor\smore\sreferences\sto\nthe\sresult\sset.\s\s[forum:/forumpost/dfe8084751|Forum\spost\sdfe8084751].\nThe\serror\sis\sharmless\sin\sproduction\sbuilds,\sbut\smight\scause\san\sassertion\nfault\sin\sa\sdebugging\sbuild. -D 2022-12-26T15:55:10.218 +C Fix\scorner\scases\sin\sUTF8\shandling\sin\sthe\sREGEXP\sextension.\n[forum:/forumpost/3ffe058b04|Forum\spost\s3ffe058b04]. 
+D 2022-12-26T16:03:34.506 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -316,7 +316,7 @@ F ext/misc/normalize.c bd84355c118e297522aba74de34a4fd286fc775524e0499b14473918d F ext/misc/percentile.c b9086e223d583bdaf8cb73c98a6539d501a2fc4282654adbfea576453d82e691 F ext/misc/prefixes.c 0f4f8cff5aebc00a7e3ac4021fd59cfe1a8e17c800ceaf592859ecb9cbc38196 F ext/misc/qpvtab.c 09738419e25f603a35c0ac8bd0a04daab794f48d08a9bc07a6085b9057b99009 -F ext/misc/regexp.c 5abed0ace2d9340b42b9ab1dbe64db9c276e4e8eba38a903232b6253e05ccdaf +F ext/misc/regexp.c 064838f7b31e90d312cce089cfafbb992034e75d359009c48886ca06c0a794b2 F ext/misc/remember.c add730f0f7e7436cd15ea3fd6a90fd83c3f706ab44169f7f048438b7d6baa69c F ext/misc/rot13.c 51ac5f51e9d5fd811db58a9c23c628ad5f333c173f1fc53c8491a3603d38556c F ext/misc/scrub.c 2a44b0d44c69584c0580ad2553f6290a307a49df4668941d2812135bfb96a946 @@ -1411,7 +1411,7 @@ F test/randexpr1.test eda062a97e60f9c38ae8d806b03b0ddf23d796df F test/rbu.test 168573d353cd0fd10196b87b0caa322c144ef736 F test/rdonly.test 64e2696c322e3538df0b1ed624e21f9a23ed9ff8 F test/recover.test fd5199f928757cb308661b5fdca1abc19398a798ff7f24b57c3071e9f8e0471e -F test/regexp1.test 83c631617357150f8054ca1d1fed40a552b0d0f8eb7a7f090c3be02cee9f9913 +F test/regexp1.test 8f2a8bc1569666e29a4cee6c1a666cd224eb6d50e2470d1dc1df995170f3e0f1 F test/regexp2.test 55ed41da802b0e284ac7e2fe944be3948f93ff25abbca0361a609acfed1368b5 F test/reindex.test cd9d6021729910ece82267b4f5e1b5ac2911a7566c43b43c176a6a4732e2118d F test/releasetest_data.tcl 0db8aee0c348090fd06da47020ab4ed8ec692e0723427b2f3947d4dfb806f3b0 @@ -2055,9 +2055,9 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F 
vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 70964b8a1cfe8e47bac399db840afda9c35e36d62f1933744f49011e94a2343e -Q +04af7ef77043702f93cbff23548610759786893bd3d4d6fc08181e1e249c6663 -R 48792d88b8481865424dd0453425ccb6 +P cbcd7d9be5e5fa6c55fa53a868ff07ee2c1725fc27c57a4bb390072e369b2be4 +Q +abb18f61c5cec0f524acc41453b4c06b61c5af51ff46417588837fc0c3967288 +R 5f18be7e6ce38d41b3bb5a844923b1e4 U drh -Z bf85d64cd1846c96c1ebb289943a6ada +Z da0736ec3d4438c4303b366a0507405c # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 7b913b5f96..b510257e3e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -cbcd7d9be5e5fa6c55fa53a868ff07ee2c1725fc27c57a4bb390072e369b2be4 \ No newline at end of file +93e68b398329dcdd25758065c5f45b0e8c43368ab7a034201a244c7b3ac5b3dd \ No newline at end of file diff --git a/test/regexp1.test b/test/regexp1.test index 102c1280c0..0401b13d72 100644 --- a/test/regexp1.test +++ b/test/regexp1.test @@ -303,6 +303,33 @@ do_execsql_test regexp1-6.7 {SELECT 'xabc' REGEXP '(^abc|def)';} {0} do_execsql_test regexp1-6.8 {SELECT 'def' REGEXP '(^abc|def)';} {1} do_execsql_test regexp1-6.9 {SELECT 'xdef' REGEXP '(^abc|def)';} {1} +# 2022-11-17 +# https://sqlite.org/forum/forumpost/3ffe058b04 +# +do_execsql_test regexp1-7.1 { + SELECT char(0x61,0x7ff,0x62) REGEXP char(0x7ff); +} 1 +do_execsql_test regexp1-7.2 { + SELECT char(0x61,0x800,0x62) REGEXP char(0x800); +} 1 +do_execsql_test regexp1-7.3 { + SELECT char(0x61,0xabc,0x62) REGEXP char(0xabc); +} 1 +do_execsql_test regexp1-7.4 { + SELECT char(0x61,0xfff,0x62) REGEXP char(0xfff); +} 1 +do_execsql_test regexp1-7.5 { + SELECT char(0x61,0x1000,0x62) REGEXP char(0x1000); +} 1 +do_execsql_test regexp1-7.10 { + SELECT char(0x61,0xffff,0x62) REGEXP char(0xffff); +} 1 +do_execsql_test regexp1-7.11 { + SELECT char(0x61,0x10000,0x62) REGEXP char(0x10000); 
+} 1 +do_execsql_test regexp1-7.12 { + SELECT char(0x61,0x10ffff,0x62) REGEXP char(0x10ffff); +} 1 finish_test