From: drh <> Date: Mon, 7 Oct 2024 12:24:51 +0000 (+0000) Subject: Fix handling of U+fffd in the LIKE optimization. X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5b6803c539a92cf2dfb62598fd731d0690526f5b;p=thirdparty%2Fsqlite.git Fix handling of U+fffd in the LIKE optimization. FossilOrigin-Name: 242cb4bbee0707f470833d9f47efcfb5631f2302b9d48cffdbba63e64984827c --- diff --git a/manifest b/manifest index e34ec1c146..875a8d8591 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Ensure\sthat\sthe\sWhereInfo.revMask\sbitmap\sis\sadjusted\swhen\stables\sare\sremoved\nfrom\sthe\sFROM\sclause\sby\sthe\sOmit-Noop-Join\soptimization. -D 2024-09-05T23:44:49.612 +C Fix\shandling\sof\sU+fffd\sin\sthe\sLIKE\soptimization. +D 2024-10-07T12:24:51.913 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -842,7 +842,7 @@ F src/walker.c 7c7ea0115345851c3da4e04e2e239a29983b61fb5b038b94eede6aba462640e2 F src/where.c 74ef82f58056c71dc0d6dc6417b9861814b9a92fe87ff32ec2fde6d01f903996 F src/whereInt.h 82a13766f13d1a53b05387c2e60726289ef26404bc7b9b1f7770204d97357fb8 F src/wherecode.c f5255f49d1f42b6e7e6b0362ff3522fa88cbcaa7213e52f9374744027ecdebca -F src/whereexpr.c 1d350f8ddb7d8740423341d0289d899bf3b287faad8d19c815b6715d396bc919 +F src/whereexpr.c 29307f9f528e2f8785b4cae93cffe56eb763842ec17ae37fdd05f6964d891ed4 F src/window.c 5d95122dd330bfaebd732358c8ef067c5a9394a53ac249470d611d0ce2c52be2 F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 F test/affinity2.test ce1aafc86e110685b324e9a763eab4f2a73f737842ec3b687bd965867de90627 @@ -1374,7 +1374,7 @@ F test/kvtest.c 6e0228409ea7ca0497dad503fbd109badb5e59545d131014b6aaac68b56f484a F test/lastinsert.test 42e948fd6442f07d60acbd15d33fb86473e0ef63 F test/laststmtchanges.test ae613f53819206b3222771828d024154d51db200 F test/lemon-test01.y 70110eff607ab137ccc851edb2bc7e14a6d4f246b5d2d25f82a60b69d87a9ff2 -F test/like.test 242ee7f5d08a031144c0daf63bbd7e7710c847ccf387a83347e0b61b3aa69526 +F test/like.test b3ea2ba3558199aa8f25a42ddeb54772e234fab50868c9f066047acdbda8fc58 F test/like2.test d3be15fefee3e02fc88942a9b98f26c5339bbdef7783c90023c092c4955fe3d3 F test/like3.test a76e5938fadbe6d32807284c796bafd869974a961057bc5fc5a28e06de98745c F test/limit.test 350f5d03c29e7dff9a2cde016f84f8d368d40bcd02fa2b2a52fa10c4bf3cbfaf @@ -2192,9 +2192,9 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P c49c2a8c0082622890c2de006afad4f03f2c4ab931dde846e303d0374590b522 -Q +22ca5a2ffb89ccb5f337993b5a95e27c449c39014284156eabc33da012a8759c -R 6c4a68eb6bc21deac80d2f01e5d2cb3c +P 2b543fbc28a03661590fa7e1f9ded65e0758f6bf6e1ee05070b9bcad422ff087 +Q +bce52ce2a6e7f3d3d1b2807d1ea95243d9b655e557c1bb6f0b8a9a6cefb1aed6 +R 1b5e9b98cb41ca99c1cc02c963a1b7fb U drh -Z b87b900cc21c070df7552ebe1f4ad160 +Z ae1430176683a6fef9f313cd0a826f5e # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index b8f955b651..67f87af436 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2b543fbc28a03661590fa7e1f9ded65e0758f6bf6e1ee05070b9bcad422ff087 +242cb4bbee0707f470833d9f47efcfb5631f2302b9d48cffdbba63e64984827c diff --git a/src/whereexpr.c b/src/whereexpr.c index 1a2267bb0e..15c751d387 100644 --- a/src/whereexpr.c +++ b/src/whereexpr.c @@ -213,20 +213,25 @@ static int isLikeOrGlob( z = (u8*)pRight->u.zToken; } if( z ){ - - /* Count the number of prefix characters prior to the first wildcard. - ** If the underlying database has a UTF16LE encoding, then only consider - ** ASCII characters. Note that the encoding of z[] is UTF8 - we are - ** dealing with only UTF8 here in this code, but the database engine - ** itself might be processing content using a different encoding. */ + /* Count the number of prefix bytes prior to the first wildcard. + ** or U+fffd character. If the underlying database has a UTF16LE + ** encoding, then only consider ASCII characters. Note that the + ** encoding of z[] is UTF8 - we are dealing with only UTF8 here in + ** this code, but the database engine itself might be processing + ** content using a different encoding. */ cnt = 0; while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ cnt++; if( c==wc[3] && z[cnt]!=0 ){ cnt++; - }else if( c>=0x80 && ENC(db)==SQLITE_UTF16LE ){ - cnt--; - break; + }else if( c>=0x80 ){ + const u8 *z2 = z+cnt-1; + if( sqlite3Utf8Read(&z2)==0xfffd || ENC(db)==SQLITE_UTF16LE ){ + cnt--; + break; + }else{ + cnt = (int)(z2-z); + } } } @@ -238,7 +243,7 @@ static int isLikeOrGlob( ** range search. The third is because the caller assumes that the pattern ** consists of at least one character after all escapes have been ** removed. */ - if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && 255!=(u8)z[cnt-1] ){ + if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && ALWAYS(255!=(u8)z[cnt-1]) ){ Expr *pPrefix; /* A "complete" match if the pattern ends with "*" or "%" */ diff --git a/test/like.test b/test/like.test index d314e96a19..0d732b569c 100644 --- a/test/like.test +++ b/test/like.test @@ -731,16 +731,16 @@ ifcapable like_opt&&!icu { } do_test like-9.5.1 { set res [sqlite3_exec_hex db { - SELECT x FROM t2 WHERE x LIKE '%fe%25' + SELECT 1 FROM t2 WHERE x LIKE '%fe%25' }] - } {0 {}} + } {0 {1 1}} ifcapable explain { do_test like-9.5.2 { set res [sqlite3_exec_hex db { EXPLAIN QUERY PLAN SELECT x FROM t2 WHERE x LIKE '%fe%25' }] regexp {INDEX i2} $res - } {1} + } {0} } # Do an SQL statement. Append the search count to the end of the result.