git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
Fix handling of U+fffd in the LIKE optimization.
author drh <>
Mon, 7 Oct 2024 12:24:51 +0000 (12:24 +0000)
committer drh <>
Mon, 7 Oct 2024 12:24:51 +0000 (12:24 +0000)
FossilOrigin-Name: 242cb4bbee0707f470833d9f47efcfb5631f2302b9d48cffdbba63e64984827c

manifest
manifest.uuid
src/whereexpr.c
test/like.test

index e34ec1c1460470577b846cd826142a6328cf63f7..875a8d8591b1b3f09291f1926288a0f2c29c2c57 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Ensure\sthat\sthe\sWhereInfo.revMask\sbitmap\sis\sadjusted\swhen\stables\sare\sremoved\nfrom\sthe\sFROM\sclause\sby\sthe\sOmit-Noop-Join\soptimization.
-D 2024-09-05T23:44:49.612
+C Fix\shandling\sof\sU+fffd\sin\sthe\sLIKE\soptimization.
+D 2024-10-07T12:24:51.913
 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
 F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@@ -842,7 +842,7 @@ F src/walker.c 7c7ea0115345851c3da4e04e2e239a29983b61fb5b038b94eede6aba462640e2
 F src/where.c 74ef82f58056c71dc0d6dc6417b9861814b9a92fe87ff32ec2fde6d01f903996
 F src/whereInt.h 82a13766f13d1a53b05387c2e60726289ef26404bc7b9b1f7770204d97357fb8
 F src/wherecode.c f5255f49d1f42b6e7e6b0362ff3522fa88cbcaa7213e52f9374744027ecdebca
-F src/whereexpr.c 1d350f8ddb7d8740423341d0289d899bf3b287faad8d19c815b6715d396bc919
+F src/whereexpr.c 29307f9f528e2f8785b4cae93cffe56eb763842ec17ae37fdd05f6964d891ed4
 F src/window.c 5d95122dd330bfaebd732358c8ef067c5a9394a53ac249470d611d0ce2c52be2
 F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2
 F test/affinity2.test ce1aafc86e110685b324e9a763eab4f2a73f737842ec3b687bd965867de90627
@@ -1374,7 +1374,7 @@ F test/kvtest.c 6e0228409ea7ca0497dad503fbd109badb5e59545d131014b6aaac68b56f484a
 F test/lastinsert.test 42e948fd6442f07d60acbd15d33fb86473e0ef63
 F test/laststmtchanges.test ae613f53819206b3222771828d024154d51db200
 F test/lemon-test01.y 70110eff607ab137ccc851edb2bc7e14a6d4f246b5d2d25f82a60b69d87a9ff2
-F test/like.test 242ee7f5d08a031144c0daf63bbd7e7710c847ccf387a83347e0b61b3aa69526
+F test/like.test b3ea2ba3558199aa8f25a42ddeb54772e234fab50868c9f066047acdbda8fc58
 F test/like2.test d3be15fefee3e02fc88942a9b98f26c5339bbdef7783c90023c092c4955fe3d3
 F test/like3.test a76e5938fadbe6d32807284c796bafd869974a961057bc5fc5a28e06de98745c
 F test/limit.test 350f5d03c29e7dff9a2cde016f84f8d368d40bcd02fa2b2a52fa10c4bf3cbfaf
@@ -2192,9 +2192,9 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
 F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
 F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P c49c2a8c0082622890c2de006afad4f03f2c4ab931dde846e303d0374590b522
-Q +22ca5a2ffb89ccb5f337993b5a95e27c449c39014284156eabc33da012a8759c
-R 6c4a68eb6bc21deac80d2f01e5d2cb3c
+P 2b543fbc28a03661590fa7e1f9ded65e0758f6bf6e1ee05070b9bcad422ff087
+Q +bce52ce2a6e7f3d3d1b2807d1ea95243d9b655e557c1bb6f0b8a9a6cefb1aed6
+R 1b5e9b98cb41ca99c1cc02c963a1b7fb
 U drh
-Z b87b900cc21c070df7552ebe1f4ad160
+Z ae1430176683a6fef9f313cd0a826f5e
 # Remove this line to create a well-formed Fossil manifest.
index b8f955b651146e4246a80381a1abdbcf00ae8ceb..67f87af43612f0599b7ff33d1e1006c595673673 100644 (file)
@@ -1 +1 @@
-2b543fbc28a03661590fa7e1f9ded65e0758f6bf6e1ee05070b9bcad422ff087
+242cb4bbee0707f470833d9f47efcfb5631f2302b9d48cffdbba63e64984827c
index 1a2267bb0e382667dfa86c758c41487435852cfa..15c751d387e7f203196c898641d7923b947ddc70 100644 (file)
@@ -213,20 +213,25 @@ static int isLikeOrGlob(
      z = (u8*)pRight->u.zToken;
   }
   if( z ){
-
-    /* Count the number of prefix characters prior to the first wildcard.
-    ** If the underlying database has a UTF16LE encoding, then only consider
-    ** ASCII characters.  Note that the encoding of z[] is UTF8 - we are
-    ** dealing with only UTF8 here in this code, but the database engine
-    ** itself might be processing content using a different encoding. */
+    /* Count the number of prefix bytes prior to the first wildcard
+    ** or U+fffd character.  If the underlying database has a UTF16LE
+    ** encoding, then only consider ASCII characters.  Note that the
+    ** encoding of z[] is UTF8 - we are dealing with only UTF8 here in
+    ** this code, but the database engine itself might be processing
+    ** content using a different encoding. */
     cnt = 0;
     while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){
       cnt++;
       if( c==wc[3] && z[cnt]!=0 ){
         cnt++;
-      }else if( c>=0x80 && ENC(db)==SQLITE_UTF16LE ){
-         cnt--;
-         break;
+      }else if( c>=0x80 ){
+        const u8 *z2 = z+cnt-1;
+        if( sqlite3Utf8Read(&z2)==0xfffd || ENC(db)==SQLITE_UTF16LE ){
+          cnt--;
+          break;
+        }else{
+          cnt = (int)(z2-z);
+        }
       }
     }
 
@@ -238,7 +243,7 @@ static int isLikeOrGlob(
     ** range search. The third is because the caller assumes that the pattern
     ** consists of at least one character after all escapes have been
     ** removed.  */
-    if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && 255!=(u8)z[cnt-1] ){
+    if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && ALWAYS(255!=(u8)z[cnt-1]) ){
       Expr *pPrefix;
 
       /* A "complete" match if the pattern ends with "*" or "%" */
index d314e96a19dfe43db17565db1daeee96ba92a14d..0d732b569c43253753c09c411584a28f28c8b9d3 100644 (file)
@@ -731,16 +731,16 @@ ifcapable like_opt&&!icu {
   }
   do_test like-9.5.1 {
     set res [sqlite3_exec_hex db {
-       SELECT x FROM t2 WHERE x LIKE '%fe%25'
+       SELECT 1 FROM t2 WHERE x LIKE '%fe%25'
     }]
-  } {0 {}}
+  } {0 {1 1}}
   ifcapable explain {
     do_test like-9.5.2 {
       set res [sqlite3_exec_hex db {
          EXPLAIN QUERY PLAN SELECT x FROM t2 WHERE x LIKE '%fe%25'
       }]
       regexp {INDEX i2} $res
-    } {1}
+    } {0}
   }
 
   # Do an SQL statement.  Append the search count to the end of the result.