git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
Fix handling of U+fffd in the LIKE optimization.
author drh <>
Mon, 7 Oct 2024 12:19:23 +0000 (12:19 +0000)
committer drh <>
Mon, 7 Oct 2024 12:19:23 +0000 (12:19 +0000)
dbsqlfuzz eee57fb9eea1dfa5aa40dfa87865cf8c84d12f96.

FossilOrigin-Name: bce52ce2a6e7f3d3d1b2807d1ea95243d9b655e557c1bb6f0b8a9a6cefb1aed6

manifest
manifest.uuid
src/whereexpr.c
test/like.test

index 921101d073d5b10781a678c9354c8b90e8842c1b..7bc5b300959d0bd0bb66506957bc39f786c13173 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Fix\san\sassert()\sfailure\sin\s"PRAGMA\sintegrity_check"\sthat\scould\soccur\swhen\schecking\sa\scorrupt\sdatabase.
-D 2024-10-07T11:47:05.013
+C Fix\shandling\sof\sU+fffd\sin\sthe\sLIKE\soptimization.\ndbsqlfuzz\seee57fb9eea1dfa5aa40dfa87865cf8c84d12f96.
+D 2024-10-07T12:19:23.717
 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
 F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@@ -857,7 +857,7 @@ F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014
 F src/where.c 461d41017d900d4248a268df96d2d30506c4dcc2257f4167c4f46072003ce2cf
 F src/whereInt.h a5d079c346a658b7a6e9e47bb943d021e02fa1e6aed3b964ca112112a4892192
 F src/wherecode.c 5172d647798134e7c92536ddffe7e530c393d79b5dedd648b88faf2646c65baf
-F src/whereexpr.c 44f41ae554c7572e1de1485b3169b233ee04d464b2ee5881687ede3bf07cacfa
+F src/whereexpr.c 562ce89d7f1c24a54c5124576e04928600061c87d83a30e63dcbaadf20eb0653
 F src/window.c 499d48f315a09242dc68f2fac635ed27dcf6bbb0d9ab9084857898c64489e975
 F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2
 F test/affinity2.test ce1aafc86e110685b324e9a763eab4f2a73f737842ec3b687bd965867de90627
@@ -1390,7 +1390,7 @@ F test/kvtest.c 6e0228409ea7ca0497dad503fbd109badb5e59545d131014b6aaac68b56f484a
 F test/lastinsert.test 42e948fd6442f07d60acbd15d33fb86473e0ef63
 F test/laststmtchanges.test ae613f53819206b3222771828d024154d51db200
 F test/lemon-test01.y 70110eff607ab137ccc851edb2bc7e14a6d4f246b5d2d25f82a60b69d87a9ff2
-F test/like.test 242ee7f5d08a031144c0daf63bbd7e7710c847ccf387a83347e0b61b3aa69526
+F test/like.test b3ea2ba3558199aa8f25a42ddeb54772e234fab50868c9f066047acdbda8fc58
 F test/like2.test d3be15fefee3e02fc88942a9b98f26c5339bbdef7783c90023c092c4955fe3d3
 F test/like3.test a76e5938fadbe6d32807284c796bafd869974a961057bc5fc5a28e06de98745c
 F test/limit.test 350f5d03c29e7dff9a2cde016f84f8d368d40bcd02fa2b2a52fa10c4bf3cbfaf
@@ -2215,8 +2215,9 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080
 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
 F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
 F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P cc6f3de0320aceb0e9d81413fa4c021ad2b4ee1c72ecef13438d80c4d3701135
-R 886d14b6c55746d1f77a7d0f4215362a
-U dan
-Z 224c1064901e8a73a6947d128b8e36d7
+P d218993be5886f07193d5c2a66ccd0ecdd7bb87687947b89945c90e31cea5451
+Q +13addee687c3fef02c6ef1af9f446822fe0945815648fb2198933c7c644798b2
+R 85a4f3886dc5b88ca917188b27f8b4cb
+U drh
+Z d22e20bac83a8cacb503bd8770c0d69a
 # Remove this line to create a well-formed Fossil manifest.
index aa365f77bd70e87bb57d90baccccb250eed35bc1..31cbb26ab21f99210a37514a73fdae0b035b75ef 100644 (file)
@@ -1 +1 @@
-d218993be5886f07193d5c2a66ccd0ecdd7bb87687947b89945c90e31cea5451
+bce52ce2a6e7f3d3d1b2807d1ea95243d9b655e557c1bb6f0b8a9a6cefb1aed6
index 7ea2956a755dbda71b5c775b142645e7a55384a9..24d203046f04f3b119de8f2393b3504101dd71ef 100644 (file)
@@ -219,20 +219,25 @@ static int isLikeOrGlob(
      z = (u8*)pRight->u.zToken;
   }
   if( z ){
-
-    /* Count the number of prefix characters prior to the first wildcard.
-    ** If the underlying database has a UTF16LE encoding, then only consider
-    ** ASCII characters.  Note that the encoding of z[] is UTF8 - we are
-    ** dealing with only UTF8 here in this code, but the database engine
-    ** itself might be processing content using a different encoding. */
+    /* Count the number of prefix bytes prior to the first wildcard
+    ** or U+fffd character.  If the underlying database has a UTF16LE
+    ** encoding, then only consider ASCII characters.  Note that the
+    ** encoding of z[] is UTF8 - we are dealing with only UTF8 here in
+    ** this code, but the database engine itself might be processing
+    ** content using a different encoding. */
     cnt = 0;
     while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){
       cnt++;
       if( c==wc[3] && z[cnt]!=0 ){
         cnt++;
-      }else if( c>=0x80 && ENC(db)==SQLITE_UTF16LE ){
-         cnt--;
-         break;
+      }else if( c>=0x80 ){
+        const u8 *z2 = z+cnt-1;
+        if( sqlite3Utf8Read(&z2)==0xfffd || ENC(db)==SQLITE_UTF16LE ){
+          cnt--;
+          break;
+        }else{
+          cnt = (int)(z2-z);
+        }
       }
     }
 
@@ -244,7 +249,7 @@ static int isLikeOrGlob(
     ** range search. The third is because the caller assumes that the pattern
     ** consists of at least one character after all escapes have been
     ** removed.  */
-    if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && 255!=(u8)z[cnt-1] ){
+    if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && ALWAYS(255!=(u8)z[cnt-1]) ){
       Expr *pPrefix;
 
       /* A "complete" match if the pattern ends with "*" or "%" */
index d314e96a19dfe43db17565db1daeee96ba92a14d..0d732b569c43253753c09c411584a28f28c8b9d3 100644 (file)
@@ -731,16 +731,16 @@ ifcapable like_opt&&!icu {
   }
   do_test like-9.5.1 {
     set res [sqlite3_exec_hex db {
-       SELECT x FROM t2 WHERE x LIKE '%fe%25'
+       SELECT 1 FROM t2 WHERE x LIKE '%fe%25'
     }]
-  } {0 {}}
+  } {0 {1 1}}
   ifcapable explain {
     do_test like-9.5.2 {
       set res [sqlite3_exec_hex db {
          EXPLAIN QUERY PLAN SELECT x FROM t2 WHERE x LIKE '%fe%25'
       }]
       regexp {INDEX i2} $res
-    } {1}
+    } {0}
   }
 
   # Do an SQL statement.  Append the search count to the end of the result.