From: drh <> Date: Fri, 31 Jan 2025 14:58:00 +0000 (+0000) Subject: Fix a problem with LIKE and GLOB processing in utf-16be databases in cases where... X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a082be3e8f10565566c4c238ffeda83015a3affc;p=thirdparty%2Fsqlite.git Fix a problem with LIKE and GLOB processing in utf-16be databases in cases where the utf-8 encoding of a character ends with the byte 0xBF. FossilOrigin-Name: 9d59f8a5f2b60be4251f6c1a2cfddb835d82b826ec0a7d60b5d9e9af3508ce5e --- diff --git a/manifest b/manifest index 3d9c6e7eca..61aaa8d403 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C If\sany\serrors\soccur\swhile\sprocessing\ssqlite_dbpage\schanges,\scancel\npending\struncate\soperations. -D 2025-01-31T14:54:20.344 +C Fix\sa\sproblem\swith\sLIKE\sand\sGLOB\sprocessing\sin\sutf-16be\sdatabases\sin\scases\swhere\sthe\sutf-8\sencoding\sof\sa\scharacter\sends\swith\sthe\sbyte\s0xBF. +D 2025-01-31T14:58:00.452 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md e108e1e69ae8e8a59e93c455654b8ac9356a11720d3345df2a4743e9590fb20d @@ -865,7 +865,7 @@ F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014 F src/where.c 604f17baed46f4997ffe79f25c07c4b51a4165a5938cc27fe165c7e1ca485d11 F src/whereInt.h 1e36ec50392f7cc3d93d1152d4338064cd522b87156a0739388b7e273735f0ca F src/wherecode.c 0c3d3199a2b769a5e2bb70feb5003dc85b3d86842ecaf903a47f2b4205ca5dab -F src/whereexpr.c 0f93a29cabd3a338d09a1f5c6770620a1ac51ec1157f3229502a7e7767c60b6f +F src/whereexpr.c fd80811331cebf37a96a1bbbe93a20425eb04a925baa7c2306d72e0797a47a4c F src/window.c 2bf01f9941a64fbcead61a0e3cb5db3fca5094b30d2ff0d23274c2a81d2e2385 F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 F test/affinity2.test ce1aafc86e110685b324e9a763eab4f2a73f737842ec3b687bd965867de90627 @@ -1399,7 +1399,7 @@ F test/laststmtchanges.test ae613f53819206b3222771828d024154d51db200 F test/lemon-test01.y 70110eff607ab137ccc851edb2bc7e14a6d4f246b5d2d25f82a60b69d87a9ff2 F test/like.test b3ea2ba3558199aa8f25a42ddeb54772e234fab50868c9f066047acdbda8fc58 F test/like2.test d3be15fefee3e02fc88942a9b98f26c5339bbdef7783c90023c092c4955fe3d3 -F test/like3.test a76e5938fadbe6d32807284c796bafd869974a961057bc5fc5a28e06de98745c +F test/like3.test a9d78297426279c3db1206490444f3193f579890b3c4118a52960afe2b4863cd F test/limit.test 350f5d03c29e7dff9a2cde016f84f8d368d40bcd02fa2b2a52fa10c4bf3cbfaf F test/limit2.test 9409b033284642a859fafc95f29a5a6a557bd57c1f0d7c3f554bd64ed69df77e F test/literal.test a65dca9fef86e51b8e45544268e37abbd4bb94ba35fd65f6fdcab2f288cd8f79 @@ -2205,9 +2205,9 @@ F tool/version-info.c 3b36468a90faf1bbd59c65fd0eb66522d9f941eedd364fabccd7227350 F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7 F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 66bf67842bd458c7c2d563b5addaae4fd328543f4ddbbeee63c5e59faf429903 -Q +6138e0dc4e0e4e19cb048fe0cd969156151fdc427eee21188d2a822642aad043 -R 61f42abda88321326440732c6bc3cf4f +P 3f8d1902519009b4f37fc716ddaa9695a5bbb2a4ad5bba55a7efdfd1e99f3eab +Q +4b4f33d791fe4318c4597bee7d2f9e486ed223e731982af470f5cc0dbdc600fc +R b159b7f206c5ad7b10c423f9ad2662d7 U drh -Z 8137921b9529da45f1eb973819d03ef5 +Z b818b8e3f2f32f070323e2cf3a855914 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 7c0f98b753..6f5b4a1436 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3f8d1902519009b4f37fc716ddaa9695a5bbb2a4ad5bba55a7efdfd1e99f3eab +9d59f8a5f2b60be4251f6c1a2cfddb835d82b826ec0a7d60b5d9e9af3508ce5e diff --git a/src/whereexpr.c b/src/whereexpr.c index 2b6eb6a78d..0a963f4f92 100644 --- a/src/whereexpr.c +++ b/src/whereexpr.c @@ -219,12 +219,12 @@ static int isLikeOrGlob( z = (u8*)pRight->u.zToken; } if( z ){ - /* Count the number of prefix bytes prior to the first wildcard. - ** or U+fffd character. If the underlying database has a UTF16LE - ** encoding, then only consider ASCII characters. Note that the - ** encoding of z[] is UTF8 - we are dealing with only UTF8 here in - ** this code, but the database engine itself might be processing - ** content using a different encoding. */ + /* Count the number of prefix bytes prior to the first wildcard, + ** U+fffd character, or malformed utf-8. If the underlying database + ** has a UTF16LE encoding, then only consider ASCII characters. Note that + ** the encoding of z[] is UTF8 - we are dealing with only UTF8 here in this + ** code, but the database engine itself might be processing content using a + ** different encoding. */ cnt = 0; while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ cnt++; @@ -232,7 +232,9 @@ static int isLikeOrGlob( cnt++; }else if( c>=0x80 ){ const u8 *z2 = z+cnt-1; - if( sqlite3Utf8Read(&z2)==0xfffd || ENC(db)==SQLITE_UTF16LE ){ + if( sqlite3Utf8Read(&z2)==0xfffd || c==0xFF /* bad utf-8 */ + || ENC(db)==SQLITE_UTF16LE + ){ cnt--; break; }else{ @@ -1384,9 +1386,8 @@ static void exprAnalyze( } if( !db->mallocFailed ){ - u8 c, *pC; /* Last character before the first wildcard */ + u8 *pC; /* Last character before the first wildcard */ pC = (u8*)&pStr2->u.zToken[sqlite3Strlen30(pStr2->u.zToken)-1]; - c = *pC; if( noCase ){ /* The point is to increment the last character before the first ** wildcard. But if we increment '@', that will push it into the @@ -1394,10 +1395,17 @@ static void exprAnalyze( ** inequality. To avoid this, make sure to also run the full ** LIKE on all candidate expressions by clearing the isComplete flag */ - if( c=='A'-1 ) isComplete = 0; - c = sqlite3UpperToLower[c]; + if( *pC=='A'-1 ) isComplete = 0; + *pC = sqlite3UpperToLower[*pC]; + } + + /* Increment the value of the last utf8 character in the prefix. */ + while( *pC==0xBF && pC>(u8*)pStr2->u.zToken ){ + *pC = 0x80; + pC--; } - *pC = c + 1; + assert( *pC!=0xFF ); /* isLikeOrGlob() guarantees this */ + (*pC)++; } zCollSeqName = noCase ? "NOCASE" : sqlite3StrBINARY; pNewExpr1 = sqlite3ExprDup(db, pLeft, 0); diff --git a/test/like3.test b/test/like3.test index a93e113d62..2987e0e0e2 100644 --- a/test/like3.test +++ b/test/like3.test @@ -275,4 +275,84 @@ do_eqp_test like3-6.240 { } } +#------------------------------------------------------------------------- + +ifcapable utf16 { + reset_db + do_execsql_test like3-7.0 { + PRAGMA encoding = 'UTF-16be'; + + CREATE TABLE Example(word TEXT NOT NULL); + CREATE INDEX Example_word on Example(word); + + INSERT INTO Example VALUES(char(0x307F)); + } + + do_execsql_test like3-7.1 { + SELECT char(0x307F)=='み'; + } {1} + + do_execsql_test like3-7.1 { + SELECT * FROM Example WHERE word GLOB 'み*' + } {み} + + do_execsql_test like3-7.2 { + SELECT * FROM Example WHERE word >= char(0x307F) AND word < char(0x3080); + } {み} +} + +#------------------------------------------------------------------------- +reset_db + +foreach enc { + UTF-8 + UTF-16le + UTF-16be +} { + foreach {tn expr} { + 1 "CAST (X'FF' AS TEXT)" + 2 "CAST (X'FFBF' AS TEXT)" + 3 "CAST (X'FFBFBF' AS TEXT)" + 4 "CAST (X'FFBFBFBF' AS TEXT)" + + 5 "'abc' || CAST (X'FF' AS TEXT)" + 6 "'def' || CAST (X'FFBF' AS TEXT)" + 7 "'ghi' || CAST (X'FFBFBF' AS TEXT)" + 8 "'jkl' || CAST (X'FFBFBFBF' AS TEXT)" + } { + reset_db + execsql "PRAGMA encoding = '$enc'" + do_execsql_test like3-8.$tn.0 { + CREATE TABLE t1(x); + } + + do_execsql_test like3-8.$tn.1 { + PRAGMA encoding + } $enc + + do_execsql_test like3-8.$tn.1 " + INSERT INTO t1 VALUES( $expr ) + " + + do_execsql_test like3-8.$tn.2 { + SELECT typeof(x) FROM t1 + } {text} + + set x [db one {SELECT x || '%' FROM t1}] + + do_execsql_test like3-8.$tn.3 { + SELECT rowid FROM t1 WHERE x LIKE $x + } 1 + + do_execsql_test like3-8.$tn.4 { + CREATE INDEX i1 ON t1(x); + } + + do_execsql_test like3-8.$tn.5 { + SELECT rowid FROM t1 WHERE x LIKE $x + } 1 + } +} + finish_test +