git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
Improved robustness of parsing of tokenize= arguments in FTS5.
author drh <>
Tue, 6 Aug 2024 22:54:56 +0000 (22:54 +0000)
committer drh <>
Tue, 6 Aug 2024 22:54:56 +0000 (22:54 +0000)
FossilOrigin-Name: 7a65ac42c2723b785786cf15f4b267ebfbd4f848f9fc6b37dcf9fac9abd0398c

ext/fts5/fts5_tokenize.c
ext/fts5/test/fts5tokenizer2.test
ext/fts5/test/fts5trigram.test
ext/fts5/test/fts5trigram2.test
manifest
manifest.uuid

index f12056170fa296f769a5a8aa3d9cadb631476b52..e5752efdd417f470717f3024ca7a84f5da5925a5 100644 (file)
@@ -79,7 +79,7 @@ static int fts5AsciiCreate(
       int i;
       memset(p, 0, sizeof(AsciiTokenizer));
       memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
-      for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
+      for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
         const char *zArg = azArg[i+1];
         if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
           fts5AsciiAddExceptions(p, zArg, 1);
@@ -90,6 +90,7 @@ static int fts5AsciiCreate(
           rc = SQLITE_ERROR;
         }
       }
+      if( i<nArg ) rc = SQLITE_ERROR;
       if( rc!=SQLITE_OK ){
         fts5AsciiDelete((Fts5Tokenizer*)p);
         p = 0;
@@ -381,17 +382,16 @@ static int fts5UnicodeCreate(
       }
 
       /* Search for a "categories" argument */
-      for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
+      for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
         if( 0==sqlite3_stricmp(azArg[i], "categories") ){
           zCat = azArg[i+1];
         }
       }
-
       if( rc==SQLITE_OK ){
         rc = unicodeSetCategories(p, zCat);
       }
 
-      for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
+      for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
         const char *zArg = azArg[i+1];
         if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
           if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
@@ -416,6 +416,7 @@ static int fts5UnicodeCreate(
           rc = SQLITE_ERROR;
         }
       }
+      if( i<nArg ) rc = SQLITE_ERROR;
 
     }else{
       rc = SQLITE_NOMEM;
@@ -1298,7 +1299,7 @@ static int fts5TriCreate(
     int i;
     pNew->bFold = 1;
     pNew->iFoldParam = 0;
-    for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
+    for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
       const char *zArg = azArg[i+1];
       if( 0==sqlite3_stricmp(azArg[i], "case_sensitive") ){
         if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
@@ -1316,6 +1317,7 @@ static int fts5TriCreate(
         rc = SQLITE_ERROR;
       }
     }
+    if( i<nArg ) rc = SQLITE_ERROR;
 
     if( pNew->iFoldParam!=0 && pNew->bFold==0 ){
       rc = SQLITE_ERROR;
index bdabd531271ef792476b3720baf0c1bb8524c616..52b30326aee6e060f7cdb09a2574d0414d49f401 100644 (file)
@@ -85,5 +85,25 @@ do_execsql_test 1.7 {
   SELECT highlight(t1, 0, '>', '<') FROM t1('BB mess');
 } {AAdont>BBmess<}
 
+# 2024-08-06 https://sqlite.org/forum/forumpost/171bcc2bcd
+# Error handling of tokenize= arguments.
+#
+foreach {n tkz} {
+  1  {ascii none}
+  2  {unicode61 none}
+  3  {porter none}
+  4  {trigram none}
+  5  {ascii none 0}
+  6  {unicode61 none 0}
+  7  {porter none 0}
+  8  {trigram none 0}
+} {
+  db eval {DROP TABLE IF EXISTS t2;}
+  do_catchsql_test 2.$n "
+     DROP TABLE IF EXISTS t2;
+     CREATE VIRTUAL TABLE t2 USING fts5(a,b,c,tokenize='$tkz');
+  " {1 {error in tokenizer constructor}}
+}
+
 
 finish_test
index 351c059bf51677c3c8b04dd299be6a9fb9461d31..752686620c3ad1cf18bd538b697e61e3edc0009b 100644 (file)
@@ -69,6 +69,9 @@ do_execsql_test 2.0 {
   INSERT INTO t1 VALUES('abcdefghijklm');
   INSERT INTO t1 VALUES('กรุงเทพมหานคร');
 }
+do_catchsql_test 2.0.1 {
+  CREATE VIRTUAL TABLE t2 USING fts5(z, tokenize='trigram case_sensitive');
+} {1 {error in tokenizer constructor}}
 
 foreach {tn s res} {
   1 abc           "(abc)defghijklm"
@@ -206,7 +209,7 @@ do_execsql_test 7.0 {
       (20, "жираф.png"), 
       (30, "cat.png"), 
       (40, "кот.png"), 
-      (50, "misic-ð\9f\8eµ-.mp3");
+      (50, "misic-ðÂ\9fÂ\8eµ-.mp3");
 }
 do_execsql_test 7.1 {
   SELECT rowid FROM f WHERE +filename GLOB '*ир*';
index f5beae5b283a7fcb13aaff10819891b8211473f6..395d8994b2afa2c87148ad270320e1d77eda6bff 100644 (file)
@@ -21,6 +21,9 @@ do_execsql_test 1.0 "
   INSERT INTO t1 VALUES('abc\u0303defghijklm');
   INSERT INTO t1 VALUES('a\u0303b\u0303c\u0303defghijklm');
 "
+do_catchsql_test 1.0.1 {
+  CREATE VIRTUAL TABLE t2 USING fts5(z, tokenize='trigram remove_diacritics');
+} {1 {error in tokenizer constructor}}
 
 do_execsql_test 1.1 {
   SELECT highlight(t1, 0, '(', ')') FROM t1('abc');
index f528de0301c763ab6ef78fd19a0546fe5846e148..f39f37f271b192bc078466246bc4a029baf67a43 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Slight\sdoc\stouchup\sfor\s[af41a1e6fc8b36e9bf65]/[c7519d98ff09e]\sbased\son\sfeedback.\sNo\scode\schanges.
-D 2024-07-15T10:13:02.516
+C Improved\srobustness\sof\sparsing\sof\stokenize=\sarguments\sin\sFTS5.
+D 2024-08-06T22:54:56.766
 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
 F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@@ -105,7 +105,7 @@ F ext/fts5/fts5_storage.c f9e31b0d155e9b2c92d5d3a09ad7a56b937fbf1c7f962e10f4ca62
 F ext/fts5/fts5_tcl.c fdf7e2bb9a9186cfcaf2d2ce11d338309342b7a7593c2812bc54455db53da5d2
 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
 F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
-F ext/fts5/fts5_tokenize.c 83cfcede3898001cab84432a36ce1503e3080cf9b1c682b022ec82e267ea4c13
+F ext/fts5/fts5_tokenize.c b94826fc235e1937c4e93c266e0db4052dcab648ff87f4080376fc04b1fc93a4
 F ext/fts5/fts5_unicode2.c eca63dbc797f8ff0572e97caf4631389c0ab900d6364861b915bdd4735973f00
 F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d09885d1dadf80
 F ext/fts5/fts5_vocab.c 209e0c151e108d5f3621fa24b91e9b02f3750ee6c3f9ccec312df39481b68a09
@@ -228,9 +228,9 @@ F ext/fts5/test/fts5synonym2.test e2f6ff68c4fbe12a866a3a87510f553d9dac99bcb74c10
 F ext/fts5/test/fts5tok1.test 1f7817499f5971450d8c4a652114b3d833393c8134e32422d0af27884ffe9cef
 F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2
 F ext/fts5/test/fts5tokenizer.test ac3c9112b263a639fb0508ae73a3ee886bf4866d2153771a8e8a20c721305a43
-F ext/fts5/test/fts5tokenizer2.test cb5428c7cfb3b6a74b7adfcde65506e329112003e8dffa7501d01c2d18d02569
-F ext/fts5/test/fts5trigram.test 6c4e37864f3e7d90673db5563d9736d7e40080ab94d10ebdffa94c1b77941da0
-F ext/fts5/test/fts5trigram2.test 9fe4207f8a4241747aff1005258b564958588d21bfd240d6cd4c2e955d31c156
+F ext/fts5/test/fts5tokenizer2.test b9d734c1b10bc317a377ffea3ecb5c2937313113a02e364f167d0c7f8c81c282
+F ext/fts5/test/fts5trigram.test be914555deb8504dde682bd5aa343d00c4da37dfad20709a5bac30d5f97f2ef5
+F ext/fts5/test/fts5trigram2.test 4043f8836bbbb0ce37b86dd1e741431c6c595c7e4ba4fc8e26f21dc3b540e228
 F ext/fts5/test/fts5ubsan.test 783d5a8d13ebfa169e634940228db54540780e3ba7a87ad1e4510e61440bf64b
 F ext/fts5/test/fts5umlaut.test a42fe2fe6387c40c49ab27ccbd070e1ae38e07f38d05926482cc0bccac9ad602
 F ext/fts5/test/fts5unicode.test 17056f4efe6b0a5d4f41fdf7a7dc9af2873004562eaa899d40633b93dc95f5a9
@@ -2191,9 +2191,9 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
 F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
 F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P c7519d98ff09ed96c3c6f2cccf16f8efa19133e12cbb077fc86526f0e81f0470
-Q +be7b1fc0c5d8cda971b471dfcb4637212bfff4f42b1e074077a381cc493d877a
-R 25d6454fe046cdba79f56994072c8204
-U stephan
-Z a2c01e65c215b93339c9d803c0adca59
+P a61997c315ce70d60f3722a2b3b3d06ba592ce1cafed1639a9d5f162f712ae03
+Q +d9f726ade6b258f8723f90d0b04a4682e885e30939eb29773913e4dfc8e85503
+R 5f343f5765d17f03385da2454716eb3e
+U drh
+Z 71e3aedaf58c5df6d18eb9c2aaa0e383
 # Remove this line to create a well-formed Fossil manifest.
index 2a99b57e42dfa71c1098b1304172688ff47c96cf..52835240421d425186c5290df2778e92edfdb708 100644 (file)
@@ -1 +1 @@
-a61997c315ce70d60f3722a2b3b3d06ba592ce1cafed1639a9d5f162f712ae03
+7a65ac42c2723b785786cf15f4b267ebfbd4f848f9fc6b37dcf9fac9abd0398c