From: drh <> Date: Tue, 6 Aug 2024 22:49:01 +0000 (+0000) Subject: Improved robustness of parsing of tokenize= arguments in FTS5. X-Git-Tag: version-3.47.0~257 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e9b919d550262076d1b8453c3d6852b88822b922;p=thirdparty%2Fsqlite.git Improved robustness of parsing of tokenize= arguments in FTS5. [forum:/forumpost/171bcc2bcd|Forum post 171bcc2bcd]. FossilOrigin-Name: d9f726ade6b258f8723f90d0b04a4682e885e30939eb29773913e4dfc8e85503 --- diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index 3e9fdff3eb..08de0d60d7 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -79,7 +79,7 @@ static int fts5AsciiCreate( int i; memset(p, 0, sizeof(AsciiTokenizer)); memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar)); - for(i=0; rc==SQLITE_OK && ibFold = 1; pNew->iFoldParam = 0; - for(i=0; rc==SQLITE_OK && iiFoldParam!=0 && pNew->bFold==0 ){ rc = SQLITE_ERROR; diff --git a/ext/fts5/test/fts5tokenizer2.test b/ext/fts5/test/fts5tokenizer2.test index 0017046baf..4fe31d22c4 100644 --- a/ext/fts5/test/fts5tokenizer2.test +++ b/ext/fts5/test/fts5tokenizer2.test @@ -85,5 +85,25 @@ do_execsql_test 1.7 { SELECT highlight(t1, 0, '>', '<') FROM t1('BB mess'); } {AAdont>BBmess<} +# 2024-08-06 https://sqlite.org/forum/forumpost/171bcc2bcd +# Error handling of tokenize= arguments. +# +foreach {n tkz} { + 1 {ascii none} + 2 {unicode61 none} + 3 {porter none} + 4 {trigram none} + 5 {ascii none 0} + 6 {unicode61 none 0} + 7 {porter none 0} + 8 {trigram none 0} +} { + db eval {DROP TABLE IF EXISTS t2;} + do_catchsql_test 2.$n " + DROP TABLE IF EXISTS t2; + CREATE VIRTUAL TABLE t2 USING fts5(a,b,c,tokenize='$tkz'); + " {1 {error in tokenizer constructor}} +} + finish_test diff --git a/ext/fts5/test/fts5trigram.test b/ext/fts5/test/fts5trigram.test index 351c059bf5..752686620c 100644 --- a/ext/fts5/test/fts5trigram.test +++ b/ext/fts5/test/fts5trigram.test @@ -69,6 +69,9 @@ do_execsql_test 2.0 { INSERT INTO t1 VALUES('abcdefghijklm'); INSERT INTO t1 VALUES('กรุงเทพมหานคร'); } +do_catchsql_test 2.0.1 { + CREATE VIRTUAL TABLE t2 USING fts5(z, tokenize='trigram case_sensitive'); +} {1 {error in tokenizer constructor}} foreach {tn s res} { 1 abc "(abc)defghijklm" @@ -206,7 +209,7 @@ do_execsql_test 7.0 { (20, "жираф.png"), (30, "cat.png"), (40, "кот.png"), - (50, "misic-🎵-.mp3"); + (50, "misic-🎵-.mp3"); } do_execsql_test 7.1 { SELECT rowid FROM f WHERE +filename GLOB '*ир*'; diff --git a/ext/fts5/test/fts5trigram2.test b/ext/fts5/test/fts5trigram2.test index 00f6d43952..33c07d3f8b 100644 --- a/ext/fts5/test/fts5trigram2.test +++ b/ext/fts5/test/fts5trigram2.test @@ -21,6 +21,9 @@ do_execsql_test 1.0 " INSERT INTO t1 VALUES('abc\u0303defghijklm'); INSERT INTO t1 VALUES('a\u0303b\u0303c\u0303defghijklm'); " +do_catchsql_test 1.0.1 { + CREATE VIRTUAL TABLE t2 USING fts5(z, tokenize='trigram remove_diacritics'); +} {1 {error in tokenizer constructor}} do_execsql_test 1.1 { SELECT highlight(t1, 0, '(', ')') FROM t1('abc'); diff --git a/manifest b/manifest index 215ae7ad21..dcbcb3f3ae 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improvements\sto\s./configure\sthat\swill\shopefully\sbreak\sfewer\sbuilds. -D 2024-08-06T20:00:10.407 +C Improved\srobustness\sof\sparsing\sof\stokenize=\sarguments\sin\sFTS5.\n[forum:/forumpost/171bcc2bcd|Forum\spost\s171bcc2bcd]. +D 2024-08-06T22:49:01.135 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -105,7 +105,7 @@ F ext/fts5/fts5_storage.c 1d7e08d4331da2f3f7e78e70eef2ed6a013d91ba16175c651adbc5 F ext/fts5/fts5_tcl.c 5ca3e3e35010d326f5b821a563e4fcde3913e052935f5c2c72c264122a26b48f F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b -F ext/fts5/fts5_tokenize.c fa5493075101540270f572038fc1723d44fcc97bfbf237c8530013b8a27860be +F ext/fts5/fts5_tokenize.c b1c3dc4de28f2532d9ee7be7182a96f943fa09fcc31c6f271d69dce72874ff8c F ext/fts5/fts5_unicode2.c eca63dbc797f8ff0572e97caf4631389c0ab900d6364861b915bdd4735973f00 F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d09885d1dadf80 F ext/fts5/fts5_vocab.c e4830b00809e5da53bc10f93adc59e321407b0f801c7f4167c0e47f5552267e0 @@ -229,9 +229,9 @@ F ext/fts5/test/fts5synonym2.test 58f357b997cf2fedeeb9d0de4db9f880fa96fa2fe27a74 F ext/fts5/test/fts5tok1.test 1f7817499f5971450d8c4a652114b3d833393c8134e32422d0af27884ffe9cef F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2 F ext/fts5/test/fts5tokenizer.test 7937cec672b148223fff8746d21d3e7ed0965fd7caf35ccdc888a005bb452f98 -F ext/fts5/test/fts5tokenizer2.test 9c1ad8ef0465076cbc9ff5c764782594329b3bce3e0f6a931a026902d006f495 -F ext/fts5/test/fts5trigram.test 6c4e37864f3e7d90673db5563d9736d7e40080ab94d10ebdffa94c1b77941da0 -F ext/fts5/test/fts5trigram2.test c91f0a94f7e1ff859682228646abeab4c0eba2effc46af2cbc8f0f48b05a0566 +F ext/fts5/test/fts5tokenizer2.test ddb8b10fbe4b84b2a75812671f127774c1d2e3e2bf82d2e0e4f0bb1cd8a2b2d6 +F ext/fts5/test/fts5trigram.test be914555deb8504dde682bd5aa343d00c4da37dfad20709a5bac30d5f97f2ef5 +F ext/fts5/test/fts5trigram2.test 9be01ec340a61869931a23ec109d189e589e2b807d574e1fc1f0cf2c23765c42 F ext/fts5/test/fts5ubsan.test 9a2dcf399dc8d0e0de661f0d93884d1d27e5b7f0693cfceb97dd24d818df5dd2 F ext/fts5/test/fts5umlaut.test a42fe2fe6387c40c49ab27ccbd070e1ae38e07f38d05926482cc0bccac9ad602 F ext/fts5/test/fts5unicode.test 41898f7e476e6515cd4b737c02a442cda5a580a74509788aa9072a2074948e0e @@ -2203,8 +2203,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 533a6251f188805363f0e39613ea03b1bfd758eaea00e0855803238585bdfec7 -R a7a477ca379232403aa066474b0e57d0 +P 769e32a69b7b7c04225afa0371f139b2ed29aaee5a7a4159a30d600ed9f25c57 +R f82e2089204b40d7709881ee6cd588a3 U drh -Z 01918af4ac385e19f077c5342eefac64 +Z e2400150d004e016a710bc2ae395399e # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index c7ef415a77..0cc2838448 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -769e32a69b7b7c04225afa0371f139b2ed29aaee5a7a4159a30d600ed9f25c57 +d9f726ade6b258f8723f90d0b04a4682e885e30939eb29773913e4dfc8e85503