From: dan Date: Tue, 19 May 2015 19:37:09 +0000 (+0000) Subject: Add tests for fts5 tokenizers. X-Git-Tag: version-3.8.11~114^2~34 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=116eaee4a0a9ed7db81e7acc9a7d30c7b9f7f3ef;p=thirdparty%2Fsqlite.git Add tests for fts5 tokenizers. FossilOrigin-Name: 4f90ba20e2be6ec5755fe894938ac97342d6fbf6 --- diff --git a/ext/fts5/test/fts5fault6.test b/ext/fts5/test/fts5fault6.test index 73e488ba63..b9657be1cc 100644 --- a/ext/fts5/test/fts5fault6.test +++ b/ext/fts5/test/fts5fault6.test @@ -100,5 +100,53 @@ do_faultsim_test 2.3 -faults oom-t* -prep { faultsim_test_result {0 {}} } +#------------------------------------------------------------------------- +# OOM in the ASCII tokenizer with very large tokens. +# +# Also the unicode tokenizer. +# +set t1 [string repeat wxyz 20] +set t2 [string repeat wxyz 200] +set t3 [string repeat wxyz 2000] +set doc "$t1 $t2 $t3" +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE xyz USING fts5(c, tokenize=ascii, content=""); + CREATE VIRTUAL TABLE xyz2 USING fts5(c, content=""); +} +faultsim_save_and_close + +do_faultsim_test 3.1 -faults oom-t* -prep { + faultsim_restore_and_reopen + db eval { SELECT * FROM xyz } +} -body { + db eval { INSERT INTO xyz VALUES($::doc) } +} -test { + faultsim_test_result {0 {}} +} + +do_faultsim_test 3.2 -faults oom-t* -prep { + faultsim_restore_and_reopen + db eval { SELECT * FROM xyz2 } +} -body { + db eval { INSERT INTO xyz2 VALUES($::doc) } +} -test { + faultsim_test_result {0 {}} +} + +#------------------------------------------------------------------------- +# OOM while initializing a unicode61 tokenizer. +# +reset_db +faultsim_save_and_close +do_faultsim_test 4.1 -faults oom-t* -prep { + faultsim_restore_and_reopen +} -body { + db eval { + CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc"); + } +} -test { + faultsim_test_result {0 {}} +} + finish_test diff --git a/ext/fts5/test/fts5tokenizer.test b/ext/fts5/test/fts5tokenizer.test index a365854295..83ad169188 100644 --- a/ext/fts5/test/fts5tokenizer.test +++ b/ext/fts5/test/fts5tokenizer.test @@ -134,5 +134,81 @@ do_catchsql_test 5.3 { CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii opt arg'); } {1 {error in tokenizer constructor}} +#------------------------------------------------------------------------- +# Test that the ASCII and unicode61 tokenizers both handle SQLITE_DONE +# correctly. +# + +proc test_token_cb {varname token iStart iEnd} { + upvar $varname var + lappend var $token + if {[llength $var]==3} { return "SQLITE_DONE" } + return "SQLITE_OK" +} + +proc tokenize {cmd} { + set res [list] + $cmd xTokenize [$cmd xColumnText 0] [list test_token_cb res] + set res +} +sqlite3_fts5_create_function db tokenize tokenize + +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE x1 USING fts5(a, tokenize=ascii); + INSERT INTO x1 VALUES('q w e r t y'); + INSERT INTO x1 VALUES('y t r e w q'); + SELECT tokenize(x1) FROM x1 WHERE x1 MATCH 'e AND r'; +} { + {q w e} {y t r} +} + +do_execsql_test 6.1 { + CREATE VIRTUAL TABLE x2 USING fts5(a, tokenize=unicode61); + INSERT INTO x2 VALUES('q w e r t y'); + INSERT INTO x2 VALUES('y t r e w q'); + SELECT tokenize(x2) FROM x2 WHERE x2 MATCH 'e AND r'; +} { + {q w e} {y t r} +} + + +#------------------------------------------------------------------------- +# Miscellaneous tests for the unicode tokenizer. +# +do_catchsql_test 6.1 { + CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 tokenchars'); +} {1 {error in tokenizer constructor}} +do_catchsql_test 6.2 { + CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 a b'); +} {1 {error in tokenizer constructor}} +do_catchsql_test 6.3 { + CREATE VIRTUAL TABLE a3 USING fts5( + x, y, tokenize = 'unicode61 remove_diacritics 2' + ); +} {1 {error in tokenizer constructor}} +do_catchsql_test 6.4 { + CREATE VIRTUAL TABLE a3 USING fts5( + x, y, tokenize = 'unicode61 remove_diacritics 10' + ); +} {1 {error in tokenizer constructor}} + +#------------------------------------------------------------------------- +# Porter tokenizer with very large tokens. +# +set a [string repeat a 100] +set b [string repeat b 500] +set c [string repeat c 1000] +do_execsql_test 7.0 { + CREATE VIRTUAL TABLE e5 USING fts5(x, tokenize=porter); + INSERT INTO e5 VALUES($a || ' ' || $b); + INSERT INTO e5 VALUES($b || ' ' || $c); + INSERT INTO e5 VALUES($c || ' ' || $a); +} + +do_execsql_test 7.1 {SELECT rowid FROM e5 WHERE e5 MATCH $a} { 1 3 } +do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 1 2 } +do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 } + + finish_test diff --git a/manifest b/manifest index 50a6c9ec79..90de227169 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Update\sMakefile.in\sso\sthat\sthe\samalgamation\sfiles\sbuilt\sby\s"make\ssqlite3.c"\sinclude\sfts5. -D 2015-05-19T11:38:32.860 +C Add\stests\sfor\sfts5\stokenizers. +D 2015-05-19T19:37:09.304 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -150,7 +150,7 @@ F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 F ext/fts5/test/fts5fault4.test 7c7c46559368f06b98daa940d376c4740bcdad25 F ext/fts5/test/fts5fault5.test 98e7e77bc1d8bb47c955e7d6dc870ab5736536e3 -F ext/fts5/test/fts5fault6.test 7cdfdceef362cb9d72f66388846d62bff44c6d01 +F ext/fts5/test/fts5fault6.test 234dc6355f8d3f8b5be2763f30699d770247c215 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test bdba7b591d503005d5a81871ba00a359daa1e969 F ext/fts5/test/fts5integrity.test b45f633381a85dc000e41d68c96ab510985ca35e @@ -164,7 +164,7 @@ F ext/fts5/test/fts5rank.test f59a6b20ec8e08cb130d833dcece59cf9cd92890 F ext/fts5/test/fts5rebuild.test 77c6613aa048f38b4a12ddfacb2e6e1342e1b066 F ext/fts5/test/fts5restart.test cd58a5fb552ac10db549482698e503f82693bcd0 F ext/fts5/test/fts5rowid.test ca9d91ccb3a4590fc561b2d7a884361bb21e8df5 -F ext/fts5/test/fts5tokenizer.test 45cc65223a686b303b409b19154ac310ba70678d +F ext/fts5/test/fts5tokenizer.test f54bbbff67ff03ce49c153c0f6a5e3f8369f986a F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 @@ -1328,7 +1328,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P de9f8ef6ebf036df5a558cd78fb4927da2d83ce8 -R 48462bff9aab6209ce6b76328087ee6c +P 2870a80593302e7835c5f5d167f42710d8439e7d +R 63f128b09262f76dbe78be4c38aa78c8 U dan -Z 49662f04b4638e7ccdb8bbbc0c7d2948 +Z e801c590b1575eb988d36c609d9907aa diff --git a/manifest.uuid b/manifest.uuid index 12095dd529..1348da3add 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2870a80593302e7835c5f5d167f42710d8439e7d \ No newline at end of file +4f90ba20e2be6ec5755fe894938ac97342d6fbf6 \ No newline at end of file