/* porter rule condition: (m > 1 and (*S or *T)) */
static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
- return nStem>0
- && (zStem[nStem-1]=='s' || zStem[nStem-1]=='t')
+ assert( nStem>0 );  /* zStem[nStem-1] below needs a non-empty stem */
+ return (zStem[nStem-1]=='s' || zStem[nStem-1]=='t')
&& fts5Porter_MGt1(zStem, nStem);
}
fts5PorterStep4(aBuf, &nBuf);
/* Step 5a. */
- if( nBuf>0 && aBuf[nBuf-1]=='e' ){
+ assert( nBuf>0 );
+ if( aBuf[nBuf-1]=='e' ){
if( fts5Porter_MGt1(aBuf, nBuf-1)
|| (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1))
){
--- /dev/null
+# 2014 Dec 20
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests focusing on the fts5 porter stemmer implementation.
+#
+# These are extra tests added to those in fts5porter.test in order to
+# improve test coverage of the porter stemmer implementation.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5porter2
+
+# Each pair of entries in this list is an input word followed by the
+# first token the porter tokenizer is expected to return for it (see
+# the loop over $test_vocab below).
+set test_vocab {
+ tion tion
+ ation ation
+ vation vation
+ avation avat
+ vion vion
+ ion ion
+ relational relat
+ relation relat
+ relate relat
+ zzz zzz
+ ii ii
+ iiing ii
+ xtional xtional
+ xenci xenci
+ xlogi xlogi
+ realization realiz
+ realize realiz
+ xization xizat
+ capitalism capit
+ talism talism
+ xiveness xive
+ xfulness xful
+ xousness xous
+ xical xical
+ xicate xicat
+ xicity xiciti
+ ies ie
+ eed e
+ eing e
+ s s
+}
+
+# Run one test case per {in out} pair in $test_vocab: tokenize $in with
+# the "porter" tokenizer and check that element 0 of the result is $out.
+# The counter $i only serves to give each case a distinct name.
+set i 0
+foreach {in out} $test_vocab {
+ do_test "1.$i.($in -> $out)" {
+ lindex [sqlite3_fts5_tokenize db porter $in] 0
+ } $out
+ incr i
+}
+
+
+finish_test
+
do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 1 2 }
do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 }
+#-------------------------------------------------------------------------
+# Test the 'separators' option with the unicode61 tokenizer.
+#
+# 8.1: every upper-case ASCII letter is listed as a separator, so the
+# single inserted value is expected to split into the lower-case words
+# between them. The terms are read back through an fts5vocab table, and
+# the case runs inside BEGIN/ROLLBACK so the tables it creates are
+# discarded afterwards.
+#
+do_execsql_test 8.1 {
+ BEGIN;
+ CREATE VIRTUAL TABLE e6 USING fts5(x,
+ tokenize="unicode61 separators ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ );
+ INSERT INTO e6 VALUES('theAquickBbrownCfoxDjumpedWoverXtheYlazyZdog');
+ CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row');
+ SELECT term FROM e7;
+ ROLLBACK;
+} {
+ brown dog fox jumped lazy over quick the
+}
+
+# 8.2: as 8.1, but the separators are the non-ASCII codepoints U+0E01
+# through U+0E07. [subst] is applied to both the SQL and the expected
+# result so that Tcl expands the \uXXXX escapes inside the braces. The
+# second inserted row checks that U+0E07 splits U+0E08 from U+0E09.
+do_execsql_test 8.2 [subst {
+ BEGIN;
+ CREATE VIRTUAL TABLE e6 USING fts5(x,
+ tokenize="unicode61 separators '\u0E01\u0E02\u0E03\u0E04\u0E05\u0E06\u0E07'"
+ );
+ INSERT INTO e6 VALUES('the\u0E01quick\u0E01brown\u0E01fox\u0E01'
+ || 'jumped\u0E01over\u0E01the\u0E01lazy\u0E01dog'
+ );
+ INSERT INTO e6 VALUES('\u0E08\u0E07\u0E09');
+ CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row');
+ SELECT term FROM e7;
+ ROLLBACK;
+}] [subst {
+ brown dog fox jumped lazy over quick the \u0E08 \u0E09
+}]
finish_test
# Title-case mappings work
do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5"
+# 1.12: upper-case Latin-1 letters with diacritics (e.g. U+00C1) are
+# expected to fold to their lower-case forms (U+00E1), keeping the
+# diacritic. NOTE(review): the expected list appears to pair each folded
+# token with its original text - confirm against do_unicode_token_test.
+do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \
+ "\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3"
+
+# 1.13: U+00A2..U+00A5 are absent from the expected output, i.e. they are
+# expected not to be part of any token.
+do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \
+ "abc abc def def"
+
#-------------------------------------------------------------------------
#
set docs [list {
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
}
+
+ execsql "CREATE VIRTUAL TABLE t8 USING fts5(
+ a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\"
+ )"
} {}
do_test 4.2 {
}
} {}
+# 4.4: NOTE(review): sqlite3_exec_hex presumably turns each %XX into the
+# byte 0xXX before running the SQL - confirm against its definition. The
+# case only checks that creating the table with these separator bytes
+# and then inserting a row yields the result {0 {}}.
+do_test 4.4 {
+ sqlite3_exec_hex db {
+ CREATE VIRTUAL TABLE t9 USING fts5(a, b,
+ tokenize="unicode61 separators '%C09004'"
+ );
+ INSERT INTO t9(a) VALUES('abc%88def %89ghi%90');
+ }
+} {0 {}}
+
#-------------------------------------------------------------------------
-C Add\stests\sfor\sfts5\stokenizers.
-D 2015-05-19T19:37:09.304
+C Improve\stest\scoverage\sof\sfts5_tokenize.c.
+D 2015-05-20T09:27:51.629
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F ext/fts5/fts5_index.c 2c4500c35072b049d1391bbb4e64e4c0e3d3dd43
F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2
F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c
-F ext/fts5/fts5_tokenize.c 4d9d50478169a8446686ab255cc723a6b4f4c20b
+F ext/fts5/fts5_tokenize.c 6f4d2cbe7ed892821d1a233c7db613dafdb3877a
F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d
F ext/fts5/fts5_vocab.c b54301e376f59f08f662b5dde1cfaf26e86e4db6
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54
F ext/fts5/test/fts5plan.test 89783f70dab89ff936ed6f21d88959b49c853a47
F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e
+F ext/fts5/test/fts5porter2.test c534385e88e685b354c2b2020acc0c4920042c8e
F ext/fts5/test/fts5prefix.test 7eba86fc270b110ba2b83ba286a1fd4b3b17955e
F ext/fts5/test/fts5rank.test f59a6b20ec8e08cb130d833dcece59cf9cd92890
F ext/fts5/test/fts5rebuild.test 77c6613aa048f38b4a12ddfacb2e6e1342e1b066
F ext/fts5/test/fts5restart.test cd58a5fb552ac10db549482698e503f82693bcd0
F ext/fts5/test/fts5rowid.test ca9d91ccb3a4590fc561b2d7a884361bb21e8df5
-F ext/fts5/test/fts5tokenizer.test f54bbbff67ff03ce49c153c0f6a5e3f8369f986a
+F ext/fts5/test/fts5tokenizer.test 668747fcb41de6fc7daebc478920b705164fccc1
F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d
-F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee
+F ext/fts5/test/fts5unicode2.test ad38982b03dc9213445facb16e99f668a74cc4ba
F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944
F ext/fts5/test/fts5version.test dc34a735af6625a1a7a4a916a38d122071343887
F ext/fts5/test/fts5vocab.test 80fb22850dd3b2c92a3896e6021605e08c0872aa
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P 2870a80593302e7835c5f5d167f42710d8439e7d
-R 63f128b09262f76dbe78be4c38aa78c8
+P 4f90ba20e2be6ec5755fe894938ac97342d6fbf6
+R 43528c0613d372060fbd8256efc47909
U dan
-Z e801c590b1575eb988d36c609d9907aa
+Z e3c696b644b37e5798613b4f15c87656
-4f90ba20e2be6ec5755fe894938ac97342d6fbf6
\ No newline at end of file
+0e91a6a520f040b8902da6a1a4d9107dc66c0ea3
\ No newline at end of file