git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-fts: Added unit testing for unicode apostrophe handling.
author Timo Sirainen <tss@iki.fi>
Mon, 1 Jun 2015 18:59:02 +0000 (21:59 +0300)
committer Timo Sirainen <tss@iki.fi>
Mon, 1 Jun 2015 18:59:02 +0000 (21:59 +0300)
src/lib-fts/test-fts-tokenizer.c

index 1f355f4abeb3a99e7cc895cf2789501c088592c4..021a860c28e4b9eac11ac4d70a694c226aa3e613 100644 (file)
@@ -41,6 +41,11 @@ static const char *test_inputs[] = {
        "123456789012345678901234567890x',"
        "123456789012345678901234567890x'',"
 
+       /* \xe28099 = U+2019 is a smart quote, sometimes used as an apostrophe */
+       "\xE2\x80\x99 \xE2\x80\x99 \xE2\x80\x99\xE2\x80\x99 \xE2\x80\x99\xE2\x80\x99\xE2\x80\x99 \xE2\x80\x99quoted text\xE2\x80\x99\xE2\x80\x99word\xE2\x80\x99 \xE2\x80\x99hlo words\xE2\x80\x99 you\xE2\x80\x99re78901234567890123456789012 bad\xE2\x80\x99\xE2\x80\x99\xE2\x80\x99word\xE2\x80\x99\xE2\x80\x99\xE2\x80\x99pre post\xE2\x80\x99\xE2\x80\x99\xE2\x80\x99",
+
+       "you\xE2\x80\x99re\xE2\x80\x99xyz",
+
        /* whitespace: with Unicode(utf8) U+FF01(ef bc 81)(U+2000(e2 80 80) and
           U+205A(e2 81 9a) and U+205F(e2 81 9f) */
        "hello\xEF\xBC\x81world\r\nAnd\xE2\x80\x80there\twas: text "
@@ -156,6 +161,11 @@ static void test_fts_tokenizer_generic_only(void)
                "123456789012345678901234567890",
                "123456789012345678901234567890",
 
+               "quoted", "text", "word", "hlo", "words", "you're789012345678901234567890", "bad",
+               "word", "pre", "post", NULL,
+
+               "you're'xyz", NULL,
+
                "hello", "world", "And",
                "there", "was", "text", "galore",
                "and", "more", NULL,
@@ -208,6 +218,11 @@ static void test_fts_tokenizer_generic_tr29_only(void)
                "123456789012345678901234567890",
                "123456789012345678901234567890",
 
+               "quoted", "text", "word", "hlo", "words", "you're789012345678901234567890", "bad",
+               "word", "pre", "post", NULL,
+
+               "you're'xyz", NULL,
+
                "hello", "world", "And",
                "there", "was", "text", "galore",
                "and", "more", NULL,