]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-fts: allow hyphen character in domain part
author: Baofeng Wang <baofeng.wang@dovecot.fi>
Tue, 7 Jun 2016 13:37:27 +0000 (16:37 +0300)
committer: GitLab <gitlab@git.dovecot.net>
Tue, 21 Jun 2016 07:44:18 +0000 (10:44 +0300)
Allow the hyphen character and remove any trailing hyphen characters
when email tokenization is done.

src/lib-fts/fts-tokenizer-address.c
src/lib-fts/fts-tokenizer-common.c
src/lib-fts/test-fts-tokenizer.c

index 13c201271c650eeae3a2d0fbb83d5371f30be9c8..2b9380c499ae35c43d7ee9049d75e7196bafead5 100644 (file)
@@ -189,7 +189,7 @@ fts_tokenizer_email_address_parse_domain(struct email_address_fts_tokenizer *tok
 {
        size_t pos = 0;
 
-       while (pos < size && (IS_DTEXT(data[pos]) || data[pos] == '.'))
+       while (pos < size && (IS_DTEXT(data[pos]) || data[pos] == '.' || data[pos] == '-'))
                pos++;
         /* A complete domain name */
        if ((pos > 0 && pos < size) || /* non-atext after atext in this data*/
index 87faa7e9c3e649a07d5954e7b7904fac51320d2d..92feb71c28abf6c4daea0170b147a1358130f78b 100644 (file)
@@ -26,7 +26,8 @@ void fts_tokenizer_delete_trailing_invalid_char(const unsigned char *data,
        size_t pos = *len;
 
        /* the token may contain '.' in the end - remove all of them. */
-       while (pos > 0 && data[pos-1] == '.')
+       while (pos > 0 &&
+                 (data[pos-1] == '.' || data[pos-1] == '-'))
            pos--;
        *len = pos;
 }
index 960e5119d3c3cc3f3497e00f8e9cb3a645018718..6360a86f04cba0ac4ab2dfb9d80adf822cf484ed 100644 (file)
@@ -17,7 +17,9 @@
        "abcdefghijklmnopqrstuvxyz.abcdefghijklmnopqrstuvxyzabcdefghijklmnopqrstuvxyz@abcdefghijklmnopqrstuvxyz.abcdefghijklmnopqrstuvxyz.abcdefghijklmnopqrstuvxyz.abcdefghijklmnopqrstuvxyz.abcdefghijklmnopqrstuvxyz.abcdefghijklmnopqrstuvxyz.abcdefghijklmnopqrstuvxyz.abcdefghijklmnopqrstuvxyz.abcdefghijklmnopqrstuvxyz.abcdefghijklmnopqrstuvxyz.tld " \
        "trailing, period@blue.com. " \
        "multi-trialing, mul@trail.com..... " \
-       "m@s"
+       "m@s " \
+       "hypen@hypen-hypen.com " \
+       "hypen@hypen-hypen-sick.com.-"
 
 static const char *test_inputs[] = {
        /* generic things and word truncation: */
@@ -317,6 +319,8 @@ static void test_fts_tokenizer_address_only(void)
                "period@blue.com", /*trailing period '.' in email */
                "mul@trail.com",
                "m@s", /*one letter local-part and domain name */
+               "hypen@hypen-hypen.com",
+               "hypen@hypen-hypen-sick.com",
                NULL
        };
        struct fts_tokenizer *tok;
@@ -340,6 +344,8 @@ static void test_fts_tokenizer_address_parent(const char *name, const char * con
                "trailing", "period", "blue", "com", "period@blue.com",
                "multi", "trialing", "mul", "trail", "com", "mul@trail.com",
                "m", "s", "m@s",
+               "hypen", "hypen", "hypen", "com", "hypen@hypen-hypen.com",
+               "hypen", "hypen", "hypen", "sick", "com", "hypen@hypen-hypen-sick.com",
                NULL
        };
        struct fts_tokenizer *tok, *gen_tok;
@@ -376,6 +382,8 @@ static void test_fts_tokenizer_address_search(void)
                "trailing", "period@blue.com",
                "multi", "trialing", "mul@trail.com",
                "m@s",
+               "hypen@hypen-hypen.com",
+               "hypen@hypen-hypen-sick.com",
                NULL
        };
        static const char *const settings[] = { "search", "", NULL };