git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
fts: Do not consider arbitrary headers with 8-bit data as language-specific
author Aki Tuomi <aki.tuomi@open-xchange.com>
Mon, 17 May 2021 09:13:16 +0000 (12:13 +0300)
committer aki.tuomi <aki.tuomi@open-xchange.com>
Tue, 1 Jun 2021 12:06:57 +0000 (12:06 +0000)
If we apply stemming and other language-specific processing to arbitrary
headers, their contents might become impossible to find in subsequent
searches due to the stemming.

This practically breaks e.g. From/To header searches with non-ascii
names.

src/plugins/fts/fts-build-mail.c

index 340fb7d9629120dbc58ebc0d3c9757785bff4e5b..d0f1c560ac1aefeeb76c44f579e84de1777077cc 100644 (file)
@@ -105,17 +105,6 @@ fts_build_unstructured_header(struct fts_mail_build_context *ctx,
        return ret;
 }
 
-static bool data_has_8bit(const unsigned char *data, size_t size)
-{
-       size_t i;
-
-       for (i = 0; i < size; i++) {
-               if ((data[i] & 0x80) != 0)
-                       return TRUE;
-       }
-       return FALSE;
-}
-
 static void fts_mail_build_ctx_set_lang(struct fts_mail_build_context *ctx,
                                        struct fts_user_language *user_lang)
 {
@@ -135,11 +124,9 @@ fts_build_tokenized_hdr_update_lang(struct fts_mail_build_context *ctx,
        /* Headers that don't contain any human language will only be
           translated to lowercase - no stemming or other filtering. There's
           unfortunately no pefect way of detecting which headers contain
-          human languages, so we have a list of some hardcoded header names
-          and we'll also assume that if there's any 8bit content it's a human
-          language. */
-       if (fts_header_has_language(hdr->name) ||
-           data_has_8bit(hdr->full_value, hdr->full_value_len))
+          human languages, so we check with fts_header_has_language if the
+          header is something that's supposed to contain human text. */
+       if (fts_header_has_language(hdr->name))
                ctx->cur_user_lang = NULL;
        else {
                fts_mail_build_ctx_set_lang(ctx,