git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
fts: build-mail - Fix address header indexing for RFC 2047 encoded-words
author Timo Sirainen <timo.sirainen@open-xchange.com>
Fri, 17 Apr 2026 19:33:32 +0000 (19:33 +0000)
committer timo.sirainen <timo.sirainen@open-xchange.com>
Fri, 1 May 2026 08:14:34 +0000 (08:14 +0000)
The header value reaching fts_build_mail_header() was already RFC 2047
decoded by message_decoder, so passing it to message_address_parse()
meant decoded display name characters were reinterpreted as RFC 5322
syntax. For example:

  From: =?UTF-8?B?VGVzdHVzZXIgKEV4YW1wbGUp?= <user@example.com>

decodes to "Testuser (Example)" and "(Example)" was then silently dropped as
an RFC 5322 comment. Similarly, display names containing characters
like '[' made parse_name_addr() fail, leaving neither the display name
nor the email address indexed.

Enable MESSAGE_DECODER_FLAG_RAW_ADDRESS_HEADERS so that address headers
arrive with encoded-words intact, parse the raw value, write the
normalised address, and then decode the result to UTF-8 before handing
it to the FTS backend.

src/plugins/fts/fts-build-mail.c

index 66d9a04f8da72e6522552be4c20569469aa3de69..060c3225dac2d6b3a5ae41042545af0d9cfd1e33 100644 (file)
@@ -8,6 +8,7 @@
 #include "message-address.h"
 #include "message-parser.h"
 #include "message-decoder.h"
+#include "message-header-decode.h"
 #include "mail-storage.h"
 #include "index-mail.h"
 #include "fts-parser.h"
@@ -171,9 +172,17 @@ static int fts_build_mail_header(struct fts_mail_build_context *ctx,
                ret = fts_build_unstructured_header(ctx, hdr);
        } else T_BEGIN {
                /* message address. normalize it to give better
-                  search results. */
+                  search results.
+
+                  The decoder was initialised with
+                  MESSAGE_DECODER_FLAG_RAW_ADDRESS_HEADERS, so hdr->full_value
+                  still holds the raw header bytes with RFC 2047 encoded-words
+                  intact. Parsing the raw value avoids having decoded display
+                  name characters (e.g. '(' or '[') reinterpreted as RFC 5322
+                  comments or other specials. After normalisation the result
+                  is decoded to UTF-8 for indexing. */
                struct message_address *addr;
-               string_t *str;
+               string_t *str, *decoded;
 
                addr = message_address_parse(pool_datastack_create(),
                                             hdr->full_value,
@@ -182,7 +191,12 @@ static int fts_build_mail_header(struct fts_mail_build_context *ctx,
                str = t_str_new(hdr->full_value_len);
                message_address_write(str, addr);
 
-               ret = fts_build_data(ctx, str_data(str), str_len(str), TRUE);
+               decoded = t_str_new(str_len(str));
+               message_header_decode_utf8(str_data(str), str_len(str),
+                                          decoded,
+                                          ctx->update_ctx->normalizer);
+               ret = fts_build_data(ctx, str_data(decoded),
+                                    str_len(decoded), TRUE);
        } T_END;
 
        if ((ctx->update_ctx->backend->flags &
@@ -623,7 +637,8 @@ fts_build_mail_real(struct fts_backend_update_context *update_ctx,
        pool_t parts_pool = pool_alloconly_create("fts message parts", 512);
        parser = message_parser_init(parts_pool, input, &parser_set);
 
-       decoder = message_decoder_init(update_ctx->normalizer, 0);
+       decoder = message_decoder_init(update_ctx->normalizer,
+                                      MESSAGE_DECODER_FLAG_RAW_ADDRESS_HEADERS);
        for (;;) {
                ret = message_parser_parse_next_block(parser, &raw_block);
                i_assert(ret != 0);