From: Timo Sirainen Date: Thu, 8 Feb 2024 22:57:12 +0000 (+0200) Subject: lib-mail, lib-imap: Optimize parsing large number of address headers X-Git-Tag: 2.3.21.1~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1481c04f02df7647f520df65d63df7626bf0ee32;p=thirdparty%2Fdovecot%2Fcore.git lib-mail, lib-imap: Optimize parsing large number of address headers Every header was appended to a linked list by walking through the whole list, causing excessive CPU usage when the list became large enough. Fixed by changing struct message_part_envelope to use struct message_address_list, which also stores linked list tail pointers. This allows quickly appending to the end of the linked list. --- diff --git a/src/lib-imap/imap-envelope.c b/src/lib-imap/imap-envelope.c index 1312eae2ff..da3177025a 100644 --- a/src/lib-imap/imap-envelope.c +++ b/src/lib-imap/imap-envelope.c @@ -67,17 +67,17 @@ void imap_envelope_write(struct message_part_envelope *data, } str_append_c(str, ' '); - imap_write_address(str, data->from); + imap_write_address(str, data->from.head); str_append_c(str, ' '); - imap_write_address(str, NVL(data->sender, data->from)); + imap_write_address(str, NVL(data->sender.head, data->from.head)); str_append_c(str, ' '); - imap_write_address(str, NVL(data->reply_to, data->from)); + imap_write_address(str, NVL(data->reply_to.head, data->from.head)); str_append_c(str, ' '); - imap_write_address(str, data->to); + imap_write_address(str, data->to.head); str_append_c(str, ' '); - imap_write_address(str, data->cc); + imap_write_address(str, data->cc.head); str_append_c(str, ' '); - imap_write_address(str, data->bcc); + imap_write_address(str, data->bcc.head); str_append_c(str, ' '); imap_append_nstring_nolf(str, data->in_reply_to); @@ -126,28 +126,25 @@ imap_envelope_parse_address(const struct imap_arg *arg, static bool imap_envelope_parse_addresses(const struct imap_arg *arg, - pool_t pool, struct message_address **addrs_r) + pool_t pool, struct message_address_list 
*addrs_r) { - struct message_address *first, *last, *addr; + struct message_address *addr; const struct imap_arg *list_args; - if (arg->type == IMAP_ARG_NIL) { - *addrs_r = NULL; + i_zero(addrs_r); + if (arg->type == IMAP_ARG_NIL) return TRUE; - } if (!imap_arg_get_list(arg, &list_args)) return FALSE; - first = last = addr = NULL; + addr = NULL; for (; !IMAP_ARG_IS_EOL(list_args); list_args++) { if (!imap_envelope_parse_address (list_args, pool, &addr)) return FALSE; - DLLIST2_APPEND(&first, &last, addr); + DLLIST2_APPEND(&addrs_r->head, &addrs_r->tail, addr); } - - *addrs_r = first; return TRUE; } diff --git a/src/lib-mail/message-part-data.c b/src/lib-mail/message-part-data.c index a5771f87e2..25019ab432 100644 --- a/src/lib-mail/message-part-data.c +++ b/src/lib-mail/message-part-data.c @@ -4,6 +4,7 @@ #include "str.h" #include "wildcard-match.h" #include "array.h" +#include "llist.h" #include "rfc822-parser.h" #include "rfc2231-parser.h" #include "message-address.h" @@ -176,7 +177,7 @@ void message_part_envelope_parse_from_header(pool_t pool, { struct message_part_envelope *d; enum envelope_field field; - struct message_address **addr_p, *addr; + struct message_address_list *addr_p, new_addr; const char **str_p; if (*data == NULL) { @@ -234,18 +235,18 @@ void message_part_envelope_parse_from_header(pool_t pool, } if (addr_p != NULL) { - addr = message_address_parse(pool, hdr->full_value, - hdr->full_value_len, - UINT_MAX, - MESSAGE_ADDRESS_PARSE_FLAG_FILL_MISSING); + message_address_parse_full(pool, hdr->full_value, + hdr->full_value_len, + UINT_MAX, + MESSAGE_ADDRESS_PARSE_FLAG_FILL_MISSING, + &new_addr); /* Merge multiple headers the same as if they were comma separated in a single line. This is better from security point of view, because attacker could intentionally write addresses in a way that e.g. the first From header is validated while MUA only shows the second From header. 
*/ - while (*addr_p != NULL) - addr_p = &(*addr_p)->next; - *addr_p = addr; + DLLIST2_JOIN(&addr_p->head, &addr_p->tail, + &new_addr.head, &new_addr.tail); } else if (str_p != NULL) { *str_p = message_header_strdup(pool, hdr->full_value, hdr->full_value_len); diff --git a/src/lib-mail/message-part-data.h b/src/lib-mail/message-part-data.h index 5ff9ffe1bc..7ec878de68 100644 --- a/src/lib-mail/message-part-data.h +++ b/src/lib-mail/message-part-data.h @@ -2,6 +2,7 @@ #define MESSAGE_PART_DATA_H #include "message-part.h" +#include "message-address.h" #define MESSAGE_PART_DEFAULT_CHARSET "us-ascii" @@ -14,8 +15,9 @@ struct message_part_param { struct message_part_envelope { const char *date, *subject; - struct message_address *from, *sender, *reply_to; - struct message_address *to, *cc, *bcc; + + struct message_address_list from, sender, reply_to; + struct message_address_list to, cc, bcc; const char *in_reply_to, *message_id; }; diff --git a/src/lib-storage/index/index-search-mime.c b/src/lib-storage/index/index-search-mime.c index da7e5e1709..3328ce98af 100644 --- a/src/lib-storage/index/index-search-mime.c +++ b/src/lib-storage/index/index-search-mime.c @@ -205,7 +205,7 @@ seach_arg_mime_envelope_address_match( enum mail_search_mime_arg_type type, const char *key, const struct message_part_envelope *envelope) { - const struct message_address *addrs; + struct message_address_list addrs; string_t *addrs_enc; if (envelope == NULL) @@ -239,7 +239,7 @@ seach_arg_mime_envelope_address_match( probably be normalized directly in the struct message_address. */ addrs_enc = t_str_new(128); - message_address_write(addrs_enc, addrs); + message_address_write(addrs_enc, addrs.head); return (strstr(str_c(addrs_enc), key) != NULL ? 1 : 0); }