From: Vsevolod Stakhov Date: Sat, 3 Feb 2018 13:47:52 +0000 (+0000) Subject: [Feature] Try to detect HTML messages better X-Git-Tag: 1.7.0~222 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=85631ea2677e0a95679c38b3103af03fff161d9c;p=thirdparty%2Frspamd.git [Feature] Try to detect HTML messages better --- diff --git a/src/libmime/message.c b/src/libmime/message.c index 49cbc585c5..c92a1d26e1 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -646,7 +646,23 @@ rspamd_message_process_text_part (struct rspamd_task *task, found_html = TRUE; } else { - found_txt = TRUE; + /* + * We also need to apply heuristic for text parts that are actually + * HTML. + */ + RSPAMD_FTOK_ASSIGN (&html_tok, "<!DOCTYPE html"); + RSPAMD_FTOK_ASSIGN (&xhtml_tok, "<html"); + + if (rspamd_lc_cmp (mime_part->parsed_data.begin, html_tok.begin, + MIN (html_tok.len, mime_part->parsed_data.len)) == 0 || + rspamd_lc_cmp (mime_part->parsed_data.begin, xhtml_tok.begin, + MIN (xhtml_tok.len, mime_part->parsed_data.len)) == 0) { + msg_info_task ("found html part pretending to be text/plain part"); + found_html = TRUE; + } + else { + found_txt = TRUE; + } } } else {