From: Vsevolod Stakhov Date: Wed, 18 Feb 2026 19:01:23 +0000 (+0000) Subject: [Fix] Fix XML detection overriding HTML in content type detection X-Git-Tag: 4.0.0~88 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=07b5bc2dfd40f9ef7eed056c694abf4d90d91f8a;p=thirdparty%2Frspamd.git [Fix] Fix XML detection overriding HTML in content type detection HTML parts containing an embedded XML prolog (<?xml ... ?>, e.g. inside the body) were misdetected as application/xml. Since the xml type has no_text=true, this excluded the HTML part from text_parts entirely, causing has_only_html_part() to return false and MIME_HTML_ONLY to not fire. - Remove xml binary pattern from patterns.lua (it preempted the text heuristic, which correctly prioritises HTML) - Guard xml text pattern so it cannot override HTML when HTML markers are also present - Raise xml text pattern weight to 40 to preserve pure XML detection --- diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua index 8b5b9c52da..6acbbbf8d0 100644 --- a/lualib/lua_magic/heuristics.lua +++ b/lualib/lua_magic/heuristics.lua @@ -84,7 +84,7 @@ local txt_patterns = { { [[^BEGIN:VCARD\r?\n]], 40 }, }, xml = { - { [[<\?xml\b.+\?>]], 31 }, + { [[<\?xml\b.+\?>]], 40 }, } } @@ -530,6 +530,11 @@ exports.text_part_heuristic = function(part, log_obj, _) return 'html', res.html end + -- XML prolog can appear inside HTML; do not let xml override html + if res.xml and res.html then + res.xml = nil + end + local ext, weight = process_top_detected(res) if weight then diff --git a/lualib/lua_magic/patterns.lua b/lualib/lua_magic/patterns.lua index 2bbf4e258d..630025202b 100644 --- a/lualib/lua_magic/patterns.lua +++ b/lualib/lua_magic/patterns.lua @@ -462,16 +462,6 @@ local patterns = { }, } }, - xml = { - matches = { - { - -- XML prolog - string = [[<\?xml\b.+\?>]], - position = { '>=', 0 }, - weight = 30, - }, - } - }, -- Other pgp = { matches = {