HTML parts containing an embedded <?xml?> prolog (e.g. inside the body)
were misdetected as application/xml. Since the xml type has no_text=true,
this excluded the HTML part from text_parts entirely, causing
has_only_html_part() to return false and MIME_HTML_ONLY not to fire.
- Remove the xml binary pattern from patterns.lua (it preempted the text
heuristic, which correctly prioritises HTML)
- Guard xml text pattern so it cannot override HTML when HTML markers
are also present
- Raise xml text pattern weight to 40 to preserve pure XML detection
{ [[^BEGIN:VCARD\r?\n]], 40 },
},
xml = {
- { [[<\?xml\b.+\?>]], 31 },
+ { [[<\?xml\b.+\?>]], 40 },
}
}
return 'html', res.html
end
+ -- XML prolog can appear inside HTML; do not let xml override html
+ if res.xml and res.html then
+ res.xml = nil
+ end
+
local ext, weight = process_top_detected(res)
if weight then
},
}
},
- xml = {
- matches = {
- {
- -- XML prolog
- string = [[<\?xml\b.+\?>]],
- position = { '>=', 0 },
- weight = 30,
- },
- }
- },
-- Other
pgp = {
matches = {