return 'exe', 30
end
+-- Lua patterns for HTML markup that may precede an inline <svg> element.
+-- They are matched against lowercased input, hence the lowercase tag names.
+local svg_html_markers = {
+  '<!doctype%s+html',
+  '<html[%s>]',
+  '<head[%s>]',
+  '<body[%s>]',
+  '<meta[%s>]',
+}
+
+-- SVG heuristic: tell a standalone SVG apart from HTML that merely embeds one.
+-- Only the data preceding the <svg> match position `pos` is inspected, capped
+-- at 4096 bytes.
+-- Returns 'svg', 40 when no HTML marker precedes the match (or the match sits
+-- at the very start of the input), nil when an HTML marker is found first,
+-- and nothing at all when there is no input.
+exports.svg_format_heuristic = function(input, log_obj, pos, part)
+  if not input then
+    return
+  end
+
+  local prefix_len = math.min(pos, 4096)
+
+  -- With fewer than 5 bytes there is no room for anything before <svg>,
+  -- so the prefix scan is skipped and the input is treated as a real SVG
+  if prefix_len >= 5 then
+    local prefix = tostring(input:span(1, prefix_len)):lower()
+
+    -- Any HTML marker ahead of <svg> means HTML with an embedded SVG
+    for _, marker in ipairs(svg_html_markers) do
+      if prefix:find(marker) then
+        lua_util.debugm(N, log_obj, 'svg pattern found at %s but HTML markers present, skipping svg detection',
+            pos)
+        return nil
+      end
+    end
+  end
+
+  return 'svg', 40
+end
+
return exports
matches = {
{
-- Case-insensitive <svg ...> in the first chunk
+ -- Use heuristic to avoid misdetecting HTML with embedded SVG
string = [[(?i)<svg\b]],
position = { '<=', 4096 },
weight = 40,
+ heuristic = heuristics.svg_format_heuristic
},
{
-- Case-insensitive <!DOCTYPE svg ...> within the first 4KiB
+ -- DOCTYPE svg is unambiguous - no heuristic needed
string = [[(?i)<!doctype\s+svg]],
position = { '<=', 4096 },
weight = 40,
... MAGIC_SYM_VCF_56
... MAGIC_SYM_CSV_57
... MAGIC_SYM_HEIC_58
+ ... MAGIC_SYM_HTML_59
+ ... MAGIC_SYM_SVG_60
OutxkSe3/G7yn398gU28Royre16hUFz7UXiMrjFcra8MwOeyEKzA44FlZMpNMynjbMDP+L2JfJ/3rmGJ
0YCJBxFcC867msO9wip2vP786vlLeC/fqKwSng==
+--XXX
+Content-Type: application/octet-stream
+Content-Transfer-Encoding: base64
+X-Real-Type: html-with-svg (should detect as html, not svg)
+
+PCFET0NUWVBFIGh0bWw+CjxodG1sPgo8aGVhZD4KICA8dGl0bGU+VGVzdCBIVE1MIHdpdGggU1ZH
+PC90aXRsZT4KPC9oZWFkPgo8Ym9keT4KICA8cD5UaGlzIGlzIEhUTUwgd2l0aCBlbWJlZGRlZCBT
+Vkc8L3A+CiAgPHN2ZyB3aWR0aD0iMTAwIiBoZWlnaHQ9IjEwMCI+CiAgICA8Y2lyY2xlIGN4PSI1
+MCIgY3k9IjUwIiByPSI0MCIgZmlsbD0icmVkIi8+CiAgPC9zdmc+CjwvYm9keT4KPC9odG1sPgo=
+
+--XXX
+Content-Type: application/octet-stream
+Content-Transfer-Encoding: base64
+X-Real-Type: svg
+
+PHN2ZyB3aWR0aD0iMTAwIiBoZWlnaHQ9IjEwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIw
+MDAvc3ZnIj4KICA8Y2lyY2xlIGN4PSI1MCIgY3k9IjUwIiByPSI0MCIgZmlsbD0iYmx1ZSIvPgo8
+L3N2Zz4K
+
--XXX--