git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Fix] lua_magic: avoid misdetecting HTML with embedded SVG as SVG
author Vsevolod Stakhov <vsevolod@rspamd.com>
Wed, 4 Feb 2026 14:50:35 +0000 (14:50 +0000)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Wed, 4 Feb 2026 14:50:35 +0000 (14:50 +0000)
Add svg_format_heuristic that checks for HTML markers (<!DOCTYPE html>,
<html>, <head>, <body>, <meta>) before the <svg> tag position. If HTML
markers are present, skip SVG detection and let the text heuristic
properly classify the content as HTML.

Add functional test with HTML containing embedded SVG (should detect as
HTML) and standalone SVG (should still detect as SVG).

lualib/lua_magic/heuristics.lua
lualib/lua_magic/patterns.lua
test/functional/cases/001_merged/350_magic.robot
test/functional/messages/gargantua.eml

index 8258ff2493ad60ea66d9f25f42af5748f76f8597..8b5b9c52daac51f0838efe8d285c8ec858e2d3f2 100644 (file)
@@ -614,4 +614,34 @@ exports.pe_part_heuristic = function(input, log_obj, pos, part)
   return 'exe', 30
 end
 
+-- Decide whether an '<svg' hit is a standalone SVG or markup embedded in HTML.
+-- Returns 'svg', 40 for a standalone image; returns nil when HTML markers precede it.
+exports.svg_format_heuristic = function(input, log_obj, pos, part)
+  local svg_ext, svg_weight = 'svg', 40
+  if not input then
+    return
+  end
+
+  -- Look only at the data leading up to the <svg> match position, capped at 4096
+  local prefix_len = math.min(pos, 4096)
+  if prefix_len < 5 then
+    -- The tag sits at the very start of the data: assume a genuine SVG
+    return svg_ext, svg_weight
+  end
+
+  local prefix = tostring(input:span(1, prefix_len)):lower()
+
+  -- Lua patterns for tags that would show up ahead of <svg> in an HTML document
+  local html_markers = { '<!doctype%s+html', '<html[%s>]', '<head[%s>]', '<body[%s>]', '<meta[%s>]' }
+  for _, marker in ipairs(html_markers) do
+    if prefix:find(marker) then
+      lua_util.debugm(N, log_obj, 'svg pattern found at %s but HTML markers present, skipping svg detection',
+        pos)
+      return nil
+    end
+  end
+
+  return svg_ext, svg_weight
+end
+
 return exports
index df524f177cc8984df7689c7fdcd7b9483c15c77c..2bbf4e258ded4586eb70c13289003de349edcd32 100644 (file)
@@ -447,12 +447,15 @@ local patterns = {
     matches = {
       {
         -- Case-insensitive <svg ...> in the first chunk
+        -- Use heuristic to avoid misdetecting HTML with embedded SVG
         string = [[(?i)<svg\b]],
         position = { '<=', 4096 },
         weight = 40,
+        heuristic = heuristics.svg_format_heuristic
       },
       {
         -- Case-insensitive <!DOCTYPE svg ...> within the first 4KiB
+        -- DOCTYPE svg is unambiguous - no heuristic needed
         string = [[(?i)<!doctype\s+svg]],
         position = { '<=', 4096 },
         weight = 40,
index b2746ce3cfe825b9f896f7a49adcf96d2fa3175d..dc001ab79942b36771d6ee7ec61647a865e28599 100644 (file)
@@ -66,3 +66,5 @@ Magic detections bundle 1
   ...  MAGIC_SYM_VCF_56
   ...  MAGIC_SYM_CSV_57
   ...  MAGIC_SYM_HEIC_58
+  ...  MAGIC_SYM_HTML_59
+  ...  MAGIC_SYM_SVG_60
index acb3d367fe3662cc9eecfe2256891f0add2ffd03..153938fa3ed52ee57d614698c31ed0ede8aeaf9e 100644 (file)
@@ -23556,4 +23556,23 @@ PtMl8z+eKcuwYg6Kh//WFxJpky/bbud1vxdTudsFQap/u1q50IBncAARTE1Hz6WVnXZBmEC3CoN9Xf02
 OutxkSe3/G7yn398gU28Royre16hUFz7UXiMrjFcra8MwOeyEKzA44FlZMpNMynjbMDP+L2JfJ/3rmGJ
 0YCJBxFcC867msO9wip2vP786vlLeC/fqKwSng==
 
+--XXX
+Content-Type: application/octet-stream
+Content-Transfer-Encoding: base64
+X-Real-Type: html-with-svg (should detect as html, not svg)
+
+PCFET0NUWVBFIGh0bWw+CjxodG1sPgo8aGVhZD4KICA8dGl0bGU+VGVzdCBIVE1MIHdpdGggU1ZH
+PC90aXRsZT4KPC9oZWFkPgo8Ym9keT4KICA8cD5UaGlzIGlzIEhUTUwgd2l0aCBlbWJlZGRlZCBT
+Vkc8L3A+CiAgPHN2ZyB3aWR0aD0iMTAwIiBoZWlnaHQ9IjEwMCI+CiAgICA8Y2lyY2xlIGN4PSI1
+MCIgY3k9IjUwIiByPSI0MCIgZmlsbD0icmVkIi8+CiAgPC9zdmc+CjwvYm9keT4KPC9odG1sPgo=
+
+--XXX
+Content-Type: application/octet-stream
+Content-Transfer-Encoding: base64
+X-Real-Type: svg
+
+PHN2ZyB3aWR0aD0iMTAwIiBoZWlnaHQ9IjEwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIw
+MDAvc3ZnIj4KICA8Y2lyY2xlIGN4PSI1MCIgY3k9IjUwIiByPSI0MCIgZmlsbD0iYmx1ZSIvPgo8
+L3N2Zz4K
+
 --XXX--