]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Fix] Fix url_suspect plugin causing massive false positives
authorVsevolod Stakhov <vsevolod@rspamd.com>
Fri, 21 Nov 2025 11:12:55 +0000 (11:12 +0000)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Fri, 21 Nov 2025 11:12:55 +0000 (11:12 +0000)
The url_suspect plugin had multiple critical issues:

1. R_SUSPICIOUS_URL triggered on every message with URLs, adding 25 points
   due to incorrect dynamic score usage (5.0 * 5.0 instead of 1.0 * 5.0)

2. Broken compat_mode inserted R_SUSPICIOUS_URL without URL info whenever
   ANY url check triggered, making it impossible to debug

3. Symbol names were unnecessarily configurable, adding complexity

4. url_suspect_group.conf was not included in groups.conf, so scores
   were not loaded at all

Fixed by:
- Removed R_SUSPICIOUS_URL and compat_mode completely
- Fixed all insert_result() calls to use 1.0 dynamic weight
- Made symbol names hardcoded constants
- Added url group to groups.conf with max_score = 9.0
- Cleaned up score configuration parameters

conf/groups.conf
conf/modules.d/url_suspect.conf
conf/scores.d/url_suspect_group.conf
src/plugins/lua/url_suspect.lua
test/functional/cases/001_merged/400_url_suspect.robot

index 4f40d865c7f7527e18ebb06fa4cb11d8a4181b03..ba769ee7ca0d37f4fd774a99991647504a23be8c 100644 (file)
@@ -83,6 +83,12 @@ group "phishing" {
     .include(try=true; priority=10) "$LOCAL_CONFDIR/override.d/phishing_group.conf"
 }
 
+group "url" {
+    .include "$CONFDIR/scores.d/url_suspect_group.conf"
+    .include(try=true; priority=1; duplicate=merge) "$LOCAL_CONFDIR/local.d/url_suspect_group.conf"
+    .include(try=true; priority=10) "$LOCAL_CONFDIR/override.d/url_suspect_group.conf"
+}
+
 group "hfilter" {
     .include "$CONFDIR/scores.d/hfilter_group.conf"
     .include(try=true; priority=1; duplicate=merge) "$LOCAL_CONFDIR/local.d/hfilter_group.conf"
index e4e869da74b98f2a8e8627204235ae6e28073172..d4ff6239afa492679d16770976fd58f40bc9e9d3 100644 (file)
@@ -35,13 +35,8 @@ url_suspect {
     numeric_ip {
       enabled = true;
 
-      # Scoring for different scenarios
-      base_score = 1.5;          # Basic numeric IP
-      with_user_score = 4.0;     # Numeric IP + user field
-
       # Private IP ranges (10.x, 192.168.x, etc.)
       allow_private_ranges = true;
-      private_score = 0.5;       # Lower score for private IPs
 
       # OPTIONAL: Suspicious IP ranges map
       # To enable, add in local.d/url_suspect.conf:
@@ -52,12 +47,8 @@ url_suspect {
     tld {
       enabled = true;
 
-      # Built-in suspicious TLDs (no map needed)
+      # Built-in suspicious TLDs
       builtin_suspicious = [".tk", ".ml", ".ga", ".cf", ".gq"];
-      builtin_score = 3.0;
-
-      # Missing TLD score
-      missing_tld_score = 2.0;
 
       # OPTIONAL: Custom TLD map
       # To enable, add in local.d/url_suspect.conf:
@@ -100,34 +91,7 @@ url_suspect {
     }
   }
 
-  # Symbol names (can be customized)
-  symbols {
-    # User/password symbols
-    user_password = "URL_USER_PASSWORD";
-    user_long = "URL_USER_LONG";
-    user_very_long = "URL_USER_VERY_LONG";
-
-    # Numeric IP symbols
-    numeric_ip = "URL_NUMERIC_IP";
-    numeric_ip_user = "URL_NUMERIC_IP_USER";
-    numeric_private = "URL_NUMERIC_PRIVATE_IP";
-
-    # TLD symbols
-    no_tld = "URL_NO_TLD";
-    suspicious_tld = "URL_SUSPICIOUS_TLD";
-
-    # Unicode symbols
-    bad_unicode = "URL_BAD_UNICODE";
-    homograph = "URL_HOMOGRAPH_ATTACK";
-    rtl_override = "URL_RTL_OVERRIDE";
-    zero_width = "URL_ZERO_WIDTH_SPACES";
-
-    # Structure symbols
-    multiple_at = "URL_MULTIPLE_AT_SIGNS";
-    backslash = "URL_BACKSLASH_PATH";
-    excessive_dots = "URL_EXCESSIVE_DOTS";
-    very_long = "URL_VERY_LONG";
-  }
+
 
   # ADVANCED: Global whitelist
   # To enable, add in local.d/url_suspect.conf:
@@ -150,10 +114,6 @@ url_suspect {
   # EOD;
   # }
 
-  # Backward compatibility with R_SUSPICIOUS_URL
-  # When enabled, R_SUSPICIOUS_URL symbol is inserted if any URL_* symbols fire
-  compat_mode = true;
-
   .include(try=true,priority=5) "${DBDIR}/dynamic/url_suspect.conf"
   .include(try=true,priority=1,duplicate=merge) "$LOCAL_CONFDIR/local.d/url_suspect.conf"
   .include(try=true,priority=10) "$LOCAL_CONFDIR/override.d/url_suspect.conf"
index b0591c0fba29b625e26bfec22da335f3c79560f8..1de8d256459ee71ee23e5cbffd4b0bf7d64e6b2f 100644 (file)
@@ -1,5 +1,24 @@
 # URL Suspect Plugin Scores
 # These scores are applied when suspicious URLs are detected
+#
+# Please don't modify this file as your changes might be overwritten with
+# the next update.
+#
+# You can modify '$LOCAL_CONFDIR/rspamd.conf.local.override' to redefine
+# parameters defined on the top level
+#
+# You can modify '$LOCAL_CONFDIR/rspamd.conf.local' to add
+# parameters defined on the top level
+#
+# For specific modules or configuration you can also modify
+# '$LOCAL_CONFDIR/local.d/url_suspect_group.conf' - to add your options or rewrite defaults
+# '$LOCAL_CONFDIR/override.d/url_suspect_group.conf' - to override the defaults
+#
+# See https://rspamd.com/doc/developers/writing_rules.html for details
+
+description = "Suspicious URL checks";
+
+max_score = 9.0;
 
 symbols = {
   # User/password in URL
@@ -91,11 +110,4 @@ symbols = {
     description = "URL is very long";
     one_shot = false;
   }
-
-  # Legacy symbol (backward compatibility)
-  "R_SUSPICIOUS_URL" {
-    weight = 5.0;
-    description = "Suspicious URL detected (legacy symbol)";
-    one_shot = true;
-  }
 }
index 49e4625193a5db53e39d4d15cdbd912ad7d1e41c..55d67d514212cdf400401a6985c8a9ff950f2dc2 100644 (file)
@@ -32,6 +32,31 @@ local rspamd_url = require "rspamd_url"
 local rspamd_util = require "rspamd_util"
 local bit = require "bit"
 
+-- Symbol names (fixed, not configurable)
+local symbols = {
+  -- User/password symbols
+  user_password = "URL_USER_PASSWORD",
+  user_long = "URL_USER_LONG",
+  user_very_long = "URL_USER_VERY_LONG",
+  -- Numeric IP symbols
+  numeric_ip = "URL_NUMERIC_IP",
+  numeric_ip_user = "URL_NUMERIC_IP_USER",
+  numeric_private = "URL_NUMERIC_PRIVATE_IP",
+  -- TLD symbols
+  no_tld = "URL_NO_TLD",
+  suspicious_tld = "URL_SUSPICIOUS_TLD",
+  -- Unicode symbols
+  bad_unicode = "URL_BAD_UNICODE",
+  homograph = "URL_HOMOGRAPH_ATTACK",
+  rtl_override = "URL_RTL_OVERRIDE",
+  zero_width = "URL_ZERO_WIDTH_SPACES",
+  -- Structure symbols
+  multiple_at = "URL_MULTIPLE_AT_SIGNS",
+  backslash = "URL_BACKSLASH_PATH",
+  excessive_dots = "URL_EXCESSIVE_DOTS",
+  very_long = "URL_VERY_LONG"
+}
+
 -- Default settings (work without any maps)
 local settings = {
   enabled = true,
@@ -43,21 +68,15 @@ local settings = {
         suspicious = 64,
         long = 128,
         very_long = 256
-      },
-
+      }
     },
     numeric_ip = {
       enabled = true,
-      base_score = 1.5,
-      with_user_score = 4.0,
-      allow_private_ranges = true,
-      private_score = 0.5
+      allow_private_ranges = true
     },
     tld = {
       enabled = true,
-      builtin_suspicious = { ".tk", ".ml", ".ga", ".cf", ".gq" },
-      builtin_score = 3.0,
-      missing_tld_score = 2.0
+      builtin_suspicious = { ".tk", ".ml", ".ga", ".cf", ".gq" }
     },
     unicode = {
       enabled = true,
@@ -77,32 +96,8 @@ local settings = {
       max_url_length = 2048
     }
   },
-  symbols = {
-    -- User/password symbols
-    user_password = "URL_USER_PASSWORD",
-    user_long = "URL_USER_LONG",
-    user_very_long = "URL_USER_VERY_LONG",
-    -- Numeric IP symbols
-    numeric_ip = "URL_NUMERIC_IP",
-    numeric_ip_user = "URL_NUMERIC_IP_USER",
-    numeric_private = "URL_NUMERIC_PRIVATE_IP",
-    -- TLD symbols
-    no_tld = "URL_NO_TLD",
-    suspicious_tld = "URL_SUSPICIOUS_TLD",
-    -- Unicode symbols
-    bad_unicode = "URL_BAD_UNICODE",
-    homograph = "URL_HOMOGRAPH_ATTACK",
-    rtl_override = "URL_RTL_OVERRIDE",
-    zero_width = "URL_ZERO_WIDTH_SPACES",
-    -- Structure symbols
-    multiple_at = "URL_MULTIPLE_AT_SIGNS",
-    backslash = "URL_BACKSLASH_PATH",
-    excessive_dots = "URL_EXCESSIVE_DOTS",
-    very_long = "URL_VERY_LONG"
-  },
   use_whitelist = false,
-  custom_checks = {},
-  compat_mode = true
+  custom_checks = {}
 }
 
 -- Optional maps (only loaded if enabled)
@@ -139,30 +134,26 @@ function checks.user_password_analysis(task, url, cfg)
 
   lua_util.debugm(N, task, "Checking user field length: %d chars", user_len)
 
-  -- Length-based scoring (built-in, no map needed)
+  -- Length-based detection
   if user_len > cfg.length_thresholds.very_long then
     table.insert(findings, {
-      symbol = settings.symbols.user_very_long,
-      score = 5.0,
+      symbol = symbols.user_very_long,
       options = { string.format("%d", user_len) }
     })
   elseif user_len > cfg.length_thresholds.long then
     table.insert(findings, {
-      symbol = settings.symbols.user_long,
-      score = 3.0,
+      symbol = symbols.user_long,
       options = { string.format("%d", user_len) }
     })
   elseif user_len > cfg.length_thresholds.suspicious then
     table.insert(findings, {
-      symbol = settings.symbols.user_password,
-      score = 2.0,
+      symbol = symbols.user_password,
       options = { host or "unknown" }
     })
   else
     -- Normal length user
     table.insert(findings, {
-      symbol = settings.symbols.user_password,
-      score = 2.0,
+      symbol = symbols.user_password,
       options = { host or "unknown" }
     })
   end
@@ -213,22 +204,19 @@ function checks.numeric_ip_analysis(task, url, cfg)
 
   if is_private and cfg.allow_private_ranges then
     table.insert(findings, {
-      symbol = settings.symbols.numeric_private,
-      score = cfg.private_score,
+      symbol = symbols.numeric_private,
       options = { host }
     })
   else
     -- Check if user present (more suspicious)
     if bit.band(flags, url_flags_tab.has_user) ~= 0 then
       table.insert(findings, {
-        symbol = settings.symbols.numeric_ip_user,
-        score = cfg.with_user_score,
+        symbol = symbols.numeric_ip_user,
         options = { host }
       })
     else
       table.insert(findings, {
-        symbol = settings.symbols.numeric_ip,
-        score = cfg.base_score,
+        symbol = symbols.numeric_ip,
         options = { host }
       })
     end
@@ -262,8 +250,7 @@ function checks.tld_analysis(task, url, cfg)
     if bit.band(flags, url_flags_tab.numeric) == 0 then
       lua_util.debugm(N, task, "URL has no TLD: %s", host)
       table.insert(findings, {
-        symbol = settings.symbols.no_tld,
-        score = cfg.missing_tld_score,
+        symbol = symbols.no_tld,
         options = { host }
       })
     end
@@ -275,13 +262,12 @@ function checks.tld_analysis(task, url, cfg)
     return findings
   end
 
-  -- Check built-in suspicious TLDs (no map needed)
+  -- Check built-in suspicious TLDs
   for _, suspicious_tld in ipairs(cfg.builtin_suspicious) do
     if tld == suspicious_tld or tld:sub(-#suspicious_tld) == suspicious_tld then
       lua_util.debugm(N, task, "URL uses suspicious TLD: %s", tld)
       table.insert(findings, {
-        symbol = settings.symbols.suspicious_tld,
-        score = cfg.builtin_score,
+        symbol = symbols.suspicious_tld,
         options = { tld }
       })
       break
@@ -312,8 +298,7 @@ function checks.unicode_analysis(task, url, cfg)
   if cfg.check_validity and not rspamd_util.is_valid_utf8(url_text) then
     lua_util.debugm(N, task, "URL has invalid UTF-8")
     table.insert(findings, {
-      symbol = settings.symbols.bad_unicode,
-      score = 3.0,
+      symbol = symbols.bad_unicode,
       options = { host or "unknown" }
     })
   end
@@ -322,8 +307,7 @@ function checks.unicode_analysis(task, url, cfg)
   if cfg.check_zero_width and bit.band(flags, url_flags_tab.zw_spaces) ~= 0 then
     lua_util.debugm(N, task, "URL contains zero-width spaces")
     table.insert(findings, {
-      symbol = settings.symbols.zero_width,
-      score = 7.0,
+      symbol = symbols.zero_width,
       options = { host or "unknown" }
     })
   end
@@ -333,8 +317,7 @@ function checks.unicode_analysis(task, url, cfg)
     if rspamd_util.is_utf_spoofed(host) then
       lua_util.debugm(N, task, "URL uses homograph attack: %s", host)
       table.insert(findings, {
-        symbol = settings.symbols.homograph,
-        score = 5.0,
+        symbol = symbols.homograph,
         options = { host }
       })
     end
@@ -344,8 +327,7 @@ function checks.unicode_analysis(task, url, cfg)
   if cfg.check_rtl_override and url_text:find("\226\128\174") then
     lua_util.debugm(N, task, "URL contains RTL override")
     table.insert(findings, {
-      symbol = settings.symbols.rtl_override,
-      score = 6.0,
+      symbol = symbols.rtl_override,
       options = { host or "unknown" }
     })
   end
@@ -365,8 +347,7 @@ function checks.structure_analysis(task, url, cfg)
     if at_count > cfg.max_at_signs then
       lua_util.debugm(N, task, "URL has %d @ signs", at_count)
       table.insert(findings, {
-        symbol = settings.symbols.multiple_at,
-        score = 3.0,
+        symbol = symbols.multiple_at,
         options = { string.format("%d", at_count) }
       })
     end
@@ -379,8 +360,7 @@ function checks.structure_analysis(task, url, cfg)
     if bit.band(flags, url_flags_tab.obscured) ~= 0 and url_text:find("\\") then
       lua_util.debugm(N, task, "URL contains backslashes")
       table.insert(findings, {
-        symbol = settings.symbols.backslash,
-        score = 2.0,
+        symbol = symbols.backslash,
         options = { host or "unknown" }
       })
     end
@@ -392,8 +372,7 @@ function checks.structure_analysis(task, url, cfg)
     if dot_count > cfg.max_host_dots then
       lua_util.debugm(N, task, "URL hostname has %d dots", dot_count)
       table.insert(findings, {
-        symbol = settings.symbols.excessive_dots,
-        score = 2.0,
+        symbol = symbols.excessive_dots,
         options = { string.format("%d", dot_count) }
       })
     end
@@ -403,8 +382,7 @@ function checks.structure_analysis(task, url, cfg)
   if cfg.check_length and #url_text > cfg.max_url_length then
     lua_util.debugm(N, task, "URL is very long: %d chars", #url_text)
     table.insert(findings, {
-      symbol = settings.symbols.very_long,
-      score = 1.5,
+      symbol = symbols.very_long,
       options = { string.format("%d", #url_text) }
     })
   end
@@ -484,10 +462,10 @@ local function url_suspect_callback(task)
   -- TLD and structure checks don't have corresponding URL flags, so need all URLs
   local need_all_urls = (
       (settings.checks.tld and settings.checks.tld.enabled) or
-      (settings.checks.structure and settings.checks.structure.enabled and
-          (settings.checks.structure.check_multiple_at or
-              settings.checks.structure.check_excessive_dots or
-              settings.checks.structure.check_length))
+          (settings.checks.structure and settings.checks.structure.enabled and
+              (settings.checks.structure.check_multiple_at or
+                  settings.checks.structure.check_excessive_dots or
+                  settings.checks.structure.check_length))
   )
 
   if need_all_urls then
@@ -506,30 +484,11 @@ local function url_suspect_callback(task)
     return false
   end
 
-  local total_findings = 0
-
   for _, url in ipairs(suspect_urls) do
     local url_findings = analyze_url(task, url, settings)
 
     for _, finding in ipairs(url_findings) do
-      task:insert_result(finding.symbol, finding.score, finding.options or {})
-      total_findings = total_findings + 1
-    end
-  end
-
-  -- Backward compatibility: R_SUSPICIOUS_URL
-  if settings.compat_mode and total_findings > 0 then
-    -- Check if we inserted any symbols
-    local has_findings = false
-    for _, symbol_name in pairs(settings.symbols) do
-      if task:has_symbol(symbol_name) then
-        has_findings = true
-        break
-      end
-    end
-
-    if has_findings then
-      task:insert_result('R_SUSPICIOUS_URL', 5.0)
+      task:insert_result(finding.symbol, 1.0, finding.options or {})
     end
   end
 
@@ -591,7 +550,7 @@ if settings.enabled then
   })
 
   -- Register all symbol names as virtual
-  for _, symbol_name in pairs(settings.symbols) do
+  for _, symbol_name in pairs(symbols) do
     rspamd_config:register_symbol({
       name = symbol_name,
       type = 'virtual',
@@ -599,16 +558,4 @@ if settings.enabled then
       group = 'url'
     })
   end
-
-  -- Backward compat symbol
-  if settings.compat_mode then
-    rspamd_config:register_symbol({
-      name = 'R_SUSPICIOUS_URL',
-      type = 'virtual',
-      parent = id,
-      score = 5.0,
-      group = 'url',
-      description = 'Suspicious URL (legacy symbol)'
-    })
-  end
 end
index bb218467ac6ddf84b31e041e41844e6991cbc49f..cb8b6eb2e02fa38a233ea42c32d82f832541245a 100644 (file)
@@ -10,14 +10,11 @@ URL Suspect - Issue 5731 - Long User Field
   Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_long_user.eml
   Expect Symbol With Exact Options  URL_USER_LONG  129
   Do Not Expect Symbol  URL_USER_VERY_LONG
-  # Should also generate R_SUSPICIOUS_URL for backward compatibility
-  Expect Symbol  R_SUSPICIOUS_URL
 
 URL Suspect - Very Long User Field
   # Test that very long user fields get appropriate symbol
   Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_very_long_user.eml
   Expect Symbol With Exact Options  URL_USER_VERY_LONG  300
-  Expect Symbol  R_SUSPICIOUS_URL
 
 URL Suspect - Numeric IP
   # Test numeric IP detection
@@ -29,19 +26,16 @@ URL Suspect - Numeric IP with User
   # Test numeric IP with user field (more suspicious)
   Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_numeric_ip_user.eml
   Expect Symbol  URL_NUMERIC_IP_USER
-  Expect Symbol  R_SUSPICIOUS_URL
 
 URL Suspect - Suspicious TLD
   # Test suspicious TLD detection
   Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_bad_tld.eml
   Expect Symbol  URL_SUSPICIOUS_TLD
-  Expect Symbol  R_SUSPICIOUS_URL
 
 URL Suspect - Multiple At Signs
   # Test multiple @ sign detection
   Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_multiple_at.eml
   Expect Symbol  URL_MULTIPLE_AT_SIGNS
-  Expect Symbol  R_SUSPICIOUS_URL
 
 URL Suspect - Normal URL
   # Test that normal URLs don't trigger symbols