From: Vsevolod Stakhov Date: Fri, 21 Nov 2025 11:12:55 +0000 (+0000) Subject: [Fix] Fix url_suspect plugin causing massive false positives X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b9f84dff495d6100e6af0ba26e4d6042245442ed;p=thirdparty%2Frspamd.git [Fix] Fix url_suspect plugin causing massive false positives The url_suspect plugin had multiple critical issues: 1. R_SUSPICIOUS_URL triggered on every message with URLs, adding 25 points due to incorrect dynamic score usage (5.0 * 5.0 instead of 1.0 * 5.0) 2. Broken compat_mode inserted R_SUSPICIOUS_URL without URL info whenever ANY url check triggered, making it impossible to debug 3. Symbol names were unnecessarily configurable, adding complexity 4. url_suspect_group.conf was not included in groups.conf, so scores were not loaded at all Fixed by: - Removed R_SUSPICIOUS_URL and compat_mode completely - Fixed all insert_result() calls to use 1.0 dynamic weight - Made symbol names hardcoded constants - Added url group to groups.conf with max_score = 9.0 - Cleaned up score configuration parameters --- diff --git a/conf/groups.conf b/conf/groups.conf index 4f40d865c7..ba769ee7ca 100644 --- a/conf/groups.conf +++ b/conf/groups.conf @@ -83,6 +83,12 @@ group "phishing" { .include(try=true; priority=10) "$LOCAL_CONFDIR/override.d/phishing_group.conf" } +group "url" { + .include "$CONFDIR/scores.d/url_suspect_group.conf" + .include(try=true; priority=1; duplicate=merge) "$LOCAL_CONFDIR/local.d/url_suspect_group.conf" + .include(try=true; priority=10) "$LOCAL_CONFDIR/override.d/url_suspect_group.conf" +} + group "hfilter" { .include "$CONFDIR/scores.d/hfilter_group.conf" .include(try=true; priority=1; duplicate=merge) "$LOCAL_CONFDIR/local.d/hfilter_group.conf" diff --git a/conf/modules.d/url_suspect.conf b/conf/modules.d/url_suspect.conf index e4e869da74..d4ff6239af 100644 --- a/conf/modules.d/url_suspect.conf +++ b/conf/modules.d/url_suspect.conf @@ -35,13 +35,8 @@ url_suspect { numeric_ip { enabled = true; - # Scoring for different scenarios - base_score = 1.5; # Basic numeric IP - with_user_score = 4.0; # Numeric IP + user field - # Private IP ranges (10.x, 192.168.x, etc.) allow_private_ranges = true; - private_score = 0.5; # Lower score for private IPs # OPTIONAL: Suspicious IP ranges map # To enable, add in local.d/url_suspect.conf: @@ -52,12 +47,8 @@ url_suspect { tld { enabled = true; - # Built-in suspicious TLDs (no map needed) + # Built-in suspicious TLDs builtin_suspicious = [".tk", ".ml", ".ga", ".cf", ".gq"]; - builtin_score = 3.0; - - # Missing TLD score - missing_tld_score = 2.0; # OPTIONAL: Custom TLD map # To enable, add in local.d/url_suspect.conf: @@ -100,34 +91,7 @@ url_suspect { } } - # Symbol names (can be customized) - symbols { - # User/password symbols - user_password = "URL_USER_PASSWORD"; - user_long = "URL_USER_LONG"; - user_very_long = "URL_USER_VERY_LONG"; - - # Numeric IP symbols - numeric_ip = "URL_NUMERIC_IP"; - numeric_ip_user = "URL_NUMERIC_IP_USER"; - numeric_private = "URL_NUMERIC_PRIVATE_IP"; - - # TLD symbols - no_tld = "URL_NO_TLD"; - suspicious_tld = "URL_SUSPICIOUS_TLD"; - - # Unicode symbols - bad_unicode = "URL_BAD_UNICODE"; - homograph = "URL_HOMOGRAPH_ATTACK"; - rtl_override = "URL_RTL_OVERRIDE"; - zero_width = "URL_ZERO_WIDTH_SPACES"; - - # Structure symbols - multiple_at = "URL_MULTIPLE_AT_SIGNS"; - backslash = "URL_BACKSLASH_PATH"; - excessive_dots = "URL_EXCESSIVE_DOTS"; - very_long = "URL_VERY_LONG"; - } + # ADVANCED: Global whitelist # To enable, add in local.d/url_suspect.conf: @@ -150,10 +114,6 @@ url_suspect { # EOD; # } - # Backward compatibility with R_SUSPICIOUS_URL - # When enabled, R_SUSPICIOUS_URL symbol is inserted if any URL_* symbols fire - compat_mode = true; - .include(try=true,priority=5) "${DBDIR}/dynamic/url_suspect.conf" .include(try=true,priority=1,duplicate=merge) "$LOCAL_CONFDIR/local.d/url_suspect.conf" .include(try=true,priority=10) "$LOCAL_CONFDIR/override.d/url_suspect.conf" diff --git a/conf/scores.d/url_suspect_group.conf b/conf/scores.d/url_suspect_group.conf index b0591c0fba..1de8d25645 100644 --- a/conf/scores.d/url_suspect_group.conf +++ b/conf/scores.d/url_suspect_group.conf @@ -1,5 +1,24 @@ # URL Suspect Plugin Scores # These scores are applied when suspicious URLs are detected +# +# Please don't modify this file as your changes might be overwritten with +# the next update. +# +# You can modify '$LOCAL_CONFDIR/rspamd.conf.local.override' to redefine +# parameters defined on the top level +# +# You can modify '$LOCAL_CONFDIR/rspamd.conf.local' to add +# parameters defined on the top level +# +# For specific modules or configuration you can also modify +# '$LOCAL_CONFDIR/local.d/url_suspect_group.conf' - to add your options or rewrite defaults +# '$LOCAL_CONFDIR/override.d/url_suspect_group.conf' - to override the defaults +# +# See https://rspamd.com/doc/developers/writing_rules.html for details + +description = "Suspicious URL checks"; + +max_score = 9.0; symbols = { # User/password in URL @@ -91,11 +110,4 @@ symbols = { description = "URL is very long"; one_shot = false; } - - # Legacy symbol (backward compatibility) - "R_SUSPICIOUS_URL" { - weight = 5.0; - description = "Suspicious URL detected (legacy symbol)"; - one_shot = true; - } } diff --git a/src/plugins/lua/url_suspect.lua b/src/plugins/lua/url_suspect.lua index 49e4625193..55d67d5142 100644 --- a/src/plugins/lua/url_suspect.lua +++ b/src/plugins/lua/url_suspect.lua @@ -32,6 +32,31 @@ local rspamd_url = require "rspamd_url" local rspamd_util = require "rspamd_util" local bit = require "bit" +-- Symbol names (fixed, not configurable) +local symbols = { + -- User/password symbols + user_password = "URL_USER_PASSWORD", + user_long = "URL_USER_LONG", + user_very_long = "URL_USER_VERY_LONG", + -- Numeric IP symbols + numeric_ip = "URL_NUMERIC_IP", + numeric_ip_user = "URL_NUMERIC_IP_USER", + numeric_private = "URL_NUMERIC_PRIVATE_IP", + -- TLD symbols + no_tld = "URL_NO_TLD", + suspicious_tld = "URL_SUSPICIOUS_TLD", + -- Unicode symbols + bad_unicode = "URL_BAD_UNICODE", + homograph = "URL_HOMOGRAPH_ATTACK", + rtl_override = "URL_RTL_OVERRIDE", + zero_width = "URL_ZERO_WIDTH_SPACES", + -- Structure symbols + multiple_at = "URL_MULTIPLE_AT_SIGNS", + backslash = "URL_BACKSLASH_PATH", + excessive_dots = "URL_EXCESSIVE_DOTS", + very_long = "URL_VERY_LONG" +} + -- Default settings (work without any maps) local settings = { enabled = true, @@ -43,21 +68,15 @@ local settings = { suspicious = 64, long = 128, very_long = 256 - }, - + } }, numeric_ip = { enabled = true, - base_score = 1.5, - with_user_score = 4.0, - allow_private_ranges = true, - private_score = 0.5 + allow_private_ranges = true }, tld = { enabled = true, - builtin_suspicious = { ".tk", ".ml", ".ga", ".cf", ".gq" }, - builtin_score = 3.0, - missing_tld_score = 2.0 + builtin_suspicious = { ".tk", ".ml", ".ga", ".cf", ".gq" } }, unicode = { enabled = true, @@ -77,32 +96,8 @@ local settings = { max_url_length = 2048 } }, - symbols = { - -- User/password symbols - user_password = "URL_USER_PASSWORD", - user_long = "URL_USER_LONG", - user_very_long = "URL_USER_VERY_LONG", - -- Numeric IP symbols - numeric_ip = "URL_NUMERIC_IP", - numeric_ip_user = "URL_NUMERIC_IP_USER", - numeric_private = "URL_NUMERIC_PRIVATE_IP", - -- TLD symbols - no_tld = "URL_NO_TLD", - suspicious_tld = "URL_SUSPICIOUS_TLD", - -- Unicode symbols - bad_unicode = "URL_BAD_UNICODE", - homograph = "URL_HOMOGRAPH_ATTACK", - rtl_override = "URL_RTL_OVERRIDE", - zero_width = "URL_ZERO_WIDTH_SPACES", - -- Structure symbols - multiple_at = "URL_MULTIPLE_AT_SIGNS", - backslash = "URL_BACKSLASH_PATH", - excessive_dots = "URL_EXCESSIVE_DOTS", - very_long = "URL_VERY_LONG" - }, use_whitelist = false, - custom_checks = {}, - compat_mode = true + custom_checks = {} } -- Optional maps (only loaded if enabled) @@ -139,30 +134,26 @@ function checks.user_password_analysis(task, url, cfg) lua_util.debugm(N, task, "Checking user field length: %d chars", user_len) - -- Length-based scoring (built-in, no map needed) + -- Length-based detection if user_len > cfg.length_thresholds.very_long then table.insert(findings, { - symbol = settings.symbols.user_very_long, - score = 5.0, + symbol = symbols.user_very_long, options = { string.format("%d", user_len) } }) elseif user_len > cfg.length_thresholds.long then table.insert(findings, { - symbol = settings.symbols.user_long, - score = 3.0, + symbol = symbols.user_long, options = { string.format("%d", user_len) } }) elseif user_len > cfg.length_thresholds.suspicious then table.insert(findings, { - symbol = settings.symbols.user_password, - score = 2.0, + symbol = symbols.user_password, options = { host or "unknown" } }) else -- Normal length user table.insert(findings, { - symbol = settings.symbols.user_password, - score = 2.0, + symbol = symbols.user_password, options = { host or "unknown" } }) end @@ -213,22 +204,19 @@ function checks.numeric_ip_analysis(task, url, cfg) if is_private and cfg.allow_private_ranges then table.insert(findings, { - symbol = settings.symbols.numeric_private, - score = cfg.private_score, + symbol = symbols.numeric_private, options = { host } }) else -- Check if user present (more suspicious) if bit.band(flags, url_flags_tab.has_user) ~= 0 then table.insert(findings, { - symbol = settings.symbols.numeric_ip_user, - score = cfg.with_user_score, + symbol = symbols.numeric_ip_user, options = { host } }) else table.insert(findings, { - symbol = settings.symbols.numeric_ip, - score = cfg.base_score, + symbol = symbols.numeric_ip, options = { host } }) end @@ -262,8 +250,7 @@ function checks.tld_analysis(task, url, cfg) if bit.band(flags, url_flags_tab.numeric) == 0 then lua_util.debugm(N, task, "URL has no TLD: %s", host) table.insert(findings, { - symbol = settings.symbols.no_tld, - score = cfg.missing_tld_score, + symbol = symbols.no_tld, options = { host } }) end @@ -275,13 +262,12 @@ function checks.tld_analysis(task, url, cfg) return findings end - -- Check built-in suspicious TLDs (no map needed) + -- Check built-in suspicious TLDs for _, suspicious_tld in ipairs(cfg.builtin_suspicious) do if tld == suspicious_tld or tld:sub(-#suspicious_tld) == suspicious_tld then lua_util.debugm(N, task, "URL uses suspicious TLD: %s", tld) table.insert(findings, { - symbol = settings.symbols.suspicious_tld, - score = cfg.builtin_score, + symbol = symbols.suspicious_tld, options = { tld } }) break @@ -312,8 +298,7 @@ function checks.unicode_analysis(task, url, cfg) if cfg.check_validity and not rspamd_util.is_valid_utf8(url_text) then lua_util.debugm(N, task, "URL has invalid UTF-8") table.insert(findings, { - symbol = settings.symbols.bad_unicode, - score = 3.0, + symbol = symbols.bad_unicode, options = { host or "unknown" } }) end @@ -322,8 +307,7 @@ function checks.unicode_analysis(task, url, cfg) if cfg.check_zero_width and bit.band(flags, url_flags_tab.zw_spaces) ~= 0 then lua_util.debugm(N, task, "URL contains zero-width spaces") table.insert(findings, { - symbol = settings.symbols.zero_width, - score = 7.0, + symbol = symbols.zero_width, options = { host or "unknown" } }) end @@ -333,8 +317,7 @@ function checks.unicode_analysis(task, url, cfg) if rspamd_util.is_utf_spoofed(host) then lua_util.debugm(N, task, "URL uses homograph attack: %s", host) table.insert(findings, { - symbol = settings.symbols.homograph, - score = 5.0, + symbol = symbols.homograph, options = { host } }) end @@ -344,8 +327,7 @@ function checks.unicode_analysis(task, url, cfg) if cfg.check_rtl_override and url_text:find("\226\128\174") then lua_util.debugm(N, task, "URL contains RTL override") table.insert(findings, { - symbol = settings.symbols.rtl_override, - score = 6.0, + symbol = symbols.rtl_override, options = { host or "unknown" } }) end @@ -365,8 +347,7 @@ function checks.structure_analysis(task, url, cfg) if at_count > cfg.max_at_signs then lua_util.debugm(N, task, "URL has %d @ signs", at_count) table.insert(findings, { - symbol = settings.symbols.multiple_at, - score = 3.0, + symbol = symbols.multiple_at, options = { string.format("%d", at_count) } }) end @@ -379,8 +360,7 @@ function checks.structure_analysis(task, url, cfg) if bit.band(flags, url_flags_tab.obscured) ~= 0 and url_text:find("\\") then lua_util.debugm(N, task, "URL contains backslashes") table.insert(findings, { - symbol = settings.symbols.backslash, - score = 2.0, + symbol = symbols.backslash, options = { host or "unknown" } }) end @@ -392,8 +372,7 @@ function checks.structure_analysis(task, url, cfg) if dot_count > cfg.max_host_dots then lua_util.debugm(N, task, "URL hostname has %d dots", dot_count) table.insert(findings, { - symbol = settings.symbols.excessive_dots, - score = 2.0, + symbol = symbols.excessive_dots, options = { string.format("%d", dot_count) } }) end @@ -403,8 +382,7 @@ function checks.structure_analysis(task, url, cfg) if cfg.check_length and #url_text > cfg.max_url_length then lua_util.debugm(N, task, "URL is very long: %d chars", #url_text) table.insert(findings, { - symbol = settings.symbols.very_long, - score = 1.5, + symbol = symbols.very_long, options = { string.format("%d", #url_text) } }) end @@ -484,10 +462,10 @@ local function url_suspect_callback(task) -- TLD and structure checks don't have corresponding URL flags, so need all URLs local need_all_urls = ( (settings.checks.tld and settings.checks.tld.enabled) or - (settings.checks.structure and settings.checks.structure.enabled and - (settings.checks.structure.check_multiple_at or - settings.checks.structure.check_excessive_dots or - settings.checks.structure.check_length)) + (settings.checks.structure and settings.checks.structure.enabled and + (settings.checks.structure.check_multiple_at or + settings.checks.structure.check_excessive_dots or + settings.checks.structure.check_length)) ) if need_all_urls then @@ -506,30 +484,11 @@ local function url_suspect_callback(task) return false end - local total_findings = 0 - for _, url in ipairs(suspect_urls) do local url_findings = analyze_url(task, url, settings) for _, finding in ipairs(url_findings) do - task:insert_result(finding.symbol, finding.score, finding.options or {}) - total_findings = total_findings + 1 - end - end - - -- Backward compatibility: R_SUSPICIOUS_URL - if settings.compat_mode and total_findings > 0 then - -- Check if we inserted any symbols - local has_findings = false - for _, symbol_name in pairs(settings.symbols) do - if task:has_symbol(symbol_name) then - has_findings = true - break - end - end - - if has_findings then - task:insert_result('R_SUSPICIOUS_URL', 5.0) + task:insert_result(finding.symbol, 1.0, finding.options or {}) end end @@ -591,7 +550,7 @@ if settings.enabled then }) -- Register all symbol names as virtual - for _, symbol_name in pairs(settings.symbols) do + for _, symbol_name in pairs(symbols) do rspamd_config:register_symbol({ name = symbol_name, type = 'virtual', @@ -599,16 +558,4 @@ if settings.enabled then group = 'url' }) end - - -- Backward compat symbol - if settings.compat_mode then - rspamd_config:register_symbol({ - name = 'R_SUSPICIOUS_URL', - type = 'virtual', - parent = id, - score = 5.0, - group = 'url', - description = 'Suspicious URL (legacy symbol)' - }) - end end diff --git a/test/functional/cases/001_merged/400_url_suspect.robot b/test/functional/cases/001_merged/400_url_suspect.robot index bb218467ac..cb8b6eb2e0 100644 --- a/test/functional/cases/001_merged/400_url_suspect.robot +++ b/test/functional/cases/001_merged/400_url_suspect.robot @@ -10,14 +10,11 @@ URL Suspect - Issue 5731 - Long User Field Scan File ${RSPAMD_TESTDIR}/messages/url_suspect_long_user.eml Expect Symbol With Exact Options URL_USER_LONG 129 Do Not Expect Symbol URL_USER_VERY_LONG - # Should also generate R_SUSPICIOUS_URL for backward compatibility - Expect Symbol R_SUSPICIOUS_URL URL Suspect - Very Long User Field # Test that very long user fields get appropriate symbol Scan File ${RSPAMD_TESTDIR}/messages/url_suspect_very_long_user.eml Expect Symbol With Exact Options URL_USER_VERY_LONG 300 - Expect Symbol R_SUSPICIOUS_URL URL Suspect - Numeric IP # Test numeric IP detection @@ -29,19 +26,16 @@ URL Suspect - Numeric IP with User # Test numeric IP with user field (more suspicious) Scan File ${RSPAMD_TESTDIR}/messages/url_suspect_numeric_ip_user.eml Expect Symbol URL_NUMERIC_IP_USER - Expect Symbol R_SUSPICIOUS_URL URL Suspect - Suspicious TLD # Test suspicious TLD detection Scan File ${RSPAMD_TESTDIR}/messages/url_suspect_bad_tld.eml Expect Symbol URL_SUSPICIOUS_TLD - Expect Symbol R_SUSPICIOUS_URL URL Suspect - Multiple At Signs # Test multiple @ sign detection Scan File ${RSPAMD_TESTDIR}/messages/url_suspect_multiple_at.eml Expect Symbol URL_MULTIPLE_AT_SIGNS - Expect Symbol R_SUSPICIOUS_URL URL Suspect - Normal URL # Test that normal URLs don't trigger symbols