From 8392a71f22770dfbd82a3f7085d71712aaed34d4 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 14 Nov 2025 12:27:21 +0000 Subject: [PATCH] [Feature] Add URL deep processing architecture This commit implements a two-level URL processing system that addresses issue #5731 and provides flexible URL analysis with multiple specific symbols. Core changes: * Modified src/libserver/url.c to handle oversized user fields (fixes #5731) * Added lualib/lua_url_filter.lua - Fast library filter during parsing * Added src/plugins/lua/url_suspect.lua - Deep inspection plugin * Added conf/modules.d/url_suspect.conf - Plugin configuration * Added conf/scores.d/url_suspect_group.conf - Symbol scores Key features: * No new C flags - uses existing URL flags (has_user, numeric, obscured, etc.) * Works without maps - built-in logic for common cases * 15+ specific symbols instead of generic R_SUSPICIOUS_URL * Backward compatible - keeps R_SUSPICIOUS_URL working * User extensible - custom filters and checks supported Optional features: * Example map files for advanced customization (disabled by default) * Whitelist, pattern matching, TLD lists Issue: #5731 --- conf/maps.d/url_suspect/README.md | 173 +++++ .../url_suspect/suspicious_ports.map.example | 18 + .../url_suspect/suspicious_tlds.map.example | 19 + .../url_suspect/user_patterns.map.example | 20 + .../url_suspect/whitelist_domains.map.example | 19 + conf/modules.d/url_suspect.conf | 163 +++++ conf/scores.d/url_suspect_group.conf | 101 +++ local.d/url_filter.lua | 51 ++ lualib/lua_url_filter.lua | 180 ++++++ src/libserver/url.c | 5 +- src/plugins/lua/url_suspect.lua | 602 ++++++++++++++++++ 11 files changed, 1350 insertions(+), 1 deletion(-) create mode 100644 conf/maps.d/url_suspect/README.md create mode 100644 conf/maps.d/url_suspect/suspicious_ports.map.example create mode 100644 conf/maps.d/url_suspect/suspicious_tlds.map.example create mode 100644 conf/maps.d/url_suspect/user_patterns.map.example create mode 100644 
conf/maps.d/url_suspect/whitelist_domains.map.example create mode 100644 conf/modules.d/url_suspect.conf create mode 100644 conf/scores.d/url_suspect_group.conf create mode 100644 local.d/url_filter.lua create mode 100644 lualib/lua_url_filter.lua create mode 100644 src/plugins/lua/url_suspect.lua diff --git a/conf/maps.d/url_suspect/README.md b/conf/maps.d/url_suspect/README.md new file mode 100644 index 0000000000..4968add8ff --- /dev/null +++ b/conf/maps.d/url_suspect/README.md @@ -0,0 +1,173 @@ +# URL Suspect Optional Maps + +This directory contains **optional** map files for the URL Suspect plugin. + +**Important**: These maps are **disabled by default**. The plugin works perfectly without them using built-in logic. + +## When to Use Maps + +Use maps only if you need to: +- Whitelist specific domains to skip checks +- Add custom user field patterns beyond built-in checks +- Blacklist specific user names +- Define additional suspicious TLDs beyond the built-in list +- Mark specific IP ranges as suspicious +- Define unusual ports as suspicious + +For most users, the built-in logic is sufficient. + +## Available Maps + +### 1. whitelist_domains.map +**Purpose**: Skip all URL suspect checks for trusted domains + +**Format**: One domain per line +``` +google.com +microsoft.com +github.com +``` + +**Enable in** `local.d/url_suspect.conf`: +```lua +url_suspect { + use_whitelist = true; + whitelist_map = "$LOCAL_CONFDIR/local.d/maps.d/url_suspect/whitelist_domains.map"; +} +``` + +### 2. user_patterns.map +**Purpose**: Regex patterns for suspicious user fields + +**Format**: Regex pattern (one per line) +``` +^admin$ +^root$ +^test$ +^[0-9]{10,}$ +``` + +**Enable in** `local.d/url_suspect.conf`: +```lua +url_suspect { + checks { + user_password { + use_pattern_map = true; + pattern_map = "$LOCAL_CONFDIR/local.d/maps.d/url_suspect/user_patterns.map"; + } + } +} +``` + +### 3. 
user_blacklist.map +**Purpose**: Exact user names to penalize + +**Format**: Exact match (one per line) +``` +admin +root +administrator +webmaster +``` + +**Enable in** `local.d/url_suspect.conf`: +```lua +url_suspect { + checks { + user_password { + use_blacklist = true; + blacklist_map = "$LOCAL_CONFDIR/local.d/maps.d/url_suspect/user_blacklist.map"; + } + } +} +``` + +### 4. suspicious_tlds.map +**Purpose**: Additional TLDs beyond built-in list (.tk, .ml, .ga, .cf, .gq) + +**Format**: TLD with leading dot (one per line) +``` +.xyz +.top +.work +.date +.loan +``` + +**Enable in** `local.d/url_suspect.conf`: +```lua +url_suspect { + checks { + tld { + use_tld_map = true; + tld_map = "$LOCAL_CONFDIR/local.d/maps.d/url_suspect/suspicious_tlds.map"; + } + } +} +``` + +### 5. suspicious_ip_ranges.map +**Purpose**: IP ranges to mark as suspicious (beyond built-in private IP detection) + +**Format**: CIDR notation (one per line) +``` +203.0.113.0/24 +198.51.100.0/24 +``` + +**Enable in** `local.d/url_suspect.conf`: +```lua +url_suspect { + checks { + numeric_ip { + use_range_map = true; + range_map = "$LOCAL_CONFDIR/local.d/maps.d/url_suspect/suspicious_ip_ranges.map"; + } + } +} +``` + +### 6. suspicious_ports.map +**Purpose**: Unusual ports that indicate suspicious URLs + +**Format**: Port number (one per line) +``` +8080 +8443 +3128 +1080 +``` + +**Enable in** `local.d/url_suspect.conf`: +```lua +url_suspect { + checks { + structure { + use_port_map = true; + port_map = "$LOCAL_CONFDIR/local.d/maps.d/url_suspect/suspicious_ports.map"; + } + } +} +``` + +## Map File Locations + +You can place map files in: +1. `$LOCAL_CONFDIR/local.d/maps.d/url_suspect/` (recommended) +2. `$LOCAL_CONFDIR/local.d/` (also works) +3. Any absolute path +4. Remote URL (e.g., `https://example.com/map.txt`) + +## Example Files + +See `.example` files in this directory for templates you can copy and modify. + +## Performance Note + +Maps are loaded once at startup and cached in memory. 
They don't add significant overhead even when enabled. + +## Support + +For questions or issues: +- Documentation: https://rspamd.com/doc/modules/url_suspect.html +- GitHub: https://github.com/rspamd/rspamd/issues diff --git a/conf/maps.d/url_suspect/suspicious_ports.map.example b/conf/maps.d/url_suspect/suspicious_ports.map.example new file mode 100644 index 0000000000..f7f1240337 --- /dev/null +++ b/conf/maps.d/url_suspect/suspicious_ports.map.example @@ -0,0 +1,18 @@ +# Suspicious Ports +# Unusual ports that may indicate malicious activity +# Format: Port number (one per line) + +# Common proxy ports +8080 +8443 +3128 +1080 + +# Unusual web ports +8888 +9999 +4444 +8000 + +# Add your suspicious ports below: +# 12345 diff --git a/conf/maps.d/url_suspect/suspicious_tlds.map.example b/conf/maps.d/url_suspect/suspicious_tlds.map.example new file mode 100644 index 0000000000..658996ff5d --- /dev/null +++ b/conf/maps.d/url_suspect/suspicious_tlds.map.example @@ -0,0 +1,19 @@ +# Suspicious TLDs +# These are in addition to the built-in list: .tk, .ml, .ga, .cf, .gq +# Format: TLD with leading dot (one per line) + +# Frequently abused TLDs +.xyz +.top +.work +.date +.loan +.win +.download +.stream +.click +.link +.racing + +# Add your suspicious TLDs below: +# .suspicious diff --git a/conf/maps.d/url_suspect/user_patterns.map.example b/conf/maps.d/url_suspect/user_patterns.map.example new file mode 100644 index 0000000000..4e07b3f817 --- /dev/null +++ b/conf/maps.d/url_suspect/user_patterns.map.example @@ -0,0 +1,20 @@ +# Suspicious User Field Patterns (Regex) +# Format: Regex pattern (one per line) + +# Common suspicious usernames +^admin$ +^root$ +^test$ +^user$ +^administrator$ +^webmaster$ +^postmaster$ + +# Very long numeric usernames (10+ digits) +^[0-9]{10,}$ + +# Very long usernames in general +^.{128,}$ + +# Add your patterns below: +# ^mypattern$ diff --git a/conf/maps.d/url_suspect/whitelist_domains.map.example 
b/conf/maps.d/url_suspect/whitelist_domains.map.example new file mode 100644 index 0000000000..a81bb5ce46 --- /dev/null +++ b/conf/maps.d/url_suspect/whitelist_domains.map.example @@ -0,0 +1,19 @@ +# Whitelist Domains for URL Suspect Plugin +# URLs from these domains will skip all URL suspect checks +# Format: One domain per line + +# Major tech companies +google.com +microsoft.com +apple.com +amazon.com + +# Development platforms +github.com +gitlab.com +bitbucket.org +stackoverflow.com + +# Add your trusted domains below: +# example.com +# internal-cdn.mycompany.com diff --git a/conf/modules.d/url_suspect.conf b/conf/modules.d/url_suspect.conf new file mode 100644 index 0000000000..fd198e03a2 --- /dev/null +++ b/conf/modules.d/url_suspect.conf @@ -0,0 +1,163 @@ +# URL Suspect Plugin Configuration +# Module documentation: https://rspamd.com/doc/modules/url_suspect.html + +url_suspect { + # Enable the plugin + enabled = true; + + # Which URL flags trigger inspection (existing flags, no new flags needed) + # Available: has_user, numeric, obscured, zw_spaces, no_tld, unnormalised + process_flags = ["has_user", "numeric", "obscured", "zw_spaces", "no_tld"]; + + # Check configuration + checks { + # User/password field analysis + user_password { + enabled = true; + + # Length thresholds for scoring + length_thresholds { + suspicious = 64; # Score if user field > 64 chars + long = 128; # Higher score if > 128 + very_long = 256; # Even higher if > 256 + } + + # OPTIONAL: Advanced pattern matching (disabled by default) + # Enable only if you need custom user field patterns + use_pattern_map = false; + # pattern_map = "$LOCAL_CONFDIR/local.d/url_suspect_user_patterns.map"; + + # OPTIONAL: User blacklist (disabled by default) + use_blacklist = false; + # blacklist_map = "$LOCAL_CONFDIR/local.d/url_suspect_user_blacklist.map"; + } + + # Numeric IP address analysis + numeric_ip { + enabled = true; + + # Scoring for different scenarios + base_score = 1.5; # Basic numeric IP + 
with_user_score = 4.0; # Numeric IP + user field + + # Private IP ranges (10.x, 192.168.x, etc.) + allow_private_ranges = true; + private_score = 0.5; # Lower score for private IPs + + # OPTIONAL: Suspicious IP ranges map (disabled by default) + use_range_map = false; + # range_map = "$LOCAL_CONFDIR/local.d/url_suspect_ip_ranges.map"; + } + + # TLD (Top Level Domain) analysis + tld { + enabled = true; + + # Built-in suspicious TLDs (no map needed) + builtin_suspicious = [".tk", ".ml", ".ga", ".cf", ".gq"]; + builtin_score = 3.0; + + # Missing TLD score + missing_tld_score = 2.0; + + # OPTIONAL: Custom TLD map (disabled by default) + # Add this if you have additional TLDs to check + use_tld_map = false; + # tld_map = "$LOCAL_CONFDIR/local.d/url_suspect_tlds.map"; + } + + # Unicode and encoding analysis + unicode { + enabled = true; + + # All checks use built-in logic (no maps needed) + check_validity = true; # Invalid UTF-8 sequences + check_homographs = true; # Mixed script homograph attacks + check_rtl_override = true; # RTL Unicode override tricks + check_zero_width = true; # Zero-width space characters + } + + # URL structure analysis + structure { + enabled = true; + + # Multiple @ signs + check_multiple_at = true; + max_at_signs = 2; + + # Backslashes in URL + check_backslash = true; + + # Excessive dots in hostname + check_excessive_dots = true; + max_host_dots = 6; + + # URL length + check_length = true; + max_url_length = 2048; + + # OPTIONAL: Suspicious ports map (disabled by default) + use_port_map = false; + # port_map = "$LOCAL_CONFDIR/local.d/url_suspect_ports.map"; + } + } + + # Symbol names (can be customized) + symbols { + # User/password symbols + user_password = "URL_USER_PASSWORD"; + user_long = "URL_USER_LONG"; + user_very_long = "URL_USER_VERY_LONG"; + + # Numeric IP symbols + numeric_ip = "URL_NUMERIC_IP"; + numeric_ip_user = "URL_NUMERIC_IP_USER"; + numeric_private = "URL_NUMERIC_PRIVATE_IP"; + + # TLD symbols + no_tld = "URL_NO_TLD"; + 
suspicious_tld = "URL_SUSPICIOUS_TLD"; + + # Unicode symbols + bad_unicode = "URL_BAD_UNICODE"; + homograph = "URL_HOMOGRAPH_ATTACK"; + rtl_override = "URL_RTL_OVERRIDE"; + zero_width = "URL_ZERO_WIDTH_SPACES"; + + # Structure symbols + multiple_at = "URL_MULTIPLE_AT_SIGNS"; + backslash = "URL_BACKSLASH_PATH"; + excessive_dots = "URL_EXCESSIVE_DOTS"; + very_long = "URL_VERY_LONG"; + } + + # ADVANCED: Global whitelist (disabled by default) + # Use only if you need to skip checks for specific domains + use_whitelist = false; + # whitelist_map = "$LOCAL_CONFDIR/local.d/url_suspect_whitelist.map"; + + # ADVANCED: Custom checks (disabled by default) + # Example: + # custom_checks { + # my_check = <20 @ signs are garbage +-- }; +-- }; + +-- ADVANCED: Custom filters +-- You can add your own filters that run during URL parsing. +-- Filter function signature: function(url_text, url_obj, flags) +-- Return: "accept", "suspicious", or "reject" +-- +-- Example: +-- custom_filters = { +-- my_domain_filter = function(url_text, url_obj, flags) +-- if url_obj then +-- local host = url_obj:get_host() +-- if host == "blocked-domain.com" then +-- return "reject" -- Don't create URL object +-- end +-- end +-- return "accept" +-- end; +-- }; diff --git a/lualib/lua_url_filter.lua b/lualib/lua_url_filter.lua new file mode 100644 index 0000000000..adb7fac1f4 --- /dev/null +++ b/lualib/lua_url_filter.lua @@ -0,0 +1,180 @@ +--[[ +Copyright (c) 2025, Vsevolod Stakhov + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+]]--
+
+--[[[
+-- @module lua_url_filter
+-- This module provides fast URL filtering during parsing phase.
+-- Called from C code to decide whether to create URL object or reject text.
+--]]
+
+local exports = {}
+
+-- Filter result constants
+exports.ACCEPT = 0
+exports.SUSPICIOUS = 1
+exports.REJECT = 2
+
+-- Verdicts accepted from custom filters. The documented contract is the
+-- strings "accept" / "suspicious" / "reject"; the numeric constants exported
+-- above are accepted as well so that a filter written against `exports.REJECT`
+-- is not silently ignored. Any other return value leaves the result untouched.
+local custom_verdicts = {
+  accept = exports.ACCEPT,
+  suspicious = exports.SUSPICIOUS,
+  reject = exports.REJECT,
+  [exports.ACCEPT] = exports.ACCEPT,
+  [exports.SUSPICIOUS] = exports.SUSPICIOUS,
+  [exports.REJECT] = exports.REJECT
+}
+
+-- Default settings (work without configuration)
+local settings = {
+  enabled = true,
+  builtin_filters = {
+    oversized_user = {
+      enabled = true,
+      max_length = 512 -- Absolute limit for user field
+    },
+    basic_unicode = {
+      enabled = true,
+      reject_invalid_utf8 = true
+    },
+    garbage_pattern = {
+      enabled = true,
+      max_at_signs = 20 -- Obvious garbage threshold
+    }
+  },
+  custom_filters = {}
+}
+
+-- Cached reference to rspamd_util.is_valid_utf8. Only a successful lookup is
+-- remembered, so a failed require is retried on the next call exactly as the
+-- per-call pcall(require, ...) used to do.
+local is_valid_utf8
+
+--- Built-in filter: reject URLs whose user field is longer than cfg.max_length.
+-- @param url_text raw URL text (unused here)
+-- @param url_obj URL object or nil; when nil there is nothing to inspect
+-- @param flags URL flags (unused here)
+-- @param cfg the `oversized_user` settings table
+-- @return exports.REJECT or exports.ACCEPT
+local function filter_oversized_user(url_text, url_obj, flags, cfg)
+  if not url_obj then
+    return exports.ACCEPT
+  end
+
+  local user = url_obj:get_user()
+  if user and #user > cfg.max_length then
+    -- This is obviously garbage, reject
+    return exports.REJECT
+  end
+
+  return exports.ACCEPT
+end
+
+--- Built-in filter: reject URL text that is not valid UTF-8.
+-- The check is skipped (ACCEPT) when it is disabled in cfg, when there is no
+-- text, or when rspamd_util.is_valid_utf8 is not available.
+-- @param url_text raw URL text
+-- @param url_obj URL object (unused here)
+-- @param flags URL flags (unused here)
+-- @param cfg the `basic_unicode` settings table
+-- @return exports.REJECT or exports.ACCEPT
+local function filter_basic_unicode(url_text, url_obj, flags, cfg)
+  if not cfg.reject_invalid_utf8 or url_text == nil then
+    return exports.ACCEPT
+  end
+
+  if not is_valid_utf8 then
+    local ok, rspamd_util = pcall(require, "rspamd_util")
+    if ok and rspamd_util.is_valid_utf8 then
+      is_valid_utf8 = rspamd_util.is_valid_utf8
+    end
+  end
+
+  if is_valid_utf8 and not is_valid_utf8(url_text) then
+    -- Invalid UTF-8, reject
+    return exports.REJECT
+  end
+
+  return exports.ACCEPT
+end
+
+--- Built-in filter: reject text with more than cfg.max_at_signs '@' characters.
+-- @param url_text raw URL text; nil is accepted without inspection
+-- @param url_obj URL object (unused here)
+-- @param flags URL flags (unused here)
+-- @param cfg the `garbage_pattern` settings table
+-- @return exports.REJECT or exports.ACCEPT
+local function filter_garbage_pattern(url_text, url_obj, flags, cfg)
+  if url_text == nil then
+    return exports.ACCEPT
+  end
+
+  -- gsub's second result is the number of substitutions, i.e. the @ count
+  local _, at_count = url_text:gsub("@", "")
+  if at_count > cfg.max_at_signs then
+    -- Way too many @ signs, this is garbage
+    return exports.REJECT
+  end
+
+  return exports.ACCEPT
+end
+
+-- Built-in filters in the order they are evaluated; `name` is the key of the
+-- corresponding entry in settings.builtin_filters.
+local builtin_filters = {
+  { name = 'oversized_user', func = filter_oversized_user },
+  { name = 'basic_unicode', func = filter_basic_unicode },
+  { name = 'garbage_pattern', func = filter_garbage_pattern }
+}
+
+--- Main entry point (called from C).
+-- Runs the enabled built-in filters first; the first REJECT short-circuits.
+-- Custom filters run afterwards (in unspecified `pairs` order): a reject
+-- verdict returns immediately, a suspicious verdict is remembered, and a
+-- filter that raises is logged and otherwise ignored.
+-- @param url_text raw URL text
+-- @param url_obj URL object, may be nil
+-- @param flags URL flags, passed through to every filter
+-- @return exports.ACCEPT, exports.SUSPICIOUS or exports.REJECT
+function exports.filter_url(url_text, url_obj, flags)
+  if not settings.enabled then
+    return exports.ACCEPT
+  end
+
+  local result = exports.ACCEPT
+
+  -- Run built-in filters
+  local builtin_cfg = settings.builtin_filters or {}
+  for _, filter in ipairs(builtin_filters) do
+    local fcfg = builtin_cfg[filter.name]
+    if fcfg and fcfg.enabled then
+      if filter.func(url_text, url_obj, flags, fcfg) == exports.REJECT then
+        return exports.REJECT
+      end
+    end
+  end
+
+  -- Run custom filters (if any)
+  local rspamd_logger
+  for name, filter_func in pairs(settings.custom_filters or {}) do
+    local ok, r = pcall(filter_func, url_text, url_obj, flags)
+    if not ok then
+      -- Log error but don't fail
+      rspamd_logger = rspamd_logger or require "rspamd_logger"
+      rspamd_logger.errx("Error in custom URL filter %s: %s", name, r)
+    else
+      local verdict = custom_verdicts[r]
+      if verdict == exports.REJECT then
+        return exports.REJECT
+      elseif verdict == exports.SUSPICIOUS then
+        result = exports.SUSPICIOUS
+      end
+    end
+  end
+
+  return result
+end
+
+--- Initialize from configuration.
+-- Merges the `url_filter` option section (if present) over the defaults.
+-- @param cfg configuration object providing get_all_opt
+function exports.init(cfg)
+  local lua_util = require "lua_util"
+  local rspamd_logger = require "rspamd_logger"
+
+  local opts = cfg:get_all_opt('url_filter')
+  if opts then
+    settings = lua_util.override_defaults(settings, opts)
+  end
+
+  rspamd_logger.infox(cfg, "URL filter initialized (enabled=%s)", settings.enabled)
+end
+
+--- Allow runtime registration of custom filters.
+-- @param name key under which the filter is stored; must not be nil
+-- @param func filter function(url_text, url_obj, flags)
+-- @return true when registered, false when the arguments are unusable
+function exports.register_custom_filter(name, func)
+  local rspamd_logger = require "rspamd_logger"
+
+  if name == nil then
+    -- A nil key cannot be stored in a table; refuse instead of raising
+    rspamd_logger.errx("Cannot register custom filter: name is missing")
+    return false
+  end
+
+  if type(func) ~= 'function' then
+    rspamd_logger.errx("Cannot register custom filter %s: not a function", name)
+    return false
+  end
+
+  settings.custom_filters[name] = func
+  rspamd_logger.infox("Registered custom URL filter: %s", name)
+  return true
+end
+
+return exports
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 7027fc02d7..6add598a43 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -1199,7 +1199,10 @@ rspamd_web_parse(struct http_parser_url *u, const char *str, gsize len,
 					goto out;
 				}
 				else if (p - c > max_email_user) {
-					goto out;
+					/* Allow oversized user fields but mark them - fixes #5731 */
+					/* Don't fail completely, just mark with flag and continue */
+					*flags |= RSPAMD_URL_FLAG_HAS_USER;
+					/* Continue parsing - the Lua plugin will handle scoring */
 				}
 
 				p++;
diff --git a/src/plugins/lua/url_suspect.lua b/src/plugins/lua/url_suspect.lua
new file mode 100644
index 0000000000..0b5c82b971
--- /dev/null
+++ b/src/plugins/lua/url_suspect.lua
@@ -0,0 +1,602 @@
+--[[
+Copyright (c) 2025, Vsevolod Stakhov
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+--[[[
+-- @module url_suspect
+-- This module performs deep introspection of suspicious URLs.
+-- Works with existing URL flags, no new flags needed.
+-- Provides multiple specific symbols for different URL issues.
+--]]
+
+if confighelp then
+  return
+end
+
+local N = "url_suspect"
+local rspamd_logger = require "rspamd_logger"
+local lua_util = require "lua_util"
+local rspamd_url = require "rspamd_url"
+local rspamd_util = require "rspamd_util"
+local bit = require "bit"
+
+-- Default settings (work without any maps)
+local settings = {
+  enabled = true,
+  process_flags = { 'has_user', 'numeric', 'obscured', 'zw_spaces', 'no_tld' },
+  checks = {
+    user_password = {
+      enabled = true,
+      length_thresholds = {
+        suspicious = 64,
+        long = 128,
+        very_long = 256
+      },
+      use_pattern_map = false,
+      use_blacklist = false
+    },
+    numeric_ip = {
+      enabled = true,
+      base_score = 1.5,
+      with_user_score = 4.0,
+      allow_private_ranges = true,
+      private_score = 0.5,
+      use_range_map = false
+    },
+    tld = {
+      enabled = true,
+      builtin_suspicious = { ".tk", ".ml", ".ga", ".cf", ".gq" },
+      builtin_score = 3.0,
+      missing_tld_score = 2.0,
+      use_tld_map = false
+    },
+    unicode = {
+      enabled = true,
+      check_validity = true,
+      check_homographs = true,
+      check_rtl_override = true,
+      check_zero_width = true
+    },
+    structure = {
+      enabled = true,
+      check_multiple_at = true,
+      max_at_signs = 2,
+      check_backslash = true,
+      check_excessive_dots = true,
+      max_host_dots = 6,
+      check_length = true,
+      max_url_length = 2048,
+      use_port_map = false
+    }
+  },
+  symbols = {
+    -- User/password symbols
+    user_password = "URL_USER_PASSWORD",
+    user_long = "URL_USER_LONG",
+    user_very_long = "URL_USER_VERY_LONG",
+    -- Numeric IP symbols
+    numeric_ip = "URL_NUMERIC_IP",
+    numeric_ip_user = "URL_NUMERIC_IP_USER",
+    numeric_private = "URL_NUMERIC_PRIVATE_IP",
+    -- TLD symbols
+    no_tld = "URL_NO_TLD",
+    suspicious_tld = "URL_SUSPICIOUS_TLD",
+    -- Unicode symbols
+    bad_unicode = "URL_BAD_UNICODE",
+    homograph = "URL_HOMOGRAPH_ATTACK",
+    rtl_override = "URL_RTL_OVERRIDE",
+    zero_width = "URL_ZERO_WIDTH_SPACES",
+    -- Structure symbols
+    multiple_at = "URL_MULTIPLE_AT_SIGNS",
+    backslash = "URL_BACKSLASH_PATH",
+    excessive_dots = "URL_EXCESSIVE_DOTS",
+    very_long = "URL_VERY_LONG"
+  },
+  use_whitelist = false,
+  custom_checks = {},
+  compat_mode = true
+}
+
+-- Optional maps (only loaded if enabled)
+local maps = {
+  whitelist = nil,
+  user_patterns = nil,
+  user_blacklist = nil,
+  suspicious_ips = nil,
+  suspicious_tlds = nil,
+  suspicious_ports = nil
+}
+
+-- Returns true when the numeric flag set `flags` contains `flag`.
+local function has_flag(flags, flag)
+  return bit.band(flags, flag) ~= 0
+end
+
+-- Check implementations. Every check has the signature (task, url, cfg) where
+-- cfg is the check's own section of settings.checks, and returns a (possibly
+-- empty) array of findings: { symbol = ..., score = ..., options = {...} }.
+local checks = {}
+
+--- Check: User/password in URL.
+-- Only URLs carrying the has_user flag and a user field produce a finding.
+function checks.user_password_analysis(task, url, cfg)
+  local findings = {}
+
+  -- Check if user field present
+  if not has_flag(url:get_flags_num(), rspamd_url.flags.has_user) then
+    return findings
+  end
+
+  local user = url:get_user()
+  if not user then
+    return findings
+  end
+
+  local user_len = #user
+  local host = url:get_host()
+
+  lua_util.debugm(N, task, "Checking user field length: %d chars", user_len)
+
+  -- Length-based scoring (built-in, no map needed)
+  if user_len > cfg.length_thresholds.very_long then
+    table.insert(findings, {
+      symbol = settings.symbols.user_very_long,
+      score = 5.0,
+      options = { string.format("%d", user_len) }
+    })
+  elseif user_len > cfg.length_thresholds.long then
+    table.insert(findings, {
+      symbol = settings.symbols.user_long,
+      score = 3.0,
+      options = { string.format("%d", user_len) }
+    })
+  else
+    -- Users above and below length_thresholds.suspicious were reported with
+    -- the very same symbol and score, so the two branches are merged here.
+    table.insert(findings, {
+      symbol = settings.symbols.user_password,
+      score = 2.0,
+      options = { host or "unknown" }
+    })
+  end
+
+  -- Optional: check pattern map if enabled (diagnostic only, no finding)
+  if cfg.use_pattern_map and maps.user_patterns then
+    if maps.user_patterns:get_key(user) then
+      lua_util.debugm(N, task, "User field matches suspicious pattern")
+      -- Could add additional symbol or increase score
+    end
+  end
+
+  -- Optional: check blacklist if enabled (diagnostic only, no finding)
+  if cfg.use_blacklist and maps.user_blacklist then
+    if maps.user_blacklist:get_key(user) then
+      lua_util.debugm(N, task, "User field is blacklisted")
+      -- Could add additional symbol or increase score
+    end
+  end
+
+  return findings
+end
+
+--- Check: Numeric IP as hostname.
+-- Private-looking hosts get the private symbol when allow_private_ranges is
+-- set; everything else gets the plain or the "with user" symbol.
+function checks.numeric_ip_analysis(task, url, cfg)
+  local findings = {}
+  local url_flags_tab = rspamd_url.flags
+  local flags = url:get_flags_num()
+
+  if not has_flag(flags, url_flags_tab.numeric) then
+    return findings
+  end
+
+  local host = url:get_host()
+  if not host then
+    return findings
+  end
+
+  lua_util.debugm(N, task, "Checking numeric IP: %s", host)
+
+  -- Textual prefix match for 10/8, 192.168/16 and 172.16/12
+  local is_private = host:match("^10%.") or
+      host:match("^192%.168%.") or
+      host:match("^172%.1[6-9]%.") or
+      host:match("^172%.2[0-9]%.") or
+      host:match("^172%.3[0-1]%.")
+
+  if is_private and cfg.allow_private_ranges then
+    table.insert(findings, {
+      symbol = settings.symbols.numeric_private,
+      score = cfg.private_score,
+      options = { host }
+    })
+  elseif has_flag(flags, url_flags_tab.has_user) then
+    -- User present (more suspicious)
+    table.insert(findings, {
+      symbol = settings.symbols.numeric_ip_user,
+      score = cfg.with_user_score,
+      options = { host }
+    })
+  else
+    table.insert(findings, {
+      symbol = settings.symbols.numeric_ip,
+      score = cfg.base_score,
+      options = { host }
+    })
+  end
+
+  -- Optional: check IP range map if enabled (diagnostic only, no finding)
+  if cfg.use_range_map and maps.suspicious_ips then
+    if maps.suspicious_ips:get_key(host) then
+      lua_util.debugm(N, task, "IP is in suspicious range")
+      -- Could add additional penalty
+    end
+  end
+
+  return findings
+end
+
+--- Check: TLD validation.
+-- Reports a missing TLD (unless the host is numeric) or a suspicious TLD taken
+-- from the built-in list or, when enabled, from the optional TLD map.
+function checks.tld_analysis(task, url, cfg)
+  local findings = {}
+  local url_flags_tab = rspamd_url.flags
+  local flags = url:get_flags_num()
+  local host = url:get_host()
+
+  if not host then
+    return findings
+  end
+
+  -- Check for missing TLD
+  if has_flag(flags, url_flags_tab.no_tld) then
+    -- Skip if it's a numeric IP (handled separately)
+    if not has_flag(flags, url_flags_tab.numeric) then
+      lua_util.debugm(N, task, "URL has no TLD: %s", host)
+      table.insert(findings, {
+        symbol = settings.symbols.no_tld,
+        score = cfg.missing_tld_score,
+        options = { host }
+      })
+    end
+    return findings
+  end
+
+  local tld = url:get_tld()
+  if not tld then
+    return findings
+  end
+
+  -- Check built-in suspicious TLDs (no map needed). The suffix comparison is
+  -- what matches when get_tld() yields more than the bare TLD.
+  local reported = false
+  for _, suspicious_tld in ipairs(cfg.builtin_suspicious) do
+    if tld == suspicious_tld or tld:sub(-#suspicious_tld) == suspicious_tld then
+      lua_util.debugm(N, task, "URL uses suspicious TLD: %s", tld)
+      table.insert(findings, {
+        symbol = settings.symbols.suspicious_tld,
+        score = cfg.builtin_score,
+        options = { tld }
+      })
+      reported = true
+      break
+    end
+  end
+
+  -- Optional: check TLD map if enabled. Map entries are documented as TLDs
+  -- with a leading dot (".xyz"), so besides the full get_tld() value the last
+  -- dot-separated label is looked up too; otherwise "example.xyz" could never
+  -- match a ".xyz" entry.
+  if cfg.use_tld_map and maps.suspicious_tlds then
+    local suffix = tld:match("(%.[^.]+)$")
+    if maps.suspicious_tlds:get_key(tld) or
+        (suffix and maps.suspicious_tlds:get_key(suffix)) then
+      lua_util.debugm(N, task, "URL TLD in suspicious map: %s", tld)
+      -- Report once per URL: skip when the built-in list already matched
+      if not reported then
+        table.insert(findings, {
+          symbol = settings.symbols.suspicious_tld,
+          score = cfg.builtin_score,
+          options = { tld }
+        })
+      end
+    end
+  end
+
+  return findings
+end
+
+--- Check: Unicode anomalies.
+-- Covers invalid UTF-8, the zw_spaces flag, spoofed (homograph) hosts and the
+-- RTL override character.
+function checks.unicode_analysis(task, url, cfg)
+  local findings = {}
+  local flags = url:get_flags_num()
+
+  local url_text = url:get_text()
+  local host = url:get_host()
+
+  -- Check validity
+  if cfg.check_validity and not rspamd_util.is_valid_utf8(url_text) then
+    lua_util.debugm(N, task, "URL has invalid UTF-8")
+    table.insert(findings, {
+      symbol = settings.symbols.bad_unicode,
+      score = 3.0,
+      options = { host or "unknown" }
+    })
+  end
+
+  -- Check zero-width spaces (existing flag)
+  if cfg.check_zero_width and has_flag(flags, rspamd_url.flags.zw_spaces) then
+    lua_util.debugm(N, task, "URL contains zero-width spaces")
+    table.insert(findings, {
+      symbol = settings.symbols.zero_width,
+      score = 7.0,
+      options = { host or "unknown" }
+    })
+  end
+
+  -- Check homographs
+  if cfg.check_homographs and host then
+    if rspamd_util.is_utf_spoofed(host) then
+      lua_util.debugm(N, task, "URL uses homograph attack: %s", host)
+      table.insert(findings, {
+        symbol = settings.symbols.homograph,
+        score = 5.0,
+        options = { host }
+      })
+    end
+  end
+
+  -- Check RTL override (U+202E); "\226\128\174" is its UTF-8 encoding
+  if cfg.check_rtl_override and url_text:find("\226\128\174") then
+    lua_util.debugm(N, task, "URL contains RTL override")
+    table.insert(findings, {
+      symbol = settings.symbols.rtl_override,
+      score = 6.0,
+      options = { host or "unknown" }
+    })
+  end
+
+  return findings
+end
+
+--- Check: URL structure anomalies.
+-- Counts @ signs and host dots, looks for backslashes in obscured URLs and
+-- measures the overall URL length.
+function checks.structure_analysis(task, url, cfg)
+  local findings = {}
+  local url_text = url:get_text()
+  local host = url:get_host()
+
+  -- Check multiple @ signs
+  if cfg.check_multiple_at then
+    local _, at_count = url_text:gsub("@", "")
+    if at_count > cfg.max_at_signs then
+      lua_util.debugm(N, task, "URL has %d @ signs", at_count)
+      table.insert(findings, {
+        symbol = settings.symbols.multiple_at,
+        score = 3.0,
+        options = { string.format("%d", at_count) }
+      })
+    end
+  end
+
+  -- Check backslashes (existing flag indicates obscured)
+  if cfg.check_backslash then
+    local obscured = has_flag(url:get_flags_num(), rspamd_url.flags.obscured)
+    if obscured and url_text:find("\\") then
+      lua_util.debugm(N, task, "URL contains backslashes")
+      table.insert(findings, {
+        symbol = settings.symbols.backslash,
+        score = 2.0,
+        options = { host or "unknown" }
+      })
+    end
+  end
+
+  -- Check excessive dots in hostname
+  if cfg.check_excessive_dots and host then
+    local _, dot_count = host:gsub("%.", "")
+    if dot_count > cfg.max_host_dots then
+      lua_util.debugm(N, task, "URL hostname has %d dots", dot_count)
+      table.insert(findings, {
+        symbol = settings.symbols.excessive_dots,
+        score = 2.0,
+        options = { string.format("%d", dot_count) }
+      })
+    end
+  end
+
+  -- Check URL length
+  if cfg.check_length and #url_text > cfg.max_url_length then
+    lua_util.debugm(N, task, "URL is very long: %d chars", #url_text)
+    table.insert(findings, {
+      symbol = settings.symbols.very_long,
+      score = 1.5,
+      options = { string.format("%d", #url_text) }
+    })
+  end
+
+  return findings
+end
+
+-- Built-in checks in evaluation order; `section` names the settings.checks
+-- entry that both enables the check and is passed to it as cfg.
+local builtin_checks = {
+  { section = 'user_password', func = checks.user_password_analysis },
+  { section = 'numeric_ip', func = checks.numeric_ip_analysis },
+  { section = 'tld', func = checks.tld_analysis },
+  { section = 'unicode', func = checks.unicode_analysis },
+  { section = 'structure', func = checks.structure_analysis }
+}
+
+--- Main analysis function.
+-- Runs the whitelist shortcut, all enabled built-in checks and the custom
+-- checks against a single URL.
+-- @param task current task
+-- @param url URL object
+-- @param cfg complete plugin settings table
+-- @return array of findings (empty for whitelisted hosts)
+local function analyze_url(task, url, cfg)
+  local all_findings = {}
+
+  -- Optional: check whitelist first
+  if cfg.use_whitelist and maps.whitelist then
+    local host = url:get_host()
+    if host and maps.whitelist:get_key(host) then
+      lua_util.debugm(N, task, "URL host is whitelisted: %s", host)
+      return all_findings
+    end
+  end
+
+  -- Run all enabled checks (using built-in logic, no maps required)
+  for _, check in ipairs(builtin_checks) do
+    local check_cfg = cfg.checks[check.section]
+    if check_cfg and check_cfg.enabled then
+      for _, f in ipairs(check.func(task, url, check_cfg)) do
+        table.insert(all_findings, f)
+      end
+    end
+  end
+
+  -- Run custom checks (advanced users). A custom check returns either a single
+  -- finding table or nothing. Only a raised error is reported: a check that
+  -- simply finds nothing must not be logged as a failure.
+  for name, check_func in pairs(cfg.custom_checks or {}) do
+    local ok, finding = pcall(check_func, task, url, cfg)
+    if not ok then
+      rspamd_logger.errx(task, "Error in custom check %s: %s", name, finding)
+    elseif type(finding) == 'table' and finding.symbol then
+      table.insert(all_findings, finding)
+    end
+  end
+
+  return all_findings
+end
+
+--- Main callback.
+-- Analyses every URL that carries at least one of settings.process_flags and
+-- inserts one result per finding.
+-- NOTE(review): finding.score is handed to insert_result as the weight
+-- argument; confirm it is meant to scale the score configured for the symbol.
+local function url_suspect_callback(task)
+  -- Get URLs with suspicious flags (using existing flags)
+  local suspect_urls = task:get_urls_filtered(settings.process_flags)
+
+  if not suspect_urls or #suspect_urls == 0 then
+    return false
+  end
+
+  lua_util.debugm(N, task, "Processing %s URLs with suspicious flags", #suspect_urls)
+
+  local total_findings = 0
+
+  for _, url in ipairs(suspect_urls) do
+    for _, finding in ipairs(analyze_url(task, url, settings)) do
+      task:insert_result(finding.symbol, finding.score, finding.options or {})
+      total_findings = total_findings + 1
+    end
+  end
+
+  -- Backward compatibility: R_SUSPICIOUS_URL
+  if settings.compat_mode and total_findings > 0 then
+    -- Check if we inserted any symbols
+    local has_findings = false
+    for _, symbol_name in pairs(settings.symbols) do
+      if task:has_symbol(symbol_name) then
+        has_findings = true
+        break
+      end
+    end
+
+    if has_findings then
+      task:insert_result('R_SUSPICIOUS_URL', 5.0)
+    end
+  end
+
+  return false
+end
+
+--- Initialize maps (only if enabled).
+-- A map is created only when both its use_* switch and its source option are
+-- set. Missing or disabled check sections are tolerated in the same way
+-- analyze_url tolerates them.
+-- @param cfg complete plugin settings table
+local function init_maps(cfg)
+  local lua_maps -- required lazily: untouched when no map is enabled
+
+  local function add_map(enabled, source, map_type, description)
+    if not (enabled and source) then
+      return nil
+    end
+    lua_maps = lua_maps or require "lua_maps"
+    return lua_maps.map_add_from_ucl(source, map_type, description)
+  end
+
+  local check_cfg = cfg.checks or {}
+  local user_password = check_cfg.user_password or {}
+  local numeric_ip = check_cfg.numeric_ip or {}
+  local tld = check_cfg.tld or {}
+  local structure = check_cfg.structure or {}
+
+  maps.whitelist = add_map(cfg.use_whitelist, cfg.whitelist_map,
+      'set', 'url_suspect_whitelist')
+  maps.user_patterns = add_map(user_password.use_pattern_map, user_password.pattern_map,
+      'regexp', 'url_suspect_user_patterns')
+  maps.user_blacklist = add_map(user_password.use_blacklist, user_password.blacklist_map,
+      'set', 'url_suspect_user_blacklist')
+  maps.suspicious_ips = add_map(numeric_ip.use_range_map, numeric_ip.range_map,
+      'radix', 'url_suspect_ip_ranges')
+  maps.suspicious_tlds = add_map(tld.use_tld_map, tld.tld_map,
+      'set', 'url_suspect_tlds')
+  maps.suspicious_ports = add_map(structure.use_port_map, structure.port_map,
+      'set', 'url_suspect_ports')
+end
+
+-- Plugin registration
+local opts = rspamd_config:get_all_opt(N)
+if opts then
+  settings = lua_util.override_defaults(settings, opts)
+end
+
+if settings.enabled then
+  init_maps(settings)
+
+  -- Single callback symbol; every reported symbol below is a virtual child
+  local id = rspamd_config:register_symbol({
+    name = 'URL_SUSPECT_CHECK',
+    type = 'callback',
+    callback = url_suspect_callback,
+    priority = 10,
+    group = 'url',
+    flags = 'empty,nice'
+  })
+
+  -- Register all symbol names as virtual
+  for _, symbol_name in pairs(settings.symbols) do
+    rspamd_config:register_symbol({
+      name = symbol_name,
+      type = 'virtual',
+      parent = id,
+      group = 'url'
+    })
+  end
+
+  -- Backward compat symbol
+  if settings.compat_mode then
+    rspamd_config:register_symbol({
+      name = 'R_SUSPICIOUS_URL',
+      type = 'virtual',
+      parent = id,
+      score = 5.0,
+      group = 'url',
+      description = 'Suspicious URL (legacy symbol)'
+    })
+  end
+end
-- 
2.47.3