]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Fix] url: do not drop URLs with long userinfo
author Vsevolod Stakhov <vsevolod@rspamd.com>
Wed, 6 May 2026 17:25:25 +0000 (18:25 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Wed, 6 May 2026 17:28:09 +0000 (18:28 +0100)
The C parser consults lua_url_filter for every byte of userinfo past
max_email_user (64); the filter previously rejected anything longer
than 2048 bytes, which silently dropped the entire URL. That blanket
length REJECT killed exactly the userinfo-obfuscation phishing pattern
(https://legit.com<lots-of-spaces>@evil.com/...) the parser is meant
to surface.

Raise the catastrophic-length REJECT to 16 KiB (still well under the
parser's own G_MAXUINT16/2 cap) and have parse_user mark the URL as
RSPAMD_URL_FLAG_OBSCURED | RSPAMD_URL_FLAG_HAS_USER as soon as the
userinfo crosses 64 bytes, regardless of the filter verdict, so
downstream rules can act on the obfuscation signal.

lualib/lua_url_filter.lua
src/libserver/url.c
test/lua/unit/lua_url_filter.lua

index c9c08c1b312f0814ab5f47381884fe83fde87665..c264eef30678d781defa341c22c0a568ad483520 100644 (file)
@@ -48,10 +48,16 @@ end
 -- @param flags number - URL parsing flags
 -- @return number - ACCEPT/SUSPICIOUS/REJECT
 function exports.filter_url_string(url_text, flags)
-  -- Sanity check: URL length
+  -- Note: this is invoked mid-parse from the C state machine with whatever
+  -- bytes have been seen so far (often just the userinfo span, not a full
+  -- URL). A blanket length REJECT here silently drops legitimate phishing
+  -- patterns like https://legit.com<lots-of-spaces>@evil.com/... where the
+  -- userinfo is intentionally bloated to obscure the real host. The C parser
+  -- already caps total URL length at G_MAXUINT16/2; this threshold is just a
+  -- DoS guard against catastrophic sizes.
   local url_len = url_text:len()
-  if url_len > 2048 then
-    return exports.REJECT -- Overly long URL
+  if url_len > 16384 then
+    return exports.REJECT -- Catastrophic length, abort
   end
 
   -- Build control character set: 0x00-0x08, 0x0B-0x1F, 0x7F
index 4c2871070b342cd4302bcff349735e639a13ba33..ca539f2ca9603a904c080dcd910d8efa8609e564 100644 (file)
@@ -1218,7 +1218,14 @@ rspamd_web_parse(struct http_parser_url *u, const char *str, gsize len,
                                goto out;
                        }
                        else if (p - c > max_email_user) {
-                               /* Oversized user field - consult Lua filter (fixes #5731) */
+                               /*
+                                * Oversized user field is itself an obfuscation signal
+                                * (e.g. https://legit.com<lots-of-spaces>@evil.com/...),
+                                * so mark obscured regardless of what the Lua filter says.
+                                */
+                               *flags |= RSPAMD_URL_FLAG_OBSCURED | RSPAMD_URL_FLAG_HAS_USER;
+
+                               /* Consult Lua filter (fixes #5731) */
                                enum rspamd_url_lua_filter_result lua_decision =
                                        rspamd_url_lua_consult(c, p - c, *flags, (lua_State *) lua_state);
 
@@ -1226,12 +1233,7 @@ rspamd_web_parse(struct http_parser_url *u, const char *str, gsize len,
                                        /* REJECT: Lua says this is garbage, abort parsing */
                                        goto out;
                                }
-                               else if (lua_decision == RSPAMD_URL_LUA_FILTER_SUSPICIOUS) {
-                                       /* SUSPICIOUS: Mark as obscured for plugin analysis */
-                                       *flags |= RSPAMD_URL_FLAG_OBSCURED;
-                               }
                                /* ACCEPT or SUSPICIOUS: continue parsing */
-                               *flags |= RSPAMD_URL_FLAG_HAS_USER;
                        }
 
                        p++;
index d56ad81ed0c7a19a0501fe413a93e00167d60adb..3ceee7fc825c9debba16b66c8438c902cf638a66 100644 (file)
@@ -33,8 +33,11 @@ context("URL filter functions", function()
     { "http://user@host@example.com", 0, SUSPICIOUS, "multiple @ signs" },
     { "http://" .. string.rep("@", 25) .. "example.com", 0, REJECT, ">20 @ signs (reject)" },
 
-    -- Very long URLs
-    { "http://example.com/" .. string.rep("a", 2100), 0, REJECT, ">2048 char URL (reject)" },
+    -- Very long URLs: the legacy 2048 threshold dropped legitimate
+    -- userinfo-obfuscation phishing patterns; only catastrophic sizes
+    -- (>16 KiB) should now reject.
+    { "http://example.com/" .. string.rep("a", 2100), 0, ACCEPT, "2100 char URL (accept)" },
+    { "http://example.com/" .. string.rep("a", 17000), 0, REJECT, ">16384 char URL (reject)" },
 
     -- Control characters (should reject)
     { "http://example.com/\x00test", 0, REJECT, "URL with null byte" },