From: Vsevolod Stakhov Date: Wed, 6 May 2026 17:25:25 +0000 (+0100) Subject: [Fix] url: do not drop URLs with long userinfo X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f1c1c3063ea9fd1d5b8d3de5d7d01ac749bdedfc;p=thirdparty%2Frspamd.git [Fix] url: do not drop URLs with long userinfo The C parser consults lua_url_filter for every byte of userinfo past max_email_user (64); the filter previously rejected anything longer than 2048 bytes, which silently dropped the entire URL. That blanket length REJECT killed exactly the userinfo-obfuscation phishing pattern (https://legit.com@evil.com/...) the parser is meant to surface. Raise the catastrophic-length REJECT to 16 KiB (still well under the parser's own G_MAXUINT16/2 cap) and have parse_user mark the URL as RSPAMD_URL_FLAG_OBSCURED | RSPAMD_URL_FLAG_HAS_USER as soon as the userinfo crosses 64 bytes, regardless of the filter verdict, so downstream rules can act on the obfuscation signal. --- diff --git a/lualib/lua_url_filter.lua b/lualib/lua_url_filter.lua index c9c08c1b31..c264eef306 100644 --- a/lualib/lua_url_filter.lua +++ b/lualib/lua_url_filter.lua @@ -48,10 +48,16 @@ end -- @param flags number - URL parsing flags -- @return number - ACCEPT/SUSPICIOUS/REJECT function exports.filter_url_string(url_text, flags) - -- Sanity check: URL length + -- Note: this is invoked mid-parse from the C state machine with whatever + -- bytes have been seen so far (often just the userinfo span, not a full + -- URL). A blanket length REJECT here silently drops legitimate phishing + -- patterns like https://legit.com@evil.com/... where the + -- userinfo is intentionally bloated to obscure the real host. The C parser + -- already caps total URL length at G_MAXUINT16/2; this threshold is just a + -- DoS guard against catastrophic sizes. local url_len = url_text:len() - if url_len > 2048 then - return exports.REJECT -- Overly long URL + if url_len > 16384 then + return exports.REJECT -- Catastrophic length, abort end -- Build control character set: 0x00-0x08, 0x0B-0x1F, 0x7F diff --git a/src/libserver/url.c b/src/libserver/url.c index 4c2871070b..ca539f2ca9 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1218,7 +1218,14 @@ rspamd_web_parse(struct http_parser_url *u, const char *str, gsize len, goto out; } else if (p - c > max_email_user) { - /* Oversized user field - consult Lua filter (fixes #5731) */ + /* + * Oversized user field is itself an obfuscation signal + * (e.g. https://legit.com@evil.com/...), + * so mark obscured regardless of what the Lua filter says. + */ + *flags |= RSPAMD_URL_FLAG_OBSCURED | RSPAMD_URL_FLAG_HAS_USER; + + /* Consult Lua filter (fixes #5731) */ enum rspamd_url_lua_filter_result lua_decision = rspamd_url_lua_consult(c, p - c, *flags, (lua_State *) lua_state); @@ -1226,12 +1233,7 @@ rspamd_web_parse(struct http_parser_url *u, const char *str, gsize len, /* REJECT: Lua says this is garbage, abort parsing */ goto out; } - else if (lua_decision == RSPAMD_URL_LUA_FILTER_SUSPICIOUS) { - /* SUSPICIOUS: Mark as obscured for plugin analysis */ - *flags |= RSPAMD_URL_FLAG_OBSCURED; - } /* ACCEPT or SUSPICIOUS: continue parsing */ - *flags |= RSPAMD_URL_FLAG_HAS_USER; } p++; diff --git a/test/lua/unit/lua_url_filter.lua b/test/lua/unit/lua_url_filter.lua index d56ad81ed0..3ceee7fc82 100644 --- a/test/lua/unit/lua_url_filter.lua +++ b/test/lua/unit/lua_url_filter.lua @@ -33,8 +33,11 @@ context("URL filter functions", function() { "http://user@host@example.com", 0, SUSPICIOUS, "multiple @ signs" }, { "http://" .. string.rep("@", 25) .. "example.com", 0, REJECT, ">20 @ signs (reject)" }, - -- Very long URLs - { "http://example.com/" .. string.rep("a", 2100), 0, REJECT, ">2048 char URL (reject)" }, + -- Very long URLs: the legacy 2048 threshold dropped legitimate + -- userinfo-obfuscation phishing patterns; only catastrophic sizes + -- (>16 KiB) should now reject. + { "http://example.com/" .. string.rep("a", 2100), 0, ACCEPT, "2100 char URL (accept)" }, + { "http://example.com/" .. string.rep("a", 17000), 0, REJECT, ">16384 char URL (reject)" }, -- Control characters (should reject) { "http://example.com/\x00test", 0, REJECT, "URL with null byte" },