From: Vsevolod Stakhov
Date: Fri, 14 Nov 2025 17:18:37 +0000 (+0000)
Subject: [Feature] Add C->Lua URL filter consultation infrastructure
X-Git-Tag: 3.14.1~15^2~15
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=61b6ac13b306150dc3a7488a9243e712c40deb70;p=thirdparty%2Frspamd.git

[Feature] Add C->Lua URL filter consultation infrastructure

- Add rspamd_url_lua_consult() helper function in url.c
- Function calls lua_url_filter.filter_url_string() from C
- Returns ACCEPT/SUSPICIOUS/REJECT to guide C parser
- Add filter_url_string() function in lua_url_filter.lua
- Validates URL strings and rejects obvious garbage
- Checks: length, @ count, user field size, control chars, UTF-8
- Add TODO comment at oversized user field check (line 1204)
- Infrastructure ready, needs lua_State plumbing through call chain
- This completes the two-level architecture design
---

diff --git a/lualib/lua_url_filter.lua b/lualib/lua_url_filter.lua
index adb7fac1f4..98ed571c95 100644
--- a/lualib/lua_url_filter.lua
+++ b/lualib/lua_url_filter.lua
@@ -177,4 +177,65 @@ function exports.register_custom_filter(name, func)
   return true
 end
 
+-- Function called from C parser when encountering suspicious URL patterns
+-- This is called DURING parsing when C is unsure how to proceed
+-- @param url_str: URL string fragment (may be partial URL being parsed)
+-- @param flags: Current parsing flags from C (accepted but not inspected here)
+-- @return 0=ACCEPT (continue), 1=SUSPICIOUS (mark obscured), 2=REJECT (abort)
+function exports.filter_url_string(url_str, flags)
+  if not url_str or #url_str == 0 then
+    return exports.ACCEPT
+  end
+
+  -- Quick rejection of obviously malicious patterns
+  if #url_str > 2048 then
+    return exports.REJECT -- Absurdly long URL
+  end
+
+  -- Count @ signs (excessive indicates obfuscation)
+  local at_count = select(2, url_str:gsub("@", ""))
+  if at_count > 20 then
+    return exports.REJECT -- Too many @ signs
+  end
+
+  -- Check for extremely long user field
+  local user = url_str:match("^[^:/@]*://([^:/@]+)@") or url_str:match("^([^@]+)@")
+  if user then
+    if #user > 512 then
+      return exports.REJECT -- Absurdly long user field
+    elseif #user > 128 then
+      return exports.SUSPICIOUS -- Long user field, mark for inspection
+    end
+  end
+
+  -- Check for null bytes. This is a plain-text find on purpose: Lua 5.1 and
+  -- LuaJIT read patterns as C strings, so a "\0" inside a character class
+  -- raises "malformed pattern" instead of matching.
+  if url_str:find("\0", 1, true) then
+    return exports.REJECT -- Embedded NUL
+  end
+
+  -- Check for other control characters (except tab/newline)
+  if url_str:find("[\1-\8\11-\31\127]") then
+    return exports.REJECT -- Control characters
+  end
+
+  -- Basic UTF-8 validation (reject obviously broken). Every structurally
+  -- complete multi-byte sequence is replaced by an ASCII placeholder, longest
+  -- form first; the placeholder keeps the neighbours of a removed sequence
+  -- from joining into a fake one. Any high byte that survives is a stray
+  -- continuation byte, a truncated sequence or an impossible lead byte.
+  -- Overlong forms and surrogates are not detected.
+  local leftover = url_str
+    :gsub("[\240-\244][\128-\191][\128-\191][\128-\191]", ".")
+    :gsub("[\224-\239][\128-\191][\128-\191]", ".")
+    :gsub("[\194-\223][\128-\191]", ".")
+  if leftover:find("[\128-\255]") then
+    return exports.REJECT -- Invalid UTF-8
+  end
+
+  -- Allow through - looks reasonable enough to continue parsing
+  return exports.ACCEPT
+end
+
 return exports
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 6add598a43..5a6a9f5509 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -22,6 +22,7 @@
 #include "multipattern.h"
 #include "contrib/uthash/utlist.h"
 #include "contrib/http-parser/http_parser.h"
+#include "lua/lua_common.h"
 #include
 #include
 #include
@@ -1200,7 +1201,10 @@ rspamd_web_parse(struct http_parser_url *u, const char *str, gsize len,
 				}
 				else if (p - c > max_email_user) {
 					/* Allow oversized user fields but mark them - fixes #5731 */
-					/* Don't fail completely, just mark with flag and continue */
+					/* TODO: Call rspamd_url_lua_consult(pool, c, p-c, *flags, L) here
+					 * to ask Lua if we should continue parsing this URL.
+					 * Returns: 0=continue, 1=mark obscured, 2=abort (goto out)
+					 * Challenge: need lua_State *L passed through call chain */
 					*flags |= RSPAMD_URL_FLAG_HAS_USER;
 					/* Continue parsing - the Lua plugin will handle scoring */
 				}
@@ -2189,6 +2193,78 @@ rspamd_url_remove_dots(struct rspamd_url *uri)
 	return ret;
 }
 
+/**
+ * Consult Lua filter when C parser encounters suspicious/ambiguous URL patterns
+ * This is called DURING parsing when C is unsure how to proceed
+ *
+ * Calls lua_url_filter.filter_url_string(url_str, flags) under a traceback
+ * handler. Every failure mode (no Lua state, filter not loadable, Lua error,
+ * non-numeric result) is treated as ACCEPT. The Lua stack is restored to its
+ * entry height on every return path.
+ * @param pool Memory pool (not used by this function at present)
+ * @param url_str URL string fragment being examined
+ * @param len Length of the fragment
+ * @param flags Current URL parsing flags
+ * @param L Lua state (may be NULL)
+ * @return 0=ACCEPT (continue), 1=SUSPICIOUS (mark obscured), 2=REJECT (abort)
+ */
+static int
+rspamd_url_lua_consult(rspamd_mempool_t *pool,
+					   const char *url_str,
+					   gsize len,
+					   unsigned int flags,
+					   lua_State *L)
+{
+	int result = 0; /* Default: ACCEPT */
+	int old_top, err_idx;
+
+	if (!L) {
+		return 0; /* No Lua available, accept */
+	}
+
+	old_top = lua_gettop(L);
+	lua_pushcfunction(L, &rspamd_lua_traceback);
+	err_idx = lua_gettop(L);
+
+	/* Try to load lua_url_filter.filter_url_string function */
+	if (!rspamd_lua_require_function(L, "lua_url_filter", "filter_url_string")) {
+		lua_settop(L, old_top); /* Drop error handler and any leftovers */
+		return 0; /* Filter not available, accept */
+	}
+
+	/* Push arguments: url_string, flags */
+	lua_pushlstring(L, url_str, len);
+	lua_pushinteger(L, flags);
+
+	/* Call filter_url_string(url_str, flags) */
+	if (lua_pcall(L, 2, 1, err_idx) != 0) {
+		msg_err("cannot call lua_url_filter.filter_url_string: %s",
+				lua_isstring(L, -1) ? lua_tostring(L, -1) : "unknown error");
+		lua_settop(L, old_top); /* Error + error handler */
+		return 0; /* On error, accept */
+	}
+
+	/* Get result */
+	if (lua_isnumber(L, -1)) {
+		/* Clamp while still a lua_Integer: narrowing to int first could wrap
+		 * an out-of-range value into the valid range */
+		lua_Integer verdict = lua_tointeger(L, -1);
+
+		if (verdict < 0) {
+			verdict = 0;
+		}
+		else if (verdict > 2) {
+			verdict = 2;
+		}
+
+		result = (int) verdict;
+	}
+
+	lua_settop(L, old_top); /* Result + error handler */
+
+	return result;
+}
+
 enum uri_errno
 rspamd_url_parse(struct rspamd_url *uri,
 				 char *uristring, gsize len,