From: Vsevolod Stakhov
Date: Sat, 6 Dec 2025 09:51:14 +0000 (+0000)
Subject: [Fix] Use locale-independent patterns in URL encoding
X-Git-Tag: 3.14.2~17
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=40c76819753df2dbd6063dc919d7e11915eafe14;p=thirdparty%2Frspamd.git

[Fix] Use locale-independent patterns in URL encoding

Replace %w with explicit A-Za-z0-9 ranges in URL encoding functions.
The %w pattern is locale-dependent and incorrectly matches high bytes
(0xE4, 0xE5, 0xE6) as word characters in UTF-8 locales like en_GB.UTF-8,
breaking URL encoding of non-ASCII characters.
---

diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua
index a8d40c8763..091bcc8ceb 100644
--- a/lualib/lua_util.lua
+++ b/lualib/lua_util.lua
@@ -1911,7 +1911,9 @@ local function url_encode_string(str)
   if str == nil then
     return ''
   end
-  str = string.gsub(str, "([^%w _%%%-%.~])",
+  -- Use explicit ASCII ranges instead of %w which is locale-dependent
+  -- and may match non-ASCII bytes in UTF-8 locales
+  str = string.gsub(str, "([^A-Za-z0-9 _%%%-%.~])",
       function(c)
         return string.format("%%%02X", string.byte(c))
       end)
diff --git a/src/plugins/lua/url_redirector.lua b/src/plugins/lua/url_redirector.lua
index 862cb451db..1e5bc137c0 100644
--- a/src/plugins/lua/url_redirector.lua
+++ b/src/plugins/lua/url_redirector.lua
@@ -72,7 +72,8 @@ local function encode_url_for_redirect(url_str)
   -- Encode space and other problematic characters that are common in redirect URLs
   -- We're conservative - only encode what http_parser_parse_url actually rejects
   -- Don't encode already-encoded sequences (%XX)
-  local encoded = url_str:gsub("([^%w%-%._~:/?#%[%]@!$&'()*+,;=%%])", function(c)
+  -- Use explicit ASCII ranges instead of %w which is locale-dependent
+  local encoded = url_str:gsub("([^A-Za-z0-9%-%._~:/?#%[%]@!$&'()*+,;=%%])", function(c)
     -- Don't double-encode already encoded characters
     if c == '%' then
       return c