From 9f3a41069512c9c2287c99c9917350ff2da6ee24 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Fri, 6 Feb 2026 15:11:44 +0000
Subject: [PATCH] [Fix] lua_url: Re-encode control characters and spaces in URL tostring

The URL parser (rspamd_url_decode) decodes percent-encoded sequences
like %20 back to literal characters in the internal representation.
When tostring() returned these decoded URLs, spaces and control chars
would break subsequent re-parsing (e.g., in url_redirector redirect
chains and Redis cache round-trips).

Fix by re-encoding characters <= 0x20 and DEL (0x7f) on serialization,
matching browser behavior: decode internally for matching, re-encode
on copy.
---
 src/lua/lua_url.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index b123630faf..b3299ead36 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -331,6 +331,54 @@ lua_url_get_text(lua_State *L)
  * Get full content of the url or user@domain in case of email
  * @return {string} url as a string
  */
+/*
+ * Re-encode bytes that cannot appear literally in URLs (C0 controls, space, DEL).
+ * Like browsers: decode internally for matching/display, re-encode on copy/serialization.
+ * @param L Lua state that receives the result
+ * @param s URL bytes; only the first `len` bytes are read
+ * @param len number of bytes in `s`
+ * Pushes exactly one string onto the Lua stack (via lua_pushlstring); returns nothing.
+ */
+static void
+lua_url_push_encoded(lua_State *L, const char *s, gsize len)
+{
+	static const char hexdigits[] = "0123456789ABCDEF";
+	gsize i, extra = 0;
+
+	/* Fast check: count bytes that need encoding (<= 0x20 or DEL) */
+	for (i = 0; i < len; i++) {
+		unsigned char c = (unsigned char) s[i];
+		if (c <= 0x20 || c == 0x7f) {
+			extra += 2; /* %XX is 3 chars vs 1 original */
+		}
+	}
+
+	if (extra == 0) {
+		/* No encoding needed — fast path */
+		lua_pushlstring(L, s, len);
+		return;
+	}
+
+	char *encoded = g_malloc(len + extra);
+	char *d = encoded;
+
+	for (i = 0; i < len; i++) {
+		unsigned char c = (unsigned char) s[i];
+		if (c <= 0x20 || c == 0x7f) {
+			*d++ = '%';
+			*d++ = hexdigits[c >> 4];
+			*d++ = hexdigits[c & 0x0f];
+		}
+		else {
+			*d++ = (char) c;
+		}
+	}
+
+	/* NOTE(review): lua_pushlstring may raise on OOM, which would skip g_free below — confirm acceptable */
+	lua_pushlstring(L, encoded, d - encoded);
+	g_free(encoded);
+}
+
 static int
 lua_url_tostring(lua_State *L)
 {
@@ -353,7 +401,7 @@ lua_url_tostring(lua_State *L)
 			g_free(tmp);
 		}
 		else {
-			lua_pushlstring(L, url->url->string, url->url->urllen);
+			lua_url_push_encoded(L, url->url->string, url->url->urllen);
 		}
 	}
 	else {
-- 
2.47.3