]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Fix] lua_url: Re-encode control characters and spaces in URL tostring
author: Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 6 Feb 2026 15:11:44 +0000 (15:11 +0000)
committer: Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 6 Feb 2026 15:11:44 +0000 (15:11 +0000)
The URL parser (rspamd_url_decode) decodes percent-encoded sequences
like %20 back to literal characters in the internal representation.
When tostring() returned these decoded URLs, spaces and control chars
would break subsequent re-parsing (e.g., in url_redirector redirect
chains and Redis cache round-trips). Fix by re-encoding characters
<= 0x20 on serialization, matching browser behavior: decode internally
for matching, re-encode on copy.

src/lua/lua_url.c

index b123630fafeb318b00d0ae70d222ed1a7b0ae9cb..b3299ead36fcf6275b2a0339f4555411ae3ed2eb 100644 (file)
@@ -331,6 +331,50 @@ lua_url_get_text(lua_State *L)
  * Get full content of the url or user@domain in case of email
  * @return {string} url as a string
  */
+/*
+ * Push the byte string `s` of length `len` onto the Lua stack, percent-encoding
+ * every byte <= 0x20 (space and the C0 control characters) as %XX with
+ * uppercase hex digits. All other bytes are copied unchanged.
+ * Like browsers: decode internally for matching/display, re-encode on copy/serialization.
+ *
+ * Exactly one Lua string is left on top of the stack; nothing is returned to
+ * the C caller.
+ *
+ * The encoded form is assembled in a luaL_Buffer rather than a g_malloc'ed
+ * scratch buffer, so all storage stays under Lua's control: nothing is leaked
+ * if Lua raises an error while creating the string, and no manual output size
+ * computation is needed.
+ */
+static void
+lua_url_push_encoded(lua_State *L, const char *s, gsize len)
+{
+       static const char hexdigits[] = "0123456789ABCDEF";
+       luaL_Buffer buf;
+       gsize i;
+
+       /* Fast check: locate the first byte that needs encoding, if any */
+       for (i = 0; i < len; i++) {
+               if ((unsigned char) s[i] <= 0x20) {
+                       break;
+               }
+       }
+
+       if (i == len) {
+               /* No encoding needed — fast path */
+               lua_pushlstring(L, s, len);
+               return;
+       }
+
+       luaL_buffinit(L, &buf);
+       /* Bytes before the first offending one are known to be clean */
+       luaL_addlstring(&buf, s, i);
+
+       for (; i < len; i++) {
+               unsigned char c = (unsigned char) s[i];
+
+               if (c <= 0x20) {
+                       luaL_addchar(&buf, '%');
+                       luaL_addchar(&buf, hexdigits[c >> 4]);
+                       luaL_addchar(&buf, hexdigits[c & 0x0f]);
+               }
+               else {
+                       luaL_addchar(&buf, (char) c);
+               }
+       }
+
+       luaL_pushresult(&buf);
+}
+
+
 static int
 lua_url_tostring(lua_State *L)
 {
@@ -353,7 +397,7 @@ lua_url_tostring(lua_State *L)
                        g_free(tmp);
                }
                else {
-                       lua_pushlstring(L, url->url->string, url->url->urllen);
+                       lua_url_push_encoded(L, url->url->string, url->url->urllen);
                }
        }
        else {