From c209d42b7f36b2c8024d8b069cfb13b0aa18de60 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Tue, 14 Oct 2025 10:42:19 +0100
Subject: [PATCH] [Fix] Add HTML entity encoding for URL rewriting

Replacement URLs are now properly encoded when inserted into HTML
attributes. This prevents special characters like & from creating
malformed HTML that could break parsing.
---
 src/libserver/html/html_url_rewrite.cxx   | 41 ++++++++++++++-
 test/rspamd_cxx_unit_html_url_rewrite.hxx | 64 +++++++++++++++++++++++
 2 files changed, 103 insertions(+), 2 deletions(-)

diff --git a/src/libserver/html/html_url_rewrite.cxx b/src/libserver/html/html_url_rewrite.cxx
index 5375f92964..dcbff891dd 100644
--- a/src/libserver/html/html_url_rewrite.cxx
+++ b/src/libserver/html/html_url_rewrite.cxx
@@ -168,6 +168,43 @@ auto validate_patches(std::vector<rewrite_patch> &patches) -> bool
 	return true;
 }
 
+/**
+ * Encode a string for safe insertion as HTML attribute value
+ * Encodes: & < > " '
+ * @param input The string to encode
+ * @return HTML-encoded string
+ */
+static auto encode_html_attribute(const std::string &input) -> std::string
+{
+	std::string result;
+	result.reserve(input.size() + input.size() / 4);// Reserve extra for entities
+
+	for (char ch: input) {
+		switch (ch) {
+		case '&':
+			result.append("&amp;");
+			break;
+		case '<':
+			result.append("&lt;");
+			break;
+		case '>':
+			result.append("&gt;");
+			break;
+		case '"':
+			result.append("&quot;");
+			break;
+		case '\'':
+			result.append("&#39;");
+			break;
+		default:
+			result.push_back(ch);
+			break;
+		}
+	}
+
+	return result;
+}
+
 auto apply_patches(std::string_view original, const std::vector<rewrite_patch> &patches)
 	-> std::string
 {
@@ -186,8 +223,8 @@ auto apply_patches(std::string_view original, const std::vector<rewrite_patch> &
 			result.append(original.substr(pos, patch.offset - pos));
 		}
 
-		// Apply the replacement
-		result.append(patch.replacement);
+		// Apply the replacement with HTML entity encoding
+		result.append(encode_html_attribute(patch.replacement));
 
 		// Move position to after the patched
region
 		pos = patch.offset + patch.len;
diff --git a/test/rspamd_cxx_unit_html_url_rewrite.hxx b/test/rspamd_cxx_unit_html_url_rewrite.hxx
index 2f390f387f..1beab64bdb 100644
--- a/test/rspamd_cxx_unit_html_url_rewrite.hxx
+++ b/test/rspamd_cxx_unit_html_url_rewrite.hxx
@@ -228,6 +228,70 @@ TEST_SUITE("html_url_rewrite")
 		auto result = apply_patches(original, patches);
 		CHECK(result == "abcdefghi");
 	}
+
+	TEST_CASE("apply_patches - HTML entity encoding for ampersand")
+	{
+		std::string_view original = R"(<a href="url">link</a>)";
+		std::vector<rewrite_patch> patches{
+			{0, 9, 3, "http://example.com?foo=1&bar=2"}// URL with & character
+		};
+		auto result = apply_patches(original, patches);
+		// & should be encoded as &amp;
+		CHECK(result == R"(<a href="http://example.com?foo=1&amp;bar=2">link</a>)");
+	}
+
+	TEST_CASE("apply_patches - HTML entity encoding for quotes")
+	{
+		std::string_view original = R"(<a href="url">link</a>)";
+		std::vector<rewrite_patch> patches{
+			{0, 9, 3, R"(url"with'quotes)"}// URL with quotes
+		};
+		auto result = apply_patches(original, patches);
+		// " should be encoded as &quot;, ' as &#39;
+		CHECK(result == R"(<a href="url&quot;with&#39;quotes">link</a>)");
+	}
+
+	TEST_CASE("apply_patches - HTML entity encoding for angle brackets")
+	{
+		std::string_view original = R"(<a href="url">link</a>)";
+		std::vector<rewrite_patch> patches{
+			{0, 9, 3, "url<with>brackets"}// URL with angle brackets
+		};
+		auto result = apply_patches(original, patches);
+		// < should be encoded as &lt;, > as &gt;
+		CHECK(result == R"(<a href="url&lt;with&gt;brackets">link</a>)");
+	}
+
+	TEST_CASE("apply_patches - HTML entity encoding for all special chars")
+	{
+		std::string_view original = R"(<a href="url">link</a>)";
+		std::vector<rewrite_patch> patches{
+			{0, 9, 3, R"(&<>"')"}// All special HTML chars
+		};
+		auto result = apply_patches(original, patches);
+		CHECK(result == R"(<a href="&amp;&lt;&gt;&quot;&#39;">link</a>)");
+	}
+
+	TEST_CASE("apply_patches - HTML entity encoding preserves normal chars")
+	{
+		std::string_view original = R"(<a href="url">link</a>)";
+		std::vector<rewrite_patch> patches{
+			{0, 9, 3, "http://normal-url.com/path?q=test"}// Normal URL
+		};
+		auto result = apply_patches(original, patches);
+		CHECK(result == R"(<a href="http://normal-url.com/path?q=test">link</a>)");
+	}
+
+	TEST_CASE("apply_patches - HTML entity encoding for
multiple URLs")
+	{
+		std::string_view original = R"(<a href="url1">A</a> <a href="url2">B</a>)";
+		std::vector<rewrite_patch> patches{
+			{0, 9, 4, "http://a.com?x=1&y=2"},// First URL with &
+			{0, 30, 4, "http://b.com?a=3&b=4"}// Second URL with & (starts at position 30)
+		};
+		auto result = apply_patches(original, patches);
+		CHECK(result == R"(<a href="http://a.com?x=1&amp;y=2">A</a> <a href="http://b.com?a=3&amp;b=4">B</a>)");
+	}
 }
 
 #endif
-- 
2.47.3