git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Fix] Add HTML entity encoding for URL rewriting
author: Vsevolod Stakhov <vsevolod@rspamd.com>
Tue, 14 Oct 2025 09:42:19 +0000 (10:42 +0100)
committer: Vsevolod Stakhov <vsevolod@rspamd.com>
Tue, 14 Oct 2025 09:42:19 +0000 (10:42 +0100)
Replacement URLs are now properly encoded when inserted into HTML attributes. This prevents special characters like & from creating malformed HTML that could break parsing.

src/libserver/html/html_url_rewrite.cxx
test/rspamd_cxx_unit_html_url_rewrite.hxx

index 5375f92964cbaaaf39b1ea9d50cc85c534fc12a2..dcbff891ddb635c7c7e290d17a6006e216217d65 100644 (file)
@@ -168,6 +168,43 @@ auto validate_patches(std::vector<rewrite_patch> &patches) -> bool
        return true;
 }
 
+/**
+ * Encode a string for safe insertion as HTML attribute value
+ * Encodes: & -> &amp;  < -> &lt;  > -> &gt;  " -> &quot;  ' -> &#39;
+ * @param input The string to encode (existing entities are not detected, so their & is encoded again)
+ * @return HTML-encoded copy of input; every other character is copied through unchanged
+ */
+static auto encode_html_attribute(const std::string &input) -> std::string
+{
+       std::string result;
+       result.reserve(input.size() + input.size() / 4);// Reserve 25% extra as headroom for entities
+
+       for (char ch: input) {
+               switch (ch) {
+               case '&':
+                       result.append("&amp;");
+                       break;
+               case '<':
+                       result.append("&lt;");
+                       break;
+               case '>':
+                       result.append("&gt;");
+                       break;
+               case '"':
+                       result.append("&quot;");
+                       break;
+               case '\'':
+                       result.append("&#39;");// Numeric character reference rather than a named entity
+                       break;
+               default:
+                       result.push_back(ch);// Not one of the five special characters: copy as-is
+                       break;
+               }
+       }
+
+       return result;
+}
+
 auto apply_patches(std::string_view original, const std::vector<rewrite_patch> &patches)
        -> std::string
 {
@@ -186,8 +223,8 @@ auto apply_patches(std::string_view original, const std::vector<rewrite_patch> &
                        result.append(original.substr(pos, patch.offset - pos));
                }
 
-               // Apply the replacement
-               result.append(patch.replacement);
+               // Apply the replacement with HTML entity encoding
+               result.append(encode_html_attribute(patch.replacement));
 
                // Move position to after the patched region
                pos = patch.offset + patch.len;
index 2f390f387fc22953229364ceab83a5b9f966bda3..1beab64bdb7aa4ffb37bbda4959a22fed9565f37 100644 (file)
@@ -228,6 +228,70 @@ TEST_SUITE("html_url_rewrite")
                auto result = apply_patches(original, patches);
                CHECK(result == "abcdefghi");
        }
+
+       TEST_CASE("apply_patches - HTML entity encoding for ampersand")
+       {
+               std::string_view original = R"(<a href="old">link</a>)";// old sits at offset 9 and is 3 bytes long
+               std::vector<rewrite_patch> patches{
+                       {0, 9, 3, "http://example.com?foo=1&bar=2"}// URL with & character, replacing old
+               };
+               auto result = apply_patches(original, patches);
+               // & should be encoded as &amp; while the surrounding markup is left untouched
+               CHECK(result == R"(<a href="http://example.com?foo=1&amp;bar=2">link</a>)");
+       }
+
+       TEST_CASE("apply_patches - HTML entity encoding for quotes")
+       {
+               std::string_view original = R"(<a href="old">link</a>)";// old (offset 9, 3 bytes) is the span being replaced
+               std::vector<rewrite_patch> patches{
+                       {0, 9, 3, R"(url"with'quotes)"}// URL with quotes
+               };
+               auto result = apply_patches(original, patches);
+               // " should be encoded as &quot;, ' as &#39;
+               CHECK(result == R"(<a href="url&quot;with&#39;quotes">link</a>)");
+       }
+
+       TEST_CASE("apply_patches - HTML entity encoding for angle brackets")
+       {
+               std::string_view original = R"(<a href="old">link</a>)";// old (offset 9, 3 bytes) is the span being replaced
+               std::vector<rewrite_patch> patches{
+                       {0, 9, 3, "url<with>brackets"}// URL with angle brackets
+               };
+               auto result = apply_patches(original, patches);
+               // < should be encoded as &lt;, > as &gt;, so no raw angle bracket ends up inside the attribute
+               CHECK(result == R"(<a href="url&lt;with&gt;brackets">link</a>)");
+       }
+
+       TEST_CASE("apply_patches - HTML entity encoding for all special chars")
+       {
+               std::string_view original = R"(<a href="old">link</a>)";// old (offset 9, 3 bytes) is the span being replaced
+               std::vector<rewrite_patch> patches{
+                       {0, 9, 3, R"(&<>"')"}// All special HTML chars
+               };
+               auto result = apply_patches(original, patches);
+               CHECK(result == R"(<a href="&amp;&lt;&gt;&quot;&#39;">link</a>)");
+       }
+
+       TEST_CASE("apply_patches - HTML entity encoding preserves normal chars")
+       {
+               std::string_view original = R"(<a href="old">link</a>)";// old (offset 9, 3 bytes) is the span being replaced
+               std::vector<rewrite_patch> patches{
+                       {0, 9, 3, "http://normal-url.com/path?q=test"}// Normal URL
+               };
+               auto result = apply_patches(original, patches);
+               CHECK(result == R"(<a href="http://normal-url.com/path?q=test">link</a>)");// None of the five encoded characters occur, so the URL appears verbatim
+       }
+
+       TEST_CASE("apply_patches - HTML entity encoding for multiple URLs")
+       {
+               std::string_view original = R"(<a href="url1">A</a> <a href="url2">B</a>)";// url1 at offset 9, url2 at offset 30, 4 bytes each
+               std::vector<rewrite_patch> patches{
+                       {0, 9, 4, "http://a.com?x=1&y=2"},// First URL with &
+                       {0, 30, 4, "http://b.com?a=3&b=4"}// Second URL with & (starts at position 30)
+               };
+               auto result = apply_patches(original, patches);
+               CHECK(result == R"(<a href="http://a.com?x=1&amp;y=2">A</a> <a href="http://b.com?a=3&amp;b=4">B</a>)");// Each replacement has its own & encoded
+       }
 }
 
 #endif