git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Fix] html: prevent buffer overflow in entity decoding
author Vsevolod Stakhov <vsevolod@rspamd.com>
Wed, 20 May 2026 10:39:51 +0000 (11:39 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Wed, 20 May 2026 10:39:51 +0000 (11:39 +0100)
decode_html_entitles_inplace works in place, relying on the
replacement never being longer than the source entity text. That
assumption does not hold for some short entity names that expand to
multi-codepoint replacements (e.g. nGt, nLt, nvap): when such an
entity sits at the very end of the buffer, the named-entity memcpy
wrote a few bytes past the end.

Bounds-check the replacement against the remaining buffer before
copying, matching the existing numeric-entity path, and drop the
entity when it does not fit.

src/libserver/html/html_entities.cxx

index d7c709f2da11ba3c32de8f54c59c166da14ecd1b..5e18cf7a304b6d345234fb5d8bf1ddd98ec4695d 100644 (file)
@@ -2260,8 +2260,17 @@ decode_html_entitles_inplace(char *s, std::size_t len, bool norm_spaces)
 
                auto replace_entity = [&]() -> void {
                        auto l = strlen(entity_def->replacement);
-                       memcpy(t, entity_def->replacement, l);
-                       t += l;
+                       /*
+                        * The decoder works in place, so the replacement may only be
+                        * written while it fits the remaining buffer. Some short entity
+                        * names expand to longer multi-codepoint replacements, which
+                        * would otherwise overflow when the entity sits at the very end
+                        * of the buffer. Drop such a truncated entity instead.
+                        */
+                       if (end - t >= (decltype(end - t)) l) {
+                               memcpy(t, entity_def->replacement, l);
+                               t += l;
+                       }
                };
 
                if (entity_def) {