From: Vsevolod Stakhov
Date: Tue, 13 Jul 2021 15:52:09 +0000 (+0100)
Subject: [Minor] Ignore bogus head tags inside body
X-Git-Tag: 3.0~166
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=87ef0c44cef19ce6498fe5e595097fd09aeaf396;p=thirdparty%2Frspamd.git

[Minor] Ignore bogus head tags inside body
---

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index cf12b0a018..51f8589e2b 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1085,7 +1085,7 @@ html_append_tag_content(rspamd_mempool_t *pool,
 		return ret;
 	}
-	else if (tag->id == Tag_HEAD) {
+	else if (tag->id == Tag_HEAD && (tag->flags & FL_IGNORE)) {
 		auto ret = tag->closing.end;
 		calculate_final_tag_offsets();
@@ -1706,6 +1706,7 @@ html_process_input(rspamd_mempool_t *pool,
 	if (html_document_state == html_document_state::doctype) {
 		if (cur_tag->id == Tag_HEAD || (cur_tag->flags & CM_HEAD)) {
 			html_document_state = html_document_state::head;
+			cur_tag->flags |= FL_IGNORE;
 		}
 		else if (cur_tag->id != Tag_HTML) {
 			html_document_state = html_document_state::body;
diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx
index ac06a353bb..1181e79ac0 100644
--- a/src/libserver/html/html_tests.cxx
+++ b/src/libserver/html/html_tests.cxx
@@ -69,6 +69,7 @@ TEST_CASE("html text extraction")
 {
 	using namespace std::string_literals;
 	const std::vector> cases{
+			{"displayed", "displayed"},
 			{"test", "test"},
 			{"test\0"s, "test\uFFFD"s},
 			{"test\0test"s, "test\uFFFDtest"s},
@@ -184,6 +185,7 @@ TEST_CASE("html text extraction")
 			/* Head tag with some stuff */
 			{"

oh my god", "oh my god\n"}, {"oh my god</head><body></body></html>", ""}, + }; rspamd_url_init(NULL);