From: Vsevolod Stakhov Date: Wed, 8 Sep 2021 13:45:33 +0000 (+0100) Subject: [Fix] Fix some complicated case with the closing tags parsing X-Git-Tag: 3.1~184 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4c6234a1a07c3fd777551c6789ad0b44523da210;p=thirdparty%2Frspamd.git [Fix] Fix some complicated case with the closing tags parsing --- diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx index 97009749f3..b9729a71ee 100644 --- a/src/libserver/html/html.cxx +++ b/src/libserver/html/html.cxx @@ -1752,9 +1752,49 @@ html_process_input(rspamd_mempool_t *pool, break; case tag_raw_text_less_than: if (t == '/') { - /* Shift back */ - p = c; - state = tag_begin; + /* Here are special things: we look for obrace and then ensure + * that there is a closing brace nearby + * (we look at most 30 characters ahead). We also need to ensure + * that we have no special characters, such as punctuation marks and + * so on. + * Basically, we validate the input to be sane. + * Since closing tags must not have attributes, these assumptions + * seem to be reasonable enough for our toy parser. + */ + gint cur_lookahead = 1; + gint max_lookahead = MIN (end - p, 30); + bool valid_closing_tag = true; + + if (p + 1 < end && !g_ascii_isalpha (p[1])) { + valid_closing_tag = false; + } + else { + while (cur_lookahead < max_lookahead) { + gchar tt = p[cur_lookahead]; + if (tt == '>') { + break; + } + else if (tt < '\n' || tt == ',') { + valid_closing_tag = false; + break; + } + cur_lookahead ++; + } + + if (cur_lookahead == max_lookahead) { + valid_closing_tag = false; + } + } + + if (valid_closing_tag) { + /* Shift back */ + p = c; + state = tag_begin; + } + else { + p ++; + state = tag_raw_text; + } } else { p ++;