};
if (tag->id == Tag_BR || tag->id == Tag_HR) {
- hc->parsed.append("\n");
+
+ if (!(tag->flags & FL_IGNORE)) {
+ hc->parsed.append("\n");
+ }
return tag->content_offset;
}
if (!(cur_tag->flags & CM_EMPTY)) {
html_process_block_tag(pool, cur_tag, hc);
}
+ else {
+ /* Implicitly close */
+ cur_tag->flags |= FL_CLOSED;
+ }
if (cur_tag->flags & FL_CLOSED) {
cur_tag->closing.end = cur_tag->content_offset;
break;
case tag_end_closing: {
if (cur_tag) {
+
+ if (cur_tag->flags & CM_EMPTY) {
+ /* Ignore closing empty tags */
+ cur_tag->flags |= FL_IGNORE;
+ }
/* cur_tag here is a closing tag */
auto *next_cur_tag = html_check_balance(hc, cur_tag,
c - start, p - start + 1);
auto &&vtag = std::make_unique<html_tag>();
vtag->id = cur_tag->id;
- vtag->flags = FL_VIRTUAL | FL_CLOSED;
+ vtag->flags = FL_VIRTUAL | FL_CLOSED | cur_tag->flags;
vtag->tag_start = cur_tag->closing.start;
vtag->content_offset = p - start + 1;
vtag->closing = cur_tag->closing;
{
const std::vector<std::pair<std::string, std::string>> cases{
+ {" <body>\n"
+ " <!-- page content -->\n"
+ " Hello, world!<br>test</br><br>content</hr>more content<br>\n"
+ " <div>\n"
+ " content inside div\n"
+ " </div>\n"
+ " </body>", "Hello, world!\ntest\ncontent\nmore content\ncontent inside div\n"},
{" <body>\n"
" <!-- escape content -->\n"
" a b a > b a < b a & b 'a "a"\n"
" <td>data2</td>\n"
" </tr>\n"
" </table>", "heada headb\ndata1 data2\n"},
+
};
rspamd_url_init(NULL);