static gboolean
rspamd_html_process_tag (rspamd_mempool_t *pool, struct html_content *hc,
- struct html_tag *tag, GNode **cur_level)
+ struct html_tag *tag, GNode **cur_level, gboolean *balanced)
{
GNode *nnode;
nnode = g_node_new (tag);
+ if (tag->params) {
+ rspamd_mempool_add_destructor (pool,
+ (rspamd_mempool_destruct_t) g_list_free,
+ tag->params);
+ }
+
if (tag->flags & FL_CLOSING) {
if (!*cur_level) {
debug_task ("bad parent node");
+ g_node_destroy (nnode);
return FALSE;
}
+
g_node_append (*cur_level, nnode);
if (!rspamd_html_check_balance (nnode, cur_level)) {
debug_task (
"mark part as unbalanced as it has not pairable closing tags");
hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED;
+ *balanced = FALSE;
+ }
+ else {
+ *balanced = TRUE;
}
}
else {
{
const guchar *p, *c, *end, *tag_start = NULL, *savep = NULL;
guchar t;
- gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE;
+ gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE, balanced;
GByteArray *dest;
guint obrace = 0, ebrace = 0;
GNode *cur_level = NULL;
savep = NULL;
if (cur_tag != NULL) {
- if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level)) {
+ balanced = TRUE;
+
+ if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level,
+ &balanced)) {
state = content_write;
need_decode = FALSE;
}
else {
state = content_ignore;
}
+
+ if ((cur_tag->id == Tag_P || cur_tag->id == Tag_BR ||
+ cur_tag->id == Tag_HR) && balanced) {
+ /* Insert newline */
+ g_byte_array_append (dest, "\r\n", 2);
+ save_space = FALSE;
+ }
}
else {
state = content_write;
}
+
p++;
c = p;
cur_tag = NULL;
<b>stuff</p>?
</body>
</html>
- ]], 'Hello, world! test data stuff?'},
+ ]], "Hello, world! test\r\ndata\r\nstuff?"},
{[[
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html
</p>
</body>
- </html>]], 'Hello, world!'},
+ </html>]], '\r\nHello, world!\r\n'},
{[[
<!DOCTYPE html>
<html lang="en">
--></head>
<body>
<!-- page content -->
- Hello, world! <b>test</b>
- <p>data<>
- </P>
- <b>stuff</p>?
+ Hello, world!
</body>
</html>
- ]], 'Hello, world! test data stuff?'},
+ ]], 'Hello, world!'},
}
for _,c in ipairs(cases) do