git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
Add logic to preserve newlines in HTML.
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 16 Jul 2015 14:22:51 +0000 (15:22 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 16 Jul 2015 14:22:51 +0000 (15:22 +0100)
src/libserver/html.c
test/lua/unit/html.lua

index 914d21feb9afcf609c8be4c688baaca9e8e8a16f..a6e1364816b2613c21556454d70e27c1e24b9e41 100644 (file)
@@ -1031,7 +1031,7 @@ add_html_node (struct rspamd_task *task,
 
 static gboolean
 rspamd_html_process_tag (rspamd_mempool_t *pool, struct html_content *hc,
-               struct html_tag *tag, GNode **cur_level)
+               struct html_tag *tag, GNode **cur_level, gboolean *balanced)
 {
        GNode *nnode;
 
@@ -1046,17 +1046,29 @@ rspamd_html_process_tag (rspamd_mempool_t *pool, struct html_content *hc,
 
        nnode = g_node_new (tag);
 
+       if (tag->params) {
+               rspamd_mempool_add_destructor (pool,
+                               (rspamd_mempool_destruct_t) g_list_free,
+                               tag->params);
+       }
+
        if (tag->flags & FL_CLOSING) {
                if (!*cur_level) {
                        debug_task ("bad parent node");
+                       g_node_destroy (nnode);
                        return FALSE;
                }
+
                g_node_append (*cur_level, nnode);
 
                if (!rspamd_html_check_balance (nnode, cur_level)) {
                        debug_task (
                                        "mark part as unbalanced as it has not pairable closing tags");
                        hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED;
+                       *balanced = FALSE;
+               }
+               else {
+                       *balanced = TRUE;
                }
        }
        else {
@@ -1386,7 +1398,7 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc,
 {
        const guchar *p, *c, *end, *tag_start = NULL, *savep = NULL;
        guchar t;
-       gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE;
+       gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE, balanced;
        GByteArray *dest;
        guint obrace = 0, ebrace = 0;
        GNode *cur_level = NULL;
@@ -1678,18 +1690,29 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc,
                        savep = NULL;
 
                        if (cur_tag != NULL) {
-                               if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level)) {
+                               balanced = TRUE;
+
+                               if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level,
+                                               &balanced)) {
                                        state = content_write;
                                        need_decode = FALSE;
                                }
                                else {
                                        state = content_ignore;
                                }
+
+                               if ((cur_tag->id == Tag_P || cur_tag->id == Tag_BR ||
+                                               cur_tag->id == Tag_HR) && balanced) {
+                                       /* Insert newline */
+                                       g_byte_array_append (dest, "\r\n", 2);
+                                       save_space = FALSE;
+                               }
                        }
                        else {
                                state = content_write;
                        }
 
+
                        p++;
                        c = p;
                        cur_tag = NULL;
index 22a03f6d68bffe413ce55b94e0d69871e7b011d5..f29d4eb3b8e4fc92fc90b6029b0871c553814bbc 100644 (file)
@@ -21,7 +21,7 @@ context("HTML processing", function()
     <b>stuff</p>?
   </body>
 </html>
-      ]], 'Hello, world! test data stuff?'},
+      ]], "Hello, world! test\r\ndata\r\nstuff?"},
       {[[
 <?xml version="1.0" encoding="iso-8859-1"?>
  <!DOCTYPE html 
@@ -39,7 +39,7 @@ context("HTML processing", function()
        
      </p>
    </body>
- </html>]], 'Hello, world!'},
+ </html>]], '\r\nHello, world!\r\n'},
        {[[
 <!DOCTYPE html>
 <html lang="en">
@@ -53,13 +53,10 @@ context("HTML processing", function()
   --></head>
   <body>
     <!-- page content -->
-    Hello, world! <b>test</b>
-    <p>data<>
-    </P>
-    <b>stuff</p>?
+    Hello, world!
   </body>
 </html>
-      ]], 'Hello, world! test data stuff?'},
+      ]], 'Hello, world!'},
     }
     
     for _,c in ipairs(cases) do