git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
Rework raw headers storage.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 3 Sep 2014 16:52:09 +0000 (17:52 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 3 Sep 2014 16:52:09 +0000 (17:52 +0100)
- Use raw headers instead of message headers in message parser
- Store headers of parts
- Use double linked lists to avoid overhead on appending
- Decode raw headers

src/libmime/message.c
src/libmime/message.h

index 41e571b793b32b385df912ac9207b43735d16730..952db753f67461e443319a342d3d073463d90b22 100644 (file)
@@ -29,6 +29,7 @@
 #include "cfg_file.h"
 #include "html.h"
 #include "images.h"
+#include "utlist.h"
 
 #define RECURSION_LIMIT 30
 #define UTF8_CHARSET "UTF-8"
@@ -578,16 +579,34 @@ parse_recv_header (rspamd_mempool_t * pool,
        return;
 }
 
+static void
+append_raw_header (struct rspamd_task *task, struct raw_header *rh)
+{
+       struct raw_header *lp;
+
+       rh->next = NULL;
+       rh->prev = rh;
+       if ((lp =
+                       g_hash_table_lookup (task->raw_headers, rh->name)) != NULL) {
+               DL_APPEND (lp, rh);
+       }
+       else {
+               g_hash_table_insert (task->raw_headers, rh->name, rh);
+       }
+       debug_task ("add raw header %s: %s", rh->name, rh->value);
+}
+
 /* Convert raw headers to a list of struct raw_header * */
 static void
-process_raw_headers (struct rspamd_task *task)
+process_raw_headers (struct rspamd_task *task, const gchar *in)
 {
-       struct raw_header *new = NULL, *lp;
-       gchar *p, *c, *tmp, *tp;
+       struct raw_header *new = NULL;
+       const gchar *p, *c;
+       gchar *tmp, *tp;
        gint state = 0, l, next_state = 100, err_state = 100, t_state;
        gboolean valid_folding = FALSE;
 
-       p = task->raw_headers_str;
+       p = in;
        c = p;
        while (*p) {
                /* FSM for processing headers */
@@ -610,6 +629,7 @@ process_raw_headers (struct rspamd_task *task)
                                new =
                                        rspamd_mempool_alloc0 (task->task_pool,
                                                sizeof (struct raw_header));
+                               new->prev = new;
                                l = p - c;
                                tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
                                rspamd_strlcpy (tmp, c, l + 1);
@@ -713,36 +733,18 @@ process_raw_headers (struct rspamd_task *task)
                        }
                        *tp = '\0';
                        new->value = tmp;
-                       new->next = NULL;
-                       if ((lp =
-                               g_hash_table_lookup (task->raw_headers, new->name)) != NULL) {
-                               while (lp->next != NULL) {
-                                       lp = lp->next;
-                               }
-                               lp->next = new;
-                       }
-                       else {
-                               g_hash_table_insert (task->raw_headers, new->name, new);
-                       }
-                       debug_task ("add raw header %s: %s", new->name, new->value);
+                       new->decoded = g_mime_utils_header_decode_text (new->value);
+                       rspamd_mempool_add_destructor (task->task_pool,
+                                       (rspamd_mempool_destruct_t)g_free, new->decoded);
+                       append_raw_header (task, new);
                        state = 0;
                        break;
                case 5:
                        /* Header has only name, no value */
-                       new->next = NULL;
                        new->value = "";
-                       if ((lp =
-                               g_hash_table_lookup (task->raw_headers, new->name)) != NULL) {
-                               while (lp->next != NULL) {
-                                       lp = lp->next;
-                               }
-                               lp->next = new;
-                       }
-                       else {
-                               g_hash_table_insert (task->raw_headers, new->name, new);
-                       }
+                       new->decoded = NULL;
+                       append_raw_header (task, new);
                        state = 0;
-                       debug_task ("add raw header %s: %s", new->name, new->value);
                        break;
                case 99:
                        /* Folding state */
@@ -925,6 +927,7 @@ process_text_part (struct rspamd_task *task,
 {
        struct mime_text_part *text_part;
        const gchar *cd;
+       gchar *raw_headers;
 
        /* Skip attachements */
 #ifndef GMIME24
@@ -944,6 +947,12 @@ process_text_part (struct rspamd_task *task,
                debug_task ("skip attachments for checking as text parts");
                return;
        }
+
+       raw_headers = g_mime_object_get_headers (GMIME_OBJECT (part));
+       if (raw_headers) {
+               process_raw_headers (task, raw_headers);
+               g_free (raw_headers);
+       }
 #endif
 
        if (g_mime_content_type_is_type (type, "text",
@@ -1247,11 +1256,16 @@ process_message (struct rspamd_task *task)
                task->raw_headers_str = g_mime_message_get_headers (task->message);
 #endif
 
+               if (task->raw_headers_str) {
+                       rspamd_mempool_add_destructor (task->task_pool,
+                                       (rspamd_mempool_destruct_t) g_free, task->raw_headers_str);
+                       process_raw_headers (task, task->raw_headers_str);
+               }
                process_images (task);
 
                /* Parse received headers */
                first =
-                       message_get_header (task->task_pool, message, "Received", FALSE);
+                       message_get_raw_header (task, "Received", FALSE);
                cur = first;
                while (cur) {
                        recv =
@@ -1261,15 +1275,6 @@ process_message (struct rspamd_task *task)
                        task->received = g_list_prepend (task->received, recv);
                        cur = g_list_next (cur);
                }
-               if (first) {
-                       g_list_free (first);
-               }
-
-               if (task->raw_headers_str) {
-                       rspamd_mempool_add_destructor (task->task_pool,
-                               (rspamd_mempool_destruct_t) g_free, task->raw_headers_str);
-                       process_raw_headers (task);
-               }
 
                /* free the parser (and the stream) */
                g_object_unref (parser);
@@ -1350,7 +1355,7 @@ process_message (struct rspamd_task *task)
        }
 
        /* Parse urls inside Subject header */
-       cur = message_get_header (task->task_pool, task->message, "Subject", FALSE);
+       cur = message_get_raw_header (task, "Subject", FALSE);
        if (cur) {
                p = cur->data;
                len = strlen (p);
@@ -1390,8 +1395,6 @@ process_message (struct rspamd_task *task)
                        }
                        p = url_end + 1;
                }
-               /* Free header's list */
-               g_list_free (cur);
        }
 
        return 0;
@@ -2035,9 +2038,7 @@ message_get_raw_header (struct rspamd_task *task,
                        }
                }
                else {
-                       if (g_ascii_strcasecmp (rh->name, field) == 0) {
-                               gret = g_list_prepend (gret, rh);
-                       }
+                       gret = g_list_prepend (gret, rh);
                }
                rh = rh->next;
        }
index fdb987c8c4f81710c981e4c3e2f99f32575040e9..1c4ee981ae78b3d6d4e8ab2ebf9f588702218a11 100644 (file)
@@ -53,7 +53,8 @@ struct raw_header {
        gboolean tab_separated;
        gboolean empty_separator;
        gchar *separator;
-       struct raw_header *next;
+       gchar *decoded;
+       struct raw_header *prev, *next;
 };
 
 /**