git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
* Fix error in raw headers parsing
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Tue, 24 May 2011 14:07:28 +0000 (18:07 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Tue, 24 May 2011 14:07:28 +0000 (18:07 +0400)
* Improve speed of raw headers access

src/expressions.c
src/lua/lua_task.c
src/main.h
src/message.c
src/message.h
src/plugins/regexp.c
src/util.c

index deb77e795e51be56f5a4e8d15772d45cc879a65d..8c90160d2dbbea81f80e660b385d340835917687 100644 (file)
@@ -1392,7 +1392,7 @@ struct addr_list {
 };
 
 #define COMPARE_RCPT_LEN 3
-#define MIN_RCPT_TO_COMPARE 5
+#define MIN_RCPT_TO_COMPARE 7
 
 gboolean
 rspamd_recipients_distance (struct worker_task *task, GList * args, void *unused)
index 1011612aadf754f8500e85d68d7c2c907e291772..2ab56b29cc0f3453450e9a4891c163b390512538 100644 (file)
@@ -313,7 +313,7 @@ lua_task_get_raw_headers (lua_State * L)
        struct worker_task             *task = lua_check_task (L);
 
        if (task) {
-               lua_pushstring (L, task->raw_headers);
+               lua_pushstring (L, task->raw_headers_str);
        }
        else {
                lua_pushnil (L);
@@ -326,7 +326,6 @@ static gint
 lua_task_get_raw_header_common (lua_State * L, gboolean strong)
 {
        struct worker_task             *task = lua_check_task (L);
-       GList                          *cur;
        struct raw_header                          *rh;
        gint                            i = 1;
        const gchar                    *name;
@@ -338,23 +337,27 @@ lua_task_get_raw_header_common (lua_State * L, gboolean strong)
                        return 1;
                }
                lua_newtable (L);
-               cur = g_list_first (task->raw_headers_list);
-               while (cur) {
-                       rh = cur->data;
+               rh = g_hash_table_lookup (task->raw_headers, name);
+
+               if (rh == NULL) {
+                       return 1;
+               }
+
+               while (rh) {
                        if (rh->name == NULL) {
-                               cur = g_list_next (cur);
+                               rh = rh->next;
                                continue;
                        }
                        /* Check case sensivity */
                        if (strong) {
                                if (strcmp (rh->name, name) != 0) {
-                                       cur = g_list_next (cur);
+                                       rh = rh->next;
                                        continue;
                                }
                        }
                        else {
                                if (g_ascii_strcasecmp (rh->name, name) != 0) {
-                                       cur = g_list_next (cur);
+                                       rh = rh->next;
                                        continue;
                                }
                        }
@@ -371,7 +374,7 @@ lua_task_get_raw_header_common (lua_State * L, gboolean strong)
                        lua_set_table_index (L, "separator", rh->separator);
                        lua_rawseti (L, -2, i++);
                        /* Process next element */
-                       cur = g_list_next (cur);
+                       rh = rh->next;
                }
        }
        else {
index 186ee9baa21b29485e4110bfe26d36f7dbd31deb..d8f90b03f903b553a26a3b7945afa7ff99813b76 100644 (file)
@@ -206,12 +206,12 @@ struct worker_task {
        InternetAddressList *rcpts;                                                                     /**< list of all recipients                                             */
        GList *parts;                                                                                           /**< list of parsed parts                                                       */
        GList *text_parts;                                                                                      /**< list of text parts                                                         */
-       gchar *raw_headers;                                                                                     /**< list of raw headers                                                        */
+       gchar *raw_headers_str;                                                                                 /**< list of raw headers                                                        */
        GList *received;                                                                                        /**< list of received headers                                           */
        GTree *urls;                                                                                            /**< list of parsed urls                                                        */
        GTree *emails;                                                                                          /**< list of parsed emails                                                      */
        GList *images;                                                                                          /**< list of images                                                                     */
-       GList *raw_headers_list;                                                                        /**< list of raw headers                                                        */
+       GHashTable *raw_headers;                                                                        /**< list of raw headers                                                        */
        GHashTable *results;                                                                            /**< hash table of metric_result indexed by 
         *    metric's name                                                                     */
        GHashTable *tokens;                                                                                     /**< hash table of tokens indexed by tokenizer
index 4f8d4dcc3e9d07f0ed535603e5db0313b29c53d9..8d36ad3ebea999ee3a16ef5bae80904e033823ed 100644 (file)
@@ -253,7 +253,7 @@ parse_qmail_recv (memory_pool_t * pool, gchar *line, struct received_header *r)
 {
        gchar                           *s, *p, t;
 
-       /* We are intersted only with received from network headers */
+       /* We are interested only with received from network headers */
        if ((p = strstr (line, "from network")) == NULL) {
                r->is_error = 2;
                return;
@@ -467,12 +467,12 @@ parse_recv_header (memory_pool_t * pool, gchar *line, struct received_header *r)
 static void
 process_raw_headers (struct worker_task *task)
 {
-       struct raw_header              *new;
+       struct raw_header              *new, *lp;
        gchar                          *p, *c, *tmp, *tp;
        gint                            state = 0, l, next_state, err_state, t_state;
        gboolean                        valid_folding = FALSE;
 
-       p = task->raw_headers;
+       p = task->raw_headers_str;
        c = p;
        while (*p) {
                /* FSM for processing headers */
@@ -480,7 +480,7 @@ process_raw_headers (struct worker_task *task)
                case 0:
                        /* Begin processing headers */
                        if (!g_ascii_isalpha (*p)) {
-                               /* We have some garbadge at the beginning of headers, skip this line */
+                               /* We have some garbage at the beginning of headers, skip this line */
                                state = 100;
                                next_state = 0;
                        }
@@ -503,7 +503,7 @@ process_raw_headers (struct worker_task *task)
                                c = p;
                        }
                        else if (g_ascii_isspace (*p)) {
-                               /* Not header but some garbadge */
+                               /* Not header but some garbage */
                                state = 100;
                                next_state = 0;
                        }
@@ -554,6 +554,9 @@ process_raw_headers (struct worker_task *task)
                                next_state = 3;
                                err_state = 4;
                        }
+                       else if (*(p + 1) == '\0') {
+                               state = 4;
+                       }
                        else {
                                p ++;
                        }
@@ -593,36 +596,59 @@ process_raw_headers (struct worker_task *task)
                        }
                        *tp = '\0';
                        new->value = tmp;
-                       task->raw_headers_list = g_list_prepend (task->raw_headers_list, new);
+                       new->next = NULL;
+                       if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) {
+                               while (lp->next != NULL) {
+                                       lp = lp->next;
+                               }
+                               lp->next = new;
+                       }
+                       else {
+                               g_hash_table_insert (task->raw_headers, new->name, new);
+                       }
                        debug_task ("add raw header %s: %s", new->name, new->value);
                        state = 0;
                        break;
                case 5:
                        /* Header has only name, no value */
-                       task->raw_headers_list = g_list_prepend (task->raw_headers_list, new);
+                       new->next = NULL;
+                       if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) {
+                               while (lp->next != NULL) {
+                                       lp = lp->next;
+                               }
+                               lp->next = new;
+                       }
+                       else {
+                               g_hash_table_insert (task->raw_headers, new->name, new);
+                       }
                        state = 0;
                        debug_task ("add raw header %s: %s", new->name, new->value);
                        break;
                case 99:
                        /* Folding state */
-                       if (*p == '\r' || *p == '\n') {
-                               p ++;
-                               valid_folding = FALSE;
-                       }
-                       else if (*p == '\t' || *p == ' ') {
-                               /* Valid folding */
-                               p ++;
-                               valid_folding = TRUE;
+                       if (*(p + 1) == '\0') {
+                               state = err_state;
                        }
                        else {
-                               if (valid_folding) {
-                                       debug_task ("go to state: %d->%d", state, next_state);
-                                       state = next_state;
+                               if (*p == '\r' || *p == '\n') {
+                                       p ++;
+                                       valid_folding = FALSE;
+                               }
+                               else if (*p == '\t' || *p == ' ') {
+                                       /* Valid folding */
+                                       p ++;
+                                       valid_folding = TRUE;
                                }
                                else {
-                                       /* Fall back */
-                                       debug_task ("go to state: %d->%d", state, err_state);
-                                       state = err_state;
+                                       if (valid_folding) {
+                                               debug_task ("go to state: %d->%d", state, next_state);
+                                               state = next_state;
+                                       }
+                                       else {
+                                               /* Fall back */
+                                               debug_task ("go to state: %d->%d", state, err_state);
+                                               state = err_state;
+                                       }
                                }
                        }
                        break;
@@ -642,6 +668,9 @@ process_raw_headers (struct worker_task *task)
                                p ++;
                                state = next_state;
                        }
+                       else if (*(p + 1) == '\0') {
+                               state = next_state;
+                       }
                        else {
                                p ++;
                        }
@@ -1001,9 +1030,9 @@ process_message (struct worker_task *task)
                }
 
 #ifdef GMIME24
-               task->raw_headers = g_mime_object_get_headers (GMIME_OBJECT (task->message));
+               task->raw_headers_str = g_mime_object_get_headers (GMIME_OBJECT (task->message));
 #else
-               task->raw_headers = g_mime_message_get_headers (task->message);
+               task->raw_headers_str = g_mime_message_get_headers (task->message);
 #endif
 
                process_images (task);
@@ -1021,10 +1050,9 @@ process_message (struct worker_task *task)
                        g_list_free (first);
                }
 
-               if (task->raw_headers) {
-                       memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_free, task->raw_headers);
+               if (task->raw_headers_str) {
+                       memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_free, task->raw_headers_str);
                        process_raw_headers (task);
-                       memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, task->raw_headers_list);
                }
 
                task->rcpts = g_mime_message_get_all_recipients (message);
@@ -1634,12 +1662,16 @@ message_get_header (memory_pool_t * pool, GMimeMessage * message, const gchar *f
 GList*
 message_get_raw_header (struct worker_task *task, const gchar *field, gboolean strong)
 {
-       GList                               *cur, *gret = NULL;
+       GList                               *gret = NULL;
        struct raw_header                   *rh;
 
-       cur = task->raw_headers_list;
-       while (cur) {
-               rh = cur->data;
+       rh = g_hash_table_lookup (task->raw_headers, field);
+
+       if (rh == NULL) {
+               return NULL;
+       }
+
+       while (rh) {
                if (strong) {
                        if (strcmp (rh->name, field) == 0) {
                                gret = g_list_prepend (gret, rh);
@@ -1650,7 +1682,7 @@ message_get_raw_header (struct worker_task *task, const gchar *field, gboolean s
                                gret = g_list_prepend (gret, rh);
                        }
                }
-               cur = g_list_next (cur);
+               rh = rh->next;
        }
 
        if (gret != NULL) {
index 7ac598460940e1705b4ec7622e56cd47b888cc49..e70dd07e2e7f59df6fe7feab0af4d861ed5260b7 100644 (file)
@@ -51,6 +51,7 @@ struct raw_header {
        gboolean tab_separated;
        gboolean empty_separator;
        gchar *separator;
+       struct raw_header *next;
 };
 
 /**
index 24d238d8192f5880b3445db0109e5abb6cddfd06..441a17de5b80ee787582e613cd44f84654598c86 100644 (file)
@@ -1267,8 +1267,6 @@ static                          gboolean
 rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused)
 {
        struct expression_argument     *arg;
-       GList                          *cur;
-       struct raw_header              *rh;
 
        if (args == NULL || task == NULL) {
                return FALSE;
@@ -1280,16 +1278,7 @@ rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused)
                return FALSE;
        }
 
-       cur = task->raw_headers_list;
-       while (cur) {
-               rh = cur->data;
-               if (g_ascii_strcasecmp (rh->name, arg->data) == 0) {
-                       return TRUE;
-               }
-               cur = g_list_next (cur);
-       }
-
-       return FALSE;
+       return g_hash_table_lookup (task->raw_headers, arg->data) != NULL;
 }
 
 static gboolean
index cec4e455dc88abb25e2b008704cf97292fe2fc5a..6d8cb09e09330c050db6ef1184ce488409971a2d 100644 (file)
@@ -1257,6 +1257,10 @@ construct_task (struct rspamd_worker *worker)
        memory_pool_add_destructor (new_task->task_pool,
                        (pool_destruct_func) g_hash_table_destroy,
                        new_task->re_cache);
+       new_task->raw_headers = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
+       memory_pool_add_destructor (new_task->task_pool,
+                               (pool_destruct_func) g_hash_table_destroy,
+                               new_task->raw_headers);
        new_task->emails = g_tree_new (compare_email_func);
        memory_pool_add_destructor (new_task->task_pool,
                                (pool_destruct_func) g_tree_destroy,