git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Start mime structures refactoring
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 10 Jul 2019 14:01:41 +0000 (15:01 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 12 Jul 2019 14:18:17 +0000 (15:18 +0100)
CMakeLists.txt
src/libmime/message.c
src/libmime/message.h
src/libmime/mime_headers.c
src/libmime/mime_headers.h
src/libserver/dkim.c
src/libserver/protocol.c
src/libserver/task.c
src/libserver/task.h

index 06cc9fd303d68bad275ddd74094c926e74b4a607..88aea0b62f318ef40989f648b1f1692efcafa620 100644 (file)
@@ -771,6 +771,7 @@ IF(NOT CMAKE_C_OPT_FLAGS)
        ENDIF()
        ELSE(ENABLE_OPTIMIZATION MATCHES "ON")
                IF(ENABLE_FULL_DEBUG MATCHES "ON")
+                       ADD_DEFINITIONS(-DFULL_DEBUG)
                        SET(CMAKE_C_OPT_FLAGS "-g -O0")
                ELSE(ENABLE_FULL_DEBUG MATCHES "ON")
                        SET(CMAKE_C_OPT_FLAGS "-g -O2")
index 482287769fbb34ba06694e8f372f6d5c0f970bcc..1d9da26f21a55bd6fe4772bdd8fe144daafb4985 100644 (file)
@@ -1091,6 +1091,22 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
        task->queue_id = mid;
 }
 
+/**
+ * Destructor hook for struct rspamd_message.
+ * The body is currently empty, so calling it releases nothing.
+ * NOTE(review): presumably meant to be registered as the reference
+ * counter destructor of the message - confirm against the callers.
+ * @param msg message being destroyed (unused for now)
+ */
+static void
+rspamd_message_dtor (struct rspamd_message *msg)
+{
+
+}
+
+/**
+ * Allocate a new, zero-initialised message structure for a task.
+ *
+ * The structure is allocated from the task's memory pool. Its reference
+ * counter starts with one reference held by the caller and uses
+ * rspamd_message_dtor as the destructor.
+ *
+ * @param task task whose memory pool is used for the allocation
+ * @return the newly created message
+ */
+struct rspamd_message*
+rspamd_message_new (struct rspamd_task *task)
+{
+       struct rspamd_message *msg;
+
+       /* The pool to allocate from must be passed explicitly */
+       msg = rspamd_mempool_alloc0 (task->task_pool, sizeof (*msg));
+       /* Without this the counter would start at zero with no destructor set */
+       REF_INIT_RETAIN (msg, rspamd_message_dtor);
+
+       return msg;
+}
+
 gboolean
 rspamd_message_parse (struct rspamd_task *task)
 {
@@ -1593,3 +1609,14 @@ rspamd_message_get_mime_header_array (struct rspamd_task *task,
 
        return ret;
 }
+
+/**
+ * Take an additional reference on a message.
+ * @param msg message to retain
+ * @return the same message pointer, so the call can be used in an expression
+ */
+struct rspamd_message *
+rspamd_message_ref (struct rspamd_message *msg)
+{
+       REF_RETAIN (msg);
+
+       /* The function is declared non-void, so the pointer must be returned */
+       return msg;
+}
+
+/**
+ * Drop one reference on a message (REF_RELEASE on its reference counter).
+ * @param msg message to release
+ */
+void
+rspamd_message_unref (struct rspamd_message *msg)
+{
+       REF_RELEASE (msg);
+}
index 17c4ec5b98a9c7ffe6bcdfd8aa6d1a807231e342..7d58fa88f95805f1ab6fccf0a593b23e9042fcca 100644 (file)
@@ -12,6 +12,8 @@
 #include "cryptobox.h"
 #include "mime_headers.h"
 #include "content_type.h"
+#include "libutil/ref.h"
+#include "libutil/str_util.h"
 
 #include <unicode/uchar.h>
 #include <unicode/utext.h>
@@ -129,6 +131,36 @@ struct rspamd_mime_text_part {
        guint unicode_scripts;
 };
 
+/**
+ * Message-level data of a task, reached through the `message' member of
+ * struct rspamd_task (see MESSAGE_FIELD).
+ */
+struct rspamd_message {
+       const gchar *message_id;                /**< message id */
+       gchar *subject;                         /**< subject */
+
+       GPtrArray *parts;                       /**< list of parsed parts */
+       GPtrArray *text_parts;                  /**< list of text parts */
+       struct {
+               const gchar *begin;
+               gsize len;
+               const gchar *body_start;
+       } raw_headers_content;                  /**< raw headers content as a begin/len span;
+                                                    body_start presumably marks where the
+                                                    body begins - TODO confirm */
+       GPtrArray *received;                    /**< list of received headers */
+       GHashTable *urls;                       /**< parsed urls */
+       GHashTable *emails;                     /**< parsed emails */
+       GHashTable *raw_headers;                /**< raw headers */
+       GQueue *headers_order;                  /**< order of raw headers */
+       GPtrArray *rcpt_mime;                   /**< presumably recipients parsed from the
+                                                    To/Cc/Bcc headers - TODO confirm */
+       GPtrArray *from_mime;                   /**< presumably senders parsed from the
+                                                    From header - TODO confirm */
+       enum rspamd_newlines_type nlines_type;  /**< type of newlines (detected on most of headers) */
+       ref_entry_t ref;                        /**< reference counter, see rspamd_message_ref()
+                                                    and rspamd_message_unref() */
+};
+
+/*
+ * Access a field of the message attached to a task, for example
+ * MESSAGE_FIELD (task, subject). Both variants expand to an lvalue.
+ *
+ * With FULL_DEBUG defined, a missing message is logged and then trapped by
+ * g_assert instead of being dereferenced. That variant is written as a
+ * GNU C statement expression (a GCC/Clang extension), because a
+ * do { } while (0) statement cannot be combined with an expression.
+ */
+#ifndef FULL_DEBUG
+#define MESSAGE_FIELD(task, field) ((task)->message->field)
+#else
+#define MESSAGE_FIELD(task, field) (*({ \
+       if (!(task)->message) {msg_err_task("no message when getting field %s", #field); g_assert(0);} \
+       &(task)->message->field; \
+       }))
+#endif
+
 /**
  * Parse and pre-process mime message
  * @param task worker_task object
@@ -191,6 +223,12 @@ enum rspamd_cte rspamd_cte_from_string (const gchar *str);
  */
 const gchar *rspamd_cte_to_string (enum rspamd_cte ct);
 
+/**
+ * Create a message structure for the given task
+ * @param task task the message is created for
+ * @return pointer to the message (per the declared return type)
+ */
+struct rspamd_message* rspamd_message_new (struct rspamd_task *task);
+
+/**
+ * Retain a message (applies REF_RETAIN to it)
+ * @param msg message to retain
+ * @return message pointer (per the declared return type)
+ */
+struct rspamd_message *rspamd_message_ref (struct rspamd_message *msg);
+
+/**
+ * Release a message (applies REF_RELEASE to it)
+ * @param msg message to release
+ */
+void rspamd_message_unref (struct rspamd_message *msg);
+
 #ifdef  __cplusplus
 }
 #endif
index cf6d0f763c5044899809b72527884d9a667c72a4..952a163b974508162d26c5c5ce14b7ab2222e3d2 100644 (file)
@@ -44,31 +44,31 @@ rspamd_mime_header_check_special (struct rspamd_task *task,
                        g_ptr_array_add (task->received, recv);
                }
 
-               rh->type = RSPAMD_HEADER_RECEIVED;
+               rh->flags = RSPAMD_HEADER_RECEIVED;
                break;
        case 0x76F31A09F4352521ULL:     /* to */
                task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool,
                                rh->decoded, strlen (rh->decoded), task->rcpt_mime);
-               rh->type = RSPAMD_HEADER_TO|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
+               rh->flags = RSPAMD_HEADER_TO|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
                break;
        case 0x7EB117C1480B76ULL:       /* cc */
                task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool,
                                rh->decoded, strlen (rh->decoded), task->rcpt_mime);
-               rh->type = RSPAMD_HEADER_CC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
+               rh->flags = RSPAMD_HEADER_CC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
                break;
        case 0xE4923E11C4989C8DULL:     /* bcc */
                task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool,
                                rh->decoded, strlen (rh->decoded), task->rcpt_mime);
-               rh->type = RSPAMD_HEADER_BCC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
+               rh->flags = RSPAMD_HEADER_BCC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
                break;
        case 0x41E1985EDC1CBDE4ULL:     /* from */
                task->from_mime = rspamd_email_address_from_mime (task->task_pool,
                                rh->decoded, strlen (rh->decoded), task->from_mime);
-               rh->type = RSPAMD_HEADER_FROM|RSPAMD_HEADER_SENDER|RSPAMD_HEADER_UNIQUE;
+               rh->flags = RSPAMD_HEADER_FROM|RSPAMD_HEADER_SENDER|RSPAMD_HEADER_UNIQUE;
                break;
        case 0x43A558FC7C240226ULL:     /* message-id */ {
 
-               rh->type = RSPAMD_HEADER_MESSAGE_ID|RSPAMD_HEADER_UNIQUE;
+               rh->flags = RSPAMD_HEADER_MESSAGE_ID|RSPAMD_HEADER_UNIQUE;
                p = rh->decoded;
                end = p + strlen (p);
 
@@ -107,20 +107,20 @@ rspamd_mime_header_check_special (struct rspamd_task *task,
                if (task->subject == NULL) {
                        task->subject = rh->decoded;
                }
-               rh->type = RSPAMD_HEADER_SUBJECT|RSPAMD_HEADER_UNIQUE;
+               rh->flags = RSPAMD_HEADER_SUBJECT|RSPAMD_HEADER_UNIQUE;
                break;
        case 0xEE4AA2EAAC61D6F4ULL:     /* return-path */
                if (task->from_envelope == NULL) {
                        task->from_envelope = rspamd_email_address_from_smtp (rh->decoded,
                                        strlen (rh->decoded));
                }
-               rh->type = RSPAMD_HEADER_RETURN_PATH|RSPAMD_HEADER_UNIQUE;
+               rh->flags = RSPAMD_HEADER_RETURN_PATH|RSPAMD_HEADER_UNIQUE;
                break;
        case 0xB9EEFAD2E93C2161ULL:     /* delivered-to */
                if (task->deliver_to == NULL) {
                        task->deliver_to = rh->decoded;
                }
-               rh->type = RSPAMD_HEADER_DELIVERED_TO;
+               rh->flags = RSPAMD_HEADER_DELIVERED_TO;
                break;
        case 0x2EC3BFF3C393FC10ULL: /* date */
        case 0xAC0DDB1A1D214CAULL: /* sender */
@@ -128,7 +128,7 @@ rspamd_mime_header_check_special (struct rspamd_task *task,
        case 0x81CD9E9131AB6A9AULL: /* content-type */
        case 0xC39BD9A75AA25B60ULL: /* content-transfer-encoding */
        case 0xB3F6704CB3AD6589ULL: /* references */
-               rh->type = RSPAMD_HEADER_UNIQUE;
+               rh->flags = RSPAMD_HEADER_UNIQUE;
                break;
        }
 }
@@ -472,7 +472,7 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target,
                while (cur) {
                        nh = cur->data;
 
-                       if (nh->name && nh->type != RSPAMD_HEADER_RECEIVED) {
+                       if (nh->name && nh->flags != RSPAMD_HEADER_RECEIVED) {
                                rspamd_cryptobox_hash_update (&hs, nh->name, strlen (nh->name));
                        }
 
index 5cb300978cf38dc4f7869355bfa47e29d6d252e5..60fd7b6973a3d9607017e5328d3b582835360403 100644 (file)
@@ -31,20 +31,22 @@ enum rspamd_rfc2047_encoding {
        RSPAMD_RFC2047_BASE64,
 };
 
-enum rspamd_mime_header_special_type {
-       RSPAMD_HEADER_GENERIC = 0,
-       RSPAMD_HEADER_RECEIVED = 1 << 0,
-       RSPAMD_HEADER_TO = 1 << 2,
-       RSPAMD_HEADER_CC = 1 << 3,
-       RSPAMD_HEADER_BCC = 1 << 4,
-       RSPAMD_HEADER_FROM = 1 << 5,
-       RSPAMD_HEADER_MESSAGE_ID = 1 << 6,
-       RSPAMD_HEADER_SUBJECT = 1 << 7,
-       RSPAMD_HEADER_RETURN_PATH = 1 << 8,
-       RSPAMD_HEADER_DELIVERED_TO = 1 << 9,
-       RSPAMD_HEADER_SENDER = 1 << 10,
-       RSPAMD_HEADER_RCPT = 1 << 11,
-       RSPAMD_HEADER_UNIQUE = 1 << 12
+/*
+ * Bit flags kept in the `flags' member of struct rspamd_mime_header.
+ * Several values may be OR-ed together for one header, so a single flag
+ * should be tested with a bitwise AND rather than by comparing for equality.
+ * Bit 1 (1u << 1u) is not assigned to any flag.
+ */
+enum rspamd_mime_header_flags {
+       RSPAMD_HEADER_GENERIC = 0u,
+       RSPAMD_HEADER_RECEIVED = 1u << 0u,
+       RSPAMD_HEADER_TO = 1u << 2u,
+       RSPAMD_HEADER_CC = 1u << 3u,
+       RSPAMD_HEADER_BCC = 1u << 4u,
+       RSPAMD_HEADER_FROM = 1u << 5u,
+       RSPAMD_HEADER_MESSAGE_ID = 1u << 6u,
+       RSPAMD_HEADER_SUBJECT = 1u << 7u,
+       RSPAMD_HEADER_RETURN_PATH = 1u << 8u,
+       RSPAMD_HEADER_DELIVERED_TO = 1u << 9u,
+       RSPAMD_HEADER_SENDER = 1u << 10u,
+       RSPAMD_HEADER_RCPT = 1u << 11u,
+       RSPAMD_HEADER_UNIQUE = 1u << 12u,
+       RSPAMD_HEADER_EMPTY_SEPARATOR = 1u << 13u,
+       RSPAMD_HEADER_TAB_SEPARATED = 1u << 14u,
 };
 
 struct rspamd_mime_header {
@@ -52,32 +54,31 @@ struct rspamd_mime_header {
        gchar *value;
        const gchar *raw_value; /* As it is in the message (unfolded and unparsed) */
        gsize raw_len;
-       gboolean tab_separated;
-       gboolean empty_separator;
        guint order;
-       enum rspamd_mime_header_special_type type;
+       int flags; /* see enum rspamd_mime_header_flags */
        gchar *separator;
        gchar *decoded;
+       struct rspamd_mime_header *prev, *next; /* Headers with the same name */
+       struct rspamd_mime_header *ord_prev, *ord_next; /* Overall order of headers */
 };
 
+/*
+ * Values kept in the `flags' member of struct received_header: the
+ * protocol type of the hop together with the RSPAMD_RECEIVED_FLAG_* bits.
+ * NOTE(review): RSPAMD_RECEIVED_SMTP is 0 while every other type is a
+ * distinct bit, so SMTP cannot be detected with a bitwise AND and is
+ * indistinguishable from "no type set" - confirm this is intended.
+ */
 enum rspamd_received_type {
        RSPAMD_RECEIVED_SMTP = 0,
-       RSPAMD_RECEIVED_ESMTP,
-       RSPAMD_RECEIVED_ESMTPA,
-       RSPAMD_RECEIVED_ESMTPS,
-       RSPAMD_RECEIVED_ESMTPSA,
-       RSPAMD_RECEIVED_LMTP,
-       RSPAMD_RECEIVED_IMAP,
-       RSPAMD_RECEIVED_LOCAL,
-       RSPAMD_RECEIVED_HTTP,
-       RSPAMD_RECEIVED_MAPI,
-       RSPAMD_RECEIVED_UNKNOWN
+       RSPAMD_RECEIVED_ESMTP = 1u << 0u,
+       RSPAMD_RECEIVED_ESMTPA = 1u << 1u,
+       RSPAMD_RECEIVED_ESMTPS = 1u << 2u,
+       RSPAMD_RECEIVED_ESMTPSA = 1u << 3u,
+       RSPAMD_RECEIVED_LMTP = 1u << 4u,
+       RSPAMD_RECEIVED_IMAP = 1u << 5u,
+       RSPAMD_RECEIVED_LOCAL = 1u << 6u,
+       RSPAMD_RECEIVED_HTTP = 1u << 7u,
+       RSPAMD_RECEIVED_MAPI = 1u << 8u,
+       RSPAMD_RECEIVED_UNKNOWN = 1u << 9u,
+       RSPAMD_RECEIVED_FLAG_ARTIFICIAL =  (1u << 10u),
+       RSPAMD_RECEIVED_FLAG_SSL =  (1u << 11u),
+       RSPAMD_RECEIVED_FLAG_AUTHENTICATED =  (1u << 12u),
 };
 
-#define RSPAMD_RECEIVED_FLAG_ARTIFICIAL (1 << 0)
-#define RSPAMD_RECEIVED_FLAG_SSL (1 << 1)
-#define RSPAMD_RECEIVED_FLAG_AUTHENTICATED (1 << 2)
-
 struct received_header {
        const gchar *from_hostname;
        const gchar *from_ip;
@@ -88,8 +89,8 @@ struct received_header {
        rspamd_inet_addr_t *addr;
        struct rspamd_mime_header *hdr;
        time_t timestamp;
-       enum rspamd_received_type type;
-       gint flags;
+       gint flags; /* See enum rspamd_received_type */
+       struct received_header *prev, *next;
 };
 
 /**
@@ -100,8 +101,9 @@ struct received_header {
  * @param len
  * @param check_newlines
  */
-void rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target,
-                                                                 GQueue *order,
+void rspamd_mime_headers_process (struct rspamd_task *task,
+                                                                 GHashTable *target,
+                                                                 struct rspamd_mime_header **order_ptr,
                                                                  const gchar *in, gsize len,
                                                                  gboolean check_newlines);
 
index 9386c5cdcf70f15a5619cd2e1d12e0b7302007b1..c0ea29f08ebadbe3487de304a9197e4fe97819cd 100644 (file)
@@ -2175,7 +2175,7 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx,
                if (ar) {
                        /* Check uniqueness of the header */
                        rh = g_ptr_array_index (ar, 0);
-                       if ((rh->type & RSPAMD_HEADER_UNIQUE) && ar->len > 1) {
+                       if ((rh->flags & RSPAMD_HEADER_UNIQUE) && ar->len > 1) {
                                guint64 random_cookie = ottery_rand_uint64 ();
 
                                msg_warn_dkim ("header %s is intended to be unique by"
@@ -2210,7 +2210,7 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx,
                                                (gint)rh->raw_len, rh->raw_value);
                        }
                        else {
-                               if (ctx->is_sign && (rh->type & RSPAMD_HEADER_FROM)) {
+                               if (ctx->is_sign && (rh->flags & RSPAMD_HEADER_FROM)) {
                                        /* Special handling of the From handling when rewrite is done */
                                        gboolean has_rewrite = FALSE;
                                        guint i;
index bef7a0452afdfbada5f1a02d7a3aa5b00d067c27..ddd072882b8fde6f7701d3e2a16e315c00af3ac8 100644 (file)
@@ -1409,6 +1409,7 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg,
        gint flags = RSPAMD_PROTOCOL_DEFAULT;
        struct rspamd_action *action;
 
+#if 0
        /* Write custom headers */
        g_hash_table_iter_init (&hiter, task->reply_headers);
        while (g_hash_table_iter_next (&hiter, &h, &v)) {
@@ -1416,6 +1417,7 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg,
 
                rspamd_http_message_add_header (msg, hn->begin, hv->begin);
        }
+#endif
 
        flags |= RSPAMD_PROTOCOL_URLS;
 
index 04be617448f58d401a54b3d00784cb6caadcd033..88ee730a337411baa861f3cd06e70aad365a54c6 100644 (file)
@@ -129,15 +129,9 @@ rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg,
        new_task->request_headers = g_hash_table_new_full (rspamd_ftok_icase_hash,
                        rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free,
                        rspamd_request_header_dtor);
-       rspamd_mempool_add_destructor (new_task->task_pool,
-               (rspamd_mempool_destruct_t) g_hash_table_unref,
-               new_task->request_headers);
-       new_task->reply_headers = g_hash_table_new_full (rspamd_ftok_icase_hash,
-                       rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free,
-                       rspamd_fstring_mapped_ftok_free);
        rspamd_mempool_add_destructor (new_task->task_pool,
                        (rspamd_mempool_destruct_t) g_hash_table_unref,
-                       new_task->reply_headers);
+                       new_task->request_headers);
        rspamd_mempool_add_destructor (new_task->task_pool,
                        (rspamd_mempool_destruct_t) g_hash_table_unref,
                        new_task->raw_headers);
index ac55dd9105eca742cb9f328d438c930f95b1a4b5..00caf3ab68fba3f7da312c62ef9e5e17f0f37990 100644 (file)
@@ -134,6 +134,7 @@ enum rspamd_task_stage {
 struct rspamd_email_address;
 struct rspamd_lang_detector;
 enum rspamd_newlines_type;
+struct rspamd_message;
 
 /**
  * Worker task structure
@@ -147,15 +148,12 @@ struct rspamd_task {
        gulong message_len;                                /**< Message length                                                                  */
        gchar *helo;                                    /**< helo header value                                                          */
        gchar *queue_id;                                /**< queue id if specified                                                      */
-       const gchar *message_id;                        /**< message id                                                                         */
        rspamd_inet_addr_t *from_addr;                    /**< from addr for a task                                                     */
        rspamd_inet_addr_t *client_addr;                /**< address of connected socket                                        */
        gchar *deliver_to;                                /**< address to deliver                                                               */
        gchar *user;                                    /**< user to deliver                                                            */
-       gchar *subject;                                    /**< subject (for non-mime)                                                  */
        const gchar *hostname;                            /**< hostname reported by MTA                                         */
        GHashTable *request_headers;                    /**< HTTP headers in a request                                          */
-       GHashTable *reply_headers;                        /**< Custom reply headers                                                     */
        struct {
                const gchar *begin;
                gsize len;
@@ -163,29 +161,14 @@ struct rspamd_task {
        } msg;                                            /**< message buffer                                                                   */
        struct rspamd_http_connection *http_conn;        /**< HTTP server connection                                                    */
        struct rspamd_async_session *s;                /**< async session object                                                        */
-       GPtrArray *parts;                                /**< list of parsed parts                                                      */
-       GPtrArray *text_parts;                            /**< list of text parts                                                               */
-       struct {
-               const gchar *begin;
-               gsize len;
-               const gchar *body_start;
-       } raw_headers_content;                /**< list of raw headers                                                  */
-       GPtrArray *received;                            /**< list of received headers                                           */
-       GHashTable *urls;                                /**< list of parsed urls                                                       */
-       GHashTable *emails;                                /**< list of parsed emails                                                   */
-       GHashTable *raw_headers;                        /**< list of raw headers                                                        */
-       GQueue *headers_order;                            /**< order of raw headers                                                     */
        struct rspamd_metric_result *result;            /**< Metric result                                                                      */
        GHashTable *lua_cache;                            /**< cache of lua objects                                                     */
        GPtrArray *tokens;                                /**< statistics tokens */
        GArray *meta_words;                                /**< rspamd_stat_token_t produced from meta headers
                                                                                                                (e.g. Subject) */
 
-       GPtrArray *rcpt_mime;
        GPtrArray *rcpt_envelope;                        /**< array of rspamd_email_address                                     */
-       GPtrArray *from_mime;
        struct rspamd_email_address *from_envelope;
-       enum rspamd_newlines_type nlines_type;            /**< type of newlines (detected on most of headers    */
 
        ucl_object_t *messages;                            /**< list of messages that would be reported         */
        struct rspamd_re_runtime *re_rt;                /**< regexp runtime                                                                     */
@@ -215,6 +198,7 @@ struct rspamd_task {
 
        const gchar *classifier;                        /**< Classifier to learn (if needed)                            */
        struct rspamd_lang_detector *lang_det;            /**< Languages detector                                                               */
+       struct rspamd_message *message;
        guchar digest[16];
 };
 
@@ -252,7 +236,8 @@ gboolean rspamd_task_fin (void *arg);
  * @return
  */
 gboolean rspamd_task_load_message (struct rspamd_task *task,
-                                                                  struct rspamd_http_message *msg, const gchar *start, gsize len);
+                                                                  struct rspamd_http_message *msg,
+                                                                  const gchar *start, gsize len);
 
 /**
  * Process task