git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Adopt libmime code
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 12 Jul 2019 09:45:43 +0000 (10:45 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 12 Jul 2019 14:18:17 +0000 (15:18 +0100)
src/libmime/archives.c
src/libmime/filter.c
src/libmime/images.c
src/libmime/mime_encoding.c
src/libmime/mime_parser.c

index c19991eb6516cb1e5c255cbe64f1d7888acfd945..b1c1624a4f80f7ccd86e12e28227588768ceef3c 100644 (file)
@@ -1906,9 +1906,7 @@ rspamd_archives_process (struct rspamd_task *task)
        const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
        const guchar gz_magic[] = {0x1F, 0x8B};
 
-       for (i = 0; i < task->parts->len; i ++) {
-               part = g_ptr_array_index (task->parts, i);
-
+       PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
                if (!(part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_IMAGE))) {
                        if (part->parsed_data.len > 0) {
                                if (rspamd_archive_cheat_detect (part, "zip",
index a040cda1d44fcc32af903208823732fafda5ff90..83a9881d65404a82cf657471ae6f8be1a3fdd3fc 100644 (file)
@@ -139,14 +139,14 @@ rspamd_add_passthrough_result (struct rspamd_task *task,
        if (!isnan (target_score)) {
 
                msg_info_task ("<%s>: set pre-result to '%s' %s(%.2f): '%s' from %s(%d)",
-                               task->message_id, action->name,
+                               MESSAGE_FIELD (task, message_id), action->name,
                                flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "",
                                target_score,
                                message, module, priority);
        }
        else {
                msg_info_task ("<%s>: set pre-result to '%s' %s(no score): '%s' from %s(%d)",
-                               task->message_id, action->name,
+                               MESSAGE_FIELD (task, message_id), action->name,
                                flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "",
                                message, module, priority);
        }
index 787417ab33c41a2d612b013225056a772f3b8760..cb59bc88e48c77f4f286031a0fc1702a0e295268 100644 (file)
@@ -54,9 +54,7 @@ rspamd_images_process (struct rspamd_task *task)
 
        RSPAMD_FTOK_ASSIGN (&srch, "image");
 
-       for (i = 0; i < task->parts->len; i ++) {
-               part = g_ptr_array_index (task->parts, i);
-
+       PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
                if (!(part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_ARCHIVE))) {
                        if (rspamd_ftok_cmp (&part->ct->type, &srch) == 0 &&
                                part->parsed_data.len > 0) {
@@ -603,17 +601,15 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part)
        struct html_image *himg;
        const gchar *cid, *html_cid;
        guint cid_len, i, j;
-       GPtrArray *ar;
        struct rspamd_image *img;
 
 
        img = rspamd_maybe_process_image (task->task_pool, &part->parsed_data);
 
        if (img != NULL) {
-               msg_debug_images ("detected %s image of size %ud x %ud in message <%s>",
+               msg_debug_images ("detected %s image of size %ud x %ud",
                        rspamd_image_type_str (img->type),
-                       img->width, img->height,
-                       task->message_id);
+                       img->width, img->height);
 
                if (part->cd) {
                        img->filename = &part->cd->filename;
@@ -625,11 +621,10 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part)
                part->specific.img = img;
 
                /* Check Content-Id */
-               ar = rspamd_message_get_header_from_hash (part->raw_headers,
-                               task->task_pool, "Content-Id", FALSE);
+               rh = rspamd_message_get_header_from_hash (part->raw_headers,
+                               "Content-Id");
 
-               if (ar != NULL && ar->len > 0) {
-                       rh = g_ptr_array_index (ar, 0);
+               if (rh) {
                        cid = rh->decoded;
 
                        if (*cid == '<') {
@@ -643,9 +638,8 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part)
                                        cid_len --;
                                }
 
-                               for (i = 0; i < task->text_parts->len; i ++) {
-                                       tp = g_ptr_array_index (task->text_parts, i);
 
+                               PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) {
                                        if (IS_PART_HTML (tp) && tp->html != NULL &&
                                                        tp->html->images != NULL) {
                                                for (j = 0; j < tp->html->images->len; j ++) {
index 8dc7da12e189c3554e6b0468e28b85d426890f6a..4622ee0321ebe080176b945e73cee246ef131652 100644 (file)
@@ -667,7 +667,8 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
        }
 
        if (charset == NULL) {
-               msg_info_task ("<%s>: has invalid charset", task->message_id);
+               msg_info_task ("<%s>: has invalid charset",
+                               MESSAGE_FIELD (task, message_id));
                SET_PART_RAW (text_part);
                text_part->utf_raw_content = part_content;
 
@@ -690,7 +691,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
                if (!rspamd_mime_text_part_utf8_convert (task, text_part,
                                part_content, charset, &err)) {
                        msg_warn_task ("<%s>: cannot convert from %s to utf8: %s",
-                                       task->message_id,
+                                       MESSAGE_FIELD (task, message_id),
                                        charset,
                                        err ? err->message : "unknown problem");
                        SET_PART_RAW (text_part);
index 6572f4e887ef243b315490d44577593e82e0a5bc..9fe9e7b1f64535ae046f9151fde8e20d8558d274 100644 (file)
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+
 #include "config.h"
 #include "task.h"
 #include "mime_parser.h"
@@ -21,6 +22,7 @@
 #include "message.h"
 #include "multipattern.h"
 #include "contrib/libottery/ottery.h"
+#include "contrib/uthash/utlist.h"
 
 struct rspamd_mime_parser_lib_ctx {
        struct rspamd_multipattern *mp_boundary;
@@ -256,21 +258,17 @@ rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task,
 
 static void
 rspamd_mime_part_get_cte (struct rspamd_task *task,
-               GHashTable *hdrs,
-               struct rspamd_mime_part *part,
-               gboolean apply_heuristic)
+                                                 khash_t(rspamd_mime_headers_htb) *hdrs,
+                                                 struct rspamd_mime_part *part,
+                                                 gboolean apply_heuristic)
 {
-       struct rspamd_mime_header *hdr;
+       struct rspamd_mime_header *hdr, *cur;
        guint i;
-       GPtrArray *hdrs_cte;
        enum rspamd_cte cte = RSPAMD_CTE_UNKNOWN;
 
-       hdrs_cte = rspamd_message_get_header_from_hash (hdrs,
-                       task->task_pool,
-                       "Content-Transfer-Encoding", FALSE);
-
-       if (hdrs_cte == NULL) {
+       hdr = rspamd_message_get_header_from_hash (hdrs, "Content-Transfer-Encoding");
 
+       if (hdr == NULL) {
                if (part->parent_part && part->parent_part->cte != RSPAMD_CTE_UNKNOWN &&
                                !(part->parent_part->flags & RSPAMD_MIME_PART_MISSING_CTE)) {
                        part->cte = part->parent_part->cte;
@@ -287,12 +285,11 @@ rspamd_mime_part_get_cte (struct rspamd_task *task,
                part->flags |= RSPAMD_MIME_PART_MISSING_CTE;
        }
        else {
-               for (i = 0; i < hdrs_cte->len; i ++) {
+               DL_FOREACH (hdr, cur) {
                        gsize hlen;
                        gchar lc_buf[128];
 
-                       hdr = g_ptr_array_index (hdrs_cte, i);
-                       hlen = rspamd_snprintf (lc_buf, sizeof (lc_buf), "%s", hdr->value);
+                       hlen = rspamd_snprintf (lc_buf, sizeof (lc_buf), "%s", cur->value);
                        rspamd_str_lc (lc_buf, hlen);
                        cte = rspamd_mime_parse_cte (lc_buf, hlen);
 
@@ -337,19 +334,17 @@ check_cte:
 static void
 rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part)
 {
-       struct rspamd_mime_header *hdr;
+       struct rspamd_mime_header *hdr, *cur;
        guint i;
-       GPtrArray *hdrs;
        struct rspamd_content_disposition *cd = NULL;
        rspamd_ftok_t srch;
        struct rspamd_content_type_param *found;
 
-       hdrs = rspamd_message_get_header_from_hash (part->raw_headers,
-                       task->task_pool,
-                       "Content-Disposition", FALSE);
+       hdr = rspamd_message_get_header_from_hash (part->raw_headers,
+                       "Content-Disposition");
 
 
-       if (hdrs == NULL) {
+       if (hdr == NULL) {
                cd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cd));
                cd->type = RSPAMD_CT_INLINE;
 
@@ -370,15 +365,13 @@ rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part
                }
        }
        else {
-               for (i = 0; i < hdrs->len; i ++) {
+               DL_FOREACH (hdr, cur) {
                        gsize hlen;
-
-                       hdr = g_ptr_array_index (hdrs, i);
                        cd = NULL;
 
-                       if (hdr->decoded) {
-                               hlen = strlen (hdr->decoded);
-                               cd = rspamd_content_disposition_parse (hdr->decoded, hlen,
+                       if (cur->decoded) {
+                               hlen = strlen (cur->decoded);
+                               cd = rspamd_content_disposition_parse (cur->decoded, hlen,
                                                task->task_pool);
                        }
 
@@ -517,8 +510,8 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task,
                g_assert_not_reached ();
        }
 
-       part->id = task->parts->len;
-       g_ptr_array_add (task->parts, part);
+       part->id = MESSAGE_FIELD (task, parts)->len;
+       g_ptr_array_add (MESSAGE_FIELD (task, parts), part);
        msg_debug_mime ("parsed data part %T/%T of length %z (%z orig), %s cte",
                        &part->ct->type, &part->ct->subtype, part->parsed_data.len,
                        part->raw_data.len, rspamd_cte_to_string (part->cte));
@@ -546,12 +539,10 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
                GError **err)
 {
        struct rspamd_content_type *ct, *sel = NULL;
-       struct rspamd_mime_header *hdr;
-       GPtrArray *hdrs = NULL;
+       struct rspamd_mime_header *hdr = NULL, *cur;
        struct rspamd_mime_part *npart;
        GString str;
        goffset hdr_pos, body_pos;
-       guint i;
        enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_FATAL;
 
 
@@ -592,9 +583,8 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
        npart = rspamd_mempool_alloc0 (task->task_pool,
                        sizeof (struct rspamd_mime_part));
        npart->parent_part = multipart;
-       npart->raw_headers =  g_hash_table_new_full (rspamd_strcase_hash,
-                       rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard);
-       npart->headers_order = g_queue_new ();
+       npart->raw_headers =  rspamd_message_headers_new ();
+       npart->headers_order = NULL;
 
        if (multipart) {
                if (multipart->specific.mp->children == NULL) {
@@ -612,15 +602,14 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
 
                if (npart->raw_headers_len > 0) {
                        rspamd_mime_headers_process (task, npart->raw_headers,
-                                       npart->headers_order,
+                                       &npart->headers_order,
                                        npart->raw_headers_str,
                                        npart->raw_headers_len,
                                        FALSE);
                }
 
-               hdrs = rspamd_message_get_header_from_hash (npart->raw_headers,
-                               task->task_pool,
-                               "Content-Type", FALSE);
+               hdr = rspamd_message_get_header_from_hash (npart->raw_headers,
+                               "Content-Type");
 
        }
        else {
@@ -631,11 +620,10 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
        }
 
 
-       if (hdrs != NULL) {
+       if (hdr != NULL) {
 
-               for (i = 0; i < hdrs->len; i ++) {
-                       hdr = g_ptr_array_index (hdrs, i);
-                       ct = rspamd_content_type_parse (hdr->decoded, strlen (hdr->decoded),
+               DL_FOREACH (hdr, cur) {
+                       ct = rspamd_content_type_parse (cur->decoded, strlen (cur->decoded),
                                        task->task_pool);
 
                        /* Here we prefer multipart content-type or any content-type */
@@ -848,8 +836,8 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task,
                return RSPAMD_MIME_PARSE_NESTING;
        }
 
-       part->id = task->parts->len;
-       g_ptr_array_add (task->parts, part);
+       part->id = MESSAGE_FIELD (task, parts)->len;
+       g_ptr_array_add (MESSAGE_FIELD (task, parts), part);
        st->nesting ++;
        rspamd_mime_part_get_cte (task, part->raw_headers, part, FALSE);
 
@@ -1098,8 +1086,7 @@ rspamd_mime_parse_message (struct rspamd_task *task,
                GError **err)
 {
        struct rspamd_content_type *ct, *sel = NULL;
-       struct rspamd_mime_header *hdr;
-       GPtrArray *hdrs = NULL;
+       struct rspamd_mime_header *hdr = NULL, *cur;
        const gchar *pbegin, *p;
        gsize plen, len;
        struct rspamd_mime_part *npart;
@@ -1159,42 +1146,45 @@ rspamd_mime_parse_message (struct rspamd_task *task,
 
                if (hdr_pos > 0 && hdr_pos < str.len) {
 
-                       task->raw_headers_content.begin = str.str;
-                       task->raw_headers_content.len = hdr_pos;
-                       task->raw_headers_content.body_start = str.str + body_pos;
+                       MESSAGE_FIELD (task, raw_headers_content).begin = str.str;
+                       MESSAGE_FIELD (task, raw_headers_content).len = hdr_pos;
+                       MESSAGE_FIELD (task, raw_headers_content).body_start = str.str + body_pos;
 
-                       if (task->raw_headers_content.len > 0) {
-                               rspamd_mime_headers_process (task, task->raw_headers,
-                                               task->headers_order,
-                                               task->raw_headers_content.begin,
-                                               task->raw_headers_content.len,
+                       if (MESSAGE_FIELD (task, raw_headers_content).len > 0) {
+                               rspamd_mime_headers_process (task,
+                                               MESSAGE_FIELD (task, raw_headers),
+                                               &MESSAGE_FIELD (task, headers_order),
+                                               MESSAGE_FIELD (task, raw_headers_content).begin,
+                                               MESSAGE_FIELD (task, raw_headers_content).len,
                                                TRUE);
                        }
 
-                       hdrs = rspamd_message_get_header_from_hash (task->raw_headers,
-                                       task->task_pool,
-                                       "Content-Type", FALSE);
+                       hdr = rspamd_message_get_header_from_hash (
+                                       MESSAGE_FIELD (task, raw_headers),
+                                       "Content-Type");
                }
                else {
                        /* First apply heuristic, maybe we have just headers */
                        hdr_pos = rspamd_mime_parser_headers_heuristic (&str, &body_pos);
 
                        if (hdr_pos > 0 && hdr_pos <= str.len) {
-                               task->raw_headers_content.begin = str.str;
-                               task->raw_headers_content.len = hdr_pos;
-                               task->raw_headers_content.body_start = str.str + body_pos;
-
-                               if (task->raw_headers_content.len > 0) {
-                                       rspamd_mime_headers_process (task, task->raw_headers,
-                                                       task->headers_order,
-                                                       task->raw_headers_content.begin,
-                                                       task->raw_headers_content.len,
+                               MESSAGE_FIELD (task, raw_headers_content).begin = str.str;
+                               MESSAGE_FIELD (task, raw_headers_content).len = hdr_pos;
+                               MESSAGE_FIELD (task, raw_headers_content).body_start = str.str +
+                                               body_pos;
+
+                               if (MESSAGE_FIELD (task, raw_headers_content).len > 0) {
+                                       rspamd_mime_headers_process (task,
+                                                       MESSAGE_FIELD (task, raw_headers),
+                                                       &MESSAGE_FIELD (task, headers_order),
+                                                       MESSAGE_FIELD (task, raw_headers_content).begin,
+                                                       MESSAGE_FIELD (task, raw_headers_content).len,
                                                        TRUE);
                                }
 
-                               hdrs = rspamd_message_get_header_from_hash (task->raw_headers,
-                                               task->task_pool,
-                                               "Content-Type", FALSE);
+                               hdr = rspamd_message_get_header_from_hash (
+                                               MESSAGE_FIELD (task, raw_headers),
+                                               "Content-Type");
                                task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
                        }
                        else {
@@ -1204,7 +1194,8 @@ rspamd_mime_parse_message (struct rspamd_task *task,
 
                pbegin = st->start + body_pos;
                plen = st->end - pbegin;
-               npart->raw_headers = g_hash_table_ref (task->raw_headers);
+               /* TODO: check if it is correct */
+               npart->raw_headers = NULL;
                npart->headers_order = NULL;
        }
        else {
@@ -1227,9 +1218,8 @@ rspamd_mime_parse_message (struct rspamd_task *task,
                str.len = part->parsed_data.len;
 
                hdr_pos = rspamd_string_find_eoh (&str, &body_pos);
-               npart->raw_headers =  g_hash_table_new_full (rspamd_strcase_hash,
-                               rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard);
-               npart->headers_order = g_queue_new ();
+               npart->raw_headers =  rspamd_message_headers_new ();
+               npart->headers_order = NULL;
 
                if (hdr_pos > 0 && hdr_pos < str.len) {
                        npart->raw_headers_str = str.str;
@@ -1237,16 +1227,16 @@ rspamd_mime_parse_message (struct rspamd_task *task,
                        npart->raw_data.begin = str.str + body_pos;
 
                        if (npart->raw_headers_len > 0) {
-                               rspamd_mime_headers_process (task, npart->raw_headers,
-                                               npart->headers_order,
+                               rspamd_mime_headers_process (task,
+                                               npart->raw_headers,
+                                               &npart->headers_order,
                                                npart->raw_headers_str,
                                                npart->raw_headers_len,
                                                FALSE);
                        }
 
-                       hdrs = rspamd_message_get_header_from_hash (npart->raw_headers,
-                                       task->task_pool,
-                                       "Content-Type", FALSE);
+                       hdr = rspamd_message_get_header_from_hash (npart->raw_headers,
+                                       "Content-Type");
                }
                else {
                        body_pos = 0;
@@ -1260,13 +1250,12 @@ rspamd_mime_parse_message (struct rspamd_task *task,
        npart->raw_data.len = plen;
        npart->parent_part = part;
 
-       if (hdrs == NULL) {
+       if (hdr == NULL) {
                sel = NULL;
        }
        else {
-               for (i = 0; i < hdrs->len; i ++) {
-                       hdr = g_ptr_array_index (hdrs, i);
-                       ct = rspamd_content_type_parse (hdr->decoded, strlen (hdr->decoded),
+               DL_FOREACH (hdr, cur) {
+                       ct = rspamd_content_type_parse (cur->decoded, strlen (cur->decoded),
                                        task->task_pool);
 
                        /* Here we prefer multipart content-type or any content-type */
@@ -1408,7 +1397,7 @@ rspamd_mime_parse_task (struct rspamd_task *task, GError **err)
 
        st = g_malloc0 (sizeof (*st));
        st->stack = g_ptr_array_sized_new (4);
-       st->pos = task->raw_headers_content.body_start;
+       st->pos = MESSAGE_FIELD (task, raw_headers_content).body_start;
        st->end = task->msg.begin + task->msg.len;
        st->boundaries = g_array_sized_new (FALSE, FALSE,
                        sizeof (struct rspamd_mime_boundary), 8);