From: Vsevolod Stakhov Date: Fri, 12 Jul 2019 09:45:43 +0000 (+0100) Subject: [Project] Adopt libmime code X-Git-Tag: 2.0~603^2~10 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ed3e23421f58da0cdf9938cbe3c72de95752c80f;p=thirdparty%2Frspamd.git [Project] Adopt libmime code --- diff --git a/src/libmime/archives.c b/src/libmime/archives.c index c19991eb65..b1c1624a4f 100644 --- a/src/libmime/archives.c +++ b/src/libmime/archives.c @@ -1906,9 +1906,7 @@ rspamd_archives_process (struct rspamd_task *task) const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}; const guchar gz_magic[] = {0x1F, 0x8B}; - for (i = 0; i < task->parts->len; i ++) { - part = g_ptr_array_index (task->parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { if (!(part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_IMAGE))) { if (part->parsed_data.len > 0) { if (rspamd_archive_cheat_detect (part, "zip", diff --git a/src/libmime/filter.c b/src/libmime/filter.c index a040cda1d4..83a9881d65 100644 --- a/src/libmime/filter.c +++ b/src/libmime/filter.c @@ -139,14 +139,14 @@ rspamd_add_passthrough_result (struct rspamd_task *task, if (!isnan (target_score)) { msg_info_task ("<%s>: set pre-result to '%s' %s(%.2f): '%s' from %s(%d)", - task->message_id, action->name, + MESSAGE_FIELD (task, message_id), action->name, flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "", target_score, message, module, priority); } else { msg_info_task ("<%s>: set pre-result to '%s' %s(no score): '%s' from %s(%d)", - task->message_id, action->name, + MESSAGE_FIELD (task, message_id), action->name, flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "", message, module, priority); } diff --git a/src/libmime/images.c b/src/libmime/images.c index 787417ab33..cb59bc88e4 100644 --- a/src/libmime/images.c +++ b/src/libmime/images.c @@ -54,9 +54,7 @@ rspamd_images_process (struct rspamd_task *task) RSPAMD_FTOK_ASSIGN (&srch, "image"); - for (i = 0; i < task->parts->len; i ++) { - part = g_ptr_array_index (task->parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { if (!(part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_ARCHIVE))) { if (rspamd_ftok_cmp (&part->ct->type, &srch) == 0 && part->parsed_data.len > 0) { @@ -603,17 +601,15 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part) struct html_image *himg; const gchar *cid, *html_cid; guint cid_len, i, j; - GPtrArray *ar; struct rspamd_image *img; img = rspamd_maybe_process_image (task->task_pool, &part->parsed_data); if (img != NULL) { - msg_debug_images ("detected %s image of size %ud x %ud in message <%s>", + msg_debug_images ("detected %s image of size %ud x %ud", rspamd_image_type_str (img->type), - img->width, img->height, - task->message_id); + img->width, img->height); if (part->cd) { img->filename = &part->cd->filename; @@ -625,11 +621,10 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part) part->specific.img = img; /* Check Content-Id */ - ar = rspamd_message_get_header_from_hash (part->raw_headers, - task->task_pool, "Content-Id", FALSE); + rh = rspamd_message_get_header_from_hash (part->raw_headers, + "Content-Id"); - if (ar != NULL && ar->len > 0) { - rh = g_ptr_array_index (ar, 0); + if (rh) { cid = rh->decoded; if (*cid == '<') { @@ -643,9 +638,8 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part) cid_len --; } - for (i = 0; i < task->text_parts->len; i ++) { - tp = g_ptr_array_index (task->text_parts, i); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) { if (IS_PART_HTML (tp) && tp->html != NULL && tp->html->images != NULL) { for (j = 0; j < tp->html->images->len; j ++) { diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c index 8dc7da12e1..4622ee0321 100644 --- a/src/libmime/mime_encoding.c +++ b/src/libmime/mime_encoding.c @@ -667,7 +667,8 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, } if (charset == NULL) { - msg_info_task ("<%s>: has invalid charset", task->message_id); + msg_info_task ("<%s>: has invalid charset", + MESSAGE_FIELD (task, message_id)); SET_PART_RAW (text_part); text_part->utf_raw_content = part_content; @@ -690,7 +691,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, if (!rspamd_mime_text_part_utf8_convert (task, text_part, part_content, charset, &err)) { msg_warn_task ("<%s>: cannot convert from %s to utf8: %s", - task->message_id, + MESSAGE_FIELD (task, message_id), charset, err ? err->message : "unknown problem"); SET_PART_RAW (text_part); diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c index 6572f4e887..9fe9e7b1f6 100644 --- a/src/libmime/mime_parser.c +++ b/src/libmime/mime_parser.c @@ -14,6 +14,7 @@ * limitations under the License. */ + #include "config.h" #include "task.h" #include "mime_parser.h" @@ -21,6 +22,7 @@ #include "message.h" #include "multipattern.h" #include "contrib/libottery/ottery.h" +#include "contrib/uthash/utlist.h" struct rspamd_mime_parser_lib_ctx { struct rspamd_multipattern *mp_boundary; @@ -256,21 +258,17 @@ rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task, static void rspamd_mime_part_get_cte (struct rspamd_task *task, - GHashTable *hdrs, - struct rspamd_mime_part *part, - gboolean apply_heuristic) + khash_t(rspamd_mime_headers_htb) *hdrs, + struct rspamd_mime_part *part, + gboolean apply_heuristic) { - struct rspamd_mime_header *hdr; + struct rspamd_mime_header *hdr, *cur; guint i; - GPtrArray *hdrs_cte; enum rspamd_cte cte = RSPAMD_CTE_UNKNOWN; - hdrs_cte = rspamd_message_get_header_from_hash (hdrs, - task->task_pool, - "Content-Transfer-Encoding", FALSE); - - if (hdrs_cte == NULL) { + hdr = rspamd_message_get_header_from_hash (hdrs, "Content-Transfer-Encoding"); + if (hdr == NULL) { if (part->parent_part && part->parent_part->cte != RSPAMD_CTE_UNKNOWN && !(part->parent_part->flags & RSPAMD_MIME_PART_MISSING_CTE)) { part->cte = part->parent_part->cte; @@ -287,12 +285,11 @@ rspamd_mime_part_get_cte (struct rspamd_task *task, part->flags |= RSPAMD_MIME_PART_MISSING_CTE; } else { - for (i = 0; i < hdrs_cte->len; i ++) { + DL_FOREACH (hdr, cur) { gsize hlen; gchar lc_buf[128]; - hdr = g_ptr_array_index (hdrs_cte, i); - hlen = rspamd_snprintf (lc_buf, sizeof (lc_buf), "%s", hdr->value); + hlen = rspamd_snprintf (lc_buf, sizeof (lc_buf), "%s", cur->value); rspamd_str_lc (lc_buf, hlen); cte = rspamd_mime_parse_cte (lc_buf, hlen); @@ -337,19 +334,17 @@ check_cte: static void rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part) { - struct rspamd_mime_header *hdr; + struct rspamd_mime_header *hdr, *cur; guint i; - GPtrArray *hdrs; struct rspamd_content_disposition *cd = NULL; rspamd_ftok_t srch; struct rspamd_content_type_param *found; - hdrs = rspamd_message_get_header_from_hash (part->raw_headers, - task->task_pool, - "Content-Disposition", FALSE); + hdr = rspamd_message_get_header_from_hash (part->raw_headers, + "Content-Disposition"); - if (hdrs == NULL) { + if (hdr == NULL) { cd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cd)); cd->type = RSPAMD_CT_INLINE; @@ -370,15 +365,13 @@ rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part } } else { - for (i = 0; i < hdrs->len; i ++) { + DL_FOREACH (hdr, cur) { gsize hlen; - - hdr = g_ptr_array_index (hdrs, i); cd = NULL; - if (hdr->decoded) { - hlen = strlen (hdr->decoded); - cd = rspamd_content_disposition_parse (hdr->decoded, hlen, + if (cur->decoded) { + hlen = strlen (cur->decoded); + cd = rspamd_content_disposition_parse (cur->decoded, hlen, task->task_pool); } @@ -517,8 +510,8 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task, g_assert_not_reached (); } - part->id = task->parts->len; - g_ptr_array_add (task->parts, part); + part->id = MESSAGE_FIELD (task, parts)->len; + g_ptr_array_add (MESSAGE_FIELD (task, parts), part); msg_debug_mime ("parsed data part %T/%T of length %z (%z orig), %s cte", &part->ct->type, &part->ct->subtype, part->parsed_data.len, part->raw_data.len, rspamd_cte_to_string (part->cte)); @@ -546,12 +539,10 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, GError **err) { struct rspamd_content_type *ct, *sel = NULL; - struct rspamd_mime_header *hdr; - GPtrArray *hdrs = NULL; + struct rspamd_mime_header *hdr = NULL, *cur; struct rspamd_mime_part *npart; GString str; goffset hdr_pos, body_pos; - guint i; enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_FATAL; @@ -592,9 +583,8 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, npart = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct rspamd_mime_part)); npart->parent_part = multipart; - npart->raw_headers = g_hash_table_new_full (rspamd_strcase_hash, - rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard); - npart->headers_order = g_queue_new (); + npart->raw_headers = rspamd_message_headers_new (); + npart->headers_order = NULL; if (multipart) { if (multipart->specific.mp->children == NULL) { @@ -612,15 +602,14 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, if (npart->raw_headers_len > 0) { rspamd_mime_headers_process (task, npart->raw_headers, - npart->headers_order, + &npart->headers_order, npart->raw_headers_str, npart->raw_headers_len, FALSE); } - hdrs = rspamd_message_get_header_from_hash (npart->raw_headers, - task->task_pool, - "Content-Type", FALSE); + hdr = rspamd_message_get_header_from_hash (npart->raw_headers, + "Content-Type"); } else { @@ -631,11 +620,10 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, } - if (hdrs != NULL) { + if (hdr != NULL) { - for (i = 0; i < hdrs->len; i ++) { - hdr = g_ptr_array_index (hdrs, i); - ct = rspamd_content_type_parse (hdr->decoded, strlen (hdr->decoded), + DL_FOREACH (hdr, cur) { + ct = rspamd_content_type_parse (cur->decoded, strlen (cur->decoded), task->task_pool); /* Here we prefer multipart content-type or any content-type */ @@ -848,8 +836,8 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task, return RSPAMD_MIME_PARSE_NESTING; } - part->id = task->parts->len; - g_ptr_array_add (task->parts, part); + part->id = MESSAGE_FIELD (task, parts)->len; + g_ptr_array_add (MESSAGE_FIELD (task, parts), part); st->nesting ++; rspamd_mime_part_get_cte (task, part->raw_headers, part, FALSE); @@ -1098,8 +1086,7 @@ rspamd_mime_parse_message (struct rspamd_task *task, GError **err) { struct rspamd_content_type *ct, *sel = NULL; - struct rspamd_mime_header *hdr; - GPtrArray *hdrs = NULL; + struct rspamd_mime_header *hdr = NULL, *cur; const gchar *pbegin, *p; gsize plen, len; struct rspamd_mime_part *npart; @@ -1159,42 +1146,45 @@ rspamd_mime_parse_message (struct rspamd_task *task, if (hdr_pos > 0 && hdr_pos < str.len) { - task->raw_headers_content.begin = str.str; - task->raw_headers_content.len = hdr_pos; - task->raw_headers_content.body_start = str.str + body_pos; + MESSAGE_FIELD (task, raw_headers_content).begin = str.str; + MESSAGE_FIELD (task, raw_headers_content).len = hdr_pos; + MESSAGE_FIELD (task, raw_headers_content).body_start = str.str + body_pos; - if (task->raw_headers_content.len > 0) { - rspamd_mime_headers_process (task, task->raw_headers, - task->headers_order, - task->raw_headers_content.begin, - task->raw_headers_content.len, + if (MESSAGE_FIELD (task, raw_headers_content).len > 0) { + rspamd_mime_headers_process (task, + MESSAGE_FIELD (task, raw_headers), + &MESSAGE_FIELD (task, headers_order), + MESSAGE_FIELD (task, raw_headers_content).begin, + MESSAGE_FIELD (task, raw_headers_content).len, TRUE); } - hdrs = rspamd_message_get_header_from_hash (task->raw_headers, - task->task_pool, - "Content-Type", FALSE); + hdr = rspamd_message_get_header_from_hash ( + MESSAGE_FIELD (task, raw_headers), + "Content-Type"); } else { /* First apply heuristic, maybe we have just headers */ hdr_pos = rspamd_mime_parser_headers_heuristic (&str, &body_pos); if (hdr_pos > 0 && hdr_pos <= str.len) { - task->raw_headers_content.begin = str.str; - task->raw_headers_content.len = hdr_pos; - task->raw_headers_content.body_start = str.str + body_pos; - - if (task->raw_headers_content.len > 0) { - rspamd_mime_headers_process (task, task->raw_headers, - task->headers_order, - task->raw_headers_content.begin, - task->raw_headers_content.len, + MESSAGE_FIELD (task, raw_headers_content).begin = str.str; + MESSAGE_FIELD (task, raw_headers_content).len = hdr_pos; + MESSAGE_FIELD (task, raw_headers_content).body_start = str.str + + body_pos; + + if (MESSAGE_FIELD (task, raw_headers_content).len > 0) { + rspamd_mime_headers_process (task, + MESSAGE_FIELD (task, raw_headers), + &MESSAGE_FIELD (task, headers_order), + MESSAGE_FIELD (task, raw_headers_content).begin, + MESSAGE_FIELD (task, raw_headers_content).len, TRUE); } - hdrs = rspamd_message_get_header_from_hash (task->raw_headers, - task->task_pool, - "Content-Type", FALSE); + hdr = rspamd_message_get_header_from_hash ( + MESSAGE_FIELD (task, raw_headers), + "Content-Type"); task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; } else { @@ -1204,7 +1194,8 @@ rspamd_mime_parse_message (struct rspamd_task *task, pbegin = st->start + body_pos; plen = st->end - pbegin; - npart->raw_headers = g_hash_table_ref (task->raw_headers); + /* TODO: check if it is correct */ + npart->raw_headers = NULL; npart->headers_order = NULL; } else { @@ -1227,9 +1218,8 @@ rspamd_mime_parse_message (struct rspamd_task *task, str.len = part->parsed_data.len; hdr_pos = rspamd_string_find_eoh (&str, &body_pos); - npart->raw_headers = g_hash_table_new_full (rspamd_strcase_hash, - rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard); - npart->headers_order = g_queue_new (); + npart->raw_headers = rspamd_message_headers_new (); + npart->headers_order = NULL; if (hdr_pos > 0 && hdr_pos < str.len) { npart->raw_headers_str = str.str; @@ -1237,16 +1227,16 @@ rspamd_mime_parse_message (struct rspamd_task *task, npart->raw_data.begin = str.str + body_pos; if (npart->raw_headers_len > 0) { - rspamd_mime_headers_process (task, npart->raw_headers, - npart->headers_order, + rspamd_mime_headers_process (task, + npart->raw_headers, + &npart->headers_order, npart->raw_headers_str, npart->raw_headers_len, FALSE); } - hdrs = rspamd_message_get_header_from_hash (npart->raw_headers, - task->task_pool, - "Content-Type", FALSE); + hdr = rspamd_message_get_header_from_hash (npart->raw_headers, + "Content-Type"); } else { body_pos = 0; @@ -1260,13 +1250,12 @@ rspamd_mime_parse_message (struct rspamd_task *task, npart->raw_data.len = plen; npart->parent_part = part; - if (hdrs == NULL) { + if (hdr == NULL) { sel = NULL; } else { - for (i = 0; i < hdrs->len; i ++) { - hdr = g_ptr_array_index (hdrs, i); - ct = rspamd_content_type_parse (hdr->decoded, strlen (hdr->decoded), + DL_FOREACH (hdr, cur) { + ct = rspamd_content_type_parse (cur->decoded, strlen (cur->decoded), task->task_pool); /* Here we prefer multipart content-type or any content-type */ @@ -1408,7 +1397,7 @@ rspamd_mime_parse_task (struct rspamd_task *task, GError **err) st = g_malloc0 (sizeof (*st)); st->stack = g_ptr_array_sized_new (4); - st->pos = task->raw_headers_content.body_start; + st->pos = MESSAGE_FIELD (task, raw_headers_content).body_start; st->end = task->msg.begin + task->msg.len; st->boundaries = g_array_sized_new (FALSE, FALSE, sizeof (struct rspamd_mime_boundary), 8);