git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Core: Add libmagic detection for all parts
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 11 Dec 2018 12:01:52 +0000 (12:01 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 11 Dec 2018 12:01:52 +0000 (12:01 +0000)
src/libmime/archives.c
src/libmime/message.c
src/libmime/message.h
src/lua/lua_mimepart.c

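diff --git a/src/libmime/archives.c b/src/libmime/archives.c
index 9cfce6968fc1be82dfb29f8114f64bcf11535e70..1f9a5c6345ddc44baf631882d4aad4ff00ba3153 100644
--- a/src/libmime/archives.c
+++ b/src/libmime/archives.c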
@@ -1509,8 +1509,8 @@ rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str,
                }
 
                if (magic_start != NULL) {
-                       if (part->parsed_data.len > magic_len && memcmp (part->parsed_data.begin,
-                                       magic_start, magic_len) == 0) {
+                       if (part->parsed_data.len > magic_len &&
+                               memcmp (part->parsed_data.begin, magic_start, magic_len) == 0) {
                                return TRUE;
                        }
                }
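diff --git a/src/libmime/message.c b/src/libmime/message.c
index a5faaf017bfa7f1a88701bfadd96e7f169b79dd5..bbae5e42643d5987894688873cee8e50d62b34dc 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c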
@@ -703,7 +703,7 @@ rspamd_message_process_html_text_part (struct rspamd_task *task,
        return TRUE;
 }
 
-static void
+static gboolean
 rspamd_message_process_text_part_maybe (struct rspamd_task *task,
                                                                                struct rspamd_mime_part *mime_part)
 {
@@ -812,11 +812,11 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
                        mime_part->cd && mime_part->cd->type == RSPAMD_CT_ATTACHMENT &&
                        (task->cfg && !task->cfg->check_text_attachements)) {
                debug_task ("skip attachments for checking as text parts");
-               return;
+               return TRUE;
        }
        else if (!(found_txt || found_html)) {
                /* Not a text part */
-               return;
+               return FALSE;
        }
 
        text_part = rspamd_mempool_alloc0 (task->task_pool,
@@ -830,12 +830,12 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
 
        if (found_html) {
                if (!rspamd_message_process_html_text_part (task, text_part)) {
-                       return;
+                       return FALSE;
                }
        }
        else {
                if (!rspamd_message_process_plain_text_part (task, text_part)) {
-                       return;
+                       return FALSE;
                }
        }
 
@@ -866,7 +866,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
 
                rspamd_task_insert_result (task, GTUBE_SYMBOL, 0, NULL);
 
-               return;
+               return TRUE;
        }
 
        /* Post process part */
@@ -885,6 +885,8 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
        }
 
        rspamd_mime_part_create_words (task, text_part);
+
+       return TRUE;
 }
 
 /* Creates message from various data using libmagic to detect type */
@@ -900,15 +902,18 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
 
        g_assert (start != NULL);
 
+       part = rspamd_mempool_alloc0 (task->task_pool, sizeof (*part));
+
        tok = rspamd_task_get_request_header (task, "Content-Type");
 
        if (tok) {
                /* We have Content-Type defined */
                ct = rspamd_content_type_parse (tok->begin, tok->len,
                                task->task_pool);
+               part->ct = ct;
        }
-       else if (task->cfg && task->cfg->libs_ctx) {
-               /* Try to predict it by content (slow) */
+
+       if (task->cfg && task->cfg->libs_ctx) {
                mb = magic_buffer (task->cfg->libs_ctx->libmagic,
                                start,
                                len);
@@ -918,12 +923,16 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
                        srch.len = strlen (mb);
                        ct = rspamd_content_type_parse (srch.begin, srch.len,
                                        task->task_pool);
+                       msg_warn_task ("construct fake mime of type: %s", mb);
+
+                       if (!part->ct) {
+                               part->ct = ct;
+                       }
+
+                       part->detected_ct = ct;
                }
        }
 
-       msg_warn_task ("construct fake mime of type: %s", mb);
-       part = rspamd_mempool_alloc0 (task->task_pool, sizeof (*part));
-       part->ct = ct;
        part->raw_data.begin = start;
        part->raw_data.len = len;
        part->parsed_data.begin = start;
@@ -1189,7 +1198,25 @@ rspamd_message_process (struct rspamd_task *task)
                struct rspamd_mime_part *part;
 
                part = g_ptr_array_index (task->parts, i);
-               rspamd_message_process_text_part_maybe (task, part);
+
+               /* Not a (parsable) text part: guess the content type with libmagic */
+               if (!rspamd_message_process_text_part_maybe (task, part) &&
+                               task->cfg && task->cfg->libs_ctx &&
+                               part->parsed_data.len > 0) {
+                       const gchar *mb = magic_buffer (task->cfg->libs_ctx->libmagic,
+                                       part->parsed_data.begin,
+                                       part->parsed_data.len);
+
+                       if (mb) {
+                               rspamd_ftok_t srch;
+
+                               srch.begin = mb;
+                               srch.len = strlen (mb);
+                               part->detected_ct = rspamd_content_type_parse (srch.begin,
+                                               srch.len,
+                                               task->task_pool);
+                       }
+               }
        }
 
        rspamd_images_process (task);
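diff --git a/src/libmime/message.h b/src/libmime/message.h
index 29f777c3b18034989e507f3a14cb521ca4d15060..25c88cc3a0fd8b99e18a388f136765927b6b4f66 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h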
@@ -47,6 +47,7 @@ struct rspamd_mime_multipart {
 
 struct rspamd_mime_part {
        struct rspamd_content_type *ct;
+       struct rspamd_content_type *detected_ct;
        struct rspamd_content_disposition *cd;
        rspamd_ftok_t raw_data;
        rspamd_ftok_t parsed_data;
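diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index 3617a145bef5218c05571b027f786d7eac9d7d6e..3019cf577e521caecaa5a4d53604f89f47b3c2f1 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c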
@@ -333,6 +333,20 @@ LUA_FUNCTION_DEF (mimepart, get_type);
  */
 LUA_FUNCTION_DEF (mimepart, get_type_full);
 
+/***
+ * @method mime_part:get_detected_type()
+ * Extract the content type that libmagic detected from the content of the mime part
+ * @return {string,string} content type in form 'type','subtype' (nil,nil if the part has no detected type)
+ */
+LUA_FUNCTION_DEF (mimepart, get_detected_type);
+
+/***
+ * @method mime_part:get_detected_type_full()
+ * Extract the content type that libmagic detected from the content of the mime part, with all attributes
+ * @return {string,string,table} content type in form 'type','subtype', {attrs} (nil,nil if the part has no detected type)
+ */
+LUA_FUNCTION_DEF (mimepart, get_detected_type_full);
+
 /***
  * @method mime_part:get_cte()
  * Extract content-transfer-encoding for a part
@@ -457,6 +471,8 @@ static const struct luaL_reg mimepartlib_m[] = {
        LUA_INTERFACE_DEF (mimepart, get_length),
        LUA_INTERFACE_DEF (mimepart, get_type),
        LUA_INTERFACE_DEF (mimepart, get_type_full),
+       LUA_INTERFACE_DEF (mimepart, get_detected_type),
+       LUA_INTERFACE_DEF (mimepart, get_detected_type_full),
        LUA_INTERFACE_DEF (mimepart, get_cte),
        LUA_INTERFACE_DEF (mimepart, get_filename),
        LUA_INTERFACE_DEF (mimepart, get_header),
@@ -1189,48 +1205,49 @@ lua_mimepart_get_length (lua_State * L)
 }
 
 static gint
-lua_mimepart_get_type_common (lua_State * L, gboolean full)
+lua_mimepart_get_type_common (lua_State * L, struct rspamd_content_type *ct,
+               gboolean full)
 {
-       struct rspamd_mime_part *part = lua_check_mimepart (L);
+       /* ct may be NULL (e.g. no detected type): reply with two nils */
        GHashTableIter it;
        gpointer k, v;
        struct rspamd_content_type_param *param;
 
-       if (part == NULL) {
+       if (ct == NULL) {
                lua_pushnil (L);
                lua_pushnil (L);
                return 2;
        }
 
-       lua_pushlstring (L, part->ct->type.begin, part->ct->type.len);
-       lua_pushlstring (L, part->ct->subtype.begin, part->ct->subtype.len);
+       lua_pushlstring (L, ct->type.begin, ct->type.len);
+       lua_pushlstring (L, ct->subtype.begin, ct->subtype.len);
 
        if (!full) {
                return 2;
        }
 
-       lua_createtable (L, 0, 2 + (part->ct->attrs ?
-                       g_hash_table_size (part->ct->attrs) : 0));
+       lua_createtable (L, 0, 2 + (ct->attrs ?
+                       g_hash_table_size (ct->attrs) : 0));
 
-       if (part->ct->charset.len > 0) {
+       if (ct->charset.len > 0) {
                lua_pushstring (L, "charset");
-               lua_pushlstring (L, part->ct->charset.begin, part->ct->charset.len);
+               lua_pushlstring (L, ct->charset.begin, ct->charset.len);
                lua_settable (L, -3);
        }
 
-       if (part->ct->boundary.len > 0) {
-               lua_pushstring (L, "charset");
-               lua_pushlstring (L, part->ct->boundary.begin, part->ct->boundary.len);
+       if (ct->boundary.len > 0) {
+               lua_pushstring (L, "boundary");
+               lua_pushlstring (L, ct->boundary.begin, ct->boundary.len);
                lua_settable (L, -3);
        }
 
-       if (part->ct->attrs) {
-               g_hash_table_iter_init (&it, part->ct->attrs);
+       if (ct->attrs) {
+               g_hash_table_iter_init (&it, ct->attrs);
 
                while (g_hash_table_iter_next (&it, &k, &v)) {
                        param = v;
 
-                       if (param->name.len > 0 && param->name.len > 0) {
+                       if (param->name.len > 0 && param->value.len > 0) {
                                /* TODO: think about multiple values here */
                                lua_pushlstring (L, param->name.begin, param->name.len);
                                lua_pushlstring (L, param->value.begin, param->value.len);
@@ -1246,14 +1263,52 @@ static gint
 lua_mimepart_get_type (lua_State * L)
 {
        LUA_TRACE_POINT;
-       return lua_mimepart_get_type_common (L, FALSE);
+       struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+       if (part == NULL) {
+               return luaL_error (L, "invalid arguments");
+       }
+
+       return lua_mimepart_get_type_common (L, part->ct, FALSE);
 }
 
 static gint
 lua_mimepart_get_type_full (lua_State * L)
 {
        LUA_TRACE_POINT;
-       return lua_mimepart_get_type_common (L, TRUE);
+       struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+       if (part == NULL) {
+               return luaL_error (L, "invalid arguments");
+       }
+
+       return lua_mimepart_get_type_common (L, part->ct, TRUE);
+}
+
+static gint
+lua_mimepart_get_detected_type (lua_State * L)
+{
+       LUA_TRACE_POINT;
+       struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+       if (part == NULL) {
+               return luaL_error (L, "invalid arguments");
+       }
+
+       return lua_mimepart_get_type_common (L, part->detected_ct, FALSE);
+}
+
+static gint
+lua_mimepart_get_detected_type_full (lua_State * L)
+{
+       LUA_TRACE_POINT;
+       struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+       if (part == NULL) {
+               return luaL_error (L, "invalid arguments");
+       }
+
+       return lua_mimepart_get_type_common (L, part->detected_ct, TRUE);
 }
 
 static gint