git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Detect part types in mime parser
author Vsevolod Stakhov <vsevolod@rspamd.com>
Mon, 15 Sep 2025 10:06:14 +0000 (11:06 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Mon, 15 Sep 2025 10:06:14 +0000 (11:06 +0100)
lualib/lua_magic/init.lua
src/libmime/message.c
src/libmime/mime_parser.c
src/libmime/mime_parser.h
src/libserver/cfg_file.h

index 38bfddbf299bfc272d5cdb32dfdb10d06b4d1deb..cef1ddcea8d3db75eb1c4722b36a478385cbc7cc 100644 (file)
@@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-]]--
+]] --
 
 --[[[
 -- @module lua_magic
@@ -57,17 +57,17 @@ local function process_patterns(log_obj)
         end
 
         lua_util.debugm(N, log_obj, 'add tail pattern %s for ext %s',
-            str, pattern.ext)
+          str, pattern.ext)
       elseif match.position < short_match_limit then
         short_patterns[#short_patterns + 1] = {
           str, match, pattern
         }
         if str:sub(1, 1) == '^' then
           lua_util.debugm(N, log_obj, 'add head pattern %s for ext %s',
-              str, pattern.ext)
+            str, pattern.ext)
         else
           lua_util.debugm(N, log_obj, 'add short pattern %s for ext %s',
-              str, pattern.ext)
+            str, pattern.ext)
         end
 
         if max_short_offset < match.position then
@@ -79,7 +79,7 @@ local function process_patterns(log_obj)
         }
 
         lua_util.debugm(N, log_obj, 'add long pattern %s for ext %s',
-            str, pattern.ext)
+          str, pattern.ext)
       end
     else
       processed_patterns[#processed_patterns + 1] = {
@@ -87,7 +87,7 @@ local function process_patterns(log_obj)
       }
 
       lua_util.debugm(N, log_obj, 'add long pattern %s for ext %s',
-          str, pattern.ext)
+        str, pattern.ext)
     end
   end
 
@@ -133,25 +133,25 @@ local function process_patterns(log_obj)
         fun.map(function(t)
           return t[1]
         end, processed_patterns)),
-        compile_flags
+      compile_flags
     )
     compiled_short_patterns = rspamd_trie.create(fun.totable(
         fun.map(function(t)
           return t[1]
         end, short_patterns)),
-        compile_flags
+      compile_flags
     )
     compiled_tail_patterns = rspamd_trie.create(fun.totable(
         fun.map(function(t)
           return t[1]
         end, tail_patterns)),
-        compile_flags
+      compile_flags
     )
 
     lua_util.debugm(N, log_obj,
-        'compiled %s (%s short; %s long; %s tail) patterns',
-        #processed_patterns + #short_patterns + #tail_patterns,
-        #short_patterns, #processed_patterns, #tail_patterns)
+      'compiled %s (%s short; %s long; %s tail) patterns',
+      #processed_patterns + #short_patterns + #tail_patterns,
+      #short_patterns, #processed_patterns, #tail_patterns)
   end
 end
 
@@ -173,7 +173,7 @@ local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_
     end
 
     lua_util.debugm(N, log_obj, 'add pattern for %s, weight %s, total weight %s',
-        ext, weight, res[ext])
+      ext, weight, res[ext])
   end
 
   local function match_position(pos, expected)
@@ -224,7 +224,7 @@ local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_
 
       for _, pos in ipairs(matched_positions) do
         lua_util.debugm(N, log_obj, 'found match %s at offset %s(from %s)',
-            pattern.ext, pos, offset)
+          pattern.ext, pos, offset)
         if match_position(pos + offset, position) then
           if match.heuristic then
             local ext, weight = match.heuristic(input, log_obj, pos + offset, part)
@@ -247,7 +247,7 @@ local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_
         local matched = false
         for _, pos in ipairs(matched_positions) do
           lua_util.debugm(N, log_obj, 'found match %s at offset %s(from %s)',
-              pattern.ext, pos, offset)
+            pattern.ext, pos, offset)
           if not match_position(pos + offset, position) then
             matched = true
             matched_pos = pos
@@ -275,7 +275,6 @@ local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_
       end
     end
   end
-
 end
 
 local function process_detected(res)
@@ -312,13 +311,13 @@ exports.detect = function(part, log_obj)
     if inplen > min_tail_offset then
       local tail = input:span(inplen - min_tail_offset, min_tail_offset)
       match_chunk(tail, input, inplen, inplen - min_tail_offset,
-          compiled_tail_patterns, tail_patterns, log_obj, res, part)
+        compiled_tail_patterns, tail_patterns, log_obj, res, part)
     end
 
     -- Try short match
     local head = input:span(1, math.min(max_short_offset, inplen))
     match_chunk(head, input, inplen, 0,
-        compiled_short_patterns, short_patterns, log_obj, res, part)
+      compiled_short_patterns, short_patterns, log_obj, res, part)
 
     -- Check if we have enough data or go to long patterns
     local extensions, confidence = process_detected(res)
@@ -332,17 +331,17 @@ exports.detect = function(part, log_obj)
     if #input > exports.chunk_size * 3 then
       -- Chunked version as input is too long
       local chunk1, chunk2 = input:span(1, exports.chunk_size * 2),
-      input:span(inplen - exports.chunk_size, exports.chunk_size)
+          input:span(inplen - exports.chunk_size, exports.chunk_size)
       local offset1, offset2 = 0, inplen - exports.chunk_size
 
       match_chunk(chunk1, input, inplen,
-          offset1, compiled_patterns, processed_patterns, log_obj, res, part)
+        offset1, compiled_patterns, processed_patterns, log_obj, res, part)
       match_chunk(chunk2, input, inplen,
-          offset2, compiled_patterns, processed_patterns, log_obj, res, part)
+        offset2, compiled_patterns, processed_patterns, log_obj, res, part)
     else
       -- Input is short enough to match it at all
       match_chunk(input, input, inplen, 0,
-          compiled_patterns, processed_patterns, log_obj, res, part)
+        compiled_patterns, processed_patterns, log_obj, res, part)
     end
   else
     -- Table input is NYI
@@ -372,6 +371,18 @@ exports.detect_mime_part = function(part, log_obj)
     return ext, types[ext]
   end
 
+  -- Fallback by filename extension (e.g. .eml attachments with generic content-type)
+  local fname
+  if part.get_filename then
+    fname = part:get_filename()
+  end
+  if type(fname) == 'string' then
+    local lfn = fname:lower()
+    if #lfn > 4 and lfn:sub(-4) == '.eml' then
+      return 'eml', types['eml']
+    end
+  end
+
   -- Text/html and other parts
   ext, weight = heuristics.text_part_heuristic(part, log_obj)
   if ext and weight and weight > 20 then
@@ -385,4 +396,4 @@ exports.chunk_size = 32768
 
 exports.types = types
 
-return exports
\ No newline at end of file
+return exports
index 8442c80ac82a5597034fb4bbad3f57ac982f4f8a..cba061d829a0c13450be10306ffa16ae445b6ed4 100644 (file)
 #include <unicode/uchar.h>
 #include "sodium.h"
 #include "libserver/cfg_file_private.h"
-#include "lua/lua_common.h"
+#define RSPAMD_TOKENIZER_INTERNAL
 #include "contrib/uthash/utlist.h"
 #include "contrib/t1ha/t1ha.h"
-#include "received.h"
-#define RSPAMD_TOKENIZER_INTERNAL
+#include "mime_parser.h"
 #include "libstat/tokenizers/custom_tokenizer.h"
+#include "received.h"
 
 #define GTUBE_SYMBOL "GTUBE"
 
@@ -989,8 +989,38 @@ rspamd_message_from_data(struct rspamd_task *task, const unsigned char *start,
        else if (task->cfg && task->cfg->libs_ctx) {
                lua_State *L = task->cfg->lua_state;
 
-               if (rspamd_lua_require_function(L,
-                                                                               "lua_magic", "detect_mime_part")) {
+               if (task->cfg->mime_parser_cfg &&
+                       rspamd_mime_parser_get_lua_magic_cbref(task->cfg->mime_parser_cfg) != -1) {
+                       struct rspamd_mime_part **pmime;
+                       struct rspamd_task **ptask;
+
+                       lua_rawgeti(L, LUA_REGISTRYINDEX, rspamd_mime_parser_get_lua_magic_cbref(task->cfg->mime_parser_cfg));
+                       pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
+                       rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
+                       *pmime = part;
+                       ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
+                       rspamd_lua_setclass(L, rspamd_task_classname, -1);
+                       *ptask = task;
+
+                       if (lua_pcall(L, 2, 2, 0) != 0) {
+                               msg_err_task("cannot detect type: %s", lua_tostring(L, -1));
+                       }
+                       else {
+                               if (lua_istable(L, -1)) {
+                                       lua_pushstring(L, "ct");
+                                       lua_gettable(L, -2);
+
+                                       if (lua_isstring(L, -1)) {
+                                               mb = rspamd_mempool_strdup(task->task_pool,
+                                                                                                  lua_tostring(L, -1));
+                                       }
+                               }
+                       }
+
+                       lua_settop(L, 0);
+               }
+               else if (rspamd_lua_require_function(L,
+                                                                                        "lua_magic", "detect_mime_part")) {
 
                        struct rspamd_mime_part **pmime;
                        struct rspamd_task **ptask;
@@ -1405,7 +1435,7 @@ void rspamd_message_process(struct rspamd_task *task)
        unsigned int tw, *ptw, dw;
        struct rspamd_mime_part *part;
        lua_State *L = NULL;
-       int magic_func_pos = -1, content_func_pos = -1, old_top = -1, funcs_top = -1;
+       int content_func_pos = -1, old_top = -1, funcs_top = -1;
 
        if (task->cfg) {
                L = task->cfg->lua_state;
@@ -1417,13 +1447,7 @@ void rspamd_message_process(struct rspamd_task *task)
                old_top = lua_gettop(L);
        }
 
-       if (L && rspamd_lua_require_function(L,
-                                                                                "lua_magic", "detect_mime_part")) {
-               magic_func_pos = lua_gettop(L);
-       }
-       else {
-               msg_err_task("cannot require lua_magic.detect_mime_part");
-       }
+       /* lua_magic is preloaded by mime parser init; do not require here */
 
        if (L && rspamd_lua_require_function(L,
                                                                                 "lua_content", "maybe_process_mime_part")) {
@@ -1441,75 +1465,7 @@ void rspamd_message_process(struct rspamd_task *task)
 
        PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
        {
-               if (magic_func_pos != -1 && part->parsed_data.len > 0) {
-                       struct rspamd_mime_part **pmime;
-                       struct rspamd_task **ptask;
-
-                       lua_pushcfunction(L, &rspamd_lua_traceback);
-                       int err_idx = lua_gettop(L);
-                       lua_pushvalue(L, magic_func_pos);
-                       pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
-                       rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
-                       *pmime = part;
-                       ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
-                       rspamd_lua_setclass(L, rspamd_task_classname, -1);
-                       *ptask = task;
-
-                       if (lua_pcall(L, 2, 2, err_idx) != 0) {
-                               msg_err_task("cannot detect type: %s", lua_tostring(L, -1));
-                       }
-                       else {
-                               if (lua_istable(L, -1)) {
-                                       const char *mb;
-
-                                       /* First returned value */
-                                       part->detected_ext = rspamd_mempool_strdup(task->task_pool,
-                                                                                                                          lua_tostring(L, -2));
-
-                                       lua_pushstring(L, "ct");
-                                       lua_gettable(L, -2);
-
-                                       if (lua_isstring(L, -1)) {
-                                               mb = lua_tostring(L, -1);
-
-                                               if (mb) {
-                                                       rspamd_ftok_t srch;
-
-                                                       srch.begin = mb;
-                                                       srch.len = strlen(mb);
-                                                       part->detected_ct = rspamd_content_type_parse(srch.begin,
-                                                                                                                                                 srch.len,
-                                                                                                                                                 task->task_pool);
-                                               }
-                                       }
-
-                                       lua_pop(L, 1);
-
-                                       lua_pushstring(L, "type");
-                                       lua_gettable(L, -2);
-
-                                       if (lua_isstring(L, -1)) {
-                                               part->detected_type = rspamd_mempool_strdup(task->task_pool,
-                                                                                                                                       lua_tostring(L, -1));
-                                       }
-
-                                       lua_pop(L, 1);
-
-                                       lua_pushstring(L, "no_text");
-                                       lua_gettable(L, -2);
-
-                                       if (lua_isboolean(L, -1)) {
-                                               if (!!lua_toboolean(L, -1)) {
-                                                       part->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION;
-                                               }
-                                       }
-
-                                       lua_pop(L, 1);
-                               }
-                       }
-
-                       lua_settop(L, funcs_top);
-               }
+               /* detected_* are already set by mime_parser; no extra lua_magic call here */
 
                /* Now detect content */
                if (content_func_pos != -1 && part->parsed_data.len > 0 &&
index 1fe8b86e35cfa3d1ad4157d5d73bccb517a86c89..751cc1ee045f22fe2588d8c818552edc780d0b50 100644 (file)
 #include "multipattern.h"
 #include "contrib/libottery/ottery.h"
 #include "contrib/uthash/utlist.h"
+#include "lua/lua_common.h"
+#include "lua/lua_classnames.h"
 #include <openssl/cms.h>
 #include <openssl/pkcs7.h>
 #include "rspamd_simdutf.h"
 
-struct rspamd_mime_parser_lib_ctx {
+struct rspamd_mime_parser_config {
        struct rspamd_multipattern *mp_boundary;
        unsigned char hkey[rspamd_cryptobox_SIPKEYBYTES]; /* Key for hashing */
        unsigned int key_usages;
+       int lua_magic_detect_cbref; /* Lua registry ref to lua_magic.detect_mime_part; -1 while unresolved */
+       lua_State *L; /* Lua state copied from cfg->lua_state by rspamd_mime_parser_init_shared(); NULL before that */
 };
 
-struct rspamd_mime_parser_lib_ctx *lib_ctx = NULL;
+static struct rspamd_mime_parser_config *mime_parser_cfg = NULL;
+
+/**
+ * Create (on first call) the file-level mime parser configuration and attach
+ * it to `cfg`.
+ *
+ * The first call allocates the shared object, compiles the boundary
+ * multipattern ("\r--" and "\n--") and seeds the hashing key. Every call then
+ * binds the object to `cfg->lua_state` and, when a Lua state is available,
+ * caches a registry reference to `lua_magic.detect_mime_part`.
+ *
+ * @param cfg configuration to bind; its `mime_parser_cfg` field is set
+ * @return the shared parser configuration (same pointer on every call)
+ */
+struct rspamd_mime_parser_config *
+rspamd_mime_parser_init_shared(struct rspamd_config *cfg)
+{
+       lua_State *L = (lua_State *) cfg->lua_state;
+
+       if (mime_parser_cfg == NULL) {
+               mime_parser_cfg = g_malloc0(sizeof(*mime_parser_cfg));
+               mime_parser_cfg->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT);
+               g_assert(mime_parser_cfg->mp_boundary != NULL);
+               rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\r--", 0);
+               rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\n--", 0);
+
+               GError *err = NULL;
+               if (!rspamd_multipattern_compile(mime_parser_cfg->mp_boundary, RSPAMD_MULTIPATTERN_COMPILE_NO_FS, &err)) {
+                       msg_err("fatal error: cannot compile multipattern for mime parser boundaries: %e", err);
+                       g_error_free(err);
+                       g_abort();
+               }
+               ottery_rand_bytes(mime_parser_cfg->hkey, sizeof(mime_parser_cfg->hkey));
+               mime_parser_cfg->key_usages = 0;
+               mime_parser_cfg->lua_magic_detect_cbref = -1;
+       }
+
+       if (mime_parser_cfg->L != L) {
+               /*
+                * A registry reference is just an integer key into the registry of
+                * the state that issued it. When the bound state changes, the cached
+                * reference would address an unrelated (or empty) slot in the new
+                * state, so drop it and resolve it again below. The old state is not
+                * touched here because nothing visible guarantees it is still open.
+                */
+               mime_parser_cfg->lua_magic_detect_cbref = -1;
+               mime_parser_cfg->L = L;
+       }
+
+       if (L && mime_parser_cfg->lua_magic_detect_cbref == -1) {
+               int old_top = lua_gettop(L);
+               if (rspamd_lua_require_function(L, "lua_magic", "detect_mime_part")) {
+                       /* luaL_ref pops the function left on top of the stack */
+                       mime_parser_cfg->lua_magic_detect_cbref = luaL_ref(L, LUA_REGISTRYINDEX);
+               }
+               /* Restore the stack regardless of whether the lookup succeeded */
+               lua_settop(L, old_top);
+       }
+
+       cfg->mime_parser_cfg = mime_parser_cfg;
+       return mime_parser_cfg;
+}
+
+/**
+ * Release hook paired with rspamd_mime_parser_init_shared().
+ * Intentionally a no-op: the shared object's lifetime is tied to the process.
+ */
+void rspamd_mime_parser_free_shared(struct rspamd_mime_parser_config *unused)
+{
+       /* Argument is accepted for API symmetry only */
+       (void) unused;
+}
+
+/**
+ * Read the cached Lua registry reference for the lua_magic detection callback.
+ *
+ * @param cfg parser configuration, may be NULL
+ * @return the stored reference, or -1 when `cfg` is NULL
+ */
+int rspamd_mime_parser_get_lua_magic_cbref(const struct rspamd_mime_parser_config *cfg)
+{
+       return cfg != NULL ? cfg->lua_magic_detect_cbref : -1;
+}
 
 static const unsigned int max_nested = 64;
 static const unsigned int max_key_usages = 10000;
@@ -56,7 +108,7 @@ struct rspamd_mime_boundary {
        int flags;
 };
 
-struct rspamd_mime_parser_ctx {
+struct rspamd_mime_parser_runtime {
        GPtrArray *stack;   /* Stack of parts */
        GArray *boundaries; /* Boundaries found in the whole message */
        const char *start;
@@ -69,23 +121,23 @@ struct rspamd_mime_parser_ctx {
 static enum rspamd_mime_parse_error
 rspamd_mime_parse_multipart_part(struct rspamd_task *task,
                                                                 struct rspamd_mime_part *part,
-                                                                struct rspamd_mime_parser_ctx *st,
+                                                                struct rspamd_mime_parser_runtime *st,
                                                                 GError **err);
 static enum rspamd_mime_parse_error
 rspamd_mime_parse_message(struct rspamd_task *task,
                                                  struct rspamd_mime_part *part,
-                                                 struct rspamd_mime_parser_ctx *st,
+                                                 struct rspamd_mime_parser_runtime *st,
                                                  GError **err);
 static enum rspamd_mime_parse_error
 rspamd_mime_parse_normal_part(struct rspamd_task *task,
                                                          struct rspamd_mime_part *part,
-                                                         struct rspamd_mime_parser_ctx *st,
+                                                         struct rspamd_mime_parser_runtime *st,
                                                          struct rspamd_content_type *ct,
                                                          GError **err);
 
 static enum rspamd_mime_parse_error
 rspamd_mime_process_multipart_node(struct rspamd_task *task,
-                                                                  struct rspamd_mime_parser_ctx *st,
+                                                                  struct rspamd_mime_parser_runtime *st,
                                                                   struct rspamd_mime_part *multipart,
                                                                   const char *start, const char *end,
                                                                   gboolean is_finished,
@@ -162,19 +214,22 @@ rspamd_cte_from_string(const char *str)
 static void
 rspamd_mime_parser_init_lib(void)
 {
-       lib_ctx = g_malloc0(sizeof(*lib_ctx));
-       lib_ctx->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT);
-       g_assert(lib_ctx->mp_boundary != NULL);
-       rspamd_multipattern_add_pattern(lib_ctx->mp_boundary, "\r--", 0);
-       rspamd_multipattern_add_pattern(lib_ctx->mp_boundary, "\n--", 0);
+       mime_parser_cfg = g_malloc0(sizeof(*mime_parser_cfg)); /* NOTE(review): allocates unconditionally; rspamd_mime_parser_init_shared() performs the same setup when mime_parser_cfg is NULL - confirm both paths cannot run */
+       mime_parser_cfg->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT);
+       g_assert(mime_parser_cfg->mp_boundary != NULL);
+       rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\r--", 0);
+       rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\n--", 0);
 
        GError *err = NULL;
-       if (!rspamd_multipattern_compile(lib_ctx->mp_boundary, RSPAMD_MULTIPATTERN_COMPILE_NO_FS, &err)) {
+       if (!rspamd_multipattern_compile(mime_parser_cfg->mp_boundary, RSPAMD_MULTIPATTERN_COMPILE_NO_FS, &err)) {
                msg_err("fatal error: cannot compile multipattern for mime parser boundaries: %e", err);
                g_error_free(err);
                g_abort();
        }
-       ottery_rand_bytes(lib_ctx->hkey, sizeof(lib_ctx->hkey));
+       ottery_rand_bytes(mime_parser_cfg->hkey, sizeof(mime_parser_cfg->hkey));
+       mime_parser_cfg->key_usages = 0; /* already zero from g_malloc0; set explicitly */
+       mime_parser_cfg->L = NULL; /* this path binds no Lua state */
+       mime_parser_cfg->lua_magic_detect_cbref = -1; /* -1 marks the lua_magic callback as unresolved */
 }
 
 static enum rspamd_cte
@@ -398,7 +453,8 @@ rspamd_mime_part_get_cte(struct rspamd_task *task,
        enum rspamd_cte cte = RSPAMD_CTE_UNKNOWN;
        gboolean parent_propagated = FALSE;
 
-       hdr = rspamd_message_get_header_from_hash(hdrs, "Content-Transfer-Encoding", FALSE);
+       hdr = rspamd_message_get_header_from_hash(hdrs,
+                                                                                         "Content-Transfer-Encoding", FALSE);
 
        if (hdr == NULL) {
                if (part->parent_part && part->parent_part->cte != RSPAMD_CTE_UNKNOWN &&
@@ -648,7 +704,7 @@ void rspamd_mime_parser_calc_digest(struct rspamd_mime_part *part)
 static enum rspamd_mime_parse_error
 rspamd_mime_parse_normal_part(struct rspamd_task *task,
                                                          struct rspamd_mime_part *part,
-                                                         struct rspamd_mime_parser_ctx *st,
+                                                         struct rspamd_mime_parser_runtime *st,
                                                          struct rspamd_content_type *ct,
                                                          GError **err)
 {
@@ -845,10 +901,11 @@ rspamd_mime_parse_normal_part(struct rspamd_task *task,
        return RSPAMD_MIME_PARSE_OK;
 }
 
+
 struct rspamd_mime_multipart_cbdata {
        struct rspamd_task *task;
        struct rspamd_mime_part *multipart;
-       struct rspamd_mime_parser_ctx *st;
+       struct rspamd_mime_parser_runtime *st;
        const char *part_start;
        rspamd_ftok_t *cur_boundary;
        uint64_t bhash;
@@ -857,7 +914,7 @@ struct rspamd_mime_multipart_cbdata {
 
 static enum rspamd_mime_parse_error
 rspamd_mime_process_multipart_node(struct rspamd_task *task,
-                                                                  struct rspamd_mime_parser_ctx *st,
+                                                                  struct rspamd_mime_parser_runtime *st,
                                                                   struct rspamd_mime_part *multipart,
                                                                   const char *start, const char *end,
                                                                   gboolean is_finished,
@@ -996,7 +1053,123 @@ rspamd_mime_process_multipart_node(struct rspamd_task *task,
                }
        }
        else {
+               /* First, decode the part normally */
                ret = rspamd_mime_parse_normal_part(task, npart, st, sel, err);
+
+               if (ret == RSPAMD_MIME_PARSE_OK) {
+                       /* Ask lua_magic if this is a message (e.g. .eml) */
+                       lua_State *L = NULL;
+                       int old_top = -1, err_idx;
+                       gboolean promote_to_message = FALSE;
+
+                       if (task->cfg) {
+                               L = task->cfg->lua_state;
+                       }
+
+                       if (L) {
+                               old_top = lua_gettop(L);
+                               lua_pushcfunction(L, &rspamd_lua_traceback);
+                               err_idx = lua_gettop(L);
+
+                               if (task->cfg->mime_parser_cfg && task->cfg->mime_parser_cfg->lua_magic_detect_cbref != -1) {
+                                       lua_rawgeti(L, LUA_REGISTRYINDEX, task->cfg->mime_parser_cfg->lua_magic_detect_cbref);
+                                       struct rspamd_mime_part **pmime;
+                                       struct rspamd_task **ptask;
+
+                                       pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
+                                       rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
+                                       *pmime = npart;
+                                       ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
+                                       rspamd_lua_setclass(L, rspamd_task_classname, -1);
+                                       *ptask = task;
+
+                                       if (lua_pcall(L, 2, 2, err_idx) != 0) {
+                                               msg_err_task("cannot detect type (lua_magic): %s", lua_tostring(L, -1));
+                                       }
+                                       else {
+                                               /* Stack: [traceback][ext][table] */
+                                               if (lua_istable(L, -1)) {
+                                                       /* Fill detected_ext */
+                                                       if (lua_isstring(L, -2)) {
+                                                               npart->detected_ext = rspamd_mempool_strdup(task->task_pool,
+                                                                                                                                                       lua_tostring(L, -2));
+                                                       }
+
+                                                       /* detected_ct */
+                                                       lua_pushstring(L, "ct");
+                                                       lua_gettable(L, -2);
+
+                                                       if (lua_isstring(L, -1)) {
+                                                               const char *mb = lua_tostring(L, -1);
+
+                                                               if (mb) {
+                                                                       rspamd_ftok_t srch;
+
+                                                                       srch.begin = mb;
+                                                                       srch.len = strlen(mb);
+                                                                       npart->detected_ct = rspamd_content_type_parse(srch.begin,
+                                                                                                                                                                  srch.len,
+                                                                                                                                                                  task->task_pool);
+                                                               }
+                                                       }
+
+                                                       lua_pop(L, 1);
+
+                                                       /* detected_type and promotion */
+                                                       lua_pushstring(L, "type");
+                                                       lua_gettable(L, -2);
+
+                                                       if (lua_isstring(L, -1)) {
+                                                               const char *t = lua_tostring(L, -1);
+                                                               if (t) {
+                                                                       npart->detected_type = rspamd_mempool_strdup(task->task_pool, t);
+                                                                       if (strcmp(t, "message") == 0) {
+                                                                               promote_to_message = TRUE;
+                                                                       }
+                                                               }
+                                                       }
+
+                                                       lua_pop(L, 1);
+
+                                                       /* no_text flag */
+                                                       lua_pushstring(L, "no_text");
+                                                       lua_gettable(L, -2);
+
+                                                       if (lua_isboolean(L, -1)) {
+                                                               if (!!lua_toboolean(L, -1)) {
+                                                                       npart->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION;
+                                                               }
+                                                       }
+
+                                                       lua_pop(L, 1);
+
+                                                       /* ext fallback for promotion */
+                                                       if (!promote_to_message && lua_isstring(L, -2)) {
+                                                               const char *ext = lua_tostring(L, -2);
+                                                               if (ext && g_ascii_strcasecmp(ext, "eml") == 0) {
+                                                                       promote_to_message = TRUE;
+                                                               }
+                                                       }
+                                               }
+                                       }
+
+                                       /* Clean stack */
+                                       lua_settop(L, old_top);
+                               }
+                               else {
+                                       /* Pop traceback */
+                                       lua_settop(L, old_top);
+                               }
+                       }
+
+                       if (promote_to_message) {
+                               msg_debug_mime("treat part as embedded message (lua_magic)");
+                               st->nesting++;
+                               g_ptr_array_add(st->stack, npart);
+                               npart->part_type = RSPAMD_MIME_PART_MESSAGE;
+                               ret = rspamd_mime_parse_message(task, npart, st, err);
+                       }
+               }
        }
 
        return ret;
@@ -1005,7 +1178,7 @@ rspamd_mime_process_multipart_node(struct rspamd_task *task,
 static enum rspamd_mime_parse_error
 rspamd_mime_parse_multipart_cb(struct rspamd_task *task,
                                                           struct rspamd_mime_part *multipart,
-                                                          struct rspamd_mime_parser_ctx *st,
+                                                          struct rspamd_mime_parser_runtime *st,
                                                           struct rspamd_mime_multipart_cbdata *cb,
                                                           struct rspamd_mime_boundary *b)
 {
@@ -1048,7 +1221,7 @@ rspamd_mime_parse_multipart_cb(struct rspamd_task *task,
 static enum rspamd_mime_parse_error
 rspamd_multipart_boundaries_filter(struct rspamd_task *task,
                                                                   struct rspamd_mime_part *multipart,
-                                                                  struct rspamd_mime_parser_ctx *st,
+                                                                  struct rspamd_mime_parser_runtime *st,
                                                                   struct rspamd_mime_multipart_cbdata *cb)
 {
        struct rspamd_mime_boundary *cur;
@@ -1162,7 +1335,7 @@ rspamd_multipart_boundaries_filter(struct rspamd_task *task,
 static enum rspamd_mime_parse_error
 rspamd_mime_parse_multipart_part(struct rspamd_task *task,
                                                                 struct rspamd_mime_part *part,
-                                                                struct rspamd_mime_parser_ctx *st,
+                                                                struct rspamd_mime_parser_runtime *st,
                                                                 GError **err)
 {
        struct rspamd_mime_multipart_cbdata cbdata;
@@ -1192,7 +1365,7 @@ rspamd_mime_parse_multipart_part(struct rspamd_task *task,
                cbdata.cur_boundary = &part->ct->boundary;
                rspamd_cryptobox_siphash((unsigned char *) &cbdata.bhash,
                                                                 cbdata.cur_boundary->begin, cbdata.cur_boundary->len,
-                                                                lib_ctx->hkey);
+                                                                mime_parser_cfg->hkey);
                msg_debug_mime("hash: %T -> %L", cbdata.cur_boundary, cbdata.bhash);
        }
        else {
@@ -1223,7 +1396,7 @@ rspamd_mime_preprocess_cb(struct rspamd_multipattern *mp,
        gsize blen;
        gboolean closing = FALSE;
        struct rspamd_mime_boundary b;
-       struct rspamd_mime_parser_ctx *st = context;
+       struct rspamd_mime_parser_runtime *st = context;
        struct rspamd_task *task;
 
        task = st->task;
@@ -1307,7 +1480,7 @@ rspamd_mime_preprocess_cb(struct rspamd_multipattern *mp,
                        }
 
                        rspamd_cryptobox_siphash((unsigned char *) &b.hash, lc_copy, blen,
-                                                                        lib_ctx->hkey);
+                                                                        mime_parser_cfg->hkey);
                        msg_debug_mime("normal hash: %*s -> %L, %d boffset, %d data offset",
                                                   (int) blen, lc_copy, b.hash, (int) b.boundary, (int) b.start);
 
@@ -1315,7 +1488,7 @@ rspamd_mime_preprocess_cb(struct rspamd_multipattern *mp,
                                b.flags = RSPAMD_MIME_BOUNDARY_FLAG_CLOSED;
                                rspamd_cryptobox_siphash((unsigned char *) &b.closed_hash, lc_copy,
                                                                                 blen + 2,
-                                                                                lib_ctx->hkey);
+                                                                                mime_parser_cfg->hkey);
                                msg_debug_mime("closing hash: %*s -> %L, %d boffset, %d data offset",
                                                           (int) blen + 2, lc_copy,
                                                           b.closed_hash,
@@ -1406,17 +1579,17 @@ end:
 static void
 rspamd_mime_preprocess_message(struct rspamd_task *task,
                                                           struct rspamd_mime_part *top,
-                                                          struct rspamd_mime_parser_ctx *st)
+                                                          struct rspamd_mime_parser_runtime *st)
 {
 
        if (top->raw_data.begin >= st->pos) {
-               rspamd_multipattern_lookup(lib_ctx->mp_boundary,
+               rspamd_multipattern_lookup(mime_parser_cfg->mp_boundary,
                                                                   top->raw_data.begin - 1,
                                                                   top->raw_data.len + 1,
                                                                   rspamd_mime_preprocess_cb, st, NULL);
        }
        else {
-               rspamd_multipattern_lookup(lib_ctx->mp_boundary,
+               rspamd_multipattern_lookup(mime_parser_cfg->mp_boundary,
                                                                   st->pos,
                                                                   st->end - st->pos,
                                                                   rspamd_mime_preprocess_cb, st, NULL);
@@ -1424,7 +1597,7 @@ rspamd_mime_preprocess_message(struct rspamd_task *task,
 }
 
 static void
-rspamd_mime_parse_stack_free(struct rspamd_mime_parser_ctx *st)
+rspamd_mime_parse_stack_free(struct rspamd_mime_parser_runtime *st)
 {
        if (st) {
                g_ptr_array_free(st->stack, TRUE);
@@ -1436,7 +1609,7 @@ rspamd_mime_parse_stack_free(struct rspamd_mime_parser_ctx *st)
 static enum rspamd_mime_parse_error
 rspamd_mime_parse_message(struct rspamd_task *task,
                                                  struct rspamd_mime_part *part,
-                                                 struct rspamd_mime_parser_ctx *st,
+                                                 struct rspamd_mime_parser_runtime *st,
                                                  GError **err)
 {
        struct rspamd_content_type *ct, *sel = NULL;
@@ -1448,7 +1621,7 @@ rspamd_mime_parse_message(struct rspamd_task *task,
        unsigned int i;
        enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_OK;
        GString str;
-       struct rspamd_mime_parser_ctx *nst = st;
+       struct rspamd_mime_parser_runtime *nst = st;
 
        if (st->nesting > max_nested) {
                g_set_error(err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d",
@@ -1732,17 +1905,17 @@ rspamd_mime_parse_message(struct rspamd_task *task,
 enum rspamd_mime_parse_error
 rspamd_mime_parse_task(struct rspamd_task *task, GError **err)
 {
-       struct rspamd_mime_parser_ctx *st;
+       struct rspamd_mime_parser_runtime *st;
        enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_OK;
 
-       if (lib_ctx == NULL) {
-               rspamd_mime_parser_init_lib();
+       if (mime_parser_cfg == NULL) {
+               rspamd_mime_parser_init_shared(task->cfg);
        }
 
-       if (++lib_ctx->key_usages > max_key_usages) {
+       if (++mime_parser_cfg->key_usages > max_key_usages) {
                /* Regenerate siphash key */
-               ottery_rand_bytes(lib_ctx->hkey, sizeof(lib_ctx->hkey));
-               lib_ctx->key_usages = 0;
+               ottery_rand_bytes(mime_parser_cfg->hkey, sizeof(mime_parser_cfg->hkey));
+               mime_parser_cfg->key_usages = 0;
        }
 
        st = g_malloc0(sizeof(*st));
index aa77b2b30b09456281a8d2641cd78f88e5327f8e..6ed175dc8b8a400c7a61eb0616c4831eb297c9ea 100644 (file)
 
 #include "config.h"
 
+struct rspamd_config;
+
+struct rspamd_mime_parser_config;
+
+/*
+ * Initialize shared mime parser config (stores Lua refs, precompiled data).
+ * Takes the rspamd configuration object and returns a pointer to the opaque
+ * rspamd_mime_parser_config; the parser calls this lazily from
+ * rspamd_mime_parse_task() with task->cfg when no shared config exists yet.
+ * rspamd_mime_parser_free_shared() is the matching release call; presumably
+ * it accepts the pointer returned here - confirm ownership rules with the
+ * implementation.
+ * NOTE(review): these prototypes are declared above the `extern "C"` guard
+ * that follows in this header, so a C++ translation unit including it would
+ * see them with C++ linkage - confirm whether they should live inside the
+ * guard instead.
+ */
+struct rspamd_mime_parser_config *rspamd_mime_parser_init_shared(struct rspamd_config *cfg);
+void rspamd_mime_parser_free_shared(struct rspamd_mime_parser_config *cfg);
+
+/*
+ * Accessors: read-only getters over the shared config.
+ * The cbref getter returns an int; presumably a Lua registry reference to
+ * the lua_magic detection callback - confirm the "not set" sentinel value.
+ */
+int rspamd_mime_parser_get_lua_magic_cbref(const struct rspamd_mime_parser_config *cfg);
 
 #ifdef __cplusplus
 extern "C" {
index 76062e9b1e3fcb72e41f316d8e55e7a9d5fc3943..32168c754c937a1bef4e28d8d88d8870b0c1a254 100644 (file)
@@ -49,6 +49,7 @@ struct rspamd_external_libs_ctx;
 struct rspamd_cryptobox_pubkey;
 struct rspamd_dns_resolver;
 struct rspamd_tokenizer_manager;
+struct rspamd_mime_parser_config;
 
 /**
  * Logging type
@@ -490,7 +491,8 @@ struct rspamd_config {
        struct rspamd_monitored_ctx *monitored_ctx; /**< context for monitored resources                                        */
        void *redis_pool;                           /**< redis connection pool                                                          */
 
-       struct rspamd_re_cache *re_cache; /**< static regexp cache                                                              */
+       struct rspamd_re_cache *re_cache;                  /**< static regexp cache                                                             */
+       struct rspamd_mime_parser_config *mime_parser_cfg; /**< mime parser shared config */
 
        GHashTable *trusted_keys; /**< list of trusted public keys                                              */