git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Rework] MIME detection via Lua Magic; enforce cfg in Lua task API
author Vsevolod Stakhov <vsevolod@rspamd.com>
Mon, 15 Sep 2025 18:44:37 +0000 (19:44 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Mon, 15 Sep 2025 18:44:37 +0000 (19:44 +0100)
- Add rspamd_mime_parser_config on cfg; remove global state and lazy init
- Initialize parser config once per cfg; preload lua_magic.detect_mime_part
- Always run detection after normal part parse; promote .eml/message parts
- Preserve detected_ext/detected_ct/detected_type and NO_TEXT flag
- Remove duplicate detection from message.c; add debug logs
- Restore CTE parsing API and fix call sites
- Enforce cfg requirement in rspamd_task.load_from_string/load_from_file/create
- Fix unit tests to pass rspamd_config to load_from_string

lualib/lua_magic/heuristics.lua
lualib/lua_magic/patterns.lua
lualib/lua_magic/types.lua
src/libmime/archives.c
src/libmime/images.c
src/libmime/message.c
src/libmime/mime_parser.c
src/libmime/mime_parser.h
src/libserver/cfg_utils.cxx

index b8a1b4188622441cba9806f7c33889554eb0d91f..8258ff2493ad60ea66d9f25f42af5748f76f8597 100644 (file)
@@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-]]--
+]] --
 
 --[[[
 -- @module lua_magic/heuristics
@@ -63,16 +63,16 @@ local zip_patterns = {
 local txt_trie
 local txt_patterns = {
   html = {
-    { [=[(?i)<html[\s>]]=], 32 },
-    { [[(?i)<script\b]], 20 }, -- Commonly used by spammers
+    { [=[(?i)<html[\s>]]=],                   32 },
+    { [[(?i)<script\b]],                      20 }, -- Commonly used by spammers
     { [[<script\s+type="text\/javascript">]], 31 }, -- Another spammy pattern
-    { [[(?i)<\!DOCTYPE HTML\b]], 33 },
-    { [[(?i)<body\b]], 20 },
-    { [[(?i)<table\b]], 20 },
-    { [[(?i)<a\s]], 10 },
-    { [[(?i)<p\b]], 10 },
-    { [[(?i)<div\b]], 10 },
-    { [[(?i)<span\b]], 10 },
+    { [[(?i)<\!DOCTYPE HTML\b]],              33 },
+    { [[(?i)<body\b]],                        20 },
+    { [[(?i)<table\b]],                       20 },
+    { [[(?i)<a\s]],                           10 },
+    { [[(?i)<p\b]],                           10 },
+    { [[(?i)<div\b]],                         10 },
+    { [[(?i)<span\b]],                        10 },
   },
   csv = {
     { [[(?:[-a-zA-Z0-9_]+\s*,){2,}(?:[-a-zA-Z0-9_]+,?[ ]*[\r\n])]], 20 }
@@ -98,9 +98,9 @@ local exports = {}
 
 local function compile_tries()
   local default_compile_flags = bit.bor(rspamd_trie.flags.re,
-      rspamd_trie.flags.dot_all,
-      rspamd_trie.flags.single_match,
-      rspamd_trie.flags.no_start)
+    rspamd_trie.flags.dot_all,
+    rspamd_trie.flags.single_match,
+    rspamd_trie.flags.no_start)
   local function compile_pats(patterns, indexes, transform_func, compile_flags)
     local strs = {}
     for ext, pats in pairs(patterns) do
@@ -119,11 +119,11 @@ local function compile_tries()
     local function msoffice_pattern_transform(pat)
       return '^' ..
           table.concat(
-              fun.totable(
-                  fun.map(function(c)
-                    return c .. [[\x{00}]]
-                  end,
-                      fun.iter(pat))))
+            fun.totable(
+              fun.map(function(c)
+                  return c .. [[\x{00}]]
+                end,
+                fun.iter(pat))))
     end
     local function msoffice_clsid_transform(pat)
       local hex_table = {}
@@ -136,23 +136,23 @@ local function compile_tries()
     end
     -- Directory entries
     msoffice_trie = compile_pats(msoffice_patterns, msoffice_patterns_indexes,
-        msoffice_pattern_transform)
+      msoffice_pattern_transform)
     -- Clsids
     msoffice_trie_clsid = compile_pats(msoffice_clsids, msoffice_clsid_indexes,
-        msoffice_clsid_transform)
+      msoffice_clsid_transform)
     -- Misc zip patterns at the initial fragment
     zip_trie = compile_pats(zip_patterns, zip_patterns_indexes,
-        function(pat)
-          return pat
-        end)
+      function(pat)
+        return pat
+      end)
     -- Text patterns at the initial fragment
     txt_trie = compile_pats(txt_patterns, txt_patterns_indexes,
-        function(pat_tbl)
-          return pat_tbl[1]
-        end,
-        bit.bor(rspamd_trie.flags.re,
-            rspamd_trie.flags.dot_all,
-            rspamd_trie.flags.no_start))
+      function(pat_tbl)
+        return pat_tbl[1]
+      end,
+      bit.bor(rspamd_trie.flags.re,
+        rspamd_trie.flags.dot_all,
+        rspamd_trie.flags.no_start))
   end
 end
 
@@ -204,7 +204,7 @@ local function detect_ole_format(input, log_obj, _, part)
           for n, _ in pairs(matches) do
             if msoffice_clsid_indexes[n] then
               lua_util.debugm(N, log_obj, "found valid clsid for %s",
-                  msoffice_clsid_indexes[n][1])
+                msoffice_clsid_indexes[n][1])
               return true, msoffice_clsid_indexes[n][1]
             end
           end
@@ -273,6 +273,15 @@ local function detect_archive_flaw(part, arch, log_obj, _)
     apk = 0,
   } -- ext + confidence pairs
 
+  local function has_control_or_zw(fname)
+    -- Lua patterns, tried in order: ASCII control bytes (NUL..US), the UTF-8
+    -- encodings of U+200B..U+200D (E2 80 8B..8D), and of U+FEFF (EF BB BF)
+    for _, pat in ipairs({ "[%z\1-\31]", "\226\128[\139-\141]", "\239\187\191" }) do
+      if fname:find(pat) then return true end
+    end
+    return false
+  end
+
   -- General msoffice patterns
   local function add_msoffice_confidence(incr)
     res.docx = res.docx + incr
@@ -284,6 +293,9 @@ local function detect_archive_flaw(part, arch, log_obj, _)
     -- Find specific files/folders in zip file
     local files = arch:get_files(100) or {}
     for _, file in ipairs(files) do
+      if has_control_or_zw(file) then
+        lua_util.debugm(N, log_obj, "archive filename has control/zw chars: %s", file)
+      end
       if file == '[Content_Types].xml' then
         add_msoffice_confidence(10)
       elseif file:sub(1, 3) == 'xl/' then
@@ -316,7 +328,7 @@ local function detect_archive_flaw(part, arch, log_obj, _)
         for n, _ in pairs(matches) do
           if zip_patterns_indexes[n] then
             lua_util.debugm(N, log_obj, "found zip pattern for %s",
-                zip_patterns_indexes[n][1])
+              zip_patterns_indexes[n][1])
             return zip_patterns_indexes[n][1], 40
           end
         end
@@ -338,9 +350,9 @@ local function get_csv_grammar()
 
     csv_grammar = lpeg.Cf(lpeg.Cc(0) * field * lpeg.P((lpeg.P(',') +
         lpeg.P('\t')) * field) ^ 1 * (lpeg.S '\r\n' + -1),
-        function(acc)
-          return acc + 1
-        end)
+      function(acc)
+        return acc + 1
+      end)
   end
 
   return csv_grammar
@@ -360,14 +372,14 @@ local function validate_csv(part, content, log_obj)
 
     if not ncommas then
       lua_util.debugm(N, log_obj, "not a csv line at line number %s",
-          matched_lines)
+        matched_lines)
       return false
     end
 
     if expected_commas and ncommas ~= expected_commas then
       -- Mismatched commas
       lua_util.debugm(N, log_obj, "missmatched commas on line %s: %s != %s",
-          matched_lines, ncommas, expected_commas)
+        matched_lines, ncommas, expected_commas)
       return false
     elseif not expected_commas then
       if ncommas == 0 then
@@ -385,7 +397,7 @@ local function validate_csv(part, content, log_obj)
   end
 
   lua_util.debugm(N, log_obj, "csv content is sane: %s fields; %s lines checked",
-      expected_commas, matched_lines)
+    expected_commas, matched_lines)
 
   return true
 end
@@ -460,7 +472,7 @@ exports.text_part_heuristic = function(part, log_obj, _)
     until i > tlen
 
     lua_util.debugm(N, log_obj, "text part check: %s printable, %s non-printable, %s total",
-        tlen - non_printable, non_printable, tlen)
+      tlen - non_printable, non_printable, tlen)
     if non_printable / tlen > 0.0078125 then
       return false
     end
@@ -509,7 +521,7 @@ exports.text_part_heuristic = function(part, log_obj, _)
           if ext then
             res[ext] = (res[ext] or 0) + weight * #positions
             lua_util.debugm(N, log_obj, "found txt pattern for %s: %s, total: %s; %s/%s announced",
-                ext, weight * #positions, res[ext], mtype, msubtype)
+              ext, weight * #positions, res[ext], mtype, msubtype)
           end
         end
 
index 4a5abd8ce963c5dc21758bea76573a21c12b7310..b51deab7f61d809267815426765008a13c079940 100644 (file)
@@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-]]--
+]] --
 
 --[[[
 -- @module lua_magic/patterns
@@ -255,6 +255,47 @@ local patterns = {
       },
     }
   },
+  zip = {
+    matches = {
+      {
+        hex = [[504b0304]], -- PK\x03\x04
+        relative_position = 0,
+        weight = 60,
+      },
+    }
+  },
+  rar = {
+    matches = {
+      {
+        hex = [[526172211a0700]], -- RAR4
+        relative_position = 0,
+        weight = 60,
+      },
+      {
+        hex = [[526172211a070100]], -- RAR5
+        relative_position = 0,
+        weight = 60,
+      },
+    }
+  },
+  ['7z'] = {
+    matches = {
+      {
+        hex = [[377abcaf271c]], -- 7z signature
+        relative_position = 0,
+        weight = 60,
+      },
+    }
+  },
+  gz = {
+    matches = {
+      {
+        string = [[^\x{1f}\x{8b}\x{08}]], -- gzip with deflate method
+        position = 3,
+        weight = 60,
+      },
+    }
+  },
   xar = {
     matches = {
       {
@@ -392,6 +433,32 @@ local patterns = {
       },
     }
   },
+  webp = {
+    matches = {
+      {
+        -- RIFF....WEBP
+        string = [[^RIFF....WEBP]],
+        position = 12,
+        weight = 60,
+      },
+    }
+  },
+  svg = {
+    matches = {
+      {
+        -- Case-insensitive <svg ...> in the first chunk
+        string = [[(?i)<svg\b]],
+        position = { '>=', 0 },
+        weight = 40,
+      },
+      {
+        -- XML prolog hints
+        string = [[<\?xml\b]],
+        position = { '>=', 0 },
+        weight = 20,
+      },
+    }
+  },
   -- Other
   pgp = {
     matches = {
index ad4ae4349e112abc90046d8489bf97ba26d37e0a..a005247b594aaaa32605e9db3259ecb8c59edf65 100644 (file)
@@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-]]--
+]] --
 
 --[[[
 -- @module lua_magic/patterns
@@ -284,6 +284,17 @@ local types = {
     ct = 'image/heic',
     av_check = false,
   },
+  webp = {
+    type = 'image',
+    ct = 'image/webp',
+    av_check = false,
+  },
+  svg = {
+    type = 'image',
+    ct = 'image/svg+xml',
+    av_check = false,
+    no_text = true,
+  },
   dwg = {
     type = 'image',
     ct = 'image/vnd.dwg',
index c40c0e88a1163f5e313731409031482b85d4dc38..b02a659e2e9c96b447c3b4b6415946f76a6bbd8e 100644 (file)
@@ -1777,8 +1777,7 @@ rspamd_archive_process_7zip(struct rspamd_task *task,
                return;
        }
 
-       while ((p = rspamd_7zip_read_next_section(task, p, end, arch, part)) != NULL)
-               ;
+       while ((p = rspamd_7zip_read_next_section(task, p, end, arch, part)) != NULL);
 
        part->part_type = RSPAMD_MIME_PART_ARCHIVE;
        part->specific.arch = arch;
@@ -2026,44 +2025,37 @@ void rspamd_archives_process(struct rspamd_task *task)
 {
        unsigned int i;
        struct rspamd_mime_part *part;
-       const unsigned char rar_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07};
-       const unsigned char zip_magic[] = {0x50, 0x4b, 0x03, 0x04};
-       const unsigned char sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
-       const unsigned char gz_magic[] = {0x1F, 0x8B, 0x08};
 
        PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
        {
-               if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
-                       if (part->parsed_data.len > 0) {
-                               if (rspamd_archive_cheat_detect(part, "zip",
-                                                                                               zip_magic, sizeof(zip_magic))) {
+               if (part->parsed_data.len > 0 && part->part_type != RSPAMD_MIME_PART_ARCHIVE) {
+                       const char *ext = part->detected_ext;
+                       if (ext) {
+                               if (g_ascii_strcasecmp(ext, "zip") == 0) {
                                        rspamd_archive_process_zip(task, part);
                                }
-                               else if (rspamd_archive_cheat_detect(part, "rar",
-                                                                                                        rar_magic, sizeof(rar_magic))) {
+                               else if (g_ascii_strcasecmp(ext, "rar") == 0) {
                                        rspamd_archive_process_rar(task, part);
                                }
-                               else if (rspamd_archive_cheat_detect(part, "7z",
-                                                                                                        sz_magic, sizeof(sz_magic))) {
+                               else if (g_ascii_strcasecmp(ext, "7z") == 0) {
                                        rspamd_archive_process_7zip(task, part);
                                }
-                               else if (rspamd_archive_cheat_detect(part, "gz",
-                                                                                                        gz_magic, sizeof(gz_magic))) {
+                               else if (g_ascii_strcasecmp(ext, "gz") == 0) {
                                        rspamd_archive_process_gzip(task, part);
                                }
+                       }
 
-                               if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT) &&
-                                       part->part_type == RSPAMD_MIME_PART_ARCHIVE &&
-                                       part->specific.arch) {
-                                       struct rspamd_archive *arch = part->specific.arch;
+                       if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT) &&
+                               part->part_type == RSPAMD_MIME_PART_ARCHIVE &&
+                               part->specific.arch) {
+                               struct rspamd_archive *arch = part->specific.arch;
 
-                                       msg_info_task("found %s archive with incorrect content-type: %T/%T",
-                                                                 rspamd_archive_type_str(arch->type),
-                                                                 &part->ct->type, &part->ct->subtype);
+                               msg_info_task("found %s archive with incorrect content-type: %T/%T",
+                                                         rspamd_archive_type_str(arch->type),
+                                                         &part->ct->type, &part->ct->subtype);
 
-                                       if (!(part->ct->flags & RSPAMD_CONTENT_TYPE_MISSING)) {
-                                               part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
-                                       }
+                               if (!(part->ct->flags & RSPAMD_CONTENT_TYPE_MISSING)) {
+                                       part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
                                }
                        }
                }
index 1078baba683cdbf568fa8652a44a65c04112efe8..d137311b108af9844e7057403116376792ccc08e 100644 (file)
@@ -600,27 +600,47 @@ static bool
 process_image(struct rspamd_task *task, struct rspamd_mime_part *part)
 {
        struct rspamd_image *img;
+       const char *ext = part->detected_ext;
 
-       img = rspamd_maybe_process_image(task->task_pool, &part->parsed_data);
-
-       if (img != NULL) {
-               msg_debug_images("detected %s image of size %ud x %ud",
-                                                rspamd_image_type_str(img->type),
-                                                img->width, img->height);
-
-               if (part->cd) {
-                       img->filename = &part->cd->filename;
+       if (ext != NULL && part->parsed_data.len > 0) {
+               /* Prefer Lua Magic decision; do not re-detect by magic */
+               if (g_ascii_strcasecmp(ext, "png") == 0) {
+                       img = process_png_image(task->task_pool, &part->parsed_data);
                }
+               else if (g_ascii_strcasecmp(ext, "jpg") == 0 || g_ascii_strcasecmp(ext, "jpeg") == 0) {
+                       img = process_jpg_image(task->task_pool, &part->parsed_data);
+               }
+               else if (g_ascii_strcasecmp(ext, "gif") == 0) {
+                       img = process_gif_image(task->task_pool, &part->parsed_data);
+               }
+               else if (g_ascii_strcasecmp(ext, "bmp") == 0) {
+                       img = process_bmp_image(task->task_pool, &part->parsed_data);
+               }
+               else {
+                       /* Unsupported image subtype for structural parsing; skip without re-magic */
+                       return false;
+               }
+       }
+       else {
+               /* Fallback for legacy/unknown cases */
+               img = rspamd_maybe_process_image(task->task_pool, &part->parsed_data);
+       }
 
-               img->parent = part;
-
-               part->part_type = RSPAMD_MIME_PART_IMAGE;
-               part->specific.img = img;
+       if (img == NULL) {
+               return false;
+       }
 
-               return true;
+       img->parent = part;
+       if (part->cd) {
+               img->filename = &part->cd->filename;
        }
 
-       return false;
+       part->specific.img = img;
+       part->part_type = RSPAMD_MIME_PART_IMAGE;
+       if (part->cd == NULL) {
+               part->cd = rspamd_mempool_alloc0(task->task_pool, sizeof(*part->cd));
+       }
+       return true;
 }
 
 const char *
@@ -715,4 +735,4 @@ void rspamd_images_link(struct rspamd_task *task)
                        rspamd_image_process_part(task, part);
                }
        }
-}
\ No newline at end of file
+}
index 61f675d075f19abc8b95da59e2d3cca090a7499d..21b54e7ec92ccea2d5e0f6b2e31fe36fb7b98de3 100644 (file)
@@ -1447,6 +1447,73 @@ void rspamd_message_process(struct rspamd_task *task)
 
        rspamd_archives_process(task);
 
+       /*
+        * Second pass: call the lua_magic detection callback for every non-empty
+        * part that either has no detection result yet or was detected as a generic
+        * archive (zip/rar/7z/gz), and copy the result into detected_ext,
+        * detected_ct, detected_type and the NO_TEXT_EXTRACTION flag.
+        */
+       if (L && task->cfg->mime_parser_cfg &&
+               rspamd_mime_parser_get_lua_magic_cbref(task->cfg->mime_parser_cfg) != -1) {
+               unsigned int j;
+               struct rspamd_mime_part *pp;
+               PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), j, pp)
+               {
+                       if (pp->parsed_data.len > 0 &&
+                               (/* no detection yet */ (pp->detected_type == NULL && pp->detected_ext == NULL) ||
+                                /* refine generic archives */
+                                (pp->detected_ext && (g_ascii_strcasecmp(pp->detected_ext, "zip") == 0 ||
+                                                                          g_ascii_strcasecmp(pp->detected_ext, "rar") == 0 ||
+                                                                          g_ascii_strcasecmp(pp->detected_ext, "7z") == 0 ||
+                                                                          g_ascii_strcasecmp(pp->detected_ext, "gz") == 0)))) {
+                               struct rspamd_mime_part **pmime;
+                               struct rspamd_task **ptask;
+                               lua_pushcfunction(L, &rspamd_lua_traceback);
+                               /* Index of the traceback handler; everything below it predates this iteration */
+                               int err_idx2 = lua_gettop(L);
+                               lua_rawgeti(L, LUA_REGISTRYINDEX, rspamd_mime_parser_get_lua_magic_cbref(task->cfg->mime_parser_cfg));
+                               pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
+                               rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
+                               *pmime = pp;
+                               ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
+                               rspamd_lua_setclass(L, rspamd_task_classname, -1);
+                               *ptask = task;
+
+                               /* Two results are requested: the extension string (-2) and a type table (-1) */
+                               if (lua_pcall(L, 2, 2, err_idx2) == 0) {
+                                       if (lua_istable(L, -1)) {
+                                               const char *mb;
+                                               if (lua_isstring(L, -2)) {
+                                                       pp->detected_ext = rspamd_mempool_strdup(task->task_pool, lua_tostring(L, -2));
+                                               }
+                                               lua_pushstring(L, "ct");
+                                               lua_gettable(L, -2);
+                                               if (lua_isstring(L, -1)) {
+                                                       mb = lua_tostring(L, -1);
+                                                       if (mb) {
+                                                               rspamd_ftok_t srch;
+                                                               srch.begin = mb;
+                                                               srch.len = strlen(mb);
+                                                               pp->detected_ct = rspamd_content_type_parse(srch.begin, srch.len, task->task_pool);
+                                                       }
+                                               }
+                                               lua_pop(L, 1);
+                                               lua_pushstring(L, "type");
+                                               lua_gettable(L, -2);
+                                               if (lua_isstring(L, -1)) {
+                                                       pp->detected_type = rspamd_mempool_strdup(task->task_pool, lua_tostring(L, -1));
+                                               }
+                                               lua_pop(L, 1);
+                                               lua_pushstring(L, "no_text");
+                                               lua_gettable(L, -2);
+                                               if (lua_isboolean(L, -1) && lua_toboolean(L, -1)) {
+                                                       pp->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION;
+                                               }
+                                               lua_pop(L, 1);
+                                       }
+                               }
+                               else {
+                                       msg_err_task("second-pass detect type: %s", lua_tostring(L, -1));
+                               }
+                               /*
+                                * Drop only what this iteration pushed (traceback handler and
+                                * call results). Resetting the top to 0 would also discard any
+                                * values that were on the stack before this block ran.
+                                */
+                               lua_settop(L, err_idx2 - 1);
+                       }
+               }
+       }
+
        if (L) {
                old_top = lua_gettop(L);
        }
index d66731dd6b01d15f8b20948310c308b05afe6c53..075fec347d02c7b2ce2c11101928011d6ac086bb 100644 (file)
@@ -63,16 +63,43 @@ rspamd_mime_parser_init_shared(struct rspamd_config *cfg)
                        if (rspamd_lua_require_function(cfg->mime_parser_cfg->L, "lua_magic", "detect_mime_part")) {
                                cfg->mime_parser_cfg->lua_magic_detect_cbref = luaL_ref(cfg->mime_parser_cfg->L, LUA_REGISTRYINDEX);
                        }
+                       else {
+                               msg_err("fatal error: cannot load lua_magic.detect_mime_part (see previous errors)");
+                               lua_settop(cfg->mime_parser_cfg->L, old_top);
+                               g_abort();
+                       }
                        lua_settop(cfg->mime_parser_cfg->L, old_top);
                }
+               else if (!cfg->mime_parser_cfg->L) {
+                       msg_err("fatal error: lua state is not initialised for mime parser");
+                       g_abort();
+               }
        }
 
        return cfg->mime_parser_cfg;
 }
 
-void rspamd_mime_parser_free_shared(struct rspamd_mime_parser_config *unused)
+/*
+ * Release a shared mime parser config: drop the Lua registry reference stored
+ * in lua_magic_detect_cbref, destroy the mp_boundary multipattern and free the
+ * structure itself. Passing NULL is a no-op.
+ */
+void rspamd_mime_parser_free_shared(struct rspamd_mime_parser_config *cfg)
 {
-       /* noop: lifetime tied to process */
+       if (cfg == NULL) {
+               return;
+       }
+
+       /* Unref the Lua callback if one is registered (-1 is treated as "no reference") */
+       if (cfg->L && cfg->lua_magic_detect_cbref != -1) {
+               int old_top = lua_gettop(cfg->L);
+               luaL_unref(cfg->L, LUA_REGISTRYINDEX, cfg->lua_magic_detect_cbref);
+               cfg->lua_magic_detect_cbref = -1;
+               lua_settop(cfg->L, old_top);
+       }
+
+       /* Destroy the mp_boundary multipattern if it was created */
+       if (cfg->mp_boundary) {
+               rspamd_multipattern_destroy(cfg->mp_boundary);
+               cfg->mp_boundary = NULL;
+       }
+
+       /* NOTE(review): assumes cfg came from the g_malloc family; the allocation site is not visible here - confirm */
+       g_free(cfg);
 }
 
 int rspamd_mime_parser_get_lua_magic_cbref(const struct rspamd_mime_parser_config *cfg)
@@ -918,6 +945,7 @@ rspamd_mime_maybe_detect_type(struct rspamd_task *task,
 
        if (L && task->cfg->mime_parser_cfg &&
                rspamd_mime_parser_get_lua_magic_cbref(task->cfg->mime_parser_cfg) != -1) {
+               msg_debug_mime("will call lua_magic.detect_mime_part for part #%ud", npart->part_number);
                old_top = lua_gettop(L);
                lua_pushcfunction(L, &rspamd_lua_traceback);
                err_idx = lua_gettop(L);
@@ -1007,6 +1035,14 @@ rspamd_mime_maybe_detect_type(struct rspamd_task *task,
 
                lua_settop(L, old_top);
        }
+       else {
+               int cbref = -1;
+               if (task->cfg && task->cfg->mime_parser_cfg) {
+                       cbref = rspamd_mime_parser_get_lua_magic_cbref(task->cfg->mime_parser_cfg);
+               }
+               msg_debug_mime("skip lua_magic for part #%ud: L=%p, cbref=%d",
+                                          npart->part_number, (void *) L, cbref);
+       }
 
        /* Fallback: if nothing detected but declared CT is text, set detected_type to text */
        if (npart->detected_type == NULL && npart->ct &&
index 6ed175dc8b8a400c7a61eb0616c4831eb297c9ea..38175256cc1dec264262a8adc4ac620ff61f988b 100644 (file)
@@ -22,6 +22,10 @@ struct rspamd_config;
 
 struct rspamd_mime_parser_config;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Initialize shared mime parser config (stores Lua refs, precompiled data) */
 struct rspamd_mime_parser_config *rspamd_mime_parser_init_shared(struct rspamd_config *cfg);
 void rspamd_mime_parser_free_shared(struct rspamd_mime_parser_config *cfg);
@@ -29,10 +33,6 @@ void rspamd_mime_parser_free_shared(struct rspamd_mime_parser_config *cfg);
 /* Accessors */
 int rspamd_mime_parser_get_lua_magic_cbref(const struct rspamd_mime_parser_config *cfg);
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 struct rspamd_task;
 struct rspamd_mime_part;
 
index c22a9b877bf7408ff5edd0dbf39f14021efca7b4..1e96c320af4343d9716b047a9eaaedb708beb7ae 100644 (file)
@@ -21,6 +21,7 @@
 #include "cfg_file.h"
 #include "rspamd.h"
 #include "cfg_file_private.h"
+#include "libmime/mime_parser.h"
 
 #include "maps/map.h"
 #include "maps/map_helpers.h"
@@ -383,6 +384,12 @@ void rspamd_config_free(struct rspamd_config *cfg)
                luaL_unref(RSPAMD_LUA_CFG_STATE(cfg), LUA_REGISTRYINDEX, sc->cbref);
        }
 
+       /* Free mime parser shared config if created */
+       if (cfg->mime_parser_cfg) {
+               rspamd_mime_parser_free_shared(cfg->mime_parser_cfg);
+               cfg->mime_parser_cfg = nullptr;
+       }
+
        DL_FOREACH_SAFE(cfg->setting_ids, set, stmp)
        {
                REF_RELEASE(set);