From: Vsevolod Stakhov Date: Mon, 15 Sep 2025 10:06:14 +0000 (+0100) Subject: [Feature] Detect part types in mime parser X-Git-Tag: 3.13.0~7^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=476ff023a25c3b8458e117ed0ad596b3a9097667;p=thirdparty%2Frspamd.git [Feature] Detect part types in mime parser --- diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua index 38bfddbf29..cef1ddcea8 100644 --- a/lualib/lua_magic/init.lua +++ b/lualib/lua_magic/init.lua @@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -]]-- +]] -- --[[[ -- @module lua_magic @@ -57,17 +57,17 @@ local function process_patterns(log_obj) end lua_util.debugm(N, log_obj, 'add tail pattern %s for ext %s', - str, pattern.ext) + str, pattern.ext) elseif match.position < short_match_limit then short_patterns[#short_patterns + 1] = { str, match, pattern } if str:sub(1, 1) == '^' then lua_util.debugm(N, log_obj, 'add head pattern %s for ext %s', - str, pattern.ext) + str, pattern.ext) else lua_util.debugm(N, log_obj, 'add short pattern %s for ext %s', - str, pattern.ext) + str, pattern.ext) end if max_short_offset < match.position then @@ -79,7 +79,7 @@ local function process_patterns(log_obj) } lua_util.debugm(N, log_obj, 'add long pattern %s for ext %s', - str, pattern.ext) + str, pattern.ext) end else processed_patterns[#processed_patterns + 1] = { @@ -87,7 +87,7 @@ local function process_patterns(log_obj) } lua_util.debugm(N, log_obj, 'add long pattern %s for ext %s', - str, pattern.ext) + str, pattern.ext) end end @@ -133,25 +133,25 @@ local function process_patterns(log_obj) fun.map(function(t) return t[1] end, processed_patterns)), - compile_flags + compile_flags ) compiled_short_patterns = rspamd_trie.create(fun.totable( fun.map(function(t) return t[1] end, short_patterns)), - compile_flags + compile_flags ) compiled_tail_patterns = rspamd_trie.create(fun.totable( fun.map(function(t) return t[1] end, tail_patterns)), - compile_flags + compile_flags ) lua_util.debugm(N, log_obj, - 'compiled %s (%s short; %s long; %s tail) patterns', - #processed_patterns + #short_patterns + #tail_patterns, - #short_patterns, #processed_patterns, #tail_patterns) + 'compiled %s (%s short; %s long; %s tail) patterns', + #processed_patterns + #short_patterns + #tail_patterns, + #short_patterns, #processed_patterns, #tail_patterns) end end @@ -173,7 +173,7 @@ local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_ end lua_util.debugm(N, log_obj, 'add pattern for %s, weight %s, total weight %s', - ext, weight, res[ext]) + ext, weight, res[ext]) end local function match_position(pos, expected) @@ -224,7 +224,7 @@ local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_ for _, pos in ipairs(matched_positions) do lua_util.debugm(N, log_obj, 'found match %s at offset %s(from %s)', - pattern.ext, pos, offset) + pattern.ext, pos, offset) if match_position(pos + offset, position) then if match.heuristic then local ext, weight = match.heuristic(input, log_obj, pos + offset, part) @@ -247,7 +247,7 @@ local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_ local matched = false for _, pos in ipairs(matched_positions) do lua_util.debugm(N, log_obj, 'found match %s at offset %s(from %s)', - pattern.ext, pos, offset) + pattern.ext, pos, offset) if not match_position(pos + offset, position) then matched = true matched_pos = pos @@ -275,7 +275,6 @@ local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_ end end end - end local function process_detected(res) @@ -312,13 +311,13 @@ exports.detect = function(part, log_obj) if inplen > min_tail_offset then local tail = input:span(inplen - min_tail_offset, min_tail_offset) match_chunk(tail, input, inplen, inplen - min_tail_offset, - compiled_tail_patterns, tail_patterns, log_obj, res, part) + compiled_tail_patterns, tail_patterns, log_obj, res, part) end -- Try short match local head = input:span(1, math.min(max_short_offset, inplen)) match_chunk(head, input, inplen, 0, - compiled_short_patterns, short_patterns, log_obj, res, part) + compiled_short_patterns, short_patterns, log_obj, res, part) -- Check if we have enough data or go to long patterns local extensions, confidence = process_detected(res) @@ -332,17 +331,17 @@ exports.detect = function(part, log_obj) if #input > exports.chunk_size * 3 then -- Chunked version as input is too long local chunk1, chunk2 = input:span(1, exports.chunk_size * 2), - input:span(inplen - exports.chunk_size, exports.chunk_size) + input:span(inplen - exports.chunk_size, exports.chunk_size) local offset1, offset2 = 0, inplen - exports.chunk_size match_chunk(chunk1, input, inplen, - offset1, compiled_patterns, processed_patterns, log_obj, res, part) + offset1, compiled_patterns, processed_patterns, log_obj, res, part) match_chunk(chunk2, input, inplen, - offset2, compiled_patterns, processed_patterns, log_obj, res, part) + offset2, compiled_patterns, processed_patterns, log_obj, res, part) else -- Input is short enough to match it at all match_chunk(input, input, inplen, 0, - compiled_patterns, processed_patterns, log_obj, res, part) + compiled_patterns, processed_patterns, log_obj, res, part) end else -- Table input is NYI @@ -372,6 +371,18 @@ exports.detect_mime_part = function(part, log_obj) return ext, types[ext] end + -- Fallback by filename extension (e.g. .eml attachments with generic content-type) + local fname + if part.get_filename then + fname = part:get_filename() + end + if type(fname) == 'string' then + local lfn = fname:lower() + if #lfn > 4 and lfn:sub(-4) == '.eml' then + return 'eml', types['eml'] + end + end + -- Text/html and other parts ext, weight = heuristics.text_part_heuristic(part, log_obj) if ext and weight and weight > 20 then @@ -385,4 +396,4 @@ exports.chunk_size = 32768 exports.types = types -return exports \ No newline at end of file +return exports diff --git a/src/libmime/message.c b/src/libmime/message.c index 8442c80ac8..cba061d829 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -36,12 +36,12 @@ #include #include "sodium.h" #include "libserver/cfg_file_private.h" -#include "lua/lua_common.h" +#define RSPAMD_TOKENIZER_INTERNAL #include "contrib/uthash/utlist.h" #include "contrib/t1ha/t1ha.h" -#include "received.h" -#define RSPAMD_TOKENIZER_INTERNAL +#include "mime_parser.h" #include "libstat/tokenizers/custom_tokenizer.h" +#include "received.h" #define GTUBE_SYMBOL "GTUBE" @@ -989,8 +989,38 @@ rspamd_message_from_data(struct rspamd_task *task, const unsigned char *start, else if (task->cfg && task->cfg->libs_ctx) { lua_State *L = task->cfg->lua_state; - if (rspamd_lua_require_function(L, - "lua_magic", "detect_mime_part")) { + if (task->cfg->mime_parser_cfg && + rspamd_mime_parser_get_lua_magic_cbref(task->cfg->mime_parser_cfg) != -1) { + struct rspamd_mime_part **pmime; + struct rspamd_task **ptask; + + lua_rawgeti(L, LUA_REGISTRYINDEX, rspamd_mime_parser_get_lua_magic_cbref(task->cfg->mime_parser_cfg)); + pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *)); + rspamd_lua_setclass(L, rspamd_mimepart_classname, -1); + *pmime = part; + ptask = lua_newuserdata(L, sizeof(struct rspamd_task *)); + rspamd_lua_setclass(L, rspamd_task_classname, -1); + *ptask = task; + + if (lua_pcall(L, 2, 2, 0) != 0) { + msg_err_task("cannot detect type: %s", lua_tostring(L, -1)); + } + else { + if (lua_istable(L, -1)) { + lua_pushstring(L, "ct"); + lua_gettable(L, -2); + + if (lua_isstring(L, -1)) { + mb = rspamd_mempool_strdup(task->task_pool, + lua_tostring(L, -1)); + } + } + } + + lua_settop(L, 0); + } + else if (rspamd_lua_require_function(L, + "lua_magic", "detect_mime_part")) { struct rspamd_mime_part **pmime; struct rspamd_task **ptask; @@ -1405,7 +1435,7 @@ void rspamd_message_process(struct rspamd_task *task) unsigned int tw, *ptw, dw; struct rspamd_mime_part *part; lua_State *L = NULL; - int magic_func_pos = -1, content_func_pos = -1, old_top = -1, funcs_top = -1; + int content_func_pos = -1, old_top = -1, funcs_top = -1; if (task->cfg) { L = task->cfg->lua_state; @@ -1417,13 +1447,7 @@ void rspamd_message_process(struct rspamd_task *task) old_top = lua_gettop(L); } - if (L && rspamd_lua_require_function(L, - "lua_magic", "detect_mime_part")) { - magic_func_pos = lua_gettop(L); - } - else { - msg_err_task("cannot require lua_magic.detect_mime_part"); - } + /* lua_magic is preloaded by mime parser init; do not require here */ if (L && rspamd_lua_require_function(L, "lua_content", "maybe_process_mime_part")) { @@ -1441,75 +1465,7 @@ void rspamd_message_process(struct rspamd_task *task) PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part) { - if (magic_func_pos != -1 && part->parsed_data.len > 0) { - struct rspamd_mime_part **pmime; - struct rspamd_task **ptask; - - lua_pushcfunction(L, &rspamd_lua_traceback); - int err_idx = lua_gettop(L); - lua_pushvalue(L, magic_func_pos); - pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *)); - rspamd_lua_setclass(L, rspamd_mimepart_classname, -1); - *pmime = part; - ptask = lua_newuserdata(L, sizeof(struct rspamd_task *)); - rspamd_lua_setclass(L, rspamd_task_classname, -1); - *ptask = task; - - if (lua_pcall(L, 2, 2, err_idx) != 0) { - msg_err_task("cannot detect type: %s", lua_tostring(L, -1)); - } - else { - if (lua_istable(L, -1)) { - const char *mb; - - /* First returned value */ - part->detected_ext = rspamd_mempool_strdup(task->task_pool, - lua_tostring(L, -2)); - - lua_pushstring(L, "ct"); - lua_gettable(L, -2); - - if (lua_isstring(L, -1)) { - mb = lua_tostring(L, -1); - - if (mb) { - rspamd_ftok_t srch; - - srch.begin = mb; - srch.len = strlen(mb); - part->detected_ct = rspamd_content_type_parse(srch.begin, - srch.len, - task->task_pool); - } - } - - lua_pop(L, 1); - - lua_pushstring(L, "type"); - lua_gettable(L, -2); - - if (lua_isstring(L, -1)) { - part->detected_type = rspamd_mempool_strdup(task->task_pool, - lua_tostring(L, -1)); - } - - lua_pop(L, 1); - - lua_pushstring(L, "no_text"); - lua_gettable(L, -2); - - if (lua_isboolean(L, -1)) { - if (!!lua_toboolean(L, -1)) { - part->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION; - } - } - - lua_pop(L, 1); - } - } - - lua_settop(L, funcs_top); - } + /* detected_* are already set by mime_parser; no extra lua_magic call here */ /* Now detect content */ if (content_func_pos != -1 && part->parsed_data.len > 0 && diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c index 1fe8b86e35..751cc1ee04 100644 --- a/src/libmime/mime_parser.c +++ b/src/libmime/mime_parser.c @@ -23,17 +23,69 @@ #include "multipattern.h" #include "contrib/libottery/ottery.h" #include "contrib/uthash/utlist.h" +#include "lua/lua_common.h" +#include "lua/lua_classnames.h" #include #include #include "rspamd_simdutf.h" -struct rspamd_mime_parser_lib_ctx { +struct rspamd_mime_parser_config { struct rspamd_multipattern *mp_boundary; unsigned char hkey[rspamd_cryptobox_SIPKEYBYTES]; /* Key for hashing */ unsigned int key_usages; + int lua_magic_detect_cbref; + lua_State *L; }; -struct rspamd_mime_parser_lib_ctx *lib_ctx = NULL; +static struct rspamd_mime_parser_config *mime_parser_cfg = NULL; + +struct rspamd_mime_parser_config * +rspamd_mime_parser_init_shared(struct rspamd_config *cfg) +{ + if (mime_parser_cfg == NULL) { + mime_parser_cfg = g_malloc0(sizeof(*mime_parser_cfg)); + mime_parser_cfg->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT); + g_assert(mime_parser_cfg->mp_boundary != NULL); + rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\r--", 0); + rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\n--", 0); + + GError *err = NULL; + if (!rspamd_multipattern_compile(mime_parser_cfg->mp_boundary, RSPAMD_MULTIPATTERN_COMPILE_NO_FS, &err)) { + msg_err("fatal error: cannot compile multipattern for mime parser boundaries: %e", err); + g_error_free(err); + g_abort(); + } + ottery_rand_bytes(mime_parser_cfg->hkey, sizeof(mime_parser_cfg->hkey)); + mime_parser_cfg->key_usages = 0; + mime_parser_cfg->lua_magic_detect_cbref = -1; + } + + mime_parser_cfg->L = (lua_State *) cfg->lua_state; + + if (mime_parser_cfg->L && mime_parser_cfg->lua_magic_detect_cbref == -1) { + int old_top = lua_gettop(mime_parser_cfg->L); + if (rspamd_lua_require_function(mime_parser_cfg->L, "lua_magic", "detect_mime_part")) { + mime_parser_cfg->lua_magic_detect_cbref = luaL_ref(mime_parser_cfg->L, LUA_REGISTRYINDEX); + } + lua_settop(mime_parser_cfg->L, old_top); + } + + cfg->mime_parser_cfg = mime_parser_cfg; + return mime_parser_cfg; +} + +void rspamd_mime_parser_free_shared(struct rspamd_mime_parser_config *unused) +{ + /* noop: lifetime tied to process */ +} + +int rspamd_mime_parser_get_lua_magic_cbref(const struct rspamd_mime_parser_config *cfg) +{ + if (cfg) { + return cfg->lua_magic_detect_cbref; + } + return -1; +} static const unsigned int max_nested = 64; static const unsigned int max_key_usages = 10000; @@ -56,7 +108,7 @@ struct rspamd_mime_boundary { int flags; }; -struct rspamd_mime_parser_ctx { +struct rspamd_mime_parser_runtime { GPtrArray *stack; /* Stack of parts */ GArray *boundaries; /* Boundaries found in the whole message */ const char *start; @@ -69,23 +121,23 @@ struct rspamd_mime_parser_ctx { static enum rspamd_mime_parse_error rspamd_mime_parse_multipart_part(struct rspamd_task *task, struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_parser_runtime *st, GError **err); static enum rspamd_mime_parse_error rspamd_mime_parse_message(struct rspamd_task *task, struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_parser_runtime *st, GError **err); static enum rspamd_mime_parse_error rspamd_mime_parse_normal_part(struct rspamd_task *task, struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_parser_runtime *st, struct rspamd_content_type *ct, GError **err); static enum rspamd_mime_parse_error rspamd_mime_process_multipart_node(struct rspamd_task *task, - struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_parser_runtime *st, struct rspamd_mime_part *multipart, const char *start, const char *end, gboolean is_finished, @@ -162,19 +214,22 @@ rspamd_cte_from_string(const char *str) static void rspamd_mime_parser_init_lib(void) { - lib_ctx = g_malloc0(sizeof(*lib_ctx)); - lib_ctx->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT); - g_assert(lib_ctx->mp_boundary != NULL); - rspamd_multipattern_add_pattern(lib_ctx->mp_boundary, "\r--", 0); - rspamd_multipattern_add_pattern(lib_ctx->mp_boundary, "\n--", 0); + mime_parser_cfg = g_malloc0(sizeof(*mime_parser_cfg)); + mime_parser_cfg->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT); + g_assert(mime_parser_cfg->mp_boundary != NULL); + rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\r--", 0); + rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\n--", 0); GError *err = NULL; - if (!rspamd_multipattern_compile(lib_ctx->mp_boundary, RSPAMD_MULTIPATTERN_COMPILE_NO_FS, &err)) { + if (!rspamd_multipattern_compile(mime_parser_cfg->mp_boundary, RSPAMD_MULTIPATTERN_COMPILE_NO_FS, &err)) { msg_err("fatal error: cannot compile multipattern for mime parser boundaries: %e", err); g_error_free(err); g_abort(); } - ottery_rand_bytes(lib_ctx->hkey, sizeof(lib_ctx->hkey)); + ottery_rand_bytes(mime_parser_cfg->hkey, sizeof(mime_parser_cfg->hkey)); + mime_parser_cfg->key_usages = 0; + mime_parser_cfg->L = NULL; + mime_parser_cfg->lua_magic_detect_cbref = -1; } static enum rspamd_cte @@ -398,7 +453,8 @@ rspamd_mime_part_get_cte(struct rspamd_task *task, enum rspamd_cte cte = RSPAMD_CTE_UNKNOWN; gboolean parent_propagated = FALSE; - hdr = rspamd_message_get_header_from_hash(hdrs, "Content-Transfer-Encoding", FALSE); + hdr = rspamd_message_get_header_from_hash(hdrs, + "Content-Transfer-Encoding", FALSE); if (hdr == NULL) { if (part->parent_part && part->parent_part->cte != RSPAMD_CTE_UNKNOWN && @@ -648,7 +704,7 @@ void rspamd_mime_parser_calc_digest(struct rspamd_mime_part *part) static enum rspamd_mime_parse_error rspamd_mime_parse_normal_part(struct rspamd_task *task, struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_parser_runtime *st, struct rspamd_content_type *ct, GError **err) { @@ -845,10 +901,11 @@ rspamd_mime_parse_normal_part(struct rspamd_task *task, return RSPAMD_MIME_PARSE_OK; } + struct rspamd_mime_multipart_cbdata { struct rspamd_task *task; struct rspamd_mime_part *multipart; - struct rspamd_mime_parser_ctx *st; + struct rspamd_mime_parser_runtime *st; const char *part_start; rspamd_ftok_t *cur_boundary; uint64_t bhash; @@ -857,7 +914,7 @@ struct rspamd_mime_multipart_cbdata { static enum rspamd_mime_parse_error rspamd_mime_process_multipart_node(struct rspamd_task *task, - struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_parser_runtime *st, struct rspamd_mime_part *multipart, const char *start, const char *end, gboolean is_finished, @@ -996,7 +1053,123 @@ rspamd_mime_process_multipart_node(struct rspamd_task *task, } } else { + /* First, decode the part normally */ ret = rspamd_mime_parse_normal_part(task, npart, st, sel, err); + + if (ret == RSPAMD_MIME_PARSE_OK) { + /* Ask lua_magic if this is a message (e.g. .eml) */ + lua_State *L = NULL; + int old_top = -1, err_idx; + gboolean promote_to_message = FALSE; + + if (task->cfg) { + L = task->cfg->lua_state; + } + + if (L) { + old_top = lua_gettop(L); + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); + + if (task->cfg->mime_parser_cfg && task->cfg->mime_parser_cfg->lua_magic_detect_cbref != -1) { + lua_rawgeti(L, LUA_REGISTRYINDEX, task->cfg->mime_parser_cfg->lua_magic_detect_cbref); + struct rspamd_mime_part **pmime; + struct rspamd_task **ptask; + + pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *)); + rspamd_lua_setclass(L, rspamd_mimepart_classname, -1); + *pmime = npart; + ptask = lua_newuserdata(L, sizeof(struct rspamd_task *)); + rspamd_lua_setclass(L, rspamd_task_classname, -1); + *ptask = task; + + if (lua_pcall(L, 2, 2, err_idx) != 0) { + msg_err_task("cannot detect type (lua_magic): %s", lua_tostring(L, -1)); + } + else { + /* Stack: [traceback][ext][table] */ + if (lua_istable(L, -1)) { + /* Fill detected_ext */ + if (lua_isstring(L, -2)) { + npart->detected_ext = rspamd_mempool_strdup(task->task_pool, + lua_tostring(L, -2)); + } + + /* detected_ct */ + lua_pushstring(L, "ct"); + lua_gettable(L, -2); + + if (lua_isstring(L, -1)) { + const char *mb = lua_tostring(L, -1); + + if (mb) { + rspamd_ftok_t srch; + + srch.begin = mb; + srch.len = strlen(mb); + npart->detected_ct = rspamd_content_type_parse(srch.begin, + srch.len, + task->task_pool); + } + } + + lua_pop(L, 1); + + /* detected_type and promotion */ + lua_pushstring(L, "type"); + lua_gettable(L, -2); + + if (lua_isstring(L, -1)) { + const char *t = lua_tostring(L, -1); + if (t) { + npart->detected_type = rspamd_mempool_strdup(task->task_pool, t); + if (strcmp(t, "message") == 0) { + promote_to_message = TRUE; + } + } + } + + lua_pop(L, 1); + + /* no_text flag */ + lua_pushstring(L, "no_text"); + lua_gettable(L, -2); + + if (lua_isboolean(L, -1)) { + if (!!lua_toboolean(L, -1)) { + npart->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION; + } + } + + lua_pop(L, 1); + + /* ext fallback for promotion */ + if (!promote_to_message && lua_isstring(L, -2)) { + const char *ext = lua_tostring(L, -2); + if (ext && g_ascii_strcasecmp(ext, "eml") == 0) { + promote_to_message = TRUE; + } + } + } + } + + /* Clean stack */ + lua_settop(L, old_top); + } + else { + /* Pop traceback */ + lua_settop(L, old_top); + } + } + + if (promote_to_message) { + msg_debug_mime("treat part as embedded message (lua_magic)"); + st->nesting++; + g_ptr_array_add(st->stack, npart); + npart->part_type = RSPAMD_MIME_PART_MESSAGE; + ret = rspamd_mime_parse_message(task, npart, st, err); + } + } } return ret; @@ -1005,7 +1178,7 @@ rspamd_mime_process_multipart_node(struct rspamd_task *task, static enum rspamd_mime_parse_error rspamd_mime_parse_multipart_cb(struct rspamd_task *task, struct rspamd_mime_part *multipart, - struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_parser_runtime *st, struct rspamd_mime_multipart_cbdata *cb, struct rspamd_mime_boundary *b) { @@ -1048,7 +1221,7 @@ rspamd_mime_parse_multipart_cb(struct rspamd_task *task, static enum rspamd_mime_parse_error rspamd_multipart_boundaries_filter(struct rspamd_task *task, struct rspamd_mime_part *multipart, - struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_parser_runtime *st, struct rspamd_mime_multipart_cbdata *cb) { struct rspamd_mime_boundary *cur; @@ -1162,7 +1335,7 @@ rspamd_multipart_boundaries_filter(struct rspamd_task *task, static enum rspamd_mime_parse_error rspamd_mime_parse_multipart_part(struct rspamd_task *task, struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_parser_runtime *st, GError **err) { struct rspamd_mime_multipart_cbdata cbdata; @@ -1192,7 +1365,7 @@ rspamd_mime_parse_multipart_part(struct rspamd_task *task, cbdata.cur_boundary = &part->ct->boundary; rspamd_cryptobox_siphash((unsigned char *) &cbdata.bhash, cbdata.cur_boundary->begin, cbdata.cur_boundary->len, - lib_ctx->hkey); + mime_parser_cfg->hkey); msg_debug_mime("hash: %T -> %L", cbdata.cur_boundary, cbdata.bhash); } else { @@ -1223,7 +1396,7 @@ rspamd_mime_preprocess_cb(struct rspamd_multipattern *mp, gsize blen; gboolean closing = FALSE; struct rspamd_mime_boundary b; - struct rspamd_mime_parser_ctx *st = context; + struct rspamd_mime_parser_runtime *st = context; struct rspamd_task *task; task = st->task; @@ -1307,7 +1480,7 @@ rspamd_mime_preprocess_cb(struct rspamd_multipattern *mp, } rspamd_cryptobox_siphash((unsigned char *) &b.hash, lc_copy, blen, - lib_ctx->hkey); + mime_parser_cfg->hkey); msg_debug_mime("normal hash: %*s -> %L, %d boffset, %d data offset", (int) blen, lc_copy, b.hash, (int) b.boundary, (int) b.start); @@ -1315,7 +1488,7 @@ rspamd_mime_preprocess_cb(struct rspamd_multipattern *mp, b.flags = RSPAMD_MIME_BOUNDARY_FLAG_CLOSED; rspamd_cryptobox_siphash((unsigned char *) &b.closed_hash, lc_copy, blen + 2, - lib_ctx->hkey); + mime_parser_cfg->hkey); msg_debug_mime("closing hash: %*s -> %L, %d boffset, %d data offset", (int) blen + 2, lc_copy, b.closed_hash, @@ -1406,17 +1579,17 @@ end: static void rspamd_mime_preprocess_message(struct rspamd_task *task, struct rspamd_mime_part *top, - struct rspamd_mime_parser_ctx *st) + struct rspamd_mime_parser_runtime *st) { if (top->raw_data.begin >= st->pos) { - rspamd_multipattern_lookup(lib_ctx->mp_boundary, + rspamd_multipattern_lookup(mime_parser_cfg->mp_boundary, top->raw_data.begin - 1, top->raw_data.len + 1, rspamd_mime_preprocess_cb, st, NULL); } else { - rspamd_multipattern_lookup(lib_ctx->mp_boundary, + rspamd_multipattern_lookup(mime_parser_cfg->mp_boundary, st->pos, st->end - st->pos, rspamd_mime_preprocess_cb, st, NULL); @@ -1424,7 +1597,7 @@ rspamd_mime_preprocess_message(struct rspamd_task *task, } static void -rspamd_mime_parse_stack_free(struct rspamd_mime_parser_ctx *st) +rspamd_mime_parse_stack_free(struct rspamd_mime_parser_runtime *st) { if (st) { g_ptr_array_free(st->stack, TRUE); @@ -1436,7 +1609,7 @@ rspamd_mime_parse_stack_free(struct rspamd_mime_parser_ctx *st) static enum rspamd_mime_parse_error rspamd_mime_parse_message(struct rspamd_task *task, struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_parser_runtime *st, GError **err) { struct rspamd_content_type *ct, *sel = NULL; @@ -1448,7 +1621,7 @@ rspamd_mime_parse_message(struct rspamd_task *task, unsigned int i; enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_OK; GString str; - struct rspamd_mime_parser_ctx *nst = st; + struct rspamd_mime_parser_runtime *nst = st; if (st->nesting > max_nested) { g_set_error(err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d", @@ -1732,17 +1905,17 @@ rspamd_mime_parse_message(struct rspamd_task *task, enum rspamd_mime_parse_error rspamd_mime_parse_task(struct rspamd_task *task, GError **err) { - struct rspamd_mime_parser_ctx *st; + struct rspamd_mime_parser_runtime *st; enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_OK; - if (lib_ctx == NULL) { - rspamd_mime_parser_init_lib(); + if (mime_parser_cfg == NULL) { + rspamd_mime_parser_init_shared(task->cfg); } - if (++lib_ctx->key_usages > max_key_usages) { + if (++mime_parser_cfg->key_usages > max_key_usages) { /* Regenerate siphash key */ - ottery_rand_bytes(lib_ctx->hkey, sizeof(lib_ctx->hkey)); - lib_ctx->key_usages = 0; + ottery_rand_bytes(mime_parser_cfg->hkey, sizeof(mime_parser_cfg->hkey)); + mime_parser_cfg->key_usages = 0; } st = g_malloc0(sizeof(*st)); diff --git a/src/libmime/mime_parser.h b/src/libmime/mime_parser.h index aa77b2b30b..6ed175dc8b 100644 --- a/src/libmime/mime_parser.h +++ b/src/libmime/mime_parser.h @@ -18,6 +18,16 @@ #include "config.h" +struct rspamd_config; + +struct rspamd_mime_parser_config; + +/* Initialize shared mime parser config (stores Lua refs, precompiled data) */ +struct rspamd_mime_parser_config *rspamd_mime_parser_init_shared(struct rspamd_config *cfg); +void rspamd_mime_parser_free_shared(struct rspamd_mime_parser_config *cfg); + +/* Accessors */ +int rspamd_mime_parser_get_lua_magic_cbref(const struct rspamd_mime_parser_config *cfg); #ifdef __cplusplus extern "C" { diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index 76062e9b1e..32168c754c 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -49,6 +49,7 @@ struct rspamd_external_libs_ctx; struct rspamd_cryptobox_pubkey; struct rspamd_dns_resolver; struct rspamd_tokenizer_manager; +struct rspamd_mime_parser_config; /** * Logging type @@ -490,7 +491,8 @@ struct rspamd_config { struct rspamd_monitored_ctx *monitored_ctx; /**< context for monitored resources */ void *redis_pool; /**< redis connection pool */ - struct rspamd_re_cache *re_cache; /**< static regexp cache */ + struct rspamd_re_cache *re_cache; /**< static regexp cache */ + struct rspamd_mime_parser_config *mime_parser_cfg; /**< mime parser shared config */ GHashTable *trusted_keys; /**< list of trusted public keys */