/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
lua_State *L;
};
-static struct rspamd_mime_parser_config *mime_parser_cfg = NULL;
-
struct rspamd_mime_parser_config *
rspamd_mime_parser_init_shared(struct rspamd_config *cfg)
{
- if (mime_parser_cfg == NULL) {
- mime_parser_cfg = g_malloc0(sizeof(*mime_parser_cfg));
- mime_parser_cfg->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT);
- g_assert(mime_parser_cfg->mp_boundary != NULL);
- rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\r--", 0);
- rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\n--", 0);
+ if (cfg->mime_parser_cfg == NULL) { /* parser state is built once and cached on cfg; later calls just return it */
+ cfg->mime_parser_cfg = g_malloc0(sizeof(*cfg->mime_parser_cfg));
+ cfg->mime_parser_cfg->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT);
+ g_assert(cfg->mime_parser_cfg->mp_boundary != NULL);
+ rspamd_multipattern_add_pattern(cfg->mime_parser_cfg->mp_boundary, "\r--", 0);
+ rspamd_multipattern_add_pattern(cfg->mime_parser_cfg->mp_boundary, "\n--", 0);
GError *err = NULL;
- if (!rspamd_multipattern_compile(mime_parser_cfg->mp_boundary, RSPAMD_MULTIPATTERN_COMPILE_NO_FS, &err)) {
+ if (!rspamd_multipattern_compile(cfg->mime_parser_cfg->mp_boundary, RSPAMD_MULTIPATTERN_COMPILE_NO_FS, &err)) {
msg_err("fatal error: cannot compile multipattern for mime parser boundaries: %e", err);
g_error_free(err);
g_abort();
}
- ottery_rand_bytes(mime_parser_cfg->hkey, sizeof(mime_parser_cfg->hkey));
- mime_parser_cfg->key_usages = 0;
- mime_parser_cfg->lua_magic_detect_cbref = -1;
- }
-
- mime_parser_cfg->L = (lua_State *) cfg->lua_state;
-
- if (mime_parser_cfg->L && mime_parser_cfg->lua_magic_detect_cbref == -1) {
- int old_top = lua_gettop(mime_parser_cfg->L);
- if (rspamd_lua_require_function(mime_parser_cfg->L, "lua_magic", "detect_mime_part")) {
- mime_parser_cfg->lua_magic_detect_cbref = luaL_ref(mime_parser_cfg->L, LUA_REGISTRYINDEX);
+ ottery_rand_bytes(cfg->mime_parser_cfg->hkey, sizeof(cfg->mime_parser_cfg->hkey)); /* random key later passed to siphash for boundary hashes */
+ cfg->mime_parser_cfg->key_usages = 0;
+ cfg->mime_parser_cfg->lua_magic_detect_cbref = -1; /* -1 marks "no lua_magic callback referenced" */
+ cfg->mime_parser_cfg->L = (lua_State *) cfg->lua_state;
+
+ if (cfg->mime_parser_cfg->L && cfg->mime_parser_cfg->lua_magic_detect_cbref == -1) {
+ int old_top = lua_gettop(cfg->mime_parser_cfg->L);
+ if (rspamd_lua_require_function(cfg->mime_parser_cfg->L, "lua_magic", "detect_mime_part")) {
+ cfg->mime_parser_cfg->lua_magic_detect_cbref = luaL_ref(cfg->mime_parser_cfg->L, LUA_REGISTRYINDEX); /* keep a registry reference to the function */
+ }
+ lua_settop(cfg->mime_parser_cfg->L, old_top); /* restore the Lua stack to its height on entry */
}
- lua_settop(mime_parser_cfg->L, old_top);
}
- cfg->mime_parser_cfg = mime_parser_cfg;
- return mime_parser_cfg;
+ return cfg->mime_parser_cfg;
}
void rspamd_mime_parser_free_shared(struct rspamd_mime_parser_config *unused)
struct rspamd_mime_part *part,
struct rspamd_mime_parser_runtime *st,
GError **err);
+
static enum rspamd_mime_parse_error
rspamd_mime_parse_message(struct rspamd_task *task,
struct rspamd_mime_part *part,
struct rspamd_mime_parser_runtime *st,
GError **err);
+
static enum rspamd_mime_parse_error
rspamd_mime_parse_normal_part(struct rspamd_task *task,
struct rspamd_mime_part *part,
#define RSPAMD_MIME_QUARK (rspamd_mime_parser_quark())
+
static GQuark
rspamd_mime_parser_quark(void)
{
static void
rspamd_mime_parser_init_lib(void)
{
+ struct rspamd_mime_parser_config *mime_parser_cfg;
+
mime_parser_cfg = g_malloc0(sizeof(*mime_parser_cfg));
mime_parser_cfg->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT);
g_assert(mime_parser_cfg->mp_boundary != NULL);
}
}
else {
-
if (((end - (const unsigned char *) part->raw_data.begin) + padeqsign) % 4 == 0) {
if (padeqsign == 0) {
/*
}
static void
-rspamd_mime_part_get_cte(struct rspamd_task *task,
+rspamd_mime_part_get_cte(struct rspamd_task *task, struct rspamd_mime_part *part,
struct rspamd_mime_headers_table *hdrs,
- struct rspamd_mime_part *part,
gboolean apply_heuristic)
{
struct rspamd_mime_header *hdr, *cur;
}
}
}
+
static void
rspamd_mime_part_get_cd(struct rspamd_task *task, struct rspamd_mime_part *part)
{
g_assert(part != NULL);
- rspamd_mime_part_get_cte(task, part->raw_headers, part,
- part->ct && !(part->ct->flags & RSPAMD_CONTENT_TYPE_MESSAGE));
+ rspamd_mime_part_get_cte(task, part, part->raw_headers, FALSE);
rspamd_mime_part_get_cd(task, part);
switch (part->cte) {
return RSPAMD_MIME_PARSE_OK;
}
+/*
+ * Run lua_magic.detect_mime_part for a decoded normal part and maybe promote to message.
+ *
+ * The referenced Lua callback is invoked with (part, task) and two results are
+ * requested. When the second result is a table, the part is annotated from it:
+ *   - detected_ext  <- first result, if it is a string
+ *   - detected_ct   <- table field "ct", parsed with rspamd_content_type_parse()
+ *   - detected_type <- table field "type"
+ *   - RSPAMD_MIME_PART_NO_TEXT_EXTRACTION is set if table field "no_text" is true
+ * The part is promoted to an embedded message when "type" equals "message" or,
+ * failing that, when the first result equals "eml" (case-insensitive).
+ *
+ * The callback is not invoked when there is no Lua state, no mime_parser_cfg or
+ * no callback reference; a failing Lua call is only logged.
+ *
+ * @param task  task being processed; copied strings are allocated from task->task_pool
+ * @param npart part to inspect and annotate
+ * @param st    parser runtime; its nesting counter and stack are updated on promotion
+ * @param err   passed through to rspamd_mime_parse_message()
+ * @return the result of rspamd_mime_parse_message() when the part is promoted,
+ *         RSPAMD_MIME_PARSE_OK otherwise
+ */
+static enum rspamd_mime_parse_error
+rspamd_mime_maybe_detect_type(struct rspamd_task *task,
+ struct rspamd_mime_part *npart,
+ struct rspamd_mime_parser_runtime *st,
+ GError **err)
+{
+ lua_State *L = NULL;
+ int old_top = -1, err_idx;
+ gboolean promote_to_message = FALSE;
+
+
+ if (task->cfg) {
+ L = task->cfg->lua_state;
+ }
+
+ if (L && task->cfg->mime_parser_cfg &&
+ rspamd_mime_parser_get_lua_magic_cbref(task->cfg->mime_parser_cfg) != -1) {
+ old_top = lua_gettop(L);
+ lua_pushcfunction(L, &rspamd_lua_traceback); /* message handler used by lua_pcall below */
+ err_idx = lua_gettop(L);
+ lua_rawgeti(L, LUA_REGISTRYINDEX,
+ rspamd_mime_parser_get_lua_magic_cbref(task->cfg->mime_parser_cfg));
+
+ struct rspamd_mime_part **pmime;
+ struct rspamd_task **ptask;
+
+ pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
+ rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
+ *pmime = npart;
+ ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
+ rspamd_lua_setclass(L, rspamd_task_classname, -1);
+ *ptask = task;
+
+ if (lua_pcall(L, 2, 2, err_idx) != 0) { /* 2 arguments (part, task), 2 results (ext, table) */
+ msg_err_task("cannot detect type (lua_magic): %s", lua_tostring(L, -1));
+ }
+ else {
+ msg_debug_mime("called lua_magic.detect_mime_part for part #%ud", npart->part_number);
+ /* Stack: [traceback][ext][table] */
+ if (lua_istable(L, -1)) {
+ /* detected_ext */
+ if (lua_isstring(L, -2)) {
+ npart->detected_ext = rspamd_mempool_strdup(task->task_pool,
+ lua_tostring(L, -2));
+ }
+
+ /* detected_ct */
+ lua_pushstring(L, "ct");
+ lua_gettable(L, -2);
+
+ if (lua_isstring(L, -1)) {
+ const char *mb = lua_tostring(L, -1);
+
+ if (mb) {
+ rspamd_ftok_t srch;
+
+ srch.begin = mb;
+ srch.len = strlen(mb);
+ npart->detected_ct = rspamd_content_type_parse(srch.begin,
+ srch.len,
+ task->task_pool);
+ }
+ }
+
+ lua_pop(L, 1);
+
+ /* detected_type and promotion */
+ lua_pushstring(L, "type");
+ lua_gettable(L, -2);
+
+ if (lua_isstring(L, -1)) {
+ const char *t = lua_tostring(L, -1);
+ if (t) {
+ npart->detected_type = rspamd_mempool_strdup(task->task_pool, t);
+ if (strcmp(t, "message") == 0) {
+ promote_to_message = TRUE;
+ }
+ }
+ }
+
+ lua_pop(L, 1);
+
+ /* no_text flag */
+ lua_pushstring(L, "no_text");
+ lua_gettable(L, -2);
+
+ if (lua_isboolean(L, -1)) {
+ if (!!lua_toboolean(L, -1)) {
+ npart->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION;
+ }
+ }
+
+ lua_pop(L, 1);
+
+ /* ext fallback for promotion */
+ if (!promote_to_message && lua_isstring(L, -2)) {
+ const char *ext = lua_tostring(L, -2);
+ if (ext && g_ascii_strcasecmp(ext, "eml") == 0) {
+ promote_to_message = TRUE;
+ }
+ }
+ }
+ }
+
+ lua_settop(L, old_top); /* drop the traceback handler and whatever the call left on the stack */
+ }
+
+ /* Fallback: if nothing detected but declared CT is text, set detected_type to text */
+ if (npart->detected_type == NULL && npart->ct &&
+ (npart->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) {
+ npart->detected_type = rspamd_mempool_strdup(task->task_pool, "text");
+ }
+
+ if (promote_to_message) {
+ msg_debug_mime("treat part as embedded message (lua_magic)");
+ st->nesting++; /* count one more nesting level before descending into the part */
+ g_ptr_array_add(st->stack, npart);
+ npart->part_type = RSPAMD_MIME_PART_MESSAGE;
+ return rspamd_mime_parse_message(task, npart, st, err);
+ }
+
+ return RSPAMD_MIME_PARSE_OK;
+}
struct rspamd_mime_multipart_cbdata {
struct rspamd_task *task;
if (hdr != NULL) {
-
DL_FOREACH(hdr, cur)
{
ct = rspamd_content_type_parse(cur->value, strlen(cur->value),
ret = rspamd_mime_parse_normal_part(task, npart, st, sel, err);
if (ret == RSPAMD_MIME_PARSE_OK) {
- /* Ask lua_magic if this is a message (e.g. .eml) */
- lua_State *L = NULL;
- int old_top = -1, err_idx;
- gboolean promote_to_message = FALSE;
-
- if (task->cfg) {
- L = task->cfg->lua_state;
- }
-
- if (L) {
- old_top = lua_gettop(L);
- lua_pushcfunction(L, &rspamd_lua_traceback);
- err_idx = lua_gettop(L);
-
- if (task->cfg->mime_parser_cfg && task->cfg->mime_parser_cfg->lua_magic_detect_cbref != -1) {
- lua_rawgeti(L, LUA_REGISTRYINDEX, task->cfg->mime_parser_cfg->lua_magic_detect_cbref);
- struct rspamd_mime_part **pmime;
- struct rspamd_task **ptask;
-
- pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
- rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
- *pmime = npart;
- ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
- rspamd_lua_setclass(L, rspamd_task_classname, -1);
- *ptask = task;
-
- if (lua_pcall(L, 2, 2, err_idx) != 0) {
- msg_err_task("cannot detect type (lua_magic): %s", lua_tostring(L, -1));
- }
- else {
- /* Stack: [traceback][ext][table] */
- if (lua_istable(L, -1)) {
- /* Fill detected_ext */
- if (lua_isstring(L, -2)) {
- npart->detected_ext = rspamd_mempool_strdup(task->task_pool,
- lua_tostring(L, -2));
- }
-
- /* detected_ct */
- lua_pushstring(L, "ct");
- lua_gettable(L, -2);
-
- if (lua_isstring(L, -1)) {
- const char *mb = lua_tostring(L, -1);
-
- if (mb) {
- rspamd_ftok_t srch;
-
- srch.begin = mb;
- srch.len = strlen(mb);
- npart->detected_ct = rspamd_content_type_parse(srch.begin,
- srch.len,
- task->task_pool);
- }
- }
-
- lua_pop(L, 1);
-
- /* detected_type and promotion */
- lua_pushstring(L, "type");
- lua_gettable(L, -2);
-
- if (lua_isstring(L, -1)) {
- const char *t = lua_tostring(L, -1);
- if (t) {
- npart->detected_type = rspamd_mempool_strdup(task->task_pool, t);
- if (strcmp(t, "message") == 0) {
- promote_to_message = TRUE;
- }
- }
- }
-
- lua_pop(L, 1);
-
- /* no_text flag */
- lua_pushstring(L, "no_text");
- lua_gettable(L, -2);
-
- if (lua_isboolean(L, -1)) {
- if (!!lua_toboolean(L, -1)) {
- npart->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION;
- }
- }
-
- lua_pop(L, 1);
-
- /* ext fallback for promotion */
- if (!promote_to_message && lua_isstring(L, -2)) {
- const char *ext = lua_tostring(L, -2);
- if (ext && g_ascii_strcasecmp(ext, "eml") == 0) {
- promote_to_message = TRUE;
- }
- }
- }
- }
-
- /* Clean stack */
- lua_settop(L, old_top);
- }
- else {
- /* Pop traceback */
- lua_settop(L, old_top);
- }
- }
-
- if (promote_to_message) {
- msg_debug_mime("treat part as embedded message (lua_magic)");
- st->nesting++;
- g_ptr_array_add(st->stack, npart);
- npart->part_type = RSPAMD_MIME_PART_MESSAGE;
- ret = rspamd_mime_parse_message(task, npart, st, err);
- }
+ /* Always try to detect type after normal parse */
+ ret = rspamd_mime_maybe_detect_type(task, npart, st, err);
}
}
* but it might be unsuitable (e.g. in broken headers)
*/
if (cb->part_start < pos && cb->cur_boundary) {
-
if ((ret = rspamd_mime_process_multipart_node(task, cb->st,
cb->multipart, cb->part_start, pos, TRUE, cb->err)) != RSPAMD_MIME_PARSE_OK) {
return ret;
part->urls = g_ptr_array_new();
g_ptr_array_add(MESSAGE_FIELD(task, parts), part);
st->nesting++;
- rspamd_mime_part_get_cte(task, part->raw_headers, part, FALSE);
+ rspamd_mime_part_get_cte(task, part, part->raw_headers, FALSE);
st->pos = part->raw_data.begin;
cbdata.multipart = part;
cbdata.cur_boundary = &part->ct->boundary;
rspamd_cryptobox_siphash((unsigned char *) &cbdata.bhash,
cbdata.cur_boundary->begin, cbdata.cur_boundary->len,
- mime_parser_cfg->hkey);
+ task->cfg->mime_parser_cfg->hkey);
msg_debug_mime("hash: %T -> %L", cbdata.cur_boundary, cbdata.bhash);
}
else {
task = st->task;
if (G_LIKELY(p < end)) {
-
blen = 0;
while (p < end) {
}
rspamd_cryptobox_siphash((unsigned char *) &b.hash, lc_copy, blen,
- mime_parser_cfg->hkey);
+ task->cfg->mime_parser_cfg->hkey);
msg_debug_mime("normal hash: %*s -> %L, %d boffset, %d data offset",
(int) blen, lc_copy, b.hash, (int) b.boundary, (int) b.start);
b.flags = RSPAMD_MIME_BOUNDARY_FLAG_CLOSED;
rspamd_cryptobox_siphash((unsigned char *) &b.closed_hash, lc_copy,
blen + 2,
- mime_parser_cfg->hkey);
+ task->cfg->mime_parser_cfg->hkey);
msg_debug_mime("closing hash: %*s -> %L, %d boffset, %d data offset",
(int) blen + 2, lc_copy,
b.closed_hash,
struct rspamd_mime_part *top,
struct rspamd_mime_parser_runtime *st)
{
-
if (top->raw_data.begin >= st->pos) {
- rspamd_multipattern_lookup(mime_parser_cfg->mp_boundary,
+ rspamd_multipattern_lookup(task->cfg->mime_parser_cfg->mp_boundary,
top->raw_data.begin - 1,
top->raw_data.len + 1,
rspamd_mime_preprocess_cb, st, NULL);
}
else {
- rspamd_multipattern_lookup(mime_parser_cfg->mp_boundary,
+ rspamd_multipattern_lookup(task->cfg->mime_parser_cfg->mp_boundary,
st->pos,
st->end - st->pos,
rspamd_mime_preprocess_cb, st, NULL);
hdr_pos = rspamd_string_find_eoh(&str, &body_pos);
if (hdr_pos > 0 && hdr_pos < str.len) {
-
MESSAGE_FIELD(task, raw_headers_content).begin = str.str;
MESSAGE_FIELD(task, raw_headers_content).len = hdr_pos;
MESSAGE_FIELD(task, raw_headers_content).body_start = str.str + body_pos;
}
else {
ret = rspamd_mime_parse_normal_part(task, npart, nst, sel, err);
+ if (ret == RSPAMD_MIME_PARSE_OK) {
+ /* Always try to detect type after normal parse */
+ ret = rspamd_mime_maybe_detect_type(task, npart, nst, err);
+ }
}
if (ret != RSPAMD_MIME_PARSE_OK) {
if (end > start &&
(ret = rspamd_mime_process_multipart_node(task, nst,
NULL, start, end, FALSE, err)) != RSPAMD_MIME_PARSE_OK) {
-
if (nst != st) {
rspamd_mime_parse_stack_free(nst);
}
struct rspamd_mime_parser_runtime *st;
enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_OK;
- if (mime_parser_cfg == NULL) {
- rspamd_mime_parser_init_shared(task->cfg);
- }
+ rspamd_mime_parser_init_shared(task->cfg);
- if (++mime_parser_cfg->key_usages > max_key_usages) {
+ if (++task->cfg->mime_parser_cfg->key_usages > max_key_usages) {
/* Regenerate siphash key */
- ottery_rand_bytes(mime_parser_cfg->hkey, sizeof(mime_parser_cfg->hkey));
- mime_parser_cfg->key_usages = 0;
+ ottery_rand_bytes(task->cfg->mime_parser_cfg->hkey, sizeof(task->cfg->mime_parser_cfg->hkey));
+ task->cfg->mime_parser_cfg->key_usages = 0;
}
st = g_malloc0(sizeof(*st));
Test.
]]
- local res,task = rspamd_task.load_from_string(msg)
+ local res, task = rspamd_task.load_from_string(msg, rspamd_config)
assert_true(res, "failed to load message")
task:process_message()
task:destroy()
]]
test("Process mime nesting: simple", function()
local msg = hdrs .. body
- local res,task = rspamd_task.load_from_string(msg)
+ local res, task = rspamd_task.load_from_string(msg, rspamd_config)
assert_true(res, "failed to load message")
task:process_message()
- assert_rspamd_table_eq_sorted({actual = fun.totable(fun.map(function(u)
- return u:get_host()
- end, task:get_urls())), expect = {
- 'evil.com', 'example.com'
- }})
+ assert_rspamd_table_eq_sorted({
+ actual = fun.totable(fun.map(function(u)
+ return u:get_host()
+ end, task:get_urls())),
+ expect = {
+ 'evil.com', 'example.com'
+ }
+ })
task:destroy()
end)
test("Process mime nesting: multipart", function()
- local msg = table.concat{
+ local msg = table.concat {
hdrs, mpart, '\n', '--XXX\n', body, '\n--XXX--\n'
}
- local res,task = rspamd_task.load_from_string(msg)
+ local res, task = rspamd_task.load_from_string(msg, rspamd_config)
assert_true(res, "failed to load message")
task:process_message()
assert_rspamd_table_eq_sorted({
expect = {
'evil.com', 'example.com'
- }})
+ }
+ })
task:destroy()
end)
test("Process mime nesting: multipart, broken", function()
- local msg = table.concat{
+ local msg = table.concat {
hdrs, mpart, '\n', '--XXX\n', 'garbadge\n', '\n--XXX--\n', '--XXX\n', body
}
- local res,task = rspamd_task.load_from_string(msg)
+ local res, task = rspamd_task.load_from_string(msg, rspamd_config)
assert_true(res, "failed to load message")
task:process_message()
assert_rspamd_table_eq_sorted({
expect = {
'evil.com', 'example.com'
- }})
+ }
+ })
task:destroy()
end)
test("Process mime nesting: message", function()
- local msg = table.concat{
+ local msg = table.concat {
hdrs, 'Content-Type: message/rfc822\n', '\n', hdrs, body
}
- local res,task = rspamd_task.load_from_string(msg)
+ local res, task = rspamd_task.load_from_string(msg, rspamd_config)
assert_true(res, "failed to load message")
task:process_message()
assert_rspamd_table_eq_sorted({
expect = {
'evil.com', 'example.com'
- }})
+ }
+ })
task:destroy()
end)
test("Process mime nesting: message in multipart", function()
- local msg = table.concat{
+ local msg = table.concat {
hdrs, mpart, '\n',
'--XXX\n',
- 'Content-Type: message/rfc822\n', '\n', hdrs, body ,
+ 'Content-Type: message/rfc822\n', '\n', hdrs, body,
'\n--XXX--\n',
}
- local res,task = rspamd_task.load_from_string(msg)
+ local res, task = rspamd_task.load_from_string(msg, rspamd_config)
assert_true(res, "failed to load message")
task:process_message()
assert_rspamd_table_eq_sorted({
expect = {
'evil.com', 'example.com'
- }})
+ }
+ })
task:destroy()
end)
test("Process mime nesting: multipart message in multipart", function()
- local msg = table.concat{
+ local msg = table.concat {
hdrs, mpart, '\n',
'--XXX\n',
- 'Content-Type: message/rfc822\n', '\n', hdrs, mpart, '\n',
+ 'Content-Type: message/rfc822\n', '\n', hdrs, mpart, '\n',
'--XXX\n',
- body ,
+ body,
'\n--XXX--\n',
'\n--XXX--\n',
}
- local res,task = rspamd_task.load_from_string(msg)
+ local res, task = rspamd_task.load_from_string(msg, rspamd_config)
assert_true(res, "failed to load message")
task:process_message()
assert_rspamd_table_eq_sorted({
expect = {
'evil.com', 'example.com'
- }})
+ }
+ })
task:destroy()
end)
-end)
\ No newline at end of file
+end)