From: Cursor Agent Date: Sat, 4 Oct 2025 12:31:41 +0000 (+0000) Subject: feat: Add milter ESMTP argument parsing and Lua access X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=6ec202f50f573b4b0303fa12d33b6afd3f3090ca;p=thirdparty%2Frspamd.git feat: Add milter ESMTP argument parsing and Lua access This commit introduces parsing for ESMTP arguments from MAIL and RCPT commands in the milter protocol. It also adds Lua functions to access these arguments, enabling more sophisticated mail processing based on ESMTP options. Co-authored-by: v --- diff --git a/src/libserver/mempool_vars_internal.h b/src/libserver/mempool_vars_internal.h index 6c9553868a..9483a65596 100644 --- a/src/libserver/mempool_vars_internal.h +++ b/src/libserver/mempool_vars_internal.h @@ -28,6 +28,7 @@ #define RSPAMD_MEMPOOL_PRINCIPAL_RECIPIENT "principal_recipient" #define RSPAMD_MEMPOOL_PROFILE "profile" #define RSPAMD_MEMPOOL_MILTER_REPLY "milter_reply" +#define RSPAMD_MEMPOOL_MILTER_SESSION "milter_session" #define RSPAMD_MEMPOOL_DKIM_SIGNATURE "dkim-signature" #define RSPAMD_MEMPOOL_DMARC_CHECKS "dmarc_checks" #define RSPAMD_MEMPOOL_DKIM_BH_CACHE "dkim_bh_cache" diff --git a/src/libserver/milter.c b/src/libserver/milter.c index 09ddddabaa..650a3370b6 100644 --- a/src/libserver/milter.c +++ b/src/libserver/milter.c @@ -139,6 +139,27 @@ rspamd_milter_session_reset(struct rspamd_milter_session *session, session->from = NULL; } + if (session->mail_esmtp_args) { + msg_debug_milter("cleanup mail esmtp args"); + g_hash_table_unref(session->mail_esmtp_args); + session->mail_esmtp_args = NULL; + } + + if (session->rcpt_esmtp_args) { + GHashTable *args; + + msg_debug_milter("cleanup rcpt esmtp args"); + PTR_ARRAY_FOREACH(session->rcpt_esmtp_args, i, args) + { + if (args) { + g_hash_table_unref(args); + } + } + + g_ptr_array_free(session->rcpt_esmtp_args, TRUE); + session->rcpt_esmtp_args = NULL; + } + if (priv->headers) { msg_debug_milter("cleanup headers"); char *k; @@ -323,6 +344,65 @@ 
rspamd_milter_plan_io(struct rspamd_milter_session *session, (var) = ntohs(var); \ } while (0) +/** + * Parse the ESMTP arguments that follow the address in a MAIL/RCPT command: a run of NUL-terminated `KEY=VALUE` or bare `KEY` strings located in [pos, end) + * Parsing stops at the first empty or unterminated string; a bare KEY (or an argument that starts with '=') is stored whole as the key with an empty value + * Returns a new hash table (ftok key -> ftok value, a repeated key replaces the earlier entry) or NULL if no argument was found; `pool` is not used by this function + */ +static GHashTable * +rspamd_milter_parse_esmtp_args(const unsigned char *pos, + const unsigned char *end, + rspamd_mempool_t *pool) +{ + GHashTable *args = NULL; + const unsigned char *arg_start, *arg_end, *eq; + rspamd_fstring_t *key, *value; + rspamd_ftok_t *key_tok, *value_tok; + + while (pos < end) { + /* Each argument is NUL-terminated; memchr() yields NULL for an unterminated tail */ + arg_end = memchr(pos, '\0', end - pos); + + if (!arg_end || arg_end == pos) { + /* No more arguments or empty argument: stop here, anything after it is not parsed */ + break; + } + + arg_start = pos; + + /* Look for KEY=VALUE separator (the first '=' inside this argument only) */ + eq = memchr(arg_start, '=', arg_end - arg_start); + + if (!args) { + /* Lazy initialization: the table exists only once an argument was seen, so a NULL result means there were no arguments */ + args = g_hash_table_new_full(rspamd_ftok_icase_hash, + rspamd_ftok_icase_equal, + rspamd_fstring_mapped_ftok_free, + rspamd_fstring_mapped_ftok_free); + } + + if (eq && eq > arg_start) { + /* KEY=VALUE format: split at the first '=', the value part may be empty */ + key = rspamd_fstring_new_init(arg_start, eq - arg_start); + value = rspamd_fstring_new_init(eq + 1, arg_end - eq - 1); + } + else { + /* KEY only format (no value); also taken when the argument starts with '=' */ + key = rspamd_fstring_new_init(arg_start, arg_end - arg_start); + value = rspamd_fstring_new_init("", 0); + } + + key_tok = rspamd_ftok_map(key); + value_tok = rspamd_ftok_map(value); + + g_hash_table_replace(args, key_tok, value_tok); + + pos = arg_end + 1; + } + + return args; +} + static gboolean rspamd_milter_process_command(struct rspamd_milter_session *session, struct rspamd_milter_private *priv) @@ -658,7 +738,23 @@ rspamd_milter_process_command(struct rspamd_milter_session *session, session->from = addr; } - /* TODO: parse esmtp arguments */ + /* Parse ESMTP arguments */ + pos = zero + 1; + if (pos < end) { + session->mail_esmtp_args = rspamd_milter_parse_esmtp_args(pos, end, priv->pool); 
+ if (session->mail_esmtp_args) { + GHashTableIter iter; + gpointer key, value; + + g_hash_table_iter_init(&iter, session->mail_esmtp_args); + while (g_hash_table_iter_next(&iter, &key, &value)) { + rspamd_ftok_t *k = (rspamd_ftok_t *) key; + rspamd_ftok_t *v = (rspamd_ftok_t *) value; + msg_debug_milter("mail esmtp arg: %T=%T", k, v); + } + } + } break; } else { @@ -749,6 +845,7 @@ rspamd_milter_process_command(struct rspamd_milter_session *session, while (pos < end) { struct rspamd_email_address *addr; char *cpy; + GHashTable *esmtp_args = NULL; zero = memchr(pos, '\0', end - pos); @@ -765,6 +862,43 @@ rspamd_milter_process_command(struct rspamd_milter_session *session, } g_ptr_array_add(session->rcpts, addr); + + /* Parse ESMTP arguments for this recipient */ + if (zero + 1 < end) { + esmtp_args = rspamd_milter_parse_esmtp_args(zero + 1, end, priv->pool); + + if (esmtp_args) { + GHashTableIter iter; + gpointer key, value; + + if (!session->rcpt_esmtp_args) { + session->rcpt_esmtp_args = g_ptr_array_sized_new(1); + } + + g_ptr_array_add(session->rcpt_esmtp_args, esmtp_args); + + g_hash_table_iter_init(&iter, esmtp_args); + while (g_hash_table_iter_next(&iter, &key, &value)) { + rspamd_ftok_t *k = (rspamd_ftok_t *) key; + rspamd_ftok_t *v = (rspamd_ftok_t *) value; + msg_debug_milter("rcpt esmtp arg: %T=%T", k, v); + } + } + else { + /* Add NULL placeholder to keep indices aligned with rcpts array */ + if (!session->rcpt_esmtp_args) { + session->rcpt_esmtp_args = g_ptr_array_sized_new(1); + } + g_ptr_array_add(session->rcpt_esmtp_args, NULL); + } + } + else { + /* No ESMTP args, add NULL placeholder */ + if (!session->rcpt_esmtp_args) { + session->rcpt_esmtp_args = g_ptr_array_sized_new(1); + } + g_ptr_array_add(session->rcpt_esmtp_args, NULL); + } } pos = zero + 1; @@ -784,6 +918,12 @@ rspamd_milter_process_command(struct rspamd_milter_session *session, } g_ptr_array_add(session->rcpts, addr); + + /* No ESMTP args in this case, add NULL placeholder */ + if 
(!session->rcpt_esmtp_args) { + session->rcpt_esmtp_args = g_ptr_array_sized_new(1); + } + g_ptr_array_add(session->rcpt_esmtp_args, NULL); } break; @@ -1654,6 +1794,65 @@ rspamd_milter_to_http(struct rspamd_milter_session *session) rspamd_milter_macro_http(session, msg); rspamd_http_message_add_header(msg, FLAGS_HEADER, "milter,body_block"); + /* Add ESMTP arguments as HTTP headers */ + if (session->mail_esmtp_args) { + GHashTableIter iter; + gpointer key, value; + GString *hdr_val; + + g_hash_table_iter_init(&iter, session->mail_esmtp_args); + while (g_hash_table_iter_next(&iter, &key, &value)) { + rspamd_ftok_t *k = (rspamd_ftok_t *) key; + rspamd_ftok_t *v = (rspamd_ftok_t *) value; + + /* Use X-Rspamd-Mail-Esmtp-Arg- prefix for mail ESMTP args */ + hdr_val = g_string_sized_new(k->len + v->len + 1); + g_string_append_len(hdr_val, k->begin, k->len); + + if (v->len > 0) { + g_string_append_c(hdr_val, '='); + g_string_append_len(hdr_val, v->begin, v->len); + } + + rspamd_http_message_add_header(msg, "X-Rspamd-Mail-Esmtp-Args", hdr_val->str); + g_string_free(hdr_val, TRUE); + } + } + + if (session->rcpt_esmtp_args) { + GHashTable *rcpt_args; + GString *hdr_val; + unsigned int idx; + + PTR_ARRAY_FOREACH(session->rcpt_esmtp_args, idx, rcpt_args) + { + if (rcpt_args) { + GHashTableIter iter; + gpointer key, value; + + g_hash_table_iter_init(&iter, rcpt_args); + while (g_hash_table_iter_next(&iter, &key, &value)) { + rspamd_ftok_t *k = (rspamd_ftok_t *) key; + rspamd_ftok_t *v = (rspamd_ftok_t *) value; + + /* Use X-Rspamd-Rcpt-Esmtp-Arg- prefix for rcpt ESMTP args */ + /* Format: rcpt_index:key=value */ + hdr_val = g_string_sized_new(k->len + v->len + 16); + g_string_append_printf(hdr_val, "%u:", idx); + g_string_append_len(hdr_val, k->begin, k->len); + + if (v->len > 0) { + g_string_append_c(hdr_val, '='); + g_string_append_len(hdr_val, v->begin, v->len); + } + + rspamd_http_message_add_header(msg, "X-Rspamd-Rcpt-Esmtp-Args", hdr_val->str); + g_string_free(hdr_val, 
TRUE); + } + } + } + } + return msg; } diff --git a/src/libserver/milter.h b/src/libserver/milter.h index 5b4b750e23..b70092f452 100644 --- a/src/libserver/milter.h +++ b/src/libserver/milter.h @@ -71,6 +71,8 @@ struct rspamd_milter_session { rspamd_fstring_t *helo; rspamd_fstring_t *hostname; rspamd_fstring_t *message; + GHashTable *mail_esmtp_args; /* ESMTP arguments from MAIL FROM command */ + GPtrArray *rcpt_esmtp_args; /* Array of GHashTable, one per recipient with ESMTP arguments */ void *priv; ref_entry_t ref; }; diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 4912339957..273f6bc618 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -626,6 +626,48 @@ LUA_FUNCTION_DEF(task, get_from); * @return {boolean} success or not */ LUA_FUNCTION_DEF(task, set_from); + +/*** + * @method task:get_mail_esmtp_args() + * Returns a table of ESMTP arguments from MAIL FROM command (milter only). + * Each argument is a key-value pair where the key is the argument name and + * the value is the argument value (or empty string if no value). + * @return {table} ESMTP arguments or nil if not available + * @example + * local esmtp_args = task:get_mail_esmtp_args() + * if esmtp_args then + * -- Check for DSN arguments + * local ret = esmtp_args['RET'] + * local envid = esmtp_args['ENVID'] + * end + */ +LUA_FUNCTION_DEF(task, get_mail_esmtp_args); + +/*** + * @method task:get_rcpt_esmtp_args([idx]) + * Returns a table of ESMTP arguments from RCPT TO command (milter only). + * If idx is specified, returns arguments for the recipient at that index (0-based). + * If idx is not specified, returns an array of tables, one per recipient. + * Each argument is a key-value pair where the key is the argument name and + * the value is the argument value (or empty string if no value). 
+ * @param {integer} idx optional index of the recipient (0-based) + * @return {table|array of tables} ESMTP arguments or nil if not available + * @example + * -- Get args for all recipients + * local rcpt_args = task:get_rcpt_esmtp_args() + * if rcpt_args then + * for i, args in ipairs(rcpt_args) do + * if args and args['NOTIFY'] then + * -- Process NOTIFY argument + * end + * end + * end + * + * -- Get args for first recipient only + * local first_rcpt_args = task:get_rcpt_esmtp_args(0) + */ +LUA_FUNCTION_DEF(task, get_rcpt_esmtp_args); + /*** * @method task:get_user() * Returns authenticated user name for this task if specified by an MTA. @@ -1295,6 +1337,8 @@ static const struct luaL_reg tasklib_m[] = { LUA_INTERFACE_DEF(task, has_from), LUA_INTERFACE_DEF(task, get_from), LUA_INTERFACE_DEF(task, set_from), + LUA_INTERFACE_DEF(task, get_mail_esmtp_args), + LUA_INTERFACE_DEF(task, get_rcpt_esmtp_args), LUA_INTERFACE_DEF(task, get_user), LUA_INTERFACE_DEF(task, set_user), {"get_addr", lua_task_get_from_ip}, @@ -4291,6 +4335,273 @@ lua_task_get_reply_sender(lua_State *L) return 1; } +static int +lua_task_get_mail_esmtp_args(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_task *task = lua_check_task(L, 1); + struct rspamd_mime_header *hdr; + + if (task) { + /* Check if this is a milter task */ + if (!(task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER)) { + lua_pushnil(L); + return 1; + } + + /* Get ESMTP args from HTTP headers */ + hdr = rspamd_message_get_header_array(task, "X-Rspamd-Mail-Esmtp-Args", FALSE); + + if (hdr) { + lua_createtable(L, 0, 0); + + while (hdr) { + const char *p, *eq; + gsize len; + + if (hdr->decoded) { + p = hdr->decoded; + len = strlen(p); + } + else { + p = hdr->value; + len = hdr->value_len; + } + + /* Parse KEY=VALUE format */ + eq = memchr(p, '=', len); + + if (eq) { + /* KEY=VALUE */ + lua_pushlstring(L, p, eq - p); + lua_pushlstring(L, eq + 1, len - (eq - p) - 1); + } + else { + /* KEY only */ + lua_pushlstring(L, p, len); 
+ lua_pushstring(L, ""); + } + + lua_settable(L, -3); + hdr = hdr->next; + } + } + else { + lua_pushnil(L); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int +lua_task_get_rcpt_esmtp_args(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_task *task = lua_check_task(L, 1); + struct rspamd_mime_header *hdr; + int idx = -1; + gboolean all_rcpts = TRUE; + GHashTable *rcpt_args_by_idx = NULL; + + if (task) { + /* Check if this is a milter task */ + if (!(task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER)) { + lua_pushnil(L); + return 1; + } + + /* Check if idx was specified */ + if (lua_gettop(L) >= 2 && lua_type(L, 2) == LUA_TNUMBER) { + idx = lua_tointeger(L, 2); + all_rcpts = FALSE; + } + + /* Get ESMTP args from HTTP headers */ + hdr = rspamd_message_get_header_array(task, "X-Rspamd-Rcpt-Esmtp-Args", FALSE); + + if (hdr) { + if (all_rcpts) { + /* Build hash table mapping recipient index to args table */ + rcpt_args_by_idx = g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, NULL); + + /* First pass: collect all args by recipient index */ + while (hdr) { + const char *p, *colon, *eq; + gsize len; + int rcpt_idx; + lua_State *tmp_L; + + if (hdr->decoded) { + p = hdr->decoded; + len = strlen(p); + } + else { + p = hdr->value; + len = hdr->value_len; + } + + /* Parse IDX:KEY=VALUE format */ + colon = memchr(p, ':', len); + + if (colon) { + char *endptr; + rcpt_idx = strtol(p, &endptr, 10); + + if (endptr == colon) { + /* Valid index found */ + p = colon + 1; + len -= (colon - p) + 1; + + /* Store this arg for this recipient */ + if (!g_hash_table_contains(rcpt_args_by_idx, GINT_TO_POINTER(rcpt_idx))) { + /* Create new table for this recipient */ + lua_newtable(L); + g_hash_table_insert(rcpt_args_by_idx, + GINT_TO_POINTER(rcpt_idx), + GINT_TO_POINTER(lua_gettop(L))); + } + + /* Get the table for this recipient */ + int table_idx = GPOINTER_TO_INT(g_hash_table_lookup(rcpt_args_by_idx, + 
GINT_TO_POINTER(rcpt_idx))); + lua_pushvalue(L, table_idx); + + /* Parse KEY=VALUE */ + eq = memchr(p, '=', len); + + if (eq) { + lua_pushlstring(L, p, eq - p); + lua_pushlstring(L, eq + 1, len - (eq - p) - 1); + } + else { + lua_pushlstring(L, p, len); + lua_pushstring(L, ""); + } + + lua_settable(L, -3); + lua_pop(L, 1); /* Pop the table */ + } + } + + hdr = hdr->next; + } + + /* Now create the result array */ + if (g_hash_table_size(rcpt_args_by_idx) > 0) { + GHashTableIter iter; + gpointer key, value; + int max_idx = 0; + + /* Find max index */ + g_hash_table_iter_init(&iter, rcpt_args_by_idx); + while (g_hash_table_iter_next(&iter, &key, &value)) { + int i = GPOINTER_TO_INT(key); + if (i > max_idx) { + max_idx = i; + } + } + + /* Create result array */ + lua_createtable(L, max_idx + 1, 0); + + /* Fill array with tables or nils */ + for (int i = 0; i <= max_idx; i++) { + if (g_hash_table_contains(rcpt_args_by_idx, GINT_TO_POINTER(i))) { + int table_idx = GPOINTER_TO_INT(g_hash_table_lookup(rcpt_args_by_idx, + GINT_TO_POINTER(i))); + lua_pushvalue(L, table_idx); + } + else { + lua_pushnil(L); + } + lua_rawseti(L, -2, i + 1); /* Lua arrays are 1-based */ + } + + /* Clean up temporary tables */ + g_hash_table_iter_init(&iter, rcpt_args_by_idx); + while (g_hash_table_iter_next(&iter, &key, &value)) { + int table_idx = GPOINTER_TO_INT(value); + lua_remove(L, table_idx); + } + } + else { + lua_pushnil(L); + } + + g_hash_table_destroy(rcpt_args_by_idx); + } + else { + /* Return args for specific recipient */ + lua_newtable(L); + gboolean found = FALSE; + + while (hdr) { + const char *p, *colon, *eq; + gsize len; + int rcpt_idx; + + if (hdr->decoded) { + p = hdr->decoded; + len = strlen(p); + } + else { + p = hdr->value; + len = hdr->value_len; + } + + /* Parse IDX:KEY=VALUE format */ + colon = memchr(p, ':', len); + + if (colon) { + char *endptr; + rcpt_idx = strtol(p, &endptr, 10); + + if (endptr == colon && rcpt_idx == idx) { + found = TRUE; + p = colon + 1; + len 
-= (colon - p) + 1; + + /* Parse KEY=VALUE */ + eq = memchr(p, '=', len); + + if (eq) { + lua_pushlstring(L, p, eq - p); + lua_pushlstring(L, eq + 1, len - (eq - p) - 1); + } + else { + lua_pushlstring(L, p, len); + lua_pushstring(L, ""); + } + + lua_settable(L, -3); + } + } + + hdr = hdr->next; + } + + if (!found) { + lua_pop(L, 1); + lua_pushnil(L); + } + } + } + else { + lua_pushnil(L); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + static int lua_task_get_user(lua_State *L) {