From: Vsevolod Stakhov Date: Sun, 5 Oct 2025 20:31:22 +0000 (+0100) Subject: [Feature] Persist milter ESMTP args in task and expose via Lua API X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a0de89852ddd6a91f644221a3cbe9bd57631f61d;p=thirdparty%2Frspamd.git [Feature] Persist milter ESMTP args in task and expose via Lua API - Store MAIL/RCPT ESMTP arguments in task (mempool-backed) - Transfer args from milter session and over HTTP headers - Parse X-Rspamd-{Mail,Rcpt}-Esmtp-Args in protocol and fill task - Update Lua API to read from task with HTTP fallback - Keep milter flag semantics intact and robust across proxy hops --- diff --git a/src/libserver/milter.c b/src/libserver/milter.c index 650a3370b6..9e89f20545 100644 --- a/src/libserver/milter.c +++ b/src/libserver/milter.c @@ -1856,6 +1856,28 @@ rspamd_milter_to_http(struct rspamd_milter_session *session) return msg; } +struct rspamd_http_message * +rspamd_milter_to_http_with_task(struct rspamd_milter_session *session, + struct rspamd_task *task) +{ + struct rspamd_http_message *msg; + + /* First create the HTTP message using the existing function */ + msg = rspamd_milter_to_http(session); + + /* Then store ESMTP arguments in the task */ + if (task && session) { + if (session->mail_esmtp_args) { + rspamd_task_set_mail_esmtp_args(task, session->mail_esmtp_args); + } + if (session->rcpt_esmtp_args) { + rspamd_task_set_rcpt_esmtp_args(task, session->rcpt_esmtp_args); + } + } + + return msg; +} + void * rspamd_milter_update_userdata(struct rspamd_milter_session *session, void *ud) diff --git a/src/libserver/milter.h b/src/libserver/milter.h index b70092f452..eb3f40e9fb 100644 --- a/src/libserver/milter.h +++ b/src/libserver/milter.h @@ -50,6 +50,8 @@ struct rspamd_email_address; struct ev_loop; struct rspamd_http_message; struct rspamd_config; +/* Forward declaration to avoid heavy includes */ +struct rspamd_task; struct rspamd_milter_context { const char *spam_header; @@ -161,6 +163,16 @@ struct rspamd_milter_session *rspamd_milter_session_ref( struct rspamd_http_message *rspamd_milter_to_http( struct rspamd_milter_session *session); +/** + * Convert milter session to HTTP message and store ESMTP args in task + * @param session milter session + * @param task task to store ESMTP arguments in + * @return HTTP message + */ +struct rspamd_http_message *rspamd_milter_to_http_with_task( + struct rspamd_milter_session *session, + struct rspamd_task *task); + /** * Sends task results to the * @param session diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index b085c69d75..368e6145f4 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -490,271 +490,349 @@ rspamd_protocol_handle_headers(struct rspamd_task *task, hv_tok->len = h->value.len; switch (*hn_tok->begin) { - case 'd': - case 'D': - IF_HEADER(DELIVER_TO_HEADER) - { - task->deliver_to = rspamd_protocol_escape_braces(task, hv_tok); - msg_debug_protocol("read deliver-to header, value: %s", - task->deliver_to); - } - else - { - msg_debug_protocol("wrong header: %T", hn_tok); - } - break; - case 'h': - case 'H': - IF_HEADER(HELO_HEADER) - { - task->helo = rspamd_mempool_ftokdup(task->task_pool, hv_tok); - msg_debug_protocol("read helo header, value: %s", task->helo); - } - IF_HEADER(HOSTNAME_HEADER) - { - task->hostname = rspamd_mempool_ftokdup(task->task_pool, - hv_tok); - msg_debug_protocol("read hostname header, value: %s", task->hostname); + case 'd': + case 'D': + IF_HEADER(DELIVER_TO_HEADER) + { + task->deliver_to = rspamd_protocol_escape_braces(task, hv_tok); + msg_debug_protocol("read deliver-to header, value: %s", + task->deliver_to); + } + else + { + msg_debug_protocol("wrong header: %T", hn_tok); + } + break; + case 'h': + case 'H': + IF_HEADER(HELO_HEADER) + { + task->helo = rspamd_mempool_ftokdup(task->task_pool, hv_tok); + msg_debug_protocol("read helo header, value: %s", task->helo); + } + IF_HEADER(HOSTNAME_HEADER) + { + task->hostname = rspamd_mempool_ftokdup(task->task_pool, + hv_tok); + msg_debug_protocol("read hostname header, value: %s", task->hostname); + } + break; + case 'f': + case 'F': + IF_HEADER(FROM_HEADER) + { + if (hv_tok->len == 0) { + /* Replace '' with '<>' to fix parsing issue */ + RSPAMD_FTOK_ASSIGN(hv_tok, "<>"); } - break; - case 'f': - case 'F': - IF_HEADER(FROM_HEADER) - { - if (hv_tok->len == 0) { - /* Replace '' with '<>' to fix parsing issue */ - RSPAMD_FTOK_ASSIGN(hv_tok, "<>"); - } - task->from_envelope = rspamd_email_address_from_smtp( - hv_tok->begin, - hv_tok->len); - msg_debug_protocol("read from header, value: %T", hv_tok); + task->from_envelope = rspamd_email_address_from_smtp( + hv_tok->begin, + hv_tok->len); + msg_debug_protocol("read from header, value: %T", hv_tok); - if (!task->from_envelope) { - msg_err_protocol("bad from header: '%T'", hv_tok); - task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; - } - } - IF_HEADER(FILENAME_HEADER) - { - task->msg.fpath = rspamd_mempool_ftokdup(task->task_pool, - hv_tok); - msg_debug_protocol("read filename header, value: %s", task->msg.fpath); - } - IF_HEADER(FLAGS_HEADER) - { - msg_debug_protocol("read flags header, value: %T", hv_tok); - rspamd_protocol_process_flags(task, hv_tok); + if (!task->from_envelope) { + msg_err_protocol("bad from header: '%T'", hv_tok); + task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; } - break; - case 'q': - case 'Q': - IF_HEADER(QUEUE_ID_HEADER) - { - task->queue_id = rspamd_mempool_ftokdup(task->task_pool, - hv_tok); - msg_debug_protocol("read queue_id header, value: %s", task->queue_id); + } + IF_HEADER(FILENAME_HEADER) + { + task->msg.fpath = rspamd_mempool_ftokdup(task->task_pool, + hv_tok); + msg_debug_protocol("read filename header, value: %s", task->msg.fpath); + } + IF_HEADER(FLAGS_HEADER) + { + msg_debug_protocol("read flags header, value: %T", hv_tok); + rspamd_protocol_process_flags(task, hv_tok); + } + break; + case 'q': + case 'Q': + IF_HEADER(QUEUE_ID_HEADER) + { + task->queue_id = rspamd_mempool_ftokdup(task->task_pool, + hv_tok); + msg_debug_protocol("read queue_id header, value: %s", task->queue_id); + } + else + { + msg_debug_protocol("wrong header: %T", hn_tok); + } + break; + case 'r': + case 'R': + IF_HEADER(RCPT_HEADER) + { + rspamd_protocol_process_recipients(task, hv_tok); + msg_debug_protocol("read rcpt header, value: %T", hv_tok); + } + IF_HEADER(RAW_DATA_HEADER) + { + srch.begin = "yes"; + srch.len = 3; + + msg_debug_protocol("read raw data header, value: %T", hv_tok); + + if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { + task->flags &= ~RSPAMD_TASK_FLAG_MIME; + msg_debug_protocol("disable mime parsing"); } - else - { - msg_debug_protocol("wrong header: %T", hn_tok); + } + break; + case 'i': + case 'I': + IF_HEADER(IP_ADDR_HEADER) + { + if (!rspamd_parse_inet_address(&task->from_addr, + hv_tok->begin, hv_tok->len, + RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) { + msg_err_protocol("bad ip header: '%T'", hv_tok); } - break; - case 'r': - case 'R': - IF_HEADER(RCPT_HEADER) - { - rspamd_protocol_process_recipients(task, hv_tok); - msg_debug_protocol("read rcpt header, value: %T", hv_tok); + else { + msg_debug_protocol("read IP header, value: %T", hv_tok); + has_ip = TRUE; } - IF_HEADER(RAW_DATA_HEADER) - { - srch.begin = "yes"; - srch.len = 3; + } + else + { + msg_debug_protocol("wrong header: %T", hn_tok); + } + break; + case 'p': + case 'P': + IF_HEADER(PASS_HEADER) + { + srch.begin = "all"; + srch.len = 3; - msg_debug_protocol("read raw data header, value: %T", hv_tok); + msg_debug_protocol("read pass header, value: %T", hv_tok); - if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { - task->flags &= ~RSPAMD_TASK_FLAG_MIME; - msg_debug_protocol("disable mime parsing"); - } + if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { + task->flags |= RSPAMD_TASK_FLAG_PASS_ALL; + msg_debug_protocol("pass all filters"); } - break; - case 'i': - case 'I': - IF_HEADER(IP_ADDR_HEADER) - { - if (!rspamd_parse_inet_address(&task->from_addr, - hv_tok->begin, hv_tok->len, - RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) { - msg_err_protocol("bad ip header: '%T'", hv_tok); - } - else { - msg_debug_protocol("read IP header, value: %T", hv_tok); - has_ip = TRUE; - } - } - else - { - msg_debug_protocol("wrong header: %T", hn_tok); - } - break; - case 'p': - case 'P': - IF_HEADER(PASS_HEADER) - { - srch.begin = "all"; - srch.len = 3; - - msg_debug_protocol("read pass header, value: %T", hv_tok); - - if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { - task->flags |= RSPAMD_TASK_FLAG_PASS_ALL; - msg_debug_protocol("pass all filters"); + } + IF_HEADER(PROFILE_HEADER) + { + msg_debug_protocol("read profile header, value: %T", hv_tok); + task->flags |= RSPAMD_TASK_FLAG_PROFILE; + } + break; + case 's': + case 'S': + IF_HEADER(SETTINGS_ID_HEADER) + { + msg_debug_protocol("read settings-id header, value: %T", hv_tok); + task->settings_elt = rspamd_config_find_settings_name_ref( + task->cfg, hv_tok->begin, hv_tok->len); + + if (task->settings_elt == NULL) { + GString *known_ids = g_string_new(NULL); + struct rspamd_config_settings_elt *cur; + + DL_FOREACH(task->cfg->setting_ids, cur) + { + rspamd_printf_gstring(known_ids, "%s(%ud);", + cur->name, cur->id); } - } - IF_HEADER(PROFILE_HEADER) - { - msg_debug_protocol("read profile header, value: %T", hv_tok); - task->flags |= RSPAMD_TASK_FLAG_PROFILE; - } - break; - case 's': - case 'S': - IF_HEADER(SETTINGS_ID_HEADER) - { - msg_debug_protocol("read settings-id header, value: %T", hv_tok); - task->settings_elt = rspamd_config_find_settings_name_ref( - task->cfg, hv_tok->begin, hv_tok->len); - - if (task->settings_elt == NULL) { - GString *known_ids = g_string_new(NULL); - struct rspamd_config_settings_elt *cur; - - DL_FOREACH(task->cfg->setting_ids, cur) - { - rspamd_printf_gstring(known_ids, "%s(%ud);", - cur->name, cur->id); - } - msg_warn_protocol("unknown settings id: %T(%d); known_ids: %v", - hv_tok, - rspamd_config_name_to_id(hv_tok->begin, hv_tok->len), - known_ids); + msg_warn_protocol("unknown settings id: %T(%d); known_ids: %v", + hv_tok, + rspamd_config_name_to_id(hv_tok->begin, hv_tok->len), + known_ids); - g_string_free(known_ids, TRUE); - } - else { - msg_debug_protocol("applied settings id %T -> %ud", hv_tok, - task->settings_elt->id); - } + g_string_free(known_ids, TRUE); } - IF_HEADER(SETTINGS_HEADER) - { - msg_debug_protocol("read settings header, value: %T", hv_tok); - seen_settings_header = TRUE; + else { + msg_debug_protocol("applied settings id %T -> %ud", hv_tok, + task->settings_elt->id); } - break; - case 'u': - case 'U': - IF_HEADER(USER_HEADER) - { - /* + } + IF_HEADER(SETTINGS_HEADER) + { + msg_debug_protocol("read settings header, value: %T", hv_tok); + seen_settings_header = TRUE; + } + break; + case 'u': + case 'U': + IF_HEADER(USER_HEADER) + { + /* * We must ignore User header in case of spamc, as SA has * different meaning of this header */ - msg_debug_protocol("read user header, value: %T", hv_tok); - if (!RSPAMD_TASK_IS_SPAMC(task)) { - task->auth_user = rspamd_mempool_ftokdup(task->task_pool, - hv_tok); - } - else { - msg_info_protocol("ignore user header: legacy SA protocol"); - } + msg_debug_protocol("read user header, value: %T", hv_tok); + if (!RSPAMD_TASK_IS_SPAMC(task)) { + task->auth_user = rspamd_mempool_ftokdup(task->task_pool, + hv_tok); } - IF_HEADER(URLS_HEADER) - { - msg_debug_protocol("read urls header, value: %T", hv_tok); + else { + msg_info_protocol("ignore user header: legacy SA protocol"); + } + } + IF_HEADER(URLS_HEADER) + { + msg_debug_protocol("read urls header, value: %T", hv_tok); - srch.begin = "extended"; - srch.len = 8; + srch.begin = "extended"; + srch.len = 8; - if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { - task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS; - msg_debug_protocol("extended urls information"); - } - - /* TODO: add more formats there */ + if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { + task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS; + msg_debug_protocol("extended urls information"); } - IF_HEADER(USER_AGENT_HEADER) - { - msg_debug_protocol("read user-agent header, value: %T", hv_tok); - if (hv_tok->len == 6 && - rspamd_lc_cmp(hv_tok->begin, "rspamc", 6) == 0) { - task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT; - } + /* TODO: add more formats there */ + } + IF_HEADER(USER_AGENT_HEADER) + { + msg_debug_protocol("read user-agent header, value: %T", hv_tok); + + if (hv_tok->len == 6 && + rspamd_lc_cmp(hv_tok->begin, "rspamc", 6) == 0) { + task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT; } - break; - case 'l': - case 'L': - IF_HEADER(NO_LOG_HEADER) - { - msg_debug_protocol("read log header, value: %T", hv_tok); - srch.begin = "no"; - srch.len = 2; - - if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { - task->flags |= RSPAMD_TASK_FLAG_NO_LOG; - } + } + break; + case 'l': + case 'L': + IF_HEADER(NO_LOG_HEADER) + { + msg_debug_protocol("read log header, value: %T", hv_tok); + srch.begin = "no"; + srch.len = 2; + + if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { + task->flags |= RSPAMD_TASK_FLAG_NO_LOG; } - IF_HEADER(LOG_TAG_HEADER) - { - msg_debug_protocol("read log-tag header, value: %T", hv_tok); - /* Ensure that a tag is valid */ - if (rspamd_fast_utf8_validate(hv_tok->begin, hv_tok->len) == 0) { - memcpy(task->task_pool->tag.uid, hv_tok->begin, - MIN(hv_tok->len, sizeof(task->task_pool->tag.uid))); - } + } + IF_HEADER(LOG_TAG_HEADER) + { + msg_debug_protocol("read log-tag header, value: %T", hv_tok); + /* Ensure that a tag is valid */ + if (rspamd_fast_utf8_validate(hv_tok->begin, hv_tok->len) == 0) { + memcpy(task->task_pool->tag.uid, hv_tok->begin, + MIN(hv_tok->len, sizeof(task->task_pool->tag.uid))); } - break; - case 'm': - case 'M': - IF_HEADER(MTA_TAG_HEADER) - { - char *mta_tag; - mta_tag = rspamd_mempool_ftokdup(task->task_pool, hv_tok); - rspamd_mempool_set_variable(task->task_pool, - RSPAMD_MEMPOOL_MTA_TAG, - mta_tag, NULL); - msg_debug_protocol("read MTA-Tag header, value: %s", mta_tag); + } + break; + case 'm': + case 'M': + IF_HEADER(MTA_TAG_HEADER) + { + char *mta_tag; + mta_tag = rspamd_mempool_ftokdup(task->task_pool, hv_tok); + rspamd_mempool_set_variable(task->task_pool, + RSPAMD_MEMPOOL_MTA_TAG, + mta_tag, NULL); + msg_debug_protocol("read MTA-Tag header, value: %s", mta_tag); + } + IF_HEADER(MTA_NAME_HEADER) + { + char *mta_name; + mta_name = rspamd_mempool_ftokdup(task->task_pool, hv_tok); + rspamd_mempool_set_variable(task->task_pool, + RSPAMD_MEMPOOL_MTA_NAME, + mta_name, NULL); + msg_debug_protocol("read MTA-Name header, value: %s", mta_name); + } + IF_HEADER(MILTER_HEADER) + { + task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MILTER; + msg_debug_protocol("read Milter header, value: %T", hv_tok); + } + break; + case 't': + case 'T': + IF_HEADER(TLS_CIPHER_HEADER) + { + task->flags |= RSPAMD_TASK_FLAG_SSL; + msg_debug_protocol("read TLS cipher header, value: %T", hv_tok); + } + break; + case 'x': + case 'X': + IF_HEADER("X-Rspamd-Mail-Esmtp-Args") + { + /* Parse MAIL ESMTP arguments from HTTP header */ + if (!task->mail_esmtp_args) { + task->mail_esmtp_args = g_hash_table_new_full( + rspamd_ftok_icase_hash, + rspamd_ftok_icase_equal, + rspamd_fstring_mapped_ftok_free, + rspamd_fstring_mapped_ftok_free); } - IF_HEADER(MTA_NAME_HEADER) - { - char *mta_name; - mta_name = rspamd_mempool_ftokdup(task->task_pool, hv_tok); - rspamd_mempool_set_variable(task->task_pool, - RSPAMD_MEMPOOL_MTA_NAME, - mta_name, NULL); - msg_debug_protocol("read MTA-Name header, value: %s", mta_name); + + /* Parse KEY=VALUE format */ + const char *p = hv_tok->begin; + const char *end = hv_tok->begin + hv_tok->len; + const char *eq = memchr(p, '=', hv_tok->len); + + if (eq && eq > p) { + rspamd_fstring_t *key = rspamd_fstring_new_init(p, eq - p); + rspamd_fstring_t *value = rspamd_fstring_new_init(eq + 1, end - eq - 1); + rspamd_ftok_t *key_tok = rspamd_ftok_map(key); + rspamd_ftok_t *value_tok = rspamd_ftok_map(value); + + g_hash_table_replace(task->mail_esmtp_args, key_tok, value_tok); + msg_debug_protocol("parsed mail ESMTP arg: %T=%T", key_tok, value_tok); } - IF_HEADER(MILTER_HEADER) - { - task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MILTER; - msg_debug_protocol("read Milter header, value: %T", hv_tok); + } + IF_HEADER("X-Rspamd-Rcpt-Esmtp-Args") + { + /* Parse RCPT ESMTP arguments from HTTP header */ + if (!task->rcpt_esmtp_args) { + task->rcpt_esmtp_args = g_ptr_array_new(); } - break; - case 't': - case 'T': - IF_HEADER(TLS_CIPHER_HEADER) - { - task->flags |= RSPAMD_TASK_FLAG_SSL; - msg_debug_protocol("read TLS cipher header, value: %T", hv_tok); + + /* Parse IDX:KEY=VALUE format */ + const char *p = hv_tok->begin; + const char *end = hv_tok->begin + hv_tok->len; + const char *colon = memchr(p, ':', hv_tok->len); + + if (colon && colon > p) { + char *endptr; + int rcpt_idx = strtol(p, &endptr, 10); + + if (endptr == colon) { + /* Ensure we have enough entries in the array */ + while (task->rcpt_esmtp_args->len <= rcpt_idx) { + g_ptr_array_add(task->rcpt_esmtp_args, NULL); + } + + /* Get or create hash table for this recipient */ + GHashTable *rcpt_args = g_ptr_array_index(task->rcpt_esmtp_args, rcpt_idx); + if (!rcpt_args) { + rcpt_args = g_hash_table_new_full( + rspamd_ftok_icase_hash, + rspamd_ftok_icase_equal, + rspamd_fstring_mapped_ftok_free, + rspamd_fstring_mapped_ftok_free); + g_ptr_array_index(task->rcpt_esmtp_args, rcpt_idx) = rcpt_args; + } + + /* Parse KEY=VALUE */ + p = colon + 1; + const char *eq = memchr(p, '=', end - p); + + if (eq && eq > p) { + rspamd_fstring_t *key = rspamd_fstring_new_init(p, eq - p); + rspamd_fstring_t *value = rspamd_fstring_new_init(eq + 1, end - eq - 1); + rspamd_ftok_t *key_tok = rspamd_ftok_map(key); + rspamd_ftok_t *value_tok = rspamd_ftok_map(value); + + g_hash_table_replace(rcpt_args, key_tok, value_tok); + msg_debug_protocol("parsed rcpt ESMTP arg for idx %d: %T=%T", rcpt_idx, key_tok, value_tok); + } + } } - break; - default: - msg_debug_protocol("generic header: %T", hn_tok); - break; + } + break; + default: + msg_debug_protocol("generic header: %T", hn_tok); + break; } rspamd_task_add_request_header (task, hn_tok, hv_tok); diff --git a/src/libserver/task.c b/src/libserver/task.c index f655ab11b2..0d58ad3c7b 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -119,6 +119,10 @@ rspamd_task_new(struct rspamd_worker *worker, new_task->messages = ucl_object_typed_new(UCL_OBJECT); kh_static_init(rspamd_task_lua_cache, &new_task->lua_cache); + /* Initialize ESMTP arguments fields */ + new_task->mail_esmtp_args = NULL; + new_task->rcpt_esmtp_args = NULL; + return new_task; } @@ -1965,3 +1969,39 @@ void rspamd_worker_guard_handler(EV_P_ ev_io *w, int revents) } } } + +/* + * ESMTP arguments management functions + */ + +void rspamd_task_set_mail_esmtp_args(struct rspamd_task *task, GHashTable *args) +{ + if (task && args) { + task->mail_esmtp_args = args; + } +} + +void rspamd_task_set_rcpt_esmtp_args(struct rspamd_task *task, GPtrArray *args) +{ + if (task && args) { + task->rcpt_esmtp_args = args; + } +} + +GHashTable * +rspamd_task_get_mail_esmtp_args(struct rspamd_task *task) +{ + if (task) { + return task->mail_esmtp_args; + } + return NULL; +} + +GPtrArray * +rspamd_task_get_rcpt_esmtp_args(struct rspamd_task *task) +{ + if (task) { + return task->rcpt_esmtp_args; + } + return NULL; +} diff --git a/src/libserver/task.h b/src/libserver/task.h index a1742e1608..29f9781f27 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -220,6 +220,10 @@ struct rspamd_task { const char *classifier; /**< Classifier to learn (if needed) */ struct rspamd_lang_detector *lang_det; /**< Languages detector */ struct rspamd_message *message; + + /* ESMTP arguments from milter protocol */ + GHashTable *mail_esmtp_args; /**< ESMTP arguments from MAIL FROM command */ + GPtrArray *rcpt_esmtp_args; /**< Array of GHashTable, one per recipient with ESMTP arguments */ }; /** @@ -287,6 +291,34 @@ const char *rspamd_task_get_principal_recipient(struct rspamd_task *task); */ gboolean rspamd_task_add_recipient(struct rspamd_task *task, const char *rcpt); +/** + * Set ESMTP arguments for MAIL FROM command + * @param task task object + * @param args hash table with ESMTP arguments + */ +void rspamd_task_set_mail_esmtp_args(struct rspamd_task *task, GHashTable *args); + +/** + * Set ESMTP arguments for RCPT TO commands + * @param task task object + * @param args array of hash tables with ESMTP arguments (one per recipient) + */ +void rspamd_task_set_rcpt_esmtp_args(struct rspamd_task *task, GPtrArray *args); + +/** + * Get ESMTP arguments for MAIL FROM command + * @param task task object + * @return hash table with ESMTP arguments or NULL + */ +GHashTable *rspamd_task_get_mail_esmtp_args(struct rspamd_task *task); + +/** + * Get ESMTP arguments for RCPT TO commands + * @param task task object + * @return array of hash tables with ESMTP arguments or NULL + */ +GPtrArray *rspamd_task_get_rcpt_esmtp_args(struct rspamd_task *task); + /** * Learn specified statfile with message in a task * @param task worker's task object diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 273f6bc618..946910c258 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -4343,51 +4343,73 @@ lua_task_get_mail_esmtp_args(lua_State *L) struct rspamd_mime_header *hdr; if (task) { - /* Check if this is a milter task */ - if (!(task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER)) { - lua_pushnil(L); - return 1; - } + /* First try to get ESMTP args from task */ + GHashTable *mail_args = rspamd_task_get_mail_esmtp_args(task); - /* Get ESMTP args from HTTP headers */ - hdr = rspamd_message_get_header_array(task, "X-Rspamd-Mail-Esmtp-Args", FALSE); + if (mail_args) { + GHashTableIter iter; + gpointer key, value; - if (hdr) { lua_createtable(L, 0, 0); - while (hdr) { - const char *p, *eq; - gsize len; - - if (hdr->decoded) { - p = hdr->decoded; - len = strlen(p); - } - else { - p = hdr->value; - len = hdr->value_len; - } - - /* Parse KEY=VALUE format */ - eq = memchr(p, '=', len); - - if (eq) { - /* KEY=VALUE */ - lua_pushlstring(L, p, eq - p); - lua_pushlstring(L, eq + 1, len - (eq - p) - 1); - } - else { - /* KEY only */ - lua_pushlstring(L, p, len); - lua_pushstring(L, ""); - } + g_hash_table_iter_init(&iter, mail_args); + while (g_hash_table_iter_next(&iter, &key, &value)) { + rspamd_ftok_t *k = (rspamd_ftok_t *) key; + rspamd_ftok_t *v = (rspamd_ftok_t *) value; + lua_pushlstring(L, k->begin, k->len); + lua_pushlstring(L, v->begin, v->len); lua_settable(L, -3); - hdr = hdr->next; } } else { - lua_pushnil(L); + /* Fallback to HTTP headers for backward compatibility */ + /* Check if this is a milter task */ + if (!(task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER)) { + lua_pushnil(L); + return 1; + } + + /* Get ESMTP args from HTTP headers */ + hdr = rspamd_message_get_header_array(task, "X-Rspamd-Mail-Esmtp-Args", FALSE); + + if (hdr) { + lua_createtable(L, 0, 0); + + while (hdr) { + const char *p, *eq; + gsize len; + + if (hdr->decoded) { + p = hdr->decoded; + len = strlen(p); + } + else { + p = hdr->value; + len = strlen(p); + } + + /* Parse KEY=VALUE format */ + eq = memchr(p, '=', len); + + if (eq) { + /* KEY=VALUE */ + lua_pushlstring(L, p, eq - p); + lua_pushlstring(L, eq + 1, len - (eq - p) - 1); + } + else { + /* KEY only */ + lua_pushlstring(L, p, len); + lua_pushstring(L, ""); + } + + lua_settable(L, -3); + hdr = hdr->next; + } + } + else { + lua_pushnil(L); + } } } else { @@ -4408,191 +4430,246 @@ lua_task_get_rcpt_esmtp_args(lua_State *L) GHashTable *rcpt_args_by_idx = NULL; if (task) { - /* Check if this is a milter task */ - if (!(task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER)) { - lua_pushnil(L); - return 1; - } - /* Check if idx was specified */ if (lua_gettop(L) >= 2 && lua_type(L, 2) == LUA_TNUMBER) { idx = lua_tointeger(L, 2); all_rcpts = FALSE; } - /* Get ESMTP args from HTTP headers */ - hdr = rspamd_message_get_header_array(task, "X-Rspamd-Rcpt-Esmtp-Args", FALSE); + /* First try to get ESMTP args from task */ + GPtrArray *rcpt_args = rspamd_task_get_rcpt_esmtp_args(task); - if (hdr) { + if (rcpt_args) { if (all_rcpts) { - /* Build hash table mapping recipient index to args table */ - rcpt_args_by_idx = g_hash_table_new_full(g_direct_hash, g_direct_equal, - NULL, NULL); - - /* First pass: collect all args by recipient index */ - while (hdr) { - const char *p, *colon, *eq; - gsize len; - int rcpt_idx; - lua_State *tmp_L; - - if (hdr->decoded) { - p = hdr->decoded; - len = strlen(p); - } - else { - p = hdr->value; - len = hdr->value_len; - } + /* Return all recipients' args */ + lua_createtable(L, 0, 0); - /* Parse IDX:KEY=VALUE format */ - colon = memchr(p, ':', len); - - if (colon) { - char *endptr; - rcpt_idx = strtol(p, &endptr, 10); - - if (endptr == colon) { - /* Valid index found */ - p = colon + 1; - len -= (colon - p) + 1; - - /* Store this arg for this recipient */ - if (!g_hash_table_contains(rcpt_args_by_idx, GINT_TO_POINTER(rcpt_idx))) { - /* Create new table for this recipient */ - lua_newtable(L); - g_hash_table_insert(rcpt_args_by_idx, - GINT_TO_POINTER(rcpt_idx), - GINT_TO_POINTER(lua_gettop(L))); - } + for (unsigned int i = 0; i < rcpt_args->len; i++) { + GHashTable *args = g_ptr_array_index(rcpt_args, i); + if (args) { + GHashTableIter iter; + gpointer key, value; - /* Get the table for this recipient */ - int table_idx = GPOINTER_TO_INT(g_hash_table_lookup(rcpt_args_by_idx, - GINT_TO_POINTER(rcpt_idx))); - lua_pushvalue(L, table_idx); + lua_createtable(L, 0, 0); - /* Parse KEY=VALUE */ - eq = memchr(p, '=', len); - - if (eq) { - lua_pushlstring(L, p, eq - p); - lua_pushlstring(L, eq + 1, len - (eq - p) - 1); - } - else { - lua_pushlstring(L, p, len); - lua_pushstring(L, ""); - } + g_hash_table_iter_init(&iter, args); + while (g_hash_table_iter_next(&iter, &key, &value)) { + rspamd_ftok_t *k = (rspamd_ftok_t *) key; + rspamd_ftok_t *v = (rspamd_ftok_t *) value; + lua_pushlstring(L, k->begin, k->len); + lua_pushlstring(L, v->begin, v->len); lua_settable(L, -3); - lua_pop(L, 1); /* Pop the table */ } - } - hdr = hdr->next; - } - - /* Now create the result array */ - if (g_hash_table_size(rcpt_args_by_idx) > 0) { - GHashTableIter iter; - gpointer key, value; - int max_idx = 0; - - /* Find max index */ - g_hash_table_iter_init(&iter, rcpt_args_by_idx); - while (g_hash_table_iter_next(&iter, &key, &value)) { - int i = GPOINTER_TO_INT(key); - if (i > max_idx) { - max_idx = i; - } + lua_rawseti(L, -2, i + 1); } - - /* Create result array */ - lua_createtable(L, max_idx + 1, 0); - - /* Fill array with tables or nils */ - for (int i = 0; i <= max_idx; i++) { - if (g_hash_table_contains(rcpt_args_by_idx, GINT_TO_POINTER(i))) { - int table_idx = GPOINTER_TO_INT(g_hash_table_lookup(rcpt_args_by_idx, - GINT_TO_POINTER(i))); - lua_pushvalue(L, table_idx); - } - else { - lua_pushnil(L); + } + } + else { + /* Return specific recipient's args */ + if (idx >= 0 && idx < (int) rcpt_args->len) { + GHashTable *args = g_ptr_array_index(rcpt_args, idx); + if (args) { + GHashTableIter iter; + gpointer key, value; + + lua_createtable(L, 0, 0); + + g_hash_table_iter_init(&iter, args); + while (g_hash_table_iter_next(&iter, &key, &value)) { + rspamd_ftok_t *k = (rspamd_ftok_t *) key; + rspamd_ftok_t *v = (rspamd_ftok_t *) value; + + lua_pushlstring(L, k->begin, k->len); + lua_pushlstring(L, v->begin, v->len); + lua_settable(L, -3); } - lua_rawseti(L, -2, i + 1); /* Lua arrays are 1-based */ } - - /* Clean up temporary tables */ - g_hash_table_iter_init(&iter, rcpt_args_by_idx); - while (g_hash_table_iter_next(&iter, &key, &value)) { - int table_idx = GPOINTER_TO_INT(value); - lua_remove(L, table_idx); + else { + lua_pushnil(L); } } else { lua_pushnil(L); } - - g_hash_table_destroy(rcpt_args_by_idx); } - else { - /* Return args for specific recipient */ - lua_newtable(L); - gboolean found = FALSE; + } + else { + /* Fallback to HTTP headers for backward compatibility */ + + /* Get ESMTP args from HTTP headers */ + hdr = rspamd_message_get_header_array(task, "X-Rspamd-Rcpt-Esmtp-Args", FALSE); + if (hdr) { + if (all_rcpts) { + /* Build hash table mapping recipient index to args table */ + rcpt_args_by_idx = g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, NULL); + + /* First pass: collect all args by recipient index */ + while (hdr) { + const char *p, *colon, *eq; + gsize len; + int rcpt_idx; + + if (hdr->decoded) { + p = hdr->decoded; + len = strlen(p); + } + else { + p = hdr->value; + len = strlen(p); + } - while (hdr) { - const char *p, *colon, *eq; - gsize len; - int rcpt_idx; + /* Parse IDX:KEY=VALUE format */ + colon = memchr(p, ':', len); + + if (colon) { + char *endptr; + rcpt_idx = strtol(p, &endptr, 10); + + if (endptr == colon) { + /* Valid index found */ + p = colon + 1; + len -= (colon - p) + 1; + + /* Store this arg for this recipient */ + if (!g_hash_table_contains(rcpt_args_by_idx, GINT_TO_POINTER(rcpt_idx))) { + /* Create new table for this recipient */ + lua_newtable(L); + g_hash_table_insert(rcpt_args_by_idx, + GINT_TO_POINTER(rcpt_idx), + GINT_TO_POINTER(lua_gettop(L))); + } - if (hdr->decoded) { - p = hdr->decoded; - len = strlen(p); - } - else { - p = hdr->value; - len = hdr->value_len; - } + /* Get the table for this recipient */ + int table_idx = GPOINTER_TO_INT(g_hash_table_lookup(rcpt_args_by_idx, + GINT_TO_POINTER(rcpt_idx))); + lua_pushvalue(L, table_idx); + + /* Parse KEY=VALUE */ + eq = memchr(p, '=', len); + + if (eq) { + lua_pushlstring(L, p, eq - p); + lua_pushlstring(L, eq + 1, len - (eq - p) - 1); + } + else { + lua_pushlstring(L, p, len); + lua_pushstring(L, ""); + } - /* Parse IDX:KEY=VALUE format */ - colon = memchr(p, ':', len); + lua_settable(L, -3); + lua_pop(L, 1); /* Pop the table */ + } + } - if (colon) { - char *endptr; - rcpt_idx = strtol(p, &endptr, 10); + hdr = hdr->next; + } - if (endptr == colon && rcpt_idx == idx) { - found = TRUE; - p = colon + 1; - len -= (colon - p) + 1; + /* Now create the result array */ + if (g_hash_table_size(rcpt_args_by_idx) > 0) { + GHashTableIter iter; + gpointer key, value; + int max_idx = 0; + + /* Find max index */ + g_hash_table_iter_init(&iter, rcpt_args_by_idx); + while (g_hash_table_iter_next(&iter, &key, &value)) { + int i = GPOINTER_TO_INT(key); + if (i > max_idx) { + max_idx = i; + } + } - /* Parse KEY=VALUE */ - eq = memchr(p, '=', len); + /* Create result array */ + lua_createtable(L, max_idx + 1, 0); - if (eq) { - lua_pushlstring(L, p, eq - p); - lua_pushlstring(L, eq + 1, len - (eq - p) - 1); + /* Fill array with tables or nils */ + for (int i = 0; i <= max_idx; i++) { + if (g_hash_table_contains(rcpt_args_by_idx, GINT_TO_POINTER(i))) { + int table_idx = GPOINTER_TO_INT(g_hash_table_lookup(rcpt_args_by_idx, + GINT_TO_POINTER(i))); + lua_pushvalue(L, table_idx); } else { - lua_pushlstring(L, p, len); - lua_pushstring(L, ""); + lua_pushnil(L); } + lua_rawseti(L, -2, i + 1); /* Lua arrays are 1-based */ + } - lua_settable(L, -3); + /* Clean up temporary tables */ + g_hash_table_iter_init(&iter, rcpt_args_by_idx); + while (g_hash_table_iter_next(&iter, &key, &value)) { + int table_idx = GPOINTER_TO_INT(value); + lua_remove(L, table_idx); } } + else { + lua_pushnil(L); + } - hdr = hdr->next; + g_hash_table_destroy(rcpt_args_by_idx); } + else { + /* Return args for specific recipient */ + lua_newtable(L); + gboolean found = FALSE; + + while (hdr) { + const char *p, *colon, *eq; + gsize len; + int rcpt_idx; + + if (hdr->decoded) { + p = hdr->decoded; + len = strlen(p); + } + else { + p = hdr->value; + len = strlen(p); + } - if (!found) { - lua_pop(L, 1); - lua_pushnil(L); + /* Parse IDX:KEY=VALUE format */ + colon = memchr(p, ':', len); + + if (colon) { + char *endptr; + rcpt_idx = strtol(p, &endptr, 10); + + if (endptr == colon && rcpt_idx == idx) { + found = TRUE; + p = colon + 1; + len -= (colon - p) + 1; + + /* Parse KEY=VALUE */ + eq = memchr(p, '=', len); + + if (eq) { + lua_pushlstring(L, p, eq - p); + lua_pushlstring(L, eq + 1, len - (eq - p) - 1); + } + else { + lua_pushlstring(L, p, len); + lua_pushstring(L, ""); + } + + lua_settable(L, -3); + } + } + + hdr = hdr->next; + } + + if (!found) { + lua_pop(L, 1); + lua_pushnil(L); + } } } - } - else { - lua_pushnil(L); + else { + lua_pushnil(L); + } } } else { diff --git a/src/rspamd_proxy.c b/src/rspamd_proxy.c index 195442f514..d642ae0bc0 100644 --- a/src/rspamd_proxy.c +++ b/src/rspamd_proxy.c @@ -236,6 +236,10 @@ struct rspamd_proxy_session { int retries; ref_entry_t ref; enum rspamd_proxy_session_flags flags; + + /* ESMTP arguments from milter session */ + GHashTable *mail_esmtp_args; + GPtrArray *rcpt_esmtp_args; }; static gboolean proxy_send_master_message(struct rspamd_proxy_session *session); @@ -1425,6 +1429,10 @@ proxy_session_refresh(struct rspamd_proxy_session *session) nsession->mirror_conns = g_ptr_array_sized_new(nsession->ctx->mirrors->len); nsession->flags = session->flags; + /* Copy ESMTP arguments */ + nsession->mail_esmtp_args = session->mail_esmtp_args; + nsession->rcpt_esmtp_args = session->rcpt_esmtp_args; + REF_INIT_RETAIN(nsession, proxy_session_dtor); if (nsession->ctx->sessions_cache) { @@ -2342,6 +2350,14 @@ rspamd_proxy_self_scan(struct rspamd_proxy_session *session) if (session->ctx->milter) { task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MILTER | RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK; + + /* Transfer ESMTP arguments from session to task */ + if (session->mail_esmtp_args) { + rspamd_task_set_mail_esmtp_args(task, session->mail_esmtp_args); + } + if (session->rcpt_esmtp_args) { + rspamd_task_set_rcpt_esmtp_args(task, session->rcpt_esmtp_args); + } } task->sock = -1; @@ -2783,6 +2799,14 @@ proxy_milter_finish_handler(int fd, session->master_conn->name = "master"; session->client_message = msg; + /* Store ESMTP arguments from milter session */ + if (rms->mail_esmtp_args) { + session->mail_esmtp_args = rms->mail_esmtp_args; + } + if (rms->rcpt_esmtp_args) { + session->rcpt_esmtp_args = rms->rcpt_esmtp_args; + } + /* Milter protocol doesn't support compression, so no need to set compression flag */ proxy_open_mirror_connections(session);