From: Vsevolod Stakhov Date: Thu, 5 Feb 2026 22:00:00 +0000 (+0000) Subject: [Feature] Add /checkv3 multipart scan endpoint X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=43ac466b66997a47539c723f6e0457dc1ba13161;p=thirdparty%2Frspamd.git [Feature] Add /checkv3 multipart scan endpoint Implement a new /checkv3 endpoint that uses multipart/form-data for requests and multipart/mixed for responses. Metadata (from, rcpt, ip, settings, etc.) is sent as a structured JSON/msgpack part instead of HTTP headers. The response includes a "result" part and an optional "body" part for rewritten messages. New C++ multipart parser and response builder with C bridge functions. Per-part zstd compression support. Client-side support via rspamc --protocol-v3 flag. Proxy self-scan path updated for v3. --- diff --git a/src/client/rspamc.cxx b/src/client/rspamc.cxx index e5abf17383..b367e183a9 100644 --- a/src/client/rspamc.cxx +++ b/src/client/rspamc.cxx @@ -89,6 +89,7 @@ static gboolean compressed = TRUE; static gboolean profile = FALSE; static gboolean skip_images = FALSE; static gboolean skip_attachments = FALSE; +static gboolean protocol_v3 = FALSE; static const char *pubkey = nullptr; static const char *user_agent = "rspamc"; static const char *files_list = nullptr; @@ -193,6 +194,8 @@ static GOptionEntry entries[] = "Skip images when learning/unlearning fuzzy", nullptr}, {"skip-attachments", '\0', 0, G_OPTION_ARG_NONE, &skip_attachments, "Skip attachments when learning/unlearning fuzzy", nullptr}, + {"protocol-v3", '\0', 0, G_OPTION_ARG_NONE, &protocol_v3, + "Use v3 multipart protocol (structured metadata, multipart response)", nullptr}, {"user-agent", 'U', 0, G_OPTION_ARG_STRING, &user_agent, "Use specific User-Agent instead of \"rspamc\"", nullptr}, {"files-list", '\0', 0, G_OPTION_ARG_FILENAME, &files_list, @@ -2281,7 +2284,88 @@ rspamc_process_input(struct ev_loop *ev_base, const struct rspamc_command &cmd, cbdata->cmd = cmd; cbdata->filename = name; - if (cmd.need_input) { + if (protocol_v3 && cmd.need_input && + (cmd.cmd == RSPAMC_COMMAND_SYMBOLS || cmd.cmd == RSPAMC_COMMAND_CHECK)) { + /* Build metadata UCL object from CLI options */ + ucl_object_t *metadata = ucl_object_typed_new(UCL_OBJECT); + + if (from) { + ucl_object_insert_key(metadata, ucl_object_fromstring(from), + "from", 0, false); + } + if (rcpts) { + ucl_object_t *rcpt_arr = ucl_object_typed_new(UCL_ARRAY); + for (auto *rcpt = rcpts; *rcpt; rcpt++) { + ucl_array_append(rcpt_arr, ucl_object_fromstring(*rcpt)); + } + ucl_object_insert_key(metadata, rcpt_arr, "rcpt", 0, false); + } + if (ip) { + ucl_object_insert_key(metadata, ucl_object_fromstring(ip), + "ip", 0, false); + } + if (helo) { + ucl_object_insert_key(metadata, ucl_object_fromstring(helo), + "helo", 0, false); + } + if (hostname) { + ucl_object_insert_key(metadata, ucl_object_fromstring(hostname), + "hostname", 0, false); + } + if (user) { + ucl_object_insert_key(metadata, ucl_object_fromstring(user), + "user", 0, false); + } + if (deliver_to) { + ucl_object_insert_key(metadata, ucl_object_fromstring(deliver_to), + "deliver_to", 0, false); + } + if (queue_id) { + ucl_object_insert_key(metadata, ucl_object_fromstring(queue_id), + "queue_id", 0, false); + } + if (log_tag) { + ucl_object_insert_key(metadata, ucl_object_fromstring(log_tag), + "log_tag", 0, false); + } + if (!settings.empty()) { + /* Try to parse settings as UCL */ + struct ucl_parser *sp = ucl_parser_new(UCL_PARSER_DEFAULT); + if (ucl_parser_add_string(sp, settings.c_str(), settings.size())) { + ucl_object_t *sobj = ucl_parser_get_object(sp); + ucl_object_insert_key(metadata, sobj, "settings", 0, false); + } + ucl_parser_free(sp); + } + if (raw) { + ucl_object_insert_key(metadata, ucl_object_frombool(true), + "raw", 0, false); + } + + /* Build flags array */ + ucl_object_t *flags_arr = ucl_object_typed_new(UCL_ARRAY); + if (pass_all) { + ucl_array_append(flags_arr, ucl_object_fromstring("pass_all")); + } + if (extended_urls) { + ucl_array_append(flags_arr, ucl_object_fromstring("ext_urls")); + } + if (profile) { + ucl_array_append(flags_arr, ucl_object_fromstring("profile")); + } + if (ucl_array_size(flags_arr) > 0) { + ucl_object_insert_key(metadata, flags_arr, "flags", 0, false); + } + else { + ucl_object_unref(flags_arr); + } + + rspamd_client_command_v3(conn, "checkv3", metadata, in, + rspamc_client_cb, cbdata, compressed, + cbdata->filename.c_str(), &err); + ucl_object_unref(metadata); + } + else if (cmd.need_input) { const char *path = path_override.empty() ? cmd.path : path_override.c_str(); rspamd_client_command(conn, path, attrs, in, rspamc_client_cb, cbdata, compressed, dictionary, cbdata->filename.c_str(), &err); diff --git a/src/client/rspamdclient.c b/src/client/rspamdclient.c index adffcf43f0..0c2a87c969 100644 --- a/src/client/rspamdclient.c +++ b/src/client/rspamdclient.c @@ -18,6 +18,9 @@ #include "libserver/http/http_connection.h" #include "libserver/http/http_private.h" #include "libserver/protocol_internal.h" +#include "libserver/multipart_form.h" +#include "libmime/content_type.h" +#include "ottery.h" #include "unix-std.h" #ifdef SYS_ZSTD @@ -46,6 +49,7 @@ struct rspamd_client_connection { ev_tstamp timeout; struct rspamd_http_connection *http_conn; gboolean req_sent; + gboolean v3_mode; double start_time; double send_time; struct rspamd_client_request *req; @@ -104,6 +108,10 @@ rspamd_client_error_handler(struct rspamd_http_connection *conn, GError *err) c->start_time, c->send_time, NULL, 0, err); } +static int +rspamd_client_v3_finish_handler(struct rspamd_http_connection *conn, + struct rspamd_http_message *msg); + static int rspamd_client_finish_handler(struct rspamd_http_connection *conn, struct rspamd_http_message *msg) @@ -120,6 +128,10 @@ rspamd_client_finish_handler(struct rspamd_http_connection *conn, c = req->conn; + if (c->v3_mode) { + return rspamd_client_v3_finish_handler(conn, msg); + } + if (!c->req_sent) { c->req_sent = TRUE; c->send_time = rspamd_get_ticks(FALSE); @@ -486,6 +498,383 @@ rspamd_client_command(struct rspamd_client_connection *conn, return ret; } +/* + * V3 client: finish handler for multipart/mixed responses + */ +static int +rspamd_client_v3_finish_handler(struct rspamd_http_connection *conn, + struct rspamd_http_message *msg) +{ + struct rspamd_client_request *req = + (struct rspamd_client_request *) conn->ud; + struct rspamd_client_connection *c; + struct ucl_parser *parser; + GError *err; + const char *start, *body = NULL; + gsize len, bodylen = 0; + + c = req->conn; + + if (!c->req_sent) { + c->req_sent = TRUE; + c->send_time = rspamd_get_ticks(FALSE); + rspamd_http_connection_reset(c->http_conn); + rspamd_http_connection_read_message(c->http_conn, c->req, c->timeout); + return 0; + } + + if (rspamd_http_message_get_body(msg, NULL) == NULL || msg->code / 100 != 2) { + err = g_error_new(RCLIENT_ERROR, msg->code, "HTTP error: %d, %.*s", + msg->code, + (int) msg->status->len, msg->status->str); + req->cb(c, msg, c->server_name->str, NULL, req->input, req->ud, + c->start_time, c->send_time, NULL, 0, err); + g_error_free(err); + return 0; + } + + /* Check if response is multipart/mixed */ + const rspamd_ftok_t *ct = rspamd_http_message_find_header(msg, "Content-Type"); + + if (ct && rspamd_substring_search_caseless(ct->begin, ct->len, + "multipart/mixed", sizeof("multipart/mixed") - 1) != -1) { + /* Parse multipart response to extract result and body */ + /* Extract boundary from Content-Type */ + struct rspamd_content_type *parsed_ct = rspamd_content_type_parse( + ct->begin, ct->len, rspamd_mempool_new(256, "v3-client", 0)); + /* Note: we leak this small pool; acceptable for client-side */ + + if (parsed_ct && parsed_ct->boundary.len > 0) { + struct rspamd_multipart_form_c *form = rspamd_multipart_form_parse( + msg->body_buf.begin, msg->body_buf.len, + parsed_ct->boundary.begin, parsed_ct->boundary.len); + + if (form) { + const struct rspamd_multipart_entry_c *result_part = + rspamd_multipart_form_find(form, "result", sizeof("result") - 1); + + if (result_part) { + start = result_part->data; + len = result_part->data_len; + + /* Check for per-part zstd compression */ + if (result_part->content_encoding && + result_part->content_encoding_len > 0 && + rspamd_substring_search_caseless(result_part->content_encoding, + result_part->content_encoding_len, + "zstd", 4) != -1) { + /* Decompress */ + ZSTD_DStream *zstream = ZSTD_createDStream(); + ZSTD_initDStream(zstream); + ZSTD_inBuffer zin = {start, len, 0}; + gsize outlen = ZSTD_getDecompressedSize(start, len); + if (outlen == 0) outlen = ZSTD_DStreamOutSize(); + unsigned char *out = g_malloc(outlen); + ZSTD_outBuffer zout = {out, outlen, 0}; + + while (zin.pos < zin.size) { + gsize r = ZSTD_decompressStream(zstream, &zout, &zin); + if (ZSTD_isError(r)) { + g_free(out); + ZSTD_freeDStream(zstream); + rspamd_multipart_form_free(form); + err = g_error_new(RCLIENT_ERROR, 500, + "result decompression error: %s", + ZSTD_getErrorName(r)); + req->cb(c, msg, c->server_name->str, NULL, + req->input, req->ud, c->start_time, + c->send_time, NULL, 0, err); + g_error_free(err); + return 0; + } + if (zout.pos == zout.size) { + zout.size *= 2; + out = g_realloc(zout.dst, zout.size); + zout.dst = out; + } + } + ZSTD_freeDStream(zstream); + start = (const char *) zout.dst; + len = zout.pos; + /* Note: out will be freed below via goto end pattern */ + } + + /* Extract optional body part */ + const struct rspamd_multipart_entry_c *body_part = + rspamd_multipart_form_find(form, "body", sizeof("body") - 1); + if (body_part && body_part->data_len > 0) { + body = body_part->data; + bodylen = body_part->data_len; + /* TODO: decompress body part if needed */ + } + + parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS); + /* Detect msgpack from content type */ + if (result_part->content_type && + rspamd_substring_search_caseless(result_part->content_type, + result_part->content_type_len, + "msgpack", 7) != -1) { + ucl_parser_add_chunk_full(parser, (const unsigned char *) start, len, + ucl_parser_get_default_priority(parser), + UCL_DUPLICATE_APPEND, UCL_PARSE_MSGPACK); + } + else { + ucl_parser_add_chunk(parser, (const unsigned char *) start, len); + } + + if (ucl_parser_get_error(parser)) { + err = g_error_new(RCLIENT_ERROR, msg->code, + "Cannot parse UCL: %s", + ucl_parser_get_error(parser)); + ucl_parser_free(parser); + rspamd_multipart_form_free(form); + req->cb(c, msg, c->server_name->str, NULL, + req->input, req->ud, c->start_time, + c->send_time, body, bodylen, err); + g_error_free(err); + return 0; + } + + req->cb(c, msg, c->server_name->str, + ucl_parser_get_object(parser), + req->input, req->ud, + c->start_time, c->send_time, body, bodylen, NULL); + ucl_parser_free(parser); + } + else { + err = g_error_new(RCLIENT_ERROR, 500, + "No 'result' part in multipart response"); + req->cb(c, msg, c->server_name->str, NULL, + req->input, req->ud, c->start_time, + c->send_time, NULL, 0, err); + g_error_free(err); + } + + rspamd_multipart_form_free(form); + } + else { + err = g_error_new(RCLIENT_ERROR, 500, + "Cannot parse multipart response"); + req->cb(c, msg, c->server_name->str, NULL, + req->input, req->ud, c->start_time, + c->send_time, NULL, 0, err); + g_error_free(err); + } + } + else { + err = g_error_new(RCLIENT_ERROR, 500, + "No boundary in multipart Content-Type"); + req->cb(c, msg, c->server_name->str, NULL, + req->input, req->ud, c->start_time, + c->send_time, NULL, 0, err); + g_error_free(err); + } + } + else { + /* Fallback: non-multipart response, handle like v2 */ + start = msg->body_buf.begin; + len = msg->body_buf.len; + + parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS); + if (!ucl_parser_add_chunk(parser, (const unsigned char *) start, len)) { + err = g_error_new(RCLIENT_ERROR, msg->code, "Cannot parse UCL: %s", + ucl_parser_get_error(parser)); + ucl_parser_free(parser); + req->cb(c, msg, c->server_name->str, NULL, + req->input, req->ud, c->start_time, + c->send_time, NULL, 0, err); + g_error_free(err); + return 0; + } + + req->cb(c, msg, c->server_name->str, + ucl_parser_get_object(parser), + req->input, req->ud, + c->start_time, c->send_time, NULL, 0, NULL); + ucl_parser_free(parser); + } + + return 0; +} + +gboolean +rspamd_client_command_v3(struct rspamd_client_connection *conn, + const char *command, + const ucl_object_t *metadata, + FILE *in, + rspamd_client_callback cb, + gpointer ud, + gboolean compressed, + const char *filename, + GError **err) +{ + struct rspamd_client_request *req; + GString *input = NULL; + rspamd_fstring_t *body; + gboolean ret; + + req = g_malloc0(sizeof(struct rspamd_client_request)); + req->conn = conn; + req->cb = cb; + req->ud = ud; + + req->msg = rspamd_http_new_message(HTTP_REQUEST); + if (conn->key) { + req->msg->peer_key = rspamd_pubkey_ref(conn->key); + } + + /* Read message input */ + const char *msg_data = NULL; + gsize msg_len = 0; + + if (in != NULL) { + input = g_string_sized_new(BUFSIZ); + char *p; + gsize remain, old_len; + + while (!feof(in)) { + p = input->str + input->len; + remain = input->allocated_len - input->len - 1; + if (remain == 0) { + old_len = input->len; + g_string_set_size(input, old_len * 2); + input->len = old_len; + continue; + } + remain = fread(p, 1, remain, in); + if (remain > 0) { + input->len += remain; + input->str[input->len] = '\0'; + } + } + + if (ferror(in) != 0) { + g_set_error(err, RCLIENT_ERROR, ferror(in), + "input IO error: %s", strerror(ferror(in))); + g_free(req); + g_string_free(input, TRUE); + return FALSE; + } + + msg_data = input->str; + msg_len = input->len; + req->input = input; + } + + /* Serialize metadata to JSON */ + char *metadata_json = NULL; + gsize metadata_len = 0; + + if (metadata) { + metadata_json = (char *) ucl_object_emit(metadata, UCL_EMIT_JSON_COMPACT); + metadata_len = strlen(metadata_json); + } + else { + metadata_json = g_strdup("{}"); + metadata_len = 2; + } + + /* Build multipart/form-data body with random boundary */ + char boundary_buf[64]; + rspamd_snprintf(boundary_buf, sizeof(boundary_buf), + "rspamc-v3-%016xL-%016xL", + ottery_rand_uint64(), ottery_rand_uint64()); + const char *boundary = boundary_buf; + GString *mp_body = g_string_sized_new(metadata_len + msg_len + 512); + + /* Metadata part */ + rspamd_printf_gstring(mp_body, + "--%s\r\n" + "Content-Disposition: form-data; name=\"metadata\"\r\n" + "Content-Type: application/json\r\n" + "\r\n", + boundary); + g_string_append_len(mp_body, metadata_json, metadata_len); + g_string_append(mp_body, "\r\n"); + + /* Message part */ + if (msg_data && msg_len > 0) { + if (compressed) { + /* Compress message with zstd */ + gsize comp_bound = ZSTD_compressBound(msg_len); + char *comp_buf = g_malloc(comp_bound); + gsize comp_len = ZSTD_compress(comp_buf, comp_bound, + msg_data, msg_len, 1); + + if (ZSTD_isError(comp_len)) { + g_set_error(err, RCLIENT_ERROR, 500, "compression error"); + g_free(comp_buf); + g_free(metadata_json); + g_string_free(mp_body, TRUE); + g_free(req); + if (input) g_string_free(input, TRUE); + return FALSE; + } + + rspamd_printf_gstring(mp_body, + "--%s\r\n" + "Content-Disposition: form-data; name=\"message\"\r\n" + "Content-Type: application/octet-stream\r\n" + "Content-Encoding: zstd\r\n" + "\r\n", + boundary); + g_string_append_len(mp_body, comp_buf, comp_len); + g_string_append(mp_body, "\r\n"); + g_free(comp_buf); + } + else { + rspamd_printf_gstring(mp_body, + "--%s\r\n" + "Content-Disposition: form-data; name=\"message\"\r\n" + "Content-Type: application/octet-stream\r\n" + "\r\n", + boundary); + g_string_append_len(mp_body, msg_data, msg_len); + g_string_append(mp_body, "\r\n"); + } + } + + /* Closing boundary */ + rspamd_printf_gstring(mp_body, "--%s--\r\n", boundary); + + g_free(metadata_json); + + /* Set body */ + body = rspamd_fstring_new_init(mp_body->str, mp_body->len); + g_string_free(mp_body, TRUE); + rspamd_http_message_set_body_from_fstring_steal(req->msg, body); + + /* Set Content-Type with boundary */ + char ct_buf[128]; + rspamd_snprintf(ct_buf, sizeof(ct_buf), + "multipart/form-data; boundary=%s", boundary); + + /* Add Accept headers */ + rspamd_http_message_add_header(req->msg, "Accept", "application/json"); + if (compressed) { + rspamd_http_message_add_header(req->msg, "Accept-Encoding", "zstd"); + } + + /* Append URL path */ + if (command != NULL && command[0] == '/') { + req->msg->url = rspamd_fstring_append(req->msg->url, command, strlen(command)); + } + else { + req->msg->url = rspamd_fstring_append(req->msg->url, "/", 1); + req->msg->url = rspamd_fstring_append(req->msg->url, command ? command : "", + command ? strlen(command) : 0); + } + + conn->req = req; + conn->v3_mode = TRUE; + conn->start_time = rspamd_get_ticks(FALSE); + + ret = rspamd_http_connection_write_message(conn->http_conn, req->msg, + NULL, ct_buf, req, conn->timeout); + + return ret; +} + void rspamd_client_destroy(struct rspamd_client_connection *conn) { if (conn != NULL) { diff --git a/src/client/rspamdclient.h b/src/client/rspamdclient.h index 094676cfbe..1f41ac327a 100644 --- a/src/client/rspamdclient.h +++ b/src/client/rspamdclient.h @@ -93,6 +93,22 @@ gboolean rspamd_client_command( const char *filename, GError **err); +/** + * Send a v3 multipart/form-data command. + * Metadata is sent as a JSON part, message as an octet-stream part. + * Response is multipart/mixed with "result" (JSON/msgpack) and optional "body" parts. + */ +gboolean rspamd_client_command_v3( + struct rspamd_client_connection *conn, + const char *command, + const ucl_object_t *metadata, + FILE *in, + rspamd_client_callback cb, + gpointer ud, + gboolean compressed, + const char *filename, + GError **err); + /** * Destroy a connection to rspamd * @param conn diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt index 8cc673f138..6fa046abc9 100644 --- a/src/libserver/CMakeLists.txt +++ b/src/libserver/CMakeLists.txt @@ -18,6 +18,8 @@ SET(LIBRSPAMDSERVERSRC ${CMAKE_CURRENT_SOURCE_DIR}/fuzzy_storage_stat.c ${CMAKE_CURRENT_SOURCE_DIR}/milter.c ${CMAKE_CURRENT_SOURCE_DIR}/monitored.c + ${CMAKE_CURRENT_SOURCE_DIR}/multipart_form.cxx + ${CMAKE_CURRENT_SOURCE_DIR}/multipart_response.cxx ${CMAKE_CURRENT_SOURCE_DIR}/protocol.c ${CMAKE_CURRENT_SOURCE_DIR}/re_cache.c ${CMAKE_CURRENT_SOURCE_DIR}/redis_pool.cxx diff --git a/src/libserver/multipart_form.cxx b/src/libserver/multipart_form.cxx new file mode 100644 index 0000000000..e0db741681 --- /dev/null +++ b/src/libserver/multipart_form.cxx @@ -0,0 +1,383 @@ +/* + * Copyright 2025 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "multipart_form.hxx" +#include +#include + +namespace rspamd::http { + +namespace { + +/* Trim leading and trailing whitespace (spaces and tabs) */ +auto trim(std::string_view sv) -> std::string_view +{ + while (!sv.empty() && (sv.front() == ' ' || sv.front() == '\t')) { + sv.remove_prefix(1); + } + while (!sv.empty() && (sv.back() == ' ' || sv.back() == '\t')) { + sv.remove_suffix(1); + } + return sv; +} + +/* Case-insensitive prefix check */ +auto starts_with_ci(std::string_view haystack, std::string_view needle) -> bool +{ + if (haystack.size() < needle.size()) { + return false; + } + for (size_t i = 0; i < needle.size(); i++) { + if (std::tolower(static_cast(haystack[i])) != + std::tolower(static_cast(needle[i]))) { + return false; + } + } + return true; +} + +/** + * Extract a quoted or unquoted parameter value from a header value string. + * Given: name="value"; other="x" + * extract_param(sv, "name") returns "value" + */ +auto extract_param(std::string_view header, std::string_view param_name) -> std::string_view +{ + auto pos = size_t{0}; + + while (pos < header.size()) { + /* Find param_name */ + auto found = header.find(param_name, pos); + if (found == std::string_view::npos) { + return {}; + } + + /* Check that it's preceded by ; or start, not part of another word */ + if (found > 0) { + auto prev = header[found - 1]; + if (prev != ';' && prev != ' ' && prev != '\t') { + pos = found + param_name.size(); + continue; + } + } + + auto after = found + param_name.size(); + /* Skip whitespace before = */ + while (after < header.size() && (header[after] == ' ' || header[after] == '\t')) { + after++; + } + if (after >= header.size() || header[after] != '=') { + pos = after; + continue; + } + after++; /* skip = */ + while (after < header.size() && (header[after] == ' ' || header[after] == '\t')) { + after++; + } + if (after >= header.size()) { + return {}; + } + + if (header[after] == '"') { + /* Quoted value */ + after++; /* skip opening quote */ + auto end = header.find('"', after); + if (end == std::string_view::npos) { + return header.substr(after); + } + return header.substr(after, end - after); + } + else { + /* Unquoted value - ends at ; or end */ + auto end = header.find(';', after); + if (end == std::string_view::npos) { + return trim(header.substr(after)); + } + return trim(header.substr(after, end - after)); + } + } + + return {}; +} + +/** + * Parse headers from a part preamble. + * Headers end at the first \r\n\r\n or \n\n. + * Returns: (headers_end_offset, entry with parsed headers) + */ +auto parse_part_headers(std::string_view part_data, multipart_entry &entry) -> size_t +{ + /* Find end of headers */ + auto hdr_end = part_data.find("\r\n\r\n"); + size_t skip = 4; + + if (hdr_end == std::string_view::npos) { + hdr_end = part_data.find("\n\n"); + skip = 2; + if (hdr_end == std::string_view::npos) { + return 0; + } + } + + auto headers = part_data.substr(0, hdr_end); + + /* Parse individual headers (split by \r\n or \n) */ + size_t pos = 0; + while (pos < headers.size()) { + auto line_end = headers.find('\n', pos); + std::string_view line; + if (line_end == std::string_view::npos) { + line = headers.substr(pos); + pos = headers.size(); + } + else { + line = headers.substr(pos, line_end - pos); + pos = line_end + 1; + } + + /* Strip trailing \r */ + if (!line.empty() && line.back() == '\r') { + line.remove_suffix(1); + } + + if (line.empty()) { + continue; + } + + auto colon = line.find(':'); + if (colon == std::string_view::npos) { + continue; + } + + auto hdr_name = trim(line.substr(0, colon)); + auto hdr_value = trim(line.substr(colon + 1)); + + if (starts_with_ci(hdr_name, "content-disposition")) { + entry.name = extract_param(hdr_value, "name"); + entry.filename = extract_param(hdr_value, "filename"); + } + else if (starts_with_ci(hdr_name, "content-type")) { + /* Content-Type value is everything up to first ; or end */ + auto semi = hdr_value.find(';'); + if (semi != std::string_view::npos) { + entry.content_type = trim(hdr_value.substr(0, semi)); + } + else { + entry.content_type = hdr_value; + } + } + else if (starts_with_ci(hdr_name, "content-encoding") || + starts_with_ci(hdr_name, "content-transfer-encoding")) { + entry.content_encoding = hdr_value; + } + } + + return hdr_end + skip; +} + +}// anonymous namespace + +auto parse_multipart_form(std::string_view data, + std::string_view boundary) -> std::optional +{ + if (boundary.empty() || data.empty()) { + return std::nullopt; + } + + /* Build delimiter strings: "\r\n--" and "--" */ + std::string delim; + delim.reserve(boundary.size() + 4); + delim = "--"; + delim.append(boundary.data(), boundary.size()); + + std::string crlf_delim = "\r\n"; + crlf_delim.append(delim); + + std::string lf_delim = "\n"; + lf_delim.append(delim); + + /* Find the first boundary */ + auto first = data.find(delim); + if (first == std::string_view::npos) { + return std::nullopt; + } + + /* Skip past first boundary line */ + auto pos = first + delim.size(); + + /* Skip optional \r\n after boundary */ + if (pos < data.size() && data[pos] == '\r') { + pos++; + } + if (pos < data.size() && data[pos] == '\n') { + pos++; + } + + static constexpr size_t max_parts = 8; + multipart_form form; + + while (pos < data.size()) { + /* Find next boundary (try \r\n-- first, then \n--) */ + auto next = data.find(crlf_delim, pos); + size_t delim_size = crlf_delim.size(); + + if (next == std::string_view::npos) { + next = data.find(lf_delim, pos); + delim_size = lf_delim.size(); + } + + if (next == std::string_view::npos) { + break; + } + + auto part_data = data.substr(pos, next - pos); + + /* Parse headers from this part */ + multipart_entry entry{}; + auto body_offset = parse_part_headers(part_data, entry); + + if (body_offset > 0 && body_offset <= part_data.size()) { + entry.data = part_data.substr(body_offset); + } + else { + /* No headers found, treat entire part as data */ + entry.data = part_data; + } + + form.parts.push_back(entry); + + if (form.parts.size() >= max_parts) { + break; + } + + /* Move past the boundary */ + pos = next + delim_size; + + /* Check for closing boundary -- */ + if (pos + 1 < data.size() && data[pos] == '-' && data[pos + 1] == '-') { + break; + } + + /* Skip \r\n after boundary */ + if (pos < data.size() && data[pos] == '\r') { + pos++; + } + if (pos < data.size() && data[pos] == '\n') { + pos++; + } + } + + if (form.parts.empty()) { + return std::nullopt; + } + + return form; +} + +auto find_part(const multipart_form &form, + std::string_view name) -> const multipart_entry * +{ + for (const auto &entry: form.parts) { + if (entry.name == name) { + return &entry; + } + } + return nullptr; +} + +}// namespace rspamd::http + + +/* + * C bridge implementation + */ + +struct rspamd_multipart_form_c { + rspamd::http::multipart_form form; + /* Pre-built C entries for find() results */ + std::vector c_entries; + + void build_c_entries() + { + c_entries.clear(); + c_entries.reserve(form.parts.size()); + for (const auto &p: form.parts) { + rspamd_multipart_entry_c ce{}; + ce.name = p.name.data(); + ce.name_len = p.name.size(); + ce.filename = p.filename.data(); + ce.filename_len = p.filename.size(); + ce.content_type = p.content_type.data(); + ce.content_type_len = p.content_type.size(); + ce.content_encoding = p.content_encoding.data(); + ce.content_encoding_len = p.content_encoding.size(); + ce.data = p.data.data(); + ce.data_len = p.data.size(); + c_entries.push_back(ce); + } + } +}; + +extern "C" { + +struct rspamd_multipart_form_c * +rspamd_multipart_form_parse(const char *data, gsize len, + const char *boundary, gsize boundary_len) +{ + auto result = rspamd::http::parse_multipart_form( + {data, len}, {boundary, boundary_len}); + + if (!result) { + return nullptr; + } + + auto *form = new rspamd_multipart_form_c(); + form->form = std::move(*result); + form->build_c_entries(); + return form; +} + +gsize rspamd_multipart_form_nparts(const struct rspamd_multipart_form_c *form) +{ + if (!form) { + return 0; + } + return form->form.parts.size(); +} + +const struct rspamd_multipart_entry_c * +rspamd_multipart_form_find(const struct rspamd_multipart_form_c *form, + const char *name, gsize name_len) +{ + if (!form || !name) { + return nullptr; + } + + std::string_view name_sv{name, name_len}; + for (size_t i = 0; i < form->form.parts.size(); i++) { + if (form->form.parts[i].name == name_sv) { + return &form->c_entries[i]; + } + } + return nullptr; +} + +void rspamd_multipart_form_free(struct rspamd_multipart_form_c *form) +{ + delete form; +} + +} /* extern "C" */ diff --git a/src/libserver/multipart_form.h b/src/libserver/multipart_form.h new file mode 100644 index 0000000000..df7bdd2a24 --- /dev/null +++ b/src/libserver/multipart_form.h @@ -0,0 +1,74 @@ +/* + * Copyright 2025 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSPAMD_MULTIPART_FORM_H +#define RSPAMD_MULTIPART_FORM_H + +#include "config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_multipart_entry_c { + const char *name; + gsize name_len; + const char *filename; + gsize filename_len; + const char *content_type; + gsize content_type_len; + const char *content_encoding; + gsize content_encoding_len; + const char *data; + gsize data_len; +}; + +struct rspamd_multipart_form_c; + +/** + * Parse multipart/form-data body. Returns NULL on error. + * The returned handle must be freed with rspamd_multipart_form_free(). + * + * IMPORTANT: The returned form contains pointers into the original + * data buffer (zero-copy). The caller MUST ensure 'data' remains valid + * for the lifetime of the returned form. + */ +struct rspamd_multipart_form_c *rspamd_multipart_form_parse( + const char *data, gsize len, + const char *boundary, gsize boundary_len); + +/** + * Get number of parts. + */ +gsize rspamd_multipart_form_nparts(const struct rspamd_multipart_form_c *form); + +/** + * Find part by name. Returns NULL if not found. + */ +const struct rspamd_multipart_entry_c *rspamd_multipart_form_find( + const struct rspamd_multipart_form_c *form, + const char *name, gsize name_len); + +/** + * Free parsed form. + */ +void rspamd_multipart_form_free(struct rspamd_multipart_form_c *form); + +#ifdef __cplusplus +} +#endif + +#endif /* RSPAMD_MULTIPART_FORM_H */ diff --git a/src/libserver/multipart_form.hxx b/src/libserver/multipart_form.hxx new file mode 100644 index 0000000000..104afbba6b --- /dev/null +++ b/src/libserver/multipart_form.hxx @@ -0,0 +1,60 @@ +/* + * Copyright 2025 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSPAMD_MULTIPART_FORM_HXX +#define RSPAMD_MULTIPART_FORM_HXX + +#include +#include +#include + +namespace rspamd::http { + +struct multipart_entry { + std::string_view name; + std::string_view filename; + std::string_view content_type; + std::string_view content_encoding; + std::string_view data; +}; + +struct multipart_form { + std::vector parts; +}; + +/** + * Parse multipart/form-data body. + * All string_views point into the original data buffer (zero-copy). + * @param data raw body + * @param boundary boundary string (without leading --) + * @return parsed form or nullopt on error + */ +auto parse_multipart_form(std::string_view data, + std::string_view boundary) -> std::optional; + +/** + * Find part by name. + * @return pointer to entry or nullptr + */ +auto find_part(const multipart_form &form, + std::string_view name) -> const multipart_entry *; + +}// namespace rspamd::http + +/* C bridge - include the C-only header */ +#include "multipart_form.h" + +#endif// RSPAMD_MULTIPART_FORM_HXX diff --git a/src/libserver/multipart_response.cxx b/src/libserver/multipart_response.cxx new file mode 100644 index 0000000000..99856bf702 --- /dev/null +++ b/src/libserver/multipart_response.cxx @@ -0,0 +1,203 @@ +/* + * Copyright 2025 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "multipart_response.hxx" +#include "ottery.h" +#include "printf.h" +#include +#include + +#ifdef SYS_ZSTD +#include "zstd.h" +#else +#include "contrib/zstd/zstd.h" +#endif + +namespace rspamd::http { + +multipart_response::multipart_response() +{ + /* Generate a random boundary using ottery CSPRNG */ + char buf[64]; + uint64_t rnd = ottery_rand_uint64(); + static unsigned int counter = 0; + rspamd_snprintf(buf, sizeof(buf), "rspamd-v3-%016xL-%ud", + rnd, counter++); + boundary_ = buf; +} + +void multipart_response::add_part(std::string name, std::string content_type, + std::string_view data, bool compress) +{ + parts_.push_back({std::move(name), std::move(content_type), data, compress}); +} + +auto multipart_response::serialize(void *zstream) const -> std::string +{ + std::string out; + out.reserve(4096); + + for (const auto &part: parts_) { + out.append("--"); + out.append(boundary_); + out.append("\r\n"); + + /* Content-Disposition */ + out.append("Content-Disposition: form-data; name=\""); + out.append(part.name); + out.append("\"\r\n"); + + /* Content-Type */ + if (!part.content_type.empty()) { + out.append("Content-Type: "); + out.append(part.content_type); + out.append("\r\n"); + } + + /* Compress if requested and zstream is available */ + if (part.compress && zstream && !part.data.empty()) { + auto *cstream = static_cast(zstream); + size_t bound = ZSTD_compressBound(part.data.size()); + std::string compressed(bound, '\0'); + + ZSTD_inBuffer zin{}; + zin.src = part.data.data(); + zin.size = part.data.size(); + zin.pos = 0; + + ZSTD_outBuffer zout{}; + zout.dst = compressed.data(); + zout.size = compressed.size(); + zout.pos = 0; + + ZSTD_CCtx_reset(cstream, ZSTD_reset_session_only); + + bool ok = true; + while (zin.pos < zin.size) { + size_t r = ZSTD_compressStream2(cstream, &zout, &zin, ZSTD_e_continue); + if (ZSTD_isError(r)) { + ok = false; + break; + } + } + + if (ok) { + size_t r = ZSTD_compressStream2(cstream, &zout, &zin, ZSTD_e_end); + if (ZSTD_isError(r)) { + ok = false; + } + } + + if (ok) { + compressed.resize(zout.pos); + out.append("Content-Encoding: zstd\r\n"); + out.append("\r\n"); + out.append(compressed); + } + else { + /* Fallback: write uncompressed */ + out.append("\r\n"); + out.append(part.data); + } + } + else { + out.append("\r\n"); + out.append(part.data); + } + + out.append("\r\n"); + } + + /* Closing boundary */ + out.append("--"); + out.append(boundary_); + out.append("--\r\n"); + + return out; +} + +auto multipart_response::content_type() const -> std::string +{ + return "multipart/mixed; boundary=\"" + boundary_ + "\""; +} + +}// namespace rspamd::http + + +/* + * C bridge implementation + */ + +struct rspamd_multipart_response_c { + rspamd::http::multipart_response resp; + std::string cached_content_type; +}; + +extern "C" { + +struct rspamd_multipart_response_c * +rspamd_multipart_response_new(void) +{ + return new rspamd_multipart_response_c(); +} + +void rspamd_multipart_response_add_part( + struct rspamd_multipart_response_c *resp, + const char *name, + const char *content_type, + const char *data, gsize len, + gboolean compress) +{ + if (!resp) { + return; + } + resp->resp.add_part( + name ? name : "", + content_type ? content_type : "", + {data, len}, + compress != FALSE); +} + +rspamd_fstring_t * +rspamd_multipart_response_serialize( + struct rspamd_multipart_response_c *resp, + void *zstream) +{ + if (!resp) { + return nullptr; + } + auto serialized = resp->resp.serialize(zstream); + return rspamd_fstring_new_init(serialized.data(), serialized.size()); +} + +const char * +rspamd_multipart_response_content_type( + struct rspamd_multipart_response_c *resp) +{ + if (!resp) { + return "multipart/mixed"; + } + resp->cached_content_type = resp->resp.content_type(); + return resp->cached_content_type.c_str(); +} + +void rspamd_multipart_response_free( + struct rspamd_multipart_response_c *resp) +{ + delete resp; +} + +} /* extern "C" */ diff --git a/src/libserver/multipart_response.h b/src/libserver/multipart_response.h new file mode 100644 index 0000000000..6b7e97f7b6 --- /dev/null +++ b/src/libserver/multipart_response.h @@ -0,0 +1,62 @@ +/* + * Copyright 2025 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSPAMD_MULTIPART_RESPONSE_H +#define RSPAMD_MULTIPART_RESPONSE_H + +#include "config.h" +#include "libutil/fstring.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_multipart_response_c; + +struct rspamd_multipart_response_c *rspamd_multipart_response_new(void); + +void rspamd_multipart_response_add_part( + struct rspamd_multipart_response_c *resp, + const char *name, + const char *content_type, + const char *data, gsize len, + gboolean compress); + +/** + * Serialize the multipart response. + * @param resp response handle + * @param zstream ZSTD compression stream (may be NULL) + * @return newly allocated fstring (caller owns) + */ +rspamd_fstring_t *rspamd_multipart_response_serialize( + struct rspamd_multipart_response_c *resp, + void *zstream); + +/** + * Get the Content-Type header value (includes boundary). + * The returned string is valid until resp is freed. + */ +const char *rspamd_multipart_response_content_type( + struct rspamd_multipart_response_c *resp); + +void rspamd_multipart_response_free( + struct rspamd_multipart_response_c *resp); + +#ifdef __cplusplus +} +#endif + +#endif /* RSPAMD_MULTIPART_RESPONSE_H */ diff --git a/src/libserver/multipart_response.hxx b/src/libserver/multipart_response.hxx new file mode 100644 index 0000000000..a8611d1b0d --- /dev/null +++ b/src/libserver/multipart_response.hxx @@ -0,0 +1,67 @@ +/* + * Copyright 2025 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSPAMD_MULTIPART_RESPONSE_HXX +#define RSPAMD_MULTIPART_RESPONSE_HXX + +#include +#include +#include + +namespace rspamd::http { + +struct response_part { + std::string name; + std::string content_type; + std::string_view data; /* Points to external buffer; caller must keep alive */ + bool compress = false; +}; + +class multipart_response { +public: + multipart_response(); + + void add_part(std::string name, std::string content_type, + std::string_view data, bool compress = false); + + /** + * Serialize the multipart response. + * @param zstream ZSTD compression stream (may be null if no compression needed) + * @return serialized body + */ + auto serialize(void *zstream = nullptr) const -> std::string; + + /** + * Get the Content-Type header value including boundary. + */ + auto content_type() const -> std::string; + + auto get_boundary() const -> std::string_view + { + return boundary_; + } + +private: + std::string boundary_; + std::vector parts_; +}; + +}// namespace rspamd::http + +/* C bridge - include the C-only header */ +#include "multipart_response.h" + +#endif// RSPAMD_MULTIPART_RESPONSE_HXX diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index 83bcb6e2a8..560282b41e 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -29,6 +29,9 @@ #include "rspamd_simdutf.h" #include "task.h" #include "lua/lua_classnames.h" +#include "multipart_form.h" +#include "multipart_response.h" +#include "libmime/content_type.h" #include #ifdef SYS_ZSTD @@ -148,7 +151,12 @@ rspamd_protocol_handle_url(struct rspamd_task *task, case 'c': case 'C': /* check */ - if (COMPARE_CMD(p, MSG_CMD_CHECK_V2, pathlen)) { + if (COMPARE_CMD(p, MSG_CMD_CHECK_V3, pathlen)) { + task->cmd = CMD_CHECK_V3; + task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MULTIPART_V3; + msg_debug_protocol("got checkv3 command"); + } + else if (COMPARE_CMD(p, MSG_CMD_CHECK_V2, pathlen)) { task->cmd = CMD_CHECK_V2; msg_debug_protocol("got checkv2 command"); } @@ -1150,7 +1158,7 @@ rspamd_metric_symbol_ucl(struct rspamd_task *task, struct rspamd_symbol_result * ucl_object_insert_key(obj, ucl_object_fromstring(sym->name), "name", 0, false); ucl_object_insert_key(obj, ucl_object_fromdouble(sym->score), "score", 0, false); - if (task->cmd == CMD_CHECK_V2) { + if (task->cmd == CMD_CHECK_V2 || task->cmd == CMD_CHECK_V3) { if (sym->sym) { ucl_object_insert_key(obj, ucl_object_fromdouble(sym->sym->score), "metric_score", 0, false); } @@ -2094,6 +2102,680 @@ void rspamd_protocol_write_log_pipe(struct rspamd_task *task) g_array_free(extra, TRUE); } +/* + * Handle metadata from a parsed UCL object for v3 protocol. + * Maps structured metadata fields to task fields. + */ +static gboolean +rspamd_protocol_handle_metadata(struct rspamd_task *task, + const ucl_object_t *metadata) +{ + const ucl_object_t *elt, *cur; + gboolean has_ip = FALSE; + + if (!metadata || ucl_object_type(metadata) != UCL_OBJECT) { + g_set_error(&task->err, rspamd_protocol_quark(), 400, + "metadata is not a valid object"); + return FALSE; + } + + /* from */ + elt = ucl_object_lookup(metadata, "from"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + const char *from_str = ucl_object_tostring(elt); + gsize from_len = strlen(from_str); + + if (from_len == 0) { + task->from_envelope = rspamd_email_address_from_smtp("<>", 2); + } + else { + task->from_envelope = rspamd_email_address_from_smtp(from_str, from_len); + } + + if (!task->from_envelope) { + msg_err_protocol("bad from in metadata: '%s'", from_str); + task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; + } + } + + /* rcpt (array) */ + elt = ucl_object_lookup(metadata, "rcpt"); + if (elt) { + if (ucl_object_type(elt) == UCL_ARRAY) { + ucl_object_iter_t it = NULL; + + while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) { + if (ucl_object_type(cur) == UCL_STRING) { + const char *rcpt_str = ucl_object_tostring(cur); + struct rspamd_email_address *addr = + rspamd_email_address_from_smtp(rcpt_str, strlen(rcpt_str)); + + if (addr) { + if (!task->rcpt_envelope) { + task->rcpt_envelope = g_ptr_array_sized_new(2); + } + g_ptr_array_add(task->rcpt_envelope, addr); + } + else { + msg_err_protocol("bad rcpt in metadata: '%s'", rcpt_str); + task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; + } + } + } + } + else if (ucl_object_type(elt) == UCL_STRING) { + /* Single recipient as string */ + const char *rcpt_str = ucl_object_tostring(elt); + struct rspamd_email_address *addr = + rspamd_email_address_from_smtp(rcpt_str, strlen(rcpt_str)); + + if (addr) { + if (!task->rcpt_envelope) { + task->rcpt_envelope = g_ptr_array_sized_new(2); + } + g_ptr_array_add(task->rcpt_envelope, addr); + } + } + } + + /* ip */ + elt = ucl_object_lookup(metadata, "ip"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + const char *ip_str = ucl_object_tostring(elt); + + if (!rspamd_parse_inet_address(&task->from_addr, + ip_str, strlen(ip_str), + RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) { + msg_err_protocol("bad ip in metadata: '%s'", ip_str); + } + else { + has_ip = TRUE; + } + } + + if (!has_ip) { + task->flags |= RSPAMD_TASK_FLAG_NO_IP; + } + + /* helo */ + elt = ucl_object_lookup(metadata, "helo"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + task->helo = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt)); + } + + /* hostname */ + elt = ucl_object_lookup(metadata, "hostname"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + task->hostname = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt)); + } + + /* queue_id */ + elt = ucl_object_lookup(metadata, "queue_id"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + task->queue_id = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt)); + } + + /* user */ + elt = ucl_object_lookup(metadata, "user"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + task->auth_user = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt)); + } + + /* deliver_to */ + elt = ucl_object_lookup(metadata, "deliver_to"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + task->deliver_to = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt)); + } + + /* settings_id */ + elt = ucl_object_lookup(metadata, "settings_id"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + const char *sid = ucl_object_tostring(elt); + task->settings_elt = rspamd_config_find_settings_name_ref( + task->cfg, sid, strlen(sid)); + + if (!task->settings_elt) { + msg_warn_protocol("unknown settings id in metadata: '%s'", sid); + } + } + + /* settings (inline UCL object) */ + elt = ucl_object_lookup(metadata, "settings"); + if (elt && ucl_object_type(elt) == UCL_OBJECT) { + /* If both settings_id and settings are present, settings wins */ + if (task->settings_elt) { + msg_warn_protocol("ignore settings_id because inline settings is also present"); + REF_RELEASE(task->settings_elt); + task->settings_elt = NULL; + } + task->settings = ucl_object_ref(elt); + } + + /* tls.cipher - sets SSL flag */ + elt = ucl_object_lookup_path(metadata, "tls.cipher"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + task->flags |= RSPAMD_TASK_FLAG_SSL; + } + + /* mta.tag */ + elt = ucl_object_lookup_path(metadata, "mta.tag"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + char *mta_tag = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt)); + rspamd_mempool_set_variable(task->task_pool, RSPAMD_MEMPOOL_MTA_TAG, mta_tag, NULL); + } + + /* mta.name */ + elt = ucl_object_lookup_path(metadata, "mta.name"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + char *mta_name = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt)); + rspamd_mempool_set_variable(task->task_pool, RSPAMD_MEMPOOL_MTA_NAME, mta_name, NULL); + } + + /* flags (array of strings) */ + elt = ucl_object_lookup(metadata, "flags"); + if (elt && ucl_object_type(elt) == UCL_ARRAY) { + ucl_object_iter_t it = NULL; + + while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) { + if (ucl_object_type(cur) == UCL_STRING) { + const char *flag_str = ucl_object_tostring(cur); + rspamd_protocol_handle_flag(task, flag_str, strlen(flag_str)); + } + } + } + + /* raw - disable MIME parsing */ + elt = ucl_object_lookup(metadata, "raw"); + if (elt && ucl_object_type(elt) == UCL_BOOLEAN) { + if (ucl_object_toboolean(elt)) { + task->flags &= ~RSPAMD_TASK_FLAG_MIME; + } + } + + /* log_tag */ + elt = ucl_object_lookup(metadata, "log_tag"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + const char *tag = ucl_object_tostring(elt); + gsize tag_len = strlen(tag); + + if (rspamd_fast_utf8_validate(tag, tag_len) == 0) { + int len = MIN(tag_len, sizeof(task->task_pool->tag.uid) - 1); + memcpy(task->task_pool->tag.uid, tag, len); + task->task_pool->tag.uid[len] = '\0'; + } + } + + /* mail_esmtp_args (object: key -> value) */ + elt = ucl_object_lookup(metadata, "mail_esmtp_args"); + if (elt && ucl_object_type(elt) == UCL_OBJECT) { + if (!task->mail_esmtp_args) { + task->mail_esmtp_args = g_hash_table_new_full( + rspamd_ftok_icase_hash, + rspamd_ftok_icase_equal, + rspamd_fstring_mapped_ftok_free, + rspamd_fstring_mapped_ftok_free); + } + + ucl_object_iter_t it = NULL; + while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) { + if (ucl_object_type(cur) == UCL_STRING) { + const char *key = ucl_object_key(cur); + const char *val = ucl_object_tostring(cur); + + rspamd_fstring_t *fkey = rspamd_fstring_new_init(key, strlen(key)); + rspamd_fstring_t *fval = rspamd_fstring_new_init(val, strlen(val)); + rspamd_ftok_t *key_tok = rspamd_ftok_map(fkey); + rspamd_ftok_t *val_tok = rspamd_ftok_map(fval); + + g_hash_table_replace(task->mail_esmtp_args, key_tok, val_tok); + } + } + } + + /* rcpt_esmtp_args (array of objects) */ + elt = ucl_object_lookup(metadata, "rcpt_esmtp_args"); + if (elt && ucl_object_type(elt) == UCL_ARRAY) { + if (!task->rcpt_esmtp_args) { + task->rcpt_esmtp_args = g_ptr_array_new(); + } + + ucl_object_iter_t arr_it = NULL; + int rcpt_idx = 0; + + while ((cur = ucl_object_iterate(elt, &arr_it, true)) != NULL) { + GHashTable *rcpt_args = NULL; + + if (ucl_object_type(cur) == UCL_OBJECT) { + rcpt_args = g_hash_table_new_full( + rspamd_ftok_icase_hash, + rspamd_ftok_icase_equal, + rspamd_fstring_mapped_ftok_free, + rspamd_fstring_mapped_ftok_free); + + ucl_object_iter_t obj_it = NULL; + const ucl_object_t *kv; + + while ((kv = ucl_object_iterate(cur, &obj_it, true)) != NULL) { + if (ucl_object_type(kv) == UCL_STRING) { + const char *key = ucl_object_key(kv); + const char *val = ucl_object_tostring(kv); + + rspamd_fstring_t *fkey = rspamd_fstring_new_init(key, strlen(key)); + rspamd_fstring_t *fval = rspamd_fstring_new_init(val, strlen(val)); + rspamd_ftok_t *key_tok = rspamd_ftok_map(fkey); + rspamd_ftok_t *val_tok = rspamd_ftok_map(fval); + + g_hash_table_replace(rcpt_args, key_tok, val_tok); + } + } + } + + /* Ensure array is large enough */ + while ((int) task->rcpt_esmtp_args->len <= rcpt_idx) { + g_ptr_array_add(task->rcpt_esmtp_args, NULL); + } + g_ptr_array_index(task->rcpt_esmtp_args, rcpt_idx) = rcpt_args; + rcpt_idx++; + } + } + + return TRUE; +} + +/* + * Handle v3 multipart/form-data request. + */ +gboolean +rspamd_protocol_handle_v3_request(struct rspamd_task *task, + struct rspamd_http_message *msg, + const char *chunk, gsize len) +{ + const char *boundary = NULL; + gsize boundary_len = 0; + + /* Extract boundary from HTTP Content-Type header */ + const rspamd_ftok_t *ct_hdr = rspamd_http_message_find_header(msg, "Content-Type"); + + if (!ct_hdr) { + g_set_error(&task->err, rspamd_protocol_quark(), 400, + "missing Content-Type header for v3 request"); + return FALSE; + } + + struct rspamd_content_type *ct = rspamd_content_type_parse( + ct_hdr->begin, ct_hdr->len, task->task_pool); + + if (!ct || ct->boundary.len == 0) { + g_set_error(&task->err, rspamd_protocol_quark(), 400, + "cannot extract boundary from Content-Type"); + return FALSE; + } + + boundary = ct->boundary.begin; + boundary_len = ct->boundary.len; + + /* Parse multipart body */ + struct rspamd_multipart_form_c *form = rspamd_multipart_form_parse( + chunk, len, boundary, boundary_len); + + if (!form) { + g_set_error(&task->err, rspamd_protocol_quark(), 400, + "cannot parse multipart/form-data body"); + return FALSE; + } + + /* Register destructor for the form */ + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) rspamd_multipart_form_free, + form); + + /* Find metadata part */ + const struct rspamd_multipart_entry_c *metadata_part = + rspamd_multipart_form_find(form, "metadata", sizeof("metadata") - 1); + + if (!metadata_part || metadata_part->data_len == 0) { + g_set_error(&task->err, rspamd_protocol_quark(), 400, + "missing 'metadata' part in v3 request"); + return FALSE; + } + + /* Parse metadata as UCL (detect JSON vs msgpack from Content-Type) */ + struct ucl_parser *parser; + gboolean is_msgpack = FALSE; + + if (metadata_part->content_type && + metadata_part->content_type_len > 0 && + rspamd_substring_search_caseless(metadata_part->content_type, + metadata_part->content_type_len, + "msgpack", + sizeof("msgpack") - 1) != -1) { + is_msgpack = TRUE; + parser = ucl_parser_new(UCL_PARSER_DEFAULT | UCL_PARSER_NO_FILEVARS); + ucl_parser_add_chunk_full(parser, (const unsigned char *) metadata_part->data, + metadata_part->data_len, + ucl_parser_get_default_priority(parser), + UCL_DUPLICATE_APPEND, + UCL_PARSE_MSGPACK); + } + else { + parser = ucl_parser_new(UCL_PARSER_DEFAULT | UCL_PARSER_NO_FILEVARS); + ucl_parser_add_chunk(parser, (const unsigned char *) metadata_part->data, + metadata_part->data_len); + } + + if (ucl_parser_get_error(parser) != NULL) { + g_set_error(&task->err, rspamd_protocol_quark(), 400, + "cannot parse metadata: %s", ucl_parser_get_error(parser)); + ucl_parser_free(parser); + return FALSE; + } + + ucl_object_t *metadata_obj = ucl_parser_get_object(parser); + ucl_parser_free(parser); + + if (!metadata_obj) { + g_set_error(&task->err, rspamd_protocol_quark(), 400, + "empty metadata object"); + return FALSE; + } + + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) ucl_object_unref, + metadata_obj); + + /* Apply metadata to task */ + if (!rspamd_protocol_handle_metadata(task, metadata_obj)) { + return FALSE; + } + + /* Check for file/shm in metadata (zero-copy paths) */ + const ucl_object_t *file_elt = ucl_object_lookup(metadata_obj, "file"); + const ucl_object_t *shm_elt = ucl_object_lookup(metadata_obj, "shm"); + + if (file_elt && ucl_object_type(file_elt) == UCL_STRING) { + /* Set file path and let rspamd_task_load_message handle it via task header */ + const char *fpath = ucl_object_tostring(file_elt); + task->msg.fpath = rspamd_mempool_strdup(task->task_pool, fpath); + + /* Synthesize a request header so rspamd_task_load_message's file path works */ + rspamd_fstring_t *fhdr = rspamd_fstring_new_init(fpath, strlen(fpath)); + rspamd_ftok_t *name_tok = rspamd_mempool_alloc(task->task_pool, sizeof(*name_tok)); + rspamd_ftok_t *val_tok = rspamd_ftok_map(fhdr); + + RSPAMD_FTOK_ASSIGN(name_tok, "file"); + rspamd_task_add_request_header(task, name_tok, val_tok); + + /* Now load the message from file */ + return rspamd_task_load_message(task, NULL, NULL, 0); + } + else if (shm_elt && ucl_object_type(shm_elt) == UCL_STRING) { + /* Synthesize shm headers */ + const char *shm_name = ucl_object_tostring(shm_elt); + rspamd_fstring_t *fhdr = rspamd_fstring_new_init(shm_name, strlen(shm_name)); + rspamd_ftok_t *name_tok = rspamd_mempool_alloc(task->task_pool, sizeof(*name_tok)); + rspamd_ftok_t *val_tok = rspamd_ftok_map(fhdr); + + RSPAMD_FTOK_ASSIGN(name_tok, "shm"); + rspamd_task_add_request_header(task, name_tok, val_tok); + + const ucl_object_t *off_elt = ucl_object_lookup(metadata_obj, "shm_offset"); + if (off_elt) { + char buf[32]; + int blen = rspamd_snprintf(buf, sizeof(buf), "%L", + ucl_object_toint(off_elt)); + rspamd_fstring_t *foff = rspamd_fstring_new_init(buf, blen); + rspamd_ftok_t *off_name = rspamd_mempool_alloc(task->task_pool, sizeof(*off_name)); + rspamd_ftok_t *off_val = rspamd_ftok_map(foff); + + RSPAMD_FTOK_ASSIGN(off_name, "shm-offset"); + rspamd_task_add_request_header(task, off_name, off_val); + } + + const ucl_object_t *len_elt = ucl_object_lookup(metadata_obj, "shm_length"); + if (len_elt) { + char buf[32]; + int blen = rspamd_snprintf(buf, sizeof(buf), "%L", + ucl_object_toint(len_elt)); + rspamd_fstring_t *flen = rspamd_fstring_new_init(buf, blen); + rspamd_ftok_t *len_name = rspamd_mempool_alloc(task->task_pool, sizeof(*len_name)); + rspamd_ftok_t *len_val = rspamd_ftok_map(flen); + + RSPAMD_FTOK_ASSIGN(len_name, "shm-length"); + rspamd_task_add_request_header(task, len_name, len_val); + } + + return rspamd_task_load_message(task, NULL, NULL, 0); + } + else { + /* Use inline message part */ + const struct rspamd_multipart_entry_c *msg_part = + rspamd_multipart_form_find(form, "message", sizeof("message") - 1); + + if (!msg_part || msg_part->data_len == 0) { + g_set_error(&task->err, rspamd_protocol_quark(), 400, + "missing 'message' part in v3 request"); + return FALSE; + } + + /* Check for per-part zstd compression */ + if (msg_part->content_encoding && msg_part->content_encoding_len > 0 && + rspamd_substring_search_caseless(msg_part->content_encoding, + msg_part->content_encoding_len, + "zstd", 4) != -1) { + /* Decompress message */ + ZSTD_DStream *zstream; + ZSTD_inBuffer zin; + ZSTD_outBuffer zout; + gsize outlen, r; + + if (!rspamd_libs_reset_decompression(task->cfg->libs_ctx)) { + g_set_error(&task->err, rspamd_protocol_quark(), 500, + "cannot init decompressor"); + return FALSE; + } + + zstream = task->cfg->libs_ctx->in_zstream; + zin.src = msg_part->data; + zin.size = msg_part->data_len; + zin.pos = 0; + + outlen = ZSTD_getDecompressedSize(msg_part->data, msg_part->data_len); + if (outlen == 0) { + outlen = ZSTD_DStreamOutSize(); + } + + unsigned char *out = (unsigned char *) g_malloc(outlen); + zout.dst = out; + zout.pos = 0; + zout.size = outlen; + + while (zin.pos < zin.size) { + r = ZSTD_decompressStream(zstream, &zout, &zin); + + if (ZSTD_isError(r)) { + g_set_error(&task->err, rspamd_protocol_quark(), 400, + "message decompression error: %s", + ZSTD_getErrorName(r)); + g_free(out); + return FALSE; + } + + if (zout.pos == zout.size) { + if (zout.size > task->cfg->max_message) { + g_set_error(&task->err, rspamd_protocol_quark(), 413, + "decompressed message exceeds max_message limit: %lu > %lu", + (unsigned long) zout.size, (unsigned long) task->cfg->max_message); + g_free(out); + return FALSE; + } + zout.size = zout.size * 2 + 1; + out = g_realloc(zout.dst, zout.size); + zout.dst = out; + } + } + + rspamd_mempool_add_destructor(task->task_pool, g_free, zout.dst); + task->msg.begin = (const char *) zout.dst; + task->msg.len = zout.pos; + task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED; + + msg_info_protocol("v3: loaded message from zstd compressed part; " + "compressed: %ul; uncompressed: %ul", + (gulong) zin.size, (gulong) zout.pos); + } + else { + /* Zero-copy: point directly into the multipart buffer */ + task->msg.begin = msg_part->data; + task->msg.len = msg_part->data_len; + } + + if (task->msg.len == 0) { + task->flags |= RSPAMD_TASK_FLAG_EMPTY; + } + + return TRUE; + } +} + +/* + * Build a v3 multipart/mixed HTTP reply. + */ +void rspamd_protocol_http_reply_v3(struct rspamd_http_message *msg, + struct rspamd_task *task) +{ + int flags = RSPAMD_PROTOCOL_DEFAULT | RSPAMD_PROTOCOL_URLS; + ucl_object_t *top = rspamd_protocol_write_ucl(task, flags); + + if (!(task->flags & RSPAMD_TASK_FLAG_NO_LOG)) { + if (task->worker->srv->history) { + rspamd_roll_history_update(task->worker->srv->history, task); + } + } + + rspamd_task_write_log(task); + + /* Determine output format from metadata part's Content-Type or Accept header */ + const rspamd_ftok_t *accept_hdr = rspamd_task_get_request_header(task, "Accept"); + int out_type = UCL_EMIT_JSON_COMPACT; + const char *result_ctype = "application/json"; + + if (accept_hdr && rspamd_substring_search(accept_hdr->begin, accept_hdr->len, + "application/msgpack", + sizeof("application/msgpack") - 1) != -1) { + out_type = UCL_EMIT_MSGPACK; + result_ctype = "application/msgpack"; + } + + /* Serialize result UCL */ + rspamd_fstring_t *result_data = rspamd_fstring_sized_new(1000); + rspamd_ucl_emit_fstring(top, out_type, &result_data); + + /* Check if client wants compression */ + gboolean want_compress = FALSE; + const rspamd_ftok_t *ae_hdr = rspamd_task_get_request_header(task, "Accept-Encoding"); + if (ae_hdr && rspamd_substring_search_caseless(ae_hdr->begin, ae_hdr->len, + "zstd", 4) != -1) { + want_compress = TRUE; + } + + /* Build multipart response */ + struct rspamd_multipart_response_c *resp = rspamd_multipart_response_new(); + + rspamd_multipart_response_add_part(resp, "result", result_ctype, + result_data->str, result_data->len, + want_compress); + + /* If message was rewritten, add body part */ + if (task->flags & RSPAMD_TASK_FLAG_MESSAGE_REWRITE) { + const char *body_start = task->msg.begin; + gsize body_len = task->msg.len; + + if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER) { + /* For milter, only send the body after headers */ + goffset hdr_off = MESSAGE_FIELD(task, raw_headers_content).len; + + if (hdr_off < (goffset) body_len) { + body_start += hdr_off; + body_len -= hdr_off; + + if (*body_start == '\r' && body_len > 0) { + body_start++; + body_len--; + } + if (*body_start == '\n' && body_len > 0) { + body_start++; + body_len--; + } + } + } + + rspamd_multipart_response_add_part(resp, "body", "application/octet-stream", + body_start, body_len, want_compress); + } + + /* Get compression stream if needed */ + void *zstream = NULL; + if (want_compress && rspamd_libs_reset_compression(task->cfg->libs_ctx)) { + zstream = task->cfg->libs_ctx->out_zstream; + } + + rspamd_fstring_t *reply = rspamd_multipart_response_serialize(resp, zstream); + const char *ctype = rspamd_multipart_response_content_type(resp); + + /* Set the content type on the HTTP message */ + rspamd_http_message_add_header(msg, "Content-Type", ctype); + + rspamd_http_message_set_body_from_fstring_steal(msg, reply); + rspamd_fstring_free(result_data); + rspamd_multipart_response_free(resp); + + /* Update stats */ + if (!(task->flags & RSPAMD_TASK_FLAG_NO_STAT)) { + struct rspamd_scan_result *metric_res = task->result; + + if (metric_res) { + struct rspamd_action *action = rspamd_check_action_metric(task, NULL, NULL); + + if (action->action_type == METRIC_ACTION_SOFT_REJECT && + (task->flags & RSPAMD_TASK_FLAG_GREYLISTED)) { +#ifndef HAVE_ATOMIC_BUILTINS + task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST]++; +#else + __atomic_add_fetch(&task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST], + 1, __ATOMIC_RELEASE); +#endif + } + else if (action->action_type < METRIC_ACTION_MAX) { +#ifndef HAVE_ATOMIC_BUILTINS + task->worker->srv->stat->actions_stat[action->action_type]++; +#else + __atomic_add_fetch(&task->worker->srv->stat->actions_stat[action->action_type], + 1, __ATOMIC_RELEASE); +#endif + } + } + +#ifndef HAVE_ATOMIC_BUILTINS + task->worker->srv->stat->messages_scanned++; +#else + __atomic_add_fetch(&task->worker->srv->stat->messages_scanned, + 1, __ATOMIC_RELEASE); +#endif + + uint32_t slot; + float processing_time = task->time_real_finish - task->task_timestamp; + +#ifndef HAVE_ATOMIC_BUILTINS + slot = task->worker->srv->stat->avg_time.cur_slot++; +#else + slot = __atomic_fetch_add(&task->worker->srv->stat->avg_time.cur_slot, + 1, __ATOMIC_RELEASE); +#endif + slot = slot % MAX_AVG_TIME_SLOTS; + task->worker->srv->stat->avg_time.avg_time[slot] = processing_time; + } +} + void rspamd_protocol_write_reply(struct rspamd_task *task, ev_tstamp timeout, struct rspamd_main *srv) { struct rspamd_http_message *msg; @@ -2173,6 +2855,12 @@ void rspamd_protocol_write_reply(struct rspamd_task *task, ev_tstamp timeout, st rspamd_protocol_http_reply(msg, task, NULL, out_type); rspamd_protocol_write_log_pipe(task); break; + case CMD_CHECK_V3: + rspamd_protocol_http_reply_v3(msg, task); + rspamd_protocol_write_log_pipe(task); + /* Override ctype — it was set by the v3 reply builder via header */ + ctype = NULL; + break; case CMD_PING: msg_debug_protocol("writing pong to client"); rspamd_http_message_set_body(msg, "pong" CRLF, 6); diff --git a/src/libserver/protocol.h b/src/libserver/protocol.h index 9d2b985da5..f6957833ac 100644 --- a/src/libserver/protocol.h +++ b/src/libserver/protocol.h @@ -80,6 +80,19 @@ gboolean rspamd_protocol_handle_control(struct rspamd_task *task, gboolean rspamd_protocol_handle_request(struct rspamd_task *task, struct rspamd_http_message *msg); +/** + * Handle checkv3 multipart/form-data request. + * Parses metadata part (JSON/msgpack), sets task fields, and sets message data. + * @param task + * @param msg HTTP message + * @param chunk body data + * @param len body length + * @return TRUE on success + */ +gboolean rspamd_protocol_handle_v3_request(struct rspamd_task *task, + struct rspamd_http_message *msg, + const char *chunk, gsize len); + /** * Write task results to http message * @param msg @@ -89,6 +102,16 @@ void rspamd_protocol_http_reply(struct rspamd_http_message *msg, struct rspamd_task *task, ucl_object_t **pobj, int how); +/** + * Write checkv3 multipart/mixed reply. + * Result part contains JSON/msgpack scan results. + * Optional body part contains rewritten message. + * @param msg HTTP response message to fill + * @param task task object + */ +void rspamd_protocol_http_reply_v3(struct rspamd_http_message *msg, + struct rspamd_task *task); + /** * Write data to log pipes * @param task diff --git a/src/libserver/protocol_internal.h b/src/libserver/protocol_internal.h index 5582908c2b..319c895194 100644 --- a/src/libserver/protocol_internal.h +++ b/src/libserver/protocol_internal.h @@ -31,6 +31,10 @@ extern "C" { * Modern check version */ #define MSG_CMD_CHECK_V2 "checkv2" +/* + * Multipart check version + */ +#define MSG_CMD_CHECK_V3 "checkv3" #define MSG_CMD_SCAN "scan" /* diff --git a/src/libserver/task.c b/src/libserver/task.c index 3ed0ee4e71..bfe3d5e3b6 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -418,7 +418,7 @@ rspamd_task_load_message(struct rspamd_task *task, ft = "file"; #endif - if (msg) { + if (msg && task->cmd != CMD_CHECK_V3) { rspamd_protocol_handle_headers(task, msg); } diff --git a/src/libserver/task.h b/src/libserver/task.h index da7ffb5b22..27fe463076 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -37,6 +37,7 @@ enum rspamd_command { CMD_CHECK_RSPAMC, /* Legacy rspamc format (like SA one) */ CMD_CHECK, /* Legacy check - metric json reply */ CMD_CHECK_V2, /* Modern check - symbols in json reply */ + CMD_CHECK_V3, /* Multipart check - structured metadata + multipart response */ CMD_METRICS, }; @@ -128,7 +129,9 @@ enum rspamd_task_stage { #define RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK (1u << 5u) /* Emit groups information */ #define RSPAMD_TASK_PROTOCOL_FLAG_GROUPS (1u << 6u) -#define RSPAMD_TASK_PROTOCOL_FLAG_MAX_SHIFT (6u) +/* Request is multipart/form-data v3 protocol */ +#define RSPAMD_TASK_PROTOCOL_FLAG_MULTIPART_V3 (1u << 7u) +#define RSPAMD_TASK_PROTOCOL_FLAG_MAX_SHIFT (7u) #define RSPAMD_TASK_IS_SKIPPED(task) (G_UNLIKELY((task)->flags & RSPAMD_TASK_FLAG_SKIP)) #define RSPAMD_TASK_IS_SPAMC(task) (G_UNLIKELY((task)->cmd == CMD_CHECK_SPAMC)) diff --git a/src/rspamd_proxy.c b/src/rspamd_proxy.c index 683f35b079..4c6878a250 100644 --- a/src/rspamd_proxy.c +++ b/src/rspamd_proxy.c @@ -2335,6 +2335,12 @@ rspamd_proxy_scan_self_reply(struct rspamd_task *task) rspamd_protocol_http_reply(msg, task, &rep, out_type); rspamd_protocol_write_log_pipe(task); break; + case CMD_CHECK_V3: + rspamd_task_set_finish_time(task); + rspamd_protocol_http_reply_v3(msg, task); + rspamd_protocol_write_log_pipe(task); + ctype = NULL; /* Content-Type set by rspamd_protocol_http_reply_v3 as a header */ + break; case CMD_PING: rspamd_http_message_set_body(msg, "pong" CRLF, 6); ctype = "text/plain"; @@ -2506,6 +2512,12 @@ rspamd_proxy_self_scan(struct rspamd_proxy_session *session) if (task->cmd == CMD_PING || task->cmd == CMD_METRICS) { task->flags |= RSPAMD_TASK_FLAG_SKIP; } + else if (task->cmd == CMD_CHECK_V3) { + if (!rspamd_protocol_handle_v3_request(task, msg, data, len)) { + msg_err_task("cannot handle v3 request: %e", task->err); + task->flags |= RSPAMD_TASK_FLAG_SKIP; + } + } else { if (!rspamd_task_load_message(task, msg, data, len)) { msg_err_task("cannot load message: %e", task->err); diff --git a/src/worker.c b/src/worker.c index 5fdb2789f4..2d46754d87 100644 --- a/src/worker.c +++ b/src/worker.c @@ -175,6 +175,12 @@ rspamd_worker_body_handler(struct rspamd_http_connection *conn, if (task->cmd == CMD_PING || task->cmd == CMD_METRICS) { task->flags |= RSPAMD_TASK_FLAG_SKIP; } + else if (task->cmd == CMD_CHECK_V3) { + if (!rspamd_protocol_handle_v3_request(task, msg, chunk, len)) { + msg_err_task("cannot handle v3 request: %e", task->err); + task->flags |= RSPAMD_TASK_FLAG_SKIP; + } + } else { if (!rspamd_task_load_message(task, msg, chunk, len)) { msg_err_task("cannot load message: %e", task->err);