]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Add /checkv3 multipart scan endpoint
authorVsevolod Stakhov <vsevolod@rspamd.com>
Thu, 5 Feb 2026 22:00:00 +0000 (22:00 +0000)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Thu, 5 Feb 2026 22:00:00 +0000 (22:00 +0000)
Implement a new /checkv3 endpoint that uses multipart/form-data for
requests and multipart/mixed for responses. Metadata (from, rcpt, ip,
settings, etc.) is sent as a structured JSON/msgpack part instead of
HTTP headers. The response includes a "result" part and an optional
"body" part for rewritten messages.

New C++ multipart parser and response builder with C bridge functions.
Per-part zstd compression support. Client-side support via rspamc
--protocol-v3 flag. Proxy self-scan path updated for v3.

17 files changed:
src/client/rspamc.cxx
src/client/rspamdclient.c
src/client/rspamdclient.h
src/libserver/CMakeLists.txt
src/libserver/multipart_form.cxx [new file with mode: 0644]
src/libserver/multipart_form.h [new file with mode: 0644]
src/libserver/multipart_form.hxx [new file with mode: 0644]
src/libserver/multipart_response.cxx [new file with mode: 0644]
src/libserver/multipart_response.h [new file with mode: 0644]
src/libserver/multipart_response.hxx [new file with mode: 0644]
src/libserver/protocol.c
src/libserver/protocol.h
src/libserver/protocol_internal.h
src/libserver/task.c
src/libserver/task.h
src/rspamd_proxy.c
src/worker.c

index e5abf17383a332d98944332305c969e4d800cd61..b367e183a9be5c542590581b656bd3204aba5efc 100644 (file)
@@ -89,6 +89,7 @@ static gboolean compressed = TRUE;
 static gboolean profile = FALSE;
 static gboolean skip_images = FALSE;
 static gboolean skip_attachments = FALSE;
+static gboolean protocol_v3 = FALSE;
 static const char *pubkey = nullptr;
 static const char *user_agent = "rspamc";
 static const char *files_list = nullptr;
@@ -193,6 +194,8 @@ static GOptionEntry entries[] =
                 "Skip images when learning/unlearning fuzzy", nullptr},
                {"skip-attachments", '\0', 0, G_OPTION_ARG_NONE, &skip_attachments,
                 "Skip attachments when learning/unlearning fuzzy", nullptr},
+               {"protocol-v3", '\0', 0, G_OPTION_ARG_NONE, &protocol_v3,
+                "Use v3 multipart protocol (structured metadata, multipart response)", nullptr},
                {"user-agent", 'U', 0, G_OPTION_ARG_STRING, &user_agent,
                 "Use specific User-Agent instead of \"rspamc\"", nullptr},
                {"files-list", '\0', 0, G_OPTION_ARG_FILENAME, &files_list,
@@ -2281,7 +2284,88 @@ rspamc_process_input(struct ev_loop *ev_base, const struct rspamc_command &cmd,
                cbdata->cmd = cmd;
                cbdata->filename = name;
 
-               if (cmd.need_input) {
+               if (protocol_v3 && cmd.need_input &&
+                       (cmd.cmd == RSPAMC_COMMAND_SYMBOLS || cmd.cmd == RSPAMC_COMMAND_CHECK)) {
+                       /* Build metadata UCL object from CLI options */
+                       ucl_object_t *metadata = ucl_object_typed_new(UCL_OBJECT);
+
+                       if (from) {
+                               ucl_object_insert_key(metadata, ucl_object_fromstring(from),
+                                                                         "from", 0, false);
+                       }
+                       if (rcpts) {
+                               ucl_object_t *rcpt_arr = ucl_object_typed_new(UCL_ARRAY);
+                               for (auto *rcpt = rcpts; *rcpt; rcpt++) {
+                                       ucl_array_append(rcpt_arr, ucl_object_fromstring(*rcpt));
+                               }
+                               ucl_object_insert_key(metadata, rcpt_arr, "rcpt", 0, false);
+                       }
+                       if (ip) {
+                               ucl_object_insert_key(metadata, ucl_object_fromstring(ip),
+                                                                         "ip", 0, false);
+                       }
+                       if (helo) {
+                               ucl_object_insert_key(metadata, ucl_object_fromstring(helo),
+                                                                         "helo", 0, false);
+                       }
+                       if (hostname) {
+                               ucl_object_insert_key(metadata, ucl_object_fromstring(hostname),
+                                                                         "hostname", 0, false);
+                       }
+                       if (user) {
+                               ucl_object_insert_key(metadata, ucl_object_fromstring(user),
+                                                                         "user", 0, false);
+                       }
+                       if (deliver_to) {
+                               ucl_object_insert_key(metadata, ucl_object_fromstring(deliver_to),
+                                                                         "deliver_to", 0, false);
+                       }
+                       if (queue_id) {
+                               ucl_object_insert_key(metadata, ucl_object_fromstring(queue_id),
+                                                                         "queue_id", 0, false);
+                       }
+                       if (log_tag) {
+                               ucl_object_insert_key(metadata, ucl_object_fromstring(log_tag),
+                                                                         "log_tag", 0, false);
+                       }
+                       if (!settings.empty()) {
+                               /* Try to parse settings as UCL */
+                               struct ucl_parser *sp = ucl_parser_new(UCL_PARSER_DEFAULT);
+                               if (ucl_parser_add_string(sp, settings.c_str(), settings.size())) {
+                                       ucl_object_t *sobj = ucl_parser_get_object(sp);
+                                       ucl_object_insert_key(metadata, sobj, "settings", 0, false);
+                               }
+                               ucl_parser_free(sp);
+                       }
+                       if (raw) {
+                               ucl_object_insert_key(metadata, ucl_object_frombool(true),
+                                                                         "raw", 0, false);
+                       }
+
+                       /* Build flags array */
+                       ucl_object_t *flags_arr = ucl_object_typed_new(UCL_ARRAY);
+                       if (pass_all) {
+                               ucl_array_append(flags_arr, ucl_object_fromstring("pass_all"));
+                       }
+                       if (extended_urls) {
+                               ucl_array_append(flags_arr, ucl_object_fromstring("ext_urls"));
+                       }
+                       if (profile) {
+                               ucl_array_append(flags_arr, ucl_object_fromstring("profile"));
+                       }
+                       if (ucl_array_size(flags_arr) > 0) {
+                               ucl_object_insert_key(metadata, flags_arr, "flags", 0, false);
+                       }
+                       else {
+                               ucl_object_unref(flags_arr);
+                       }
+
+                       rspamd_client_command_v3(conn, "checkv3", metadata, in,
+                                                                        rspamc_client_cb, cbdata, compressed,
+                                                                        cbdata->filename.c_str(), &err);
+                       ucl_object_unref(metadata);
+               }
+               else if (cmd.need_input) {
                        const char *path = path_override.empty() ? cmd.path : path_override.c_str();
                        rspamd_client_command(conn, path, attrs, in, rspamc_client_cb,
                                                                  cbdata, compressed, dictionary, cbdata->filename.c_str(), &err);
index adffcf43f091ae651330dc82530a72a00cf4b9c1..0c2a87c9694af4be11f3d70e24905587ca72a4a8 100644 (file)
@@ -18,6 +18,9 @@
 #include "libserver/http/http_connection.h"
 #include "libserver/http/http_private.h"
 #include "libserver/protocol_internal.h"
+#include "libserver/multipart_form.h"
+#include "libmime/content_type.h"
+#include "ottery.h"
 #include "unix-std.h"
 
 #ifdef SYS_ZSTD
@@ -46,6 +49,7 @@ struct rspamd_client_connection {
        ev_tstamp timeout;
        struct rspamd_http_connection *http_conn;
        gboolean req_sent;
+       gboolean v3_mode;
        double start_time;
        double send_time;
        struct rspamd_client_request *req;
@@ -104,6 +108,10 @@ rspamd_client_error_handler(struct rspamd_http_connection *conn, GError *err)
                        c->start_time, c->send_time, NULL, 0, err);
 }
 
+static int
+rspamd_client_v3_finish_handler(struct rspamd_http_connection *conn,
+                                                               struct rspamd_http_message *msg);
+
 static int
 rspamd_client_finish_handler(struct rspamd_http_connection *conn,
                                                         struct rspamd_http_message *msg)
@@ -120,6 +128,10 @@ rspamd_client_finish_handler(struct rspamd_http_connection *conn,
 
        c = req->conn;
 
+       if (c->v3_mode) {
+               return rspamd_client_v3_finish_handler(conn, msg);
+       }
+
        if (!c->req_sent) {
                c->req_sent = TRUE;
                c->send_time = rspamd_get_ticks(FALSE);
@@ -486,6 +498,383 @@ rspamd_client_command(struct rspamd_client_connection *conn,
        return ret;
 }
 
+/*
+ * V3 client: finish handler for multipart/mixed responses
+ */
+static int
+rspamd_client_v3_finish_handler(struct rspamd_http_connection *conn,
+                                                               struct rspamd_http_message *msg)
+{
+       struct rspamd_client_request *req =
+               (struct rspamd_client_request *) conn->ud;
+       struct rspamd_client_connection *c;
+       struct ucl_parser *parser;
+       GError *err;
+       const char *start, *body = NULL;
+       gsize len, bodylen = 0;
+
+       c = req->conn;
+
+       if (!c->req_sent) {
+               c->req_sent = TRUE;
+               c->send_time = rspamd_get_ticks(FALSE);
+               rspamd_http_connection_reset(c->http_conn);
+               rspamd_http_connection_read_message(c->http_conn, c->req, c->timeout);
+               return 0;
+       }
+
+       if (rspamd_http_message_get_body(msg, NULL) == NULL || msg->code / 100 != 2) {
+               err = g_error_new(RCLIENT_ERROR, msg->code, "HTTP error: %d, %.*s",
+                                                 msg->code,
+                                                 (int) msg->status->len, msg->status->str);
+               req->cb(c, msg, c->server_name->str, NULL, req->input, req->ud,
+                               c->start_time, c->send_time, NULL, 0, err);
+               g_error_free(err);
+               return 0;
+       }
+
+       /* Check if response is multipart/mixed */
+       const rspamd_ftok_t *ct = rspamd_http_message_find_header(msg, "Content-Type");
+
+       if (ct && rspamd_substring_search_caseless(ct->begin, ct->len,
+                                                                                          "multipart/mixed", sizeof("multipart/mixed") - 1) != -1) {
+               /* Parse multipart response to extract result and body */
+               /* Extract boundary from Content-Type */
+               struct rspamd_content_type *parsed_ct = rspamd_content_type_parse(
+                       ct->begin, ct->len, rspamd_mempool_new(256, "v3-client", 0));
+               /* Note: we leak this small pool; acceptable for client-side */
+
+               if (parsed_ct && parsed_ct->boundary.len > 0) {
+                       struct rspamd_multipart_form_c *form = rspamd_multipart_form_parse(
+                               msg->body_buf.begin, msg->body_buf.len,
+                               parsed_ct->boundary.begin, parsed_ct->boundary.len);
+
+                       if (form) {
+                               const struct rspamd_multipart_entry_c *result_part =
+                                       rspamd_multipart_form_find(form, "result", sizeof("result") - 1);
+
+                               if (result_part) {
+                                       start = result_part->data;
+                                       len = result_part->data_len;
+
+                                       /* Check for per-part zstd compression */
+                                       if (result_part->content_encoding &&
+                                               result_part->content_encoding_len > 0 &&
+                                               rspamd_substring_search_caseless(result_part->content_encoding,
+                                                                                                                result_part->content_encoding_len,
+                                                                                                                "zstd", 4) != -1) {
+                                               /* Decompress */
+                                               ZSTD_DStream *zstream = ZSTD_createDStream();
+                                               ZSTD_initDStream(zstream);
+                                               ZSTD_inBuffer zin = {start, len, 0};
+                                               gsize outlen = ZSTD_getDecompressedSize(start, len);
+                                               if (outlen == 0) outlen = ZSTD_DStreamOutSize();
+                                               unsigned char *out = g_malloc(outlen);
+                                               ZSTD_outBuffer zout = {out, outlen, 0};
+
+                                               while (zin.pos < zin.size) {
+                                                       gsize r = ZSTD_decompressStream(zstream, &zout, &zin);
+                                                       if (ZSTD_isError(r)) {
+                                                               g_free(out);
+                                                               ZSTD_freeDStream(zstream);
+                                                               rspamd_multipart_form_free(form);
+                                                               err = g_error_new(RCLIENT_ERROR, 500,
+                                                                                                 "result decompression error: %s",
+                                                                                                 ZSTD_getErrorName(r));
+                                                               req->cb(c, msg, c->server_name->str, NULL,
+                                                                               req->input, req->ud, c->start_time,
+                                                                               c->send_time, NULL, 0, err);
+                                                               g_error_free(err);
+                                                               return 0;
+                                                       }
+                                                       if (zout.pos == zout.size) {
+                                                               zout.size *= 2;
+                                                               out = g_realloc(zout.dst, zout.size);
+                                                               zout.dst = out;
+                                                       }
+                                               }
+                                               ZSTD_freeDStream(zstream);
+                                               start = (const char *) zout.dst;
+                                               len = zout.pos;
+                                               /* Note: out will be freed below via goto end pattern */
+                                       }
+
+                                       /* Extract optional body part */
+                                       const struct rspamd_multipart_entry_c *body_part =
+                                               rspamd_multipart_form_find(form, "body", sizeof("body") - 1);
+                                       if (body_part && body_part->data_len > 0) {
+                                               body = body_part->data;
+                                               bodylen = body_part->data_len;
+                                               /* TODO: decompress body part if needed */
+                                       }
+
+                                       parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS);
+                                       /* Detect msgpack from content type */
+                                       if (result_part->content_type &&
+                                               rspamd_substring_search_caseless(result_part->content_type,
+                                                                                                                result_part->content_type_len,
+                                                                                                                "msgpack", 7) != -1) {
+                                               ucl_parser_add_chunk_full(parser, (const unsigned char *) start, len,
+                                                                                                 ucl_parser_get_default_priority(parser),
+                                                                                                 UCL_DUPLICATE_APPEND, UCL_PARSE_MSGPACK);
+                                       }
+                                       else {
+                                               ucl_parser_add_chunk(parser, (const unsigned char *) start, len);
+                                       }
+
+                                       if (ucl_parser_get_error(parser)) {
+                                               err = g_error_new(RCLIENT_ERROR, msg->code,
+                                                                                 "Cannot parse UCL: %s",
+                                                                                 ucl_parser_get_error(parser));
+                                               ucl_parser_free(parser);
+                                               rspamd_multipart_form_free(form);
+                                               req->cb(c, msg, c->server_name->str, NULL,
+                                                               req->input, req->ud, c->start_time,
+                                                               c->send_time, body, bodylen, err);
+                                               g_error_free(err);
+                                               return 0;
+                                       }
+
+                                       req->cb(c, msg, c->server_name->str,
+                                                       ucl_parser_get_object(parser),
+                                                       req->input, req->ud,
+                                                       c->start_time, c->send_time, body, bodylen, NULL);
+                                       ucl_parser_free(parser);
+                               }
+                               else {
+                                       err = g_error_new(RCLIENT_ERROR, 500,
+                                                                         "No 'result' part in multipart response");
+                                       req->cb(c, msg, c->server_name->str, NULL,
+                                                       req->input, req->ud, c->start_time,
+                                                       c->send_time, NULL, 0, err);
+                                       g_error_free(err);
+                               }
+
+                               rspamd_multipart_form_free(form);
+                       }
+                       else {
+                               err = g_error_new(RCLIENT_ERROR, 500,
+                                                                 "Cannot parse multipart response");
+                               req->cb(c, msg, c->server_name->str, NULL,
+                                               req->input, req->ud, c->start_time,
+                                               c->send_time, NULL, 0, err);
+                               g_error_free(err);
+                       }
+               }
+               else {
+                       err = g_error_new(RCLIENT_ERROR, 500,
+                                                         "No boundary in multipart Content-Type");
+                       req->cb(c, msg, c->server_name->str, NULL,
+                                       req->input, req->ud, c->start_time,
+                                       c->send_time, NULL, 0, err);
+                       g_error_free(err);
+               }
+       }
+       else {
+               /* Fallback: non-multipart response, handle like v2 */
+               start = msg->body_buf.begin;
+               len = msg->body_buf.len;
+
+               parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS);
+               if (!ucl_parser_add_chunk(parser, (const unsigned char *) start, len)) {
+                       err = g_error_new(RCLIENT_ERROR, msg->code, "Cannot parse UCL: %s",
+                                                         ucl_parser_get_error(parser));
+                       ucl_parser_free(parser);
+                       req->cb(c, msg, c->server_name->str, NULL,
+                                       req->input, req->ud, c->start_time,
+                                       c->send_time, NULL, 0, err);
+                       g_error_free(err);
+                       return 0;
+               }
+
+               req->cb(c, msg, c->server_name->str,
+                               ucl_parser_get_object(parser),
+                               req->input, req->ud,
+                               c->start_time, c->send_time, NULL, 0, NULL);
+               ucl_parser_free(parser);
+       }
+
+       return 0;
+}
+
+gboolean
+rspamd_client_command_v3(struct rspamd_client_connection *conn,
+                                                const char *command,
+                                                const ucl_object_t *metadata,
+                                                FILE *in,
+                                                rspamd_client_callback cb,
+                                                gpointer ud,
+                                                gboolean compressed,
+                                                const char *filename,
+                                                GError **err)
+{
+       struct rspamd_client_request *req;
+       GString *input = NULL;
+       rspamd_fstring_t *body;
+       gboolean ret;
+
+       req = g_malloc0(sizeof(struct rspamd_client_request));
+       req->conn = conn;
+       req->cb = cb;
+       req->ud = ud;
+
+       req->msg = rspamd_http_new_message(HTTP_REQUEST);
+       if (conn->key) {
+               req->msg->peer_key = rspamd_pubkey_ref(conn->key);
+       }
+
+       /* Read message input */
+       const char *msg_data = NULL;
+       gsize msg_len = 0;
+
+       if (in != NULL) {
+               input = g_string_sized_new(BUFSIZ);
+               char *p;
+               gsize remain, old_len;
+
+               while (!feof(in)) {
+                       p = input->str + input->len;
+                       remain = input->allocated_len - input->len - 1;
+                       if (remain == 0) {
+                               old_len = input->len;
+                               g_string_set_size(input, old_len * 2);
+                               input->len = old_len;
+                               continue;
+                       }
+                       remain = fread(p, 1, remain, in);
+                       if (remain > 0) {
+                               input->len += remain;
+                               input->str[input->len] = '\0';
+                       }
+               }
+
+               if (ferror(in) != 0) {
+                       g_set_error(err, RCLIENT_ERROR, ferror(in),
+                                               "input IO error: %s", strerror(ferror(in)));
+                       g_free(req);
+                       g_string_free(input, TRUE);
+                       return FALSE;
+               }
+
+               msg_data = input->str;
+               msg_len = input->len;
+               req->input = input;
+       }
+
+       /* Serialize metadata to JSON */
+       char *metadata_json = NULL;
+       gsize metadata_len = 0;
+
+       if (metadata) {
+               metadata_json = (char *) ucl_object_emit(metadata, UCL_EMIT_JSON_COMPACT);
+               metadata_len = strlen(metadata_json);
+       }
+       else {
+               metadata_json = g_strdup("{}");
+               metadata_len = 2;
+       }
+
+       /* Build multipart/form-data body with random boundary */
+       char boundary_buf[64];
+       rspamd_snprintf(boundary_buf, sizeof(boundary_buf),
+                                       "rspamc-v3-%016xL-%016xL",
+                                       ottery_rand_uint64(), ottery_rand_uint64());
+       const char *boundary = boundary_buf;
+       GString *mp_body = g_string_sized_new(metadata_len + msg_len + 512);
+
+       /* Metadata part */
+       rspamd_printf_gstring(mp_body,
+                                                 "--%s\r\n"
+                                                 "Content-Disposition: form-data; name=\"metadata\"\r\n"
+                                                 "Content-Type: application/json\r\n"
+                                                 "\r\n",
+                                                 boundary);
+       g_string_append_len(mp_body, metadata_json, metadata_len);
+       g_string_append(mp_body, "\r\n");
+
+       /* Message part */
+       if (msg_data && msg_len > 0) {
+               if (compressed) {
+                       /* Compress message with zstd */
+                       gsize comp_bound = ZSTD_compressBound(msg_len);
+                       char *comp_buf = g_malloc(comp_bound);
+                       gsize comp_len = ZSTD_compress(comp_buf, comp_bound,
+                                                                                  msg_data, msg_len, 1);
+
+                       if (ZSTD_isError(comp_len)) {
+                               g_set_error(err, RCLIENT_ERROR, 500, "compression error");
+                               g_free(comp_buf);
+                               g_free(metadata_json);
+                               g_string_free(mp_body, TRUE);
+                               g_free(req);
+                               if (input) g_string_free(input, TRUE);
+                               return FALSE;
+                       }
+
+                       rspamd_printf_gstring(mp_body,
+                                                                 "--%s\r\n"
+                                                                 "Content-Disposition: form-data; name=\"message\"\r\n"
+                                                                 "Content-Type: application/octet-stream\r\n"
+                                                                 "Content-Encoding: zstd\r\n"
+                                                                 "\r\n",
+                                                                 boundary);
+                       g_string_append_len(mp_body, comp_buf, comp_len);
+                       g_string_append(mp_body, "\r\n");
+                       g_free(comp_buf);
+               }
+               else {
+                       rspamd_printf_gstring(mp_body,
+                                                                 "--%s\r\n"
+                                                                 "Content-Disposition: form-data; name=\"message\"\r\n"
+                                                                 "Content-Type: application/octet-stream\r\n"
+                                                                 "\r\n",
+                                                                 boundary);
+                       g_string_append_len(mp_body, msg_data, msg_len);
+                       g_string_append(mp_body, "\r\n");
+               }
+       }
+
+       /* Closing boundary */
+       rspamd_printf_gstring(mp_body, "--%s--\r\n", boundary);
+
+       g_free(metadata_json);
+
+       /* Set body */
+       body = rspamd_fstring_new_init(mp_body->str, mp_body->len);
+       g_string_free(mp_body, TRUE);
+       rspamd_http_message_set_body_from_fstring_steal(req->msg, body);
+
+       /* Set Content-Type with boundary */
+       char ct_buf[128];
+       rspamd_snprintf(ct_buf, sizeof(ct_buf),
+                                       "multipart/form-data; boundary=%s", boundary);
+
+       /* Add Accept headers */
+       rspamd_http_message_add_header(req->msg, "Accept", "application/json");
+       if (compressed) {
+               rspamd_http_message_add_header(req->msg, "Accept-Encoding", "zstd");
+       }
+
+       /* Append URL path */
+       if (command != NULL && command[0] == '/') {
+               req->msg->url = rspamd_fstring_append(req->msg->url, command, strlen(command));
+       }
+       else {
+               req->msg->url = rspamd_fstring_append(req->msg->url, "/", 1);
+               req->msg->url = rspamd_fstring_append(req->msg->url, command ? command : "",
+                                                                                         command ? strlen(command) : 0);
+       }
+
+       conn->req = req;
+       conn->v3_mode = TRUE;
+       conn->start_time = rspamd_get_ticks(FALSE);
+
+       ret = rspamd_http_connection_write_message(conn->http_conn, req->msg,
+                                                                                          NULL, ct_buf, req, conn->timeout);
+
+       return ret;
+}
+
 void rspamd_client_destroy(struct rspamd_client_connection *conn)
 {
        if (conn != NULL) {
index 094676cfbeb76eeae9c9b5e44bdc7aee1ca4068e..1f41ac327a76c377108a0d8fd06cfd299e83a858 100644 (file)
@@ -93,6 +93,22 @@ gboolean rspamd_client_command(
        const char *filename,
        GError **err);
 
+/**
+ * Send a v3 multipart/form-data command.
+ * Metadata is sent as a JSON part, message as an octet-stream part.
+ * Response is multipart/mixed with "result" (JSON/msgpack) and optional "body" parts.
+ */
+gboolean rspamd_client_command_v3(
+       struct rspamd_client_connection *conn,
+       const char *command,
+       const ucl_object_t *metadata,
+       FILE *in,
+       rspamd_client_callback cb,
+       gpointer ud,
+       gboolean compressed,
+       const char *filename,
+       GError **err);
+
 /**
  * Destroy a connection to rspamd
  * @param conn
index 8cc673f138ef75eb0e00e2367e123856fc0bb729..6fa046abc95ae705101f1f6344eb83fb06c42809 100644 (file)
@@ -18,6 +18,8 @@ SET(LIBRSPAMDSERVERSRC
         ${CMAKE_CURRENT_SOURCE_DIR}/fuzzy_storage_stat.c
         ${CMAKE_CURRENT_SOURCE_DIR}/milter.c
         ${CMAKE_CURRENT_SOURCE_DIR}/monitored.c
+        ${CMAKE_CURRENT_SOURCE_DIR}/multipart_form.cxx
+        ${CMAKE_CURRENT_SOURCE_DIR}/multipart_response.cxx
         ${CMAKE_CURRENT_SOURCE_DIR}/protocol.c
         ${CMAKE_CURRENT_SOURCE_DIR}/re_cache.c
         ${CMAKE_CURRENT_SOURCE_DIR}/redis_pool.cxx
diff --git a/src/libserver/multipart_form.cxx b/src/libserver/multipart_form.cxx
new file mode 100644 (file)
index 0000000..e0db741
--- /dev/null
@@ -0,0 +1,383 @@
+/*
+ * Copyright 2025 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "multipart_form.hxx"
+#include <algorithm>
+#include <cstring>
+
+namespace rspamd::http {
+
+namespace {
+
+/* Trim leading and trailing whitespace (spaces and tabs) */
+auto trim(std::string_view sv) -> std::string_view
+{
+       while (!sv.empty() && (sv.front() == ' ' || sv.front() == '\t')) {
+               sv.remove_prefix(1);
+       }
+       while (!sv.empty() && (sv.back() == ' ' || sv.back() == '\t')) {
+               sv.remove_suffix(1);
+       }
+       return sv;
+}
+
+/* Case-insensitive prefix check */
+auto starts_with_ci(std::string_view haystack, std::string_view needle) -> bool
+{
+       if (haystack.size() < needle.size()) {
+               return false;
+       }
+       for (size_t i = 0; i < needle.size(); i++) {
+               if (std::tolower(static_cast<unsigned char>(haystack[i])) !=
+                       std::tolower(static_cast<unsigned char>(needle[i]))) {
+                       return false;
+               }
+       }
+       return true;
+}
+
+/**
+ * Extract a quoted or unquoted parameter value from a header value string.
+ * Given: name="value"; other="x"
+ * extract_param(sv, "name") returns "value"
+ */
+auto extract_param(std::string_view header, std::string_view param_name) -> std::string_view
+{
+       auto pos = size_t{0};
+
+       while (pos < header.size()) {
+               /* Find param_name */
+               auto found = header.find(param_name, pos);
+               if (found == std::string_view::npos) {
+                       return {};
+               }
+
+               /* Check that it's preceded by ; or start, not part of another word */
+               if (found > 0) {
+                       auto prev = header[found - 1];
+                       if (prev != ';' && prev != ' ' && prev != '\t') {
+                               pos = found + param_name.size();
+                               continue;
+                       }
+               }
+
+               auto after = found + param_name.size();
+               /* Skip whitespace before = */
+               while (after < header.size() && (header[after] == ' ' || header[after] == '\t')) {
+                       after++;
+               }
+               if (after >= header.size() || header[after] != '=') {
+                       pos = after;
+                       continue;
+               }
+               after++; /* skip = */
+               while (after < header.size() && (header[after] == ' ' || header[after] == '\t')) {
+                       after++;
+               }
+               if (after >= header.size()) {
+                       return {};
+               }
+
+               if (header[after] == '"') {
+                       /* Quoted value */
+                       after++; /* skip opening quote */
+                       auto end = header.find('"', after);
+                       if (end == std::string_view::npos) {
+                               return header.substr(after);
+                       }
+                       return header.substr(after, end - after);
+               }
+               else {
+                       /* Unquoted value - ends at ; or end */
+                       auto end = header.find(';', after);
+                       if (end == std::string_view::npos) {
+                               return trim(header.substr(after));
+                       }
+                       return trim(header.substr(after, end - after));
+               }
+       }
+
+       return {};
+}
+
+/**
+ * Parse headers from a part preamble.
+ * Headers end at the first \r\n\r\n or \n\n.
+ * Returns: (headers_end_offset, entry with parsed headers)
+ */
+auto parse_part_headers(std::string_view part_data, multipart_entry &entry) -> size_t
+{
+       /* Find end of headers */
+       auto hdr_end = part_data.find("\r\n\r\n");
+       size_t skip = 4;
+
+       if (hdr_end == std::string_view::npos) {
+               hdr_end = part_data.find("\n\n");
+               skip = 2;
+               if (hdr_end == std::string_view::npos) {
+                       return 0;
+               }
+       }
+
+       auto headers = part_data.substr(0, hdr_end);
+
+       /* Parse individual headers (split by \r\n or \n) */
+       size_t pos = 0;
+       while (pos < headers.size()) {
+               auto line_end = headers.find('\n', pos);
+               std::string_view line;
+               if (line_end == std::string_view::npos) {
+                       line = headers.substr(pos);
+                       pos = headers.size();
+               }
+               else {
+                       line = headers.substr(pos, line_end - pos);
+                       pos = line_end + 1;
+               }
+
+               /* Strip trailing \r */
+               if (!line.empty() && line.back() == '\r') {
+                       line.remove_suffix(1);
+               }
+
+               if (line.empty()) {
+                       continue;
+               }
+
+               auto colon = line.find(':');
+               if (colon == std::string_view::npos) {
+                       continue;
+               }
+
+               auto hdr_name = trim(line.substr(0, colon));
+               auto hdr_value = trim(line.substr(colon + 1));
+
+               if (starts_with_ci(hdr_name, "content-disposition")) {
+                       entry.name = extract_param(hdr_value, "name");
+                       entry.filename = extract_param(hdr_value, "filename");
+               }
+               else if (starts_with_ci(hdr_name, "content-type")) {
+                       /* Content-Type value is everything up to first ; or end */
+                       auto semi = hdr_value.find(';');
+                       if (semi != std::string_view::npos) {
+                               entry.content_type = trim(hdr_value.substr(0, semi));
+                       }
+                       else {
+                               entry.content_type = hdr_value;
+                       }
+               }
+               else if (starts_with_ci(hdr_name, "content-encoding") ||
+                                starts_with_ci(hdr_name, "content-transfer-encoding")) {
+                       entry.content_encoding = hdr_value;
+               }
+       }
+
+       return hdr_end + skip;
+}
+
+}// anonymous namespace
+
+auto parse_multipart_form(std::string_view data,
+                                                 std::string_view boundary) -> std::optional<multipart_form>
+{
+       if (boundary.empty() || data.empty()) {
+               return std::nullopt;
+       }
+
+       /* Build delimiter strings: "\r\n--<boundary>" and "--<boundary>" */
+       std::string delim;
+       delim.reserve(boundary.size() + 4);
+       delim = "--";
+       delim.append(boundary.data(), boundary.size());
+
+       std::string crlf_delim = "\r\n";
+       crlf_delim.append(delim);
+
+       std::string lf_delim = "\n";
+       lf_delim.append(delim);
+
+       /* Find the first boundary */
+       auto first = data.find(delim);
+       if (first == std::string_view::npos) {
+               return std::nullopt;
+       }
+
+       /* Skip past first boundary line */
+       auto pos = first + delim.size();
+
+       /* Skip optional \r\n after boundary */
+       if (pos < data.size() && data[pos] == '\r') {
+               pos++;
+       }
+       if (pos < data.size() && data[pos] == '\n') {
+               pos++;
+       }
+
+       static constexpr size_t max_parts = 8;
+       multipart_form form;
+
+       while (pos < data.size()) {
+               /* Find next boundary (try \r\n-- first, then \n--) */
+               auto next = data.find(crlf_delim, pos);
+               size_t delim_size = crlf_delim.size();
+
+               if (next == std::string_view::npos) {
+                       next = data.find(lf_delim, pos);
+                       delim_size = lf_delim.size();
+               }
+
+               if (next == std::string_view::npos) {
+                       break;
+               }
+
+               auto part_data = data.substr(pos, next - pos);
+
+               /* Parse headers from this part */
+               multipart_entry entry{};
+               auto body_offset = parse_part_headers(part_data, entry);
+
+               if (body_offset > 0 && body_offset <= part_data.size()) {
+                       entry.data = part_data.substr(body_offset);
+               }
+               else {
+                       /* No headers found, treat entire part as data */
+                       entry.data = part_data;
+               }
+
+               form.parts.push_back(entry);
+
+               if (form.parts.size() >= max_parts) {
+                       break;
+               }
+
+               /* Move past the boundary */
+               pos = next + delim_size;
+
+               /* Check for closing boundary -- */
+               if (pos + 1 < data.size() && data[pos] == '-' && data[pos + 1] == '-') {
+                       break;
+               }
+
+               /* Skip \r\n after boundary */
+               if (pos < data.size() && data[pos] == '\r') {
+                       pos++;
+               }
+               if (pos < data.size() && data[pos] == '\n') {
+                       pos++;
+               }
+       }
+
+       if (form.parts.empty()) {
+               return std::nullopt;
+       }
+
+       return form;
+}
+
+auto find_part(const multipart_form &form,
+                          std::string_view name) -> const multipart_entry *
+{
+       for (const auto &entry: form.parts) {
+               if (entry.name == name) {
+                       return &entry;
+               }
+       }
+       return nullptr;
+}
+
+}// namespace rspamd::http
+
+
+/*
+ * C bridge implementation
+ */
+
+struct rspamd_multipart_form_c {
+       rspamd::http::multipart_form form;
+       /* Pre-built C entries for find() results */
+       std::vector<rspamd_multipart_entry_c> c_entries;
+
+       void build_c_entries()
+       {
+               c_entries.clear();
+               c_entries.reserve(form.parts.size());
+               for (const auto &p: form.parts) {
+                       rspamd_multipart_entry_c ce{};
+                       ce.name = p.name.data();
+                       ce.name_len = p.name.size();
+                       ce.filename = p.filename.data();
+                       ce.filename_len = p.filename.size();
+                       ce.content_type = p.content_type.data();
+                       ce.content_type_len = p.content_type.size();
+                       ce.content_encoding = p.content_encoding.data();
+                       ce.content_encoding_len = p.content_encoding.size();
+                       ce.data = p.data.data();
+                       ce.data_len = p.data.size();
+                       c_entries.push_back(ce);
+               }
+       }
+};
+
+extern "C" {
+
+struct rspamd_multipart_form_c *
+rspamd_multipart_form_parse(const char *data, gsize len,
+                                                       const char *boundary, gsize boundary_len)
+{
+       auto result = rspamd::http::parse_multipart_form(
+               {data, len}, {boundary, boundary_len});
+
+       if (!result) {
+               return nullptr;
+       }
+
+       auto *form = new rspamd_multipart_form_c();
+       form->form = std::move(*result);
+       form->build_c_entries();
+       return form;
+}
+
+gsize rspamd_multipart_form_nparts(const struct rspamd_multipart_form_c *form)
+{
+       if (!form) {
+               return 0;
+       }
+       return form->form.parts.size();
+}
+
+const struct rspamd_multipart_entry_c *
+rspamd_multipart_form_find(const struct rspamd_multipart_form_c *form,
+                                                  const char *name, gsize name_len)
+{
+       if (!form || !name) {
+               return nullptr;
+       }
+
+       std::string_view name_sv{name, name_len};
+       for (size_t i = 0; i < form->form.parts.size(); i++) {
+               if (form->form.parts[i].name == name_sv) {
+                       return &form->c_entries[i];
+               }
+       }
+       return nullptr;
+}
+
+void rspamd_multipart_form_free(struct rspamd_multipart_form_c *form)
+{
+       delete form;
+}
+
+} /* extern "C" */
diff --git a/src/libserver/multipart_form.h b/src/libserver/multipart_form.h
new file mode 100644 (file)
index 0000000..df7bdd2
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2025 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_MULTIPART_FORM_H
+#define RSPAMD_MULTIPART_FORM_H
+
+#include "config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_multipart_entry_c {
+       const char *name;
+       gsize name_len;
+       const char *filename;
+       gsize filename_len;
+       const char *content_type;
+       gsize content_type_len;
+       const char *content_encoding;
+       gsize content_encoding_len;
+       const char *data;
+       gsize data_len;
+};
+
+struct rspamd_multipart_form_c;
+
+/**
+ * Parse multipart/form-data body. Returns NULL on error.
+ * The returned handle must be freed with rspamd_multipart_form_free().
+ *
+ * IMPORTANT: The returned form contains pointers into the original
+ * data buffer (zero-copy). The caller MUST ensure 'data' remains valid
+ * for the lifetime of the returned form.
+ */
+struct rspamd_multipart_form_c *rspamd_multipart_form_parse(
+       const char *data, gsize len,
+       const char *boundary, gsize boundary_len);
+
+/**
+ * Get number of parts.
+ */
+gsize rspamd_multipart_form_nparts(const struct rspamd_multipart_form_c *form);
+
+/**
+ * Find part by name. Returns NULL if not found.
+ */
+const struct rspamd_multipart_entry_c *rspamd_multipart_form_find(
+       const struct rspamd_multipart_form_c *form,
+       const char *name, gsize name_len);
+
+/**
+ * Free parsed form.
+ */
+void rspamd_multipart_form_free(struct rspamd_multipart_form_c *form);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RSPAMD_MULTIPART_FORM_H */
diff --git a/src/libserver/multipart_form.hxx b/src/libserver/multipart_form.hxx
new file mode 100644 (file)
index 0000000..104afbb
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2025 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_MULTIPART_FORM_HXX
+#define RSPAMD_MULTIPART_FORM_HXX
+
+#include <string_view>
+#include <vector>
+#include <optional>
+
+namespace rspamd::http {
+
+struct multipart_entry {
+       std::string_view name;
+       std::string_view filename;
+       std::string_view content_type;
+       std::string_view content_encoding;
+       std::string_view data;
+};
+
+struct multipart_form {
+       std::vector<multipart_entry> parts;
+};
+
+/**
+ * Parse multipart/form-data body.
+ * All string_views point into the original data buffer (zero-copy).
+ * @param data raw body
+ * @param boundary boundary string (without leading --)
+ * @return parsed form or nullopt on error
+ */
+auto parse_multipart_form(std::string_view data,
+                                                 std::string_view boundary) -> std::optional<multipart_form>;
+
+/**
+ * Find part by name.
+ * @return pointer to entry or nullptr
+ */
+auto find_part(const multipart_form &form,
+                          std::string_view name) -> const multipart_entry *;
+
+}// namespace rspamd::http
+
+/* C bridge - include the C-only header */
+#include "multipart_form.h"
+
+#endif// RSPAMD_MULTIPART_FORM_HXX
diff --git a/src/libserver/multipart_response.cxx b/src/libserver/multipart_response.cxx
new file mode 100644 (file)
index 0000000..99856bf
--- /dev/null
@@ -0,0 +1,203 @@
+/*
+ * Copyright 2025 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "multipart_response.hxx"
+#include "ottery.h"
+#include "printf.h"
+#include <cstdlib>
+#include <cstring>
+
+#ifdef SYS_ZSTD
+#include "zstd.h"
+#else
+#include "contrib/zstd/zstd.h"
+#endif
+
+namespace rspamd::http {
+
+multipart_response::multipart_response()
+{
+       /* Generate a random boundary using ottery CSPRNG */
+       char buf[64];
+       uint64_t rnd = ottery_rand_uint64();
+       static unsigned int counter = 0;
+       rspamd_snprintf(buf, sizeof(buf), "rspamd-v3-%016xL-%ud",
+                                       rnd, counter++);
+       boundary_ = buf;
+}
+
+void multipart_response::add_part(std::string name, std::string content_type,
+                                                                 std::string_view data, bool compress)
+{
+       parts_.push_back({std::move(name), std::move(content_type), data, compress});
+}
+
+auto multipart_response::serialize(void *zstream) const -> std::string
+{
+       std::string out;
+       out.reserve(4096);
+
+       for (const auto &part: parts_) {
+               out.append("--");
+               out.append(boundary_);
+               out.append("\r\n");
+
+               /* Content-Disposition */
+               out.append("Content-Disposition: form-data; name=\"");
+               out.append(part.name);
+               out.append("\"\r\n");
+
+               /* Content-Type */
+               if (!part.content_type.empty()) {
+                       out.append("Content-Type: ");
+                       out.append(part.content_type);
+                       out.append("\r\n");
+               }
+
+               /* Compress if requested and zstream is available */
+               if (part.compress && zstream && !part.data.empty()) {
+                       auto *cstream = static_cast<ZSTD_CStream *>(zstream);
+                       size_t bound = ZSTD_compressBound(part.data.size());
+                       std::string compressed(bound, '\0');
+
+                       ZSTD_inBuffer zin{};
+                       zin.src = part.data.data();
+                       zin.size = part.data.size();
+                       zin.pos = 0;
+
+                       ZSTD_outBuffer zout{};
+                       zout.dst = compressed.data();
+                       zout.size = compressed.size();
+                       zout.pos = 0;
+
+                       ZSTD_CCtx_reset(cstream, ZSTD_reset_session_only);
+
+                       bool ok = true;
+                       while (zin.pos < zin.size) {
+                               size_t r = ZSTD_compressStream2(cstream, &zout, &zin, ZSTD_e_continue);
+                               if (ZSTD_isError(r)) {
+                                       ok = false;
+                                       break;
+                               }
+                       }
+
+                       if (ok) {
+                               size_t r = ZSTD_compressStream2(cstream, &zout, &zin, ZSTD_e_end);
+                               if (ZSTD_isError(r)) {
+                                       ok = false;
+                               }
+                       }
+
+                       if (ok) {
+                               compressed.resize(zout.pos);
+                               out.append("Content-Encoding: zstd\r\n");
+                               out.append("\r\n");
+                               out.append(compressed);
+                       }
+                       else {
+                               /* Fallback: write uncompressed */
+                               out.append("\r\n");
+                               out.append(part.data);
+                       }
+               }
+               else {
+                       out.append("\r\n");
+                       out.append(part.data);
+               }
+
+               out.append("\r\n");
+       }
+
+       /* Closing boundary */
+       out.append("--");
+       out.append(boundary_);
+       out.append("--\r\n");
+
+       return out;
+}
+
+auto multipart_response::content_type() const -> std::string
+{
+       return "multipart/mixed; boundary=\"" + boundary_ + "\"";
+}
+
+}// namespace rspamd::http
+
+
+/*
+ * C bridge implementation
+ */
+
+struct rspamd_multipart_response_c {
+       rspamd::http::multipart_response resp;
+       std::string cached_content_type;
+};
+
+extern "C" {
+
+struct rspamd_multipart_response_c *
+rspamd_multipart_response_new(void)
+{
+       return new rspamd_multipart_response_c();
+}
+
+void rspamd_multipart_response_add_part(
+       struct rspamd_multipart_response_c *resp,
+       const char *name,
+       const char *content_type,
+       const char *data, gsize len,
+       gboolean compress)
+{
+       if (!resp) {
+               return;
+       }
+       resp->resp.add_part(
+               name ? name : "",
+               content_type ? content_type : "",
+               {data, len},
+               compress != FALSE);
+}
+
+rspamd_fstring_t *
+rspamd_multipart_response_serialize(
+       struct rspamd_multipart_response_c *resp,
+       void *zstream)
+{
+       if (!resp) {
+               return nullptr;
+       }
+       auto serialized = resp->resp.serialize(zstream);
+       return rspamd_fstring_new_init(serialized.data(), serialized.size());
+}
+
+const char *
+rspamd_multipart_response_content_type(
+       struct rspamd_multipart_response_c *resp)
+{
+       if (!resp) {
+               return "multipart/mixed";
+       }
+       resp->cached_content_type = resp->resp.content_type();
+       return resp->cached_content_type.c_str();
+}
+
+void rspamd_multipart_response_free(
+       struct rspamd_multipart_response_c *resp)
+{
+       delete resp;
+}
+
+} /* extern "C" */
diff --git a/src/libserver/multipart_response.h b/src/libserver/multipart_response.h
new file mode 100644 (file)
index 0000000..6b7e97f
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2025 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_MULTIPART_RESPONSE_H
+#define RSPAMD_MULTIPART_RESPONSE_H
+
+#include "config.h"
+#include "libutil/fstring.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_multipart_response_c;
+
+struct rspamd_multipart_response_c *rspamd_multipart_response_new(void);
+
+void rspamd_multipart_response_add_part(
+       struct rspamd_multipart_response_c *resp,
+       const char *name,
+       const char *content_type,
+       const char *data, gsize len,
+       gboolean compress);
+
+/**
+ * Serialize the multipart response.
+ * @param resp response handle
+ * @param zstream ZSTD compression stream (may be NULL)
+ * @return newly allocated fstring (caller owns)
+ */
+rspamd_fstring_t *rspamd_multipart_response_serialize(
+       struct rspamd_multipart_response_c *resp,
+       void *zstream);
+
+/**
+ * Get the Content-Type header value (includes boundary).
+ * The returned string is valid until resp is freed.
+ */
+const char *rspamd_multipart_response_content_type(
+       struct rspamd_multipart_response_c *resp);
+
+void rspamd_multipart_response_free(
+       struct rspamd_multipart_response_c *resp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RSPAMD_MULTIPART_RESPONSE_H */
diff --git a/src/libserver/multipart_response.hxx b/src/libserver/multipart_response.hxx
new file mode 100644 (file)
index 0000000..a8611d1
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2025 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_MULTIPART_RESPONSE_HXX
+#define RSPAMD_MULTIPART_RESPONSE_HXX
+
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace rspamd::http {
+
+struct response_part {
+       std::string name;
+       std::string content_type;
+       std::string_view data; /* Points to external buffer; caller must keep alive */
+       bool compress = false;
+};
+
+class multipart_response {
+public:
+       multipart_response();
+
+       void add_part(std::string name, std::string content_type,
+                                 std::string_view data, bool compress = false);
+
+       /**
+        * Serialize the multipart response.
+        * @param zstream ZSTD compression stream (may be null if no compression needed)
+        * @return serialized body
+        */
+       auto serialize(void *zstream = nullptr) const -> std::string;
+
+       /**
+        * Get the Content-Type header value including boundary.
+        */
+       auto content_type() const -> std::string;
+
+       auto get_boundary() const -> std::string_view
+       {
+               return boundary_;
+       }
+
+private:
+       std::string boundary_;
+       std::vector<response_part> parts_;
+};
+
+}// namespace rspamd::http
+
+/* C bridge - include the C-only header */
+#include "multipart_response.h"
+
+#endif// RSPAMD_MULTIPART_RESPONSE_HXX
index 83bcb6e2a8767aba6f2c302e2169ff64d4e760d5..560282b41e41bf44d0affe65e29347e65dae6b22 100644 (file)
@@ -29,6 +29,9 @@
 #include "rspamd_simdutf.h"
 #include "task.h"
 #include "lua/lua_classnames.h"
+#include "multipart_form.h"
+#include "multipart_response.h"
+#include "libmime/content_type.h"
 #include <math.h>
 
 #ifdef SYS_ZSTD
@@ -148,7 +151,12 @@ rspamd_protocol_handle_url(struct rspamd_task *task,
        case 'c':
        case 'C':
                /* check */
-               if (COMPARE_CMD(p, MSG_CMD_CHECK_V2, pathlen)) {
+               if (COMPARE_CMD(p, MSG_CMD_CHECK_V3, pathlen)) {
+                       task->cmd = CMD_CHECK_V3;
+                       task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MULTIPART_V3;
+                       msg_debug_protocol("got checkv3 command");
+               }
+               else if (COMPARE_CMD(p, MSG_CMD_CHECK_V2, pathlen)) {
                        task->cmd = CMD_CHECK_V2;
                        msg_debug_protocol("got checkv2 command");
                }
@@ -1150,7 +1158,7 @@ rspamd_metric_symbol_ucl(struct rspamd_task *task, struct rspamd_symbol_result *
        ucl_object_insert_key(obj, ucl_object_fromstring(sym->name), "name", 0, false);
        ucl_object_insert_key(obj, ucl_object_fromdouble(sym->score), "score", 0, false);
 
-       if (task->cmd == CMD_CHECK_V2) {
+       if (task->cmd == CMD_CHECK_V2 || task->cmd == CMD_CHECK_V3) {
                if (sym->sym) {
                        ucl_object_insert_key(obj, ucl_object_fromdouble(sym->sym->score), "metric_score", 0, false);
                }
@@ -2094,6 +2102,680 @@ void rspamd_protocol_write_log_pipe(struct rspamd_task *task)
        g_array_free(extra, TRUE);
 }
 
+/*
+ * Handle metadata from a parsed UCL object for v3 protocol.
+ * Maps structured metadata fields to task fields.
+ */
+static gboolean
+rspamd_protocol_handle_metadata(struct rspamd_task *task,
+                                                               const ucl_object_t *metadata)
+{
+       const ucl_object_t *elt, *cur;
+       gboolean has_ip = FALSE;
+
+       if (!metadata || ucl_object_type(metadata) != UCL_OBJECT) {
+               g_set_error(&task->err, rspamd_protocol_quark(), 400,
+                                       "metadata is not a valid object");
+               return FALSE;
+       }
+
+       /* from */
+       elt = ucl_object_lookup(metadata, "from");
+       if (elt && ucl_object_type(elt) == UCL_STRING) {
+               const char *from_str = ucl_object_tostring(elt);
+               gsize from_len = strlen(from_str);
+
+               if (from_len == 0) {
+                       task->from_envelope = rspamd_email_address_from_smtp("<>", 2);
+               }
+               else {
+                       task->from_envelope = rspamd_email_address_from_smtp(from_str, from_len);
+               }
+
+               if (!task->from_envelope) {
+                       msg_err_protocol("bad from in metadata: '%s'", from_str);
+                       task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
+               }
+       }
+
+       /* rcpt (array) */
+       elt = ucl_object_lookup(metadata, "rcpt");
+       if (elt) {
+               if (ucl_object_type(elt) == UCL_ARRAY) {
+                       ucl_object_iter_t it = NULL;
+
+                       while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
+                               if (ucl_object_type(cur) == UCL_STRING) {
+                                       const char *rcpt_str = ucl_object_tostring(cur);
+                                       struct rspamd_email_address *addr =
+                                               rspamd_email_address_from_smtp(rcpt_str, strlen(rcpt_str));
+
+                                       if (addr) {
+                                               if (!task->rcpt_envelope) {
+                                                       task->rcpt_envelope = g_ptr_array_sized_new(2);
+                                               }
+                                               g_ptr_array_add(task->rcpt_envelope, addr);
+                                       }
+                                       else {
+                                               msg_err_protocol("bad rcpt in metadata: '%s'", rcpt_str);
+                                               task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
+                                       }
+                               }
+                       }
+               }
+               else if (ucl_object_type(elt) == UCL_STRING) {
+                       /* Single recipient as string */
+                       const char *rcpt_str = ucl_object_tostring(elt);
+                       struct rspamd_email_address *addr =
+                               rspamd_email_address_from_smtp(rcpt_str, strlen(rcpt_str));
+
+                       if (addr) {
+                               if (!task->rcpt_envelope) {
+                                       task->rcpt_envelope = g_ptr_array_sized_new(2);
+                               }
+                               g_ptr_array_add(task->rcpt_envelope, addr);
+                       }
+               }
+       }
+
+       /* ip */
+       elt = ucl_object_lookup(metadata, "ip");
+       if (elt && ucl_object_type(elt) == UCL_STRING) {
+               const char *ip_str = ucl_object_tostring(elt);
+
+               if (!rspamd_parse_inet_address(&task->from_addr,
+                                                                          ip_str, strlen(ip_str),
+                                                                          RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) {
+                       msg_err_protocol("bad ip in metadata: '%s'", ip_str);
+               }
+               else {
+                       has_ip = TRUE;
+               }
+       }
+
+       if (!has_ip) {
+               task->flags |= RSPAMD_TASK_FLAG_NO_IP;
+       }
+
+       /* helo */
+       elt = ucl_object_lookup(metadata, "helo");
+       if (elt && ucl_object_type(elt) == UCL_STRING) {
+               task->helo = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt));
+       }
+
+       /* hostname */
+       elt = ucl_object_lookup(metadata, "hostname");
+       if (elt && ucl_object_type(elt) == UCL_STRING) {
+               task->hostname = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt));
+       }
+
+       /* queue_id */
+       elt = ucl_object_lookup(metadata, "queue_id");
+       if (elt && ucl_object_type(elt) == UCL_STRING) {
+               task->queue_id = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt));
+       }
+
+       /* user */
+       elt = ucl_object_lookup(metadata, "user");
+       if (elt && ucl_object_type(elt) == UCL_STRING) {
+               task->auth_user = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt));
+       }
+
+       /* deliver_to */
+       elt = ucl_object_lookup(metadata, "deliver_to");
+       if (elt && ucl_object_type(elt) == UCL_STRING) {
+               task->deliver_to = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt));
+       }
+
+       /* settings_id */
+       elt = ucl_object_lookup(metadata, "settings_id");
+       if (elt && ucl_object_type(elt) == UCL_STRING) {
+               const char *sid = ucl_object_tostring(elt);
+               task->settings_elt = rspamd_config_find_settings_name_ref(
+                       task->cfg, sid, strlen(sid));
+
+               if (!task->settings_elt) {
+                       msg_warn_protocol("unknown settings id in metadata: '%s'", sid);
+               }
+       }
+
+       /* settings (inline UCL object) */
+       elt = ucl_object_lookup(metadata, "settings");
+       if (elt && ucl_object_type(elt) == UCL_OBJECT) {
+               /* If both settings_id and settings are present, settings wins */
+               if (task->settings_elt) {
+                       msg_warn_protocol("ignore settings_id because inline settings is also present");
+                       REF_RELEASE(task->settings_elt);
+                       task->settings_elt = NULL;
+               }
+               task->settings = ucl_object_ref(elt);
+       }
+
+       /* tls.cipher - sets SSL flag */
+       elt = ucl_object_lookup_path(metadata, "tls.cipher");
+       if (elt && ucl_object_type(elt) == UCL_STRING) {
+               task->flags |= RSPAMD_TASK_FLAG_SSL;
+       }
+
+       /* mta.tag */
+       elt = ucl_object_lookup_path(metadata, "mta.tag");
+       if (elt && ucl_object_type(elt) == UCL_STRING) {
+               char *mta_tag = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt));
+               rspamd_mempool_set_variable(task->task_pool, RSPAMD_MEMPOOL_MTA_TAG, mta_tag, NULL);
+       }
+
+       /* mta.name */
+       elt = ucl_object_lookup_path(metadata, "mta.name");
+       if (elt && ucl_object_type(elt) == UCL_STRING) {
+               char *mta_name = rspamd_mempool_strdup(task->task_pool, ucl_object_tostring(elt));
+               rspamd_mempool_set_variable(task->task_pool, RSPAMD_MEMPOOL_MTA_NAME, mta_name, NULL);
+       }
+
+       /* flags (array of strings) */
+       elt = ucl_object_lookup(metadata, "flags");
+       if (elt && ucl_object_type(elt) == UCL_ARRAY) {
+               ucl_object_iter_t it = NULL;
+
+               while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
+                       if (ucl_object_type(cur) == UCL_STRING) {
+                               const char *flag_str = ucl_object_tostring(cur);
+                               rspamd_protocol_handle_flag(task, flag_str, strlen(flag_str));
+                       }
+               }
+       }
+
+       /* raw - disable MIME parsing */
+       elt = ucl_object_lookup(metadata, "raw");
+       if (elt && ucl_object_type(elt) == UCL_BOOLEAN) {
+               if (ucl_object_toboolean(elt)) {
+                       task->flags &= ~RSPAMD_TASK_FLAG_MIME;
+               }
+       }
+
+       /* log_tag */
+       elt = ucl_object_lookup(metadata, "log_tag");
+       if (elt && ucl_object_type(elt) == UCL_STRING) {
+               const char *tag = ucl_object_tostring(elt);
+               gsize tag_len = strlen(tag);
+
+               if (rspamd_fast_utf8_validate(tag, tag_len) == 0) {
+                       int len = MIN(tag_len, sizeof(task->task_pool->tag.uid) - 1);
+                       memcpy(task->task_pool->tag.uid, tag, len);
+                       task->task_pool->tag.uid[len] = '\0';
+               }
+       }
+
+       /* mail_esmtp_args (object: key -> value) */
+       elt = ucl_object_lookup(metadata, "mail_esmtp_args");
+       if (elt && ucl_object_type(elt) == UCL_OBJECT) {
+               if (!task->mail_esmtp_args) {
+                       task->mail_esmtp_args = g_hash_table_new_full(
+                               rspamd_ftok_icase_hash,
+                               rspamd_ftok_icase_equal,
+                               rspamd_fstring_mapped_ftok_free,
+                               rspamd_fstring_mapped_ftok_free);
+               }
+
+               ucl_object_iter_t it = NULL;
+               while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
+                       if (ucl_object_type(cur) == UCL_STRING) {
+                               const char *key = ucl_object_key(cur);
+                               const char *val = ucl_object_tostring(cur);
+
+                               rspamd_fstring_t *fkey = rspamd_fstring_new_init(key, strlen(key));
+                               rspamd_fstring_t *fval = rspamd_fstring_new_init(val, strlen(val));
+                               rspamd_ftok_t *key_tok = rspamd_ftok_map(fkey);
+                               rspamd_ftok_t *val_tok = rspamd_ftok_map(fval);
+
+                               g_hash_table_replace(task->mail_esmtp_args, key_tok, val_tok);
+                       }
+               }
+       }
+
+       /* rcpt_esmtp_args (array of objects) */
+       elt = ucl_object_lookup(metadata, "rcpt_esmtp_args");
+       if (elt && ucl_object_type(elt) == UCL_ARRAY) {
+               if (!task->rcpt_esmtp_args) {
+                       task->rcpt_esmtp_args = g_ptr_array_new();
+               }
+
+               ucl_object_iter_t arr_it = NULL;
+               int rcpt_idx = 0;
+
+               while ((cur = ucl_object_iterate(elt, &arr_it, true)) != NULL) {
+                       GHashTable *rcpt_args = NULL;
+
+                       if (ucl_object_type(cur) == UCL_OBJECT) {
+                               rcpt_args = g_hash_table_new_full(
+                                       rspamd_ftok_icase_hash,
+                                       rspamd_ftok_icase_equal,
+                                       rspamd_fstring_mapped_ftok_free,
+                                       rspamd_fstring_mapped_ftok_free);
+
+                               ucl_object_iter_t obj_it = NULL;
+                               const ucl_object_t *kv;
+
+                               while ((kv = ucl_object_iterate(cur, &obj_it, true)) != NULL) {
+                                       if (ucl_object_type(kv) == UCL_STRING) {
+                                               const char *key = ucl_object_key(kv);
+                                               const char *val = ucl_object_tostring(kv);
+
+                                               rspamd_fstring_t *fkey = rspamd_fstring_new_init(key, strlen(key));
+                                               rspamd_fstring_t *fval = rspamd_fstring_new_init(val, strlen(val));
+                                               rspamd_ftok_t *key_tok = rspamd_ftok_map(fkey);
+                                               rspamd_ftok_t *val_tok = rspamd_ftok_map(fval);
+
+                                               g_hash_table_replace(rcpt_args, key_tok, val_tok);
+                                       }
+                               }
+                       }
+
+                       /* Ensure array is large enough */
+                       while ((int) task->rcpt_esmtp_args->len <= rcpt_idx) {
+                               g_ptr_array_add(task->rcpt_esmtp_args, NULL);
+                       }
+                       g_ptr_array_index(task->rcpt_esmtp_args, rcpt_idx) = rcpt_args;
+                       rcpt_idx++;
+               }
+       }
+
+       return TRUE;
+}
+
+/*
+ * Handle v3 multipart/form-data request.
+ */
+gboolean
+rspamd_protocol_handle_v3_request(struct rspamd_task *task,
+                                                                 struct rspamd_http_message *msg,
+                                                                 const char *chunk, gsize len)
+{
+       const char *boundary = NULL;
+       gsize boundary_len = 0;
+
+       /* Extract boundary from HTTP Content-Type header */
+       const rspamd_ftok_t *ct_hdr = rspamd_http_message_find_header(msg, "Content-Type");
+
+       if (!ct_hdr) {
+               g_set_error(&task->err, rspamd_protocol_quark(), 400,
+                                       "missing Content-Type header for v3 request");
+               return FALSE;
+       }
+
+       struct rspamd_content_type *ct = rspamd_content_type_parse(
+               ct_hdr->begin, ct_hdr->len, task->task_pool);
+
+       if (!ct || ct->boundary.len == 0) {
+               g_set_error(&task->err, rspamd_protocol_quark(), 400,
+                                       "cannot extract boundary from Content-Type");
+               return FALSE;
+       }
+
+       boundary = ct->boundary.begin;
+       boundary_len = ct->boundary.len;
+
+       /* Parse multipart body */
+       struct rspamd_multipart_form_c *form = rspamd_multipart_form_parse(
+               chunk, len, boundary, boundary_len);
+
+       if (!form) {
+               g_set_error(&task->err, rspamd_protocol_quark(), 400,
+                                       "cannot parse multipart/form-data body");
+               return FALSE;
+       }
+
+       /* Register destructor for the form */
+       rspamd_mempool_add_destructor(task->task_pool,
+                                                                 (rspamd_mempool_destruct_t) rspamd_multipart_form_free,
+                                                                 form);
+
+       /* Find metadata part */
+       const struct rspamd_multipart_entry_c *metadata_part =
+               rspamd_multipart_form_find(form, "metadata", sizeof("metadata") - 1);
+
+       if (!metadata_part || metadata_part->data_len == 0) {
+               g_set_error(&task->err, rspamd_protocol_quark(), 400,
+                                       "missing 'metadata' part in v3 request");
+               return FALSE;
+       }
+
+       /* Parse metadata as UCL (detect JSON vs msgpack from Content-Type) */
+       struct ucl_parser *parser;
+       gboolean is_msgpack = FALSE;
+
+       if (metadata_part->content_type &&
+               metadata_part->content_type_len > 0 &&
+               rspamd_substring_search_caseless(metadata_part->content_type,
+                                                                                metadata_part->content_type_len,
+                                                                                "msgpack",
+                                                                                sizeof("msgpack") - 1) != -1) {
+               is_msgpack = TRUE;
+               parser = ucl_parser_new(UCL_PARSER_DEFAULT | UCL_PARSER_NO_FILEVARS);
+               ucl_parser_add_chunk_full(parser, (const unsigned char *) metadata_part->data,
+                                                                 metadata_part->data_len,
+                                                                 ucl_parser_get_default_priority(parser),
+                                                                 UCL_DUPLICATE_APPEND,
+                                                                 UCL_PARSE_MSGPACK);
+       }
+       else {
+               parser = ucl_parser_new(UCL_PARSER_DEFAULT | UCL_PARSER_NO_FILEVARS);
+               ucl_parser_add_chunk(parser, (const unsigned char *) metadata_part->data,
+                                                        metadata_part->data_len);
+       }
+
+       if (ucl_parser_get_error(parser) != NULL) {
+               g_set_error(&task->err, rspamd_protocol_quark(), 400,
+                                       "cannot parse metadata: %s", ucl_parser_get_error(parser));
+               ucl_parser_free(parser);
+               return FALSE;
+       }
+
+       ucl_object_t *metadata_obj = ucl_parser_get_object(parser);
+       ucl_parser_free(parser);
+
+       if (!metadata_obj) {
+               g_set_error(&task->err, rspamd_protocol_quark(), 400,
+                                       "empty metadata object");
+               return FALSE;
+       }
+
+       rspamd_mempool_add_destructor(task->task_pool,
+                                                                 (rspamd_mempool_destruct_t) ucl_object_unref,
+                                                                 metadata_obj);
+
+       /* Apply metadata to task */
+       if (!rspamd_protocol_handle_metadata(task, metadata_obj)) {
+               return FALSE;
+       }
+
+       /* Check for file/shm in metadata (zero-copy paths) */
+       const ucl_object_t *file_elt = ucl_object_lookup(metadata_obj, "file");
+       const ucl_object_t *shm_elt = ucl_object_lookup(metadata_obj, "shm");
+
+       if (file_elt && ucl_object_type(file_elt) == UCL_STRING) {
+               /* Set file path and let rspamd_task_load_message handle it via task header */
+               const char *fpath = ucl_object_tostring(file_elt);
+               task->msg.fpath = rspamd_mempool_strdup(task->task_pool, fpath);
+
+               /* Synthesize a request header so rspamd_task_load_message's file path works */
+               rspamd_fstring_t *fhdr = rspamd_fstring_new_init(fpath, strlen(fpath));
+               rspamd_ftok_t *name_tok = rspamd_mempool_alloc(task->task_pool, sizeof(*name_tok));
+               rspamd_ftok_t *val_tok = rspamd_ftok_map(fhdr);
+
+               RSPAMD_FTOK_ASSIGN(name_tok, "file");
+               rspamd_task_add_request_header(task, name_tok, val_tok);
+
+               /* Now load the message from file */
+               return rspamd_task_load_message(task, NULL, NULL, 0);
+       }
+       else if (shm_elt && ucl_object_type(shm_elt) == UCL_STRING) {
+               /* Synthesize shm headers */
+               const char *shm_name = ucl_object_tostring(shm_elt);
+               rspamd_fstring_t *fhdr = rspamd_fstring_new_init(shm_name, strlen(shm_name));
+               rspamd_ftok_t *name_tok = rspamd_mempool_alloc(task->task_pool, sizeof(*name_tok));
+               rspamd_ftok_t *val_tok = rspamd_ftok_map(fhdr);
+
+               RSPAMD_FTOK_ASSIGN(name_tok, "shm");
+               rspamd_task_add_request_header(task, name_tok, val_tok);
+
+               const ucl_object_t *off_elt = ucl_object_lookup(metadata_obj, "shm_offset");
+               if (off_elt) {
+                       char buf[32];
+                       int blen = rspamd_snprintf(buf, sizeof(buf), "%L",
+                                                                          ucl_object_toint(off_elt));
+                       rspamd_fstring_t *foff = rspamd_fstring_new_init(buf, blen);
+                       rspamd_ftok_t *off_name = rspamd_mempool_alloc(task->task_pool, sizeof(*off_name));
+                       rspamd_ftok_t *off_val = rspamd_ftok_map(foff);
+
+                       RSPAMD_FTOK_ASSIGN(off_name, "shm-offset");
+                       rspamd_task_add_request_header(task, off_name, off_val);
+               }
+
+               const ucl_object_t *len_elt = ucl_object_lookup(metadata_obj, "shm_length");
+               if (len_elt) {
+                       char buf[32];
+                       int blen = rspamd_snprintf(buf, sizeof(buf), "%L",
+                                                                          ucl_object_toint(len_elt));
+                       rspamd_fstring_t *flen = rspamd_fstring_new_init(buf, blen);
+                       rspamd_ftok_t *len_name = rspamd_mempool_alloc(task->task_pool, sizeof(*len_name));
+                       rspamd_ftok_t *len_val = rspamd_ftok_map(flen);
+
+                       RSPAMD_FTOK_ASSIGN(len_name, "shm-length");
+                       rspamd_task_add_request_header(task, len_name, len_val);
+               }
+
+               return rspamd_task_load_message(task, NULL, NULL, 0);
+       }
+       else {
+               /* Use inline message part */
+               const struct rspamd_multipart_entry_c *msg_part =
+                       rspamd_multipart_form_find(form, "message", sizeof("message") - 1);
+
+               if (!msg_part || msg_part->data_len == 0) {
+                       g_set_error(&task->err, rspamd_protocol_quark(), 400,
+                                               "missing 'message' part in v3 request");
+                       return FALSE;
+               }
+
+               /* Check for per-part zstd compression */
+               if (msg_part->content_encoding && msg_part->content_encoding_len > 0 &&
+                       rspamd_substring_search_caseless(msg_part->content_encoding,
+                                                                                        msg_part->content_encoding_len,
+                                                                                        "zstd", 4) != -1) {
+                       /* Decompress message */
+                       ZSTD_DStream *zstream;
+                       ZSTD_inBuffer zin;
+                       ZSTD_outBuffer zout;
+                       gsize outlen, r;
+
+                       if (!rspamd_libs_reset_decompression(task->cfg->libs_ctx)) {
+                               g_set_error(&task->err, rspamd_protocol_quark(), 500,
+                                                       "cannot init decompressor");
+                               return FALSE;
+                       }
+
+                       zstream = task->cfg->libs_ctx->in_zstream;
+                       zin.src = msg_part->data;
+                       zin.size = msg_part->data_len;
+                       zin.pos = 0;
+
+                       outlen = ZSTD_getDecompressedSize(msg_part->data, msg_part->data_len);
+                       if (outlen == 0) {
+                               outlen = ZSTD_DStreamOutSize();
+                       }
+
+                       unsigned char *out = (unsigned char *) g_malloc(outlen);
+                       zout.dst = out;
+                       zout.pos = 0;
+                       zout.size = outlen;
+
+                       while (zin.pos < zin.size) {
+                               r = ZSTD_decompressStream(zstream, &zout, &zin);
+
+                               if (ZSTD_isError(r)) {
+                                       g_set_error(&task->err, rspamd_protocol_quark(), 400,
+                                                               "message decompression error: %s",
+                                                               ZSTD_getErrorName(r));
+                                       g_free(out);
+                                       return FALSE;
+                               }
+
+                               if (zout.pos == zout.size) {
+                                       if (zout.size > task->cfg->max_message) {
+                                               g_set_error(&task->err, rspamd_protocol_quark(), 413,
+                                                                       "decompressed message exceeds max_message limit: %lu > %lu",
+                                                                       (unsigned long) zout.size, (unsigned long) task->cfg->max_message);
+                                               g_free(out);
+                                               return FALSE;
+                                       }
+                                       zout.size = zout.size * 2 + 1;
+                                       out = g_realloc(zout.dst, zout.size);
+                                       zout.dst = out;
+                               }
+                       }
+
+                       rspamd_mempool_add_destructor(task->task_pool, g_free, zout.dst);
+                       task->msg.begin = (const char *) zout.dst;
+                       task->msg.len = zout.pos;
+                       task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED;
+
+                       msg_info_protocol("v3: loaded message from zstd compressed part; "
+                                                         "compressed: %ul; uncompressed: %ul",
+                                                         (gulong) zin.size, (gulong) zout.pos);
+               }
+               else {
+                       /* Zero-copy: point directly into the multipart buffer */
+                       task->msg.begin = msg_part->data;
+                       task->msg.len = msg_part->data_len;
+               }
+
+               if (task->msg.len == 0) {
+                       task->flags |= RSPAMD_TASK_FLAG_EMPTY;
+               }
+
+               return TRUE;
+       }
+}
+
+/*
+ * Build a v3 multipart/mixed HTTP reply.
+ */
+void rspamd_protocol_http_reply_v3(struct rspamd_http_message *msg,
+                                                                  struct rspamd_task *task)
+{
+       int flags = RSPAMD_PROTOCOL_DEFAULT | RSPAMD_PROTOCOL_URLS;
+       ucl_object_t *top = rspamd_protocol_write_ucl(task, flags);
+
+       if (!(task->flags & RSPAMD_TASK_FLAG_NO_LOG)) {
+               if (task->worker->srv->history) {
+                       rspamd_roll_history_update(task->worker->srv->history, task);
+               }
+       }
+
+       rspamd_task_write_log(task);
+
+       /* Determine output format from metadata part's Content-Type or Accept header */
+       const rspamd_ftok_t *accept_hdr = rspamd_task_get_request_header(task, "Accept");
+       int out_type = UCL_EMIT_JSON_COMPACT;
+       const char *result_ctype = "application/json";
+
+       if (accept_hdr && rspamd_substring_search(accept_hdr->begin, accept_hdr->len,
+                                                                                         "application/msgpack",
+                                                                                         sizeof("application/msgpack") - 1) != -1) {
+               out_type = UCL_EMIT_MSGPACK;
+               result_ctype = "application/msgpack";
+       }
+
+       /* Serialize result UCL */
+       rspamd_fstring_t *result_data = rspamd_fstring_sized_new(1000);
+       rspamd_ucl_emit_fstring(top, out_type, &result_data);
+
+       /* Check if client wants compression */
+       gboolean want_compress = FALSE;
+       const rspamd_ftok_t *ae_hdr = rspamd_task_get_request_header(task, "Accept-Encoding");
+       if (ae_hdr && rspamd_substring_search_caseless(ae_hdr->begin, ae_hdr->len,
+                                                                                                  "zstd", 4) != -1) {
+               want_compress = TRUE;
+       }
+
+       /* Build multipart response */
+       struct rspamd_multipart_response_c *resp = rspamd_multipart_response_new();
+
+       rspamd_multipart_response_add_part(resp, "result", result_ctype,
+                                                                          result_data->str, result_data->len,
+                                                                          want_compress);
+
+       /* If message was rewritten, add body part */
+       if (task->flags & RSPAMD_TASK_FLAG_MESSAGE_REWRITE) {
+               const char *body_start = task->msg.begin;
+               gsize body_len = task->msg.len;
+
+               if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER) {
+                       /* For milter, only send the body after headers */
+                       goffset hdr_off = MESSAGE_FIELD(task, raw_headers_content).len;
+
+                       if (hdr_off < (goffset) body_len) {
+                               body_start += hdr_off;
+                               body_len -= hdr_off;
+
+                               if (*body_start == '\r' && body_len > 0) {
+                                       body_start++;
+                                       body_len--;
+                               }
+                               if (*body_start == '\n' && body_len > 0) {
+                                       body_start++;
+                                       body_len--;
+                               }
+                       }
+               }
+
+               rspamd_multipart_response_add_part(resp, "body", "application/octet-stream",
+                                                                                  body_start, body_len, want_compress);
+       }
+
+       /* Get compression stream if needed */
+       void *zstream = NULL;
+       if (want_compress && rspamd_libs_reset_compression(task->cfg->libs_ctx)) {
+               zstream = task->cfg->libs_ctx->out_zstream;
+       }
+
+       rspamd_fstring_t *reply = rspamd_multipart_response_serialize(resp, zstream);
+       const char *ctype = rspamd_multipart_response_content_type(resp);
+
+       /* Set the content type on the HTTP message */
+       rspamd_http_message_add_header(msg, "Content-Type", ctype);
+
+       rspamd_http_message_set_body_from_fstring_steal(msg, reply);
+       rspamd_fstring_free(result_data);
+       rspamd_multipart_response_free(resp);
+
+       /* Update stats */
+       if (!(task->flags & RSPAMD_TASK_FLAG_NO_STAT)) {
+               struct rspamd_scan_result *metric_res = task->result;
+
+               if (metric_res) {
+                       struct rspamd_action *action = rspamd_check_action_metric(task, NULL, NULL);
+
+                       if (action->action_type == METRIC_ACTION_SOFT_REJECT &&
+                               (task->flags & RSPAMD_TASK_FLAG_GREYLISTED)) {
+#ifndef HAVE_ATOMIC_BUILTINS
+                               task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST]++;
+#else
+                               __atomic_add_fetch(&task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST],
+                                                                  1, __ATOMIC_RELEASE);
+#endif
+                       }
+                       else if (action->action_type < METRIC_ACTION_MAX) {
+#ifndef HAVE_ATOMIC_BUILTINS
+                               task->worker->srv->stat->actions_stat[action->action_type]++;
+#else
+                               __atomic_add_fetch(&task->worker->srv->stat->actions_stat[action->action_type],
+                                                                  1, __ATOMIC_RELEASE);
+#endif
+                       }
+               }
+
+#ifndef HAVE_ATOMIC_BUILTINS
+               task->worker->srv->stat->messages_scanned++;
+#else
+               __atomic_add_fetch(&task->worker->srv->stat->messages_scanned,
+                                                  1, __ATOMIC_RELEASE);
+#endif
+
+               uint32_t slot;
+               float processing_time = task->time_real_finish - task->task_timestamp;
+
+#ifndef HAVE_ATOMIC_BUILTINS
+               slot = task->worker->srv->stat->avg_time.cur_slot++;
+#else
+               slot = __atomic_fetch_add(&task->worker->srv->stat->avg_time.cur_slot,
+                                                                 1, __ATOMIC_RELEASE);
+#endif
+               slot = slot % MAX_AVG_TIME_SLOTS;
+               task->worker->srv->stat->avg_time.avg_time[slot] = processing_time;
+       }
+}
+
 void rspamd_protocol_write_reply(struct rspamd_task *task, ev_tstamp timeout, struct rspamd_main *srv)
 {
        struct rspamd_http_message *msg;
@@ -2173,6 +2855,12 @@ void rspamd_protocol_write_reply(struct rspamd_task *task, ev_tstamp timeout, st
                        rspamd_protocol_http_reply(msg, task, NULL, out_type);
                        rspamd_protocol_write_log_pipe(task);
                        break;
+               case CMD_CHECK_V3:
+                       rspamd_protocol_http_reply_v3(msg, task);
+                       rspamd_protocol_write_log_pipe(task);
+                       /* Override ctype — it was set by the v3 reply builder via header */
+                       ctype = NULL;
+                       break;
                case CMD_PING:
                        msg_debug_protocol("writing pong to client");
                        rspamd_http_message_set_body(msg, "pong" CRLF, 6);
index 9d2b985da5222b788e37061390a8ab9a0245044c..f6957833aceb318211a3f7631c0efc8672fdf6d8 100644 (file)
@@ -80,6 +80,19 @@ gboolean rspamd_protocol_handle_control(struct rspamd_task *task,
 gboolean rspamd_protocol_handle_request(struct rspamd_task *task,
                                                                                struct rspamd_http_message *msg);
 
+/**
+ * Handle checkv3 multipart/form-data request.
+ * Parses metadata part (JSON/msgpack), sets task fields, and sets message data.
+ * @param task
+ * @param msg HTTP message
+ * @param chunk body data
+ * @param len body length
+ * @return TRUE on success
+ */
+gboolean rspamd_protocol_handle_v3_request(struct rspamd_task *task,
+                                                                                  struct rspamd_http_message *msg,
+                                                                                  const char *chunk, gsize len);
+
 /**
  * Write task results to http message
  * @param msg
@@ -89,6 +102,16 @@ void rspamd_protocol_http_reply(struct rspamd_http_message *msg,
                                                                struct rspamd_task *task, ucl_object_t **pobj,
                                                                int how);
 
+/**
+ * Write checkv3 multipart/mixed reply.
+ * Result part contains JSON/msgpack scan results.
+ * Optional body part contains rewritten message.
+ * @param msg HTTP response message to fill
+ * @param task task object
+ */
+void rspamd_protocol_http_reply_v3(struct rspamd_http_message *msg,
+                                                                  struct rspamd_task *task);
+
 /**
  * Write data to log pipes
  * @param task
index 5582908c2bb743ab3f5e3c21342dce0535d43ea2..319c895194f31f4fb4dbfe273a67ee427b9e95a0 100644 (file)
@@ -31,6 +31,10 @@ extern "C" {
  * Modern check version
  */
 #define MSG_CMD_CHECK_V2 "checkv2"
+/*
+ * Multipart check version
+ */
+#define MSG_CMD_CHECK_V3 "checkv3"
 #define MSG_CMD_SCAN "scan"
 
 /*
index 3ed0ee4e71dfaf496c1f2c3704277c73d79f8667..bfe3d5e3b6da4cad3deb41761cc636ebf03b5709 100644 (file)
@@ -418,7 +418,7 @@ rspamd_task_load_message(struct rspamd_task *task,
        ft = "file";
 #endif
 
-       if (msg) {
+       if (msg && task->cmd != CMD_CHECK_V3) {
                rspamd_protocol_handle_headers(task, msg);
        }
 
index da7ffb5b2272d3c1607bcbf2ecdd2a1148df6388..27fe463076d9494bd971814cbcbfcc43db5f4c9c 100644 (file)
@@ -37,6 +37,7 @@ enum rspamd_command {
        CMD_CHECK_RSPAMC, /* Legacy rspamc format (like SA one) */
        CMD_CHECK,        /* Legacy check - metric json reply */
        CMD_CHECK_V2,     /* Modern check - symbols in json reply  */
+       CMD_CHECK_V3,     /* Multipart check - structured metadata + multipart response */
        CMD_METRICS,
 };
 
@@ -128,7 +129,9 @@ enum rspamd_task_stage {
 #define RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK (1u << 5u)
 /* Emit groups information */
 #define RSPAMD_TASK_PROTOCOL_FLAG_GROUPS (1u << 6u)
-#define RSPAMD_TASK_PROTOCOL_FLAG_MAX_SHIFT (6u)
+/* Request is multipart/form-data v3 protocol */
+#define RSPAMD_TASK_PROTOCOL_FLAG_MULTIPART_V3 (1u << 7u)
+#define RSPAMD_TASK_PROTOCOL_FLAG_MAX_SHIFT (7u)
 
 #define RSPAMD_TASK_IS_SKIPPED(task) (G_UNLIKELY((task)->flags & RSPAMD_TASK_FLAG_SKIP))
 #define RSPAMD_TASK_IS_SPAMC(task) (G_UNLIKELY((task)->cmd == CMD_CHECK_SPAMC))
index 683f35b079aab78db26111386b293034ae1c57fb..4c6878a2500eec9f015a6b73bc9a7066d5dba724 100644 (file)
@@ -2335,6 +2335,12 @@ rspamd_proxy_scan_self_reply(struct rspamd_task *task)
                rspamd_protocol_http_reply(msg, task, &rep, out_type);
                rspamd_protocol_write_log_pipe(task);
                break;
+       case CMD_CHECK_V3:
+               rspamd_task_set_finish_time(task);
+               rspamd_protocol_http_reply_v3(msg, task);
+               rspamd_protocol_write_log_pipe(task);
+               ctype = NULL; /* Content-Type set by rspamd_protocol_http_reply_v3 as a header */
+               break;
        case CMD_PING:
                rspamd_http_message_set_body(msg, "pong" CRLF, 6);
                ctype = "text/plain";
@@ -2506,6 +2512,12 @@ rspamd_proxy_self_scan(struct rspamd_proxy_session *session)
                if (task->cmd == CMD_PING || task->cmd == CMD_METRICS) {
                        task->flags |= RSPAMD_TASK_FLAG_SKIP;
                }
+               else if (task->cmd == CMD_CHECK_V3) {
+                       if (!rspamd_protocol_handle_v3_request(task, msg, data, len)) {
+                               msg_err_task("cannot handle v3 request: %e", task->err);
+                               task->flags |= RSPAMD_TASK_FLAG_SKIP;
+                       }
+               }
                else {
                        if (!rspamd_task_load_message(task, msg, data, len)) {
                                msg_err_task("cannot load message: %e", task->err);
index 5fdb2789f4fd6914c0b9eab480a0546de1d6dca0..2d46754d87cb7cc66783ec2e7ea3c57386dccd53 100644 (file)
@@ -175,6 +175,12 @@ rspamd_worker_body_handler(struct rspamd_http_connection *conn,
                if (task->cmd == CMD_PING || task->cmd == CMD_METRICS) {
                        task->flags |= RSPAMD_TASK_FLAG_SKIP;
                }
+               else if (task->cmd == CMD_CHECK_V3) {
+                       if (!rspamd_protocol_handle_v3_request(task, msg, chunk, len)) {
+                               msg_err_task("cannot handle v3 request: %e", task->err);
+                               task->flags |= RSPAMD_TASK_FLAG_SKIP;
+                       }
+               }
                else {
                        if (!rspamd_task_load_message(task, msg, chunk, len)) {
                                msg_err_task("cannot load message: %e", task->err);