]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] checkv3: Accept/Accept-Encoding negotiation
author Vsevolod Stakhov <vsevolod@rspamd.com>
Sat, 6 Jun 2026 14:33:58 +0000 (15:33 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Sat, 6 Jun 2026 14:34:23 +0000 (15:34 +0100)
The /checkv3 reply ignored Accept beyond a json-vs-msgpack toggle and
always emitted a hard-coded multipart/mixed body (with form-data part
headers). There was no way to ask for a plain v2-style json/msgpack
body, no true multipart/form-data reply for HTTP multipart parsers,
and Accept-Encoding had no defined default.

Negotiate the representation solely from Accept and compression solely
from Accept-Encoding on the single chokepoint all three workers (normal
scan worker, rspamd_proxy, controller) share, the reply_v3 helper:

  application/json | application/msgpack -> single-body v2 reply
  message/rfc822                         -> multipart/mixed envelope
  multipart/form-data                    -> multipart/form-data envelope
  absent / wildcard                      -> multipart/form-data default
  only unsupported types (e.g. xml)      -> 406 Not Acceptable

Inside the multipart envelopes the result-part serialization mirrors
the input metadata serialization (json or msgpack); the two envelopes
differ only in the top-level Content-Type. Compression honours
Accept-Encoding: zstd and defaults to identity. Vary: Accept,
Accept-Encoding is always advertised. Negotiation reuses the existing
http_content_negotiation parser (q-values + wildcards), extended with
two media types; the input metadata serialization is recorded on the
task via a new protocol flag.

rspamc previously sent Accept: application/json|msgpack for v3, which
now selects a single-body reply it does not expect; it now requests
multipart/form-data and accepts any multipart/ subtype, with the result
serialization carried by the metadata Content-Type.

Tested by a new C++ content_negotiation suite, multipart envelope-mode
unit tests, and a functional negotiation suite run against both the
normal worker and the controller (json/msgpack/email-MIME/HTTP-multipart
parsers). Adds msgpack/requests/requests-toolbelt to functional CI deps.

14 files changed:
.github/workflows/ci_rspamd.yml
src/client/rspamdclient.c
src/libserver/http_content_negotiation.c
src/libserver/http_content_negotiation.h
src/libserver/multipart_response.cxx
src/libserver/multipart_response.h
src/libserver/multipart_response.hxx
src/libserver/protocol.c
src/libserver/task.h
test/functional/cases/001_merged/430_checkv3.robot
test/functional/lib/rspamd.py
test/rspamd_cxx_unit.cxx
test/rspamd_cxx_unit_content_negotiation.hxx [new file with mode: 0644]
test/rspamd_cxx_unit_multipart.hxx

index 07515fad9643b7cb473210b3149bcc1c5cc4a693..5b30d324f5fa47ec9b5e9ad9bfad74946b44b38d 100644 (file)
@@ -82,9 +82,12 @@ jobs:
         # --break-system-packages: Ubuntu 24.04 ships Python 3.12 with PEP 668
         # marking the system interpreter as externally-managed. Older pip
         # versions (Fedora image) don't know the flag, so fall back without it.
+        # msgpack/requests/requests-toolbelt are used by the /checkv3 content
+        # negotiation functional tests (msgpack reply decode, HTTP multipart parse).
         run: |
           pip install --break-system-packages robotframework-pabot \
-            || pip install robotframework-pabot
+            msgpack requests requests-toolbelt \
+            || pip install robotframework-pabot msgpack requests requests-toolbelt
 
       - name: Run functional tests
         # Two phases run concurrently:
index 3573b5c122cefdb3232f07cf4d2d7cac6c7e7061..ec3bb852626781d7a3ed74fd882f493c89b4dbc1 100644 (file)
@@ -587,11 +587,11 @@ rspamd_client_v3_finish_handler(struct rspamd_http_connection *conn,
                }
        }
 
-       /* Check if response is multipart/mixed */
+       /* Check if response is a multipart reply (form-data or mixed envelope) */
        const rspamd_ftok_t *ct = rspamd_http_message_find_header(msg, "Content-Type");
 
        if (ct && rspamd_substring_search_caseless(ct->begin, ct->len,
-                                                                                          "multipart/mixed", sizeof("multipart/mixed") - 1) != -1) {
+                                                                                          "multipart/", sizeof("multipart/") - 1) != -1) {
                /* Parse multipart response to extract result and body */
                /* Extract boundary from Content-Type */
                struct rspamd_content_type *parsed_ct = rspamd_content_type_parse(
@@ -967,13 +967,14 @@ rspamd_client_command_v3(struct rspamd_client_connection *conn,
        rspamd_snprintf(ct_buf, sizeof(ct_buf),
                                        "multipart/form-data; boundary=%s", boundary);
 
-       /* Add Accept headers */
-       if (msgpack) {
-               rspamd_http_message_add_header(req->msg, "Accept", "application/msgpack");
-       }
-       else {
-               rspamd_http_message_add_header(req->msg, "Accept", "application/json");
-       }
+       /*
+        * Request the multipart protocol explicitly. The result-part serialization
+        * (json vs msgpack) is mirrored from the metadata Content-Type we send
+        * above, so the Accept media type only needs to select the envelope, not
+        * the serialization. Asking for application/json|msgpack here would instead
+        * select a single-body v2 reply, which this client does not expect.
+        */
+       rspamd_http_message_add_header(req->msg, "Accept", "multipart/form-data");
        if (compressed) {
                rspamd_http_message_add_header(req->msg, "Accept-Encoding", "zstd");
        }
index 68deb8ca5e216e33956a6286f6bed5164691e688..a20d9fb396782737afad5d8c4d2d9385c177c2ae 100644 (file)
@@ -62,6 +62,18 @@ static const struct rspamd_content_type_mapping content_type_map[] = {
                .type_enum = RSPAMD_HTTP_CTYPE_OCTET_STREAM,
                .ucl_emit_type = -1,
        },
+       {
+               .mime_type = "message/rfc822",
+               .full_type_str = "message/rfc822",
+               .type_enum = RSPAMD_HTTP_CTYPE_MESSAGE_RFC822,
+               .ucl_emit_type = -1,
+       },
+       {
+               .mime_type = "multipart/form-data",
+               .full_type_str = "multipart/form-data",
+               .type_enum = RSPAMD_HTTP_CTYPE_MULTIPART_FORM,
+               .ucl_emit_type = -1,
+       },
        {NULL, NULL, RSPAMD_HTTP_CTYPE_UNKNOWN, -1},
 };
 
index 9f65d1ab917d6990cd6ddb125e06e1a66d06ddfc..6839a97e294ad0cc227c3aae59568de7497890d0 100644 (file)
@@ -30,6 +30,8 @@ enum rspamd_http_content_type {
        RSPAMD_HTTP_CTYPE_OPENMETRICS,
        RSPAMD_HTTP_CTYPE_TEXT_PLAIN,
        RSPAMD_HTTP_CTYPE_OCTET_STREAM,
+       RSPAMD_HTTP_CTYPE_MESSAGE_RFC822,
+       RSPAMD_HTTP_CTYPE_MULTIPART_FORM,
        RSPAMD_HTTP_CTYPE_UNKNOWN
 };
 
index 8143b47084c0dde09c73e408a17a2a695674c863..0cec068ce24398f85625cbb0dc883691a25b6e8e 100644 (file)
@@ -243,7 +243,10 @@ void multipart_response::prepare_iov(void *zstream)
 
 auto multipart_response::content_type() const -> std::string
 {
-       return "multipart/mixed; boundary=\"" + boundary_ + "\"";
+       const char *subtype = envelope_ == multipart_envelope::mixed
+                                                         ? "multipart/mixed"
+                                                         : "multipart/form-data";
+       return std::string(subtype) + "; boundary=\"" + boundary_ + "\"";
 }
 
 }// namespace rspamd::http
@@ -266,6 +269,18 @@ rspamd_multipart_response_new(void)
        return new rspamd_multipart_response_c();
 }
 
+void rspamd_multipart_response_set_envelope(
+       struct rspamd_multipart_response_c *resp,
+       enum rspamd_multipart_envelope_c env)
+{
+       if (!resp) {
+               return;
+       }
+       resp->resp.set_envelope(env == RSPAMD_MULTIPART_ENVELOPE_MIXED
+                                                               ? rspamd::http::multipart_envelope::mixed
+                                                               : rspamd::http::multipart_envelope::form_data);
+}
+
 void rspamd_multipart_response_add_part(
        struct rspamd_multipart_response_c *resp,
        const char *name,
index 62b3422005746f03a10f1b95974b29a0510b7902..4213021637e7977d67b7a96eb7430626c6c29bee 100644 (file)
@@ -29,6 +29,20 @@ struct rspamd_multipart_response_c;
 
 struct rspamd_multipart_response_c *rspamd_multipart_response_new(void);
 
+/*
+ * Top-level multipart envelope selector for the response Content-Type.
+ * Passed to rspamd_multipart_response_set_envelope(); each value names the
+ * multipart subtype that ends up in the reply's Content-Type header.
+ */
+enum rspamd_multipart_envelope_c {
+       RSPAMD_MULTIPART_ENVELOPE_FORM_DATA = 0, /* multipart/form-data */
+       RSPAMD_MULTIPART_ENVELOPE_MIXED = 1,     /* multipart/mixed */
+};
+
+/**
+ * Select the top-level multipart subtype reported by the Content-Type.
+ * Part layout is identical for both; only the subtype string differs.
+ * @param resp response handle; a NULL handle is ignored
+ * @param env envelope to report; any value other than
+ *            RSPAMD_MULTIPART_ENVELOPE_MIXED selects multipart/form-data
+ */
+void rspamd_multipart_response_set_envelope(
+       struct rspamd_multipart_response_c *resp,
+       enum rspamd_multipart_envelope_c env);
+
 void rspamd_multipart_response_add_part(
        struct rspamd_multipart_response_c *resp,
        const char *name,
index a43c38df9ccd3eb12d403f576bb6b78c59c748b6..100581ea7f97dd315b214b39bcf96649f37b02e6 100644 (file)
@@ -31,6 +31,17 @@ struct response_part {
        bool compress = false;
 };
 
+/*
+ * Top-level envelope type. Both variants use the same part layout
+ * (Content-Disposition: form-data; name=...); only the multipart subtype in
+ * the response Content-Type differs, which is what lets a client pick between
+ * an HTTP form parser (form_data) and a MIME parser (mixed).
+ * A multipart_response starts out as form_data until set_envelope() is called.
+ */
+enum class multipart_envelope {
+       form_data, /* multipart/form-data */
+       mixed,     /* multipart/mixed */
+};
+
 class multipart_response {
 public:
        multipart_response();
@@ -38,6 +49,11 @@ public:
        void add_part(std::string name, std::string content_type,
                                  std::string_view data, bool compress = false);
 
+       /**
+        * Choose the top-level multipart subtype that content_type() reports.
+        * @param env envelope to use for this response
+        */
+       void set_envelope(multipart_envelope env)
+       {
+               envelope_ = env;
+       }
+
        /**
         * Serialize the multipart response.
         * @param zstream ZSTD compression stream (may be null if no compression needed)
@@ -78,6 +94,7 @@ public:
 
 private:
        std::string boundary_;
+       multipart_envelope envelope_ = multipart_envelope::form_data;
        std::vector<response_part> parts_;
 
        /* Iov support (populated by prepare_iov) */
index 896e965b8596fced1c43cd742a42f87164e66ad5..e5d18b125d8c4d96d89b9042fe2b8378f0e0cba8 100644 (file)
@@ -31,6 +31,7 @@
 #include "lua/lua_classnames.h"
 #include "multipart_form.h"
 #include "multipart_response.h"
+#include "http_content_negotiation.h"
 #include "libmime/content_type.h"
 #include <math.h>
 
@@ -2919,6 +2920,8 @@ rspamd_protocol_handle_v3_request(struct rspamd_task *task,
                                                                                 metadata_part->content_type_len,
                                                                                 "msgpack",
                                                                                 sizeof("msgpack") - 1) != -1) {
+               /* Remember the input serialization so the reply can mirror it */
+               task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_V3_MSGPACK;
                parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS);
                ucl_parser_add_chunk_full(parser, (const unsigned char *) metadata_part->data,
                                                                  metadata_part->data_len,
@@ -3111,27 +3114,101 @@ rspamd_protocol_handle_v3_request(struct rspamd_task *task,
 }
 
 /*
- * Build a v3 multipart/mixed HTTP reply.
- * Returns the Content-Type string (allocated on task pool) for use as
- * the mime_type parameter in rspamd_http_connection_write_message.
+ * Build a 406 Not Acceptable reply listing the representations /checkv3 can
+ * produce. Used when the client's Accept matches none of them.
+ */
+static const char *
+rspamd_protocol_v3_not_acceptable(struct rspamd_http_message *msg,
+                                                                 struct rspamd_task *task)
+{
+       /* Fixed JSON payload enumerating every representation we can emit */
+       static const char supported_json[] =
+               "{\"error\":\"Not Acceptable\",\"supported\":["
+               "\"application/json\",\"application/msgpack\","
+               "\"message/rfc822\",\"multipart/form-data\"]}";
+       static const char reason[] = "Not Acceptable";
+
+       /* Drop any status line set earlier before installing ours */
+       if (msg->status != NULL) {
+               rspamd_fstring_free(msg->status);
+       }
+
+       msg->status = rspamd_fstring_new_init(reason, sizeof(reason) - 1);
+       msg->code = 406;
+       rspamd_http_message_set_body(msg, supported_json, sizeof(supported_json) - 1);
+
+       msg_info_task("v3 reply: no acceptable representation for requested Accept");
+
+       /* The error body is always JSON, whatever the client asked for */
+       return "application/json";
+}
+
+/*
+ * Build a v3 HTTP reply, negotiating the representation from Accept and the
+ * compression from Accept-Encoding (see the negotiation contract above).
+ * Returns the Content-Type string (allocated on the task pool, except for the
+ * static literals) for use as the mime_type argument in
+ * rspamd_http_connection_write_message.
  */
 const char *
 rspamd_protocol_http_reply_v3(struct rspamd_http_message *msg,
                                                          struct rspamd_task *task)
 {
+       /*
+        * Proactive content negotiation. The representation is chosen solely from
+        * the Accept header (never inferred from the request body), and compression
+        * solely from Accept-Encoding. Advertise both so caches behave.
+        */
+       rspamd_http_message_add_header(msg, "Vary", "Accept, Accept-Encoding");
+
+       /*
+        * Supported representations in preference order. MULTIPART_FORM is first so
+        * that an absent Accept or a wildcard media range (catch-all, or the
+        * multipart wildcard) resolves to the multipart/form-data default.
+        */
+       static const enum rspamd_http_content_type desired[] = {
+               RSPAMD_HTTP_CTYPE_MULTIPART_FORM,
+               RSPAMD_HTTP_CTYPE_MESSAGE_RFC822,
+               RSPAMD_HTTP_CTYPE_JSON,
+               RSPAMD_HTTP_CTYPE_MSGPACK,
+               RSPAMD_HTTP_CTYPE_UNKNOWN,
+       };
+
+       const rspamd_ftok_t *accept_hdr = rspamd_task_get_request_header(task, "Accept");
+       enum rspamd_http_content_type rep = RSPAMD_HTTP_CTYPE_MULTIPART_FORM;
+
+       if (accept_hdr && accept_hdr->len > 0) {
+               double quality = 0.0;
+               enum rspamd_http_content_type matched =
+                       rspamd_http_parse_accept_header(accept_hdr, desired, &quality);
+
+               if (matched == RSPAMD_HTTP_CTYPE_UNKNOWN || quality <= 0.0) {
+                       return rspamd_protocol_v3_not_acceptable(msg, task);
+               }
+
+               rep = matched;
+       }
+
+       /*
+        * Single-body (v2-style) representations: delegate to the regular reply
+        * writer, which serializes the result, updates history/stats and the log
+        * pipe internally. There is no place for a rewritten-message part here.
+        */
+       if (rep == RSPAMD_HTTP_CTYPE_JSON || rep == RSPAMD_HTTP_CTYPE_MSGPACK) {
+               int out_type = (rep == RSPAMD_HTTP_CTYPE_MSGPACK) ? UCL_EMIT_MSGPACK
+                                                                                                                 : UCL_EMIT_JSON_COMPACT;
+               rspamd_protocol_http_reply(msg, task, NULL, out_type);
+
+               return (rep == RSPAMD_HTTP_CTYPE_MSGPACK) ? "application/msgpack"
+                                                                                                 : "application/json";
+       }
+
+       /* Multipart representations: form-data (default) or mixed (message/rfc822) */
        int flags = RSPAMD_PROTOCOL_DEFAULT | RSPAMD_PROTOCOL_URLS;
        ucl_object_t *top = rspamd_protocol_write_ucl(task, flags);
 
        rspamd_protocol_update_history_and_log(task);
 
-       /* Determine output format from metadata part's Content-Type or Accept header */
-       const rspamd_ftok_t *accept_hdr = rspamd_task_get_request_header(task, "Accept");
+       /* Inner result serialization mirrors the input metadata serialization */
        int out_type = UCL_EMIT_JSON_COMPACT;
        const char *result_ctype = "application/json";
 
-       if (accept_hdr && rspamd_substring_search(accept_hdr->begin, accept_hdr->len,
-                                                                                         "application/msgpack",
-                                                                                         sizeof("application/msgpack") - 1) != -1) {
+       if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_V3_MSGPACK) {
                out_type = UCL_EMIT_MSGPACK;
                result_ctype = "application/msgpack";
        }
@@ -3140,17 +3217,21 @@ rspamd_protocol_http_reply_v3(struct rspamd_http_message *msg,
        rspamd_fstring_t *result_data = rspamd_fstring_sized_new(1000);
        rspamd_ucl_emit_fstring(top, out_type, &result_data);
 
-       /* Check if client wants compression */
+       /* Compression: honor Accept-Encoding: zstd, otherwise identity */
        gboolean want_compress = FALSE;
        const rspamd_ftok_t *ae_hdr = rspamd_task_get_request_header(task, "Accept-Encoding");
-       if (ae_hdr && rspamd_substring_search_caseless(ae_hdr->begin, ae_hdr->len,
-                                                                                                  "zstd", 4) != -1) {
+       if ((rspamd_http_parse_accept_encoding(ae_hdr) & RSPAMD_HTTP_COMPRESS_ZSTD) != 0) {
                want_compress = TRUE;
        }
 
        /* Build multipart response */
        struct rspamd_multipart_response_c *resp = rspamd_multipart_response_new();
 
+       rspamd_multipart_response_set_envelope(
+               resp,
+               rep == RSPAMD_HTTP_CTYPE_MESSAGE_RFC822 ? RSPAMD_MULTIPART_ENVELOPE_MIXED
+                                                                                               : RSPAMD_MULTIPART_ENVELOPE_FORM_DATA);
+
        rspamd_multipart_response_add_part(resp, "result", result_ctype,
                                                                           result_data->str, result_data->len,
                                                                           want_compress);
index 78cd9319ed69fdc6a963cd277bb4121f24065b71..8225e6293a9201a9c0a244d9366ce4e0ac090e9a 100644 (file)
@@ -131,7 +131,9 @@ enum rspamd_task_stage {
 #define RSPAMD_TASK_PROTOCOL_FLAG_GROUPS (1u << 6u)
 /* Request is multipart/form-data v3 protocol */
 #define RSPAMD_TASK_PROTOCOL_FLAG_MULTIPART_V3 (1u << 7u)
-#define RSPAMD_TASK_PROTOCOL_FLAG_MAX_SHIFT (7u)
+/* v3 request metadata part was msgpack-serialized (mirror it in the reply) */
+#define RSPAMD_TASK_PROTOCOL_FLAG_V3_MSGPACK (1u << 8u)
+#define RSPAMD_TASK_PROTOCOL_FLAG_MAX_SHIFT (8u)
 
 #define RSPAMD_TASK_IS_SKIPPED(task) (G_UNLIKELY((task)->flags & RSPAMD_TASK_FLAG_SKIP))
 #define RSPAMD_TASK_IS_SPAMC(task) (G_UNLIKELY((task)->cmd == CMD_CHECK_SPAMC))
index fc046f9b4d825685a2d0f026f1249c4ed6d14366..4d0218f2ec5167d6e072a305f356a999970d8d5c 100644 (file)
@@ -108,3 +108,83 @@ checkv3 via rspamc with metadata-header
   ${result} =  Run Rspamc  -p  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_NORMAL}  --protocol-v3
   ...  --metadata-header=X-V3-Custom=from-rspamc  ${MESSAGE}
   Check Rspamc  ${result}  TEST_V3_META_HEADER (
+
+checkv3 content negotiation on normal worker
+  [Documentation]  Accept / Accept-Encoding content negotiation for /checkv3
+  ...              against the normal scan worker.
+  Run V3 Negotiation Checks  ${RSPAMD_PORT_NORMAL}
+
+checkv3 content negotiation on controller
+  [Documentation]  Same negotiation contract must hold on the controller endpoint.
+  Run V3 Negotiation Checks  ${RSPAMD_PORT_CONTROLLER}
+
+*** Keywords ***
+Run V3 Negotiation Checks
+  [Arguments]  ${port}
+  # Sends a series of /checkv3 requests to ${port}, varying Accept,
+  # Accept-Encoding and the metadata serialization, and asserts on the
+  # negotiated reply: status, Content-Type, Vary, the parser that was used,
+  # the result part's Content-Type and the per-part Content-Encoding values.
+  # Expect Symbol reads ${SCAN_RESULT}, which Scan File V3 Negotiated only
+  # sets when it could decode a result from the reply.
+  # 1. No Accept -> multipart/form-data default, Vary advertised, result usable
+  &{r} =  Scan File V3 Negotiated  ${GTUBE}  port=${port}  Settings=${SETTINGS_NOSYMBOLS}
+  Should Be Equal As Integers  ${r}[status]  200
+  Should Start With  ${r}[content_type]  multipart/form-data
+  Should Contain  ${r}[vary]  Accept
+  Should Contain  ${r}[vary]  Accept-Encoding
+  Expect Symbol  GTUBE
+
+  # 2. Accept: application/json -> single JSON body, no multipart parts
+  &{r} =  Scan File V3 Negotiated  ${GTUBE}  accept=application/json  port=${port}
+  ...  Settings=${SETTINGS_NOSYMBOLS}
+  Should Be Equal As Integers  ${r}[status]  200
+  Should Start With  ${r}[content_type]  application/json
+  Should Be Equal  ${r}[parser]  json
+  Should Be Empty  ${r}[parts]
+  Expect Symbol  GTUBE
+
+  # 3. Accept: application/msgpack -> single msgpack body
+  &{r} =  Scan File V3 Negotiated  ${GTUBE}  accept=application/msgpack  port=${port}
+  ...  Settings=${SETTINGS_NOSYMBOLS}
+  Should Be Equal As Integers  ${r}[status]  200
+  Should Start With  ${r}[content_type]  application/msgpack
+  Should Be Equal  ${r}[parser]  msgpack
+  Expect Symbol  GTUBE
+
+  # 4. Accept: message/rfc822 -> multipart/mixed, parseable as MIME
+  &{r} =  Scan File V3 Negotiated  ${GTUBE}  accept=message/rfc822  port=${port}
+  ...  Settings=${SETTINGS_NOSYMBOLS}
+  Should Be Equal As Integers  ${r}[status]  200
+  Should Start With  ${r}[content_type]  multipart/mixed
+  Should Be Equal  ${r}[parser]  mime
+  Should Start With  ${r}[result_ctype]  application/json
+  Expect Symbol  GTUBE
+
+  # 5. Accept: multipart/form-data -> parseable by an HTTP multipart parser
+  &{r} =  Scan File V3 Negotiated  ${GTUBE}  accept=multipart/form-data  port=${port}
+  ...  Settings=${SETTINGS_NOSYMBOLS}
+  Should Be Equal As Integers  ${r}[status]  200
+  Should Start With  ${r}[content_type]  multipart/form-data
+  Should Be Equal  ${r}[parser]  form-data
+  Expect Symbol  GTUBE
+
+  # 6. msgpack metadata, no Accept -> multipart default, result part mirrors input (msgpack)
+  &{meta6} =  Create Dictionary  from=sender@example.com
+  &{r} =  Scan File V3 Negotiated  ${GTUBE}  metadata=${meta6}  metadata_format=msgpack
+  ...  port=${port}  Settings=${SETTINGS_NOSYMBOLS}
+  Should Be Equal As Integers  ${r}[status]  200
+  Should Start With  ${r}[content_type]  multipart/form-data
+  Should Start With  ${r}[result_ctype]  application/msgpack
+  Expect Symbol  GTUBE
+
+  # 7. Accept names only unsupported types -> 406 Not Acceptable
+  # (no Expect Symbol: the helper returns early on a non-200 status and does
+  # not update ${SCAN_RESULT})
+  &{r} =  Scan File V3 Negotiated  ${GTUBE}  accept=application/xml  port=${port}
+  ...  Settings=${SETTINGS_NOSYMBOLS}
+  Should Be Equal As Integers  ${r}[status]  406
+
+  # 8. Accept-Encoding: zstd -> parts carry Content-Encoding: zstd
+  # (the helper does not decode compressed result parts, so only the
+  # advertised encodings are checked here)
+  &{r} =  Scan File V3 Negotiated  ${GTUBE}  accept=multipart/form-data
+  ...  accept_encoding=zstd  port=${port}  Settings=${SETTINGS_NOSYMBOLS}
+  Should Be Equal As Integers  ${r}[status]  200
+  Should Contain  ${r}[part_encodings]  zstd
+
+  # 8b. No Accept-Encoding -> identity (uncompressed)
+  &{r} =  Scan File V3 Negotiated  ${GTUBE}  accept=multipart/form-data  port=${port}
+  ...  Settings=${SETTINGS_NOSYMBOLS}
+  Should Be Equal As Integers  ${r}[status]  200
+  Should Be Empty  ${r}[part_encodings]
index 2886b9e46741acfc6cb7060c495589be3dc0e934..39bb214b7b151b69f5800c262aff65b7f0c48bbb 100644 (file)
@@ -25,6 +25,7 @@
 #  limitations under the License.
 
 from urllib.request import urlopen
+import email
 import glob
 import grp
 import http.client
@@ -442,6 +443,166 @@ def Scan_File_V3_Single_Part(part_name, part_data, content_type_part=None, **hea
     return status
 
 
+def _build_multipart_meta(boundary, meta_bytes, meta_ctype, message_bytes):
+    """multipart/form-data body with an explicit metadata Content-Type."""
+    if isinstance(message_bytes, str):
+        message_bytes = message_bytes.encode('utf-8')
+    body = b""
+    body += ("--" + boundary + "\r\n").encode()
+    body += b'Content-Disposition: form-data; name="metadata"\r\n'
+    body += ("Content-Type: %s\r\n\r\n" % meta_ctype).encode()
+    body += meta_bytes + b"\r\n"
+    body += ("--" + boundary + "\r\n").encode()
+    body += b'Content-Disposition: form-data; name="message"\r\n\r\n'
+    body += message_bytes + b"\r\n"
+    body += ("--" + boundary + "--\r\n").encode()
+    return body
+
+
+def _v3_disposition_name(content_disposition):
+    m = re.search(r'name="?([^";]+)"?', content_disposition or "")
+    return m.group(1) if m else None
+
+
+def _v3_parts_form_data(body, content_type):
+    """Parse a multipart/form-data reply with an HTTP multipart parser.
+
+    Uses requests_toolbelt (not a MIME parser) to prove the reply is consumable
+    by standard HTTP multipart tooling.
+    """
+    from requests_toolbelt.multipart.decoder import MultipartDecoder
+    dec = MultipartDecoder(body, content_type)
+    parts = []
+    for part in dec.parts:
+        hdrs = {k.decode().lower(): v.decode() for k, v in part.headers.items()}
+        parts.append({
+            "name": _v3_disposition_name(hdrs.get("content-disposition", "")),
+            "ctype": hdrs.get("content-type", ""),
+            "encoding": hdrs.get("content-encoding", ""),
+            "data": part.content,
+        })
+    return parts
+
+
+def _v3_parts_mime(body, content_type):
+    """Parse a multipart/mixed reply with the stdlib MIME parser (email)."""
+    full = b"Content-Type: " + content_type.encode() + b"\r\n\r\n" + body
+    msg = email.message_from_bytes(full)
+    parts = []
+    for part in msg.walk():
+        if part.get_content_maintype() == "multipart":
+            continue
+        parts.append({
+            "name": part.get_param("name", header="content-disposition"),
+            "ctype": part.get_content_type(),
+            "encoding": part.get("Content-Encoding", "") or "",
+            "data": part.get_payload(decode=True),
+        })
+    return parts
+
+
+def _v3_decode_result(part):
+    """Decode a 'result' part's payload into a dict per its Content-Type."""
+    if not part or part.get("encoding"):
+        # Compressed payloads are not decoded here (zstd has no stdlib codec)
+        return None
+    data = part["data"]
+    if "msgpack" in (part["ctype"] or ""):
+        import msgpack
+        return msgpack.unpackb(data, raw=False)
+    return json.loads(data)
+
+
+def Scan_File_V3_Negotiated(filename, accept=None, accept_encoding=None,
+                            port=None, metadata=None, metadata_format="json",
+                            **headers):
+    """Send /checkv3 with explicit Accept / Accept-Encoding and parse the reply.
+
+    Sets ${SCAN_RESULT} to the parsed scan result (when the reply carries one,
+    i.e. not a 406) so the usual Expect Symbol/Action keywords work. Returns a
+    dict describing the negotiated reply: status, content_type, vary, parser,
+    result_ctype, parts (name -> content-type), part_encodings.
+    """
+    addr = BuiltIn().get_variable_value("${RSPAMD_LOCAL_ADDR}")
+    if port is None:
+        port = BuiltIn().get_variable_value("${RSPAMD_PORT_NORMAL}")
+
+    meta = metadata if metadata else {}
+    if metadata_format == "msgpack":
+        import msgpack
+        meta_bytes = msgpack.packb(meta)
+        meta_ctype = "application/msgpack"
+    else:
+        meta_bytes = json.dumps(meta).encode('utf-8')
+        meta_ctype = "application/json"
+
+    message_data = open(filename, "rb").read()
+    boundary = "----rspamd-test-%016x" % random.getrandbits(64)
+    body = _build_multipart_meta(boundary, meta_bytes, meta_ctype, message_data)
+
+    headers["Content-Type"] = "multipart/form-data; boundary=" + boundary
+    if accept is not None:
+        headers["Accept"] = accept
+    if accept_encoding is not None:
+        headers["Accept-Encoding"] = accept_encoding
+    if "Queue-Id" not in headers:
+        headers["Queue-Id"] = BuiltIn().get_variable_value("${TEST_NAME}")
+
+    c = http.client.HTTPConnection("%s:%s" % (addr, port))
+    c.request("POST", "/checkv3", body, headers)
+    r = c.getresponse()
+    resp_body = r.read()
+    ct = r.getheader("Content-Type", "") or ""
+    vary = r.getheader("Vary", "") or ""
+    status = r.status
+    c.close()
+
+    info = {
+        "status": status,
+        "content_type": ct,
+        "vary": vary,
+        "parser": "none",
+        "result_ctype": "",
+        "parts": {},
+        "part_encodings": [],
+    }
+
+    if status != 200:
+        return info
+
+    result = None
+    if ct.startswith("application/json"):
+        info["parser"] = "json"
+        info["result_ctype"] = "application/json"
+        result = json.loads(resp_body)
+    elif ct.startswith("application/msgpack"):
+        import msgpack
+        info["parser"] = "msgpack"
+        info["result_ctype"] = "application/msgpack"
+        result = msgpack.unpackb(resp_body, raw=False)
+    elif ct.startswith("multipart/mixed"):
+        info["parser"] = "mime"
+        parts = _v3_parts_mime(resp_body, ct)
+        info["parts"] = {p["name"]: p["ctype"] for p in parts}
+        info["part_encodings"] = [p["encoding"] for p in parts if p["encoding"]]
+        rp = next((p for p in parts if p["name"] == "result"), None)
+        info["result_ctype"] = rp["ctype"] if rp else ""
+        result = _v3_decode_result(rp)
+    elif ct.startswith("multipart/form-data"):
+        info["parser"] = "form-data"
+        parts = _v3_parts_form_data(resp_body, ct)
+        info["parts"] = {p["name"]: p["ctype"] for p in parts}
+        info["part_encodings"] = [p["encoding"] for p in parts if p["encoding"]]
+        rp = next((p for p in parts if p["name"] == "result"), None)
+        info["result_ctype"] = rp["ctype"] if rp else ""
+        result = _v3_decode_result(rp)
+
+    if result is not None:
+        BuiltIn().set_test_variable("${SCAN_RESULT}", result)
+
+    return info
+
+
 def Scan_File_SSL(filename, port=None, **headers):
     """Like Scan_File but over HTTPS (TLS) to the normal worker SSL port."""
     addr = BuiltIn().get_variable_value("${RSPAMD_LOCAL_ADDR}")
index 4ab10c73c795cc7f489d8176ea3cd7576b49e4b7..94afedfa86d353547bc3a8cc2916cf9bf69d2fe2 100644 (file)
@@ -36,6 +36,7 @@
 #include "rspamd_cxx_unit_upstream_latency.hxx"
 #include "rspamd_cxx_unit_upstream_srv.hxx"
 #include "rspamd_cxx_unit_multipart.hxx"
+#include "rspamd_cxx_unit_content_negotiation.hxx"
 #include "rspamd_cxx_unit_settings_merge.hxx"
 #include "rspamd_cxx_unit_fpconv.hxx"
 
diff --git a/test/rspamd_cxx_unit_content_negotiation.hxx b/test/rspamd_cxx_unit_content_negotiation.hxx
new file mode 100644 (file)
index 0000000..f8841d6
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2026 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_CXX_UNIT_CONTENT_NEGOTIATION_HXX
+#define RSPAMD_CXX_UNIT_CONTENT_NEGOTIATION_HXX
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+#include "libserver/http_content_negotiation.h"
+
+#include <cstring>
+
+namespace { /* File-local helpers shared by the test cases below.
+ *
+ * cn_tok() wraps the C string `s` in an rspamd_ftok_t without copying it:
+ * `begin` aliases `s` and `len` is strlen(s), or 0 when `s` is null. */
+rspamd_ftok_t cn_tok(const char *s)
+{
+       rspamd_ftok_t t;
+       t.begin = s;
+       t.len = s ? strlen(s) : 0; /* tolerate a null pointer */
+       return t;
+}
+
+/* Same preference order the /checkv3 reply uses.
+ * Order matters to the tests below: the wildcard cases expect the first entry
+ * to win. The trailing RSPAMD_HTTP_CTYPE_UNKNOWN presumably terminates the
+ * list, since no length is passed to rspamd_http_parse_accept_header() -
+ * TODO confirm against libserver/http_content_negotiation.h. */
+const enum rspamd_http_content_type cn_v3_desired[] = {
+       RSPAMD_HTTP_CTYPE_MULTIPART_FORM,
+       RSPAMD_HTTP_CTYPE_MESSAGE_RFC822,
+       RSPAMD_HTTP_CTYPE_JSON,
+       RSPAMD_HTTP_CTYPE_MSGPACK,
+       RSPAMD_HTTP_CTYPE_UNKNOWN,
+};
+
+enum rspamd_http_content_type cn_match(const char *accept)
+{ /* Parse `accept` as an Accept header value against cn_v3_desired and return
+   * the content type the parser selects; the third argument stays nullptr. */
+       rspamd_ftok_t tok = cn_tok(accept);
+       return rspamd_http_parse_accept_header(&tok, cn_v3_desired, nullptr);
+}
+}// namespace
+
+TEST_SUITE("content_negotiation")
+{ /* Covers Accept negotiation against cn_v3_desired (through cn_match) and
+   * zstd detection in rspamd_http_parse_accept_encoding(). */
+       TEST_CASE("explicit media types map to their representation")
+       {
+               CHECK(cn_match("application/json") == RSPAMD_HTTP_CTYPE_JSON);
+               CHECK(cn_match("application/msgpack") == RSPAMD_HTTP_CTYPE_MSGPACK);
+               CHECK(cn_match("message/rfc822") == RSPAMD_HTTP_CTYPE_MESSAGE_RFC822);
+               CHECK(cn_match("multipart/form-data") == RSPAMD_HTTP_CTYPE_MULTIPART_FORM);
+       }
+
+       TEST_CASE("wildcards resolve to the first desired (multipart/form-data)")
+       { /* "first desired" is cn_v3_desired[0], RSPAMD_HTTP_CTYPE_MULTIPART_FORM */
+               CHECK(cn_match("*/*") == RSPAMD_HTTP_CTYPE_MULTIPART_FORM);
+               CHECK(cn_match("multipart/*") == RSPAMD_HTTP_CTYPE_MULTIPART_FORM);
+       }
+
+       TEST_CASE("type wildcard picks the matching subtype family")
+       {
+               /* an application type-wildcard should match a desired application
+                * subtype (json comes first) */
+               CHECK(cn_match("application/*") == RSPAMD_HTTP_CTYPE_JSON);
+       }
+
+       TEST_CASE("unsupported media type yields UNKNOWN (caller maps to 406)")
+       { /* neither type appears in cn_v3_desired and no wildcard is offered */
+               CHECK(cn_match("application/xml") == RSPAMD_HTTP_CTYPE_UNKNOWN);
+               CHECK(cn_match("text/html") == RSPAMD_HTTP_CTYPE_UNKNOWN);
+       }
+
+       TEST_CASE("empty / null Accept yields UNKNOWN (caller uses default)")
+       { /* Calls the parser directly so that a null token pointer can be passed;
+          * cn_match always hands the parser a non-null token. */
+               rspamd_ftok_t empty = cn_tok("");
+               CHECK(rspamd_http_parse_accept_header(&empty, cn_v3_desired, nullptr) ==
+                         RSPAMD_HTTP_CTYPE_UNKNOWN);
+               CHECK(rspamd_http_parse_accept_header(nullptr, cn_v3_desired, nullptr) ==
+                         RSPAMD_HTTP_CTYPE_UNKNOWN);
+       }
+
+       TEST_CASE("q-values select the highest-quality acceptable type")
+       { /* In both checks the entry with the larger q is expected to win, even
+          * though multipart/form-data ranks ahead of json in cn_v3_desired. */
+               CHECK(cn_match("application/json;q=0.3, multipart/form-data;q=0.9") ==
+                         RSPAMD_HTTP_CTYPE_MULTIPART_FORM);
+               CHECK(cn_match("application/json;q=0.9, multipart/form-data;q=0.3") ==
+                         RSPAMD_HTTP_CTYPE_JSON);
+       }
+
+       TEST_CASE("browser-style Accept falls back to the wildcard default")
+       { /* the two named types are not in cn_v3_desired, so only the trailing
+          * wildcard entry (q=0.8) can produce a match */
+               CHECK(cn_match("text/html, application/xhtml+xml, */*;q=0.8") ==
+                         RSPAMD_HTTP_CTYPE_MULTIPART_FORM);
+       }
+
+       TEST_CASE("Accept-Encoding zstd detection")
+       { /* The result is treated as a bitmask: only the RSPAMD_HTTP_COMPRESS_ZSTD
+          * bit is inspected, except for the null input, which must compare equal
+          * to RSPAMD_HTTP_COMPRESS_NONE. */
+               rspamd_ftok_t zstd = cn_tok("zstd");
+               rspamd_ftok_t gzip = cn_tok("gzip");
+               rspamd_ftok_t both = cn_tok("gzip, zstd");
+
+               CHECK((rspamd_http_parse_accept_encoding(&zstd) & RSPAMD_HTTP_COMPRESS_ZSTD) != 0);
+               CHECK((rspamd_http_parse_accept_encoding(&gzip) & RSPAMD_HTTP_COMPRESS_ZSTD) == 0);
+               CHECK((rspamd_http_parse_accept_encoding(&both) & RSPAMD_HTTP_COMPRESS_ZSTD) != 0);
+               CHECK(rspamd_http_parse_accept_encoding(nullptr) == RSPAMD_HTTP_COMPRESS_NONE);
+       }
+}
+
+#endif// RSPAMD_CXX_UNIT_CONTENT_NEGOTIATION_HXX
index 8455a7860090979dbe6426f8010b24c2de9e0e66..5f4d5aa99ca90b97d9f1288db162bbaa78252b3b 100644 (file)
@@ -446,11 +446,48 @@ TEST_SUITE("multipart_response")
                rspamd::http::multipart_response resp;
                auto ct = resp.content_type();
 
-               CHECK(ct.find("multipart/mixed") != std::string::npos);
+               /* form_data is the default envelope */
+               CHECK(ct.find("multipart/form-data") != std::string::npos);
                CHECK(ct.find("boundary=\"") != std::string::npos);
                CHECK(ct.find(std::string(resp.get_boundary())) != std::string::npos);
        }
 
+       TEST_CASE("envelope controls multipart subtype")
+       { /* Checks that set_envelope() changes the subtype reported by
+          * content_type(), in both directions, on a single response object. */
+               rspamd::http::multipart_response resp;
+
+               /* Default is form-data */
+               CHECK(resp.content_type().find("multipart/form-data") != std::string::npos);
+
+               resp.set_envelope(rspamd::http::multipart_envelope::mixed);
+               auto mixed = resp.content_type();
+               CHECK(mixed.find("multipart/mixed") != std::string::npos);
+               CHECK(mixed.find("multipart/form-data") == std::string::npos); /* old subtype must be gone */
+               CHECK(mixed.find("boundary=\"") != std::string::npos); /* boundary parameter is still emitted */
+
+               resp.set_envelope(rspamd::http::multipart_envelope::form_data); /* switch back */
+               CHECK(resp.content_type().find("multipart/form-data") != std::string::npos);
+       }
+
+       TEST_CASE("envelope does not change part layout")
+       {
+               /* Both envelopes keep the form-data part headers; only the top-level
+                * subtype differs.
+                * Only the presence of the part's Content-Disposition header is
+                * asserted; the two serialized bodies are not compared to each other
+                * (presumably because each response carries its own boundary - see
+                * the "unique boundaries" case; TODO confirm). */
+               std::string data = "{\"action\":\"reject\"}";
+
+               rspamd::http::multipart_response form; /* default envelope */
+               form.add_part("result", "application/json", data);
+               auto form_body = form.serialize();
+
+               rspamd::http::multipart_response mixed;
+               mixed.set_envelope(rspamd::http::multipart_envelope::mixed);
+               mixed.add_part("result", "application/json", data);
+               auto mixed_body = mixed.serialize();
+
+               CHECK(form_body.find("Content-Disposition: form-data; name=\"result\"") != std::string::npos);
+               CHECK(mixed_body.find("Content-Disposition: form-data; name=\"result\"") != std::string::npos);
+       }
+
        TEST_CASE("unique boundaries")
        {
                rspamd::http::multipart_response resp1;