From: Vsevolod Stakhov
Date: Sat, 6 Jun 2026 14:33:58 +0000 (+0100)
Subject: [Feature] checkv3: Accept/Accept-Encoding negotiation
X-Git-Tag: 4.1.1~30^2~1
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6fa991b5f1adc040df527582785de9fc2d0bfeaf;p=thirdparty%2Frspamd.git

[Feature] checkv3: Accept/Accept-Encoding negotiation

The /checkv3 reply ignored Accept beyond a json-vs-msgpack toggle and
always emitted a hard-coded multipart/mixed body (with form-data part
headers). There was no way to ask for a plain v2-style json/msgpack body,
no true multipart/form-data reply for HTTP multipart parsers, and
Accept-Encoding had no defined default.

Negotiate the representation solely from Accept and compression solely
from Accept-Encoding on the single chokepoint all three workers (normal
scan worker, rspamd_proxy, controller) share, the reply_v3 helper:

  application/json | application/msgpack -> single-body v2 reply
  message/rfc822                         -> multipart/mixed envelope
  multipart/form-data                    -> multipart/form-data envelope
  absent / wildcard                      -> multipart/form-data default
  only unsupported types (e.g. xml)      -> 406 Not Acceptable

Inside the multipart envelopes the result-part serialization mirrors the
input metadata serialization (json or msgpack); the two envelopes differ
only in the top-level Content-Type. Compression honours Accept-Encoding:
zstd and defaults to identity. Vary: Accept, Accept-Encoding is always
advertised.

Negotiation reuses the existing http_content_negotiation parser
(q-values + wildcards), extended with two media types; the input metadata
serialization is recorded on the task via a new protocol flag.

rspamc previously sent Accept: application/json|msgpack for v3, which now
selects a single-body reply it does not expect; it now requests
multipart/form-data and accepts any multipart/ subtype, with the result
serialization carried by the metadata Content-Type.
Tested by a new C++ content_negotiation suite, multipart envelope-mode unit tests, and a functional negotiation suite run against both the normal worker and the controller (json/msgpack/email-MIME/HTTP-multipart parsers). Adds msgpack/requests/requests-toolbelt to functional CI deps. --- diff --git a/.github/workflows/ci_rspamd.yml b/.github/workflows/ci_rspamd.yml index 07515fad96..5b30d324f5 100644 --- a/.github/workflows/ci_rspamd.yml +++ b/.github/workflows/ci_rspamd.yml @@ -82,9 +82,12 @@ jobs: # --break-system-packages: Ubuntu 24.04 ships Python 3.12 with PEP 668 # marking the system interpreter as externally-managed. Older pip # versions (Fedora image) don't know the flag, so fall back without it. + # msgpack/requests/requests-toolbelt are used by the /checkv3 content + # negotiation functional tests (msgpack reply decode, HTTP multipart parse). run: | pip install --break-system-packages robotframework-pabot \ - || pip install robotframework-pabot + msgpack requests requests-toolbelt \ + || pip install robotframework-pabot msgpack requests requests-toolbelt - name: Run functional tests # Two phases run concurrently: diff --git a/src/client/rspamdclient.c b/src/client/rspamdclient.c index 3573b5c122..ec3bb85262 100644 --- a/src/client/rspamdclient.c +++ b/src/client/rspamdclient.c @@ -587,11 +587,11 @@ rspamd_client_v3_finish_handler(struct rspamd_http_connection *conn, } } - /* Check if response is multipart/mixed */ + /* Check if response is a multipart reply (form-data or mixed envelope) */ const rspamd_ftok_t *ct = rspamd_http_message_find_header(msg, "Content-Type"); if (ct && rspamd_substring_search_caseless(ct->begin, ct->len, - "multipart/mixed", sizeof("multipart/mixed") - 1) != -1) { + "multipart/", sizeof("multipart/") - 1) != -1) { /* Parse multipart response to extract result and body */ /* Extract boundary from Content-Type */ struct rspamd_content_type *parsed_ct = rspamd_content_type_parse( @@ -967,13 +967,14 @@ rspamd_client_command_v3(struct 
rspamd_client_connection *conn, rspamd_snprintf(ct_buf, sizeof(ct_buf), "multipart/form-data; boundary=%s", boundary); - /* Add Accept headers */ - if (msgpack) { - rspamd_http_message_add_header(req->msg, "Accept", "application/msgpack"); - } - else { - rspamd_http_message_add_header(req->msg, "Accept", "application/json"); - } + /* + * Request the multipart protocol explicitly. The result-part serialization + * (json vs msgpack) is mirrored from the metadata Content-Type we send + * above, so the Accept media type only needs to select the envelope, not + * the serialization. Asking for application/json|msgpack here would instead + * select a single-body v2 reply, which this client does not expect. + */ + rspamd_http_message_add_header(req->msg, "Accept", "multipart/form-data"); if (compressed) { rspamd_http_message_add_header(req->msg, "Accept-Encoding", "zstd"); } diff --git a/src/libserver/http_content_negotiation.c b/src/libserver/http_content_negotiation.c index 68deb8ca5e..a20d9fb396 100644 --- a/src/libserver/http_content_negotiation.c +++ b/src/libserver/http_content_negotiation.c @@ -62,6 +62,18 @@ static const struct rspamd_content_type_mapping content_type_map[] = { .type_enum = RSPAMD_HTTP_CTYPE_OCTET_STREAM, .ucl_emit_type = -1, }, + { + .mime_type = "message/rfc822", + .full_type_str = "message/rfc822", + .type_enum = RSPAMD_HTTP_CTYPE_MESSAGE_RFC822, + .ucl_emit_type = -1, + }, + { + .mime_type = "multipart/form-data", + .full_type_str = "multipart/form-data", + .type_enum = RSPAMD_HTTP_CTYPE_MULTIPART_FORM, + .ucl_emit_type = -1, + }, {NULL, NULL, RSPAMD_HTTP_CTYPE_UNKNOWN, -1}, }; diff --git a/src/libserver/http_content_negotiation.h b/src/libserver/http_content_negotiation.h index 9f65d1ab91..6839a97e29 100644 --- a/src/libserver/http_content_negotiation.h +++ b/src/libserver/http_content_negotiation.h @@ -30,6 +30,8 @@ enum rspamd_http_content_type { RSPAMD_HTTP_CTYPE_OPENMETRICS, RSPAMD_HTTP_CTYPE_TEXT_PLAIN, RSPAMD_HTTP_CTYPE_OCTET_STREAM, + 
RSPAMD_HTTP_CTYPE_MESSAGE_RFC822, + RSPAMD_HTTP_CTYPE_MULTIPART_FORM, RSPAMD_HTTP_CTYPE_UNKNOWN }; diff --git a/src/libserver/multipart_response.cxx b/src/libserver/multipart_response.cxx index 8143b47084..0cec068ce2 100644 --- a/src/libserver/multipart_response.cxx +++ b/src/libserver/multipart_response.cxx @@ -243,7 +243,10 @@ void multipart_response::prepare_iov(void *zstream) auto multipart_response::content_type() const -> std::string { - return "multipart/mixed; boundary=\"" + boundary_ + "\""; + const char *subtype = envelope_ == multipart_envelope::mixed + ? "multipart/mixed" + : "multipart/form-data"; + return std::string(subtype) + "; boundary=\"" + boundary_ + "\""; } }// namespace rspamd::http @@ -266,6 +269,18 @@ rspamd_multipart_response_new(void) return new rspamd_multipart_response_c(); } +void rspamd_multipart_response_set_envelope( + struct rspamd_multipart_response_c *resp, + enum rspamd_multipart_envelope_c env) +{ + if (!resp) { + return; + } + resp->resp.set_envelope(env == RSPAMD_MULTIPART_ENVELOPE_MIXED + ? rspamd::http::multipart_envelope::mixed + : rspamd::http::multipart_envelope::form_data); +} + void rspamd_multipart_response_add_part( struct rspamd_multipart_response_c *resp, const char *name, diff --git a/src/libserver/multipart_response.h b/src/libserver/multipart_response.h index 62b3422005..4213021637 100644 --- a/src/libserver/multipart_response.h +++ b/src/libserver/multipart_response.h @@ -29,6 +29,20 @@ struct rspamd_multipart_response_c; struct rspamd_multipart_response_c *rspamd_multipart_response_new(void); +/* Top-level multipart envelope selector for the response Content-Type */ +enum rspamd_multipart_envelope_c { + RSPAMD_MULTIPART_ENVELOPE_FORM_DATA = 0, /* multipart/form-data */ + RSPAMD_MULTIPART_ENVELOPE_MIXED = 1, /* multipart/mixed */ +}; + +/** + * Select the top-level multipart subtype reported by the Content-Type. + * Part layout is identical for both; only the subtype string differs. 
+ */ +void rspamd_multipart_response_set_envelope( + struct rspamd_multipart_response_c *resp, + enum rspamd_multipart_envelope_c env); + void rspamd_multipart_response_add_part( struct rspamd_multipart_response_c *resp, const char *name, diff --git a/src/libserver/multipart_response.hxx b/src/libserver/multipart_response.hxx index a43c38df9c..100581ea7f 100644 --- a/src/libserver/multipart_response.hxx +++ b/src/libserver/multipart_response.hxx @@ -31,6 +31,17 @@ struct response_part { bool compress = false; }; +/* + * Top-level envelope type. Both variants use the same part layout + * (Content-Disposition: form-data; name=...); only the multipart subtype in + * the response Content-Type differs, which is what lets a client pick between + * an HTTP form parser (form_data) and a MIME parser (mixed). + */ +enum class multipart_envelope { + form_data, /* multipart/form-data */ + mixed, /* multipart/mixed */ +}; + class multipart_response { public: multipart_response(); @@ -38,6 +49,11 @@ public: void add_part(std::string name, std::string content_type, std::string_view data, bool compress = false); + void set_envelope(multipart_envelope env) + { + envelope_ = env; + } + /** * Serialize the multipart response. 
* @param zstream ZSTD compression stream (may be null if no compression needed) @@ -78,6 +94,7 @@ public: private: std::string boundary_; + multipart_envelope envelope_ = multipart_envelope::form_data; std::vector parts_; /* Iov support (populated by prepare_iov) */ diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index 896e965b85..e5d18b125d 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -31,6 +31,7 @@ #include "lua/lua_classnames.h" #include "multipart_form.h" #include "multipart_response.h" +#include "http_content_negotiation.h" #include "libmime/content_type.h" #include @@ -2919,6 +2920,8 @@ rspamd_protocol_handle_v3_request(struct rspamd_task *task, metadata_part->content_type_len, "msgpack", sizeof("msgpack") - 1) != -1) { + /* Remember the input serialization so the reply can mirror it */ + task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_V3_MSGPACK; parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS); ucl_parser_add_chunk_full(parser, (const unsigned char *) metadata_part->data, metadata_part->data_len, @@ -3111,27 +3114,101 @@ rspamd_protocol_handle_v3_request(struct rspamd_task *task, } /* - * Build a v3 multipart/mixed HTTP reply. - * Returns the Content-Type string (allocated on task pool) for use as - * the mime_type parameter in rspamd_http_connection_write_message. + * Build a 406 Not Acceptable reply listing the representations /checkv3 can + * produce. Used when the client's Accept matches none of them. 
+ */ +static const char * +rspamd_protocol_v3_not_acceptable(struct rspamd_http_message *msg, + struct rspamd_task *task) +{ + static const char body[] = + "{\"error\":\"Not Acceptable\",\"supported\":[" + "\"application/json\",\"application/msgpack\"," + "\"message/rfc822\",\"multipart/form-data\"]}"; + + msg->code = 406; + if (msg->status) { + rspamd_fstring_free(msg->status); + } + msg->status = rspamd_fstring_new_init("Not Acceptable", sizeof("Not Acceptable") - 1); + rspamd_http_message_set_body(msg, body, sizeof(body) - 1); + + msg_info_task("v3 reply: no acceptable representation for requested Accept"); + + return "application/json"; +} + +/* + * Build a v3 HTTP reply, negotiating the representation from Accept and the + * compression from Accept-Encoding (see the negotiation contract above). + * Returns the Content-Type string (allocated on the task pool, except for the + * static literals) for use as the mime_type argument in + * rspamd_http_connection_write_message. */ const char * rspamd_protocol_http_reply_v3(struct rspamd_http_message *msg, struct rspamd_task *task) { + /* + * Proactive content negotiation. The representation is chosen solely from + * the Accept header (never inferred from the request body), and compression + * solely from Accept-Encoding. Advertise both so caches behave. + */ + rspamd_http_message_add_header(msg, "Vary", "Accept, Accept-Encoding"); + + /* + * Supported representations in preference order. MULTIPART_FORM is first so + * that an absent Accept or a wildcard media range (catch-all, or the + * multipart wildcard) resolves to the multipart/form-data default. 
+ */ + static const enum rspamd_http_content_type desired[] = { + RSPAMD_HTTP_CTYPE_MULTIPART_FORM, + RSPAMD_HTTP_CTYPE_MESSAGE_RFC822, + RSPAMD_HTTP_CTYPE_JSON, + RSPAMD_HTTP_CTYPE_MSGPACK, + RSPAMD_HTTP_CTYPE_UNKNOWN, + }; + + const rspamd_ftok_t *accept_hdr = rspamd_task_get_request_header(task, "Accept"); + enum rspamd_http_content_type rep = RSPAMD_HTTP_CTYPE_MULTIPART_FORM; + + if (accept_hdr && accept_hdr->len > 0) { + double quality = 0.0; + enum rspamd_http_content_type matched = + rspamd_http_parse_accept_header(accept_hdr, desired, &quality); + + if (matched == RSPAMD_HTTP_CTYPE_UNKNOWN || quality <= 0.0) { + return rspamd_protocol_v3_not_acceptable(msg, task); + } + + rep = matched; + } + + /* + * Single-body (v2-style) representations: delegate to the regular reply + * writer, which serializes the result, updates history/stats and the log + * pipe internally. There is no place for a rewritten-message part here. + */ + if (rep == RSPAMD_HTTP_CTYPE_JSON || rep == RSPAMD_HTTP_CTYPE_MSGPACK) { + int out_type = (rep == RSPAMD_HTTP_CTYPE_MSGPACK) ? UCL_EMIT_MSGPACK + : UCL_EMIT_JSON_COMPACT; + rspamd_protocol_http_reply(msg, task, NULL, out_type); + + return (rep == RSPAMD_HTTP_CTYPE_MSGPACK) ? 
"application/msgpack" + : "application/json"; + } + + /* Multipart representations: form-data (default) or mixed (message/rfc822) */ int flags = RSPAMD_PROTOCOL_DEFAULT | RSPAMD_PROTOCOL_URLS; ucl_object_t *top = rspamd_protocol_write_ucl(task, flags); rspamd_protocol_update_history_and_log(task); - /* Determine output format from metadata part's Content-Type or Accept header */ - const rspamd_ftok_t *accept_hdr = rspamd_task_get_request_header(task, "Accept"); + /* Inner result serialization mirrors the input metadata serialization */ int out_type = UCL_EMIT_JSON_COMPACT; const char *result_ctype = "application/json"; - if (accept_hdr && rspamd_substring_search(accept_hdr->begin, accept_hdr->len, - "application/msgpack", - sizeof("application/msgpack") - 1) != -1) { + if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_V3_MSGPACK) { out_type = UCL_EMIT_MSGPACK; result_ctype = "application/msgpack"; } @@ -3140,17 +3217,21 @@ rspamd_protocol_http_reply_v3(struct rspamd_http_message *msg, rspamd_fstring_t *result_data = rspamd_fstring_sized_new(1000); rspamd_ucl_emit_fstring(top, out_type, &result_data); - /* Check if client wants compression */ + /* Compression: honor Accept-Encoding: zstd, otherwise identity */ gboolean want_compress = FALSE; const rspamd_ftok_t *ae_hdr = rspamd_task_get_request_header(task, "Accept-Encoding"); - if (ae_hdr && rspamd_substring_search_caseless(ae_hdr->begin, ae_hdr->len, - "zstd", 4) != -1) { + if ((rspamd_http_parse_accept_encoding(ae_hdr) & RSPAMD_HTTP_COMPRESS_ZSTD) != 0) { want_compress = TRUE; } /* Build multipart response */ struct rspamd_multipart_response_c *resp = rspamd_multipart_response_new(); + rspamd_multipart_response_set_envelope( + resp, + rep == RSPAMD_HTTP_CTYPE_MESSAGE_RFC822 ? 
RSPAMD_MULTIPART_ENVELOPE_MIXED + : RSPAMD_MULTIPART_ENVELOPE_FORM_DATA); + rspamd_multipart_response_add_part(resp, "result", result_ctype, result_data->str, result_data->len, want_compress); diff --git a/src/libserver/task.h b/src/libserver/task.h index 78cd9319ed..8225e6293a 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -131,7 +131,9 @@ enum rspamd_task_stage { #define RSPAMD_TASK_PROTOCOL_FLAG_GROUPS (1u << 6u) /* Request is multipart/form-data v3 protocol */ #define RSPAMD_TASK_PROTOCOL_FLAG_MULTIPART_V3 (1u << 7u) -#define RSPAMD_TASK_PROTOCOL_FLAG_MAX_SHIFT (7u) +/* v3 request metadata part was msgpack-serialized (mirror it in the reply) */ +#define RSPAMD_TASK_PROTOCOL_FLAG_V3_MSGPACK (1u << 8u) +#define RSPAMD_TASK_PROTOCOL_FLAG_MAX_SHIFT (8u) #define RSPAMD_TASK_IS_SKIPPED(task) (G_UNLIKELY((task)->flags & RSPAMD_TASK_FLAG_SKIP)) #define RSPAMD_TASK_IS_SPAMC(task) (G_UNLIKELY((task)->cmd == CMD_CHECK_SPAMC)) diff --git a/test/functional/cases/001_merged/430_checkv3.robot b/test/functional/cases/001_merged/430_checkv3.robot index fc046f9b4d..4d0218f2ec 100644 --- a/test/functional/cases/001_merged/430_checkv3.robot +++ b/test/functional/cases/001_merged/430_checkv3.robot @@ -108,3 +108,83 @@ checkv3 via rspamc with metadata-header ${result} = Run Rspamc -p -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_NORMAL} --protocol-v3 ... --metadata-header=X-V3-Custom=from-rspamc ${MESSAGE} Check Rspamc ${result} TEST_V3_META_HEADER ( + +checkv3 content negotiation on normal worker + [Documentation] Accept / Accept-Encoding content negotiation for /checkv3 + ... against the normal scan worker. + Run V3 Negotiation Checks ${RSPAMD_PORT_NORMAL} + +checkv3 content negotiation on controller + [Documentation] Same negotiation contract must hold on the controller endpoint. + Run V3 Negotiation Checks ${RSPAMD_PORT_CONTROLLER} + +*** Keywords *** +Run V3 Negotiation Checks + [Arguments] ${port} + # 1. 
No Accept -> multipart/form-data default, Vary advertised, result usable + &{r} = Scan File V3 Negotiated ${GTUBE} port=${port} Settings=${SETTINGS_NOSYMBOLS} + Should Be Equal As Integers ${r}[status] 200 + Should Start With ${r}[content_type] multipart/form-data + Should Contain ${r}[vary] Accept + Should Contain ${r}[vary] Accept-Encoding + Expect Symbol GTUBE + + # 2. Accept: application/json -> single JSON body, no multipart parts + &{r} = Scan File V3 Negotiated ${GTUBE} accept=application/json port=${port} + ... Settings=${SETTINGS_NOSYMBOLS} + Should Be Equal As Integers ${r}[status] 200 + Should Start With ${r}[content_type] application/json + Should Be Equal ${r}[parser] json + Should Be Empty ${r}[parts] + Expect Symbol GTUBE + + # 3. Accept: application/msgpack -> single msgpack body + &{r} = Scan File V3 Negotiated ${GTUBE} accept=application/msgpack port=${port} + ... Settings=${SETTINGS_NOSYMBOLS} + Should Be Equal As Integers ${r}[status] 200 + Should Start With ${r}[content_type] application/msgpack + Should Be Equal ${r}[parser] msgpack + Expect Symbol GTUBE + + # 4. Accept: message/rfc822 -> multipart/mixed, parseable as MIME + &{r} = Scan File V3 Negotiated ${GTUBE} accept=message/rfc822 port=${port} + ... Settings=${SETTINGS_NOSYMBOLS} + Should Be Equal As Integers ${r}[status] 200 + Should Start With ${r}[content_type] multipart/mixed + Should Be Equal ${r}[parser] mime + Should Start With ${r}[result_ctype] application/json + Expect Symbol GTUBE + + # 5. Accept: multipart/form-data -> parseable by an HTTP multipart parser + &{r} = Scan File V3 Negotiated ${GTUBE} accept=multipart/form-data port=${port} + ... Settings=${SETTINGS_NOSYMBOLS} + Should Be Equal As Integers ${r}[status] 200 + Should Start With ${r}[content_type] multipart/form-data + Should Be Equal ${r}[parser] form-data + Expect Symbol GTUBE + + # 6. 
msgpack metadata, no Accept -> multipart default, result part mirrors input (msgpack) + &{meta6} = Create Dictionary from=sender@example.com + &{r} = Scan File V3 Negotiated ${GTUBE} metadata=${meta6} metadata_format=msgpack + ... port=${port} Settings=${SETTINGS_NOSYMBOLS} + Should Be Equal As Integers ${r}[status] 200 + Should Start With ${r}[content_type] multipart/form-data + Should Start With ${r}[result_ctype] application/msgpack + Expect Symbol GTUBE + + # 7. Accept names only unsupported types -> 406 Not Acceptable + &{r} = Scan File V3 Negotiated ${GTUBE} accept=application/xml port=${port} + ... Settings=${SETTINGS_NOSYMBOLS} + Should Be Equal As Integers ${r}[status] 406 + + # 8. Accept-Encoding: zstd -> parts carry Content-Encoding: zstd + &{r} = Scan File V3 Negotiated ${GTUBE} accept=multipart/form-data + ... accept_encoding=zstd port=${port} Settings=${SETTINGS_NOSYMBOLS} + Should Be Equal As Integers ${r}[status] 200 + Should Contain ${r}[part_encodings] zstd + + # 8b. No Accept-Encoding -> identity (uncompressed) + &{r} = Scan File V3 Negotiated ${GTUBE} accept=multipart/form-data port=${port} + ... Settings=${SETTINGS_NOSYMBOLS} + Should Be Equal As Integers ${r}[status] 200 + Should Be Empty ${r}[part_encodings] diff --git a/test/functional/lib/rspamd.py b/test/functional/lib/rspamd.py index 2886b9e467..39bb214b7b 100644 --- a/test/functional/lib/rspamd.py +++ b/test/functional/lib/rspamd.py @@ -25,6 +25,7 @@ # limitations under the License. 
from urllib.request import urlopen +import email import glob import grp import http.client @@ -442,6 +443,166 @@ def Scan_File_V3_Single_Part(part_name, part_data, content_type_part=None, **hea return status +def _build_multipart_meta(boundary, meta_bytes, meta_ctype, message_bytes): + """multipart/form-data body with an explicit metadata Content-Type.""" + if isinstance(message_bytes, str): + message_bytes = message_bytes.encode('utf-8') + body = b"" + body += ("--" + boundary + "\r\n").encode() + body += b'Content-Disposition: form-data; name="metadata"\r\n' + body += ("Content-Type: %s\r\n\r\n" % meta_ctype).encode() + body += meta_bytes + b"\r\n" + body += ("--" + boundary + "\r\n").encode() + body += b'Content-Disposition: form-data; name="message"\r\n\r\n' + body += message_bytes + b"\r\n" + body += ("--" + boundary + "--\r\n").encode() + return body + + +def _v3_disposition_name(content_disposition): + m = re.search(r'name="?([^";]+)"?', content_disposition or "") + return m.group(1) if m else None + + +def _v3_parts_form_data(body, content_type): + """Parse a multipart/form-data reply with an HTTP multipart parser. + + Uses requests_toolbelt (not a MIME parser) to prove the reply is consumable + by standard HTTP multipart tooling. 
+ """ + from requests_toolbelt.multipart.decoder import MultipartDecoder + dec = MultipartDecoder(body, content_type) + parts = [] + for part in dec.parts: + hdrs = {k.decode().lower(): v.decode() for k, v in part.headers.items()} + parts.append({ + "name": _v3_disposition_name(hdrs.get("content-disposition", "")), + "ctype": hdrs.get("content-type", ""), + "encoding": hdrs.get("content-encoding", ""), + "data": part.content, + }) + return parts + + +def _v3_parts_mime(body, content_type): + """Parse a multipart/mixed reply with the stdlib MIME parser (email).""" + full = b"Content-Type: " + content_type.encode() + b"\r\n\r\n" + body + msg = email.message_from_bytes(full) + parts = [] + for part in msg.walk(): + if part.get_content_maintype() == "multipart": + continue + parts.append({ + "name": part.get_param("name", header="content-disposition"), + "ctype": part.get_content_type(), + "encoding": part.get("Content-Encoding", "") or "", + "data": part.get_payload(decode=True), + }) + return parts + + +def _v3_decode_result(part): + """Decode a 'result' part's payload into a dict per its Content-Type.""" + if not part or part.get("encoding"): + # Compressed payloads are not decoded here (zstd has no stdlib codec) + return None + data = part["data"] + if "msgpack" in (part["ctype"] or ""): + import msgpack + return msgpack.unpackb(data, raw=False) + return json.loads(data) + + +def Scan_File_V3_Negotiated(filename, accept=None, accept_encoding=None, + port=None, metadata=None, metadata_format="json", + **headers): + """Send /checkv3 with explicit Accept / Accept-Encoding and parse the reply. + + Sets ${SCAN_RESULT} to the parsed scan result (when the reply carries one, + i.e. not a 406) so the usual Expect Symbol/Action keywords work. Returns a + dict describing the negotiated reply: status, content_type, vary, parser, + result_ctype, parts (name -> content-type), part_encodings. 
+ """ + addr = BuiltIn().get_variable_value("${RSPAMD_LOCAL_ADDR}") + if port is None: + port = BuiltIn().get_variable_value("${RSPAMD_PORT_NORMAL}") + + meta = metadata if metadata else {} + if metadata_format == "msgpack": + import msgpack + meta_bytes = msgpack.packb(meta) + meta_ctype = "application/msgpack" + else: + meta_bytes = json.dumps(meta).encode('utf-8') + meta_ctype = "application/json" + + message_data = open(filename, "rb").read() + boundary = "----rspamd-test-%016x" % random.getrandbits(64) + body = _build_multipart_meta(boundary, meta_bytes, meta_ctype, message_data) + + headers["Content-Type"] = "multipart/form-data; boundary=" + boundary + if accept is not None: + headers["Accept"] = accept + if accept_encoding is not None: + headers["Accept-Encoding"] = accept_encoding + if "Queue-Id" not in headers: + headers["Queue-Id"] = BuiltIn().get_variable_value("${TEST_NAME}") + + c = http.client.HTTPConnection("%s:%s" % (addr, port)) + c.request("POST", "/checkv3", body, headers) + r = c.getresponse() + resp_body = r.read() + ct = r.getheader("Content-Type", "") or "" + vary = r.getheader("Vary", "") or "" + status = r.status + c.close() + + info = { + "status": status, + "content_type": ct, + "vary": vary, + "parser": "none", + "result_ctype": "", + "parts": {}, + "part_encodings": [], + } + + if status != 200: + return info + + result = None + if ct.startswith("application/json"): + info["parser"] = "json" + info["result_ctype"] = "application/json" + result = json.loads(resp_body) + elif ct.startswith("application/msgpack"): + import msgpack + info["parser"] = "msgpack" + info["result_ctype"] = "application/msgpack" + result = msgpack.unpackb(resp_body, raw=False) + elif ct.startswith("multipart/mixed"): + info["parser"] = "mime" + parts = _v3_parts_mime(resp_body, ct) + info["parts"] = {p["name"]: p["ctype"] for p in parts} + info["part_encodings"] = [p["encoding"] for p in parts if p["encoding"]] + rp = next((p for p in parts if p["name"] == 
"result"), None) + info["result_ctype"] = rp["ctype"] if rp else "" + result = _v3_decode_result(rp) + elif ct.startswith("multipart/form-data"): + info["parser"] = "form-data" + parts = _v3_parts_form_data(resp_body, ct) + info["parts"] = {p["name"]: p["ctype"] for p in parts} + info["part_encodings"] = [p["encoding"] for p in parts if p["encoding"]] + rp = next((p for p in parts if p["name"] == "result"), None) + info["result_ctype"] = rp["ctype"] if rp else "" + result = _v3_decode_result(rp) + + if result is not None: + BuiltIn().set_test_variable("${SCAN_RESULT}", result) + + return info + + def Scan_File_SSL(filename, port=None, **headers): """Like Scan_File but over HTTPS (TLS) to the normal worker SSL port.""" addr = BuiltIn().get_variable_value("${RSPAMD_LOCAL_ADDR}") diff --git a/test/rspamd_cxx_unit.cxx b/test/rspamd_cxx_unit.cxx index 4ab10c73c7..94afedfa86 100644 --- a/test/rspamd_cxx_unit.cxx +++ b/test/rspamd_cxx_unit.cxx @@ -36,6 +36,7 @@ #include "rspamd_cxx_unit_upstream_latency.hxx" #include "rspamd_cxx_unit_upstream_srv.hxx" #include "rspamd_cxx_unit_multipart.hxx" +#include "rspamd_cxx_unit_content_negotiation.hxx" #include "rspamd_cxx_unit_settings_merge.hxx" #include "rspamd_cxx_unit_fpconv.hxx" diff --git a/test/rspamd_cxx_unit_content_negotiation.hxx b/test/rspamd_cxx_unit_content_negotiation.hxx new file mode 100644 index 0000000000..f8841d663b --- /dev/null +++ b/test/rspamd_cxx_unit_content_negotiation.hxx @@ -0,0 +1,117 @@ +/* + * Copyright 2026 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSPAMD_CXX_UNIT_CONTENT_NEGOTIATION_HXX +#define RSPAMD_CXX_UNIT_CONTENT_NEGOTIATION_HXX + +#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL +#include "doctest/doctest.h" + +#include "libserver/http_content_negotiation.h" + +#include + +namespace { +rspamd_ftok_t cn_tok(const char *s) +{ + rspamd_ftok_t t; + t.begin = s; + t.len = s ? strlen(s) : 0; + return t; +} + +/* Same preference order the /checkv3 reply uses */ +const enum rspamd_http_content_type cn_v3_desired[] = { + RSPAMD_HTTP_CTYPE_MULTIPART_FORM, + RSPAMD_HTTP_CTYPE_MESSAGE_RFC822, + RSPAMD_HTTP_CTYPE_JSON, + RSPAMD_HTTP_CTYPE_MSGPACK, + RSPAMD_HTTP_CTYPE_UNKNOWN, +}; + +enum rspamd_http_content_type cn_match(const char *accept) +{ + rspamd_ftok_t tok = cn_tok(accept); + return rspamd_http_parse_accept_header(&tok, cn_v3_desired, nullptr); +} +}// namespace + +TEST_SUITE("content_negotiation") +{ + TEST_CASE("explicit media types map to their representation") + { + CHECK(cn_match("application/json") == RSPAMD_HTTP_CTYPE_JSON); + CHECK(cn_match("application/msgpack") == RSPAMD_HTTP_CTYPE_MSGPACK); + CHECK(cn_match("message/rfc822") == RSPAMD_HTTP_CTYPE_MESSAGE_RFC822); + CHECK(cn_match("multipart/form-data") == RSPAMD_HTTP_CTYPE_MULTIPART_FORM); + } + + TEST_CASE("wildcards resolve to the first desired (multipart/form-data)") + { + CHECK(cn_match("*/*") == RSPAMD_HTTP_CTYPE_MULTIPART_FORM); + CHECK(cn_match("multipart/*") == RSPAMD_HTTP_CTYPE_MULTIPART_FORM); + } + + TEST_CASE("type wildcard picks the matching subtype family") + { + /* an application type-wildcard should match a desired application + * subtype (json comes first) */ + CHECK(cn_match("application/*") == RSPAMD_HTTP_CTYPE_JSON); + } + + TEST_CASE("unsupported media type yields UNKNOWN (caller maps to 406)") + { + CHECK(cn_match("application/xml") == RSPAMD_HTTP_CTYPE_UNKNOWN); + CHECK(cn_match("text/html") == 
RSPAMD_HTTP_CTYPE_UNKNOWN); + } + + TEST_CASE("empty / null Accept yields UNKNOWN (caller uses default)") + { + rspamd_ftok_t empty = cn_tok(""); + CHECK(rspamd_http_parse_accept_header(&empty, cn_v3_desired, nullptr) == + RSPAMD_HTTP_CTYPE_UNKNOWN); + CHECK(rspamd_http_parse_accept_header(nullptr, cn_v3_desired, nullptr) == + RSPAMD_HTTP_CTYPE_UNKNOWN); + } + + TEST_CASE("q-values select the highest-quality acceptable type") + { + CHECK(cn_match("application/json;q=0.3, multipart/form-data;q=0.9") == + RSPAMD_HTTP_CTYPE_MULTIPART_FORM); + CHECK(cn_match("application/json;q=0.9, multipart/form-data;q=0.3") == + RSPAMD_HTTP_CTYPE_JSON); + } + + TEST_CASE("browser-style Accept falls back to the wildcard default") + { + CHECK(cn_match("text/html, application/xhtml+xml, */*;q=0.8") == + RSPAMD_HTTP_CTYPE_MULTIPART_FORM); + } + + TEST_CASE("Accept-Encoding zstd detection") + { + rspamd_ftok_t zstd = cn_tok("zstd"); + rspamd_ftok_t gzip = cn_tok("gzip"); + rspamd_ftok_t both = cn_tok("gzip, zstd"); + + CHECK((rspamd_http_parse_accept_encoding(&zstd) & RSPAMD_HTTP_COMPRESS_ZSTD) != 0); + CHECK((rspamd_http_parse_accept_encoding(&gzip) & RSPAMD_HTTP_COMPRESS_ZSTD) == 0); + CHECK((rspamd_http_parse_accept_encoding(&both) & RSPAMD_HTTP_COMPRESS_ZSTD) != 0); + CHECK(rspamd_http_parse_accept_encoding(nullptr) == RSPAMD_HTTP_COMPRESS_NONE); + } +} + +#endif// RSPAMD_CXX_UNIT_CONTENT_NEGOTIATION_HXX diff --git a/test/rspamd_cxx_unit_multipart.hxx b/test/rspamd_cxx_unit_multipart.hxx index 8455a78600..5f4d5aa99c 100644 --- a/test/rspamd_cxx_unit_multipart.hxx +++ b/test/rspamd_cxx_unit_multipart.hxx @@ -446,11 +446,48 @@ TEST_SUITE("multipart_response") rspamd::http::multipart_response resp; auto ct = resp.content_type(); - CHECK(ct.find("multipart/mixed") != std::string::npos); + /* form_data is the default envelope */ + CHECK(ct.find("multipart/form-data") != std::string::npos); CHECK(ct.find("boundary=\"") != std::string::npos); CHECK(ct.find(std::string(resp.get_boundary())) 
!= std::string::npos); } + TEST_CASE("envelope controls multipart subtype") + { + rspamd::http::multipart_response resp; + + /* Default is form-data */ + CHECK(resp.content_type().find("multipart/form-data") != std::string::npos); + + resp.set_envelope(rspamd::http::multipart_envelope::mixed); + auto mixed = resp.content_type(); + CHECK(mixed.find("multipart/mixed") != std::string::npos); + CHECK(mixed.find("multipart/form-data") == std::string::npos); + CHECK(mixed.find("boundary=\"") != std::string::npos); + + resp.set_envelope(rspamd::http::multipart_envelope::form_data); + CHECK(resp.content_type().find("multipart/form-data") != std::string::npos); + } + + TEST_CASE("envelope does not change part layout") + { + /* Both envelopes keep the form-data part headers; only the top-level + * subtype differs. */ + std::string data = "{\"action\":\"reject\"}"; + + rspamd::http::multipart_response form; + form.add_part("result", "application/json", data); + auto form_body = form.serialize(); + + rspamd::http::multipart_response mixed; + mixed.set_envelope(rspamd::http::multipart_envelope::mixed); + mixed.add_part("result", "application/json", data); + auto mixed_body = mixed.serialize(); + + CHECK(form_body.find("Content-Disposition: form-data; name=\"result\"") != std::string::npos); + CHECK(mixed_body.find("Content-Disposition: form-data; name=\"result\"") != std::string::npos); + } + TEST_CASE("unique boundaries") { rspamd::http::multipart_response resp1;