From: Vsevolod Stakhov Date: Sat, 7 Feb 2026 12:23:37 +0000 (+0000) Subject: [Test] Add tests for /checkv3 multipart endpoint X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ad0f7d2562533bf7c3cc416ee8cd7624aef1bced;p=thirdparty%2Frspamd.git [Test] Add tests for /checkv3 multipart endpoint C++ unit tests (23 cases): multipart form parser, response builder, and round-trip serialization. Robot Framework functional tests (6 cases): GTUBE scan, metadata handling, settings_id, and error cases for missing parts and malformed boundaries. Python helpers for building and parsing multipart/form-data requests and multipart/mixed responses. --- diff --git a/test/functional/cases/001_merged/430_checkv3.robot b/test/functional/cases/001_merged/430_checkv3.robot new file mode 100644 index 0000000000..c355599445 --- /dev/null +++ b/test/functional/cases/001_merged/430_checkv3.robot @@ -0,0 +1,41 @@ +*** Settings *** +Library ${RSPAMD_TESTDIR}/lib/rspamd.py +Resource ${RSPAMD_TESTDIR}/lib/rspamd.robot +Variables ${RSPAMD_TESTDIR}/lib/vars.py + +*** Variables *** +${GTUBE} ${RSPAMD_TESTDIR}/messages/gtube.eml +${SETTINGS_NOSYMBOLS} {symbols_enabled = []} + +*** Test Cases *** +GTUBE via checkv3 + [Documentation] Basic /checkv3 scan, expect GTUBE symbol + Scan File V3 ${GTUBE} Settings=${SETTINGS_NOSYMBOLS} + Expect Symbol GTUBE + +checkv3 with metadata from and rcpt + [Documentation] Set from and rcpt in metadata JSON, verify they are applied + &{meta} = Create Dictionary from=sender@example.com rcpt=rcpt@example.com + Scan File V3 ${GTUBE} metadata=${meta} Settings=${SETTINGS_NOSYMBOLS} + Expect Symbol GTUBE + +checkv3 with settings_id + [Documentation] Pass settings_id in metadata, verify settings are applied + &{meta} = Create Dictionary settings_id=id_test + Scan File V3 ${GTUBE} metadata=${meta} + Expect Symbol GTUBE + +checkv3 missing metadata part + [Documentation] Send only message part without metadata, expect HTTP 400 + ${status} = Scan File V3 
Single Part message test message body + Should Be Equal As Integers ${status} 400 + +checkv3 missing message part + [Documentation] Send only metadata part without message, expect HTTP 400 + ${status} = Scan File V3 Single Part metadata {} application/json + Should Be Equal As Integers ${status} 400 + +checkv3 malformed boundary + [Documentation] Send body with wrong boundary, expect HTTP 400 + Scan File V3 Expect Error ${GTUBE} 400 + ... content_type_override=multipart/form-data; boundary=wrong-boundary-does-not-match diff --git a/test/functional/lib/rspamd.py b/test/functional/lib/rspamd.py index 9c869df0ef..244900f106 100644 --- a/test/functional/lib/rspamd.py +++ b/test/functional/lib/rspamd.py @@ -36,6 +36,8 @@ import shutil import signal import socket import stat +import random +import re import sys import tempfile @@ -221,6 +223,168 @@ def Scan_File(filename, **headers): return +def _build_multipart(boundary, metadata_json, message_bytes): + """Build a multipart/form-data body with metadata and message parts.""" + body = b"" + body += ("--" + boundary + "\r\n").encode() + body += b"Content-Disposition: form-data; name=\"metadata\"\r\n" + body += b"Content-Type: application/json\r\n" + body += b"\r\n" + if isinstance(metadata_json, str): + metadata_json = metadata_json.encode('utf-8') + body += metadata_json + body += b"\r\n" + body += ("--" + boundary + "\r\n").encode() + body += b"Content-Disposition: form-data; name=\"message\"\r\n" + body += b"\r\n" + if isinstance(message_bytes, str): + message_bytes = message_bytes.encode('utf-8') + body += message_bytes + body += b"\r\n" + body += ("--" + boundary + "--\r\n").encode() + return body + + +def _build_multipart_single(boundary, part_name, part_data, content_type=None): + """Build a multipart/form-data body with a single part.""" + body = b"" + body += ("--" + boundary + "\r\n").encode() + body += ("Content-Disposition: form-data; name=\"%s\"\r\n" % part_name).encode() + if content_type: + body += 
("Content-Type: %s\r\n" % content_type).encode() + body += b"\r\n" + if isinstance(part_data, str): + part_data = part_data.encode('utf-8') + body += part_data + body += b"\r\n" + body += ("--" + boundary + "--\r\n").encode() + return body + + +def _parse_multipart_response(body, content_type): + """Parse a multipart/mixed response and return the 'result' part data as string.""" + if isinstance(body, bytes): + body = body.decode('utf-8', errors='replace') + + # Extract boundary from Content-Type header + m = re.search(r'boundary="?([^";]+)"?', content_type) + if not m: + raise ValueError("No boundary found in Content-Type: %s" % content_type) + boundary = m.group(1) + + # Split on boundary + parts = body.split("--" + boundary) + for part in parts: + if part.startswith("--"): + continue # closing boundary + if not part.strip(): + continue + + # Split headers from body + if "\r\n\r\n" in part: + headers, data = part.split("\r\n\r\n", 1) + elif "\n\n" in part: + headers, data = part.split("\n\n", 1) + else: + continue + + # Check if this is the "result" part + if 'name="result"' in headers: + # Strip trailing \r\n + data = data.rstrip("\r\n") + return data + + raise ValueError("No 'result' part found in multipart response") + + +def Scan_File_V3(filename, metadata=None, **headers): + """Send a /checkv3 multipart request and set ${SCAN_RESULT}.""" + addr = BuiltIn().get_variable_value("${RSPAMD_LOCAL_ADDR}") + port = BuiltIn().get_variable_value("${RSPAMD_PORT_NORMAL}") + + meta = metadata if metadata else {} + meta_json = json.dumps(meta) + message_data = open(filename, "rb").read() + + boundary = "----rspamd-test-%016x" % random.getrandbits(64) + body = _build_multipart(boundary, meta_json, message_data) + + headers["Content-Type"] = "multipart/form-data; boundary=" + boundary + if "Queue-Id" not in headers: + headers["Queue-Id"] = BuiltIn().get_variable_value("${TEST_NAME}") + + c = http.client.HTTPConnection("%s:%s" % (addr, port)) + c.request("POST", "/checkv3", 
body, headers) + r = c.getresponse() + assert r.status == 200, "Expected HTTP 200 but got %d" % r.status + + resp_body = r.read() + resp_ct = r.getheader("Content-Type", "") + result_data = _parse_multipart_response(resp_body, resp_ct) + + d = json.JSONDecoder(strict=True).decode(result_data) + c.close() + BuiltIn().set_test_variable("${SCAN_RESULT}", d) + return + + +def Scan_File_V3_Expect_Error(filename, expected_status, metadata=None, + body_override=None, content_type_override=None, + **headers): + """Send a /checkv3 request and expect a specific HTTP error status.""" + addr = BuiltIn().get_variable_value("${RSPAMD_LOCAL_ADDR}") + port = BuiltIn().get_variable_value("${RSPAMD_PORT_NORMAL}") + + boundary = "----rspamd-test-%016x" % random.getrandbits(64) + + if body_override is not None: + body = body_override + else: + meta = metadata if metadata else {} + meta_json = json.dumps(meta) + message_data = open(filename, "rb").read() if filename else b"" + body = _build_multipart(boundary, meta_json, message_data) + + if content_type_override: + headers["Content-Type"] = content_type_override + else: + headers["Content-Type"] = "multipart/form-data; boundary=" + boundary + + if "Queue-Id" not in headers: + headers["Queue-Id"] = BuiltIn().get_variable_value("${TEST_NAME}") + + c = http.client.HTTPConnection("%s:%s" % (addr, port)) + c.request("POST", "/checkv3", body, headers) + r = c.getresponse() + actual_status = r.status + r.read() + c.close() + assert actual_status == int(expected_status), \ + "Expected HTTP %s but got %d" % (expected_status, actual_status) + return + + +def Scan_File_V3_Single_Part(part_name, part_data, content_type_part=None, **headers): + """Send a /checkv3 request with only a single part.""" + addr = BuiltIn().get_variable_value("${RSPAMD_LOCAL_ADDR}") + port = BuiltIn().get_variable_value("${RSPAMD_PORT_NORMAL}") + + boundary = "----rspamd-test-%016x" % random.getrandbits(64) + body = _build_multipart_single(boundary, part_name, part_data, 
content_type_part) + + headers["Content-Type"] = "multipart/form-data; boundary=" + boundary + if "Queue-Id" not in headers: + headers["Queue-Id"] = BuiltIn().get_variable_value("${TEST_NAME}") + + c = http.client.HTTPConnection("%s:%s" % (addr, port)) + c.request("POST", "/checkv3", body, headers) + r = c.getresponse() + status = r.status + r.read() + c.close() + return status + + def Send_SIGUSR1(pid): pid = int(pid) os.kill(pid, signal.SIGUSR1) diff --git a/test/rspamd_cxx_unit.cxx b/test/rspamd_cxx_unit.cxx index 04906855e5..a8b7d8bb29 100644 --- a/test/rspamd_cxx_unit.cxx +++ b/test/rspamd_cxx_unit.cxx @@ -29,6 +29,7 @@ #include "rspamd_cxx_unit_html_url_rewrite.hxx" #include "rspamd_cxx_unit_html_cta.hxx" #include "rspamd_cxx_unit_upstream_token_bucket.hxx" +#include "rspamd_cxx_unit_multipart.hxx" static gboolean verbose = false; static const GOptionEntry entries[] = diff --git a/test/rspamd_cxx_unit_multipart.hxx b/test/rspamd_cxx_unit_multipart.hxx new file mode 100644 index 0000000000..163d66a772 --- /dev/null +++ b/test/rspamd_cxx_unit_multipart.hxx @@ -0,0 +1,412 @@ +/* + * Copyright 2025 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef RSPAMD_CXX_UNIT_MULTIPART_HXX +#define RSPAMD_CXX_UNIT_MULTIPART_HXX + +#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL +#include "doctest/doctest.h" + +#include "libserver/multipart_form.hxx" +#include "libserver/multipart_response.hxx" + +#include <string> +#include <string_view> + +TEST_SUITE("multipart_form") +{ + TEST_CASE("basic two-part form") + { + std::string body = + "--boundary\r\n" + "Content-Disposition: form-data; name=\"metadata\"\r\n" + "Content-Type: application/json\r\n" + "\r\n" + "{\"from\":\"test@example.com\"}\r\n" + "--boundary\r\n" + "Content-Disposition: form-data; name=\"message\"\r\n" + "\r\n" + "Subject: test\r\n\r\nHello world\r\n" + "--boundary--\r\n"; + + auto result = rspamd::http::parse_multipart_form(body, "boundary"); + REQUIRE(result.has_value()); + CHECK(result->parts.size() == 2); + CHECK(result->parts[0].name == "metadata"); + CHECK(result->parts[0].data == "{\"from\":\"test@example.com\"}"); + CHECK(result->parts[1].name == "message"); + CHECK(result->parts[1].data == "Subject: test\r\n\r\nHello world"); + } + + TEST_CASE("LF-only line endings") + { + std::string body = + "--boundary\n" + "Content-Disposition: form-data; name=\"metadata\"\n" + "\n" + "meta-data-here\n" + "--boundary\n" + "Content-Disposition: form-data; name=\"message\"\n" + "\n" + "message-data-here\n" + "--boundary--\n"; + + auto result = rspamd::http::parse_multipart_form(body, "boundary"); + REQUIRE(result.has_value()); + CHECK(result->parts.size() == 2); + CHECK(result->parts[0].name == "metadata"); + CHECK(result->parts[0].data == "meta-data-here"); + CHECK(result->parts[1].name == "message"); + CHECK(result->parts[1].data == "message-data-here"); + } + + TEST_CASE("single part") + { + std::string body = + "--boundary\r\n" + "Content-Disposition: form-data; name=\"metadata\"\r\n" + "\r\n" + "{\"file\":\"/tmp/test.eml\"}\r\n" + "--boundary--\r\n"; + + auto result = rspamd::http::parse_multipart_form(body, "boundary"); + REQUIRE(result.has_value()); +
CHECK(result->parts.size() == 1); + CHECK(result->parts[0].name == "metadata"); + } + + TEST_CASE("find_part by name") + { + std::string body = + "--boundary\r\n" + "Content-Disposition: form-data; name=\"metadata\"\r\n" + "\r\n" + "meta\r\n" + "--boundary\r\n" + "Content-Disposition: form-data; name=\"message\"\r\n" + "\r\n" + "msg\r\n" + "--boundary--\r\n"; + + auto result = rspamd::http::parse_multipart_form(body, "boundary"); + REQUIRE(result.has_value()); + + auto *meta = rspamd::http::find_part(*result, "metadata"); + REQUIRE(meta != nullptr); + CHECK(meta->data == "meta"); + + auto *msg = rspamd::http::find_part(*result, "message"); + REQUIRE(msg != nullptr); + CHECK(msg->data == "msg"); + + auto *none = rspamd::http::find_part(*result, "nonexistent"); + CHECK(none == nullptr); + } + + TEST_CASE("content-type and encoding headers") + { + std::string body = + "--boundary\r\n" + "Content-Disposition: form-data; name=\"metadata\"\r\n" + "Content-Type: application/json\r\n" + "Content-Encoding: zstd\r\n" + "\r\n" + "data\r\n" + "--boundary--\r\n"; + + auto result = rspamd::http::parse_multipart_form(body, "boundary"); + REQUIRE(result.has_value()); + CHECK(result->parts[0].content_type == "application/json"); + CHECK(result->parts[0].content_encoding == "zstd"); + } + + TEST_CASE("filename in content-disposition") + { + std::string body = + "--boundary\r\n" + "Content-Disposition: form-data; name=\"file\"; filename=\"test.eml\"\r\n" + "\r\n" + "file-data\r\n" + "--boundary--\r\n"; + + auto result = rspamd::http::parse_multipart_form(body, "boundary"); + REQUIRE(result.has_value()); + CHECK(result->parts[0].name == "file"); + CHECK(result->parts[0].filename == "test.eml"); + } + + TEST_CASE("empty part data") + { + std::string body = + "--boundary\r\n" + "Content-Disposition: form-data; name=\"empty\"\r\n" + "\r\n" + "\r\n" + "--boundary--\r\n"; + + auto result = rspamd::http::parse_multipart_form(body, "boundary"); + REQUIRE(result.has_value()); + 
CHECK(result->parts[0].name == "empty"); + CHECK(result->parts[0].data.empty()); + } + + TEST_CASE("empty boundary") + { + auto result = rspamd::http::parse_multipart_form("some data", ""); + CHECK(!result.has_value()); + } + + TEST_CASE("empty data") + { + auto result = rspamd::http::parse_multipart_form("", "boundary"); + CHECK(!result.has_value()); + } + + TEST_CASE("missing closing boundary") + { + std::string body = + "--boundary\r\n" + "Content-Disposition: form-data; name=\"part1\"\r\n" + "\r\n" + "data1\r\n" + "--boundary\r\n" + "Content-Disposition: form-data; name=\"part2\"\r\n" + "\r\n" + "data2"; + + auto result = rspamd::http::parse_multipart_form(body, "boundary"); + REQUIRE(result.has_value()); + /* Parser should return the first part it found before the second boundary */ + CHECK(result->parts.size() >= 1); + CHECK(result->parts[0].name == "part1"); + CHECK(result->parts[0].data == "data1"); + } + + TEST_CASE("max parts limit") + { + std::string body; + for (int i = 0; i < 10; i++) { + body += "--boundary\r\n"; + body += "Content-Disposition: form-data; name=\"part" + std::to_string(i) + "\"\r\n"; + body += "\r\n"; + body += "data" + std::to_string(i) + "\r\n"; + } + body += "--boundary--\r\n"; + + auto result = rspamd::http::parse_multipart_form(body, "boundary"); + REQUIRE(result.has_value()); + CHECK(result->parts.size() == 8); + } + + TEST_CASE("garbage data") + { + std::string garbage = "this is just random garbage with no boundary markers at all"; + auto result = rspamd::http::parse_multipart_form(garbage, "boundary"); + CHECK(!result.has_value()); + } + + TEST_CASE("boundary embedded in content") + { + /* The boundary string appears in the part body but not preceded by \r\n-- */ + std::string body = + "--boundary\r\n" + "Content-Disposition: form-data; name=\"test\"\r\n" + "\r\n" + "This text mentions boundary as a word\r\n" + "--boundary--\r\n"; + + auto result = rspamd::http::parse_multipart_form(body, "boundary"); + 
REQUIRE(result.has_value()); + CHECK(result->parts.size() == 1); + CHECK(result->parts[0].data == "This text mentions boundary as a word"); + } + + TEST_CASE("no headers in part") + { + /* Part has no Content-Disposition header, just raw data after boundary. + * Without a proper header block (\r\n\r\n separator), the parser treats + * the entire part content as data (including the leading \r\n). */ + std::string body = + "--boundary\r\n" + "\r\n" + "raw data without headers\r\n" + "--boundary--\r\n"; + + auto result = rspamd::http::parse_multipart_form(body, "boundary"); + REQUIRE(result.has_value()); + CHECK(result->parts.size() == 1); + CHECK(result->parts[0].name.empty()); + /* Data includes the leading \r\n since no header separator was found */ + CHECK(result->parts[0].data == "\r\nraw data without headers"); + } + + TEST_CASE("mixed CRLF and LF") + { + /* When the body between parts uses LF-only to separate from the + * next boundary, the parser should find both parts via lf_delim fallback. 
*/ + std::string body = + "--boundary\r\n" + "Content-Disposition: form-data; name=\"part1\"\r\n" + "\r\n" + "data1\n" + "--boundary\r\n" + "Content-Disposition: form-data; name=\"part2\"\r\n" + "\r\n" + "data2\n" + "--boundary--\r\n"; + + auto result = rspamd::http::parse_multipart_form(body, "boundary"); + REQUIRE(result.has_value()); + CHECK(result->parts.size() == 2); + CHECK(result->parts[0].name == "part1"); + CHECK(result->parts[0].data == "data1"); + CHECK(result->parts[1].name == "part2"); + CHECK(result->parts[1].data == "data2"); + } + + TEST_CASE("case-insensitive header names") + { + std::string body = + "--boundary\r\n" + "content-disposition: form-data; name=\"lower\"\r\n" + "content-type: text/plain\r\n" + "content-encoding: gzip\r\n" + "\r\n" + "test\r\n" + "--boundary--\r\n"; + + auto result = rspamd::http::parse_multipart_form(body, "boundary"); + REQUIRE(result.has_value()); + CHECK(result->parts[0].name == "lower"); + CHECK(result->parts[0].content_type == "text/plain"); + CHECK(result->parts[0].content_encoding == "gzip"); + } +} + +TEST_SUITE("multipart_response") +{ + TEST_CASE("single part serialization") + { + rspamd::http::multipart_response resp; + std::string data = "{\"action\":\"reject\"}"; + resp.add_part("result", "application/json", data); + + auto serialized = resp.serialize(); + auto boundary = std::string(resp.get_boundary()); + + CHECK(serialized.find("--" + boundary) != std::string::npos); + CHECK(serialized.find("Content-Disposition: form-data; name=\"result\"") != std::string::npos); + CHECK(serialized.find("Content-Type: application/json") != std::string::npos); + CHECK(serialized.find(data) != std::string::npos); + CHECK(serialized.find("--" + boundary + "--") != std::string::npos); + } + + TEST_CASE("two parts serialization") + { + rspamd::http::multipart_response resp; + std::string result_data = "{\"action\":\"reject\"}"; + std::string body_data = "rewritten body"; + resp.add_part("result", "application/json", 
result_data); + resp.add_part("body", "message/rfc822", body_data); + + auto serialized = resp.serialize(); + + /* Both parts present */ + CHECK(serialized.find("name=\"result\"") != std::string::npos); + CHECK(serialized.find("name=\"body\"") != std::string::npos); + CHECK(serialized.find(result_data) != std::string::npos); + CHECK(serialized.find(body_data) != std::string::npos); + + /* result appears before body */ + CHECK(serialized.find("name=\"result\"") < serialized.find("name=\"body\"")); + } + + TEST_CASE("content_type includes boundary") + { + rspamd::http::multipart_response resp; + auto ct = resp.content_type(); + + CHECK(ct.find("multipart/mixed") != std::string::npos); + CHECK(ct.find("boundary=\"") != std::string::npos); + CHECK(ct.find(std::string(resp.get_boundary())) != std::string::npos); + } + + TEST_CASE("unique boundaries") + { + rspamd::http::multipart_response resp1; + rspamd::http::multipart_response resp2; + CHECK(resp1.get_boundary() != resp2.get_boundary()); + } + + TEST_CASE("empty data part") + { + rspamd::http::multipart_response resp; + std::string empty; + resp.add_part("empty", "application/octet-stream", empty); + + auto serialized = resp.serialize(); + CHECK(serialized.find("name=\"empty\"") != std::string::npos); + CHECK(serialized.find("Content-Type: application/octet-stream") != std::string::npos); + } +} + +TEST_SUITE("multipart_roundtrip") +{ + TEST_CASE("build then parse") + { + rspamd::http::multipart_response resp; + std::string result_data = "{\"action\":\"reject\",\"score\":15.0}"; + std::string body_data = "Subject: test\r\n\r\nRewritten body content"; + resp.add_part("result", "application/json", result_data); + resp.add_part("body", "message/rfc822", body_data); + + auto serialized = resp.serialize(); + auto boundary = std::string(resp.get_boundary()); + + auto parsed = rspamd::http::parse_multipart_form(serialized, boundary); + REQUIRE(parsed.has_value()); + CHECK(parsed->parts.size() == 2); + + auto *result_part = 
rspamd::http::find_part(*parsed, "result"); + REQUIRE(result_part != nullptr); + CHECK(result_part->data == result_data); + CHECK(result_part->content_type == "application/json"); + + auto *body_part = rspamd::http::find_part(*parsed, "body"); + REQUIRE(body_part != nullptr); + CHECK(body_part->data == body_data); + } + + TEST_CASE("build then parse single part") + { + rspamd::http::multipart_response resp; + std::string data = "{\"action\":\"no action\"}"; + resp.add_part("result", "application/json", data); + + auto serialized = resp.serialize(); + auto boundary = std::string(resp.get_boundary()); + + auto parsed = rspamd::http::parse_multipart_form(serialized, boundary); + REQUIRE(parsed.has_value()); + CHECK(parsed->parts.size() == 1); + + auto *result_part = rspamd::http::find_part(*parsed, "result"); + REQUIRE(result_part != nullptr); + CHECK(result_part->data == data); + } +} + +#endif// RSPAMD_CXX_UNIT_MULTIPART_HXX