]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] protocol: Expose custom metadata for /checkv3 6074/head
author Vsevolod Stakhov <vsevolod@rspamd.com>
Tue, 2 Jun 2026 18:04:34 +0000 (19:04 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Tue, 2 Jun 2026 18:09:26 +0000 (19:09 +0100)
Add two complementary ways to read custom fields sent with a /checkv3
multipart scan request, both free of the 80KB HTTP header limit that v2
hits, since the metadata travels in the multipart body:

  * A "headers" sub-object in the metadata part is injected into the
    task request headers, so task:get_request_header() works for custom
    fields exactly like v2 HTTP request headers. Reserved control-header
    names (shm/file/path/dictionary/Content-Encoding...) are skipped so
    client metadata cannot collide with the message-loading channel, and
    a repeated name (collapsed by UCL into an array) expands to a
    multi-valued request header.

  * The parsed metadata object is kept on task->meta and exposed to Lua
    via task:get_metadata() and task:get_metadata_field(key), mirroring
    get_settings()/lookup_settings(). The task now owns the object and
    frees it once in rspamd_task_free instead of via a pool destructor.

rspamc gains a repeatable --metadata-header KEY=VALUE option that builds
the metadata "headers" sub-object for v3 requests. Also drop a dead
is_msgpack variable in the v3 request handler.

Tests: functional cases in 430_checkv3.robot plus a checkv3_meta.lua
plugin exercising both options via raw multipart and rspamc.

src/client/rspamc.cxx
src/libserver/protocol.c
src/libserver/task.c
src/libserver/task.h
src/lua/lua_task.c
test/functional/cases/001_merged/430_checkv3.robot
test/functional/configs/merged.conf
test/functional/lua/checkv3_meta.lua [new file with mode: 0644]

index b7cffdf4ae1b7856aca26a2513eb012206977254..13fd507f3094fcb10ab45ef009ac6807dd40e7e6 100644 (file)
@@ -65,6 +65,7 @@ static const char *local_addr = nullptr;
 static const char *execute = nullptr;
 static const char *sort = nullptr;
 static const char **http_headers = nullptr;
+static const char **metadata_headers = nullptr;
 static const char **exclude_patterns = nullptr;
 static int weight = 0;
 static int flag = 0;
@@ -177,6 +178,8 @@ static GOptionEntry entries[] =
                 "Write mime body of message with headers instead of just a scan's result", nullptr},
                {"header", 0, 0, G_OPTION_ARG_STRING_ARRAY, &http_headers,
                 "Add custom HTTP header to query (can be repeated)", nullptr},
+               {"metadata-header", 0, 0, G_OPTION_ARG_STRING_ARRAY, &metadata_headers,
+                "Add custom field to v3 metadata headers as KEY=VALUE or KEY:VALUE (can be repeated)", nullptr},
                {"exclude", 0, 0, G_OPTION_ARG_STRING_ARRAY, &exclude_patterns,
                 "Exclude specific glob patterns in file names (can be repeated)", nullptr},
                {"sort", 0, 0, G_OPTION_ARG_STRING, &sort,
@@ -2380,6 +2383,46 @@ rspamc_process_input(struct ev_loop *ev_base, const struct rspamc_command &cmd,
                                ucl_object_unref(flags_arr);
                        }
 
+                       /*
+                        * Custom metadata headers: carried in the metadata body part and
+                        * exposed server-side via task:get_request_header(), free of the
+                        * HTTP header size limit.
+                        */
+                       if (metadata_headers) {
+                               ucl_object_t *hdrs_obj = ucl_object_typed_new(UCL_OBJECT);
+                               unsigned int nhdrs = 0;
+
+                               for (auto *mhdr = metadata_headers; *mhdr; mhdr++) {
+                                       std::string_view hdr_view{*mhdr};
+                                       auto delim_pos = std::find_if(std::begin(hdr_view), std::end(hdr_view),
+                                                                                                 [](auto c) { return c == ':' || c == '='; });
+                                       std::string key, val;
+
+                                       if (delim_pos == std::end(hdr_view)) {
+                                               key = std::string{hdr_view};
+                                       }
+                                       else {
+                                               auto off = std::distance(std::begin(hdr_view), delim_pos);
+                                               key = std::string{hdr_view.substr(0, off)};
+                                               val = std::string{hdr_view.substr(off + 1)};
+                                       }
+
+                                       if (!key.empty()) {
+                                               ucl_object_insert_key(hdrs_obj,
+                                                                                         ucl_object_fromstring(val.c_str()),
+                                                                                         key.c_str(), 0, true);
+                                               nhdrs++;
+                                       }
+                               }
+
+                               if (nhdrs > 0) {
+                                       ucl_object_insert_key(metadata, hdrs_obj, "headers", 0, false);
+                               }
+                               else {
+                                       ucl_object_unref(hdrs_obj);
+                               }
+                       }
+
                        rspamd_client_command_v3(conn, "checkv3", metadata, in,
                                                                         rspamc_client_cb, cbdata, compressed,
                                                                         msgpack_mode,
index b795d9a2f6b7cb5c168190c9ba4c24b0d93dfa0c..896e965b8596fced1c43cd742a42f87164e66ad5 100644 (file)
@@ -2327,6 +2327,35 @@ void rspamd_protocol_write_log_pipe(struct rspamd_task *task)
        g_array_free(extra, TRUE);
 }
 
+/*
+ * Register one entry of the metadata "headers" object as a task request header.
+ * Both tokens reference bytes stored inside the metadata UCL object, which
+ * task->meta keeps alive for the task lifetime, so the bytes are not copied.
+ * Explicit lengths from the UCL accessors keep embedded NULs (msgpack) intact.
+ */
+static void
+rspamd_protocol_metadata_add_header(struct rspamd_task *task,
+                                                                       const char *key, gsize klen,
+                                                                       const ucl_object_t *val_obj)
+{
+       rspamd_ftok_t *hdr_name, *hdr_value;
+       gsize value_len;
+       const char *value = ucl_object_tolstring(val_obj, &value_len);
+
+       if (!value) {
+               /* No string value could be obtained: nothing to add */
+               return;
+       }
+
+       hdr_name = rspamd_mempool_alloc(task->task_pool, sizeof(*hdr_name));
+       hdr_value = rspamd_mempool_alloc(task->task_pool, sizeof(*hdr_value));
+       hdr_value->begin = value;
+       hdr_value->len = value_len;
+       hdr_name->begin = key;
+       hdr_name->len = klen;
+       rspamd_task_add_request_header(task, hdr_name, hdr_value);
+}
+
 /*
  * Handle metadata from a parsed UCL object for v3 protocol.
  * Maps structured metadata fields to task fields.
@@ -2582,6 +2611,72 @@ rspamd_protocol_handle_metadata(struct rspamd_task *task,
                }
        }
 
+       /*
+        * headers (object: header-name -> string value, or array of strings when a
+        * name is repeated)
+        *
+        * Custom fields carried in the metadata body part are exposed as task
+        * request headers, so they are retrievable via task:get_request_header()
+        * exactly like v2 HTTP request headers - but without the HTTP header size
+        * limit, since the metadata travels in the multipart body.
+        *
+        * NB: task->request_headers is also the control channel that
+        * rspamd_task_load_message consults for message-loading directives
+        * (shm/file/path/dictionary/Content-Encoding...). Those reserved names are
+        * skipped here so client-supplied metadata can never collide with them.
+        */
+       elt = ucl_object_lookup(metadata, "headers");
+       if (elt && ucl_object_type(elt) == UCL_OBJECT) {
+               static const char *reserved_hdrs[] = {
+                       "shm", "shm-offset", "shm-length", "file", "path",
+                       "dictionary", "compression", "content-encoding"};
+               ucl_object_iter_t it = NULL;
+
+               while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
+                       gsize klen;
+                       const char *key = ucl_object_keyl(cur, &klen);
+                       gboolean reserved = FALSE;
+                       unsigned int i;
+
+                       if (key == NULL || klen == 0) {
+                               continue;
+                       }
+
+                       for (i = 0; i < G_N_ELEMENTS(reserved_hdrs); i++) {
+                               if (strlen(reserved_hdrs[i]) == klen &&
+                                       rspamd_lc_cmp(key, reserved_hdrs[i], klen) == 0) {
+                                       reserved = TRUE;
+                                       break;
+                               }
+                       }
+
+                       if (reserved) {
+                               msg_info_protocol("ignore reserved metadata header '%*s'",
+                                                                 (int) klen, key);
+                               continue;
+                       }
+
+                       if (ucl_object_type(cur) == UCL_STRING) {
+                               rspamd_protocol_metadata_add_header(task, key, klen, cur);
+                       }
+                       else if (ucl_object_type(cur) == UCL_ARRAY) {
+                               /*
+                                * A repeated header name is collapsed by the UCL parser into an
+                                * array under that key; expand each string value into its own
+                                * request header (request headers are multi-valued).
+                                */
+                               ucl_object_iter_t ait = NULL;
+                               const ucl_object_t *aval;
+
+                               while ((aval = ucl_object_iterate(cur, &ait, true)) != NULL) {
+                                       if (ucl_object_type(aval) == UCL_STRING) {
+                                               rspamd_protocol_metadata_add_header(task, key, klen, aval);
+                                       }
+                               }
+                       }
+               }
+       }
+
        return TRUE;
 }
 
@@ -2854,9 +2949,12 @@ rspamd_protocol_handle_v3_request(struct rspamd_task *task,
                return FALSE;
        }
 
-       rspamd_mempool_add_destructor(task->task_pool,
-                                                                 (rspamd_mempool_destruct_t) ucl_object_unref,
-                                                                 metadata_obj);
+       /*
+        * The task takes ownership of the metadata object; it is unref'd in
+        * rspamd_task_free. Keeping it alive for the whole task lifetime also
+        * exposes it to Lua via task:get_metadata()/get_metadata_field().
+        */
+       task->meta = metadata_obj;
 
        /* Apply metadata to task */
        if (!rspamd_protocol_handle_metadata(task, metadata_obj)) {
index 2b5a443cf814998689520be50d29ee659e085a1c..31953cf16a194ecd09743c71d530442fb256a843 100644 (file)
@@ -297,6 +297,10 @@ void rspamd_task_free(struct rspamd_task *task)
                        ucl_object_unref(task->settings);
                }
 
+               if (task->meta != NULL) {
+                       ucl_object_unref(task->meta);
+               }
+
                if (task->settings_elt != NULL) {
                        REF_RELEASE(task->settings_elt);
                }
index fe8cbf901270d1ff51f68e36edd84e13c0d1c637..78cd9319ed69fdc6a963cd277bb4121f24065b71 100644 (file)
@@ -224,6 +224,7 @@ struct rspamd_task {
        const char *classifier;                /**< Classifier to learn (if needed)                             */
        struct rspamd_lang_detector *lang_det; /**< Languages detector                                                          */
        struct rspamd_message *message;
+       ucl_object_t *meta; /**< custom metadata object from a checkv3 request (or NULL) */
 
        /* ESMTP arguments from milter protocol */
        GHashTable *mail_esmtp_args; /**< ESMTP arguments from MAIL FROM command */
index 7d70ff168f52cf4b149c2b808d169e409df4ba14..44477f8fac2293e2a25ea9458623ba41b6189114 100644 (file)
@@ -1061,6 +1061,22 @@ LUA_FUNCTION_DEF(task, get_settings);
  */
 LUA_FUNCTION_DEF(task, lookup_settings);
 
+/***
+ * @method task:get_metadata()
+ * Gets the custom metadata object supplied with a /checkv3 multipart request.
+ * Returns nil for requests that carried no metadata part (e.g. /checkv2).
+ * @return {lua object|nil} lua object generated from the metadata UCL, or nil when the task holds no metadata
+ */
+LUA_FUNCTION_DEF(task, get_metadata);
+
+/***
+ * @method task:get_metadata_field(key)
+ * Gets a single top-level field from the /checkv3 metadata object.
+ * @param {string} key optional; if omitted the whole metadata object is returned (mirrors lookup_settings)
+ * @return {lua object|nil} lua object generated from the metadata field; nil when the task holds no metadata or the field is not found
+ */
+LUA_FUNCTION_DEF(task, get_metadata_field);
+
 /***
  * @method task:get_settings_id()
  * Get numeric hash of settings id if specified for this task. 0 is returned otherwise.
@@ -1478,6 +1494,8 @@ static const struct luaL_reg tasklib_m[] = {
        LUA_INTERFACE_DEF(task, set_settings),
        LUA_INTERFACE_DEF(task, get_settings),
        LUA_INTERFACE_DEF(task, lookup_settings),
+       LUA_INTERFACE_DEF(task, get_metadata),
+       LUA_INTERFACE_DEF(task, get_metadata_field),
        LUA_INTERFACE_DEF(task, get_settings_id),
        LUA_INTERFACE_DEF(task, set_settings_id),
        LUA_INTERFACE_DEF(task, merge_and_apply_settings),
@@ -6855,6 +6873,68 @@ lua_task_lookup_settings(lua_State *L)
        return 1;
 }
 
+/* Lua: task:get_metadata() - pushes the /checkv3 metadata object or nil */
+static int
+lua_task_get_metadata(lua_State *L)
+{
+       LUA_TRACE_POINT;
+       struct rspamd_task *task = lua_check_task(L, 1);
+
+       if (task == NULL) {
+               return luaL_error(L, "invalid arguments");
+       }
+
+       if (task->meta == NULL) {
+               /* Nothing was stored on the task: report that as nil */
+               lua_pushnil(L);
+
+               return 1;
+       }
+
+       /* Convert the stored UCL object into a Lua value */
+       return ucl_object_push_lua(L, task->meta, true);
+}
+
+/*
+ * task:get_metadata_field([key]): push the top-level metadata field `key`, or
+ * the whole metadata object when no string key is given; nil if absent. The
+ * key is matched by its full byte length so embedded NULs do not truncate it.
+ */
+static int
+lua_task_get_metadata_field(lua_State *L)
+{
+       LUA_TRACE_POINT;
+       struct rspamd_task *task = lua_check_task(L, 1);
+       const char *key = NULL;
+       size_t keylen = 0;
+       const ucl_object_t *elt;
+
+       if (task == NULL) {
+               return luaL_error(L, "invalid arguments");
+       }
+
+       if (lua_isstring(L, 2)) {
+               key = lua_tolstring(L, 2, &keylen);
+       }
+
+       if (task->meta == NULL) {
+               lua_pushnil(L);
+               return 1;
+       }
+
+       if (key == NULL) {
+               return ucl_object_push_lua(L, task->meta, true);
+       }
+
+       elt = ucl_object_lookup_len(task->meta, key, keylen);
+       if (elt != NULL) {
+               return ucl_object_push_lua(L, elt, true);
+       }
+
+       lua_pushnil(L);
+       return 1;
+}
+
 static int
 lua_task_get_settings_id(lua_State *L)
 {
index fc013c06a32ec5750b0dfea3f18b32f9fa864015..fc046f9b4d825685a2d0f026f1249c4ed6d14366 100644 (file)
@@ -88,3 +88,23 @@ checkv3 via rspamc encrypted with msgpack
   ${result} =  Run Rspamc  -p  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_NORMAL}  --protocol-v3
   ...  --msgpack  --key  ${RSPAMD_KEY_PUB1}  --settings=${SETTINGS_NOSYMBOLS}  ${GTUBE}
   Check Rspamc  ${result}  GTUBE (
+
+checkv3 custom metadata header via get_request_header
+  [Documentation]  Custom field in the metadata "headers" sub-object is retrievable via task:get_request_header
+  &{V3_HDRS} =  Create Dictionary  X-V3-Custom=hello-from-meta
+  &{V3_META} =  Create Dictionary  headers=${V3_HDRS}
+  Scan File V3  ${MESSAGE}  metadata=${V3_META}
+  Expect Symbol With Option  TEST_V3_META_HEADER  hello-from-meta
+
+checkv3 metadata fields via get_metadata and get_metadata_field
+  [Documentation]  Arbitrary top-level metadata fields are readable via task:get_metadata()/get_metadata_field()
+  &{V3_META} =  Create Dictionary  custom_field=meta-value-42
+  Scan File V3  ${MESSAGE}  metadata=${V3_META}
+  Expect Symbol With Option  TEST_V3_META_FIELD  meta-value-42
+  Expect Symbol With Option  TEST_V3_META_FIELD_LOOKUP  meta-value-42
+
+checkv3 via rspamc with metadata-header
+  [Documentation]  rspamc --metadata-header injects a metadata header retrievable via task:get_request_header
+  ${result} =  Run Rspamc  -p  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_NORMAL}  --protocol-v3
+  ...  --metadata-header=X-V3-Custom=from-rspamc  ${MESSAGE}
+  Check Rspamc  ${result}  TEST_V3_META_HEADER (
index 6e270432a5d6a96be5523efbff2b2b0b99aed2d4..4b03265876d64e11e7bfe630c4be303abd640a30 100644 (file)
@@ -43,6 +43,9 @@ lua = "{= env.TESTDIR =}/lua/external_relay.lua"
 # 109_composites_postfilter
 lua = "{= env.TESTDIR =}/lua/composites_postfilter.lua"
 
+# 430_checkv3 custom metadata
+lua = "{= env.TESTDIR =}/lua/checkv3_meta.lua"
+
 
 .include(priority=1,duplicate=merge) "{= env.TESTDIR =}/configs/merged-local.conf"
 .include(priority=2,duplicate=replace) "{= env.TESTDIR =}/configs/merged-override.conf"
diff --git a/test/functional/lua/checkv3_meta.lua b/test/functional/lua/checkv3_meta.lua
new file mode 100644 (file)
index 0000000..3e001db
--- /dev/null
@@ -0,0 +1,39 @@
+-- Symbols exercising the /checkv3 custom-metadata feature.
+--
+-- Option A: a custom field carried in the metadata "headers" sub-object is
+--           exposed as a task request header (task:get_request_header).
+-- Option B: arbitrary metadata fields are readable via task:get_metadata()
+--           and task:get_metadata_field(key).
+--
+-- All callbacks are no-ops unless their specific field is present, so the
+-- symbols stay inert for every other suite sharing the merged config.
+
+rspamd_config:register_symbol{
+  score = 1.0,
+  name = 'TEST_V3_META_HEADER',
+  -- Inert unless the X-V3-Custom request header is set; its value is the option.
+  callback = function(task)
+    local custom_hdr = task:get_request_header('X-V3-Custom')
+    if custom_hdr then return true, tostring(custom_hdr) end
+  end
+}
+
+rspamd_config:register_symbol{
+  score = 1.0,
+  name = 'TEST_V3_META_FIELD',
+  callback = function(task) -- inert unless the metadata carries custom_field
+    local metadata = task:get_metadata()
+    local field = metadata and metadata.custom_field
+    if field then return true, tostring(field) end
+  end
+}
+
+rspamd_config:register_symbol{
+  score = 1.0,
+  name = 'TEST_V3_META_FIELD_LOOKUP',
+  -- Inert unless task:get_metadata_field('custom_field') yields a value.
+  callback = function(task)
+    local field = task:get_metadata_field('custom_field')
+    if field then return true, tostring(field) end
+  end
+}