static const char *execute = nullptr;
static const char *sort = nullptr;
static const char **http_headers = nullptr;
+static const char **metadata_headers = nullptr;
static const char **exclude_patterns = nullptr;
static int weight = 0;
static int flag = 0;
"Write mime body of message with headers instead of just a scan's result", nullptr},
{"header", 0, 0, G_OPTION_ARG_STRING_ARRAY, &http_headers,
"Add custom HTTP header to query (can be repeated)", nullptr},
+ {"metadata-header", 0, 0, G_OPTION_ARG_STRING_ARRAY, &metadata_headers,
+ "Add custom field to v3 metadata headers as KEY=VALUE or KEY:VALUE (can be repeated)", nullptr},
{"exclude", 0, 0, G_OPTION_ARG_STRING_ARRAY, &exclude_patterns,
"Exclude specific glob patterns in file names (can be repeated)", nullptr},
{"sort", 0, 0, G_OPTION_ARG_STRING, &sort,
ucl_object_unref(flags_arr);
}
+	/*
+	 * --metadata-header arguments: each "KEY=VALUE" / "KEY:VALUE" string is
+	 * split on its first ':' or '=' and collected into a "headers" sub-object
+	 * of the v3 metadata. These travel in the metadata body part and are
+	 * exposed server-side via task:get_request_header(), free of the HTTP
+	 * header size limit.
+	 */
+	if (metadata_headers) {
+		auto *custom_hdrs = ucl_object_typed_new(UCL_OBJECT);
+		unsigned int accepted = 0;
+
+		for (auto **arg = metadata_headers; *arg != nullptr; ++arg) {
+			const std::string_view spec{*arg};
+			const auto sep = spec.find_first_of(":=");
+			/* No delimiter: the whole argument is the key, the value stays empty */
+			const std::string name{spec.substr(0, sep)};
+			const std::string value{sep == std::string_view::npos
+										? std::string_view{}
+										: spec.substr(sep + 1)};
+
+			/* An argument starting with the delimiter has no usable key */
+			if (name.empty()) {
+				continue;
+			}
+
+			ucl_object_insert_key(custom_hdrs,
+								  ucl_object_fromstring(value.c_str()),
+								  name.c_str(), 0, true);
+			++accepted;
+		}
+
+		if (accepted == 0) {
+			/* Nothing usable was supplied: drop the empty container */
+			ucl_object_unref(custom_hdrs);
+		}
+		else {
+			ucl_object_insert_key(metadata, custom_hdrs, "headers", 0, false);
+		}
+	}
+
rspamd_client_command_v3(conn, "checkv3", metadata, in,
rspamc_client_cb, cbdata, compressed,
msgpack_mode,
g_array_free(extra, TRUE);
}
+/*
+ * Register one value of the metadata "headers" object as a task request header.
+ *
+ * @param task    task that receives the request header
+ * @param key     header name bytes (not copied)
+ * @param klen    length of `key`
+ * @param val_obj UCL object holding the header value; nothing is added when
+ *                ucl_object_tolstring() yields NULL for it
+ *
+ * Only the two ftok descriptors are allocated (from the task pool); they
+ * reference the key/value bytes in place instead of copying them. Those bytes
+ * belong to the metadata UCL object, which is kept in task->meta for the whole
+ * task lifetime. Lengths are taken from the UCL accessors, so values with
+ * embedded NULs (msgpack) are preserved.
+ */
+static void
+rspamd_protocol_metadata_add_header(struct rspamd_task *task,
+									const char *key, gsize klen,
+									const ucl_object_t *val_obj)
+{
+	gsize value_len;
+	const char *value = ucl_object_tolstring(val_obj, &value_len);
+
+	if (value != NULL) {
+		rspamd_ftok_t *hdr_name = rspamd_mempool_alloc(task->task_pool,
+													   sizeof(*hdr_name));
+		rspamd_ftok_t *hdr_value = rspamd_mempool_alloc(task->task_pool,
+														sizeof(*hdr_value));
+
+		hdr_name->begin = key;
+		hdr_name->len = klen;
+		hdr_value->begin = value;
+		hdr_value->len = value_len;
+
+		rspamd_task_add_request_header(task, hdr_name, hdr_value);
+	}
+}
+
/*
* Handle metadata from a parsed UCL object for v3 protocol.
* Maps structured metadata fields to task fields.
}
}
+	/*
+	 * "headers": object mapping a header name to a string value, or to an
+	 * array of strings when the name is repeated.
+	 *
+	 * Each entry becomes a task request header, so custom fields sent in the
+	 * metadata body part can be read with task:get_request_header() just like
+	 * v2 HTTP request headers, but without the HTTP header size limit.
+	 *
+	 * NB: task->request_headers is also the control channel that
+	 * rspamd_task_load_message consults for message-loading directives
+	 * (shm/file/path/dictionary/Content-Encoding...). Names on the reserved
+	 * list below are therefore logged and skipped, so client-supplied
+	 * metadata can never collide with them. Values that are neither strings
+	 * nor arrays are ignored.
+	 */
+	elt = ucl_object_lookup(metadata, "headers");
+	if (elt != NULL && ucl_object_type(elt) == UCL_OBJECT) {
+		static const char *reserved_hdrs[] = {
+			"shm", "shm-offset", "shm-length", "file", "path",
+			"dictionary", "compression", "content-encoding"};
+		ucl_object_iter_t hdr_it = NULL;
+
+		while ((cur = ucl_object_iterate(elt, &hdr_it, true)) != NULL) {
+			gsize klen;
+			const char *key = ucl_object_keyl(cur, &klen);
+			unsigned int idx;
+
+			if (key == NULL || klen == 0) {
+				continue;
+			}
+
+			/* Case-insensitive scan; stops early on the first reserved match */
+			for (idx = 0; idx < G_N_ELEMENTS(reserved_hdrs); idx++) {
+				if (strlen(reserved_hdrs[idx]) == klen &&
+					rspamd_lc_cmp(key, reserved_hdrs[idx], klen) == 0) {
+					break;
+				}
+			}
+
+			if (idx < G_N_ELEMENTS(reserved_hdrs)) {
+				msg_info_protocol("ignore reserved metadata header '%*s'",
+								  (int) klen, key);
+				continue;
+			}
+
+			switch (ucl_object_type(cur)) {
+			case UCL_STRING:
+				rspamd_protocol_metadata_add_header(task, key, klen, cur);
+				break;
+			case UCL_ARRAY: {
+				/*
+				 * A repeated name arrives as an array under that key:
+				 * every string element is added as a separate request
+				 * header (request headers are multi-valued).
+				 */
+				ucl_object_iter_t arr_it = NULL;
+				const ucl_object_t *item;
+
+				while ((item = ucl_object_iterate(cur, &arr_it, true)) != NULL) {
+					if (ucl_object_type(item) == UCL_STRING) {
+						rspamd_protocol_metadata_add_header(task, key, klen, item);
+					}
+				}
+				break;
+			}
+			default:
+				break;
+			}
+		}
+	}
+
return TRUE;
}
return FALSE;
}
- rspamd_mempool_add_destructor(task->task_pool,
- (rspamd_mempool_destruct_t) ucl_object_unref,
- metadata_obj);
+ /*
+ * The task takes ownership of the metadata object; it is unref'd in
+ * rspamd_task_free. Keeping it alive for the whole task lifetime also
+ * exposes it to Lua via task:get_metadata()/get_metadata_field().
+ */
+ task->meta = metadata_obj;
/* Apply metadata to task */
if (!rspamd_protocol_handle_metadata(task, metadata_obj)) {
ucl_object_unref(task->settings);
}
+ if (task->meta != NULL) {
+ ucl_object_unref(task->meta);
+ }
+
if (task->settings_elt != NULL) {
REF_RELEASE(task->settings_elt);
}
const char *classifier; /**< Classifier to learn (if needed) */
struct rspamd_lang_detector *lang_det; /**< Languages detector */
struct rspamd_message *message;
+ ucl_object_t *meta; /**< custom metadata object from a checkv3 request (or NULL) */
/* ESMTP arguments from milter protocol */
GHashTable *mail_esmtp_args; /**< ESMTP arguments from MAIL FROM command */
*/
LUA_FUNCTION_DEF(task, lookup_settings);
+/***
+ * @method task:get_metadata()
+ * Gets the custom metadata object supplied with a /checkv3 multipart request.
+ * Returns nil for requests that carried no metadata part (e.g. /checkv2).
+ * @return {lua object|nil} lua object generated from the metadata UCL
+ */
+LUA_FUNCTION_DEF(task, get_metadata);
+
+/***
+ * @method task:get_metadata_field(key)
+ * Gets a single top-level field from the /checkv3 metadata object.
+ * @param {string} key optional; if omitted the whole metadata object is returned (mirrors lookup_settings)
+ * @return {lua object|nil} lua object generated from the metadata field
+ */
+LUA_FUNCTION_DEF(task, get_metadata_field);
+
/***
* @method task:get_settings_id()
* Get numeric hash of settings id if specified for this task. 0 is returned otherwise.
LUA_INTERFACE_DEF(task, set_settings),
LUA_INTERFACE_DEF(task, get_settings),
LUA_INTERFACE_DEF(task, lookup_settings),
+ LUA_INTERFACE_DEF(task, get_metadata),
+ LUA_INTERFACE_DEF(task, get_metadata_field),
LUA_INTERFACE_DEF(task, get_settings_id),
LUA_INTERFACE_DEF(task, set_settings_id),
LUA_INTERFACE_DEF(task, merge_and_apply_settings),
return 1;
}
+/*
+ * Lua: task:get_metadata()
+ * Pushes the task's metadata object (task->meta) converted to a Lua value,
+ * or nil when the task has no metadata. Raises a Lua error when argument 1
+ * is not a valid task.
+ */
+static int
+lua_task_get_metadata(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	struct rspamd_task *task = lua_check_task(L, 1);
+
+	if (task == NULL) {
+		return luaL_error(L, "invalid arguments");
+	}
+
+	if (task->meta == NULL) {
+		lua_pushnil(L);
+		return 1;
+	}
+
+	return ucl_object_push_lua(L, task->meta, true);
+}
+
+/*
+ * Lua: task:get_metadata_field([key])
+ * With a string key, pushes the matching top-level field of the task's
+ * metadata object (task->meta), or nil when that field is absent. Without a
+ * string key, pushes the whole metadata object. Pushes nil when the task has
+ * no metadata, and raises a Lua error when argument 1 is not a valid task.
+ */
+static int
+lua_task_get_metadata_field(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	struct rspamd_task *task = lua_check_task(L, 1);
+	const char *key = NULL;
+	const ucl_object_t *selected;
+
+	if (task == NULL) {
+		return luaL_error(L, "invalid arguments");
+	}
+
+	/* The key is read before the metadata check, whether or not metadata exists */
+	if (lua_isstring(L, 2)) {
+		key = lua_tostring(L, 2);
+	}
+
+	if (task->meta == NULL) {
+		lua_pushnil(L);
+		return 1;
+	}
+
+	/* No key selects the whole object, otherwise a single top-level field */
+	selected = (key == NULL) ? task->meta : ucl_object_lookup(task->meta, key);
+
+	if (selected == NULL) {
+		lua_pushnil(L);
+		return 1;
+	}
+
+	return ucl_object_push_lua(L, selected, true);
+}
+
static int
lua_task_get_settings_id(lua_State *L)
{
${result} = Run Rspamc -p -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_NORMAL} --protocol-v3
... --msgpack --key ${RSPAMD_KEY_PUB1} --settings=${SETTINGS_NOSYMBOLS} ${GTUBE}
Check Rspamc ${result} GTUBE (
+
+checkv3 custom metadata header via get_request_header
+ [Documentation] Custom field in the metadata "headers" sub-object is retrievable via task:get_request_header
+ &{V3_HDRS} = Create Dictionary X-V3-Custom=hello-from-meta
+ &{V3_META} = Create Dictionary headers=${V3_HDRS}
+ Scan File V3 ${MESSAGE} metadata=${V3_META}
+ Expect Symbol With Option TEST_V3_META_HEADER hello-from-meta
+
+checkv3 metadata fields via get_metadata and get_metadata_field
+ [Documentation] Arbitrary top-level metadata fields are readable via task:get_metadata()/get_metadata_field()
+ &{V3_META} = Create Dictionary custom_field=meta-value-42
+ Scan File V3 ${MESSAGE} metadata=${V3_META}
+ Expect Symbol With Option TEST_V3_META_FIELD meta-value-42
+ Expect Symbol With Option TEST_V3_META_FIELD_LOOKUP meta-value-42
+
+checkv3 via rspamc with metadata-header
+ [Documentation] rspamc --metadata-header injects a metadata header retrievable via task:get_request_header
+ ${result} = Run Rspamc -p -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_NORMAL} --protocol-v3
+ ... --metadata-header=X-V3-Custom=from-rspamc ${MESSAGE}
+ Check Rspamc ${result} TEST_V3_META_HEADER (
# 109_composites_postfilter
lua = "{= env.TESTDIR =}/lua/composites_postfilter.lua"
+# 430_checkv3 custom metadata
+lua = "{= env.TESTDIR =}/lua/checkv3_meta.lua"
+
.include(priority=1,duplicate=merge) "{= env.TESTDIR =}/configs/merged-local.conf"
.include(priority=2,duplicate=replace) "{= env.TESTDIR =}/configs/merged-override.conf"
--- /dev/null
+-- Symbols exercising the /checkv3 custom-metadata feature.
+--
+-- Option A: a custom field carried in the metadata "headers" sub-object is
+-- exposed as a task request header (task:get_request_header).
+-- Option B: arbitrary metadata fields are readable via task:get_metadata()
+-- and task:get_metadata_field(key).
+--
+-- All callbacks are no-ops unless their specific field is present, so the
+-- symbols stay inert for every other suite sharing the merged config.
+
+-- Fires with the value of the 'X-V3-Custom' request header as its option;
+-- does nothing when that header is absent.
+rspamd_config:register_symbol({
+  name = 'TEST_V3_META_HEADER',
+  score = 1.0,
+  callback = function(task)
+    local custom_hdr = task:get_request_header('X-V3-Custom')
+    if custom_hdr then
+      return true, tostring(custom_hdr)
+    end
+  end
+})
+
+-- Fires with metadata.custom_field (read through task:get_metadata()) as its
+-- option; does nothing when there is no metadata or no such field.
+rspamd_config:register_symbol({
+  name = 'TEST_V3_META_FIELD',
+  score = 1.0,
+  callback = function(task)
+    local metadata = task:get_metadata()
+    local custom = metadata and metadata.custom_field
+    if custom then
+      return true, tostring(custom)
+    end
+  end
+})
+
+-- Fires with the 'custom_field' metadata field (read through
+-- task:get_metadata_field()) as its option; does nothing when it is absent.
+rspamd_config:register_symbol({
+  name = 'TEST_V3_META_FIELD_LOOKUP',
+  score = 1.0,
+  callback = function(task)
+    local custom = task:get_metadata_field('custom_field')
+    if custom then
+      return true, tostring(custom)
+    end
+  end
+})