git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Fix] Honor mime_utf8 option in INVALID_MSGID rule 6011/head
author Alexander Moisseev <moiseev@mezonplus.ru>
Wed, 29 Apr 2026 07:42:53 +0000 (10:42 +0300)
committer Alexander Moisseev <moiseev@mezonplus.ru>
Wed, 29 Apr 2026 08:00:37 +0000 (11:00 +0300)
Two related issues caused INVALID_MSGID false positives on valid
EAI/SMTPUTF8 Message-IDs (RFC 6532):

* The sane_msgid regexp unconditionally rejected bytes \x80-\xff,
even when mime_utf8 was enabled. Relax the regexp in that case
while keeping structural checks intact.

* The configuration option was registered only as enable_mime_utf,
but the corresponding Lua API is rspamd_config:is_mime_utf8(),
so users naturally try enable_mime_utf8. That spelling silently
had no effect because the parser did not bind it to any field.
Register enable_mime_utf8 as an alias mapped to the same struct
field so configs using it actually take effect.

Add a functional test (configs/mid_utf8.conf, messages/mid_eai_utf8.eml,
cases/107_mid_utf8.robot) that exercises both fixes via the new
option name and verifies that structurally invalid Message-IDs are
still flagged.

Issue #6007

rules/regexp/headers.lua
src/libserver/cfg_rcl.cxx
test/functional/cases/107_mid_utf8.robot [new file with mode: 0644]
test/functional/configs/mid_utf8.conf [new file with mode: 0644]
test/functional/messages/mid_eai_utf8.eml [new file with mode: 0644]

index 1b5f55db16e66857098b0b829edb5a3a6a0efe47..0e0e17a031aec6e9692208625fd96f894e3bdd6b 100644 (file)
@@ -533,7 +533,13 @@ reconf['FORGED_MUA_POSTBOX_MSGID_UNKNOWN'] = {
 }
 
 -- Message id validity
-local sane_msgid = 'Message-Id=/^<?[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\@[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+>?\\s*$/H'
+-- Permit non-ASCII bytes in EAI/SMTPUTF8 Message-IDs (RFC 6532) when mime_utf8 is enabled
+local sane_msgid
+if rspamd_config:is_mime_utf8() then
+  sane_msgid = 'Message-Id=/^<?[^<>\\\\ \\t\\n\\r\\x0b]+\\@[^<>\\\\ \\t\\n\\r\\x0b]+>?\\s*$/H'
+else
+  sane_msgid = 'Message-Id=/^<?[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\@[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+>?\\s*$/H'
+end
 local msgid_comment = 'Message-Id=/\\(.*\\)/H'
 reconf['INVALID_MSGID'] = {
   re = string.format('(%s) & !((%s) | (%s))', has_mid, sane_msgid, msgid_comment),
index add33f39bc87bc7cbd7dc78dc92dae49136d8f12..8a472f42469379cb861fc31d0936a3483f51cd99 100644 (file)
@@ -2125,6 +2125,13 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
                                                                           G_STRUCT_OFFSET(struct rspamd_config, enable_mime_utf),
                                                                           0,
                                                                           "Enable UTF8 mode for mime");
+               /* Alias matching the Lua API name rspamd_config:is_mime_utf8() */
+               rspamd_rcl_add_default_handler(sub,
+                                                                          "enable_mime_utf8",
+                                                                          rspamd_rcl_parse_struct_boolean,
+                                                                          G_STRUCT_OFFSET(struct rspamd_config, enable_mime_utf),
+                                                                          0,
+                                                                          "Enable UTF8 mode for mime (alias of enable_mime_utf)");
                rspamd_rcl_add_default_handler(sub,
                                                                           "enable_url_rewrite",
                                                                           rspamd_rcl_parse_struct_boolean,
diff --git a/test/functional/cases/107_mid_utf8.robot b/test/functional/cases/107_mid_utf8.robot
new file mode 100644 (file)
index 0000000..f203a97
--- /dev/null
@@ -0,0 +1,28 @@
+*** Settings ***
+Suite Setup     Rspamd Setup
+Suite Teardown  Rspamd Teardown
+Library         ${RSPAMD_TESTDIR}/lib/rspamd.py
+Resource        ${RSPAMD_TESTDIR}/lib/rspamd.robot
+Variables       ${RSPAMD_TESTDIR}/lib/vars.py
+
+*** Variables ***
+${CONFIG}              ${RSPAMD_TESTDIR}/configs/mid_utf8.conf
+${RSPAMD_SCOPE}        Suite
+${RSPAMD_URL_TLD}      ${RSPAMD_TESTDIR}/../lua/unit/test_tld.dat
+${SETTINGS_MID_UTF8}   {symbols_enabled = [INVALID_MSGID,MISSING_MID]}
+
+*** Test Cases ***
+MID UTF8 - valid EAI Message-ID is not flagged as invalid
+  [Documentation]  RFC 6532: when enable_mime_utf8 is enabled, a Message-ID
+  ...              with a UTF-8 internationalized domain must be accepted.
+  Scan File  ${RSPAMD_TESTDIR}/messages/mid_eai_utf8.eml
+  ...  Settings=${SETTINGS_MID_UTF8}
+  Do Not Expect Symbol  INVALID_MSGID
+  Do Not Expect Symbol  MISSING_MID
+
+MID UTF8 - structurally broken Message-ID is still flagged
+  [Documentation]  Even with enable_mime_utf8 enabled, a Message-ID that
+  ...              violates structural rules (no @) must still be detected.
+  Scan File  ${RSPAMD_TESTDIR}/messages/fws_fp.eml
+  ...  Settings=${SETTINGS_MID_UTF8}
+  Expect Symbol With Score  INVALID_MSGID  1.70
diff --git a/test/functional/configs/mid_utf8.conf b/test/functional/configs/mid_utf8.conf
new file mode 100644 (file)
index 0000000..cedbc7e
--- /dev/null
@@ -0,0 +1,48 @@
+options = {
+       filters = ["regexp"]
+       url_tld = "{= env.TESTDIR =}/../lua/unit/test_tld.dat"
+       pidfile = "{= env.TMPDIR =}/rspamd.pid";
+       lua_path = "{= env.INSTALLROOT =}/share/rspamd/lib/?.lua";
+       # Enable UTF-8 mime mode (SMTPUTF8/EAI per RFC 6532).
+       # Use the alias name to also cover the option-name compatibility fix.
+       enable_mime_utf8 = true;
+       dns {
+               retransmits = 2;
+       }
+}
+logging = {
+       type = "file",
+       level = "debug"
+       filename = "{= env.TMPDIR =}/rspamd.log";
+}
+metric = {
+       name = "default",
+       actions = {
+               reject = 100500,
+       }
+       unknown_weight = 1
+}
+
+worker {
+       type = normal
+       bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_NORMAL =}"
+       count = 1
+       keypair {
+               pubkey = "{= env.KEY_PUB1 =}";
+               privkey = "{= env.KEY_PVT1 =}";
+       }
+       task_timeout = 10s;
+}
+
+worker {
+       type = controller
+       bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_CONTROLLER =}"
+       count = 1
+       secure_ip = ["127.0.0.1", "::1"];
+       stats_path = "{= env.TMPDIR =}/stats.ucl"
+}
+
+modules {
+       path = "{= env.TESTDIR =}/../../src/plugins/lua/"
+}
+lua = "{= env.INSTALLROOT =}/share/rspamd/rules/rspamd.lua"
diff --git a/test/functional/messages/mid_eai_utf8.eml b/test/functional/messages/mid_eai_utf8.eml
new file mode 100644 (file)
index 0000000..079df66
--- /dev/null
@@ -0,0 +1,12 @@
+From: Sender <sender@example.com>
+To: Receiver <receiver@example.com>
+Date: Fri, 5 Oct 2018 19:56:40 -0400
+Message-Id: <49faaad5-3d90-2713-583b-6cb8a5d06345@wildduck.äää.test>
+Subject: EAI/SMTPUTF8 message with internationalized Message-ID
+MIME-Version: 1.0
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 8bit
+
+This message has a valid Message-ID containing a UTF-8 internationalized
+domain (RFC 6532) and must not trigger INVALID_MSGID when
+enable_mime_utf8 is enabled.