]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Add tests and fix stuff
author Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 12 Sep 2025 15:33:13 +0000 (16:33 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 12 Sep 2025 15:33:13 +0000 (16:33 +0100)
src/libserver/re_cache.c
src/lua/lua_task.c
src/plugins/lua/multimap.lua
test/functional/cases/001_merged/270_selector.robot
test/functional/cases/270_regexp_maps.robot
test/functional/configs/maps/sa_selector_rules.map [new file with mode: 0644]
test/functional/configs/merged-local.conf
test/functional/configs/regexp_maps.conf
test/functional/messages/sa_full_boundary.eml [new file with mode: 0644]
test/functional/messages/sa_header_body_raw.eml [new file with mode: 0644]
test/functional/messages/url1.eml

index 06ba26528df032462aaa0fc6a9a63b5a14a93623..fc056ecd86b678c32ce5ba6d2da559a85f420142 100644 (file)
@@ -2013,6 +2013,13 @@ rspamd_re_cache_type_from_string(const char *str)
                        ret = RSPAMD_RE_MAX;
                        break;
                }
+
+               /* Fallback string checks for types not covered by the hash switch */
+               if (ret == RSPAMD_RE_MAX) {
+                       if (g_ascii_strcasecmp(str, "selector") == 0) {
+                               ret = RSPAMD_RE_SELECTOR;
+                       }
+               }
        }
        else {
                ret = RSPAMD_RE_MAX;
index 5085e6c2abe79e261405811fa22ab691b1896eff..a6eef5455cc46d5e549544732adae76d370a0860 100644 (file)
@@ -6294,69 +6294,82 @@ lua_task_process_regexp(lua_State *L)
        struct rspamd_lua_regexp *re = NULL;
        gboolean strong = FALSE;
        const char *type_str = NULL, *header_str = NULL, *selector_str = NULL;
-       gsize header_len = 0, selector_len = 0;
        GError *err = NULL;
        int ret = 0;
        enum rspamd_re_type type = RSPAMD_RE_BODY;
 
        /*
-        * - `re`* : regular expression object
-        * - `type`*: type of regular expression:
-        *   + `mime`: mime regexp
-        *   + `rawmime`: raw mime regexp
-        *   + `header`: header regexp
-        *   + `rawheader`: raw header expression
-        *   + `body`: raw body regexp
-        *   + `url`: url regexp
-        *   + `selector`: selector regexp
-        * - `header`: for header/rawheader/mimeheader regexp means the name of header
-        * - `selector`: for selector regexp means the selector name (registered in scope)
-        * - `strong`: case sensitive match for headers
+        * Two calling conventions are supported:
+        * 1) Table form: task:process_regexp({ re=..., type=..., header=?, selector=?, strong=? })
+        * 2) Positional: task:process_regexp(re, type, header_or_selector, strong)
         */
        if (task != NULL) {
-               if (!rspamd_lua_parse_table_arguments(L, 2, &err,
-                                                                                         RSPAMD_LUA_PARSE_ARGUMENTS_DEFAULT,
-                                                                                         "*re=U{regexp};*type=S;header=V;selector=V;strong=B",
-                                                                                         &re, &type_str,
-                                                                                         &header_len, &header_str,
-                                                                                         &selector_len, &selector_str,
-                                                                                         &strong)) {
-                       msg_err_task("cannot get parameters list: %e", err);
-
-                       if (err) {
-                               g_error_free(err);
-                       }
+               if (lua_type(L, 2) == LUA_TTABLE) {
+                       /* Table-based API */
+                       size_t header_len = 0, selector_len = 0;
+                       if (!rspamd_lua_parse_table_arguments(L, 2, &err,
+                                                                                                 RSPAMD_LUA_PARSE_ARGUMENTS_DEFAULT,
+                                                                                                 "*re=U{regexp};*type=S;header=V;selector=V;strong=B",
+                                                                                                 &re, &type_str,
+                                                                                                 &header_len, &header_str,
+                                                                                                 &selector_len, &selector_str,
+                                                                                                 &strong)) {
+                               msg_err_task("cannot get parameters list: %e", err);
 
-                       return luaL_error(L, "invalid arguments");
-               }
-               else {
-                       type = rspamd_re_cache_type_from_string(type_str);
+                               if (err) {
+                                       g_error_free(err);
+                               }
 
-                       if ((type == RSPAMD_RE_HEADER || type == RSPAMD_RE_RAWHEADER || type == RSPAMD_RE_MIMEHEADER) && header_str == NULL) {
-                               msg_err_task(
-                                       "header argument is mandatory for header/rawheader regexps");
+                               return luaL_error(L, "invalid arguments");
                        }
                        else {
-                               const char *type_data = NULL;
-                               gsize type_len = 0;
+                               type = rspamd_re_cache_type_from_string(type_str);
 
-                               if (type == RSPAMD_RE_HEADER || type == RSPAMD_RE_RAWHEADER || type == RSPAMD_RE_MIMEHEADER) {
-                                       type_data = header_str;
-                                       type_len = header_len;
+                               if ((type == RSPAMD_RE_HEADER || type == RSPAMD_RE_RAWHEADER || type == RSPAMD_RE_MIMEHEADER) && header_str == NULL) {
+                                       msg_err_task(
+                                               "header argument is mandatory for header/rawheader regexps");
                                }
-                               else if (type == RSPAMD_RE_SELECTOR) {
-                                       if (selector_str == NULL) {
-                                               msg_err_task("selector argument is mandatory for selector regexps");
+                               else {
+                                       const char *type_data = NULL;
+                                       size_t type_len = 0;
+
+                                       if (type == RSPAMD_RE_HEADER || type == RSPAMD_RE_RAWHEADER || type == RSPAMD_RE_MIMEHEADER) {
+                                               type_data = header_str;
+                                               type_len = header_str ? (strlen(header_str) + 1) : 0;
                                        }
-                                       else {
+                                       else if (type == RSPAMD_RE_SELECTOR) {
                                                type_data = selector_str;
-                                               type_len = selector_len;
+                                               type_len = selector_str ? (strlen(selector_str) + 1) : 0;
                                        }
+
+                                       ret = rspamd_re_cache_process(task, re->re, type,
+                                                                                                 (gpointer) type_data, type_len, strong);
                                }
+                       }
+               }
+               else {
+                       /* Positional API: (re, type, header_or_selector, strong) */
+                       re = lua_check_regexp(L, 2);
+                       type_str = luaL_checkstring(L, 3);
+                       type = rspamd_re_cache_type_from_string(type_str);
+                       const char *type_data = NULL;
+                       size_t type_len = 0;
+
+                       if (lua_type(L, 4) == LUA_TSTRING) {
+                               type_data = lua_tostring(L, 4);
+                               type_len = strlen(type_data) + 1;
+                       }
+                       if (lua_type(L, 5) == LUA_TBOOLEAN) {
+                               strong = lua_toboolean(L, 5);
+                       }
 
-                               ret = rspamd_re_cache_process(task, re->re, type,
-                                                                                         (gpointer) type_data, type_len, strong);
+                       /* For header/rawheader/mimeheader, arg4 is header; for selector, arg4 is selector name */
+                       if ((type == RSPAMD_RE_HEADER || type == RSPAMD_RE_RAWHEADER || type == RSPAMD_RE_MIMEHEADER) && type_data == NULL) {
+                               msg_err_task("header argument is mandatory for header/rawheader regexps");
                        }
+
+                       ret = rspamd_re_cache_process(task, re->re, type,
+                                                                                 (gpointer) type_data, type_len, strong);
                }
        }
        else {
index 7e93ba058fcaef395404520837c475dd21936ac8..21f4e3e67bf59f8b3be03545d52c8a25bf87830d 100644 (file)
@@ -39,6 +39,12 @@ local sa_atoms = {}
 local sa_scores = {}
 local sa_meta_rules = {}
 local sa_descriptions = {}
+-- Cache meta callbacks to avoid recreating closures per message
+local sa_meta_callbacks = {}
+-- Keep atom definitions to optionally register them as standalone symbols
+local sa_atom_defs = {}
+-- Track atoms that we have promoted to real symbols (scored/with description)
+local scored_atom_symbols = {}
 
 -- Symbol state tracking for graceful map reloads
 -- States: 'available', 'loading', 'orphaned'
@@ -201,6 +207,9 @@ local function create_sa_atom_function(name, re, match_type, opts)
       ret = process_re_match(re, task, 'sabody')
     end
 
+    -- Normalize return to a number before any further logic
+    ret = tonumber(ret) or 0
+
     if opts and opts.negate then
       -- Negate the result for !~ operators
       ret = (ret > 0) and 0 or 1
@@ -264,6 +273,13 @@ local function process_sa_line(rule, line)
           negate = negate
         })
 
+        -- Save atom definition for potential symbol registration
+        sa_atom_defs[atom_name] = {
+          re = re,
+          match_type = 'header',
+          opts = { header = header_name, strong = false, negate = negate },
+        }
+
         -- Track atom state
         regexp_rules_symbol_states[atom_name] = {
           state = 'loading',
@@ -294,6 +310,12 @@ local function process_sa_line(rule, line)
 
         sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'body', {})
 
+        sa_atom_defs[atom_name] = {
+          re = re,
+          match_type = 'body',
+          opts = {},
+        }
+
         -- Track atom state
         regexp_rules_symbol_states[atom_name] = {
           state = 'loading',
@@ -323,6 +345,12 @@ local function process_sa_line(rule, line)
 
         sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'rawbody', {})
 
+        sa_atom_defs[atom_name] = {
+          re = re,
+          match_type = 'rawbody',
+          opts = {},
+        }
+
         -- Track atom state
         regexp_rules_symbol_states[atom_name] = {
           state = 'loading',
@@ -352,6 +380,12 @@ local function process_sa_line(rule, line)
 
         sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'uri', {})
 
+        sa_atom_defs[atom_name] = {
+          re = re,
+          match_type = 'uri',
+          opts = {},
+        }
+
         -- Track atom state
         regexp_rules_symbol_states[atom_name] = {
           state = 'loading',
@@ -381,6 +415,12 @@ local function process_sa_line(rule, line)
 
         sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'full', {})
 
+        sa_atom_defs[atom_name] = {
+          re = re,
+          match_type = 'full',
+          opts = {},
+        }
+
         -- Track atom state
         regexp_rules_symbol_states[atom_name] = {
           state = 'loading',
@@ -443,6 +483,12 @@ local function process_sa_line(rule, line)
           negate = negate,
         })
 
+        sa_atom_defs[atom_name] = {
+          re = re,
+          match_type = 'selector',
+          opts = { selector = atom_name, negate = negate },
+        }
+
         -- Track atom state consistent with scoped regexps
         regexp_rules_symbol_states[atom_name] = {
           state = 'loading',
@@ -484,6 +530,28 @@ local function process_sa_line(rule, line)
       if score_value then
         sa_scores[score_symbol] = score_value
         lua_util.debugm(N, rspamd_config, 'added SA score: %s = %s', score_symbol, score_value)
+
+        -- If this score applies to an atom (not meta) and the atom exists,
+        -- promote it to a standalone symbol so it can appear in results and be checked via task:has_symbol
+        if sa_atom_defs[score_symbol] and not scored_atom_symbols[score_symbol] then
+          local adef = sa_atom_defs[score_symbol]
+          local id = rspamd_config:register_symbol({
+            name = score_symbol,
+            weight = score_value,
+            callback = create_sa_atom_function(score_symbol, adef.re, adef.match_type, adef.opts or {}),
+            type = 'normal',
+            flags = 'one_shot',
+            augmentations = {},
+          })
+          rspamd_config:set_metric_symbol({
+            name = score_symbol,
+            score = score_value,
+            description = sa_descriptions[score_symbol] or ('SA atom ' .. score_symbol),
+            group = N,
+          })
+          scored_atom_symbols[score_symbol] = id or true
+          lua_util.debugm(N, rspamd_config, 'promoted SA atom %s to symbol with score %s', score_symbol, score_value)
+        end
       end
     end
   elseif words[1] == 'describe' then
@@ -543,27 +611,37 @@ local function gen_sa_process_atom_cb(task, rule_name)
       end
     end
 
-    local atom_cb = sa_atoms[atom]
+    -- Meta must depend on symbols; atom is considered present if it is a registered symbol hit
+    if task:has_symbol(atom) then
+      return 1
+    end
 
-    if atom_cb then
-      local res = atom_cb(task)
+    -- Also allow meta to reference another meta by name via cached callback
+    local meta_cb = sa_meta_callbacks[atom]
+    if meta_cb then
+      local res = meta_cb(task)
+      return res or 0
+    end
 
-      -- Return result without logging each atom
-      return res
-    else
-      -- Check if this is a SA meta rule
-      local meta_rule = sa_meta_rules[atom]
-      if meta_rule then
-        local meta_cb = create_sa_meta_callback(meta_rule)
-        local res = meta_cb(task)
-        return res or 0
+    -- Finally, evaluate atom via cached atom callback if present (and cache per-task)
+    local atom_cb = sa_atoms[atom]
+    if atom_cb then
+      local atoms_cache = task:cache_get('sa_multimap_atoms_evaluated')
+      if not atoms_cache then
+        atoms_cache = {}
+        task:cache_set('sa_multimap_atoms_evaluated', atoms_cache)
       end
 
-      -- External atom - check if task has this symbol
-      if task:has_symbol(atom) then
-        return 1
+      local cached_res = atoms_cache[atom]
+      if cached_res ~= nil then
+        return cached_res
       end
+
+      local res = atom_cb(task) or 0
+      atoms_cache[atom] = res
+      return res
     end
+
     return 0
   end
 end
@@ -611,12 +689,16 @@ create_sa_meta_callback = function(meta_rule)
     local already_processed = cached[meta_rule.symbol]
 
     if not (already_processed and already_processed['default']) then
-      local expression = rspamd_expression.create(meta_rule.expression,
-        parse_sa_atom,
-        rspamd_config:get_mempool())
+      local expression = meta_rule.compiled_expression
       if not expression then
-        rspamd_logger.errx(rspamd_config, 'Cannot parse SA meta expression: %s', meta_rule.expression)
-        return
+        expression = rspamd_expression.create(meta_rule.expression,
+          parse_sa_atom,
+          rspamd_config:get_mempool())
+        if not expression then
+          rspamd_logger.errx(rspamd_config, 'Cannot parse SA meta expression: %s', meta_rule.expression)
+          return
+        end
+        meta_rule.compiled_expression = expression
       end
 
       local function exec_symbol(cur_res)
@@ -659,16 +741,31 @@ local function finalize_sa_rules()
     fun.length(sa_meta_rules))
 
   for meta_name, meta_rule in pairs(sa_meta_rules) do
+    -- Precompile expression at load time
+    if not meta_rule.compiled_expression then
+      local compiled = rspamd_expression.create(meta_rule.expression,
+        parse_sa_atom,
+        rspamd_config:get_mempool())
+      if not compiled then
+        rspamd_logger.errx(rspamd_config, 'Cannot parse SA meta expression during finalize: %s', meta_rule.expression)
+        goto continue_meta
+      end
+      meta_rule.compiled_expression = compiled
+    end
+
     local score = sa_scores[meta_name] or 1.0
     local description = sa_descriptions[meta_name] or ('multimap symbol ' .. meta_name)
 
     lua_util.debugm(N, rspamd_config, 'Registering SA meta rule %s (score: %s, expression: %s)',
       meta_name, score, meta_rule.expression)
 
+    local meta_cb = create_sa_meta_callback(meta_rule)
+    sa_meta_callbacks[meta_name] = meta_cb
+
     local id = rspamd_config:register_symbol({
       name = meta_name,
       weight = score,
-      callback = create_sa_meta_callback(meta_rule),
+      callback = meta_cb,
       type = 'normal',
       flags = 'one_shot',
       augmentations = {},
@@ -700,6 +797,7 @@ local function finalize_sa_rules()
 
     lua_util.debugm(N, rspamd_config, 'registered SA meta symbol: %s (score: %s)',
       meta_name, score)
+    ::continue_meta::
   end
 
   -- Mark orphaned symbols - only check meta symbols (not atoms) since atoms are just expression parts
index fa3ab8753a7b2d560d62fbc8105d5e2341116f75..91daba8936e5d5b377183c799b4810facd630b7e 100644 (file)
@@ -5,9 +5,11 @@ Variables       ${RSPAMD_TESTDIR}/lib/vars.py
 
 *** Variables ***
 ${MESSAGE}         ${RSPAMD_TESTDIR}/messages/subject1.eml
+${MSG_SPAM}        ${RSPAMD_TESTDIR}/messages/spam_message.eml
+${MSG_URL1}        ${RSPAMD_TESTDIR}/messages/url1.eml
 
 *** Test Cases ***
-Newlines 
+Newlines
   Scan File  ${MESSAGE}  User=test@user.com  Pass=all
   ...  Settings={symbols_enabled = [CONFIG_SELECTOR_RE_RCPT_SUBJECT, LUA_SELECTOR_RE]}
   Expect Symbol  CONFIG_SELECTOR_RE_RCPT_SUBJECT
@@ -17,3 +19,63 @@ Rspamd_text selector
   Scan File  ${MESSAGE}
   ...  Settings={symbols_enabled = [RSPAMD_TEXT_SELECTOR]}
   Expect Symbol  RSPAMD_TEXT_SELECTOR
+
+# SA-like regexp_rules: header/body/rawbody/uri/full/selector/meta
+SA Header Atom
+  Scan File  ${MSG_SPAM}
+  ...   Settings={symbols_enabled = [SA_HDR_SUBJ]}
+  Expect Symbol  SA_HDR_SUBJ
+
+SA Body Atom
+  Scan File  ${MSG_SPAM}
+  ...   Settings={symbols_enabled = [SA_BODY_SIMPLE]}
+  Expect Symbol  SA_BODY_SIMPLE
+
+SA Rawbody Atom
+  Scan File  ${MSG_SPAM}
+  ...   Settings={symbols_enabled = [SA_RAW_SIMPLE]}
+  Expect Symbol  SA_RAW_SIMPLE
+
+SA URI Atom
+  Scan File  ${MSG_URL1}
+  ...   Settings={symbols_enabled = [SA_URI_SHORT]}
+  Expect Symbol  SA_URI_SHORT
+
+SA Full Atom
+  Scan File  ${MSG_SPAM}
+  ...   Settings={symbols_enabled = [SA_FULL_BOUNDARY]}
+  Expect Symbol  SA_FULL_BOUNDARY
+
+SA Selector Atom (From Domain)
+  Scan File  ${MSG_SPAM}
+  ...   From=user@example.com
+  ...   Settings={symbols_enabled = [SA_SEL_FROM_DOM]}
+  Expect Symbol  SA_SEL_FROM_DOM
+
+SA Selector Atom (URL TLD)
+  Scan File  ${MSG_URL1}
+  ...   Settings={symbols_enabled = [SA_SEL_URL_TLD]}
+  Expect Symbol  SA_SEL_URL_TLD
+
+SA Selector Negation
+  Scan File  ${MSG_SPAM}
+  ...   From=user@example.com
+  ...   Settings={symbols_enabled = [SA_SEL_NOT_CORP]}
+  Expect Symbol  SA_SEL_NOT_CORP
+
+SA Meta AND
+  Scan File  ${MSG_SPAM}
+  ...   From=user@example.com
+  ...   Settings={symbols_enabled = [SA_META_AND, SA_HDR_SUBJ, SA_BODY_SIMPLE, SA_SEL_FROM_DOM]}
+  Expect Symbol  SA_META_AND
+
+SA Meta OR
+  Scan File  ${MSG_URL1}
+  ...   Settings={symbols_enabled = [SA_META_OR, SA_URI_SHORT, SA_SEL_URL_TLD]}
+  Expect Symbol  SA_META_OR
+
+SA Meta Complex
+  Scan File  ${MSG_SPAM}
+  ...   From=user@example.com
+  ...   Settings={symbols_enabled = [SA_META_COMPLEX, SA_RAW_SIMPLE, SA_SEL_NOT_CORP]}
+  Expect Symbol  SA_META_COMPLEX
index d89143bc8967e866345d76f9866452d40c52f3d3..f4a2565609105b26b55cb364d72ef27049f8ad01 100644 (file)
@@ -1,6 +1,6 @@
 *** Settings ***
-Test Setup      Rspamd Setup
-Test Teardown   Rspamd Teardown
+Suite Setup     Rspamd Setup
+Suite Teardown  Rspamd Teardown
 Library         ${RSPAMD_TESTDIR}/lib/rspamd.py
 Resource        ${RSPAMD_TESTDIR}/lib/rspamd.robot
 Variables       ${RSPAMD_TESTDIR}/lib/vars.py
@@ -8,8 +8,10 @@ Variables       ${RSPAMD_TESTDIR}/lib/vars.py
 *** Variables ***
 ${CONFIG}          ${RSPAMD_TESTDIR}/configs/regexp_maps.conf
 ${MESSAGE1}        ${RSPAMD_TESTDIR}/messages/advance_fee_fraud.eml
-${MESSAGE2}        ${RSPAMD_TESTDIR}/messages/spam_message.eml
-${RSPAMD_SCOPE}    Test
+${MESSAGE2}        ${RSPAMD_TESTDIR}/messages/sa_header_body_raw.eml
+${FULLMSG}         ${RSPAMD_TESTDIR}/messages/sa_full_boundary.eml
+${URL1}            ${RSPAMD_TESTDIR}/messages/url1.eml
+${RSPAMD_SCOPE}    Suite
 ${RSPAMD_URL_TLD}  ${RSPAMD_TESTDIR}/../lua/unit/test_tld.dat
 
 *** Test Cases ***
@@ -46,3 +48,58 @@ Atom Rules Availability
     # We test by ensuring the meta rules work correctly
     Expect Symbol  ADVANCE_FEE_2
     Expect Symbol  ADVANCE_FEE_3
+
+SA-Like: Header Atom
+    [Documentation]    Header regexp atom works (SA_HDR_SUBJ)
+    Scan File  ${MESSAGE2}
+    Expect Symbol  SA_HDR_SUBJ
+
+SA-Like: Body Atom
+    [Documentation]    Body regexp atom works (SA_BODY_SIMPLE)
+    Scan File  ${MESSAGE2}
+    Expect Symbol  SA_BODY_SIMPLE
+
+SA-Like: Rawbody Atom
+    [Documentation]    Rawbody regexp atom works (SA_RAW_SIMPLE)
+    Scan File  ${MESSAGE2}
+    Expect Symbol  SA_RAW_SIMPLE
+
+SA-Like: URI Atom
+    [Documentation]    URI regexp atom works (SA_URI_SHORT)
+    Scan File  ${URL1}
+    Expect Symbol  SA_URI_SHORT
+
+SA-Like: Full Atom
+    [Documentation]    Full message regexp atom works (SA_FULL_BOUNDARY)
+    Scan File  ${FULLMSG}
+    Expect Symbol  SA_FULL_BOUNDARY
+
+SA-Like: Selector From Domain
+    [Documentation]    Selector-based atom (from:domain) works (SA_SEL_FROM_DOM)
+    Scan File  ${MESSAGE2}
+    Expect Symbol  SA_SEL_FROM_DOM
+
+SA-Like: Selector URL TLD
+    [Documentation]    Selector-based atom (specific_urls:tld) works (SA_SEL_URL_TLD)
+    Scan File  ${URL1}
+    Expect Symbol  SA_SEL_URL_TLD
+
+SA-Like: Selector Negation
+    [Documentation]    Selector negation works (SA_SEL_NOT_CORP)
+    Scan File  ${MESSAGE2}
+    Expect Symbol  SA_SEL_NOT_CORP
+
+SA-Like: Meta AND
+    [Documentation]    Meta rule with AND over header+body+selector
+    Scan File  ${MESSAGE2}
+    Expect Symbol  SA_META_AND
+
+SA-Like: Meta OR
+    [Documentation]    Meta rule with OR over uri+selector
+    Scan File  ${URL1}
+    Expect Symbol  SA_META_OR
+
+SA-Like: Meta Complex
+    [Documentation]    Complex meta combining negation and rawbody
+    Scan File  ${MESSAGE2}
+    Expect Symbol  SA_META_COMPLEX
diff --git a/test/functional/configs/maps/sa_selector_rules.map b/test/functional/configs/maps/sa_selector_rules.map
new file mode 100644 (file)
index 0000000..5088dda
--- /dev/null
@@ -0,0 +1,56 @@
+# SA-like rules set to test selector/header/body/rawbody/uri/full/meta
+
+# Header atom
+header SA_HDR_SUBJ Subject =~ /test\s+subject/i
+
+# Body atom
+body SA_BODY_SIMPLE /hello\s+world/i
+
+# Rawbody atom
+rawbody SA_RAW_SIMPLE /RAW\s+TOKEN/i
+
+# URI atom
+uri SA_URI_SHORT /bit\.ly|tinyurl/i
+
+# Full message atom
+full SA_FULL_BOUNDARY /From:.*\n(?:.*\n)*?Subject:/i
+
+# Selector atom: from domain equals example.com
+selector SA_SEL_FROM_DOM from:domain =~ /^example\.com$/i
+
+# Selector atom: specific_urls tld equals example.com
+selector SA_SEL_URL_TLD specific_urls({need_content = true, limit = 10}):get_tld =~ /^example\.com$/i
+
+# Selector negation (use domain to avoid addr formatting quirks)
+selector SA_SEL_NOT_CORP from:domain !~ /^corp\.example$/i
+
+# Meta rules combining atoms and selectors
+meta SA_META_AND SA_HDR_SUBJ & SA_BODY_SIMPLE & SA_SEL_FROM_DOM
+meta SA_META_OR SA_URI_SHORT | SA_SEL_URL_TLD
+meta SA_META_COMPLEX (SA_BODY_SIMPLE & SA_SEL_NOT_CORP) | SA_RAW_SIMPLE
+
+# Scores
+score SA_HDR_SUBJ 1.0
+score SA_BODY_SIMPLE 1.0
+score SA_RAW_SIMPLE 1.0
+score SA_URI_SHORT 1.0
+score SA_FULL_BOUNDARY 0.5
+score SA_SEL_FROM_DOM 1.0
+score SA_SEL_URL_TLD 1.0
+score SA_SEL_NOT_CORP 0.5
+score SA_META_AND 2.5
+score SA_META_OR 2.0
+score SA_META_COMPLEX 2.0
+
+# Descriptions
+describe SA_HDR_SUBJ Subject matches test subject
+describe SA_BODY_SIMPLE Body contains hello world
+describe SA_RAW_SIMPLE Raw body contains RAW TOKEN
+describe SA_URI_SHORT Message contains a shortener URL
+describe SA_FULL_BOUNDARY Header boundary present
+describe SA_SEL_FROM_DOM From domain equals example.com
+describe SA_SEL_URL_TLD URL tld equals example.com
+describe SA_SEL_NOT_CORP From address is not corp.example
+describe SA_META_AND Header+Body+Selector combo
+describe SA_META_OR URI or URL TLD selector
+describe SA_META_COMPLEX Complex combination with negation
index f70c4f865077cd4f6264ef4f5ee217c2365b1c1d..1d3b17cef28738190db43b4b2ef78ff337e03864 100644 (file)
@@ -72,6 +72,15 @@ logging = {
   log_usec = true;
 }
 
+# Add SA-like regexp_rules (selector/header/uri/full/meta) for merged suite
+multimap {
+  SA_SELECTOR_RULES {
+    type = "regexp_rules";
+    map = "{= env.TESTDIR =}/configs/maps/sa_selector_rules.map";
+    description = "SA-like selector/header/uri/full tests";
+  }
+}
+
 mid = {
     source = {
         url = [
index be4cd047b39803a321739c940d2de9f5c803ea59..730b9dd7c6e2d034a24db3625ada6e250c22f2c8 100644 (file)
@@ -1,5 +1,10 @@
 .include(duplicate=append,priority=0) "{= env.TESTDIR =}/configs/plugins.conf"
 
+# Ensure effective TLD data is loaded for URL/selector tests
+options {
+  url_tld = "{= env.TESTDIR =}/../lua/unit/test_tld.dat";
+}
+
 # Configure multimap for regexp rules testing
 multimap {
   ADVANCE_FEE_SA_RULES {
@@ -8,6 +13,11 @@ multimap {
     scope = "advance_fee_scope";
     description = "Advance fee fraud detection rules";
   }
+  SA_SELECTOR_RULES {
+    type = "regexp_rules";
+    map = "{= env.TESTDIR =}/configs/maps/sa_selector_rules.map";
+    description = "SA-like selector/header/uri/full tests";
+  }
 }
 
 # Override symbol scores for testing
diff --git a/test/functional/messages/sa_full_boundary.eml b/test/functional/messages/sa_full_boundary.eml
new file mode 100644 (file)
index 0000000..3ff8be6
--- /dev/null
@@ -0,0 +1,19 @@
+From: someone@example.com
+To: test@example.com
+Subject: something else
+Date: Fri, 01 Jan 2021 00:00:00 +0000
+Message-ID: <sa2@example.com>
+MIME-Version: 1.0
+Content-Type: multipart/alternative; boundary="bnd123"
+
+--bnd123
+Content-Type: text/plain; charset=UTF-8
+
+Plain part
+
+--bnd123
+Content-Type: text/html; charset=UTF-8
+
+<html><body>HTML part</body></html>
+
+--bnd123--
diff --git a/test/functional/messages/sa_header_body_raw.eml b/test/functional/messages/sa_header_body_raw.eml
new file mode 100644 (file)
index 0000000..96d01d3
--- /dev/null
@@ -0,0 +1,11 @@
+From: user@example.com
+To: test@example.com
+Subject: test subject
+Date: Fri, 01 Jan 2021 00:00:00 +0000
+Message-ID: <sa1@example.com>
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 7bit
+
+hello world
+This message body also contains a RAW TOKEN marker for rawbody matching.
index 42361ae2d410faab77b20d3c58ac1507954b6563..f6cd306932e3d1cdf4c0cf3d97606d038cf09a12 100644 (file)
@@ -1,3 +1,14 @@
-Content-Type: text/plain
+From: user@example.com
+To: test@example.com
+Subject: Links test
+Date: Fri, 01 Jan 2021 00:00:00 +0000
+Message-ID: <url1@example.com>
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 7bit
 
-hello https://www.example.com/foo?a=b hello
+Here is a shortener URL to match SA_URI_SHORT:
+https://tinyurl.com/abc123
+
+And here is a normal URL with TLD example.com to match SA_SEL_URL_TLD:
+https://www.example.com/foo?a=b