From: Vsevolod Stakhov Date: Fri, 12 Sep 2025 15:33:13 +0000 (+0100) Subject: [Project] Add tests and fix stuff X-Git-Tag: 3.13.0~11^2~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ecc587a3df77f59dc2b15b6ca7dcb516c0db81f1;p=thirdparty%2Frspamd.git [Project] Add tests and fix stuff --- diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c index 06ba26528d..fc056ecd86 100644 --- a/src/libserver/re_cache.c +++ b/src/libserver/re_cache.c @@ -2013,6 +2013,13 @@ rspamd_re_cache_type_from_string(const char *str) ret = RSPAMD_RE_MAX; break; } + + /* Fallback string checks for types not covered by the hash switch */ + if (ret == RSPAMD_RE_MAX) { + if (g_ascii_strcasecmp(str, "selector") == 0) { + ret = RSPAMD_RE_SELECTOR; + } + } } else { ret = RSPAMD_RE_MAX; diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 5085e6c2ab..a6eef5455c 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -6294,69 +6294,82 @@ lua_task_process_regexp(lua_State *L) struct rspamd_lua_regexp *re = NULL; gboolean strong = FALSE; const char *type_str = NULL, *header_str = NULL, *selector_str = NULL; - gsize header_len = 0, selector_len = 0; GError *err = NULL; int ret = 0; enum rspamd_re_type type = RSPAMD_RE_BODY; /* - * - `re`* : regular expression object - * - `type`*: type of regular expression: - * + `mime`: mime regexp - * + `rawmime`: raw mime regexp - * + `header`: header regexp - * + `rawheader`: raw header expression - * + `body`: raw body regexp - * + `url`: url regexp - * + `selector`: selector regexp - * - `header`: for header/rawheader/mimeheader regexp means the name of header - * - `selector`: for selector regexp means the selector name (registered in scope) - * - `strong`: case sensitive match for headers + * Two calling conventions are supported: + * 1) Table form: task:process_regexp({ re=..., type=..., header=?, selector=?, strong=? }) + * 2) Positional: task:process_regexp(re, type, header_or_selector, strong) */ if (task != NULL) { - if (!rspamd_lua_parse_table_arguments(L, 2, &err, - RSPAMD_LUA_PARSE_ARGUMENTS_DEFAULT, - "*re=U{regexp};*type=S;header=V;selector=V;strong=B", - &re, &type_str, - &header_len, &header_str, - &selector_len, &selector_str, - &strong)) { - msg_err_task("cannot get parameters list: %e", err); - - if (err) { - g_error_free(err); - } + if (lua_type(L, 2) == LUA_TTABLE) { + /* Table-based API */ + size_t header_len = 0, selector_len = 0; + if (!rspamd_lua_parse_table_arguments(L, 2, &err, + RSPAMD_LUA_PARSE_ARGUMENTS_DEFAULT, + "*re=U{regexp};*type=S;header=V;selector=V;strong=B", + &re, &type_str, + &header_len, &header_str, + &selector_len, &selector_str, + &strong)) { + msg_err_task("cannot get parameters list: %e", err); - return luaL_error(L, "invalid arguments"); - } - else { - type = rspamd_re_cache_type_from_string(type_str); + if (err) { + g_error_free(err); + } - if ((type == RSPAMD_RE_HEADER || type == RSPAMD_RE_RAWHEADER || type == RSPAMD_RE_MIMEHEADER) && header_str == NULL) { - msg_err_task( - "header argument is mandatory for header/rawheader regexps"); + return luaL_error(L, "invalid arguments"); } else { - const char *type_data = NULL; - gsize type_len = 0; + type = rspamd_re_cache_type_from_string(type_str); - if (type == RSPAMD_RE_HEADER || type == RSPAMD_RE_RAWHEADER || type == RSPAMD_RE_MIMEHEADER) { - type_data = header_str; - type_len = header_len; + if ((type == RSPAMD_RE_HEADER || type == RSPAMD_RE_RAWHEADER || type == RSPAMD_RE_MIMEHEADER) && header_str == NULL) { + msg_err_task( + "header argument is mandatory for header/rawheader regexps"); } - else if (type == RSPAMD_RE_SELECTOR) { - if (selector_str == NULL) { - msg_err_task("selector argument is mandatory for selector regexps"); + else { + const char *type_data = NULL; + size_t type_len = 0; + + if (type == RSPAMD_RE_HEADER || type == RSPAMD_RE_RAWHEADER || type == RSPAMD_RE_MIMEHEADER) { + type_data = header_str; + type_len = header_str ? (strlen(header_str) + 1) : 0; } - else { + else if (type == RSPAMD_RE_SELECTOR) { type_data = selector_str; - type_len = selector_len; + type_len = selector_str ? (strlen(selector_str) + 1) : 0; } + + ret = rspamd_re_cache_process(task, re->re, type, + (gpointer) type_data, type_len, strong); } + } + } + else { + /* Positional API: (re, type, header_or_selector, strong) */ + re = lua_check_regexp(L, 2); + type_str = luaL_checkstring(L, 3); + type = rspamd_re_cache_type_from_string(type_str); + const char *type_data = NULL; + size_t type_len = 0; + + if (lua_type(L, 4) == LUA_TSTRING) { + type_data = lua_tostring(L, 4); + type_len = strlen(type_data) + 1; + } + if (lua_type(L, 5) == LUA_TBOOLEAN) { + strong = lua_toboolean(L, 5); + } - ret = rspamd_re_cache_process(task, re->re, type, - (gpointer) type_data, type_len, strong); + /* For header/rawheader/mimeheader, arg4 is header; for selector, arg4 is selector name */ + if ((type == RSPAMD_RE_HEADER || type == RSPAMD_RE_RAWHEADER || type == RSPAMD_RE_MIMEHEADER) && type_data == NULL) { + msg_err_task("header argument is mandatory for header/rawheader regexps"); } + + ret = rspamd_re_cache_process(task, re->re, type, + (gpointer) type_data, type_len, strong); } } else { diff --git a/src/plugins/lua/multimap.lua b/src/plugins/lua/multimap.lua index 7e93ba058f..21f4e3e67b 100644 --- a/src/plugins/lua/multimap.lua +++ b/src/plugins/lua/multimap.lua @@ -39,6 +39,12 @@ local sa_atoms = {} local sa_scores = {} local sa_meta_rules = {} local sa_descriptions = {} +-- Cache meta callbacks to avoid recreating closures per message +local sa_meta_callbacks = {} +-- Keep atom definitions to optionally register them as standalone symbols +local sa_atom_defs = {} +-- Track atoms that we have promoted to real symbols (scored/with description) +local scored_atom_symbols = {} -- Symbol state tracking for graceful map reloads -- States: 'available', 'loading', 'orphaned' @@ -201,6 +207,9 @@ local function create_sa_atom_function(name, re, match_type, opts) ret = process_re_match(re, task, 'sabody') end + -- Normalize return to a number before any further logic + ret = tonumber(ret) or 0 + if opts and opts.negate then -- Negate the result for !~ operators ret = (ret > 0) and 0 or 1 @@ -264,6 +273,13 @@ local function process_sa_line(rule, line) negate = negate }) + -- Save atom definition for potential symbol registration + sa_atom_defs[atom_name] = { + re = re, + match_type = 'header', + opts = { header = header_name, strong = false, negate = negate }, + } + -- Track atom state regexp_rules_symbol_states[atom_name] = { state = 'loading', @@ -294,6 +310,12 @@ local function process_sa_line(rule, line) sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'body', {}) + sa_atom_defs[atom_name] = { + re = re, + match_type = 'body', + opts = {}, + } + -- Track atom state regexp_rules_symbol_states[atom_name] = { state = 'loading', @@ -323,6 +345,12 @@ local function process_sa_line(rule, line) sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'rawbody', {}) + sa_atom_defs[atom_name] = { + re = re, + match_type = 'rawbody', + opts = {}, + } + -- Track atom state regexp_rules_symbol_states[atom_name] = { state = 'loading', @@ -352,6 +380,12 @@ local function process_sa_line(rule, line) sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'uri', {}) + sa_atom_defs[atom_name] = { + re = re, + match_type = 'uri', + opts = {}, + } + -- Track atom state regexp_rules_symbol_states[atom_name] = { state = 'loading', @@ -381,6 +415,12 @@ local function process_sa_line(rule, line) sa_atoms[atom_name] = create_sa_atom_function(atom_name, re, 'full', {}) + sa_atom_defs[atom_name] = { + re = re, + match_type = 'full', + opts = {}, + } + -- Track atom state regexp_rules_symbol_states[atom_name] = { state = 'loading', @@ -443,6 +483,12 @@ local function process_sa_line(rule, line) negate = negate, }) + sa_atom_defs[atom_name] = { + re = re, + match_type = 'selector', + opts = { selector = atom_name, negate = negate }, + } + -- Track atom state consistent with scoped regexps regexp_rules_symbol_states[atom_name] = { state = 'loading', @@ -484,6 +530,28 @@ local function process_sa_line(rule, line) if score_value then sa_scores[score_symbol] = score_value lua_util.debugm(N, rspamd_config, 'added SA score: %s = %s', score_symbol, score_value) + + -- If this score applies to an atom (not meta) and the atom exists, + -- promote it to a standalone symbol so it can appear in results and be checked via task:has_symbol + if sa_atom_defs[score_symbol] and not scored_atom_symbols[score_symbol] then + local adef = sa_atom_defs[score_symbol] + local id = rspamd_config:register_symbol({ + name = score_symbol, + weight = score_value, + callback = create_sa_atom_function(score_symbol, adef.re, adef.match_type, adef.opts or {}), + type = 'normal', + flags = 'one_shot', + augmentations = {}, + }) + rspamd_config:set_metric_symbol({ + name = score_symbol, + score = score_value, + description = sa_descriptions[score_symbol] or ('SA atom ' .. score_symbol), + group = N, + }) + scored_atom_symbols[score_symbol] = id or true + lua_util.debugm(N, rspamd_config, 'promoted SA atom %s to symbol with score %s', score_symbol, score_value) + end end end elseif words[1] == 'describe' then @@ -543,27 +611,37 @@ local function gen_sa_process_atom_cb(task, rule_name) end end - local atom_cb = sa_atoms[atom] + -- Meta must depend on symbols; atom is considered present if it is a registered symbol hit + if task:has_symbol(atom) then + return 1 + end - if atom_cb then - local res = atom_cb(task) + -- Also allow meta to reference another meta by name via cached callback + local meta_cb = sa_meta_callbacks[atom] + if meta_cb then + local res = meta_cb(task) + return res or 0 + end - -- Return result without logging each atom - return res - else - -- Check if this is a SA meta rule - local meta_rule = sa_meta_rules[atom] - if meta_rule then - local meta_cb = create_sa_meta_callback(meta_rule) - local res = meta_cb(task) - return res or 0 + -- Finally, evaluate atom via cached atom callback if present (and cache per-task) + local atom_cb = sa_atoms[atom] + if atom_cb then + local atoms_cache = task:cache_get('sa_multimap_atoms_evaluated') + if not atoms_cache then + atoms_cache = {} + task:cache_set('sa_multimap_atoms_evaluated', atoms_cache) end - -- External atom - check if task has this symbol - if task:has_symbol(atom) then - return 1 + local cached_res = atoms_cache[atom] + if cached_res ~= nil then + return cached_res end + + local res = atom_cb(task) or 0 + atoms_cache[atom] = res + return res end + return 0 end end @@ -611,12 +689,16 @@ create_sa_meta_callback = function(meta_rule) local already_processed = cached[meta_rule.symbol] if not (already_processed and already_processed['default']) then - local expression = rspamd_expression.create(meta_rule.expression, - parse_sa_atom, - rspamd_config:get_mempool()) + local expression = meta_rule.compiled_expression if not expression then - rspamd_logger.errx(rspamd_config, 'Cannot parse SA meta expression: %s', meta_rule.expression) - return + expression = rspamd_expression.create(meta_rule.expression, + parse_sa_atom, + rspamd_config:get_mempool()) + if not expression then + rspamd_logger.errx(rspamd_config, 'Cannot parse SA meta expression: %s', meta_rule.expression) + return + end + meta_rule.compiled_expression = expression end local function exec_symbol(cur_res) @@ -659,16 +741,31 @@ local function finalize_sa_rules() fun.length(sa_meta_rules)) for meta_name, meta_rule in pairs(sa_meta_rules) do + -- Precompile expression at load time + if not meta_rule.compiled_expression then + local compiled = rspamd_expression.create(meta_rule.expression, + parse_sa_atom, + rspamd_config:get_mempool()) + if not compiled then + rspamd_logger.errx(rspamd_config, 'Cannot parse SA meta expression during finalize: %s', meta_rule.expression) + goto continue_meta + end + meta_rule.compiled_expression = compiled + end + local score = sa_scores[meta_name] or 1.0 local description = sa_descriptions[meta_name] or ('multimap symbol ' .. meta_name) lua_util.debugm(N, rspamd_config, 'Registering SA meta rule %s (score: %s, expression: %s)', meta_name, score, meta_rule.expression) + local meta_cb = create_sa_meta_callback(meta_rule) + sa_meta_callbacks[meta_name] = meta_cb + local id = rspamd_config:register_symbol({ name = meta_name, weight = score, - callback = create_sa_meta_callback(meta_rule), + callback = meta_cb, type = 'normal', flags = 'one_shot', augmentations = {}, @@ -700,6 +797,7 @@ local function finalize_sa_rules() lua_util.debugm(N, rspamd_config, 'registered SA meta symbol: %s (score: %s)', meta_name, score) + ::continue_meta:: end -- Mark orphaned symbols - only check meta symbols (not atoms) since atoms are just expression parts diff --git a/test/functional/cases/001_merged/270_selector.robot b/test/functional/cases/001_merged/270_selector.robot index fa3ab8753a..91daba8936 100644 --- a/test/functional/cases/001_merged/270_selector.robot +++ b/test/functional/cases/001_merged/270_selector.robot @@ -5,9 +5,11 @@ Variables ${RSPAMD_TESTDIR}/lib/vars.py *** Variables *** ${MESSAGE} ${RSPAMD_TESTDIR}/messages/subject1.eml +${MSG_SPAM} ${RSPAMD_TESTDIR}/messages/spam_message.eml +${MSG_URL1} ${RSPAMD_TESTDIR}/messages/url1.eml *** Test Cases *** -Newlines +Newlines Scan File ${MESSAGE} User=test@user.com Pass=all ... Settings={symbols_enabled = [CONFIG_SELECTOR_RE_RCPT_SUBJECT, LUA_SELECTOR_RE]} Expect Symbol CONFIG_SELECTOR_RE_RCPT_SUBJECT @@ -17,3 +19,63 @@ Rspamd_text selector Scan File ${MESSAGE} ... Settings={symbols_enabled = [RSPAMD_TEXT_SELECTOR]} Expect Symbol RSPAMD_TEXT_SELECTOR + +# SA-like regexp_rules: header/body/rawbody/uri/full/selector/meta +SA Header Atom + Scan File ${MSG_SPAM} + ... Settings={symbols_enabled = [SA_HDR_SUBJ]} + Expect Symbol SA_HDR_SUBJ + +SA Body Atom + Scan File ${MSG_SPAM} + ... Settings={symbols_enabled = [SA_BODY_SIMPLE]} + Expect Symbol SA_BODY_SIMPLE + +SA Rawbody Atom + Scan File ${MSG_SPAM} + ... Settings={symbols_enabled = [SA_RAW_SIMPLE]} + Expect Symbol SA_RAW_SIMPLE + +SA URI Atom + Scan File ${MSG_URL1} + ... Settings={symbols_enabled = [SA_URI_SHORT]} + Expect Symbol SA_URI_SHORT + +SA Full Atom + Scan File ${MSG_SPAM} + ... Settings={symbols_enabled = [SA_FULL_BOUNDARY]} + Expect Symbol SA_FULL_BOUNDARY + +SA Selector Atom (From Domain) + Scan File ${MSG_SPAM} + ... From=user@example.com + ... Settings={symbols_enabled = [SA_SEL_FROM_DOM]} + Expect Symbol SA_SEL_FROM_DOM + +SA Selector Atom (URL TLD) + Scan File ${MSG_URL1} + ... Settings={symbols_enabled = [SA_SEL_URL_TLD]} + Expect Symbol SA_SEL_URL_TLD + +SA Selector Negation + Scan File ${MSG_SPAM} + ... From=user@example.com + ... Settings={symbols_enabled = [SA_SEL_NOT_CORP]} + Expect Symbol SA_SEL_NOT_CORP + +SA Meta AND + Scan File ${MSG_SPAM} + ... From=user@example.com + ... Settings={symbols_enabled = [SA_META_AND, SA_HDR_SUBJ, SA_BODY_SIMPLE, SA_SEL_FROM_DOM]} + Expect Symbol SA_META_AND + +SA Meta OR + Scan File ${MSG_URL1} + ... Settings={symbols_enabled = [SA_META_OR, SA_URI_SHORT, SA_SEL_URL_TLD]} + Expect Symbol SA_META_OR + +SA Meta Complex + Scan File ${MSG_SPAM} + ... From=user@example.com + ... Settings={symbols_enabled = [SA_META_COMPLEX, SA_RAW_SIMPLE, SA_SEL_NOT_CORP]} + Expect Symbol SA_META_COMPLEX diff --git a/test/functional/cases/270_regexp_maps.robot b/test/functional/cases/270_regexp_maps.robot index d89143bc89..f4a2565609 100644 --- a/test/functional/cases/270_regexp_maps.robot +++ b/test/functional/cases/270_regexp_maps.robot @@ -1,6 +1,6 @@ *** Settings *** -Test Setup Rspamd Setup -Test Teardown Rspamd Teardown +Suite Setup Rspamd Setup +Suite Teardown Rspamd Teardown Library ${RSPAMD_TESTDIR}/lib/rspamd.py Resource ${RSPAMD_TESTDIR}/lib/rspamd.robot Variables ${RSPAMD_TESTDIR}/lib/vars.py @@ -8,8 +8,10 @@ Variables ${RSPAMD_TESTDIR}/lib/vars.py *** Variables *** ${CONFIG} ${RSPAMD_TESTDIR}/configs/regexp_maps.conf ${MESSAGE1} ${RSPAMD_TESTDIR}/messages/advance_fee_fraud.eml -${MESSAGE2} ${RSPAMD_TESTDIR}/messages/spam_message.eml -${RSPAMD_SCOPE} Test +${MESSAGE2} ${RSPAMD_TESTDIR}/messages/sa_header_body_raw.eml +${FULLMSG} ${RSPAMD_TESTDIR}/messages/sa_full_boundary.eml +${URL1} ${RSPAMD_TESTDIR}/messages/url1.eml +${RSPAMD_SCOPE} Suite ${RSPAMD_URL_TLD} ${RSPAMD_TESTDIR}/../lua/unit/test_tld.dat *** Test Cases *** @@ -46,3 +48,58 @@ Atom Rules Availability # We test by ensuring the meta rules work correctly Expect Symbol ADVANCE_FEE_2 Expect Symbol ADVANCE_FEE_3 + +SA-Like: Header Atom + [Documentation] Header regexp atom works (SA_HDR_SUBJ) + Scan File ${MESSAGE2} + Expect Symbol SA_HDR_SUBJ + +SA-Like: Body Atom + [Documentation] Body regexp atom works (SA_BODY_SIMPLE) + Scan File ${MESSAGE2} + Expect Symbol SA_BODY_SIMPLE + +SA-Like: Rawbody Atom + [Documentation] Rawbody regexp atom works (SA_RAW_SIMPLE) + Scan File ${MESSAGE2} + Expect Symbol SA_RAW_SIMPLE + +SA-Like: URI Atom + [Documentation] URI regexp atom works (SA_URI_SHORT) + Scan File ${URL1} + Expect Symbol SA_URI_SHORT + +SA-Like: Full Atom + [Documentation] Full message regexp atom works (SA_FULL_BOUNDARY) + Scan File ${FULLMSG} + Expect Symbol SA_FULL_BOUNDARY + +SA-Like: Selector From Domain + [Documentation] Selector-based atom (from:domain) works (SA_SEL_FROM_DOM) + Scan File ${MESSAGE2} + Expect Symbol SA_SEL_FROM_DOM + +SA-Like: Selector URL TLD + [Documentation] Selector-based atom (specific_urls:tld) works (SA_SEL_URL_TLD) + Scan File ${URL1} + Expect Symbol SA_SEL_URL_TLD + +SA-Like: Selector Negation + [Documentation] Selector negation works (SA_SEL_NOT_CORP) + Scan File ${MESSAGE2} + Expect Symbol SA_SEL_NOT_CORP + +SA-Like: Meta AND + [Documentation] Meta rule with AND over header+body+selector + Scan File ${MESSAGE2} + Expect Symbol SA_META_AND + +SA-Like: Meta OR + [Documentation] Meta rule with OR over uri+selector + Scan File ${URL1} + Expect Symbol SA_META_OR + +SA-Like: Meta Complex + [Documentation] Complex meta combining negation and rawbody + Scan File ${MESSAGE2} + Expect Symbol SA_META_COMPLEX diff --git a/test/functional/configs/maps/sa_selector_rules.map b/test/functional/configs/maps/sa_selector_rules.map new file mode 100644 index 0000000000..5088ddad34 --- /dev/null +++ b/test/functional/configs/maps/sa_selector_rules.map @@ -0,0 +1,56 @@ +# SA-like rules set to test selector/header/body/rawbody/uri/full/meta + +# Header atom +header SA_HDR_SUBJ Subject =~ /test\s+subject/i + +# Body atom +body SA_BODY_SIMPLE /hello\s+world/i + +# Rawbody atom +rawbody SA_RAW_SIMPLE /RAW\s+TOKEN/i + +# URI atom +uri SA_URI_SHORT /bit\.ly|tinyurl/i + +# Full message atom +full SA_FULL_BOUNDARY /From:.*\n(?:.*\n)*?Subject:/i + +# Selector atom: from domain equals example.com +selector SA_SEL_FROM_DOM from:domain =~ /^example\.com$/i + +# Selector atom: specific_urls tld equals example.com +selector SA_SEL_URL_TLD specific_urls({need_content = true, limit = 10}):get_tld =~ /^example\.com$/i + +# Selector negation (use domain to avoid addr formatting quirks) +selector SA_SEL_NOT_CORP from:domain !~ /^corp\.example$/i + +# Meta rules combining atoms and selectors +meta SA_META_AND SA_HDR_SUBJ & SA_BODY_SIMPLE & SA_SEL_FROM_DOM +meta SA_META_OR SA_URI_SHORT | SA_SEL_URL_TLD +meta SA_META_COMPLEX (SA_BODY_SIMPLE & SA_SEL_NOT_CORP) | SA_RAW_SIMPLE + +# Scores +score SA_HDR_SUBJ 1.0 +score SA_BODY_SIMPLE 1.0 +score SA_RAW_SIMPLE 1.0 +score SA_URI_SHORT 1.0 +score SA_FULL_BOUNDARY 0.5 +score SA_SEL_FROM_DOM 1.0 +score SA_SEL_URL_TLD 1.0 +score SA_SEL_NOT_CORP 0.5 +score SA_META_AND 2.5 +score SA_META_OR 2.0 +score SA_META_COMPLEX 2.0 + +# Descriptions +describe SA_HDR_SUBJ Subject matches test subject +describe SA_BODY_SIMPLE Body contains hello world +describe SA_RAW_SIMPLE Raw body contains RAW TOKEN +describe SA_URI_SHORT Message contains a shortener URL +describe SA_FULL_BOUNDARY Header boundary present +describe SA_SEL_FROM_DOM From domain equals example.com +describe SA_SEL_URL_TLD URL tld equals example.com +describe SA_SEL_NOT_CORP From address is not corp.example +describe SA_META_AND Header+Body+Selector combo +describe SA_META_OR URI or URL TLD selector +describe SA_META_COMPLEX Complex combination with negation diff --git a/test/functional/configs/merged-local.conf b/test/functional/configs/merged-local.conf index f70c4f8650..1d3b17cef2 100644 --- a/test/functional/configs/merged-local.conf +++ b/test/functional/configs/merged-local.conf @@ -72,6 +72,15 @@ logging = { log_usec = true; } +# Add SA-like regexp_rules (selector/header/uri/full/meta) for merged suite +multimap { + SA_SELECTOR_RULES { + type = "regexp_rules"; + map = "{= env.TESTDIR =}/configs/maps/sa_selector_rules.map"; + description = "SA-like selector/header/uri/full tests"; + } +} + mid = { source = { url = [ diff --git a/test/functional/configs/regexp_maps.conf b/test/functional/configs/regexp_maps.conf index be4cd047b3..730b9dd7c6 100644 --- a/test/functional/configs/regexp_maps.conf +++ b/test/functional/configs/regexp_maps.conf @@ -1,5 +1,10 @@ .include(duplicate=append,priority=0) "{= env.TESTDIR =}/configs/plugins.conf" +# Ensure effective TLD data is loaded for URL/selector tests +options { + url_tld = "{= env.TESTDIR =}/../lua/unit/test_tld.dat"; +} + # Configure multimap for regexp rules testing multimap { ADVANCE_FEE_SA_RULES { @@ -8,6 +13,11 @@ multimap { scope = "advance_fee_scope"; description = "Advance fee fraud detection rules"; } + SA_SELECTOR_RULES { + type = "regexp_rules"; + map = "{= env.TESTDIR =}/configs/maps/sa_selector_rules.map"; + description = "SA-like selector/header/uri/full tests"; + } } # Override symbol scores for testing diff --git a/test/functional/messages/sa_full_boundary.eml b/test/functional/messages/sa_full_boundary.eml new file mode 100644 index 0000000000..3ff8be6d2d --- /dev/null +++ b/test/functional/messages/sa_full_boundary.eml @@ -0,0 +1,19 @@ +From: someone@example.com +To: test@example.com +Subject: something else +Date: Fri, 01 Jan 2021 00:00:00 +0000 +Message-ID: +MIME-Version: 1.0 +Content-Type: multipart/alternative; boundary="bnd123" + +--bnd123 +Content-Type: text/plain; charset=UTF-8 + +Plain part + +--bnd123 +Content-Type: text/html; charset=UTF-8 + +HTML part + +--bnd123-- diff --git a/test/functional/messages/sa_header_body_raw.eml b/test/functional/messages/sa_header_body_raw.eml new file mode 100644 index 0000000000..96d01d34ab --- /dev/null +++ b/test/functional/messages/sa_header_body_raw.eml @@ -0,0 +1,11 @@ +From: user@example.com +To: test@example.com +Subject: test subject +Date: Fri, 01 Jan 2021 00:00:00 +0000 +Message-ID: +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 7bit + +hello world +This message body also contains a RAW TOKEN marker for rawbody matching. diff --git a/test/functional/messages/url1.eml b/test/functional/messages/url1.eml index 42361ae2d4..f6cd306932 100644 --- a/test/functional/messages/url1.eml +++ b/test/functional/messages/url1.eml @@ -1,3 +1,14 @@ -Content-Type: text/plain +From: user@example.com +To: test@example.com +Subject: Links test +Date: Fri, 01 Jan 2021 00:00:00 +0000 +Message-ID: +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 7bit -hello https://www.example.com/foo?a=b hello +Here is a shortener URL to match SA_URI_SHORT: +https://tinyurl.com/abc123 + +And here is a normal URL with TLD example.com to match SA_SEL_URL_TLD: +https://www.example.com/foo?a=b