From: Vsevolod Stakhov Date: Thu, 4 Jun 2026 07:53:23 +0000 (+0100) Subject: [Fix] composites: avoid over-eager second-pass deferral X-Git-Tag: 4.1.0~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ccc181fa0e3b9278dcbd357454e50f0c1dd4fa34;p=thirdparty%2Frspamd.git [Fix] composites: avoid over-eager second-pass deferral The two-phase composite evaluation classified a composite as second-pass if any referenced symbol carried SYMBOL_TYPE_NOSTAT. NOSTAT is auto-set on nearly every virtual/callback symbol (regexp rules, multimap, rbl, ...), so most composites that depend only on ordinary filter rules were wrongly deferred to the COMPOSITES_POST stage, which runs after post-filters. As a result task:get_groups()/get_symbols() called from a postfilter no longer saw those composite symbols and their groups (regression vs 3.5); they only became visible from idempotent rules. Classifiers run before composites, so keying off SYMBOL_TYPE_CLASSIFIER was also unnecessary. Only postfilter-stage symbols are genuinely unavailable during the first composites pass. Add rspamd_symcache_get_symbol_stage(), which resolves virtual symbols to their parent and returns the processing stage as a SYMBOL_TYPE_* bit, and defer a composite only when a dependency resolves to the postfilter stage. NEURAL_SPAM (a virtual child of the NEURAL_CHECK postfilter) still resolves to POSTFILTER, so the original #5674 fix keeps working. Add a functional regression test covering a composite that depends only on a filter-stage NOSTAT symbol and must be visible from a postfilter. --- diff --git a/src/libserver/composites/composites_manager.cxx b/src/libserver/composites/composites_manager.cxx index 1cd91f932f..c43f032f99 100644 --- a/src/libserver/composites/composites_manager.cxx +++ b/src/libserver/composites/composites_manager.cxx @@ -412,10 +412,16 @@ symbol_needs_second_pass(struct rspamd_config *cfg, const char *symbol_name) return false; } - auto flags = rspamd_symcache_get_symbol_flags(cfg->cache, symbol_name); + /* + * Only postfilter-stage symbols are unavailable during the first composites + * pass (filters and classifiers already ran). get_symbol_stage() resolves + * virtuals to their parent, so e.g. NEURAL_SPAM reports its NEURAL_CHECK + * postfilter stage. Do NOT key off SYMBOL_TYPE_NOSTAT: it is set on nearly + * every virtual/callback symbol and wrongly deferred most composites. + */ + auto stage = rspamd_symcache_get_symbol_stage(cfg->cache, symbol_name); - /* Postfilters and classifiers/statistics symbols require second pass */ - return (flags & (SYMBOL_TYPE_POSTFILTER | SYMBOL_TYPE_CLASSIFIER | SYMBOL_TYPE_NOSTAT)) != 0; + return stage == SYMBOL_TYPE_POSTFILTER; } /* Callback data for walking expression atoms to find symbol dependencies */ diff --git a/src/libserver/rspamd_symcache.h b/src/libserver/rspamd_symcache.h index 06e265076c..1ae33bd294 100644 --- a/src/libserver/rspamd_symcache.h +++ b/src/libserver/rspamd_symcache.h @@ -268,6 +268,17 @@ const char *rspamd_symcache_get_parent(struct rspamd_symcache *cache, unsigned int rspamd_symcache_get_symbol_flags(struct rspamd_symcache *cache, const char *symbol); +/** + * Returns the processing stage of a symbol as a single SYMBOL_TYPE_* bit + * (SYMBOL_TYPE_NORMAL for a plain filter). Virtual symbols are resolved to + * their parent so the returned stage reflects when the symbol is actually + * produced. Returns 0 if the symbol is unknown or its parent cannot be + * resolved. Used by the composites two-phase analysis to detect dependencies + * on symbols computed after the first composites pass (postfilters). + */ +unsigned int rspamd_symcache_get_symbol_stage(struct rspamd_symcache *cache, + const char *symbol); + void rspamd_symcache_get_symbol_details(struct rspamd_symcache *cache, const char *symbol, ucl_object_t *this_sym_ucl); diff --git a/src/libserver/symcache/symcache_c.cxx b/src/libserver/symcache/symcache_c.cxx index c07f5a08cc..6460d01781 100644 --- a/src/libserver/symcache/symcache_c.cxx +++ b/src/libserver/symcache/symcache_c.cxx @@ -327,6 +327,42 @@ unsigned int rspamd_symcache_get_symbol_flags(struct rspamd_symcache *cache, return 0; } +unsigned int rspamd_symcache_get_symbol_stage(struct rspamd_symcache *cache, + const char *symbol) +{ + auto *real_cache = C_API_SYMCACHE(cache); + + /* Resolve virtual symbols to their parent so the stage reflects where the + * symbol is actually produced (e.g. NEURAL_SPAM -> NEURAL_CHECK postfilter). */ + auto *sym = real_cache->get_item_by_name(symbol, true); + + if (sym == nullptr) { + return 0; + } + + switch (sym->get_type()) { + case rspamd::symcache::symcache_item_type::CONNFILTER: + return SYMBOL_TYPE_CONNFILTER; + case rspamd::symcache::symcache_item_type::PREFILTER: + return SYMBOL_TYPE_PREFILTER; + case rspamd::symcache::symcache_item_type::FILTER: + return SYMBOL_TYPE_NORMAL; + case rspamd::symcache::symcache_item_type::POSTFILTER: + return SYMBOL_TYPE_POSTFILTER; + case rspamd::symcache::symcache_item_type::IDEMPOTENT: + return SYMBOL_TYPE_IDEMPOTENT; + case rspamd::symcache::symcache_item_type::CLASSIFIER: + return SYMBOL_TYPE_CLASSIFIER; + case rspamd::symcache::symcache_item_type::COMPOSITE: + return SYMBOL_TYPE_COMPOSITE; + case rspamd::symcache::symcache_item_type::VIRTUAL: + /* Unresolved virtual (parent missing); treat stage as unknown */ + return SYMBOL_TYPE_VIRTUAL; + } + + return 0; +} + const struct rspamd_symcache_item_stat * rspamd_symcache_item_stat(struct rspamd_symcache_item *item) { diff --git a/test/functional/cases/001_merged/109_composites_postfilter.robot b/test/functional/cases/001_merged/109_composites_postfilter.robot index 020264247c..8301f4f283 100644 --- a/test/functional/cases/001_merged/109_composites_postfilter.robot +++ b/test/functional/cases/001_merged/109_composites_postfilter.robot @@ -7,6 +7,7 @@ Variables ${RSPAMD_TESTDIR}/lib/vars.py ${MESSAGE} ${RSPAMD_TESTDIR}/messages/spam_message.eml ${RSPAMD_LUA_SCRIPT} ${RSPAMD_TESTDIR}/lua/composites_postfilter.lua ${SETTINGS_COMPOSITES_POSTFILTER} symbols_enabled [TEST_FILTER_SYM, TEST_POSTFILTER_COMPOSITE, TEST_POSTFILTER_SYM] +${SETTINGS_COMPOSITES_FIRSTPASS} symbols_enabled [TEST_NOSTAT_FILTER_SYM, TEST_FIRSTPASS_COMPOSITE, TEST_OBSERVE_FIRSTPASS_COMPOSITE, TEST_COMPOSITE_SEEN_IN_POSTFILTER] *** Test Cases *** Composite With Postfilter And Filter @@ -16,3 +17,11 @@ Composite With Postfilter And Filter Expect Symbol With Score TEST_POSTFILTER_COMPOSITE 10.0 Do Not Expect Symbol TEST_FILTER_SYM Do Not Expect Symbol TEST_POSTFILTER_SYM + +Composite Of Filter Symbol Is Visible In Postfilter + [Documentation] Composite depending only on a filter-stage (NOSTAT) symbol must be + ... evaluated in the first pass and thus visible to postfilters + Scan File ${MESSAGE} + ... Settings=${SETTINGS_COMPOSITES_FIRSTPASS} + Expect Symbol TEST_FIRSTPASS_COMPOSITE + Expect Symbol TEST_COMPOSITE_SEEN_IN_POSTFILTER diff --git a/test/functional/configs/merged-local.conf b/test/functional/configs/merged-local.conf index c064cce82a..66e6ec6518 100644 --- a/test/functional/configs/merged-local.conf +++ b/test/functional/configs/merged-local.conf @@ -1078,6 +1078,12 @@ composites { expression = "TEST_FILTER_SYM & TEST_POSTFILTER_SYM"; score = 10.0; } + # Depends only on a filter-stage symbol (carrying NOSTAT) -> must be + # evaluated in the first composites pass, before post-filters run. + TEST_FIRSTPASS_COMPOSITE { + expression = "TEST_NOSTAT_FILTER_SYM"; + score = 5.0; + } } worker "controller" { diff --git a/test/functional/lua/composites_postfilter.lua b/test/functional/lua/composites_postfilter.lua index ae7e36b2a6..323c266ea6 100644 --- a/test/functional/lua/composites_postfilter.lua +++ b/test/functional/lua/composites_postfilter.lua @@ -32,3 +32,42 @@ rspamd_config:set_metric_symbol({ -- This should match when both symbols are present -- BUG: Currently fails because composite is evaluated before postfilter runs -- and is not re-evaluated in the second pass + +-- Regression for the inverse of #5674: a composite that depends ONLY on +-- filter-stage symbols must be evaluated in the FIRST composites pass so that +-- it (and its groups) are visible from postfilters via task:get_symbols() / +-- task:get_groups(). NOSTAT is set on essentially every virtual/callback rule +-- (regexp, multimap, rbl, ...); a filter-stage symbol carrying NOSTAT must not +-- cause the composite to be wrongly deferred to the second (post-filters) pass. +rspamd_config:register_symbol({ + type = 'normal', + flags = 'nostat', + name = 'TEST_NOSTAT_FILTER_SYM', + callback = function(task) + task:insert_result('TEST_NOSTAT_FILTER_SYM', 1.0) + end +}) +rspamd_config:set_metric_symbol({ + name = 'TEST_NOSTAT_FILTER_SYM', + score = 1.0 +}) + +-- Postfilter that observes whether the first-pass composite has already fired. +-- It only inserts its marker if TEST_FIRSTPASS_COMPOSITE is already present, +-- which can only happen if the composite was evaluated during the first pass. +rspamd_config:register_symbol({ + type = 'postfilter', + name = 'TEST_OBSERVE_FIRSTPASS_COMPOSITE', + callback = function(task) + if task:has_symbol('TEST_FIRSTPASS_COMPOSITE') then + task:insert_result('TEST_COMPOSITE_SEEN_IN_POSTFILTER', 1.0) + end + end +}) +rspamd_config:set_metric_symbol({ + name = 'TEST_COMPOSITE_SEEN_IN_POSTFILTER', + score = 1.0 +}) + +-- Composite TEST_FIRSTPASS_COMPOSITE = TEST_NOSTAT_FILTER_SYM is defined in +-- merged-local.conf