From: Vsevolod Stakhov
Date: Sat, 29 Nov 2025 14:24:36 +0000 (+0000)
Subject: [Feature] Auto-mark whitelist symbols with SYMBOL_TYPE_FINE flag
X-Git-Tag: 3.14.1~2^2
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e8c0a9e5339d8960f493a6233cf1f4d6fbf7132e;p=thirdparty%2Frspamd.git

[Feature] Auto-mark whitelist symbols with SYMBOL_TYPE_FINE flag

This change ensures that symbols with negative weight and symbols used in
whitelist composites (composites with negative score) will always execute
regardless of whether the reject threshold has been reached.

Previously, when the early-stop optimization kicked in after reaching the
reject score, whitelist symbols could be skipped, leading to potential
false positives where emails should have been whitelisted.

Changes:
- Symbols with negative weight are automatically marked as FINE during
  config validation in symcache::validate()
- New rspamd_composites_mark_whitelist_deps() function traverses all
  composites with negative score and marks their constituent symbols as
  FINE (with transitive expansion for nested composites)
- New C API rspamd_symcache_set_symbol_fine() to programmatically set the
  FINE flag with proper parent/child propagation
- FINE flag is properly synchronized between virtual symbols and their
  parent symbols
---

diff --git a/src/libserver/cfg_utils.cxx b/src/libserver/cfg_utils.cxx
index 2446db1da5..d07c3c7f9d 100644
--- a/src/libserver/cfg_utils.cxx
+++ b/src/libserver/cfg_utils.cxx
@@ -1016,6 +1016,8 @@ rspamd_config_post_load(struct rspamd_config *cfg,
 
 		rspamd_composites_set_inverted_index(cfg->composites_manager, cfg->composites_inverted_index);
 		rspamd_composites_process_deps(cfg->composites_manager, cfg);
+		/* Mark symbols used by whitelist composites (negative score) as FINE */
+		rspamd_composites_mark_whitelist_deps(cfg->composites_manager, cfg);
 	}
 }
 
diff --git a/src/libserver/composites/composites.h b/src/libserver/composites/composites.h
index 63761eed29..599590b784 100644
--- a/src/libserver/composites/composites.h
+++ b/src/libserver/composites/composites.h
@@ -101,6 +101,15 @@ struct rspamd_composites_stats_export {
  */
 void rspamd_composites_get_stats(void *cm_ptr, struct rspamd_composites_stats_export *stats);
 
+/**
+ * Mark symbols used in whitelist composites (negative score) with SYMBOL_TYPE_FINE
+ * so they won't be skipped when reject threshold is reached. This ensures
+ * whitelist composites can still evaluate correctly.
+ * @param cm_ptr composites manager pointer
+ * @param cfg config structure
+ */
+void rspamd_composites_mark_whitelist_deps(void *cm_ptr, struct rspamd_config *cfg);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/libserver/composites/composites_internal.hxx b/src/libserver/composites/composites_internal.hxx
index 4f10eca338..270ec9d699 100644
--- a/src/libserver/composites/composites_internal.hxx
+++ b/src/libserver/composites/composites_internal.hxx
@@ -144,6 +144,8 @@ public:
 	void process_dependencies();
 	/* Build inverted index for fast composite lookup */
 	void build_inverted_index();
+	/* Mark symbols used in whitelist composites (negative score) as FINE */
+	void mark_whitelist_dependencies();
 };
 
 /**
diff --git a/src/libserver/composites/composites_manager.cxx b/src/libserver/composites/composites_manager.cxx
index 5663762f1f..f951e37e0b 100644
--- a/src/libserver/composites/composites_manager.cxx
+++ b/src/libserver/composites/composites_manager.cxx
@@ -24,6 +24,7 @@
 #include "libserver/cfg_file.h"
 #include "libserver/logger.h"
 #include "libserver/maps/map.h"
+#include "libserver/rspamd_symcache.h"
 #include "libutil/cxx/util.hxx"
 
 namespace rspamd::composites {
@@ -605,6 +606,112 @@ void composites_manager::build_inverted_index()
 					(int) symbol_to_composites.size(), (int) not_only_composites.size());
 }
 
+/* Callback data for collecting atoms from whitelist composites */
+struct whitelist_atom_cbdata {
+	ankerl::unordered_dense::set<std::string> *fine_symbols;
+};
+
+static void
+whitelist_atom_callback(const rspamd_ftok_t *atom, gpointer ud)
+{
+	auto *cbd = reinterpret_cast<whitelist_atom_cbdata *>(ud);
+
+	if (atom->len == 0) {
+		return;
+	}
+
+	std::string_view atom_str(atom->begin, atom->len);
+
+	/* Skip operators */
+	if (atom_str[0] == '&' || atom_str[0] == '|' ||
+		atom_str[0] == '!' || atom_str[0] == '(' || atom_str[0] == ')') {
+		return;
+	}
+
+	/* Skip prefix characters (~, -, ^) */
+	size_t start = 0;
+	while (start < atom_str.size() &&
+		   (atom_str[start] == '~' || atom_str[start] == '-' || atom_str[start] == '^')) {
+		++start;
+	}
+
+	if (start >= atom_str.size()) {
+		return;
+	}
+
+	auto remaining = atom_str.substr(start);
+
+	/* Skip group matchers (g:, g+:, g-:) - we can't determine specific symbols */
+	if (remaining.starts_with("g:") || remaining.starts_with("g+:") || remaining.starts_with("g-:")) {
+		return;
+	}
+
+	/* Extract symbol name (before '[' if present for options) */
+	auto bracket_pos = remaining.find('[');
+	std::string symbol_name;
+	if (bracket_pos != std::string_view::npos) {
+		symbol_name = std::string(remaining.substr(0, bracket_pos));
+	}
+	else {
+		symbol_name = std::string(remaining);
+	}
+
+	if (!symbol_name.empty()) {
+		cbd->fine_symbols->emplace(std::move(symbol_name));
+	}
+}
+
+void composites_manager::mark_whitelist_dependencies()
+{
+	ankerl::unordered_dense::set<std::string> fine_symbols;
+
+	msg_debug_config("analyzing whitelist composites for FINE symbol marking");
+
+	/* Step 1: Find composites with negative score and collect their atoms */
+	for (const auto &comp: all_composites) {
+		auto *sym_def = static_cast<struct rspamd_symbol *>(
+			g_hash_table_lookup(cfg->symbols, comp->sym.c_str()));
+
+		if (sym_def && *sym_def->weight_ptr < 0) {
+			/* This is a whitelist composite - collect all its atoms */
+			whitelist_atom_cbdata cbd{&fine_symbols};
+			rspamd_expression_atom_foreach(comp->expr, whitelist_atom_callback, &cbd);
+
+			msg_debug_config("composite '%s' has negative weight (%.2f), collecting dependencies",
+							 comp->sym.c_str(), *sym_def->weight_ptr);
+		}
+	}
+
+	/* Step 2: Transitively expand - if an atom is also a whitelist composite, add its atoms */
+	bool changed;
+	do {
+		changed = false;
+		for (const auto &comp: all_composites) {
+			if (fine_symbols.contains(comp->sym)) {
+				size_t before = fine_symbols.size();
+				whitelist_atom_cbdata cbd{&fine_symbols};
+				rspamd_expression_atom_foreach(comp->expr, whitelist_atom_callback, &cbd);
+				if (fine_symbols.size() > before) {
+					changed = true;
+				}
+			}
+		}
+	} while (changed);
+
+	/* Step 3: Mark all collected symbols as FINE in symcache */
+	int marked_count = 0;
+	for (const auto &sym_name: fine_symbols) {
+		if (rspamd_symcache_set_symbol_fine(cfg->cache, sym_name.c_str())) {
+			msg_debug_config("marked symbol '%s' as FINE (whitelist composite dependency)",
+							 sym_name.c_str());
+			marked_count++;
+		}
+	}
+
+	msg_info_config("marked %d symbols as FINE for whitelist composite dependencies",
+					marked_count);
+}
+
 }// namespace rspamd::composites
 
 void rspamd_composites_process_deps(void *cm_ptr, struct rspamd_config *cfg)
@@ -642,4 +749,10 @@ void rspamd_composites_get_stats(void *cm_ptr, struct rspamd_composites_stats_ex
 	stats->time_fast_mean = cm->stats.time_fast.mean;
 	stats->time_fast_stddev = cm->stats.time_fast.stddev;
 	stats->time_fast_count = cm->stats.time_fast.number;
+}
+
+void rspamd_composites_mark_whitelist_deps(void *cm_ptr, struct rspamd_config *cfg)
+{
+	auto *cm = COMPOSITE_MANAGER_FROM_PTR(cm_ptr);
+	cm->mark_whitelist_dependencies();
 }
\ No newline at end of file
diff --git a/src/libserver/rspamd_symcache.h b/src/libserver/rspamd_symcache.h
index f020b6055b..47582523f5 100644
--- a/src/libserver/rspamd_symcache.h
+++ b/src/libserver/rspamd_symcache.h
@@ -578,6 +578,16 @@ void rspamd_symcache_runtime_destroy(struct rspamd_task *task);
  */
 void rspamd_symcache_promote_resort(struct rspamd_symcache *cache);
 
+/**
+ * Marks a symbol with SYMBOL_TYPE_FINE flag so it won't be skipped on early stop
+ * (when reject threshold is reached). Also propagates flag to parent/children.
+ * @param cache
+ * @param symbol symbol name
+ * @return TRUE if symbol was found and marked
+ */
+gboolean rspamd_symcache_set_symbol_fine(struct rspamd_symcache *cache,
+										 const char *symbol);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/libserver/symcache/symcache_c.cxx b/src/libserver/symcache/symcache_c.cxx
index 6221aa238c..50af263403 100644
--- a/src/libserver/symcache/symcache_c.cxx
+++ b/src/libserver/symcache/symcache_c.cxx
@@ -727,3 +727,36 @@ void rspamd_symcache_promote_resort(struct rspamd_symcache *cache)
 
 	real_cache->promote_resort();
 }
+
+gboolean rspamd_symcache_set_symbol_fine(struct rspamd_symcache *cache,
+										 const char *symbol)
+{
+	auto *real_cache = C_API_SYMCACHE(cache);
+	auto *item = real_cache->get_item_by_name_mut(symbol, false);
+
+	if (item == nullptr) {
+		return FALSE;
+	}
+
+	if (!(item->flags & SYMBOL_TYPE_FINE)) {
+		item->flags |= SYMBOL_TYPE_FINE;
+
+		/* Also mark parent if this is a virtual symbol */
+		if (item->is_virtual()) {
+			auto *parent = const_cast<cache_item *>(item->get_parent(*real_cache));
+			if (parent && !(parent->flags & SYMBOL_TYPE_FINE)) {
+				parent->flags |= SYMBOL_TYPE_FINE;
+			}
+		}
+
+		/* And mark all virtual children */
+		const auto *children = item->get_children();
+		if (children) {
+			for (auto *child: *children) {
+				child->flags |= SYMBOL_TYPE_FINE;
+			}
+		}
+	}
+
+	return TRUE;
+}
diff --git a/src/libserver/symcache/symcache_impl.cxx b/src/libserver/symcache/symcache_impl.cxx
index c1ca2a6ed4..aae7793759 100644
--- a/src/libserver/symcache/symcache_impl.cxx
+++ b/src/libserver/symcache/symcache_impl.cxx
@@ -925,6 +925,17 @@ auto symcache::validate(bool strict) -> bool
 			item->priority++;
 		}
 
+		/*
+		 * Mark symbols with negative weight as FINE, so they are not skipped
+		 * when reject threshold is reached. This ensures whitelist symbols
+		 * always have a chance to execute.
+		 */
+		if (item->st->weight < 0 && !(item->flags & SYMBOL_TYPE_FINE)) {
+			item->flags |= SYMBOL_TYPE_FINE;
+			msg_debug_cache("symbol %s has negative weight (%.2f), marking as FINE",
+							item->symbol.c_str(), item->st->weight);
+		}
+
 		if (item->is_virtual()) {
 			if (!(item->flags & SYMBOL_TYPE_GHOST)) {
 				auto *parent = const_cast<cache_item *>(item->get_parent(*this));
@@ -945,6 +956,17 @@ auto symcache::validate(bool strict) -> bool
 					parent->priority = MAX(p1, p2);
 					item->priority = parent->priority;
 				}
+
+				/*
+				 * Sync SYMBOL_TYPE_FINE between virtual symbol and parent.
+				 * If either has negative weight and is marked FINE, propagate to both.
+				 */
+				if ((item->flags & SYMBOL_TYPE_FINE) && !(parent->flags & SYMBOL_TYPE_FINE)) {
+					parent->flags |= SYMBOL_TYPE_FINE;
+				}
+				else if ((parent->flags & SYMBOL_TYPE_FINE) && !(item->flags & SYMBOL_TYPE_FINE)) {
+					item->flags |= SYMBOL_TYPE_FINE;
+				}
 			}
 		}
 