}
}
else {
- /* First pass: use symcache iteration (will skip second-pass composites in callback) */
- msg_debug_composites("processing first-pass composites via symcache");
- rspamd_symcache_composites_foreach(task,
- task->cfg->cache,
- composites_foreach_callback,
- &cd);
+ /* First pass: use inverted index for fast lookup */
+ ankerl::unordered_dense::set<rspamd_composite *> potentially_active;
+
+ /* Callback data for collecting potentially active composites */
+ struct collect_active_cbdata {
+ composites_manager *cm;
+ ankerl::unordered_dense::set<rspamd_composite *> *active;
+ } collect_data{cm, &potentially_active};
+
+ /* Collect composites that have at least one positive atom present */
+ rspamd_task_symbol_result_foreach(task, mres, [](gpointer key, gpointer value, gpointer ud) {
+ auto *cbd = reinterpret_cast<collect_active_cbdata *>(ud);
+ std::string_view sym_name{reinterpret_cast<const char *>(key)};
+
+ auto it = cbd->cm->symbol_to_composites.find(sym_name);
+ if (it != cbd->cm->symbol_to_composites.end()) {
+ for (auto *comp: it->second) {
+ /* Only add first-pass composites */
+ if (!comp->second_pass) {
+ cbd->active->insert(comp);
+ }
+ }
+ } }, &collect_data);
+
+ /* Always add NOT-only composites (they have no positive atoms) */
+ for (auto *comp: cm->not_only_composites) {
+ if (!comp->second_pass) {
+ potentially_active.insert(comp);
+ }
+ }
+
+ msg_debug_composites("processing %d potentially active composites (from %d first-pass)",
+ (int) potentially_active.size(),
+ (int) cm->first_pass_composites.size());
+
+ /* Process only potentially active composites */
+ for (auto *comp: potentially_active) {
+ composites_foreach_callback((gpointer) comp->sym.c_str(),
+ (gpointer) comp,
+ &cd);
+ }
}
}
struct rspamd_expression *expr;
int id;
rspamd_composite_policy policy;
- bool second_pass; /**< true if this composite needs second pass evaluation */
+ bool second_pass; /**< true if this composite needs second pass evaluation */
+ bool has_positive_atoms; /**< true if composite has at least one non-negated atom */
};
#define COMPOSITE_MANAGER_FROM_PTR(ptr) (reinterpret_cast<rspamd::composites::composites_manager *>(ptr))
std::vector<rspamd_composite *> first_pass_composites; /* Evaluated during COMPOSITES stage */
std::vector<rspamd_composite *> second_pass_composites; /* Evaluated during COMPOSITES_POST stage */
+ /* Inverted index: symbol -> composites that contain this symbol as positive atom */
+ ankerl::unordered_dense::map<std::string, std::vector<rspamd_composite *>,
+ rspamd::smart_str_hash, rspamd::smart_str_equal>
+ symbol_to_composites;
+ /* Composites that have only negated atoms (must always be checked) */
+ std::vector<rspamd_composite *> not_only_composites;
+
/* Analyze composite dependencies and split into first/second pass vectors */
void process_dependencies();
+ /* Build inverted index for fast composite lookup */
+ void build_inverted_index();
};
}// namespace rspamd::composites
(int) first_pass_composites.size(), (int) second_pass_composites.size());
}
+/*
+ * Context for building inverted index: state shared between
+ * build_inverted_index() and the per-atom callback while one composite's
+ * expression is being scanned.
+ */
+struct inverted_index_cbdata {
+	composites_manager *cm; /* manager whose index is being filled */
+	rspamd_composite *comp; /* composite whose expression is being scanned */
+	bool has_positive;      /* an indexable atom outside of any NOT was seen */
+	bool has_negated;       /* an atom located somewhere below a NOT was seen */
+};
+
+/*
+ * Returns true if any ancestor of `node` in the expression AST is a NOT
+ * operation. Checking only the direct parent is not enough: in `!(A | B)`
+ * the atoms hang off the OR node, yet both of them are negated.
+ */
+static bool
+inverted_index_node_is_negated(GNode *node)
+{
+	for (auto *up = node->parent; up != nullptr; up = up->parent) {
+		if (rspamd_expression_node_is_op(up, OP_NOT)) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Per-atom callback for rspamd_expression_atom_foreach_ex.
+ * Atoms below a NOT are only recorded via `has_negated`; every other atom
+ * is registered in `symbol_to_composites` under its bare symbol name.
+ */
+static void
+inverted_index_atom_callback(GNode *atom_node, rspamd_expression_atom_t *atom, gpointer ud)
+{
+	auto *cbd = reinterpret_cast<inverted_index_cbdata *>(ud);
+
+	if (inverted_index_node_is_negated(atom_node)) {
+		/* Negated atom - don't add to inverted index, but remember it */
+		cbd->has_negated = true;
+		return;
+	}
+
+	/* Extract normalized symbol name from atom string */
+	std::string_view atom_str(atom->str, atom->len);
+
+	/* Atom format: [~-^]SYMBOL[options] - skip the leading modifiers */
+	auto start = atom_str.begin();
+	while (start != atom_str.end() && (*start == '~' || *start == '-' || *start == '^')) {
+		++start;
+	}
+
+	/* Symbol name ends at '[' (options) or at the end of the atom */
+	auto end = std::find(start, atom_str.end(), '[');
+
+	if (start >= end) {
+		return; /* Empty or invalid symbol */
+	}
+
+	std::string symbol_name(start, end);
+
+	/* Mark that we have at least one positive atom */
+	cbd->has_positive = true;
+
+	/*
+	 * Atoms of one composite are visited back to back, so a repeated symbol
+	 * can only duplicate the last entry of its bucket.
+	 */
+	auto &bucket = cbd->cm->symbol_to_composites[symbol_name];
+	if (bucket.empty() || bucket.back() != cbd->comp) {
+		bucket.push_back(cbd->comp);
+	}
+}
+
+/*
+ * Builds `symbol_to_composites` and `not_only_composites` from
+ * `all_composites`.
+ *
+ * A composite may be skipped at runtime only if it cannot match while none
+ * of its indexed symbols is present. That does not hold once a negation is
+ * involved (`A | !B` and `!(A | B)` are both true for a message with no
+ * symbols at all), so every composite containing a negated atom is placed
+ * into `not_only_composites` (i.e. always checked), in addition to those
+ * without any positive atom.
+ *
+ * NOTE(review): expressions that use non-boolean operators (comparisons,
+ * arithmetic) may also match without any symbol present - TODO confirm
+ * whether such composites need the same treatment.
+ */
+void composites_manager::build_inverted_index()
+{
+	msg_debug_config("building inverted index for %d composites", (int) all_composites.size());
+
+	/* Start from scratch so that a repeated call does not pile up duplicates */
+	symbol_to_composites.clear();
+	not_only_composites.clear();
+
+	for (auto &comp: all_composites) {
+		inverted_index_cbdata cbd{this, comp.get(), false, false};
+
+		rspamd_expression_atom_foreach_ex(comp->expr, inverted_index_atom_callback, &cbd);
+
+		comp->has_positive_atoms = cbd.has_positive;
+
+		if (!cbd.has_positive || cbd.has_negated) {
+			/* Presence of a positive symbol is not required for a match */
+			not_only_composites.push_back(comp.get());
+			msg_debug_config("composite '%s' %s, will always be checked",
+							 comp->sym.c_str(),
+							 cbd.has_positive ? "contains negated atoms" : "has no positive atoms");
+		}
+	}
+
+	msg_debug_config("inverted index built: %d unique symbols, %d always-checked composites",
+					 (int) symbol_to_composites.size(), (int) not_only_composites.size());
+}
+
}// namespace rspamd::composites
void rspamd_composites_process_deps(void *cm_ptr, struct rspamd_config *cfg)
{
auto *cm = COMPOSITE_MANAGER_FROM_PTR(cm_ptr);
cm->process_dependencies();
+ cm->build_inverted_index(); /* build the symbol -> composites index right after the dependency pass */
}
\ No newline at end of file
rspamd_ast_atom_traverse, &data);
}
+struct atom_foreach_cbdata_ex { /* traversal context handed to rspamd_ast_atom_traverse_ex */
+ rspamd_expression_atom_foreach_cb_ex cb; /* user callback invoked for every atom node */
+ gpointer cbdata; /* opaque pointer forwarded to `cb` unchanged */
+};
+
+/*
+ * g_node_traverse() visitor: passes every atom node, together with the GNode
+ * that holds it, to the user callback; nodes of any other element type are
+ * ignored. Always returns FALSE, so the traversal is never cut short.
+ */
+static gboolean
+rspamd_ast_atom_traverse_ex(GNode *n, gpointer d)
+{
+	struct atom_foreach_cbdata_ex *ctx = d;
+	struct rspamd_expression_elt *node_elt = n->data;
+
+	if (node_elt->type != ELT_ATOM) {
+		return FALSE;
+	}
+
+	ctx->cb(n, node_elt->p.atom, ctx->cbdata);
+
+	return FALSE;
+}
+
+/*
+ * Walks the whole expression AST in post-order and reports each atom, along
+ * with the GNode holding it, to `cb`; `cbdata` is passed through untouched.
+ * `expr` must not be NULL (asserted).
+ */
+void rspamd_expression_atom_foreach_ex(struct rspamd_expression *expr,
+									   rspamd_expression_atom_foreach_cb_ex cb, gpointer cbdata)
+{
+	struct atom_foreach_cbdata_ex ctx = {
+		.cb = cb,
+		.cbdata = cbdata,
+	};
+
+	g_assert(expr != NULL);
+
+	g_node_traverse(expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
+					rspamd_ast_atom_traverse_ex, &ctx);
+}
+
gboolean
rspamd_expression_node_is_op(GNode *node, enum rspamd_expression_op op)
{
void rspamd_expression_atom_foreach(struct rspamd_expression *expr,
rspamd_expression_atom_foreach_cb cb, gpointer cbdata);
+/**
+ * Extended callback that provides access to the AST node (for checking parent operations like NOT)
+ * @param atom_node AST node (GNode container) that holds the atom
+ * @param atom atom stored in that node
+ * @param ud opaque data that was given to rspamd_expression_atom_foreach_ex
+ */
+typedef void (*rspamd_expression_atom_foreach_cb_ex)(GNode *atom_node,
+ rspamd_expression_atom_t *atom,
+ gpointer ud);
+
+/**
+ * Traverse over all atoms in the expression with access to AST nodes
+ * @param expr expression (must not be NULL)
+ * @param cb callback to be called with GNode and full atom structure
+ * @param cbdata opaque data passed to `cb`
+ */
+void rspamd_expression_atom_foreach_ex(struct rspamd_expression *expr,
+ rspamd_expression_atom_foreach_cb_ex cb, gpointer cbdata);
+
/**
* Checks if a specified node in AST is the specified operation
* @param node AST node packed in GNode container