Regression test for the symcache-driven profile rotation fix.
Drives a live rspamd + Redis through: train ANN with providers-only
input (metatokens, disable_symbols_input=true) -> verify NEURAL_SPAM /
NEURAL_HAM fire -> mutate set.symbols/set.digest in the scanner worker
(simulates a symcache shift) -> verify inference still fires after the
next check_anns poll.
Pre-fix the mutation pushes the symbol-list Levenshtein distance well
past the 30% tolerance, the worker rejects the trained profile, and
NEURAL_SPAM stops firing. Post-fix the providers_digest stays
constant and is recognised as the authoritative schema fingerprint, so
the trained ANN is reloaded.
max_trains=1 because metatokens-only scans produce an identical
vector per message and Redis SADD deduplicates — one spam + one ham
scan are enough to fire training.
--- /dev/null
+# Suite for the neural profile-rotation scenario: train a providers-driven
+# ANN, force a symbol-list rotation via FORCE_ROTATE_NEURAL, then check that
+# NEURAL_SPAM_SHORT / NEURAL_HAM_SHORT are still reported.
+*** Settings ***
+Suite Setup Rspamd Redis Setup
+Suite Teardown Rspamd Redis Teardown
+Library Process
+Library ${RSPAMD_TESTDIR}/lib/rspamd.py
+Resource ${RSPAMD_TESTDIR}/lib/rspamd.robot
+Variables ${RSPAMD_TESTDIR}/lib/vars.py
+
+*** Variables ***
+# CONFIG points at the rspamd config used by this suite; SPAM_MSG / HAM_MSG
+# are the two messages scanned by every test case below.
+${CONFIG} ${RSPAMD_TESTDIR}/configs/neural_rotation.conf
+${SPAM_MSG} ${RSPAMD_TESTDIR}/messages/spam.eml
+${HAM_MSG} ${RSPAMD_TESTDIR}/messages/ham.eml
+# Both scopes are set to Suite; presumably consumed by the setup/teardown
+# keywords from rspamd.robot so one rspamd + Redis pair is shared by all
+# test cases — TODO confirm against lib/rspamd.robot.
+${REDIS_SCOPE} Suite
+${RSPAMD_SCOPE} Suite
+${RSPAMD_URL_TLD} ${RSPAMD_TESTDIR}/../lua/unit/test_tld.dat
+
+# The test cases below are order-dependent: each one relies on the state
+# (and the trailing sleeps) left behind by the previous one.
+*** Test Cases ***
+Train providers-driven ANN
+ # max_trains=1 means a single spam + single ham scan triggers training.
+ # Metatokens-only vector + disable_symbols_input=true makes the input
+ # vector independent of which symbols fire — providers_digest is the
+ # only schema fingerprint.
+ # These two scans allowlist only the SPAM_/HAM_ helper symbols; the neural
+ # group is not enabled here.
+ Sleep 2s Wait for redis and initial check_anns
+ Scan File ${SPAM_MSG} Settings={symbols_enabled = ["SPAM_SYMBOL1", "SPAM_SYMBOL2", "SPAM_SYMBOL3"]}
+ Expect Symbol SPAM_SYMBOL1
+ Scan File ${HAM_MSG} Settings={symbols_enabled = ["HAM_SYMBOL1", "HAM_SYMBOL2", "HAM_SYMBOL3"]}
+ Expect Symbol HAM_SYMBOL1
+
+Inference fires before rotation
+ # Fixed 5s wait (not a poll) before the first inference scans.
+ # The scans enable the neural group and disable NEURAL_LEARN; presumably
+ # so they exercise inference without storing more training vectors —
+ # TODO confirm against the neural plugin.
+ Sleep 5s Wait for training to complete and ANN to be reloaded
+ Scan File ${SPAM_MSG} Settings={symbols_enabled = ["SPAM_SYMBOL1","SPAM_SYMBOL2","SPAM_SYMBOL3"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+ Expect Symbol NEURAL_SPAM_SHORT
+ Scan File ${HAM_MSG} Settings={symbols_enabled = ["HAM_SYMBOL1","HAM_SYMBOL2","HAM_SYMBOL3"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+ Expect Symbol NEURAL_HAM_SHORT
+
+Force symcache-style rotation
+ # Mutate set.symbols/set.digest in the scanner worker so the next
+ # check_anns poll re-runs profile selection. With the fix, the
+ # providers_digest-based match preserves the trained ANN; pre-fix
+ # the symbol-digest shift would orphan it.
+ # Only FORCE_ROTATE_NEURAL is allowlisted, and both NEURAL_LEARN and
+ # NEURAL_CHECK are disabled for this scan.
+ Scan File ${SPAM_MSG} Settings={symbols_enabled = ["FORCE_ROTATE_NEURAL"];symbols_disabled = ["NEURAL_LEARN","NEURAL_CHECK"]}
+ Expect Symbol FORCE_ROTATE_NEURAL
+ Sleep 3s Wait for check_anns periodic to reload after rotation
+
+Inference still fires after rotation
+ # Repeats the exact scans and expectations of "Inference fires before
+ # rotation". There is no sleep here: the wait for the reload is the 3s
+ # sleep that ends the previous test case.
+ Scan File ${SPAM_MSG} Settings={symbols_enabled = ["SPAM_SYMBOL1","SPAM_SYMBOL2","SPAM_SYMBOL3"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+ Expect Symbol NEURAL_SPAM_SHORT
+ Scan File ${HAM_MSG} Settings={symbols_enabled = ["HAM_SYMBOL1","HAM_SYMBOL2","HAM_SYMBOL3"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+ Expect Symbol NEURAL_HAM_SHORT
--- /dev/null
+# Base rspamd configuration for the neural rotation suite. All paths and
+# addresses are filled in from env.* template variables.
+options = {
+ url_tld = "{= env.URL_TLD =}"
+ pidfile = "{= env.TMPDIR =}/rspamd.pid"
+ lua_path = "{= env.INSTALLROOT =}/share/rspamd/lib/?.lua"
+ filters = [];
+ explicit_modules = ["settings"];
+}
+
+# Debug-level file logging with microsecond timestamps, written under TMPDIR.
+logging = {
+ type = "file",
+ level = "debug"
+ filename = "{= env.TMPDIR =}/rspamd.log"
+ log_usec = true;
+}
+metric = {
+ name = "default",
+ # Very high action thresholds; presumably so the scored test symbols never
+ # trigger an action — TODO confirm.
+ actions = {
+ reject = 100500,
+ add_header = 50500,
+ }
+ unknown_weight = 1
+}
+# Exactly one normal (scanner) worker is configured, so every scan in the
+# suite, including the rotation one, should be served by the same process.
+worker {
+ type = normal
+ bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_NORMAL =}"
+ count = 1
+ task_timeout = 10s;
+}
+worker {
+ type = controller
+ bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_CONTROLLER =}"
+ count = 1
+ secure_ip = ["127.0.0.1", "::1"];
+ stats_path = "{= env.TMPDIR =}/stats.ucl"
+}
+
+# Lua plugins are loaded from the source tree relative to TESTDIR.
+modules {
+ path = "{= env.TESTDIR =}/../../src/plugins/lua/"
+}
+
+lua = "{= env.TESTDIR =}/lua/test_coverage.lua";
+
+# Single neural rule (SHORT) whose input comes from the metatokens provider
+# only; symbol input is disabled.
+neural {
+ rules {
+ SHORT {
+ train {
+ learning_rate = 0.001;
+ max_usages = 2;
+ # The helper symbols used by the suite score 5.0 (SPAM_SYMBOL*) and
+ # -3.0 (HAM_SYMBOL*) each, so three of either clears these thresholds.
+ spam_score = 1;
+ ham_score = -1;
+ # metatokens-only vectors deduplicate per message in Redis SADD,
+ # so a single sample per class is enough — keep max_trains at 1
+ # so balanced-mode training fires after one spam + one ham scan.
+ max_trains = 1;
+ max_iterations = 250;
+ classes_bias = 0.0;
+ }
+ # Result symbols the Robot suite asserts on.
+ symbol_spam = "NEURAL_SPAM_SHORT";
+ symbol_ham = "NEURAL_HAM_SHORT";
+ ann_expire = 86400;
+ # Presumably the check_anns poll period in seconds; the suite sleeps 3s
+ # after the rotation scan — TODO confirm the unit.
+ watch_interval = 0.5;
+ # Providers-driven input vector; symbol set is decoupled from the ANN.
+ # Rotating set.symbols/set.digest mid-life must not invalidate the
+ # trained model so long as providers_digest stays constant.
+ providers = [
+ { type = "metatokens"; }
+ ];
+ disable_symbols_input = true;
+ fusion {
+ include_meta = false;
+ normalization = "none";
+ }
+ }
+ }
+ allow_local = true;
+}
+redis {
+ servers = "{= env.REDIS_ADDR =}:{= env.REDIS_PORT =}";
+ expand_keys = true;
+}
+
+# Second lua entry; assumed to be the rotation test helper that registers the
+# SPAM_/HAM_ symbols and FORCE_ROTATE_NEURAL — verify the path matches.
+lua = "{= env.TESTDIR =}/lua/neural_rotation.lua";
--- /dev/null
+-- Test helper for the providers-digest rotation scenario.
+--
+-- Mirrors the SPAM_SYMBOL{i}/HAM_SYMBOL{i} setup from neural.lua (so a Robot
+-- suite can drive autolearn via verdict scoring) and adds a callback symbol
+-- that forces a symcache-style rotation: mutates the loaded neural rule's
+-- settings to flip set.symbols and set.digest in place, then clears
+-- set.ann/set.training_profile so the next check_anns poll re-runs profile
+-- selection.
+--
+-- With providers configured + disable_symbols_input=true, the rotation must
+-- not invalidate the trained ANN: providers_digest stays constant, so the
+-- old profile is still compatible and must be reloaded.
+
+local lua_util = require "lua_util"
+local neural_common = require "plugins/neural"
+
+-- Always-firing helper symbols: SPAM_SYMBOL1..14 score 5.0 and
+-- HAM_SYMBOL1..14 score -3.0. They are registered pairwise (SPAM_SYMBOL<n>
+-- first, then HAM_SYMBOL<n>), each with its own callback closure.
+local always_fire_specs = {
+  { prefix = 'SPAM_SYMBOL', score = 5.0 },
+  { prefix = 'HAM_SYMBOL', score = -3.0 },
+}
+
+for idx = 1, 14 do
+  for _, spec in ipairs(always_fire_specs) do
+    local symbol_def = {
+      name = spec.prefix .. tostring(idx),
+      score = spec.score,
+      callback = function()
+        return true, 'Fires always'
+      end
+    }
+    rspamd_config:register_symbol(symbol_def)
+  end
+end
+
+-- Force an in-place "symcache shift" on the loaded neural rule(s).
+--
+-- For every settings entry of every rule that carries a symbols table, the
+-- callback replaces set.symbols with a wholly fresh, sorted list, recomputes
+-- set.digest from that list and clears set.ann / set.training_profile so the
+-- next check_anns poll re-selects a profile from Redis. The list is replaced
+-- (not extended) so the Levenshtein distance against the stored profile
+-- exceeds the legacy 30% tolerance — pre-fix this would orphan the trained
+-- ANN; with providers_digest matching it is still recognised as compatible.
+--
+-- IMPORTANT: registered WITHOUT explicit_disable so it stays subject to the
+-- symbols_enabled allowlist — otherwise it would fire on every training scan
+-- and trample set.can_store_vectors before training data can accumulate.
+--
+-- Returns true, 1.0 and a 'rotated_<n>' option when at least one set was
+-- rotated. Returns false when nothing was rotated, so a suite asserting on
+-- FORCE_ROTATE_NEURAL fails loudly instead of passing vacuously.
+local rotation_counter = 0
+rspamd_config.FORCE_ROTATE_NEURAL = {
+  callback = function(task)
+    rotation_counter = rotation_counter + 1
+    local rotated_sets = 0
+    for _, rule in pairs(neural_common.settings.rules or {}) do
+      for _, set in pairs(rule.settings or {}) do
+        -- Skip entries that are not tables or carry no symbols table.
+        if type(set) == 'table' and type(set.symbols) == 'table' then
+          local fresh = {}
+          -- Twice the current symbol count, but never fewer than 32 names.
+          for i = 1, math.max(#set.symbols * 2, 32) do
+            fresh[#fresh + 1] = string.format('ROTATED_SYM_%d_%d',
+                rotation_counter, i)
+          end
+          table.sort(fresh)
+          set.symbols = fresh
+          set.digest = lua_util.table_digest(fresh)
+          set.ann = nil
+          set.training_profile = nil
+          -- Leave set.can_store_vectors alone: check_anns has already
+          -- populated profile state for this set, the next poll will
+          -- reselect from Redis.
+          rotated_sets = rotated_sets + 1
+        end
+      end
+    end
+    if rotated_sets == 0 then
+      -- Nothing was mutated: do not report the symbol.
+      return false
+    end
+    return true, 1.0, string.format('rotated_%d', rotation_counter)
+  end
+}
+
+-- Run the controller rules init script shipped under INSTALLROOT.
+dofile(rspamd_env.INSTALLROOT .. "/share/rspamd/rules/controller/init.lua")