--- /dev/null
+*** Settings ***
+Suite Setup Rspamd Redis Setup
+Suite Teardown Rspamd Redis Teardown
+Library Process
+Library ${RSPAMD_TESTDIR}/lib/rspamd.py
+Resource ${RSPAMD_TESTDIR}/lib/rspamd.robot
+Variables ${RSPAMD_TESTDIR}/lib/vars.py
+
+*** Variables ***
+${CONFIG} ${RSPAMD_TESTDIR}/configs/neural_drift_pure.conf
+${MESSAGE} ${RSPAMD_TESTDIR}/messages/spam_message.eml
+${REDIS_SCOPE} Suite
+${RSPAMD_SCOPE} Suite
+${RSPAMD_URL_TLD} ${RSPAMD_TESTDIR}/../lua/unit/test_tld.dat
+
+*** Test Cases ***
+Train pure-symbols ANN
+ # Mirrors 001_autotrain's pure-symbols training: 10 spam + 10 ham scans
+ # each producing a distinct vector (one extra SPAM_SYMBOL${INDEX} per
+ # scan) so the ANN sees enough variance to converge.
+ # INDEX runs 4..13 (IN RANGE excludes the upper bound); symbols 1-3 are the
+ # fixed core shared by every scan of a class. Ten scans per class lines up
+ # with train.max_trains = 10 in neural_drift_pure.conf -- presumably the
+ # per-class sample count that triggers training; confirm against the
+ # neural plugin.
+ Sleep 2s Wait for redis and initial check_anns
+ FOR ${INDEX} IN RANGE 4 14
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["SPAM_SYMBOL1", "SPAM_SYMBOL2", "SPAM_SYMBOL3", "SPAM_SYMBOL${INDEX}"]}
+ Expect Symbol SPAM_SYMBOL${INDEX}
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["HAM_SYMBOL1", "HAM_SYMBOL2", "HAM_SYMBOL3", "HAM_SYMBOL${INDEX}"]}
+ Expect Symbol HAM_SYMBOL${INDEX}
+ END
+
+Inference fires before drift
+ # Sanity check ahead of the drift cases: with the symbol list untouched,
+ # a spam-shaped scan must yield NEURAL_SPAM_SHORT and a ham-shaped scan
+ # NEURAL_HAM_SHORT (symbol_spam / symbol_ham of the SHORT rule in
+ # neural_drift_pure.conf). NEURAL_LEARN is disabled on these probe scans,
+ # presumably so they are not stored as training samples -- confirm.
+ Sleep 5s Wait for training to complete and ANN to be reloaded
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["SPAM_SYMBOL1","SPAM_SYMBOL2","SPAM_SYMBOL3","SPAM_SYMBOL8"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+ Expect Symbol NEURAL_SPAM_SHORT
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["HAM_SYMBOL1","HAM_SYMBOL2","HAM_SYMBOL3","HAM_SYMBOL8"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+ Expect Symbol NEURAL_HAM_SHORT
+
+40 percent drift keeps the prior profile compatible
+ # FORCE_DRIFT_NEURAL_40 replaces ~40% of set.symbols with fresh names
+ # (the helper swaps drift_pct% of the baseline, not half of it), which
+ # distance_sorted measures as a drift of ~40% of |set.symbols| -- see the
+ # header of lua/neural_drift_pure.lua for why the measured distance is
+ # not doubled. is_profile_compatible's new 50% cap accepts it
+ # (pre-fix: 30% cap would have rejected and inference would go dark).
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["FORCE_DRIFT_NEURAL_40"];symbols_disabled = ["NEURAL_LEARN","NEURAL_CHECK"]}
+ Expect Symbol FORCE_DRIFT_NEURAL_40
+ Sleep 3s Wait for check_anns periodic to reload after drift
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["SPAM_SYMBOL1","SPAM_SYMBOL2","SPAM_SYMBOL3","SPAM_SYMBOL8"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+ Expect Symbol NEURAL_SPAM_SHORT
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["HAM_SYMBOL1","HAM_SYMBOL2","HAM_SYMBOL3","HAM_SYMBOL8"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+ Expect Symbol NEURAL_HAM_SHORT
+
+60 percent drift rejects the prior profile
+ # FORCE_DRIFT_NEURAL_60 replaces ~60% of set.symbols with fresh names
+ # (the helper swaps drift_pct% of the baseline, not half of it), which
+ # distance_sorted measures as a drift of ~60% of |set.symbols| -- see the
+ # header of lua/neural_drift_pure.lua. Above the 50%
+ # cap: is_profile_compatible rejects, set.ann stays unset, no NEURAL_*
+ # symbols fire until a fresh ANN trains under the new digest.
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["FORCE_DRIFT_NEURAL_60"];symbols_disabled = ["NEURAL_LEARN","NEURAL_CHECK"]}
+ Expect Symbol FORCE_DRIFT_NEURAL_60
+ Sleep 3s Wait for check_anns periodic to reload after drift
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["SPAM_SYMBOL1","SPAM_SYMBOL2","SPAM_SYMBOL3","SPAM_SYMBOL8"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+ Do Not Expect Symbol NEURAL_SPAM_SHORT
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["HAM_SYMBOL1","HAM_SYMBOL2","HAM_SYMBOL3","HAM_SYMBOL8"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+ Do Not Expect Symbol NEURAL_HAM_SHORT
--- /dev/null
+options = {
+ url_tld = "{= env.URL_TLD =}"
+ pidfile = "{= env.TMPDIR =}/rspamd.pid"
+ lua_path = "{= env.INSTALLROOT =}/share/rspamd/lib/?.lua"
+ filters = [];
+ explicit_modules = ["settings"];
+}
+
+logging = {
+ type = "file",
+ level = "debug"
+ filename = "{= env.TMPDIR =}/rspamd.log"
+ log_usec = true;
+}
+metric = {
+ name = "default",
+ actions = {
+ reject = 100500,
+ add_header = 50500,
+ }
+ unknown_weight = 1
+}
+worker {
+ type = normal
+ bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_NORMAL =}"
+ count = 1
+ task_timeout = 10s;
+}
+worker {
+ type = controller
+ bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_CONTROLLER =}"
+ count = 1
+ secure_ip = ["127.0.0.1", "::1"];
+ stats_path = "{= env.TMPDIR =}/stats.ucl"
+}
+
+modules {
+ path = "{= env.TESTDIR =}/../../src/plugins/lua/"
+}
+
+lua = "{= env.TESTDIR =}/lua/test_coverage.lua";
+
+# Pure-symbols neural rule (no providers, no disable_symbols_input).
+# Validates is_profile_compatible's symmetric-difference threshold:
+# the prior profile must survive symbol drift up to 50% of |set.symbols|
+# (raised from the legacy 30% cap, which orphaned ANNs on every modest
+# config change). Two helpers in lua/neural_drift_pure.lua drive a 40%
+# drift (must keep inference alive) and a 60% drift (must drop it).
+neural {
+ rules {
+ SHORT {
+ train {
+ learning_rate = 0.001;
+ max_usages = 2;
+ spam_score = 1;
+ ham_score = -1;
+ max_trains = 10;
+ max_iterations = 250;
+ }
+ symbol_spam = "NEURAL_SPAM_SHORT";
+ symbol_ham = "NEURAL_HAM_SHORT";
+ ann_expire = 86400;
+ watch_interval = 0.5;
+ }
+ }
+ allow_local = true;
+}
+redis {
+ servers = "{= env.REDIS_ADDR =}:{= env.REDIS_PORT =}";
+ expand_keys = true;
+}
+
+lua = "{= env.TESTDIR =}/lua/neural_drift_pure.lua";
--- /dev/null
+-- Test helper for the pure-symbols drift scenario.
+--
+-- Drives is_profile_compatible's symbol-list threshold in pure-symbols
+-- mode (no providers configured). The threshold was raised from 30% to
+-- 50%; this helper exercises both sides of that line:
+-- FORCE_DRIFT_NEURAL_40 produces a 40%-drift set (must stay compatible),
+-- FORCE_DRIFT_NEURAL_60 produces a 60%-drift set (must be rejected).
+--
+-- distance_sorted is an asymmetric edit-distance walk: when the fresh
+-- entries sort to one end of the list (here, before every baseline
+-- name, since "DRIFT_*" < "FORCE_*" < "HAM_*" < "SPAM_*") and the dropped
+-- entries are at the other end, the function reports dist ≈ replace_k
+-- (not 2*replace_k). So to hit dist == drift_pct% of n we drop and add
+-- k = drift_pct * n / 100 entries, not / 200.
+
+local lua_util = require "lua_util"
+local neural_common = require "plugins/neural"
+
+-- Register one symbol that fires unconditionally with the given score.
+-- Each call gets its own callback closure.
+local function register_always_firing(name, score)
+  rspamd_config:register_symbol({
+    name = name,
+    score = score,
+    callback = function()
+      return true, 'Fires always'
+    end
+  })
+end
+
+-- SPAM_SYMBOL1..14 (score 5.0) and HAM_SYMBOL1..14 (score -3.0), registered
+-- pairwise in index order; the test suite selects them per scan through
+-- symbols_enabled.
+for i = 1, 14 do
+  local suffix = tostring(i)
+  register_always_firing('SPAM_SYMBOL' .. suffix, 5.0)
+  register_always_firing('HAM_SYMBOL' .. suffix, -3.0)
+end
+
+-- Per-(rule, set) baseline snapshot, captured on the first drift call so
+-- subsequent calls compare against the originally-trained symbol list and
+-- not against a previously-drifted one.
+local baselines = {}
+
+--- Build the key under which the baseline of a (rule, set) pair is stored.
+-- @param rule table identified by `prefix`, else `name`, else tostring(rule)
+-- @param set table identified by `name`, else the literal 'default'
+-- @return string of the form "<rule id>:<set id>"
+local function snapshot_key(rule, set)
+  local rule_id = rule.prefix or rule.name or rule
+  local set_id = set.name or 'default'
+  return table.concat({ tostring(rule_id), tostring(set_id) }, ':')
+end
+
+--- Rewrite the symbol list of every neural settings element so that it
+-- drifts away from its baseline by `drift_pct` percent.
+--
+-- The baseline is the symbol list seen the first time this function touches
+-- a given (rule, set) pair; later calls always start from that snapshot and
+-- never from an already-drifted list. The last k baseline entries are dropped
+-- and k names of the form DRIFT_NEW_SYM_<pct>_<i> are added, where
+-- k = round(drift_pct * n / 100) clamped to [1, n] and n is the baseline
+-- length. The new list is sorted, its digest is recomputed, and the loaded
+-- ANN and training profile are cleared.
+--
+-- @param drift_pct number percentage of the baseline list to replace
+local function apply_drift(drift_pct)
+  for _, rule in pairs(neural_common.settings.rules or {}) do
+    for _, set in pairs(rule.settings or {}) do
+      -- Only entries that are tables carrying a symbol list are touched.
+      if type(set) == 'table' and type(set.symbols) == 'table' then
+        local key = snapshot_key(rule, set)
+        local original = baselines[key]
+        if not original then
+          -- First visit: remember the untouched symbol list.
+          original = {}
+          for _, sym in ipairs(set.symbols) do
+            original[#original + 1] = sym
+          end
+          baselines[key] = original
+        end
+
+        local total = #original
+        local rounded = math.floor(drift_pct * total / 100 + 0.5)
+        -- Swap at least one symbol, and never more than the baseline holds.
+        local swapped = math.min(total, math.max(1, rounded))
+        local kept = total - swapped
+
+        local drifted = {}
+        for idx = 1, kept do
+          drifted[idx] = original[idx]
+        end
+        for idx = 1, swapped do
+          drifted[kept + idx] = string.format('DRIFT_NEW_SYM_%d_%d',
+              drift_pct, idx)
+        end
+        table.sort(drifted)
+
+        set.symbols = drifted
+        set.digest = lua_util.table_digest(drifted)
+        -- Drop the loaded ANN and training profile so the next check_anns
+        -- poll re-runs profile selection against the drifted symbol list.
+        set.ann = nil
+        set.training_profile = nil
+      end
+    end
+  end
+end
+
+-- Build the rule table for a drift trigger: its callback applies `pct`
+-- percent drift and then reports the symbol as fired with weight 1.0 and
+-- `tag` as its option.
+local function drift_trigger(pct, tag)
+  return {
+    callback = function()
+      apply_drift(pct)
+      return true, 1.0, tag
+    end
+  }
+end
+
+-- Both triggers are registered WITHOUT explicit_disable so they remain
+-- subject to the symbols_enabled allowlist; otherwise they would fire on
+-- every training scan and trample set state before training data
+-- accumulates.
+rspamd_config.FORCE_DRIFT_NEURAL_40 = drift_trigger(40, 'drift_40')
+
+rspamd_config.FORCE_DRIFT_NEURAL_60 = drift_trigger(60, 'drift_60')
+
+dofile(rspamd_env.INSTALLROOT .. "/share/rspamd/rules/controller/init.lua")