From: Vsevolod Stakhov Date: Sat, 16 May 2026 20:13:29 +0000 (+0100) Subject: [Test] neural: cover providers_digest rotation carryover X-Git-Tag: 4.1.0~55^2 X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=7a84ee0da7672c1020a45f3d32bc0c99f8e9c690;p=thirdparty%2Frspamd.git [Test] neural: cover providers_digest rotation carryover Regression test for the symcache-driven profile rotation fix. Drives a live rspamd + Redis through: train ANN with providers-only input (metatokens, disable_symbols_input=true) -> verify NEURAL_SPAM / NEURAL_HAM fire -> mutate set.symbols/set.digest in the scanner worker (simulates a symcache shift) -> verify inference still fires after the next check_anns poll. Pre-fix the mutation pushes the symbol-list Levenshtein distance well past the 30% tolerance, the worker rejects the trained profile, and NEURAL_SPAM stops firing. Post-fix the providers_digest stays constant and is recognised as the authoritative schema fingerprint, so the trained ANN is reloaded. max_trains=1 because metatokens-only scans produce an identical vector per message and Redis SADD deduplicates — one spam + one ham scan are enough to fire training. 
--- diff --git a/test/functional/cases/330_neural/003_carryover.robot b/test/functional/cases/330_neural/003_carryover.robot new file mode 100644 index 0000000000..6f06353bab --- /dev/null +++ b/test/functional/cases/330_neural/003_carryover.robot @@ -0,0 +1,49 @@ +*** Settings *** +Suite Setup Rspamd Redis Setup +Suite Teardown Rspamd Redis Teardown +Library Process +Library ${RSPAMD_TESTDIR}/lib/rspamd.py +Resource ${RSPAMD_TESTDIR}/lib/rspamd.robot +Variables ${RSPAMD_TESTDIR}/lib/vars.py + +*** Variables *** +${CONFIG} ${RSPAMD_TESTDIR}/configs/neural_rotation.conf +${SPAM_MSG} ${RSPAMD_TESTDIR}/messages/spam.eml +${HAM_MSG} ${RSPAMD_TESTDIR}/messages/ham.eml +${REDIS_SCOPE} Suite +${RSPAMD_SCOPE} Suite +${RSPAMD_URL_TLD} ${RSPAMD_TESTDIR}/../lua/unit/test_tld.dat + +*** Test Cases *** +Train providers-driven ANN + # max_trains=1 means a single spam + single ham scan triggers training. + # Metatokens-only vector + disable_symbols_input=true makes the input + # vector independent of which symbols fire — providers_digest is the + # only schema fingerprint. 
+ Sleep 2s Wait for redis and initial check_anns + Scan File ${SPAM_MSG} Settings={symbols_enabled = ["SPAM_SYMBOL1", "SPAM_SYMBOL2", "SPAM_SYMBOL3"]} + Expect Symbol SPAM_SYMBOL1 + Scan File ${HAM_MSG} Settings={symbols_enabled = ["HAM_SYMBOL1", "HAM_SYMBOL2", "HAM_SYMBOL3"]} + Expect Symbol HAM_SYMBOL1 + +Inference fires before rotation + Sleep 5s Wait for training to complete and ANN to be reloaded + Scan File ${SPAM_MSG} Settings={symbols_enabled = ["SPAM_SYMBOL1","SPAM_SYMBOL2","SPAM_SYMBOL3"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]} + Expect Symbol NEURAL_SPAM_SHORT + Scan File ${HAM_MSG} Settings={symbols_enabled = ["HAM_SYMBOL1","HAM_SYMBOL2","HAM_SYMBOL3"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]} + Expect Symbol NEURAL_HAM_SHORT + +Force symcache-style rotation + # Mutate set.symbols/set.digest in the scanner worker so the next + # check_anns poll re-runs profile selection. With the fix, the + # providers_digest-based match preserves the trained ANN; pre-fix + # the symbol-digest shift would orphan it. 
+ Scan File ${SPAM_MSG} Settings={symbols_enabled = ["FORCE_ROTATE_NEURAL"];symbols_disabled = ["NEURAL_LEARN","NEURAL_CHECK"]} + Expect Symbol FORCE_ROTATE_NEURAL + Sleep 3s Wait for check_anns periodic to reload after rotation + +Inference still fires after rotation + Scan File ${SPAM_MSG} Settings={symbols_enabled = ["SPAM_SYMBOL1","SPAM_SYMBOL2","SPAM_SYMBOL3"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]} + Expect Symbol NEURAL_SPAM_SHORT + Scan File ${HAM_MSG} Settings={symbols_enabled = ["HAM_SYMBOL1","HAM_SYMBOL2","HAM_SYMBOL3"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]} + Expect Symbol NEURAL_HAM_SHORT diff --git a/test/functional/configs/neural_rotation.conf b/test/functional/configs/neural_rotation.conf new file mode 100644 index 0000000000..95aa773a6c --- /dev/null +++ b/test/functional/configs/neural_rotation.conf @@ -0,0 +1,82 @@ +options = { + url_tld = "{= env.URL_TLD =}" + pidfile = "{= env.TMPDIR =}/rspamd.pid" + lua_path = "{= env.INSTALLROOT =}/share/rspamd/lib/?.lua" + filters = []; + explicit_modules = ["settings"]; +} + +logging = { + type = "file", + level = "debug" + filename = "{= env.TMPDIR =}/rspamd.log" + log_usec = true; +} +metric = { + name = "default", + actions = { + reject = 100500, + add_header = 50500, + } + unknown_weight = 1 +} +worker { + type = normal + bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_NORMAL =}" + count = 1 + task_timeout = 10s; +} +worker { + type = controller + bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_CONTROLLER =}" + count = 1 + secure_ip = ["127.0.0.1", "::1"]; + stats_path = "{= env.TMPDIR =}/stats.ucl" +} + +modules { + path = "{= env.TESTDIR =}/../../src/plugins/lua/" +} + +lua = "{= env.TESTDIR =}/lua/test_coverage.lua"; + +neural { + rules { + SHORT { + train { + learning_rate = 0.001; + max_usages = 2; + spam_score = 1; + ham_score = -1; + # metatokens-only vectors deduplicate per message in Redis SADD, + # so a single sample per class is enough — keep 
max_trains at 1 + # so balanced-mode training fires after one spam + one ham scan. + max_trains = 1; + max_iterations = 250; + classes_bias = 0.0; + } + symbol_spam = "NEURAL_SPAM_SHORT"; + symbol_ham = "NEURAL_HAM_SHORT"; + ann_expire = 86400; + watch_interval = 0.5; + # Providers-driven input vector; symbol set is decoupled from the ANN. + # Rotating set.symbols/set.digest mid-life must not invalidate the + # trained model so long as providers_digest stays constant. + providers = [ + { type = "metatokens"; } + ]; + disable_symbols_input = true; + fusion { + include_meta = false; + normalization = "none"; + } + } + } + allow_local = true; +} +redis { + servers = "{= env.REDIS_ADDR =}:{= env.REDIS_PORT =}"; + expand_keys = true; +} + +lua = "{= env.TESTDIR =}/lua/neural_rotation.lua"; diff --git a/test/functional/lua/neural_rotation.lua b/test/functional/lua/neural_rotation.lua new file mode 100644 index 0000000000..4371f44e8e --- /dev/null +++ b/test/functional/lua/neural_rotation.lua @@ -0,0 +1,73 @@ +-- Test helper for the providers-digest rotation scenario. +-- +-- Mirrors the SPAM_SYMBOL{i}/HAM_SYMBOL{i} setup from neural.lua (so a Robot +-- suite can drive autolearn via verdict scoring) and adds a callback symbol +-- that forces a symcache-style rotation: mutates the loaded neural rule's +-- settings to flip set.symbols and set.digest in place, then clears +-- set.ann/set.training_profile so the next check_anns poll re-runs profile +-- selection. +-- +-- With providers configured + disable_symbols_input=true, the rotation must +-- not invalidate the trained ANN: providers_digest stays constant, so the +-- old profile is still compatible and must be reloaded. + +local lua_util = require "lua_util" +local neural_common = require "plugins/neural" + +for i = 1, 14 do + rspamd_config:register_symbol({ + name = 'SPAM_SYMBOL' .. 
tostring(i), + score = 5.0, + callback = function() + return true, 'Fires always' + end + }) + rspamd_config:register_symbol({ + name = 'HAM_SYMBOL' .. tostring(i), + score = -3.0, + callback = function() + return true, 'Fires always' + end + }) +end + +-- Force an in-place "symcache shift" on the loaded neural rule(s). +-- Replaces set.symbols with a freshly generated list, recomputes set.digest, +-- and clears the loaded ANN reference so the next check_anns poll re-selects +-- a profile from Redis. +-- +-- IMPORTANT: registered WITHOUT explicit_disable so it stays subject to the +-- symbols_enabled allowlist — otherwise it would fire on every training scan +-- and trample set.can_store_vectors before training data can accumulate. +-- Replace set.symbols with a wholly fresh list so the Levenshtein distance +-- against the stored profile exceeds the legacy 30% tolerance — pre-fix this +-- would orphan the trained ANN; with providers_digest matching it is still +-- recognised as compatible. +local rotation_counter = 0 +rspamd_config.FORCE_ROTATE_NEURAL = { + callback = function(task) + rotation_counter = rotation_counter + 1 + for _, rule in pairs(neural_common.settings.rules or {}) do + for _, set in pairs(rule.settings or {}) do + if type(set) == 'table' and type(set.symbols) == 'table' then + local fresh = {} + for i = 1, math.max(#set.symbols * 2, 32) do + fresh[#fresh + 1] = string.format('ROTATED_SYM_%d_%d', + rotation_counter, i) + end + table.sort(fresh) + set.symbols = fresh + set.digest = lua_util.table_digest(fresh) + set.ann = nil + set.training_profile = nil + -- Leave set.can_store_vectors alone: check_anns has already + -- populated profile state for this set, the next poll will + -- reselect from Redis. + end + end + end + return true, 1.0, string.format('rotated_%d', rotation_counter) + end +} + +dofile(rspamd_env.INSTALLROOT .. "/share/rspamd/rules/controller/init.lua")