store_pool_only = false, -- store tokens in cache only (disables autotrain);
store_set_only = false, -- store ham and spam sets in Redis, but do not train ANN (autotrain must be enabled);
-- neural_vec_mpack stores vector of training data in messagepack neural_profile_digest stores profile digest
+ -- frozen: first-class freeze. Stops automatic training and stops auto-storing
+ -- live vectors (so a frozen model's pools never accrue an imbalanced live
+ -- set), while inference keeps serving the current ANN unchanged. Explicit
+ -- ANN-Train (manual_train) still stores AND trains on demand. Supersedes the
+ -- auto-learn side of store_set_only/store_pool_only (those keep working when
+ -- frozen is not set).
+ frozen = false,
+ -- forced_learn_minimal_scan: when a manual-train scan (ANN-Train header) maps
+ -- to a disable_symbols_input rule, a high-priority neural prefilter disables
+ -- every non-neural symbol so the symbols-independent training vector is built
+ -- without issuing any RBL/DNS, fuzzy, ClickHouse, capture/cluster work. nil
+ -- means "default to disable_symbols_input" (resolved per-rule at init); set to
+ -- false to opt out and keep running the full pipeline for forced learns.
+ forced_learn_minimal_scan = nil,
},
watch_interval = 60.0,
lock_expire = 600,
* @return {boolean} `true` if symbol has been found
*/
LUA_FUNCTION_DEF(task, disable_symbol);
+/***
+ * @method task:disable_all_symbols([skip_mask])
+ * Disable execution of every symbol for this particular task except those whose
+ * type/flags intersect `skip_mask`. This is the "process only these" primitive:
+ * it mirrors what the `symbols_enabled` settings key does internally. Combine it
+ * with `task:enable_symbol()` (called afterwards) to run only a chosen subset of
+ * symbols. Typically invoked from a high-priority prefilter so that the disabled
+ * symbols never execute (no wasted DNS/Redis/HTTP work).
+ * @param {number} skip_mask optional bitmask of SYMBOL_TYPE_* flags to keep
+ * enabled; defaults to `SYMBOL_TYPE_EXPLICIT_DISABLE` (i.e. symbols flagged
+ * `explicit_disable` are left running, matching the `symbols_enabled` default)
+ */
+LUA_FUNCTION_DEF(task, disable_all_symbols);
/***
* @method task:get_date(type[, gmt])
* Returns timestamp for a connection or for a MIME message. This function can be called with a
LUA_INTERFACE_DEF(task, has_symbol_regexp),
LUA_INTERFACE_DEF(task, enable_symbol),
LUA_INTERFACE_DEF(task, disable_symbol),
+ LUA_INTERFACE_DEF(task, disable_all_symbols),
LUA_INTERFACE_DEF(task, get_date),
LUA_INTERFACE_DEF(task, get_message_id),
LUA_INTERFACE_DEF(task, get_timeval),
return 1;
}
+/*
+ * Lua binding: task:disable_all_symbols([skip_mask])
+ *
+ * Argument 1 must be a task, otherwise a Lua error ("invalid arguments") is
+ * raised. Argument 2 is optional: when it is a number it replaces the default
+ * skip mask (SYMBOL_TYPE_EXPLICIT_DISABLE). Pushes no values back to Lua.
+ */
+static int
+lua_task_disable_all_symbols(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	struct rspamd_task *task = lua_check_task(L, 1);
+	unsigned int skip_mask = SYMBOL_TYPE_EXPLICIT_DISABLE;
+
+	if (task == NULL) {
+		return luaL_error(L, "invalid arguments");
+	}
+
+	/* An explicit numeric mask overrides the default */
+	if (lua_isnumber(L, 2)) {
+		skip_mask = (unsigned int) lua_tointeger(L, 2);
+	}
+
+	/* Without a symcache runtime we are not inside a scan: nothing to disable */
+	if (task->symcache_runtime != NULL) {
+		rspamd_symcache_disable_all_symbols(task, task->cfg->cache, skip_mask);
+	}
+
+	return 0;
+}
+
static int
lua_task_get_symbols(lua_State *L)
{
return nil
end
+-- Forced-learn fast path (high-priority prefilter callback).
+--
+-- Acts only on scans carrying an explicit `ANN-Train: spam|ham` header. If every
+-- neural rule applicable to the task is eligible for the minimal scan
+-- (disable_symbols_input together with train.forced_learn_minimal_scan), all
+-- symbols except the neural ones are switched off for this task, so RBL/DNS,
+-- fuzzy, bayes-scoring, ClickHouse, capture/cluster and similar work is never
+-- issued. The training vector of such rules is built from registered providers
+-- and metatokens, which read parsed-message data (text parts, URLs, headers)
+-- that is already available at the prefilter stage (PROCESS_MESSAGE runs before
+-- PRE_FILTERS).
+--
+-- The neural settings id and the profile key are not affected: with
+-- disable_symbols_input the profile/digest are derived from providers_digest
+-- (see process_settings_elt + is_profile_compatible), so the stored vector and
+-- key are byte-for-byte identical to what the live full-scan path would store.
+--
+-- A single applicable rule that is not eligible makes the whole task fall back
+-- to a full scan.
+-- @param task the task being scanned
+local function neural_forced_learn_prefilter(task)
+  local train_class = get_ann_train_header(task)
+  if train_class ~= 'spam' and train_class ~= 'ham' then
+    -- Not an explicit manual-train scan: leave the pipeline untouched
+    return
+  end
+
+  -- Symbols that must stay runnable. NEURAL_LEARN carries explicit_disable and
+  -- therefore survives disable_all_symbols() with its default skip mask; it is
+  -- listed anyway so the set is complete. NEURAL_CHECK and the per-rule
+  -- spam/ham virtual symbols are not explicit_disable and need re-enabling.
+  local keep = { NEURAL_CHECK = true, NEURAL_LEARN = true }
+  local any_minimal = false
+
+  for _, rule in pairs(settings.rules) do
+    if neural_common.get_rule_settings(task, rule) then
+      keep[rule.symbol_spam] = true
+      keep[rule.symbol_ham] = true
+
+      local train = rule.train
+      if not (rule.disable_symbols_input and train and train.forced_learn_minimal_scan) then
+        -- This applicable rule either builds its vector from symbols or was
+        -- opted out by the operator. Stripping symbols could alter what it
+        -- stores (or would ignore the opt-out), so run the full scan instead.
+        lua_util.debugm(N, task,
+            'forced-learn minimal scan disabled: rule %s is not eligible ' ..
+                '(disable_symbols_input=%s, forced_learn_minimal_scan=%s)',
+            rule.prefix, rule.disable_symbols_input,
+            train and train.forced_learn_minimal_scan)
+        return
+      end
+
+      any_minimal = true
+    end
+  end
+
+  if not any_minimal then
+    -- No neural rule applies to this task at all
+    return
+  end
+
+  -- "Process only these": switch off every symbol-cache item (explicit_disable
+  -- ones are skipped by default), then turn the neural symbols back on. This is
+  -- the same primitive `symbols_enabled` relies on, so filter, postfilter and
+  -- idempotent stages are all covered.
+  task:disable_all_symbols()
+  for sym in pairs(keep) do
+    task:enable_symbol(sym)
+  end
+
+  lua_util.debugm(N, task,
+      'forced-learn minimal scan: disabled all non-neural symbols for %s training',
+      train_class)
+end
+
local function ann_push_task_result(rule, task, verdict, score, set)
local train_opts = rule.train
local learn_spam, learn_ham
has_symbols_provider = true
end
- if has_llm_provider and not manual_train then
+ if train_opts.frozen and not manual_train then
+ -- Frozen model: never auto-learn and never auto-store a live vector, so the
+ -- pools cannot accrue an imbalanced live set. Inference keeps serving the
+ -- current ANN unchanged; only an explicit ANN-Train (manual_train) below can
+ -- still store and (on demand) retrain. This supersedes the auto-learn side
+ -- of store_set_only/store_pool_only.
+ learn_spam = false
+ learn_ham = false
+ skip_reason = 'model is frozen (train.frozen): auto-learn disabled'
+ lua_util.debugm(N, task, '%s:%s is frozen, skip auto-store of live vector',
+ rule.prefix, set.name)
+ elseif has_llm_provider and not manual_train then
-- Use expression-based autolearn conditions for LLM providers
if rule.autolearn and rule.autolearn.enabled then
local learn_type, reason = neural_learn.get_learn_type(task, rule)
'SADD', -- command
{ target_key, str } -- arguments
)
+
+ -- A frozen model trains ONLY when an operator pushes a corpus via
+ -- ANN-Train; record that request so the controller's auto-train
+ -- trigger (which is otherwise short-circuited for frozen models)
+ -- knows to retrain from these manual vectors. TTL keeps it from
+ -- forcing stale retrains long after the corpus push.
+ if rule.train.frozen and manual_train then
+ local marker_key = neural_common.pending_train_key(rule, set) .. '_retrain_req'
+ lua_redis.redis_make_request(task,
+ rule.redis,
+ nil,
+ true, -- is write
+ function(merr)
+ if merr then
+ lua_util.debugm(N, task, 'cannot set frozen retrain marker %s: %s',
+ marker_key, merr)
+ end
+ end,
+ 'SET',
+ { marker_key, tostring(rspamd_util.get_time()), 'EX', tostring(rule.ann_expire) }
+ )
+ end
end
if rule.providers and #rule.providers > 0 then
-- We have our ANN and that's train vectors, check if we can learn
local ann_key = sel_elt.redis_key
- -- Check if we need to train ann
- if rule.train.store_set_only then
+ -- Check if we need to train ann. Frozen supersedes store_set_only: a frozen
+ -- model never auto-trains, but unlike store_set_only it still retrains when an
+ -- operator pushes a corpus via ANN-Train (gated on the retrain-request marker
+ -- below). When not frozen, the historical store_set_only behaviour applies.
+ if not rule.train.frozen and rule.train.store_set_only then
lua_util.debugm(N, rspamd_config, "skiped check if ANN %s needs to be trained due to store_set_only", ann_key)
return
end
ann_key, pending_key, lens)
lua_util.debugm(N, rspamd_config, 'maybe_train_existing_ann: initiating train for key=%s spam=%s ham=%s', ann_key,
lens.spam or -1, lens.ham or -1)
+ -- Consume the frozen retrain-request marker now that an actual training is
+ -- starting, so one operator corpus push triggers exactly one retrain.
+ if rule.train.frozen then
+ lua_redis.redis_make_request_taskless(ev_base, rspamd_config, rule.redis,
+ nil, true, function(_, _) end, 'DEL', { pending_key .. '_retrain_req' })
+ end
do_train_ann(worker, ev_base, rule, set, ann_key)
end
)
end
- -- Start the chain
- check_spam_len()
+ -- Start the chain. For a frozen model the controller's auto-train trigger is
+ -- short-circuited: it only proceeds when an operator-driven ANN-Train left a
+ -- retrain-request marker (the marker is consumed in initiate_train, so a
+ -- single corpus push yields a single retrain).
+ if rule.train.frozen then
+ local marker_key = pending_key .. '_retrain_req'
+ lua_redis.redis_make_request_taskless(ev_base,
+ rspamd_config,
+ rule.redis,
+ nil,
+ false, -- is read
+ function(err, data)
+ if err then
+ rspamd_logger.errx(rspamd_config, 'cannot read frozen retrain marker %s: %s',
+ marker_key, err)
+ return
+ end
+ -- For a missing key lua_redis does not hand us a string (the exact
+ -- non-string placeholder depends on the reply conversion - TODO confirm);
+ -- treat anything that is not a non-empty string as "no request".
+ if type(data) ~= 'string' or data == '' then
+ lua_util.debugm(N, rspamd_config,
+ 'frozen ANN %s: no pending ANN-Train retrain request, skip auto-train', ann_key)
+ return
+ end
+ lua_util.debugm(N, rspamd_config,
+ 'frozen ANN %s: ANN-Train retrain requested, counting vectors', ann_key)
+ check_spam_len()
+ end,
+ 'GET',
+ { marker_key }
+ )
+ else
+ check_spam_len()
+ end
end
end
rule_elt.train.max_trains = rule_elt.train.max_train
end
+ -- forced_learn_minimal_scan defaults to ON whenever the rule's training vector
+ -- is symbols-independent (disable_symbols_input): a forced ANN-Train scan then
+ -- skips the whole non-neural pipeline. Operators can set it to false to opt out
+ -- explicitly. For symbol-dependent rules it stays off (stripping symbols would
+ -- change the stored vector relative to the live full-scan path).
+ if rule_elt.train.forced_learn_minimal_scan == nil then
+ rule_elt.train.forced_learn_minimal_scan = rule_elt.disable_symbols_input and true or false
+ end
+
if not rule_elt.profile then
rule_elt.profile = {}
end
callback = ann_push_vector
})
+-- Register the forced-learn fast-path prefilter. For qualifying ANN-Train scans
+-- of disable_symbols_input rules it disables the whole non-neural pipeline.
+-- It is registered with priority `high` so that it runs after the settings
+-- prefilters (priority `top`), which lets get_settings_id()/get_rule_settings()
+-- see the resolved settings, yet still ahead of the heavy DNS/Redis filter
+-- symbols.
+rspamd_config:register_symbol({
+  callback = neural_forced_learn_prefilter,
+  name = 'NEURAL_FORCED_LEARN_CHECK',
+  priority = lua_util.symbols_priorities.high,
+  type = 'prefilter',
+  flags = 'empty,nostat,explicit_disable',
+})
+
-- We also need to deal with settings
rspamd_config:add_post_init(neural_common.process_rules_settings)
--- /dev/null
+*** Settings ***
+Suite Setup Rspamd Redis Setup
+Suite Teardown Rspamd Redis Teardown
+Library Process
+Library Collections
+Library ${RSPAMD_TESTDIR}/lib/rspamd.py
+Resource ${RSPAMD_TESTDIR}/lib/rspamd.robot
+Variables ${RSPAMD_TESTDIR}/lib/vars.py
+
+*** Variables ***
+${CONFIG} ${RSPAMD_TESTDIR}/configs/neural_forced_learn.conf
+${SPAM_MSG} ${RSPAMD_TESTDIR}/messages/spam.eml
+${HAM_MSG} ${RSPAMD_TESTDIR}/messages/ham.eml
+${REDIS_SCOPE} Suite
+${RSPAMD_SCOPE} Suite
+${RSPAMD_URL_TLD} ${RSPAMD_TESTDIR}/../lua/unit/test_tld.dat
+
+*** Test Cases ***
+Forced-learn minimal scan disables non-neural symbols
+ # A disable_symbols_input rule with forced_learn_minimal_scan=true: an
+ # ANN-Train scan must run the neural prefilter, which disables every non-neural
+ # symbol. SPAM_SYMBOL1 is a plain always-firing filter symbol, so it must NOT
+ # appear when ANN-Train is set, and MUST appear on a normal (full) scan.
+ Sleep 2s Wait for redis and initial check_anns
+ Scan File ${SPAM_MSG} ANN-Train=spam
+ Do Not Expect Symbol SPAM_SYMBOL1
+ Do Not Expect Symbol SPAM_SYMBOL2
+ Scan File ${SPAM_MSG}
+ Expect Symbol SPAM_SYMBOL1
+
+Minimal scan stores the vector under the providers-digest profile key
+ # The forced-learn scan above must have stored a training vector under the
+ # providers-digest key (rn_SHORT_default_<digest>_<ver>_spam_set), exactly the
+ # key the live full-scan path uses (disable_symbols_input keys on
+ # providers_digest, not on which symbols fired).
+ ${spam_set} = Get Neural Train Set spam
+ Should Not Be Empty ${spam_set} msg=no spam training set created by forced learn
+ ${n} = Redis SCARD ${spam_set}
+ Should Be True ${n} >= 1 msg=forced-learn scan did not store a training vector
+
+Minimal scan vector is byte-identical to the full-scan vector
+ # Re-scan the SAME message through the full pipeline. There is no need to
+ # disable NEURAL_FORCED_LEARN_CHECK for this: without an ANN-Train header the
+ # prefilter is a no-op, and metatokens are symbols-independent, so the
+ # full-scan auto-learn vector equals the minimal one. Storing both into the
+ # same Redis SET must dedup to a single member — the byte-for-byte equivalence
+ # the feature guarantees.
+ ${spam_set} = Get Neural Train Set spam
+ # ANN-Train scan again (minimal path) — identical vector, dedups
+ Scan File ${SPAM_MSG} ANN-Train=spam
+ # Full-pipeline auto-learn of the same message — identical metatokens vector
+ Scan File ${SPAM_MSG}
+ Expect Symbol SPAM_SYMBOL1
+ Sleep 0.5s Let the async SADDs settle
+ ${n} = Redis SCARD ${spam_set}
+ Should Be Equal As Integers ${n} 1
+ ... msg=minimal-scan and full-scan vectors for the same message are not identical
+
+Forced-learn corpus trains the model
+ # Add one ham vector via a minimal ANN-Train scan: with max_trains=1 the
+ # balanced trigger now fires (1 spam + 1 ham) and the model trains from the
+ # symbols-independent corpus. Inference must then fire on both classes.
+ Scan File ${HAM_MSG} ANN-Train=ham
+ Do Not Expect Symbol HAM_SYMBOL1
+ Sleep 6s Wait for training to complete and ANN to be reloaded
+ Scan File ${SPAM_MSG} Settings={groups_enabled=["neural"];symbols_disabled=["NEURAL_LEARN"]}
+ Expect Symbol NEURAL_SPAM_SHORT
+ Scan File ${HAM_MSG} Settings={groups_enabled=["neural"];symbols_disabled=["NEURAL_LEARN"]}
+ Expect Symbol NEURAL_HAM_SHORT
+
+*** Keywords ***
+Get Neural Train Set
+ [Arguments] ${class}
+ # Return the name of the first Redis key matching rn_SHORT_*_<class>_set,
+ # where <class> is the ${class} argument (e.g. spam).
+ # The training set keys use the rn_ prefix (ANN blobs/sets), distinct from the
+ # rn3_ profile zset.
+ ${res} = Run Process redis-cli -h ${RSPAMD_REDIS_ADDR} -p ${RSPAMD_REDIS_PORT}
+ ... KEYS rn_SHORT_*_${class}_set
+ # redis-cli output is split on newlines and only the first line is kept; when
+ # KEYS matches nothing the output is empty and the result is an empty string.
+ ${key} = Evaluate $res.stdout.strip().split('\\n')[0]
+ [Return] ${key}
+
+Redis SCARD
+ [Arguments] ${key}
+ # Run `redis-cli SCARD ${key}` against the suite's Redis instance and return
+ # the reported cardinality as an integer.
+ ${res} = Run Process redis-cli -h ${RSPAMD_REDIS_ADDR} -p ${RSPAMD_REDIS_PORT}
+ ... SCARD ${key}
+ # Surrounding whitespace in stdout is stripped before the integer conversion
+ ${n} = Convert To Integer ${res.stdout.strip()}
+ [Return] ${n}
--- /dev/null
+*** Settings ***
+Suite Setup Rspamd Redis Setup
+Suite Teardown Rspamd Redis Teardown
+Library Process
+Library Collections
+Library ${RSPAMD_TESTDIR}/lib/rspamd.py
+Resource ${RSPAMD_TESTDIR}/lib/rspamd.robot
+Variables ${RSPAMD_TESTDIR}/lib/vars.py
+
+*** Variables ***
+${CONFIG} ${RSPAMD_TESTDIR}/configs/neural_frozen.conf
+${MESSAGE} ${RSPAMD_TESTDIR}/messages/spam_message.eml
+${REDIS_SCOPE} Suite
+${RSPAMD_SCOPE} Suite
+${RSPAMD_URL_TLD} ${RSPAMD_TESTDIR}/../lua/unit/test_tld.dat
+
+*** Test Cases ***
+Live traffic does not grow a frozen model's pools
+ # Identical to 001_autotrain's training drive, but the rule is train.frozen=true.
+ # Each scan reaches a spam/ham verdict that would normally auto-store a vector;
+ # a frozen model must store NOTHING from live traffic, so no training-set key is
+ # ever created.
+ Sleep 2s Wait for redis and initial check_anns
+ FOR ${INDEX} IN RANGE 4 14
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["SPAM_SYMBOL1", "SPAM_SYMBOL2", "SPAM_SYMBOL3", "SPAM_SYMBOL${INDEX}"]}
+ Expect Symbol SPAM_SYMBOL${INDEX}
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["HAM_SYMBOL1", "HAM_SYMBOL2", "HAM_SYMBOL3", "HAM_SYMBOL${INDEX}"]}
+ Expect Symbol HAM_SYMBOL${INDEX}
+ END
+ Sleep 2s Give any (erroneous) auto-store a chance to land
+ ${nkeys} = Count Neural Train Set Keys
+ Should Be Equal As Integers ${nkeys} 0
+ ... msg=frozen model accrued live training vectors (pools must not grow)
+
+Frozen model does not auto-train
+ # With no stored vectors and a short-circuited auto-train trigger, inference
+ # must stay dark — nothing has been trained from the live traffic above.
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["SPAM_SYMBOL1","SPAM_SYMBOL2","SPAM_SYMBOL3","SPAM_SYMBOL8"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+ Do Not Expect Symbol NEURAL_SPAM_SHORT
+ Do Not Expect Symbol NEURAL_HAM_SHORT
+
+ANN-Train trains a frozen model on demand
+ # Freeze stops auto-learn, NOT operator-driven corpus retrains. Pushing a
+ # balanced corpus with the ANN-Train header stores vectors, sets the retrain
+ # marker and lets the controller train once. Inference must then fire.
+ FOR ${INDEX} IN RANGE 4 14
+ Scan File ${MESSAGE} ANN-Train=spam Settings={symbols_enabled = ["SPAM_SYMBOL1", "SPAM_SYMBOL2", "SPAM_SYMBOL3", "SPAM_SYMBOL${INDEX}"]}
+ Scan File ${MESSAGE} ANN-Train=ham Settings={symbols_enabled = ["HAM_SYMBOL1", "HAM_SYMBOL2", "HAM_SYMBOL3", "HAM_SYMBOL${INDEX}"]}
+ END
+ ${nkeys} = Count Neural Train Set Keys
+ Should Be True ${nkeys} >= 1 msg=ANN-Train did not store vectors on a frozen model
+ Sleep 6s Wait for training to complete and ANN to be reloaded
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["SPAM_SYMBOL1","SPAM_SYMBOL2","SPAM_SYMBOL3","SPAM_SYMBOL8"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+ Expect Symbol NEURAL_SPAM_SHORT
+ Do Not Expect Symbol NEURAL_HAM_SHORT
+
+Check Neural HAM after frozen ANN-Train
+ Scan File ${MESSAGE} Settings={symbols_enabled = ["HAM_SYMBOL1","HAM_SYMBOL2","HAM_SYMBOL3","HAM_SYMBOL8"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+ Expect Symbol NEURAL_HAM_SHORT
+ Do Not Expect Symbol NEURAL_SPAM_SHORT
+
+*** Keywords ***
+Count Neural Train Set Keys
+ # Number of rn_SHORT_*_set training keys (spam_set / ham_set). The rn3_ profile
+ # zset is registered regardless; only training-set keys signal accrued vectors.
+ ${res} = Run Process redis-cli -h ${RSPAMD_REDIS_ADDR} -p ${RSPAMD_REDIS_PORT}
+ ... KEYS rn_SHORT_*_set
+ # Count the non-empty lines of redis-cli output, so that an empty reply (no
+ # matching keys) yields 0 rather than 1.
+ ${count} = Evaluate len([k for k in $res.stdout.strip().split('\\n') if k])
+ [Return] ${count}
--- /dev/null
+options = {
+ url_tld = "{= env.URL_TLD =}"
+ pidfile = "{= env.TMPDIR =}/rspamd.pid"
+ lua_path = "{= env.INSTALLROOT =}/share/rspamd/lib/?.lua"
+ filters = [];
+ explicit_modules = ["settings"];
+}
+
+logging = {
+ type = "file",
+ level = "debug"
+ filename = "{= env.TMPDIR =}/rspamd.log"
+ log_usec = true;
+}
+metric = {
+ name = "default",
+ actions = {
+ reject = 100500,
+ add_header = 50500,
+ }
+ unknown_weight = 1
+}
+worker {
+ type = normal
+ bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_NORMAL =}"
+ count = 1
+ task_timeout = 10s;
+}
+worker {
+ type = controller
+ bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_CONTROLLER =}"
+ count = 1
+ secure_ip = ["127.0.0.1", "::1"];
+ stats_path = "{= env.TMPDIR =}/stats.ucl"
+}
+
+modules {
+ path = "{= env.TESTDIR =}/../../src/plugins/lua/"
+}
+
+lua = "{= env.TESTDIR =}/lua/test_coverage.lua";
+
+neural {
+ rules {
+ SHORT {
+ train {
+ learning_rate = 0.001;
+ max_usages = 2;
+ spam_score = 1;
+ ham_score = -1;
+ # metatokens-only vectors deduplicate per message in Redis SADD, so
+ # a single sample per class is enough; balanced-mode training fires
+ # only once both a spam and a ham vector exist (one spam scan on its
+ # own never rotates the key, keeping the equivalence asserts stable).
+ max_trains = 1;
+ max_iterations = 250;
+ classes_bias = 0.0;
+ # default would already be true for disable_symbols_input; set it
+ # explicitly so the intent of the suite is obvious.
+ forced_learn_minimal_scan = true;
+ }
+ symbol_spam = "NEURAL_SPAM_SHORT";
+ symbol_ham = "NEURAL_HAM_SHORT";
+ ann_expire = 86400;
+ watch_interval = 0.5;
+ # Symbols-independent vector: input is metatokens only, so the stored
+ # vector never depends on which rule symbols fired.
+ providers = [
+ { type = "metatokens"; }
+ ];
+ disable_symbols_input = true;
+ fusion {
+ include_meta = false;
+ normalization = "none";
+ }
+ }
+ }
+ allow_local = true;
+}
+redis {
+ servers = "{= env.REDIS_ADDR =}:{= env.REDIS_PORT =}";
+ expand_keys = true;
+}
+
+lua = "{= env.TESTDIR =}/lua/neural_rotation.lua";
--- /dev/null
+options = {
+ url_tld = "{= env.URL_TLD =}"
+ pidfile = "{= env.TMPDIR =}/rspamd.pid"
+ lua_path = "{= env.INSTALLROOT =}/share/rspamd/lib/?.lua"
+ filters = [];
+ explicit_modules = ["settings"];
+}
+
+logging = {
+ type = "file",
+ level = "debug"
+ filename = "{= env.TMPDIR =}/rspamd.log"
+ log_usec = true;
+}
+metric = {
+ name = "default",
+ actions = {
+ reject = 100500,
+ add_header = 50500,
+ }
+ unknown_weight = 1
+}
+worker {
+ type = normal
+ bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_NORMAL =}"
+ count = 1
+ task_timeout = 10s;
+}
+worker {
+ type = controller
+ bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_CONTROLLER =}"
+ count = 1
+ secure_ip = ["127.0.0.1", "::1"];
+ stats_path = "{= env.TMPDIR =}/stats.ucl"
+}
+
+modules {
+ path = "{= env.TESTDIR =}/../../src/plugins/lua/"
+}
+
+lua = "{= env.TESTDIR =}/lua/test_coverage.lua";
+
+neural {
+ rules {
+ SHORT {
+ train {
+ learning_rate = 0.001;
+ max_usages = 2;
+ spam_score = 1;
+ ham_score = -1;
+ max_trains = 10;
+ max_iterations = 250;
+ # Frozen: live (verdict-based) traffic must NOT accrue vectors and
+ # the controller must NOT auto-train; only an explicit ANN-Train
+ # corpus push may store and retrain.
+ frozen = true;
+ }
+ symbol_spam = "NEURAL_SPAM_SHORT";
+ symbol_ham = "NEURAL_HAM_SHORT";
+ ann_expire = 86400;
+ watch_interval = 0.5;
+ }
+ }
+ allow_local = true;
+}
+redis {
+ servers = "{= env.REDIS_ADDR =}:{= env.REDIS_PORT =}";
+ expand_keys = true;
+}
+
+lua = "{= env.TESTDIR =}/lua/neural.lua";