[Fix] neural: resilient ANN reuse across symbol-list drift

author Vsevolod Stakhov <vsevolod@rspamd.com>

Sat, 23 May 2026 10:34:17 +0000 (11:34 +0100)

committer Vsevolod Stakhov <vsevolod@rspamd.com>

Sat, 23 May 2026 10:34:17 +0000 (11:34 +0100)
author Vsevolod Stakhov <vsevolod@rspamd.com>
Sat, 23 May 2026 10:34:17 +0000 (11:34 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Sat, 23 May 2026 10:34:17 +0000 (11:34 +0100)
diff --git a/lualib/plugins/neural.lua b/lualib/plugins/neural.lua

index c2362a0a4217a91a0911078f9c2e64e871e68043..a116adc14a39bafd038559277177278627ecfee0 100644 (file)
--- a/lualib/plugins/neural.lua
+++ b/lualib/plugins/neural.lua
@@ -763,8 +763,16 @@ local function is_profile_compatible(rule, set, profile_elt, current_providers_d
    if not profile_elt.symbols or not set.symbols then
      return false, math.huge
    end
+  -- Accept profiles whose symbol list still overlaps the current one by more
+  -- than 50% (i.e. Levenshtein drift < 50% of |set.symbols|). The previous
+  -- 30% threshold rejected the old profile on every modest config change
+  -- and inference went completely dark until a new ANN trained from scratch
+  -- (weeks under realistic class imbalance). With this looser cap the worker
+  -- keeps using the old profile's redis_key -- and crucially its OWN symbol
+  -- list, since result_to_vector uses profile.symbols -- so the trained
+  -- weights stay correctly indexed against the features that produced them.
    local dist = lua_util.distance_sorted(profile_elt.symbols, set.symbols)
-  if dist >= #set.symbols * 0.3 then
+  if dist >= #set.symbols * 0.5 then
      return false, dist
    end
    return true, dist
diff --git a/src/plugins/lua/neural.lua b/src/plugins/lua/neural.lua

index c6a00c4fa2d7195f059b7fef60e8e906a4b6b1bd..b8c787b5cc26fafa1173e69dd6cdb476c4624c3f 100644 (file)
--- a/src/plugins/lua/neural.lua
+++ b/src/plugins/lua/neural.lua
@@ -85,12 +85,19 @@ local function new_ann_profile(task, rule, set, version)
      else
        rspamd_logger.infox(task, 'created new ANN profile for %s:%s, data stored at prefix %s',
          rule.prefix, set.name, profile.redis_key)
-      -- If a prior profile with the same providers_digest holds trained
-      -- weights, carry them over into the fresh profile key.  This prevents
-      -- a symcache-driven profile rotation from abandoning a still-valid
-      -- ANN whenever the input vector schema is decided by providers
-      -- (rather than the symbol list).
-      if providers_digest then
+      -- Carry weights from a prior profile (same providers_digest, different
+      -- symbol-list digest) into the fresh profile key ONLY when the input
+      -- vector schema is decided entirely by providers -- i.e. when
+      -- disable_symbols_input is set. In hybrid mode (providers + symbols)
+      -- the symbol portion of the vector reshapes with symbol drift, and
+      -- load_new_ann then sets set.ann.symbols = profile.symbols (= current
+      -- symbol list), so copied weights would be indexed against features
+      -- they were never trained against -- silent garbage at inference.
+      -- For hybrid mode, is_profile_compatible already routes inference to
+      -- the prior profile entry, which carries its own (older) symbol list
+      -- and therefore keeps weights correctly aligned at inference time;
+      -- skipping carryover is the right behaviour.
+      if providers_digest and rule.disable_symbols_input then
          maybe_carryover_ann(task, rule, set, ann_key, providers_digest)
        end
      end
author	Vsevolod Stakhov <vsevolod@rspamd.com>
	Sat, 23 May 2026 10:34:17 +0000 (11:34 +0100)
committer	Vsevolod Stakhov <vsevolod@rspamd.com>
	Sat, 23 May 2026 10:34:17 +0000 (11:34 +0100)
lualib/plugins/neural.lua		patch \| blob \| blame \| history
src/plugins/lua/neural.lua		patch \| blob \| blame \| history