if not err and type(data) == 'table' then
local nspam,nham = data[1],data[2]
- if nspam > 0 and nham > 0 and
- can_push_train_vector(rule, task, learn_type, nspam, nham) then
-
+ if can_push_train_vector(rule, task, learn_type, nspam, nham) then
local vec = result_to_vector(task, set)
local str = rspamd_util.zstd_compress(table.concat(vec, ';'))
'LPUSH', -- command
{ target_key, str } -- arguments
)
+ else
+ lua_util.debugm(N, task,
+ "do not add %s train data for ANN rule " ..
+ "%s:%s",
+ learn_type, rule.prefix, set.name)
end
else
if err then
-- at least (10 * (1 - 0.25)) = 8 trains
local max_len = math.max(lua_util.unpack(lua_util.values(lens)))
+ local min_len = math.min(lua_util.unpack(lua_util.values(lens)))
if rule.train.learn_type == 'balanced' then
local len_bias_check_pred = function(_, l)
end
else
-- Probabilistic mode, just ensure that at least one vector is okay
- if max_len >= rule.train.max_trains then
+ if min_len > 0 and max_len >= rule.train.max_trains then
rspamd_logger.debugm(N, rspamd_config,
'can start ANN %s learn as it has %s learn vectors; %s required, after checking %s vectors',
ann_key, lens, rule.train.max_trains, what)