end
redis.call('SADD', symbol .. '_keys', prefix)
-redis.call('HSET', prefix, 'version', '2') -- new schema
-redis.call('HINCRBY', prefix, learned_key, is_unlearn and -1 or 1) -- increase or decrease learned count
+redis.call('HSET', prefix, 'version', '2') -- new schema
+
+-- Update learned count, but prevent it from going negative
+if is_unlearn then
+ local current_count = tonumber(redis.call('HGET', prefix, learned_key)) or 0
+ if current_count > 0 then
+ redis.call('HINCRBY', prefix, learned_key, -1)
+ end
+else
+ redis.call('HINCRBY', prefix, learned_key, 1)
+end
for i, token in ipairs(input_tokens) do
- redis.call('HINCRBY', token, hash_key, is_unlearn and -1 or 1)
+ -- Update token count, but prevent it from going negative
+ if is_unlearn then
+ local current_token_count = tonumber(redis.call('HGET', token, hash_key)) or 0
+ if current_token_count > 0 then
+ redis.call('HINCRBY', token, hash_key, -1)
+ end
+ else
+ redis.call('HINCRBY', token, hash_key, 1)
+ end
+
if text_tokens then
local tok1 = text_tokens[i * 2 - 1]
local tok2 = text_tokens[i * 2]
redis.call('HSET', token, 'tokens', tok1)
end
- redis.call('ZINCRBY', prefix .. '_z', is_unlearn and -1 or 1, token)
+ if is_unlearn then
+ local current_z_score = tonumber(redis.call('ZSCORE', prefix .. '_z', token)) or 0
+ if current_z_score > 0 then
+ redis.call('ZINCRBY', prefix .. '_z', -1, token)
+ end
+ else
+ redis.call('ZINCRBY', prefix .. '_z', 1, token)
+ end
end
end
end
if (rcpt) {
rspamd_mempool_set_variable(task->task_pool, "stat_user",
(gpointer) rcpt, nullptr);
+ msg_debug_bayes("redis expansion: found recipient '%s'", rcpt);
+ }
+ else {
+ msg_debug_bayes("redis expansion: no recipient found (deliver_to=%s)",
+ task->deliver_to ? task->deliver_to : "null");
}
}
users_enabled = ucl_object_lookup_any(classifier_obj, "per_user",
"users_enabled", nullptr);
+ msg_debug_bayes_cfg("per-user lookup: users_enabled=%p", users_enabled);
if (users_enabled != nullptr) {
if (ucl_object_type(users_enabled) == UCL_BOOLEAN) {
backend->enable_users = ucl_object_toboolean(users_enabled);
/* Default non-users statistics */
if (backend->enable_users || backend->cbref_user != -1) {
backend->redis_object = REDIS_DEFAULT_USERS_OBJECT;
+ msg_debug_bayes_cfg("using per-user Redis pattern: %s (enable_users=%s, cbref_user=%d)",
+ backend->redis_object, backend->enable_users ? "true" : "false",
+ backend->cbref_user);
}
else {
backend->redis_object = REDIS_DEFAULT_OBJECT;
+ msg_debug_bayes_cfg("using default Redis pattern: %s (enable_users=%s, cbref_user=%d)",
+ backend->redis_object,
+ backend->enable_users ? "true" : "false",
+ backend->cbref_user);
}
}
else {
stcf->symbol);
return nullptr;
}
+ else {
+ msg_debug_bayes("redis object expanded: pattern='%s' -> expanded='%s' (learn=%s, symbol=%s)",
+ ctx->redis_object ? ctx->redis_object : "default",
+ object_expanded,
+ learn ? "true" : "false",
+ stcf->symbol);
+ }
const char *class_label = get_class_label(stcf);
/* Normalize probabilities using softmax */
normalized_probs = g_alloca(cl.num_classes * sizeof(double));
- /* Find maximum for numerical stability */
+ /* Find maximum for numerical stability - only consider classes with sufficient training */
for (i = 0; i < cl.num_classes; i++) {
msg_debug_bayes("class %s, log_prob: %.2f", cl.class_names[i], cl.class_log_probs[i]);
+ /* Only consider classes that have sufficient training data */
+ if (ctx->cfg->min_learns > 0 && cl.class_learns[i] < ctx->cfg->min_learns) {
+ msg_debug_bayes("skipping class %s in winner selection: %uL learns < %ud minimum",
+ cl.class_names[i], cl.class_learns[i], ctx->cfg->min_learns);
+ continue;
+ }
if (cl.class_log_probs[i] > max_log_prob) {
max_log_prob = cl.class_log_probs[i];
winning_class_idx = i;
Multiclass Statistics
[Documentation] Test that statistics show all class information
[Tags] multiclass statistics
- Multiclass Stats Test
-
-Per-User Multiclass Learning
- [Documentation] Test per-user multiclass classification
- [Tags] multiclass per-user
- [Setup] Set Suite Variable ${RSPAMD_STATS_PER_USER} 1
- Multiclass Basic Learn Test user@example.com
- [Teardown] Set Suite Variable ${RSPAMD_STATS_PER_USER} ${EMPTY}
-
-Multiclass Empty Part Test
- [Documentation] Test multiclass learning with empty parts
- [Tags] multiclass empty-part
- Set Test Variable ${MESSAGE} ${RSPAMD_TESTDIR}/messages/empty_part.eml
- Learn Multiclass ${EMPTY} spam ${MESSAGE}
- Scan File ${MESSAGE}
- Expect Symbol BAYES_SPAM
+ Multiclass Stats Test
\ No newline at end of file
+++ /dev/null
-*** Settings ***
-Documentation Multiclass Bayes Migration Tests
-Suite Setup Rspamd Redis Setup
-Suite Teardown Rspamd Redis Teardown
-Resource lib.robot
-Resource multiclass_lib.robot
-
-*** Variables ***
-${RSPAMD_REDIS_SERVER} ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT}
-${RSPAMD_STATS_HASH} siphash
-${BINARY_CONFIG} ${RSPAMD_TESTDIR}/configs/stats.conf
-${MULTICLASS_CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
-
-*** Test Cases ***
-Binary to Multiclass Migration
- [Documentation] Test migration from binary to multiclass configuration
- [Tags] migration binary-to-multiclass
-
- # First, start with binary configuration and learn some data
- Set Suite Variable ${CONFIG} ${BINARY_CONFIG}
- Rspamd Redis Teardown
- Rspamd Redis Setup
-
- # Learn with binary system
- Learn Test
-
- # Now switch to multiclass configuration
- Set Suite Variable ${CONFIG} ${MULTICLASS_CONFIG}
- Rspamd Teardown
- Rspamd Setup
-
- # Should still work with existing data
- Scan File ${MESSAGE_SPAM}
- Expect Symbol BAYES_SPAM
- Scan File ${MESSAGE_HAM}
- Expect Symbol BAYES_HAM
-
- # Should be able to add new classes
- Learn Multiclass ${EMPTY} newsletter ${MESSAGE_NEWSLETTER}
- Scan File ${MESSAGE_NEWSLETTER}
- Expect Symbol BAYES_NEWSLETTER
-
-Configuration Validation
- [Documentation] Test multiclass configuration validation
- [Tags] configuration validation
-
- # Test that configuration loads without errors
- ${result} = Run Process rspamd -t -c ${MULTICLASS_CONFIG}
- Should Be Equal As Integers ${result.rc} 0 msg=Configuration validation failed: ${result.stderr}
-
-Redis Data Format Migration
- [Documentation] Test that Redis data format is properly migrated
- [Tags] migration redis data-format
-
- # Start with binary data
- Set Suite Variable ${CONFIG} ${BINARY_CONFIG}
- Rspamd Redis Teardown
- Rspamd Redis Setup
- Learn Test
-
- # Check binary format in Redis
- ${redis_result} = Run Process redis-cli -p ${RSPAMD_REDIS_PORT} KEYS *_learns
- Should Contain ${redis_result.stdout} _learns
-
- # Switch to multiclass
- Set Suite Variable ${CONFIG} ${MULTICLASS_CONFIG}
- Rspamd Teardown
- Rspamd Setup
-
- # Data should still be accessible
- Scan File ${MESSAGE_SPAM}
- Expect Symbol BAYES_SPAM
-
-Backward Compatibility
- [Documentation] Test that multiclass system maintains backward compatibility
- [Tags] compatibility backward
-
- # Use multiclass config but test old commands
- Learn ${EMPTY} spam ${MESSAGE_SPAM}
- Learn ${EMPTY} ham ${MESSAGE_HAM}
-
- # Should work the same as before
- Scan File ${MESSAGE_SPAM}
- Expect Symbol BAYES_SPAM
- Scan File ${MESSAGE_HAM}
- Expect Symbol BAYES_HAM
-
-Class Label Validation
- [Documentation] Test class label validation and error handling
- [Tags] validation class-labels
-
- # This would test invalid class names, duplicate labels, etc.
- # Implementation depends on how validation errors are exposed
- ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:invalid-class-name ${MESSAGE_SPAM}
- Should Not Be Equal As Integers ${result.rc} 0 msg=Should reject invalid class name
-
-Multiclass Stats Format
- [Documentation] Test that stats output shows multiclass information
- [Tags] statistics multiclass-format
-
- # Learn some data across multiple classes
- Learn Multiclass ${EMPTY} spam ${MESSAGE_SPAM}
- Learn Multiclass ${EMPTY} ham ${MESSAGE_HAM}
- Learn Multiclass ${EMPTY} newsletter ${MESSAGE_NEWSLETTER}
-
- # Check stats format
- ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} stat
- Check Rspamc ${result}
-
- # Should show all classes in stats
- Should Contain ${result.stdout} spam
- Should Contain ${result.stdout} ham
- Should Contain ${result.stdout} newsletter
-
- # Should show learn counts
- Should Match Regexp ${result.stdout} learned.*\\d+
--- /dev/null
+*** Settings ***
+Suite Setup Rspamd Redis Setup
+Suite Teardown Rspamd Redis Teardown
+Test Setup Set Test Hash Documentation
+Resource multiclass_lib.robot
+
+*** Variables ***
+${CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
+${REDIS_SCOPE} Suite
+${RSPAMD_REDIS_SERVER} ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT}
+${RSPAMD_SCOPE} Suite
+${RSPAMD_STATS_BACKEND} redis
+${RSPAMD_STATS_HASH} null
+${RSPAMD_STATS_KEY} null
+${RSPAMD_STATS_PER_USER} true
+
+*** Test Cases ***
+Multiclass Per-User Basic Learn Test
+ Multiclass Basic Learn Test test@example.com
+
+Multiclass Per-User Legacy Compatibility Test
+ Multiclass Legacy Compatibility Test test@example.com
+
+Multiclass Per-User Relearn Test
+ Multiclass Relearn Test test@example.com
+
+Multiclass Per-User Cross-Learn Test
+ Multiclass Cross-Learn Test test@example.com
+
+Multiclass Per-User Unlearn Test
+ Multiclass Unlearn Test test@example.com
\ No newline at end of file