From a9fbcf9d498061d6104e3ccc04bb7f5fd1bbdf5b Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 25 Jul 2025 11:58:36 +0100 Subject: [PATCH] [Project] Fix various issues --- lualib/lua_bayes_redis.lua | 42 +++++++-------- src/libserver/cfg_file.h | 2 +- src/libserver/cfg_rcl.cxx | 39 ++++++++++++-- src/libserver/cfg_utils.cxx | 35 ++++++------ src/libstat/backends/redis_backend.cxx | 43 +++++++++++++-- src/libstat/classifiers/bayes.c | 16 ++---- src/libstat/stat_process.c | 23 ++++++-- .../110_statistics/300-multiclass-redis.robot | 10 ++-- .../310-multiclass-migration.robot | 38 ++++++------- .../cases/110_statistics/multiclass_lib.robot | 54 +++++++++---------- test/functional/configs/multiclass_bayes.conf | 6 +-- 11 files changed, 187 insertions(+), 121 deletions(-) diff --git a/lualib/lua_bayes_redis.lua b/lualib/lua_bayes_redis.lua index 4de7126c7f..53ee06b0ef 100644 --- a/lualib/lua_bayes_redis.lua +++ b/lualib/lua_bayes_redis.lua @@ -54,15 +54,15 @@ local function gen_classify_functor(redis_params, classify_script_id) end lua_redis.exec_redis_script(classify_script_id, - { task = task, is_write = false, key = expanded_key }, - classify_redis_cb, { expanded_key, script_class_labels, stat_tokens }) + { task = task, is_write = false, key = expanded_key }, + classify_redis_cb, { expanded_key, script_class_labels, stat_tokens }) end end local function gen_learn_functor(redis_params, learn_script_id) return function(task, expanded_key, id, class_label, symbol, is_unlearn, stat_tokens, callback, maybe_text_tokens) local function learn_redis_cb(err, data) - lua_util.debugm(N, task, 'learn redis cb: %s, %s', err, data) + lua_util.debugm(N, task, 'learn redis cb: %s, %s for class %s', err, data, class_label) if err then callback(task, false, err) else @@ -80,13 +80,13 @@ local function gen_learn_functor(redis_params, learn_script_id) if maybe_text_tokens then lua_redis.exec_redis_script(learn_script_id, - { task = task, is_write = true, key = expanded_key }, - learn_redis_cb, - { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens }) + { task = task, is_write = true, key = expanded_key }, + learn_redis_cb, + { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens }) else lua_redis.exec_redis_script(learn_script_id, - { task = task, is_write = true, key = expanded_key }, - learn_redis_cb, { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens }) + { task = task, is_write = true, key = expanded_key }, + learn_redis_cb, { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens }) end end end @@ -196,11 +196,11 @@ exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol, end lua_redis.exec_redis_script(stat_script_id, - { ev_base = ev_base, cfg = cfg, is_write = false }, - stat_redis_cb, { tostring(cursor), - symbol, - learn_key, - tostring(max_users) }) + { ev_base = ev_base, cfg = cfg, is_write = false }, + stat_redis_cb, { tostring(cursor), + symbol, + learn_key, + tostring(max_users) }) return statfile_ucl.monitor_timeout or classifier_ucl.monitor_timeout or 30.0 end) end @@ -226,8 +226,8 @@ local function gen_cache_check_functor(redis_params, check_script_id, conf) lua_util.debugm(N, task, 'checking cache: %s', cache_id) lua_redis.exec_redis_script(check_script_id, - { task = task, is_write = false, key = cache_id }, - classify_redis_cb, { cache_id, packed_conf }) + { task = task, is_write = false, key = cache_id }, + classify_redis_cb, { cache_id, packed_conf }) end end @@ -250,9 +250,9 @@ local function gen_cache_learn_functor(redis_params, learn_script_id, conf) lua_util.debugm(N, task, 'try to learn cache: %s as %s', cache_id, cache_class_name) lua_redis.exec_redis_script(learn_script_id, - { task = task, is_write = true, key = cache_id }, - learn_redis_cb, - { cache_id, cache_class_name, packed_conf }) + { task = task, is_write = true, key = cache_id }, + learn_redis_cb, + { cache_id, cache_class_name, packed_conf }) end end @@ -266,8 +266,8 @@ exports.lua_bayes_init_cache = function(classifier_ucl, statfile_ucl) local default_conf = { cache_prefix = "learned_ids", cache_max_elt = 10000, -- Maximum number of elements in the cache key - cache_max_keys = 5, -- Maximum number of keys in the cache - cache_elt_len = 32, -- Length of the element in the cache (will trim id to that value) + cache_max_keys = 5, -- Maximum number of keys in the cache + cache_elt_len = 32, -- Length of the element in the cache (will trim id to that value) } local conf = lua_util.override_defaults(default_conf, classifier_ucl) @@ -282,7 +282,7 @@ exports.lua_bayes_init_cache = function(classifier_ucl, statfile_ucl) local learn_script_id = lua_redis.load_redis_script_from_file("bayes_cache_learn.lua", redis_params) return gen_cache_check_functor(redis_params, check_script_id, conf), gen_cache_learn_functor(redis_params, - learn_script_id, conf) + learn_script_id, conf) end return exports diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index 9f83f80244..355046cac0 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -629,7 +629,7 @@ gboolean rspamd_config_check_statfiles(struct rspamd_classifier_config *cf); /** * Multi-class configuration helpers */ -gboolean rspamd_config_parse_class_labels(ucl_object_t *obj, +gboolean rspamd_config_parse_class_labels(const ucl_object_t *obj, GHashTable **class_labels); gboolean rspamd_config_migrate_binary_config(struct rspamd_statfile_config *stcf); diff --git a/src/libserver/cfg_rcl.cxx b/src/libserver/cfg_rcl.cxx index 68b6460d89..3978b23b01 100644 --- a/src/libserver/cfg_rcl.cxx +++ b/src/libserver/cfg_rcl.cxx @@ -1246,7 +1246,24 @@ rspamd_rcl_statfile_handler(rspamd_mempool_t *pool, const ucl_object_t *obj, } st->is_spam_converted = TRUE; } - /* If class field is present, it was already parsed by the default parser */ + else if (class_val != nullptr && spam_val == nullptr) { + /* Only class field present - set is_spam for backward compatibility */ + if (st->class_name != nullptr) { + if (strcmp(st->class_name, "spam") == 0) { + st->is_spam = TRUE; + } + else if (strcmp(st->class_name, "ham") == 0) { + st->is_spam = FALSE; + } + else { + /* For non-binary classes, default to not spam */ + st->is_spam = FALSE; + } + msg_debug_config("statfile %s with class '%s' set is_spam=%s for compatibility", + st->symbol, st->class_name, st->is_spam ? "true" : "false"); + } + } + /* If both fields are present, class takes precedence and was already parsed by the default parser */ return TRUE; } @@ -1269,7 +1286,7 @@ rspamd_rcl_class_labels_handler(rspamd_mempool_t *pool, return FALSE; } - if (!rspamd_config_parse_class_labels((ucl_object_t *) obj, &ccf->class_labels)) { + if (!rspamd_config_parse_class_labels(obj, &ccf->class_labels)) { g_set_error(err, CFG_RCL_ERROR, EINVAL, "invalid class_labels configuration"); return FALSE; @@ -1351,6 +1368,22 @@ rspamd_rcl_classifier_handler(rspamd_mempool_t *pool, } } } + else if (g_ascii_strcasecmp(st_key, "class_labels") == 0) { + /* Parse class_labels configuration directly */ + if (ucl_object_type(val) != UCL_OBJECT) { + g_set_error(err, CFG_RCL_ERROR, EINVAL, + "class_labels must be an object"); + ucl_object_iterate_free(it); + return FALSE; + } + + if (!rspamd_config_parse_class_labels(val, &ccf->class_labels)) { + g_set_error(err, CFG_RCL_ERROR, EINVAL, + "invalid class_labels configuration"); + ucl_object_iterate_free(it); + return FALSE; + } + } } } @@ -2579,7 +2612,7 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections) FALSE, TRUE, cfg->doc_strings, - "CLassifier options"); + "Classifier options"); /* Default classifier is 'bayes' for now */ sub->default_key = "bayes"; diff --git a/src/libserver/cfg_utils.cxx b/src/libserver/cfg_utils.cxx index 2533bd65e8..c22a9b877b 100644 --- a/src/libserver/cfg_utils.cxx +++ b/src/libserver/cfg_utils.cxx @@ -3044,21 +3044,23 @@ rspamd_ip_is_local_cfg(struct rspamd_config *cfg, } gboolean -rspamd_config_parse_class_labels(ucl_object_t *obj, GHashTable **class_labels) +rspamd_config_parse_class_labels(const ucl_object_t *obj, GHashTable **class_labels) { const ucl_object_t *cur; ucl_object_iter_t it = nullptr; - const char *class_name, *label; if (!obj || ucl_object_type(obj) != UCL_OBJECT) { return FALSE; } - *class_labels = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free); + if (*class_labels == nullptr) { + *class_labels = g_hash_table_new_full(g_str_hash, g_str_equal, + g_free, g_free); + } while ((cur = ucl_object_iterate(obj, &it, true)) != nullptr) { - class_name = ucl_object_key(cur); - label = ucl_object_tostring(cur); + const char *class_name = ucl_object_key(cur); + const char *label = ucl_object_tostring(cur); if (class_name && label) { /* Validate class name: alphanumeric + underscore, max 32 chars */ @@ -3079,20 +3081,15 @@ rspamd_config_parse_class_labels(ucl_object_t *obj, GHashTable **class_labels) } /* Validate label uniqueness */ - GHashTableIter label_iter; - gpointer key, value; - g_hash_table_iter_init(&label_iter, *class_labels); - while (g_hash_table_iter_next(&label_iter, &key, &value)) { - if (strcmp((const char *) value, label) == 0) { - msg_err("backend label '%s' is used by multiple classes", label); - g_hash_table_destroy(*class_labels); - *class_labels = nullptr; - return FALSE; - } + if (g_hash_table_lookup(*class_labels, label)) { + msg_err("backend label '%s' is used by multiple classes", label); + g_hash_table_destroy(*class_labels); + *class_labels = nullptr; + return FALSE; } - - g_hash_table_insert(*class_labels, g_strdup(class_name), g_strdup(label)); } + + g_hash_table_insert(*class_labels, g_strdup(class_name), g_strdup(label)); } return g_hash_table_size(*class_labels) > 0; @@ -3171,13 +3168,13 @@ rspamd_config_validate_class_config(struct rspamd_classifier_config *ccf, GError /* Validate class count */ if (class_count < 2) { g_set_error(err, g_quark_from_static_string("config"), 1, - "classifier must have at least 2 classes, found %u", class_count); + "classifier must have at least 2 classes, found %ud", class_count); g_hash_table_destroy(seen_classes); return FALSE; } if (class_count > 20) { - msg_warn("classifier has %u classes, performance may be degraded above 20 classes", + msg_warn("classifier has %ud classes, performance may be degraded above 20 classes", class_count); } diff --git a/src/libstat/backends/redis_backend.cxx b/src/libstat/backends/redis_backend.cxx index 5d55bf3ace..a0305ec8d1 100644 --- a/src/libstat/backends/redis_backend.cxx +++ b/src/libstat/backends/redis_backend.cxx @@ -167,10 +167,12 @@ public: auto save_in_mempool(const char *class_label) const { - auto var_name = fmt::format("{}_{}", redis_object_expanded, class_label); + auto var_name = + rspamd_mempool_strdup(task->task_pool, + fmt::format("{}_{}", redis_object_expanded, class_label).c_str()); /* We do not set destructor for the variable, as it should be already added on creation */ - rspamd_mempool_set_variable(task->task_pool, var_name.c_str(), (gpointer) this, nullptr); - msg_debug_bayes("saved runtime in mempool at %s", var_name.c_str()); + rspamd_mempool_set_variable(task->task_pool, var_name, (gpointer) this, nullptr); + msg_debug_bayes("saved runtime in mempool at %s", var_name); } }; @@ -911,6 +913,39 @@ rspamd_redis_classified(lua_State *L) lua_rawgeti(L, 3, 1); /* learned_counts -> position 4 */ lua_rawgeti(L, 3, 2); /* token_results -> position 5 */ + /* First, process learned_counts for all statfiles */ + if (lua_istable(L, 4) && rt->stcf->clcf && rt->stcf->clcf->statfiles) { + GList *cur = rt->stcf->clcf->statfiles; + int redis_idx = 1; /* Lua array index starts at 1 */ + + while (cur) { + auto *stcf = (struct rspamd_statfile_config *) cur->data; + const char *class_label = get_class_label(stcf); + + /* Get the runtime for this statfile */ + auto maybe_rt = redis_stat_runtime::maybe_recover_from_mempool(rt->task, + rt->redis_object_expanded, + class_label); + if (maybe_rt) { + auto *statfile_rt = maybe_rt.value(); + + /* Extract learned count for this statfile */ + lua_rawgeti(L, 4, redis_idx); /* learned_counts[redis_idx] */ + if (lua_isnumber(L, -1)) { + statfile_rt->learned = lua_tointeger(L, -1); + msg_debug_bayes("set learned count for class %s (label %s): %L", + stcf->class_name ? stcf->class_name : "unknown", + class_label, + statfile_rt->learned); + } + lua_pop(L, 1); /* Pop learned_counts[redis_idx] */ + } + + cur = g_list_next(cur); + redis_idx++; + } + } + /* Process results for all statfiles in order using class_index (O(N) instead of O(N²)) */ if (rt->stcf->clcf && rt->stcf->clcf->statfiles) { GList *cur = rt->stcf->clcf->statfiles; @@ -1246,6 +1281,8 @@ rspamd_redis_learn_tokens(struct rspamd_task *task, lua_new_text(L, text_tokens_buf, text_tokens_len, false); } + msg_debug_bayes("called lua learn script for %s (cookie=%s)", rt->stcf->symbol, cookie); + if (lua_pcall(L, nargs, 0, err_idx) != 0) { msg_err_task("call to script failed: %s", lua_tostring(L, -1)); lua_settop(L, err_idx - 1); diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c index 52613d6489..3d1bd71074 100644 --- a/src/libstat/classifiers/bayes.c +++ b/src/libstat/classifiers/bayes.c @@ -136,7 +136,6 @@ bayes_classify_token(struct rspamd_classifier *ctx, unsigned int spam_count = 0, ham_count = 0, total_count = 0; struct rspamd_statfile *st; struct rspamd_task *task; - const char *token_type = "txt"; double spam_prob, spam_freq, ham_freq, bayes_spam_prob, bayes_ham_prob, ham_prob, fw, w, val; @@ -225,11 +224,6 @@ bayes_classify_token(struct rspamd_classifier *ctx, if (!(tok->flags & RSPAMD_STAT_TOKEN_FLAG_META)) { cl->text_tokens++; } - else { - token_type = "meta"; - } - - /* Per-token debug logging removed to reduce verbosity */ } } @@ -245,10 +239,9 @@ bayes_classify_token_multiclass(struct rspamd_classifier *ctx, int id; struct rspamd_statfile *st; struct rspamd_task *task; - const char *token_type = "txt"; double val, fw, w; - unsigned int *class_counts; - unsigned int total_count = 0; + guint64 *class_counts; + guint64 total_count = 0; task = cl->task; @@ -258,12 +251,11 @@ bayes_classify_token_multiclass(struct rspamd_classifier *ctx, if (val <= cl->meta_skip_prob) { return; } - token_type = "meta"; } /* Allocate array for class counts */ - class_counts = g_alloca(cl->num_classes * sizeof(unsigned int)); - memset(class_counts, 0, cl->num_classes * sizeof(unsigned int)); + class_counts = g_alloca(cl->num_classes * sizeof(guint64)); + memset(class_counts, 0, cl->num_classes * sizeof(guint64)); /* Collect counts for each class */ for (i = 0; i < ctx->statfiles_ids->len; i++) { diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 5126fd2cc3..11b31decca 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -893,9 +893,26 @@ rspamd_stat_backends_learn(struct rspamd_stat_ctx *st_ctx, backend_found = TRUE; if (!(task->flags & RSPAMD_TASK_FLAG_UNLEARN)) { - if (!!spam != !!st->stcf->is_spam) { - /* If we are not unlearning, then do not touch another class */ - continue; + /* For multiclass learning, check if this statfile has any tokens to learn */ + if (task->flags & RSPAMD_TASK_FLAG_LEARN_CLASS) { + /* Multiclass learning: only process statfiles that have tokens set up by the classifier */ + gboolean has_tokens = FALSE; + for (unsigned int k = 0; k < task->tokens->len && !has_tokens; k++) { + rspamd_token_t *tok = (rspamd_token_t *) g_ptr_array_index(task->tokens, k); + if (tok->values[id] != 0) { + has_tokens = TRUE; + } + } + if (!has_tokens) { + continue; + } + } + else { + /* Binary learning: use traditional spam/ham check */ + if (!!spam != !!st->stcf->is_spam) { + /* If we are not unlearning, then do not touch another class */ + continue; + } } } diff --git a/test/functional/cases/110_statistics/300-multiclass-redis.robot b/test/functional/cases/110_statistics/300-multiclass-redis.robot index 1663a78258..12ec34d628 100644 --- a/test/functional/cases/110_statistics/300-multiclass-redis.robot +++ b/test/functional/cases/110_statistics/300-multiclass-redis.robot @@ -7,13 +7,14 @@ Resource multiclass_lib.robot *** Variables *** ${RSPAMD_REDIS_SERVER} ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT} ${RSPAMD_STATS_HASH} siphash +${CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf *** Test Cases *** Multiclass Basic Learning and Classification [Documentation] Test basic multiclass learning and classification [Tags] multiclass basic learning Multiclass Basic Learn Test - + Multiclass Legacy Compatibility [Documentation] Test that old learn_spam/learn_ham commands still work [Tags] multiclass compatibility legacy @@ -39,11 +40,6 @@ Multiclass Statistics [Tags] multiclass statistics Multiclass Stats Test -Multiclass Performance - [Documentation] Test classification performance with multiple classes - [Tags] multiclass performance - Multiclass Performance Test 50 - Per-User Multiclass Learning [Documentation] Test per-user multiclass classification [Tags] multiclass per-user @@ -57,4 +53,4 @@ Multiclass Empty Part Test Set Test Variable ${MESSAGE} ${RSPAMD_TESTDIR}/messages/empty_part.eml Learn Multiclass ${EMPTY} spam ${MESSAGE} Scan File ${MESSAGE} - Expect Symbol BAYES_SPAM \ No newline at end of file + Expect Symbol BAYES_SPAM diff --git a/test/functional/cases/110_statistics/310-multiclass-migration.robot b/test/functional/cases/110_statistics/310-multiclass-migration.robot index ef13880054..4ce4d67e48 100644 --- a/test/functional/cases/110_statistics/310-multiclass-migration.robot +++ b/test/functional/cases/110_statistics/310-multiclass-migration.robot @@ -2,8 +2,8 @@ Documentation Multiclass Bayes Migration Tests Suite Setup Rspamd Redis Setup Suite Teardown Rspamd Redis Teardown -Resource multiclass_lib.robot Resource lib.robot +Resource multiclass_lib.robot *** Variables *** ${RSPAMD_REDIS_SERVER} ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT} @@ -15,26 +15,26 @@ ${MULTICLASS_CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf Binary to Multiclass Migration [Documentation] Test migration from binary to multiclass configuration [Tags] migration binary-to-multiclass - + # First, start with binary configuration and learn some data Set Suite Variable ${CONFIG} ${BINARY_CONFIG} Rspamd Redis Teardown Rspamd Redis Setup - + # Learn with binary system Learn Test - + # Now switch to multiclass configuration Set Suite Variable ${CONFIG} ${MULTICLASS_CONFIG} Rspamd Teardown Rspamd Setup - + # Should still work with existing data Scan File ${MESSAGE_SPAM} Expect Symbol BAYES_SPAM Scan File ${MESSAGE_HAM} Expect Symbol BAYES_HAM - + # Should be able to add new classes Learn Multiclass ${EMPTY} newsletter ${MESSAGE_NEWSLETTER} Scan File ${MESSAGE_NEWSLETTER} @@ -43,7 +43,7 @@ Binary to Multiclass Migration Configuration Validation [Documentation] Test multiclass configuration validation [Tags] configuration validation - + # Test that configuration loads without errors ${result} = Run Process rspamd -t -c ${MULTICLASS_CONFIG} Should Be Equal As Integers ${result.rc} 0 msg=Configuration validation failed: ${result.stderr} @@ -51,22 +51,22 @@ Configuration Validation Redis Data Format Migration [Documentation] Test that Redis data format is properly migrated [Tags] migration redis data-format - + # Start with binary data Set Suite Variable ${CONFIG} ${BINARY_CONFIG} Rspamd Redis Teardown Rspamd Redis Setup Learn Test - + # Check binary format in Redis ${redis_result} = Run Process redis-cli -p ${RSPAMD_REDIS_PORT} KEYS *_learns Should Contain ${redis_result.stdout} _learns - + # Switch to multiclass Set Suite Variable ${CONFIG} ${MULTICLASS_CONFIG} Rspamd Teardown Rspamd Setup - + # Data should still be accessible Scan File ${MESSAGE_SPAM} Expect Symbol BAYES_SPAM @@ -74,11 +74,11 @@ Redis Data Format Migration Backward Compatibility [Documentation] Test that multiclass system maintains backward compatibility [Tags] compatibility backward - + # Use multiclass config but test old commands Learn ${EMPTY} spam ${MESSAGE_SPAM} Learn ${EMPTY} ham ${MESSAGE_HAM} - + # Should work the same as before Scan File ${MESSAGE_SPAM} Expect Symbol BAYES_SPAM @@ -88,7 +88,7 @@ Backward Compatibility Class Label Validation [Documentation] Test class label validation and error handling [Tags] validation class-labels - + # This would test invalid class names, duplicate labels, etc. # Implementation depends on how validation errors are exposed ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:invalid-class-name ${MESSAGE_SPAM} @@ -97,20 +97,20 @@ Class Label Validation Multiclass Stats Format [Documentation] Test that stats output shows multiclass information [Tags] statistics multiclass-format - + # Learn some data across multiple classes Learn Multiclass ${EMPTY} spam ${MESSAGE_SPAM} Learn Multiclass ${EMPTY} ham ${MESSAGE_HAM} Learn Multiclass ${EMPTY} newsletter ${MESSAGE_NEWSLETTER} - + # Check stats format ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} stat Check Rspamc ${result} - + # Should show all classes in stats Should Contain ${result.stdout} spam Should Contain ${result.stdout} ham Should Contain ${result.stdout} newsletter - + # Should show learn counts - Should Match Regexp ${result.stdout} learned.*\\d+ \ No newline at end of file + Should Match Regexp ${result.stdout} learned.*\\d+ diff --git a/test/functional/cases/110_statistics/multiclass_lib.robot b/test/functional/cases/110_statistics/multiclass_lib.robot index e6e1788d40..4fa4284bb5 100644 --- a/test/functional/cases/110_statistics/multiclass_lib.robot +++ b/test/functional/cases/110_statistics/multiclass_lib.robot @@ -1,7 +1,5 @@ *** Settings *** -Library ${RSPAMD_TESTDIR}/lib/rspamd.py -Resource ${RSPAMD_TESTDIR}/lib/rspamd.robot -Variables ${RSPAMD_TESTDIR}/lib/vars.py +Resource lib.robot *** Variables *** ${CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf @@ -44,26 +42,26 @@ Multiclass Basic Learn Test IF "${user}" Set To Dictionary ${kwargs} Deliver-To=${user} END - + # Learn all classes Learn Multiclass ${user} spam ${MESSAGE_SPAM} Learn Multiclass ${user} ham ${MESSAGE_HAM} Learn Multiclass ${user} newsletter ${MESSAGE_NEWSLETTER} Learn Multiclass ${user} transactional ${MESSAGE_TRANSACTIONAL} - + # Test classification Scan File ${MESSAGE_SPAM} &{kwargs} Expect Symbol BAYES_SPAM - + Scan File ${MESSAGE_HAM} &{kwargs} Expect Symbol BAYES_HAM - + Scan File ${MESSAGE_NEWSLETTER} &{kwargs} Expect Symbol BAYES_NEWSLETTER - + Scan File ${MESSAGE_TRANSACTIONAL} &{kwargs} Expect Symbol BAYES_TRANSACTIONAL - + Set Suite Variable ${RSPAMD_STATS_LEARNTEST} 1 Multiclass Legacy Compatibility Test @@ -72,15 +70,15 @@ Multiclass Legacy Compatibility Test IF "${user}" Set To Dictionary ${kwargs} Deliver-To=${user} END - + # Test legacy learn_spam and learn_ham commands still work Learn Multiclass Legacy ${user} spam ${MESSAGE_SPAM} Learn Multiclass Legacy ${user} ham ${MESSAGE_HAM} - + # Should still classify correctly Scan File ${MESSAGE_SPAM} &{kwargs} Expect Symbol BAYES_SPAM - + Scan File ${MESSAGE_HAM} &{kwargs} Expect Symbol BAYES_HAM @@ -89,15 +87,15 @@ Multiclass Relearn Test IF ${RSPAMD_STATS_LEARNTEST} == 0 Fail "Learn test was not run" END - + Set Test Variable ${kwargs} &{EMPTY} IF "${user}" Set To Dictionary ${kwargs} Deliver-To=${user} END - + # Relearn spam message as ham Learn Multiclass ${user} ham ${MESSAGE_SPAM} - + # Should now classify as ham or at least not spam Scan File ${MESSAGE_SPAM} &{kwargs} ${pass} = Run Keyword And Return Status Expect Symbol BAYES_HAM @@ -112,10 +110,10 @@ Multiclass Cross-Learn Test IF "${user}" Set To Dictionary ${kwargs} Deliver-To=${user} END - + # Learn newsletter message as transactional Learn Multiclass ${user} transactional ${MESSAGE_NEWSLETTER} - + # Should classify as transactional, not newsletter Scan File ${MESSAGE_NEWSLETTER} &{kwargs} Expect Symbol BAYES_TRANSACTIONAL @@ -127,15 +125,15 @@ Multiclass Unlearn Test IF "${user}" Set To Dictionary ${kwargs} Deliver-To=${user} END - + # First learn spam Learn Multiclass ${user} spam ${MESSAGE_SPAM} Scan File ${MESSAGE_SPAM} &{kwargs} Expect Symbol BAYES_SPAM - + # Then unlearn spam (learn as ham) Learn Multiclass ${user} ham ${MESSAGE_SPAM} - + # Should no longer classify as spam Scan File ${MESSAGE_SPAM} &{kwargs} Do Not Expect Symbol BAYES_SPAM @@ -151,7 +149,7 @@ Multiclass Stats Test # Check that rspamc stat shows learning counts for all classes ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} stat Check Rspamc ${result} - + # Should show statistics for all classes Should Contain ${result.stdout} spam Should Contain ${result.stdout} ham @@ -161,11 +159,11 @@ Multiclass Stats Test Multiclass Configuration Migration Test # Test that old binary config can be automatically migrated Set Test Variable ${binary_config} ${RSPAMD_TESTDIR}/configs/stats.conf - + # Start with binary config ${result} = Run Rspamc --config ${binary_config} stat Check Rspamc ${result} - + # Should show deprecation warning but work Should Contain ${result.stderr} deprecated ignore_case=True @@ -173,17 +171,17 @@ Multiclass Performance Test [Arguments] ${num_messages}=100 # Test classification performance with multiple classes ${start_time} = Get Time epoch - + FOR ${i} IN RANGE ${num_messages} Scan File ${MESSAGE_SPAM} Scan File ${MESSAGE_HAM} Scan File ${MESSAGE_NEWSLETTER} Scan File ${MESSAGE_TRANSACTIONAL} END - + ${end_time} = Get Time epoch ${duration} = Evaluate ${end_time} - ${start_time} - + # Should complete in reasonable time (adjust threshold as needed) Should Be True ${duration} < 30 msg=Performance test took ${duration}s, expected < 30s @@ -191,6 +189,6 @@ Multiclass Memory Test # Test that memory usage is reasonable for multiclass classification ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} stat Check Rspamc ${result} - + # Extract memory usage if available in stats - # This is a placeholder - actual implementation would parse memory stats \ No newline at end of file + # This is a placeholder - actual implementation would parse memory stats diff --git a/test/functional/configs/multiclass_bayes.conf b/test/functional/configs/multiclass_bayes.conf index 3504cd16eb..5b8d077341 100644 --- a/test/functional/configs/multiclass_bayes.conf +++ b/test/functional/configs/multiclass_bayes.conf @@ -64,29 +64,25 @@ classifier { statfile { class = "spam"; symbol = BAYES_SPAM; - size = 1M; server = {= env.REDIS_SERVER =} } statfile { class = "ham"; symbol = BAYES_HAM; - size = 1M; server = {= env.REDIS_SERVER =} } statfile { class = "newsletter"; symbol = BAYES_NEWSLETTER; - size = 1M; server = {= env.REDIS_SERVER =} } statfile { class = "transactional"; symbol = BAYES_TRANSACTIONAL; - size = 1M; server = {= env.REDIS_SERVER =} } - # Backend class labels for Redis storage optimization + # Backend class labels for Redis class_labels = { "spam" = "S"; "ham" = "H"; -- 2.47.3