git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Fix various issues
author Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 25 Jul 2025 10:58:36 +0000 (11:58 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 25 Jul 2025 10:58:36 +0000 (11:58 +0100)
lualib/lua_bayes_redis.lua
src/libserver/cfg_file.h
src/libserver/cfg_rcl.cxx
src/libserver/cfg_utils.cxx
src/libstat/backends/redis_backend.cxx
src/libstat/classifiers/bayes.c
src/libstat/stat_process.c
test/functional/cases/110_statistics/300-multiclass-redis.robot
test/functional/cases/110_statistics/310-multiclass-migration.robot
test/functional/cases/110_statistics/multiclass_lib.robot
test/functional/configs/multiclass_bayes.conf

index 4de7126c7f48cbdd4b1b8fddd60cf48d8530ef66..53ee06b0efa822f16466ddf112bd787e45dd9291 100644 (file)
@@ -54,15 +54,15 @@ local function gen_classify_functor(redis_params, classify_script_id)
     end
 
     lua_redis.exec_redis_script(classify_script_id,
-      { task = task, is_write = false, key = expanded_key },
-      classify_redis_cb, { expanded_key, script_class_labels, stat_tokens })
+        { task = task, is_write = false, key = expanded_key },
+        classify_redis_cb, { expanded_key, script_class_labels, stat_tokens })
   end
 end
 
 local function gen_learn_functor(redis_params, learn_script_id)
   return function(task, expanded_key, id, class_label, symbol, is_unlearn, stat_tokens, callback, maybe_text_tokens)
     local function learn_redis_cb(err, data)
-      lua_util.debugm(N, task, 'learn redis cb: %s, %s', err, data)
+      lua_util.debugm(N, task, 'learn redis cb: %s, %s for class %s', err, data, class_label)
       if err then
         callback(task, false, err)
       else
@@ -80,13 +80,13 @@ local function gen_learn_functor(redis_params, learn_script_id)
 
     if maybe_text_tokens then
       lua_redis.exec_redis_script(learn_script_id,
-        { task = task, is_write = true, key = expanded_key },
-        learn_redis_cb,
-        { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens })
+          { task = task, is_write = true, key = expanded_key },
+          learn_redis_cb,
+          { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens })
     else
       lua_redis.exec_redis_script(learn_script_id,
-        { task = task, is_write = true, key = expanded_key },
-        learn_redis_cb, { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens })
+          { task = task, is_write = true, key = expanded_key },
+          learn_redis_cb, { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens })
     end
   end
 end
@@ -196,11 +196,11 @@ exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol,
       end
 
       lua_redis.exec_redis_script(stat_script_id,
-        { ev_base = ev_base, cfg = cfg, is_write = false },
-        stat_redis_cb, { tostring(cursor),
-          symbol,
-          learn_key,
-          tostring(max_users) })
+          { ev_base = ev_base, cfg = cfg, is_write = false },
+          stat_redis_cb, { tostring(cursor),
+                           symbol,
+                           learn_key,
+                           tostring(max_users) })
       return statfile_ucl.monitor_timeout or classifier_ucl.monitor_timeout or 30.0
     end)
   end
@@ -226,8 +226,8 @@ local function gen_cache_check_functor(redis_params, check_script_id, conf)
 
     lua_util.debugm(N, task, 'checking cache: %s', cache_id)
     lua_redis.exec_redis_script(check_script_id,
-      { task = task, is_write = false, key = cache_id },
-      classify_redis_cb, { cache_id, packed_conf })
+        { task = task, is_write = false, key = cache_id },
+        classify_redis_cb, { cache_id, packed_conf })
   end
 end
 
@@ -250,9 +250,9 @@ local function gen_cache_learn_functor(redis_params, learn_script_id, conf)
 
     lua_util.debugm(N, task, 'try to learn cache: %s as %s', cache_id, cache_class_name)
     lua_redis.exec_redis_script(learn_script_id,
-      { task = task, is_write = true, key = cache_id },
-      learn_redis_cb,
-      { cache_id, cache_class_name, packed_conf })
+        { task = task, is_write = true, key = cache_id },
+        learn_redis_cb,
+        { cache_id, cache_class_name, packed_conf })
   end
 end
 
@@ -266,8 +266,8 @@ exports.lua_bayes_init_cache = function(classifier_ucl, statfile_ucl)
   local default_conf = {
     cache_prefix = "learned_ids",
     cache_max_elt = 10000, -- Maximum number of elements in the cache key
-    cache_max_keys = 5,    -- Maximum number of keys in the cache
-    cache_elt_len = 32,    -- Length of the element in the cache (will trim id to that value)
+    cache_max_keys = 5, -- Maximum number of keys in the cache
+    cache_elt_len = 32, -- Length of the element in the cache (will trim id to that value)
   }
 
   local conf = lua_util.override_defaults(default_conf, classifier_ucl)
@@ -282,7 +282,7 @@ exports.lua_bayes_init_cache = function(classifier_ucl, statfile_ucl)
   local learn_script_id = lua_redis.load_redis_script_from_file("bayes_cache_learn.lua", redis_params)
 
   return gen_cache_check_functor(redis_params, check_script_id, conf), gen_cache_learn_functor(redis_params,
-    learn_script_id, conf)
+      learn_script_id, conf)
 end
 
 return exports
index 9f83f80244c44075576bbedc81572b0191c4abe3..355046cac0cf68606f71c9911ada01551d1142cf 100644 (file)
@@ -629,7 +629,7 @@ gboolean rspamd_config_check_statfiles(struct rspamd_classifier_config *cf);
 /**
  * Multi-class configuration helpers
  */
-gboolean rspamd_config_parse_class_labels(ucl_object_t *obj,
+gboolean rspamd_config_parse_class_labels(const ucl_object_t *obj,
                                                                                  GHashTable **class_labels);
 
 gboolean rspamd_config_migrate_binary_config(struct rspamd_statfile_config *stcf);
index 68b6460d891cea12a4593a03360823a852f1166d..3978b23b01f0d66276582515a8bbe8e157721415 100644 (file)
@@ -1246,7 +1246,24 @@ rspamd_rcl_statfile_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
                        }
                        st->is_spam_converted = TRUE;
                }
-               /* If class field is present, it was already parsed by the default parser */
+               else if (class_val != nullptr && spam_val == nullptr) {
+                       /* Only class field present - set is_spam for backward compatibility */
+                       if (st->class_name != nullptr) {
+                               if (strcmp(st->class_name, "spam") == 0) {
+                                       st->is_spam = TRUE;
+                               }
+                               else if (strcmp(st->class_name, "ham") == 0) {
+                                       st->is_spam = FALSE;
+                               }
+                               else {
+                                       /* For non-binary classes, default to not spam */
+                                       st->is_spam = FALSE;
+                               }
+                               msg_debug_config("statfile %s with class '%s' set is_spam=%s for compatibility",
+                                                                st->symbol, st->class_name, st->is_spam ? "true" : "false");
+                       }
+               }
+               /* If both fields are present, class takes precedence and was already parsed by the default parser */
                return TRUE;
        }
 
@@ -1269,7 +1286,7 @@ rspamd_rcl_class_labels_handler(rspamd_mempool_t *pool,
                return FALSE;
        }
 
-       if (!rspamd_config_parse_class_labels((ucl_object_t *) obj, &ccf->class_labels)) {
+       if (!rspamd_config_parse_class_labels(obj, &ccf->class_labels)) {
                g_set_error(err, CFG_RCL_ERROR, EINVAL,
                                        "invalid class_labels configuration");
                return FALSE;
@@ -1351,6 +1368,22 @@ rspamd_rcl_classifier_handler(rspamd_mempool_t *pool,
                                                }
                                        }
                                }
+                               else if (g_ascii_strcasecmp(st_key, "class_labels") == 0) {
+                                       /* Parse class_labels configuration directly */
+                                       if (ucl_object_type(val) != UCL_OBJECT) {
+                                               g_set_error(err, CFG_RCL_ERROR, EINVAL,
+                                                                       "class_labels must be an object");
+                                               ucl_object_iterate_free(it);
+                                               return FALSE;
+                                       }
+
+                                       if (!rspamd_config_parse_class_labels(val, &ccf->class_labels)) {
+                                               g_set_error(err, CFG_RCL_ERROR, EINVAL,
+                                                                       "invalid class_labels configuration");
+                                               ucl_object_iterate_free(it);
+                                               return FALSE;
+                                       }
+                               }
                        }
                }
 
@@ -2579,7 +2612,7 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
                                                                                           FALSE,
                                                                                           TRUE,
                                                                                           cfg->doc_strings,
-                                                                                          "CLassifier options");
+                                                                                          "Classifier options");
                /* Default classifier is 'bayes' for now */
                sub->default_key = "bayes";
 
index 2533bd65e8a10b6c280abf881e501ce9c69b8436..c22a9b877bf7408ff5edd0dbf39f14021efca7b4 100644 (file)
@@ -3044,21 +3044,23 @@ rspamd_ip_is_local_cfg(struct rspamd_config *cfg,
 }
 
 gboolean
-rspamd_config_parse_class_labels(ucl_object_t *obj, GHashTable **class_labels)
+rspamd_config_parse_class_labels(const ucl_object_t *obj, GHashTable **class_labels)
 {
        const ucl_object_t *cur;
        ucl_object_iter_t it = nullptr;
-       const char *class_name, *label;
 
        if (!obj || ucl_object_type(obj) != UCL_OBJECT) {
                return FALSE;
        }
 
-       *class_labels = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
+       if (*class_labels == nullptr) {
+               *class_labels = g_hash_table_new_full(g_str_hash, g_str_equal,
+                                                                                         g_free, g_free);
+       }
 
        while ((cur = ucl_object_iterate(obj, &it, true)) != nullptr) {
-               class_name = ucl_object_key(cur);
-               label = ucl_object_tostring(cur);
+               const char *class_name = ucl_object_key(cur);
+               const char *label = ucl_object_tostring(cur);
 
                if (class_name && label) {
                        /* Validate class name: alphanumeric + underscore, max 32 chars */
@@ -3079,20 +3081,15 @@ rspamd_config_parse_class_labels(ucl_object_t *obj, GHashTable **class_labels)
                        }
 
                        /* Validate label uniqueness */
-                       GHashTableIter label_iter;
-                       gpointer key, value;
-                       g_hash_table_iter_init(&label_iter, *class_labels);
-                       while (g_hash_table_iter_next(&label_iter, &key, &value)) {
-                               if (strcmp((const char *) value, label) == 0) {
-                                       msg_err("backend label '%s' is used by multiple classes", label);
-                                       g_hash_table_destroy(*class_labels);
-                                       *class_labels = nullptr;
-                                       return FALSE;
-                               }
+                       if (g_hash_table_lookup(*class_labels, label)) {
+                               msg_err("backend label '%s' is used by multiple classes", label);
+                               g_hash_table_destroy(*class_labels);
+                               *class_labels = nullptr;
+                               return FALSE;
                        }
-
-                       g_hash_table_insert(*class_labels, g_strdup(class_name), g_strdup(label));
                }
+
+               g_hash_table_insert(*class_labels, g_strdup(class_name), g_strdup(label));
        }
 
        return g_hash_table_size(*class_labels) > 0;
@@ -3171,13 +3168,13 @@ rspamd_config_validate_class_config(struct rspamd_classifier_config *ccf, GError
        /* Validate class count */
        if (class_count < 2) {
                g_set_error(err, g_quark_from_static_string("config"), 1,
-                                       "classifier must have at least 2 classes, found %u", class_count);
+                                       "classifier must have at least 2 classes, found %ud", class_count);
                g_hash_table_destroy(seen_classes);
                return FALSE;
        }
 
        if (class_count > 20) {
-               msg_warn("classifier has %u classes, performance may be degraded above 20 classes",
+               msg_warn("classifier has %ud classes, performance may be degraded above 20 classes",
                                 class_count);
        }
 
index 5d55bf3acedaafb615491bb0f03eadab0fc46c01..a0305ec8d1c6f5f9c7aa7d425d2bd45fa4d30a1c 100644 (file)
@@ -167,10 +167,12 @@ public:
 
        auto save_in_mempool(const char *class_label) const
        {
-               auto var_name = fmt::format("{}_{}", redis_object_expanded, class_label);
+               auto var_name =
+                       rspamd_mempool_strdup(task->task_pool,
+                                                                 fmt::format("{}_{}", redis_object_expanded, class_label).c_str());
                /* We do not set destructor for the variable, as it should be already added on creation */
-               rspamd_mempool_set_variable(task->task_pool, var_name.c_str(), (gpointer) this, nullptr);
-               msg_debug_bayes("saved runtime in mempool at %s", var_name.c_str());
+               rspamd_mempool_set_variable(task->task_pool, var_name, (gpointer) this, nullptr);
+               msg_debug_bayes("saved runtime in mempool at %s", var_name);
        }
 };
 
@@ -911,6 +913,39 @@ rspamd_redis_classified(lua_State *L)
                lua_rawgeti(L, 3, 1); /* learned_counts -> position 4 */
                lua_rawgeti(L, 3, 2); /* token_results -> position 5 */
 
+               /* First, process learned_counts for all statfiles */
+               if (lua_istable(L, 4) && rt->stcf->clcf && rt->stcf->clcf->statfiles) {
+                       GList *cur = rt->stcf->clcf->statfiles;
+                       int redis_idx = 1; /* Lua array index starts at 1 */
+
+                       while (cur) {
+                               auto *stcf = (struct rspamd_statfile_config *) cur->data;
+                               const char *class_label = get_class_label(stcf);
+
+                               /* Get the runtime for this statfile */
+                               auto maybe_rt = redis_stat_runtime<float>::maybe_recover_from_mempool(rt->task,
+                                                                                                                                                                         rt->redis_object_expanded,
+                                                                                                                                                                         class_label);
+                               if (maybe_rt) {
+                                       auto *statfile_rt = maybe_rt.value();
+
+                                       /* Extract learned count for this statfile */
+                                       lua_rawgeti(L, 4, redis_idx); /* learned_counts[redis_idx] */
+                                       if (lua_isnumber(L, -1)) {
+                                               statfile_rt->learned = lua_tointeger(L, -1);
+                                               msg_debug_bayes("set learned count for class %s (label %s): %L",
+                                                                               stcf->class_name ? stcf->class_name : "unknown",
+                                                                               class_label,
+                                                                               statfile_rt->learned);
+                                       }
+                                       lua_pop(L, 1); /* Pop learned_counts[redis_idx] */
+                               }
+
+                               cur = g_list_next(cur);
+                               redis_idx++;
+                       }
+               }
+
                /* Process results for all statfiles in order using class_index (O(N) instead of O(N²)) */
                if (rt->stcf->clcf && rt->stcf->clcf->statfiles) {
                        GList *cur = rt->stcf->clcf->statfiles;
@@ -1246,6 +1281,8 @@ rspamd_redis_learn_tokens(struct rspamd_task *task,
                lua_new_text(L, text_tokens_buf, text_tokens_len, false);
        }
 
+       msg_debug_bayes("called lua learn script for %s (cookie=%s)", rt->stcf->symbol, cookie);
+
        if (lua_pcall(L, nargs, 0, err_idx) != 0) {
                msg_err_task("call to script failed: %s", lua_tostring(L, -1));
                lua_settop(L, err_idx - 1);
index 52613d64897e30cdb21b7b39cead82b8cf1ad0fd..3d1bd7107469a86b507feab17b21b79bce163983 100644 (file)
@@ -136,7 +136,6 @@ bayes_classify_token(struct rspamd_classifier *ctx,
        unsigned int spam_count = 0, ham_count = 0, total_count = 0;
        struct rspamd_statfile *st;
        struct rspamd_task *task;
-       const char *token_type = "txt";
        double spam_prob, spam_freq, ham_freq, bayes_spam_prob, bayes_ham_prob,
                ham_prob, fw, w, val;
 
@@ -225,11 +224,6 @@ bayes_classify_token(struct rspamd_classifier *ctx,
                if (!(tok->flags & RSPAMD_STAT_TOKEN_FLAG_META)) {
                        cl->text_tokens++;
                }
-               else {
-                       token_type = "meta";
-               }
-
-               /* Per-token debug logging removed to reduce verbosity */
        }
 }
 
@@ -245,10 +239,9 @@ bayes_classify_token_multiclass(struct rspamd_classifier *ctx,
        int id;
        struct rspamd_statfile *st;
        struct rspamd_task *task;
-       const char *token_type = "txt";
        double val, fw, w;
-       unsigned int *class_counts;
-       unsigned int total_count = 0;
+       guint64 *class_counts;
+       guint64 total_count = 0;
 
        task = cl->task;
 
@@ -258,12 +251,11 @@ bayes_classify_token_multiclass(struct rspamd_classifier *ctx,
                if (val <= cl->meta_skip_prob) {
                        return;
                }
-               token_type = "meta";
        }
 
        /* Allocate array for class counts */
-       class_counts = g_alloca(cl->num_classes * sizeof(unsigned int));
-       memset(class_counts, 0, cl->num_classes * sizeof(unsigned int));
+       class_counts = g_alloca(cl->num_classes * sizeof(guint64));
+       memset(class_counts, 0, cl->num_classes * sizeof(guint64));
 
        /* Collect counts for each class */
        for (i = 0; i < ctx->statfiles_ids->len; i++) {
index 5126fd2cc3bea7219131fd4bd4e5f5606035b02a..11b31decca73a090fbcea15f4a4decfa5811ba74 100644 (file)
@@ -893,9 +893,26 @@ rspamd_stat_backends_learn(struct rspamd_stat_ctx *st_ctx,
                        backend_found = TRUE;
 
                        if (!(task->flags & RSPAMD_TASK_FLAG_UNLEARN)) {
-                               if (!!spam != !!st->stcf->is_spam) {
-                                       /* If we are not unlearning, then do not touch another class */
-                                       continue;
+                               /* For multiclass learning, check if this statfile has any tokens to learn */
+                               if (task->flags & RSPAMD_TASK_FLAG_LEARN_CLASS) {
+                                       /* Multiclass learning: only process statfiles that have tokens set up by the classifier */
+                                       gboolean has_tokens = FALSE;
+                                       for (unsigned int k = 0; k < task->tokens->len && !has_tokens; k++) {
+                                               rspamd_token_t *tok = (rspamd_token_t *) g_ptr_array_index(task->tokens, k);
+                                               if (tok->values[id] != 0) {
+                                                       has_tokens = TRUE;
+                                               }
+                                       }
+                                       if (!has_tokens) {
+                                               continue;
+                                       }
+                               }
+                               else {
+                                       /* Binary learning: use traditional spam/ham check */
+                                       if (!!spam != !!st->stcf->is_spam) {
+                                               /* If we are not unlearning, then do not touch another class */
+                                               continue;
+                                       }
                                }
                        }
 
index 1663a78258d68791ad1a1879d1907eeb9176a3ce..12ec34d6287b62f1d937b034593d9b345c4c6ecf 100644 (file)
@@ -7,13 +7,14 @@ Resource        multiclass_lib.robot
 *** Variables ***
 ${RSPAMD_REDIS_SERVER}  ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT}
 ${RSPAMD_STATS_HASH}    siphash
+${CONFIG}               ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
 
 *** Test Cases ***
 Multiclass Basic Learning and Classification
     [Documentation]    Test basic multiclass learning and classification
     [Tags]             multiclass  basic  learning
     Multiclass Basic Learn Test
-    
+
 Multiclass Legacy Compatibility
     [Documentation]    Test that old learn_spam/learn_ham commands still work
     [Tags]             multiclass  compatibility  legacy
@@ -39,11 +40,6 @@ Multiclass Statistics
     [Tags]             multiclass  statistics
     Multiclass Stats Test
 
-Multiclass Performance
-    [Documentation]    Test classification performance with multiple classes
-    [Tags]             multiclass  performance
-    Multiclass Performance Test  50
-
 Per-User Multiclass Learning
     [Documentation]    Test per-user multiclass classification
     [Tags]             multiclass  per-user
@@ -57,4 +53,4 @@ Multiclass Empty Part Test
     Set Test Variable  ${MESSAGE}  ${RSPAMD_TESTDIR}/messages/empty_part.eml
     Learn Multiclass  ${EMPTY}  spam  ${MESSAGE}
     Scan File  ${MESSAGE}
-    Expect Symbol  BAYES_SPAM
\ No newline at end of file
+    Expect Symbol  BAYES_SPAM
index ef138800548b8f892ad92778c4a8bb895521f1a0..4ce4d67e48255f9eaef26d73235cf626e484a56d 100644 (file)
@@ -2,8 +2,8 @@
 Documentation   Multiclass Bayes Migration Tests
 Suite Setup     Rspamd Redis Setup
 Suite Teardown  Rspamd Redis Teardown
-Resource        multiclass_lib.robot
 Resource        lib.robot
+Resource        multiclass_lib.robot
 
 *** Variables ***
 ${RSPAMD_REDIS_SERVER}    ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT}
@@ -15,26 +15,26 @@ ${MULTICLASS_CONFIG}      ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
 Binary to Multiclass Migration
     [Documentation]    Test migration from binary to multiclass configuration
     [Tags]             migration  binary-to-multiclass
-    
+
     # First, start with binary configuration and learn some data
     Set Suite Variable  ${CONFIG}  ${BINARY_CONFIG}
     Rspamd Redis Teardown
     Rspamd Redis Setup
-    
+
     # Learn with binary system
     Learn Test
-    
+
     # Now switch to multiclass configuration
     Set Suite Variable  ${CONFIG}  ${MULTICLASS_CONFIG}
     Rspamd Teardown
     Rspamd Setup
-    
+
     # Should still work with existing data
     Scan File  ${MESSAGE_SPAM}
     Expect Symbol  BAYES_SPAM
     Scan File  ${MESSAGE_HAM}
     Expect Symbol  BAYES_HAM
-    
+
     # Should be able to add new classes
     Learn Multiclass  ${EMPTY}  newsletter  ${MESSAGE_NEWSLETTER}
     Scan File  ${MESSAGE_NEWSLETTER}
@@ -43,7 +43,7 @@ Binary to Multiclass Migration
 Configuration Validation
     [Documentation]    Test multiclass configuration validation
     [Tags]             configuration  validation
-    
+
     # Test that configuration loads without errors
     ${result} =  Run Process  rspamd  -t  -c  ${MULTICLASS_CONFIG}
     Should Be Equal As Integers  ${result.rc}  0  msg=Configuration validation failed: ${result.stderr}
@@ -51,22 +51,22 @@ Configuration Validation
 Redis Data Format Migration
     [Documentation]    Test that Redis data format is properly migrated
     [Tags]             migration  redis  data-format
-    
+
     # Start with binary data
     Set Suite Variable  ${CONFIG}  ${BINARY_CONFIG}
     Rspamd Redis Teardown
     Rspamd Redis Setup
     Learn Test
-    
+
     # Check binary format in Redis
     ${redis_result} =  Run Process  redis-cli  -p  ${RSPAMD_REDIS_PORT}  KEYS  *_learns
     Should Contain  ${redis_result.stdout}  _learns
-    
+
     # Switch to multiclass
     Set Suite Variable  ${CONFIG}  ${MULTICLASS_CONFIG}
     Rspamd Teardown
     Rspamd Setup
-    
+
     # Data should still be accessible
     Scan File  ${MESSAGE_SPAM}
     Expect Symbol  BAYES_SPAM
@@ -74,11 +74,11 @@ Redis Data Format Migration
 Backward Compatibility
     [Documentation]    Test that multiclass system maintains backward compatibility
     [Tags]             compatibility  backward
-    
+
     # Use multiclass config but test old commands
     Learn  ${EMPTY}  spam  ${MESSAGE_SPAM}
     Learn  ${EMPTY}  ham  ${MESSAGE_HAM}
-    
+
     # Should work the same as before
     Scan File  ${MESSAGE_SPAM}
     Expect Symbol  BAYES_SPAM
@@ -88,7 +88,7 @@ Backward Compatibility
 Class Label Validation
     [Documentation]    Test class label validation and error handling
     [Tags]             validation  class-labels
-    
+
     # This would test invalid class names, duplicate labels, etc.
     # Implementation depends on how validation errors are exposed
     ${result} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  learn_class:invalid-class-name  ${MESSAGE_SPAM}
@@ -97,20 +97,20 @@ Class Label Validation
 Multiclass Stats Format
     [Documentation]    Test that stats output shows multiclass information
     [Tags]             statistics  multiclass-format
-    
+
     # Learn some data across multiple classes
     Learn Multiclass  ${EMPTY}  spam  ${MESSAGE_SPAM}
     Learn Multiclass  ${EMPTY}  ham  ${MESSAGE_HAM}
     Learn Multiclass  ${EMPTY}  newsletter  ${MESSAGE_NEWSLETTER}
-    
+
     # Check stats format
     ${result} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  stat
     Check Rspamc  ${result}
-    
+
     # Should show all classes in stats
     Should Contain  ${result.stdout}  spam
     Should Contain  ${result.stdout}  ham
     Should Contain  ${result.stdout}  newsletter
-    
+
     # Should show learn counts
-    Should Match Regexp  ${result.stdout}  learned.*\\d+
\ No newline at end of file
+    Should Match Regexp  ${result.stdout}  learned.*\\d+
index e6e1788d4031cb72fe651ffa8c98ff403ef06caf..4fa4284bb5bba5680a3d26a76da168a3ad64a990 100644 (file)
@@ -1,7 +1,5 @@
 *** Settings ***
-Library         ${RSPAMD_TESTDIR}/lib/rspamd.py
-Resource        ${RSPAMD_TESTDIR}/lib/rspamd.robot
-Variables       ${RSPAMD_TESTDIR}/lib/vars.py
+Resource        lib.robot
 
 *** Variables ***
 ${CONFIG}                      ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
@@ -44,26 +42,26 @@ Multiclass Basic Learn Test
     IF  "${user}"
         Set To Dictionary  ${kwargs}  Deliver-To=${user}
     END
-    
+
     # Learn all classes
     Learn Multiclass  ${user}  spam  ${MESSAGE_SPAM}
     Learn Multiclass  ${user}  ham  ${MESSAGE_HAM}
     Learn Multiclass  ${user}  newsletter  ${MESSAGE_NEWSLETTER}
     Learn Multiclass  ${user}  transactional  ${MESSAGE_TRANSACTIONAL}
-    
+
     # Test classification
     Scan File  ${MESSAGE_SPAM}  &{kwargs}
     Expect Symbol  BAYES_SPAM
-    
+
     Scan File  ${MESSAGE_HAM}  &{kwargs}
     Expect Symbol  BAYES_HAM
-    
+
     Scan File  ${MESSAGE_NEWSLETTER}  &{kwargs}
     Expect Symbol  BAYES_NEWSLETTER
-    
+
     Scan File  ${MESSAGE_TRANSACTIONAL}  &{kwargs}
     Expect Symbol  BAYES_TRANSACTIONAL
-    
+
     Set Suite Variable  ${RSPAMD_STATS_LEARNTEST}  1
 
 Multiclass Legacy Compatibility Test
@@ -72,15 +70,15 @@ Multiclass Legacy Compatibility Test
     IF  "${user}"
         Set To Dictionary  ${kwargs}  Deliver-To=${user}
     END
-    
+
     # Test legacy learn_spam and learn_ham commands still work
     Learn Multiclass Legacy  ${user}  spam  ${MESSAGE_SPAM}
     Learn Multiclass Legacy  ${user}  ham  ${MESSAGE_HAM}
-    
+
     # Should still classify correctly
     Scan File  ${MESSAGE_SPAM}  &{kwargs}
     Expect Symbol  BAYES_SPAM
-    
+
     Scan File  ${MESSAGE_HAM}  &{kwargs}
     Expect Symbol  BAYES_HAM
 
@@ -89,15 +87,15 @@ Multiclass Relearn Test
     IF  ${RSPAMD_STATS_LEARNTEST} == 0
         Fail  "Learn test was not run"
     END
-    
+
     Set Test Variable  ${kwargs}  &{EMPTY}
     IF  "${user}"
         Set To Dictionary  ${kwargs}  Deliver-To=${user}
     END
-    
+
     # Relearn spam message as ham
     Learn Multiclass  ${user}  ham  ${MESSAGE_SPAM}
-    
+
     # Should now classify as ham or at least not spam
     Scan File  ${MESSAGE_SPAM}  &{kwargs}
     ${pass} =  Run Keyword And Return Status  Expect Symbol  BAYES_HAM
@@ -112,10 +110,10 @@ Multiclass Cross-Learn Test
     IF  "${user}"
         Set To Dictionary  ${kwargs}  Deliver-To=${user}
     END
-    
+
     # Learn newsletter message as transactional
     Learn Multiclass  ${user}  transactional  ${MESSAGE_NEWSLETTER}
-    
+
     # Should classify as transactional, not newsletter
     Scan File  ${MESSAGE_NEWSLETTER}  &{kwargs}
     Expect Symbol  BAYES_TRANSACTIONAL
@@ -127,15 +125,15 @@ Multiclass Unlearn Test
     IF  "${user}"
         Set To Dictionary  ${kwargs}  Deliver-To=${user}
     END
-    
+
     # First learn spam
     Learn Multiclass  ${user}  spam  ${MESSAGE_SPAM}
     Scan File  ${MESSAGE_SPAM}  &{kwargs}
     Expect Symbol  BAYES_SPAM
-    
+
     # Then unlearn spam (learn as ham)
     Learn Multiclass  ${user}  ham  ${MESSAGE_SPAM}
-    
+
     # Should no longer classify as spam
     Scan File  ${MESSAGE_SPAM}  &{kwargs}
     Do Not Expect Symbol  BAYES_SPAM
@@ -151,7 +149,7 @@ Multiclass Stats Test
     # Check that rspamc stat shows learning counts for all classes
     ${result} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  stat
     Check Rspamc  ${result}
-    
+
     # Should show statistics for all classes
     Should Contain  ${result.stdout}  spam
     Should Contain  ${result.stdout}  ham
@@ -161,11 +159,11 @@ Multiclass Stats Test
 Multiclass Configuration Migration Test
     # Test that old binary config can be automatically migrated
     Set Test Variable  ${binary_config}  ${RSPAMD_TESTDIR}/configs/stats.conf
-    
+
     # Start with binary config
     ${result} =  Run Rspamc  --config  ${binary_config}  stat
     Check Rspamc  ${result}
-    
+
     # Should show deprecation warning but work
     Should Contain  ${result.stderr}  deprecated  ignore_case=True
 
@@ -173,17 +171,17 @@ Multiclass Performance Test
     [Arguments]  ${num_messages}=100
     # Test classification performance with multiple classes
     ${start_time} =  Get Time  epoch
-    
+
     FOR  ${i}  IN RANGE  ${num_messages}
         Scan File  ${MESSAGE_SPAM}
         Scan File  ${MESSAGE_HAM}
         Scan File  ${MESSAGE_NEWSLETTER}
         Scan File  ${MESSAGE_TRANSACTIONAL}
     END
-    
+
     ${end_time} =  Get Time  epoch
     ${duration} =  Evaluate  ${end_time} - ${start_time}
-    
+
     # Should complete in reasonable time (adjust threshold as needed)
     Should Be True  ${duration} < 30  msg=Performance test took ${duration}s, expected < 30s
 
@@ -191,6 +189,6 @@ Multiclass Memory Test
     # Test that memory usage is reasonable for multiclass classification
     ${result} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  stat
     Check Rspamc  ${result}
-    
+
     # Extract memory usage if available in stats
-    # This is a placeholder - actual implementation would parse memory stats
\ No newline at end of file
+    # This is a placeholder - actual implementation would parse memory stats
index 3504cd16ebe27bc7e0f326ffc432273c54e01bf4..5b8d077341c986529292fb57d64a4c7c3e48b4ae 100644 (file)
@@ -64,29 +64,25 @@ classifier {
        statfile {
                class = "spam";
                symbol = BAYES_SPAM;
-               size = 1M;
                server = {= env.REDIS_SERVER =}
        }
        statfile {
                class = "ham";
                symbol = BAYES_HAM;
-               size = 1M;
                server = {= env.REDIS_SERVER =}
        }
        statfile {
                class = "newsletter";
                symbol = BAYES_NEWSLETTER;
-               size = 1M;
                server = {= env.REDIS_SERVER =}
        }
        statfile {
                class = "transactional";
                symbol = BAYES_TRANSACTIONAL;
-               size = 1M;
                server = {= env.REDIS_SERVER =}
        }
 
-       # Backend class labels for Redis storage optimization
+       # Backend class labels for Redis
        class_labels = {
                "spam" = "S";
                "ham" = "H";