[Project] Fix various issues

author Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 25 Jul 2025 10:58:36 +0000 (11:58 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 25 Jul 2025 10:58:36 +0000 (11:58 +0100)
diff --git a/lualib/lua_bayes_redis.lua b/lualib/lua_bayes_redis.lua

index 4de7126c7f48cbdd4b1b8fddd60cf48d8530ef66..53ee06b0efa822f16466ddf112bd787e45dd9291 100644 (file)
--- a/lualib/lua_bayes_redis.lua
+++ b/lualib/lua_bayes_redis.lua
@@ -54,15 +54,15 @@ local function gen_classify_functor(redis_params, classify_script_id)
      end
  
      lua_redis.exec_redis_script(classify_script_id,
-      { task = task, is_write = false, key = expanded_key },
-      classify_redis_cb, { expanded_key, script_class_labels, stat_tokens })
+        { task = task, is_write = false, key = expanded_key },
+        classify_redis_cb, { expanded_key, script_class_labels, stat_tokens })
    end
  end
  
  local function gen_learn_functor(redis_params, learn_script_id)
    return function(task, expanded_key, id, class_label, symbol, is_unlearn, stat_tokens, callback, maybe_text_tokens)
      local function learn_redis_cb(err, data)
-      lua_util.debugm(N, task, 'learn redis cb: %s, %s', err, data)
+      lua_util.debugm(N, task, 'learn redis cb: %s, %s for class %s', err, data, class_label)
        if err then
          callback(task, false, err)
        else
@@ -80,13 +80,13 @@ local function gen_learn_functor(redis_params, learn_script_id)
  
      if maybe_text_tokens then
        lua_redis.exec_redis_script(learn_script_id,
-        { task = task, is_write = true, key = expanded_key },
-        learn_redis_cb,
-        { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens })
+          { task = task, is_write = true, key = expanded_key },
+          learn_redis_cb,
+          { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens })
      else
        lua_redis.exec_redis_script(learn_script_id,
-        { task = task, is_write = true, key = expanded_key },
-        learn_redis_cb, { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens })
+          { task = task, is_write = true, key = expanded_key },
+          learn_redis_cb, { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens })
      end
    end
  end
@@ -196,11 +196,11 @@ exports.lua_bayes_init_statfile = function(classifier_ucl, statfile_ucl, symbol,
        end
  
        lua_redis.exec_redis_script(stat_script_id,
-        { ev_base = ev_base, cfg = cfg, is_write = false },
-        stat_redis_cb, { tostring(cursor),
-          symbol,
-          learn_key,
-          tostring(max_users) })
+          { ev_base = ev_base, cfg = cfg, is_write = false },
+          stat_redis_cb, { tostring(cursor),
+                           symbol,
+                           learn_key,
+                           tostring(max_users) })
        return statfile_ucl.monitor_timeout or classifier_ucl.monitor_timeout or 30.0
      end)
    end
@@ -226,8 +226,8 @@ local function gen_cache_check_functor(redis_params, check_script_id, conf)
  
      lua_util.debugm(N, task, 'checking cache: %s', cache_id)
      lua_redis.exec_redis_script(check_script_id,
-      { task = task, is_write = false, key = cache_id },
-      classify_redis_cb, { cache_id, packed_conf })
+        { task = task, is_write = false, key = cache_id },
+        classify_redis_cb, { cache_id, packed_conf })
    end
  end
  
@@ -250,9 +250,9 @@ local function gen_cache_learn_functor(redis_params, learn_script_id, conf)
  
      lua_util.debugm(N, task, 'try to learn cache: %s as %s', cache_id, cache_class_name)
      lua_redis.exec_redis_script(learn_script_id,
-      { task = task, is_write = true, key = cache_id },
-      learn_redis_cb,
-      { cache_id, cache_class_name, packed_conf })
+        { task = task, is_write = true, key = cache_id },
+        learn_redis_cb,
+        { cache_id, cache_class_name, packed_conf })
    end
  end
  
@@ -266,8 +266,8 @@ exports.lua_bayes_init_cache = function(classifier_ucl, statfile_ucl)
    local default_conf = {
      cache_prefix = "learned_ids",
      cache_max_elt = 10000, -- Maximum number of elements in the cache key
-    cache_max_keys = 5,    -- Maximum number of keys in the cache
-    cache_elt_len = 32,    -- Length of the element in the cache (will trim id to that value)
+    cache_max_keys = 5, -- Maximum number of keys in the cache
+    cache_elt_len = 32, -- Length of the element in the cache (will trim id to that value)
    }
  
    local conf = lua_util.override_defaults(default_conf, classifier_ucl)
@@ -282,7 +282,7 @@ exports.lua_bayes_init_cache = function(classifier_ucl, statfile_ucl)
    local learn_script_id = lua_redis.load_redis_script_from_file("bayes_cache_learn.lua", redis_params)
  
    return gen_cache_check_functor(redis_params, check_script_id, conf), gen_cache_learn_functor(redis_params,
-    learn_script_id, conf)
+      learn_script_id, conf)
  end
  
  return exports
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h

index 9f83f80244c44075576bbedc81572b0191c4abe3..355046cac0cf68606f71c9911ada01551d1142cf 100644 (file)
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -629,7 +629,7 @@ gboolean rspamd_config_check_statfiles(struct rspamd_classifier_config *cf);
  /**
   * Multi-class configuration helpers
   */
-gboolean rspamd_config_parse_class_labels(ucl_object_t *obj,
+gboolean rspamd_config_parse_class_labels(const ucl_object_t *obj,
                                                                                   GHashTable **class_labels);
  
  gboolean rspamd_config_migrate_binary_config(struct rspamd_statfile_config *stcf);
diff --git a/src/libserver/cfg_rcl.cxx b/src/libserver/cfg_rcl.cxx

index 68b6460d891cea12a4593a03360823a852f1166d..3978b23b01f0d66276582515a8bbe8e157721415 100644 (file)
--- a/src/libserver/cfg_rcl.cxx
+++ b/src/libserver/cfg_rcl.cxx
@@ -1246,7 +1246,24 @@ rspamd_rcl_statfile_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
                         }
                         st->is_spam_converted = TRUE;
                 }
-               /* If class field is present, it was already parsed by the default parser */
+               else if (class_val != nullptr && spam_val == nullptr) {
+                       /* Only class field present - set is_spam for backward compatibility */
+                       if (st->class_name != nullptr) {
+                               if (strcmp(st->class_name, "spam") == 0) {
+                                       st->is_spam = TRUE;
+                               }
+                               else if (strcmp(st->class_name, "ham") == 0) {
+                                       st->is_spam = FALSE;
+                               }
+                               else {
+                                       /* For non-binary classes, default to not spam */
+                                       st->is_spam = FALSE;
+                               }
+                               msg_debug_config("statfile %s with class '%s' set is_spam=%s for compatibility",
+                                                                st->symbol, st->class_name, st->is_spam ? "true" : "false");
+                       }
+               }
+               /* If both fields are present, class takes precedence and was already parsed by the default parser */
                 return TRUE;
         }
  
@@ -1269,7 +1286,7 @@ rspamd_rcl_class_labels_handler(rspamd_mempool_t *pool,
                 return FALSE;
         }
  
-       if (!rspamd_config_parse_class_labels((ucl_object_t *) obj, &ccf->class_labels)) {
+       if (!rspamd_config_parse_class_labels(obj, &ccf->class_labels)) {
                 g_set_error(err, CFG_RCL_ERROR, EINVAL,
                                         "invalid class_labels configuration");
                 return FALSE;
@@ -1351,6 +1368,22 @@ rspamd_rcl_classifier_handler(rspamd_mempool_t *pool,
                                                 }
                                         }
                                 }
+                               else if (g_ascii_strcasecmp(st_key, "class_labels") == 0) {
+                                       /* Parse class_labels configuration directly */
+                                       if (ucl_object_type(val) != UCL_OBJECT) {
+                                               g_set_error(err, CFG_RCL_ERROR, EINVAL,
+                                                                       "class_labels must be an object");
+                                               ucl_object_iterate_free(it);
+                                               return FALSE;
+                                       }
+
+                                       if (!rspamd_config_parse_class_labels(val, &ccf->class_labels)) {
+                                               g_set_error(err, CFG_RCL_ERROR, EINVAL,
+                                                                       "invalid class_labels configuration");
+                                               ucl_object_iterate_free(it);
+                                               return FALSE;
+                                       }
+                               }
                         }
                 }
  
@@ -2579,7 +2612,7 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
                                                                                            FALSE,
                                                                                            TRUE,
                                                                                            cfg->doc_strings,
-                                                                                          "CLassifier options");
+                                                                                          "Classifier options");
                 /* Default classifier is 'bayes' for now */
                 sub->default_key = "bayes";
  
diff --git a/src/libserver/cfg_utils.cxx b/src/libserver/cfg_utils.cxx

index 2533bd65e8a10b6c280abf881e501ce9c69b8436..c22a9b877bf7408ff5edd0dbf39f14021efca7b4 100644 (file)
--- a/src/libserver/cfg_utils.cxx
+++ b/src/libserver/cfg_utils.cxx
@@ -3044,21 +3044,23 @@ rspamd_ip_is_local_cfg(struct rspamd_config *cfg,
  }
  
  gboolean
-rspamd_config_parse_class_labels(ucl_object_t *obj, GHashTable **class_labels)
+rspamd_config_parse_class_labels(const ucl_object_t *obj, GHashTable **class_labels)
  {
         const ucl_object_t *cur;
         ucl_object_iter_t it = nullptr;
-       const char *class_name, *label;
  
         if (!obj || ucl_object_type(obj) != UCL_OBJECT) {
                 return FALSE;
         }
  
-       *class_labels = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
+       if (*class_labels == nullptr) {
+               *class_labels = g_hash_table_new_full(g_str_hash, g_str_equal,
+                                                                                         g_free, g_free);
+       }
  
         while ((cur = ucl_object_iterate(obj, &it, true)) != nullptr) {
-               class_name = ucl_object_key(cur);
-               label = ucl_object_tostring(cur);
+               const char *class_name = ucl_object_key(cur);
+               const char *label = ucl_object_tostring(cur);
  
                 if (class_name && label) {
                         /* Validate class name: alphanumeric + underscore, max 32 chars */
@@ -3079,20 +3081,15 @@ rspamd_config_parse_class_labels(ucl_object_t *obj, GHashTable **class_labels)
                         }
  
                         /* Validate label uniqueness */
-                       GHashTableIter label_iter;
-                       gpointer key, value;
-                       g_hash_table_iter_init(&label_iter, *class_labels);
-                       while (g_hash_table_iter_next(&label_iter, &key, &value)) {
-                               if (strcmp((const char *) value, label) == 0) {
-                                       msg_err("backend label '%s' is used by multiple classes", label);
-                                       g_hash_table_destroy(*class_labels);
-                                       *class_labels = nullptr;
-                                       return FALSE;
-                               }
+                       if (g_hash_table_lookup(*class_labels, label)) {
+                               msg_err("backend label '%s' is used by multiple classes", label);
+                               g_hash_table_destroy(*class_labels);
+                               *class_labels = nullptr;
+                               return FALSE;
                         }
-
-                       g_hash_table_insert(*class_labels, g_strdup(class_name), g_strdup(label));
                 }
+
+               g_hash_table_insert(*class_labels, g_strdup(class_name), g_strdup(label));
         }
  
         return g_hash_table_size(*class_labels) > 0;
@@ -3171,13 +3168,13 @@ rspamd_config_validate_class_config(struct rspamd_classifier_config *ccf, GError
         /* Validate class count */
         if (class_count < 2) {
                 g_set_error(err, g_quark_from_static_string("config"), 1,
-                                       "classifier must have at least 2 classes, found %u", class_count);
+                                       "classifier must have at least 2 classes, found %ud", class_count);
                 g_hash_table_destroy(seen_classes);
                 return FALSE;
         }
  
         if (class_count > 20) {
-               msg_warn("classifier has %u classes, performance may be degraded above 20 classes",
+               msg_warn("classifier has %ud classes, performance may be degraded above 20 classes",
                                  class_count);
         }
  
diff --git a/src/libstat/backends/redis_backend.cxx b/src/libstat/backends/redis_backend.cxx

index 5d55bf3acedaafb615491bb0f03eadab0fc46c01..a0305ec8d1c6f5f9c7aa7d425d2bd45fa4d30a1c 100644 (file)
--- a/src/libstat/backends/redis_backend.cxx
+++ b/src/libstat/backends/redis_backend.cxx
@@ -167,10 +167,12 @@ public:
  
         auto save_in_mempool(const char *class_label) const
         {
-               auto var_name = fmt::format("{}_{}", redis_object_expanded, class_label);
+               auto var_name =
+                       rspamd_mempool_strdup(task->task_pool,
+                                                                 fmt::format("{}_{}", redis_object_expanded, class_label).c_str());
                 /* We do not set destructor for the variable, as it should be already added on creation */
-               rspamd_mempool_set_variable(task->task_pool, var_name.c_str(), (gpointer) this, nullptr);
-               msg_debug_bayes("saved runtime in mempool at %s", var_name.c_str());
+               rspamd_mempool_set_variable(task->task_pool, var_name, (gpointer) this, nullptr);
+               msg_debug_bayes("saved runtime in mempool at %s", var_name);
         }
  };
  
@@ -911,6 +913,39 @@ rspamd_redis_classified(lua_State *L)
                 lua_rawgeti(L, 3, 1); /* learned_counts -> position 4 */
                 lua_rawgeti(L, 3, 2); /* token_results -> position 5 */
  
+               /* First, process learned_counts for all statfiles */
+               if (lua_istable(L, 4) && rt->stcf->clcf && rt->stcf->clcf->statfiles) {
+                       GList *cur = rt->stcf->clcf->statfiles;
+                       int redis_idx = 1; /* Lua array index starts at 1 */
+
+                       while (cur) {
+                               auto *stcf = (struct rspamd_statfile_config *) cur->data;
+                               const char *class_label = get_class_label(stcf);
+
+                               /* Get the runtime for this statfile */
+                               auto maybe_rt = redis_stat_runtime<float>::maybe_recover_from_mempool(rt->task,
+                                                                                                                                                                         rt->redis_object_expanded,
+                                                                                                                                                                         class_label);
+                               if (maybe_rt) {
+                                       auto *statfile_rt = maybe_rt.value();
+
+                                       /* Extract learned count for this statfile */
+                                       lua_rawgeti(L, 4, redis_idx); /* learned_counts[redis_idx] */
+                                       if (lua_isnumber(L, -1)) {
+                                               statfile_rt->learned = lua_tointeger(L, -1);
+                                               msg_debug_bayes("set learned count for class %s (label %s): %L",
+                                                                               stcf->class_name ? stcf->class_name : "unknown",
+                                                                               class_label,
+                                                                               statfile_rt->learned);
+                                       }
+                                       lua_pop(L, 1); /* Pop learned_counts[redis_idx] */
+                               }
+
+                               cur = g_list_next(cur);
+                               redis_idx++;
+                       }
+               }
+
                 /* Process results for all statfiles in order using class_index (O(N) instead of O(N²)) */
                 if (rt->stcf->clcf && rt->stcf->clcf->statfiles) {
                         GList *cur = rt->stcf->clcf->statfiles;
@@ -1246,6 +1281,8 @@ rspamd_redis_learn_tokens(struct rspamd_task *task,
                 lua_new_text(L, text_tokens_buf, text_tokens_len, false);
         }
  
+       msg_debug_bayes("called lua learn script for %s (cookie=%s)", rt->stcf->symbol, cookie);
+
         if (lua_pcall(L, nargs, 0, err_idx) != 0) {
                 msg_err_task("call to script failed: %s", lua_tostring(L, -1));
                 lua_settop(L, err_idx - 1);
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c

index 52613d64897e30cdb21b7b39cead82b8cf1ad0fd..3d1bd7107469a86b507feab17b21b79bce163983 100644 (file)
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -136,7 +136,6 @@ bayes_classify_token(struct rspamd_classifier *ctx,
         unsigned int spam_count = 0, ham_count = 0, total_count = 0;
         struct rspamd_statfile *st;
         struct rspamd_task *task;
-       const char *token_type = "txt";
         double spam_prob, spam_freq, ham_freq, bayes_spam_prob, bayes_ham_prob,
                 ham_prob, fw, w, val;
  
@@ -225,11 +224,6 @@ bayes_classify_token(struct rspamd_classifier *ctx,
                 if (!(tok->flags & RSPAMD_STAT_TOKEN_FLAG_META)) {
                         cl->text_tokens++;
                 }
-               else {
-                       token_type = "meta";
-               }
-
-               /* Per-token debug logging removed to reduce verbosity */
         }
  }
  
@@ -245,10 +239,9 @@ bayes_classify_token_multiclass(struct rspamd_classifier *ctx,
         int id;
         struct rspamd_statfile *st;
         struct rspamd_task *task;
-       const char *token_type = "txt";
         double val, fw, w;
-       unsigned int *class_counts;
-       unsigned int total_count = 0;
+       guint64 *class_counts;
+       guint64 total_count = 0;
  
         task = cl->task;
  
@@ -258,12 +251,11 @@ bayes_classify_token_multiclass(struct rspamd_classifier *ctx,
                 if (val <= cl->meta_skip_prob) {
                         return;
                 }
-               token_type = "meta";
         }
  
         /* Allocate array for class counts */
-       class_counts = g_alloca(cl->num_classes * sizeof(unsigned int));
-       memset(class_counts, 0, cl->num_classes * sizeof(unsigned int));
+       class_counts = g_alloca(cl->num_classes * sizeof(guint64));
+       memset(class_counts, 0, cl->num_classes * sizeof(guint64));
  
         /* Collect counts for each class */
         for (i = 0; i < ctx->statfiles_ids->len; i++) {
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c

index 5126fd2cc3bea7219131fd4bd4e5f5606035b02a..11b31decca73a090fbcea15f4a4decfa5811ba74 100644 (file)
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -893,9 +893,26 @@ rspamd_stat_backends_learn(struct rspamd_stat_ctx *st_ctx,
                         backend_found = TRUE;
  
                         if (!(task->flags & RSPAMD_TASK_FLAG_UNLEARN)) {
-                               if (!!spam != !!st->stcf->is_spam) {
-                                       /* If we are not unlearning, then do not touch another class */
-                                       continue;
+                               /* For multiclass learning, check if this statfile has any tokens to learn */
+                               if (task->flags & RSPAMD_TASK_FLAG_LEARN_CLASS) {
+                                       /* Multiclass learning: only process statfiles that have tokens set up by the classifier */
+                                       gboolean has_tokens = FALSE;
+                                       for (unsigned int k = 0; k < task->tokens->len && !has_tokens; k++) {
+                                               rspamd_token_t *tok = (rspamd_token_t *) g_ptr_array_index(task->tokens, k);
+                                               if (tok->values[id] != 0) {
+                                                       has_tokens = TRUE;
+                                               }
+                                       }
+                                       if (!has_tokens) {
+                                               continue;
+                                       }
+                               }
+                               else {
+                                       /* Binary learning: use traditional spam/ham check */
+                                       if (!!spam != !!st->stcf->is_spam) {
+                                               /* If we are not unlearning, then do not touch another class */
+                                               continue;
+                                       }
                                 }
                         }
  
diff --git a/test/functional/cases/110_statistics/300-multiclass-redis.robot b/test/functional/cases/110_statistics/300-multiclass-redis.robot

index 1663a78258d68791ad1a1879d1907eeb9176a3ce..12ec34d6287b62f1d937b034593d9b345c4c6ecf 100644 (file)
--- a/test/functional/cases/110_statistics/300-multiclass-redis.robot
+++ b/test/functional/cases/110_statistics/300-multiclass-redis.robot
@@ -7,13 +7,14 @@ Resource        multiclass_lib.robot
  *** Variables ***
  ${RSPAMD_REDIS_SERVER}  ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT}
  ${RSPAMD_STATS_HASH}    siphash
+${CONFIG}               ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
  
  *** Test Cases ***
  Multiclass Basic Learning and Classification
      [Documentation]    Test basic multiclass learning and classification
      [Tags]             multiclass  basic  learning
      Multiclass Basic Learn Test
-    
+
  Multiclass Legacy Compatibility
      [Documentation]    Test that old learn_spam/learn_ham commands still work
      [Tags]             multiclass  compatibility  legacy
@@ -39,11 +40,6 @@ Multiclass Statistics
      [Tags]             multiclass  statistics
      Multiclass Stats Test
  
-Multiclass Performance
-    [Documentation]    Test classification performance with multiple classes
-    [Tags]             multiclass  performance
-    Multiclass Performance Test  50
-
  Per-User Multiclass Learning
      [Documentation]    Test per-user multiclass classification
      [Tags]             multiclass  per-user
@@ -57,4 +53,4 @@ Multiclass Empty Part Test
      Set Test Variable  ${MESSAGE}  ${RSPAMD_TESTDIR}/messages/empty_part.eml
      Learn Multiclass  ${EMPTY}  spam  ${MESSAGE}
      Scan File  ${MESSAGE}
-    Expect Symbol  BAYES_SPAM
-\ No newline at end of file
+    Expect Symbol  BAYES_SPAM
diff --git a/test/functional/cases/110_statistics/310-multiclass-migration.robot b/test/functional/cases/110_statistics/310-multiclass-migration.robot

index ef138800548b8f892ad92778c4a8bb895521f1a0..4ce4d67e48255f9eaef26d73235cf626e484a56d 100644 (file)
--- a/test/functional/cases/110_statistics/310-multiclass-migration.robot
+++ b/test/functional/cases/110_statistics/310-multiclass-migration.robot
@@ -2,8 +2,8 @@
  Documentation   Multiclass Bayes Migration Tests
  Suite Setup     Rspamd Redis Setup
  Suite Teardown  Rspamd Redis Teardown
-Resource        multiclass_lib.robot
  Resource        lib.robot
+Resource        multiclass_lib.robot
  
  *** Variables ***
  ${RSPAMD_REDIS_SERVER}    ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT}
@@ -15,26 +15,26 @@ ${MULTICLASS_CONFIG}      ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
  Binary to Multiclass Migration
      [Documentation]    Test migration from binary to multiclass configuration
      [Tags]             migration  binary-to-multiclass
-    
+
      # First, start with binary configuration and learn some data
      Set Suite Variable  ${CONFIG}  ${BINARY_CONFIG}
      Rspamd Redis Teardown
      Rspamd Redis Setup
-    
+
      # Learn with binary system
      Learn Test
-    
+
      # Now switch to multiclass configuration
      Set Suite Variable  ${CONFIG}  ${MULTICLASS_CONFIG}
      Rspamd Teardown
      Rspamd Setup
-    
+
      # Should still work with existing data
      Scan File  ${MESSAGE_SPAM}
      Expect Symbol  BAYES_SPAM
      Scan File  ${MESSAGE_HAM}
      Expect Symbol  BAYES_HAM
-    
+
      # Should be able to add new classes
      Learn Multiclass  ${EMPTY}  newsletter  ${MESSAGE_NEWSLETTER}
      Scan File  ${MESSAGE_NEWSLETTER}
@@ -43,7 +43,7 @@ Binary to Multiclass Migration
  Configuration Validation
      [Documentation]    Test multiclass configuration validation
      [Tags]             configuration  validation
-    
+
      # Test that configuration loads without errors
      ${result} =  Run Process  rspamd  -t  -c  ${MULTICLASS_CONFIG}
      Should Be Equal As Integers  ${result.rc}  0  msg=Configuration validation failed: ${result.stderr}
@@ -51,22 +51,22 @@ Configuration Validation
  Redis Data Format Migration
      [Documentation]    Test that Redis data format is properly migrated
      [Tags]             migration  redis  data-format
-    
+
      # Start with binary data
      Set Suite Variable  ${CONFIG}  ${BINARY_CONFIG}
      Rspamd Redis Teardown
      Rspamd Redis Setup
      Learn Test
-    
+
      # Check binary format in Redis
      ${redis_result} =  Run Process  redis-cli  -p  ${RSPAMD_REDIS_PORT}  KEYS  *_learns
      Should Contain  ${redis_result.stdout}  _learns
-    
+
      # Switch to multiclass
      Set Suite Variable  ${CONFIG}  ${MULTICLASS_CONFIG}
      Rspamd Teardown
      Rspamd Setup
-    
+
      # Data should still be accessible
      Scan File  ${MESSAGE_SPAM}
      Expect Symbol  BAYES_SPAM
@@ -74,11 +74,11 @@ Redis Data Format Migration
  Backward Compatibility
      [Documentation]    Test that multiclass system maintains backward compatibility
      [Tags]             compatibility  backward
-    
+
      # Use multiclass config but test old commands
      Learn  ${EMPTY}  spam  ${MESSAGE_SPAM}
      Learn  ${EMPTY}  ham  ${MESSAGE_HAM}
-    
+
      # Should work the same as before
      Scan File  ${MESSAGE_SPAM}
      Expect Symbol  BAYES_SPAM
@@ -88,7 +88,7 @@ Backward Compatibility
  Class Label Validation
      [Documentation]    Test class label validation and error handling
      [Tags]             validation  class-labels
-    
+
      # This would test invalid class names, duplicate labels, etc.
      # Implementation depends on how validation errors are exposed
      ${result} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  learn_class:invalid-class-name  ${MESSAGE_SPAM}
@@ -97,20 +97,20 @@ Class Label Validation
  Multiclass Stats Format
      [Documentation]    Test that stats output shows multiclass information
      [Tags]             statistics  multiclass-format
-    
+
      # Learn some data across multiple classes
      Learn Multiclass  ${EMPTY}  spam  ${MESSAGE_SPAM}
      Learn Multiclass  ${EMPTY}  ham  ${MESSAGE_HAM}
      Learn Multiclass  ${EMPTY}  newsletter  ${MESSAGE_NEWSLETTER}
-    
+
      # Check stats format
      ${result} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  stat
      Check Rspamc  ${result}
-    
+
      # Should show all classes in stats
      Should Contain  ${result.stdout}  spam
      Should Contain  ${result.stdout}  ham
      Should Contain  ${result.stdout}  newsletter
-    
+
      # Should show learn counts
-    Should Match Regexp  ${result.stdout}  learned.*\\d+
-\ No newline at end of file
+    Should Match Regexp  ${result.stdout}  learned.*\\d+
diff --git a/test/functional/cases/110_statistics/multiclass_lib.robot b/test/functional/cases/110_statistics/multiclass_lib.robot

index e6e1788d4031cb72fe651ffa8c98ff403ef06caf..4fa4284bb5bba5680a3d26a76da168a3ad64a990 100644 (file)
--- a/test/functional/cases/110_statistics/multiclass_lib.robot
+++ b/test/functional/cases/110_statistics/multiclass_lib.robot
@@ -1,7 +1,5 @@
  *** Settings ***
-Library         ${RSPAMD_TESTDIR}/lib/rspamd.py
-Resource        ${RSPAMD_TESTDIR}/lib/rspamd.robot
-Variables       ${RSPAMD_TESTDIR}/lib/vars.py
+Resource        lib.robot
  
  *** Variables ***
  ${CONFIG}                      ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
@@ -44,26 +42,26 @@ Multiclass Basic Learn Test
      IF  "${user}"
          Set To Dictionary  ${kwargs}  Deliver-To=${user}
      END
-    
+
      # Learn all classes
      Learn Multiclass  ${user}  spam  ${MESSAGE_SPAM}
      Learn Multiclass  ${user}  ham  ${MESSAGE_HAM}
      Learn Multiclass  ${user}  newsletter  ${MESSAGE_NEWSLETTER}
      Learn Multiclass  ${user}  transactional  ${MESSAGE_TRANSACTIONAL}
-    
+
      # Test classification
      Scan File  ${MESSAGE_SPAM}  &{kwargs}
      Expect Symbol  BAYES_SPAM
-    
+
      Scan File  ${MESSAGE_HAM}  &{kwargs}
      Expect Symbol  BAYES_HAM
-    
+
      Scan File  ${MESSAGE_NEWSLETTER}  &{kwargs}
      Expect Symbol  BAYES_NEWSLETTER
-    
+
      Scan File  ${MESSAGE_TRANSACTIONAL}  &{kwargs}
      Expect Symbol  BAYES_TRANSACTIONAL
-    
+
      Set Suite Variable  ${RSPAMD_STATS_LEARNTEST}  1
  
  Multiclass Legacy Compatibility Test
@@ -72,15 +70,15 @@ Multiclass Legacy Compatibility Test
      IF  "${user}"
          Set To Dictionary  ${kwargs}  Deliver-To=${user}
      END
-    
+
      # Test legacy learn_spam and learn_ham commands still work
      Learn Multiclass Legacy  ${user}  spam  ${MESSAGE_SPAM}
      Learn Multiclass Legacy  ${user}  ham  ${MESSAGE_HAM}
-    
+
      # Should still classify correctly
      Scan File  ${MESSAGE_SPAM}  &{kwargs}
      Expect Symbol  BAYES_SPAM
-    
+
      Scan File  ${MESSAGE_HAM}  &{kwargs}
      Expect Symbol  BAYES_HAM
  
@@ -89,15 +87,15 @@ Multiclass Relearn Test
      IF  ${RSPAMD_STATS_LEARNTEST} == 0
          Fail  "Learn test was not run"
      END
-    
+
      Set Test Variable  ${kwargs}  &{EMPTY}
      IF  "${user}"
          Set To Dictionary  ${kwargs}  Deliver-To=${user}
      END
-    
+
      # Relearn spam message as ham
      Learn Multiclass  ${user}  ham  ${MESSAGE_SPAM}
-    
+
      # Should now classify as ham or at least not spam
      Scan File  ${MESSAGE_SPAM}  &{kwargs}
      ${pass} =  Run Keyword And Return Status  Expect Symbol  BAYES_HAM
@@ -112,10 +110,10 @@ Multiclass Cross-Learn Test
      IF  "${user}"
          Set To Dictionary  ${kwargs}  Deliver-To=${user}
      END
-    
+
      # Learn newsletter message as transactional
      Learn Multiclass  ${user}  transactional  ${MESSAGE_NEWSLETTER}
-    
+
      # Should classify as transactional, not newsletter
      Scan File  ${MESSAGE_NEWSLETTER}  &{kwargs}
      Expect Symbol  BAYES_TRANSACTIONAL
@@ -127,15 +125,15 @@ Multiclass Unlearn Test
      IF  "${user}"
          Set To Dictionary  ${kwargs}  Deliver-To=${user}
      END
-    
+
      # First learn spam
      Learn Multiclass  ${user}  spam  ${MESSAGE_SPAM}
      Scan File  ${MESSAGE_SPAM}  &{kwargs}
      Expect Symbol  BAYES_SPAM
-    
+
      # Then unlearn spam (learn as ham)
      Learn Multiclass  ${user}  ham  ${MESSAGE_SPAM}
-    
+
      # Should no longer classify as spam
      Scan File  ${MESSAGE_SPAM}  &{kwargs}
      Do Not Expect Symbol  BAYES_SPAM
@@ -151,7 +149,7 @@ Multiclass Stats Test
      # Check that rspamc stat shows learning counts for all classes
      ${result} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  stat
      Check Rspamc  ${result}
-    
+
      # Should show statistics for all classes
      Should Contain  ${result.stdout}  spam
      Should Contain  ${result.stdout}  ham
@@ -161,11 +159,11 @@ Multiclass Stats Test
  Multiclass Configuration Migration Test
      # Test that old binary config can be automatically migrated
      Set Test Variable  ${binary_config}  ${RSPAMD_TESTDIR}/configs/stats.conf
-    
+
      # Start with binary config
      ${result} =  Run Rspamc  --config  ${binary_config}  stat
      Check Rspamc  ${result}
-    
+
      # Should show deprecation warning but work
      Should Contain  ${result.stderr}  deprecated  ignore_case=True
  
@@ -173,17 +171,17 @@ Multiclass Performance Test
      [Arguments]  ${num_messages}=100
      # Test classification performance with multiple classes
      ${start_time} =  Get Time  epoch
-    
+
      FOR  ${i}  IN RANGE  ${num_messages}
          Scan File  ${MESSAGE_SPAM}
          Scan File  ${MESSAGE_HAM}
          Scan File  ${MESSAGE_NEWSLETTER}
          Scan File  ${MESSAGE_TRANSACTIONAL}
      END
-    
+
      ${end_time} =  Get Time  epoch
      ${duration} =  Evaluate  ${end_time} - ${start_time}
-    
+
      # Should complete in reasonable time (adjust threshold as needed)
      Should Be True  ${duration} < 30  msg=Performance test took ${duration}s, expected < 30s
  
@@ -191,6 +189,6 @@ Multiclass Memory Test
      # Test that memory usage is reasonable for multiclass classification
      ${result} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  stat
      Check Rspamc  ${result}
-    
+
      # Extract memory usage if available in stats
-    # This is a placeholder - actual implementation would parse memory stats
-\ No newline at end of file
+    # This is a placeholder - actual implementation would parse memory stats
diff --git a/test/functional/configs/multiclass_bayes.conf b/test/functional/configs/multiclass_bayes.conf

index 3504cd16ebe27bc7e0f326ffc432273c54e01bf4..5b8d077341c986529292fb57d64a4c7c3e48b4ae 100644 (file)
--- a/test/functional/configs/multiclass_bayes.conf
+++ b/test/functional/configs/multiclass_bayes.conf
@@ -64,29 +64,25 @@ classifier {
         statfile {
                 class = "spam";
                 symbol = BAYES_SPAM;
-               size = 1M;
                 server = {= env.REDIS_SERVER =}
         }
         statfile {
                 class = "ham";
                 symbol = BAYES_HAM;
-               size = 1M;
                 server = {= env.REDIS_SERVER =}
         }
         statfile {
                 class = "newsletter";
                 symbol = BAYES_NEWSLETTER;
-               size = 1M;
                 server = {= env.REDIS_SERVER =}
         }
         statfile {
                 class = "transactional";
                 symbol = BAYES_TRANSACTIONAL;
-               size = 1M;
                 server = {= env.REDIS_SERVER =}
         }
  
-       # Backend class labels for Redis storage optimization
+       # Backend class labels for Redis
         class_labels = {
                 "spam" = "S";
                 "ham" = "H";
author	Vsevolod Stakhov <vsevolod@rspamd.com>
	Fri, 25 Jul 2025 10:58:36 +0000 (11:58 +0100)
committer	Vsevolod Stakhov <vsevolod@rspamd.com>
	Fri, 25 Jul 2025 10:58:36 +0000 (11:58 +0100)
lualib/lua_bayes_redis.lua		patch \| blob \| blame \| history
src/libserver/cfg_file.h		patch \| blob \| blame \| history
src/libserver/cfg_rcl.cxx		patch \| blob \| blame \| history
src/libserver/cfg_utils.cxx		patch \| blob \| blame \| history
src/libstat/backends/redis_backend.cxx		patch \| blob \| blame \| history
src/libstat/classifiers/bayes.c		patch \| blob \| blame \| history
src/libstat/stat_process.c		patch \| blob \| blame \| history
test/functional/cases/110_statistics/300-multiclass-redis.robot		patch \| blob \| blame \| history
test/functional/cases/110_statistics/310-multiclass-migration.robot		patch \| blob \| blame \| history
test/functional/cases/110_statistics/multiclass_lib.robot		patch \| blob \| blame \| history
test/functional/configs/multiclass_bayes.conf		patch \| blob \| blame \| history