git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Fix more calculation issues
author Vsevolod Stakhov <vsevolod@rspamd.com>
Mon, 28 Jul 2025 09:41:33 +0000 (10:41 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Mon, 28 Jul 2025 09:41:33 +0000 (10:41 +0100)
src/libserver/cfg_rcl.cxx
src/libstat/classifiers/bayes.c
test/functional/cases/110_statistics/multiclass_lib.robot
test/functional/configs/multiclass_bayes.conf

index 3978b23b01f0d66276582515a8bbe8e157721415..da58459170accaf5600d9ccc4b0a32e98ef89a8b 100644 (file)
@@ -2631,7 +2631,7 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
                rspamd_rcl_add_default_handler(sub,
                                                                           "min_prob_strength",
                                                                           rspamd_rcl_parse_struct_double,
-                                                                          G_STRUCT_OFFSET(struct rspamd_classifier_config, min_token_hits),
+                                                                          G_STRUCT_OFFSET(struct rspamd_classifier_config, min_prob_strength),
                                                                           0,
                                                                           "Use only tokens with probability in [0.5 - MPS, 0.5 + MPS]");
                rspamd_rcl_add_default_handler(sub,
index d995de91f1dbb2582b295a1a98f428685397a83e..ffe96237ce715b47ddae2f31e3bcab28e5dfb9ac 100644 (file)
@@ -548,8 +548,13 @@ bayes_classify_multiclass(struct rspamd_classifier *ctx,
                        double fisher_result = inv_chi_square(task, max_log_prob, cl.processed_tokens);
                        confidence = 1.0 - fisher_result;
 
+                       msg_debug_bayes("fisher_result: %g, max_log_prob: %g, condition check: fisher_result > 0.999 = %s, max_log_prob > -50 = %s",
+                                                       fisher_result, max_log_prob,
+                                                       fisher_result > 0.999 ? "true" : "false",
+                                                       max_log_prob > -50 ? "true" : "false");
+
                        /* Handle case where Fisher method indicates extreme confidence */
-                       if (fisher_result >= 1.0 && max_log_prob > -50) {
+                       if (fisher_result > 0.999 && max_log_prob > -100) {
                                /* Large magnitude negative log prob means strong evidence */
                                confidence = 0.90;
                                msg_debug_bayes("extreme negative log_prob (%g), setting high confidence", max_log_prob);
index b2e7c10e319c3629f7f398774e5211b7844fc051..89b5683d3188cbbd6a1023610bb35ad5a946611b 100644 (file)
@@ -1,4 +1,5 @@
 *** Settings ***
+Library         OperatingSystem
 Resource        lib.robot
 
 *** Variables ***
@@ -17,20 +18,24 @@ ${RSPAMD_STATS_PER_USER}       ${EMPTY}
 *** Keywords ***
 Learn Multiclass
     [Arguments]  ${user}  ${class}  ${message}
+    # Extract filename from message path for queue-id
+    ${path}  ${filename} =  Split Path  ${message}
     IF  "${user}"
-        ${result} =  Run Rspamc  -d  ${user}  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  learn_class:${class}  ${message}
+        ${result} =  Run Rspamc  -d  ${user}  --queue-id  ${filename}  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  learn_class:${class}  ${message}
     ELSE
-        ${result} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  learn_class:${class}  ${message}
+        ${result} =  Run Rspamc  --queue-id  ${filename}  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  learn_class:${class}  ${message}
     END
     Check Rspamc  ${result}
 
 Learn Multiclass Legacy
     [Arguments]  ${user}  ${class}  ${message}
     # Test backward compatibility with old learn_spam/learn_ham commands
+    # Extract filename from message path for queue-id
+    ${path}  ${filename} =  Split Path  ${message}
     IF  "${user}"
-        ${result} =  Run Rspamc  -d  ${user}  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  learn_${class}  ${message}
+        ${result} =  Run Rspamc  -d  ${user}  --queue-id  ${filename}  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  learn_${class}  ${message}
     ELSE
-        ${result} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  learn_${class}  ${message}
+        ${result} =  Run Rspamc  --queue-id  ${filename}  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  learn_${class}  ${message}
     END
     Check Rspamc  ${result}
 
index 6651f94a174bf6fa80a24e12857b5d2de54feae4..278aeeee932a6d9449151f2c8209a44b06447355 100644 (file)
@@ -112,6 +112,7 @@ classifier {
 
        min_learns = 1;
        min_tokens = 1;
+       min_token_hits = 1;
        min_prob_strength = 0.05;
 
        {% if env.STATS_PER_USER ~= '' %}