From: Vsevolod Stakhov Date: Mon, 28 Jul 2025 09:41:33 +0000 (+0100) Subject: [Project] Fix more calculation issues X-Git-Tag: 3.13.0~38^2~8 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7c2fd274617ede6c8da89166c6c3818a53973aeb;p=thirdparty%2Frspamd.git [Project] Fix more calculation issues --- diff --git a/src/libserver/cfg_rcl.cxx b/src/libserver/cfg_rcl.cxx index 3978b23b01..da58459170 100644 --- a/src/libserver/cfg_rcl.cxx +++ b/src/libserver/cfg_rcl.cxx @@ -2631,7 +2631,7 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections) rspamd_rcl_add_default_handler(sub, "min_prob_strength", rspamd_rcl_parse_struct_double, - G_STRUCT_OFFSET(struct rspamd_classifier_config, min_token_hits), + G_STRUCT_OFFSET(struct rspamd_classifier_config, min_prob_strength), 0, "Use only tokens with probability in [0.5 - MPS, 0.5 + MPS]"); rspamd_rcl_add_default_handler(sub, diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c index d995de91f1..ffe96237ce 100644 --- a/src/libstat/classifiers/bayes.c +++ b/src/libstat/classifiers/bayes.c @@ -548,8 +548,13 @@ bayes_classify_multiclass(struct rspamd_classifier *ctx, double fisher_result = inv_chi_square(task, max_log_prob, cl.processed_tokens); confidence = 1.0 - fisher_result; + msg_debug_bayes("fisher_result: %g, max_log_prob: %g, condition check: fisher_result > 0.999 = %s, max_log_prob > -50 = %s", + fisher_result, max_log_prob, + fisher_result > 0.999 ? "true" : "false", + max_log_prob > -50 ? "true" : "false"); + /* Handle case where Fisher method indicates extreme confidence */ - if (fisher_result >= 1.0 && max_log_prob > -50) { + if (fisher_result > 0.999 && max_log_prob > -100) { /* Large magnitude negative log prob means strong evidence */ confidence = 0.90; msg_debug_bayes("extreme negative log_prob (%g), setting high confidence", max_log_prob); diff --git a/test/functional/cases/110_statistics/multiclass_lib.robot b/test/functional/cases/110_statistics/multiclass_lib.robot index b2e7c10e31..89b5683d31 100644 --- a/test/functional/cases/110_statistics/multiclass_lib.robot +++ b/test/functional/cases/110_statistics/multiclass_lib.robot @@ -1,4 +1,5 @@ *** Settings *** +Library OperatingSystem Resource lib.robot *** Variables *** @@ -17,20 +18,24 @@ ${RSPAMD_STATS_PER_USER} ${EMPTY} *** Keywords *** Learn Multiclass [Arguments] ${user} ${class} ${message} + # Extract filename from message path for queue-id + ${path} ${filename} = Split Path ${message} IF "${user}" - ${result} = Run Rspamc -d ${user} -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:${class} ${message} + ${result} = Run Rspamc -d ${user} --queue-id ${filename} -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:${class} ${message} ELSE - ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:${class} ${message} + ${result} = Run Rspamc --queue-id ${filename} -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:${class} ${message} END Check Rspamc ${result} Learn Multiclass Legacy [Arguments] ${user} ${class} ${message} # Test backward compatibility with old learn_spam/learn_ham commands + # Extract filename from message path for queue-id + ${path} ${filename} = Split Path ${message} IF "${user}" - ${result} = Run Rspamc -d ${user} -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_${class} ${message} + ${result} = Run Rspamc -d ${user} --queue-id ${filename} -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_${class} ${message} ELSE - ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_${class} ${message} + ${result} = Run Rspamc --queue-id ${filename} -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_${class} ${message} END Check Rspamc ${result} diff --git a/test/functional/configs/multiclass_bayes.conf b/test/functional/configs/multiclass_bayes.conf index 6651f94a17..278aeeee93 100644 --- a/test/functional/configs/multiclass_bayes.conf +++ b/test/functional/configs/multiclass_bayes.conf @@ -112,6 +112,7 @@ classifier { min_learns = 1; min_tokens = 1; + min_token_hits = 1; min_prob_strength = 0.05; {% if env.STATS_PER_USER ~= '' %}