git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Minor] Fix some issues and add debugging
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 16 Nov 2018 17:41:40 +0000 (17:41 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 16 Nov 2018 17:41:40 +0000 (17:41 +0000)
lualib/lua_stat.lua
src/libstat/classifiers/bayes.c

index 4d6fef7a87f2bc42af9f91e8a0b104e24d51cae4..2b43defb602dad7d82ad085d9a95028b51e2ace2 100644 (file)
@@ -621,12 +621,12 @@ local function get_mime_stat_tokens(task, res, i)
         online_text = true
       end
 
-      rawset(res, i, "#lang:" .. tp:get_language() or 'unk')
+      rawset(res, i, "#lang:" .. (tp:get_language() or 'unk'))
       lua_util.debugm("bayes", task, "added language: %s",
           res[i])
       i = i + 1
 
-      rawset(res, i, "#cs:" .. tp:get_charset() or 'unk')
+      rawset(res, i, "#cs:" .. (tp:get_charset() or 'unk'))
       lua_util.debugm("bayes", task, "added charset: %s",
           res[i])
       i = i + 1
index edaae4e79b288c5a23121344da25bfacccd093ed..934c8d941383addf7093645b61008c3df861ff8f 100644 (file)
@@ -337,7 +337,9 @@ bayes_classify (struct rspamd_classifier * ctx,
        }
 
        if (cl.processed_tokens == 0) {
-               msg_info_bayes ("no tokens found in bayes database, ignore stats");
+               msg_info_bayes ("no tokens found in bayes database "
+                                 "(%ud total tokens, %ud text tokens), ignore stats",
+                               tokens->len, text_tokens);
 
                return TRUE;
        }
@@ -345,8 +347,11 @@ bayes_classify (struct rspamd_classifier * ctx,
        if (ctx->cfg->min_tokens > 0 &&
                cl.text_tokens < (gint)(ctx->cfg->min_tokens * 0.1)) {
                msg_info_bayes ("ignore bayes probability since we have "
-                                               "too few text tokens: %uL, at least %d is required",
-                                               cl.text_tokens, (gint)(ctx->cfg->min_tokens * 0.1));
+                                               "found too few text tokens: %uL (of %ud checked), "
+                                               "at least %d is required",
+                                               cl.text_tokens,
+                                               text_tokens,
+                                               (gint)(ctx->cfg->min_tokens * 0.1));
 
                return TRUE;
        }
@@ -374,7 +379,8 @@ bayes_classify (struct rspamd_classifier * ctx,
                final_prob = (s + 1.0 - h) / 2.;
                msg_debug_bayes (
                                "<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f,"
-                                               " %L tokens processed of %ud total tokens (%uL text tokens)",
+                               " %L tokens processed of %ud total tokens;"
+                               " %uL text tokens found of %ud text tokens)",
                                task->message_id,
                                cl.ham_prob,
                                h,
@@ -382,7 +388,8 @@ bayes_classify (struct rspamd_classifier * ctx,
                                s,
                                cl.processed_tokens,
                                tokens->len,
-                               cl.text_tokens);
+                               cl.text_tokens,
+                               text_tokens);
        }
        else {
                /*