online_text = true
end
- rawset(res, i, "#lang:" .. tp:get_language() or 'unk')
+ rawset(res, i, "#lang:" .. (tp:get_language() or 'unk'))
lua_util.debugm("bayes", task, "added language: %s",
res[i])
i = i + 1
- rawset(res, i, "#cs:" .. tp:get_charset() or 'unk')
+ rawset(res, i, "#cs:" .. (tp:get_charset() or 'unk'))
lua_util.debugm("bayes", task, "added charset: %s",
res[i])
i = i + 1
}
if (cl.processed_tokens == 0) {
- msg_info_bayes ("no tokens found in bayes database, ignore stats");
+ msg_info_bayes ("no tokens found in bayes database "
+ "(%ud total tokens, %ud text tokens), ignore stats",
+ tokens->len, text_tokens);
return TRUE;
}
if (ctx->cfg->min_tokens > 0 &&
cl.text_tokens < (gint)(ctx->cfg->min_tokens * 0.1)) {
msg_info_bayes ("ignore bayes probability since we have "
- "too few text tokens: %uL, at least %d is required",
- cl.text_tokens, (gint)(ctx->cfg->min_tokens * 0.1));
+ "found too few text tokens: %uL (of %ud checked), "
+ "at least %d is required",
+ cl.text_tokens,
+ text_tokens,
+ (gint)(ctx->cfg->min_tokens * 0.1));
return TRUE;
}
final_prob = (s + 1.0 - h) / 2.;
msg_debug_bayes (
"<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f,"
- " %L tokens processed of %ud total tokens (%uL text tokens)",
+ " %L tokens processed of %ud total tokens;"
+ " %uL text tokens found of %ud text tokens)",
task->message_id,
cl.ham_prob,
h,
s,
cl.processed_tokens,
tokens->len,
- cl.text_tokens);
+ cl.text_tokens,
+ text_tokens);
}
else {
/*