#include "config.h"
#include "mem_pool.h"
+#include <event.h>
#define RSPAMD_DEFAULT_CLASSIFIER "bayes"
/* Consider this value as 0 */
struct rspamd_classifier_config;
struct rspamd_task;
+struct rspamd_config;
struct rspamd_classifier;
struct token_node_s;
/*
 * Table of callbacks describing one statistics classifier implementation.
 *
 *   name            - string identifying the classifier (presumably the value
 *                     matched against names such as RSPAMD_DEFAULT_CLASSIFIER;
 *                     TODO confirm against the lookup code).
 *   init_func       - initialisation hook for a classifier instance `cl`;
 *                     returns a gboolean.
 *   classify_func   - receives the classifier, a GPtrArray of tokens and the
 *                     task; returns a gboolean.
 *   learn_spam_func - receives the classifier, a GPtrArray of input tokens,
 *                     the task, the `is_spam` and `unlearn` flags, and a
 *                     GError ** for error reporting; returns a gboolean.
 *   fin_func        - hook that receives only the classifier and returns
 *                     nothing (presumably a finaliser, judging by the name).
 *
 * NOTE(review): this span reads as a unified-diff excerpt: lines starting
 * with '-' carry the previous declarations and lines starting with '+' carry
 * their replacements. The visible interface change is that init_func takes a
 * struct rspamd_config * and a struct event_base * instead of an
 * rspamd_mempool_t *, and that fin_func is added. The '-'/'+' pairs for
 * classify_func and learn_spam_func show the same parameter lists on both
 * sides.
 */
struct rspamd_stat_classifier {
char *name;
- gboolean (*init_func)(rspamd_mempool_t *pool,
- struct rspamd_classifier *cl);
+ gboolean (*init_func)(struct rspamd_config *cfg,
+ struct event_base *ev_base,
+ struct rspamd_classifier *cl);
gboolean (*classify_func)(struct rspamd_classifier * ctx,
- GPtrArray *tokens,
- struct rspamd_task *task);
+ GPtrArray *tokens,
+ struct rspamd_task *task);
gboolean (*learn_spam_func)(struct rspamd_classifier * ctx,
- GPtrArray *input,
- struct rspamd_task *task,
- gboolean is_spam,
- gboolean unlearn,
- GError **err);
+ GPtrArray *input,
+ struct rspamd_task *task,
+ gboolean is_spam,
+ gboolean unlearn,
+ GError **err);
+ void (*fin_func)(struct rspamd_classifier *cl);
};
/* Bayes algorithm */
/*
 * Initialisation entry point of the bayes classifier.
 * The '+' form takes (struct rspamd_config *, struct event_base *,
 * struct rspamd_classifier *), the same parameter types as
 * rspamd_stat_classifier.init_func; the '-' form is the earlier
 * pool-based signature.
 */
-gboolean bayes_init (rspamd_mempool_t *pool,
- struct rspamd_classifier *);
+gboolean bayes_init (struct rspamd_config *cfg,
+ struct event_base *ev_base,
+ struct rspamd_classifier *);
/*
 * Classification entry point of the bayes classifier.
 * Takes the classifier, a GPtrArray of tokens and the task, and returns a
 * gboolean; the parameter types match rspamd_stat_classifier.classify_func.
 */
gboolean bayes_classify (struct rspamd_classifier *ctx,
GPtrArray *tokens,
struct rspamd_task *task);
gboolean is_spam,
gboolean unlearn,
GError **err);
/*
 * Newly added bayes hook taking only the classifier and returning nothing;
 * same signature as rspamd_stat_classifier.fin_func.
 */
+void bayes_fin (struct rspamd_classifier *);
/* Generic lua classifier */
/*
 * Initialisation entry point of the generic Lua classifier.
 * The '+' form takes (struct rspamd_config *, struct event_base *,
 * struct rspamd_classifier *), the same parameter types as
 * rspamd_stat_classifier.init_func; the '-' form is the earlier
 * pool-based signature.
 */
-gboolean lua_classifier_init (rspamd_mempool_t *pool,
- struct rspamd_classifier *);
+gboolean lua_classifier_init (struct rspamd_config *cfg,
+ struct event_base *ev_base,
+ struct rspamd_classifier *);
/*
 * Classification entry point of the generic Lua classifier.
 * Takes the classifier, a GPtrArray of tokens and the task, and returns a
 * gboolean; the parameter types match rspamd_stat_classifier.classify_func.
 */
gboolean lua_classifier_classify (struct rspamd_classifier *ctx,
GPtrArray *tokens,
struct rspamd_task *task);
gboolean unlearn,
GError **err);
/*
 * Debug-logging helper for the "bayes" log module.
 *
 * rspamd_bayes_log_id is only declared here (extern); its definition is not
 * part of this excerpt.
 *
 * msg_debug_bayes(...) forwards its variadic arguments to
 * rspamd_conditional_debug_fast together with task->from_addr,
 * rspamd_bayes_log_id, the literal module name "bayes", the uid tag of the
 * task's memory pool and G_STRFUNC.
 *
 * The expansion refers to an identifier named `task` that is not a macro
 * parameter, so a suitable `task` pointer must be in scope wherever the
 * macro is used.
 */
+extern guint rspamd_bayes_log_id;
+#define msg_debug_bayes(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \
+ rspamd_bayes_log_id, "bayes", task->task_pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
#endif
/*
INIT_LOG_MODULE(luacl)
gboolean
-lua_classifier_init (rspamd_mempool_t *pool,
- struct rspamd_classifier *cl)
+lua_classifier_init (struct rspamd_config *cfg,
+ struct event_base *ev_base,
+ struct rspamd_classifier *cl)
{
struct rspamd_lua_classifier_ctx *ctx;
lua_State *L = cl->ctx->cfg->lua_state;
ctx = g_hash_table_lookup (lua_classifiers, cl->subrs->name);
if (ctx != NULL) {
- msg_err_pool ("duplicate lua classifier definition: %s",
+ msg_err_config ("duplicate lua classifier definition: %s",
cl->subrs->name);
return FALSE;
lua_getglobal (L, "rspamd_classifiers");
if (lua_type (L, -1) != LUA_TTABLE) {
- msg_err_pool ("cannot register classifier %s: no rspamd_classifier global",
+ msg_err_config ("cannot register classifier %s: no rspamd_classifier global",
cl->subrs->name);
lua_pop (L, 1);
lua_gettable (L, -2);
if (lua_type (L, -1) != LUA_TTABLE) {
- msg_err_pool ("cannot register classifier %s: bad lua type: %s",
+ msg_err_config ("cannot register classifier %s: bad lua type: %s",
cl->subrs->name, lua_typename (L, lua_type (L, -1)));
lua_pop (L, 2);
lua_gettable (L, -2);
if (lua_type (L, -1) != LUA_TFUNCTION) {
- msg_err_pool ("cannot register classifier %s: bad lua type for classify: %s",
+ msg_err_config ("cannot register classifier %s: bad lua type for classify: %s",
cl->subrs->name, lua_typename (L, lua_type (L, -1)));
lua_pop (L, 3);
lua_gettable (L, -2);
if (lua_type (L, -1) != LUA_TFUNCTION) {
- msg_err_pool ("cannot register classifier %s: bad lua type for learn: %s",
+ msg_err_config ("cannot register classifier %s: bad lua type for learn: %s",
cl->subrs->name, lua_typename (L, lua_type (L, -1)));
lua_pop (L, 3);
}
}
- msg_debug_task ("added stat tokens for header '%s'", name);
+ msg_debug_bayes ("added stat tokens for header '%s'", name);
}
}
g_array_append_val (ar, elt);
}
- msg_debug_task ("added stat tokens for image '%s'", img->html_image->src);
+ msg_debug_bayes ("added stat tokens for image '%s'", img->html_image->src);
}
}
else if (part->cd && part->cd->filename.len > 0) {
elt.len = part->ct->boundary.len;
if (elt.len) {
- msg_debug_task ("added stat tokens for mime boundary '%*s'",
+ msg_debug_bayes ("added stat tokens for mime boundary '%*s'",
(gint)elt.len, elt.begin);
g_array_append_val (ar, elt);
}
if (tp->language != NULL && tp->language[0] != '\0') {
elt.begin = (gchar *)tp->language;
elt.len = strlen (elt.begin);
- msg_debug_task ("added stat tokens for part language '%s'", elt.begin);
+ msg_debug_bayes ("added stat tokens for part language '%s'", elt.begin);
g_array_append_val (ar, elt);
}
if (tp->real_charset != NULL) {
elt.begin = (gchar *)tp->real_charset;
elt.len = strlen (elt.begin);
- msg_debug_task ("added stat tokens for part charset '%s'", elt.begin);
+ msg_debug_bayes ("added stat tokens for part charset '%s'", elt.begin);
g_array_append_val (ar, elt);
}
}
g_array_append_val (ar, elt);
}
- /* Use more precise headers order */
-#if 0
- cur = g_list_first (task->headers_order->head);
- while (cur) {
- hdr = cur->data;
-
- if (hdr->name && hdr->type != RSPAMD_HEADER_RECEIVED) {
- elt.begin = hdr->name;
- elt.len = strlen (hdr->name);
- g_array_append_val (ar, elt);
- }
-
- cur = g_list_next (cur);
- }
-#endif
-
- /* Use metatokens plugin from Lua */
- lua_getglobal (L, "rspamd_plugins");
-
- if (lua_type (L, -1) == LUA_TTABLE) {
- lua_pushstring (L, "stat_metatokens");
- lua_gettable (L, -2);
-
- if (lua_type (L, -1) == LUA_TTABLE) {
- gint old_top;
-
- old_top = lua_gettop (L);
- lua_pushstring (L, "callback");
- lua_gettable (L, -2);
-
- if (lua_type (L, -1) == LUA_TFUNCTION) {
- struct rspamd_task **ptask;
-
- ptask = lua_newuserdata (L, sizeof (*ptask));
- rspamd_lua_setclass (L, "rspamd{task}", -1);
- *ptask = task;
-
- if (lua_pcall (L, 1, LUA_MULTRET, 0) != 0) {
- msg_err_task ("stat_metatokens failed: %s",
- lua_tostring (L, -1));
- lua_pop (L, 1);
- } else {
- if (lua_gettop (L) > old_top &&
- lua_istable (L, old_top + 1)) {
- lua_pushvalue (L, old_top + 1);
- /* Iterate over table of tables */
- for (lua_pushnil (L); lua_next (L, -2);
- lua_pop (L, 1)) {
- elt.flags = RSPAMD_STAT_TOKEN_FLAG_META|
- RSPAMD_STAT_TOKEN_FLAG_LUA_META;
-
- if (lua_isnumber (L, -1)) {
- gdouble num = lua_tonumber (L, -1);
- guint8 *pnum = rspamd_mempool_alloc (
- task->task_pool,
- sizeof (num));
-
- msg_debug_task ("got metatoken number: %.2f",
- num);
- memcpy (pnum, &num, sizeof (num));
- elt.begin = (gchar *) pnum;
- elt.len = sizeof (num);
- g_array_append_val (ar, elt);
- } else if (lua_isstring (L, -1)) {
- const gchar *str;
- gsize tlen;
-
- str = lua_tolstring (L, -1, &tlen);
- guint8 *pstr = rspamd_mempool_alloc (
- task->task_pool,
- tlen);
- memcpy (pstr, str, tlen);
-
- msg_debug_task ("got metatoken string: %*s",
- (gint) tlen, str);
- elt.begin = (gchar *) pstr;
- elt.len = tlen;
- g_array_append_val (ar, elt);
- }
- else if (lua_istable (L, -1)) {
- /* Treat that as unigramms */
- for (lua_pushnil (L); lua_next (L, -2);
- lua_pop (L, 1)) {
- if (lua_isstring (L, -1)) {
- const gchar *str;
- gsize tlen;
-
- str = lua_tolstring (L, -1, &tlen);
- guint8 *pstr = rspamd_mempool_alloc (
- task->task_pool,
- tlen);
- memcpy (pstr, str, tlen);
-
- msg_debug_task ("got unigramm "
- "metatoken string: %*s",
- (gint) tlen, str);
- elt.begin = (gchar *) pstr;
- elt.len = tlen;
- elt.flags |= RSPAMD_STAT_TOKEN_FLAG_UNIGRAM;
- g_array_append_val (ar, elt);
- }
- }
- }
- }
- }
- }
- }
- }
- }
-
- lua_settop (L, 0);
- st_ctx->tokenizer->tokenize_func (st_ctx,
- task,
- ar,
- TRUE,
- "META:",
- task->tokens);
-
rspamd_mempool_add_destructor (task->task_pool,
rspamd_array_free_hard, ar);
}
if (pdiff != NULL && (1.0 - *pdiff) * 100.0 > similarity_treshold) {
- msg_debug_task ("message has two common parts (%.2f), so skip the last one",
+ msg_debug_bayes ("message has two common parts (%.2f), so skip the last one",
*pdiff);
break;
}
if (!rspamd_symcache_is_symbol_enabled (task, task->cfg->cache,
st->stcf->symbol)) {
g_ptr_array_index (task->stat_runtimes, i) = NULL;
- msg_debug_task ("symbol %s is disabled, skip classification",
+ msg_debug_bayes ("symbol %s is disabled, skip classification",
st->stcf->symbol);
continue;
}
if (bk_run == NULL) {
skip = TRUE;
- msg_debug_task ("disable classifier %s as statfile symbol %s is disabled",
+ msg_debug_bayes ("disable classifier %s as statfile symbol %s is disabled",
cl->cfg->name, st->stcf->symbol);
break;
}
if (!skip) {
if (cl->cfg->min_tokens > 0 && task->tokens->len < cl->cfg->min_tokens) {
- msg_debug_task (
+ msg_debug_bayes (
"<%s> contains less tokens than required for %s classifier: "
"%ud < %ud",
task->message_id,
continue;
}
else if (cl->cfg->max_tokens > 0 && task->tokens->len > cl->cfg->max_tokens) {
- msg_debug_task (
+ msg_debug_bayes (
"<%s> contains more tokens than allowed for %s classifier: "
"%ud > %ud",
task->message_id,
if (rspamd_task_find_symbol_result (task, st->stcf->symbol)) {
if (is_spam == !!st->stcf->is_spam) {
- msg_debug_task ("do not autolearn %s as symbol %s is already "
+ msg_debug_bayes ("do not autolearn %s as symbol %s is already "
"added", is_spam ? "spam" : "ham", st->stcf->symbol);
return TRUE;