]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
Reorganize structure of filter components.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 2 Jun 2015 11:37:22 +0000 (12:37 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 2 Jun 2015 14:39:37 +0000 (15:39 +0100)
src/libmime/filter.c
src/libmime/filter.h
src/libserver/CMakeLists.txt
src/libserver/cfg_rcl.c
src/libserver/composites.c [new file with mode: 0644]
src/libserver/composites.h [new file with mode: 0644]
src/libserver/task.c
src/libserver/task.h
src/lua/lua_cfg_file.c
src/lua/lua_config.c

index ddab0e81b63f9390441c77a52774e9cc095bfae3..a9089818cc0a70a6ba22715eb9552a400f88b364 100644 (file)
 
 #define COMMON_PART_FACTOR 95
 
-static rspamd_expression_atom_t * rspamd_composite_expr_parse (const gchar *line, gsize len,
-               rspamd_mempool_t *pool, gpointer ud, GError **err);
-static gint rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom);
-static gint rspamd_composite_expr_priority (rspamd_expression_atom_t *atom);
-static void rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom);
-
-const struct rspamd_atom_subr composite_expr_subr = {
-       .parse = rspamd_composite_expr_parse,
-       .process = rspamd_composite_expr_process,
-       .priority = rspamd_composite_expr_priority,
-       .destroy = rspamd_composite_expr_destroy
-};
-
-static inline GQuark
-filter_error_quark (void)
-{
-       return g_quark_from_static_string ("g-filter-error-quark");
-}
-
 struct metric_result *
 rspamd_create_metric_result (struct rspamd_task *task, const gchar *name)
 {
@@ -343,392 +324,6 @@ check_metric_settings (struct rspamd_task *task, struct metric *metric,
        return FALSE;
 }
 
-/* Return true if metric has score that is more than spam score for it */
-static gboolean
-check_metric_is_spam (struct rspamd_task *task, struct metric *metric)
-{
-       struct metric_result *res;
-       double ms;
-
-       /* Avoid concurrency while checking results */
-#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
-       g_static_mutex_lock (&result_mtx);
-#else
-       G_LOCK (result_mtx);
-#endif
-       res = g_hash_table_lookup (task->results, metric->name);
-       if (res) {
-#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
-               g_static_mutex_unlock (&result_mtx);
-#else
-               G_UNLOCK (result_mtx);
-#endif
-               if (!check_metric_settings (task, metric, &ms)) {
-                       ms = metric->actions[METRIC_ACTION_REJECT].score;
-               }
-               return (ms > 0 && res->score >= ms);
-       }
-
-#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
-       g_static_mutex_unlock (&result_mtx);
-#else
-       G_UNLOCK (result_mtx);
-#endif
-
-       return FALSE;
-}
-
-gint
-rspamd_process_filters (struct rspamd_task *task)
-{
-       GList *cur;
-       struct metric *metric;
-       gpointer item = NULL;
-
-       /* Insert default metric to be sure that it exists all the time */
-       rspamd_create_metric_result (task, DEFAULT_METRIC);
-       if (task->settings) {
-               const ucl_object_t *wl;
-
-               wl = ucl_object_find_key (task->settings, "whitelist");
-               if (wl != NULL) {
-                       msg_info ("<%s> is whitelisted", task->message_id);
-                       task->flags |= RSPAMD_TASK_FLAG_SKIP;
-                       return 0;
-               }
-       }
-
-       /* Process metrics symbols */
-       while (rspamd_symbols_cache_process_symbol (task, task->cfg->cache, &item)) {
-               /* Check reject actions */
-               cur = task->cfg->metrics_list;
-               while (cur) {
-                       metric = cur->data;
-                       if (!(task->flags & RSPAMD_TASK_FLAG_PASS_ALL) &&
-                               metric->actions[METRIC_ACTION_REJECT].score > 0 &&
-                               check_metric_is_spam (task, metric)) {
-                               msg_info ("<%s> has already scored more than %.2f, so do not "
-                                               "plan any more checks", task->message_id,
-                                               metric->actions[METRIC_ACTION_REJECT].score);
-                               return 1;
-                       }
-                       cur = g_list_next (cur);
-               }
-       }
-
-       if (rspamd_session_events_pending (task->s) != 0) {
-               task->state = WAIT_FILTER;
-       }
-
-       return 1;
-}
-
-
-struct composites_data {
-       struct rspamd_task *task;
-       struct rspamd_composite *composite;
-       struct metric_result *metric_res;
-       GTree *symbols_to_remove;
-       guint8 *checked;
-};
-
-struct symbol_remove_data {
-       struct symbol *ms;
-       gboolean remove_weight;
-       gboolean remove_symbol;
-       GList *comp;
-};
-
-
-/*
- * Composites are just sequences of symbols
- */
-static rspamd_expression_atom_t *
-rspamd_composite_expr_parse (const gchar *line, gsize len,
-               rspamd_mempool_t *pool, gpointer ud, GError **err)
-{
-       gsize clen;
-       rspamd_expression_atom_t *res;
-
-       clen = strcspn (line, ", \t()><+!|&\n");
-       if (clen == 0) {
-               /* Invalid composite atom */
-               g_set_error (err, filter_error_quark (), 100, "Invalid composite: %s",
-                               line);
-               return NULL;
-       }
-
-       res = rspamd_mempool_alloc0 (pool, sizeof (*res));
-       res->len = clen;
-       res->str = line;
-       res->data = rspamd_mempool_alloc (pool, clen + 1);
-       rspamd_strlcpy (res->data, line, clen + 1);
-
-       return res;
-}
-
-static gint
-rspamd_composite_process_single_symbol (struct composites_data *cd,
-               const gchar *sym, struct symbol **pms)
-{
-       struct symbol *ms = NULL;
-       gint rc = 0;
-       struct rspamd_composite *ncomp;
-
-       if ((ms = g_hash_table_lookup (cd->metric_res->symbols, sym)) == NULL) {
-               if ((ncomp =
-                               g_hash_table_lookup (cd->task->cfg->composite_symbols,
-                                               sym)) != NULL) {
-                       /* Set checked for this symbol to avoid cyclic references */
-                       if (isclr (cd->checked, ncomp->id * 2)) {
-                               setbit (cd->checked, cd->composite->id * 2);
-                               rc = rspamd_process_expression (ncomp->expr,
-                                               RSPAMD_EXPRESSION_FLAG_NOOPT, cd);
-                               clrbit (cd->checked, cd->composite->id * 2);
-
-                               if (rc) {
-                                       setbit (cd->checked, ncomp->id * 2 + 1);
-                               }
-                               setbit (cd->checked, ncomp->id * 2);
-
-                               ms = g_hash_table_lookup (cd->metric_res->symbols, sym);
-                       }
-                       else {
-                               /*
-                                * XXX: in case of cyclic references this would return 0
-                                */
-                               rc = isset (cd->checked, ncomp->id * 2 + 1);
-                       }
-               }
-       }
-       else {
-               rc = 1;
-       }
-
-       *pms = ms;
-       return rc;
-}
-
-static gint
-rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom)
-{
-       struct composites_data *cd = (struct composites_data *)input;
-       const gchar *sym = atom->data;
-       struct symbol_remove_data *rd;
-       struct symbol *ms;
-       struct rspamd_symbols_group *gr;
-       struct rspamd_symbol_def *sdef;
-       gint rc = 0;
-       gchar t = '\0';
-
-       if (isset (cd->checked, cd->composite->id * 2)) {
-               /* We have already checked this composite, so just return its value */
-               rc = isset (cd->checked, cd->composite->id * 2 + 1);
-               return rc;
-       }
-
-       if (*sym == '~' || *sym == '-') {
-               t = *sym ++;
-       }
-
-       if (strncmp (sym, "g:", 2) == 0) {
-               gr = g_hash_table_lookup (cd->task->cfg->symbols_groups, sym + 2);
-
-               if (gr != NULL) {
-                       LL_FOREACH (gr->symbols, sdef) {
-                               rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms);
-                               if (rc) {
-                                       break;
-                               }
-                       }
-               }
-       }
-       else {
-               rc = rspamd_composite_process_single_symbol (cd, sym, &ms);
-       }
-
-       if (rc && ms) {
-               /*
-                * At this point we know that we need to do something about this symbol,
-                * however, we don't know whether we need to delete it unfortunately,
-                * that depends on the later decisions when the complete expression is
-                * evaluated.
-                */
-               if ((rd = g_tree_lookup (cd->symbols_to_remove, ms->name)) == NULL) {
-                       rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*rd));
-                       rd->ms = ms;
-
-                       if (G_UNLIKELY (t == '~')) {
-                               rd->remove_weight = FALSE;
-                               rd->remove_symbol = TRUE;
-                       }
-                       else if (G_UNLIKELY (t == '-')) {
-                               rd->remove_symbol = FALSE;
-                               rd->remove_weight = FALSE;
-                       }
-                       else {
-                               rd->remove_symbol = TRUE;
-                               rd->remove_weight = TRUE;
-                       }
-
-                       rd->comp = g_list_prepend (NULL, cd->composite);
-                       g_tree_insert (cd->symbols_to_remove,
-                                       (gpointer)ms->name,
-                                       rd);
-               }
-               else {
-                       /*
-                        * XXX: what if we have different preferences regarding
-                        * weight and symbol removal in different composites?
-                        */
-                       rd->comp = g_list_prepend (rd->comp, cd->composite);
-               }
-       }
-
-       return rc;
-}
-
-/*
- * We don't have preferences for composites
- */
-static gint
-rspamd_composite_expr_priority (rspamd_expression_atom_t *atom)
-{
-       return 0;
-}
-
-static void
-rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom)
-{
-       /* Composite atoms are destroyed just with the pool */
-}
-
-static gint
-remove_compare_data (gconstpointer a, gconstpointer b)
-{
-       const gchar *ca = a, *cb = b;
-
-       return strcmp (ca, cb);
-}
-
-static void
-composites_foreach_callback (gpointer key, gpointer value, void *data)
-{
-       struct composites_data *cd = data;
-       struct rspamd_composite *comp = value;
-       gint rc;
-
-       cd->composite = comp;
-
-       rc = rspamd_process_expression (comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT, cd);
-
-       /* Checked bit */
-       setbit (cd->checked, comp->id * 2);
-
-       /* Result bit */
-       if (rc) {
-               setbit (cd->checked, comp->id * 2 + 1);
-               rspamd_task_insert_result_single (cd->task, key, 1.0, NULL);
-       }
-       else {
-               clrbit (cd->checked, comp->id * 2 + 1);
-       }
-}
-
-
-static gboolean
-composites_remove_symbols (gpointer key, gpointer value, gpointer data)
-{
-       struct composites_data *cd = data;
-       struct symbol_remove_data *rd = value;
-       GList *cur;
-       struct rspamd_composite *comp;
-       gboolean matched = FALSE;
-
-       cur = rd->comp;
-
-       /*
-        * XXX: actually, this is a weak assumption as we are unaware here about
-        * negate operation and so on. We need to parse AST directly and remove
-        * only those symbols that could be removed.
-        */
-       while (cur) {
-               comp = cur->data;
-
-               if (isset (cd->checked, comp->id * 2 + 1)) {
-                       matched = TRUE;
-                       break;
-               }
-
-               cur = g_list_next (cur);
-       }
-
-       g_list_free (rd->comp);
-
-       if (matched) {
-               if (rd->remove_symbol) {
-                       g_hash_table_remove (cd->metric_res->symbols, key);
-               }
-               if (rd->remove_weight) {
-                       cd->metric_res->score -= rd->ms->score;
-               }
-       }
-
-       return FALSE;
-}
-
-static void
-composites_metric_callback (gpointer key, gpointer value, gpointer data)
-{
-       struct rspamd_task *task = (struct rspamd_task *)data;
-       struct composites_data *cd =
-               rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data));
-       struct metric_result *metric_res = (struct metric_result *)value;
-
-       cd->task = task;
-       cd->metric_res = (struct metric_result *)metric_res;
-       cd->symbols_to_remove = g_tree_new (remove_compare_data);
-       cd->checked =
-               rspamd_mempool_alloc0 (task->task_pool,
-                       NBYTES (g_hash_table_size (task->cfg->composite_symbols) * 2));
-
-       /* Process hash table */
-       g_hash_table_foreach (task->cfg->composite_symbols,
-               composites_foreach_callback,
-               cd);
-
-       /* Remove symbols that are in composites */
-       g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd);
-       /* Free list */
-       g_tree_destroy (cd->symbols_to_remove);
-}
-
-void
-rspamd_make_composites (struct rspamd_task *task)
-{
-       g_hash_table_foreach (task->results, composites_metric_callback, task);
-}
-
-struct classifiers_cbdata {
-       struct rspamd_task *task;
-       struct lua_locked_state *nL;
-};
-
-
-void
-rspamd_process_statistics (struct rspamd_task *task)
-{
-       if (RSPAMD_TASK_IS_SKIPPED (task)) {
-               return;
-       }
-
-       /* TODO: handle err here */
-       rspamd_stat_classify (task, task->cfg->lua_state, NULL);
-
-       /* Process results */
-       rspamd_make_composites (task);
-}
-
 static void
 insert_metric_header (gpointer metric_name, gpointer metric_value,
        gpointer data)
@@ -910,16 +505,3 @@ rspamd_check_action_metric (struct rspamd_task *task,
 
        return METRIC_ACTION_NOACTION;
 }
-
-gboolean
-rspamd_learn_task_spam (struct rspamd_classifier_config *cl,
-       struct rspamd_task *task,
-       gboolean is_spam,
-       GError **err)
-{
-       return rspamd_stat_learn (task, is_spam, task->cfg->lua_state, err);
-}
-
-/*
- * vi:ts=4
- */
index 67dc600104bad406930fcb35118d297f10dd5f8d..d0a3d17ec3f9eb4c7e0826f0a79433fd057bfceb 100644 (file)
@@ -73,19 +73,6 @@ struct metric_result {
        double grow_factor;                             /**< current grow factor                                        */
 };
 
-
-/**
- * Subr for composite expressions
- */
-extern const struct rspamd_atom_subr composite_expr_subr;
-/**
- * Composite structure
- */
-struct rspamd_composite {
-       struct rspamd_expression *expr;
-       gint id;
-};
-
 /**
  * Create or return existing result for the specified metric name
  * @param task task object
@@ -95,19 +82,6 @@ struct rspamd_composite {
 struct metric_result * rspamd_create_metric_result (struct rspamd_task *task,
                const gchar *name);
 
-/**
- * Process all filters
- * @param task worker's task that present message from user
- * @return 0 - if there is non-finished tasks and 1 if processing is completed
- */
-gint rspamd_process_filters (struct rspamd_task *task);
-
-/**
- * Process message with statfiles
- * @param task worker's task that present message from user
- */
-void rspamd_process_statistics (struct rspamd_task *task);
-
 /**
  * Insert a result to task
  * @param task worker's task that present message from user
@@ -134,12 +108,6 @@ void rspamd_task_insert_result_single (struct rspamd_task *task,
        double flag,
        GList *opts);
 
-/**
- * Process all results and form composite metrics from existent metrics as it is defined in config
- * @param task worker's task that present message from user
- */
-void rspamd_make_composites (struct rspamd_task *task);
-
 /**
  * Default consolidation function for metric, it get all symbols and multiply symbol
  * weight by some factor that is specified in config. Default factor is 1.
@@ -152,18 +120,6 @@ double rspamd_factor_consolidation_func (struct rspamd_task *task,
        const gchar *unused);
 
 
-/**
- * Learn specified statfile with message in a task
- * @param statfile symbol of statfile
- * @param task worker's task object
- * @param err pointer to GError
- * @return true if learn succeed
- */
-gboolean rspamd_learn_task_spam (struct rspamd_classifier_config *cl,
-       struct rspamd_task *task,
-       gboolean is_spam,
-       GError **err);
-
 /*
  * Get action from a string
  */
index 4edbe054d4d24fb0d10b5f24c42cdd19661bbfef..1f8df6c13cff64bd2a52d7604dcc54147d997f43 100644 (file)
@@ -3,6 +3,7 @@ SET(LIBRSPAMDSERVERSRC
                                ${CMAKE_CURRENT_SOURCE_DIR}/buffer.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/cfg_utils.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/cfg_rcl.c
+                               ${CMAKE_CURRENT_SOURCE_DIR}/composites.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/dkim.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/dns.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/dynamic_cfg.c
index 314be10d0b0e971f374d43b4f5dbc168449fb60f..fc8ada74966cbbd4df1156a644cb3f2148b5b683 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013, Vsevolod Stakhov
+/* Copyright (c) 2013-2015, Vsevolod Stakhov
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -28,7 +28,7 @@
 #include "cfg_file.h"
 #include "lua/lua_common.h"
 #include "expression.h"
-
+#include "composites.h"
 
 struct rspamd_rcl_default_handler_data {
        struct rspamd_rcl_struct_parser pd;
diff --git a/src/libserver/composites.c b/src/libserver/composites.c
new file mode 100644 (file)
index 0000000..b6d2fb8
--- /dev/null
@@ -0,0 +1,335 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer in the
+ *        documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "config.h"
+#include "logger.h"
+#include "expression.h"
+#include "task.h"
+#include "utlist.h"
+#include "filter.h"
+#include "composites.h"
+
+struct composites_data { /* Working state handed to every composite callback for one metric result */
+       struct rspamd_task *task; /* task being processed */
+       struct rspamd_composite *composite; /* composite set by composites_foreach_callback before evaluation */
+       struct metric_result *metric_res; /* metric result whose symbols are looked up and removed */
+       GTree *symbols_to_remove; /* symbol name -> struct symbol_remove_data */
+       guint8 *checked; /* bit array: bit id*2 = composite evaluated, bit id*2+1 = its result */
+};
+
+struct symbol_remove_data { /* Pending removal record for one symbol matched by a composite atom */
+       struct symbol *ms; /* the matched symbol */
+       gboolean remove_weight; /* subtract ms->score from the metric score */
+       gboolean remove_symbol; /* delete the symbol from the metric result */
+       GList *comp; /* composites whose atoms referenced this symbol */
+};
+/* Forward declarations of the atom callbacks collected in composite_expr_subr */
+static rspamd_expression_atom_t * rspamd_composite_expr_parse (const gchar *line, gsize len,
+               rspamd_mempool_t *pool, gpointer ud, GError **err);
+static gint rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom);
+static gint rspamd_composite_expr_priority (rspamd_expression_atom_t *atom);
+static void rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom);
+/* Callback table for composite expression atoms; the only non-static object defined in this file */
+const struct rspamd_atom_subr composite_expr_subr = {
+       .parse = rspamd_composite_expr_parse,
+       .process = rspamd_composite_expr_process,
+       .priority = rspamd_composite_expr_priority,
+       .destroy = rspamd_composite_expr_destroy
+};
+/* Error domain passed to g_set_error() by rspamd_composite_expr_parse */
+static GQuark
+rspamd_composites_quark (void)
+{
+       return g_quark_from_static_string ("composites");
+}
+/*
+ * Parse one composite atom from the start of `line`.
+ * The atom is the run of characters up to the first delimiter listed in
+ * the strcspn() set below. For an empty run *err is set and NULL is
+ * returned. Otherwise an atom is allocated from `pool`: str points into
+ * `line`, len is the atom length and data holds a copy of the name made
+ * with rspamd_strlcpy. `len` and `ud` are not used.
+ */
+static rspamd_expression_atom_t *
+rspamd_composite_expr_parse (const gchar *line, gsize len,
+               rspamd_mempool_t *pool, gpointer ud, GError **err)
+{
+       gsize clen;
+       rspamd_expression_atom_t *res;
+
+       /*
+        * Composites are just sequences of symbols
+        */
+       clen = strcspn (line, ", \t()><+!|&\n");
+       if (clen == 0) {
+               /* Invalid composite atom */
+               g_set_error (err, rspamd_composites_quark (), 100, "Invalid composite: %s",
+                               line);
+               return NULL;
+       }
+
+       res = rspamd_mempool_alloc0 (pool, sizeof (*res));
+       res->len = clen;
+       res->str = line;
+       res->data = rspamd_mempool_alloc (pool, clen + 1);
+       rspamd_strlcpy (res->data, line, clen + 1);
+
+       return res;
+}
+/*
+ * Evaluate one symbol name against the current metric result.
+ * Returns 1 when `sym` is present in cd->metric_res->symbols. Otherwise,
+ * if `sym` names a configured composite, returns the result of that
+ * composite (evaluating it here when it has not been checked yet).
+ * Returns 0 in every other case. *pms receives the symbol found in the
+ * metric result, or NULL.
+ */
+static gint
+rspamd_composite_process_single_symbol (struct composites_data *cd,
+               const gchar *sym, struct symbol **pms)
+{
+       struct symbol *ms = NULL;
+       gint rc = 0;
+       struct rspamd_composite *ncomp;
+
+       if ((ms = g_hash_table_lookup (cd->metric_res->symbols, sym)) == NULL) {
+               if ((ncomp =
+                               g_hash_table_lookup (cd->task->cfg->composite_symbols,
+                                               sym)) != NULL) {
+                       /*
+                        * Set checked for this symbol to avoid cyclic references.
+                        * NOTE(review): while ncomp->expr is evaluated, cd->composite
+                        * still refers to the outer composite whose checked bit has
+                        * just been set. rspamd_composite_expr_process returns early
+                        * when that bit is set, so (assuming rspamd_process_expression
+                        * calls the process callback with this cd) the nested atoms
+                        * appear to yield the outer result bit instead of being
+                        * evaluated - confirm that this is intended.
+                        */
+                       if (isclr (cd->checked, ncomp->id * 2)) {
+                               setbit (cd->checked, cd->composite->id * 2);
+                               rc = rspamd_process_expression (ncomp->expr,
+                                               RSPAMD_EXPRESSION_FLAG_NOOPT, cd);
+                               clrbit (cd->checked, cd->composite->id * 2);
+
+                               if (rc) {
+                                       setbit (cd->checked, ncomp->id * 2 + 1);
+                               }
+                               setbit (cd->checked, ncomp->id * 2);
+
+                               ms = g_hash_table_lookup (cd->metric_res->symbols, sym);
+                       }
+                       else {
+                               /*
+                                * XXX: in case of cyclic references this would return 0
+                                */
+                               rc = isset (cd->checked, ncomp->id * 2 + 1);
+                       }
+               }
+       }
+       else {
+               rc = 1;
+       }
+
+       *pms = ms;
+       return rc;
+}
+/*
+ * Process callback for one atom; `input` is the struct composites_data.
+ * Returns the stored result bit at once if the current composite is already
+ * marked checked. Otherwise strips an optional tilde or minus prefix, resolves
+ * the atom either as a group (g:NAME, true when any symbol of the group
+ * matches) or as a single symbol, and records a matched symbol in
+ * cd->symbols_to_remove together with the composite that referenced it.
+ * Prefix policy for a newly recorded symbol: tilde removes the symbol but
+ * keeps its weight, minus keeps both, no prefix removes both.
+ */
+static gint
+rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom)
+{
+       struct composites_data *cd = (struct composites_data *)input;
+       const gchar *sym = atom->data;
+       struct symbol_remove_data *rd;
+       struct symbol *ms; /* only read when rc is non-zero, i.e. after a helper call has set it */
+       struct rspamd_symbols_group *gr;
+       struct rspamd_symbol_def *sdef;
+       gint rc = 0;
+       gchar t = '\0';
+
+       if (isset (cd->checked, cd->composite->id * 2)) {
+               /* We have already checked this composite, so just return its value */
+               rc = isset (cd->checked, cd->composite->id * 2 + 1);
+               return rc;
+       }
+
+       if (*sym == '~' || *sym == '-') { /* remember the policy prefix and skip it */
+               t = *sym ++;
+       }
+
+       if (strncmp (sym, "g:", 2) == 0) { /* group reference: stop at the first matching member */
+               gr = g_hash_table_lookup (cd->task->cfg->symbols_groups, sym + 2);
+
+               if (gr != NULL) {
+                       LL_FOREACH (gr->symbols, sdef) {
+                               rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms);
+                               if (rc) {
+                                       break;
+                               }
+                       }
+               }
+       }
+       else {
+               rc = rspamd_composite_process_single_symbol (cd, sym, &ms);
+       }
+
+       if (rc && ms) {
+               /*
+                * At this point we know that we need to do something about this symbol,
+                * however, we don't know whether we need to delete it unfortunately,
+                * that depends on the later decisions when the complete expression is
+                * evaluated.
+                */
+               if ((rd = g_tree_lookup (cd->symbols_to_remove, ms->name)) == NULL) {
+                       rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*rd));
+                       rd->ms = ms;
+
+                       if (G_UNLIKELY (t == '~')) {
+                               rd->remove_weight = FALSE;
+                               rd->remove_symbol = TRUE;
+                       }
+                       else if (G_UNLIKELY (t == '-')) {
+                               rd->remove_symbol = FALSE;
+                               rd->remove_weight = FALSE;
+                       }
+                       else {
+                               rd->remove_symbol = TRUE;
+                               rd->remove_weight = TRUE;
+                       }
+
+                       rd->comp = g_list_prepend (NULL, cd->composite);
+                       g_tree_insert (cd->symbols_to_remove,
+                                       (gpointer)ms->name,
+                                       rd);
+               }
+               else {
+                       /*
+                        * XXX: what if we have different preferences regarding
+                        * weight and symbol removal in different composites?
+                        * As written, the flags stored by the first atom that
+                        * recorded the symbol stay in effect.
+                        */
+                       rd->comp = g_list_prepend (rd->comp, cd->composite);
+               }
+       }
+
+       return rc;
+}
+
+/*
+ * We don't have preferences for composites: the priority callback
+ * ignores the atom and always returns 0.
+ */
+static gint
+rspamd_composite_expr_priority (rspamd_expression_atom_t *atom)
+{
+       return 0;
+}
+/* Destroy callback for composite atoms: intentionally empty */
+static void
+rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom)
+{
+       /* Composite atoms are destroyed just with the pool */
+}
+/* Key comparator given to g_tree_new for symbols_to_remove: compares keys as C strings */
+static gint
+remove_compare_data (gconstpointer a, gconstpointer b)
+{
+       const gchar *ca = a, *cb = b;
+
+       return strcmp (ca, cb);
+}
+/*
+ * g_hash_table_foreach callback over cfg->composite_symbols.
+ * `value` is the struct rspamd_composite, `data` the struct composites_data
+ * and `key` is what gets passed to rspamd_task_insert_result_single.
+ * Evaluates the expression of the composite, marks it as checked, stores the
+ * result bit and, on a match, inserts the key as a result with flag 1.0.
+ */
+static void
+composites_foreach_callback (gpointer key, gpointer value, void *data)
+{
+       struct composites_data *cd = data;
+       struct rspamd_composite *comp = value;
+       gint rc;
+
+       cd->composite = comp;
+
+       rc = rspamd_process_expression (comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT, cd);
+
+       /* Checked bit */
+       setbit (cd->checked, comp->id * 2);
+
+       /* Result bit */
+       if (rc) {
+               setbit (cd->checked, comp->id * 2 + 1);
+               rspamd_task_insert_result_single (cd->task, key, 1.0, NULL);
+       }
+       else {
+               clrbit (cd->checked, comp->id * 2 + 1);
+       }
+}
+
+/*
+ * g_tree_foreach callback over cd->symbols_to_remove.
+ * `key` is the symbol name, `value` the struct symbol_remove_data and
+ * `data` the struct composites_data. The record is applied when at least
+ * one composite in rd->comp has its result bit set. The list in rd->comp
+ * is freed in every case. Always returns FALSE.
+ */
+static gboolean
+composites_remove_symbols (gpointer key, gpointer value, gpointer data)
+{
+       struct composites_data *cd = data;
+       struct symbol_remove_data *rd = value;
+       GList *cur;
+       struct rspamd_composite *comp;
+       gboolean matched = FALSE;
+
+       cur = rd->comp;
+
+       /*
+        * XXX: actually, this is a weak assumption as we are unaware here about
+        * negate operation and so on. We need to parse AST directly and remove
+        * only those symbols that could be removed.
+        */
+       while (cur) {
+               comp = cur->data;
+
+               if (isset (cd->checked, comp->id * 2 + 1)) {
+                       matched = TRUE;
+                       break;
+               }
+
+               cur = g_list_next (cur);
+       }
+
+       g_list_free (rd->comp);
+
+       if (matched) {
+               if (rd->remove_symbol) {
+                       g_hash_table_remove (cd->metric_res->symbols, key);
+               }
+               if (rd->remove_weight) {
+                       cd->metric_res->score -= rd->ms->score; /* NOTE(review): rd->ms is
+                        * read after the symbol may have been removed from the hash
+                        * table above; this is safe only if that removal does not
+                        * free the symbol - confirm against the table setup */
+               }
+       }
+
+       return FALSE;
+}
+/*
+ * g_hash_table_foreach callback over task->results.
+ * `value` is one struct metric_result and `data` the task; `key` is not
+ * used. Builds the working state, evaluates every configured composite
+ * against this metric result and then applies the recorded removals.
+ */
+static void
+composites_metric_callback (gpointer key, gpointer value, gpointer data)
+{
+       struct rspamd_task *task = (struct rspamd_task *)data;
+       struct composites_data *cd =
+               rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data));
+       struct metric_result *metric_res = (struct metric_result *)value;
+
+       cd->task = task;
+       cd->metric_res = (struct metric_result *)metric_res;
+       cd->symbols_to_remove = g_tree_new (remove_compare_data);
+       cd->checked = /* zeroed bit array, two bits per configured composite; presumably ids are below the table size - TODO confirm */
+               rspamd_mempool_alloc0 (task->task_pool,
+                       NBYTES (g_hash_table_size (task->cfg->composite_symbols) * 2));
+
+       /* Process hash table */
+       g_hash_table_foreach (task->cfg->composite_symbols,
+               composites_foreach_callback,
+               cd);
+
+       /* Remove symbols that are in composites */
+       g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd);
+       /* Destroy the tree; the per-entry lists were freed in composites_remove_symbols */
+       g_tree_destroy (cd->symbols_to_remove);
+}
+/* Public entry point: runs composites_metric_callback for every entry of task->results */
+void
+rspamd_make_composites (struct rspamd_task *task)
+{
+       g_hash_table_foreach (task->results, composites_metric_callback, task);
+}
diff --git a/src/libserver/composites.h b/src/libserver/composites.h
new file mode 100644 (file)
index 0000000..fdcfe82
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer in the
+ *        documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SRC_LIBSERVER_COMPOSITES_H_
+#define SRC_LIBSERVER_COMPOSITES_H_
+
+#include "config.h"
+
+struct rspamd_task;
+
+/**
+ * Subr for composite expressions
+ */
+extern const struct rspamd_atom_subr composite_expr_subr;
+/**
+ * Composite structure
+ */
+struct rspamd_composite {
+       struct rspamd_expression *expr; /* expression evaluated to decide whether the composite matches */
+       gint id; /* selects the bit pair (id*2, id*2+1) in the checked array used by composites.c */
+};
+
+/**
+ * Process all results and form composite metrics from existing metrics as defined in the config
+ * @param task worker's task that represents the message from the user
+ */
+void rspamd_make_composites (struct rspamd_task *task);
+
+#endif /* SRC_LIBSERVER_COMPOSITES_H_ */
index ba4f0c1bd7526bbe2b97d7049a4c7d14c09052b4..70da55afe2e37e079c02d7b4a790012f8c77a665 100644 (file)
@@ -492,3 +492,92 @@ rspamd_task_re_cache_check (struct rspamd_task *task, const gchar *re)
 
        return ret;
 }
+
+/*
+ * Learn the message held by `task` by delegating to rspamd_stat_learn (),
+ * handing it the task, the `is_spam` flag, the Lua state taken from the
+ * task's config and the caller's `err` pointer.
+ * The `cl` argument is accepted but not used by this function.
+ * Returns whatever rspamd_stat_learn () returns.
+ */
+gboolean
+rspamd_learn_task_spam (struct rspamd_classifier_config *cl,
+       struct rspamd_task *task,
+       gboolean is_spam,
+       GError **err)
+{
+       gboolean learned;
+
+       learned = rspamd_stat_learn (task, is_spam, task->cfg->lua_state, err);
+
+       return learned;
+}
+
+/*
+ * Tell whether the result stored in the task for `metric` has reached the
+ * metric's spam threshold.
+ * The threshold is the value written by check_metric_settings () when that
+ * call returns true, and the metric's reject action score otherwise; a
+ * threshold that is not positive never counts as spam.
+ * Returns FALSE when the task holds no result for this metric.
+ */
+static gboolean
+check_metric_is_spam (struct rspamd_task *task, struct metric *metric)
+{
+       struct metric_result *mres;
+       double threshold;
+
+       /* Only the hash table lookup itself is done under result_mtx */
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+       g_static_mutex_lock (&result_mtx);
+       mres = g_hash_table_lookup (task->results, metric->name);
+       g_static_mutex_unlock (&result_mtx);
+#else
+       G_LOCK (result_mtx);
+       mres = g_hash_table_lookup (task->results, metric->name);
+       G_UNLOCK (result_mtx);
+#endif
+
+       if (mres == NULL) {
+               return FALSE;
+       }
+
+       if (!check_metric_settings (task, metric, &threshold)) {
+               /* No override available: fall back to the reject score */
+               threshold = metric->actions[METRIC_ACTION_REJECT].score;
+       }
+
+       return (threshold > 0 && mres->score >= threshold);
+}
+
+/*
+ * Drive the symbols cache over the task, stopping as soon as some metric
+ * with a positive reject score is already considered spam (unless the task
+ * carries RSPAMD_TASK_FLAG_PASS_ALL).
+ * Returns 0 when the task settings contain a "whitelist" key; in that case
+ * the task is flagged with RSPAMD_TASK_FLAG_SKIP and no symbols are
+ * processed. Returns 1 in every other case.
+ * When the cache has been fully walked and the session still has pending
+ * events, the task state is set to WAIT_FILTER.
+ */
+gint
+rspamd_process_filters (struct rspamd_task *task)
+{
+       gpointer cache_iter = NULL;
+       GList *node;
+       struct metric *m;
+
+       /* The default metric result must exist before anything else runs */
+       rspamd_create_metric_result (task, DEFAULT_METRIC);
+
+       if (task->settings &&
+               ucl_object_find_key (task->settings, "whitelist") != NULL) {
+               msg_info ("<%s> is whitelisted", task->message_id);
+               task->flags |= RSPAMD_TASK_FLAG_SKIP;
+               return 0;
+       }
+
+       /* Process metrics symbols */
+       while (rspamd_symbols_cache_process_symbol (task, task->cfg->cache,
+                       &cache_iter)) {
+               /* After every step look for a metric that already hit reject */
+               for (node = task->cfg->metrics_list; node != NULL;
+                               node = g_list_next (node)) {
+                       m = node->data;
+
+                       if (task->flags & RSPAMD_TASK_FLAG_PASS_ALL) {
+                               continue;
+                       }
+                       if (!(m->actions[METRIC_ACTION_REJECT].score > 0)) {
+                               continue;
+                       }
+                       if (!check_metric_is_spam (task, m)) {
+                               continue;
+                       }
+
+                       msg_info ("<%s> has already scored more than %.2f, so do not "
+                                       "plan any more checks", task->message_id,
+                                       m->actions[METRIC_ACTION_REJECT].score);
+                       return 1;
+               }
+       }
+
+       if (rspamd_session_events_pending (task->s) != 0) {
+               task->state = WAIT_FILTER;
+       }
+
+       return 1;
+}
index 5ce24176cd7c9240e3631946e6749119e4d4010c..45e720eb8f5cb1129e9236fd8a573578b766922a 100644 (file)
@@ -239,4 +239,16 @@ guint rspamd_task_re_cache_add (struct rspamd_task *task, const gchar *re,
  */
 guint rspamd_task_re_cache_check (struct rspamd_task *task, const gchar *re);
 
+/**
+ * Learn the message held in a task
+ * @param cl classifier configuration; NOTE(review): the implementation in task.c does not read it - confirm whether it is still needed
+ * @param task worker's task object
+ * @param is_spam learning class flag; presumably TRUE means learn as spam - confirm
+ * @param err pointer to GError
+ * @return true if learning succeeded (the value is the result of rspamd_stat_learn ())
+ */
+gboolean rspamd_learn_task_spam (struct rspamd_classifier_config *cl,
+       struct rspamd_task *task,
+       gboolean is_spam,
+       GError **err);
+
 #endif /* TASK_H_ */
index 047a5119bfb30f664ed4d910ef96d8526250224e..6e49d9512e2baaf8bd63fcb495bf8a5038e43279 100644 (file)
@@ -26,6 +26,7 @@
 #include "symbols_cache.h"
 #include "expression.h"
 #include "filter.h"
+#include "composites.h"
 #ifdef HAVE_SYS_UTSNAME_H
 #include <sys/utsname.h>
 #endif
index 410595d08d199a898341af22a5e2a847c0dc09bb..8262f51eaf8dc168243702bed3a642fcd9032d54 100644 (file)
@@ -28,6 +28,7 @@
 #include "message.h"
 #include "radix.h"
 #include "expression.h"
+#include "composites.h"
 #include "utlist.h"
 
 /***