]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
Start refactoring of statistics in rspamd.
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 17 Jan 2015 21:53:49 +0000 (21:53 +0000)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 18 Jan 2015 23:05:15 +0000 (23:05 +0000)
15 files changed:
src/libserver/cfg_file.h
src/libserver/cfg_rcl.c
src/libserver/cfg_utils.c
src/libstat/CMakeLists.txt
src/libstat/backends.h [new file with mode: 0644]
src/libstat/backends/backends.c [new file with mode: 0644]
src/libstat/backends/mmaped_file.c [moved from src/libserver/statfile.c with 100% similarity]
src/libstat/backends/mmaped_file.h [moved from src/libserver/statfile.h with 100% similarity]
src/libstat/classifiers.h
src/libstat/classifiers/classifiers.c
src/libstat/stat_config.c [new file with mode: 0644]
src/libstat/tokenizers.h
src/libstat/tokenizers/tokenizers.c
src/lua/lua_classifier.c
src/lua/lua_common.c

index 5844a945f95c78da067be0cd6fec5735ba55e6fb..1beb51055ba21a5d6f561bbdd87ad11ecb3285c0 100644 (file)
@@ -125,43 +125,6 @@ struct rspamd_symbols_group {
        GList *symbols;
 };
 
-/**
- * Statfile section definition
- */
-struct rspamd_statfile_section {
-       guint32 code;                                   /**< section's code                                                                             */
-       guint64 size;                                   /**< size of section                                                                    */
-       double weight;                                  /**< weight coefficient for section                                             */
-};
-
-/**
- * Statfile autolearn parameters
- */
-struct statfile_autolearn_params {
-       const gchar *metric;                            /**< metric name for autolearn triggering               */
-       double threshold_min;                           /**< threshold mark                                                                             */
-       double threshold_max;                           /**< threshold mark                                                                             */
-       GList *symbols;                                 /**< list of symbols                                                                    */
-};
-
-/**
- * Sync affinity
- */
-enum sync_affinity {
-       AFFINITY_NONE = 0,
-       AFFINITY_MASTER,
-       AFFINITY_SLAVE
-};
-
-/**
- * Binlog params
- */
-struct statfile_binlog_params {
-       enum sync_affinity affinity;
-       time_t rotate_time;
-       gchar *master_addr;
-       guint16 master_port;
-};
 
 typedef double (*statfile_normalize_func)(struct rspamd_config *cfg,
        long double score, void *params);
@@ -171,15 +134,7 @@ typedef double (*statfile_normalize_func)(struct rspamd_config *cfg,
  */
 struct rspamd_statfile_config {
        gchar *symbol;                                  /**< symbol of statfile                                                                 */
-       gchar *path;                                    /**< filesystem pattern (with %r or %f)                                 */
        gchar *label;                                   /**< label of this statfile                                                             */
-       gsize size;                                     /**< size of statfile                                                                   */
-       GList *sections;                                /**< list of sections in statfile                                               */
-       struct statfile_autolearn_params *autolearn;    /**< autolearn params                                                                   */
-       struct statfile_binlog_params *binlog;          /**< binlog params                                                                              */
-       statfile_normalize_func normalizer;             /**< function that is used as normaliser                */
-       void *normalizer_data;                          /**< normalizer function params                         */
-       gchar *normalizer_str;                          /**< source string (for dump)                                                   */
        ucl_object_t *opts;                             /**< other options                                                                              */
        gboolean is_spam;                               /**< spam flag                                                                                  */
 };
@@ -193,7 +148,7 @@ struct rspamd_classifier_config {
        gchar *metric;                                  /**< metric of this classifier                          */
        struct classifier *classifier;                  /**< classifier interface                               */
        struct tokenizer *tokenizer;                    /**< tokenizer used for classifier                                              */
-       GHashTable *opts;                               /**< other options                                      */
+       ucl_object_t *opts;                             /**< other options                                      */
        GList *pre_callbacks;                           /**< list of callbacks that are called before classification */
        GList *post_callbacks;                          /**< list of callbacks that are called after classification */
 };
index 6c77292aa47dd7ccf9207bbacf1875a17ccd8a9a..921464219021fb24bbe4827ca2fe206cb57ad45a 100644 (file)
@@ -889,14 +889,6 @@ rspamd_rcl_statfile_handler (struct rspamd_config *cfg, const ucl_object_t *obj,
                        return FALSE;
                }
 
-               if (st->path == NULL) {
-                       g_set_error (err,
-                               CFG_RCL_ERROR,
-                               EINVAL,
-                               "statfile must have a path defined");
-                       return FALSE;
-               }
-
                st->opts = (ucl_object_t *)obj;
 
                val = ucl_object_find_key (obj, "spam");
@@ -967,7 +959,7 @@ rspamd_rcl_classifier_handler (struct rspamd_config *cfg,
 
        if (found == NULL) {
                ccf = rspamd_config_new_classifier (cfg, NULL);
-               ccf->classifier = get_classifier (type);
+               ccf->classifier = rspamd_stat_get_classifier (type);
        }
        else {
                ccf = found;
@@ -997,13 +989,7 @@ rspamd_rcl_classifier_handler (struct rspamd_config *cfg,
                        }
                        else if (g_ascii_strcasecmp (key,
                                "tokenizer") == 0 && val->type == UCL_STRING) {
-                               ccf->tokenizer = get_tokenizer (ucl_object_tostring (val));
-                       }
-                       else {
-                               /* Just insert a value of option to the hash */
-                               g_hash_table_insert (ccf->opts,
-                                       (gpointer)key,
-                                       (gpointer)ucl_object_tostring_forced (val));
+                               ccf->tokenizer = rspamd_stat_get_tokenizer (ucl_object_tostring (val));
                        }
                }
        }
@@ -1403,21 +1389,11 @@ rspamd_rcl_config_init (void)
                rspamd_rcl_parse_struct_string,
                G_STRUCT_OFFSET (struct rspamd_statfile_config, symbol),
                0);
-       rspamd_rcl_add_default_handler (ssub,
-               "path",
-               rspamd_rcl_parse_struct_string,
-               G_STRUCT_OFFSET (struct rspamd_statfile_config, path),
-               RSPAMD_CL_FLAG_STRING_PATH);
        rspamd_rcl_add_default_handler (ssub,
                "label",
                rspamd_rcl_parse_struct_string,
                G_STRUCT_OFFSET (struct rspamd_statfile_config, label),
                0);
-       rspamd_rcl_add_default_handler (ssub,
-               "size",
-               rspamd_rcl_parse_struct_integer,
-               G_STRUCT_OFFSET (struct rspamd_statfile_config, size),
-               RSPAMD_CL_FLAG_INT_SIZE);
        rspamd_rcl_add_default_handler (ssub,
                "spam",
                rspamd_rcl_parse_struct_boolean,
index b53a2690c6d1bbba8997ba9d8356b436e023c709..c9a9555b1d036f8f83758153365ece41e83809bb 100644 (file)
@@ -498,12 +498,6 @@ rspamd_config_new_classifier (struct rspamd_config *cfg,
                        rspamd_mempool_alloc0 (cfg->cfg_pool,
                                sizeof (struct rspamd_classifier_config));
        }
-       if (c->opts == NULL) {
-               c->opts = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
-               rspamd_mempool_add_destructor (cfg->cfg_pool,
-                       (rspamd_mempool_destruct_t) g_hash_table_destroy,
-                       c->opts);
-       }
        if (c->labels == NULL) {
                c->labels = g_hash_table_new_full (rspamd_str_hash,
                                rspamd_str_equal,
index 810570f20a3d29ce6e09c5d71dccc97ff941484c..f1692de63bdae282b4a1cc02618568e0969a57a4 100644 (file)
@@ -1,11 +1,14 @@
-# Librspamdserver
+# Librspamdstat
 SET(LIBSTATSRC
-                               )
+                               stat_config.c)
 SET(TOKENIZERSSRC  tokenizers/tokenizers.c
                                tokenizers/osb.c)
 
 SET(CLASSIFIERSSRC classifiers/classifiers.c
                 classifiers/bayes.c)
+                
+SET(BACKENDSSRC backends/backends.c
+                       backends/mmaped_file.c)
                                
-ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC} ${TOKENIZERSSRC} ${CLASSIFIERSSRC})
+# BACKENDSSRC has to be listed here, otherwise backends/*.c are never compiled into the library
+ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC} ${TOKENIZERSSRC} ${CLASSIFIERSSRC} ${BACKENDSSRC})
 IF(NOT DEBIAN_BUILD)
diff --git a/src/libstat/backends.h b/src/libstat/backends.h
new file mode 100644 (file)
index 0000000..04710b4
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer in the
+ *        documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef BACKENDS_H_
+#define BACKENDS_H_
+
+#include "config.h"
+#include "cfg_file.h"
+
+#define RSPAMD_DEFAULT_BACKEND "mmap"
+
+struct rspamd_stat_backend { /* one entry of the statfile_backends table */
+       const char *name; /**< backend name, compared with strcmp () by rspamd_stat_get_backend () */
+       gpointer (*init)(rspamd_mempool_t *pool, struct rspamd_statfile_config *cfg); /**< initializer callback; presumably returns backend state for a statfile -- TODO confirm */
+       gpointer ctx; /**< opaque backend pointer; its use is not visible here -- TODO confirm */
+};
+
+extern struct rspamd_stat_backend statfile_backends[]; /* table of known backends, defined in backends/backends.c */
+
+struct rspamd_stat_backend *rspamd_stat_get_backend (const char *name); /* find backend by name; NULL if no entry matches */
+
+#endif /* BACKENDS_H_ */
diff --git a/src/libstat/backends/backends.c b/src/libstat/backends/backends.c
new file mode 100644 (file)
index 0000000..815a66d
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer in the
+ *        documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "main.h"
+#include "backends.h"
+#include "mmaped_file.h"
+
+/* Table of available statistics backends; searched by rspamd_stat_get_backend () */
+struct rspamd_stat_backend statfile_backends[] = {
+       /* fields are name, init, ctx; init and ctx are left NULL for this entry */
+       {RSPAMD_DEFAULT_BACKEND, NULL, NULL}
+};
+
+
+/*
+ * Get backend structure by name or return NULL if this name is not found.
+ * A NULL name selects RSPAMD_DEFAULT_BACKEND instead of being handed to
+ * strcmp (), which would be undefined behaviour.
+ */
+struct rspamd_stat_backend *
+rspamd_stat_get_backend (const char *name)
+{
+       guint i;
+
+       if (name == NULL) {
+               /* No backend requested explicitly: use the default one */
+               name = RSPAMD_DEFAULT_BACKEND;
+       }
+
+       for (i = 0; i < G_N_ELEMENTS (statfile_backends); i++) {
+               if (strcmp (statfile_backends[i].name, name) == 0) {
+                       return &statfile_backends[i];
+               }
+       }
+
+       return NULL;
+}
index d13178486b683f0528da8cc8b5a25238208dd825..2c2f3344981a6923d0e0750cfb728e94438368c9 100644 (file)
@@ -45,7 +45,7 @@ struct classifier {
 };
 
 /* Get classifier structure by name or return NULL if this name is not found */
-struct classifier * get_classifier (const char *name);
+struct classifier * rspamd_stat_get_classifier (const char *name);
 
 /* Bayes algorithm */
 struct classifier_ctx * bayes_init (rspamd_mempool_t *pool,
index 6af7d2dc8e616abbac74de7500d394f06d2cfdd4..a3efb53c1e308fa87e43204b9d69ba4b328dd8f9 100644 (file)
@@ -40,7 +40,7 @@ struct classifier classifiers[] = {
 };
 
 struct classifier *
-get_classifier (const char *name)
+rspamd_stat_get_classifier (const char *name)
 {
        guint i;
 
diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c
new file mode 100644 (file)
index 0000000..fd2c0f1
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer in the
+ *        documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "stat_api.h"
+#include "main.h"
+#include "cfg_rcl.h"
+
index ed47e0adde4f576e4e402d571c4cf8940353a734..c0d2e893464ca921c0c62fe3ddd96b2ba30b774a 100644 (file)
@@ -33,7 +33,7 @@ struct tokenizer {
 int token_node_compare_func (gconstpointer a, gconstpointer b);
 
 /* Get tokenizer structure by name or return NULL if this name is not found */
-struct tokenizer * get_tokenizer (const char *name);
+struct tokenizer * rspamd_stat_get_tokenizer (const char *name);
 
 /* Get next word from specified f_str_t buf */
 gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf,
index 3e6c745ec63fe39acdbf58774b0286effe572355..ce221397d656edcf40ed47c5b201b092b1005b6a 100644 (file)
@@ -26,7 +26,6 @@
  * Common tokenization functions
  */
 
-#include <sys/types.h>
 #include "main.h"
 #include "tokenizers.h"
 
@@ -77,7 +76,7 @@ const gchar t_delimiters[255] = {
 };
 
 struct tokenizer *
-get_tokenizer (const char *name)
+rspamd_stat_get_tokenizer (const char *name)
 {
        guint i;
 
@@ -230,7 +229,7 @@ tokenize_subject (struct rspamd_task *task, GTree ** tree)
                        (rspamd_mempool_destruct_t) g_tree_destroy, *tree);
        }
 
-       osb_tokenizer = get_tokenizer ("osb-text");
+       osb_tokenizer = rspamd_stat_get_tokenizer ("osb-text");
 
        /* Try to use pre-defined subject */
        if (task->subject != NULL) {
index 346f5d64ba272ca9280cb4783af59c2288e6bf75..7adc473ba3cc56343247c35af7c10ff43ffb7bec 100644 (file)
@@ -45,16 +45,12 @@ static const struct luaL_reg classifierlib_m[] = {
 
 LUA_FUNCTION_DEF (statfile, get_symbol);
 LUA_FUNCTION_DEF (statfile, get_label);
-LUA_FUNCTION_DEF (statfile, get_path);
-LUA_FUNCTION_DEF (statfile, get_size);
 LUA_FUNCTION_DEF (statfile, is_spam);
 LUA_FUNCTION_DEF (statfile, get_param);
 
 static const struct luaL_reg statfilelib_m[] = {
        LUA_INTERFACE_DEF (statfile, get_symbol),
        LUA_INTERFACE_DEF (statfile, get_label),
-       LUA_INTERFACE_DEF (statfile, get_path),
-       LUA_INTERFACE_DEF (statfile, get_size),
        LUA_INTERFACE_DEF (statfile, is_spam),
        LUA_INTERFACE_DEF (statfile, get_param),
        {"__tostring", rspamd_lua_class_tostring},
@@ -351,36 +347,6 @@ lua_statfile_get_label (lua_State *L)
        return 1;
 }
 
-static gint
-lua_statfile_get_path (lua_State *L)
-{
-       struct rspamd_statfile_config *st = lua_check_statfile (L);
-
-       if (st != NULL) {
-               lua_pushstring (L, st->path);
-       }
-       else {
-               lua_pushnil (L);
-       }
-
-       return 1;
-}
-
-static gint
-lua_statfile_get_size (lua_State *L)
-{
-       struct rspamd_statfile_config *st = lua_check_statfile (L);
-
-       if (st != NULL) {
-               lua_pushinteger (L, st->size);
-       }
-       else {
-               lua_pushnil (L);
-       }
-
-       return 1;
-}
-
 static gint
 lua_statfile_is_spam (lua_State *L)
 {
index 8df878585691dd693b7aad34084f5e2759f1e04c..73471719b907d6a4512ef6c3b57c9ab90aad35a4 100644 (file)
@@ -355,9 +355,8 @@ gboolean
 rspamd_init_lua_filters (struct rspamd_config *cfg)
 {
        struct rspamd_config **pcfg;
-       GList *cur, *tmp;
+       GList *cur;
        struct script_module *module;
-       struct rspamd_statfile_config *st;
        lua_State *L = cfg->lua_state;
 
        cur = g_list_first (cfg->script_modules);
@@ -395,24 +394,7 @@ rspamd_init_lua_filters (struct rspamd_config *cfg)
                }
                cur = g_list_next (cur);
        }
-       /* Init statfiles normalizers */
-       cur = g_list_first (cfg->statfiles);
-       while (cur) {
-               st = cur->data;
-               if (st->normalizer == rspamd_lua_normalize) {
-                       tmp = st->normalizer_data;
-                       if (tmp && (tmp = g_list_next (tmp))) {
-                               if (tmp->data) {
-                                       /* Code must be loaded from data */
-                                       if (luaL_loadstring (L, tmp->data) != 0) {
-                                               msg_info ("cannot load normalizer code %s", tmp->data);
-                                               return FALSE;
-                                       }
-                               }
-                       }
-               }
-               cur = g_list_next (cur);
-       }
+
        /* Assign state */
        cfg->lua_state = L;