* New system of classifiers interface and statfiles processing

author Vsevolod Stakhov <vsevolod@rambler-co.ru>

Mon, 14 Sep 2009 15:11:19 +0000 (19:11 +0400)

committer Vsevolod Stakhov <vsevolod@rambler-co.ru>

Mon, 14 Sep 2009 15:11:19 +0000 (19:11 +0400)
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 14 Sep 2009 15:11:19 +0000 (19:11 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 14 Sep 2009 15:11:19 +0000 (19:11 +0400)
diff --git a/conf/rspamd.conf.sample b/conf/rspamd.conf.sample

index 926f9901e16be7c4598ac0f7068bb3b52b803e76..42659259a6fae2c9e9a21b927ca75b4e587a1759 100644 (file)
--- a/conf/rspamd.conf.sample
+++ b/conf/rspamd.conf.sample
@@ -306,3 +306,12 @@ view {
         # Symbols to check, can also be list of files or regexp:
         symbols = "/^[A-Z]{2}_SURBL_MULTI$/i";
  };
+
+# Settings files
+settings {
+       # json data for user's settings
+       #user_settings = "file:///some/json/file";
+       
+       # json data for domain's settings
+       #domain_settings = "file:///some/other/json/file";
+};
diff --git a/rspamd.conf.sample b/rspamd.conf.sample

index 507878431dd4015a11d1adc33ca14eb32767df59..da67662a12b625b589915ea17cd1b9e9777828d7 100644 (file)
--- a/rspamd.conf.sample
+++ b/rspamd.conf.sample
@@ -39,12 +39,52 @@ worker {
         password = "q1";
  };
  
+# Settings for fuzzy storage interface
+worker {
+    type = "fuzzy";
+
+       # Bind socket for control interface
+       bind_socket = localhost:11335;
+
+    count = 1;
+       # Path to filesystem storage
+       hashfile = "/tmp/fuzzy.db";
+};
+
+# Options for lmtp worker
+#worker {
+       #type = "lmtp";
+       # Bind socket for lmtp interface
+       #bind_socket = localhost:11335;
+       # Metric that is considered the main one. If this metric yields a
+       # spam result, lmtp delivery fails
+       #metric = "default";
+       # Number of lmtp workers
+       #count = 1;
+#};
+
+#worker {
+       #type = "delivery";
+       # Path to delivery agent, %f is expanded as mail from address and %r 
+       # is expanded as recipient address
+       # Example: agent = "/usr/local/bin/procmail -f %f -d %r"
+       #agent = "/dev/null";
+       # Bind socket for lmtp interface
+       # Example: bind_socket = localhost:25
+       
+       # Whether we should use lmtp for MTA delivery
+       #lmtp = no;
+#};
+
+
  # Sample metric definition
  metric {
         # Name of metric
         name = "testmetric";
         # Score to count message as spam by this metric
         required_score = 10.1;
+       # Symbols cache path for optimal checks planning
+       cache_file = "/tmp/symbols.cache";
  };
  
  # Logging settings
@@ -64,27 +104,36 @@ logging {
  # Default: 100M
  statfile_pool_size = 40M;
  
-
-# Sample statfile definition
-#statfile {
-       # Alias is used for learning and is used as symbol
-       #alias = "test.spam";
-       # Pattern is path to file, can include %r - recipient name and %f - mail from value
-       #pattern = "./test.spam";
-       # Weight in spam/ham classifier
-       #weight = 1.0;
-       # Size of this statfile class
-       #size = 10M;
-       # Tokenizer for this statfile
-       # Deafault: osb-text
-       #tokenizer = "osb-text";
-#};
-#statfile {
-       #alias = "test.ham";
-       #pattern = "./test.ham";
-       #weight = -2.0;
-       #size = 10M;
-#};
+# Classifier definition
+classifier {
+       # Type of classifier
+    type = "winnow";
+       # Tokenizer used
+    tokenizer = "osb-text";
+    # Sample statfile definition
+    statfile {
+        # Symbol selects this statfile for learning and is inserted as the result symbol
+        symbol = "WINNOW_SPAM";
+        # Path to the statfile, can include %r - recipient name and %f - mail from value
+        path = "/tmp/test.spam";
+        # Size of this statfile class
+        size = 10M;
+        # Tokenizer for this statfile
+        # Default: osb-text
+        #tokenizer = "osb-text";
+        autolearn {
+            min_mark = 10.0;
+        };
+    };
+    statfile {
+        symbol = "WINNOW_HAM";
+        path = "/tmp/test.ham";
+        size = 10M;
+        autolearn {
+            max_mark = 0.1;
+        };
+    };
+};
  
  # Factors coefficients
  factors {
@@ -159,30 +208,7 @@ factors {
      "R_MIXED_CHARSET" = 5;
      "R_BAD_EMAIL" = 10.5;
  };
-# Options for lmtp worker
-#worker {
-       #type = "lmtp";
-       # Bind socket for lmtp interface
-       #bind_socket = localhost:11335;
-       # Metric that is considered as main. If we have spam result on
-       # this metric, lmtp delivery would be failed
-       #metric = "default";
-       # Number of lmtp workers
-       #count = 1;
-#};
  
-#worker {
-       #type = "delivery";
-       # Path to delivery agent, %f is expanded as mail from address and %r 
-       # is expanded as recipient address
-       # Expample: agent = "/usr/local/bin/procmail -f %f -d %r"
-       #agent = "/dev/null";
-       # Bind socket for lmtp interface
-       # Example: bind_socket = localhost:25
-       
-       # Whether we should use lmtp for MTA delivery
-       #lmtp = no;
-#};
  
  # SURBL module params, note that single quotes are mandatory here
  .module 'surbl' {
@@ -285,6 +311,14 @@ factors {
      #blacklist = "file:///some/path/emails.lst";
  };
  
+# Module for fuzzy checksum loading
+.module 'fuzzy_check' {
+    metric = "default";
+       symbol = "R_FUZZY";
+       # List of fuzzy storage servers, separated by ',' or ';' or simply by spaces
+       servers = "localhost:11335";
+};
+
  # If enables threat each regexp as raw regex and do not try to convert
  # each text part to utf8 encoding. Save a lot of resources but less
  # portable.
@@ -315,3 +349,19 @@ settings {
         # json data for domain's settings
         #domain_settings = "file:///some/other/json/file";
  };
+
+# Example of json config:
+# [
+#     {
+#         "name": "cebka@test.ru",
+#         "metrics":
+#         {
+#             "default": 5.5
+#         },
+#         "factors":
+#         {
+#             "R_FUZZY": 10.1
+#         },
+#         "want_spam": false
+#     }
+# ] 
diff --git a/src/cfg_file.h b/src/cfg_file.h

index 3e932fc102b2b136656892831aef4da4a4a27406..527c3f7c62cb9c9019fb76eff14769f57ffcf357 100644 (file)
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -130,16 +130,24 @@ struct statfile_autolearn_params {
   * Statfile config definition
   */
  struct statfile {
-       char *alias;                                                                    /**< alias of statfile                                                                  */
-       char *pattern;                                                                  /**< filesystem pattern (with %r or %f)                                 */
-       double weight;                                                                  /**< weight scale                                                                               */
-       char *metric;                                                                   /**< metric name                                                                                */
+       char *symbol;                                                                   /**< symbol of statfile                                                                 */
+       char *path;                                                                     /**< filesystem pattern (with %r or %f)                                 */
         size_t size;                                                                    /**< size of statfile                                                                   */
-       struct tokenizer *tokenizer;                                    /**< tokenizer used for statfile                                                */
         GList *sections;                                                                /**< list of sections in statfile                                               */
         struct statfile_autolearn_params *autolearn;    /**< autolearn params                                                                   */
  };
  
+/**
+ * Classifier config definition: one `classifier { ... }` section of the config file
+ */
+struct classifier_config {
+    GList *statfiles;                               /**< statfiles (struct statfile) defined inside this classifier section */
+    char *metric;                                   /**< metric name; the parser substitutes DEFAULT_METRIC when unset */
+    struct classifier *classifier;                  /**< classifier interface, looked up by get_classifier() from `type` */
+       struct tokenizer *tokenizer;                                    /**< tokenizer used for classifier; the parser falls back to "osb-text" when unset */
+    GHashTable *opts;                               /**< other options: parameter name -> quoted string value */
+};
+
  /**
   * Config option for importing to script module
   */
@@ -223,7 +231,8 @@ struct config_file {
         GHashTable* factors;                                                    /**< hash of factors indexed by symbol name                             */
         GHashTable* c_modules;                                                  /**< hash of c modules indexed by module name                   */
         GHashTable* composite_symbols;                                  /**< hash of composite symbols indexed by its name              */
-       GHashTable* statfiles;                                                  /**< hash of defined statfiles indexed by alias                 */
+    GList *classifiers;                             /**< list of all classifiers defined                    */
+    GHashTable *classifiers_symbols;                /**< hashtable indexed by symbol name of classifiers    */
      GHashTable* cfg_params;                                                    /**< all cfg params indexed by its name in this structure */
         int clock_res;                                                                  /**< resolution of clock used                                                   */
         GList *views;                                                                   /**< views                                                                                              */
@@ -314,7 +323,7 @@ void post_load_config (struct config_file *cfg);
  void unescape_quotes (char *line);
  
  GList* parse_comma_list (memory_pool_t *pool, char *line);
-
+struct classifier_config* check_classifier_cfg (struct config_file *cfg, struct classifier_config *c);
  
  int yylex (void);
  int yyparse (void);
diff --git a/src/cfg_file.l b/src/cfg_file.l

index 64c113a7139fc5c7795860a811e5651c1467b338..5355a7c578a5d848bbc1aa2aa2a48eed9006fc19 100644 (file)
--- a/src/cfg_file.l
+++ b/src/cfg_file.l
@@ -2,6 +2,7 @@
  %x module
  %x lua
  %x worker
+%x classifier
  
  %{
  
@@ -21,6 +22,7 @@ extern void add_luabuf (const char *line);
  YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH];
  int line_stack[MAX_INCLUDE_DEPTH];
  int include_stack_ptr = 0;
+int nested_depth = 0;
  extern struct config_file *cfg;
  
  %}
@@ -74,17 +76,7 @@ enabled                                                      return ENABLED;
  delivery                                               return DELIVERY;
  agent                                                  return AGENT;
  
-statfile                                               return STATFILE;
-alias                                                  return ALIAS;
-pattern                                                        return PATTERN;
-weight                                                 return WEIGHT;
-size                                                   return SIZE;
-tokenizer                                              return TOKENIZER;
-classifier                                             return CLASSIFIER;
-section                                                        return SECTION;
-autolearn                                              return AUTOLEARN;
-min_mark                                               return MIN_MARK;
-max_mark                                               return MAX_MARK;
+classifier                                             BEGIN(classifier); return CLASSIFIER;
  
  logging                                                        return LOGGING;
  
@@ -167,8 +159,8 @@ yes|YES|no|NO|[yY]|[nN]                     yylval.flag=parse_flag(yytext); return FLAG;
  <module>[ \t]+                                                 /* ignore whitespace */;
  <module>[ \t]*#.*                                              /* ignore comments */;
  <module>\'[a-zA-Z0-9_-]+\'     yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; return MODULE_OPT; 
-<module>\{     return OBRACE;
-<module>\}  BEGIN(INITIAL); return EBRACE;
+<module>\{     nested_depth ++; return OBRACE;
+<module>\}  if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
  <module>\;     return SEMICOLON;
  <module>=      return EQSIGN;
  <module>\$[a-zA-Z_][a-zA-Z0-9_]+               yylval.string=strdup(yytext + 1); return VARIABLE;
@@ -178,8 +170,8 @@ yes|YES|no|NO|[yY]|[nN]                     yylval.flag=parse_flag(yytext); return FLAG;
  <worker>\n                                                             /* ignore EOL */;
  <worker>[ \t]+                                                 /* ignore whitespace */;
  <worker>[ \t]*#.*                                              /* ignore comments */;
-<worker>\{     return OBRACE;
-<worker>\}  BEGIN(INITIAL); return EBRACE;
+<worker>\{     nested_depth ++; return OBRACE;
+<worker>\}  if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
  <worker>\;     return SEMICOLON;
  <worker>=      return EQSIGN;
  <worker>type                                                   return TYPE;
@@ -193,6 +185,32 @@ yes|YES|no|NO|[yY]|[nN]                    yylval.flag=parse_flag(yytext); return FLAG;
  <worker>\$[a-zA-Z_][a-zA-Z0-9_]+               yylval.string=strdup(yytext + 1); return VARIABLE;
  <worker>\".+[^\\]\"    yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
  
+<classifier>\n                                                         /* ignore EOL */;
+<classifier>[ \t]+                                                     /* ignore whitespace */;
+<classifier>[ \t]*#.*                                          /* ignore comments */;
+<classifier>\{                             nested_depth ++; return OBRACE;
+<classifier>\}                              if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
+<classifier>\;                             return SEMICOLON;
+<classifier>=                              return EQSIGN;
+<classifier>type                                                       return TYPE;
+<classifier>bind_socket                                                return BINDSOCK;
+<classifier>count                                                      return COUNT;
+<classifier>statfile                                           return STATFILE;
+<classifier>symbol                                                     return SYMBOL;
+<classifier>path                                                       return PATH;
+<classifier>size                                                       return SIZE;
+<classifier>tokenizer                                          return TOKENIZER;
+<classifier>section                                                    return SECTION;
+<classifier>autolearn                                          return AUTOLEARN;
+<classifier>min_mark                                           return MIN_MARK;
+<classifier>max_mark                                           return MAX_MARK;
+<classifier>[0-9]+                                                     yylval.number=strtol(yytext, NULL, 10); return NUMBER;
+<classifier>-?[0-9]+\.?[0-9]*                          yylval.fract=strtod(yytext, NULL); return FRACT;
+<classifier>[0-9]+[kKmMgG]?                                    yylval.limit=parse_limit(yytext); return SIZELIMIT;
+<classifier>\$[a-zA-Z_][a-zA-Z0-9_]+           yylval.string=strdup(yytext + 1); return VARIABLE;
+<classifier>[a-zA-Z0-9_%-]+                    yylval.string=strdup(yytext); return PARAM;
+<classifier>\".+[^\\]\"        yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
+
  <lua>\n                                                                        /* ignore EOL */;
  <lua>[ \t]+                                                            /* ignore whitespace */;
  <lua>[ \t]*#.*                                                 /* ignore comments */;
diff --git a/src/cfg_file.y b/src/cfg_file.y

index 19584be923fc26be1f23f2c48b349a8080ad4384..84a0a1000289639448523a24bb850e2bfe2ae20f 100644 (file)
--- a/src/cfg_file.y
+++ b/src/cfg_file.y
@@ -23,6 +23,7 @@ extern char *yytext;
  
  GList *cur_module_opt = NULL;
  struct metric *cur_metric = NULL;
+struct classifier_config *cur_classifier = NULL;
  struct statfile *cur_statfile = NULL;
  struct statfile_section *cur_section = NULL;
  struct statfile_autolearn_params *cur_autolearn = NULL;
@@ -58,7 +59,7 @@ struct rspamd_view *cur_view = NULL;
  %token DELIVERY LMTP ENABLED AGENT SECTION LUACODE RAW_MODE PROFILE_FILE COUNT
  %token  VIEW IP FROM SYMBOLS
  %token  AUTOLEARN MIN_MARK MAX_MARK
-%token  SETTINGS USER_SETTINGS DOMAIN_SETTINGS
+%token  SETTINGS USER_SETTINGS DOMAIN_SETTINGS SYMBOL PATH
  
  %type  <string>        STRING
  %type  <string>        VARIABLE
@@ -93,7 +94,7 @@ command       :
         | metric
         | composites
         | logging
-       | statfile
+    | classifier
         | statfile_pool_size
         | luacode
         | raw_mode
@@ -660,20 +661,81 @@ loggingfile:
         }
         ;
  
+
+classifier:    /* a complete `classifier { ... }` section: validate it, fill defaults, register it */
+    CLASSIFIER OBRACE classifierbody EBRACE {
+        if (cur_classifier == NULL || cur_classifier->classifier == NULL) {    /* empty body, or no `type` resolved a classifier */
+            yyerror ("yyparse: invalid classifier definition");
+            YYERROR;
+        }
+        if (cur_classifier->metric == NULL) {  /* no metric was set for this classifier */
+            cur_classifier->metric = DEFAULT_METRIC;
+        }
+               if (cur_classifier->tokenizer == NULL) {        /* no tokenizer was set for this classifier */
+                       cur_classifier->tokenizer = get_tokenizer ("osb-text");
+               }
+
+        cfg->classifiers = g_list_prepend (cfg->classifiers, cur_classifier);  /* prepend: list ends up in reverse definition order */
+        cur_classifier = NULL; /* the next section starts from a fresh config (see check_classifier_cfg) */
+    }
+    ;
+
+classifierbody:
+    | classifiercmd SEMICOLON
+    | classifierbody classifiercmd SEMICOLON
+    ;
+
+classifiercmd:
+    | statfile
+    | classifiertype
+    | classifiermetric
+       | classifiertokenizer
+    | classifieroption
+    ;
+
+classifiertype:
+    TYPE EQSIGN QUOTEDSTRING {
+        cur_classifier = check_classifier_cfg (cfg, cur_classifier);
+        if ((cur_classifier->classifier = get_classifier ($3)) == NULL) {
+            yyerror ("yyparse: unknown classifier type: %s", $3);
+            YYERROR;
+        }
+    }
+    ;
+classifiertokenizer:
+       TOKENIZER EQSIGN QUOTEDSTRING {
+        cur_classifier = check_classifier_cfg (cfg, cur_classifier);
+               if ((cur_classifier->tokenizer = get_tokenizer ($3)) == NULL) {
+                       yyerror ("yyparse: unknown tokenizer %s", $3);
+                       YYERROR;
+               }
+       }
+       ;
+
+classifiermetric:
+    METRIC EQSIGN QUOTEDSTRING {
+        cur_classifier = check_classifier_cfg (cfg, cur_classifier);
+        cur_classifier->metric = $3;
+        memory_pool_add_destructor (cfg->cfg_pool, g_free, cur_classifier->metric);
+    }
+    ;
+
+classifieroption:
+    PARAM EQSIGN QUOTEDSTRING {
+        cur_classifier = check_classifier_cfg (cfg, cur_classifier);
+        g_hash_table_insert (cur_classifier->opts, $1, $3);
+        memory_pool_add_destructor (cfg->cfg_pool, g_free, $1);
+        memory_pool_add_destructor (cfg->cfg_pool, g_free, $3);
+    };
+
  statfile:
         STATFILE OBRACE statfilebody EBRACE {
-               if (cur_statfile == NULL || cur_statfile->alias == NULL || cur_statfile->pattern == NULL 
-                       || cur_statfile->weight == 0 || cur_statfile->size == 0) {
+               if (cur_statfile == NULL || cur_statfile->path == NULL || cur_statfile->size == 0) {
                         yyerror ("yyparse: not enough arguments in statfile definition");
                         YYERROR;
                 }
-               if (cur_statfile->metric == NULL) {
-                       cur_statfile->metric = memory_pool_strdup (cfg->cfg_pool, "default");
-               }
-               if (cur_statfile->tokenizer == NULL) {
-                       cur_statfile->tokenizer = get_tokenizer ("osb-text");
-               }
-               g_hash_table_insert (cfg->statfiles, cur_statfile->alias, cur_statfile);
+        cur_classifier = check_classifier_cfg (cfg, cur_classifier);
+               cur_classifier->statfiles = g_list_prepend (cur_classifier->statfiles, cur_statfile);
                 cur_statfile = NULL;
         }
         ;
@@ -684,48 +746,33 @@ statfilebody:
         ;
  
  statfilecmd:
-       | statfilealias
-       | statfilepattern
-       | statfileweight
+       | statfilesymbol
+       | statfilepath
         | statfilesize
-       | statfilemetric
-       | statfiletokenizer
         | statfilesection
         | statfileautolearn
         ;
         
-statfilealias:
-       ALIAS EQSIGN QUOTEDSTRING {
+statfilesymbol:
+       SYMBOL EQSIGN QUOTEDSTRING {
+        cur_classifier = check_classifier_cfg (cfg, cur_classifier);
                 if (cur_statfile == NULL) {
                         cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
                 }
-               cur_statfile->alias = memory_pool_strdup (cfg->cfg_pool, $3);
+               cur_statfile->symbol = memory_pool_strdup (cfg->cfg_pool, $3);
+        g_hash_table_insert (cfg->classifiers_symbols, $3, cur_classifier);
         }
         ;
  
-statfilepattern:
-       PATTERN EQSIGN QUOTEDSTRING {
+statfilepath:
+       PATH EQSIGN QUOTEDSTRING {
                 if (cur_statfile == NULL) {
                         cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
                 }
-               cur_statfile->pattern = memory_pool_strdup (cfg->cfg_pool, $3);
+               cur_statfile->path = memory_pool_strdup (cfg->cfg_pool, $3);
         }
         ;
  
-statfileweight:
-       WEIGHT EQSIGN NUMBER {
-               if (cur_statfile == NULL) {
-                       cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
-               }
-               cur_statfile->weight = $3;
-       }
-       | WEIGHT EQSIGN FRACT {
-               if (cur_statfile == NULL) {
-                       cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
-               }
-               cur_statfile->weight = $3;
-       }
-       ;
  
  statfilesize:
         SIZE EQSIGN NUMBER {
@@ -742,26 +789,7 @@ statfilesize:
         }
         ;
  
-statfilemetric:
-       METRIC EQSIGN QUOTEDSTRING {
-               if (cur_statfile == NULL) {
-                       cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
-               }
-               cur_statfile->metric = memory_pool_strdup (cfg->cfg_pool, $3);
-       }
-       ;
  
-statfiletokenizer:
-       TOKENIZER EQSIGN QUOTEDSTRING {
-               if (cur_statfile == NULL) {
-                       cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
-               }
-               if ((cur_statfile->tokenizer = get_tokenizer ($3)) == NULL) {
-                       yyerror ("yyparse: unknown tokenizer %s", $3);
-                       YYERROR;
-               }
-       }
-       ;
  
  statfilesection:
         SECTION OBRACE sectionbody EBRACE {
diff --git a/src/cfg_utils.c b/src/cfg_utils.c

index 7d06e662c940cc591b04ee8854b2d0211e2784ab..0acd50be8e181bc9076c060b54d07c4eb120e085 100644 (file)
--- a/src/cfg_utils.c
+++ b/src/cfg_utils.c
@@ -186,7 +186,7 @@ init_defaults (struct config_file *cfg)
         cfg->factors = g_hash_table_new (g_str_hash, g_str_equal);
         cfg->c_modules = g_hash_table_new (g_str_hash, g_str_equal);
         cfg->composite_symbols = g_hash_table_new (g_str_hash, g_str_equal);
-       cfg->statfiles = g_hash_table_new (g_str_hash, g_str_equal);
+       cfg->classifiers_symbols = g_hash_table_new (g_str_hash, g_str_equal);
         cfg->cfg_params = g_hash_table_new (g_str_hash, g_str_equal);
         init_settings (cfg);
  
@@ -207,10 +207,10 @@ free_config (struct config_file *cfg)
         g_hash_table_unref (cfg->c_modules);
         g_hash_table_remove_all (cfg->composite_symbols);
         g_hash_table_unref (cfg->composite_symbols);
-       g_hash_table_remove_all (cfg->statfiles);
-       g_hash_table_unref (cfg->statfiles);
         g_hash_table_remove_all (cfg->cfg_params);
         g_hash_table_unref (cfg->cfg_params);
+       g_hash_table_destroy (cfg->classifiers_symbols);
+       g_list_free (cfg->classifiers);
         g_list_free (cfg->metrics_list);
         memory_pool_delete (cfg->cfg_pool);
  }
@@ -604,6 +604,20 @@ parse_comma_list (memory_pool_t *pool, char *line)
         return res;
  }
  
+struct classifier_config *     /* returns c itself, or a newly allocated config when c is NULL */
+check_classifier_cfg (struct config_file *cfg, struct classifier_config *c)
+{
+       if (c == NULL) {        /* first use in a section: allocate from the config pool (alloc0 - presumably zero-filled) */
+               c = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct classifier_config));
+       }
+       if (c->opts == NULL) {  /* create the options table on demand */
+               c->opts = g_hash_table_new (g_str_hash, g_str_equal);   /* string-keyed; no key/value destroy callbacks */
+               memory_pool_add_destructor (cfg->cfg_pool, (pool_destruct_func)g_hash_table_destroy, c->opts);  /* table destruction is registered on the config pool */
+       }
+
+       return c;
+}
+
  /*
   * vi:ts=4
   */
diff --git a/src/classifiers/classifiers.c b/src/classifiers/classifiers.c

index 283350972ee703d9e962ecb7bb722fd205fc480d..482d111b0fd00041a2e24a6444d5ea4feba4f566 100644 (file)
--- a/src/classifiers/classifiers.c
+++ b/src/classifiers/classifiers.c
@@ -35,7 +35,6 @@ struct classifier classifiers[] = {
         .init_func        = winnow_init, 
         .classify_func    = winnow_classify, 
         .learn_func       = winnow_learn, 
-       .result_file_func = winnow_result_file 
         },
  };
  
diff --git a/src/classifiers/classifiers.h b/src/classifiers/classifiers.h

index 13a2957249a2a7573ae2baceb59a39159c8ead4b..fcb251da1e4d639d4fbfb1593e9a66b0008f2175 100644 (file)
--- a/src/classifiers/classifiers.h
+++ b/src/classifiers/classifiers.h
@@ -6,29 +6,30 @@
  #include "../statfile.h"
  #include "../tokenizers/tokenizers.h"
  
+struct classifier_config;
+struct worker_task;
+
  struct classifier_ctx {
         memory_pool_t *pool;
         GHashTable *results;
+       struct classifier_config *cfg;
  };
  /* Common classifier structure */
  struct classifier {
         char *name;
-       struct classifier_ctx* (*init_func)(memory_pool_t *pool);
-       void (*classify_func)(struct classifier_ctx* ctx, statfile_pool_t *pool, 
-                                                       char *statfile, GTree *input, double scale);
+       struct classifier_ctx* (*init_func)(memory_pool_t *pool, struct classifier_config *cf);
+       void (*classify_func)(struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct worker_task *task);
         void (*learn_func)(struct classifier_ctx* ctx, statfile_pool_t *pool, 
-                                                       char *statfile, GTree *input, int in_class);
-       char* (*result_file_func)(struct classifier_ctx *ctx, double *probability);
+                                                       char *symbol, GTree *input, gboolean in_class);
  };
  
  /* Get classifier structure by name or return NULL if this name is not found */
  struct classifier* get_classifier (char *name);
  
  /* Winnow algorithm */
-struct classifier_ctx* winnow_init (memory_pool_t *pool);
-void winnow_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, char *statfile, GTree *input, double scale);
-void winnow_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, char *statfile, GTree *input, int in_class);
-char* winnow_result_file (struct classifier_ctx* ctx, double *probability);
+struct classifier_ctx* winnow_init (memory_pool_t *pool, struct classifier_config *cf);
+void winnow_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct worker_task *task);
+void winnow_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, char *symbol, GTree *input, gboolean in_class);
  
  /* Array of all defined classifiers */
  extern struct classifier classifiers[];
diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c

index edd929af081d1cdc368c76ac2f8b8fbff8309e27..88298faf471adc7220b46fc274b68557e956e2f8 100644 (file)
--- a/src/classifiers/winnow.c
+++ b/src/classifiers/winnow.c
@@ -27,6 +27,9 @@
   */
  
  #include "classifiers.h"
+#include "../main.h"
+#include "../filter.h"
+#include "../cfg_file.h"
  
  #define WINNOW_PROMOTION 1.23
  #define WINNOW_DEMOTION 0.83
@@ -85,21 +88,23 @@ learn_callback (gpointer key, gpointer value, gpointer data)
  }
  
  struct classifier_ctx* 
-winnow_init (memory_pool_t *pool)
+winnow_init (memory_pool_t *pool, struct classifier_config *cfg)
  {
         struct classifier_ctx *ctx = memory_pool_alloc (pool, sizeof (struct classifier_ctx));
  
         ctx->pool = pool;
-       ctx->results = g_hash_table_new (g_str_hash, g_str_equal);
-       memory_pool_add_destructor (pool, (pool_destruct_func)g_hash_table_destroy, ctx->results);
+       ctx->cfg = cfg;
  
         return ctx;
  }
  void 
-winnow_classify (struct classifier_ctx *ctx, statfile_pool_t *pool, char *statfile, GTree *input, double scale)
+winnow_classify (struct classifier_ctx *ctx, statfile_pool_t *pool, GTree *input, struct worker_task *task)
  {
         struct winnow_callback_data data;
         double *res = memory_pool_alloc (ctx->pool, sizeof (double));
+       double max = 0;
+       GList *cur;
+       struct statfile *st, *sel = NULL;
  
         g_assert (pool != NULL);
         g_assert (ctx != NULL);
@@ -109,29 +114,44 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t *pool, char *statfi
         data.count = 0;
         data.now = time (NULL);
         data.ctx = ctx;
-
-       if ((data.file = statfile_pool_is_open (pool, statfile)) == NULL) {
-               if ((data.file = statfile_pool_open (pool, statfile)) == NULL) {
-                       return;
+       
+       cur = ctx->cfg->statfiles;
+       while (cur) {
+               st = cur->data;
+               if ((data.file = statfile_pool_is_open (pool, st->path)) == NULL) {
+                       if ((data.file = statfile_pool_open (pool, st->path)) == NULL) {
+                               msg_warn ("winnow_classify: cannot open %s, skip it", st->path);
+                               cur = g_list_next (cur);
+                               continue;
+                       }
                 }
-       }
  
-       g_tree_foreach (input, classify_callback, &data);
+               g_tree_foreach (input, classify_callback, &data);
         
-       if (data.count != 0) {
-       *res = scale * (data.sum / data.count);
+               if (data.count != 0) {
+                       *res = (data.sum / data.count);
+               }
+               else {
+                       *res = 0;
+               }
+               if (*res > max) {
+                       max = *res;
+                       sel = st;
+               }
+               cur = g_list_next (cur);
         }
-       else {
-               *res = 0;
+       
+       if (sel != NULL) {
+               insert_result (task, ctx->cfg->metric, sel->symbol, 1, NULL);
         }
-
-       g_hash_table_insert (ctx->results, statfile, res);
  }
  
  void
-winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, char *statfile, GTree *input, int in_class)
+winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, char *symbol, GTree *input, int in_class)
  {
         struct winnow_callback_data data;
+       GList *cur;
+       struct statfile *st;
         
         g_assert (pool != NULL);
         g_assert (ctx != NULL);
@@ -142,50 +162,29 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, char *statfile,
         data.in_class = in_class;
         data.now = time (NULL);
         data.ctx = ctx;
-
-       if ((data.file = statfile_pool_is_open (pool, statfile)) == NULL) {
-               if ((data.file = statfile_pool_open (pool, statfile)) == NULL) {
-                       return;
+       
+       cur = g_list_first (ctx->cfg->statfiles);
+       while (cur) {
+               st = cur->data;
+               if (strcmp (symbol, st->symbol) == 0) {
+                       if ((data.file = statfile_pool_open (pool, st->path)) == NULL) {
+                               /* Try to create statfile */
+                               if (statfile_pool_create (pool, 
+                                                       st->path, st->size / sizeof (struct stat_file_block)) == -1) {
+                                       msg_err ("winnow_learn: cannot create statfile %s", st->path);
+                                       return;
+                               }
+                               if ((data.file = statfile_pool_open (pool, st->path)) == NULL) {
+                                       msg_err ("winnow_learn: cannot create statfile %s", st->path);
+                                       return;
+                               }
+                       }
+                       break;
                 }
+               cur = g_list_next (cur);
         }
  
         statfile_pool_lock_file (pool, data.file);
         g_tree_foreach (input, learn_callback, &data);
         statfile_pool_unlock_file (pool, data.file);
  }
-
-struct winnow_result_data {
-       char *filename;
-       double max_score;
-       double sum;
-};
-
-static void 
-result_file_callback (gpointer key, gpointer value, gpointer data)
-{
-       struct winnow_result_data *d = (struct winnow_result_data *)data;
-       double w = *((double *)value);
-
-       if (fabs (w) > fabs (d->max_score)) {
-               d->filename = (char *)key;
-               d->max_score = w;
-       }
-       d->sum += fabs (w);
-}
-
-char* 
-winnow_result_file (struct classifier_ctx* ctx, double *probability)
-{
-       struct winnow_result_data data = { NULL, 0, 0 };
-       g_assert (ctx != NULL);
-       
-       g_hash_table_foreach (ctx->results, result_file_callback, &data);
-       if (data.sum != 0) {
-               *probability = data.max_score / data.sum;
-       }
-       else {
-               *probability = 1;
-       }
-
-       return data.filename;
-}
diff --git a/src/controller.c b/src/controller.c

index 4196e16f04937577fcdf13499a55c59539bf396f..0aaa8bd992d44b6fcf8324b58c088e70a19513b5 100644 (file)
--- a/src/controller.c
+++ b/src/controller.c
@@ -181,9 +181,7 @@ process_command (struct controller_command *cmd, char **cmd_args, struct control
         int r = 0, days, hours, minutes;
         time_t uptime;
         unsigned long size = 0;
-       struct statfile *statfile;
-       stat_file_t *file;
-       struct metric *metric;
+       struct classifier_config *cl;
         memory_pool_stat_t mem_st;
         char *password = g_hash_table_lookup (session->worker->cf->params, "password");
  
@@ -311,26 +309,16 @@ process_command (struct controller_command *cmd, char **cmd_args, struct control
                                         return;
                                 }
  
-                               statfile = g_hash_table_lookup (session->cfg->statfiles, *cmd_args);
-                               if (statfile == NULL) {
+                               session->learn_symbol = *cmd_args;
+                               cl = g_hash_table_lookup (session->cfg->classifiers_symbols, *cmd_args);
+                               if (cl == NULL) {
                                         r = snprintf (out_buf, sizeof (out_buf), "statfile %s is not defined" CRLF, *cmd_args);
                                         rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE);
                                         return;
  
                                 }
+                               session->learn_classifier = cl;
  
-                               metric = g_hash_table_lookup (session->cfg->metrics, statfile->metric);
-
-                               session->learn_rcpt = NULL;
-                               session->learn_from = NULL;
-                               session->learn_filename = NULL;
-                               session->learn_tokenizer = statfile->tokenizer;
-                               if (metric != NULL) {
-                                       session->learn_classifier = metric->classifier;
-                               }
-                               else {
-                                       session->learn_classifier = get_classifier ("winnow");
-                               }
                                 /* By default learn positive */
                                 session->in_class = 1;
                                 /* Get all arguments */
@@ -366,22 +354,6 @@ process_command (struct controller_command *cmd, char **cmd_args, struct control
                                                 }
                                         }
                                 }
-                               session->learn_filename = resolve_stat_filename (session->session_pool, statfile->pattern, 
-                                                                                                                                       session->learn_rcpt, session->learn_from);
-                               if ((file = statfile_pool_open (session->worker->srv->statfile_pool, session->learn_filename)) == NULL) {
-                                       /* Try to create statfile */
-                                       if (statfile_pool_create (session->worker->srv->statfile_pool, 
-                                                                       session->learn_filename, statfile->size / sizeof (struct stat_file_block)) == -1) {
-                                               r = snprintf (out_buf, sizeof (out_buf), "cannot create statfile %s" CRLF, session->learn_filename);
-                                               rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE);
-                                               return;
-                                       }
-                                       if ((file = statfile_pool_open (session->worker->srv->statfile_pool, session->learn_filename)) == NULL) {
-                                               r = snprintf (out_buf, sizeof (out_buf), "cannot open statfile %s" CRLF, session->learn_filename);
-                                               rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE);
-                                               return;
-                                       }
-                               }
                  rspamd_set_dispatcher_policy (session->dispatcher, BUFFER_CHARACTER, size);
                                 session->state = STATE_LEARN;
                         }
@@ -479,7 +451,7 @@ controller_read_socket (f_str_t *in, void *arg)
                         while ((content = get_next_text_part (session->session_pool, session->parts, &cur)) != NULL) {
                                 c.begin = content->data;
                                 c.len = content->len;
-                               if (!session->learn_tokenizer->tokenize_func (session->learn_tokenizer, 
+                               if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer, 
                                                         session->session_pool, &c, &tokens)) {
                                         i = snprintf (out_buf, sizeof (out_buf), "learn fail, tokenizer error" CRLF);
                                         rspamd_dispatcher_write (session->dispatcher, out_buf, i, FALSE, FALSE);
@@ -487,9 +459,9 @@ controller_read_socket (f_str_t *in, void *arg)
                                         return;
                                 }
                         }
-                       cls_ctx = session->learn_classifier->init_func (session->session_pool);
-                       session->learn_classifier->learn_func (cls_ctx, session->worker->srv->statfile_pool,
-                                                                                                       session->learn_filename, tokens, session->in_class);
+                       cls_ctx = session->learn_classifier->classifier->init_func (session->session_pool, session->learn_classifier);
+                       session->learn_classifier->classifier->learn_func (cls_ctx, session->worker->srv->statfile_pool,
+                                                                                                       session->learn_symbol, tokens, session->in_class);
                         session->worker->srv->stat->messages_learned ++;
                         i = snprintf (out_buf, sizeof (out_buf), "learn ok" CRLF);
                         rspamd_dispatcher_write (session->dispatcher, out_buf, i, FALSE, FALSE);
diff --git a/src/filter.c b/src/filter.c

index 34e4871921c1e4a5d868b1b181116b4cf837666e..c9453dc61dba4743a54b18ed43468e595fbefc29 100644 (file)
--- a/src/filter.c
+++ b/src/filter.c
@@ -444,7 +444,7 @@ check_autolearn (struct statfile_autolearn_params *params, struct worker_task *t
         return FALSE;
  }
  
-static void
+void
  process_autolearn (struct statfile *st, struct worker_task *task, GTree *tokens, 
                                         struct classifier *classifier, char *filename, struct classifier_ctx* ctx)
  {
@@ -464,7 +464,7 @@ process_autolearn (struct statfile *st, struct worker_task *task, GTree *tokens,
                                 }
                         }
  
-                       classifier->learn_func (ctx, task->worker->srv->statfile_pool, filename, tokens, 1);
+                       classifier->learn_func (ctx, task->worker->srv->statfile_pool, filename, tokens, TRUE);
                 }
         }
  }
@@ -488,48 +488,27 @@ make_composites (struct worker_task *task)
         g_hash_table_foreach (task->results, composites_metric_callback, task);
  }
  
-struct statfile_result_data {
-       struct metric *metric;
-       struct classifier_ctx *ctx;
-};
  
  struct statfile_callback_data {
         GHashTable *tokens;
-       GHashTable *classifiers;
         struct worker_task *task;
  };
  
  static void
-statfiles_callback (gpointer key, gpointer value, void *arg)
+classifiers_callback (gpointer value, void *arg)
  {
         struct statfile_callback_data *data= (struct statfile_callback_data *)arg;
         struct worker_task *task = data->task;
-       struct statfile *st = (struct statfile *)value;
-       struct classifier *classifier;
-       struct statfile_result_data *res_data;
-       struct metric *metric;
+       struct classifier_config *cl = value;
+       struct classifier_ctx *ctx;
         struct mime_text_part *text_part;
-
+       struct statfile *st;
         GTree *tokens = NULL;
         GList *cur;
-
-       char *filename;
         f_str_t c;
         
-       if (g_list_length (task->rcpt) == 1) {
-               filename = resolve_stat_filename (task->task_pool, st->pattern, task->from, (char *)task->rcpt->data);
-       }
-       else {
-               /* XXX: handle multiply recipients correctly */
-               filename = resolve_stat_filename (task->task_pool, st->pattern, task->from, "");
-       }
-       
-       if (statfile_pool_open (task->worker->srv->statfile_pool, filename) == NULL && !check_autolearn (st->autolearn, task)) {
-               return;
-       }
-       
         cur = g_list_first (task->text_parts);
-       if ((tokens = g_hash_table_lookup (data->tokens, st->tokenizer)) == NULL) {
+       if ((tokens = g_hash_table_lookup (data->tokens, cl->tokenizer)) == NULL) {
                 while (cur != NULL) {
                         text_part = (struct mime_text_part *)cur->data;
                         if (text_part->is_empty) {
@@ -539,52 +518,32 @@ statfiles_callback (gpointer key, gpointer value, void *arg)
                         c.begin = text_part->content->data;
                         c.len = text_part->content->len;
                         /* Tree would be freed at task pool freeing */
-                       if (!st->tokenizer->tokenize_func (st->tokenizer, task->task_pool, &c, &tokens)) {
+                       if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
                                 msg_info ("statfiles_callback: cannot tokenize input");
                                 return;
                         }
                         cur = g_list_next (cur);
                 }
-               g_hash_table_insert (data->tokens, st->tokenizer, tokens);
+               g_hash_table_insert (data->tokens, cl->tokenizer, tokens);
         }
         
-       metric = g_hash_table_lookup (task->cfg->metrics, st->metric);
-       if (metric == NULL) {
-               classifier = get_classifier ("winnow");
-       } 
-       else {
-               classifier = metric->classifier;
-       }
-       if ((res_data = g_hash_table_lookup (data->classifiers, classifier)) == NULL) {
-               res_data = memory_pool_alloc (task->task_pool, sizeof (struct statfile_result_data));
-               res_data->ctx = classifier->init_func (task->task_pool);
-               res_data->metric = metric;
-               g_hash_table_insert (data->classifiers, classifier, res_data);
-       }
+       ctx = cl->classifier->init_func (task->task_pool, cl);
+       cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task);
         
-       classifier->classify_func (res_data->ctx, task->worker->srv->statfile_pool, filename, tokens, st->weight);
-
-       if (st->autolearn) {
-               /* Process autolearn */
-               process_autolearn (st, task, tokens, classifier, filename, res_data->ctx);
+       /* Autolearning */
+       cur = g_list_first (cl->statfiles);
+       while (cur) {
+               st = cur->data;
+               if (st->autolearn) {
+                       if (check_autolearn (st->autolearn, task)) {
+                               /* Process autolearn */
+                               process_autolearn (st, task, tokens, cl->classifier, st->path, ctx);
+                       }
+               }
+               cur = g_list_next (cur);
         }
  }
  
-static void
-statfiles_results_callback (gpointer key, gpointer value, void *arg)
-{
-       struct worker_task *task = (struct worker_task *)arg;
-       struct statfile_result_data *res = (struct statfile_result_data *)value;
-       struct classifier *classifier = (struct classifier *)key;
-       double *w;
-       char *filename;
-
-       w = memory_pool_alloc (task->task_pool, sizeof (double));
-       filename = classifier->result_file_func (res->ctx, w);
-       insert_result (task, res->metric->name, classifier->name, *w, NULL);
-       msg_debug ("statfiles_results_callback: got total weight %.2f for metric %s", *w, res->metric->name);
-}
-
  
  void
  process_statfiles (struct worker_task *task)
@@ -593,16 +552,11 @@ process_statfiles (struct worker_task *task)
         
         cd.task = task;
         cd.tokens = g_hash_table_new (g_direct_hash, g_direct_equal);
-       cd.classifiers = g_hash_table_new (g_str_hash, g_str_equal);
  
-       g_hash_table_foreach (task->cfg->statfiles, statfiles_callback, &cd);
-       g_hash_table_foreach (cd.classifiers, statfiles_results_callback, task);
-       
+       g_list_foreach (task->cfg->classifiers, classifiers_callback, &cd);
         g_hash_table_destroy (cd.tokens);
-       g_hash_table_destroy (cd.classifiers);
-       /* Process results */
-       g_hash_table_foreach (task->results, metric_process_callback_forced, task);
  
+       /* Process results */
         task->state = WRITE_REPLY;
  }
  
diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h

index ff475c0a174fb9381628c1255d2bfbf3812a19d3..ab06166b3b7f137dbd766ce3e36cb5060fa31e47 100644 (file)
--- a/src/lua/lua_common.h
+++ b/src/lua/lua_common.h
@@ -2,6 +2,8 @@
  #define RSPAMD_LUA_H
  
  #include "../config.h"
+#ifdef WITH_LUA
+
  #include "../main.h"
  #include "../cfg_file.h"
  #include <lua.h>
@@ -30,4 +32,5 @@ int lua_call_chain_filter (const char *function, struct worker_task *task, int *
  double lua_consolidation_func (struct worker_task *task, const char *metric_name, const char *function_name);
  void add_luabuf (const char *line);
  
-#endif
+#endif /* WITH_LUA */
+#endif /* RSPAMD_LUA_H */
diff --git a/src/main.h b/src/main.h

index bfc78e88808d75884d6c20210adb1015be3a24be..4c1ab8617388a20eed425a45cf119ce20239a0c9 100644 (file)
--- a/src/main.h
+++ b/src/main.h
@@ -71,6 +71,7 @@ struct pidfh;
  struct config_file;
  struct tokenizer;
  struct classifier;
+struct classifier_config;
  struct mime_part;
  struct rspamd_view;
  
@@ -140,9 +141,8 @@ struct controller_session {
         struct config_file *cfg;                                                                        /**< pointer to config file                                                     */
         char *learn_rcpt;                                                                                       /**< recipient for learning                                                     */
         char *learn_from;                                                                                       /**< from address for learning                                          */
-       struct tokenizer *learn_tokenizer;                                                      /**< tokenizer for learning                                                     */
-       struct classifier *learn_classifier;                                            /**< classifier for learning                                            */
-       char *learn_filename;                                                                           /**< real filename for learning                                         */
+       struct classifier_config *learn_classifier;
+       char *learn_symbol;                                                                                     /**< symbol to train                                                            */
         rspamd_io_dispatcher_t *dispatcher;                                                     /**< IO dispatcher object                                                       */
         f_str_t *learn_buf;                                                                                     /**< learn input                                                                        */
         GList *parts;                                                                                           /**< extracted mime parts                                                       */
diff --git a/src/protocol.c b/src/protocol.c

index 176160381a3c88b408491d87df1a6df1e0c6517d..cd5f32424a3475e265ad6dcb8b62054916b325ae 100644 (file)
--- a/src/protocol.c
+++ b/src/protocol.c
@@ -313,15 +313,7 @@ parse_header (struct worker_task *task, f_str_t *line)
                                 task->rcpt = g_list_prepend (task->rcpt, tmp);
                                 msg_debug ("parse_header: read rcpt header, value: %s", tmp);
                         }
-                       else {
-                               msg_info ("parse_header: wrong header: %s", headern);
-                               return -1;
-                       }
-                       break;
-               case 'n':
-               case 'N':
-                       /* nrcpt */
-                       if (strncasecmp (headern, NRCPT_HEADER, sizeof (NRCPT_HEADER) - 1) == 0) {
+                       else if (strncasecmp (headern, NRCPT_HEADER, sizeof (NRCPT_HEADER) - 1) == 0) {
                                 tmp = memory_pool_fstrdup (task->task_pool, line);
                                 task->nrcpt = strtoul (tmp, &err, 10);
                                 msg_debug ("parse_header: read rcpt header, value: %d", (int)task->nrcpt);
diff --git a/src/symbols_cache.c b/src/symbols_cache.c

index 1d5d38d4d9fd283483b9bd8ee33aa7c5530912a0..cc7e8a1b19ee2c111783bd3ab8d3b62c5b84e2ef 100644 (file)
--- a/src/symbols_cache.c
+++ b/src/symbols_cache.c
@@ -360,6 +360,9 @@ call_symbol_callback (struct worker_task *task, struct symbols_cache *cache, str
                 item = &cache->items[0];
         }
         else {
+               if (cache == NULL) {
+                       return FALSE;
+               }
                 /* Next pointer */
                 if (*saved_item - cache->items >= cache->used_items - 1) {
                         /* No more items in cache */
diff --git a/test/rspamd_statfile_test.c b/test/rspamd_statfile_test.c

index 19a6cf7abd1374093247aef7c05f07bae48e218f..282d4dc1c368104bad800127d34f23bf717d380d 100644 (file)
--- a/test/rspamd_statfile_test.c
+++ b/test/rspamd_statfile_test.c
@@ -25,6 +25,7 @@ void
  rspamd_statfile_test_func ()
  {
         statfile_pool_t *pool;
+       stat_file_t *st;
         uint32_t random_hashes[HASHES_NUM], i, v;
         time_t now;
         
@@ -40,17 +41,17 @@ rspamd_statfile_test_func ()
  
         /* Create new file */
         g_assert (statfile_pool_create (pool, TEST_FILENAME, 65535) != -1);
-       g_assert (statfile_pool_open (pool, TEST_FILENAME) != -1);
+       g_assert ((st = statfile_pool_open (pool, TEST_FILENAME)) != NULL);
         
         /* Get and set random blocks */
-       statfile_pool_lock_file (pool, TEST_FILENAME);
+       statfile_pool_lock_file (pool, st);
         for (i = 0; i < HASHES_NUM; i ++) {
-               statfile_pool_set_block (pool, TEST_FILENAME, random_hashes[i], random_hashes[i], now, 1.0);
+               statfile_pool_set_block (pool, st, random_hashes[i], random_hashes[i], now, 1.0);
         }
-       statfile_pool_unlock_file (pool, TEST_FILENAME);
+       statfile_pool_unlock_file (pool, st);
  
         for (i = 0; i < HASHES_NUM; i ++) {
-               v = statfile_pool_get_block (pool, TEST_FILENAME, random_hashes[i], random_hashes[i], now);
+               v = statfile_pool_get_block (pool, st, random_hashes[i], random_hashes[i], now);
                 g_assert(v == 1.0);
         }
  
diff --git a/test/rspamd_test_suite.c b/test/rspamd_test_suite.c

index 0f4768e731fa8262c65b9164b23a45bc44472794..24d8e0289dd6653cd1689d15b98cf13514456f2a 100644 (file)
--- a/test/rspamd_test_suite.c
+++ b/test/rspamd_test_suite.c
@@ -30,4 +30,6 @@ main (int argc, char **argv)
         g_test_add_func ("/rspamd/statfile", rspamd_statfile_test_func);
  
         g_test_run ();
+
+       return 0;
  }
diff --git a/utils/expression_parser.c b/utils/expression_parser.c

index 4f37ec9239f01eb465414df6327bd209bfb63a4d..38b52934ff9159e9cf1e06998f62593529658168 100644 (file)
--- a/utils/expression_parser.c
+++ b/utils/expression_parser.c
@@ -49,4 +49,6 @@ main (int argc, char **argv)
         }
  
         memory_pool_delete (pool);
+
+       return 0;
  }
author	Vsevolod Stakhov <vsevolod@rambler-co.ru>
	Mon, 14 Sep 2009 15:11:19 +0000 (19:11 +0400)
committer	Vsevolod Stakhov <vsevolod@rambler-co.ru>
	Mon, 14 Sep 2009 15:11:19 +0000 (19:11 +0400)
conf/rspamd.conf.sample		patch \| blob \| blame \| history
rspamd.conf.sample		patch \| blob \| blame \| history
src/cfg_file.h		patch \| blob \| blame \| history
src/cfg_file.l		patch \| blob \| blame \| history
src/cfg_file.y		patch \| blob \| blame \| history
src/cfg_utils.c		patch \| blob \| blame \| history
src/classifiers/classifiers.c		patch \| blob \| blame \| history
src/classifiers/classifiers.h		patch \| blob \| blame \| history
src/classifiers/winnow.c		patch \| blob \| blame \| history
src/controller.c		patch \| blob \| blame \| history
src/filter.c		patch \| blob \| blame \| history
src/lua/lua_common.h		patch \| blob \| blame \| history
src/main.h		patch \| blob \| blame \| history
src/protocol.c		patch \| blob \| blame \| history
src/symbols_cache.c		patch \| blob \| blame \| history
test/rspamd_statfile_test.c		patch \| blob \| blame \| history
test/rspamd_test_suite.c		patch \| blob \| blame \| history
utils/expression_parser.c		patch \| blob \| blame \| history