]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
* Feed BSD lex
authorVsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 14 Sep 2009 15:20:52 +0000 (19:20 +0400)
committerVsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 14 Sep 2009 15:20:52 +0000 (19:20 +0400)
conf/rspamd.conf.sample
src/cfg_file.l

index 42659259a6fae2c9e9a21b927ca75b4e587a1759..f38285ee5eef06010088bebd869af4cc23958f38 100644 (file)
@@ -39,12 +39,52 @@ worker {
        password = "q1";
 };
 
+# Settings for fuzzy storage interface
+worker {
+    type = "fuzzy";
+
+       # Bind socket for control interface
+       bind_socket = localhost:11335;
+
+    count = 1;
+       # Path to filesystem storage
+       hashfile = "/tmp/fuzzy.db";
+};
+
+# Options for lmtp worker
+#worker {
+       #type = "lmtp";
+       # Bind socket for lmtp interface
+       #bind_socket = localhost:11335;
+       # Metric that is considered the main one. If this metric yields a
+       # spam result, lmtp delivery will fail
+       #metric = "default";
+       # Number of lmtp workers
+       #count = 1;
+#};
+
+#worker {
+       #type = "delivery";
+       # Path to delivery agent, %f is expanded as mail from address and %r 
+       # is expanded as recipient address
+       # Example: agent = "/usr/local/bin/procmail -f %f -d %r"
+       #agent = "/dev/null";
+       # Bind socket for lmtp interface
+       # Example: bind_socket = localhost:25
+       
+       # Whether we should use lmtp for MTA delivery
+       #lmtp = no;
+#};
+
+
 # Sample metric definition
 metric {
        # Name of metric
        name = "testmetric";
        # Score to count message as spam by this metric
        required_score = 10.1;
+       # Symbols cache path for optimal checks planning
+       cache_file = "/tmp/symbols.cache";
 };
 
 # Logging settings
@@ -64,27 +104,36 @@ logging {
 # Default: 100M
 statfile_pool_size = 40M;
 
-
-# Sample statfile definition
-#statfile {
-       # Alias is used for learning and is used as symbol
-       #alias = "test.spam";
-       # Pattern is path to file, can include %r - recipient name and %f - mail from value
-       #pattern = "./test.spam";
-       # Weight in spam/ham classifier
-       #weight = 1.0;
-       # Size of this statfile class
-       #size = 10M;
-       # Tokenizer for this statfile
-       # Deafault: osb-text
-       #tokenizer = "osb-text";
-#};
-#statfile {
-       #alias = "test.ham";
-       #pattern = "./test.ham";
-       #weight = -2.0;
-       #size = 10M;
-#};
+# Classifier definition
+classifier {
+       # Type of classifier
+    type = "winnow";
+       # Tokenizer used
+    tokenizer = "osb-text";
+    # Sample statfile definition
+    statfile {
+        # Symbol is used for learning and is reported as the result symbol
+        symbol = "WINNOW_SPAM";
+        # Path to the statfile; can include %r - recipient name and %f - mail from value
+        path = "/tmp/test.spam";
+        # Size of this statfile class
+        size = 10M;
+        # Tokenizer for this statfile
+        # Default: osb-text
+        #tokenizer = "osb-text";
+        autolearn {
+            min_mark = 10.0;
+        };
+    };
+    statfile {
+        symbol = "WINNOW_HAM";
+        path = "/tmp/test.ham";
+        size = 10M;
+        autolearn {
+            max_mark = 0.1;
+        };
+    };
+};
 
 # Factors coefficients
 factors {
@@ -159,30 +208,7 @@ factors {
     "R_MIXED_CHARSET" = 5;
     "R_BAD_EMAIL" = 10.5;
 };
-# Options for lmtp worker
-#worker {
-       #type = "lmtp";
-       # Bind socket for lmtp interface
-       #bind_socket = localhost:11335;
-       # Metric that is considered as main. If we have spam result on
-       # this metric, lmtp delivery would be failed
-       #metric = "default";
-       # Number of lmtp workers
-       #count = 1;
-#};
 
-#worker {
-       #type = "delivery";
-       # Path to delivery agent, %f is expanded as mail from address and %r 
-       # is expanded as recipient address
-       # Expample: agent = "/usr/local/bin/procmail -f %f -d %r"
-       #agent = "/dev/null";
-       # Bind socket for lmtp interface
-       # Example: bind_socket = localhost:25
-       
-       # Whether we should use lmtp for MTA delivery
-       #lmtp = no;
-#};
 
 # SURBL module params, note that single quotes are mandatory here
 .module 'surbl' {
@@ -285,6 +311,14 @@ factors {
     #blacklist = "file:///some/path/emails.lst";
 };
 
+# Module for fuzzy checksum loading
+.module 'fuzzy_check' {
+    metric = "default";
+       symbol = "R_FUZZY";
+       # List of fuzzy storage servers, separated by ',' or ';' or simply by spaces
+       servers = "localhost:11335";
+};
+
 # If enables threat each regexp as raw regex and do not try to convert
 # each text part to utf8 encoding. Save a lot of resources but less
 # portable.
@@ -315,3 +349,19 @@ settings {
        # json data for domain's settings
        #domain_settings = "file:///some/other/json/file";
 };
+
+# Example of json config:
+# [
+#     {
+#         "name": "cebka@test.ru",
+#         "metrics":
+#         {
+#             "default": 5.5
+#         },
+#         "factors":
+#         {
+#             "R_FUZZY": 10.1
+#         },
+#         "want_spam": false
+#     }
+# ] 
index 5355a7c578a5d848bbc1aa2aa2a48eed9006fc19..e1314905c0186c9d2b66849344fbd2a313391f34 100644 (file)
@@ -1,8 +1,8 @@
 %x incl
-%x module
-%x lua
-%x worker
-%x classifier
+%x module_lex_state
+%x lua_lex_state
+%x worker_lex_state
+%x classifier_lex_state
 
 %{
 
@@ -33,9 +33,9 @@ extern struct config_file *cfg;
 %%
 [ \t]*#.*                                              /* ignore comments */;
 .include                                               BEGIN(incl);
-.module                                                        BEGIN(module);
-.lua                                                   BEGIN(lua);
-worker                                                 BEGIN(worker); return WORKER;
+.module                                                        BEGIN(module_lex_state);
+.lua                                                   BEGIN(lua_lex_state);
+worker                                                 BEGIN(worker_lex_state); return WORKER;
 composites                                             return COMPOSITES;
 tempdir                                                        return TEMPDIR;
 pidfile                                                        return PIDFILE;
@@ -76,7 +76,7 @@ enabled                                                       return ENABLED;
 delivery                                               return DELIVERY;
 agent                                                  return AGENT;
 
-classifier                                             BEGIN(classifier); return CLASSIFIER;
+classifier                                             BEGIN(classifier_lex_state); return CLASSIFIER;
 
 logging                                                        return LOGGING;
 
@@ -155,67 +155,67 @@ yes|YES|no|NO|[yY]|[nN]                   yylval.flag=parse_flag(yytext); return FLAG;
                }
 }
 
-<module>\n                                                             /* ignore EOL */;
-<module>[ \t]+                                                 /* ignore whitespace */;
-<module>[ \t]*#.*                                              /* ignore comments */;
-<module>\'[a-zA-Z0-9_-]+\'     yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; return MODULE_OPT; 
-<module>\{     nested_depth ++; return OBRACE;
-<module>\}  if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
-<module>\;     return SEMICOLON;
-<module>=      return EQSIGN;
-<module>\$[a-zA-Z_][a-zA-Z0-9_]+               yylval.string=strdup(yytext + 1); return VARIABLE;
-<module>[a-zA-Z0-9_%-]+        yylval.string=strdup(yytext); return PARAM;
-<module>\".+[^\\]\"    yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
-
-<worker>\n                                                             /* ignore EOL */;
-<worker>[ \t]+                                                 /* ignore whitespace */;
-<worker>[ \t]*#.*                                              /* ignore comments */;
-<worker>\{     nested_depth ++; return OBRACE;
-<worker>\}  if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
-<worker>\;     return SEMICOLON;
-<worker>=      return EQSIGN;
-<worker>type                                                   return TYPE;
-<worker>bind_socket                                            return BINDSOCK;
-<worker>count                                                  return COUNT;
-<worker>[0-9]+                                                 yylval.number=strtol(yytext, NULL, 10); return NUMBER;
-<worker>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3} yylval.string=strdup(yytext); return IPADDR;
-<worker>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\/[0-9]{1,2}     yylval.string=strdup(yytext); return IPNETWORK;
-<worker>[*a-zA-Z0-9.-]+:[0-9]{1,5}             yylval.string=strdup(yytext); return HOSTPORT;
-<worker>[a-zA-Z<][a-zA-Z@+>_-]*         yylval.string=strdup(yytext); return STRING;
-<worker>\$[a-zA-Z_][a-zA-Z0-9_]+               yylval.string=strdup(yytext + 1); return VARIABLE;
-<worker>\".+[^\\]\"    yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
-
-<classifier>\n                                                         /* ignore EOL */;
-<classifier>[ \t]+                                                     /* ignore whitespace */;
-<classifier>[ \t]*#.*                                          /* ignore comments */;
-<classifier>\{                             nested_depth ++; return OBRACE;
-<classifier>\}                              if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
-<classifier>\;                             return SEMICOLON;
-<classifier>=                              return EQSIGN;
-<classifier>type                                                       return TYPE;
-<classifier>bind_socket                                                return BINDSOCK;
-<classifier>count                                                      return COUNT;
-<classifier>statfile                                           return STATFILE;
-<classifier>symbol                                                     return SYMBOL;
-<classifier>path                                                       return PATH;
-<classifier>size                                                       return SIZE;
-<classifier>tokenizer                                          return TOKENIZER;
-<classifier>section                                                    return SECTION;
-<classifier>autolearn                                          return AUTOLEARN;
-<classifier>min_mark                                           return MIN_MARK;
-<classifier>max_mark                                           return MAX_MARK;
-<classifier>[0-9]+                                                     yylval.number=strtol(yytext, NULL, 10); return NUMBER;
-<classifier>-?[0-9]+\.?[0-9]*                          yylval.fract=strtod(yytext, NULL); return FRACT;
-<classifier>[0-9]+[kKmMgG]?                                    yylval.limit=parse_limit(yytext); return SIZELIMIT;
-<classifier>\$[a-zA-Z_][a-zA-Z0-9_]+           yylval.string=strdup(yytext + 1); return VARIABLE;
-<classifier>[a-zA-Z0-9_%-]+                    yylval.string=strdup(yytext); return PARAM;
-<classifier>\".+[^\\]\"        yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
-
-<lua>\n                                                                        /* ignore EOL */;
-<lua>[ \t]+                                                            /* ignore whitespace */;
-<lua>[ \t]*#.*                                                 /* ignore comments */;
-<lua>^.endlua$                                                 BEGIN(INITIAL);
-<lua>.*                                                                        add_luabuf(yytext); return LUACODE;
+<module_lex_state>\n                                                           /* ignore EOL */;
+<module_lex_state>[ \t]+                                                       /* ignore whitespace */;
+<module_lex_state>[ \t]*#.*                                            /* ignore comments */;
+<module_lex_state>\'[a-zA-Z0-9_-]+\'   yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; return MODULE_OPT; 
+<module_lex_state>\{   nested_depth ++; return OBRACE;
+<module_lex_state>\}  if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
+<module_lex_state>\;   return SEMICOLON;
+<module_lex_state>=    return EQSIGN;
+<module_lex_state>\$[a-zA-Z_][a-zA-Z0-9_]+             yylval.string=strdup(yytext + 1); return VARIABLE;
+<module_lex_state>[a-zA-Z0-9_%-]+      yylval.string=strdup(yytext); return PARAM;
+<module_lex_state>\".+[^\\]\"  yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
+
+<worker_lex_state>\n                                                           /* ignore EOL */;
+<worker_lex_state>[ \t]+                                                       /* ignore whitespace */;
+<worker_lex_state>[ \t]*#.*                                            /* ignore comments */;
+<worker_lex_state>\{   nested_depth ++; return OBRACE;
+<worker_lex_state>\}  if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
+<worker_lex_state>\;   return SEMICOLON;
+<worker_lex_state>=    return EQSIGN;
+<worker_lex_state>type                                                 return TYPE;
+<worker_lex_state>bind_socket                                          return BINDSOCK;
+<worker_lex_state>count                                                        return COUNT;
+<worker_lex_state>[0-9]+                                                       yylval.number=strtol(yytext, NULL, 10); return NUMBER;
+<worker_lex_state>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}       yylval.string=strdup(yytext); return IPADDR;
+<worker_lex_state>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\/[0-9]{1,2}   yylval.string=strdup(yytext); return IPNETWORK;
+<worker_lex_state>[*a-zA-Z0-9.-]+:[0-9]{1,5}           yylval.string=strdup(yytext); return HOSTPORT;
+<worker_lex_state>[a-zA-Z<][a-zA-Z@+>_-]*         yylval.string=strdup(yytext); return STRING;
+<worker_lex_state>\$[a-zA-Z_][a-zA-Z0-9_]+             yylval.string=strdup(yytext + 1); return VARIABLE;
+<worker_lex_state>\".+[^\\]\"  yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
+
+<classifier_lex_state>\n                                                       /* ignore EOL */;
+<classifier_lex_state>[ \t]+                                           /* ignore whitespace */;
+<classifier_lex_state>[ \t]*#.*                                                /* ignore comments */;
+<classifier_lex_state>\{                               nested_depth ++; return OBRACE;
+<classifier_lex_state>\}                            if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
+<classifier_lex_state>\;                            return SEMICOLON;
+<classifier_lex_state>=                                    return EQSIGN;
+<classifier_lex_state>type                                                     return TYPE;
+<classifier_lex_state>bind_socket                                      return BINDSOCK;
+<classifier_lex_state>count                                                    return COUNT;
+<classifier_lex_state>statfile                                         return STATFILE;
+<classifier_lex_state>symbol                                           return SYMBOL;
+<classifier_lex_state>path                                                     return PATH;
+<classifier_lex_state>size                                                     return SIZE;
+<classifier_lex_state>tokenizer                                                return TOKENIZER;
+<classifier_lex_state>section                                          return SECTION;
+<classifier_lex_state>autolearn                                                return AUTOLEARN;
+<classifier_lex_state>min_mark                                         return MIN_MARK;
+<classifier_lex_state>max_mark                                         return MAX_MARK;
+<classifier_lex_state>[0-9]+                                                   yylval.number=strtol(yytext, NULL, 10); return NUMBER;
+<classifier_lex_state>-?[0-9]+\.?[0-9]*                                yylval.fract=strtod(yytext, NULL); return FRACT;
+<classifier_lex_state>[0-9]+[kKmMgG]?                                  yylval.limit=parse_limit(yytext); return SIZELIMIT;
+<classifier_lex_state>\$[a-zA-Z_][a-zA-Z0-9_]+         yylval.string=strdup(yytext + 1); return VARIABLE;
+<classifier_lex_state>[a-zA-Z0-9_%-]+                  yylval.string=strdup(yytext); return PARAM;
+<classifier_lex_state>\".+[^\\]\"      yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
+
+<lua_lex_state>\n                                                                      /* ignore EOL */;
+<lua_lex_state>[ \t]+                                                          /* ignore whitespace */;
+<lua_lex_state>[ \t]*#.*                                                       /* ignore comments */;
+<lua_lex_state>^.endlua$                                                       BEGIN(INITIAL);
+<lua_lex_state>.*                                                                      add_luabuf(yytext); return LUACODE;
 
 %%
 /*