git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Add more meta-tokens to bayes
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 3 Sep 2016 15:31:40 +0000 (16:31 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 3 Sep 2016 15:33:46 +0000 (16:33 +0100)
src/libstat/stat_process.c

index 00e7ded4c150df6becac22da5c8a45506664b182..0d3795f4d7a388704924481d359627404f575bbb 100644 (file)
@@ -22,7 +22,8 @@
 #include "libmime/images.h"
 #include "libserver/html.h"
 #include "lua/lua_common.h"
-#include <utlist.h>
+#include "utlist.h"
+#include <math.h>
 
 #define RSPAMD_CLASSIFY_OP 0
 #define RSPAMD_LEARN_OP 1
@@ -74,6 +75,7 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx,
        GArray *ar;
        rspamd_ftok_t elt;
        guint i;
+       gchar tmpbuf[128];
 
        ar = g_array_sized_new (FALSE, FALSE, sizeof (elt), 4);
 
@@ -123,6 +125,14 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx,
                                msg_debug_task ("added stat tokens for mime boundary '%s'", elt.begin);
                                g_array_append_val (ar, elt);
                        }
+
+                       if (part->content && part->content->len > 1) {
+                               rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "mime%d:%dlog",
+                                               (gint)log2 (part->content->len));
+                               elt.begin = rspamd_mempool_strdup (task->task_pool, tmpbuf);
+                               elt.len = strlen (elt.begin);
+                               g_array_append_val (ar, elt);
+                       }
                }
        }
 
@@ -152,6 +162,23 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx,
                cur = g_list_next (cur);
        }
 
+       /* Size meta-token */
+       if (task->msg.len > 1) {
+               rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "size%dlog",
+                               (gint)log2 (task->msg.len));
+               elt.begin = rspamd_mempool_strdup (task->task_pool, tmpbuf);
+               elt.len = strlen (elt.begin);
+               g_array_append_val (ar, elt);
+       }
+       /* Number recipients */
+       if (task->rcpt_envelope && task->rcpt_envelope->len > 0) {
+               rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "recipients%d",
+                               task->rcpt_envelope->len);
+               elt.begin = rspamd_mempool_strdup (task->task_pool, tmpbuf);
+               elt.len = strlen (elt.begin);
+               g_array_append_val (ar, elt);
+       }
+
        st_ctx->tokenizer->tokenize_func (st_ctx,
                        task->task_pool,
                        ar,