git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Add support of words regexps
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 12 Nov 2018 11:16:45 +0000 (11:16 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 12 Nov 2018 11:16:45 +0000 (11:16 +0000)
src/libmime/mime_expressions.c
src/libserver/re_cache.c
src/libserver/re_cache.h

index 596d959c6e25680ea798d14dc800601caace47c9..c6d258c494e17eaed62d02b046f2f24c23b680de 100644 (file)
@@ -239,6 +239,10 @@ rspamd_parse_long_option (const gchar *start, gsize len,
                ret = TRUE;
                a->type = RSPAMD_RE_SARAWBODY;
        }
+       else if (TYPE_CHECK (start, "words", len)) {
+               ret = TRUE;
+               a->type = RSPAMD_RE_WORDS;
+       }
        else if (TYPE_CHECK (start, "selector", len)) {
                ret = TRUE;
                a->type = RSPAMD_RE_SELECTOR;
index 64f53773d25631902623fb9eeb5bfa558ac0f654..7b7cabb69651ded97185a54e55388d8685051e22 100644 (file)
@@ -23,6 +23,7 @@
 #include "libutil/util.h"
 #include "libutil/regexp.h"
 #include "lua/lua_common.h"
+#include "libstat/stat_api.h"
 
 #include "khash.h"
 
@@ -1199,6 +1200,46 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                        g_free (lenvec);
                }
                break;
+       case RSPAMD_RE_WORDS:
+               if (task->text_parts->len > 0) {
+                       cnt = 0;
+
+                       PTR_ARRAY_FOREACH (task->text_parts, i, part) {
+                               if (part->utf_words) {
+                                       cnt += part->utf_words->len;
+                               }
+                       }
+
+                       if (cnt > 0) {
+                               scvec = g_malloc (sizeof (*scvec) * cnt);
+                               lenvec = g_malloc (sizeof (*lenvec) * cnt);
+
+                               cnt = 0;
+
+                               PTR_ARRAY_FOREACH (task->text_parts, i, part) {
+                                       guint j;
+                                       rspamd_stat_token_t *tok;
+
+
+                                       if (part->utf_words) {
+                                               for (j = 0; j < part->utf_words->len; j ++) {
+                                                       tok = &g_array_index (part->utf_words, rspamd_stat_token_t, j);
+                                                       scvec[cnt] = tok->begin;
+                                                       lenvec[cnt++] = tok->len;
+                                               }
+                                       }
+                               }
+
+                               ret = rspamd_re_cache_process_regexp_data (rt, re,
+                                               task, scvec, lenvec, cnt, TRUE);
+
+                               msg_debug_re_task ("checking sa words regexp: %s -> %d",
+                                               rspamd_regexp_get_pattern (re), ret);
+                               g_free (scvec);
+                               g_free (lenvec);
+                       }
+               }
+               break;
        case RSPAMD_RE_SELECTOR:
                if (rspamd_re_cache_process_selector (task, rt,
                                re_class->type_data,
index c14b29ef0d8a76f80b9bec70e57bb307cde691d9..596ea08c271313de623973e66cf012b2849c78da 100644 (file)
@@ -35,6 +35,7 @@ enum rspamd_re_type {
        RSPAMD_RE_BODY, /* full in SA */
        RSPAMD_RE_SABODY, /* body in SA */
        RSPAMD_RE_SARAWBODY, /* rawbody in SA */
+       RSPAMD_RE_WORDS, /* normalized words */
        RSPAMD_RE_SELECTOR, /* use lua selector to process regexp */
        RSPAMD_RE_MAX
 };