From: Vsevolod Stakhov Date: Mon, 12 Nov 2018 11:16:45 +0000 (+0000) Subject: [Feature] Add support of words regexps X-Git-Tag: 1.8.2~54 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=40958cbc99827ff63ba60f5b97c00104ecb47661;p=thirdparty%2Frspamd.git [Feature] Add support of words regexps --- diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c index 596d959c6e..c6d258c494 100644 --- a/src/libmime/mime_expressions.c +++ b/src/libmime/mime_expressions.c @@ -239,6 +239,10 @@ rspamd_parse_long_option (const gchar *start, gsize len, ret = TRUE; a->type = RSPAMD_RE_SARAWBODY; } + else if (TYPE_CHECK (start, "words", len)) { + ret = TRUE; + a->type = RSPAMD_RE_WORDS; + } else if (TYPE_CHECK (start, "selector", len)) { ret = TRUE; a->type = RSPAMD_RE_SELECTOR; diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c index 64f53773d2..7b7cabb696 100644 --- a/src/libserver/re_cache.c +++ b/src/libserver/re_cache.c @@ -23,6 +23,7 @@ #include "libutil/util.h" #include "libutil/regexp.h" #include "lua/lua_common.h" +#include "libstat/stat_api.h" #include "khash.h" @@ -1199,6 +1200,46 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, g_free (lenvec); } break; + case RSPAMD_RE_WORDS: + if (task->text_parts->len > 0) { + cnt = 0; + + PTR_ARRAY_FOREACH (task->text_parts, i, part) { + if (part->utf_words) { + cnt += part->utf_words->len; + } + } + + if (cnt > 0) { + scvec = g_malloc (sizeof (*scvec) * cnt); + lenvec = g_malloc (sizeof (*lenvec) * cnt); + + cnt = 0; + + PTR_ARRAY_FOREACH (task->text_parts, i, part) { + guint j; + rspamd_stat_token_t *tok; + + + if (part->utf_words) { + for (j = 0; j < part->utf_words->len; j ++) { + tok = &g_array_index (part->utf_words, rspamd_stat_token_t, j); + scvec[cnt] = tok->begin; + lenvec[cnt++] = tok->len; + } + } + } + + ret = rspamd_re_cache_process_regexp_data (rt, re, + task, scvec, lenvec, cnt, TRUE); + + msg_debug_re_task ("checking sa words regexp: %s -> %d", + rspamd_regexp_get_pattern (re), ret); + g_free (scvec); + g_free (lenvec); + } + } + break; case RSPAMD_RE_SELECTOR: if (rspamd_re_cache_process_selector (task, rt, re_class->type_data, diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h index c14b29ef0d..596ea08c27 100644 --- a/src/libserver/re_cache.h +++ b/src/libserver/re_cache.h @@ -35,6 +35,7 @@ enum rspamd_re_type { RSPAMD_RE_BODY, /* full in SA */ RSPAMD_RE_SABODY, /* body in SA */ RSPAMD_RE_SARAWBODY, /* rawbody in SA */ + RSPAMD_RE_WORDS, /* normalized words */ RSPAMD_RE_SELECTOR, /* use lua selector to process regexp */ RSPAMD_RE_MAX };