git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
Use hits from the cache in re cache
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 9 Dec 2015 14:54:50 +0000 (14:54 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 9 Dec 2015 14:54:50 +0000 (14:54 +0000)
src/libmime/mime_expressions.c
src/libserver/re_cache.c
src/libserver/re_cache.h
src/lua/lua_task.c

index 8d42bf9a86ae7fc6e2c4ad04647d4fa1a2869a0b..bc13859c1425ae189c0934636a8fd63eddcb793e 100644 (file)
@@ -342,6 +342,13 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line)
        result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
                        &err);
 
+       if (result->is_multiple) {
+               rspamd_regexp_set_maxhits (result->regexp, 0);
+       }
+       else {
+               rspamd_regexp_set_maxhits (result->regexp, 1);
+       }
+
        g_string_free (re_flags, TRUE);
 
        if (result->regexp == NULL || err != NULL) {
@@ -687,8 +694,7 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
                                re->type,
                                re->header,
                                strlen (re->header),
-                               re->is_strong,
-                               re->is_multiple);
+                               re->is_strong);
        }
        else {
                ret = rspamd_re_cache_process (task,
@@ -697,8 +703,7 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
                                re->type,
                                NULL,
                                0,
-                               re->is_strong,
-                               re->is_multiple);
+                               re->is_strong);
        }
 
        if (re->is_test) {
index c29bc7a3121e1d0dadb7eaec04aa5d7b9dde3963..c310c7fb88d350a9756ed353d528c2d7f081e3dd 100644 (file)
@@ -399,10 +399,11 @@ rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache)
 static guint
 rspamd_re_cache_process_pcre (struct rspamd_re_runtime *rt,
                rspamd_regexp_t *re, const guchar *in, gsize len,
-               gboolean is_raw, gboolean is_multiple)
+               gboolean is_raw)
 {
        guint r = 0;
        const gchar *start = NULL, *end = NULL;
+       guint max_hits = rspamd_regexp_get_maxhits (re);
 
        if (len == 0) {
                len = strlen (in);
@@ -421,7 +422,7 @@ rspamd_re_cache_process_pcre (struct rspamd_re_runtime *rt,
                        NULL)) {
                r++;
 
-               if (!is_multiple || r >= 0xFF) {
+               if (max_hits > 0 && r > max_hits) {
                        break;
                }
        }
@@ -446,29 +447,32 @@ rspamd_re_cache_hyperscan_cb (unsigned int id,
        struct rspamd_re_hyperscan_cbdata *cbdata = ud;
        struct rspamd_re_runtime *rt;
        struct rspamd_re_cache_elt *pcre_elt;
-       guint ret;
+       guint ret, maxhits;
 
        rt = cbdata->rt;
 
+       pcre_elt = g_ptr_array_index (rt->cache->re, id);
 
        if (flags & HS_FLAG_PREFILTER) {
                if (!isset (rt->checked, id)) {
                        /* We need to match the corresponding pcre first */
-                       pcre_elt = g_ptr_array_index (rt->cache->re, id);
                        ret = rspamd_re_cache_process_pcre (rt,
                                        pcre_elt->re,
                                        cbdata->in + from,
                                        to - from,
-                                       FALSE,
-                                       TRUE);
+                                       FALSE);
 
                        setbit (rt->checked, id);
                        rt->results[id] = ret;
                }
        }
        else {
+               maxhits = rspamd_regexp_get_maxhits (pcre_elt->re);
                setbit (rt->checked, id);
-               rt->results[id] ++;
+
+               if (maxhits == 0 || rt->results[id] < maxhits) {
+                       rt->results[id]++;
+               }
        }
 
        return 0;
@@ -479,7 +483,7 @@ static guint
 rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
                rspamd_regexp_t *re,
                const guchar *in, gsize len,
-               gboolean is_raw, gboolean is_multiple)
+               gboolean is_raw)
 {
        struct rspamd_re_cache_elt *elt;
        struct rspamd_re_class *re_class;
@@ -491,14 +495,14 @@ rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
        re_class = rspamd_regexp_get_class (re);
 
 #ifndef WITH_HYPERSCAN
-       ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw, is_multiple);
+       ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw);
        setbit (rt->checked, re_id);
        rt->results[re_id] = ret;
 #else
        struct rspamd_re_hyperscan_cbdata cbdata;
 
        if (elt->match_type == RSPAMD_RE_CACHE_PCRE) {
-               ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw, is_multiple);
+               ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw);
                setbit (rt->checked, re_id);
                rt->results[re_id] = ret;
        }
@@ -560,8 +564,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                struct rspamd_re_runtime *rt,
                rspamd_regexp_t *re,
                struct rspamd_re_class *re_class,
-               gboolean is_strong,
-               gboolean is_multiple)
+               gboolean is_strong)
 {
        guint ret = 0, i;
        GList *cur, *headerlist;
@@ -604,7 +607,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                                /* Match re */
                                if (in) {
                                        ret += rspamd_re_cache_process_regexp_data (rt, re, in,
-                                                       strlen (in), raw, is_multiple);
+                                                       strlen (in), raw);
                                        debug_task ("checking header %s regexp: %s -> %d",
                                                        re_class->type_data,
                                                        rspamd_regexp_get_pattern (re), ret);
@@ -619,7 +622,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                in = task->raw_headers_content.begin;
                len = task->raw_headers_content.len;
                ret = rspamd_re_cache_process_regexp_data (rt, re, in,
-                               len, raw, is_multiple);
+                               len, raw);
                debug_task ("checking allheader regexp: %s -> %d",
                                rspamd_regexp_get_pattern (re), ret);
                break;
@@ -649,7 +652,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
 
                        if (len > 0) {
                                ret += rspamd_re_cache_process_regexp_data (rt, re, in,
-                                               len, raw, is_multiple);
+                                               len, raw);
                                debug_task ("checking mime regexp: %s -> %d",
                                                rspamd_regexp_get_pattern (re), ret);
                        }
@@ -665,7 +668,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                        raw = FALSE;
 
                        ret += rspamd_re_cache_process_regexp_data (rt, re, in,
-                                       len, raw, is_multiple);
+                                       len, raw);
                }
 
                g_hash_table_iter_init (&it, task->emails);
@@ -677,7 +680,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                        raw = FALSE;
 
                        ret += rspamd_re_cache_process_regexp_data (rt, re, in,
-                                       len, raw, is_multiple);
+                                       len, raw);
                }
 
                debug_task ("checking url regexp: %s -> %d",
@@ -689,7 +692,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                len = task->msg.len;
 
                ret = rspamd_re_cache_process_regexp_data (rt, re, in,
-                               len, raw, is_multiple);
+                               len, raw);
                debug_task ("checking rawbody regexp: %s -> %d",
                                rspamd_regexp_get_pattern (re), ret);
                break;
@@ -711,8 +714,7 @@ rspamd_re_cache_process (struct rspamd_task *task,
                enum rspamd_re_type type,
                gpointer type_data,
                gsize datalen,
-               gboolean is_strong,
-               gboolean is_multiple)
+               gboolean is_strong)
 {
        guint64 re_id;
        struct rspamd_re_class *re_class;
@@ -733,12 +735,7 @@ rspamd_re_cache_process (struct rspamd_task *task,
 
        if (isset (rt->checked, re_id)) {
                /* Fast path */
-               if (is_multiple) {
-                       return rt->results[re_id];
-               }
-               else {
-                       return rt->results[re_id] ? 1 : 0;
-               }
+               return rt->results[re_id];
        }
        else {
                /* Slow path */
@@ -751,7 +748,7 @@ rspamd_re_cache_process (struct rspamd_task *task,
                }
 
                return rspamd_re_cache_exec_re (task, rt, re, re_class,
-                               is_strong, is_multiple);
+                               is_strong);
        }
 
        return 0;
@@ -999,12 +996,19 @@ rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
 
                        hs_flags[i] = 0;
                        pcre_flags = rspamd_regexp_get_pcre_flags (re);
+
                        if (pcre_flags & PCRE_UTF8) {
                                hs_flags[i] |= HS_FLAG_UTF8;
                        }
                        if (pcre_flags & PCRE_CASELESS) {
                                hs_flags[i] |= HS_FLAG_CASELESS;
                        }
+                       if (pcre_flags & PCRE_MULTILINE) {
+                               hs_flags[i] |= HS_FLAG_MULTILINE;
+                       }
+                       if (rspamd_regexp_get_maxhits (re) == 1) {
+                               hs_flags[i] |= HS_FLAG_SINGLEMATCH;
+                       }
 
                        if (hs_compile (rspamd_regexp_get_pattern (re),
                                        hs_flags[i],
index 3100565157c4e85e5e02ce12695d37336f958386..c812b8ef325c869930a4e663d2d9c4913a80c419 100644 (file)
@@ -86,7 +86,6 @@ struct rspamd_re_runtime* rspamd_re_cache_runtime_new (struct rspamd_re_cache *c
  * @param type_data associated data with the type (e.g. header name)
  * @param datalen associated data length
  * @param is_strong use case sensitive match when looking for headers
- * @param is_multiple return multiple possible occurrences of the specified re
  */
 gint rspamd_re_cache_process (struct rspamd_task *task,
                struct rspamd_re_runtime *rt,
@@ -94,8 +93,7 @@ gint rspamd_re_cache_process (struct rspamd_task *task,
                enum rspamd_re_type type,
                gpointer type_data,
                gsize datalen,
-               gboolean is_strong,
-               gboolean is_multiple);
+               gboolean is_strong);
 
 /**
  * Destroy runtime data
index 8246d921b3ddf6faf7bd4d0ec9967db0695306c6..8529489f6e4da821e7ba5d5a1af8d2cd668069cd 100644 (file)
@@ -440,7 +440,6 @@ LUA_FUNCTION_DEF (task, set_settings);
  *   + `url`: url regexp
  * - `header`: for header and rawheader regexp means the name of header
  * - `strong`: case sensitive match for headers
- * - `multiple`: allow multiple matches
  * @return {number} number of regexp occurences in the task (limited by 255 so far)
  */
 LUA_FUNCTION_DEF (task, process_regexp);
@@ -2007,7 +2006,7 @@ lua_task_process_regexp (lua_State *L)
 {
        struct rspamd_task *task = lua_check_task (L, 1);
        struct rspamd_lua_regexp *re = NULL;
-       gboolean strong = FALSE, multiple = FALSE;
+       gboolean strong = FALSE;
        const gchar *type_str = NULL, *header_str = NULL;
        gsize header_len = 0;
        GError *err = NULL;
@@ -2024,13 +2023,12 @@ lua_task_process_regexp (lua_State *L)
         *   + `url`: url regexp
         * - `header`: for header and rawheader regexp means the name of header
         * - `strong`: case sensitive match for headers
-        * - `multiple`: allow multiple matches
         */
        if (task != NULL) {
                if (!rspamd_lua_parse_table_arguments (L, 2, &err,
-                                       "*re=U{regexp};*type=S;header=V;strong=B;multiple=B",
+                                       "*re=U{regexp};*type=S;header=V;strong=B",
                                        &re, &type_str, &header_len, &header_str,
-                                       &strong, &multiple)) {
+                                       &strong)) {
                        msg_err_task ("cannot get parameters list: %e", err);
 
                        if (err) {
@@ -2047,7 +2045,7 @@ lua_task_process_regexp (lua_State *L)
                        }
                        else {
                                ret = rspamd_re_cache_process (task, task->re_rt, re->re, type,
-                                               (gpointer) header_str, header_len, strong, multiple);
+                                               (gpointer) header_str, header_len, strong);
                        }
                }
        }