]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Implement scoped regexp cache system
authorVsevolod Stakhov <vsevolod@rspamd.com>
Fri, 27 Jun 2025 16:16:00 +0000 (17:16 +0100)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Fri, 27 Jun 2025 16:16:00 +0000 (17:16 +0100)
src/libserver/re_cache.c
src/libserver/re_cache.h
src/lua/lua_config.c

index 50b155ae047ba6128895b534c81f979d529a86a3..25ed72948d91e5faf9c5ca4ca69b8bb0dd746619 100644 (file)
@@ -91,6 +91,7 @@ struct rspamd_re_class {
        gsize type_len;
        GHashTable *re;
        rspamd_cryptobox_hash_state_t *st;
+       struct rspamd_re_cache *cache; /* Back-reference to owning cache */
 
        char hash[rspamd_cryptobox_HASHBYTES + 1];
 
@@ -126,6 +127,11 @@ struct rspamd_re_cache {
        unsigned int max_re_data;
        char hash[rspamd_cryptobox_HASHBYTES + 1];
        lua_State *L;
+
+       /* Intrusive linked list for scoped caches */
+       struct rspamd_re_cache *next, *prev;
+       char *scope;
+
 #ifdef WITH_HYPERSCAN
        enum rspamd_hyperscan_status hyperscan_loaded;
        gboolean disable_hyperscan;
@@ -149,6 +155,9 @@ struct rspamd_re_runtime {
        struct rspamd_re_cache *cache;
        struct rspamd_re_cache_stat stat;
        gboolean has_hs;
+
+       /* Linked list for multiple scoped runtimes */
+       struct rspamd_re_runtime *next, *prev;
 };
 
 static GQuark
@@ -174,6 +183,62 @@ rspamd_re_cache_class_id(enum rspamd_re_type type,
        return rspamd_cryptobox_fast_hash_final(&st);
 }
 
+/*
+ * Look up the cache that serves `scope` in a scoped cache list.
+ * A NULL `scope` designates the default (unnamed) cache.
+ * Returns the matching element or NULL when the list is empty or
+ * contains no such scope.
+ */
+static struct rspamd_re_cache *
+rspamd_re_cache_find_by_scope(struct rspamd_re_cache *cache_head, const char *scope)
+{
+       struct rspamd_re_cache *elt;
+
+       /* DL_FOREACH does not iterate at all when the head is NULL */
+       DL_FOREACH(cache_head, elt)
+       {
+               /*
+                * g_strcmp0 is NULL-safe: two NULLs compare equal (default scope),
+                * NULL never equals a real string.
+                */
+               if (g_strcmp0(elt->scope, scope) == 0) {
+                       return elt;
+               }
+       }
+
+       return NULL;
+}
+
+/*
+ * Return the cache that serves `scope`, creating and linking a new one
+ * when the list does not contain it yet.
+ * `cache_head` points to the list head and is updated when the list was
+ * empty; a NULL `scope` means the default scope.
+ * Returns NULL only if `cache_head` itself is NULL.
+ */
+static struct rspamd_re_cache *
+rspamd_re_cache_add_to_scope_list(struct rspamd_re_cache **cache_head, const char *scope)
+{
+       struct rspamd_re_cache *new_cache, *existing;
+
+       if (!cache_head) {
+               return NULL;
+       }
+
+       /* Check if scope already exists */
+       existing = rspamd_re_cache_find_by_scope(*cache_head, scope);
+       if (existing) {
+               return existing;
+       }
+
+       /*
+        * Create new cache for this scope. rspamd_re_cache_new() leaves
+        * `scope` set to NULL, so nothing has to be freed before assigning;
+        * g_strdup(NULL) yields NULL which keeps the default scope unnamed.
+        */
+       new_cache = rspamd_re_cache_new();
+       new_cache->scope = g_strdup(scope);
+
+       /*
+        * utlist doubly-linked lists require head->prev to point at the tail.
+        * A head that came straight from rspamd_re_cache_new() has
+        * prev == NULL and DL_APPEND would dereference it, so restore the
+        * invariant before appending.
+        */
+       if (*cache_head != NULL && (*cache_head)->prev == NULL) {
+               struct rspamd_re_cache *tail = *cache_head;
+
+               while (tail->next != NULL) {
+                       tail = tail->next;
+               }
+
+               (*cache_head)->prev = tail;
+       }
+
+       /* DL_APPEND also covers the empty list and sets prev/next itself */
+       DL_APPEND(*cache_head, new_cache);
+
+       return new_cache;
+}
+
 static void
 rspamd_re_cache_destroy(struct rspamd_re_cache *cache)
 {
@@ -230,6 +295,11 @@ rspamd_re_cache_destroy(struct rspamd_re_cache *cache)
 
        g_hash_table_unref(cache->re_classes);
        g_ptr_array_free(cache->re, TRUE);
+
+       if (cache->scope) {
+               g_free(cache->scope);
+       }
+
        g_free(cache);
 }
 
@@ -252,6 +322,8 @@ rspamd_re_cache_new(void)
        cache->nre = 0;
        cache->re = g_ptr_array_new_full(256, rspamd_re_cache_elt_dtor);
        cache->selectors = kh_init(lua_selectors_hash);
+       cache->next = cache->prev = NULL;
+       cache->scope = NULL; /* Default scope */
 #ifdef WITH_HYPERSCAN
        cache->hyperscan_loaded = RSPAMD_HYPERSCAN_UNKNOWN;
 #endif
@@ -295,6 +367,7 @@ rspamd_re_cache_add(struct rspamd_re_cache *cache,
                re_class->id = class_id;
                re_class->type_len = datalen;
                re_class->type = type;
+               re_class->cache = cache; /* Set back-reference */
                re_class->re = g_hash_table_new_full(rspamd_regexp_hash,
                                                                                         rspamd_regexp_equal, NULL, (GDestroyNotify) rspamd_regexp_unref);
 
@@ -330,6 +403,26 @@ rspamd_re_cache_add(struct rspamd_re_cache *cache,
        return nre;
 }
 
+/*
+ * Add `re` to the cache that belongs to `scope`, creating that cache on
+ * demand. The remaining arguments are forwarded to rspamd_re_cache_add()
+ * unchanged and its result is returned.
+ */
+rspamd_regexp_t *
+rspamd_re_cache_add_scoped(struct rspamd_re_cache **cache_head, const char *scope,
+                                                  rspamd_regexp_t *re, enum rspamd_re_type type,
+                                                  gconstpointer type_data, gsize datalen,
+                                                  int lua_cbref)
+{
+       struct rspamd_re_cache *scoped_cache;
+
+       g_assert(cache_head != NULL);
+       g_assert(re != NULL);
+
+       /* A NULL scope selects (or creates) the default scope */
+       scoped_cache = rspamd_re_cache_add_to_scope_list(cache_head, scope);
+
+       if (scoped_cache == NULL) {
+               return NULL;
+       }
+
+       return rspamd_re_cache_add(scoped_cache, re, type, type_data, datalen, lua_cbref);
+}
+
 void rspamd_re_cache_replace(struct rspamd_re_cache *cache,
                                                         rspamd_regexp_t *what,
                                                         rspamd_regexp_t *with)
@@ -371,6 +464,23 @@ void rspamd_re_cache_replace(struct rspamd_re_cache *cache,
        }
 }
 
+/*
+ * Replace `what` with `with` inside the cache registered for `scope`.
+ * Nothing happens when the scope does not exist; it is never created here.
+ */
+void rspamd_re_cache_replace_scoped(struct rspamd_re_cache **cache_head, const char *scope,
+                                                                       rspamd_regexp_t *what,
+                                                                       rspamd_regexp_t *with)
+{
+       struct rspamd_re_cache *scoped_cache;
+
+       g_assert(cache_head != NULL);
+       g_assert(what != NULL);
+       g_assert(with != NULL);
+
+       /* A NULL scope selects the default scope */
+       scoped_cache = rspamd_re_cache_find_by_scope(*cache_head, scope);
+
+       if (scoped_cache == NULL) {
+               return;
+       }
+
+       rspamd_re_cache_replace(scoped_cache, what, with);
+}
+
 static int
 rspamd_re_cache_sort_func(gconstpointer a, gconstpointer b)
 {
@@ -515,8 +625,21 @@ void rspamd_re_cache_init(struct rspamd_re_cache *cache, struct rspamd_config *c
 #endif
 }
 
-struct rspamd_re_runtime *
-rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache)
+/*
+ * Run rspamd_re_cache_init() with `cfg` on every cache of the list,
+ * starting from `cache_head`.
+ */
+void rspamd_re_cache_init_scoped(struct rspamd_re_cache *cache_head,
+                                                                struct rspamd_config *cfg)
+{
+       struct rspamd_re_cache *scoped_cache;
+
+       g_assert(cache_head != NULL);
+
+       /* Forward walk over the `next` links, one init call per scope */
+       for (scoped_cache = cache_head; scoped_cache != NULL; scoped_cache = scoped_cache->next) {
+               rspamd_re_cache_init(scoped_cache, cfg);
+       }
+}
+
+static struct rspamd_re_runtime *
+rspamd_re_cache_runtime_new_single(struct rspamd_re_cache *cache)
 {
        struct rspamd_re_runtime *rt;
        g_assert(cache != NULL);
@@ -534,6 +657,58 @@ rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache)
        return rt;
 }
 
+/*
+ * Create one runtime per cache reachable from `cache` and return them as
+ * a doubly-linked list, in the same order as the caches.
+ * Returns NULL if no runtime could be created.
+ */
+struct rspamd_re_runtime *
+rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache)
+{
+       struct rspamd_re_runtime *rt_head = NULL, *rt;
+       struct rspamd_re_cache *cur;
+
+       g_assert(cache != NULL);
+
+       /*
+        * Create runtime for all scopes in the chain.
+        * This ensures task has runtimes for all available scopes.
+        */
+       DL_FOREACH(cache, cur)
+       {
+               rt = rspamd_re_cache_runtime_new_single(cur);
+               if (rt) {
+                       /*
+                        * Always go through DL_APPEND: for the first element it sets
+                        * head->prev to the head itself, which later appends rely on.
+                        * Assigning the head by hand leaves prev unset and the next
+                        * DL_APPEND would dereference it.
+                        */
+                       DL_APPEND(rt_head, rt);
+               }
+       }
+
+       return rt_head;
+}
+
+/*
+ * Alias of rspamd_re_cache_runtime_new(): that function already builds
+ * runtimes for every scope in the list.
+ */
+struct rspamd_re_runtime *
+rspamd_re_cache_runtime_new_all_scopes(struct rspamd_re_cache *cache_head)
+{
+       return rspamd_re_cache_runtime_new(cache_head);
+}
+
+/*
+ * Create a runtime for the single cache registered under `scope`.
+ * Returns NULL when the list is empty or the scope is not present.
+ */
+struct rspamd_re_runtime *
+rspamd_re_cache_runtime_new_scoped(struct rspamd_re_cache *cache_head, const char *scope)
+{
+       struct rspamd_re_cache *scoped_cache = NULL;
+
+       if (cache_head != NULL) {
+               scoped_cache = rspamd_re_cache_find_by_scope(cache_head, scope);
+       }
+
+       if (scoped_cache == NULL) {
+               return NULL;
+       }
+
+       return rspamd_re_cache_runtime_new_single(scoped_cache);
+}
+
 const struct rspamd_re_cache_stat *
 rspamd_re_cache_get_stat(struct rspamd_re_runtime *rt)
 {
@@ -1503,20 +1678,20 @@ rspamd_re_cache_exec_re(struct rspamd_task *task,
        return rt->results[re_id];
 }
 
-int rspamd_re_cache_process(struct rspamd_task *task,
-                                                       rspamd_regexp_t *re,
-                                                       enum rspamd_re_type type,
-                                                       gconstpointer type_data,
-                                                       gsize datalen,
-                                                       gboolean is_strong)
+static int
+rspamd_re_cache_process_single(struct rspamd_task *task,
+                                                          struct rspamd_re_runtime *rt,
+                                                          rspamd_regexp_t *re,
+                                                          enum rspamd_re_type type,
+                                                          gconstpointer type_data,
+                                                          gsize datalen,
+                                                          gboolean is_strong)
 {
        uint64_t re_id;
        struct rspamd_re_class *re_class;
        struct rspamd_re_cache *cache;
-       struct rspamd_re_runtime *rt;
 
        g_assert(task != NULL);
-       rt = task->re_rt;
        g_assert(rt != NULL);
        g_assert(re != NULL);
 
@@ -1551,6 +1726,53 @@ int rspamd_re_cache_process(struct rspamd_task *task,
        return 0;
 }
 
+/*
+ * Process `re` for `task` using the runtime that belongs to the cache the
+ * regexp was registered in. Returns the result of
+ * rspamd_re_cache_process_single() for that runtime, or 0 when the task
+ * has no runtimes, the regexp has no class/cache, or no runtime matches.
+ */
+int rspamd_re_cache_process(struct rspamd_task *task,
+                                                       rspamd_regexp_t *re,
+                                                       enum rspamd_re_type type,
+                                                       gconstpointer type_data,
+                                                       gsize datalen,
+                                                       gboolean is_strong)
+{
+       struct rspamd_re_runtime *runtimes, *candidate;
+       struct rspamd_re_class *klass;
+       struct rspamd_re_cache *owner;
+
+       g_assert(task != NULL);
+       g_assert(re != NULL);
+
+       runtimes = task->re_rt;
+       if (runtimes == NULL) {
+               return 0;
+       }
+
+       /* regexp -> class -> owning cache (back-reference stored in the class) */
+       klass = rspamd_regexp_get_class(re);
+       if (klass == NULL) {
+               return 0;
+       }
+
+       owner = klass->cache;
+       if (owner == NULL) {
+               return 0;
+       }
+
+       /* Only the first runtime bound to the owning cache is used */
+       DL_FOREACH(runtimes, candidate)
+       {
+               if (candidate->cache == owner) {
+                       return rspamd_re_cache_process_single(task, candidate, re, type,
+                                                                                                 type_data, datalen, is_strong);
+               }
+       }
+
+       return 0;
+}
+
 int rspamd_re_cache_process_ffi(void *ptask,
                                                                void *pre,
                                                                int type,
@@ -1571,24 +1793,30 @@ int rspamd_re_cache_process_ffi(void *ptask,
 
 void rspamd_re_cache_runtime_destroy(struct rspamd_re_runtime *rt)
 {
+       struct rspamd_re_runtime *cur, *tmp;
+
        g_assert(rt != NULL);
 
-       if (rt->sel_cache) {
-               struct rspamd_re_selector_result sr;
+       /* Handle linked list of runtimes */
+       DL_FOREACH_SAFE(rt, cur, tmp)
+       {
+               if (cur->sel_cache) {
+                       struct rspamd_re_selector_result sr;
 
-               kh_foreach_value(rt->sel_cache, sr, {
-                       for (unsigned int i = 0; i < sr.cnt; i++) {
-                               g_free((gpointer) sr.scvec[i]);
-                       }
+                       kh_foreach_value(cur->sel_cache, sr, {
+                               for (unsigned int i = 0; i < sr.cnt; i++) {
+                                       g_free((gpointer) sr.scvec[i]);
+                               }
 
-                       g_free(sr.scvec);
-                       g_free(sr.lenvec);
-               });
-               kh_destroy(selectors_results_hash, rt->sel_cache);
-       }
+                               g_free(sr.scvec);
+                               g_free(sr.lenvec);
+                       });
+                       kh_destroy(selectors_results_hash, cur->sel_cache);
+               }
 
-       REF_RELEASE(rt->cache);
-       g_free(rt);
+               REF_RELEASE(cur->cache);
+               g_free(cur);
+       }
 }
 
 void rspamd_re_cache_unref(struct rspamd_re_cache *cache)
@@ -1598,6 +1826,21 @@ void rspamd_re_cache_unref(struct rspamd_re_cache *cache)
        }
 }
 
+/*
+ * Drop one reference from every cache in the list starting at
+ * `cache_head`. A NULL head is accepted and ignored.
+ */
+void rspamd_re_cache_unref_scoped(struct rspamd_re_cache *cache_head)
+{
+       struct rspamd_re_cache *cur, *next;
+
+       /*
+        * The whole list is being torn down, so there is no need for
+        * DL_DELETE (which requires a valid head->prev, something a head
+        * created directly by rspamd_re_cache_new() does not have).
+        */
+       for (cur = cache_head; cur != NULL; cur = next) {
+               /* Read the link before the element may be destroyed */
+               next = cur->next;
+
+               /*
+                * Detach the element as a valid single-element list, so a cache
+                * that outlives this call through another reference never points
+                * at siblings that have already been released.
+                */
+               cur->next = NULL;
+               cur->prev = cur;
+
+               rspamd_re_cache_unref(cur);
+       }
+}
+
 struct rspamd_re_cache *
 rspamd_re_cache_ref(struct rspamd_re_cache *cache)
 {
@@ -1620,6 +1863,23 @@ unsigned int rspamd_re_cache_set_limit(struct rspamd_re_cache *cache, unsigned i
        return old;
 }
 
+/*
+ * Set the regexp limit on the cache registered under `scope` and return
+ * the previous limit. Returns 0 without changing anything when the list
+ * is empty, `scope` is NULL, or the scope is not found.
+ */
+unsigned int rspamd_re_cache_set_limit_scoped(struct rspamd_re_cache *cache_head, const char *scope, unsigned int limit)
+{
+       struct rspamd_re_cache *scoped_cache;
+
+       /* Note: unlike the other scoped helpers, a NULL scope is rejected here */
+       if (cache_head == NULL || scope == NULL) {
+               return 0;
+       }
+
+       scoped_cache = rspamd_re_cache_find_by_scope(cache_head, scope);
+
+       if (scoped_cache == NULL) {
+               return 0;
+       }
+
+       return rspamd_re_cache_set_limit(scoped_cache, limit);
+}
+
 const char *
 rspamd_re_cache_type_to_string(enum rspamd_re_type type)
 {
@@ -2257,6 +2517,55 @@ int rspamd_re_cache_compile_hyperscan(struct rspamd_re_cache *cache,
 #endif
 }
 
+int rspamd_re_cache_compile_hyperscan_scoped(struct rspamd_re_cache *cache_head,
+                                                                                        const char *cache_dir,
+                                                                                        double max_time,
+                                                                                        gboolean silent,
+                                                                                        struct ev_loop *event_loop,
+                                                                                        void (*cb)(unsigned int ncompiled, GError *err, void *cbd),
+                                                                                        void *cbd)
+{
+#ifndef WITH_HYPERSCAN
+       return -1;
+#else
+       struct rspamd_re_cache *cur;
+       int result = 0, total_compiled = 0;
+       GError *first_error = NULL;
+
+       if (!cache_head) {
+               return -1;
+       }
+
+       /*
+        * For now, compile each cache sequentially
+        * TODO: Could be made async if needed
+        */
+       DL_FOREACH(cache_head, cur)
+       {
+               result = rspamd_re_cache_compile_hyperscan(cur, cache_dir, max_time, silent,
+                                                                                                  event_loop, NULL, NULL);
+               if (result >= 0) {
+                       total_compiled += result;
+               }
+               else if (!first_error) {
+                       first_error = g_error_new(rspamd_re_cache_quark(), result,
+                                                                         "Failed to compile hyperscan for scope '%s'",
+                                                                         cur->scope ? cur->scope : "unknown");
+               }
+       }
+
+       if (cb) {
+               cb(total_compiled, first_error, cbd);
+       }
+
+       if (first_error) {
+               g_error_free(first_error);
+       }
+
+       return total_compiled;
+#endif
+}
+
 gboolean
 rspamd_re_cache_is_valid_hyperscan_file(struct rspamd_re_cache *cache,
                                                                                const char *path, gboolean silent, gboolean try_load, GError **err)
@@ -2691,6 +3000,48 @@ rspamd_re_cache_load_hyperscan(struct rspamd_re_cache *cache,
 #endif
 }
 
+/*
+ * Load precompiled hyperscan data for every cache in the list and fold
+ * the per-cache results into one status:
+ *  - LOADED_FULL when every cache reported LOADED_FULL,
+ *  - LOADED_PARTIAL when at least one cache loaded (fully or partially)
+ *    but not all of them fully,
+ *  - LOAD_ERROR when nothing loaded or `cache_head` is NULL,
+ *  - UNSUPPORTED when built without hyperscan.
+ */
+enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan_scoped(
+       struct rspamd_re_cache *cache_head,
+       const char *cache_dir, bool try_load)
+{
+#ifndef WITH_HYPERSCAN
+       return RSPAMD_HYPERSCAN_UNSUPPORTED;
+#else
+       struct rspamd_re_cache *scoped_cache;
+       gboolean any_loaded = FALSE, every_full = TRUE;
+
+       if (cache_head == NULL) {
+               return RSPAMD_HYPERSCAN_LOAD_ERROR;
+       }
+
+       DL_FOREACH(cache_head, scoped_cache)
+       {
+               switch (rspamd_re_cache_load_hyperscan(scoped_cache, cache_dir, try_load)) {
+               case RSPAMD_HYPERSCAN_LOADED_FULL:
+                       any_loaded = TRUE;
+                       break;
+               case RSPAMD_HYPERSCAN_LOADED_PARTIAL:
+                       any_loaded = TRUE;
+                       every_full = FALSE;
+                       break;
+               default:
+                       /* Any other status counts as "this scope did not load" */
+                       every_full = FALSE;
+                       break;
+               }
+       }
+
+       if (!any_loaded) {
+               return RSPAMD_HYPERSCAN_LOAD_ERROR;
+       }
+
+       return every_full ? RSPAMD_HYPERSCAN_LOADED_FULL : RSPAMD_HYPERSCAN_LOADED_PARTIAL;
+#endif
+}
+
 void rspamd_re_cache_add_selector(struct rspamd_re_cache *cache,
                                                                  const char *sname,
                                                                  int ref)
@@ -2717,3 +3068,108 @@ void rspamd_re_cache_add_selector(struct rspamd_re_cache *cache,
                kh_value(cache->selectors, k) = ref;
        }
 }
+
+/*
+ * Register selector `sname` with Lua reference `ref` in the cache that
+ * belongs to `scope`, creating that cache on demand.
+ */
+void rspamd_re_cache_add_selector_scoped(struct rspamd_re_cache **cache_head, const char *scope,
+                                                                                const char *sname, int ref)
+{
+       struct rspamd_re_cache *scoped_cache;
+
+       g_assert(cache_head != NULL);
+       g_assert(sname != NULL);
+
+       /* A NULL scope selects (or creates) the default scope */
+       scoped_cache = rspamd_re_cache_add_to_scope_list(cache_head, scope);
+
+       if (scoped_cache == NULL) {
+               return;
+       }
+
+       rspamd_re_cache_add_selector(scoped_cache, sname, ref);
+}
+
+/*
+ * Public wrapper around the file-local scope lookup: returns the cache
+ * registered under `scope` (NULL means the default scope) or NULL.
+ */
+struct rspamd_re_cache *rspamd_re_cache_find_scope(struct rspamd_re_cache *cache_head, const char *scope)
+{
+       return rspamd_re_cache_find_by_scope(cache_head, scope);
+}
+
+/*
+ * Unlink the cache registered under `scope` from the list and drop one
+ * reference from it.
+ * Returns TRUE when a cache was removed; FALSE when the list is empty,
+ * `scope` is NULL (the default scope cannot be removed) or the scope is
+ * not present.
+ */
+gboolean rspamd_re_cache_remove_scope(struct rspamd_re_cache **cache_head, const char *scope)
+{
+       struct rspamd_re_cache *target;
+
+       if (!cache_head || !*cache_head) {
+               return FALSE;
+       }
+
+       /* Prevent removal of default scope (NULL) to keep head stable */
+       if (!scope) {
+               return FALSE;
+       }
+
+       target = rspamd_re_cache_find_by_scope(*cache_head, scope);
+       if (!target) {
+               return FALSE;
+       }
+
+       /*
+        * Remove from linked list. DL_DELETE itself advances or clears
+        * *cache_head when the removed element is the head, so no manual
+        * head fix-up is needed afterwards.
+        */
+       DL_DELETE(*cache_head, target);
+
+       /*
+        * Leave the removed cache as a valid single-element list so that it
+        * does not keep links into the list it no longer belongs to if other
+        * references keep it alive.
+        */
+       target->next = NULL;
+       target->prev = target;
+
+       /* Unref the cache */
+       rspamd_re_cache_unref(target);
+
+       return TRUE;
+}
+
+/*
+ * Count the caches in the list starting at `cache_head`;
+ * an empty (NULL) list has zero scopes.
+ */
+unsigned int rspamd_re_cache_count_scopes(struct rspamd_re_cache *cache_head)
+{
+       struct rspamd_re_cache *scoped_cache;
+       unsigned int nscopes = 0;
+
+       /* Plain forward walk over the `next` links */
+       for (scoped_cache = cache_head; scoped_cache != NULL; scoped_cache = scoped_cache->next) {
+               nscopes++;
+       }
+
+       return nscopes;
+}
+
+/*
+ * Build a newly allocated, NULL-terminated array with a copy of every
+ * scope name in the list; the unnamed default scope is reported as
+ * "default". The number of names (terminator excluded) is stored in
+ * `*count_out`.
+ * Returns NULL (and sets `*count_out` to 0 when possible) if either
+ * argument is NULL. The caller owns the result and frees it with
+ * g_strfreev().
+ */
+char **rspamd_re_cache_get_scope_names(struct rspamd_re_cache *cache_head, unsigned int *count_out)
+{
+       struct rspamd_re_cache *cur;
+       char **names = NULL;
+       unsigned int i = 0, count = 0;
+
+       if (!cache_head || !count_out) {
+               if (count_out) {
+                       *count_out = 0;
+               }
+               return NULL;
+       }
+
+       /* First count scopes; the head is non-NULL so there is at least one */
+       DL_COUNT(cache_head, cur, count);
+
+       /*
+        * One extra zeroed slot: g_strfreev() walks the array until it meets
+        * a NULL pointer, so the vector must be NULL-terminated.
+        */
+       names = g_new0(char *, (gsize) count + 1);
+
+       /* Fill array */
+       DL_FOREACH(cache_head, cur)
+       {
+               names[i++] = g_strdup(cur->scope ? cur->scope : "default");
+       }
+
+       names[i] = NULL;
+
+       *count_out = count;
+       return names;
+}
index 20b1108e0b5c457b38cb019780f18216875b3a2f..b64c7a9ab70f8228647bd40782d7bb0d9744abc5 100644 (file)
@@ -76,6 +76,22 @@ rspamd_re_cache_add(struct rspamd_re_cache *cache, rspamd_regexp_t *re,
                                        gconstpointer type_data, gsize datalen,
                                        int lua_cbref);
 
+/**
+ * Add the existing regexp to the cache with specified scope
+ * @param cache_head head of cache list
+ * @param scope scope name
+ * @param re regexp object
+ * @param type type of object
+ * @param type_data associated data with the type (e.g. header name)
+ * @param datalen associated data length
+ * @param lua_cbref optional lua callback reference for matching purposes
+ */
+rspamd_regexp_t *
+rspamd_re_cache_add_scoped(struct rspamd_re_cache **cache_head, const char *scope,
+                                                  rspamd_regexp_t *re, enum rspamd_re_type type,
+                                                  gconstpointer type_data, gsize datalen,
+                                                  int lua_cbref);
+
 /**
  * Replace regexp in the cache with another regexp
  * @param cache cache object
@@ -86,12 +102,29 @@ void rspamd_re_cache_replace(struct rspamd_re_cache *cache,
                                                         rspamd_regexp_t *what,
                                                         rspamd_regexp_t *with);
 
+/**
+ * Replace regexp in the scoped cache with another regexp
+ * @param cache_head head of cache list
+ * @param scope scope name
+ * @param what re to replace
+ * @param with regexp object to replace the origin
+ */
+void rspamd_re_cache_replace_scoped(struct rspamd_re_cache **cache_head, const char *scope,
+                                                                       rspamd_regexp_t *what,
+                                                                       rspamd_regexp_t *with);
+
 /**
  * Initialize and optimize re cache structure
  */
 void rspamd_re_cache_init(struct rspamd_re_cache *cache,
                                                  struct rspamd_config *cfg);
 
+/**
+ * Initialize and optimize re cache structures for all scopes
+ */
+void rspamd_re_cache_init_scoped(struct rspamd_re_cache *cache_head,
+                                                                struct rspamd_config *cfg);
+
 enum rspamd_hyperscan_status {
        RSPAMD_HYPERSCAN_UNKNOWN = 0,
        RSPAMD_HYPERSCAN_UNSUPPORTED,
@@ -108,10 +141,21 @@ enum rspamd_hyperscan_status {
 enum rspamd_hyperscan_status rspamd_re_cache_is_hs_loaded(struct rspamd_re_cache *cache);
 
 /**
- * Get runtime data for a cache
+ * Get runtime data for a cache - automatically creates runtimes for all scopes in the chain
+ * This is the main function used for task runtime creation
  */
 struct rspamd_re_runtime *rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache);
 
+/**
+ * Get runtime data for all scoped caches (same as rspamd_re_cache_runtime_new)
+ */
+struct rspamd_re_runtime *rspamd_re_cache_runtime_new_all_scopes(struct rspamd_re_cache *cache_head);
+
+/**
+ * Get runtime data for a specific scoped cache only
+ */
+struct rspamd_re_runtime *rspamd_re_cache_runtime_new_scoped(struct rspamd_re_cache *cache_head, const char *scope);
+
 /**
  * Get runtime statistics
  */
@@ -151,6 +195,11 @@ void rspamd_re_cache_runtime_destroy(struct rspamd_re_runtime *rt);
  */
 void rspamd_re_cache_unref(struct rspamd_re_cache *cache);
 
+/**
+ * Unref re cache list (all scopes)
+ */
+void rspamd_re_cache_unref_scoped(struct rspamd_re_cache *cache_head);
+
 /**
  * Retain reference to re cache
  */
@@ -161,6 +210,11 @@ struct rspamd_re_cache *rspamd_re_cache_ref(struct rspamd_re_cache *cache);
  */
 unsigned int rspamd_re_cache_set_limit(struct rspamd_re_cache *cache, unsigned int limit);
 
+/**
+ * Set limit for all regular expressions in the scoped cache, returns previous limit
+ */
+unsigned int rspamd_re_cache_set_limit_scoped(struct rspamd_re_cache *cache_head, const char *scope, unsigned int limit);
+
 /**
  * Convert re type to a human readable string (constant one)
  */
@@ -183,6 +237,17 @@ int rspamd_re_cache_compile_hyperscan(struct rspamd_re_cache *cache,
                                                                          void (*cb)(unsigned int ncompiled, GError *err, void *cbd),
                                                                          void *cbd);
 
+/**
+ * Compile expressions to the hyperscan tree and store in the `cache_dir` for all scopes
+ */
+int rspamd_re_cache_compile_hyperscan_scoped(struct rspamd_re_cache *cache_head,
+                                                                                        const char *cache_dir,
+                                                                                        double max_time,
+                                                                                        gboolean silent,
+                                                                                        struct ev_loop *event_loop,
+                                                                                        void (*cb)(unsigned int ncompiled, GError *err, void *cbd),
+                                                                                        void *cbd);
+
 /**
  * Returns TRUE if the specified file is valid hyperscan cache
  */
@@ -199,12 +264,48 @@ enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan(
        struct rspamd_re_cache *cache,
        const char *cache_dir, bool try_load);
 
+/**
+ * Loads all hyperscan regexps precompiled for all scopes
+ */
+enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan_scoped(
+       struct rspamd_re_cache *cache_head,
+       const char *cache_dir, bool try_load);
+
 /**
  * Registers lua selector in the cache
  */
 void rspamd_re_cache_add_selector(struct rspamd_re_cache *cache,
                                                                  const char *sname, int ref);
 
+/**
+ * Registers lua selector in the scoped cache
+ */
+void rspamd_re_cache_add_selector_scoped(struct rspamd_re_cache **cache_head, const char *scope,
+                                                                                const char *sname, int ref);
+
+/**
+ * Find a cache by scope name
+ */
+struct rspamd_re_cache *rspamd_re_cache_find_scope(struct rspamd_re_cache *cache_head, const char *scope);
+
+/**
+ * Remove a cache scope from the list
+ */
+gboolean rspamd_re_cache_remove_scope(struct rspamd_re_cache **cache_head, const char *scope);
+
+/**
+ * Count the number of scopes in the cache list
+ */
+unsigned int rspamd_re_cache_count_scopes(struct rspamd_re_cache *cache_head);
+
+/**
+ * Get array of scope names from the cache list
+ * @param cache_head head of cache list
+ * @param count_out pointer to store the number of scopes
+ * @return array of scope names (must be freed with g_strfreev), or NULL if no scopes
+ */
+char **rspamd_re_cache_get_scope_names(struct rspamd_re_cache *cache_head, unsigned int *count_out);
+
 #ifdef __cplusplus
 }
 #endif
index 7b3a156cd062c173589e3ed3969988158ac57dd6..0c7f5d340542aa5bdf2bc534ffac5a373a2d11f9 100644 (file)
@@ -561,6 +561,79 @@ LUA_FUNCTION_DEF(config, register_regexp);
  */
 LUA_FUNCTION_DEF(config, replace_regexp);
 
+/***
+ * @method rspamd_config:register_regexp_scoped(scope, params)
+ * Registers new re for further cached usage in a specific scope
+ * Params is the table with the following fields (mandatory fields are marked with `*`):
+ * - `re`* : regular expression object
+ * - `type`*: type of regular expression:
+ *   + `mime`: mime regexp
+ *   + `rawmime`: raw mime regexp
+ *   + `header`: header regexp
+ *   + `rawheader`: raw header expression
+ *   + `body`: raw body regexp
+ *   + `url`: url regexp
+ * - `header`: for header and rawheader regexp means the name of header
+ * - `pcre_only`: flag regexp as pcre only regexp
+ * @param {string} scope scope name for the regexp
+ * @param {table} params regexp parameters
+ */
+LUA_FUNCTION_DEF(config, register_regexp_scoped);
+
+/***
+ * @method rspamd_config:replace_regexp_scoped(scope, params)
+ * Replaces regexp with a new one in a specific scope
+ * Params is the table with the following fields (mandatory fields are marked with `*`):
+ * - `old_re`* : old regular expression object (must be in the cache)
+ * - `new_re`* : new regular expression object (must not be in the cache)
+ * - `pcre_only`: flag regexp as pcre only regexp
+ * @param {string} scope scope name for the regexp
+ * @param {table} params regexp parameters
+ */
+LUA_FUNCTION_DEF(config, replace_regexp_scoped);
+
+/***
+ * @method rspamd_config:register_re_selector_scoped(scope, name, selector_str, [delimiter, [flatten]])
+ * Registers selector with the specific name in a specific scope to use in regular expressions
+ * @param {string} scope scope name for the selector
+ * @param {string} name name of the selector
+ * @param {string} selector_str selector definition
+ * @param {string} delimiter delimiter to use when joining strings if flatten is false
+ * @param {bool} flatten if true then selector will return a table of captures instead of a single string
+ * @return true if selector has been registered
+ */
+LUA_FUNCTION_DEF(config, register_re_selector_scoped);
+
+/***
+ * @method rspamd_config:find_regexp_scope(scope)
+ * Checks if a regexp scope exists
+ * @param {string} scope scope name to check (can be nil for default scope)
+ * @return {boolean} true if scope exists
+ */
+LUA_FUNCTION_DEF(config, find_regexp_scope);
+
+/***
+ * @method rspamd_config:remove_regexp_scope(scope)
+ * Removes a regexp scope from the cache
+ * @param {string} scope scope name to remove
+ * @return {boolean} true if scope was removed successfully
+ */
+LUA_FUNCTION_DEF(config, remove_regexp_scope);
+
+/***
+ * @method rspamd_config:count_regexp_scopes()
+ * Returns the number of regexp scopes
+ * @return {number} number of scopes
+ */
+LUA_FUNCTION_DEF(config, count_regexp_scopes);
+
+/***
+ * @method rspamd_config:list_regexp_scopes()
+ * Returns a list of all regexp scope names
+ * @return {table} array of scope names (default scope is named "default")
+ */
+LUA_FUNCTION_DEF(config, list_regexp_scopes);
+
 /***
  * @method rspamd_config:register_worker_script(worker_type, script)
  * Registers the following script for workers of a specified type. The exact type
@@ -920,6 +993,13 @@ static const struct luaL_reg configlib_m[] = {
        LUA_INTERFACE_DEF(config, disable_symbol),
        LUA_INTERFACE_DEF(config, register_regexp),
        LUA_INTERFACE_DEF(config, replace_regexp),
+       LUA_INTERFACE_DEF(config, register_regexp_scoped),
+       LUA_INTERFACE_DEF(config, replace_regexp_scoped),
+       LUA_INTERFACE_DEF(config, register_re_selector_scoped),
+       LUA_INTERFACE_DEF(config, find_regexp_scope),
+       LUA_INTERFACE_DEF(config, remove_regexp_scope),
+       LUA_INTERFACE_DEF(config, count_regexp_scopes),
+       LUA_INTERFACE_DEF(config, list_regexp_scopes),
        LUA_INTERFACE_DEF(config, register_worker_script),
        LUA_INTERFACE_DEF(config, register_re_selector),
        LUA_INTERFACE_DEF(config, add_on_load),
@@ -4848,3 +4928,316 @@ lua_config_unload_custom_tokenizers(lua_State *L)
                return luaL_error(L, "invalid arguments");
        }
 }
+
+/*
+ * Lua method rspamd_config:register_regexp_scoped(scope, params).
+ *
+ * `params` is a table with the following keys (`*` marks mandatory ones):
+ * - `re`* : regular expression object
+ * - `type`*: type of regular expression:
+ *   + `mime`: mime regexp
+ *   + `rawmime`: raw mime regexp
+ *   + `header`: header regexp
+ *   + `rawheader`: raw header expression
+ *   + `body`: raw body regexp
+ *   + `url`: url regexp
+ * - `header`: for header and rawheader regexp means the name of header
+ * - `pcre_only`: flag the regexp as pcre only
+ *
+ * Problems with `params` are logged rather than raised as Lua errors.
+ * Nothing is pushed back to Lua (always returns 0 results).
+ */
+static int
+lua_config_register_regexp_scoped(lua_State *L)
+{
+       LUA_TRACE_POINT;
+       struct rspamd_config *cfg = lua_check_config(L, 1);
+       const char *scope = luaL_checkstring(L, 2);
+       struct rspamd_lua_regexp *lua_re = NULL;
+       const char *type_name = NULL, *header_name = NULL;
+       gboolean pcre_only = FALSE;
+       GError *parse_err = NULL;
+
+       if (cfg == NULL || scope == NULL) {
+               return 0;
+       }
+
+       if (!rspamd_lua_parse_table_arguments(L, 3, &parse_err,
+                       RSPAMD_LUA_PARSE_ARGUMENTS_DEFAULT,
+                       "*re=U{regexp};*type=S;header=S;pcre_only=B",
+                       &lua_re, &type_name, &header_name, &pcre_only)) {
+               msg_err_config("cannot get parameters list: %e", parse_err);
+
+               if (parse_err != NULL) {
+                       g_error_free(parse_err);
+               }
+
+               return 0;
+       }
+
+       enum rspamd_re_type type = rspamd_re_cache_type_from_string(type_name);
+       gboolean header_required = (type == RSPAMD_RE_HEADER ||
+                       type == RSPAMD_RE_RAWHEADER ||
+                       type == RSPAMD_RE_MIMEHEADER);
+
+       if (header_required && header_name == NULL) {
+               msg_err_config(
+                       "header argument is mandatory for header/rawheader regexps");
+               return 0;
+       }
+
+       if (pcre_only) {
+               rspamd_regexp_set_flags(lua_re->re,
+                               rspamd_regexp_get_flags(lua_re->re) | RSPAMD_REGEXP_FLAG_PCRE_ONLY);
+       }
+
+       /* The header name is passed together with its trailing \0 */
+       gsize header_len = (header_name != NULL) ? strlen(header_name) + 1 : 0;
+
+       rspamd_regexp_t *cached = rspamd_re_cache_add_scoped(&cfg->re_cache, scope,
+                       lua_re->re, type,
+                       (gpointer) header_name, header_len, -1);
+
+       if (cached == lua_re->re) {
+               /* The cache handed back the very same object: nothing to swap */
+               return 0;
+       }
+
+       /*
+        * The cache returned a different regexp object than the one held by
+        * the Lua wrapper. Drop the wrapper's reference to its old regexp and
+        * make it point to the cached one, taking an extra reference so the
+        * cached regexp is shared between the cache and the Lua object.
+        */
+       rspamd_regexp_unref(lua_re->re);
+       lua_re->re = rspamd_regexp_ref(cached);
+
+       if (pcre_only) {
+               /* Re-apply the flag: it was set on the object that got replaced */
+               rspamd_regexp_set_flags(lua_re->re,
+                               rspamd_regexp_get_flags(lua_re->re) | RSPAMD_REGEXP_FLAG_PCRE_ONLY);
+       }
+
+       return 0;
+}
+
+/*
+ * Lua method rspamd_config:replace_regexp_scoped(scope, params).
+ *
+ * `params` is a table with the following keys (`*` marks mandatory ones):
+ * - `old_re`* : regular expression object to be replaced
+ * - `new_re`* : regular expression object to use instead
+ * - `pcre_only`: flag `new_re` as pcre only before the replacement
+ *
+ * Raises a Lua error if `params` cannot be parsed; otherwise returns no
+ * values to Lua.
+ */
+static int
+lua_config_replace_regexp_scoped(lua_State *L)
+{
+       LUA_TRACE_POINT;
+       struct rspamd_config *cfg = lua_check_config(L, 1);
+       const char *scope = luaL_checkstring(L, 2);
+       struct rspamd_lua_regexp *old_re = NULL, *new_re = NULL;
+       gboolean pcre_only = FALSE;
+       GError *err = NULL;
+
+       if (cfg != NULL && scope != NULL) {
+               if (!rspamd_lua_parse_table_arguments(L, 3, &err,
+                                                                                         RSPAMD_LUA_PARSE_ARGUMENTS_DEFAULT,
+                                                                                         "*old_re=U{regexp};*new_re=U{regexp};pcre_only=B",
+                                                                                         &old_re, &new_re, &pcre_only)) {
+                       /*
+                        * luaL_error() never returns (it performs a long jump), so
+                        * any cleanup placed after it is dead code and the GError
+                        * would leak. Build the very same message on the Lua stack
+                        * first (Lua keeps its own copy of the string), release the
+                        * GError and only then raise the error.
+                        */
+                       luaL_where(L, 1);
+                       lua_pushfstring(L, "cannot get parameters list: %s",
+                                                       err ? err->message : "invalid arguments");
+                       lua_concat(L, 2);
+
+                       if (err) {
+                               g_error_free(err);
+                       }
+
+                       return lua_error(L);
+               }
+
+               if (pcre_only) {
+                       rspamd_regexp_set_flags(new_re->re,
+                                                                       rspamd_regexp_get_flags(new_re->re) | RSPAMD_REGEXP_FLAG_PCRE_ONLY);
+               }
+
+               rspamd_re_cache_replace_scoped(&cfg->re_cache, scope, old_re->re, new_re->re);
+       }
+
+       return 0;
+}
+
+/*
+ * Lua method rspamd_config:register_re_selector_scoped(scope, name,
+ * selector_str, [delimiter, [flatten]]).
+ *
+ * Requires the `lua_selectors` module, calls its `create_selector_closure`
+ * with (cfg, selector_str, delimiter, flatten) and, when a function is
+ * returned, stores it in the Lua registry and registers that reference as
+ * selector `name` in the given scope. Failures of these steps are logged.
+ *
+ * Pushes a single boolean: true if the selector has been registered.
+ */
+static int
+lua_config_register_re_selector_scoped(lua_State *L)
+{
+       LUA_TRACE_POINT;
+       struct rspamd_config *cfg = lua_check_config(L, 1);
+       const char *scope = luaL_checkstring(L, 2);
+       const char *name = luaL_checkstring(L, 3);
+       const char *selector_str = luaL_checkstring(L, 4);
+       const char *delimiter = "";
+       bool flatten = false;
+       bool registered = false;
+       /* Remember the stack height so that all temporaries can be dropped */
+       int saved_top = lua_gettop(L);
+
+       if (!(cfg && scope && name && selector_str)) {
+               return luaL_error(L, "invalid arguments");
+       }
+
+       if (saved_top >= 5) {
+               delimiter = luaL_checkstring(L, 5);
+
+               if (lua_isboolean(L, 6)) {
+                       flatten = lua_toboolean(L, 6);
+               }
+       }
+
+       if (luaL_dostring(L, "return require \"lua_selectors\"") != 0) {
+               msg_warn_config("cannot require lua_selectors: %s",
+                               lua_tostring(L, -1));
+       }
+       else if (lua_type(L, -1) != LUA_TTABLE) {
+               msg_warn_config("lua selectors must return "
+                               "table and not %s",
+                               lua_typename(L, lua_type(L, -1)));
+       }
+       else {
+               lua_getfield(L, -1, "create_selector_closure");
+
+               if (lua_type(L, -1) != LUA_TFUNCTION) {
+                       msg_warn_config("create_selector_closure must return "
+                                       "function and not %s",
+                                       lua_typename(L, lua_type(L, -1)));
+               }
+               else {
+                       struct rspamd_config **pcfg;
+                       int traceback_idx, rc;
+
+                       lua_pushcfunction(L, &rspamd_lua_traceback);
+                       traceback_idx = lua_gettop(L);
+
+                       /* Copy of create_selector_closure followed by its 4 arguments */
+                       lua_pushvalue(L, -2);
+
+                       pcfg = lua_newuserdata(L, sizeof(*pcfg));
+                       rspamd_lua_setclass(L, rspamd_config_classname, -1);
+                       *pcfg = cfg;
+                       lua_pushstring(L, selector_str);
+                       lua_pushstring(L, delimiter);
+                       lua_pushboolean(L, flatten);
+
+                       rc = lua_pcall(L, 4, 1, traceback_idx);
+
+                       if (rc != 0) {
+                               msg_err_config("call to create_selector_closure lua "
+                                               "script failed (%d): %s",
+                                               rc,
+                                               lua_tostring(L, -1));
+                       }
+                       else if (lua_type(L, -1) != LUA_TFUNCTION) {
+                               msg_warn_config("create_selector_closure "
+                                               "invocation must return "
+                                               "function and not %s",
+                                               lua_typename(L, lua_type(L, -1)));
+                       }
+                       else {
+                               /* Keep the closure in the registry and hand its ref to the cache */
+                               rc = luaL_ref(L, LUA_REGISTRYINDEX);
+                               rspamd_re_cache_add_selector_scoped(&cfg->re_cache, scope,
+                                               name, rc);
+                               registered = true;
+                       }
+               }
+       }
+
+       lua_settop(L, saved_top);
+       lua_pushboolean(L, registered);
+
+       if (registered) {
+               msg_info_config("registered regexp selector %s for scope %s", name, scope);
+       }
+
+       return 1;
+}
+
+/*
+ * Lua method rspamd_config:find_regexp_scope(scope).
+ *
+ * Pushes a boolean telling whether a regexp cache exists for `scope`.
+ * When the second argument is not a string, NULL is passed to the lookup,
+ * which stands for the default scope.
+ */
+static int
+lua_config_find_regexp_scope(lua_State *L)
+{
+       LUA_TRACE_POINT;
+       struct rspamd_config *cfg = lua_check_config(L, 1);
+
+       if (cfg == NULL) {
+               return luaL_error(L, "invalid arguments");
+       }
+
+       /* NULL selects the default scope */
+       const char *scope = (lua_type(L, 2) == LUA_TSTRING) ? lua_tostring(L, 2) : NULL;
+
+       lua_pushboolean(L, rspamd_re_cache_find_scope(cfg->re_cache, scope) != NULL);
+
+       return 1;
+}
+
+/*
+ * Lua method rspamd_config:remove_regexp_scope(scope).
+ *
+ * Asks the regexp cache to remove `scope` and pushes the boolean result of
+ * that call.
+ */
+static int
+lua_config_remove_regexp_scope(lua_State *L)
+{
+       LUA_TRACE_POINT;
+       struct rspamd_config *cfg = lua_check_config(L, 1);
+       const char *scope = luaL_checkstring(L, 2);
+
+       if (cfg == NULL || scope == NULL) {
+               return luaL_error(L, "invalid arguments");
+       }
+
+       gboolean removed = rspamd_re_cache_remove_scope(&cfg->re_cache, scope);
+
+       lua_pushboolean(L, removed);
+
+       return 1;
+}
+
+/*
+ * Lua method rspamd_config:count_regexp_scopes().
+ *
+ * Pushes the number of scopes reported by the regexp cache.
+ */
+static int
+lua_config_count_regexp_scopes(lua_State *L)
+{
+       LUA_TRACE_POINT;
+       struct rspamd_config *cfg = lua_check_config(L, 1);
+
+       if (cfg == NULL) {
+               return luaL_error(L, "invalid arguments");
+       }
+
+       unsigned int n_scopes = rspamd_re_cache_count_scopes(cfg->re_cache);
+
+       lua_pushinteger(L, n_scopes);
+
+       return 1;
+}
+
+/*
+ * Lua method rspamd_config:list_regexp_scopes().
+ *
+ * Pushes a new table holding the scope names obtained from the regexp cache
+ * at indices 1..count (an empty table if no names are returned). The names
+ * array is released with g_strfreev() once it has been copied.
+ */
+static int
+lua_config_list_regexp_scopes(lua_State *L)
+{
+       LUA_TRACE_POINT;
+       struct rspamd_config *cfg = lua_check_config(L, 1);
+
+       if (cfg == NULL) {
+               return luaL_error(L, "invalid arguments");
+       }
+
+       unsigned int n_names;
+       char **names = rspamd_re_cache_get_scope_names(cfg->re_cache, &n_names);
+
+       lua_newtable(L);
+
+       if (names != NULL) {
+               for (unsigned int idx = 0; idx < n_names; idx++) {
+                       /* Lua arrays are 1-based */
+                       lua_pushstring(L, names[idx]);
+                       lua_rawseti(L, -2, idx + 1);
+               }
+
+               g_strfreev(names);
+       }
+
+       return 1;
+}