From: Vsevolod Stakhov Date: Fri, 27 Jun 2025 16:16:00 +0000 (+0100) Subject: [Project] Implement scoped regexp cache system X-Git-Tag: 3.13.0~47^2~13 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f9644a00549c183b17d99c74a68b6aaea17baac9;p=thirdparty%2Frspamd.git [Project] Implement scoped regexp cache system --- diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c index 50b155ae04..25ed72948d 100644 --- a/src/libserver/re_cache.c +++ b/src/libserver/re_cache.c @@ -91,6 +91,7 @@ struct rspamd_re_class { gsize type_len; GHashTable *re; rspamd_cryptobox_hash_state_t *st; + struct rspamd_re_cache *cache; /* Back-reference to owning cache */ char hash[rspamd_cryptobox_HASHBYTES + 1]; @@ -126,6 +127,11 @@ struct rspamd_re_cache { unsigned int max_re_data; char hash[rspamd_cryptobox_HASHBYTES + 1]; lua_State *L; + + /* Intrusive linked list for scoped caches */ + struct rspamd_re_cache *next, *prev; + char *scope; + #ifdef WITH_HYPERSCAN enum rspamd_hyperscan_status hyperscan_loaded; gboolean disable_hyperscan; @@ -149,6 +155,9 @@ struct rspamd_re_runtime { struct rspamd_re_cache *cache; struct rspamd_re_cache_stat stat; gboolean has_hs; + + /* Linked list for multiple scoped runtimes */ + struct rspamd_re_runtime *next, *prev; }; static GQuark @@ -174,6 +183,62 @@ rspamd_re_cache_class_id(enum rspamd_re_type type, return rspamd_cryptobox_fast_hash_final(&st); } +static struct rspamd_re_cache * +rspamd_re_cache_find_by_scope(struct rspamd_re_cache *cache_head, const char *scope) +{ + struct rspamd_re_cache *cur; + + if (!cache_head) { + return NULL; + } + + DL_FOREACH(cache_head, cur) + { + if (scope == NULL && cur->scope == NULL) { + /* Looking for default scope */ + return cur; + } + else if (scope != NULL && cur->scope != NULL && strcmp(cur->scope, scope) == 0) { + return cur; + } + } + + return NULL; +} + +static struct rspamd_re_cache * +rspamd_re_cache_add_to_scope_list(struct rspamd_re_cache **cache_head, const char *scope) +{ + struct 
rspamd_re_cache *new_cache, *existing; + + if (!cache_head) { + return NULL; + } + + /* Check if scope already exists */ + existing = rspamd_re_cache_find_by_scope(*cache_head, scope); + if (existing) { + return existing; + } + + /* Create new cache for this scope */ + new_cache = rspamd_re_cache_new(); + if (new_cache->scope) { + g_free(new_cache->scope); + } + new_cache->scope = g_strdup(scope); + + /* Add to linked list */ + if (*cache_head) { + DL_APPEND(*cache_head, new_cache); + } + else { + *cache_head = new_cache; + } + + return new_cache; +} + static void rspamd_re_cache_destroy(struct rspamd_re_cache *cache) { @@ -230,6 +295,11 @@ rspamd_re_cache_destroy(struct rspamd_re_cache *cache) g_hash_table_unref(cache->re_classes); g_ptr_array_free(cache->re, TRUE); + + if (cache->scope) { + g_free(cache->scope); + } + g_free(cache); } @@ -252,6 +322,9 @@ rspamd_re_cache_new(void) cache->nre = 0; cache->re = g_ptr_array_new_full(256, rspamd_re_cache_elt_dtor); cache->selectors = kh_init(lua_selectors_hash); + cache->next = NULL; + cache->prev = cache; /* utlist DL lists keep head->prev == tail; a lone cache is its own tail, so DL_APPEND/DL_DELETE on it are safe */ + cache->scope = NULL; /* Default scope */ #ifdef WITH_HYPERSCAN cache->hyperscan_loaded = RSPAMD_HYPERSCAN_UNKNOWN; #endif @@ -295,6 +367,7 @@ rspamd_re_cache_add(struct rspamd_re_cache *cache, re_class->id = class_id; re_class->type_len = datalen; re_class->type = type; + re_class->cache = cache; /* Set back-reference */ re_class->re = g_hash_table_new_full(rspamd_regexp_hash, rspamd_regexp_equal, NULL, (GDestroyNotify) rspamd_regexp_unref); @@ -330,6 +403,26 @@ rspamd_re_cache_add(struct rspamd_re_cache *cache, return nre; } +rspamd_regexp_t * +rspamd_re_cache_add_scoped(struct rspamd_re_cache **cache_head, const char *scope, + rspamd_regexp_t *re, enum rspamd_re_type type, + gconstpointer type_data, gsize datalen, + int lua_cbref) +{ + struct rspamd_re_cache *cache; + + g_assert(cache_head != NULL); + g_assert(re != NULL); + + /* NULL scope is allowed for default scope */ + cache = 
rspamd_re_cache_add_to_scope_list(cache_head, scope); + if (!cache) { + return NULL; + } + + return rspamd_re_cache_add(cache, re, type, type_data, datalen, lua_cbref); +} + void rspamd_re_cache_replace(struct rspamd_re_cache *cache, rspamd_regexp_t *what, rspamd_regexp_t *with) @@ -371,6 +464,23 @@ void rspamd_re_cache_replace(struct rspamd_re_cache *cache, } } +void rspamd_re_cache_replace_scoped(struct rspamd_re_cache **cache_head, const char *scope, + rspamd_regexp_t *what, + rspamd_regexp_t *with) +{ + struct rspamd_re_cache *cache; + + g_assert(cache_head != NULL); + g_assert(what != NULL); + g_assert(with != NULL); + + /* NULL scope is allowed for default scope */ + cache = rspamd_re_cache_find_by_scope(*cache_head, scope); + if (cache) { + rspamd_re_cache_replace(cache, what, with); + } +} + static int rspamd_re_cache_sort_func(gconstpointer a, gconstpointer b) { @@ -515,8 +625,21 @@ void rspamd_re_cache_init(struct rspamd_re_cache *cache, struct rspamd_config *c #endif } -struct rspamd_re_runtime * -rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache) +void rspamd_re_cache_init_scoped(struct rspamd_re_cache *cache_head, + struct rspamd_config *cfg) +{ + struct rspamd_re_cache *cur; + + g_assert(cache_head != NULL); + + DL_FOREACH(cache_head, cur) + { + rspamd_re_cache_init(cur, cfg); + } +} + +static struct rspamd_re_runtime * +rspamd_re_cache_runtime_new_single(struct rspamd_re_cache *cache) { struct rspamd_re_runtime *rt; g_assert(cache != NULL); @@ -534,6 +657,58 @@ rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache) return rt; } +struct rspamd_re_runtime * +rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache) +{ + struct rspamd_re_runtime *rt_head = NULL, *rt; + struct rspamd_re_cache *cur; + + g_assert(cache != NULL); + + /* + * Create runtime for all scopes in the chain. + * This ensures task has runtimes for all available scopes. 
+ */ + DL_FOREACH(cache, cur) + { + rt = rspamd_re_cache_runtime_new_single(cur); + if (rt) { + /* + * DL_APPEND handles an empty list itself and sets head->prev + * to the tail; assigning rt_head by hand skips that, and the + * next append would then follow a prev pointer nobody set. + */ + DL_APPEND(rt_head, rt); + } + } + + return rt_head; +} + +struct rspamd_re_runtime * +rspamd_re_cache_runtime_new_all_scopes(struct rspamd_re_cache *cache_head) +{ + /* This is now the same as the main function since it always creates for all scopes */ + return rspamd_re_cache_runtime_new(cache_head); +} + +struct rspamd_re_runtime * +rspamd_re_cache_runtime_new_scoped(struct rspamd_re_cache *cache_head, const char *scope) +{ + struct rspamd_re_cache *cache; + + if (!cache_head) { + return NULL; + } + + cache = rspamd_re_cache_find_by_scope(cache_head, scope); + if (!cache) { + return NULL; + } + + return rspamd_re_cache_runtime_new_single(cache); +} + const struct rspamd_re_cache_stat * rspamd_re_cache_get_stat(struct rspamd_re_runtime *rt) { @@ -1503,20 +1678,20 @@ rspamd_re_cache_exec_re(struct rspamd_task *task, return rt->results[re_id]; } -int rspamd_re_cache_process(struct rspamd_task *task, - rspamd_regexp_t *re, - enum rspamd_re_type type, - gconstpointer type_data, - gsize datalen, - gboolean is_strong) +static int +rspamd_re_cache_process_single(struct rspamd_task *task, + struct rspamd_re_runtime *rt, + rspamd_regexp_t *re, + enum rspamd_re_type type, + gconstpointer type_data, + gsize datalen, + gboolean is_strong) { uint64_t re_id; struct rspamd_re_class *re_class; struct rspamd_re_cache *cache; - struct rspamd_re_runtime *rt; g_assert(task != NULL); - rt = task->re_rt; g_assert(rt != NULL); g_assert(re != NULL); @@ -1551,6 +1726,53 @@ int rspamd_re_cache_process(struct rspamd_task *task, return 0; } +int rspamd_re_cache_process(struct rspamd_task *task, + rspamd_regexp_t *re, + enum rspamd_re_type type, + gconstpointer type_data, + gsize datalen, + gboolean is_strong) +{ + struct rspamd_re_runtime *rt_list, *rt; + struct rspamd_re_class *re_class; + struct rspamd_re_cache *target_cache; + int result = 0; + 
+ g_assert(task != NULL); + g_assert(re != NULL); + + rt_list = task->re_rt; + if (!rt_list) { + return 0; + } + + /* + * Since each regexp belongs to a class which belongs to a cache, + * we can find the correct cache and corresponding runtime + */ + re_class = rspamd_regexp_get_class(re); + if (!re_class) { + return 0; + } + + target_cache = re_class->cache; + if (!target_cache) { + return 0; + } + + /* Find the runtime that matches the cache */ + DL_FOREACH(rt_list, rt) + { + if (rt->cache == target_cache) { + result = rspamd_re_cache_process_single(task, rt, re, type, + type_data, datalen, is_strong); + break; + } + } + + return result; +} + int rspamd_re_cache_process_ffi(void *ptask, void *pre, int type, @@ -1571,24 +1793,30 @@ int rspamd_re_cache_process_ffi(void *ptask, void rspamd_re_cache_runtime_destroy(struct rspamd_re_runtime *rt) { + struct rspamd_re_runtime *cur, *tmp; + g_assert(rt != NULL); - if (rt->sel_cache) { - struct rspamd_re_selector_result sr; + /* Handle linked list of runtimes */ + DL_FOREACH_SAFE(rt, cur, tmp) + { + if (cur->sel_cache) { + struct rspamd_re_selector_result sr; - kh_foreach_value(rt->sel_cache, sr, { - for (unsigned int i = 0; i < sr.cnt; i++) { - g_free((gpointer) sr.scvec[i]); - } + kh_foreach_value(cur->sel_cache, sr, { + for (unsigned int i = 0; i < sr.cnt; i++) { + g_free((gpointer) sr.scvec[i]); + } - g_free(sr.scvec); - g_free(sr.lenvec); - }); - kh_destroy(selectors_results_hash, rt->sel_cache); - } + g_free(sr.scvec); + g_free(sr.lenvec); + }); + kh_destroy(selectors_results_hash, cur->sel_cache); + } - REF_RELEASE(rt->cache); - g_free(rt); + REF_RELEASE(cur->cache); + g_free(cur); + } } void rspamd_re_cache_unref(struct rspamd_re_cache *cache) @@ -1598,6 +1826,21 @@ void rspamd_re_cache_unref(struct rspamd_re_cache *cache) } } +void rspamd_re_cache_unref_scoped(struct rspamd_re_cache *cache_head) +{ + struct rspamd_re_cache *cur, *tmp; + + if (!cache_head) { + return; + } + + DL_FOREACH_SAFE(cache_head, cur, tmp) 
+ { + DL_DELETE(cache_head, cur); + rspamd_re_cache_unref(cur); + } +} + struct rspamd_re_cache * rspamd_re_cache_ref(struct rspamd_re_cache *cache) { @@ -1620,6 +1863,23 @@ unsigned int rspamd_re_cache_set_limit(struct rspamd_re_cache *cache, unsigned i return old; } +unsigned int rspamd_re_cache_set_limit_scoped(struct rspamd_re_cache *cache_head, const char *scope, unsigned int limit) +{ + struct rspamd_re_cache *cache; + unsigned int old = 0; + + if (!cache_head || !scope) { + return old; + } + + cache = rspamd_re_cache_find_by_scope(cache_head, scope); + if (cache) { + old = rspamd_re_cache_set_limit(cache, limit); + } + + return old; +} + const char * rspamd_re_cache_type_to_string(enum rspamd_re_type type) { @@ -2257,6 +2517,55 @@ int rspamd_re_cache_compile_hyperscan(struct rspamd_re_cache *cache, #endif } +int rspamd_re_cache_compile_hyperscan_scoped(struct rspamd_re_cache *cache_head, + const char *cache_dir, + double max_time, + gboolean silent, + struct ev_loop *event_loop, + void (*cb)(unsigned int ncompiled, GError *err, void *cbd), + void *cbd) +{ +#ifndef WITH_HYPERSCAN + return -1; +#else + struct rspamd_re_cache *cur; + int result = 0, total_compiled = 0; + GError *first_error = NULL; + + if (!cache_head) { + return -1; + } + + /* + * For now, compile each cache sequentially + * TODO: Could be made async if needed + */ + DL_FOREACH(cache_head, cur) + { + result = rspamd_re_cache_compile_hyperscan(cur, cache_dir, max_time, silent, + event_loop, NULL, NULL); + if (result >= 0) { + total_compiled += result; + } + else if (!first_error) { + first_error = g_error_new(rspamd_re_cache_quark(), result, + "Failed to compile hyperscan for scope '%s'", + cur->scope ? 
cur->scope : "unknown"); + } + } + + if (cb) { + cb(total_compiled, first_error, cbd); + } + + if (first_error) { + g_error_free(first_error); + } + + return total_compiled; +#endif +} + gboolean rspamd_re_cache_is_valid_hyperscan_file(struct rspamd_re_cache *cache, const char *path, gboolean silent, gboolean try_load, GError **err) @@ -2691,6 +3000,48 @@ rspamd_re_cache_load_hyperscan(struct rspamd_re_cache *cache, #endif } +enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan_scoped( + struct rspamd_re_cache *cache_head, + const char *cache_dir, bool try_load) +{ +#ifndef WITH_HYPERSCAN + return RSPAMD_HYPERSCAN_UNSUPPORTED; +#else + struct rspamd_re_cache *cur; + enum rspamd_hyperscan_status result, overall_status = RSPAMD_HYPERSCAN_UNKNOWN; + gboolean has_loaded = FALSE, all_loaded = TRUE; + + if (!cache_head) { + return RSPAMD_HYPERSCAN_LOAD_ERROR; + } + + DL_FOREACH(cache_head, cur) + { + result = rspamd_re_cache_load_hyperscan(cur, cache_dir, try_load); + + if (result == RSPAMD_HYPERSCAN_LOADED_FULL || + result == RSPAMD_HYPERSCAN_LOADED_PARTIAL) { + has_loaded = TRUE; + if (result == RSPAMD_HYPERSCAN_LOADED_PARTIAL) { + all_loaded = FALSE; + } + } + else { + all_loaded = FALSE; + } + } + + if (has_loaded) { + overall_status = all_loaded ? 
RSPAMD_HYPERSCAN_LOADED_FULL : RSPAMD_HYPERSCAN_LOADED_PARTIAL; + } + else { + overall_status = RSPAMD_HYPERSCAN_LOAD_ERROR; + } + + return overall_status; +#endif +} + void rspamd_re_cache_add_selector(struct rspamd_re_cache *cache, const char *sname, int ref) @@ -2717,3 +3068,108 @@ void rspamd_re_cache_add_selector(struct rspamd_re_cache *cache, kh_value(cache->selectors, k) = ref; } } + +void rspamd_re_cache_add_selector_scoped(struct rspamd_re_cache **cache_head, const char *scope, + const char *sname, int ref) +{ + struct rspamd_re_cache *cache; + + g_assert(cache_head != NULL); + g_assert(sname != NULL); + + /* NULL scope is allowed for default scope */ + cache = rspamd_re_cache_add_to_scope_list(cache_head, scope); + if (cache) { + rspamd_re_cache_add_selector(cache, sname, ref); + } +} + +struct rspamd_re_cache *rspamd_re_cache_find_scope(struct rspamd_re_cache *cache_head, const char *scope) +{ + return rspamd_re_cache_find_by_scope(cache_head, scope); +} + +gboolean rspamd_re_cache_remove_scope(struct rspamd_re_cache **cache_head, const char *scope) +{ + struct rspamd_re_cache *target; + + if (!cache_head || !*cache_head) { + return FALSE; + } + + /* Prevent removal of default scope (NULL) to keep head stable */ + if (!scope) { + return FALSE; + } + + target = rspamd_re_cache_find_by_scope(*cache_head, scope); + if (!target) { + return FALSE; + } + + /* Remove from linked list */ + DL_DELETE(*cache_head, target); + + /* If this was the head and there are no more elements, update head */ + if (target == *cache_head && !*cache_head) { + *cache_head = NULL; + } + + /* Unref the cache */ + rspamd_re_cache_unref(target); + + return TRUE; +} + +unsigned int rspamd_re_cache_count_scopes(struct rspamd_re_cache *cache_head) +{ + struct rspamd_re_cache *cur; + unsigned int count = 0; + + if (!cache_head) { + return 0; + } + + DL_COUNT(cache_head, cur, count); + return count; +} + +char **rspamd_re_cache_get_scope_names(struct rspamd_re_cache *cache_head, unsigned 
int *count_out) +{ + struct rspamd_re_cache *cur; + char **names = NULL; + unsigned int i = 0, count = 0; + + if (!cache_head || !count_out) { + if (count_out) { + *count_out = 0; + } + return NULL; + } + + /* First count scopes */ + DL_COUNT(cache_head, cur, count); + + if (count == 0) { + *count_out = 0; + return NULL; + } + + /* Zeroed array with one spare slot: callers free it with g_strfreev(), which stops at the NULL terminator */ + names = g_new0(char *, count + 1); + + /* Fill array */ + DL_FOREACH(cache_head, cur) + { + if (cur->scope) { + names[i] = g_strdup(cur->scope); + } + else { + names[i] = g_strdup("default"); + } + i++; + } + + *count_out = count; + return names; +} diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h index 20b1108e0b..b64c7a9ab7 100644 --- a/src/libserver/re_cache.h +++ b/src/libserver/re_cache.h @@ -76,6 +76,22 @@ rspamd_re_cache_add(struct rspamd_re_cache *cache, rspamd_regexp_t *re, gconstpointer type_data, gsize datalen, int lua_cbref); +/** + * Add the existing regexp to the cache with specified scope + * @param cache_head head of cache list + * @param scope scope name + * @param re regexp object + * @param type type of object + * @param type_data associated data with the type (e.g. 
header name) + * @param datalen associated data length + * @param lua_cbref optional lua callback reference for matching purposes + */ +rspamd_regexp_t * +rspamd_re_cache_add_scoped(struct rspamd_re_cache **cache_head, const char *scope, + rspamd_regexp_t *re, enum rspamd_re_type type, + gconstpointer type_data, gsize datalen, + int lua_cbref); + /** * Replace regexp in the cache with another regexp * @param cache cache object @@ -86,12 +102,29 @@ void rspamd_re_cache_replace(struct rspamd_re_cache *cache, rspamd_regexp_t *what, rspamd_regexp_t *with); +/** + * Replace regexp in the scoped cache with another regexp + * @param cache_head head of cache list + * @param scope scope name + * @param what re to replace + * @param with regexp object to replace the origin + */ +void rspamd_re_cache_replace_scoped(struct rspamd_re_cache **cache_head, const char *scope, + rspamd_regexp_t *what, + rspamd_regexp_t *with); + /** * Initialize and optimize re cache structure */ void rspamd_re_cache_init(struct rspamd_re_cache *cache, struct rspamd_config *cfg); +/** + * Initialize and optimize re cache structures for all scopes + */ +void rspamd_re_cache_init_scoped(struct rspamd_re_cache *cache_head, + struct rspamd_config *cfg); + enum rspamd_hyperscan_status { RSPAMD_HYPERSCAN_UNKNOWN = 0, RSPAMD_HYPERSCAN_UNSUPPORTED, @@ -108,10 +141,21 @@ enum rspamd_hyperscan_status { enum rspamd_hyperscan_status rspamd_re_cache_is_hs_loaded(struct rspamd_re_cache *cache); /** - * Get runtime data for a cache + * Get runtime data for a cache - automatically creates runtimes for all scopes in the chain + * This is the main function used for task runtime creation */ struct rspamd_re_runtime *rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache); +/** + * Get runtime data for all scoped caches (same as rspamd_re_cache_runtime_new) + */ +struct rspamd_re_runtime *rspamd_re_cache_runtime_new_all_scopes(struct rspamd_re_cache *cache_head); + +/** + * Get runtime data for a specific scoped 
cache only + */ +struct rspamd_re_runtime *rspamd_re_cache_runtime_new_scoped(struct rspamd_re_cache *cache_head, const char *scope); + /** * Get runtime statistics */ @@ -151,6 +195,11 @@ void rspamd_re_cache_runtime_destroy(struct rspamd_re_runtime *rt); */ void rspamd_re_cache_unref(struct rspamd_re_cache *cache); +/** + * Unref re cache list (all scopes) + */ +void rspamd_re_cache_unref_scoped(struct rspamd_re_cache *cache_head); + /** * Retain reference to re cache */ @@ -161,6 +210,11 @@ struct rspamd_re_cache *rspamd_re_cache_ref(struct rspamd_re_cache *cache); */ unsigned int rspamd_re_cache_set_limit(struct rspamd_re_cache *cache, unsigned int limit); +/** + * Set limit for all regular expressions in the scoped cache, returns previous limit + */ +unsigned int rspamd_re_cache_set_limit_scoped(struct rspamd_re_cache *cache_head, const char *scope, unsigned int limit); + /** * Convert re type to a human readable string (constant one) */ @@ -183,6 +237,17 @@ int rspamd_re_cache_compile_hyperscan(struct rspamd_re_cache *cache, void (*cb)(unsigned int ncompiled, GError *err, void *cbd), void *cbd); +/** + * Compile expressions to the hyperscan tree and store in the `cache_dir` for all scopes + */ +int rspamd_re_cache_compile_hyperscan_scoped(struct rspamd_re_cache *cache_head, + const char *cache_dir, + double max_time, + gboolean silent, + struct ev_loop *event_loop, + void (*cb)(unsigned int ncompiled, GError *err, void *cbd), + void *cbd); + /** * Returns TRUE if the specified file is valid hyperscan cache */ @@ -199,12 +264,48 @@ enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan( struct rspamd_re_cache *cache, const char *cache_dir, bool try_load); +/** + * Loads all hyperscan regexps precompiled for all scopes + */ +enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan_scoped( + struct rspamd_re_cache *cache_head, + const char *cache_dir, bool try_load); + /** * Registers lua selector in the cache */ void rspamd_re_cache_add_selector(struct 
rspamd_re_cache *cache, const char *sname, int ref); +/** + * Registers lua selector in the scoped cache + */ +void rspamd_re_cache_add_selector_scoped(struct rspamd_re_cache **cache_head, const char *scope, + const char *sname, int ref); + +/** + * Find a cache by scope name + */ +struct rspamd_re_cache *rspamd_re_cache_find_scope(struct rspamd_re_cache *cache_head, const char *scope); + +/** + * Remove a cache scope from the list + */ +gboolean rspamd_re_cache_remove_scope(struct rspamd_re_cache **cache_head, const char *scope); + +/** + * Count the number of scopes in the cache list + */ +unsigned int rspamd_re_cache_count_scopes(struct rspamd_re_cache *cache_head); + +/** + * Get array of scope names from the cache list + * @param cache_head head of cache list + * @param count_out pointer to store the number of scopes + * @return array of scope names (must be freed with g_strfreev), or NULL if no scopes + */ +char **rspamd_re_cache_get_scope_names(struct rspamd_re_cache *cache_head, unsigned int *count_out); + #ifdef __cplusplus } #endif diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c index 7b3a156cd0..0c7f5d3405 100644 --- a/src/lua/lua_config.c +++ b/src/lua/lua_config.c @@ -561,6 +561,79 @@ LUA_FUNCTION_DEF(config, register_regexp); */ LUA_FUNCTION_DEF(config, replace_regexp); +/*** + * @method rspamd_config:register_regexp_scoped(scope, params) + * Registers new re for further cached usage in a specific scope + * Params is the table with the following fields (mandatory fields are marked with `*`): + * - `re`* : regular expression object + * - `type`*: type of regular expression: + * + `mime`: mime regexp + * + `rawmime`: raw mime regexp + * + `header`: header regexp + * + `rawheader`: raw header expression + * + `body`: raw body regexp + * + `url`: url regexp + * - `header`: for header and rawheader regexp means the name of header + * - `pcre_only`: flag regexp as pcre only regexp + * @param {string} scope scope name for the regexp + * @param 
{table} params regexp parameters + */ +LUA_FUNCTION_DEF(config, register_regexp_scoped); + +/*** + * @method rspamd_config:replace_regexp_scoped(scope, params) + * Replaces regexp with a new one in a specific scope + * Params is the table with the following fields (mandatory fields are marked with `*`): + * - `old_re`* : old regular expression object (must be in the cache) + * - `new_re`* : new regular expression object (must not be in the cache) + * - `pcre_only`: flag regexp as pcre only regexp + * @param {string} scope scope name for the regexp + * @param {table} params regexp parameters + */ +LUA_FUNCTION_DEF(config, replace_regexp_scoped); + +/*** + * @method rspamd_config:register_re_selector_scoped(scope, name, selector_str, [delimiter, [flatten]]) + * Registers selector with the specific name in a specific scope to use in regular expressions + * @param {string} scope scope name for the selector + * @param {string} name name of the selector + * @param {string} selector_str selector definition + * @param {string} delimiter delimiter to use when joining strings if flatten is false + * @param {bool} flatten if true then selector will return a table of captures instead of a single string + * @return {boolean} true if selector has been registered + */ +LUA_FUNCTION_DEF(config, register_re_selector_scoped); + +/*** + * @method rspamd_config:find_regexp_scope(scope) + * Checks if a regexp scope exists + * @param {string} scope scope name to check (can be nil for default scope) + * @return {boolean} true if scope exists + */ +LUA_FUNCTION_DEF(config, find_regexp_scope); + +/*** + * @method rspamd_config:remove_regexp_scope(scope) + * Removes a regexp scope from the cache + * @param {string} scope scope name to remove + * @return {boolean} true if scope was removed successfully + */ +LUA_FUNCTION_DEF(config, remove_regexp_scope); + +/*** + * @method rspamd_config:count_regexp_scopes() + * Returns the number of regexp scopes + * @return {number} number of scopes + */ 
+LUA_FUNCTION_DEF(config, count_regexp_scopes); + +/*** + * @method rspamd_config:list_regexp_scopes() + * Returns a list of all regexp scope names + * @return {table} array of scope names (default scope is named "default") + */ +LUA_FUNCTION_DEF(config, list_regexp_scopes); + /*** * @method rspamd_config:register_worker_script(worker_type, script) * Registers the following script for workers of a specified type. The exact type @@ -920,6 +993,13 @@ static const struct luaL_reg configlib_m[] = { LUA_INTERFACE_DEF(config, disable_symbol), LUA_INTERFACE_DEF(config, register_regexp), LUA_INTERFACE_DEF(config, replace_regexp), + LUA_INTERFACE_DEF(config, register_regexp_scoped), + LUA_INTERFACE_DEF(config, replace_regexp_scoped), + LUA_INTERFACE_DEF(config, register_re_selector_scoped), + LUA_INTERFACE_DEF(config, find_regexp_scope), + LUA_INTERFACE_DEF(config, remove_regexp_scope), + LUA_INTERFACE_DEF(config, count_regexp_scopes), + LUA_INTERFACE_DEF(config, list_regexp_scopes), LUA_INTERFACE_DEF(config, register_worker_script), LUA_INTERFACE_DEF(config, register_re_selector), LUA_INTERFACE_DEF(config, add_on_load), @@ -4848,3 +4928,316 @@ lua_config_unload_custom_tokenizers(lua_State *L) return luaL_error(L, "invalid arguments"); } } + +static int +lua_config_register_regexp_scoped(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = luaL_checkstring(L, 2); + struct rspamd_lua_regexp *re = NULL; + rspamd_regexp_t *cache_re; + const char *type_str = NULL, *header_str = NULL; + gsize header_len = 0; + GError *err = NULL; + enum rspamd_re_type type = RSPAMD_RE_BODY; + gboolean pcre_only = FALSE; + + /* + * - `scope`*: scope name for the regexp + * - `re`* : regular expression object + * - `type`*: type of regular expression: + * + `mime`: mime regexp + * + `rawmime`: raw mime regexp + * + `header`: header regexp + * + `rawheader`: raw header expression + * + `body`: raw body regexp + * + `url`: url regexp + * - 
`header`: for header and rawheader regexp means the name of header + * - `pcre_only`: allow merely pcre for this regexp + */ + if (cfg != NULL && scope != NULL) { + if (!rspamd_lua_parse_table_arguments(L, 3, &err, + RSPAMD_LUA_PARSE_ARGUMENTS_DEFAULT, + "*re=U{regexp};*type=S;header=S;pcre_only=B", + &re, &type_str, &header_str, &pcre_only)) { + msg_err_config("cannot get parameters list: %e", err); + + if (err) { + g_error_free(err); + } + } + else { + type = rspamd_re_cache_type_from_string(type_str); + + if ((type == RSPAMD_RE_HEADER || + type == RSPAMD_RE_RAWHEADER || + type == RSPAMD_RE_MIMEHEADER) && + header_str == NULL) { + msg_err_config( + "header argument is mandatory for header/rawheader regexps"); + } + else { + if (pcre_only) { + rspamd_regexp_set_flags(re->re, + rspamd_regexp_get_flags(re->re) | RSPAMD_REGEXP_FLAG_PCRE_ONLY); + } + + if (header_str != NULL) { + /* Include the last \0 */ + header_len = strlen(header_str) + 1; + } + + cache_re = rspamd_re_cache_add_scoped(&cfg->re_cache, scope, re->re, type, + (gpointer) header_str, header_len, -1); + + /* + * XXX: here are dragons! + * Actually, lua regexp contains internal rspamd_regexp_t + * and it owns it. + * However, after this operation we have some OTHER regexp, + * which we really would like to use. 
+ * So we do the following: + * 1) Remove old re and unref it + * 2) Replace the internal re with cached one + * 3) Increase its refcount to share ownership between cache and + * lua object + */ + if (cache_re != re->re) { + rspamd_regexp_unref(re->re); + re->re = rspamd_regexp_ref(cache_re); + + if (pcre_only) { + rspamd_regexp_set_flags(re->re, + rspamd_regexp_get_flags(re->re) | RSPAMD_REGEXP_FLAG_PCRE_ONLY); + } + } + } + } + } + + return 0; +} + +static int +lua_config_replace_regexp_scoped(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = luaL_checkstring(L, 2); + struct rspamd_lua_regexp *old_re = NULL, *new_re = NULL; + gboolean pcre_only = FALSE; + GError *err = NULL; + + if (cfg != NULL && scope != NULL) { + if (!rspamd_lua_parse_table_arguments(L, 3, &err, + RSPAMD_LUA_PARSE_ARGUMENTS_DEFAULT, + "*old_re=U{regexp};*new_re=U{regexp};pcre_only=B", + &old_re, &new_re, &pcre_only)) { + int ret = luaL_error(L, "cannot get parameters list: %s", + err ? 
err->message : "invalid arguments"); + + if (err) { + g_error_free(err); + } + + return ret; + } + else { + + if (pcre_only) { + rspamd_regexp_set_flags(new_re->re, + rspamd_regexp_get_flags(new_re->re) | RSPAMD_REGEXP_FLAG_PCRE_ONLY); + } + + rspamd_re_cache_replace_scoped(&cfg->re_cache, scope, old_re->re, new_re->re); + } + } + + return 0; +} + +static int +lua_config_register_re_selector_scoped(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = luaL_checkstring(L, 2); + const char *name = luaL_checkstring(L, 3); + const char *selector_str = luaL_checkstring(L, 4); + const char *delimiter = ""; + bool flatten = false; + int top = lua_gettop(L); + bool res = false; + + if (cfg && scope && name && selector_str) { + if (lua_gettop(L) >= 5) { + delimiter = luaL_checkstring(L, 5); + + if (lua_isboolean(L, 6)) { + flatten = lua_toboolean(L, 6); + } + } + + if (luaL_dostring(L, "return require \"lua_selectors\"") != 0) { + msg_warn_config("cannot require lua_selectors: %s", + lua_tostring(L, -1)); + } + else { + if (lua_type(L, -1) != LUA_TTABLE) { + msg_warn_config("lua selectors must return " + "table and not %s", + lua_typename(L, lua_type(L, -1))); + } + else { + lua_pushstring(L, "create_selector_closure"); + lua_gettable(L, -2); + + if (lua_type(L, -1) != LUA_TFUNCTION) { + msg_warn_config("create_selector_closure must return " + "function and not %s", + lua_typename(L, lua_type(L, -1))); + } + else { + int err_idx, ret; + struct rspamd_config **pcfg; + + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); + + /* Push function */ + lua_pushvalue(L, -2); + + pcfg = lua_newuserdata(L, sizeof(*pcfg)); + rspamd_lua_setclass(L, rspamd_config_classname, -1); + *pcfg = cfg; + lua_pushstring(L, selector_str); + lua_pushstring(L, delimiter); + lua_pushboolean(L, flatten); + + if ((ret = lua_pcall(L, 4, 1, err_idx)) != 0) { + msg_err_config("call to create_selector_closure lua " + "script 
failed (%d): %s", + ret, + lua_tostring(L, -1)); + } + else { + if (lua_type(L, -1) != LUA_TFUNCTION) { + msg_warn_config("create_selector_closure " + "invocation must return " + "function and not %s", + lua_typename(L, lua_type(L, -1))); + } + else { + ret = luaL_ref(L, LUA_REGISTRYINDEX); + rspamd_re_cache_add_selector_scoped(&cfg->re_cache, scope, + name, ret); + res = true; + } + } + } + } + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + lua_settop(L, top); + lua_pushboolean(L, res); + + if (res) { + msg_info_config("registered regexp selector %s for scope %s", name, scope); + } + + return 1; +} + +static int +lua_config_find_regexp_scope(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = NULL; + + if (cfg) { + if (lua_type(L, 2) == LUA_TSTRING) { + scope = lua_tostring(L, 2); + } + /* scope can be NULL for default scope */ + + struct rspamd_re_cache *found_cache = rspamd_re_cache_find_scope(cfg->re_cache, scope); + lua_pushboolean(L, found_cache != NULL); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int +lua_config_remove_regexp_scope(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + const char *scope = luaL_checkstring(L, 2); + + if (cfg && scope) { + gboolean result = rspamd_re_cache_remove_scope(&cfg->re_cache, scope); + lua_pushboolean(L, result); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int +lua_config_count_regexp_scopes(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + + if (cfg) { + unsigned int count = rspamd_re_cache_count_scopes(cfg->re_cache); + lua_pushinteger(L, count); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int +lua_config_list_regexp_scopes(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + + if (cfg) 
{ + char **scope_names; + unsigned int count, i; + + scope_names = rspamd_re_cache_get_scope_names(cfg->re_cache, &count); + + lua_newtable(L); + + if (scope_names) { + for (i = 0; i < count; i++) { + lua_pushinteger(L, i + 1); + lua_pushstring(L, scope_names[i]); + lua_settable(L, -3); + } + + g_strfreev(scope_names); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +}