lua_settop(L, err_idx - 1);
}
+
+gboolean rspamd_hs_cache_lua_load_sync(const char *cache_key,
+ const char *entity_name,
+ unsigned char **data,
+ gsize *len,
+ char **error)
+{
+ lua_State *L = lua_backend_L;
+ int err_idx;
+
+ msg_debug_hyperscan("load_sync: entity='%s', key=%s",
+ entity_name ? entity_name : "unknown", cache_key);
+
+ if (data) *data = NULL;
+ if (len) *len = 0;
+ if (error) *error = NULL;
+
+ if (!rspamd_hs_cache_has_lua_backend()) {
+ msg_debug_hyperscan("load_sync: no Lua backend");
+ if (error) *error = g_strdup("Lua backend not initialized");
+ return FALSE;
+ }
+
+ lua_pushcfunction(L, rspamd_lua_traceback);
+ err_idx = lua_gettop(L);
+
+ /* Get backend object from registry */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, lua_backend_ref);
+ if (!lua_istable(L, -1)) {
+ lua_settop(L, err_idx - 1);
+ if (error) *error = g_strdup("Invalid Lua backend reference");
+ return FALSE;
+ }
+
+ /* Get load_sync method */
+ lua_getfield(L, -1, "load_sync");
+ if (!lua_isfunction(L, -1)) {
+ lua_settop(L, err_idx - 1);
+ msg_debug_hyperscan("load_sync: backend has no load_sync method (async-only backend)");
+ if (error) *error = g_strdup("Backend does not support synchronous loading");
+ return FALSE;
+ }
+
+ /* Push self (backend object) */
+ lua_pushvalue(L, -2);
+ /* Push cache_key */
+ lua_pushstring(L, cache_key);
+ /* Push platform_id */
+ lua_pushstring(L, lua_backend_platform_id ? lua_backend_platform_id : "");
+
+ /* Call backend:load_sync(cache_key, platform_id) -> data, err */
+ if (lua_pcall(L, 3, 2, err_idx) != 0) {
+ const char *lua_err = lua_tostring(L, -1);
+ if (error) *error = g_strdup(lua_err ? lua_err : "Lua call failed");
+ lua_settop(L, err_idx - 1);
+ return FALSE;
+ }
+
+ /* Check results: data, err */
+ if (lua_isnil(L, -2)) {
+ /* Load failed - check error */
+ const char *lua_err = lua_tostring(L, -1);
+ if (error) *error = g_strdup(lua_err ? lua_err : "Load failed");
+ lua_settop(L, err_idx - 1);
+ return FALSE;
+ }
+
+ /* Get data - prefer rspamd{text} or Lua string */
+ struct rspamd_lua_text *t = lua_check_text_or_string(L, -2);
+ if (t && t->start && t->len > 0) {
+ if (data) {
+ *data = g_malloc(t->len);
+ memcpy(*data, t->start, t->len);
+ }
+ if (len) *len = t->len;
+ msg_debug_hyperscan("load_sync: loaded %z bytes for %s", t->len, cache_key);
+ }
+ else {
+ if (error) *error = g_strdup("Empty or invalid data returned");
+ lua_settop(L, err_idx - 1);
+ return FALSE;
+ }
+
+ lua_settop(L, err_idx - 1);
+ return TRUE;
+}
+
+gboolean rspamd_hs_cache_lua_exists_sync(const char *cache_key,
+ const char *entity_name,
+ gboolean *exists)
+{
+ lua_State *L = lua_backend_L;
+ int err_idx;
+
+ msg_debug_hyperscan("exists_sync: entity='%s', key=%s",
+ entity_name ? entity_name : "unknown", cache_key);
+
+ if (exists) *exists = FALSE;
+
+ if (!rspamd_hs_cache_has_lua_backend()) {
+ msg_debug_hyperscan("exists_sync: no Lua backend");
+ return FALSE;
+ }
+
+ lua_pushcfunction(L, rspamd_lua_traceback);
+ err_idx = lua_gettop(L);
+
+ /* Get backend object from registry */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, lua_backend_ref);
+ if (!lua_istable(L, -1)) {
+ lua_settop(L, err_idx - 1);
+ return FALSE;
+ }
+
+ /* Get exists_sync method */
+ lua_getfield(L, -1, "exists_sync");
+ if (!lua_isfunction(L, -1)) {
+ lua_settop(L, err_idx - 1);
+ msg_debug_hyperscan("exists_sync: backend has no exists_sync method (async-only backend)");
+ return FALSE;
+ }
+
+ /* Push self (backend object) */
+ lua_pushvalue(L, -2);
+ /* Push cache_key */
+ lua_pushstring(L, cache_key);
+ /* Push platform_id */
+ lua_pushstring(L, lua_backend_platform_id ? lua_backend_platform_id : "");
+
+ /* Call backend:exists_sync(cache_key, platform_id) -> exists, err */
+ if (lua_pcall(L, 3, 2, err_idx) != 0) {
+ lua_settop(L, err_idx - 1);
+ return FALSE;
+ }
+
+ /* Check result */
+ if (exists) {
+ *exists = lua_toboolean(L, -2);
+ }
+
+ msg_debug_hyperscan("exists_sync: %s -> %s", cache_key, *exists ? "found" : "not found");
+
+ lua_settop(L, err_idx - 1);
+ return TRUE;
+}
GHashTableIter cit;
gpointer k, v;
struct rspamd_re_class *re_class;
- char path[PATH_MAX], npath[PATH_MAX];
+ char path[PATH_MAX];
hs_database_t *test_db;
int fd, i, n, *hs_ids = NULL, pcre_flags, re_flags;
rspamd_cryptobox_fast_hash_state_t crc_st;
struct iovec iov[7];
struct rspamd_re_cache *cache;
GError *err;
- pid_t our_pid = getpid();
cache = cbdata->cache;
#endif
}
+/* Forward declaration - defined after rspamd_re_cache_load_hyperscan_scoped */
+static gboolean
+rspamd_re_cache_apply_hyperscan_blob(struct rspamd_re_cache *cache,
+ struct rspamd_re_class *re_class,
+ const unsigned char *data,
+ gsize len,
+ bool try_load);
enum rspamd_hyperscan_status
rspamd_re_cache_load_hyperscan(struct rspamd_re_cache *cache,
#ifndef WITH_HYPERSCAN
return RSPAMD_HYPERSCAN_UNSUPPORTED;
#else
- char path[PATH_MAX];
- int fd, i, n, *hs_ids = NULL, *hs_flags = NULL, total = 0, ret;
GHashTableIter it;
gpointer k, v;
- uint8_t *map, *p;
struct rspamd_re_class *re_class;
- struct rspamd_re_cache_elt *elt;
- struct stat st;
- gboolean has_valid = FALSE, all_valid = FALSE;
+ gboolean has_valid = FALSE, all_valid = TRUE;
+ unsigned int total_loaded = 0;
g_hash_table_iter_init(&it, cache->re_classes);
+ /* Lua backend is required for sync loading */
+ if (!rspamd_hs_cache_has_lua_backend()) {
+ msg_warn_re_cache("no Lua backend available for synchronous hyperscan loading%s%s%s",
+ cache->scope ? " for scope '" : "",
+ cache->scope ? cache->scope : "",
+ cache->scope ? "'" : "");
+ cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOAD_ERROR;
+ return cache->hyperscan_loaded;
+ }
+
while (g_hash_table_iter_next(&it, &k, &v)) {
re_class = v;
- rspamd_snprintf(path, sizeof(path), "%s%c%s.hs", cache_dir,
- G_DIR_SEPARATOR, re_class->hash);
-
- if (rspamd_re_cache_is_valid_hyperscan_file(cache, path, try_load, FALSE, NULL)) {
- msg_debug_re_cache("load hyperscan database from '%s'",
- re_class->hash);
-
- fd = open(path, O_RDONLY);
-
- /* Read number of regexps */
- g_assert(fd != -1);
- fstat(fd, &st);
-
- map = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
-
- if (map == MAP_FAILED) {
- if (!try_load) {
- msg_err_re_cache("cannot mmap %s: %s", path, strerror(errno));
- }
- else {
- msg_debug_re_cache("cannot mmap %s: %s", path, strerror(errno));
- }
-
- close(fd);
- all_valid = FALSE;
- continue;
- }
-
- close(fd);
- p = map + RSPAMD_HS_MAGIC_LEN + sizeof(cache->plt);
- n = *(int *) p;
-
- if (n <= 0 || 2 * n * sizeof(int) + /* IDs + flags */
- sizeof(uint64_t) + /* crc */
- RSPAMD_HS_MAGIC_LEN + /* header */
- sizeof(cache->plt) >
- (gsize) st.st_size) {
- /* Some wrong amount of regexps */
- if (!try_load) {
- msg_err_re_cache("bad number of expressions in %s: %d",
- path, n);
- }
- else {
- msg_debug_re_cache("bad number of expressions in %s: %d",
- path, n);
- }
-
- munmap(map, st.st_size);
- all_valid = FALSE;
- continue;
- }
-
- total += n;
- p += sizeof(n);
- hs_ids = g_malloc(n * sizeof(*hs_ids));
- memcpy(hs_ids, p, n * sizeof(*hs_ids));
- p += n * sizeof(*hs_ids);
- hs_flags = g_malloc(n * sizeof(*hs_flags));
- memcpy(hs_flags, p, n * sizeof(*hs_flags));
-
- /* Skip crc */
- p += n * sizeof(*hs_ids) + sizeof(uint64_t);
-
- /* Cleanup */
- if (re_class->hs_scratch != NULL) {
- hs_free_scratch(re_class->hs_scratch);
- }
-
- if (re_class->hs_db != NULL) {
- rspamd_hyperscan_free(re_class->hs_db, false);
- }
-
- /*
- * Reset match_type to PCRE for all regexps in this class.
- * We iterate re_class->re (the hash table of regexps) rather than
- * hs_ids because after config reload the hs_ids may point to different
- * regexps in cache->re. By iterating the actual regexps in this class,
- * we ensure we reset the correct cache_elts.
- */
- {
- GHashTableIter class_it;
- gpointer class_k, class_v;
-
- g_hash_table_iter_init(&class_it, re_class->re);
- while (g_hash_table_iter_next(&class_it, &class_k, &class_v)) {
- rspamd_regexp_t *class_re = class_v;
- uint64_t re_cache_id = rspamd_regexp_get_cache_id(class_re);
-
- if (re_cache_id != RSPAMD_INVALID_ID && re_cache_id < cache->re->len) {
- struct rspamd_re_cache_elt *elt = g_ptr_array_index(cache->re, re_cache_id);
- elt->match_type = RSPAMD_RE_CACHE_PCRE;
- }
- }
- }
-
- if (re_class->hs_ids) {
- g_free(re_class->hs_ids);
- }
+ unsigned char *data = NULL;
+ gsize data_len = 0;
+ char *error = NULL;
- re_class->hs_ids = NULL;
- re_class->nhs = 0;
- re_class->hs_scratch = NULL;
- re_class->hs_db = NULL;
- munmap(map, st.st_size);
-
- re_class->hs_db = rspamd_hyperscan_maybe_load(path, p - map);
- if (re_class->hs_db == NULL) {
- if (!try_load) {
- msg_err_re_cache("bad hs database in %s", path);
- }
- else {
- msg_debug_re_cache("bad hs database in %s", path);
- }
- g_free(hs_ids);
- g_free(hs_flags);
-
- re_class->hs_ids = NULL;
- re_class->hs_scratch = NULL;
- re_class->hs_db = NULL;
- all_valid = FALSE;
-
- continue;
- }
-
- if ((ret = hs_alloc_scratch(rspamd_hyperscan_get_database(re_class->hs_db),
- &re_class->hs_scratch)) != HS_SUCCESS) {
- if (!try_load) {
- msg_err_re_cache("bad hs database in %s; error code: %d", path, ret);
- }
- else {
- msg_debug_re_cache("bad hs database in %s; error code: %d", path, ret);
- }
- g_free(hs_ids);
- g_free(hs_flags);
-
- rspamd_hyperscan_free(re_class->hs_db, true);
- re_class->hs_ids = NULL;
- re_class->hs_scratch = NULL;
- re_class->hs_db = NULL;
- all_valid = FALSE;
-
- continue;
- }
-
- /*
- * First validate all IDs point to regexps in this re_class.
- * We must do validation BEFORE setting any match_types, otherwise if
- * validation fails mid-loop, some regexps will have match_type=HYPERSCAN
- * but hs_scratch will be NULL.
- */
- for (i = 0; i < n; i++) {
- g_assert((int) cache->re->len > hs_ids[i] && hs_ids[i] >= 0);
- elt = g_ptr_array_index(cache->re, hs_ids[i]);
-
- /* Verify the regexp at this ID belongs to the current re_class */
- if (rspamd_regexp_get_class(elt->re) != re_class) {
- msg_info_re_cache("stale hyperscan file %s: id %d points to "
- "wrong re_class, removing to trigger recompilation",
- path, hs_ids[i]);
- g_free(hs_ids);
- g_free(hs_flags);
- rspamd_hyperscan_free(re_class->hs_db, true);
- re_class->hs_ids = NULL;
- re_class->hs_scratch = NULL;
- re_class->hs_db = NULL;
- /* Remove stale file to trigger recompilation by hs_helper */
- unlink(path);
- all_valid = FALSE;
- goto next_class;
- }
- }
-
- /*
- * All IDs validated - now set match types.
- */
- for (i = 0; i < n; i++) {
- elt = g_ptr_array_index(cache->re, hs_ids[i]);
-
- if (hs_flags[i] & HS_FLAG_PREFILTER) {
- elt->match_type = RSPAMD_RE_CACHE_HYPERSCAN_PRE;
- }
- else {
- elt->match_type = RSPAMD_RE_CACHE_HYPERSCAN;
- }
+ /* Load via Lua backend (handles files, compression, etc.) */
+ if (rspamd_hs_cache_lua_load_sync(re_class->hash, "re_class", &data, &data_len, &error)) {
+ msg_debug_re_cache("loaded hyperscan via Lua backend for '%s' (%z bytes)",
+ re_class->hash, data_len);
+ }
+ else {
+ /* Lua backend failed - async-only backend or file not found */
+ if (error) {
+ msg_debug_re_cache("Lua backend load failed for '%s': %s",
+ re_class->hash, error);
+ g_free(error);
}
+ all_valid = FALSE;
+ continue;
+ }
- re_class->hs_ids = hs_ids;
- g_free(hs_flags);
- re_class->nhs = n;
-
- /* Notify main process about the loaded hyperscan file */
- rspamd_hyperscan_notice_known(path);
+ if (!data || data_len == 0) {
+ all_valid = FALSE;
+ continue;
+ }
- if (!has_valid) {
- has_valid = TRUE;
- all_valid = TRUE;
- }
+ /* Process the loaded data using the blob apply function */
+ if (rspamd_re_cache_apply_hyperscan_blob(cache, re_class, data, data_len, try_load)) {
+ has_valid = TRUE;
+ total_loaded++;
+ msg_debug_re_cache("successfully applied hyperscan blob for '%s'", re_class->hash);
}
else {
- if (!try_load) {
- msg_err_re_cache("invalid hyperscan hash file '%s'",
- path);
- }
- else {
- msg_debug_re_cache("invalid hyperscan hash file '%s'",
- path);
- }
all_valid = FALSE;
- continue;
+ msg_debug_re_cache("failed to apply hyperscan blob for '%s'", re_class->hash);
}
- next_class:;
+
+ g_free(data);
}
if (has_valid) {
if (all_valid) {
- msg_info_re_cache("full hyperscan database of %d regexps has been loaded%s%s%s",
- total,
+ msg_info_re_cache("full hyperscan database (%u classes) has been loaded%s%s%s",
+ total_loaded,
cache->scope ? " for scope '" : "",
cache->scope ? cache->scope : "",
cache->scope ? "'" : "");
cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOADED_FULL;
}
else {
- msg_info_re_cache("partial hyperscan database of %d regexps has been loaded%s%s%s",
- total,
+ msg_info_re_cache("partial hyperscan database (%u classes) has been loaded%s%s%s",
+ total_loaded,
cache->scope ? " for scope '" : "",
cache->scope ? cache->scope : "",
cache->scope ? "'" : "");
cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOAD_ERROR;
}
-
return cache->hyperscan_loaded;
#endif
}