]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Add async multipattern compilation infrastructure
authorVsevolod Stakhov <vsevolod@rspamd.com>
Mon, 5 Jan 2026 20:44:47 +0000 (20:44 +0000)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Mon, 5 Jan 2026 20:44:47 +0000 (20:44 +0000)
Add deferred hyperscan compilation for multipatterns (TLD patterns):
- Build ACISM fallback immediately during pre-fork (fast)
- Queue multipatterns for async HS compilation by hs_helper
- Workers hot-swap from ACISM to hyperscan when compilation completes

IPC additions:
- RSPAMD_SRV_MULTIPATTERN_LOADED: hs_helper → main
- RSPAMD_CONTROL_MULTIPATTERN_LOADED: main → workers

Bug fixes:
- Use per-pattern TLD flags instead of multipattern-level flags
- Add word boundary check in ACISM callback for TLD matching

src/hs_helper.c
src/libserver/rspamd_control.c
src/libserver/rspamd_control.h
src/libserver/url.c
src/libserver/worker_util.c
src/libutil/multipattern.c
src/libutil/multipattern.h

index 1464b9628eb14e3a309aef42e6f4782c8e1bae70..e0ef0d298c5facfd5a0e2b03e8ff47dfc3b3a565 100644 (file)
@@ -15,6 +15,7 @@
  */
 #include "config.h"
 #include "libutil/util.h"
+#include "libutil/multipattern.h"
 #include "libserver/cfg_file.h"
 #include "libserver/cfg_rcl.h"
 #include "libserver/worker_util.h"
@@ -24,6 +25,7 @@
 
 #ifdef WITH_HYPERSCAN
 #include "libserver/hyperscan_tools.h"
+#include "hs.h"
 #endif
 
 #ifdef HAVE_GLOB_H
@@ -592,6 +594,72 @@ rspamd_hs_helper_reload(struct rspamd_main *rspamd_main,
        return TRUE;
 }
 
+#ifdef WITH_HYPERSCAN
+/*
+ * Compile pending multipatterns that were queued during pre-fork initialization
+ */
+static void
+rspamd_hs_helper_compile_pending_multipatterns(struct hs_helper_ctx *ctx,
+                                                                                          struct rspamd_worker *worker)
+{
+       struct rspamd_multipattern_pending *pending;
+       unsigned int count = 0;
+
+       pending = rspamd_multipattern_get_pending(&count);
+       if (pending == NULL || count == 0) {
+               msg_debug("no pending multipattern compilations");
+               return;
+       }
+
+       msg_info("processing %ud pending multipattern compilations", count);
+
+       for (unsigned int i = 0; i < count; i++) {
+               struct rspamd_multipattern_pending *entry = &pending[i];
+               struct rspamd_multipattern *mp = entry->mp;
+               unsigned int npatterns;
+               char fp[PATH_MAX];
+               GError *err = NULL;
+
+               npatterns = rspamd_multipattern_get_npatterns(mp);
+               msg_info("compiling multipattern '%s' with %ud patterns", entry->name, npatterns);
+
+               /* Build cache file path */
+               rspamd_snprintf(fp, sizeof(fp), "%s/%*xs.hs", ctx->hs_dir,
+                                               (int) sizeof(entry->hash) / 2, entry->hash);
+
+               /* Check if cache file already exists (race with another process) */
+               if (access(fp, R_OK) == 0) {
+                       msg_info("cache file %s already exists for multipattern '%s', skipping compilation",
+                                        fp, entry->name);
+               }
+               else {
+                       /* Compile and save to cache */
+                       if (!rspamd_multipattern_compile_hs_to_cache(mp, ctx->hs_dir, &err)) {
+                               msg_err("failed to compile multipattern '%s': %e", entry->name, err);
+                               if (err) {
+                                       g_error_free(err);
+                               }
+                               continue;
+                       }
+               }
+
+               /* Send notification to main process */
+               struct rspamd_srv_command srv_cmd;
+               memset(&srv_cmd, 0, sizeof(srv_cmd));
+               srv_cmd.type = RSPAMD_SRV_MULTIPATTERN_LOADED;
+               rspamd_strlcpy(srv_cmd.cmd.mp_loaded.name, entry->name,
+                                          sizeof(srv_cmd.cmd.mp_loaded.name));
+               rspamd_strlcpy(srv_cmd.cmd.mp_loaded.cache_dir, ctx->hs_dir,
+                                          sizeof(srv_cmd.cmd.mp_loaded.cache_dir));
+
+               rspamd_srv_send_command(worker, ctx->event_loop, &srv_cmd, -1, NULL, NULL);
+               msg_info("sent multipattern loaded notification for '%s'", entry->name);
+       }
+
+       rspamd_multipattern_clear_pending();
+}
+#endif
+
 static gboolean
 rspamd_hs_helper_workers_spawned(struct rspamd_main *rspamd_main,
                                                                 struct rspamd_worker *worker, int fd,
@@ -644,6 +712,11 @@ rspamd_hs_helper_workers_spawned(struct rspamd_main *rspamd_main,
                }
        }
 
+#ifdef WITH_HYPERSCAN
+       /* Process pending multipattern compilations (e.g., TLD patterns) */
+       rspamd_hs_helper_compile_pending_multipatterns(ctx, worker);
+#endif
+
        if (attached_fd != -1) {
                close(attached_fd);
        }
index a9e2f7366b21b37f50fa23b4fd35f04e07615b8a..4ed964d42b05aa95728d82ce67b4e647aa148e22 100644 (file)
@@ -1170,6 +1170,24 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
                                        rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
                                                                                                 rspamd_control_ignore_io_handler, NULL, worker->pid);
                                }
+#endif
+                               break;
+                       case RSPAMD_SRV_MULTIPATTERN_LOADED:
+#ifdef WITH_HYPERSCAN
+                               msg_info_main("received multipattern loaded notification for '%s' from %s",
+                                                         cmd.cmd.mp_loaded.name, cmd.cmd.mp_loaded.cache_dir);
+
+                               /* Broadcast command to all workers */
+                               memset(&wcmd, 0, sizeof(wcmd));
+                               wcmd.type = RSPAMD_CONTROL_MULTIPATTERN_LOADED;
+                               rspamd_strlcpy(wcmd.cmd.mp_loaded.name,
+                                                          cmd.cmd.mp_loaded.name,
+                                                          sizeof(wcmd.cmd.mp_loaded.name));
+                               rspamd_strlcpy(wcmd.cmd.mp_loaded.cache_dir,
+                                                          cmd.cmd.mp_loaded.cache_dir,
+                                                          sizeof(wcmd.cmd.mp_loaded.cache_dir));
+                               rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
+                                                                                        rspamd_control_ignore_io_handler, NULL, worker->pid);
 #endif
                                break;
                        case RSPAMD_SRV_MONITORED_CHANGE:
@@ -1527,6 +1545,12 @@ rspamd_control_command_to_string(enum rspamd_control_type cmd)
        case RSPAMD_CONTROL_COMPOSITES_STATS:
                reply = "composites_stats";
                break;
+       case RSPAMD_CONTROL_FUZZY_BLOCKED:
+               reply = "fuzzy_blocked";
+               break;
+       case RSPAMD_CONTROL_MULTIPATTERN_LOADED:
+               reply = "multipattern_loaded";
+               break;
        default:
                break;
        }
@@ -1569,6 +1593,9 @@ const char *rspamd_srv_command_to_string(enum rspamd_srv_type cmd)
        case RSPAMD_SRV_WORKERS_SPAWNED:
                reply = "workers_spawned";
                break;
+       case RSPAMD_SRV_MULTIPATTERN_LOADED:
+               reply = "multipattern_loaded";
+               break;
        }
 
        return reply;
index 5048035b46b938fd365d119f4f8635e388e7a822..d4db9547324e29260fc79bae3ab69be671ca0392 100644 (file)
@@ -39,6 +39,7 @@ enum rspamd_control_type {
        RSPAMD_CONTROL_FUZZY_BLOCKED,
        RSPAMD_CONTROL_WORKERS_SPAWNED,
        RSPAMD_CONTROL_COMPOSITES_STATS,
+       RSPAMD_CONTROL_MULTIPATTERN_LOADED,
        RSPAMD_CONTROL_MAX
 };
 
@@ -51,8 +52,9 @@ enum rspamd_srv_type {
        RSPAMD_SRV_HEARTBEAT,
        RSPAMD_SRV_HEALTH,
        RSPAMD_SRV_NOTICE_HYPERSCAN_CACHE,
-       RSPAMD_SRV_FUZZY_BLOCKED,   /* Used to notify main process about a blocked ip */
-       RSPAMD_SRV_WORKERS_SPAWNED, /* Used to notify workers that all workers have been spawned */
+       RSPAMD_SRV_FUZZY_BLOCKED,       /* Used to notify main process about a blocked ip */
+       RSPAMD_SRV_WORKERS_SPAWNED,     /* Used to notify workers that all workers have been spawned */
+       RSPAMD_SRV_MULTIPATTERN_LOADED, /* Multipattern HS compiled and ready */
 };
 
 enum rspamd_log_pipe_type {
@@ -80,6 +82,10 @@ struct rspamd_control_command {
                        char scope[64]; /* Scope name, NULL means all scopes */
                        gsize fd_size;  /* Size of FD-based db, 0 if not using FD */
                } hs_loaded;
+               struct {
+                       char name[64];
+                       char cache_dir[CONTROL_PATHLEN];
+               } mp_loaded;
                struct {
                        char tag[32];
                        gboolean alive;
@@ -230,6 +236,11 @@ struct rspamd_srv_command {
                struct {
                        unsigned int workers_count;
                } workers_spawned;
+               /* Sent when a multipattern hyperscan db is compiled */
+               struct {
+                       char name[64];
+                       char cache_dir[CONTROL_PATHLEN];
+               } mp_loaded;
        } cmd;
 };
 
index 9054b8982b81a63332b81af36f87c545ca13d408..0e3becd5aebc45bbbddb9a1104c58a0a7fd840c2 100644 (file)
@@ -599,6 +599,11 @@ void rspamd_url_init(const char *tld_file)
                        g_error_free(err);
                        ret = FALSE;
                }
+               else if (rspamd_multipattern_get_state(url_scanner->search_trie_full) ==
+                                RSPAMD_MP_STATE_COMPILING) {
+                       /* Add to pending queue for hs_helper to compile */
+                       rspamd_multipattern_add_pending(url_scanner->search_trie_full, "tld");
+               }
        }
 
        if (tld_file != NULL) {
index e55138bbb72780fb9d56f121b20cb796ea615766..698b542b29aafbaf17214b31f270dbe1f719c898 100644 (file)
@@ -58,6 +58,7 @@
 #include "contrib/libev/ev.h"
 #include "libstat/stat_api.h"
 #include "libserver/protocol_internal.h"
+#include "libutil/multipattern.h"
 
 struct rspamd_worker *rspamd_current_worker = NULL;
 
@@ -1959,6 +1960,52 @@ rspamd_worker_hyperscan_ready(struct rspamd_main *rspamd_main,
 
        return TRUE;
 }
+
+static gboolean
+rspamd_worker_multipattern_ready(struct rspamd_main *rspamd_main,
+                                                                struct rspamd_worker *worker, int fd,
+                                                                int attached_fd,
+                                                                struct rspamd_control_command *cmd,
+                                                                gpointer ud)
+{
+       struct rspamd_control_reply rep;
+       struct rspamd_multipattern *mp;
+       const char *name = cmd->cmd.mp_loaded.name;
+       const char *cache_dir = cmd->cmd.mp_loaded.cache_dir;
+
+       memset(&rep, 0, sizeof(rep));
+       rep.type = RSPAMD_CONTROL_MULTIPATTERN_LOADED;
+
+       mp = rspamd_multipattern_find_pending(name);
+
+       if (mp != NULL) {
+               if (rspamd_multipattern_load_from_cache(mp, cache_dir)) {
+                       msg_info("multipattern '%s' hot-swapped to hyperscan", name);
+                       rep.reply.hs_loaded.status = 0;
+               }
+               else {
+                       msg_warn("failed to load multipattern '%s' from cache, "
+                                        "continuing with ACISM fallback",
+                                        name);
+                       rep.reply.hs_loaded.status = ENOENT;
+               }
+       }
+       else {
+               msg_warn("received multipattern notification for unknown '%s'", name);
+               rep.reply.hs_loaded.status = ENOENT;
+       }
+
+       if (write(fd, &rep, sizeof(rep)) != sizeof(rep)) {
+               msg_err("cannot write reply to the control socket: %s",
+                               strerror(errno));
+       }
+
+       if (attached_fd >= 0) {
+               close(attached_fd);
+       }
+
+       return TRUE;
+}
 #endif /* With Hyperscan */
 
 gboolean
@@ -2055,6 +2102,10 @@ void rspamd_worker_init_scanner(struct rspamd_worker *worker,
                                                                                  RSPAMD_CONTROL_HYPERSCAN_LOADED,
                                                                                  rspamd_worker_hyperscan_ready,
                                                                                  NULL);
+       rspamd_control_worker_add_cmd_handler(worker,
+                                                                                 RSPAMD_CONTROL_MULTIPATTERN_LOADED,
+                                                                                 rspamd_worker_multipattern_ready,
+                                                                                 NULL);
 #endif
        rspamd_control_worker_add_cmd_handler(worker,
                                                                                  RSPAMD_CONTROL_LOG_PIPE,
index 91f0968a7288aceb593b098e64b3e2d7eae1d85a..bed2a80b8e4bdb730a0fbe9c84808fc5cad74555 100644 (file)
@@ -40,6 +40,8 @@ enum rspamd_hs_check_state {
 static const char *hs_cache_dir = NULL;
 static enum rspamd_hs_check_state hs_suitable_cpu = RSPAMD_HS_UNCHECKED;
 
+/* Pending compilation queue for deferred HS compilation */
+static GArray *pending_compilations = NULL;
 
 struct RSPAMD_ALIGNED(64) rspamd_multipattern {
 #ifdef WITH_HYPERSCAN
@@ -332,14 +334,15 @@ void rspamd_multipattern_add_pattern_len(struct rspamd_multipattern *mp,
                                                                                 const char *pattern, gsize patlen, int flags)
 {
        gsize dlen;
-       gboolean is_tld = (mp->flags & RSPAMD_MULTIPATTERN_TLD);
+       /* Use per-pattern flags to determine if this is a TLD pattern */
+       gboolean is_tld = (flags & RSPAMD_MULTIPATTERN_TLD);
 
        g_assert(pattern != NULL);
        g_assert(mp != NULL);
        g_assert(!mp->compiled);
 
        /*
-        * For TLD multipatterns: always add to pats array for ACISM fallback
+        * For TLD patterns: add to pats array for ACISM fallback
         */
        if (is_tld) {
                ac_trie_pat_t acism_pat;
@@ -686,7 +689,8 @@ rspamd_multipattern_compile(struct rspamd_multipattern *mp, int flags, GError **
                        mp->state = RSPAMD_MP_STATE_INIT;
                        mp->compiled = TRUE;
 
-                       msg_info("built ACISM fallback trie for %ud TLD patterns", mp->cnt);
+                       msg_info("built ACISM fallback trie for %ud TLD patterns (total patterns: %ud)",
+                                        mp->pats->len, mp->cnt);
 
                        /* Try to load from cache first */
                        if (!(flags & RSPAMD_MULTIPATTERN_COMPILE_NO_FS) &&
@@ -806,10 +810,14 @@ rspamd_multipattern_acism_cb(int strnum, int textpos, void *context)
        ac_trie_pat_t pat;
 
        pat = g_array_index(cbd->mp->pats, ac_trie_pat_t, strnum);
-       /*
-        * For TLD multipatterns: strnum IS the pattern ID (0-based)
-        * All patterns in the multipattern are TLD patterns, so no offset needed
-        */
+
+       /* For TLD patterns, require word boundary at end of match */
+       if (cbd->mp->flags & RSPAMD_MULTIPATTERN_TLD) {
+               if (textpos < (int) cbd->len && g_ascii_isalnum(cbd->in[textpos])) {
+                       return 0;
+               }
+       }
+
        ret = cbd->cb(cbd->mp, strnum, textpos - pat.len,
                                  textpos, cbd->in, cbd->len, cbd->ud);
 
@@ -1160,3 +1168,252 @@ rspamd_multipattern_set_hs_db(struct rspamd_multipattern *mp, void *hs_db)
        return FALSE;
 #endif
 }
+
+void rspamd_multipattern_add_pending(struct rspamd_multipattern *mp,
+                                                                        const char *name)
+{
+       struct rspamd_multipattern_pending entry;
+
+       g_assert(mp != NULL);
+       g_assert(name != NULL);
+       g_assert(mp->state == RSPAMD_MP_STATE_COMPILING);
+
+       if (pending_compilations == NULL) {
+               pending_compilations = g_array_new(FALSE, FALSE,
+                                                                                  sizeof(struct rspamd_multipattern_pending));
+       }
+
+       entry.mp = mp;
+       entry.name = g_strdup(name);
+       rspamd_multipattern_get_hash(mp, entry.hash);
+
+       g_array_append_val(pending_compilations, entry);
+
+       msg_info("added multipattern '%s' (%ud patterns) to pending compilation queue",
+                        name, mp->cnt);
+}
+
+struct rspamd_multipattern_pending *
+rspamd_multipattern_get_pending(unsigned int *count)
+{
+       if (pending_compilations == NULL || pending_compilations->len == 0) {
+               *count = 0;
+               return NULL;
+       }
+
+       *count = pending_compilations->len;
+       return (struct rspamd_multipattern_pending *) pending_compilations->data;
+}
+
+void rspamd_multipattern_clear_pending(void)
+{
+       if (pending_compilations == NULL) {
+               return;
+       }
+
+       for (unsigned int i = 0; i < pending_compilations->len; i++) {
+               struct rspamd_multipattern_pending *entry;
+
+               entry = &g_array_index(pending_compilations,
+                                                          struct rspamd_multipattern_pending, i);
+               g_free(entry->name);
+       }
+
+       g_array_free(pending_compilations, TRUE);
+       pending_compilations = NULL;
+}
+
+struct rspamd_multipattern *
+rspamd_multipattern_find_pending(const char *name)
+{
+       if (pending_compilations == NULL || name == NULL) {
+               return NULL;
+       }
+
+       for (unsigned int i = 0; i < pending_compilations->len; i++) {
+               struct rspamd_multipattern_pending *entry;
+
+               entry = &g_array_index(pending_compilations,
+                                                          struct rspamd_multipattern_pending, i);
+               if (strcmp(entry->name, name) == 0) {
+                       return entry->mp;
+               }
+       }
+
+       return NULL;
+}
+
+gboolean
+rspamd_multipattern_compile_hs_to_cache(struct rspamd_multipattern *mp,
+                                                                               const char *cache_dir,
+                                                                               GError **err)
+{
+#ifdef WITH_HYPERSCAN
+       hs_platform_info_t plt;
+       hs_compile_error_t *hs_errors = NULL;
+       hs_database_t *db = NULL;
+       unsigned char hash[rspamd_cryptobox_HASHBYTES];
+       char *bytes = NULL;
+       gsize len;
+       char fp[PATH_MAX], np[PATH_MAX];
+       int fd;
+
+       g_assert(mp != NULL);
+       g_assert(cache_dir != NULL);
+
+       if (mp->state != RSPAMD_MP_STATE_COMPILING) {
+               g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+                                       "multipattern not in COMPILING state");
+               return FALSE;
+       }
+
+       if (mp->hs_pats == NULL || mp->cnt == 0) {
+               g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+                                       "no patterns to compile");
+               return FALSE;
+       }
+
+       g_assert(hs_populate_platform(&plt) == HS_SUCCESS);
+
+       /* Compute hash for cache key */
+       rspamd_multipattern_get_hash(mp, hash);
+
+       msg_info("compiling hyperscan database for %ud patterns", mp->cnt);
+
+       if (hs_compile_multi((const char *const *) mp->hs_pats->data,
+                                                (const unsigned int *) mp->hs_flags->data,
+                                                (const unsigned int *) mp->hs_ids->data,
+                                                mp->cnt,
+                                                HS_MODE_BLOCK,
+                                                &plt,
+                                                &db,
+                                                &hs_errors) != HS_SUCCESS) {
+               g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+                                       "cannot compile hyperscan: %s (pattern %d)",
+                                       hs_errors->message, hs_errors->expression);
+               hs_free_compile_error(hs_errors);
+               return FALSE;
+       }
+
+       /* Serialize with unified header format */
+       if (!rspamd_hyperscan_serialize_with_header(db, NULL, NULL, 0, &bytes, &len)) {
+               g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+                                       "cannot serialize hyperscan database");
+               hs_free_database(db);
+               return FALSE;
+       }
+
+       hs_free_database(db);
+
+       /* Write to temp file and rename */
+       rspamd_snprintf(fp, sizeof(fp), "%s%chs-mp-XXXXXXXXXXXXX",
+                                       cache_dir, G_DIR_SEPARATOR);
+
+       if ((fd = g_mkstemp_full(fp, O_CREAT | O_EXCL | O_WRONLY, 00644)) == -1) {
+               g_set_error(err, rspamd_multipattern_quark(), errno,
+                                       "cannot create temp file %s: %s", fp, strerror(errno));
+               g_free(bytes);
+               return FALSE;
+       }
+
+       if (write(fd, bytes, len) != (ssize_t) len) {
+               g_set_error(err, rspamd_multipattern_quark(), errno,
+                                       "cannot write to %s: %s", fp, strerror(errno));
+               close(fd);
+               unlink(fp);
+               g_free(bytes);
+               return FALSE;
+       }
+
+       g_free(bytes);
+       fsync(fd);
+       close(fd);
+
+       /* Rename to final path */
+       rspamd_snprintf(np, sizeof(np), "%s/%*xs.hs", cache_dir,
+                                       (int) rspamd_cryptobox_HASHBYTES / 2, hash);
+
+       if (rename(fp, np) == -1) {
+               g_set_error(err, rspamd_multipattern_quark(), errno,
+                                       "cannot rename %s to %s: %s", fp, np, strerror(errno));
+               unlink(fp);
+               return FALSE;
+       }
+
+       rspamd_hyperscan_notice_known(np);
+       msg_info("saved hyperscan database to %s (%z bytes)", np, len);
+
+       return TRUE;
+#else
+       g_set_error(err, rspamd_multipattern_quark(), ENOTSUP,
+                               "hyperscan not available");
+       return FALSE;
+#endif
+}
+
+gboolean
+rspamd_multipattern_load_from_cache(struct rspamd_multipattern *mp,
+                                                                       const char *cache_dir)
+{
+#ifdef WITH_HYPERSCAN
+       unsigned char hash[rspamd_cryptobox_HASHBYTES];
+       char fp[PATH_MAX];
+       gchar *data;
+       gsize len;
+       GError *err = NULL;
+
+       g_assert(mp != NULL);
+       g_assert(cache_dir != NULL);
+
+       if (mp->state != RSPAMD_MP_STATE_COMPILING) {
+               msg_debug("multipattern not in COMPILING state, cannot load from cache");
+               return FALSE;
+       }
+
+       /* Calculate hash for cache key */
+       rspamd_multipattern_get_hash(mp, hash);
+
+       rspamd_snprintf(fp, sizeof(fp), "%s/%*xs.hs", cache_dir,
+                                       (int) rspamd_cryptobox_HASHBYTES / 2, hash);
+
+       if (!g_file_get_contents(fp, &data, &len, &err)) {
+               if (err) {
+                       msg_warn("cannot read hyperscan cache %s: %s", fp, err->message);
+                       g_error_free(err);
+               }
+               return FALSE;
+       }
+
+       mp->hs_db = rspamd_hyperscan_load_from_header(data, len, &err);
+       g_free(data);
+
+       if (mp->hs_db == NULL) {
+               if (err) {
+                       msg_warn("cannot load hyperscan from cache %s: %s", fp, err->message);
+                       g_error_free(err);
+               }
+               return FALSE;
+       }
+
+       /* Allocate scratch space */
+       if (!rspamd_multipattern_alloc_scratch(mp, &err)) {
+               msg_warn("hyperscan cache loaded but scratch allocation failed: %s",
+                                err ? err->message : "unknown error");
+               rspamd_hyperscan_free(mp->hs_db, true);
+               mp->hs_db = NULL;
+               g_clear_error(&err);
+               return FALSE;
+       }
+
+       /* Register the file so it won't be cleaned up */
+       rspamd_hyperscan_notice_known(fp);
+
+       mp->state = RSPAMD_MP_STATE_COMPILED;
+       msg_info("hot-swapped hyperscan database from cache %s for %ud patterns",
+                        fp, mp->cnt);
+
+       return TRUE;
+#else
+       return FALSE;
+#endif
+}
index ce03cd853d05b86e860de1b6bab31d58c846b3d9..f4f2439ea146839b27ea3b80d2270a82b78242dc 100644 (file)
@@ -240,6 +240,68 @@ void rspamd_multipattern_get_hash(struct rspamd_multipattern *mp,
 gboolean rspamd_multipattern_set_hs_db(struct rspamd_multipattern *mp,
                                                                           void *hs_db);
 
+/**
+ * Pending compilation entry for deferred HS compilation
+ */
+struct rspamd_multipattern_pending {
+       struct rspamd_multipattern *mp;
+       char *name;             /* Identifier for logging/IPC */
+       unsigned char hash[64]; /* Cache key hash (rspamd_cryptobox_HASHBYTES) */
+};
+
+/**
+ * Add multipattern to pending compilation queue.
+ * Called during pre-fork initialization when hs_helper is not yet available.
+ * @param mp multipattern in COMPILING state
+ * @param name identifier for this multipattern (e.g., "tld")
+ */
+void rspamd_multipattern_add_pending(struct rspamd_multipattern *mp,
+                                                                        const char *name);
+
+/**
+ * Get list of pending multipattern compilations.
+ * Returns array of rspamd_multipattern_pending, caller must free array (not contents).
+ * @param count output: number of pending entries
+ * @return array of pending entries or NULL if none
+ */
+struct rspamd_multipattern_pending *rspamd_multipattern_get_pending(
+       unsigned int *count);
+
+/**
+ * Clear pending queue after hs_helper has processed it.
+ */
+void rspamd_multipattern_clear_pending(void);
+
+/**
+ * Find a pending multipattern by name.
+ * @param name identifier
+ * @return multipattern or NULL if not found
+ */
+struct rspamd_multipattern *rspamd_multipattern_find_pending(const char *name);
+
+/**
+ * Compile hyperscan database and save to cache file.
+ * This is called by hs_helper for async compilation.
+ * @param mp multipattern in COMPILING state
+ * @param cache_dir directory to save cache file
+ * @param err error output
+ * @return TRUE on success
+ */
+gboolean rspamd_multipattern_compile_hs_to_cache(struct rspamd_multipattern *mp,
+                                                                                                const char *cache_dir,
+                                                                                                GError **err);
+
+/**
+ * Load hyperscan database from cache file.
+ * This is called by workers when they receive notification that
+ * hs_helper has compiled a multipattern database.
+ * @param mp multipattern in COMPILING state
+ * @param cache_dir directory containing cache files
+ * @return TRUE if loaded successfully
+ */
+gboolean rspamd_multipattern_load_from_cache(struct rspamd_multipattern *mp,
+                                                                                        const char *cache_dir);
+
 #ifdef __cplusplus
 }
 #endif