*/
#include "config.h"
#include "libutil/util.h"
+#include "libutil/multipattern.h"
#include "libserver/cfg_file.h"
#include "libserver/cfg_rcl.h"
#include "libserver/worker_util.h"
#ifdef WITH_HYPERSCAN
#include "libserver/hyperscan_tools.h"
+#include "hs.h"
#endif
#ifdef HAVE_GLOB_H
return TRUE;
}
+#ifdef WITH_HYPERSCAN
+/*
+ * Compile pending multipatterns that were queued during pre-fork initialization
+ */
+static void
+rspamd_hs_helper_compile_pending_multipatterns(struct hs_helper_ctx *ctx,
+ struct rspamd_worker *worker)
+{
+ struct rspamd_multipattern_pending *pending;
+ unsigned int count = 0;
+
+ pending = rspamd_multipattern_get_pending(&count);
+ if (pending == NULL || count == 0) {
+ msg_debug("no pending multipattern compilations");
+ return;
+ }
+
+ msg_info("processing %ud pending multipattern compilations", count);
+
+ for (unsigned int i = 0; i < count; i++) {
+ struct rspamd_multipattern_pending *entry = &pending[i];
+ struct rspamd_multipattern *mp = entry->mp;
+ unsigned int npatterns;
+ char fp[PATH_MAX];
+ GError *err = NULL;
+
+ npatterns = rspamd_multipattern_get_npatterns(mp);
+ msg_info("compiling multipattern '%s' with %ud patterns", entry->name, npatterns);
+
+ /* Build cache file path */
+ rspamd_snprintf(fp, sizeof(fp), "%s/%*xs.hs", ctx->hs_dir,
+ (int) sizeof(entry->hash) / 2, entry->hash);
+
+ /* Check if cache file already exists (race with another process) */
+ if (access(fp, R_OK) == 0) {
+ msg_info("cache file %s already exists for multipattern '%s', skipping compilation",
+ fp, entry->name);
+ }
+ else {
+ /* Compile and save to cache */
+ if (!rspamd_multipattern_compile_hs_to_cache(mp, ctx->hs_dir, &err)) {
+ msg_err("failed to compile multipattern '%s': %e", entry->name, err);
+ if (err) {
+ g_error_free(err);
+ }
+ continue;
+ }
+ }
+
+ /* Send notification to main process */
+ struct rspamd_srv_command srv_cmd;
+ memset(&srv_cmd, 0, sizeof(srv_cmd));
+ srv_cmd.type = RSPAMD_SRV_MULTIPATTERN_LOADED;
+ rspamd_strlcpy(srv_cmd.cmd.mp_loaded.name, entry->name,
+ sizeof(srv_cmd.cmd.mp_loaded.name));
+ rspamd_strlcpy(srv_cmd.cmd.mp_loaded.cache_dir, ctx->hs_dir,
+ sizeof(srv_cmd.cmd.mp_loaded.cache_dir));
+
+ rspamd_srv_send_command(worker, ctx->event_loop, &srv_cmd, -1, NULL, NULL);
+ msg_info("sent multipattern loaded notification for '%s'", entry->name);
+ }
+
+ rspamd_multipattern_clear_pending();
+}
+#endif
+
static gboolean
rspamd_hs_helper_workers_spawned(struct rspamd_main *rspamd_main,
struct rspamd_worker *worker, int fd,
}
}
+#ifdef WITH_HYPERSCAN
+ /* Process pending multipattern compilations (e.g., TLD patterns) */
+ rspamd_hs_helper_compile_pending_multipatterns(ctx, worker);
+#endif
+
if (attached_fd != -1) {
close(attached_fd);
}
rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
rspamd_control_ignore_io_handler, NULL, worker->pid);
}
+#endif
+ break;
+ case RSPAMD_SRV_MULTIPATTERN_LOADED:
+#ifdef WITH_HYPERSCAN
+ msg_info_main("received multipattern loaded notification for '%s' from %s",
+ cmd.cmd.mp_loaded.name, cmd.cmd.mp_loaded.cache_dir);
+
+ /* Broadcast command to all workers */
+ memset(&wcmd, 0, sizeof(wcmd));
+ wcmd.type = RSPAMD_CONTROL_MULTIPATTERN_LOADED;
+ rspamd_strlcpy(wcmd.cmd.mp_loaded.name,
+ cmd.cmd.mp_loaded.name,
+ sizeof(wcmd.cmd.mp_loaded.name));
+ rspamd_strlcpy(wcmd.cmd.mp_loaded.cache_dir,
+ cmd.cmd.mp_loaded.cache_dir,
+ sizeof(wcmd.cmd.mp_loaded.cache_dir));
+ rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
+ rspamd_control_ignore_io_handler, NULL, worker->pid);
#endif
break;
case RSPAMD_SRV_MONITORED_CHANGE:
case RSPAMD_CONTROL_COMPOSITES_STATS:
reply = "composites_stats";
break;
+ case RSPAMD_CONTROL_FUZZY_BLOCKED:
+ reply = "fuzzy_blocked";
+ break;
+ case RSPAMD_CONTROL_MULTIPATTERN_LOADED:
+ reply = "multipattern_loaded";
+ break;
default:
break;
}
case RSPAMD_SRV_WORKERS_SPAWNED:
reply = "workers_spawned";
break;
+ case RSPAMD_SRV_MULTIPATTERN_LOADED:
+ reply = "multipattern_loaded";
+ break;
}
return reply;
RSPAMD_CONTROL_FUZZY_BLOCKED,
RSPAMD_CONTROL_WORKERS_SPAWNED,
RSPAMD_CONTROL_COMPOSITES_STATS,
+ RSPAMD_CONTROL_MULTIPATTERN_LOADED,
RSPAMD_CONTROL_MAX
};
RSPAMD_SRV_HEARTBEAT,
RSPAMD_SRV_HEALTH,
RSPAMD_SRV_NOTICE_HYPERSCAN_CACHE,
- RSPAMD_SRV_FUZZY_BLOCKED, /* Used to notify main process about a blocked ip */
- RSPAMD_SRV_WORKERS_SPAWNED, /* Used to notify workers that all workers have been spawned */
+ RSPAMD_SRV_FUZZY_BLOCKED, /* Used to notify main process about a blocked ip */
+ RSPAMD_SRV_WORKERS_SPAWNED, /* Used to notify workers that all workers have been spawned */
+ RSPAMD_SRV_MULTIPATTERN_LOADED, /* Multipattern HS compiled and ready */
};
enum rspamd_log_pipe_type {
char scope[64]; /* Scope name, NULL means all scopes */
gsize fd_size; /* Size of FD-based db, 0 if not using FD */
} hs_loaded;
+ struct {
+ char name[64];
+ char cache_dir[CONTROL_PATHLEN];
+ } mp_loaded;
struct {
char tag[32];
gboolean alive;
struct {
unsigned int workers_count;
} workers_spawned;
+ /* Sent when a multipattern hyperscan db is compiled */
+ struct {
+ char name[64];
+ char cache_dir[CONTROL_PATHLEN];
+ } mp_loaded;
} cmd;
};
g_error_free(err);
ret = FALSE;
}
+ else if (rspamd_multipattern_get_state(url_scanner->search_trie_full) ==
+ RSPAMD_MP_STATE_COMPILING) {
+ /* Add to pending queue for hs_helper to compile */
+ rspamd_multipattern_add_pending(url_scanner->search_trie_full, "tld");
+ }
}
if (tld_file != NULL) {
#include "contrib/libev/ev.h"
#include "libstat/stat_api.h"
#include "libserver/protocol_internal.h"
+#include "libutil/multipattern.h"
struct rspamd_worker *rspamd_current_worker = NULL;
return TRUE;
}
+
+static gboolean
+rspamd_worker_multipattern_ready(struct rspamd_main *rspamd_main,
+ struct rspamd_worker *worker, int fd,
+ int attached_fd,
+ struct rspamd_control_command *cmd,
+ gpointer ud)
+{
+ struct rspamd_control_reply rep;
+ struct rspamd_multipattern *mp;
+ const char *name = cmd->cmd.mp_loaded.name;
+ const char *cache_dir = cmd->cmd.mp_loaded.cache_dir;
+
+ memset(&rep, 0, sizeof(rep));
+ rep.type = RSPAMD_CONTROL_MULTIPATTERN_LOADED;
+
+ mp = rspamd_multipattern_find_pending(name);
+
+ if (mp != NULL) {
+ if (rspamd_multipattern_load_from_cache(mp, cache_dir)) {
+ msg_info("multipattern '%s' hot-swapped to hyperscan", name);
+ rep.reply.hs_loaded.status = 0;
+ }
+ else {
+ msg_warn("failed to load multipattern '%s' from cache, "
+ "continuing with ACISM fallback",
+ name);
+ rep.reply.hs_loaded.status = ENOENT;
+ }
+ }
+ else {
+ msg_warn("received multipattern notification for unknown '%s'", name);
+ rep.reply.hs_loaded.status = ENOENT;
+ }
+
+ if (write(fd, &rep, sizeof(rep)) != sizeof(rep)) {
+ msg_err("cannot write reply to the control socket: %s",
+ strerror(errno));
+ }
+
+ if (attached_fd >= 0) {
+ close(attached_fd);
+ }
+
+ return TRUE;
+}
#endif /* With Hyperscan */
gboolean
RSPAMD_CONTROL_HYPERSCAN_LOADED,
rspamd_worker_hyperscan_ready,
NULL);
+ rspamd_control_worker_add_cmd_handler(worker,
+ RSPAMD_CONTROL_MULTIPATTERN_LOADED,
+ rspamd_worker_multipattern_ready,
+ NULL);
#endif
rspamd_control_worker_add_cmd_handler(worker,
RSPAMD_CONTROL_LOG_PIPE,
static const char *hs_cache_dir = NULL;
static enum rspamd_hs_check_state hs_suitable_cpu = RSPAMD_HS_UNCHECKED;
+/* Pending compilation queue for deferred HS compilation */
+static GArray *pending_compilations = NULL;
struct RSPAMD_ALIGNED(64) rspamd_multipattern {
#ifdef WITH_HYPERSCAN
const char *pattern, gsize patlen, int flags)
{
gsize dlen;
- gboolean is_tld = (mp->flags & RSPAMD_MULTIPATTERN_TLD);
+ /* Use per-pattern flags to determine if this is a TLD pattern */
+ gboolean is_tld = (flags & RSPAMD_MULTIPATTERN_TLD);
g_assert(pattern != NULL);
g_assert(mp != NULL);
g_assert(!mp->compiled);
/*
- * For TLD multipatterns: always add to pats array for ACISM fallback
+ * For TLD patterns: add to pats array for ACISM fallback
*/
if (is_tld) {
ac_trie_pat_t acism_pat;
mp->state = RSPAMD_MP_STATE_INIT;
mp->compiled = TRUE;
- msg_info("built ACISM fallback trie for %ud TLD patterns", mp->cnt);
+ msg_info("built ACISM fallback trie for %ud TLD patterns (total patterns: %ud)",
+ mp->pats->len, mp->cnt);
/* Try to load from cache first */
if (!(flags & RSPAMD_MULTIPATTERN_COMPILE_NO_FS) &&
ac_trie_pat_t pat;
pat = g_array_index(cbd->mp->pats, ac_trie_pat_t, strnum);
- /*
- * For TLD multipatterns: strnum IS the pattern ID (0-based)
- * All patterns in the multipattern are TLD patterns, so no offset needed
- */
+
+ /* For TLD patterns, require word boundary at end of match */
+ if (cbd->mp->flags & RSPAMD_MULTIPATTERN_TLD) {
+ if (textpos < (int) cbd->len && g_ascii_isalnum(cbd->in[textpos])) {
+ return 0;
+ }
+ }
+
ret = cbd->cb(cbd->mp, strnum, textpos - pat.len,
textpos, cbd->in, cbd->len, cbd->ud);
return FALSE;
#endif
}
+
+void rspamd_multipattern_add_pending(struct rspamd_multipattern *mp,
+ const char *name)
+{
+ struct rspamd_multipattern_pending entry;
+
+ g_assert(mp != NULL);
+ g_assert(name != NULL);
+ g_assert(mp->state == RSPAMD_MP_STATE_COMPILING);
+
+ if (pending_compilations == NULL) {
+ pending_compilations = g_array_new(FALSE, FALSE,
+ sizeof(struct rspamd_multipattern_pending));
+ }
+
+ entry.mp = mp;
+ entry.name = g_strdup(name);
+ rspamd_multipattern_get_hash(mp, entry.hash);
+
+ g_array_append_val(pending_compilations, entry);
+
+ msg_info("added multipattern '%s' (%ud patterns) to pending compilation queue",
+ name, mp->cnt);
+}
+
+struct rspamd_multipattern_pending *
+rspamd_multipattern_get_pending(unsigned int *count)
+{
+ if (pending_compilations == NULL || pending_compilations->len == 0) {
+ *count = 0;
+ return NULL;
+ }
+
+ *count = pending_compilations->len;
+ return (struct rspamd_multipattern_pending *) pending_compilations->data;
+}
+
+void rspamd_multipattern_clear_pending(void)
+{
+ if (pending_compilations == NULL) {
+ return;
+ }
+
+ for (unsigned int i = 0; i < pending_compilations->len; i++) {
+ struct rspamd_multipattern_pending *entry;
+
+ entry = &g_array_index(pending_compilations,
+ struct rspamd_multipattern_pending, i);
+ g_free(entry->name);
+ }
+
+ g_array_free(pending_compilations, TRUE);
+ pending_compilations = NULL;
+}
+
+struct rspamd_multipattern *
+rspamd_multipattern_find_pending(const char *name)
+{
+ if (pending_compilations == NULL || name == NULL) {
+ return NULL;
+ }
+
+ for (unsigned int i = 0; i < pending_compilations->len; i++) {
+ struct rspamd_multipattern_pending *entry;
+
+ entry = &g_array_index(pending_compilations,
+ struct rspamd_multipattern_pending, i);
+ if (strcmp(entry->name, name) == 0) {
+ return entry->mp;
+ }
+ }
+
+ return NULL;
+}
+
+gboolean
+rspamd_multipattern_compile_hs_to_cache(struct rspamd_multipattern *mp,
+ const char *cache_dir,
+ GError **err)
+{
+#ifdef WITH_HYPERSCAN
+ hs_platform_info_t plt;
+ hs_compile_error_t *hs_errors = NULL;
+ hs_database_t *db = NULL;
+ unsigned char hash[rspamd_cryptobox_HASHBYTES];
+ char *bytes = NULL;
+ gsize len;
+ char fp[PATH_MAX], np[PATH_MAX];
+ int fd;
+
+ g_assert(mp != NULL);
+ g_assert(cache_dir != NULL);
+
+ if (mp->state != RSPAMD_MP_STATE_COMPILING) {
+ g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+ "multipattern not in COMPILING state");
+ return FALSE;
+ }
+
+ if (mp->hs_pats == NULL || mp->cnt == 0) {
+ g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+ "no patterns to compile");
+ return FALSE;
+ }
+
+ g_assert(hs_populate_platform(&plt) == HS_SUCCESS);
+
+ /* Compute hash for cache key */
+ rspamd_multipattern_get_hash(mp, hash);
+
+ msg_info("compiling hyperscan database for %ud patterns", mp->cnt);
+
+ if (hs_compile_multi((const char *const *) mp->hs_pats->data,
+ (const unsigned int *) mp->hs_flags->data,
+ (const unsigned int *) mp->hs_ids->data,
+ mp->cnt,
+ HS_MODE_BLOCK,
+ &plt,
+ &db,
+ &hs_errors) != HS_SUCCESS) {
+ g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+ "cannot compile hyperscan: %s (pattern %d)",
+ hs_errors->message, hs_errors->expression);
+ hs_free_compile_error(hs_errors);
+ return FALSE;
+ }
+
+ /* Serialize with unified header format */
+ if (!rspamd_hyperscan_serialize_with_header(db, NULL, NULL, 0, &bytes, &len)) {
+ g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+ "cannot serialize hyperscan database");
+ hs_free_database(db);
+ return FALSE;
+ }
+
+ hs_free_database(db);
+
+ /* Write to temp file and rename */
+ rspamd_snprintf(fp, sizeof(fp), "%s%chs-mp-XXXXXXXXXXXXX",
+ cache_dir, G_DIR_SEPARATOR);
+
+ if ((fd = g_mkstemp_full(fp, O_CREAT | O_EXCL | O_WRONLY, 00644)) == -1) {
+ g_set_error(err, rspamd_multipattern_quark(), errno,
+ "cannot create temp file %s: %s", fp, strerror(errno));
+ g_free(bytes);
+ return FALSE;
+ }
+
+ if (write(fd, bytes, len) != (ssize_t) len) {
+ g_set_error(err, rspamd_multipattern_quark(), errno,
+ "cannot write to %s: %s", fp, strerror(errno));
+ close(fd);
+ unlink(fp);
+ g_free(bytes);
+ return FALSE;
+ }
+
+ g_free(bytes);
+ fsync(fd);
+ close(fd);
+
+ /* Rename to final path */
+ rspamd_snprintf(np, sizeof(np), "%s/%*xs.hs", cache_dir,
+ (int) rspamd_cryptobox_HASHBYTES / 2, hash);
+
+ if (rename(fp, np) == -1) {
+ g_set_error(err, rspamd_multipattern_quark(), errno,
+ "cannot rename %s to %s: %s", fp, np, strerror(errno));
+ unlink(fp);
+ return FALSE;
+ }
+
+ rspamd_hyperscan_notice_known(np);
+ msg_info("saved hyperscan database to %s (%z bytes)", np, len);
+
+ return TRUE;
+#else
+ g_set_error(err, rspamd_multipattern_quark(), ENOTSUP,
+ "hyperscan not available");
+ return FALSE;
+#endif
+}
+
+gboolean
+rspamd_multipattern_load_from_cache(struct rspamd_multipattern *mp,
+ const char *cache_dir)
+{
+#ifdef WITH_HYPERSCAN
+ unsigned char hash[rspamd_cryptobox_HASHBYTES];
+ char fp[PATH_MAX];
+ gchar *data;
+ gsize len;
+ GError *err = NULL;
+
+ g_assert(mp != NULL);
+ g_assert(cache_dir != NULL);
+
+ if (mp->state != RSPAMD_MP_STATE_COMPILING) {
+ msg_debug("multipattern not in COMPILING state, cannot load from cache");
+ return FALSE;
+ }
+
+ /* Calculate hash for cache key */
+ rspamd_multipattern_get_hash(mp, hash);
+
+ rspamd_snprintf(fp, sizeof(fp), "%s/%*xs.hs", cache_dir,
+ (int) rspamd_cryptobox_HASHBYTES / 2, hash);
+
+ if (!g_file_get_contents(fp, &data, &len, &err)) {
+ if (err) {
+ msg_warn("cannot read hyperscan cache %s: %s", fp, err->message);
+ g_error_free(err);
+ }
+ return FALSE;
+ }
+
+ mp->hs_db = rspamd_hyperscan_load_from_header(data, len, &err);
+ g_free(data);
+
+ if (mp->hs_db == NULL) {
+ if (err) {
+ msg_warn("cannot load hyperscan from cache %s: %s", fp, err->message);
+ g_error_free(err);
+ }
+ return FALSE;
+ }
+
+ /* Allocate scratch space */
+ if (!rspamd_multipattern_alloc_scratch(mp, &err)) {
+ msg_warn("hyperscan cache loaded but scratch allocation failed: %s",
+ err ? err->message : "unknown error");
+ rspamd_hyperscan_free(mp->hs_db, true);
+ mp->hs_db = NULL;
+ g_clear_error(&err);
+ return FALSE;
+ }
+
+ /* Register the file so it won't be cleaned up */
+ rspamd_hyperscan_notice_known(fp);
+
+ mp->state = RSPAMD_MP_STATE_COMPILED;
+ msg_info("hot-swapped hyperscan database from cache %s for %ud patterns",
+ fp, mp->cnt);
+
+ return TRUE;
+#else
+ return FALSE;
+#endif
+}
gboolean rspamd_multipattern_set_hs_db(struct rspamd_multipattern *mp,
void *hs_db);
+/**
+ * Pending compilation entry for deferred HS compilation
+ */
+struct rspamd_multipattern_pending {
+ struct rspamd_multipattern *mp;
+ char *name; /* Identifier for logging/IPC */
+ unsigned char hash[64]; /* Cache key hash (rspamd_cryptobox_HASHBYTES) */
+};
+
+/**
+ * Add multipattern to pending compilation queue.
+ * Called during pre-fork initialization when hs_helper is not yet available.
+ * @param mp multipattern in COMPILING state
+ * @param name identifier for this multipattern (e.g., "tld")
+ */
+void rspamd_multipattern_add_pending(struct rspamd_multipattern *mp,
+ const char *name);
+
+/**
+ * Get list of pending multipattern compilations.
+ * Returns array of rspamd_multipattern_pending, caller must free array (not contents).
+ * @param count output: number of pending entries
+ * @return array of pending entries or NULL if none
+ */
+struct rspamd_multipattern_pending *rspamd_multipattern_get_pending(
+ unsigned int *count);
+
+/**
+ * Clear pending queue after hs_helper has processed it.
+ */
+void rspamd_multipattern_clear_pending(void);
+
+/**
+ * Find a pending multipattern by name.
+ * @param name identifier
+ * @return multipattern or NULL if not found
+ */
+struct rspamd_multipattern *rspamd_multipattern_find_pending(const char *name);
+
+/**
+ * Compile hyperscan database and save to cache file.
+ * This is called by hs_helper for async compilation.
+ * @param mp multipattern in COMPILING state
+ * @param cache_dir directory to save cache file
+ * @param err error output
+ * @return TRUE on success
+ */
+gboolean rspamd_multipattern_compile_hs_to_cache(struct rspamd_multipattern *mp,
+ const char *cache_dir,
+ GError **err);
+
+/**
+ * Load hyperscan database from cache file.
+ * This is called by workers when they receive notification that
+ * hs_helper has compiled a multipattern database.
+ * @param mp multipattern in COMPILING state
+ * @param cache_dir directory containing cache files
+ * @return TRUE if loaded successfully
+ */
+gboolean rspamd_multipattern_load_from_cache(struct rspamd_multipattern *mp,
+ const char *cache_dir);
+
#ifdef __cplusplus
}
#endif