git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Rework] Allow execution of async events when hs compiles regexps
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 2 Oct 2019 17:41:37 +0000 (18:41 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 2 Oct 2019 17:41:37 +0000 (18:41 +0100)
src/hs_helper.c
src/libserver/re_cache.c
src/libserver/re_cache.h

index f83a9d4292752f13954da468e11f0ceb866708bf..3cdc2a43973ea5a71af6c2b44f6666d2da79542e 100644 (file)
@@ -178,37 +178,44 @@ rspamd_hs_helper_cleanup_dir (struct hs_helper_ctx *ctx, gboolean forced)
        return ret;
 }
 
-static gboolean
-rspamd_rs_compile (struct hs_helper_ctx *ctx, struct rspamd_worker *worker,
-               gboolean forced)
+/* Bad hack, but who cares */
+static gboolean hack_global_forced;
+
+static void
+rspamd_rs_delayed_cb (EV_P_ ev_timer *w, int revents)
 {
-       GError *err = NULL;
+       struct rspamd_worker *worker = (struct rspamd_worker *)w->data;
        static struct rspamd_srv_command srv_cmd;
-       gint ncompiled;
+       struct hs_helper_ctx *ctx;
 
-       if (!(ctx->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
-               msg_warn ("CPU doesn't have SSSE3 instructions set "
-                               "required for hyperscan, disable hyperscan compilation");
-               return FALSE;
-       }
+       ctx = (struct hs_helper_ctx *)worker->ctx;
+       memset (&srv_cmd, 0, sizeof (srv_cmd));
+       srv_cmd.type = RSPAMD_SRV_HYPERSCAN_LOADED;
+       rspamd_strlcpy (srv_cmd.cmd.hs_loaded.cache_dir, ctx->hs_dir,
+                       sizeof (srv_cmd.cmd.hs_loaded.cache_dir));
+       srv_cmd.cmd.hs_loaded.forced = hack_global_forced;
+       hack_global_forced = FALSE;
 
-       if (!rspamd_hs_helper_cleanup_dir (ctx, forced)) {
-               msg_warn ("cannot cleanup cache dir '%s'", ctx->hs_dir);
-       }
+       rspamd_srv_send_command (worker,
+                       ctx->event_loop, &srv_cmd, -1, NULL, NULL);
+       ev_timer_stop (EV_A_ w);
+       g_free (w);
+}
 
-       if ((ncompiled = rspamd_re_cache_compile_hyperscan (ctx->cfg->re_cache,
-                       ctx->hs_dir, ctx->max_time, !forced,
-                       &err)) == -1) {
-               msg_err ("failed to compile re cache: %e", err);
-               g_error_free (err);
+static void
+rspamd_rs_compile_cb (guint ncompiled, GError *err, void *cbd)
+{
+       struct rspamd_worker *worker = (struct rspamd_worker *)cbd;
+       ev_timer *tm;
+       ev_tstamp when = 0.0;
+       struct hs_helper_ctx *ctx;
 
-               return FALSE;
-       }
+       ctx = (struct hs_helper_ctx *)worker->ctx;
 
        if (ncompiled > 0) {
                msg_info ("compiled %d regular expressions to the hyperscan tree",
                                ncompiled);
-               forced = TRUE;
+               hack_global_forced = TRUE;
        }
 
        /*
@@ -216,17 +223,36 @@ rspamd_rs_compile (struct hs_helper_ctx *ctx, struct rspamd_worker *worker,
         * XXX: now we just sleep for 5 seconds to ensure that
         */
        if (!ctx->loaded) {
-               ev_sleep (5.0);
+               when = 5.0; /* Postpone */
                ctx->loaded = TRUE;
        }
 
-       memset (&srv_cmd, 0, sizeof (srv_cmd));
-       srv_cmd.type = RSPAMD_SRV_HYPERSCAN_LOADED;
-       rspamd_strlcpy (srv_cmd.cmd.hs_loaded.cache_dir, ctx->hs_dir,
-                       sizeof (srv_cmd.cmd.hs_loaded.cache_dir));
-       srv_cmd.cmd.hs_loaded.forced = forced;
+       tm = g_malloc0 (sizeof (*tm));
+       tm->data = (void *)worker;
+       ev_timer_init (tm, rspamd_rs_delayed_cb, when, 0);
+       ev_timer_start (ctx->event_loop, tm);
+}
 
-       rspamd_srv_send_command (worker, ctx->event_loop, &srv_cmd, -1, NULL, NULL);
+static gboolean
+rspamd_rs_compile (struct hs_helper_ctx *ctx, struct rspamd_worker *worker,
+               gboolean forced)
+{
+       if (!(ctx->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
+               msg_warn ("CPU doesn't have SSSE3 instructions set "
+                               "required for hyperscan, disable hyperscan compilation");
+               return FALSE;
+       }
+
+       if (!rspamd_hs_helper_cleanup_dir (ctx, forced)) {
+               msg_warn ("cannot cleanup cache dir '%s'", ctx->hs_dir);
+       }
+
+       hack_global_forced = forced; /* killmeplease */
+       rspamd_re_cache_compile_hyperscan (ctx->cfg->re_cache,
+                       ctx->hs_dir, ctx->max_time, !forced,
+                       ctx->event_loop,
+                       rspamd_rs_compile_cb,
+                       (void *)worker);
 
        return TRUE;
 }
index 61732292614a623ecf49836a118e3b20ec0c0fe5..d93cb8f1327012b53c65834459be4114b1d57dc9 100644 (file)
@@ -33,6 +33,7 @@
 #include "unix-std.h"
 #include <signal.h>
 #include <stdalign.h>
+#include "contrib/libev/ev.h"
 
 #ifndef WITH_PCRE2
 #include <pcre.h>
@@ -1678,19 +1679,35 @@ rspamd_re_cache_is_finite (struct rspamd_re_cache *cache,
 }
 #endif
 
-gint
-rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
-               const char *cache_dir, gdouble max_time, gboolean silent,
-               GError **err)
+#ifdef WITH_HYPERSCAN
+struct rspamd_re_cache_hs_compile_cbdata {
+       GHashTableIter it;
+       struct rspamd_re_cache *cache;
+       const char *cache_dir;
+       gdouble max_time;
+       gboolean silent;
+       guint total;
+       void (*cb)(guint ncompiled, GError *err, void *cbd);
+       void *cbd;
+};
+
+static void
+rspamd_re_cache_compile_err (EV_P_ ev_timer *w, GError *err,
+               struct rspamd_re_cache_hs_compile_cbdata *cbdata)
 {
-       g_assert (cache != NULL);
-       g_assert (cache_dir != NULL);
+       ev_timer_stop (EV_A_ w);
+       cbdata->cb (cbdata->total, err, cbdata->cb);
+       g_free (w);
+       g_free (cbdata);
+       g_error_free (err);
+}
 
-#ifndef WITH_HYPERSCAN
-       g_set_error (err, rspamd_re_cache_quark (), EINVAL, "hyperscan is disabled");
-       return -1;
-#else
-       GHashTableIter it, cit;
+static void
+rspamd_re_cache_compile_timer_cb (EV_P_ ev_timer *w, int revents )
+{
+       struct rspamd_re_cache_hs_compile_cbdata *cbdata =
+                       (struct rspamd_re_cache_hs_compile_cbdata *)w->data;
+       GHashTableIter cit;
        gpointer k, v;
        struct rspamd_re_class *re_class;
        gchar path[PATH_MAX], npath[PATH_MAX];
@@ -1704,298 +1721,354 @@ rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
        const hs_expr_ext_t **hs_exts = NULL;
        gchar **hs_pats = NULL;
        gchar *hs_serialized;
-       gsize serialized_len, total = 0;
+       gsize serialized_len;
        struct iovec iov[7];
+       struct rspamd_re_cache *cache;
+       GError *err;
 
-       g_hash_table_iter_init (&it, cache->re_classes);
+       cache = cbdata->cache;
 
-       while (g_hash_table_iter_next (&it, &k, &v)) {
-               re_class = v;
-               rspamd_snprintf (path, sizeof (path), "%s%c%s.hs", cache_dir,
-                               G_DIR_SEPARATOR, re_class->hash);
+       if (!g_hash_table_iter_next (&cbdata->it, &k, &v)) {
+               /* All done */
+               ev_timer_stop (EV_A_ w);
+               cbdata->cb (cbdata->total, NULL, cbdata->cbd);
+               g_free (w);
+               g_free (cbdata);
 
-               if (rspamd_re_cache_is_valid_hyperscan_file (cache, path, TRUE, TRUE)) {
+               return;
+       }
 
-                       fd = open (path, O_RDONLY, 00600);
+       re_class = v;
+       rspamd_snprintf (path, sizeof (path), "%s%c%s.hs", cbdata->cache_dir,
+                       G_DIR_SEPARATOR, re_class->hash);
 
-                       /* Read number of regexps */
-                       g_assert (fd != -1);
-                       lseek (fd, RSPAMD_HS_MAGIC_LEN + sizeof (cache->plt), SEEK_SET);
-                       g_assert (read (fd, &n, sizeof (n)) == sizeof (n));
-                       close (fd);
+       if (rspamd_re_cache_is_valid_hyperscan_file (cache, path, TRUE, TRUE)) {
 
-                       if (re_class->type_len > 0) {
-                               if (!silent) {
-                                       msg_info_re_cache (
-                                                       "skip already valid class %s(%*s) to cache %6s, %d regexps",
-                                                       rspamd_re_cache_type_to_string (re_class->type),
-                                                       (gint) re_class->type_len - 1,
-                                                       re_class->type_data,
-                                                       re_class->hash,
-                                                       n);
-                               }
+               fd = open (path, O_RDONLY, 00600);
+
+               /* Read number of regexps */
+               g_assert (fd != -1);
+               lseek (fd, RSPAMD_HS_MAGIC_LEN + sizeof (cache->plt), SEEK_SET);
+               g_assert (read (fd, &n, sizeof (n)) == sizeof (n));
+               close (fd);
+
+               if (re_class->type_len > 0) {
+                       if (!cbdata->silent) {
+                               msg_info_re_cache (
+                                               "skip already valid class %s(%*s) to cache %6s, %d regexps",
+                                               rspamd_re_cache_type_to_string (re_class->type),
+                                               (gint) re_class->type_len - 1,
+                                               re_class->type_data,
+                                               re_class->hash,
+                                               n);
                        }
-                       else {
-                               if (!silent) {
-                                       msg_info_re_cache (
-                                                       "skip already valid class %s to cache %6s, %d regexps",
-                                                       rspamd_re_cache_type_to_string (re_class->type),
-                                                       re_class->hash,
-                                                       n);
-                               }
+               }
+               else {
+                       if (!cbdata->silent) {
+                               msg_info_re_cache (
+                                               "skip already valid class %s to cache %6s, %d regexps",
+                                               rspamd_re_cache_type_to_string (re_class->type),
+                                               re_class->hash,
+                                               n);
                        }
-
-                       continue;
                }
 
-               rspamd_snprintf (path, sizeof (path), "%s%c%s.hs.new", cache_dir,
-                                               G_DIR_SEPARATOR, re_class->hash);
-               fd = open (path, O_CREAT|O_TRUNC|O_EXCL|O_WRONLY, 00600);
+               ev_timer_again (EV_A_ w);
+               return;
+       }
 
-               if (fd == -1) {
-                       g_set_error (err, rspamd_re_cache_quark (), errno, "cannot open file "
-                                       "%s: %s", path, strerror (errno));
-                       return -1;
-               }
+       rspamd_snprintf (path, sizeof (path), "%s%c%s.hs.new", cbdata->cache_dir,
+                       G_DIR_SEPARATOR, re_class->hash);
+       fd = open (path, O_CREAT|O_TRUNC|O_EXCL|O_WRONLY, 00600);
 
-               g_hash_table_iter_init (&cit, re_class->re);
-               n = g_hash_table_size (re_class->re);
-               hs_flags = g_malloc0 (sizeof (*hs_flags) * n);
-               hs_ids = g_malloc (sizeof (*hs_ids) * n);
-               hs_pats = g_malloc (sizeof (*hs_pats) * n);
-               hs_exts = g_malloc0 (sizeof (*hs_exts) * n);
-               i = 0;
+       if (fd == -1) {
+               err = g_error_new (rspamd_re_cache_quark (), errno,
+                               "cannot open file %s: %s", path, strerror (errno));
+               rspamd_re_cache_compile_err (EV_A_ w, err, cbdata);
+               return;
+       }
 
-               while (g_hash_table_iter_next (&cit, &k, &v)) {
-                       re = v;
+       g_hash_table_iter_init (&cit, re_class->re);
+       n = g_hash_table_size (re_class->re);
+       hs_flags = g_malloc0 (sizeof (*hs_flags) * n);
+       hs_ids = g_malloc (sizeof (*hs_ids) * n);
+       hs_pats = g_malloc (sizeof (*hs_pats) * n);
+       hs_exts = g_malloc0 (sizeof (*hs_exts) * n);
+       i = 0;
 
-                       pcre_flags = rspamd_regexp_get_pcre_flags (re);
-                       re_flags = rspamd_regexp_get_flags (re);
+       while (g_hash_table_iter_next (&cit, &k, &v)) {
+               re = v;
 
-                       if (re_flags & RSPAMD_REGEXP_FLAG_PCRE_ONLY) {
-                               /* Do not try to compile bad regexp */
-                               msg_info_re_cache (
-                                               "do not try compile %s to hyperscan as it is PCRE only",
-                                               rspamd_regexp_get_pattern (re));
-                               continue;
-                       }
+               pcre_flags = rspamd_regexp_get_pcre_flags (re);
+               re_flags = rspamd_regexp_get_flags (re);
+
+               if (re_flags & RSPAMD_REGEXP_FLAG_PCRE_ONLY) {
+                       /* Do not try to compile bad regexp */
+                       msg_info_re_cache (
+                                       "do not try compile %s to hyperscan as it is PCRE only",
+                                       rspamd_regexp_get_pattern (re));
+                       continue;
+               }
 
-                       hs_flags[i] = 0;
-                       hs_exts[i] = NULL;
+               hs_flags[i] = 0;
+               hs_exts[i] = NULL;
 #ifndef WITH_PCRE2
-                       if (pcre_flags & PCRE_FLAG(UTF8)) {
-                               hs_flags[i] |= HS_FLAG_UTF8;
-                       }
+               if (pcre_flags & PCRE_FLAG(UTF8)) {
+                       hs_flags[i] |= HS_FLAG_UTF8;
+               }
 #else
-                       if (pcre_flags & PCRE_FLAG(UTF)) {
+               if (pcre_flags & PCRE_FLAG(UTF)) {
                                hs_flags[i] |= HS_FLAG_UTF8;
                        }
 #endif
-                       if (pcre_flags & PCRE_FLAG(CASELESS)) {
-                               hs_flags[i] |= HS_FLAG_CASELESS;
-                       }
-                       if (pcre_flags & PCRE_FLAG(MULTILINE)) {
-                               hs_flags[i] |= HS_FLAG_MULTILINE;
-                       }
-                       if (pcre_flags & PCRE_FLAG(DOTALL)) {
-                               hs_flags[i] |= HS_FLAG_DOTALL;
-                       }
-                       if (rspamd_regexp_get_maxhits (re) == 1) {
-                               hs_flags[i] |= HS_FLAG_SINGLEMATCH;
-                       }
+               if (pcre_flags & PCRE_FLAG(CASELESS)) {
+                       hs_flags[i] |= HS_FLAG_CASELESS;
+               }
+               if (pcre_flags & PCRE_FLAG(MULTILINE)) {
+                       hs_flags[i] |= HS_FLAG_MULTILINE;
+               }
+               if (pcre_flags & PCRE_FLAG(DOTALL)) {
+                       hs_flags[i] |= HS_FLAG_DOTALL;
+               }
+               if (rspamd_regexp_get_maxhits (re) == 1) {
+                       hs_flags[i] |= HS_FLAG_SINGLEMATCH;
+               }
 
-                       gchar *pat = rspamd_re_cache_hs_pattern_from_pcre (re);
+               gchar *pat = rspamd_re_cache_hs_pattern_from_pcre (re);
 
-                       if (hs_compile (pat,
-                                       hs_flags[i],
-                                       cache->vectorized_hyperscan ? HS_MODE_VECTORED : HS_MODE_BLOCK,
-                                       &cache->plt,
-                                       &test_db,
-                                       &hs_errors) != HS_SUCCESS) {
-                               msg_info_re_cache ("cannot compile %s to hyperscan, try prefilter match",
-                                               pat);
-                               hs_free_compile_error (hs_errors);
+               if (hs_compile (pat,
+                               hs_flags[i],
+                               cache->vectorized_hyperscan ? HS_MODE_VECTORED : HS_MODE_BLOCK,
+                               &cache->plt,
+                               &test_db,
+                               &hs_errors) != HS_SUCCESS) {
+                       msg_info_re_cache ("cannot compile %s to hyperscan, try prefilter match",
+                                       pat);
+                       hs_free_compile_error (hs_errors);
 
-                               /* The approximation operation might take a significant
-                                * amount of time, so we need to check if it's finite
-                                */
-                               if (rspamd_re_cache_is_finite (cache, re, hs_flags[i], max_time)) {
-                                       hs_flags[i] |= HS_FLAG_PREFILTER;
-                                       hs_ids[i] = rspamd_regexp_get_cache_id (re);
-                                       hs_pats[i] = pat;
-                                       i++;
-                               }
-                               else {
-                                       g_free (pat); /* Avoid leak */
-                               }
-                       }
-                       else {
+                       /* The approximation operation might take a significant
+                        * amount of time, so we need to check if it's finite
+                        */
+                       if (rspamd_re_cache_is_finite (cache, re, hs_flags[i], cbdata->max_time)) {
+                               hs_flags[i] |= HS_FLAG_PREFILTER;
                                hs_ids[i] = rspamd_regexp_get_cache_id (re);
                                hs_pats[i] = pat;
-                               i ++;
-                               hs_free_database (test_db);
+                               i++;
+                       }
+                       else {
+                               g_free (pat); /* Avoid leak */
                        }
                }
-               /* Adjust real re number */
-               n = i;
-
-               if (n > 0) {
-                       /* Create the hs tree */
-                       if (hs_compile_ext_multi ((const char **)hs_pats,
-                                       hs_flags,
-                                       hs_ids,
-                                       hs_exts,
-                                       n,
-                                       cache->vectorized_hyperscan ? HS_MODE_VECTORED : HS_MODE_BLOCK,
-                                       &cache->plt,
-                                       &test_db,
-                                       &hs_errors) != HS_SUCCESS) {
-
-                               g_set_error (err, rspamd_re_cache_quark (), EINVAL,
-                                               "cannot create tree of regexp when processing '%s': %s",
-                                               hs_pats[hs_errors->expression], hs_errors->message);
-                               g_free (hs_flags);
-                               g_free (hs_ids);
-
-                               for (guint j = 0; j < i; j ++) {
-                                       g_free (hs_pats[j]);
-                               }
+               else {
+                       hs_ids[i] = rspamd_regexp_get_cache_id (re);
+                       hs_pats[i] = pat;
+                       i ++;
+                       hs_free_database (test_db);
+               }
+       }
+       /* Adjust real re number */
+       n = i;
+
+       if (n > 0) {
+               /* Create the hs tree */
+               if (hs_compile_ext_multi ((const char **)hs_pats,
+                               hs_flags,
+                               hs_ids,
+                               hs_exts,
+                               n,
+                               cache->vectorized_hyperscan ? HS_MODE_VECTORED : HS_MODE_BLOCK,
+                               &cache->plt,
+                               &test_db,
+                               &hs_errors) != HS_SUCCESS) {
 
-                               g_free (hs_pats);
-                               g_free (hs_exts);
-                               close (fd);
-                               unlink (path);
-                               hs_free_compile_error (hs_errors);
 
-                               return -1;
-                       }
+                       g_free (hs_flags);
+                       g_free (hs_ids);
 
                        for (guint j = 0; j < i; j ++) {
                                g_free (hs_pats[j]);
                        }
+
                        g_free (hs_pats);
                        g_free (hs_exts);
+                       close (fd);
+                       unlink (path);
+                       hs_free_compile_error (hs_errors);
 
-                       if (hs_serialize_database (test_db, &hs_serialized,
-                                       &serialized_len) != HS_SUCCESS) {
-                               g_set_error (err,
-                                               rspamd_re_cache_quark (),
-                                               errno,
-                                               "cannot serialize tree of regexp for %s",
-                                               re_class->hash);
+                       err = g_error_new (rspamd_re_cache_quark (), EINVAL,
+                                       "cannot create tree of regexp when processing '%s': %s",
+                                       hs_pats[hs_errors->expression], hs_errors->message);
+                       rspamd_re_cache_compile_err (EV_A_ w, err, cbdata);
 
-                               close (fd);
-                               unlink (path);
-                               g_free (hs_ids);
-                               g_free (hs_flags);
-                               hs_free_database (test_db);
+                       return;
+               }
 
-                               return -1;
-                       }
+               for (guint j = 0; j < i; j ++) {
+                       g_free (hs_pats[j]);
+               }
 
-                       hs_free_database (test_db);
+               g_free (hs_pats);
+               g_free (hs_exts);
 
-                       /*
-                        * Magic - 8 bytes
-                        * Platform - sizeof (platform)
-                        * n - number of regexps
-                        * n * <regexp ids>
-                        * n * <regexp flags>
-                        * crc - 8 bytes checksum
-                        * <hyperscan blob>
-                        */
-                       rspamd_cryptobox_fast_hash_init (&crc_st, 0xdeadbabe);
-                       /* IDs -> Flags -> Hs blob */
-                       rspamd_cryptobox_fast_hash_update (&crc_st,
-                                       hs_ids, sizeof (*hs_ids) * n);
-                       rspamd_cryptobox_fast_hash_update (&crc_st,
-                                       hs_flags, sizeof (*hs_flags) * n);
-                       rspamd_cryptobox_fast_hash_update (&crc_st,
-                                       hs_serialized, serialized_len);
-                       crc = rspamd_cryptobox_fast_hash_final (&crc_st);
+               if (hs_serialize_database (test_db, &hs_serialized,
+                               &serialized_len) != HS_SUCCESS) {
+                       err = g_error_new (rspamd_re_cache_quark (),
+                                       errno,
+                                       "cannot serialize tree of regexp for %s",
+                                       re_class->hash);
 
-                       if (cache->vectorized_hyperscan) {
-                               iov[0].iov_base = (void *) rspamd_hs_magic_vector;
-                       }
-                       else {
-                               iov[0].iov_base = (void *) rspamd_hs_magic;
-                       }
+                       close (fd);
+                       unlink (path);
+                       g_free (hs_ids);
+                       g_free (hs_flags);
+                       hs_free_database (test_db);
 
-                       iov[0].iov_len = RSPAMD_HS_MAGIC_LEN;
-                       iov[1].iov_base = &cache->plt;
-                       iov[1].iov_len = sizeof (cache->plt);
-                       iov[2].iov_base = &n;
-                       iov[2].iov_len = sizeof (n);
-                       iov[3].iov_base = hs_ids;
-                       iov[3].iov_len = sizeof (*hs_ids) * n;
-                       iov[4].iov_base = hs_flags;
-                       iov[4].iov_len = sizeof (*hs_flags) * n;
-                       iov[5].iov_base = &crc;
-                       iov[5].iov_len = sizeof (crc);
-                       iov[6].iov_base = hs_serialized;
-                       iov[6].iov_len = serialized_len;
-
-                       if (writev (fd, iov, G_N_ELEMENTS (iov)) == -1) {
-                               g_set_error (err,
-                                               rspamd_re_cache_quark (),
-                                               errno,
-                                               "cannot serialize tree of regexp to %s: %s",
-                                               path, strerror (errno));
-                               close (fd);
-                               unlink (path);
-                               g_free (hs_ids);
-                               g_free (hs_flags);
-                               g_free (hs_serialized);
+                       rspamd_re_cache_compile_err (EV_A_ w, err, cbdata);
+                       return;
+               }
 
-                               return -1;
-                       }
+               hs_free_database (test_db);
 
-                       if (re_class->type_len > 0) {
-                               msg_info_re_cache (
-                                               "compiled class %s(%*s) to cache %6s, %d regexps",
-                                               rspamd_re_cache_type_to_string (re_class->type),
-                                               (gint) re_class->type_len - 1,
-                                               re_class->type_data,
-                                               re_class->hash,
-                                               n);
-                       }
-                       else {
-                               msg_info_re_cache (
-                                               "compiled class %s to cache %6s, %d regexps",
-                                               rspamd_re_cache_type_to_string (re_class->type),
-                                               re_class->hash,
-                                               n);
-                       }
-
-                       total += n;
+               /*
+                * Magic - 8 bytes
+                * Platform - sizeof (platform)
+                * n - number of regexps
+                * n * <regexp ids>
+                * n * <regexp flags>
+                * crc - 8 bytes checksum
+                * <hyperscan blob>
+                */
+               rspamd_cryptobox_fast_hash_init (&crc_st, 0xdeadbabe);
+               /* IDs -> Flags -> Hs blob */
+               rspamd_cryptobox_fast_hash_update (&crc_st,
+                               hs_ids, sizeof (*hs_ids) * n);
+               rspamd_cryptobox_fast_hash_update (&crc_st,
+                               hs_flags, sizeof (*hs_flags) * n);
+               rspamd_cryptobox_fast_hash_update (&crc_st,
+                               hs_serialized, serialized_len);
+               crc = rspamd_cryptobox_fast_hash_final (&crc_st);
+
+               if (cache->vectorized_hyperscan) {
+                       iov[0].iov_base = (void *) rspamd_hs_magic_vector;
+               }
+               else {
+                       iov[0].iov_base = (void *) rspamd_hs_magic;
+               }
 
-                       g_free (hs_serialized);
+               iov[0].iov_len = RSPAMD_HS_MAGIC_LEN;
+               iov[1].iov_base = &cache->plt;
+               iov[1].iov_len = sizeof (cache->plt);
+               iov[2].iov_base = &n;
+               iov[2].iov_len = sizeof (n);
+               iov[3].iov_base = hs_ids;
+               iov[3].iov_len = sizeof (*hs_ids) * n;
+               iov[4].iov_base = hs_flags;
+               iov[4].iov_len = sizeof (*hs_flags) * n;
+               iov[5].iov_base = &crc;
+               iov[5].iov_len = sizeof (crc);
+               iov[6].iov_base = hs_serialized;
+               iov[6].iov_len = serialized_len;
+
+               if (writev (fd, iov, G_N_ELEMENTS (iov)) == -1) {
+                       err = g_error_new (rspamd_re_cache_quark (),
+                                       errno,
+                                       "cannot serialize tree of regexp to %s: %s",
+                                       path, strerror (errno));
+                       close (fd);
+                       unlink (path);
                        g_free (hs_ids);
                        g_free (hs_flags);
+                       g_free (hs_serialized);
+
+                       rspamd_re_cache_compile_err (EV_A_ w, err, cbdata);
+                       return;
                }
 
-               fsync (fd);
+               if (re_class->type_len > 0) {
+                       msg_info_re_cache (
+                                       "compiled class %s(%*s) to cache %6s, %d regexps",
+                                       rspamd_re_cache_type_to_string (re_class->type),
+                                       (gint) re_class->type_len - 1,
+                                       re_class->type_data,
+                                       re_class->hash,
+                                       n);
+               }
+               else {
+                       msg_info_re_cache (
+                                       "compiled class %s to cache %6s, %d regexps",
+                                       rspamd_re_cache_type_to_string (re_class->type),
+                                       re_class->hash,
+                                       n);
+               }
 
-               /* Now rename temporary file to the new .hs file */
-               rspamd_snprintf (npath, sizeof (path), "%s%c%s.hs", cache_dir,
-                               G_DIR_SEPARATOR, re_class->hash);
+               cbdata->total += n;
 
-               if (rename (path, npath) == -1) {
-                       g_set_error (err,
-                                       rspamd_re_cache_quark (),
-                                       errno,
-                                       "cannot rename %s to %s: %s",
-                                       path, npath, strerror (errno));
-                       unlink (path);
-                       close (fd);
+               g_free (hs_serialized);
+               g_free (hs_ids);
+               g_free (hs_flags);
+       }
 
-                       return -1;
-               }
+       fsync (fd);
+
+       /* Now rename temporary file to the new .hs file */
+       rspamd_snprintf (npath, sizeof (path), "%s%c%s.hs", cbdata->cache_dir,
+                       G_DIR_SEPARATOR, re_class->hash);
 
+       if (rename (path, npath) == -1) {
+               err = g_error_new (rspamd_re_cache_quark (),
+                               errno,
+                               "cannot rename %s to %s: %s",
+                               path, npath, strerror (errno));
+               unlink (path);
                close (fd);
+
+               rspamd_re_cache_compile_err (EV_A_ w, err, cbdata);
+               return;
        }
 
-       return total;
+       close (fd);
+       ev_timer_again (EV_A_ w);
+}
+
+#endif
+
+gint
+rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
+                                                                  const char *cache_dir,
+                                                                  gdouble max_time,
+                                                                  gboolean silent,
+                                                                  struct ev_loop *event_loop,
+                                                                  void (*cb)(guint ncompiled, GError *err, void *cbd),
+                                                                  void *cbd)
+{
+       g_assert (cache != NULL);
+       g_assert (cache_dir != NULL);
+
+#ifndef WITH_HYPERSCAN
+       g_set_error (err, rspamd_re_cache_quark (), EINVAL, "hyperscan is disabled");
+       return -1;
+#else
+       static ev_timer *timer;
+       static const ev_tstamp timer_interval = 0.1;
+       struct rspamd_re_cache_hs_compile_cbdata *cbdata;
+
+       cbdata = g_malloc0 (sizeof (*cbdata));
+       g_hash_table_iter_init (&cbdata->it, cache->re_classes);
+       cbdata->cache = cache;
+       cbdata->cache_dir = cache_dir;
+       cbdata->cb = cb;
+       cbdata->cbd = cbd;
+       cbdata->max_time = max_time;
+       cbdata->silent = silent;
+       cbdata->total = 0;
+       timer = g_malloc0 (sizeof (*timer));
+       timer->data = (void *)cbdata; /* static */
+
+       ev_timer_init (timer, rspamd_re_cache_compile_timer_cb,
+                       timer_interval, timer_interval);
+       ev_timer_start (event_loop, timer);
+
+       return 0;
 #endif
 }
 
index b9f80375e6a61a157ba893c42ce5f57c3b3a3071..6b5aa84f64bdd69b9c63534d22f4cdddb43fc05c 100644 (file)
@@ -160,12 +160,17 @@ const gchar *rspamd_re_cache_type_to_string (enum rspamd_re_type type);
  */
 enum rspamd_re_type rspamd_re_cache_type_from_string (const char *str);
 
+struct ev_loop;
 /**
  * Compile expressions to the hyperscan tree and store in the `cache_dir`
  */
 gint rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
-                                                                               const char *cache_dir, gdouble max_time, gboolean silent,
-                                                                               GError **err);
+                                                                               const char *cache_dir,
+                                                                               gdouble max_time,
+                                                                               gboolean silent,
+                                                                               struct ev_loop *event_loop,
+                                                                               void (*cb)(guint ncompiled, GError *err, void *cbd),
+                                                                               void *cbd);
 
 
 /**