install -d "$(DESTDIR)$(e_rundir)"
install -m 770 -d "$(DESTDIR)$(e_localstatedir)"
install -m 770 -d "$(DESTDIR)$(e_datadir)"
- install -m 660 -d "$(DESTDIR)$(e_sghcachedir)"
+ install -m 770 -d "$(DESTDIR)$(e_sghcachedir)"
install-rules:
if INSTALL_SURICATA_UPDATE
If the current suricata installation does not have hyperscan
-support, refer to :ref:`installation`
\ No newline at end of file
+support, refer to :ref:`installation`
+
+Hyperscan caching
+~~~~~~~~~~~~~~~~~
+
+Upon startup, Hyperscan compiles and optimizes the ruleset into its own
+internal structure. Suricata optimizes the startup process by saving
+the Hyperscan internal structures to disk and loading them on the next start.
+This prevents the recompilation of the ruleset and results in faster
+initialization. If the ruleset is changed, new necessary cache files are
+automatically created.
+
+To enable this function, in `suricata.yaml` configure:
+
+::
+
+ detect:
+ # Cache MPM contexts to the disk to avoid rule compilation at the startup.
+ # Cache files are created in the standard library directory.
+ sgh-mpm-caching: yes
+ sgh-mpm-caching-path: /var/lib/suricata/cache/hs
+
+
+**Note**:
+You might need to create the default caching folder and adjust its
+permissions, especially if you are running Suricata as a non-root user.
util-mpm-ac-ks.h \
util-mpm.h \
util-mpm-hs.h \
+ util-mpm-hs-cache.h \
util-mpm-hs-core.h \
util-optimize.h \
util-pages.h \
util-mpm-ac-ks-small.c \
util-mpm.c \
util-mpm-hs.c \
+ util-mpm-hs-cache.c \
util-mpm-hs-core.c \
util-pages.c \
util-path.c \
--- /dev/null
+/* Copyright (C) 2007-2024 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Lukas Sismis <lsismis@oisf.net>
+ *
+ * Hyperscan database cache helpers for the MPM pattern matcher: hashing of
+ * pattern databases and loading/saving of serialized Hyperscan databases.
+ */
+
+#include "suricata-common.h"
+#include "suricata.h"
+#include "detect-engine.h"
+#include "util-debug.h"
+#include "util-hash-lookup3.h"
+#include "util-mpm-hs-core.h"
+#include "util-mpm-hs-cache.h"
+#include "util-path.h"
+
+#ifdef BUILD_HYPERSCAN
+
+#include <hs.h>
+
+/**
+ * \brief Build the cache file path for a Hyperscan database hash.
+ *
+ * The file name is the hash printed as a zero-padded 20-digit decimal number
+ * followed by the "_v1.hs" suffix; it is then merged with \p folder_path.
+ *
+ * \param folder_path directory the cache file lives in
+ * \param hs_db_hash hash identifying the pattern database
+ *
+ * \retval path pointer to a function-local static buffer; it is overwritten by
+ *         the next call, so the result must be used before calling again
+ * \retval NULL if the file name or the merged path could not be built
+ */
+static const char *HSCacheConstructFPath(const char *folder_path, uint64_t hs_db_hash)
+{
+    static char hash_file_path[PATH_MAX];
+
+    const char hash_file_path_suffix[] = "_v1.hs";
+    char filename[PATH_MAX];
+    /* uint64_t is not `unsigned long` on every target (32-bit, LLP64), so the
+     * value is printed through an explicit unsigned long long. */
+    int written = snprintf(filename, sizeof(filename), "%020llu%s",
+            (unsigned long long)hs_db_hash, hash_file_path_suffix);
+    if (written < 0 || (size_t)written != 20 + strlen(hash_file_path_suffix))
+        return NULL;
+
+    if (PathMerge(hash_file_path, sizeof(hash_file_path), folder_path, filename) != 0)
+        return NULL;
+
+    return hash_file_path;
+}
+
+/**
+ * \brief Read a whole file into a newly allocated buffer.
+ *
+ * \param file_path path of the file to read
+ * \param buffer_sz [out] set to the number of bytes read on success; left
+ *                  untouched on failure
+ *
+ * \retval buffer allocated with SCCalloc holding the file contents; the caller
+ *         releases it with SCFree
+ * \retval NULL if the file cannot be opened, sized or fully read, if it is
+ *         empty, or if the allocation fails
+ */
+static char *HSReadStream(const char *file_path, size_t *buffer_sz)
+{
+    char *buffer = NULL;
+    long file_sz = 0;
+
+    FILE *file = fopen(file_path, "rb");
+    if (!file) {
+        SCLogDebug("Failed to open file %s: %s", file_path, strerror(errno));
+        return NULL;
+    }
+
+    // Seek to the end of the file to determine its size
+    if (fseek(file, 0, SEEK_END) != 0) {
+        SCLogDebug("Failed to determine file size of %s: %s", file_path, strerror(errno));
+        goto error;
+    }
+    file_sz = ftell(file);
+    if (file_sz < 0) {
+        SCLogDebug("Failed to determine file size of %s: %s", file_path, strerror(errno));
+        goto error;
+    }
+    // An empty file cannot hold a serialized database; also avoids a
+    // zero-sized allocation whose result is implementation-defined.
+    if (file_sz == 0) {
+        SCLogDebug("File %s is empty", file_path);
+        goto error;
+    }
+
+    buffer = (char *)SCCalloc((size_t)file_sz, sizeof(char));
+    if (!buffer) {
+        SCLogWarning("Failed to allocate memory");
+        goto error;
+    }
+
+    // Go back to the start (checked, unlike rewind) and read the whole file
+    if (fseek(file, 0, SEEK_SET) != 0 ||
+            fread(buffer, 1, (size_t)file_sz, file) != (size_t)file_sz) {
+        SCLogDebug("Failed to read the entire file %s: %s", file_path, strerror(errno));
+        goto error;
+    }
+
+    *buffer_sz = (size_t)file_sz;
+    fclose(file);
+    return buffer;
+
+error:
+    if (buffer)
+        SCFree(buffer);
+    fclose(file);
+    return NULL;
+}
+
+/**
+ * Function to hash the searched pattern, only things relevant to Hyperscan
+ * compilation are hashed.
+ *
+ * The pattern must have a non-NULL original_pat and sids (enforced with
+ * BUG_ON). The fields are fed to hashlittle2_safe() in a fixed order: len,
+ * flags, the len bytes of original_pat, id, offset, depth, sids_size and
+ * finally the sids array (sids_size entries of SigIntId).
+ *
+ * \param p pattern to hash
+ * \param h1 handed by pointer to every hashlittle2_safe() call
+ * \param h2 handed by pointer to every hashlittle2_safe() call
+ *
+ * NOTE(review): presumably h1/h2 are in/out running hash values, so the call
+ * order determines the result - confirm against util-hash-lookup3.
+ */
+static void SCHSCachePatternHash(const SCHSPattern *p, uint32_t *h1, uint32_t *h2)
+{
+ BUG_ON(p->original_pat == NULL);
+ BUG_ON(p->sids == NULL);
+
+ hashlittle2_safe(&p->len, sizeof(p->len), h1, h2);
+ hashlittle2_safe(&p->flags, sizeof(p->flags), h1, h2);
+ hashlittle2_safe(p->original_pat, p->len, h1, h2);
+ hashlittle2_safe(&p->id, sizeof(p->id), h1, h2);
+ hashlittle2_safe(&p->offset, sizeof(p->offset), h1, h2);
+ hashlittle2_safe(&p->depth, sizeof(p->depth), h1, h2);
+ hashlittle2_safe(&p->sids_size, sizeof(p->sids_size), h1, h2);
+ hashlittle2_safe(p->sids, p->sids_size * sizeof(SigIntId), h1, h2);
+}
+
+/**
+ * \brief Load a serialized Hyperscan database from the cache directory.
+ *
+ * \param hs_db [out] receives the deserialized database on success
+ * \param hs_db_hash hash identifying the pattern database (selects the file)
+ * \param dirpath cache directory to look in
+ *
+ * \retval 0 the cache file was read and deserialized into \p hs_db
+ * \retval -1 the path could not be built, the file does not exist, cannot be
+ *         read, or does not deserialize
+ */
+int HSLoadCache(hs_database_t **hs_db, uint64_t hs_db_hash, const char *dirpath)
+{
+    const char *hash_file_static = HSCacheConstructFPath(dirpath, hs_db_hash);
+    if (hash_file_static == NULL)
+        return -1;
+
+    SCLogDebug("Loading the cached HS DB from %s", hash_file_static);
+    /* A missing file just means "not cached yet", so no warning here. */
+    if (!SCPathExists(hash_file_static))
+        return -1;
+
+    /* HSReadStream opens the file itself. Any failure to open or read an
+     * existing file must be reported as a miss, never as success, because
+     * the caller uses *hs_db right after a 0 return. */
+    size_t buffer_size = 0;
+    char *buffer = HSReadStream(hash_file_static, &buffer_size);
+    if (!buffer) {
+        SCLogWarning("Hyperscan cached DB file %s cannot be read", hash_file_static);
+        return -1;
+    }
+
+    int ret = 0;
+    hs_error_t error = hs_deserialize_database(buffer, buffer_size, hs_db);
+    if (error != HS_SUCCESS) {
+        SCLogWarning("Failed to deserialize Hyperscan database of %s: %s", hash_file_static,
+                HSErrorToStr(error));
+        ret = -1;
+    }
+
+    SCFree(buffer);
+    return ret;
+}
+
+/**
+ * \brief Serialize a Hyperscan database and store it in the cache directory.
+ *
+ * A file that could not be written completely or closed cleanly is removed
+ * again, so a truncated cache is never left behind and never reported as
+ * saved.
+ *
+ * \param hs_db compiled database to serialize
+ * \param hs_db_hash hash identifying the pattern database (selects the file)
+ * \param dstpath cache directory to write into
+ *
+ * \retval 0 the complete serialized database was written and the file closed
+ * \retval -1 serialization, path construction, open, write or close failed
+ */
+static int HSSaveCache(hs_database_t *hs_db, uint64_t hs_db_hash, const char *dstpath)
+{
+    /* warn about an unwritable cache folder only once per process */
+    static bool notified = false;
+    char *db_stream = NULL;
+    size_t db_size = 0;
+    int ret = -1;
+
+    hs_error_t err = hs_serialize_database(hs_db, &db_stream, &db_size);
+    if (err != HS_SUCCESS) {
+        SCLogWarning("Failed to serialize Hyperscan database: %s", HSErrorToStr(err));
+        goto cleanup;
+    }
+
+    const char *hash_file_static = HSCacheConstructFPath(dstpath, hs_db_hash);
+    if (hash_file_static == NULL) {
+        SCLogWarning("Failed to construct Hyperscan cache file path");
+        goto cleanup;
+    }
+
+    SCLogDebug("Caching the compiled HS at %s", hash_file_static);
+    if (SCPathExists(hash_file_static)) {
+        // potentially signs that it might not work as expected as we got into
+        // hash collision. If this happens with older and not used caches it is
+        // fine.
+        // It is problematic when one ruleset yields two colliding MPM groups.
+        SCLogWarning("Overwriting cache file %s. If the problem persists consider switching off "
+                     "the caching",
+                hash_file_static);
+    }
+
+    /* binary mode: the serialized database is not text */
+    FILE *db_cache_out = fopen(hash_file_static, "wb");
+    if (!db_cache_out) {
+        if (!notified) {
+            SCLogWarning("Failed to create Hyperscan cache file, make sure the folder exist and is "
+                         "writable or adjust sgh-mpm-caching-path setting (%s)",
+                    hash_file_static);
+            notified = true;
+        }
+        goto cleanup;
+    }
+
+    /* Any count other than db_size - including 0 - is a failed write. */
+    bool write_ok = true;
+    size_t written = fwrite(db_stream, sizeof(db_stream[0]), db_size, db_cache_out);
+    if (written != db_size) {
+        SCLogWarning("Failed to write to file: %s", hash_file_static);
+        write_ok = false;
+    }
+
+    /* Close before removing; fclose also flushes, so its failure means the
+     * data on disk may be incomplete. */
+    if (fclose(db_cache_out) != 0) {
+        SCLogWarning("Failed to close file: %s", hash_file_static);
+        write_ok = false;
+    }
+
+    if (!write_ok) {
+        // possibly a corrupted DB cache was created
+        if (remove(hash_file_static) != 0) {
+            SCLogWarning("Failed to remove corrupted cache file: %s", hash_file_static);
+        }
+        goto cleanup;
+    }
+
+    ret = 0;
+cleanup:
+    if (db_stream)
+        SCFree(db_stream);
+    return ret;
+}
+
+/**
+ * \brief Compute the 64-bit hash that identifies a pattern database.
+ *
+ * The pattern count is hashed first with hashword2(), then every pattern in
+ * pd->parray is folded in, in array order, through SCHSCachePatternHash().
+ *
+ * \param pd pattern database to hash
+ *
+ * \retval hash the two 32-bit hash words viewed as one uint64_t
+ */
+uint64_t HSHashDb(const PatternDatabase *pd)
+{
+    /* A union keeps the same byte layout as viewing the uint64_t as two
+     * uint32_t words, without reading a uint64_t object through a uint32_t
+     * pointer (which breaks the strict aliasing rule). */
+    union {
+        uint64_t u64;
+        uint32_t u32[2];
+    } cached_hash = { .u64 = 0 };
+
+    hashword2(&pd->pattern_cnt, 1, &cached_hash.u32[0], &cached_hash.u32[1]);
+    for (uint32_t i = 0; i < pd->pattern_cnt; i++) {
+        SCHSCachePatternHash(pd->parray[i], &cached_hash.u32[0], &cached_hash.u32[1]);
+    }
+
+    return cached_hash.u64;
+}
+
+/**
+ * \brief Per-entry callback that saves one pattern database to the cache.
+ *
+ * Databases flagged no_cache are ignored. Every other database is counted as
+ * cacheable; one that is already marked cached is counted as loaded, any
+ * other is saved and, if that succeeds, marked cached and counted as saved.
+ *
+ * \param data the PatternDatabase being visited
+ * \param aux the struct HsIteratorData with the counters and the cache path
+ */
+void HSSaveCacheIterator(void *data, void *aux)
+{
+    PatternDatabase *db = (PatternDatabase *)data;
+    struct HsIteratorData *ctx = (struct HsIteratorData *)aux;
+
+    if (!db->no_cache) {
+        // count only cacheable DBs
+        ctx->pd_stats->hs_cacheable_dbs_cnt++;
+
+        if (db->cached) {
+            ctx->pd_stats->hs_dbs_cache_loaded_cnt++;
+        } else if (HSSaveCache(db->hs_db, HSHashDb(db), ctx->cache_path) == 0) {
+            db->cached = true; // for rule reloads
+            ctx->pd_stats->hs_dbs_cache_saved_cnt++;
+        }
+    }
+}
+
+#endif /* BUILD_HYPERSCAN */
--- /dev/null
+/* Copyright (C) 2024 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Lukas Sismis <lsismis@oisf.net>
+ *
+ * Hyperscan caching logic for faster database compilation.
+ */
+
+#ifndef SURICATA_UTIL_MPM_HS_CACHE__H
+#define SURICATA_UTIL_MPM_HS_CACHE__H
+
+#include "util-mpm-hs-core.h"
+
+#ifdef BUILD_HYPERSCAN
+
+struct HsIteratorData {
+ PatternDatabaseCache *pd_stats; /**< counters updated by HSSaveCacheIterator */
+ const char *cache_path; /**< directory HSSaveCacheIterator saves databases into */
+};
+
+int HSLoadCache(hs_database_t **hs_db, uint64_t hs_db_hash, const char *dirpath);
+uint64_t HSHashDb(const PatternDatabase *pd);
+void HSSaveCacheIterator(void *data, void *aux);
+#endif /* BUILD_HYPERSCAN */
+
+#endif /* SURICATA_UTIL_MPM_HS_CACHE__H */
#include <hs.h>
+// Encode major, minor, and patch into a single 32-bit integer; HS_VERSION_AT_LEAST compares it against HS_VERSION_32BIT.
+#define HS_VERSION_ENCODE(major, minor, patch) (((major) << 24) | ((minor) << 16) | ((patch) << 8))
+#define HS_VERSION_AT_LEAST(major, minor, patch) \
+ (HS_VERSION_32BIT >= HS_VERSION_ENCODE(major, minor, patch))
+
/**
* Translates Hyperscan error codes to human-readable messages.
*
return "HS_BAD_ALLOC: The memory allocator did not return correctly aligned memory";
case HS_SCRATCH_IN_USE:
return "HS_SCRATCH_IN_USE: The scratch region was already in use";
+#if HS_VERSION_AT_LEAST(4, 4, 0)
case HS_ARCH_ERROR:
return "HS_ARCH_ERROR: Unsupported CPU architecture";
+#endif // HS_VERSION_AT_LEAST(4, 4, 0)
+#if HS_VERSION_AT_LEAST(4, 6, 0)
case HS_INSUFFICIENT_SPACE:
return "HS_INSUFFICIENT_SPACE: Provided buffer was too small";
+#endif // HS_VERSION_AT_LEAST(4, 6, 0)
+#if HS_VERSION_AT_LEAST(5, 1, 1)
case HS_UNKNOWN_ERROR:
return "HS_UNKNOWN_ERROR: Unexpected internal error";
+#endif // HS_VERSION_AT_LEAST(5, 1, 1)
default:
return "Unknown error code";
}
#include <hs.h>
typedef struct SCHSPattern_ {
- /* length of the pattern */
+ /** length of the pattern */
uint16_t len;
- /* flags describing the pattern */
+ /** flags describing the pattern */
uint8_t flags;
- /* holds the original pattern that was added */
+ /** holds the original pattern that was added */
uint8_t *original_pat;
- /* pattern id */
+ /** pattern id */
uint32_t id;
uint16_t offset;
uint16_t depth;
- /* sid(s) for this pattern */
+ /** sid(s) for this pattern */
uint32_t sids_size;
SigIntId *sids;
- /* only used at ctx init time, when this structure is part of a hash
+ /** only used at ctx init time, when this structure is part of a hash
* table. */
struct SCHSPattern_ *next;
} SCHSPattern;
typedef struct SCHSCtx_ {
- /* hash used during ctx initialization */
+ /** hash used during ctx initialization */
SCHSPattern **init_hash;
- /* pattern database and pattern arrays. */
+ /** pattern database and pattern arrays. */
void *pattern_db;
- /* size of database, for accounting. */
+ /** size of database, for accounting. */
size_t hs_db_size;
} SCHSCtx;
typedef struct SCHSThreadCtx_ {
- /* Hyperscan scratch space region for this thread, capable of handling any
+ /** Hyperscan scratch space region for this thread, capable of handling any
* database that has been compiled. */
void *scratch;
- /* size of scratch space, for accounting. */
+ /** size of scratch space, for accounting. */
size_t scratch_size;
} SCHSThreadCtx;
hs_database_t *hs_db;
uint32_t pattern_cnt;
- /* Reference count: number of MPM contexts using this pattern database. */
+ /** Reference count: number of MPM contexts using this pattern database. */
uint32_t ref_cnt;
- /* Signals if the matcher has loaded/saved the pattern database to disk */
+ /** Signals if the matcher has loaded/saved the pattern database to disk */
bool cached;
+ /** Matcher will not cache this pattern DB */
+ bool no_cache;
} PatternDatabase;
+typedef struct PatternDatabaseCache_ {
+ uint32_t hs_cacheable_dbs_cnt; /**< pattern DBs visited that are not flagged no_cache */
+ uint32_t hs_dbs_cache_loaded_cnt; /**< cacheable DBs already marked as cached when visited */
+ uint32_t hs_dbs_cache_saved_cnt; /**< cacheable DBs successfully saved during the visit */
+} PatternDatabaseCache;
+
const char *HSErrorToStr(hs_error_t error_code);
#endif /* BUILD_HYPERSCAN */
#include "detect-engine-build.h"
#include "conf.h"
+#include "util-conf.h"
#include "util-debug.h"
#include "util-unittest.h"
#include "util-unittest-helper.h"
#include "util-memcmp.h"
#include "util-mpm-hs.h"
+#include "util-mpm-hs-cache.h"
#include "util-mpm-hs-core.h"
#include "util-memcpy.h"
#include "util-hash.h"
#include "util-hash-lookup3.h"
#include "util-hyperscan.h"
+#include "util-path.h"
#ifdef BUILD_HYPERSCAN
pd->pattern_cnt = pattern_cnt;
pd->ref_cnt = 0;
pd->hs_db = NULL;
+ pd->cached = false;
/* alloc the pattern array */
pd->parray = (SCHSPattern **)SCCalloc(pd->pattern_cnt, sizeof(SCHSPattern *));
* \param SCHSCompileData* [in] Pointer to the compile data.
* \retval 0 On success, negative value on failure.
*/
-static int PatternDatabaseGetCached(PatternDatabase **pd, SCHSCompileData *cd)
+static int PatternDatabaseGetCached(
+ PatternDatabase **pd, SCHSCompileData *cd, const char *cache_dir_path)
{
/* Check global hash table to see if we've seen this pattern database
* before, and reuse the Hyperscan database if so. */
CompileDataFree(cd);
*pd = pd_cached;
return 0;
+ } else if (cache_dir_path) {
+ pd_cached = *pd;
+ uint64_t db_lookup_hash = HSHashDb(pd_cached);
+ if (HSLoadCache(&pd_cached->hs_db, db_lookup_hash, cache_dir_path) == 0) {
+ pd_cached->ref_cnt = 1;
+ pd_cached->cached = true;
+ if (HSScratchAlloc(pd_cached->hs_db) != 0) {
+ goto recover;
+ }
+ if (HashTableAdd(g_db_table, pd_cached, 1) < 0) {
+ goto recover;
+ }
+ CompileDataFree(cd);
+ return 0;
+
+ recover:
+ pd_cached->ref_cnt = 0;
+ pd_cached->cached = false;
+ return -1;
+ }
}
return -1; // not cached
}
HSPatternArrayInit(ctx, pd);
+ pd->no_cache = !(mpm_ctx->flags & MPMCTX_FLAGS_CACHE_TO_DISK);
/* Serialise whole database compilation as a relatively easy way to ensure
* dedupe is safe. */
SCMutexLock(&g_db_table_mutex);
goto error;
}
- if (PatternDatabaseGetCached(&pd, cd) == 0 && pd != NULL) {
+ const char *cache_path = pd->no_cache || !mpm_conf ? NULL : mpm_conf->cache_dir_path;
+ if (PatternDatabaseGetCached(&pd, cd, cache_path) == 0 && pd != NULL) {
ctx->pattern_db = pd;
if (PatternDatabaseGetSize(pd, &ctx->hs_db_size) != 0) {
SCMutexUnlock(&g_db_table_mutex);
return -1;
}
+/**
+ * \brief Store every cacheable compiled pattern database in the cache folder.
+ *
+ * Creates the cache directory tree if needed, then walks the global database
+ * table under g_db_table_mutex and lets HSSaveCacheIterator save each entry.
+ * A summary of loaded / newly cached / cacheable databases is logged.
+ *
+ * \param mpm_conf MPM configuration holding the cache directory path
+ *
+ * \retval 0 the table was iterated
+ * \retval -1 no configuration or cache path is set, or the cache directory
+ *         could not be created
+ */
+static int SCHSCacheRuleset(MpmConfig *mpm_conf)
+{
+    if (mpm_conf == NULL || mpm_conf->cache_dir_path == NULL) {
+        return -1;
+    }
+    const char *cache_dir = mpm_conf->cache_dir_path;
+
+    SCLogDebug("Caching the loaded ruleset to %s", cache_dir);
+    if (SCCreateDirectoryTree(cache_dir, true) != 0) {
+        SCLogWarning("Failed to create Hyperscan cache folder, make sure "
+                     "the parent folder is writeable "
+                     "or adjust sgh-mpm-caching-path setting (%s)",
+                cache_dir);
+        return -1;
+    }
+
+    PatternDatabaseCache counters = { 0 };
+    struct HsIteratorData walk = { .pd_stats = &counters, .cache_path = cache_dir };
+
+    SCMutexLock(&g_db_table_mutex);
+    HashTableIterate(g_db_table, HSSaveCacheIterator, &walk);
+    SCMutexUnlock(&g_db_table_mutex);
+
+    SCLogNotice("Rule group caching - loaded: %u newly cached: %u total cacheable: %u",
+            counters.hs_dbs_cache_loaded_cnt, counters.hs_dbs_cache_saved_cnt,
+            counters.hs_cacheable_dbs_cnt);
+    return 0;
+}
+
/**
* \brief Init the mpm thread context.
*
printf("\n");
}
+/**
+ * \brief Allocate an MPM configuration for the Hyperscan matcher.
+ *
+ * \retval config the result of SCCalloc(1, sizeof(MpmConfig)), returned as is
+ */
+static MpmConfig *SCHSConfigInit(void)
+{
+    return SCCalloc(1, sizeof(MpmConfig));
+}
+
+/**
+ * \brief Free an MPM configuration and clear the caller's pointer.
+ *
+ * \param c address of the configuration pointer; a NULL address is ignored
+ */
+static void SCHSConfigDeinit(MpmConfig **c)
+{
+    if (c == NULL)
+        return;
+
+    SCFree(*c);
+    *c = NULL;
+}
+
+/**
+ * \brief Set the cache directory of an MPM configuration.
+ *
+ * Only the pointer is stored; the string itself is not copied.
+ *
+ * \param c configuration to update
+ * \param dir_path cache directory path to store
+ */
+static void SCHSConfigCacheDirSet(MpmConfig *c, const char *dir_path)
+{
+    c->cache_dir_path = dir_path;
+}
+
/************************** Mpm Registration ***************************/
/**
mpm_table[MPM_HS].InitThreadCtx = SCHSInitThreadCtx;
mpm_table[MPM_HS].DestroyCtx = SCHSDestroyCtx;
mpm_table[MPM_HS].DestroyThreadCtx = SCHSDestroyThreadCtx;
- mpm_table[MPM_HS].ConfigInit = NULL;
- mpm_table[MPM_HS].ConfigDeinit = NULL;
- mpm_table[MPM_HS].ConfigCacheDirSet = NULL;
+ mpm_table[MPM_HS].ConfigInit = SCHSConfigInit;
+ mpm_table[MPM_HS].ConfigDeinit = SCHSConfigDeinit;
+ mpm_table[MPM_HS].ConfigCacheDirSet = SCHSConfigCacheDirSet;
mpm_table[MPM_HS].AddPattern = SCHSAddPatternCS;
mpm_table[MPM_HS].AddPatternNocase = SCHSAddPatternCI;
mpm_table[MPM_HS].Prepare = SCHSPreparePatterns;
- mpm_table[MPM_HS].CacheRuleset = NULL;
+ mpm_table[MPM_HS].CacheRuleset = SCHSCacheRuleset;
mpm_table[MPM_HS].Search = SCHSSearch;
mpm_table[MPM_HS].PrintCtx = SCHSPrintInfo;
mpm_table[MPM_HS].PrintThreadCtx = SCHSPrintSearchStats;