From: Lukas Sismis Date: Mon, 28 Oct 2024 14:46:17 +0000 (+0100) Subject: hyperscan: add caching mechanism for hyperscan contexts X-Git-Tag: suricata-8.0.0-beta1~190 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7dc65c2f8a9b5232b9594f747fe1756301402f74;p=thirdparty%2Fsuricata.git hyperscan: add caching mechanism for hyperscan contexts Cache Hyperscan serialized databases to disk to prevent compilation of the same databases when Suricata is run again with the same ruleset. Hyperscan binary files are stored per rulegroup in the designated folder, by default in the cached library folder. Since caching is per signature group heads, some chunk of the ruleset can change and it still can reuse part of the unchanged signature groups. Loading *fresh* ET Open ruleset: 19 seconds Loading *cached* ET Open ruleset: 07 seconds Ticket: 7170 --- diff --git a/Makefile.am b/Makefile.am index aaaaf7b6dd..48f1629127 100644 --- a/Makefile.am +++ b/Makefile.am @@ -37,7 +37,7 @@ install-conf: install -d "$(DESTDIR)$(e_rundir)" install -m 770 -d "$(DESTDIR)$(e_localstatedir)" install -m 770 -d "$(DESTDIR)$(e_datadir)" - install -m 660 -d "$(DESTDIR)$(e_sghcachedir)" + install -m 770 -d "$(DESTDIR)$(e_sghcachedir)" install-rules: if INSTALL_SURICATA_UPDATE diff --git a/doc/userguide/performance/hyperscan.rst b/doc/userguide/performance/hyperscan.rst index 95916332b3..065163110c 100644 --- a/doc/userguide/performance/hyperscan.rst +++ b/doc/userguide/performance/hyperscan.rst @@ -81,4 +81,29 @@ if it is present on the system in case of the "auto" setting. If the current suricata installation does not have hyperscan -support, refer to :ref:`installation` \ No newline at end of file +support, refer to :ref:`installation` + +Hyperscan caching +~~~~~~~~~~~~~~~~~ + +Upon startup, Hyperscan compiles and optimizes the ruleset into its own +internal structure. 
Suricata optimizes the startup process by saving +the Hyperscan internal structures to disk and loading them on the next start. +This prevents the recompilation of the ruleset and results in faster +initialization. If the ruleset changes, the necessary new cache files are +created automatically. + +To enable this feature, configure the following in `suricata.yaml`: + +:: + + detect: + # Cache MPM contexts to the disk to avoid rule compilation at the startup. + # Cache files are created in the standard library directory. + sgh-mpm-caching: yes + sgh-mpm-caching-path: /var/lib/suricata/cache/hs + + +**Note**: +You might need to create the default caching folder and adjust its +permissions, especially if you are running Suricata as a non-root user. diff --git a/src/Makefile.am b/src/Makefile.am index 3ddc570d87..389dc6eca7 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -542,6 +542,7 @@ noinst_HEADERS = \ util-mpm-ac-ks.h \ util-mpm.h \ util-mpm-hs.h \ + util-mpm-hs-cache.h \ util-mpm-hs-core.h \ util-optimize.h \ util-pages.h \ @@ -1102,6 +1103,7 @@ libsuricata_c_a_SOURCES = \ util-mpm-ac-ks-small.c \ util-mpm.c \ util-mpm-hs.c \ + util-mpm-hs-cache.c \ util-mpm-hs-core.c \ util-pages.c \ util-path.c \ diff --git a/src/util-mpm-hs-cache.c b/src/util-mpm-hs-cache.c new file mode 100644 index 0000000000..5b91c0197b --- /dev/null +++ b/src/util-mpm-hs-cache.c @@ -0,0 +1,248 @@ +/* Copyright (C) 2007-2024 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/** + * \file + * + * \author Lukas Sismis + * + * MPM pattern matcher that calls the Hyperscan regex matcher. + */ + +#include "suricata-common.h" +#include "suricata.h" +#include "detect-engine.h" +#include "util-debug.h" +#include "util-hash-lookup3.h" +#include "util-mpm-hs-core.h" +#include "util-mpm-hs-cache.h" +#include "util-path.h" + +#ifdef BUILD_HYPERSCAN + +#include + +static const char *HSCacheConstructFPath(const char *folder_path, uint64_t hs_db_hash) +{ + static char hash_file_path[PATH_MAX]; + + char hash_file_path_suffix[] = "_v1.hs"; + char filename[PATH_MAX]; + uint64_t r = + snprintf(filename, sizeof(filename), "%020lu%s", hs_db_hash, hash_file_path_suffix); + if (r != (uint64_t)(20 + strlen(hash_file_path_suffix))) + return NULL; + + r = PathMerge(hash_file_path, sizeof(hash_file_path), folder_path, filename); + if (r) + return NULL; + + return hash_file_path; +} + +static char *HSReadStream(const char *file_path, size_t *buffer_sz) +{ + FILE *file = fopen(file_path, "rb"); + if (!file) { + SCLogDebug("Failed to open file %s: %s", file_path, strerror(errno)); + return NULL; + } + + // Seek to the end of the file to determine its size + fseek(file, 0, SEEK_END); + long file_sz = ftell(file); + if (file_sz < 0) { + SCLogDebug("Failed to determine file size of %s: %s", file_path, strerror(errno)); + fclose(file); + return NULL; + } + + char *buffer = (char *)SCCalloc(file_sz, sizeof(char)); + if (!buffer) { + SCLogWarning("Failed to allocate memory"); + fclose(file); + return NULL; + } + + // Rewind file pointer and read the file into the buffer + rewind(file); + size_t bytes_read = fread(buffer, 1, file_sz, file); + if (bytes_read != (size_t)file_sz) { + SCLogDebug("Failed to read the entire file %s: %s", 
file_path, strerror(errno)); + SCFree(buffer); + fclose(file); + return NULL; + } + + *buffer_sz = file_sz; + fclose(file); + return buffer; +} + +/** + * Function to hash the searched pattern, only things relevant to Hyperscan + * compilation are hashed. + */ +static void SCHSCachePatternHash(const SCHSPattern *p, uint32_t *h1, uint32_t *h2) +{ + BUG_ON(p->original_pat == NULL); + BUG_ON(p->sids == NULL); + + hashlittle2_safe(&p->len, sizeof(p->len), h1, h2); + hashlittle2_safe(&p->flags, sizeof(p->flags), h1, h2); + hashlittle2_safe(p->original_pat, p->len, h1, h2); + hashlittle2_safe(&p->id, sizeof(p->id), h1, h2); + hashlittle2_safe(&p->offset, sizeof(p->offset), h1, h2); + hashlittle2_safe(&p->depth, sizeof(p->depth), h1, h2); + hashlittle2_safe(&p->sids_size, sizeof(p->sids_size), h1, h2); + hashlittle2_safe(p->sids, p->sids_size * sizeof(SigIntId), h1, h2); +} + +int HSLoadCache(hs_database_t **hs_db, uint64_t hs_db_hash, const char *dirpath) +{ + const char *hash_file_static = HSCacheConstructFPath(dirpath, hs_db_hash); + if (hash_file_static == NULL) + return -1; + + SCLogDebug("Loading the cached HS DB from %s", hash_file_static); + if (!SCPathExists(hash_file_static)) + return -1; + + FILE *db_cache = fopen(hash_file_static, "r"); + char *buffer = NULL; + int ret = 0; + if (db_cache) { + size_t buffer_size; + buffer = HSReadStream(hash_file_static, &buffer_size); + if (!buffer) { + SCLogWarning("Hyperscan cached DB file %s cannot be read", hash_file_static); + ret = -1; + goto freeup; + } + + hs_error_t error = hs_deserialize_database(buffer, buffer_size, hs_db); + if (error != HS_SUCCESS) { + SCLogWarning("Failed to deserialize Hyperscan database of %s: %s", hash_file_static, + HSErrorToStr(error)); + ret = -1; + goto freeup; + } + + ret = 0; + goto freeup; + } + +freeup: + if (db_cache) + fclose(db_cache); + if (buffer) + SCFree(buffer); + return ret; +} + +static int HSSaveCache(hs_database_t *hs_db, uint64_t hs_db_hash, const char *dstpath) +{ + 
static bool notified = false; + char *db_stream = NULL; + size_t db_size; + int ret = -1; + + hs_error_t err = hs_serialize_database(hs_db, &db_stream, &db_size); + if (err != HS_SUCCESS) { + SCLogWarning("Failed to serialize Hyperscan database: %s", HSErrorToStr(err)); + goto cleanup; + } + + const char *hash_file_static = HSCacheConstructFPath(dstpath, hs_db_hash); + SCLogDebug("Caching the compiled HS at %s", hash_file_static); + if (SCPathExists(hash_file_static)) { + // potentially signs that it might not work as expected as we got into + // hash collision. If this happens with older and not used caches it is + // fine. + // It is problematic when one ruleset yields two colliding MPM groups. + SCLogWarning("Overwriting cache file %s. If the problem persists consider switching off " + "the caching", + hash_file_static); + } + + FILE *db_cache_out = fopen(hash_file_static, "w"); + if (!db_cache_out) { + if (!notified) { + SCLogWarning("Failed to create Hyperscan cache file, make sure the folder exist and is " + "writable or adjust sgh-mpm-caching-path setting (%s)", + hash_file_static); + notified = true; + } + goto cleanup; + } + size_t r = fwrite(db_stream, sizeof(db_stream[0]), db_size, db_cache_out); + if (r > 0 && (size_t)r != db_size) { + SCLogWarning("Failed to write to file: %s", hash_file_static); + if (r != db_size) { + // possibly a corrupted DB cache was created + r = remove(hash_file_static); + if (r != 0) { + SCLogWarning("Failed to remove corrupted cache file: %s", hash_file_static); + } + } + } + ret = fclose(db_cache_out); + if (ret != 0) { + SCLogWarning("Failed to close file: %s", hash_file_static); + goto cleanup; + } + + ret = 0; +cleanup: + if (db_stream) + SCFree(db_stream); + return ret; +} + +uint64_t HSHashDb(const PatternDatabase *pd) +{ + uint64_t cached_hash = 0; + uint32_t *hash = (uint32_t *)(&cached_hash); + hashword2(&pd->pattern_cnt, 1, &hash[0], &hash[1]); + for (uint32_t i = 0; i < pd->pattern_cnt; i++) { + 
SCHSCachePatternHash(pd->parray[i], &hash[0], &hash[1]); + } + + return cached_hash; +} + +void HSSaveCacheIterator(void *data, void *aux) +{ + PatternDatabase *pd = (PatternDatabase *)data; + struct HsIteratorData *iter_data = (struct HsIteratorData *)aux; + if (pd->no_cache) + return; + + // count only cacheable DBs + iter_data->pd_stats->hs_cacheable_dbs_cnt++; + if (pd->cached) { + iter_data->pd_stats->hs_dbs_cache_loaded_cnt++; + return; + } + + if (HSSaveCache(pd->hs_db, HSHashDb(pd), iter_data->cache_path) == 0) { + pd->cached = true; // for rule reloads + iter_data->pd_stats->hs_dbs_cache_saved_cnt++; + } +} + +#endif /* BUILD_HYPERSCAN */ diff --git a/src/util-mpm-hs-cache.h b/src/util-mpm-hs-cache.h new file mode 100644 index 0000000000..237762d5ae --- /dev/null +++ b/src/util-mpm-hs-cache.h @@ -0,0 +1,43 @@ +/* Copyright (C) 2024 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/** + * \file + * + * \author Lukas Sismis + * + * Hyperscan caching logic for faster database compilation. 
#ifndef SURICATA_UTIL_MPM_HS_CACHE__H
#define SURICATA_UTIL_MPM_HS_CACHE__H

#include "util-mpm-hs-core.h"

#ifdef BUILD_HYPERSCAN

/**
 * Auxiliary data handed to HSSaveCacheIterator() for every pattern database
 * it visits.
 */
struct HsIteratorData {
    /** counters updated by the iterator (cacheable / loaded / saved) */
    PatternDatabaseCache *pd_stats;
    /** directory the cache files are written to */
    const char *cache_path;
};

/**
 * \brief Load a serialized Hyperscan database from the cache directory.
 *
 * \param hs_db      [out] receives the deserialized database
 * \param hs_db_hash hash identifying the database, see HSHashDb()
 * \param dirpath    directory holding the cache files
 *
 * \retval 0  the database was loaded
 * \retval -1 the cache file does not exist or could not be read/deserialized
 *            NOTE(review): confirm that every failure path of the
 *            implementation reports -1
 */
int HSLoadCache(hs_database_t **hs_db, uint64_t hs_db_hash, const char *dirpath);

/**
 * \brief Compute the 64-bit hash of a pattern database from its pattern count
 *        and the compilation-relevant fields of each pattern.
 */
uint64_t HSHashDb(const PatternDatabase *pd);

/**
 * \brief Iterator callback that saves one pattern database to the cache.
 *
 * \param data the PatternDatabase to process
 * \param aux  the struct HsIteratorData with cache path and statistics
 */
void HSSaveCacheIterator(void *data, void *aux);
#endif /* BUILD_HYPERSCAN */

#endif /* SURICATA_UTIL_MPM_HS_CACHE__H */
/**
 * A single pattern registered with the Hyperscan matcher, together with the
 * signature ids that reference it.
 */
typedef struct SCHSPattern_ {
    /** length of the pattern */
    uint16_t len;
    /** flags describing the pattern */
    uint8_t flags;
    /** holds the original pattern that was added */
    uint8_t *original_pat;
    /** pattern id */
    uint32_t id;

    /* offset and depth are part of the cache hash, i.e. they influence the
     * compiled database. NOTE(review): presumably the offset/depth constraints
     * of the rule content -- confirm against the pattern-add code. */
    uint16_t offset;
    uint16_t depth;

    /** sid(s) for this pattern: sids_size is the number of entries in sids */
    uint32_t sids_size;
    SigIntId *sids;

    /** only used at ctx init time, when this structure is part of a hash
     * table. */
    struct SCHSPattern_ *next;
} SCHSPattern;
/**
 * Statistics gathered while the pattern databases are written to the disk
 * cache; filled in by HSSaveCacheIterator() and logged afterwards.
 */
typedef struct PatternDatabaseCache_ {
    /** number of visited databases that are allowed to be cached */
    uint32_t hs_cacheable_dbs_cnt;
    /** number of databases that were already flagged as cached */
    uint32_t hs_dbs_cache_loaded_cnt;
    /** number of databases newly written to the cache */
    uint32_t hs_dbs_cache_saved_cnt;
} PatternDatabaseCache;
**)SCCalloc(pd->pattern_cnt, sizeof(SCHSPattern *)); @@ -675,7 +679,8 @@ static int CompileDataExtensionsInit(hs_expr_ext_t **ext, const SCHSPattern *p) * \param SCHSCompileData* [in] Pointer to the compile data. * \retval 0 On success, negative value on failure. */ -static int PatternDatabaseGetCached(PatternDatabase **pd, SCHSCompileData *cd) +static int PatternDatabaseGetCached( + PatternDatabase **pd, SCHSCompileData *cd, const char *cache_dir_path) { /* Check global hash table to see if we've seen this pattern database * before, and reuse the Hyperscan database if so. */ @@ -690,6 +695,26 @@ static int PatternDatabaseGetCached(PatternDatabase **pd, SCHSCompileData *cd) CompileDataFree(cd); *pd = pd_cached; return 0; + } else if (cache_dir_path) { + pd_cached = *pd; + uint64_t db_lookup_hash = HSHashDb(pd_cached); + if (HSLoadCache(&pd_cached->hs_db, db_lookup_hash, cache_dir_path) == 0) { + pd_cached->ref_cnt = 1; + pd_cached->cached = true; + if (HSScratchAlloc(pd_cached->hs_db) != 0) { + goto recover; + } + if (HashTableAdd(g_db_table, pd_cached, 1) < 0) { + goto recover; + } + CompileDataFree(cd); + return 0; + + recover: + pd_cached->ref_cnt = 0; + pd_cached->cached = false; + return -1; + } } return -1; // not cached @@ -751,6 +776,7 @@ int SCHSPreparePatterns(MpmConfig *mpm_conf, MpmCtx *mpm_ctx) } HSPatternArrayInit(ctx, pd); + pd->no_cache = !(mpm_ctx->flags & MPMCTX_FLAGS_CACHE_TO_DISK); /* Serialise whole database compilation as a relatively easy way to ensure * dedupe is safe. */ SCMutexLock(&g_db_table_mutex); @@ -759,7 +785,8 @@ int SCHSPreparePatterns(MpmConfig *mpm_conf, MpmCtx *mpm_ctx) goto error; } - if (PatternDatabaseGetCached(&pd, cd) == 0 && pd != NULL) { + const char *cache_path = pd->no_cache || !mpm_conf ? 
NULL : mpm_conf->cache_dir_path; + if (PatternDatabaseGetCached(&pd, cd, cache_path) == 0 && pd != NULL) { ctx->pattern_db = pd; if (PatternDatabaseGetSize(pd, &ctx->hs_db_size) != 0) { SCMutexUnlock(&g_db_table_mutex); @@ -801,6 +828,35 @@ error: return -1; } +/** + * \brief Cache the initialized and compiled ruleset + */ +static int SCHSCacheRuleset(MpmConfig *mpm_conf) +{ + if (!mpm_conf || !mpm_conf->cache_dir_path) { + return -1; + } + + SCLogDebug("Caching the loaded ruleset to %s", mpm_conf->cache_dir_path); + if (SCCreateDirectoryTree(mpm_conf->cache_dir_path, true) != 0) { + SCLogWarning("Failed to create Hyperscan cache folder, make sure " + "the parent folder is writeable " + "or adjust sgh-mpm-caching-path setting (%s)", + mpm_conf->cache_dir_path); + return -1; + } + PatternDatabaseCache pd_stats = { 0 }; + struct HsIteratorData iter_data = { .pd_stats = &pd_stats, + .cache_path = mpm_conf->cache_dir_path }; + SCMutexLock(&g_db_table_mutex); + HashTableIterate(g_db_table, HSSaveCacheIterator, &iter_data); + SCMutexUnlock(&g_db_table_mutex); + SCLogNotice("Rule group caching - loaded: %u newly cached: %u total cacheable: %u", + pd_stats.hs_dbs_cache_loaded_cnt, pd_stats.hs_dbs_cache_saved_cnt, + pd_stats.hs_cacheable_dbs_cnt); + return 0; +} + /** * \brief Init the mpm thread context. 
* @@ -1096,6 +1152,25 @@ void SCHSPrintInfo(MpmCtx *mpm_ctx) printf("\n"); } +static MpmConfig *SCHSConfigInit(void) +{ + MpmConfig *c = SCCalloc(1, sizeof(MpmConfig)); + return c; +} + +static void SCHSConfigDeinit(MpmConfig **c) +{ + if (c != NULL) { + SCFree(*c); + (*c) = NULL; + } +} + +static void SCHSConfigCacheDirSet(MpmConfig *c, const char *dir_path) +{ + c->cache_dir_path = dir_path; +} + /************************** Mpm Registration ***************************/ /** @@ -1108,13 +1183,13 @@ void MpmHSRegister(void) mpm_table[MPM_HS].InitThreadCtx = SCHSInitThreadCtx; mpm_table[MPM_HS].DestroyCtx = SCHSDestroyCtx; mpm_table[MPM_HS].DestroyThreadCtx = SCHSDestroyThreadCtx; - mpm_table[MPM_HS].ConfigInit = NULL; - mpm_table[MPM_HS].ConfigDeinit = NULL; - mpm_table[MPM_HS].ConfigCacheDirSet = NULL; + mpm_table[MPM_HS].ConfigInit = SCHSConfigInit; + mpm_table[MPM_HS].ConfigDeinit = SCHSConfigDeinit; + mpm_table[MPM_HS].ConfigCacheDirSet = SCHSConfigCacheDirSet; mpm_table[MPM_HS].AddPattern = SCHSAddPatternCS; mpm_table[MPM_HS].AddPatternNocase = SCHSAddPatternCI; mpm_table[MPM_HS].Prepare = SCHSPreparePatterns; - mpm_table[MPM_HS].CacheRuleset = NULL; + mpm_table[MPM_HS].CacheRuleset = SCHSCacheRuleset; mpm_table[MPM_HS].Search = SCHSSearch; mpm_table[MPM_HS].PrintCtx = SCHSPrintInfo; mpm_table[MPM_HS].PrintThreadCtx = SCHSPrintSearchStats;