From: Lukas Sismis Date: Mon, 28 Oct 2024 14:41:07 +0000 (+0100) Subject: util-mpm-hs: refactor Hyperscan Pattern DB initialization to smaller functions X-Git-Tag: suricata-8.0.0-beta1~192 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=65cfc6d926c8e7ff2392ab43ff40b78353ab721a;p=thirdparty%2Fsuricata.git util-mpm-hs: refactor Hyperscan Pattern DB initialization to smaller functions --- diff --git a/src/Makefile.am b/src/Makefile.am index fd950962b5..3ddc570d87 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -542,6 +542,7 @@ noinst_HEADERS = \ util-mpm-ac-ks.h \ util-mpm.h \ util-mpm-hs.h \ + util-mpm-hs-core.h \ util-optimize.h \ util-pages.h \ util-path.h \ @@ -1101,6 +1102,7 @@ libsuricata_c_a_SOURCES = \ util-mpm-ac-ks-small.c \ util-mpm.c \ util-mpm-hs.c \ + util-mpm-hs-core.c \ util-pages.c \ util-path.c \ util-pidfile.c \ diff --git a/src/util-mpm-hs-core.c b/src/util-mpm-hs-core.c new file mode 100644 index 0000000000..b8663ae220 --- /dev/null +++ b/src/util-mpm-hs-core.c @@ -0,0 +1,83 @@ +/* Copyright (C) 2007-2024 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/** + * \file + * + * \author Jim Xu + * \author Justin Viiret + * \author Lukas Sismis + * + * MPM pattern matcher core function for the Hyperscan regex matcher. 
+ */ + +#include "suricata-common.h" +#include "suricata.h" +#include "util-mpm-hs-core.h" + +#ifdef BUILD_HYPERSCAN + +#include <hs.h> + +/** + * Translates Hyperscan error codes to human-readable messages. + * + * \param error_code + * The error code returned by a Hyperscan function. + * \return + * A string describing the error. + */ +const char *HSErrorToStr(hs_error_t error_code) +{ + switch (error_code) { + case HS_SUCCESS: + return "HS_SUCCESS: The engine completed normally"; + case HS_INVALID: + return "HS_INVALID: A parameter passed to this function was invalid"; + case HS_NOMEM: + return "HS_NOMEM: A memory allocation failed"; + case HS_SCAN_TERMINATED: + return "HS_SCAN_TERMINATED: The engine was terminated by callback"; + case HS_COMPILER_ERROR: + return "HS_COMPILER_ERROR: The pattern compiler failed"; + case HS_DB_VERSION_ERROR: + return "HS_DB_VERSION_ERROR: The given database was built for a different version of " + "Hyperscan"; + case HS_DB_PLATFORM_ERROR: + return "HS_DB_PLATFORM_ERROR: The given database was built for a different platform " + "(i.e., CPU type)"; + case HS_DB_MODE_ERROR: + return "HS_DB_MODE_ERROR: The given database was built for a different mode of " + "operation"; + case HS_BAD_ALIGN: + return "HS_BAD_ALIGN: A parameter passed to this function was not correctly aligned"; + case HS_BAD_ALLOC: + return "HS_BAD_ALLOC: The memory allocator did not return correctly aligned memory"; + case HS_SCRATCH_IN_USE: + return "HS_SCRATCH_IN_USE: The scratch region was already in use"; + case HS_ARCH_ERROR: + return "HS_ARCH_ERROR: Unsupported CPU architecture"; + case HS_INSUFFICIENT_SPACE: + return "HS_INSUFFICIENT_SPACE: Provided buffer was too small"; + case HS_UNKNOWN_ERROR: + return "HS_UNKNOWN_ERROR: Unexpected internal error"; + default: + return "Unknown error code"; + } +} + +#endif /* BUILD_HYPERSCAN */ diff --git a/src/util-mpm-hs-core.h b/src/util-mpm-hs-core.h new file mode 100644 index 0000000000..fc7c2d3028 --- /dev/null +++ 
b/src/util-mpm-hs-core.h @@ -0,0 +1,93 @@ +/* Copyright (C) 2007-2024 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/** + * \file + * + * \author Jim Xu + * \author Justin Viiret + * \author Lukas Sismis + * + * MPM pattern matcher core function for the Hyperscan regex matcher. + */ + +#ifndef SURICATA_UTIL_MPM_HS_CORE__H +#define SURICATA_UTIL_MPM_HS_CORE__H + +#include "suricata-common.h" +#include "suricata.h" + +#ifdef BUILD_HYPERSCAN +#include <hs.h> + +typedef struct SCHSPattern_ { + /* length of the pattern */ + uint16_t len; + /* flags describing the pattern */ + uint8_t flags; + /* holds the original pattern that was added */ + uint8_t *original_pat; + /* pattern id */ + uint32_t id; + + uint16_t offset; + uint16_t depth; + + /* sid(s) for this pattern */ + uint32_t sids_size; + SigIntId *sids; + + /* only used at ctx init time, when this structure is part of a hash + * table. */ + struct SCHSPattern_ *next; +} SCHSPattern; + +typedef struct SCHSCtx_ { + /* hash used during ctx initialization */ + SCHSPattern **init_hash; + + /* pattern database and pattern arrays. */ + void *pattern_db; + + /* size of database, for accounting. */ + size_t hs_db_size; +} SCHSCtx; + +typedef struct SCHSThreadCtx_ { + /* Hyperscan scratch space region for this thread, capable of handling any + * database that has been compiled. 
*/ + void *scratch; + + /* size of scratch space, for accounting. */ + size_t scratch_size; +} SCHSThreadCtx; + +typedef struct PatternDatabase_ { + SCHSPattern **parray; + hs_database_t *hs_db; + uint32_t pattern_cnt; + + /* Reference count: number of MPM contexts using this pattern database. */ + uint32_t ref_cnt; + /* Signals if the matcher has loaded/saved the pattern database to disk */ + bool cached; +} PatternDatabase; + +const char *HSErrorToStr(hs_error_t error_code); + +#endif /* BUILD_HYPERSCAN */ +#endif /* SURICATA_UTIL_MPM_HS_CORE__H */ diff --git a/src/util-mpm-hs.c b/src/util-mpm-hs.c index 82b91b4ec0..da02e2194d 100644 --- a/src/util-mpm-hs.c +++ b/src/util-mpm-hs.c @@ -38,6 +38,7 @@ #include "util-unittest-helper.h" #include "util-memcmp.h" #include "util-mpm-hs.h" +#include "util-mpm-hs-core.h" #include "util-memcpy.h" #include "util-hash.h" #include "util-hash-lookup3.h" @@ -379,7 +380,7 @@ typedef struct SCHSCompileData_ { unsigned int pattern_cnt; } SCHSCompileData; -static SCHSCompileData *SCHSAllocCompileData(unsigned int pattern_cnt) +static SCHSCompileData *CompileDataAlloc(unsigned int pattern_cnt) { SCHSCompileData *cd = SCCalloc(pattern_cnt, sizeof(SCHSCompileData)); if (cd == NULL) { @@ -422,7 +423,7 @@ error: return NULL; } -static void SCHSFreeCompileData(SCHSCompileData *cd) +static void CompileDataFree(SCHSCompileData *cd) { if (cd == NULL) { return; @@ -445,15 +446,6 @@ static void SCHSFreeCompileData(SCHSCompileData *cd) SCFree(cd); } -typedef struct PatternDatabase_ { - SCHSPattern **parray; - hs_database_t *hs_db; - uint32_t pattern_cnt; - - /* Reference count: number of MPM contexts using this pattern database. 
*/ - uint32_t ref_cnt; -} PatternDatabase; - static uint32_t SCHSPatternHash(const SCHSPattern *p, uint32_t hash) { BUG_ON(p->original_pat == NULL); @@ -570,38 +562,20 @@ static PatternDatabase *PatternDatabaseAlloc(uint32_t pattern_cnt) return pd; } -/** - * \brief Process the patterns added to the mpm, and create the internal tables. - * - * \param mpm_ctx Pointer to the mpm context. - */ -int SCHSPreparePatterns(MpmCtx *mpm_ctx) +static int HSCheckPatterns(MpmCtx *mpm_ctx, SCHSCtx *ctx) { - SCHSCtx *ctx = (SCHSCtx *)mpm_ctx->ctx; - if (mpm_ctx->pattern_cnt == 0 || ctx->init_hash == NULL) { SCLogDebug("no patterns supplied to this mpm_ctx"); return 0; } + return 1; +} - hs_error_t err; - hs_compile_error_t *compile_err = NULL; - SCHSCompileData *cd = NULL; - PatternDatabase *pd = NULL; - - cd = SCHSAllocCompileData(mpm_ctx->pattern_cnt); - if (cd == NULL) { - goto error; - } - - pd = PatternDatabaseAlloc(mpm_ctx->pattern_cnt); - if (pd == NULL) { - goto error; - } - - /* populate the pattern array with the patterns in the hash */ +static void HSPatternArrayPopulate(SCHSCtx *ctx, PatternDatabase *pd) +{ for (uint32_t i = 0, p = 0; i < INIT_HASH_SIZE; i++) { - SCHSPattern *node = ctx->init_hash[i], *nnode = NULL; + SCHSPattern *node = ctx->init_hash[i]; + SCHSPattern *nnode = NULL; while (node != NULL) { nnode = node->next; node->next = NULL; @@ -609,105 +583,207 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx) node = nnode; } } +} +static void HSPatternArrayInit(SCHSCtx *ctx, PatternDatabase *pd) +{ + HSPatternArrayPopulate(ctx, pd); /* we no longer need the hash, so free its memory */ SCFree(ctx->init_hash); ctx->init_hash = NULL; +} - /* Serialise whole database compilation as a relatively easy way to ensure - * dedupe is safe. */ - SCMutexLock(&g_db_table_mutex); - - /* Init global pattern database hash if necessary. 
*/ +static int HSGlobalPatternDatabaseInit(void) +{ if (g_db_table == NULL) { g_db_table = HashTableInit(INIT_DB_HASH_SIZE, PatternDatabaseHash, PatternDatabaseCompare, PatternDatabaseTableFree); if (g_db_table == NULL) { - SCMutexUnlock(&g_db_table_mutex); - goto error; + return -1; } } + return 0; +} +static void HSLogCompileError(hs_compile_error_t *compile_err) +{ + SCLogError("failed to compile hyperscan database"); + if (compile_err) { + SCLogError("compile error: %s", compile_err->message); + hs_free_compile_error(compile_err); + } +} + +static int HSScratchAlloc(const hs_database_t *db) +{ + SCMutexLock(&g_scratch_proto_mutex); + hs_error_t err = hs_alloc_scratch(db, &g_scratch_proto); + SCMutexUnlock(&g_scratch_proto_mutex); + if (err != HS_SUCCESS) { + SCLogError("failed to allocate scratch"); + return -1; + } + return 0; +} + +static int PatternDatabaseGetSize(PatternDatabase *pd, size_t *db_size) +{ + hs_error_t err = hs_database_size(pd->hs_db, db_size); + if (err != HS_SUCCESS) { + SCLogError("failed to query database size: %s", HSErrorToStr(err)); + return -1; + } + return 0; +} + +static void SCHSCleanupOnError(PatternDatabase *pd, SCHSCompileData *cd) +{ + if (pd) { + PatternDatabaseFree(pd); + } + if (cd) { + CompileDataFree(cd); + } +} + +static int CompileDataExtensionsInit(hs_expr_ext_t **ext, const SCHSPattern *p) +{ + if (p->flags & (MPM_PATTERN_FLAG_OFFSET | MPM_PATTERN_FLAG_DEPTH)) { + *ext = SCCalloc(1, sizeof(hs_expr_ext_t)); + if ((*ext) == NULL) { + return -1; + } + if (p->flags & MPM_PATTERN_FLAG_OFFSET) { + (*ext)->flags |= HS_EXT_FLAG_MIN_OFFSET; + (*ext)->min_offset = p->offset + p->len; + } + if (p->flags & MPM_PATTERN_FLAG_DEPTH) { + (*ext)->flags |= HS_EXT_FLAG_MAX_OFFSET; + (*ext)->max_offset = p->offset + p->depth; + } + } + + return 0; +} + +/** + * \brief Initialize the pattern database - try to get existing pd + * from the global hash table, or load it from disk if caching is enabled. 
+ * + * \param PatternDatabase* [in/out] Pointer to the pattern database to use. + * \param SCHSCompileData* [in] Pointer to the compile data. + * \retval 0 On success, negative value on failure. + */ +static int PatternDatabaseGetCached(PatternDatabase **pd, SCHSCompileData *cd) +{ /* Check global hash table to see if we've seen this pattern database * before, and reuse the Hyperscan database if so. */ - PatternDatabase *pd_cached = HashTableLookup(g_db_table, pd, 1); - + PatternDatabase *pd_cached = HashTableLookup(g_db_table, *pd, 1); if (pd_cached != NULL) { SCLogDebug("Reusing cached database %p with %" PRIu32 " patterns (ref_cnt=%" PRIu32 ")", pd_cached->hs_db, pd_cached->pattern_cnt, pd_cached->ref_cnt); pd_cached->ref_cnt++; - ctx->pattern_db = pd_cached; - SCMutexUnlock(&g_db_table_mutex); - PatternDatabaseFree(pd); - SCHSFreeCompileData(cd); + PatternDatabaseFree(*pd); + CompileDataFree(cd); + *pd = pd_cached; return 0; } - BUG_ON(ctx->pattern_db != NULL); /* already built? */ + return -1; // not cached +} +static int PatternDatabaseCompile(PatternDatabase *pd, SCHSCompileData *cd) +{ for (uint32_t i = 0; i < pd->pattern_cnt; i++) { const SCHSPattern *p = pd->parray[i]; - cd->ids[i] = i; cd->flags[i] = HS_FLAG_SINGLEMATCH; if (p->flags & MPM_PATTERN_FLAG_NOCASE) { cd->flags[i] |= HS_FLAG_CASELESS; } - cd->expressions[i] = HSRenderPattern(p->original_pat, p->len); + if (CompileDataExtensionsInit(&cd->ext[i], p) != 0) { + return -1; + } + } - if (p->flags & (MPM_PATTERN_FLAG_OFFSET | MPM_PATTERN_FLAG_DEPTH)) { - cd->ext[i] = SCCalloc(1, sizeof(hs_expr_ext_t)); - if (cd->ext[i] == NULL) { - SCMutexUnlock(&g_db_table_mutex); - goto error; - } + hs_compile_error_t *compile_err = NULL; + hs_error_t err = hs_compile_ext_multi((const char *const *)cd->expressions, cd->flags, cd->ids, + (const hs_expr_ext_t *const *)cd->ext, cd->pattern_cnt, HS_MODE_BLOCK, NULL, &pd->hs_db, + &compile_err); + if (err != HS_SUCCESS) { + HSLogCompileError(compile_err); + return -1; 
+ } - if (p->flags & MPM_PATTERN_FLAG_OFFSET) { - cd->ext[i]->flags |= HS_EXT_FLAG_MIN_OFFSET; - cd->ext[i]->min_offset = p->offset + p->len; - } - if (p->flags & MPM_PATTERN_FLAG_DEPTH) { - cd->ext[i]->flags |= HS_EXT_FLAG_MAX_OFFSET; - cd->ext[i]->max_offset = p->offset + p->depth; - } - } + if (HSScratchAlloc(pd->hs_db) != 0) { + return -1; } - BUG_ON(mpm_ctx->pattern_cnt == 0); + if (HashTableAdd(g_db_table, pd, 1) < 0) { + return -1; + } + pd->ref_cnt = 1; + return 0; +} - err = hs_compile_ext_multi((const char *const *)cd->expressions, cd->flags, - cd->ids, (const hs_expr_ext_t *const *)cd->ext, - cd->pattern_cnt, HS_MODE_BLOCK, NULL, &pd->hs_db, - &compile_err); +/** + * \brief Process the patterns added to the mpm, and create the internal tables. + * + * \param mpm_ctx Pointer to the mpm context. + */ +int SCHSPreparePatterns(MpmCtx *mpm_ctx) +{ + SCHSCtx *ctx = (SCHSCtx *)mpm_ctx->ctx; - if (err != HS_SUCCESS) { - SCLogError("failed to compile hyperscan database"); - if (compile_err) { - SCLogError("compile error: %s", compile_err->message); - } - hs_free_compile_error(compile_err); + if (HSCheckPatterns(mpm_ctx, ctx) == 0) { + return 0; + } + + SCHSCompileData *cd = CompileDataAlloc(mpm_ctx->pattern_cnt); + PatternDatabase *pd = PatternDatabaseAlloc(mpm_ctx->pattern_cnt); + if (cd == NULL || pd == NULL) { + goto error; + } + + HSPatternArrayInit(ctx, pd); + /* Serialise whole database compilation as a relatively easy way to ensure + * dedupe is safe. 
*/ + SCMutexLock(&g_db_table_mutex); + if (HSGlobalPatternDatabaseInit() == -1) { SCMutexUnlock(&g_db_table_mutex); goto error; } - ctx->pattern_db = pd; + if (PatternDatabaseGetCached(&pd, cd) == 0 && pd != NULL) { + ctx->pattern_db = pd; + if (PatternDatabaseGetSize(pd, &ctx->hs_db_size) != 0) { + SCMutexUnlock(&g_db_table_mutex); + goto error; + } - SCMutexLock(&g_scratch_proto_mutex); - err = hs_alloc_scratch(pd->hs_db, &g_scratch_proto); - SCMutexUnlock(&g_scratch_proto_mutex); - if (err != HS_SUCCESS) { - SCLogError("failed to allocate scratch"); + if (pd->ref_cnt == 1) { + // freshly allocated + mpm_ctx->memory_cnt++; + mpm_ctx->memory_size += ctx->hs_db_size; + } + SCMutexUnlock(&g_db_table_mutex); + return 0; + } + + BUG_ON(ctx->pattern_db != NULL); /* already built? */ + BUG_ON(mpm_ctx->pattern_cnt == 0); + + if (PatternDatabaseCompile(pd, cd) != 0) { SCMutexUnlock(&g_db_table_mutex); goto error; } - err = hs_database_size(pd->hs_db, &ctx->hs_db_size); - if (err != HS_SUCCESS) { - SCLogError("failed to query database size"); + ctx->pattern_db = pd; + if (PatternDatabaseGetSize(pd, &ctx->hs_db_size) != 0) { SCMutexUnlock(&g_db_table_mutex); goto error; } @@ -715,26 +791,12 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx) mpm_ctx->memory_cnt++; mpm_ctx->memory_size += ctx->hs_db_size; - SCLogDebug("Built %" PRIu32 " patterns into a database of size %" PRIuMAX - " bytes", mpm_ctx->pattern_cnt, (uintmax_t)ctx->hs_db_size); - - /* Cache this database globally for later. 
*/ - pd->ref_cnt = 1; - int r = HashTableAdd(g_db_table, pd, 1); SCMutexUnlock(&g_db_table_mutex); - if (r < 0) - goto error; - - SCHSFreeCompileData(cd); + CompileDataFree(cd); return 0; error: - if (pd) { - PatternDatabaseFree(pd); - } - if (cd) { - SCHSFreeCompileData(cd); - } + SCHSCleanupOnError(pd, cd); return -1; } diff --git a/src/util-mpm-hs.h b/src/util-mpm-hs.h index 09deb5ff99..487abf8fa3 100644 --- a/src/util-mpm-hs.h +++ b/src/util-mpm-hs.h @@ -27,48 +27,6 @@ #ifndef SURICATA_UTIL_MPM_HS__H #define SURICATA_UTIL_MPM_HS__H -typedef struct SCHSPattern_ { - /* length of the pattern */ - uint16_t len; - /* flags describing the pattern */ - uint8_t flags; - /* holds the original pattern that was added */ - uint8_t *original_pat; - /* pattern id */ - uint32_t id; - - uint16_t offset; - uint16_t depth; - - /* sid(s) for this pattern */ - uint32_t sids_size; - SigIntId *sids; - - /* only used at ctx init time, when this structure is part of a hash - * table. */ - struct SCHSPattern_ *next; -} SCHSPattern; - -typedef struct SCHSCtx_ { - /* hash used during ctx initialization */ - SCHSPattern **init_hash; - - /* pattern database and pattern arrays. */ - void *pattern_db; - - /* size of database, for accounting. */ - size_t hs_db_size; -} SCHSCtx; - -typedef struct SCHSThreadCtx_ { - /* Hyperscan scratch space region for this thread, capable of handling any - * database that has been compiled. */ - void *scratch; - - /* size of scratch space, for accounting. */ - size_t scratch_size; -} SCHSThreadCtx; - void MpmHSRegister(void); void MpmHSGlobalCleanup(void);