--- /dev/null
+/* Copyright (C) 2007-2024 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Jim Xu <jim.xu@windriver.com>
+ * \author Justin Viiret <justin.viiret@intel.com>
+ * \author Lukas Sismis <lsismis@oisf.net>
+ *
+ * MPM pattern matcher core function for the Hyperscan regex matcher.
+ */
+
+#include "suricata-common.h"
+#include "suricata.h"
+#include "util-mpm-hs-core.h"
+
+#ifdef BUILD_HYPERSCAN
+
+#include <hs.h>
+
+/**
+ * \brief Map a Hyperscan status code to a static, human-readable string.
+ *
+ * \param error_code Status value returned by a Hyperscan API call.
+ *
+ * \retval Pointer to a string literal naming and describing the code;
+ *         "Unknown error code" for any value not handled below.
+ */
+const char *HSErrorToStr(hs_error_t error_code)
+{
+    /* fallback for codes this table does not know about */
+    const char *msg = "Unknown error code";
+
+    switch (error_code) {
+        case HS_SUCCESS:
+            msg = "HS_SUCCESS: The engine completed normally";
+            break;
+        case HS_INVALID:
+            msg = "HS_INVALID: A parameter passed to this function was invalid";
+            break;
+        case HS_NOMEM:
+            msg = "HS_NOMEM: A memory allocation failed";
+            break;
+        case HS_SCAN_TERMINATED:
+            msg = "HS_SCAN_TERMINATED: The engine was terminated by callback";
+            break;
+        case HS_COMPILER_ERROR:
+            msg = "HS_COMPILER_ERROR: The pattern compiler failed";
+            break;
+        case HS_DB_VERSION_ERROR:
+            msg = "HS_DB_VERSION_ERROR: The given database was built for a different version of "
+                  "Hyperscan";
+            break;
+        case HS_DB_PLATFORM_ERROR:
+            msg = "HS_DB_PLATFORM_ERROR: The given database was built for a different platform "
+                  "(i.e., CPU type)";
+            break;
+        case HS_DB_MODE_ERROR:
+            msg = "HS_DB_MODE_ERROR: The given database was built for a different mode of "
+                  "operation";
+            break;
+        case HS_BAD_ALIGN:
+            msg = "HS_BAD_ALIGN: A parameter passed to this function was not correctly aligned";
+            break;
+        case HS_BAD_ALLOC:
+            msg = "HS_BAD_ALLOC: The memory allocator did not return correctly aligned memory";
+            break;
+        case HS_SCRATCH_IN_USE:
+            msg = "HS_SCRATCH_IN_USE: The scratch region was already in use";
+            break;
+        case HS_ARCH_ERROR:
+            msg = "HS_ARCH_ERROR: Unsupported CPU architecture";
+            break;
+        case HS_INSUFFICIENT_SPACE:
+            msg = "HS_INSUFFICIENT_SPACE: Provided buffer was too small";
+            break;
+        case HS_UNKNOWN_ERROR:
+            msg = "HS_UNKNOWN_ERROR: Unexpected internal error";
+            break;
+        default:
+            break;
+    }
+
+    return msg;
+}
+
+#endif /* BUILD_HYPERSCAN */
--- /dev/null
+/* Copyright (C) 2007-2024 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Jim Xu <jim.xu@windriver.com>
+ * \author Justin Viiret <justin.viiret@intel.com>
+ * \author Lukas Sismis <lsismis@oisf.net>
+ *
+ * MPM pattern matcher core function for the Hyperscan regex matcher.
+ */
+
+#ifndef SURICATA_UTIL_MPM_HS_CORE__H
+#define SURICATA_UTIL_MPM_HS_CORE__H
+
+#include "suricata-common.h"
+#include "suricata.h"
+
+#ifdef BUILD_HYPERSCAN
+#include <hs.h>
+
+/* One pattern registered with the Hyperscan MPM. */
+typedef struct SCHSPattern_ {
+ /* length of the pattern */
+ uint16_t len;
+ /* flags describing the pattern (MPM_PATTERN_FLAG_* bits; NOCASE, OFFSET
+ * and DEPTH are the ones tested when the database is compiled) */
+ uint8_t flags;
+ /* holds the original pattern that was added */
+ uint8_t *original_pat;
+ /* pattern id */
+ uint32_t id;
+
+ /* offset/depth constraints: when the matching MPM_PATTERN_FLAG_OFFSET /
+ * MPM_PATTERN_FLAG_DEPTH bit is set in flags, these become the Hyperscan
+ * min_offset (offset + len) and max_offset (offset + depth) extensions. */
+ uint16_t offset;
+ uint16_t depth;
+
+ /* sid(s) for this pattern; sids_size is presumably the number of entries
+ * in sids -- TODO confirm */
+ uint32_t sids_size;
+ SigIntId *sids;
+
+ /* only used at ctx init time, when this structure is part of a hash
+ * table. */
+ struct SCHSPattern_ *next;
+} SCHSPattern;
+
+/* Hyperscan-specific state hung off MpmCtx::ctx. */
+typedef struct SCHSCtx_ {
+ /* hash used during ctx initialization; freed and reset to NULL by
+ * HSPatternArrayInit() while the patterns are being prepared */
+ SCHSPattern **init_hash;
+
+ /* pattern database and pattern arrays. Declared void *, but
+ * SCHSPreparePatterns() stores a PatternDatabase * here. */
+ void *pattern_db;
+
+ /* size of database, for accounting. Filled from hs_database_size(). */
+ size_t hs_db_size;
+} SCHSCtx;
+
+/* Per-thread Hyperscan MPM state. */
+typedef struct SCHSThreadCtx_ {
+ /* Hyperscan scratch space region for this thread, capable of handling any
+ * database that has been compiled. */
+ void *scratch;
+
+ /* size of scratch space, for accounting. */
+ size_t scratch_size;
+} SCHSThreadCtx;
+
+/* A set of patterns together with the Hyperscan database compiled from it. */
+typedef struct PatternDatabase_ {
+ /* array of pattern pointers; entries 0..pattern_cnt-1 are read when the
+ * database is compiled */
+ SCHSPattern **parray;
+ /* compiled Hyperscan database, produced by hs_compile_ext_multi() */
+ hs_database_t *hs_db;
+ /* number of entries in parray */
+ uint32_t pattern_cnt;
+
+ /* Reference count: number of MPM contexts using this pattern database. */
+ uint32_t ref_cnt;
+ /* Signals if the matcher has loaded/saved the pattern database to disk */
+ bool cached;
+} PatternDatabase;
+
+const char *HSErrorToStr(hs_error_t error_code);
+
+#endif /* BUILD_HYPERSCAN */
+#endif /* SURICATA_UTIL_MPM_HS_CORE__H */
#include "util-unittest-helper.h"
#include "util-memcmp.h"
#include "util-mpm-hs.h"
+#include "util-mpm-hs-core.h"
#include "util-memcpy.h"
#include "util-hash.h"
#include "util-hash-lookup3.h"
unsigned int pattern_cnt;
} SCHSCompileData;
-static SCHSCompileData *SCHSAllocCompileData(unsigned int pattern_cnt)
+static SCHSCompileData *CompileDataAlloc(unsigned int pattern_cnt)
{
SCHSCompileData *cd = SCCalloc(pattern_cnt, sizeof(SCHSCompileData));
if (cd == NULL) {
return NULL;
}
-static void SCHSFreeCompileData(SCHSCompileData *cd)
+static void CompileDataFree(SCHSCompileData *cd)
{
if (cd == NULL) {
return;
SCFree(cd);
}
-typedef struct PatternDatabase_ {
- SCHSPattern **parray;
- hs_database_t *hs_db;
- uint32_t pattern_cnt;
-
- /* Reference count: number of MPM contexts using this pattern database. */
- uint32_t ref_cnt;
-} PatternDatabase;
-
static uint32_t SCHSPatternHash(const SCHSPattern *p, uint32_t hash)
{
BUG_ON(p->original_pat == NULL);
return pd;
}
-/**
- * \brief Process the patterns added to the mpm, and create the internal tables.
- *
- * \param mpm_ctx Pointer to the mpm context.
- */
-int SCHSPreparePatterns(MpmCtx *mpm_ctx)
+/**
+ * \brief Tell whether the mpm context has any patterns to prepare.
+ *
+ * \param mpm_ctx Pointer to the mpm context; its pattern_cnt is inspected.
+ * \param ctx     Hyperscan context of \a mpm_ctx; its init_hash is inspected.
+ *
+ * \retval 1 patterns are present and the init hash exists.
+ * \retval 0 pattern_cnt is zero or init_hash is NULL.
+ */
+static int HSCheckPatterns(MpmCtx *mpm_ctx, SCHSCtx *ctx)
{
- SCHSCtx *ctx = (SCHSCtx *)mpm_ctx->ctx;
-
 if (mpm_ctx->pattern_cnt == 0 || ctx->init_hash == NULL) {
 SCLogDebug("no patterns supplied to this mpm_ctx");
 return 0;
 }
+ return 1;
+}
- hs_error_t err;
- hs_compile_error_t *compile_err = NULL;
- SCHSCompileData *cd = NULL;
- PatternDatabase *pd = NULL;
-
- cd = SCHSAllocCompileData(mpm_ctx->pattern_cnt);
- if (cd == NULL) {
- goto error;
- }
-
- pd = PatternDatabaseAlloc(mpm_ctx->pattern_cnt);
- if (pd == NULL) {
- goto error;
- }
-
- /* populate the pattern array with the patterns in the hash */
+/**
+ * \brief Walk every bucket of the init hash and unlink each pattern from its
+ *        collision chain (node->next is cleared for every node visited).
+ *
+ * NOTE(review): going by the function name, each unlinked pattern is
+ * presumably also stored into \a pd's pattern array -- confirm against the
+ * loop body.
+ *
+ * \param ctx Hyperscan context whose init_hash is traversed.
+ * \param pd  Pattern database being set up.
+ */
+static void HSPatternArrayPopulate(SCHSCtx *ctx, PatternDatabase *pd)
+{
 for (uint32_t i = 0, p = 0; i < INIT_HASH_SIZE; i++) {
- SCHSPattern *node = ctx->init_hash[i], *nnode = NULL;
+ SCHSPattern *node = ctx->init_hash[i];
+ SCHSPattern *nnode = NULL;
 while (node != NULL) {
 nnode = node->next;
 node->next = NULL;
 node = nnode;
 }
 }
+}
+/**
+ * \brief Run HSPatternArrayPopulate() and then dispose of the init hash.
+ *
+ * After this call ctx->init_hash has been freed and is NULL.
+ *
+ * \param ctx Hyperscan context owning the init hash.
+ * \param pd  Pattern database passed on to HSPatternArrayPopulate().
+ */
+static void HSPatternArrayInit(SCHSCtx *ctx, PatternDatabase *pd)
+{
+ HSPatternArrayPopulate(ctx, pd);
 /* we no longer need the hash, so free its memory */
 SCFree(ctx->init_hash);
 ctx->init_hash = NULL;
+}
- /* Serialise whole database compilation as a relatively easy way to ensure
- * dedupe is safe. */
- SCMutexLock(&g_db_table_mutex);
-
- /* Init global pattern database hash if necessary. */
+/**
+ * \brief Create the global pattern database hash table (g_db_table) if it
+ *        does not exist yet.
+ *
+ * No locking is done here; SCHSPreparePatterns() calls this with
+ * g_db_table_mutex held.
+ *
+ * \retval 0  the table exists (already present or just created).
+ * \retval -1 HashTableInit() failed.
+ */
+static int HSGlobalPatternDatabaseInit(void)
+{
 if (g_db_table == NULL) {
 g_db_table = HashTableInit(INIT_DB_HASH_SIZE, PatternDatabaseHash,
 PatternDatabaseCompare,
 PatternDatabaseTableFree);
 if (g_db_table == NULL) {
- SCMutexUnlock(&g_db_table_mutex);
- goto error;
+ return -1;
 }
 }
+ return 0;
+}
+/**
+ * \brief Log a Hyperscan compilation failure and release the error object.
+ *
+ * \param compile_err Error returned by the Hyperscan compiler, may be NULL.
+ *                    When non-NULL its message is logged and the object is
+ *                    freed here, so the caller must not use it afterwards.
+ */
+static void HSLogCompileError(hs_compile_error_t *compile_err)
+{
+    SCLogError("failed to compile hyperscan database");
+    if (compile_err == NULL) {
+        return;
+    }
+
+    SCLogError("compile error: %s", compile_err->message);
+    hs_free_compile_error(compile_err);
+}
+
+/**
+ * \brief Run hs_alloc_scratch() for \a db against the global prototype
+ *        scratch (g_scratch_proto).
+ *
+ * The call is made while holding g_scratch_proto_mutex; the mutex is
+ * released before the result is examined.
+ *
+ * \param db Compiled Hyperscan database the scratch has to support.
+ *
+ * \retval 0  on success.
+ * \retval -1 if hs_alloc_scratch() failed; the Hyperscan status is logged.
+ */
+static int HSScratchAlloc(const hs_database_t *db)
+{
+    SCMutexLock(&g_scratch_proto_mutex);
+    hs_error_t err = hs_alloc_scratch(db, &g_scratch_proto);
+    SCMutexUnlock(&g_scratch_proto_mutex);
+    if (err != HS_SUCCESS) {
+        /* report the reason, as PatternDatabaseGetSize() does */
+        SCLogError("failed to allocate scratch: %s", HSErrorToStr(err));
+        return -1;
+    }
+    return 0;
+}
+
+/**
+ * \brief Query the size of the compiled Hyperscan database held by \a pd.
+ *
+ * \param pd      Pattern database whose hs_db is queried.
+ * \param db_size [out] Passed to hs_database_size() to receive the size.
+ *
+ * \retval 0  on success.
+ * \retval -1 if hs_database_size() failed; the Hyperscan status is logged.
+ */
+static int PatternDatabaseGetSize(PatternDatabase *pd, size_t *db_size)
+{
+    const hs_error_t rc = hs_database_size(pd->hs_db, db_size);
+    if (rc == HS_SUCCESS) {
+        return 0;
+    }
+
+    SCLogError("failed to query database size: %s", HSErrorToStr(rc));
+    return -1;
+}
+
+/**
+ * \brief Release what SCHSPreparePatterns() allocated before it failed.
+ *
+ * \param pd Pattern database to free, ignored when NULL.
+ * \param cd Compile data to free, ignored when NULL.
+ */
+static void SCHSCleanupOnError(PatternDatabase *pd, SCHSCompileData *cd)
+{
+    if (pd != NULL) {
+        PatternDatabaseFree(pd);
+    }
+
+    if (cd != NULL) {
+        CompileDataFree(cd);
+    }
+}
+
+/**
+ * \brief Build the Hyperscan extended parameters for one pattern.
+ *
+ * Nothing is allocated and \a ext is left untouched unless the pattern
+ * carries MPM_PATTERN_FLAG_OFFSET and/or MPM_PATTERN_FLAG_DEPTH.
+ *
+ * \param ext [out] Receives a newly allocated, zeroed hs_expr_ext_t with
+ *            min_offset (offset + len) and/or max_offset (offset + depth)
+ *            filled in.
+ * \param p   Pattern supplying flags, offset, depth and len.
+ *
+ * \retval 0  on success, including when no extension was needed.
+ * \retval -1 if the allocation failed (\a *ext is NULL in that case).
+ */
+static int CompileDataExtensionsInit(hs_expr_ext_t **ext, const SCHSPattern *p)
+{
+    const bool want_min = (p->flags & MPM_PATTERN_FLAG_OFFSET) != 0;
+    const bool want_max = (p->flags & MPM_PATTERN_FLAG_DEPTH) != 0;
+    if (!want_min && !want_max) {
+        return 0;
+    }
+
+    hs_expr_ext_t *e = SCCalloc(1, sizeof(hs_expr_ext_t));
+    *ext = e;
+    if (e == NULL) {
+        return -1;
+    }
+
+    if (want_min) {
+        e->flags |= HS_EXT_FLAG_MIN_OFFSET;
+        e->min_offset = p->offset + p->len;
+    }
+    if (want_max) {
+        e->flags |= HS_EXT_FLAG_MAX_OFFSET;
+        e->max_offset = p->offset + p->depth;
+    }
+
+    return 0;
+}
+
+/**
+ * \brief Look up an equivalent pattern database in the global hash table
+ *        (g_db_table) and switch over to it if one exists.
+ *
+ * On a hit the cached database's ref_cnt is incremented, the caller's own
+ * database (\a *pd) and the compile data (\a cd) are both freed, and \a *pd
+ * is replaced by the cached database. The caller must therefore not free or
+ * use \a cd again after a hit, and must not free \a *pd as if it owned it.
+ *
+ * No locking is done here; SCHSPreparePatterns() calls this with
+ * g_db_table_mutex held.
+ *
+ * \param pd [in/out] Pointer to the pattern database to look up; replaced by
+ *           the cached one on a hit.
+ * \param cd [in] Compile data belonging to \a *pd; freed on a hit.
+ *
+ * \retval 0  a cached database was found and \a *pd now points to it.
+ * \retval -1 no cached database exists; \a *pd and \a cd are untouched.
+ */
+static int PatternDatabaseGetCached(PatternDatabase **pd, SCHSCompileData *cd)
+{
 /* Check global hash table to see if we've seen this pattern database
 * before, and reuse the Hyperscan database if so. */
- PatternDatabase *pd_cached = HashTableLookup(g_db_table, pd, 1);
-
+ PatternDatabase *pd_cached = HashTableLookup(g_db_table, *pd, 1);
 if (pd_cached != NULL) {
 SCLogDebug("Reusing cached database %p with %" PRIu32
 " patterns (ref_cnt=%" PRIu32 ")",
 pd_cached->hs_db, pd_cached->pattern_cnt,
 pd_cached->ref_cnt);
 pd_cached->ref_cnt++;
- ctx->pattern_db = pd_cached;
- SCMutexUnlock(&g_db_table_mutex);
- PatternDatabaseFree(pd);
- SCHSFreeCompileData(cd);
+ PatternDatabaseFree(*pd);
+ CompileDataFree(cd);
+ *pd = pd_cached;
 return 0;
 }
- BUG_ON(ctx->pattern_db != NULL); /* already built? */
+ return -1; // not cached
+}
+/**
+ * \brief Compile the patterns of \a pd into a Hyperscan block-mode database
+ *        and register the result in the global hash table.
+ *
+ * For every pattern the compile data gets an id equal to the pattern's array
+ * index, HS_FLAG_SINGLEMATCH (plus HS_FLAG_CASELESS for NOCASE patterns),
+ * the rendered expression and, where needed, offset/depth extensions. After
+ * a successful compile the global scratch is (re)allocated for the new
+ * database, \a pd is added to g_db_table and its ref_cnt is set to 1.
+ *
+ * Nothing is released here on failure; SCHSPreparePatterns() frees \a pd and
+ * \a cd on its error path. g_db_table is accessed without locking here;
+ * SCHSPreparePatterns() calls this with g_db_table_mutex held.
+ *
+ * NOTE(review): the result of HSRenderPattern() is handed to the compiler
+ * without a NULL check -- confirm whether it can fail.
+ *
+ * \param pd Pattern database to compile; receives hs_db.
+ * \param cd Compile data arrays to fill in.
+ *
+ * \retval 0  on success.
+ * \retval -1 on allocation, compile, scratch or hash table failure.
+ */
+static int PatternDatabaseCompile(PatternDatabase *pd, SCHSCompileData *cd)
+{
 for (uint32_t i = 0; i < pd->pattern_cnt; i++) {
 const SCHSPattern *p = pd->parray[i];
-
 cd->ids[i] = i;
 cd->flags[i] = HS_FLAG_SINGLEMATCH;
 if (p->flags & MPM_PATTERN_FLAG_NOCASE) {
 cd->flags[i] |= HS_FLAG_CASELESS;
 }
-
 cd->expressions[i] = HSRenderPattern(p->original_pat, p->len);
+ if (CompileDataExtensionsInit(&cd->ext[i], p) != 0) {
+ return -1;
+ }
+ }
- if (p->flags & (MPM_PATTERN_FLAG_OFFSET | MPM_PATTERN_FLAG_DEPTH)) {
- cd->ext[i] = SCCalloc(1, sizeof(hs_expr_ext_t));
- if (cd->ext[i] == NULL) {
- SCMutexUnlock(&g_db_table_mutex);
- goto error;
- }
+ hs_compile_error_t *compile_err = NULL;
+ hs_error_t err = hs_compile_ext_multi((const char *const *)cd->expressions, cd->flags, cd->ids,
+ (const hs_expr_ext_t *const *)cd->ext, cd->pattern_cnt, HS_MODE_BLOCK, NULL, &pd->hs_db,
+ &compile_err);
+ if (err != HS_SUCCESS) {
+ HSLogCompileError(compile_err);
+ return -1;
+ }
- if (p->flags & MPM_PATTERN_FLAG_OFFSET) {
- cd->ext[i]->flags |= HS_EXT_FLAG_MIN_OFFSET;
- cd->ext[i]->min_offset = p->offset + p->len;
- }
- if (p->flags & MPM_PATTERN_FLAG_DEPTH) {
- cd->ext[i]->flags |= HS_EXT_FLAG_MAX_OFFSET;
- cd->ext[i]->max_offset = p->offset + p->depth;
- }
- }
+ if (HSScratchAlloc(pd->hs_db) != 0) {
+ return -1;
 }
- BUG_ON(mpm_ctx->pattern_cnt == 0);
+ /* Cache this database globally for later. */
+ if (HashTableAdd(g_db_table, pd, 1) < 0) {
+ return -1;
+ }
+ pd->ref_cnt = 1;
+ return 0;
+}
- err = hs_compile_ext_multi((const char *const *)cd->expressions, cd->flags,
- cd->ids, (const hs_expr_ext_t *const *)cd->ext,
- cd->pattern_cnt, HS_MODE_BLOCK, NULL, &pd->hs_db,
- &compile_err);
+/**
+ * \brief Process the patterns added to the mpm, and create the internal tables.
+ *
+ * Moves the patterns out of the init hash, then either reuses an equivalent
+ * database from the global hash table or compiles a new one. The whole
+ * lookup/compile sequence runs under g_db_table_mutex.
+ *
+ * \param mpm_ctx Pointer to the mpm context.
+ *
+ * \retval 0  on success, including when the context holds no patterns.
+ * \retval -1 on error; the pattern database and compile data allocated here
+ *            are released and ctx->pattern_db is left unset.
+ */
+int SCHSPreparePatterns(MpmCtx *mpm_ctx)
+{
+    SCHSCtx *ctx = (SCHSCtx *)mpm_ctx->ctx;
- if (err != HS_SUCCESS) {
- SCLogError("failed to compile hyperscan database");
- if (compile_err) {
- SCLogError("compile error: %s", compile_err->message);
- }
- hs_free_compile_error(compile_err);
+    if (HSCheckPatterns(mpm_ctx, ctx) == 0) {
+        return 0;
+    }
+
+    SCHSCompileData *cd = CompileDataAlloc(mpm_ctx->pattern_cnt);
+    PatternDatabase *pd = PatternDatabaseAlloc(mpm_ctx->pattern_cnt);
+    if (cd == NULL || pd == NULL) {
+        goto error;
+    }
+
+    HSPatternArrayInit(ctx, pd);
+    /* Serialise whole database compilation as a relatively easy way to ensure
+     * dedupe is safe. */
+    SCMutexLock(&g_db_table_mutex);
+    if (HSGlobalPatternDatabaseInit() == -1) {
 SCMutexUnlock(&g_db_table_mutex);
 goto error;
 }
- ctx->pattern_db = pd;
+    if (PatternDatabaseGetCached(&pd, cd) == 0 && pd != NULL) {
+        /* Cache hit: PatternDatabaseGetCached() already freed our private
+         * pd and cd; pd now refers to the shared, cached database. */
+        cd = NULL;
+        if (PatternDatabaseGetSize(pd, &ctx->hs_db_size) != 0) {
+            /* Give back the reference taken by the lookup. The shared
+             * database belongs to its other users and must not be freed,
+             * so hide it from the error path. */
+            pd->ref_cnt--;
+            pd = NULL;
+            SCMutexUnlock(&g_db_table_mutex);
+            goto error;
+        }
+        ctx->pattern_db = pd;
- SCMutexLock(&g_scratch_proto_mutex);
- err = hs_alloc_scratch(pd->hs_db, &g_scratch_proto);
- SCMutexUnlock(&g_scratch_proto_mutex);
- if (err != HS_SUCCESS) {
- SCLogError("failed to allocate scratch");
+        if (pd->ref_cnt == 1) {
+            // freshly allocated
+            mpm_ctx->memory_cnt++;
+            mpm_ctx->memory_size += ctx->hs_db_size;
+        }
+        SCMutexUnlock(&g_db_table_mutex);
+        return 0;
+    }
+
+    BUG_ON(ctx->pattern_db != NULL); /* already built? */
+    BUG_ON(mpm_ctx->pattern_cnt == 0);
+
+    if (PatternDatabaseCompile(pd, cd) != 0) {
 SCMutexUnlock(&g_db_table_mutex);
 goto error;
 }
- err = hs_database_size(pd->hs_db, &ctx->hs_db_size);
- if (err != HS_SUCCESS) {
- SCLogError("failed to query database size");
+    /* NOTE(review): PatternDatabaseCompile() has already inserted pd into
+     * g_db_table; if the size query below fails, the error path frees pd
+     * while the table still references it -- confirm and remove the entry
+     * from the table first if needed. */
+    if (PatternDatabaseGetSize(pd, &ctx->hs_db_size) != 0) {
 SCMutexUnlock(&g_db_table_mutex);
 goto error;
 }
+    /* publish the database only once nothing can fail any more, so the
+     * error path never leaves ctx->pattern_db pointing at freed memory */
+    ctx->pattern_db = pd;
 mpm_ctx->memory_cnt++;
 mpm_ctx->memory_size += ctx->hs_db_size;
- SCLogDebug("Built %" PRIu32 " patterns into a database of size %" PRIuMAX
- " bytes", mpm_ctx->pattern_cnt, (uintmax_t)ctx->hs_db_size);
-
- /* Cache this database globally for later. */
- pd->ref_cnt = 1;
- int r = HashTableAdd(g_db_table, pd, 1);
 SCMutexUnlock(&g_db_table_mutex);
- if (r < 0)
- goto error;
-
- SCHSFreeCompileData(cd);
+    CompileDataFree(cd);
 return 0;
error:
- if (pd) {
- PatternDatabaseFree(pd);
- }
- if (cd) {
- SCHSFreeCompileData(cd);
- }
+    /* pd and/or cd may be NULL here; both are skipped in that case */
+    SCHSCleanupOnError(pd, cd);
 return -1;
}