--- /dev/null
+#
+# a lot of noise to find sqlite
+#
+
+# Opt-out switch: when ON, no system sqlite3 is searched for and the copy in
+# the source tree is used instead.
+option(SQLITE_PREFER_STATIC
+    "Build sqlite3 statically instead of using an installed lib"
+    OFF)
+
+# Ask pkg-config for a system sqlite3, except on Windows or when the in-tree
+# copy was requested explicitly.
+if(NOT (WIN32 OR SQLITE_PREFER_STATIC))
+    find_package(PkgConfig QUIET)
+    pkg_check_modules(SQLITE3 sqlite3)
+endif()
+
+# No usable system library: fall back to the sqlite3 amalgamation in the
+# source tree, and give up if it is not there either.
+if(NOT SQLITE3_FOUND)
+    message(STATUS "looking for sqlite3 in source tree")
+    if(NOT EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.h" OR
+       NOT EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c")
+        # both the header and the source file are required
+        message(FATAL_ERROR " no sqlite3 in source tree")
+    endif()
+
+    message(STATUS " found sqlite3 in source tree")
+    set(SQLITE3_FOUND TRUE)
+    set(SQLITE3_BUILD_SOURCE TRUE)
+    set(SQLITE3_INCLUDE_DIRS "${PROJECT_SOURCE_DIR}/sqlite3")
+    # consumers pass this to target_link_libraries()
+    set(SQLITE3_LDFLAGS sqlite3_static)
+endif()
+
+# Now do version checks: reject the sqlite3 releases known to be broken, then
+# either probe the installed library for sqlite3_open_v2 or define the
+# sqlite3_static target that builds the in-tree copy.
+if(SQLITE3_FOUND)
+    # Load the check macros here so this file does not depend on the including
+    # project having done so; including a module twice is harmless.
+    include(CheckCSourceCompiles)
+    include(CheckSymbolExists)
+
+    # The checks below temporarily extend CMAKE_REQUIRED_*. Remember the
+    # caller's values so they are restored on BOTH branches; previously the
+    # include path leaked into later checks when building from source.
+    set(_sqlite3_saved_includes "${CMAKE_REQUIRED_INCLUDES}")
+    set(_sqlite3_saved_libraries "${CMAKE_REQUIRED_LIBRARIES}")
+
+    list(INSERT CMAKE_REQUIRED_INCLUDES 0 "${SQLITE3_INCLUDE_DIRS}")
+    check_c_source_compiles("#include <sqlite3.h>\n#if SQLITE_VERSION_NUMBER >= 3008007 && SQLITE_VERSION_NUMBER < 3008010\n#error broken sqlite\n#endif\nint main() {return 0;}" SQLITE_VERSION_OK)
+    if(NOT SQLITE_VERSION_OK)
+        message(FATAL_ERROR "sqlite3 is broken from 3.8.7 to 3.8.10 - please find a working version")
+    endif()
+
+    if(NOT SQLITE3_BUILD_SOURCE)
+        # system library: make sure it provides sqlite3_open_v2
+        list(INSERT CMAKE_REQUIRED_LIBRARIES 0 ${SQLITE3_LDFLAGS})
+        check_symbol_exists(sqlite3_open_v2 sqlite3.h HAVE_SQLITE3_OPEN_V2)
+    elseif(NOT TARGET sqlite3_static)
+        # build sqlite as a static lib to compile into our test programs
+        add_library(sqlite3_static STATIC "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c")
+        if(NOT WIN32)
+            set_target_properties(sqlite3_static PROPERTIES COMPILE_FLAGS "-Wno-unused -Wno-cast-qual -DSQLITE_OMIT_LOAD_EXTENSION")
+        endif()
+    endif()
+
+    # put the check environment back exactly as we found it
+    set(CMAKE_REQUIRED_INCLUDES "${_sqlite3_saved_includes}")
+    set(CMAKE_REQUIRED_LIBRARIES "${_sqlite3_saved_libraries}")
+    unset(_sqlite3_saved_includes)
+    unset(_sqlite3_saved_libraries)
+endif()
+
+# that's enough about sqlite
--- /dev/null
+# Locate the platform threading library; tools such as hsbench link against
+# ${CMAKE_THREAD_LIBS_INIT}.
+find_package(Threads)
+
+# remove some warnings: strip -Wmissing-declarations from the C++ flags used
+# in this directory and the subdirectories added below
+if(CMAKE_CXX_FLAGS MATCHES "-Wmissing-declarations" )
+ string(REPLACE "-Wmissing-declarations" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+endif()
+
+# Header search path shared by the tools: the project root, this directory's
+# src/ and the project's util/ directory (kept as separate calls so their
+# relative order does not depend on CMAKE_INCLUDE_DIRECTORIES_BEFORE).
+include_directories(${PROJECT_SOURCE_DIR})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
+include_directories(${PROJECT_SOURCE_DIR}/util)
+
+# Descend into every immediate subdirectory that ships its own
+# CMakeLists.txt; anything else in this directory is ignored.
+file(GLOB dirents RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *)
+foreach(dirent ${dirents})
+    if(IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${dirent})
+        if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${dirent}/CMakeLists.txt)
+            add_subdirectory(${dirent})
+        endif()
+    endif()
+endforeach()
--- /dev/null
+# Locate sqlite3 (system library or in-tree copy). The module is included by
+# name so that CMAKE_MODULE_PATH may list more than one directory; pasting the
+# variable into a file path only works while it holds exactly one entry.
+include(sqlite3)
+
+if(NOT XCODE)
+    include_directories(SYSTEM ${SQLITE3_INCLUDE_DIRS})
+else()
+    # cmake doesn't think Xcode supports isystem, so add the flag by hand:
+    # one -isystem per directory, and none at all when the list is empty (a
+    # dangling -isystem would swallow the next compiler argument).
+    foreach(_sqlite3_include_dir ${SQLITE3_INCLUDE_DIRS})
+        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${_sqlite3_include_dir}")
+    endforeach()
+endif()
+
+# Feature probes whose results are consumed by the C++ sources
+# (HAVE_MALLOC_INFO selects the heapstats implementation).
+include(CheckFunctionExists)
+check_function_exists(malloc_info HAVE_MALLOC_INFO)
+check_function_exists(shmget HAVE_SHMGET)
+set(HAVE_SHMGET ${HAVE_SHMGET} CACHE BOOL "shmget()")
+
+# only set these after all tests are done
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
+
+
+# Source and header files that make up the hsbench executable.
+set(hsbench_SOURCES
+    common.h
+    data_corpus.cpp
+    data_corpus.h
+    engine_hyperscan.cpp
+    engine_hyperscan.h
+    heapstats.cpp
+    heapstats.h
+    huge.cpp
+    huge.h
+    main.cpp
+    thread_barrier.h
+    timer.h
+)
+
+# hsbench links the Hyperscan library, the shared tool helper libraries,
+# sqlite3 (for reading corpora) and the platform thread library.
+add_executable(hsbench ${hsbench_SOURCES})
+target_link_libraries(hsbench
+    hs
+    databaseutil
+    expressionutil
+    ${SQLITE3_LDFLAGS}
+    ${CMAKE_THREAD_LIBS_INIT})
--- /dev/null
+Hyperscan Benchmarker: hsbench
+==============================
+
+The `hsbench` tool provides an easy way to measure Hyperscan's performance
+for a particular set of patterns and corpus of data to be scanned.
+
+Documentation describing its operation is available in the Tools section of the
+[Developer Reference Guide](http://01org.github.io/hyperscan/dev-reference/).
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef COMMON_H
+#define COMMON_H
+
+#include <string>
+
+/**
+ * Kind of scan to benchmark. The Hyperscan engine translates each value to
+ * the matching HS_MODE_* flag when it compiles a database.
+ */
+enum class ScanMode { BLOCK, STREAMING, VECTORED };
+
+// Global settings shared between the hsbench sources. They are only declared
+// here; the definitions are presumably in main.cpp (not verified from this
+// header).
+
+// When true, the engine installs the match callback that prints every match.
+extern bool echo_matches;
+// When true, the built database is written to a file under serializePath.
+extern bool saveDatabases;
+// When true, the database is loaded from a file under serializePath instead
+// of being compiled from the expressions.
+extern bool loadDatabases;
+// Directory prefix used to form the file names of saved/loaded databases.
+extern std::string serializePath;
+// Extra mode bits OR'd into the compile mode when building a streaming
+// database.
+extern unsigned int somPrecisionMode;
+
+#endif // COMMON_H
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "data_corpus.h"
+
+#include "util/container.h"
+#include "ue2common.h"
+
+#include <cassert>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <sqlite3.h>
+
+using namespace std;
+using namespace ue2;
+
+/**
+ * Convert the current result row of \p statement into a DataBlock and append
+ * it to \p blocks.
+ *
+ * The row is expected to hold (id, stream_id, data) in columns 0..2. The
+ * first time a stream_id is seen it is assigned the next dense index, which
+ * is recorded in \p stream_indices.
+ *
+ * \throws DataCorpusError if SQLite reports a non-empty blob but cannot
+ *         provide its contents.
+ */
+static
+void readRow(sqlite3_stmt *statement, vector<DataBlock> &blocks,
+             map<unsigned int, unsigned int> &stream_indices) {
+    unsigned int id = sqlite3_column_int(statement, 0);
+    unsigned int stream_id = sqlite3_column_int(statement, 1);
+    // Fetch the pointer first and its length second, the order recommended
+    // by the SQLite documentation.
+    const char *blob = (const char *)sqlite3_column_blob(statement, 2);
+    unsigned int bytes = sqlite3_column_bytes(statement, 2);
+
+    // insert() leaves an existing mapping untouched and hands back an
+    // iterator to it, so one lookup covers both the "new stream" and the
+    // "known stream" case.
+    unsigned int next_index = stream_indices.size();
+    unsigned int internal_stream_index =
+        stream_indices.insert(std::make_pair(stream_id, next_index))
+            .first->second;
+
+    // SQLite returns a null pointer for a zero-length blob, which is a
+    // perfectly valid (empty) block. A null pointer together with a non-zero
+    // length means the data could not be retrieved.
+    if (!blob && bytes > 0) {
+        throw DataCorpusError("Unable to read block data from corpus.");
+    }
+
+    blocks.emplace_back(id, stream_id, internal_stream_index,
+                        blob ? string(blob, blob + bytes) : string());
+}
+
+namespace /* anonymous */ {
+
+/**
+ * Owns the SQLite handles used while reading a corpus and releases them when
+ * it goes out of scope, so that every exit path (including exceptions) cleans
+ * up after itself.
+ */
+struct CorpusHandles {
+    CorpusHandles() = default;
+    CorpusHandles(const CorpusHandles &) = delete;
+    CorpusHandles &operator=(const CorpusHandles &) = delete;
+
+    ~CorpusHandles() {
+        // Both calls are harmless no-ops on null handles. The return values
+        // are deliberately ignored: after a failed sqlite3_step(),
+        // sqlite3_finalize() merely repeats that step's error code, which has
+        // already been reported to the caller.
+        sqlite3_finalize(statement);
+        sqlite3_close(db);
+    }
+
+    sqlite3 *db = nullptr;
+    sqlite3_stmt *statement = nullptr;
+};
+
+} // namespace
+
+/**
+ * Read every block of the corpus database \p filename.
+ *
+ * Blocks are read from the "chunk" table in ascending id order.
+ *
+ * \return the blocks in the order they were read; never empty.
+ * \throws DataCorpusError if the database cannot be opened or queried, if
+ *         reading a row fails, or if the database contains no blocks.
+ */
+vector<DataBlock> readCorpus(const string &filename) {
+    CorpusHandles handles;
+
+    int status = sqlite3_open_v2(filename.c_str(), &handles.db,
+                                 SQLITE_OPEN_READONLY, nullptr);
+    if (status != SQLITE_OK) {
+        // The handle is normally valid even on failure so that the error
+        // text can be queried, but it is null if SQLite could not allocate
+        // it; fall back to the generic description in that case.
+        ostringstream err;
+        err << "Unable to open database '" << filename << "': "
+            << (handles.db ? sqlite3_errmsg(handles.db)
+                           : sqlite3_errstr(status));
+        throw DataCorpusError(err.str());
+    }
+
+    static const string query("SELECT id, stream_id, data "
+                              "FROM chunk ORDER BY id;");
+
+    status = sqlite3_prepare_v2(handles.db, query.c_str(), query.size(),
+                                &handles.statement, nullptr);
+    if (status != SQLITE_OK) {
+        ostringstream oss;
+        oss << "Query failed: " << query;
+        throw DataCorpusError(oss.str());
+    }
+
+    vector<DataBlock> blocks;
+    map<unsigned int, unsigned int> stream_indices;
+
+    while ((status = sqlite3_step(handles.statement)) == SQLITE_ROW) {
+        readRow(handles.statement, blocks, stream_indices);
+    }
+
+    // Anything other than SQLITE_DONE means the iteration stopped early.
+    if (status != SQLITE_DONE) {
+        ostringstream oss;
+        oss << "Error retrieving blocks from corpus: "
+            << sqlite3_errstr(status);
+        throw DataCorpusError(oss.str());
+    }
+
+    if (blocks.empty()) {
+        throw DataCorpusError("Database contains no blocks.");
+    }
+
+    return blocks;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DATACORPUS_H
+#define DATACORPUS_H
+
+#include <string>
+#include <utility>
+#include <vector>
+
+/** One block of corpus data together with the identifiers it is known by. */
+class DataBlock {
+public:
+    DataBlock(unsigned int block_id, unsigned int corpus_stream_id,
+              unsigned int dense_stream_index, std::string data)
+        : id(block_id), stream_id(corpus_stream_id),
+          internal_stream_index(dense_stream_index),
+          payload(std::move(data)) {}
+
+    /** Unique block identifier. */
+    unsigned int id;
+
+    /** Unique stream identifier (from corpus file). */
+    unsigned int stream_id;
+
+    /** Dense index for this stream (allocated by hsbench). */
+    unsigned int internal_stream_index;
+
+    /** Actual block payload. */
+    std::string payload;
+};
+
+/** Exception thrown when a corpus cannot be read. */
+class DataCorpusError {
+public:
+    explicit DataCorpusError(std::string message) : msg(std::move(message)) {}
+
+    /** Human-readable description of the failure. */
+    std::string msg;
+};
+
+/**
+ * Interface to a corpus database. Any error will produce a DataCorpusError
+ * and should be considered fatal.
+ *
+ * \param filename path of the SQLite corpus database to open (read-only).
+ * \return all blocks of the corpus, ordered by block id. A DataCorpusError is
+ *         thrown rather than returning an empty vector.
+ */
+std::vector<DataBlock> readCorpus(const std::string &filename);
+
+#endif // DATACORPUS_H
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "ExpressionParser.h"
+#include "common.h"
+#include "engine_hyperscan.h"
+#include "expressions.h"
+#include "heapstats.h"
+#include "huge.h"
+#include "timer.h"
+
+#include "crc32.h"
+#include "database.h"
+#include "hs_compile.h"
+#include "hs_internal.h"
+#include "hs_runtime.h"
+#include "util/database_util.h"
+#include "util/make_unique.h"
+
+#include <cassert>
+#include <cstring>
+#include <iomanip>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace std;
+
+/**
+ * Allocate the scratch space needed to scan with \p db.
+ *
+ * Failure to allocate is fatal, in line with how the scan calls in this file
+ * treat Hyperscan errors; continuing would only crash later on a null
+ * scratch pointer.
+ */
+EngineContext::EngineContext(const hs_database_t *db) {
+    hs_error_t rv = hs_alloc_scratch(db, &scratch);
+    if (rv != HS_SUCCESS) {
+        printf("Fatal error: hs_alloc_scratch returned error %d\n", rv);
+        abort();
+    }
+    assert(scratch);
+}
+
+/** Release the scratch space owned by this context. */
+EngineContext::~EngineContext() {
+    hs_free_scratch(scratch);
+}
+
+namespace /* anonymous */ {
+
+/**
+ * Per-scan state handed to the match callbacks (onMatch / onMatchEcho)
+ * through the opaque context pointer of the Hyperscan scan calls.
+ */
+struct ScanContext {
+    ScanContext(unsigned scan_id, ResultEntry &entry,
+                const EngineStream *engine_stream)
+        : id(scan_id), result(entry), stream(engine_stream) {}
+
+    /** Identifier supplied by the caller of the scan; echoed with matches. */
+    unsigned id;
+
+    /** Result record whose match counter the callbacks increment. */
+    ResultEntry &result;
+
+    /** nullptr except in streaming mode. */
+    const EngineStream *stream;
+};
+
+} // namespace
+
+/**
+ * Match callback used when "echo matches" is off: counts the match in the
+ * scan's ResultEntry and nothing else.
+ *
+ * \param ctx the ScanContext of the scan in progress.
+ * \return 0, so that scanning continues.
+ */
+static
+int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
+            void *ctx) {
+    auto *scan_ctx = static_cast<ScanContext *>(ctx);
+    assert(scan_ctx);
+    ++scan_ctx->result.matches;
+    return 0;
+}
+
+/**
+ * Match callback used when "echo matches" is enabled: counts the match and
+ * prints it to stdout.
+ *
+ * In streaming mode the printed line is prefixed with the stream number.
+ *
+ * \param id  identifier of the expression that matched.
+ * \param to  end offset of the match.
+ * \param ctx the ScanContext of the scan in progress.
+ * \return 0, so that scanning continues.
+ */
+static
+int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
+                unsigned int, void *ctx) {
+    auto *scan_ctx = static_cast<ScanContext *>(ctx);
+    assert(scan_ctx);
+    ++scan_ctx->result.matches;
+
+    const EngineStream *stream = scan_ctx->stream;
+    if (!stream) {
+        printf("Match @%u:%llu for %u\n", scan_ctx->id, to, id);
+    } else {
+        printf("Match @%u:%u:%llu for %u\n", stream->sn, scan_ctx->id, to, id);
+    }
+
+    return 0;
+}
+
+/** Wrap the database \p db_in, which must not be null. */
+EngineHyperscan::EngineHyperscan(hs_database_t *db_in)
+    : db(db_in) {
+    assert(db);
+}
+
+/** Hand the database back through release_huge(). */
+EngineHyperscan::~EngineHyperscan() {
+    release_huge(db);
+}
+
+/** Create a fresh scan context (scratch space) for this engine's database. */
+unique_ptr<EngineContext> EngineHyperscan::makeContext() const {
+    return ue2::make_unique<EngineContext>(db);
+}
+
+/**
+ * Scan one block of data in block mode.
+ *
+ * Matches are counted in \p result (and printed when echo_matches is set).
+ * Any Hyperscan error is fatal: it is reported and the process aborts.
+ */
+void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
+                           ResultEntry &result, EngineContext &ctx) const {
+    assert(data);
+
+    ScanContext sc(id, result, nullptr);
+    const hs_error_t rv =
+        hs_scan(db, data, len, 0, ctx.scratch,
+                echo_matches ? onMatchEcho : onMatch, &sc);
+    if (rv == HS_SUCCESS) {
+        return;
+    }
+
+    printf("Fatal error: hs_scan returned error %d\n", rv);
+    abort();
+}
+
+/**
+ * Scan \p count buffers with a single vectored-mode call.
+ *
+ * \p data and \p len are parallel arrays of buffer pointers and lengths.
+ * Matches are counted in \p result (and printed when echo_matches is set).
+ * Any Hyperscan error is fatal: it is reported and the process aborts.
+ */
+void EngineHyperscan::scan_vectored(const char *const *data,
+                                    const unsigned int *len, unsigned int count,
+                                    unsigned streamId, ResultEntry &result,
+                                    EngineContext &ctx) const {
+    assert(data);
+    assert(len);
+
+    ScanContext sc(streamId, result, nullptr);
+    const hs_error_t rv =
+        hs_scan_vector(db, data, len, count, 0, ctx.scratch,
+                       echo_matches ? onMatchEcho : onMatch, &sc);
+    if (rv == HS_SUCCESS) {
+        return;
+    }
+
+    printf("Fatal error: hs_scan_vector returned error %d\n", rv);
+    abort();
+}
+
+/**
+ * Open a new Hyperscan stream that scans with the scratch space of \p ctx.
+ *
+ * \param streamId stream number recorded in the returned object; it is only
+ *                 used when matches are echoed.
+ * \return the open stream, or nullptr if Hyperscan could not open one.
+ */
+unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx,
+                                                     unsigned streamId) const {
+    auto stream = ue2::make_unique<EngineStream>();
+    stream->ctx = &ctx;
+    stream->sn = streamId;
+    // Start from a known value so the handle is meaningful even if the call
+    // below fails without writing to it.
+    stream->id = nullptr;
+
+    hs_error_t rv = hs_open_stream(db, 0, &stream->id);
+    if (rv != HS_SUCCESS || !stream->id) {
+        // an error occurred, propagate to caller
+        return nullptr;
+    }
+    return stream;
+}
+
+/**
+ * Close \p stream, counting any matches reported during the close in
+ * \p result.
+ *
+ * Takes ownership of the stream object. As with the scan calls, a Hyperscan
+ * error is fatal: it is reported and the process aborts, instead of being
+ * silently dropped.
+ */
+void EngineHyperscan::streamClose(unique_ptr<EngineStream> stream,
+                                  ResultEntry &result) const {
+    assert(stream);
+
+    EngineStream &s = *stream;
+    EngineContext &ctx = *s.ctx;
+
+    ScanContext sc(0, result, &s);
+    auto callback = echo_matches ? onMatchEcho : onMatch;
+
+    assert(s.id);
+    hs_error_t rv = hs_close_stream(s.id, ctx.scratch, callback, &sc);
+    if (rv != HS_SUCCESS) {
+        printf("Fatal error: hs_close_stream returned error %d\n", rv);
+        abort();
+    }
+    // the handle is no longer valid once the stream has been closed
+    s.id = nullptr;
+}
+
+/**
+ * Scan one block of data on an open stream, using the scratch space of the
+ * context the stream was opened with.
+ *
+ * Matches are counted in \p result (and printed when echo_matches is set).
+ * Any Hyperscan error is fatal: it is reported and the process aborts.
+ */
+void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
+                                 unsigned len, unsigned id,
+                                 ResultEntry &result) const {
+    assert(data);
+
+    ScanContext sc(id, result, &stream);
+    const hs_error_t rv =
+        hs_scan_stream(stream.id, data, len, 0, stream.ctx->scratch,
+                       echo_matches ? onMatchEcho : onMatch, &sc);
+    if (rv == HS_SUCCESS) {
+        return;
+    }
+
+    printf("Fatal error: hs_scan_stream returned error %d\n", rv);
+    abort();
+}
+
+/**
+ * Translate a ScanMode into the corresponding Hyperscan HS_MODE_* flag.
+ *
+ * An unknown value trips an assertion; builds without assertions fall back
+ * to streaming mode.
+ */
+static
+unsigned makeModeFlags(ScanMode scan_mode) {
+    if (scan_mode == ScanMode::BLOCK) {
+        return HS_MODE_BLOCK;
+    }
+    if (scan_mode == ScanMode::STREAMING) {
+        return HS_MODE_STREAM;
+    }
+    if (scan_mode == ScanMode::VECTORED) {
+        return HS_MODE_VECTORED;
+    }
+
+    assert(0);
+    return HS_MODE_STREAM;
+}
+
+/**
+ * Hash the settings used to compile a database, returning a string that can be
+ * used as a filename.
+ *
+ * The hash is the CRC32C of "<filename> <mode> ", rendered as eight
+ * zero-padded hexadecimal digits.
+ */
+static
+string dbSettingsHash(const string &filename, u32 mode) {
+    // Describe the settings as text first...
+    ostringstream settings;
+    settings << filename.c_str() << ' ' << mode << ' ';
+    const string description = settings.str();
+
+    // ...then checksum that description...
+    const u32 digest =
+        Crc32c_ComputeBuf(0, description.data(), description.size());
+
+    // ...and return a printable version of the digest.
+    ostringstream printable;
+    printable << hex << setw(8) << setfill('0') << digest << dec;
+    return printable.str();
+}
+
+/**
+ * Build the path of the serialized database for the signature set \p name
+ * compiled with \p mode: "<serializePath>/<settings hash>.db".
+ */
+static
+string dbFilename(const std::string &name, unsigned mode) {
+    return serializePath + '/' + dbSettingsHash(name, mode) + ".db";
+}
+
+/**
+ * Build (or, when loadDatabases is set, load) a Hyperscan database for
+ * \p expressions and wrap it in an engine.
+ *
+ * A summary of the database (size, CRC, stream/scratch size, compile time and
+ * peak heap usage) is printed to stdout on success.
+ *
+ * \param expressions map from expression id to expression text; must not be
+ *                    empty.
+ * \param scan_mode   scan mode the database is compiled for.
+ * \param name        name of the signature set; used in the summary and to
+ *                    derive the file name of a saved/loaded database.
+ * \param grey        compiler settings, only used in non-release builds.
+ * \return the engine, or nullptr if the database could not be built, loaded
+ *         or queried. No database or scratch space is leaked on failure.
+ */
+std::unique_ptr<EngineHyperscan>
+buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
+                     const std::string &name, UNUSED const ue2::Grey &grey) {
+    if (expressions.empty()) {
+        assert(0);
+        return nullptr;
+    }
+
+    long double compileSecs = 0.0;
+    size_t compiledSize = 0;
+    size_t streamSize = 0;
+    size_t scratchSize = 0;
+    size_t peakMemorySize = 0; // same width as getPeakHeap()'s result
+    unsigned int crc = 0;
+    std::string db_info;
+
+    unsigned int mode = makeModeFlags(scan_mode);
+
+    hs_database_t *db = nullptr;
+    hs_error_t err;
+
+    if (loadDatabases) {
+        db = loadDatabase(dbFilename(name, mode).c_str());
+        if (!db) {
+            return nullptr;
+        }
+    } else {
+        const unsigned int count = expressions.size();
+
+        vector<string> exprs;
+        vector<unsigned int> flags, ids;
+        vector<hs_expr_ext> ext;
+
+        for (const auto &m : expressions) {
+            string expr;
+            unsigned int f = 0;
+            hs_expr_ext extparam;
+            extparam.flags = 0;
+            if (!readExpression(m.second, expr, &f, &extparam)) {
+                printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
+                       m.first);
+                return nullptr;
+            }
+
+            exprs.push_back(expr);
+            ids.push_back(m.first);
+            flags.push_back(f);
+            ext.push_back(extparam);
+        }
+
+        unsigned full_mode = mode;
+        if (mode == HS_MODE_STREAM) {
+            full_mode |= somPrecisionMode;
+        }
+
+        // Our compiler takes an array of plain ol' C strings.
+        vector<const char *> patterns(count);
+        for (unsigned int i = 0; i < count; i++) {
+            patterns[i] = exprs[i].c_str();
+        }
+
+        // Extended parameters are passed as pointers to hs_expr_ext structures.
+        vector<const hs_expr_ext *> ext_ptr(count);
+        for (unsigned int i = 0; i < count; i++) {
+            ext_ptr[i] = &ext[i];
+        }
+
+        Timer timer;
+        timer.start();
+
+        hs_compile_error_t *compile_err = nullptr;
+
+#ifndef RELEASE_BUILD
+        err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(),
+                                   ext_ptr.data(), count, full_mode, nullptr,
+                                   &db, &compile_err, grey);
+#else
+        err = hs_compile_ext_multi(patterns.data(), flags.data(), ids.data(),
+                                   ext_ptr.data(), count, full_mode, nullptr,
+                                   &db, &compile_err);
+#endif
+
+        timer.complete();
+        compileSecs = timer.seconds();
+        peakMemorySize = getPeakHeap();
+
+        // Treat every failure as fatal for this build, not only
+        // HS_COMPILER_ERROR: on any error there is no database to go on with.
+        if (err != HS_SUCCESS) {
+            if (compile_err) {
+                if (compile_err->expression >= 0) {
+                    printf("Compile error for signature #%d: %s\n",
+                           compile_err->expression, compile_err->message);
+                } else {
+                    printf("Compile error: %s\n", compile_err->message);
+                }
+                hs_free_compile_error(compile_err);
+            } else {
+                printf("Compile error: compiler returned error %d\n", err);
+            }
+            return nullptr;
+        }
+    }
+
+    // copy the db into huge pages (where available) to reduce TLB pressure
+    db = get_huge(db);
+    if (!db) {
+        return nullptr;
+    }
+
+    // Hand the database to the engine straight away: its destructor releases
+    // the database, so the early returns below no longer leak it.
+    auto engine = ue2::make_unique<EngineHyperscan>(db);
+
+    err = hs_database_size(db, &compiledSize);
+    if (err != HS_SUCCESS) {
+        return nullptr;
+    }
+    assert(compiledSize > 0);
+
+    crc = db->crc32;
+
+    if (saveDatabases) {
+        saveDatabase(db, dbFilename(name, mode).c_str());
+    }
+
+    if (mode & HS_MODE_STREAM) {
+        err = hs_stream_size(db, &streamSize);
+        if (err != HS_SUCCESS) {
+            return nullptr;
+        }
+    } else {
+        streamSize = 0;
+    }
+
+    char *info = nullptr;
+    err = hs_database_info(db, &info);
+    if (err != HS_SUCCESS) {
+        return nullptr;
+    }
+    db_info = string(info);
+    free(info);
+
+    // Allocate scratch temporarily to find its size: this is a good test
+    // anyway.
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    if (err != HS_SUCCESS) {
+        return nullptr;
+    }
+
+    err = hs_scratch_size(scratch, &scratchSize);
+    // free before looking at the result so the scratch cannot leak
+    hs_free_scratch(scratch);
+    if (err != HS_SUCCESS) {
+        return nullptr;
+    }
+
+    // Output summary information.
+    printf("Signatures: %s\n", name.c_str());
+    printf("Hyperscan info: %s\n", db_info.c_str());
+    printf("Expression count: %'zu\n", expressions.size());
+    printf("Bytecode size: %'zu bytes\n", compiledSize);
+    printf("Database CRC: 0x%x\n", crc);
+    if (mode & HS_MODE_STREAM) {
+        printf("Stream state size: %'zu bytes\n", streamSize);
+    }
+    printf("Scratch size: %'zu bytes\n", scratchSize);
+    printf("Compile time: %'0.3Lf seconds\n", compileSecs);
+    printf("Peak heap usage: %'zu bytes\n", peakMemorySize);
+
+    return engine;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ENGINEHYPERSCAN_H
+#define ENGINEHYPERSCAN_H
+
+#include "expressions.h"
+#include "common.h"
+#include "hs_runtime.h"
+
+#include <memory>
+
+/**
+ * Structure for the result of a single complete scan. The engine's match
+ * callbacks increment \c matches; \c seconds is not written by the engine
+ * code itself.
+ */
+struct ResultEntry {
+ double seconds = 0; //!< Time taken for scan.
+ unsigned int matches = 0; //!< Count of matches found.
+};
+
+/**
+ * Engine context which is allocated on a per-thread basis.
+ *
+ * Owns the Hyperscan scratch space: it is allocated by the constructor and
+ * freed by the destructor.
+ *
+ * NOTE(review): no copy control is declared, so copying a context would make
+ * two objects free the same scratch; instances appear to be handled through
+ * std::unique_ptr only (see EngineHyperscan::makeContext).
+ */
+class EngineContext {
+public:
+ /** Allocate scratch space suitable for scanning with \p db. */
+ explicit EngineContext(const hs_database_t *db);
+ /** Free the scratch space. */
+ ~EngineContext();
+
+ /** Scratch space passed to every Hyperscan scan call made with this context. */
+ hs_scratch_t *scratch = nullptr;
+};
+
+/**
+ * Streaming mode scans have persistent stream state associated with them.
+ *
+ * All members start out zeroed, so that a freshly created stream never holds
+ * indeterminate pointers (matching how EngineContext initialises its scratch).
+ */
+class EngineStream {
+public:
+    /** Hyperscan stream handle; null while no stream is open. */
+    hs_stream_t *id = nullptr;
+
+    /** Stream number given to streamOpen(); printed when matches are echoed. */
+    unsigned int sn = 0;
+
+    /** Context whose scratch space is used for scans on this stream. */
+    EngineContext *ctx = nullptr;
+};
+
+/**
+ * Hyperscan Engine for scanning data.
+ *
+ * Wraps a compiled database and offers block, vectored and streaming scans.
+ * The scan methods treat Hyperscan errors as fatal (they print a message and
+ * abort). The engine releases the database in its destructor.
+ */
+class EngineHyperscan {
+public:
+ /** Take over \p db, which must not be null. */
+ explicit EngineHyperscan(hs_database_t *db);
+ /** Release the database. */
+ ~EngineHyperscan();
+
+ /** Create a new scan context (scratch space) for this engine's database. */
+ std::unique_ptr<EngineContext> makeContext() const;
+
+ /**
+ * Block mode: scan \p len bytes at \p data. \p id is only used when
+ * matches are echoed; matches are counted in \p result.
+ */
+ void scan(const char *data, unsigned int len, unsigned int id,
+ ResultEntry &result, EngineContext &ctx) const;
+
+ /**
+ * Vectored mode: scan \p count buffers given by the parallel arrays
+ * \p data and \p len; matches are counted in \p result.
+ */
+ void scan_vectored(const char *const *data, const unsigned int *len,
+ unsigned int count, unsigned int streamId,
+ ResultEntry &result, EngineContext &ctx) const;
+
+ /**
+ * Streaming mode: open a stream bound to \p ctx. Returns nullptr if the
+ * stream could not be opened.
+ */
+ std::unique_ptr<EngineStream> streamOpen(EngineContext &ctx,
+ unsigned id) const;
+
+ /**
+ * Streaming mode: close \p stream (taking ownership of it); matches
+ * reported during the close are counted in \p result.
+ */
+ void streamClose(std::unique_ptr<EngineStream> stream,
+ ResultEntry &result) const;
+
+ /**
+ * Streaming mode: scan \p len bytes at \p data on \p stream; matches are
+ * counted in \p result.
+ */
+ void streamScan(EngineStream &stream, const char *data, unsigned int len,
+ unsigned int id, ResultEntry &result) const;
+
+private:
+ /** Compiled database scanned by this engine; released in the destructor. */
+ hs_database_t *db;
+};
+
+// Forward declaration: only a reference to Grey is needed by this header.
+namespace ue2 {
+struct Grey;
+}
+
+/**
+ * Compile (or load) a database for \p expressions in the given \p scan_mode
+ * and wrap it in an engine. \p name identifies the signature set in the
+ * printed summary and in the file name of a saved/loaded database. \p grey is
+ * only used by non-release builds.
+ *
+ * \return the engine, or nullptr on failure.
+ */
+std::unique_ptr<EngineHyperscan>
+buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
+ const std::string &name, const ue2::Grey &grey);
+
+#endif // ENGINEHYPERSCAN_H
--- /dev/null
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Peak heap usage code.
+ *
+ * Where the malloc_info() call is available (modern glibc systems) we use it.
+ * On other Linux systems we fall back to the VmPeak value reported in
+ * /proc/$PID/status. We return zero elsewhere.
+ */
+
+#include "config.h"
+
+#include "heapstats.h"
+
+#if defined HAVE_MALLOC_INFO
+
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include <malloc.h>
+
+/**
+ * Peak heap usage as reported by glibc's malloc_info().
+ *
+ * \return the size attribute of the last '<system type="max" ...>' element in
+ *         the malloc_info() output, or 0 if the information cannot be
+ *         obtained.
+ */
+size_t getPeakHeap(void) {
+    FILE *tmpf = tmpfile();
+    if (!tmpf) {
+        return 0;
+    }
+
+    int rv = malloc_info(0, tmpf);
+    if (rv != 0) {
+        fclose(tmpf);
+        return 0;
+    }
+
+    rewind(tmpf);
+
+    // We don't want to depend on a real XML parser. This is ugly and brittle
+    // and hopefully good enough for the time being. We look for the last
+    // system tag with type max, which should be the malloc-wide one.
+
+    static const char begin[] = "<system type=\"max\" size=\"";
+    const size_t begin_len = sizeof(begin) - 1; // without the terminating NUL
+
+    char *line = nullptr;
+    size_t len = 0, maxheap = 0;
+
+    while (getline(&line, &len, tmpf) != -1) {
+        if (strncmp(line, begin, begin_len) != 0) {
+            continue;
+        }
+
+        // Parse into a temporary: a value that fails to parse must not
+        // overwrite the last good one.
+        const char *digits = line + begin_len;
+        char *end = nullptr;
+        errno = 0;
+        unsigned long long value = strtoull(digits, &end, 10);
+        if (errno != 0 || end == digits) {
+            break;
+        }
+        maxheap = (size_t)value;
+    }
+
+    free(line);
+    fclose(tmpf);
+    return maxheap;
+}
+
+#elif defined __linux
+
+#include <cstdlib>
+#include <fstream>
+#include <sstream>
+#include <string>
+
+#include <sys/types.h>
+#include <unistd.h>
+
+using namespace std;
+
+/**
+ * Approximate peak heap usage from the 'VmPeak' line of /proc/$PID/status.
+ *
+ * \return the VmPeak value converted from kB to bytes, or 0 if the file
+ *         cannot be read or holds no such line.
+ */
+size_t getPeakHeap(void) {
+    // Modern Linux kernels write a 'VmPeak' value into /proc/$PID/status. This
+    // is a reasonable approximation, though it likely includes shared libs and
+    // the like as well...
+    std::ostringstream status_path;
+    status_path << "/proc/" << getpid() << "/status";
+
+    std::ifstream status(status_path.str().c_str());
+    if (!status.good()) {
+        return 0;
+    }
+
+    const std::string key("VmPeak:");
+
+    for (std::string line; std::getline(status, line);) {
+        std::istringstream fields(line, std::istringstream::in);
+        std::string label;
+        fields >> label;
+        if (label == key) {
+            // Skip everything up to the first digit of the value.
+            while (fields.good() && !isdigit(fields.peek())) {
+                fields.ignore();
+            }
+
+            size_t kilobytes = 0;
+            fields >> kilobytes;
+            return kilobytes * 1024;
+        }
+    }
+
+    // no VmPeak line; the stream closes itself when it goes out of scope
+    return 0;
+}
+
+#else
+
+// Stub for platforms with neither malloc_info() nor Linux's /proc: peak heap
+// usage is unknown, so report zero.
+size_t getPeakHeap(void) {
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HEAPSTATS_H
+#define HEAPSTATS_H
+
+#include <cstddef> // for size_t
+
+/**
+ * Returns the peak virtual memory size of the current process in bytes, or 0
+ * when the figure is not available on this platform.
+ */
+size_t getPeakHeap(void);
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "hs.h"
+#include "ue2common.h"
+
+#include "common.h"
+#include "huge.h"
+
+#ifndef _WIN32
+#include <cstdio>
+#include <cstring>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#if defined(HAVE_SHMGET)
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#endif
+
+UNUSED static int hsdb_shmid;
+
+using namespace std;
+
+long gethugepagesize(void);
+
+/**
+ * Try to relocate a compiled database into SysV shared memory backed by huge
+ * pages.
+ *
+ * On success the original database is freed and a pointer to the relocated
+ * copy is returned; hsdb_shmid then holds the segment id so release_huge()
+ * knows to detach rather than free. If huge pages cannot be used (page size
+ * unknown, shmget/shmat refused, deserialization failed) the original \p db
+ * is returned untouched and hsdb_shmid is set to -1. If the database cannot
+ * even be serialized, nullptr is returned and \p db is NOT freed.
+ *
+ * Without HAVE_SHMGET and SHM_HUGETLB this is a no-op returning \p db.
+ */
+hs_database_t *get_huge(hs_database_t *db) {
+#if defined(HAVE_SHMGET) && defined(SHM_HUGETLB)
+    /* move the database to huge pages where possible, but fail politely */
+    hs_error_t err;
+    size_t len;
+    char *bytes;
+
+    long hpage_size = gethugepagesize();
+    if (hpage_size <= 0) {
+        // A zero page size would make the rounding below meaningless.
+        printf("Couldn't determine huge page size\n");
+        hsdb_shmid = -1;
+        return db;
+    }
+
+    err = hs_serialize_database(db, &bytes, &len);
+    if (err != HS_SUCCESS) {
+        printf("Failed to serialize database for copy: %d\n", err);
+        // this is weird - don't fail gracefully this time
+        hsdb_shmid = -1;
+        return nullptr;
+    }
+
+    size_t size;
+    err = hs_serialized_database_size(bytes, len, &size);
+    if (err != HS_SUCCESS) {
+        printf("Failed to get database size: %d\n", err);
+        // this is weird - don't fail gracefully this time, but do not leak
+        // the serialized image either.
+        free(bytes);
+        hsdb_shmid = -1;
+        return nullptr;
+    }
+
+    void *shmaddr;
+    // Reuse the page size read above rather than parsing /proc/meminfo again.
+    if ((hsdb_shmid = shmget(IPC_PRIVATE, ROUNDUP_N(size, hpage_size),
+                             SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
+        // This could fail if the user doesn't have permission to shmget(),
+        // which is OK.
+        goto fini;
+    }
+
+    shmaddr = shmat(hsdb_shmid, nullptr, SHM_RND);
+    if (shmaddr == (void *)-1) {
+        perror("Shared memory attach failure");
+        // The segment exists but nobody is attached: remove it now, or it
+        // would outlive this process.
+        shmctl(hsdb_shmid, IPC_RMID, nullptr);
+        goto fini;
+    }
+
+    // Mark this segment to be destroyed after this process detaches.
+    shmctl(hsdb_shmid, IPC_RMID, nullptr);
+
+    err = hs_deserialize_database_at(bytes, len, (hs_database_t *)shmaddr);
+    if (err != HS_SUCCESS) {
+        printf("Failed to deserialize database into shm: %d\n", err);
+        shmdt((const void *)shmaddr);
+        goto fini;
+    }
+
+    free(bytes);
+    hs_free_database(db);
+    return (hs_database_t *)shmaddr;
+
+fini:
+    free(bytes);
+    hsdb_shmid = -1;
+    return db;
+#else
+    return db;
+#endif
+}
+
+/**
+ * Dispose of a database previously passed through get_huge(): detach the
+ * shared memory mapping if one is recorded in hsdb_shmid, otherwise free the
+ * database normally.
+ */
+void release_huge(hs_database_t *db) {
+#if defined(HAVE_SHMGET) && defined(SHM_HUGETLB)
+    if (hsdb_shmid == -1) {
+        // fallback: the database never left ordinary memory
+        hs_free_database(db);
+        return;
+    }
+
+    const int rv = shmdt((const void *)db);
+    if (rv != 0) {
+        perror("Detach failure");
+    }
+#else
+    hs_free_database(db);
+#endif
+}
+
+#define BUF_SIZE 4096
+/**
+ * Look up a numeric field in /proc/meminfo.
+ *
+ * \param tag  Field label including the trailing colon, e.g. "Hugepagesize:".
+ * \return The decimal number following \p tag, or -1 if the file cannot be
+ *         read, does not fit in BUF_SIZE bytes, lacks the tag, or the value
+ *         is not a number followed by whitespace.
+ */
+static long read_meminfo(const char *tag) {
+    char buf[BUF_SIZE];
+
+    int fd = open("/proc/meminfo", O_RDONLY);
+    if (fd < 0) {
+        perror("Couldn't open /proc/meminfo");
+        return -1;
+    }
+
+    ssize_t len = read(fd, buf, sizeof(buf));
+    // close() may overwrite errno, so keep read()'s value for perror().
+    int read_errno = errno;
+    close(fd);
+    if (len < 0) {
+        errno = read_errno;
+        perror("Error reading /proc/meminfo");
+        return -1;
+    }
+    // A completely full buffer leaves no room for the terminator and means
+    // the file may have been truncated.
+    if ((size_t)len >= sizeof(buf)) {
+        printf("/proc/meminfo is too large\n");
+        return -1;
+    }
+    buf[len] = '\0';
+
+    char *p = strstr(buf, tag);
+    if (!p) {
+        return -1;
+    }
+
+    p += strlen(tag);
+    char *q;
+    long val = strtol(p, &q, 10);
+    // q == p means no digits were consumed at all.
+    if (q == p || !isspace((unsigned char)*q)) {
+        printf("Couldn't parse /proc/meminfo value\n");
+        return -1;
+    }
+
+    return val;
+}
+
+/**
+ * Return the system huge page size in bytes, or -1 if it cannot be read
+ * from the "Hugepagesize:" field of /proc/meminfo.
+ */
+long gethugepagesize(void) {
+    // Keep the value in a long: narrowing to int and multiplying in int
+    // overflows for huge pages of 2 GiB and above.
+    const long hpage_kb = read_meminfo("Hugepagesize:");
+    if (hpage_kb < 0) {
+        return -1;
+    }
+
+    /* convert from kb to bytes */
+    return hpage_kb * 1024L;
+}
+
+#else
+
+/* No huge page support on WIN32. */
+
+// Windows build: nothing to relocate, hand the database straight back.
+hs_database_t *get_huge(hs_database_t *db) {
+    return db;
+}
+
+// Windows build: the database always lives in ordinary memory, so free it.
+void release_huge(hs_database_t *db) {
+    hs_free_database(db);
+}
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HUGE_H
+#define HUGE_H
+
+#include "hs.h"
+
+/**
+ * Attempt to move \p db into huge-page-backed shared memory. Returns the
+ * database to use from now on: the relocated copy on success (the original is
+ * freed), or \p db itself when huge pages are unavailable. May return nullptr
+ * if the database cannot be serialized.
+ */
+hs_database_t *get_huge(hs_database_t *db);
+
+/**
+ * Release a database obtained from get_huge(), detaching the shared memory
+ * mapping or freeing the database as appropriate.
+ */
+void release_huge(hs_database_t *db);
+
+#endif /* HUGE_H */
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "common.h"
+#include "data_corpus.h"
+#include "engine_hyperscan.h"
+#include "expressions.h"
+#include "thread_barrier.h"
+#include "timer.h"
+#include "util/expression_path.h"
+#include "util/string_util.h"
+
+#include "grey.h"
+#include "hs.h"
+#include "ue2common.h"
+#include "util/make_unique.h"
+
+#include <algorithm>
+#include <clocale>
+#include <cmath>
+#include <cstdlib>
+#include <fstream>
+#include <map>
+#include <numeric>
+#include <sstream>
+#include <set>
+#include <thread>
+
+#include <getopt.h>
+#ifndef _WIN32
+#include <pthread.h>
+#include <unistd.h>
+#endif
+
+#include <boost/core/noncopyable.hpp>
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using namespace ue2;
+using boost::adaptors::map_keys;
+
+// Globals common to all files.
+// Set when --echo-matches is given on the command line.
+bool echo_matches = false;
+// Set by the -w option; serializePath then names the output directory.
+bool saveDatabases = false;
+// Set by the -i option; serializePath then names the input directory.
+bool loadDatabases = false;
+// Directory argument of -i / -w (the two are mutually exclusive).
+string serializePath("");
+// SOM horizon selected with -d (2, 4 or 8); defaults to large.
+unsigned int somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE;
+
+namespace /* anonymous */ {
+
+// Globals local to this file.
+// Set when --per-scan is given: print throughput for every individual scan.
+bool display_per_scan = false;
+// Scanning mode; streaming unless -N (block) or -V (vectored) is given.
+ScanMode scan_mode = ScanMode::STREAMING;
+// Number of times each thread scans the corpus (-n).
+unsigned repeats = 20;
+// Expression directory (-e), possibly inferred from the signature files.
+string exprPath("");
+// Corpus database file (-c).
+string corpusFile("");
+// CPU ids given with -T; one benchmark thread is run per entry.
+vector<unsigned int> threadCores;
+// Wall-clock timer for the whole run, driven by thread 0 only.
+Timer totalTimer;
+// Seconds measured by totalTimer once the run has finished.
+double totalSecs = 0;
+
+// Entry point signature for a benchmark thread; context is a ThreadContext.
+typedef void (*thread_func_t)(void *context);
+
+/**
+ * State owned by one benchmark thread: its private copy of the corpus, its
+ * engine scratch context, one ResultEntry per repeat, a timer, and the
+ * std::thread that runs the benchmark function.
+ */
+class ThreadContext : boost::noncopyable {
+public:
+    /**
+     * \param num_in          Index of this thread (thread 0 drives the
+     *                        global timer).
+     * \param db_in           Engine to scan with; must outlive this object.
+     * \param tb_in           Barrier shared by all benchmark threads.
+     * \param function_in     Benchmark routine executed by start().
+     * \param corpus_data_in  Corpus blocks, taken by value so every thread
+     *                        scans its own copy.
+     */
+    ThreadContext(unsigned num_in, const EngineHyperscan &db_in,
+                  thread_barrier &tb_in, thread_func_t function_in,
+                  vector<DataBlock> corpus_data_in)
+        : num(num_in), results(repeats), engine(db_in),
+          enginectx(db_in.makeContext()), corpus_data(move(corpus_data_in)),
+          tb(tb_in), function(function_in) {}
+
+    // Start the thread. If cpu is non-negative the thread is also pinned to
+    // that CPU; returns false if pinning was requested but failed.
+    bool start(int cpu) {
+        thr = thread(function, this);
+
+        // affine if it's asked for
+        if (cpu >= 0) {
+            return affine(cpu);
+        }
+        return true;
+    }
+
+    // Wait for the thread to exit.
+    void join() {
+        thr.join();
+    }
+
+    // Serialise all threads on a global barrier.
+    void barrier() {
+        tb.wait();
+    }
+
+    // Apply processor affinity (if available) to this thread. Returns false
+    // when affinity is unsupported, cpu is out of range, or the call fails.
+    bool affine(UNUSED int cpu) {
+#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
+        // The CPU id comes from the command line, so validate it at runtime
+        // instead of relying on an assert that disappears in release builds.
+        if (cpu < 0 || cpu >= CPU_SETSIZE) {
+            return false;
+        }
+
+        cpu_set_t cpuset;
+        CPU_ZERO(&cpuset);
+
+        // The 'clang' compiler complains about an unused result here, so we
+        // silence it.
+        (void)CPU_SET(cpu, &cpuset);
+
+        int rv = pthread_setaffinity_np(thr.native_handle(), sizeof(cpuset),
+                                        &cpuset);
+        return (rv == 0);
+#else
+        return false; // not available
+#endif
+    }
+
+    unsigned num;                        // thread index
+    Timer timer;                         // per-scan timer
+    vector<ResultEntry> results;         // one entry per repeat
+    const EngineHyperscan &engine;       // shared, read-only engine
+    unique_ptr<EngineContext> enginectx; // this thread's engine context
+    vector<DataBlock> corpus_data;       // this thread's copy of the corpus
+
+protected:
+    thread_barrier &tb; // shared barrier for time sync
+    thread_func_t function;
+    thread thr;
+};
+
+/**
+ * Print the command-line help text to stdout. When \p error is non-null it is
+ * appended as an "Error: ..." line.
+ */
+static
+void usage(const char *error) {
+    static const char *const helpText[] = {
+        "Usage: hsbench [OPTIONS...]\n\n",
+        "Options:\n\n",
+        " -h Display help and exit.\n",
+        " -G OVERRIDES Overrides for the grey box.\n",
+        " -e PATH Path to expression directory.\n",
+        " -s FILE Signature file to use.\n",
+        " -z NUM Signature ID to use.\n",
+        " -c FILE File to use as corpus.\n",
+        " -n NUMBER Repeat scan NUMBER times (default 20).\n",
+        " -N Benchmark in block mode"
+        " (default: streaming).\n",
+        " -V Benchmark in vectored mode"
+        " (default: streaming).\n",
+        " -T CPU,CPU,... Benchmark with threads on these CPUs.\n",
+        " -i DIR Don't compile, load from files in DIR"
+        " instead.\n",
+        " -w DIR After compiling, save to files in DIR.\n",
+        " -d NUMBER Set SOM precision mode (default: 8 (large)).\n",
+        "\n",
+        " --per-scan Display per-scan Mbit/sec results.\n",
+        " --echo-matches Display all matches that occur during scan.\n",
+        "\n\n",
+    };
+
+    for (const char *chunk : helpText) {
+        printf("%s", chunk);
+    }
+
+    if (error != nullptr) {
+        printf("Error: %s\n", error);
+    }
+}
+
+/**
+ * A named group of signature IDs to be compiled and benchmarked together.
+ */
+struct BenchmarkSigs {
+    string name;       // label for this set (file name, "-z N", ...)
+    SignatureSet sigs; // the signature IDs belonging to the set
+
+    BenchmarkSigs(string name_in, SignatureSet sigs_in)
+        : name(move(name_in)),
+          sigs(move(sigs_in)) {
+    }
+};
+
+/**
+ * Process command-line arguments. Prints usage and exits on error.
+ *
+ * Fills the file-level option globals, appends one BenchmarkSigs entry to
+ * \p sigSets for every -z ID and every signature file listed after -s, and
+ * (in non-release builds) applies -G overrides to \p grey.
+ *
+ * Signature files are collected with a small countdown: -s arms in_sigfile,
+ * each following non-option argument is taken as a signature file and re-arms
+ * it, and any other option lets it run down to zero again.
+ */
+static
+void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
+                 UNUSED Grey &grey) {
+    // The leading '-' makes getopt_long hand non-option arguments back as
+    // option code 1 (handled in 'case 1' below).
+    const char options[] = "-b:c:Cd:e:G:hi:n:No:p:sT:Vw:z:";
+    int in_sigfile = 0;
+    int do_per_scan = 0;
+    int do_echo_matches = 0;
+    vector<string> sigFiles;
+
+    // Not 'static': the table stores the addresses of the automatic flag
+    // variables above, which would dangle on any later call if the table
+    // were only initialised once.
+    const struct option longopts[] = {
+        {"per-scan", 0, &do_per_scan, 1},
+        {"echo-matches", 0, &do_echo_matches, 1},
+        {nullptr, 0, nullptr, 0}
+    };
+
+    for (;;) {
+        int c = getopt_long(argc, argv, options, longopts, nullptr);
+        if (c < 0) {
+            break;
+        }
+        switch (c) {
+        case 'c':
+            corpusFile.assign(optarg);
+            break;
+        case 'd': {
+            unsigned dist;
+            if (!fromString(optarg, dist)) {
+                usage("Must provide an integer argument to '-d' flag");
+                exit(1);
+            }
+            switch (dist) {
+            case 2:
+                somPrecisionMode = HS_MODE_SOM_HORIZON_SMALL;
+                break;
+            case 4:
+                somPrecisionMode = HS_MODE_SOM_HORIZON_MEDIUM;
+                break;
+            case 8:
+                somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE;
+                break;
+            default:
+                usage("SOM precision must be 2, 4 or 8");
+                exit(1);
+            }
+            break;
+        }
+        case 'e':
+            exprPath.assign(optarg);
+            break;
+#ifndef RELEASE_BUILD
+        case 'G':
+            applyGreyOverrides(&grey, string(optarg));
+            break;
+#endif
+        case 'h':
+            usage(nullptr);
+            exit(0);
+            break;
+        case 'n':
+            if (!fromString(optarg, repeats) || repeats == 0) {
+                usage("Couldn't parse argument to -n flag, should be"
+                      " a positive integer.");
+                exit(1);
+            }
+            break;
+        case 's':
+            // Armed at 2 so that it is still 1 after this iteration's
+            // decrement, i.e. when the next argument is examined.
+            in_sigfile = 2;
+            break;
+        case 'N':
+            scan_mode = ScanMode::BLOCK;
+            break;
+        case 'V':
+            scan_mode = ScanMode::VECTORED;
+            break;
+        case 'T':
+            if (!strToList(optarg, threadCores)) {
+                usage("Couldn't parse argument to -T flag, should be"
+                      " a list of positive integers.");
+                exit(1);
+            }
+            break;
+        case 'z': {
+            unsigned int sinumber;
+            if (!fromString(optarg, sinumber)) {
+                usage("Argument to '-z' flag must be an integer");
+                exit(1);
+            }
+            SignatureSet sigs = {sinumber};
+            sigSets.emplace_back(string("-z ") + optarg, sigs);
+            break;
+        }
+        case 'i':
+            loadDatabases = true;
+            serializePath = optarg;
+            break;
+        case 'w':
+            saveDatabases = true;
+            serializePath = optarg;
+            break;
+        case 1:
+            // Non-option argument: a signature file if we are inside a -s
+            // list, otherwise silently ignored.
+            if (in_sigfile) {
+                sigFiles.push_back(optarg);
+                in_sigfile = 2;
+                break;
+            }
+            // fallthrough
+        case 0:
+            // A long option that only set its flag variable.
+            break;
+        default:
+            usage("Unrecognised command line argument.");
+            exit(1);
+        }
+
+        if (in_sigfile) {
+            in_sigfile--;
+        }
+    }
+
+    if (do_echo_matches) {
+        echo_matches = true;
+    }
+    if (do_per_scan) {
+        display_per_scan = true;
+    }
+
+    if (exprPath.empty() && !sigFiles.empty()) {
+        /* attempt to infer an expression directory */
+        auto si = sigFiles.begin();
+        exprPath = inferExpressionPath(*si);
+        for (++si; si != sigFiles.end(); ++si) {
+            if (exprPath != inferExpressionPath(*si)) {
+                usage("Unable to infer consistent expression directory");
+                exit(1);
+            }
+        }
+    }
+
+    // Must have a valid expression path
+    if (exprPath.empty()) {
+        usage("Must specify an expression path with the -e option.");
+        exit(1);
+    }
+
+    // Must have valid database to scan
+    if (corpusFile.empty()) {
+        usage("Must specify a corpus file with the -c option.");
+        exit(1);
+    }
+
+    // Cannot ask for both loading and saving
+    if (loadDatabases && saveDatabases) {
+        usage("You cannot both load and save databases.");
+        exit(1);
+    }
+
+    // Read in any -s signature sets.
+    for (const auto &file : sigFiles) {
+        SignatureSet sigs;
+        loadSignatureList(file, sigs);
+        sigSets.emplace_back(file, move(sigs));
+    }
+}
+
+/** Start the global timer; a no-op in every thread except thread 0. */
+static
+void startTotalTimer(ThreadContext *ctx) {
+    const bool isFirstThread = (ctx->num == 0);
+    if (isFirstThread) {
+        totalTimer.start();
+    }
+}
+
+/**
+ * Stop the global timer and record the elapsed time in totalSecs; a no-op in
+ * every thread except thread 0.
+ */
+static
+void stopTotalTimer(ThreadContext *ctx) {
+    const bool isFirstThread = (ctx->num == 0);
+    if (isFirstThread) {
+        totalTimer.complete();
+        totalSecs = totalTimer.seconds();
+    }
+}
+
+/**
+ * Thread body for block mode: scan every corpus block independently, once
+ * per ResultEntry, timing each pass over the corpus.
+ */
+static
+void benchBlock(void *context) {
+    ThreadContext *self = (ThreadContext *)context;
+
+    // Wait until every thread is ready to scan.
+    self->barrier();
+
+    startTotalTimer(self);
+
+    for (ResultEntry &pass : self->results) {
+        self->timer.start();
+
+        for (const DataBlock &blk : self->corpus_data) {
+            const string &data = blk.payload;
+            self->engine.scan(data.c_str(), data.size(), blk.id, pass,
+                              *self->enginectx);
+        }
+
+        self->timer.complete();
+        pass.seconds = self->timer.seconds();
+    }
+
+    // Wait until every thread has finished scanning...
+    self->barrier();
+
+    // ...and only then stop the clock.
+    stopTotalTimer(self);
+}
+
+/**
+ * Bookkeeping for one stream in streaming mode. A freshly constructed entry
+ * has first_block_id > last_block_id, which marks it as "not yet seen".
+ */
+struct StreamInfo {
+    unsigned int stream_id{~0U};         // corpus stream id
+    unsigned int first_block_id{~0U};    // id of the block that opens it
+    unsigned int last_block_id{0};       // id of the block that closes it
+    unique_ptr<EngineStream> eng_handle; // open engine stream, if any
+};
+
+/** Count the distinct stream ids that occur in the corpus. */
+static
+u64a count_streams(const vector<DataBlock> &corpus_blocks) {
+    set<unsigned int> distinctIds;
+    for (auto it = corpus_blocks.begin(); it != corpus_blocks.end(); ++it) {
+        distinctIds.insert(it->stream_id);
+    }
+
+    const u64a total = (u64a)distinctIds.size();
+    return total;
+}
+
+/**
+ * Build one StreamInfo per stream in the thread's corpus, indexed by each
+ * block's internal_stream_index, recording the stream id and the ids of the
+ * first and last block that belong to it.
+ */
+static
+vector<StreamInfo> prepStreamingData(const ThreadContext *ctx) {
+    const vector<DataBlock> &blocks = ctx->corpus_data;
+    vector<StreamInfo> streams(count_streams(blocks));
+
+    for (const DataBlock &blk : blocks) {
+        assert(blk.internal_stream_index < streams.size());
+        StreamInfo &entry = streams[blk.internal_stream_index];
+
+        /* an untouched entry still has first_block_id > last_block_id */
+        const bool seenBefore = entry.first_block_id <= entry.last_block_id;
+        if (seenBefore) {
+            assert(blk.stream_id == entry.stream_id);
+            assert(blk.id > entry.last_block_id);
+            assert(blk.id > entry.first_block_id);
+            entry.last_block_id = blk.id;
+            continue;
+        }
+
+        entry.stream_id = blk.stream_id;
+        entry.first_block_id = blk.id;
+        entry.last_block_id = blk.id;
+    }
+
+    return streams;
+}
+
+/**
+ * Timed inner loop for streaming mode. For every ResultEntry, walks the
+ * corpus in order, opening a stream at its first block, scanning each block
+ * into it, and closing it after its last block. Exits the process if a
+ * stream cannot be opened.
+ */
+static
+void benchStreamingInternal(ThreadContext *ctx, vector<StreamInfo> &streams) {
+    assert(ctx);
+    const EngineHyperscan &engine = ctx->engine;
+    const vector<DataBlock> &corpus = ctx->corpus_data;
+
+    for (ResultEntry &pass : ctx->results) {
+        ctx->timer.start();
+
+        for (const DataBlock &blk : corpus) {
+            StreamInfo &si = streams[blk.internal_stream_index];
+            assert(si.stream_id == blk.stream_id);
+
+            const bool opensStream = (blk.id == si.first_block_id);
+            const bool closesStream = (blk.id == si.last_block_id);
+
+            if (opensStream) {
+                assert(!si.eng_handle);
+                si.eng_handle = engine.streamOpen(*ctx->enginectx,
+                                                  blk.stream_id);
+                if (!si.eng_handle) {
+                    printf("Fatal error: stream open failed!\n");
+                    exit(1);
+                }
+            }
+
+            assert(si.eng_handle);
+
+            engine.streamScan(*si.eng_handle, blk.payload.c_str(),
+                              blk.payload.size(), blk.id, pass);
+
+            if (closesStream) {
+                // Ownership of the handle passes to streamClose.
+                engine.streamClose(move(si.eng_handle), pass);
+                si.eng_handle.reset();
+            }
+        }
+
+        ctx->timer.complete();
+        pass.seconds = ctx->timer.seconds();
+    }
+}
+
+/**
+ * Thread body for streaming mode: prepare per-stream bookkeeping outside the
+ * timed region, then run the streaming scan loop between two barriers.
+ */
+static
+void benchStreaming(void *context) {
+    ThreadContext *self = (ThreadContext *)context;
+    vector<StreamInfo> streamTable = prepStreamingData(self);
+
+    // Wait until every thread is ready to scan.
+    self->barrier();
+
+    startTotalTimer(self);
+
+    benchStreamingInternal(self, streamTable);
+
+    // Wait until every thread has finished scanning...
+    self->barrier();
+
+    // ...and only then stop the clock.
+    stopTotalTimer(self);
+}
+
+/**
+ * In-memory structure for a data block to be scanned in vectored mode: the
+ * parallel arrays of buffer pointers and lengths for one stream, as filled in
+ * by prepVectorData().
+ */
+struct VectoredInfo {
+ // Pointers into the payloads of the stream's blocks (not owned).
+ vector<const char *> data;
+ // Length of each buffer in 'data', same order.
+ vector<unsigned int> len;
+ // Stream id shared by all the blocks; set when the first block is added.
+ unsigned int stream_id;
+};
+
+/**
+ * Group the thread's corpus blocks by stream into VectoredInfo entries
+ * (indexed by internal_stream_index), each holding the payload pointers and
+ * lengths of that stream's blocks in corpus order.
+ */
+static
+vector<VectoredInfo> prepVectorData(const ThreadContext *ctx) {
+    const vector<DataBlock> &blocks = ctx->corpus_data;
+    vector<VectoredInfo> plans(count_streams(blocks));
+
+    for (const DataBlock &blk : blocks) {
+        VectoredInfo &plan = plans[blk.internal_stream_index];
+
+        const bool firstBlockOfStream = plan.data.empty();
+        if (firstBlockOfStream) {
+            plan.stream_id = blk.stream_id;
+        } else {
+            assert(plan.stream_id == blk.stream_id);
+        }
+
+        const string &bytes = blk.payload;
+        plan.data.push_back(bytes.c_str());
+        plan.len.push_back(bytes.size());
+    }
+
+    return plans;
+}
+
+/**
+ * Thread body for vectored mode: build one buffer vector per stream outside
+ * the timed region, then scan every vector once per ResultEntry.
+ */
+static
+void benchVectored(void *context) {
+    ThreadContext *self = (ThreadContext *)context;
+
+    vector<VectoredInfo> plans = prepVectorData(self);
+
+    // Wait until every thread is ready to scan.
+    self->barrier();
+
+    startTotalTimer(self);
+
+    for (ResultEntry &pass : self->results) {
+        self->timer.start();
+
+        for (const VectoredInfo &plan : plans) {
+            const char *const *buffers = plan.data.data();
+            const unsigned int *lengths = plan.len.data();
+            self->engine.scan_vectored(buffers, lengths, plan.data.size(),
+                                       plan.stream_id, pass,
+                                       *self->enginectx);
+        }
+
+        self->timer.complete();
+        pass.seconds = self->timer.seconds();
+    }
+
+    // Wait until every thread has finished scanning...
+    self->barrier();
+
+    // ...and only then stop the clock.
+    stopTotalTimer(self);
+}
+
+/**
+ * Convert a byte count scanned in a given number of seconds into megabits
+ * per second (one megabit being 125000 bytes). \p seconds must be positive.
+ */
+static
+long double calc_mbps(double seconds, u64a bytes) {
+    assert(seconds > 0);
+    const long double elapsed = (long double)seconds;
+    const long double scanned = (long double)bytes;
+    return scanned / (elapsed * 125000);
+}
+
+/**
+ * Print one throughput line per thread and per scan, followed by a blank
+ * line.
+ */
+static
+void displayPerScanResults(const vector<unique_ptr<ThreadContext>> &threads,
+                           u64a bytesPerRun) {
+    for (const auto &t : threads) {
+        size_t scanIdx = 0;
+        for (const ResultEntry &r : t->results) {
+            double mbps = calc_mbps(r.seconds, bytesPerRun);
+            printf("T %2u Scan %2zu: %'0.2f Mbit/sec\n", t->num, scanIdx,
+                   mbps);
+            ++scanIdx;
+        }
+    }
+    printf("\n");
+}
+
+/** Sum of the payload sizes of all blocks in the corpus, in bytes. */
+static
+u64a byte_size(const vector<DataBlock> &corpus_blocks) {
+    return accumulate(corpus_blocks.begin(), corpus_blocks.end(), (u64a)0,
+                      [](u64a sum, const DataBlock &blk) -> u64a {
+                          return sum + blk.payload.size();
+                      });
+}
+
+/**
+ * Print the summary for one benchmark run: scan time, corpus size and shape,
+ * match rate, block rate and overall throughput, optionally followed by the
+ * per-scan breakdown. Warns if any scan produced a different match count
+ * from the first one.
+ */
+static
+void displayResults(const vector<unique_ptr<ThreadContext>> &threads,
+                    const vector<DataBlock> &corpus_blocks) {
+    const u64a bytesPerRun = byte_size(corpus_blocks);
+    const u64a matchesPerRun = threads[0]->results[0].matches;
+
+    // Sanity check: all of our results should have the same match count.
+    bool consistent = true;
+    for (const auto &t : threads) {
+        for (const ResultEntry &e : t->results) {
+            if (!(e.matches == matchesPerRun)) {
+                consistent = false;
+                break;
+            }
+        }
+        if (!consistent) {
+            break;
+        }
+    }
+    if (!consistent) {
+        printf("\nWARNING: PER-SCAN MATCH COUNTS ARE INCONSISTENT!\n\n");
+    }
+
+    printf("Time spent scanning: %'0.3f seconds\n", totalSecs);
+    printf("Corpus size: %'llu bytes ", bytesPerRun);
+
+    const size_t numBlocks = corpus_blocks.size();
+    switch (scan_mode) {
+    case ScanMode::STREAMING:
+        printf("(%'zu blocks in %'llu streams)\n", numBlocks,
+               count_streams(corpus_blocks));
+        break;
+    case ScanMode::VECTORED:
+        printf("(%'zu blocks in %'llu vectors)\n", numBlocks,
+               count_streams(corpus_blocks));
+        break;
+    case ScanMode::BLOCK:
+        printf("(%'zu blocks)\n", numBlocks);
+        break;
+    }
+
+    double matchRate = ((double)matchesPerRun * 1024) / bytesPerRun;
+    printf("Matches per iteration: %'llu (%'0.3f matches/kilobyte)\n",
+           matchesPerRun, matchRate);
+
+    // Totals across every repeat of every thread.
+    u64a totalBlocks = corpus_blocks.size() * repeats * threads.size();
+    u64a totalBytes = bytesPerRun * repeats * threads.size();
+
+    double blockRate = (double)totalBlocks / (double)totalSecs;
+    printf("Overall block rate: %'0.2f blocks/sec\n", blockRate);
+    printf("Overall throughput: %'0.2Lf Mbit/sec\n",
+           calc_mbps(totalSecs, totalBytes));
+    printf("\n");
+
+    if (display_per_scan) {
+        displayPerScanResults(threads, bytesPerRun);
+    }
+}
+
+/**
+ * Construct a thread context whose thread function matches the current
+ * scanning mode.
+ *
+ * The blocks are received by reference here but are copied when handed to
+ * the ThreadContext constructor, so every thread ends up with its own copy
+ * of the data. It would be unrealistic for every thread to be scanning the
+ * same copy of the data.
+ */
+static
+unique_ptr<ThreadContext> makeThreadContext(const EngineHyperscan &db,
+                                            const vector<DataBlock> &blocks,
+                                            unsigned id,
+                                            thread_barrier &sync_barrier) {
+    thread_func_t worker = nullptr;
+    if (scan_mode == ScanMode::STREAMING) {
+        worker = benchStreaming;
+    } else if (scan_mode == ScanMode::VECTORED) {
+        worker = benchVectored;
+    } else if (scan_mode == ScanMode::BLOCK) {
+        worker = benchBlock;
+    }
+    assert(worker);
+
+    return ue2::make_unique<ThreadContext>(id, db, sync_barrier, worker,
+                                           blocks);
+}
+
+/**
+ * Run the benchmark for one engine: start one thread per requested CPU (or a
+ * single unpinned thread if none were requested), wait for them all, then
+ * print the results. Exits the process if a thread cannot be started.
+ */
+static
+void runBenchmark(const EngineHyperscan &db,
+                  const vector<DataBlock> &corpus_blocks) {
+    const bool coresRequested = !threadCores.empty();
+    const size_t numThreads = coresRequested ? threadCores.size() : 1;
+
+    // Pin threads only if cores were requested and the platform can do it.
+#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
+    const bool useAffinity = coresRequested;
+#else
+    const bool useAffinity = false;
+#endif
+
+    // Initialise a barrier that will let us sync threads before/after scanning
+    // for timer measurements.
+    thread_barrier sync_barrier(numThreads);
+
+    vector<unique_ptr<ThreadContext>> threads;
+
+    for (unsigned i = 0; i < numThreads; i++) {
+        auto worker = makeThreadContext(db, corpus_blocks, i, sync_barrier);
+        int core = -1;
+        if (useAffinity) {
+            core = (int)threadCores[i];
+        }
+        if (!worker->start(core)) {
+            printf("Unable to start processing thread %u\n", i);
+            exit(1);
+        }
+        threads.push_back(move(worker));
+    }
+
+    // Reap threads.
+    for (auto &worker : threads) {
+        worker->join();
+    }
+
+    // Display global results.
+    displayResults(threads, corpus_blocks);
+}
+
+} // namespace
+
+/**
+ * Main driver: parse the command line, load expressions and corpus, then
+ * compile and benchmark each requested signature set in turn.
+ */
+int main(int argc, char *argv[]) {
+    Grey grey;
+
+    setlocale(LC_ALL, ""); // use the user's locale
+
+#ifndef NDEBUG
+    printf("\nWARNING: DO NOT BENCHMARK A HYPERSCAN BUILD WITH ASSERTIONS\n\n");
+#endif
+
+    vector<BenchmarkSigs> sigSets;
+    processArgs(argc, argv, sigSets, grey);
+
+    // Load every expression found under the expression path.
+    ExpressionMap allExpressions;
+    loadExpressions(exprPath, allExpressions);
+
+    // With no explicit signature sets, benchmark all the known expressions
+    // together as a single set.
+    if (sigSets.empty()) {
+        SignatureSet everything;
+        for (auto id : allExpressions | map_keys) {
+            everything.push_back(id);
+        }
+        sigSets.emplace_back(exprPath, move(everything));
+    }
+
+    // Load the corpus to scan.
+    vector<DataBlock> corpus_blocks;
+    try {
+        corpus_blocks = readCorpus(corpusFile);
+    } catch (const DataCorpusError &e) {
+        printf("Corpus data error: %s\n", e.msg.c_str());
+        return 1;
+    }
+
+    for (const auto &sigSet : sigSets) {
+        // Work on a copy so that each set is filtered from the full map.
+        ExpressionMap selected = allExpressions;
+        limitBySignature(selected, sigSet.sigs);
+
+        if (!selected.empty()) {
+            auto engine = buildEngineHyperscan(selected, scan_mode,
+                                               sigSet.name, grey);
+            if (!engine) {
+                printf("Error: expressions failed to compile.\n");
+                exit(1);
+            }
+
+            printf("\n");
+
+            runBenchmark(*engine, corpus_blocks);
+        }
+    }
+
+    return 0;
+}
--- /dev/null
+#!/usr/bin/python
+
+'''
+A module to construct corpora databases for the Hyperscan benchmarker
+(hsbench).
+
+After construction, simply add blocks with the add_chunk() method, then call
+finish() when you're done.
+'''
+
+import os.path
+
+try:
+ from sqlite3 import dbapi2 as sqlite
+except:
+ from pysqlite2 import dbapi2 as sqlite
+
+class CorpusBuilder:
+ SCHEMA = '''
+CREATE TABLE chunk (
+ id integer primary key,
+ stream_id integer not null,
+ data blob
+);
+'''
+
+ def __init__(self, outfile):
+ if os.path.exists(outfile):
+ raise RuntimeError("Database '%s' already exists" % outfile)
+ self.outfile = outfile
+ self.db = sqlite.connect(self.outfile)
+ self.db.executescript(CorpusBuilder.SCHEMA)
+ self.current_chunk_id = 0;
+
+ def add_chunk(self, stream_id, data):
+ chunk_id = self.current_chunk_id;
+ c = self.db.cursor()
+ q = 'insert into chunk (id, stream_id, data) values (?, ?, ?)'
+ c.execute(q, (chunk_id, stream_id, sqlite.Binary(data)))
+ self.current_chunk_id += 1
+ return chunk_id
+
+ def finish(self):
+ self.db.commit()
+
+ c = self.db.cursor()
+ q = 'create index chunk_stream_id_idx on chunk(stream_id)'
+ c.execute(q)
+
+ c = self.db.cursor()
+ q = 'vacuum'
+ c.execute(q)
+
+ c = self.db.cursor()
+ q = 'analyze'
+ c.execute(q)
+
+ self.db.commit()
--- /dev/null
+#!/usr/bin/python
+
+'''
+This script creates a Hyperscan benchmarking corpus database from a supplied
+group of Project Gutenberg texts.
+'''
+
+import sys, getopt, os.path
+import gutenberg.acquire, gutenberg.cleanup, gutenberg.query
+from CorpusBuilder import CorpusBuilder
+
+stream_id = 0
+stream_bytes = 0
+
+def addBlocks(builder, block_size, stream_size, text_id, text):
+ global stream_id
+ global stream_bytes
+
+ print "text", text_id, "len", len(text)
+ i = 0
+ while i < len(text):
+ chunk = text[i:min(len(text), i + block_size)]
+ builder.add_chunk(stream_id, chunk)
+ i += block_size
+ stream_bytes += len(chunk)
+ if stream_bytes >= stream_size:
+ stream_id += 1
+ stream_bytes = 0
+ print "Text", text_id, ": added", i/block_size, "blocks of", block_size, "bytes."
+
+def buildCorpus(outFN, block_size, stream_size, text_ids):
+ if len(text_ids) == 0:
+ print >>sys.stderr, "Must provide at least one input ID"
+ sys.exit(0)
+
+ builder = CorpusBuilder(outFN)
+
+ total_bytes = 0
+ stream_id = 0
+ stream_bytes = 0
+
+ for text_id in text_ids:
+ text_id = int(text_id)
+ text = gutenberg.acquire.load_etext(text_id)
+ text = gutenberg.cleanup.strip_headers(text).strip()
+ addBlocks(builder, block_size, stream_size, text_id, text)
+ total_bytes += len(text)
+
+ builder.finish()
+
+ print "Total:", total_bytes, "bytes."
+
+def usage(exeName):
+ errmsg = "Usage: %s -o <output file> -b <block size> -s <max stream size> <gutenberg text id>..."
+ errmsg = errmsg % exeName
+ print >> sys.stderr, errmsg
+ sys.exit(-1)
+
+if __name__ == '__main__':
+ opts, args = getopt.getopt(sys.argv[1:], 'o:b:s:')
+ opts = dict(opts)
+
+ requiredKeys = [ '-o', '-b', '-s' ]
+ for k in requiredKeys:
+ if not opts.has_key(k):
+ usage(os.path.basename(sys.argv[0]))
+
+ buildCorpus(opts['-o'], int(opts['-b']), int(opts['-s']), args)
--- /dev/null
+#!/usr/bin/python
+
+'''
+Simple script to take a file full of lines of text and push them into a
+Hyperscan benchmarking corpus database, one block per line.
+'''
+
+import sys, getopt, os.path
+from CorpusBuilder import CorpusBuilder
+
+def lineCorpus(inFN, outFN):
+ '''
+ Read lines from file name @inFN and write them as blocks to a new db with
+ name @outFN.
+ '''
+
+ if not os.path.exists(inFN):
+ print >> sys.stderr, "Input file '%s' does not exist. Exiting." % outFN
+ sys.exit(-1)
+
+ lines = open(inFN).readlines()
+
+ if len(lines) == 0:
+ print >> sys.stderr, "Input file contained no lines. Exiting."
+ sys.exit(0)
+
+ builder = CorpusBuilder(outFN)
+
+ # write a single stream to contain everything
+ streamId = 0
+
+ for l in lines:
+ builder.add_chunk(streamId, l.rstrip())
+
+ builder.finish()
+
+def usage(exeName):
+ errmsg = "Usage: %s -i <input file> -o <output file>"
+ errmsg = errmsg % exeName
+ print >> sys.stderr, errmsg
+ sys.exit(-1)
+
+if __name__ == '__main__':
+ args = getopt.getopt(sys.argv[1:], 'i:o:c:')
+ args = dict(args[0])
+
+ requiredKeys = [ '-i', '-o' ]
+ for k in requiredKeys:
+ if not args.has_key(k):
+ usage(os.path.basename(sys.argv[0]))
+
+ fnArgs = tuple([args[k] for k in requiredKeys])
+ lineCorpus(*fnArgs)
--- /dev/null
+#!/usr/bin/env python
+
+'''
+Script to convert a pcap file containing UDP and TCP packets to a corpus file.
+'''
+
+import sys, getopt, pprint, os
+from sqlite3 import dbapi2 as sqlite
+import pcap
+from optparse import OptionParser
+from socket import AF_INET, IPPROTO_UDP, IPPROTO_TCP, inet_ntop, ntohs, ntohl, inet_ntoa
+import struct
+from CorpusBuilder import CorpusBuilder
+
+ETHERTYPE_IP = 0x0800 # IP protocol
+ETHERTYPE_ARP = 0x0806 # Addr. resolution protocol
+ETHERTYPE_REVARP = 0x8035 # reverse Addr. resolution protocol
+ETHERTYPE_VLAN = 0x8100 # IEEE 802.1Q VLAN tagging
+ETHERTYPE_IPV6 = 0x86dd # IPv6
+
+#
+# A dictionary of active TCP streams
+#
+tcp_streams = {}
+
+#
+# A dictionary of UDP streams
+#
+udp_streams = {}
+
+#
+# Current stream id
+cur_stream_id = 0
+
+def usage(exeName) :
+ errmsg = "Usage: %s -i <pcap-file> -o <sqlite-file>"
+ errmsg = errmsg % exeName
+ print >> sys.stderr, errmsg
+ sys.exit(-1)
+
+class FiveTuple(object):
+ def __init__(self, protocol, src_addr, src_port, dst_addr, dst_port):
+ self.protocol = protocol
+ self.src_addr = src_addr
+ self.src_port = src_port
+ self.dst_addr = dst_addr
+ self.dst_port = dst_port
+
+ def __str__(self):
+ return "%d,%s,%d,%s,%d" % (self.protocol, self.src_addr, self.src_port, self.dst_addr, self.dst_port)
+
+class UdpSegment:
+ """Definition of a UDP segment
+ """
+ def __init__(self, five_tuple, header, payload):
+ self.five_tuple = five_tuple
+ self.udp_header = header
+ self.udp_payload = payload
+
+class TcpSegment:
+ """Definition of a TCP segment
+ """
+ def __init__(self, five_tuple, header, payload):
+ self.five_tuple = five_tuple
+ self.tcp_header = header
+ self.tcp_payload = payload
+ self.tcp_sequence_number, self.tcp_acknowledgement_number = struct.unpack('!LL', header[4:12])
+
+ def opt_isset_FIN(self):
+ opts = ord(self.tcp_header[13]) & 0x3F
+ return (opts & 0x01)
+
+ def opt_isset_SYN(self):
+ opts = ord(self.tcp_header[13]) & 0x3F
+ return (opts & 0x02)
+
+ def get_sequence_number(self):
+ return self.tcp_sequence_number
+
+ def __cmp__(self, other):
+ return cmp(self.tcp_sequence_number, other.tcp_sequence_number)
+
+class TcpStream:
+ """Definition of a TCP stream.
+ """
+ TCP_STREAM_ACTIVE = 0x1
+ TCP_STREAM_CLOSED = 0x02
+
+ def __init__(self, five_tuple):
+ self.five_tuple = five_tuple
+ self.initial_sequence_number = 0
+ self.segments = []
+
+ def reset_stream(self):
+ self.segments = []
+ self.initial_sequence_number = 0
+
+ def set_initial_sequence_number(self, sequence_number):
+ self.initial_sequence_number = sequence_number
+
+ def append_segment(self, tcp_segment):
+ if len(self.segments) == 0:
+ self.set_initial_sequence_number(tcp_segment.get_sequence_number())
+ self.segments.append(tcp_segment)
+
+ def get_segments_sorted(self):
+ return sorted(self.segments)
+
+class UdpStream:
+ """A container for UDP packets that share the same 5-tuple
+ """
+ def __init__(self, five_tuple):
+ self.five_tuple = five_tuple
+ self.segments = []
+
+ def append_segment(self, udp_segment):
+ self.segments.append(udp_segment)
+
+
+def newStream(five_tuple):
+ '''
+ Create a new stream using the arguments passed-in and return its ID.
+ '''
+ global cur_stream_id
+ stream_id = cur_stream_id
+ cur_stream_id += 1
+ return stream_id
+
+def process_tcp_segment(builder, segment):
+ """Process a tcp segment. It checks for SYN and FIN segments are
+ if set modifies the associated stream.
+ """
+ segment_id = str(segment.five_tuple)
+ if segment_id in tcp_streams:
+ m_tcp_stream = tcp_streams[segment_id]
+ m_tcp_stream.append_segment(segment)
+ else:
+ m_tcp_stream = TcpStream(segment.five_tuple)
+ m_tcp_stream.append_segment(segment)
+ tcp_streams[segment_id] = m_tcp_stream
+
+
+ if segment.opt_isset_SYN():
+ m_tcp_stream.segments = []
+
+ if segment.opt_isset_FIN():
+ #
+ # Finished with the stream - add the segments in the
+ # stream to db allowing the stream to be reused.
+ #
+ db_add_tcp_stream_segments(builder, m_tcp_stream)
+ del tcp_streams[segment_id]
+
+def process_udp_segment(builder, segment):
+ """ Process a UDP segment. Given the connectionless nature of the UDP
+ protocol we simple accumulate the segment for later processing
+ when all the packets have been read
+ """
+ segment_id = str(segment.five_tuple)
+ if segment_id in udp_streams:
+ m_udp_stream = udp_streams[segment_id]
+ m_udp_stream.append_segment(segment)
+ else:
+ m_udp_stream = UdpStream(segment.five_tuple)
+ m_udp_stream.append_segment(segment)
+ udp_streams[segment_id] = m_udp_stream
+
+
+def db_add_tcp_stream_segments(builder, tcp_stream):
+ """Add the contents of a tcp stream to the database
+ """
+ tcp_segments = tcp_stream.get_segments_sorted()
+ last_sequence_num = 0
+ streamID = None
+
+ for tcp_segment in tcp_segments:
+ if (len(tcp_segment.tcp_payload) > 0) and (tcp_segment.tcp_sequence_number > last_sequence_num):
+ #
+ # Segment with an actual payload - add it to the stream's
+ # list of chunks.
+ #
+ # Note: delay creating the stream until we have a via chunk to
+ # commit to it
+ #
+ if streamID == None:
+ streamID = newStream(tcp_stream.five_tuple)
+ builder.add_chunk(streamID, tcp_segment.tcp_payload)
+ last_sequence_num = tcp_segment.tcp_sequence_number
+
+
+def db_add_udp_stream_segments(builder, udp_stream):
+ """Add the contents of a UDP stream to the database. Since UDP is
+ connection-less, a UDP stream object is really just an accumulation
+ of all the packets associated with a given 5-tuple.
+ """
+ udp_segments = udp_stream.segments
+ streamID = None
+ for udp_segment in udp_segments:
+ if len(udp_segment.udp_payload) > 0:
+ if streamID == None:
+ streamID = newStream(udp_stream.five_tuple)
+ builder.add_chunk(streamID, udp_segment.udp_payload)
+
+def enchunk_pcap(pcapFN, sqliteFN):
+ """Read the contents of a pcap file with name @pcapFN and produce
+ a sqlite db with name @sqliteFN. It will contain chunks of data
+ from TCP and UDP streams,
+ """
+
+ if not os.path.exists(pcapFN):
+ print >> sys.stderr, "Input file '%s' does not exist. Exiting." % pcapFN
+ sys.exit(-1)
+
+ builder = CorpusBuilder(sqliteFN)
+
+ #
+ # Read in the contents of the pcap file, adding stream segments as found
+ #
+ pkt_cnt = 0;
+ ip_pkt_cnt = 0;
+ unsupported_ip_protocol_cnt = 0
+ pcap_ref = pcap.pcap(pcapFN)
+ done = False
+
+ while not done:
+ try:
+ ts, packet = pcap_ref.next()
+ except:
+ break
+
+ pkt_cnt += 1
+
+ linkLayerType = struct.unpack('!H', packet[(pcap_ref.dloff - 2):pcap_ref.dloff])[0]
+ if linkLayerType != ETHERTYPE_IP:
+ #
+ # We're only interested in IP packets
+ #
+ continue
+
+ ip_pkt_cnt += 1
+
+ ip_pkt_total_len = struct.unpack('!H', packet[pcap_ref.dloff + 2: pcap_ref.dloff + 4])[0]
+ ip_pkt = packet[pcap_ref.dloff:pcap_ref.dloff + ip_pkt_total_len]
+ pkt_protocol = struct.unpack('B', ip_pkt[9])[0]
+
+ if (pkt_protocol != IPPROTO_UDP) and (pkt_protocol != IPPROTO_TCP):
+ #
+ # we're only interested in UDP and TCP packets at the moment
+ #
+ continue
+
+ pkt_src_addr = inet_ntoa(ip_pkt[12:16])
+ pkt_dst_addr = inet_ntoa(ip_pkt[16:20])
+
+ ip_hdr_len_offset = (ord(ip_pkt[0]) & 0x0f) * 4
+ ip_payload = ip_pkt[ip_hdr_len_offset:len(ip_pkt)]
+
+ pkt_src_port, pkt_dst_port = struct.unpack('!HH', ip_payload[0:4])
+ five_tuple = FiveTuple(pkt_protocol, pkt_src_addr, pkt_src_port, pkt_dst_addr, pkt_dst_port)
+ five_tuple_id = str(five_tuple)
+
+ if pkt_protocol == IPPROTO_UDP:
+ udp_payload_len = struct.unpack('!H', ip_payload[4:6])[0] - 8
+ udp_header = ip_payload[0:8]
+ udp_payload = ip_payload[8:len(ip_payload)]
+ udp_segment = UdpSegment(five_tuple, udp_header, udp_payload)
+ process_udp_segment(builder, udp_segment)
+ elif pkt_protocol == IPPROTO_TCP:
+ tcp_hdr_len = (ord(ip_payload[12]) >> 4) * 4
+ tcp_header = ip_payload[0:tcp_hdr_len]
+ tcp_payload = ip_payload[tcp_hdr_len:len(ip_payload)]
+ segment = TcpSegment(five_tuple, tcp_header, tcp_payload)
+ process_tcp_segment(builder, segment)
+
+ #
+ # Having read the contents of the pcap, we fill the database with any
+ # remaining TCP and UDP segments
+ #
+ for tcp_stream in tcp_streams.itervalues():
+ db_add_tcp_stream_segments(builder, tcp_stream)
+
+ for udp_stream in udp_streams.itervalues():
+ db_add_udp_stream_segments(builder, udp_stream)
+
+ #
+ # We've finished with the database
+ #
+ builder.finish()
+
+if __name__ == '__main__' :
+
+ args = getopt.getopt(sys.argv[1:], 'i:o:')
+ args = dict(args[0])
+
+ requiredKeys = [ '-i', '-o']
+ for k in requiredKeys :
+ if not args.has_key(k) :
+ usage(os.path.basename(sys.argv[0]))
+
+ fnArgs = tuple([ args[k] for k in requiredKeys ])
+ enchunk_pcap(*fnArgs)
--- /dev/null
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * \brief Simple thread barrier.
+ */
+
+#ifndef TOOLS_THREAD_BARRIER_H
+#define TOOLS_THREAD_BARRIER_H
+
+#include <condition_variable>
+#include <mutex>
+#include <stdexcept>
+
+/**
+ * \brief Simple thread barrier class.
+ *
+ * Blocks until wait() has been called N times.
+ */
+class thread_barrier {
+public:
+    /**
+     * \brief Construct a barrier that releases once \p n threads have called
+     * wait().
+     *
+     * \throws std::runtime_error if \p n is zero.
+     */
+    explicit thread_barrier(unsigned int n) : max(n) {
+        if (max == 0) {
+            throw std::runtime_error("invalid barrier");
+        }
+    }
+
+    /**
+     * \brief Block until \c max threads have arrived at the barrier.
+     *
+     * The last arriving thread resets the arrival count, advances the
+     * generation and wakes the others, so the barrier can be reused.
+     */
+    void wait() {
+        std::unique_lock<std::mutex> lock(mtx);
+        // Remember which round this thread arrived in.
+        const unsigned int my_generation = generation;
+        count++;
+        if (count >= max) {
+            count = 0;
+            generation++;
+            condvar.notify_all();
+        } else {
+            // Waiting on the generation (not just the notification) means a
+            // spurious wakeup cannot release this thread early, and a thread
+            // that re-enters wait() immediately cannot be confused with a
+            // waiter from the previous round.
+            condvar.wait(lock,
+                         [&] { return generation != my_generation; });
+        }
+    }
+
+private:
+    std::mutex mtx;                  //!< Guards count and generation.
+    std::condition_variable condvar; //!< Signalled when a round completes.
+    unsigned int count = 0;          //!< Arrivals in the current round.
+    unsigned int generation = 0;     //!< Incremented once per completed round.
+    unsigned int max;                //!< Arrivals needed to release a round.
+};
+
+#endif // TOOLS_THREAD_BARRIER_H
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TIMER_H
+#define TIMER_H
+
+#include "ue2common.h"
+
+#include <chrono>
+
+/**
+ * \brief Interval timer built on std::chrono::steady_clock.
+ *
+ * Call start(), then complete(); seconds() reports the time between the two.
+ */
+class Timer {
+public:
+    Timer() = default;
+
+    /** \brief Record the start of the interval. */
+    void start() { clock_start = Clock::now(); }
+
+    /** \brief Record the end of the interval. */
+    void complete() { clock_end = Clock::now(); }
+
+    /** \brief Time from start() to complete(), in (fractional) seconds. */
+    double seconds() const {
+        return std::chrono::duration<double>(clock_end - clock_start).count();
+    }
+
+protected:
+    using Clock = std::chrono::steady_clock;
+    Clock::time_point clock_start;
+    Clock::time_point clock_end;
+};
+
+#endif // TIMER_H
# utility libs
+CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP)
+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
-include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
+ ${PROJECT_SOURCE_DIR})
set_source_files_properties(
${CMAKE_BINARY_DIR}/tools/ExpressionParser.cpp
)
add_library(corpusomatic STATIC ${corpusomatic_SRCS})
+# Static helper library built from database_util.cpp/.h.
+set(databaseutil_SRCS
+ database_util.cpp
+ database_util.h
+)
+add_library(databaseutil STATIC ${databaseutil_SRCS})
+
+# Static helper library built from cross_compile.cpp/.h.
+set(crosscompileutil_SRCS
+ cross_compile.cpp
+ cross_compile.h
+ )
+add_library(crosscompileutil STATIC ${crosscompileutil_SRCS})
--- /dev/null
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "cross_compile.h"
+#include "src/ue2common.h"
+#include "src/hs_compile.h"
+#include "src/util/make_unique.h"
+
+#include <sstream>
+#include <string>
+
+using namespace std;
+
+/** \brief Pairs an instruction-set option name with the cpu_features value
+ * it selects. */
+struct XcompileMode {
+ const char *name;
+ unsigned long long cpu_features;
+};
+
+/** \brief Option names recognised by xcompileReadMode() and listed by
+ * xcompileUsage(). */
+static const XcompileMode xcompile_options[] = {
+ { "avx2", HS_CPU_FEATURES_AVX2 },
+ { "base", 0 },
+};
+
+/**
+ * \brief Build platform info for the instruction-set option named in \p s.
+ *
+ * The option name is whatever follows the first ':' in \p s, or all of \p s
+ * when it contains no ':'. The result starts from hs_populate_platform() and
+ * has its cpu_features replaced by the value listed for that option in
+ * xcompile_options.
+ *
+ * \return the platform info, or nullptr if the option name is empty or does
+ * not match any entry in xcompile_options.
+ */
+unique_ptr<hs_platform_info> xcompileReadMode(const char *s) {
+    hs_platform_info rv;
+    UNUSED hs_error_t err;
+    err = hs_populate_platform(&rv);
+    assert(!err);
+
+    const string str(s);
+    // With no ':' present the whole string is treated as the option name.
+    const size_t colon = str.find(":");
+    const string opt = (colon == string::npos) ? str : str.substr(colon + 1);
+    bool found_mode = false;
+
+    if (!opt.empty()) {
+        const size_t numOpts = ARRAY_LENGTH(xcompile_options);
+        for (size_t i = 0; i < numOpts; i++) {
+            if (opt.compare(xcompile_options[i].name) == 0) {
+                DEBUG_PRINTF("found opt %zu:%llu\n", i,
+                             xcompile_options[i].cpu_features);
+                rv.cpu_features = xcompile_options[i].cpu_features;
+                found_mode = true;
+                break;
+            }
+        }
+    }
+
+    if (!found_mode) {
+        return nullptr;
+    } else {
+        DEBUG_PRINTF("cpu_features %llx\n", rv.cpu_features);
+        return ue2::make_unique<hs_platform_info>(rv);
+    }
+}
+
+/**
+ * \brief Render \p p as text: the tune value (when non-zero), then " avx2"
+ * if that feature bit is set, then any remaining feature bits as a number.
+ */
+string to_string(const hs_platform_info &p) {
+    ostringstream out;
+    if (p.tune) {
+        out << p.tune;
+    }
+
+    // Peel off the feature bits that have a name; whatever is left over is
+    // reported numerically.
+    u64a remaining = p.cpu_features;
+    if (remaining & HS_CPU_FEATURES_AVX2) {
+        out << " avx2";
+        remaining &= ~HS_CPU_FEATURES_AVX2;
+    }
+    if (remaining) {
+        out << " " << "?cpu_features?:" << remaining;
+    }
+
+    return out.str();
+}
+
+/**
+ * \brief Return a one-line description listing every option name in
+ * xcompile_options, separated by ", ".
+ */
+string xcompileUsage(void) {
+    string variants("Instruction set options: ");
+    // Empty before the first name, ", " before every later one.
+    const char *separator = "";
+    for (const auto &option : xcompile_options) {
+        variants += separator;
+        variants += option.name;
+        separator = ", ";
+    }
+
+    return variants;
+}
--- /dev/null
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CROSS_COMPILE_H
+#define CROSS_COMPILE_H
+
+#include <memory>
+#include <string>
+
+struct hs_platform_info;
+
+/** \brief Build platform info for the instruction-set option named in \p s;
+ * returns nullptr if the option is not recognised. */
+std::unique_ptr<hs_platform_info> xcompileReadMode(const char *s);
+
+/** \brief Return a one-line list of the recognised instruction-set option
+ * names. */
+std::string xcompileUsage(void);
+
+/** \brief Render the tune value and cpu_features of \p p as text. */
+std::string to_string(const hs_platform_info &p);
+
+#endif /* CROSS_COMPILE_H */
--- /dev/null
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "database_util.h"
+
+#include "hs_common.h"
+
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+
+#if defined(HAVE_MMAP)
+#include <sys/mman.h> // for mmap
+#include <unistd.h> // for close
+#include <sys/fcntl.h>
+#include <sys/stat.h>
+#endif
+
+using namespace std;
+
+/**
+ * \brief Serialize \p db and write the bytes to \p filename.
+ *
+ * \param db Database to serialize; must not be null.
+ * \param filename Path of the output file; must not be null.
+ * \param verbose If true, print the destination to stdout first.
+ * \return true only if serialization succeeded and the serialized bytes
+ * were written and the file closed without a stream error.
+ */
+bool saveDatabase(const hs_database_t *db, const char *filename, bool verbose) {
+    assert(db);
+    assert(filename);
+
+    if (verbose) {
+        cout << "Saving database to: " << filename << endl;
+    }
+
+    char *bytes = nullptr;
+    size_t length = 0;
+    hs_error_t err = hs_serialize_database(db, &bytes, &length);
+    if (err != HS_SUCCESS) {
+        return false;
+    }
+
+    assert(bytes);
+    assert(length > 0);
+
+    // Report failure if the file cannot be opened or fully written, rather
+    // than claiming success with a missing or truncated file.
+    bool ok = false;
+    ofstream out(filename, ios::binary);
+    if (out.is_open()) {
+        out.write(bytes, length);
+        out.close(); // close() sets failbit if the final flush fails.
+        ok = !out.fail();
+    }
+
+    // The serialized buffer is released on every path.
+    ::free(bytes);
+
+    return ok;
+}
+
+/**
+ * \brief Read a serialized database from \p filename and deserialize it.
+ *
+ * The file is mapped with mmap when HAVE_MMAP is defined; otherwise it is
+ * read into a heap buffer with stream IO.
+ *
+ * \param filename Path of the serialized database; must not be null.
+ * \param verbose If true, print progress and the serialized database info
+ * to stdout.
+ * \return the deserialized database, or nullptr if the file could not be
+ * opened, sized, mapped or read, or if deserialization failed.
+ */
+hs_database_t * loadDatabase(const char *filename, bool verbose) {
+    assert(filename);
+
+    if (verbose) {
+        cout << "Loading database from: " << filename << endl;
+    }
+
+    char *bytes = nullptr;
+
+#if defined(HAVE_MMAP)
+    // Use mmap to read the file
+    int fd = open(filename, O_RDONLY);
+    if (fd < 0) {
+        return nullptr;
+    }
+    struct stat st;
+    if (fstat(fd, &st) < 0) {
+        close(fd);
+        return nullptr;
+    }
+    size_t len = st.st_size;
+
+    bytes = (char *)mmap(nullptr, len, PROT_READ, MAP_SHARED, fd, 0);
+    if (bytes == MAP_FAILED) {
+        cout << "mmap failed" << endl;
+        close(fd);
+        return nullptr;
+    }
+#else
+    // Fall back on stream IO
+    ifstream is;
+    is.open(filename, ios::in | ios::binary);
+    if (!is.is_open()) {
+        return nullptr;
+    }
+    is.seekg(0, ios::end);
+    // tellg() reports failure as -1, which must not be turned into a size.
+    const streamoff end_pos = is.tellg();
+    if (end_pos < 0) {
+        cout << "Unable to determine database file size" << endl;
+        return nullptr;
+    }
+    size_t len = static_cast<size_t>(end_pos);
+    if (verbose) {
+        cout << "Reading " << len << " bytes" << endl;
+    }
+    is.seekg(0, ios::beg);
+    bytes = new char[len];
+    is.read(bytes, len);
+    // A short read sets failbit; do not deserialize a partial buffer.
+    const bool read_ok = !is.fail();
+    is.close();
+    if (!read_ok) {
+        cout << "Unable to read database file" << endl;
+        delete [] bytes;
+        return nullptr;
+    }
+#endif
+
+    assert(bytes);
+
+    if (verbose) {
+        char *info = nullptr;
+        hs_error_t err = hs_serialized_database_info(bytes, len, &info);
+        if (err) {
+            cout << "Unable to decode serialized database info: " << err
+                 << endl;
+        } else if (info) {
+            cout << "Serialized database info: " << info << endl;
+            std::free(info);
+        } else {
+            cout << "Unable to decode serialized database info." << endl;
+        }
+    }
+
+    hs_database_t *db = nullptr;
+    hs_error_t err = hs_deserialize_database(bytes, len, &db);
+
+    // The serialized bytes are released before the result is examined.
+#if defined(HAVE_MMAP)
+    munmap(bytes, len);
+    close(fd);
+#else
+    delete [] bytes;
+#endif
+
+    if (err != HS_SUCCESS) {
+        cout << "hs_deserialize_database call failed: " << err << endl;
+        return nullptr;
+    }
+
+    assert(db);
+
+    return db;
+}
--- /dev/null
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DATABASE_UTIL_H
+#define DATABASE_UTIL_H
+
+struct hs_database;
+
+/** \brief Serialize \p db and write it to \p filename; returns false if
+ * serialization fails. If \p verbose is set, the destination is printed to
+ * stdout. */
+bool saveDatabase(const hs_database *db, const char *filename,
+ bool verbose = false);
+
+/** \brief Read and deserialize the database stored in \p filename; returns
+ * nullptr on failure. If \p verbose is set, progress and the serialized
+ * database info are printed to stdout. */
+hs_database *loadDatabase(const char *filename, bool verbose = false);
+
+#endif /* DATABASE_UTIL_H */
--- /dev/null
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EXPRESSION_PATH_H
+#define EXPRESSION_PATH_H
+
+#include "ue2common.h"
+
+#include <cerrno>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <sys/stat.h>
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <libgen.h>
+#endif
+
+//
+// Utility functions
+//
+
+/**
+ * Given a path to a signature file, infer the path of the pcre directory.
+ */
+static inline
+std::string inferExpressionPath(const std::string &sigFile) {
+#ifndef _WIN32
+    // POSIX variant: dirname() may write to its argument, so give it a
+    // private, NUL-terminated copy of the path.
+    std::vector<char> scratch(sigFile.begin(), sigFile.end());
+    scratch.push_back('\0');
+
+    std::string rv(dirname(scratch.data()));
+#else
+    // Windows variant: paths that do not fit the _splitpath directory buffer
+    // yield an empty result.
+    if (sigFile.size() >= _MAX_DIR) {
+        return std::string();
+    }
+    char path[_MAX_DIR];
+    _splitpath(sigFile.c_str(), nullptr, path, nullptr, nullptr);
+    std::string rv(path);
+#endif
+
+    // The pcre directory is a sibling of the signature file's directory.
+    return rv + "/../pcre";
+}
+
+#if defined(_WIN32)
+#define stat _stat
+#define S_IFREG _S_IFREG
+#endif
+
+/**
+ * \brief Return true if \p filename names a directory.
+ *
+ * If stat() fails, the error is printed to stderr and false is returned.
+ */
+static inline
+bool isDir(const std::string &filename) {
+    struct stat s;
+
+    if (stat(filename.c_str(), &s) == -1) {
+        std::cerr << "stat: " << strerror(errno) << std::endl;
+        return false;
+    }
+
+    // Compare the whole file-type field: the type codes are not independent
+    // bits (a block device's code also contains the S_IFDIR bit), so a bare
+    // bit test would misreport other file types as directories.
+    return (s.st_mode & S_IFMT) == S_IFDIR;
+}
+
+/**
+ * \brief Return true if \p filename names a regular file.
+ *
+ * If stat() fails, the error is printed to stderr and false is returned.
+ */
+static inline
+bool isFile(const std::string &filename) {
+    struct stat s;
+
+    if (stat(filename.c_str(), &s) == -1) {
+        std::cerr << "stat: " << strerror(errno) << std::endl;
+        return false;
+    }
+
+    // Compare the whole file-type field: the type codes are not independent
+    // bits (a socket's code also contains the S_IFREG bit), so a bare bit
+    // test would misreport other file types as regular files.
+    return (s.st_mode & S_IFMT) == S_IFREG;
+}
+
+#endif /* EXPRESSION_PATH_H */