]> git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
Fat runtime
authorMatthew Barr <matthew.barr@intel.com>
Wed, 2 Nov 2016 00:01:28 +0000 (11:01 +1100)
committerMatthew Barr <matthew.barr@intel.com>
Wed, 14 Dec 2016 04:34:54 +0000 (15:34 +1100)
17 files changed:
CMakeLists.txt
cmake/arch.cmake
cmake/attrib.cmake [new file with mode: 0644]
cmake/build_wrapper.sh [new file with mode: 0755]
cmake/config.h.in
cmake/keep.syms.in [new file with mode: 0644]
src/compiler/compiler.cpp
src/database.c
src/database.h
src/dispatcher.c [new file with mode: 0644]
src/hs.cpp
src/hs_common.h
src/nfa/mcsheng_compile.cpp
src/util/cpuid_flags.c
src/util/cpuid_flags.h
src/util/simd_types.h
unit/CMakeLists.txt

index 0559932d15f5cb1bc1003382e0feb617f25a1511..9f953c6ea9ffc59af875419ae1f6e11569adf015 100644 (file)
@@ -213,7 +213,6 @@ else()
         set(ARCH_C_FLAGS "${ARCH_C_FLAGS} -march=native -mtune=native")
     endif()
 
-    # we don't use these for the lib, but other tools/unit tests
     if (NOT CMAKE_CXX_FLAGS MATCHES .*march.*)
         set(ARCH_CXX_FLAGS "${ARCH_CXX_FLAGS} -march=native -mtune=native")
     endif()
@@ -257,9 +256,24 @@ if (RELEASE_BUILD)
     endif()
 endif()
 
-# ensure we are building for the right target arch
+if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+    # This is a Linux-only feature for now - requires platform support
+    # elsewhere
+    option(FAT_RUNTIME "Build a library that supports multiple microarchitecures" RELEASE_BUILD)
+    if (FAT_RUNTIME)
+        include (${CMAKE_MODULE_PATH}/attrib.cmake)
+        if (NOT HAS_C_ATTR_IFUNC)
+            message(FATAL_ERROR "Compiler does not support ifunc attribute, cannot build fat runtime")
+        endif()
+    endif()
+endif ()
+
 include (${CMAKE_MODULE_PATH}/arch.cmake)
 
+if (NOT FAT_RUNTIME AND NOT HAVE_SSSE3)
+        message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
+endif ()
+
 # testing a builtin takes a little more work
 CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
 CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
@@ -365,6 +379,14 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
 endif()
 endif()
 
+if (NOT FAT_RUNTIME)
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}")
+else()
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+endif()
+
 add_subdirectory(util)
 add_subdirectory(unit)
 add_subdirectory(doc/dev-reference)
@@ -391,8 +413,13 @@ if (NOT WIN32)
 endif()
 
 # only set these after all tests are done
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS} ${EXTRA_C_FLAGS}")
+if (NOT FAT_RUNTIME)
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
+else()
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
+endif()
 
 
 if(NOT WIN32)
@@ -414,12 +441,19 @@ SET(hs_HEADERS
 )
 install(FILES ${hs_HEADERS} DESTINATION include/hs)
 
+set (hs_exec_common_SRCS
+        src/alloc.c
+        src/scratch.c
+        src/util/multibit.c
+        )
+
 set (hs_exec_SRCS
     ${hs_HEADERS}
     src/hs_version.h
     src/ue2common.h
-    src/alloc.c
     src/allocator.h
+    src/crc32.c
+    src/crc32.h
     src/report.h
     src/runtime.c
     src/fdr/fdr.c
@@ -558,8 +592,8 @@ set (hs_exec_SRCS
     src/util/join.h
     src/util/masked_move.h
     src/util/multibit.h
-    src/util/multibit_internal.h
     src/util/multibit.c
+    src/util/multibit_internal.h
     src/util/pack_bits.h
     src/util/popcount.h
     src/util/pqueue.h
@@ -571,21 +605,14 @@ set (hs_exec_SRCS
     src/util/state_compress.c
     src/util/unaligned.h
     src/util/uniform_ops.h
-    src/scratch.h
-    src/scratch.c
-    src/crc32.c
-    src/crc32.h
     src/database.c
     src/database.h
 )
 
-if (HAVE_AVX2)
-    set (hs_exec_SRCS
-        ${hs_exec_SRCS}
-        src/fdr/teddy_avx2.c
-        src/util/masked_move.c
-        )
-endif ()
+set (hs_exec_avx2_SRCS
+    src/fdr/teddy_avx2.c
+    src/util/masked_move.c
+)
 
 
 SET (hs_SRCS
@@ -1013,27 +1040,101 @@ endif()
 set (LIB_VERSION ${HS_VERSION})
 set (LIB_SOVERSION ${HS_MAJOR_VERSION})
 
-add_library(hs_exec OBJECT ${hs_exec_SRCS})
+if (NOT FAT_RUNTIME)
+
+    if (HAVE_AVX2)
+        add_library(hs_exec OBJECT ${hs_exec_common_SRCS} ${hs_exec_SRCS}
+            ${hs_exec_avx2_SRCS})
+    else()
+        add_library(hs_exec OBJECT ${hs_exec_common_SRCS} ${hs_exec_SRCS})
+    endif()
+
+    add_library(hs_runtime STATIC src/hs_version.c $<TARGET_OBJECTS:hs_exec>)
+    set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
+
+    if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
+        add_library(hs_exec_shared OBJECT ${hs_exec_SRCS})
+        set_target_properties(hs_exec_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
+    endif()
+
+else (FAT_RUNTIME)
+    set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh")
+    add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS})
+    set_target_properties(hs_exec_core2 PROPERTIES
+        COMPILE_FLAGS "-march=core2"
+        RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
+        )
+
+    add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS})
+    set_target_properties(hs_exec_corei7 PROPERTIES
+        COMPILE_FLAGS "-march=corei7"
+        RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
+        )
+
+    add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
+    set_target_properties(hs_exec_avx2 PROPERTIES
+        COMPILE_FLAGS "-march=core-avx2"
+        RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in"
+        )
+
+    add_library(hs_exec_common OBJECT
+        ${hs_exec_common_SRCS}
+        src/dispatcher.c
+        )
+    set_source_files_properties(src/dispatcher.c PROPERTIES
+        COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function")
+    set_source_files_properties(${hs_exec_common_SRCS} PROPERTIES
+        COMPILE_FLAGS "-march=core-avx2")
+
+    if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
+        add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS})
+        set_target_properties(hs_exec_shared_core2 PROPERTIES
+            COMPILE_FLAGS "-march=core2"
+            POSITION_INDEPENDENT_CODE TRUE
+            RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
+            )
+        add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS})
+        set_target_properties(hs_exec_shared_corei7 PROPERTIES
+            COMPILE_FLAGS "-march=corei7"
+            POSITION_INDEPENDENT_CODE TRUE
+            RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
+            )
+        add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
+        set_target_properties(hs_exec_shared_avx2 PROPERTIES
+            COMPILE_FLAGS "-march=core-avx2"
+            POSITION_INDEPENDENT_CODE TRUE
+            RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in"
+            )
+        add_library(hs_exec_common_shared OBJECT
+        ${hs_exec_common_SRCS}
+        src/dispatcher.c
+        )
+        set_target_properties(hs_exec_common_shared PROPERTIES
+            OUTPUT_NAME hs_exec_common
+            POSITION_INDEPENDENT_CODE TRUE)
+    endif() # SHARED
 
-if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
-add_library(hs_exec_shared OBJECT ${hs_exec_SRCS})
-set_target_properties(hs_exec_shared PROPERTIES
-    POSITION_INDEPENDENT_CODE TRUE)
-endif()
 
 # hs_version.c is added explicitly to avoid some build systems that refuse to
 # create a lib without any src (I'm looking at you Xcode)
 
-add_library(hs_runtime STATIC src/hs_version.c $<TARGET_OBJECTS:hs_exec>)
+    add_library(hs_runtime STATIC src/hs_version.c
+        $<TARGET_OBJECTS:hs_exec_common> $<TARGET_OBJECTS:hs_exec_core2>
+        $<TARGET_OBJECTS:hs_exec_corei7> $<TARGET_OBJECTS:hs_exec_avx2>)
+endif (NOT FAT_RUNTIME)
+
 
-set_target_properties(hs_runtime PROPERTIES
-    LINKER_LANGUAGE C)
+set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
 if (NOT BUILD_SHARED_LIBS)
     install(TARGETS hs_runtime DESTINATION lib)
 endif()
 
 if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
-    add_library(hs_runtime_shared SHARED src/hs_version.c $<TARGET_OBJECTS:hs_exec_shared>)
+    if (NOT FAT_RUNTIME)
+        add_library(hs_runtime_shared SHARED src/hs_version.c $<TARGET_OBJECTS:hs_exec_shared>)
+    else()
+        add_library(hs_runtime_shared SHARED src/hs_version.c $<TARGET_OBJECTS:hs_exec_common_shared> $<TARGET_OBJECTS:hs_exec_shared_core2> $<TARGET_OBJECTS:hs_exec_shared_corei7> $<TARGET_OBJECTS:hs_exec_shared_avx2>)
+    endif()
     set_target_properties(hs_runtime_shared PROPERTIES
         VERSION ${LIB_VERSION}
         SOVERSION ${LIB_SOVERSION}
@@ -1046,8 +1147,12 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
         LIBRARY DESTINATION lib)
 endif()
 
+if (NOT FAT_RUNTIME)
+    add_library(hs STATIC ${hs_SRCS} $<TARGET_OBJECTS:hs_exec>)
+else()
 # we want the static lib for testing
-add_library(hs STATIC ${hs_SRCS} $<TARGET_OBJECTS:hs_exec>)
+add_library(hs STATIC src/hs_version.c ${hs_SRCS} $<TARGET_OBJECTS:hs_exec_common>  $<TARGET_OBJECTS:hs_exec_core2> $<TARGET_OBJECTS:hs_exec_corei7> $<TARGET_OBJECTS:hs_exec_avx2>)
+endif()
 
 add_dependencies(hs ragel_Parser)
 
@@ -1056,7 +1161,11 @@ install(TARGETS hs DESTINATION lib)
 endif()
 
 if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
-    add_library(hs_shared SHARED ${hs_SRCS} $<TARGET_OBJECTS:hs_exec_shared>)
+    if (NOT FAT_RUNTIME)
+        add_library(hs_shared SHARED src/hs_version.c ${hs_SRCS} $<TARGET_OBJECTS:hs_exec_shared>)
+    else()
+        add_library(hs_shared SHARED src/hs_version.c ${hs_SRCS} $<TARGET_OBJECTS:hs_exec_common_shared> $<TARGET_OBJECTS:hs_exec_shared_core2> $<TARGET_OBJECTS:hs_exec_shared_corei7> $<TARGET_OBJECTS:hs_exec_shared_avx2>)
+    endif()
     add_dependencies(hs_shared ragel_Parser)
     set_target_properties(hs_shared PROPERTIES
         OUTPUT_NAME hs
index c00401dd012152196a46f01fdb4fe2a9ab4354a5..e98fbf22733e78603ebf6e1cf82cf070ed19a447 100644 (file)
@@ -11,7 +11,8 @@ else ()
 endif ()
 
 
-set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
+set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}")
+
 # ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic
 CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
 int main() {
@@ -19,10 +20,6 @@ int main() {
     (void)_mm_shuffle_epi8(a, a);
 }" HAVE_SSSE3)
 
-if (NOT HAVE_SSSE3)
-    message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
-endif ()
-
 # now look for AVX2
 CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
 #if !defined(__AVX2__)
@@ -34,9 +31,5 @@ int main(){
     (void)_mm256_xor_si256(z, z);
 }" HAVE_AVX2)
 
-if (NOT HAVE_AVX2)
-    message(STATUS "Building without AVX2 support")
-endif ()
-
 unset (CMAKE_REQUIRED_FLAGS)
 unset (INTRIN_INC_H)
diff --git a/cmake/attrib.cmake b/cmake/attrib.cmake
new file mode 100644 (file)
index 0000000..6ce3f2a
--- /dev/null
@@ -0,0 +1,3 @@
+# tests for compiler properties
+
+CHECK_C_SOURCE_COMPILES("int foo(int) __attribute__ ((ifunc(\"foo_i\"))); int f1(int i) { return i; } void (*foo_i()) { return f1; } int main(void) { return 0; }" HAS_C_ATTR_IFUNC)
diff --git a/cmake/build_wrapper.sh b/cmake/build_wrapper.sh
new file mode 100755 (executable)
index 0000000..5baf209
--- /dev/null
@@ -0,0 +1,27 @@
+#!/bin/sh -e
+# This is used for renaming symbols for the fat runtime, don't call directly
+# TODO: make this a lot less fragile!
+PREFIX=$1
+KEEPSYMS_IN=$2
+shift 2
+BUILD=$@
+OUT=$(echo $BUILD | sed 's/.* -o \(.*\.o\).*/\1/')
+SYMSFILE=/tmp/${PREFIX}_rename.syms.$$
+KEEPSYMS=/tmp/keep.syms.$$
+# grab the command without the target obj or src file flags
+# we don't just call gcc directly as there may be flags modifying the arch
+CC_CMD=$(echo $BUILD | sed 's/ -o .*\.o//;s/ -c //;s/ .[^ ]*\.c//;')
+# find me a libc
+LIBC_SO=$(${CC_CMD} --print-file-name=libc.so.6)
+cp ${KEEPSYMS_IN} ${KEEPSYMS}
+# get all symbols from libc and turn them into patterns
+nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ ]*\).*/^\1$/' >> ${KEEPSYMS}
+# build the object
+${BUILD}
+# rename the symbols in the object
+nm -f p -g ${OUT} | cut -f1 -d' ' | grep -v -f ${KEEPSYMS} | sed -e "s/\(.*\)/\1\ ${PREFIX}_\1/" >> ${SYMSFILE}
+if test -s ${SYMSFILE}
+then
+    objcopy --redefine-syms=${SYMSFILE} ${OUT}
+fi
+rm -f ${SYMSFILE} ${KEEPSYMS}
index 75c27b3e2166cfe025e779f3c5bc6cb99be5d70d..198d96c5d8eeb60504c628c928daed3da5e3abdb 100644 (file)
@@ -15,6 +15,9 @@
 /* internal build, switch on dump support. */
 #cmakedefine DUMP_SUPPORT
 
+/* Define if building "fat" runtime. */
+#cmakedefine FAT_RUNTIME
+
 /* Define to 1 if `backtrace' works. */
 #cmakedefine HAVE_BACKTRACE
 
diff --git a/cmake/keep.syms.in b/cmake/keep.syms.in
new file mode 100644 (file)
index 0000000..ab6f82a
--- /dev/null
@@ -0,0 +1,11 @@
+# names to exclude
+hs_misc_alloc
+hs_misc_free
+hs_free_scratch
+hs_stream_alloc
+hs_stream_free
+hs_scratch_alloc
+hs_scratch_free
+hs_database_alloc
+hs_database_free
+^_
index d56aff88816de60ddfdddc5cd6e95d975b5ba0d9..4a4afc64e753f890f9722c1e52b92899c10f291d 100644 (file)
 /** \file
  * \brief Compiler front-end interface.
  */
+#include "allocator.h"
 #include "asserts.h"
 #include "compiler.h"
+#include "crc32.h"
 #include "database.h"
 #include "grey.h"
 #include "hs_internal.h"
@@ -321,6 +323,45 @@ platform_t target_to_platform(const target_t &target_info) {
     return p;
 }
 
+/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated
+ * \ref hs_database, ensuring that it is padded correctly to give cacheline
+ * alignment.  */
+static
+hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
+    size_t db_len = sizeof(struct hs_database) + len;
+    DEBUG_PRINTF("db size %zu\n", db_len);
+    DEBUG_PRINTF("db platform %llx\n", platform);
+
+    struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len);
+    if (hs_check_alloc(db) != HS_SUCCESS) {
+        hs_database_free(db);
+        return nullptr;
+    }
+
+    // So that none of our database is uninitialized
+    memset(db, 0, db_len);
+
+    // we need to align things manually
+    size_t shift = (uintptr_t)db->bytes & 0x3f;
+    DEBUG_PRINTF("shift is %zu\n", shift);
+
+    db->bytecode = offsetof(struct hs_database, bytes) - shift;
+    char *bytecode = (char *)db + db->bytecode;
+    assert(ISALIGNED_CL(bytecode));
+
+    db->magic = HS_DB_MAGIC;
+    db->version = HS_DB_VERSION;
+    db->length = len;
+    db->platform = platform;
+
+    // Copy bytecode
+    memcpy(bytecode, in_bytecode, len);
+
+    db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length);
+    return db;
+}
+
+
 struct hs_database *build(NG &ng, unsigned int *length) {
     assert(length);
 
index a4e10c225b68b69fa7da379d899074c9845631b3..61eb021fa106281de5db747ee98d02e763b990de 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -348,43 +348,6 @@ hs_error_t dbIsValid(const hs_database_t *db) {
     return HS_SUCCESS;
 }
 
-/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated
- * \ref hs_database, ensuring that it is padded correctly to give cacheline
- * alignment.  */
-hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
-    size_t db_len = sizeof(struct hs_database) + len;
-    DEBUG_PRINTF("db size %zu\n", db_len);
-    DEBUG_PRINTF("db platform %llx\n", platform);
-
-    struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len);
-    if (hs_check_alloc(db) != HS_SUCCESS) {
-        hs_database_free(db);
-        return NULL;
-    }
-
-    // So that none of our database is uninitialized
-    memset(db, 0, db_len);
-
-    // we need to align things manually
-    size_t shift = (uintptr_t)db->bytes & 0x3f;
-    DEBUG_PRINTF("shift is %zu\n", shift);
-
-    db->bytecode = offsetof(struct hs_database, bytes) - shift;
-    char *bytecode = (char *)db + db->bytecode;
-    assert(ISALIGNED_CL(bytecode));
-
-    db->magic = HS_DB_MAGIC;
-    db->version = HS_DB_VERSION;
-    db->length = len;
-    db->platform = platform;
-
-    // Copy bytecode
-    memcpy(bytecode, in_bytecode, len);
-
-    db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length);
-    return db;
-}
-
 #if defined(_WIN32)
 #define SNPRINTF_COMPAT _snprintf
 #else
index 5488c93d62fbcc9853955961d0121f5b3284fc78..399513fc23c5dfa03165ba9e2f30db55e80aca86 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -110,7 +110,6 @@ hs_error_t validDatabase(const hs_database_t *db) {
 }
 
 hs_error_t dbIsValid(const struct hs_database *db);
-struct hs_database *dbCreate(const char *bytecode, size_t len, u64a platform);
 
 #ifdef __cplusplus
 } /* extern "C" */
diff --git a/src/dispatcher.c b/src/dispatcher.c
new file mode 100644 (file)
index 0000000..810a529
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "hs_common.h"
+#include "hs_runtime.h"
+#include "ue2common.h"
+#include "util/cpuid_flags.h"
+#include "util/join.h"
+
+#define CREATE_DISPATCH(RTYPE, NAME, ...)                                      \
+    /* create defns */                                                         \
+    RTYPE JOIN(avx2_, NAME)(__VA_ARGS__);                                      \
+    RTYPE JOIN(corei7_, NAME)(__VA_ARGS__);                                    \
+    RTYPE JOIN(core2_, NAME)(__VA_ARGS__);                                     \
+                                                                               \
+    /* error func */                                                           \
+    static inline RTYPE JOIN(error_, NAME)(__VA_ARGS__) {                      \
+        return (RTYPE)HS_ARCH_ERROR;                                           \
+    }                                                                          \
+                                                                               \
+    /* resolver */                                                             \
+    static void(*JOIN(resolve_, NAME)(void)) {                                 \
+        if (check_avx2()) {                                                    \
+            return JOIN(avx2_, NAME);                                          \
+        }                                                                      \
+        if (check_sse42() && check_popcnt()) {                                 \
+            return JOIN(corei7_, NAME);                                        \
+        }                                                                      \
+        if (check_ssse3()) {                                                   \
+            return JOIN(core2_, NAME);                                         \
+        }                                                                      \
+        /* anything else is fail */                                            \
+        return JOIN(error_, NAME);                                             \
+    }                                                                          \
+                                                                               \
+    /* function */                                                             \
+    RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME)))
+
+CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data,
+                unsigned length, unsigned flags, hs_scratch_t *scratch,
+                match_event_handler onEvent, void *userCtx);
+
+CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database,
+                size_t *stream_size);
+
+CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db,
+                size_t *size);
+CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db);
+CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db);
+
+CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db,
+                unsigned int flags, hs_stream_t **stream);
+
+CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data,
+                unsigned int length, unsigned int flags, hs_scratch_t *scratch,
+                match_event_handler onEvent, void *ctxt);
+
+CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id,
+                hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt);
+
+CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db,
+                const char *const *data, const unsigned int *length,
+                unsigned int count, unsigned int flags, hs_scratch_t *scratch,
+                match_event_handler onevent, void *context);
+
+CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info);
+
+CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id,
+                const hs_stream_t *from_id);
+
+CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id,
+                unsigned int flags, hs_scratch_t *scratch,
+                match_event_handler onEvent, void *context);
+
+CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id,
+                const hs_stream_t *from_id, hs_scratch_t *scratch,
+                match_event_handler onEvent, void *context);
+
+CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db,
+                char **bytes, size_t *length);
+
+CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes,
+                const size_t length, hs_database_t **db);
+
+CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes,
+                const size_t length, hs_database_t *db);
+
+CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes,
+                size_t length, char **info);
+
+CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes,
+                const size_t length, size_t *deserialized_size);
+
+/** INTERNALS **/
+
+CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
index 07f6d2c1e502f616b24a75c69d0bc45ac907a259..f64e867a2ea37271e862a1ceebc52db32abe11c3 100644 (file)
@@ -192,6 +192,14 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
         return HS_COMPILER_ERROR;
     }
 
+#if defined(FAT_RUNTIME)
+    if (!check_ssse3()) {
+        *db = nullptr;
+        *comp_error = generateCompileError("Unsupported architecture", -1);
+        return HS_ARCH_ERROR;
+    }
+#endif
+
     if (!checkMode(mode, comp_error)) {
         *db = nullptr;
         assert(*comp_error); // set by checkMode.
@@ -319,6 +327,13 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
         return HS_COMPILER_ERROR;
     }
 
+#if defined(FAT_RUNTIME)
+    if (!check_ssse3()) {
+        *error = generateCompileError("Unsupported architecture", -1);
+        return HS_ARCH_ERROR;
+    }
+#endif
+
     if (!info) {
         *error = generateCompileError("Invalid parameter: info is NULL", -1);
         return HS_COMPILER_ERROR;
@@ -426,6 +441,11 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform) {
 
 extern "C" HS_PUBLIC_API
 hs_error_t hs_free_compile_error(hs_compile_error_t *error) {
+#if defined(FAT_RUNTIME)
+    if (!check_ssse3()) {
+        return HS_ARCH_ERROR;
+    }
+#endif
     freeCompileError(error);
     return HS_SUCCESS;
 }
index 4bf31146cdb01f83b7daf981efd96e71831fe6aa..ad8d9880e8b836f2ff49be021bc6675289c2a222 100644 (file)
@@ -519,6 +519,17 @@ const char *hs_version(void);
  */
 #define HS_SCRATCH_IN_USE       (-10)
 
+/**
+ * Unsupported CPU architecture.
+ *
+ * This error is returned when Hyperscan is able to detect that the current
+ * system does not support the required instruction set.
+ *
+ * At a minimum, Hyperscan requires Supplemental Streaming SIMD Extensions 3
+ * (SSSE3).
+ */
+#define HS_ARCH_ERROR              (-11)
+
 /** @} */
 
 #ifdef __cplusplus
index b7570af42fd8a282e839a1d0a8d7e1c689c76bb3..a7713bb061a9b1b0dc6133cf250b3038f506d141 100644 (file)
@@ -193,7 +193,8 @@ void createShuffleMasks(mcsheng *m, const dfa_info &info,
     }
     for (u32 i = 0; i < N_CHARS; i++) {
         assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
-        memcpy((u8*)&m->sheng_masks[i], (u8*)masks[info.alpha_remap[i]].data(), sizeof(m128));
+        memcpy((u8 *)&m->sheng_masks[i],
+               (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m128));
     }
     m->sheng_end = sheng_end;
     m->sheng_accel_limit = sheng_end - 1;
index 9a8bd922e52d081dd33d5f843c2848da42a31acc..dba147ee1bc81dc2d113c79ca8f6b5aa6e95f2e4 100644 (file)
 #define SSSE3 (1 << 9)
 #define SSE4_1 (1 << 19)
 #define SSE4_2 (1 << 20)
+#define POPCNT (1 << 23)
 #define XSAVE (1 << 27)
 #define AVX (1 << 28)
 
 // EDX
+#define FXSAVE (1 << 24)
 #define SSE (1 << 25)
-#define SSE2 (1 << 25)
+#define SSE2 (1 << 26)
 #define HTT (1 << 28)
 
 // Structured Extended Feature Flags Enumeration Leaf ECX values
@@ -87,7 +89,6 @@ u64a xgetbv(u32 op) {
 #endif
 }
 
-static
 int check_avx2(void) {
 #if defined(__INTEL_COMPILER)
     return _may_i_use_cpu_feature(_FEATURE_AVX2);
@@ -137,6 +138,24 @@ u64a cpuid_flags(void) {
     return cap;
 }
 
+int check_ssse3(void) {
+    unsigned int eax, ebx, ecx, edx;
+    cpuid(1, 0, &eax, &ebx, &ecx, &edx);
+    return !!(ecx & SSSE3);
+}
+
+int check_sse42(void) {
+    unsigned int eax, ebx, ecx, edx;
+    cpuid(1, 0, &eax, &ebx, &ecx, &edx);
+    return !!(ecx & SSE4_2);
+}
+
+int check_popcnt(void) {
+    unsigned int eax, ebx, ecx, edx;
+    cpuid(1, 0, &eax, &ebx, &ecx, &edx);
+    return !!(ecx & POPCNT);
+}
+
 struct family_id {
     u32 full_family;
     u32 full_model;
index 2df97ab5b43c223def1bad0de3eaca038480b2c8..8b23d4958646756bda980e96a3c5ef2167b3e673 100644 (file)
@@ -41,6 +41,11 @@ u64a cpuid_flags(void);
 
 u32 cpuid_tune(void);
 
+int check_avx2(void);
+int check_ssse3(void);
+int check_sse42(void);
+int check_popcnt(void);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
index 35f27e66fcbd641af552e39b2e9f569336bdd21a..74e2abecb065fa272976d6076460a19e763816bb 100644 (file)
@@ -61,6 +61,7 @@
 #error no intrinsics!
 #endif
 
+#if defined(__SSE2__)
 typedef __m128i m128;
 #else
 typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128;
index 77f3ac3b5f4eeb0786e5226e6dff75ae58d52e2d..8b49444474541cecff995cd7e9460311b68c6a0c 100644 (file)
@@ -1,5 +1,5 @@
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
 
 set(gtest_SOURCES gtest/gtest-all.cc gtest/gtest.h)
 if(NOT XCODE)
@@ -34,7 +34,7 @@ add_library(gtest STATIC ${gtest_SOURCES})
 
 add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${PROJECT_SOURCE_DIR})
 
-if (NOT RELEASE_BUILD)
+if (NOT (RELEASE_BUILD OR FAT_RUNTIME))
 set(unit_internal_SOURCES
     internal/bitfield.cpp
     internal/bitutils.cpp
@@ -89,7 +89,7 @@ set(unit_internal_SOURCES
 
 add_executable(unit-internal ${unit_internal_SOURCES})
 target_link_libraries(unit-internal hs gtest corpusomatic)
-endif(NOT RELEASE_BUILD)
+endif(NOT (RELEASE_BUILD OR FAT_RUNTIME))
 
 set(unit_hyperscan_SOURCES
     hyperscan/allocators.cpp