set(TUNE_FLAG native)
endif()
+ if (BUILD_SVE2_BITPERM)
+ set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm")
+ elseif (BUILD_SVE2)
+ set(GNUCC_ARCH "${GNUCC_ARCH}+sve2")
+ elseif (BUILD_SVE)
+ set(GNUCC_ARCH "${GNUCC_ARCH}+sve")
+ endif ()
+
# compiler version checks TODO: test more compilers
if (CMAKE_COMPILER_IS_GNUCXX)
set(GNUCXX_MINVER "4.8.1")
CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H)
elseif (ARCH_ARM32 OR ARCH_AARCH64)
CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_C_ARM_NEON_H)
+ if (BUILD_SVE OR BUILD_SVE2 OR BUILD_SVE2_BITPERM)
+ CHECK_INCLUDE_FILE_CXX(arm_sve.h HAVE_C_ARM_SVE_H)
+ endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -flax-vector-conversions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flax-vector-conversions")
endif()
- `make -jT` where T is the number of threads used to compile.
- `cmake --build . -- -j T` can also be used instead of make.
+# Compiling for SVE
+
+The following cmake variables can be set in order to target Arm's Scalable
+Vector Extension. They are listed in ascending order of strength, with cmake
+detecting whether the feature is available in the compiler and falling back to
+a weaker version if not. Only one of these variables needs to be set as weaker
+variables will be implied as set.
+
+- `BUILD_SVE`
+- `BUILD_SVE2`
+- `BUILD_SVE2_BITPERM`
+
# Documentation
Information on building the Hyperscan library and using its API is available in
message (FATAL_ERROR "No intrinsics header found")
endif ()
+if (ARCH_ARM32 OR ARCH_AARCH64)
+ CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
+int main() {
+ int32x4_t a = vdupq_n_s32(1);
+ (void)a;
+}" HAVE_NEON)
+endif ()
+
+if (ARCH_AARCH64)
+ set(PREV_FLAGS "${CMAKE_C_FLAGS}")
+ if (BUILD_SVE2_BITPERM)
+ set(CMAKE_C_FLAGS "-march=${GNUCC_ARCH} ${CMAKE_C_FLAGS}")
+ CHECK_C_SOURCE_COMPILES("#include <arm_sve.h>
+ int main() {
+ svuint8_t a = svbext(svdup_u8(1), svdup_u8(2));
+ (void)a;
+ }" HAVE_SVE2_BITPERM)
+ if (HAVE_SVE2_BITPERM)
+ add_definitions(-DHAVE_SVE2_BITPERM)
+ endif ()
+ endif()
+ if (BUILD_SVE2)
+ set(CMAKE_C_FLAGS "-march=${GNUCC_ARCH} ${CMAKE_C_FLAGS}")
+ CHECK_C_SOURCE_COMPILES("#include <arm_sve.h>
+ int main() {
+ svuint8_t a = svbsl(svdup_u8(1), svdup_u8(2), svdup_u8(3));
+ (void)a;
+ }" HAVE_SVE2)
+ if (HAVE_SVE2)
+ add_definitions(-DHAVE_SVE2)
+ endif ()
+ endif()
+ if (BUILD_SVE)
+ set(CMAKE_C_FLAGS "-march=${GNUCC_ARCH} ${CMAKE_C_FLAGS}")
+ CHECK_C_SOURCE_COMPILES("#include <arm_sve.h>
+ int main() {
+ svuint8_t a = svdup_u8(1);
+ (void)a;
+ }" HAVE_SVE)
+ if (HAVE_SVE)
+ add_definitions(-DHAVE_SVE)
+ endif ()
+ endif ()
+ set(CMAKE_C_FLAGS "${PREV_FLAGS}")
+endif()
+
if (BUILD_AVX512)
CHECK_C_COMPILER_FLAG(${SKYLAKE_FLAG} HAS_ARCH_SKYLAKE)
if (NOT HAS_ARCH_SKYLAKE)
(void)_mm512_permutexvar_epi8(idx, a);
}" HAVE_AVX512VBMI)
-elseif (ARCH_ARM32 OR ARCH_AARCH64)
- CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
-int main() {
- int32x4_t a = vdupq_n_s32(1);
- (void)a;
-}" HAVE_NEON)
-else ()
+elseif (!ARCH_ARM32 AND !ARCH_AARCH64)
message (FATAL_ERROR "Unsupported architecture")
endif ()
endif ()
endif ()
+unset (PREV_FLAGS)
unset (CMAKE_REQUIRED_FLAGS)
unset (INTRIN_INC_H)
/* "Define if cross compiling for AARCH64" */
#cmakedefine CROSS_COMPILE_AARCH64
+/* Define if building SVE for AARCH64. */
+#cmakedefine BUILD_SVE
+
+/* Define if building SVE2 for AARCH64. */
+#cmakedefine BUILD_SVE2
+
+/* Define if building SVE2+BITPERM for AARCH64. */
+#cmakedefine BUILD_SVE2_BITPERM
+
/* internal build, switch on dump support. */
#cmakedefine DUMP_SUPPORT
/* C compiler has arm_neon.h */
#cmakedefine HAVE_C_ARM_NEON_H
+/* C compiler has arm_sve.h */
+#cmakedefine HAVE_C_ARM_SVE_H
+
/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
0 if you don't. */
#cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP
#if defined(HAVE_C_ARM_NEON_H)
# define USE_ARM_NEON_H
+# if defined(HAVE_C_ARM_SVE_H)
+# define USE_ARM_SVE
+# if defined(BUILD_SVE2)
+# define USE_ARM_SVE2
+# if defined(BUILD_SVE2_BITPERM)
+# define USE_ARM_SVE2_BITPERM
+# endif
+# endif
+# endif
#endif
#ifdef __cplusplus
#include <intrin.h>
#elif defined(USE_ARM_NEON_H)
#include <arm_neon.h>
+# if defined(USE_ARM_SVE)
+# include <arm_sve.h>
+# endif
#else
#error no intrinsics file
#endif