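From: Cameron Cawley Date: Thu, 29 Feb 2024 21:20:25 +0000 (+0000) Subject: Provide an inline asm fallback for the ARMv8 intrinsics X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7ea78f12c8d159729d23f531366368e664fab609;p=thirdparty%2Fzlib-ng.git Provide an inline asm fallback for the ARMv8 intrinsics --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 64dd7ad4..35f75e44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -713,12 +713,15 @@ if(WITH_OPTIM) if(WITH_ARMV8) check_armv8_compiler_flag() - if(HAVE_ARMV8_FLAG) + if(HAVE_ARMV8_INLINE_ASM OR HAVE_ARMV8_INTRIN) add_definitions(-DARM_CRC32) set(ARMV8_SRCS ${ARCHDIR}/crc32_armv8.c) set_property(SOURCE ${ARMV8_SRCS} PROPERTY COMPILE_FLAGS "${ARMV8FLAG} ${NOLTOFLAG}") list(APPEND ZLIB_ARCH_SRCS ${ARMV8_SRCS}) add_feature_info(ARMV8_CRC 1 "Support ARMv8 optimized CRC hash generation, using \"${ARMV8FLAG}\"") + if(HAVE_ARMV8_INTRIN) + add_definitions(-DARM_CRC32_INTRIN) + endif() else() set(WITH_ARMV8 OFF) endif() diff --git a/arch/arm/acle_intrins.h b/arch/arm/acle_intrins.h index cc4faa9c..1545ef57 100644 --- a/arch/arm/acle_intrins.h +++ b/arch/arm/acle_intrins.h @@ -14,6 +14,56 @@ #else # define Z_TARGET_CRC #endif + +#if !defined(ARM_CRC32_INTRIN) && !defined(_MSC_VER) +#ifdef __aarch64__ +static inline uint32_t __crc32b(uint32_t __a, uint8_t __b) { + uint32_t __c; + __asm__ __volatile__("crc32b %w0, %w1, %w2" : "=r" (__c) : "r"(__a), "r"(__b)); + return __c; +} + +static inline uint32_t __crc32h(uint32_t __a, uint16_t __b) { + uint32_t __c; + __asm__ __volatile__("crc32h %w0, %w1, %w2" : "=r" (__c) : "r"(__a), "r"(__b)); + return __c; +} + +static inline uint32_t __crc32w(uint32_t __a, uint32_t __b) { + uint32_t __c; + __asm__ __volatile__("crc32w %w0, %w1, %w2" : "=r" (__c) : "r"(__a), "r"(__b)); + return __c; +} + +static inline uint32_t __crc32d(uint32_t __a, uint64_t __b) { + uint32_t __c; + __asm__ __volatile__("crc32x %w0, %w1, %x2" : "=r" (__c) : "r"(__a), "r"(__b)); + return __c; +} +#else +static inline uint32_t __crc32b(uint32_t __a, uint8_t __b) { + uint32_t __c; + __asm__ __volatile__("crc32b %0, %1, %2" : "=r" (__c) : "r"(__a), "r"(__b)); + return __c; +} + +static inline uint32_t __crc32h(uint32_t __a, uint16_t __b) { + uint32_t __c; + __asm__ __volatile__("crc32h %0, %1, %2" : "=r" (__c) : "r"(__a), "r"(__b)); + return __c; +} + +static inline uint32_t __crc32w(uint32_t __a, uint32_t __b) { + uint32_t __c; + __asm__ __volatile__("crc32w %0, %1, %2" : "=r" (__c) : "r"(__a), "r"(__b)); + return __c; +} + +static inline uint32_t __crc32d(uint32_t __a, uint64_t __b) { + return __crc32w (__crc32w (__a, __b & 0xffffffffULL), __b >> 32); +} +#endif +#endif #endif #ifdef ARM_SIMD diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake index e7498e27..9ab351af 100644 --- a/cmake/detect-intrinsics.cmake +++ b/cmake/detect-intrinsics.cmake @@ -15,8 +15,22 @@ macro(check_armv8_compiler_flag) endif() endif() endif() - # Check whether compiler supports ARMv8 CRC intrinsics + # Check whether compiler supports ARMv8 inline asm set(CMAKE_REQUIRED_FLAGS "${ARMV8FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") + check_c_source_compiles( + "unsigned int f(unsigned int a, unsigned int b) { + unsigned int c; + #ifdef __aarch64__ + __asm__ __volatile__ ( \"crc32w %w0, %w1, %w2\" : \"=r\" (c) : \"r\" (a), \"r\" (b)); + #else + __asm__ __volatile__ ( \"crc32w %0, %1, %2\" : \"=r\" (c) : \"r\" (a), \"r\" (b)); + #endif + return (int)c; + } + int main(void) { return f(1,2); }" + HAVE_ARMV8_INLINE_ASM + ) + # Check whether compiler supports ARMv8 intrinsics check_c_source_compiles( "#if defined(_MSC_VER) #include <intrin.h> @@ -27,7 +41,7 @@ macro(check_armv8_compiler_flag) return __crc32w(a, b); } int main(void) { return 0; }" - HAVE_ARMV8_FLAG + HAVE_ARMV8_INTRIN ) set(CMAKE_REQUIRED_FLAGS) endmacro() diff --git a/configure b/configure index a092e8ec..c5b570d0 100755 --- a/configure +++ b/configure @@ -1121,7 +1121,7 @@ EOF fi } 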
return __c; +} +#else +static inline uint32_t __crc32b(uint32_t __a, uint8_t __b) { + uint32_t __c; + __asm__ __volatile__("crc32b %0, %1, %2" : "=r" (__c) : "r"(__a), "r"(__b)); + return __c; +} + +static inline uint32_t __crc32h(uint32_t __a, uint16_t __b) { + uint32_t __c; + __asm__ __volatile__("crc32h %0, %1, %2" : "=r" (__c) : "r"(__a), "r"(__b)); + return __c; +} + +static inline uint32_t __crc32w(uint32_t __a, uint32_t __b) { + uint32_t __c; + __asm__ __volatile__("crc32w %0, %1, %2" : "=r" (__c) : "r"(__a), "r"(__b)); + return __c; +} + +static inline uint32_t __crc32d(uint32_t __a, uint64_t __b) { + return __crc32w (__crc32w (__a, __b & 0xffffffffULL), __b >> 32); +} +#endif +#endif #endif #ifdef ARM_SIMD diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake index e7498e27..9ab351af 100644 --- a/cmake/detect-intrinsics.cmake +++ b/cmake/detect-intrinsics.cmake @@ -15,8 +15,22 @@ macro(check_armv8_compiler_flag) endif() endif() endif() - # Check whether compiler supports ARMv8 CRC intrinsics + # Check whether compiler supports ARMv8 inline asm set(CMAKE_REQUIRED_FLAGS "${ARMV8FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") + check_c_source_compiles( + "unsigned int f(unsigned int a, unsigned int b) { + unsigned int c; + #ifdef __aarch64__ + __asm__ __volatile__ ( \"crc32w %w0, %w1, %w2\" : \"=r\" (c) : \"r\" (a), \"r\" (b)); + #else + __asm__ __volatile__ ( \"crc32w %0, %1, %2\" : \"=r\" (c) : \"r\" (a), \"r\" (b)); + #endif + return (int)c; + } + int main(void) { return f(1,2); }" + HAVE_ARMV8_INLINE_ASM + ) + # Check whether compiler supports ARMv8 intrinsics check_c_source_compiles( "#if defined(_MSC_VER) #include @@ -27,7 +41,7 @@ macro(check_armv8_compiler_flag) return __crc32w(a, b); } int main(void) { return 0; }" - HAVE_ARMV8_FLAG + HAVE_ARMV8_INTRIN ) set(CMAKE_REQUIRED_FLAGS) endmacro() diff --git a/configure b/configure index a092e8ec..c5b570d0 100755 --- a/configure +++ b/configure @@ -1121,7 +1121,7 @@ EOF fi } 
-check_armv8_compiler_flag() { +check_armv8_intrinsics() { # Check whether -march=armv8-a+crc works correctly cat > $test.c << EOF int main() { return 0; } @@ -1139,7 +1139,28 @@ EOF fi fi - # Check whether compiler supports ARMv8 CRC intrinsics + # Check whether compiler supports ARMv8 inline asm + cat > $test.c << EOF +unsigned int f(unsigned int a, unsigned int b) { + unsigned int c; +#ifdef __aarch64__ + __asm__ __volatile__("crc32w %w0, %w1, %w2" : "=r" (c) : "r"(a), "r"(b)); +#else + __asm__ __volatile__("crc32w %0, %1, %2" : "=r" (c) : "r"(a), "r"(b)); +#endif + return c; +} +int main(void) { return 0; } +EOF + if try ${CC} ${CFLAGS} ${armv8flag} $test.c; then + echo "Checking for ARMv8 inline assembly ... Yes." | tee -a configure.log + HAVE_ARMV8_INLINE_ASM=1 + else + echo "Checking for ARMv8 inline assembly ... No." | tee -a configure.log + HAVE_ARMV8_INLINE_ASM=0 + fi + + # Check whether compiler supports ARMv8 intrinsics cat > $test.c << EOF #include <arm_acle.h> unsigned int f(unsigned int a, unsigned int b) { @@ -1147,12 +1168,12 @@ unsigned int f(unsigned int a, unsigned int b) { } int main(void) { return 0; } EOF - if try ${CC} ${CFLAGS} ${acleflag} $test.c; then - echo "Checking for ARMv8 CRC intrinsics ... Yes." | tee -a configure.log - ARMV8_AVAILABLE=1 + if try ${CC} ${CFLAGS} ${armv8flag} $test.c; then + echo "Checking for ARMv8 intrinsics ... Yes." | tee -a configure.log + HAVE_ARMV8_INTRIN=1 else - echo "Checking for ARMv8 CRC intrinsics ... No." | tee -a configure.log - ARMV8_AVAILABLE=0 + echo "Checking for ARMv8 intrinsics ... No." 
| tee -a configure.log + HAVE_ARMV8_INTRIN=0 fi } @@ -1738,12 +1759,17 @@ EOF fi if test $buildarmv8 -eq 1; then - check_armv8_compiler_flag + check_armv8_intrinsics - if test $ARMV8_AVAILABLE -eq 1; then + if test $HAVE_ARMV8_INTRIN -eq 1 || test $HAVE_ARMV8_INLINE_ASM -eq 1; then CFLAGS="${CFLAGS} -DARM_CRC32" SFLAGS="${SFLAGS} -DARM_CRC32" + if test $HAVE_ARMV8_INTRIN -eq 1; then + CFLAGS="${CFLAGS} -DARM_CRC32_INTRIN" + SFLAGS="${SFLAGS} -DARM_CRC32_INTRIN" + fi + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_armv8.o" ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_armv8.lo" fi