git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
[ARM/AArch64] Add run-time detection of ACLE and NEON instructions under Linux.
author Mika Lindqvist <postmaster@raasu.org>
Tue, 13 Mar 2018 09:26:19 +0000 (11:26 +0200)
committer Hans Kristian Rosbach <hk-git@circlestorm.org>
Wed, 12 Dec 2018 14:24:17 +0000 (15:24 +0100)
* Use getauxval() to check support for ACLE CRC32 instructions
* Allow disabling CRC32 instruction check

CMakeLists.txt
arch/aarch64/Makefile.in
arch/aarch64/armfeature.c [new file with mode: 0644]
arch/aarch64/insert_string_acle.c
arch/arm/Makefile.in
arch/arm/armfeature.c [new file with mode: 0644]
arch/arm/insert_string_acle.c
configure
functable.c
win32/Makefile.arm

index 3f5cd005d921ab4e1ca6685219f9128cc1e439d2..eed40c204a680e5b895e50d044ffb04122d029b2 100644 (file)
@@ -478,7 +478,7 @@ else()
     message(STATUS "No optimized architecture: using ${ARCHDIR}")
 endif()
 if("${ARCH}" MATCHES "arm" OR "${ARCH}" MATCHES "aarch64")
-    set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/fill_window_arm.c)
+    set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/armfeature.c ${ARCHDIR}/fill_window_arm.c)
 endif()
 if(WITH_OPTIM)
     if("${ARCH}" MATCHES "arm")
index 3c2bebc0a33ef2a961245d6224d4420bab8eb1aa..6fcf919a93cd1ce807c3e319ae91e188f65e1a1b 100644 (file)
@@ -12,7 +12,7 @@ SRCDIR=.
 SRCTOP=../..
 TOPDIR=$(SRCTOP)
 
-all: adler32_neon.o adler32_neon.lo crc32_acle.o crc32_acle.lo fill_window_arm.o fill_window_arm.lo insert_string_acle.o insert_string_acle.lo
+all: adler32_neon.o adler32_neon.lo armfeature.o armfeature.lo crc32_acle.o crc32_acle.lo fill_window_arm.o fill_window_arm.lo insert_string_acle.o insert_string_acle.lo
 
 adler32_neon.o: $(SRCDIR)/adler32_neon.c
        $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
@@ -20,6 +20,12 @@ adler32_neon.o: $(SRCDIR)/adler32_neon.c
 adler32_neon.lo: $(SRCDIR)/adler32_neon.c
        $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
 
+armfeature.o: $(SRCDIR)/armfeature.c
+       $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
+# .lo objects go into the shared library, so compile with $(SFLAGS) like the other .lo rules.
+armfeature.lo: $(SRCDIR)/armfeature.c
+       $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
+
 crc32_acle.o: $(SRCDIR)/crc32_acle.c
        $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
 
diff --git a/arch/aarch64/armfeature.c b/arch/aarch64/armfeature.c
new file mode 100644 (file)
index 0000000..9f2af03
--- /dev/null
@@ -0,0 +1,25 @@
+/* Run-time detection of optional CPU features on AArch64. */
+#if defined(__linux__)
+# include <sys/auxv.h>
+# include <asm/hwcap.h>
+#endif
+
+/* Report whether the ACLE CRC32 instructions may be used.
+ *
+ * Linux (when HWCAP_CRC32 is defined): queries getauxval(AT_HWCAP).
+ * Otherwise: returns 1 if built with ARM_NOCHECK_ACLE, else 0.
+ */
+int arm_has_crc32(void) {
+#if defined(__linux__) && defined(HWCAP_CRC32)
+  return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0;
+#elif defined(ARM_NOCHECK_ACLE)
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+/* Report whether NEON may be used; this implementation returns 1 unconditionally. */
+int arm_has_neon(void) {
+  return 1; /* always available */
+}
index 49f11cb35369c4ab06ff62107e2ad823d5e38650..563100b7105db77f14c9bdc92b926b3bb984c890 100644 (file)
@@ -5,10 +5,9 @@
  *
  */
 
-#include "zbuild.h"
-#ifdef __ARM_FEATURE_CRC32
+#if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
 #include <arm_acle.h>
-#endif
+#include "zbuild.h"
 #include "deflate.h"
 
 /* ===========================================================================
@@ -19,7 +18,6 @@
  *    input characters and the first MIN_MATCH bytes of str are valid
  *    (except for the last MIN_MATCH-1 bytes of the input file).
  */
-#ifdef ARM_ACLE_CRC_HASH
 Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count) {
     Pos p, lp, ret;
 
index 3c2bebc0a33ef2a961245d6224d4420bab8eb1aa..6fcf919a93cd1ce807c3e319ae91e188f65e1a1b 100644 (file)
@@ -12,7 +12,7 @@ SRCDIR=.
 SRCTOP=../..
 TOPDIR=$(SRCTOP)
 
-all: adler32_neon.o adler32_neon.lo crc32_acle.o crc32_acle.lo fill_window_arm.o fill_window_arm.lo insert_string_acle.o insert_string_acle.lo
+all: adler32_neon.o adler32_neon.lo armfeature.o armfeature.lo crc32_acle.o crc32_acle.lo fill_window_arm.o fill_window_arm.lo insert_string_acle.o insert_string_acle.lo
 
 adler32_neon.o: $(SRCDIR)/adler32_neon.c
        $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
@@ -20,6 +20,12 @@ adler32_neon.o: $(SRCDIR)/adler32_neon.c
 adler32_neon.lo: $(SRCDIR)/adler32_neon.c
        $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
 
+armfeature.o: $(SRCDIR)/armfeature.c
+       $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
+# .lo objects go into the shared library, so compile with $(SFLAGS) like the other .lo rules.
+armfeature.lo: $(SRCDIR)/armfeature.c
+       $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
+
 crc32_acle.o: $(SRCDIR)/crc32_acle.c
        $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
 
diff --git a/arch/arm/armfeature.c b/arch/arm/armfeature.c
new file mode 100644 (file)
index 0000000..7c78dda
--- /dev/null
@@ -0,0 +1,48 @@
+/* Run-time detection of optional CPU features on 32-bit ARM. */
+#if defined(__linux__)
+# include <sys/auxv.h>
+# include <asm/hwcap.h>
+#elif defined(_WIN32)
+# include <winapifamily.h>
+#endif
+
+/* Report whether the ACLE CRC32 instructions may be used.
+ *
+ * Linux (when HWCAP2_CRC32 is defined): queries getauxval(AT_HWCAP2).
+ * Otherwise: returns 1 if built with ARM_NOCHECK_ACLE, else 0.
+ */
+int arm_has_crc32(void) {
+#if defined(__linux__) && defined(HWCAP2_CRC32)
+  return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0;
+#elif defined(ARM_NOCHECK_ACLE)
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+/* Report whether NEON instructions may be used.
+ *
+ * Linux (when HWCAP_NEON is defined): queries getauxval(AT_HWCAP).
+ * Otherwise: returns 1 if built with ARM_NOCHECK_NEON or for the Windows
+ * phone-app partition, else 0.
+ *
+ * Every preprocessor path contains exactly one return statement, so no
+ * configuration compiles unreachable code.
+ */
+int arm_has_neon(void) {
+#if defined(__linux__) && defined(HWCAP_NEON)
+  return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0;
+#elif defined(ARM_NOCHECK_NEON)
+  return 1;
+#elif defined(_M_ARM) && defined(WINAPI_FAMILY_PARTITION)
+  /* Nested so the function-like macro is only expanded once known to be defined. */
+# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
+  return 1; /* Always supported */
+# else
+  return 0;
+# endif
+#else
+  return 0;
+#endif
+}
index 49f11cb35369c4ab06ff62107e2ad823d5e38650..563100b7105db77f14c9bdc92b926b3bb984c890 100644 (file)
@@ -5,10 +5,9 @@
  *
  */
 
-#include "zbuild.h"
-#ifdef __ARM_FEATURE_CRC32
+#if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
 #include <arm_acle.h>
-#endif
+#include "zbuild.h"
 #include "deflate.h"
 
 /* ===========================================================================
@@ -19,7 +18,6 @@
  *    input characters and the first MIN_MATCH bytes of str are valid
  *    (except for the last MIN_MATCH-1 bytes of the input file).
  */
-#ifdef ARM_ACLE_CRC_HASH
 Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count) {
     Pos p, lp, ret;
 
index bcffbd6e27482d0e8c69dfd49bc29b9c97e70da2..61cde7841d389b4ac40b45c243fed1970a9efd11 100755 (executable)
--- a/configure
+++ b/configure
@@ -1010,8 +1010,8 @@ case "${ARCH}" in
     arm | armv[3467]l | armv4b | armv4tl | armv5tel | armv5tejl | armv[67]hl | armv7hnl | armv[78]-a | armv8-a+* | armv8.[1234]-a | armv8.[1234]-a+*)
         [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=arm
         ARCHDIR=arch/arm
-        ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} fill_window_arm.o"
-        ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} fill_window_arm.lo"
+        ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} armfeature.o fill_window_arm.o"
+        ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} armfeature.lo fill_window_arm.lo"
 
         GCC_MACHINE=$(${CC} -dumpmachine)
         case "${GCC_MACHINE}" in
@@ -1115,8 +1115,8 @@ case "${ARCH}" in
     aarch64)
         [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=aarch64
         ARCHDIR=arch/aarch64
-        ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} fill_window_arm.o"
-        ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} fill_window_arm.lo"
+        ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} armfeature.o fill_window_arm.o"
+        ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} armfeature.lo fill_window_arm.lo"
 
         if test $native -eq 0; then
           ARCH="armv8-a"
index e0f0e40ce8dae152a090f13efd6280626f0bdc95..93fbd23d4d03e94cf43aca6cd111895cacaacb61 100644 (file)
@@ -12,6 +12,9 @@
 
 #if defined(X86_CPUID)
 # include "arch/x86/x86.h"
+#elif (defined(__arm__) || defined(__aarch64__) || defined(_M_ARM))
+extern int arm_has_crc32();
+extern int arm_has_neon();
 #endif
 
 
@@ -65,7 +68,8 @@ ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsi
     #ifdef X86_SSE4_2_CRC_HASH
     if (x86_cpu_has_sse42)
         functable.insert_string=&insert_string_sse;
-    #elif defined(ARM_ACLE_CRC_HASH)
+    #elif defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
+    if (arm_has_crc32())
         functable.insert_string=&insert_string_acle;
     #endif
 
@@ -93,6 +97,7 @@ ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, si
     functable.adler32=&adler32_c;
 
     #if ((defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(ARM_NEON_ADLER32))
+    if (arm_has_neon())
         functable.adler32=&adler32_neon;
     #endif
 
@@ -113,10 +118,10 @@ ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64
 
     if (sizeof(void *) == sizeof(ptrdiff_t)) {
 #if BYTE_ORDER == LITTLE_ENDIAN
-#  if __ARM_FEATURE_CRC32
+      functable.crc32=crc32_little;
+#  if __ARM_FEATURE_CRC32 && defined(ARM_ACLE_CRC_HASH)
+      if (arm_has_crc32())
         functable.crc32=crc32_acle;
-#  else
-        functable.crc32=crc32_little;
 #  endif
 #elif BYTE_ORDER == BIG_ENDIAN
         functable.crc32=crc32_big;
index 2ffb725cdad21646b0151697b962a340717c38a0..656e4cf34adec6cbaf0fed3667770d4f81dcdd78 100644 (file)
@@ -37,7 +37,7 @@ WITH_VFPV3 =
 NEON_ARCH = /arch:VFPv4
 SUFFIX =
 
-OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_slow.obj \
+OBJS = adler32.obj armfeature.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_slow.obj \
        functable.obj infback.obj inflate.obj inftrees.obj inffast.obj match.obj trees.obj uncompr.obj zutil.obj fill_window_arm.obj
 !if "$(WITH_GZFILEOP)" != ""
 WFLAGS = $(WFLAGS) -DWITH_GZFILEOP
@@ -61,7 +61,7 @@ NEON_ARCH = /arch:VFPv3
 !endif
 !if "$(WITH_NEON)" != ""
 CFLAGS = $(CFLAGS) $(NEON_ARCH)
-WFLAGS = $(WFLAGS) -D__ARM_NEON__=1
+WFLAGS = $(WFLAGS) -D__ARM_NEON__=1 -DARM_NEON_ADLER32 -DARM_NOCHECK_NEON
 OBJS = $(OBJS) adler32_neon.obj
 !endif