From: Sebastian Pop Date: Fri, 25 Jan 2019 17:44:46 +0000 (-0600) Subject: ARM: check cpu feature once at init time X-Git-Tag: 1.9.9-b1~524 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1cd1b4eb0eddbe8c142d8a7223fde9bc212739dd;p=thirdparty%2Fzlib-ng.git ARM: check cpu feature once at init time This makes the checks for arm cpu features as inexpensive as on the x86 side by calling the runtime feature detection once in deflate/inflate init and then storing the result in a global variable. --- diff --git a/arch/aarch64/arm.h b/arch/aarch64/arm.h new file mode 100644 index 00000000..baee87f1 --- /dev/null +++ b/arch/aarch64/arm.h @@ -0,0 +1,13 @@ +/* arm.h -- check for ARM features. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef ARM_H_ +#define ARM_H_ + +extern int arm_cpu_has_neon; +extern int arm_cpu_has_crc32; + +void ZLIB_INTERNAL arm_check_features(void); + +#endif /* ARM_H_ */ diff --git a/arch/aarch64/armfeature.c b/arch/aarch64/armfeature.c index 9f2af03f..39f185d4 100644 --- a/arch/aarch64/armfeature.c +++ b/arch/aarch64/armfeature.c @@ -1,9 +1,11 @@ +#include "zutil.h" + #if defined(__linux__) # include <sys/auxv.h> # include <asm/hwcap.h> #endif -int arm_has_crc32() { +static int arm_has_crc32() { #if defined(__linux__) && defined(HWCAP_CRC32) return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0 ? 
1 : 0; #elif defined(ARM_NOCHECK_ACLE) @@ -13,7 +15,10 @@ int arm_has_crc32() { #endif } -int arm_has_neon() -{ - return 1; /* always available */ +ZLIB_INTERNAL int arm_cpu_has_neon; +ZLIB_INTERNAL int arm_cpu_has_crc32; + +void ZLIB_INTERNAL arm_check_features(void) { + arm_cpu_has_neon = 1; /* always available */ + arm_cpu_has_crc32 = arm_has_crc32(); } diff --git a/arch/arm/Makefile.in b/arch/arm/Makefile.in index 6fcf919a..34d27107 100644 --- a/arch/arm/Makefile.in +++ b/arch/arm/Makefile.in @@ -24,7 +24,7 @@ armfeature.o: $(SRCDIR)/armfeature.c $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c armfeature.lo: $(SRCDIR)/armfeature.c - $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c crc32_acle.o: $(SRCDIR)/crc32_acle.c $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c diff --git a/arch/arm/arm.h b/arch/arm/arm.h new file mode 100644 index 00000000..baee87f1 --- /dev/null +++ b/arch/arm/arm.h @@ -0,0 +1,13 @@ +/* arm.h -- check for ARM features. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef ARM_H_ +#define ARM_H_ + +extern int arm_cpu_has_neon; +extern int arm_cpu_has_crc32; + +void ZLIB_INTERNAL arm_check_features(void); + +#endif /* ARM_H_ */ diff --git a/arch/arm/armfeature.c b/arch/arm/armfeature.c index 7c78dda8..a06fd528 100644 --- a/arch/arm/armfeature.c +++ b/arch/arm/armfeature.c @@ -1,3 +1,5 @@ +#include "zutil.h" + #if defined(__linux__) # include <sys/auxv.h> # include <asm/hwcap.h> @@ -5,7 +7,7 @@ # include <winapifamily.h> #endif -int arm_has_crc32() { +static int arm_has_crc32() { #if defined(__linux__) && defined(HWCAP2_CRC32) return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0 ? 1 : 0; #elif defined(ARM_NOCHECK_ACLE) @@ -15,7 +17,7 @@ int arm_has_crc32() { #endif } -int arm_has_neon() +static int arm_has_neon() { #if defined(__linux__) && defined(HWCAP_NEON) return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0 ? 
1 : 0; @@ -31,3 +33,11 @@ int arm_has_neon() return 0; #endif } + +ZLIB_INTERNAL int arm_cpu_has_neon; +ZLIB_INTERNAL int arm_cpu_has_crc32; + +void ZLIB_INTERNAL arm_check_features(void) { + arm_cpu_has_neon = arm_has_neon(); + arm_cpu_has_crc32 = arm_has_crc32(); +} diff --git a/arch/x86/x86.c b/arch/x86/x86.c index c04e0a79..382f72a2 100644 --- a/arch/x86/x86.c +++ b/arch/x86/x86.c @@ -8,7 +8,7 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "x86.h" +#include "zutil.h" #ifdef _MSC_VER #include <intrin.h> diff --git a/arch/x86/x86.h b/arch/x86/x86.h index 9b06cc66..860e6415 100644 --- a/arch/x86/x86.h +++ b/arch/x86/x86.h @@ -6,14 +6,6 @@ #ifndef CPU_H_ #define CPU_H_ -#if defined(HAVE_INTERNAL) -# define ZLIB_INTERNAL __attribute__((visibility ("internal"))) -#elif defined(HAVE_HIDDEN) -# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) -#else -# define ZLIB_INTERNAL -#endif - extern int x86_cpu_has_sse2; extern int x86_cpu_has_sse42; extern int x86_cpu_has_pclmulqdq; diff --git a/deflate.c b/deflate.c index 7275120c..63d67478 100644 --- a/deflate.c +++ b/deflate.c @@ -236,6 +236,8 @@ int ZEXPORT PREFIX(deflateInit2_)(PREFIX3(stream) *strm, int level, int method, #ifdef X86_CPUID x86_check_features(); +#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) + arm_check_features(); #endif if (version == NULL || version[0] != my_version[0] || stream_size != sizeof(PREFIX3(stream))) { diff --git a/deflate_p.h b/deflate_p.h index 6008a970..69a7c82a 100644 --- a/deflate_p.h +++ b/deflate_p.h @@ -9,10 +9,6 @@ #ifndef DEFLATE_P_H #define DEFLATE_P_H -#if defined(X86_CPUID) -# include "arch/x86/x86.h" -#endif - /* Forward declare common non-inlined functions declared in deflate.c */ #ifdef ZLIB_DEBUG diff --git a/functable.c b/functable.c index 93fbd23d..91a2a206 100644 --- a/functable.c +++ b/functable.c @@ -10,14 +10,6 @@ #include "gzendian.h" -#if defined(X86_CPUID) -# include "arch/x86/x86.h" -#elif 
(defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)) -extern int arm_has_crc32(); -extern int arm_has_neon(); -#endif - - /* insert_string */ #ifdef X86_SSE4_2_CRC_HASH extern Pos insert_string_sse(deflate_state *const s, const Pos str, unsigned int count); @@ -69,7 +61,7 @@ ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsi if (x86_cpu_has_sse42) functable.insert_string=&insert_string_sse; #elif defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH) - if (arm_has_crc32()) + if (arm_cpu_has_crc32) functable.insert_string=&insert_string_acle; #endif @@ -97,7 +89,7 @@ ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, si functable.adler32=&adler32_c; #if ((defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(ARM_NEON_ADLER32)) - if (arm_has_neon()) + if (arm_cpu_has_neon) functable.adler32=&adler32_neon; #endif @@ -120,7 +112,7 @@ ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64 #if BYTE_ORDER == LITTLE_ENDIAN functable.crc32=crc32_little; # if __ARM_FEATURE_CRC32 && defined(ARM_ACLE_CRC_HASH) - if (arm_has_crc32()) + if (arm_cpu_has_crc32) functable.crc32=crc32_acle; # endif #elif BYTE_ORDER == BIG_ENDIAN diff --git a/inflate.c b/inflate.c index d17bc274..c4241cc6 100644 --- a/inflate.c +++ b/inflate.c @@ -189,6 +189,12 @@ int ZEXPORT PREFIX(inflateInit2_)(PREFIX3(stream) *strm, int windowBits, const c int ret; struct inflate_state *state; +#ifdef X86_CPUID + x86_check_features(); +#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) + arm_check_features(); +#endif + if (version == NULL || version[0] != PREFIX2(VERSION)[0] || stream_size != (int)(sizeof(PREFIX3(stream)))) return Z_VERSION_ERROR; if (strm == NULL) diff --git a/zutil.h b/zutil.h index 6cb2bad6..e80cc316 100644 --- a/zutil.h +++ b/zutil.h @@ -116,6 +116,7 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ /* provide prototypes for these when building zlib without 
LFS */ #if !defined(WIN32) && !defined(__MSYS__) && (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0) +# include "zbuild.h" /* For PREFIX() */ ZEXTERN uint32_t ZEXPORT PREFIX(adler32_combine64)(uint32_t, uint32_t, z_off_t); ZEXTERN uint32_t ZEXPORT PREFIX(crc32_combine64)(uint32_t, uint32_t, z_off_t); #endif @@ -245,4 +246,12 @@ void ZLIB_INTERNAL zcfree(void *opaque, void *ptr); #define MEMSET memset #endif +#if defined(X86_CPUID) +# include "arch/x86/x86.h" +#elif defined(__aarch64__) +# include "arch/aarch64/arm.h" +#elif defined(__arm__) || defined(_M_ARM) +# include "arch/arm/arm.h" +#endif + #endif /* ZUTIL_H_ */