From: Sebastian Pop Date: Fri, 25 Jan 2019 17:44:46 +0000 (-0600) Subject: ARM: check cpu feature once at init time X-Git-Tag: 1.9.9-b1~524 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1cd1b4eb0eddbe8c142d8a7223fde9bc212739dd;p=thirdparty%2Fzlib-ng.git ARM: check cpu feature once at init time This makes the checks for arm cpu features as inexpensive as on the x86 side by calling the runtime feature detection once in deflate/inflate init and then storing the result in a global variable. --- diff --git a/arch/aarch64/arm.h b/arch/aarch64/arm.h new file mode 100644 index 000000000..baee87f18 --- /dev/null +++ b/arch/aarch64/arm.h @@ -0,0 +1,13 @@ +/* arm.h -- check for ARM features. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef ARM_H_ +#define ARM_H_ + +extern int arm_cpu_has_neon; +extern int arm_cpu_has_crc32; + +void ZLIB_INTERNAL arm_check_features(void); + +#endif /* ARM_H_ */ diff --git a/arch/aarch64/armfeature.c b/arch/aarch64/armfeature.c index 9f2af03f9..39f185d46 100644 --- a/arch/aarch64/armfeature.c +++ b/arch/aarch64/armfeature.c @@ -1,9 +1,11 @@ +#include "zutil.h" + #if defined(__linux__) # include <sys/auxv.h> # include <asm/hwcap.h> #endif -int arm_has_crc32() { +static int arm_has_crc32() { #if defined(__linux__) && defined(HWCAP_CRC32) return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0 ? 
1 : 0; #elif defined(ARM_NOCHECK_ACLE) @@ -13,7 +15,10 @@ int arm_has_crc32() { #endif } -int arm_has_neon() -{ - return 1; /* always available */ +ZLIB_INTERNAL int arm_cpu_has_neon; +ZLIB_INTERNAL int arm_cpu_has_crc32; + +void ZLIB_INTERNAL arm_check_features(void) { + arm_cpu_has_neon = 1; /* always available */ + arm_cpu_has_crc32 = arm_has_crc32(); } diff --git a/arch/arm/Makefile.in b/arch/arm/Makefile.in index 6fcf919a9..34d27107f 100644 --- a/arch/arm/Makefile.in +++ b/arch/arm/Makefile.in @@ -24,7 +24,7 @@ armfeature.o: $(SRCDIR)/armfeature.c $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c armfeature.lo: $(SRCDIR)/armfeature.c - $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c crc32_acle.o: $(SRCDIR)/crc32_acle.c $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c diff --git a/arch/arm/arm.h b/arch/arm/arm.h new file mode 100644 index 000000000..baee87f18 --- /dev/null +++ b/arch/arm/arm.h @@ -0,0 +1,13 @@ +/* arm.h -- check for ARM features. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef ARM_H_ +#define ARM_H_ + +extern int arm_cpu_has_neon; +extern int arm_cpu_has_crc32; + +void ZLIB_INTERNAL arm_check_features(void); + +#endif /* ARM_H_ */ diff --git a/arch/arm/armfeature.c b/arch/arm/armfeature.c index 7c78dda81..a06fd5280 100644 --- a/arch/arm/armfeature.c +++ b/arch/arm/armfeature.c @@ -1,3 +1,5 @@ +#include "zutil.h" + #if defined(__linux__) # include <sys/auxv.h> # include <asm/hwcap.h> @@ -5,7 +7,7 @@ # include <winapifamily.h> #endif -int arm_has_crc32() { +static int arm_has_crc32() { #if defined(__linux__) && defined(HWCAP2_CRC32) return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0 ? 1 : 0; #elif defined(ARM_NOCHECK_ACLE) @@ -15,7 +17,7 @@ int arm_has_crc32() { #endif } -int arm_has_neon() +static int arm_has_neon() { #if defined(__linux__) && defined(HWCAP_NEON) return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0 ? 
1 : 0; @@ -31,3 +33,11 @@ int arm_has_neon() return 0; #endif } + +ZLIB_INTERNAL int arm_cpu_has_neon; +ZLIB_INTERNAL int arm_cpu_has_crc32; + +void ZLIB_INTERNAL arm_check_features(void) { + arm_cpu_has_neon = arm_has_neon(); + arm_cpu_has_crc32 = arm_has_crc32(); +} diff --git a/arch/x86/x86.c b/arch/x86/x86.c index c04e0a79a..382f72a2b 100644 --- a/arch/x86/x86.c +++ b/arch/x86/x86.c @@ -8,7 +8,7 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "x86.h" +#include "zutil.h" #ifdef _MSC_VER #include <intrin.h> diff --git a/arch/x86/x86.h b/arch/x86/x86.h index 9b06cc665..860e64154 100644 --- a/arch/x86/x86.h +++ b/arch/x86/x86.h @@ -6,14 +6,6 @@ #ifndef CPU_H_ #define CPU_H_ -#if defined(HAVE_INTERNAL) -# define ZLIB_INTERNAL __attribute__((visibility ("internal"))) -#elif defined(HAVE_HIDDEN) -# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) -#else -# define ZLIB_INTERNAL -#endif - extern int x86_cpu_has_sse2; extern int x86_cpu_has_sse42; extern int x86_cpu_has_pclmulqdq; diff --git a/deflate.c b/deflate.c index 7275120c1..63d674780 100644 --- a/deflate.c +++ b/deflate.c @@ -236,6 +236,8 @@ int ZEXPORT PREFIX(deflateInit2_)(PREFIX3(stream) *strm, int level, int method, #ifdef X86_CPUID x86_check_features(); +#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) + arm_check_features(); #endif if (version == NULL || version[0] != my_version[0] || stream_size != sizeof(PREFIX3(stream))) { diff --git a/deflate_p.h b/deflate_p.h index 6008a970d..69a7c82a0 100644 --- a/deflate_p.h +++ b/deflate_p.h @@ -9,10 +9,6 @@ #ifndef DEFLATE_P_H #define DEFLATE_P_H -#if defined(X86_CPUID) -# include "arch/x86/x86.h" -#endif - /* Forward declare common non-inlined functions declared in deflate.c */ #ifdef ZLIB_DEBUG diff --git a/functable.c b/functable.c index 93fbd23d4..91a2a2061 100644 --- a/functable.c +++ b/functable.c @@ -10,14 +10,6 @@ #include "gzendian.h" -#if defined(X86_CPUID) -# include "arch/x86/x86.h" -#elif 
(defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)) -extern int arm_has_crc32(); -extern int arm_has_neon(); -#endif - - /* insert_string */ #ifdef X86_SSE4_2_CRC_HASH extern Pos insert_string_sse(deflate_state *const s, const Pos str, unsigned int count); @@ -69,7 +61,7 @@ ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsi if (x86_cpu_has_sse42) functable.insert_string=&insert_string_sse; #elif defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH) - if (arm_has_crc32()) + if (arm_cpu_has_crc32) functable.insert_string=&insert_string_acle; #endif @@ -97,7 +89,7 @@ ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, si functable.adler32=&adler32_c; #if ((defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(ARM_NEON_ADLER32)) - if (arm_has_neon()) + if (arm_cpu_has_neon) functable.adler32=&adler32_neon; #endif @@ -120,7 +112,7 @@ ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64 #if BYTE_ORDER == LITTLE_ENDIAN functable.crc32=crc32_little; # if __ARM_FEATURE_CRC32 && defined(ARM_ACLE_CRC_HASH) - if (arm_has_crc32()) + if (arm_cpu_has_crc32) functable.crc32=crc32_acle; # endif #elif BYTE_ORDER == BIG_ENDIAN diff --git a/inflate.c b/inflate.c index d17bc274d..c4241cc67 100644 --- a/inflate.c +++ b/inflate.c @@ -189,6 +189,12 @@ int ZEXPORT PREFIX(inflateInit2_)(PREFIX3(stream) *strm, int windowBits, const c int ret; struct inflate_state *state; +#ifdef X86_CPUID + x86_check_features(); +#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) + arm_check_features(); +#endif + if (version == NULL || version[0] != PREFIX2(VERSION)[0] || stream_size != (int)(sizeof(PREFIX3(stream)))) return Z_VERSION_ERROR; if (strm == NULL) diff --git a/zutil.h b/zutil.h index 6cb2bad6d..e80cc3160 100644 --- a/zutil.h +++ b/zutil.h @@ -116,6 +116,7 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ /* provide prototypes for these when building zlib 
without LFS */ #if !defined(WIN32) && !defined(__MSYS__) && (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0) +# include "zbuild.h" /* For PREFIX() */ ZEXTERN uint32_t ZEXPORT PREFIX(adler32_combine64)(uint32_t, uint32_t, z_off_t); ZEXTERN uint32_t ZEXPORT PREFIX(crc32_combine64)(uint32_t, uint32_t, z_off_t); #endif @@ -245,4 +246,12 @@ void ZLIB_INTERNAL zcfree(void *opaque, void *ptr); #define MEMSET memset #endif +#if defined(X86_CPUID) +# include "arch/x86/x86.h" +#elif defined(__aarch64__) +# include "arch/aarch64/arm.h" +#elif defined(__arm__) || defined(_M_ARM) +# include "arch/arm/arm.h" +#endif + #endif /* ZUTIL_H_ */