From: Nathan Moinvaziri Date: Wed, 4 Sep 2019 06:35:23 +0000 (-0700) Subject: Changes to support compilation with MSVC ARM & ARM64 (#386) X-Git-Tag: 1.9.9-b1~445 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ce0076688a7eff8c7991234f5024386cf9ac944c;p=thirdparty%2Fzlib-ng.git Changes to support compilation with MSVC ARM & ARM64 (#386) * Merge aarch64 and arm cmake sections. * Updated MSVC compiler support for ARM and ARM64. * Moved detection for -mfpu=neon to where the flag is set to simplify add_intrinsics_option. * Only add ${ACLEFLAG} on aarch64 if not WITH_NEON. * Rename arch/x86/ctzl.h to fallback_builtins.h. --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 2122d6d4..49a5f927 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,16 @@ if(CMAKE_OSX_ARCHITECTURES) # If multiple architectures are requested (universal build), pick only the first list(GET CMAKE_OSX_ARCHITECTURES 0 ARCH) else() - set(ARCH ${CMAKE_SYSTEM_PROCESSOR}) + if (MSVC) + if("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARMV7") + set(ARCH "arm") + elseif ("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64") + set(ARCH "aarch64") + endif() + endif() + if(NOT ARCH) + set(ARCH ${CMAKE_SYSTEM_PROCESSOR}) + endif() endif() message(STATUS "Architecture: ${ARCH}") if(CMAKE_TOOLCHAIN_FILE) @@ -156,7 +165,7 @@ elseif(MSVC) set(SSE2FLAG "/arch:SSE2") endif() if("${ARCH}" MATCHES "arm") - add_definitions("-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1") + add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) set(NEONFLAG "/arch:VFPv4") endif() if(WITH_NATIVE_INSTRUCTIONS) @@ -174,13 +183,27 @@ else() message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration") endif() endif() - if(__GNUC__ AND "${ARCH}" MATCHES "arm") - execute_process(COMMAND ${CMAKE_C_COMPILER} "-dumpmachine" - OUTPUT_VARIABLE GCC_MACHINE) - if ("${GCC_MACHINE}" MATCHES "eabihf") - set(FLOATABI "-mfloat-abi=hard") + # 
Check support for ARM floating point + if("${ARCH}" MATCHES "arm") + if (__GNUC__) + execute_process(COMMAND ${CMAKE_C_COMPILER} "-dumpmachine" + OUTPUT_VARIABLE GCC_MACHINE) + if ("${GCC_MACHINE}" MATCHES "eabihf") + set(FLOATABI "-mfloat-abi=hard") + else() + set(FLOATABI "-mfloat-abi=softfp") + endif() + endif() + # Check whether -mfpu=neon is available + set(CMAKE_REQUIRED_FLAGS "-mfpu=neon") + check_c_source_compiles( + "int main() { return 0; }" + MFPU_NEON_AVAILABLE FAIL_REGEX "not supported") + set(CMAKE_REQUIRED_FLAGS) + if(MFPU_NEON_AVAILABLE) + set(NEONFLAG "${FLOATABI} -mfpu=neon") else() - set(FLOATABI "-mfloat-abi=softfp") + set(NEONFLAG "${FLOATABI}") endif() endif() if(NOT NATIVEFLAG) @@ -201,7 +224,6 @@ else() endif() if("${ARCH}" MATCHES "arm") set(ACLEFLAG "-march=armv8-a+crc") - set(NEONFLAG "${FLOATABI} -mfpu=neon") elseif("${ARCH}" MATCHES "aarch64") set(ACLEFLAG "-march=armv8-a+crc") set(NEONFLAG "-march=armv8-a+crc+simd") @@ -212,7 +234,6 @@ else() set(PCLMULFLAG ${NATIVEFLAG}) if("${ARCH}" MATCHES "arm") set(ACLEFLAG "${NATIVEFLAG}") - set(NEONFLAG "${FLOATABI} -mfpu=neon") elseif("${ARCH}" MATCHES "aarch64") set(ACLEFLAG "${NATIVEFLAG}") set(NEONFLAG "${NATIVEFLAG}") @@ -483,16 +504,6 @@ else() endif() set(CMAKE_REQUIRED_FLAGS) -# Check whether -mfpu=neon is available -set(CMAKE_REQUIRED_FLAGS "-mfpu=neon") -check_c_source_compiles( - "int main() - { - return 0; - }" - MFPU_NEON_AVAILABLE FAIL_REGEX "not supported") -set(CMAKE_REQUIRED_FLAGS) - # FORCE_SSE2 option will only be shown if HAVE_SSE2_INTRIN is true if("${ARCH}" MATCHES "i[3-6]86") cmake_dependent_option(FORCE_SSE2 "Always assume CPU is SSE2 capable" OFF "HAVE_SSE2_INTRIN" OFF) @@ -542,39 +553,25 @@ if("${ARCH}" MATCHES "arm" OR "${ARCH}" MATCHES "aarch64") set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/armfeature.c ${ARCHDIR}/fill_window_arm.c) endif() if(WITH_OPTIM) - if("${ARCH}" MATCHES "arm") - if(WITH_ACLE) - set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/crc32_acle.c 
${ARCHDIR}/insert_string_acle.c) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ACLEFLAG}") - add_definitions("-DARM_ACLE_CRC_HASH") - add_feature_info(ACLE_CRC 1 "Support CRC hash generation using the ACLE instruction set, using \"${ACLEFLAG}\"") - endif() + if("${ARCH}" MATCHES "arm" OR "${ARCH}" MATCHES "aarch64") if(WITH_NEON) - if(MFPU_NEON_AVAILABLE) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${NEONFLAG}") - endif() - add_definitions("-DARM_NEON_ADLER32") - if(MSVC) - add_definitions("-D__ARM_NEON__=1") - endif(MSVC) set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/adler32_neon.c) + add_definitions(-DARM_NEON_ADLER32) + add_intrinsics_option("${NEONFLAG}") + if(MSVC) + add_definitions(-D__ARM_NEON__) + endif() add_feature_info(NEON_FILLWINDOW 1 "Support NEON instructions in fill_window_arm, using \"${NEONFLAG}\"") endif() - elseif("${ARCH}" MATCHES "aarch64") if(WITH_ACLE) set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c) - add_definitions("-DARM_ACLE_CRC_HASH") + add_definitions(-DARM_ACLE_CRC_HASH) + # For ARM aarch64, we need to check WITH_NEON first + if("${ARCH}" MATCHES "arm" OR NOT WITH_NEON) + add_intrinsics_option("${ACLEFLAG}") + endif() add_feature_info(ACLE_CRC 1 "Support CRC hash generation using the ACLE instruction set, using \"${ACLEFLAG}\"") endif() - # We need to check WITH_NEON first - if(WITH_NEON) - add_definitions("-DARM_NEON_ADLER32") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${NEONFLAG}") - set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/adler32_neon.c) - add_feature_info(NEON_FILLWINDOW 1 "Support NEON instructions in fill_window_arm, using \"${NEONFLAG}\"") - elseif(WITH_ACLE) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ACLEFLAG}") - endif() elseif("${ARCHDIR}" MATCHES "arch/x86") add_definitions("-DX86_CPUID") set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/x86.c) diff --git a/arch/arm/adler32_neon.c b/arch/arm/adler32_neon.c index bc77672e..71973a1e 100644 --- a/arch/arm/adler32_neon.c +++ 
b/arch/arm/adler32_neon.c @@ -109,7 +109,7 @@ uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) { for (i = 0; i < len; i += n) { if ((i + n) > len) - n = len - i; + n = (int)(len - i); if (n < 16) break; diff --git a/arch/arm/armfeature.c b/arch/arm/armfeature.c index 0cec748c..7c4758b6 100644 --- a/arch/arm/armfeature.c +++ b/arch/arm/armfeature.c @@ -18,7 +18,7 @@ static int arm_has_crc32() { } /* AArch64 has neon. */ -#if !defined(__aarch64__) +#if !defined(__aarch64__) && !defined(_M_ARM64) static inline int arm_has_neon() { #if defined(__linux__) && defined(HWCAP_NEON) @@ -41,7 +41,7 @@ ZLIB_INTERNAL int arm_cpu_has_neon; ZLIB_INTERNAL int arm_cpu_has_crc32; void ZLIB_INTERNAL arm_check_features(void) { -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(_M_ARM64) arm_cpu_has_neon = 1; /* always available */ #else arm_cpu_has_neon = arm_has_neon(); diff --git a/deflate.c b/deflate.c index d13dfe2a..5380b962 100644 --- a/deflate.c +++ b/deflate.c @@ -269,7 +269,7 @@ int ZEXPORT PREFIX(deflateInit2_)(PREFIX3(stream) *strm, int level, int method, #ifdef X86_CPUID x86_check_features(); -#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) +#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) arm_check_features(); #endif diff --git a/arch/x86/ctzl.h b/fallback_builtins.h similarity index 95% rename from arch/x86/ctzl.h rename to fallback_builtins.h index bc9e9bd5..4cdace3c 100644 --- a/arch/x86/ctzl.h +++ b/fallback_builtins.h @@ -3,7 +3,7 @@ #include <intrin.h> #ifdef X86_CPUID -# include "x86.h" +# include "arch/x86/x86.h" #endif #if defined(_MSC_VER) && !defined(__clang__) diff --git a/functable.c b/functable.c index 587abfec..b3020e00 100644 --- a/functable.c +++ b/functable.c @@ -19,7 +19,7 @@ extern Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned in /* fill_window */ #ifdef X86_SSE2 extern void fill_window_sse(deflate_state *s); -#elif defined(__arm__) || 
defined(__aarch64__) || defined(_M_ARM) +#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) extern void fill_window_arm(deflate_state *s); #endif @@ -81,7 +81,7 @@ ZLIB_INTERNAL void fill_window_stub(deflate_state *s) { if (x86_cpu_has_sse2) # endif functable.fill_window=&fill_window_sse; - #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) + #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) functable.fill_window=&fill_window_arm; #endif diff --git a/inflate.c b/inflate.c index ed544ba9..fae8847d 100644 --- a/inflate.c +++ b/inflate.c @@ -132,7 +132,7 @@ int ZEXPORT PREFIX(inflateInit2_)(PREFIX3(stream) *strm, int windowBits, const c #ifdef X86_CPUID x86_check_features(); -#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) +#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) arm_check_features(); #endif diff --git a/match_p.h b/match_p.h index 6dda122b..258e73e0 100644 --- a/match_p.h +++ b/match_p.h @@ -33,10 +33,8 @@ #if defined(_MSC_VER) && !defined(__clang__) -# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) -# include "arch/x86/ctzl.h" -# elif defined(_M_ARM) -# include "arch/arm/ctzl.h" +# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM64) +# include "fallback_builtins.h" # endif #endif diff --git a/memcopy.h b/memcopy.h index c322deb5..b95ab29d 100644 --- a/memcopy.h +++ b/memcopy.h @@ -18,7 +18,7 @@ static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) { #endif } - #if (defined(__GNUC__) || defined(__clang__)) && (defined(__ARM_NEON__) || defined(__ARM_NEON)) + #if defined(__ARM_NEON__) || defined(__ARM_NEON) #include <arm_neon.h> typedef uint8x16_t inffast_chunk_t; #define INFFAST_CHUNKSIZE sizeof(inffast_chunk_t) @@ -199,7 +199,7 @@ static inline unsigned char *chunkmemset_3(unsigned char *out, unsigned char *fr } #endif - #if defined(__aarch64__) 
+ #if defined(__aarch64__) || defined(_M_ARM64) static inline unsigned char *chunkmemset_6(unsigned char *out, unsigned char *from, unsigned dist, unsigned len) { uint16x8x3_t chunks; unsigned sz = sizeof(chunks); @@ -265,7 +265,7 @@ static inline unsigned char *chunkmemset(unsigned char *out, unsigned dist, unsi chunk = chunkmemset_4(from); break; } - #if defined(__aarch64__) + #if defined(__aarch64__) || defined(_M_ARM64) case 6: return chunkmemset_6(out, from, dist, len); #endif diff --git a/zendian.h b/zendian.h index 50870340..5921abbe 100644 --- a/zendian.h +++ b/zendian.h @@ -27,7 +27,7 @@ #elif defined(WIN32) || defined(_WIN32) # define LITTLE_ENDIAN 1234 # define BIG_ENDIAN 4321 -# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined (_M_ARM) +# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined (_M_ARM) || defined (_M_ARM64) # define BYTE_ORDER LITTLE_ENDIAN # else # error Unknown endianness! diff --git a/zutil.h b/zutil.h index fe139939..92f47063 100644 --- a/zutil.h +++ b/zutil.h @@ -240,7 +240,7 @@ void ZLIB_INTERNAL zng_cfree(void *opaque, void *ptr); #if defined(X86_CPUID) # include "arch/x86/x86.h" -#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) +#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64) # include "arch/arm/arm.h" #endif