git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Changes to support compilation with MSVC ARM & ARM64 (#386)
author: Nathan Moinvaziri <nathan@solidstatenetworks.com>
Wed, 4 Sep 2019 06:35:23 +0000 (23:35 -0700)
committer: Hans Kristian Rosbach <hk-github@circlestorm.org>
Wed, 4 Sep 2019 06:35:23 +0000 (08:35 +0200)
* Merge aarch64 and arm cmake sections.
* Updated MSVC compiler support for ARM and ARM64.
* Moved detection for -mfpu=neon to where the flag is set to simplify add_intrinsics_option.
* Only add ${ACLEFLAG} on aarch64 if not WITH_NEON.
* Rename arch/x86/ctzl.h to fallback_builtins.h.

CMakeLists.txt
arch/arm/adler32_neon.c
arch/arm/armfeature.c
deflate.c
fallback_builtins.h [moved from arch/x86/ctzl.h with 95% similarity]
functable.c
inflate.c
match_p.h
memcopy.h
zendian.h
zutil.h

index 2122d6d48d727e52c1a21a425acebb92cd06a9e1..49a5f92703eb5cbd41252306cb9a026ee91038f3 100644 (file)
@@ -72,7 +72,16 @@ if(CMAKE_OSX_ARCHITECTURES)
     # If multiple architectures are requested (universal build), pick only the first
     list(GET CMAKE_OSX_ARCHITECTURES 0 ARCH)
 else()
-    set(ARCH ${CMAKE_SYSTEM_PROCESSOR})
+    if (MSVC)
+        if("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARMV7")
+            set(ARCH "arm")
+        elseif ("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64")
+            set(ARCH "aarch64")
+        endif()
+    endif()
+    if(NOT ARCH)
+        set(ARCH ${CMAKE_SYSTEM_PROCESSOR})
+    endif()
 endif()
 message(STATUS "Architecture: ${ARCH}")
 if(CMAKE_TOOLCHAIN_FILE)
@@ -156,7 +165,7 @@ elseif(MSVC)
         set(SSE2FLAG "/arch:SSE2")
     endif()
     if("${ARCH}" MATCHES "arm")
-        add_definitions("-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1")
+        add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE)
         set(NEONFLAG "/arch:VFPv4")
     endif()
     if(WITH_NATIVE_INSTRUCTIONS)
@@ -174,13 +183,27 @@ else()
             message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration")
         endif()
     endif()
-    if(__GNUC__ AND "${ARCH}" MATCHES "arm")
-        execute_process(COMMAND ${CMAKE_C_COMPILER} "-dumpmachine"
-                        OUTPUT_VARIABLE GCC_MACHINE)
-        if ("${GCC_MACHINE}" MATCHES "eabihf")
-            set(FLOATABI "-mfloat-abi=hard")
+    # Check support for ARM floating point
+    if("${ARCH}" MATCHES "arm")
+        if (__GNUC__)
+            execute_process(COMMAND ${CMAKE_C_COMPILER} "-dumpmachine"
+                            OUTPUT_VARIABLE GCC_MACHINE)
+            if ("${GCC_MACHINE}" MATCHES "eabihf")
+                set(FLOATABI "-mfloat-abi=hard")
+            else()
+                set(FLOATABI "-mfloat-abi=softfp")
+            endif()
+        endif()
+        # Check whether -mfpu=neon is available
+        set(CMAKE_REQUIRED_FLAGS "-mfpu=neon")
+        check_c_source_compiles(
+            "int main() { return 0; }"
+            MFPU_NEON_AVAILABLE FAIL_REGEX "not supported")
+        set(CMAKE_REQUIRED_FLAGS)
+        if(MFPU_NEON_AVAILABLE)
+            set(NEONFLAG "${FLOATABI} -mfpu=neon")
         else()
-            set(FLOATABI "-mfloat-abi=softfp")
+            set(NEONFLAG "${FLOATABI}")
         endif()
     endif()
     if(NOT NATIVEFLAG)
@@ -201,7 +224,6 @@ else()
         endif()
         if("${ARCH}" MATCHES "arm")
             set(ACLEFLAG "-march=armv8-a+crc")
-            set(NEONFLAG "${FLOATABI} -mfpu=neon")
         elseif("${ARCH}" MATCHES "aarch64")
             set(ACLEFLAG "-march=armv8-a+crc")
             set(NEONFLAG "-march=armv8-a+crc+simd")
@@ -212,7 +234,6 @@ else()
         set(PCLMULFLAG ${NATIVEFLAG})
         if("${ARCH}" MATCHES "arm")
             set(ACLEFLAG "${NATIVEFLAG}")
-            set(NEONFLAG "${FLOATABI} -mfpu=neon")
         elseif("${ARCH}" MATCHES "aarch64")
             set(ACLEFLAG "${NATIVEFLAG}")
             set(NEONFLAG "${NATIVEFLAG}")
@@ -483,16 +504,6 @@ else()
 endif()
 set(CMAKE_REQUIRED_FLAGS)
 
-# Check whether -mfpu=neon is available
-set(CMAKE_REQUIRED_FLAGS "-mfpu=neon")
-check_c_source_compiles(
-    "int main()
-    {
-      return 0;
-    }"
-    MFPU_NEON_AVAILABLE FAIL_REGEX "not supported")
-set(CMAKE_REQUIRED_FLAGS)
-
 # FORCE_SSE2 option will only be shown if HAVE_SSE2_INTRIN is true
 if("${ARCH}" MATCHES "i[3-6]86")
     cmake_dependent_option(FORCE_SSE2 "Always assume CPU is SSE2 capable" OFF "HAVE_SSE2_INTRIN" OFF)
@@ -542,39 +553,25 @@ if("${ARCH}" MATCHES "arm" OR "${ARCH}" MATCHES "aarch64")
     set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/armfeature.c ${ARCHDIR}/fill_window_arm.c)
 endif()
 if(WITH_OPTIM)
-    if("${ARCH}" MATCHES "arm")
-        if(WITH_ACLE)
-            set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c)
-            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ACLEFLAG}")
-            add_definitions("-DARM_ACLE_CRC_HASH")
-            add_feature_info(ACLE_CRC 1 "Support CRC hash generation using the ACLE instruction set, using \"${ACLEFLAG}\"")
-        endif()
+    if("${ARCH}" MATCHES "arm" OR "${ARCH}" MATCHES "aarch64")
         if(WITH_NEON)
-            if(MFPU_NEON_AVAILABLE)
-                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${NEONFLAG}")
-            endif()
-            add_definitions("-DARM_NEON_ADLER32")
-            if(MSVC)
-                add_definitions("-D__ARM_NEON__=1")
-            endif(MSVC)
             set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/adler32_neon.c)
+            add_definitions(-DARM_NEON_ADLER32)
+            add_intrinsics_option("${NEONFLAG}")
+            if(MSVC)
+                add_definitions(-D__ARM_NEON__)
+            endif()
             add_feature_info(NEON_FILLWINDOW 1 "Support NEON instructions in fill_window_arm, using \"${NEONFLAG}\"")
         endif()
-    elseif("${ARCH}" MATCHES "aarch64")
         if(WITH_ACLE)
             set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c)
-            add_definitions("-DARM_ACLE_CRC_HASH")
+            add_definitions(-DARM_ACLE_CRC_HASH)
+            # For ARM aarch64, we need to check WITH_NEON first
+            if("${ARCH}" MATCHES "arm" OR NOT WITH_NEON)
+                add_intrinsics_option("${ACLEFLAG}")
+            endif()
             add_feature_info(ACLE_CRC 1 "Support CRC hash generation using the ACLE instruction set, using \"${ACLEFLAG}\"")
         endif()
-        # We need to check WITH_NEON first
-        if(WITH_NEON)
-            add_definitions("-DARM_NEON_ADLER32")
-            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${NEONFLAG}")
-            set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/adler32_neon.c)
-            add_feature_info(NEON_FILLWINDOW 1 "Support NEON instructions in fill_window_arm, using \"${NEONFLAG}\"")
-        elseif(WITH_ACLE)
-            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ACLEFLAG}")
-        endif()
     elseif("${ARCHDIR}" MATCHES "arch/x86")
         add_definitions("-DX86_CPUID")
         set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/x86.c)
index bc77672ef7676e1f3b35f2565cef82845f9c1ea6..71973a1ec6aa67d84d579cfcbffcd23d1ca901fd 100644 (file)
@@ -109,7 +109,7 @@ uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) {
 
     for (i = 0; i < len; i += n) {
         if ((i + n) > len)
-            n = len - i;
+            n = (int)(len - i);
 
         if (n < 16)
             break;
index 0cec748c4e826de80051e55e2c7ac5a21f6ff96a..7c4758b6ee11ddeeea364f1bb87cdab00111ce99 100644 (file)
@@ -18,7 +18,7 @@ static int arm_has_crc32() {
 }
 
 /* AArch64 has neon. */
-#if !defined(__aarch64__)
+#if !defined(__aarch64__) && !defined(_M_ARM64)
 static inline int arm_has_neon()
 {
  #if defined(__linux__) && defined(HWCAP_NEON)
@@ -41,7 +41,7 @@ ZLIB_INTERNAL int arm_cpu_has_neon;
 ZLIB_INTERNAL int arm_cpu_has_crc32;
 
 void ZLIB_INTERNAL arm_check_features(void) {
-#if defined(__aarch64__)
+#if defined(__aarch64__) || defined(_M_ARM64)
   arm_cpu_has_neon = 1; /* always available */
 #else
   arm_cpu_has_neon = arm_has_neon();
index d13dfe2a277b76de6ce86e153d7c47e46c269cab..5380b962134242dd9a7618d657063c829f7e06ea 100644 (file)
--- a/deflate.c
+++ b/deflate.c
@@ -269,7 +269,7 @@ int ZEXPORT PREFIX(deflateInit2_)(PREFIX3(stream) *strm, int level, int method,
 
 #ifdef X86_CPUID
     x86_check_features();
-#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
+#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
     arm_check_features();
 #endif
 
similarity index 95%
rename from arch/x86/ctzl.h
rename to fallback_builtins.h
index bc9e9bd5b5933a133102551c5ea47b38e2de79b3..4cdace3c4ef8fd679dac5267e5042ca4cfb77fde 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <intrin.h>
 #ifdef X86_CPUID
-# include "x86.h"
+# include "arch/x86/x86.h"
 #endif
 
 #if defined(_MSC_VER) && !defined(__clang__)
index 587abfecb10e418192fec1023c296c0c2078b93b..b3020e000fce105a5aac9bb395a06388cc107fea 100644 (file)
@@ -19,7 +19,7 @@ extern Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned in
 /* fill_window */
 #ifdef X86_SSE2
 extern void fill_window_sse(deflate_state *s);
-#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
+#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
 extern void fill_window_arm(deflate_state *s);
 #endif
 
@@ -81,7 +81,7 @@ ZLIB_INTERNAL void fill_window_stub(deflate_state *s) {
     if (x86_cpu_has_sse2)
     # endif
         functable.fill_window=&fill_window_sse;
-    #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
+    #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
         functable.fill_window=&fill_window_arm;
     #endif
 
index ed544ba95858248b1727f67a6e1d51dd2472fccc..fae8847d3d12013b9ec500ee93bf88246c7256ae 100644 (file)
--- a/inflate.c
+++ b/inflate.c
@@ -132,7 +132,7 @@ int ZEXPORT PREFIX(inflateInit2_)(PREFIX3(stream) *strm, int windowBits, const c
 
 #ifdef X86_CPUID
     x86_check_features();
-#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
+#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
     arm_check_features();
 #endif
 
index 6dda122b15bd015d3fd9f648992f1fe5fcf7fed3..258e73e052ad88770ac5c02f9a3010f1d8ecc601 100644 (file)
--- a/match_p.h
+++ b/match_p.h
 
 
 #if defined(_MSC_VER) && !defined(__clang__)
-# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64)
-#  include "arch/x86/ctzl.h"
-# elif defined(_M_ARM)
-#  include "arch/arm/ctzl.h"
+# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) ||  defined(_M_ARM) || defined(_M_ARM64)
+#  include "fallback_builtins.h"
 # endif
 #endif
 
index c322deb50a5d98b88ff1eccc37eb1ca29c2a6542..b95ab29d3fd7f85dda7be730628f36d6ecd759da 100644 (file)
--- a/memcopy.h
+++ b/memcopy.h
@@ -18,7 +18,7 @@ static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
  #endif
 }
 
- #if (defined(__GNUC__) || defined(__clang__)) && (defined(__ARM_NEON__) || defined(__ARM_NEON))
+ #if defined(__ARM_NEON__) || defined(__ARM_NEON)
   #include <arm_neon.h>
 typedef uint8x16_t inffast_chunk_t;
   #define INFFAST_CHUNKSIZE sizeof(inffast_chunk_t)
@@ -199,7 +199,7 @@ static inline unsigned char *chunkmemset_3(unsigned char *out, unsigned char *fr
 }
   #endif
 
-  #if defined(__aarch64__)
+  #if defined(__aarch64__) || defined(_M_ARM64)
 static inline unsigned char *chunkmemset_6(unsigned char *out, unsigned char *from, unsigned dist, unsigned len) {
     uint16x8x3_t chunks;
     unsigned sz = sizeof(chunks);
@@ -265,7 +265,7 @@ static inline unsigned char *chunkmemset(unsigned char *out, unsigned dist, unsi
         chunk = chunkmemset_4(from);
         break;
     }
-  #if defined(__aarch64__)
+  #if defined(__aarch64__) || defined(_M_ARM64)
     case 6:
         return chunkmemset_6(out, from, dist, len);
   #endif
index 5087034014e435321bf4c15245dbca1a14688c65..5921abbe1e47786df561cefbd3755376409c0470 100644 (file)
--- a/zendian.h
+++ b/zendian.h
@@ -27,7 +27,7 @@
 #elif defined(WIN32) || defined(_WIN32)
 # define LITTLE_ENDIAN 1234
 # define BIG_ENDIAN 4321
-# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined (_M_ARM)
+# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined (_M_ARM) || defined (_M_ARM64)
 #  define BYTE_ORDER LITTLE_ENDIAN
 # else
 #  error Unknown endianness!
diff --git a/zutil.h b/zutil.h
index fe139939269246383ca04ed12873fb166ff1c108..92f47063a6d58dd0c217506f8cb11b3d2f7e3ed0 100644 (file)
--- a/zutil.h
+++ b/zutil.h
@@ -240,7 +240,7 @@ void ZLIB_INTERNAL   zng_cfree(void *opaque, void *ptr);
 
 #if defined(X86_CPUID)
 # include "arch/x86/x86.h"
-#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM)
+#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
 # include "arch/arm/arm.h"
 #endif