]> git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Fixed optimizations not being used when compiler is msvc. (#376)
author Nathan Moinvaziri <nathan@solidstatenetworks.com>
Sun, 11 Aug 2019 10:49:01 +0000 (03:49 -0700)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Sun, 11 Aug 2019 10:49:01 +0000 (12:49 +0200)
This addresses the issue I mentioned in #370. Optimization sources such as crc_folding.c, deflate_quick_sse.c, fill_window_sse.c, and insert_string_sse.c were not being compiled when the compiler was MSVC, because the checks for the intrinsics were not being run and the HAVE_[TARGET]_INTRIN variables were therefore never set. I could have simply set the HAVE_[TARGET]_INTRIN variables to ON manually for MSVC, but it is better to have a single path for all compilers (one that compiles and runs some test code to make the determination). I have only added MSVC-specific code where necessary in the checks.

* Rename HAVE_SSE42_INTRIN to HAVE_SSE42CRC_INLINE_ASM.
* Added msvc inline asm support to insert_string_sse.c

CMakeLists.txt
arch/x86/insert_string_sse.c

index 420fc1c6800b937f819686c2d8b4782c96c47e63..de87b59f1f5cfeb80c3ca5e08e1ef20ad07ac77d 100644 (file)
@@ -33,7 +33,7 @@ string(REGEX REPLACE ".*#define[ \t]+ZLIBNG_VERSION[ \t]+\"([-0-9A-Za-z.]+)\".*"
 message(STATUS "ZLIB_HEADER_VERSION: ${ZLIB_HEADER_VERSION}")
 message(STATUS "ZLIBNG_HEADER_VERSION: ${ZLIBNG_HEADER_VERSION}")
 
-project(zlib 
+project(zlib
   VERSION ${ZLIB_HEADER_VERSION}
   LANGUAGES C)
 
@@ -400,80 +400,88 @@ if(MSVC)
     set(CMAKE_DEBUG_POSTFIX "d")
     add_definitions(-D_CRT_SECURE_NO_DEPRECATE)
     add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE)
+endif()
+
+# Check whether compiler supports SSE2 intrinsics
+if(WITH_NATIVE_INSTRUCTIONS)
+    set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}")
 else()
-    #
-    # Not MSVC, so we need to check if we have the MS-style SSE etc. intrinsics
-    #
-    if(WITH_NATIVE_INSTRUCTIONS)
-        set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}")
-    else()
-        set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG}")
-    endif()
+    set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG}")
+endif()
+check_c_source_compile_or_run(
+    "#include <immintrin.h>
+    int main(void)
+    {
+        __m128i zero = _mm_setzero_si128();
+        (void)zero;
+        return 0;
+    }"
+    HAVE_SSE2_INTRIN
+)
+set(CMAKE_REQUIRED_FLAGS)
+
+# Check whether compiler supports SSE4 CRC inline asm
+if(WITH_NATIVE_INSTRUCTIONS)
+    set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}")
+else()
+    set(CMAKE_REQUIRED_FLAGS "${SSE4FLAG}")
+endif()
+check_c_source_compile_or_run(
+    "int main(void)
+    {
+        unsigned val = 0, h = 0;
+    #if defined(_MSC_VER)
+        { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov val, eax }
+    #else
+        __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) );
+    #endif
+        return (int) h;
+    }"
+    HAVE_SSE42CRC_INLINE_ASM
+)
+# Check whether compiler supports SSE4 CRC intrinsics
+check_c_source_compile_or_run(
+    "#include <immintrin.h>
+    int main(void)
+    {
+        unsigned crc = 0;
+        char c = 'c';
+    #if defined(_MSC_VER)
+        crc = _mm_crc32_u32(crc, c);
+    #else
+        crc = __builtin_ia32_crc32qi(crc, c);
+    #endif
+        (void)crc;
+        return 0;
+    }"
+    HAVE_SSE42CRC_INTRIN
+)
+set(CMAKE_REQUIRED_FLAGS)
+
+# Check whether compiler supports PCLMULQDQ intrinsics
+if(WITH_NATIVE_INSTRUCTIONS)
+    set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}")
+else()
+    set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG}")
+endif()
+if(NOT (APPLE AND ${ARCH} MATCHES "i386"))
+    # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now.
     check_c_source_compile_or_run(
         "#include <immintrin.h>
         int main(void)
         {
-            __m128i zero = _mm_setzero_si128();
-            (void)zero;
+            __m128i a = _mm_setzero_si128();
+            __m128i b = _mm_setzero_si128();
+            __m128i c = _mm_clmulepi64_si128(a, b, 0x10);
+            (void)c;
             return 0;
         }"
-        HAVE_SSE2_INTRIN
+        HAVE_PCLMULQDQ_INTRIN
     )
-    set(CMAKE_REQUIRED_FLAGS)
-
-    if(WITH_NATIVE_INSTRUCTIONS)
-        set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}")
-    else()
-        # Use the generic SSE4 enabler option to check for the SSE4.2 instruction we require:
-        set(CMAKE_REQUIRED_FLAGS "${SSE4FLAG}")
-    endif()
-    check_c_source_compile_or_run(
-        "int main(void)
-        {
-            unsigned val = 0, h = 0;
-            __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) );
-            return (int) h;
-        }"
-        HAVE_SSE42_INTRIN
-    )
-    check_c_source_compile_or_run(
-        "int main(void)
-        {
-            unsigned crc = 0;
-            char c = 'c';
-            crc = __builtin_ia32_crc32qi(crc, c);
-            (void)crc;
-            return 0;
-        }"
-        HAVE_SSE42CRC_INTRIN
-    )
-    set(CMAKE_REQUIRED_FLAGS)
-
-    if(WITH_NATIVE_INSTRUCTIONS)
-        set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}")
-    else()
-        set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG}")
-    endif()
-    if(NOT (APPLE AND ${ARCH} MATCHES "i386"))
-        # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now.
-        check_c_source_compile_or_run(
-            "#include <immintrin.h>
-            #include <wmmintrin.h>
-            int main(void)
-            {
-                __m128i a = _mm_setzero_si128();
-                __m128i b = _mm_setzero_si128();
-                __m128i c = _mm_clmulepi64_si128(a, b, 0x10);
-                (void)c;
-                return 0;
-            }"
-            HAVE_PCLMULQDQ_INTRIN
-        )
-    else()
-        set(HAVE_PCLMULQDQ_INTRIN NO)
-    endif()
-    set(CMAKE_REQUIRED_FLAGS)
+else()
+    set(HAVE_PCLMULQDQ_INTRIN NO)
 endif()
+set(CMAKE_REQUIRED_FLAGS)
 
 # Check whether -mfpu=neon is available
 set(CMAKE_REQUIRED_FLAGS "-mfpu=neon")
@@ -570,7 +578,7 @@ if(WITH_OPTIM)
     elseif("${ARCHDIR}" MATCHES "arch/x86")
         add_definitions("-DX86_CPUID")
         set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/x86.c)
-        if(HAVE_SSE42_INTRIN)
+        if(HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN)
             add_definitions(-DX86_SSE4_2_CRC_HASH)
             set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/insert_string_sse.c)
             add_feature_info(SSE4_CRC 1 "Support CRC hash generation using the SSE4.2 instruction set, using \"${SSE4FLAG}\"")
@@ -599,7 +607,7 @@ if(WITH_OPTIM)
             add_definitions(-DX86_PCLMULQDQ_CRC)
             set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/crc_folding.c)
             add_intrinsics_option("${PCLMULFLAG}")
-            if(HAVE_SSE42_INTRIN)
+            if(HAVE_SSE42CRC_INLINE_ASM)
                 add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${PCLMULFLAG}\"")
             else()
                 add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${PCLMULFLAG} ${SSE4FLAG}\"")
@@ -882,4 +890,3 @@ if (ZLIB_ENABLE_TESTS)
 endif()
 
 FEATURE_SUMMARY(WHAT ALL INCLUDE_QUIET_PACKAGES)
-
index 394e50937f85c0f6a3f5fb247a1b24c6a7d44a67..599621513ed02f2d71019a89503cce1ab0ba0011 100644 (file)
@@ -30,16 +30,27 @@ ZLIB_INTERNAL Pos insert_string_sse(deflate_state *const s, const Pos str, unsig
         if (s->level >= TRIGGER_LEVEL)
             val &= 0xFFFFFF;
 
-#ifdef _MSC_VER
+#if defined(X86_SSE4_2_CRC_INTRIN)
+#  ifdef _MSC_VER
         h = _mm_crc32_u32(h, val);
-#elif defined(X86_SSE4_2_CRC_INTRIN)
+#  else
         h = __builtin_ia32_crc32si(h, val);
+#  endif
 #else
+#  ifdef _MSC_VER
+        __asm {  /* MSVC inline-asm fallback when the CRC intrinsic is unavailable */
+            mov edx, h      /* edx = current hash (comma between operands is required) */
+            mov eax, val    /* eax = input word */
+            crc32 eax, edx  /* Intel operand order: eax = crc32(eax, edx) */
+            mov val, eax    /* NOTE(review): result goes to val; other branches update h - confirm intended */
+        };
+#  else
         __asm__ __volatile__ (
             "crc32 %1,%0\n\t"
             : "+r" (h)
             : "r" (val)
         );
+#  endif
 #endif
         Pos head = s->head[h & s->hash_mask];
         if (head != str+idx) {