git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Enable use of _mm_shuffle_epi8 on machines without SSE4.1
author Cameron Cawley <ccawley2011@gmail.com>
Tue, 28 Mar 2023 18:01:44 +0000 (19:01 +0100)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Sat, 1 Apr 2023 15:27:49 +0000 (17:27 +0200)
15 files changed:
CMakeLists.txt
README.md
arch/arm/chunkset_neon.c
arch/generic/chunk_permute_table.h
arch/x86/Makefile.in
arch/x86/chunkset_avx.c
arch/x86/chunkset_ssse3.c [moved from arch/x86/chunkset_sse41.c with 87% similarity]
arch/x86/x86_features.c
arch/x86/x86_features.h
chunkset_tpl.h
cmake/detect-intrinsics.cmake
configure
cpu_features.h
functable.c
win32/Makefile.msc

index 9ed3b24175b855444533042e6e7b9253ef62f360..e5184cca30f033bdd1b957014c5d14de0a6be898 100644 (file)
@@ -117,7 +117,6 @@ elseif(BASEARCH_X86_FOUND)
     option(WITH_AVX512VNNI "Build with AVX512 VNNI extensions" ON)
     option(WITH_SSE2 "Build with SSE2" ON)
     option(WITH_SSSE3 "Build with SSSE3" ON)
-    option(WITH_SSE41 "Build with SSE41" ON)
     option(WITH_SSE42 "Build with SSE42" ON)
     option(WITH_PCLMULQDQ "Build with PCLMULQDQ" ON)
     option(WITH_VPCLMULQDQ "Build with VPCLMULQDQ" ON)
@@ -133,8 +132,7 @@ mark_as_advanced(FORCE
     WITH_DFLTCC_INFLATE
     WITH_CRC32_VX
     WITH_AVX2 WITH_SSE2
-    WITH_SSSE3 WITH_SSE41
-    WITH_SSE42
+    WITH_SSSE3 WITH_SSE42
     WITH_PCLMULQDQ
     WITH_ALTIVEC
     WITH_POWER8
@@ -787,17 +785,6 @@ if(WITH_OPTIM)
                 set(WITH_AVX512VNNI OFF)
             endif()
         endif()
-        if(WITH_SSE41)
-            check_sse41_intrinsics()
-            if(HAVE_SSE41_INTRIN)
-                add_definitions(-DX86_SSE41)
-                list(APPEND SSE41_SRCS ${ARCHDIR}/chunkset_sse41.c)
-                list(APPEND ZLIB_ARCH_SRCS ${SSE41_SRCS})
-                set_property(SOURCE ${SSE41_SRCS} PROPERTY COMPILE_FLAGS "${SSE41FLAG} ${NOLTOFLAG}")
-            else()
-                set(WITH_SSE41 OFF)
-            endif()
-        endif()
         if(WITH_SSE42)
             check_sse42_intrinsics()
             if(HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN)
@@ -835,7 +822,7 @@ if(WITH_OPTIM)
             check_ssse3_intrinsics()
             if(HAVE_SSSE3_INTRIN)
                 add_definitions(-DX86_SSSE3)
-                set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c)
+                set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/chunkset_ssse3.c)
                 add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
                 list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
                 set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}")
@@ -1226,7 +1213,6 @@ elseif(BASEARCH_X86_FOUND)
     add_feature_info(WITH_AVX512VNNI WITH_AVX512VNNI "Build with AVX512 VNNI")
     add_feature_info(WITH_SSE2 WITH_SSE2 "Build with SSE2")
     add_feature_info(WITH_SSSE3 WITH_SSSE3 "Build with SSSE3")
-    add_feature_info(WITH_SSE41 WITH_SSE41 "Build with SSE41")
     add_feature_info(WITH_SSE42 WITH_SSE42 "Build with SSE42")
     add_feature_info(WITH_PCLMULQDQ WITH_PCLMULQDQ "Build with PCLMULQDQ")
     add_feature_info(WITH_VPCLMULQDQ WITH_VPCLMULQDQ "Build with VPCLMULQDQ")
index 5b8e8ccdc25755294ef01acb43ef41ca224f3413..aa72365c9526b306c0b36b0c672c47d8a38ddbb1 100644 (file)
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ Features
   * Hash table implementation using CRC32-C intrinsics on x86 and ARM
   * Slide hash implementations using SSE2, AVX2, Neon, VMX & VSX
   * Compare256 implementations using SSE2, AVX2, Neon, & POWER9
-  * Inflate chunk copying using SSE2, AVX, Neon & VSX
+  * Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX
   * Support for hardware-accelerated deflate using IBM Z DFLTCC
 * Unaligned memory read/writes and large bit buffer improvements
 * Includes improvements from Cloudflare and Intel forks
@@ -213,7 +213,7 @@ Advanced Build Options
 | WITH_AVX512                     |                       | Build with AVX512 intrinsics                                        | ON                     |
 | WITH_AVX512VNNI                 |                       | Build with AVX512VNNI intrinsics                                    | ON                     |
 | WITH_SSE2                       |                       | Build with SSE2 intrinsics                                          | ON                     |
-| WITH_SSE41                      |                       | Build with SSE41 intrinsics                                         | ON                     |
+| WITH_SSSE3                      |                       | Build with SSSE3 intrinsics                                         | ON                     |
 | WITH_SSE42                      |                       | Build with SSE42 intrinsics                                         | ON                     |
 | WITH_PCLMULQDQ                  |                       | Build with PCLMULQDQ intrinsics                                     | ON                     |
 | WITH_VPCLMULQDQ                 | --without-vpclmulqdq  | Build with VPCLMULQDQ intrinsics                                    | ON                     |
index 668c0019e96da5d7da072736548591f76e0b8614..1890c91356ee4be85e7650cc5867bab5bba02de1 100644 (file)
@@ -69,7 +69,7 @@ static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t
     *chunk_rem = lut_rem.remval;
 
 #ifdef Z_MEMORY_SANITIZER
-    /* See note in chunkset_sse41.c for why this is ok */
+    /* See note in chunkset_ssse3.c for why this is ok */
     __msan_unpoison(buf + dist, 16 - dist);
 #endif
 
index c7b2d2de7f955e8ca88355b7a0f9149d21686867..bad66ccc774b75d67b084d523b6fcfa03d17991c 100644 (file)
@@ -1,4 +1,4 @@
-/* chunk_permute_table.h - shared AVX/SSE4 permutation table for use with chunkmemset family of functions.
+/* chunk_permute_table.h - shared AVX/SSSE3 permutation table for use with chunkmemset family of functions.
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
index 4cebe55531aa0d2d81f4d4807b565b588921d333..5fd51929ce28761d423fbbc39ea6493b3a2beb20 100644 (file)
@@ -13,7 +13,6 @@ AVX512VNNIFLAG=-mavx512vnni
 AVX2FLAG=-mavx2
 SSE2FLAG=-msse2
 SSSE3FLAG=-mssse3
-SSE41FLAG=-msse4.1
 SSE42FLAG=-msse4.2
 PCLMULFLAG=-mpclmul
 VPCLMULFLAG=-mvpclmulqdq
@@ -33,7 +32,7 @@ all: \
        adler32_ssse3.o adler32_ssse3.lo \
        chunkset_avx.o chunkset_avx.lo \
        chunkset_sse2.o chunkset_sse2.lo \
-       chunkset_sse41.o chunkset_sse41.lo \
+       chunkset_ssse3.o chunkset_ssse3.lo \
        compare256_avx2.o compare256_avx2.lo \
        compare256_sse2.o compare256_sse2.lo \
        insert_string_sse42.o insert_string_sse42.lo \
@@ -60,11 +59,11 @@ chunkset_sse2.o:
 chunkset_sse2.lo:
        $(CC) $(SFLAGS) $(SSE2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse2.c
 
-chunkset_sse41.o:
-       $(CC) $(CFLAGS) $(SSE41FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse41.c
+chunkset_ssse3.o:
+       $(CC) $(CFLAGS) $(SSSE3FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_ssse3.c
 
-chunkset_sse41.lo:
-       $(CC) $(SFLAGS) $(SSE41FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse41.c
+chunkset_ssse3.lo:
+       $(CC) $(SFLAGS) $(SSSE3FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_ssse3.c
 
 compare256_avx2.o:
        $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_avx2.c
index c2df2322fe248568f8727bcd1032cd03abda10e3..abcbb474f5183ce23a810e2a22f5eef02036cc84 100644 (file)
@@ -85,7 +85,7 @@ static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t
     *chunk_rem = lut_rem.remval;
 
 #ifdef Z_MEMORY_SANITIZER
-    /* See note in chunkset_sse4.c for why this is ok */
+    /* See note in chunkset_ssse3.c for why this is ok */
     __msan_unpoison(buf + dist, 32 - dist);
 #endif
 
similarity index 87%
rename from arch/x86/chunkset_sse41.c
rename to arch/x86/chunkset_ssse3.c
index 4b7396bcacec773ab9ca87fb78f200a391c77571..0bd6263859cb42c4aee69ea977b8706f70099df1 100644 (file)
@@ -1,13 +1,13 @@
-/* chunkset_sse41.c -- SSE4 inline functions to copy small data chunks.
+/* chunkset_ssse3.c -- SSSE3 inline functions to copy small data chunks.
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
 #include "zbuild.h"
 
-/* This requires SSE2 support. While it's implicit with SSE4, we can minimize
+/* This requires SSE2 support. While it's implicit with SSSE3, we can minimize
  * code size by sharing the chunkcopy functions, which will certainly compile
  * to identical machine code */
-#if defined(X86_SSE41) && defined(X86_SSE2)
+#if defined(X86_SSSE3) && defined(X86_SSE2)
 #include <immintrin.h>
 #include "../generic/chunk_permute_table.h"
 
@@ -88,15 +88,15 @@ static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t
 extern uint8_t* chunkcopy_sse2(uint8_t *out, uint8_t const *from, unsigned len);
 extern uint8_t* chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len);
 
-#define CHUNKSIZE        chunksize_sse41
-#define CHUNKMEMSET      chunkmemset_sse41
-#define CHUNKMEMSET_SAFE chunkmemset_safe_sse41
+#define CHUNKSIZE        chunksize_ssse3
+#define CHUNKMEMSET      chunkmemset_ssse3
+#define CHUNKMEMSET_SAFE chunkmemset_safe_ssse3
 #define CHUNKCOPY        chunkcopy_sse2
 #define CHUNKUNROLL      chunkunroll_sse2
 
 #include "chunkset_tpl.h"
 
-#define INFLATE_FAST     inflate_fast_sse41
+#define INFLATE_FAST     inflate_fast_ssse3
 
 #include "inffast_tpl.h"
 
index f60ddbcf94ba6d8de102f4546c3115bf40475f73..3272e3fdd9721cb84442645030ef29bac1b8ac6f 100644 (file)
@@ -66,7 +66,6 @@ void Z_INTERNAL x86_check_features(struct x86_cpu_features *features) {
 
     features->has_sse2 = edx & 0x4000000;
     features->has_ssse3 = ecx & 0x200;
-    features->has_sse41 = ecx & 0x80000;
     features->has_sse42 = ecx & 0x100000;
     features->has_pclmulqdq = ecx & 0x2;
 
index 00b510ffc1171547687b23ec001b00be763624af..4a36bde835d32efc64f19a7968456a03f698ee1b 100644 (file)
@@ -12,7 +12,6 @@ struct x86_cpu_features {
     int has_avx512vnni;
     int has_sse2;
     int has_ssse3;
-    int has_sse41;
     int has_sse42;
     int has_pclmulqdq;
     int has_vpclmulqdq;
index f70ef42cdb836f93d15241ce8d4a961b0cfbb775..f909a12557f0c8cd2765db2cf31ca8425249397e 100644 (file)
@@ -5,8 +5,8 @@
 #include "zbuild.h"
 #include <stdlib.h>
 
-#if CHUNK_SIZE == 32 && defined(X86_SSE41) && defined(X86_SSE2)
-extern uint8_t* chunkmemset_sse41(uint8_t *out, unsigned dist, unsigned len);
+#if CHUNK_SIZE == 32 && defined(X86_SSSE3) && defined(X86_SSE2)
+extern uint8_t* chunkmemset_ssse3(uint8_t *out, unsigned dist, unsigned len);
 #endif
 
 /* Returns the chunk size */
@@ -98,9 +98,9 @@ Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) {
        Assert(len >= sizeof(uint64_t), "chunkmemset should be called on larger chunks"); */
     Assert(dist > 0, "chunkmemset cannot have a distance 0");
     /* Only AVX2 */
-#if CHUNK_SIZE == 32 && defined(X86_SSE41) && defined(X86_SSE2)
+#if CHUNK_SIZE == 32 && defined(X86_SSSE3) && defined(X86_SSE2)
     if (len <= 16) {
-        return chunkmemset_sse41(out, dist, len);
+        return chunkmemset_ssse3(out, dist, len);
     }
 #endif
 
index 7b59cec53c42484c28555dab9a60cdccc0bbfebb..186d87d814f954c7663aa988b166eea89a41a594 100644 (file)
@@ -435,34 +435,6 @@ macro(check_ssse3_intrinsics)
     )
 endmacro()
 
-macro(check_sse41_intrinsics)
-    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
-        if(CMAKE_HOST_UNIX OR APPLE)
-            set(SSE41FLAG "-msse4.1")
-        else()
-            set(SSE41FLAG "/arch:SSE4.1")
-        endif()
-    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
-        if(NOT NATIVEFLAG)
-            set(SSE41FLAG "-msse4.1")
-        endif()
-    endif()
-    # Check whether compiler supports SSE4.1 intrinsics
-    set(CMAKE_REQUIRED_FLAGS "${SSE41FLAG} ${NATIVEFLAG}")
-    check_c_source_compile_or_run(
-        "#include <immintrin.h>
-        int main(void) {
-            __m128i u, v, w;
-            u = _mm_set1_epi8(1);
-            v = _mm_set1_epi8(2);
-            w = _mm_sad_epu8(u, v);
-            (void)w;
-            return 0;
-        }"
-        HAVE_SSE41_INTRIN
-    )
-endmacro()
-
 macro(check_sse42_intrinsics)
     if(CMAKE_C_COMPILER_ID MATCHES "Intel")
         if(CMAKE_HOST_UNIX OR APPLE)
index eb9e57e90d4c304f00f234cc1206c78d7a13a216..2c320227fe2ba75bca997f18c3ae13719a735334 100755 (executable)
--- a/configure
+++ b/configure
@@ -110,7 +110,6 @@ avx512vnniflag="-mavx512vnni ${avx512flag}"
 avx2flag="-mavx2"
 sse2flag="-msse2"
 ssse3flag="-mssse3"
-sse41flag="-msse4.1"
 sse42flag="-msse4.2"
 pclmulflag="-mpclmul"
 vpclmulflag="-mvpclmulqdq -mavx512f"
@@ -1399,29 +1398,6 @@ EOF
     fi
 }
 
-check_sse41_intrinsics() {
-    # Check whether compiler supports SSE4.1 intrinsics
-    cat > $test.c << EOF
-#include <smmintrin.h>
-int main(void)
-{
-    __m128i u, v, w;
-    u = _mm_set1_epi8(1);
-    v = _mm_set1_epi8(2);
-    w = _mm_sad_epu8(u, v);
-    (void)w;
-    return 0;
-}
-EOF
-    if try ${CC} ${CFLAGS} ${sse41flag} $test.c; then
-        echo "Checking for SSE4.1 intrinsics ... Yes." | tee -a configure.log
-        HAVE_SSE41_INTRIN=1
-    else
-        echo "Checking for SSE4.1 intrinsics ... No." | tee -a configure.log
-        HAVE_SSE41_INTRIN=0
-    fi
-}
-
 check_sse42_intrinsics() {
     # Check whether compiler supports SSE4 CRC inline asm
     cat > $test.c << EOF
@@ -1615,16 +1591,6 @@ case "${ARCH}" in
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_avx512_vnni.lo"
             fi
 
-            check_sse41_intrinsics
-
-            if test ${HAVE_SSE41_INTRIN} -eq 1; then
-                CFLAGS="${CFLAGS} -DX86_SSE41"
-                SFLAGS="${SFLAGS} -DX86_SSE41"
-
-                ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse41.o"
-                ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse41.lo"
-            fi
-
             check_sse42_intrinsics
 
             if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then
@@ -1659,8 +1625,8 @@ case "${ARCH}" in
             if test ${HAVE_SSSE3_INTRIN} -eq 1; then
                 CFLAGS="${CFLAGS} -DX86_SSSE3"
                 SFLAGS="${SFLAGS} -DX86_SSSE3"
-                ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_ssse3.o"
-                ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_ssse3.lo"
+                ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_ssse3.o chunkset_ssse3.o"
+                ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_ssse3.lo chunkset_ssse3.lo"
             fi
 
             check_pclmulqdq_intrinsics
@@ -2111,7 +2077,6 @@ echo sharedlibdir = $sharedlibdir >> configure.log
 echo uname = $uname >> configure.log
 echo sse2flag = $sse2flag >> configure.log
 echo ssse3flag = $ssse3flag >> configure.log
-echo sse41flag = $sse41flag >> configure.log
 echo sse42flag = $sse42flag >> configure.log
 echo pclmulflag = $pclmulflag >> configure.log
 echo vpclmulflag = $vpclmulflag >> configure.log
@@ -2250,7 +2215,6 @@ sed < $SRCDIR/$ARCHDIR/Makefile.in "
 /^AVX512VNNIFLAG *=/s#=.*#=$avx512vnniflag#
 /^SSE2FLAG *=/s#=.*#=$sse2flag#
 /^SSSE3FLAG *=/s#=.*#=$ssse3flag#
-/^SSE41FLAG *=/s#=.*#=$sse41flag#
 /^SSE42FLAG *=/s#=.*#=$sse42flag#
 /^PCLMULFLAG *=/s#=.*#=$pclmulflag#
 /^VPCLMULFLAG *=/s#=.*#=$vpclmulflag#
index 462671a187ee264a01bcb8c86f1f5d1d7457f6c5..e47f947826c9a69fa11fd844230de5741876f4a1 100644 (file)
@@ -99,8 +99,8 @@ extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, un
 extern uint32_t chunksize_sse2(void);
 extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
 #endif
-#ifdef X86_SSE41
-extern uint8_t* chunkmemset_safe_sse41(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+#ifdef X86_SSSE3
+extern uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left);
 #endif
 #ifdef X86_AVX2
 extern uint32_t chunksize_avx(void);
@@ -126,8 +126,8 @@ extern void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
 #ifdef X86_SSE2
 extern void inflate_fast_sse2(PREFIX3(stream) *strm, uint32_t start);
 #endif
-#ifdef X86_SSE41
-extern void inflate_fast_sse41(PREFIX3(stream) *strm, uint32_t start);
+#ifdef X86_SSSE3
+extern void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
 #endif
 #ifdef X86_AVX2
 extern void inflate_fast_avx(PREFIX3(stream) *strm, uint32_t start);
index c7d477c7f066a2934a05e91b019f04651d450731..4212da09075a2f7437d597dd105ba64cc8fda927 100644 (file)
@@ -75,16 +75,15 @@ static void init_functable(void) {
 #endif
     // X86 - SSSE3
 #ifdef X86_SSSE3
-    if (cf.x86.has_ssse3)
+    if (cf.x86.has_ssse3) {
         ft.adler32 = &adler32_ssse3;
-#endif
-    // X86 - SSE4
-#if defined(X86_SSE41) && defined(X86_SSE2)
-    if (cf.x86.has_sse41) {
-        ft.chunkmemset_safe = &chunkmemset_safe_sse41;
-        ft.inflate_fast = &inflate_fast_sse41;
+#  ifdef X86_SSE2
+        ft.chunkmemset_safe = &chunkmemset_safe_ssse3;
+        ft.inflate_fast = &inflate_fast_ssse3;
+#  endif
     }
 #endif
+    // X86 - SSE4.2
 #ifdef X86_SSE42
     if (cf.x86.has_sse42) {
         ft.adler32_fold_copy = &adler32_fold_copy_sse42;
index d2a98d6f0a1b94386cd30eb54d4d1d32c4d494c3..8a01e31710c07a00e4da53118098c965db14be53 100644 (file)
@@ -56,6 +56,7 @@ OBJS = \
        chunkset.obj \
        chunkset_avx.obj \
        chunkset_sse2.obj \
+       chunkset_ssse3.obj \
        compare256.obj \
        compare256_avx2.obj \
        compare256_sse2.obj \
@@ -202,6 +203,7 @@ uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h
 chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
 chunkset_avx.obj: $(SRCDIR)/arch/x86/chunkset_avx.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
 chunkset_sse2.obj: $(SRCDIR)/arch/x86/chunkset_sse2.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
+chunkset_ssse3.obj: $(SRCDIR)/arch/x86/chunkset_ssse3.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
 cpu_features.obj: $(SRCDIR)/cpu_features.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
 crc32_braid.obj: $(SRCDIR)/crc32_braid.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h
 crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h