git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Reorder in which intrinsics support is checked.
author Vladislav Shchapov <vladislav@shchapov.ru>
Sat, 20 Jan 2024 15:40:07 +0000 (20:40 +0500)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Thu, 25 Jan 2024 09:21:49 +0000 (10:21 +0100)
Signed-off-by: Vladislav Shchapov <vladislav@shchapov.ru>
CMakeLists.txt
configure

index 4226b7a187b6d4fa4650794709c1a6bd777b6684..ea1aa1c86efde200c288ad9c3f4007b699dba66e 100644 (file)
@@ -784,6 +784,66 @@ if(WITH_OPTIM)
         if(MSVC)
             list(APPEND ZLIB_ARCH_HDRS fallback_builtins.h)
         endif()
+        check_xsave_intrinsics()
+        if(HAVE_XSAVE_INTRIN)
+            add_feature_info(XSAVE 1 "Support XSAVE intrinsics using \"${XSAVEFLAG}\"")
+            set_property(SOURCE ${ARCHDIR}/x86_features.c PROPERTY COMPILE_FLAGS "${XSAVEFLAG}")
+        endif()
+        if(WITH_SSE2)
+            check_sse2_intrinsics()
+            if(HAVE_SSE2_INTRIN)
+                add_definitions(-DX86_SSE2)
+                set(SSE2_SRCS ${ARCHDIR}/chunkset_sse2.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c)
+                list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
+                if(NOT ${ARCH} MATCHES "x86_64")
+                    set_property(SOURCE ${SSE2_SRCS} PROPERTY COMPILE_FLAGS "${SSE2FLAG} ${NOLTOFLAG}")
+                    add_feature_info(FORCE_SSE2 FORCE_SSE2 "Assume CPU is SSE2 capable")
+                    if(FORCE_SSE2)
+                        add_definitions(-DX86_NOCHECK_SSE2)
+                    endif()
+                endif()
+            else()
+                set(WITH_SSE2 OFF)
+            endif()
+        endif()
+        if(WITH_SSSE3)
+            check_ssse3_intrinsics()
+            if(HAVE_SSSE3_INTRIN)
+                add_definitions(-DX86_SSSE3)
+                set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/chunkset_ssse3.c)
+                add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
+                list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
+                set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}")
+            else()
+                set(WITH_SSSE3 OFF)
+            endif()
+        endif()
+        if(WITH_SSE42)
+            check_sse42_intrinsics()
+            if(HAVE_SSE42_INTRIN)
+                add_definitions(-DX86_SSE42)
+                set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c)
+                add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"")
+                list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
+                set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}")
+            else()
+                set(WITH_SSE42 OFF)
+            endif()
+        endif()
+        if(WITH_PCLMULQDQ AND WITH_SSSE3 AND WITH_SSE42)
+            check_pclmulqdq_intrinsics()
+            if(HAVE_PCLMULQDQ_INTRIN AND HAVE_SSSE3_INTRIN)
+                add_definitions(-DX86_PCLMULQDQ_CRC)
+                set(PCLMULQDQ_SRCS ${ARCHDIR}/crc32_pclmulqdq.c)
+                add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG}\"")
+                list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS})
+                set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG} ${NOLTOFLAG}")
+            else()
+                set(WITH_PCLMULQDQ OFF)
+            endif()
+        else()
+            set(WITH_PCLMULQDQ OFF)
+        endif()
         if(WITH_AVX2)
             check_avx2_intrinsics()
             if(HAVE_AVX2_INTRIN)
@@ -830,83 +890,20 @@ if(WITH_OPTIM)
                 set(WITH_AVX512VNNI OFF)
             endif()
         endif()
-        if(WITH_SSE42)
-            check_sse42_intrinsics()
-            if(HAVE_SSE42_INTRIN)
-                add_definitions(-DX86_SSE42)
-                set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c)
-                add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"")
-                list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
-                set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}")
-            else()
-                set(WITH_SSE42 OFF)
-            endif()
-        endif()
-        if(WITH_SSE2)
-            check_sse2_intrinsics()
-            if(HAVE_SSE2_INTRIN)
-                add_definitions(-DX86_SSE2)
-                set(SSE2_SRCS ${ARCHDIR}/chunkset_sse2.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c)
-                list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
-                if(NOT ${ARCH} MATCHES "x86_64")
-                    set_property(SOURCE ${SSE2_SRCS} PROPERTY COMPILE_FLAGS "${SSE2FLAG} ${NOLTOFLAG}")
-                    add_feature_info(FORCE_SSE2 FORCE_SSE2 "Assume CPU is SSE2 capable")
-                    if(FORCE_SSE2)
-                        add_definitions(-DX86_NOCHECK_SSE2)
-                    endif()
-                endif()
-            else()
-                set(WITH_SSE2 OFF)
-            endif()
-        endif()
-        if(WITH_SSSE3)
-            check_ssse3_intrinsics()
-            if(HAVE_SSSE3_INTRIN)
-                add_definitions(-DX86_SSSE3)
-                set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/chunkset_ssse3.c)
-                add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
-                list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
-                set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}")
-            else()
-                set(WITH_SSSE3 OFF)
-            endif()
-        endif()
-        if(WITH_PCLMULQDQ AND WITH_SSSE3 AND WITH_SSE42)
-            check_pclmulqdq_intrinsics()
-            if(HAVE_PCLMULQDQ_INTRIN AND HAVE_SSSE3_INTRIN)
-                add_definitions(-DX86_PCLMULQDQ_CRC)
-                set(PCLMULQDQ_SRCS ${ARCHDIR}/crc32_pclmulqdq.c)
-                add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG}\"")
-                list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS})
-                set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG} ${NOLTOFLAG}")
-
-                if(WITH_VPCLMULQDQ AND WITH_AVX512)
-                    check_vpclmulqdq_intrinsics()
-                    if(HAVE_VPCLMULQDQ_INTRIN AND HAVE_AVX512_INTRIN)
-                        add_definitions(-DX86_VPCLMULQDQ_CRC)
-                        set(VPCLMULQDQ_SRCS ${ARCHDIR}/crc32_vpclmulqdq.c)
-                        add_feature_info(VPCLMUL_CRC 1 "Support CRC hash generation using VPCLMULQDQ, using \"${VPCLMULFLAG} ${AVX512FLAG}\"")
-                        list(APPEND ZLIB_ARCH_SRCS ${VPCLMULQDQ_SRCS})
-                        set_property(SOURCE ${VPCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG} ${NOLTOFLAG}")
-                    else()
-                        set(WITH_VPCLMULQDQ OFF)
-                    endif()
-                else()
-                    set(WITH_VPCLMULQDQ OFF)
-                endif()
+        if(WITH_VPCLMULQDQ AND WITH_PCLMULQDQ AND WITH_AVX512)
+            check_vpclmulqdq_intrinsics()
+            if(HAVE_VPCLMULQDQ_INTRIN)
+                add_definitions(-DX86_VPCLMULQDQ_CRC)
+                set(VPCLMULQDQ_SRCS ${ARCHDIR}/crc32_vpclmulqdq.c)
+                add_feature_info(VPCLMUL_CRC 1 "Support CRC hash generation using VPCLMULQDQ, using \"${VPCLMULFLAG} ${AVX512FLAG}\"")
+                list(APPEND ZLIB_ARCH_SRCS ${VPCLMULQDQ_SRCS})
+                set_property(SOURCE ${VPCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG} ${NOLTOFLAG}")
             else()
-                set(WITH_PCLMULQDQ OFF)
                 set(WITH_VPCLMULQDQ OFF)
             endif()
         else()
-            set(WITH_PCLMULQDQ OFF)
             set(WITH_VPCLMULQDQ OFF)
         endif()
-        check_xsave_intrinsics()
-        if(HAVE_XSAVE_INTRIN)
-            add_feature_info(XSAVE 1 "Support XSAVE intrinsics using \"${XSAVEFLAG}\"")
-            set_property(SOURCE ${ARCHDIR}/x86_features.c PROPERTY COMPILE_FLAGS "${XSAVEFLAG}")
-        endif()
     endif()
 endif()
 
index 02bac6c704ff1513635a8dd3357ed94fe9217c23..54197081fa34bb462f18d5b2220c6cd776e2d769 100755 (executable)
--- a/configure
+++ b/configure
@@ -1480,6 +1480,53 @@ case "${ARCH}" in
             ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} x86_features.o"
             ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86_features.lo"
 
+            check_xsave_intrinsics
+
+            if test ${HAVE_XSAVE_INTRIN} -eq 0; then
+                xsaveflag=""
+            fi
+
+            check_sse2_intrinsics
+
+            if test ${HAVE_SSE2_INTRIN} -eq 1; then
+                CFLAGS="${CFLAGS} -DX86_SSE2"
+                SFLAGS="${SFLAGS} -DX86_SSE2"
+                ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse2.o compare256_sse2.o slide_hash_sse2.o"
+                ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse2.lo compare256_sse2.lo slide_hash_sse2.lo"
+
+                if test $forcesse2 -eq 1; then
+                    CFLAGS="${CFLAGS} -DX86_NOCHECK_SSE2"
+                    SFLAGS="${SFLAGS} -DX86_NOCHECK_SSE2"
+                fi
+            fi
+
+            check_ssse3_intrinsics
+
+            if test ${HAVE_SSSE3_INTRIN} -eq 1; then
+                CFLAGS="${CFLAGS} -DX86_SSSE3"
+                SFLAGS="${SFLAGS} -DX86_SSSE3"
+                ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_ssse3.o chunkset_ssse3.o"
+                ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_ssse3.lo chunkset_ssse3.lo"
+            fi
+
+            check_sse42_intrinsics
+
+            if test ${HAVE_SSE42_INTRIN} -eq 1; then
+                CFLAGS="${CFLAGS} -DX86_SSE42"
+                SFLAGS="${SFLAGS} -DX86_SSE42"
+                ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse42.o insert_string_sse42.o"
+                ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse42.lo insert_string_sse42.lo"
+            fi
+
+            check_pclmulqdq_intrinsics
+
+            if test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then
+                CFLAGS="${CFLAGS} -DX86_PCLMULQDQ_CRC"
+                SFLAGS="${SFLAGS} -DX86_PCLMULQDQ_CRC"
+                ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_pclmulqdq.o"
+                ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_pclmulqdq.lo"
+            fi
+
             check_avx2_intrinsics
 
             if test ${HAVE_AVX2_INTRIN} -eq 1; then
@@ -1526,63 +1573,16 @@ case "${ARCH}" in
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_avx512_vnni.lo"
             fi
 
-            check_sse42_intrinsics
-
-            if test ${HAVE_SSE42_INTRIN} -eq 1; then
-                CFLAGS="${CFLAGS} -DX86_SSE42"
-                SFLAGS="${SFLAGS} -DX86_SSE42"
-                ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse42.o insert_string_sse42.o"
-                ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse42.lo insert_string_sse42.lo"
-            fi
-
-            check_sse2_intrinsics
-
-            if test ${HAVE_SSE2_INTRIN} -eq 1; then
-                CFLAGS="${CFLAGS} -DX86_SSE2"
-                SFLAGS="${SFLAGS} -DX86_SSE2"
-                ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse2.o compare256_sse2.o slide_hash_sse2.o"
-                ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse2.lo compare256_sse2.lo slide_hash_sse2.lo"
+            if test $buildvpclmulqdq -eq 1 && test ${HAVE_PCLMULQDQ_INTRIN} -eq 1 && test ${HAVE_AVX512_INTRIN} -eq 1; then
+                check_vpclmulqdq_intrinsics
 
-                if test $forcesse2 -eq 1; then
-                    CFLAGS="${CFLAGS} -DX86_NOCHECK_SSE2"
-                    SFLAGS="${SFLAGS} -DX86_NOCHECK_SSE2"
+                if test ${HAVE_VPCLMULQDQ_INTRIN} -eq 1; then
+                    CFLAGS="${CFLAGS} -DX86_VPCLMULQDQ_CRC"
+                    SFLAGS="${SFLAGS} -DX86_VPCLMULQDQ_CRC"
+                    ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_vpclmulqdq.o"
+                    ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_vpclmulqdq.lo"
                 fi
             fi
-
-            check_ssse3_intrinsics
-
-            if test ${HAVE_SSSE3_INTRIN} -eq 1; then
-                CFLAGS="${CFLAGS} -DX86_SSSE3"
-                SFLAGS="${SFLAGS} -DX86_SSSE3"
-                ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_ssse3.o chunkset_ssse3.o"
-                ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_ssse3.lo chunkset_ssse3.lo"
-            fi
-
-            check_pclmulqdq_intrinsics
-
-            if test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then
-                CFLAGS="${CFLAGS} -DX86_PCLMULQDQ_CRC"
-                SFLAGS="${SFLAGS} -DX86_PCLMULQDQ_CRC"
-                ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_pclmulqdq.o"
-                ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_pclmulqdq.lo"
-
-                if test $buildvpclmulqdq -eq 1; then
-                    check_vpclmulqdq_intrinsics
-
-                    if test ${HAVE_VPCLMULQDQ_INTRIN} -eq 1 && test ${HAVE_AVX512_INTRIN} -eq 1; then
-                        CFLAGS="${CFLAGS} -DX86_VPCLMULQDQ_CRC"
-                        SFLAGS="${SFLAGS} -DX86_VPCLMULQDQ_CRC"
-                        ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_vpclmulqdq.o"
-                        ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_vpclmulqdq.lo"
-                    fi
-                fi
-            fi
-
-            check_xsave_intrinsics
-
-            if test ${HAVE_XSAVE_INTRIN} -eq 0; then
-                xsaveflag=""
-            fi
         fi
     ;;