From: Hans Kristian Rosbach Date: Wed, 6 Mar 2019 09:39:32 +0000 (+0100) Subject: Default sse2 to be available on x86_64 arch. X-Git-Tag: 1.9.9-b1~513 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=17b7471d240d633aa0a63791becf4aab3069625a;p=thirdparty%2Fzlib-ng.git Default sse2 to be available on x86_64 arch. Allow x86 arch to force sse2 availability. It still depends on sse2 intrinsics being detected and optimizations enabled. --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 647ad19e6..a0be580e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,7 @@ include(CheckFunctionExists) include(CheckIncludeFile) include(CheckCSourceCompiles) include(CheckCSourceRuns) +include(CMakeDependentOption) include(FeatureSummary) # make sure we use an appropriate BUILD_TYPE by default, "Release" to be exact @@ -476,6 +477,11 @@ check_c_source_compiles( MFPU_NEON_AVAILABLE FAIL_REGEX "not supported") set(CMAKE_REQUIRED_FLAGS) +# FORCE_SSE2 option will only be shown if HAVE_SSE2_INTRIN is true +if("${ARCH}" MATCHES "i[3-6]86") + cmake_dependent_option(FORCE_SSE2 "Always assume CPU is SSE2 capable" OFF "HAVE_SSE2_INTRIN" OFF) +endif() + # # Enable deflate_medium at level 4-6 # @@ -507,9 +513,6 @@ set(ARCHDIR "arch/generic") if("${ARCH}" MATCHES "x86_64" OR "${ARCH}" MATCHES "AMD64" OR "${ARCH}" MATCHES "i[3-6]86") set(ARCHDIR "arch/x86") add_definitions(-DUNALIGNED_OK) - if("${ARCH}" MATCHES "x86_64" OR "${ARCH}" MATCHES "AMD64") - add_definitions(-DX86_NOCHECK_SSE2) - endif() add_feature_info(SSE2 1 "Support the SSE2 instruction set, using \"${SSE2FLAG}\"") elseif("${ARCH}" MATCHES "arm") set(ARCHDIR "arch/arm") @@ -579,6 +582,10 @@ if(WITH_OPTIM) set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/fill_window_sse.c) if(NOT ${ARCH} MATCHES "x86_64") add_intrinsics_option("${SSE2FLAG}") + add_feature_info(FORCE_SSE2 FORCE_SSE2 "Assume CPU is SSE2 capable") + if(FORCE_SSE2) + add_definitions(-DX86_NOCHECK_SSE2) + endif() endif() endif() if(HAVE_PCLMULQDQ_INTRIN) diff --git a/configure b/configure index 13942343c..4f9328696 100755 --- a/configure +++ b/configure @@ -97,6 +97,7 @@ with_msan=0 with_fuzzers=0 floatabi= native=0 +forcesse2=0 sse2flag="-msse2" sse4flag="-msse4" sse42flag="-msse4.2" @@ -144,6 +145,7 @@ case "$1" in echo ' [--without-optimizations] Compiles without support for optional instruction sets' | tee -a configure.log echo ' [--without-new-strategies] Compiles without using new additional deflate strategies' | tee -a configure.log echo ' [--acle] [--neon] Compiles with additional instruction set enabled' | tee -a configure.log + echo ' [--force-sse2] Assume SSE2 instructions are always available (disabled by default on x86, enabled on x86_64)' | tee -a configure.log echo ' [--with-sanitizers] Build with address sanitizer and all supported sanitizers other than memory sanitizer (disabled by default)' | tee -a configure.log echo ' [--with-msan] Build with memory sanitizer (disabled by default)' | tee -a configure.log echo ' [--with-fuzzers] Build test/fuzz (disabled by default)' | tee -a configure.log @@ -167,6 +169,7 @@ case "$1" in -6* | --64) build64=1; shift ;; --without-acle) buildacle=0; shift ;; --without-neon) buildneon=0; shift ;; + --force-sse2) forcesse2=1; shift ;; -n | --native) native=1; shift ;; -a*=* | --archs=*) ARCHS=`echo $1 | sed 's/.*=//'`; shift ;; --sysconfdir=*) echo "ignored option: --sysconfdir" | tee -a configure.log; shift ;; @@ -973,6 +976,11 @@ case "${ARCH}" in ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} fill_window_sse.o" ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} fill_window_sse.lo" + if test $forcesse2 -eq 1; then + CFLAGS="${CFLAGS} -DX86_NOCHECK_SSE2" + SFLAGS="${SFLAGS} -DX86_NOCHECK_SSE2" + fi + # Enable deflate_quick at level 1? # requires SSE2: code uses fill_window_sse if test $without_new_strategies -eq 0; then @@ -1008,8 +1016,8 @@ case "${ARCH}" in x86_64) ARCHDIR=arch/x86 - CFLAGS="${CFLAGS} -DX86_NOCHECK_SSE2 -DUNALIGNED_OK" - SFLAGS="${SFLAGS} -DX86_NOCHECK_SSE2 -DUNALIGNED_OK" + CFLAGS="${CFLAGS} -DUNALIGNED_OK" + SFLAGS="${SFLAGS} -DUNALIGNED_OK" # Enable arch-specific optimizations? if test $without_optimizations -eq 0; then diff --git a/functable.c b/functable.c index b4f1d769e..449f66ccb 100644 --- a/functable.c +++ b/functable.c @@ -73,7 +73,7 @@ ZLIB_INTERNAL void fill_window_stub(deflate_state *s) { functable.fill_window=&fill_window_c; #ifdef X86_SSE2 - # ifndef X86_NOCHECK_SSE2 + # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) if (x86_cpu_has_sse2) # endif functable.fill_window=&fill_window_sse;