From: Matheus Castanho Date: Mon, 25 May 2020 21:10:29 +0000 (-0300) Subject: Preparation for POWER optimizations X-Git-Tag: 1.9.9-b1~247 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b81f4ee96dcbdf1db34b00727b6f1829a2ba1edb;p=thirdparty%2Fzlib-ng.git Preparation for POWER optimizations Add the scaffolding for future optimizations for POWER processors. Now the build is capable of correctly detecting multiple processor sub-architectures (ppc, ppc64 and ppc64le) and also if features needed for the optimizations are available during build and runtime. With these changes, adding a new optimized function for POWER should be as simple as adding a new file under arch/power/, appending build instructions to the build files and editing functable.c accordingly. The UNALIGNED_OK flag is now also added by default for powerpc64le targets. --- diff --git a/.gitignore b/.gitignore index 9af3854a..b6512f14 100644 --- a/.gitignore +++ b/.gitignore @@ -64,6 +64,7 @@ a.out /Makefile /arch/arm/Makefile /arch/generic/Makefile +/arch/power/Makefile /arch/x86/Makefile .kdev4 *.kdev4 diff --git a/CMakeLists.txt b/CMakeLists.txt index 73c6e0ff..08422632 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,6 +91,8 @@ option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid if(BASEARCH_ARM_FOUND) option(WITH_ACLE "Build with ACLE" ON) option(WITH_NEON "Build with NEON intrinsics" ON) +elseif(BASEARCH_PPC_FOUND) + option(WITH_POWER8 "Build with optimisations for POWER8" ON) elseif(BASEARCH_S360_FOUND AND "${ARCH}" MATCHES "s390x") option(WITH_DFLTCC_DEFLATE "Use DEFLATE CONVERSION CALL instruction for compression on IBM Z" OFF) option(WITH_DFLTCC_INFLATE "Use DEFLATE CONVERSION CALL instruction for decompression on IBM Z" OFF) @@ -102,7 +104,7 @@ elseif(BASEARCH_X86_FOUND) endif() mark_as_advanced(FORCE ZLIB_DUAL_LINK WITH_ACLE WITH_NEON WITH_DFLTCC_DEFLATE WITH_DFLTCC_INFLATE - WITH_AVX2 WITH_SSE2 WITH_SSE4 WITH_PCLMULQDQ 
WITH_INFLATE_STRICT WITH_INFLATE_ALLOW_INVALID_DIST) + WITH_AVX2 WITH_SSE2 WITH_SSE4 WITH_PCLMULQDQ WITH_POWER8 WITH_INFLATE_STRICT WITH_INFLATE_ALLOW_INVALID_DIST) add_feature_info(ZLIB_COMPAT ZLIB_COMPAT "Provide a zlib-compatible API") add_feature_info(WITH_GZFILEOP WITH_GZFILEOP "Compile with support for gzFile-related functions") @@ -114,6 +116,8 @@ add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies") if(BASEARCH_ARM_FOUND) add_feature_info(WITH_ACLE WITH_ACLE "Build with ACLE CRC") add_feature_info(WITH_NEON WITH_NEON "Build with NEON intrinsics") +elseif(BASEARCH_PPC_FOUND) + add_feature_info(WITH_POWER8 WITH_POWER8 "Build with optimisations for POWER8") endif() add_feature_info(WITH_MAINTAINER_WARNINGS WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings") add_feature_info(WITH_CODE_COVERAGE WITH_CODE_COVERAGE "Enable code coverage reporting") @@ -185,7 +189,11 @@ else() endif() if(WITH_NATIVE_INSTRUCTIONS) if(__GNUC__) - set(NATIVEFLAG "-march=native") + if(BASEARCH_PPC_FOUND) + set(NATIVEFLAG "-mcpu=native") + else() + set(NATIVEFLAG "-march=native") + endif() else() message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration") endif() @@ -219,6 +227,8 @@ else() endif() # ACLE set(ACLEFLAG "-march=armv8-a+crc") + elseif(BASEARCH_PPC_FOUND) + set(POWER8FLAG "-mcpu=power8") elseif(BASEARCH_X86_FOUND) set(AVX2FLAG "-mavx2") set(SSE2FLAG "-msse2") @@ -242,7 +252,7 @@ else() endif() # Set architecture alignment requirements -if(BASEARCH_ARM_FOUND OR BASEARCH_X86_FOUND) +if(BASEARCH_ARM_FOUND OR (BASEARCH_PPC_FOUND AND "${ARCH}" MATCHES "powerpc64le") OR BASEARCH_X86_FOUND) if(NOT DEFINED UNALIGNED_OK) set(UNALIGNED_OK TRUE) endif() @@ -418,7 +428,18 @@ if(MSVC) add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE) endif() -if(BASEARCH_X86_FOUND) +if(BASEARCH_PPC_FOUND) + # Check if we have what we need for POWER8 optimizations + set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG}") + check_c_source_compiles( + 
"#include <sys/auxv.h> + int main() { + return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); + }" + HAVE_POWER8 + ) + set(CMAKE_REQUIRED_FLAGS) +elseif(BASEARCH_X86_FOUND) # Check whether compiler supports SSE2 instrinics if(WITH_NATIVE_INSTRUCTIONS) set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}") @@ -581,6 +602,8 @@ set(ZLIB_ARCH_HDRS) set(ARCHDIR "arch/generic") if(BASEARCH_ARM_FOUND) set(ARCHDIR "arch/arm") +elseif(BASEARCH_PPC_FOUND) + set(ARCHDIR "arch/power") elseif(BASEARCH_S360_FOUND AND "${ARCH}" MATCHES "s390x") set(ARCHDIR "arch/s390") elseif(BASEARCH_X86_FOUND) @@ -617,6 +640,17 @@ if(WITH_OPTIM) endif() add_feature_info(ACLE_CRC 1 "Support ACLE optimized CRC hash generation, using \"${ACLEFLAG}\"") endif() + elseif(BASEARCH_PPC_FOUND) + if(WITH_POWER8 AND HAVE_POWER8) + add_definitions(-DPOWER_FEATURES) + add_definitions(-DPOWER8) + set(ZLIB_POWER8_SRCS ) + set_source_files_properties( + ${ZLIB_POWER8_SRCS} + PROPERTIES COMPILE_FLAGS ${POWER8FLAG}) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power.h) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power.c ${ZLIB_POWER8_SRCS}) + endif() elseif(BASEARCH_S360_FOUND AND "${ARCH}" MATCHES "s390x") if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE) list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/dfltcc_common.c) diff --git a/README.md b/README.md index 07f336d6..b9e3bc1d 100644 --- a/README.md +++ b/README.md @@ -200,6 +200,7 @@ Advanced Build Options | WITH_PCLMULQDQ | | Build with PCLMULQDQ intrinsics | ON | | WITH_ACLE | --without-acle | Build with ACLE intrinsics | ON | | WITH_NEON | --without-neon | Build with NEON intrinsics | ON | +| WITH_POWER8 | | Build with POWER8 optimisations | ON | WITH_DFLTCC_DEFLATE | --with-dfltcc-deflate | Use DEFLATE COMPRESSION CALL instruction for compression on IBM Z | OFF | | WITH_DFLTCC_INFLATE | --with-dfltcc-inflate | Use DEFLATE COMPRESSION CALL instruction for decompression on IBM Z | OFF | | WITH_INFLATE_STRICT | | Build with strict inflate distance checking | OFF | diff --git a/arch/power/Makefile.in 
b/arch/power/Makefile.in new file mode 100644 index 00000000..a438fa5a --- /dev/null +++ b/arch/power/Makefile.in @@ -0,0 +1,33 @@ +# Makefile for POWER-specific files +# Copyright (C) 2020 Matheus Castanho , IBM +# For conditions of distribution and use, see copyright notice in zlib.h + +CC= +CFLAGS= +SFLAGS= +INCLUDES= +SUFFIX= + +SRCDIR=. +SRCTOP=../.. +TOPDIR=$(SRCTOP) + +P8FLAGS=-mcpu=power8 + +all: power.o \ + power.lo + +power.o: + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c + +power.lo: + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c + +mostlyclean: clean +clean: + rm -f *.o *.lo *~ + rm -rf objs + rm -f *.gcda *.gcno *.gcov + +distclean: + rm -f Makefile diff --git a/arch/power/power.c b/arch/power/power.c new file mode 100644 index 00000000..8f0c49c0 --- /dev/null +++ b/arch/power/power.c @@ -0,0 +1,19 @@ +/* POWER feature check + * Copyright (C) 2020 Matheus Castanho , IBM + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include <sys/auxv.h> +#include "zutil.h" + +ZLIB_INTERNAL int power_cpu_has_arch_2_07; + +void ZLIB_INTERNAL power_check_features(void) { + unsigned long hwcap2; + hwcap2 = getauxval(AT_HWCAP2); + +#ifdef POWER8 + if (hwcap2 & PPC_FEATURE2_ARCH_2_07) + power_cpu_has_arch_2_07 = 1; +#endif +} diff --git a/arch/power/power.h b/arch/power/power.h new file mode 100644 index 00000000..4ce63841 --- /dev/null +++ b/arch/power/power.h @@ -0,0 +1,13 @@ +/* power.h -- check for POWER CPU features + * Copyright (C) 2020 Matheus Castanho , IBM + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef POWER_H_ +#define POWER_H_ + +extern int power_cpu_has_arch_2_07; + +void ZLIB_INTERNAL power_check_features(void); + +#endif /* POWER_H_ */ diff --git a/cmake/detect-arch.c b/cmake/detect-arch.c index 32a8db85..d7017d8b 100644 --- a/cmake/detect-arch.c +++ b/cmake/detect-arch.c @@ -34,7 +34,11 @@ // PowerPC #elif defined(__powerpc__) || defined(_ppc__) || defined(__PPC__) #if 
defined(__64BIT__) || defined(__powerpc64__) || defined(__ppc64__) - #error archfound ppc64 + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #error archfound ppc64le + #else + #error archfound ppc64 + #endif #else #error archfound ppc #endif diff --git a/cmake/detect-arch.cmake b/cmake/detect-arch.cmake index f4eb99fb..b80d6666 100644 --- a/cmake/detect-arch.cmake +++ b/cmake/detect-arch.cmake @@ -52,7 +52,7 @@ if("${ARCH}" MATCHES "(x86_64|AMD64|i[3-6]86)") elseif("${ARCH}" MATCHES "(arm(v[0-9])?|aarch64)") set(BASEARCH "arm") set(BASEARCH_ARM_FOUND TRUE) -elseif("${ARCH}" MATCHES "ppc(64)?|powerpc(64)?") +elseif("${ARCH}" MATCHES "ppc(64(le)?)?|powerpc(64(le)?)?") set(BASEARCH "ppc") set(BASEARCH_PPC_FOUND TRUE) elseif("${ARCH}" MATCHES "alpha") diff --git a/configure b/configure index 50172fdd..5004c540 100755 --- a/configure +++ b/configure @@ -326,6 +326,12 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then else ARCH=native fi ;; + powerpc | ppc) + ARCH=powerpc ;; + powerpc64 | ppc64) + ARCH=powerpc64 ;; + powerpc64le | ppc64le) + ARCH=powerpc64le ;; esac CFLAGS="-O2 ${CFLAGS}" if test -n "${ARCHS}"; then @@ -335,8 +341,14 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then CFLAGS="${CFLAGS} -Wall" SFLAGS="${CFLAGS} -fPIC" if test $native -eq 1; then - CFLAGS="${CFLAGS} -march=native" - SFLAGS="${SFLAGS} -march=native" + case $ARCH in + powerpc*) + NATIVE_FLAG="-mcpu=native" ;; + *) + NATIVE_FLAG="-march=native" ;; + esac + CFLAGS="${CFLAGS} ${NATIVE_FLAG}" + SFLAGS="${SFLAGS} ${NATIVE_FLAG}" fi if test "$warn" -eq 1; then CFLAGS="${CFLAGS} -Wextra -Wpedantic -Wno-implicit-fallthrough" @@ -1024,6 +1036,22 @@ EOF ;; esac +# Check whether features needed by POWER optimisations are available +case "${ARCH}" in + powerpc*) + cat > $test.c << EOF +#include <sys/auxv.h> +int main() { return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); } +EOF + if try $CC -c $CFLAGS -mcpu=power8 $test.c; then + HAVE_POWER8=1 + echo 
"Check whether POWER8 instructions are available ... Yes." | tee -a configure.log + else + HAVE_POWER8=0 + echo "Check whether POWER8 instructions are available ... No." | tee -a configure.log + fi +esac + # Check whether sys/sdt.h is available cat > $test.c << EOF #include <sys/sdt.h> @@ -1325,11 +1353,33 @@ case "${ARCH}" in CFLAGS="-march=${ARCH} ${CFLAGS} -DUNALIGNED_OK" SFLAGS="-march=${ARCH} ${SFLAGS} -DUNALIGNED_OK" ;; - powerpc) - [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc - ;; - powerpc64) - [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64 + powerpc*) + case "${ARCH}" in + powerpc) + [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc + ;; + powerpc64) + [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64 + ;; + powerpc64le) + [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64le + CFLAGS="${CFLAGS} -DUNALIGNED_OK" + SFLAGS="${SFLAGS} -DUNALIGNED_OK" + ;; + esac + + ARCHDIR=arch/power + + if test $without_optimizations -eq 0; then + if test $HAVE_POWER8 -eq 1; then + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} power.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} power.lo" + POWERFLAGS="-DPOWER_FEATURES -DPOWER8" + fi + fi + + CFLAGS="${CFLAGS} ${POWERFLAGS}" + SFLAGS="${SFLAGS} ${POWERFLAGS}" ;; s390x) [ ! 
-z $CROSS_PREFIX ] && QEMU_ARCH=s390x diff --git a/functable.c b/functable.c index 1a203e37..aad87660 100644 --- a/functable.c +++ b/functable.c @@ -29,6 +29,7 @@ extern Pos quick_insert_string_sse4(deflate_state *const s, const Pos str); #elif defined(ARM_ACLE_CRC_HASH) extern Pos quick_insert_string_acle(deflate_state *const s, const Pos str); #endif + /* slide_hash */ #ifdef X86_SSE2 void slide_hash_sse2(deflate_state *s); @@ -115,6 +116,8 @@ ZLIB_INTERNAL void cpu_check_features(void) x86_check_features(); #elif ARM_CPUID arm_check_features(); +#elif POWER_FEATURES + power_check_features(); #endif features_checked = 1; } diff --git a/zutil.h b/zutil.h index 0c9accb5..b8bce163 100644 --- a/zutil.h +++ b/zutil.h @@ -250,6 +250,8 @@ void ZLIB_INTERNAL zng_cfree(void *opaque, void *ptr); # include "arch/x86/x86.h" #elif defined(ARM_CPUID) # include "arch/arm/arm.h" +#elif defined(POWER_FEATURES) +# include "arch/power/power.h" #endif #endif /* ZUTIL_H_ */