]> git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Preparation for POWER optimizations
author Matheus Castanho <msc@linux.ibm.com>
Mon, 25 May 2020 21:10:29 +0000 (18:10 -0300)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Mon, 8 Jun 2020 12:47:17 +0000 (14:47 +0200)
Add the scaffolding for future optimizations for POWER processors. The
build can now correctly detect multiple processor sub-architectures
(ppc, ppc64 and ppc64le), and can check whether the features needed for
the optimizations are available at build time and at run time.

With these changes, adding a new optimized function for POWER should be
as simple as adding a new file under arch/power/, appending build
instructions to the build files and editing functable.c accordingly.

The UNALIGNED_OK flag is now also added by default for powerpc64le
targets.

.gitignore
CMakeLists.txt
README.md
arch/power/Makefile.in [new file with mode: 0644]
arch/power/power.c [new file with mode: 0644]
arch/power/power.h [new file with mode: 0644]
cmake/detect-arch.c
cmake/detect-arch.cmake
configure
functable.c
zutil.h

index 9af3854a1918e2c0d578d3e1c9102896f47ccbcb..b6512f145a7580c8f4f68661ef92740864db2c11 100644 (file)
@@ -64,6 +64,7 @@ a.out
 /Makefile
 /arch/arm/Makefile
 /arch/generic/Makefile
+/arch/power/Makefile
 /arch/x86/Makefile
 .kdev4
 *.kdev4
index 73c6e0ff9ebd82437d4f6c205e97a7ec6739db3d..0842263285143e254fae41b3374654a3f4ce7f12 100644 (file)
@@ -91,6 +91,8 @@ option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid
 if(BASEARCH_ARM_FOUND)
     option(WITH_ACLE "Build with ACLE" ON)
     option(WITH_NEON "Build with NEON intrinsics" ON)
+elseif(BASEARCH_PPC_FOUND)
+    option(WITH_POWER8 "Build with optimisations for POWER8" ON)
 elseif(BASEARCH_S360_FOUND AND "${ARCH}" MATCHES "s390x")
     option(WITH_DFLTCC_DEFLATE "Use DEFLATE CONVERSION CALL instruction for compression on IBM Z" OFF)
     option(WITH_DFLTCC_INFLATE "Use DEFLATE CONVERSION CALL instruction for decompression on IBM Z" OFF)
@@ -102,7 +104,7 @@ elseif(BASEARCH_X86_FOUND)
 endif()
 
 mark_as_advanced(FORCE ZLIB_DUAL_LINK WITH_ACLE WITH_NEON WITH_DFLTCC_DEFLATE WITH_DFLTCC_INFLATE
-    WITH_AVX2 WITH_SSE2 WITH_SSE4 WITH_PCLMULQDQ WITH_INFLATE_STRICT WITH_INFLATE_ALLOW_INVALID_DIST)
+    WITH_AVX2 WITH_SSE2 WITH_SSE4 WITH_PCLMULQDQ WITH_POWER8 WITH_INFLATE_STRICT WITH_INFLATE_ALLOW_INVALID_DIST)
 
 add_feature_info(ZLIB_COMPAT ZLIB_COMPAT "Provide a zlib-compatible API")
 add_feature_info(WITH_GZFILEOP WITH_GZFILEOP "Compile with support for gzFile-related functions")
@@ -114,6 +116,8 @@ add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies")
 if(BASEARCH_ARM_FOUND)
     add_feature_info(WITH_ACLE WITH_ACLE "Build with ACLE CRC")
     add_feature_info(WITH_NEON WITH_NEON "Build with NEON intrinsics")
+elseif(BASEARCH_PPC_FOUND)
+    add_feature_info(WITH_POWER8 WITH_POWER8 "Build with optimisations for POWER8")
 endif()
 add_feature_info(WITH_MAINTAINER_WARNINGS WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings")
 add_feature_info(WITH_CODE_COVERAGE WITH_CODE_COVERAGE "Enable code coverage reporting")
@@ -185,7 +189,11 @@ else()
     endif()
     if(WITH_NATIVE_INSTRUCTIONS)
         if(__GNUC__)
-            set(NATIVEFLAG "-march=native")
+            if(BASEARCH_PPC_FOUND)
+                set(NATIVEFLAG "-mcpu=native")
+            else()
+                set(NATIVEFLAG "-march=native")
+            endif()
         else()
             message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration")
         endif()
@@ -219,6 +227,8 @@ else()
                 endif()
                 # ACLE
                 set(ACLEFLAG "-march=armv8-a+crc")
+            elseif(BASEARCH_PPC_FOUND)
+                set(POWER8FLAG "-mcpu=power8")
             elseif(BASEARCH_X86_FOUND)
                 set(AVX2FLAG "-mavx2")
                 set(SSE2FLAG "-msse2")
@@ -242,7 +252,7 @@ else()
 endif()
 
 # Set architecture alignment requirements
-if(BASEARCH_ARM_FOUND OR BASEARCH_X86_FOUND)
+# Little-endian 64-bit POWER may be spelled "ppc64le" (the identifier emitted
+# by cmake/detect-arch.c) or "powerpc64le"; the ARCH regex in
+# cmake/detect-arch.cmake accepts both, so accept both here as well.
+if(BASEARCH_ARM_FOUND OR (BASEARCH_PPC_FOUND AND "${ARCH}" MATCHES "(ppc|powerpc)64le") OR BASEARCH_X86_FOUND)
     if(NOT DEFINED UNALIGNED_OK)
         set(UNALIGNED_OK TRUE)
     endif()
@@ -418,7 +428,18 @@ if(MSVC)
     add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE)
 endif()
 
-if(BASEARCH_X86_FOUND)
+if(BASEARCH_PPC_FOUND)
+    # Check if we have what we need for POWER8 optimizations: the probe below
+    # only compiles when including <sys/auxv.h> makes getauxval(), AT_HWCAP2
+    # and PPC_FEATURE2_ARCH_2_07 available while building with POWER8FLAG.
+    # The outcome is stored in HAVE_POWER8.
+    set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG}")
+    check_c_source_compiles(
+        "#include <sys/auxv.h>
+        int main() {
+          return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
+        }"
+        HAVE_POWER8
+    )
+    # Clear the extra flags again so they do not apply to later checks
+    set(CMAKE_REQUIRED_FLAGS)
+elseif(BASEARCH_X86_FOUND)
     # Check whether compiler supports SSE2 instrinics
     if(WITH_NATIVE_INSTRUCTIONS)
         set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}")
@@ -581,6 +602,8 @@ set(ZLIB_ARCH_HDRS)
 set(ARCHDIR "arch/generic")
 if(BASEARCH_ARM_FOUND)
     set(ARCHDIR "arch/arm")
+elseif(BASEARCH_PPC_FOUND)
+    set(ARCHDIR "arch/power")
 elseif(BASEARCH_S360_FOUND AND "${ARCH}" MATCHES "s390x")
     set(ARCHDIR "arch/s390")
 elseif(BASEARCH_X86_FOUND)
@@ -617,6 +640,17 @@ if(WITH_OPTIM)
             endif()
             add_feature_info(ACLE_CRC 1 "Support ACLE optimized CRC hash generation, using \"${ACLEFLAG}\"")
         endif()
+    elseif(BASEARCH_PPC_FOUND)
+        # Enable the POWER scaffolding only when it was requested (WITH_POWER8)
+        # and the compile probe succeeded (HAVE_POWER8).
+        if(WITH_POWER8 AND HAVE_POWER8)
+            add_definitions(-DPOWER_FEATURES)
+            add_definitions(-DPOWER8)
+            # No POWER8-specific sources exist yet; the list is intentionally empty.
+            set(ZLIB_POWER8_SRCS )
+            # POWER8FLAG is quoted so that an empty value still yields a complete
+            # property/value pair rather than a dangling COMPILE_FLAGS key.
+            set_source_files_properties(
+                ${ZLIB_POWER8_SRCS}
+                PROPERTIES COMPILE_FLAGS "${POWER8FLAG}")
+            list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power.h)
+            list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power.c ${ZLIB_POWER8_SRCS})
+        endif()
     elseif(BASEARCH_S360_FOUND AND "${ARCH}" MATCHES "s390x")
         if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE)
             list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/dfltcc_common.c)
index 07f336d62c357eb4df269fb357e3df46a4005c13..b9e3bc1dcef236b140ddbf6f668bda3f76fcac95 100644 (file)
--- a/README.md
+++ b/README.md
@@ -200,6 +200,7 @@ Advanced Build Options
 | WITH_PCLMULQDQ                  |                       | Build with PCLMULQDQ intrinsics                                     | ON                     |
 | WITH_ACLE                       | --without-acle        | Build with ACLE intrinsics                                          | ON                     |
 | WITH_NEON                       | --without-neon        | Build with NEON intrinsics                                          | ON                     |
+| WITH_POWER8                     |                       | Build with POWER8 optimisations                                     | ON                     |
 | WITH_DFLTCC_DEFLATE             | --with-dfltcc-deflate | Use DEFLATE COMPRESSION CALL instruction for compression on IBM Z   | OFF                    |
 | WITH_DFLTCC_INFLATE             | --with-dfltcc-inflate | Use DEFLATE COMPRESSION CALL instruction for decompression on IBM Z | OFF                    |
 | WITH_INFLATE_STRICT             |                       | Build with strict inflate distance checking                         | OFF                    |
diff --git a/arch/power/Makefile.in b/arch/power/Makefile.in
new file mode 100644 (file)
index 0000000..a438fa5
--- /dev/null
@@ -0,0 +1,33 @@
+# Makefile for POWER-specific files
+# Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
+# For conditions of distribution and use, see copyright notice in zlib.h
+
+CC=
+CFLAGS=
+SFLAGS=
+INCLUDES=
+SUFFIX=
+
+SRCDIR=.
+SRCTOP=../..
+TOPDIR=$(SRCTOP)
+
+# Compiler flag for POWER8-specific sources; no rule in this file uses it yet.
+P8FLAGS=-mcpu=power8
+
+# Default target: build the feature-check object twice, once with CFLAGS
+# (power.o) and once with SFLAGS (power.lo).
+all: power.o \
+     power.lo
+
+# Both rules compile the same source, $(SRCDIR)/power.c; neither lists it as a
+# prerequisite.
+power.o:
+       $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c
+
+power.lo:
+       $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c
+
+mostlyclean: clean
+clean:
+       rm -f *.o *.lo *~
+       rm -rf objs
+       rm -f *.gcda *.gcno *.gcov
+
+distclean:
+       rm -f Makefile
diff --git a/arch/power/power.c b/arch/power/power.c
new file mode 100644 (file)
index 0000000..8f0c49c
--- /dev/null
@@ -0,0 +1,19 @@
+/* POWER feature check
+ * Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include <sys/auxv.h>
+#include "zutil.h"
+
+ZLIB_INTERNAL int power_cpu_has_arch_2_07;
+
+/* Read AT_HWCAP2 from the auxiliary vector and record the POWER features it
+ * reports. Flags are only ever set here; nothing is cleared.
+ */
+void ZLIB_INTERNAL power_check_features(void) {
+    const unsigned long hwcap2_bits = getauxval(AT_HWCAP2);
+
+#ifdef POWER8
+    /* Mark the CPU as 2.07-capable when the PPC_FEATURE2_ARCH_2_07 bit is set. */
+    if ((hwcap2_bits & PPC_FEATURE2_ARCH_2_07) != 0) {
+        power_cpu_has_arch_2_07 = 1;
+    }
+#endif
+}
diff --git a/arch/power/power.h b/arch/power/power.h
new file mode 100644 (file)
index 0000000..4ce6384
--- /dev/null
@@ -0,0 +1,13 @@
+/* power.h -- check for POWER CPU features
+ * Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef POWER_H_
+#define POWER_H_
+
+/* Set to 1 by power_check_features() when AT_HWCAP2 reports
+ * PPC_FEATURE2_ARCH_2_07; that test is only compiled in when POWER8 is defined.
+ */
+extern int power_cpu_has_arch_2_07;
+
+/* Runtime probe that fills in power_cpu_has_arch_2_07. */
+void ZLIB_INTERNAL power_check_features(void);
+
+#endif /* POWER_H_ */
index 32a8db85edb83eb00f7bdb2c17931d65bb1e445b..d7017d8b1f556ee44f1b3bf95fcf79c7816ba774 100644 (file)
 // PowerPC
 #elif defined(__powerpc__) || defined(_ppc__) || defined(__PPC__)
     #if defined(__64BIT__) || defined(__powerpc64__) || defined(__ppc64__)
-        #error archfound ppc64
+        #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+            #error archfound ppc64le
+        #else 
+            #error archfound ppc64
+        #endif
     #else
         #error archfound ppc
     #endif
index f4eb99fb78a81615ed0967f31e13aba3b3f6224e..b80d6666f222bf4363ef5e302d69155db0fbfd7a 100644 (file)
@@ -52,7 +52,7 @@ if("${ARCH}" MATCHES "(x86_64|AMD64|i[3-6]86)")
 elseif("${ARCH}" MATCHES "(arm(v[0-9])?|aarch64)")
     set(BASEARCH "arm")
     set(BASEARCH_ARM_FOUND TRUE)
-elseif("${ARCH}" MATCHES "ppc(64)?|powerpc(64)?")
+elseif("${ARCH}" MATCHES "ppc(64(le)?)?|powerpc(64(le)?)?")
     set(BASEARCH "ppc")
     set(BASEARCH_PPC_FOUND TRUE)
 elseif("${ARCH}" MATCHES "alpha")
index 50172fddc080098f507d6c45049f734cbf8bd9bb..5004c540a8c9aa797e3060a54d36c13355f01123 100755 (executable)
--- a/configure
+++ b/configure
@@ -326,6 +326,12 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then
       else
         ARCH=native
       fi ;;
+    powerpc | ppc)
+      ARCH=powerpc ;;
+    powerpc64 | ppc64)
+      ARCH=powerpc64 ;;
+    powerpc64le | ppc64le)
+      ARCH=powerpc64le ;;
   esac
   CFLAGS="-O2 ${CFLAGS}"
   if test -n "${ARCHS}"; then
@@ -335,8 +341,14 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then
   CFLAGS="${CFLAGS} -Wall"
   SFLAGS="${CFLAGS} -fPIC"
   if test $native -eq 1; then
-    CFLAGS="${CFLAGS} -march=native"
-    SFLAGS="${SFLAGS} -march=native"
+    case $ARCH in
+      powerpc*)
+        NATIVE_FLAG="-mcpu=native" ;;
+      *)
+        NATIVE_FLAG="-march=native" ;;
+    esac
+    CFLAGS="${CFLAGS} ${NATIVE_FLAG}"
+    SFLAGS="${SFLAGS} ${NATIVE_FLAG}"
   fi
   if test "$warn" -eq 1; then
     CFLAGS="${CFLAGS} -Wextra -Wpedantic -Wno-implicit-fallthrough"
@@ -1024,6 +1036,22 @@ EOF
         ;;
 esac
 
+# Check whether features needed by POWER optimisations are available.
+# HAVE_POWER8 is assigned only for powerpc* targets; other targets leave it unset.
+case "${ARCH}" in
+    powerpc*)
+        cat > $test.c << EOF
+#include <sys/auxv.h>
+int main() { return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); }
+EOF
+        # The probe is only compiled (-c), with -mcpu=power8 added after CFLAGS.
+        if try $CC -c $CFLAGS -mcpu=power8 $test.c; then
+            HAVE_POWER8=1
+            echo "Check whether POWER8 instructions are available ... Yes." | tee -a configure.log
+        else
+            HAVE_POWER8=0
+            echo "Check whether POWER8 instructions are available ... No." | tee -a configure.log
+        fi
+esac
+
 # Check whether sys/sdt.h is available
 cat > $test.c << EOF
 #include <sys/sdt.h>
@@ -1325,11 +1353,33 @@ case "${ARCH}" in
         CFLAGS="-march=${ARCH} ${CFLAGS} -DUNALIGNED_OK"
         SFLAGS="-march=${ARCH} ${SFLAGS} -DUNALIGNED_OK"
     ;;
-    powerpc)
-        [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc
-    ;;
-    powerpc64)
-        [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64
+    powerpc*)
+        # Per-variant settings; QEMU_ARCH is assigned only when CROSS_PREFIX is non-empty.
+        case "${ARCH}" in
+            powerpc)
+                [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc
+            ;;
+            powerpc64)
+                [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64
+            ;;
+            powerpc64le)
+                [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64le
+                # Only the little-endian 64-bit variant is built with UNALIGNED_OK.
+                CFLAGS="${CFLAGS} -DUNALIGNED_OK"
+                SFLAGS="${SFLAGS} -DUNALIGNED_OK"
+            ;;
+        esac
+
+        ARCHDIR=arch/power
+
+        # power.o/power.lo and the POWER defines are added only when optimizations
+        # are enabled and the earlier POWER8 probe set HAVE_POWER8 to 1.
+        if test $without_optimizations -eq 0; then
+            if test $HAVE_POWER8 -eq 1; then
+                ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} power.o"
+                ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} power.lo"
+                POWERFLAGS="-DPOWER_FEATURES -DPOWER8"
+            fi
+        fi
+
+        # POWERFLAGS is not assigned in this arm when the block above is skipped.
+        CFLAGS="${CFLAGS} ${POWERFLAGS}"
+        SFLAGS="${SFLAGS} ${POWERFLAGS}"
     ;;
     s390x)
         [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=s390x
index 1a203e37984480dca96abb6f6c39ed4709dafeb9..aad876605fc5f4db4e87da0bdcf9f2978db2225c 100644 (file)
@@ -29,6 +29,7 @@ extern Pos quick_insert_string_sse4(deflate_state *const s, const Pos str);
 #elif defined(ARM_ACLE_CRC_HASH)
 extern Pos quick_insert_string_acle(deflate_state *const s, const Pos str);
 #endif
+
 /* slide_hash */
 #ifdef X86_SSE2
 void slide_hash_sse2(deflate_state *s);
@@ -115,6 +116,8 @@ ZLIB_INTERNAL void cpu_check_features(void)
     x86_check_features();
 #elif ARM_CPUID
     arm_check_features();
+#elif POWER_FEATURES
+    power_check_features();
 #endif
     features_checked = 1;
 }
diff --git a/zutil.h b/zutil.h
index 0c9accb53589cd06eeeaf2fefc876120d59c3e70..b8bce163a60d99954dbb72671ca4386d850cb509 100644 (file)
--- a/zutil.h
+++ b/zutil.h
@@ -250,6 +250,8 @@ void ZLIB_INTERNAL   zng_cfree(void *opaque, void *ptr);
 #  include "arch/x86/x86.h"
 #elif defined(ARM_CPUID)
 #  include "arch/arm/arm.h"
+#elif defined(POWER_FEATURES)
+#  include "arch/power/power.h"
 #endif
 
 #endif /* ZUTIL_H_ */