From: Mika Lindqvist Date: Sun, 26 Mar 2017 19:54:17 +0000 (+0300) Subject: PowerPC: Add initial support for AltiVec. X-Git-Tag: 2.1.0-beta1~528 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=77d4c8789ed831db309ea4ae1b701d7a6296173d;p=thirdparty%2Fzlib-ng.git PowerPC: Add initial support for AltiVec. * Add detection of VMX instructions --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 614d7e593..8c4523b3d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,6 +94,7 @@ if(BASEARCH_ARM_FOUND) option(WITH_ACLE "Build with ACLE" ON) option(WITH_NEON "Build with NEON intrinsics" ON) elseif(BASEARCH_PPC_FOUND) + option(WITH_ALTIVEC "Build with AltiVec (VMX) optimisations for PowerPC" ON) option(WITH_POWER8 "Build with optimisations for POWER8" ON) elseif(BASEARCH_S360_FOUND) option(WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z" OFF) @@ -119,6 +120,7 @@ mark_as_advanced(FORCE WITH_AVX2 WITH_SSE2 WITH_SSSE3 WITH_SSE4 WITH_PCLMULQDQ + WITH_ALTIVEC WITH_POWER8 WITH_INFLATE_STRICT WITH_INFLATE_ALLOW_INVALID_DIST @@ -608,16 +610,41 @@ if(WITH_OPTIM) endif() endif() elseif(BASEARCH_PPC_FOUND) + # Common arch detection code + if(WITH_ALTIVEC) + check_ppc_intrinsics() + endif() if(WITH_POWER8) check_power8_intrinsics() + endif() + if(HAVE_VMX OR HAVE_POWER8_INTRIN) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power.h) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power.c) + endif() + # VMX specific options and files + if(WITH_ALTIVEC) + if(HAVE_VMX) + add_definitions(-DPPC_FEATURES) + if(HAVE_ALTIVEC) + add_definitions(-DPPC_VMX_ADLER32) + add_definitions(-DPPC_VMX_SLIDEHASH) + set(PPC_SRCS ${ARCHDIR}/adler32_vmx.c ${ARCHDIR}/slide_hash_vmx.c) + list(APPEND ZLIB_ARCH_SRCS ${PPC_SRCS}) + add_feature_info(ALTIVEC 1 "Support the AltiVec instruction set, using \"-maltivec\"") + set_property(SOURCE ${PPC_SRCS} PROPERTY COMPILE_FLAGS "${PPCFLAGS}") + else() + set(WITH_ALTIVEC OFF) + endif() + endif() + endif() + # Power8 specific options and files + if(WITH_POWER8) if(HAVE_POWER8_INTRIN) add_definitions(-DPOWER8) add_definitions(-DPOWER_FEATURES) add_definitions(-DPOWER8_VSX_ADLER32) add_definitions(-DPOWER8_VSX_CHUNKSET) add_definitions(-DPOWER8_VSX_SLIDEHASH) - list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power.h) - list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power.c) set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/chunkset_power8.c ${ARCHDIR}/slide_hash_power8.c) list(APPEND ZLIB_ARCH_SRCS ${POWER8_SRCS}) set_property(SOURCE ${POWER8_SRCS} PROPERTY COMPILE_FLAGS "${POWER8FLAG} ${NOLTOFLAG}") @@ -1315,6 +1342,7 @@ if(BASEARCH_ARM_FOUND) add_feature_info(WITH_ACLE WITH_ACLE "Build with ACLE") add_feature_info(WITH_NEON WITH_NEON "Build with NEON intrinsics") elseif(BASEARCH_PPC_FOUND) + add_feature_info(WITH_ALTIVEC WITH_ALTIVEC "Build with AltiVec optimisations") add_feature_info(WITH_POWER8 WITH_POWER8 "Build with optimisations for POWER8") elseif(BASEARCH_S360_FOUND) add_feature_info(WITH_DFLTCC_DEFLATE WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z") diff --git a/arch/power/Makefile.in b/arch/power/Makefile.in index 3e4b41fb0..9b94e0b7c 100644 --- a/arch/power/Makefile.in +++ b/arch/power/Makefile.in @@ -1,5 +1,6 @@ # Makefile for POWER-specific files # Copyright (C) 2020 Matheus Castanho , IBM +# Copyright (C) 2021 Mika T. Lindqvist # For conditions of distribution and use, see copyright notice in zlib.h CC= @@ -9,6 +10,7 @@ INCLUDES= SUFFIX= P8FLAGS=-mcpu=power8 +PPCFLAGS=-maltivec NOLTOFLAG= SRCDIR=. @@ -19,10 +21,14 @@ all: power.o \ power.lo \ adler32_power8.o \ adler32_power8.lo \ + adler32_vmx.o \ + adler32_vmx.lo \ chunkset_power8.o \ chunkset_power8.lo \ slide_hash_power8.o \ - slide_hash_power8.lo + slide_hash_power8.lo \ + slide_hash_vmx.o \ + slide_hash_vmx.lo power.o: $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c @@ -36,6 +42,12 @@ adler32_power8.o: adler32_power8.lo: $(CC) $(SFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_power8.c +adler32_vmx.o: + $(CC) $(CFLAGS) $(PPCFLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_vmx.c + +adler32_vmx.lo: + $(CC) $(SFLAGS) $(PPCFLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_vmx.c + chunkset_power8.o: $(CC) $(CFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_power8.c @@ -48,6 +60,12 @@ slide_hash_power8.o: slide_hash_power8.lo: $(CC) $(SFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_power8.c +slide_hash_vmx.o: + $(CC) $(CFLAGS) ${PPCFLAGS} $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_vmx.c + +slide_hash_vmx.lo: + $(CC) $(SFLAGS) ${PPCFLAGS} $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_vmx.c + mostlyclean: clean clean: rm -f *.o *.lo *~ diff --git a/arch/power/adler32_vmx.c b/arch/power/adler32_vmx.c new file mode 100644 index 000000000..5d7dc43e2 --- /dev/null +++ b/arch/power/adler32_vmx.c @@ -0,0 +1,118 @@ +/* adler32_vmx.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2011 Mark Adler + * Copyright (C) 2017-2021 Mika T. Lindqvist + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef PPC_VMX_ADLER32 +#include +#include "zutil.h" +#include "adler32_p.h" + +#define vmx_zero() (vec_splat_u32(0)) + +vector unsigned short vec_hadduh(vector unsigned char a) { + vector unsigned char vmx_one = vec_splat_u8(1); + return vec_add(vec_mulo(a, vmx_one), vec_mule(a, vmx_one)); +} + +vector unsigned int vec_hadduw(vector unsigned short a) { + vector unsigned short vmx_one = vec_splat_u16(1); + return vec_add(vec_mulo(a, vmx_one), vec_mule(a, vmx_one)); +} + +static inline void vmx_handle_head_or_tail(uint32_t *pair, const unsigned char *buf, size_t len) { + unsigned int i; + for (i = 0; i < len; ++i) { + pair[0] += buf[i]; + pair[1] += pair[0]; + } +} + +static void vmx_accum32(uint32_t *s, const unsigned char *buf, size_t len) { + static const uint8_t tc0[16] ALIGNED_(16) = {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; + + vector unsigned char t0 = vec_ld(0, tc0); + vector unsigned int adacc, s2acc; + adacc = vec_insert(s[0], vmx_zero(), 0); + s2acc = vec_insert(s[1], vmx_zero(), 0); + + while (len > 0) { + vector unsigned char d0 = vec_ld(0, buf); + vector unsigned short sum2; + sum2 = vec_add(vec_mulo(t0, d0), vec_mule(t0, d0)); + s2acc = vec_add(s2acc, vec_sl(adacc, vec_splat_u32(4))); + s2acc = vec_add(s2acc, vec_hadduw(sum2)); + adacc = vec_add(adacc, vec_hadduw(vec_hadduh(d0))); + buf += 16; + len--; + } + + s[0] = vec_extract(adacc, 0) + vec_extract(adacc, 1) + vec_extract(adacc, 2) + vec_extract(adacc, 3); /* Horizontal add */ + s[1] = vec_extract(s2acc, 0) + vec_extract(s2acc, 1) + vec_extract(s2acc, 2) + vec_extract(s2acc, 3); /* Horizontal add */ +} + +uint32_t adler32_vmx(uint32_t adler, const unsigned char *buf, size_t len) { + uint32_t sum2; + uint32_t pair[2]; + int n = NMAX; + unsigned int done = 0, i; + + /* Split Adler-32 into component sums, it can be supplied by + * the caller sites (e.g. in a PNG file). + */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + pair[0] = adler; + pair[1] = sum2; + + /* in case user likes doing a byte at a time, keep it fast */ + if (UNLIKELY(len == 1)) + return adler32_len_1(adler, buf, sum2); + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (UNLIKELY(buf == NULL)) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (UNLIKELY(len < 16)) + return adler32_len_16(adler, buf, len, sum2); + + // Align buffer + unsigned int al = 0; + if ((uintptr_t)buf & 0xf) { + al = 16-((uintptr_t)buf & 0xf); + if (al > len) { + al=len; + } + vmx_handle_head_or_tail(pair, buf, al); + pair[0] %= BASE; + pair[1] %= BASE; + + done += al; + } + for (i = al; i < len; i += n) { + if ((i + n) > len) + n = (int)(len - i); + + if (n < 16) + break; + + vmx_accum32(pair, buf + i, n / 16); + pair[0] %= BASE; + pair[1] %= BASE; + + done += (n / 16) * 16; + } + + /* Handle the tail elements. */ + if (done < len) { + vmx_handle_head_or_tail(pair, (buf + done), len - done); + pair[0] %= BASE; + pair[1] %= BASE; + } + + /* D = B * 65536 + A, see: https://en.wikipedia.org/wiki/Adler-32. */ + return (pair[1] << 16) | pair[0]; +} +#endif diff --git a/arch/power/power.c b/arch/power/power.c index f93b586d5..d17fe3f4f 100644 --- a/arch/power/power.c +++ b/arch/power/power.c @@ -1,19 +1,30 @@ /* POWER feature check * Copyright (C) 2020 Matheus Castanho , IBM + * Copyright (C) 2021 Mika T. Lindqvist * For conditions of distribution and use, see copyright notice in zlib.h */ #include #include "../../zutil.h" +#include "power.h" -Z_INTERNAL int power_cpu_has_arch_2_07; +Z_INTERNAL int power_cpu_has_altivec = 0; +Z_INTERNAL int power_cpu_has_arch_2_07 = 0; void Z_INTERNAL power_check_features(void) { +#ifdef PPC_FEATURES + unsigned long hwcap; + hwcap = getauxval(AT_HWCAP); + + if (hwcap & PPC_FEATURE_HAS_ALTIVEC) + power_cpu_has_altivec = 1; +#endif + +#ifdef POWER_FEATURES unsigned long hwcap2; hwcap2 = getauxval(AT_HWCAP2); -#ifdef POWER8 if (hwcap2 & PPC_FEATURE2_ARCH_2_07) - power_cpu_has_arch_2_07 = 1; + power_cpu_has_arch_2_07 = 1; #endif } diff --git a/arch/power/power.h b/arch/power/power.h index b36c26141..81ce00961 100644 --- a/arch/power/power.h +++ b/arch/power/power.h @@ -1,11 +1,13 @@ /* power.h -- check for POWER CPU features * Copyright (C) 2020 Matheus Castanho , IBM + * Copyright (C) 2021 Mika T. Lindqvist * For conditions of distribution and use, see copyright notice in zlib.h */ #ifndef POWER_H_ #define POWER_H_ +extern int power_cpu_has_altivec; extern int power_cpu_has_arch_2_07; void Z_INTERNAL power_check_features(void); diff --git a/arch/power/slide_hash_vmx.c b/arch/power/slide_hash_vmx.c new file mode 100644 index 000000000..b16df1538 --- /dev/null +++ b/arch/power/slide_hash_vmx.c @@ -0,0 +1,34 @@ +/* Optimized slide_hash for PowerPC processors with VMX instructions + * Copyright (C) 2017-2021 Mika T. Lindqvist + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#ifdef PPC_VMX_SLIDEHASH + +#include +#include "zbuild.h" +#include "deflate.h" + +static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { + const vector unsigned short vmx_wsize = vec_splats(wsize); + Pos *p = table; + + do { + vector unsigned short value, result; + + value = vec_ld(0, p); + result = vec_subs(value, vmx_wsize); + vec_st(result, 0, p); + + p += 8; + entries -= 8; + } while (entries > 0); +} + +void Z_INTERNAL slide_hash_vmx(deflate_state *s) { + uint16_t wsize = s->w_size; + + slide_hash_chain(s->head, HASH_SIZE, wsize); + slide_hash_chain(s->prev, wsize, wsize); +} + +#endif /* PPC_VMX_SLIDEHASH */ diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake index 90c5f3bee..c6085018a 100644 --- a/cmake/detect-intrinsics.cmake +++ b/cmake/detect-intrinsics.cmake @@ -86,6 +86,56 @@ macro(check_pclmulqdq_intrinsics) endif() endmacro() +macro(check_ppc_intrinsics) + # Check if compiler supports AltiVec + set(CMAKE_REQUIRED_FLAGS "-maltivec") + check_c_source_compiles( + "#include + int main(void) + { + vector int a = vec_splats(0); + vector int b = vec_splats(0); + a = vec_add(a, b); + return 0; + }" + HAVE_ALTIVEC + ) + set(CMAKE_REQUIRED_FLAGS) + + if(HAVE_ALTIVEC) + set(PPCFLAGS "-maltivec") + endif() + + set(CMAKE_REQUIRED_FLAGS "-maltivec -mno-vsx") + check_c_source_compiles( + "#include + int main(void) + { + vector int a = vec_splats(0); + vector int b = vec_splats(0); + a = vec_add(a, b); + return 0; + }" + HAVE_NOVSX + ) + set(CMAKE_REQUIRED_FLAGS) + + if(HAVE_NOVSX) + set(PPCFLAGS "${PPCFLAGS} -mno-vsx") + endif() + + # Check if we have what we need for AltiVec optimizations + set(CMAKE_REQUIRED_FLAGS "${PPCFLAGS}") + check_c_source_compiles( + "#include + int main() { + return (getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC); + }" + HAVE_VMX + ) + set(CMAKE_REQUIRED_FLAGS) +endmacro() + macro(check_power8_intrinsics) if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") if(NOT NATIVEFLAG) diff --git a/configure b/configure index badaa0ae8..e48dfdc56 100755 --- a/configure +++ b/configure @@ -91,6 +91,7 @@ cover=0 build32=0 build64=0 buildacle=1 +buildaltivec=1 buildneon=1 builddfltccdeflate=0 builddfltccinflate=0 @@ -110,6 +111,7 @@ acleflag= neonflag= noltoflag="-fno-lto" vgfmaflag="-march=z13" +vmxflag="-maltivec" without_optimizations=0 without_new_strategies=0 reducedmem=0 @@ -155,6 +157,7 @@ case "$1" in echo ' [--without-new-strategies] Compiles without using new additional deflate strategies' | tee -a configure.log echo ' [--without-acle] Compiles without ARM C Language Extensions' | tee -a configure.log echo ' [--without-neon] Compiles without ARM Neon SIMD instruction set' | tee -a configure.log + echo ' [--without-altivec] Compiles without PPC AltiVec support' | tee -a configure.log echo ' [--with-dfltcc-deflate] Use DEFLATE CONVERSION CALL instruction for compression on IBM Z' | tee -a configure.log echo ' [--with-dfltcc-inflate] Use DEFLATE CONVERSION CALL instruction for decompression on IBM Z' | tee -a configure.log echo ' [--without-crc32-vx] Build without vectorized CRC32 on IBM Z' | tee -a configure.log @@ -183,6 +186,7 @@ case "$1" in -6* | --64) build64=1; shift ;; --without-acle) buildacle=0; shift ;; --without-neon) buildneon=0; shift ;; + --without-altivec) buildaltivec=0 ; shift ;; --with-dfltcc-deflate) builddfltccdeflate=1; shift ;; --with-dfltcc-inflate) builddfltccinflate=1; shift ;; --without-crc32-vx) buildcrc32vx=0; shift ;; @@ -1061,6 +1065,43 @@ EOF fi } +check_ppc_intrinsics() { + cat > $test.c << EOF +#include +int main(void) +{ + vector int a = vec_splats(0); + vector int b = vec_splats(0); + a = vec_add(a, b); + return 0; +} +EOF + if test $buildaltivec -eq 1 && try ${CC} ${CFLAGS} -maltivec $test.c; then + echo "Checking for AltiVec intrinsics ... Yes." | tee -a configure.log + HAVE_ALTIVEC_INTRIN=1 + else + echo "Checking for AltiVec intrinsics ... No." | tee -a configure.log + HAVE_ALTIVEC_INTRIN=0 + fi + if test $buildaltivec -eq 1 && try ${CC} ${CFLAGS} -maltivec -mno-vsx $test.c; then + echo "Checking if -mno-vsx is supported ... Yes." | tee -a configure.log + vmxflag="$vmxflag -mno-vsx" + else + echo "Checking if -mno-vsx is supported ... No." | tee -a configure.log + fi + cat > $test.c << EOF +#include +int main() { return (getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC); } +EOF + if try $CC -c $CFLAGS -maltivec $test.c; then + HAVE_VMX=1 + echo "Check whether VMX instructions are available ... Yes." | tee -a configure.log + else + HAVE_VMX=0 + echo "Check whether VMX instructions are available ... No." | tee -a configure.log + fi +} + check_power8_intrinsics() { # Check whether features needed by POWER optimisations are available cat > $test.c << EOF @@ -1581,14 +1622,30 @@ EOF if test $without_optimizations -eq 0; then + check_ppc_intrinsics check_power8_intrinsics + if test $HAVE_VMX -eq 1; then + CFLAGS="${CFLAGS} -DPPC_FEATURES" + SFLAGS="${SFLAGS} -DPPC_FEATURES" + fi + if test $HAVE_VMX -eq 1 -o $HAVE_POWER8_INTRIN -eq 1; then + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} power.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} power.lo" + fi + if test $HAVE_VMX -eq 1 -a $HAVE_ALTIVEC_INTRIN -eq 1; then + CFLAGS="${CFLAGS} -DPPC_VMX_ADLER32 -DPPC_VMX_SLIDEHASH" + SFLAGS="${SFLAGS} -DPPC_VMX_ADLER32 -DPPC_VMX_SLIDEHASH" + + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_vmx.o slide_hash_vmx.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_vmx.lo slide_hash_vmx.lo" + fi if test $HAVE_POWER8_INTRIN -eq 1; then CFLAGS="${CFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_CHUNKSET -DPOWER8_VSX_SLIDEHASH" SFLAGS="${SFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_CHUNKSET -DPOWER8_VSX_SLIDEHASH" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} power.o adler32_power8.o chunkset_power8.o slide_hash_power8.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} power.lo adler32_power8.lo chunkset_power8.lo slide_hash_power8.lo" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_power8.o chunkset_power8.o slide_hash_power8.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_power8.lo chunkset_power8.lo slide_hash_power8.lo" fi fi ;; @@ -1829,6 +1886,7 @@ sed < $SRCDIR/$ARCHDIR/Makefile.in " /^NEONFLAG *=/s#=.*#=$neonflag# /^NOLTOFLAG *=/s#=.*#=$noltoflag# /^VGFMAFLAG *=/s#=.*#=$vgfmaflag# +/^PPCFLAGS *=/s#=.*#=$vmxflag# " > $ARCHDIR/Makefile # Append header files dependences. diff --git a/functable.c b/functable.c index af3aaa2d8..17544feb9 100644 --- a/functable.c +++ b/functable.c @@ -44,7 +44,11 @@ extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str); void slide_hash_sse2(deflate_state *s); #elif defined(ARM_NEON_SLIDEHASH) void slide_hash_neon(deflate_state *s); -#elif defined(POWER8_VSX_SLIDEHASH) +#endif +#if defined(PPC_VMX_SLIDEHASH) +void slide_hash_vmx(deflate_state *s); +#endif +#if defined(POWER8_VSX_SLIDEHASH) void slide_hash_power8(deflate_state *s); #endif #ifdef X86_AVX2 @@ -56,6 +60,9 @@ extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len); #ifdef ARM_NEON_ADLER32 extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len); #endif +#ifdef PPC_VMX_ADLER32 +extern uint32_t adler32_vmx(uint32_t adler, const unsigned char *buf, size_t len); +#endif #ifdef X86_SSSE3_ADLER32 extern uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len); #endif @@ -175,7 +182,7 @@ Z_INTERNAL void cpu_check_features(void) x86_check_features(); #elif defined(ARM_FEATURES) arm_check_features(); -#elif defined(POWER_FEATURES) +#elif defined(PPC_FEATURES) || defined(POWER_FEATURES) power_check_features(); #elif defined(S390_FEATURES) s390_check_features(); @@ -252,6 +259,10 @@ Z_INTERNAL void slide_hash_stub(deflate_state *s) { if (x86_cpu_has_avx2) functable.slide_hash = &slide_hash_avx2; #endif +#ifdef PPC_VMX_SLIDEHASH + if (power_cpu_has_altivec) + functable.slide_hash = &slide_hash_vmx; +#endif #ifdef POWER8_VSX_SLIDEHASH if (power_cpu_has_arch_2_07) functable.slide_hash = &slide_hash_power8; @@ -279,6 +290,10 @@ Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_ if (x86_cpu_has_avx2) functable.adler32 = &adler32_avx2; #endif +#ifdef PPC_VMX_ADLER32 + if (power_cpu_has_altivec) + functable.adler32 = &adler32_vmx; +#endif #ifdef POWER8_VSX_ADLER32 if (power_cpu_has_arch_2_07) functable.adler32 = &adler32_power8; diff --git a/zutil.h b/zutil.h index 4e8950955..bcb9ed12f 100644 --- a/zutil.h +++ b/zutil.h @@ -250,7 +250,7 @@ void Z_INTERNAL zng_cfree(void *opaque, void *ptr); # include "arch/x86/x86.h" #elif defined(ARM_FEATURES) # include "arch/arm/arm.h" -#elif defined(POWER_FEATURES) +#elif defined(PPC_FEATURES) || defined(POWER_FEATURES) # include "arch/power/power.h" #elif defined(S390_FEATURES) # include "arch/s390/s390.h"