From: Mika Lindqvist Date: Tue, 22 Jun 2021 19:19:13 +0000 (+0300) Subject: [PowerPC] Use templatized code for slide_hash as code for VMX and VSX is very similar X-Git-Tag: 2.1.0-beta1~525 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f456924eaa9f08a3557cced8ed40b7ef6f9d76ea;p=thirdparty%2Fzlib-ng.git [PowerPC] Use templatized code for slide_hash as code for VMX and VSX is very similar * Any differences can be handled using compiler options or added as macros before including template header --- diff --git a/arch/power/slide_hash_power8.c b/arch/power/slide_hash_power8.c index f15305727..5b078ec9f 100644 --- a/arch/power/slide_hash_power8.c +++ b/arch/power/slide_hash_power8.c @@ -6,49 +6,7 @@ #ifdef POWER8_VSX_SLIDEHASH -#include <altivec.h> -#include "zbuild.h" -#include "deflate.h" - -static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { - vector unsigned short vw, vm, *vp; - unsigned chunks; - - table += entries; - - /* Each vector register (chunk) corresponds to 128 bits == 8 Posf, - * so instead of processing each of the entries in the hash table - * individually, we can do it in chunks of 8 with vector instructions. - * - * This function is only called from slide_hash_power8(), and both calls - * pass entries as a power of 2 higher than 2^7, as defined by - * deflateInit2_(), so entries will always be a multiple of 8. */ - chunks = entries >> 3; - Assert(entries % 8 == 0, "Weird hash table size!"); - - vw[0] = wsize; - vw = vec_splat(vw,0); - - vp = (vector unsigned short *)table; - - do { - /* Processing 8 elements at a time */ - vp--; - vm = *vp; - - /* This is equivalent to: m >= wsize ? m - wsize : 0 - * Since we are using a saturated unsigned subtraction, any - * values that are <= wsize will be set to 0, while the others - * will be subtracted by wsize.
*/ - *vp = vec_subs(vm,vw); - } while (--chunks); -} - -void Z_INTERNAL slide_hash_power8(deflate_state *s) { - uint16_t wsize = s->w_size; - - slide_hash_chain(s->head, HASH_SIZE, wsize); - slide_hash_chain(s->prev, wsize, wsize); -} +#define SLIDE_PPC slide_hash_power8 +#include "slide_ppc_tpl.h" #endif /* POWER8_VSX_SLIDEHASH */ diff --git a/arch/power/slide_hash_vmx.c b/arch/power/slide_hash_vmx.c index b16df1538..cf9bd7b79 100644 --- a/arch/power/slide_hash_vmx.c +++ b/arch/power/slide_hash_vmx.c @@ -4,31 +4,7 @@ */ #ifdef PPC_VMX_SLIDEHASH -#include <altivec.h> -#include "zbuild.h" -#include "deflate.h" - -static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { - const vector unsigned short vmx_wsize = vec_splats(wsize); - Pos *p = table; - - do { - vector unsigned short value, result; - - value = vec_ld(0, p); - result = vec_subs(value, vmx_wsize); - vec_st(result, 0, p); - - p += 8; - entries -= 8; - } while (entries > 0); -} - -void Z_INTERNAL slide_hash_vmx(deflate_state *s) { - uint16_t wsize = s->w_size; - - slide_hash_chain(s->head, HASH_SIZE, wsize); - slide_hash_chain(s->prev, wsize, wsize); -} +#define SLIDE_PPC slide_hash_vmx +#include "slide_ppc_tpl.h" #endif /* PPC_VMX_SLIDEHASH */ diff --git a/arch/power/slide_ppc_tpl.h b/arch/power/slide_ppc_tpl.h new file mode 100644 index 000000000..5c17e38fb --- /dev/null +++ b/arch/power/slide_ppc_tpl.h @@ -0,0 +1,31 @@ +/* Optimized slide_hash for PowerPC processors + * Copyright (C) 2017-2021 Mika T.
Lindqvist + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include <altivec.h> +#include "zbuild.h" +#include "deflate.h" + +static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { + const vector unsigned short vmx_wsize = vec_splats(wsize); + Pos *p = table; + + do { + vector unsigned short value, result; + + value = vec_ld(0, p); + result = vec_subs(value, vmx_wsize); + vec_st(result, 0, p); + + p += 8; + entries -= 8; + } while (entries > 0); +} + +void Z_INTERNAL SLIDE_PPC(deflate_state *s) { + uint16_t wsize = s->w_size; + + slide_hash_chain(s->head, HASH_SIZE, wsize); + slide_hash_chain(s->prev, wsize, wsize); +}