From: alexsifivetw Date: Mon, 19 Jun 2023 10:05:11 +0000 (-0700) Subject: Optimize slide_hash using RVV X-Git-Tag: 2.1.3~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2f4ebe2bb68380366b90f1db1f3c5b32601130a0;p=thirdparty%2Fzlib-ng.git Optimize slide_hash using RVV --- diff --git a/CMakeLists.txt b/CMakeLists.txt index ef4f239b6..e92d3826f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -733,7 +733,7 @@ if(WITH_OPTIM) list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c) # FIXME: we will not set compile flags for riscv_features.c when # the kernels update hwcap or hwprobe for riscv - set(RVV_SRCS ${ARCHDIR}/riscv_features.c ${ARCHDIR}/compare256_rvv.c) + set(RVV_SRCS ${ARCHDIR}/riscv_features.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c) list(APPEND ZLIB_ARCH_SRCS ${RVV_SRCS}) set_property(SOURCE ${RVV_SRCS} PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}") else() diff --git a/arch/riscv/slide_hash_rvv.c b/arch/riscv/slide_hash_rvv.c new file mode 100644 index 000000000..1164e89ba --- /dev/null +++ b/arch/riscv/slide_hash_rvv.c @@ -0,0 +1,34 @@ +/* slide_hash_rvv.c - RVV version of slide_hash + * Copyright (C) 2023 SiFive, Inc. All rights reserved. 
+ * Contributed by Alex Chiang + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef RISCV_RVV + +#include <riscv_vector.h> + +#include "../../zbuild.h" +#include "../../deflate.h" + +static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { + size_t vl; + while (entries > 0) { + vl = __riscv_vsetvl_e16m4(entries); + vuint16m4_t v_tab = __riscv_vle16_v_u16m4(table, vl); + vuint16m4_t v_diff = __riscv_vsub_vx_u16m4(v_tab, wsize, vl); + vbool4_t mask = __riscv_vmsltu_vx_u16m4_b4(v_tab, wsize, vl); + v_tab = __riscv_vmerge_vxm_u16m4(v_diff, 0, mask, vl); + __riscv_vse16_v_u16m4(table, v_tab, vl); + table += vl, entries -= vl; + } +} + +Z_INTERNAL void slide_hash_rvv(deflate_state *s) { + uint16_t wsize = (uint16_t)s->w_size; + + slide_hash_chain(s->head, HASH_SIZE, wsize); + slide_hash_chain(s->prev, wsize, wsize); +} + +#endif // RISCV_RVV diff --git a/cpu_features.h b/cpu_features.h index fb43d90a2..f47ddf0d4 100644 --- a/cpu_features.h +++ b/cpu_features.h @@ -267,6 +267,9 @@ extern void slide_hash_vmx(deflate_state *s); #if defined(POWER8_VSX) extern void slide_hash_power8(deflate_state *s); #endif +#if defined(RISCV_RVV) +extern void slide_hash_rvv(deflate_state *s); +#endif #ifdef X86_AVX2 extern void slide_hash_avx2(deflate_state *s); #endif diff --git a/functable.c b/functable.c index 60d4137b3..449edaa0b 100644 --- a/functable.c +++ b/functable.c @@ -208,6 +208,7 @@ static void init_functable(void) { ft.compare256 = &compare256_rvv; ft.longest_match = &longest_match_rvv; ft.longest_match_slow = &longest_match_slow_rvv; + ft.slide_hash = &slide_hash_rvv; } #endif diff --git a/test/benchmarks/benchmark_slidehash.cc b/test/benchmarks/benchmark_slidehash.cc index 238cc1f65..b5ab45616 100644 --- a/test/benchmarks/benchmark_slidehash.cc +++ b/test/benchmarks/benchmark_slidehash.cc @@ -77,7 +77,9 @@ BENCHMARK_SLIDEHASH(power8, slide_hash_power8, test_cpu_features.power.has_arch_ #ifdef PPC_VMX BENCHMARK_SLIDEHASH(vmx, 
slide_hash_vmx, test_cpu_features.power.has_altivec); #endif - +#ifdef RISCV_RVV +BENCHMARK_SLIDEHASH(rvv, slide_hash_rvv, test_cpu_features.riscv.has_rvv); +#endif #ifdef X86_SSE2 BENCHMARK_SLIDEHASH(sse2, slide_hash_sse2, test_cpu_features.x86.has_sse2); #endif