From: alexsifivetw Date: Mon, 19 Jun 2023 10:05:11 +0000 (-0700) Subject: Optimize slide_hash using RVV X-Git-Tag: 2.1.3~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2f4ebe2bb68380366b90f1db1f3c5b32601130a0;p=thirdparty%2Fzlib-ng.git Optimize slide_hash using RVV --- diff --git a/CMakeLists.txt b/CMakeLists.txt index ef4f239b6..e92d3826f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -733,7 +733,7 @@ if(WITH_OPTIM) list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c) # FIXME: we will not set compile flags for riscv_features.c when # the kernels update hwcap or hwprobe for riscv - set(RVV_SRCS ${ARCHDIR}/riscv_features.c ${ARCHDIR}/compare256_rvv.c) + set(RVV_SRCS ${ARCHDIR}/riscv_features.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c) list(APPEND ZLIB_ARCH_SRCS ${RVV_SRCS}) set_property(SOURCE ${RVV_SRCS} PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}") else() diff --git a/arch/riscv/slide_hash_rvv.c b/arch/riscv/slide_hash_rvv.c new file mode 100644 index 000000000..1164e89ba --- /dev/null +++ b/arch/riscv/slide_hash_rvv.c @@ -0,0 +1,34 @@ +/* slide_hash_rvv.c - RVV version of slide_hash + * Copyright (C) 2023 SiFive, Inc. All rights reserved. 
+ * Contributed by Alex Chiang + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef RISCV_RVV + +#include <riscv_vector.h> + +#include "../../zbuild.h" +#include "../../deflate.h" + +static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { + size_t vl; + while (entries > 0) { + vl = __riscv_vsetvl_e16m4(entries); + vuint16m4_t v_tab = __riscv_vle16_v_u16m4(table, vl); + vuint16m4_t v_diff = __riscv_vsub_vx_u16m4(v_tab, wsize, vl); + vbool4_t mask = __riscv_vmsltu_vx_u16m4_b4(v_tab, wsize, vl); + v_tab = __riscv_vmerge_vxm_u16m4(v_diff, 0, mask, vl); + __riscv_vse16_v_u16m4(table, v_tab, vl); + table += vl, entries -= vl; + } +} + +Z_INTERNAL void slide_hash_rvv(deflate_state *s) { + uint16_t wsize = (uint16_t)s->w_size; + + slide_hash_chain(s->head, HASH_SIZE, wsize); + slide_hash_chain(s->prev, wsize, wsize); +} + +#endif // RISCV_RVV diff --git a/cpu_features.h b/cpu_features.h index fb43d90a2..f47ddf0d4 100644 --- a/cpu_features.h +++ b/cpu_features.h @@ -267,6 +267,9 @@ extern void slide_hash_vmx(deflate_state *s); #if defined(POWER8_VSX) extern void slide_hash_power8(deflate_state *s); #endif +#if defined(RISCV_RVV) +extern void slide_hash_rvv(deflate_state *s); +#endif #ifdef X86_AVX2 extern void slide_hash_avx2(deflate_state *s); #endif diff --git a/functable.c b/functable.c index 60d4137b3..449edaa0b 100644 --- a/functable.c +++ b/functable.c @@ -208,6 +208,7 @@ static void init_functable(void) { ft.compare256 = &compare256_rvv; ft.longest_match = &longest_match_rvv; ft.longest_match_slow = &longest_match_slow_rvv; + ft.slide_hash = &slide_hash_rvv; } #endif diff --git a/test/benchmarks/benchmark_slidehash.cc b/test/benchmarks/benchmark_slidehash.cc index 238cc1f65..b5ab45616 100644 --- a/test/benchmarks/benchmark_slidehash.cc +++ b/test/benchmarks/benchmark_slidehash.cc @@ -77,7 +77,9 @@ BENCHMARK_SLIDEHASH(power8, slide_hash_power8, test_cpu_features.power.has_arch_ #ifdef PPC_VMX BENCHMARK_SLIDEHASH(vmx, 
slide_hash_vmx, test_cpu_features.power.has_altivec); #endif - +#ifdef RISCV_RVV +BENCHMARK_SLIDEHASH(rvv, slide_hash_rvv, test_cpu_features.riscv.has_rvv); +#endif #ifdef X86_SSE2 BENCHMARK_SLIDEHASH(sse2, slide_hash_sse2, test_cpu_features.x86.has_sse2); #endif