]> git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Optimize slide_hash using RVV
authoralexsifivetw <alex.chiang@sifive.com>
Mon, 19 Jun 2023 10:05:11 +0000 (03:05 -0700)
committerHans Kristian Rosbach <hk-github@circlestorm.org>
Fri, 23 Jun 2023 17:44:22 +0000 (19:44 +0200)
CMakeLists.txt
arch/riscv/slide_hash_rvv.c [new file with mode: 0644]
cpu_features.h
functable.c
test/benchmarks/benchmark_slidehash.cc

index ef4f239b6b00dc6e724a3896677dc0b3c3565754..e92d3826f0e4695ff719c5fed64c8ef45e4edc58 100644 (file)
@@ -733,7 +733,7 @@ if(WITH_OPTIM)
                 list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c)
                 # FIXME: we will not set compile flags for riscv_features.c when 
                 # the kernels update hwcap or hwprobe for riscv
-                set(RVV_SRCS ${ARCHDIR}/riscv_features.c ${ARCHDIR}/compare256_rvv.c)
+                set(RVV_SRCS ${ARCHDIR}/riscv_features.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c)
                 list(APPEND ZLIB_ARCH_SRCS ${RVV_SRCS})
                 set_property(SOURCE ${RVV_SRCS} PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}")
             else()
diff --git a/arch/riscv/slide_hash_rvv.c b/arch/riscv/slide_hash_rvv.c
new file mode 100644 (file)
index 0000000..1164e89
--- /dev/null
@@ -0,0 +1,34 @@
+/* slide_hash_rvv.c - RVV version of slide_hash
+ * Copyright (C) 2023 SiFive, Inc. All rights reserved.
+ * Contributed by Alex Chiang <alex.chiang@sifive.com>
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifdef RISCV_RVV
+
+#include <riscv_vector.h>
+
+#include "../../zbuild.h"
+#include "../../deflate.h"
+
+static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) {
+    size_t vl;
+    while (entries > 0) {
+        vl = __riscv_vsetvl_e16m4(entries);
+        vuint16m4_t v_tab = __riscv_vle16_v_u16m4(table, vl);
+        vuint16m4_t v_diff = __riscv_vsub_vx_u16m4(v_tab, wsize, vl);
+        vbool4_t mask = __riscv_vmsltu_vx_u16m4_b4(v_tab, wsize, vl);
+        v_tab = __riscv_vmerge_vxm_u16m4(v_diff, 0, mask, vl);
+        __riscv_vse16_v_u16m4(table, v_tab, vl);
+        table += vl, entries -= vl;
+    }
+}
+
+Z_INTERNAL void slide_hash_rvv(deflate_state *s) {
+    uint16_t wsize = (uint16_t)s->w_size;
+
+    slide_hash_chain(s->head, HASH_SIZE, wsize);
+    slide_hash_chain(s->prev, wsize, wsize);
+}
+
+#endif // RISCV_RVV
index fb43d90a2689708a3b0771b1e3feb8e9cd214289..f47ddf0d485a2d9f7e193155f0f1f92a8738da8e 100644 (file)
@@ -267,6 +267,9 @@ extern void slide_hash_vmx(deflate_state *s);
 #if defined(POWER8_VSX)
 extern void slide_hash_power8(deflate_state *s);
 #endif
+#if defined(RISCV_RVV)
+extern void slide_hash_rvv(deflate_state *s);
+#endif
 #ifdef X86_AVX2
 extern void slide_hash_avx2(deflate_state *s);
 #endif
index 60d4137b37cbe210f138a4125518ce12049a7711..449edaa0b8b6a63c387cb4bbce2c0709f02556d4 100644 (file)
@@ -208,6 +208,7 @@ static void init_functable(void) {
         ft.compare256 = &compare256_rvv;
         ft.longest_match = &longest_match_rvv;
         ft.longest_match_slow = &longest_match_slow_rvv;
+        ft.slide_hash = &slide_hash_rvv;
     }
 #endif
 
index 238cc1f6585bdf7a0b3197058fc96c17ac7e661c..b5ab45616108a0b2b6f628fb7f14676f83d6f44d 100644 (file)
@@ -77,7 +77,9 @@ BENCHMARK_SLIDEHASH(power8, slide_hash_power8, test_cpu_features.power.has_arch_
 #ifdef PPC_VMX
 BENCHMARK_SLIDEHASH(vmx, slide_hash_vmx, test_cpu_features.power.has_altivec);
 #endif
-
+#ifdef RISCV_RVV
+BENCHMARK_SLIDEHASH(rvv, slide_hash_rvv, test_cpu_features.riscv.has_rvv);
+#endif
 #ifdef X86_SSE2
 BENCHMARK_SLIDEHASH(sse2, slide_hash_sse2, test_cpu_features.x86.has_sse2);
 #endif