add_definitions(-DRISCV_RVV)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c)
+ # FIXME: we will not set compile flags for riscv_features.c when
+ # the kernels update hwcap or hwprobe for riscv
+ set(RVV_SRCS ${ARCHDIR}/riscv_features.c ${ARCHDIR}/compare256_rvv.c)
+ list(APPEND ZLIB_ARCH_SRCS ${RVV_SRCS})
+ set_property(SOURCE ${RVV_SRCS} PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}")
else()
set(WITH_RVV OFF)
endif()
* CRC32-B implementation using PCLMULQDQ, VPCLMULQDQ, ACLE, & IBM Z
* Hash table implementation using CRC32-C intrinsics on x86 and ARM
* Slide hash implementations using SSE2, AVX2, Neon, VMX & VSX
- * Compare256 implementations using SSE2, AVX2, Neon, & POWER9
+ * Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV
* Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX
* Support for hardware-accelerated deflate using IBM Z DFLTCC
* Unaligned memory read/writes and large bit buffer improvements
--- /dev/null
+/* compare256_rvv.c - RVV version of compare256
+ * Copyright (C) 2023 SiFive, Inc. All rights reserved.
+ * Contributed by Alex Chiang <alex.chiang@sifive.com>
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifdef RISCV_RVV
+
+#include "../../zbuild.h"
+#include "fallback_builtins.h"
+
+#include <riscv_vector.h>
+
+static inline uint32_t compare256_rvv_static(const uint8_t *src0, const uint8_t *src1) {
+ uint32_t len = 0;
+ size_t vl;
+ long found_diff;
+ do {
+ vl = __riscv_vsetvl_e8m4(256 - len);
+ vuint8m4_t v_src0 = __riscv_vle8_v_u8m4(src0, vl);
+ vuint8m4_t v_src1 = __riscv_vle8_v_u8m4(src1, vl);
+ vbool2_t v_mask = __riscv_vmsne_vv_u8m4_b2(v_src0, v_src1, vl);
+ found_diff = __riscv_vfirst_m_b2(v_mask, vl);
+ if (found_diff >= 0)
+ return len + (uint32_t)found_diff;
+ src0 += vl, src1 += vl, len += vl;
+ } while (len < 256);
+
+ return 256;
+}
+
+Z_INTERNAL uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1) {
+ return compare256_rvv_static(src0, src1);
+}
+
+#define LONGEST_MATCH longest_match_rvv
+#define COMPARE256 compare256_rvv_static
+
+#include "match_tpl.h"
+
+#define LONGEST_MATCH_SLOW
+#define LONGEST_MATCH longest_match_slow_rvv
+#define COMPARE256 compare256_rvv_static
+
+#include "match_tpl.h"
+
+#endif // RISCV_RVV
#ifdef POWER9
extern uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
#endif
+#ifdef RISCV_RVV
+extern uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1);
+#endif
#ifdef DEFLATE_H_
/* insert_string */
#ifdef POWER9
extern uint32_t longest_match_power9(deflate_state *const s, Pos cur_match);
#endif
+#ifdef RISCV_RVV
+extern uint32_t longest_match_rvv(deflate_state *const s, Pos cur_match);
+#endif
/* longest_match_slow */
extern uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
#ifdef POWER9
extern uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
#endif
+#ifdef RISCV_RVV
+extern uint32_t longest_match_slow_rvv(deflate_state *const s, Pos cur_match);
+#endif
/* quick_insert_string */
extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
#endif
+ // RISCV - RVV
+#ifdef RISCV_RVV
+ if (cf.riscv.has_rvv) {
+ ft.compare256 = &compare256_rvv;
+ ft.longest_match = &longest_match_rvv;
+ ft.longest_match_slow = &longest_match_slow_rvv;
+ }
+#endif
+
+
// S390
#ifdef S390_CRC32_VX
if (cf.s390.has_vx)
#ifdef POWER9
BENCHMARK_COMPARE256(power9, compare256_power9, test_cpu_features.power.has_arch_3_00);
#endif
+#ifdef RISCV_RVV
+BENCHMARK_COMPARE256(rvv, compare256_rvv, test_cpu_features.riscv.has_rvv);
+#endif
#ifdef POWER9
TEST_COMPARE256(power9, compare256_power9, test_cpu_features.power.has_arch_3_00)
#endif
+#ifdef RISCV_RVV
+TEST_COMPARE256(rvv, compare256_rvv, test_cpu_features.riscv.has_rvv)
+#endif