    option(WITH_POWER9 "Build with optimisations for POWER9" ON)
elseif(BASEARCH_RISCV_FOUND)
    option(WITH_RVV "Build with RVV intrinsics" ON)
+    option(WITH_RISCV_ZBC "Build with RISC-V Zbc instructions for CRC32" ON)
elseif(BASEARCH_S360_FOUND)
    option(WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z" OFF)
    option(WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z" OFF)
    WITH_POWER8
    WITH_POWER9
    WITH_RVV
+    WITH_RISCV_ZBC
    WITH_INFLATE_STRICT
    WITH_INFLATE_ALLOW_INVALID_DIST
    INSTALL_UTILS
        # FIXME: stop passing explicit compile flags to riscv_features.c once
        # the kernel reports these extensions through hwcap or hwprobe
        set(RVV_SRCS ${ARCHDIR}/adler32_rvv.c ${ARCHDIR}/chunkset_rvv.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c)
-        if(WITH_RUNTIME_CPU_DETECTION)
-            list(APPEND RVV_SRCS ${ARCHDIR}/riscv_features.c)
-        endif()
        list(APPEND ZLIB_ARCH_SRCS ${RVV_SRCS})
        set_property(SOURCE ${RVV_SRCS} PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}")
    else()
        set(WITH_RVV OFF)
    endif()
endif()
+if(WITH_RISCV_ZBC)
+    check_riscv_zbc_ext()
+    if(HAVE_RISCV_ZBC)
+        add_definitions(-DRISCV_CRC32_ZBC)
+        set(ZBC_SRCS ${ARCHDIR}/crc32_zbc.c)
+        list(APPEND ZLIB_ARCH_SRCS ${ZBC_SRCS})
+        set_property(SOURCE ${ZBC_SRCS} PROPERTY COMPILE_FLAGS "${RISCVZBCFLAG} ${NOLTOFLAG}")
+        add_feature_info(RISCV_ZBC 1 "Support RISC-V Zbc extension for CRC32")
+    else()
+        set(WITH_RISCV_ZBC OFF)
+    endif()
+endif()
+
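+# riscv_features.c performs its compile-time extension checks via the
+# __riscv_v and __riscv_zbc predefines, so it needs to see the union of the
+# enabled extensions; "${RISCVFLAG}_zbc" appends Zbc to the RVV -march string.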
+if(WITH_RUNTIME_CPU_DETECTION AND BASEARCH_RISCV_FOUND)
+    if(WITH_RVV AND WITH_RISCV_ZBC AND HAVE_RVV_INTRIN AND HAVE_RISCV_ZBC)
+        set_property(SOURCE ${ARCHDIR}/riscv_features.c PROPERTY COMPILE_FLAGS "${RISCVFLAG}_zbc ${NOLTOFLAG}")
+    elseif(WITH_RVV AND HAVE_RVV_INTRIN)
+        set_property(SOURCE ${ARCHDIR}/riscv_features.c PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}")
+    elseif(WITH_RISCV_ZBC AND HAVE_RISCV_ZBC)
+        set_property(SOURCE ${ARCHDIR}/riscv_features.c PROPERTY COMPILE_FLAGS "${RISCVZBCFLAG} ${NOLTOFLAG}")
+    endif()
+endif()
elseif(BASEARCH_S360_FOUND)
    check_s390_intrinsics()
    if(HAVE_S390_INTRIN)
    add_feature_info(WITH_POWER9 WITH_POWER9 "Build with optimisations for POWER9")
elseif(BASEARCH_RISCV_FOUND)
    add_feature_info(WITH_RVV WITH_RVV "Build with RVV intrinsics")
+    add_feature_info(WITH_RISCV_ZBC WITH_RISCV_ZBC "Build with RISC-V Zbc instructions for CRC32")
elseif(BASEARCH_S360_FOUND)
    add_feature_info(WITH_DFLTCC_DEFLATE WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z")
    add_feature_info(WITH_DFLTCC_INFLATE WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z")
--- /dev/null
+/* crc32_zbc.c - RISC-V Zbc version of crc32
+ * Copyright (C) 2025 ByteDance. All rights reserved.
+ * Contributed by Yin Tong <yintong.ustc@bytedance.com>
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#if defined(RISCV_CRC32_ZBC)
+#include "zbuild.h"
+#include <stdint.h>
+
+#define CLMUL_MIN_LEN 16   // Minimum buffer length for the CLMUL path (crc32_clmul_impl)
+#define CLMUL_CHUNK_LEN 16 // The folding loop consumes 16-byte chunks
+
+extern uint32_t crc32_c(uint32_t crc, const uint8_t *buf, size_t len);
+
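+/* Folding and Barrett-reduction constants for the reflected CRC-32
+ * polynomial 0xEDB88320 (the same values appear in the Linux kernel's
+ * CLMUL-based CRC-32 code): R3 = x^(128+32) mod P, R4 = x^(128-32) mod P,
+ * R5 = x^64 mod P, plus the full polynomial and the Barrett constant u. */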
+#define CONSTANT_R3 0x1751997d0ULL
+#define CONSTANT_R4 0x0ccaa009eULL
+#define CONSTANT_R5 0x163cd6124ULL
+#define MASK32 0xFFFFFFFF
+#define CRCPOLY_TRUE_LE_FULL 0x1DB710641ULL
+#define CONSTANT_RU 0x1F7011641ULL
+
+static inline uint64_t clmul(uint64_t a, uint64_t b) {
+    uint64_t res;
+    __asm__ volatile("clmul %0, %1, %2" : "=r"(res) : "r"(a), "r"(b));
+    return res;
+}
+
+// clmulh returns the high 64 bits of the 128-bit carry-less product
+static inline uint64_t clmulh(uint64_t a, uint64_t b) {
+    uint64_t res;
+    __asm__ volatile("clmulh %0, %1, %2" : "=r"(res) : "r"(a), "r"(b));
+    return res;
+}
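
An off-target reference for these wrappers makes the folding math unit-testable on any host. A minimal sketch (illustrative only, not part of the patch; clmul_ref/clmulh_ref are hypothetical helpers):

#include <stdint.h>

/* Reference carry-less multiply over GF(2): low half of the 128-bit product. */
static uint64_t clmul_ref(uint64_t a, uint64_t b) {
    uint64_t lo = 0;
    for (int i = 0; i < 64; i++)
        if ((b >> i) & 1)
            lo ^= a << i;        /* bits shifted past 63 belong to the high half */
    return lo;
}

/* Reference for clmulh: high half of the same 128-bit product. */
static uint64_t clmulh_ref(uint64_t a, uint64_t b) {
    uint64_t hi = 0;
    for (int i = 1; i < 64; i++)
        if ((b >> i) & 1)
            hi ^= a >> (64 - i); /* the part of (a << i) that overflowed 64 bits */
    return hi;
}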
+
+static inline uint32_t crc32_clmul_impl(uint64_t crc, const unsigned char *buf,
+                                        uint64_t len) {
+    const uint64_t *buf64 = (const uint64_t *)buf;
+    uint64_t low = buf64[0] ^ crc;
+    uint64_t high = buf64[1];
+
+    if (len < 16) // cannot happen: the caller guarantees len >= 16 and len % 16 == 0
+        goto finish_fold;
+    len -= 16;
+    buf64 += 2;
+
+    // Fold the 128-bit state forward over each remaining 16-byte block
+    while (len >= 16) {
+        uint64_t t2 = clmul(CONSTANT_R4, high);
+        uint64_t t3 = clmulh(CONSTANT_R4, high);
+
+        uint64_t t0_new = clmul(CONSTANT_R3, low);
+        uint64_t t1_new = clmulh(CONSTANT_R3, low);
+
+        // Combine the folded halves and XOR in the new data
+        low = t0_new ^ t2;
+        high = t1_new ^ t3;
+        low ^= buf64[0];
+        high ^= buf64[1];
+
+        buf64 += 2;
+        len -= 16;
+    }
+
+finish_fold:; // null statement: before C23, a label may not precede a declaration
+    // Fold the 128-bit state into 64 bits
+    uint64_t fold_t3 = clmulh(low, CONSTANT_R4);
+    uint64_t fold_t2 = clmul(low, CONSTANT_R4);
+    low = high ^ fold_t2;
+    high = fold_t3;
+
+    // Combine the low and high parts and perform the polynomial reduction
+    uint64_t combined = (low >> 32) | ((high & MASK32) << 32);
+    uint64_t reduced_low = clmul(low & MASK32, CONSTANT_R5) ^ combined;
+
+    // Barrett reduction back down to a 32-bit CRC
+    uint64_t barrett = clmul(reduced_low & MASK32, CONSTANT_RU) & MASK32;
+    barrett = clmul(barrett, CRCPOLY_TRUE_LE_FULL);
+    uint64_t final = barrett ^ reduced_low;
+
+    // The final CRC is in the high 32 bits
+    return (uint32_t)(final >> 32);
+}
+
+Z_INTERNAL uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf,
+                                      size_t len) {
+    if (len < CLMUL_MIN_LEN) {
+        return crc32_c(crc, buf, len);
+    }
+
+    /* Consume the length remainder up front with the scalar CRC so the
+       folding loop only sees whole 16-byte chunks. */
+    uint64_t unaligned_length = len % CLMUL_CHUNK_LEN;
+    if (unaligned_length) {
+        crc = crc32_c(crc, buf, unaligned_length);
+        buf += unaligned_length;
+        len -= unaligned_length;
+    }
+    crc ^= 0xFFFFFFFF; // the folding code works on the raw (inverted) CRC state
+    crc = crc32_clmul_impl(crc, buf, len);
+    return crc ^ 0xFFFFFFFF;
+}
+
+#endif
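
The whole path can be validated against a bitwise oracle for the reflected CRC-32 polynomial 0xEDB88320 that zlib's crc32() uses. A minimal sketch (crc32_bitwise is a hypothetical test helper, not part of the patch):

#include <stddef.h>
#include <stdint.h>

/* Bitwise reflected CRC-32 with the usual pre/post inversion; on Zbc hardware
 * crc32_riscv64_zbc(0, buf, len) should equal crc32_bitwise(0, buf, len). */
static uint32_t crc32_bitwise(uint32_t crc, const uint8_t *buf, size_t len) {
    crc = ~crc;
    while (len--) {
        crc ^= *buf++;
        for (int k = 0; k < 8; k++)
            crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
    }
    return ~crc;
}

Since crc32_clmul_impl reads the input through a uint64_t pointer, it is also worth exercising misaligned buffers, along the lines of the TEST_CRC32_ALIGN cases used for ARM further down.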
#include "riscv_features.h"
#define ISA_V_HWCAP (1 << ('v' - 'a'))
+#define ISA_ZBC_HWCAP (1 << 29) /* XXX: AT_HWCAP defines bits only for the
+                                   single-letter extensions (bits 0-25); Zbc
+                                   has no standard hwcap bit, so this probe is
+                                   speculative (see the hwprobe sketch below). */
int Z_INTERNAL is_kernel_version_greater_or_equal_to_6_5() {
    struct utsname buffer;
#else
    features->has_rvv = 0;
#endif
+
+#if defined(__riscv_zbc) && defined(__linux__)
+    features->has_zbc = 1;
+#else
+    features->has_zbc = 0;
+#endif
}
void Z_INTERNAL riscv_check_features_runtime(struct riscv_cpu_features *features) {
    unsigned long hw_cap = 0;
#endif
    features->has_rvv = hw_cap & ISA_V_HWCAP;
+    features->has_zbc = hw_cap & ISA_ZBC_HWCAP;
}
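
AT_HWCAP carries no standard bit for multi-letter extensions such as Zbc, so a robust runtime check would use the riscv_hwprobe syscall instead. A minimal sketch, assuming a Linux 6.8+ kernel whose <asm/hwprobe.h> defines RISCV_HWPROBE_EXT_ZBC (probe_zbc is a hypothetical helper):

#include <asm/hwprobe.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Returns nonzero if the kernel reports Zbc support via hwprobe. */
static int probe_zbc(void) {
    struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 };
    /* riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags) */
    if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0)
        return 0;
    return (pair.value & RISCV_HWPROBE_EXT_ZBC) != 0;
}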
void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features) {
struct riscv_cpu_features {
    int has_rvv;
+    int has_zbc;
};
void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features);
void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
#endif
+#ifdef RISCV_CRC32_ZBC
+uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf, size_t len);
+#endif
+
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// RISCV - RVV
# if defined(RISCV_RVV) && defined(__riscv_v) && defined(__linux__)
# undef native_slide_hash
# define native_slide_hash slide_hash_rvv
# endif
+
+// RISCV - CRC32
+# if defined(RISCV_CRC32_ZBC) && defined(__riscv_zbc)
+# undef native_crc32
+# define native_crc32 crc32_riscv64_zbc
+# endif
#endif
#endif /* RISCV_FUNCTIONS_H_ */
set(CMAKE_REQUIRED_FLAGS)
endmacro()
+macro(check_riscv_zbc_ext)
+    if(NOT NATIVEFLAG)
+        if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+            set(RISCVZBCFLAG "-march=rv64gc_zbc")
+        endif()
+    endif()
+    # Check whether the compiler supports RISC-V Zbc inline assembly
+    # (at least GCC 11 / Clang 14)
+    set(CMAKE_REQUIRED_FLAGS "${RISCVZBCFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+    check_c_source_compiles(
+        "#include <stdint.h>
+        uint64_t f(uint64_t a, uint64_t b) {
+            uint64_t c;
+            __asm__ __volatile__ (\"clmul %[result], %[input_a], %[input_b]\" : [result] \"=r\" (c) : [input_a] \"r\" (a), [input_b] \"r\" (b));
+            return c;
+        }
+        int main(void) { return f(1, 2); }"
+        HAVE_RISCV_ZBC
+    )
+    set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
macro(check_s390_intrinsics)
check_c_source_compiles(
"#include <sys/auxv.h>
}
#endif
+// RISCV - ZBC
+#ifdef RISCV_CRC32_ZBC
+    if (cf.riscv.has_zbc) {
+        ft.crc32 = &crc32_riscv64_zbc;
+    }
+#endif
// S390
#ifdef S390_CRC32_VX
#ifdef ARM_CRC32
BENCHMARK_CRC32(armv8, crc32_armv8, test_cpu_features.arm.has_crc32);
#endif
+#ifdef RISCV_CRC32_ZBC
+BENCHMARK_CRC32(riscv, crc32_riscv64_zbc, test_cpu_features.riscv.has_zbc);
+#endif
#ifdef POWER8_VSX_CRC32
BENCHMARK_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07);
#endif
TEST_CRC32(armv8, crc32_armv8, test_cpu_features.arm.has_crc32)
TEST_CRC32_ALIGN(armv8_align, crc32_armv8, test_cpu_features.arm.has_crc32)
#endif
+#ifdef RISCV_CRC32_ZBC
+TEST_CRC32(riscv, crc32_riscv64_zbc, test_cpu_features.riscv.has_zbc)
+#endif
#ifdef POWER8_VSX_CRC32
TEST_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07)
#endif