git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
riscv: add crc32 optimization using zbc extension
author    yintong <yintong.ustc@bytedance.com>
          Tue, 22 Apr 2025 02:58:52 +0000 (10:58 +0800)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
          Sun, 27 Apr 2025 16:23:50 +0000 (18:23 +0200)
CMakeLists.txt
arch/riscv/crc32_zbc.c [new file with mode: 0644]
arch/riscv/riscv_features.c
arch/riscv/riscv_features.h
arch/riscv/riscv_functions.h
cmake/detect-intrinsics.cmake
functable.c
test/benchmarks/benchmark_crc32.cc
test/test_crc32.cc

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2324ecdad9f6c6bdf057d38d63431afa8bf58314..7afa5319b4bc4a28f54dd464a7088a309d359c2d 100644
@@ -124,6 +124,7 @@ elseif(BASEARCH_PPC_FOUND)
     option(WITH_POWER9 "Build with optimisations for POWER9" ON)
 elseif(BASEARCH_RISCV_FOUND)
     option(WITH_RVV "Build with RVV intrinsics" ON)
+    option(WITH_RISCV_ZBC "Build with RISCV ZBC" ON)
 elseif(BASEARCH_S360_FOUND)
     option(WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z" OFF)
     option(WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z" OFF)
@@ -158,6 +159,7 @@ mark_as_advanced(FORCE
     WITH_POWER8
     WITH_POWER9
     WITH_RVV
+    WITH_RISCV_ZBC
     WITH_INFLATE_STRICT
     WITH_INFLATE_ALLOW_INVALID_DIST
     INSTALL_UTILS
@@ -945,15 +947,34 @@ if(WITH_OPTIM)
                 # FIXME: we will not set compile flags for riscv_features.c when
                 # the kernels update hwcap or hwprobe for riscv
                 set(RVV_SRCS ${ARCHDIR}/adler32_rvv.c ${ARCHDIR}/chunkset_rvv.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c)
-                if(WITH_RUNTIME_CPU_DETECTION)
-                    list(APPEND RVV_SRCS ${ARCHDIR}/riscv_features.c)
-                endif()
                 list(APPEND ZLIB_ARCH_SRCS ${RVV_SRCS})
                 set_property(SOURCE ${RVV_SRCS} PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}")
             else()
                 set(WITH_RVV OFF)
             endif()
         endif()
+        if(WITH_RISCV_ZBC)
+            check_riscv_zbc_ext()
+            if(HAVE_RISCV_ZBC)
+                add_definitions(-DRISCV_CRC32_ZBC)
+                set(ZBC_SRCS ${ARCHDIR}/crc32_zbc.c)
+                list(APPEND ZLIB_ARCH_SRCS ${ZBC_SRCS})
+                set_property(SOURCE ${ZBC_SRCS} PROPERTY COMPILE_FLAGS "-march=rv64gc_zbc ${NOLTOFLAG}")
+                add_feature_info(RISCV_ZBC 1 "Support RISC-V Zbc extension for CRC32")
+            else()
+                set(WITH_RISCV_ZBC OFF)
+            endif()
+        endif()
+
+        if(WITH_RUNTIME_CPU_DETECTION AND BASEARCH_RISCV_FOUND)
+            if(WITH_RVV AND WITH_RISCV_ZBC AND HAVE_RVV_INTRIN AND HAVE_RISCV_ZBC)
+                set_property(SOURCE ${ARCHDIR}/riscv_features.c PROPERTY COMPILE_FLAGS "${RISCVFLAG}_zbc ${NOLTOFLAG}")
+            elseif(WITH_RVV AND HAVE_RVV_INTRIN)
+                set_property(SOURCE ${ARCHDIR}/riscv_features.c PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}")
+            elseif(WITH_RISCV_ZBC AND HAVE_RISCV_ZBC)
+                set_property(SOURCE ${ARCHDIR}/riscv_features.c PROPERTY COMPILE_FLAGS "${RISCVZBCFLAG} ${NOLTOFLAG}")
+            endif()
+        endif()
     elseif(BASEARCH_S360_FOUND)
         check_s390_intrinsics()
         if(HAVE_S390_INTRIN)
@@ -1528,6 +1549,7 @@ elseif(BASEARCH_PPC_FOUND)
     add_feature_info(WITH_POWER9 WITH_POWER9 "Build with optimisations for POWER9")
 elseif(BASEARCH_RISCV_FOUND)
     add_feature_info(WITH_RVV WITH_RVV "Build with RVV intrinsics")
+    add_feature_info(WITH_RISCV_ZBC WITH_RISCV_ZBC "Build with RISCV ZBC")
 elseif(BASEARCH_S360_FOUND)
     add_feature_info(WITH_DFLTCC_DEFLATE WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z")
     add_feature_info(WITH_DFLTCC_INFLATE WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z")
diff --git a/arch/riscv/crc32_zbc.c b/arch/riscv/crc32_zbc.c
new file mode 100644
index 0000000..d5dc71c
--- /dev/null
@@ -0,0 +1,101 @@
+/* crc32_zbc.c - RISCV Zbc version of crc32
+ * Copyright (C) 2025 ByteDance. All rights reserved.
+ * Contributed by Yin Tong <yintong.ustc@bytedance.com>
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#if defined(RISCV_CRC32_ZBC)
+#include "zbuild.h"
+#include <stdint.h>
+
+#define CLMUL_MIN_LEN 16   // Minimum size of buffer for _crc32_clmul
+#define CLMUL_CHUNK_LEN 16 // Length of chunk for clmul
+
+extern uint32_t crc32_c(uint32_t crc, const uint8_t *buf, size_t len);
+
+#define CONSTANT_R3 0x1751997d0ULL
+#define CONSTANT_R4 0x0ccaa009eULL
+#define CONSTANT_R5 0x163cd6124ULL
+#define MASK32 0xFFFFFFFF
+#define CRCPOLY_TRUE_LE_FULL 0x1DB710641ULL
+#define CONSTANT_RU 0x1F7011641ULL
+
+static inline uint64_t clmul(uint64_t a, uint64_t b) {
+  uint64_t res;
+  __asm__ volatile("clmul %0, %1, %2" : "=r"(res) : "r"(a), "r"(b));
+  return res;
+}
+
+static inline uint64_t clmulh(uint64_t a, uint64_t b) {
+  uint64_t res;
+  __asm__ volatile("clmulh %0, %1, %2" : "=r"(res) : "r"(a), "r"(b));
+  return res;
+}
+
+static inline uint32_t crc32_clmul_impl(uint64_t crc, const unsigned char *buf,
+                                        uint64_t len) {
+  const uint64_t *buf64 = (const uint64_t *)buf;
+  uint64_t low = buf64[0] ^ crc;
+  uint64_t high = buf64[1];
+
+  if (len < 16)
+    goto finish_fold;
+  len -= 16;
+  buf64 += 2;
+
+  // process each 16-byte block
+  while (len >= 16) {
+    uint64_t t2 = clmul(CONSTANT_R4, high);
+    uint64_t t3 = clmulh(CONSTANT_R4, high);
+
+    uint64_t t0_new = clmul(CONSTANT_R3, low);
+    uint64_t t1_new = clmulh(CONSTANT_R3, low);
+
+    // Combine the results and XOR with new data
+    low = t0_new ^ t2;
+    high = t1_new ^ t3;
+    low ^= buf64[0];
+    high ^= buf64[1];
+
+    buf64 += 2;
+    len -= 16;
+  }
+
+finish_fold:
+  // Fold the 128-bit result into 64 bits
+  uint64_t fold_t3 = clmulh(low, CONSTANT_R4);
+  uint64_t fold_t2 = clmul(low, CONSTANT_R4);
+  low = high ^ fold_t2;
+  high = fold_t3;
+
+  // Combine the low and high parts and perform polynomial reduction
+  uint64_t combined = (low >> 32) | ((high & MASK32) << 32);
+  uint64_t reduced_low = clmul(low & MASK32, CONSTANT_R5) ^ combined;
+
+  // Barrett reduction step
+  uint64_t barrett = clmul(reduced_low & MASK32, CONSTANT_RU) & MASK32;
+  barrett = clmul(barrett, CRCPOLY_TRUE_LE_FULL);
+  uint64_t final = barrett ^ reduced_low;
+
+  // Return the high 32 bits as the final CRC
+  return (uint32_t)(final >> 32);
+}
+
+Z_INTERNAL uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf,
+                                      size_t len) {
+  if (len < CLMUL_MIN_LEN) {
+    return crc32_c(crc, buf, len);
+  }
+
+  uint64_t unaligned_length = len % CLMUL_CHUNK_LEN;
+  if (unaligned_length) {
+    crc = crc32_c(crc, buf, unaligned_length);
+    buf += unaligned_length;
+    len -= unaligned_length;
+  }
+  crc ^= 0xFFFFFFFF;
+  crc = crc32_clmul_impl(crc, buf, len);
+  return crc ^ 0xFFFFFFFF;
+}
+
+#endif
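
The kernel above rests entirely on the two Zbc instructions wrapped by clmul() and clmulh(). The following standalone cross-check of those wrappers against a portable bit-loop carry-less multiply is a minimal sketch, not part of this patch; it must be built with -march=rv64gc_zbc and run on Zbc hardware or, for example, under qemu-riscv64:

/* clmul_check.c - hypothetical cross-check of the clmul/clmulh wrappers above.
 * Build with a riscv64 toolchain and -march=rv64gc_zbc; run on Zbc hardware or qemu-riscv64. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static inline uint64_t clmul(uint64_t a, uint64_t b) {
    uint64_t res;
    __asm__ volatile("clmul %0, %1, %2" : "=r"(res) : "r"(a), "r"(b));
    return res;
}

static inline uint64_t clmulh(uint64_t a, uint64_t b) {
    uint64_t res;
    __asm__ volatile("clmulh %0, %1, %2" : "=r"(res) : "r"(a), "r"(b));
    return res;
}

/* Portable references: low and high 64 bits of the 128-bit carry-less product. */
static uint64_t clmul_ref(uint64_t a, uint64_t b) {
    uint64_t r = 0;
    for (int i = 0; i < 64; i++)
        if ((b >> i) & 1)
            r ^= a << i;
    return r;
}

static uint64_t clmulh_ref(uint64_t a, uint64_t b) {
    uint64_t r = 0;
    for (int i = 1; i < 64; i++)
        if ((b >> i) & 1)
            r ^= a >> (64 - i);
    return r;
}

int main(void) {
    /* The patch's R3/R4 fold constants, used here merely as sample inputs. */
    uint64_t x = 0x1751997d0ULL, y = 0x0ccaa009eULL;
    assert(clmul(x, y) == clmul_ref(x, y));
    assert(clmulh(x, y) == clmulh_ref(x, y));
    puts("clmul/clmulh match the portable reference");
    return 0;
}
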
diff --git a/arch/riscv/riscv_features.c b/arch/riscv/riscv_features.c
index f9957d19ccc56b84dea4578847ed03c0198de491..da509a8497472a7be3861a29b82e597b16519f4c 100644
@@ -11,6 +11,7 @@
 #include "riscv_features.h"
 
 #define ISA_V_HWCAP (1 << ('v' - 'a'))
+#define ISA_ZBC_HWCAP (1 << 29)
 
 int Z_INTERNAL is_kernel_version_greater_or_equal_to_6_5() {
     struct utsname buffer;
@@ -36,6 +37,12 @@ void Z_INTERNAL riscv_check_features_compile_time(struct riscv_cpu_features *fea
 #else
     features->has_rvv = 0;
 #endif
+
+#if defined(__riscv_zbc) && defined(__linux__)
+    features->has_zbc = 1;
+#else
+    features->has_zbc = 0;
+#endif
 }
 
 void Z_INTERNAL riscv_check_features_runtime(struct riscv_cpu_features *features) {
@@ -45,6 +52,7 @@ void Z_INTERNAL riscv_check_features_runtime(struct riscv_cpu_features *features
     unsigned long hw_cap = 0;
 #endif
     features->has_rvv = hw_cap & ISA_V_HWCAP;
+    features->has_zbc = hw_cap & ISA_ZBC_HWCAP;
 }
 
 void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features) {
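
The runtime check mirrors the existing RVV probe: it reads the hwcap auxiliary vector and tests individual bits. Below is a standalone sketch of the same probe (hypothetical, Linux-only). Note that bit 29 for Zbc is what this patch assumes; newer kernels report extension bits such as Zbc through the riscv_hwprobe interface instead, which is what the FIXME in CMakeLists.txt alludes to.

/* hwcap_probe.c - hypothetical standalone version of the runtime check above (Linux only). */
#include <stdio.h>
#include <sys/auxv.h>

#define ISA_V_HWCAP   (1UL << ('v' - 'a'))
#define ISA_ZBC_HWCAP (1UL << 29)   /* bit position assumed by this patch for Zbc */

int main(void) {
    unsigned long hwcap = getauxval(AT_HWCAP);
    printf("AT_HWCAP = %#lx\n", hwcap);
    printf("RVV: %s\n", (hwcap & ISA_V_HWCAP) ? "yes" : "no");
    printf("Zbc: %s\n", (hwcap & ISA_ZBC_HWCAP) ? "yes" : "no");
    return 0;
}
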
diff --git a/arch/riscv/riscv_features.h b/arch/riscv/riscv_features.h
index b1593acc256703693120861bf70bdbb9dd9498f0..42855a1b6ba59e2a7821a7cf6bf47fb1c752dddd 100644
@@ -11,6 +11,7 @@
 
 struct riscv_cpu_features {
     int has_rvv;
+    int has_zbc;
 };
 
 void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features);
diff --git a/arch/riscv/riscv_functions.h b/arch/riscv/riscv_functions.h
index 1792b9d259d66a10445c65d2203144bf40512bc8..86b68a6df5743c9ee976a4a22f9f7ea304e9bbe6 100644
@@ -22,6 +22,10 @@ void slide_hash_rvv(deflate_state *s);
 void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
 #endif
 
+#ifdef RISCV_CRC32_ZBC
+uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf, size_t len);
+#endif
+
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 // RISCV - RVV
 #  if defined(RISCV_RVV) && defined(__riscv_v) && defined(__linux__)
@@ -44,6 +48,12 @@ void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
 #    undef native_slide_hash
 #    define native_slide_hash slide_hash_rvv
 #  endif
+
+// RISCV - CRC32
+#  if (defined(RISCV_CRC32_ZBC) && defined (__riscv_zbc))
+#    undef native_crc32
+#    define native_crc32 crc32_riscv64_zbc
+#  endif
 #endif
 
 #endif /* RISCV_FUNCTIONS_H_ */
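
When runtime detection is compiled out (DISABLE_RUNTIME_CPU_DETECTION), dispatch is purely a preprocessor matter: native_crc32 is remapped to the best kernel the build flags permit, so calls resolve directly with no function-table indirection. The following is a minimal, self-contained sketch of that pattern, with hypothetical names and placeholder bodies rather than the real kernels:

/* native_dispatch.c - hypothetical sketch of the DISABLE_RUNTIME_CPU_DETECTION path above:
 * the preprocessor remaps native_crc32 to the best kernel the build flags allow. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t crc32_generic(uint32_t crc, const uint8_t *buf, size_t len) {
    (void)buf; (void)len;
    puts("generic kernel selected");    /* stand-in only, does not compute a real CRC */
    return crc;
}

#define native_crc32 crc32_generic      /* default mapping */

#if defined(__riscv_zbc)                /* defined by the compiler under -march=rv64gc_zbc */
static uint32_t crc32_zbc_standin(uint32_t crc, const uint8_t *buf, size_t len) {
    (void)buf; (void)len;
    puts("Zbc kernel selected");        /* stand-in for crc32_riscv64_zbc */
    return crc;
}
#  undef native_crc32
#  define native_crc32 crc32_zbc_standin
#endif

int main(void) {
    const uint8_t data[] = "abc";
    native_crc32(0, data, 3);
    return 0;
}
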
diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake
index 66872766d92a6289a95648e0d225ef566ea949ae..d9e01f6962609bc8849b8dab2fdea8464e69cea3 100644
@@ -458,6 +458,28 @@ macro(check_rvv_intrinsics)
     set(CMAKE_REQUIRED_FLAGS)
 endmacro()
 
+macro(check_riscv_zbc_ext)
+    if(NOT NATIVEFLAG)
+        if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+            set(RISCVZBCFLAG "-march=rv64gc_zbc")
+        endif()
+    endif()
+    # Check whether compiler supports RISC-V Zbc inline asm
+    # gcc-11 / clang-14 at least
+    set(CMAKE_REQUIRED_FLAGS "${RISCVZBCFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+    check_c_source_compiles(
+        "#include <stdint.h>
+        uint64_t f(uint64_t a, uint64_t b) {
+            uint64_t c;
+            __asm__ __volatile__ (\"clmul %[result], %[input_a], %[input_b]\" : [result] \"=r\" (c) : [input_a] \"r\" (a), [input_b] \"r\" (b));
+            return c;
+        }
+        int main(void) { return f(1, 2); }"
+        HAVE_RISCV_ZBC
+    )
+    set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
 macro(check_s390_intrinsics)
     check_c_source_compiles(
         "#include <sys/auxv.h>
diff --git a/functable.c b/functable.c
index 1d38637fc90a0f2d57ca33b460eadf6ee1fa37bb..ef1fc31dc1d5d9402e85d3f2b0a70ba8ac175db5 100644
@@ -256,6 +256,12 @@ static void init_functable(void) {
     }
 #endif
 
+    // RISCV - ZBC
+#ifdef RISCV_CRC32_ZBC
+    if (cf.riscv.has_zbc) {
+        ft.crc32 = &crc32_riscv64_zbc;
+    }
+#endif
 
     // S390
 #ifdef S390_CRC32_VX
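
The hunk above follows the existing pattern in init_functable: the table is filled with portable defaults and individual entries are overwritten when the corresponding CPU feature was detected at startup. Here is a minimal, self-contained sketch of that mechanism with hypothetical names; the real table in functable.c has many more entries and is initialized exactly once:

/* functable_sketch.c - hypothetical, heavily reduced illustration of the dispatch used above. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint32_t (*crc32_func)(uint32_t crc, const uint8_t *buf, size_t len);

/* Plain bit-at-a-time CRC-32 (reflected, polynomial 0xEDB88320) as the portable default. */
static uint32_t crc32_portable(uint32_t crc, const uint8_t *buf, size_t len) {
    crc = ~crc;
    for (size_t i = 0; i < len; i++) {
        crc ^= buf[i];
        for (int k = 0; k < 8; k++)
            crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
    }
    return ~crc;
}

/* Stand-in for crc32_riscv64_zbc; a real build would point here only when Zbc is available. */
static uint32_t crc32_zbc_standin(uint32_t crc, const uint8_t *buf, size_t len) {
    return crc32_portable(crc, buf, len);
}

static struct { crc32_func crc32; } ft;

static void init_functable(int has_zbc) {
    ft.crc32 = crc32_portable;         /* safe default, like the generic entries in functable.c */
    if (has_zbc)
        ft.crc32 = crc32_zbc_standin;  /* upgrade when the feature bit was detected */
}

int main(void) {
    const uint8_t msg[] = "123456789";
    init_functable(1);
    /* 0xCBF43926 is the well-known CRC-32 check value for "123456789". */
    assert(ft.crc32(0, msg, strlen((const char *)msg)) == 0xCBF43926u);
    puts("dispatched crc32 ok");
    return 0;
}
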
diff --git a/test/benchmarks/benchmark_crc32.cc b/test/benchmarks/benchmark_crc32.cc
index e6947715ff6aa7b0c5d1284c32ed571776487ac4..23a1dc196f6e0bde5104d47b1d9b8eb3fd35723a 100644
@@ -80,6 +80,9 @@ BENCHMARK_CRC32(native, native_crc32, 1);
 #ifdef ARM_CRC32
 BENCHMARK_CRC32(armv8, crc32_armv8, test_cpu_features.arm.has_crc32);
 #endif
+#ifdef RISCV_CRC32_ZBC
+BENCHMARK_CRC32(riscv, crc32_riscv64_zbc, test_cpu_features.riscv.has_zbc);
+#endif
 #ifdef POWER8_VSX_CRC32
 BENCHMARK_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07);
 #endif
diff --git a/test/test_crc32.cc b/test/test_crc32.cc
index 56667f0283298869a9f2a418497e0a351ddd5017..2f768d0c4e31d274fd381d1176c9a2866409d1e1 100644
@@ -270,6 +270,9 @@ INSTANTIATE_TEST_SUITE_P(crc32_alignment, crc32_align, testing::ValuesIn(align_o
 TEST_CRC32(armv8, crc32_armv8, test_cpu_features.arm.has_crc32)
 TEST_CRC32_ALIGN(armv8_align, crc32_armv8, test_cpu_features.arm.has_crc32)
 #endif
+#ifdef RISCV_CRC32_ZBC
+TEST_CRC32(riscv, crc32_riscv64_zbc, test_cpu_features.riscv.has_zbc)
+#endif
 #ifdef POWER8_VSX_CRC32
 TEST_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07)
 #endif
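
The new kernel is exercised by the gated TEST_CRC32 and BENCHMARK_CRC32 entries above. As an additional end-to-end smoke check, the following minimal sketch (assuming a zlib-compat build of zlib-ng, or any zlib) uses the streaming property of CRC-32 to compare a one-shot call, which is long enough to reach the Zbc clmul loop when that kernel is selected, against byte-at-a-time calls, which stay below CLMUL_MIN_LEN and therefore take the portable fallback:

/* crc32_smoke.c - hypothetical smoke test; link against a zlib-compat build of zlib-ng,
 * e.g. cc crc32_smoke.c -lz (library name depends on how the build was configured). */
#include <assert.h>
#include <stdio.h>
#include <zlib.h>

int main(void) {
    unsigned char buf[64];
    for (unsigned i = 0; i < sizeof(buf); i++)
        buf[i] = (unsigned char)(i * 37u + 11u);

    /* One-shot CRC over 64 bytes: large enough for the Zbc clmul path. */
    unsigned long one_shot = crc32(0L, buf, (unsigned int)sizeof(buf));

    /* Byte-at-a-time CRC: each 1-byte call stays on the portable fallback. */
    unsigned long incremental = 0L;
    for (unsigned i = 0; i < sizeof(buf); i++)
        incremental = crc32(incremental, buf + i, 1);

    assert(one_shot == incremental);
    printf("crc32 = %08lx\n", one_shot);
    return 0;
}
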