git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Add LoongArch64 CRC32 implementation
author Vladislav Shchapov <vladislav@shchapov.ru>
Mon, 9 Jun 2025 16:01:07 +0000 (21:01 +0500)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Fri, 11 Jul 2025 14:12:18 +0000 (16:12 +0200)
Signed-off-by: Vladislav Shchapov <vladislav@shchapov.ru>
14 files changed:
CMakeLists.txt
arch/loongarch/Makefile.in [new file with mode: 0644]
arch/loongarch/crc32_la.c [new file with mode: 0644]
arch/loongarch/loongarch_features.c [new file with mode: 0644]
arch/loongarch/loongarch_features.h [new file with mode: 0644]
arch/loongarch/loongarch_functions.h [new file with mode: 0644]
arch_functions.h
cmake/detect-intrinsics.cmake
configure
cpu_features.c
cpu_features.h
functable.c
test/benchmarks/benchmark_crc32.cc
test/test_crc32.cc

index c608f9351697f6e8ed583401f1b791b23cd6c984..3af5b154be29d0f47fa29f8a46d6f91c970bbb85 100644 (file)
@@ -129,6 +129,8 @@ elseif(BASEARCH_S360_FOUND)
     option(WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z" OFF)
     option(WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z" OFF)
     option(WITH_CRC32_VX "Build with vectorized CRC32 on IBM Z" ON)
+elseif(BASEARCH_LOONGARCH_FOUND)
+    option(WITH_CRC32_LA "Build with vectorized CRC32 on LoongArch" ON)
 elseif(BASEARCH_X86_FOUND)
     option(WITH_SSE2 "Build with SSE2" ON)
     cmake_dependent_option(WITH_SSSE3 "Build with SSSE3" ON "WITH_SSE2" OFF)
@@ -160,6 +162,7 @@ mark_as_advanced(FORCE
     WITH_POWER9
     WITH_RVV
     WITH_RISCV_ZBC
+    WITH_CRC32_LA
     WITH_INFLATE_STRICT
     WITH_INFLATE_ALLOW_INVALID_DIST
     INSTALL_UTILS
@@ -730,6 +733,8 @@ elseif(BASEARCH_RISCV_FOUND)
     set(ARCHDIR "arch/riscv")
 elseif(BASEARCH_S360_FOUND)
     set(ARCHDIR "arch/s390")
+elseif(BASEARCH_LOONGARCH_FOUND)
+    set(ARCHDIR "arch/loongarch")
 elseif(BASEARCH_X86_FOUND)
     set(ARCHDIR "arch/x86")
     if(NOT ${ARCH} MATCHES "x86_64")
@@ -1004,6 +1009,25 @@ if(WITH_OPTIM)
                 set(WITH_CRC32_VX OFF)
             endif()
         endif()
+    elseif(BASEARCH_LOONGARCH_FOUND)
+        add_definitions(-DLOONGARCH_FEATURES)
+        list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/loongarch_functions.h)
+        if(WITH_RUNTIME_CPU_DETECTION)
+            list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/loongarch_features.h)
+            list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/loongarch_features.c)
+        endif()
+
+        if(WITH_CRC32_LA)
+            check_la64_crc_intrinsics()
+            if(HAVE_LA64_CRC_INTRIN)
+                add_definitions(-DLOONGARCH_CRC)
+                set(CRC32_LA_SRCS ${ARCHDIR}/crc32_la.c)
+                list(APPEND ZLIB_ARCH_SRCS ${CRC32_LA_SRCS})
+                set_property(SOURCE ${CRC32_LA_SRCS} PROPERTY COMPILE_FLAGS "${NOLTOFLAG}")
+            else()
+                set(WITH_CRC32_LA OFF)
+            endif()
+        endif()
     elseif(BASEARCH_X86_FOUND)
         add_definitions(-DX86_FEATURES)
         list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_functions.h)
@@ -1560,6 +1584,8 @@ elseif(BASEARCH_S360_FOUND)
     add_feature_info(WITH_DFLTCC_DEFLATE WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z")
     add_feature_info(WITH_DFLTCC_INFLATE WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z")
     add_feature_info(WITH_CRC32_VX WITH_CRC32_VX "Build with vectorized CRC32 on IBM Z")
+elseif(BASEARCH_LOONGARCH_FOUND)
+    add_feature_info(WITH_CRC32_LA WITH_CRC32_LA "Build with vectorized CRC32 on LoongArch")
 elseif(BASEARCH_X86_FOUND)
     add_feature_info(WITH_AVX2 WITH_AVX2 "Build with AVX2")
     add_feature_info(WITH_AVX512 WITH_AVX512 "Build with AVX512")
diff --git a/arch/loongarch/Makefile.in b/arch/loongarch/Makefile.in
new file mode 100644 (file)
index 0000000..6ffe50d
--- /dev/null
@@ -0,0 +1,40 @@
+# Makefile for zlib-ng
+# Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler
+# Copyright (C) 2024 Hans Kristian Rosbach
+# Copyright (C) 2025 Vladislav Shchapov <vladislav@shchapov.ru>
+# For conditions of distribution and use, see copyright notice in zlib.h
+
+CC=
+CFLAGS=
+SFLAGS=
+INCLUDES=
+SUFFIX=
+
+SRCDIR=.
+SRCTOP=../..
+TOPDIR=$(SRCTOP)
+
+all: \
+       loongarch_features.o loongarch_features.lo \
+    crc32_la.o crc32_la.lo
+
+loongarch_features.o: $(SRCDIR)/loongarch_features.c
+       $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/loongarch_features.c
+
+loongarch_features.lo: $(SRCDIR)/loongarch_features.c
+       $(CC) $(SFLAGS) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/loongarch_features.c
+
+crc32_la.o: $(SRCDIR)/crc32_la.c
+       $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_la.c
+
+crc32_la.lo: $(SRCDIR)/crc32_la.c
+       $(CC) $(SFLAGS) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_la.c
+
+mostlyclean: clean
+clean:
+       rm -f *.o *.lo *~
+       rm -rf objs
+       rm -f *.gcda *.gcno *.gcov
+
+distclean: clean
+       rm -f Makefile
diff --git a/arch/loongarch/crc32_la.c b/arch/loongarch/crc32_la.c
new file mode 100644 (file)
index 0000000..05c7866
--- /dev/null
@@ -0,0 +1,37 @@
+/* crc32_la.c - LoongArch version of crc32
+ * Copyright (C) 2025 Vladislav Shchapov <vladislav@shchapov.ru>
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#if defined(LOONGARCH_CRC)
+#include "zbuild.h"
+#include "zmemory.h"
+#include <stdint.h>
+
+#include <larchintrin.h>
+
+Z_INTERNAL uint32_t crc32_loongarch64(uint32_t crc, const uint8_t *buf,
+                                      size_t len) { /* Fold len bytes starting at buf into crc via the __crc_w_*_w intrinsics; returns the updated CRC. */
+    crc = (~crc) & 0xffffffff; /* invert on entry; the result is inverted again at return */
+    /* Main loop: feed the buffer to the intrinsic 8 bytes at a time. */
+    while (len >= 8) {
+        crc = (uint32_t)__crc_w_d_w((long int)zng_memread_8(buf), (int)crc);
+        buf += 8;
+        len -= 8;
+    }
+    if (len & 4) { /* len < 8 here, so bits 2..0 of len select the 4-, 2- and 1-byte tail steps */
+        crc = (uint32_t)__crc_w_w_w((int)zng_memread_4(buf), (int)crc);
+        buf += 4;
+    }
+    if (len & 2) {
+        crc = (uint32_t)__crc_w_h_w((short)zng_memread_2(buf), (int)crc);
+        buf += 2;
+    }
+    if (len & 1) { /* last odd byte; buf is not advanced because nothing follows */
+        crc = (uint32_t)__crc_w_b_w((char)(*buf), (int)crc);
+    }
+
+    return crc ^ 0xffffffff; /* final inversion of all 32 bits */
+}
+
+#endif
diff --git a/arch/loongarch/loongarch_features.c b/arch/loongarch/loongarch_features.c
new file mode 100644 (file)
index 0000000..a71dff6
--- /dev/null
@@ -0,0 +1,23 @@
+/* loongarch_features.c -- check for LoongArch features.
+ *
+ * Copyright (C) 2025 Vladislav Shchapov <vladislav@shchapov.ru>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zbuild.h"
+#include "loongarch_features.h"
+
+#include <larchintrin.h>
+
+/*
+ * https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html
+ *
+ * Word number Bit number  Annotation  Implication
+ * 0x1         25          CRC         1 indicates support for CRC instruction
+ */
+
+void Z_INTERNAL loongarch_check_features(struct loongarch_cpu_features *features) { /* Fill *features from the CPU configuration word read with __cpucfg. */
+    unsigned int w1 = __cpucfg(0x1); /* configuration word 0x1 (see the table above) */
+    features->has_crc = w1 & 0x2000000; /* 0x2000000 == 1 << 25, the CRC bit; stores the masked value (0 or 0x2000000), not 0/1 */
+}
diff --git a/arch/loongarch/loongarch_features.h b/arch/loongarch/loongarch_features.h
new file mode 100644 (file)
index 0000000..2b1c1e2
--- /dev/null
@@ -0,0 +1,17 @@
+/* loongarch_features.h -- check for LoongArch features.
+ *
+ * Copyright (C) 2025 Vladislav Shchapov <vladislav@shchapov.ru>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef LOONGARCH_FEATURES_H_
+#define LOONGARCH_FEATURES_H_
+
+struct loongarch_cpu_features { /* LoongArch CPU capabilities filled in by loongarch_check_features(). */
+    int has_crc; /* nonzero when the CPU reports support for the CRC instructions */
+};
+
+void Z_INTERNAL loongarch_check_features(struct loongarch_cpu_features *features);
+
+#endif /* LOONGARCH_FEATURES_H_ */
diff --git a/arch/loongarch/loongarch_functions.h b/arch/loongarch/loongarch_functions.h
new file mode 100644 (file)
index 0000000..23d66a0
--- /dev/null
@@ -0,0 +1,23 @@
+/* loongarch_functions.h -- LoongArch implementations for arch-specific functions.
+ *
+ * Copyright (C) 2025 Vladislav Shchapov <vladislav@shchapov.ru>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef LOONGARCH_FUNCTIONS_H_
+#define LOONGARCH_FUNCTIONS_H_
+
+#ifdef LOONGARCH_CRC
+uint32_t crc32_loongarch64(uint32_t crc, const uint8_t *buf, size_t len);
+#endif
+
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+// LOONGARCH - CRC32 - All known CPUs have crc instructions, so native_crc32 is bound to crc32_loongarch64 at compile time
+#  if defined(LOONGARCH_CRC)
+#    undef native_crc32
+#    define native_crc32 crc32_loongarch64
+#  endif
+#endif
+
+#endif /* LOONGARCH_FUNCTIONS_H_ */
index 9a7f8d9379fb0688859e1e844922adfc3759ce34..a53b2f7b4374f1c8b6dccb3560ab0af277fe0e03 100644 (file)
@@ -24,6 +24,8 @@
 #  include "arch/s390/s390_functions.h"
 #elif defined(RISCV_FEATURES)
 #  include "arch/riscv/riscv_functions.h"
+#elif defined(LOONGARCH_FEATURES)
+#  include "arch/loongarch/loongarch_functions.h"
 #endif
 
 #endif
index bf8bea7e9ce094612545e25ba6e385922a8dd355..d71b5416d3c12d5b181130343ba693eecc936abd 100644 (file)
@@ -676,3 +676,17 @@ macro(check_xsave_intrinsics)
         HAVE_XSAVE_INTRIN FAIL_REGEX "not supported")
     set(CMAKE_REQUIRED_FLAGS)
 endmacro()
+
+macro(check_la64_crc_intrinsics)
+    # Check whether the compiler can build a program using the "crc" intrinsic __crc_w_b_w from <larchintrin.h>; result goes to HAVE_LA64_CRC_INTRIN
+    set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG} ${ZNOLTOFLAG}")
+    check_c_source_compiles(
+        "#include <larchintrin.h>
+        int main(void) {
+            char ch = 'a';
+            int crc = __crc_w_b_w(ch, 0);
+            return crc;
+        }"
+        HAVE_LA64_CRC_INTRIN)
+    set(CMAKE_REQUIRED_FLAGS) # clear the flags that were set for this check
+endmacro()
index 4e7a12e88649bf398bedf9ba7d49260b40a08fa5..ba92eff685f21a847e898c9be215cebb09812803 100755 (executable)
--- a/configure
+++ b/configure
@@ -104,6 +104,7 @@ buildzbc=1
 builddfltccdeflate=0
 builddfltccinflate=0
 buildcrc32vx=1
+buildcrc32la=1
 floatabi=
 forcesse2=0
 # For CPUs that can benefit from AVX512, it seems GCC generates suboptimal
@@ -224,6 +225,7 @@ case "$1" in
     --with-dfltcc-deflate) builddfltccdeflate=1; shift ;;
     --with-dfltcc-inflate) builddfltccinflate=1; shift ;;
     --without-crc32-vx) buildcrc32vx=0; shift ;;
+    --without-crc32-la) buildcrc32la=0; shift ;;
     --with-reduced-mem) reducedmem=1; shift ;;
     --force-sse2) forcesse2=1; shift ;;
     -a*=* | --archs=*) ARCHS=$(echo $1 | sed 's/.*=//'); shift ;;
@@ -358,6 +360,8 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then
       ARCH=powerpc64le ;;
     riscv64)
       ARCH=riscv64 ;;
+    loongarch64)
+      ARCH=loongarch64 ;;
   esac
   CFLAGS="-O2 ${CFLAGS}"
   if test -n "${ARCHS}"; then
@@ -1708,6 +1712,27 @@ EOF
     fi
 }
 
+check_la64_crc_intrinsics() {
+    # Check whether the "crc" intrinsic __crc_w_b_w from <larchintrin.h> compiles; sets HAVE_LA64_CRC_INTRIN to 1 or 0
+
+    cat > $test.c << EOF
+#include <larchintrin.h>
+int main(void) {
+    char ch = 'a';
+    int crc = __crc_w_b_w(ch, 0);
+    return crc;
+}
+EOF
+    printf "Checking for CRC support... " | tee -a configure.log
+    if try $CC -c $CFLAGS $test.c; then # compile only (-c) with the current CFLAGS
+        HAVE_LA64_CRC_INTRIN=1
+        echo "Yes." | tee -a configure.log
+    else
+        HAVE_LA64_CRC_INTRIN=0
+        echo "No." | tee -a configure.log
+    fi
+}
+
 check_rvv_compiler_flag() {
     cat > $test.c << EOF
 int main() { return 0; }
@@ -2226,6 +2251,30 @@ EOF
             fi
         fi
     ;;
+    # loongarch specific optimizations
+    loongarch64)
+        ARCHDIR=arch/loongarch
+
+        # Enable arch-specific optimizations
+        if test $without_optimizations -eq 0; then
+            CFLAGS="${CFLAGS} -DLOONGARCH_FEATURES"
+            SFLAGS="${SFLAGS} -DLOONGARCH_FEATURES"
+
+            ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} loongarch_features.o"
+            ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} loongarch_features.lo"
+
+            if test $buildcrc32la -eq 1; then
+                check_la64_crc_intrinsics
+                if test $HAVE_LA64_CRC_INTRIN -eq 1; then
+                    CFLAGS="${CFLAGS} -DLOONGARCH_CRC"
+                    SFLAGS="${SFLAGS} -DLOONGARCH_CRC"
+                    ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_la.o"
+                    ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_la.lo"
+                fi
+            fi
+        fi
+    ;;
+
     *)
         [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=$ARCH
     ;;
index 3585172e5d2006abe8dea8abc9091f0e5b8a3141..fde6519290b50a8fc49cee4f3d9768cdf9479f2b 100644 (file)
@@ -19,5 +19,7 @@ Z_INTERNAL void cpu_check_features(struct cpu_features *features) {
     s390_check_features(&features->s390);
 #elif defined(RISCV_FEATURES)
     riscv_check_features(&features->riscv);
+#elif defined(LOONGARCH_FEATURES)
+    loongarch_check_features(&features->loongarch);
 #endif
 }
index 8708724bc0d53ee454fd265eb58aa56aa97daa60..2b6cc4e843cf4773ce417bfb41ffa36b45e4338c 100644 (file)
@@ -18,6 +18,8 @@
 #  include "arch/s390/s390_features.h"
 #elif defined(RISCV_FEATURES)
 #  include "arch/riscv/riscv_features.h"
+#elif defined(LOONGARCH_FEATURES)
+#  include "arch/loongarch/loongarch_features.h"
 #endif
 
 struct cpu_features {
@@ -31,6 +33,8 @@ struct cpu_features {
     struct s390_cpu_features s390;
 #elif defined(RISCV_FEATURES)
     struct riscv_cpu_features riscv;
+#elif defined(LOONGARCH_FEATURES)
+    struct loongarch_cpu_features loongarch;
 #else
     char empty;
 #endif
index ef1fc31dc1d5d9402e85d3f2b0a70ba8ac175db5..ac25c9151c72300f979d66174cd197fe70202d57 100644 (file)
@@ -269,6 +269,13 @@ static void init_functable(void) {
         ft.crc32 = crc32_s390_vx;
 #endif
 
+    // LOONGARCH
+#ifdef LOONGARCH_CRC
+    if (cf.loongarch.has_crc) {
+        ft.crc32 = crc32_loongarch64;
+    }
+#endif
+
     // Assign function pointers individually for atomic operation
     FUNCTABLE_ASSIGN(ft, force_init);
     FUNCTABLE_ASSIGN(ft, adler32);
index 23a1dc196f6e0bde5104d47b1d9b8eb3fd35723a..1e95b27770522ceb9a23bea590d4d46701e52c0d 100644 (file)
@@ -97,5 +97,8 @@ BENCHMARK_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
 /* CRC32 fold does a memory copy while hashing */
 BENCHMARK_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq));
 #endif
+#ifdef LOONGARCH_CRC
+BENCHMARK_CRC32(loongarch64, crc32_loongarch64, test_cpu_features.loongarch.has_crc);
+#endif
 
 #endif
index 3e3bc8ea5d6216b9eecc8bdee96de69ae72a28e1..b05f220da41015eb020d303696c7f566eff5ab7c 100644 (file)
@@ -321,5 +321,8 @@ TEST_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2)
 #if !defined(WITHOUT_CHORBA) && defined(X86_SSE41) && !defined(NO_CHORBA_SSE)
 TEST_CRC32(chorba_sse41, crc32_chorba_sse41, test_cpu_features.x86.has_sse41)
 #endif
+#if defined(LOONGARCH_CRC)
+TEST_CRC32(loongarch64, crc32_loongarch64, test_cpu_features.loongarch.has_crc)
+#endif
 
 #endif