From: Vladislav Shchapov Date: Mon, 9 Jun 2025 16:01:07 +0000 (+0500) Subject: Add LoongArch64 CRC32 implementation X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=b35822076e4224110604f4a19302cf8d0473eaf5;p=thirdparty%2Fzlib-ng.git Add LoongArch64 CRC32 implementation Signed-off-by: Vladislav Shchapov --- diff --git a/CMakeLists.txt b/CMakeLists.txt index c608f935..3af5b154 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,6 +129,8 @@ elseif(BASEARCH_S360_FOUND) option(WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z" OFF) option(WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z" OFF) option(WITH_CRC32_VX "Build with vectorized CRC32 on IBM Z" ON) +elseif(BASEARCH_LOONGARCH_FOUND) + option(WITH_CRC32_LA "Build with vectorized CRC32 on LoongArch" ON) elseif(BASEARCH_X86_FOUND) option(WITH_SSE2 "Build with SSE2" ON) cmake_dependent_option(WITH_SSSE3 "Build with SSSE3" ON "WITH_SSE2" OFF) @@ -160,6 +162,7 @@ mark_as_advanced(FORCE WITH_POWER9 WITH_RVV WITH_RISCV_ZBC + WITH_CRC32_LA WITH_INFLATE_STRICT WITH_INFLATE_ALLOW_INVALID_DIST INSTALL_UTILS @@ -730,6 +733,8 @@ elseif(BASEARCH_RISCV_FOUND) set(ARCHDIR "arch/riscv") elseif(BASEARCH_S360_FOUND) set(ARCHDIR "arch/s390") +elseif(BASEARCH_LOONGARCH_FOUND) + set(ARCHDIR "arch/loongarch") elseif(BASEARCH_X86_FOUND) set(ARCHDIR "arch/x86") if(NOT ${ARCH} MATCHES "x86_64") @@ -1004,6 +1009,25 @@ if(WITH_OPTIM) set(WITH_CRC32_VX OFF) endif() endif() + elseif(BASEARCH_LOONGARCH_FOUND) + add_definitions(-DLOONGARCH_FEATURES) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/loongarch_functions.h) + if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/loongarch_features.h) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/loongarch_features.c) + endif() + + if(WITH_CRC32_LA) + check_la64_crc_intrinsics() + if(HAVE_LA64_CRC_INTRIN) + add_definitions(-DLOONGARCH_CRC) + set(CRC32_LA_SRCS ${ARCHDIR}/crc32_la.c) + list(APPEND ZLIB_ARCH_SRCS 
${CRC32_LA_SRCS}) + set_property(SOURCE ${CRC32_LA_SRCS} PROPERTY COMPILE_FLAGS "${NOLTOFLAG}") + else() + set(WITH_CRC32_LA OFF) + endif() + endif() elseif(BASEARCH_X86_FOUND) add_definitions(-DX86_FEATURES) list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_functions.h) @@ -1560,6 +1584,8 @@ elseif(BASEARCH_S360_FOUND) add_feature_info(WITH_DFLTCC_DEFLATE WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z") add_feature_info(WITH_DFLTCC_INFLATE WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z") add_feature_info(WITH_CRC32_VX WITH_CRC32_VX "Build with vectorized CRC32 on IBM Z") +elseif(BASEARCH_LOONGARCH_FOUND) + add_feature_info(WITH_CRC32_LA WITH_CRC32_LA "Build with vectorized CRC32 on LoongArch") elseif(BASEARCH_X86_FOUND) add_feature_info(WITH_AVX2 WITH_AVX2 "Build with AVX2") add_feature_info(WITH_AVX512 WITH_AVX512 "Build with AVX512") diff --git a/arch/loongarch/Makefile.in b/arch/loongarch/Makefile.in new file mode 100644 index 00000000..6ffe50d7 --- /dev/null +++ b/arch/loongarch/Makefile.in @@ -0,0 +1,40 @@ +# Makefile for zlib-ng +# Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler +# Copyright (C) 2024 Hans Kristian Rosbach +# Copyright (C) 2025 Vladislav Shchapov +# For conditions of distribution and use, see copyright notice in zlib.h + +CC= +CFLAGS= +SFLAGS= +INCLUDES= +SUFFIX= + +SRCDIR=. +SRCTOP=../.. 
+TOPDIR=$(SRCTOP) + +all: \ + loongarch_features.o loongarch_features.lo \ + crc32_la.o crc32_la.lo + +loongarch_features.o: $(SRCDIR)/loongarch_features.c + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/loongarch_features.c + +loongarch_features.lo: $(SRCDIR)/loongarch_features.c + $(CC) $(SFLAGS) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/loongarch_features.c + +crc32_la.o: $(SRCDIR)/crc32_la.c + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_la.c + +crc32_la.lo: $(SRCDIR)/crc32_la.c + $(CC) $(SFLAGS) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_la.c + +mostlyclean: clean +clean: + rm -f *.o *.lo *~ + rm -rf objs + rm -f *.gcda *.gcno *.gcov + +distclean: clean + rm -f Makefile diff --git a/arch/loongarch/crc32_la.c b/arch/loongarch/crc32_la.c new file mode 100644 index 00000000..05c7866d --- /dev/null +++ b/arch/loongarch/crc32_la.c @@ -0,0 +1,37 @@ +/* crc32_la.c - LoongArch version of crc32 + * Copyright (C) 2025 Vladislav Shchapov + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#if defined(LOONGARCH_CRC) +#include "zbuild.h" +#include "zmemory.h" +#include <stdint.h> + +#include <larchintrin.h> + +Z_INTERNAL uint32_t crc32_loongarch64(uint32_t crc, const uint8_t *buf, + size_t len) { + crc = (~crc) & 0xffffffff; + + while (len >= 8) { + crc = (uint32_t)__crc_w_d_w((long int)zng_memread_8(buf), (int)crc); + buf += 8; + len -= 8; + } + if (len & 4) { + crc = (uint32_t)__crc_w_w_w((int)zng_memread_4(buf), (int)crc); + buf += 4; + } + if (len & 2) { + crc = (uint32_t)__crc_w_h_w((short)zng_memread_2(buf), (int)crc); + buf += 2; + } + if (len & 1) { + crc = (uint32_t)__crc_w_b_w((char)(*buf), (int)crc); + } + + return crc ^ 0xffffffff; +} + +#endif diff --git a/arch/loongarch/loongarch_features.c b/arch/loongarch/loongarch_features.c new file mode 100644 index 00000000..a71dff6f --- /dev/null +++ b/arch/loongarch/loongarch_features.c @@ -0,0 +1,23 @@ +/* loongarch_features.c -- check for LoongArch features. 
+ * + * Copyright (C) 2025 Vladislav Shchapov + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zbuild.h" +#include "loongarch_features.h" + +#include <larchintrin.h> + +/* + * https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html + * + * Word number Bit number Annotation Implication + * 0x1 25 CRC 1 indicates support for CRC instruction + */ + +void Z_INTERNAL loongarch_check_features(struct loongarch_cpu_features *features) { + unsigned int w1 = __cpucfg(0x1); + features->has_crc = w1 & 0x2000000; +} diff --git a/arch/loongarch/loongarch_features.h b/arch/loongarch/loongarch_features.h new file mode 100644 index 00000000..2b1c1e21 --- /dev/null +++ b/arch/loongarch/loongarch_features.h @@ -0,0 +1,17 @@ +/* loongarch_features.h -- check for LoongArch features. + * + * Copyright (C) 2025 Vladislav Shchapov + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef LOONGARCH_FEATURES_H_ +#define LOONGARCH_FEATURES_H_ + +struct loongarch_cpu_features { + int has_crc; +}; + +void Z_INTERNAL loongarch_check_features(struct loongarch_cpu_features *features); + +#endif /* LOONGARCH_FEATURES_H_ */ diff --git a/arch/loongarch/loongarch_functions.h b/arch/loongarch/loongarch_functions.h new file mode 100644 index 00000000..23d66a0d --- /dev/null +++ b/arch/loongarch/loongarch_functions.h @@ -0,0 +1,23 @@ +/* loongarch_functions.h -- LoongArch implementations for arch-specific functions. 
+ * + * Copyright (C) 2025 Vladislav Shchapov + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef LOONGARCH_FUNCTIONS_H_ +#define LOONGARCH_FUNCTIONS_H_ + +#ifdef LOONGARCH_CRC +uint32_t crc32_loongarch64(uint32_t crc, const uint8_t *buf, size_t len); +#endif + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +// LOONGARCH - CRC32 - All known CPUs has crc instructions +# if defined(LOONGARCH_CRC) +# undef native_crc32 +# define native_crc32 crc32_loongarch64 +# endif +#endif + +#endif /* LOONGARCH_FUNCTIONS_H_ */ diff --git a/arch_functions.h b/arch_functions.h index 9a7f8d93..a53b2f7b 100644 --- a/arch_functions.h +++ b/arch_functions.h @@ -24,6 +24,8 @@ # include "arch/s390/s390_functions.h" #elif defined(RISCV_FEATURES) # include "arch/riscv/riscv_functions.h" +#elif defined(LOONGARCH_FEATURES) +# include "arch/loongarch/loongarch_functions.h" #endif #endif diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake index bf8bea7e..d71b5416 100644 --- a/cmake/detect-intrinsics.cmake +++ b/cmake/detect-intrinsics.cmake @@ -676,3 +676,17 @@ macro(check_xsave_intrinsics) HAVE_XSAVE_INTRIN FAIL_REGEX "not supported") set(CMAKE_REQUIRED_FLAGS) endmacro() + +macro(check_la64_crc_intrinsics) + # Check whether compiler supports "crc" intrinsic + set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG} ${ZNOLTOFLAG}") + check_c_source_compiles( + "#include <larchintrin.h> + int main(void) { + char ch = 'a'; + int crc = __crc_w_b_w(ch, 0); + return crc; + }" + HAVE_LA64_CRC_INTRIN) + set(CMAKE_REQUIRED_FLAGS) +endmacro() diff --git a/configure b/configure index 4e7a12e8..ba92eff6 100755 --- a/configure +++ b/configure @@ -104,6 +104,7 @@ buildzbc=1 builddfltccdeflate=0 builddfltccinflate=0 buildcrc32vx=1 +buildcrc32la=1 floatabi= forcesse2=0 # For CPUs that can benefit from AVX512, it seems GCC generates suboptimal @@ -224,6 +225,7 @@ case "$1" in --with-dfltcc-deflate) builddfltccdeflate=1; shift ;; --with-dfltcc-inflate) builddfltccinflate=1; shift ;; 
--without-crc32-vx) buildcrc32vx=0; shift ;; + --without-crc32-la) buildcrc32la=0; shift ;; --with-reduced-mem) reducedmem=1; shift ;; --force-sse2) forcesse2=1; shift ;; -a*=* | --archs=*) ARCHS=$(echo $1 | sed 's/.*=//'); shift ;; @@ -358,6 +360,8 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then ARCH=powerpc64le ;; riscv64) ARCH=riscv64 ;; + loongarch64) + ARCH=loongarch64 ;; esac CFLAGS="-O2 ${CFLAGS}" if test -n "${ARCHS}"; then @@ -1708,6 +1712,27 @@ EOF fi } +check_la64_crc_intrinsics() { + # Check whether "crc" intrinsic is available + + cat > $test.c << EOF +#include <larchintrin.h> +int main(void) { + char ch = 'a'; + int crc = __crc_w_b_w(ch, 0); + return crc; +} +EOF + printf "Checking for CRC support... " | tee -a configure.log + if try $CC -c $CFLAGS $test.c; then + HAVE_LA64_CRC_INTRIN=1 + echo "Yes." | tee -a configure.log + else + HAVE_LA64_CRC_INTRIN=0 + echo "No." | tee -a configure.log + fi +} + check_rvv_compiler_flag() { cat > $test.c << EOF int main() { return 0; } @@ -2226,6 +2251,30 @@ EOF fi fi ;; + # loongarch specific optimizations + loongarch64) + ARCHDIR=arch/loongarch + + # Enable arch-specific optimizations + if test $without_optimizations -eq 0; then + CFLAGS="${CFLAGS} -DLOONGARCH_FEATURES" + SFLAGS="${SFLAGS} -DLOONGARCH_FEATURES" + + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} loongarch_features.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} loongarch_features.lo" + + if test $buildcrc32la -eq 1; then + check_la64_crc_intrinsics + if test $HAVE_LA64_CRC_INTRIN -eq 1; then + CFLAGS="${CFLAGS} -DLOONGARCH_CRC" + SFLAGS="${SFLAGS} -DLOONGARCH_CRC" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_la.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_la.lo" + fi + fi + fi + ;; + *) [ ! 
-z $CROSS_PREFIX ] && QEMU_ARCH=$ARCH ;; diff --git a/cpu_features.c b/cpu_features.c index 3585172e..fde65192 100644 --- a/cpu_features.c +++ b/cpu_features.c @@ -19,5 +19,7 @@ Z_INTERNAL void cpu_check_features(struct cpu_features *features) { s390_check_features(&features->s390); #elif defined(RISCV_FEATURES) riscv_check_features(&features->riscv); +#elif defined(LOONGARCH_FEATURES) + loongarch_check_features(&features->loongarch); #endif } diff --git a/cpu_features.h b/cpu_features.h index 8708724b..2b6cc4e8 100644 --- a/cpu_features.h +++ b/cpu_features.h @@ -18,6 +18,8 @@ # include "arch/s390/s390_features.h" #elif defined(RISCV_FEATURES) # include "arch/riscv/riscv_features.h" +#elif defined(LOONGARCH_FEATURES) +# include "arch/loongarch/loongarch_features.h" #endif struct cpu_features { @@ -31,6 +33,8 @@ struct cpu_features { struct s390_cpu_features s390; #elif defined(RISCV_FEATURES) struct riscv_cpu_features riscv; +#elif defined(LOONGARCH_FEATURES) + struct loongarch_cpu_features loongarch; #else char empty; #endif diff --git a/functable.c b/functable.c index ef1fc31d..ac25c915 100644 --- a/functable.c +++ b/functable.c @@ -269,6 +269,13 @@ static void init_functable(void) { ft.crc32 = crc32_s390_vx; #endif + // LOONGARCH +#ifdef LOONGARCH_CRC + if (cf.loongarch.has_crc) { + ft.crc32 = crc32_loongarch64; + } +#endif + // Assign function pointers individually for atomic operation FUNCTABLE_ASSIGN(ft, force_init); FUNCTABLE_ASSIGN(ft, adler32); diff --git a/test/benchmarks/benchmark_crc32.cc b/test/benchmarks/benchmark_crc32.cc index 23a1dc19..1e95b277 100644 --- a/test/benchmarks/benchmark_crc32.cc +++ b/test/benchmarks/benchmark_crc32.cc @@ -97,5 +97,8 @@ BENCHMARK_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq) /* CRC32 fold does a memory copy while hashing */ BENCHMARK_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq)); 
#endif +#ifdef LOONGARCH_CRC +BENCHMARK_CRC32(loongarch64, crc32_loongarch64, test_cpu_features.loongarch.has_crc); +#endif #endif diff --git a/test/test_crc32.cc b/test/test_crc32.cc index 3e3bc8ea..b05f220d 100644 --- a/test/test_crc32.cc +++ b/test/test_crc32.cc @@ -321,5 +321,8 @@ TEST_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2) #if !defined(WITHOUT_CHORBA) && defined(X86_SSE41) && !defined(NO_CHORBA_SSE) TEST_CRC32(chorba_sse41, crc32_chorba_sse41, test_cpu_features.x86.has_sse41) #endif +#if defined(LOONGARCH_CRC) +TEST_CRC32(loongarch64, crc32_loongarch64, test_cpu_features.loongarch.has_crc) +#endif #endif