From 770baa4f555e38bf15ea44308532278612cce457 Mon Sep 17 00:00:00 2001 From: Vladislav Shchapov Date: Thu, 3 Jul 2025 20:58:16 +0500 Subject: [PATCH] Rewrite LoongArch64 CRC32 implementation based on ARMv8 with manual alignment Signed-off-by: Vladislav Shchapov --- arch/loongarch/crc32_la.c | 68 +++++++++++++++++++++++++++++++-------- test/test_crc32.cc | 6 +++- 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/arch/loongarch/crc32_la.c b/arch/loongarch/crc32_la.c index 4277acbe..034bf7f1 100644 --- a/arch/loongarch/crc32_la.c +++ b/arch/loongarch/crc32_la.c @@ -14,26 +14,66 @@ Z_INTERNAL uint32_t crc32_loongarch64(uint32_t crc, const uint8_t *buf, size_t len) { - crc = (~crc) & 0xffffffff; + Z_REGISTER uint32_t c; + Z_REGISTER uint16_t buf2; + Z_REGISTER uint32_t buf4; + Z_REGISTER uint64_t buf8; - while (len >= 8) { - crc = (uint32_t)__crc_w_d_w((long int)zng_memread_8(buf), (int)crc); - buf += 8; - len -= 8; + c = ~crc; + + if (UNLIKELY(len == 1)) { + c = (uint32_t)__crc_w_b_w((char)(*buf), (int)c); + c = ~c; + return c; } - if (len & 4) { - crc = (uint32_t)__crc_w_w_w((int)zng_memread_4(buf), (int)crc); - buf += 4; + + if ((ptrdiff_t)buf & (sizeof(uint64_t) - 1)) { + if (len && ((ptrdiff_t)buf & 1)) { + c = (uint32_t)__crc_w_b_w((char)(*buf++), (int)c); + len--; + } + + if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & (sizeof(uint32_t) - 1))) { + buf2 = *((uint16_t*)buf); + c = (uint32_t)__crc_w_h_w((short)buf2, (int)c); + buf += sizeof(uint16_t); + len -= sizeof(uint16_t); + } + + if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & (sizeof(uint64_t) - 1))) { + buf4 = *((uint32_t*)buf); + c = (uint32_t)__crc_w_w_w((int)buf4, (int)c); + len -= sizeof(uint32_t); + buf += sizeof(uint32_t); + } + } - if (len & 2) { - crc = (uint32_t)__crc_w_h_w((short)zng_memread_2(buf), (int)crc); - buf += 2; + + while (len >= sizeof(uint64_t)) { + buf8 = *((uint64_t*)buf); + c = (uint32_t)__crc_w_d_w((long int)buf8, (int)c); + len -= sizeof(uint64_t); + buf += sizeof(uint64_t); } - if (len & 1) { - crc = (uint32_t)__crc_w_b_w((char)(*buf), (int)crc); + + if (len & sizeof(uint32_t)) { + buf4 = *((uint32_t*)buf); + c = (uint32_t)__crc_w_w_w((int)buf4, (int)c); + buf += sizeof(uint32_t); + } + + if (len & sizeof(uint16_t)) { + buf2 = *((uint16_t*)buf); + c = (uint32_t)__crc_w_h_w((short)buf2, (int)c); + buf += sizeof(uint16_t); + } + + if (len & sizeof(uint8_t)) { + c = (uint32_t)__crc_w_b_w((char)(*buf), (int)c); } - return crc ^ 0xffffffff; + c = ~c; + return c; } diff --git a/test/test_crc32.cc b/test/test_crc32.cc index b05f220d..d44d079e 100644 --- a/test/test_crc32.cc +++ b/test/test_crc32.cc @@ -282,7 +282,7 @@ TEST_CRC32(native, native_crc32, 1) #else -#ifdef ARM_CRC32 +#if defined(ARM_CRC32) || defined(LOONGARCH_CRC) static const int align_offsets[] = { 1, 2, 3, 4, 5, 6, 7 }; @@ -295,7 +295,9 @@ static const int align_offsets[] = { } \ hash(GetParam(), func); \ } +#endif +#ifdef ARM_CRC32 INSTANTIATE_TEST_SUITE_P(crc32_alignment, crc32_align, testing::ValuesIn(align_offsets)); TEST_CRC32(armv8, crc32_armv8, test_cpu_features.arm.has_crc32) TEST_CRC32_ALIGN(armv8_align, crc32_armv8, test_cpu_features.arm.has_crc32) @@ -322,7 +324,9 @@ TEST_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2) TEST_CRC32(chorba_sse41, crc32_chorba_sse41, test_cpu_features.x86.has_sse41) #endif #if defined(LOONGARCH_CRC) +INSTANTIATE_TEST_SUITE_P(crc32_alignment, crc32_align, testing::ValuesIn(align_offsets)); TEST_CRC32(loongarch64, crc32_loongarch64, test_cpu_features.loongarch.has_crc) +TEST_CRC32_ALIGN(loongarch64_align, crc32_loongarch64, test_cpu_features.loongarch.has_crc) #endif #endif -- 2.47.2