From: Adam Stylinski Date: Sat, 21 Dec 2024 16:04:47 +0000 (-0500) Subject: Fix unaligned access in ACLE based crc32 X-Git-Tag: 2.2.3~5 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=06bba674706a238e4b7bee49cf76911851bf8f0c;p=thirdparty%2Fzlib-ng.git Fix unaligned access in ACLE based crc32 This fixes a rightful complaint from the alignment sanitizer that we alias memory in an unaligned fashion. A nice added bonus is that this improves performance a tiny bit on the larger buffers, perhaps due to loops that idiomatically decrement a count and increment a single buffer pointer rather than the maze of conditional pointer reassignments. While here, let's write a unit test just for this. Since this is the only variant that accesses memory in a potentially unaligned fashion that doesn't explicitly go byte by byte or use intrinsics that don't require alignment, we'll enable it only for this function for now. Adding more tests later if need be should be possible. For everything else not crc, we're relying on ubsan to hopefully catch things by chance. --- diff --git a/arch/arm/crc32_acle.c b/arch/arm/crc32_acle.c index 116bcab1..a3a1fefc 100644 --- a/arch/arm/crc32_acle.c +++ b/arch/arm/crc32_acle.c @@ -11,9 +11,9 @@ Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len) { Z_REGISTER uint32_t c; - Z_REGISTER const uint16_t *buf2; - Z_REGISTER const uint32_t *buf4; - Z_REGISTER const uint64_t *buf8; + Z_REGISTER uint16_t buf2; + Z_REGISTER uint32_t buf4; + Z_REGISTER uint64_t buf8; c = ~crc; @@ -29,45 +29,43 @@ Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, si len--; } - if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) { - buf2 = (const uint16_t *) buf; - c = __crc32h(c, *buf2++); + if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & (sizeof(uint32_t) - 1))) { + buf2 = *((uint16_t*)buf); + c = __crc32h(c, buf2); + buf += sizeof(uint16_t); len -= sizeof(uint16_t); - buf4 = (const uint32_t *) buf2; - } else { - buf4 = (const uint32_t *) buf; } - if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) { - c = __crc32w(c, *buf4++); + if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & (sizeof(uint64_t) - 1))) { + buf4 = *((uint32_t*)buf); + c = __crc32w(c, buf4); len -= sizeof(uint32_t); + buf += sizeof(uint32_t); } - buf8 = (const uint64_t *) buf4; - } else { - buf8 = (const uint64_t *) buf; } while (len >= sizeof(uint64_t)) { - c = __crc32d(c, *buf8++); + buf8 = *((uint64_t*)buf); + c = __crc32d(c, buf8); len -= sizeof(uint64_t); + buf += sizeof(uint64_t); } if (len >= sizeof(uint32_t)) { - buf4 = (const uint32_t *) buf8; - c = __crc32w(c, *buf4++); + buf4 = *((uint32_t*)buf); + c = __crc32w(c, buf4); len -= sizeof(uint32_t); - buf2 = (const uint16_t *) buf4; - } else { - buf2 = (const uint16_t *) buf8; + buf += sizeof(uint32_t); } if (len >= sizeof(uint16_t)) { - c = __crc32h(c, *buf2++); + buf2 = *((uint16_t*)buf); + c = __crc32h(c, buf2); len -= sizeof(uint16_t); + buf += sizeof(uint16_t); } - buf = (const unsigned char *) buf2; if (len) { c = __crc32b(c, *buf); } diff --git a/test/test_crc32.cc b/test/test_crc32.cc index 245a92fb..ea839383 100644 --- a/test/test_crc32.cc +++ b/test/test_crc32.cc @@ -8,6 +8,8 @@ #include #include #include +#include "zutil.h" +#include "zutil_p.h" extern "C" { # include "zbuild.h" @@ -195,6 +197,22 @@ public: } }; +/* Specifically to test where we had dodgy alignment in the acle CRC32 + * function. All others are either byte level access or use intrinsics + * that work with unaligned access */ +class crc32_align : public ::testing::TestWithParam { +public: + void hash(int param, crc32_func crc32) { + uint8_t *buf = (uint8_t*)zng_alloc(sizeof(uint8_t) * (128 + param)); + if (buf != NULL) { + (void)crc32(0, buf + param, 128); + } else { + FAIL(); + } + zng_free(buf); + } +}; + INSTANTIATE_TEST_SUITE_P(crc32, crc32_variant, testing::ValuesIn(tests)); #define TEST_CRC32(name, func, support_flag) \ @@ -210,10 +228,26 @@ TEST_CRC32(braid, PREFIX(crc32_braid), 1) #ifdef DISABLE_RUNTIME_CPU_DETECTION TEST_CRC32(native, native_crc32, 1) + #else #ifdef ARM_ACLE +static const int align_offsets[] = { + 1, 2, 3, 4, 5, 6, 7 +}; + +#define TEST_CRC32_ALIGN(name, func, support_flag) \ + TEST_P(crc32_align, name) { \ + if (!(support_flag)) { \ + GTEST_SKIP(); \ + return; \ + } \ + hash(GetParam(), func); \ + } + +INSTANTIATE_TEST_SUITE_P(crc32_alignment, crc32_align, testing::ValuesIn(align_offsets)); TEST_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32) +TEST_CRC32_ALIGN(acle_align, crc32_acle, test_cpu_features.arm.has_crc32) #endif #ifdef POWER8_VSX_CRC32 TEST_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07)