git.ipfire.org Git - thirdparty/zstd.git/commitdiff
AArch64: Use better block copy8 (4414/head)
author: Arpad Panyik <Arpad.Panyik@arm.com>
Fri, 20 Jun 2025 14:48:33 +0000 (14:48 +0000)
committer: Arpad Panyik <Arpad.Panyik@arm.com>
Fri, 20 Jun 2025 17:05:41 +0000 (17:05 +0000)
The vector copy is only necessary for 16-byte blocks on AArch64.

Decompression uplifts on a Neoverse V2 system, using Zstd-1.5.8
compiled with "-O3 -march=armv8.2-a+sve2":

                 Clang-19  Clang-20    GCC-14    GCC-15
 1#silesia.tar:   +0.316%   +0.865%   +0.025%   +0.096%
 2#silesia.tar:   +0.689%   +1.374%   +0.027%   +0.065%
 3#silesia.tar:   +0.811%   +1.654%   +0.034%   +0.033%
 4#silesia.tar:   +0.912%   +1.755%   +0.027%   +0.042%
 5#silesia.tar:   +0.995%   +1.826%   +0.062%   +0.094%
 6#silesia.tar:   +0.976%   +1.777%   +0.065%   +0.104%
 7#silesia.tar:   +0.910%   +1.738%   +0.077%   +0.110%

lib/common/zstd_internal.h

index c1647689b945793ceef69b28c567b7762d16e0f0..791b6485d23c8c8d65a8a14ebaee48e582be03f0 100644 (file)
@@ -168,7 +168,7 @@ static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
 *  Shared functions to include for inlining
 *********************************************/
 static void ZSTD_copy8(void* dst, const void* src) {
-#if defined(ZSTD_ARCH_ARM_NEON)
+#if defined(ZSTD_ARCH_ARM_NEON) && !defined(__aarch64__)
     vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
 #else
     ZSTD_memcpy(dst, src, 8);