git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
[chunkset_neon] Use vdupq_n_u64.
author Mika Lindqvist <postmaster@raasu.org>
Fri, 18 Jun 2021 21:10:44 +0000 (00:10 +0300)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Mon, 21 Jun 2021 09:01:32 +0000 (11:01 +0200)
* Using vdupq_n_u64 duplicates the unsigned 64-bit integer into two consecutive aligned memory locations on the stack, so the compiler can use wider load instructions.
  All different-sized general-purpose registers overlay one another on ARM/AArch64, so any vector cast is a no-op in assembly.

arch/arm/chunkset_neon.c

index b1fcb241d079a17d0a6ba10bef685ecd54b2ab07..e0ad3e04ea7ef3976d12b74a0ae432144f3e8785 100644 (file)
@@ -37,7 +37,9 @@ static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
 }
 
 static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
-    *chunk = vcombine_u8(vld1_u8(from), vld1_u8(from));
+    uint64_t tmp;
+    memcpy(&tmp, from, 8);
+    *chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp));
 }
 
 #define CHUNKSIZE        chunksize_neon