git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Eliminate extra vmovdqu instruction folding xmm into zmm.
author: Nathan Moinvaziri <nathan@nathanm.com>
Sun, 11 Jan 2026 19:32:44 +0000 (11:32 -0800)
committer: Hans Kristian Rosbach <hk-github@circlestorm.org>
Sat, 17 Jan 2026 19:37:25 +0000 (20:37 +0100)
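Fixed by using _mm512_castsi128_si512() to place xmm_crc0 in lane 0 instead of inserting it into a zeroed zmm register. The cast leaves the upper lanes undefined, but they are fully overwritten by the subsequent inserts of xmm_crc1..xmm_crc3, so the zeroing insert was redundant.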

arch/x86/crc32_pclmulqdq_tpl.h

index e951c05795f11f78b3fea221a1be60ee84525e0e..a7b8edfdde996a7c16b3f90b9d4a38644ad1a7ac 100644 (file)
@@ -264,7 +264,7 @@ Z_FORCEINLINE static uint32_t crc32_copy_impl(uint32_t crc, uint8_t *dst, const
         }
 
         // Fold existing xmm state into first 64 bytes
-        zmm_t0 = _mm512_inserti32x4(_mm512_setzero_si512(), xmm_crc0, 0);
+        zmm_t0 = _mm512_castsi128_si512(xmm_crc0);
         zmm_t0 = _mm512_inserti32x4(zmm_t0, xmm_crc1, 1);
         zmm_t0 = _mm512_inserti32x4(zmm_t0, xmm_crc2, 2);
         zmm_t0 = _mm512_inserti32x4(zmm_t0, xmm_crc3, 3);