From: Nathan Moin Vaziri
Date: Fri, 10 Apr 2026 20:29:05 +0000 (-0700)
Subject: Fix VPCLMULQDQ CRC32 build with partial AVX-512 baselines
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;ds=sidebyside;p=thirdparty%2Fzlib-ng.git

Fix VPCLMULQDQ CRC32 build with partial AVX-512 baselines

The 512-bit path in crc32_pclmulqdq_tpl.h assumed AVX-512F was enough,
but some of the intrinsics it used actually require AVX-512DQ. Pick the
correct variants based on the available features.
---

diff --git a/arch/x86/crc32_pclmulqdq_tpl.h b/arch/x86/crc32_pclmulqdq_tpl.h
index 6000db85..23d63035 100644
--- a/arch/x86/crc32_pclmulqdq_tpl.h
+++ b/arch/x86/crc32_pclmulqdq_tpl.h
@@ -32,16 +32,25 @@
 #if defined(X86_VPCLMULQDQ) && defined(__AVX512F__)
 # if defined(_MSC_VER) && _MSC_VER < 1920
 /* Use epi32 variants for older MSVC toolchains (v141/v140) to avoid cast warnings */
-# define z512_xor3_epi64(a, b, c) _mm512_ternarylogic_epi32(a, b, c, 0x96)
-# define z512_inserti64x2(a, b, imm) _mm512_inserti32x4(a, b, imm)
-# define z512_extracti64x2(a, imm) _mm512_extracti32x4_epi32(a, imm)
+# define z512_xor3_epi64(a, b, c) _mm512_ternarylogic_epi32(a, b, c, 0x96)
+# define z512_inserti64x2(a, b, imm) _mm512_inserti32x4(a, b, imm)
+# define z512_extracti64x2(a, imm) _mm512_extracti32x4_epi32(a, imm)
 # else
-# define z512_xor3_epi64(a, b, c) _mm512_ternarylogic_epi64(a, b, c, 0x96)
-# define z512_inserti64x2(a, b, imm) _mm512_inserti64x2(a, b, imm)
-# define z512_extracti64x2(a, imm) _mm512_extracti64x2_epi64(a, imm)
+# define z512_xor3_epi64(a, b, c) _mm512_ternarylogic_epi64(a, b, c, 0x96)
+# if defined(__AVX512DQ__)
+# if defined(_MSC_VER) && !defined(_MM_K0_REG8)
+# define z512_inserti64x2(a, b, imm) _mm512_maskz_inserti64x2(UINT8_MAX, a, b, imm)
+# else
+# define z512_inserti64x2(a, b, imm) _mm512_inserti64x2(a, b, imm)
+# endif
+# define z512_extracti64x2(a, imm) _mm512_extracti64x2_epi64(a, imm)
+# else
+# define z512_inserti64x2(a, b, imm) _mm512_inserti32x4(a, b, imm)
+# define z512_extracti64x2(a, imm) _mm512_extracti32x4_epi32(a, imm)
+# endif
 # endif
 # ifdef __AVX512VL__
-# define z128_xor3_epi64(a, b, c) _mm_ternarylogic_epi64(a, b, c, 0x96)
+# define z128_xor3_epi64(a, b, c) _mm_ternarylogic_epi64(a, b, c, 0x96)
 # endif
 #endif
 /* 256-bit VPCLMULQDQ macros (doesn't require AVX-512) */