From: Nathan Moinvaziri Date: Fri, 2 Jan 2026 22:47:53 +0000 (-0800) Subject: Use _mm_blend_epi16 in crc32_(v)pclmulqdq final reduction X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8eaa34a12c5511fe3c33cb0ea2db48409fa8e738;p=thirdparty%2Fzlib-ng.git Use _mm_blend_epi16 in crc32_(v)pclmulqdq final reduction This is the preferred operation mentioned in https://www.corsix.org/content/alternative-exposition-crc32_4k_pclmulqdq --- diff --git a/arch/x86/crc32_pclmulqdq_tpl.h b/arch/x86/crc32_pclmulqdq_tpl.h index 5271a6846..d18839bbb 100644 --- a/arch/x86/crc32_pclmulqdq_tpl.h +++ b/arch/x86/crc32_pclmulqdq_tpl.h @@ -210,7 +210,7 @@ static inline uint32_t fold_final(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i __m128i x_tmp0 = _mm_clmulepi64_si128(*xmm_crc3, barrett_k, 0x00); __m128i x_tmp1 = _mm_clmulepi64_si128(x_tmp0, barrett_k, 0x10); - x_tmp1 = _mm_and_si128(x_tmp1, _mm_setr_epi32(0, 0, ~0, 0)); + x_tmp1 = _mm_blend_epi16(x_tmp1, _mm_setzero_si128(), 0xcf); x_tmp0 = _mm_xor_si128(x_tmp1, *xmm_crc3); __m128i x_res_a = _mm_clmulepi64_si128(x_tmp0, barrett_k, 0x01);