git.ipfire.org Git - thirdparty/linux.git/commitdiff
x86/crc: drop the avx10_256 functions and rename avx10_512 to avx512
author: Eric Biggers <ebiggers@google.com>
Wed, 19 Mar 2025 18:13:16 +0000 (11:13 -0700)
committer: Eric Biggers <ebiggers@google.com>
Wed, 19 Mar 2025 19:22:00 +0000 (12:22 -0700)
Intel made a late change to the AVX10 specification that removes support
for a 256-bit maximum vector length and enumeration of the maximum
vector length.  AVX10 will imply a maximum vector length of 512 bits.
I.e. there won't be any such thing as AVX10/256 or AVX10/512; there will
just be AVX10, and it will essentially just consolidate AVX512 features.

As a result of this new development, my strategy of providing both
*_avx10_256 and *_avx10_512 functions didn't turn out to be that useful.
The only remaining motivation for the 256-bit AVX512 / AVX10 functions
is to avoid downclocking on older Intel CPUs.  But I already wrote
*_avx2 code too (primarily to support CPUs without AVX512), which
performs almost as well as *_avx10_256.  So we should just use that.

Therefore, remove the *_avx10_256 CRC functions, and rename the
*_avx10_512 CRC functions to *_avx512.  Make Ice Lake and Tiger Lake use
the *_avx2 functions instead of *_avx10_256 which they previously used.

Link: https://lore.kernel.org/r/20250319181316.91271-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
arch/x86/lib/crc-pclmul-template.S
arch/x86/lib/crc-pclmul-template.h

index a19b730b642d36d5d0394751e218714410c12bec..ae0b6144c503c8bd8fc35d94df33958033d1d560 100644 (file)
 .macro _fold_vec       acc, data, consts, tmp
        _pclmulqdq      \consts, HI64_TERMS, \acc, HI64_TERMS, \tmp
        _pclmulqdq      \consts, LO64_TERMS, \acc, LO64_TERMS, \acc
-.if AVX_LEVEL < 10
+.if AVX_LEVEL <= 2
        _cond_vex pxor, \data, \tmp, \tmp
        _cond_vex pxor, \tmp, \acc, \acc
 .else
 // \vl is the maximum length of vector register to use in bytes: 16, 32, or 64.
 //
 // \avx_level is the level of AVX support to use: 0 for SSE only, 2 for AVX2, or
-// 10 for AVX10 or AVX512.
+// 512 for AVX512.
 //
 // If \vl == 16 && \avx_level == 0, the generated code requires:
 // PCLMULQDQ && SSE4.1.  (Note: all known CPUs with PCLMULQDQ also have SSE4.1.)
 // If \vl == 32 && \avx_level == 2, the generated code requires:
 // VPCLMULQDQ && AVX2.
 //
-// If \vl == 32 && \avx_level == 10, the generated code requires:
-// VPCLMULQDQ && (AVX10/256 || (AVX512BW && AVX512VL))
-//
-// If \vl == 64 && \avx_level == 10, the generated code requires:
-// VPCLMULQDQ && (AVX10/512 || (AVX512BW && AVX512VL))
+// If \vl == 64 && \avx_level == 512, the generated code requires:
+// VPCLMULQDQ && AVX512BW && AVX512VL.
 //
 // Other \vl and \avx_level combinations are either not supported or not useful.
 .macro _crc_pclmul     n, lsb_crc, vl, avx_level
 .if LSB_CRC && \n == 64
        _cond_vex punpcklqdq,   %xmm1, %xmm2, %xmm2
        _pclmulqdq              CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1
-    .if AVX_LEVEL < 10
+    .if AVX_LEVEL <= 2
        _cond_vex pxor,         %xmm2, %xmm0, %xmm0
        _cond_vex pxor,         %xmm1, %xmm0, %xmm0
     .else
@@ -574,13 +571,9 @@ SYM_FUNC_START(prefix##_vpclmul_avx2);                                     \
        _crc_pclmul     n=bits, lsb_crc=lsb, vl=32, avx_level=2;        \
 SYM_FUNC_END(prefix##_vpclmul_avx2);                                   \
                                                                        \
-SYM_FUNC_START(prefix##_vpclmul_avx10_256);                            \
-       _crc_pclmul     n=bits, lsb_crc=lsb, vl=32, avx_level=10;       \
-SYM_FUNC_END(prefix##_vpclmul_avx10_256);                              \
-                                                                       \
-SYM_FUNC_START(prefix##_vpclmul_avx10_512);                            \
-       _crc_pclmul     n=bits, lsb_crc=lsb, vl=64, avx_level=10;       \
-SYM_FUNC_END(prefix##_vpclmul_avx10_512);
+SYM_FUNC_START(prefix##_vpclmul_avx512);                               \
+       _crc_pclmul     n=bits, lsb_crc=lsb, vl=64, avx_level=512;      \
+SYM_FUNC_END(prefix##_vpclmul_avx512);
 #else
 #define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb)                     \
 SYM_FUNC_START(prefix##_pclmul_sse);                                   \
index 7b89f0edbc179b03d6998ba1572dc9cd0ef0205e..c5b3bfe11d8da08b3b5eb89048119fad142667eb 100644 (file)
@@ -21,10 +21,8 @@ crc_t prefix##_pclmul_sse(crc_t crc, const u8 *p, size_t len,                \
                          const void *consts_ptr);                      \
 crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len,                \
                            const void *consts_ptr);                    \
-crc_t prefix##_vpclmul_avx10_256(crc_t crc, const u8 *p, size_t len,   \
-                                const void *consts_ptr);               \
-crc_t prefix##_vpclmul_avx10_512(crc_t crc, const u8 *p, size_t len,   \
-                                const void *consts_ptr);               \
+crc_t prefix##_vpclmul_avx512(crc_t crc, const u8 *p, size_t len,      \
+                             const void *consts_ptr);                  \
 DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse)
 
 #define INIT_CRC_PCLMUL(prefix)                                                \
@@ -35,13 +33,10 @@ do {                                                                        \
            cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL)) {               \
                if (boot_cpu_has(X86_FEATURE_AVX512BW) &&               \
                    boot_cpu_has(X86_FEATURE_AVX512VL) &&               \
+                   !boot_cpu_has(X86_FEATURE_PREFER_YMM) &&            \
                    cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL)) {    \
-                       if (boot_cpu_has(X86_FEATURE_PREFER_YMM))       \
-                               static_call_update(prefix##_pclmul,     \
-                                                  prefix##_vpclmul_avx10_256); \
-                       else                                            \
-                               static_call_update(prefix##_pclmul,     \
-                                                  prefix##_vpclmul_avx10_512); \
+                       static_call_update(prefix##_pclmul,             \
+                                          prefix##_vpclmul_avx512);    \
                } else {                                                \
                        static_call_update(prefix##_pclmul,             \
                                           prefix##_vpclmul_avx2);      \