From: Eric Biggers
Date: Wed, 2 Apr 2025 00:24:01 +0000 (-0700)
Subject: crypto: x86/aes - drop the avx10_256 AES-XTS and AES-CTR code
X-Git-Tag: v6.16-rc1~206^2~352
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7d14fbc5691b39b8ab322c0220713c2ac3298b29;p=thirdparty%2Fkernel%2Flinux.git

crypto: x86/aes - drop the avx10_256 AES-XTS and AES-CTR code

Intel made a late change to the AVX10 specification that removes support
for a 256-bit maximum vector length and enumeration of the maximum
vector length.  AVX10 will imply a maximum vector length of 512 bits.
I.e. there won't be any such thing as AVX10/256 or AVX10/512; there will
just be AVX10, and it will essentially just consolidate AVX512 features.

As a result of this new development, my strategy of providing both
*_avx10_256 and *_avx10_512 functions didn't turn out to be that useful.
The only remaining motivation for the 256-bit AVX512 / AVX10 functions
is to avoid downclocking on older Intel CPUs.  But in the case of
AES-XTS and AES-CTR, I already wrote *_avx2 code too (primarily to
support CPUs without AVX512), which performs almost as well as
*_avx10_256.  So we should just use that.

Therefore, remove the *_avx10_256 AES-XTS and AES-CTR functions and
algorithms, and rename the *_avx10_512 AES-XTS and AES-CTR functions
and algorithms to *_avx512.  Make Ice Lake and Tiger Lake use *_avx2
instead of *_avx10_256 which they previously used.

I've left AES-GCM unchanged for now.  There is no VAES+AVX2 optimized
AES-GCM in the kernel yet, so the path forward for that is not as
clear.  However, I did write a VAES+AVX2 optimized AES-GCM for
BoringSSL.  So one option is to port that to the kernel and then do the
same cleanup.

Signed-off-by: Eric Biggers
Signed-off-by: Herbert Xu
---
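Note on the EVEX idioms kept behind USE_AVX512 below: the remaining 512-bit
code depends on the three-input XOR provided by vpternlogd and on BMI2-based
mask generation for partial vectors, which the VEX-only *_avx2 code has to
work around. A minimal standalone sketch of both idioms follows; the register
choices and the LEN/SRC/DST operands here are illustrative placeholders, not
taken from the patch itself:

        // Three-way XOR in one instruction: immediate 0x96 is the truth
        // table of a ^ b ^ c, so this computes zmm0 = zmm1 ^ zmm2 ^ zmm0.
        vpternlogd      $0x96, %zmm1, %zmm2, %zmm0

        // Handle a partial vector of LEN bytes (here assumed in %rcx)
        // without touching memory beyond the buffer: build a LEN-bit mask
        // (bzhi is the BMI2 dependency), then do a zero-masked load and a
        // masked store.
        mov             $-1, %rax
        bzhi            %rcx, %rax, %rax        // rax = (1 << LEN) - 1
        kmovq           %rax, %k1
        vmovdqu8        (%rsi), %zmm0{%k1}{z}   // load LEN bytes, zero the rest
        vmovdqu8        %zmm0, (%rdi){%k1}      // store only the LEN bytes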
diff --git a/arch/x86/crypto/aes-ctr-avx-x86_64.S b/arch/x86/crypto/aes-ctr-avx-x86_64.S
index 1685d8b24b2ca..bbbfd80f5a502 100644
--- a/arch/x86/crypto/aes-ctr-avx-x86_64.S
+++ b/arch/x86/crypto/aes-ctr-avx-x86_64.S
@@ -48,8 +48,7 @@
 // using the following sets of CPU features:
 //     - AES-NI && AVX
 //     - VAES && AVX2
-//     - VAES && (AVX10/256 || (AVX512BW && AVX512VL)) && BMI2
-//     - VAES && (AVX10/512 || (AVX512BW && AVX512VL)) && BMI2
+//     - VAES && AVX512BW && AVX512VL && BMI2
 //
 // See the function definitions at the bottom of the file for more information.

@@ -76,7 +75,6 @@
 .text

 // Move a vector between memory and a register.
-// The register operand must be in the first 16 vector registers.
 .macro _vmovdqu        src, dst
 .if VL < 64
        vmovdqu         \src, \dst
@@ -86,7 +84,6 @@
 .endm

 // Move a vector between registers.
-// The registers must be in the first 16 vector registers.
 .macro _vmovdqa        src, dst
 .if VL < 64
        vmovdqa         \src, \dst
@@ -96,7 +93,7 @@
 .endm

 // Broadcast a 128-bit value from memory to all 128-bit lanes of a vector
-// register.  The register operand must be in the first 16 vector registers.
+// register.
 .macro _vbroadcast128  src, dst
 .if VL == 16
        vmovdqu         \src, \dst
@@ -108,7 +105,6 @@
 .endm

 // XOR two vectors together.
-// Any register operands must be in the first 16 vector registers.
 .macro _vpxor  src1, src2, dst
 .if VL < 64
        vpxor           \src1, \src2, \dst
@@ -199,8 +195,8 @@
 // XOR each with the zero-th round key.  Also update LE_CTR if !\final.
 .macro _prepare_2_ctr_vecs     is_xctr, i0, i1, final=0
 .if \is_xctr
-  .if USE_AVX10
-       _vmovdqa        LE_CTR, AESDATA\i0
+  .if USE_AVX512
+       vmovdqa64       LE_CTR, AESDATA\i0
        vpternlogd      $0x96, XCTR_IV, RNDKEY0, AESDATA\i0
   .else
        vpxor           XCTR_IV, LE_CTR, AESDATA\i0
@@ -208,7 +204,7 @@
   .endif
        vpaddq          LE_CTR_INC1, LE_CTR, AESDATA\i1
-  .if USE_AVX10
+  .if USE_AVX512
        vpternlogd      $0x96, XCTR_IV, RNDKEY0, AESDATA\i1
   .else
        vpxor           XCTR_IV, AESDATA\i1, AESDATA\i1
@@ -481,18 +477,12 @@
 .Lxor_tail_partial_vec_0\@:
        // XOR the remaining 1 <= LEN < VL bytes.  It's easy if masked
        // loads/stores are available; otherwise it's a bit harder...
-.if USE_AVX10
-  .if VL <= 32
-       mov             $-1, %eax
-       bzhi            LEN, %eax, %eax
-       kmovd           %eax, %k1
-  .else
+.if USE_AVX512
        mov             $-1, %rax
        bzhi            LEN64, %rax, %rax
        kmovq           %rax, %k1
-  .endif
        vmovdqu8        (SRC), AESDATA1{%k1}{z}
-       _vpxor          AESDATA1, AESDATA0, AESDATA0
+       vpxord          AESDATA1, AESDATA0, AESDATA0
        vmovdqu8        AESDATA0, (DST){%k1}
 .else
 .if VL == 32
@@ -554,7 +544,7 @@
 // eliminates carries.  |ctr| is the per-message block counter starting at 1.

 .set   VL, 16
-.set   USE_AVX10, 0
+.set   USE_AVX512, 0
 SYM_TYPED_FUNC_START(aes_ctr64_crypt_aesni_avx)
        _aes_ctr_crypt  0
 SYM_FUNC_END(aes_ctr64_crypt_aesni_avx)
@@ -564,7 +554,7 @@ SYM_FUNC_END(aes_xctr_crypt_aesni_avx)

 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
 .set   VL, 32
-.set   USE_AVX10, 0
+.set   USE_AVX512, 0
 SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx2)
        _aes_ctr_crypt  0
 SYM_FUNC_END(aes_ctr64_crypt_vaes_avx2)
@@ -572,21 +562,12 @@ SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx2)
        _aes_ctr_crypt  1
 SYM_FUNC_END(aes_xctr_crypt_vaes_avx2)

-.set   VL, 32
-.set   USE_AVX10, 1
-SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_256)
-       _aes_ctr_crypt  0
-SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_256)
-SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_256)
-       _aes_ctr_crypt  1
-SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_256)
-
 .set   VL, 64
-.set   USE_AVX10, 1
-SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_512)
+.set   USE_AVX512, 1
+SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx512)
        _aes_ctr_crypt  0
-SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_512)
-SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_512)
+SYM_FUNC_END(aes_ctr64_crypt_vaes_avx512)
+SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx512)
        _aes_ctr_crypt  1
-SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_512)
+SYM_FUNC_END(aes_xctr_crypt_vaes_avx512)

 #endif // CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ
diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S
index 93ba0ddbe0092..bbeaccbd1c51f 100644
--- a/arch/x86/crypto/aes-xts-avx-x86_64.S
+++ b/arch/x86/crypto/aes-xts-avx-x86_64.S
@@ -52,32 +52,25 @@
  * different code, it uses a macro to generate several implementations that
  * share similar source code but are targeted at different CPUs, listed below:
  *
- * AES-NI + AVX
+ * AES-NI && AVX
  *    - 128-bit vectors (1 AES block per vector)
  *    - VEX-coded instructions
  *    - xmm0-xmm15
  *    - This is for older CPUs that lack VAES but do have AVX.
  *
- * VAES + VPCLMULQDQ + AVX2
+ * VAES && VPCLMULQDQ && AVX2
  *    - 256-bit vectors (2 AES blocks per vector)
  *    - VEX-coded instructions
  *    - ymm0-ymm15
- *    - This is for CPUs that have VAES but lack AVX512 or AVX10,
- *      e.g. Intel's Alder Lake and AMD's Zen 3.
+ *    - This is for CPUs that have VAES but either lack AVX512 (e.g. Intel's
+ *      Alder Lake and AMD's Zen 3) or downclock too eagerly when using zmm
+ *      registers (e.g. Intel's Ice Lake).
 *
- * VAES + VPCLMULQDQ + AVX10/256 + BMI2
- *    - 256-bit vectors (2 AES blocks per vector)
+ * VAES && VPCLMULQDQ && AVX512BW && AVX512VL && BMI2
+ *    - 512-bit vectors (4 AES blocks per vector)
  *    - EVEX-coded instructions
- *    - ymm0-ymm31
- *    - This is for CPUs that have AVX512 but where using zmm registers causes
- *      downclocking, and for CPUs that have AVX10/256 but not AVX10/512.
- *    - By "AVX10/256" we really mean (AVX512BW + AVX512VL) || AVX10/256.
- *      To avoid confusion with 512-bit, we just write AVX10/256.
- *
- * VAES + VPCLMULQDQ + AVX10/512 + BMI2
- *    - Same as the previous one, but upgrades to 512-bit vectors
- *      (4 AES blocks per vector) in zmm0-zmm31.
- *    - This is for CPUs that have good AVX512 or AVX10/512 support.
+ *    - zmm0-zmm31
+ *    - This is for CPUs that have good AVX512 support.
 *
 * This file doesn't have an implementation for AES-NI alone (without AVX), as
 * the lack of VEX would make all the assembly code different.
@@ -109,7 +102,7 @@
        // This table contains constants for vpshufb and vpblendvb, used to
        // handle variable byte shifts and blending during ciphertext stealing
-       // on CPUs that don't support AVX10-style masking.
+       // on CPUs that don't support AVX512-style masking.
 .Lcts_permute_table:
        .byte   0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
        .byte   0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
@@ -138,7 +131,7 @@
 .irp i, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
        _define_Vi      \i
 .endr
-.if USE_AVX10
+.if USE_AVX512
 .irp i, 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
        _define_Vi      \i
 .endr
@@ -193,7 +186,7 @@
        // keys to the *end* of this register range.  I.e., AES-128 uses
        // KEY5-KEY14, AES-192 uses KEY3-KEY14, and AES-256 uses KEY1-KEY14.
        // (All also use KEY0 for the XOR-only "round" at the beginning.)
-.if USE_AVX10
+.if USE_AVX512
        .set    KEY1_XMM,       %xmm16
        .set    KEY1,           V16
        .set    KEY2_XMM,       %xmm17
@@ -227,7 +220,6 @@
 .endm

 // Move a vector between memory and a register.
-// The register operand must be in the first 16 vector registers.
 .macro _vmovdqu        src, dst
 .if VL < 64
        vmovdqu         \src, \dst
@@ -238,9 +230,9 @@

 // Broadcast a 128-bit value into a vector.
 .macro _vbroadcast128  src, dst
-.if VL == 16 && !USE_AVX10
+.if VL == 16
        vmovdqu         \src, \dst
-.elseif VL == 32 && !USE_AVX10
+.elseif VL == 32
        vbroadcasti128  \src, \dst
 .else
        vbroadcasti32x4 \src, \dst
@@ -248,7 +240,6 @@
 .endm

 // XOR two vectors together.
-// Any register operands must be in the first 16 vector registers.
 .macro _vpxor  src1, src2, dst
 .if VL < 64
        vpxor           \src1, \src2, \dst
@@ -259,7 +250,7 @@

 // XOR three vectors together.
 .macro _xor3   src1, src2, src3_and_dst
-.if USE_AVX10
+.if USE_AVX512
        // vpternlogd with immediate 0x96 is a three-argument XOR.
        vpternlogd      $0x96, \src1, \src2, \src3_and_dst
 .else
@@ -274,7 +265,7 @@
        vpshufd         $0x13, \src, \tmp
        vpaddq          \src, \src, \dst
        vpsrad          $31, \tmp, \tmp
-.if USE_AVX10
+.if USE_AVX512
        vpternlogd      $0x78, GF_POLY_XMM, \tmp, \dst
 .else
        vpand           GF_POLY_XMM, \tmp, \tmp
@@ -337,7 +328,7 @@
        vpsllq          $1*VL/16, TWEAK0, TWEAK1
        vpsllq          $2*VL/16, TWEAK0, TWEAK2
        vpsllq          $3*VL/16, TWEAK0, TWEAK3
-.if USE_AVX10
+.if USE_AVX512
        vpternlogd      $0x96, V0, V1, TWEAK1
        vpternlogd      $0x96, V2, V3, TWEAK2
        vpternlogd      $0x96, V4, V5, TWEAK3
@@ -474,26 +465,26 @@
        lea             OFFS-16(KEY, KEYLEN64, 4), KEY

        // If all 32 SIMD registers are available, cache all the round keys.
-.if USE_AVX10
+.if USE_AVX512
        cmp             $24, KEYLEN
        jl              .Laes128\@
        je              .Laes192\@
-       _vbroadcast128  -6*16(KEY), KEY1
-       _vbroadcast128  -5*16(KEY), KEY2
+       vbroadcasti32x4 -6*16(KEY), KEY1
+       vbroadcasti32x4 -5*16(KEY), KEY2
 .Laes192\@:
-       _vbroadcast128  -4*16(KEY), KEY3
-       _vbroadcast128  -3*16(KEY), KEY4
+       vbroadcasti32x4 -4*16(KEY), KEY3
+       vbroadcasti32x4 -3*16(KEY), KEY4
 .Laes128\@:
-       _vbroadcast128  -2*16(KEY), KEY5
-       _vbroadcast128  -1*16(KEY), KEY6
-       _vbroadcast128  0*16(KEY), KEY7
-       _vbroadcast128  1*16(KEY), KEY8
-       _vbroadcast128  2*16(KEY), KEY9
-       _vbroadcast128  3*16(KEY), KEY10
-       _vbroadcast128  4*16(KEY), KEY11
-       _vbroadcast128  5*16(KEY), KEY12
-       _vbroadcast128  6*16(KEY), KEY13
-       _vbroadcast128  7*16(KEY), KEY14
+       vbroadcasti32x4 -2*16(KEY), KEY5
+       vbroadcasti32x4 -1*16(KEY), KEY6
+       vbroadcasti32x4 0*16(KEY), KEY7
+       vbroadcasti32x4 1*16(KEY), KEY8
+       vbroadcasti32x4 2*16(KEY), KEY9
+       vbroadcasti32x4 3*16(KEY), KEY10
+       vbroadcasti32x4 4*16(KEY), KEY11
+       vbroadcasti32x4 5*16(KEY), KEY12
+       vbroadcasti32x4 6*16(KEY), KEY13
+       vbroadcasti32x4 7*16(KEY), KEY14
 .endif
 .endm
@@ -521,7 +512,7 @@
 // using the same key for all block(s).  The round key is loaded from the
 // appropriate register or memory location for round \i.  May clobber \tmp.
 .macro _vaes_1x        enc, i, xmm_suffix, data, tmp
-.if USE_AVX10
+.if USE_AVX512
        _vaes           \enc, KEY\i\xmm_suffix, \data
 .else
 .ifnb \xmm_suffix
@@ -538,7 +529,7 @@
 // appropriate register or memory location for round \i.  In addition, does two
 // steps of the computation of the next set of tweaks.  May clobber V4 and V5.
 .macro _vaes_4x        enc, i
-.if USE_AVX10
+.if USE_AVX512
        _tweak_step     (2*(\i-5))
        _vaes           \enc, KEY\i, V0
        _vaes           \enc, KEY\i, V1
@@ -574,7 +565,7 @@
 .irp i, 5,6,7,8,9,10,11,12,13
        _vaes_1x        \enc, \i, \xmm_suffix, \data, tmp=\tmp
 .endr
-.if USE_AVX10
+.if USE_AVX512
        vpxord          KEY14\xmm_suffix, \tweak, \tmp
 .else
 .ifnb \xmm_suffix
@@ -617,11 +608,11 @@
        // This is the main loop, en/decrypting 4*VL bytes per iteration.

        // XOR each source block with its tweak and the zero-th round key.
-.if USE_AVX10
-       _vmovdqu        0*VL(SRC), V0
-       _vmovdqu        1*VL(SRC), V1
-       _vmovdqu        2*VL(SRC), V2
-       _vmovdqu        3*VL(SRC), V3
+.if USE_AVX512
+       vmovdqu8        0*VL(SRC), V0
+       vmovdqu8        1*VL(SRC), V1
+       vmovdqu8        2*VL(SRC), V2
+       vmovdqu8        3*VL(SRC), V3
        vpternlogd      $0x96, TWEAK0, KEY0, V0
        vpternlogd      $0x96, TWEAK1, KEY0, V1
        vpternlogd      $0x96, TWEAK2, KEY0, V2
@@ -654,7 +645,7 @@
        // Reduce latency by doing the XOR before the vaesenclast, utilizing the
        // property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a)
        // (and likewise for vaesdeclast).
-.if USE_AVX10
+.if USE_AVX512
        _tweak_step     18
        _tweak_step     19
        vpxord          TWEAK0, KEY14, V4
@@ -762,7 +753,7 @@
        _aes_crypt      \enc, _XMM, TWEAK1_XMM, %xmm0, tmp=%xmm1
 .endif

-.if USE_AVX10
+.if USE_AVX512
        // Create a mask that has the first LEN bits set.
        mov             $-1, %r9d
        bzhi            LEN, %r9d, %r9d
@@ -811,7 +802,7 @@
 //                            u8 iv[AES_BLOCK_SIZE]);
 //
 // Encrypt |iv| using the AES key |tweak_key| to get the first tweak.  Assumes
-// that the CPU supports AES-NI and AVX, but not necessarily VAES or AVX10.
+// that the CPU supports AES-NI and AVX, but not necessarily VAES or AVX512.
 SYM_TYPED_FUNC_START(aes_xts_encrypt_iv)
        .set    TWEAK_KEY,      %rdi
        .set    IV,             %rsi
@@ -853,7 +844,7 @@ SYM_FUNC_END(aes_xts_encrypt_iv)
 // multiple of 16, then this function updates |tweak| to contain the next tweak.
 .set   VL, 16
-.set   USE_AVX10, 0
+.set   USE_AVX512, 0
 SYM_TYPED_FUNC_START(aes_xts_encrypt_aesni_avx)
        _aes_xts_crypt  1
 SYM_FUNC_END(aes_xts_encrypt_aesni_avx)
@@ -863,7 +854,7 @@ SYM_FUNC_END(aes_xts_decrypt_aesni_avx)

 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
 .set   VL, 32
-.set   USE_AVX10, 0
+.set   USE_AVX512, 0
 SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx2)
        _aes_xts_crypt  1
 SYM_FUNC_END(aes_xts_encrypt_vaes_avx2)
@@ -871,21 +862,12 @@ SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx2)
        _aes_xts_crypt  0
 SYM_FUNC_END(aes_xts_decrypt_vaes_avx2)

-.set   VL, 32
-.set   USE_AVX10, 1
-SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_256)
-       _aes_xts_crypt  1
-SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_256)
-SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_256)
-       _aes_xts_crypt  0
-SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_256)
-
 .set   VL, 64
-.set   USE_AVX10, 1
-SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_512)
+.set   USE_AVX512, 1
+SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx512)
        _aes_xts_crypt  1
-SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_512)
-SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_512)
+SYM_FUNC_END(aes_xts_encrypt_vaes_avx512)
+SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx512)
        _aes_xts_crypt  0
-SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_512)
+SYM_FUNC_END(aes_xts_decrypt_vaes_avx512)

 #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index bc655d794a95c..5bdeda39cef65 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -844,8 +844,7 @@ simd_skcipher_algs_##suffix[ARRAY_SIZE(skcipher_algs_##suffix)]
 DEFINE_AVX_SKCIPHER_ALGS(aesni_avx, "aesni-avx", 500);
 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
 DEFINE_AVX_SKCIPHER_ALGS(vaes_avx2, "vaes-avx2", 600);
-DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_256, "vaes-avx10_256", 700);
-DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_512, "vaes-avx10_512", 800);
+DEFINE_AVX_SKCIPHER_ALGS(vaes_avx512, "vaes-avx512", 800);
 #endif

 /* The common part of the x86_64 AES-GCM key struct */
@@ -1592,11 +1591,6 @@ static int __init register_avx_algs(void)
                                     XFEATURE_MASK_AVX512, NULL))
                return 0;

-       err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_256,
-                                            ARRAY_SIZE(skcipher_algs_vaes_avx10_256),
-                                            simd_skcipher_algs_vaes_avx10_256);
-       if (err)
-               return err;
        err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_256,
                                         ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
                                         aes_gcm_simdalgs_vaes_avx10_256);
@@ -1606,15 +1600,15 @@ static int __init register_avx_algs(void)
        if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
                int i;

-               for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx10_512); i++)
-                       skcipher_algs_vaes_avx10_512[i].base.cra_priority = 1;
+               for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx512); i++)
+                       skcipher_algs_vaes_avx512[i].base.cra_priority = 1;
                for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++)
                        aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1;
        }

-       err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_512,
-                                            ARRAY_SIZE(skcipher_algs_vaes_avx10_512),
-                                            simd_skcipher_algs_vaes_avx10_512);
+       err = simd_register_skciphers_compat(skcipher_algs_vaes_avx512,
+                                            ARRAY_SIZE(skcipher_algs_vaes_avx512),
+                                            simd_skcipher_algs_vaes_avx512);
        if (err)
                return err;
        err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_512,
@@ -1641,18 +1635,14 @@ static void unregister_avx_algs(void)
        simd_unregister_skciphers(skcipher_algs_vaes_avx2,
                                  ARRAY_SIZE(skcipher_algs_vaes_avx2),
                                  simd_skcipher_algs_vaes_avx2);
-       if (simd_skcipher_algs_vaes_avx10_256[0])
-               simd_unregister_skciphers(skcipher_algs_vaes_avx10_256,
-                                         ARRAY_SIZE(skcipher_algs_vaes_avx10_256),
-                                         simd_skcipher_algs_vaes_avx10_256);
        if (aes_gcm_simdalgs_vaes_avx10_256[0])
                simd_unregister_aeads(aes_gcm_algs_vaes_avx10_256,
                                      ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
                                      aes_gcm_simdalgs_vaes_avx10_256);
-       if (simd_skcipher_algs_vaes_avx10_512[0])
-               simd_unregister_skciphers(skcipher_algs_vaes_avx10_512,
-                                         ARRAY_SIZE(skcipher_algs_vaes_avx10_512),
-                                         simd_skcipher_algs_vaes_avx10_512);
+       if (simd_skcipher_algs_vaes_avx512[0])
+               simd_unregister_skciphers(skcipher_algs_vaes_avx512,
+                                         ARRAY_SIZE(skcipher_algs_vaes_avx512),
+                                         simd_skcipher_algs_vaes_avx512);
        if (aes_gcm_simdalgs_vaes_avx10_512[0])
                simd_unregister_aeads(aes_gcm_algs_vaes_avx10_512,
                                      ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512),