From 104a9526e1448dc45b96534e2d78d41b3ecac3f9 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Mon, 12 Jan 2026 11:20:23 -0800
Subject: [PATCH] crypto: x86/aes-gcm - Use new AES library API

Switch from the old AES library functions (which use struct
crypto_aes_ctx) to the new ones (which use struct aes_enckey). This
eliminates the unnecessary computation and caching of the decryption
round keys. The new AES en/decryption functions are also much faster
and use AES instructions when supported by the CPU.

Since this changes the format of the AES-GCM key structures that are
used by the AES-GCM assembly code, the offsets in the assembly code had
to be updated to match. Note that the new key structures are smaller,
since the decryption round keys are no longer unnecessarily included.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260112192035.10427-26-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 arch/x86/crypto/aes-gcm-aesni-x86_64.S | 33 +++++++--------
 arch/x86/crypto/aes-gcm-vaes-avx2.S    | 21 +++++-----
 arch/x86/crypto/aes-gcm-vaes-avx512.S  | 25 ++++++-----
 arch/x86/crypto/aesni-intel_glue.c     | 57 +++++++++++---------
 4 files changed, 67 insertions(+), 69 deletions(-)

diff --git a/arch/x86/crypto/aes-gcm-aesni-x86_64.S b/arch/x86/crypto/aes-gcm-aesni-x86_64.S
index 7c8a8a32bd3c6..6b2abb76827e1 100644
--- a/arch/x86/crypto/aes-gcm-aesni-x86_64.S
+++ b/arch/x86/crypto/aes-gcm-aesni-x86_64.S
@@ -143,10 +143,11 @@
 	.octa	0
 
 // Offsets in struct aes_gcm_key_aesni
-#define OFFSETOF_AESKEYLEN	480
-#define OFFSETOF_H_POWERS	496
-#define OFFSETOF_H_POWERS_XORED	624
-#define OFFSETOF_H_TIMES_X64	688
+#define OFFSETOF_AESKEYLEN	0
+#define OFFSETOF_AESROUNDKEYS	16
+#define OFFSETOF_H_POWERS	272
+#define OFFSETOF_H_POWERS_XORED	400
+#define OFFSETOF_H_TIMES_X64	464
 
 .text
 
@@ -505,9 +506,9 @@
 
 	// Encrypt an all-zeroes block to get the raw hash subkey.
 	movl		OFFSETOF_AESKEYLEN(KEY), %eax
-	lea		6*16(KEY,%rax,4), RNDKEYLAST_PTR
-	movdqa		(KEY), H_POW1	// Zero-th round key XOR all-zeroes block
-	lea		16(KEY), %rax
+	lea		OFFSETOF_AESROUNDKEYS+6*16(KEY,%rax,4), RNDKEYLAST_PTR
+	movdqa		OFFSETOF_AESROUNDKEYS(KEY), H_POW1
+	lea		OFFSETOF_AESROUNDKEYS+16(KEY), %rax
 1:
 	aesenc		(%rax), H_POW1
 	add		$16, %rax
@@ -624,7 +625,7 @@
 // the zero-th AES round key. Clobbers TMP0 and TMP1.
 .macro	_ctr_begin_8x
 	movq		.Lone(%rip), TMP0
-	movdqa		(KEY), TMP1	// zero-th round key
+	movdqa		OFFSETOF_AESROUNDKEYS(KEY), TMP1	// zero-th round key
 .irp i, 0,1,2,3,4,5,6,7
 	_vpshufb	BSWAP_MASK, LE_CTR, AESDATA\i
 	pxor		TMP1, AESDATA\i
@@ -726,7 +727,7 @@
 	movdqu		(LE_CTR_PTR), LE_CTR
 
 	movl		OFFSETOF_AESKEYLEN(KEY), AESKEYLEN
-	lea		6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
+	lea		OFFSETOF_AESROUNDKEYS+6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
 
 	// If there are at least 8*16 bytes of data, then continue into the main
 	// loop, which processes 8*16 bytes of data per iteration.
@@ -745,7 +746,7 @@
 .if \enc
 	// Encrypt the first 8 plaintext blocks.
 	_ctr_begin_8x
-	lea		16(KEY), %rsi
+	lea		OFFSETOF_AESROUNDKEYS+16(KEY), %rsi
 .p2align 4
 1:
 	movdqa		(%rsi), TMP0
@@ -767,7 +768,7 @@
 
 	// Generate the next set of 8 counter blocks and start encrypting them.
 	_ctr_begin_8x
-	lea		16(KEY), %rsi
+	lea		OFFSETOF_AESROUNDKEYS+16(KEY), %rsi
 
 	// Do a round of AES, and start the GHASH update of 8 ciphertext blocks
 	// by doing the unreduced multiplication for the first ciphertext block.
@@ -869,7 +870,7 @@
 	// Encrypt the next counter block.
 	_vpshufb	BSWAP_MASK, LE_CTR, TMP0
 	paddd		ONE, LE_CTR
-	pxor		(KEY), TMP0
+	pxor		OFFSETOF_AESROUNDKEYS(KEY), TMP0
 	lea		-6*16(RNDKEYLAST_PTR), %rsi	// Reduce code size
 	cmp		$24, AESKEYLEN
 	jl		128f	// AES-128?
@@ -926,8 +927,8 @@
 
 	// Encrypt a counter block for the last time.
 	pshufb		BSWAP_MASK, LE_CTR
-	pxor		(KEY), LE_CTR
-	lea		16(KEY), %rsi
+	pxor		OFFSETOF_AESROUNDKEYS(KEY), LE_CTR
+	lea		OFFSETOF_AESROUNDKEYS+16(KEY), %rsi
 1:
 	aesenc		(%rsi), LE_CTR
 	add		$16, %rsi
@@ -1038,12 +1039,12 @@
 
 	// Make %rax point to the 6th from last AES round key. (Using signed
 	// byte offsets -7*16 through 6*16 decreases code size.)
-	lea		(KEY,AESKEYLEN64,4), %rax
+	lea		OFFSETOF_AESROUNDKEYS(KEY,AESKEYLEN64,4), %rax
 
 	// AES-encrypt the counter block and also multiply GHASH_ACC by H^1.
 	// Interleave the AES and GHASH instructions to improve performance.
 	pshufb		BSWAP_MASK, %xmm0
-	pxor		(KEY), %xmm0
+	pxor		OFFSETOF_AESROUNDKEYS(KEY), %xmm0
 	cmp		$24, AESKEYLEN
 	jl		128f	// AES-128?
 	je		192f	// AES-192?
diff --git a/arch/x86/crypto/aes-gcm-vaes-avx2.S b/arch/x86/crypto/aes-gcm-vaes-avx2.S
index 93c9504a488ff..9cc387957fa93 100644
--- a/arch/x86/crypto/aes-gcm-vaes-avx2.S
+++ b/arch/x86/crypto/aes-gcm-vaes-avx2.S
@@ -122,8 +122,9 @@
 	.octa	2
 
 // Offsets in struct aes_gcm_key_vaes_avx2
-#define OFFSETOF_AESKEYLEN	480
-#define OFFSETOF_H_POWERS	512
+#define OFFSETOF_AESKEYLEN	0
+#define OFFSETOF_AESROUNDKEYS	16
+#define OFFSETOF_H_POWERS	288
 #define NUM_H_POWERS		8
 #define OFFSETOFEND_H_POWERS	(OFFSETOF_H_POWERS + (NUM_H_POWERS * 16))
 #define OFFSETOF_H_POWERS_XORED	OFFSETOFEND_H_POWERS
@@ -240,9 +241,9 @@ SYM_FUNC_START(aes_gcm_precompute_vaes_avx2)
 
 	// Encrypt an all-zeroes block to get the raw hash subkey.
 	movl		OFFSETOF_AESKEYLEN(KEY), %eax
-	lea		6*16(KEY,%rax,4), RNDKEYLAST_PTR
-	vmovdqu		(KEY), H_CUR_XMM	// Zero-th round key XOR all-zeroes block
-	lea		16(KEY), %rax
+	lea		OFFSETOF_AESROUNDKEYS+6*16(KEY,%rax,4), RNDKEYLAST_PTR
+	vmovdqu		OFFSETOF_AESROUNDKEYS(KEY), H_CUR_XMM
+	lea		OFFSETOF_AESROUNDKEYS+16(KEY), %rax
 1:
 	vaesenc		(%rax), H_CUR_XMM, H_CUR_XMM
 	add		$16, %rax
@@ -635,7 +636,7 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 // the last AES round. Clobbers %rax and TMP0.
 .macro	_aesenc_loop	vecs:vararg
 	_ctr_begin	\vecs
-	lea		16(KEY), %rax
+	lea		OFFSETOF_AESROUNDKEYS+16(KEY), %rax
 .Laesenc_loop\@:
 	vbroadcasti128	(%rax), TMP0
 	_vaesenc	TMP0, \vecs
@@ -768,8 +769,8 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 	// Make RNDKEYLAST_PTR point to the last AES round key. This is the
 	// round key with index 10, 12, or 14 for AES-128, AES-192, or AES-256
 	// respectively. Then load the zero-th and last round keys.
-	lea		6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
-	vbroadcasti128	(KEY), RNDKEY0
+	lea		OFFSETOF_AESROUNDKEYS+6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
+	vbroadcasti128	OFFSETOF_AESROUNDKEYS(KEY), RNDKEY0
 	vbroadcasti128	(RNDKEYLAST_PTR), RNDKEYLAST
 
 	// Finish initializing LE_CTR by adding 1 to the second block.
@@ -1069,12 +1070,12 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 .endif
 
 	// Make %rax point to the last AES round key for the chosen AES variant.
-	lea		6*16(KEY,AESKEYLEN64,4), %rax
+	lea		OFFSETOF_AESROUNDKEYS+6*16(KEY,AESKEYLEN64,4), %rax
 
 	// Start the AES encryption of the counter block by swapping the counter
 	// block to big-endian and XOR-ing it with the zero-th AES round key.
 	vpshufb		BSWAP_MASK, LE_CTR, %xmm0
-	vpxor		(KEY), %xmm0, %xmm0
+	vpxor		OFFSETOF_AESROUNDKEYS(KEY), %xmm0, %xmm0
 
 	// Complete the AES encryption and multiply GHASH_ACC by H^1.
 	// Interleave the AES and GHASH instructions to improve performance.
diff --git a/arch/x86/crypto/aes-gcm-vaes-avx512.S b/arch/x86/crypto/aes-gcm-vaes-avx512.S
index 06b71314d65cc..516747db46592 100644
--- a/arch/x86/crypto/aes-gcm-vaes-avx512.S
+++ b/arch/x86/crypto/aes-gcm-vaes-avx512.S
@@ -86,10 +86,13 @@
 #define NUM_H_POWERS		16
 
 // Offset to AES key length (in bytes) in the key struct
-#define OFFSETOF_AESKEYLEN	480
+#define OFFSETOF_AESKEYLEN	0
+
+// Offset to AES round keys in the key struct
+#define OFFSETOF_AESROUNDKEYS	16
 
 // Offset to start of hash key powers array in the key struct
-#define OFFSETOF_H_POWERS	512
+#define OFFSETOF_H_POWERS	320
 
 // Offset to end of hash key powers array in the key struct.
 //
@@ -301,9 +304,9 @@ SYM_FUNC_START(aes_gcm_precompute_vaes_avx512)
 
 	// Encrypt an all-zeroes block to get the raw hash subkey.
 	movl		OFFSETOF_AESKEYLEN(KEY), %eax
-	lea		6*16(KEY,%rax,4), RNDKEYLAST_PTR
-	vmovdqu		(KEY), %xmm0	// Zero-th round key XOR all-zeroes block
-	add		$16, KEY
+	lea		OFFSETOF_AESROUNDKEYS+6*16(KEY,%rax,4), RNDKEYLAST_PTR
+	vmovdqu		OFFSETOF_AESROUNDKEYS(KEY), %xmm0
+	add		$OFFSETOF_AESROUNDKEYS+16, KEY
 1:
 	vaesenc		(KEY), %xmm0, %xmm0
 	add		$16, KEY
@@ -790,8 +793,8 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx512)
 	// Make RNDKEYLAST_PTR point to the last AES round key. This is the
 	// round key with index 10, 12, or 14 for AES-128, AES-192, or AES-256
 	// respectively. Then load the zero-th and last round keys.
-	lea		6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
-	vbroadcasti32x4	(KEY), RNDKEY0
+	lea		OFFSETOF_AESROUNDKEYS+6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
+	vbroadcasti32x4	OFFSETOF_AESROUNDKEYS(KEY), RNDKEY0
 	vbroadcasti32x4	(RNDKEYLAST_PTR), RNDKEYLAST
 
 	// Finish initializing LE_CTR by adding [0, 1, ...] to its low words.
@@ -834,7 +837,7 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx512)
 	// Encrypt the first 4 vectors of plaintext blocks. Leave the resulting
 	// ciphertext in GHASHDATA[0-3] for GHASH.
 	_ctr_begin_4x
-	lea		16(KEY), %rax
+	lea		OFFSETOF_AESROUNDKEYS+16(KEY), %rax
 1:
 	vbroadcasti32x4	(%rax), RNDKEY
 	_vaesenc_4x	RNDKEY
@@ -957,7 +960,7 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx512)
 	vpshufb		BSWAP_MASK, LE_CTR, %zmm0
 	vpaddd		LE_CTR_INC, LE_CTR, LE_CTR
 	vpxord		RNDKEY0, %zmm0, %zmm0
-	lea		16(KEY), %rax
+	lea		OFFSETOF_AESROUNDKEYS+16(KEY), %rax
 1:
 	vbroadcasti32x4	(%rax), RNDKEY
 	vaesenc		RNDKEY, %zmm0, %zmm0
@@ -1087,12 +1090,12 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx512)
 .endif
 
 	// Make %rax point to the last AES round key for the chosen AES variant.
-	lea		6*16(KEY,AESKEYLEN64,4), %rax
+	lea		OFFSETOF_AESROUNDKEYS+6*16(KEY,AESKEYLEN64,4), %rax
 
 	// Start the AES encryption of the counter block by swapping the counter
 	// block to big-endian and XOR-ing it with the zero-th AES round key.
 	vpshufb		BSWAP_MASK, LE_CTR, %xmm0
-	vpxor		(KEY), %xmm0, %xmm0
+	vpxor		OFFSETOF_AESROUNDKEYS(KEY), %xmm0, %xmm0
 
 	// Complete the AES encryption and multiply GHASH_ACC by H^1.
 	// Interleave the AES and GHASH instructions to improve performance.
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 453e0e890041b..e6c38d1d8a929 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -780,7 +780,7 @@ DEFINE_AVX_SKCIPHER_ALGS(vaes_avx512, "vaes-avx512", 800);
 /* The common part of the x86_64 AES-GCM key struct */
 struct aes_gcm_key {
 	/* Expanded AES key and the AES key length in bytes */
-	struct crypto_aes_ctx aes_key;
+	struct aes_enckey aes_key;
 
 	/* RFC4106 nonce (used only by the rfc4106 algorithms) */
 	u32 rfc4106_nonce;
@@ -789,11 +789,10 @@ struct aes_gcm_key {
 /* Key struct used by the AES-NI implementations of AES-GCM */
 struct aes_gcm_key_aesni {
 	/*
-	 * Common part of the key. The assembly code requires 16-byte alignment
-	 * for the round keys; we get this by them being located at the start of
-	 * the struct and the whole struct being 16-byte aligned.
+	 * Common part of the key. 16-byte alignment is required by the
+	 * assembly code.
 	 */
-	struct aes_gcm_key base;
+	struct aes_gcm_key base __aligned(16);
 
 	/*
 	 * Powers of the hash key H^8 through H^1. These are 128-bit values.
@@ -824,10 +823,9 @@ struct aes_gcm_key_aesni {
 struct aes_gcm_key_vaes_avx2 {
 	/*
 	 * Common part of the key. The assembly code prefers 16-byte alignment
-	 * for the round keys; we get this by them being located at the start of
-	 * the struct and the whole struct being 32-byte aligned.
+	 * for this.
 	 */
-	struct aes_gcm_key base;
+	struct aes_gcm_key base __aligned(16);
 
 	/*
	 * Powers of the hash key H^8 through H^1. These are 128-bit values.
@@ -854,10 +852,9 @@ struct aes_gcm_key_vaes_avx2 {
 struct aes_gcm_key_vaes_avx512 {
 	/*
 	 * Common part of the key. The assembly code prefers 16-byte alignment
-	 * for the round keys; we get this by them being located at the start of
-	 * the struct and the whole struct being 64-byte aligned.
+	 * for this.
 	 */
-	struct aes_gcm_key base;
+	struct aes_gcm_key base __aligned(16);
 
 	/*
 	 * Powers of the hash key H^16 through H^1. These are 128-bit values.
@@ -1182,26 +1179,26 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key,
 	}
 
 	/* The assembly code assumes the following offsets. */
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_enc) != 0);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_length) != 480);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers) != 496);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers_xored) != 624);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_times_x64) != 688);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, base.aes_key.key_enc) != 0);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, base.aes_key.key_length) != 480);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, h_powers) != 512);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, h_powers_xored) != 640);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.key_enc) != 0);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.key_length) != 480);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, h_powers) != 512);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, padding) != 768);
+	static_assert(offsetof(struct aes_gcm_key_aesni, base.aes_key.len) == 0);
+	static_assert(offsetof(struct aes_gcm_key_aesni, base.aes_key.k.rndkeys) == 16);
+	static_assert(offsetof(struct aes_gcm_key_aesni, h_powers) == 272);
+	static_assert(offsetof(struct aes_gcm_key_aesni, h_powers_xored) == 400);
+	static_assert(offsetof(struct aes_gcm_key_aesni, h_times_x64) == 464);
+	static_assert(offsetof(struct aes_gcm_key_vaes_avx2, base.aes_key.len) == 0);
+	static_assert(offsetof(struct aes_gcm_key_vaes_avx2, base.aes_key.k.rndkeys) == 16);
+	static_assert(offsetof(struct aes_gcm_key_vaes_avx2, h_powers) == 288);
+	static_assert(offsetof(struct aes_gcm_key_vaes_avx2, h_powers_xored) == 416);
+	static_assert(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.len) == 0);
+	static_assert(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.k.rndkeys) == 16);
+	static_assert(offsetof(struct aes_gcm_key_vaes_avx512, h_powers) == 320);
+	static_assert(offsetof(struct aes_gcm_key_vaes_avx512, padding) == 576);
+
+	err = aes_prepareenckey(&key->aes_key, raw_key, keylen);
+	if (err)
+		return err;
 
 	if (likely(crypto_simd_usable())) {
-		err = aes_check_keylen(keylen);
-		if (err)
-			return err;
 		kernel_fpu_begin();
-		aesni_set_key(&key->aes_key, raw_key, keylen);
 		aes_gcm_precompute(key, flags);
 		kernel_fpu_end();
 	} else {
@@ -1215,10 +1212,6 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key,
 		be128 h;
 		int i;
 
-		err = aes_expandkey(&key->aes_key, raw_key, keylen);
-		if (err)
-			return err;
-
 		/* Encrypt the all-zeroes block to get the hash key H^1 */
 		aes_encrypt(&key->aes_key, (u8 *)&h1, (u8 *)&h1);
 
-- 
2.47.3
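
For reference, the usage pattern the glue code migrates to can be seen in
the gcm_setkey() hunk above: expand an encryption-only key once with
aes_prepareenckey(), then pass the resulting struct aes_enckey to
aes_encrypt(). The snippet below is only a sketch of that pattern in
isolation; the header name and exact prototypes are assumptions inferred
from the calls visible in this patch, not taken from the library headers.

	/*
	 * Minimal sketch, assuming <crypto/aes.h> declares struct aes_enckey,
	 * aes_prepareenckey(), and an aes_encrypt() that takes that struct
	 * (as the gcm_setkey() fallback path above does after this patch).
	 */
	#include <crypto/aes.h>

	int example_encrypt_one_block(const u8 *raw_key, unsigned int keylen,
				      const u8 in[AES_BLOCK_SIZE],
				      u8 out[AES_BLOCK_SIZE])
	{
		struct aes_enckey enc_key;	/* encryption round keys only */
		int err;

		/* Expand only the encryption schedule; no decryption keys. */
		err = aes_prepareenckey(&enc_key, raw_key, keylen);
		if (err)
			return err;

		/* Same call shape as the !SIMD fallback in gcm_setkey(). */
		aes_encrypt(&enc_key, out, in);
		return 0;
	}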