From a8c60a9aca778d7fd22d6c9b1af702d6f952b87f Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Thu, 3 Jul 2025 19:39:57 -0700
Subject: [PATCH] lib/crypto: x86/sha256: Move static_call above kernel-mode
 FPU section

As I did for sha512_blocks(), reorganize x86's sha256_blocks() to be
just a static_call.  To achieve that, for each assembly function add a
C function that handles the kernel-mode FPU section and fallback.
While this increases total code size slightly, the amount of code
actually executed on a given system does not increase, and it is
slightly more efficient since it eliminates the extra static_key.  It
also makes the assembly functions be called with standard direct calls
instead of static calls, eliminating the need for ANNOTATE_NOENDBR.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250704023958.73274-2-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 lib/crypto/x86/sha256-avx-asm.S   |  3 --
 lib/crypto/x86/sha256-avx2-asm.S  |  3 --
 lib/crypto/x86/sha256-ni-asm.S    |  2 --
 lib/crypto/x86/sha256-ssse3-asm.S |  3 --
 lib/crypto/x86/sha256.h           | 48 ++++++++++++++++---------------
 5 files changed, 25 insertions(+), 34 deletions(-)

diff --git a/lib/crypto/x86/sha256-avx-asm.S b/lib/crypto/x86/sha256-avx-asm.S
index 73bcff2b548f4..798a7f07fa013 100644
--- a/lib/crypto/x86/sha256-avx-asm.S
+++ b/lib/crypto/x86/sha256-avx-asm.S
@@ -48,7 +48,6 @@
 ########################################################################
 
 #include <linux/linkage.h>
-#include <linux/objtool.h>
 
 ## assume buffers not aligned
 #define VMOVDQ vmovdqu
@@ -346,8 +345,6 @@ a = TMP_
 ########################################################################
 .text
 SYM_FUNC_START(sha256_transform_avx)
-	ANNOTATE_NOENDBR	# since this is called only via static_call
-
 	pushq	%rbx
 	pushq	%r12
 	pushq	%r13
diff --git a/lib/crypto/x86/sha256-avx2-asm.S b/lib/crypto/x86/sha256-avx2-asm.S
index 45787570387f2..62a46993359e6 100644
--- a/lib/crypto/x86/sha256-avx2-asm.S
+++ b/lib/crypto/x86/sha256-avx2-asm.S
@@ -49,7 +49,6 @@
 ########################################################################
 
 #include <linux/linkage.h>
-#include <linux/objtool.h>
 
 ## assume buffers not aligned
 #define VMOVDQ vmovdqu
@@ -523,8 +522,6 @@ STACK_SIZE = _CTX + _CTX_SIZE
 ########################################################################
 .text
 SYM_FUNC_START(sha256_transform_rorx)
-	ANNOTATE_NOENDBR	# since this is called only via static_call
-
 	pushq	%rbx
 	pushq	%r12
 	pushq	%r13
diff --git a/lib/crypto/x86/sha256-ni-asm.S b/lib/crypto/x86/sha256-ni-asm.S
index 4af7d22e29e47..9ebbacbb9c13b 100644
--- a/lib/crypto/x86/sha256-ni-asm.S
+++ b/lib/crypto/x86/sha256-ni-asm.S
@@ -54,7 +54,6 @@
  */
 
 #include <linux/linkage.h>
-#include <linux/objtool.h>
 
 #define STATE_PTR	%rdi	/* 1st arg */
 #define DATA_PTR	%rsi	/* 2nd arg */
@@ -111,7 +110,6 @@
  */
 .text
 SYM_FUNC_START(sha256_ni_transform)
-	ANNOTATE_NOENDBR	# since this is called only via static_call
 	shl		$6, NUM_BLKS	/* convert to bytes */
 	jz		.Ldone_hash
 
diff --git a/lib/crypto/x86/sha256-ssse3-asm.S b/lib/crypto/x86/sha256-ssse3-asm.S
index 407b30adcd37f..820fc8bbc29fd 100644
--- a/lib/crypto/x86/sha256-ssse3-asm.S
+++ b/lib/crypto/x86/sha256-ssse3-asm.S
@@ -47,7 +47,6 @@
 ########################################################################
 
 #include <linux/linkage.h>
-#include <linux/objtool.h>
 
 ## assume buffers not aligned
 #define MOVDQ movdqu
@@ -353,8 +352,6 @@ a = TMP_
 ########################################################################
 .text
 SYM_FUNC_START(sha256_transform_ssse3)
-	ANNOTATE_NOENDBR	# since this is called only via static_call
-
 	pushq	%rbx
 	pushq	%r12
 	pushq	%r13
diff --git a/lib/crypto/x86/sha256.h b/lib/crypto/x86/sha256.h
index 3b5456c222ba6..669bc06538b67 100644
--- a/lib/crypto/x86/sha256.h
+++ b/lib/crypto/x86/sha256.h
@@ -8,48 +8,50 @@
 #include <crypto/internal/simd.h>
 #include <linux/static_call.h>
 
-asmlinkage void sha256_transform_ssse3(struct sha256_block_state *state,
-				       const u8 *data, size_t nblocks);
-asmlinkage void sha256_transform_avx(struct sha256_block_state *state,
-				     const u8 *data, size_t nblocks);
-asmlinkage void sha256_transform_rorx(struct sha256_block_state *state,
-				      const u8 *data, size_t nblocks);
-asmlinkage void sha256_ni_transform(struct sha256_block_state *state,
-				    const u8 *data, size_t nblocks);
+DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_blocks_generic);
 
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha256_x86);
+#define DEFINE_X86_SHA256_FN(c_fn, asm_fn)				\
+	asmlinkage void asm_fn(struct sha256_block_state *state,	\
+			       const u8 *data, size_t nblocks);		\
+	static void c_fn(struct sha256_block_state *state, const u8 *data, \
+			 size_t nblocks)				\
+	{								\
+		if (likely(crypto_simd_usable())) {			\
+			kernel_fpu_begin();				\
+			asm_fn(state, data, nblocks);			\
+			kernel_fpu_end();				\
+		} else {						\
+			sha256_blocks_generic(state, data, nblocks);	\
+		}							\
+	}
 
-DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_transform_ssse3);
+DEFINE_X86_SHA256_FN(sha256_blocks_ssse3, sha256_transform_ssse3);
+DEFINE_X86_SHA256_FN(sha256_blocks_avx, sha256_transform_avx);
+DEFINE_X86_SHA256_FN(sha256_blocks_avx2, sha256_transform_rorx);
+DEFINE_X86_SHA256_FN(sha256_blocks_ni, sha256_ni_transform);
 
 static void sha256_blocks(struct sha256_block_state *state,
 			  const u8 *data, size_t nblocks)
 {
-	if (static_branch_likely(&have_sha256_x86) && crypto_simd_usable()) {
-		kernel_fpu_begin();
-		static_call(sha256_blocks_x86)(state, data, nblocks);
-		kernel_fpu_end();
-	} else {
-		sha256_blocks_generic(state, data, nblocks);
-	}
+	static_call(sha256_blocks_x86)(state, data, nblocks);
 }
 
 #define sha256_mod_init_arch sha256_mod_init_arch
 static inline void sha256_mod_init_arch(void)
 {
 	if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
-		static_call_update(sha256_blocks_x86, sha256_ni_transform);
+		static_call_update(sha256_blocks_x86, sha256_blocks_ni);
 	} else if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
 				     NULL) &&
 		   boot_cpu_has(X86_FEATURE_AVX)) {
 		if (boot_cpu_has(X86_FEATURE_AVX2) &&
 		    boot_cpu_has(X86_FEATURE_BMI2))
 			static_call_update(sha256_blocks_x86,
-					   sha256_transform_rorx);
+					   sha256_blocks_avx2);
 		else
 			static_call_update(sha256_blocks_x86,
-					   sha256_transform_avx);
-	} else if (!boot_cpu_has(X86_FEATURE_SSSE3)) {
-		return;
+					   sha256_blocks_avx);
+	} else if (boot_cpu_has(X86_FEATURE_SSSE3)) {
+		static_call_update(sha256_blocks_x86, sha256_blocks_ssse3);
 	}
-	static_branch_enable(&have_sha256_x86);
 }
-- 
2.47.2
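
For reference, this is roughly what one DEFINE_X86_SHA256_FN() instantiation
from the new sha256.h expands to, using the SHA-NI pair as the example.  It is
only a readability sketch of the macro above (whitespace and the existing
includes of sha256.h assumed), not an additional change in this patch:

/*
 * Approximate expansion of
 * DEFINE_X86_SHA256_FN(sha256_blocks_ni, sha256_ni_transform).
 * The static_call dispatch in sha256_blocks() jumps directly to
 * sha256_blocks_ni(), which owns the kernel-mode FPU section and the
 * generic fallback.
 */
asmlinkage void sha256_ni_transform(struct sha256_block_state *state,
				    const u8 *data, size_t nblocks);

static void sha256_blocks_ni(struct sha256_block_state *state,
			     const u8 *data, size_t nblocks)
{
	if (likely(crypto_simd_usable())) {
		/*
		 * SIMD is usable in this context: enter the kernel-mode
		 * FPU section and run the SHA-NI assembly implementation.
		 */
		kernel_fpu_begin();
		sha256_ni_transform(state, data, nblocks);
		kernel_fpu_end();
	} else {
		/* Fall back to the generic C implementation. */
		sha256_blocks_generic(state, data, nblocks);
	}
}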