From 862445d3b9e74f58360a7a89787da4dca783e6dd Mon Sep 17 00:00:00 2001
From: Eric Biggers
Date: Sat, 25 Oct 2025 22:50:29 -0700
Subject: [PATCH] lib/crypto: s390/sha3: Add optimized one-shot SHA-3 digest functions

Some z/Architecture processors can compute a SHA-3 digest in a single
instruction. arch/s390/crypto/ already uses this capability to optimize
the SHA-3 crypto_shash algorithms.

Use this capability to implement the sha3_224(), sha3_256(), sha3_384(),
and sha3_512() library functions too.

SHA3-256 benchmark results provided by Harald Freudenberger
(https://lore.kernel.org/r/4188d18bfcc8a64941c5ebd8de10ede2@linux.ibm.com/)
on a z/Architecture machine with "facility 86" (MSA level 12):

    Length (bytes)    Before (MB/s)    After (MB/s)
    ==============    =============    ============
                16              212             225
                64              820             915
               256             1850            3350
              1024             5400            8300
              4096            11200           11300

Note: the original data from Harald was given in the form of a graph for
each length, showing the distribution of throughputs from 500 runs. I
guesstimated the peak of each one.

Harald also reported that the generic SHA-3 code was at most 259 MB/s
(https://lore.kernel.org/r/c39f6b6c110def0095e5da5becc12085@linux.ibm.com/).
So as expected, the earlier commit that optimized sha3_absorb_blocks()
and sha3_keccakf() is the more important one; it optimized the Keccak
permutation which is the most performance-critical part of SHA-3.
Still, this additional commit does notably improve performance further
on some lengths.
Reviewed-by: Ard Biesheuvel Tested-by: Harald Freudenberger Link: https://lore.kernel.org/r/20251026055032.1413733-13-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/s390/sha3.h | 67 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/lib/crypto/s390/sha3.h b/lib/crypto/s390/sha3.h index 668e53da93d2c..85471404775a3 100644 --- a/lib/crypto/s390/sha3.h +++ b/lib/crypto/s390/sha3.h @@ -8,6 +8,7 @@ #include static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha3); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha3_init_optim); static void sha3_absorb_blocks(struct sha3_state *state, const u8 *data, size_t nblocks, size_t block_size) @@ -60,6 +61,61 @@ static void sha3_keccakf(struct sha3_state *state) } } +static inline bool s390_sha3(int func, const u8 *in, size_t in_len, + u8 *out, size_t out_len) +{ + struct sha3_state state; + + if (!static_branch_likely(&have_sha3)) + return false; + + if (static_branch_likely(&have_sha3_init_optim)) + func |= CPACF_KLMD_NIP | CPACF_KLMD_DUFOP; + else + memset(&state, 0, sizeof(state)); + + cpacf_klmd(func, &state, in, in_len); + + if (static_branch_likely(&have_sha3_init_optim)) + kmsan_unpoison_memory(&state, out_len); + + memcpy(out, &state, out_len); + memzero_explicit(&state, sizeof(state)); + return true; +} + +#define sha3_224_arch sha3_224_arch +static bool sha3_224_arch(const u8 *in, size_t in_len, + u8 out[SHA3_224_DIGEST_SIZE]) +{ + return s390_sha3(CPACF_KLMD_SHA3_224, in, in_len, + out, SHA3_224_DIGEST_SIZE); +} + +#define sha3_256_arch sha3_256_arch +static bool sha3_256_arch(const u8 *in, size_t in_len, + u8 out[SHA3_256_DIGEST_SIZE]) +{ + return s390_sha3(CPACF_KLMD_SHA3_256, in, in_len, + out, SHA3_256_DIGEST_SIZE); +} + +#define sha3_384_arch sha3_384_arch +static bool sha3_384_arch(const u8 *in, size_t in_len, + u8 out[SHA3_384_DIGEST_SIZE]) +{ + return s390_sha3(CPACF_KLMD_SHA3_384, in, in_len, + out, SHA3_384_DIGEST_SIZE); +} + 
+#define sha3_512_arch sha3_512_arch +static bool sha3_512_arch(const u8 *in, size_t in_len, + u8 out[SHA3_512_DIGEST_SIZE]) +{ + return s390_sha3(CPACF_KLMD_SHA3_512, in, in_len, + out, SHA3_512_DIGEST_SIZE); +} + #define sha3_mod_init_arch sha3_mod_init_arch static void sha3_mod_init_arch(void) { @@ -79,10 +135,17 @@ static void sha3_mod_init_arch(void) QUERY(CPACF_KIMD, CPACF_KIMD_SHA3_256); QUERY(CPACF_KIMD, CPACF_KIMD_SHA3_384); QUERY(CPACF_KIMD, CPACF_KIMD_SHA3_512); + QUERY(CPACF_KLMD, CPACF_KLMD_SHA3_224); + QUERY(CPACF_KLMD, CPACF_KLMD_SHA3_256); + QUERY(CPACF_KLMD, CPACF_KLMD_SHA3_384); + QUERY(CPACF_KLMD, CPACF_KLMD_SHA3_512); #undef QUERY - if (num_present == num_possible) + if (num_present == num_possible) { static_branch_enable(&have_sha3); - else if (num_present != 0) + if (test_facility(86)) + static_branch_enable(&have_sha3_init_optim); + } else if (num_present != 0) { pr_warn("Unsupported combination of SHA-3 facilities\n"); + } } -- 2.47.3