git.ipfire.org Git - thirdparty/linux.git/commitdiff
lib/crypto: s390/sha3: Add optimized one-shot SHA-3 digest functions
author Eric Biggers <ebiggers@kernel.org>
Sun, 26 Oct 2025 05:50:29 +0000 (22:50 -0700)
committer Eric Biggers <ebiggers@kernel.org>
Thu, 6 Nov 2025 04:30:41 +0000 (20:30 -0800)
Some z/Architecture processors can compute a SHA-3 digest in a single
instruction.  arch/s390/crypto/ already uses this capability to optimize
the SHA-3 crypto_shash algorithms.

Use this capability to implement the sha3_224(), sha3_256(), sha3_384(),
and sha3_512() library functions too.

SHA3-256 benchmark results provided by Harald Freudenberger
(https://lore.kernel.org/r/4188d18bfcc8a64941c5ebd8de10ede2@linux.ibm.com/)
on a z/Architecture machine with "facility 86" (MSA level 12):

    Length (bytes)    Before (MB/s)   After (MB/s)
    ==============    =============   ============
          16                212             225
          64                820             915
         256               1850            3350
        1024               5400            8300
        4096              11200           11300

Note: the original data from Harald was given in the form of a graph for
each length, showing the distribution of throughputs from 500 runs.  I
guesstimated the peak of each one.

Harald also reported that the generic SHA-3 code was at most 259 MB/s
(https://lore.kernel.org/r/c39f6b6c110def0095e5da5becc12085@linux.ibm.com/).
So as expected, the earlier commit that optimized sha3_absorb_blocks()
and sha3_keccakf() is the more important one; it optimized the Keccak
permutation which is the most performance-critical part of SHA-3.
Still, this additional commit does notably improve performance further
on some lengths.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Harald Freudenberger <freude@linux.ibm.com>
Link: https://lore.kernel.org/r/20251026055032.1413733-13-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
lib/crypto/s390/sha3.h

index 668e53da93d2cbf5744a3ef4f2879bff7a97aa99..85471404775a33229c085d0236f19d6a11e598db 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/cpufeature.h>
 
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha3);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha3_init_optim);
 
 static void sha3_absorb_blocks(struct sha3_state *state, const u8 *data,
                               size_t nblocks, size_t block_size)
@@ -60,6 +61,61 @@ static void sha3_keccakf(struct sha3_state *state)
        }
 }
 
+static inline bool s390_sha3(int func, const u8 *in, size_t in_len,
+                            u8 *out, size_t out_len)
+{
+       struct sha3_state state;
+
+       if (!static_branch_likely(&have_sha3))
+               return false;
+
+       if (static_branch_likely(&have_sha3_init_optim))
+               func |= CPACF_KLMD_NIP | CPACF_KLMD_DUFOP;
+       else
+               memset(&state, 0, sizeof(state));
+
+       cpacf_klmd(func, &state, in, in_len);
+
+       if (static_branch_likely(&have_sha3_init_optim))
+               kmsan_unpoison_memory(&state, out_len);
+
+       memcpy(out, &state, out_len);
+       memzero_explicit(&state, sizeof(state));
+       return true;
+}
+
+#define sha3_224_arch sha3_224_arch
+static bool sha3_224_arch(const u8 *in, size_t in_len,
+                         u8 out[SHA3_224_DIGEST_SIZE])
+{
+       return s390_sha3(CPACF_KLMD_SHA3_224, in, in_len,
+                        out, SHA3_224_DIGEST_SIZE);
+}
+
+#define sha3_256_arch sha3_256_arch
+static bool sha3_256_arch(const u8 *in, size_t in_len,
+                         u8 out[SHA3_256_DIGEST_SIZE])
+{
+       return s390_sha3(CPACF_KLMD_SHA3_256, in, in_len,
+                        out, SHA3_256_DIGEST_SIZE);
+}
+
+#define sha3_384_arch sha3_384_arch
+static bool sha3_384_arch(const u8 *in, size_t in_len,
+                         u8 out[SHA3_384_DIGEST_SIZE])
+{
+       return s390_sha3(CPACF_KLMD_SHA3_384, in, in_len,
+                        out, SHA3_384_DIGEST_SIZE);
+}
+
+#define sha3_512_arch sha3_512_arch
+static bool sha3_512_arch(const u8 *in, size_t in_len,
+                         u8 out[SHA3_512_DIGEST_SIZE])
+{
+       return s390_sha3(CPACF_KLMD_SHA3_512, in, in_len,
+                        out, SHA3_512_DIGEST_SIZE);
+}
+
 #define sha3_mod_init_arch sha3_mod_init_arch
 static void sha3_mod_init_arch(void)
 {
@@ -79,10 +135,17 @@ static void sha3_mod_init_arch(void)
        QUERY(CPACF_KIMD, CPACF_KIMD_SHA3_256);
        QUERY(CPACF_KIMD, CPACF_KIMD_SHA3_384);
        QUERY(CPACF_KIMD, CPACF_KIMD_SHA3_512);
+       QUERY(CPACF_KLMD, CPACF_KLMD_SHA3_224);
+       QUERY(CPACF_KLMD, CPACF_KLMD_SHA3_256);
+       QUERY(CPACF_KLMD, CPACF_KLMD_SHA3_384);
+       QUERY(CPACF_KLMD, CPACF_KLMD_SHA3_512);
 #undef QUERY
 
-       if (num_present == num_possible)
+       if (num_present == num_possible) {
                static_branch_enable(&have_sha3);
-       else if (num_present != 0)
+               if (test_facility(86))
+                       static_branch_enable(&have_sha3_init_optim);
+       } else if (num_present != 0) {
                pr_warn("Unsupported combination of SHA-3 facilities\n");
+       }
 }