lib/crypto: arm64/sha1: Migrate optimized code into library
author    Eric Biggers <ebiggers@kernel.org>
          Sat, 12 Jul 2025 23:22:59 +0000 (16:22 -0700)
committer Eric Biggers <ebiggers@kernel.org>
          Mon, 14 Jul 2025 18:11:48 +0000 (11:11 -0700)
Instead of exposing the arm64-optimized SHA-1 code via arm64-specific
crypto_shash algorithms, just implement the sha1_blocks() library
function.  This is much simpler, it makes the SHA-1 library functions
arm64-optimized, and it fixes the longstanding issue where the
arm64-optimized SHA-1 code was disabled by default.  SHA-1 still
remains available through crypto_shash, but individual architectures no
longer need to handle it.
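
(For context: after this change, kernel code reaches the optimized
routine through the generic SHA-1 library interface rather than a
crypto_shash handle.  A minimal sketch of a caller, assuming the
one-shot sha1() helper and the incremental sha1_init()/sha1_update()/
sha1_final() API added earlier in this series:

	#include <crypto/sha1.h>

	static void sha1_demo(const u8 *buf, size_t len)
	{
		struct sha1_ctx ctx;
		u8 digest[SHA1_DIGEST_SIZE];

		/* One-shot: ends up in sha1_blocks(), which this commit
		 * makes arm64-optimized on CPUs with the SHA-1 extensions. */
		sha1(buf, len, digest);

		/* Equivalent incremental usage: */
		sha1_init(&ctx);
		sha1_update(&ctx, buf, len);
		sha1_final(&ctx, digest);
	}
)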

Remove support for SHA-1 finalization from the assembly code, since the
library does not yet support architecture-specific overrides of
finalization.  (That support has been omitted for now, for simplicity
and because finalization usually isn't performance-critical.)
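
Finalization instead always happens in portable C: the library pads the
trailing partial block per FIPS 180 and feeds the result through
sha1_blocks().  A simplified sketch of that padding (illustrative only,
not the kernel's exact implementation):

	static void sha1_finup_sketch(struct sha1_block_state *state,
				      const u8 *data, size_t len,
				      u64 total_bytes)
	{
		u8 block[SHA1_BLOCK_SIZE] = {};

		memcpy(block, data, len);	/* len < SHA1_BLOCK_SIZE */
		block[len] = 0x80;		/* mandatory padding bit */
		if (len >= SHA1_BLOCK_SIZE - 8) {
			/* no room for the 64-bit length field */
			sha1_blocks(state, block, 1);
			memset(block, 0, sizeof(block));
		}
		/* total message length in bits, big-endian */
		put_unaligned_be64(total_bytes << 3,
				   &block[SHA1_BLOCK_SIZE - 8]);
		sha1_blocks(state, block, 1);
	}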

To match sha1_blocks(), change the type of the nblocks parameter and the
return value of __sha1_ce_transform() from int to size_t.  Update the
assembly code accordingly.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250712232329.818226-9-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
arch/arm64/configs/defconfig
arch/arm64/crypto/Kconfig
arch/arm64/crypto/Makefile
arch/arm64/crypto/sha1-ce-glue.c [deleted file]
lib/crypto/Kconfig
lib/crypto/Makefile
lib/crypto/arm64/sha1-ce-core.S [moved from arch/arm64/crypto/sha1-ce-core.S with 76% similarity]
lib/crypto/arm64/sha1.h [new file with mode: 0644]

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index b612b78b3b09165bef597c8ddeedffae30ba916f..31681206b49cf9214a6a65fe762b91042762fea7 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1743,7 +1743,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_GHASH_ARM64_CE=y
-CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA3_ARM64=m
 CONFIG_CRYPTO_SM3_ARM64_CE=m
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index a9ead99f72c28580f51ed199bc4654ef66f7ea97..3bb5b513d5ae2ccc5affc9267353e043491153fe 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -25,17 +25,6 @@ config CRYPTO_NHPOLY1305_NEON
          Architecture: arm64 using:
          - NEON (Advanced SIMD) extensions
 
-config CRYPTO_SHA1_ARM64_CE
-       tristate "Hash functions: SHA-1 (ARMv8 Crypto Extensions)"
-       depends on KERNEL_MODE_NEON
-       select CRYPTO_HASH
-       select CRYPTO_SHA1
-       help
-         SHA-1 secure hash algorithm (FIPS 180)
-
-         Architecture: arm64 using:
-         - ARMv8 Crypto Extensions
-
 config CRYPTO_SHA3_ARM64
        tristate "Hash functions: SHA-3 (ARMv8.2 Crypto Extensions)"
        depends on KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 228101f125d505ea17ba5d8c412630a73c6cd00b..a8b2cdbe202c16befaf1ebd8a97406fea0cf9a34 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -5,9 +5,6 @@
 # Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 #
 
-obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
-sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
-
 obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o
 sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
 
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
deleted file mode 100644
index 65b6980..0000000
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ /dev/null
@@ -1,118 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
- *
- * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <crypto/sha1.h>
-#include <crypto/sha1_base.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("sha1");
-
-struct sha1_ce_state {
-       struct sha1_state       sst;
-       u32                     finalize;
-};
-
-extern const u32 sha1_ce_offsetof_count;
-extern const u32 sha1_ce_offsetof_finalize;
-
-asmlinkage int __sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
-                                  int blocks);
-
-static void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
-                             int blocks)
-{
-       while (blocks) {
-               int rem;
-
-               kernel_neon_begin();
-               rem = __sha1_ce_transform(container_of(sst,
-                                                      struct sha1_ce_state,
-                                                      sst), src, blocks);
-               kernel_neon_end();
-               src += (blocks - rem) * SHA1_BLOCK_SIZE;
-               blocks = rem;
-       }
-}
-
-const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count);
-const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize);
-
-static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
-                         unsigned int len)
-{
-       struct sha1_ce_state *sctx = shash_desc_ctx(desc);
-
-       sctx->finalize = 0;
-       return sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform);
-}
-
-static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
-                        unsigned int len, u8 *out)
-{
-       struct sha1_ce_state *sctx = shash_desc_ctx(desc);
-       bool finalized = false;
-
-       /*
-        * Allow the asm code to perform the finalization if there is no
-        * partial data and the input is a round multiple of the block size.
-        */
-       if (len >= SHA1_BLOCK_SIZE) {
-               unsigned int remain = len - round_down(len, SHA1_BLOCK_SIZE);
-
-               finalized = !remain;
-               sctx->finalize = finalized;
-               sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform);
-               data += len - remain;
-               len = remain;
-       }
-       if (!finalized) {
-               sctx->finalize = 0;
-               sha1_base_do_finup(desc, data, len, sha1_ce_transform);
-       }
-       return sha1_base_finish(desc, out);
-}
-
-static struct shash_alg alg = {
-       .init                   = sha1_base_init,
-       .update                 = sha1_ce_update,
-       .finup                  = sha1_ce_finup,
-       .descsize               = sizeof(struct sha1_ce_state),
-       .statesize              = SHA1_STATE_SIZE,
-       .digestsize             = SHA1_DIGEST_SIZE,
-       .base                   = {
-               .cra_name               = "sha1",
-               .cra_driver_name        = "sha1-ce",
-               .cra_priority           = 200,
-               .cra_flags              = CRYPTO_AHASH_ALG_BLOCK_ONLY |
-                                         CRYPTO_AHASH_ALG_FINUP_MAX,
-               .cra_blocksize          = SHA1_BLOCK_SIZE,
-               .cra_module             = THIS_MODULE,
-       }
-};
-
-static int __init sha1_ce_mod_init(void)
-{
-       return crypto_register_shash(&alg);
-}
-
-static void __exit sha1_ce_mod_fini(void)
-{
-       crypto_unregister_shash(&alg);
-}
-
-module_cpu_feature_match(SHA1, sha1_ce_mod_init);
-module_exit(sha1_ce_mod_fini);
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 519c5d6a050fddff2941b83202f8f99b0d55b1e1..05cce143af31473e1ac64531fe1cb1168dff7925 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -147,6 +147,7 @@ config CRYPTO_LIB_SHA1_ARCH
        bool
        depends on CRYPTO_LIB_SHA1 && !UML
        default y if ARM
+       default y if ARM64 && KERNEL_MODE_NEON
 
 config CRYPTO_LIB_SHA256
        tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 699a421339271cf926b3b646df116a67a4691d98..1da13c9e2f711c4af74b3904767da2706860822d 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -76,6 +76,7 @@ libsha1-y += arm/sha1-armv4-large.o
 libsha1-$(CONFIG_KERNEL_MODE_NEON) += arm/sha1-armv7-neon.o \
                                      arm/sha1-ce-core.o
 endif
+libsha1-$(CONFIG_ARM64) += arm64/sha1-ce-core.o
 endif # CONFIG_CRYPTO_LIB_SHA1_ARCH
 
 ################################################################################
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/lib/crypto/arm64/sha1-ce-core.S
similarity index 76%
rename from arch/arm64/crypto/sha1-ce-core.S
rename to lib/crypto/arm64/sha1-ce-core.S
index 9b1f2d82a6feae09aa7bd4b0f25a82918d515db5..21efbbafd7d62cf31891c2003d9f2a43c947765f 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/lib/crypto/arm64/sha1-ce-core.S
@@ -62,8 +62,8 @@
        .endm
 
        /*
-        * int __sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
-        *                         int blocks)
+        * size_t __sha1_ce_transform(struct sha1_block_state *state,
+        *                            const u8 *data, size_t nblocks);
         */
 SYM_FUNC_START(__sha1_ce_transform)
        /* load round constants */
@@ -76,20 +76,16 @@ SYM_FUNC_START(__sha1_ce_transform)
        ld1             {dgav.4s}, [x0]
        ldr             dgb, [x0, #16]
 
-       /* load sha1_ce_state::finalize */
-       ldr_l           w4, sha1_ce_offsetof_finalize, x4
-       ldr             w4, [x0, x4]
-
        /* load input */
 0:     ld1             {v8.4s-v11.4s}, [x1], #64
-       sub             w2, w2, #1
+       sub             x2, x2, #1
 
 CPU_LE(        rev32           v8.16b, v8.16b          )
 CPU_LE(        rev32           v9.16b, v9.16b          )
 CPU_LE(        rev32           v10.16b, v10.16b        )
 CPU_LE(        rev32           v11.16b, v11.16b        )
 
-1:     add             t0.4s, v8.4s, k0.4s
+       add             t0.4s, v8.4s, k0.4s
        mov             dg0v.16b, dgav.16b
 
        add_update      c, ev, k0,  8,  9, 10, 11, dgb
@@ -120,31 +116,15 @@ CPU_LE(   rev32           v11.16b, v11.16b        )
        add             dgbv.2s, dgbv.2s, dg1v.2s
        add             dgav.4s, dgav.4s, dg0v.4s
 
-       cbz             w2, 2f
-       cond_yield      3f, x5, x6
-       b               0b
+       /* return early if voluntary preemption is needed */
+       cond_yield      1f, x5, x6
 
-       /*
-        * Final block: add padding and total bit count.
-        * Skip if the input size was not a round multiple of the block size,
-        * the padding is handled by the C code in that case.
-        */
-2:     cbz             x4, 3f
-       ldr_l           w4, sha1_ce_offsetof_count, x4
-       ldr             x4, [x0, x4]
-       movi            v9.2d, #0
-       mov             x8, #0x80000000
-       movi            v10.2d, #0
-       ror             x7, x4, #29             // ror(lsl(x4, 3), 32)
-       fmov            d8, x8
-       mov             x4, #0
-       mov             v11.d[0], xzr
-       mov             v11.d[1], x7
-       b               1b
+       /* handled all input blocks? */
+       cbnz            x2, 0b
 
        /* store new state */
-3:     st1             {dgav.4s}, [x0]
+1:     st1             {dgav.4s}, [x0]
        str             dgb, [x0, #16]
-       mov             w0, w2
+       mov             x0, x2
        ret
 SYM_FUNC_END(__sha1_ce_transform)
diff --git a/lib/crypto/arm64/sha1.h b/lib/crypto/arm64/sha1.h
new file mode 100644
index 0000000..f822563
--- /dev/null
+++ b/lib/crypto/arm64/sha1.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SHA-1 optimized for ARM64
+ *
+ * Copyright 2025 Google LLC
+ */
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <linux/cpufeature.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce);
+
+asmlinkage size_t __sha1_ce_transform(struct sha1_block_state *state,
+                                     const u8 *data, size_t nblocks);
+
+static void sha1_blocks(struct sha1_block_state *state,
+                       const u8 *data, size_t nblocks)
+{
+       if (static_branch_likely(&have_ce) && likely(may_use_simd())) {
+               do {
+                       size_t rem;
+
+                       kernel_neon_begin();
+                       rem = __sha1_ce_transform(state, data, nblocks);
+                       kernel_neon_end();
+                       data += (nblocks - rem) * SHA1_BLOCK_SIZE;
+                       nblocks = rem;
+               } while (nblocks);
+       } else {
+               sha1_blocks_generic(state, data, nblocks);
+       }
+}
+
+#define sha1_mod_init_arch sha1_mod_init_arch
+static inline void sha1_mod_init_arch(void)
+{
+       if (cpu_have_named_feature(SHA1))
+               static_branch_enable(&have_ce);
+}
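
(This header is not compiled on its own: the generic library core is
expected to pull it in and use its sha1_blocks() definition, and to
call sha1_mod_init_arch(), detected via the #define above, at module
init so the static key gets enabled on CPUs with the SHA-1 extensions.
A simplified model of that wiring; the include path and guards here are
assumptions for illustration, not verbatim kernel source:

	/* in lib/crypto/sha1.c (simplified) */
	#ifdef CONFIG_CRYPTO_LIB_SHA1_ARCH
	#include "sha1.h"	/* e.g. lib/crypto/arm64/sha1.h */
	#else
	static void sha1_blocks(struct sha1_block_state *state,
				const u8 *data, size_t nblocks)
	{
		sha1_blocks_generic(state, data, nblocks);
	}
	#endif

	#ifndef sha1_mod_init_arch
	static inline void sha1_mod_init_arch(void) {}	/* no arch setup */
	#endif
)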