lib/crypto: arm/blake2b: Migrate optimized code into library
author: Eric Biggers <ebiggers@kernel.org>
	Sat, 18 Oct 2025 04:31:03 +0000 (21:31 -0700)
committer: Eric Biggers <ebiggers@kernel.org>
	Thu, 30 Oct 2025 05:04:24 +0000 (22:04 -0700)
Migrate the arm-optimized BLAKE2b code from arch/arm/crypto/ to
lib/crypto/arm/.  This allows the BLAKE2b library to use it, and it also
simplifies the code, since integrating with the library is easier than
integrating with crypto_shash.

This temporarily makes the arm-optimized BLAKE2b code unavailable via
crypto_shash.  A later commit reimplements the blake2b-* crypto_shash
algorithms on top of the BLAKE2b library API, making the optimized code
available via crypto_shash again.
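
For context, a minimal sketch of what hashing through the library API
looks like, assuming the BLAKE2b functions mirror the existing BLAKE2s
library API (the <crypto/blake2b.h> header and the blake2b_init/
blake2b_update/blake2b_final names are assumptions, not confirmed by
this commit; struct blake2b_ctx and BLAKE2B_512_HASH_SIZE do appear in
the diff below):

	#include <crypto/blake2b.h>	/* assumed header, by analogy
					   with <crypto/blake2s.h> */

	/* Compute an unkeyed 512-bit BLAKE2b digest of a buffer. */
	static void example_blake2b(const u8 *data, size_t len,
				    u8 out[BLAKE2B_512_HASH_SIZE])
	{
		struct blake2b_ctx ctx;

		blake2b_init(&ctx, BLAKE2B_512_HASH_SIZE);	/* assumed API */
		blake2b_update(&ctx, data, len);
		blake2b_final(&ctx, out);
	}

The arch-optimized compression function (here, NEON) is selected inside
the library, so callers need no arch-specific code.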

Note that as per the lib/crypto/ convention, the optimized code is now
enabled by default.  So, this also fixes the longstanding issue where
the optimized BLAKE2b code was not enabled by default.

To see the diff from arch/arm/crypto/blake2b-neon-glue.c to
lib/crypto/arm/blake2b.h, view this commit with 'git show -M10'.
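
(The -M10 option lowers git's rename-detection threshold to 10%
similarity, so git show pairs the deleted glue file with the new header
and renders the change as a rename diff rather than an unrelated delete
plus add.)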

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20251018043106.375964-8-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
arch/arm/crypto/Kconfig
arch/arm/crypto/Makefile
arch/arm/crypto/blake2b-neon-glue.c [deleted file]
lib/crypto/Kconfig
lib/crypto/Makefile
lib/crypto/arm/blake2b-neon-core.S [moved from arch/arm/crypto/blake2b-neon-core.S with 94% similarity]
lib/crypto/arm/blake2b.h [new file with mode: 0644]

diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index c436eec22d86ca8873a9dce2ba5847691457bce1..f30d743df26433474a6ab7bd62d38ef3c36c7046 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -33,22 +33,6 @@ config CRYPTO_NHPOLY1305_NEON
          Architecture: arm using:
          - NEON (Advanced SIMD) extensions
 
-config CRYPTO_BLAKE2B_NEON
-       tristate "Hash functions: BLAKE2b (NEON)"
-       depends on KERNEL_MODE_NEON
-       select CRYPTO_BLAKE2B
-       help
-         BLAKE2b cryptographic hash function (RFC 7693)
-
-         Architecture: arm using
-         - NEON (Advanced SIMD) extensions
-
-         BLAKE2b digest algorithm optimized with ARM NEON instructions.
-         On ARM processors that have NEON support but not the ARMv8
-         Crypto Extensions, typically this BLAKE2b implementation is
-         much faster than the SHA-2 family and slightly faster than
-         SHA-1.
-
 config CRYPTO_AES_ARM
        tristate "Ciphers: AES"
        select CRYPTO_ALGAPI
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 6346a73effc06a395ad87de161ea03523486c125..86dd43313dbfd16f39a87ed948117192189c5779 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -5,7 +5,6 @@
 
 obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
 obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
-obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o
 obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
 
 obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
@@ -13,7 +12,6 @@ obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
 
 aes-arm-y      := aes-cipher-core.o aes-cipher-glue.o
 aes-arm-bs-y   := aes-neonbs-core.o aes-neonbs-glue.o
-blake2b-neon-y  := blake2b-neon-core.o blake2b-neon-glue.o
 aes-arm-ce-y   := aes-ce-core.o aes-ce-glue.o
 ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
 nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
diff --git a/arch/arm/crypto/blake2b-neon-glue.c b/arch/arm/crypto/blake2b-neon-glue.c
deleted file mode 100644
index 2ff443a..0000000
--- a/arch/arm/crypto/blake2b-neon-glue.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * BLAKE2b digest algorithm, NEON accelerated
- *
- * Copyright 2020 Google LLC
- */
-
-#include <crypto/internal/blake2b.h>
-#include <crypto/internal/hash.h>
-
-#include <linux/module.h>
-#include <linux/sizes.h>
-
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void blake2b_compress_neon(struct blake2b_state *state,
-                                     const u8 *block, size_t nblocks, u32 inc);
-
-static void blake2b_compress_arch(struct blake2b_state *state,
-                                 const u8 *block, size_t nblocks, u32 inc)
-{
-       do {
-               const size_t blocks = min_t(size_t, nblocks,
-                                           SZ_4K / BLAKE2B_BLOCK_SIZE);
-
-               kernel_neon_begin();
-               blake2b_compress_neon(state, block, blocks, inc);
-               kernel_neon_end();
-
-               nblocks -= blocks;
-               block += blocks * BLAKE2B_BLOCK_SIZE;
-       } while (nblocks);
-}
-
-static int crypto_blake2b_update_neon(struct shash_desc *desc,
-                                     const u8 *in, unsigned int inlen)
-{
-       return crypto_blake2b_update_bo(desc, in, inlen, blake2b_compress_arch);
-}
-
-static int crypto_blake2b_finup_neon(struct shash_desc *desc, const u8 *in,
-                                    unsigned int inlen, u8 *out)
-{
-       return crypto_blake2b_finup(desc, in, inlen, out,
-                                   blake2b_compress_arch);
-}
-
-#define BLAKE2B_ALG(name, driver_name, digest_size)                    \
-       {                                                               \
-               .base.cra_name          = name,                         \
-               .base.cra_driver_name   = driver_name,                  \
-               .base.cra_priority      = 200,                          \
-               .base.cra_flags         = CRYPTO_ALG_OPTIONAL_KEY |     \
-                                         CRYPTO_AHASH_ALG_BLOCK_ONLY | \
-                                         CRYPTO_AHASH_ALG_FINAL_NONZERO, \
-               .base.cra_blocksize     = BLAKE2B_BLOCK_SIZE,           \
-               .base.cra_ctxsize       = sizeof(struct blake2b_tfm_ctx), \
-               .base.cra_module        = THIS_MODULE,                  \
-               .digestsize             = digest_size,                  \
-               .setkey                 = crypto_blake2b_setkey,        \
-               .init                   = crypto_blake2b_init,          \
-               .update                 = crypto_blake2b_update_neon,   \
-               .finup                  = crypto_blake2b_finup_neon,    \
-               .descsize               = sizeof(struct blake2b_state), \
-               .statesize              = BLAKE2B_STATE_SIZE,           \
-       }
-
-static struct shash_alg blake2b_neon_algs[] = {
-       BLAKE2B_ALG("blake2b-160", "blake2b-160-neon", BLAKE2B_160_HASH_SIZE),
-       BLAKE2B_ALG("blake2b-256", "blake2b-256-neon", BLAKE2B_256_HASH_SIZE),
-       BLAKE2B_ALG("blake2b-384", "blake2b-384-neon", BLAKE2B_384_HASH_SIZE),
-       BLAKE2B_ALG("blake2b-512", "blake2b-512-neon", BLAKE2B_512_HASH_SIZE),
-};
-
-static int __init blake2b_neon_mod_init(void)
-{
-       if (!(elf_hwcap & HWCAP_NEON))
-               return -ENODEV;
-
-       return crypto_register_shashes(blake2b_neon_algs,
-                                      ARRAY_SIZE(blake2b_neon_algs));
-}
-
-static void __exit blake2b_neon_mod_exit(void)
-{
-       crypto_unregister_shashes(blake2b_neon_algs,
-                                 ARRAY_SIZE(blake2b_neon_algs));
-}
-
-module_init(blake2b_neon_mod_init);
-module_exit(blake2b_neon_mod_exit);
-
-MODULE_DESCRIPTION("BLAKE2b digest algorithm, NEON accelerated");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("blake2b-160");
-MODULE_ALIAS_CRYPTO("blake2b-160-neon");
-MODULE_ALIAS_CRYPTO("blake2b-256");
-MODULE_ALIAS_CRYPTO("blake2b-256-neon");
-MODULE_ALIAS_CRYPTO("blake2b-384");
-MODULE_ALIAS_CRYPTO("blake2b-384-neon");
-MODULE_ALIAS_CRYPTO("blake2b-512");
-MODULE_ALIAS_CRYPTO("blake2b-512-neon");
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 918378b7e833801b2077273175165763124ba287..280b888153bf0a3cf0a5f777caf4154cf9645fb1 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -37,6 +37,7 @@ config CRYPTO_LIB_BLAKE2B
 config CRYPTO_LIB_BLAKE2B_ARCH
        bool
        depends on CRYPTO_LIB_BLAKE2B && !UML
+       default y if ARM && KERNEL_MODE_NEON
 
 # BLAKE2s support is always built-in, so there's no CRYPTO_LIB_BLAKE2S option.
 
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index f863417b16817ab3436bf0d1d4838d01781b4b59..bc26777d08e977cca5bf5795ad58bfb7faa4b4a6 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -36,6 +36,7 @@ libblake2b-y := blake2b.o
 CFLAGS_blake2b.o := -Wframe-larger-than=4096 #  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105930
 ifeq ($(CONFIG_CRYPTO_LIB_BLAKE2B_ARCH),y)
 CFLAGS_blake2b.o += -I$(src)/$(SRCARCH)
+libblake2b-$(CONFIG_ARM) += arm/blake2b-neon-core.o
 endif # CONFIG_CRYPTO_LIB_BLAKE2B_ARCH
 
 ################################################################################
diff --git a/arch/arm/crypto/blake2b-neon-core.S b/lib/crypto/arm/blake2b-neon-core.S
similarity index 94%
rename from arch/arm/crypto/blake2b-neon-core.S
rename to lib/crypto/arm/blake2b-neon-core.S
index 0406a186377fb4a93f04568101b138050bece0c8..b55c37f0b88fb406337b99c0a620e6f76e83c36a 100644
--- a/arch/arm/crypto/blake2b-neon-core.S
+++ b/lib/crypto/arm/blake2b-neon-core.S
@@ -1,6 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
- * BLAKE2b digest algorithm, NEON accelerated
+ * BLAKE2b digest algorithm optimized with ARM NEON instructions.  On ARM
+ * processors that have NEON support but not the ARMv8 Crypto Extensions,
+ * typically this BLAKE2b implementation is much faster than the SHA-2 family
+ * and slightly faster than SHA-1.
  *
  * Copyright 2020 Google LLC
  *
@@ -13,8 +16,8 @@
        .fpu            neon
 
        // The arguments to blake2b_compress_neon()
-       STATE           .req    r0
-       BLOCK           .req    r1
+       CTX             .req    r0
+       DATA            .req    r1
        NBLOCKS         .req    r2
        INC             .req    r3
 
 .endm
 
 //
-// void blake2b_compress_neon(struct blake2b_state *state,
-//                           const u8 *block, size_t nblocks, u32 inc);
+// void blake2b_compress_neon(struct blake2b_ctx *ctx,
+//                           const u8 *data, size_t nblocks, u32 inc);
 //
-// Only the first three fields of struct blake2b_state are used:
+// Only the first three fields of struct blake2b_ctx are used:
 //     u64 h[8];       (inout)
 //     u64 t[2];       (inout)
 //     u64 f[2];       (in)
@@ -255,7 +258,7 @@ ENTRY(blake2b_compress_neon)
        adr             ROR24_TABLE, .Lror24_table
        adr             ROR16_TABLE, .Lror16_table
 
-       mov             ip, STATE
+       mov             ip, CTX
        vld1.64         {q0-q1}, [ip]!          // Load h[0..3]
        vld1.64         {q2-q3}, [ip]!          // Load h[4..7]
 .Lnext_block:
@@ -281,14 +284,14 @@ ENTRY(blake2b_compress_neon)
        // (q8-q9) in an aligned buffer on the stack so that they can be
        // reloaded when needed.  (We could just reload directly from the
        // message buffer, but it's faster to use aligned loads.)
-       vld1.8          {q8-q9}, [BLOCK]!
+       vld1.8          {q8-q9}, [DATA]!
          veor          q6, q6, q14     // v[12..13] = IV[4..5] ^ t[0..1]
-       vld1.8          {q10-q11}, [BLOCK]!
+       vld1.8          {q10-q11}, [DATA]!
          veor          q7, q7, q15     // v[14..15] = IV[6..7] ^ f[0..1]
-       vld1.8          {q12-q13}, [BLOCK]!
+       vld1.8          {q12-q13}, [DATA]!
        vst1.8          {q8-q9}, [sp, :256]
-         mov           ip, STATE
-       vld1.8          {q14-q15}, [BLOCK]!
+         mov           ip, CTX
+       vld1.8          {q14-q15}, [DATA]!
 
        // Execute the rounds.  Each round is provided the order in which it
        // needs to use the message words.
@@ -319,7 +322,7 @@ ENTRY(blake2b_compress_neon)
        veor            q3, q3, q7              // v[6..7] ^= v[14..15]
        veor            q0, q0, q8              // v[0..1] ^= h[0..1]
        veor            q1, q1, q9              // v[2..3] ^= h[2..3]
-         mov           ip, STATE
+         mov           ip, CTX
          subs          NBLOCKS, NBLOCKS, #1    // nblocks--
          vst1.64       {q0-q1}, [ip]!          // Store new h[0..3]
        veor            q2, q2, q10             // v[4..5] ^= h[4..5]
diff --git a/lib/crypto/arm/blake2b.h b/lib/crypto/arm/blake2b.h
new file mode 100644
index 0000000..1b9154d
--- /dev/null
+++ b/lib/crypto/arm/blake2b.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * BLAKE2b digest algorithm, NEON accelerated
+ *
+ * Copyright 2020 Google LLC
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+asmlinkage void blake2b_compress_neon(struct blake2b_ctx *ctx,
+                                     const u8 *data, size_t nblocks, u32 inc);
+
+static void blake2b_compress(struct blake2b_ctx *ctx,
+                            const u8 *data, size_t nblocks, u32 inc)
+{
+       if (!static_branch_likely(&have_neon) || !may_use_simd()) {
+               blake2b_compress_generic(ctx, data, nblocks, inc);
+               return;
+       }
+       do {
+               const size_t blocks = min_t(size_t, nblocks,
+                                           SZ_4K / BLAKE2B_BLOCK_SIZE);
+
+               kernel_neon_begin();
+               blake2b_compress_neon(ctx, data, blocks, inc);
+               kernel_neon_end();
+
+               data += blocks * BLAKE2B_BLOCK_SIZE;
+               nblocks -= blocks;
+       } while (nblocks);
+}
+
+#define blake2b_mod_init_arch blake2b_mod_init_arch
+static void blake2b_mod_init_arch(void)
+{
+       if (elf_hwcap & HWCAP_NEON)
+               static_branch_enable(&have_neon);
+}