git.ipfire.org Git - thirdparty/linux.git/commitdiff
lib/crypto: arm64/ghash: Migrate optimized code into library
author: Eric Biggers <ebiggers@kernel.org>
Thu, 19 Mar 2026 06:17:10 +0000 (23:17 -0700)
committer: Eric Biggers <ebiggers@kernel.org>
Mon, 23 Mar 2026 23:44:29 +0000 (16:44 -0700)
Remove the "ghash-neon" crypto_shash algorithm.  Move the corresponding
assembly code into lib/crypto/, and wire it up to the GHASH library.

This optimizes the GHASH library on arm64 (though only with NEON, not
PMULL; for now the goal is just parity with crypto_shash).  It
greatly reduces the amount of arm64-specific glue code that is needed,
and it fixes the issue where this optimization was disabled by default.

To integrate the assembly code correctly with the library, make the
following tweaks:

- Change the type of 'blocks' from int to size_t
- Change the types of 'dg' and 'h' to polyval_elem.  Note that this
  simply reflects the format that the code was already using.
- Remove the 'head' argument, which is no longer needed.
- Remove the CFI stubs, as indirect calls are no longer used.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260319061723.1140720-10-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
arch/arm64/crypto/Kconfig
arch/arm64/crypto/Makefile
arch/arm64/crypto/ghash-ce-core.S
arch/arm64/crypto/ghash-ce-glue.c
lib/crypto/Makefile
lib/crypto/arm64/gf128hash.h
lib/crypto/arm64/ghash-neon-core.S [moved from arch/arm64/crypto/ghash-neon-core.S with 93% similarity]

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 82794afaffc9de8996459996a1bb3355f7c9f86b..1a0c553fbfd75e77f6035e5b205c701092749554 100644 (file)
@@ -3,14 +3,13 @@
 menu "Accelerated Cryptographic Algorithms for CPU (arm64)"
 
 config CRYPTO_GHASH_ARM64_CE
-       tristate "Hash functions: GHASH (ARMv8 Crypto Extensions)"
+       tristate "AEAD cipher: AES in GCM mode (ARMv8 Crypto Extensions)"
        depends on KERNEL_MODE_NEON
-       select CRYPTO_HASH
        select CRYPTO_LIB_AES
        select CRYPTO_LIB_GF128MUL
        select CRYPTO_AEAD
        help
-         GCM GHASH function (NIST SP800-38D)
+         AEAD cipher: AES-GCM
 
          Architecture: arm64 using:
          - ARMv8 Crypto Extensions
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index b7ba43ce8584c3c9de0dcc033957f07593b16519..8a8e3e551ed332056abc1c67a4be405ad7a5dd3c 100644 (file)
@@ -27,7 +27,7 @@ obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o
 sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
 
 obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
-ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o ghash-neon-core.o
+ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
 
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
 aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 4344fe213d14c7af971d730f6be9a2f7cc43d7a9..a01f136f4fb240084067c3ebca73375f4995ed7e 100644 (file)
@@ -6,7 +6,6 @@
  */
 
 #include <linux/linkage.h>
-#include <linux/cfi_types.h>
 #include <asm/assembler.h>
 
        SHASH           .req    v0
@@ -67,7 +66,7 @@
         * void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
         *                             u64 const h[][2], const char *head)
         */
-SYM_TYPED_FUNC_START(pmull_ghash_update_p64)
+SYM_FUNC_START(pmull_ghash_update_p64)
        ld1             {SHASH.2d}, [x3]
        ld1             {XL.2d}, [x1]
 
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 63bb9e06225112d53fa7414a832c6032c2147ecb..590054c3260d9ff14b1c3cc3ab4cb84ec4f34d59 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ * AES-GCM using ARMv8 Crypto Extensions
  *
  * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
  */
@@ -11,7 +11,6 @@
 #include <crypto/ghash.h>
 #include <crypto/gf128mul.h>
 #include <crypto/internal/aead.h>
-#include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 #include <linux/cpufeature.h>
 
 #include <asm/simd.h>
 
-MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
+MODULE_DESCRIPTION("AES-GCM using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("ghash");
+MODULE_ALIAS_CRYPTO("gcm(aes)");
+MODULE_ALIAS_CRYPTO("rfc4106(gcm(aes))");
 
 #define RFC4106_NONCE_SIZE     4
 
@@ -35,10 +35,6 @@ struct ghash_key {
        u64                     h[][2];
 };
 
-struct arm_ghash_desc_ctx {
-       u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
-};
-
 struct gcm_aes_ctx {
        struct aes_enckey       aes_key;
        u8                      nonce[RFC4106_NONCE_SIZE];
@@ -48,9 +44,6 @@ struct gcm_aes_ctx {
 asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
                                       u64 const h[][2], const char *head);
 
-asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
-                                     u64 const h[][2], const char *head);
-
 asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
                                  u64 const h[][2], u64 dg[], u8 ctr[],
                                  u32 const rk[], int rounds, u8 tag[]);
@@ -59,85 +52,11 @@ asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
                                 u32 const rk[], int rounds, const u8 l[],
                                 const u8 tag[], u64 authsize);
 
-static int ghash_init(struct shash_desc *desc)
-{
-       struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       *ctx = (struct arm_ghash_desc_ctx){};
-       return 0;
-}
-
-static __always_inline
-void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
-                         struct ghash_key *key, const char *head,
-                         void (*simd_update)(int blocks, u64 dg[],
-                                             const char *src,
-                                             u64 const h[][2],
-                                             const char *head))
+static void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
+                                struct ghash_key *key, const char *head)
 {
        scoped_ksimd()
-               simd_update(blocks, dg, src, key->h, head);
-}
-
-/* avoid hogging the CPU for too long */
-#define MAX_BLOCKS     (SZ_64K / GHASH_BLOCK_SIZE)
-
-static int ghash_update(struct shash_desc *desc, const u8 *src,
-                       unsigned int len)
-{
-       struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
-       struct ghash_key *key = crypto_shash_ctx(desc->tfm);
-       int blocks;
-
-       blocks = len / GHASH_BLOCK_SIZE;
-       len -= blocks * GHASH_BLOCK_SIZE;
-
-       do {
-               int chunk = min(blocks, MAX_BLOCKS);
-
-               ghash_do_simd_update(chunk, ctx->digest, src, key, NULL,
-                                    pmull_ghash_update_p8);
-               blocks -= chunk;
-               src += chunk * GHASH_BLOCK_SIZE;
-       } while (unlikely(blocks > 0));
-       return len;
-}
-
-static int ghash_export(struct shash_desc *desc, void *out)
-{
-       struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
-       u8 *dst = out;
-
-       put_unaligned_be64(ctx->digest[1], dst);
-       put_unaligned_be64(ctx->digest[0], dst + 8);
-       return 0;
-}
-
-static int ghash_import(struct shash_desc *desc, const void *in)
-{
-       struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
-       const u8 *src = in;
-
-       ctx->digest[1] = get_unaligned_be64(src);
-       ctx->digest[0] = get_unaligned_be64(src + 8);
-       return 0;
-}
-
-static int ghash_finup(struct shash_desc *desc, const u8 *src,
-                      unsigned int len, u8 *dst)
-{
-       struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
-       struct ghash_key *key = crypto_shash_ctx(desc->tfm);
-
-       if (len) {
-               u8 buf[GHASH_BLOCK_SIZE] = {};
-
-               memcpy(buf, src, len);
-               ghash_do_simd_update(1, ctx->digest, buf, key, NULL,
-                                    pmull_ghash_update_p8);
-               memzero_explicit(buf, sizeof(buf));
-       }
-       return ghash_export(desc, dst);
+               pmull_ghash_update_p64(blocks, dg, src, key->h, head);
 }
 
 static void ghash_reflect(u64 h[], const be128 *k)
@@ -151,41 +70,6 @@ static void ghash_reflect(u64 h[], const be128 *k)
                h[1] ^= 0xc200000000000000UL;
 }
 
-static int ghash_setkey(struct crypto_shash *tfm,
-                       const u8 *inkey, unsigned int keylen)
-{
-       struct ghash_key *key = crypto_shash_ctx(tfm);
-
-       if (keylen != GHASH_BLOCK_SIZE)
-               return -EINVAL;
-
-       /* needed for the fallback */
-       memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
-
-       ghash_reflect(key->h[0], &key->k);
-       return 0;
-}
-
-static struct shash_alg ghash_alg = {
-       .base.cra_name          = "ghash",
-       .base.cra_driver_name   = "ghash-neon",
-       .base.cra_priority      = 150,
-       .base.cra_flags         = CRYPTO_AHASH_ALG_BLOCK_ONLY,
-       .base.cra_blocksize     = GHASH_BLOCK_SIZE,
-       .base.cra_ctxsize       = sizeof(struct ghash_key) + sizeof(u64[2]),
-       .base.cra_module        = THIS_MODULE,
-
-       .digestsize             = GHASH_DIGEST_SIZE,
-       .init                   = ghash_init,
-       .update                 = ghash_update,
-       .finup                  = ghash_finup,
-       .setkey                 = ghash_setkey,
-       .export                 = ghash_export,
-       .import                 = ghash_import,
-       .descsize               = sizeof(struct arm_ghash_desc_ctx),
-       .statesize              = sizeof(struct ghash_desc_ctx),
-};
-
 static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey,
                          unsigned int keylen)
 {
@@ -240,9 +124,7 @@ static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
                int blocks = count / GHASH_BLOCK_SIZE;
 
                ghash_do_simd_update(blocks, dg, src, &ctx->ghash_key,
-                                    *buf_count ? buf : NULL,
-                                    pmull_ghash_update_p64);
-
+                                    *buf_count ? buf : NULL);
                src += blocks * GHASH_BLOCK_SIZE;
                count %= GHASH_BLOCK_SIZE;
                *buf_count = 0;
@@ -275,8 +157,7 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[], u32 len)
 
        if (buf_count) {
                memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
-               ghash_do_simd_update(1, dg, buf, &ctx->ghash_key, NULL,
-                                    pmull_ghash_update_p64);
+               ghash_do_simd_update(1, dg, buf, &ctx->ghash_key, NULL);
        }
 }
 
@@ -505,22 +386,15 @@ static struct aead_alg gcm_aes_algs[] = {{
 
 static int __init ghash_ce_mod_init(void)
 {
-       if (!cpu_have_named_feature(ASIMD))
+       if (!cpu_have_named_feature(ASIMD) || !cpu_have_named_feature(PMULL))
                return -ENODEV;
 
-       if (cpu_have_named_feature(PMULL))
-               return crypto_register_aeads(gcm_aes_algs,
-                                            ARRAY_SIZE(gcm_aes_algs));
-
-       return crypto_register_shash(&ghash_alg);
+       return crypto_register_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs));
 }
 
 static void __exit ghash_ce_mod_exit(void)
 {
-       if (cpu_have_named_feature(PMULL))
-               crypto_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs));
-       else
-               crypto_unregister_shash(&ghash_alg);
+       crypto_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs));
 }
 
 static const struct cpu_feature __maybe_unused ghash_cpu_feature[] = {
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 8a06dd6a43eaed485d6f0338cf5c76b89134c9db..4ce0bac8fd93704de8ba73996a22cb6eb54bccb7 100644 (file)
@@ -159,7 +159,8 @@ libgf128hash-y := gf128hash.o
 ifeq ($(CONFIG_CRYPTO_LIB_GF128HASH_ARCH),y)
 CFLAGS_gf128hash.o += -I$(src)/$(SRCARCH)
 libgf128hash-$(CONFIG_ARM) += arm/ghash-neon-core.o
-libgf128hash-$(CONFIG_ARM64) += arm64/polyval-ce-core.o
+libgf128hash-$(CONFIG_ARM64) += arm64/ghash-neon-core.o \
+                               arm64/polyval-ce-core.o
 libgf128hash-$(CONFIG_X86) += x86/polyval-pclmul-avx.o
 endif
 
diff --git a/lib/crypto/arm64/gf128hash.h b/lib/crypto/arm64/gf128hash.h
index 796c36804dda4271e0713874738f8cfac6fd2c27..b2c85585b75837d9381ea4f4b99214fe68897549 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
- * POLYVAL library functions, arm64 optimized
+ * GHASH and POLYVAL, arm64 optimized
  *
  * Copyright 2025 Google LLC
  */
@@ -9,8 +9,12 @@
 
 #define NUM_H_POWERS 8
 
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd);
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull);
 
+asmlinkage void pmull_ghash_update_p8(size_t blocks, struct polyval_elem *dg,
+                                     const u8 *src,
+                                     const struct polyval_elem *h);
 asmlinkage void polyval_mul_pmull(struct polyval_elem *a,
                                  const struct polyval_elem *b);
 asmlinkage void polyval_blocks_pmull(struct polyval_elem *acc,
@@ -41,15 +45,62 @@ static void polyval_preparekey_arch(struct polyval_key *key,
        }
 }
 
+static void polyval_mul_arm64(struct polyval_elem *a,
+                             const struct polyval_elem *b)
+{
+       if (static_branch_likely(&have_asimd) && may_use_simd()) {
+               static const u8 zeroes[GHASH_BLOCK_SIZE];
+
+               scoped_ksimd() {
+                       if (static_branch_likely(&have_pmull)) {
+                               polyval_mul_pmull(a, b);
+                       } else {
+                               /*
+                                * Note that this is indeed equivalent to a
+                                * POLYVAL multiplication, since it takes the
+                                * accumulator and key in POLYVAL format, and
+                                * byte-swapping a block of zeroes is a no-op.
+                                */
+                               pmull_ghash_update_p8(1, a, zeroes, b);
+                       }
+               }
+       } else {
+               polyval_mul_generic(a, b);
+       }
+}
+
+#define ghash_mul_arch ghash_mul_arch
+static void ghash_mul_arch(struct polyval_elem *acc,
+                          const struct ghash_key *key)
+{
+       polyval_mul_arm64(acc, &key->h);
+}
+
 #define polyval_mul_arch polyval_mul_arch
 static void polyval_mul_arch(struct polyval_elem *acc,
                             const struct polyval_key *key)
 {
-       if (static_branch_likely(&have_pmull) && may_use_simd()) {
-               scoped_ksimd()
-                       polyval_mul_pmull(acc, &key->h_powers[NUM_H_POWERS - 1]);
+       polyval_mul_arm64(acc, &key->h_powers[NUM_H_POWERS - 1]);
+}
+
+#define ghash_blocks_arch ghash_blocks_arch
+static void ghash_blocks_arch(struct polyval_elem *acc,
+                             const struct ghash_key *key,
+                             const u8 *data, size_t nblocks)
+{
+       if (static_branch_likely(&have_asimd) && may_use_simd()) {
+               do {
+                       /* Allow rescheduling every 4 KiB. */
+                       size_t n = min_t(size_t, nblocks,
+                                        4096 / GHASH_BLOCK_SIZE);
+
+                       scoped_ksimd()
+                               pmull_ghash_update_p8(n, acc, data, &key->h);
+                       data += n * GHASH_BLOCK_SIZE;
+                       nblocks -= n;
+               } while (nblocks);
        } else {
-               polyval_mul_generic(acc, &key->h_powers[NUM_H_POWERS - 1]);
+               ghash_blocks_generic(acc, &key->h, data, nblocks);
        }
 }
 
@@ -78,6 +129,9 @@ static void polyval_blocks_arch(struct polyval_elem *acc,
 #define gf128hash_mod_init_arch gf128hash_mod_init_arch
 static void gf128hash_mod_init_arch(void)
 {
-       if (cpu_have_named_feature(PMULL))
-               static_branch_enable(&have_pmull);
+       if (cpu_have_named_feature(ASIMD)) {
+               static_branch_enable(&have_asimd);
+               if (cpu_have_named_feature(PMULL))
+                       static_branch_enable(&have_pmull);
+       }
 }
diff --git a/arch/arm64/crypto/ghash-neon-core.S b/lib/crypto/arm64/ghash-neon-core.S
similarity index 93%
rename from arch/arm64/crypto/ghash-neon-core.S
rename to lib/crypto/arm64/ghash-neon-core.S
index 6157135ad56683cd1755a58dfd56964c02b0e40d..85b20fcd98fef574e7e8f9aef0d1d205caa8e5d0 100644 (file)
@@ -6,7 +6,6 @@
  */
 
 #include <linux/linkage.h>
-#include <linux/cfi_types.h>
 #include <asm/assembler.h>
 
        SHASH           .req    v0
        .endm
 
        /*
-        * void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
-        *                            u64 const h[][2], const char *head)
+        * void pmull_ghash_update_p8(size_t blocks, struct polyval_elem *dg,
+        *                            const u8 *src,
+        *                            const struct polyval_elem *h)
         */
-SYM_TYPED_FUNC_START(pmull_ghash_update_p8)
+SYM_FUNC_START(pmull_ghash_update_p8)
        ld1             {SHASH.2d}, [x3]
        ld1             {XL.2d}, [x1]
 
        __pmull_pre_p8
 
-       /* do the head block first, if supplied */
-       cbz             x4, 0f
-       ld1             {T1.2d}, [x4]
-       mov             x4, xzr
-       b               3f
-
 0:     ld1             {T1.2d}, [x2], #16
-       sub             w0, w0, #1
+       sub             x0, x0, #1
 
-3:     /* multiply XL by SHASH in GF(2^128) */
+       /* multiply XL by SHASH in GF(2^128) */
 CPU_LE(        rev64           T1.16b, T1.16b  )
 
        ext             T2.16b, XL.16b, XL.16b, #8
@@ -219,7 +213,7 @@ CPU_LE(     rev64           T1.16b, T1.16b  )
        eor             T2.16b, T2.16b, XH.16b
        eor             XL.16b, XL.16b, T2.16b
 
-       cbnz            w0, 0b
+       cbnz            x0, 0b
 
        st1             {XL.2d}, [x1]
        ret