git.ipfire.org Git - thirdparty/nettle.git/commitdiff
[PowerPC] Implement Poly1305 single block update based on radix 2^64
author Maamoun TK <maamoun.tk@googlemail.com>
Sun, 29 May 2022 01:34:44 +0000 (03:34 +0200)
committer Maamoun TK <maamoun.tk@googlemail.com>
Sun, 29 May 2022 01:34:44 +0000 (03:34 +0200)
Makefile.in
configure.ac
fat-ppc.c
fat-setup.h
poly1305-internal.c
powerpc64/fat/poly1305-internal-2.asm [new file with mode: 0644]
powerpc64/p9/poly1305-internal.asm [new file with mode: 0644]

index 65911e2a05bb399f2279d387d20485cae19112bb..11c8811415cd331925621260f72f97673628f077 100644 (file)
@@ -607,7 +607,7 @@ distdir: $(DISTFILES)
                x86_64 x86_64/aesni x86_64/sha_ni x86_64/pclmul x86_64/fat \
                arm arm/neon arm/v6 arm/fat \
                arm64 arm64/crypto arm64/fat \
-               powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/fat \
+               powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/p9 powerpc64/fat \
                s390x s390x/vf s390x/msa s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 s390x/fat ; do \
          mkdir "$(distdir)/$$d" ; \
          find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' -o -name README ')' \
index 73c6fc21ec430cf4be9c745da50eceee3b935080..b68b9e23dace30d9e7d80321f650ba410c72d0c3 100644 (file)
@@ -105,6 +105,10 @@ AC_ARG_ENABLE(power-altivec,
   AC_HELP_STRING([--enable-power-altivec], [Enable POWER altivec and vsx extensions. (default=no)]),,
   [enable_altivec=no])
 
+AC_ARG_ENABLE(power9,
+  AC_HELP_STRING([--enable-power9], [Enable POWER ISA v3.0. (default=no)]),,
+  [enable_power9=no])
+
 AC_ARG_ENABLE(s390x-vf,
   AC_HELP_STRING([--enable-s390x-vf], [Enable vector facility on z/Architecture. (default=no)]),,
   [enable_s390x_vf=no])
@@ -539,9 +543,12 @@ if test "x$enable_assembler" = xyes ; then
        if test "x$enable_fat" = xyes ; then
          asm_path="powerpc64/fat $asm_path"
          OPT_NETTLE_SOURCES="fat-ppc.c $OPT_NETTLE_SOURCES"
-         FAT_TEST_LIST="none crypto_ext altivec"
+         FAT_TEST_LIST="none crypto_ext altivec power9"
        else
-         if test "$enable_power_crypto_ext" = yes ; then
+         if test "$enable_power9" = yes ; then
+           asm_path="powerpc64/p9 $asm_path"
+         fi
+    if test "$enable_power_crypto_ext" = yes ; then
             asm_path="powerpc64/p8 $asm_path"
          fi
          if test "$enable_power_altivec" = yes ; then
@@ -605,6 +612,7 @@ asm_nettle_optional_list="cpuid.asm cpu-facility.asm \
   aes256-encrypt-2.asm aes256-decrypt-2.asm \
   cbc-aes128-encrypt-2.asm cbc-aes192-encrypt-2.asm cbc-aes256-encrypt-2.asm \
   chacha-2core.asm chacha-3core.asm chacha-4core.asm chacha-core-internal-2.asm \
+  poly1305-internal-2.asm \
   ghash-set-key-2.asm ghash-update-2.asm \
   salsa20-2core.asm salsa20-core-internal-2.asm \
   sha1-compress-2.asm sha256-compress-2.asm \
@@ -751,6 +759,9 @@ AH_VERBATIM([HAVE_NATIVE],
 #undef HAVE_NATIVE_ecc_secp384r1_redc
 #undef HAVE_NATIVE_ecc_secp521r1_modp
 #undef HAVE_NATIVE_ecc_secp521r1_redc
+#undef HAVE_NATIVE_poly1305_set_key
+#undef HAVE_NATIVE_poly1305_block
+#undef HAVE_NATIVE_poly1305_digest
 #undef HAVE_NATIVE_ghash_set_key
 #undef HAVE_NATIVE_ghash_update
 #undef HAVE_NATIVE_salsa20_core
index bf622cf50a44016e9029b6b64c26fe1303491edb..7569e44d6cbf817e9af2c7b3519b045f32200057 100644 (file)
--- a/fat-ppc.c
+++ b/fat-ppc.c
@@ -65,6 +65,7 @@
 #include "aes-internal.h"
 #include "chacha-internal.h"
 #include "ghash-internal.h"
+#include "poly1305.h"
 #include "fat-setup.h"
 
 /* Defines from arch/powerpc/include/uapi/asm/cputable.h in Linux kernel */
 #ifndef PPC_FEATURE2_VEC_CRYPTO
 #define PPC_FEATURE2_VEC_CRYPTO 0x02000000
 #endif
+#ifndef PPC_FEATURE2_ARCH_3_00
+#define PPC_FEATURE2_ARCH_3_00 0x00800000
+#endif
 
 struct ppc_features
 {
   int have_crypto_ext;
   int have_altivec;
+  int have_power9;
 };
 
 #define MATCH(s, slen, literal, llen) \
@@ -93,6 +98,7 @@ get_ppc_features (struct ppc_features *features)
   const char *s;
   features->have_crypto_ext = 0;
   features->have_altivec = 0;
+  features->have_power9 = 0;
 
   s = secure_getenv (ENV_OVERRIDE);
   if (s)
@@ -105,6 +111,8 @@ get_ppc_features (struct ppc_features *features)
          features->have_crypto_ext = 1;
        else if (MATCH(s, length, "altivec", 7))
          features->have_altivec = 1;
+  else if (MATCH(s, length, "power9", 6))
+         features->have_power9 = 1;
        if (!sep)
          break;
        s = sep + 1;
@@ -136,6 +144,9 @@ get_ppc_features (struct ppc_features *features)
       features->have_crypto_ext
        = ((hwcap2 & PPC_FEATURE2_VEC_CRYPTO) == PPC_FEATURE2_VEC_CRYPTO);
 
+      features->have_power9
+       = ((hwcap2 & PPC_FEATURE2_ARCH_3_00) == PPC_FEATURE2_ARCH_3_00);
+
       /* We also need VSX instructions, mainly for load and store. */
       features->have_altivec
        = ((hwcap & (PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_VSX))
@@ -172,6 +183,18 @@ DECLARE_FAT_FUNC(nettle_chacha_crypt32, chacha_crypt_func)
 DECLARE_FAT_FUNC_VAR(chacha_crypt32, chacha_crypt_func, 1core)
 DECLARE_FAT_FUNC_VAR(chacha_crypt32, chacha_crypt_func, 3core)
 
+DECLARE_FAT_FUNC(_nettle_poly1305_set_key, poly1305_set_key_func)
+DECLARE_FAT_FUNC_VAR(poly1305_set_key, poly1305_set_key_func, c)
+DECLARE_FAT_FUNC_VAR(poly1305_set_key, poly1305_set_key_func, ppc64)
+
+DECLARE_FAT_FUNC(_nettle_poly1305_block, poly1305_block_func)
+DECLARE_FAT_FUNC_VAR(poly1305_block, poly1305_block_func, c)
+DECLARE_FAT_FUNC_VAR(poly1305_block, poly1305_block_func, ppc64)
+
+DECLARE_FAT_FUNC(_nettle_poly1305_digest, poly1305_digest_func)
+DECLARE_FAT_FUNC_VAR(poly1305_digest, poly1305_digest_func, c)
+DECLARE_FAT_FUNC_VAR(poly1305_digest, poly1305_digest_func, ppc64)
+
 static void CONSTRUCTOR
 fat_init (void)
 {
@@ -220,6 +243,21 @@ fat_init (void)
       nettle_chacha_crypt_vec = _nettle_chacha_crypt_1core;
       nettle_chacha_crypt32_vec = _nettle_chacha_crypt32_1core;
     }
+
+  if (features.have_power9)
+    {
+      if (verbose)
+       fprintf (stderr, "libnettle: enabling arch 3.00 code.\n");
+      _nettle_poly1305_set_key_vec = _nettle_poly1305_set_key_ppc64;
+    _nettle_poly1305_block_vec = _nettle_poly1305_block_ppc64;
+    _nettle_poly1305_digest_vec = _nettle_poly1305_digest_ppc64;
+    }
+  else
+    {
+      _nettle_poly1305_set_key_vec = _nettle_poly1305_set_key_c;
+    _nettle_poly1305_block_vec = _nettle_poly1305_block_c;
+    _nettle_poly1305_digest_vec = _nettle_poly1305_digest_c;
+    }
 }
 
 DEFINE_FAT_FUNC(_nettle_aes_encrypt, void,
@@ -261,3 +299,19 @@ DEFINE_FAT_FUNC(nettle_chacha_crypt32, void,
                 uint8_t *dst,
                 const uint8_t *src),
                (ctx, length, dst, src))
+
+DEFINE_FAT_FUNC(_nettle_poly1305_set_key, void,
+               (struct poly1305_ctx *ctx,
+     const uint8_t *key),
+               (ctx, key))
+
+DEFINE_FAT_FUNC(_nettle_poly1305_block, void,
+               (struct poly1305_ctx *ctx,
+     const uint8_t *m,
+     unsigned high),
+               (ctx, m, high))
+
+DEFINE_FAT_FUNC(_nettle_poly1305_digest, void,
+               (struct poly1305_ctx *ctx,
+     union nettle_block16 *s),
+               (ctx, s))
index e77cce0288a0e5cff1671e17913b2f5b0b06a757..ad3c10f06fe4c7bbf64b179bbe0ee3adcf515467 100644 (file)
@@ -196,6 +196,12 @@ typedef void chacha_crypt_func(struct chacha_ctx *ctx,
                               uint8_t *dst,
                               const uint8_t *src);
 
+struct poly1305_ctx;
+typedef void poly1305_set_key_func(struct poly1305_ctx *ctx, const uint8_t *key);
+typedef void poly1305_digest_func(struct poly1305_ctx *ctx, union nettle_block16 *s);
+typedef void poly1305_block_func(struct poly1305_ctx *ctx, const uint8_t *m,
+                            unsigned high);
+
 struct aes128_ctx;
 typedef void aes128_set_key_func (struct aes128_ctx *ctx, const uint8_t *key);
 typedef void aes128_invert_key_func (struct aes128_ctx *dst, const struct aes128_ctx *src);
index 490fdf714aa7f253aeadcbb1abb24f1987ce95d6..380b934eed72d512283dd7394df8fa62dca1f5cf 100644 (file)
 #define h3 h.h32[3]
 #define h4 hh
 
+/* For fat builds: if a native version exists, the macros below give the C definitions that follow a _c suffix. */
+#if HAVE_NATIVE_poly1305_set_key
+void
+_nettle_poly1305_set_key_c(struct poly1305_ctx *ctx,
+              const uint8_t key[16]);
+# define _nettle_poly1305_set_key _nettle_poly1305_set_key_c
+#endif
+
+#if HAVE_NATIVE_poly1305_block
+void
+_nettle_poly1305_block_c(struct poly1305_ctx *ctx, const uint8_t *m,
+              unsigned t4);
+# define _nettle_poly1305_block _nettle_poly1305_block_c
+#endif
+
+#if HAVE_NATIVE_poly1305_digest
+void
+_nettle_poly1305_digest_c(struct poly1305_ctx *ctx,
+              union nettle_block16 *s);
+# define _nettle_poly1305_digest _nettle_poly1305_digest_c
+#endif
+
 void
 _nettle_poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16])
 {
diff --git a/powerpc64/fat/poly1305-internal-2.asm b/powerpc64/fat/poly1305-internal-2.asm
new file mode 100644 (file)
index 0000000..177a456
--- /dev/null
@@ -0,0 +1,39 @@
+C powerpc64/fat/poly1305-internal-2.asm
+
+ifelse(`
+   Copyright (C) 2022 Mamone Tarsha
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+dnl picked up by configure
+dnl PROLOGUE(_nettle_poly1305_set_key)
+dnl PROLOGUE(_nettle_poly1305_block)
+dnl PROLOGUE(_nettle_poly1305_digest)
+
+define(`fat_transform', `$1_ppc64')
+include_src(`powerpc64/p9/poly1305-internal.asm')
diff --git a/powerpc64/p9/poly1305-internal.asm b/powerpc64/p9/poly1305-internal.asm
new file mode 100644 (file)
index 0000000..238d639
--- /dev/null
@@ -0,0 +1,238 @@
+C powerpc64/p9/poly1305-internal.asm
+
+ifelse(`
+   Copyright (C) 2013, 2022 Niels Möller
+   Copyright (C) 2022 Mamone Tarsha
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+C Register usage:
+
+define(`SP', `r1')
+define(`TOCP', `r2')
+
+C Arguments
+define(`CTX', `r3')
+define(`M', `r4')
+define(`M128', `r5')
+
+C Working state
+define(`H0', `r6')
+define(`H1', `r7')
+define(`H2', `r8')
+define(`T0', `r9')
+define(`T1', `r10')
+define(`T2', `r8')
+define(`T2A', `r9')
+define(`T2S', `r10')
+define(`IDX', `r6')
+define(`RZ', `r7')
+
+define(`ZERO', `v0')
+define(`F0', `v1')
+define(`F1', `v2')
+define(`F0S', `v3')
+define(`T', `v4')
+
+define(`R', `v5')
+define(`S', `v6')
+
+define(`T00', `v7')
+define(`T10', `v8')
+define(`T11', `v9')
+define(`MU0', `v10')
+define(`MU1', `v11')
+define(`TMP', `v12')
+
+.text
+
+C void _nettle_poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16])
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_poly1305_set_key)
+       li                      r9, 0             C Zero, stored below to reset the state
+       addis           r5, TOCP, .key_mask@got@ha    C Address of .key_mask, fetched through the GOT
+       ld                      r5, .key_mask@got@l(r5)
+       ld                      r8, 0(r5)         C r8 = 0x0FFFFFFC0FFFFFFC, mask for R_1
+       ori                     r7, r8, 3         C r7 = 0x0FFFFFFC0FFFFFFF, mask for R_0
+
+       C Load R_0 and R_1
+IF_LE(`
+       ld                      r5, 0(r4)
+       ld                      r6, 8(r4)
+')
+IF_BE(`
+       ldbrx           r5, 0, r4
+       addi            r4, r4, 8
+       ldbrx           r6, 0, r4
+')
+       and                     r5, r5, r7        C R_0 &= 0x0FFFFFFC0FFFFFFF
+       and                     r6, r6, r8        C R_1 &= 0x0FFFFFFC0FFFFFFC
+
+       srdi            r10, r6, 2        C r10 = R_1 >> 2
+       sldi            r7, r5, 2         C r7 = 4 * R_0
+       sldi            r8, r10, 2        C r8 = 4 * (R_1 >> 2)
+       add                     r7, r7, r5        C r7 = 5 * R_0
+       add                     r8, r8, r10       C r8 = 5 * (R_1 >> 2)
+
+       C Store key
+       std                     r5, 0(r3)         C ctx + 0:  R_0
+       std                     r6, 8(r3)         C ctx + 8:  R_1
+       std                     r7, 16(r3)        C ctx + 16: 5 * R_0
+       std                     r8, 24(r3)        C ctx + 24: 5 * (R_1 >> 2)
+       C Reset state
+       std                     r9, 32(r3)        C ctx + 32, 40, 48: three state words, zeroed
+       std                     r9, 40(r3)
+       std                     r9, 48(r3)
+
+       blr
+EPILOGUE(_nettle_poly1305_set_key)
+
+C void _nettle_poly1305_block(struct poly1305_ctx *ctx, const uint8_t *m, unsigned m128)
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_poly1305_block)
+       ld                      H0, 32(CTX)       C Current state, three 64-bit words
+       ld                      H1, 40(CTX)
+       ld                      H2, 48(CTX)
+IF_LE(`
+       ld                      T0, 0(M)
+       ld                      T1, 8(M)
+')
+IF_BE(`
+       ldbrx           T0, 0, M
+       addi            M, M, 8
+       ldbrx           T1, 0, M
+')     C Both paths leave bytes 0-7 of the block in T0 and bytes 8-15 in T1, read as little-endian
+
+       addc            T0, T0, H0        C T = state + block, carry propagated upwards
+       adde            T1, T1, H1
+       adde            T2, M128, H2      C M128 is added into the top word
+
+       mtvsrdd         VSR(T), T0, T1    C T = {T0, T1}
+
+       li                      IDX, 16
+       lxvd2x          VSR(R), 0, CTX    C R = doublewords at ctx + 0 and ctx + 8
+       lxvd2x          VSR(S), IDX, CTX  C S = doublewords at ctx + 16 and ctx + 24
+
+       andi.           T2A, T2, 3        C T2A = low two bits of T2
+       srdi            T2S, T2, 2        C T2S = T2 >> 2
+
+       li                      RZ, 0             C Zero in a register other than r0, usable as mtvsrdd operand
+       vxor            ZERO, ZERO, ZERO
+
+       xxpermdi        VSR(MU0), VSR(R), VSR(S), 0b01    C MU0 = {R dw0, S dw1}
+       xxswapd         VSR(MU1), VSR(R)                  C MU1 = {R dw1, R dw0}
+
+       mtvsrdd         VSR(T11), 0, T2A  C T11 = {0, T2A}
+       mtvsrdd         VSR(T00), T2S, RZ C T00 = {T2S, 0}
+       mtvsrdd         VSR(T10), 0, T2   C T10 = {0, T2}
+
+       vmsumudm        F0, T, MU0, ZERO  C F0 = T0 * MU0.dw0 + T1 * MU0.dw1
+       vmsumudm        F1, T, MU1, ZERO  C F1 = T0 * MU1.dw0 + T1 * MU1.dw1
+       vmsumudm        TMP, T11, MU1, ZERO       C TMP = T2A * MU1.dw1
+
+       vmsumudm        F0, T00, S, F0    C F0 += T2S * S.dw0
+       vmsumudm        F1, T10, MU0, F1  C F1 += T2 * MU0.dw1
+
+       xxmrgld         VSR(TMP), VSR(TMP), VSR(ZERO)     C TMP = {low doubleword of TMP, 0}
+       xxswapd         VSR(F0S), VSR(F0)
+       vadduqm         F1, F1, TMP
+       stxsd           F0S, 32(CTX)      C New state word at ctx + 32 = low doubleword of F0
+
+       li                      IDX, 40
+       xxmrghd         VSR(F0), VSR(ZERO), VSR(F0)       C F0 = {0, high doubleword of F0}
+       vadduqm         F1, F1, F0        C Carry the high half of F0 into F1
+       xxswapd         VSR(F1), VSR(F1)
+       stxvd2x         VSR(F1), IDX, CTX C New state words at ctx + 40 and ctx + 48
+
+       blr
+EPILOGUE(_nettle_poly1305_block)
+
+C void _nettle_poly1305_digest (struct poly1305_ctx *ctx, union nettle_block16 *s)
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_poly1305_digest)
+       C Load current state
+       ld                      r5, 32(r3)
+       ld                      r6, 40(r3)
+       ld                      r7, 48(r3)
+
+       C Fold high part of H2
+       li                      r10, 0
+       srdi            r9, r7, 2         C r9 = H2 >> 2
+       sldi            r8, r9, 2
+       add                     r8, r8, r9        C r8 = 5 * (H2 >> 2)
+       andi.           r7, r7, 3         C Keep only the low two bits of H2
+       addc            r5, r5, r8        C Add the folded part to the low word, propagate carry
+       adde            r6, r6, r10
+       adde            r7, r7, r10
+
+       C Add 5 to state, save result if it carries
+       li                      r8, 5
+       li                      r9, 0
+       li                      r10, -4
+       addc            r8, r8, r5        C r9:r8 = low two words of state + 5
+       adde            r9, r9, r6
+       adde.           r10, r10, r7      C r10 = top word + carry - 4, CR0 eq set when that is zero
+       iseleq          r5, r8, r5        C On eq take the sum with 5 added, else keep the state
+       iseleq          r6, r9, r6
+
+       C Load digest
+IF_LE(`
+       ld                      r7, 0(r4)
+       ld                      r8, 8(r4)
+')
+IF_BE(`
+       li                      r10, 8
+       ldbrx           r7, 0, r4
+       ldbrx           r8, r10, r4
+')
+
+       C Add hash to digest
+       addc            r5, r5, r7        C 128-bit add, carry out of the high word is dropped
+       adde            r6, r6, r8
+
+       C Store digest
+IF_LE(`
+       std                     r5, 0(r4)
+       std                     r6, 8(r4)
+')
+IF_BE(`
+       stdbrx          r5, 0, r4
+       stdbrx          r6, r10, r4
+')
+       C Reset hash
+       li                      r9, 0
+       std                     r9, 32(r3)        C Zero the three state words at ctx + 32, 40, 48
+       std                     r9, 40(r3)
+       std                     r9, 48(r3)
+
+       blr
+EPILOGUE(_nettle_poly1305_digest)
+
+.data
+.align 3
+.key_mask:
+.quad 0x0FFFFFFC0FFFFFFC