]> git.ipfire.org Git - thirdparty/gnutls.git/commitdiff
Imported Andy Polyakov's implementation of AES-GCM in aarch64
authorNikos Mavrogiannopoulos <nmav@gnutls.org>
Tue, 27 Sep 2016 19:43:05 +0000 (21:43 +0200)
committerNikos Mavrogiannopoulos <nmav@redhat.com>
Mon, 3 Oct 2016 11:53:05 +0000 (13:53 +0200)
cfg.mk
devel/perlasm/ghash-aarch64.pl [new symlink]
devel/perlasm/ghash-aarch64.pl.license [new symlink]
lib/accelerated/aarch64/Makefile.am
lib/accelerated/aarch64/aarch64-common.c
lib/accelerated/aarch64/aes-gcm-aarch64.c
lib/accelerated/aarch64/elf/ghash-aarch64.s [new file with mode: 0644]

diff --git a/cfg.mk b/cfg.mk
index 0cf082a9555dc26e28841c6f7d2c070aca77de6f..bba9eaf8ccd3cd5e3153815152ac06379aaf6067 100644 (file)
--- a/cfg.mk
+++ b/cfg.mk
@@ -135,6 +135,7 @@ web:
        -cp -v doc/reference/html/*.html doc/reference/html/*.png doc/reference/html/*.devhelp* doc/reference/html/*.css $(htmldir)/reference/
 
 ASM_SOURCES_XXX := \
+       lib/accelerated/aarch64/XXX/ghash-aarch64.s \
        lib/accelerated/aarch64/XXX/aes-aarch64.s \
        lib/accelerated/aarch64/XXX/sha1-armv8.s \
        lib/accelerated/aarch64/XXX/sha256-armv8.s \
diff --git a/devel/perlasm/ghash-aarch64.pl b/devel/perlasm/ghash-aarch64.pl
new file mode 120000 (symlink)
index 0000000..761f73c
--- /dev/null
@@ -0,0 +1 @@
+../openssl/crypto/modes/asm/ghashv8-armx.pl
\ No newline at end of file
diff --git a/devel/perlasm/ghash-aarch64.pl.license b/devel/perlasm/ghash-aarch64.pl.license
new file mode 120000 (symlink)
index 0000000..cd301a4
--- /dev/null
@@ -0,0 +1 @@
+license.txt
\ No newline at end of file
index f34507853b006f301b7b8c8e9ee610cb2d0895b8..edc1edd49591ad37567cd0a03ebcd7a0173df02b 100644 (file)
@@ -33,6 +33,9 @@ if ENABLE_MINITASN1
 AM_CPPFLAGS += -I$(srcdir)/../../minitasn1
 endif
 
+# Ensure the assembler accepts all aarch64 instruction-set extensions (the .s sources use crypto instructions)
+AM_CCASFLAGS = -Wa,-march=all
+
 EXTRA_DIST = README
 
 noinst_LTLIBRARIES = libaarch64.la
@@ -42,6 +45,6 @@ libaarch64_la_SOURCES = aarch64-common.c aarch64-common.h sha-aarch64.h sha-aarc
 
 if ASM_AARCH64
 libaarch64_la_SOURCES += elf/sha1-armv8.s elf/sha512-armv8.s elf/sha256-armv8.s \
-       elf/aes-aarch64.s
+       elf/aes-aarch64.s elf/ghash-aarch64.s
 
 endif #ASM_AARCH64
index 98661328484ce97f94b7560771ed750efc141947..310ea5508cc943516f9d8652d26652f12fdcaa3b 100644 (file)
@@ -193,20 +193,24 @@ void _register_aarch64_crypto(unsigned capabilities)
        if (_gnutls_arm_cpuid_s & ARMV8_AES) {
                _gnutls_debug_log("Aarch64 AES was detected\n");
 
-               ret =
-                   gnutls_crypto_single_cipher_register
-                   (GNUTLS_CIPHER_AES_128_GCM, 90,
-                    &_gnutls_aes_gcm_aarch64, 0);
+               if (_gnutls_arm_cpuid_s & ARMV8_PMULL) {
+                       _gnutls_debug_log("Aarch64 PMULL was detected\n");
+
+                       ret =
+                           gnutls_crypto_single_cipher_register
+                           (GNUTLS_CIPHER_AES_128_GCM, 90,
+                            &_gnutls_aes_gcm_aarch64, 0);
+                       if (ret < 0) {
+                                       gnutls_assert();
+                               }
+
+                       ret =
+                           gnutls_crypto_single_cipher_register
+                           (GNUTLS_CIPHER_AES_256_GCM, 90,
+                            &_gnutls_aes_gcm_aarch64, 0);
                        if (ret < 0) {
                                gnutls_assert();
                        }
-
-               ret =
-                   gnutls_crypto_single_cipher_register
-                   (GNUTLS_CIPHER_AES_256_GCM, 90,
-                    &_gnutls_aes_gcm_aarch64, 0);
-               if (ret < 0) {
-                       gnutls_assert();
                }
 
                ret =
index 37412363f39a88d8047cac9e812a8ff484e2512d..c571d0294f4dc90cec452ac9422ccbfaa0a64537 100644 (file)
  */
 
 /*
- * The following code is an implementation of the AES-128-GCM cipher
- * using the vpaes aarch64 code.
+ * The following code is an implementation of the AES-GCM cipher
+ * using the ARMv8 AES and PMULL (polynomial multiply) instructions.
  */
 
 #include "errors.h"
 #include "gnutls_int.h"
-
-#ifdef HAVE_LIBNETTLE
-
 #include <gnutls/crypto.h>
 #include "errors.h"
 #include <aes-aarch64.h>
 #include <aarch64-common.h>
+#include <nettle/memxor.h>
+#include <nettle/macros.h>
 #include <byteswap.h>
-#include <nettle/gcm.h>
 
-/* GCM mode 
- * It is used when the CPU doesn't include the PCLMUL instructions.
- */
-struct gcm_aarch64_aes_ctx GCM_CTX(AES_KEY);
+#define GCM_BLOCK_SIZE 16
 
-static void aarch64_aes_encrypt(const void *_ctx,
-                               size_t length, uint8_t * dst,
-                               const uint8_t * src)
-{
-       AES_KEY *ctx = (void*)_ctx;
+/* GCM mode */
 
-       aes_v8_encrypt(src, dst, ctx);
-}
+typedef struct { /* 128-bit quantity held as two 64-bit words; element type of gcm128_context.Htable */
+       uint64_t hi, lo;
+} u128;
 
-static void aarch64_aes_128_set_encrypt_key(void *_ctx,
-                                   const uint8_t * key)
-{
-       AES_KEY *ctx = _ctx;
+/* This is the gcm128 structure used in openssl. It
+ * is compatible with the included assembly code, which is handed Xi and Htable.
+ */
+struct gcm128_context {
+       union { /* one 16-byte block, viewable as 64-bit words, 32-bit words or bytes */
+               uint64_t u[2];
+               uint32_t d[4];
+               uint8_t c[16];
+       } Yi, EKi, EK0, len, Xi, H; /* as used in this file: Yi counter block, EK0 encrypted initial counter, len.u[0]/u[1] AAD/text byte counts, Xi GHASH state, H input to gcm_init_v8; EKi is not referenced here */
+       u128 Htable[16]; /* filled by gcm_init_v8() in aes_gcm_cipher_setkey() */
+};
 
-       aes_v8_set_encrypt_key(key, 16*8, ctx);
-}
+struct aes_gcm_ctx { /* per-cipher-handle state allocated in aes_gcm_cipher_init() */
+       AES_KEY expanded_key; /* key schedule written by aes_v8_set_encrypt_key(); always accessed through ALIGN16() */
+       struct gcm128_context gcm; /* counter block, byte counters and GHASH state */
+};
 
-static void aarch64_aes_256_set_encrypt_key(void *_ctx,
-                                   const uint8_t * key)
+void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]); /* defined in elf/ghash-aarch64.s; writes Htable from the 16-byte value passed as Xi */
+void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16],
+                    const uint8_t * inp, size_t len); /* defined in elf/ghash-aarch64.s; updates Xi in place from len bytes at inp */
+void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]); /* defined in elf/ghash-aarch64.s; updates Xi in place using Htable only */
+
+static void aes_gcm_deinit(void *_ctx) /* release a context allocated by aes_gcm_cipher_init() */
 {
-       AES_KEY *ctx = _ctx;
+       struct aes_gcm_ctx *ctx = _ctx;
 
-       aes_v8_set_encrypt_key(key, 32*8, ctx);
+       zeroize_temp_key(ctx, sizeof(*ctx)); /* presumably wipes the whole structure (key schedule and GCM state) before it is freed */
+       gnutls_free(ctx);
 }
 
 static int
@@ -77,7 +83,7 @@ aes_gcm_cipher_init(gnutls_cipher_algorithm_t algorithm, void **_ctx,
            algorithm != GNUTLS_CIPHER_AES_256_GCM)
                return GNUTLS_E_INVALID_REQUEST;
 
-       *_ctx = gnutls_calloc(1, sizeof(struct gcm_aarch64_aes_ctx));
+       *_ctx = gnutls_calloc(1, sizeof(struct aes_gcm_ctx));
        if (*_ctx == NULL) {
                gnutls_assert();
                return GNUTLS_E_MEMORY_ERROR;
@@ -87,40 +93,127 @@ aes_gcm_cipher_init(gnutls_cipher_algorithm_t algorithm, void **_ctx,
 }
 
 static int
-aes_gcm_cipher_setkey(void *_ctx, const void *key, size_t keysize)
+aes_gcm_cipher_setkey(void *_ctx, const void *userkey, size_t keysize) /* expand the AES key and build the GHASH table; keysize is in bytes */
 {
-       struct gcm_aarch64_aes_ctx *ctx = _ctx;
+       struct aes_gcm_ctx *ctx = _ctx;
+       int ret;
+
+       ret =
+           aes_v8_set_encrypt_key(userkey, keysize * 8,
+                                 ALIGN16(&ctx->expanded_key));
+       if (ret != 0) /* the key-size checks of the removed code are replaced by this return-value check */
+               return gnutls_assert_val(GNUTLS_E_ENCRYPTION_FAILED);
+
+       aes_v8_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, ALIGN16(&ctx->expanded_key)); /* encrypts H in place; H is all-zero here only because the context came from gnutls_calloc() - NOTE(review): a second setkey on the same context would start from the previous H, confirm callers never rekey */
 
-       if (keysize == 16) {
-               GCM_SET_KEY(ctx, aarch64_aes_128_set_encrypt_key, aarch64_aes_encrypt,
-                           key);
-       } else if (keysize == 32) {
-               GCM_SET_KEY(ctx, aarch64_aes_256_set_encrypt_key, aarch64_aes_encrypt,
-                           key);
-       } else abort();
+       ctx->gcm.H.u[0] = bswap_64(ctx->gcm.H.u[0]); /* byte-swap each 64-bit half before handing H to the assembly */
+       ctx->gcm.H.u[1] = bswap_64(ctx->gcm.H.u[1]);
+
+       gcm_init_v8(ctx->gcm.Htable, ctx->gcm.H.u); /* fills Htable, which every later gcm_ghash_v8/gcm_gmult_v8 call reads */
 
        return 0;
 }
 
 static int aes_gcm_setiv(void *_ctx, const void *iv, size_t iv_size)
 {
-       struct gcm_aarch64_aes_ctx *ctx = _ctx;
+       struct aes_gcm_ctx *ctx = _ctx;
 
        if (iv_size != GCM_BLOCK_SIZE - 4)
                return gnutls_assert_val(GNUTLS_E_INVALID_REQUEST);
 
-       GCM_SET_IV(ctx, iv_size, iv);
+       memset(ctx->gcm.Xi.c, 0, sizeof(ctx->gcm.Xi.c)); /* restart the GHASH accumulator for the new message */
+       memset(ctx->gcm.len.c, 0, sizeof(ctx->gcm.len.c)); /* reset both byte counters (AAD and text) */
+
+       memcpy(ctx->gcm.Yi.c, iv, GCM_BLOCK_SIZE - 4); /* counter block = 12-byte IV followed by a 32-bit big-endian counter */
+       ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 4] = 0;
+       ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 3] = 0;
+       ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 2] = 0;
+       ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 1] = 1; /* counter value 1 is used only for EK0 below */
 
+       aes_v8_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c, /* EK0 = encrypted counter-1 block; aes_gcm_tag() XORs it into the tag */
+                       ALIGN16(&ctx->expanded_key));
+       ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 1] = 2; /* data encryption starts at counter value 2 */
        return 0;
 }
 
+static void
+gcm_ghash(struct aes_gcm_ctx *ctx, const uint8_t * src, size_t src_size) /* fold src_size bytes of src into the GHASH accumulator ctx->gcm.Xi */
+{
+       size_t rest = src_size % GCM_BLOCK_SIZE; /* bytes beyond the last whole 16-byte block */
+       size_t aligned_size = src_size - rest; /* length covered by whole blocks */
+
+       if (aligned_size > 0) /* whole blocks go to the assembly routine in one call */
+               gcm_ghash_v8(ctx->gcm.Xi.u, ctx->gcm.Htable, src,
+                               aligned_size);
+
+       if (rest > 0) { /* a partial block is absorbed immediately, so a size that is not a multiple of 16 must be the last call for that data stream */
+               memxor(ctx->gcm.Xi.c, src + aligned_size, rest); /* XOR only the tail bytes into Xi; same result as zero-padding the tail to 16 bytes */
+               gcm_gmult_v8(ctx->gcm.Xi.u, ctx->gcm.Htable); /* then run the multiplication step on Xi */
+       }
+}
+
+static void
+ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, /* CTR-mode XOR of `blocks` 16-byte blocks; in may alias out */
+                    size_t blocks, const AES_KEY *key,
+                    const unsigned char ivec[16])
+{
+       size_t i; /* same type as `blocks`, so the loop bound cannot wrap */
+       uint8_t ctr[16], ks[16]; /* private counter copy (ivec is left untouched) and keystream scratch block */
+
+       memcpy(ctr, ivec, 16);
+
+       for (i = 0; i < blocks; i++) {
+               aes_v8_encrypt(ctr, ks, key); /* keystream goes to scratch: writing it to `out` would clobber `in` for in-place calls */
+               memxor(ks, in, 16); /* ks = keystream XOR input */
+               memcpy(out, ks, 16);
+               out += 16;
+               in += 16;
+               INCREMENT(16, ctr); /* advance the local counter; callers update ctx->gcm.Yi themselves */
+       }
+}
+
+static inline void
+ctr_encrypt_last(struct aes_gcm_ctx *ctx, const uint8_t * src, /* en/decrypt the final `length` (at most GCM_BLOCK_SIZE) bytes found at offset pos */
+                uint8_t * dst, size_t pos, size_t length)
+{
+       uint8_t tmp[GCM_BLOCK_SIZE]; /* only the first `length` bytes are initialised by the memcpy below */
+       uint8_t out[GCM_BLOCK_SIZE];
+
+       memcpy(tmp, &src[pos], length);
+       ctr32_encrypt_blocks(tmp, out, 1, /* process one full block through the scratch buffers */
+                            ALIGN16(&ctx->expanded_key),
+                            ctx->gcm.Yi.c); /* uses the current counter block; nothing here advances it */
+
+       memcpy(&dst[pos], out, length); /* copy back only the valid bytes */
+
+}
+
 static int
 aes_gcm_encrypt(void *_ctx, const void *src, size_t src_size,
                void *dst, size_t length)
 {
-       struct gcm_aarch64_aes_ctx *ctx = _ctx;
+       struct aes_gcm_ctx *ctx = _ctx; /* NOTE(review): `length` (dst capacity) is never checked against src_size */
+       int blocks = src_size / GCM_BLOCK_SIZE; /* number of whole blocks; NOTE(review): size_t narrowed to int */
+       int exp_blocks = blocks * GCM_BLOCK_SIZE; /* bytes covered by whole blocks */
+       int rest = src_size - (exp_blocks); /* trailing bytes, fewer than 16 */
+       uint32_t counter;
+
+       if (blocks > 0) {
+               ctr32_encrypt_blocks(src, dst,
+                                    blocks,
+                                    ALIGN16(&ctx->expanded_key),
+                                    ctx->gcm.Yi.c);
+
+               counter = _gnutls_read_uint32(ctx->gcm.Yi.c + 12); /* ctr32_encrypt_blocks counts on a private copy, so advance the stored counter here */
+               counter += blocks;
+               _gnutls_write_uint32(counter, ctx->gcm.Yi.c + 12);
+       }
 
-       GCM_ENCRYPT(ctx, aarch64_aes_encrypt, src_size, dst, src);
+       if (rest > 0)           /* last incomplete block */
+               ctr_encrypt_last(ctx, src, dst, exp_blocks, rest); /* the stored counter is not advanced afterwards, so a partial block must end the message */
+
+       gcm_ghash(ctx, dst, src_size); /* authenticate the ciphertext that was just written */
+       ctx->gcm.len.u[1] += src_size; /* u[1] accumulates text bytes for the length block built in aes_gcm_tag() */
 
        return 0;
 }
@@ -129,34 +222,62 @@ static int
 aes_gcm_decrypt(void *_ctx, const void *src, size_t src_size,
                void *dst, size_t dst_size)
 {
-       struct gcm_aarch64_aes_ctx *ctx = _ctx;
+       struct aes_gcm_ctx *ctx = _ctx; /* NOTE(review): `dst_size` is never checked against src_size */
+       int blocks = src_size / GCM_BLOCK_SIZE; /* number of whole blocks; NOTE(review): size_t narrowed to int */
+       int exp_blocks = blocks * GCM_BLOCK_SIZE; /* bytes covered by whole blocks */
+       int rest = src_size - (exp_blocks); /* trailing bytes, fewer than 16 */
+       uint32_t counter;
+
+       gcm_ghash(ctx, src, src_size); /* hash the ciphertext first, before anything is written to dst */
+       ctx->gcm.len.u[1] += src_size; /* u[1] accumulates text bytes for the length block built in aes_gcm_tag() */
+
+       if (blocks > 0) {
+               ctr32_encrypt_blocks(src, dst, /* CTR decryption is the same XOR as encryption */
+                                    blocks,
+                                    ALIGN16(&ctx->expanded_key),
+                                    ctx->gcm.Yi.c);
+
+               counter = _gnutls_read_uint32(ctx->gcm.Yi.c + 12); /* advance the stored 32-bit counter by the blocks just consumed */
+               counter += blocks;
+               _gnutls_write_uint32(counter, ctx->gcm.Yi.c + 12);
+       }
+
+       if (rest > 0)           /* last incomplete block */
+               ctr_encrypt_last(ctx, src, dst, exp_blocks, rest); /* the stored counter is not advanced afterwards, so a partial block must end the message */
 
-       GCM_DECRYPT(ctx, aarch64_aes_encrypt, src_size, dst, src);
        return 0;
 }
 
 static int aes_gcm_auth(void *_ctx, const void *src, size_t src_size)
 {
-       struct gcm_aarch64_aes_ctx *ctx = _ctx;
+       struct aes_gcm_ctx *ctx = _ctx;
 
-       GCM_UPDATE(ctx, src_size, src);
+       gcm_ghash(ctx, src, src_size); /* fold the additional authenticated data into Xi */
+       ctx->gcm.len.u[0] += src_size; /* u[0] accumulates AAD bytes; aes_gcm_tag() converts the total to bits */
 
        return 0;
 }
 
+
 static void aes_gcm_tag(void *_ctx, void *tag, size_t tagsize)
 {
-       struct gcm_aarch64_aes_ctx *ctx = _ctx;
+       struct aes_gcm_ctx *ctx = _ctx;
+       uint8_t buffer[GCM_BLOCK_SIZE]; /* final block: AAD bit length followed by text bit length */
+       uint64_t alen, clen;
 
-       GCM_DIGEST(ctx, aarch64_aes_encrypt, tagsize, tag);
-}
+       alen = ctx->gcm.len.u[0] * 8; /* byte counts kept by aes_gcm_auth()/encrypt/decrypt, converted to bits */
+       clen = ctx->gcm.len.u[1] * 8;
 
-static void aes_gcm_deinit(void *_ctx)
-{
-       struct gcm_aarch64_aes_ctx *ctx = _ctx;
+       _gnutls_write_uint64(alen, buffer);
+       _gnutls_write_uint64(clen, &buffer[8]);
 
-       zeroize_temp_key(ctx, sizeof(*ctx));
-       gnutls_free(ctx);
+       gcm_ghash_v8(ctx->gcm.Xi.u, ctx->gcm.Htable, buffer, /* absorb the length block into Xi */
+                       GCM_BLOCK_SIZE);
+
+       ctx->gcm.Xi.u[0] ^= ctx->gcm.EK0.u[0]; /* mask with EK0 from aes_gcm_setiv(); this overwrites Xi, so the tag can be taken once per IV */
+       ctx->gcm.Xi.u[1] ^= ctx->gcm.EK0.u[1];
+
+       memcpy(tag, ctx->gcm.Xi.c, MIN(GCM_BLOCK_SIZE, tagsize)); /* at most 16 bytes are written, fewer when tagsize is smaller */
 }
 
 #include "../x86/aes-gcm-aead.h"
@@ -173,5 +294,3 @@ const gnutls_crypto_cipher_st _gnutls_aes_gcm_aarch64 = {
        .tag = aes_gcm_tag,
        .auth = aes_gcm_auth,
 };
-
-#endif
diff --git a/lib/accelerated/aarch64/elf/ghash-aarch64.s b/lib/accelerated/aarch64/elf/ghash-aarch64.s
new file mode 100644 (file)
index 0000000..13faf91
--- /dev/null
@@ -0,0 +1,266 @@
+# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 
+#     * Redistributions of source code must retain copyright notices,
+#      this list of conditions and the following disclaimer.
+#
+#     * Redistributions in binary form must reproduce the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer in the documentation and/or other materials
+#      provided with the distribution.
+#
+#     * Neither the name of the Andy Polyakov nor the names of its
+#      copyright holder and contributors may be used to endorse or
+#      promote products derived from this software without specific
+#      prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+# 1 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S"
+# 1 "<built-in>"
+# 1 "<command-line>"
+# 1 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S"
+# 1 "lib/accelerated/aarch64/aarch64-common.h" 1
+# 2 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S" 2
+
+.text
+.arch armv8-a+crypto
+.globl gcm_init_v8
+.type gcm_init_v8,%function
+.align 4
+gcm_init_v8: // called from C as gcm_init_v8(Htable, H): x0 = table to fill, x1 = 16-byte input
+ ld1 {v17.2d},[x1] // load the 16-byte input
+ movi v19.16b,#0xe1
+ shl v19.2d,v19.2d,#57 // each 64-bit lane of v19 is now 0xc200000000000000
+ ext v3.16b,v17.16b,v17.16b,#8
+ ushr v18.2d,v19.2d,#63
+ dup v17.4s,v17.s[1]
+ ext v16.16b,v18.16b,v19.16b,#8
+ ushr v18.2d,v3.2d,#63
+ sshr v17.4s,v17.4s,#31
+ and v18.16b,v18.16b,v16.16b
+ shl v3.2d,v3.2d,#1
+ ext v18.16b,v18.16b,v18.16b,#8
+ and v16.16b,v16.16b,v17.16b
+ orr v3.16b,v3.16b,v18.16b
+ eor v20.16b,v3.16b,v16.16b
+ st1 {v20.2d},[x0],#16 // store the first 16-byte table entry and advance x0 past it
+
+
+ ext v16.16b,v20.16b,v20.16b,#8
+ pmull v0.1q,v20.1d,v20.1d // polynomial multiply of v20 with itself (low halves; pmull2 below does the high halves)
+ eor v16.16b,v16.16b,v20.16b
+ pmull2 v2.1q,v20.2d,v20.2d
+ pmull v1.1q,v16.1d,v16.1d
+
+ ext v17.16b,v0.16b,v2.16b,#8
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d // multiply by the constant prepared in v19
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ eor v0.16b,v1.16b,v18.16b
+
+ ext v18.16b,v0.16b,v0.16b,#8
+ pmull v0.1q,v0.1d,v19.1d
+ eor v18.16b,v18.16b,v2.16b
+ eor v22.16b,v0.16b,v18.16b
+
+ ext v17.16b,v22.16b,v22.16b,#8
+ eor v17.16b,v17.16b,v22.16b
+ ext v21.16b,v16.16b,v17.16b,#8
+ st1 {v21.2d,v22.2d},[x0] // store two further 16-byte entries: 48 bytes of the table are written in total
+
+ ret
+.size gcm_init_v8,.-gcm_init_v8
+.globl gcm_gmult_v8
+.type gcm_gmult_v8,%function
+.align 4
+gcm_gmult_v8: // called from C as gcm_gmult_v8(Xi, Htable): x0 = Xi (updated in place), x1 = Htable
+ ld1 {v17.2d},[x0] // load the current 16-byte Xi
+ movi v19.16b,#0xe1
+ ld1 {v20.2d,v21.2d},[x1] // load the first two 16-byte table entries
+ shl v19.2d,v19.2d,#57 // each 64-bit lane of v19 is now 0xc200000000000000
+
+ rev64 v17.16b,v17.16b // reverse the bytes inside each 64-bit half
+
+ ext v3.16b,v17.16b,v17.16b,#8
+
+ pmull v0.1q,v20.1d,v3.1d // polynomial multiplies of the table entry in v20 with the Xi value in v3
+ eor v17.16b,v17.16b,v3.16b
+ pmull2 v2.1q,v20.2d,v3.2d
+ pmull v1.1q,v21.1d,v17.1d
+
+ ext v17.16b,v0.16b,v2.16b,#8
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d // multiply by the constant prepared in v19
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ eor v0.16b,v1.16b,v18.16b
+
+ ext v18.16b,v0.16b,v0.16b,#8
+ pmull v0.1q,v0.1d,v19.1d
+ eor v18.16b,v18.16b,v2.16b
+ eor v0.16b,v0.16b,v18.16b
+
+
+ rev64 v0.16b,v0.16b // undo the byte reversal applied on entry
+
+ ext v0.16b,v0.16b,v0.16b,#8
+ st1 {v0.2d},[x0] // write the result back over Xi
+
+ ret
+.size gcm_gmult_v8,.-gcm_gmult_v8
+.globl gcm_ghash_v8
+.type gcm_ghash_v8,%function
+.align 4
+gcm_ghash_v8: // called from C as gcm_ghash_v8(Xi, Htable, inp, len): x0 = Xi, x1 = Htable, x2 = input, x3 = byte count (the C callers pass multiples of 16)
+ ld1 {v0.2d},[x0] // load the current 16-byte Xi
+
+
+
+
+
+ subs x3,x3,#32 // x3 = len - 32; the flags set here drive the csel and b.lo below
+ mov x12,#16 // x12 = post-increment used for the later input loads
+# 116 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S"
+ ld1 {v20.2d,v21.2d},[x1],#32 // load the first two 16-byte table entries
+ movi v19.16b,#0xe1
+ ld1 {v22.2d},[x1] // load the third table entry
+ csel x12,xzr,x12,eq // len == 32: stop advancing the input pointer after the next load
+ ext v0.16b,v0.16b,v0.16b,#8
+ ld1 {v16.2d},[x2],#16 // load the first input block and advance the input pointer
+ shl v19.2d,v19.2d,#57 // each 64-bit lane of v19 is now 0xc200000000000000
+
+ rev64 v16.16b,v16.16b
+ rev64 v0.16b,v0.16b
+
+ ext v3.16b,v16.16b,v16.16b,#8
+ b.lo .Lodd_tail_v8 // len < 32: only a single block to process
+ ld1 {v17.2d},[x2],x12 // load the second input block
+
+ rev64 v17.16b,v17.16b
+
+ ext v7.16b,v17.16b,v17.16b,#8
+ eor v3.16b,v3.16b,v0.16b
+ pmull v4.1q,v20.1d,v7.1d
+ eor v17.16b,v17.16b,v7.16b
+ pmull2 v6.1q,v20.2d,v7.2d
+ b .Loop_mod2x_v8
+
+.align 4
+.Loop_mod2x_v8: // main loop: x3 drops by 32 per iteration, i.e. two input blocks at a time
+ ext v18.16b,v3.16b,v3.16b,#8
+ subs x3,x3,#32
+ pmull v0.1q,v22.1d,v3.1d
+ csel x12,xzr,x12,lo
+
+ pmull v5.1q,v21.1d,v17.1d
+ eor v18.16b,v18.16b,v3.16b
+ pmull2 v2.1q,v22.2d,v3.2d
+ eor v0.16b,v0.16b,v4.16b
+ pmull2 v1.1q,v21.2d,v18.2d
+ ld1 {v16.2d},[x2],x12 // fetch the next pair of blocks while the current pair is being multiplied
+
+ eor v2.16b,v2.16b,v6.16b
+ csel x12,xzr,x12,eq
+ eor v1.16b,v1.16b,v5.16b
+
+ ext v17.16b,v0.16b,v2.16b,#8
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ ld1 {v17.2d},[x2],x12
+
+ rev64 v16.16b,v16.16b
+
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d
+
+
+ rev64 v17.16b,v17.16b
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ ext v7.16b,v17.16b,v17.16b,#8
+ ext v3.16b,v16.16b,v16.16b,#8
+ eor v0.16b,v1.16b,v18.16b
+ pmull v4.1q,v20.1d,v7.1d
+ eor v3.16b,v3.16b,v2.16b
+
+ ext v18.16b,v0.16b,v0.16b,#8
+ pmull v0.1q,v0.1d,v19.1d
+ eor v3.16b,v3.16b,v18.16b
+ eor v17.16b,v17.16b,v7.16b
+ eor v3.16b,v3.16b,v0.16b
+ pmull2 v6.1q,v20.2d,v7.2d
+ b.hs .Loop_mod2x_v8 // repeat while the subs above did not borrow
+
+ eor v2.16b,v2.16b,v18.16b
+ ext v3.16b,v16.16b,v16.16b,#8
+ adds x3,x3,#32 // restore the remaining byte count
+ eor v0.16b,v0.16b,v2.16b
+ b.eq .Ldone_v8 // nothing left: skip the single-block tail
+.Lodd_tail_v8: // process one remaining block
+ ext v18.16b,v0.16b,v0.16b,#8
+ eor v3.16b,v3.16b,v0.16b
+ eor v17.16b,v16.16b,v18.16b
+
+ pmull v0.1q,v20.1d,v3.1d
+ eor v17.16b,v17.16b,v3.16b
+ pmull2 v2.1q,v20.2d,v3.2d
+ pmull v1.1q,v21.1d,v17.1d
+
+ ext v17.16b,v0.16b,v2.16b,#8
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ eor v0.16b,v1.16b,v18.16b
+
+ ext v18.16b,v0.16b,v0.16b,#8
+ pmull v0.1q,v0.1d,v19.1d
+ eor v18.16b,v18.16b,v2.16b
+ eor v0.16b,v0.16b,v18.16b
+
+.Ldone_v8:
+
+ rev64 v0.16b,v0.16b // undo the byte reversal applied on entry
+
+ ext v0.16b,v0.16b,v0.16b,#8
+ st1 {v0.2d},[x0] // write the updated Xi back
+
+ ret
+.size gcm_ghash_v8,.-gcm_ghash_v8
+.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2
+.align 2
+.section .note.GNU-stack,"",%progbits