]> git.ipfire.org Git - thirdparty/nettle.git/commitdiff
[AArch64] Optimize AES with fat build support
authorMamone Tarsha <maamoun.tk@googlemail.com>
Fri, 30 Jul 2021 11:45:28 +0000 (14:45 +0300)
committerMamone Tarsha <maamoun.tk@googlemail.com>
Fri, 30 Jul 2021 11:45:28 +0000 (14:45 +0300)
14 files changed:
arm64/crypto/aes128-decrypt.asm [new file with mode: 0644]
arm64/crypto/aes128-encrypt.asm [new file with mode: 0644]
arm64/crypto/aes192-decrypt.asm [new file with mode: 0644]
arm64/crypto/aes192-encrypt.asm [new file with mode: 0644]
arm64/crypto/aes256-decrypt.asm [new file with mode: 0644]
arm64/crypto/aes256-encrypt.asm [new file with mode: 0644]
arm64/fat/aes128-decrypt-2.asm [new file with mode: 0644]
arm64/fat/aes128-encrypt-2.asm [new file with mode: 0644]
arm64/fat/aes192-decrypt-2.asm [new file with mode: 0644]
arm64/fat/aes192-encrypt-2.asm [new file with mode: 0644]
arm64/fat/aes256-decrypt-2.asm [new file with mode: 0644]
arm64/fat/aes256-encrypt-2.asm [new file with mode: 0644]
configure.ac
fat-arm64.c

diff --git a/arm64/crypto/aes128-decrypt.asm b/arm64/crypto/aes128-decrypt.asm
new file mode 100644 (file)
index 0000000..f459bb1
--- /dev/null
@@ -0,0 +1,163 @@
+C arm64/crypto/aes128-decrypt.asm
+
+ifelse(`
+   Copyright (C) 2021 Mamone Tarsha
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+.file "aes128-decrypt.asm"
+.arch armv8-a+crypto
+
+.text
+
+C Register usage:
+
+define(`KEYS', `x0')
+define(`LENGTH', `x1')
+define(`DST', `x2')
+define(`SRC', `x3')
+
+define(`S0', `v0')
+define(`S1', `v1')
+define(`S2', `v2')
+define(`S3', `v3')
+define(`K0', `v16')
+define(`K1', `v17')
+define(`K2', `v18')
+define(`K3', `v19')
+define(`K4', `v20')
+define(`K5', `v21')
+define(`K6', `v22')
+define(`K7', `v23')
+define(`K8', `v24')
+define(`K9', `v25')
+define(`K10', `v26')
+
+C AES decryption round of 4-blocks
+C AESD_ROUND_4B(KEY)
+define(`AESD_ROUND_4B', m4_assert_numargs(1)`
+    aesd           S0.16b,$1.16b
+    aesimc         S0.16b,S0.16b
+    aesd           S1.16b,$1.16b
+    aesimc         S1.16b,S1.16b
+    aesd           S2.16b,$1.16b
+    aesimc         S2.16b,S2.16b
+    aesd           S3.16b,$1.16b
+    aesimc         S3.16b,S3.16b
+')
+
+C AES last decryption round of 4-blocks
+C AESD_LAST_ROUND_4B(KEY, LAST_KEY)
+define(`AESD_LAST_ROUND_4B', m4_assert_numargs(2)`
+    aesd           S0.16b,$1.16b
+    eor            S0.16b,S0.16b,$2.16b
+    aesd           S1.16b,$1.16b
+    eor            S1.16b,S1.16b,$2.16b
+    aesd           S2.16b,$1.16b
+    eor            S2.16b,S2.16b,$2.16b
+    aesd           S3.16b,$1.16b
+    eor            S3.16b,S3.16b,$2.16b
+')
+
+C AES decryption round of 1-block
+C AESD_ROUND_1B(KEY)
+define(`AESD_ROUND_1B', m4_assert_numargs(1)`
+    aesd           S0.16b,$1.16b
+    aesimc         S0.16b,S0.16b
+')
+
+C AES last decryption round of 1-block
+C AESD_LAST_ROUND_1B(KEY, LAST_KEY)
+define(`AESD_LAST_ROUND_1B', m4_assert_numargs(2)`
+    aesd           S0.16b,$1.16b
+    eor            S0.16b,S0.16b,$2.16b
+')
+
+C void
+C aes128_decrypt(const struct aes128_ctx *ctx,
+C                size_t length, uint8_t *dst,
+C                const uint8_t *src)
+C Handles 64 bytes (four blocks) per pass while possible, then 16 bytes per pass.
+PROLOGUE(nettle_aes128_decrypt)
+    ands           x4,LENGTH,#-64
+    b.eq           L1B
+C Load the round keys through scratch pointer x5 so that KEYS stays unmodified.
+    mov            x5,KEYS
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
+    ld1            {K8.4s,K9.4s,K10.4s},[x5]
+C x4 holds LENGTH with the low 6 bits cleared: the bytes handled 64 at a time.
+L4B_loop:
+    ld1            {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
+C Nine aesd/aesimc rounds (K0-K8), then aesd with K9 and a final eor with K10.
+    AESD_ROUND_4B(K0)
+    AESD_ROUND_4B(K1)
+    AESD_ROUND_4B(K2)
+    AESD_ROUND_4B(K3)
+    AESD_ROUND_4B(K4)
+    AESD_ROUND_4B(K5)
+    AESD_ROUND_4B(K6)
+    AESD_ROUND_4B(K7)
+    AESD_ROUND_4B(K8)
+    AESD_LAST_ROUND_4B(K9,K10)
+
+    st1            {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64
+
+    subs           x4,x4,#64
+    b.ne           L4B_loop
+C Keep only the low 6 bits of LENGTH: the bytes left for the 1-block loop.
+    and            LENGTH,LENGTH,#63
+
+L1B:
+    cbz            LENGTH,Ldone
+C The round keys are loaded again here, even when the 4-block path ran first.
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+    ld1            {K8.4s,K9.4s,K10.4s},[KEYS]
+
+L1B_loop:
+    ld1            {S0.16b},[SRC],#16
+    
+    AESD_ROUND_1B(K0)
+    AESD_ROUND_1B(K1)
+    AESD_ROUND_1B(K2)
+    AESD_ROUND_1B(K3)
+    AESD_ROUND_1B(K4)
+    AESD_ROUND_1B(K5)
+    AESD_ROUND_1B(K6)
+    AESD_ROUND_1B(K7)
+    AESD_ROUND_1B(K8)
+    AESD_LAST_ROUND_1B(K9,K10)
+
+    st1            {S0.16b},[DST],#16
+C NOTE: b.ne exits only when LENGTH reaches exactly zero, so a multiple of 16 is expected.
+    subs           LENGTH,LENGTH,#16
+    b.ne           L1B_loop
+
+Ldone:
+    ret
+EPILOGUE(nettle_aes128_decrypt)
diff --git a/arm64/crypto/aes128-encrypt.asm b/arm64/crypto/aes128-encrypt.asm
new file mode 100644 (file)
index 0000000..b3c3b9c
--- /dev/null
@@ -0,0 +1,163 @@
+C arm64/crypto/aes128-encrypt.asm
+
+ifelse(`
+   Copyright (C) 2021 Mamone Tarsha
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+.file "aes128-encrypt.asm"
+.arch armv8-a+crypto
+
+.text
+
+C Register usage:
+
+define(`KEYS', `x0')
+define(`LENGTH', `x1')
+define(`DST', `x2')
+define(`SRC', `x3')
+
+define(`S0', `v0')
+define(`S1', `v1')
+define(`S2', `v2')
+define(`S3', `v3')
+define(`K0', `v16')
+define(`K1', `v17')
+define(`K2', `v18')
+define(`K3', `v19')
+define(`K4', `v20')
+define(`K5', `v21')
+define(`K6', `v22')
+define(`K7', `v23')
+define(`K8', `v24')
+define(`K9', `v25')
+define(`K10', `v26')
+
+C AES encryption round of 4-blocks
+C AESE_ROUND_4B(KEY)
+define(`AESE_ROUND_4B', m4_assert_numargs(1)`
+    aese           S0.16b,$1.16b
+    aesmc          S0.16b,S0.16b
+    aese           S1.16b,$1.16b
+    aesmc          S1.16b,S1.16b
+    aese           S2.16b,$1.16b
+    aesmc          S2.16b,S2.16b
+    aese           S3.16b,$1.16b
+    aesmc          S3.16b,S3.16b
+')
+
+C AES last encryption round of 4-blocks
+C AESE_LAST_ROUND_4B(KEY, LAST_KEY)
+define(`AESE_LAST_ROUND_4B', m4_assert_numargs(2)`
+    aese           S0.16b,$1.16b
+    eor            S0.16b,S0.16b,$2.16b
+    aese           S1.16b,$1.16b
+    eor            S1.16b,S1.16b,$2.16b
+    aese           S2.16b,$1.16b
+    eor            S2.16b,S2.16b,$2.16b
+    aese           S3.16b,$1.16b
+    eor            S3.16b,S3.16b,$2.16b
+')
+
+C AES encryption round of 1-block
+C AESE_ROUND_1B(KEY)
+define(`AESE_ROUND_1B', m4_assert_numargs(1)`
+    aese           S0.16b,$1.16b
+    aesmc          S0.16b,S0.16b
+')
+
+C AES last encryption round of 1-block
+C AESE_LAST_ROUND_1B(KEY, LAST_KEY)
+define(`AESE_LAST_ROUND_1B', m4_assert_numargs(2)`
+    aese           S0.16b,$1.16b
+    eor            S0.16b,S0.16b,$2.16b
+')
+
+C void
+C aes128_encrypt(const struct aes128_ctx *ctx,
+C                size_t length, uint8_t *dst,
+C                const uint8_t *src)
+C Handles 64 bytes (four blocks) per pass while possible, then 16 bytes per pass.
+PROLOGUE(nettle_aes128_encrypt)
+    ands           x4,LENGTH,#-64
+    b.eq           L1B
+C Load the round keys through scratch pointer x5 so that KEYS stays unmodified.
+    mov            x5,KEYS
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
+    ld1            {K8.4s,K9.4s,K10.4s},[x5]
+C x4 holds LENGTH with the low 6 bits cleared: the bytes handled 64 at a time.
+L4B_loop:
+    ld1            {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
+C Nine aese/aesmc rounds (K0-K8), then aese with K9 and a final eor with K10.
+    AESE_ROUND_4B(K0)
+    AESE_ROUND_4B(K1)
+    AESE_ROUND_4B(K2)
+    AESE_ROUND_4B(K3)
+    AESE_ROUND_4B(K4)
+    AESE_ROUND_4B(K5)
+    AESE_ROUND_4B(K6)
+    AESE_ROUND_4B(K7)
+    AESE_ROUND_4B(K8)
+    AESE_LAST_ROUND_4B(K9,K10)
+
+    st1            {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64
+
+    subs           x4,x4,#64
+    b.ne           L4B_loop
+C Keep only the low 6 bits of LENGTH: the bytes left for the 1-block loop.
+    and            LENGTH,LENGTH,#63
+
+L1B:
+    cbz            LENGTH,Ldone
+C The round keys are loaded again here, even when the 4-block path ran first.
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+    ld1            {K8.4s,K9.4s,K10.4s},[KEYS]
+
+L1B_loop:
+    ld1            {S0.16b},[SRC],#16
+    
+    AESE_ROUND_1B(K0)
+    AESE_ROUND_1B(K1)
+    AESE_ROUND_1B(K2)
+    AESE_ROUND_1B(K3)
+    AESE_ROUND_1B(K4)
+    AESE_ROUND_1B(K5)
+    AESE_ROUND_1B(K6)
+    AESE_ROUND_1B(K7)
+    AESE_ROUND_1B(K8)
+    AESE_LAST_ROUND_1B(K9,K10)
+
+    st1            {S0.16b},[DST],#16
+C NOTE: b.ne exits only when LENGTH reaches exactly zero, so a multiple of 16 is expected.
+    subs           LENGTH,LENGTH,#16
+    b.ne           L1B_loop
+
+Ldone:
+    ret
+EPILOGUE(nettle_aes128_encrypt)
diff --git a/arm64/crypto/aes192-decrypt.asm b/arm64/crypto/aes192-decrypt.asm
new file mode 100644 (file)
index 0000000..8d6bca7
--- /dev/null
@@ -0,0 +1,171 @@
+C arm64/crypto/aes192-decrypt.asm
+
+ifelse(`
+   Copyright (C) 2021 Mamone Tarsha
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+.file "aes192-decrypt.asm"
+.arch armv8-a+crypto
+
+.text
+
+C Register usage:
+
+define(`KEYS', `x0')
+define(`LENGTH', `x1')
+define(`DST', `x2')
+define(`SRC', `x3')
+
+define(`S0', `v0')
+define(`S1', `v1')
+define(`S2', `v2')
+define(`S3', `v3')
+define(`K0', `v16')
+define(`K1', `v17')
+define(`K2', `v18')
+define(`K3', `v19')
+define(`K4', `v20')
+define(`K5', `v21')
+define(`K6', `v22')
+define(`K7', `v23')
+define(`K8', `v24')
+define(`K9', `v25')
+define(`K10', `v26')
+define(`K11', `v27')
+define(`K12', `v28')
+
+C AES decryption round of 4-blocks
+C AESD_ROUND_4B(KEY)
+define(`AESD_ROUND_4B', m4_assert_numargs(1)`
+    aesd           S0.16b,$1.16b
+    aesimc         S0.16b,S0.16b
+    aesd           S1.16b,$1.16b
+    aesimc         S1.16b,S1.16b
+    aesd           S2.16b,$1.16b
+    aesimc         S2.16b,S2.16b
+    aesd           S3.16b,$1.16b
+    aesimc         S3.16b,S3.16b
+')
+
+C AES last decryption round of 4-blocks
+C AESD_LAST_ROUND_4B(KEY, LAST_KEY)
+define(`AESD_LAST_ROUND_4B', m4_assert_numargs(2)`
+    aesd           S0.16b,$1.16b
+    eor            S0.16b,S0.16b,$2.16b
+    aesd           S1.16b,$1.16b
+    eor            S1.16b,S1.16b,$2.16b
+    aesd           S2.16b,$1.16b
+    eor            S2.16b,S2.16b,$2.16b
+    aesd           S3.16b,$1.16b
+    eor            S3.16b,S3.16b,$2.16b
+')
+
+C AES decryption round of 1-block
+C AESD_ROUND_1B(KEY)
+define(`AESD_ROUND_1B', m4_assert_numargs(1)`
+    aesd           S0.16b,$1.16b
+    aesimc         S0.16b,S0.16b
+')
+
+C AES last decryption round of 1-block
+C AESD_LAST_ROUND_1B(KEY, LAST_KEY)
+define(`AESD_LAST_ROUND_1B', m4_assert_numargs(2)`
+    aesd           S0.16b,$1.16b
+    eor            S0.16b,S0.16b,$2.16b
+')
+
+C void
+C aes192_decrypt(const struct aes192_ctx *ctx,
+C                size_t length, uint8_t *dst,
+C                const uint8_t *src)
+C Handles 64 bytes (four blocks) per pass while possible, then 16 bytes per pass.
+PROLOGUE(nettle_aes192_decrypt)
+    ands           x4,LENGTH,#-64
+    b.eq           L1B
+C Load the round keys through scratch pointer x5 so that KEYS stays unmodified.
+    mov            x5,KEYS
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
+    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64
+    ld1            {K12.4s},[x5]
+C x4 holds LENGTH with the low 6 bits cleared: the bytes handled 64 at a time.
+L4B_loop:
+    ld1            {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
+C Eleven aesd/aesimc rounds (K0-K10), then aesd with K11 and a final eor with K12.
+    AESD_ROUND_4B(K0)
+    AESD_ROUND_4B(K1)
+    AESD_ROUND_4B(K2)
+    AESD_ROUND_4B(K3)
+    AESD_ROUND_4B(K4)
+    AESD_ROUND_4B(K5)
+    AESD_ROUND_4B(K6)
+    AESD_ROUND_4B(K7)
+    AESD_ROUND_4B(K8)
+    AESD_ROUND_4B(K9)
+    AESD_ROUND_4B(K10)
+    AESD_LAST_ROUND_4B(K11,K12)
+
+    st1            {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64
+
+    subs           x4,x4,#64
+    b.ne           L4B_loop
+C Keep only the low 6 bits of LENGTH: the bytes left for the 1-block loop.
+    and            LENGTH,LENGTH,#63
+
+L1B:
+    cbz            LENGTH,Ldone
+C The round keys are loaded again here, even when the 4-block path ran first.
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
+    ld1            {K12.4s},[KEYS]
+
+L1B_loop:
+    ld1            {S0.16b},[SRC],#16
+    
+    AESD_ROUND_1B(K0)
+    AESD_ROUND_1B(K1)
+    AESD_ROUND_1B(K2)
+    AESD_ROUND_1B(K3)
+    AESD_ROUND_1B(K4)
+    AESD_ROUND_1B(K5)
+    AESD_ROUND_1B(K6)
+    AESD_ROUND_1B(K7)
+    AESD_ROUND_1B(K8)
+    AESD_ROUND_1B(K9)
+    AESD_ROUND_1B(K10)
+    AESD_LAST_ROUND_1B(K11,K12)
+
+    st1            {S0.16b},[DST],#16
+C NOTE: b.ne exits only when LENGTH reaches exactly zero, so a multiple of 16 is expected.
+    subs           LENGTH,LENGTH,#16
+    b.ne           L1B_loop
+
+Ldone:
+    ret
+EPILOGUE(nettle_aes192_decrypt)
diff --git a/arm64/crypto/aes192-encrypt.asm b/arm64/crypto/aes192-encrypt.asm
new file mode 100644 (file)
index 0000000..5a71786
--- /dev/null
@@ -0,0 +1,171 @@
+C arm64/crypto/aes192-encrypt.asm
+
+ifelse(`
+   Copyright (C) 2021 Mamone Tarsha
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+.file "aes192-encrypt.asm"
+.arch armv8-a+crypto
+
+.text
+
+C Register usage:
+
+define(`KEYS', `x0')
+define(`LENGTH', `x1')
+define(`DST', `x2')
+define(`SRC', `x3')
+
+define(`S0', `v0')
+define(`S1', `v1')
+define(`S2', `v2')
+define(`S3', `v3')
+define(`K0', `v16')
+define(`K1', `v17')
+define(`K2', `v18')
+define(`K3', `v19')
+define(`K4', `v20')
+define(`K5', `v21')
+define(`K6', `v22')
+define(`K7', `v23')
+define(`K8', `v24')
+define(`K9', `v25')
+define(`K10', `v26')
+define(`K11', `v27')
+define(`K12', `v28')
+
+C AES encryption round of 4-blocks
+C AESE_ROUND_4B(KEY)
+define(`AESE_ROUND_4B', m4_assert_numargs(1)`
+    aese           S0.16b,$1.16b
+    aesmc          S0.16b,S0.16b
+    aese           S1.16b,$1.16b
+    aesmc          S1.16b,S1.16b
+    aese           S2.16b,$1.16b
+    aesmc          S2.16b,S2.16b
+    aese           S3.16b,$1.16b
+    aesmc          S3.16b,S3.16b
+')
+
+C AES last encryption round of 4-blocks
+C AESE_LAST_ROUND_4B(KEY, LAST_KEY)
+define(`AESE_LAST_ROUND_4B', m4_assert_numargs(2)`
+    aese           S0.16b,$1.16b
+    eor            S0.16b,S0.16b,$2.16b
+    aese           S1.16b,$1.16b
+    eor            S1.16b,S1.16b,$2.16b
+    aese           S2.16b,$1.16b
+    eor            S2.16b,S2.16b,$2.16b
+    aese           S3.16b,$1.16b
+    eor            S3.16b,S3.16b,$2.16b
+')
+
+C AES encryption round of 1-block
+C AESE_ROUND_1B(KEY)
+define(`AESE_ROUND_1B', m4_assert_numargs(1)`
+    aese           S0.16b,$1.16b
+    aesmc          S0.16b,S0.16b
+')
+
+C AES last encryption round of 1-block
+C AESE_LAST_ROUND_1B(KEY, LAST_KEY)
+define(`AESE_LAST_ROUND_1B', m4_assert_numargs(2)`
+    aese           S0.16b,$1.16b
+    eor            S0.16b,S0.16b,$2.16b
+')
+
+C void
+C aes192_encrypt(const struct aes192_ctx *ctx,
+C                size_t length, uint8_t *dst,
+C                const uint8_t *src)
+C Handles 64 bytes (four blocks) per pass while possible, then 16 bytes per pass.
+PROLOGUE(nettle_aes192_encrypt)
+    ands           x4,LENGTH,#-64
+    b.eq           L1B
+C Load the round keys through scratch pointer x5 so that KEYS stays unmodified.
+    mov            x5,KEYS
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
+    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64
+    ld1            {K12.4s},[x5]
+C x4 holds LENGTH with the low 6 bits cleared: the bytes handled 64 at a time.
+L4B_loop:
+    ld1            {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
+C Eleven aese/aesmc rounds (K0-K10), then aese with K11 and a final eor with K12.
+    AESE_ROUND_4B(K0)
+    AESE_ROUND_4B(K1)
+    AESE_ROUND_4B(K2)
+    AESE_ROUND_4B(K3)
+    AESE_ROUND_4B(K4)
+    AESE_ROUND_4B(K5)
+    AESE_ROUND_4B(K6)
+    AESE_ROUND_4B(K7)
+    AESE_ROUND_4B(K8)
+    AESE_ROUND_4B(K9)
+    AESE_ROUND_4B(K10)
+    AESE_LAST_ROUND_4B(K11,K12)
+
+    st1            {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64
+
+    subs           x4,x4,#64
+    b.ne           L4B_loop
+C Keep only the low 6 bits of LENGTH: the bytes left for the 1-block loop.
+    and            LENGTH,LENGTH,#63
+
+L1B:
+    cbz            LENGTH,Ldone
+C The round keys are loaded again here, even when the 4-block path ran first.
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
+    ld1            {K12.4s},[KEYS]
+
+L1B_loop:
+    ld1            {S0.16b},[SRC],#16
+    
+    AESE_ROUND_1B(K0)
+    AESE_ROUND_1B(K1)
+    AESE_ROUND_1B(K2)
+    AESE_ROUND_1B(K3)
+    AESE_ROUND_1B(K4)
+    AESE_ROUND_1B(K5)
+    AESE_ROUND_1B(K6)
+    AESE_ROUND_1B(K7)
+    AESE_ROUND_1B(K8)
+    AESE_ROUND_1B(K9)
+    AESE_ROUND_1B(K10)
+    AESE_LAST_ROUND_1B(K11,K12)
+
+    st1            {S0.16b},[DST],#16
+C NOTE: b.ne exits only when LENGTH reaches exactly zero, so a multiple of 16 is expected.
+    subs           LENGTH,LENGTH,#16
+    b.ne           L1B_loop
+
+Ldone:
+    ret
+EPILOGUE(nettle_aes192_encrypt)
diff --git a/arm64/crypto/aes256-decrypt.asm b/arm64/crypto/aes256-decrypt.asm
new file mode 100644 (file)
index 0000000..6191d7b
--- /dev/null
@@ -0,0 +1,177 @@
+C arm64/crypto/aes256-decrypt.asm
+
+ifelse(`
+   Copyright (C) 2021 Mamone Tarsha
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+.file "aes256-decrypt.asm"
+.arch armv8-a+crypto
+
+.text
+
+C Register usage:
+
+define(`KEYS', `x0')
+define(`LENGTH', `x1')
+define(`DST', `x2')
+define(`SRC', `x3')
+
+define(`S0', `v0')
+define(`S1', `v1')
+define(`S2', `v2')
+define(`S3', `v3')
+define(`K0', `v16')
+define(`K1', `v17')
+define(`K2', `v18')
+define(`K3', `v19')
+define(`K4', `v20')
+define(`K5', `v21')
+define(`K6', `v22')
+define(`K7', `v23')
+define(`K8', `v24')
+define(`K9', `v25')
+define(`K10', `v26')
+define(`K11', `v27')
+define(`K12', `v28')
+define(`K13', `v29')
+define(`K14', `v30')
+
+C AES decryption round of 4-blocks
+C AESD_ROUND_4B(KEY)
+define(`AESD_ROUND_4B', m4_assert_numargs(1)`
+    aesd           S0.16b,$1.16b
+    aesimc         S0.16b,S0.16b
+    aesd           S1.16b,$1.16b
+    aesimc         S1.16b,S1.16b
+    aesd           S2.16b,$1.16b
+    aesimc         S2.16b,S2.16b
+    aesd           S3.16b,$1.16b
+    aesimc         S3.16b,S3.16b
+')
+
+C AES last decryption round of 4-blocks
+C AESD_LAST_ROUND_4B(KEY, LAST_KEY)
+define(`AESD_LAST_ROUND_4B', m4_assert_numargs(2)`
+    aesd           S0.16b,$1.16b
+    eor            S0.16b,S0.16b,$2.16b
+    aesd           S1.16b,$1.16b
+    eor            S1.16b,S1.16b,$2.16b
+    aesd           S2.16b,$1.16b
+    eor            S2.16b,S2.16b,$2.16b
+    aesd           S3.16b,$1.16b
+    eor            S3.16b,S3.16b,$2.16b
+')
+
+C AES decryption round of 1-block
+C AESD_ROUND_1B(KEY)
+define(`AESD_ROUND_1B', m4_assert_numargs(1)`
+    aesd           S0.16b,$1.16b
+    aesimc         S0.16b,S0.16b
+')
+
+C AES last decryption round of 1-block
+C AESD_LAST_ROUND_1B(KEY, LAST_KEY)
+define(`AESD_LAST_ROUND_1B', m4_assert_numargs(2)`
+    aesd           S0.16b,$1.16b
+    eor            S0.16b,S0.16b,$2.16b
+')
+
+C void
+C aes256_decrypt(const struct aes256_ctx *ctx,
+C                size_t length, uint8_t *dst,
+C                const uint8_t *src)
+C Handles 64 bytes (four blocks) per pass while possible, then 16 bytes per pass.
+PROLOGUE(nettle_aes256_decrypt)
+    ands           x4,LENGTH,#-64
+    b.eq           L1B
+C Load the round keys through scratch pointer x5 so that KEYS stays unmodified.
+    mov            x5,KEYS
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
+    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64
+    ld1            {K12.4s,K13.4s,K14.4s},[x5]
+C x4 holds LENGTH with the low 6 bits cleared: the bytes handled 64 at a time.
+L4B_loop:
+    ld1            {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
+C Thirteen aesd/aesimc rounds (K0-K12), then aesd with K13 and a final eor with K14.
+    AESD_ROUND_4B(K0)
+    AESD_ROUND_4B(K1)
+    AESD_ROUND_4B(K2)
+    AESD_ROUND_4B(K3)
+    AESD_ROUND_4B(K4)
+    AESD_ROUND_4B(K5)
+    AESD_ROUND_4B(K6)
+    AESD_ROUND_4B(K7)
+    AESD_ROUND_4B(K8)
+    AESD_ROUND_4B(K9)
+    AESD_ROUND_4B(K10)
+    AESD_ROUND_4B(K11)
+    AESD_ROUND_4B(K12)
+    AESD_LAST_ROUND_4B(K13,K14)
+
+    st1            {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64
+
+    subs           x4,x4,#64
+    b.ne           L4B_loop
+C Keep only the low 6 bits of LENGTH: the bytes left for the 1-block loop.
+    and            LENGTH,LENGTH,#63
+
+L1B:
+    cbz            LENGTH,Ldone
+C The round keys are loaded again here, even when the 4-block path ran first.
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
+    ld1            {K12.4s,K13.4s,K14.4s},[KEYS]
+
+L1B_loop:
+    ld1            {S0.16b},[SRC],#16
+    
+    AESD_ROUND_1B(K0)
+    AESD_ROUND_1B(K1)
+    AESD_ROUND_1B(K2)
+    AESD_ROUND_1B(K3)
+    AESD_ROUND_1B(K4)
+    AESD_ROUND_1B(K5)
+    AESD_ROUND_1B(K6)
+    AESD_ROUND_1B(K7)
+    AESD_ROUND_1B(K8)
+    AESD_ROUND_1B(K9)
+    AESD_ROUND_1B(K10)
+    AESD_ROUND_1B(K11)
+    AESD_ROUND_1B(K12)
+    AESD_LAST_ROUND_1B(K13,K14)
+
+    st1            {S0.16b},[DST],#16
+C NOTE: b.ne exits only when LENGTH reaches exactly zero, so a multiple of 16 is expected.
+    subs           LENGTH,LENGTH,#16
+    b.ne           L1B_loop
+
+Ldone:
+    ret
+EPILOGUE(nettle_aes256_decrypt)
diff --git a/arm64/crypto/aes256-encrypt.asm b/arm64/crypto/aes256-encrypt.asm
new file mode 100644 (file)
index 0000000..4a53c0c
--- /dev/null
@@ -0,0 +1,177 @@
+C arm64/crypto/aes256-encrypt.asm
+
+ifelse(`
+   Copyright (C) 2021 Mamone Tarsha
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+.file "aes256-encrypt.asm"
+.arch armv8-a+crypto
+
+.text
+
+C Register usage:
+
+define(`KEYS', `x0')
+define(`LENGTH', `x1')
+define(`DST', `x2')
+define(`SRC', `x3')
+
+define(`S0', `v0')
+define(`S1', `v1')
+define(`S2', `v2')
+define(`S3', `v3')
+define(`K0', `v16')
+define(`K1', `v17')
+define(`K2', `v18')
+define(`K3', `v19')
+define(`K4', `v20')
+define(`K5', `v21')
+define(`K6', `v22')
+define(`K7', `v23')
+define(`K8', `v24')
+define(`K9', `v25')
+define(`K10', `v26')
+define(`K11', `v27')
+define(`K12', `v28')
+define(`K13', `v29')
+define(`K14', `v30')
+
+C AES encryption round of 4-blocks
+C AESE_ROUND_4B(KEY)
+define(`AESE_ROUND_4B', m4_assert_numargs(1)`
+    aese           S0.16b,$1.16b
+    aesmc          S0.16b,S0.16b
+    aese           S1.16b,$1.16b
+    aesmc          S1.16b,S1.16b
+    aese           S2.16b,$1.16b
+    aesmc          S2.16b,S2.16b
+    aese           S3.16b,$1.16b
+    aesmc          S3.16b,S3.16b
+')
+
+C AES last encryption round of 4-blocks
+C AESE_LAST_ROUND_4B(KEY, LAST_KEY)
+define(`AESE_LAST_ROUND_4B', m4_assert_numargs(2)`
+    aese           S0.16b,$1.16b
+    eor            S0.16b,S0.16b,$2.16b
+    aese           S1.16b,$1.16b
+    eor            S1.16b,S1.16b,$2.16b
+    aese           S2.16b,$1.16b
+    eor            S2.16b,S2.16b,$2.16b
+    aese           S3.16b,$1.16b
+    eor            S3.16b,S3.16b,$2.16b
+')
+
+C AES encryption round of 1-block
+C AESE_ROUND_1B(KEY)
+define(`AESE_ROUND_1B', m4_assert_numargs(1)`
+    aese           S0.16b,$1.16b
+    aesmc          S0.16b,S0.16b
+')
+
+C AES last encryption round of 1-block
+C AESE_LAST_ROUND_1B(KEY, LAST_KEY)
+define(`AESE_LAST_ROUND_1B', m4_assert_numargs(2)`
+    aese           S0.16b,$1.16b
+    eor            S0.16b,S0.16b,$2.16b
+')
+
+C void
+C aes256_encrypt(const struct aes256_ctx *ctx,
+C                size_t length, uint8_t *dst,
+C                const uint8_t *src)
+C Handles 64 bytes (four blocks) per pass while possible, then 16 bytes per pass.
+PROLOGUE(nettle_aes256_encrypt)
+    ands           x4,LENGTH,#-64
+    b.eq           L1B
+C Load the round keys through scratch pointer x5 so that KEYS stays unmodified.
+    mov            x5,KEYS
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
+    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64
+    ld1            {K12.4s,K13.4s,K14.4s},[x5]
+C x4 holds LENGTH with the low 6 bits cleared: the bytes handled 64 at a time.
+L4B_loop:
+    ld1            {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
+C Thirteen aese/aesmc rounds (K0-K12), then aese with K13 and a final eor with K14.
+    AESE_ROUND_4B(K0)
+    AESE_ROUND_4B(K1)
+    AESE_ROUND_4B(K2)
+    AESE_ROUND_4B(K3)
+    AESE_ROUND_4B(K4)
+    AESE_ROUND_4B(K5)
+    AESE_ROUND_4B(K6)
+    AESE_ROUND_4B(K7)
+    AESE_ROUND_4B(K8)
+    AESE_ROUND_4B(K9)
+    AESE_ROUND_4B(K10)
+    AESE_ROUND_4B(K11)
+    AESE_ROUND_4B(K12)
+    AESE_LAST_ROUND_4B(K13,K14)
+
+    st1            {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64
+
+    subs           x4,x4,#64
+    b.ne           L4B_loop
+C Keep only the low 6 bits of LENGTH: the bytes left for the 1-block loop.
+    and            LENGTH,LENGTH,#63
+
+L1B:
+    cbz            LENGTH,Ldone
+C The round keys are loaded again here, even when the 4-block path ran first.
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
+    ld1            {K12.4s,K13.4s,K14.4s},[KEYS]
+
+L1B_loop:
+    ld1            {S0.16b},[SRC],#16
+    
+    AESE_ROUND_1B(K0)
+    AESE_ROUND_1B(K1)
+    AESE_ROUND_1B(K2)
+    AESE_ROUND_1B(K3)
+    AESE_ROUND_1B(K4)
+    AESE_ROUND_1B(K5)
+    AESE_ROUND_1B(K6)
+    AESE_ROUND_1B(K7)
+    AESE_ROUND_1B(K8)
+    AESE_ROUND_1B(K9)
+    AESE_ROUND_1B(K10)
+    AESE_ROUND_1B(K11)
+    AESE_ROUND_1B(K12)
+    AESE_LAST_ROUND_1B(K13,K14)
+
+    st1            {S0.16b},[DST],#16
+C NOTE: b.ne exits only when LENGTH reaches exactly zero, so a multiple of 16 is expected.
+    subs           LENGTH,LENGTH,#16
+    b.ne           L1B_loop
+
+Ldone:
+    ret
+EPILOGUE(nettle_aes256_encrypt)
diff --git a/arm64/fat/aes128-decrypt-2.asm b/arm64/fat/aes128-decrypt-2.asm
new file mode 100644 (file)
index 0000000..ff33615
--- /dev/null
@@ -0,0 +1,36 @@
+C arm64/fat/aes128-decrypt-2.asm
+
+ifelse(`
+   Copyright (C) 2021 Mamone Tarsha
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes128_decrypt) picked up by configure
+
+define(`fat_transform', `_$1_arm64')
+include_src(`arm64/crypto/aes128-decrypt.asm')
diff --git a/arm64/fat/aes128-encrypt-2.asm b/arm64/fat/aes128-encrypt-2.asm
new file mode 100644 (file)
index 0000000..68cce46
--- /dev/null
@@ -0,0 +1,36 @@
+C arm64/fat/aes128-encrypt-2.asm
+
+ifelse(`
+   Copyright (C) 2021 Mamone Tarsha
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes128_encrypt) picked up by configure
+
+define(`fat_transform', `_$1_arm64')
+include_src(`arm64/crypto/aes128-encrypt.asm')
diff --git a/arm64/fat/aes192-decrypt-2.asm b/arm64/fat/aes192-decrypt-2.asm
new file mode 100644 (file)
index 0000000..0937a06
--- /dev/null
@@ -0,0 +1,36 @@
+C arm64/fat/aes192-decrypt-2.asm
+
+ifelse(`
+   Copyright (C) 2021 Mamone Tarsha
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes192_decrypt) picked up by configure
+
+define(`fat_transform', `_$1_arm64')
+include_src(`arm64/crypto/aes192-decrypt.asm')
diff --git a/arm64/fat/aes192-encrypt-2.asm b/arm64/fat/aes192-encrypt-2.asm
new file mode 100644 (file)
index 0000000..cf93bc2
--- /dev/null
@@ -0,0 +1,36 @@
+C arm64/fat/aes192-encrypt-2.asm
+
+ifelse(`
+   Copyright (C) 2021 Mamone Tarsha
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes192_encrypt) picked up by configure
+
+define(`fat_transform', `_$1_arm64')
+include_src(`arm64/crypto/aes192-encrypt.asm')
diff --git a/arm64/fat/aes256-decrypt-2.asm b/arm64/fat/aes256-decrypt-2.asm
new file mode 100644 (file)
index 0000000..43fc49a
--- /dev/null
@@ -0,0 +1,36 @@
+C arm64/fat/aes256-decrypt-2.asm
+
+ifelse(`
+   Copyright (C) 2021 Mamone Tarsha
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes256_decrypt) picked up by configure
+
+define(`fat_transform', `_$1_arm64')
+include_src(`arm64/crypto/aes256-decrypt.asm')
diff --git a/arm64/fat/aes256-encrypt-2.asm b/arm64/fat/aes256-encrypt-2.asm
new file mode 100644 (file)
index 0000000..26f4ac6
--- /dev/null
@@ -0,0 +1,36 @@
+C arm64/fat/aes256-encrypt-2.asm
+
+ifelse(`
+   Copyright (C) 2021 Mamone Tarsha
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes256_encrypt) picked up by configure
+
+define(`fat_transform', `_$1_arm64')
+include_src(`arm64/crypto/aes256-encrypt.asm')
index 44add732537d3ac843e01ef35a5f0b945de0a69c..1c34a2d9b0b15f08b77aa4eaead18f3adc6ddec5 100644 (file)
@@ -495,7 +495,7 @@ if test "x$enable_assembler" = xyes ; then
         if test "x$enable_fat" = xyes ; then
           asm_path="arm64/fat $asm_path"
           OPT_NETTLE_SOURCES="fat-arm64.c $OPT_NETTLE_SOURCES"
-          FAT_TEST_LIST="none pmull sha1 sha2"
+          FAT_TEST_LIST="none aes pmull sha1 sha2"
         else
           if test "$enable_arm64_crypto" = yes ; then
             asm_path="arm64/crypto $asm_path"
index 9bcb208a138d6f276ba24a9733b0d3c6b0b5ce89..fcb2ece815e499855773bf0bd2726e4e37742be8 100644 (file)
@@ -50,6 +50,7 @@
 
 #include "nettle-types.h"
 
+#include "aes.h"
 #include "gcm.h"
 #include "gcm-internal.h"
 #include "fat-setup.h"
@@ -58,6 +59,9 @@
 #ifndef HWCAP_ASIMD
 #define HWCAP_ASIMD (1 << 1)
 #endif
+#ifndef HWCAP_AES
+#define HWCAP_AES (1 << 3)
+#endif
 #ifndef HWCAP_PMULL
 #define HWCAP_PMULL (1 << 4)
 #endif
@@ -70,6 +74,7 @@
 
 struct arm64_features
 {
+  int have_aes;
   int have_pmull;
   int have_sha1;
   int have_sha2;
@@ -82,6 +87,7 @@ static void
 get_arm64_features (struct arm64_features *features)
 {
   const char *s;
+  features->have_aes = 0;
   features->have_pmull = 0;
   features->have_sha1 = 0;
   features->have_sha2 = 0;
@@ -93,7 +99,9 @@ get_arm64_features (struct arm64_features *features)
        const char *sep = strchr (s, ',');
        size_t length = sep ? (size_t) (sep - s) : strlen(s);
 
-       if (MATCH (s, length, "pmull", 5))
+       if (MATCH (s, length, "aes", 3))
+         features->have_aes = 1;
+  else if (MATCH (s, length, "pmull", 5))
          features->have_pmull = 1;
   else if (MATCH (s, length, "sha1", 4))
          features->have_sha1 = 1;
@@ -107,6 +115,8 @@ get_arm64_features (struct arm64_features *features)
     {
 #if USE_GETAUXVAL
       unsigned long hwcap = getauxval(AT_HWCAP);
+      features->have_aes
+       = ((hwcap & (HWCAP_ASIMD | HWCAP_AES)) == (HWCAP_ASIMD | HWCAP_AES));
       features->have_pmull
        = ((hwcap & (HWCAP_ASIMD | HWCAP_PMULL)) == (HWCAP_ASIMD | HWCAP_PMULL));
       features->have_sha1
@@ -117,6 +127,27 @@ get_arm64_features (struct arm64_features *features)
     }
 }
 
+DECLARE_FAT_FUNC(nettle_aes128_encrypt, aes128_crypt_func) /* AES-128 encrypt; presumably declares nettle_aes128_encrypt_vec, the pointer fat_init assigns -- TODO confirm against the macro definition */
+DECLARE_FAT_FUNC_VAR(aes128_encrypt, aes128_crypt_func, c) /* presumably _nettle_aes128_encrypt_c, which fat_init selects when have_aes is 0 */
+DECLARE_FAT_FUNC_VAR(aes128_encrypt, aes128_crypt_func, arm64) /* presumably _nettle_aes128_encrypt_arm64, which fat_init selects when have_aes is set */
+DECLARE_FAT_FUNC(nettle_aes128_decrypt, aes128_crypt_func) /* AES-128 decrypt: same three-declaration pattern */
+DECLARE_FAT_FUNC_VAR(aes128_decrypt, aes128_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes128_decrypt, aes128_crypt_func, arm64)
+
+DECLARE_FAT_FUNC(nettle_aes192_encrypt, aes192_crypt_func) /* AES-192 encrypt */
+DECLARE_FAT_FUNC_VAR(aes192_encrypt, aes192_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes192_encrypt, aes192_crypt_func, arm64)
+DECLARE_FAT_FUNC(nettle_aes192_decrypt, aes192_crypt_func) /* AES-192 decrypt */
+DECLARE_FAT_FUNC_VAR(aes192_decrypt, aes192_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes192_decrypt, aes192_crypt_func, arm64)
+
+DECLARE_FAT_FUNC(nettle_aes256_encrypt, aes256_crypt_func) /* AES-256 encrypt */
+DECLARE_FAT_FUNC_VAR(aes256_encrypt, aes256_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes256_encrypt, aes256_crypt_func, arm64)
+DECLARE_FAT_FUNC(nettle_aes256_decrypt, aes256_crypt_func) /* AES-256 decrypt */
+DECLARE_FAT_FUNC_VAR(aes256_decrypt, aes256_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes256_decrypt, aes256_crypt_func, arm64)
+
 #if GCM_TABLE_BITS == 8
 DECLARE_FAT_FUNC(_nettle_gcm_init_key, gcm_init_key_func)
 DECLARE_FAT_FUNC_VAR(gcm_init_key, gcm_init_key_func, c)
@@ -145,11 +176,33 @@ fat_init (void)
 
   verbose = getenv (ENV_VERBOSE) != NULL;
   if (verbose)
-    fprintf (stderr, "libnettle: cpu features:%s%s%s\n",
+    fprintf (stderr, "libnettle: cpu features:%s%s%s%s\n",
+            features.have_aes ? " aes instructions" : "",
             features.have_pmull ? " polynomial multiply long instructions (PMULL/PMULL2)" : "",
        features.have_sha1 ? " sha1 instructions" : "",
        features.have_sha2 ? " sha2 instructions" : "");
 
+  if (features.have_aes)
+  {
+    if (verbose)
+      fprintf (stderr, "libnettle: enabling hardware accelerated AES encrypt/decrypt code.\n");
+    nettle_aes128_encrypt_vec = _nettle_aes128_encrypt_arm64;
+    nettle_aes128_decrypt_vec = _nettle_aes128_decrypt_arm64;
+    nettle_aes192_encrypt_vec = _nettle_aes192_encrypt_arm64;
+    nettle_aes192_decrypt_vec = _nettle_aes192_decrypt_arm64;
+    nettle_aes256_encrypt_vec = _nettle_aes256_encrypt_arm64;
+    nettle_aes256_decrypt_vec = _nettle_aes256_decrypt_arm64;
+  }
+  else
+  {
+    nettle_aes128_encrypt_vec = _nettle_aes128_encrypt_c;
+    nettle_aes128_decrypt_vec = _nettle_aes128_decrypt_c;
+    nettle_aes192_encrypt_vec = _nettle_aes192_encrypt_c;
+    nettle_aes192_decrypt_vec = _nettle_aes192_decrypt_c;
+    nettle_aes256_encrypt_vec = _nettle_aes256_encrypt_c;
+    nettle_aes256_decrypt_vec = _nettle_aes256_decrypt_c;
+  }
+  
   if (features.have_pmull)
     {
       if (verbose)
@@ -192,6 +245,33 @@ fat_init (void)
     }
 }
 
+DEFINE_FAT_FUNC(nettle_aes128_encrypt, void, /* presumably defines the public nettle_aes128_encrypt entry point, routed through nettle_aes128_encrypt_vec -- TODO confirm against the macro definition */
+ (const struct aes128_ctx *ctx, size_t length, /* parameter list of the generated function */
+  uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src)) /* argument names, presumably forwarded unchanged to the selected implementation */
+DEFINE_FAT_FUNC(nettle_aes128_decrypt, void, /* AES-128 decrypt: same shape as the encrypt definition */
+ (const struct aes128_ctx *ctx, size_t length,
+  uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+
+DEFINE_FAT_FUNC(nettle_aes192_encrypt, void, /* AES-192 encrypt; takes struct aes192_ctx */
+ (const struct aes192_ctx *ctx, size_t length,
+  uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+DEFINE_FAT_FUNC(nettle_aes192_decrypt, void, /* AES-192 decrypt */
+ (const struct aes192_ctx *ctx, size_t length,
+  uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+
+DEFINE_FAT_FUNC(nettle_aes256_encrypt, void, /* AES-256 encrypt; takes struct aes256_ctx */
+ (const struct aes256_ctx *ctx, size_t length,
+  uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+DEFINE_FAT_FUNC(nettle_aes256_decrypt, void, /* AES-256 decrypt */
+ (const struct aes256_ctx *ctx, size_t length,
+  uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+
 #if GCM_TABLE_BITS == 8
 DEFINE_FAT_FUNC(_nettle_gcm_init_key, void,
                (union nettle_block16 *table),