From 5f7740a3872e076688ec4767c5942d8b49c6efa7 Mon Sep 17 00:00:00 2001 From: Mamone Tarsha Date: Sun, 8 Aug 2021 15:09:59 +0300 Subject: [PATCH] [AArch64] Load AES keys at function prologue --- arm64/crypto/aes128-decrypt.asm | 13 ++++--------- arm64/crypto/aes128-encrypt.asm | 13 ++++--------- arm64/crypto/aes192-decrypt.asm | 16 +++++----------- arm64/crypto/aes192-encrypt.asm | 16 +++++----------- arm64/crypto/aes256-decrypt.asm | 16 +++++----------- arm64/crypto/aes256-encrypt.asm | 16 +++++----------- 6 files changed, 28 insertions(+), 62 deletions(-) diff --git a/arm64/crypto/aes128-decrypt.asm b/arm64/crypto/aes128-decrypt.asm index cd970471..aadfc480 100644 --- a/arm64/crypto/aes128-decrypt.asm +++ b/arm64/crypto/aes128-decrypt.asm @@ -63,14 +63,13 @@ C size_t length, uint8_t *dst, C const uint8_t *src) PROLOGUE(nettle_aes128_decrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s},[KEYS] + ands x4,LENGTH,#-64 b.eq L1B - mov x5,KEYS - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64 - ld1 {K8.4s,K9.4s,K10.4s},[x5] - L4B_loop: ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 @@ -95,10 +94,6 @@ L4B_loop: L1B: cbz LENGTH,Ldone - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 - ld1 {K8.4s,K9.4s,K10.4s},[KEYS] - L1B_loop: ld1 {S0.16b},[SRC],#16 diff --git a/arm64/crypto/aes128-encrypt.asm b/arm64/crypto/aes128-encrypt.asm index 5db609e5..3d9b9d90 100644 --- a/arm64/crypto/aes128-encrypt.asm +++ b/arm64/crypto/aes128-encrypt.asm @@ -63,14 +63,13 @@ C size_t length, uint8_t *dst, C const uint8_t *src) PROLOGUE(nettle_aes128_encrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s},[KEYS] + ands x4,LENGTH,#-64 b.eq L1B - mov x5,KEYS - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64 - ld1 {K8.4s,K9.4s,K10.4s},[x5] - L4B_loop: ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 @@ -95,10 +94,6 @@ L4B_loop: L1B: cbz LENGTH,Ldone - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 - ld1 {K8.4s,K9.4s,K10.4s},[KEYS] - L1B_loop: ld1 {S0.16b},[SRC],#16 diff --git a/arm64/crypto/aes192-decrypt.asm b/arm64/crypto/aes192-decrypt.asm index 87a4ca73..769edd15 100644 --- a/arm64/crypto/aes192-decrypt.asm +++ b/arm64/crypto/aes192-decrypt.asm @@ -65,15 +65,14 @@ C size_t length, uint8_t *dst, C const uint8_t *src) PROLOGUE(nettle_aes192_decrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 + ld1 {K12.4s},[KEYS] + ands x4,LENGTH,#-64 b.eq L1B - mov x5,KEYS - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64 - ld1 {K12.4s},[x5] - L4B_loop: ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 @@ -100,11 +99,6 @@ L4B_loop: L1B: cbz LENGTH,Ldone - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 - ld1 {K12.4s},[KEYS] - L1B_loop: ld1 {S0.16b},[SRC],#16 diff --git a/arm64/crypto/aes192-encrypt.asm b/arm64/crypto/aes192-encrypt.asm index ad6be9b5..5e57d4a4 100644 --- a/arm64/crypto/aes192-encrypt.asm +++ b/arm64/crypto/aes192-encrypt.asm @@ -65,15 +65,14 @@ C size_t length, uint8_t *dst, C const uint8_t *src) PROLOGUE(nettle_aes192_encrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 + ld1 {K12.4s},[KEYS] + ands x4,LENGTH,#-64 b.eq L1B - mov x5,KEYS - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64 - ld1 {K12.4s},[x5] - L4B_loop: ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 @@ -100,11 +99,6 @@ L4B_loop: L1B: cbz LENGTH,Ldone - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 - ld1 {K12.4s},[KEYS] - L1B_loop: ld1 {S0.16b},[SRC],#16 diff --git a/arm64/crypto/aes256-decrypt.asm b/arm64/crypto/aes256-decrypt.asm index 758bb143..d787a74a 100644 --- a/arm64/crypto/aes256-decrypt.asm +++ b/arm64/crypto/aes256-decrypt.asm @@ -67,15 +67,14 @@ C size_t length, uint8_t *dst, C const uint8_t *src) PROLOGUE(nettle_aes256_decrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 + ld1 {K12.4s,K13.4s,K14.4s},[KEYS] + ands x4,LENGTH,#-64 b.eq L1B - mov x5,KEYS - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64 - ld1 {K12.4s,K13.4s,K14.4s},[x5] - L4B_loop: ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 @@ -104,11 +103,6 @@ L4B_loop: L1B: cbz LENGTH,Ldone - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 - ld1 {K12.4s,K13.4s,K14.4s},[KEYS] - L1B_loop: ld1 {S0.16b},[SRC],#16 diff --git a/arm64/crypto/aes256-encrypt.asm b/arm64/crypto/aes256-encrypt.asm index c02cb679..a6321b82 100644 --- a/arm64/crypto/aes256-encrypt.asm +++ b/arm64/crypto/aes256-encrypt.asm @@ -67,15 +67,14 @@ C size_t length, uint8_t *dst, C const uint8_t *src) PROLOGUE(nettle_aes256_encrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 + ld1 {K12.4s,K13.4s,K14.4s},[KEYS] + ands x4,LENGTH,#-64 b.eq L1B - mov x5,KEYS - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64 - ld1 {K12.4s,K13.4s,K14.4s},[x5] - L4B_loop: ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 @@ -104,11 +103,6 @@ L4B_loop: L1B: cbz LENGTH,Ldone - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 - ld1 {K12.4s,K13.4s,K14.4s},[KEYS] - L1B_loop: ld1 {S0.16b},[SRC],#16 -- 2.47.2