From: Mamone Tarsha Date: Sun, 8 Aug 2021 12:09:59 +0000 (+0300) Subject: [AArch64] Load AES keys at function prologue X-Git-Tag: nettle_3.8_release_20220602~109^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5f7740a3872e076688ec4767c5942d8b49c6efa7;p=thirdparty%2Fnettle.git [AArch64] Load AES keys at function prologue --- diff --git a/arm64/crypto/aes128-decrypt.asm b/arm64/crypto/aes128-decrypt.asm index cd970471..aadfc480 100644 --- a/arm64/crypto/aes128-decrypt.asm +++ b/arm64/crypto/aes128-decrypt.asm @@ -63,14 +63,13 @@ C size_t length, uint8_t *dst, C const uint8_t *src) PROLOGUE(nettle_aes128_decrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s},[KEYS] + ands x4,LENGTH,#-64 b.eq L1B - mov x5,KEYS - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64 - ld1 {K8.4s,K9.4s,K10.4s},[x5] - L4B_loop: ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 @@ -95,10 +94,6 @@ L4B_loop: L1B: cbz LENGTH,Ldone - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 - ld1 {K8.4s,K9.4s,K10.4s},[KEYS] - L1B_loop: ld1 {S0.16b},[SRC],#16 diff --git a/arm64/crypto/aes128-encrypt.asm b/arm64/crypto/aes128-encrypt.asm index 5db609e5..3d9b9d90 100644 --- a/arm64/crypto/aes128-encrypt.asm +++ b/arm64/crypto/aes128-encrypt.asm @@ -63,14 +63,13 @@ C size_t length, uint8_t *dst, C const uint8_t *src) PROLOGUE(nettle_aes128_encrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s},[KEYS] + ands x4,LENGTH,#-64 b.eq L1B - mov x5,KEYS - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64 - ld1 {K8.4s,K9.4s,K10.4s},[x5] - L4B_loop: ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 @@ -95,10 +94,6 @@ L4B_loop: L1B: cbz LENGTH,Ldone - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 - ld1 {K8.4s,K9.4s,K10.4s},[KEYS] - L1B_loop: ld1 {S0.16b},[SRC],#16 diff --git a/arm64/crypto/aes192-decrypt.asm b/arm64/crypto/aes192-decrypt.asm index 87a4ca73..769edd15 100644 --- a/arm64/crypto/aes192-decrypt.asm +++ b/arm64/crypto/aes192-decrypt.asm @@ -65,15 +65,14 @@ C size_t length, uint8_t *dst, C const uint8_t *src) PROLOGUE(nettle_aes192_decrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 + ld1 {K12.4s},[KEYS] + ands x4,LENGTH,#-64 b.eq L1B - mov x5,KEYS - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64 - ld1 {K12.4s},[x5] - L4B_loop: ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 @@ -100,11 +99,6 @@ L4B_loop: L1B: cbz LENGTH,Ldone - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 - ld1 {K12.4s},[KEYS] - L1B_loop: ld1 {S0.16b},[SRC],#16 diff --git a/arm64/crypto/aes192-encrypt.asm b/arm64/crypto/aes192-encrypt.asm index ad6be9b5..5e57d4a4 100644 --- a/arm64/crypto/aes192-encrypt.asm +++ b/arm64/crypto/aes192-encrypt.asm @@ -65,15 +65,14 @@ C size_t length, uint8_t *dst, C const uint8_t *src) PROLOGUE(nettle_aes192_encrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 + ld1 {K12.4s},[KEYS] + ands x4,LENGTH,#-64 b.eq L1B - mov x5,KEYS - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64 - ld1 {K12.4s},[x5] - L4B_loop: ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 @@ -100,11 +99,6 @@ L4B_loop: L1B: cbz LENGTH,Ldone - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 - ld1 {K12.4s},[KEYS] - L1B_loop: ld1 {S0.16b},[SRC],#16 diff --git a/arm64/crypto/aes256-decrypt.asm b/arm64/crypto/aes256-decrypt.asm index 758bb143..d787a74a 100644 --- a/arm64/crypto/aes256-decrypt.asm +++ b/arm64/crypto/aes256-decrypt.asm @@ -67,15 +67,14 @@ C size_t length, uint8_t *dst, C const uint8_t *src) PROLOGUE(nettle_aes256_decrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 + ld1 {K12.4s,K13.4s,K14.4s},[KEYS] + ands x4,LENGTH,#-64 b.eq L1B - mov x5,KEYS - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64 - ld1 {K12.4s,K13.4s,K14.4s},[x5] - L4B_loop: ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 @@ -104,11 +103,6 @@ L4B_loop: L1B: cbz LENGTH,Ldone - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 - ld1 {K12.4s,K13.4s,K14.4s},[KEYS] - L1B_loop: ld1 {S0.16b},[SRC],#16 diff --git a/arm64/crypto/aes256-encrypt.asm b/arm64/crypto/aes256-encrypt.asm index c02cb679..a6321b82 100644 --- a/arm64/crypto/aes256-encrypt.asm +++ b/arm64/crypto/aes256-encrypt.asm @@ -67,15 +67,14 @@ C size_t length, uint8_t *dst, C const uint8_t *src) PROLOGUE(nettle_aes256_encrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 + ld1 {K12.4s,K13.4s,K14.4s},[KEYS] + ands x4,LENGTH,#-64 b.eq L1B - mov x5,KEYS - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64 - ld1 {K12.4s,K13.4s,K14.4s},[x5] - L4B_loop: ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 @@ -104,11 +103,6 @@ L4B_loop: L1B: cbz LENGTH,Ldone - ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 - ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 - ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 - ld1 {K12.4s,K13.4s,K14.4s},[KEYS] - L1B_loop: ld1 {S0.16b},[SRC],#16