git.ipfire.org Git - thirdparty/nettle.git/commitdiff
[AArch64] Load AES keys at function prologue
author Mamone Tarsha <maamoun.tk@googlemail.com>
Sun, 8 Aug 2021 12:09:59 +0000 (15:09 +0300)
committer Mamone Tarsha <maamoun.tk@googlemail.com>
Sun, 8 Aug 2021 12:09:59 +0000 (15:09 +0300)
arm64/crypto/aes128-decrypt.asm
arm64/crypto/aes128-encrypt.asm
arm64/crypto/aes192-decrypt.asm
arm64/crypto/aes192-encrypt.asm
arm64/crypto/aes256-decrypt.asm
arm64/crypto/aes256-encrypt.asm

index cd970471a992d610bc40e6de5f855c72d3d6578a..aadfc480509b6aadec171808e5317c2ffbe7428e 100644 (file)
@@ -63,14 +63,13 @@ C                size_t length, uint8_t *dst,
 C                const uint8_t *src)
 
 PROLOGUE(nettle_aes128_decrypt)
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+    ld1            {K8.4s,K9.4s,K10.4s},[KEYS]
+    
     ands           x4,LENGTH,#-64
     b.eq           L1B
 
-    mov            x5,KEYS
-    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
-    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
-    ld1            {K8.4s,K9.4s,K10.4s},[x5]
-
 L4B_loop:
     ld1            {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
     
@@ -95,10 +94,6 @@ L4B_loop:
 L1B:
     cbz            LENGTH,Ldone
 
-    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
-    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
-    ld1            {K8.4s,K9.4s,K10.4s},[KEYS]
-
 L1B_loop:
     ld1            {S0.16b},[SRC],#16
     
index 5db609e5a93a880d281dd19836dc24727330bd6d..3d9b9d902a71e58272d3e6de6a9e3b1a2ae4efbd 100644 (file)
@@ -63,14 +63,13 @@ C                size_t length, uint8_t *dst,
 C                const uint8_t *src)
 
 PROLOGUE(nettle_aes128_encrypt)
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+    ld1            {K8.4s,K9.4s,K10.4s},[KEYS]
+    
     ands           x4,LENGTH,#-64
     b.eq           L1B
 
-    mov            x5,KEYS
-    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
-    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
-    ld1            {K8.4s,K9.4s,K10.4s},[x5]
-
 L4B_loop:
     ld1            {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
     
@@ -95,10 +94,6 @@ L4B_loop:
 L1B:
     cbz            LENGTH,Ldone
 
-    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
-    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
-    ld1            {K8.4s,K9.4s,K10.4s},[KEYS]
-
 L1B_loop:
     ld1            {S0.16b},[SRC],#16
     
index 87a4ca73a2c1c36efad994efe69d7cacf2a8e170..769edd15dbe65a363d9eba0e64aef659b57f1de0 100644 (file)
@@ -65,15 +65,14 @@ C                size_t length, uint8_t *dst,
 C                const uint8_t *src)
 
 PROLOGUE(nettle_aes192_decrypt)
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
+    ld1            {K12.4s},[KEYS]
+    
     ands           x4,LENGTH,#-64
     b.eq           L1B
 
-    mov            x5,KEYS
-    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
-    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
-    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64
-    ld1            {K12.4s},[x5]
-
 L4B_loop:
     ld1            {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
     
@@ -100,11 +99,6 @@ L4B_loop:
 L1B:
     cbz            LENGTH,Ldone
 
-    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
-    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
-    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
-    ld1            {K12.4s},[KEYS]
-
 L1B_loop:
     ld1            {S0.16b},[SRC],#16
     
index ad6be9b546c6e7a7fb3d006de7e3ce10b3aa1e2a..5e57d4a487fac6fe34c110eceefd5d9197e20cd6 100644 (file)
@@ -65,15 +65,14 @@ C                size_t length, uint8_t *dst,
 C                const uint8_t *src)
 
 PROLOGUE(nettle_aes192_encrypt)
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
+    ld1            {K12.4s},[KEYS]
+    
     ands           x4,LENGTH,#-64
     b.eq           L1B
 
-    mov            x5,KEYS
-    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
-    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
-    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64
-    ld1            {K12.4s},[x5]
-
 L4B_loop:
     ld1            {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
     
@@ -100,11 +99,6 @@ L4B_loop:
 L1B:
     cbz            LENGTH,Ldone
 
-    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
-    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
-    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
-    ld1            {K12.4s},[KEYS]
-
 L1B_loop:
     ld1            {S0.16b},[SRC],#16
     
index 758bb143b40ed7f0ce3b8663185170051e59daf5..d787a74a40e7f2092612d6c63fc64d50d1e7c4f0 100644 (file)
@@ -67,15 +67,14 @@ C                size_t length, uint8_t *dst,
 C                const uint8_t *src)
 
 PROLOGUE(nettle_aes256_decrypt)
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
+    ld1            {K12.4s,K13.4s,K14.4s},[KEYS]
+    
     ands           x4,LENGTH,#-64
     b.eq           L1B
 
-    mov            x5,KEYS
-    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
-    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
-    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64
-    ld1            {K12.4s,K13.4s,K14.4s},[x5]
-
 L4B_loop:
     ld1            {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
     
@@ -104,11 +103,6 @@ L4B_loop:
 L1B:
     cbz            LENGTH,Ldone
 
-    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
-    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
-    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
-    ld1            {K12.4s,K13.4s,K14.4s},[KEYS]
-
 L1B_loop:
     ld1            {S0.16b},[SRC],#16
     
index c02cb6792ed6e531f926586d82eac3a1296d6869..a6321b8265f7b35e86c58c0465729c024280a838 100644 (file)
@@ -67,15 +67,14 @@ C                size_t length, uint8_t *dst,
 C                const uint8_t *src)
 
 PROLOGUE(nettle_aes256_encrypt)
+    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
+    ld1            {K12.4s,K13.4s,K14.4s},[KEYS]
+    
     ands           x4,LENGTH,#-64
     b.eq           L1B
 
-    mov            x5,KEYS
-    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
-    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
-    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64
-    ld1            {K12.4s,K13.4s,K14.4s},[x5]
-
 L4B_loop:
     ld1            {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
     
@@ -104,11 +103,6 @@ L4B_loop:
 L1B:
     cbz            LENGTH,Ldone
 
-    ld1            {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
-    ld1            {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
-    ld1            {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
-    ld1            {K12.4s,K13.4s,K14.4s},[KEYS]
-
 L1B_loop:
     ld1            {S0.16b},[SRC],#16