git.ipfire.org Git - thirdparty/nettle.git/commitdiff
arm/v6: Alignment tweaks to aes code.
author Niels Möller <nisse@lysator.liu.se>
Thu, 16 May 2013 08:32:45 +0000 (10:32 +0200)
committer Niels Möller <nisse@lysator.liu.se>
Thu, 16 May 2013 08:42:13 +0000 (10:42 +0200)
ChangeLog
arm/v6/aes-decrypt-internal.asm
arm/v6/aes-encrypt-internal.asm

index 6bcf524a8aa601cb1db0f0aaa4bd136c09153e85..38c26f6db3608d39bfe066b11092156f8cd9bf78 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2013-05-16  Niels Möller  <nisse@lysator.liu.se>
+
+       * arm/v6/aes-encrypt-internal.asm: Use ALIGN macro. Use 16-byte
+       alignment for loops.
+       * arm/v6/aes-decrypt-internal.asm: Likewise. Also added a nop
+       which mysteriously improves benchmark performance on Cortex-A9.
+
 2013-05-15  Niels Möller  <nisse@lysator.liu.se>
 
        * configure.ac (asm_path): Handle armv6 and armv7 differently from
index 22df3aa8d4ddf2cf9bc0e56617eb5a48c5e7165d..651636cc66d5b01d8300d2fe29f2b057e104f1f4 100644 (file)
--- a/arm/v6/aes-decrypt-internal.asm
+++ b/arm/v6/aes-decrypt-internal.asm
@@ -50,13 +50,16 @@ define(<X3>, <r14>) C lr
        C              size_t length, uint8_t *dst,
        C              uint8_t *src)
        .text
-       .align 2
+       ALIGN(4)
 PROLOGUE(_nettle_aes_decrypt)
        teq     LENGTH, #0
        beq     .Lend
        ldr     SRC, [sp]
 
        push    {r4,r5,r6,r7,r8,r10,r11,lr}
+       nop     C For some mysterious reason, taking out this nop
+               C slows this function down on Cortex-A9.
+       ALIGN(16)
 .Lblock_loop:
        mov     KEY, CTX
        AES_LOAD(SRC,KEY,W0)
@@ -69,7 +72,7 @@ PROLOGUE(_nettle_aes_decrypt)
        add     TABLE, TABLE, #AES_TABLE0
 
        b       .Lentry
-       .align 2
+       ALIGN(16)
 .Lround_loop:
        C       Transform X -> W
        AES_DECRYPT_ROUND(X0, X1, X2, X3, W0, W1, W2, W3, KEY)
index 81eb6d04bc6e36b48ece516e93517462194594b5..15cf1bb07e2644d150c366de3c061d5a7613e481 100644 (file)
--- a/arm/v6/aes-encrypt-internal.asm
+++ b/arm/v6/aes-encrypt-internal.asm
@@ -52,13 +52,14 @@ define(<X3>, <r14>) C lr
        C              size_t length, uint8_t *dst,
        C              uint8_t *src)
        .text
-       .align 2
+       ALIGN(4)
 PROLOGUE(_nettle_aes_encrypt)
        teq     LENGTH, #0
        beq     .Lend
        ldr     SRC, [sp]
 
        push    {r4,r5,r6,r7,r8,r10,r11,lr}
+       ALIGN(16)
 .Lblock_loop:
        mov     KEY, CTX
        AES_LOAD(SRC,KEY,W0)
@@ -71,7 +72,7 @@ PROLOGUE(_nettle_aes_encrypt)
        add     TABLE, TABLE, #AES_TABLE0
 
        b       .Lentry
-       .align 2
+       ALIGN(16)
 .Lround_loop:
        C       Transform X -> W
        AES_ENCRYPT_ROUND(X0, X1, X2, X3, W0, W1, W2, W3, KEY)