git.ipfire.org Git - thirdparty/nettle.git/commitdiff
ARM: Optimized aligned case of memxor3.
author: Niels Möller <nisse@lysator.liu.se>
Wed, 6 Feb 2013 13:29:08 +0000 (14:29 +0100)
committer: Niels Möller <nisse@lysator.liu.se>
Wed, 6 Feb 2013 13:29:08 +0000 (14:29 +0100)
ChangeLog
armv7/memxor.asm

index 0d881de6bd950fd121271c6339e1a4a32f752796..90f03b7d901487dd9c0947d4347a501fa845dd1c 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,7 @@
 2013-02-06  Niels Möller  <nisse@lysator.liu.se>
 
-       * armv7/memxor.asm (memxor): Optimized aligned case, now runs at
-       0.75 cycles/byte.
+       * armv7/memxor.asm (memxor, memxor3): Optimized aligned case, now
+       runs at 0.75 cycles/byte.
 
        * armv7/README: New file.
        * armv7/machine.m4: New (empty) file.
index fe4f9e8a09a20b2a4741cbfb25be26fa282f4fd0..52d4bf46c71f3f08af35d7679599a17bcaa2921d 100644 (file)
@@ -295,16 +295,40 @@ PROLOGUE(memxor3)
        bne     .Lmemxor3_au ;
 
        C a, b and dst all have the same alignment.
+       sub     AP, #4
+       sub     BP, #4
+       sub     DST, #4
+       tst     N, #4
+       it      ne
+       subne   N, #4
+       bne     .Lmemxor3_aligned_word_loop
 
-.Lmemxor3_aligned_word_loop:
-       ldr     r4, [AP, #-4]!
-       ldr     r5, [BP, #-4]!
+       ldr     r4, [AP], #-4
+       ldr     r5, [BP], #-4
        eor     r4, r5
-       str     r4, [DST, #-4]!
-       subs    N, #4
+       str     r4, [DST], #-4
+       subs    N, #8
+       bcc     .Lmemxor3_aligned_word_end
+       
+.Lmemxor3_aligned_word_loop:
+       ldr     r4, [AP, #-4]
+       ldr     r5, [AP], #-8
+       ldr     r6, [BP, #-4]
+       ldr     r7, [BP], #-8
+
+       eor     r4, r6
+       eor     r5, r7
+       subs    N, #8
+       str     r4, [DST, #-4]
+       str     r5, [DST], #-8
+
        bcs     .Lmemxor3_aligned_word_loop
-       adds    N, #4
+.Lmemxor3_aligned_word_end:
+       adds    N, #8
        beq     .Lmemxor3_done
+       add     AP, #4
+       add     BP, #4
+       add     DST, #4
        b       .Lmemxor3_bytes
 
 .Lmemxor3_uu: