]> git.ipfire.org Git - thirdparty/nettle.git/commitdiff
ARM: Optimized aligned case of memxor.
author Niels Möller <nisse@lysator.liu.se>
Wed, 6 Feb 2013 12:13:29 +0000 (13:13 +0100)
committer Niels Möller <nisse@lysator.liu.se>
Wed, 6 Feb 2013 12:13:29 +0000 (13:13 +0100)
ChangeLog
armv7/memxor.asm

index e05dedbf485777f55932048b7de80a3c5b4ba4fc..0d881de6bd950fd121271c6339e1a4a32f752796 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 2013-02-06  Niels Möller  <nisse@lysator.liu.se>
 
+       * armv7/memxor.asm (memxor): Optimized aligned case, now runs at
+       0.75 cycles/byte.
+
        * armv7/README: New file.
        * armv7/machine.m4: New (empty) file.
        * armv7/memxor.asm: Initial assembly implementation.
index 78762d03d1ca27028f76e8e2b73e1fe7797d9c80..80e725e7d7c3174a48e6e5343d9725949eb348f5 100644 (file)
@@ -131,14 +131,36 @@ PROLOGUE(memxor)
        b       .Lmemxor_bytes
 
 .Lmemxor_same:
+       tst     N, #4
+       it      ne
+       subne   N, #4
+       bne     .Lmemxor_same_loop
+
        ldr     r3, [SRC], #+4
        ldr     r4, [DST]
        eor     r3, r4
        str     r3, [DST], #+4
-
-       subs    N, #4
-       bcs     .Lmemxor_same
-       adds    N, #4
+       
+       subs    N, #8
+       bcc     .Lmemxor_same_end
+
+.Lmemxor_same_loop:
+       C 6 cycles per iteration, 0.75 cycles/byte
+       ldr     r4, [SRC, #+4]
+       ldr     r3, [SRC], #+8
+       ldr     r6, [DST, #+4]
+       ldr     r5, [DST]
+       
+       eor     r4, r6
+       eor     r3, r5
+       subs    N, #8
+       
+       str     r4, [DST, #+4]
+       str     r3, [DST], #+8
+       bcs     .Lmemxor_same_loop
+       
+.Lmemxor_same_end:
+       adds    N, #8
        beq     .Lmemxor_done
        b       .Lmemxor_bytes
 EPILOGUE(memxor)