From: Niels Möller Date: Wed, 6 Feb 2013 13:29:08 +0000 (+0100) Subject: ARM: Optimized aligned case of memxor3. X-Git-Tag: nettle_2.7_release_20130424~115 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5b6cf250d8c3efdd3d3b9397aeda7c45042a2d3a;p=thirdparty%2Fnettle.git ARM: Optimized aligned case of memxor3. --- diff --git a/ChangeLog b/ChangeLog index 0d881de6..90f03b7d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,7 @@ 2013-02-06 Niels Möller - * armv7/memxor.asm (memxor): Optimized aligned case, now runs at - 0.75 cycles/byte. + * armv7/memxor.asm (memxor, memxor3): Optimized aligned case, now + runs at 0.75 cycles/byte. * armv7/README: New file. * armv7/machine.m4: New (empty) file. diff --git a/armv7/memxor.asm b/armv7/memxor.asm index fe4f9e8a..52d4bf46 100644 --- a/armv7/memxor.asm +++ b/armv7/memxor.asm @@ -295,16 +295,40 @@ PROLOGUE(memxor3) bne .Lmemxor3_au ; C a, b and dst all have the same alignment. + sub AP, #4 + sub BP, #4 + sub DST, #4 + tst N, #4 + it ne + subne N, #4 + bne .Lmemxor3_aligned_word_loop -.Lmemxor3_aligned_word_loop: - ldr r4, [AP, #-4]! - ldr r5, [BP, #-4]! + ldr r4, [AP], #-4 + ldr r5, [BP], #-4 eor r4, r5 - str r4, [DST, #-4]! - subs N, #4 + str r4, [DST], #-4 + subs N, #8 + bcc .Lmemxor3_aligned_word_end + +.Lmemxor3_aligned_word_loop: + ldr r4, [AP, #-4] + ldr r5, [AP], #-8 + ldr r6, [BP, #-4] + ldr r7, [BP], #-8 + + eor r4, r6 + eor r5, r7 + subs N, #8 + str r4, [DST, #-4] + str r5, [DST], #-8 + bcs .Lmemxor3_aligned_word_loop - adds N, #4 +.Lmemxor3_aligned_word_end: + adds N, #8 beq .Lmemxor3_done + add AP, #4 + add BP, #4 + add DST, #4 b .Lmemxor3_bytes .Lmemxor3_uu: