From: Niels Möller Date: Wed, 6 Feb 2013 12:13:29 +0000 (+0100) Subject: ARM: Optimized aligned case of memxor. X-Git-Tag: nettle_2.7_release_20130424~117 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=537f64ba9926a7743c8a2353e0fb533718f90910;p=thirdparty%2Fnettle.git ARM: Optimized aligned case of memxor. --- diff --git a/ChangeLog b/ChangeLog index e05dedbf..0d881de6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2013-02-06 Niels Möller + * armv7/memxor.asm (memxor): Optimized aligned case, now runs at + 0.75 cycles/byte. + * armv7/README: New file. * armv7/machine.m4: New (empty) file. * armv7/memxor.asm: Initial assembly implementation. diff --git a/armv7/memxor.asm b/armv7/memxor.asm index 78762d03..80e725e7 100644 --- a/armv7/memxor.asm +++ b/armv7/memxor.asm @@ -131,14 +131,36 @@ PROLOGUE(memxor) b .Lmemxor_bytes .Lmemxor_same: + tst N, #4 + it ne + subne N, #4 + bne .Lmemxor_same_loop + ldr r3, [SRC], #+4 ldr r4, [DST] eor r3, r4 str r3, [DST], #+4 - - subs N, #4 - bcs .Lmemxor_same - adds N, #4 + + subs N, #8 + bcc .Lmemxor_same_end + +.Lmemxor_same_loop: + C 6 cycles per iteration, 0.75 cycles/byte + ldr r4, [SRC, #+4] + ldr r3, [SRC], #+8 + ldr r6, [DST, #+4] + ldr r5, [DST] + + eor r4, r6 + eor r3, r5 + subs N, #8 + + str r4, [DST, #+4] + str r3, [DST], #+8 + bcs .Lmemxor_same_loop + +.Lmemxor_same_end: + adds N, #8 beq .Lmemxor_done b .Lmemxor_bytes EPILOGUE(memxor)