From: Niels Möller
Date: Mon, 3 Oct 2011 07:43:08 +0000 (+0200)
Subject: Implemented sse2-loop. Configured at compile time, and currently
X-Git-Tag: converted-master-branch-to-git~47
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=dd8652d4b27173f2adefa77922b5fd3e18ddc8ef;p=thirdparty%2Fnettle.git

Implemented sse2-loop. Configured at compile time, and currently
disabled.

Rev: nettle/x86_64/memxor.asm:1.3
---

diff --git a/x86_64/memxor.asm b/x86_64/memxor.asm
index d9b05b18..7a5a23b6 100644
--- a/x86_64/memxor.asm
+++ b/x86_64/memxor.asm
@@ -28,7 +28,9 @@ define(<TMP2>, <%r9>)
 define(<CNT>, <%rdi>)
 define(<S0>, <%r11>)
 define(<S1>, <%rdi>) C Overlaps with CNT
-
+
+define(<USE_SSE2>, <no>)
+
 	.file "memxor.asm"
 
 	.text
@@ -78,6 +80,10 @@ PROLOGUE(memxor3)
 	jnz	.Lalign_loop
 
 .Laligned:
+ifelse(USE_SSE2, yes, <
+	cmp	$16, N
+	jnc	.Lsse2_case
+>)
 	C Check for the case that AP and BP have the same alignment,
 	C but different from DST.
 	mov	AP, TMP
@@ -209,4 +215,40 @@ C	jz	.Ldone
 
 .Ldone:
 	ret
+
+ifelse(USE_SSE2, yes, <
+
+.Lsse2_case:
+	lea	(DST, N), TMP
+	test	$8, TMP
+	jz	.Lsse2_next
+	sub	$8, N
+	mov	(AP, N), TMP
+	xor	(BP, N), TMP
+	mov	TMP, (DST, N)
+	jmp	.Lsse2_next
+
+	ALIGN(4)
+.Lsse2_loop:
+	movdqu	(AP, N), %xmm0
+	movdqu	(BP, N), %xmm1
+	pxor	%xmm0, %xmm1
+	movdqa	%xmm1, (DST, N)
+.Lsse2_next:
+	sub	$16, N
+	ja	.Lsse2_loop
+
+	C FIXME: See if we can do a full word first, before the
+	C byte-wise final loop.
+	jnz	.Lfinal
+
+	C Final operation is aligned
+	movdqu	(AP), %xmm0
+	movdqu	(BP), %xmm1
+	pxor	%xmm0, %xmm1
+	movdqa	%xmm1, (DST)
+	ret
+>)
+
+
 EPILOGUE(memxor3)