git.ipfire.org Git - thirdparty/nettle.git/commitdiff
Implemented sse2-loop. Configured at compile time, and currently
disabled.

author     Niels Möller <nisse@lysator.liu.se>
           Mon, 3 Oct 2011 07:43:08 +0000 (09:43 +0200)
committer  Niels Möller <nisse@lysator.liu.se>
           Mon, 3 Oct 2011 07:43:08 +0000 (09:43 +0200)

Rev: nettle/x86_64/memxor.asm:1.3
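
For readers who do not want to decode the m4-wrapped assembly below, the new inner loop is roughly the following C sketch using SSE2 intrinsics (illustrative only, not part of the commit; the function name is invented here). It mirrors the movdqu / pxor / movdqa sequence of .Lsse2_loop: two unaligned 16-byte loads, an xor, and one aligned 16-byte store, with the count walked downward from the end of the buffers.

#include <emmintrin.h>   /* SSE2 intrinsics */
#include <stddef.h>
#include <stdint.h>

/* Sketch of the .Lsse2_loop body.  Assumes dst + n is 16-byte aligned
   on entry (the asm arranges this); a and b may be unaligned. */
size_t
memxor3_sse2_core(uint8_t *dst, const uint8_t *a, const uint8_t *b, size_t n)
{
  while (n >= 16)
    {
      n -= 16;
      __m128i x = _mm_loadu_si128((const __m128i *)(a + n));   /* movdqu */
      __m128i y = _mm_loadu_si128((const __m128i *)(b + n));   /* movdqu */
      _mm_store_si128((__m128i *)(dst + n),
                      _mm_xor_si128(x, y));                    /* pxor + movdqa */
    }
  return n;   /* 0..15 unprocessed bytes remain at the start */
}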

x86_64/memxor.asm

index d9b05b18e5c5aa50c891e29ef182e4139fc45ea3..7a5a23b688708103d730dba3c5cd78e4bcd5b6ca 100644
@@ -28,7 +28,9 @@ define(<TMP2>, <%r9>)
 define(<CNT>, <%rdi>)
 define(<S0>, <%r11>)
 define(<S1>, <%rdi>) C Overlaps with CNT 
-       
+
+define(<USE_SSE2>, <no>)
+
        .file "memxor.asm"
 
        .text
@@ -78,6 +80,10 @@ PROLOGUE(memxor3)
        jnz     .Lalign_loop
 
 .Laligned:
+ifelse(USE_SSE2, yes, <
+       cmp     $16, N
+       jnc     .Lsse2_case
+>)
        C Check for the case that AP and BP have the same alignment,
        C but different from DST.
        mov     AP, TMP
@@ -209,4 +215,40 @@ C  jz      .Ldone
 
 .Ldone:
        ret
+
+ifelse(USE_SSE2, yes, <
+
+.Lsse2_case:
+       lea     (DST, N), TMP
+       test    $8, TMP
+       jz      .Lsse2_next
+       sub     $8, N
+       mov     (AP, N), TMP
+       xor     (BP, N), TMP
+       mov     TMP, (DST, N)
+       jmp     .Lsse2_next
+
+       ALIGN(4)
+.Lsse2_loop:
+       movdqu  (AP, N), %xmm0
+       movdqu  (BP, N), %xmm1
+       pxor    %xmm0, %xmm1
+       movdqa  %xmm1, (DST, N)
+.Lsse2_next:
+       sub     $16, N
+       ja      .Lsse2_loop
+       
+       C FIXME: See if we can do a full word first, before the
+       C byte-wise final loop.
+       jnz     .Lfinal         
+
+       C Final operation is aligned
+       movdqu  (AP), %xmm0
+       movdqu  (BP), %xmm1
+       pxor    %xmm0, %xmm1
+       movdqa  %xmm1, (DST)
+       ret
+>)     
+       
+
 EPILOGUE(memxor3)
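
With the default define(<USE_SSE2>, <no>) both ifelse blocks expand to nothing, so the assembled code is unchanged until the define is flipped to yes. As a hedged illustration (function names invented, not part of the commit), the control flow around .Lsse2_case corresponds to the following C: one 8-byte scalar xor upgrades the 8-byte-aligned end pointer dst + n to 16-byte alignment when necessary, the 16-byte loop from the sketch above does the bulk of the work, and anything left over is finished byte by byte, as the asm does by branching to .Lfinal.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* 16-byte SSE2 core from the sketch above; returns the 0..15 leftover bytes. */
size_t memxor3_sse2_core(uint8_t *dst, const uint8_t *a,
                         const uint8_t *b, size_t n);

/* Sketch of the .Lsse2_case path.  Preconditions as in the asm:
   n >= 16 and dst + n already 8-byte aligned (established by the
   byte-alignment loop earlier in memxor3). */
void
memxor3_sse2_case(uint8_t *dst, const uint8_t *a, const uint8_t *b, size_t n)
{
  if ((uintptr_t)(dst + n) & 8)
    {
      /* One 8-byte scalar xor brings dst + n to 16-byte alignment
         (the test $8 / sub $8 / mov / xor / mov sequence). */
      uint64_t x, y;
      n -= 8;
      memcpy(&x, a + n, sizeof x);
      memcpy(&y, b + n, sizeof y);
      x ^= y;
      memcpy(dst + n, &x, sizeof x);
    }

  /* Bulk of the work: 16 bytes per iteration, counting n down. */
  n = memxor3_sse2_core(dst, a, b, n);

  /* Byte-wise tail for the remaining n < 16 bytes (the .Lfinal case);
     the FIXME in the commit suggests doing a full word here first. */
  while (n > 0)
    {
      n--;
      dst[n] = a[n] ^ b[n];
    }
}

Counting n down and storing with movdqa is what makes the single 8-byte fixup sufficient: once dst + n is 16-byte aligned, every store address in the loop stays 16-byte aligned.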