disabled.
Rev: nettle/x86_64/memxor.asm:1.3
define(<CNT>, <%rdi>)
define(<S0>, <%r11>)
define(<S1>, <%rdi>) C Overlaps with CNT
-
+
+define(<USE_SSE2>, <no>)
+
.file "memxor.asm"
.text
jnz .Lalign_loop
.Laligned:
+ifelse(USE_SSE2, yes, <
+ cmp $16, N
+ jnc .Lsse2_case
+>)
C Check for the case that AP and BP have the same alignment,
C but different from DST.
mov AP, TMP
.Ldone:
ret
+
+ifelse(USE_SSE2, yes, <
+
+.Lsse2_case:
+ lea (DST, N), TMP
+ test $8, TMP
+ jz .Lsse2_next
+ sub $8, N
+ mov (AP, N), TMP
+ xor (BP, N), TMP
+ mov TMP, (DST, N)
+ jmp .Lsse2_next
+
+ ALIGN(4)
+.Lsse2_loop:
+ movdqu (AP, N), %xmm0
+ movdqu (BP, N), %xmm1
+ pxor %xmm0, %xmm1
+ movdqa %xmm1, (DST, N)
+.Lsse2_next:
+ sub $16, N
+ ja .Lsse2_loop
+
+ C FIXME: See if we can do a full word first, before the
+ C byte-wise final loop.
+ jnz .Lfinal
+
+ C Final operation is aligned
+ movdqu (AP), %xmm0
+ movdqu (BP), %xmm1
+ pxor %xmm0, %xmm1
+ movdqa %xmm1, (DST)
+ ret
+>)
+
+
EPILOGUE(memxor3)