2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 # include "asm-syntax.h"
/* CFI_PUSH(REG): unwind annotations that accompany a 4-byte push of REG.
   The CFA offset grows by 4 and REG is recorded as saved at offset 0
   from the current CFA register.  Expands to directives only.  */
26 # define CFI_PUSH(REG) \
27 cfi_adjust_cfa_offset (4); \
28 cfi_rel_offset (REG, 0)
/* CFI_POP(REG): unwind annotation that accompanies a 4-byte pop; the CFA
   offset shrinks by 4.
   NOTE(review): the cfi_adjust_cfa_offset line ends in a continuation
   backslash, but no continuation line is visible in this excerpt
   (original lines 32-33 are absent) -- confirm that the macro's final
   line is present in the real file.
   PUSH(REG) / POP(REG): pushl / popl of REG paired with the matching
   CFI_PUSH / CFI_POP annotation.  */
30 # define CFI_POP(REG) \
31 cfi_adjust_cfa_offset (-4); \
34 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
35 # define POP(REG) popl REG; CFI_POP (REG)
/* STRCMP: symbol name given to this implementation.  */
38 # define STRCMP __wcscmp_sse2
/* ENTRANCE: save %esi, then %edi, on the stack with CFI annotations.  */
41 # define ENTRANCE PUSH(%esi); PUSH(%edi)
/* RETURN: restore %edi and %esi (reverse of ENTRANCE) and return.
   The two CFI_PUSH annotations after `ret' expand to CFI directives
   only; they re-declare both registers as saved -- presumably so that
   code assembled after a RETURN keeps unwind info matching the
   post-ENTRANCE stack layout (TODO confirm).  */
42 # define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
50 * This implementation uses SSE to compare up to 16 bytes at a time.
/* Setup fragment followed by three unrolled 16-byte compare steps at
   offsets 16, 32 and 48 from %edi / %esi.

   Shape of each step:
     - load 16 bytes from each string (movdqu: no alignment required);
     - pcmpeqd against %xmm0 marks double_words equal to %xmm0's lanes
       (the null check);
     - pcmpeqd between the two loads marks double_words that are equal;
     - psubb subtracts the null mask from the equality mask, bytewise;
     - `sub $0xffff, %edx' leaves zero only when %edx was 0xffff;
       otherwise control goes to the less4_double_words_N handler for
       that offset.

   NOTE(review): the instruction that moves the combined mask into %edx
   is not visible in this excerpt (presumably a pmovmskb between psubb
   and sub) -- confirm against the full file.  */
85 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
88 and $63, %eax /* esi alignment in cache line */
89 and $63, %edx /* edi alignment in cache line */
135 movdqu 16(%edi), %xmm1
136 movdqu 16(%esi), %xmm2
137 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
138 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 16 for equality */
139 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
141 sub $0xffff, %edx /* if the 4 double_words at offset 16 are same, edx == 0xffff */
142 jnz L(less4_double_words_16)
144 movdqu 32(%edi), %xmm1
145 movdqu 32(%esi), %xmm2
146 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
147 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 32 for equality */
148 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
150 sub $0xffff, %edx /* if the 4 double_words at offset 32 are same, edx == 0xffff */
151 jnz L(less4_double_words_32)
153 movdqu 48(%edi), %xmm1
154 movdqu 48(%esi), %xmm2
155 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
156 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 48 for equality */
157 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
159 sub $0xffff, %edx /* if the 4 double_words at offset 48 are same, edx == 0xffff */
160 jnz L(less4_double_words_48)
164 jmp L(continue_48_48)
/* Unrolled 16-byte compare steps.  In every step the first pcmpeqd marks
   double_words equal to %xmm0's lanes (null check), the second marks
   double_words equal between the two strings, and psubb combines the
   two masks.  A nonzero result of `sub $0xffff, %edx' branches to the
   less4_double_words_N handler for the step's offset.
   NOTE(review): the instruction that loads %edx from the mask is not
   visible in this excerpt -- confirm against the full file.  */
202 movdqu 16(%edi), %xmm1
203 movdqu 16(%esi), %xmm2
204 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
205 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 16 for equality */
206 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
208 sub $0xffff, %edx /* if the 4 double_words at offset 16 are same, edx == 0xffff */
209 jnz L(less4_double_words_16)
211 movdqu 32(%edi), %xmm1
212 movdqu 32(%esi), %xmm2
213 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
214 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 32 for equality */
215 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
217 sub $0xffff, %edx /* if the 4 double_words at offset 32 are same, edx == 0xffff */
218 jnz L(less4_double_words_32)
/* Null check only: the 16 bytes at (%edi) are compared against %xmm0.
   The memory-operand form of pcmpeqd requires a 16-byte aligned
   address.  The flag-setting instruction that feeds the jnz is not
   visible in this excerpt.  */
261 pcmpeqd (%edi), %xmm0
265 jnz L(less4_double_words1)
/* From here %esi is loaded with movdqu while %edi is used directly as a
   memory operand, so this path relies on %edi being 16-byte aligned.  */
282 movdqu 16(%esi), %xmm2
283 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
284 pcmpeqd 16(%edi), %xmm2 /* compare the 4 double_words at offset 16 for equality */
285 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
287 sub $0xffff, %edx /* if the 4 double_words at offset 16 are same, edx == 0xffff */
288 jnz L(less4_double_words_16)
290 movdqu 32(%esi), %xmm2
291 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
292 pcmpeqd 32(%edi), %xmm2 /* compare the 4 double_words at offset 32 for equality */
293 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
295 sub $0xffff, %edx /* if the 4 double_words at offset 32 are same, edx == 0xffff */
296 jnz L(less4_double_words_32)
298 movdqu 48(%esi), %xmm2
299 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
300 pcmpeqd 48(%edi), %xmm2 /* compare the 4 double_words at offset 48 for equality */
301 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
303 sub $0xffff, %edx /* if the 4 double_words at offset 48 are same, edx == 0xffff */
304 jnz L(less4_double_words_48)
308 jmp L(continue_00_48)
/* Two unrolled sequences of 16-byte compare steps; both strings are
   loaded with movdqu, so neither pointer needs to be aligned here.
   The first sequence ends at L(continue_32_48), the second at
   L(continue_16_48).  In each step a nonzero result of
   `sub $0xffff, %edx' branches to the less4_double_words_N handler for
   the step's offset.
   NOTE(review): the instruction that loads %edx from the mask is not
   visible in this excerpt; original lines 438-462 (between the offset
   16 and offset 48 steps of the second sequence) are also absent.  */
371 movdqu 32(%edi), %xmm1
372 movdqu 32(%esi), %xmm2
373 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
374 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 32 for equality */
375 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
377 sub $0xffff, %edx /* if the 4 double_words at offset 32 are same, edx == 0xffff */
378 jnz L(less4_double_words_32)
380 movdqu 48(%edi), %xmm1
381 movdqu 48(%esi), %xmm2
382 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
383 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 48 for equality */
384 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
386 sub $0xffff, %edx /* if the 4 double_words at offset 48 are same, edx == 0xffff */
387 jnz L(less4_double_words_48)
391 jmp L(continue_32_48)
430 movdqu 16(%edi), %xmm1
431 movdqu 16(%esi), %xmm2
432 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
433 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 16 for equality */
434 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
436 sub $0xffff, %edx /* if the 4 double_words at offset 16 are same, edx == 0xffff */
437 jnz L(less4_double_words_16)
463 movdqu 48(%edi), %xmm1
464 movdqu 48(%esi), %xmm2
465 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
466 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 48 for equality */
467 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
469 sub $0xffff, %edx /* if the 4 double_words at offset 48 are same, edx == 0xffff */
470 jnz L(less4_double_words_48)
474 jmp L(continue_16_48)
/* Aligned path: four compare steps at offsets 0, 16, 32 and 48, ending
   at L(continue_00_00).  %edi is read with movdqa and %esi is used as a
   memory operand of pcmpeqd; both forms require 16-byte aligned
   addresses, so this path relies on both pointers being aligned.
   The load of %xmm1 for the offset 0 step and the instruction that
   loads %edx from the mask are not visible in this excerpt.  */
479 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
480 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
481 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
483 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
484 jnz L(less4_double_words)
486 movdqa 16(%edi), %xmm3
487 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
488 pcmpeqd 16(%esi), %xmm3 /* compare the 4 double_words at offset 16 for equality */
489 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
491 sub $0xffff, %edx /* if the 4 double_words at offset 16 are same, edx == 0xffff */
492 jnz L(less4_double_words_16)
494 movdqa 32(%edi), %xmm5
495 pcmpeqd %xmm5, %xmm0 /* Any null double_word? */
496 pcmpeqd 32(%esi), %xmm5 /* compare the 4 double_words at offset 32 for equality */
497 psubb %xmm0, %xmm5 /* packed sub of comparison results*/
499 sub $0xffff, %edx /* if the 4 double_words at offset 32 are same, edx == 0xffff */
500 jnz L(less4_double_words_32)
502 movdqa 48(%edi), %xmm1
503 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
504 pcmpeqd 48(%esi), %xmm1 /* compare the 4 double_words at offset 48 for equality */
505 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
507 sub $0xffff, %edx /* if the 4 double_words at offset 48 are same, edx == 0xffff */
508 jnz L(less4_double_words_48)
512 jmp L(continue_00_00)
/* Three sequences of one, two and three compare steps, each ending at
   L(continue_00_48).  %esi data is held in %xmm2 (loaded with movdqu
   where the load is visible) and %edi is used directly as a memory
   operand of pcmpeqd, which requires %edi to be 16-byte aligned.
   The offset 0 loads of %xmm2 and the instruction that loads %edx from
   the mask are not visible in this excerpt.  */
517 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
518 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
519 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
521 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
522 jnz L(less4_double_words)
526 jmp L(continue_00_48)
531 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
532 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
533 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
535 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
536 jnz L(less4_double_words)
538 movdqu 16(%esi), %xmm2
539 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
540 pcmpeqd 16(%edi), %xmm2 /* compare the 4 double_words at offset 16 for equality */
541 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
543 sub $0xffff, %edx /* if the 4 double_words at offset 16 are same, edx == 0xffff */
544 jnz L(less4_double_words_16)
548 jmp L(continue_00_48)
553 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
554 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
555 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
557 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
558 jnz L(less4_double_words)
560 movdqu 16(%esi), %xmm2
561 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
562 pcmpeqd 16(%edi), %xmm2 /* compare the 4 double_words at offset 16 for equality */
563 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
565 sub $0xffff, %edx /* if the 4 double_words at offset 16 are same, edx == 0xffff */
566 jnz L(less4_double_words_16)
568 movdqu 32(%esi), %xmm2
569 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
570 pcmpeqd 32(%edi), %xmm2 /* compare the 4 double_words at offset 32 for equality */
571 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
573 sub $0xffff, %edx /* if the 4 double_words at offset 32 are same, edx == 0xffff */
574 jnz L(less4_double_words_32)
578 jmp L(continue_00_48)
/* Mirror image of the %edi-aligned paths: here %edi data is held in
   %xmm1 (loaded with movdqu where the load is visible) and %esi is used
   directly as a memory operand of pcmpeqd, which requires %esi to be
   16-byte aligned.  Every sequence ends at L(continue_48_00).
   The offset 0 loads of %xmm1, the flag-setting instruction before the
   first jnz, and the instruction that loads %edx from the mask are not
   visible in this excerpt.  */
/* Null check only on the 16 bytes at (%esi).  */
582 pcmpeqd (%esi), %xmm0
586 jnz L(less4_double_words1)
603 movdqu 16(%edi), %xmm1
604 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
605 pcmpeqd 16(%esi), %xmm1 /* compare the 4 double_words at offset 16 for equality */
606 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
608 sub $0xffff, %edx /* if the 4 double_words at offset 16 are same, edx == 0xffff */
609 jnz L(less4_double_words_16)
611 movdqu 32(%edi), %xmm1
612 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
613 pcmpeqd 32(%esi), %xmm1 /* compare the 4 double_words at offset 32 for equality */
614 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
616 sub $0xffff, %edx /* if the 4 double_words at offset 32 are same, edx == 0xffff */
617 jnz L(less4_double_words_32)
619 movdqu 48(%edi), %xmm1
620 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
621 pcmpeqd 48(%esi), %xmm1 /* compare the 4 double_words at offset 48 for equality */
622 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
624 sub $0xffff, %edx /* if the 4 double_words at offset 48 are same, edx == 0xffff */
625 jnz L(less4_double_words_48)
629 jmp L(continue_48_00)
634 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
635 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
636 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
638 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
639 jnz L(less4_double_words)
643 jmp L(continue_48_00)
648 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
649 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
650 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
652 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
653 jnz L(less4_double_words)
655 movdqu 16(%edi), %xmm1
656 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
657 pcmpeqd 16(%esi), %xmm1 /* compare the 4 double_words at offset 16 for equality */
658 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
660 sub $0xffff, %edx /* if the 4 double_words at offset 16 are same, edx == 0xffff */
661 jnz L(less4_double_words_16)
665 jmp L(continue_48_00)
670 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
671 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
672 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
674 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
675 jnz L(less4_double_words)
677 movdqu 16(%edi), %xmm1
678 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
679 pcmpeqd 16(%esi), %xmm1 /* compare the 4 double_words at offset 16 for equality */
680 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
682 sub $0xffff, %edx /* if the 4 double_words at offset 16 are same, edx == 0xffff */
683 jnz L(less4_double_words_16)
685 movdqu 32(%edi), %xmm1
686 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
687 pcmpeqd 32(%esi), %xmm1 /* compare the 4 double_words at offset 32 for equality */
688 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
690 sub $0xffff, %edx /* if the 4 double_words at offset 32 are same, edx == 0xffff */
691 jnz L(less4_double_words_32)
695 jmp L(continue_48_00)
/* Register-to-register compare sequences: both strings are held in xmm
   registers (loaded with movdqu where the load is visible), so neither
   pointer needs to be aligned.  The sequences end at
   L(continue_48_48), L(continue_32_48) or L(continue_16_48).
   In each step a nonzero result of `sub $0xffff, %edx' branches to the
   less4_double_words handler for the step's offset.
   The loads feeding the steps that branch to L(less4_double_words) and
   the instruction that loads %edx from the mask are not visible in this
   excerpt, so those steps keep their original comments.  */
701 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
702 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
703 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
705 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
706 jnz L(less4_double_words)
710 jmp L(continue_48_48)
716 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
717 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
718 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
720 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
721 jnz L(less4_double_words)
723 movdqu 16(%edi), %xmm3
724 movdqu 16(%esi), %xmm4
725 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
726 pcmpeqd %xmm4, %xmm3 /* compare the 4 double_words at offset 16 for equality */
727 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
729 sub $0xffff, %edx /* if the 4 double_words at offset 16 are same, edx == 0xffff */
730 jnz L(less4_double_words_16)
734 jmp L(continue_48_48)
740 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
741 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
742 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
744 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
745 jnz L(less4_double_words)
747 movdqu 16(%edi), %xmm3
748 movdqu 16(%esi), %xmm4
749 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
750 pcmpeqd %xmm4, %xmm3 /* compare the 4 double_words at offset 16 for equality */
751 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
753 sub $0xffff, %edx /* if the 4 double_words at offset 16 are same, edx == 0xffff */
754 jnz L(less4_double_words_16)
756 movdqu 32(%edi), %xmm1
757 movdqu 32(%esi), %xmm2
758 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
759 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 32 for equality */
760 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
762 sub $0xffff, %edx /* if the 4 double_words at offset 32 are same, edx == 0xffff */
763 jnz L(less4_double_words_32)
767 jmp L(continue_48_48)
773 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
774 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
775 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
777 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
778 jnz L(less4_double_words)
780 movdqu 16(%edi), %xmm1
781 movdqu 16(%esi), %xmm2
782 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
783 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 16 for equality */
784 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
786 sub $0xffff, %edx /* if the 4 double_words at offset 16 are same, edx == 0xffff */
787 jnz L(less4_double_words_16)
791 jmp L(continue_32_48)
797 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
798 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
799 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
801 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
802 jnz L(less4_double_words)
806 jmp L(continue_16_48)
812 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
813 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
814 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
816 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
817 jnz L(less4_double_words)
821 jmp L(continue_32_48)
/* Handlers reached from the compare steps when a 16-byte block contains
   a mismatch or a null double_word.  There is one handler family per
   block offset (no suffix, _16, _32, _48), all with the same label
   layout:
     less4_double_words      -> jz next_two_double_words
                             -> jz second_double_word
     next_two_double_words   -> jz fourth_double_word
   NOTE(review): only the labels and jz branches are visible in this
   excerpt; the tests that set ZF and the code that computes the return
   value are absent.  The label names suggest that each branch selects
   which of the four double_words in the block decides the result --
   confirm against the full file.  */
/* Entry used by the null-check-only steps (pcmpeqd of a memory operand
   against %xmm0 followed by jnz).  */
824 L(less4_double_words1):
/* Block at offset 0.  */
848 L(less4_double_words):
850 jz L(next_two_double_words)
852 jz L(second_double_word)
858 L(second_double_word):
864 L(next_two_double_words):
866 jz L(fourth_double_word)
872 L(fourth_double_word):
/* Block at offset 16.  */
878 L(less4_double_words_16):
880 jz L(next_two_double_words_16)
882 jz L(second_double_word_16)
888 L(second_double_word_16):
894 L(next_two_double_words_16):
896 jz L(fourth_double_word_16)
902 L(fourth_double_word_16):
/* Block at offset 32.  */
908 L(less4_double_words_32):
910 jz L(next_two_double_words_32)
912 jz L(second_double_word_32)
918 L(second_double_word_32):
924 L(next_two_double_words_32):
926 jz L(fourth_double_word_32)
932 L(fourth_double_word_32):
/* Block at offset 48.  */
938 L(less4_double_words_48):
940 jz L(next_two_double_words_48)
942 jz L(second_double_word_48)
948 L(second_double_word_48):
954 L(next_two_double_words_48):
956 jz L(fourth_double_word_48)
962 L(fourth_double_word_48):