2 Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
/* CFI_PUSH(REG): unwind annotations that accompany a 4-byte push of REG.
   Expands to two CFI macros only (no machine instruction): the CFA
   offset is adjusted by 4, and REG is recorded via cfi_rel_offset at
   offset 0.  */
23 # define CFI_PUSH(REG) \
24 cfi_adjust_cfa_offset (4); \
25 cfi_rel_offset (REG, 0)
27 # define CFI_POP(REG) \
28 cfi_adjust_cfa_offset (-4); \
/* PUSH(REG) / POP(REG): a pushl / popl of REG immediately followed by
   the matching CFI_PUSH / CFI_POP annotation, so every stack adjustment
   made through these macros carries its unwind annotation with it.  */
31 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
32 # define POP(REG) popl REG; CFI_POP (REG)
/* ENTRANCE: push %esi, then %edi (each via PUSH, so with CFI
   annotations).  */
34 # define ENTRANCE PUSH(%esi); PUSH(%edi)
/* RETURN: pop %edi, then %esi (reverse of ENTRANCE) and execute ret.
   The two CFI_PUSH annotations after the ret emit no instructions.
   NOTE(review): presumably they put the unwind state back to "both
   registers pushed" for code that textually follows a mid-function
   RETURN -- confirm against the call sites.  */
35 # define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
40 /* Note: wcscmp uses signed comparison, not unsigned as in strcmp function. */
45 * This implementation uses SSE to compare up to 16 bytes at a time.
80 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
83 and $63, %eax /* esi alignment in cache line */
84 and $63, %edx /* edi alignment in cache line */
130 movdqu 16(%edi), %xmm1
131 movdqu 16(%esi), %xmm2
132 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
133 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
134 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
136 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
137 jnz L(less4_double_words_16)
139 movdqu 32(%edi), %xmm1
140 movdqu 32(%esi), %xmm2
141 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
142 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
143 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
145 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
146 jnz L(less4_double_words_32)
148 movdqu 48(%edi), %xmm1
149 movdqu 48(%esi), %xmm2
150 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
151 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
152 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
154 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
155 jnz L(less4_double_words_48)
159 jmp L(continue_48_48)
197 movdqu 16(%edi), %xmm1
198 movdqu 16(%esi), %xmm2
199 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
200 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
201 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
203 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
204 jnz L(less4_double_words_16)
206 movdqu 32(%edi), %xmm1
207 movdqu 32(%esi), %xmm2
208 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
209 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
210 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
212 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
213 jnz L(less4_double_words_32)
256 pcmpeqd (%edi), %xmm0
260 jnz L(less4_double_words1)
277 movdqu 16(%esi), %xmm2
278 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
279 pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
280 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
282 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
283 jnz L(less4_double_words_16)
285 movdqu 32(%esi), %xmm2
286 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
287 pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
288 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
290 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
291 jnz L(less4_double_words_32)
293 movdqu 48(%esi), %xmm2
294 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
295 pcmpeqd 48(%edi), %xmm2 /* compare first 4 double_words for equality */
296 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
298 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
299 jnz L(less4_double_words_48)
303 jmp L(continue_00_48)
366 movdqu 32(%edi), %xmm1
367 movdqu 32(%esi), %xmm2
368 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
369 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
370 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
372 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
373 jnz L(less4_double_words_32)
375 movdqu 48(%edi), %xmm1
376 movdqu 48(%esi), %xmm2
377 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
378 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
379 psubb %xmm0, %xmm1 /* packed sub of comparison results */
381 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
382 jnz L(less4_double_words_48)
386 jmp L(continue_32_48)
425 movdqu 16(%edi), %xmm1
426 movdqu 16(%esi), %xmm2
427 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
428 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
429 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
431 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
432 jnz L(less4_double_words_16)
458 movdqu 48(%edi), %xmm1
459 movdqu 48(%esi), %xmm2
460 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
461 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
462 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
464 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
465 jnz L(less4_double_words_48)
469 jmp L(continue_16_48)
474 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
475 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
476 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
478 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
479 jnz L(less4_double_words)
481 movdqa 16(%edi), %xmm3
482 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
483 pcmpeqd 16(%esi), %xmm3 /* compare first 4 double_words for equality */
484 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
486 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
487 jnz L(less4_double_words_16)
489 movdqa 32(%edi), %xmm5
490 pcmpeqd %xmm5, %xmm0 /* Any null double_word? */
491 pcmpeqd 32(%esi), %xmm5 /* compare first 4 double_words for equality */
492 psubb %xmm0, %xmm5 /* packed sub of comparison results*/
494 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
495 jnz L(less4_double_words_32)
497 movdqa 48(%edi), %xmm1
498 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
499 pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
500 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
502 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
503 jnz L(less4_double_words_48)
507 jmp L(continue_00_00)
512 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
513 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
514 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
516 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
517 jnz L(less4_double_words)
521 jmp L(continue_00_48)
526 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
527 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
528 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
530 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
531 jnz L(less4_double_words)
533 movdqu 16(%esi), %xmm2
534 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
535 pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
536 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
538 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
539 jnz L(less4_double_words_16)
543 jmp L(continue_00_48)
548 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
549 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
550 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
552 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
553 jnz L(less4_double_words)
555 movdqu 16(%esi), %xmm2
556 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
557 pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
558 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
560 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
561 jnz L(less4_double_words_16)
563 movdqu 32(%esi), %xmm2
564 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
565 pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
566 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
568 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
569 jnz L(less4_double_words_32)
573 jmp L(continue_00_48)
577 pcmpeqd (%esi), %xmm0
581 jnz L(less4_double_words1)
598 movdqu 16(%edi), %xmm1
599 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
600 pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
601 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
603 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
604 jnz L(less4_double_words_16)
606 movdqu 32(%edi), %xmm1
607 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
608 pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
609 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
611 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
612 jnz L(less4_double_words_32)
614 movdqu 48(%edi), %xmm1
615 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
616 pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
617 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
619 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
620 jnz L(less4_double_words_48)
624 jmp L(continue_48_00)
629 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
630 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
631 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
633 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
634 jnz L(less4_double_words)
638 jmp L(continue_48_00)
643 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
644 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
645 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
647 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
648 jnz L(less4_double_words)
650 movdqu 16(%edi), %xmm1
651 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
652 pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
653 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
655 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
656 jnz L(less4_double_words_16)
660 jmp L(continue_48_00)
665 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
666 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
667 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
669 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
670 jnz L(less4_double_words)
672 movdqu 16(%edi), %xmm1
673 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
674 pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
675 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
677 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
678 jnz L(less4_double_words_16)
680 movdqu 32(%edi), %xmm1
681 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
682 pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
683 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
685 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
686 jnz L(less4_double_words_32)
690 jmp L(continue_48_00)
696 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
697 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
698 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
700 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
701 jnz L(less4_double_words)
705 jmp L(continue_48_48)
711 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
712 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
713 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
715 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
716 jnz L(less4_double_words)
718 movdqu 16(%edi), %xmm3
719 movdqu 16(%esi), %xmm4
720 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
721 pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
722 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
724 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
725 jnz L(less4_double_words_16)
729 jmp L(continue_48_48)
735 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
736 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
737 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
739 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
740 jnz L(less4_double_words)
742 movdqu 16(%edi), %xmm3
743 movdqu 16(%esi), %xmm4
744 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
745 pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
746 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
748 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
749 jnz L(less4_double_words_16)
751 movdqu 32(%edi), %xmm1
752 movdqu 32(%esi), %xmm2
753 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
754 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
755 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
757 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
758 jnz L(less4_double_words_32)
762 jmp L(continue_48_48)
768 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
769 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
770 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
772 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
773 jnz L(less4_double_words)
775 movdqu 16(%edi), %xmm1
776 movdqu 16(%esi), %xmm2
777 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
778 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
779 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
781 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
782 jnz L(less4_double_words_16)
786 jmp L(continue_32_48)
792 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
793 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
794 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
796 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
797 jnz L(less4_double_words)
801 jmp L(continue_16_48)
807 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
808 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
809 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
811 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
812 jnz L(less4_double_words)
816 jmp L(continue_32_48)
819 L(less4_double_words1):
844 L(less4_double_words):
847 jz L(next_two_double_words)
849 jz L(second_double_word)
856 L(second_double_word):
863 L(next_two_double_words):
865 jz L(fourth_double_word)
872 L(fourth_double_word):
879 L(less4_double_words_16):
882 jz L(next_two_double_words_16)
884 jz L(second_double_word_16)
891 L(second_double_word_16):
898 L(next_two_double_words_16):
900 jz L(fourth_double_word_16)
907 L(fourth_double_word_16):
914 L(less4_double_words_32):
917 jz L(next_two_double_words_32)
919 jz L(second_double_word_32)
926 L(second_double_word_32):
933 L(next_two_double_words_32):
935 jz L(fourth_double_word_32)
942 L(fourth_double_word_32):
949 L(less4_double_words_48):
952 jz L(next_two_double_words_48)
954 jz L(second_double_word_48)
961 L(second_double_word_48):
968 L(next_two_double_words_48):
970 jz L(fourth_double_word_48)
977 L(fourth_double_word_48):