1 /* strcmp optimized with SSE2.
2 Copyright (C) 2017-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <isa-level.h>
21 /* Continue building at ISA level 2 as the strcmp-sse42 is not always
22 preferable for ISA level == 2 CPUs. */
23 #if ISA_SHOULD_BUILD (2)
25 # define STRCMP_ISA _sse2
26 # include "strcmp-naming.h"
30 # undef UPDATE_STRNCMP_COUNTER
33 # define LABEL(l) L(l)
36 # ifdef USE_AS_STRNCMP
37 /* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
38 if the new counter > the old one or is 0. */
39 # define UPDATE_STRNCMP_COUNTER \
40 /* calculate left number to compare */ \
41 lea -16(%rcx, %r11), %r9; \
43 jb LABEL(strcmp_exitz); \
45 je LABEL(strcmp_exitz); \
48 # elif defined USE_AS_STRCASECMP_L
49 # include "locale-defines.h"
51 # define UPDATE_STRNCMP_COUNTER
52 # elif defined USE_AS_STRNCASECMP_L
53 # include "locale-defines.h"
55 # define UPDATE_STRNCMP_COUNTER \
56 /* calculate left number to compare */ \
57 lea -16(%rcx, %r11), %r9; \
59 jb LABEL(strcmp_exitz); \
61 je LABEL(strcmp_exitz); \
64 # define UPDATE_STRNCMP_COUNTER
68 # ifdef USE_AS_STRCASECMP_L
70 # define ENTRY2(name) ENTRY (name)
71 # define END2(name) END (name)
75 movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
76 mov %fs:(%rax),%RDX_LP
78 /* Either 1 or 5 bytes (depending on whether CET is enabled). */
81 /* FALLTHROUGH to strcasecmp_l. */
82 # elif defined USE_AS_STRNCASECMP_L
84 # define ENTRY2(name) ENTRY (name)
85 # define END2(name) END (name)
89 movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
90 mov %fs:(%rax),%RCX_LP
92 /* Either 1 or 5 bytes (depending on whether CET is enabled). */
95 /* FALLTHROUGH to strncasecmp_l. */
99 # ifdef USE_AS_STRCASECMP_L
100 /* We have to fall back on the C implementation for locales
101 with encodings not matching ASCII for single bytes. */
102 # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
103 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rdx), %RAX_LP /* %rax = pointer stored at LOCALE_T___LOCALES + LC_CTYPE*LP_SIZE from %rdx */
107 testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) /* is bit 0 of the _NL_CTYPE_NONASCII_CASE value set? */
108 jne __strcasecmp_l_nonascii /* yes: leave for the non-ASCII fallback */
109 # elif defined USE_AS_STRNCASECMP_L
110 /* We have to fall back on the C implementation for locales
111 with encodings not matching ASCII for single bytes. */
112 # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
113 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rcx), %RAX_LP /* same lookup, but based on %rcx in the strncasecmp variant */
117 testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) /* is bit 0 of the _NL_CTYPE_NONASCII_CASE value set? */
118 jne __strncasecmp_l_nonascii /* yes: leave for the non-ASCII fallback */
122 * This implementation uses SSE to compare up to 16 bytes at a time.
124 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
125 test %RDX_LP, %RDX_LP
126 je LABEL(strcmp_exitz)
133 /* Use 64bit AND here to avoid long NOP padding. */
134 and $0x3f, %rcx /* rsi alignment in cache line */
135 and $0x3f, %rax /* rdi alignment in cache line */
136 # if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
137 .section .rodata.cst16,"aM",@progbits,16
140 .quad 0x3f3f3f3f3f3f3f3f
141 .quad 0x3f3f3f3f3f3f3f3f
143 .quad 0x9999999999999999
144 .quad 0x9999999999999999
146 .quad 0x2020202020202020
147 .quad 0x2020202020202020
149 movdqa .Llcase_min(%rip), %xmm5
150 # define LCASE_MIN_reg %xmm5
151 movdqa .Llcase_max(%rip), %xmm6
152 # define LCASE_MAX_reg %xmm6
153 movdqa .Lcase_add(%rip), %xmm7
154 # define CASE_ADD_reg %xmm7
157 ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
159 ja LABEL(crosscache) /* rdi: 16-byte load will cross cache line */
162 movhpd 8(%rdi), %xmm1
163 movhpd 8(%rsi), %xmm2
164 # if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
165 # define TOLOWER(reg1, reg2) \
166 movdqa LCASE_MIN_reg, %xmm8; \
167 movdqa LCASE_MIN_reg, %xmm9; \
170 pcmpgtb LCASE_MAX_reg, %xmm8; \
171 pcmpgtb LCASE_MAX_reg, %xmm9; \
172 pandn CASE_ADD_reg, %xmm8; \
173 pandn CASE_ADD_reg, %xmm9; \
176 TOLOWER (%xmm1, %xmm2)
178 # define TOLOWER(reg1, reg2)
180 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
181 pcmpeqb %xmm1, %xmm0 /* Any null chars? (0xff in each byte of %xmm0 where %xmm1 is 0) */
182 pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
183 psubb %xmm0, %xmm1 /* packed sub of comparison results: a byte stays 0xff only where the strings match and the char is not null */
185 sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
186 jnz LABEL(less16bytes) /* If not, find different value or null char */
187 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
189 jbe LABEL(strcmp_exitz) /* finish comparison */
191 add $16, %rsi /* prepare to search next 16 bytes */
192 add $16, %rdi /* prepare to search next 16 bytes */
195 * Determine source and destination string offsets from 16-byte alignment.
196 * Use relative offset difference between the two to determine which case
201 and $0xfffffffffffffff0, %rsi /* round %rsi down to a 16-byte boundary */
202 and $0xfffffffffffffff0, %rdi /* round %rdi down to a 16-byte boundary */
203 mov $0xffff, %edx /* for equivalent offset */
205 and $0xf, %ecx /* offset of rsi */
206 and $0xf, %eax /* offset of rdi */
208 je LABEL(ashr_0) /* rsi and rdi relative offset same */
210 mov %edx, %r8d /* r8d is offset flag for exit tail */
216 lea LABEL(unaligned_table)(%rip), %r10 /* %r10 = address of the dispatch table */
217 movslq (%r10, %r9,4), %r9 /* %r9 = sign-extended 32-bit table entry number %r9 */
218 lea (%r10, %r9), %r10 /* entries are offsets relative to the table base */
219 _CET_NOTRACK jmp *%r10 /* jump to corresponding case */
222 * The following cases will be handled by ashr_0
223 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
224 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
230 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
231 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
232 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
233 pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
236 TOLOWER (%xmm1, %xmm2)
237 pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
239 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
241 shr %cl, %edx /* adjust 0xffff for offset */
242 shr %cl, %r9d /* adjust for 16-byte offset */
245 * edx must be the same as r9d if the remaining (16-rcx) bytes are equal to
246 * those starting from (16-rax) and no null char was seen.
248 jne LABEL(less32bytes) /* mismatch or null char */
249 UPDATE_STRNCMP_COUNTER
252 pxor %xmm0, %xmm0 /* clear xmm0, may have changed above */
255 * Now both strings are aligned at 16-byte boundary. Loop over strings
256 * checking 32-bytes per iteration.
260 movdqa (%rsi, %rcx), %xmm1
261 movdqa (%rdi, %rcx), %xmm2
262 TOLOWER (%xmm1, %xmm2)
269 jnz LABEL(exit) /* mismatch or null char seen */
271 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
273 jbe LABEL(strcmp_exitz)
276 movdqa (%rsi, %rcx), %xmm1
277 movdqa (%rdi, %rcx), %xmm2
278 TOLOWER (%xmm1, %xmm2)
286 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
288 jbe LABEL(strcmp_exitz)
291 jmp LABEL(loop_ashr_0)
294 * The following cases will be handled by ashr_1
295 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
296 * n(15) n -15 0(15 +(n-15) - n) ashr_1
303 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
304 pslldq $15, %xmm2 /* shift first string to align with second */
305 TOLOWER (%xmm1, %xmm2)
306 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
307 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
309 shr %cl, %edx /* adjust 0xffff for offset */
310 shr %cl, %r9d /* adjust for 16-byte offset */
312 jnz LABEL(less32bytes) /* mismatch or null char seen */
314 UPDATE_STRNCMP_COUNTER
317 mov $16, %rcx /* index for loads*/
318 mov $1, %r9d /* byte position left over from less32bytes case */
320 * Setup %r10 value allows us to detect crossing a page boundary.
321 * When %r10 goes positive we have crossed a page boundary and
322 * need to do a nibble.
325 and $0xfff, %r10 /* offset into 4K page */
326 sub $0x1000, %r10 /* subtract 4K pagesize */
331 jg LABEL(nibble_ashr_1) /* cross page boundary */
333 LABEL(gobble_ashr_1):
334 movdqa (%rsi, %rcx), %xmm1
335 movdqa (%rdi, %rcx), %xmm2
336 movdqa %xmm2, %xmm4 /* store for next cycle */
340 por %xmm3, %xmm2 /* merge into one 16byte value */
342 TOLOWER (%xmm1, %xmm2)
351 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
353 jbe LABEL(strcmp_exitz)
359 jg LABEL(nibble_ashr_1) /* cross page boundary */
361 movdqa (%rsi, %rcx), %xmm1
362 movdqa (%rdi, %rcx), %xmm2
363 movdqa %xmm2, %xmm4 /* store for next cycle */
367 por %xmm3, %xmm2 /* merge into one 16byte value */
369 TOLOWER (%xmm1, %xmm2)
378 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
380 jbe LABEL(strcmp_exitz)
384 jmp LABEL(loop_ashr_1)
387 * Nibble avoids loads across page boundary. This is to avoid a potential
388 * access into unmapped memory.
391 LABEL(nibble_ashr_1): /* entered via jg from loop_ashr_1 when a page boundary is about to be crossed */
392 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
395 jnz LABEL(ashr_1_exittail) /* found null char */
397 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
399 jbe LABEL(ashr_1_exittail)
403 sub $0x1000, %r10 /* subtract 4K from %r10 */
404 jmp LABEL(gobble_ashr_1) /* no null char: resume the 16-byte loads */
407 * Once find null char, determine if there is a string mismatch
408 * before the null char.
411 LABEL(ashr_1_exittail):
412 movdqa (%rsi, %rcx), %xmm1
418 * The following cases will be handled by ashr_2
419 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
420 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
429 TOLOWER (%xmm1, %xmm2)
436 jnz LABEL(less32bytes)
438 UPDATE_STRNCMP_COUNTER
441 mov $16, %rcx /* index for loads */
442 mov $2, %r9d /* byte position left over from less32bytes case */
444 * Setup %r10 value allows us to detect crossing a page boundary.
445 * When %r10 goes positive we have crossed a page boundary and
446 * need to do a nibble.
449 and $0xfff, %r10 /* offset into 4K page */
450 sub $0x1000, %r10 /* subtract 4K pagesize */
455 jg LABEL(nibble_ashr_2)
457 LABEL(gobble_ashr_2):
458 movdqa (%rsi, %rcx), %xmm1
459 movdqa (%rdi, %rcx), %xmm2
464 por %xmm3, %xmm2 /* merge into one 16byte value */
466 TOLOWER (%xmm1, %xmm2)
475 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
477 jbe LABEL(strcmp_exitz)
484 jg LABEL(nibble_ashr_2) /* cross page boundary */
486 movdqa (%rsi, %rcx), %xmm1
487 movdqa (%rdi, %rcx), %xmm2
492 por %xmm3, %xmm2 /* merge into one 16byte value */
494 TOLOWER (%xmm1, %xmm2)
503 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
505 jbe LABEL(strcmp_exitz)
510 jmp LABEL(loop_ashr_2)
513 LABEL(nibble_ashr_2):
514 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
517 jnz LABEL(ashr_2_exittail)
519 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
521 jbe LABEL(ashr_2_exittail)
526 jmp LABEL(gobble_ashr_2)
529 LABEL(ashr_2_exittail):
530 movdqa (%rsi, %rcx), %xmm1
536 * The following cases will be handled by ashr_3
537 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
538 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
547 TOLOWER (%xmm1, %xmm2)
554 jnz LABEL(less32bytes)
557 UPDATE_STRNCMP_COUNTER
560 mov $16, %rcx /* index for loads */
561 mov $3, %r9d /* byte position left over from less32bytes case */
563 * Setup %r10 value allows us to detect crossing a page boundary.
564 * When %r10 goes positive we have crossed a page boundary and
565 * need to do a nibble.
568 and $0xfff, %r10 /* offset into 4K page */
569 sub $0x1000, %r10 /* subtract 4K pagesize */
574 jg LABEL(nibble_ashr_3)
576 LABEL(gobble_ashr_3):
577 movdqa (%rsi, %rcx), %xmm1
578 movdqa (%rdi, %rcx), %xmm2
583 por %xmm3, %xmm2 /* merge into one 16byte value */
585 TOLOWER (%xmm1, %xmm2)
594 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
596 jbe LABEL(strcmp_exitz)
603 jg LABEL(nibble_ashr_3) /* cross page boundary */
605 movdqa (%rsi, %rcx), %xmm1
606 movdqa (%rdi, %rcx), %xmm2
611 por %xmm3, %xmm2 /* merge into one 16byte value */
613 TOLOWER (%xmm1, %xmm2)
622 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
624 jbe LABEL(strcmp_exitz)
629 jmp LABEL(loop_ashr_3)
632 LABEL(nibble_ashr_3):
633 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
636 jnz LABEL(ashr_3_exittail)
638 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
640 jbe LABEL(ashr_3_exittail)
645 jmp LABEL(gobble_ashr_3)
648 LABEL(ashr_3_exittail):
649 movdqa (%rsi, %rcx), %xmm1
655 * The following cases will be handled by ashr_4
656 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
657 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
666 TOLOWER (%xmm1, %xmm2)
673 jnz LABEL(less32bytes)
676 UPDATE_STRNCMP_COUNTER
679 mov $16, %rcx /* index for loads */
680 mov $4, %r9d /* byte position left over from less32bytes case */
682 * Setup %r10 value allows us to detect crossing a page boundary.
683 * When %r10 goes positive we have crossed a page boundary and
684 * need to do a nibble.
687 and $0xfff, %r10 /* offset into 4K page */
688 sub $0x1000, %r10 /* subtract 4K pagesize */
693 jg LABEL(nibble_ashr_4)
695 LABEL(gobble_ashr_4):
696 movdqa (%rsi, %rcx), %xmm1
697 movdqa (%rdi, %rcx), %xmm2
702 por %xmm3, %xmm2 /* merge into one 16byte value */
704 TOLOWER (%xmm1, %xmm2)
713 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
715 jbe LABEL(strcmp_exitz)
722 jg LABEL(nibble_ashr_4) /* cross page boundary */
724 movdqa (%rsi, %rcx), %xmm1
725 movdqa (%rdi, %rcx), %xmm2
730 por %xmm3, %xmm2 /* merge into one 16byte value */
732 TOLOWER (%xmm1, %xmm2)
741 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
743 jbe LABEL(strcmp_exitz)
748 jmp LABEL(loop_ashr_4)
751 LABEL(nibble_ashr_4):
752 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
755 jnz LABEL(ashr_4_exittail)
757 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
759 jbe LABEL(ashr_4_exittail)
764 jmp LABEL(gobble_ashr_4)
767 LABEL(ashr_4_exittail):
768 movdqa (%rsi, %rcx), %xmm1
774 * The following cases will be handled by ashr_5
775 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
776 * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
785 TOLOWER (%xmm1, %xmm2)
792 jnz LABEL(less32bytes)
795 UPDATE_STRNCMP_COUNTER
798 mov $16, %rcx /* index for loads */
799 mov $5, %r9d /* byte position left over from less32bytes case */
801 * Setup %r10 value allows us to detect crossing a page boundary.
802 * When %r10 goes positive we have crossed a page boundary and
803 * need to do a nibble.
806 and $0xfff, %r10 /* offset into 4K page */
807 sub $0x1000, %r10 /* subtract 4K pagesize */
812 jg LABEL(nibble_ashr_5)
814 LABEL(gobble_ashr_5):
815 movdqa (%rsi, %rcx), %xmm1
816 movdqa (%rdi, %rcx), %xmm2
821 por %xmm3, %xmm2 /* merge into one 16byte value */
823 TOLOWER (%xmm1, %xmm2)
832 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
834 jbe LABEL(strcmp_exitz)
841 jg LABEL(nibble_ashr_5) /* cross page boundary */
843 movdqa (%rsi, %rcx), %xmm1
844 movdqa (%rdi, %rcx), %xmm2
849 por %xmm3, %xmm2 /* merge into one 16byte value */
851 TOLOWER (%xmm1, %xmm2)
860 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
862 jbe LABEL(strcmp_exitz)
867 jmp LABEL(loop_ashr_5)
870 LABEL(nibble_ashr_5):
871 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
874 jnz LABEL(ashr_5_exittail)
876 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
878 jbe LABEL(ashr_5_exittail)
883 jmp LABEL(gobble_ashr_5)
886 LABEL(ashr_5_exittail):
887 movdqa (%rsi, %rcx), %xmm1
893 * The following cases will be handled by ashr_6
894 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
895 * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
904 TOLOWER (%xmm1, %xmm2)
911 jnz LABEL(less32bytes)
914 UPDATE_STRNCMP_COUNTER
917 mov $16, %rcx /* index for loads */
918 mov $6, %r9d /* byte position left over from less32bytes case */
920 * Setup %r10 value allows us to detect crossing a page boundary.
921 * When %r10 goes positive we have crossed a page boundary and
922 * need to do a nibble.
925 and $0xfff, %r10 /* offset into 4K page */
926 sub $0x1000, %r10 /* subtract 4K pagesize */
931 jg LABEL(nibble_ashr_6)
933 LABEL(gobble_ashr_6):
934 movdqa (%rsi, %rcx), %xmm1
935 movdqa (%rdi, %rcx), %xmm2
940 por %xmm3, %xmm2 /* merge into one 16byte value */
942 TOLOWER (%xmm1, %xmm2)
951 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
953 jbe LABEL(strcmp_exitz)
960 jg LABEL(nibble_ashr_6) /* cross page boundary */
962 movdqa (%rsi, %rcx), %xmm1
963 movdqa (%rdi, %rcx), %xmm2
968 por %xmm3, %xmm2 /* merge into one 16byte value */
970 TOLOWER (%xmm1, %xmm2)
979 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
981 jbe LABEL(strcmp_exitz)
986 jmp LABEL(loop_ashr_6)
989 LABEL(nibble_ashr_6):
990 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
993 jnz LABEL(ashr_6_exittail)
995 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
997 jbe LABEL(ashr_6_exittail)
1002 jmp LABEL(gobble_ashr_6)
1005 LABEL(ashr_6_exittail):
1006 movdqa (%rsi, %rcx), %xmm1
1009 jmp LABEL(aftertail)
1012 * The following cases will be handled by ashr_7
1013 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1014 * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
1019 movdqa (%rdi), %xmm2
1020 movdqa (%rsi), %xmm1
1021 pcmpeqb %xmm1, %xmm0
1023 TOLOWER (%xmm1, %xmm2)
1024 pcmpeqb %xmm1, %xmm2
1026 pmovmskb %xmm2, %r9d
1030 jnz LABEL(less32bytes)
1031 movdqa (%rdi), %xmm3
1033 UPDATE_STRNCMP_COUNTER
1036 mov $16, %rcx /* index for loads */
1037 mov $7, %r9d /* byte position left over from less32bytes case */
1039 * Setup %r10 value allows us to detect crossing a page boundary.
1040 * When %r10 goes positive we have crossed a page boundary and
1041 * need to do a nibble.
1044 and $0xfff, %r10 /* offset into 4K page */
1045 sub $0x1000, %r10 /* subtract 4K pagesize */
1050 jg LABEL(nibble_ashr_7)
1052 LABEL(gobble_ashr_7):
1053 movdqa (%rsi, %rcx), %xmm1
1054 movdqa (%rdi, %rcx), %xmm2
1059 por %xmm3, %xmm2 /* merge into one 16byte value */
1061 TOLOWER (%xmm1, %xmm2)
1063 pcmpeqb %xmm1, %xmm0
1064 pcmpeqb %xmm2, %xmm1
1066 pmovmskb %xmm1, %edx
1070 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1072 jbe LABEL(strcmp_exitz)
1079 jg LABEL(nibble_ashr_7) /* cross page boundary */
1081 movdqa (%rsi, %rcx), %xmm1
1082 movdqa (%rdi, %rcx), %xmm2
1087 por %xmm3, %xmm2 /* merge into one 16byte value */
1089 TOLOWER (%xmm1, %xmm2)
1091 pcmpeqb %xmm1, %xmm0
1092 pcmpeqb %xmm2, %xmm1
1094 pmovmskb %xmm1, %edx
1098 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1100 jbe LABEL(strcmp_exitz)
1105 jmp LABEL(loop_ashr_7)
1108 LABEL(nibble_ashr_7):
1109 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1110 pmovmskb %xmm0, %edx
1112 jnz LABEL(ashr_7_exittail)
1114 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1116 jbe LABEL(ashr_7_exittail)
1121 jmp LABEL(gobble_ashr_7)
1124 LABEL(ashr_7_exittail):
1125 movdqa (%rsi, %rcx), %xmm1
1128 jmp LABEL(aftertail)
1131 * The following cases will be handled by ashr_8
1132 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1133 * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
1138 movdqa (%rdi), %xmm2
1139 movdqa (%rsi), %xmm1
1140 pcmpeqb %xmm1, %xmm0
1142 TOLOWER (%xmm1, %xmm2)
1143 pcmpeqb %xmm1, %xmm2
1145 pmovmskb %xmm2, %r9d
1149 jnz LABEL(less32bytes)
1150 movdqa (%rdi), %xmm3
1152 UPDATE_STRNCMP_COUNTER
1155 mov $16, %rcx /* index for loads */
1156 mov $8, %r9d /* byte position left over from less32bytes case */
1158 * Setup %r10 value allows us to detect crossing a page boundary.
1159 * When %r10 goes positive we have crossed a page boundary and
1160 * need to do a nibble.
1163 and $0xfff, %r10 /* offset into 4K page */
1164 sub $0x1000, %r10 /* subtract 4K pagesize */
1169 jg LABEL(nibble_ashr_8)
1171 LABEL(gobble_ashr_8):
1172 movdqa (%rsi, %rcx), %xmm1
1173 movdqa (%rdi, %rcx), %xmm2
1178 por %xmm3, %xmm2 /* merge into one 16byte value */
1180 TOLOWER (%xmm1, %xmm2)
1182 pcmpeqb %xmm1, %xmm0
1183 pcmpeqb %xmm2, %xmm1
1185 pmovmskb %xmm1, %edx
1189 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1191 jbe LABEL(strcmp_exitz)
1198 jg LABEL(nibble_ashr_8) /* cross page boundary */
1200 movdqa (%rsi, %rcx), %xmm1
1201 movdqa (%rdi, %rcx), %xmm2
1206 por %xmm3, %xmm2 /* merge into one 16byte value */
1208 TOLOWER (%xmm1, %xmm2)
1210 pcmpeqb %xmm1, %xmm0
1211 pcmpeqb %xmm2, %xmm1
1213 pmovmskb %xmm1, %edx
1217 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1219 jbe LABEL(strcmp_exitz)
1224 jmp LABEL(loop_ashr_8)
1227 LABEL(nibble_ashr_8):
1228 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1229 pmovmskb %xmm0, %edx
1231 jnz LABEL(ashr_8_exittail)
1233 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1235 jbe LABEL(ashr_8_exittail)
1240 jmp LABEL(gobble_ashr_8)
1243 LABEL(ashr_8_exittail):
1244 movdqa (%rsi, %rcx), %xmm1
1247 jmp LABEL(aftertail)
1250 * The following cases will be handled by ashr_9
1251 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1252 * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
1257 movdqa (%rdi), %xmm2
1258 movdqa (%rsi), %xmm1
1259 pcmpeqb %xmm1, %xmm0
1261 TOLOWER (%xmm1, %xmm2)
1262 pcmpeqb %xmm1, %xmm2
1264 pmovmskb %xmm2, %r9d
1268 jnz LABEL(less32bytes)
1269 movdqa (%rdi), %xmm3
1271 UPDATE_STRNCMP_COUNTER
1274 mov $16, %rcx /* index for loads */
1275 mov $9, %r9d /* byte position left over from less32bytes case */
1277 * Setup %r10 value allows us to detect crossing a page boundary.
1278 * When %r10 goes positive we have crossed a page boundary and
1279 * need to do a nibble.
1282 and $0xfff, %r10 /* offset into 4K page */
1283 sub $0x1000, %r10 /* subtract 4K pagesize */
1288 jg LABEL(nibble_ashr_9)
1290 LABEL(gobble_ashr_9):
1291 movdqa (%rsi, %rcx), %xmm1
1292 movdqa (%rdi, %rcx), %xmm2
1297 por %xmm3, %xmm2 /* merge into one 16byte value */
1299 TOLOWER (%xmm1, %xmm2)
1301 pcmpeqb %xmm1, %xmm0
1302 pcmpeqb %xmm2, %xmm1
1304 pmovmskb %xmm1, %edx
1308 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1310 jbe LABEL(strcmp_exitz)
1317 jg LABEL(nibble_ashr_9) /* cross page boundary */
1319 movdqa (%rsi, %rcx), %xmm1
1320 movdqa (%rdi, %rcx), %xmm2
1325 por %xmm3, %xmm2 /* merge into one 16byte value */
1327 TOLOWER (%xmm1, %xmm2)
1329 pcmpeqb %xmm1, %xmm0
1330 pcmpeqb %xmm2, %xmm1
1332 pmovmskb %xmm1, %edx
1336 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1338 jbe LABEL(strcmp_exitz)
1342 movdqa %xmm4, %xmm3 /* store for next cycle */
1343 jmp LABEL(loop_ashr_9)
1346 LABEL(nibble_ashr_9):
1347 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1348 pmovmskb %xmm0, %edx
1350 jnz LABEL(ashr_9_exittail)
1352 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1354 jbe LABEL(ashr_9_exittail)
1359 jmp LABEL(gobble_ashr_9)
1362 LABEL(ashr_9_exittail):
1363 movdqa (%rsi, %rcx), %xmm1
1366 jmp LABEL(aftertail)
1369 * The following cases will be handled by ashr_10
1370 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1371 * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
1376 movdqa (%rdi), %xmm2
1377 movdqa (%rsi), %xmm1
1378 pcmpeqb %xmm1, %xmm0
1380 TOLOWER (%xmm1, %xmm2)
1381 pcmpeqb %xmm1, %xmm2
1383 pmovmskb %xmm2, %r9d
1387 jnz LABEL(less32bytes)
1388 movdqa (%rdi), %xmm3
1390 UPDATE_STRNCMP_COUNTER
1393 mov $16, %rcx /* index for loads */
1394 mov $10, %r9d /* byte position left over from less32bytes case */
1396 * Setup %r10 value allows us to detect crossing a page boundary.
1397 * When %r10 goes positive we have crossed a page boundary and
1398 * need to do a nibble.
1401 and $0xfff, %r10 /* offset into 4K page */
1402 sub $0x1000, %r10 /* subtract 4K pagesize */
1405 LABEL(loop_ashr_10):
1407 jg LABEL(nibble_ashr_10)
1409 LABEL(gobble_ashr_10):
1410 movdqa (%rsi, %rcx), %xmm1
1411 movdqa (%rdi, %rcx), %xmm2
1416 por %xmm3, %xmm2 /* merge into one 16byte value */
1418 TOLOWER (%xmm1, %xmm2)
1420 pcmpeqb %xmm1, %xmm0
1421 pcmpeqb %xmm2, %xmm1
1423 pmovmskb %xmm1, %edx
1427 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1429 jbe LABEL(strcmp_exitz)
1436 jg LABEL(nibble_ashr_10) /* cross page boundary */
1438 movdqa (%rsi, %rcx), %xmm1
1439 movdqa (%rdi, %rcx), %xmm2
1444 por %xmm3, %xmm2 /* merge into one 16byte value */
1446 TOLOWER (%xmm1, %xmm2)
1448 pcmpeqb %xmm1, %xmm0
1449 pcmpeqb %xmm2, %xmm1
1451 pmovmskb %xmm1, %edx
1455 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1457 jbe LABEL(strcmp_exitz)
1462 jmp LABEL(loop_ashr_10)
1465 LABEL(nibble_ashr_10):
1466 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1467 pmovmskb %xmm0, %edx
1469 jnz LABEL(ashr_10_exittail)
1471 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1473 jbe LABEL(ashr_10_exittail)
1478 jmp LABEL(gobble_ashr_10)
1481 LABEL(ashr_10_exittail):
1482 movdqa (%rsi, %rcx), %xmm1
1485 jmp LABEL(aftertail)
1488 * The following cases will be handled by ashr_11
1489 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1490 * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
1495 movdqa (%rdi), %xmm2
1496 movdqa (%rsi), %xmm1
1497 pcmpeqb %xmm1, %xmm0
1499 TOLOWER (%xmm1, %xmm2)
1500 pcmpeqb %xmm1, %xmm2
1502 pmovmskb %xmm2, %r9d
1506 jnz LABEL(less32bytes)
1507 movdqa (%rdi), %xmm3
1509 UPDATE_STRNCMP_COUNTER
1512 mov $16, %rcx /* index for loads */
1513 mov $11, %r9d /* byte position left over from less32bytes case */
1515 * Setup %r10 value allows us to detect crossing a page boundary.
1516 * When %r10 goes positive we have crossed a page boundary and
1517 * need to do a nibble.
1520 and $0xfff, %r10 /* offset into 4K page */
1521 sub $0x1000, %r10 /* subtract 4K pagesize */
1524 LABEL(loop_ashr_11):
1526 jg LABEL(nibble_ashr_11)
1528 LABEL(gobble_ashr_11):
1529 movdqa (%rsi, %rcx), %xmm1
1530 movdqa (%rdi, %rcx), %xmm2
1535 por %xmm3, %xmm2 /* merge into one 16byte value */
1537 TOLOWER (%xmm1, %xmm2)
1539 pcmpeqb %xmm1, %xmm0
1540 pcmpeqb %xmm2, %xmm1
1542 pmovmskb %xmm1, %edx
1546 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1548 jbe LABEL(strcmp_exitz)
1555 jg LABEL(nibble_ashr_11) /* cross page boundary */
1557 movdqa (%rsi, %rcx), %xmm1
1558 movdqa (%rdi, %rcx), %xmm2
1563 por %xmm3, %xmm2 /* merge into one 16byte value */
1565 TOLOWER (%xmm1, %xmm2)
1567 pcmpeqb %xmm1, %xmm0
1568 pcmpeqb %xmm2, %xmm1
1570 pmovmskb %xmm1, %edx
1574 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1576 jbe LABEL(strcmp_exitz)
1581 jmp LABEL(loop_ashr_11)
1584 LABEL(nibble_ashr_11):
1585 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1586 pmovmskb %xmm0, %edx
1588 jnz LABEL(ashr_11_exittail)
1590 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1592 jbe LABEL(ashr_11_exittail)
1597 jmp LABEL(gobble_ashr_11)
1600 LABEL(ashr_11_exittail):
1601 movdqa (%rsi, %rcx), %xmm1
1604 jmp LABEL(aftertail)
1607 * The following cases will be handled by ashr_12
1608 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1609 * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
1614 movdqa (%rdi), %xmm2
1615 movdqa (%rsi), %xmm1
1616 pcmpeqb %xmm1, %xmm0
1618 TOLOWER (%xmm1, %xmm2)
1619 pcmpeqb %xmm1, %xmm2
1621 pmovmskb %xmm2, %r9d
1625 jnz LABEL(less32bytes)
1626 movdqa (%rdi), %xmm3
1628 UPDATE_STRNCMP_COUNTER
1631 mov $16, %rcx /* index for loads */
1632 mov $12, %r9d /* byte position left over from less32bytes case */
1634 * Setup %r10 value allows us to detect crossing a page boundary.
1635 * When %r10 goes positive we have crossed a page boundary and
1636 * need to do a nibble.
1639 and $0xfff, %r10 /* offset into 4K page */
1640 sub $0x1000, %r10 /* subtract 4K pagesize */
1643 LABEL(loop_ashr_12):
1645 jg LABEL(nibble_ashr_12)
1647 LABEL(gobble_ashr_12):
1648 movdqa (%rsi, %rcx), %xmm1
1649 movdqa (%rdi, %rcx), %xmm2
1654 por %xmm3, %xmm2 /* merge into one 16byte value */
1656 TOLOWER (%xmm1, %xmm2)
1658 pcmpeqb %xmm1, %xmm0
1659 pcmpeqb %xmm2, %xmm1
1661 pmovmskb %xmm1, %edx
1665 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1667 jbe LABEL(strcmp_exitz)
1674 jg LABEL(nibble_ashr_12) /* cross page boundary */
1676 movdqa (%rsi, %rcx), %xmm1
1677 movdqa (%rdi, %rcx), %xmm2
1682 por %xmm3, %xmm2 /* merge into one 16byte value */
1684 TOLOWER (%xmm1, %xmm2)
1686 pcmpeqb %xmm1, %xmm0
1687 pcmpeqb %xmm2, %xmm1
1689 pmovmskb %xmm1, %edx
1693 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1695 jbe LABEL(strcmp_exitz)
1700 jmp LABEL(loop_ashr_12)
1703 LABEL(nibble_ashr_12):
1704 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1705 pmovmskb %xmm0, %edx
1707 jnz LABEL(ashr_12_exittail)
1709 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1711 jbe LABEL(ashr_12_exittail)
1716 jmp LABEL(gobble_ashr_12)
1719 LABEL(ashr_12_exittail):
1720 movdqa (%rsi, %rcx), %xmm1
1723 jmp LABEL(aftertail)
1726 * The following cases will be handled by ashr_13
1727 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1728 * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
1733 movdqa (%rdi), %xmm2
1734 movdqa (%rsi), %xmm1
1735 pcmpeqb %xmm1, %xmm0
1737 TOLOWER (%xmm1, %xmm2)
1738 pcmpeqb %xmm1, %xmm2
1740 pmovmskb %xmm2, %r9d
1744 jnz LABEL(less32bytes)
1745 movdqa (%rdi), %xmm3
1747 UPDATE_STRNCMP_COUNTER
1750 mov $16, %rcx /* index for loads */
1751 mov $13, %r9d /* byte position left over from less32bytes case */
1753 * Setup %r10 value allows us to detect crossing a page boundary.
1754 * When %r10 goes positive we have crossed a page boundary and
1755 * need to do a nibble.
1758 and $0xfff, %r10 /* offset into 4K page */
1759 sub $0x1000, %r10 /* subtract 4K pagesize */
1762 LABEL(loop_ashr_13):
1764 jg LABEL(nibble_ashr_13)
1766 LABEL(gobble_ashr_13):
1767 movdqa (%rsi, %rcx), %xmm1
1768 movdqa (%rdi, %rcx), %xmm2
1773 por %xmm3, %xmm2 /* merge into one 16byte value */
1775 TOLOWER (%xmm1, %xmm2)
1777 pcmpeqb %xmm1, %xmm0
1778 pcmpeqb %xmm2, %xmm1
1780 pmovmskb %xmm1, %edx
1784 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1786 jbe LABEL(strcmp_exitz)
1793 jg LABEL(nibble_ashr_13) /* cross page boundary */
1795 movdqa (%rsi, %rcx), %xmm1
1796 movdqa (%rdi, %rcx), %xmm2
1801 por %xmm3, %xmm2 /* merge into one 16byte value */
1803 TOLOWER (%xmm1, %xmm2)
1805 pcmpeqb %xmm1, %xmm0
1806 pcmpeqb %xmm2, %xmm1
1808 pmovmskb %xmm1, %edx
1812 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1814 jbe LABEL(strcmp_exitz)
1819 jmp LABEL(loop_ashr_13)
1822 LABEL(nibble_ashr_13):
1823 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1824 pmovmskb %xmm0, %edx
1826 jnz LABEL(ashr_13_exittail)
1828 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1830 jbe LABEL(ashr_13_exittail)
1835 jmp LABEL(gobble_ashr_13)
1838 LABEL(ashr_13_exittail):
1839 movdqa (%rsi, %rcx), %xmm1
1842 jmp LABEL(aftertail)
1845 * The following cases will be handled by ashr_14
1846 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1847 * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
1852 movdqa (%rdi), %xmm2
1853 movdqa (%rsi), %xmm1
1854 pcmpeqb %xmm1, %xmm0
1856 TOLOWER (%xmm1, %xmm2)
1857 pcmpeqb %xmm1, %xmm2
1859 pmovmskb %xmm2, %r9d
1863 jnz LABEL(less32bytes)
1864 movdqa (%rdi), %xmm3
1866 UPDATE_STRNCMP_COUNTER
1869 mov $16, %rcx /* index for loads */
1870 mov $14, %r9d /* byte position left over from less32bytes case */
1872 * Setup %r10 value allows us to detect crossing a page boundary.
1873 * When %r10 goes positive we have crossed a page boundary and
1874 * need to do a nibble.
1877 and $0xfff, %r10 /* offset into 4K page */
1878 sub $0x1000, %r10 /* subtract 4K pagesize */
1881 LABEL(loop_ashr_14): /* main loop of the ashr_14 case; two 16-byte compare steps per pass */
1883 jg LABEL(nibble_ashr_14) /* cross page boundary */
1885 LABEL(gobble_ashr_14): /* re-entry point used by nibble_ashr_14 */
1886 movdqa (%rsi, %rcx), %xmm1 /* aligned 16-byte load at index %rcx from %rsi */
1887 movdqa (%rdi, %rcx), %xmm2 /* aligned 16-byte load at index %rcx from %rdi */
1892 por %xmm3, %xmm2 /* merge into one 16byte value */
1894 TOLOWER (%xmm1, %xmm2)
1896 pcmpeqb %xmm1, %xmm0 /* same null-char test as the other compare steps in this file */
1897 pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
1899 pmovmskb %xmm1, %edx /* collect the per-byte result bits in %edx */
1903 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1905 jbe LABEL(strcmp_exitz)
1912 jg LABEL(nibble_ashr_14) /* cross page boundary */
1914 movdqa (%rsi, %rcx), %xmm1 /* second step: next aligned 16-byte load from %rsi */
1915 movdqa (%rdi, %rcx), %xmm2 /* second step: next aligned 16-byte load from %rdi */
1920 por %xmm3, %xmm2 /* merge into one 16byte value */
1922 TOLOWER (%xmm1, %xmm2)
1924 pcmpeqb %xmm1, %xmm0 /* same null-char test as the other compare steps in this file */
1925 pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
1927 pmovmskb %xmm1, %edx /* collect the per-byte result bits in %edx */
1931 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1933 jbe LABEL(strcmp_exitz)
1938 jmp LABEL(loop_ashr_14) /* next pass */
/* nibble_ashr_14: slow path of the ashr_14 loop, entered from the
   `jg LABEL(nibble_ashr_14)` checks when the next load would cross a page
   boundary.
   NOTE(review): the embedded line numbering skips here (e.g. 1944, 1948,
   1950-1953), so the instructions that set the flags consumed by the
   jnz/jbe below, and the closing `# endif`, are not visible in this
   excerpt.  */
1941 LABEL(nibble_ashr_14):
/* Byte-wise equality of %xmm3 against %xmm0, result left in %xmm0.
   Presumably %xmm0 is all-zero on entry so that matches mark null bytes
   in the carried-over block -- confirm.  */
1942 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
/* Collapse the 16 per-byte results into a bit mask in %edx.  */
1943 pmovmskb %xmm0, %edx
/* Leave through the tail handler when the (not shown) test of the mask
   is non-zero.  */
1945 jnz LABEL(ashr_14_exittail)
/* Length-limited variants only.  */
1947 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* NOTE(review): the compare feeding this jbe is not visible; presumably
   it checks the remaining byte count -- confirm.  */
1949 jbe LABEL(ashr_14_exittail)
/* Otherwise resume the regular loop body.  */
1954 jmp LABEL(gobble_ashr_14)
/* ashr_14_exittail: target of the branches in nibble_ashr_14.  Reloads the
   16-byte block at (%rsi, %rcx) into %xmm1 with an aligned load and then
   continues at the aftertail label, which is defined outside this block.
   NOTE(review): embedded lines 1959-1960 are absent from this excerpt;
   presumably they shift %xmm3/%xmm0 by the case offset -- confirm.  */
1957 LABEL(ashr_14_exittail):
1958 movdqa (%rsi, %rcx), %xmm1
1961 jmp LABEL(aftertail)
/* Entry/setup sequence for the ashr_15 case.
   NOTE(review): the `LABEL(ashr_15):` line that unaligned_table refers to is
   not visible in this excerpt (embedded lines 1967-1970 are absent), and
   further lines are missing below (e.g. 1974, 1977, 1979-1981), including
   the instructions that set the flags tested by the jnz.  */
1964 * The following cases will be handled by ashr_15
1965 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1966 * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
/* Aligned 16-byte loads of the first block of each operand.  */
1971 movdqa (%rdi), %xmm2
1972 movdqa (%rsi), %xmm1
/* Byte-wise compare of %xmm1 against %xmm0, result in %xmm0.  Presumably
   %xmm0 was zeroed just before so this flags null bytes -- confirm.  */
1973 pcmpeqb %xmm1, %xmm0
/* TOLOWER is a macro defined elsewhere in the file; presumably it case-folds
   both registers for the case-insensitive variants -- confirm.  */
1975 TOLOWER (%xmm1, %xmm2)
/* Byte-wise equality of the two blocks, result in %xmm2.  */
1976 pcmpeqb %xmm1, %xmm2
/* Collapse the per-byte results into a bit mask in %r9d.  */
1978 pmovmskb %xmm2, %r9d
/* Leave through less32bytes when the (not shown) mask arithmetic yields a
   non-zero value.  */
1982 jnz LABEL(less32bytes)
/* Keep a copy of the first %rdi block in %xmm3; the loop below merges it into
   the next block with `por %xmm3, %xmm2`.  */
1984 movdqa (%rdi), %xmm3
/* For the length-limited variants this macro (defined at the top of the file)
   recomputes the remaining count and may branch to strcmp_exitz; for the
   other variants it is defined empty.  */
1986 UPDATE_STRNCMP_COUNTER
1989 mov $16, %rcx /* index for loads */
1990 mov $15, %r9d /* byte position left over from less32bytes case */
1992 * Setup %r10 value allows us to detect crossing a page boundary.
1993 * When %r10 goes positive we have crossed a page boundary and
1994 * need to do a nibble.
/* NOTE(review): the instruction that first loads %r10 (embedded lines
   1995-1996) is not visible; the two instructions below reduce it to
   (offset within the 4K page) - 4096, i.e. a negative value.  */
1997 and $0xfff, %r10 /* offset into 4K page */
1999 sub $0x1000, %r10 /* subtract 4K pagesize */
/* Main loop of the ashr_15 case, unrolled twice.  Each half loads the next
   aligned 16-byte block of both operands at index %rcx, merges the bytes
   carried over in %xmm3 into the %rdi block, compares, and builds a bit mask
   in %edx.
   NOTE(review): the embedded line numbering skips throughout this block, so
   the shift/merge helpers, the mask test, the counter updates, the %rcx and
   %r10 increments and the `# endif` lines are not visible in this excerpt.  */
2002 LABEL(loop_ashr_15):
/* Per the setup comment, a positive %r10 means the next load would cross a
   page boundary; take the careful nibble path in that case.  The instruction
   that updates %r10 and sets the flags is not shown.  */
2004 jg LABEL(nibble_ashr_15)
/* Re-entry point used by nibble_ashr_15 once the boundary check passed.  */
2006 LABEL(gobble_ashr_15):
/* Aligned 16-byte loads of the next block of each operand.  */
2007 movdqa (%rsi, %rcx), %xmm1
2008 movdqa (%rdi, %rcx), %xmm2
2013 por %xmm3, %xmm2 /* merge into one 16byte value */
/* TOLOWER is defined elsewhere; presumably a no-op unless building a
   case-insensitive variant -- confirm.  */
2015 TOLOWER (%xmm1, %xmm2)
/* %xmm0 := per-byte (%xmm1 == %xmm0); presumably a null-byte check with
   %xmm0 zero on entry -- confirm.  */
2017 pcmpeqb %xmm1, %xmm0
/* %xmm1 := per-byte equality of the two operand blocks.  */
2018 pcmpeqb %xmm2, %xmm1
/* Collapse the per-byte results into a bit mask in %edx.  */
2020 pmovmskb %xmm1, %edx
/* Length-limited variants only: stop with the "equal" exit when the
   (not shown) remaining-count update reaches or passes zero.  */
2024 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2026 jbe LABEL(strcmp_exitz)
/* Second unrolled half: same sequence as above.  */
2033 jg LABEL(nibble_ashr_15) /* cross page boundary */
2035 movdqa (%rsi, %rcx), %xmm1
2036 movdqa (%rdi, %rcx), %xmm2
2041 por %xmm3, %xmm2 /* merge into one 16byte value */
2043 TOLOWER (%xmm1, %xmm2)
2045 pcmpeqb %xmm1, %xmm0
2046 pcmpeqb %xmm2, %xmm1
2048 pmovmskb %xmm1, %edx
2052 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2054 jbe LABEL(strcmp_exitz)
/* Next pair of blocks.  */
2059 jmp LABEL(loop_ashr_15)
/* nibble_ashr_15: slow path of the ashr_15 loop, entered from the
   `jg LABEL(nibble_ashr_15)` checks when the next load would cross a page
   boundary.
   NOTE(review): the embedded line numbering skips here (e.g. 2065, 2069,
   2071-2074), so the instructions that set the flags consumed by the
   jnz/jbe below, and the closing `# endif`, are not visible in this
   excerpt.  */
2062 LABEL(nibble_ashr_15):
/* Byte-wise equality of %xmm3 against %xmm0, result left in %xmm0.
   Presumably %xmm0 is all-zero on entry so that matches mark null bytes
   in the carried-over block -- confirm.  */
2063 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
/* Collapse the 16 per-byte results into a bit mask in %edx.  */
2064 pmovmskb %xmm0, %edx
/* Leave through the tail handler when the (not shown) test of the mask
   is non-zero.  */
2066 jnz LABEL(ashr_15_exittail)
/* Length-limited variants only.  */
2068 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* NOTE(review): the compare feeding this jbe is not visible; presumably
   it checks the remaining byte count -- confirm.  */
2070 jbe LABEL(ashr_15_exittail)
/* Otherwise resume the regular loop body.  */
2075 jmp LABEL(gobble_ashr_15)
/* ashr_15_exittail and the exit sequence that follows it.
   NOTE(review): the embedded line numbering skips repeatedly in this span
   (e.g. 2080-2084, 2087, 2089-2092, 2094, 2097-2098, 2100-2103).  The
   aftertail and less32bytes labels that other cases jump to are presumably
   defined in those gaps, together with the remaining mask arithmetic, the
   %r8d test guarding the xchg, the `# endif` lines and the final
   subtract/return -- confirm against the full file.  */
2078 LABEL(ashr_15_exittail):
/* Aligned reload of the current 16-byte block of the %rsi operand.  */
2079 movdqa (%rsi, %rcx), %xmm1
/* TOLOWER is defined elsewhere; presumably a no-op unless building a
   case-insensitive variant -- confirm.  */
2085 TOLOWER (%xmm1, %xmm3)
/* %xmm1 := per-byte equality of %xmm3 and %xmm1.  */
2086 pcmpeqb %xmm3, %xmm1
/* Collapse the per-byte results into a bit mask in %edx.  */
2088 pmovmskb %xmm1, %edx
/* %rax = %r9 + %rcx - 16; %r9 holds the per-case byte position set up by
   each ashr_N entry sequence.  */
2093 lea -16(%r9, %rcx), %rax /* locate the exact offset for rdi */
2095 lea (%rdi, %rax), %rdi /* locate the exact address for first operand(rdi) */
2096 lea (%rsi, %rcx), %rsi /* locate the exact address for second operand(rsi) */
2099 xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
2104 bsf %rdx, %rdx /* find and store bit index in %rdx */
/* Length-limited variants only: take the "equal" exit when the (not shown)
   count adjustment reaches or passes zero.  */
2106 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2108 jbe LABEL(strcmp_exitz)
/* Fetch, zero-extended, the byte of each operand at the index in %rdx.  */
2110 movzbl (%rsi, %rdx), %ecx
2111 movzbl (%rdi, %rdx), %eax
/* Case-insensitive variants only: replace both bytes by their 32-bit entries
   in the C-locale tolower table.  %rdx is reused as the table base, biased
   by 128 entries.  */
2113 # if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2114 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
2115 movl (%rdx,%rcx,4), %ecx
2116 movl (%rdx,%rax,4), %eax
/* strcmp_exitz: target of the `jbe LABEL(strcmp_exitz)` branches taken by the
   length-limited variants and of UPDATE_STRNCMP_COUNTER.
   NOTE(review): the body of this label is not visible in this excerpt
   (embedded lines 2123-2130 are absent); presumably it returns 0 -- confirm.
   The table lookup below most likely belongs to a separate exit path whose
   label falls in that gap.  */
2122 LABEL(strcmp_exitz):
/* Case-insensitive variants only: replace the values in %ecx and %eax by
   their 32-bit entries in the C-locale tolower table.  %rdx holds the table
   base, biased by 128 entries.  */
2131 # if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2132 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
2133 movl (%rdx,%rcx,4), %ecx
2134 movl (%rdx,%rax,4), %eax
/* Dispatch table for the ashr_N cases, placed in read-only data.  Each slot
   is a 32-bit offset of a handler label relative to the start of the table,
   so the table needs no absolute addresses.  Slots 0..14 hold ashr_1 through
   ashr_15 and the last slot holds ashr_0; the order is significant.
   NOTE(review): the code that indexes this table is outside this excerpt, and
   embedded line 2142 (presumably an alignment directive) is absent --
   confirm against the full file.  */
2141 .section .rodata,"a",@progbits
2143 LABEL(unaligned_table):
2144 .int LABEL(ashr_1) - LABEL(unaligned_table)
2145 .int LABEL(ashr_2) - LABEL(unaligned_table)
2146 .int LABEL(ashr_3) - LABEL(unaligned_table)
2147 .int LABEL(ashr_4) - LABEL(unaligned_table)
2148 .int LABEL(ashr_5) - LABEL(unaligned_table)
2149 .int LABEL(ashr_6) - LABEL(unaligned_table)
2150 .int LABEL(ashr_7) - LABEL(unaligned_table)
2151 .int LABEL(ashr_8) - LABEL(unaligned_table)
2152 .int LABEL(ashr_9) - LABEL(unaligned_table)
2153 .int LABEL(ashr_10) - LABEL(unaligned_table)
2154 .int LABEL(ashr_11) - LABEL(unaligned_table)
2155 .int LABEL(ashr_12) - LABEL(unaligned_table)
2156 .int LABEL(ashr_13) - LABEL(unaligned_table)
2157 .int LABEL(ashr_14) - LABEL(unaligned_table)
2158 .int LABEL(ashr_15) - LABEL(unaligned_table)
/* Final slot: the ashr_0 handler.  */
2159 .int LABEL(ashr_0) - LABEL(unaligned_table)