1 /* Highly optimized version for x86-64.
2 Copyright (C) 1999-2021 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
20 #include "asm-syntax.h"
22 #undef UPDATE_STRNCMP_COUNTER
29 /* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
30 if the new counter > the old one or is 0. */
31 # define UPDATE_STRNCMP_COUNTER \
32 /* calculate the number of bytes left to compare */ \
33 lea -16(%rcx, %r11), %r9; \
35 jb LABEL(strcmp_exitz); \
37 je LABEL(strcmp_exitz); \
40 #elif defined USE_AS_STRCASECMP_L
41 # include "locale-defines.h"
43 # define UPDATE_STRNCMP_COUNTER
44 #elif defined USE_AS_STRNCASECMP_L
45 # include "locale-defines.h"
47 # define UPDATE_STRNCMP_COUNTER \
48 /* calculate the number of bytes left to compare */ \
49 lea -16(%rcx, %r11), %r9; \
51 jb LABEL(strcmp_exitz); \
53 je LABEL(strcmp_exitz); \
56 # define UPDATE_STRNCMP_COUNTER
58 # define STRCMP strcmp
65 .section .text.ssse3,"ax",@progbits
68 #ifdef USE_AS_STRCASECMP_L
70 # define ENTRY2(name) ENTRY (name)
71 # define END2(name) END (name)
75 movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
76 mov %fs:(%rax),%RDX_LP
78 // XXX 5 byte should be before the function
80 .byte 0x0f,0x1f,0x44,0x00,0x00
82 # ifndef NO_NOLOCALE_ALIAS
83 weak_alias (__strcasecmp, strcasecmp)
84 libc_hidden_def (__strcasecmp)
86 /* FALLTHROUGH to strcasecmp_l. */
87 #elif defined USE_AS_STRNCASECMP_L
89 # define ENTRY2(name) ENTRY (name)
90 # define END2(name) END (name)
93 ENTRY2 (__strncasecmp)
94 movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
95 mov %fs:(%rax),%RCX_LP
97 // XXX 5 byte should be before the function
99 .byte 0x0f,0x1f,0x44,0x00,0x00
101 # ifndef NO_NOLOCALE_ALIAS
102 weak_alias (__strncasecmp, strncasecmp)
103 libc_hidden_def (__strncasecmp)
105 /* FALLTHROUGH to strncasecmp_l. */
109 #ifdef USE_AS_STRCASECMP_L
110 /* We have to fall back on the C implementation for locales
111 with encodings not matching ASCII for single bytes. */
112 # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
113 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rdx), %RAX_LP
117 testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
118 jne __strcasecmp_l_nonascii
119 #elif defined USE_AS_STRNCASECMP_L
120 /* We have to fall back on the C implementation for locales
121 with encodings not matching ASCII for single bytes. */
122 # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
123 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rcx), %RAX_LP
127 testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
128 jne __strncasecmp_l_nonascii
132 * This implementation uses SSE to compare up to 16 bytes at a time.
134 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
135 test %RDX_LP, %RDX_LP
136 je LABEL(strcmp_exitz)
143 /* Use 64bit AND here to avoid long NOP padding. */
144 and $0x3f, %rcx /* rsi alignment in cache line */
145 and $0x3f, %rax /* rdi alignment in cache line */
146 #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
147 .section .rodata.cst16,"aM",@progbits,16
150 .quad 0x4040404040404040
151 .quad 0x4040404040404040
153 .quad 0x5b5b5b5b5b5b5b5b
154 .quad 0x5b5b5b5b5b5b5b5b
156 .quad 0x2020202020202020
157 .quad 0x2020202020202020
159 movdqa .Lbelowupper(%rip), %xmm5
160 # define UCLOW_reg %xmm5
161 movdqa .Ltopupper(%rip), %xmm6
162 # define UCHIGH_reg %xmm6
163 movdqa .Ltouppermask(%rip), %xmm7
164 # define LCQWORD_reg %xmm7
167 ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
169 ja LABEL(crosscache) /* rdi: 16-byte load will cross cache line */
172 movhpd 8(%rdi), %xmm1
173 movhpd 8(%rsi), %xmm2
174 #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
175 # define TOLOWER(reg1, reg2) \
176 movdqa reg1, %xmm8; \
177 movdqa UCHIGH_reg, %xmm9; \
178 movdqa reg2, %xmm10; \
179 movdqa UCHIGH_reg, %xmm11; \
180 pcmpgtb UCLOW_reg, %xmm8; \
181 pcmpgtb reg1, %xmm9; \
182 pcmpgtb UCLOW_reg, %xmm10; \
183 pcmpgtb reg2, %xmm11; \
185 pand %xmm11, %xmm10; \
186 pand LCQWORD_reg, %xmm8; \
187 pand LCQWORD_reg, %xmm10; \
190 TOLOWER (%xmm1, %xmm2)
192 # define TOLOWER(reg1, reg2)
194 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
195 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
196 pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
197 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
199 sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
200 jnz LABEL(less16bytes) /* If not, find different value or null char */
201 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
203 jbe LABEL(strcmp_exitz) /* finish comparison */
205 add $16, %rsi /* prepare to search next 16 bytes */
206 add $16, %rdi /* prepare to search next 16 bytes */
209 * Determine source and destination string offsets from 16-byte alignment.
210 * Use relative offset difference between the two to determine which case
215 and $0xfffffffffffffff0, %rsi /* force %rsi is 16 byte aligned */
216 and $0xfffffffffffffff0, %rdi /* force %rdi is 16 byte aligned */
217 mov $0xffff, %edx /* for equivalent offset */
219 and $0xf, %ecx /* offset of rsi */
220 and $0xf, %eax /* offset of rdi */
222 je LABEL(ashr_0) /* rsi and rdi relative offset same */
224 mov %edx, %r8d /* r8d is offset flag for exit tail */
230 lea LABEL(unaligned_table)(%rip), %r10
231 movslq (%r10, %r9,4), %r9
232 lea (%r10, %r9), %r10
233 _CET_NOTRACK jmp *%r10 /* jump to corresponding case */
236 * The following cases will be handled by ashr_0
237 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
238 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
244 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
245 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
246 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
247 pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
250 TOLOWER (%xmm1, %xmm2)
251 pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
253 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
255 shr %cl, %edx /* adjust 0xffff for offset */
256 shr %cl, %r9d /* adjust for 16-byte offset */
259 * edx must be the same as r9d if the remaining (16-rcx) bytes are equal to
260 * those starting from (16-rax) and no null char was seen.
262 jne LABEL(less32bytes) /* mismatch or null char */
263 UPDATE_STRNCMP_COUNTER
266 pxor %xmm0, %xmm0 /* clear xmm0, may have changed above */
269 * Now both strings are aligned at a 16-byte boundary. Loop over strings
270 * checking 32 bytes per iteration.
274 movdqa (%rsi, %rcx), %xmm1
275 movdqa (%rdi, %rcx), %xmm2
276 TOLOWER (%xmm1, %xmm2)
283 jnz LABEL(exit) /* mismatch or null char seen */
285 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
287 jbe LABEL(strcmp_exitz)
290 movdqa (%rsi, %rcx), %xmm1
291 movdqa (%rdi, %rcx), %xmm2
292 TOLOWER (%xmm1, %xmm2)
300 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
302 jbe LABEL(strcmp_exitz)
305 jmp LABEL(loop_ashr_0)
308 * The following cases will be handled by ashr_1
309 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
310 * n(15) n -15 0(15 +(n-15) - n) ashr_1
317 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
318 pslldq $15, %xmm2 /* shift first string to align with second */
319 TOLOWER (%xmm1, %xmm2)
320 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
321 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
323 shr %cl, %edx /* adjust 0xffff for offset */
324 shr %cl, %r9d /* adjust for 16-byte offset */
326 jnz LABEL(less32bytes) /* mismatch or null char seen */
328 UPDATE_STRNCMP_COUNTER
331 mov $16, %rcx /* index for loads*/
332 mov $1, %r9d /* byte position left over from less32bytes case */
334 * Setting up the %r10 value allows us to detect crossing a page boundary.
335 * When %r10 goes positive we have crossed a page boundary and
336 * need to do a nibble.
339 and $0xfff, %r10 /* offset into 4K page */
340 sub $0x1000, %r10 /* subtract 4K pagesize */
345 jg LABEL(nibble_ashr_1) /* cross page boundary */
347 LABEL(gobble_ashr_1):
348 movdqa (%rsi, %rcx), %xmm1
349 movdqa (%rdi, %rcx), %xmm2
350 movdqa %xmm2, %xmm4 /* store for next cycle */
355 por %xmm3, %xmm2 /* merge into one 16byte value */
357 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
359 TOLOWER (%xmm1, %xmm2)
368 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
370 jbe LABEL(strcmp_exitz)
376 jg LABEL(nibble_ashr_1) /* cross page boundary */
378 movdqa (%rsi, %rcx), %xmm1
379 movdqa (%rdi, %rcx), %xmm2
380 movdqa %xmm2, %xmm4 /* store for next cycle */
385 por %xmm3, %xmm2 /* merge into one 16byte value */
387 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
389 TOLOWER (%xmm1, %xmm2)
398 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
400 jbe LABEL(strcmp_exitz)
404 jmp LABEL(loop_ashr_1)
407 * Nibble avoids loads across page boundary. This is to avoid a potential
408 * access into unmapped memory.
411 LABEL(nibble_ashr_1):
412 pcmpeqb %xmm3, %xmm0 /* check nibble for null char*/
415 jnz LABEL(ashr_1_exittail) /* find null char*/
417 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
419 jbe LABEL(ashr_1_exittail)
423 sub $0x1000, %r10 /* subtract 4K from %r10 */
424 jmp LABEL(gobble_ashr_1)
427 * Once a null char is found, determine if there is a string mismatch
428 * before the null char.
431 LABEL(ashr_1_exittail):
432 movdqa (%rsi, %rcx), %xmm1
438 * The following cases will be handled by ashr_2
439 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
440 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
449 TOLOWER (%xmm1, %xmm2)
456 jnz LABEL(less32bytes)
458 UPDATE_STRNCMP_COUNTER
461 mov $16, %rcx /* index for loads */
462 mov $2, %r9d /* byte position left over from less32bytes case */
464 * Setting up the %r10 value allows us to detect crossing a page boundary.
465 * When %r10 goes positive we have crossed a page boundary and
466 * need to do a nibble.
469 and $0xfff, %r10 /* offset into 4K page */
470 sub $0x1000, %r10 /* subtract 4K pagesize */
475 jg LABEL(nibble_ashr_2)
477 LABEL(gobble_ashr_2):
478 movdqa (%rsi, %rcx), %xmm1
479 movdqa (%rdi, %rcx), %xmm2
485 por %xmm3, %xmm2 /* merge into one 16byte value */
487 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
489 TOLOWER (%xmm1, %xmm2)
498 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
500 jbe LABEL(strcmp_exitz)
507 jg LABEL(nibble_ashr_2) /* cross page boundary */
509 movdqa (%rsi, %rcx), %xmm1
510 movdqa (%rdi, %rcx), %xmm2
516 por %xmm3, %xmm2 /* merge into one 16byte value */
518 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
520 TOLOWER (%xmm1, %xmm2)
529 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
531 jbe LABEL(strcmp_exitz)
536 jmp LABEL(loop_ashr_2)
539 LABEL(nibble_ashr_2):
540 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
543 jnz LABEL(ashr_2_exittail)
545 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
547 jbe LABEL(ashr_2_exittail)
552 jmp LABEL(gobble_ashr_2)
555 LABEL(ashr_2_exittail):
556 movdqa (%rsi, %rcx), %xmm1
562 * The following cases will be handled by ashr_3
563 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
564 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
573 TOLOWER (%xmm1, %xmm2)
580 jnz LABEL(less32bytes)
583 UPDATE_STRNCMP_COUNTER
586 mov $16, %rcx /* index for loads */
587 mov $3, %r9d /* byte position left over from less32bytes case */
589 * Setting up the %r10 value allows us to detect crossing a page boundary.
590 * When %r10 goes positive we have crossed a page boundary and
591 * need to do a nibble.
594 and $0xfff, %r10 /* offset into 4K page */
595 sub $0x1000, %r10 /* subtract 4K pagesize */
600 jg LABEL(nibble_ashr_3)
602 LABEL(gobble_ashr_3):
603 movdqa (%rsi, %rcx), %xmm1
604 movdqa (%rdi, %rcx), %xmm2
610 por %xmm3, %xmm2 /* merge into one 16byte value */
612 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
614 TOLOWER (%xmm1, %xmm2)
623 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
625 jbe LABEL(strcmp_exitz)
632 jg LABEL(nibble_ashr_3) /* cross page boundary */
634 movdqa (%rsi, %rcx), %xmm1
635 movdqa (%rdi, %rcx), %xmm2
641 por %xmm3, %xmm2 /* merge into one 16byte value */
643 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
645 TOLOWER (%xmm1, %xmm2)
654 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
656 jbe LABEL(strcmp_exitz)
661 jmp LABEL(loop_ashr_3)
664 LABEL(nibble_ashr_3):
665 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
668 jnz LABEL(ashr_3_exittail)
670 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
672 jbe LABEL(ashr_3_exittail)
677 jmp LABEL(gobble_ashr_3)
680 LABEL(ashr_3_exittail):
681 movdqa (%rsi, %rcx), %xmm1
687 * The following cases will be handled by ashr_4
688 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
689 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
698 TOLOWER (%xmm1, %xmm2)
705 jnz LABEL(less32bytes)
708 UPDATE_STRNCMP_COUNTER
711 mov $16, %rcx /* index for loads */
712 mov $4, %r9d /* byte position left over from less32bytes case */
714 * Setting up the %r10 value allows us to detect crossing a page boundary.
715 * When %r10 goes positive we have crossed a page boundary and
716 * need to do a nibble.
719 and $0xfff, %r10 /* offset into 4K page */
720 sub $0x1000, %r10 /* subtract 4K pagesize */
725 jg LABEL(nibble_ashr_4)
727 LABEL(gobble_ashr_4):
728 movdqa (%rsi, %rcx), %xmm1
729 movdqa (%rdi, %rcx), %xmm2
735 por %xmm3, %xmm2 /* merge into one 16byte value */
737 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
739 TOLOWER (%xmm1, %xmm2)
748 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
750 jbe LABEL(strcmp_exitz)
757 jg LABEL(nibble_ashr_4) /* cross page boundary */
759 movdqa (%rsi, %rcx), %xmm1
760 movdqa (%rdi, %rcx), %xmm2
766 por %xmm3, %xmm2 /* merge into one 16byte value */
768 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
770 TOLOWER (%xmm1, %xmm2)
779 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
781 jbe LABEL(strcmp_exitz)
786 jmp LABEL(loop_ashr_4)
789 LABEL(nibble_ashr_4):
790 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
793 jnz LABEL(ashr_4_exittail)
795 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
797 jbe LABEL(ashr_4_exittail)
802 jmp LABEL(gobble_ashr_4)
805 LABEL(ashr_4_exittail):
806 movdqa (%rsi, %rcx), %xmm1
812 * The following cases will be handled by ashr_5
813 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
814 * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
823 TOLOWER (%xmm1, %xmm2)
830 jnz LABEL(less32bytes)
833 UPDATE_STRNCMP_COUNTER
836 mov $16, %rcx /* index for loads */
837 mov $5, %r9d /* byte position left over from less32bytes case */
839 * Setting up the %r10 value allows us to detect crossing a page boundary.
840 * When %r10 goes positive we have crossed a page boundary and
841 * need to do a nibble.
844 and $0xfff, %r10 /* offset into 4K page */
845 sub $0x1000, %r10 /* subtract 4K pagesize */
850 jg LABEL(nibble_ashr_5)
852 LABEL(gobble_ashr_5):
853 movdqa (%rsi, %rcx), %xmm1
854 movdqa (%rdi, %rcx), %xmm2
860 por %xmm3, %xmm2 /* merge into one 16byte value */
862 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
864 TOLOWER (%xmm1, %xmm2)
873 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
875 jbe LABEL(strcmp_exitz)
882 jg LABEL(nibble_ashr_5) /* cross page boundary */
884 movdqa (%rsi, %rcx), %xmm1
885 movdqa (%rdi, %rcx), %xmm2
891 por %xmm3, %xmm2 /* merge into one 16byte value */
893 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
895 TOLOWER (%xmm1, %xmm2)
904 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
906 jbe LABEL(strcmp_exitz)
911 jmp LABEL(loop_ashr_5)
914 LABEL(nibble_ashr_5):
915 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
918 jnz LABEL(ashr_5_exittail)
920 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
922 jbe LABEL(ashr_5_exittail)
927 jmp LABEL(gobble_ashr_5)
930 LABEL(ashr_5_exittail):
931 movdqa (%rsi, %rcx), %xmm1
937 * The following cases will be handled by ashr_6
938 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
939 * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
948 TOLOWER (%xmm1, %xmm2)
955 jnz LABEL(less32bytes)
958 UPDATE_STRNCMP_COUNTER
961 mov $16, %rcx /* index for loads */
962 mov $6, %r9d /* byte position left over from less32bytes case */
964 * Setting up the %r10 value allows us to detect crossing a page boundary.
965 * When %r10 goes positive we have crossed a page boundary and
966 * need to do a nibble.
969 and $0xfff, %r10 /* offset into 4K page */
970 sub $0x1000, %r10 /* subtract 4K pagesize */
975 jg LABEL(nibble_ashr_6)
977 LABEL(gobble_ashr_6):
978 movdqa (%rsi, %rcx), %xmm1
979 movdqa (%rdi, %rcx), %xmm2
985 por %xmm3, %xmm2 /* merge into one 16byte value */
987 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
989 TOLOWER (%xmm1, %xmm2)
998 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1000 jbe LABEL(strcmp_exitz)
1007 jg LABEL(nibble_ashr_6) /* cross page boundary */
1009 movdqa (%rsi, %rcx), %xmm1
1010 movdqa (%rdi, %rcx), %xmm2
1016 por %xmm3, %xmm2 /* merge into one 16byte value */
1018 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
1020 TOLOWER (%xmm1, %xmm2)
1022 pcmpeqb %xmm1, %xmm0
1023 pcmpeqb %xmm2, %xmm1
1025 pmovmskb %xmm1, %edx
1029 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1031 jbe LABEL(strcmp_exitz)
1036 jmp LABEL(loop_ashr_6)
1039 LABEL(nibble_ashr_6):
1040 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1041 pmovmskb %xmm0, %edx
1043 jnz LABEL(ashr_6_exittail)
1045 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1047 jbe LABEL(ashr_6_exittail)
1052 jmp LABEL(gobble_ashr_6)
1055 LABEL(ashr_6_exittail):
1056 movdqa (%rsi, %rcx), %xmm1
1059 jmp LABEL(aftertail)
1062 * The following cases will be handled by ashr_7
1063 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1064 * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
1069 movdqa (%rdi), %xmm2
1070 movdqa (%rsi), %xmm1
1071 pcmpeqb %xmm1, %xmm0
1073 TOLOWER (%xmm1, %xmm2)
1074 pcmpeqb %xmm1, %xmm2
1076 pmovmskb %xmm2, %r9d
1080 jnz LABEL(less32bytes)
1081 movdqa (%rdi), %xmm3
1083 UPDATE_STRNCMP_COUNTER
1086 mov $16, %rcx /* index for loads */
1087 mov $7, %r9d /* byte position left over from less32bytes case */
1089 * Setting up the %r10 value allows us to detect crossing a page boundary.
1090 * When %r10 goes positive we have crossed a page boundary and
1091 * need to do a nibble.
1094 and $0xfff, %r10 /* offset into 4K page */
1095 sub $0x1000, %r10 /* subtract 4K pagesize */
1100 jg LABEL(nibble_ashr_7)
1102 LABEL(gobble_ashr_7):
1103 movdqa (%rsi, %rcx), %xmm1
1104 movdqa (%rdi, %rcx), %xmm2
1110 por %xmm3, %xmm2 /* merge into one 16byte value */
1112 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
1114 TOLOWER (%xmm1, %xmm2)
1116 pcmpeqb %xmm1, %xmm0
1117 pcmpeqb %xmm2, %xmm1
1119 pmovmskb %xmm1, %edx
1123 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1125 jbe LABEL(strcmp_exitz)
1132 jg LABEL(nibble_ashr_7) /* cross page boundary */
1134 movdqa (%rsi, %rcx), %xmm1
1135 movdqa (%rdi, %rcx), %xmm2
1141 por %xmm3, %xmm2 /* merge into one 16byte value */
1143 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
1145 TOLOWER (%xmm1, %xmm2)
1147 pcmpeqb %xmm1, %xmm0
1148 pcmpeqb %xmm2, %xmm1
1150 pmovmskb %xmm1, %edx
1154 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1156 jbe LABEL(strcmp_exitz)
1161 jmp LABEL(loop_ashr_7)
1164 LABEL(nibble_ashr_7):
1165 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1166 pmovmskb %xmm0, %edx
1168 jnz LABEL(ashr_7_exittail)
1170 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1172 jbe LABEL(ashr_7_exittail)
1177 jmp LABEL(gobble_ashr_7)
1180 LABEL(ashr_7_exittail):
1181 movdqa (%rsi, %rcx), %xmm1
1184 jmp LABEL(aftertail)
1187 * The following cases will be handled by ashr_8
1188 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1189 * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
1194 movdqa (%rdi), %xmm2
1195 movdqa (%rsi), %xmm1
1196 pcmpeqb %xmm1, %xmm0
1198 TOLOWER (%xmm1, %xmm2)
1199 pcmpeqb %xmm1, %xmm2
1201 pmovmskb %xmm2, %r9d
1205 jnz LABEL(less32bytes)
1206 movdqa (%rdi), %xmm3
1208 UPDATE_STRNCMP_COUNTER
1211 mov $16, %rcx /* index for loads */
1212 mov $8, %r9d /* byte position left over from less32bytes case */
1214 * Setting up the %r10 value allows us to detect crossing a page boundary.
1215 * When %r10 goes positive we have crossed a page boundary and
1216 * need to do a nibble.
1219 and $0xfff, %r10 /* offset into 4K page */
1220 sub $0x1000, %r10 /* subtract 4K pagesize */
1225 jg LABEL(nibble_ashr_8)
1227 LABEL(gobble_ashr_8):
1228 movdqa (%rsi, %rcx), %xmm1
1229 movdqa (%rdi, %rcx), %xmm2
1235 por %xmm3, %xmm2 /* merge into one 16byte value */
1237 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
1239 TOLOWER (%xmm1, %xmm2)
1241 pcmpeqb %xmm1, %xmm0
1242 pcmpeqb %xmm2, %xmm1
1244 pmovmskb %xmm1, %edx
1248 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1250 jbe LABEL(strcmp_exitz)
1257 jg LABEL(nibble_ashr_8) /* cross page boundary */
1259 movdqa (%rsi, %rcx), %xmm1
1260 movdqa (%rdi, %rcx), %xmm2
1266 por %xmm3, %xmm2 /* merge into one 16byte value */
1268 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
1270 TOLOWER (%xmm1, %xmm2)
1272 pcmpeqb %xmm1, %xmm0
1273 pcmpeqb %xmm2, %xmm1
1275 pmovmskb %xmm1, %edx
1279 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1281 jbe LABEL(strcmp_exitz)
1286 jmp LABEL(loop_ashr_8)
1289 LABEL(nibble_ashr_8):
1290 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1291 pmovmskb %xmm0, %edx
1293 jnz LABEL(ashr_8_exittail)
1295 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1297 jbe LABEL(ashr_8_exittail)
1302 jmp LABEL(gobble_ashr_8)
1305 LABEL(ashr_8_exittail):
1306 movdqa (%rsi, %rcx), %xmm1
1309 jmp LABEL(aftertail)
1312 * The following cases will be handled by ashr_9
1313 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1314 * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
1319 movdqa (%rdi), %xmm2
1320 movdqa (%rsi), %xmm1
1321 pcmpeqb %xmm1, %xmm0
1323 TOLOWER (%xmm1, %xmm2)
1324 pcmpeqb %xmm1, %xmm2
1326 pmovmskb %xmm2, %r9d
1330 jnz LABEL(less32bytes)
1331 movdqa (%rdi), %xmm3
1333 UPDATE_STRNCMP_COUNTER
1336 mov $16, %rcx /* index for loads */
1337 mov $9, %r9d /* byte position left over from less32bytes case */
1339 * Setting up the %r10 value allows us to detect crossing a page boundary.
1340 * When %r10 goes positive we have crossed a page boundary and
1341 * need to do a nibble.
1344 and $0xfff, %r10 /* offset into 4K page */
1345 sub $0x1000, %r10 /* subtract 4K pagesize */
1350 jg LABEL(nibble_ashr_9)
1352 LABEL(gobble_ashr_9):
1353 movdqa (%rsi, %rcx), %xmm1
1354 movdqa (%rdi, %rcx), %xmm2
1360 por %xmm3, %xmm2 /* merge into one 16byte value */
1362 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
1364 TOLOWER (%xmm1, %xmm2)
1366 pcmpeqb %xmm1, %xmm0
1367 pcmpeqb %xmm2, %xmm1
1369 pmovmskb %xmm1, %edx
1373 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1375 jbe LABEL(strcmp_exitz)
1382 jg LABEL(nibble_ashr_9) /* cross page boundary */
1384 movdqa (%rsi, %rcx), %xmm1
1385 movdqa (%rdi, %rcx), %xmm2
1391 por %xmm3, %xmm2 /* merge into one 16byte value */
1393 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
1395 TOLOWER (%xmm1, %xmm2)
1397 pcmpeqb %xmm1, %xmm0
1398 pcmpeqb %xmm2, %xmm1
1400 pmovmskb %xmm1, %edx
1404 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1406 jbe LABEL(strcmp_exitz)
1410 movdqa %xmm4, %xmm3 /* store for next cycle */
1411 jmp LABEL(loop_ashr_9)
1414 LABEL(nibble_ashr_9):
1415 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1416 pmovmskb %xmm0, %edx
1418 jnz LABEL(ashr_9_exittail)
1420 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1422 jbe LABEL(ashr_9_exittail)
1427 jmp LABEL(gobble_ashr_9)
1430 LABEL(ashr_9_exittail):
1431 movdqa (%rsi, %rcx), %xmm1
1434 jmp LABEL(aftertail)
1437 * The following cases will be handled by ashr_10
1438 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1439 * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
1444 movdqa (%rdi), %xmm2
1445 movdqa (%rsi), %xmm1
1446 pcmpeqb %xmm1, %xmm0
1448 TOLOWER (%xmm1, %xmm2)
1449 pcmpeqb %xmm1, %xmm2
1451 pmovmskb %xmm2, %r9d
1455 jnz LABEL(less32bytes)
1456 movdqa (%rdi), %xmm3
1458 UPDATE_STRNCMP_COUNTER
1461 mov $16, %rcx /* index for loads */
1462 mov $10, %r9d /* byte position left over from less32bytes case */
1464 * Setting up the %r10 value allows us to detect crossing a page boundary.
1465 * When %r10 goes positive we have crossed a page boundary and
1466 * need to do a nibble.
1469 and $0xfff, %r10 /* offset into 4K page */
1470 sub $0x1000, %r10 /* subtract 4K pagesize */
1473 LABEL(loop_ashr_10):
1475 jg LABEL(nibble_ashr_10)
1477 LABEL(gobble_ashr_10):
1478 movdqa (%rsi, %rcx), %xmm1
1479 movdqa (%rdi, %rcx), %xmm2
1485 por %xmm3, %xmm2 /* merge into one 16byte value */
1487 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
1489 TOLOWER (%xmm1, %xmm2)
1491 pcmpeqb %xmm1, %xmm0
1492 pcmpeqb %xmm2, %xmm1
1494 pmovmskb %xmm1, %edx
1498 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1500 jbe LABEL(strcmp_exitz)
1507 jg LABEL(nibble_ashr_10) /* cross page boundary */
1509 movdqa (%rsi, %rcx), %xmm1
1510 movdqa (%rdi, %rcx), %xmm2
1516 por %xmm3, %xmm2 /* merge into one 16byte value */
1518 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
1520 TOLOWER (%xmm1, %xmm2)
1522 pcmpeqb %xmm1, %xmm0
1523 pcmpeqb %xmm2, %xmm1
1525 pmovmskb %xmm1, %edx
1529 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1531 jbe LABEL(strcmp_exitz)
1536 jmp LABEL(loop_ashr_10)
1539 LABEL(nibble_ashr_10):
1540 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1541 pmovmskb %xmm0, %edx
1543 jnz LABEL(ashr_10_exittail)
1545 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1547 jbe LABEL(ashr_10_exittail)
1552 jmp LABEL(gobble_ashr_10)
1555 LABEL(ashr_10_exittail):
1556 movdqa (%rsi, %rcx), %xmm1
1559 jmp LABEL(aftertail)
1562 * The following cases will be handled by ashr_11
1563 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1564 * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
1569 movdqa (%rdi), %xmm2
1570 movdqa (%rsi), %xmm1
1571 pcmpeqb %xmm1, %xmm0
1573 TOLOWER (%xmm1, %xmm2)
1574 pcmpeqb %xmm1, %xmm2
1576 pmovmskb %xmm2, %r9d
1580 jnz LABEL(less32bytes)
1581 movdqa (%rdi), %xmm3
1583 UPDATE_STRNCMP_COUNTER
1586 mov $16, %rcx /* index for loads */
1587 mov $11, %r9d /* byte position left over from less32bytes case */
1589 * Setting up the %r10 value allows us to detect crossing a page boundary.
1590 * When %r10 goes positive we have crossed a page boundary and
1591 * need to do a nibble.
1594 and $0xfff, %r10 /* offset into 4K page */
1595 sub $0x1000, %r10 /* subtract 4K pagesize */
1598 LABEL(loop_ashr_11):
1600 jg LABEL(nibble_ashr_11)
1602 LABEL(gobble_ashr_11):
1603 movdqa (%rsi, %rcx), %xmm1
1604 movdqa (%rdi, %rcx), %xmm2
1610 por %xmm3, %xmm2 /* merge into one 16byte value */
1612 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
1614 TOLOWER (%xmm1, %xmm2)
1616 pcmpeqb %xmm1, %xmm0
1617 pcmpeqb %xmm2, %xmm1
1619 pmovmskb %xmm1, %edx
1623 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1625 jbe LABEL(strcmp_exitz)
1632 jg LABEL(nibble_ashr_11) /* cross page boundary */
1634 movdqa (%rsi, %rcx), %xmm1
1635 movdqa (%rdi, %rcx), %xmm2
1641 por %xmm3, %xmm2 /* merge into one 16byte value */
1643 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
1645 TOLOWER (%xmm1, %xmm2)
1647 pcmpeqb %xmm1, %xmm0
1648 pcmpeqb %xmm2, %xmm1
1650 pmovmskb %xmm1, %edx
1654 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1656 jbe LABEL(strcmp_exitz)
1661 jmp LABEL(loop_ashr_11)
1664 LABEL(nibble_ashr_11):
1665 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1666 pmovmskb %xmm0, %edx
1668 jnz LABEL(ashr_11_exittail)
1670 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1672 jbe LABEL(ashr_11_exittail)
1677 jmp LABEL(gobble_ashr_11)
1680 LABEL(ashr_11_exittail):
1681 movdqa (%rsi, %rcx), %xmm1
1684 jmp LABEL(aftertail)
1687 * The following cases will be handled by ashr_12
1688 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1689 * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
1694 movdqa (%rdi), %xmm2
1695 movdqa (%rsi), %xmm1
1696 pcmpeqb %xmm1, %xmm0
1698 TOLOWER (%xmm1, %xmm2)
1699 pcmpeqb %xmm1, %xmm2
1701 pmovmskb %xmm2, %r9d
1705 jnz LABEL(less32bytes)
1706 movdqa (%rdi), %xmm3
1708 UPDATE_STRNCMP_COUNTER
1711 mov $16, %rcx /* index for loads */
1712 mov $12, %r9d /* byte position left over from less32bytes case */
1714 * Setting up the %r10 value allows us to detect crossing a page boundary.
1715 * When %r10 goes positive we have crossed a page boundary and
1716 * need to do a nibble.
1719 and $0xfff, %r10 /* offset into 4K page */
1720 sub $0x1000, %r10 /* subtract 4K pagesize */
1723 LABEL(loop_ashr_12):
1725 jg LABEL(nibble_ashr_12)
1727 LABEL(gobble_ashr_12):
1728 movdqa (%rsi, %rcx), %xmm1
1729 movdqa (%rdi, %rcx), %xmm2
1735 por %xmm3, %xmm2 /* merge into one 16byte value */
1737 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
1739 TOLOWER (%xmm1, %xmm2)
1741 pcmpeqb %xmm1, %xmm0
1742 pcmpeqb %xmm2, %xmm1
1744 pmovmskb %xmm1, %edx
1748 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1750 jbe LABEL(strcmp_exitz)
1757 jg LABEL(nibble_ashr_12) /* cross page boundary */
1759 movdqa (%rsi, %rcx), %xmm1
1760 movdqa (%rdi, %rcx), %xmm2
1766 por %xmm3, %xmm2 /* merge into one 16byte value */
1768 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
1770 TOLOWER (%xmm1, %xmm2)
1772 pcmpeqb %xmm1, %xmm0
1773 pcmpeqb %xmm2, %xmm1
1775 pmovmskb %xmm1, %edx
1779 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1781 jbe LABEL(strcmp_exitz)
1786 jmp LABEL(loop_ashr_12)
1789 LABEL(nibble_ashr_12):
1790 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1791 pmovmskb %xmm0, %edx
1793 jnz LABEL(ashr_12_exittail)
1795 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1797 jbe LABEL(ashr_12_exittail)
1802 jmp LABEL(gobble_ashr_12)
1805 LABEL(ashr_12_exittail):
1806 movdqa (%rsi, %rcx), %xmm1
1809 jmp LABEL(aftertail)
1812 * The following cases will be handled by ashr_13
1813 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1814 * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
1819 movdqa (%rdi), %xmm2
1820 movdqa (%rsi), %xmm1
1821 pcmpeqb %xmm1, %xmm0
1823 TOLOWER (%xmm1, %xmm2)
1824 pcmpeqb %xmm1, %xmm2
1826 pmovmskb %xmm2, %r9d
1830 jnz LABEL(less32bytes)
1831 movdqa (%rdi), %xmm3
1833 UPDATE_STRNCMP_COUNTER
1836 mov $16, %rcx /* index for loads */
1837 mov $13, %r9d /* byte position left over from less32bytes case */
1839 * Setting up the %r10 value allows us to detect crossing a page boundary.
1840 * When %r10 goes positive we have crossed a page boundary and
1841 * need to do a nibble.
1844 and $0xfff, %r10 /* offset into 4K page */
1845 sub $0x1000, %r10 /* subtract 4K pagesize */
1848 LABEL(loop_ashr_13):
1850 jg LABEL(nibble_ashr_13)
1852 LABEL(gobble_ashr_13):
1853 movdqa (%rsi, %rcx), %xmm1
1854 movdqa (%rdi, %rcx), %xmm2
1860 por %xmm3, %xmm2 /* merge into one 16byte value */
1862 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
1864 TOLOWER (%xmm1, %xmm2)
1866 pcmpeqb %xmm1, %xmm0
1867 pcmpeqb %xmm2, %xmm1
1869 pmovmskb %xmm1, %edx
1873 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1875 jbe LABEL(strcmp_exitz)
1882 jg LABEL(nibble_ashr_13) /* cross page boundary */
1884 movdqa (%rsi, %rcx), %xmm1
1885 movdqa (%rdi, %rcx), %xmm2
1891 por %xmm3, %xmm2 /* merge into one 16byte value */
1893 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
1895 TOLOWER (%xmm1, %xmm2)
1897 pcmpeqb %xmm1, %xmm0
1898 pcmpeqb %xmm2, %xmm1
1900 pmovmskb %xmm1, %edx
1904 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1906 jbe LABEL(strcmp_exitz)
1911 jmp LABEL(loop_ashr_13)
1914 LABEL(nibble_ashr_13):
1915 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1916 pmovmskb %xmm0, %edx
1918 jnz LABEL(ashr_13_exittail)
1920 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1922 jbe LABEL(ashr_13_exittail)
1927 jmp LABEL(gobble_ashr_13)
1930 LABEL(ashr_13_exittail):
1931 movdqa (%rsi, %rcx), %xmm1
1934 jmp LABEL(aftertail)
1937 * The following cases will be handled by ashr_14
1938 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1939 * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
1944 movdqa (%rdi), %xmm2
1945 movdqa (%rsi), %xmm1
/* The two aligned loads above fetch the first 16 bytes at %rdi and %rsi.
   NOTE(review): the embedded source line numbers are not contiguous, so
   some instructions and labels of this case are not visible here; the
   comments below describe only what the visible lines do.  */
/* Byte-wise compare of %xmm1 with %xmm0, result in %xmm0.  Presumably
   %xmm0 was zeroed beforehand so this marks NUL bytes -- the zeroing is
   not visible in this excerpt, confirm.  */
1946 pcmpeqb %xmm1, %xmm0
/* TOLOWER is a macro defined outside this excerpt; presumably it
   case-folds its two operands in the case-insensitive builds -- confirm.  */
1948 TOLOWER (%xmm1, %xmm2)
/* %xmm2 becomes the per-byte equality mask of %xmm1 and %xmm2, and
   pmovmskb packs the top bit of each byte of %xmm2 into %r9d.  */
1949 pcmpeqb %xmm1, %xmm2
1951 pmovmskb %xmm2, %r9d
/* NOTE(review): the instruction that sets ZF for this branch is not
   visible in this excerpt.  */
1955 jnz LABEL(less32bytes)
/* Keep the first 16 bytes at %rdi in %xmm3; the merge lines in the loop
   below combine %xmm3 with the next vector loaded from %rdi.  */
1956 movdqa (%rdi), %xmm3
/* Macro from the top of the file: in the strncmp/strncasecmp builds it
   derives the remaining count from %rcx and %r11 and branches to
   strcmp_exitz when that count is exhausted; in the other builds it
   expands to nothing.  */
1958 UPDATE_STRNCMP_COUNTER
1961 mov $16, %rcx /* index for loads */
1962 mov $14, %r9d /* byte position left over from less32bytes case */
1964 * Setup %r10 value allows us to detect crossing a page boundary.
1965 * When %r10 goes positive we have crossed a page boundary and
1966 * need to do a nibble.
1969 and $0xfff, %r10 /* offset into 4K page */
1970 sub $0x1000, %r10 /* subtract 4K pagesize */
/* Net effect of the two instructions above:
   %r10 = (%r10 & 0xfff) - 0x1000, i.e. a value in [-4096, -1].
   The instruction that first loads %r10 is not visible in this excerpt.  */
1973 LABEL(loop_ashr_14):
/* Leave the fast path when the page-boundary check signals "greater";
   the flag-setting instruction (presumably an update of %r10) is not
   visible in this excerpt.  */
1975 jg LABEL(nibble_ashr_14)
1977 LABEL(gobble_ashr_14):
/* Load the next 16 bytes of each string at index %rcx.  */
1978 movdqa (%rsi, %rcx), %xmm1
1979 movdqa (%rdi, %rcx), %xmm2
/* NOTE(review): the por and palignr lines carry the same "merge" comment
   and look like two alternative ways of building the shifted %rdi vector;
   the conditional that would select between them is not visible here.
   palignr $14 on its own yields bytes 14..15 of %xmm3 followed by
   bytes 0..13 of %xmm2.  */
1985 por %xmm3, %xmm2 /* merge into one 16byte value */
1987 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
1989 TOLOWER (%xmm1, %xmm2)
/* %xmm0: byte compare of %xmm1 against the current %xmm0 contents;
   %xmm1: per-byte equality of the two vectors;
   %edx: top bit of each byte of %xmm1 packed into an integer mask.  */
1991 pcmpeqb %xmm1, %xmm0
1992 pcmpeqb %xmm2, %xmm1
1994 pmovmskb %xmm1, %edx
1998 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Length-limited builds only: branch to strcmp_exitz on "below or
   equal".  Presumably this follows a decrement of the remaining-length
   counter; that instruction and the closing #endif are not visible in
   this excerpt.  */
2000 jbe LABEL(strcmp_exitz)
2007 jg LABEL(nibble_ashr_14) /* cross page boundary */
/* Second copy of the same compare step: the visible lines repeat the
   load / merge / compare sequence above before looping.  */
2009 movdqa (%rsi, %rcx), %xmm1
2010 movdqa (%rdi, %rcx), %xmm2
2016 por %xmm3, %xmm2 /* merge into one 16byte value */
2018 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
2020 TOLOWER (%xmm1, %xmm2)
2022 pcmpeqb %xmm1, %xmm0
2023 pcmpeqb %xmm2, %xmm1
2025 pmovmskb %xmm1, %edx
2029 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Same length-limit exit as in the first copy.  */
2031 jbe LABEL(strcmp_exitz)
2036 jmp LABEL(loop_ashr_14)
2039 LABEL(nibble_ashr_14):
/* Reached from the jg branches above.  Compares %xmm3 against %xmm0 and
   packs the result into %edx; the test that sets ZF for the jnz below is
   not visible in this excerpt.  */
2040 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
2041 pmovmskb %xmm0, %edx
2043 jnz LABEL(ashr_14_exittail)
2045 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Length-limited builds only: also take the tail path on "below or
   equal" (the flag-setting compare is not visible here).  */
2047 jbe LABEL(ashr_14_exittail)
/* Otherwise resume the normal compare step.  */
2052 jmp LABEL(gobble_ashr_14)
2055 LABEL(ashr_14_exittail):
/* Reload the current 16 bytes of the %rsi string at index %rcx, then
   continue at aftertail.  NOTE(review): instructions between these two
   lines may be missing from this excerpt.  */
2056 movdqa (%rsi, %rcx), %xmm1
2059 jmp LABEL(aftertail)
2062 * The following cases will be handled by ashr_15
2063 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2064 * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
2069 movdqa (%rdi), %xmm2
2070 movdqa (%rsi), %xmm1
/* The two aligned loads above fetch the first 16 bytes at %rdi and %rsi.
   NOTE(review): the embedded source line numbers are not contiguous, so
   some instructions and labels of this case are not visible here; the
   comments below describe only what the visible lines do.  */
/* Byte-wise compare of %xmm1 with %xmm0, result in %xmm0.  Presumably
   %xmm0 was zeroed beforehand so this marks NUL bytes -- the zeroing is
   not visible in this excerpt, confirm.  */
2071 pcmpeqb %xmm1, %xmm0
/* TOLOWER is a macro defined outside this excerpt; presumably it
   case-folds its two operands in the case-insensitive builds -- confirm.  */
2073 TOLOWER (%xmm1, %xmm2)
/* %xmm2 becomes the per-byte equality mask of %xmm1 and %xmm2, and
   pmovmskb packs the top bit of each byte of %xmm2 into %r9d.  */
2074 pcmpeqb %xmm1, %xmm2
2076 pmovmskb %xmm2, %r9d
/* NOTE(review): the instruction that sets ZF for this branch is not
   visible in this excerpt.  */
2080 jnz LABEL(less32bytes)
/* Keep the first 16 bytes at %rdi in %xmm3; the merge lines in the loop
   below combine %xmm3 with the next vector loaded from %rdi.  */
2082 movdqa (%rdi), %xmm3
/* Macro from the top of the file: in the strncmp/strncasecmp builds it
   derives the remaining count from %rcx and %r11 and branches to
   strcmp_exitz when that count is exhausted; in the other builds it
   expands to nothing.  */
2084 UPDATE_STRNCMP_COUNTER
2087 mov $16, %rcx /* index for loads */
2088 mov $15, %r9d /* byte position left over from less32bytes case */
2090 * Setup %r10 value allows us to detect crossing a page boundary.
2091 * When %r10 goes positive we have crossed a page boundary and
2092 * need to do a nibble.
2095 and $0xfff, %r10 /* offset into 4K page */
2097 sub $0x1000, %r10 /* subtract 4K pagesize */
/* Net effect of the two instructions above:
   %r10 = (%r10 & 0xfff) - 0x1000, i.e. a value in [-4096, -1].
   The instruction that first loads %r10 is not visible in this excerpt.  */
2100 LABEL(loop_ashr_15):
/* Leave the fast path when the page-boundary check signals "greater";
   the flag-setting instruction (presumably an update of %r10) is not
   visible in this excerpt.  */
2102 jg LABEL(nibble_ashr_15)
2104 LABEL(gobble_ashr_15):
/* Load the next 16 bytes of each string at index %rcx.  */
2105 movdqa (%rsi, %rcx), %xmm1
2106 movdqa (%rdi, %rcx), %xmm2
/* NOTE(review): the por and palignr lines carry the same "merge" comment
   and look like two alternative ways of building the shifted %rdi vector;
   the conditional that would select between them is not visible here.
   palignr $15 on its own yields byte 15 of %xmm3 followed by
   bytes 0..14 of %xmm2.  */
2112 por %xmm3, %xmm2 /* merge into one 16byte value */
2114 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
2116 TOLOWER (%xmm1, %xmm2)
/* %xmm0: byte compare of %xmm1 against the current %xmm0 contents;
   %xmm1: per-byte equality of the two vectors;
   %edx: top bit of each byte of %xmm1 packed into an integer mask.  */
2118 pcmpeqb %xmm1, %xmm0
2119 pcmpeqb %xmm2, %xmm1
2121 pmovmskb %xmm1, %edx
2125 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Length-limited builds only: branch to strcmp_exitz on "below or
   equal".  Presumably this follows a decrement of the remaining-length
   counter; that instruction and the closing #endif are not visible in
   this excerpt.  */
2127 jbe LABEL(strcmp_exitz)
2134 jg LABEL(nibble_ashr_15) /* cross page boundary */
/* Second copy of the same compare step: the visible lines repeat the
   load / merge / compare sequence above before looping.  */
2136 movdqa (%rsi, %rcx), %xmm1
2137 movdqa (%rdi, %rcx), %xmm2
2143 por %xmm3, %xmm2 /* merge into one 16byte value */
2145 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
2147 TOLOWER (%xmm1, %xmm2)
2149 pcmpeqb %xmm1, %xmm0
2150 pcmpeqb %xmm2, %xmm1
2152 pmovmskb %xmm1, %edx
2156 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Same length-limit exit as in the first copy.  */
2158 jbe LABEL(strcmp_exitz)
2163 jmp LABEL(loop_ashr_15)
2166 LABEL(nibble_ashr_15):
/* Reached from the jg branches above.  Compares %xmm3 against %xmm0 and
   packs the result into %edx; the test that sets ZF for the jnz below is
   not visible in this excerpt.  */
2167 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
2168 pmovmskb %xmm0, %edx
2170 jnz LABEL(ashr_15_exittail)
2172 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Length-limited builds only: also take the tail path on "below or
   equal" (the flag-setting compare is not visible here).  */
2174 jbe LABEL(ashr_15_exittail)
/* Otherwise resume the normal compare step.  */
2179 jmp LABEL(gobble_ashr_15)
2182 LABEL(ashr_15_exittail):
/* Reload the current 16 bytes of the %rsi string at index %rcx.  Unlike
   the ashr_13/ashr_14 tails, no jump to aftertail is visible after this
   line; NOTE(review): presumably control falls through into the shared
   tail code -- confirm against the full source.  */
2183 movdqa (%rsi, %rcx), %xmm1
/* Shared tail / exit sequence.  NOTE(review): the labels of this path
   and several of its instructions are not visible in this excerpt (the
   embedded source line numbers are not contiguous); the comments cover
   only the lines shown.  */
2189 TOLOWER (%xmm1, %xmm3)
/* Per-byte equality of %xmm3 and %xmm1, result in %xmm1; pmovmskb then
   packs the top bit of each byte of %xmm1 into %edx.  */
2190 pcmpeqb %xmm3, %xmm1
2192 pmovmskb %xmm1, %edx
/* %rax = %r9 + %rcx - 16, then both string pointers are advanced:
   %rdi by %rax and %rsi by %rcx.  */
2197 lea -16(%r9, %rcx), %rax /* locate the exact offset for rdi */
2199 lea (%rdi, %rax), %rdi /* locate the exact address for first operand(rdi) */
2200 lea (%rsi, %rcx), %rsi /* locate the exact address for second operand(rsi) */
/* NOTE(review): the test of %r8d that decides whether this swap runs is
   not visible in this excerpt.  */
2203 xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
2208 bsf %rdx, %rdx /* find and store bit index in %rdx */
2210 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Length-limited builds only: branch to strcmp_exitz on "below or
   equal".  The instruction that sets the flags and the closing #endif
   are not visible in this excerpt.  */
2212 jbe LABEL(strcmp_exitz)
/* Fetch the byte at index %rdx from each string, zero-extended into
   %ecx (from %rsi) and %eax (from %rdi).  */
2214 movzbl (%rsi, %rdx), %ecx
2215 movzbl (%rdi, %rdx), %eax
2217 #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
/* Case-insensitive builds only: replace both byte values by the 32-bit
   entries they select in _nl_C_LC_CTYPE_tolower.  The table address is
   taken RIP-relative and offset by 128 four-byte entries before
   indexing.  */
2218 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
2219 movl (%rdx,%rcx,4), %ecx
2220 movl (%rdx,%rax,4), %eax
/* Target of the "jbe LABEL(strcmp_exitz)" branches in the length-limited
   builds and of the UPDATE_STRNCMP_COUNTER macro.  NOTE(review): the
   instructions executed at this label are not visible in this excerpt;
   the lines below appear to belong to a later exit sequence whose label
   and byte loads are likewise not shown.  */
2226 LABEL(strcmp_exitz):
2235 #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
/* Case-insensitive builds only: replace the values in %rcx and %rax by
   the 32-bit entries they select in _nl_C_LC_CTYPE_tolower.  The table
   address is taken RIP-relative and offset by 128 four-byte entries
   before indexing.  */
2236 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
2237 movl (%rdx,%rcx,4), %ecx
2238 movl (%rdx,%rax,4), %eax
2245 .section .rodata,"a",@progbits
/* Table of 16 handler offsets, placed in the read-only data section.
   Each entry is a 32-bit value: the address of an ashr_N label minus the
   address of the table itself.  Entries are ordered ashr_1 .. ashr_15
   followed by ashr_0, so index i (0..14) holds ashr_(i+1) and index 15
   holds ashr_0.  NOTE(review): the code that indexes this table is not
   visible in this excerpt.  */
2247 LABEL(unaligned_table):
2248 .int LABEL(ashr_1) - LABEL(unaligned_table)
2249 .int LABEL(ashr_2) - LABEL(unaligned_table)
2250 .int LABEL(ashr_3) - LABEL(unaligned_table)
2251 .int LABEL(ashr_4) - LABEL(unaligned_table)
2252 .int LABEL(ashr_5) - LABEL(unaligned_table)
2253 .int LABEL(ashr_6) - LABEL(unaligned_table)
2254 .int LABEL(ashr_7) - LABEL(unaligned_table)
2255 .int LABEL(ashr_8) - LABEL(unaligned_table)
2256 .int LABEL(ashr_9) - LABEL(unaligned_table)
2257 .int LABEL(ashr_10) - LABEL(unaligned_table)
2258 .int LABEL(ashr_11) - LABEL(unaligned_table)
2259 .int LABEL(ashr_12) - LABEL(unaligned_table)
2260 .int LABEL(ashr_13) - LABEL(unaligned_table)
2261 .int LABEL(ashr_14) - LABEL(unaligned_table)
2262 .int LABEL(ashr_15) - LABEL(unaligned_table)
2263 .int LABEL(ashr_0) - LABEL(unaligned_table)
/* libc_hidden_builtin_def is a macro defined outside this file excerpt;
   presumably it emits the hidden-visibility alias for STRCMP -- confirm.  */
2264 libc_hidden_builtin_def (STRCMP)