]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/x86_64/strcmp.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / x86_64 / strcmp.S
CommitLineData
78df0fcb 1/* Highly optimized version for x86-64.
b168057a 2 Copyright (C) 1999-2015 Free Software Foundation, Inc.
78df0fcb
AJ
3 This file is part of the GNU C Library.
4 Based on i686 version contributed by Ulrich Drepper
5 <drepper@cygnus.com>, 1999.
7956a3d2 6 Updated with SSE2 support contributed by Intel Corporation.
78df0fcb
AJ
7
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
12
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
78df0fcb
AJ
21
22#include <sysdep.h>
23#include "asm-syntax.h"
78df0fcb 24
7956a3d2
L
/* Variant selection: exactly one of USE_AS_STRNCMP, USE_AS_STRCASECMP_L,
   USE_AS_STRNCASECMP_L or none of them is expected to be defined.  Each
   branch below defines UPDATE_STRNCMP_COUNTER, which is either empty or
   the length-counter update used by the length-limited variants.  */
25#undef UPDATE_STRNCMP_COUNTER
26
27#ifndef LABEL
28#define LABEL(l) L(l)
29#endif
30
31#ifdef USE_AS_STRNCMP
f69190e7
UD
32/* The simplified code below is not set up to handle strncmp() so far.
33 Should this become necessary it has to be implemented. For now
34 just report the problem. */
4f41c682 35# if !IS_IN (libc)
f69190e7
UD
36# error "strncmp not implemented so far"
37# endif
38
7956a3d2
L
39/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
40 if the new counter > the old one or is 0. */
/* The new counter is %r11 + %rcx - 16.  It is computed in %r9 and only
   copied back to %r11 once both checks have passed.  Clobbers %r9 and
   the flags.  */
41# define UPDATE_STRNCMP_COUNTER \
42 /* calculate the number of bytes left to compare */ \
43 lea -16(%rcx, %r11), %r9; \
44 cmp %r9, %r11; \
45 jb LABEL(strcmp_exitz); \
46 test %r9, %r9; \
47 je LABEL(strcmp_exitz); \
48 mov %r9, %r11
49
42e08a54
UD
50#elif defined USE_AS_STRCASECMP_L
51# include "locale-defines.h"
52
53/* No support for strcasecmp outside libc so far since it is not needed. */
4f41c682 54# if !IS_IN (libc)
42e08a54
UD
55# error "strcasecmp_l not implemented so far"
56# endif
57
/* No length limit in this variant, so the counter update is empty.  */
58# define UPDATE_STRNCMP_COUNTER
e9f82e0d
UD
59#elif defined USE_AS_STRNCASECMP_L
60# include "locale-defines.h"
61
62/* No support for strncasecmp outside libc so far since it is not needed. */
4f41c682 63# if !IS_IN (libc)
e9f82e0d
UD
64# error "strncasecmp_l not implemented so far"
65# endif
66
/* Same counter update as in the USE_AS_STRNCMP branch above:
   %r11 = %r11 + %rcx - 16, leaving through strcmp_exitz when the result
   is 0 or larger than the old value.  Clobbers %r9 and the flags.  */
67# define UPDATE_STRNCMP_COUNTER \
68 /* calculate the number of bytes left to compare */ \
69 lea -16(%rcx, %r11), %r9; \
70 cmp %r9, %r11; \
71 jb LABEL(strcmp_exitz); \
72 test %r9, %r9; \
73 je LABEL(strcmp_exitz); \
74 mov %r9, %r11
7956a3d2
L
75#else
/* Plain strcmp: no counter to update; the symbol name defaults to strcmp
   unless STRCMP was already defined.  */
76# define UPDATE_STRNCMP_COUNTER
77# ifndef STRCMP
25244f17 78# define STRCMP strcmp
7956a3d2
L
79# endif
80#endif
81
/* Select the output section: builds with USE_SSSE3 defined are placed in
   their own .text.ssse3 section, all others in the default .text.  */
0fda545d 82#ifndef USE_SSSE3
7956a3d2 83 .text
0fda545d 84#else
f69190e7 85 .section .text.ssse3,"ax",@progbits
0fda545d
UD
86#endif
87
/* Entry stubs for the case-insensitive variants that take no explicit
   locale.  Each stub reads the thread-local __libc_tsd_LOCALE value and
   leaves it in a register, then falls through into the code that follows
   (the *_l implementation, per the FALLTHROUGH comments below).
   ENTRY2/END2 default to ENTRY/END unless already defined.  */
42e08a54 88#ifdef USE_AS_STRCASECMP_L
73507d3a
UD
89# ifndef ENTRY2
90# define ENTRY2(name) ENTRY (name)
91# define END2(name) END (name)
73507d3a
UD
92# endif
93
e9f82e0d 94ENTRY2 (__strcasecmp)
/* %rax = TLS offset of __libc_tsd_LOCALE; RDX_LP = its value for this
   thread.  NOTE(review): RDX_LP is presumably the third-argument register
   in pointer width - confirm against sysdep.h.  */
42e08a54 95 movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
3cb84561 96 mov %fs:(%rax),%RDX_LP
42e08a54 97
73507d3a 98 // XXX 5 byte should be before the function
42e08a54
UD
99 /* 5-byte NOP. */
100 .byte 0x0f,0x1f,0x44,0x00,0x00
73507d3a
UD
101END2 (__strcasecmp)
102# ifndef NO_NOLOCALE_ALIAS
42e08a54
UD
103weak_alias (__strcasecmp, strcasecmp)
104libc_hidden_def (__strcasecmp)
73507d3a 105# endif
42e08a54 106 /* FALLTHROUGH to strcasecmp_l. */
e9f82e0d
UD
107#elif defined USE_AS_STRNCASECMP_L
108# ifndef ENTRY2
109# define ENTRY2(name) ENTRY (name)
110# define END2(name) END (name)
111# endif
112
113ENTRY2 (__strncasecmp)
/* Same as above, but the locale value is left in RCX_LP because %rdx
   already holds the length argument in this variant.  */
114 movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
3cb84561 115 mov %fs:(%rax),%RCX_LP
e9f82e0d
UD
116
117 // XXX 5 byte should be before the function
118 /* 5-byte NOP. */
119 .byte 0x0f,0x1f,0x44,0x00,0x00
120END2 (__strncasecmp)
121# ifndef NO_NOLOCALE_ALIAS
122weak_alias (__strncasecmp, strncasecmp)
123libc_hidden_def (__strncasecmp)
124# endif
125 /* FALLTHROUGH to strncasecmp_l. */
42e08a54
UD
126#endif
127
29691210 128ENTRY (STRCMP)
4f41c682 129#if !IS_IN (libc)
7956a3d2 130/* Simple version since we can't use SSE registers in ld.so. */
/* In:  %rdi, %rsi = the two strings.
   Out: %eax = 0 if the strings are equal up to and including the null
        char, otherwise 1 or -1 according to an unsigned comparison of
        the first differing bytes.
   Clobbers %ecx, %rdi, %rsi and the flags.  */
78df0fcb
AJ
131L(oop): movb (%rdi), %al
132 cmpb (%rsi), %al /* compare one byte of each string */
133 jne L(neq)
134 incq %rdi
135 incq %rsi
136 testb %al, %al /* bytes were equal: stop at the null char */
137 jnz L(oop)
138
ee618985 139 xorl %eax, %eax /* equal strings: return 0 */
78df0fcb
AJ
140 ret
141
/* The moves below leave the flags from the cmpb above untouched, so the
   cmovb still sees its carry: set when the byte from %rdi is below the
   byte from %rsi, in which case the result becomes -1.  */
142L(neq): movl $1, %eax
143 movl $-1, %ecx
144 cmovbl %ecx, %eax
145 ret
29691210 146END (STRCMP)
4f41c682 147#else /* !IS_IN (libc) */
42e08a54
UD
148# ifdef USE_AS_STRCASECMP_L
149 /* We have to fall back on the C implementation for locales
150 with encodings not matching ASCII for single bytes. */
/* %rdx holds the locale pointer.  Load the LC_CTYPE entry of its
   __locales array into RAX_LP (the short form is used when both offsets
   are zero), then test bit 0 of the _NL_CTYPE_NONASCII_CASE value.  */
151# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
3cb84561 152 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rdx), %RAX_LP
42e08a54 153# else
3cb84561 154 mov (%rdx), %RAX_LP
42e08a54 155# endif
34372fc6 156 testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
42e08a54 157 jne __strcasecmp_l_nonascii
e9f82e0d
UD
158# elif defined USE_AS_STRNCASECMP_L
159 /* We have to fall back on the C implementation for locales
160 with encodings not matching ASCII for single bytes. */
/* Same check as for strcasecmp_l, with the locale pointer in %rcx.  */
161# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
3cb84561 162 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rcx), %RAX_LP
e9f82e0d 163# else
3cb84561 164 mov (%rcx), %RAX_LP
e9f82e0d 165# endif
34372fc6 166 testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
e9f82e0d 167 jne __strncasecmp_l_nonascii
42e08a54
UD
168# endif
169
7956a3d2
L
170/*
171 * This implementation uses SSE to compare up to 16 bytes at a time.
172 */
/* Register use established here: %rdi/%rsi = the two strings; %r11 = the
   remaining length counter (length-limited variants only); %ecx/%eax =
   offsets of %rsi/%rdi within a 64-byte line.  */
e9f82e0d 173# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
174 test %rdx, %rdx /* length argument == 0 */
175 je LABEL(strcmp_exitz)
176 cmp $1, %rdx /* length argument == 1 */
177 je LABEL(Byte0)
178 mov %rdx, %r11 /* %r11 = length counter */
f69190e7 179# endif
7956a3d2
L
180 mov %esi, %ecx
181 mov %edi, %eax
182/* Use 64bit AND here to avoid long NOP padding. */
183 and $0x3f, %rcx /* rsi alignment in cache line */
184 and $0x3f, %rax /* rdi alignment in cache line */
e9f82e0d 185# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
42e08a54
UD
/* Constants for TOLOWER, kept in %xmm5-%xmm7 for the whole function:
   0x40 is the byte just below 'A', 0x5b the byte just above 'Z', and
   0x20 the bit that turns an upper-case ASCII letter into lower case.  */
186 .section .rodata.cst16,"aM",@progbits,16
187 .align 16
188.Lbelowupper:
189 .quad 0x4040404040404040
190 .quad 0x4040404040404040
191.Ltopupper:
192 .quad 0x5b5b5b5b5b5b5b5b
193 .quad 0x5b5b5b5b5b5b5b5b
194.Ltouppermask:
195 .quad 0x2020202020202020
196 .quad 0x2020202020202020
197 .previous
198 movdqa .Lbelowupper(%rip), %xmm5
e9f82e0d 199# define UCLOW_reg %xmm5
42e08a54 200 movdqa .Ltopupper(%rip), %xmm6
e9f82e0d 201# define UCHIGH_reg %xmm6
42e08a54 202 movdqa .Ltouppermask(%rip), %xmm7
e9f82e0d 203# define LCQWORD_reg %xmm7
42e08a54 204# endif
7956a3d2
L
205 cmp $0x30, %ecx
206 ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
207 cmp $0x30, %eax
208 ja LABEL(crosscache) /* rdi: 16-byte load will cross cache line */
/* Load 16 possibly unaligned bytes of each string as two 8-byte halves. */
209 movlpd (%rdi), %xmm1
210 movlpd (%rsi), %xmm2
211 movhpd 8(%rdi), %xmm1
212 movhpd 8(%rsi), %xmm2
e9f82e0d 213# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
42e08a54
UD
/* TOLOWER (reg1, reg2): in both registers, set bit 0x20 in every byte b
   with 0x40 < b < 0x5b (signed byte compares), i.e. convert ASCII
   'A'..'Z' to lower case; other bytes are unchanged.
   Clobbers %xmm8-%xmm11.  */
214# define TOLOWER(reg1, reg2) \
215 movdqa reg1, %xmm8; \
216 movdqa UCHIGH_reg, %xmm9; \
217 movdqa reg2, %xmm10; \
218 movdqa UCHIGH_reg, %xmm11; \
219 pcmpgtb UCLOW_reg, %xmm8; \
220 pcmpgtb reg1, %xmm9; \
221 pcmpgtb UCLOW_reg, %xmm10; \
222 pcmpgtb reg2, %xmm11; \
223 pand %xmm9, %xmm8; \
224 pand %xmm11, %xmm10; \
225 pand LCQWORD_reg, %xmm8; \
226 pand LCQWORD_reg, %xmm10; \
227 por %xmm8, reg1; \
228 por %xmm10, reg2
229 TOLOWER (%xmm1, %xmm2)
230# else
231# define TOLOWER(reg1, reg2)
232# endif
7956a3d2
L
/* After the psubb a byte of %xmm1 has its top bit set only where the two
   strings are equal and the byte is not a null char, so the 16-bit mask
   in %edx is 0xffff exactly when all 16 bytes match without a null.  */
233 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
234 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
235 pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
236 psubb %xmm0, %xmm1 /* packed sub of comparison results */
237 pmovmskb %xmm1, %edx
238 sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
239 jnz LABEL(less16bytes) /* If not, find different value or null char */
e9f82e0d 240# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
241 sub $16, %r11
242 jbe LABEL(strcmp_exitz) /* finish comparison */
f69190e7 243# endif
7956a3d2
L
244 add $16, %rsi /* prepare to search next 16 bytes */
245 add $16, %rdi /* prepare to search next 16 bytes */
246
247 /*
248 * Determine source and destination string offsets from 16-byte alignment.
249 * Use relative offset difference between the two to determine which case
250 * below to use.
251 */
252 .p2align 4
253LABEL(crosscache):
254 and $0xfffffffffffffff0, %rsi /* force %rsi to be 16-byte aligned */
255 and $0xfffffffffffffff0, %rdi /* force %rdi to be 16-byte aligned */
256 mov $0xffff, %edx /* for equivalent offset */
257 xor %r8d, %r8d /* r8d = 0: pointers not swapped */
258 and $0xf, %ecx /* offset of rsi */
259 and $0xf, %eax /* offset of rdi */
260 cmp %eax, %ecx
261 je LABEL(ashr_0) /* rsi and rdi relative offset same */
262 ja LABEL(bigger)
/* The rdi offset is the larger one: swap offsets and pointers so that
   %rsi/%ecx always describe the string with the larger offset, and
   record the swap as 0xffff in %r8d.  */
263 mov %edx, %r8d /* r8d is offset flag for exit tail */
264 xchg %ecx, %eax
265 xchg %rsi, %rdi
266LABEL(bigger):
/* %r9 = 15 + rax - rcx selects the entry in unaligned_table.  Entries
   are 32-bit values that are sign-extended and added to the table's own
   address to form the jump target.  */
267 lea 15(%rax), %r9
268 sub %rcx, %r9
269 lea LABEL(unaligned_table)(%rip), %r10
270 movslq (%r10, %r9,4), %r9
271 lea (%r10, %r9), %r10
272 jmp *%r10 /* jump to corresponding case */
273
274/*
275 * The following cases will be handled by ashr_0
276 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
277 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
278 */
279 .p2align 4
280LABEL(ashr_0):
281
282 movdqa (%rsi), %xmm1
283 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
284 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
e9f82e0d 285# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
7956a3d2 286 pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
42e08a54
UD
287# else
288 movdqa (%rdi), %xmm2
289 TOLOWER (%xmm1, %xmm2)
290 pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
291# endif
7956a3d2
L
292 psubb %xmm0, %xmm1 /* packed sub of comparison results */
293 pmovmskb %xmm1, %r9d
294 shr %cl, %edx /* adjust 0xffff for offset */
295 shr %cl, %r9d /* adjust for 16-byte offset */
296 sub %r9d, %edx
297 /*
298 * edx equals r9d only if the remaining (16 - rcx) bytes of both
299 * blocks are equal and none of them is a null char.
300 */
301 jne LABEL(less32bytes) /* mismatch or null char */
302 UPDATE_STRNCMP_COUNTER
303 mov $16, %rcx
304 mov $16, %r9
305 pxor %xmm0, %xmm0 /* clear xmm0, may have changed above */
306
307 /*
308 * Now both strings are aligned at 16-byte boundary. Loop over strings
309 * checking 32-bytes per iteration.
310 *
 * %rcx is the byte index used for both loads. %xmm0 is still all
 * zero whenever a block did not exit, because a null char in %xmm1
 * always makes the mask differ from 0xffff.
 */
311 .p2align 4
312LABEL(loop_ashr_0):
313 movdqa (%rsi, %rcx), %xmm1
314 movdqa (%rdi, %rcx), %xmm2
42e08a54 315 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
316
317 pcmpeqb %xmm1, %xmm0
318 pcmpeqb %xmm2, %xmm1
319 psubb %xmm0, %xmm1
320 pmovmskb %xmm1, %edx
321 sub $0xffff, %edx
322 jnz LABEL(exit) /* mismatch or null char seen */
323
e9f82e0d 324# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
325 sub $16, %r11
326 jbe LABEL(strcmp_exitz)
f69190e7 327# endif
7956a3d2
L
328 add $16, %rcx
329 movdqa (%rsi, %rcx), %xmm1
330 movdqa (%rdi, %rcx), %xmm2
42e08a54 331 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
332
333 pcmpeqb %xmm1, %xmm0
334 pcmpeqb %xmm2, %xmm1
335 psubb %xmm0, %xmm1
336 pmovmskb %xmm1, %edx
337 sub $0xffff, %edx
338 jnz LABEL(exit)
e9f82e0d 339# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
340 sub $16, %r11
341 jbe LABEL(strcmp_exitz)
f69190e7 342# endif
7956a3d2
L
343 add $16, %rcx
344 jmp LABEL(loop_ashr_0)
345
346/*
347 * The following cases will be handled by ashr_1
42e08a54 348 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
7956a3d2
L
349 * n(15) n -15 0(15 +(n-15) - n) ashr_1
350 */
351 .p2align 4
352LABEL(ashr_1):
353 pxor %xmm0, %xmm0
354 movdqa (%rdi), %xmm2
355 movdqa (%rsi), %xmm1
356 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
357 pslldq $15, %xmm2 /* shift first string to align with second */
42e08a54 358 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
359 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
360 psubb %xmm0, %xmm2 /* packed sub of comparison results */
361 pmovmskb %xmm2, %r9d
362 shr %cl, %edx /* adjust 0xffff for offset */
363 shr %cl, %r9d /* adjust for 16-byte offset */
364 sub %r9d, %edx
365 jnz LABEL(less32bytes) /* mismatch or null char seen */
366 movdqa (%rdi), %xmm3 /* keep the %rdi block for the merge in the loop */
367 UPDATE_STRNCMP_COUNTER
368
369 pxor %xmm0, %xmm0
370 mov $16, %rcx /* index for loads */
371 mov $1, %r9d /* byte position left over from less32bytes case */
372 /*
373 * Setup %r10 value allows us to detect crossing a page boundary.
374 * When %r10 goes positive we have crossed a page boundary and
375 * need to do a nibble.
376 */
377 lea 1(%rdi), %r10
378 and $0xfff, %r10 /* offset into 4K page */
379 sub $0x1000, %r10 /* subtract 4K pagesize */
380
/*
 * Main loop for a shift of 1 byte; each pass handles two 16-byte blocks.
 * %xmm3 carries the previous block read through %rdi and %xmm4 saves the
 * current one. Bytes 1..15 of the previous block and byte 0 of the
 * current block are merged into %xmm2 and compared with the block read
 * through %rsi. %r10 is advanced by 16 before each block; once it is
 * positive, control moves to nibble_ashr_1 before any further load.
 * In the length-limited variants %r11 is reduced by 16 after each block
 * and the loop leaves through strcmp_exitz when no more than 16 remained.
 */
381 .p2align 4
382LABEL(loop_ashr_1):
383 add $16, %r10
384 jg LABEL(nibble_ashr_1) /* cross page boundary */
385
386LABEL(gobble_ashr_1):
387 movdqa (%rsi, %rcx), %xmm1
388 movdqa (%rdi, %rcx), %xmm2
389 movdqa %xmm2, %xmm4 /* store for next cycle */
390
f69190e7 391# ifndef USE_SSSE3
7956a3d2
L
392 psrldq $1, %xmm3
393 pslldq $15, %xmm2
394 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 395# else
0fda545d 396 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 397# endif
42e08a54 398 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
399
400 pcmpeqb %xmm1, %xmm0
401 pcmpeqb %xmm2, %xmm1
402 psubb %xmm0, %xmm1
403 pmovmskb %xmm1, %edx
404 sub $0xffff, %edx
405 jnz LABEL(exit)
406
e9f82e0d 407# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
408 sub $16, %r11
409 jbe LABEL(strcmp_exitz)
f69190e7 410# endif
7956a3d2
L
411 add $16, %rcx
412 movdqa %xmm4, %xmm3
413
414 add $16, %r10
415 jg LABEL(nibble_ashr_1) /* cross page boundary */
416
417 movdqa (%rsi, %rcx), %xmm1
418 movdqa (%rdi, %rcx), %xmm2
419 movdqa %xmm2, %xmm4 /* store for next cycle */
420
f69190e7 421# ifndef USE_SSSE3
7956a3d2 422 psrldq $1, %xmm3
0fda545d 423 pslldq $15, %xmm2
7956a3d2 424 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 425# else
0fda545d 426 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 427# endif
42e08a54 428 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
429
430 pcmpeqb %xmm1, %xmm0
431 pcmpeqb %xmm2, %xmm1
432 psubb %xmm0, %xmm1
433 pmovmskb %xmm1, %edx
434 sub $0xffff, %edx
435 jnz LABEL(exit)
436
e9f82e0d 437# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
438 sub $16, %r11
439 jbe LABEL(strcmp_exitz)
f69190e7 440# endif
7956a3d2
L
441 add $16, %rcx
442 movdqa %xmm4, %xmm3
443 jmp LABEL(loop_ashr_1)
444
445 /*
446 * Nibble avoids loads across page boundary. This is to avoid a potential
447 * access into unmapped memory.
448 *
 * Bytes 1..15 of %xmm3 have not been compared yet. If one of them is
 * a null char (mask 0xfffe), or, in the length-limited variants, at
 * most 15 bytes remain in %r11, finish through ashr_1_exittail without
 * another load through %rdi. Otherwise rewind %r10 by one page and
 * resume at gobble_ashr_1.
 */
449 .p2align 4
450LABEL(nibble_ashr_1):
451 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
452 pmovmskb %xmm0, %edx
453 test $0xfffe, %edx
454 jnz LABEL(ashr_1_exittail) /* find null char */
455
e9f82e0d 456# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 457 cmp $15, %r11
7956a3d2 458 jbe LABEL(ashr_1_exittail)
f69190e7 459# endif
7956a3d2
L
460
461 pxor %xmm0, %xmm0
462 sub $0x1000, %r10 /* subtract 4K from %r10 */
463 jmp LABEL(gobble_ashr_1)
464
465 /*
466 * Once a null char is found, determine whether there is a string
467 * mismatch before the null char.
468 *
 * The block at (%rsi, %rcx) is loaded into %xmm1, and the null mask
 * in %xmm0 and the pending block in %xmm3 are shifted right by 1 byte
 * so that the not yet compared bytes start at byte 0.
 */
469 .p2align 4
470LABEL(ashr_1_exittail):
471 movdqa (%rsi, %rcx), %xmm1
472 psrldq $1, %xmm0
473 psrldq $1, %xmm3
474 jmp LABEL(aftertail)
475
476/*
477 * The following cases will be handled by ashr_2
f69190e7 478 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
7956a3d2
L
479 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
480 */
481 .p2align 4
482LABEL(ashr_2):
483 pxor %xmm0, %xmm0
484 movdqa (%rdi), %xmm2
485 movdqa (%rsi), %xmm1
486 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
487 pslldq $14, %xmm2 /* shift %rdi block to line up with %rsi block */
42e08a54 488 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
489 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
490 psubb %xmm0, %xmm2 /* packed sub of comparison results */
491 pmovmskb %xmm2, %r9d
492 shr %cl, %edx /* drop mask bits below the rsi offset */
493 shr %cl, %r9d /* same adjustment for the result mask */
494 sub %r9d, %edx
495 jnz LABEL(less32bytes) /* mismatch or null char seen */
496 movdqa (%rdi), %xmm3 /* keep the %rdi block for the merge in the loop */
497 UPDATE_STRNCMP_COUNTER
498
499 pxor %xmm0, %xmm0
500 mov $16, %rcx /* index for loads */
501 mov $2, %r9d /* byte position left over from less32bytes case */
502 /*
503 * Setup %r10 value allows us to detect crossing a page boundary.
504 * When %r10 goes positive we have crossed a page boundary and
505 * need to do a nibble.
506 */
507 lea 2(%rdi), %r10
508 and $0xfff, %r10 /* offset into 4K page */
509 sub $0x1000, %r10 /* subtract 4K pagesize */
510
/*
 * Main loop for a shift of 2 bytes; each pass handles two 16-byte blocks.
 * %xmm3 carries the previous block read through %rdi and %xmm4 saves the
 * current one. Bytes 2..15 of the previous block and bytes 0..1 of the
 * current block are merged into %xmm2 and compared with the block read
 * through %rsi. %r10 is advanced by 16 before each block; once it is
 * positive, control moves to nibble_ashr_2 before any further load.
 * In the length-limited variants %r11 is reduced by 16 after each block
 * and the loop leaves through strcmp_exitz when no more than 16 remained.
 */
511 .p2align 4
512LABEL(loop_ashr_2):
513 add $16, %r10
514 jg LABEL(nibble_ashr_2)
515
516LABEL(gobble_ashr_2):
517 movdqa (%rsi, %rcx), %xmm1
518 movdqa (%rdi, %rcx), %xmm2
519 movdqa %xmm2, %xmm4
520
f69190e7 521# ifndef USE_SSSE3
7956a3d2
L
522 psrldq $2, %xmm3
523 pslldq $14, %xmm2
0fda545d 524 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 525# else
0fda545d 526 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 527# endif
42e08a54 528 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
529
530 pcmpeqb %xmm1, %xmm0
531 pcmpeqb %xmm2, %xmm1
532 psubb %xmm0, %xmm1
533 pmovmskb %xmm1, %edx
534 sub $0xffff, %edx
535 jnz LABEL(exit)
536
e9f82e0d 537# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
538 sub $16, %r11
539 jbe LABEL(strcmp_exitz)
f69190e7 540# endif
7956a3d2
L
541
542 add $16, %rcx
543 movdqa %xmm4, %xmm3
544
545 add $16, %r10
546 jg LABEL(nibble_ashr_2) /* cross page boundary */
547
548 movdqa (%rsi, %rcx), %xmm1
549 movdqa (%rdi, %rcx), %xmm2
550 movdqa %xmm2, %xmm4
551
f69190e7 552# ifndef USE_SSSE3
7956a3d2 553 psrldq $2, %xmm3
0fda545d
UD
554 pslldq $14, %xmm2
555 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 556# else
0fda545d 557 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 558# endif
42e08a54 559 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
560
561 pcmpeqb %xmm1, %xmm0
562 pcmpeqb %xmm2, %xmm1
563 psubb %xmm0, %xmm1
564 pmovmskb %xmm1, %edx
565 sub $0xffff, %edx
566 jnz LABEL(exit)
567
e9f82e0d 568# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
569 sub $16, %r11
570 jbe LABEL(strcmp_exitz)
f69190e7 571# endif
7956a3d2
L
572
573 add $16, %rcx
574 movdqa %xmm4, %xmm3
575 jmp LABEL(loop_ashr_2)
576
/*
 * Reached when %r10 has gone positive, i.e. the next load through %rdi
 * would cross a page boundary. Bytes 2..15 of %xmm3 have not been
 * compared yet. If one of them is a null char (mask 0xfffc), or, in the
 * length-limited variants, at most 14 bytes remain in %r11, finish
 * through ashr_2_exittail without another load through %rdi. Otherwise
 * rewind %r10 by one page and resume at gobble_ashr_2.
 */
577 .p2align 4
578LABEL(nibble_ashr_2):
579 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
580 pmovmskb %xmm0, %edx
581 test $0xfffc, %edx
582 jnz LABEL(ashr_2_exittail)
583
e9f82e0d 584# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 585 cmp $14, %r11
7956a3d2 586 jbe LABEL(ashr_2_exittail)
f69190e7 587# endif
7956a3d2
L
588
589 pxor %xmm0, %xmm0
590 sub $0x1000, %r10 /* subtract 4K from %r10 */
591 jmp LABEL(gobble_ashr_2)
592
/*
 * Tail for a shift of 2 bytes: load the block at (%rsi, %rcx) into %xmm1
 * and shift the null mask in %xmm0 and the pending block in %xmm3 right
 * by 2 bytes so that the not yet compared bytes start at byte 0, then
 * continue at aftertail.
 */
593 .p2align 4
594LABEL(ashr_2_exittail):
595 movdqa (%rsi, %rcx), %xmm1
596 psrldq $2, %xmm0
597 psrldq $2, %xmm3
598 jmp LABEL(aftertail)
599
600/*
601 * The following cases will be handled by ashr_3
602 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
603 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
604 */
605 .p2align 4
606LABEL(ashr_3):
607 pxor %xmm0, %xmm0
608 movdqa (%rdi), %xmm2
609 movdqa (%rsi), %xmm1
610 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
611 pslldq $13, %xmm2 /* shift %rdi block to line up with %rsi block */
42e08a54 612 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
613 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
614 psubb %xmm0, %xmm2 /* packed sub of comparison results */
615 pmovmskb %xmm2, %r9d
616 shr %cl, %edx /* drop mask bits below the rsi offset */
617 shr %cl, %r9d /* same adjustment for the result mask */
618 sub %r9d, %edx
619 jnz LABEL(less32bytes) /* mismatch or null char seen */
620 movdqa (%rdi), %xmm3 /* keep the %rdi block for the merge in the loop */
621
622 UPDATE_STRNCMP_COUNTER
623
624 pxor %xmm0, %xmm0
625 mov $16, %rcx /* index for loads */
626 mov $3, %r9d /* byte position left over from less32bytes case */
627 /*
628 * Setup %r10 value allows us to detect crossing a page boundary.
629 * When %r10 goes positive we have crossed a page boundary and
630 * need to do a nibble.
631 */
632 lea 3(%rdi), %r10
633 and $0xfff, %r10 /* offset into 4K page */
634 sub $0x1000, %r10 /* subtract 4K pagesize */
635
/*
 * Main loop for a shift of 3 bytes; each pass handles two 16-byte blocks.
 * %xmm3 carries the previous block read through %rdi and %xmm4 saves the
 * current one. Bytes 3..15 of the previous block and bytes 0..2 of the
 * current block are merged into %xmm2 and compared with the block read
 * through %rsi. %r10 is advanced by 16 before each block; once it is
 * positive, control moves to nibble_ashr_3 before any further load.
 * In the length-limited variants %r11 is reduced by 16 after each block
 * and the loop leaves through strcmp_exitz when no more than 16 remained.
 */
636 .p2align 4
637LABEL(loop_ashr_3):
638 add $16, %r10
639 jg LABEL(nibble_ashr_3)
640
641LABEL(gobble_ashr_3):
642 movdqa (%rsi, %rcx), %xmm1
643 movdqa (%rdi, %rcx), %xmm2
644 movdqa %xmm2, %xmm4
645
f69190e7 646# ifndef USE_SSSE3
7956a3d2
L
647 psrldq $3, %xmm3
648 pslldq $13, %xmm2
0fda545d 649 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 650# else
0fda545d 651 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 652# endif
42e08a54 653 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
654
655 pcmpeqb %xmm1, %xmm0
656 pcmpeqb %xmm2, %xmm1
657 psubb %xmm0, %xmm1
658 pmovmskb %xmm1, %edx
659 sub $0xffff, %edx
660 jnz LABEL(exit)
661
e9f82e0d 662# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
663 sub $16, %r11
664 jbe LABEL(strcmp_exitz)
f69190e7 665# endif
7956a3d2
L
666
667 add $16, %rcx
668 movdqa %xmm4, %xmm3
669
670 add $16, %r10
671 jg LABEL(nibble_ashr_3) /* cross page boundary */
672
673 movdqa (%rsi, %rcx), %xmm1
674 movdqa (%rdi, %rcx), %xmm2
675 movdqa %xmm2, %xmm4
676
f69190e7 677# ifndef USE_SSSE3
7956a3d2 678 psrldq $3, %xmm3
0fda545d
UD
679 pslldq $13, %xmm2
680 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 681# else
0fda545d 682 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 683# endif
42e08a54 684 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
685
686 pcmpeqb %xmm1, %xmm0
687 pcmpeqb %xmm2, %xmm1
688 psubb %xmm0, %xmm1
689 pmovmskb %xmm1, %edx
690 sub $0xffff, %edx
691 jnz LABEL(exit)
692
e9f82e0d 693# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
694 sub $16, %r11
695 jbe LABEL(strcmp_exitz)
f69190e7 696# endif
7956a3d2
L
697
698 add $16, %rcx
699 movdqa %xmm4, %xmm3
700 jmp LABEL(loop_ashr_3)
701
/*
 * Reached when %r10 has gone positive, i.e. the next load through %rdi
 * would cross a page boundary. Bytes 3..15 of %xmm3 have not been
 * compared yet. If one of them is a null char (mask 0xfff8), or, in the
 * length-limited variants, at most 13 bytes remain in %r11, finish
 * through ashr_3_exittail without another load through %rdi. Otherwise
 * rewind %r10 by one page and resume at gobble_ashr_3.
 */
702 .p2align 4
703LABEL(nibble_ashr_3):
704 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
705 pmovmskb %xmm0, %edx
706 test $0xfff8, %edx
707 jnz LABEL(ashr_3_exittail)
708
e9f82e0d 709# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 710 cmp $13, %r11
7956a3d2 711 jbe LABEL(ashr_3_exittail)
f69190e7 712# endif
7956a3d2
L
713
714 pxor %xmm0, %xmm0
715 sub $0x1000, %r10 /* subtract 4K from %r10 */
716 jmp LABEL(gobble_ashr_3)
717
/*
 * Tail for a shift of 3 bytes: load the block at (%rsi, %rcx) into %xmm1
 * and shift the null mask in %xmm0 and the pending block in %xmm3 right
 * by 3 bytes so that the not yet compared bytes start at byte 0, then
 * continue at aftertail.
 */
718 .p2align 4
719LABEL(ashr_3_exittail):
720 movdqa (%rsi, %rcx), %xmm1
721 psrldq $3, %xmm0
722 psrldq $3, %xmm3
723 jmp LABEL(aftertail)
724
725/*
726 * The following cases will be handled by ashr_4
727 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
728 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
729 */
730 .p2align 4
731LABEL(ashr_4):
732 pxor %xmm0, %xmm0
733 movdqa (%rdi), %xmm2
734 movdqa (%rsi), %xmm1
735 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
736 pslldq $12, %xmm2 /* shift %rdi block to line up with %rsi block */
42e08a54 737 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
738 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
739 psubb %xmm0, %xmm2 /* packed sub of comparison results */
740 pmovmskb %xmm2, %r9d
741 shr %cl, %edx /* drop mask bits below the rsi offset */
742 shr %cl, %r9d /* same adjustment for the result mask */
743 sub %r9d, %edx
744 jnz LABEL(less32bytes) /* mismatch or null char seen */
745 movdqa (%rdi), %xmm3 /* keep the %rdi block for the merge in the loop */
746
747 UPDATE_STRNCMP_COUNTER
748
749 pxor %xmm0, %xmm0
750 mov $16, %rcx /* index for loads */
751 mov $4, %r9d /* byte position left over from less32bytes case */
752 /*
753 * Setup %r10 value allows us to detect crossing a page boundary.
754 * When %r10 goes positive we have crossed a page boundary and
755 * need to do a nibble.
756 */
757 lea 4(%rdi), %r10
758 and $0xfff, %r10 /* offset into 4K page */
759 sub $0x1000, %r10 /* subtract 4K pagesize */
760
/*
 * Main loop for a shift of 4 bytes; each pass handles two 16-byte blocks.
 * %xmm3 carries the previous block read through %rdi and %xmm4 saves the
 * current one. Bytes 4..15 of the previous block and bytes 0..3 of the
 * current block are merged into %xmm2 and compared with the block read
 * through %rsi. %r10 is advanced by 16 before each block; once it is
 * positive, control moves to nibble_ashr_4 before any further load.
 * In the length-limited variants %r11 is reduced by 16 after each block
 * and the loop leaves through strcmp_exitz when no more than 16 remained.
 */
761 .p2align 4
762LABEL(loop_ashr_4):
763 add $16, %r10
764 jg LABEL(nibble_ashr_4)
765
766LABEL(gobble_ashr_4):
767 movdqa (%rsi, %rcx), %xmm1
768 movdqa (%rdi, %rcx), %xmm2
769 movdqa %xmm2, %xmm4
770
f69190e7 771# ifndef USE_SSSE3
7956a3d2
L
772 psrldq $4, %xmm3
773 pslldq $12, %xmm2
0fda545d 774 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 775# else
0fda545d 776 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 777# endif
42e08a54 778 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
779
780 pcmpeqb %xmm1, %xmm0
781 pcmpeqb %xmm2, %xmm1
782 psubb %xmm0, %xmm1
783 pmovmskb %xmm1, %edx
784 sub $0xffff, %edx
785 jnz LABEL(exit)
786
e9f82e0d 787# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
788 sub $16, %r11
789 jbe LABEL(strcmp_exitz)
f69190e7 790# endif
7956a3d2
L
791
792 add $16, %rcx
793 movdqa %xmm4, %xmm3
794
795 add $16, %r10
796 jg LABEL(nibble_ashr_4) /* cross page boundary */
797
798 movdqa (%rsi, %rcx), %xmm1
799 movdqa (%rdi, %rcx), %xmm2
800 movdqa %xmm2, %xmm4
801
f69190e7 802# ifndef USE_SSSE3
7956a3d2 803 psrldq $4, %xmm3
0fda545d
UD
804 pslldq $12, %xmm2
805 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 806# else
0fda545d 807 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 808# endif
42e08a54 809 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
810
811 pcmpeqb %xmm1, %xmm0
812 pcmpeqb %xmm2, %xmm1
813 psubb %xmm0, %xmm1
814 pmovmskb %xmm1, %edx
815 sub $0xffff, %edx
816 jnz LABEL(exit)
817
e9f82e0d 818# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
819 sub $16, %r11
820 jbe LABEL(strcmp_exitz)
f69190e7 821# endif
7956a3d2
L
822
823 add $16, %rcx
824 movdqa %xmm4, %xmm3
825 jmp LABEL(loop_ashr_4)
826
/*
 * Reached when %r10 has gone positive, i.e. the next load through %rdi
 * would cross a page boundary. Bytes 4..15 of %xmm3 have not been
 * compared yet. If one of them is a null char (mask 0xfff0), or, in the
 * length-limited variants, at most 12 bytes remain in %r11, finish
 * through ashr_4_exittail without another load through %rdi. Otherwise
 * rewind %r10 by one page and resume at gobble_ashr_4.
 */
827 .p2align 4
828LABEL(nibble_ashr_4):
829 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
830 pmovmskb %xmm0, %edx
831 test $0xfff0, %edx
832 jnz LABEL(ashr_4_exittail)
833
e9f82e0d 834# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 835 cmp $12, %r11
7956a3d2 836 jbe LABEL(ashr_4_exittail)
f69190e7 837# endif
7956a3d2
L
838
839 pxor %xmm0, %xmm0
840 sub $0x1000, %r10 /* subtract 4K from %r10 */
841 jmp LABEL(gobble_ashr_4)
842
/*
 * Tail for a shift of 4 bytes: load the block at (%rsi, %rcx) into %xmm1
 * and shift the null mask in %xmm0 and the pending block in %xmm3 right
 * by 4 bytes so that the not yet compared bytes start at byte 0, then
 * continue at aftertail.
 */
843 .p2align 4
844LABEL(ashr_4_exittail):
845 movdqa (%rsi, %rcx), %xmm1
846 psrldq $4, %xmm0
847 psrldq $4, %xmm3
848 jmp LABEL(aftertail)
849
850/*
851 * The following cases will be handled by ashr_5
852 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
f69190e7 853 * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
7956a3d2
L
854 */
855 .p2align 4
856LABEL(ashr_5):
857 pxor %xmm0, %xmm0
858 movdqa (%rdi), %xmm2
859 movdqa (%rsi), %xmm1
860 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
861 pslldq $11, %xmm2 /* shift %rdi block to line up with %rsi block */
42e08a54 862 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
863 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
864 psubb %xmm0, %xmm2 /* packed sub of comparison results */
865 pmovmskb %xmm2, %r9d
866 shr %cl, %edx /* drop mask bits below the rsi offset */
867 shr %cl, %r9d /* same adjustment for the result mask */
868 sub %r9d, %edx
869 jnz LABEL(less32bytes) /* mismatch or null char seen */
870 movdqa (%rdi), %xmm3 /* keep the %rdi block for the merge in the loop */
871
872 UPDATE_STRNCMP_COUNTER
873
874 pxor %xmm0, %xmm0
875 mov $16, %rcx /* index for loads */
876 mov $5, %r9d /* byte position left over from less32bytes case */
877 /*
878 * Setup %r10 value allows us to detect crossing a page boundary.
879 * When %r10 goes positive we have crossed a page boundary and
880 * need to do a nibble.
881 */
882 lea 5(%rdi), %r10
883 and $0xfff, %r10 /* offset into 4K page */
884 sub $0x1000, %r10 /* subtract 4K pagesize */
885
/*
 * Main loop for a shift of 5 bytes; each pass handles two 16-byte blocks.
 * %xmm3 carries the previous block read through %rdi and %xmm4 saves the
 * current one. Bytes 5..15 of the previous block and bytes 0..4 of the
 * current block are merged into %xmm2 and compared with the block read
 * through %rsi. %r10 is advanced by 16 before each block; once it is
 * positive, control moves to nibble_ashr_5 before any further load.
 * In the length-limited variants %r11 is reduced by 16 after each block
 * and the loop leaves through strcmp_exitz when no more than 16 remained.
 */
886 .p2align 4
887LABEL(loop_ashr_5):
888 add $16, %r10
889 jg LABEL(nibble_ashr_5)
890
891LABEL(gobble_ashr_5):
892 movdqa (%rsi, %rcx), %xmm1
893 movdqa (%rdi, %rcx), %xmm2
894 movdqa %xmm2, %xmm4
895
f69190e7 896# ifndef USE_SSSE3
7956a3d2
L
897 psrldq $5, %xmm3
898 pslldq $11, %xmm2
0fda545d 899 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 900# else
0fda545d 901 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 902# endif
42e08a54 903 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
904
905 pcmpeqb %xmm1, %xmm0
906 pcmpeqb %xmm2, %xmm1
907 psubb %xmm0, %xmm1
908 pmovmskb %xmm1, %edx
909 sub $0xffff, %edx
910 jnz LABEL(exit)
911
8ffcee4a 912# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
913 sub $16, %r11
914 jbe LABEL(strcmp_exitz)
f69190e7 915# endif
7956a3d2
L
916
917 add $16, %rcx
918 movdqa %xmm4, %xmm3
919
920 add $16, %r10
921 jg LABEL(nibble_ashr_5) /* cross page boundary */
922
923 movdqa (%rsi, %rcx), %xmm1
924 movdqa (%rdi, %rcx), %xmm2
925 movdqa %xmm2, %xmm4
926
f69190e7 927# ifndef USE_SSSE3
7956a3d2 928 psrldq $5, %xmm3
0fda545d
UD
929 pslldq $11, %xmm2
930 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 931# else
0fda545d 932 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 933# endif
42e08a54 934 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
935
936 pcmpeqb %xmm1, %xmm0
937 pcmpeqb %xmm2, %xmm1
938 psubb %xmm0, %xmm1
939 pmovmskb %xmm1, %edx
940 sub $0xffff, %edx
941 jnz LABEL(exit)
942
e9f82e0d 943# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
944 sub $16, %r11
945 jbe LABEL(strcmp_exitz)
f69190e7 946# endif
7956a3d2
L
947
948 add $16, %rcx
949 movdqa %xmm4, %xmm3
950 jmp LABEL(loop_ashr_5)
951
/*
 * Reached when %r10 has gone positive, i.e. the next load through %rdi
 * would cross a page boundary. Bytes 5..15 of %xmm3 have not been
 * compared yet. If one of them is a null char (mask 0xffe0), or, in the
 * length-limited variants, at most 11 bytes remain in %r11, finish
 * through ashr_5_exittail without another load through %rdi. Otherwise
 * rewind %r10 by one page and resume at gobble_ashr_5.
 */
952 .p2align 4
953LABEL(nibble_ashr_5):
954 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
955 pmovmskb %xmm0, %edx
956 test $0xffe0, %edx
957 jnz LABEL(ashr_5_exittail)
958
e9f82e0d 959# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 960 cmp $11, %r11
7956a3d2 961 jbe LABEL(ashr_5_exittail)
f69190e7 962# endif
7956a3d2
L
963
964 pxor %xmm0, %xmm0
965 sub $0x1000, %r10 /* subtract 4K from %r10 */
966 jmp LABEL(gobble_ashr_5)
967
/*
 * Tail for a shift of 5 bytes: load the block at (%rsi, %rcx) into %xmm1
 * and shift the null mask in %xmm0 and the pending block in %xmm3 right
 * by 5 bytes so that the not yet compared bytes start at byte 0, then
 * continue at aftertail.
 */
968 .p2align 4
969LABEL(ashr_5_exittail):
970 movdqa (%rsi, %rcx), %xmm1
971 psrldq $5, %xmm0
972 psrldq $5, %xmm3
973 jmp LABEL(aftertail)
974
975/*
976 * The following cases will be handled by ashr_6
977 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
f69190e7 978 * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
7956a3d2
L
979 */
980 .p2align 4
981LABEL(ashr_6):
982 pxor %xmm0, %xmm0
983 movdqa (%rdi), %xmm2
984 movdqa (%rsi), %xmm1
985 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
986 pslldq $10, %xmm2 /* shift %rdi block to line up with %rsi block */
42e08a54 987 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
988 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
989 psubb %xmm0, %xmm2 /* packed sub of comparison results */
990 pmovmskb %xmm2, %r9d
991 shr %cl, %edx /* drop mask bits below the rsi offset */
992 shr %cl, %r9d /* same adjustment for the result mask */
993 sub %r9d, %edx
994 jnz LABEL(less32bytes) /* mismatch or null char seen */
995 movdqa (%rdi), %xmm3 /* keep the %rdi block for the merge in the loop */
996
997 UPDATE_STRNCMP_COUNTER
998
999 pxor %xmm0, %xmm0
1000 mov $16, %rcx /* index for loads */
1001 mov $6, %r9d /* byte position left over from less32bytes case */
1002 /*
1003 * Setup %r10 value allows us to detect crossing a page boundary.
1004 * When %r10 goes positive we have crossed a page boundary and
1005 * need to do a nibble.
1006 */
1007 lea 6(%rdi), %r10
1008 and $0xfff, %r10 /* offset into 4K page */
1009 sub $0x1000, %r10 /* subtract 4K pagesize */
1010
/*
 * Main loop for a shift of 6 bytes; each pass handles two 16-byte blocks.
 * %xmm3 carries the previous block read through %rdi and %xmm4 saves the
 * current one. Bytes 6..15 of the previous block and bytes 0..5 of the
 * current block are merged into %xmm2 and compared with the block read
 * through %rsi. %r10 is advanced by 16 before each block; once it is
 * positive, control moves to nibble_ashr_6 before any further load.
 * In the length-limited variants %r11 is reduced by 16 after each block
 * and the loop leaves through strcmp_exitz when no more than 16 remained.
 */
1011 .p2align 4
1012LABEL(loop_ashr_6):
1013 add $16, %r10
1014 jg LABEL(nibble_ashr_6)
1015
1016LABEL(gobble_ashr_6):
1017 movdqa (%rsi, %rcx), %xmm1
1018 movdqa (%rdi, %rcx), %xmm2
1019 movdqa %xmm2, %xmm4
1020
f69190e7 1021# ifndef USE_SSSE3
7956a3d2
L
1022 psrldq $6, %xmm3
1023 pslldq $10, %xmm2
0fda545d 1024 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1025# else
0fda545d 1026 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1027# endif
42e08a54 1028 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1029
1030 pcmpeqb %xmm1, %xmm0
1031 pcmpeqb %xmm2, %xmm1
1032 psubb %xmm0, %xmm1
1033 pmovmskb %xmm1, %edx
1034 sub $0xffff, %edx
1035 jnz LABEL(exit)
1036
e9f82e0d 1037# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1038 sub $16, %r11
1039 jbe LABEL(strcmp_exitz)
f69190e7 1040# endif
7956a3d2
L
1041
1042 add $16, %rcx
1043 movdqa %xmm4, %xmm3
1044
1045 add $16, %r10
1046 jg LABEL(nibble_ashr_6) /* cross page boundary */
1047
1048 movdqa (%rsi, %rcx), %xmm1
1049 movdqa (%rdi, %rcx), %xmm2
1050 movdqa %xmm2, %xmm4
1051
f69190e7 1052# ifndef USE_SSSE3
7956a3d2 1053 psrldq $6, %xmm3
0fda545d
UD
1054 pslldq $10, %xmm2
1055 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1056# else
0fda545d 1057 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1058# endif
42e08a54 1059 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1060
1061 pcmpeqb %xmm1, %xmm0
1062 pcmpeqb %xmm2, %xmm1
1063 psubb %xmm0, %xmm1
1064 pmovmskb %xmm1, %edx
1065 sub $0xffff, %edx
1066 jnz LABEL(exit)
1067
e9f82e0d 1068# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1069 sub $16, %r11
1070 jbe LABEL(strcmp_exitz)
f69190e7 1071# endif
7956a3d2
L
1072
1073 add $16, %rcx
1074 movdqa %xmm4, %xmm3
1075 jmp LABEL(loop_ashr_6)
1076
/*
 * Reached when %r10 has gone positive, i.e. the next load through %rdi
 * would cross a page boundary. Bytes 6..15 of %xmm3 have not been
 * compared yet. If one of them is a null char (mask 0xffc0), or, in the
 * length-limited variants, at most 10 bytes remain in %r11, finish
 * through ashr_6_exittail without another load through %rdi. Otherwise
 * rewind %r10 by one page and resume at gobble_ashr_6.
 */
1077 .p2align 4
1078LABEL(nibble_ashr_6):
1079 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1080 pmovmskb %xmm0, %edx
1081 test $0xffc0, %edx
1082 jnz LABEL(ashr_6_exittail)
1083
e9f82e0d 1084# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 1085 cmp $10, %r11
7956a3d2 1086 jbe LABEL(ashr_6_exittail)
f69190e7 1087# endif
7956a3d2
L
1088
1089 pxor %xmm0, %xmm0
1090 sub $0x1000, %r10 /* subtract 4K from %r10 */
1091 jmp LABEL(gobble_ashr_6)
1092
1093 .p2align 4
1094LABEL(ashr_6_exittail):
1095 movdqa (%rsi, %rcx), %xmm1
1096 psrldq $6, %xmm0
1097 psrldq $6, %xmm3
1098 jmp LABEL(aftertail)
1099
1100/*
1101 * The following cases will be handled by ashr_7
1102 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
f69190e7 1103 * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
7956a3d2
L
1104 */
/*
 * ashr_7: first-chunk check.  Per the table above, %rcx is the offset of
 * %rsi and %rax the offset of %rdi inside their aligned 16-byte chunks.
 * NOTE(review): %edx is set by code earlier in STRCMP; presumably it
 * holds 0xffff here -- confirm.
 */
1105 .p2align 4
1106LABEL(ashr_7):
1107 pxor %xmm0, %xmm0
1108 movdqa (%rdi), %xmm2
1109 movdqa (%rsi), %xmm1
1110 pcmpeqb %xmm1, %xmm0 /* 0xff where the %rsi chunk has a NUL */
1111 pslldq $9, %xmm2 /* line the %rdi bytes up with the %rsi chunk */
42e08a54 1112 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1113 pcmpeqb %xmm1, %xmm2
1114 psubb %xmm0, %xmm2 /* top bit set only where bytes are equal and not NUL */
1115 pmovmskb %xmm2, %r9d
1116 shr %cl, %edx /* drop the mask bits that precede the start of %rsi */
1117 shr %cl, %r9d
1118 sub %r9d, %edx
1119 jnz LABEL(less32bytes) /* the two masks differ */
1120 movdqa (%rdi), %xmm3 /* previous %rdi chunk for the merge in the loop */
1121
/* In strncmp builds this macro (see top of file) updates the %r11 counter. */
1122 UPDATE_STRNCMP_COUNTER
1123
1124 pxor %xmm0, %xmm0
1125 mov $16, %rcx /* index for loads */
1126 mov $7, %r9d /* byte position left over from less32bytes case */
1127 /*
1128 * Setup %r10 value allows us to detect crossing a page boundary.
1129 * When %r10 goes positive we have crossed a page boundary and
1130 * need to do a nibble.
1131 */
1132 lea 7(%rdi), %r10
1133 and $0xfff, %r10 /* offset into 4K page */
1134 sub $0x1000, %r10 /* subtract 4K pagesize */
1135
/*
 * Main loop, unrolled twice.  Each step loads one aligned 16-byte chunk
 * per string, rebuilds the %rdi-side bytes that line up with the %rsi
 * chunk from the previous chunk (%xmm3) and the current one (%xmm2), and
 * compares them.  %xmm0 is all zero whenever a step starts.
 */
1136 .p2align 4
1137LABEL(loop_ashr_7):
1138 add $16, %r10
1139 jg LABEL(nibble_ashr_7)
1140
1141LABEL(gobble_ashr_7):
1142 movdqa (%rsi, %rcx), %xmm1
1143 movdqa (%rdi, %rcx), %xmm2
1144 movdqa %xmm2, %xmm4 /* keep the unshifted %rdi chunk for the next step */
1145
f69190e7 1146# ifndef USE_SSSE3
7956a3d2
L
1147 psrldq $7, %xmm3
1148 pslldq $9, %xmm2
0fda545d 1149 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1150# else
0fda545d 1151 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1152# endif
42e08a54 1153 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1154
1155 pcmpeqb %xmm1, %xmm0
1156 pcmpeqb %xmm2, %xmm1
1157 psubb %xmm0, %xmm1
1158 pmovmskb %xmm1, %edx
1159 sub $0xffff, %edx /* zero only if all 16 bytes are equal and not NUL */
1160 jnz LABEL(exit)
1161
e9f82e0d 1162# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1163 sub $16, %r11
1164 jbe LABEL(strcmp_exitz) /* count used up: return 0 */
f69190e7 1165# endif
7956a3d2
L
1166
1167 add $16, %rcx
1168 movdqa %xmm4, %xmm3
1169
1170 add $16, %r10
1171 jg LABEL(nibble_ashr_7) /* cross page boundary */
1172
/* Second unrolled step: same sequence as above. */
1173 movdqa (%rsi, %rcx), %xmm1
1174 movdqa (%rdi, %rcx), %xmm2
1175 movdqa %xmm2, %xmm4
1176
f69190e7 1177# ifndef USE_SSSE3
7956a3d2 1178 psrldq $7, %xmm3
0fda545d
UD
1179 pslldq $9, %xmm2
1180 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1181# else
0fda545d 1182 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1183# endif
42e08a54 1184 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1185
1186 pcmpeqb %xmm1, %xmm0
1187 pcmpeqb %xmm2, %xmm1
1188 psubb %xmm0, %xmm1
1189 pmovmskb %xmm1, %edx
1190 sub $0xffff, %edx
1191 jnz LABEL(exit)
1192
e9f82e0d 1193# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1194 sub $16, %r11
1195 jbe LABEL(strcmp_exitz)
f69190e7 1196# endif
7956a3d2
L
1197
1198 add $16, %rcx
1199 movdqa %xmm4, %xmm3
1200 jmp LABEL(loop_ashr_7)
1201
/*
 * Reached when %r10 says the next %rdi chunk lies across a 4K page
 * boundary.  Bytes 7..15 of %xmm3 (mask bits 0xff80) are the part of the
 * previous %rdi chunk not yet compared; if one of them is NUL, or (counted
 * variants) at most 9 bytes remain in %r11, finish in the tail instead of
 * loading the next chunk.
 */
1202 .p2align 4
1203LABEL(nibble_ashr_7):
1204 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1205 pmovmskb %xmm0, %edx
1206 test $0xff80, %edx
1207 jnz LABEL(ashr_7_exittail)
1208
e9f82e0d 1209# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 1210 cmp $9, %r11
7956a3d2 1211 jbe LABEL(ashr_7_exittail)
f69190e7 1212# endif
7956a3d2
L
1213
1214 pxor %xmm0, %xmm0 /* restore the all-zero invariant for the loop */
1215 sub $0x1000, %r10 /* re-arm the page-crossing counter */
1216 jmp LABEL(gobble_ashr_7)
1217
/*
 * Tail: shift the NUL mask (%xmm0) and the leftover %rdi bytes (%xmm3)
 * down by 7 so they line up with the current %rsi chunk, then let
 * aftertail do the final compare.
 */
1218 .p2align 4
1219LABEL(ashr_7_exittail):
1220 movdqa (%rsi, %rcx), %xmm1
1221 psrldq $7, %xmm0
1222 psrldq $7, %xmm3
1223 jmp LABEL(aftertail)
1224
1225/*
1226 * The following cases will be handled by ashr_8
1227 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
f69190e7 1228 * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
7956a3d2
L
1229 */
/*
 * ashr_8: first-chunk check.  Per the table above, %rcx is the offset of
 * %rsi and %rax the offset of %rdi inside their aligned 16-byte chunks.
 * NOTE(review): %edx is set by code earlier in STRCMP; presumably it
 * holds 0xffff here -- confirm.
 */
1230 .p2align 4
1231LABEL(ashr_8):
1232 pxor %xmm0, %xmm0
1233 movdqa (%rdi), %xmm2
1234 movdqa (%rsi), %xmm1
1235 pcmpeqb %xmm1, %xmm0 /* 0xff where the %rsi chunk has a NUL */
1236 pslldq $8, %xmm2 /* line the %rdi bytes up with the %rsi chunk */
42e08a54 1237 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1238 pcmpeqb %xmm1, %xmm2
1239 psubb %xmm0, %xmm2 /* top bit set only where bytes are equal and not NUL */
1240 pmovmskb %xmm2, %r9d
1241 shr %cl, %edx /* drop the mask bits that precede the start of %rsi */
1242 shr %cl, %r9d
1243 sub %r9d, %edx
1244 jnz LABEL(less32bytes) /* the two masks differ */
1245 movdqa (%rdi), %xmm3 /* previous %rdi chunk for the merge in the loop */
1246
/* In strncmp builds this macro (see top of file) updates the %r11 counter. */
1247 UPDATE_STRNCMP_COUNTER
1248
1249 pxor %xmm0, %xmm0
1250 mov $16, %rcx /* index for loads */
1251 mov $8, %r9d /* byte position left over from less32bytes case */
1252 /*
1253 * Setup %r10 value allows us to detect crossing a page boundary.
1254 * When %r10 goes positive we have crossed a page boundary and
1255 * need to do a nibble.
1256 */
1257 lea 8(%rdi), %r10
1258 and $0xfff, %r10 /* offset into 4K page */
1259 sub $0x1000, %r10 /* subtract 4K pagesize */
1260
/*
 * Main loop, unrolled twice.  Each step loads one aligned 16-byte chunk
 * per string, rebuilds the %rdi-side bytes that line up with the %rsi
 * chunk from the previous chunk (%xmm3) and the current one (%xmm2), and
 * compares them.  %xmm0 is all zero whenever a step starts.
 */
1261 .p2align 4
1262LABEL(loop_ashr_8):
1263 add $16, %r10
1264 jg LABEL(nibble_ashr_8)
1265
1266LABEL(gobble_ashr_8):
1267 movdqa (%rsi, %rcx), %xmm1
1268 movdqa (%rdi, %rcx), %xmm2
1269 movdqa %xmm2, %xmm4 /* keep the unshifted %rdi chunk for the next step */
1270
f69190e7 1271# ifndef USE_SSSE3
7956a3d2
L
1272 psrldq $8, %xmm3
1273 pslldq $8, %xmm2
0fda545d 1274 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1275# else
0fda545d 1276 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1277# endif
42e08a54 1278 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1279
1280 pcmpeqb %xmm1, %xmm0
1281 pcmpeqb %xmm2, %xmm1
1282 psubb %xmm0, %xmm1
1283 pmovmskb %xmm1, %edx
1284 sub $0xffff, %edx /* zero only if all 16 bytes are equal and not NUL */
1285 jnz LABEL(exit)
1286
e9f82e0d 1287# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1288 sub $16, %r11
1289 jbe LABEL(strcmp_exitz) /* count used up: return 0 */
f69190e7 1290# endif
7956a3d2
L
1291
1292 add $16, %rcx
1293 movdqa %xmm4, %xmm3
1294
1295 add $16, %r10
1296 jg LABEL(nibble_ashr_8) /* cross page boundary */
1297
/* Second unrolled step: same sequence as above. */
1298 movdqa (%rsi, %rcx), %xmm1
1299 movdqa (%rdi, %rcx), %xmm2
1300 movdqa %xmm2, %xmm4
1301
f69190e7 1302# ifndef USE_SSSE3
7956a3d2 1303 psrldq $8, %xmm3
0fda545d
UD
1304 pslldq $8, %xmm2
1305 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1306# else
0fda545d 1307 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1308# endif
42e08a54 1309 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1310
1311 pcmpeqb %xmm1, %xmm0
1312 pcmpeqb %xmm2, %xmm1
1313 psubb %xmm0, %xmm1
1314 pmovmskb %xmm1, %edx
1315 sub $0xffff, %edx
1316 jnz LABEL(exit)
1317
e9f82e0d 1318# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1319 sub $16, %r11
1320 jbe LABEL(strcmp_exitz)
f69190e7 1321# endif
7956a3d2
L
1322
1323 add $16, %rcx
1324 movdqa %xmm4, %xmm3
1325 jmp LABEL(loop_ashr_8)
1326
/*
 * Reached when %r10 says the next %rdi chunk lies across a 4K page
 * boundary.  Bytes 8..15 of %xmm3 (mask bits 0xff00) are the part of the
 * previous %rdi chunk not yet compared; if one of them is NUL, or (counted
 * variants) at most 8 bytes remain in %r11, finish in the tail instead of
 * loading the next chunk.
 */
1327 .p2align 4
1328LABEL(nibble_ashr_8):
1329 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1330 pmovmskb %xmm0, %edx
1331 test $0xff00, %edx
1332 jnz LABEL(ashr_8_exittail)
1333
e9f82e0d 1334# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 1335 cmp $8, %r11
7956a3d2 1336 jbe LABEL(ashr_8_exittail)
f69190e7 1337# endif
7956a3d2
L
1338
1339 pxor %xmm0, %xmm0 /* restore the all-zero invariant for the loop */
1340 sub $0x1000, %r10 /* re-arm the page-crossing counter */
1341 jmp LABEL(gobble_ashr_8)
1342
/*
 * Tail: shift the NUL mask (%xmm0) and the leftover %rdi bytes (%xmm3)
 * down by 8 so they line up with the current %rsi chunk, then let
 * aftertail do the final compare.
 */
1343 .p2align 4
1344LABEL(ashr_8_exittail):
1345 movdqa (%rsi, %rcx), %xmm1
1346 psrldq $8, %xmm0
1347 psrldq $8, %xmm3
1348 jmp LABEL(aftertail)
1349
1350/*
1351 * The following cases will be handled by ashr_9
1352 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
f69190e7 1353 * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
7956a3d2
L
1354 */
/*
 * ashr_9: first-chunk check.  Per the table above, %rcx is the offset of
 * %rsi and %rax the offset of %rdi inside their aligned 16-byte chunks.
 * NOTE(review): %edx is set by code earlier in STRCMP; presumably it
 * holds 0xffff here -- confirm.
 */
1355 .p2align 4
1356LABEL(ashr_9):
1357 pxor %xmm0, %xmm0
1358 movdqa (%rdi), %xmm2
1359 movdqa (%rsi), %xmm1
1360 pcmpeqb %xmm1, %xmm0 /* 0xff where the %rsi chunk has a NUL */
1361 pslldq $7, %xmm2 /* line the %rdi bytes up with the %rsi chunk */
42e08a54 1362 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1363 pcmpeqb %xmm1, %xmm2
1364 psubb %xmm0, %xmm2 /* top bit set only where bytes are equal and not NUL */
1365 pmovmskb %xmm2, %r9d
1366 shr %cl, %edx /* drop the mask bits that precede the start of %rsi */
1367 shr %cl, %r9d
1368 sub %r9d, %edx
1369 jnz LABEL(less32bytes) /* the two masks differ */
1370 movdqa (%rdi), %xmm3 /* previous %rdi chunk for the merge in the loop */
1371
/* In strncmp builds this macro (see top of file) updates the %r11 counter. */
1372 UPDATE_STRNCMP_COUNTER
1373
1374 pxor %xmm0, %xmm0
1375 mov $16, %rcx /* index for loads */
1376 mov $9, %r9d /* byte position left over from less32bytes case */
1377 /*
1378 * Setup %r10 value allows us to detect crossing a page boundary.
1379 * When %r10 goes positive we have crossed a page boundary and
1380 * need to do a nibble.
1381 */
1382 lea 9(%rdi), %r10
1383 and $0xfff, %r10 /* offset into 4K page */
1384 sub $0x1000, %r10 /* subtract 4K pagesize */
1385
/*
 * Main loop, unrolled twice.  Each step loads one aligned 16-byte chunk
 * per string, rebuilds the %rdi-side bytes that line up with the %rsi
 * chunk from the previous chunk (%xmm3) and the current one (%xmm2), and
 * compares them.  %xmm0 is all zero whenever a step starts.
 */
1386 .p2align 4
1387LABEL(loop_ashr_9):
1388 add $16, %r10
1389 jg LABEL(nibble_ashr_9)
1390
1391LABEL(gobble_ashr_9):
1392 movdqa (%rsi, %rcx), %xmm1
1393 movdqa (%rdi, %rcx), %xmm2
1394 movdqa %xmm2, %xmm4 /* keep the unshifted %rdi chunk for the next step */
1395
f69190e7 1396# ifndef USE_SSSE3
7956a3d2
L
1397 psrldq $9, %xmm3
1398 pslldq $7, %xmm2
0fda545d 1399 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1400# else
0fda545d 1401 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1402# endif
42e08a54 1403 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1404
1405 pcmpeqb %xmm1, %xmm0
1406 pcmpeqb %xmm2, %xmm1
1407 psubb %xmm0, %xmm1
1408 pmovmskb %xmm1, %edx
1409 sub $0xffff, %edx /* zero only if all 16 bytes are equal and not NUL */
1410 jnz LABEL(exit)
1411
e9f82e0d 1412# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1413 sub $16, %r11
1414 jbe LABEL(strcmp_exitz) /* count used up: return 0 */
f69190e7 1415# endif
7956a3d2
L
1416
1417 add $16, %rcx
1418 movdqa %xmm4, %xmm3
1419
1420 add $16, %r10
1421 jg LABEL(nibble_ashr_9) /* cross page boundary */
1422
/* Second unrolled step: same sequence as above. */
1423 movdqa (%rsi, %rcx), %xmm1
1424 movdqa (%rdi, %rcx), %xmm2
1425 movdqa %xmm2, %xmm4
1426
f69190e7 1427# ifndef USE_SSSE3
7956a3d2 1428 psrldq $9, %xmm3
0fda545d
UD
1429 pslldq $7, %xmm2
1430 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1431# else
0fda545d 1432 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1433# endif
42e08a54 1434 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1435
1436 pcmpeqb %xmm1, %xmm0
1437 pcmpeqb %xmm2, %xmm1
1438 psubb %xmm0, %xmm1
1439 pmovmskb %xmm1, %edx
1440 sub $0xffff, %edx
1441 jnz LABEL(exit)
1442
e9f82e0d 1443# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1444 sub $16, %r11
1445 jbe LABEL(strcmp_exitz)
f69190e7 1446# endif
7956a3d2
L
1447
1448 add $16, %rcx
1449 movdqa %xmm4, %xmm3 /* store for next cycle */
1450 jmp LABEL(loop_ashr_9)
1451
/*
 * Reached when %r10 says the next %rdi chunk lies across a 4K page
 * boundary.  Bytes 9..15 of %xmm3 (mask bits 0xfe00) are the part of the
 * previous %rdi chunk not yet compared; if one of them is NUL, or (counted
 * variants) at most 7 bytes remain in %r11, finish in the tail instead of
 * loading the next chunk.
 */
1452 .p2align 4
1453LABEL(nibble_ashr_9):
1454 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1455 pmovmskb %xmm0, %edx
1456 test $0xfe00, %edx
1457 jnz LABEL(ashr_9_exittail)
1458
e9f82e0d 1459# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 1460 cmp $7, %r11
7956a3d2 1461 jbe LABEL(ashr_9_exittail)
f69190e7 1462# endif
7956a3d2
L
1463
1464 pxor %xmm0, %xmm0 /* restore the all-zero invariant for the loop */
1465 sub $0x1000, %r10 /* re-arm the page-crossing counter */
1466 jmp LABEL(gobble_ashr_9)
1467
/*
 * Tail: shift the NUL mask (%xmm0) and the leftover %rdi bytes (%xmm3)
 * down by 9 so they line up with the current %rsi chunk, then let
 * aftertail do the final compare.
 */
1468 .p2align 4
1469LABEL(ashr_9_exittail):
1470 movdqa (%rsi, %rcx), %xmm1
1471 psrldq $9, %xmm0
1472 psrldq $9, %xmm3
1473 jmp LABEL(aftertail)
1474
1475/*
1476 * The following cases will be handled by ashr_10
1477 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
f69190e7 1478 * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
7956a3d2
L
1479 */
/*
 * ashr_10: first-chunk check.  Per the table above, %rcx is the offset of
 * %rsi and %rax the offset of %rdi inside their aligned 16-byte chunks.
 * NOTE(review): %edx is set by code earlier in STRCMP; presumably it
 * holds 0xffff here -- confirm.
 */
1480 .p2align 4
1481LABEL(ashr_10):
1482 pxor %xmm0, %xmm0
1483 movdqa (%rdi), %xmm2
1484 movdqa (%rsi), %xmm1
1485 pcmpeqb %xmm1, %xmm0 /* 0xff where the %rsi chunk has a NUL */
1486 pslldq $6, %xmm2 /* line the %rdi bytes up with the %rsi chunk */
42e08a54 1487 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1488 pcmpeqb %xmm1, %xmm2
1489 psubb %xmm0, %xmm2 /* top bit set only where bytes are equal and not NUL */
1490 pmovmskb %xmm2, %r9d
1491 shr %cl, %edx /* drop the mask bits that precede the start of %rsi */
1492 shr %cl, %r9d
1493 sub %r9d, %edx
1494 jnz LABEL(less32bytes) /* the two masks differ */
1495 movdqa (%rdi), %xmm3 /* previous %rdi chunk for the merge in the loop */
1496
/* In strncmp builds this macro (see top of file) updates the %r11 counter. */
1497 UPDATE_STRNCMP_COUNTER
1498
1499 pxor %xmm0, %xmm0
1500 mov $16, %rcx /* index for loads */
1501 mov $10, %r9d /* byte position left over from less32bytes case */
1502 /*
1503 * Setup %r10 value allows us to detect crossing a page boundary.
1504 * When %r10 goes positive we have crossed a page boundary and
1505 * need to do a nibble.
1506 */
1507 lea 10(%rdi), %r10
1508 and $0xfff, %r10 /* offset into 4K page */
1509 sub $0x1000, %r10 /* subtract 4K pagesize */
1510
/*
 * Main loop, unrolled twice.  Each step loads one aligned 16-byte chunk
 * per string, rebuilds the %rdi-side bytes that line up with the %rsi
 * chunk from the previous chunk (%xmm3) and the current one (%xmm2), and
 * compares them.  %xmm0 is all zero whenever a step starts.
 */
1511 .p2align 4
1512LABEL(loop_ashr_10):
1513 add $16, %r10
1514 jg LABEL(nibble_ashr_10)
1515
1516LABEL(gobble_ashr_10):
1517 movdqa (%rsi, %rcx), %xmm1
1518 movdqa (%rdi, %rcx), %xmm2
1519 movdqa %xmm2, %xmm4 /* keep the unshifted %rdi chunk for the next step */
1520
f69190e7 1521# ifndef USE_SSSE3
7956a3d2
L
1522 psrldq $10, %xmm3
1523 pslldq $6, %xmm2
0fda545d 1524 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1525# else
0fda545d 1526 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1527# endif
42e08a54 1528 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1529
1530 pcmpeqb %xmm1, %xmm0
1531 pcmpeqb %xmm2, %xmm1
1532 psubb %xmm0, %xmm1
1533 pmovmskb %xmm1, %edx
1534 sub $0xffff, %edx /* zero only if all 16 bytes are equal and not NUL */
1535 jnz LABEL(exit)
1536
e9f82e0d 1537# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1538 sub $16, %r11
1539 jbe LABEL(strcmp_exitz) /* count used up: return 0 */
f69190e7 1540# endif
7956a3d2
L
1541
1542 add $16, %rcx
1543 movdqa %xmm4, %xmm3
1544
1545 add $16, %r10
1546 jg LABEL(nibble_ashr_10) /* cross page boundary */
1547
/* Second unrolled step: same sequence as above. */
1548 movdqa (%rsi, %rcx), %xmm1
1549 movdqa (%rdi, %rcx), %xmm2
1550 movdqa %xmm2, %xmm4
1551
f69190e7 1552# ifndef USE_SSSE3
7956a3d2 1553 psrldq $10, %xmm3
0fda545d
UD
1554 pslldq $6, %xmm2
1555 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1556# else
0fda545d 1557 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1558# endif
42e08a54 1559 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1560
1561 pcmpeqb %xmm1, %xmm0
1562 pcmpeqb %xmm2, %xmm1
1563 psubb %xmm0, %xmm1
1564 pmovmskb %xmm1, %edx
1565 sub $0xffff, %edx
1566 jnz LABEL(exit)
1567
e9f82e0d 1568# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1569 sub $16, %r11
1570 jbe LABEL(strcmp_exitz)
f69190e7 1571# endif
7956a3d2
L
1572
1573 add $16, %rcx
1574 movdqa %xmm4, %xmm3
1575 jmp LABEL(loop_ashr_10)
1576
/*
 * Reached when %r10 says the next %rdi chunk lies across a 4K page
 * boundary.  Bytes 10..15 of %xmm3 (mask bits 0xfc00) are the part of the
 * previous %rdi chunk not yet compared; if one of them is NUL, or (counted
 * variants) at most 6 bytes remain in %r11, finish in the tail instead of
 * loading the next chunk.
 */
1577 .p2align 4
1578LABEL(nibble_ashr_10):
1579 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1580 pmovmskb %xmm0, %edx
1581 test $0xfc00, %edx
1582 jnz LABEL(ashr_10_exittail)
1583
e9f82e0d 1584# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 1585 cmp $6, %r11
7956a3d2 1586 jbe LABEL(ashr_10_exittail)
f69190e7 1587# endif
7956a3d2
L
1588
1589 pxor %xmm0, %xmm0 /* restore the all-zero invariant for the loop */
1590 sub $0x1000, %r10 /* re-arm the page-crossing counter */
1591 jmp LABEL(gobble_ashr_10)
1592
/*
 * Tail: shift the NUL mask (%xmm0) and the leftover %rdi bytes (%xmm3)
 * down by 10 so they line up with the current %rsi chunk, then let
 * aftertail do the final compare.
 */
1593 .p2align 4
1594LABEL(ashr_10_exittail):
1595 movdqa (%rsi, %rcx), %xmm1
1596 psrldq $10, %xmm0
1597 psrldq $10, %xmm3
1598 jmp LABEL(aftertail)
1599
1600/*
1601 * The following cases will be handled by ashr_11
1602 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
f69190e7 1603 * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
7956a3d2
L
1604 */
/*
 * ashr_11: first-chunk check.  Per the table above, %rcx is the offset of
 * %rsi and %rax the offset of %rdi inside their aligned 16-byte chunks.
 * NOTE(review): %edx is set by code earlier in STRCMP; presumably it
 * holds 0xffff here -- confirm.
 */
1605 .p2align 4
1606LABEL(ashr_11):
1607 pxor %xmm0, %xmm0
1608 movdqa (%rdi), %xmm2
1609 movdqa (%rsi), %xmm1
1610 pcmpeqb %xmm1, %xmm0 /* 0xff where the %rsi chunk has a NUL */
1611 pslldq $5, %xmm2 /* line the %rdi bytes up with the %rsi chunk */
42e08a54 1612 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1613 pcmpeqb %xmm1, %xmm2
1614 psubb %xmm0, %xmm2 /* top bit set only where bytes are equal and not NUL */
1615 pmovmskb %xmm2, %r9d
1616 shr %cl, %edx /* drop the mask bits that precede the start of %rsi */
1617 shr %cl, %r9d
1618 sub %r9d, %edx
1619 jnz LABEL(less32bytes) /* the two masks differ */
1620 movdqa (%rdi), %xmm3 /* previous %rdi chunk for the merge in the loop */
1621
/* In strncmp builds this macro (see top of file) updates the %r11 counter. */
1622 UPDATE_STRNCMP_COUNTER
1623
1624 pxor %xmm0, %xmm0
1625 mov $16, %rcx /* index for loads */
1626 mov $11, %r9d /* byte position left over from less32bytes case */
1627 /*
1628 * Setup %r10 value allows us to detect crossing a page boundary.
1629 * When %r10 goes positive we have crossed a page boundary and
1630 * need to do a nibble.
1631 */
1632 lea 11(%rdi), %r10
1633 and $0xfff, %r10 /* offset into 4K page */
1634 sub $0x1000, %r10 /* subtract 4K pagesize */
1635
/*
 * Main loop, unrolled twice.  Each step loads one aligned 16-byte chunk
 * per string, rebuilds the %rdi-side bytes that line up with the %rsi
 * chunk from the previous chunk (%xmm3) and the current one (%xmm2), and
 * compares them.  %xmm0 is all zero whenever a step starts.
 */
1636 .p2align 4
1637LABEL(loop_ashr_11):
1638 add $16, %r10
1639 jg LABEL(nibble_ashr_11)
1640
1641LABEL(gobble_ashr_11):
1642 movdqa (%rsi, %rcx), %xmm1
1643 movdqa (%rdi, %rcx), %xmm2
1644 movdqa %xmm2, %xmm4 /* keep the unshifted %rdi chunk for the next step */
1645
f69190e7 1646# ifndef USE_SSSE3
7956a3d2
L
1647 psrldq $11, %xmm3
1648 pslldq $5, %xmm2
0fda545d 1649 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1650# else
0fda545d 1651 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1652# endif
42e08a54 1653 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1654
1655 pcmpeqb %xmm1, %xmm0
1656 pcmpeqb %xmm2, %xmm1
1657 psubb %xmm0, %xmm1
1658 pmovmskb %xmm1, %edx
1659 sub $0xffff, %edx /* zero only if all 16 bytes are equal and not NUL */
1660 jnz LABEL(exit)
1661
e9f82e0d 1662# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1663 sub $16, %r11
1664 jbe LABEL(strcmp_exitz) /* count used up: return 0 */
f69190e7 1665# endif
7956a3d2
L
1666
1667 add $16, %rcx
1668 movdqa %xmm4, %xmm3
1669
1670 add $16, %r10
1671 jg LABEL(nibble_ashr_11) /* cross page boundary */
1672
/* Second unrolled step: same sequence as above. */
1673 movdqa (%rsi, %rcx), %xmm1
1674 movdqa (%rdi, %rcx), %xmm2
1675 movdqa %xmm2, %xmm4
1676
f69190e7 1677# ifndef USE_SSSE3
7956a3d2 1678 psrldq $11, %xmm3
0fda545d
UD
1679 pslldq $5, %xmm2
1680 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1681# else
0fda545d 1682 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1683# endif
42e08a54 1684 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1685
1686 pcmpeqb %xmm1, %xmm0
1687 pcmpeqb %xmm2, %xmm1
1688 psubb %xmm0, %xmm1
1689 pmovmskb %xmm1, %edx
1690 sub $0xffff, %edx
1691 jnz LABEL(exit)
1692
e9f82e0d 1693# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1694 sub $16, %r11
1695 jbe LABEL(strcmp_exitz)
f69190e7 1696# endif
7956a3d2
L
1697
1698 add $16, %rcx
1699 movdqa %xmm4, %xmm3
1700 jmp LABEL(loop_ashr_11)
1701
/*
 * Reached when %r10 says the next %rdi chunk lies across a 4K page
 * boundary.  Bytes 11..15 of %xmm3 (mask bits 0xf800) are the part of the
 * previous %rdi chunk not yet compared; if one of them is NUL, or (counted
 * variants) at most 5 bytes remain in %r11, finish in the tail instead of
 * loading the next chunk.
 */
1702 .p2align 4
1703LABEL(nibble_ashr_11):
1704 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1705 pmovmskb %xmm0, %edx
1706 test $0xf800, %edx
1707 jnz LABEL(ashr_11_exittail)
1708
e9f82e0d 1709# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 1710 cmp $5, %r11
7956a3d2 1711 jbe LABEL(ashr_11_exittail)
f69190e7 1712# endif
7956a3d2
L
1713
1714 pxor %xmm0, %xmm0 /* restore the all-zero invariant for the loop */
1715 sub $0x1000, %r10 /* re-arm the page-crossing counter */
1716 jmp LABEL(gobble_ashr_11)
1717
/*
 * Tail: shift the NUL mask (%xmm0) and the leftover %rdi bytes (%xmm3)
 * down by 11 so they line up with the current %rsi chunk, then let
 * aftertail do the final compare.
 */
1718 .p2align 4
1719LABEL(ashr_11_exittail):
1720 movdqa (%rsi, %rcx), %xmm1
1721 psrldq $11, %xmm0
1722 psrldq $11, %xmm3
1723 jmp LABEL(aftertail)
1724
1725/*
1726 * The following cases will be handled by ashr_12
1727 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
f69190e7 1728 * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
7956a3d2
L
1729 */
/*
 * ashr_12: first-chunk check.  Per the table above, %rcx is the offset of
 * %rsi and %rax the offset of %rdi inside their aligned 16-byte chunks.
 * NOTE(review): %edx is set by code earlier in STRCMP; presumably it
 * holds 0xffff here -- confirm.
 */
1730 .p2align 4
1731LABEL(ashr_12):
1732 pxor %xmm0, %xmm0
1733 movdqa (%rdi), %xmm2
1734 movdqa (%rsi), %xmm1
1735 pcmpeqb %xmm1, %xmm0 /* 0xff where the %rsi chunk has a NUL */
1736 pslldq $4, %xmm2 /* line the %rdi bytes up with the %rsi chunk */
42e08a54 1737 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1738 pcmpeqb %xmm1, %xmm2
1739 psubb %xmm0, %xmm2 /* top bit set only where bytes are equal and not NUL */
1740 pmovmskb %xmm2, %r9d
1741 shr %cl, %edx /* drop the mask bits that precede the start of %rsi */
1742 shr %cl, %r9d
1743 sub %r9d, %edx
1744 jnz LABEL(less32bytes) /* the two masks differ */
1745 movdqa (%rdi), %xmm3 /* previous %rdi chunk for the merge in the loop */
1746
/* In strncmp builds this macro (see top of file) updates the %r11 counter. */
1747 UPDATE_STRNCMP_COUNTER
1748
1749 pxor %xmm0, %xmm0
1750 mov $16, %rcx /* index for loads */
1751 mov $12, %r9d /* byte position left over from less32bytes case */
1752 /*
1753 * Setup %r10 value allows us to detect crossing a page boundary.
1754 * When %r10 goes positive we have crossed a page boundary and
1755 * need to do a nibble.
1756 */
1757 lea 12(%rdi), %r10
1758 and $0xfff, %r10 /* offset into 4K page */
1759 sub $0x1000, %r10 /* subtract 4K pagesize */
1760
/*
 * Main loop, unrolled twice.  Each step loads one aligned 16-byte chunk
 * per string, rebuilds the %rdi-side bytes that line up with the %rsi
 * chunk from the previous chunk (%xmm3) and the current one (%xmm2), and
 * compares them.  %xmm0 is all zero whenever a step starts.
 */
1761 .p2align 4
1762LABEL(loop_ashr_12):
1763 add $16, %r10
1764 jg LABEL(nibble_ashr_12)
1765
1766LABEL(gobble_ashr_12):
1767 movdqa (%rsi, %rcx), %xmm1
1768 movdqa (%rdi, %rcx), %xmm2
1769 movdqa %xmm2, %xmm4 /* keep the unshifted %rdi chunk for the next step */
1770
f69190e7 1771# ifndef USE_SSSE3
7956a3d2
L
1772 psrldq $12, %xmm3
1773 pslldq $4, %xmm2
0fda545d 1774 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1775# else
0fda545d 1776 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1777# endif
42e08a54 1778 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1779
1780 pcmpeqb %xmm1, %xmm0
1781 pcmpeqb %xmm2, %xmm1
1782 psubb %xmm0, %xmm1
1783 pmovmskb %xmm1, %edx
1784 sub $0xffff, %edx /* zero only if all 16 bytes are equal and not NUL */
1785 jnz LABEL(exit)
1786
e9f82e0d 1787# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1788 sub $16, %r11
1789 jbe LABEL(strcmp_exitz) /* count used up: return 0 */
f69190e7 1790# endif
7956a3d2
L
1791
1792 add $16, %rcx
1793 movdqa %xmm4, %xmm3
1794
1795 add $16, %r10
1796 jg LABEL(nibble_ashr_12) /* cross page boundary */
1797
/* Second unrolled step: same sequence as above. */
1798 movdqa (%rsi, %rcx), %xmm1
1799 movdqa (%rdi, %rcx), %xmm2
1800 movdqa %xmm2, %xmm4
1801
f69190e7 1802# ifndef USE_SSSE3
7956a3d2 1803 psrldq $12, %xmm3
0fda545d
UD
1804 pslldq $4, %xmm2
1805 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1806# else
0fda545d 1807 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1808# endif
42e08a54 1809 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1810
1811 pcmpeqb %xmm1, %xmm0
1812 pcmpeqb %xmm2, %xmm1
1813 psubb %xmm0, %xmm1
1814 pmovmskb %xmm1, %edx
1815 sub $0xffff, %edx
1816 jnz LABEL(exit)
1817
e9f82e0d 1818# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1819 sub $16, %r11
1820 jbe LABEL(strcmp_exitz)
f69190e7 1821# endif
7956a3d2
L
1822
1823 add $16, %rcx
1824 movdqa %xmm4, %xmm3
1825 jmp LABEL(loop_ashr_12)
1826
/*
 * Reached when %r10 says the next %rdi chunk lies across a 4K page
 * boundary.  Bytes 12..15 of %xmm3 (mask bits 0xf000) are the part of the
 * previous %rdi chunk not yet compared; if one of them is NUL, or (counted
 * variants) at most 4 bytes remain in %r11, finish in the tail instead of
 * loading the next chunk.
 */
1827 .p2align 4
1828LABEL(nibble_ashr_12):
1829 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1830 pmovmskb %xmm0, %edx
1831 test $0xf000, %edx
1832 jnz LABEL(ashr_12_exittail)
1833
e9f82e0d 1834# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 1835 cmp $4, %r11
7956a3d2 1836 jbe LABEL(ashr_12_exittail)
f69190e7 1837# endif
7956a3d2
L
1838
1839 pxor %xmm0, %xmm0 /* restore the all-zero invariant for the loop */
1840 sub $0x1000, %r10 /* re-arm the page-crossing counter */
1841 jmp LABEL(gobble_ashr_12)
1842
/*
 * Tail: shift the NUL mask (%xmm0) and the leftover %rdi bytes (%xmm3)
 * down by 12 so they line up with the current %rsi chunk, then let
 * aftertail do the final compare.
 */
1843 .p2align 4
1844LABEL(ashr_12_exittail):
1845 movdqa (%rsi, %rcx), %xmm1
1846 psrldq $12, %xmm0
1847 psrldq $12, %xmm3
1848 jmp LABEL(aftertail)
1849
1850/*
1851 * The following cases will be handled by ashr_13
1852 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
f69190e7 1853 * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
7956a3d2
L
1854 */
/*
 * ashr_13: first-chunk check.  Per the table above, %rcx is the offset of
 * %rsi and %rax the offset of %rdi inside their aligned 16-byte chunks.
 * NOTE(review): %edx is set by code earlier in STRCMP; presumably it
 * holds 0xffff here -- confirm.
 */
1855 .p2align 4
1856LABEL(ashr_13):
1857 pxor %xmm0, %xmm0
1858 movdqa (%rdi), %xmm2
1859 movdqa (%rsi), %xmm1
1860 pcmpeqb %xmm1, %xmm0 /* 0xff where the %rsi chunk has a NUL */
1861 pslldq $3, %xmm2 /* line the %rdi bytes up with the %rsi chunk */
42e08a54 1862 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1863 pcmpeqb %xmm1, %xmm2
1864 psubb %xmm0, %xmm2 /* top bit set only where bytes are equal and not NUL */
1865 pmovmskb %xmm2, %r9d
1866 shr %cl, %edx /* drop the mask bits that precede the start of %rsi */
1867 shr %cl, %r9d
1868 sub %r9d, %edx
1869 jnz LABEL(less32bytes) /* the two masks differ */
1870 movdqa (%rdi), %xmm3 /* previous %rdi chunk for the merge in the loop */
1871
/* In strncmp builds this macro (see top of file) updates the %r11 counter. */
1872 UPDATE_STRNCMP_COUNTER
1873
1874 pxor %xmm0, %xmm0
1875 mov $16, %rcx /* index for loads */
1876 mov $13, %r9d /* byte position left over from less32bytes case */
1877 /*
1878 * Setup %r10 value allows us to detect crossing a page boundary.
1879 * When %r10 goes positive we have crossed a page boundary and
1880 * need to do a nibble.
1881 */
1882 lea 13(%rdi), %r10
1883 and $0xfff, %r10 /* offset into 4K page */
1884 sub $0x1000, %r10 /* subtract 4K pagesize */
1885
/*
 * Main loop, unrolled twice.  Each step loads one aligned 16-byte chunk
 * per string, rebuilds the %rdi-side bytes that line up with the %rsi
 * chunk from the previous chunk (%xmm3) and the current one (%xmm2), and
 * compares them.  %xmm0 is all zero whenever a step starts.
 */
1886 .p2align 4
1887LABEL(loop_ashr_13):
1888 add $16, %r10
1889 jg LABEL(nibble_ashr_13)
1890
1891LABEL(gobble_ashr_13):
1892 movdqa (%rsi, %rcx), %xmm1
1893 movdqa (%rdi, %rcx), %xmm2
1894 movdqa %xmm2, %xmm4 /* keep the unshifted %rdi chunk for the next step */
1895
f69190e7 1896# ifndef USE_SSSE3
7956a3d2
L
1897 psrldq $13, %xmm3
1898 pslldq $3, %xmm2
0fda545d 1899 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1900# else
0fda545d 1901 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1902# endif
42e08a54 1903 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1904
1905 pcmpeqb %xmm1, %xmm0
1906 pcmpeqb %xmm2, %xmm1
1907 psubb %xmm0, %xmm1
1908 pmovmskb %xmm1, %edx
1909 sub $0xffff, %edx /* zero only if all 16 bytes are equal and not NUL */
1910 jnz LABEL(exit)
1911
e9f82e0d 1912# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1913 sub $16, %r11
1914 jbe LABEL(strcmp_exitz) /* count used up: return 0 */
f69190e7 1915# endif
7956a3d2
L
1916
1917 add $16, %rcx
1918 movdqa %xmm4, %xmm3
1919
1920 add $16, %r10
1921 jg LABEL(nibble_ashr_13) /* cross page boundary */
1922
/* Second unrolled step: same sequence as above. */
1923 movdqa (%rsi, %rcx), %xmm1
1924 movdqa (%rdi, %rcx), %xmm2
1925 movdqa %xmm2, %xmm4
1926
f69190e7 1927# ifndef USE_SSSE3
7956a3d2 1928 psrldq $13, %xmm3
0fda545d
UD
1929 pslldq $3, %xmm2
1930 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1931# else
0fda545d 1932 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 1933# endif
42e08a54 1934 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1935
1936 pcmpeqb %xmm1, %xmm0
1937 pcmpeqb %xmm2, %xmm1
1938 psubb %xmm0, %xmm1
1939 pmovmskb %xmm1, %edx
1940 sub $0xffff, %edx
1941 jnz LABEL(exit)
1942
e9f82e0d 1943# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
1944 sub $16, %r11
1945 jbe LABEL(strcmp_exitz)
f69190e7 1946# endif
7956a3d2
L
1947
1948 add $16, %rcx
1949 movdqa %xmm4, %xmm3
1950 jmp LABEL(loop_ashr_13)
1951
/*
 * Reached when %r10 says the next %rdi chunk lies across a 4K page
 * boundary.  Bytes 13..15 of %xmm3 (mask bits 0xe000) are the part of the
 * previous %rdi chunk not yet compared; if one of them is NUL, or (counted
 * variants) at most 3 bytes remain in %r11, finish in the tail instead of
 * loading the next chunk.
 */
1952 .p2align 4
1953LABEL(nibble_ashr_13):
1954 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1955 pmovmskb %xmm0, %edx
1956 test $0xe000, %edx
1957 jnz LABEL(ashr_13_exittail)
1958
e9f82e0d 1959# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 1960 cmp $3, %r11
7956a3d2 1961 jbe LABEL(ashr_13_exittail)
f69190e7 1962# endif
7956a3d2
L
1963
1964 pxor %xmm0, %xmm0 /* restore the all-zero invariant for the loop */
1965 sub $0x1000, %r10 /* re-arm the page-crossing counter */
1966 jmp LABEL(gobble_ashr_13)
1967
/*
 * Tail: shift the NUL mask (%xmm0) and the leftover %rdi bytes (%xmm3)
 * down by 13 so they line up with the current %rsi chunk, then let
 * aftertail do the final compare.
 */
1968 .p2align 4
1969LABEL(ashr_13_exittail):
1970 movdqa (%rsi, %rcx), %xmm1
1971 psrldq $13, %xmm0
1972 psrldq $13, %xmm3
1973 jmp LABEL(aftertail)
1974
1975/*
1976 * The following cases will be handled by ashr_14
1977 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
f69190e7 1978 * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
7956a3d2
L
1979 */
/*
 * ashr_14: first-chunk check.  Per the table above, %rcx is the offset of
 * %rsi and %rax the offset of %rdi inside their aligned 16-byte chunks.
 * NOTE(review): %edx is set by code earlier in STRCMP; presumably it
 * holds 0xffff here -- confirm.
 */
1980 .p2align 4
1981LABEL(ashr_14):
1982 pxor %xmm0, %xmm0
1983 movdqa (%rdi), %xmm2
1984 movdqa (%rsi), %xmm1
1985 pcmpeqb %xmm1, %xmm0 /* 0xff where the %rsi chunk has a NUL */
1986 pslldq $2, %xmm2 /* line the %rdi bytes up with the %rsi chunk */
42e08a54 1987 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
1988 pcmpeqb %xmm1, %xmm2
1989 psubb %xmm0, %xmm2 /* top bit set only where bytes are equal and not NUL */
1990 pmovmskb %xmm2, %r9d
1991 shr %cl, %edx /* drop the mask bits that precede the start of %rsi */
1992 shr %cl, %r9d
1993 sub %r9d, %edx
1994 jnz LABEL(less32bytes) /* the two masks differ */
1995 movdqa (%rdi), %xmm3 /* previous %rdi chunk for the merge in the loop */
1996
/* In strncmp builds this macro (see top of file) updates the %r11 counter. */
1997 UPDATE_STRNCMP_COUNTER
1998
1999 pxor %xmm0, %xmm0
2000 mov $16, %rcx /* index for loads */
2001 mov $14, %r9d /* byte position left over from less32bytes case */
2002 /*
2003 * Setup %r10 value allows us to detect crossing a page boundary.
2004 * When %r10 goes positive we have crossed a page boundary and
2005 * need to do a nibble.
2006 */
2007 lea 14(%rdi), %r10
2008 and $0xfff, %r10 /* offset into 4K page */
2009 sub $0x1000, %r10 /* subtract 4K pagesize */
2010
/*
 * Main loop, unrolled twice.  Each step loads one aligned 16-byte chunk
 * per string, rebuilds the %rdi-side bytes that line up with the %rsi
 * chunk from the previous chunk (%xmm3) and the current one (%xmm2), and
 * compares them.  %xmm0 is all zero whenever a step starts.
 */
2011 .p2align 4
2012LABEL(loop_ashr_14):
2013 add $16, %r10
2014 jg LABEL(nibble_ashr_14)
2015
2016LABEL(gobble_ashr_14):
2017 movdqa (%rsi, %rcx), %xmm1
2018 movdqa (%rdi, %rcx), %xmm2
2019 movdqa %xmm2, %xmm4 /* keep the unshifted %rdi chunk for the next step */
2020
f69190e7 2021# ifndef USE_SSSE3
7956a3d2
L
2022 psrldq $14, %xmm3
2023 pslldq $2, %xmm2
0fda545d 2024 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 2025# else
0fda545d 2026 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 2027# endif
42e08a54 2028 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
2029
2030 pcmpeqb %xmm1, %xmm0
2031 pcmpeqb %xmm2, %xmm1
2032 psubb %xmm0, %xmm1
2033 pmovmskb %xmm1, %edx
2034 sub $0xffff, %edx /* zero only if all 16 bytes are equal and not NUL */
2035 jnz LABEL(exit)
2036
e9f82e0d 2037# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
2038 sub $16, %r11
2039 jbe LABEL(strcmp_exitz) /* count used up: return 0 */
f69190e7 2040# endif
7956a3d2
L
2041
2042 add $16, %rcx
2043 movdqa %xmm4, %xmm3
2044
2045 add $16, %r10
2046 jg LABEL(nibble_ashr_14) /* cross page boundary */
2047
/* Second unrolled step: same sequence as above. */
2048 movdqa (%rsi, %rcx), %xmm1
2049 movdqa (%rdi, %rcx), %xmm2
2050 movdqa %xmm2, %xmm4
2051
f69190e7 2052# ifndef USE_SSSE3
7956a3d2 2053 psrldq $14, %xmm3
0fda545d
UD
2054 pslldq $2, %xmm2
2055 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 2056# else
0fda545d 2057 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 2058# endif
42e08a54 2059 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
2060
2061 pcmpeqb %xmm1, %xmm0
2062 pcmpeqb %xmm2, %xmm1
2063 psubb %xmm0, %xmm1
2064 pmovmskb %xmm1, %edx
2065 sub $0xffff, %edx
2066 jnz LABEL(exit)
2067
e9f82e0d 2068# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
2069 sub $16, %r11
2070 jbe LABEL(strcmp_exitz)
f69190e7 2071# endif
7956a3d2
L
2072
2073 add $16, %rcx
2074 movdqa %xmm4, %xmm3
2075 jmp LABEL(loop_ashr_14)
2076
/*
 * Reached when %r10 says the next %rdi chunk lies across a 4K page
 * boundary.  Bytes 14..15 of %xmm3 (mask bits 0xc000) are the part of the
 * previous %rdi chunk not yet compared; if one of them is NUL, or (counted
 * variants) at most 2 bytes remain in %r11, finish in the tail instead of
 * loading the next chunk.
 */
2077 .p2align 4
2078LABEL(nibble_ashr_14):
2079 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
2080 pmovmskb %xmm0, %edx
2081 test $0xc000, %edx
2082 jnz LABEL(ashr_14_exittail)
2083
e9f82e0d 2084# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7 2085 cmp $2, %r11
7956a3d2 2086 jbe LABEL(ashr_14_exittail)
f69190e7 2087# endif
7956a3d2
L
2088
2089 pxor %xmm0, %xmm0 /* restore the all-zero invariant for the loop */
2090 sub $0x1000, %r10 /* re-arm the page-crossing counter */
2091 jmp LABEL(gobble_ashr_14)
2092
/*
 * Tail: shift the NUL mask (%xmm0) and the leftover %rdi bytes (%xmm3)
 * down by 14 so they line up with the current %rsi chunk, then let
 * aftertail do the final compare.
 */
2093 .p2align 4
2094LABEL(ashr_14_exittail):
2095 movdqa (%rsi, %rcx), %xmm1
2096 psrldq $14, %xmm0
2097 psrldq $14, %xmm3
2098 jmp LABEL(aftertail)
2099
2100/*
2101 * The following cases will be handled by ashr_15
2102 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
f69190e7 2103 * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
7956a3d2
L
2104 */
/*
 * ashr_15: first-chunk check.  Per the table above, %rcx is the offset of
 * %rsi and %rax the offset of %rdi inside their aligned 16-byte chunks.
 * NOTE(review): %edx is set by code earlier in STRCMP; presumably it
 * holds 0xffff here -- confirm.
 */
2105 .p2align 4
2106LABEL(ashr_15):
2107 pxor %xmm0, %xmm0
2108 movdqa (%rdi), %xmm2
2109 movdqa (%rsi), %xmm1
2110 pcmpeqb %xmm1, %xmm0 /* 0xff where the %rsi chunk has a NUL */
2111 pslldq $1, %xmm2 /* line the %rdi bytes up with the %rsi chunk */
42e08a54 2112 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
2113 pcmpeqb %xmm1, %xmm2
2114 psubb %xmm0, %xmm2 /* top bit set only where bytes are equal and not NUL */
2115 pmovmskb %xmm2, %r9d
2116 shr %cl, %edx /* drop the mask bits that precede the start of %rsi */
2117 shr %cl, %r9d
2118 sub %r9d, %edx
2119 jnz LABEL(less32bytes) /* the two masks differ */
2120
2121 movdqa (%rdi), %xmm3 /* previous %rdi chunk for the merge in the loop */
2122
/* In strncmp builds this macro (see top of file) updates the %r11 counter. */
2123 UPDATE_STRNCMP_COUNTER
2124
2125 pxor %xmm0, %xmm0
2126 mov $16, %rcx /* index for loads */
2127 mov $15, %r9d /* byte position left over from less32bytes case */
2128 /*
2129 * Setup %r10 value allows us to detect crossing a page boundary.
2130 * When %r10 goes positive we have crossed a page boundary and
2131 * need to do a nibble.
2132 */
2133 lea 15(%rdi), %r10
2134 and $0xfff, %r10 /* offset into 4K page */
2135
2136 sub $0x1000, %r10 /* subtract 4K pagesize */
2137
/*
 * Main loop, unrolled twice.  Each step loads one aligned 16-byte chunk
 * per string, rebuilds the %rdi-side bytes that line up with the %rsi
 * chunk from the previous chunk (%xmm3) and the current one (%xmm2), and
 * compares them.  %xmm0 is all zero whenever a step starts.
 */
2138 .p2align 4
2139LABEL(loop_ashr_15):
2140 add $16, %r10
2141 jg LABEL(nibble_ashr_15)
2142
2143LABEL(gobble_ashr_15):
2144 movdqa (%rsi, %rcx), %xmm1
2145 movdqa (%rdi, %rcx), %xmm2
2146 movdqa %xmm2, %xmm4 /* keep the unshifted %rdi chunk for the next step */
2147
f69190e7 2148# ifndef USE_SSSE3
7956a3d2
L
2149 psrldq $15, %xmm3
2150 pslldq $1, %xmm2
0fda545d 2151 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 2152# else
0fda545d 2153 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 2154# endif
42e08a54 2155 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
2156
2157 pcmpeqb %xmm1, %xmm0
2158 pcmpeqb %xmm2, %xmm1
2159 psubb %xmm0, %xmm1
2160 pmovmskb %xmm1, %edx
2161 sub $0xffff, %edx /* zero only if all 16 bytes are equal and not NUL */
2162 jnz LABEL(exit)
2163
e9f82e0d 2164# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
2165 sub $16, %r11
2166 jbe LABEL(strcmp_exitz) /* count used up: return 0 */
f69190e7 2167# endif
7956a3d2
L
2168
2169 add $16, %rcx
2170 movdqa %xmm4, %xmm3
2171
2172 add $16, %r10
2173 jg LABEL(nibble_ashr_15) /* cross page boundary */
2174
/* Second unrolled step: same sequence as above. */
2175 movdqa (%rsi, %rcx), %xmm1
2176 movdqa (%rdi, %rcx), %xmm2
2177 movdqa %xmm2, %xmm4
2178
f69190e7 2179# ifndef USE_SSSE3
7956a3d2 2180 psrldq $15, %xmm3
0fda545d
UD
2181 pslldq $1, %xmm2
2182 por %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 2183# else
0fda545d 2184 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
f69190e7 2185# endif
42e08a54 2186 TOLOWER (%xmm1, %xmm2)
7956a3d2
L
2187
2188 pcmpeqb %xmm1, %xmm0
2189 pcmpeqb %xmm2, %xmm1
2190 psubb %xmm0, %xmm1
2191 pmovmskb %xmm1, %edx
2192 sub $0xffff, %edx
2193 jnz LABEL(exit)
2194
e9f82e0d 2195# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
2196 sub $16, %r11
2197 jbe LABEL(strcmp_exitz)
f69190e7 2198# endif
7956a3d2
L
2199
2200 add $16, %rcx
2201 movdqa %xmm4, %xmm3
2202 jmp LABEL(loop_ashr_15)
2203
/*
 * Reached when %r10 says the next %rdi chunk lies across a 4K page
 * boundary.  Byte 15 of %xmm3 (mask bit 0x8000) is the part of the
 * previous %rdi chunk not yet compared; if it is NUL, or (counted
 * variants) at most 1 byte remains in %r11, finish in the tail instead of
 * loading the next chunk.
 */
2204 .p2align 4
2205LABEL(nibble_ashr_15):
2206 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
2207 pmovmskb %xmm0, %edx
2208 test $0x8000, %edx
2209 jnz LABEL(ashr_15_exittail)
2210
e9f82e0d 2211# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
45db99c7
UD
2212 cmpq $1, %r11
2213 jbe LABEL(ashr_15_exittail)
f69190e7 2214# endif
7956a3d2
L
2215
2216 pxor %xmm0, %xmm0 /* restore the all-zero invariant for the loop */
2217 sub $0x1000, %r10 /* re-arm the page-crossing counter */
2218 jmp LABEL(gobble_ashr_15)
2219
/*
 * Tail: shift the NUL mask (%xmm0) and the leftover %rdi byte (%xmm3)
 * down by 15 so they line up with the current %rsi chunk.  There is no
 * jump here: execution falls through into aftertail, which follows.
 */
2220 .p2align 4
2221LABEL(ashr_15_exittail):
2222 movdqa (%rsi, %rcx), %xmm1
2223 psrldq $15, %xmm3
2224 psrldq $15, %xmm0
2225
/* aftertail: turn the two tail vectors into a bit mask in %edx.
   TOLOWER is a macro defined elsewhere in this file (presumably it
   case-folds its operands in the case-insensitive builds and expands
   to nothing otherwise -- confirm at its definition).
   pcmpeqb leaves 0xff in every byte of %xmm1 that equals %xmm3;
   subtracting %xmm0 and taking the byte sign bits with pmovmskb gives
   a mask which is then inverted, so the bits that end up set in %edx
   are the ones the code at `ret' scans with bsf.  Note that `not'
   also sets the upper 16 bits of %edx, which pmovmskb left clear.
   Falls through into exit.  */
2226 .p2align 4
2227LABEL(aftertail):
42e08a54 2228 TOLOWER (%xmm1, %xmm3)
7956a3d2
L
2229 pcmpeqb %xmm3, %xmm1
2230 psubb %xmm0, %xmm1
2231 pmovmskb %xmm1, %edx
2232 not %edx
2233
/* exit / less32bytes: rebuild the two byte addresses to compare.
   `exit' first computes %rax = %r9 + %rcx - 16; `less32bytes' is a
   second entry point that expects %rax to be set already.  %rdi is
   then advanced by %rax and %rsi by %rcx.  If the flag in %r8d is
   non-zero the two pointers are exchanged before reaching `ret';
   if it is zero the exchange is skipped.  */
2234 .p2align 4
2235LABEL(exit):
2236 lea -16(%r9, %rcx), %rax /* locate the exact offset for rdi */
2237LABEL(less32bytes):
2238 lea (%rdi, %rax), %rdi /* locate the exact address for first operand(rdi) */
2239 lea (%rsi, %rcx), %rsi /* locate the exact address for second operand(rsi) */
2240 test %r8d, %r8d
2241 jz LABEL(ret)
2242 xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
2243
/* ret / less16bytes: produce the return value from the mask in %rdx.
   bsf replaces %rdx with the index of its lowest set bit, which is
   used as the byte offset into both strings.
   strncmp/strncasecmp builds: that index is subtracted from the
   counter %r11; if the counter was not larger than the index
   (unsigned), the result is 0 via strcmp_exitz.
   The byte at that offset is loaded zero-extended from each string
   (%rsi -> %ecx, %rdi -> %eax).  In the case-insensitive builds both
   bytes are first replaced by the 32-bit entry they select in the
   table at _nl_C_LC_CTYPE_tolower+128*4.
   Returns %eax - %ecx in %eax.  */
2244 .p2align 4
2245LABEL(ret):
2246LABEL(less16bytes):
2247 bsf %rdx, %rdx /* find and store bit index in %rdx */
2248
e9f82e0d 2249# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
7956a3d2
L
2250 sub %rdx, %r11
2251 jbe LABEL(strcmp_exitz)
f69190e7 2252# endif
7956a3d2
L
2253 movzbl (%rsi, %rdx), %ecx
2254 movzbl (%rdi, %rdx), %eax
2255
e9f82e0d 2256# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
42e08a54
UD
2257 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
2258 movl (%rdx,%rcx,4), %ecx
2259 movl (%rdx,%rax,4), %eax
2260# endif
2261
7956a3d2
L
2262 sub %ecx, %eax
2263 ret
2264
/* strcmp_exitz: common exit that returns 0 in %eax.  The visible
   users are the length-limited builds, which branch here once the
   counter %r11 is exhausted.  */
2265LABEL(strcmp_exitz):
2266 xor %eax, %eax
2267 ret
2268
/* Byte0: return the difference of the bytes addressed directly by
   %rdi and %rsi (no offset).  Each is loaded with a zero-extending
   movzx (no size suffix is given; presumably assembled as a byte
   load, mirroring the movzbl pair at less16bytes).  In the
   case-insensitive builds both values are first replaced by the
   32-bit entry they select in the table at
   _nl_C_LC_CTYPE_tolower+128*4.  Returns %eax - %ecx in %eax.  */
2269 .p2align 4
2270LABEL(Byte0):
2271 movzx (%rsi), %ecx
2272 movzx (%rdi), %eax
2273
e9f82e0d
UD
2274# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2275 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
2276 movl (%rdx,%rcx,4), %ecx
2277 movl (%rdx,%rax,4), %eax
2278# endif
2279
7956a3d2
L
2280 sub %ecx, %eax
2281 ret
29691210 2282END (STRCMP)
7956a3d2
L
2283
/* unaligned_table: read-only table of sixteen 32-bit entries, aligned
   to 8 bytes.  Each entry is the distance from the start of the table
   to one of the ashr_N code labels, so the table is
   position-independent.  Note the ordering: entries 0..14 refer to
   ashr_1..ashr_15 and the last entry (index 15) refers to ashr_0.
   The code that indexes this table is outside this part of the file;
   presumably it adds the selected entry to the table address and
   jumps there -- confirm at the dispatch site.  */
2284 .section .rodata,"a",@progbits
2285 .p2align 3
2286LABEL(unaligned_table):
2287 .int LABEL(ashr_1) - LABEL(unaligned_table)
2288 .int LABEL(ashr_2) - LABEL(unaligned_table)
2289 .int LABEL(ashr_3) - LABEL(unaligned_table)
2290 .int LABEL(ashr_4) - LABEL(unaligned_table)
2291 .int LABEL(ashr_5) - LABEL(unaligned_table)
2292 .int LABEL(ashr_6) - LABEL(unaligned_table)
2293 .int LABEL(ashr_7) - LABEL(unaligned_table)
2294 .int LABEL(ashr_8) - LABEL(unaligned_table)
2295 .int LABEL(ashr_9) - LABEL(unaligned_table)
2296 .int LABEL(ashr_10) - LABEL(unaligned_table)
2297 .int LABEL(ashr_11) - LABEL(unaligned_table)
2298 .int LABEL(ashr_12) - LABEL(unaligned_table)
2299 .int LABEL(ashr_13) - LABEL(unaligned_table)
2300 .int LABEL(ashr_14) - LABEL(unaligned_table)
2301 .int LABEL(ashr_15) - LABEL(unaligned_table)
2302 .int LABEL(ashr_0) - LABEL(unaligned_table)
4f41c682 2303#endif /* !IS_IN (libc) */
7956a3d2 2304libc_hidden_builtin_def (STRCMP)