]>
Commit | Line | Data |
---|---|---|
904057bc | 1 | /* strcmp with SSE4.2 |
6d7e8eda | 2 | Copyright (C) 2010-2023 Free Software Foundation, Inc. |
904057bc L |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 16 | License along with the GNU C Library; if not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
904057bc | 18 | |
4f41c682 | 19 | #if IS_IN (libc) |
904057bc L |
20 | |
21 | #include <sysdep.h> | |
22 | #include "asm-syntax.h" | |
23 | ||
/* Unwind bookkeeping for a 4-byte push: the CFA offset grows by one
   stack slot and REG is recorded as saved at the new top of stack.  */
24 | #define CFI_PUSH(REG) \ | |
25 | cfi_adjust_cfa_offset (4); \ | |
26 | cfi_rel_offset (REG, 0) | |
27 | ||
/* Unwind bookkeeping for a 4-byte pop: the inverse of CFI_PUSH.  */
28 | #define CFI_POP(REG) \ | |
29 | cfi_adjust_cfa_offset (-4); \ | |
30 | cfi_restore (REG) | |
31 | ||
/* Push or pop REG together with the matching unwind annotation.  */
32 | #define PUSH(REG) pushl REG; CFI_PUSH (REG) | |
33 | #define POP(REG) popl REG; CFI_POP (REG) | |
34 | ||
/* Per-variant configuration.  Each branch selects:
     STRCMP   - the symbol defined by ENTRY (STRCMP), unless the
		includer already defined it;
     STR1, STR2, CNT - %esp-relative offsets of the arguments as seen
		after the function has pushed its saved registers, i.e.
		4 (return address) plus 4 per register pushed by then;
     LOCALE   - offset of the locale argument, read at function entry
		before anything is pushed;
     REM      - register holding the remaining byte count (n variants);
     NONASCII - symbol jumped to when the locale check fails;
     RETURN   - epilogue for the early-exit paths.  After the "ret" it
		emits ".p2align 4" and CFI_PUSHes so that the unwind
		state is correct again for the code that follows.  */
6abf3465 | 35 | #ifdef USE_AS_STRNCMP |
/* strncmp: only REM is pushed before the arguments are read.  */
904057bc | 36 | # ifndef STRCMP |
6abf3465 | 37 | # define STRCMP __strncmp_sse4_2 |
904057bc | 38 | # endif |
6abf3465 | 39 | # define STR1 8 |
904057bc | 40 | # define STR2 STR1+4 |
6abf3465 UD |
41 | # define CNT STR2+4 |
42 | # define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) | |
43 | # define REM %ebp | |
44 | #elif defined USE_AS_STRCASECMP_L | |
/* strcasecmp_l: %edi is pushed, plus %ebx first when PIC.  */
45 | # include "locale-defines.h" | |
904057bc | 46 | # ifndef STRCMP |
6abf3465 | 47 | # define STRCMP __strcasecmp_l_sse4_2 |
904057bc | 48 | # endif |
5583a086 AS |
49 | # ifdef PIC |
50 | # define STR1 12 | |
51 | # else | |
52 | # define STR1 8 | |
53 | # endif | |
6abf3465 | 54 | # define STR2 STR1+4 |
c0c3f78a | 55 | # define LOCALE 12 /* Loaded before the adjustment. */ |
6abf3465 UD |
56 | # ifdef PIC |
57 | # define RETURN POP (%edi); POP (%ebx); ret; \ | |
58 | .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi) | |
59 | # else | |
60 | # define RETURN POP (%edi); ret; .p2align 4; CFI_PUSH (%edi) | |
61 | # endif | |
62 | # define NONASCII __strcasecmp_nonascii | |
63 | #elif defined USE_AS_STRNCASECMP_L | |
/* strncasecmp_l: REM and %edi are pushed, plus %ebx first when PIC.  */
64 | # include "locale-defines.h" | |
65 | # ifndef STRCMP | |
66 | # define STRCMP __strncasecmp_l_sse4_2 | |
67 | # endif | |
5583a086 AS |
68 | # ifdef PIC |
69 | # define STR1 16 | |
70 | # else | |
71 | # define STR1 12 | |
72 | # endif | |
904057bc L |
73 | # define STR2 STR1+4 |
74 | # define CNT STR2+4 | |
c0c3f78a | 75 | # define LOCALE 16 /* Loaded before the adjustment. */ |
6abf3465 UD |
76 | # ifdef PIC |
77 | # define RETURN POP (%edi); POP (REM); POP (%ebx); ret; \ | |
78 | .p2align 4; \ | |
79 | CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi) | |
80 | # else | |
81 | # define RETURN POP (%edi); POP (REM); ret; \ | |
82 | .p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi) | |
83 | # endif | |
84 | # define REM %ebp | |
85 | # define NONASCII __strncasecmp_nonascii | |
86 | #else | |
/* Plain strcmp: nothing is pushed before the arguments are read, so
   RETURN is a bare "ret".  */
87 | # ifndef STRCMP | |
88 | # define STRCMP __strcmp_sse4_2 | |
89 | # endif | |
90 | # define STR1 4 | |
91 | # define STR2 STR1+4 | |
92 | # define RETURN ret; .p2align 4 | |
904057bc L |
93 | #endif |
94 | ||
95 | .section .text.sse4.2,"ax",@progbits | |
6abf3465 UD |
96 | |
97 | #ifdef USE_AS_STRCASECMP_L | |
/* __strcasecmp_sse4_2: entry point without an explicit locale argument.
   It fetches the locale pointer stored in __libc_tsd_LOCALE (accessed
   through %gs with a TLS relocation), performs the same
   _NL_CTYPE_NONASCII_CASE test as ENTRY (STRCMP), and then either
   continues at L(ascii) inside STRCMP or jumps to
   __strcasecmp_nonascii.  Clobbers %eax.  */
98 | ENTRY (__strcasecmp_sse4_2) | |
99 | # ifdef PIC | |
/* PIC: %ebx is saved and loaded here; L(ascii) is entered with it
   still pushed, which is what the PIC value of STR1 accounts for.  */
100 | PUSH (%ebx) | |
9a1d9254 | 101 | LOAD_PIC_REG(bx) |
6abf3465 | 102 | movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax |
5583a086 | 103 | movl %gs:(%eax), %eax |
6abf3465 | 104 | # else |
5583a086 | 105 | movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax |
6abf3465 | 106 | # endif |
/* %eax = pointer stored in the locale's LC_CTYPE slot.  */
6abf3465 UD |
107 | # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 |
108 | movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax | |
109 | # else | |
110 | movl (%eax), %eax | |
111 | # endif | |
/* Bit 0 of the _NL_CTYPE_NONASCII_CASE value selects the non-ASCII
   implementation.  */
112 | testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) | |
5d228a43 AS |
113 | # ifdef PIC |
/* Bit clear: continue in STRCMP.  Bit set: undo the %ebx push before
   the tail jump so the target sees the original stack.  */
114 | je L(ascii) | |
115 | POP (%ebx) | |
116 | jmp __strcasecmp_nonascii | |
117 | # else | |
6abf3465 UD |
118 | jne __strcasecmp_nonascii |
119 | jmp L(ascii) | |
5d228a43 | 120 | # endif |
6abf3465 UD |
121 | END (__strcasecmp_sse4_2) |
122 | #endif | |
123 | ||
124 | #ifdef USE_AS_STRNCASECMP_L | |
/* __strncasecmp_sse4_2: entry point without an explicit locale
   argument.  It fetches the locale pointer stored in
   __libc_tsd_LOCALE (accessed through %gs with a TLS relocation),
   performs the same _NL_CTYPE_NONASCII_CASE test as ENTRY (STRCMP),
   and then either continues at L(ascii) inside STRCMP or jumps to
   __strncasecmp_nonascii.  Clobbers %eax.  */
125 | ENTRY (__strncasecmp_sse4_2) | |
126 | # ifdef PIC | |
/* PIC: %ebx is saved and loaded here; L(ascii) is entered with it
   still pushed, which is what the PIC value of STR1 accounts for.  */
127 | PUSH (%ebx) | |
9a1d9254 | 128 | LOAD_PIC_REG(bx) |
6abf3465 | 129 | movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax |
5583a086 | 130 | movl %gs:(%eax), %eax |
6abf3465 | 131 | # else |
5583a086 | 132 | movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax |
6abf3465 | 133 | # endif |
/* %eax = pointer stored in the locale's LC_CTYPE slot.  */
6abf3465 UD |
134 | # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 |
135 | movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax | |
136 | # else | |
137 | movl (%eax), %eax | |
138 | # endif | |
/* Bit 0 of the _NL_CTYPE_NONASCII_CASE value selects the non-ASCII
   implementation.  */
139 | testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) | |
5d228a43 AS |
140 | # ifdef PIC |
/* Bit clear: continue in STRCMP.  Bit set: undo the %ebx push before
   the tail jump so the target sees the original stack.  */
141 | je L(ascii) | |
142 | POP (%ebx) | |
143 | jmp __strncasecmp_nonascii | |
144 | # else | |
6abf3465 UD |
145 | jne __strncasecmp_nonascii |
146 | jmp L(ascii) | |
5d228a43 | 147 | # endif |
6abf3465 UD |
148 | END (__strncasecmp_sse4_2) |
149 | #endif | |
150 | ||
/* STRCMP: SSE4.2 string comparison for 32-bit x86.  Depending on which
   USE_AS_* macro is defined this is strcmp, strncmp, strcasecmp_l or
   strncasecmp_l.  Arguments are read from the stack at STR1, STR2 and,
   per variant, CNT and LOCALE.  The result is returned in %eax: zero
   when the strings compare equal (L(eq)), +1 or -1 from L(neq), or the
   difference of the first mismatching bytes from L(ret).  */
151 | ENTRY (STRCMP) | |
152 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
/* Case-insensitive variants only: follow the locale argument to the
   pointer in its LC_CTYPE slot and test bit 0 of the
   _NL_CTYPE_NONASCII_CASE value; if it is set, hand the call over to
   NONASCII before anything has been pushed.  */
153 | movl LOCALE(%esp), %eax | |
154 | # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 | |
155 | movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax | |
156 | # else | |
157 | movl (%eax), %eax | |
158 | # endif | |
159 | testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) | |
160 | jne NONASCII | |
161 | ||
162 | # ifdef PIC | |
/* %ebx is needed as the GOT base for the @GOTOFF references below.  */
163 | PUSH (%ebx) | |
9a1d9254 | 164 | LOAD_PIC_REG(bx) |
6abf3465 UD |
165 | # endif |
/* The __str[n]casecmp_sse4_2 stubs jump here after doing their own
   locale check (with %ebx already pushed and loaded when PIC).  */
166 | L(ascii): | |
/* 16-byte constants used by TOLOWER, every byte identical:
   0x40 is 'A' - 1, 0x5b is 'Z' + 1, and 0x20 is the bit that is ORed
   into upper-case letters to make them lower case.  */
167 | .section .rodata.cst16,"aM",@progbits,16 | |
168 | .align 16 | |
169 | .Lbelowupper: | |
170 | .quad 0x4040404040404040 | |
171 | .quad 0x4040404040404040 | |
172 | .Ltopupper: | |
173 | .quad 0x5b5b5b5b5b5b5b5b | |
174 | .quad 0x5b5b5b5b5b5b5b5b | |
175 | .Ltouppermask: | |
176 | .quad 0x2020202020202020 | |
177 | .quad 0x2020202020202020 | |
178 | .previous | |
179 | ||
/* Memory operands naming the constants above; GOT-relative when PIC,
   absolute otherwise.  */
180 | # ifdef PIC | |
181 | # define UCLOW_reg .Lbelowupper@GOTOFF(%ebx) | |
182 | # define UCHIGH_reg .Ltopupper@GOTOFF(%ebx) | |
183 | # define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx) | |
184 | # else | |
185 | # define UCLOW_reg .Lbelowupper | |
186 | # define UCHIGH_reg .Ltopupper | |
187 | # define LCQWORD_reg .Ltouppermask | |
188 | # endif | |
189 | #endif | |
190 | ||
/* Save the registers this variant uses for the whole function (REM for
   the length-limited variants, %edi as scratch for the case-insensitive
   ones), then load the arguments: %edx = STR1, %eax = STR2 and, where
   applicable, REM = CNT.  The STR1/STR2/CNT offsets already account
   for these pushes.  */
191 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
192 | PUSH (REM) | |
193 | #endif | |
194 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
195 | PUSH (%edi) | |
904057bc L |
196 | #endif |
197 | mov STR1(%esp), %edx | |
198 | mov STR2(%esp), %eax | |
6abf3465 UD |
199 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
200 | movl CNT(%esp), REM | |
/* A zero count compares equal without touching either string.  */
201 | test REM, REM | |
904057bc L |
202 | je L(eq) |
203 | #endif | |
/* Page-boundary guard.  A 16-byte load that starts more than 0xff0
   bytes into a 4 KiB page would run into the following page, so in
   that case skip the vector probe and compare bytewise at
   L(first4bytes).  The check is done for STR1 (%edx) before it is
   loaded into %xmm2, and then for STR2 (%eax).

   Both checks use 32-bit operations.  The 16-bit forms (%dx/%cx) need
   an operand-size prefix together with a 16-bit immediate and write
   %ecx only partially; the branch outcome is the same because only
   the low 12 bits are tested, and %ecx is fully overwritten on both
   successor paths.  */
204 | mov %edx, %ecx | |
205 | and $0xfff, %ecx | |
206 | cmp $0xff0, %ecx | |
207 | ja L(first4bytes) | |
208 | movdqu (%edx), %xmm2 | |
209 | mov %eax, %ecx | |
210 | and $0xfff, %ecx | |
211 | cmp $0xff0, %ecx | |
212 | ja L(first4bytes) | |
/* Probe of the first 16 bytes.  On entry %xmm2 holds the 16 bytes at
   STR1 (%edx) and %eax points at STR2.

   TOLOWER (reg1, reg2), case-insensitive variants only: lower-case the
   ASCII letters 'A'..'Z' in both registers in place.  For each
   register it builds a per-byte mask of bytes that are greater than
   0x40 and less than 0x5b (signed byte compares), keeps the 0x20 bit
   under that mask and ORs it into the register.  Clobbers
   %xmm3-%xmm6.  In the other variants TOLOWER expands to nothing.  */
6abf3465 UD |
213 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
214 | # define TOLOWER(reg1, reg2) \ | |
215 | movdqa reg1, %xmm3; \ | |
216 | movdqa UCHIGH_reg, %xmm4; \ | |
217 | movdqa reg2, %xmm5; \ | |
218 | movdqa UCHIGH_reg, %xmm6; \ | |
219 | pcmpgtb UCLOW_reg, %xmm3; \ | |
220 | pcmpgtb reg1, %xmm4; \ | |
221 | pcmpgtb UCLOW_reg, %xmm5; \ | |
222 | pcmpgtb reg2, %xmm6; \ | |
223 | pand %xmm4, %xmm3; \ | |
224 | pand %xmm6, %xmm5; \ | |
225 | pand LCQWORD_reg, %xmm3; \ | |
226 | pand LCQWORD_reg, %xmm5; \ | |
227 | por %xmm3, reg1; \ | |
228 | por %xmm5, reg2 | |
229 | ||
/* Lower-case both vectors, compare their first dwords (%ecx from STR1,
   %edi from STR2) and keep lower-cased copies in %xmm3/%xmm4 for
   L(less16bytes).  */
230 | movdqu (%eax), %xmm1 | |
231 | TOLOWER (%xmm2, %xmm1) | |
232 | movd %xmm2, %ecx | |
233 | movd %xmm1, %edi | |
234 | movdqa %xmm2, %xmm3 | |
235 | movdqa %xmm1, %xmm4 | |
236 | cmpl %edi, %ecx | |
237 | #else | |
238 | # define TOLOWER(reg1, reg) | |
239 | ||
/* Compare the first dword of STR1 (%ecx) directly with memory.  */
904057bc L |
240 | movd %xmm2, %ecx |
241 | cmp (%eax), %ecx | |
6abf3465 | 242 | #endif |
/* First four bytes differ: resolve bytewise.  */
904057bc | 243 | jne L(less4bytes) |
6abf3465 | 244 | #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L |
904057bc | 245 | movdqu (%eax), %xmm1 |
6abf3465 | 246 | #endif |
/* %xmm1 = STR1 ^ STR2, %xmm0 = 0.  With an all-zero second operand,
   ptest sets CF exactly when its first operand is all zero, so "jnc"
   is taken when some byte differs.  */
904057bc L |
247 | pxor %xmm2, %xmm1 |
248 | pxor %xmm0, %xmm0 | |
249 | ptest %xmm1, %xmm0 | |
250 | jnc L(less16bytes) | |
/* %xmm2 = per-byte mask of NUL bytes in STR1's 16 bytes; "jnc" is
   taken when there is at least one.  */
251 | pcmpeqb %xmm0, %xmm2 | |
252 | ptest %xmm2, %xmm0 | |
253 | jnc L(less16bytes) | |
254 | ||
/* 16 equal, NUL-free bytes: charge them against the count (equal if it
   is exhausted), advance both pointers and fall through to
   L(first4bytes).  */
6abf3465 UD |
255 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
256 | sub $16, REM | |
904057bc L |
257 | jbe L(eq) |
258 | #endif | |
259 | add $16, %edx | |
260 | add $16, %eax | |
/* Bytewise comparison of the next eight bytes (offsets 0..7 from %edx
   = STR1 and %eax = STR2), fully unrolled.  Every step follows the
   same pattern:
     - load the STR2 byte into %ecx; in the case-insensitive variants
       also load the STR1 byte into %edi and map both through the
       _nl_C_LC_CTYPE_tolower table (indexed from its +128*4 offset);
     - compare STR1's byte with STR2's byte and go to L(neq) if they
       differ, with the flags of that compare still live;
     - go to L(eq) if the (equal) byte is NUL;
     - in the length-limited variants, go to L(eq) if REM equals the
       number of bytes compared so far in this group.  */
261 | L(first4bytes): | |
262 | movzbl (%eax), %ecx | |
6abf3465 UD |
263 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
264 | movzbl (%edx), %edi | |
265 | # ifdef PIC | |
266 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
267 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
268 | # else | |
269 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
270 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
271 | # endif | |
272 | cmpl %ecx, %edi | |
273 | #else | |
904057bc | 274 | cmpb %cl, (%edx) |
6abf3465 | 275 | #endif |
904057bc L |
276 | jne L(neq) |
277 | cmpl $0, %ecx | |
278 | je L(eq) | |
279 | ||
6abf3465 UD |
280 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
281 | cmp $1, REM | |
904057bc L |
282 | je L(eq) |
283 | #endif | |
284 | ||
/* Byte 1.  */
285 | movzbl 1(%eax), %ecx | |
6abf3465 UD |
286 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
287 | movzbl 1(%edx), %edi | |
288 | # ifdef PIC | |
289 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
290 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
291 | # else | |
292 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
293 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
294 | # endif | |
295 | cmpl %ecx, %edi | |
296 | #else | |
904057bc | 297 | cmpb %cl, 1(%edx) |
6abf3465 | 298 | #endif |
904057bc L |
299 | jne L(neq) |
300 | cmpl $0, %ecx | |
301 | je L(eq) | |
302 | ||
6abf3465 UD |
303 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
304 | cmp $2, REM | |
904057bc L |
305 | je L(eq) |
306 | #endif | |
/* Byte 2.  */
307 | movzbl 2(%eax), %ecx | |
6abf3465 UD |
308 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
309 | movzbl 2(%edx), %edi | |
310 | # ifdef PIC | |
311 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
312 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
313 | # else | |
314 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
315 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
316 | # endif | |
317 | cmpl %ecx, %edi | |
318 | #else | |
904057bc | 319 | cmpb %cl, 2(%edx) |
6abf3465 | 320 | #endif |
904057bc L |
321 | jne L(neq) |
322 | cmpl $0, %ecx | |
323 | je L(eq) | |
324 | ||
6abf3465 UD |
325 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
326 | cmp $3, REM | |
904057bc L |
327 | je L(eq) |
328 | #endif | |
/* Byte 3.  */
329 | movzbl 3(%eax), %ecx | |
6abf3465 UD |
330 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
331 | movzbl 3(%edx), %edi | |
332 | # ifdef PIC | |
333 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
334 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
335 | # else | |
336 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
337 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
338 | # endif | |
339 | cmpl %ecx, %edi | |
340 | #else | |
904057bc | 341 | cmpb %cl, 3(%edx) |
6abf3465 | 342 | #endif |
904057bc L |
343 | jne L(neq) |
344 | cmpl $0, %ecx | |
345 | je L(eq) | |
346 | ||
6abf3465 UD |
347 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
348 | cmp $4, REM | |
904057bc L |
349 | je L(eq) |
350 | #endif | |
/* Byte 4.  */
351 | movzbl 4(%eax), %ecx | |
6abf3465 UD |
352 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
353 | movzbl 4(%edx), %edi | |
354 | # ifdef PIC | |
355 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
356 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
357 | # else | |
358 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
359 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
360 | # endif | |
361 | cmpl %ecx, %edi | |
362 | #else | |
904057bc | 363 | cmpb %cl, 4(%edx) |
6abf3465 | 364 | #endif |
904057bc L |
365 | jne L(neq) |
366 | cmpl $0, %ecx | |
367 | je L(eq) | |
368 | ||
6abf3465 UD |
369 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
370 | cmp $5, REM | |
904057bc L |
371 | je L(eq) |
372 | #endif | |
/* Byte 5.  */
373 | movzbl 5(%eax), %ecx | |
6abf3465 UD |
374 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
375 | movzbl 5(%edx), %edi | |
376 | # ifdef PIC | |
377 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
378 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
379 | # else | |
380 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
381 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
382 | # endif | |
383 | cmpl %ecx, %edi | |
384 | #else | |
904057bc | 385 | cmpb %cl, 5(%edx) |
6abf3465 | 386 | #endif |
904057bc L |
387 | jne L(neq) |
388 | cmpl $0, %ecx | |
389 | je L(eq) | |
390 | ||
6abf3465 UD |
391 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
392 | cmp $6, REM | |
904057bc L |
393 | je L(eq) |
394 | #endif | |
/* Byte 6.  */
395 | movzbl 6(%eax), %ecx | |
6abf3465 UD |
396 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
397 | movzbl 6(%edx), %edi | |
398 | # ifdef PIC | |
399 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
400 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
401 | # else | |
402 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
403 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
404 | # endif | |
405 | cmpl %ecx, %edi | |
406 | #else | |
904057bc | 407 | cmpb %cl, 6(%edx) |
6abf3465 | 408 | #endif |
904057bc L |
409 | jne L(neq) |
410 | cmpl $0, %ecx | |
411 | je L(eq) | |
412 | ||
6abf3465 UD |
413 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
414 | cmp $7, REM | |
904057bc L |
415 | je L(eq) |
416 | #endif | |
/* Byte 7.  */
417 | movzbl 7(%eax), %ecx | |
6abf3465 UD |
418 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
419 | movzbl 7(%edx), %edi | |
420 | # ifdef PIC | |
421 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
422 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
423 | # else | |
424 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
425 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
426 | # endif | |
427 | cmpl %ecx, %edi | |
428 | #else | |
904057bc | 429 | cmpb %cl, 7(%edx) |
6abf3465 | 430 | #endif |
904057bc L |
431 | jne L(neq) |
432 | cmpl $0, %ecx | |
433 | je L(eq) | |
434 | ||
/* Eight equal, NUL-free bytes: charge them against the count and
   advance both pointers.  */
6abf3465 UD |
435 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
436 | sub $8, REM | |
904057bc L |
437 | je L(eq) |
438 | #endif | |
439 | add $8, %eax | |
440 | add $8, %edx | |
441 | ||
/* Set up the main loop: save %edi (already saved in the
   case-insensitive variants) and %esi, then keep STR1 in %edi and
   STR2 in %esi.  %eax becomes the running return value, initially 0.
   The unwind state recorded by cfi_remember_state is reinstated at
   L(more16byteseq).  */
6abf3465 | 442 | #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L |
904057bc | 443 | PUSH (%edi) |
6abf3465 | 444 | #endif |
904057bc | 445 | PUSH (%esi) |
6abf3465 | 446 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
6bb74d9f | 447 | cfi_remember_state |
dc21aae6 | 448 | #endif |
904057bc L |
449 | mov %edx, %edi |
450 | mov %eax, %esi | |
451 | xorl %eax, %eax | |
/* Main loop.  %edi = STR1, %esi = STR2, %eax = 0.

   L(check_offset) takes the larger of the two pointers' offsets within
   a 4 KiB page, sets %edx = that offset - 0xff0, and subtracts %edx
   from both pointers so that (%edi,%edx) and (%esi,%edx) still address
   the current position.  While %edx <= 0, a 16-byte load at the
   current position stays inside the page for both strings.  */
452 | L(check_offset): | |
6abf3465 | 453 | movl %edi, %edx |
904057bc | 454 | movl %esi, %ecx |
6abf3465 | 455 | andl $0xfff, %edx |
904057bc | 456 | andl $0xfff, %ecx |
6abf3465 UD |
457 | cmpl %edx, %ecx |
458 | cmovl %edx, %ecx | |
904057bc L |
459 | lea -0xff0(%ecx), %edx |
460 | sub %edx, %edi | |
461 | sub %edx, %esi | |
462 | testl %edx, %edx | |
463 | jg L(crosspage) | |
/* Vector step: compare 16 bytes of STR2 (%xmm2) and STR1 (%xmm1).
   pcmpistri with immediate 0x1a does a signed-byte "equal each"
   comparison with negated result, leaving in %ecx the index of the
   first mismatching byte.  "jbe" leaves the loop when CF (a mismatch
   was found) or ZF (a NUL byte was seen) is set.  */
464 | L(loop): | |
465 | movdqu (%esi,%edx), %xmm2 | |
466 | movdqu (%edi,%edx), %xmm1 | |
6abf3465 | 467 | TOLOWER (%xmm2, %xmm1) |
904057bc L |
468 | pcmpistri $0x1a, %xmm2, %xmm1 |
469 | jbe L(end) | |
470 | ||
6abf3465 UD |
471 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
472 | sub $16, REM | |
904057bc L |
473 | jbe L(more16byteseq) |
474 | #endif | |
475 | ||
/* Keep looping while the next 16-byte load still fits in the page.  */
476 | add $16, %edx | |
477 | jle L(loop) | |
/* Near a page end: compare byte by byte, with %eax = STR1 byte and
   %ecx = STR2 byte (lower-cased through the table in the
   case-insensitive variants).  %eax becomes their difference; return
   it when it is nonzero, or return 0 when the equal byte is NUL.  */
478 | L(crosspage): | |
479 | movzbl (%edi,%edx), %eax | |
6abf3465 UD |
480 | movzbl (%esi,%edx), %ecx |
481 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
482 | # ifdef PIC | |
483 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax | |
484 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
485 | # else | |
486 | movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax | |
487 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
488 | # endif | |
489 | #endif | |
490 | subl %ecx, %eax | |
904057bc | 491 | jne L(ret) |
6abf3465 | 492 | testl %ecx, %ecx |
904057bc | 493 | je L(ret) |
6abf3465 UD |
494 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
495 | sub $1, REM | |
904057bc L |
496 | jbe L(more16byteseq) |
497 | #endif | |
/* Continue bytewise while %edx <= 15, then fold %edx back into the
   pointers and recompute the page offsets.  %eax is 0 again here
   because the last difference was zero.  */
498 | inc %edx | |
499 | cmp $15, %edx | |
500 | jle L(crosspage) | |
6cc2b8a6 L |
501 | add %edx, %edi |
502 | add %edx, %esi | |
904057bc L |
503 | jmp L(check_offset) |
504 | ||
/* L(end): the vector loop stopped.  The flags and %ecx still come from
   pcmpistri.  CF clear means no mismatch was found, so the strings are
   equal up to the NUL and %eax (still 0) is returned.  Otherwise %ecx
   is the index of the first mismatching byte within the 16-byte
   window.  */
28be6098 | 505 | .p2align 4 |
904057bc L |
506 | L(end): |
507 | jnc L(ret) | |
/* Length-limited variants: a mismatch at or beyond the remaining
   count does not matter.  */
6abf3465 UD |
508 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
509 | sub %ecx, REM | |
904057bc L |
510 | jbe L(more16byteseq) |
511 | #endif | |
/* Reload the mismatching bytes (STR1 into %eax, STR2 into %ecx),
   lower-case them where required, and return their difference.  */
6abf3465 UD |
512 | lea (%ecx,%edx), %ecx |
513 | movzbl (%edi,%ecx), %eax | |
514 | movzbl (%esi,%ecx), %ecx | |
515 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
516 | # ifdef PIC | |
517 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax | |
518 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
519 | # else | |
520 | movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax | |
521 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
522 | # endif | |
523 | #endif | |
904057bc L |
524 | subl %ecx, %eax |
/* L(ret): main-loop epilogue.  Pops everything pushed by this point,
   in reverse order, and returns with the result in %eax.  */
525 | L(ret): | |
526 | POP (%esi) | |
527 | POP (%edi) | |
6abf3465 UD |
528 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
529 | POP (REM) | |
530 | #endif | |
531 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
532 | # ifdef PIC | |
904057bc | 533 | POP (%ebx) |
6abf3465 | 534 | # endif |
904057bc L |
535 | #endif |
536 | ret | |
537 | ||
/* Shared exits.

   L(more16byteseq) is reached from the main loop of the length-limited
   variants when the count runs out: it reinstates the unwind state
   saved by cfi_remember_state and pops what the loop setup pushed, so
   that RETURN finds the same stack as on the early-exit paths.  %edi
   is popped here only for strncmp; in strncasecmp_l RETURN pops it.  */
28be6098 | 538 | .p2align 4 |
6abf3465 | 539 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
dc21aae6 | 540 | cfi_restore_state |
904057bc L |
541 | L(more16byteseq): |
542 | POP (%esi) | |
6abf3465 | 543 | # ifdef USE_AS_STRNCMP |
904057bc | 544 | POP (%edi) |
6abf3465 | 545 | # endif |
904057bc L |
546 | #endif |
/* L(eq): the strings compare equal; return 0.  */
547 | L(eq): | |
548 | xorl %eax, %eax | |
28be6098 | 549 | RETURN |
6bb74d9f | 550 | |
/* L(neq): entered by "jne" right after a compare of STR1's byte with
   STR2's byte.  "mov" leaves the flags intact, so "ja" still tests
   that compare: return +1 when STR1's byte is above (unsigned),
   otherwise -1.  */
904057bc L |
551 | L(neq): |
552 | mov $1, %eax | |
553 | ja L(neq_bigger) | |
554 | neg %eax | |
555 | L(neq_bigger): | |
28be6098 L |
556 | RETURN |
557 | ||
/* L(less16bytes): the first 16 bytes contain a difference or a NUL,
   but their first dwords are equal; %ecx holds that dword.  The
   add 0xfefefeff / xor / or 0xfefefeff / add 1 sequence appears to be
   the usual carry-propagation test for a zero byte inside a dword;
   whenever it fires, control goes to the bytewise code, which
   re-examines the same bytes.  */
904057bc L |
558 | L(less16bytes): |
559 | add $0xfefefeff, %ecx | |
560 | jnc L(less4bytes) | |
/* XOR with the original dword: taken from the lower-cased copy in
   %xmm3 in the case-insensitive variants, from memory otherwise.  */
6abf3465 UD |
561 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
562 | movd %xmm3, %edi | |
563 | xor %edi, %ecx | |
564 | #else | |
904057bc | 565 | xor (%edx), %ecx |
6abf3465 | 566 | #endif |
904057bc L |
567 | or $0xfefefeff, %ecx |
568 | add $1, %ecx | |
569 | jnz L(less4bytes) | |
570 | ||
/* Four equal bytes passed the test: with a count of 4 or less the
   strings compare equal.  */
6abf3465 UD |
571 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
572 | cmp $4, REM | |
904057bc L |
573 | jbe L(eq) |
574 | #endif | |
/* Second dword (bytes 4..7): shifted out of the lower-cased copies in
   %xmm3/%xmm4, or read from memory.  %edi keeps a copy of STR1's
   dword for the xor below.  */
6abf3465 UD |
575 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
576 | psrldq $4, %xmm3 | |
577 | psrldq $4, %xmm4 | |
578 | movd %xmm3, %ecx | |
579 | movd %xmm4, %edi | |
580 | cmp %edi, %ecx | |
581 | mov %ecx, %edi | |
582 | #else | |
904057bc L |
583 | mov 4(%edx), %ecx |
584 | cmp 4(%eax), %ecx | |
6abf3465 | 585 | #endif |
/* Differing dwords, or a hit in the same test as above, are resolved
   bytewise from offset 4 at L(more4bytes).  */
904057bc L |
586 | jne L(more4bytes) |
587 | add $0xfefefeff, %ecx | |
588 | jnc L(more4bytes) | |
6abf3465 UD |
589 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
590 | xor %edi, %ecx | |
591 | #else | |
904057bc | 592 | xor 4(%edx), %ecx |
6abf3465 | 593 | #endif |
904057bc L |
594 | or $0xfefefeff, %ecx |
595 | add $1, %ecx | |
596 | jnz L(more4bytes) | |
597 | ||
/* Eight equal bytes passed the test: charge them against the count,
   advance both pointers and fall through to L(less4bytes) for
   bytes 8..15.  */
6abf3465 UD |
598 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
599 | sub $8, REM | |
904057bc L |
600 | jbe L(eq) |
601 | #endif | |
602 | ||
603 | add $8, %edx | |
604 | add $8, %eax | |
/* L(less4bytes) / L(more4bytes): bytewise comparison of the eight
   bytes at offsets 0..7 from %edx (STR1) and %eax (STR2);
   L(more4bytes) enters at offset 4.  Each step loads the STR2 byte
   into %ecx (and, in the case-insensitive variants, the STR1 byte into
   %edi, mapping both through the _nl_C_LC_CTYPE_tolower table),
   compares STR1's byte with STR2's byte, goes to L(neq) on a
   difference and to L(eq) on a NUL.  The length-limited variants go to
   L(eq) when REM equals the number of bytes compared so far in this
   group.  */
605 | L(less4bytes): | |
606 | ||
607 | movzbl (%eax), %ecx | |
6abf3465 UD |
608 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
609 | movzbl (%edx), %edi | |
610 | # ifdef PIC | |
611 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
612 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
613 | # else | |
614 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
615 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
616 | # endif | |
617 | cmpl %ecx, %edi | |
618 | #else | |
904057bc | 619 | cmpb %cl, (%edx) |
6abf3465 | 620 | #endif |
904057bc L |
621 | jne L(neq) |
622 | cmpl $0, %ecx | |
623 | je L(eq) | |
624 | ||
6abf3465 UD |
625 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
626 | cmp $1, REM | |
904057bc L |
627 | je L(eq) |
628 | #endif | |
/* Byte 1.  */
629 | movzbl 1(%eax), %ecx | |
6abf3465 UD |
630 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
631 | movzbl 1(%edx), %edi | |
632 | # ifdef PIC | |
633 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
634 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
635 | # else | |
636 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
637 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
638 | # endif | |
639 | cmpl %ecx, %edi | |
640 | #else | |
904057bc | 641 | cmpb %cl, 1(%edx) |
6abf3465 | 642 | #endif |
904057bc L |
643 | jne L(neq) |
644 | cmpl $0, %ecx | |
645 | je L(eq) | |
646 | ||
6abf3465 UD |
647 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
648 | cmp $2, REM | |
904057bc L |
649 | je L(eq) |
650 | #endif | |
651 | ||
/* Byte 2.  */
652 | movzbl 2(%eax), %ecx | |
6abf3465 UD |
653 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
654 | movzbl 2(%edx), %edi | |
655 | # ifdef PIC | |
656 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
657 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
658 | # else | |
659 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
660 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
661 | # endif | |
662 | cmpl %ecx, %edi | |
663 | #else | |
904057bc | 664 | cmpb %cl, 2(%edx) |
6abf3465 | 665 | #endif |
904057bc L |
666 | jne L(neq) |
667 | cmpl $0, %ecx | |
668 | je L(eq) | |
669 | ||
6abf3465 UD |
670 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
671 | cmp $3, REM | |
904057bc L |
672 | je L(eq) |
673 | #endif | |
/* Byte 3.  */
674 | movzbl 3(%eax), %ecx | |
6abf3465 UD |
675 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
676 | movzbl 3(%edx), %edi | |
677 | # ifdef PIC | |
678 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
679 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
680 | # else | |
681 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
682 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
683 | # endif | |
684 | cmpl %ecx, %edi | |
685 | #else | |
904057bc | 686 | cmpb %cl, 3(%edx) |
6abf3465 | 687 | #endif |
904057bc L |
688 | jne L(neq) |
689 | cmpl $0, %ecx | |
690 | je L(eq) | |
691 | ||
/* Bytes 4..7.  Also entered directly from L(less16bytes) once bytes
   0..3 are known to be equal and NUL-free.  */
692 | L(more4bytes): | |
6abf3465 UD |
693 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
694 | cmp $4, REM | |
904057bc L |
695 | je L(eq) |
696 | #endif | |
697 | movzbl 4(%eax), %ecx | |
6abf3465 UD |
698 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
699 | movzbl 4(%edx), %edi | |
700 | # ifdef PIC | |
701 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
702 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
703 | # else | |
704 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
705 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
706 | # endif | |
707 | cmpl %ecx, %edi | |
708 | #else | |
904057bc | 709 | cmpb %cl, 4(%edx) |
6abf3465 | 710 | #endif |
904057bc L |
711 | jne L(neq) |
712 | cmpl $0, %ecx | |
713 | je L(eq) | |
714 | ||
715 | ||
6abf3465 UD |
716 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
717 | cmp $5, REM | |
904057bc L |
718 | je L(eq) |
719 | #endif | |
/* Byte 5.  */
720 | movzbl 5(%eax), %ecx | |
6abf3465 UD |
721 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
722 | movzbl 5(%edx), %edi | |
723 | # ifdef PIC | |
724 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
725 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
726 | # else | |
727 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
728 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
729 | # endif | |
730 | cmpl %ecx, %edi | |
731 | #else | |
904057bc | 732 | cmpb %cl, 5(%edx) |
6abf3465 | 733 | #endif |
904057bc L |
734 | jne L(neq) |
735 | cmpl $0, %ecx | |
736 | je L(eq) | |
737 | ||
6abf3465 UD |
738 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
739 | cmp $6, REM | |
904057bc L |
740 | je L(eq) |
741 | #endif | |
/* Byte 6.  */
742 | movzbl 6(%eax), %ecx | |
6abf3465 UD |
743 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
744 | movzbl 6(%edx), %edi | |
745 | # ifdef PIC | |
746 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
747 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
748 | # else | |
749 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
750 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
751 | # endif | |
752 | cmpl %ecx, %edi | |
753 | #else | |
904057bc | 754 | cmpb %cl, 6(%edx) |
6abf3465 | 755 | #endif |
904057bc L |
756 | jne L(neq) |
757 | cmpl $0, %ecx | |
758 | je L(eq) | |
759 | ||
6abf3465 UD |
760 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
761 | cmp $7, REM | |
904057bc L |
762 | je L(eq) |
763 | #endif | |
/* Byte 7: the last byte of the group.  There is no NUL test here;
   anything that is not a difference is treated as equal.
   NOTE(review): this presumably relies on every path into this code
   having established that these eight bytes contain a difference or a
   NUL - confirm against the callers above.  */
764 | movzbl 7(%eax), %ecx | |
6abf3465 UD |
765 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
766 | movzbl 7(%edx), %edi | |
767 | # ifdef PIC | |
768 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
769 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
770 | # else | |
771 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
772 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
773 | # endif | |
774 | cmpl %ecx, %edi | |
775 | #else | |
904057bc | 776 | cmpb %cl, 7(%edx) |
6abf3465 | 777 | #endif |
904057bc | 778 | jne L(neq) |
6bb74d9f | 779 | jmp L(eq) |
904057bc L |
780 | |
781 | END (STRCMP) | |
782 | ||
783 | #endif |