]>
Commit | Line | Data |
---|---|---|
904057bc | 1 | /* strcmp with SSE4.2 |
f7a9f785 | 2 | Copyright (C) 2010-2016 Free Software Foundation, Inc. |
904057bc L |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 PE |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ | |
904057bc | 19 | |
4f41c682 | 20 | #if IS_IN (libc) |
904057bc L |
21 | |
22 | #include <sysdep.h> | |
23 | #include "asm-syntax.h" | |
24 | ||
/* Unwind-info helpers for 4-byte stack pushes and pops.
   CFI_PUSH (REG) moves the CFA offset by +4 and records REG as saved at
   offset 0 from the new stack top; CFI_POP (REG) moves it by -4 and marks
   REG as restored.  PUSH/POP pair the real pushl/popl instruction with
   the matching annotation so the two cannot get out of step.  */
25 | #define CFI_PUSH(REG) \ | |
26 | cfi_adjust_cfa_offset (4); \ | |
27 | cfi_rel_offset (REG, 0) | |
28 | ||
29 | #define CFI_POP(REG) \ | |
30 | cfi_adjust_cfa_offset (-4); \ | |
31 | cfi_restore (REG) | |
32 | ||
33 | #define PUSH(REG) pushl REG; CFI_PUSH (REG) | |
34 | #define POP(REG) popl REG; CFI_POP (REG) | |
35 | ||
/* Per-variant configuration, selected by USE_AS_STRNCMP,
   USE_AS_STRCASECMP_L or USE_AS_STRNCASECMP_L (none: plain strcmp).

   STRCMP    symbol defined by ENTRY (STRCMP) below; a pre-existing
             definition is kept.
   STR1/STR2 %esp-relative offsets of the two string arguments at the point
             where they are loaded, i.e. after the prologue pushes:
             4 (return address) + 4 for each of %ebx (PIC case-insensitive
             variants), REM (length-limited variants) and %edi
             (case-insensitive variants).
   CNT       offset of the length argument (length-limited variants).
   LOCALE    offset of the locale argument, used at function entry before
             anything has been pushed.
   REM       register holding the remaining byte count.
   RETURN    pops what the prologue pushed and returns.  The .p2align and
             CFI_PUSH directives after `ret' re-declare those pushes in the
             unwind info for the code that follows the macro expansion.
   NONASCII  symbol jumped to when the locale test at entry fails.  */
6abf3465 | 36 | #ifdef USE_AS_STRNCMP |
904057bc | 37 | # ifndef STRCMP |
6abf3465 | 38 | # define STRCMP __strncmp_sse4_2 |
904057bc | 39 | # endif |
6abf3465 | 40 | # define STR1 8 |
904057bc | 41 | # define STR2 STR1+4 |
6abf3465 UD |
42 | # define CNT STR2+4 |
43 | # define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) | |
44 | # define REM %ebp | |
/* strcasecmp_l: %edi is pushed, plus %ebx when built PIC.  */
45 | #elif defined USE_AS_STRCASECMP_L | |
46 | # include "locale-defines.h" | |
904057bc | 47 | # ifndef STRCMP |
6abf3465 | 48 | # define STRCMP __strcasecmp_l_sse4_2 |
904057bc | 49 | # endif |
5583a086 AS |
50 | # ifdef PIC |
51 | # define STR1 12 | |
52 | # else | |
53 | # define STR1 8 | |
54 | # endif | |
6abf3465 | 55 | # define STR2 STR1+4 |
c0c3f78a | 56 | # define LOCALE 12 /* Loaded before the adjustment. */ |
6abf3465 UD |
57 | # ifdef PIC |
58 | # define RETURN POP (%edi); POP (%ebx); ret; \ | |
59 | .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi) | |
60 | # else | |
61 | # define RETURN POP (%edi); ret; .p2align 4; CFI_PUSH (%edi) | |
62 | # endif | |
63 | # define NONASCII __strcasecmp_nonascii | |
/* strncasecmp_l: REM and %edi are pushed, plus %ebx when built PIC.  */
64 | #elif defined USE_AS_STRNCASECMP_L | |
65 | # include "locale-defines.h" | |
66 | # ifndef STRCMP | |
67 | # define STRCMP __strncasecmp_l_sse4_2 | |
68 | # endif | |
5583a086 AS |
69 | # ifdef PIC |
70 | # define STR1 16 | |
71 | # else | |
72 | # define STR1 12 | |
73 | # endif | |
904057bc L |
74 | # define STR2 STR1+4 |
75 | # define CNT STR2+4 | |
c0c3f78a | 76 | # define LOCALE 16 /* Loaded before the adjustment. */ |
6abf3465 UD |
77 | # ifdef PIC |
78 | # define RETURN POP (%edi); POP (REM); POP (%ebx); ret; \ | |
79 | .p2align 4; \ | |
80 | CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi) | |
81 | # else | |
82 | # define RETURN POP (%edi); POP (REM); ret; \ | |
83 | .p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi) | |
84 | # endif | |
85 | # define REM %ebp | |
86 | # define NONASCII __strncasecmp_nonascii | |
/* Plain strcmp: nothing is pushed before the arguments are loaded.  */
87 | #else | |
88 | # ifndef STRCMP | |
89 | # define STRCMP __strcmp_sse4_2 | |
90 | # endif | |
91 | # define STR1 4 | |
92 | # define STR2 STR1+4 | |
93 | # define RETURN ret; .p2align 4 | |
904057bc L |
94 | #endif |
95 | ||
96 | .section .text.sse4.2,"ax",@progbits | |
6abf3465 UD |
97 | |
/* __strcasecmp_sse4_2: entry point without an explicit locale argument.
   It reads the thread-local __libc_tsd_LOCALE pointer through %gs, follows
   it to the LC_CTYPE entry of the locale, and tests bit 0 of that entry's
   _NL_CTYPE_NONASCII_CASE value.  Bit clear: continue at L(ascii) inside
   STRCMP.  Bit set: tail-jump to __strcasecmp_nonascii.
   Clobbers %eax; in PIC builds %ebx is pushed and loaded as the PIC
   register.  */
98 | #ifdef USE_AS_STRCASECMP_L | |
99 | ENTRY (__strcasecmp_sse4_2) | |
100 | # ifdef PIC | |
101 | PUSH (%ebx) | |
9a1d9254 | 102 | LOAD_PIC_REG(bx) |
6abf3465 | 103 | movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax |
5583a086 AS |
104 | # ifdef NO_TLS_DIRECT_SEG_REFS |
105 | addl %gs:0, %eax | |
106 | movl (%eax), %eax | |
107 | # else | |
108 | movl %gs:(%eax), %eax | |
109 | # endif | |
6abf3465 | 110 | # else |
5583a086 AS |
111 | # ifdef NO_TLS_DIRECT_SEG_REFS |
112 | movl %gs:0, %eax | |
113 | movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax | |
114 | # else | |
115 | movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax | |
116 | # endif | |
6abf3465 | 117 | # endif |
/* %eax = locale pointer; replace it with the LC_CTYPE data pointer and
   test the flag.  The #if only picks the shorter addressing form when the
   combined displacement is zero.  */
6abf3465 UD |
118 | # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 |
119 | movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax | |
120 | # else | |
121 | movl (%eax), %eax | |
122 | # endif | |
123 | testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) | |
/* PIC: L(ascii) sits after the PUSH (%ebx) in STRCMP, so %ebx stays pushed
   on that path and is popped only before leaving for the non-ASCII
   routine.  */
5d228a43 AS |
124 | # ifdef PIC |
125 | je L(ascii) | |
126 | POP (%ebx) | |
127 | jmp __strcasecmp_nonascii | |
128 | # else | |
6abf3465 UD |
129 | jne __strcasecmp_nonascii |
130 | jmp L(ascii) | |
5d228a43 | 131 | # endif |
6abf3465 UD |
132 | END (__strcasecmp_sse4_2) |
133 | #endif | |
134 | ||
/* __strncasecmp_sse4_2: entry point without an explicit locale argument.
   It reads the thread-local __libc_tsd_LOCALE pointer through %gs, follows
   it to the LC_CTYPE entry of the locale, and tests bit 0 of that entry's
   _NL_CTYPE_NONASCII_CASE value.  Bit clear: continue at L(ascii) inside
   STRCMP.  Bit set: tail-jump to __strncasecmp_nonascii.
   Clobbers %eax; in PIC builds %ebx is pushed and loaded as the PIC
   register.  */
135 | #ifdef USE_AS_STRNCASECMP_L | |
136 | ENTRY (__strncasecmp_sse4_2) | |
137 | # ifdef PIC | |
138 | PUSH (%ebx) | |
9a1d9254 | 139 | LOAD_PIC_REG(bx) |
6abf3465 | 140 | movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax |
5583a086 AS |
141 | # ifdef NO_TLS_DIRECT_SEG_REFS |
142 | addl %gs:0, %eax | |
143 | movl (%eax), %eax | |
144 | # else | |
145 | movl %gs:(%eax), %eax | |
146 | # endif | |
6abf3465 | 147 | # else |
5583a086 AS |
148 | # ifdef NO_TLS_DIRECT_SEG_REFS |
149 | movl %gs:0, %eax | |
150 | movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax | |
151 | # else | |
152 | movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax | |
153 | # endif | |
6abf3465 | 154 | # endif |
/* %eax = locale pointer; replace it with the LC_CTYPE data pointer and
   test the flag.  */
6abf3465 UD |
155 | # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 |
156 | movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax | |
157 | # else | |
158 | movl (%eax), %eax | |
159 | # endif | |
160 | testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) | |
/* PIC: %ebx stays pushed on the L(ascii) path (the label follows the
   PUSH (%ebx) in STRCMP) and is popped only before the tail-jump.  */
5d228a43 AS |
161 | # ifdef PIC |
162 | je L(ascii) | |
163 | POP (%ebx) | |
164 | jmp __strncasecmp_nonascii | |
165 | # else | |
6abf3465 UD |
166 | jne __strncasecmp_nonascii |
167 | jmp L(ascii) | |
5d228a43 | 168 | # endif |
6abf3465 UD |
169 | END (__strncasecmp_sse4_2) |
170 | #endif | |
171 | ||
/* STRCMP: the comparison routine itself; the variant is chosen by the
   USE_AS_* macros.  Arguments are on the stack (see STR1, STR2, CNT,
   LOCALE) and the result is returned in %eax.

   Case-insensitive variants start by testing the caller-supplied locale:
   if bit 0 of its _NL_CTYPE_NONASCII_CASE value is set, control leaves for
   NONASCII before anything has been pushed.  Otherwise, in PIC builds,
   %ebx is pushed and set up as the PIC register.  L(ascii) is also the
   target of the locale-less entry stubs defined earlier in this file.  */
172 | ENTRY (STRCMP) | |
173 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
174 | movl LOCALE(%esp), %eax | |
175 | # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 | |
176 | movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax | |
177 | # else | |
178 | movl (%eax), %eax | |
179 | # endif | |
180 | testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) | |
181 | jne NONASCII | |
182 | ||
183 | # ifdef PIC | |
184 | PUSH (%ebx) | |
9a1d9254 | 185 | LOAD_PIC_REG(bx) |
6abf3465 UD |
186 | # endif |
187 | L(ascii): | |
/* 16-byte constants used by TOLOWER: every byte 0x40 ('A' - 1), every
   byte 0x5b ('Z' + 1), and every byte 0x20 (the bit that is OR-ed into
   bytes lying strictly between the two bounds).  */
188 | .section .rodata.cst16,"aM",@progbits,16 | |
189 | .align 16 | |
190 | .Lbelowupper: | |
191 | .quad 0x4040404040404040 | |
192 | .quad 0x4040404040404040 | |
193 | .Ltopupper: | |
194 | .quad 0x5b5b5b5b5b5b5b5b | |
195 | .quad 0x5b5b5b5b5b5b5b5b | |
196 | .Ltouppermask: | |
197 | .quad 0x2020202020202020 | |
198 | .quad 0x2020202020202020 | |
199 | .previous | |
200 | ||
/* Operand spellings for the constants: GOT-relative through %ebx when
   PIC, absolute otherwise.  */
201 | # ifdef PIC | |
202 | # define UCLOW_reg .Lbelowupper@GOTOFF(%ebx) | |
203 | # define UCHIGH_reg .Ltopupper@GOTOFF(%ebx) | |
204 | # define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx) | |
205 | # else | |
206 | # define UCLOW_reg .Lbelowupper | |
207 | # define UCHIGH_reg .Ltopupper | |
208 | # define LCQWORD_reg .Ltouppermask | |
209 | # endif | |
210 | #endif | |
211 | ||
/* Prologue and fast path for the first 16 bytes.
   Saves REM and/or %edi as the variant requires, then loads
   %edx = first string, %eax = second string and, for the length-limited
   variants, REM = count (a zero count returns 0 at once).  */
212 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
213 | PUSH (REM) | |
214 | #endif | |
215 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
216 | PUSH (%edi) | |
904057bc L |
217 | #endif |
218 | mov STR1(%esp), %edx | |
219 | mov STR2(%esp), %eax | |
6abf3465 UD |
220 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
221 | movl CNT(%esp), REM | |
222 | test REM, REM | |
904057bc L |
223 | je L(eq) |
224 | #endif | |
/* A 16-byte load is attempted only when the low 12 address bits are at
   most 0xff0, i.e. when the load cannot run past the next 4 KiB boundary;
   otherwise fall back to the byte-wise code at L(first4bytes).  The check
   is done for each pointer.  */
225 | mov %dx, %cx | |
226 | and $0xfff, %cx | |
227 | cmp $0xff0, %cx | |
228 | ja L(first4bytes) | |
229 | movdqu (%edx), %xmm2 | |
230 | mov %eax, %ecx | |
231 | and $0xfff, %ecx | |
232 | cmp $0xff0, %ecx | |
233 | ja L(first4bytes) | |
/* TOLOWER (reg1, reg2): for the case-insensitive variants, OR 0x20 into
   every byte of reg1 and reg2 that is greater than 0x40 and less than 0x5b
   (signed byte compares), which maps 'A'..'Z' to 'a'..'z' and leaves other
   bytes unchanged.  Clobbers %xmm3-%xmm6.  For the other variants the
   macro expands to nothing.  */
6abf3465 UD |
234 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
235 | # define TOLOWER(reg1, reg2) \ | |
236 | movdqa reg1, %xmm3; \ | |
237 | movdqa UCHIGH_reg, %xmm4; \ | |
238 | movdqa reg2, %xmm5; \ | |
239 | movdqa UCHIGH_reg, %xmm6; \ | |
240 | pcmpgtb UCLOW_reg, %xmm3; \ | |
241 | pcmpgtb reg1, %xmm4; \ | |
242 | pcmpgtb UCLOW_reg, %xmm5; \ | |
243 | pcmpgtb reg2, %xmm6; \ | |
244 | pand %xmm4, %xmm3; \ | |
245 | pand %xmm6, %xmm5; \ | |
246 | pand LCQWORD_reg, %xmm3; \ | |
247 | pand LCQWORD_reg, %xmm5; \ | |
248 | por %xmm3, reg1; \ | |
249 | por %xmm5, reg2 | |
250 | ||
/* Case-insensitive: fold both 16-byte blocks, keep copies in %xmm3/%xmm4
   for L(less16bytes), and compare the first four folded bytes.
   Otherwise: compare the first four bytes straight from memory.  */
251 | movdqu (%eax), %xmm1 | |
252 | TOLOWER (%xmm2, %xmm1) | |
253 | movd %xmm2, %ecx | |
254 | movd %xmm1, %edi | |
255 | movdqa %xmm2, %xmm3 | |
256 | movdqa %xmm1, %xmm4 | |
257 | cmpl %edi, %ecx | |
258 | #else | |
259 | # define TOLOWER(reg1, reg) | |
260 | ||
904057bc L |
261 | movd %xmm2, %ecx |
262 | cmp (%eax), %ecx | |
6abf3465 | 263 | #endif |
/* First four bytes differ: let the byte-wise code work out the result.  */
904057bc | 264 | jne L(less4bytes) |
6abf3465 | 265 | #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L |
904057bc | 266 | movdqu (%eax), %xmm1 |
6abf3465 | 267 | #endif |
/* %xmm1 = s1 ^ s2.  With %xmm0 zeroed, ptest leaves CF clear exactly when
   its first operand is non-zero, so the first jnc is taken when the blocks
   differ, and the second when the first string has a NUL byte among these
   16 bytes (%xmm2 then holds the per-byte "== 0" mask).  */
904057bc L |
268 | pxor %xmm2, %xmm1 |
269 | pxor %xmm0, %xmm0 | |
270 | ptest %xmm1, %xmm0 | |
271 | jnc L(less16bytes) | |
272 | pcmpeqb %xmm0, %xmm2 | |
273 | ptest %xmm2, %xmm0 | |
274 | jnc L(less16bytes) | |
275 | ||
/* 16 equal, NUL-free bytes consumed: account for them and advance.  */
6abf3465 UD |
276 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
277 | sub $16, REM | |
904057bc L |
278 | jbe L(eq) |
279 | #endif | |
280 | add $16, %edx | |
281 | add $16, %eax | |
/* L(first4bytes): byte-wise comparison of bytes 0..7 at %edx (first
   string) and %eax (second string); despite the label name, eight bytes
   are handled.  Each step has the same shape:
     - load the byte of the second string into %ecx (and, for the
       case-insensitive variants, the byte of the first string into %edi,
       then map both through the 32-bit-entry table
       _nl_C_LC_CTYPE_tolower, indexed from a base of +128*4);
     - compare; on inequality go to L(neq), which derives the sign from the
       flags of this compare;
     - if the byte was NUL, go to L(eq);
     - for the length-limited variants, go to L(eq) once REM equals the
       number of bytes checked so far.
   If all eight bytes match, the pointers advance by 8 and the registers
   for the main loop are set up.  */
282 | L(first4bytes): | |
283 | movzbl (%eax), %ecx | |
6abf3465 UD |
284 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
285 | movzbl (%edx), %edi | |
286 | # ifdef PIC | |
287 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
288 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
289 | # else | |
290 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
291 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
292 | # endif | |
293 | cmpl %ecx, %edi | |
294 | #else | |
904057bc | 295 | cmpb %cl, (%edx) |
6abf3465 | 296 | #endif |
904057bc L |
297 | jne L(neq) |
298 | cmpl $0, %ecx | |
299 | je L(eq) | |
300 | ||
6abf3465 UD |
301 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
302 | cmp $1, REM | |
904057bc L |
303 | je L(eq) |
304 | #endif | |
305 | ||
306 | movzbl 1(%eax), %ecx | |
6abf3465 UD |
307 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
308 | movzbl 1(%edx), %edi | |
309 | # ifdef PIC | |
310 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
311 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
312 | # else | |
313 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
314 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
315 | # endif | |
316 | cmpl %ecx, %edi | |
317 | #else | |
904057bc | 318 | cmpb %cl, 1(%edx) |
6abf3465 | 319 | #endif |
904057bc L |
320 | jne L(neq) |
321 | cmpl $0, %ecx | |
322 | je L(eq) | |
323 | ||
6abf3465 UD |
324 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
325 | cmp $2, REM | |
904057bc L |
326 | je L(eq) |
327 | #endif | |
328 | movzbl 2(%eax), %ecx | |
6abf3465 UD |
329 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
330 | movzbl 2(%edx), %edi | |
331 | # ifdef PIC | |
332 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
333 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
334 | # else | |
335 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
336 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
337 | # endif | |
338 | cmpl %ecx, %edi | |
339 | #else | |
904057bc | 340 | cmpb %cl, 2(%edx) |
6abf3465 | 341 | #endif |
904057bc L |
342 | jne L(neq) |
343 | cmpl $0, %ecx | |
344 | je L(eq) | |
345 | ||
6abf3465 UD |
346 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
347 | cmp $3, REM | |
904057bc L |
348 | je L(eq) |
349 | #endif | |
350 | movzbl 3(%eax), %ecx | |
6abf3465 UD |
351 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
352 | movzbl 3(%edx), %edi | |
353 | # ifdef PIC | |
354 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
355 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
356 | # else | |
357 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
358 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
359 | # endif | |
360 | cmpl %ecx, %edi | |
361 | #else | |
904057bc | 362 | cmpb %cl, 3(%edx) |
6abf3465 | 363 | #endif |
904057bc L |
364 | jne L(neq) |
365 | cmpl $0, %ecx | |
366 | je L(eq) | |
367 | ||
6abf3465 UD |
368 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
369 | cmp $4, REM | |
904057bc L |
370 | je L(eq) |
371 | #endif | |
372 | movzbl 4(%eax), %ecx | |
6abf3465 UD |
373 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
374 | movzbl 4(%edx), %edi | |
375 | # ifdef PIC | |
376 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
377 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
378 | # else | |
379 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
380 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
381 | # endif | |
382 | cmpl %ecx, %edi | |
383 | #else | |
904057bc | 384 | cmpb %cl, 4(%edx) |
6abf3465 | 385 | #endif |
904057bc L |
386 | jne L(neq) |
387 | cmpl $0, %ecx | |
388 | je L(eq) | |
389 | ||
6abf3465 UD |
390 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
391 | cmp $5, REM | |
904057bc L |
392 | je L(eq) |
393 | #endif | |
394 | movzbl 5(%eax), %ecx | |
6abf3465 UD |
395 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
396 | movzbl 5(%edx), %edi | |
397 | # ifdef PIC | |
398 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
399 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
400 | # else | |
401 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
402 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
403 | # endif | |
404 | cmpl %ecx, %edi | |
405 | #else | |
904057bc | 406 | cmpb %cl, 5(%edx) |
6abf3465 | 407 | #endif |
904057bc L |
408 | jne L(neq) |
409 | cmpl $0, %ecx | |
410 | je L(eq) | |
411 | ||
6abf3465 UD |
412 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
413 | cmp $6, REM | |
904057bc L |
414 | je L(eq) |
415 | #endif | |
416 | movzbl 6(%eax), %ecx | |
6abf3465 UD |
417 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
418 | movzbl 6(%edx), %edi | |
419 | # ifdef PIC | |
420 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
421 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
422 | # else | |
423 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
424 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
425 | # endif | |
426 | cmpl %ecx, %edi | |
427 | #else | |
904057bc | 428 | cmpb %cl, 6(%edx) |
6abf3465 | 429 | #endif |
904057bc L |
430 | jne L(neq) |
431 | cmpl $0, %ecx | |
432 | je L(eq) | |
433 | ||
6abf3465 UD |
434 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
435 | cmp $7, REM | |
904057bc L |
436 | je L(eq) |
437 | #endif | |
438 | movzbl 7(%eax), %ecx | |
6abf3465 UD |
439 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
440 | movzbl 7(%edx), %edi | |
441 | # ifdef PIC | |
442 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
443 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
444 | # else | |
445 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
446 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
447 | # endif | |
448 | cmpl %ecx, %edi | |
449 | #else | |
904057bc | 450 | cmpb %cl, 7(%edx) |
6abf3465 | 451 | #endif |
904057bc L |
452 | jne L(neq) |
453 | cmpl $0, %ecx | |
454 | je L(eq) | |
455 | ||
/* Eight bytes matched with no NUL: consume them and move on.  */
6abf3465 UD |
456 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
457 | sub $8, REM | |
904057bc L |
458 | je L(eq) |
459 | #endif | |
460 | add $8, %eax | |
461 | add $8, %edx | |
462 | ||
/* Main-loop setup: %edi/%esi take over as the string pointers and %eax
   becomes the (initially zero) result.  %edi is pushed here only for the
   variants that did not already push it in the prologue.  For the
   length-limited variants the CFI state is remembered at this point and
   restored at the cfi_restore_state ahead of L(more16byteseq).  */
6abf3465 | 463 | #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L |
904057bc | 464 | PUSH (%edi) |
6abf3465 | 465 | #endif |
904057bc | 466 | PUSH (%esi) |
6abf3465 | 467 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
6bb74d9f | 468 | cfi_remember_state |
dc21aae6 | 469 | #endif |
904057bc L |
470 | mov %edx, %edi |
471 | mov %eax, %esi | |
472 | xorl %eax, %eax | |
/* L(check_offset): rebias the pointers so that one index register tracks
   the distance to the nearest 4 KiB boundary.
   %ecx = larger of the two pointers' low 12 bits; %edx = %ecx - 0xff0.
   Subtracting %edx from both pointers leaves (%edi,%edx) and (%esi,%edx)
   addressing the same bytes as before.  While %edx <= 0 a 16-byte load at
   that index stays within the current 4 KiB region for both strings; a
   positive %edx sends control to the byte-wise L(crosspage) code.  */
473 | L(check_offset): | |
6abf3465 | 474 | movl %edi, %edx |
904057bc | 475 | movl %esi, %ecx |
6abf3465 | 476 | andl $0xfff, %edx |
904057bc | 477 | andl $0xfff, %ecx |
6abf3465 UD |
478 | cmpl %edx, %ecx |
479 | cmovl %edx, %ecx | |
904057bc L |
480 | lea -0xff0(%ecx), %edx |
481 | sub %edx, %edi | |
482 | sub %edx, %esi | |
483 | testl %edx, %edx | |
484 | jg L(crosspage) | |
/* L(loop): compare 16 bytes per iteration.  Per the SSE4.2 imm8 encoding,
   0x1a selects signed bytes, "equal each" aggregation, negative polarity
   and least-significant index, so pcmpistri sets CF when some byte pair
   differs (index of the first such pair in %ecx) and ZF when a NUL
   terminator is seen; jbe leaves the loop in either case.  */
485 | L(loop): | |
486 | movdqu (%esi,%edx), %xmm2 | |
487 | movdqu (%edi,%edx), %xmm1 | |
6abf3465 | 488 | TOLOWER (%xmm2, %xmm1) |
904057bc L |
489 | pcmpistri $0x1a, %xmm2, %xmm1 |
490 | jbe L(end) | |
491 | ||
6abf3465 UD |
492 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
493 | sub $16, REM | |
904057bc L |
494 | jbe L(more16byteseq) |
495 | #endif | |
496 | ||
497 | add $16, %edx | |
498 | jle L(loop) | |
/* L(crosspage): byte-at-a-time comparison used while a 16-byte load at
   index %edx is not allowed (%edx > 0).  %eax receives the byte of the
   first string minus the byte of the second (after table-driven case
   folding for the case-insensitive variants); a non-zero difference or a
   NUL byte (difference 0) ends the comparison at L(ret).  Once %edx
   exceeds 15 the index is folded back into the pointers and
   L(check_offset) recomputes it.  */
499 | L(crosspage): | |
500 | movzbl (%edi,%edx), %eax | |
6abf3465 UD |
501 | movzbl (%esi,%edx), %ecx |
502 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
503 | # ifdef PIC | |
504 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax | |
505 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
506 | # else | |
507 | movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax | |
508 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
509 | # endif | |
510 | #endif | |
511 | subl %ecx, %eax | |
904057bc | 512 | jne L(ret) |
6abf3465 | 513 | testl %ecx, %ecx |
904057bc | 514 | je L(ret) |
/* Length-limited variants: one more byte consumed; stop with "equal" when
   the count runs out.  */
6abf3465 UD |
515 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
516 | sub $1, REM | |
904057bc L |
517 | jbe L(more16byteseq) |
518 | #endif | |
519 | inc %edx | |
520 | cmp $15, %edx | |
521 | jle L(crosspage) | |
6cc2b8a6 L |
522 | add %edx, %edi |
523 | add %edx, %esi | |
904057bc L |
524 | jmp L(check_offset) |
525 | ||
/* L(end): reached from L(loop) when pcmpistri reported a difference (CF
   set) or a terminator (ZF set); the flags are still those of pcmpistri.
   CF clear means only a terminator was seen, so the result in %eax, which
   is zero on every path into the loop, is returned as is.  Otherwise %ecx
   is the index of the first differing byte within the block: the
   length-limited variants first check that it lies within the remaining
   count, then the two bytes are reloaded (case-folded via the table if
   applicable) and subtracted to form the result.  */
28be6098 | 526 | .p2align 4 |
904057bc L |
527 | L(end): |
528 | jnc L(ret) | |
6abf3465 UD |
529 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
530 | sub %ecx, REM | |
904057bc L |
531 | jbe L(more16byteseq) |
532 | #endif | |
6abf3465 UD |
533 | lea (%ecx,%edx), %ecx |
534 | movzbl (%edi,%ecx), %eax | |
535 | movzbl (%esi,%ecx), %ecx | |
536 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
537 | # ifdef PIC | |
538 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax | |
539 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
540 | # else | |
541 | movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax | |
542 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
543 | # endif | |
544 | #endif | |
904057bc L |
545 | subl %ecx, %eax |
/* L(ret): common exit of the main loop with the result already in %eax.
   Pops, in reverse push order, %esi and %edi, then REM for the
   length-limited variants, then %ebx for PIC case-insensitive builds.  */
546 | L(ret): | |
547 | POP (%esi) | |
548 | POP (%edi) | |
6abf3465 UD |
549 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
550 | POP (REM) | |
551 | #endif | |
552 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
553 | # ifdef PIC | |
904057bc | 554 | POP (%ebx) |
6abf3465 | 555 | # endif |
904057bc L |
556 | #endif |
557 | ret | |
558 | ||
/* Shared "equal" and "not equal" exits.

   L(more16byteseq) (length-limited variants only) is reached from the main
   loop when the byte count is exhausted without a difference.  The unwind
   state is reset to the one remembered right after %esi was pushed; %esi
   is popped here, and for strncmp also the %edi pushed just before the
   loop.  Everything pushed in the prologue is left to RETURN.

   L(eq) returns 0.

   L(neq) is entered straight after a compare of a first-string byte
   against a second-string byte whose flags are still live (mov does not
   alter them): the result is 1 if the first-string byte is above the
   second (unsigned), else -1.  */
28be6098 | 559 | .p2align 4 |
6abf3465 | 560 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
dc21aae6 | 561 | cfi_restore_state |
904057bc L |
562 | L(more16byteseq): |
563 | POP (%esi) | |
6abf3465 | 564 | # ifdef USE_AS_STRNCMP |
904057bc | 565 | POP (%edi) |
6abf3465 | 566 | # endif |
904057bc L |
567 | #endif |
568 | L(eq): | |
569 | xorl %eax, %eax | |
28be6098 | 570 | RETURN |
6bb74d9f | 571 | |
904057bc L |
572 | L(neq): |
573 | mov $1, %eax | |
574 | ja L(neq_bigger) | |
575 | neg %eax | |
576 | L(neq_bigger): | |
28be6098 L |
577 | RETURN |
578 | ||
/* L(less16bytes): the first 16-byte blocks contain a difference or a NUL,
   and their first four bytes are already known to be equal.  On entry
   %ecx holds those four bytes of the first string (case-folded where
   applicable).
   The add/xor/or/add sequence on %ecx looks like the usual word-at-a-time
   "does this dword contain a zero byte" test (NOTE(review): confirm the
   exact bit trick); when it signals a NUL, or when bytes 4..7 differ or
   contain a NUL, control goes to the byte-wise code at L(less4bytes) or
   L(more4bytes) to produce the result.  If the first eight bytes are
   equal and NUL-free, both pointers advance by 8 and the byte-wise code
   below handles bytes 8..15.
   The case-insensitive variants take the folded dwords from %xmm3/%xmm4
   (copies saved on the fast path) instead of rereading memory.  */
904057bc L |
579 | L(less16bytes): |
580 | add $0xfefefeff, %ecx | |
581 | jnc L(less4bytes) | |
6abf3465 UD |
582 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
583 | movd %xmm3, %edi | |
584 | xor %edi, %ecx | |
585 | #else | |
904057bc | 586 | xor (%edx), %ecx |
6abf3465 | 587 | #endif |
904057bc L |
588 | or $0xfefefeff, %ecx |
589 | add $1, %ecx | |
590 | jnz L(less4bytes) | |
591 | ||
/* First four bytes are equal and NUL-free: stop if the count covers no
   more than those, else examine bytes 4..7 the same way.  */
6abf3465 UD |
592 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
593 | cmp $4, REM | |
904057bc L |
594 | jbe L(eq) |
595 | #endif | |
6abf3465 UD |
596 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
597 | psrldq $4, %xmm3 | |
598 | psrldq $4, %xmm4 | |
599 | movd %xmm3, %ecx | |
600 | movd %xmm4, %edi | |
601 | cmp %edi, %ecx | |
602 | mov %ecx, %edi | |
603 | #else | |
904057bc L |
604 | mov 4(%edx), %ecx |
605 | cmp 4(%eax), %ecx | |
6abf3465 | 606 | #endif |
904057bc L |
607 | jne L(more4bytes) |
608 | add $0xfefefeff, %ecx | |
609 | jnc L(more4bytes) | |
6abf3465 UD |
610 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
611 | xor %edi, %ecx | |
612 | #else | |
904057bc | 613 | xor 4(%edx), %ecx |
6abf3465 | 614 | #endif |
904057bc L |
615 | or $0xfefefeff, %ecx |
616 | add $1, %ecx | |
617 | jnz L(more4bytes) | |
618 | ||
6abf3465 UD |
619 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
620 | sub $8, REM | |
904057bc L |
621 | jbe L(eq) |
622 | #endif | |
623 | ||
624 | add $8, %edx | |
625 | add $8, %eax | |
/* L(less4bytes): byte-wise comparison of bytes 0..3 at %edx (first string)
   and %eax (second string).  Each step loads the second-string byte into
   %ecx (case-insensitive variants also load the first-string byte into
   %edi and map both through _nl_C_LC_CTYPE_tolower), goes to L(neq) on
   inequality, to L(eq) on a NUL byte, and, for the length-limited
   variants, to L(eq) when REM equals the number of bytes checked so far.
   Falls through into L(more4bytes) if all four bytes match.  */
626 | L(less4bytes): | |
627 | ||
628 | movzbl (%eax), %ecx | |
6abf3465 UD |
629 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
630 | movzbl (%edx), %edi | |
631 | # ifdef PIC | |
632 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
633 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
634 | # else | |
635 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
636 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
637 | # endif | |
638 | cmpl %ecx, %edi | |
639 | #else | |
904057bc | 640 | cmpb %cl, (%edx) |
6abf3465 | 641 | #endif |
904057bc L |
642 | jne L(neq) |
643 | cmpl $0, %ecx | |
644 | je L(eq) | |
645 | ||
6abf3465 UD |
646 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
647 | cmp $1, REM | |
904057bc L |
648 | je L(eq) |
649 | #endif | |
650 | movzbl 1(%eax), %ecx | |
6abf3465 UD |
651 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
652 | movzbl 1(%edx), %edi | |
653 | # ifdef PIC | |
654 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
655 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
656 | # else | |
657 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
658 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
659 | # endif | |
660 | cmpl %ecx, %edi | |
661 | #else | |
904057bc | 662 | cmpb %cl, 1(%edx) |
6abf3465 | 663 | #endif |
904057bc L |
664 | jne L(neq) |
665 | cmpl $0, %ecx | |
666 | je L(eq) | |
667 | ||
6abf3465 UD |
668 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
669 | cmp $2, REM | |
904057bc L |
670 | je L(eq) |
671 | #endif | |
672 | ||
673 | movzbl 2(%eax), %ecx | |
6abf3465 UD |
674 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
675 | movzbl 2(%edx), %edi | |
676 | # ifdef PIC | |
677 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
678 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
679 | # else | |
680 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
681 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
682 | # endif | |
683 | cmpl %ecx, %edi | |
684 | #else | |
904057bc | 685 | cmpb %cl, 2(%edx) |
6abf3465 | 686 | #endif |
904057bc L |
687 | jne L(neq) |
688 | cmpl $0, %ecx | |
689 | je L(eq) | |
690 | ||
6abf3465 UD |
691 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
692 | cmp $3, REM | |
904057bc L |
693 | je L(eq) |
694 | #endif | |
695 | movzbl 3(%eax), %ecx | |
6abf3465 UD |
696 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
697 | movzbl 3(%edx), %edi | |
698 | # ifdef PIC | |
699 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
700 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
701 | # else | |
702 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
703 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
704 | # endif | |
705 | cmpl %ecx, %edi | |
706 | #else | |
904057bc | 707 | cmpb %cl, 3(%edx) |
6abf3465 | 708 | #endif |
904057bc L |
709 | jne L(neq) |
710 | cmpl $0, %ecx | |
711 | je L(eq) | |
712 | ||
/* L(more4bytes): byte-wise comparison of bytes 4..7 at %edx (first string)
   and %eax (second string), with the same per-byte shape as
   L(less4bytes): L(neq) on inequality, L(eq) on a NUL byte, and for the
   length-limited variants L(eq) when REM equals the number of bytes
   checked so far.  The last byte (offset 7) gets no NUL or count test: if
   it matches, the code jumps to L(eq) unconditionally.
   NOTE(review): this presumably relies on the callers having already
   established that a difference or a NUL lies within these bytes --
   confirm against the paths that reach this label.  */
713 | L(more4bytes): | |
6abf3465 UD |
714 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
715 | cmp $4, REM | |
904057bc L |
716 | je L(eq) |
717 | #endif | |
718 | movzbl 4(%eax), %ecx | |
6abf3465 UD |
719 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
720 | movzbl 4(%edx), %edi | |
721 | # ifdef PIC | |
722 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
723 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
724 | # else | |
725 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
726 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
727 | # endif | |
728 | cmpl %ecx, %edi | |
729 | #else | |
904057bc | 730 | cmpb %cl, 4(%edx) |
6abf3465 | 731 | #endif |
904057bc L |
732 | jne L(neq) |
733 | cmpl $0, %ecx | |
734 | je L(eq) | |
735 | ||
736 | ||
6abf3465 UD |
737 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
738 | cmp $5, REM | |
904057bc L |
739 | je L(eq) |
740 | #endif | |
741 | movzbl 5(%eax), %ecx | |
6abf3465 UD |
742 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
743 | movzbl 5(%edx), %edi | |
744 | # ifdef PIC | |
745 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
746 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
747 | # else | |
748 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
749 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
750 | # endif | |
751 | cmpl %ecx, %edi | |
752 | #else | |
904057bc | 753 | cmpb %cl, 5(%edx) |
6abf3465 | 754 | #endif |
904057bc L |
755 | jne L(neq) |
756 | cmpl $0, %ecx | |
757 | je L(eq) | |
758 | ||
6abf3465 UD |
759 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
760 | cmp $6, REM | |
904057bc L |
761 | je L(eq) |
762 | #endif | |
763 | movzbl 6(%eax), %ecx | |
6abf3465 UD |
764 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
765 | movzbl 6(%edx), %edi | |
766 | # ifdef PIC | |
767 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
768 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
769 | # else | |
770 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
771 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
772 | # endif | |
773 | cmpl %ecx, %edi | |
774 | #else | |
904057bc | 775 | cmpb %cl, 6(%edx) |
6abf3465 | 776 | #endif |
904057bc L |
777 | jne L(neq) |
778 | cmpl $0, %ecx | |
779 | je L(eq) | |
780 | ||
6abf3465 UD |
781 | #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L |
782 | cmp $7, REM | |
904057bc L |
783 | je L(eq) |
784 | #endif | |
785 | movzbl 7(%eax), %ecx | |
6abf3465 UD |
786 | #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L |
787 | movzbl 7(%edx), %edi | |
788 | # ifdef PIC | |
789 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx | |
790 | movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi | |
791 | # else | |
792 | movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx | |
793 | movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi | |
794 | # endif | |
795 | cmpl %ecx, %edi | |
796 | #else | |
904057bc | 797 | cmpb %cl, 7(%edx) |
6abf3465 | 798 | #endif |
904057bc | 799 | jne L(neq) |
6bb74d9f | 800 | jmp L(eq) |
904057bc L |
801 | |
802 | END (STRCMP) | |
803 | ||
804 | #endif |