]>
Commit | Line | Data |
---|---|---|
904057bc | 1 | /* strcmp with SSE4.2 |
6abf3465 | 2 | Copyright (C) 2010, 2011 Free Software Foundation, Inc. |
904057bc L |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
17 | License along with the GNU C Library; if not, write to the Free | |
18 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19 | 02111-1307 USA. */ | |
20 | ||
21 | #ifndef NOT_IN_libc | |
22 | ||
23 | #include <sysdep.h> | |
24 | #include "asm-syntax.h" | |
25 | ||
/* Unwind bookkeeping for a 4-byte push of REG: the CFA moves 4 bytes
   further from %esp and REG is recorded as saved at offset 0 from the
   current stack pointer.  */
#define CFI_PUSH(REG)	cfi_adjust_cfa_offset (4); cfi_rel_offset (REG, 0)

/* Unwind bookkeeping for the matching pop: the CFA moves back by 4 bytes
   and REG is marked as holding its original value again.  */
#define CFI_POP(REG)	cfi_adjust_cfa_offset (-4); cfi_restore (REG)

/* Push/pop a 32-bit register and emit the matching CFI in one step.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)
36 | ||
/* Per-variant configuration.

   STRCMP	name of the entry point defined by this file (may be
		predefined by the including file).
   STR1, STR2	offsets from %esp of the two string arguments, valid
		AFTER the prologue has pushed its registers.
   CNT		offset of the length argument (strn variants), also
		valid after the prologue pushes.
   LOCALE	offset of the locale argument on function entry, i.e.
		BEFORE anything is pushed.
   REM		register holding the remaining byte count (strn variants).
   RETURN	epilogue for the paths that have only the prologue
		registers on the stack: pop them in reverse order and
		return.  The directives after `ret' emit no instructions;
		they re-declare the registers as pushed so the unwind
		information is right for the code that follows the macro.
   NONASCII	routine the case-insensitive variants jump to when the
		locale has _NL_CTYPE_NONASCII_CASE set.

   The prologue pushes %ebx only when PIC is defined (it is the GOT
   pointer), so for the case-insensitive variants the argument offsets
   depend on PIC: each pushed register moves the arguments by 4 bytes.  */
#ifdef USE_AS_STRNCMP
# ifndef STRCMP
#  define STRCMP	__strncmp_sse4_2
# endif
/* Return address + REM.  */
# define STR1		8
# define STR2		STR1+4
# define CNT		STR2+4
# define RETURN		POP (REM); ret; .p2align 4; CFI_PUSH (REM)
# define REM		%ebp
#elif defined USE_AS_STRCASECMP_L
# include "locale-defines.h"
# ifndef STRCMP
#  define STRCMP	__strcasecmp_l_sse4_2
# endif
# ifdef PIC
/* Return address + %ebx + %edi.  */
#  define STR1		12
# else
/* Return address + %edi; %ebx is not pushed without PIC.  */
#  define STR1		8
# endif
# define STR2		STR1+4
# define LOCALE		12	/* Loaded before the adjustment.  */
# ifdef PIC
#  define RETURN	POP (%edi); POP (%ebx); ret; \
			.p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi)
# else
#  define RETURN	POP (%edi); ret; .p2align 4; CFI_PUSH (%edi)
# endif
# define NONASCII	__strcasecmp_nonascii
#elif defined USE_AS_STRNCASECMP_L
# include "locale-defines.h"
# ifndef STRCMP
#  define STRCMP	__strncasecmp_l_sse4_2
# endif
# ifdef PIC
/* Return address + %ebx + REM + %edi.  */
#  define STR1		16
# else
/* Return address + REM + %edi; %ebx is not pushed without PIC.  */
#  define STR1		12
# endif
# define STR2		STR1+4
# define CNT		STR2+4
# define LOCALE		16	/* Loaded before the adjustment.  */
# ifdef PIC
#  define RETURN	POP (%edi); POP (REM); POP (%ebx); ret; \
			.p2align 4; \
			CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi)
# else
#  define RETURN	POP (%edi); POP (REM); ret; \
			.p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi)
# endif
# define REM		%ebp
# define NONASCII	__strncasecmp_nonascii
#else
# ifndef STRCMP
#  define STRCMP	__strcmp_sse4_2
# endif
/* Nothing is pushed before the arguments are loaded.  */
# define STR1		4
# define STR2		STR1+4
# define RETURN		ret; .p2align 4
#endif
88 | ||
89 | .section .text.sse4.2,"ax",@progbits | |
6abf3465 UD |
90 | |
91 | #ifdef USE_AS_STRCASECMP_L | |
92 | ENTRY (__strcasecmp_sse4_2) | |
93 | # ifdef PIC | |
94 | PUSH (%ebx) | |
95 | call __i686.get_pc_thunk.bx | |
96 | addl $_GLOBAL_OFFSET_TABLE_, %ebx | |
97 | movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax | |
98 | # else | |
99 | movl __libc_tsd_LOCALE@NTPOFF, %eax | |
100 | # endif | |
101 | movl %gs:(%eax), %eax | |
102 | # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 | |
103 | movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax | |
104 | # else | |
105 | movl (%eax), %eax | |
106 | # endif | |
107 | testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) | |
108 | jne __strcasecmp_nonascii | |
109 | jmp L(ascii) | |
110 | END (__strcasecmp_sse4_2) | |
111 | #endif | |
112 | ||
#ifdef USE_AS_STRNCASECMP_L
/* strncasecmp entry point without an explicit locale argument.

   Loads the locale pointer stored in the TLS variable __libc_tsd_LOCALE,
   tests the _NL_CTYPE_NONASCII_CASE flag of its LC_CTYPE data and then
   either joins STRCMP at L(ascii) or tail-jumps to
   __strncasecmp_nonascii.

   With PIC, %ebx is pushed and set up as GOT pointer here; L(ascii)
   expects exactly that state.  Without PIC nothing is pushed.  */
ENTRY (__strncasecmp_sse4_2)
# ifdef PIC
	PUSH	(%ebx)
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
	/* The GOT slot holds the variable's offset from the thread
	   pointer; the value itself is read through %gs.  */
	movl	__libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
	movl	%gs:(%eax), %eax
# else
	/* The thread-pointer offset is a link-time constant here, so the
	   variable must be read with a %gs-relative access.  Without the
	   segment override the offset would be dereferenced as an
	   absolute address.  */
	movl	%gs:__libc_tsd_LOCALE@NTPOFF, %eax
# endif
# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
# else
	movl	(%eax), %eax
# endif
	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
# ifdef PIC
	je	L(ascii)
	/* The fallback is entered by a tail jump and therefore has to see
	   the stack exactly as our caller left it (STRCMP itself jumps to
	   NONASCII before pushing anything): undo the %ebx push first.  */
	POP	(%ebx)
	jmp	__strncasecmp_nonascii
# else
	jne	__strncasecmp_nonascii
	jmp	L(ascii)
# endif
END (__strncasecmp_sse4_2)
#endif
134 | ||
/* Helper macros for the byte-at-a-time paths of STRCMP.  They expand to
   the instruction sequences that would otherwise be written out once
   per byte.  */

#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
/* Memory operand for entry IDX of the _nl_C_LC_CTYPE_tolower table
   (4-byte entries, biased by 128 entries).  */
# ifdef PIC
#  define TOLOWER_TABLE(IDX) \
	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,IDX,4)
# else
#  define TOLOWER_TABLE(IDX) \
	_nl_C_LC_CTYPE_tolower+128*4(,IDX,4)
# endif

/* Load byte OFF of both strings, map both through the tolower table and
   compare them.  Leaves the folded s2 byte in %ecx, the folded s1 byte
   in %edi and the flags of "s1 byte - s2 byte".  */
# define CMPBYTE(OFF) \
	movzbl	OFF(%eax), %ecx; \
	movzbl	OFF(%edx), %edi; \
	movl	TOLOWER_TABLE (%ecx), %ecx; \
	movl	TOLOWER_TABLE (%edi), %edi; \
	cmpl	%ecx, %edi

/* Map the bytes in %eax and %ecx through the tolower table.  */
# define FOLD_EAX_ECX \
	movl	TOLOWER_TABLE (%eax), %eax; \
	movl	TOLOWER_TABLE (%ecx), %ecx

/* Fold the sixteen bytes in each of REG1 and REG2: every byte that is
   greater than 0x40 and less than 0x5b (signed compare, i.e. 'A'..'Z')
   gets bit 0x20 set.  Clobbers %xmm3-%xmm6.  */
# define TOLOWER(reg1, reg2) \
	movdqa	reg1, %xmm3; \
	movdqa	UCHIGH_reg, %xmm4; \
	movdqa	reg2, %xmm5; \
	movdqa	UCHIGH_reg, %xmm6; \
	pcmpgtb	UCLOW_reg, %xmm3; \
	pcmpgtb	reg1, %xmm4; \
	pcmpgtb	UCLOW_reg, %xmm5; \
	pcmpgtb	reg2, %xmm6; \
	pand	%xmm4, %xmm3; \
	pand	%xmm6, %xmm5; \
	pand	LCQWORD_reg, %xmm3; \
	pand	LCQWORD_reg, %xmm5; \
	por	%xmm3, reg1; \
	por	%xmm5, reg2
#else
/* Case-sensitive variants: compare the raw bytes, no folding.  Leaves
   the s2 byte in %ecx and the flags of "s1 byte - s2 byte".  */
# define CMPBYTE(OFF) \
	movzbl	OFF(%eax), %ecx; \
	cmpb	%cl, OFF(%edx)
# define FOLD_EAX_ECX
# define TOLOWER(reg1, reg)
#endif

/* Compare byte OFF; leave through L(neq) if the bytes differ and
   through L(eq) if both are the terminating NUL.  */
#define CMPBYTE_OR_DONE(OFF) \
	CMPBYTE (OFF); \
	jne	L(neq); \
	cmpl	$0, %ecx; \
	je	L(eq)

/* strn variants only: the strings are equal if exactly N bytes were
   requested and N bytes have been compared.  */
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
# define STOP_IF_REM_IS(N) \
	cmp	$(N), REM; \
	je	L(eq)
#else
# define STOP_IF_REM_IS(N)
#endif

/* STRCMP: compare two NUL-terminated strings (cdecl, i386).

   Depending on the USE_AS_* macro this is strcmp, strncmp (at most CNT
   bytes), strcasecmp_l or strncasecmp_l (bytes 'A'..'Z' folded to lower
   case; locales with _NL_CTYPE_NONASCII_CASE set are handed to NONASCII
   before anything is pushed).

   Result in %eax: 0 if equal.  The byte-wise paths return 1 or -1 via
   L(neq); the 16-byte loop returns the difference of the two (folded)
   bytes at the first mismatch.

   Register roles up to the 16-byte loop:
	%edx	s1
	%eax	s2
	%ecx	scratch / current s2 byte
	%edi	current s1 byte (case-insensitive variants, pushed early)
	REM	bytes left to compare (strn variants, pushed early)
	%ebx	GOT pointer (case-insensitive variants with PIC)
   Inside the loop (%esi pushed, %edi pushed if not already):
	%edi	s1 base, %esi s2 base, %edx running offset, %eax result.

   Clobbers %eax, %ecx, %edx, %xmm0-%xmm6 and the flags.  */
ENTRY (STRCMP)
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	/* Nothing has been pushed yet, so LOCALE is relative to the
	   entry %esp and NONASCII sees the caller's stack unchanged.  */
	movl	LOCALE(%esp), %eax
# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
# else
	movl	(%eax), %eax
# endif
	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
	jne	NONASCII

# ifdef PIC
	PUSH	(%ebx)
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
# endif
	/* The locale-less wrappers join here; with PIC they arrive with
	   %ebx already pushed and pointing at the GOT.  */
L(ascii):
	/* 16-byte constants used by TOLOWER.  */
	.section .rodata.cst16,"aM",@progbits,16
	.align 16
.Lbelowupper:
	.quad	0x4040404040404040
	.quad	0x4040404040404040
.Ltopupper:
	.quad	0x5b5b5b5b5b5b5b5b
	.quad	0x5b5b5b5b5b5b5b5b
.Ltouppermask:
	.quad	0x2020202020202020
	.quad	0x2020202020202020
	.previous

# ifdef PIC
#  define UCLOW_reg .Lbelowupper@GOTOFF(%ebx)
#  define UCHIGH_reg .Ltopupper@GOTOFF(%ebx)
#  define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx)
# else
#  define UCLOW_reg .Lbelowupper
#  define UCHIGH_reg .Ltopupper
#  define LCQWORD_reg .Ltouppermask
# endif
#endif

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	PUSH	(REM)
#endif
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	PUSH	(%edi)
#endif
	mov	STR1(%esp), %edx
	mov	STR2(%esp), %eax
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	movl	CNT(%esp), REM
	test	REM, REM
	je	L(eq)
#endif
	/* Take the 16-byte fast path only if neither string starts more
	   than 0xff0 bytes past a 4096-byte boundary, so neither
	   unaligned 16-byte load can run over that boundary.  */
	mov	%dx, %cx
	and	$0xfff, %cx
	cmp	$0xff0, %cx
	ja	L(first4bytes)
	movdqu	(%edx), %xmm2
	mov	%eax, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(first4bytes)
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	movdqu	(%eax), %xmm1
	TOLOWER (%xmm2, %xmm1)
	movd	%xmm2, %ecx
	movd	%xmm1, %edi
	/* Keep folded copies of both blocks; L(less16bytes) reads them
	   back instead of reloading and refolding memory.  */
	movdqa	%xmm2, %xmm3
	movdqa	%xmm1, %xmm4
	cmpl	%edi, %ecx
#else
	movd	%xmm2, %ecx
	cmp	(%eax), %ecx
#endif
	/* First four bytes differ: locate the byte one at a time.  */
	jne	L(less4bytes)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	movdqu	(%eax), %xmm1
#endif
	/* With %xmm0 zero, ptest sets CF only if its source is all zero.
	   First test: s1 ^ s2, CF clear means the blocks differ.  */
	pxor	%xmm2, %xmm1
	pxor	%xmm0, %xmm0
	ptest	%xmm1, %xmm0
	jnc	L(less16bytes)
	/* Second test: the NUL-byte mask of s1, CF clear means the
	   string ends inside this block.  */
	pcmpeqb	%xmm0, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, REM
	jbe	L(eq)
#endif
	add	$16, %edx
	add	$16, %eax

	/* Despite the label name, eight bytes are compared one by one
	   here before the 16-byte loop is set up.  */
L(first4bytes):
	CMPBYTE_OR_DONE (0)
	STOP_IF_REM_IS (1)
	CMPBYTE_OR_DONE (1)
	STOP_IF_REM_IS (2)
	CMPBYTE_OR_DONE (2)
	STOP_IF_REM_IS (3)
	CMPBYTE_OR_DONE (3)
	STOP_IF_REM_IS (4)
	CMPBYTE_OR_DONE (4)
	STOP_IF_REM_IS (5)
	CMPBYTE_OR_DONE (5)
	STOP_IF_REM_IS (6)
	CMPBYTE_OR_DONE (6)
	STOP_IF_REM_IS (7)
	CMPBYTE_OR_DONE (7)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$8, REM
	je	L(eq)
#endif
	add	$8, %eax
	add	$8, %edx

	/* The case-insensitive variants pushed %edi in the prologue.  */
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	PUSH	(%edi)
#endif
	PUSH	(%esi)
	/* Every variant except plain strcmp still has prologue registers
	   on the stack at L(eq)/L(neq), which are placed after the pops
	   of L(ret).  Remember the unwind state here so it can be
	   re-established there.  */
#if defined USE_AS_STRNCMP || defined USE_AS_STRCASECMP_L \
    || defined USE_AS_STRNCASECMP_L
	cfi_remember_state
#endif
	mov	%edx, %edi
	mov	%eax, %esi
	xorl	%eax, %eax
	/* Rebase both pointers so that %edx is the distance of the
	   string with the larger page offset from offset 0xff0: while
	   %edx <= 0 a 16-byte load at (base,%edx) stays within the
	   4096-byte block for both strings.  */
L(check_offset):
	movl	%edi, %edx
	movl	%esi, %ecx
	andl	$0xfff, %edx
	andl	$0xfff, %ecx
	cmpl	%edx, %ecx
	cmovl	%edx, %ecx
	lea	-0xff0(%ecx), %edx
	sub	%edx, %edi
	sub	%edx, %esi
	testl	%edx, %edx
	jg	L(crosspage)
L(loop):
	movdqu	(%esi,%edx), %xmm2
	movdqu	(%edi,%edx), %xmm1
	TOLOWER (%xmm2, %xmm1)
	/* 0x1a: signed bytes, "equal each", negative polarity.  The loop
	   is left when pcmpistri sets CF or ZF; L(end) sorts out which.  */
	pcmpistri	$0x1a, %xmm2, %xmm1
	jbe	L(end)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, REM
	jbe	L(more16byteseq)
#endif

	add	$16, %edx
	jle	L(loop)
	/* Too close to the 4096-byte boundary for a 16-byte load: go
	   byte by byte until %edx passes 15, then rebase.  */
L(crosspage):
	movzbl	(%edi,%edx), %eax
	movzbl	(%esi,%edx), %ecx
	FOLD_EAX_ECX
	subl	%ecx, %eax
	jne	L(ret)
	testl	%ecx, %ecx
	je	L(ret)
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$1, REM
	jbe	L(more16byteseq)
#endif
	inc	%edx
	cmp	$15, %edx
	jle	L(crosspage)
	add	%edx, %edi
	add	%edx, %esi
	jmp	L(check_offset)

	.p2align 4
L(end):
	/* CF clear: the loop was left through ZF alone, so return with
	   %eax still zero.  */
	jnc	L(ret)
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	/* %ecx is the index reported by pcmpistri; if it lies at or
	   beyond the remaining count the strings compare equal.  */
	sub	%ecx, REM
	jbe	L(more16byteseq)
#endif
	lea	(%ecx,%edx), %ecx
	movzbl	(%edi,%ecx), %eax
	movzbl	(%esi,%ecx), %ecx
	FOLD_EAX_ECX
	subl	%ecx, %eax
L(ret):
	POP	(%esi)
	POP	(%edi)
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	POP	(REM)
#endif
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# ifdef PIC
	POP	(%ebx)
# endif
#endif
	ret

	.p2align 4
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cfi_restore_state
L(more16byteseq):
	POP	(%esi)
# ifdef USE_AS_STRNCMP
	POP	(%edi)
# endif
#elif defined USE_AS_STRCASECMP_L
	/* Nothing jumps here with %esi pushed, but the code below runs
	   with the prologue registers still on the stack: take the
	   remembered state and drop only the %esi push from it.  */
	cfi_restore_state
	CFI_POP (%esi)
#endif
L(eq):
	xorl	%eax, %eax
	RETURN

L(neq):
	/* mov leaves the flags of the failed byte compare intact:
	   "above" means the s1 byte is the larger one (unsigned).  */
	mov	$1, %eax
	ja	L(neq_bigger)
	neg	%eax
L(neq_bigger):
	RETURN

	/* The first 16-byte block holds a difference or the terminator.
	   %ecx still holds its first four (folded) s1 bytes.  */
L(less16bytes):
	/* Has-zero-byte test on the dword in %ecx, in two steps.  */
	add	$0xfefefeff, %ecx
	jnc	L(less4bytes)
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	movd	%xmm3, %edi
	xor	%edi, %ecx
#else
	xor	(%edx), %ecx
#endif
	or	$0xfefefeff, %ecx
	add	$1, %ecx
	jnz	L(less4bytes)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$4, REM
	jbe	L(eq)
#endif
	/* Same checks for bytes 4..7.  */
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	psrldq	$4, %xmm3
	psrldq	$4, %xmm4
	movd	%xmm3, %ecx
	movd	%xmm4, %edi
	cmp	%edi, %ecx
	/* mov keeps the flags for the jne below.  */
	mov	%ecx, %edi
#else
	mov	4(%edx), %ecx
	cmp	4(%eax), %ecx
#endif
	jne	L(more4bytes)
	add	$0xfefefeff, %ecx
	jnc	L(more4bytes)
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	xor	%edi, %ecx
#else
	xor	4(%edx), %ecx
#endif
	or	$0xfefefeff, %ecx
	add	$1, %ecx
	jnz	L(more4bytes)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$8, REM
	jbe	L(eq)
#endif

	add	$8, %edx
	add	$8, %eax
	/* Byte-wise comparison of (at most) the next eight bytes.  */
L(less4bytes):
	CMPBYTE_OR_DONE (0)
	STOP_IF_REM_IS (1)
	CMPBYTE_OR_DONE (1)
	STOP_IF_REM_IS (2)
	CMPBYTE_OR_DONE (2)
	STOP_IF_REM_IS (3)
	CMPBYTE_OR_DONE (3)

L(more4bytes):
	STOP_IF_REM_IS (4)
	CMPBYTE_OR_DONE (4)
	STOP_IF_REM_IS (5)
	CMPBYTE_OR_DONE (5)
	STOP_IF_REM_IS (6)
	CMPBYTE_OR_DONE (6)
	STOP_IF_REM_IS (7)
	CMPBYTE (7)
	jne	L(neq)
	jmp	L(eq)

END (STRCMP)
767 | ||
768 | #endif |