]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/multiarch/wcscmp-sse2.S
Add x86-32 optimized wcscmp
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / wcscmp-sse2.S
1 /* wcscmp with SSE2
2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21 #ifndef NOT_IN_libc
22
23 # include <sysdep.h>
24 # include "asm-syntax.h"
25
/* Unwind-info bookkeeping that accompanies a 4-byte push of REG: emits
   cfi_adjust_cfa_offset (4) followed by cfi_rel_offset (REG, 0).  */
26 # define CFI_PUSH(REG) \
27 cfi_adjust_cfa_offset (4); \
28 cfi_rel_offset (REG, 0)
29
/* Unwind-info bookkeeping that accompanies a 4-byte pop of REG: emits
   cfi_adjust_cfa_offset (-4) followed by cfi_restore (REG).  */
30 # define CFI_POP(REG) \
31 cfi_adjust_cfa_offset (-4); \
32 cfi_restore (REG)
33
/* pushl/popl of REG paired with the matching CFI annotation above.  */
34 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
35 # define POP(REG) popl REG; CFI_POP (REG)
36
/* Symbol name of the routine; an including file may pre-define STRCMP to
   assemble the same body under a different name.  */
37 # ifndef STRCMP
38 # define STRCMP __wcscmp_sse2
39 # endif
40
/* ENTRANCE saves %esi and %edi (pushed in that order).  RETURN pops them in
   the reverse order and executes ret; the CFI_PUSH pair placed after the ret
   re-declares both registers as pushed for whatever code follows the macro
   in the file, since that code is reached by jumps taken while the two
   registers are still on the stack.  */
41 # define ENTRANCE PUSH(%esi); PUSH(%edi)
42 # define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
/* Offsets from %esp of the two pointer arguments.  They are only used at
   function entry, before ENTRANCE has pushed anything.  */
43 # define PARMS 4
44 # define STR1 PARMS
45 # define STR2 STR1+4
47 .text
/* STRCMP (default name __wcscmp_sse2): compare two null-terminated wide
   strings whose addresses are passed on the stack at STR1(%esp) (s1) and
   STR2(%esp) (s2).

   Result in %eax: 0 if the strings are equal, 1 if the first differing
   wide character of s1 is greater than that of s2, -1 if it is smaller.
   Wide characters are compared as SIGNED 32-bit values on every path
   (cmp + jg).  Earlier revisions mixed an unsigned test (ja) with a raw
   32-bit subtraction, so the sign of the result could depend on pointer
   alignment and the subtraction could overflow.

   Register roles once ENTRANCE has executed:
     %edi   cursor into s1		%esi   cursor into s2
     %xmm0  all-zero on entry to every SSE check (null detector)
     %ecx   scratch wide character	%edx   SSE result mask
   Clobbers %eax, %ecx, %edx, %xmm0-%xmm5 and the flags; %esi/%edi are
   saved by ENTRANCE and restored by RETURN.

   SSE check of one 16-byte group (4 wide characters):
     pcmpeqd s1, %xmm0	  -> lanes of %xmm0 are all-ones where s1 holds 0
     pcmpeqd s2, s1	  -> lanes are all-ones where s1 == s2
     psubb %xmm0, eq	  -> sign bits survive only where equal AND non-null
     pmovmskb / sub $0xffff -> zero iff all 4 lanes are equal and non-null
   When the result is zero %xmm0 is still all-zero for the next check.
   Otherwise one of the L(less4_double_words*) tails locates the first
   offending lane from %dl/%dh and compares that single wide character.

   Label naming: L(continue_A_B).  A and B are taken from the two pointers'
   offsets inside their 64-byte line as classified by the dispatch code
   below: "00" = 16-byte aligned, "0"/"16"/"32"/"48" = unaligned with an
   offset below 16/32/48/64.  Each variant appears to use scalar compares
   for exactly those 16-byte groups where an unaligned 16-byte load would
   straddle a 64-byte boundary of one of the pointers.  */
ENTRY (STRCMP)
	mov	STR1(%esp), %edx	/* %edx = s1 */
	mov	STR2(%esp), %eax	/* %eax = s2 */

	/* Compare the first four wide characters one by one; no registers
	   have been pushed yet, so these exits use plain ret.  */
	mov	(%eax), %ecx
	cmp	%ecx, (%edx)
	jne	L(neq)
	test	%ecx, %ecx
	jz	L(eq)

	mov	4(%eax), %ecx
	cmp	%ecx, 4(%edx)
	jne	L(neq)
	test	%ecx, %ecx
	jz	L(eq)

	mov	8(%eax), %ecx
	cmp	%ecx, 8(%edx)
	jne	L(neq)
	test	%ecx, %ecx
	jz	L(eq)

	mov	12(%eax), %ecx
	cmp	%ecx, 12(%edx)
	jne	L(neq)
	test	%ecx, %ecx
	jz	L(eq)

	ENTRANCE
	add	$16, %eax
	add	$16, %edx

	mov	%eax, %esi		/* %esi = s2 cursor */
	mov	%edx, %edi		/* %edi = s1 cursor */
	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
	mov	%al, %ch
	mov	%dl, %cl
	and	$63, %eax		/* esi alignment in cache line */
	and	$63, %edx		/* edi alignment in cache line */
	and	$15, %cl		/* edi 16-byte aligned? */
	jz	L(continue_00)
	cmp	$16, %edx
	jb	L(continue_0)
	cmp	$32, %edx
	jb	L(continue_16)
	cmp	$48, %edx
	jb	L(continue_32)

L(continue_48):
	and	$15, %ch		/* esi 16-byte aligned? */
	jz	L(continue_48_00)
	cmp	$16, %eax
	jb	L(continue_0_48)
	cmp	$32, %eax
	jb	L(continue_16_48)
	cmp	$48, %eax
	jb	L(continue_32_48)

	.p2align 4
L(continue_48_48):
	/* Bytes 0..15: scalar.  */
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	/* Bytes 16..31: SSE check.  */
	movdqu	16(%edi), %xmm1
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	/* Bytes 32..47: SSE check.  */
	movdqu	32(%edi), %xmm1
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63: SSE check.  */
	movdqu	48(%edi), %xmm1
	movdqu	48(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_48_48)

L(continue_0):
	and	$15, %ch		/* esi 16-byte aligned? */
	jz	L(continue_0_00)
	cmp	$16, %eax
	jb	L(continue_0_0)
	cmp	$32, %eax
	jb	L(continue_0_16)
	cmp	$48, %eax
	jb	L(continue_0_32)

	.p2align 4
L(continue_0_48):
	/* Bytes 0..15: scalar.  */
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	/* Bytes 16..31: SSE check.  */
	movdqu	16(%edi), %xmm1
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	/* Bytes 32..47: SSE check.  */
	movdqu	32(%edi), %xmm1
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63: scalar.  */
	mov	48(%esi), %ecx
	cmp	%ecx, 48(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	52(%esi), %ecx
	cmp	%ecx, 52(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	56(%esi), %ecx
	cmp	%ecx, 56(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	60(%esi), %ecx
	cmp	%ecx, 60(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_0_48)

	.p2align 4
L(continue_00):
	and	$15, %ch		/* esi 16-byte aligned? */
	jz	L(continue_00_00)
	cmp	$16, %eax
	jb	L(continue_00_0)
	cmp	$32, %eax
	jb	L(continue_00_16)
	cmp	$48, %eax
	jb	L(continue_00_32)

	.p2align 4
L(continue_00_48):
	/* Bytes 0..15: look for a null in the aligned s1 group first; if
	   there is one, finish with the scalar tail.  */
	pcmpeqd	(%edi), %xmm0
	mov	(%edi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)

	/* No null in s1 here, so only inequality has to be checked.
	   cmp computes s1 - s2, the operand order L(nequal) expects.  */
	cmp	(%esi), %eax
	jne	L(nequal)

	mov	4(%edi), %eax
	cmp	4(%esi), %eax
	jne	L(nequal)

	mov	8(%edi), %eax
	cmp	8(%esi), %eax
	jne	L(nequal)

	mov	12(%edi), %eax
	cmp	12(%esi), %eax
	jne	L(nequal)

	/* Bytes 16..31: SSE check (s1 aligned, s2 unaligned).  */
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	16(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	/* Bytes 32..47: SSE check.  */
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	32(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63: SSE check.  */
	movdqu	48(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	48(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_32):
	and	$15, %ch		/* esi 16-byte aligned? */
	jz	L(continue_32_00)
	cmp	$16, %eax
	jb	L(continue_0_32)
	cmp	$32, %eax
	jb	L(continue_16_32)
	cmp	$48, %eax
	jb	L(continue_32_32)

	.p2align 4
L(continue_32_48):
	/* Bytes 0..31: scalar.  */
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	16(%esi), %ecx
	cmp	%ecx, 16(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	20(%esi), %ecx
	cmp	%ecx, 20(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	24(%esi), %ecx
	cmp	%ecx, 24(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	28(%esi), %ecx
	cmp	%ecx, 28(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	/* Bytes 32..47: SSE check.  */
	movdqu	32(%edi), %xmm1
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63: SSE check.  */
	movdqu	48(%edi), %xmm1
	movdqu	48(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_32_48)

	.p2align 4
L(continue_16):
	and	$15, %ch		/* esi 16-byte aligned? */
	jz	L(continue_16_00)
	cmp	$16, %eax
	jb	L(continue_0_16)
	cmp	$32, %eax
	jb	L(continue_16_16)
	cmp	$48, %eax
	jb	L(continue_16_32)

	.p2align 4
L(continue_16_48):
	/* Bytes 0..15: scalar.  */
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	/* Bytes 16..31: SSE check.  */
	movdqu	16(%edi), %xmm1
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	/* Bytes 32..47: scalar.  */
	mov	32(%esi), %ecx
	cmp	%ecx, 32(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	36(%esi), %ecx
	cmp	%ecx, 36(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	40(%esi), %ecx
	cmp	%ecx, 40(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	44(%esi), %ecx
	cmp	%ecx, 44(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	/* Bytes 48..63: SSE check.  */
	movdqu	48(%edi), %xmm1
	movdqu	48(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_16_48)

	/* Both pointers 16-byte aligned: aligned loads, 64 bytes per turn.  */
	.p2align 4
L(continue_00_00):
	movdqa	(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words)

	movdqa	16(%edi), %xmm3
	pcmpeqd	%xmm3, %xmm0
	pcmpeqd	16(%esi), %xmm3
	psubb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	movdqa	32(%edi), %xmm5
	pcmpeqd	%xmm5, %xmm0
	pcmpeqd	32(%esi), %xmm5
	psubb	%xmm0, %xmm5
	pmovmskb %xmm5, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	movdqa	48(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	48(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_00_00)

	/* The L(continue_00_{32,16,0}) stubs check 16/32/48 bytes with SSE
	   and then join the L(continue_00_48) loop.  */
	.p2align 4
L(continue_00_32):
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_16):
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words)

	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	16(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_0):
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words)

	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	16(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	32(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_48_00):
	/* Bytes 0..15: look for a null in the aligned s2 group first; if
	   there is one, finish with the scalar tail.  */
	pcmpeqd	(%esi), %xmm0
	mov	(%edi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)

	/* No null in s2 here, so only inequality has to be checked.
	   cmp computes s1 - s2, the operand order L(nequal) expects.  */
	cmp	(%esi), %eax
	jne	L(nequal)

	mov	4(%edi), %eax
	cmp	4(%esi), %eax
	jne	L(nequal)

	mov	8(%edi), %eax
	cmp	8(%esi), %eax
	jne	L(nequal)

	mov	12(%edi), %eax
	cmp	12(%esi), %eax
	jne	L(nequal)

	/* Bytes 16..31: SSE check (s1 unaligned, s2 aligned).  */
	movdqu	16(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	16(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	/* Bytes 32..47: SSE check.  */
	movdqu	32(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	32(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63: SSE check.  */
	movdqu	48(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	48(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_48_00)

	/* The L(continue_{32,16,0}_00) stubs check 16/32/48 bytes with SSE
	   and then join the L(continue_48_00) loop.  */
	.p2align 4
L(continue_32_00):
	movdqu	(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_16_00):
	movdqu	(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words)

	movdqu	16(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	16(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_0_00):
	movdqu	(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words)

	movdqu	16(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	16(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	movdqu	32(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	32(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_48_00)

	/* The remaining stubs check 16/32/48 bytes with SSE (both pointers
	   unaligned) and then join one of the 64-byte loops above.  */
	.p2align 4
L(continue_32_32):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_16_16):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words)

	movdqu	16(%edi), %xmm3
	movdqu	16(%esi), %xmm4
	pcmpeqd	%xmm3, %xmm0
	pcmpeqd	%xmm4, %xmm3
	psubb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_0):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words)

	movdqu	16(%edi), %xmm3
	movdqu	16(%esi), %xmm4
	pcmpeqd	%xmm3, %xmm0
	pcmpeqd	%xmm4, %xmm3
	psubb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	movdqu	32(%edi), %xmm1
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_16):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words)

	movdqu	16(%edi), %xmm1
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_32_48)

	.p2align 4
L(continue_0_32):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_16_48)

	.p2align 4
L(continue_16_32):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_32_48)

	/* Scalar tail for L(continue_00_48) and L(continue_48_00): a null
	   was seen in bytes 0..15 of the aligned string and %eax already
	   holds (%edi).  */
	.p2align 4
L(less4_double_words1):
	cmp	(%esi), %eax
	jne	L(nequal)
	test	%eax, %eax
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	/* The first three are equal and non-null, so the null is the
	   fourth wide character of at least one string.  */
	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	/* SSE tails.  %edx holds mask - 0xffff from the failed check.
	   %dl == 0 means wide characters 0 and 1 passed; the low nibble of
	   %dl (resp. %dh) is zero when the even one of the pair passed.
	   The selected wide character either differs or is a null present
	   in both strings; in the latter case the strings are equal.  */
	.p2align 4
L(less4_double_words):
	test	%dl, %dl
	jz	L(next_two_double_words)
	and	$15, %dl
	jz	L(second_double_word)
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(second_double_word):
	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(next_two_double_words):
	and	$15, %dh
	jz	L(fourth_double_word)
	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(fourth_double_word):
	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(less4_double_words_16):
	test	%dl, %dl
	jz	L(next_two_double_words_16)
	and	$15, %dl
	jz	L(second_double_word_16)
	mov	16(%esi), %ecx
	cmp	%ecx, 16(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(second_double_word_16):
	mov	20(%esi), %ecx
	cmp	%ecx, 20(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(next_two_double_words_16):
	and	$15, %dh
	jz	L(fourth_double_word_16)
	mov	24(%esi), %ecx
	cmp	%ecx, 24(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(fourth_double_word_16):
	mov	28(%esi), %ecx
	cmp	%ecx, 28(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(less4_double_words_32):
	test	%dl, %dl
	jz	L(next_two_double_words_32)
	and	$15, %dl
	jz	L(second_double_word_32)
	mov	32(%esi), %ecx
	cmp	%ecx, 32(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(second_double_word_32):
	mov	36(%esi), %ecx
	cmp	%ecx, 36(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(next_two_double_words_32):
	and	$15, %dh
	jz	L(fourth_double_word_32)
	mov	40(%esi), %ecx
	cmp	%ecx, 40(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(fourth_double_word_32):
	mov	44(%esi), %ecx
	cmp	%ecx, 44(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(less4_double_words_48):
	test	%dl, %dl
	jz	L(next_two_double_words_48)
	and	$15, %dl
	jz	L(second_double_word_48)
	mov	48(%esi), %ecx
	cmp	%ecx, 48(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(second_double_word_48):
	mov	52(%esi), %ecx
	cmp	%ecx, 52(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(next_two_double_words_48):
	and	$15, %dh
	jz	L(fourth_double_word_48)
	mov	56(%esi), %ecx
	cmp	%ecx, 56(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(fourth_double_word_48):
	mov	60(%esi), %ecx
	cmp	%ecx, 60(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	/* Reached by jne straight after a cmp that computed s1 - s2, with
	   %esi/%edi pushed.  mov leaves the flags intact, so jg still sees
	   the result of that cmp.  Signed test: wide characters are treated
	   as signed 32-bit values.  */
	.p2align 4
L(nequal):
	mov	$1, %eax
	jg	L(nequal_bigger)
	neg	%eax

L(nequal_bigger):
	RETURN

	.p2align 4
L(equal):
	xorl	%eax, %eax
	RETURN

	/* The exits below are only reached from the entry sequence, before
	   ENTRANCE: cancel the CFI state left behind by the last RETURN.  */
	CFI_POP (%edi)
	CFI_POP (%esi)

	.p2align 4
L(neq):
	mov	$1, %eax
	jg	L(neq_bigger)		/* signed, same as L(nequal) */
	neg	%eax

L(neq_bigger):
	ret

	.p2align 4
L(eq):
	xorl	%eax, %eax
	ret

END (STRCMP)
1003 #endif
1004