]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/multiarch/wcscmp-sse2.S
Update copyright notices with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / wcscmp-sse2.S
1 /* wcscmp with SSE2
2 Copyright (C) 2011-2014 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #ifndef NOT_IN_libc
21
22 # include <sysdep.h>
23
/* CFI bookkeeping for one 4-byte push of REG: grow the CFA offset by 4
   and record REG with cfi_rel_offset at offset 0.  */
24 # define CFI_PUSH(REG) \
25 cfi_adjust_cfa_offset (4); \
26 cfi_rel_offset (REG, 0)
27
/* CFI bookkeeping for one 4-byte pop of REG: shrink the CFA offset by 4
   and mark REG as restored.  */
28 # define CFI_POP(REG) \
29 cfi_adjust_cfa_offset (-4); \
30 cfi_restore (REG)
31
/* Push/pop a register together with the matching CFI annotation.  */
32 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
33 # define POP(REG) popl REG; CFI_POP (REG)
34
/* ENTRANCE saves %esi and %edi (pushed in that order).
   RETURN pops them in reverse order and returns; the CFI_PUSH pair
   after the `ret' re-applies the annotations the two POPs just undid,
   for whatever code follows the expansion.  */
35 # define ENTRANCE PUSH(%esi); PUSH(%edi)
36 # define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
/* Stack offsets of the two arguments.  They are used as STR1(%esp) and
   STR2(%esp) at function entry, before ENTRANCE has pushed anything.  */
37 # define PARMS 4
38 # define STR1 PARMS
39 # define STR2 STR1+4
40
41 /* Note: wcscmp uses signed comparison, not unsigned as in strcmp function. */
42
43 .text
/* __wcscmp_sse2 (s1, s2): s1 is read from STR1(%esp), s2 from STR2(%esp).
   The result is returned in %eax: 0 when the strings are equal, 1 when
   the first differing element of s1 is greater (signed) than that of s2,
   and -1 otherwise.

   Structure: the first four elements are compared one by one without
   touching callee-saved registers.  After that, %esi/%edi are saved and
   the code dispatches on the position of each pointer inside its 64-byte
   block to one of the L(continue_*) loops.  */
44 ENTRY (__wcscmp_sse2)
45 /*
46 * This implementation uses SSE to compare up to 16 bytes at a time.
47 */
48 mov STR1(%esp), %edx /* %edx = s1 */
49 mov STR2(%esp), %eax /* %eax = s2 */
50
/* Scalar compare of elements 0..3.  Each `cmp' sets flags for
   s1[i] - s2[i]; L(neq) relies on those flags.  */
51 mov (%eax), %ecx
52 cmp %ecx, (%edx)
53 jne L(neq)
54 test %ecx, %ecx
55 jz L(eq)
56
57 mov 4(%eax), %ecx
58 cmp %ecx, 4(%edx)
59 jne L(neq)
60 test %ecx, %ecx
61 jz L(eq)
62
63 mov 8(%eax), %ecx
64 cmp %ecx, 8(%edx)
65 jne L(neq)
66 test %ecx, %ecx
67 jz L(eq)
68
69 mov 12(%eax), %ecx
70 cmp %ecx, 12(%edx)
71 jne L(neq)
72 test %ecx, %ecx
73 jz L(eq)
74
/* First 16 bytes were equal and non-null.  From here on %esi and %edi
   are in use, so every exit must go through RETURN.  */
75 ENTRANCE
76 add $16, %eax
77 add $16, %edx
78
79 mov %eax, %esi /* %esi = current position in s2 */
80 mov %edx, %edi /* %edi = current position in s1 */
81 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
82 mov %al, %ch /* %ch = low byte of the s2 position */
83 mov %dl, %cl /* %cl = low byte of the s1 position */
84 and $63, %eax /* esi alignment in cache line */
85 and $63, %edx /* edi alignment in cache line */
/* Dispatch on s1 (%edi) first: 16-byte aligned, or offset within the
   64-byte block in [0,16), [16,32), [32,48); otherwise fall through to
   L(continue_48).  */
86 and $15, %cl
87 jz L(continue_00)
88 cmp $16, %edx
89 jb L(continue_0)
90 cmp $32, %edx
91 jb L(continue_16)
92 cmp $48, %edx
93 jb L(continue_32)
94
/* L(continue_48): reached by fall-through when %edi is not 16-byte
   aligned and (%edi & 63) >= 48.  Second-level dispatch on %esi: %ch
   holds its low byte and %eax holds (%esi & 63), both set up at entry.  */
95 L(continue_48):
96 and $15, %ch
97 jz L(continue_48_00)
98 cmp $16, %eax
99 jb L(continue_0_48)
100 cmp $32, %eax
101 jb L(continue_16_48)
102 cmp $48, %eax
103 jb L(continue_32_48)
104
/* L(continue_48_48): 64-byte loop.  Bytes 0..15 are compared element by
   element; bytes 16..63 are compared with three unaligned 16-byte loads.
   NOTE(review): the scalar part presumably exists so that no 16-byte
   load straddles the 64-byte boundary just ahead of either pointer --
   confirm against the original design notes.
   Invariant: %xmm0 is all zero at the start of every vector step.  */
105 .p2align 4
106 L(continue_48_48):
107 mov (%esi), %ecx
108 cmp %ecx, (%edi)
109 jne L(nequal)
110 test %ecx, %ecx
111 jz L(equal)
112
113 mov 4(%esi), %ecx
114 cmp %ecx, 4(%edi)
115 jne L(nequal)
116 test %ecx, %ecx
117 jz L(equal)
118
119 mov 8(%esi), %ecx
120 cmp %ecx, 8(%edi)
121 jne L(nequal)
122 test %ecx, %ecx
123 jz L(equal)
124
125 mov 12(%esi), %ecx
126 cmp %ecx, 12(%edi)
127 jne L(nequal)
128 test %ecx, %ecx
129 jz L(equal)
130
/* Vector step: after psubb a byte has its top bit set only where the
   dword was equal AND non-null, so the pmovmskb mask is 0xffff exactly
   when all four elements match and none terminates the string.  */
131 movdqu 16(%edi), %xmm1
132 movdqu 16(%esi), %xmm2
133 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
134 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
135 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
136 pmovmskb %xmm1, %edx
137 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
138 jnz L(less4_double_words_16)
139
140 movdqu 32(%edi), %xmm1
141 movdqu 32(%esi), %xmm2
142 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
143 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
144 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
145 pmovmskb %xmm1, %edx
146 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
147 jnz L(less4_double_words_32)
148
149 movdqu 48(%edi), %xmm1
150 movdqu 48(%esi), %xmm2
151 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
152 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
153 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
154 pmovmskb %xmm1, %edx
155 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
156 jnz L(less4_double_words_48)
157
/* All 64 bytes matched with no terminator: advance and repeat.  */
158 add $64, %esi
159 add $64, %edi
160 jmp L(continue_48_48)
161
/* L(continue_0): %edi is not 16-byte aligned and (%edi & 63) < 16.
   Second-level dispatch on %esi (%ch = low byte, %eax = %esi & 63).  */
162 L(continue_0):
163 and $15, %ch
164 jz L(continue_0_00)
165 cmp $16, %eax
166 jb L(continue_0_0)
167 cmp $32, %eax
168 jb L(continue_0_16)
169 cmp $48, %eax
170 jb L(continue_0_32)
171
/* L(continue_0_48): 64-byte loop for one pointer in the [0,16) bucket
   and the other in the [48,64) bucket (it is also entered from
   L(continue_48) with the roles of the pointers swapped).
   Bytes 0..15 and 48..63 are compared element by element; bytes 16..47
   use two unaligned 16-byte vector steps.  */
172 .p2align 4
173 L(continue_0_48):
174 mov (%esi), %ecx
175 cmp %ecx, (%edi)
176 jne L(nequal)
177 test %ecx, %ecx
178 jz L(equal)
179
180 mov 4(%esi), %ecx
181 cmp %ecx, 4(%edi)
182 jne L(nequal)
183 test %ecx, %ecx
184 jz L(equal)
185
186 mov 8(%esi), %ecx
187 cmp %ecx, 8(%edi)
188 jne L(nequal)
189 test %ecx, %ecx
190 jz L(equal)
191
192 mov 12(%esi), %ecx
193 cmp %ecx, 12(%edi)
194 jne L(nequal)
195 test %ecx, %ecx
196 jz L(equal)
197
/* Vector steps: mask == 0xffff only if all four dwords are equal and
   non-null; any other value is resolved by the less4 handler.  */
198 movdqu 16(%edi), %xmm1
199 movdqu 16(%esi), %xmm2
200 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
201 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
202 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
203 pmovmskb %xmm1, %edx
204 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
205 jnz L(less4_double_words_16)
206
207 movdqu 32(%edi), %xmm1
208 movdqu 32(%esi), %xmm2
209 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
210 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
211 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
212 pmovmskb %xmm1, %edx
213 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
214 jnz L(less4_double_words_32)
215
/* Bytes 48..63: scalar again.  */
216 mov 48(%esi), %ecx
217 cmp %ecx, 48(%edi)
218 jne L(nequal)
219 test %ecx, %ecx
220 jz L(equal)
221
222 mov 52(%esi), %ecx
223 cmp %ecx, 52(%edi)
224 jne L(nequal)
225 test %ecx, %ecx
226 jz L(equal)
227
228 mov 56(%esi), %ecx
229 cmp %ecx, 56(%edi)
230 jne L(nequal)
231 test %ecx, %ecx
232 jz L(equal)
233
234 mov 60(%esi), %ecx
235 cmp %ecx, 60(%edi)
236 jne L(nequal)
237 test %ecx, %ecx
238 jz L(equal)
239
240 add $64, %esi
241 add $64, %edi
242 jmp L(continue_0_48)
243
/* L(continue_00): %edi (s1) is 16-byte aligned.  Second-level dispatch
   on %esi (%ch = low byte, %eax = %esi & 63).  */
244 .p2align 4
245 L(continue_00):
246 and $15, %ch
247 jz L(continue_00_00)
248 cmp $16, %eax
249 jb L(continue_00_0)
250 cmp $32, %eax
251 jb L(continue_00_16)
252 cmp $48, %eax
253 jb L(continue_00_32)
254
/* L(continue_00_48): 64-byte loop with %edi 16-byte aligned on every
   path into it, so (%edi) can be used directly as a pcmpeqd memory
   operand.
   Bytes 0..15: first look for a terminator in the aligned s1 block.  If
   there is one, L(less4_double_words1) finishes element by element.
   Otherwise the four s1 elements are known to be non-null and a plain
   equality check against s2 suffices (a null in s2 would mismatch).
   Bytes 16..63: three vector steps with unaligned loads from %esi.  */
255 .p2align 4
256 L(continue_00_48):
257 pcmpeqd (%edi), %xmm0
258 mov (%edi), %eax /* %eax = s1[0], also used by less4_double_words1 */
259 pmovmskb %xmm0, %ecx
260 test %ecx, %ecx
261 jnz L(less4_double_words1)
262
/* Operand order differs from the other loops, but the flags still
   describe s1 element - s2 element, as L(nequal) expects.  */
263 cmp (%esi), %eax
264 jne L(nequal)
265
266 mov 4(%edi), %eax
267 cmp 4(%esi), %eax
268 jne L(nequal)
269
270 mov 8(%edi), %eax
271 cmp 8(%esi), %eax
272 jne L(nequal)
273
274 mov 12(%edi), %eax
275 cmp 12(%esi), %eax
276 jne L(nequal)
277
/* Here the null check is done on the s2 data in %xmm2; together with the
   equality test this still yields mask == 0xffff only when all four
   elements are equal and non-null.  */
278 movdqu 16(%esi), %xmm2
279 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
280 pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
281 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
282 pmovmskb %xmm2, %edx
283 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
284 jnz L(less4_double_words_16)
285
286 movdqu 32(%esi), %xmm2
287 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
288 pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
289 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
290 pmovmskb %xmm2, %edx
291 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
292 jnz L(less4_double_words_32)
293
294 movdqu 48(%esi), %xmm2
295 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
296 pcmpeqd 48(%edi), %xmm2 /* compare first 4 double_words for equality */
297 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
298 pmovmskb %xmm2, %edx
299 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
300 jnz L(less4_double_words_48)
301
302 add $64, %esi
303 add $64, %edi
304 jmp L(continue_00_48)
305
/* L(continue_32): %edi is not 16-byte aligned and (%edi & 63) is in
   [32,48).  Second-level dispatch on %esi (%ch = low byte,
   %eax = %esi & 63).  */
306 .p2align 4
307 L(continue_32):
308 and $15, %ch
309 jz L(continue_32_00)
310 cmp $16, %eax
311 jb L(continue_0_32)
312 cmp $32, %eax
313 jb L(continue_16_32)
314 cmp $48, %eax
315 jb L(continue_32_32)
316
/* L(continue_32_48): 64-byte loop for one pointer in the [32,48) bucket
   and the other in [48,64).  Bytes 0..31 are compared element by
   element; bytes 32..63 use two unaligned vector steps.  */
317 .p2align 4
318 L(continue_32_48):
319 mov (%esi), %ecx
320 cmp %ecx, (%edi)
321 jne L(nequal)
322 test %ecx, %ecx
323 jz L(equal)
324
325 mov 4(%esi), %ecx
326 cmp %ecx, 4(%edi)
327 jne L(nequal)
328 test %ecx, %ecx
329 jz L(equal)
330
331 mov 8(%esi), %ecx
332 cmp %ecx, 8(%edi)
333 jne L(nequal)
334 test %ecx, %ecx
335 jz L(equal)
336
337 mov 12(%esi), %ecx
338 cmp %ecx, 12(%edi)
339 jne L(nequal)
340 test %ecx, %ecx
341 jz L(equal)
342
343 mov 16(%esi), %ecx
344 cmp %ecx, 16(%edi)
345 jne L(nequal)
346 test %ecx, %ecx
347 jz L(equal)
348
349 mov 20(%esi), %ecx
350 cmp %ecx, 20(%edi)
351 jne L(nequal)
352 test %ecx, %ecx
353 jz L(equal)
354
355 mov 24(%esi), %ecx
356 cmp %ecx, 24(%edi)
357 jne L(nequal)
358 test %ecx, %ecx
359 jz L(equal)
360
361 mov 28(%esi), %ecx
362 cmp %ecx, 28(%edi)
363 jne L(nequal)
364 test %ecx, %ecx
365 jz L(equal)
366
/* Vector steps: mask == 0xffff only if all four dwords are equal and
   non-null.  */
367 movdqu 32(%edi), %xmm1
368 movdqu 32(%esi), %xmm2
369 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
370 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
371 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
372 pmovmskb %xmm1, %edx
373 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
374 jnz L(less4_double_words_32)
375
376 movdqu 48(%edi), %xmm1
377 movdqu 48(%esi), %xmm2
378 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
379 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
380 psubb %xmm0, %xmm1 /* packed sub of comparison results */
381 pmovmskb %xmm1, %edx
382 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
383 jnz L(less4_double_words_48)
384
385 add $64, %esi
386 add $64, %edi
387 jmp L(continue_32_48)
388
/* L(continue_16): %edi is not 16-byte aligned and (%edi & 63) is in
   [16,32).  Second-level dispatch on %esi (%ch = low byte,
   %eax = %esi & 63).  */
389 .p2align 4
390 L(continue_16):
391 and $15, %ch
392 jz L(continue_16_00)
393 cmp $16, %eax
394 jb L(continue_0_16)
395 cmp $32, %eax
396 jb L(continue_16_16)
397 cmp $48, %eax
398 jb L(continue_16_32)
399
/* L(continue_16_48): 64-byte loop for one pointer in the [16,32) bucket
   and the other in [48,64).  Bytes 0..15 and 32..47 are compared element
   by element; bytes 16..31 and 48..63 use unaligned vector steps.  */
400 .p2align 4
401 L(continue_16_48):
402 mov (%esi), %ecx
403 cmp %ecx, (%edi)
404 jne L(nequal)
405 test %ecx, %ecx
406 jz L(equal)
407
408 mov 4(%esi), %ecx
409 cmp %ecx, 4(%edi)
410 jne L(nequal)
411 test %ecx, %ecx
412 jz L(equal)
413
414 mov 8(%esi), %ecx
415 cmp %ecx, 8(%edi)
416 jne L(nequal)
417 test %ecx, %ecx
418 jz L(equal)
419
420 mov 12(%esi), %ecx
421 cmp %ecx, 12(%edi)
422 jne L(nequal)
423 test %ecx, %ecx
424 jz L(equal)
425
426 movdqu 16(%edi), %xmm1
427 movdqu 16(%esi), %xmm2
428 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
429 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
430 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
431 pmovmskb %xmm1, %edx
432 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
433 jnz L(less4_double_words_16)
434
/* Bytes 32..47: scalar again.  */
435 mov 32(%esi), %ecx
436 cmp %ecx, 32(%edi)
437 jne L(nequal)
438 test %ecx, %ecx
439 jz L(equal)
440
441 mov 36(%esi), %ecx
442 cmp %ecx, 36(%edi)
443 jne L(nequal)
444 test %ecx, %ecx
445 jz L(equal)
446
447 mov 40(%esi), %ecx
448 cmp %ecx, 40(%edi)
449 jne L(nequal)
450 test %ecx, %ecx
451 jz L(equal)
452
453 mov 44(%esi), %ecx
454 cmp %ecx, 44(%edi)
455 jne L(nequal)
456 test %ecx, %ecx
457 jz L(equal)
458
459 movdqu 48(%edi), %xmm1
460 movdqu 48(%esi), %xmm2
461 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
462 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
463 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
464 pmovmskb %xmm1, %edx
465 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
466 jnz L(less4_double_words_48)
467
468 add $64, %esi
469 add $64, %edi
470 jmp L(continue_16_48)
471
/* L(continue_00_00): both %edi and %esi are 16-byte aligned, so the loop
   uses aligned loads (movdqa) and aligned memory operands throughout.
   Four vector steps cover 64 bytes per iteration; each step leaves
   %xmm0 zero when it does not branch.  */
472 .p2align 4
473 L(continue_00_00):
474 movdqa (%edi), %xmm1
475 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
476 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
477 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
478 pmovmskb %xmm1, %edx
479 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
480 jnz L(less4_double_words)
481
482 movdqa 16(%edi), %xmm3
483 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
484 pcmpeqd 16(%esi), %xmm3 /* compare first 4 double_words for equality */
485 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
486 pmovmskb %xmm3, %edx
487 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
488 jnz L(less4_double_words_16)
489
490 movdqa 32(%edi), %xmm5
491 pcmpeqd %xmm5, %xmm0 /* Any null double_word? */
492 pcmpeqd 32(%esi), %xmm5 /* compare first 4 double_words for equality */
493 psubb %xmm0, %xmm5 /* packed sub of comparison results*/
494 pmovmskb %xmm5, %edx
495 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
496 jnz L(less4_double_words_32)
497
498 movdqa 48(%edi), %xmm1
499 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
500 pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
501 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
502 pmovmskb %xmm1, %edx
503 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
504 jnz L(less4_double_words_48)
505
506 add $64, %esi
507 add $64, %edi
508 jmp L(continue_00_00)
509
/* L(continue_00_32): %edi is 16-byte aligned, %esi is unaligned with
   (%esi & 63) in [32,48).  One vector step over bytes 0..15, then both
   pointers advance by 16, which moves %esi into the [48,64) bucket
   handled by L(continue_00_48).  */
510 .p2align 4
511 L(continue_00_32):
512 movdqu (%esi), %xmm2
513 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
514 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
515 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
516 pmovmskb %xmm2, %edx
517 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
518 jnz L(less4_double_words)
519
520 add $16, %esi
521 add $16, %edi
522 jmp L(continue_00_48)
523
/* L(continue_00_16): %edi is 16-byte aligned, %esi is unaligned with
   (%esi & 63) in [16,32).  Two vector steps over bytes 0..31, then both
   pointers advance by 32, which moves %esi into the [48,64) bucket
   handled by L(continue_00_48).  */
524 .p2align 4
525 L(continue_00_16):
526 movdqu (%esi), %xmm2
527 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
528 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
529 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
530 pmovmskb %xmm2, %edx
531 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
532 jnz L(less4_double_words)
533
534 movdqu 16(%esi), %xmm2
535 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
536 pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
537 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
538 pmovmskb %xmm2, %edx
539 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
540 jnz L(less4_double_words_16)
541
542 add $32, %esi
543 add $32, %edi
544 jmp L(continue_00_48)
545
/* L(continue_00_0): %edi is 16-byte aligned, %esi is unaligned with
   (%esi & 63) < 16.  Three vector steps over bytes 0..47, then both
   pointers advance by 48, which moves %esi into the [48,64) bucket
   handled by L(continue_00_48).  */
546 .p2align 4
547 L(continue_00_0):
548 movdqu (%esi), %xmm2
549 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
550 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
551 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
552 pmovmskb %xmm2, %edx
553 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
554 jnz L(less4_double_words)
555
556 movdqu 16(%esi), %xmm2
557 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
558 pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
559 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
560 pmovmskb %xmm2, %edx
561 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
562 jnz L(less4_double_words_16)
563
564 movdqu 32(%esi), %xmm2
565 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
566 pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
567 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
568 pmovmskb %xmm2, %edx
569 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
570 jnz L(less4_double_words_32)
571
572 add $48, %esi
573 add $48, %edi
574 jmp L(continue_00_48)
575
/* L(continue_48_00): 64-byte loop with %esi (s2) 16-byte aligned on every
   path into it and %edi unaligned in the [48,64) bucket.
   Bytes 0..15: look for a terminator in the aligned s2 block first.  If
   one is present, L(less4_double_words1) finishes element by element.
   Otherwise the four s2 elements are non-null and a plain equality check
   is enough.
   Bytes 16..63: three vector steps with unaligned loads from %edi and
   aligned memory operands from %esi.  */
576 .p2align 4
577 L(continue_48_00):
578 pcmpeqd (%esi), %xmm0
579 mov (%edi), %eax /* %eax = s1[0], also used by less4_double_words1 */
580 pmovmskb %xmm0, %ecx
581 test %ecx, %ecx
582 jnz L(less4_double_words1)
583
/* Flags describe s1 element - s2 element, as L(nequal) expects.  */
584 cmp (%esi), %eax
585 jne L(nequal)
586
587 mov 4(%edi), %eax
588 cmp 4(%esi), %eax
589 jne L(nequal)
590
591 mov 8(%edi), %eax
592 cmp 8(%esi), %eax
593 jne L(nequal)
594
595 mov 12(%edi), %eax
596 cmp 12(%esi), %eax
597 jne L(nequal)
598
599 movdqu 16(%edi), %xmm1
600 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
601 pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
602 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
603 pmovmskb %xmm1, %edx
604 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
605 jnz L(less4_double_words_16)
606
607 movdqu 32(%edi), %xmm1
608 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
609 pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
610 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
611 pmovmskb %xmm1, %edx
612 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
613 jnz L(less4_double_words_32)
614
615 movdqu 48(%edi), %xmm1
616 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
617 pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
618 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
619 pmovmskb %xmm1, %edx
620 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
621 jnz L(less4_double_words_48)
622
623 add $64, %esi
624 add $64, %edi
625 jmp L(continue_48_00)
626
/* L(continue_32_00): %esi is 16-byte aligned, %edi is unaligned with
   (%edi & 63) in [32,48).  One vector step over bytes 0..15, then both
   pointers advance by 16, which moves %edi into the [48,64) bucket
   handled by L(continue_48_00).  */
627 .p2align 4
628 L(continue_32_00):
629 movdqu (%edi), %xmm1
630 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
631 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
632 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
633 pmovmskb %xmm1, %edx
634 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
635 jnz L(less4_double_words)
636
637 add $16, %esi
638 add $16, %edi
639 jmp L(continue_48_00)
640
/* L(continue_16_00): %esi is 16-byte aligned, %edi is unaligned with
   (%edi & 63) in [16,32).  Two vector steps over bytes 0..31, then both
   pointers advance by 32, which moves %edi into the [48,64) bucket
   handled by L(continue_48_00).  */
641 .p2align 4
642 L(continue_16_00):
643 movdqu (%edi), %xmm1
644 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
645 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
646 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
647 pmovmskb %xmm1, %edx
648 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
649 jnz L(less4_double_words)
650
651 movdqu 16(%edi), %xmm1
652 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
653 pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
654 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
655 pmovmskb %xmm1, %edx
656 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
657 jnz L(less4_double_words_16)
658
659 add $32, %esi
660 add $32, %edi
661 jmp L(continue_48_00)
662
/* L(continue_0_00): %esi is 16-byte aligned, %edi is unaligned with
   (%edi & 63) < 16.  Three vector steps over bytes 0..47, then both
   pointers advance by 48, which moves %edi into the [48,64) bucket
   handled by L(continue_48_00).  */
663 .p2align 4
664 L(continue_0_00):
665 movdqu (%edi), %xmm1
666 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
667 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
668 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
669 pmovmskb %xmm1, %edx
670 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
671 jnz L(less4_double_words)
672
673 movdqu 16(%edi), %xmm1
674 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
675 pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
676 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
677 pmovmskb %xmm1, %edx
678 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
679 jnz L(less4_double_words_16)
680
681 movdqu 32(%edi), %xmm1
682 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
683 pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
684 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
685 pmovmskb %xmm1, %edx
686 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
687 jnz L(less4_double_words_32)
688
689 add $48, %esi
690 add $48, %edi
691 jmp L(continue_48_00)
692
/* L(continue_32_32): both pointers are unaligned and in the [32,48)
   bucket of their 64-byte block.  One vector step over bytes 0..15, then
   both advance by 16 into the [48,64) bucket and continue in
   L(continue_48_48).  */
693 .p2align 4
694 L(continue_32_32):
695 movdqu (%edi), %xmm1
696 movdqu (%esi), %xmm2
697 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
698 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
699 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
700 pmovmskb %xmm1, %edx
701 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
702 jnz L(less4_double_words)
703
704 add $16, %esi
705 add $16, %edi
706 jmp L(continue_48_48)
707
/* L(continue_16_16): both pointers are unaligned and in the [16,32)
   bucket of their 64-byte block.  Two vector steps over bytes 0..31,
   then both advance by 32 into the [48,64) bucket and continue in
   L(continue_48_48).  */
708 .p2align 4
709 L(continue_16_16):
710 movdqu (%edi), %xmm1
711 movdqu (%esi), %xmm2
712 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
713 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
714 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
715 pmovmskb %xmm1, %edx
716 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
717 jnz L(less4_double_words)
718
719 movdqu 16(%edi), %xmm3
720 movdqu 16(%esi), %xmm4
721 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
722 pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
723 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
724 pmovmskb %xmm3, %edx
725 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
726 jnz L(less4_double_words_16)
727
728 add $32, %esi
729 add $32, %edi
730 jmp L(continue_48_48)
731
/* L(continue_0_0): both pointers are unaligned and in the [0,16) bucket
   of their 64-byte block.  Three vector steps over bytes 0..47, then
   both advance by 48 into the [48,64) bucket and continue in
   L(continue_48_48).  */
732 .p2align 4
733 L(continue_0_0):
734 movdqu (%edi), %xmm1
735 movdqu (%esi), %xmm2
736 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
737 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
738 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
739 pmovmskb %xmm1, %edx
740 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
741 jnz L(less4_double_words)
742
743 movdqu 16(%edi), %xmm3
744 movdqu 16(%esi), %xmm4
745 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
746 pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
747 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
748 pmovmskb %xmm3, %edx
749 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
750 jnz L(less4_double_words_16)
751
752 movdqu 32(%edi), %xmm1
753 movdqu 32(%esi), %xmm2
754 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
755 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
756 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
757 pmovmskb %xmm1, %edx
758 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
759 jnz L(less4_double_words_32)
760
761 add $48, %esi
762 add $48, %edi
763 jmp L(continue_48_48)
764
/* L(continue_0_16): both pointers are unaligned, one in the [0,16)
   bucket and the other in [16,32) (entered from L(continue_0) and from
   L(continue_16)).  Two vector steps over bytes 0..31, then both advance
   by 32, giving buckets [32,48) and [48,64): continue in
   L(continue_32_48).  */
765 .p2align 4
766 L(continue_0_16):
767 movdqu (%edi), %xmm1
768 movdqu (%esi), %xmm2
769 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
770 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
771 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
772 pmovmskb %xmm1, %edx
773 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
774 jnz L(less4_double_words)
775
776 movdqu 16(%edi), %xmm1
777 movdqu 16(%esi), %xmm2
778 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
779 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
780 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
781 pmovmskb %xmm1, %edx
782 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
783 jnz L(less4_double_words_16)
784
785 add $32, %esi
786 add $32, %edi
787 jmp L(continue_32_48)
788
/* L(continue_0_32): both pointers are unaligned, one in the [0,16)
   bucket and the other in [32,48) (entered from L(continue_0) and from
   L(continue_32)).  One vector step over bytes 0..15, then both advance
   by 16, giving buckets [16,32) and [48,64): continue in
   L(continue_16_48).  */
789 .p2align 4
790 L(continue_0_32):
791 movdqu (%edi), %xmm1
792 movdqu (%esi), %xmm2
793 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
794 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
795 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
796 pmovmskb %xmm1, %edx
797 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
798 jnz L(less4_double_words)
799
800 add $16, %esi
801 add $16, %edi
802 jmp L(continue_16_48)
803
/* L(continue_16_32): both pointers are unaligned, one in the [16,32)
   bucket and the other in [32,48) (entered from L(continue_16) and from
   L(continue_32)).  One vector step over bytes 0..15, then both advance
   by 16, giving buckets [32,48) and [48,64): continue in
   L(continue_32_48).  */
804 .p2align 4
805 L(continue_16_32):
806 movdqu (%edi), %xmm1
807 movdqu (%esi), %xmm2
808 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
809 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
810 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
811 pmovmskb %xmm1, %edx
812 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
813 jnz L(less4_double_words)
814
815 add $16, %esi
816 add $16, %edi
817 jmp L(continue_32_48)
818
/* L(less4_double_words1): tail for L(continue_00_48) and
   L(continue_48_00).  On entry the aligned 16-byte block at offset 0 is
   known to contain a null element, and %eax already holds (%edi).
   Elements 0..2 are compared and null-tested one by one.  */
819 .p2align 4
820 L(less4_double_words1):
821 cmp (%esi), %eax
822 jne L(nequal)
823 test %eax, %eax
824 jz L(equal)
825
826 mov 4(%esi), %ecx
827 cmp %ecx, 4(%edi)
828 jne L(nequal)
829 test %ecx, %ecx
830 jz L(equal)
831
832 mov 8(%esi), %ecx
833 cmp %ecx, 8(%edi)
834 jne L(nequal)
835 test %ecx, %ecx
836 jz L(equal)
837
/* Elements 0..2 were equal and non-null, so the null found earlier is in
   element 3 of the aligned string.  If element 3 compares equal, the
   strings end here and the result is 0; no separate null test is
   needed.  */
838 mov 12(%esi), %ecx
839 cmp %ecx, 12(%edi)
840 jne L(nequal)
841 xor %eax, %eax
842 RETURN
843
/* L(less4_double_words): resolve a vector step over bytes 0..15 that did
   not produce the all-equal, non-null mask.
   On entry %edx = mask - 0xffff, where each 4-bit group of the 16-bit
   mask is 0xf if that dword was equal and non-null, else 0.  Hence the
   low 16 bits of %edx equal mask + 1:
     %dl == 0       <=> dwords 0 and 1 were both fine,
     %dl & 15 == 0  <=> dword 0 was fine (checked when %dl != 0),
     %dh & 15 == 0  <=> dword 2 was fine (checked when %dl == 0).
   The first dword that was not fine is compared again.  If it differs,
   L(nequal) produces +1/-1; if it is equal it must be the terminator,
   and the function returns the 0 placed in %eax here.  */
844 .p2align 4
845 L(less4_double_words):
846 xor %eax, %eax
847 test %dl, %dl
848 jz L(next_two_double_words)
849 and $15, %dl
850 jz L(second_double_word)
851 mov (%esi), %ecx
852 cmp %ecx, (%edi)
853 jne L(nequal)
854 RETURN
855
856 .p2align 4
857 L(second_double_word):
858 mov 4(%esi), %ecx
859 cmp %ecx, 4(%edi)
860 jne L(nequal)
861 RETURN
862
863 .p2align 4
864 L(next_two_double_words):
865 and $15, %dh
866 jz L(fourth_double_word)
867 mov 8(%esi), %ecx
868 cmp %ecx, 8(%edi)
869 jne L(nequal)
870 RETURN
871
872 .p2align 4
873 L(fourth_double_word):
874 mov 12(%esi), %ecx
875 cmp %ecx, 12(%edi)
876 jne L(nequal)
877 RETURN
878
/* L(less4_double_words_16): same decoding as L(less4_double_words), for
   a vector step over bytes 16..31.
   On entry %edx = mask - 0xffff, so its low 16 bits equal mask + 1:
   %dl == 0 means dwords 0 and 1 of the group were equal and non-null,
   and a zero low nibble of %dl / %dh means dword 0 / dword 2 was.
   The first offending dword (offset 16, 20, 24 or 28) is compared again;
   if it is equal it is the terminator and 0 is returned.  */
879 .p2align 4
880 L(less4_double_words_16):
881 xor %eax, %eax
882 test %dl, %dl
883 jz L(next_two_double_words_16)
884 and $15, %dl
885 jz L(second_double_word_16)
886 mov 16(%esi), %ecx
887 cmp %ecx, 16(%edi)
888 jne L(nequal)
889 RETURN
890
891 .p2align 4
892 L(second_double_word_16):
893 mov 20(%esi), %ecx
894 cmp %ecx, 20(%edi)
895 jne L(nequal)
896 RETURN
897
898 .p2align 4
899 L(next_two_double_words_16):
900 and $15, %dh
901 jz L(fourth_double_word_16)
902 mov 24(%esi), %ecx
903 cmp %ecx, 24(%edi)
904 jne L(nequal)
905 RETURN
906
907 .p2align 4
908 L(fourth_double_word_16):
909 mov 28(%esi), %ecx
910 cmp %ecx, 28(%edi)
911 jne L(nequal)
912 RETURN
913
/* L(less4_double_words_32): same decoding as L(less4_double_words), for
   a vector step over bytes 32..47.
   On entry %edx = mask - 0xffff, so its low 16 bits equal mask + 1:
   %dl == 0 means dwords 0 and 1 of the group were equal and non-null,
   and a zero low nibble of %dl / %dh means dword 0 / dword 2 was.
   The first offending dword (offset 32, 36, 40 or 44) is compared again;
   if it is equal it is the terminator and 0 is returned.  */
914 .p2align 4
915 L(less4_double_words_32):
916 xor %eax, %eax
917 test %dl, %dl
918 jz L(next_two_double_words_32)
919 and $15, %dl
920 jz L(second_double_word_32)
921 mov 32(%esi), %ecx
922 cmp %ecx, 32(%edi)
923 jne L(nequal)
924 RETURN
925
926 .p2align 4
927 L(second_double_word_32):
928 mov 36(%esi), %ecx
929 cmp %ecx, 36(%edi)
930 jne L(nequal)
931 RETURN
932
933 .p2align 4
934 L(next_two_double_words_32):
935 and $15, %dh
936 jz L(fourth_double_word_32)
937 mov 40(%esi), %ecx
938 cmp %ecx, 40(%edi)
939 jne L(nequal)
940 RETURN
941
942 .p2align 4
943 L(fourth_double_word_32):
944 mov 44(%esi), %ecx
945 cmp %ecx, 44(%edi)
946 jne L(nequal)
947 RETURN
948
/* L(less4_double_words_48): same decoding as L(less4_double_words), for
   a vector step over bytes 48..63.
   On entry %edx = mask - 0xffff, so its low 16 bits equal mask + 1:
   %dl == 0 means dwords 0 and 1 of the group were equal and non-null,
   and a zero low nibble of %dl / %dh means dword 0 / dword 2 was.
   The first offending dword (offset 48, 52, 56 or 60) is compared again;
   if it is equal it is the terminator and 0 is returned.  */
949 .p2align 4
950 L(less4_double_words_48):
951 xor %eax, %eax
952 test %dl, %dl
953 jz L(next_two_double_words_48)
954 and $15, %dl
955 jz L(second_double_word_48)
956 mov 48(%esi), %ecx
957 cmp %ecx, 48(%edi)
958 jne L(nequal)
959 RETURN
960
961 .p2align 4
962 L(second_double_word_48):
963 mov 52(%esi), %ecx
964 cmp %ecx, 52(%edi)
965 jne L(nequal)
966 RETURN
967
968 .p2align 4
969 L(next_two_double_words_48):
970 and $15, %dh
971 jz L(fourth_double_word_48)
972 mov 56(%esi), %ecx
973 cmp %ecx, 56(%edi)
974 jne L(nequal)
975 RETURN
976
977 .p2align 4
978 L(fourth_double_word_48):
979 mov 60(%esi), %ecx
980 cmp %ecx, 60(%edi)
981 jne L(nequal)
982 RETURN
983
/* Exit paths used after ENTRANCE, i.e. with %esi/%edi saved on the
   stack; all of them leave through RETURN.

   L(nequal) is always reached by a `jne' that directly follows a `cmp'
   of an s1 element against the matching s2 element, so the flags still
   describe s1[i] - s2[i] (`mov' does not change them).  `jg' is the
   signed test: the result is 1 if s1[i] > s2[i], otherwise -1.  */
984 .p2align 4
985 L(nequal):
986 mov $1, %eax
987 jg L(return)
988 neg %eax
989 RETURN
990
991 .p2align 4
992 L(return):
993 RETURN
994
/* L(equal): a terminator was reached with all elements equal: return 0.  */
995 .p2align 4
996 L(equal):
997 xorl %eax, %eax
998 RETURN
999
/* The labels below are only branched to from the scalar checks at
   function entry, which run before ENTRANCE: nothing is on the stack, so
   the CFI state is switched back to "no saved registers" and the exits
   use a plain `ret'.  */
1000 CFI_POP (%edi)
1001 CFI_POP (%esi)
1002
/* L(neq): flags come from the `cmp' of s1[i] against s2[i] that preceded
   the `jne'.  Signed result: 1 if s1[i] > s2[i], otherwise -1.  */
1003 .p2align 4
1004 L(neq):
1005 mov $1, %eax
1006 jg L(neq_bigger)
1007 neg %eax
1008
1009 L(neq_bigger):
1010 ret
1011
/* L(eq): terminator reached within the first four elements: return 0.  */
1012 .p2align 4
1013 L(eq):
1014 xorl %eax, %eax
1015 ret
1016
1017 END (__wcscmp_sse2)
1018 #endif