]>
Commit | Line | Data |
---|---|---|
1b48c537 | 1 | /* wcscmp with SSE2 |
2b778ceb | 2 | Copyright (C) 2011-2021 Free Software Foundation, Inc. |
1b48c537 UD |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 17 | License along with the GNU C Library; if not, see |
5a82c748 | 18 | <https://www.gnu.org/licenses/>. */ |
1b48c537 | 19 | |
4f41c682 | 20 | #if IS_IN (libc) |
1b48c537 UD |
21 | |
22 | # include <sysdep.h> | |
1b48c537 UD |
23 | |
/* CFI_PUSH: unwind annotation that goes with a 4-byte push of REG.  The cfi_* helpers are not defined in this file (presumably <sysdep.h> wrappers over the assembler's .cfi_* directives -- confirm).  */
24 | # define CFI_PUSH(REG) \ | |
25 | cfi_adjust_cfa_offset (4); \ | |
26 | cfi_rel_offset (REG, 0) | |
27 | ||
/* CFI_POP: unwind annotation that goes with a 4-byte pop of REG (the inverse of CFI_PUSH).  */
28 | # define CFI_POP(REG) \ | |
29 | cfi_adjust_cfa_offset (-4); \ | |
30 | cfi_restore (REG) | |
31 | ||
/* PUSH/POP: the actual pushl/popl followed by the matching unwind annotation.  */
32 | # define PUSH(REG) pushl REG; CFI_PUSH (REG) | |
33 | # define POP(REG) popl REG; CFI_POP (REG) | |
34 | ||
/* ENTRANCE saves %esi and %edi.  RETURN pops them in reverse order and returns; the two CFI_PUSH annotations after `ret' re-describe both registers as pushed for the code that follows each expansion.  PARMS is the %esp offset used for the first argument at entry; STR1 and STR2 are the offsets of the two string pointers.  */
1b48c537 UD |
35 | # define ENTRANCE PUSH(%esi); PUSH(%edi) |
36 | # define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi); | |
37 | # define PARMS 4 | |
38 | # define STR1 PARMS | |
39 | # define STR2 STR1+4 | |
40 | ||
95584d3b LD |
41 | /* Note: wcscmp uses signed comparison, not unsigned as in strcmp function. */ |
42 | ||
1b48c537 | 43 | .text |
/* Entry of __wcscmp_sse2.  Result in %eax: 1, 0 or -1, from a signed comparison of 32-bit elements.
   The first four elements are compared one at a time before any register is saved, so those
   exits go to L(neq)/L(eq).  After ENTRANCE, %edi walks the first string and %esi the second,
   %xmm0 is zeroed for terminator checks, and control is dispatched on each cursor's 16-byte
   alignment and offset within a 64-byte block.  The last dispatch falls through into the code
   that follows this block.  */
95584d3b | 44 | ENTRY (__wcscmp_sse2) |
1b48c537 UD |
45 | /* |
46 | * This implementation uses SSE to compare up to 16 bytes at a time. | |
47 | */ | |
48 | mov STR1(%esp), %edx /* %edx = first string pointer */ | |
49 | mov STR2(%esp), %eax /* %eax = second string pointer */ | |
50 | ||
51 | mov (%eax), %ecx | |
52 | cmp %ecx, (%edx) /* flags = first[0] - second[0]; L(neq) consumes them */ | |
53 | jne L(neq) | |
54 | test %ecx, %ecx | |
55 | jz L(eq) /* elements equal and zero */ | |
56 | ||
57 | mov 4(%eax), %ecx | |
58 | cmp %ecx, 4(%edx) | |
59 | jne L(neq) | |
60 | test %ecx, %ecx | |
61 | jz L(eq) | |
62 | ||
63 | mov 8(%eax), %ecx | |
64 | cmp %ecx, 8(%edx) | |
65 | jne L(neq) | |
66 | test %ecx, %ecx | |
67 | jz L(eq) | |
68 | ||
69 | mov 12(%eax), %ecx | |
70 | cmp %ecx, 12(%edx) | |
71 | jne L(neq) | |
72 | test %ecx, %ecx | |
73 | jz L(eq) | |
74 | ||
75 | ENTRANCE /* %esi/%edi are pushed from here on; later exits use RETURN */ | |
76 | add $16, %eax /* step past the 16 bytes already compared */ | |
77 | add $16, %edx | |
78 | ||
79 | mov %eax, %esi /* %esi = cursor in the second string */ | |
80 | mov %edx, %edi /* %edi = cursor in the first string */ | |
81 | pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ | |
82 | mov %al, %ch /* %ch = low byte of %esi, tested later for 16-byte alignment */ | |
83 | mov %dl, %cl /* %cl = low byte of %edi */ | |
84 | and $63, %eax /* esi alignment in cache line */ | |
85 | and $63, %edx /* edi alignment in cache line */ | |
86 | and $15, %cl | |
87 | jz L(continue_00) /* %edi is 16-byte aligned */ | |
88 | cmp $16, %edx | |
89 | jb L(continue_0) | |
90 | cmp $32, %edx | |
91 | jb L(continue_16) | |
92 | cmp $48, %edx | |
93 | jb L(continue_32) | |
94 | ||
/* L(continue_48): entered by fall-through when %edi is not 16-byte aligned and its 64-byte offset
   (%edx) is 48 or more.  Picks a loop from %esi: aligned (%ch & 15 == 0) or by its offset in %eax.
   L(continue_48_48): both cursors are unaligned with offsets of 48 or more.  Each iteration
   compares 16 bytes one element at a time, then three 16-byte SSE blocks at +16/+32/+48, then
   advances both cursors by 64.  NOTE(review): the scalar part presumably keeps 16-byte loads
   from straddling a 64-byte boundary -- confirm.
   SSE step: the byte mask in %edx is 0xffff only when all four elements are equal and non-zero;
   anything else branches to the decoder for that block.  %xmm0 is all-zero again on the
   fall-through path because no element matched zero.  */
95 | L(continue_48): | |
96 | and $15, %ch | |
97 | jz L(continue_48_00) | |
98 | cmp $16, %eax | |
99 | jb L(continue_0_48) | |
100 | cmp $32, %eax | |
101 | jb L(continue_16_48) | |
102 | cmp $48, %eax | |
103 | jb L(continue_32_48) | |
104 | ||
105 | .p2align 4 | |
106 | L(continue_48_48): | |
107 | mov (%esi), %ecx | |
108 | cmp %ecx, (%edi) /* flags = first - second; L(nequal) consumes them */ | |
109 | jne L(nequal) | |
110 | test %ecx, %ecx | |
111 | jz L(equal) | |
112 | ||
113 | mov 4(%esi), %ecx | |
114 | cmp %ecx, 4(%edi) | |
115 | jne L(nequal) | |
116 | test %ecx, %ecx | |
117 | jz L(equal) | |
118 | ||
119 | mov 8(%esi), %ecx | |
120 | cmp %ecx, 8(%edi) | |
121 | jne L(nequal) | |
122 | test %ecx, %ecx | |
123 | jz L(equal) | |
124 | ||
125 | mov 12(%esi), %ecx | |
126 | cmp %ecx, 12(%edi) | |
127 | jne L(nequal) | |
128 | test %ecx, %ecx | |
129 | jz L(equal) | |
f17424ed | 130 |
1b48c537 UD |
131 | movdqu 16(%edi), %xmm1 |
132 | movdqu 16(%esi), %xmm2 | |
133 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
134 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
135 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
136 | pmovmskb %xmm1, %edx | |
137 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
138 | jnz L(less4_double_words_16) | |
139 | ||
140 | movdqu 32(%edi), %xmm1 | |
141 | movdqu 32(%esi), %xmm2 | |
142 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
143 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
144 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
145 | pmovmskb %xmm1, %edx | |
146 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
147 | jnz L(less4_double_words_32) | |
148 | ||
149 | movdqu 48(%edi), %xmm1 | |
150 | movdqu 48(%esi), %xmm2 | |
151 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
152 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
153 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
154 | pmovmskb %xmm1, %edx | |
155 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
156 | jnz L(less4_double_words_48) | |
157 | ||
158 | add $64, %esi | |
159 | add $64, %edi | |
160 | jmp L(continue_48_48) | |
161 | ||
/* L(continue_0): %edi is unaligned with a 64-byte offset below 16.  Picks a loop from %esi:
   aligned (%ch & 15 == 0) or by its offset in %eax; offsets of 48 or more fall through.
   L(continue_0_48): one cursor has an offset below 16, the other 48 or more.  Each iteration
   compares bytes 0-15 one element at a time, bytes 16-47 as two SSE blocks, bytes 48-63 one
   element at a time, then advances both cursors by 64.
   SSE step: the byte mask in %edx is 0xffff only when all four elements are equal and non-zero;
   anything else branches to the decoder for that block.  */
162 | L(continue_0): | |
163 | and $15, %ch | |
164 | jz L(continue_0_00) | |
165 | cmp $16, %eax | |
166 | jb L(continue_0_0) | |
167 | cmp $32, %eax | |
168 | jb L(continue_0_16) | |
169 | cmp $48, %eax | |
170 | jb L(continue_0_32) | |
171 | ||
172 | .p2align 4 | |
173 | L(continue_0_48): | |
174 | mov (%esi), %ecx | |
175 | cmp %ecx, (%edi) /* flags = first - second; L(nequal) consumes them */ | |
176 | jne L(nequal) | |
177 | test %ecx, %ecx | |
178 | jz L(equal) | |
179 | ||
180 | mov 4(%esi), %ecx | |
181 | cmp %ecx, 4(%edi) | |
182 | jne L(nequal) | |
183 | test %ecx, %ecx | |
184 | jz L(equal) | |
185 | ||
186 | mov 8(%esi), %ecx | |
187 | cmp %ecx, 8(%edi) | |
188 | jne L(nequal) | |
189 | test %ecx, %ecx | |
190 | jz L(equal) | |
191 | ||
192 | mov 12(%esi), %ecx | |
193 | cmp %ecx, 12(%edi) | |
194 | jne L(nequal) | |
195 | test %ecx, %ecx | |
196 | jz L(equal) | |
197 | ||
198 | movdqu 16(%edi), %xmm1 | |
199 | movdqu 16(%esi), %xmm2 | |
200 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
201 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
202 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
203 | pmovmskb %xmm1, %edx | |
204 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
205 | jnz L(less4_double_words_16) | |
206 | ||
207 | movdqu 32(%edi), %xmm1 | |
208 | movdqu 32(%esi), %xmm2 | |
209 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
210 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
211 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
212 | pmovmskb %xmm1, %edx | |
213 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
214 | jnz L(less4_double_words_32) | |
215 | ||
216 | mov 48(%esi), %ecx | |
217 | cmp %ecx, 48(%edi) | |
218 | jne L(nequal) | |
219 | test %ecx, %ecx | |
220 | jz L(equal) | |
221 | ||
222 | mov 52(%esi), %ecx | |
223 | cmp %ecx, 52(%edi) | |
224 | jne L(nequal) | |
225 | test %ecx, %ecx | |
226 | jz L(equal) | |
227 | ||
228 | mov 56(%esi), %ecx | |
229 | cmp %ecx, 56(%edi) | |
230 | jne L(nequal) | |
231 | test %ecx, %ecx | |
232 | jz L(equal) | |
233 | ||
234 | mov 60(%esi), %ecx | |
235 | cmp %ecx, 60(%edi) | |
236 | jne L(nequal) | |
237 | test %ecx, %ecx | |
238 | jz L(equal) | |
239 | ||
240 | add $64, %esi | |
241 | add $64, %edi | |
242 | jmp L(continue_0_48) | |
243 | ||
/* L(continue_00): %edi is 16-byte aligned.  Picks a loop from %esi: aligned (%ch & 15 == 0) or
   by its 64-byte offset in %eax; offsets of 48 or more fall through.
   L(continue_00_48): %edi aligned, %esi unaligned with an offset of 48 or more.  The first
   16 bytes at %edi are checked for a zero element; if one exists, L(less4_double_words1)
   finishes element by element.  Otherwise the four elements are compared without terminator
   tests, since none of the first string's elements is zero here.  Bytes 16-63 are three SSE
   blocks, then both cursors advance by 64.
   SSE step: the byte mask in %edx is 0xffff only when all four elements are equal and non-zero;
   anything else branches to the decoder for that block.  */
244 | .p2align 4 | |
245 | L(continue_00): | |
246 | and $15, %ch | |
247 | jz L(continue_00_00) | |
248 | cmp $16, %eax | |
249 | jb L(continue_00_0) | |
250 | cmp $32, %eax | |
251 | jb L(continue_00_16) | |
252 | cmp $48, %eax | |
253 | jb L(continue_00_32) | |
254 | ||
255 | .p2align 4 | |
256 | L(continue_00_48): | |
257 | pcmpeqd (%edi), %xmm0 /* mark zero elements in the first string's block */ | |
258 | mov (%edi), %eax /* %eax = first element of the first string */ | |
259 | pmovmskb %xmm0, %ecx | |
260 | test %ecx, %ecx | |
261 | jnz L(less4_double_words1) | |
262 | ||
95584d3b LD |
263 | cmp (%esi), %eax |
264 | jne L(nequal) | |
f17424ed | 265 |
1b48c537 | 266 | mov 4(%edi), %eax |
95584d3b LD |
267 | cmp 4(%esi), %eax |
268 | jne L(nequal) | |
1b48c537 UD |
269 |
270 | mov 8(%edi), %eax | |
95584d3b LD |
271 | cmp 8(%esi), %eax |
272 | jne L(nequal) | |
1b48c537 UD |
273 |
274 | mov 12(%edi), %eax | |
95584d3b LD |
275 | cmp 12(%esi), %eax |
276 | jne L(nequal) | |
f17424ed | 277 |
1b48c537 UD |
278 | movdqu 16(%esi), %xmm2 |
279 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
280 | pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ | |
281 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
282 | pmovmskb %xmm2, %edx | |
283 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
284 | jnz L(less4_double_words_16) | |
285 | ||
286 | movdqu 32(%esi), %xmm2 | |
287 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
288 | pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */ | |
289 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
290 | pmovmskb %xmm2, %edx | |
291 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
292 | jnz L(less4_double_words_32) | |
293 | ||
294 | movdqu 48(%esi), %xmm2 | |
295 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
296 | pcmpeqd 48(%edi), %xmm2 /* compare first 4 double_words for equality */ | |
297 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
298 | pmovmskb %xmm2, %edx | |
299 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
300 | jnz L(less4_double_words_48) | |
301 | ||
302 | add $64, %esi | |
303 | add $64, %edi | |
304 | jmp L(continue_00_48) | |
305 | ||
/* L(continue_32): %edi is unaligned with a 64-byte offset in [32,48).  Picks a loop from %esi:
   aligned (%ch & 15 == 0) or by its offset in %eax; offsets of 48 or more fall through.
   L(continue_32_48): one cursor has an offset in [32,48), the other 48 or more.  Each iteration
   compares bytes 0-31 one element at a time, bytes 32-63 as two SSE blocks, then advances
   both cursors by 64.
   SSE step: the byte mask in %edx is 0xffff only when all four elements are equal and non-zero;
   anything else branches to the decoder for that block.  */
306 | .p2align 4 | |
307 | L(continue_32): | |
308 | and $15, %ch | |
309 | jz L(continue_32_00) | |
310 | cmp $16, %eax | |
311 | jb L(continue_0_32) | |
312 | cmp $32, %eax | |
313 | jb L(continue_16_32) | |
314 | cmp $48, %eax | |
315 | jb L(continue_32_32) | |
316 | ||
317 | .p2align 4 | |
318 | L(continue_32_48): | |
319 | mov (%esi), %ecx | |
320 | cmp %ecx, (%edi) /* flags = first - second; L(nequal) consumes them */ | |
321 | jne L(nequal) | |
322 | test %ecx, %ecx | |
323 | jz L(equal) | |
324 | ||
325 | mov 4(%esi), %ecx | |
326 | cmp %ecx, 4(%edi) | |
327 | jne L(nequal) | |
328 | test %ecx, %ecx | |
329 | jz L(equal) | |
330 | ||
331 | mov 8(%esi), %ecx | |
332 | cmp %ecx, 8(%edi) | |
333 | jne L(nequal) | |
334 | test %ecx, %ecx | |
335 | jz L(equal) | |
336 | ||
337 | mov 12(%esi), %ecx | |
338 | cmp %ecx, 12(%edi) | |
339 | jne L(nequal) | |
340 | test %ecx, %ecx | |
341 | jz L(equal) | |
342 | ||
343 | mov 16(%esi), %ecx | |
344 | cmp %ecx, 16(%edi) | |
345 | jne L(nequal) | |
346 | test %ecx, %ecx | |
347 | jz L(equal) | |
348 | ||
349 | mov 20(%esi), %ecx | |
350 | cmp %ecx, 20(%edi) | |
351 | jne L(nequal) | |
352 | test %ecx, %ecx | |
353 | jz L(equal) | |
354 | ||
355 | mov 24(%esi), %ecx | |
356 | cmp %ecx, 24(%edi) | |
357 | jne L(nequal) | |
358 | test %ecx, %ecx | |
359 | jz L(equal) | |
360 | ||
361 | mov 28(%esi), %ecx | |
362 | cmp %ecx, 28(%edi) | |
363 | jne L(nequal) | |
364 | test %ecx, %ecx | |
365 | jz L(equal) | |
366 | ||
367 | movdqu 32(%edi), %xmm1 | |
368 | movdqu 32(%esi), %xmm2 | |
369 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
370 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
371 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
372 | pmovmskb %xmm1, %edx | |
373 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
374 | jnz L(less4_double_words_32) | |
375 | ||
376 | movdqu 48(%edi), %xmm1 | |
377 | movdqu 48(%esi), %xmm2 | |
378 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
379 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
95584d3b | 380 | psubb %xmm0, %xmm1 /* packed sub of comparison results */ |
1b48c537 UD |
381 | pmovmskb %xmm1, %edx |
382 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
383 | jnz L(less4_double_words_48) | |
384 | ||
385 | add $64, %esi | |
386 | add $64, %edi | |
387 | jmp L(continue_32_48) | |
388 | ||
/* L(continue_16): %edi is unaligned with a 64-byte offset in [16,32).  Picks a loop from %esi:
   aligned (%ch & 15 == 0) or by its offset in %eax; offsets of 48 or more fall through.
   L(continue_16_48): one cursor has an offset in [16,32), the other 48 or more.  Each iteration
   alternates: bytes 0-15 element by element, bytes 16-31 as an SSE block, bytes 32-47 element
   by element, bytes 48-63 as an SSE block; then both cursors advance by 64.
   SSE step: the byte mask in %edx is 0xffff only when all four elements are equal and non-zero;
   anything else branches to the decoder for that block.  */
389 | .p2align 4 | |
390 | L(continue_16): | |
391 | and $15, %ch | |
392 | jz L(continue_16_00) | |
393 | cmp $16, %eax | |
394 | jb L(continue_0_16) | |
395 | cmp $32, %eax | |
396 | jb L(continue_16_16) | |
397 | cmp $48, %eax | |
398 | jb L(continue_16_32) | |
399 | ||
400 | .p2align 4 | |
401 | L(continue_16_48): | |
402 | mov (%esi), %ecx | |
403 | cmp %ecx, (%edi) /* flags = first - second; L(nequal) consumes them */ | |
404 | jne L(nequal) | |
405 | test %ecx, %ecx | |
406 | jz L(equal) | |
407 | ||
408 | mov 4(%esi), %ecx | |
409 | cmp %ecx, 4(%edi) | |
410 | jne L(nequal) | |
411 | test %ecx, %ecx | |
412 | jz L(equal) | |
413 | ||
414 | mov 8(%esi), %ecx | |
415 | cmp %ecx, 8(%edi) | |
416 | jne L(nequal) | |
417 | test %ecx, %ecx | |
418 | jz L(equal) | |
419 | ||
420 | mov 12(%esi), %ecx | |
421 | cmp %ecx, 12(%edi) | |
422 | jne L(nequal) | |
423 | test %ecx, %ecx | |
424 | jz L(equal) | |
425 | ||
426 | movdqu 16(%edi), %xmm1 | |
427 | movdqu 16(%esi), %xmm2 | |
428 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
429 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
430 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
431 | pmovmskb %xmm1, %edx | |
432 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
433 | jnz L(less4_double_words_16) | |
434 | ||
435 | mov 32(%esi), %ecx | |
436 | cmp %ecx, 32(%edi) | |
437 | jne L(nequal) | |
438 | test %ecx, %ecx | |
439 | jz L(equal) | |
440 | ||
441 | mov 36(%esi), %ecx | |
442 | cmp %ecx, 36(%edi) | |
443 | jne L(nequal) | |
444 | test %ecx, %ecx | |
445 | jz L(equal) | |
446 | ||
447 | mov 40(%esi), %ecx | |
448 | cmp %ecx, 40(%edi) | |
449 | jne L(nequal) | |
450 | test %ecx, %ecx | |
451 | jz L(equal) | |
452 | ||
453 | mov 44(%esi), %ecx | |
454 | cmp %ecx, 44(%edi) | |
455 | jne L(nequal) | |
456 | test %ecx, %ecx | |
457 | jz L(equal) | |
458 | ||
459 | movdqu 48(%edi), %xmm1 | |
460 | movdqu 48(%esi), %xmm2 | |
461 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
462 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
463 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
464 | pmovmskb %xmm1, %edx | |
465 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
466 | jnz L(less4_double_words_48) | |
467 | ||
468 | add $64, %esi | |
469 | add $64, %edi | |
470 | jmp L(continue_16_48) | |
471 | ||
/* L(continue_00_00): both cursors are 16-byte aligned (reached only when %cl & 15 and %ch & 15
   were both zero), so the first string is loaded with movdqa and the second is used as a
   memory operand.  Each iteration checks four SSE blocks (64 bytes) and advances by 64.
   SSE step: the byte mask in %edx is 0xffff only when all four elements are equal and non-zero;
   anything else branches to the decoder for that block.  */
472 | .p2align 4 | |
473 | L(continue_00_00): | |
474 | movdqa (%edi), %xmm1 | |
475 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
476 | pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ | |
477 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
478 | pmovmskb %xmm1, %edx | |
479 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
480 | jnz L(less4_double_words) | |
481 | ||
482 | movdqa 16(%edi), %xmm3 | |
483 | pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ | |
484 | pcmpeqd 16(%esi), %xmm3 /* compare first 4 double_words for equality */ | |
485 | psubb %xmm0, %xmm3 /* packed sub of comparison results*/ | |
486 | pmovmskb %xmm3, %edx | |
487 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
488 | jnz L(less4_double_words_16) | |
489 | ||
490 | movdqa 32(%edi), %xmm5 | |
491 | pcmpeqd %xmm5, %xmm0 /* Any null double_word? */ | |
492 | pcmpeqd 32(%esi), %xmm5 /* compare first 4 double_words for equality */ | |
493 | psubb %xmm0, %xmm5 /* packed sub of comparison results*/ | |
494 | pmovmskb %xmm5, %edx | |
495 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
496 | jnz L(less4_double_words_32) | |
497 | ||
498 | movdqa 48(%edi), %xmm1 | |
499 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
500 | pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
501 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
502 | pmovmskb %xmm1, %edx | |
503 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
504 | jnz L(less4_double_words_48) | |
505 | ||
506 | add $64, %esi | |
507 | add $64, %edi | |
508 | jmp L(continue_00_00) | |
509 | ||
/* L(continue_00_32): %edi aligned, %esi unaligned with a 64-byte offset in [32,48).  Checks one
   SSE block, then advances both cursors by 16 so that %esi's offset is 48 or more and joins
   L(continue_00_48).  A mask other than 0xffff (mismatch or zero element) goes to the decoder.  */
510 | .p2align 4 | |
511 | L(continue_00_32): | |
512 | movdqu (%esi), %xmm2 | |
513 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
514 | pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ | |
515 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
516 | pmovmskb %xmm2, %edx | |
517 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
518 | jnz L(less4_double_words) | |
519 | ||
520 | add $16, %esi | |
521 | add $16, %edi | |
522 | jmp L(continue_00_48) | |
523 | ||
/* L(continue_00_16): %edi aligned, %esi unaligned with a 64-byte offset in [16,32).  Checks two
   SSE blocks, then advances both cursors by 32 so that %esi's offset is 48 or more and joins
   L(continue_00_48).  A mask other than 0xffff (mismatch or zero element) goes to the decoder
   for that block.  */
524 | .p2align 4 | |
525 | L(continue_00_16): | |
526 | movdqu (%esi), %xmm2 | |
527 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
528 | pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ | |
529 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
530 | pmovmskb %xmm2, %edx | |
531 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
532 | jnz L(less4_double_words) | |
533 | ||
534 | movdqu 16(%esi), %xmm2 | |
535 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
536 | pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ | |
537 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
538 | pmovmskb %xmm2, %edx | |
539 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
540 | jnz L(less4_double_words_16) | |
541 | ||
542 | add $32, %esi | |
543 | add $32, %edi | |
544 | jmp L(continue_00_48) | |
545 | ||
/* L(continue_00_0): %edi aligned, %esi unaligned with a 64-byte offset below 16.  Checks three
   SSE blocks, then advances both cursors by 48 so that %esi's offset is 48 or more and joins
   L(continue_00_48).  A mask other than 0xffff (mismatch or zero element) goes to the decoder
   for that block.  */
546 | .p2align 4 | |
547 | L(continue_00_0): | |
548 | movdqu (%esi), %xmm2 | |
549 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
550 | pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ | |
551 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
552 | pmovmskb %xmm2, %edx | |
553 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
554 | jnz L(less4_double_words) | |
555 | ||
556 | movdqu 16(%esi), %xmm2 | |
557 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
558 | pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ | |
559 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
560 | pmovmskb %xmm2, %edx | |
561 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
562 | jnz L(less4_double_words_16) | |
563 | ||
564 | movdqu 32(%esi), %xmm2 | |
565 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
566 | pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */ | |
567 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
568 | pmovmskb %xmm2, %edx | |
569 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
570 | jnz L(less4_double_words_32) | |
571 | ||
572 | add $48, %esi | |
573 | add $48, %edi | |
574 | jmp L(continue_00_48) | |
575 | ||
/* L(continue_48_00): %esi is 16-byte aligned, %edi is unaligned with a 64-byte offset of 48 or
   more.  The first 16 bytes at %esi are checked for a zero element; if one exists,
   L(less4_double_words1) finishes element by element.  Otherwise the four elements are
   compared without terminator tests, since none of the second string's elements is zero
   here.  Bytes 16-63 are three SSE blocks, then both cursors advance by 64.
   SSE step: the byte mask in %edx is 0xffff only when all four elements are equal and non-zero;
   anything else branches to the decoder for that block.  */
576 | .p2align 4 | |
577 | L(continue_48_00): | |
578 | pcmpeqd (%esi), %xmm0 /* mark zero elements in the second string's block */ | |
579 | mov (%edi), %eax /* %eax = first element of the first string */ | |
580 | pmovmskb %xmm0, %ecx | |
581 | test %ecx, %ecx | |
582 | jnz L(less4_double_words1) | |
583 | ||
95584d3b LD |
584 | cmp (%esi), %eax |
585 | jne L(nequal) | |
f17424ed | 586 |
1b48c537 | 587 | mov 4(%edi), %eax |
95584d3b LD |
588 | cmp 4(%esi), %eax |
589 | jne L(nequal) | |
1b48c537 UD |
590 |
591 | mov 8(%edi), %eax | |
95584d3b LD |
592 | cmp 8(%esi), %eax |
593 | jne L(nequal) | |
1b48c537 UD |
594 |
595 | mov 12(%edi), %eax | |
95584d3b LD |
596 | cmp 12(%esi), %eax |
597 | jne L(nequal) | |
f17424ed | 598 |
1b48c537 UD |
599 | movdqu 16(%edi), %xmm1 |
600 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
601 | pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
602 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
603 | pmovmskb %xmm1, %edx | |
604 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
605 | jnz L(less4_double_words_16) | |
606 | ||
607 | movdqu 32(%edi), %xmm1 | |
608 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
609 | pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
610 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
611 | pmovmskb %xmm1, %edx | |
612 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
613 | jnz L(less4_double_words_32) | |
614 | ||
615 | movdqu 48(%edi), %xmm1 | |
616 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
617 | pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
618 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
619 | pmovmskb %xmm1, %edx | |
620 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
621 | jnz L(less4_double_words_48) | |
622 | ||
623 | add $64, %esi | |
624 | add $64, %edi | |
625 | jmp L(continue_48_00) | |
626 | ||
/* L(continue_32_00): %esi aligned, %edi unaligned with a 64-byte offset in [32,48).  Checks one
   SSE block, then advances both cursors by 16 so that %edi's offset is 48 or more and joins
   L(continue_48_00).  A mask other than 0xffff (mismatch or zero element) goes to the decoder.  */
627 | .p2align 4 | |
628 | L(continue_32_00): | |
629 | movdqu (%edi), %xmm1 | |
630 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
631 | pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ | |
632 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
633 | pmovmskb %xmm1, %edx | |
634 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
635 | jnz L(less4_double_words) | |
636 | ||
637 | add $16, %esi | |
638 | add $16, %edi | |
639 | jmp L(continue_48_00) | |
640 | ||
/* L(continue_16_00): %esi aligned, %edi unaligned with a 64-byte offset in [16,32).  Checks two
   SSE blocks, then advances both cursors by 32 so that %edi's offset is 48 or more and joins
   L(continue_48_00).  A mask other than 0xffff (mismatch or zero element) goes to the decoder
   for that block.  */
641 | .p2align 4 | |
642 | L(continue_16_00): | |
643 | movdqu (%edi), %xmm1 | |
644 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
645 | pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ | |
646 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
647 | pmovmskb %xmm1, %edx | |
648 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
649 | jnz L(less4_double_words) | |
650 | ||
651 | movdqu 16(%edi), %xmm1 | |
652 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
653 | pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
654 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
655 | pmovmskb %xmm1, %edx | |
656 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
657 | jnz L(less4_double_words_16) | |
658 | ||
659 | add $32, %esi | |
660 | add $32, %edi | |
661 | jmp L(continue_48_00) | |
662 | ||
/* L(continue_0_00): %esi aligned, %edi unaligned with a 64-byte offset below 16.  Checks three
   SSE blocks, then advances both cursors by 48 so that %edi's offset is 48 or more and joins
   L(continue_48_00).  A mask other than 0xffff (mismatch or zero element) goes to the decoder
   for that block.  */
663 | .p2align 4 | |
664 | L(continue_0_00): | |
665 | movdqu (%edi), %xmm1 | |
666 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
667 | pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ | |
668 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
669 | pmovmskb %xmm1, %edx | |
670 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
671 | jnz L(less4_double_words) | |
672 | ||
673 | movdqu 16(%edi), %xmm1 | |
674 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
675 | pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
676 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
677 | pmovmskb %xmm1, %edx | |
678 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
679 | jnz L(less4_double_words_16) | |
680 | ||
681 | movdqu 32(%edi), %xmm1 | |
682 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
683 | pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
684 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
685 | pmovmskb %xmm1, %edx | |
686 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
687 | jnz L(less4_double_words_32) | |
688 | ||
689 | add $48, %esi | |
690 | add $48, %edi | |
691 | jmp L(continue_48_00) | |
692 | ||
/* L(continue_32_32): both cursors unaligned with 64-byte offsets in [32,48).  Checks one SSE
   block, then advances both by 16 so that both offsets are 48 or more and joins
   L(continue_48_48).  A mask other than 0xffff (mismatch or zero element) goes to the decoder.  */
693 | .p2align 4 | |
694 | L(continue_32_32): | |
695 | movdqu (%edi), %xmm1 | |
696 | movdqu (%esi), %xmm2 | |
697 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
698 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
699 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
700 | pmovmskb %xmm1, %edx | |
701 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
702 | jnz L(less4_double_words) | |
703 | ||
704 | add $16, %esi | |
705 | add $16, %edi | |
706 | jmp L(continue_48_48) | |
707 | ||
/* L(continue_16_16): both cursors unaligned with 64-byte offsets in [16,32).  Checks two SSE
   blocks, then advances both by 32 so that both offsets are 48 or more and joins
   L(continue_48_48).  A mask other than 0xffff (mismatch or zero element) goes to the decoder
   for that block.  */
708 | .p2align 4 | |
709 | L(continue_16_16): | |
710 | movdqu (%edi), %xmm1 | |
711 | movdqu (%esi), %xmm2 | |
712 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
713 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
714 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
715 | pmovmskb %xmm1, %edx | |
716 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
717 | jnz L(less4_double_words) | |
718 | ||
719 | movdqu 16(%edi), %xmm3 | |
720 | movdqu 16(%esi), %xmm4 | |
721 | pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ | |
722 | pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ | |
723 | psubb %xmm0, %xmm3 /* packed sub of comparison results*/ | |
724 | pmovmskb %xmm3, %edx | |
725 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
726 | jnz L(less4_double_words_16) | |
727 | ||
728 | add $32, %esi | |
729 | add $32, %edi | |
730 | jmp L(continue_48_48) | |
731 | ||
/* L(continue_0_0): both cursors unaligned with 64-byte offsets below 16.  Checks three SSE
   blocks, then advances both by 48 so that both offsets are 48 or more and joins
   L(continue_48_48).  A mask other than 0xffff (mismatch or zero element) goes to the decoder
   for that block.  */
732 | .p2align 4 | |
733 | L(continue_0_0): | |
734 | movdqu (%edi), %xmm1 | |
735 | movdqu (%esi), %xmm2 | |
736 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
737 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
738 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
739 | pmovmskb %xmm1, %edx | |
740 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
741 | jnz L(less4_double_words) | |
742 | ||
743 | movdqu 16(%edi), %xmm3 | |
744 | movdqu 16(%esi), %xmm4 | |
745 | pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ | |
746 | pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ | |
747 | psubb %xmm0, %xmm3 /* packed sub of comparison results*/ | |
748 | pmovmskb %xmm3, %edx | |
749 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
750 | jnz L(less4_double_words_16) | |
751 | ||
752 | movdqu 32(%edi), %xmm1 | |
753 | movdqu 32(%esi), %xmm2 | |
754 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
755 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
756 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
757 | pmovmskb %xmm1, %edx | |
758 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
759 | jnz L(less4_double_words_32) | |
760 | ||
761 | add $48, %esi | |
762 | add $48, %edi | |
763 | jmp L(continue_48_48) | |
764 | ||
/* L(continue_0_16): both cursors unaligned; one has a 64-byte offset below 16, the other in
   [16,32).  Checks two SSE blocks, then advances both by 32 (offsets become [32,48) and
   [48,64)) and joins L(continue_32_48).  A mask other than 0xffff (mismatch or zero element)
   goes to the decoder for that block.  */
765 | .p2align 4 | |
766 | L(continue_0_16): | |
767 | movdqu (%edi), %xmm1 | |
768 | movdqu (%esi), %xmm2 | |
769 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
770 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
771 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
772 | pmovmskb %xmm1, %edx | |
773 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
774 | jnz L(less4_double_words) | |
775 | ||
776 | movdqu 16(%edi), %xmm1 | |
777 | movdqu 16(%esi), %xmm2 | |
778 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
779 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
780 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
781 | pmovmskb %xmm1, %edx | |
782 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
783 | jnz L(less4_double_words_16) | |
784 | ||
785 | add $32, %esi | |
786 | add $32, %edi | |
787 | jmp L(continue_32_48) | |
788 | ||
/* L(continue_0_32): both cursors unaligned; one has a 64-byte offset below 16, the other in
   [32,48).  Checks one SSE block, then advances both by 16 (offsets become [16,32) and
   [48,64)) and joins L(continue_16_48).  A mask other than 0xffff (mismatch or zero element)
   goes to the decoder.  */
789 | .p2align 4 | |
790 | L(continue_0_32): | |
791 | movdqu (%edi), %xmm1 | |
792 | movdqu (%esi), %xmm2 | |
793 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
794 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
795 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
796 | pmovmskb %xmm1, %edx | |
797 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
798 | jnz L(less4_double_words) | |
799 | ||
800 | add $16, %esi | |
801 | add $16, %edi | |
802 | jmp L(continue_16_48) | |
803 | ||
/* L(continue_16_32): both cursors unaligned; one has a 64-byte offset in [16,32), the other in
   [32,48).  Checks one SSE block, then advances both by 16 (offsets become [32,48) and
   [48,64)) and joins L(continue_32_48).  A mask other than 0xffff (mismatch or zero element)
   goes to the decoder.  */
804 | .p2align 4 | |
805 | L(continue_16_32): | |
806 | movdqu (%edi), %xmm1 | |
807 | movdqu (%esi), %xmm2 | |
808 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
809 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
810 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
811 | pmovmskb %xmm1, %edx | |
812 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
813 | jnz L(less4_double_words) | |
814 | ||
815 | add $16, %esi | |
816 | add $16, %edi | |
817 | jmp L(continue_32_48) | |
818 | ||
/* L(less4_double_words1): reached from L(continue_00_48)/L(continue_48_00) when the aligned
   16-byte block at the current position contains a zero element.  On entry %eax holds the
   first element of the first string.  Elements are compared one at a time with a terminator
   test after each of the first three.  If those are equal and non-zero, the zero must be the
   fourth element of the aligned block, so an equal fourth element means the strings are
   equal (return 0).  */
819 | .p2align 4 | |
820 | L(less4_double_words1): | |
821 | cmp (%esi), %eax /* flags = first - second; L(nequal) consumes them */ | |
822 | jne L(nequal) | |
823 | test %eax, %eax | |
824 | jz L(equal) | |
825 | ||
826 | mov 4(%esi), %ecx | |
827 | cmp %ecx, 4(%edi) | |
828 | jne L(nequal) | |
829 | test %ecx, %ecx | |
830 | jz L(equal) | |
831 | ||
832 | mov 8(%esi), %ecx | |
833 | cmp %ecx, 8(%edi) | |
834 | jne L(nequal) | |
835 | test %ecx, %ecx | |
836 | jz L(equal) | |
837 | ||
95584d3b LD |
838 | mov 12(%esi), %ecx |
839 | cmp %ecx, 12(%edi) | |
840 | jne L(nequal) | |
841 | xor %eax, %eax | |
1b48c537 UD |
842 | RETURN |
843 | ||
/* L(less4_double_words): decoder for a failed SSE step on bytes 0-15.  On entry %edx holds
   (byte mask - 0xffff), where each element contributes four mask bits that are set only if it
   was equal and non-zero.  %dl == 0 means elements 0 and 1 were fine; a zero low nibble of
   %dl (or %dh) means the even element of that pair was fine.  The first failing element is
   then compared: a difference goes to L(nequal) with the flags from that `cmp', otherwise the
   elements are equal (hence the common terminator) and the preset %eax = 0 is returned.  */
844 | .p2align 4 | |
845 | L(less4_double_words): | |
95584d3b | 846 | xor %eax, %eax |
1b48c537 UD |
847 | test %dl, %dl |
848 | jz L(next_two_double_words) | |
849 | and $15, %dl | |
850 | jz L(second_double_word) | |
95584d3b LD |
851 | mov (%esi), %ecx |
852 | cmp %ecx, (%edi) | |
853 | jne L(nequal) | |
1b48c537 UD |
854 | RETURN |
855 | ||
856 | .p2align 4 | |
857 | L(second_double_word): | |
95584d3b LD |
858 | mov 4(%esi), %ecx |
859 | cmp %ecx, 4(%edi) | |
860 | jne L(nequal) | |
1b48c537 UD |
861 | RETURN |
862 | ||
863 | .p2align 4 | |
864 | L(next_two_double_words): | |
865 | and $15, %dh | |
866 | jz L(fourth_double_word) | |
95584d3b LD |
867 | mov 8(%esi), %ecx |
868 | cmp %ecx, 8(%edi) | |
869 | jne L(nequal) | |
1b48c537 UD |
870 | RETURN |
871 | ||
872 | .p2align 4 | |
873 | L(fourth_double_word): | |
95584d3b LD |
874 | mov 12(%esi), %ecx |
875 | cmp %ecx, 12(%edi) | |
876 | jne L(nequal) | |
1b48c537 UD |
877 | RETURN |
878 | ||
/* L(less4_double_words_16): decoder for a failed SSE step on bytes 16-31.  On entry %edx holds
   (byte mask - 0xffff), where each element contributes four mask bits that are set only if it
   was equal and non-zero.  %dl == 0 means the first two elements were fine; a zero low nibble
   of %dl (or %dh) means the even element of that pair was fine.  The first failing element is
   then compared: a difference goes to L(nequal) with the flags from that `cmp', otherwise the
   elements are equal (hence the common terminator) and the preset %eax = 0 is returned.  */
879 | .p2align 4 | |
880 | L(less4_double_words_16): | |
95584d3b | 881 | xor %eax, %eax |
1b48c537 UD |
882 | test %dl, %dl |
883 | jz L(next_two_double_words_16) | |
884 | and $15, %dl | |
885 | jz L(second_double_word_16) | |
95584d3b LD |
886 | mov 16(%esi), %ecx |
887 | cmp %ecx, 16(%edi) | |
888 | jne L(nequal) | |
1b48c537 UD |
889 | RETURN |
890 | ||
891 | .p2align 4 | |
892 | L(second_double_word_16): | |
95584d3b LD |
893 | mov 20(%esi), %ecx |
894 | cmp %ecx, 20(%edi) | |
895 | jne L(nequal) | |
1b48c537 UD |
896 | RETURN |
897 | ||
898 | .p2align 4 | |
899 | L(next_two_double_words_16): | |
900 | and $15, %dh | |
901 | jz L(fourth_double_word_16) | |
95584d3b LD |
902 | mov 24(%esi), %ecx |
903 | cmp %ecx, 24(%edi) | |
904 | jne L(nequal) | |
1b48c537 UD |
905 | RETURN |
906 | ||
907 | .p2align 4 | |
908 | L(fourth_double_word_16): | |
95584d3b LD |
909 | mov 28(%esi), %ecx |
910 | cmp %ecx, 28(%edi) | |
911 | jne L(nequal) | |
1b48c537 UD |
912 | RETURN |
913 | ||
/* L(less4_double_words_32): decoder for a failed SSE step on bytes 32-47.  On entry %edx holds
   (byte mask - 0xffff), where each element contributes four mask bits that are set only if it
   was equal and non-zero.  %dl == 0 means the first two elements were fine; a zero low nibble
   of %dl (or %dh) means the even element of that pair was fine.  The first failing element is
   then compared: a difference goes to L(nequal) with the flags from that `cmp', otherwise the
   elements are equal (hence the common terminator) and the preset %eax = 0 is returned.  */
914 | .p2align 4 | |
915 | L(less4_double_words_32): | |
95584d3b | 916 | xor %eax, %eax |
1b48c537 UD |
917 | test %dl, %dl |
918 | jz L(next_two_double_words_32) | |
919 | and $15, %dl | |
920 | jz L(second_double_word_32) | |
95584d3b LD |
921 | mov 32(%esi), %ecx |
922 | cmp %ecx, 32(%edi) | |
923 | jne L(nequal) | |
1b48c537 UD |
924 | RETURN |
925 | ||
926 | .p2align 4 | |
927 | L(second_double_word_32): | |
95584d3b LD |
928 | mov 36(%esi), %ecx |
929 | cmp %ecx, 36(%edi) | |
930 | jne L(nequal) | |
1b48c537 UD |
931 | RETURN |
932 | ||
933 | .p2align 4 | |
934 | L(next_two_double_words_32): | |
935 | and $15, %dh | |
936 | jz L(fourth_double_word_32) | |
95584d3b LD |
937 | mov 40(%esi), %ecx |
938 | cmp %ecx, 40(%edi) | |
939 | jne L(nequal) | |
1b48c537 UD |
940 | RETURN |
941 | ||
942 | .p2align 4 | |
943 | L(fourth_double_word_32): | |
95584d3b LD |
944 | mov 44(%esi), %ecx |
945 | cmp %ecx, 44(%edi) | |
946 | jne L(nequal) | |
1b48c537 UD |
947 | RETURN |
948 | ||
/* L(less4_double_words_48): decoder for a failed SSE step on bytes 48-63.  On entry %edx holds
   (byte mask - 0xffff), where each element contributes four mask bits that are set only if it
   was equal and non-zero.  %dl == 0 means the first two elements were fine; a zero low nibble
   of %dl (or %dh) means the even element of that pair was fine.  The first failing element is
   then compared: a difference goes to L(nequal) with the flags from that `cmp', otherwise the
   elements are equal (hence the common terminator) and the preset %eax = 0 is returned.  */
949 | .p2align 4 | |
950 | L(less4_double_words_48): | |
95584d3b | 951 | xor %eax, %eax |
1b48c537 UD |
952 | test %dl, %dl |
953 | jz L(next_two_double_words_48) | |
954 | and $15, %dl | |
955 | jz L(second_double_word_48) | |
95584d3b LD |
956 | mov 48(%esi), %ecx |
957 | cmp %ecx, 48(%edi) | |
958 | jne L(nequal) | |
1b48c537 UD |
959 | RETURN |
960 | ||
961 | .p2align 4 | |
962 | L(second_double_word_48): | |
95584d3b LD |
963 | mov 52(%esi), %ecx |
964 | cmp %ecx, 52(%edi) | |
965 | jne L(nequal) | |
1b48c537 UD |
966 | RETURN |
967 | ||
968 | .p2align 4 | |
969 | L(next_two_double_words_48): | |
970 | and $15, %dh | |
971 | jz L(fourth_double_word_48) | |
95584d3b LD |
972 | mov 56(%esi), %ecx |
973 | cmp %ecx, 56(%edi) | |
974 | jne L(nequal) | |
1b48c537 UD |
975 | RETURN |
976 | ||
977 | .p2align 4 | |
978 | L(fourth_double_word_48): | |
95584d3b LD |
979 | mov 60(%esi), %ecx |
980 | cmp %ecx, 60(%edi) | |
981 | jne L(nequal) | |
1b48c537 UD |
982 | RETURN |
983 | ||
/* L(nequal): mismatch exit used after ENTRANCE.  Every jump here comes straight from a `cmp'
   that computed (first-string element) - (second-string element), and `mov' leaves those flags
   untouched, so `jg' is a signed "first > second" test.  Returns 1 in that case (through
   L(return)) and -1 otherwise; both paths restore %esi/%edi via RETURN.  */
984 | .p2align 4 | |
985 | L(nequal): | |
986 | mov $1, %eax | |
95584d3b | 987 | jg L(return) |
1b48c537 | 988 | neg %eax |
95584d3b | 989 | RETURN |
1b48c537 | 990 |
95584d3b LD |
991 | .p2align 4 |
992 | L(return): | |
1b48c537 UD |
993 | RETURN |
994 | ||
/* L(equal): strings compared equal after ENTRANCE; returns 0 and restores %esi/%edi.
   The two CFI_POP annotations cancel the CFI_PUSH pair that RETURN emits after `ret', because
   the code that follows runs without %esi/%edi on the stack.  */
995 | .p2align 4 | |
996 | L(equal): | |
997 | xorl %eax, %eax | |
998 | RETURN | |
999 | ||
1000 | CFI_POP (%edi) | |
1001 | CFI_POP (%esi) | |
1002 | ||
/* L(neq)/L(eq): exits for the first four elements, taken before ENTRANCE, so nothing is popped
   and a plain `ret' is used.  L(neq) is entered with the flags of a `cmp' that computed
   (first-string element) - (second-string element); `mov' keeps them, so `jg' is a signed
   "first > second" test: result 1, otherwise -1.  L(eq) returns 0.  */
1003 | .p2align 4 | |
1004 | L(neq): | |
1005 | mov $1, %eax | |
95584d3b | 1006 | jg L(neq_bigger) |
1b48c537 UD |
1007 | neg %eax |
1008 | ||
1009 | L(neq_bigger): | |
1010 | ret | |
1011 | ||
1012 | .p2align 4 | |
1013 | L(eq): | |
1014 | xorl %eax, %eax | |
1015 | ret | |
1016 | ||
95584d3b | 1017 | END (__wcscmp_sse2) |
1b48c537 | 1018 | #endif |