]>
Commit | Line | Data |
---|---|---|
1b48c537 | 1 | /* wcscmp with SSE2 |
6d7e8eda | 2 | Copyright (C) 2011-2023 Free Software Foundation, Inc. |
1b48c537 UD |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 16 | License along with the GNU C Library; if not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
1b48c537 | 18 | |
4f41c682 | 19 | #if IS_IN (libc) |
1b48c537 UD |
20 | |
21 | # include <sysdep.h> | |
1b48c537 UD |
22 | |
23 | # define CFI_PUSH(REG) \ | |
24 | cfi_adjust_cfa_offset (4); \ | |
25 | cfi_rel_offset (REG, 0) | |
26 | ||
27 | # define CFI_POP(REG) \ | |
28 | cfi_adjust_cfa_offset (-4); \ | |
29 | cfi_restore (REG) | |
30 | ||
31 | # define PUSH(REG) pushl REG; CFI_PUSH (REG) | |
32 | # define POP(REG) popl REG; CFI_POP (REG) | |
33 | ||
1b48c537 UD |
34 | # define ENTRANCE PUSH(%esi); PUSH(%edi) |
35 | # define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi); | |
36 | # define PARMS 4 | |
37 | # define STR1 PARMS | |
38 | # define STR2 STR1+4 | |
39 | ||
95584d3b LD |
40 | /* Note: wcscmp uses signed comparison, not unsigned as in the strcmp function. */ |
41 | ||
1b48c537 | 42 | .text |
95584d3b | 43 | ENTRY (__wcscmp_sse2) |
1b48c537 UD |
44 | /* |
45 | * This implementation uses SSE2 to compare up to 16 bytes (four wide characters) at a time. | |
46 | */ | |
47 | mov STR1(%esp), %edx /* edx = pointer loaded from the STR1 stack slot */ | |
48 | mov STR2(%esp), %eax /* eax = pointer loaded from the STR2 stack slot */ | |
49 | ||
50 | mov (%eax), %ecx /* scalar check of element 0: ecx = STR2 element */ | |
51 | cmp %ecx, (%edx) /* flags = STR1 element - STR2 element */ | |
52 | jne L(neq) /* mismatch: L(neq) derives the sign from these flags */ | |
53 | test %ecx, %ecx | |
54 | jz L(eq) /* equal and zero: both strings end here */ | |
55 | ||
56 | mov 4(%eax), %ecx /* element 1, same pattern */ | |
57 | cmp %ecx, 4(%edx) | |
58 | jne L(neq) | |
59 | test %ecx, %ecx | |
60 | jz L(eq) | |
61 | ||
62 | mov 8(%eax), %ecx /* element 2 */ | |
63 | cmp %ecx, 8(%edx) | |
64 | jne L(neq) | |
65 | test %ecx, %ecx | |
66 | jz L(eq) | |
67 | ||
68 | mov 12(%eax), %ecx /* element 3 */ | |
69 | cmp %ecx, 12(%edx) | |
70 | jne L(neq) | |
71 | test %ecx, %ecx | |
72 | jz L(eq) | |
73 | ||
74 | ENTRANCE /* first four elements matched with no terminator; push %esi and %edi */ | |
75 | add $16, %eax /* step both pointers past the 16 bytes already checked */ | |
76 | add $16, %edx | |
77 | ||
78 | mov %eax, %esi /* esi = STR2 cursor */ | |
79 | mov %edx, %edi /* edi = STR1 cursor */ | |
80 | pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ | |
81 | mov %al, %ch /* ch = low address byte of esi, tested for 16-byte alignment after dispatch */ | |
82 | mov %dl, %cl /* cl = low address byte of edi */ | |
83 | and $63, %eax /* eax = offset of esi within its 64-byte cache line */ | |
84 | and $63, %edx /* edx = offset of edi within its 64-byte cache line */ | |
85 | and $15, %cl | |
86 | jz L(continue_00) /* edi is 16-byte aligned */ | |
87 | cmp $16, %edx /* otherwise dispatch on which 16-byte quarter of the line edi is in */ | |
88 | jb L(continue_0) | |
89 | cmp $32, %edx | |
90 | jb L(continue_16) | |
91 | cmp $48, %edx | |
92 | jb L(continue_32) /* offsets 48..63 fall through to the code that follows */ | |
93 | ||
94 | L(continue_48): | |
95 | and $15, %ch | |
96 | jz L(continue_48_00) | |
97 | cmp $16, %eax | |
98 | jb L(continue_0_48) | |
99 | cmp $32, %eax | |
100 | jb L(continue_16_48) | |
101 | cmp $48, %eax | |
102 | jb L(continue_32_48) | |
103 | ||
104 | .p2align 4 | |
105 | L(continue_48_48): | |
106 | mov (%esi), %ecx | |
107 | cmp %ecx, (%edi) | |
108 | jne L(nequal) | |
109 | test %ecx, %ecx | |
110 | jz L(equal) | |
111 | ||
112 | mov 4(%esi), %ecx | |
113 | cmp %ecx, 4(%edi) | |
114 | jne L(nequal) | |
115 | test %ecx, %ecx | |
116 | jz L(equal) | |
117 | ||
118 | mov 8(%esi), %ecx | |
119 | cmp %ecx, 8(%edi) | |
120 | jne L(nequal) | |
121 | test %ecx, %ecx | |
122 | jz L(equal) | |
123 | ||
124 | mov 12(%esi), %ecx | |
125 | cmp %ecx, 12(%edi) | |
126 | jne L(nequal) | |
127 | test %ecx, %ecx | |
128 | jz L(equal) | |
f17424ed | 129 | |
1b48c537 UD |
130 | movdqu 16(%edi), %xmm1 |
131 | movdqu 16(%esi), %xmm2 | |
132 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
133 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
134 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
135 | pmovmskb %xmm1, %edx | |
136 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
137 | jnz L(less4_double_words_16) | |
138 | ||
139 | movdqu 32(%edi), %xmm1 | |
140 | movdqu 32(%esi), %xmm2 | |
141 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
142 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
143 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
144 | pmovmskb %xmm1, %edx | |
145 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
146 | jnz L(less4_double_words_32) | |
147 | ||
148 | movdqu 48(%edi), %xmm1 | |
149 | movdqu 48(%esi), %xmm2 | |
150 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
151 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
152 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
153 | pmovmskb %xmm1, %edx | |
154 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
155 | jnz L(less4_double_words_48) | |
156 | ||
157 | add $64, %esi | |
158 | add $64, %edi | |
159 | jmp L(continue_48_48) | |
160 | ||
161 | L(continue_0): | |
162 | and $15, %ch | |
163 | jz L(continue_0_00) | |
164 | cmp $16, %eax | |
165 | jb L(continue_0_0) | |
166 | cmp $32, %eax | |
167 | jb L(continue_0_16) | |
168 | cmp $48, %eax | |
169 | jb L(continue_0_32) | |
170 | ||
171 | .p2align 4 | |
172 | L(continue_0_48): | |
173 | mov (%esi), %ecx | |
174 | cmp %ecx, (%edi) | |
175 | jne L(nequal) | |
176 | test %ecx, %ecx | |
177 | jz L(equal) | |
178 | ||
179 | mov 4(%esi), %ecx | |
180 | cmp %ecx, 4(%edi) | |
181 | jne L(nequal) | |
182 | test %ecx, %ecx | |
183 | jz L(equal) | |
184 | ||
185 | mov 8(%esi), %ecx | |
186 | cmp %ecx, 8(%edi) | |
187 | jne L(nequal) | |
188 | test %ecx, %ecx | |
189 | jz L(equal) | |
190 | ||
191 | mov 12(%esi), %ecx | |
192 | cmp %ecx, 12(%edi) | |
193 | jne L(nequal) | |
194 | test %ecx, %ecx | |
195 | jz L(equal) | |
196 | ||
197 | movdqu 16(%edi), %xmm1 | |
198 | movdqu 16(%esi), %xmm2 | |
199 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
200 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
201 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
202 | pmovmskb %xmm1, %edx | |
203 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
204 | jnz L(less4_double_words_16) | |
205 | ||
206 | movdqu 32(%edi), %xmm1 | |
207 | movdqu 32(%esi), %xmm2 | |
208 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
209 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
210 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
211 | pmovmskb %xmm1, %edx | |
212 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
213 | jnz L(less4_double_words_32) | |
214 | ||
215 | mov 48(%esi), %ecx | |
216 | cmp %ecx, 48(%edi) | |
217 | jne L(nequal) | |
218 | test %ecx, %ecx | |
219 | jz L(equal) | |
220 | ||
221 | mov 52(%esi), %ecx | |
222 | cmp %ecx, 52(%edi) | |
223 | jne L(nequal) | |
224 | test %ecx, %ecx | |
225 | jz L(equal) | |
226 | ||
227 | mov 56(%esi), %ecx | |
228 | cmp %ecx, 56(%edi) | |
229 | jne L(nequal) | |
230 | test %ecx, %ecx | |
231 | jz L(equal) | |
232 | ||
233 | mov 60(%esi), %ecx | |
234 | cmp %ecx, 60(%edi) | |
235 | jne L(nequal) | |
236 | test %ecx, %ecx | |
237 | jz L(equal) | |
238 | ||
239 | add $64, %esi | |
240 | add $64, %edi | |
241 | jmp L(continue_0_48) | |
242 | ||
243 | .p2align 4 | |
244 | L(continue_00): | |
245 | and $15, %ch | |
246 | jz L(continue_00_00) | |
247 | cmp $16, %eax | |
248 | jb L(continue_00_0) | |
249 | cmp $32, %eax | |
250 | jb L(continue_00_16) | |
251 | cmp $48, %eax | |
252 | jb L(continue_00_32) | |
253 | ||
254 | .p2align 4 | |
255 | L(continue_00_48): | |
256 | pcmpeqd (%edi), %xmm0 | |
257 | mov (%edi), %eax | |
258 | pmovmskb %xmm0, %ecx | |
259 | test %ecx, %ecx | |
260 | jnz L(less4_double_words1) | |
261 | ||
95584d3b LD |
262 | cmp (%esi), %eax |
263 | jne L(nequal) | |
f17424ed | 264 | |
1b48c537 | 265 | mov 4(%edi), %eax |
95584d3b LD |
266 | cmp 4(%esi), %eax |
267 | jne L(nequal) | |
1b48c537 UD |
268 | |
269 | mov 8(%edi), %eax | |
95584d3b LD |
270 | cmp 8(%esi), %eax |
271 | jne L(nequal) | |
1b48c537 UD |
272 | |
273 | mov 12(%edi), %eax | |
95584d3b LD |
274 | cmp 12(%esi), %eax |
275 | jne L(nequal) | |
f17424ed | 276 | |
1b48c537 UD |
277 | movdqu 16(%esi), %xmm2 |
278 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
279 | pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ | |
280 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
281 | pmovmskb %xmm2, %edx | |
282 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
283 | jnz L(less4_double_words_16) | |
284 | ||
285 | movdqu 32(%esi), %xmm2 | |
286 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
287 | pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */ | |
288 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
289 | pmovmskb %xmm2, %edx | |
290 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
291 | jnz L(less4_double_words_32) | |
292 | ||
293 | movdqu 48(%esi), %xmm2 | |
294 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
295 | pcmpeqd 48(%edi), %xmm2 /* compare first 4 double_words for equality */ | |
296 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
297 | pmovmskb %xmm2, %edx | |
298 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
299 | jnz L(less4_double_words_48) | |
300 | ||
301 | add $64, %esi | |
302 | add $64, %edi | |
303 | jmp L(continue_00_48) | |
304 | ||
305 | .p2align 4 | |
306 | L(continue_32): | |
307 | and $15, %ch | |
308 | jz L(continue_32_00) | |
309 | cmp $16, %eax | |
310 | jb L(continue_0_32) | |
311 | cmp $32, %eax | |
312 | jb L(continue_16_32) | |
313 | cmp $48, %eax | |
314 | jb L(continue_32_32) | |
315 | ||
316 | .p2align 4 | |
317 | L(continue_32_48): | |
318 | mov (%esi), %ecx | |
319 | cmp %ecx, (%edi) | |
320 | jne L(nequal) | |
321 | test %ecx, %ecx | |
322 | jz L(equal) | |
323 | ||
324 | mov 4(%esi), %ecx | |
325 | cmp %ecx, 4(%edi) | |
326 | jne L(nequal) | |
327 | test %ecx, %ecx | |
328 | jz L(equal) | |
329 | ||
330 | mov 8(%esi), %ecx | |
331 | cmp %ecx, 8(%edi) | |
332 | jne L(nequal) | |
333 | test %ecx, %ecx | |
334 | jz L(equal) | |
335 | ||
336 | mov 12(%esi), %ecx | |
337 | cmp %ecx, 12(%edi) | |
338 | jne L(nequal) | |
339 | test %ecx, %ecx | |
340 | jz L(equal) | |
341 | ||
342 | mov 16(%esi), %ecx | |
343 | cmp %ecx, 16(%edi) | |
344 | jne L(nequal) | |
345 | test %ecx, %ecx | |
346 | jz L(equal) | |
347 | ||
348 | mov 20(%esi), %ecx | |
349 | cmp %ecx, 20(%edi) | |
350 | jne L(nequal) | |
351 | test %ecx, %ecx | |
352 | jz L(equal) | |
353 | ||
354 | mov 24(%esi), %ecx | |
355 | cmp %ecx, 24(%edi) | |
356 | jne L(nequal) | |
357 | test %ecx, %ecx | |
358 | jz L(equal) | |
359 | ||
360 | mov 28(%esi), %ecx | |
361 | cmp %ecx, 28(%edi) | |
362 | jne L(nequal) | |
363 | test %ecx, %ecx | |
364 | jz L(equal) | |
365 | ||
366 | movdqu 32(%edi), %xmm1 | |
367 | movdqu 32(%esi), %xmm2 | |
368 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
369 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
370 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
371 | pmovmskb %xmm1, %edx | |
372 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
373 | jnz L(less4_double_words_32) | |
374 | ||
375 | movdqu 48(%edi), %xmm1 | |
376 | movdqu 48(%esi), %xmm2 | |
377 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
378 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
95584d3b | 379 | psubb %xmm0, %xmm1 /* packed sub of comparison results */ |
1b48c537 UD |
380 | pmovmskb %xmm1, %edx |
381 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
382 | jnz L(less4_double_words_48) | |
383 | ||
384 | add $64, %esi | |
385 | add $64, %edi | |
386 | jmp L(continue_32_48) | |
387 | ||
388 | .p2align 4 | |
389 | L(continue_16): | |
390 | and $15, %ch | |
391 | jz L(continue_16_00) | |
392 | cmp $16, %eax | |
393 | jb L(continue_0_16) | |
394 | cmp $32, %eax | |
395 | jb L(continue_16_16) | |
396 | cmp $48, %eax | |
397 | jb L(continue_16_32) | |
398 | ||
399 | .p2align 4 | |
400 | L(continue_16_48): | |
401 | mov (%esi), %ecx | |
402 | cmp %ecx, (%edi) | |
403 | jne L(nequal) | |
404 | test %ecx, %ecx | |
405 | jz L(equal) | |
406 | ||
407 | mov 4(%esi), %ecx | |
408 | cmp %ecx, 4(%edi) | |
409 | jne L(nequal) | |
410 | test %ecx, %ecx | |
411 | jz L(equal) | |
412 | ||
413 | mov 8(%esi), %ecx | |
414 | cmp %ecx, 8(%edi) | |
415 | jne L(nequal) | |
416 | test %ecx, %ecx | |
417 | jz L(equal) | |
418 | ||
419 | mov 12(%esi), %ecx | |
420 | cmp %ecx, 12(%edi) | |
421 | jne L(nequal) | |
422 | test %ecx, %ecx | |
423 | jz L(equal) | |
424 | ||
425 | movdqu 16(%edi), %xmm1 | |
426 | movdqu 16(%esi), %xmm2 | |
427 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
428 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
429 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
430 | pmovmskb %xmm1, %edx | |
431 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
432 | jnz L(less4_double_words_16) | |
433 | ||
434 | mov 32(%esi), %ecx | |
435 | cmp %ecx, 32(%edi) | |
436 | jne L(nequal) | |
437 | test %ecx, %ecx | |
438 | jz L(equal) | |
439 | ||
440 | mov 36(%esi), %ecx | |
441 | cmp %ecx, 36(%edi) | |
442 | jne L(nequal) | |
443 | test %ecx, %ecx | |
444 | jz L(equal) | |
445 | ||
446 | mov 40(%esi), %ecx | |
447 | cmp %ecx, 40(%edi) | |
448 | jne L(nequal) | |
449 | test %ecx, %ecx | |
450 | jz L(equal) | |
451 | ||
452 | mov 44(%esi), %ecx | |
453 | cmp %ecx, 44(%edi) | |
454 | jne L(nequal) | |
455 | test %ecx, %ecx | |
456 | jz L(equal) | |
457 | ||
458 | movdqu 48(%edi), %xmm1 | |
459 | movdqu 48(%esi), %xmm2 | |
460 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
461 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
462 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
463 | pmovmskb %xmm1, %edx | |
464 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
465 | jnz L(less4_double_words_48) | |
466 | ||
467 | add $64, %esi | |
468 | add $64, %edi | |
469 | jmp L(continue_16_48) | |
470 | ||
471 | .p2align 4 | |
472 | L(continue_00_00): | |
473 | movdqa (%edi), %xmm1 | |
474 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
475 | pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ | |
476 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
477 | pmovmskb %xmm1, %edx | |
478 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
479 | jnz L(less4_double_words) | |
480 | ||
481 | movdqa 16(%edi), %xmm3 | |
482 | pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ | |
483 | pcmpeqd 16(%esi), %xmm3 /* compare first 4 double_words for equality */ | |
484 | psubb %xmm0, %xmm3 /* packed sub of comparison results*/ | |
485 | pmovmskb %xmm3, %edx | |
486 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
487 | jnz L(less4_double_words_16) | |
488 | ||
489 | movdqa 32(%edi), %xmm5 | |
490 | pcmpeqd %xmm5, %xmm0 /* Any null double_word? */ | |
491 | pcmpeqd 32(%esi), %xmm5 /* compare first 4 double_words for equality */ | |
492 | psubb %xmm0, %xmm5 /* packed sub of comparison results*/ | |
493 | pmovmskb %xmm5, %edx | |
494 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
495 | jnz L(less4_double_words_32) | |
496 | ||
497 | movdqa 48(%edi), %xmm1 | |
498 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
499 | pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
500 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
501 | pmovmskb %xmm1, %edx | |
502 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
503 | jnz L(less4_double_words_48) | |
504 | ||
505 | add $64, %esi | |
506 | add $64, %edi | |
507 | jmp L(continue_00_00) | |
508 | ||
509 | .p2align 4 | |
510 | L(continue_00_32): | |
511 | movdqu (%esi), %xmm2 | |
512 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
513 | pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ | |
514 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
515 | pmovmskb %xmm2, %edx | |
516 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
517 | jnz L(less4_double_words) | |
518 | ||
519 | add $16, %esi | |
520 | add $16, %edi | |
521 | jmp L(continue_00_48) | |
522 | ||
523 | .p2align 4 | |
524 | L(continue_00_16): | |
525 | movdqu (%esi), %xmm2 | |
526 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
527 | pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ | |
528 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
529 | pmovmskb %xmm2, %edx | |
530 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
531 | jnz L(less4_double_words) | |
532 | ||
533 | movdqu 16(%esi), %xmm2 | |
534 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
535 | pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ | |
536 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
537 | pmovmskb %xmm2, %edx | |
538 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
539 | jnz L(less4_double_words_16) | |
540 | ||
541 | add $32, %esi | |
542 | add $32, %edi | |
543 | jmp L(continue_00_48) | |
544 | ||
545 | .p2align 4 | |
546 | L(continue_00_0): | |
547 | movdqu (%esi), %xmm2 | |
548 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
549 | pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ | |
550 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
551 | pmovmskb %xmm2, %edx | |
552 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
553 | jnz L(less4_double_words) | |
554 | ||
555 | movdqu 16(%esi), %xmm2 | |
556 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
557 | pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ | |
558 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
559 | pmovmskb %xmm2, %edx | |
560 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
561 | jnz L(less4_double_words_16) | |
562 | ||
563 | movdqu 32(%esi), %xmm2 | |
564 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
565 | pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */ | |
566 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
567 | pmovmskb %xmm2, %edx | |
568 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
569 | jnz L(less4_double_words_32) | |
570 | ||
571 | add $48, %esi | |
572 | add $48, %edi | |
573 | jmp L(continue_00_48) | |
574 | ||
575 | .p2align 4 | |
576 | L(continue_48_00): | |
577 | pcmpeqd (%esi), %xmm0 | |
578 | mov (%edi), %eax | |
579 | pmovmskb %xmm0, %ecx | |
580 | test %ecx, %ecx | |
581 | jnz L(less4_double_words1) | |
582 | ||
95584d3b LD |
583 | cmp (%esi), %eax |
584 | jne L(nequal) | |
f17424ed | 585 | |
1b48c537 | 586 | mov 4(%edi), %eax |
95584d3b LD |
587 | cmp 4(%esi), %eax |
588 | jne L(nequal) | |
1b48c537 UD |
589 | |
590 | mov 8(%edi), %eax | |
95584d3b LD |
591 | cmp 8(%esi), %eax |
592 | jne L(nequal) | |
1b48c537 UD |
593 | |
594 | mov 12(%edi), %eax | |
95584d3b LD |
595 | cmp 12(%esi), %eax |
596 | jne L(nequal) | |
f17424ed | 597 | |
1b48c537 UD |
598 | movdqu 16(%edi), %xmm1 |
599 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
600 | pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
601 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
602 | pmovmskb %xmm1, %edx | |
603 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
604 | jnz L(less4_double_words_16) | |
605 | ||
606 | movdqu 32(%edi), %xmm1 | |
607 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
608 | pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
609 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
610 | pmovmskb %xmm1, %edx | |
611 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
612 | jnz L(less4_double_words_32) | |
613 | ||
614 | movdqu 48(%edi), %xmm1 | |
615 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
616 | pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
617 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
618 | pmovmskb %xmm1, %edx | |
619 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
620 | jnz L(less4_double_words_48) | |
621 | ||
622 | add $64, %esi | |
623 | add $64, %edi | |
624 | jmp L(continue_48_00) | |
625 | ||
626 | .p2align 4 | |
627 | L(continue_32_00): | |
628 | movdqu (%edi), %xmm1 | |
629 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
630 | pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ | |
631 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
632 | pmovmskb %xmm1, %edx | |
633 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
634 | jnz L(less4_double_words) | |
635 | ||
636 | add $16, %esi | |
637 | add $16, %edi | |
638 | jmp L(continue_48_00) | |
639 | ||
640 | .p2align 4 | |
641 | L(continue_16_00): | |
642 | movdqu (%edi), %xmm1 | |
643 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
644 | pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ | |
645 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
646 | pmovmskb %xmm1, %edx | |
647 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
648 | jnz L(less4_double_words) | |
649 | ||
650 | movdqu 16(%edi), %xmm1 | |
651 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
652 | pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
653 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
654 | pmovmskb %xmm1, %edx | |
655 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
656 | jnz L(less4_double_words_16) | |
657 | ||
658 | add $32, %esi | |
659 | add $32, %edi | |
660 | jmp L(continue_48_00) | |
661 | ||
662 | .p2align 4 | |
663 | L(continue_0_00): | |
664 | movdqu (%edi), %xmm1 | |
665 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
666 | pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ | |
667 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
668 | pmovmskb %xmm1, %edx | |
669 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
670 | jnz L(less4_double_words) | |
671 | ||
672 | movdqu 16(%edi), %xmm1 | |
673 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
674 | pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
675 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
676 | pmovmskb %xmm1, %edx | |
677 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
678 | jnz L(less4_double_words_16) | |
679 | ||
680 | movdqu 32(%edi), %xmm1 | |
681 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
682 | pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */ | |
683 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
684 | pmovmskb %xmm1, %edx | |
685 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
686 | jnz L(less4_double_words_32) | |
687 | ||
688 | add $48, %esi | |
689 | add $48, %edi | |
690 | jmp L(continue_48_00) | |
691 | ||
692 | .p2align 4 | |
693 | L(continue_32_32): | |
694 | movdqu (%edi), %xmm1 | |
695 | movdqu (%esi), %xmm2 | |
696 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
697 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
698 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
699 | pmovmskb %xmm1, %edx | |
700 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
701 | jnz L(less4_double_words) | |
702 | ||
703 | add $16, %esi | |
704 | add $16, %edi | |
705 | jmp L(continue_48_48) | |
706 | ||
707 | .p2align 4 | |
708 | L(continue_16_16): | |
709 | movdqu (%edi), %xmm1 | |
710 | movdqu (%esi), %xmm2 | |
711 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
712 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
713 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
714 | pmovmskb %xmm1, %edx | |
715 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
716 | jnz L(less4_double_words) | |
717 | ||
718 | movdqu 16(%edi), %xmm3 | |
719 | movdqu 16(%esi), %xmm4 | |
720 | pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ | |
721 | pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ | |
722 | psubb %xmm0, %xmm3 /* packed sub of comparison results*/ | |
723 | pmovmskb %xmm3, %edx | |
724 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
725 | jnz L(less4_double_words_16) | |
726 | ||
727 | add $32, %esi | |
728 | add $32, %edi | |
729 | jmp L(continue_48_48) | |
730 | ||
731 | .p2align 4 | |
732 | L(continue_0_0): | |
733 | movdqu (%edi), %xmm1 | |
734 | movdqu (%esi), %xmm2 | |
735 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
736 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
737 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
738 | pmovmskb %xmm1, %edx | |
739 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
740 | jnz L(less4_double_words) | |
741 | ||
742 | movdqu 16(%edi), %xmm3 | |
743 | movdqu 16(%esi), %xmm4 | |
744 | pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ | |
745 | pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ | |
746 | psubb %xmm0, %xmm3 /* packed sub of comparison results*/ | |
747 | pmovmskb %xmm3, %edx | |
748 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
749 | jnz L(less4_double_words_16) | |
750 | ||
751 | movdqu 32(%edi), %xmm1 | |
752 | movdqu 32(%esi), %xmm2 | |
753 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
754 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
755 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
756 | pmovmskb %xmm1, %edx | |
757 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
758 | jnz L(less4_double_words_32) | |
759 | ||
760 | add $48, %esi | |
761 | add $48, %edi | |
762 | jmp L(continue_48_48) | |
763 | ||
764 | .p2align 4 | |
765 | L(continue_0_16): | |
766 | movdqu (%edi), %xmm1 | |
767 | movdqu (%esi), %xmm2 | |
768 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
769 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
770 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
771 | pmovmskb %xmm1, %edx | |
772 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
773 | jnz L(less4_double_words) | |
774 | ||
775 | movdqu 16(%edi), %xmm1 | |
776 | movdqu 16(%esi), %xmm2 | |
777 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
778 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
779 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
780 | pmovmskb %xmm1, %edx | |
781 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
782 | jnz L(less4_double_words_16) | |
783 | ||
784 | add $32, %esi | |
785 | add $32, %edi | |
786 | jmp L(continue_32_48) | |
787 | ||
788 | .p2align 4 | |
789 | L(continue_0_32): | |
790 | movdqu (%edi), %xmm1 | |
791 | movdqu (%esi), %xmm2 | |
792 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
793 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
794 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
795 | pmovmskb %xmm1, %edx | |
796 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
797 | jnz L(less4_double_words) | |
798 | ||
799 | add $16, %esi | |
800 | add $16, %edi | |
801 | jmp L(continue_16_48) | |
802 | ||
803 | .p2align 4 | |
804 | L(continue_16_32): | |
805 | movdqu (%edi), %xmm1 | |
806 | movdqu (%esi), %xmm2 | |
807 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
808 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
809 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
810 | pmovmskb %xmm1, %edx | |
811 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
812 | jnz L(less4_double_words) | |
813 | ||
814 | add $16, %esi | |
815 | add $16, %edi | |
816 | jmp L(continue_32_48) | |
817 | ||
818 | .p2align 4 | |
819 | L(less4_double_words1): /* entered from L(continue_00_48) / L(continue_48_00) with eax = (%edi), after their 16-byte zero check found a zero element */ | |
820 | cmp (%esi), %eax /* flags = edi element - esi element */ | |
821 | jne L(nequal) | |
822 | test %eax, %eax | |
823 | jz L(equal) /* equal and zero: strings end together */ | |
824 | ||
825 | mov 4(%esi), %ecx /* element 1 */ | |
826 | cmp %ecx, 4(%edi) | |
827 | jne L(nequal) | |
828 | test %ecx, %ecx | |
829 | jz L(equal) | |
830 | ||
831 | mov 8(%esi), %ecx /* element 2 */ | |
832 | cmp %ecx, 8(%edi) | |
833 | jne L(nequal) | |
834 | test %ecx, %ecx | |
835 | jz L(equal) | |
836 | ||
95584d3b LD |
837 | mov 12(%esi), %ecx |
838 | cmp %ecx, 12(%edi) | |
839 | jne L(nequal) | |
840 | xor %eax, %eax /* elements 0..2 were equal and non-zero, so the zero is element 3 and it matches: return 0 */ | |
1b48c537 UD |
841 | RETURN |
842 | ||
843 | .p2align 4 | |
844 | L(less4_double_words): /* edx = (pmovmskb mask - 0xffff) != 0 for the 16 bytes at offset 0: locate the first element that differs or is zero */ | |
95584d3b | 845 | xor %eax, %eax /* preset result 0 for the case where that element is the shared terminator */ |
1b48c537 UD |
846 | test %dl, %dl /* dl == 0: elements 0 and 1 were both equal and non-zero */ |
847 | jz L(next_two_double_words) | |
848 | and $15, %dl /* low nibble == 0: element 0 was fine, so element 1 is the one */ | |
849 | jz L(second_double_word) | |
95584d3b LD |
850 | mov (%esi), %ecx |
851 | cmp %ecx, (%edi) /* flags = edi element - esi element */ | |
852 | jne L(nequal) /* differs: sign is decided in L(nequal) */ | |
1b48c537 UD |
853 | RETURN /* equal, so this element is zero in both strings: return 0 */ |
854 | ||
855 | .p2align 4 | |
856 | L(second_double_word): /* element 1 (byte offset 4) */ | |
95584d3b LD |
857 | mov 4(%esi), %ecx |
858 | cmp %ecx, 4(%edi) | |
859 | jne L(nequal) | |
1b48c537 UD |
860 | RETURN |
861 | ||
862 | .p2align 4 | |
863 | L(next_two_double_words): | |
864 | and $15, %dh /* low nibble of dh == 0: element 2 was fine, so element 3 is the one */ | |
865 | jz L(fourth_double_word) | |
95584d3b LD |
866 | mov 8(%esi), %ecx |
867 | cmp %ecx, 8(%edi) | |
868 | jne L(nequal) | |
1b48c537 UD |
869 | RETURN |
870 | ||
871 | .p2align 4 | |
872 | L(fourth_double_word): /* element 3 (byte offset 12) */ | |
95584d3b LD |
873 | mov 12(%esi), %ecx |
874 | cmp %ecx, 12(%edi) | |
875 | jne L(nequal) | |
1b48c537 UD |
876 | RETURN |
877 | ||
878 | .p2align 4 | |
879 | L(less4_double_words_16): /* edx = (pmovmskb mask - 0xffff) != 0 for the 16 bytes at offset 16: locate the first element that differs or is zero */ | |
95584d3b | 880 | xor %eax, %eax /* preset result 0 for the case where that element is the shared terminator */ |
1b48c537 UD |
881 | test %dl, %dl /* dl == 0: the first two elements of this group were equal and non-zero */ |
882 | jz L(next_two_double_words_16) | |
883 | and $15, %dl /* low nibble == 0: first element was fine, so the second is the one */ | |
884 | jz L(second_double_word_16) | |
95584d3b LD |
885 | mov 16(%esi), %ecx |
886 | cmp %ecx, 16(%edi) /* flags = edi element - esi element */ | |
887 | jne L(nequal) /* differs: sign is decided in L(nequal) */ | |
1b48c537 UD |
888 | RETURN /* equal, so this element is zero in both strings: return 0 */ |
889 | ||
890 | .p2align 4 | |
891 | L(second_double_word_16): /* byte offset 20 */ | |
95584d3b LD |
892 | mov 20(%esi), %ecx |
893 | cmp %ecx, 20(%edi) | |
894 | jne L(nequal) | |
1b48c537 UD |
895 | RETURN |
896 | ||
897 | .p2align 4 | |
898 | L(next_two_double_words_16): | |
899 | and $15, %dh /* low nibble of dh == 0: third element was fine, so the fourth is the one */ | |
900 | jz L(fourth_double_word_16) | |
95584d3b LD |
901 | mov 24(%esi), %ecx |
902 | cmp %ecx, 24(%edi) | |
903 | jne L(nequal) | |
1b48c537 UD |
904 | RETURN |
905 | ||
906 | .p2align 4 | |
907 | L(fourth_double_word_16): /* byte offset 28 */ | |
95584d3b LD |
908 | mov 28(%esi), %ecx |
909 | cmp %ecx, 28(%edi) | |
910 | jne L(nequal) | |
1b48c537 UD |
911 | RETURN |
912 | ||
913 | .p2align 4 | |
914 | L(less4_double_words_32): /* edx = (pmovmskb mask - 0xffff) != 0 for the 16 bytes at offset 32: locate the first element that differs or is zero */ | |
95584d3b | 915 | xor %eax, %eax /* preset result 0 for the case where that element is the shared terminator */ |
1b48c537 UD |
916 | test %dl, %dl /* dl == 0: the first two elements of this group were equal and non-zero */ |
917 | jz L(next_two_double_words_32) | |
918 | and $15, %dl /* low nibble == 0: first element was fine, so the second is the one */ | |
919 | jz L(second_double_word_32) | |
95584d3b LD |
920 | mov 32(%esi), %ecx |
921 | cmp %ecx, 32(%edi) /* flags = edi element - esi element */ | |
922 | jne L(nequal) /* differs: sign is decided in L(nequal) */ | |
1b48c537 UD |
923 | RETURN /* equal, so this element is zero in both strings: return 0 */ |
924 | ||
925 | .p2align 4 | |
926 | L(second_double_word_32): /* byte offset 36 */ | |
95584d3b LD |
927 | mov 36(%esi), %ecx |
928 | cmp %ecx, 36(%edi) | |
929 | jne L(nequal) | |
1b48c537 UD |
930 | RETURN |
931 | ||
932 | .p2align 4 | |
933 | L(next_two_double_words_32): | |
934 | and $15, %dh /* low nibble of dh == 0: third element was fine, so the fourth is the one */ | |
935 | jz L(fourth_double_word_32) | |
95584d3b LD |
936 | mov 40(%esi), %ecx |
937 | cmp %ecx, 40(%edi) | |
938 | jne L(nequal) | |
1b48c537 UD |
939 | RETURN |
940 | ||
941 | .p2align 4 | |
942 | L(fourth_double_word_32): /* byte offset 44 */ | |
95584d3b LD |
943 | mov 44(%esi), %ecx |
944 | cmp %ecx, 44(%edi) | |
945 | jne L(nequal) | |
1b48c537 UD |
946 | RETURN |
947 | ||
948 | .p2align 4 | |
949 | L(less4_double_words_48): /* edx = (pmovmskb mask - 0xffff) != 0 for the 16 bytes at offset 48: locate the first element that differs or is zero */ | |
95584d3b | 950 | xor %eax, %eax /* preset result 0 for the case where that element is the shared terminator */ |
1b48c537 UD |
951 | test %dl, %dl /* dl == 0: the first two elements of this group were equal and non-zero */ |
952 | jz L(next_two_double_words_48) | |
953 | and $15, %dl /* low nibble == 0: first element was fine, so the second is the one */ | |
954 | jz L(second_double_word_48) | |
95584d3b LD |
955 | mov 48(%esi), %ecx |
956 | cmp %ecx, 48(%edi) /* flags = edi element - esi element */ | |
957 | jne L(nequal) /* differs: sign is decided in L(nequal) */ | |
1b48c537 UD |
958 | RETURN /* equal, so this element is zero in both strings: return 0 */ |
959 | ||
960 | .p2align 4 | |
961 | L(second_double_word_48): /* byte offset 52 */ | |
95584d3b LD |
962 | mov 52(%esi), %ecx |
963 | cmp %ecx, 52(%edi) | |
964 | jne L(nequal) | |
1b48c537 UD |
965 | RETURN |
966 | ||
967 | .p2align 4 | |
968 | L(next_two_double_words_48): | |
969 | and $15, %dh /* low nibble of dh == 0: third element was fine, so the fourth is the one */ | |
970 | jz L(fourth_double_word_48) | |
95584d3b LD |
971 | mov 56(%esi), %ecx |
972 | cmp %ecx, 56(%edi) | |
973 | jne L(nequal) | |
1b48c537 UD |
974 | RETURN |
975 | ||
976 | .p2align 4 | |
977 | L(fourth_double_word_48): /* byte offset 60 */ | |
95584d3b LD |
978 | mov 60(%esi), %ecx |
979 | cmp %ecx, 60(%edi) | |
980 | jne L(nequal) | |
1b48c537 UD |
981 | RETURN |
982 | ||
983 | .p2align 4 | |
984 | L(nequal): /* entered via jne with flags from a cmp of the edi-side element against the esi-side element */ | |
985 | mov $1, %eax /* mov leaves the flags untouched, so the cmp result is still live */ | |
95584d3b | 986 | jg L(return) /* signed: edi element > esi element, return 1 */ |
1b48c537 | 987 | neg %eax /* otherwise return -1 */ |
95584d3b | 988 | RETURN |
1b48c537 | 989 | |
95584d3b LD |
990 | .p2align 4 |
991 | L(return): /* pop %edi/%esi and return with eax as already set */ | |
1b48c537 UD |
992 | RETURN |
993 | ||
994 | .p2align 4 | |
995 | L(equal): /* strings compare equal: return 0 */ | |
996 | xorl %eax, %eax | |
997 | RETURN | |
998 | ||
999 | CFI_POP (%edi) | |
1000 | CFI_POP (%esi) | |
1001 | ||
1002 | .p2align 4 | |
1003 | L(neq): /* mismatch exit for the scalar checks made before ENTRANCE: nothing was pushed, so plain ret is used */ | |
1004 | mov $1, %eax /* mov leaves the flags of the preceding cmp untouched */ | |
95584d3b | 1005 | jg L(neq_bigger) /* signed: STR1 element > STR2 element, return 1 */ |
1b48c537 UD |
1006 | neg %eax /* otherwise return -1 */ |
1007 | ||
1008 | L(neq_bigger): | |
1009 | ret | |
1010 | ||
1011 | .p2align 4 | |
1012 | L(eq): /* equal exit for the scalar checks made before ENTRANCE: return 0 */ | |
1013 | xorl %eax, %eax | |
1014 | ret | |
1015 | ||
95584d3b | 1016 | END (__wcscmp_sse2) |
1b48c537 | 1017 | #endif |