]>
Commit | Line | Data |
---|---|---|
49d42c37 | 1 | /* Optimized wcscmp for x86-64 with SSE2. |
b168057a | 2 | Copyright (C) 2011-2015 Free Software Foundation, Inc. |
49d42c37 UD |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 PE |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ | |
49d42c37 UD |
19 | |
20 | #include <sysdep.h> | |
49d42c37 | 21 | |
95584d3b LD |
22 | /* Note: wcscmp uses signed comparison, not unsigned comparison as strcmp does. */ |
23 | ||
49d42c37 UD |
24 | .text |
25 | ENTRY (wcscmp) | |
26 | /* | |
27 | * This implementation uses SSE to compare up to 16 bytes at a time. | |
28 | */ | |
29 | mov %esi, %eax | |
30 | mov %edi, %edx | |
31 | pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ | |
32 | mov %al, %ch | |
33 | mov %dl, %cl | |
34 | and $63, %eax /* rsi alignment in cache line */ | |
35 | and $63, %edx /* rdi alignment in cache line */ | |
36 | and $15, %cl | |
37 | jz L(continue_00) | |
38 | cmp $16, %edx | |
39 | jb L(continue_0) | |
40 | cmp $32, %edx | |
41 | jb L(continue_16) | |
42 | cmp $48, %edx | |
43 | jb L(continue_32) | |
44 | ||
45 | L(continue_48): | |
46 | and $15, %ch | |
47 | jz L(continue_48_00) | |
48 | cmp $16, %eax | |
49 | jb L(continue_0_48) | |
50 | cmp $32, %eax | |
51 | jb L(continue_16_48) | |
52 | cmp $48, %eax | |
53 | jb L(continue_32_48) | |
54 | ||
55 | .p2align 4 | |
56 | L(continue_48_48): | |
57 | mov (%rsi), %ecx | |
58 | cmp %ecx, (%rdi) | |
59 | jne L(nequal) | |
60 | test %ecx, %ecx | |
61 | jz L(equal) | |
62 | ||
63 | mov 4(%rsi), %ecx | |
64 | cmp %ecx, 4(%rdi) | |
65 | jne L(nequal) | |
66 | test %ecx, %ecx | |
67 | jz L(equal) | |
68 | ||
69 | mov 8(%rsi), %ecx | |
70 | cmp %ecx, 8(%rdi) | |
71 | jne L(nequal) | |
72 | test %ecx, %ecx | |
73 | jz L(equal) | |
74 | ||
75 | mov 12(%rsi), %ecx | |
76 | cmp %ecx, 12(%rdi) | |
77 | jne L(nequal) | |
78 | test %ecx, %ecx | |
79 | jz L(equal) | |
f17424ed | 80 | |
49d42c37 UD |
81 | movdqu 16(%rdi), %xmm1 |
82 | movdqu 16(%rsi), %xmm2 | |
83 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
84 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
85 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
86 | pmovmskb %xmm1, %edx | |
87 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
88 | jnz L(less4_double_words_16) | |
89 | ||
90 | movdqu 32(%rdi), %xmm1 | |
91 | movdqu 32(%rsi), %xmm2 | |
92 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
93 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
94 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
95 | pmovmskb %xmm1, %edx | |
96 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
97 | jnz L(less4_double_words_32) | |
98 | ||
99 | movdqu 48(%rdi), %xmm1 | |
100 | movdqu 48(%rsi), %xmm2 | |
101 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
102 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
103 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
104 | pmovmskb %xmm1, %edx | |
105 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
106 | jnz L(less4_double_words_48) | |
107 | ||
108 | add $64, %rsi | |
109 | add $64, %rdi | |
110 | jmp L(continue_48_48) | |
111 | ||
112 | L(continue_0): | |
113 | and $15, %ch | |
114 | jz L(continue_0_00) | |
115 | cmp $16, %eax | |
116 | jb L(continue_0_0) | |
117 | cmp $32, %eax | |
118 | jb L(continue_0_16) | |
119 | cmp $48, %eax | |
120 | jb L(continue_0_32) | |
121 | ||
122 | .p2align 4 | |
123 | L(continue_0_48): | |
124 | mov (%rsi), %ecx | |
125 | cmp %ecx, (%rdi) | |
126 | jne L(nequal) | |
127 | test %ecx, %ecx | |
128 | jz L(equal) | |
129 | ||
130 | mov 4(%rsi), %ecx | |
131 | cmp %ecx, 4(%rdi) | |
132 | jne L(nequal) | |
133 | test %ecx, %ecx | |
134 | jz L(equal) | |
135 | ||
136 | mov 8(%rsi), %ecx | |
137 | cmp %ecx, 8(%rdi) | |
138 | jne L(nequal) | |
139 | test %ecx, %ecx | |
140 | jz L(equal) | |
141 | ||
142 | mov 12(%rsi), %ecx | |
143 | cmp %ecx, 12(%rdi) | |
144 | jne L(nequal) | |
145 | test %ecx, %ecx | |
146 | jz L(equal) | |
147 | ||
148 | movdqu 16(%rdi), %xmm1 | |
149 | movdqu 16(%rsi), %xmm2 | |
150 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
151 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
152 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
153 | pmovmskb %xmm1, %edx | |
154 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
155 | jnz L(less4_double_words_16) | |
156 | ||
157 | movdqu 32(%rdi), %xmm1 | |
158 | movdqu 32(%rsi), %xmm2 | |
159 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
160 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
161 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
162 | pmovmskb %xmm1, %edx | |
163 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
164 | jnz L(less4_double_words_32) | |
165 | ||
166 | mov 48(%rsi), %ecx | |
167 | cmp %ecx, 48(%rdi) | |
168 | jne L(nequal) | |
169 | test %ecx, %ecx | |
170 | jz L(equal) | |
171 | ||
172 | mov 52(%rsi), %ecx | |
173 | cmp %ecx, 52(%rdi) | |
174 | jne L(nequal) | |
175 | test %ecx, %ecx | |
176 | jz L(equal) | |
177 | ||
178 | mov 56(%rsi), %ecx | |
179 | cmp %ecx, 56(%rdi) | |
180 | jne L(nequal) | |
181 | test %ecx, %ecx | |
182 | jz L(equal) | |
183 | ||
184 | mov 60(%rsi), %ecx | |
185 | cmp %ecx, 60(%rdi) | |
186 | jne L(nequal) | |
187 | test %ecx, %ecx | |
188 | jz L(equal) | |
189 | ||
190 | add $64, %rsi | |
191 | add $64, %rdi | |
192 | jmp L(continue_0_48) | |
193 | ||
194 | .p2align 4 | |
195 | L(continue_00): | |
196 | and $15, %ch | |
197 | jz L(continue_00_00) | |
198 | cmp $16, %eax | |
199 | jb L(continue_00_0) | |
200 | cmp $32, %eax | |
201 | jb L(continue_00_16) | |
202 | cmp $48, %eax | |
203 | jb L(continue_00_32) | |
204 | ||
205 | .p2align 4 | |
206 | L(continue_00_48): | |
207 | pcmpeqd (%rdi), %xmm0 | |
208 | mov (%rdi), %eax | |
209 | pmovmskb %xmm0, %ecx | |
210 | test %ecx, %ecx | |
211 | jnz L(less4_double_words1) | |
212 | ||
95584d3b LD |
213 | cmp (%rsi), %eax |
214 | jne L(nequal) | |
f17424ed | 215 | |
49d42c37 | 216 | mov 4(%rdi), %eax |
95584d3b LD |
217 | cmp 4(%rsi), %eax |
218 | jne L(nequal) | |
49d42c37 UD |
219 | |
220 | mov 8(%rdi), %eax | |
95584d3b LD |
221 | cmp 8(%rsi), %eax |
222 | jne L(nequal) | |
49d42c37 UD |
223 | |
224 | mov 12(%rdi), %eax | |
95584d3b LD |
225 | cmp 12(%rsi), %eax |
226 | jne L(nequal) | |
f17424ed | 227 | |
49d42c37 UD |
228 | movdqu 16(%rsi), %xmm2 |
229 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
230 | pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ | |
231 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
232 | pmovmskb %xmm2, %edx | |
233 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
234 | jnz L(less4_double_words_16) | |
235 | ||
236 | movdqu 32(%rsi), %xmm2 | |
237 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
238 | pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */ | |
239 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
240 | pmovmskb %xmm2, %edx | |
241 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
242 | jnz L(less4_double_words_32) | |
243 | ||
244 | movdqu 48(%rsi), %xmm2 | |
245 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
246 | pcmpeqd 48(%rdi), %xmm2 /* compare first 4 double_words for equality */ | |
247 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
248 | pmovmskb %xmm2, %edx | |
249 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
250 | jnz L(less4_double_words_48) | |
251 | ||
252 | add $64, %rsi | |
253 | add $64, %rdi | |
254 | jmp L(continue_00_48) | |
255 | ||
256 | .p2align 4 | |
257 | L(continue_32): | |
258 | and $15, %ch | |
259 | jz L(continue_32_00) | |
260 | cmp $16, %eax | |
261 | jb L(continue_0_32) | |
262 | cmp $32, %eax | |
263 | jb L(continue_16_32) | |
264 | cmp $48, %eax | |
265 | jb L(continue_32_32) | |
266 | ||
267 | .p2align 4 | |
268 | L(continue_32_48): | |
269 | mov (%rsi), %ecx | |
270 | cmp %ecx, (%rdi) | |
271 | jne L(nequal) | |
272 | test %ecx, %ecx | |
273 | jz L(equal) | |
274 | ||
275 | mov 4(%rsi), %ecx | |
276 | cmp %ecx, 4(%rdi) | |
277 | jne L(nequal) | |
278 | test %ecx, %ecx | |
279 | jz L(equal) | |
280 | ||
281 | mov 8(%rsi), %ecx | |
282 | cmp %ecx, 8(%rdi) | |
283 | jne L(nequal) | |
284 | test %ecx, %ecx | |
285 | jz L(equal) | |
286 | ||
287 | mov 12(%rsi), %ecx | |
288 | cmp %ecx, 12(%rdi) | |
289 | jne L(nequal) | |
290 | test %ecx, %ecx | |
291 | jz L(equal) | |
292 | ||
293 | mov 16(%rsi), %ecx | |
294 | cmp %ecx, 16(%rdi) | |
295 | jne L(nequal) | |
296 | test %ecx, %ecx | |
297 | jz L(equal) | |
298 | ||
299 | mov 20(%rsi), %ecx | |
300 | cmp %ecx, 20(%rdi) | |
301 | jne L(nequal) | |
302 | test %ecx, %ecx | |
303 | jz L(equal) | |
304 | ||
305 | mov 24(%rsi), %ecx | |
306 | cmp %ecx, 24(%rdi) | |
307 | jne L(nequal) | |
308 | test %ecx, %ecx | |
309 | jz L(equal) | |
310 | ||
311 | mov 28(%rsi), %ecx | |
312 | cmp %ecx, 28(%rdi) | |
313 | jne L(nequal) | |
314 | test %ecx, %ecx | |
315 | jz L(equal) | |
316 | ||
317 | movdqu 32(%rdi), %xmm1 | |
318 | movdqu 32(%rsi), %xmm2 | |
319 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
320 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
321 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
322 | pmovmskb %xmm1, %edx | |
323 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
324 | jnz L(less4_double_words_32) | |
325 | ||
326 | movdqu 48(%rdi), %xmm1 | |
327 | movdqu 48(%rsi), %xmm2 | |
328 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
329 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
330 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
331 | pmovmskb %xmm1, %edx | |
332 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
333 | jnz L(less4_double_words_48) | |
334 | ||
335 | add $64, %rsi | |
336 | add $64, %rdi | |
337 | jmp L(continue_32_48) | |
338 | ||
339 | .p2align 4 | |
340 | L(continue_16): | |
341 | and $15, %ch | |
342 | jz L(continue_16_00) | |
343 | cmp $16, %eax | |
344 | jb L(continue_0_16) | |
345 | cmp $32, %eax | |
346 | jb L(continue_16_16) | |
347 | cmp $48, %eax | |
348 | jb L(continue_16_32) | |
349 | ||
350 | .p2align 4 | |
351 | L(continue_16_48): | |
352 | mov (%rsi), %ecx | |
353 | cmp %ecx, (%rdi) | |
354 | jne L(nequal) | |
355 | test %ecx, %ecx | |
356 | jz L(equal) | |
357 | ||
358 | mov 4(%rsi), %ecx | |
359 | cmp %ecx, 4(%rdi) | |
360 | jne L(nequal) | |
361 | test %ecx, %ecx | |
362 | jz L(equal) | |
363 | ||
364 | mov 8(%rsi), %ecx | |
365 | cmp %ecx, 8(%rdi) | |
366 | jne L(nequal) | |
367 | test %ecx, %ecx | |
368 | jz L(equal) | |
369 | ||
370 | mov 12(%rsi), %ecx | |
371 | cmp %ecx, 12(%rdi) | |
372 | jne L(nequal) | |
373 | test %ecx, %ecx | |
374 | jz L(equal) | |
375 | ||
376 | movdqu 16(%rdi), %xmm1 | |
377 | movdqu 16(%rsi), %xmm2 | |
378 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
379 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
380 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
381 | pmovmskb %xmm1, %edx | |
382 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
383 | jnz L(less4_double_words_16) | |
384 | ||
385 | mov 32(%rsi), %ecx | |
386 | cmp %ecx, 32(%rdi) | |
387 | jne L(nequal) | |
388 | test %ecx, %ecx | |
389 | jz L(equal) | |
390 | ||
391 | mov 36(%rsi), %ecx | |
392 | cmp %ecx, 36(%rdi) | |
393 | jne L(nequal) | |
394 | test %ecx, %ecx | |
395 | jz L(equal) | |
396 | ||
397 | mov 40(%rsi), %ecx | |
398 | cmp %ecx, 40(%rdi) | |
399 | jne L(nequal) | |
400 | test %ecx, %ecx | |
401 | jz L(equal) | |
402 | ||
403 | mov 44(%rsi), %ecx | |
404 | cmp %ecx, 44(%rdi) | |
405 | jne L(nequal) | |
406 | test %ecx, %ecx | |
407 | jz L(equal) | |
408 | ||
409 | movdqu 48(%rdi), %xmm1 | |
410 | movdqu 48(%rsi), %xmm2 | |
411 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
412 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
413 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
414 | pmovmskb %xmm1, %edx | |
415 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
416 | jnz L(less4_double_words_48) | |
417 | ||
418 | add $64, %rsi | |
419 | add $64, %rdi | |
420 | jmp L(continue_16_48) | |
421 | ||
422 | .p2align 4 | |
423 | L(continue_00_00): | |
424 | movdqa (%rdi), %xmm1 | |
425 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
426 | pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ | |
427 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
428 | pmovmskb %xmm1, %edx | |
429 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
430 | jnz L(less4_double_words) | |
431 | ||
432 | movdqa 16(%rdi), %xmm3 | |
433 | pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ | |
434 | pcmpeqd 16(%rsi), %xmm3 /* compare first 4 double_words for equality */ | |
435 | psubb %xmm0, %xmm3 /* packed sub of comparison results*/ | |
436 | pmovmskb %xmm3, %edx | |
437 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
438 | jnz L(less4_double_words_16) | |
439 | ||
440 | movdqa 32(%rdi), %xmm5 | |
441 | pcmpeqd %xmm5, %xmm0 /* Any null double_word? */ | |
442 | pcmpeqd 32(%rsi), %xmm5 /* compare first 4 double_words for equality */ | |
443 | psubb %xmm0, %xmm5 /* packed sub of comparison results*/ | |
444 | pmovmskb %xmm5, %edx | |
445 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
446 | jnz L(less4_double_words_32) | |
447 | ||
448 | movdqa 48(%rdi), %xmm1 | |
449 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
450 | pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */ | |
451 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
452 | pmovmskb %xmm1, %edx | |
453 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
454 | jnz L(less4_double_words_48) | |
455 | ||
456 | add $64, %rsi | |
457 | add $64, %rdi | |
458 | jmp L(continue_00_00) | |
459 | ||
460 | .p2align 4 | |
461 | L(continue_00_32): | |
462 | movdqu (%rsi), %xmm2 | |
463 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
464 | pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ | |
465 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
466 | pmovmskb %xmm2, %edx | |
467 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
468 | jnz L(less4_double_words) | |
469 | ||
470 | add $16, %rsi | |
471 | add $16, %rdi | |
472 | jmp L(continue_00_48) | |
473 | ||
474 | .p2align 4 | |
475 | L(continue_00_16): | |
476 | movdqu (%rsi), %xmm2 | |
477 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
478 | pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ | |
479 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
480 | pmovmskb %xmm2, %edx | |
481 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
482 | jnz L(less4_double_words) | |
483 | ||
484 | movdqu 16(%rsi), %xmm2 | |
485 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
486 | pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ | |
487 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
488 | pmovmskb %xmm2, %edx | |
489 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
490 | jnz L(less4_double_words_16) | |
491 | ||
492 | add $32, %rsi | |
493 | add $32, %rdi | |
494 | jmp L(continue_00_48) | |
495 | ||
496 | .p2align 4 | |
497 | L(continue_00_0): | |
498 | movdqu (%rsi), %xmm2 | |
499 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
500 | pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ | |
501 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
502 | pmovmskb %xmm2, %edx | |
503 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
504 | jnz L(less4_double_words) | |
505 | ||
506 | movdqu 16(%rsi), %xmm2 | |
507 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
508 | pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ | |
509 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
510 | pmovmskb %xmm2, %edx | |
511 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
512 | jnz L(less4_double_words_16) | |
513 | ||
514 | movdqu 32(%rsi), %xmm2 | |
515 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ | |
516 | pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */ | |
517 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
518 | pmovmskb %xmm2, %edx | |
519 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
520 | jnz L(less4_double_words_32) | |
521 | ||
522 | add $48, %rsi | |
523 | add $48, %rdi | |
524 | jmp L(continue_00_48) | |
525 | ||
526 | .p2align 4 | |
527 | L(continue_48_00): | |
528 | pcmpeqd (%rsi), %xmm0 | |
529 | mov (%rdi), %eax | |
530 | pmovmskb %xmm0, %ecx | |
531 | test %ecx, %ecx | |
532 | jnz L(less4_double_words1) | |
533 | ||
95584d3b LD |
534 | cmp (%rsi), %eax |
535 | jne L(nequal) | |
f17424ed | 536 | |
49d42c37 | 537 | mov 4(%rdi), %eax |
95584d3b LD |
538 | cmp 4(%rsi), %eax |
539 | jne L(nequal) | |
49d42c37 UD |
540 | |
541 | mov 8(%rdi), %eax | |
95584d3b LD |
542 | cmp 8(%rsi), %eax |
543 | jne L(nequal) | |
49d42c37 UD |
544 | |
545 | mov 12(%rdi), %eax | |
95584d3b LD |
546 | cmp 12(%rsi), %eax |
547 | jne L(nequal) | |
f17424ed | 548 | |
49d42c37 UD |
549 | movdqu 16(%rdi), %xmm1 |
550 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
551 | pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ | |
552 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
553 | pmovmskb %xmm1, %edx | |
554 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
555 | jnz L(less4_double_words_16) | |
556 | ||
557 | movdqu 32(%rdi), %xmm1 | |
558 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
559 | pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */ | |
560 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
561 | pmovmskb %xmm1, %edx | |
562 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
563 | jnz L(less4_double_words_32) | |
564 | ||
565 | movdqu 48(%rdi), %xmm1 | |
566 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
567 | pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */ | |
568 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
569 | pmovmskb %xmm1, %edx | |
570 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
571 | jnz L(less4_double_words_48) | |
572 | ||
573 | add $64, %rsi | |
574 | add $64, %rdi | |
575 | jmp L(continue_48_00) | |
576 | ||
577 | .p2align 4 | |
578 | L(continue_32_00): | |
579 | movdqu (%rdi), %xmm1 | |
580 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
581 | pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ | |
582 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
583 | pmovmskb %xmm1, %edx | |
584 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
585 | jnz L(less4_double_words) | |
586 | ||
587 | add $16, %rsi | |
588 | add $16, %rdi | |
589 | jmp L(continue_48_00) | |
590 | ||
591 | .p2align 4 | |
592 | L(continue_16_00): | |
593 | movdqu (%rdi), %xmm1 | |
594 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
595 | pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ | |
596 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
597 | pmovmskb %xmm1, %edx | |
598 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
599 | jnz L(less4_double_words) | |
600 | ||
601 | movdqu 16(%rdi), %xmm1 | |
602 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
603 | pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ | |
604 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
605 | pmovmskb %xmm1, %edx | |
606 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
607 | jnz L(less4_double_words_16) | |
608 | ||
609 | add $32, %rsi | |
610 | add $32, %rdi | |
611 | jmp L(continue_48_00) | |
612 | ||
613 | .p2align 4 | |
614 | L(continue_0_00): | |
615 | movdqu (%rdi), %xmm1 | |
616 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
617 | pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ | |
618 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
619 | pmovmskb %xmm1, %edx | |
620 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
621 | jnz L(less4_double_words) | |
622 | ||
623 | movdqu 16(%rdi), %xmm1 | |
624 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
625 | pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ | |
626 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
627 | pmovmskb %xmm1, %edx | |
628 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
629 | jnz L(less4_double_words_16) | |
630 | ||
631 | movdqu 32(%rdi), %xmm1 | |
632 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
633 | pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */ | |
634 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
635 | pmovmskb %xmm1, %edx | |
636 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
637 | jnz L(less4_double_words_32) | |
638 | ||
639 | add $48, %rsi | |
640 | add $48, %rdi | |
641 | jmp L(continue_48_00) | |
642 | ||
643 | .p2align 4 | |
644 | L(continue_32_32): | |
645 | movdqu (%rdi), %xmm1 | |
646 | movdqu (%rsi), %xmm2 | |
647 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
648 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
649 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
650 | pmovmskb %xmm1, %edx | |
651 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
652 | jnz L(less4_double_words) | |
653 | ||
654 | add $16, %rsi | |
655 | add $16, %rdi | |
656 | jmp L(continue_48_48) | |
657 | ||
658 | .p2align 4 | |
659 | L(continue_16_16): | |
660 | movdqu (%rdi), %xmm1 | |
661 | movdqu (%rsi), %xmm2 | |
662 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
663 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
664 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
665 | pmovmskb %xmm1, %edx | |
666 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
667 | jnz L(less4_double_words) | |
668 | ||
669 | movdqu 16(%rdi), %xmm3 | |
670 | movdqu 16(%rsi), %xmm4 | |
671 | pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ | |
672 | pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ | |
673 | psubb %xmm0, %xmm3 /* packed sub of comparison results*/ | |
674 | pmovmskb %xmm3, %edx | |
675 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
676 | jnz L(less4_double_words_16) | |
677 | ||
678 | add $32, %rsi | |
679 | add $32, %rdi | |
680 | jmp L(continue_48_48) | |
681 | ||
682 | .p2align 4 | |
683 | L(continue_0_0): | |
684 | movdqu (%rdi), %xmm1 | |
685 | movdqu (%rsi), %xmm2 | |
686 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
687 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
688 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
689 | pmovmskb %xmm1, %edx | |
690 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
691 | jnz L(less4_double_words) | |
692 | ||
693 | movdqu 16(%rdi), %xmm3 | |
694 | movdqu 16(%rsi), %xmm4 | |
695 | pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ | |
696 | pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ | |
697 | psubb %xmm0, %xmm3 /* packed sub of comparison results*/ | |
698 | pmovmskb %xmm3, %edx | |
699 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
700 | jnz L(less4_double_words_16) | |
701 | ||
702 | movdqu 32(%rdi), %xmm1 | |
703 | movdqu 32(%rsi), %xmm2 | |
704 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
705 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
706 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
707 | pmovmskb %xmm1, %edx | |
708 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
709 | jnz L(less4_double_words_32) | |
710 | ||
711 | add $48, %rsi | |
712 | add $48, %rdi | |
713 | jmp L(continue_48_48) | |
714 | ||
715 | .p2align 4 | |
716 | L(continue_0_16): | |
717 | movdqu (%rdi), %xmm1 | |
718 | movdqu (%rsi), %xmm2 | |
719 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
720 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
721 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
722 | pmovmskb %xmm1, %edx | |
723 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
724 | jnz L(less4_double_words) | |
725 | ||
726 | movdqu 16(%rdi), %xmm1 | |
727 | movdqu 16(%rsi), %xmm2 | |
728 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
729 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
730 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
731 | pmovmskb %xmm1, %edx | |
732 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
733 | jnz L(less4_double_words_16) | |
734 | ||
735 | add $32, %rsi | |
736 | add $32, %rdi | |
737 | jmp L(continue_32_48) | |
738 | ||
739 | .p2align 4 | |
740 | L(continue_0_32): | |
741 | movdqu (%rdi), %xmm1 | |
742 | movdqu (%rsi), %xmm2 | |
743 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
744 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
745 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
746 | pmovmskb %xmm1, %edx | |
747 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
748 | jnz L(less4_double_words) | |
749 | ||
750 | add $16, %rsi | |
751 | add $16, %rdi | |
752 | jmp L(continue_16_48) | |
753 | ||
754 | .p2align 4 | |
755 | L(continue_16_32): | |
756 | movdqu (%rdi), %xmm1 | |
757 | movdqu (%rsi), %xmm2 | |
758 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ | |
759 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ | |
760 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
761 | pmovmskb %xmm1, %edx | |
762 | sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ | |
763 | jnz L(less4_double_words) | |
764 | ||
765 | add $16, %rsi | |
766 | add $16, %rdi | |
767 | jmp L(continue_32_48) | |
768 | ||
769 | .p2align 4 | |
770 | L(less4_double_words1): | |
771 | cmp (%rsi), %eax | |
772 | jne L(nequal) | |
773 | test %eax, %eax | |
774 | jz L(equal) | |
775 | ||
776 | mov 4(%rsi), %ecx | |
777 | cmp %ecx, 4(%rdi) | |
778 | jne L(nequal) | |
779 | test %ecx, %ecx | |
780 | jz L(equal) | |
781 | ||
782 | mov 8(%rsi), %ecx | |
783 | cmp %ecx, 8(%rdi) | |
784 | jne L(nequal) | |
785 | test %ecx, %ecx | |
786 | jz L(equal) | |
787 | ||
95584d3b LD |
788 | mov 12(%rsi), %ecx |
789 | cmp %ecx, 12(%rdi) | |
790 | jne L(nequal) | |
791 | xor %eax, %eax | |
49d42c37 UD |
792 | ret |
793 | ||
794 | .p2align 4 | |
795 | L(less4_double_words): | |
95584d3b | 796 | xor %eax, %eax |
49d42c37 UD |
797 | test %dl, %dl |
798 | jz L(next_two_double_words) | |
799 | and $15, %dl | |
800 | jz L(second_double_word) | |
801 | mov (%rdi), %eax | |
95584d3b LD |
802 | cmp (%rsi), %eax |
803 | jne L(nequal) | |
49d42c37 UD |
804 | ret |
805 | ||
806 | .p2align 4 | |
807 | L(second_double_word): | |
808 | mov 4(%rdi), %eax | |
95584d3b LD |
809 | cmp 4(%rsi), %eax |
810 | jne L(nequal) | |
49d42c37 UD |
811 | ret |
812 | ||
813 | .p2align 4 | |
814 | L(next_two_double_words): | |
815 | and $15, %dh | |
816 | jz L(fourth_double_word) | |
817 | mov 8(%rdi), %eax | |
95584d3b LD |
818 | cmp 8(%rsi), %eax |
819 | jne L(nequal) | |
49d42c37 UD |
820 | ret |
821 | ||
822 | .p2align 4 | |
823 | L(fourth_double_word): | |
824 | mov 12(%rdi), %eax | |
95584d3b LD |
825 | cmp 12(%rsi), %eax |
826 | jne L(nequal) | |
49d42c37 UD |
827 | ret |
828 | ||
829 | .p2align 4 | |
830 | L(less4_double_words_16): | |
95584d3b | 831 | xor %eax, %eax |
49d42c37 UD |
832 | test %dl, %dl |
833 | jz L(next_two_double_words_16) | |
834 | and $15, %dl | |
835 | jz L(second_double_word_16) | |
836 | mov 16(%rdi), %eax | |
95584d3b LD |
837 | cmp 16(%rsi), %eax |
838 | jne L(nequal) | |
49d42c37 UD |
839 | ret |
840 | ||
841 | .p2align 4 | |
842 | L(second_double_word_16): | |
843 | mov 20(%rdi), %eax | |
95584d3b LD |
844 | cmp 20(%rsi), %eax |
845 | jne L(nequal) | |
49d42c37 UD |
846 | ret |
847 | ||
848 | .p2align 4 | |
849 | L(next_two_double_words_16): | |
850 | and $15, %dh | |
851 | jz L(fourth_double_word_16) | |
852 | mov 24(%rdi), %eax | |
95584d3b LD |
853 | cmp 24(%rsi), %eax |
854 | jne L(nequal) | |
49d42c37 UD |
855 | ret |
856 | ||
857 | .p2align 4 | |
858 | L(fourth_double_word_16): | |
859 | mov 28(%rdi), %eax | |
95584d3b LD |
860 | cmp 28(%rsi), %eax |
861 | jne L(nequal) | |
49d42c37 UD |
862 | ret |
863 | ||
864 | .p2align 4 | |
865 | L(less4_double_words_32): | |
95584d3b | 866 | xor %eax, %eax |
49d42c37 UD |
867 | test %dl, %dl |
868 | jz L(next_two_double_words_32) | |
869 | and $15, %dl | |
870 | jz L(second_double_word_32) | |
871 | mov 32(%rdi), %eax | |
95584d3b LD |
872 | cmp 32(%rsi), %eax |
873 | jne L(nequal) | |
49d42c37 UD |
874 | ret |
875 | ||
876 | .p2align 4 | |
877 | L(second_double_word_32): | |
878 | mov 36(%rdi), %eax | |
95584d3b LD |
879 | cmp 36(%rsi), %eax |
880 | jne L(nequal) | |
49d42c37 UD |
881 | ret |
882 | ||
883 | .p2align 4 | |
884 | L(next_two_double_words_32): | |
885 | and $15, %dh | |
886 | jz L(fourth_double_word_32) | |
887 | mov 40(%rdi), %eax | |
95584d3b LD |
888 | cmp 40(%rsi), %eax |
889 | jne L(nequal) | |
49d42c37 UD |
890 | ret |
891 | ||
892 | .p2align 4 | |
893 | L(fourth_double_word_32): | |
894 | mov 44(%rdi), %eax | |
95584d3b LD |
895 | cmp 44(%rsi), %eax |
896 | jne L(nequal) | |
49d42c37 UD |
897 | ret |
898 | ||
899 | .p2align 4 | |
900 | L(less4_double_words_48): | |
95584d3b | 901 | xor %eax, %eax |
49d42c37 UD |
902 | test %dl, %dl |
903 | jz L(next_two_double_words_48) | |
904 | and $15, %dl | |
905 | jz L(second_double_word_48) | |
906 | mov 48(%rdi), %eax | |
95584d3b LD |
907 | cmp 48(%rsi), %eax |
908 | jne L(nequal) | |
49d42c37 UD |
909 | ret |
910 | ||
911 | .p2align 4 | |
912 | L(second_double_word_48): | |
913 | mov 52(%rdi), %eax | |
95584d3b LD |
914 | cmp 52(%rsi), %eax |
915 | jne L(nequal) | |
49d42c37 UD |
916 | ret |
917 | ||
918 | .p2align 4 | |
919 | L(next_two_double_words_48): | |
920 | and $15, %dh | |
921 | jz L(fourth_double_word_48) | |
922 | mov 56(%rdi), %eax | |
95584d3b LD |
923 | cmp 56(%rsi), %eax |
924 | jne L(nequal) | |
49d42c37 UD |
925 | ret |
926 | ||
927 | .p2align 4 | |
928 | L(fourth_double_word_48): | |
929 | mov 60(%rdi), %eax | |
95584d3b LD |
930 | cmp 60(%rsi), %eax |
931 | jne L(nequal) | |
49d42c37 UD |
932 | ret |
933 | ||
934 | .p2align 4 | |
935 | L(nequal): /* Shared exit for a mismatching pair; entered by jne with the flags of the preceding cmp still live. */ | |
936 | mov $1, %eax /* preload +1; mov does not modify the flags */ | |
95584d3b | 937 | jg L(nequal_bigger) /* signed test: element from (%rdi) is greater than the one from (%rsi) */ |
49d42c37 UD |
938 | neg %eax /* otherwise the (%rdi) element is smaller: return -1 */ |
939 | ||
940 | L(nequal_bigger): | |
941 | ret | |
942 | ||
943 | .p2align 4 | |
944 | L(equal): /* Reached when a matching pair of elements is the null terminator: the strings are equal. */ | |
945 | xor %eax, %eax /* return 0; the 32-bit form also clears the upper half of %rax and matches the other zeroing sites in this routine */ | |
946 | ret | |
947 | ||
948 | END (wcscmp) | |
949 | libc_hidden_def (wcscmp) |