/* wcscpy with SSSE3
   Copyright (C) 2011-2019 Free Software Foundation, Inc.
   Contributed by Intel Corporation.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

4f41c682 | 20 | #if IS_IN (libc) |
1d3e4b61 UD |
21 | # include <sysdep.h> |
22 | ||
23 | # define CFI_PUSH(REG) \ | |
24 | cfi_adjust_cfa_offset (4); \ | |
25 | cfi_rel_offset (REG, 0) | |
26 | ||
27 | # define CFI_POP(REG) \ | |
28 | cfi_adjust_cfa_offset (-4); \ | |
29 | cfi_restore (REG) | |
30 | ||
31 | # define PUSH(REG) pushl REG; CFI_PUSH (REG) | |
32 | # define POP(REG) popl REG; CFI_POP (REG) | |
33 | ||
34 | # define PARMS 4 | |
35 | # define RETURN POP (%edi); ret; CFI_PUSH (%edi) | |
36 | # define STR1 PARMS | |
37 | # define STR2 STR1+4 | |
38 | # define LEN STR2+4 | |
39 | ||
40 | atom_text_section | |
41 | ENTRY (__wcscpy_ssse3) | |
42 | mov STR1(%esp), %edx | |
43 | mov STR2(%esp), %ecx | |
44 | ||
45 | cmp $0, (%ecx) | |
46 | jz L(ExitTail4) | |
47 | cmp $0, 4(%ecx) | |
48 | jz L(ExitTail8) | |
49 | cmp $0, 8(%ecx) | |
50 | jz L(ExitTail12) | |
51 | cmp $0, 12(%ecx) | |
52 | jz L(ExitTail16) | |
53 | ||
54 | PUSH (%edi) | |
55 | mov %edx, %edi | |
1d3e4b61 UD |
56 | PUSH (%esi) |
57 | lea 16(%ecx), %esi | |
58 | ||
59 | and $-16, %esi | |
60 | ||
61 | pxor %xmm0, %xmm0 | |
62 | pcmpeqd (%esi), %xmm0 | |
63 | movdqu (%ecx), %xmm1 | |
64 | movdqu %xmm1, (%edx) | |
65 | ||
66 | pmovmskb %xmm0, %eax | |
67 | sub %ecx, %esi | |
68 | ||
69 | test %eax, %eax | |
70 | jnz L(CopyFrom1To16Bytes) | |
71 | ||
72 | mov %edx, %eax | |
73 | lea 16(%edx), %edx | |
74 | and $-16, %edx | |
75 | sub %edx, %eax | |
76 | ||
77 | sub %eax, %ecx | |
78 | mov %ecx, %eax | |
79 | and $0xf, %eax | |
80 | mov $0, %esi | |
81 | ||
82 | jz L(Align16Both) | |
83 | cmp $4, %eax | |
84 | je L(Shl4) | |
85 | cmp $8, %eax | |
86 | je L(Shl8) | |
87 | jmp L(Shl12) | |
88 | ||
89 | L(Align16Both): | |
90 | movaps (%ecx), %xmm1 | |
91 | movaps 16(%ecx), %xmm2 | |
92 | movaps %xmm1, (%edx) | |
93 | pcmpeqd %xmm2, %xmm0 | |
94 | pmovmskb %xmm0, %eax | |
95 | lea 16(%esi), %esi | |
96 | ||
97 | test %eax, %eax | |
98 | jnz L(CopyFrom1To16Bytes) | |
99 | ||
100 | movaps 16(%ecx, %esi), %xmm3 | |
101 | movaps %xmm2, (%edx, %esi) | |
102 | pcmpeqd %xmm3, %xmm0 | |
103 | pmovmskb %xmm0, %eax | |
104 | lea 16(%esi), %esi | |
105 | ||
106 | test %eax, %eax | |
107 | jnz L(CopyFrom1To16Bytes) | |
108 | ||
109 | movaps 16(%ecx, %esi), %xmm4 | |
110 | movaps %xmm3, (%edx, %esi) | |
111 | pcmpeqd %xmm4, %xmm0 | |
112 | pmovmskb %xmm0, %eax | |
113 | lea 16(%esi), %esi | |
114 | ||
115 | test %eax, %eax | |
116 | jnz L(CopyFrom1To16Bytes) | |
117 | ||
118 | movaps 16(%ecx, %esi), %xmm1 | |
119 | movaps %xmm4, (%edx, %esi) | |
120 | pcmpeqd %xmm1, %xmm0 | |
121 | pmovmskb %xmm0, %eax | |
122 | lea 16(%esi), %esi | |
123 | ||
124 | test %eax, %eax | |
125 | jnz L(CopyFrom1To16Bytes) | |
126 | ||
127 | movaps 16(%ecx, %esi), %xmm2 | |
128 | movaps %xmm1, (%edx, %esi) | |
129 | pcmpeqd %xmm2, %xmm0 | |
130 | pmovmskb %xmm0, %eax | |
131 | lea 16(%esi), %esi | |
132 | ||
133 | test %eax, %eax | |
134 | jnz L(CopyFrom1To16Bytes) | |
135 | ||
136 | movaps 16(%ecx, %esi), %xmm3 | |
137 | movaps %xmm2, (%edx, %esi) | |
138 | pcmpeqd %xmm3, %xmm0 | |
139 | pmovmskb %xmm0, %eax | |
140 | lea 16(%esi), %esi | |
141 | ||
142 | test %eax, %eax | |
143 | jnz L(CopyFrom1To16Bytes) | |
144 | ||
145 | movaps %xmm3, (%edx, %esi) | |
146 | mov %ecx, %eax | |
147 | lea 16(%ecx, %esi), %ecx | |
148 | and $-0x40, %ecx | |
149 | sub %ecx, %eax | |
150 | sub %eax, %edx | |
151 | ||
152 | mov $-0x40, %esi | |
153 | ||
154 | L(Aligned64Loop): | |
155 | movaps (%ecx), %xmm2 | |
156 | movaps 32(%ecx), %xmm3 | |
157 | movaps %xmm2, %xmm4 | |
158 | movaps 16(%ecx), %xmm5 | |
159 | movaps %xmm3, %xmm6 | |
160 | movaps 48(%ecx), %xmm7 | |
161 | pminub %xmm5, %xmm2 | |
162 | pminub %xmm7, %xmm3 | |
163 | pminub %xmm2, %xmm3 | |
164 | lea 64(%edx), %edx | |
165 | pcmpeqd %xmm0, %xmm3 | |
166 | lea 64(%ecx), %ecx | |
167 | pmovmskb %xmm3, %eax | |
168 | ||
169 | test %eax, %eax | |
170 | jnz L(Aligned64Leave) | |
171 | movaps %xmm4, -64(%edx) | |
172 | movaps %xmm5, -48(%edx) | |
173 | movaps %xmm6, -32(%edx) | |
174 | movaps %xmm7, -16(%edx) | |
175 | jmp L(Aligned64Loop) | |
176 | ||
177 | L(Aligned64Leave): | |
178 | pcmpeqd %xmm4, %xmm0 | |
179 | pmovmskb %xmm0, %eax | |
180 | test %eax, %eax | |
181 | jnz L(CopyFrom1To16Bytes) | |
182 | ||
183 | pcmpeqd %xmm5, %xmm0 | |
184 | pmovmskb %xmm0, %eax | |
185 | movaps %xmm4, -64(%edx) | |
186 | test %eax, %eax | |
187 | lea 16(%esi), %esi | |
188 | jnz L(CopyFrom1To16Bytes) | |
189 | ||
190 | pcmpeqd %xmm6, %xmm0 | |
191 | pmovmskb %xmm0, %eax | |
192 | movaps %xmm5, -48(%edx) | |
193 | test %eax, %eax | |
194 | lea 16(%esi), %esi | |
195 | jnz L(CopyFrom1To16Bytes) | |
196 | ||
197 | movaps %xmm6, -32(%edx) | |
198 | pcmpeqd %xmm7, %xmm0 | |
199 | pmovmskb %xmm0, %eax | |
200 | test %eax, %eax | |
201 | lea 16(%esi), %esi | |
202 | jnz L(CopyFrom1To16Bytes) | |
203 | ||
204 | mov $-0x40, %esi | |
205 | movaps %xmm7, -16(%edx) | |
206 | jmp L(Aligned64Loop) | |
207 | ||
208 | .p2align 4 | |
209 | L(Shl4): | |
210 | movaps -4(%ecx), %xmm1 | |
211 | movaps 12(%ecx), %xmm2 | |
212 | L(Shl4Start): | |
213 | pcmpeqd %xmm2, %xmm0 | |
214 | pmovmskb %xmm0, %eax | |
215 | movaps %xmm2, %xmm3 | |
216 | ||
217 | test %eax, %eax | |
218 | jnz L(Shl4LoopExit) | |
219 | ||
220 | palignr $4, %xmm1, %xmm2 | |
1d3e4b61 UD |
221 | movaps %xmm2, (%edx) |
222 | movaps 28(%ecx), %xmm2 | |
223 | ||
224 | pcmpeqd %xmm2, %xmm0 | |
225 | lea 16(%edx), %edx | |
226 | pmovmskb %xmm0, %eax | |
227 | lea 16(%ecx), %ecx | |
c044cf14 | 228 | movaps %xmm2, %xmm1 |
1d3e4b61 UD |
229 | |
230 | test %eax, %eax | |
231 | jnz L(Shl4LoopExit) | |
232 | ||
c044cf14 | 233 | palignr $4, %xmm3, %xmm2 |
1d3e4b61 UD |
234 | movaps %xmm2, (%edx) |
235 | movaps 28(%ecx), %xmm2 | |
1d3e4b61 UD |
236 | |
237 | pcmpeqd %xmm2, %xmm0 | |
238 | lea 16(%edx), %edx | |
239 | pmovmskb %xmm0, %eax | |
240 | lea 16(%ecx), %ecx | |
241 | movaps %xmm2, %xmm3 | |
242 | ||
243 | test %eax, %eax | |
244 | jnz L(Shl4LoopExit) | |
245 | ||
246 | palignr $4, %xmm1, %xmm2 | |
1d3e4b61 UD |
247 | movaps %xmm2, (%edx) |
248 | movaps 28(%ecx), %xmm2 | |
249 | ||
250 | pcmpeqd %xmm2, %xmm0 | |
251 | lea 16(%edx), %edx | |
252 | pmovmskb %xmm0, %eax | |
253 | lea 16(%ecx), %ecx | |
1d3e4b61 UD |
254 | |
255 | test %eax, %eax | |
256 | jnz L(Shl4LoopExit) | |
257 | ||
c044cf14 | 258 | palignr $4, %xmm3, %xmm2 |
1d3e4b61 UD |
259 | movaps %xmm2, (%edx) |
260 | lea 28(%ecx), %ecx | |
261 | lea 16(%edx), %edx | |
262 | ||
263 | mov %ecx, %eax | |
264 | and $-0x40, %ecx | |
265 | sub %ecx, %eax | |
266 | lea -12(%ecx), %ecx | |
267 | sub %eax, %edx | |
268 | ||
269 | movaps -4(%ecx), %xmm1 | |
270 | ||
271 | L(Shl4LoopStart): | |
272 | movaps 12(%ecx), %xmm2 | |
273 | movaps 28(%ecx), %xmm3 | |
274 | movaps %xmm3, %xmm6 | |
275 | movaps 44(%ecx), %xmm4 | |
276 | movaps %xmm4, %xmm7 | |
277 | movaps 60(%ecx), %xmm5 | |
278 | pminub %xmm2, %xmm6 | |
279 | pminub %xmm5, %xmm7 | |
280 | pminub %xmm6, %xmm7 | |
281 | pcmpeqd %xmm0, %xmm7 | |
282 | pmovmskb %xmm7, %eax | |
283 | movaps %xmm5, %xmm7 | |
284 | palignr $4, %xmm4, %xmm5 | |
285 | test %eax, %eax | |
286 | palignr $4, %xmm3, %xmm4 | |
287 | jnz L(Shl4Start) | |
288 | ||
289 | palignr $4, %xmm2, %xmm3 | |
290 | lea 64(%ecx), %ecx | |
291 | palignr $4, %xmm1, %xmm2 | |
292 | movaps %xmm7, %xmm1 | |
293 | movaps %xmm5, 48(%edx) | |
294 | movaps %xmm4, 32(%edx) | |
295 | movaps %xmm3, 16(%edx) | |
296 | movaps %xmm2, (%edx) | |
297 | lea 64(%edx), %edx | |
298 | jmp L(Shl4LoopStart) | |
299 | ||
300 | L(Shl4LoopExit): | |
c044cf14 LD |
301 | movlpd (%ecx), %xmm0 |
302 | movl 8(%ecx), %esi | |
303 | movlpd %xmm0, (%edx) | |
304 | movl %esi, 8(%edx) | |
305 | POP (%esi) | |
1d3e4b61 UD |
306 | add $12, %edx |
307 | add $12, %ecx | |
1d3e4b61 UD |
308 | test %al, %al |
309 | jz L(ExitHigh) | |
310 | test $0x01, %al | |
311 | jnz L(Exit4) | |
312 | movlpd (%ecx), %xmm0 | |
313 | movlpd %xmm0, (%edx) | |
314 | movl %edi, %eax | |
315 | RETURN | |
316 | ||
317 | CFI_PUSH (%esi) | |
318 | ||
319 | .p2align 4 | |
320 | L(Shl8): | |
321 | movaps -8(%ecx), %xmm1 | |
322 | movaps 8(%ecx), %xmm2 | |
323 | L(Shl8Start): | |
324 | pcmpeqd %xmm2, %xmm0 | |
325 | pmovmskb %xmm0, %eax | |
326 | movaps %xmm2, %xmm3 | |
327 | ||
328 | test %eax, %eax | |
329 | jnz L(Shl8LoopExit) | |
330 | ||
331 | palignr $8, %xmm1, %xmm2 | |
1d3e4b61 UD |
332 | movaps %xmm2, (%edx) |
333 | movaps 24(%ecx), %xmm2 | |
334 | ||
335 | pcmpeqd %xmm2, %xmm0 | |
336 | lea 16(%edx), %edx | |
337 | pmovmskb %xmm0, %eax | |
338 | lea 16(%ecx), %ecx | |
c044cf14 | 339 | movaps %xmm2, %xmm1 |
1d3e4b61 UD |
340 | |
341 | test %eax, %eax | |
342 | jnz L(Shl8LoopExit) | |
343 | ||
c044cf14 | 344 | palignr $8, %xmm3, %xmm2 |
1d3e4b61 UD |
345 | movaps %xmm2, (%edx) |
346 | movaps 24(%ecx), %xmm2 | |
1d3e4b61 UD |
347 | |
348 | pcmpeqd %xmm2, %xmm0 | |
349 | lea 16(%edx), %edx | |
350 | pmovmskb %xmm0, %eax | |
351 | lea 16(%ecx), %ecx | |
352 | movaps %xmm2, %xmm3 | |
353 | ||
354 | test %eax, %eax | |
355 | jnz L(Shl8LoopExit) | |
356 | ||
357 | palignr $8, %xmm1, %xmm2 | |
1d3e4b61 UD |
358 | movaps %xmm2, (%edx) |
359 | movaps 24(%ecx), %xmm2 | |
360 | ||
361 | pcmpeqd %xmm2, %xmm0 | |
362 | lea 16(%edx), %edx | |
363 | pmovmskb %xmm0, %eax | |
364 | lea 16(%ecx), %ecx | |
1d3e4b61 UD |
365 | |
366 | test %eax, %eax | |
367 | jnz L(Shl8LoopExit) | |
368 | ||
c044cf14 | 369 | palignr $8, %xmm3, %xmm2 |
1d3e4b61 UD |
370 | movaps %xmm2, (%edx) |
371 | lea 24(%ecx), %ecx | |
372 | lea 16(%edx), %edx | |
373 | ||
374 | mov %ecx, %eax | |
375 | and $-0x40, %ecx | |
376 | sub %ecx, %eax | |
377 | lea -8(%ecx), %ecx | |
378 | sub %eax, %edx | |
379 | ||
380 | movaps -8(%ecx), %xmm1 | |
381 | ||
382 | L(Shl8LoopStart): | |
383 | movaps 8(%ecx), %xmm2 | |
384 | movaps 24(%ecx), %xmm3 | |
385 | movaps %xmm3, %xmm6 | |
386 | movaps 40(%ecx), %xmm4 | |
387 | movaps %xmm4, %xmm7 | |
388 | movaps 56(%ecx), %xmm5 | |
389 | pminub %xmm2, %xmm6 | |
390 | pminub %xmm5, %xmm7 | |
391 | pminub %xmm6, %xmm7 | |
392 | pcmpeqd %xmm0, %xmm7 | |
393 | pmovmskb %xmm7, %eax | |
394 | movaps %xmm5, %xmm7 | |
395 | palignr $8, %xmm4, %xmm5 | |
396 | test %eax, %eax | |
397 | palignr $8, %xmm3, %xmm4 | |
398 | jnz L(Shl8Start) | |
399 | ||
400 | palignr $8, %xmm2, %xmm3 | |
401 | lea 64(%ecx), %ecx | |
402 | palignr $8, %xmm1, %xmm2 | |
403 | movaps %xmm7, %xmm1 | |
404 | movaps %xmm5, 48(%edx) | |
405 | movaps %xmm4, 32(%edx) | |
406 | movaps %xmm3, 16(%edx) | |
407 | movaps %xmm2, (%edx) | |
408 | lea 64(%edx), %edx | |
409 | jmp L(Shl8LoopStart) | |
410 | ||
411 | L(Shl8LoopExit): | |
c044cf14 LD |
412 | movlpd (%ecx), %xmm0 |
413 | movlpd %xmm0, (%edx) | |
414 | POP (%esi) | |
1d3e4b61 UD |
415 | add $8, %edx |
416 | add $8, %ecx | |
1d3e4b61 UD |
417 | test %al, %al |
418 | jz L(ExitHigh) | |
419 | test $0x01, %al | |
420 | jnz L(Exit4) | |
421 | movlpd (%ecx), %xmm0 | |
422 | movlpd %xmm0, (%edx) | |
423 | movl %edi, %eax | |
424 | RETURN | |
425 | ||
426 | CFI_PUSH (%esi) | |
427 | ||
428 | .p2align 4 | |
429 | L(Shl12): | |
430 | movaps -12(%ecx), %xmm1 | |
431 | movaps 4(%ecx), %xmm2 | |
432 | L(Shl12Start): | |
433 | pcmpeqd %xmm2, %xmm0 | |
434 | pmovmskb %xmm0, %eax | |
435 | movaps %xmm2, %xmm3 | |
436 | ||
437 | test %eax, %eax | |
438 | jnz L(Shl12LoopExit) | |
439 | ||
440 | palignr $12, %xmm1, %xmm2 | |
1d3e4b61 UD |
441 | movaps %xmm2, (%edx) |
442 | movaps 20(%ecx), %xmm2 | |
443 | ||
444 | pcmpeqd %xmm2, %xmm0 | |
445 | lea 16(%edx), %edx | |
446 | pmovmskb %xmm0, %eax | |
447 | lea 16(%ecx), %ecx | |
c044cf14 | 448 | movaps %xmm2, %xmm1 |
1d3e4b61 UD |
449 | |
450 | test %eax, %eax | |
451 | jnz L(Shl12LoopExit) | |
452 | ||
c044cf14 | 453 | palignr $12, %xmm3, %xmm2 |
1d3e4b61 UD |
454 | movaps %xmm2, (%edx) |
455 | movaps 20(%ecx), %xmm2 | |
1d3e4b61 UD |
456 | |
457 | pcmpeqd %xmm2, %xmm0 | |
458 | lea 16(%edx), %edx | |
459 | pmovmskb %xmm0, %eax | |
460 | lea 16(%ecx), %ecx | |
461 | movaps %xmm2, %xmm3 | |
462 | ||
463 | test %eax, %eax | |
464 | jnz L(Shl12LoopExit) | |
465 | ||
466 | palignr $12, %xmm1, %xmm2 | |
1d3e4b61 UD |
467 | movaps %xmm2, (%edx) |
468 | movaps 20(%ecx), %xmm2 | |
469 | ||
470 | pcmpeqd %xmm2, %xmm0 | |
471 | lea 16(%edx), %edx | |
472 | pmovmskb %xmm0, %eax | |
473 | lea 16(%ecx), %ecx | |
1d3e4b61 UD |
474 | |
475 | test %eax, %eax | |
476 | jnz L(Shl12LoopExit) | |
477 | ||
c044cf14 | 478 | palignr $12, %xmm3, %xmm2 |
1d3e4b61 UD |
479 | movaps %xmm2, (%edx) |
480 | lea 20(%ecx), %ecx | |
481 | lea 16(%edx), %edx | |
482 | ||
483 | mov %ecx, %eax | |
484 | and $-0x40, %ecx | |
485 | sub %ecx, %eax | |
486 | lea -4(%ecx), %ecx | |
487 | sub %eax, %edx | |
488 | ||
489 | movaps -12(%ecx), %xmm1 | |
490 | ||
491 | L(Shl12LoopStart): | |
492 | movaps 4(%ecx), %xmm2 | |
493 | movaps 20(%ecx), %xmm3 | |
494 | movaps %xmm3, %xmm6 | |
495 | movaps 36(%ecx), %xmm4 | |
496 | movaps %xmm4, %xmm7 | |
497 | movaps 52(%ecx), %xmm5 | |
498 | pminub %xmm2, %xmm6 | |
499 | pminub %xmm5, %xmm7 | |
500 | pminub %xmm6, %xmm7 | |
501 | pcmpeqd %xmm0, %xmm7 | |
502 | pmovmskb %xmm7, %eax | |
503 | movaps %xmm5, %xmm7 | |
504 | palignr $12, %xmm4, %xmm5 | |
505 | test %eax, %eax | |
506 | palignr $12, %xmm3, %xmm4 | |
507 | jnz L(Shl12Start) | |
508 | ||
509 | palignr $12, %xmm2, %xmm3 | |
510 | lea 64(%ecx), %ecx | |
511 | palignr $12, %xmm1, %xmm2 | |
512 | movaps %xmm7, %xmm1 | |
513 | movaps %xmm5, 48(%edx) | |
514 | movaps %xmm4, 32(%edx) | |
515 | movaps %xmm3, 16(%edx) | |
516 | movaps %xmm2, (%edx) | |
517 | lea 64(%edx), %edx | |
518 | jmp L(Shl12LoopStart) | |
519 | ||
520 | L(Shl12LoopExit): | |
c044cf14 LD |
521 | movl (%ecx), %esi |
522 | movl %esi, (%edx) | |
1d3e4b61 | 523 | mov $4, %esi |
1d3e4b61 UD |
524 | |
525 | .p2align 4 | |
526 | L(CopyFrom1To16Bytes): | |
527 | add %esi, %edx | |
528 | add %esi, %ecx | |
529 | ||
530 | POP (%esi) | |
531 | test %al, %al | |
532 | jz L(ExitHigh) | |
533 | test $0x01, %al | |
534 | jnz L(Exit4) | |
c044cf14 | 535 | L(Exit8): |
1d3e4b61 UD |
536 | movlpd (%ecx), %xmm0 |
537 | movlpd %xmm0, (%edx) | |
538 | movl %edi, %eax | |
539 | RETURN | |
540 | ||
541 | .p2align 4 | |
542 | L(ExitHigh): | |
543 | test $0x01, %ah | |
544 | jnz L(Exit12) | |
c044cf14 | 545 | L(Exit16): |
1d3e4b61 UD |
546 | movdqu (%ecx), %xmm0 |
547 | movdqu %xmm0, (%edx) | |
548 | movl %edi, %eax | |
549 | RETURN | |
550 | ||
551 | .p2align 4 | |
552 | L(Exit4): | |
553 | movl (%ecx), %eax | |
554 | movl %eax, (%edx) | |
555 | movl %edi, %eax | |
556 | RETURN | |
557 | ||
558 | .p2align 4 | |
559 | L(Exit12): | |
560 | movlpd (%ecx), %xmm0 | |
561 | movlpd %xmm0, (%edx) | |
562 | movl 8(%ecx), %eax | |
563 | movl %eax, 8(%edx) | |
564 | movl %edi, %eax | |
565 | RETURN | |
566 | ||
567 | CFI_POP (%edi) | |
568 | ||
569 | .p2align 4 | |
570 | L(ExitTail4): | |
571 | movl (%ecx), %eax | |
572 | movl %eax, (%edx) | |
573 | movl %edx, %eax | |
574 | ret | |
575 | ||
576 | .p2align 4 | |
577 | L(ExitTail8): | |
578 | movlpd (%ecx), %xmm0 | |
579 | movlpd %xmm0, (%edx) | |
580 | movl %edx, %eax | |
581 | ret | |
582 | ||
583 | .p2align 4 | |
584 | L(ExitTail12): | |
585 | movlpd (%ecx), %xmm0 | |
586 | movlpd %xmm0, (%edx) | |
587 | movl 8(%ecx), %eax | |
588 | movl %eax, 8(%edx) | |
589 | movl %edx, %eax | |
590 | ret | |
591 | ||
592 | .p2align 4 | |
593 | L(ExitTail16): | |
594 | movdqu (%ecx), %xmm0 | |
595 | movdqu %xmm0, (%edx) | |
596 | movl %edx, %eax | |
597 | ret | |
598 | ||
599 | END (__wcscpy_ssse3) | |
600 | #endif |