]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/multiarch/strcpy-sse2.S
640fd06422593c40204254c618ee32ef4f9cfa6b
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / strcpy-sse2.S
1 /* strcpy with SSE2 and unaligned load
2 Copyright (C) 2011-2014 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20
21 #if IS_IN (libc)
22
23 # include <sysdep.h>
24
25
/* Unwind (CFI) bookkeeping that accompanies a 4-byte push of REG: adjust
   the CFA offset by +4 and record REG at relative offset 0.  The cfi_*
   helpers are defined outside this file (presumably thin wrappers over
   the assembler's .cfi_* directives -- confirm in sysdep.h).  */
26 # define CFI_PUSH(REG) \
27 cfi_adjust_cfa_offset (4); \
28 cfi_rel_offset (REG, 0)
29
/* Counterpart of CFI_PUSH for a 4-byte pop: adjust the CFA offset by -4
   and mark REG as restored.  */
30 # define CFI_POP(REG) \
31 cfi_adjust_cfa_offset (-4); \
32 cfi_restore (REG)
33
/* Push / pop a 32-bit register and emit the matching unwind annotation
   in the same macro, so the instruction and its CFI stay together.  */
34 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
35 # define POP(REG) popl REG; CFI_POP (REG)
36
/* Default symbol name of the function built from this file.  An earlier
   definition of STRCPY (for example from a wrapper that includes this
   file) takes precedence.  */
37 # ifndef STRCPY
38 # define STRCPY __strcpy_sse2
39 # endif
40
/* Offsets, relative to %esp after ENTRANCE has run, of the three stack
   arguments.  They occupy consecutive 4-byte slots starting at PARMS.  */
41 # define STR1 PARMS
42 # define STR2 STR1+4
43 # define LEN STR2+4
44
/* USE_AS_STRNCPY build.  ENTRANCE pushes three registers, so the first
   argument is 4 (return address) + 3 * 4 (pushes) = 16 bytes above %esp.
   RETURN pops them in reverse order and returns.  The CFI_PUSH
   annotations placed after the `ret' switch the unwind description back
   to "three registers pushed" for the code that follows, because RETURN
   is expanded many times in the middle of the function body.  */
45 # ifdef USE_AS_STRNCPY
46 # define PARMS 16
47 # define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
48 # define RETURN POP(%edi); POP(%esi); POP(%ebx); ret; \
49 CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
50
/* Jump-table support.  JMPTBL(I, B) is how one table entry is written:
   with SHARED it is the offset of label I from the table base B, which
   BRANCH_TO_JMPTBL_ENTRY converts to an absolute address at run time;
   without SHARED it is the address of I itself.  */
51 # ifdef SHARED
52 # define JMPTBL(I, B) I - B
53
54 /* Load an entry in a jump table into ECX and branch to it. TABLE is a
55 jump table with relative offsets.
56 INDEX is a register that contains the index into the jump table.
57 SCALE is the scale of INDEX. ECX is overwritten, so INDEX must not be ECX. */
58
59 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
60 /* We first load PC into ECX. */ \
61 SETUP_PIC_REG(cx); \
62 /* Get the address of the jump table. */ \
63 addl $(TABLE - .), %ecx; \
64 /* Get the entry and convert the relative offset to the \
65 absolute address. */ \
66 addl (%ecx,INDEX,SCALE), %ecx; \
67 /* We loaded the jump table and adjusted ECX. Go. */ \
68 jmp *%ecx
69 # else
70 # define JMPTBL(I, B) I
71
72 /* Branch to an entry in a jump table. TABLE is a jump table with
73 absolute addresses. INDEX is a register that contains the index into the
74 jump table. SCALE is the scale of INDEX. */
75
76 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
77 jmp *TABLE(,INDEX,SCALE)
78 # endif
79
/* STRCPY, USE_AS_STRNCPY build.  Three 4-byte stack arguments are read:
   destination (edi), source (esi) and byte count (ebx).  Unless
   USE_AS_STPCPY is defined, eax is loaded with the original destination
   here; the STPCPY build sets eax in the individual exit paths.
   ecx and edx are scratch (misalignment, NUL bit masks, table indexes).
   This block deals with the first two 16-byte source blocks when the
   source is not 16-byte aligned and then falls through into
   L(Unalign16Both).  */
80 .text
81 ENTRY (STRCPY)
82 ENTRANCE
83 mov STR1(%esp), %edi
84 mov STR2(%esp), %esi
85 movl LEN(%esp), %ebx
/* A count of zero copies nothing.  */
86 test %ebx, %ebx
87 jz L(ExitZero)
88
89 mov %esi, %ecx
90 # ifndef USE_AS_STPCPY
91 mov %edi, %eax /* save result */
92 # endif
/* ecx = source address modulo 16; zero means the source is aligned.  */
93 and $15, %ecx
94 jz L(SourceStringAlignmentZero)
95
/* Round the source down to its enclosing 16-byte block so the block can
   be examined with an aligned memory operand.  */
96 and $-16, %esi
97 pxor %xmm0, %xmm0
98 pxor %xmm1, %xmm1
99
/* xmm1 was zero, so after pcmpeqb it holds 0xff in every position where
   the block has a zero byte.  ebx is re-based to count from the
   rounded-down address.  `shr %cl' discards the mask bits of the bytes
   that precede the real start of the string.
   NOTE(review): the 32-bit `add' can wrap for counts within 15 of 2^32;
   such counts look unreachable with valid buffers in a 32-bit address
   space -- confirm.  */
100 pcmpeqb (%esi), %xmm1
101 add %ecx, %ebx
102 pmovmskb %xmm1, %edx
103 shr %cl, %edx
/* Small count: it is used up within this first block (or, in the
   non-STPCPY build, at most one byte past it).  */
104 # ifdef USE_AS_STPCPY
105 cmp $16, %ebx
106 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
107 # else
108 cmp $17, %ebx
109 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
110 # endif
/* A NUL in the first block with count to spare.  */
111 test %edx, %edx
112 jnz L(CopyFrom1To16BytesTail)
113
/* Same two checks for the second aligned block.  xmm0 is zero going in
   and receives the NUL mask.  */
114 pcmpeqb 16(%esi), %xmm0
115 pmovmskb %xmm0, %edx
116 # ifdef USE_AS_STPCPY
117 cmp $32, %ebx
118 jbe L(CopyFrom1To32BytesCase2OrCase3)
119 # else
120 cmp $33, %ebx
121 jbe L(CopyFrom1To32BytesCase2OrCase3)
122 # endif
123 test %edx, %edx
124 jnz L(CopyFrom1To32Bytes)
125
/* No NUL in the first two blocks (so xmm0 is all-zero again): copy the
   first 16 bytes starting at the original, unaligned source address.  */
126 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
127 movdqu %xmm1, (%edi)
128
/* Bias edi by the misalignment so that (edi + k) is the destination of
   the source byte at (esi + k) from here on.  */
129 sub %ecx, %edi
130
131 /* Main 16-byte stepping copy: aligned loads from the source, unaligned stores to the destination. Entered by fall-through from the misaligned head and by a jump from L(SourceStringAlignmentZero). */
132 .p2align 4
133 L(Unalign16Both):
/* Invariants for every step below:
     - ecx is the offset (from esi / edi) of the block that was examined
       by the previous step and is known to contain no NUL;
     - xmm0 is all-zero on entry to a step; pcmpeqb overwrites it with
       the NUL mask of the next block, which is all-zero again whenever
       execution falls through to the following step;
     - after the `sub', ebx is the part of the count that lies beyond
       the end of the block just examined; `jbe' therefore means the
       count ends inside (or exactly at the end of) that block.
   The block just examined is not stored by the step itself; it is
   stored by the next step or by the handler that is branched to.  */
/* Step 1: store the block at offset 16, examine the block at offset 32.
   The first `sub' is 48 because ebx still counts from esi.  */
134 mov $16, %ecx
135 movdqa (%esi, %ecx), %xmm1
136 movaps 16(%esi, %ecx), %xmm2
137 movdqu %xmm1, (%edi, %ecx)
138 pcmpeqb %xmm2, %xmm0
139 pmovmskb %xmm0, %edx
140 add $16, %ecx
141 sub $48, %ebx
142 jbe L(CopyFrom1To16BytesCase2OrCase3)
143 test %edx, %edx
144 jnz L(CopyFrom1To16BytesUnalignedXmm2)
145
/* Step 2: store xmm2, examine the next block in xmm3.  */
146 movaps 16(%esi, %ecx), %xmm3
147 movdqu %xmm2, (%edi, %ecx)
148 pcmpeqb %xmm3, %xmm0
149 pmovmskb %xmm0, %edx
150 add $16, %ecx
151 sub $16, %ebx
152 jbe L(CopyFrom1To16BytesCase2OrCase3)
153 test %edx, %edx
154 jnz L(CopyFrom1To16BytesUnalignedXmm3)
155
/* Step 3: store xmm3, examine the next block in xmm4.  */
156 movaps 16(%esi, %ecx), %xmm4
157 movdqu %xmm3, (%edi, %ecx)
158 pcmpeqb %xmm4, %xmm0
159 pmovmskb %xmm0, %edx
160 add $16, %ecx
161 sub $16, %ebx
162 jbe L(CopyFrom1To16BytesCase2OrCase3)
163 test %edx, %edx
164 jnz L(CopyFrom1To16BytesUnalignedXmm4)
165
/* Step 4: store xmm4, examine the next block in xmm1.  */
166 movaps 16(%esi, %ecx), %xmm1
167 movdqu %xmm4, (%edi, %ecx)
168 pcmpeqb %xmm1, %xmm0
169 pmovmskb %xmm0, %edx
170 add $16, %ecx
171 sub $16, %ebx
172 jbe L(CopyFrom1To16BytesCase2OrCase3)
173 test %edx, %edx
174 jnz L(CopyFrom1To16BytesUnalignedXmm1)
175
/* Step 5: store xmm1, examine the next block in xmm2.  */
176 movaps 16(%esi, %ecx), %xmm2
177 movdqu %xmm1, (%edi, %ecx)
178 pcmpeqb %xmm2, %xmm0
179 pmovmskb %xmm0, %edx
180 add $16, %ecx
181 sub $16, %ebx
182 jbe L(CopyFrom1To16BytesCase2OrCase3)
183 test %edx, %edx
184 jnz L(CopyFrom1To16BytesUnalignedXmm2)
185
/* Step 6: store xmm2, examine the next block in xmm3.  */
186 movaps 16(%esi, %ecx), %xmm3
187 movdqu %xmm2, (%edi, %ecx)
188 pcmpeqb %xmm3, %xmm0
189 pmovmskb %xmm0, %edx
190 add $16, %ecx
191 sub $16, %ebx
192 jbe L(CopyFrom1To16BytesCase2OrCase3)
193 test %edx, %edx
194 jnz L(CopyFrom1To16BytesUnalignedXmm3)
195
/* Hand over to the 64-byte loop.  Store the last examined block, then
   set esi to the address just past it (esi + ecx + 16, i.e. old esi +
   128 at this point) rounded down to a 64-byte boundary.  edx = old esi
   - new esi; edi is moved by the same distance as esi, and ebx is
   re-based so that it counts from the new esi.  Rounding down means the
   loop may store again some bytes that were already copied.  */
196 movdqu %xmm3, (%edi, %ecx)
197 mov %esi, %edx
198 lea 16(%esi, %ecx), %esi
199 and $-0x40, %esi
200 sub %esi, %edx
201 sub %edx, %edi
202 lea 128(%ebx, %edx), %ebx
203
/* 64-byte loop.  Four aligned 16-byte blocks are loaded from esi; clean
   copies are kept in xmm4, xmm5, xmm6 and xmm7 for the later stores.
   pminub folds them into a per-byte minimum (xmm3), which has a zero
   byte exactly when one of the four blocks has one.  The comparison
   writes into xmm3, so xmm0 stays all-zero throughout the loop.
   `sub $64' / `jbe': the count ends within these 64 bytes.
   Falls through into L(Unaligned64Leave) once a NUL is seen.  */
204 L(Unaligned64Loop):
205 movaps (%esi), %xmm2
206 movaps %xmm2, %xmm4
207 movaps 16(%esi), %xmm5
208 movaps 32(%esi), %xmm3
209 movaps %xmm3, %xmm6
210 movaps 48(%esi), %xmm7
211 pminub %xmm5, %xmm2
212 pminub %xmm7, %xmm3
213 pminub %xmm2, %xmm3
214 pcmpeqb %xmm0, %xmm3
215 pmovmskb %xmm3, %edx
216 sub $64, %ebx
217 jbe L(UnalignedLeaveCase2OrCase3)
218 test %edx, %edx
219 jnz L(Unaligned64Leave)
/* Steady state: advance both pointers, store the four blocks checked in
   the previous pass (now at -64 .. -16 from edi) and, interleaved with
   those stores, load and check the next four.  */
220 L(Unaligned64Loop_start):
221 add $64, %edi
222 add $64, %esi
223 movdqu %xmm4, -64(%edi)
224 movaps (%esi), %xmm2
225 movdqa %xmm2, %xmm4
226 movdqu %xmm5, -48(%edi)
227 movaps 16(%esi), %xmm5
228 pminub %xmm5, %xmm2
229 movaps 32(%esi), %xmm3
230 movdqu %xmm6, -32(%edi)
231 movaps %xmm3, %xmm6
232 movdqu %xmm7, -16(%edi)
233 movaps 48(%esi), %xmm7
234 pminub %xmm7, %xmm3
235 pminub %xmm2, %xmm3
236 pcmpeqb %xmm0, %xmm3
237 pmovmskb %xmm3, %edx
238 sub $64, %ebx
239 jbe L(UnalignedLeaveCase2OrCase3)
240 test %edx, %edx
241 jz L(Unaligned64Loop_start)
/* A NUL lies somewhere in the current 64 bytes (xmm4..xmm7, not yet
   stored) and the count extends beyond them.  Find the block holding it.
   xmm0 is all-zero on entry; xmm0 and xmm1 are reused for the second
   pair of comparisons, which is valid because they are all-zero again
   when the first pair found nothing.  */
242 L(Unaligned64Leave):
243 pxor %xmm1, %xmm1
244
245 pcmpeqb %xmm4, %xmm0
246 pcmpeqb %xmm5, %xmm1
247 pmovmskb %xmm0, %edx
248 pmovmskb %xmm1, %ecx
249 test %edx, %edx
250 jnz L(CopyFrom1To16BytesUnaligned_0)
251 test %ecx, %ecx
252 jnz L(CopyFrom1To16BytesUnaligned_16)
253
254 pcmpeqb %xmm6, %xmm0
255 pcmpeqb %xmm7, %xmm1
256 pmovmskb %xmm0, %edx
257 pmovmskb %xmm1, %ecx
258 test %edx, %edx
259 jnz L(CopyFrom1To16BytesUnaligned_32)
260
/* Remaining possibility: the NUL is in the fourth block.  edx = its
   index within that block.  Store all four blocks.  ebx + 64 was the
   count measured from edi, so ebx + 15 - edx is what remains after the
   terminator at offset 48 + edx; edi is left just past the terminator
   for the zero fill.  */
261 bsf %ecx, %edx
262 movdqu %xmm4, (%edi)
263 movdqu %xmm5, 16(%edi)
264 movdqu %xmm6, 32(%edi)
265 # ifdef USE_AS_STPCPY
266 lea 48(%edi, %edx), %eax
267 # endif
268 movdqu %xmm7, 48(%edi)
269 add $15, %ebx
270 sub %edx, %ebx
271 lea 49(%edi, %edx), %edi
272 jmp L(StrncpyFillTailWithZero)
273
274 /* Source address is already 16-byte aligned (source & 15 == 0). Here ecx is 0 and ebx is the unmodified count. */
275
276 L(SourceStringAlignmentZero):
/* Load the first block, keep it in xmm1 and build its NUL mask.  */
277 pxor %xmm0, %xmm0
278 movdqa (%esi), %xmm1
279 pcmpeqb %xmm1, %xmm0
280 pmovmskb %xmm0, %edx
/* Count used up within this block (or, non-STPCPY, one byte past).  */
281 # ifdef USE_AS_STPCPY
282 cmp $16, %ebx
283 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
284 # else
285 cmp $17, %ebx
286 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
287 # endif
288 test %edx, %edx
289 jnz L(CopyFrom1To16BytesTail1)
290
/* First block is NUL-free (xmm0 is all-zero again): store it now and
   examine the second block.  The handlers for the second block take the
   16 bytes already stored into account.  */
291 pcmpeqb 16(%esi), %xmm0
292 movdqu %xmm1, (%edi)
293 pmovmskb %xmm0, %edx
294 # ifdef USE_AS_STPCPY
295 cmp $32, %ebx
296 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
297 # else
298 cmp $33, %ebx
299 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
300 # endif
301 test %edx, %edx
302 jnz L(CopyFrom1To32Bytes1)
303
/* Neither block has a NUL: continue with the common stepping code.  */
304 jmp L(Unalign16Both)
305
306 /*-----------------End of main part---------------------------*/
307
308 /* Case1: a NUL terminator was found and the count extends past the block that holds it. */
309 .p2align 4
/* Misaligned head, NUL in the first block.  Undo the re-basing done at
   entry (ebx back to the caller's count, esi back to the real start).
   edx becomes the index of the NUL from the start of the string and
   selects the handler from ExitTable, whose entry k is L(Exit(k+1)).  */
310 L(CopyFrom1To16BytesTail):
311 sub %ecx, %ebx
312 add %ecx, %esi
313 bsf %edx, %edx
314 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
315
/* Aligned-source head.  CopyFrom1To32Bytes1: the NUL is in the second
   block; the first 16 bytes are already stored, so step source,
   destination and count past them.  CopyFrom1To16BytesTail1: the NUL is
   in the block at esi; its index selects the ExitTable handler.  */
316 .p2align 4
317 L(CopyFrom1To32Bytes1):
318 add $16, %esi
319 add $16, %edi
320 sub $16, %ebx
321 L(CopyFrom1To16BytesTail1):
322 bsf %edx, %edx
323 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
324
/* Misaligned head, NUL in the second aligned block.  Nothing has been
   stored yet.  Restore ebx and esi to the caller's count and the real
   start, and convert the NUL's index within the second block into an
   index from the start of the string: bsf + 16 - misalignment.  */
325 .p2align 4
326 L(CopyFrom1To32Bytes):
327 sub %ecx, %ebx
328 bsf %edx, %edx
329 add %ecx, %esi
330 add $16, %edx
331 sub %ecx, %edx
332 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
333
/* From L(Unaligned64Leave): the NUL is in the first block (xmm4) of the
   current 64 bytes.  Store that block.  ebx + 63 - index is the part of
   the count that remains after the terminator; edi is left just past
   the terminator for the zero fill.  */
334 .p2align 4
335 L(CopyFrom1To16BytesUnaligned_0):
336 bsf %edx, %edx
337 # ifdef USE_AS_STPCPY
338 lea (%edi, %edx), %eax
339 # endif
340 movdqu %xmm4, (%edi)
341 add $63, %ebx
342 sub %edx, %ebx
343 lea 1(%edi, %edx), %edi
344 jmp L(StrncpyFillTailWithZero)
345
/* From L(Unaligned64Leave): the NUL is in the second block (xmm5); its
   mask is in ecx.  Store the first two blocks.  ebx + 47 - index is the
   part of the count that remains after the terminator at offset
   16 + index; edi is left just past it for the zero fill.  */
346 .p2align 4
347 L(CopyFrom1To16BytesUnaligned_16):
348 bsf %ecx, %edx
349 movdqu %xmm4, (%edi)
350 # ifdef USE_AS_STPCPY
351 lea 16(%edi, %edx), %eax
352 # endif
353 movdqu %xmm5, 16(%edi)
354 add $47, %ebx
355 sub %edx, %ebx
356 lea 17(%edi, %edx), %edi
357 jmp L(StrncpyFillTailWithZero)
358
/* From L(Unaligned64Leave): the NUL is in the third block (xmm6).
   Store the first three blocks.  ebx + 31 - index is the part of the
   count that remains after the terminator at offset 32 + index; edi is
   left just past it for the zero fill.  */
359 .p2align 4
360 L(CopyFrom1To16BytesUnaligned_32):
361 bsf %edx, %edx
362 movdqu %xmm4, (%edi)
363 movdqu %xmm5, 16(%edi)
364 # ifdef USE_AS_STPCPY
365 lea 32(%edi, %edx), %eax
366 # endif
367 movdqu %xmm6, 32(%edi)
368 add $31, %ebx
369 sub %edx, %ebx
370 lea 33(%edi, %edx), %edi
371 jmp L(StrncpyFillTailWithZero)
372
/* Five one-instruction trampolines.  Each stores the 16-byte block that
   contains the NUL (still held in the named xmm register) at offset ecx
   from edi and then joins L(CopyFrom1To16BytesXmmExit), which expects
   the NUL mask in edx.  */
373 .p2align 4
374 L(CopyFrom1To16BytesUnalignedXmm6):
375 movdqu %xmm6, (%edi, %ecx)
376 jmp L(CopyFrom1To16BytesXmmExit)
377
378 .p2align 4
379 L(CopyFrom1To16BytesUnalignedXmm5):
380 movdqu %xmm5, (%edi, %ecx)
381 jmp L(CopyFrom1To16BytesXmmExit)
382
383 .p2align 4
384 L(CopyFrom1To16BytesUnalignedXmm4):
385 movdqu %xmm4, (%edi, %ecx)
386 jmp L(CopyFrom1To16BytesXmmExit)
387
388 .p2align 4
389 L(CopyFrom1To16BytesUnalignedXmm3):
390 movdqu %xmm3, (%edi, %ecx)
391 jmp L(CopyFrom1To16BytesXmmExit)
392
393 .p2align 4
394 L(CopyFrom1To16BytesUnalignedXmm1):
395 movdqu %xmm1, (%edi, %ecx)
396 jmp L(CopyFrom1To16BytesXmmExit)
397
/* Shared dispatch used by the Case2 paths once the NUL is known to come
   before the end of the count: edx is the NUL's index from esi and
   selects the ExitTable handler.  */
398 .p2align 4
399 L(CopyFrom1To16BytesExit):
400 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
401
402 /* Case2: the count ends inside a block that also contains a NUL; whichever comes first decides the exit. */
403
404 .p2align 4
/* ebx + 16 is the count measured from the start of the block; edi and
   esi are advanced to that block.  If the NUL's index is below the
   count, the string ends first (ExitTable); otherwise exactly ebx bytes
   are copied via ExitStrncpyTable.  */
405 L(CopyFrom1To16BytesCase2):
406 add $16, %ebx
407 add %ecx, %edi
408 add %ecx, %esi
409 bsf %edx, %edx
410 cmp %ebx, %edx
411 jb L(CopyFrom1To16BytesExit)
412 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
413
/* Case2 for the misaligned head, second block: restore ebx and esi to
   the caller's count and real start, express the NUL position as an
   index from the start of the string (bsf + 16 - misalignment) and
   compare it with the count.  */
414 .p2align 4
415 L(CopyFrom1To32BytesCase2):
416 sub %ecx, %ebx
417 add %ecx, %esi
418 bsf %edx, %edx
419 add $16, %edx
420 sub %ecx, %edx
421 cmp %ebx, %edx
422 jb L(CopyFrom1To16BytesExit)
423 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
424
/* Case2 for the misaligned head, first block: restore ebx and esi, then
   let the NUL index (already shifted to count from the real start)
   compete with the count.  */
425 L(CopyFrom1To16BytesTailCase2):
426 sub %ecx, %ebx
427 add %ecx, %esi
428 bsf %edx, %edx
429 cmp %ebx, %edx
430 jb L(CopyFrom1To16BytesExit)
431 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
432
/* Case2 for the aligned head: esi, edi and ebx already refer to the
   block, so only the NUL-index versus count comparison is needed.  */
433 L(CopyFrom1To16BytesTail1Case2):
434 bsf %edx, %edx
435 cmp %ebx, %edx
436 jb L(CopyFrom1To16BytesExit)
437 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
438
439 /* Case2 or Case3: the count ends in the current block. Case3 is the variant where that block has no NUL. */
440
441 .p2align 4
/* A non-empty NUL mask means Case2.  Otherwise (Case3) re-base the count
   and both pointers onto the block and copy exactly ebx bytes through
   ExitStrncpyTable.  */
442 L(CopyFrom1To16BytesCase2OrCase3):
443 test %edx, %edx
444 jnz L(CopyFrom1To16BytesCase2)
445 L(CopyFrom1To16BytesCase3):
446 add $16, %ebx
447 add %ecx, %edi
448 add %ecx, %esi
449 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
450
/* Misaligned head, count ends within the first two blocks (checked
   after the second block).  With a NUL in the second block go to Case2;
   otherwise restore the caller's count and the real source start and
   copy exactly ebx bytes.  */
451 .p2align 4
452 L(CopyFrom1To32BytesCase2OrCase3):
453 test %edx, %edx
454 jnz L(CopyFrom1To32BytesCase2)
455 sub %ecx, %ebx
456 add %ecx, %esi
457 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
458
/* Misaligned head, count ends within the first block.  With a NUL in
   the (shifted) mask go to Case2; otherwise restore the caller's count
   and the real source start and copy exactly ebx bytes.  */
459 .p2align 4
460 L(CopyFrom1To16BytesTailCase2OrCase3):
461 test %edx, %edx
462 jnz L(CopyFrom1To16BytesTailCase2)
463 sub %ecx, %ebx
464 add %ecx, %esi
465 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
466
/* Aligned head.  CopyFrom1To32Bytes1Case2OrCase3: the count ends in the
   second block; the first 16 bytes are already stored, so step past
   them.  CopyFrom1To16BytesTail1Case2OrCase3: with a NUL in the block go
   to Case2, otherwise copy exactly ebx bytes.  */
467 .p2align 4
468 L(CopyFrom1To32Bytes1Case2OrCase3):
469 add $16, %edi
470 add $16, %esi
471 sub $16, %ebx
472 L(CopyFrom1To16BytesTail1Case2OrCase3):
473 test %edx, %edx
474 jnz L(CopyFrom1To16BytesTail1Case2)
475 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
476
/* Exit0: nothing left to copy.  USE_AS_STPCPY returns the current
   destination cursor.  Not referenced by name in the visible code;
   presumably entry 0 of ExitStrncpyTable -- confirm.  */
477 .p2align 4
478 L(Exit0):
479 # ifdef USE_AS_STPCPY
480 mov %edi, %eax
481 # endif
482 RETURN
483
/* Exit1: the string is just its terminator.  %dh is zero because edx
   holds the small NUL index used for the dispatch, so it serves as the
   zero byte.  USE_AS_STPCPY: eax = address of the terminator.
   ebx -= 1 leaves what the count still owes; `lea' keeps the flags of
   `sub', so `jnz' zero-fills only when something remains.  */
484 .p2align 4
485 L(Exit1):
486 movb %dh, (%edi)
487 # ifdef USE_AS_STPCPY
488 lea (%edi), %eax
489 # endif
490 sub $1, %ebx
491 lea 1(%edi), %edi
492 jnz L(StrncpyFillTailWithZero)
493 RETURN
494
/* Exit2: copy 2 bytes (terminator included) with one 2-byte move.
   USE_AS_STPCPY: eax = address of the terminator.  `lea' keeps the flags
   of `sub', so `jnz' zero-fills only when count remains.  */
495 .p2align 4
496 L(Exit2):
497 movw (%esi), %dx
498 movw %dx, (%edi)
499 # ifdef USE_AS_STPCPY
500 lea 1(%edi), %eax
501 # endif
502 sub $2, %ebx
503 lea 2(%edi), %edi
504 jnz L(StrncpyFillTailWithZero)
505 RETURN
506
/* Exit3: copy 2 bytes, then write the terminator from %dh (zero, since
   edx holds the small NUL index).  USE_AS_STPCPY: eax = address of the
   terminator.  `lea' keeps the flags of `sub', so `jnz' zero-fills only
   when count remains.  */
507 .p2align 4
508 L(Exit3):
509 movw (%esi), %cx
510 movw %cx, (%edi)
511 movb %dh, 2(%edi)
512 # ifdef USE_AS_STPCPY
513 lea 2(%edi), %eax
514 # endif
515 sub $3, %ebx
516 lea 3(%edi), %edi
517 jnz L(StrncpyFillTailWithZero)
518 RETURN
519
/* Exit4: copy 4 bytes (terminator included) with one 4-byte move.
   USE_AS_STPCPY: eax = address of the terminator.  `lea' keeps the flags
   of `sub', so `jnz' zero-fills only when count remains.  */
520 .p2align 4
521 L(Exit4):
522 movl (%esi), %edx
523 movl %edx, (%edi)
524 # ifdef USE_AS_STPCPY
525 lea 3(%edi), %eax
526 # endif
527 sub $4, %ebx
528 lea 4(%edi), %edi
529 jnz L(StrncpyFillTailWithZero)
530 RETURN
531
/* Exit5: copy 4 bytes and write the terminator at offset 4 from %dh
   (zero, since edx holds the small NUL index).  USE_AS_STPCPY: eax =
   address of the terminator.  `lea' keeps the flags of `sub', so `jnz'
   zero-fills only when count remains.  */
532 .p2align 4
533 L(Exit5):
534 movl (%esi), %ecx
535 movb %dh, 4(%edi)
536 movl %ecx, (%edi)
537 # ifdef USE_AS_STPCPY
538 lea 4(%edi), %eax
539 # endif
540 sub $5, %ebx
541 lea 5(%edi), %edi
542 jnz L(StrncpyFillTailWithZero)
543 RETURN
544
/* Exit6: copy 6 bytes (terminator included) as a 4-byte plus a 2-byte
   move.  USE_AS_STPCPY: eax = address of the terminator.  `lea' keeps
   the flags of `sub', so `jnz' zero-fills only when count remains.  */
545 .p2align 4
546 L(Exit6):
547 movl (%esi), %ecx
548 movw 4(%esi), %dx
549 movl %ecx, (%edi)
550 movw %dx, 4(%edi)
551 # ifdef USE_AS_STPCPY
552 lea 5(%edi), %eax
553 # endif
554 sub $6, %ebx
555 lea 6(%edi), %edi
556 jnz L(StrncpyFillTailWithZero)
557 RETURN
558
/* Exit7: copy 7 bytes (terminator included) as two overlapping 4-byte
   moves at offsets 0 and 3.  USE_AS_STPCPY: eax = address of the
   terminator.  `lea' keeps the flags of `sub', so `jnz' zero-fills only
   when count remains.  */
559 .p2align 4
560 L(Exit7):
561 movl (%esi), %ecx
562 movl 3(%esi), %edx
563 movl %ecx, (%edi)
564 movl %edx, 3(%edi)
565 # ifdef USE_AS_STPCPY
566 lea 6(%edi), %eax
567 # endif
568 sub $7, %ebx
569 lea 7(%edi), %edi
570 jnz L(StrncpyFillTailWithZero)
571 RETURN
572
/* Exit8: copy 8 bytes (terminator included) with one 8-byte movlpd.
   USE_AS_STPCPY: eax = address of the terminator.  `lea' keeps the flags
   of `sub', so `jnz' zero-fills only when count remains.  */
573 .p2align 4
574 L(Exit8):
575 movlpd (%esi), %xmm0
576 movlpd %xmm0, (%edi)
577 # ifdef USE_AS_STPCPY
578 lea 7(%edi), %eax
579 # endif
580 sub $8, %ebx
581 lea 8(%edi), %edi
582 jnz L(StrncpyFillTailWithZero)
583 RETURN
584
/* Exit9: copy 8 bytes and write the terminator at offset 8 from %dh
   (zero, since edx holds the small NUL index).  USE_AS_STPCPY: eax =
   address of the terminator.  `lea' keeps the flags of `sub', so `jnz'
   zero-fills only when count remains.  */
585 .p2align 4
586 L(Exit9):
587 movlpd (%esi), %xmm0
588 movb %dh, 8(%edi)
589 movlpd %xmm0, (%edi)
590 # ifdef USE_AS_STPCPY
591 lea 8(%edi), %eax
592 # endif
593 sub $9, %ebx
594 lea 9(%edi), %edi
595 jnz L(StrncpyFillTailWithZero)
596 RETURN
597
/* Exit10: copy 10 bytes (terminator included) as an 8-byte plus a
   2-byte move.  USE_AS_STPCPY: eax = address of the terminator.  `lea'
   keeps the flags of `sub', so `jnz' zero-fills only when count
   remains.  */
598 .p2align 4
599 L(Exit10):
600 movlpd (%esi), %xmm0
601 movw 8(%esi), %dx
602 movlpd %xmm0, (%edi)
603 movw %dx, 8(%edi)
604 # ifdef USE_AS_STPCPY
605 lea 9(%edi), %eax
606 # endif
607 sub $10, %ebx
608 lea 10(%edi), %edi
609 jnz L(StrncpyFillTailWithZero)
610 RETURN
611
/* Exit11: copy 11 bytes (terminator included) as an 8-byte move plus an
   overlapping 4-byte move at offset 7.  USE_AS_STPCPY: eax = address of
   the terminator.  `lea' keeps the flags of `sub', so `jnz' zero-fills
   only when count remains.  */
612 .p2align 4
613 L(Exit11):
614 movlpd (%esi), %xmm0
615 movl 7(%esi), %edx
616 movlpd %xmm0, (%edi)
617 movl %edx, 7(%edi)
618 # ifdef USE_AS_STPCPY
619 lea 10(%edi), %eax
620 # endif
621 sub $11, %ebx
622 lea 11(%edi), %edi
623 jnz L(StrncpyFillTailWithZero)
624 RETURN
625
/* Exit12: copy 12 bytes (terminator included) as an 8-byte plus a
   4-byte move.  USE_AS_STPCPY: eax = address of the terminator.  `lea'
   keeps the flags of `sub', so `jnz' zero-fills only when count
   remains.  */
626 .p2align 4
627 L(Exit12):
628 movlpd (%esi), %xmm0
629 movl 8(%esi), %edx
630 movlpd %xmm0, (%edi)
631 movl %edx, 8(%edi)
632 # ifdef USE_AS_STPCPY
633 lea 11(%edi), %eax
634 # endif
635 sub $12, %ebx
636 lea 12(%edi), %edi
637 jnz L(StrncpyFillTailWithZero)
638 RETURN
639
/* Exit13: copy 13 bytes (terminator included) as two overlapping 8-byte
   moves at offsets 0 and 5.  USE_AS_STPCPY: eax = address of the
   terminator.  `lea' keeps the flags of `sub', so `jnz' zero-fills only
   when count remains.  */
640 .p2align 4
641 L(Exit13):
642 movlpd (%esi), %xmm0
643 movlpd 5(%esi), %xmm1
644 movlpd %xmm0, (%edi)
645 movlpd %xmm1, 5(%edi)
646 # ifdef USE_AS_STPCPY
647 lea 12(%edi), %eax
648 # endif
649 sub $13, %ebx
650 lea 13(%edi), %edi
651 jnz L(StrncpyFillTailWithZero)
652 RETURN
653
/* Exit14: copy 14 bytes (terminator included) as two overlapping 8-byte
   moves at offsets 0 and 6.  USE_AS_STPCPY: eax = address of the
   terminator.  `lea' keeps the flags of `sub', so `jnz' zero-fills only
   when count remains.  */
654 .p2align 4
655 L(Exit14):
656 movlpd (%esi), %xmm0
657 movlpd 6(%esi), %xmm1
658 movlpd %xmm0, (%edi)
659 movlpd %xmm1, 6(%edi)
660 # ifdef USE_AS_STPCPY
661 lea 13(%edi), %eax
662 # endif
663 sub $14, %ebx
664 lea 14(%edi), %edi
665 jnz L(StrncpyFillTailWithZero)
666 RETURN
667
/* Exit15: copy 15 bytes (terminator included) as two overlapping 8-byte
   moves at offsets 0 and 7.  USE_AS_STPCPY: eax = address of the
   terminator.  `lea' keeps the flags of `sub', so `jnz' zero-fills only
   when count remains.  */
668 .p2align 4
669 L(Exit15):
670 movlpd (%esi), %xmm0
671 movlpd 7(%esi), %xmm1
672 movlpd %xmm0, (%edi)
673 movlpd %xmm1, 7(%edi)
674 # ifdef USE_AS_STPCPY
675 lea 14(%edi), %eax
676 # endif
677 sub $15, %ebx
678 lea 15(%edi), %edi
679 jnz L(StrncpyFillTailWithZero)
680 RETURN
681
/* Exit16: copy 16 bytes (terminator included) with one unaligned
   16-byte move.  USE_AS_STPCPY: eax = address of the terminator.  `lea'
   keeps the flags of `sub', so `jnz' zero-fills only when count
   remains.  */
682 .p2align 4
683 L(Exit16):
684 movdqu (%esi), %xmm0
685 movdqu %xmm0, (%edi)
686 # ifdef USE_AS_STPCPY
687 lea 15(%edi), %eax
688 # endif
689 sub $16, %ebx
690 lea 16(%edi), %edi
691 jnz L(StrncpyFillTailWithZero)
692 RETURN
693
/* Exit17: copy 16 bytes and write the terminator at offset 16 from %dh
   (zero, since edx holds the small NUL index).  USE_AS_STPCPY: eax =
   address of the terminator.  `lea' keeps the flags of `sub', so `jnz'
   zero-fills only when count remains.  */
694 .p2align 4
695 L(Exit17):
696 movdqu (%esi), %xmm0
697 movdqu %xmm0, (%edi)
698 movb %dh, 16(%edi)
699 # ifdef USE_AS_STPCPY
700 lea 16(%edi), %eax
701 # endif
702 sub $17, %ebx
703 lea 17(%edi), %edi
704 jnz L(StrncpyFillTailWithZero)
705 RETURN
706
/* Exit18: copy 18 bytes (terminator included) as a 16-byte plus a
   2-byte move.  USE_AS_STPCPY: eax = address of the terminator.  `lea'
   keeps the flags of `sub', so `jnz' zero-fills only when count
   remains.  */
707 .p2align 4
708 L(Exit18):
709 movdqu (%esi), %xmm0
710 movw 16(%esi), %cx
711 movdqu %xmm0, (%edi)
712 movw %cx, 16(%edi)
713 # ifdef USE_AS_STPCPY
714 lea 17(%edi), %eax
715 # endif
716 sub $18, %ebx
717 lea 18(%edi), %edi
718 jnz L(StrncpyFillTailWithZero)
719 RETURN
720
/* Exit19: copy 19 bytes (terminator included) as a 16-byte move plus an
   overlapping 4-byte move at offset 15.  USE_AS_STPCPY: eax = address of
   the terminator.  `lea' keeps the flags of `sub', so `jnz' zero-fills
   only when count remains.  */
721 .p2align 4
722 L(Exit19):
723 movdqu (%esi), %xmm0
724 movl 15(%esi), %ecx
725 movdqu %xmm0, (%edi)
726 movl %ecx, 15(%edi)
727 # ifdef USE_AS_STPCPY
728 lea 18(%edi), %eax
729 # endif
730 sub $19, %ebx
731 lea 19(%edi), %edi
732 jnz L(StrncpyFillTailWithZero)
733 RETURN
734
/* Exit20: copy 20 bytes (terminator included) as a 16-byte plus a
   4-byte move.  USE_AS_STPCPY: eax = address of the terminator.  `lea'
   keeps the flags of `sub', so `jnz' zero-fills only when count
   remains.  */
735 .p2align 4
736 L(Exit20):
737 movdqu (%esi), %xmm0
738 movl 16(%esi), %ecx
739 movdqu %xmm0, (%edi)
740 movl %ecx, 16(%edi)
741 # ifdef USE_AS_STPCPY
742 lea 19(%edi), %eax
743 # endif
744 sub $20, %ebx
745 lea 20(%edi), %edi
746 jnz L(StrncpyFillTailWithZero)
747 RETURN
748
/* Exit21: copy 20 bytes (16 + 4) and write the terminator at offset 20
   from %dh (zero, since edx holds the small NUL index).  USE_AS_STPCPY:
   eax = address of the terminator.  `lea' keeps the flags of `sub', so
   `jnz' zero-fills only when count remains.  */
749 .p2align 4
750 L(Exit21):
751 movdqu (%esi), %xmm0
752 movl 16(%esi), %ecx
753 movdqu %xmm0, (%edi)
754 movl %ecx, 16(%edi)
755 movb %dh, 20(%edi)
756 # ifdef USE_AS_STPCPY
757 lea 20(%edi), %eax
758 # endif
759 sub $21, %ebx
760 lea 21(%edi), %edi
761 jnz L(StrncpyFillTailWithZero)
762 RETURN
763
/* Exit22: copy 22 bytes (terminator included) as a 16-byte move plus an
   overlapping 8-byte move at offset 14.  USE_AS_STPCPY: eax = address of
   the terminator.  `lea' keeps the flags of `sub', so `jnz' zero-fills
   only when count remains.  */
764 .p2align 4
765 L(Exit22):
766 movdqu (%esi), %xmm0
767 movlpd 14(%esi), %xmm3
768 movdqu %xmm0, (%edi)
769 movlpd %xmm3, 14(%edi)
770 # ifdef USE_AS_STPCPY
771 lea 21(%edi), %eax
772 # endif
773 sub $22, %ebx
774 lea 22(%edi), %edi
775 jnz L(StrncpyFillTailWithZero)
776 RETURN
777
/* Exit23: copy 23 bytes (terminator included) as a 16-byte move plus an
   overlapping 8-byte move at offset 15.  USE_AS_STPCPY: eax = address of
   the terminator.  `lea' keeps the flags of `sub', so `jnz' zero-fills
   only when count remains.  */
778 .p2align 4
779 L(Exit23):
780 movdqu (%esi), %xmm0
781 movlpd 15(%esi), %xmm3
782 movdqu %xmm0, (%edi)
783 movlpd %xmm3, 15(%edi)
784 # ifdef USE_AS_STPCPY
785 lea 22(%edi), %eax
786 # endif
787 sub $23, %ebx
788 lea 23(%edi), %edi
789 jnz L(StrncpyFillTailWithZero)
790 RETURN
791
/* Exit24: copy 24 bytes (terminator included) as a 16-byte plus an
   8-byte move.  USE_AS_STPCPY: eax = address of the terminator.  `lea'
   keeps the flags of `sub', so `jnz' zero-fills only when count
   remains.  */
792 .p2align 4
793 L(Exit24):
794 movdqu (%esi), %xmm0
795 movlpd 16(%esi), %xmm2
796 movdqu %xmm0, (%edi)
797 movlpd %xmm2, 16(%edi)
798 # ifdef USE_AS_STPCPY
799 lea 23(%edi), %eax
800 # endif
801 sub $24, %ebx
802 lea 24(%edi), %edi
803 jnz L(StrncpyFillTailWithZero)
804 RETURN
805
/* Exit25: copy 24 bytes (16 + 8) and write the terminator at offset 24
   from %dh (zero, since edx holds the small NUL index).  USE_AS_STPCPY:
   eax = address of the terminator.  `lea' keeps the flags of `sub', so
   `jnz' zero-fills only when count remains.  */
806 .p2align 4
807 L(Exit25):
808 movdqu (%esi), %xmm0
809 movlpd 16(%esi), %xmm2
810 movdqu %xmm0, (%edi)
811 movlpd %xmm2, 16(%edi)
812 movb %dh, 24(%edi)
813 # ifdef USE_AS_STPCPY
814 lea 24(%edi), %eax
815 # endif
816 sub $25, %ebx
817 lea 25(%edi), %edi
818 jnz L(StrncpyFillTailWithZero)
819 RETURN
820
/* Exit26: copy 26 bytes (terminator included) as 16-byte, 8-byte and
   2-byte moves.  USE_AS_STPCPY: eax = address of the terminator.  `lea'
   keeps the flags of `sub', so `jnz' zero-fills only when count
   remains.  */
821 .p2align 4
822 L(Exit26):
823 movdqu (%esi), %xmm0
824 movlpd 16(%esi), %xmm2
825 movw 24(%esi), %cx
826 movdqu %xmm0, (%edi)
827 movlpd %xmm2, 16(%edi)
828 movw %cx, 24(%edi)
829 # ifdef USE_AS_STPCPY
830 lea 25(%edi), %eax
831 # endif
832 sub $26, %ebx
833 lea 26(%edi), %edi
834 jnz L(StrncpyFillTailWithZero)
835 RETURN
836
/* Exit27: copy 27 bytes (terminator included) as 16-byte and 8-byte
   moves plus an overlapping 4-byte move at offset 23.  USE_AS_STPCPY:
   eax = address of the terminator.  `lea' keeps the flags of `sub', so
   `jnz' zero-fills only when count remains.  */
837 .p2align 4
838 L(Exit27):
839 movdqu (%esi), %xmm0
840 movlpd 16(%esi), %xmm2
841 movl 23(%esi), %ecx
842 movdqu %xmm0, (%edi)
843 movlpd %xmm2, 16(%edi)
844 movl %ecx, 23(%edi)
845 # ifdef USE_AS_STPCPY
846 lea 26(%edi), %eax
847 # endif
848 sub $27, %ebx
849 lea 27(%edi), %edi
850 jnz L(StrncpyFillTailWithZero)
851 RETURN
852
/* Exit28: copy 28 bytes (terminator included) as 16-byte, 8-byte and
   4-byte moves.  USE_AS_STPCPY: eax = address of the terminator.  `lea'
   keeps the flags of `sub', so `jnz' zero-fills only when count
   remains.  */
853 .p2align 4
854 L(Exit28):
855 movdqu (%esi), %xmm0
856 movlpd 16(%esi), %xmm2
857 movl 24(%esi), %ecx
858 movdqu %xmm0, (%edi)
859 movlpd %xmm2, 16(%edi)
860 movl %ecx, 24(%edi)
861 # ifdef USE_AS_STPCPY
862 lea 27(%edi), %eax
863 # endif
864 sub $28, %ebx
865 lea 28(%edi), %edi
866 jnz L(StrncpyFillTailWithZero)
867 RETURN
868
/* Exit29: copy 29 bytes (terminator included) as two overlapping
   16-byte moves at offsets 0 and 13.  USE_AS_STPCPY: eax = address of
   the terminator.  `lea' keeps the flags of `sub', so `jnz' zero-fills
   only when count remains.  */
869 .p2align 4
870 L(Exit29):
871 movdqu (%esi), %xmm0
872 movdqu 13(%esi), %xmm2
873 movdqu %xmm0, (%edi)
874 movdqu %xmm2, 13(%edi)
875 # ifdef USE_AS_STPCPY
876 lea 28(%edi), %eax
877 # endif
878 sub $29, %ebx
879 lea 29(%edi), %edi
880 jnz L(StrncpyFillTailWithZero)
881 RETURN
882
/* Exit30: copy 30 bytes (terminator included) as two overlapping
   16-byte moves at offsets 0 and 14.  USE_AS_STPCPY: eax = address of
   the terminator.  `lea' keeps the flags of `sub', so `jnz' zero-fills
   only when count remains.  */
883 .p2align 4
884 L(Exit30):
885 movdqu (%esi), %xmm0
886 movdqu 14(%esi), %xmm2
887 movdqu %xmm0, (%edi)
888 movdqu %xmm2, 14(%edi)
889 # ifdef USE_AS_STPCPY
890 lea 29(%edi), %eax
891 # endif
892 sub $30, %ebx
893 lea 30(%edi), %edi
894 jnz L(StrncpyFillTailWithZero)
895 RETURN
896
897
/* Exit31: copy 31 bytes (terminator included) as two overlapping
   16-byte moves at offsets 0 and 15.  USE_AS_STPCPY: eax = address of
   the terminator.  `lea' keeps the flags of `sub', so `jnz' zero-fills
   only when count remains.  */
898 .p2align 4
899 L(Exit31):
900 movdqu (%esi), %xmm0
901 movdqu 15(%esi), %xmm2
902 movdqu %xmm0, (%edi)
903 movdqu %xmm2, 15(%edi)
904 # ifdef USE_AS_STPCPY
905 lea 30(%edi), %eax
906 # endif
907 sub $31, %ebx
908 lea 31(%edi), %edi
909 jnz L(StrncpyFillTailWithZero)
910 RETURN
911
/* Exit32: copy 32 bytes (terminator included) as two adjacent 16-byte
   moves.  USE_AS_STPCPY: eax = address of the terminator.  `lea' keeps
   the flags of `sub', so `jnz' zero-fills only when count remains.  */
912 .p2align 4
913 L(Exit32):
914 movdqu (%esi), %xmm0
915 movdqu 16(%esi), %xmm2
916 movdqu %xmm0, (%edi)
917 movdqu %xmm2, 16(%edi)
918 # ifdef USE_AS_STPCPY
919 lea 31(%edi), %eax
920 # endif
921 sub $32, %ebx
922 lea 32(%edi), %edi
923 jnz L(StrncpyFillTailWithZero)
924 RETURN
925
/* StrncpyExit1: copy exactly 1 byte; no terminator is appended and no
   zero fill follows.  USE_AS_STPCPY returns edi + 1.  Presumably entry 1
   of ExitStrncpyTable (the table is outside this view), i.e. the count
   ends here before any NUL.  */
926 .p2align 4
927 L(StrncpyExit1):
928 movb (%esi), %dl
929 movb %dl, (%edi)
930 # ifdef USE_AS_STPCPY
931 lea 1(%edi), %eax
932 # endif
933 RETURN
934
/* StrncpyExit2: copy exactly 2 bytes (one 2-byte move); no terminator,
   no zero fill.  USE_AS_STPCPY returns edi + 2.  Presumably entry 2 of
   ExitStrncpyTable (outside this view).  */
935 .p2align 4
936 L(StrncpyExit2):
937 movw (%esi), %dx
938 movw %dx, (%edi)
939 # ifdef USE_AS_STPCPY
940 lea 2(%edi), %eax
941 # endif
942 RETURN
943
/* StrncpyExit3: copy exactly 3 bytes (2-byte plus 1-byte move); no
   terminator, no zero fill.  USE_AS_STPCPY returns edi + 3.  Presumably
   entry 3 of ExitStrncpyTable (outside this view).  */
944 .p2align 4
945 L(StrncpyExit3):
946 movw (%esi), %cx
947 movb 2(%esi), %dl
948 movw %cx, (%edi)
949 movb %dl, 2(%edi)
950 # ifdef USE_AS_STPCPY
951 lea 3(%edi), %eax
952 # endif
953 RETURN
954
/* StrncpyExit4: copy exactly 4 bytes (one 4-byte move); no terminator,
   no zero fill.  USE_AS_STPCPY returns edi + 4.  Presumably entry 4 of
   ExitStrncpyTable (outside this view).  */
955 .p2align 4
956 L(StrncpyExit4):
957 movl (%esi), %edx
958 movl %edx, (%edi)
959 # ifdef USE_AS_STPCPY
960 lea 4(%edi), %eax
961 # endif
962 RETURN
963
/* StrncpyExit5: copy exactly 5 bytes (4-byte plus 1-byte move); no
   terminator, no zero fill.  USE_AS_STPCPY returns edi + 5.  Presumably
   entry 5 of ExitStrncpyTable (outside this view).  */
964 .p2align 4
965 L(StrncpyExit5):
966 movl (%esi), %ecx
967 movb 4(%esi), %dl
968 movl %ecx, (%edi)
969 movb %dl, 4(%edi)
970 # ifdef USE_AS_STPCPY
971 lea 5(%edi), %eax
972 # endif
973 RETURN
974
/* StrncpyExit6: copy exactly 6 bytes (4-byte plus 2-byte move); no
   terminator, no zero fill.  USE_AS_STPCPY returns edi + 6.  Presumably
   entry 6 of ExitStrncpyTable (outside this view).  */
975 .p2align 4
976 L(StrncpyExit6):
977 movl (%esi), %ecx
978 movw 4(%esi), %dx
979 movl %ecx, (%edi)
980 movw %dx, 4(%edi)
981 # ifdef USE_AS_STPCPY
982 lea 6(%edi), %eax
983 # endif
984 RETURN
985
/* StrncpyExit7: copy exactly 7 bytes (two overlapping 4-byte moves at
   offsets 0 and 3); no terminator, no zero fill.  USE_AS_STPCPY returns
   edi + 7.  Presumably entry 7 of ExitStrncpyTable (outside this
   view).  */
986 .p2align 4
987 L(StrncpyExit7):
988 movl (%esi), %ecx
989 movl 3(%esi), %edx
990 movl %ecx, (%edi)
991 movl %edx, 3(%edi)
992 # ifdef USE_AS_STPCPY
993 lea 7(%edi), %eax
994 # endif
995 RETURN
996
/* StrncpyExit8: copy exactly 8 bytes (one 8-byte movlpd); no
   terminator, no zero fill.  USE_AS_STPCPY returns edi + 8.  Presumably
   entry 8 of ExitStrncpyTable (outside this view).  */
997 .p2align 4
998 L(StrncpyExit8):
999 movlpd (%esi), %xmm0
1000 movlpd %xmm0, (%edi)
1001 # ifdef USE_AS_STPCPY
1002 lea 8(%edi), %eax
1003 # endif
1004 RETURN
1005
/* StrncpyExit9: copy exactly 9 bytes (8-byte plus 1-byte move); no
   terminator, no zero fill.  USE_AS_STPCPY returns edi + 9.  Presumably
   entry 9 of ExitStrncpyTable (outside this view).  */
1006 .p2align 4
1007 L(StrncpyExit9):
1008 movlpd (%esi), %xmm0
1009 movb 8(%esi), %dl
1010 movlpd %xmm0, (%edi)
1011 movb %dl, 8(%edi)
1012 # ifdef USE_AS_STPCPY
1013 lea 9(%edi), %eax
1014 # endif
1015 RETURN
1016
/* StrncpyExit10: copy exactly 10 bytes (8-byte plus 2-byte move); no
   terminator, no zero fill.  USE_AS_STPCPY returns edi + 10.  Presumably
   entry 10 of ExitStrncpyTable (outside this view).  */
1017 .p2align 4
1018 L(StrncpyExit10):
1019 movlpd (%esi), %xmm0
1020 movw 8(%esi), %dx
1021 movlpd %xmm0, (%edi)
1022 movw %dx, 8(%edi)
1023 # ifdef USE_AS_STPCPY
1024 lea 10(%edi), %eax
1025 # endif
1026 RETURN
1027
/* StrncpyExit11: copy exactly 11 bytes (8-byte move plus an overlapping
   4-byte move at offset 7); no terminator, no zero fill.  USE_AS_STPCPY
   returns edi + 11.  Presumably entry 11 of ExitStrncpyTable (outside
   this view).  */
1028 .p2align 4
1029 L(StrncpyExit11):
1030 movlpd (%esi), %xmm0
1031 movl 7(%esi), %edx
1032 movlpd %xmm0, (%edi)
1033 movl %edx, 7(%edi)
1034 # ifdef USE_AS_STPCPY
1035 lea 11(%edi), %eax
1036 # endif
1037 RETURN
1038
/* StrncpyExit12: copy exactly 12 bytes (8-byte plus 4-byte move); no
   terminator, no zero fill.  USE_AS_STPCPY returns edi + 12.  Presumably
   entry 12 of ExitStrncpyTable (outside this view).  */
1039 .p2align 4
1040 L(StrncpyExit12):
1041 movlpd (%esi), %xmm0
1042 movl 8(%esi), %edx
1043 movlpd %xmm0, (%edi)
1044 movl %edx, 8(%edi)
1045 # ifdef USE_AS_STPCPY
1046 lea 12(%edi), %eax
1047 # endif
1048 RETURN
1049
/* StrncpyExit13: copy exactly 13 bytes (two overlapping 8-byte moves at
   offsets 0 and 5); no terminator, no zero fill.  USE_AS_STPCPY returns
   edi + 13.  Presumably entry 13 of ExitStrncpyTable (outside this
   view).  */
1050 .p2align 4
1051 L(StrncpyExit13):
1052 movlpd (%esi), %xmm0
1053 movlpd 5(%esi), %xmm1
1054 movlpd %xmm0, (%edi)
1055 movlpd %xmm1, 5(%edi)
1056 # ifdef USE_AS_STPCPY
1057 lea 13(%edi), %eax
1058 # endif
1059 RETURN
1060
/* StrncpyExit14: copy exactly 14 bytes (two overlapping 8-byte moves at
   offsets 0 and 6); no terminator, no zero fill.  USE_AS_STPCPY returns
   edi + 14.  Presumably entry 14 of ExitStrncpyTable (outside this
   view).  */
1061 .p2align 4
1062 L(StrncpyExit14):
1063 movlpd (%esi), %xmm0
1064 movlpd 6(%esi), %xmm1
1065 movlpd %xmm0, (%edi)
1066 movlpd %xmm1, 6(%edi)
1067 # ifdef USE_AS_STPCPY
1068 lea 14(%edi), %eax
1069 # endif
1070 RETURN
1071
/* StrncpyExit15: copy exactly 15 bytes (two overlapping 8-byte moves at
   offsets 0 and 7); no terminator, no zero fill.  USE_AS_STPCPY returns
   edi + 15.  Presumably entry 15 of ExitStrncpyTable (outside this
   view).  */
1072 .p2align 4
1073 L(StrncpyExit15):
1074 movlpd (%esi), %xmm0
1075 movlpd 7(%esi), %xmm1
1076 movlpd %xmm0, (%edi)
1077 movlpd %xmm1, 7(%edi)
1078 # ifdef USE_AS_STPCPY
1079 lea 15(%edi), %eax
1080 # endif
1081 RETURN
1082
/* StrncpyExit16: copy exactly 16 bytes (one unaligned 16-byte move); no
   terminator, no zero fill.  USE_AS_STPCPY returns edi + 16.  Presumably
   entry 16 of ExitStrncpyTable (outside this view).  */
1083 .p2align 4
1084 L(StrncpyExit16):
1085 movdqu (%esi), %xmm0
1086 movdqu %xmm0, (%edi)
1087 # ifdef USE_AS_STPCPY
1088 lea 16(%edi), %eax
1089 # endif
1090 RETURN
1091
/* StrncpyExit17: copy exactly 17 bytes (16-byte plus 1-byte move); no
   terminator, no zero fill.  USE_AS_STPCPY returns edi + 17.  Presumably
   entry 17 of ExitStrncpyTable (outside this view).  */
1092 .p2align 4
1093 L(StrncpyExit17):
1094 movdqu (%esi), %xmm0
1095 movb 16(%esi), %cl
1096 movdqu %xmm0, (%edi)
1097 movb %cl, 16(%edi)
1098 # ifdef USE_AS_STPCPY
1099 lea 17(%edi), %eax
1100 # endif
1101 RETURN
1102
/* StrncpyExit18: copy exactly 18 bytes (16-byte plus 2-byte move); no
   terminator, no zero fill.  USE_AS_STPCPY returns edi + 18.  Presumably
   entry 18 of ExitStrncpyTable (outside this view).  */
1103 .p2align 4
1104 L(StrncpyExit18):
1105 movdqu (%esi), %xmm0
1106 movw 16(%esi), %cx
1107 movdqu %xmm0, (%edi)
1108 movw %cx, 16(%edi)
1109 # ifdef USE_AS_STPCPY
1110 lea 18(%edi), %eax
1111 # endif
1112 RETURN
1113
/* StrncpyExit19: copy exactly 19 bytes (16-byte move plus an overlapping
   4-byte move at offset 15); no terminator, no zero fill.  USE_AS_STPCPY
   returns edi + 19.  Presumably entry 19 of ExitStrncpyTable (outside
   this view).  */
1114 .p2align 4
1115 L(StrncpyExit19):
1116 movdqu (%esi), %xmm0
1117 movl 15(%esi), %ecx
1118 movdqu %xmm0, (%edi)
1119 movl %ecx, 15(%edi)
1120 # ifdef USE_AS_STPCPY
1121 lea 19(%edi), %eax
1122 # endif
1123 RETURN
1124
/* StrncpyExit20: copy exactly 20 bytes (16-byte plus 4-byte move); no
   terminator, no zero fill.  USE_AS_STPCPY returns edi + 20.  Presumably
   entry 20 of ExitStrncpyTable (outside this view).  */
1125 .p2align 4
1126 L(StrncpyExit20):
1127 movdqu (%esi), %xmm0
1128 movl 16(%esi), %ecx
1129 movdqu %xmm0, (%edi)
1130 movl %ecx, 16(%edi)
1131 # ifdef USE_AS_STPCPY
1132 lea 20(%edi), %eax
1133 # endif
1134 RETURN
1135
/* StrncpyExit21: copy exactly 21 bytes (16-byte, 4-byte and 1-byte
   moves); no terminator, no zero fill.  USE_AS_STPCPY returns edi + 21.
   Presumably entry 21 of ExitStrncpyTable (outside this view).  */
1136 .p2align 4
1137 L(StrncpyExit21):
1138 movdqu (%esi), %xmm0
1139 movl 16(%esi), %ecx
1140 movb 20(%esi), %dl
1141 movdqu %xmm0, (%edi)
1142 movl %ecx, 16(%edi)
1143 movb %dl, 20(%edi)
1144 # ifdef USE_AS_STPCPY
1145 lea 21(%edi), %eax
1146 # endif
1147 RETURN
1148
/* StrncpyExit22: copy exactly 22 bytes (16-byte move plus an overlapping
   8-byte move at offset 14); no terminator, no zero fill.  USE_AS_STPCPY
   returns edi + 22.  Presumably entry 22 of ExitStrncpyTable (outside
   this view).  */
1149 .p2align 4
1150 L(StrncpyExit22):
1151 movdqu (%esi), %xmm0
1152 movlpd 14(%esi), %xmm3
1153 movdqu %xmm0, (%edi)
1154 movlpd %xmm3, 14(%edi)
1155 # ifdef USE_AS_STPCPY
1156 lea 22(%edi), %eax
1157 # endif
1158 RETURN
1159
/* StrncpyExit23: copy exactly 23 bytes (16-byte move plus an overlapping
   8-byte move at offset 15); no terminator, no zero fill.  USE_AS_STPCPY
   returns edi + 23.  Presumably entry 23 of ExitStrncpyTable (outside
   this view).  */
1160 .p2align 4
1161 L(StrncpyExit23):
1162 movdqu (%esi), %xmm0
1163 movlpd 15(%esi), %xmm3
1164 movdqu %xmm0, (%edi)
1165 movlpd %xmm3, 15(%edi)
1166 # ifdef USE_AS_STPCPY
1167 lea 23(%edi), %eax
1168 # endif
1169 RETURN
1170
/* StrncpyExit24: copy exactly 24 bytes (16-byte plus 8-byte move); no
   terminator, no zero fill.  USE_AS_STPCPY returns edi + 24.  Presumably
   entry 24 of ExitStrncpyTable (outside this view).  */
1171 .p2align 4
1172 L(StrncpyExit24):
1173 movdqu (%esi), %xmm0
1174 movlpd 16(%esi), %xmm2
1175 movdqu %xmm0, (%edi)
1176 movlpd %xmm2, 16(%edi)
1177 # ifdef USE_AS_STPCPY
1178 lea 24(%edi), %eax
1179 # endif
1180 RETURN
1181
/* StrncpyExit25: copy exactly 25 bytes (16-byte, 8-byte and 1-byte
   moves); no terminator, no zero fill.  USE_AS_STPCPY returns edi + 25.
   Presumably entry 25 of ExitStrncpyTable (outside this view).  */
1182 .p2align 4
1183 L(StrncpyExit25):
1184 movdqu (%esi), %xmm0
1185 movlpd 16(%esi), %xmm2
1186 movb 24(%esi), %cl
1187 movdqu %xmm0, (%edi)
1188 movlpd %xmm2, 16(%edi)
1189 movb %cl, 24(%edi)
1190 # ifdef USE_AS_STPCPY
1191 lea 25(%edi), %eax
1192 # endif
1193 RETURN
1194
/* StrncpyExit26: copy exactly 26 bytes (16-byte, 8-byte and 2-byte
   moves); no terminator, no zero fill.  USE_AS_STPCPY returns edi + 26.
   Presumably entry 26 of ExitStrncpyTable (outside this view).  */
1195 .p2align 4
1196 L(StrncpyExit26):
1197 movdqu (%esi), %xmm0
1198 movlpd 16(%esi), %xmm2
1199 movw 24(%esi), %cx
1200 movdqu %xmm0, (%edi)
1201 movlpd %xmm2, 16(%edi)
1202 movw %cx, 24(%edi)
1203 # ifdef USE_AS_STPCPY
1204 lea 26(%edi), %eax
1205 # endif
1206 RETURN
1207
/* StrncpyExit27: copy exactly 27 bytes (16-byte and 8-byte moves plus an
   overlapping 4-byte move at offset 23); no terminator, no zero fill.
   USE_AS_STPCPY returns edi + 27.  Presumably entry 27 of
   ExitStrncpyTable (outside this view).  */
1208 .p2align 4
1209 L(StrncpyExit27):
1210 movdqu (%esi), %xmm0
1211 movlpd 16(%esi), %xmm2
1212 movl 23(%esi), %ecx
1213 movdqu %xmm0, (%edi)
1214 movlpd %xmm2, 16(%edi)
1215 movl %ecx, 23(%edi)
1216 # ifdef USE_AS_STPCPY
1217 lea 27(%edi), %eax
1218 # endif
1219 RETURN
1220
/* StrncpyExit28: copy exactly 28 bytes (16-byte, 8-byte and 4-byte
   moves); no terminator, no zero fill.  USE_AS_STPCPY returns edi + 28.
   Presumably entry 28 of ExitStrncpyTable (outside this view).  */
1221 .p2align 4
1222 L(StrncpyExit28):
1223 movdqu (%esi), %xmm0
1224 movlpd 16(%esi), %xmm2
1225 movl 24(%esi), %ecx
1226 movdqu %xmm0, (%edi)
1227 movlpd %xmm2, 16(%edi)
1228 movl %ecx, 24(%edi)
1229 # ifdef USE_AS_STPCPY
1230 lea 28(%edi), %eax
1231 # endif
1232 RETURN
1233
/* StrncpyExit29: copy exactly 29 bytes (two overlapping 16-byte moves at
   offsets 0 and 13); no terminator, no zero fill.  USE_AS_STPCPY returns
   edi + 29.  Presumably entry 29 of ExitStrncpyTable (outside this
   view).  */
1234 .p2align 4
1235 L(StrncpyExit29):
1236 movdqu (%esi), %xmm0
1237 movdqu 13(%esi), %xmm2
1238 movdqu %xmm0, (%edi)
1239 movdqu %xmm2, 13(%edi)
1240 # ifdef USE_AS_STPCPY
1241 lea 29(%edi), %eax
1242 # endif
1243 RETURN
1244
/* StrncpyExit30: copy exactly 30 bytes (two overlapping 16-byte moves at
   offsets 0 and 14); no terminator, no zero fill.  USE_AS_STPCPY returns
   edi + 30.  Presumably entry 30 of ExitStrncpyTable (outside this
   view).  */
1245 .p2align 4
1246 L(StrncpyExit30):
1247 movdqu (%esi), %xmm0
1248 movdqu 14(%esi), %xmm2
1249 movdqu %xmm0, (%edi)
1250 movdqu %xmm2, 14(%edi)
1251 # ifdef USE_AS_STPCPY
1252 lea 30(%edi), %eax
1253 # endif
1254 RETURN
1255
/* StrncpyExit31: copy exactly 31 bytes (two overlapping 16-byte moves at
   offsets 0 and 15); no terminator, no zero fill.  USE_AS_STPCPY returns
   edi + 31.  Presumably entry 31 of ExitStrncpyTable (outside this
   view).  */
1256 .p2align 4
1257 L(StrncpyExit31):
1258 movdqu (%esi), %xmm0
1259 movdqu 15(%esi), %xmm2
1260 movdqu %xmm0, (%edi)
1261 movdqu %xmm2, 15(%edi)
1262 # ifdef USE_AS_STPCPY
1263 lea 31(%edi), %eax
1264 # endif
1265 RETURN
1266
/* StrncpyExit32: copy exactly 32 bytes (two adjacent 16-byte moves); no
   terminator, no zero fill.  USE_AS_STPCPY returns edi + 32.  Presumably
   entry 32 of ExitStrncpyTable (outside this view).  */
1267 .p2align 4
1268 L(StrncpyExit32):
1269 movdqu (%esi), %xmm0
1270 movdqu 16(%esi), %xmm2
1271 movdqu %xmm0, (%edi)
1272 movdqu %xmm2, 16(%edi)
1273 # ifdef USE_AS_STPCPY
1274 lea 32(%edi), %eax
1275 # endif
1276 RETURN
1277
/* StrncpyExit33: copy exactly 33 bytes (two 16-byte moves plus one
   byte); no terminator, no zero fill.
   NOTE(review): unlike its siblings this handler has no USE_AS_STPCPY
   return-value update.  The STPCPY thresholds in the entry code (16 and
   32 instead of 17 and 33) suggest it is only reached in the non-STPCPY
   build -- confirm against ExitStrncpyTable, which is outside this
   view.  */
1278 .p2align 4
1279 L(StrncpyExit33):
1280 movdqu (%esi), %xmm0
1281 movdqu 16(%esi), %xmm2
1282 movb 32(%esi), %cl
1283 movdqu %xmm0, (%edi)
1284 movdqu %xmm2, 16(%edi)
1285 movb %cl, 32(%edi)
1286 RETURN
1287
/* Fill0: no zero bytes left to write; just return.  Presumably entry 0
   of FillTable (the table is outside this view).  */
1288 .p2align 4
1289 L(Fill0):
1290 RETURN
1291
/* Fill1: store 1 zero byte at edi.  Relies on edx == 0 as set by
   L(StrncpyFillTailWithZero).  */
1292 .p2align 4
1293 L(Fill1):
1294 movb %dl, (%edi)
1295 RETURN
1296
/* Fill2: store 2 zero bytes at edi.  Relies on edx == 0 as set by
   L(StrncpyFillTailWithZero).  */
1297 .p2align 4
1298 L(Fill2):
1299 movw %dx, (%edi)
1300 RETURN
1301
/* Fill3: zero 3 bytes at edi with one 4-byte store that starts at
   edi - 1.  The extra byte at edi - 1 was already written as zero by the
   code leading here, so rewriting it is harmless.  Relies on edx == 0 as
   set by L(StrncpyFillTailWithZero).  */
1302 .p2align 4
1303 L(Fill3):
1304 movl %edx, -1(%edi)
1305 RETURN
1306
/* Fill4: store 4 zero bytes at edi.  Relies on edx == 0 as set by
   L(StrncpyFillTailWithZero).  */
1307 .p2align 4
1308 L(Fill4):
1309 movl %edx, (%edi)
1310 RETURN
1311
/* Fill5: store 5 zero bytes at edi (4 + 1).  Relies on edx == 0 as set
   by L(StrncpyFillTailWithZero).  */
1312 .p2align 4
1313 L(Fill5):
1314 movl %edx, (%edi)
1315 movb %dl, 4(%edi)
1316 RETURN
1317
/* Fill6: store 6 zero bytes at edi (4 + 2).  Relies on edx == 0 as set
   by L(StrncpyFillTailWithZero).  */
1318 .p2align 4
1319 L(Fill6):
1320 movl %edx, (%edi)
1321 movw %dx, 4(%edi)
1322 RETURN
1323
/* Fill7: zero 7 bytes at edi with one 8-byte store that starts at
   edi - 1.  The extra byte at edi - 1 was already written as zero by the
   code leading here.  Relies on xmm0 == 0 as set by
   L(StrncpyFillTailWithZero).  */
1324 .p2align 4
1325 L(Fill7):
1326 movlpd %xmm0, -1(%edi)
1327 RETURN
1328
/* Fill8: store 8 zero bytes at edi.  Relies on xmm0 == 0 as set by
   L(StrncpyFillTailWithZero).  */
1329 .p2align 4
1330 L(Fill8):
1331 movlpd %xmm0, (%edi)
1332 RETURN
1333
/* Fill9: store 9 zero bytes at edi (8 + 1).  Relies on xmm0 == 0 and
   edx == 0 as set by L(StrncpyFillTailWithZero).  */
1334 .p2align 4
1335 L(Fill9):
1336 movlpd %xmm0, (%edi)
1337 movb %dl, 8(%edi)
1338 RETURN
1339
/* Fill10: store 10 zero bytes at edi (8 + 2).  Relies on xmm0 == 0 and
   edx == 0 as set by L(StrncpyFillTailWithZero).  */
1340 .p2align 4
1341 L(Fill10):
1342 movlpd %xmm0, (%edi)
1343 movw %dx, 8(%edi)
1344 RETURN
1345
/* Fill11: store 11 zero bytes at edi (8-byte store plus an overlapping
   4-byte store at offset 7).  Relies on xmm0 == 0 and edx == 0 as set by
   L(StrncpyFillTailWithZero).  */
1346 .p2align 4
1347 L(Fill11):
1348 movlpd %xmm0, (%edi)
1349 movl %edx, 7(%edi)
1350 RETURN
1351
/* Fill12: store 12 zero bytes at edi (8 + 4).  Relies on xmm0 == 0 and
   edx == 0 as set by L(StrncpyFillTailWithZero).  */
1352 .p2align 4
1353 L(Fill12):
1354 movlpd %xmm0, (%edi)
1355 movl %edx, 8(%edi)
1356 RETURN
1357
/* Fill13: store 13 zero bytes at edi (two overlapping 8-byte stores at
   offsets 0 and 5).  Relies on xmm0 == 0 as set by
   L(StrncpyFillTailWithZero).  */
1358 .p2align 4
1359 L(Fill13):
1360 movlpd %xmm0, (%edi)
1361 movlpd %xmm0, 5(%edi)
1362 RETURN
1363
/* Fill14: store 14 zero bytes at edi (two overlapping 8-byte stores at
   offsets 0 and 6).  Relies on xmm0 == 0 as set by
   L(StrncpyFillTailWithZero).  */
1364 .p2align 4
1365 L(Fill14):
1366 movlpd %xmm0, (%edi)
1367 movlpd %xmm0, 6(%edi)
1368 RETURN
1369
/* Fill15: zero 15 bytes at edi with one 16-byte store that starts at
   edi - 1.  The extra byte at edi - 1 was already written as zero by the
   code leading here.  Relies on xmm0 == 0 as set by
   L(StrncpyFillTailWithZero).  */
1370 .p2align 4
1371 L(Fill15):
1372 movdqu %xmm0, -1(%edi)
1373 RETURN
1374
/* Fill16: store 16 zero bytes at edi with one unaligned 16-byte store.
   Relies on xmm0 == 0 as set by L(StrncpyFillTailWithZero).  */
1375 .p2align 4
1376 L(Fill16):
1377 movdqu %xmm0, (%edi)
1378 RETURN
1379
/* CopyFrom1To16BytesUnalignedXmm2: store the block held in xmm2 (the one
   containing the NUL) at offset ecx and fall through.  */
1380 .p2align 4
1381 L(CopyFrom1To16BytesUnalignedXmm2):
1382 movdqu %xmm2, (%edi, %ecx)
1383
/* Common tail after the block containing the NUL has been stored.
   On entry: edx = NUL mask of that block, ecx = its offset from edi,
   ebx = part of the count lying beyond the end of that block.
   On exit: edx = index of the NUL, ebx = ebx + 15 - index (what the
   count still owes after the terminator), edi = address just past the
   terminator; USE_AS_STPCPY sets eax to the terminator's address.
   Falls through into L(StrncpyFillTailWithZero).  */
1384 .p2align 4
1385 L(CopyFrom1To16BytesXmmExit):
1386 bsf %edx, %edx
1387 add $15, %ebx
1388 add %ecx, %edi
1389 # ifdef USE_AS_STPCPY
1390 lea (%edi, %edx), %eax
1391 # endif
1392 sub %edx, %ebx
1393 lea 1(%edi, %edx), %edi
1394
/* Zero-fill the ebx bytes that follow the terminator, starting at edi.
   xmm0 and edx are cleared here and serve as the zero source for the
   FillN handlers.  esi is reused as scratch (the source is no longer
   needed).  The last 0..16 bytes are always written by a FillN handler
   selected through FillTable.  */
1395 .p2align 4
1396 L(StrncpyFillTailWithZero):
1397 pxor %xmm0, %xmm0
1398 xor %edx, %edx
/* At most 16 bytes to fill: go straight to the table dispatch, which
   adds the 16 back.  */
1399 sub $16, %ebx
1400 jbe L(StrncpyFillExit)
1401
/* More than 16: write one unaligned 16-byte block of zeros, then round
   edi down to a 16-byte boundary so that aligned stores can be used.
   The bytes between the rounded address and the old edi are written
   again, which is why the same amount (esi) is added back to ebx.  */
1402 movdqu %xmm0, (%edi)
1403 add $16, %edi
1404
1405 mov %edi, %esi
1406 and $0xf, %esi
1407 sub %esi, %edi
1408 add %esi, %ebx
1409 sub $64, %ebx
1410 jb L(StrncpyFillLess64)
1411
/* Bulk fill, 64 bytes per iteration, while at least 64 bytes remain.  */
1412 L(StrncpyFillLoopMovdqa):
1413 movdqa %xmm0, (%edi)
1414 movdqa %xmm0, 16(%edi)
1415 movdqa %xmm0, 32(%edi)
1416 movdqa %xmm0, 48(%edi)
1417 add $64, %edi
1418 sub $64, %ebx
1419 jae L(StrncpyFillLoopMovdqa)
1420
/* Here ebx = R - 64 with R (0..63) bytes left.  R of 32 or more: write
   32 bytes, then another 16 if R is 48 or more, and leave the rest to
   the table.  */
1421 L(StrncpyFillLess64):
1422 add $32, %ebx
1423 jl L(StrncpyFillLess32)
1424 movdqa %xmm0, (%edi)
1425 movdqa %xmm0, 16(%edi)
1426 add $32, %edi
1427 sub $16, %ebx
1428 jl L(StrncpyFillExit)
1429 movdqa %xmm0, (%edi)
1430 add $16, %edi
1431 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1432
/* R below 32: write 16 bytes if R is 16 or more, the rest via table.  */
1433 L(StrncpyFillLess32):
1434 add $16, %ebx
1435 jl L(StrncpyFillExit)
1436 movdqa %xmm0, (%edi)
1437 add $16, %edi
1438 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1439
/* ebx is (bytes left - 16) here; restore it and dispatch.  */
1440 L(StrncpyFillExit):
1441 add $16, %ebx
1442 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1443
/* From the 64-byte loop: the count ends within the current 64 bytes
   (blocks xmm4..xmm7, not yet stored).  edx is the combined NUL mask; a
   non-zero mask means a NUL competes with the count (Case2).
   Case3 (no NUL): ebx + 64 is the number of bytes still to copy (r).
   ecx = r rounded down to a multiple of 16 = offset of the block in
   which the count ends.  Whole blocks are stored one by one; as soon as
   fewer than 16 bytes remain, L(CopyFrom1To16BytesCase3) adds 16 back to
   ebx, advances esi / edi by ecx and copies the remainder.  If r is
   exactly 64 all four blocks are stored here and the function returns
   (USE_AS_STPCPY: eax = edi + 64).  */
1444 .p2align 4
1445 L(UnalignedLeaveCase2OrCase3):
1446 test %edx, %edx
1447 jnz L(Unaligned64LeaveCase2)
1448 L(Unaligned64LeaveCase3):
1449 lea 64(%ebx), %ecx
1450 and $-16, %ecx
1451 add $48, %ebx
1452 jl L(CopyFrom1To16BytesCase3)
1453 movdqu %xmm4, (%edi)
1454 sub $16, %ebx
1455 jb L(CopyFrom1To16BytesCase3)
1456 movdqu %xmm5, 16(%edi)
1457 sub $16, %ebx
1458 jb L(CopyFrom1To16BytesCase3)
1459 movdqu %xmm6, 32(%edi)
1460 sub $16, %ebx
1461 jb L(CopyFrom1To16BytesCase3)
1462 movdqu %xmm7, 48(%edi)
1463 # ifdef USE_AS_STPCPY
1464 lea 64(%edi), %eax
1465 # endif
1466 RETURN
1467
/* The current 64 bytes (xmm4..xmm7) contain a NUL and the count also
   ends within them.  Walk the four blocks in order; ecx is the offset of
   the block being examined and ebx is kept as the part of the count
   lying beyond the end of that block, matching what the shared handlers
   expect.  xmm0 is all-zero on entry and again after every comparison
   that finds nothing.  For each block:
     - count ends in it: L(CopyFrom1To16BytesCase2OrCase3) decides;
     - NUL in it, count continues: store it via the UnalignedXmmN
       trampoline and zero-fill;
     - neither: store it and move on.  */
1468 .p2align 4
1469 L(Unaligned64LeaveCase2):
1470 xor %ecx, %ecx
1471 pcmpeqb %xmm4, %xmm0
1472 pmovmskb %xmm0, %edx
1473 add $48, %ebx
1474 jle L(CopyFrom1To16BytesCase2OrCase3)
1475 test %edx, %edx
1476 jnz L(CopyFrom1To16BytesUnalignedXmm4)
1477
1478 pcmpeqb %xmm5, %xmm0
1479 pmovmskb %xmm0, %edx
1480 movdqu %xmm4, (%edi)
1481 add $16, %ecx
1482 sub $16, %ebx
1483 jbe L(CopyFrom1To16BytesCase2OrCase3)
1484 test %edx, %edx
1485 jnz L(CopyFrom1To16BytesUnalignedXmm5)
1486
1487 pcmpeqb %xmm6, %xmm0
1488 pmovmskb %xmm0, %edx
1489 movdqu %xmm5, 16(%edi)
1490 add $16, %ecx
1491 sub $16, %ebx
1492 jbe L(CopyFrom1To16BytesCase2OrCase3)
1493 test %edx, %edx
1494 jnz L(CopyFrom1To16BytesUnalignedXmm6)
1495
/* Fourth block: the NUL must be here, and so must the end of the count.
   Store the third block, point edi / esi at the fourth (ecx + 16) and
   compare the NUL index with ebx, which is now the count measured from
   the start of this block.  */
1496 pcmpeqb %xmm7, %xmm0
1497 pmovmskb %xmm0, %edx
1498 movdqu %xmm6, 32(%edi)
1499 lea 16(%edi, %ecx), %edi
1500 lea 16(%esi, %ecx), %esi
1501 bsf %edx, %edx
1502 cmp %ebx, %edx
1503 jb L(CopyFrom1To16BytesExit)
1504 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
1505
/* Exit stub of the strncpy build: returns edi in eax through the RETURN
   macro of that build. Nothing is copied here.
   NOTE(review): presumably the zero-length case - confirm at the jump site,
   which is outside this part of the file. */
1506 .p2align 4
1507 L(ExitZero):
1508 movl %edi, %eax
1509 RETURN
1510
1511 END (STRCPY)
1512
/* Jump table with 32 four-byte entries. Entry k (counting from 0) refers
   to L(Exit) number k + 1. Each entry is produced by the JMPTBL macro that
   is defined earlier in the file.
   NOTE(review): the .p2align directive comes before the .section switch,
   so it pads the section that was active before and does not by itself
   align this table inside .rodata. */
1513 .p2align 4
1514 .section .rodata
1515 L(ExitTable):
1516 .int JMPTBL(L(Exit1), L(ExitTable))
1517 .int JMPTBL(L(Exit2), L(ExitTable))
1518 .int JMPTBL(L(Exit3), L(ExitTable))
1519 .int JMPTBL(L(Exit4), L(ExitTable))
1520 .int JMPTBL(L(Exit5), L(ExitTable))
1521 .int JMPTBL(L(Exit6), L(ExitTable))
1522 .int JMPTBL(L(Exit7), L(ExitTable))
1523 .int JMPTBL(L(Exit8), L(ExitTable))
1524 .int JMPTBL(L(Exit9), L(ExitTable))
1525 .int JMPTBL(L(Exit10), L(ExitTable))
1526 .int JMPTBL(L(Exit11), L(ExitTable))
1527 .int JMPTBL(L(Exit12), L(ExitTable))
1528 .int JMPTBL(L(Exit13), L(ExitTable))
1529 .int JMPTBL(L(Exit14), L(ExitTable))
1530 .int JMPTBL(L(Exit15), L(ExitTable))
1531 .int JMPTBL(L(Exit16), L(ExitTable))
1532 .int JMPTBL(L(Exit17), L(ExitTable))
1533 .int JMPTBL(L(Exit18), L(ExitTable))
1534 .int JMPTBL(L(Exit19), L(ExitTable))
1535 .int JMPTBL(L(Exit20), L(ExitTable))
1536 .int JMPTBL(L(Exit21), L(ExitTable))
1537 .int JMPTBL(L(Exit22), L(ExitTable))
1538 .int JMPTBL(L(Exit23), L(ExitTable))
1539 .int JMPTBL(L(Exit24), L(ExitTable))
1540 .int JMPTBL(L(Exit25), L(ExitTable))
1541 .int JMPTBL(L(Exit26), L(ExitTable))
1542 .int JMPTBL(L(Exit27), L(ExitTable))
1543 .int JMPTBL(L(Exit28), L(ExitTable))
1544 .int JMPTBL(L(Exit29), L(ExitTable))
1545 .int JMPTBL(L(Exit30), L(ExitTable))
1546 .int JMPTBL(L(Exit31), L(ExitTable))
1547 .int JMPTBL(L(Exit32), L(ExitTable))
1548
/* Jump table with 34 four-byte entries, indexed from 0 to 33. Entry 0
   refers to L(Exit0) and entry k (1..33) to L(StrncpyExit) number k.
   Unaligned64LeaveCase2 dispatches through this table with ebx as the index
   and a scale of 4. */
1549 L(ExitStrncpyTable):
1550 .int JMPTBL(L(Exit0), L(ExitStrncpyTable))
1551 .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1552 .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1553 .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1554 .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1555 .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1556 .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1557 .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1558 .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1559 .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1560 .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1561 .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1562 .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1563 .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1564 .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1565 .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1566 .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1567 .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1568 .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1569 .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1570 .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1571 .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1572 .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1573 .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1574 .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1575 .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1576 .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1577 .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1578 .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1579 .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1580 .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1581 .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1582 .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1583 .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
1584
/* Jump table with 17 four-byte entries, aligned to 16 bytes. Entry k
   (0..16) refers to L(Fill) number k. The zero padding code dispatches
   through it with ebx as the index and a scale of 4. */
1585 .p2align 4
1586 L(FillTable):
1587 .int JMPTBL(L(Fill0), L(FillTable))
1588 .int JMPTBL(L(Fill1), L(FillTable))
1589 .int JMPTBL(L(Fill2), L(FillTable))
1590 .int JMPTBL(L(Fill3), L(FillTable))
1591 .int JMPTBL(L(Fill4), L(FillTable))
1592 .int JMPTBL(L(Fill5), L(FillTable))
1593 .int JMPTBL(L(Fill6), L(FillTable))
1594 .int JMPTBL(L(Fill7), L(FillTable))
1595 .int JMPTBL(L(Fill8), L(FillTable))
1596 .int JMPTBL(L(Fill9), L(FillTable))
1597 .int JMPTBL(L(Fill10), L(FillTable))
1598 .int JMPTBL(L(Fill11), L(FillTable))
1599 .int JMPTBL(L(Fill12), L(FillTable))
1600 .int JMPTBL(L(Fill13), L(FillTable))
1601 .int JMPTBL(L(Fill14), L(FillTable))
1602 .int JMPTBL(L(Fill15), L(FillTable))
1603 .int JMPTBL(L(Fill16), L(FillTable))
1604 # else
/* Settings for the build without USE_AS_STRNCPY.
   PARMS    offset from esp to the first stack argument at function entry.
   ENTRANCE expands to nothing. The function pushes edi and ebx itself once
            the short-string probes have failed.
   RETURN   pops edi and returns. The CFI_PUSH after the ret re-declares edi
            as saved, which is the unwind state of the code that follows the
            ret in the file.
   RETURN1  plain ret, used by the ExitTail paths that run before any push. */
1605 # define PARMS 4
1606 # define ENTRANCE
1607 # define RETURN POP (%edi); ret; CFI_PUSH (%edi)
1608 # define RETURN1 ret
1609
/* STRCPY entry point for the build without USE_AS_STRNCPY.
   In:    STR1(%esp) = destination (kept in edx), STR2(%esp) = source (ecx).
   Out:   eax = destination, or with USE_AS_STPCPY the address of the NUL
          byte written to the destination.
   Saves: edi and ebx are pushed after the short-string probes. edi then
          holds the original destination for the return value and ebx is the
          running block offset.
   Uses:  eax, ecx, edx, xmm0..xmm4 in this part.
   This part probes the first 16 bytes one by one, then copies 16 bytes at
   a time from 16-byte aligned source addresses, and finally falls through
   into Aligned64Loop with ecx aligned to 64 bytes. */
1610 .text
1611 ENTRY (STRCPY)
1612 ENTRANCE
1613 mov STR1(%esp), %edx
1614 mov STR2(%esp), %ecx
1615
/* Probe source bytes 0..15. A NUL at offset k is finished by ExitTail
   number k + 1, before any register has been pushed. */
1616 cmpb $0, (%ecx)
1617 jz L(ExitTail1)
1618 cmpb $0, 1(%ecx)
1619 jz L(ExitTail2)
1620 cmpb $0, 2(%ecx)
1621 jz L(ExitTail3)
1622 cmpb $0, 3(%ecx)
1623 jz L(ExitTail4)
1624 cmpb $0, 4(%ecx)
1625 jz L(ExitTail5)
1626 cmpb $0, 5(%ecx)
1627 jz L(ExitTail6)
1628 cmpb $0, 6(%ecx)
1629 jz L(ExitTail7)
1630 cmpb $0, 7(%ecx)
1631 jz L(ExitTail8)
1632 cmpb $0, 8(%ecx)
1633 jz L(ExitTail9)
1634 cmpb $0, 9(%ecx)
1635 jz L(ExitTail10)
1636 cmpb $0, 10(%ecx)
1637 jz L(ExitTail11)
1638 cmpb $0, 11(%ecx)
1639 jz L(ExitTail12)
1640 cmpb $0, 12(%ecx)
1641 jz L(ExitTail13)
1642 cmpb $0, 13(%ecx)
1643 jz L(ExitTail14)
1644 cmpb $0, 14(%ecx)
1645 jz L(ExitTail15)
1646 cmpb $0, 15(%ecx)
1647 jz L(ExitTail16)
1648
/* No NUL in bytes 0..15. From here on edi and ebx are live on the stack
   and every exit goes through POP (%ebx) plus RETURN. */
1649 PUSH (%edi)
1650 PUSH (%ebx)
1651
/* edi = original destination. ebx = first 16-byte aligned address above
   ecx. Copy source bytes 0..15 with unaligned moves and test the aligned
   block at ebx for a NUL. After the sub, ebx is the offset of that block
   from ecx, which is what CopyFrom1To16Bytes expects. */
1652 mov %edx, %edi
1653 lea 16(%ecx), %ebx
1654 and $-16, %ebx
1655 pxor %xmm0, %xmm0
1656 movdqu (%ecx), %xmm1
1657 movdqu %xmm1, (%edx)
1658 pcmpeqb (%ebx), %xmm0
1659 pmovmskb %xmm0, %eax
1660 sub %ecx, %ebx
1661 test %eax, %eax
1662 jnz L(CopyFrom1To16Bytes)
1663
/* Advance ecx to that aligned address and move edx forward by the same
   distance, so that one offset addresses both buffers. ebx restarts at 0. */
1664 mov %ecx, %eax
1665 lea 16(%ecx), %ecx
1666 and $-16, %ecx
1667 sub %ecx, %eax
1668 sub %eax, %edx
1669 xor %ebx, %ebx
1670
/* Unrolled steps: load the next aligned block, store the previous one and
   test the new one. On a hit ebx is the offset of the block that holds the
   NUL, and that block has not been stored yet. xmm0 is all zero whenever a
   test finds nothing, so it is reused as the comparison operand. */
1671 .p2align 4
1672 movdqa (%ecx), %xmm1
1673 movaps 16(%ecx), %xmm2
1674 movdqu %xmm1, (%edx)
1675 pcmpeqb %xmm2, %xmm0
1676 pmovmskb %xmm0, %eax
1677 add $16, %ebx
1678 test %eax, %eax
1679 jnz L(CopyFrom1To16Bytes)
1680
1681 movaps 16(%ecx, %ebx), %xmm3
1682 movdqu %xmm2, (%edx, %ebx)
1683 pcmpeqb %xmm3, %xmm0
1684 pmovmskb %xmm0, %eax
1685 add $16, %ebx
1686 test %eax, %eax
1687 jnz L(CopyFrom1To16Bytes)
1688
1689 movaps 16(%ecx, %ebx), %xmm4
1690 movdqu %xmm3, (%edx, %ebx)
1691 pcmpeqb %xmm4, %xmm0
1692 pmovmskb %xmm0, %eax
1693 add $16, %ebx
1694 test %eax, %eax
1695 jnz L(CopyFrom1To16Bytes)
1696
1697 movaps 16(%ecx, %ebx), %xmm1
1698 movdqu %xmm4, (%edx, %ebx)
1699 pcmpeqb %xmm1, %xmm0
1700 pmovmskb %xmm0, %eax
1701 add $16, %ebx
1702 test %eax, %eax
1703 jnz L(CopyFrom1To16Bytes)
1704
1705 movaps 16(%ecx, %ebx), %xmm2
1706 movdqu %xmm1, (%edx, %ebx)
1707 pcmpeqb %xmm2, %xmm0
1708 pmovmskb %xmm0, %eax
1709 add $16, %ebx
1710 test %eax, %eax
1711 jnz L(CopyFrom1To16Bytes)
1712
1713 movaps 16(%ecx, %ebx), %xmm3
1714 movdqu %xmm2, (%edx, %ebx)
1715 pcmpeqb %xmm3, %xmm0
1716 pmovmskb %xmm0, %eax
1717 add $16, %ebx
1718 test %eax, %eax
1719 jnz L(CopyFrom1To16Bytes)
1720
/* Six more blocks tested without a NUL, ebx = 96. Store the last one, then
   round ecx + 112 down to a 64-byte boundary and shift edx by the same
   distance. Up to 48 bytes that were already stored are copied again by
   the loop that follows. ebx keeps the value 96. */
1721 movdqu %xmm3, (%edx, %ebx)
1722 mov %ecx, %eax
1723 lea 16(%ecx, %ebx), %ecx
1724 and $-0x40, %ecx
1725 sub %ecx, %eax
1726 sub %eax, %edx
1727
/* Main loop: 64 bytes per iteration from a 64-byte aligned source.
   ecx = source, edx = destination, xmm0 = all zero.
   The four blocks are kept in xmm4, xmm5, xmm6 and xmm7. Their bytewise
   unsigned minimum (pminub) has a zero byte exactly when one of the 64
   bytes is NUL, so one compare and one mask cover the whole group.
   ecx and edx are advanced before the test, which is why the stores use
   negative displacements. The group that contains the NUL is not stored
   here and is left to Aligned64Leave. */
1728 L(Aligned64Loop):
1729 movaps (%ecx), %xmm2
1730 movaps %xmm2, %xmm4
1731 movaps 16(%ecx), %xmm5
1732 movaps 32(%ecx), %xmm3
1733 movaps %xmm3, %xmm6
1734 movaps 48(%ecx), %xmm7
1735 pminub %xmm5, %xmm2
1736 add $64, %ecx
1737 pminub %xmm7, %xmm3
1738 add $64, %edx
1739 pminub %xmm2, %xmm3
/* First pass only: the compare result lands in xmm3, xmm0 is untouched. */
1740 pcmpeqb %xmm0, %xmm3
1741 pmovmskb %xmm3, %eax
1742 test %eax, %eax
1743 jnz L(Aligned64Leave)
/* Steady state: the stores of the previous group are interleaved with the
   loads of the next one. Here the compare result lands in xmm0, which is
   all zero again whenever the loop continues. */
1744 L(Aligned64Loop_start):
1745 movdqu %xmm4, -64(%edx)
1746 movaps (%ecx), %xmm2
1747 movdqa %xmm2, %xmm4
1748 movdqu %xmm5, -48(%edx)
1749 movaps 16(%ecx), %xmm5
1750 pminub %xmm5, %xmm2
1751 movaps 32(%ecx), %xmm3
1752 movdqu %xmm6, -32(%edx)
1753 movaps %xmm3, %xmm6
1754 movdqu %xmm7, -16(%edx)
1755 movaps 48(%ecx), %xmm7
1756 pminub %xmm7, %xmm3
1757 pminub %xmm2, %xmm3
1758 pcmpeqb %xmm3, %xmm0
1759 pmovmskb %xmm0, %eax
1760 add $64, %edx
1761 add $64, %ecx
1762 test %eax, %eax
1763 jz L(Aligned64Loop_start)
/* A NUL is somewhere in the group held in xmm4..xmm7, and ecx and edx
   already point 64 bytes past its start. ebx still holds 96 from the code
   before the loop, so subtracting 0xa0 makes it -64, the offset of the
   first block of the group. Each block is tested in turn. A block without
   a NUL is stored and ebx moves on by 16. The block with the NUL is handed
   to CopyFrom1To16Bytes with its mask in eax and its offset in ebx. */
1764 L(Aligned64Leave):
1765 sub $0xa0, %ebx
/* xmm0 may hold a compare result from the loop, so clear it again. */
1766 pxor %xmm0, %xmm0
1767 pcmpeqb %xmm4, %xmm0
1768 pmovmskb %xmm0, %eax
1769 test %eax, %eax
1770 jnz L(CopyFrom1To16Bytes)
1771
/* lea is used for the ebx update because it leaves the flags set by test
   intact for the jnz that follows. */
1772 pcmpeqb %xmm5, %xmm0
1773 pmovmskb %xmm0, %eax
1774 movdqu %xmm4, -64(%edx)
1775 test %eax, %eax
1776 lea 16(%ebx), %ebx
1777 jnz L(CopyFrom1To16Bytes)
1778
1779 pcmpeqb %xmm6, %xmm0
1780 pmovmskb %xmm0, %eax
1781 movdqu %xmm5, -48(%edx)
1782 test %eax, %eax
1783 lea 16(%ebx), %ebx
1784 jnz L(CopyFrom1To16Bytes)
1785
/* Only the last block is left, so no test is needed: compute its mask and
   fall through into CopyFrom1To16Bytes with ebx = -16. */
1786 movdqu %xmm6, -32(%edx)
1787 pcmpeqb %xmm7, %xmm0
1788 pmovmskb %xmm0, %eax
1789 lea 16(%ebx), %ebx
1790
1791 /*-----------------End of main part---------------------------*/
1792
/* Final partial block.
   In: eax = pmovmskb mask of the NUL positions in the 16-byte block,
       ebx = offset of that block relative to ecx and edx,
       edi = original destination, with edi and ebx saved on the stack.
   ecx and edx are rebased to the block and ebx is restored. The lowest set
   bit of the mask is then located by testing al bit by bit. Bit k set means
   the NUL is at offset k and Exit number k + 1 copies k + 1 bytes. A zero
   al means the NUL is in bytes 8..15 and ExitHigh takes over. */
1793 .p2align 4
1794 L(CopyFrom1To16Bytes):
1795 add %ebx, %edx
1796 add %ebx, %ecx
1797
1798 POP (%ebx)
1799 test %al, %al
1800 jz L(ExitHigh)
1801 test $0x01, %al
1802 jnz L(Exit1)
1803 test $0x02, %al
1804 jnz L(Exit2)
1805 test $0x04, %al
1806 jnz L(Exit3)
1807 test $0x08, %al
1808 jnz L(Exit4)
1809 test $0x10, %al
1810 jnz L(Exit5)
1811 test $0x20, %al
1812 jnz L(Exit6)
1813 test $0x40, %al
1814 jnz L(Exit7)
/* al is non-zero and bits 0..6 are clear, so bit 7 is set: copy 8 bytes. */
1815 /* Exit 8 */
1816 movl (%ecx), %eax
1817 movl %eax, (%edx)
1818 movl 4(%ecx), %eax
1819 movl %eax, 4(%edx)
1820 # ifdef USE_AS_STPCPY
1821 lea 7(%edx), %eax
1822 # else
1823 movl %edi, %eax
1824 # endif
1825 RETURN
1826
/* Continuation of CopyFrom1To16Bytes for a NUL in bytes 8..15: the low mask
   byte was zero, so the high byte in ah is scanned the same way. Bit k of
   ah set means the NUL is at offset 8 + k and Exit number 9 + k is used. */
1827 .p2align 4
1828 L(ExitHigh):
1829 test $0x01, %ah
1830 jnz L(Exit9)
1831 test $0x02, %ah
1832 jnz L(Exit10)
1833 test $0x04, %ah
1834 jnz L(Exit11)
1835 test $0x08, %ah
1836 jnz L(Exit12)
1837 test $0x10, %ah
1838 jnz L(Exit13)
1839 test $0x20, %ah
1840 jnz L(Exit14)
1841 test $0x40, %ah
1842 jnz L(Exit15)
/* Bits 0..6 of ah are clear, so the NUL is at offset 15: copy all 16 bytes
   as two 8-byte moves through xmm0. */
1843 /* Exit 16 */
1844 movlpd (%ecx), %xmm0
1845 movlpd %xmm0, (%edx)
1846 movlpd 8(%ecx), %xmm0
1847 movlpd %xmm0, 8(%edx)
1848 # ifdef USE_AS_STPCPY
1849 lea 15(%edx), %eax
1850 # else
1851 movl %edi, %eax
1852 # endif
1853 RETURN
1854
/* NUL at offset 0 of the final block: copy 1 byte from ecx to edx.
   Returns edi (original destination), or with USE_AS_STPCPY the address of
   the NUL, which is edx itself. RETURN also pops edi. */
1855 .p2align 4
1856 L(Exit1):
1857 movb (%ecx), %al
1858 movb %al, (%edx)
1859 # ifdef USE_AS_STPCPY
1860 lea (%edx), %eax
1861 # else
1862 movl %edi, %eax
1863 # endif
1864 RETURN
1865
/* NUL at offset 1 of the final block: copy 2 bytes with one 16-bit move.
   Returns edi (original destination), or edx + 1 with USE_AS_STPCPY.
   RETURN also pops edi. */
1866 .p2align 4
1867 L(Exit2):
1868 movw (%ecx), %ax
1869 movw %ax, (%edx)
1870 # ifdef USE_AS_STPCPY
1871 lea 1(%edx), %eax
1872 # else
1873 movl %edi, %eax
1874 # endif
1875 RETURN
1876
/* NUL at offset 2 of the final block: copy 3 bytes as a 16-bit move plus
   an 8-bit move. Returns edi (original destination), or edx + 2 with
   USE_AS_STPCPY. RETURN also pops edi. */
1877 .p2align 4
1878 L(Exit3):
1879 movw (%ecx), %ax
1880 movw %ax, (%edx)
1881 movb 2(%ecx), %al
1882 movb %al, 2(%edx)
1883 # ifdef USE_AS_STPCPY
1884 lea 2(%edx), %eax
1885 # else
1886 movl %edi, %eax
1887 # endif
1888 RETURN
1889
/* NUL at offset 3 of the final block: copy 4 bytes with one 32-bit move.
   Returns edi (original destination), or edx + 3 with USE_AS_STPCPY.
   RETURN also pops edi. */
1890 .p2align 4
1891 L(Exit4):
1892 movl (%ecx), %eax
1893 movl %eax, (%edx)
1894 # ifdef USE_AS_STPCPY
1895 lea 3(%edx), %eax
1896 # else
1897 movl %edi, %eax
1898 # endif
1899 RETURN
1900
/* NUL at offset 4 of the final block: copy 5 bytes as a 32-bit move plus
   an 8-bit move. Returns edi (original destination), or edx + 4 with
   USE_AS_STPCPY. RETURN also pops edi. */
1901 .p2align 4
1902 L(Exit5):
1903 movl (%ecx), %eax
1904 movl %eax, (%edx)
1905 movb 4(%ecx), %al
1906 movb %al, 4(%edx)
1907 # ifdef USE_AS_STPCPY
1908 lea 4(%edx), %eax
1909 # else
1910 movl %edi, %eax
1911 # endif
1912 RETURN
1913
/* NUL at offset 5 of the final block: copy 6 bytes as a 32-bit move plus
   a 16-bit move. Returns edi (original destination), or edx + 5 with
   USE_AS_STPCPY. RETURN also pops edi. */
1914 .p2align 4
1915 L(Exit6):
1916 movl (%ecx), %eax
1917 movl %eax, (%edx)
1918 movw 4(%ecx), %ax
1919 movw %ax, 4(%edx)
1920 # ifdef USE_AS_STPCPY
1921 lea 5(%edx), %eax
1922 # else
1923 movl %edi, %eax
1924 # endif
1925 RETURN
1926
/* NUL at offset 6 of the final block: copy 7 bytes as two 32-bit moves
   that overlap by one byte (offsets 0..3 and 3..6). Returns edi (original
   destination), or edx + 6 with USE_AS_STPCPY. RETURN also pops edi. */
1927 .p2align 4
1928 L(Exit7):
1929 movl (%ecx), %eax
1930 movl %eax, (%edx)
1931 movl 3(%ecx), %eax
1932 movl %eax, 3(%edx)
1933 # ifdef USE_AS_STPCPY
1934 lea 6(%edx), %eax
1935 # else
1936 movl %edi, %eax
1937 # endif
1938 RETURN
1939
/* NUL at offset 8 of the final block: copy 9 bytes as two 32-bit moves
   plus an 8-bit move. Returns edi (original destination), or edx + 8 with
   USE_AS_STPCPY. RETURN also pops edi. */
1940 .p2align 4
1941 L(Exit9):
1942 movl (%ecx), %eax
1943 movl %eax, (%edx)
1944 movl 4(%ecx), %eax
1945 movl %eax, 4(%edx)
1946 movb 8(%ecx), %al
1947 movb %al, 8(%edx)
1948 # ifdef USE_AS_STPCPY
1949 lea 8(%edx), %eax
1950 # else
1951 movl %edi, %eax
1952 # endif
1953 RETURN
1954
/* NUL at offset 9 of the final block: copy 10 bytes as two 32-bit moves
   plus a 16-bit move. Returns edi (original destination), or edx + 9 with
   USE_AS_STPCPY. RETURN also pops edi. */
1955 .p2align 4
1956 L(Exit10):
1957 movl (%ecx), %eax
1958 movl %eax, (%edx)
1959 movl 4(%ecx), %eax
1960 movl %eax, 4(%edx)
1961 movw 8(%ecx), %ax
1962 movw %ax, 8(%edx)
1963 # ifdef USE_AS_STPCPY
1964 lea 9(%edx), %eax
1965 # else
1966 movl %edi, %eax
1967 # endif
1968 RETURN
1969
/* NUL at offset 10 of the final block: copy 11 bytes as three 32-bit
   moves, the last one overlapping by one byte (offsets 7..10). Returns edi
   (original destination), or edx + 10 with USE_AS_STPCPY. RETURN also pops
   edi. */
1970 .p2align 4
1971 L(Exit11):
1972 movl (%ecx), %eax
1973 movl %eax, (%edx)
1974 movl 4(%ecx), %eax
1975 movl %eax, 4(%edx)
1976 movl 7(%ecx), %eax
1977 movl %eax, 7(%edx)
1978 # ifdef USE_AS_STPCPY
1979 lea 10(%edx), %eax
1980 # else
1981 movl %edi, %eax
1982 # endif
1983 RETURN
1984
/* NUL at offset 11 of the final block: copy 12 bytes as three 32-bit
   moves. Returns edi (original destination), or edx + 11 with
   USE_AS_STPCPY. RETURN also pops edi. */
1985 .p2align 4
1986 L(Exit12):
1987 movl (%ecx), %eax
1988 movl %eax, (%edx)
1989 movl 4(%ecx), %eax
1990 movl %eax, 4(%edx)
1991 movl 8(%ecx), %eax
1992 movl %eax, 8(%edx)
1993 # ifdef USE_AS_STPCPY
1994 lea 11(%edx), %eax
1995 # else
1996 movl %edi, %eax
1997 # endif
1998 RETURN
1999
/* NUL at offset 12 of the final block: copy 13 bytes as two 8-byte moves
   through xmm0 that overlap (offsets 0..7 and 5..12). Returns edi (original
   destination), or edx + 12 with USE_AS_STPCPY. RETURN also pops edi. */
2000 .p2align 4
2001 L(Exit13):
2002 movlpd (%ecx), %xmm0
2003 movlpd %xmm0, (%edx)
2004 movlpd 5(%ecx), %xmm0
2005 movlpd %xmm0, 5(%edx)
2006 # ifdef USE_AS_STPCPY
2007 lea 12(%edx), %eax
2008 # else
2009 movl %edi, %eax
2010 # endif
2011 RETURN
2012
/* NUL at offset 13 of the final block: copy 14 bytes as two 8-byte moves
   through xmm0 that overlap (offsets 0..7 and 6..13). Returns edi (original
   destination), or edx + 13 with USE_AS_STPCPY. RETURN also pops edi. */
2013 .p2align 4
2014 L(Exit14):
2015 movlpd (%ecx), %xmm0
2016 movlpd %xmm0, (%edx)
2017 movlpd 6(%ecx), %xmm0
2018 movlpd %xmm0, 6(%edx)
2019 # ifdef USE_AS_STPCPY
2020 lea 13(%edx), %eax
2021 # else
2022 movl %edi, %eax
2023 # endif
2024 RETURN
2025
/* NUL at offset 14 of the final block: copy 15 bytes as two 8-byte moves
   through xmm0 that overlap by one byte (offsets 0..7 and 7..14). Returns
   edi (original destination), or edx + 14 with USE_AS_STPCPY. RETURN also
   pops edi. */
2026 .p2align 4
2027 L(Exit15):
2028 movlpd (%ecx), %xmm0
2029 movlpd %xmm0, (%edx)
2030 movlpd 7(%ecx), %xmm0
2031 movlpd %xmm0, 7(%edx)
2032 # ifdef USE_AS_STPCPY
2033 lea 14(%edx), %eax
2034 # else
2035 movl %edi, %eax
2036 # endif
2037 RETURN
2038
/* The ExitTail stubs below are reached from the byte probes at function
   entry, before edi is pushed. CFI_POP cancels the CFI_PUSH left behind by
   the preceding RETURN so the unwind info describes an empty frame again. */
2039 CFI_POP (%edi)
2040
/* Source byte 0 is the NUL: copy that single byte. The return value is
   edx in every build, since the destination start is also the address of
   the NUL. RETURN1 is a plain ret. */
2041 .p2align 4
2042 L(ExitTail1):
2043 movb (%ecx), %al
2044 movb %al, (%edx)
2045 movl %edx, %eax
2046 RETURN1
2047
/* NUL at source offset 1, nothing pushed yet: copy 2 bytes with one 16-bit
   move. Returns edx (destination), or edx + 1 with USE_AS_STPCPY. */
2047 .p2align 4
2048 L(ExitTail2):
2049 movw (%ecx), %ax
2050 movw %ax, (%edx)
2051 # ifdef USE_AS_STPCPY
2052 lea 1(%edx), %eax
2053 # else
2054 movl %edx, %eax
2055 # endif
2056 RETURN1
2058
/* NUL at source offset 2, nothing pushed yet: copy 3 bytes as a 16-bit
   move plus an 8-bit move. Returns edx (destination), or edx + 2 with
   USE_AS_STPCPY. */
2058 .p2align 4
2059 L(ExitTail3):
2060 movw (%ecx), %ax
2061 movw %ax, (%edx)
2062 movb 2(%ecx), %al
2063 movb %al, 2(%edx)
2064 # ifdef USE_AS_STPCPY
2065 lea 2(%edx), %eax
2066 # else
2067 movl %edx, %eax
2068 # endif
2069 RETURN1
2071
/* NUL at source offset 3, nothing pushed yet: copy 4 bytes with one 32-bit
   move. Returns edx (destination), or edx + 3 with USE_AS_STPCPY. */
2071 .p2align 4
2072 L(ExitTail4):
2073 movl (%ecx), %eax
2074 movl %eax, (%edx)
2075 # ifdef USE_AS_STPCPY
2076 lea 3(%edx), %eax
2077 # else
2078 movl %edx, %eax
2079 # endif
2080 RETURN1
2082
/* NUL at source offset 4, nothing pushed yet: copy 5 bytes as a 32-bit
   move plus an 8-bit move. Returns edx (destination), or edx + 4 with
   USE_AS_STPCPY. */
2082 .p2align 4
2083 L(ExitTail5):
2084 movl (%ecx), %eax
2085 movl %eax, (%edx)
2086 movb 4(%ecx), %al
2087 movb %al, 4(%edx)
2088 # ifdef USE_AS_STPCPY
2089 lea 4(%edx), %eax
2090 # else
2091 movl %edx, %eax
2092 # endif
2093 RETURN1
2095
/* NUL at source offset 5, nothing pushed yet: copy 6 bytes as a 32-bit
   move plus a 16-bit move. Returns edx (destination), or edx + 5 with
   USE_AS_STPCPY. */
2095 .p2align 4
2096 L(ExitTail6):
2097 movl (%ecx), %eax
2098 movl %eax, (%edx)
2099 movw 4(%ecx), %ax
2100 movw %ax, 4(%edx)
2101 # ifdef USE_AS_STPCPY
2102 lea 5(%edx), %eax
2103 # else
2104 movl %edx, %eax
2105 # endif
2106 RETURN1
2108
/* NUL at source offset 6, nothing pushed yet: copy 7 bytes as two 32-bit
   moves that overlap by one byte (offsets 0..3 and 3..6). Returns edx
   (destination), or edx + 6 with USE_AS_STPCPY. */
2108 .p2align 4
2109 L(ExitTail7):
2110 movl (%ecx), %eax
2111 movl %eax, (%edx)
2112 movl 3(%ecx), %eax
2113 movl %eax, 3(%edx)
2114 # ifdef USE_AS_STPCPY
2115 lea 6(%edx), %eax
2116 # else
2117 movl %edx, %eax
2118 # endif
2119 RETURN1
2121
/* NUL at source offset 7, nothing pushed yet: copy 8 bytes as two 32-bit
   moves. Returns edx (destination), or edx + 7 with USE_AS_STPCPY. */
2121 .p2align 4
2122 L(ExitTail8):
2123 movl (%ecx), %eax
2124 movl %eax, (%edx)
2125 movl 4(%ecx), %eax
2126 movl %eax, 4(%edx)
2127 # ifdef USE_AS_STPCPY
2128 lea 7(%edx), %eax
2129 # else
2130 movl %edx, %eax
2131 # endif
2132 RETURN1
2134
/* NUL at source offset 8, nothing pushed yet: copy 9 bytes as two 32-bit
   moves plus an 8-bit move. Returns edx (destination), or edx + 8 with
   USE_AS_STPCPY. */
2134 .p2align 4
2135 L(ExitTail9):
2136 movl (%ecx), %eax
2137 movl %eax, (%edx)
2138 movl 4(%ecx), %eax
2139 movl %eax, 4(%edx)
2140 movb 8(%ecx), %al
2141 movb %al, 8(%edx)
2142 # ifdef USE_AS_STPCPY
2143 lea 8(%edx), %eax
2144 # else
2145 movl %edx, %eax
2146 # endif
2147 RETURN1
2149
/* NUL at source offset 9, nothing pushed yet: copy 10 bytes as two 32-bit
   moves plus a 16-bit move. Returns edx (destination), or edx + 9 with
   USE_AS_STPCPY. */
2149 .p2align 4
2150 L(ExitTail10):
2151 movl (%ecx), %eax
2152 movl %eax, (%edx)
2153 movl 4(%ecx), %eax
2154 movl %eax, 4(%edx)
2155 movw 8(%ecx), %ax
2156 movw %ax, 8(%edx)
2157 # ifdef USE_AS_STPCPY
2158 lea 9(%edx), %eax
2159 # else
2160 movl %edx, %eax
2161 # endif
2162 RETURN1
2164
/* NUL at source offset 10, nothing pushed yet: copy 11 bytes as three
   32-bit moves, the last one overlapping by one byte (offsets 7..10).
   Returns edx (destination), or edx + 10 with USE_AS_STPCPY. */
2164 .p2align 4
2165 L(ExitTail11):
2166 movl (%ecx), %eax
2167 movl %eax, (%edx)
2168 movl 4(%ecx), %eax
2169 movl %eax, 4(%edx)
2170 movl 7(%ecx), %eax
2171 movl %eax, 7(%edx)
2172 # ifdef USE_AS_STPCPY
2173 lea 10(%edx), %eax
2174 # else
2175 movl %edx, %eax
2176 # endif
2177 RETURN1
2179
/* NUL at source offset 11, nothing pushed yet: copy 12 bytes as three
   32-bit moves. Returns edx (destination), or edx + 11 with
   USE_AS_STPCPY. */
2179 .p2align 4
2180 L(ExitTail12):
2181 movl (%ecx), %eax
2182 movl %eax, (%edx)
2183 movl 4(%ecx), %eax
2184 movl %eax, 4(%edx)
2185 movl 8(%ecx), %eax
2186 movl %eax, 8(%edx)
2187 # ifdef USE_AS_STPCPY
2188 lea 11(%edx), %eax
2189 # else
2190 movl %edx, %eax
2191 # endif
2192 RETURN1
2194
/* NUL at source offset 12, nothing pushed yet: copy 13 bytes as two 8-byte
   moves through xmm0 that overlap (offsets 0..7 and 5..12). Returns edx
   (destination), or edx + 12 with USE_AS_STPCPY. */
2194 .p2align 4
2195 L(ExitTail13):
2196 movlpd (%ecx), %xmm0
2197 movlpd %xmm0, (%edx)
2198 movlpd 5(%ecx), %xmm0
2199 movlpd %xmm0, 5(%edx)
2200 # ifdef USE_AS_STPCPY
2201 lea 12(%edx), %eax
2202 # else
2203 movl %edx, %eax
2204 # endif
2205 RETURN1
2207
/* NUL at source offset 13, nothing pushed yet: copy 14 bytes as two 8-byte
   moves through xmm0 that overlap (offsets 0..7 and 6..13). Returns edx
   (destination), or edx + 13 with USE_AS_STPCPY. */
2207 .p2align 4
2208 L(ExitTail14):
2209 movlpd (%ecx), %xmm0
2210 movlpd %xmm0, (%edx)
2211 movlpd 6(%ecx), %xmm0
2212 movlpd %xmm0, 6(%edx)
2213 # ifdef USE_AS_STPCPY
2214 lea 13(%edx), %eax
2215 # else
2216 movl %edx, %eax
2217 # endif
2218 RETURN1
2220
/* NUL at source offset 14, nothing pushed yet: copy 15 bytes as two 8-byte
   moves through xmm0 that overlap by one byte (offsets 0..7 and 7..14).
   Returns edx (destination), or edx + 14 with USE_AS_STPCPY. */
2220 .p2align 4
2221 L(ExitTail15):
2222 movlpd (%ecx), %xmm0
2223 movlpd %xmm0, (%edx)
2224 movlpd 7(%ecx), %xmm0
2225 movlpd %xmm0, 7(%edx)
2226 # ifdef USE_AS_STPCPY
2227 lea 14(%edx), %eax
2228 # else
2229 movl %edx, %eax
2230 # endif
2231 RETURN1
2233
/* NUL at source offset 15, nothing pushed yet: copy 16 bytes as two 8-byte
   moves through xmm0. Returns edx (destination), or edx + 15 with
   USE_AS_STPCPY. */
2233 .p2align 4
2234 L(ExitTail16):
2235 movlpd (%ecx), %xmm0
2236 movlpd %xmm0, (%edx)
2237 movlpd 8(%ecx), %xmm0
2238 movlpd %xmm0, 8(%edx)
2239 # ifdef USE_AS_STPCPY
2240 lea 15(%edx), %eax
2241 # else
2242 movl %edx, %eax
2243 # endif
2244 RETURN1
2246
2247 END (STRCPY)
2248 # endif
2249
2250 #endif