]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
Fix whitespaces
[thirdparty/glibc.git] / sysdeps / x86_64 / multiarch / strcpy-sse2-unaligned.S
1 /* strcpy with SSE2 and unaligned load
2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21 #ifndef NOT_IN_libc
22
23 # ifndef USE_AS_STRCAT
24 # include <sysdep.h>
25
26 # ifndef STRCPY
27 # define STRCPY __strcpy_sse2_unaligned
28 # endif
29
30 # endif
31 /* Jump-table helpers. JMPTBL(I, B) is the offset of label I from base B. BRANCH_TO_JMPTBL_ENTRY loads the 32-bit entry at TABLE + INDEX*SCALE, sign-extends it, adds the address of TABLE and jumps there; it clobbers %r11 and %rcx. */
32 # define JMPTBL(I, B) I - B
33 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
34 lea TABLE(%rip), %r11; \
35 movslq (%r11, INDEX, SCALE), %rcx; \
36 lea (%r11, %rcx), %rcx; \
37 jmp *%rcx
38 /* Entry: %rdi = destination, %rsi = source; with USE_AS_STRNCPY the byte count arrives in %rdx. With USE_AS_STRCAT this prologue is compiled out, so %rcx (and %r8/%rax) are presumably prepared by the including file -- TODO confirm. */
39 # ifndef USE_AS_STRCAT
40
41 .text
42 ENTRY (STRCPY)
43 # ifdef USE_AS_STRNCPY
44 mov %rdx, %r8 /* %r8 = byte count; the rest of the code counts it down */
45 test %r8, %r8
46 jz L(ExitZero) /* count == 0: handled at L(ExitZero) (defined outside this view) */
47 # endif
48 mov %rsi, %rcx
49 # ifndef USE_AS_STPCPY
50 mov %rdi, %rax /* save result */
51 # endif
52
53 # endif
54
55 and $15, %rcx /* %rcx = offset of the source inside its 16-byte block */
56 jz L(SourceStringAlignmentZero) /* source already 16-byte aligned */
57
58 and $-16, %rsi /* round the source pointer down to a 16-byte boundary */
59 pxor %xmm0, %xmm0
60 pxor %xmm1, %xmm1
61
62 pcmpeqb (%rsi), %xmm1 /* 0xff in every lane where the aligned block holds a zero byte */
63 # ifdef USE_AS_STRNCPY
64 add %rcx, %r8 /* count is now measured from the aligned base. NOTE(review): this wraps when the count is within 15 of 2^64, after which the cmp/jbe below would treat it as tiny -- confirm whether such counts can reach here */
65 # endif
66 pmovmskb %xmm1, %rdx
67 shr %cl, %rdx /* drop the mask bits of the bytes that precede the string start */
68 # ifdef USE_AS_STRNCPY
69 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
70 cmp $16, %r8
71 # else
72 cmp $17, %r8
73 # endif
74 jbe L(CopyFrom1To16BytesTailCase2OrCase3) /* count limit reached at this block (threshold differs by one between variants) */
75 # endif
76 test %rdx, %rdx
77 jnz L(CopyFrom1To16BytesTail) /* zero byte found in the first block */
78
79 pcmpeqb 16(%rsi), %xmm0 /* probe the next aligned 16 bytes; %xmm0 stays all-zero when none of them is zero */
80 pmovmskb %xmm0, %rdx
81 # ifdef USE_AS_STRNCPY
82 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
83 cmp $32, %r8
84 # else
85 cmp $33, %r8
86 # endif
87 jbe L(CopyFrom1To32BytesCase2OrCase3)
88 # endif
89 test %rdx, %rdx
90 jnz L(CopyFrom1To32Bytes) /* zero byte found in the second block */
91
92 movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
93 movdqu %xmm1, (%rdi)
94
95 sub %rcx, %rdi /* bias %rdi by the same amount %rsi was rounded down, so equal offsets from both address matching bytes */
96
97 /* If source address alignment != destination address alignment */
98 .p2align 4 /* Six unrolled 16-byte steps: store the block already known to be free of zero bytes while probing the next aligned block. %xmm0 is all-zero on every visible path into here. */
99 L(Unalign16Both):
100 mov $16, %rcx /* %rcx = running offset applied to both %rsi and %rdi */
101 movdqa (%rsi, %rcx), %xmm1 /* block at offset 16: already probed by the code that jumps or falls in here */
102 movaps 16(%rsi, %rcx), %xmm2 /* block at offset 32: not probed yet */
103 movdqu %xmm1, (%rdi, %rcx)
104 pcmpeqb %xmm2, %xmm0 /* %xmm0 becomes the zero-byte mask; it remains all-zero if there is none */
105 pmovmskb %xmm0, %rdx
106 add $16, %rcx /* %rcx now addresses the block held in %xmm2 */
107 # ifdef USE_AS_STRNCPY
108 sub $48, %r8 /* three 16-byte blocks from the aligned base are now accounted for */
109 jbe L(CopyFrom1To16BytesCase2OrCase3) /* count limit falls inside the block just probed */
110 # endif
111 test %rdx, %rdx
112 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
113 jnz L(CopyFrom1To16BytesUnalignedXmm2) /* label is not defined in this view */
114 # else
115 jnz L(CopyFrom1To16Bytes)
116 # endif
117 /* Step 2: store %xmm2, probe the next block in %xmm3. */
118 movaps 16(%rsi, %rcx), %xmm3
119 movdqu %xmm2, (%rdi, %rcx)
120 pcmpeqb %xmm3, %xmm0
121 pmovmskb %xmm0, %rdx
122 add $16, %rcx
123 # ifdef USE_AS_STRNCPY
124 sub $16, %r8
125 jbe L(CopyFrom1To16BytesCase2OrCase3)
126 # endif
127 test %rdx, %rdx
128 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
129 jnz L(CopyFrom1To16BytesUnalignedXmm3)
130 # else
131 jnz L(CopyFrom1To16Bytes)
132 # endif
133 /* Step 3: store %xmm3, probe the next block in %xmm4. */
134 movaps 16(%rsi, %rcx), %xmm4
135 movdqu %xmm3, (%rdi, %rcx)
136 pcmpeqb %xmm4, %xmm0
137 pmovmskb %xmm0, %rdx
138 add $16, %rcx
139 # ifdef USE_AS_STRNCPY
140 sub $16, %r8
141 jbe L(CopyFrom1To16BytesCase2OrCase3)
142 # endif
143 test %rdx, %rdx
144 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
145 jnz L(CopyFrom1To16BytesUnalignedXmm4)
146 # else
147 jnz L(CopyFrom1To16Bytes)
148 # endif
149 /* Step 4: store %xmm4, probe the next block in %xmm1. */
150 movaps 16(%rsi, %rcx), %xmm1
151 movdqu %xmm4, (%rdi, %rcx)
152 pcmpeqb %xmm1, %xmm0
153 pmovmskb %xmm0, %rdx
154 add $16, %rcx
155 # ifdef USE_AS_STRNCPY
156 sub $16, %r8
157 jbe L(CopyFrom1To16BytesCase2OrCase3)
158 # endif
159 test %rdx, %rdx
160 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
161 jnz L(CopyFrom1To16BytesUnalignedXmm1)
162 # else
163 jnz L(CopyFrom1To16Bytes)
164 # endif
165 /* Step 5: store %xmm1, probe the next block in %xmm2. */
166 movaps 16(%rsi, %rcx), %xmm2
167 movdqu %xmm1, (%rdi, %rcx)
168 pcmpeqb %xmm2, %xmm0
169 pmovmskb %xmm0, %rdx
170 add $16, %rcx
171 # ifdef USE_AS_STRNCPY
172 sub $16, %r8
173 jbe L(CopyFrom1To16BytesCase2OrCase3)
174 # endif
175 test %rdx, %rdx
176 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
177 jnz L(CopyFrom1To16BytesUnalignedXmm2)
178 # else
179 jnz L(CopyFrom1To16Bytes)
180 # endif
181 /* Step 6: store %xmm2, probe the next block in %xmm3. */
182 movaps 16(%rsi, %rcx), %xmm3
183 movdqu %xmm2, (%rdi, %rcx)
184 pcmpeqb %xmm3, %xmm0
185 pmovmskb %xmm0, %rdx
186 add $16, %rcx
187 # ifdef USE_AS_STRNCPY
188 sub $16, %r8
189 jbe L(CopyFrom1To16BytesCase2OrCase3)
190 # endif
191 test %rdx, %rdx
192 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
193 jnz L(CopyFrom1To16BytesUnalignedXmm3)
194 # else
195 jnz L(CopyFrom1To16Bytes)
196 # endif
197 /* No zero byte so far: switch to 64-byte steps on a 64-byte aligned source. */
198 movdqu %xmm3, (%rdi, %rcx) /* store the last probed block */
199 mov %rsi, %rdx
200 lea 16(%rsi, %rcx), %rsi
201 and $-0x40, %rsi /* %rsi = first unstored source address, rounded down to a multiple of 64 */
202 sub %rsi, %rdx /* %rdx = old %rsi - new %rsi */
203 sub %rdx, %rdi /* move %rdi by the same distance as %rsi */
204 # ifdef USE_AS_STRNCPY
205 lea 128(%r8, %rdx), %r8 /* adjust the count by that distance plus a bias of 128 */
206 # endif
207 L(Unaligned64Loop): /* First 64-byte step: load four aligned blocks and test all of them for a zero byte at once. No jump back to this label is visible here; the repeat target is L(Unaligned64Loop_start). */
208 movaps (%rsi), %xmm2
209 movaps %xmm2, %xmm4 /* %xmm4 keeps block 0 intact for the later store */
210 movaps 16(%rsi), %xmm5
211 movaps 32(%rsi), %xmm3
212 movaps %xmm3, %xmm6 /* %xmm6 keeps block 2 intact for the later store */
213 movaps 48(%rsi), %xmm7
214 pminub %xmm5, %xmm2
215 pminub %xmm7, %xmm3
216 pminub %xmm2, %xmm3 /* %xmm3 = bytewise unsigned minimum of the four blocks */
217 pcmpeqb %xmm0, %xmm3 /* %xmm0 is all-zero on the fall-through path: a lane matches iff some block has a zero byte in it */
218 pmovmskb %xmm3, %rdx
219 # ifdef USE_AS_STRNCPY
220 sub $64, %r8
221 jbe L(UnalignedLeaveCase2OrCase3) /* count limit reached; label is defined outside this view */
222 # endif
223 test %rdx, %rdx
224 jnz L(Unaligned64Leave)
225 /* Loop body: store the 64 bytes held in %xmm4-%xmm7 (already found free of zero bytes) interleaved with loading and testing the next 64 bytes. */
226 L(Unaligned64Loop_start):
227 add $64, %rdi
228 add $64, %rsi
229 movdqu %xmm4, -64(%rdi) /* negative offsets address the previous 64-byte destination chunk */
230 movaps (%rsi), %xmm2
231 movdqa %xmm2, %xmm4 /* new block 0 copy, kept for the next store */
232 movdqu %xmm5, -48(%rdi)
233 movaps 16(%rsi), %xmm5
234 pminub %xmm5, %xmm2
235 movaps 32(%rsi), %xmm3
236 movdqu %xmm6, -32(%rdi)
237 movaps %xmm3, %xmm6 /* new block 2 copy, kept for the next store */
238 movdqu %xmm7, -16(%rdi)
239 movaps 48(%rsi), %xmm7
240 pminub %xmm7, %xmm3
241 pminub %xmm2, %xmm3 /* %xmm3 = bytewise unsigned minimum of the four new blocks */
242 pcmpeqb %xmm0, %xmm3 /* compare against %xmm0, which this loop never modifies */
243 pmovmskb %xmm3, %rdx
244 # ifdef USE_AS_STRNCPY
245 sub $64, %r8
246 jbe L(UnalignedLeaveCase2OrCase3) /* count limit reached; label is defined outside this view */
247 # endif
248 test %rdx, %rdx
249 jz L(Unaligned64Loop_start) /* no zero byte in these 64 bytes: go round again */
250 /* A zero byte was seen somewhere in the 64 bytes held in %xmm4-%xmm7 (true on both visible paths into here); find the block that holds it. */
251 L(Unaligned64Leave):
252 pxor %xmm1, %xmm1
253
254 pcmpeqb %xmm4, %xmm0 /* zero-byte mask of block 0 */
255 pcmpeqb %xmm5, %xmm1 /* zero-byte mask of block 1 */
256 pmovmskb %xmm0, %rdx
257 pmovmskb %xmm1, %rcx
258 test %rdx, %rdx
259 jnz L(CopyFrom1To16BytesUnaligned_0)
260 test %rcx, %rcx
261 jnz L(CopyFrom1To16BytesUnaligned_16)
262
263 pcmpeqb %xmm6, %xmm0 /* %xmm0/%xmm1 are all-zero again here, since blocks 0 and 1 had no zero byte */
264 pcmpeqb %xmm7, %xmm1
265 pmovmskb %xmm0, %rdx
266 pmovmskb %xmm1, %rcx
267 test %rdx, %rdx
268 jnz L(CopyFrom1To16BytesUnaligned_32)
269 /* Remaining case: the zero byte is in block 3 (%xmm7). */
270 bsf %rcx, %rdx /* %rdx = index of the first zero byte within block 3 */
271 movdqu %xmm4, (%rdi)
272 movdqu %xmm5, 16(%rdi)
273 movdqu %xmm6, 32(%rdi)
274 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
275 # ifdef USE_AS_STPCPY
276 lea 48(%rdi, %rdx), %rax /* stpcpy variant: result = destination address of the zero byte */
277 # endif
278 movdqu %xmm7, 48(%rdi) /* stores all 16 bytes of block 3, including those after the zero byte */
279 add $15, %r8
280 sub %rdx, %r8 /* %r8 = %r8 + 15 - index */
281 lea 49(%rdi, %rdx), %rdi /* %rdi = one past the destination of the zero byte */
282 jmp L(StrncpyFillTailWithZero) /* defined outside this view */
283 # else
284 add $48, %rsi
285 add $48, %rdi /* both pointers now address block 3 */
286 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4) /* dispatch on the zero-byte index; table is outside this view */
287 # endif
288
289 /* If source address alignment == destination address alignment */
290 /* Reached from the entry code when %rsi & 15 == 0 (so %rcx == 0): probe the first two 16-byte source blocks with aligned loads. */
291 L(SourceStringAlignmentZero):
292 pxor %xmm0, %xmm0
293 movdqa (%rsi), %xmm1
294 pcmpeqb %xmm1, %xmm0 /* zero-byte mask of the first block */
295 pmovmskb %xmm0, %rdx
296
297 # ifdef USE_AS_STRNCPY
298 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
299 cmp $16, %r8
300 # else
301 cmp $17, %r8
302 # endif
303 jbe L(CopyFrom1To16BytesTail1Case2OrCase3) /* count limit reached at this block */
304 # endif
305 test %rdx, %rdx
306 jnz L(CopyFrom1To16BytesTail1)
307
308 pcmpeqb 16(%rsi), %xmm0 /* %xmm0 was all-zero (no match above), so this yields the mask of the second block */
309 movdqu %xmm1, (%rdi) /* first block has no zero byte: store it whole */
310 pmovmskb %xmm0, %rdx
311
312 # ifdef USE_AS_STRNCPY
313 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
314 cmp $32, %r8
315 # else
316 cmp $33, %r8
317 # endif
318 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
319 # endif
320 test %rdx, %rdx
321 jnz L(CopyFrom1To32Bytes1)
322 jmp L(Unalign16Both) /* continue with the unrolled 16-byte steps; %xmm0 is all-zero here */
323
324 /*------End of main part with loops---------------------*/
325
326 /* Case1 */
327 /* Zero byte found in the block at offset %rcx (mask in %rdx); compiled only when not strncpy, or when strcat. */
328 # if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
329 .p2align 4
330 L(CopyFrom1To16Bytes):
331 add %rcx, %rdi
332 add %rcx, %rsi /* fold the running offset into both pointers */
333 bsf %rdx, %rdx /* %rdx = index of the first zero byte in the block */
334 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4) /* dispatch on that index; table is outside this view */
335 # endif
336 .p2align 4 /* Zero byte found in the first (partial) block of an unaligned source; %rcx = source misalignment, %rdx = mask already shifted to start at the string. */
337 L(CopyFrom1To16BytesTail):
338 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
339 sub %rcx, %r8 /* remove the misalignment that the entry code added to the count */
340 # endif
341 add %rcx, %rsi /* restore the original, unaligned source pointer */
342 bsf %rdx, %rdx /* %rdx = index of the first zero byte from the string start */
343 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4) /* table is outside this view */
344 /* Aligned-source tails: L(CopyFrom1To32Bytes1) handles a zero byte in the second block, then shares the dispatch with L(CopyFrom1To16BytesTail1) (zero byte in the first block). */
345 .p2align 4
346 L(CopyFrom1To32Bytes1):
347 add $16, %rsi
348 add $16, %rdi /* skip the first block, which the code jumping here already stored */
349 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
350 sub $16, %r8
351 # endif
352 L(CopyFrom1To16BytesTail1):
353 bsf %rdx, %rdx /* %rdx = index of the first zero byte in the block */
354 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4) /* table is outside this view */
355 /* Unaligned source, zero byte found in the second aligned block; nothing has been stored yet, so the copy restarts from the original source pointer. */
356 .p2align 4
357 L(CopyFrom1To32Bytes):
358 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
359 sub %rcx, %r8 /* remove the misalignment that the entry code added to the count */
360 # endif
361 bsf %rdx, %rdx /* index of the zero byte inside the second aligned block */
362 add %rcx, %rsi /* restore the original, unaligned source pointer */
363 add $16, %rdx
364 sub %rcx, %rdx /* %rdx = index + 16 - misalignment = index of the zero byte from the string start */
365 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4) /* table is outside this view */
366 /* 64-byte loop exit: the zero byte is in block 0 (%xmm4); %rdx holds its mask. */
367 .p2align 4
368 L(CopyFrom1To16BytesUnaligned_0):
369 bsf %rdx, %rdx /* %rdx = index of the first zero byte in block 0 */
370 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
371 # ifdef USE_AS_STPCPY
372 lea (%rdi, %rdx), %rax /* stpcpy variant: result = destination address of the zero byte */
373 # endif
374 movdqu %xmm4, (%rdi) /* stores the whole block, including bytes after the zero byte */
375 add $63, %r8
376 sub %rdx, %r8 /* %r8 = %r8 + 63 - index */
377 lea 1(%rdi, %rdx), %rdi /* %rdi = one past the destination of the zero byte */
378 jmp L(StrncpyFillTailWithZero) /* defined outside this view */
379 # else
380 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4) /* table is outside this view */
381 # endif
382 /* 64-byte loop exit: the zero byte is in block 1 (%xmm5); %rcx holds its mask. */
383 .p2align 4
384 L(CopyFrom1To16BytesUnaligned_16):
385 bsf %rcx, %rdx /* %rdx = index of the first zero byte in block 1 */
386 movdqu %xmm4, (%rdi) /* block 0 has no zero byte: store it whole */
387 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
388 # ifdef USE_AS_STPCPY
389 lea 16(%rdi, %rdx), %rax /* stpcpy variant: result = destination address of the zero byte */
390 # endif
391 movdqu %xmm5, 16(%rdi) /* stores the whole block, including bytes after the zero byte */
392 add $47, %r8
393 sub %rdx, %r8 /* %r8 = %r8 + 47 - index */
394 lea 17(%rdi, %rdx), %rdi /* %rdi = one past the destination of the zero byte */
395 jmp L(StrncpyFillTailWithZero) /* defined outside this view */
396 # else
397 add $16, %rsi
398 add $16, %rdi /* both pointers now address block 1 */
399 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4) /* table is outside this view */
400 # endif
401 /* 64-byte loop exit: the zero byte is in block 2 (%xmm6); %rdx holds its mask. */
402 .p2align 4
403 L(CopyFrom1To16BytesUnaligned_32):
404 bsf %rdx, %rdx /* %rdx = index of the first zero byte in block 2 */
405 movdqu %xmm4, (%rdi)
406 movdqu %xmm5, 16(%rdi) /* blocks 0 and 1 have no zero byte: store them whole */
407 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
408 # ifdef USE_AS_STPCPY
409 lea 32(%rdi, %rdx), %rax /* stpcpy variant: result = destination address of the zero byte */
410 # endif
411 movdqu %xmm6, 32(%rdi) /* stores the whole block, including bytes after the zero byte */
412 add $31, %r8
413 sub %rdx, %r8 /* %r8 = %r8 + 31 - index */
414 lea 33(%rdi, %rdx), %rdi /* %rdi = one past the destination of the zero byte */
415 jmp L(StrncpyFillTailWithZero) /* defined outside this view */
416 # else
417 add $32, %rsi
418 add $32, %rdi /* both pointers now address block 2 */
419 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4) /* table is outside this view */
420 # endif
421 /* strncpy-only stubs: each stores one whole 16-byte register at (%rdi, %rcx) and jumps to the shared L(CopyFrom1To16BytesXmmExit) (outside this view). The Xmm2 stub used by L(Unalign16Both) is not in this group; presumably it is defined later in the file. */
422 # ifdef USE_AS_STRNCPY
423 # ifndef USE_AS_STRCAT
424 .p2align 4
425 L(CopyFrom1To16BytesUnalignedXmm6):
426 movdqu %xmm6, (%rdi, %rcx)
427 jmp L(CopyFrom1To16BytesXmmExit)
428
429 .p2align 4
430 L(CopyFrom1To16BytesUnalignedXmm5):
431 movdqu %xmm5, (%rdi, %rcx)
432 jmp L(CopyFrom1To16BytesXmmExit)
433
434 .p2align 4
435 L(CopyFrom1To16BytesUnalignedXmm4):
436 movdqu %xmm4, (%rdi, %rcx)
437 jmp L(CopyFrom1To16BytesXmmExit)
438
439 .p2align 4
440 L(CopyFrom1To16BytesUnalignedXmm3):
441 movdqu %xmm3, (%rdi, %rcx)
442 jmp L(CopyFrom1To16BytesXmmExit)
443
444 .p2align 4
445 L(CopyFrom1To16BytesUnalignedXmm1):
446 movdqu %xmm1, (%rdi, %rcx)
447 jmp L(CopyFrom1To16BytesXmmExit)
448 # endif
449 /* Shared dispatch used by the Case2 handlers when the zero byte comes before the count limit: %rdx already holds the zero-byte index. */
450 .p2align 4
451 L(CopyFrom1To16BytesExit):
452 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4) /* table is outside this view */
453
454 /* Case2 */
455 /* Case 2 in the unrolled 16-byte steps: the count limit and a zero byte both fall in the block at offset %rcx. */
456 .p2align 4
457 L(CopyFrom1To16BytesCase2):
458 add $16, %r8 /* add back 16 after the subtraction that reached the limit, giving the count left from this block's start */
459 add %rcx, %rdi
460 add %rcx, %rsi /* fold the running offset into both pointers */
461 bsf %rdx, %rdx /* %rdx = index of the first zero byte */
462 cmp %r8, %rdx
463 jb L(CopyFrom1To16BytesExit) /* index < count left (unsigned): finish by zero-byte index */
464 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) /* otherwise dispatch on the count left; table is outside this view */
465 /* Case 2 for an unaligned source whose zero byte is in the second aligned block while the count limit is also within reach. */
466 .p2align 4
467 L(CopyFrom1To32BytesCase2):
468 sub %rcx, %r8 /* remove the misalignment that the entry code added to the count */
469 add %rcx, %rsi /* restore the original, unaligned source pointer */
470 bsf %rdx, %rdx
471 add $16, %rdx
472 sub %rcx, %rdx /* %rdx = index of the zero byte from the string start */
473 cmp %r8, %rdx
474 jb L(CopyFrom1To16BytesExit) /* index < count (unsigned): finish by zero-byte index */
475 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) /* otherwise dispatch on the count; table is outside this view */
476 /* Case 2 for an unaligned source whose zero byte is in the first (partial) block. */
477 L(CopyFrom1To16BytesTailCase2):
478 sub %rcx, %r8 /* remove the misalignment that the entry code added to the count */
479 add %rcx, %rsi /* restore the original, unaligned source pointer */
480 bsf %rdx, %rdx /* %rdx = index of the zero byte from the string start */
481 cmp %r8, %rdx
482 jb L(CopyFrom1To16BytesExit) /* index < count (unsigned): finish by zero-byte index */
483 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) /* otherwise dispatch on the count; table is outside this view */
484 /* Case 2 for the aligned-source tails: pointers and count need no adjustment here. */
485 L(CopyFrom1To16BytesTail1Case2):
486 bsf %rdx, %rdx /* %rdx = index of the first zero byte */
487 cmp %r8, %rdx
488 jb L(CopyFrom1To16BytesExit) /* index < count (unsigned): finish by zero-byte index */
489 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) /* otherwise dispatch on the count; table is outside this view */
490
491 /* Case2 or Case3, Case3 */
492 /* Count limit reached in the unrolled 16-byte steps: a non-zero mask in %rdx means a zero byte is also present (Case 2), otherwise only the limit applies (Case 3). */
493 .p2align 4
494 L(CopyFrom1To16BytesCase2OrCase3):
495 test %rdx, %rdx
496 jnz L(CopyFrom1To16BytesCase2)
497 L(CopyFrom1To16BytesCase3):
498 add $16, %r8 /* add back 16 after the subtraction that reached the limit, giving the count left from this block's start */
499 add %rcx, %rdi
500 add %rcx, %rsi /* fold the running offset into both pointers */
501 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) /* dispatch on the count left; table is outside this view */
502 /* Unaligned source, count limit reached while probing the second aligned block: Case 2 if the mask in %rdx is non-zero, else Case 3. */
503 .p2align 4
504 L(CopyFrom1To32BytesCase2OrCase3):
505 test %rdx, %rdx
506 jnz L(CopyFrom1To32BytesCase2)
507 sub %rcx, %r8 /* remove the misalignment that the entry code added to the count */
508 add %rcx, %rsi /* restore the original, unaligned source pointer */
509 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) /* dispatch on the count; table is outside this view */
510 /* Unaligned source, count limit reached while probing the first (partial) block: Case 2 if the mask in %rdx is non-zero, else Case 3. */
511 .p2align 4
512 L(CopyFrom1To16BytesTailCase2OrCase3):
513 test %rdx, %rdx
514 jnz L(CopyFrom1To16BytesTailCase2)
515 sub %rcx, %r8 /* remove the misalignment that the entry code added to the count */
516 add %rcx, %rsi /* restore the original, unaligned source pointer */
517 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) /* dispatch on the count; table is outside this view */
518 /* Aligned source, count limit reached: the 32-byte entry first steps over the already stored first block, then shares the Case 2 / Case 3 decision with the 16-byte entry. */
519 .p2align 4
520 L(CopyFrom1To32Bytes1Case2OrCase3):
521 add $16, %rdi
522 add $16, %rsi
523 sub $16, %r8 /* the first 16 bytes were stored by the code that jumps here */
524 L(CopyFrom1To16BytesTail1Case2OrCase3):
525 test %rdx, %rdx
526 jnz L(CopyFrom1To16BytesTail1Case2) /* zero byte present as well: Case 2 */
527 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) /* Case 3: dispatch on the count; table is outside this view */
528
529 # endif
530
531 /*------------End of labels for copying 1-16 bytes and 1-32 bytes----*/
532 /* L(Exit1): write a single byte, taken from %dh, to (%rdi). */
533 .p2align 4
534 L(Exit1):
535 mov %dh, (%rdi) /* NOTE(review): presumably %dh is 0 here (the terminator) because the dispatch index in %rdx is small -- confirm at the L(ExitTable) jump sites */
536 # ifdef USE_AS_STPCPY
537 lea (%rdi), %rax /* stpcpy variant: result = address of the byte just written */
538 # endif
539 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
540 sub $1, %r8 /* strncpy variant: %r8 -= 1; the jnz below tests this result because lea leaves the flags alone */
541 lea 1(%rdi), %rdi
542 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
543 # endif
544 ret
545 /* L(Exit2): copy 2 bytes from (%rsi) to (%rdi) with one 2-byte move. */
546 .p2align 4
547 L(Exit2):
548 mov (%rsi), %dx
549 mov %dx, (%rdi)
550 # ifdef USE_AS_STPCPY
551 lea 1(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
552 # endif
553 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
554 sub $2, %r8 /* strncpy variant: %r8 -= 2; the jnz below tests this result because lea leaves the flags alone */
555 lea 2(%rdi), %rdi
556 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
557 # endif
558 ret
559 /* L(Exit3): write 3 bytes to (%rdi): 2 copied from (%rsi), the third taken from %dh. */
560 .p2align 4
561 L(Exit3):
562 mov (%rsi), %cx
563 mov %cx, (%rdi)
564 mov %dh, 2(%rdi) /* NOTE(review): presumably %dh is 0 here (the terminator) -- confirm at the L(ExitTable) jump sites */
565 # ifdef USE_AS_STPCPY
566 lea 2(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
567 # endif
568 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
569 sub $3, %r8 /* strncpy variant: %r8 -= 3; the jnz below tests this result because lea leaves the flags alone */
570 lea 3(%rdi), %rdi
571 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
572 # endif
573 ret
574 /* L(Exit4): copy 4 bytes from (%rsi) to (%rdi) with one 4-byte move. */
575 .p2align 4
576 L(Exit4):
577 mov (%rsi), %edx
578 mov %edx, (%rdi)
579 # ifdef USE_AS_STPCPY
580 lea 3(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
581 # endif
582 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
583 sub $4, %r8 /* strncpy variant: %r8 -= 4; the jnz below tests this result because lea leaves the flags alone */
584 lea 4(%rdi), %rdi
585 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
586 # endif
587 ret
588 /* L(Exit5): write 5 bytes to (%rdi): 4 copied from (%rsi), the fifth taken from %dh. */
589 .p2align 4
590 L(Exit5):
591 mov (%rsi), %ecx
592 mov %dh, 4(%rdi) /* NOTE(review): presumably %dh is 0 here (the terminator) -- confirm at the L(ExitTable) jump sites */
593 mov %ecx, (%rdi)
594 # ifdef USE_AS_STPCPY
595 lea 4(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
596 # endif
597 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
598 sub $5, %r8 /* strncpy variant: %r8 -= 5; the jnz below tests this result because lea leaves the flags alone */
599 lea 5(%rdi), %rdi
600 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
601 # endif
602 ret
603 /* L(Exit6): copy 6 bytes from (%rsi) to (%rdi) as a 4-byte move plus a 2-byte move at offset 4. */
604 .p2align 4
605 L(Exit6):
606 mov (%rsi), %ecx
607 mov 4(%rsi), %dx
608 mov %ecx, (%rdi)
609 mov %dx, 4(%rdi)
610 # ifdef USE_AS_STPCPY
611 lea 5(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
612 # endif
613 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
614 sub $6, %r8 /* strncpy variant: %r8 -= 6; the jnz below tests this result because lea leaves the flags alone */
615 lea 6(%rdi), %rdi
616 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
617 # endif
618 ret
619 /* L(Exit7): copy 7 bytes from (%rsi) to (%rdi) as two overlapping 4-byte moves (offsets 0 and 3). */
620 .p2align 4
621 L(Exit7):
622 mov (%rsi), %ecx
623 mov 3(%rsi), %edx
624 mov %ecx, (%rdi)
625 mov %edx, 3(%rdi) /* byte 3 is written twice with the same value */
626 # ifdef USE_AS_STPCPY
627 lea 6(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
628 # endif
629 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
630 sub $7, %r8 /* strncpy variant: %r8 -= 7; the jnz below tests this result because lea leaves the flags alone */
631 lea 7(%rdi), %rdi
632 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
633 # endif
634 ret
635 /* L(Exit8): copy 8 bytes from (%rsi) to (%rdi) with one 8-byte move. */
636 .p2align 4
637 L(Exit8):
638 mov (%rsi), %rdx
639 mov %rdx, (%rdi)
640 # ifdef USE_AS_STPCPY
641 lea 7(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
642 # endif
643 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
644 sub $8, %r8 /* strncpy variant: %r8 -= 8; the jnz below tests this result because lea leaves the flags alone */
645 lea 8(%rdi), %rdi
646 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
647 # endif
648 ret
649 /* L(Exit9): write 9 bytes to (%rdi): 8 copied from (%rsi), the ninth taken from %dh. */
650 .p2align 4
651 L(Exit9):
652 mov (%rsi), %rcx
653 mov %dh, 8(%rdi) /* NOTE(review): presumably %dh is 0 here (the terminator) -- confirm at the L(ExitTable) jump sites */
654 mov %rcx, (%rdi)
655 # ifdef USE_AS_STPCPY
656 lea 8(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
657 # endif
658 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
659 sub $9, %r8 /* strncpy variant: %r8 -= 9; the jnz below tests this result because lea leaves the flags alone */
660 lea 9(%rdi), %rdi
661 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
662 # endif
663 ret
664 /* L(Exit10): copy 10 bytes from (%rsi) to (%rdi) as an 8-byte move plus a 2-byte move at offset 8. */
665 .p2align 4
666 L(Exit10):
667 mov (%rsi), %rcx
668 mov 8(%rsi), %dx
669 mov %rcx, (%rdi)
670 mov %dx, 8(%rdi)
671 # ifdef USE_AS_STPCPY
672 lea 9(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
673 # endif
674 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
675 sub $10, %r8 /* strncpy variant: %r8 -= 10; the jnz below tests this result because lea leaves the flags alone */
676 lea 10(%rdi), %rdi
677 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
678 # endif
679 ret
680 /* L(Exit11): copy 11 bytes from (%rsi) to (%rdi) as an 8-byte move plus an overlapping 4-byte move at offset 7. */
681 .p2align 4
682 L(Exit11):
683 mov (%rsi), %rcx
684 mov 7(%rsi), %edx
685 mov %rcx, (%rdi)
686 mov %edx, 7(%rdi) /* byte 7 is written twice with the same value */
687 # ifdef USE_AS_STPCPY
688 lea 10(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
689 # endif
690 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
691 sub $11, %r8 /* strncpy variant: %r8 -= 11; the jnz below tests this result because lea leaves the flags alone */
692 lea 11(%rdi), %rdi
693 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
694 # endif
695 ret
696 /* L(Exit12): copy 12 bytes from (%rsi) to (%rdi) as an 8-byte move plus a 4-byte move at offset 8. */
697 .p2align 4
698 L(Exit12):
699 mov (%rsi), %rcx
700 mov 8(%rsi), %edx
701 mov %rcx, (%rdi)
702 mov %edx, 8(%rdi)
703 # ifdef USE_AS_STPCPY
704 lea 11(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
705 # endif
706 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
707 sub $12, %r8 /* strncpy variant: %r8 -= 12; the jnz below tests this result because lea leaves the flags alone */
708 lea 12(%rdi), %rdi
709 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
710 # endif
711 ret
712 /* L(Exit13): copy 13 bytes from (%rsi) to (%rdi) as two overlapping 8-byte moves (offsets 0 and 5). */
713 .p2align 4
714 L(Exit13):
715 mov (%rsi), %rcx
716 mov 5(%rsi), %rdx
717 mov %rcx, (%rdi)
718 mov %rdx, 5(%rdi) /* bytes 5-7 are written twice with the same values */
719 # ifdef USE_AS_STPCPY
720 lea 12(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
721 # endif
722 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
723 sub $13, %r8 /* strncpy variant: %r8 -= 13; the jnz below tests this result because lea leaves the flags alone */
724 lea 13(%rdi), %rdi
725 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
726 # endif
727 ret
728 /* L(Exit14): copy 14 bytes from (%rsi) to (%rdi) as two overlapping 8-byte moves (offsets 0 and 6). */
729 .p2align 4
730 L(Exit14):
731 mov (%rsi), %rcx
732 mov 6(%rsi), %rdx
733 mov %rcx, (%rdi)
734 mov %rdx, 6(%rdi) /* bytes 6-7 are written twice with the same values */
735 # ifdef USE_AS_STPCPY
736 lea 13(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
737 # endif
738 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
739 sub $14, %r8 /* strncpy variant: %r8 -= 14; the jnz below tests this result because lea leaves the flags alone */
740 lea 14(%rdi), %rdi
741 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
742 # endif
743 ret
744 /* L(Exit15): copy 15 bytes from (%rsi) to (%rdi) as two overlapping 8-byte moves (offsets 0 and 7). */
745 .p2align 4
746 L(Exit15):
747 mov (%rsi), %rcx
748 mov 7(%rsi), %rdx
749 mov %rcx, (%rdi)
750 mov %rdx, 7(%rdi) /* byte 7 is written twice with the same value */
751 # ifdef USE_AS_STPCPY
752 lea 14(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
753 # endif
754 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
755 sub $15, %r8 /* strncpy variant: %r8 -= 15; the jnz below tests this result because lea leaves the flags alone */
756 lea 15(%rdi), %rdi
757 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
758 # endif
759 ret
760 /* L(Exit16): copy 16 bytes from (%rsi) to (%rdi) with one unaligned 16-byte move. */
761 .p2align 4
762 L(Exit16):
763 movdqu (%rsi), %xmm0
764 movdqu %xmm0, (%rdi)
765 # ifdef USE_AS_STPCPY
766 lea 15(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
767 # endif
768 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
769 sub $16, %r8 /* strncpy variant: %r8 -= 16; the jnz below tests this result because lea leaves the flags alone */
770 lea 16(%rdi), %rdi
771 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
772 # endif
773 ret
774 /* L(Exit17): write 17 bytes to (%rdi): 16 copied from (%rsi), byte 16 taken from %dh. */
775 .p2align 4
776 L(Exit17):
777 movdqu (%rsi), %xmm0
778 movdqu %xmm0, (%rdi)
779 mov %dh, 16(%rdi) /* NOTE(review): presumably %dh is 0 here (the terminator) -- confirm at the L(ExitTable) jump sites */
780 # ifdef USE_AS_STPCPY
781 lea 16(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
782 # endif
783 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
784 sub $17, %r8 /* strncpy variant: %r8 -= 17; the jnz below tests this result because lea leaves the flags alone */
785 lea 17(%rdi), %rdi
786 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
787 # endif
788 ret
789 /* L(Exit18): copy 18 bytes from (%rsi) to (%rdi) as a 16-byte move plus a 2-byte move at offset 16. */
790 .p2align 4
791 L(Exit18):
792 movdqu (%rsi), %xmm0
793 mov 16(%rsi), %cx
794 movdqu %xmm0, (%rdi)
795 mov %cx, 16(%rdi)
796 # ifdef USE_AS_STPCPY
797 lea 17(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
798 # endif
799 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
800 sub $18, %r8 /* strncpy variant: %r8 -= 18; the jnz below tests this result because lea leaves the flags alone */
801 lea 18(%rdi), %rdi
802 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
803 # endif
804 ret
805 /* L(Exit19): copy 19 bytes from (%rsi) to (%rdi) as a 16-byte move plus an overlapping 4-byte move at offset 15. */
806 .p2align 4
807 L(Exit19):
808 movdqu (%rsi), %xmm0
809 mov 15(%rsi), %ecx
810 movdqu %xmm0, (%rdi)
811 mov %ecx, 15(%rdi) /* byte 15 is written twice with the same value */
812 # ifdef USE_AS_STPCPY
813 lea 18(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
814 # endif
815 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
816 sub $19, %r8 /* strncpy variant: %r8 -= 19; the jnz below tests this result because lea leaves the flags alone */
817 lea 19(%rdi), %rdi
818 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
819 # endif
820 ret
821 /* L(Exit20): copy 20 bytes from (%rsi) to (%rdi) as a 16-byte move plus a 4-byte move at offset 16. */
822 .p2align 4
823 L(Exit20):
824 movdqu (%rsi), %xmm0
825 mov 16(%rsi), %ecx
826 movdqu %xmm0, (%rdi)
827 mov %ecx, 16(%rdi)
828 # ifdef USE_AS_STPCPY
829 lea 19(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
830 # endif
831 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
832 sub $20, %r8 /* strncpy variant: %r8 -= 20; the jnz below tests this result because lea leaves the flags alone */
833 lea 20(%rdi), %rdi
834 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
835 # endif
836 ret
837 /* L(Exit21): write 21 bytes to (%rdi): 20 copied from (%rsi) (16 + 4), byte 20 taken from %dh. */
838 .p2align 4
839 L(Exit21):
840 movdqu (%rsi), %xmm0
841 mov 16(%rsi), %ecx
842 movdqu %xmm0, (%rdi)
843 mov %ecx, 16(%rdi)
844 mov %dh, 20(%rdi) /* NOTE(review): presumably %dh is 0 here (the terminator) -- confirm at the L(ExitTable) jump sites */
845 # ifdef USE_AS_STPCPY
846 lea 20(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
847 # endif
848 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
849 sub $21, %r8 /* strncpy variant: %r8 -= 21; the jnz below tests this result because lea leaves the flags alone */
850 lea 21(%rdi), %rdi
851 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
852 # endif
853 ret
854 /* L(Exit22): copy 22 bytes from (%rsi) to (%rdi) as a 16-byte move plus an overlapping 8-byte move at offset 14. */
855 .p2align 4
856 L(Exit22):
857 movdqu (%rsi), %xmm0
858 mov 14(%rsi), %rcx
859 movdqu %xmm0, (%rdi)
860 mov %rcx, 14(%rdi) /* bytes 14-15 are written twice with the same values */
861 # ifdef USE_AS_STPCPY
862 lea 21(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
863 # endif
864 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
865 sub $22, %r8 /* strncpy variant: %r8 -= 22; the jnz below tests this result because lea leaves the flags alone */
866 lea 22(%rdi), %rdi
867 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
868 # endif
869 ret
870 /* L(Exit23): copy 23 bytes from (%rsi) to (%rdi) as a 16-byte move plus an overlapping 8-byte move at offset 15. */
871 .p2align 4
872 L(Exit23):
873 movdqu (%rsi), %xmm0
874 mov 15(%rsi), %rcx
875 movdqu %xmm0, (%rdi)
876 mov %rcx, 15(%rdi) /* byte 15 is written twice with the same value */
877 # ifdef USE_AS_STPCPY
878 lea 22(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
879 # endif
880 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
881 sub $23, %r8 /* strncpy variant: %r8 -= 23; the jnz below tests this result because lea leaves the flags alone */
882 lea 23(%rdi), %rdi
883 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
884 # endif
885 ret
886 /* L(Exit24): copy 24 bytes from (%rsi) to (%rdi) as a 16-byte move plus an 8-byte move at offset 16. */
887 .p2align 4
888 L(Exit24):
889 movdqu (%rsi), %xmm0
890 mov 16(%rsi), %rcx
891 movdqu %xmm0, (%rdi)
892 mov %rcx, 16(%rdi)
893 # ifdef USE_AS_STPCPY
894 lea 23(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
895 # endif
896 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
897 sub $24, %r8 /* strncpy variant: %r8 -= 24; the jnz below tests this result because lea leaves the flags alone */
898 lea 24(%rdi), %rdi
899 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
900 # endif
901 ret
902 /* L(Exit25): write 25 bytes to (%rdi): 24 copied from (%rsi) (16 + 8), byte 24 taken from %dh. */
903 .p2align 4
904 L(Exit25):
905 movdqu (%rsi), %xmm0
906 mov 16(%rsi), %rcx
907 movdqu %xmm0, (%rdi)
908 mov %rcx, 16(%rdi)
909 mov %dh, 24(%rdi) /* NOTE(review): presumably %dh is 0 here (the terminator) -- confirm at the L(ExitTable) jump sites */
910 # ifdef USE_AS_STPCPY
911 lea 24(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
912 # endif
913 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
914 sub $25, %r8 /* strncpy variant: %r8 -= 25; the jnz below tests this result because lea leaves the flags alone */
915 lea 25(%rdi), %rdi
916 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
917 # endif
918 ret
919 /* L(Exit26): copy 26 bytes from (%rsi) to (%rdi) as 16-byte, 8-byte (offset 16) and 2-byte (offset 24) moves. */
920 .p2align 4
921 L(Exit26):
922 movdqu (%rsi), %xmm0
923 mov 16(%rsi), %rdx
924 mov 24(%rsi), %cx
925 movdqu %xmm0, (%rdi)
926 mov %rdx, 16(%rdi)
927 mov %cx, 24(%rdi)
928 # ifdef USE_AS_STPCPY
929 lea 25(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
930 # endif
931 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
932 sub $26, %r8 /* strncpy variant: %r8 -= 26; the jnz below tests this result because lea leaves the flags alone */
933 lea 26(%rdi), %rdi
934 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
935 # endif
936 ret
937 /* L(Exit27): copy 27 bytes from (%rsi) to (%rdi) as 16-byte, 8-byte (offset 16) and overlapping 4-byte (offset 23) moves. */
938 .p2align 4
939 L(Exit27):
940 movdqu (%rsi), %xmm0
941 mov 16(%rsi), %rdx
942 mov 23(%rsi), %ecx
943 movdqu %xmm0, (%rdi)
944 mov %rdx, 16(%rdi)
945 mov %ecx, 23(%rdi) /* byte 23 is written twice with the same value */
946 # ifdef USE_AS_STPCPY
947 lea 26(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
948 # endif
949 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
950 sub $27, %r8 /* strncpy variant: %r8 -= 27; the jnz below tests this result because lea leaves the flags alone */
951 lea 27(%rdi), %rdi
952 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
953 # endif
954 ret
955 /* L(Exit28): copy 28 bytes from (%rsi) to (%rdi) as 16-byte, 8-byte (offset 16) and 4-byte (offset 24) moves. */
956 .p2align 4
957 L(Exit28):
958 movdqu (%rsi), %xmm0
959 mov 16(%rsi), %rdx
960 mov 24(%rsi), %ecx
961 movdqu %xmm0, (%rdi)
962 mov %rdx, 16(%rdi)
963 mov %ecx, 24(%rdi)
964 # ifdef USE_AS_STPCPY
965 lea 27(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
966 # endif
967 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
968 sub $28, %r8 /* strncpy variant: %r8 -= 28; the jnz below tests this result because lea leaves the flags alone */
969 lea 28(%rdi), %rdi
970 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
971 # endif
972 ret
973 /* L(Exit29): copy 29 bytes from (%rsi) to (%rdi) as two overlapping 16-byte moves (offsets 0 and 13). */
974 .p2align 4
975 L(Exit29):
976 movdqu (%rsi), %xmm0
977 movdqu 13(%rsi), %xmm2
978 movdqu %xmm0, (%rdi)
979 movdqu %xmm2, 13(%rdi) /* bytes 13-15 are written twice with the same values */
980 # ifdef USE_AS_STPCPY
981 lea 28(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
982 # endif
983 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
984 sub $29, %r8 /* strncpy variant: %r8 -= 29; the jnz below tests this result because lea leaves the flags alone */
985 lea 29(%rdi), %rdi
986 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
987 # endif
988 ret
989 /* L(Exit30): copy 30 bytes from (%rsi) to (%rdi) as two overlapping 16-byte moves (offsets 0 and 14). */
990 .p2align 4
991 L(Exit30):
992 movdqu (%rsi), %xmm0
993 movdqu 14(%rsi), %xmm2
994 movdqu %xmm0, (%rdi)
995 movdqu %xmm2, 14(%rdi) /* bytes 14-15 are written twice with the same values */
996 # ifdef USE_AS_STPCPY
997 lea 29(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
998 # endif
999 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1000 sub $30, %r8 /* strncpy variant: %r8 -= 30; the jnz below tests this result because lea leaves the flags alone */
1001 lea 30(%rdi), %rdi
1002 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
1003 # endif
1004 ret
1005 /* L(Exit31): copy 31 bytes from (%rsi) to (%rdi) as two overlapping 16-byte moves (offsets 0 and 15). */
1006 .p2align 4
1007 L(Exit31):
1008 movdqu (%rsi), %xmm0
1009 movdqu 15(%rsi), %xmm2
1010 movdqu %xmm0, (%rdi)
1011 movdqu %xmm2, 15(%rdi) /* byte 15 is written twice with the same value */
1012 # ifdef USE_AS_STPCPY
1013 lea 30(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
1014 # endif
1015 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1016 sub $31, %r8 /* strncpy variant: %r8 -= 31; the jnz below tests this result because lea leaves the flags alone */
1017 lea 31(%rdi), %rdi
1018 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
1019 # endif
1020 ret
1021 /* L(Exit32): copy 32 bytes from (%rsi) to (%rdi) as two 16-byte moves (offsets 0 and 16). */
1022 .p2align 4
1023 L(Exit32):
1024 movdqu (%rsi), %xmm0
1025 movdqu 16(%rsi), %xmm2
1026 movdqu %xmm0, (%rdi)
1027 movdqu %xmm2, 16(%rdi)
1028 # ifdef USE_AS_STPCPY
1029 lea 31(%rdi), %rax /* stpcpy variant: result = address of the last byte written */
1030 # endif
1031 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1032 sub $32, %r8 /* strncpy variant: %r8 -= 32; the jnz below tests this result because lea leaves the flags alone */
1033 lea 32(%rdi), %rdi
1034 jnz L(StrncpyFillTailWithZero) /* %r8 != 0: continue at this label (defined outside this view) */
1035 # endif
1036 ret
1037
1038 # ifdef USE_AS_STRNCPY
1039 /* L(StrncpyExit0): nothing is copied from the source. */
1040 .p2align 4
1041 L(StrncpyExit0):
1042 # ifdef USE_AS_STPCPY
1043 mov %rdi, %rax /* stpcpy variant: result = %rdi */
1044 # endif
1045 # ifdef USE_AS_STRCAT
1046 xor %ch, %ch /* %ch = 0 */
1047 movb %ch, (%rdi) /* strcat variant: store a zero byte at (%rdi) */
1048 # endif
1049 ret
1050 /* L(StrncpyExit1): copy 1 byte from (%rsi) to (%rdi). */
1051 .p2align 4
1052 L(StrncpyExit1):
1053 mov (%rsi), %dl
1054 mov %dl, (%rdi)
1055 # ifdef USE_AS_STPCPY
1056 lea 1(%rdi), %rax /* stpcpy variant: result = address just past the copied byte */
1057 # endif
1058 # ifdef USE_AS_STRCAT
1059 xor %ch, %ch /* %ch = 0 */
1060 movb %ch, 1(%rdi) /* strcat variant: store a zero byte right after the copied byte */
1061 # endif
1062 ret
1063 /* L(StrncpyExit2): copy 2 bytes from (%rsi) to (%rdi) with one 2-byte move. */
1064 .p2align 4
1065 L(StrncpyExit2):
1066 mov (%rsi), %dx
1067 mov %dx, (%rdi)
1068 # ifdef USE_AS_STPCPY
1069 lea 2(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1070 # endif
1071 # ifdef USE_AS_STRCAT
1072 xor %ch, %ch /* %ch = 0 */
1073 movb %ch, 2(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1074 # endif
1075 ret
1076 /* L(StrncpyExit3): copy 3 bytes from (%rsi) to (%rdi) as a 2-byte move plus a 1-byte move at offset 2. */
1077 .p2align 4
1078 L(StrncpyExit3):
1079 mov (%rsi), %cx
1080 mov 2(%rsi), %dl
1081 mov %cx, (%rdi)
1082 mov %dl, 2(%rdi)
1083 # ifdef USE_AS_STPCPY
1084 lea 3(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1085 # endif
1086 # ifdef USE_AS_STRCAT
1087 xor %ch, %ch /* %ch = 0 */
1088 movb %ch, 3(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1089 # endif
1090 ret
1091 /* L(StrncpyExit4): copy 4 bytes from (%rsi) to (%rdi) with one 4-byte move. */
1092 .p2align 4
1093 L(StrncpyExit4):
1094 mov (%rsi), %edx
1095 mov %edx, (%rdi)
1096 # ifdef USE_AS_STPCPY
1097 lea 4(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1098 # endif
1099 # ifdef USE_AS_STRCAT
1100 xor %ch, %ch /* %ch = 0 */
1101 movb %ch, 4(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1102 # endif
1103 ret
1104 /* L(StrncpyExit5): copy 5 bytes from (%rsi) to (%rdi) as a 4-byte move plus a 1-byte move at offset 4. */
1105 .p2align 4
1106 L(StrncpyExit5):
1107 mov (%rsi), %ecx
1108 mov 4(%rsi), %dl
1109 mov %ecx, (%rdi)
1110 mov %dl, 4(%rdi)
1111 # ifdef USE_AS_STPCPY
1112 lea 5(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1113 # endif
1114 # ifdef USE_AS_STRCAT
1115 xor %ch, %ch /* %ch = 0 */
1116 movb %ch, 5(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1117 # endif
1118 ret
1119 /* L(StrncpyExit6): copy 6 bytes from (%rsi) to (%rdi) as a 4-byte move plus a 2-byte move at offset 4. */
1120 .p2align 4
1121 L(StrncpyExit6):
1122 mov (%rsi), %ecx
1123 mov 4(%rsi), %dx
1124 mov %ecx, (%rdi)
1125 mov %dx, 4(%rdi)
1126 # ifdef USE_AS_STPCPY
1127 lea 6(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1128 # endif
1129 # ifdef USE_AS_STRCAT
1130 xor %ch, %ch /* %ch = 0 */
1131 movb %ch, 6(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1132 # endif
1133 ret
1134 /* L(StrncpyExit7): copy 7 bytes from (%rsi) to (%rdi) as two overlapping 4-byte moves (offsets 0 and 3). */
1135 .p2align 4
1136 L(StrncpyExit7):
1137 mov (%rsi), %ecx
1138 mov 3(%rsi), %edx
1139 mov %ecx, (%rdi)
1140 mov %edx, 3(%rdi) /* byte 3 is written twice with the same value */
1141 # ifdef USE_AS_STPCPY
1142 lea 7(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1143 # endif
1144 # ifdef USE_AS_STRCAT
1145 xor %ch, %ch /* %ch = 0 */
1146 movb %ch, 7(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1147 # endif
1148 ret
1149 /* L(StrncpyExit8): copy 8 bytes from (%rsi) to (%rdi) with one 8-byte move. */
1150 .p2align 4
1151 L(StrncpyExit8):
1152 mov (%rsi), %rdx
1153 mov %rdx, (%rdi)
1154 # ifdef USE_AS_STPCPY
1155 lea 8(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1156 # endif
1157 # ifdef USE_AS_STRCAT
1158 xor %ch, %ch /* %ch = 0 */
1159 movb %ch, 8(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1160 # endif
1161 ret
1162 /* L(StrncpyExit9): copy 9 bytes from (%rsi) to (%rdi) as an 8-byte move plus a 1-byte move at offset 8. */
1163 .p2align 4
1164 L(StrncpyExit9):
1165 mov (%rsi), %rcx
1166 mov 8(%rsi), %dl
1167 mov %rcx, (%rdi)
1168 mov %dl, 8(%rdi)
1169 # ifdef USE_AS_STPCPY
1170 lea 9(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1171 # endif
1172 # ifdef USE_AS_STRCAT
1173 xor %ch, %ch /* %ch = 0 */
1174 movb %ch, 9(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1175 # endif
1176 ret
1177 /* L(StrncpyExit10): copy 10 bytes from (%rsi) to (%rdi) as an 8-byte move plus a 2-byte move at offset 8. */
1178 .p2align 4
1179 L(StrncpyExit10):
1180 mov (%rsi), %rcx
1181 mov 8(%rsi), %dx
1182 mov %rcx, (%rdi)
1183 mov %dx, 8(%rdi)
1184 # ifdef USE_AS_STPCPY
1185 lea 10(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1186 # endif
1187 # ifdef USE_AS_STRCAT
1188 xor %ch, %ch /* %ch = 0 */
1189 movb %ch, 10(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1190 # endif
1191 ret
1192 /* L(StrncpyExit11): copy 11 bytes from (%rsi) to (%rdi) as an 8-byte move plus an overlapping 4-byte move at offset 7. */
1193 .p2align 4
1194 L(StrncpyExit11):
1195 mov (%rsi), %rcx
1196 mov 7(%rsi), %edx
1197 mov %rcx, (%rdi)
1198 mov %edx, 7(%rdi) /* byte 7 is written twice with the same value */
1199 # ifdef USE_AS_STPCPY
1200 lea 11(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1201 # endif
1202 # ifdef USE_AS_STRCAT
1203 xor %ch, %ch /* %ch = 0 */
1204 movb %ch, 11(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1205 # endif
1206 ret
1207 /* L(StrncpyExit12): copy 12 bytes from (%rsi) to (%rdi) as an 8-byte move plus a 4-byte move at offset 8. */
1208 .p2align 4
1209 L(StrncpyExit12):
1210 mov (%rsi), %rcx
1211 mov 8(%rsi), %edx
1212 mov %rcx, (%rdi)
1213 mov %edx, 8(%rdi)
1214 # ifdef USE_AS_STPCPY
1215 lea 12(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1216 # endif
1217 # ifdef USE_AS_STRCAT
1218 xor %ch, %ch /* %ch = 0 */
1219 movb %ch, 12(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1220 # endif
1221 ret
1222 /* L(StrncpyExit13): copy 13 bytes from (%rsi) to (%rdi) as two overlapping 8-byte moves (offsets 0 and 5). */
1223 .p2align 4
1224 L(StrncpyExit13):
1225 mov (%rsi), %rcx
1226 mov 5(%rsi), %rdx
1227 mov %rcx, (%rdi)
1228 mov %rdx, 5(%rdi) /* bytes 5-7 are written twice with the same values */
1229 # ifdef USE_AS_STPCPY
1230 lea 13(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1231 # endif
1232 # ifdef USE_AS_STRCAT
1233 xor %ch, %ch /* %ch = 0 */
1234 movb %ch, 13(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1235 # endif
1236 ret
1237 /* L(StrncpyExit14): copy 14 bytes from (%rsi) to (%rdi) as two overlapping 8-byte moves (offsets 0 and 6). */
1238 .p2align 4
1239 L(StrncpyExit14):
1240 mov (%rsi), %rcx
1241 mov 6(%rsi), %rdx
1242 mov %rcx, (%rdi)
1243 mov %rdx, 6(%rdi) /* bytes 6-7 are written twice with the same values */
1244 # ifdef USE_AS_STPCPY
1245 lea 14(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1246 # endif
1247 # ifdef USE_AS_STRCAT
1248 xor %ch, %ch /* %ch = 0 */
1249 movb %ch, 14(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1250 # endif
1251 ret
1252 /* L(StrncpyExit15): copy 15 bytes from (%rsi) to (%rdi) as two overlapping 8-byte moves (offsets 0 and 7). */
1253 .p2align 4
1254 L(StrncpyExit15):
1255 mov (%rsi), %rcx
1256 mov 7(%rsi), %rdx
1257 mov %rcx, (%rdi)
1258 mov %rdx, 7(%rdi) /* byte 7 is written twice with the same value */
1259 # ifdef USE_AS_STPCPY
1260 lea 15(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1261 # endif
1262 # ifdef USE_AS_STRCAT
1263 xor %ch, %ch /* %ch = 0 */
1264 movb %ch, 15(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1265 # endif
1266 ret
1267 /* L(StrncpyExit16): copy 16 bytes from (%rsi) to (%rdi) with one unaligned 16-byte move. */
1268 .p2align 4
1269 L(StrncpyExit16):
1270 movdqu (%rsi), %xmm0
1271 movdqu %xmm0, (%rdi)
1272 # ifdef USE_AS_STPCPY
1273 lea 16(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1274 # endif
1275 # ifdef USE_AS_STRCAT
1276 xor %ch, %ch /* %ch = 0 */
1277 movb %ch, 16(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1278 # endif
1279 ret
1280 /* L(StrncpyExit17): copy 17 bytes from (%rsi) to (%rdi) as a 16-byte move plus a 1-byte move at offset 16. */
1281 .p2align 4
1282 L(StrncpyExit17):
1283 movdqu (%rsi), %xmm0
1284 mov 16(%rsi), %cl
1285 movdqu %xmm0, (%rdi)
1286 mov %cl, 16(%rdi)
1287 # ifdef USE_AS_STPCPY
1288 lea 17(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1289 # endif
1290 # ifdef USE_AS_STRCAT
1291 xor %ch, %ch /* %ch = 0 */
1292 movb %ch, 17(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1293 # endif
1294 ret
1295 /* L(StrncpyExit18): copy 18 bytes from (%rsi) to (%rdi) as a 16-byte move plus a 2-byte move at offset 16. */
1296 .p2align 4
1297 L(StrncpyExit18):
1298 movdqu (%rsi), %xmm0
1299 mov 16(%rsi), %cx
1300 movdqu %xmm0, (%rdi)
1301 mov %cx, 16(%rdi)
1302 # ifdef USE_AS_STPCPY
1303 lea 18(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1304 # endif
1305 # ifdef USE_AS_STRCAT
1306 xor %ch, %ch /* %ch = 0 */
1307 movb %ch, 18(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1308 # endif
1309 ret
1310 /* L(StrncpyExit19): copy 19 bytes from (%rsi) to (%rdi) as a 16-byte move plus an overlapping 4-byte move at offset 15. */
1311 .p2align 4
1312 L(StrncpyExit19):
1313 movdqu (%rsi), %xmm0
1314 mov 15(%rsi), %ecx
1315 movdqu %xmm0, (%rdi)
1316 mov %ecx, 15(%rdi) /* byte 15 is written twice with the same value */
1317 # ifdef USE_AS_STPCPY
1318 lea 19(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1319 # endif
1320 # ifdef USE_AS_STRCAT
1321 xor %ch, %ch /* %ch = 0 */
1322 movb %ch, 19(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1323 # endif
1324 ret
1325 /* L(StrncpyExit20): copy 20 bytes from (%rsi) to (%rdi) as a 16-byte move plus a 4-byte move at offset 16. */
1326 .p2align 4
1327 L(StrncpyExit20):
1328 movdqu (%rsi), %xmm0
1329 mov 16(%rsi), %ecx
1330 movdqu %xmm0, (%rdi)
1331 mov %ecx, 16(%rdi)
1332 # ifdef USE_AS_STPCPY
1333 lea 20(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1334 # endif
1335 # ifdef USE_AS_STRCAT
1336 xor %ch, %ch /* %ch = 0 */
1337 movb %ch, 20(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1338 # endif
1339 ret
1340 /* L(StrncpyExit21): copy 21 bytes from (%rsi) to (%rdi) as 16-byte, 4-byte (offset 16) and 1-byte (offset 20) moves. */
1341 .p2align 4
1342 L(StrncpyExit21):
1343 movdqu (%rsi), %xmm0
1344 mov 16(%rsi), %ecx
1345 mov 20(%rsi), %dl
1346 movdqu %xmm0, (%rdi)
1347 mov %ecx, 16(%rdi)
1348 mov %dl, 20(%rdi)
1349 # ifdef USE_AS_STPCPY
1350 lea 21(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1351 # endif
1352 # ifdef USE_AS_STRCAT
1353 xor %ch, %ch /* %ch = 0 */
1354 movb %ch, 21(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1355 # endif
1356 ret
1357 /* L(StrncpyExit22): copy 22 bytes from (%rsi) to (%rdi) as a 16-byte move plus an overlapping 8-byte move at offset 14. */
1358 .p2align 4
1359 L(StrncpyExit22):
1360 movdqu (%rsi), %xmm0
1361 mov 14(%rsi), %rcx
1362 movdqu %xmm0, (%rdi)
1363 mov %rcx, 14(%rdi) /* bytes 14-15 are written twice with the same values */
1364 # ifdef USE_AS_STPCPY
1365 lea 22(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1366 # endif
1367 # ifdef USE_AS_STRCAT
1368 xor %ch, %ch /* %ch = 0 */
1369 movb %ch, 22(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1370 # endif
1371 ret
1372 /* L(StrncpyExit23): copy 23 bytes from (%rsi) to (%rdi) as a 16-byte move plus an overlapping 8-byte move at offset 15. */
1373 .p2align 4
1374 L(StrncpyExit23):
1375 movdqu (%rsi), %xmm0
1376 mov 15(%rsi), %rcx
1377 movdqu %xmm0, (%rdi)
1378 mov %rcx, 15(%rdi) /* byte 15 is written twice with the same value */
1379 # ifdef USE_AS_STPCPY
1380 lea 23(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1381 # endif
1382 # ifdef USE_AS_STRCAT
1383 xor %ch, %ch /* %ch = 0 */
1384 movb %ch, 23(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1385 # endif
1386 ret
1387 /* L(StrncpyExit24): copy 24 bytes from (%rsi) to (%rdi) as a 16-byte move plus an 8-byte move at offset 16. */
1388 .p2align 4
1389 L(StrncpyExit24):
1390 movdqu (%rsi), %xmm0
1391 mov 16(%rsi), %rcx
1392 movdqu %xmm0, (%rdi)
1393 mov %rcx, 16(%rdi)
1394 # ifdef USE_AS_STPCPY
1395 lea 24(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1396 # endif
1397 # ifdef USE_AS_STRCAT
1398 xor %ch, %ch /* %ch = 0 */
1399 movb %ch, 24(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1400 # endif
1401 ret
1402 /* L(StrncpyExit25): copy 25 bytes from (%rsi) to (%rdi) as 16-byte, 8-byte (offset 16) and 1-byte (offset 24) moves. */
1403 .p2align 4
1404 L(StrncpyExit25):
1405 movdqu (%rsi), %xmm0
1406 mov 16(%rsi), %rdx
1407 mov 24(%rsi), %cl
1408 movdqu %xmm0, (%rdi)
1409 mov %rdx, 16(%rdi)
1410 mov %cl, 24(%rdi)
1411 # ifdef USE_AS_STPCPY
1412 lea 25(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1413 # endif
1414 # ifdef USE_AS_STRCAT
1415 xor %ch, %ch /* %ch = 0 */
1416 movb %ch, 25(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1417 # endif
1418 ret
1419 /* L(StrncpyExit26): copy 26 bytes from (%rsi) to (%rdi) as 16-byte, 8-byte (offset 16) and 2-byte (offset 24) moves. */
1420 .p2align 4
1421 L(StrncpyExit26):
1422 movdqu (%rsi), %xmm0
1423 mov 16(%rsi), %rdx
1424 mov 24(%rsi), %cx
1425 movdqu %xmm0, (%rdi)
1426 mov %rdx, 16(%rdi)
1427 mov %cx, 24(%rdi)
1428 # ifdef USE_AS_STPCPY
1429 lea 26(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1430 # endif
1431 # ifdef USE_AS_STRCAT
1432 xor %ch, %ch /* %ch = 0 */
1433 movb %ch, 26(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1434 # endif
1435 ret
1436 /* L(StrncpyExit27): copy 27 bytes from (%rsi) to (%rdi) as 16-byte, 8-byte (offset 16) and overlapping 4-byte (offset 23) moves. */
1437 .p2align 4
1438 L(StrncpyExit27):
1439 movdqu (%rsi), %xmm0
1440 mov 16(%rsi), %rdx
1441 mov 23(%rsi), %ecx
1442 movdqu %xmm0, (%rdi)
1443 mov %rdx, 16(%rdi)
1444 mov %ecx, 23(%rdi) /* byte 23 is written twice with the same value */
1445 # ifdef USE_AS_STPCPY
1446 lea 27(%rdi), %rax /* stpcpy variant: result = address just past the copied bytes */
1447 # endif
1448 # ifdef USE_AS_STRCAT
1449 xor %ch, %ch /* %ch = 0 */
1450 movb %ch, 27(%rdi) /* strcat variant: store a zero byte right after the copied bytes */
1451 # endif
1452 ret
1453
1454 .p2align 4
1455 L(StrncpyExit28): /* Copy exactly 28 bytes from (%rsi) to (%rdi).  */
1456 movdqu (%rsi), %xmm0 /* bytes 0..15 */
1457 movq 16(%rsi), %rdx /* bytes 16..23 */
1458 movl 24(%rsi), %ecx /* bytes 24..27 */
1459 movdqu %xmm0, (%rdi)
1460 movq %rdx, 16(%rdi)
1461 movl %ecx, 24(%rdi)
1462 # ifdef USE_AS_STPCPY
1463 leaq 28(%rdi), %rax /* %rax = %rdi + 28 */
1464 # endif
1465 # ifdef USE_AS_STRCAT
1466 xorb %ch, %ch /* %ch = 0 */
1467 movb %ch, 28(%rdi) /* zero byte right after the 28 copied bytes */
1468 # endif
1469 ret
1470
1471 .p2align 4
1472 L(StrncpyExit29): /* Copy exactly 29 bytes with two overlapping 16-byte moves.  */
1473 movdqu (%rsi), %xmm0 /* bytes 0..15 */
1474 movdqu 13(%rsi), %xmm2 /* bytes 13..28 */
1475 movdqu %xmm0, (%rdi)
1476 movdqu %xmm2, 13(%rdi)
1477 # ifdef USE_AS_STPCPY
1478 leaq 29(%rdi), %rax /* %rax = %rdi + 29 */
1479 # endif
1480 # ifdef USE_AS_STRCAT
1481 xorb %ch, %ch /* %ch = 0 */
1482 movb %ch, 29(%rdi) /* zero byte right after the 29 copied bytes */
1483 # endif
1484 ret
1485
1486 .p2align 4
1487 L(StrncpyExit30): /* Copy exactly 30 bytes with two overlapping 16-byte moves.  */
1488 movdqu (%rsi), %xmm0 /* bytes 0..15 */
1489 movdqu 14(%rsi), %xmm2 /* bytes 14..29 */
1490 movdqu %xmm0, (%rdi)
1491 movdqu %xmm2, 14(%rdi)
1492 # ifdef USE_AS_STPCPY
1493 leaq 30(%rdi), %rax /* %rax = %rdi + 30 */
1494 # endif
1495 # ifdef USE_AS_STRCAT
1496 xorb %ch, %ch /* %ch = 0 */
1497 movb %ch, 30(%rdi) /* zero byte right after the 30 copied bytes */
1498 # endif
1499 ret
1500
1501 .p2align 4
1502 L(StrncpyExit31): /* Copy exactly 31 bytes with two overlapping 16-byte moves.  */
1503 movdqu (%rsi), %xmm0 /* bytes 0..15 */
1504 movdqu 15(%rsi), %xmm2 /* bytes 15..30 */
1505 movdqu %xmm0, (%rdi)
1506 movdqu %xmm2, 15(%rdi)
1507 # ifdef USE_AS_STPCPY
1508 leaq 31(%rdi), %rax /* %rax = %rdi + 31 */
1509 # endif
1510 # ifdef USE_AS_STRCAT
1511 xorb %ch, %ch /* %ch = 0 */
1512 movb %ch, 31(%rdi) /* zero byte right after the 31 copied bytes */
1513 # endif
1514 ret
1515
1516 .p2align 4
1517 L(StrncpyExit32): /* Copy exactly 32 bytes with two back-to-back 16-byte moves.  */
1518 movdqu (%rsi), %xmm0 /* bytes 0..15 */
1519 movdqu 16(%rsi), %xmm2 /* bytes 16..31 */
1520 movdqu %xmm0, (%rdi)
1521 movdqu %xmm2, 16(%rdi)
1522 # ifdef USE_AS_STPCPY
1523 leaq 32(%rdi), %rax /* %rax = %rdi + 32 */
1524 # endif
1525 # ifdef USE_AS_STRCAT
1526 xorb %ch, %ch /* %ch = 0 */
1527 movb %ch, 32(%rdi) /* zero byte right after the 32 copied bytes */
1528 # endif
1529 ret
1530
1531 .p2align 4
1532 L(StrncpyExit33): /* Copy exactly 33 bytes from (%rsi) to (%rdi).  */
1533 movdqu (%rsi), %xmm0 /* bytes 0..15 */
1534 movdqu 16(%rsi), %xmm2 /* bytes 16..31 */
1535 movb 32(%rsi), %cl /* byte 32 */
1536 movdqu %xmm0, (%rdi)
1537 movdqu %xmm2, 16(%rdi)
1538 movb %cl, 32(%rdi) /* NOTE(review): no USE_AS_STPCPY update of %rax in this entry, unlike the shorter exits; presumably index 33 is never dispatched in STPCPY builds -- confirm */
1539 # ifdef USE_AS_STRCAT
1540 xorb %ch, %ch /* %ch = 0 */
1541 movb %ch, 33(%rdi) /* zero byte right after the 33 copied bytes */
1542 # endif
1543 ret
1544
1545 # ifndef USE_AS_STRCAT
1546
1547 .p2align 4
1548 L(Fill0): /* L(FillTable) index 0: nothing to store.  */
1549 ret
1550
1551 .p2align 4
1552 L(Fill1): /* L(FillTable) index 1: store 1 byte at (%rdi).  */
1553 movb %dl, (%rdi) /* %rdx is expected to be 0: L(StrncpyFillTailWithZero) clears it before dispatching */
1554 ret
1555
1556 .p2align 4
1557 L(Fill2): /* L(FillTable) index 2: store 2 bytes at (%rdi).  */
1558 movw %dx, (%rdi) /* %rdx is expected to be 0: L(StrncpyFillTailWithZero) clears it before dispatching */
1559 ret
1560
1561 .p2align 4
1562 L(Fill3): /* L(FillTable) index 3: one 4-byte store covering offsets -1..2.  */
1563 movl %edx, -1(%rdi) /* %rdx expected 0 (cleared in L(StrncpyFillTailWithZero)); also rewrites -1(%rdi), presumably already zero -- confirm */
1564 ret
1565
1566 .p2align 4
1567 L(Fill4): /* L(FillTable) index 4: store 4 bytes at (%rdi).  */
1568 movl %edx, (%rdi) /* %rdx is expected to be 0: L(StrncpyFillTailWithZero) clears it before dispatching */
1569 ret
1570
1571 .p2align 4
1572 L(Fill5): /* L(FillTable) index 5: store 4 + 1 bytes at (%rdi).  */
1573 movl %edx, (%rdi) /* offsets 0..3; %rdx expected 0 (cleared in L(StrncpyFillTailWithZero)) */
1574 movb %dl, 4(%rdi) /* offset 4 */
1575 ret
1576
1577 .p2align 4
1578 L(Fill6): /* L(FillTable) index 6: store 4 + 2 bytes at (%rdi).  */
1579 movl %edx, (%rdi) /* offsets 0..3; %rdx expected 0 (cleared in L(StrncpyFillTailWithZero)) */
1580 movw %dx, 4(%rdi) /* offsets 4..5 */
1581 ret
1582
1583 .p2align 4
1584 L(Fill7): /* L(FillTable) index 7: one 8-byte store covering offsets -1..6.  */
1585 movq %rdx, -1(%rdi) /* %rdx expected 0 (cleared in L(StrncpyFillTailWithZero)); also rewrites -1(%rdi), presumably already zero -- confirm */
1586 ret
1587
1588 .p2align 4
1589 L(Fill8): /* L(FillTable) index 8: store 8 bytes at (%rdi).  */
1590 movq %rdx, (%rdi) /* %rdx is expected to be 0: L(StrncpyFillTailWithZero) clears it before dispatching */
1591 ret
1592
1593 .p2align 4
1594 L(Fill9): /* L(FillTable) index 9: store 8 + 1 bytes at (%rdi).  */
1595 movq %rdx, (%rdi) /* offsets 0..7; %rdx expected 0 (cleared in L(StrncpyFillTailWithZero)) */
1596 movb %dl, 8(%rdi) /* offset 8 */
1597 ret
1598
1599 .p2align 4
1600 L(Fill10): /* L(FillTable) index 10: store 8 + 2 bytes at (%rdi).  */
1601 movq %rdx, (%rdi) /* offsets 0..7; %rdx expected 0 (cleared in L(StrncpyFillTailWithZero)) */
1602 movw %dx, 8(%rdi) /* offsets 8..9 */
1603 ret
1604
1605 .p2align 4
1606 L(Fill11): /* L(FillTable) index 11: two overlapping stores covering 11 bytes.  */
1607 movq %rdx, (%rdi) /* offsets 0..7; %rdx expected 0 (cleared in L(StrncpyFillTailWithZero)) */
1608 movl %edx, 7(%rdi) /* offsets 7..10 */
1609 ret
1610
1611 .p2align 4
1612 L(Fill12): /* L(FillTable) index 12: store 8 + 4 bytes at (%rdi).  */
1613 movq %rdx, (%rdi) /* offsets 0..7; %rdx expected 0 (cleared in L(StrncpyFillTailWithZero)) */
1614 movl %edx, 8(%rdi) /* offsets 8..11 */
1615 ret
1616
1617 .p2align 4
1618 L(Fill13): /* L(FillTable) index 13: two overlapping 8-byte stores covering 13 bytes.  */
1619 movq %rdx, (%rdi) /* offsets 0..7; %rdx expected 0 (cleared in L(StrncpyFillTailWithZero)) */
1620 movq %rdx, 5(%rdi) /* offsets 5..12 */
1621 ret
1622
1623 .p2align 4
1624 L(Fill14): /* L(FillTable) index 14: two overlapping 8-byte stores covering 14 bytes.  */
1625 movq %rdx, (%rdi) /* offsets 0..7; %rdx expected 0 (cleared in L(StrncpyFillTailWithZero)) */
1626 movq %rdx, 6(%rdi) /* offsets 6..13 */
1627 ret
1628
1629 .p2align 4
1630 L(Fill15): /* L(FillTable) index 15: one 16-byte store covering offsets -1..14.  */
1631 movdqu %xmm0, -1(%rdi) /* %xmm0 expected all-zero (cleared in L(StrncpyFillTailWithZero)); also rewrites -1(%rdi), presumably already zero -- confirm */
1632 ret
1633
1634 .p2align 4
1635 L(Fill16): /* L(FillTable) index 16: store 16 bytes at (%rdi).  */
1636 movdqu %xmm0, (%rdi) /* %xmm0 is expected to be all-zero: L(StrncpyFillTailWithZero) clears it before dispatching */
1637 ret
1638
1639 .p2align 4
1640 L(CopyFrom1To16BytesUnalignedXmm2): /* Entry that first stores %xmm2 at %rdi + %rcx, then falls through.  */
1641 movdqu %xmm2, (%rdi, %rcx)
1642
1643 .p2align 4
1644 L(CopyFrom1To16BytesXmmExit): /* Advance %rdi past the byte selected by the mask in %rdx and rebase %r8; presumably that byte is the NUL and %r8 becomes the zero-fill count -- confirm against callers.  */
1645 bsfq %rdx, %rdx /* %rdx = index of the lowest set bit of the incoming mask */
1646 addq $15, %r8
1647 addq %rcx, %rdi /* %rdi += %rcx */
1648 # ifdef USE_AS_STPCPY
1649 leaq (%rdi, %rdx), %rax /* %rax = address of the byte at that index */
1650 # endif
1651 subq %rdx, %r8 /* together with the add above: %r8 = %r8 + 15 - index */
1652 leaq 1(%rdi, %rdx), %rdi /* %rdi = one past that byte; execution continues into L(StrncpyFillTailWithZero) */
1653
1654 .p2align 4
1655 L(StrncpyFillTailWithZero): /* Write %r8 zero bytes starting at %rdi; clobbers %rsi, %rdx, %xmm0 and, via the table macro, %rcx and %r11.  */
1656 pxor %xmm0, %xmm0 /* zero source for the 16-byte stores */
1657 xorq %rdx, %rdx /* zero source for the scalar stores in the L(FillN) stubs */
1658 subq $16, %r8
1659 jbe L(StrncpyFillExit) /* count was 16 or less: a single table dispatch finishes it */
1660
1661 movdqu %xmm0, (%rdi) /* first 16 bytes; %rdi may be unaligned here */
1662 addq $16, %rdi
1663
1664 movq %rdi, %rsi
1665 andq $0xf, %rsi /* %rsi = %rdi modulo 16 */
1666 subq %rsi, %rdi /* round %rdi down to a 16-byte boundary so movdqa is legal */
1667 addq %rsi, %r8 /* the bytes stepped back over are counted (and zeroed) again */
1668 subq $64, %r8
1669 jb L(StrncpyFillLess64) /* fewer than 64 bytes left */
1670
1671 L(StrncpyFillLoopMovdqa): /* 64 aligned zero bytes per iteration */
1672 movdqa %xmm0, (%rdi)
1673 movdqa %xmm0, 16(%rdi)
1674 movdqa %xmm0, 32(%rdi)
1675 movdqa %xmm0, 48(%rdi)
1676 addq $64, %rdi
1677 subq $64, %r8
1678 jae L(StrncpyFillLoopMovdqa) /* no borrow: at least 64 more bytes were left */
1679
1680 L(StrncpyFillLess64): /* here %r8 = (bytes left) - 64, with 0..63 bytes left */
1681 addq $32, %r8
1682 jl L(StrncpyFillLess32) /* fewer than 32 bytes left */
1683 movdqa %xmm0, (%rdi)
1684 movdqa %xmm0, 16(%rdi)
1685 addq $32, %rdi
1686 subq $16, %r8
1687 jl L(StrncpyFillExit) /* fewer than 16 left after those 32 */
1688 movdqa %xmm0, (%rdi)
1689 addq $16, %rdi
1690 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) /* %r8 = 0..15 bytes remaining */
1691
1692 L(StrncpyFillLess32): /* here %r8 = (bytes left) - 32, with 0..31 bytes left */
1693 addq $16, %r8
1694 jl L(StrncpyFillExit) /* fewer than 16 bytes left */
1695 movdqa %xmm0, (%rdi)
1696 addq $16, %rdi
1697 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) /* %r8 = 0..15 bytes remaining */
1698
1699 L(StrncpyFillExit): /* every jump here arrives with %r8 = (bytes left) - 16 */
1700 addq $16, %r8 /* undo the bias so %r8 indexes L(FillTable) directly */
1701 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1702
1703 /* end of ifndef USE_AS_STRCAT */
1704 # endif
1705
1706 .p2align 4
1707 L(UnalignedLeaveCase2OrCase3): /* Choose the leave path for the 64 bytes held in %xmm4..%xmm7 from the mask in %rdx.  */
1708 testq %rdx, %rdx
1709 jnz L(Unaligned64LeaveCase2) /* mask non-zero: take the Case2 path */
1710 L(Unaligned64LeaveCase3): /* reached by fall-through when %rdx == 0; presumably only the length limit ends the copy here -- confirm */
1711 leaq 64(%r8), %rcx
1712 andq $-16, %rcx /* %rcx = (%r8 + 64) rounded down to a multiple of 16 */
1713 addq $48, %r8
1714 jl L(CopyFrom1To16BytesCase3) /* signed result negative: leave before storing %xmm4 */
1715 movdqu %xmm4, (%rdi)
1716 subq $16, %r8
1717 jb L(CopyFrom1To16BytesCase3) /* borrow: leave before storing %xmm5 */
1718 movdqu %xmm5, 16(%rdi)
1719 subq $16, %r8
1720 jb L(CopyFrom1To16BytesCase3) /* borrow: leave before storing %xmm6 */
1721 movdqu %xmm6, 32(%rdi)
1722 subq $16, %r8
1723 jb L(CopyFrom1To16BytesCase3) /* borrow: leave before storing %xmm7 */
1724 movdqu %xmm7, 48(%rdi) /* all four vectors (64 bytes) stored */
1725 # ifdef USE_AS_STPCPY
1726 leaq 64(%rdi), %rax /* %rax = %rdi + 64 */
1727 # endif
1728 # ifdef USE_AS_STRCAT
1729 xorb %ch, %ch /* %ch = 0 */
1730 movb %ch, 64(%rdi) /* zero byte right after the 64 stored bytes */
1731 # endif
1732 ret
1733
1734 .p2align 4
1735 L(Unaligned64LeaveCase2): /* Walk %xmm4..%xmm7 in 16-byte steps, storing each vector once the next has been checked, until a mask bit or the %r8 limit ends the walk.  */
1736 xorq %rcx, %rcx /* %rcx = byte offset of the vector being examined */
1737 pcmpeqb %xmm4, %xmm0 /* presumably %xmm0 is all-zero on entry, so this flags NUL bytes of %xmm4 -- confirm */
1738 pmovmskb %xmm0, %rdx /* one mask bit per byte */
1739 addq $48, %r8
1740 jle L(CopyFrom1To16BytesCase2OrCase3) /* signed result <= 0 */
1741 testq %rdx, %rdx
1742 # ifndef USE_AS_STRCAT
1743 jnz L(CopyFrom1To16BytesUnalignedXmm4)
1744 # else
1745 jnz L(CopyFrom1To16Bytes)
1746 # endif
1747 pcmpeqb %xmm5, %xmm0 /* mask was 0, so %xmm0 is all-zero again for this compare */
1748 pmovmskb %xmm0, %rdx
1749 movdqu %xmm4, (%rdi) /* %xmm4 had no mask bits set: store it whole */
1750 addq $16, %rcx
1751 subq $16, %r8
1752 jbe L(CopyFrom1To16BytesCase2OrCase3) /* borrow or zero */
1753 testq %rdx, %rdx
1754 # ifndef USE_AS_STRCAT
1755 jnz L(CopyFrom1To16BytesUnalignedXmm5)
1756 # else
1757 jnz L(CopyFrom1To16Bytes)
1758 # endif
1759
1760 pcmpeqb %xmm6, %xmm0 /* mask was 0, so %xmm0 is all-zero again for this compare */
1761 pmovmskb %xmm0, %rdx
1762 movdqu %xmm5, 16(%rdi) /* %xmm5 had no mask bits set: store it whole */
1763 addq $16, %rcx
1764 subq $16, %r8
1765 jbe L(CopyFrom1To16BytesCase2OrCase3) /* borrow or zero */
1766 testq %rdx, %rdx
1767 # ifndef USE_AS_STRCAT
1768 jnz L(CopyFrom1To16BytesUnalignedXmm6)
1769 # else
1770 jnz L(CopyFrom1To16Bytes)
1771 # endif
1772
1773 pcmpeqb %xmm7, %xmm0 /* mask was 0, so %xmm0 is all-zero again for this compare */
1774 pmovmskb %xmm0, %rdx
1775 movdqu %xmm6, 32(%rdi) /* %xmm6 had no mask bits set: store it whole */
1776 leaq 16(%rdi, %rcx), %rdi /* %rcx is 32 here, so %rdi += 48 */
1777 leaq 16(%rsi, %rcx), %rsi /* likewise %rsi += 48 */
1778 bsfq %rdx, %rdx /* index of the lowest set bit of the %xmm7 mask */
1779 cmpq %r8, %rdx
1780 jb L(CopyFrom1To16BytesExit) /* index below %r8 (unsigned) */
1781 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) /* otherwise dispatch on %r8 */
1782
1783 .p2align 4
1784 L(ExitZero): /* Return without copying; the entry code jumps here when the length in %r8 is zero.  */
1785 # ifndef USE_AS_STRCAT
1786 movq %rdi, %rax /* result = %rdi */
1787 # endif
1788 ret
1789
1790 # endif
1791
1792 # ifndef USE_AS_STRCAT
1793 END (STRCPY) /* closes the ENTRY (STRCPY) opened at the top of this file */
1794 # else
1795 END (STRCAT) /* presumably pairs with an ENTRY (STRCAT) in the file that includes this one -- confirm */
1796 # endif
1797 .section .rodata
1798 .p2align 4 /* issued after the section switch so it aligns the table in .rodata instead of padding .text */
1799 L(ExitTable): /* 32-bit offsets relative to L(ExitTable) (see JMPTBL); entry i targets L(Exit<i+1>).  BRANCH_TO_JMPTBL_ENTRY adds the table base back before jumping.  */
1800 .int JMPTBL(L(Exit1), L(ExitTable))
1801 .int JMPTBL(L(Exit2), L(ExitTable))
1802 .int JMPTBL(L(Exit3), L(ExitTable))
1803 .int JMPTBL(L(Exit4), L(ExitTable))
1804 .int JMPTBL(L(Exit5), L(ExitTable))
1805 .int JMPTBL(L(Exit6), L(ExitTable))
1806 .int JMPTBL(L(Exit7), L(ExitTable))
1807 .int JMPTBL(L(Exit8), L(ExitTable))
1808 .int JMPTBL(L(Exit9), L(ExitTable))
1809 .int JMPTBL(L(Exit10), L(ExitTable))
1810 .int JMPTBL(L(Exit11), L(ExitTable))
1811 .int JMPTBL(L(Exit12), L(ExitTable))
1812 .int JMPTBL(L(Exit13), L(ExitTable))
1813 .int JMPTBL(L(Exit14), L(ExitTable))
1814 .int JMPTBL(L(Exit15), L(ExitTable))
1815 .int JMPTBL(L(Exit16), L(ExitTable))
1816 .int JMPTBL(L(Exit17), L(ExitTable))
1817 .int JMPTBL(L(Exit18), L(ExitTable))
1818 .int JMPTBL(L(Exit19), L(ExitTable))
1819 .int JMPTBL(L(Exit20), L(ExitTable))
1820 .int JMPTBL(L(Exit21), L(ExitTable))
1821 .int JMPTBL(L(Exit22), L(ExitTable))
1822 .int JMPTBL(L(Exit23), L(ExitTable))
1823 .int JMPTBL(L(Exit24), L(ExitTable))
1824 .int JMPTBL(L(Exit25), L(ExitTable))
1825 .int JMPTBL(L(Exit26), L(ExitTable))
1826 .int JMPTBL(L(Exit27), L(ExitTable))
1827 .int JMPTBL(L(Exit28), L(ExitTable))
1828 .int JMPTBL(L(Exit29), L(ExitTable))
1829 .int JMPTBL(L(Exit30), L(ExitTable))
1830 .int JMPTBL(L(Exit31), L(ExitTable))
1831 .int JMPTBL(L(Exit32), L(ExitTable))
1832 # ifdef USE_AS_STRNCPY
1833 L(ExitStrncpyTable): /* 32-bit offsets relative to L(ExitStrncpyTable) (see JMPTBL); entry i targets L(StrncpyExit<i>), i = 0..33.  Indexed by %r8 via BRANCH_TO_JMPTBL_ENTRY.  */
1834 .int JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable))
1835 .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1836 .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1837 .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1838 .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1839 .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1840 .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1841 .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1842 .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1843 .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1844 .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1845 .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1846 .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1847 .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1848 .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1849 .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1850 .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1851 .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1852 .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1853 .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1854 .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1855 .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1856 .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1857 .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1858 .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1859 .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1860 .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1861 .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1862 .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1863 .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1864 .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1865 .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1866 .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1867 .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
1868 # ifndef USE_AS_STRCAT
1869 .p2align 4 /* 16-byte alignment for the table that follows */
1870 L(FillTable): /* 32-bit offsets relative to L(FillTable) (see JMPTBL); entry i targets L(Fill<i>), i = 0..16.  Indexed by %r8 from the L(StrncpyFillTailWithZero) paths.  */
1871 .int JMPTBL(L(Fill0), L(FillTable))
1872 .int JMPTBL(L(Fill1), L(FillTable))
1873 .int JMPTBL(L(Fill2), L(FillTable))
1874 .int JMPTBL(L(Fill3), L(FillTable))
1875 .int JMPTBL(L(Fill4), L(FillTable))
1876 .int JMPTBL(L(Fill5), L(FillTable))
1877 .int JMPTBL(L(Fill6), L(FillTable))
1878 .int JMPTBL(L(Fill7), L(FillTable))
1879 .int JMPTBL(L(Fill8), L(FillTable))
1880 .int JMPTBL(L(Fill9), L(FillTable))
1881 .int JMPTBL(L(Fill10), L(FillTable))
1882 .int JMPTBL(L(Fill11), L(FillTable))
1883 .int JMPTBL(L(Fill12), L(FillTable))
1884 .int JMPTBL(L(Fill13), L(FillTable))
1885 .int JMPTBL(L(Fill14), L(FillTable))
1886 .int JMPTBL(L(Fill15), L(FillTable))
1887 .int JMPTBL(L(Fill16), L(FillTable))
1888 # endif
1889 # endif
1890 #endif