1 /* strcpy with SSE2 and unaligned load
2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 # ifndef USE_AS_STRCAT
27 # define STRCPY __strcpy_sse2_unaligned
/* JMPTBL (I, B) expresses label I as an offset from table base B; the
   .int jump tables in this file store their entries in that form.
   BRANCH_TO_JMPTBL_ENTRY (TABLE, INDEX, SCALE) loads TABLE's address
   RIP-relatively into %r11, sign-extends the 32-bit entry found at
   TABLE + INDEX * SCALE into %rcx, then adds the table base back so that
   %rcx holds the absolute address of the target.  The visible part
   overwrites %r11 and %rcx.
   NOTE(review): the last visible line of the macro still ends with a
   continuation backslash; the rest of the body (presumably the indirect
   jump through %rcx) is not visible here -- confirm.  */
32 # define JMPTBL(I, B) I - B
33 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
34 lea TABLE(%rip), %r11; \
35 movslq (%r11, INDEX, SCALE), %rcx; \
36 lea (%r11, %rcx), %rcx; \
39 # ifndef USE_AS_STRCAT
43 # ifdef USE_AS_STRNCPY
49 # ifndef USE_AS_STPCPY
50 mov %rdi, %rax /* save result */
56 jz L(SourceStringAlignmentZero)
63 # ifdef USE_AS_STRNCPY
68 # ifdef USE_AS_STRNCPY
69 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
74 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
77 jnz L(CopyFrom1To16BytesTail)
79 pcmpeqb 16(%rsi), %xmm0
81 # ifdef USE_AS_STRNCPY
82 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
87 jbe L(CopyFrom1To32BytesCase2OrCase3)
90 jnz L(CopyFrom1To32Bytes)
92 movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
97 /* If source address alignment != destination address alignment */
101 movdqa (%rsi, %rcx), %xmm1
102 movaps 16(%rsi, %rcx), %xmm2
103 movdqu %xmm1, (%rdi, %rcx)
107 # ifdef USE_AS_STRNCPY
109 jbe L(CopyFrom1To16BytesCase2OrCase3)
112 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
113 jnz L(CopyFrom1To16BytesUnalignedXmm2)
115 jnz L(CopyFrom1To16Bytes)
118 movaps 16(%rsi, %rcx), %xmm3
119 movdqu %xmm2, (%rdi, %rcx)
123 # ifdef USE_AS_STRNCPY
125 jbe L(CopyFrom1To16BytesCase2OrCase3)
128 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
129 jnz L(CopyFrom1To16BytesUnalignedXmm3)
131 jnz L(CopyFrom1To16Bytes)
134 movaps 16(%rsi, %rcx), %xmm4
135 movdqu %xmm3, (%rdi, %rcx)
139 # ifdef USE_AS_STRNCPY
141 jbe L(CopyFrom1To16BytesCase2OrCase3)
144 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
145 jnz L(CopyFrom1To16BytesUnalignedXmm4)
147 jnz L(CopyFrom1To16Bytes)
150 movaps 16(%rsi, %rcx), %xmm1
151 movdqu %xmm4, (%rdi, %rcx)
155 # ifdef USE_AS_STRNCPY
157 jbe L(CopyFrom1To16BytesCase2OrCase3)
160 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
161 jnz L(CopyFrom1To16BytesUnalignedXmm1)
163 jnz L(CopyFrom1To16Bytes)
166 movaps 16(%rsi, %rcx), %xmm2
167 movdqu %xmm1, (%rdi, %rcx)
171 # ifdef USE_AS_STRNCPY
173 jbe L(CopyFrom1To16BytesCase2OrCase3)
176 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
177 jnz L(CopyFrom1To16BytesUnalignedXmm2)
179 jnz L(CopyFrom1To16Bytes)
182 movaps 16(%rsi, %rcx), %xmm3
183 movdqu %xmm2, (%rdi, %rcx)
187 # ifdef USE_AS_STRNCPY
189 jbe L(CopyFrom1To16BytesCase2OrCase3)
192 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
193 jnz L(CopyFrom1To16BytesUnalignedXmm3)
195 jnz L(CopyFrom1To16Bytes)
198 movdqu %xmm3, (%rdi, %rcx)
200 lea 16(%rsi, %rcx), %rsi
204 # ifdef USE_AS_STRNCPY
205 lea 128(%r8, %rdx), %r8
210 movaps 16(%rsi), %xmm5
211 movaps 32(%rsi), %xmm3
213 movaps 48(%rsi), %xmm7
219 # ifdef USE_AS_STRNCPY
221 jbe L(UnalignedLeaveCase2OrCase3)
224 jnz L(Unaligned64Leave)
/* 64-byte copy loop.  Each pass writes %xmm4..%xmm7 to the 64 bytes just
   below %rdi with unaligned stores (movdqu) and reloads registers from
   %rsi with movaps, which faults unless the source address is 16-byte
   aligned.
   NOTE(review): the pointer increments, the remaining loads and the
   instructions that set the flags tested below are not visible in this
   excerpt.  */
226 L(Unaligned64Loop_start):
229 movdqu %xmm4, -64(%rdi)
232 movdqu %xmm5, -48(%rdi)
233 movaps 16(%rsi), %xmm5
235 movaps 32(%rsi), %xmm3
236 movdqu %xmm6, -32(%rdi)
238 movdqu %xmm7, -16(%rdi)
239 movaps 48(%rsi), %xmm7
/* strncpy variants only: leave the loop through the length-limited path.  */
244 # ifdef USE_AS_STRNCPY
246 jbe L(UnalignedLeaveCase2OrCase3)
/* ZF set: run another 64-byte pass.  */
249 jz L(Unaligned64Loop_start)
259 jnz L(CopyFrom1To16BytesUnaligned_0)
261 jnz L(CopyFrom1To16BytesUnaligned_16)
268 jnz L(CopyFrom1To16BytesUnaligned_32)
272 movdqu %xmm5, 16(%rdi)
273 movdqu %xmm6, 32(%rdi)
274 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
275 # ifdef USE_AS_STPCPY
276 lea 48(%rdi, %rdx), %rax
278 movdqu %xmm7, 48(%rdi)
281 lea 49(%rdi, %rdx), %rdi
282 jmp L(StrncpyFillTailWithZero)
286 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
289 /* If source address alignment == destination address alignment */
291 L(SourceStringAlignmentZero):
297 # ifdef USE_AS_STRNCPY
298 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
303 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
306 jnz L(CopyFrom1To16BytesTail1)
308 pcmpeqb 16(%rsi), %xmm0
312 # ifdef USE_AS_STRNCPY
313 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
318 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
321 jnz L(CopyFrom1To32Bytes1)
324 /*------End of main part with loops---------------------*/
328 # if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
330 L(CopyFrom1To16Bytes):
334 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
337 L(CopyFrom1To16BytesTail):
338 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
343 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
346 L(CopyFrom1To32Bytes1):
349 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
352 L(CopyFrom1To16BytesTail1):
354 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
357 L(CopyFrom1To32Bytes):
358 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
365 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
368 L(CopyFrom1To16BytesUnaligned_0):
370 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
371 # ifdef USE_AS_STPCPY
372 lea (%rdi, %rdx), %rax
377 lea 1(%rdi, %rdx), %rdi
378 jmp L(StrncpyFillTailWithZero)
380 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
384 L(CopyFrom1To16BytesUnaligned_16):
387 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
388 # ifdef USE_AS_STPCPY
389 lea 16(%rdi, %rdx), %rax
391 movdqu %xmm5, 16(%rdi)
394 lea 17(%rdi, %rdx), %rdi
395 jmp L(StrncpyFillTailWithZero)
399 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
403 L(CopyFrom1To16BytesUnaligned_32):
406 movdqu %xmm5, 16(%rdi)
407 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
408 # ifdef USE_AS_STPCPY
409 lea 32(%rdi, %rdx), %rax
411 movdqu %xmm6, 32(%rdi)
414 lea 33(%rdi, %rdx), %rdi
415 jmp L(StrncpyFillTailWithZero)
419 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
422 # ifdef USE_AS_STRNCPY
423 # ifndef USE_AS_STRCAT
/* Store-and-exit stubs, one per vector register.  Each stub writes its
   16-byte register to (%rdi, %rcx) with an unaligned store and then jumps
   to the shared L(CopyFrom1To16BytesXmmExit) tail.  The %xmm2 variant is
   defined further down in the file, just ahead of that shared tail.  */
425 L(CopyFrom1To16BytesUnalignedXmm6):
426 movdqu %xmm6, (%rdi, %rcx)
427 jmp L(CopyFrom1To16BytesXmmExit)
430 L(CopyFrom1To16BytesUnalignedXmm5):
431 movdqu %xmm5, (%rdi, %rcx)
432 jmp L(CopyFrom1To16BytesXmmExit)
435 L(CopyFrom1To16BytesUnalignedXmm4):
436 movdqu %xmm4, (%rdi, %rcx)
437 jmp L(CopyFrom1To16BytesXmmExit)
440 L(CopyFrom1To16BytesUnalignedXmm3):
441 movdqu %xmm3, (%rdi, %rcx)
442 jmp L(CopyFrom1To16BytesXmmExit)
445 L(CopyFrom1To16BytesUnalignedXmm1):
446 movdqu %xmm1, (%rdi, %rcx)
447 jmp L(CopyFrom1To16BytesXmmExit)
451 L(CopyFrom1To16BytesExit):
452 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
457 L(CopyFrom1To16BytesCase2):
463 jb L(CopyFrom1To16BytesExit)
464 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
467 L(CopyFrom1To32BytesCase2):
474 jb L(CopyFrom1To16BytesExit)
475 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
477 L(CopyFrom1To16BytesTailCase2):
482 jb L(CopyFrom1To16BytesExit)
483 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
485 L(CopyFrom1To16BytesTail1Case2):
488 jb L(CopyFrom1To16BytesExit)
489 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
491 /* Case2 or Case3, Case3 */
494 L(CopyFrom1To16BytesCase2OrCase3):
496 jnz L(CopyFrom1To16BytesCase2)
497 L(CopyFrom1To16BytesCase3):
501 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
504 L(CopyFrom1To32BytesCase2OrCase3):
506 jnz L(CopyFrom1To32BytesCase2)
509 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
512 L(CopyFrom1To16BytesTailCase2OrCase3):
514 jnz L(CopyFrom1To16BytesTailCase2)
517 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
520 L(CopyFrom1To32Bytes1Case2OrCase3):
524 L(CopyFrom1To16BytesTail1Case2OrCase3):
526 jnz L(CopyFrom1To16BytesTail1Case2)
527 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
531 /*------------End of labels for copying 1-16 bytes--and 1-32 bytes----*/
536 # ifdef USE_AS_STPCPY
539 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
542 jnz L(StrncpyFillTailWithZero)
550 # ifdef USE_AS_STPCPY
553 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
556 jnz L(StrncpyFillTailWithZero)
565 # ifdef USE_AS_STPCPY
568 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
571 jnz L(StrncpyFillTailWithZero)
579 # ifdef USE_AS_STPCPY
582 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
585 jnz L(StrncpyFillTailWithZero)
594 # ifdef USE_AS_STPCPY
597 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
600 jnz L(StrncpyFillTailWithZero)
610 # ifdef USE_AS_STPCPY
613 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
616 jnz L(StrncpyFillTailWithZero)
626 # ifdef USE_AS_STPCPY
629 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
632 jnz L(StrncpyFillTailWithZero)
640 # ifdef USE_AS_STPCPY
643 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
646 jnz L(StrncpyFillTailWithZero)
655 # ifdef USE_AS_STPCPY
658 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
661 jnz L(StrncpyFillTailWithZero)
671 # ifdef USE_AS_STPCPY
674 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
677 jnz L(StrncpyFillTailWithZero)
687 # ifdef USE_AS_STPCPY
690 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
693 jnz L(StrncpyFillTailWithZero)
703 # ifdef USE_AS_STPCPY
706 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
709 jnz L(StrncpyFillTailWithZero)
719 # ifdef USE_AS_STPCPY
722 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
725 jnz L(StrncpyFillTailWithZero)
735 # ifdef USE_AS_STPCPY
738 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
741 jnz L(StrncpyFillTailWithZero)
751 # ifdef USE_AS_STPCPY
754 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
757 jnz L(StrncpyFillTailWithZero)
765 # ifdef USE_AS_STPCPY
768 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
771 jnz L(StrncpyFillTailWithZero)
780 # ifdef USE_AS_STPCPY
783 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
786 jnz L(StrncpyFillTailWithZero)
796 # ifdef USE_AS_STPCPY
799 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
802 jnz L(StrncpyFillTailWithZero)
812 # ifdef USE_AS_STPCPY
815 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
818 jnz L(StrncpyFillTailWithZero)
828 # ifdef USE_AS_STPCPY
831 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
834 jnz L(StrncpyFillTailWithZero)
845 # ifdef USE_AS_STPCPY
848 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
851 jnz L(StrncpyFillTailWithZero)
861 # ifdef USE_AS_STPCPY
864 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
867 jnz L(StrncpyFillTailWithZero)
877 # ifdef USE_AS_STPCPY
880 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
883 jnz L(StrncpyFillTailWithZero)
893 # ifdef USE_AS_STPCPY
896 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
899 jnz L(StrncpyFillTailWithZero)
910 # ifdef USE_AS_STPCPY
913 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
916 jnz L(StrncpyFillTailWithZero)
928 # ifdef USE_AS_STPCPY
931 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
934 jnz L(StrncpyFillTailWithZero)
946 # ifdef USE_AS_STPCPY
949 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
952 jnz L(StrncpyFillTailWithZero)
964 # ifdef USE_AS_STPCPY
967 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
970 jnz L(StrncpyFillTailWithZero)
977 movdqu 13(%rsi), %xmm2
979 movdqu %xmm2, 13(%rdi)
980 # ifdef USE_AS_STPCPY
983 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
986 jnz L(StrncpyFillTailWithZero)
993 movdqu 14(%rsi), %xmm2
995 movdqu %xmm2, 14(%rdi)
996 # ifdef USE_AS_STPCPY
999 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1002 jnz L(StrncpyFillTailWithZero)
1008 movdqu (%rsi), %xmm0
1009 movdqu 15(%rsi), %xmm2
1010 movdqu %xmm0, (%rdi)
1011 movdqu %xmm2, 15(%rdi)
1012 # ifdef USE_AS_STPCPY
1015 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1018 jnz L(StrncpyFillTailWithZero)
1024 movdqu (%rsi), %xmm0
1025 movdqu 16(%rsi), %xmm2
1026 movdqu %xmm0, (%rdi)
1027 movdqu %xmm2, 16(%rdi)
1028 # ifdef USE_AS_STPCPY
1031 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1034 jnz L(StrncpyFillTailWithZero)
1038 # ifdef USE_AS_STRNCPY
1042 # ifdef USE_AS_STPCPY
1045 # ifdef USE_AS_STRCAT
1055 # ifdef USE_AS_STPCPY
1058 # ifdef USE_AS_STRCAT
1068 # ifdef USE_AS_STPCPY
1071 # ifdef USE_AS_STRCAT
1083 # ifdef USE_AS_STPCPY
1086 # ifdef USE_AS_STRCAT
1096 # ifdef USE_AS_STPCPY
1099 # ifdef USE_AS_STRCAT
1111 # ifdef USE_AS_STPCPY
1114 # ifdef USE_AS_STRCAT
1126 # ifdef USE_AS_STPCPY
1129 # ifdef USE_AS_STRCAT
1141 # ifdef USE_AS_STPCPY
1144 # ifdef USE_AS_STRCAT
1154 # ifdef USE_AS_STPCPY
1157 # ifdef USE_AS_STRCAT
1169 # ifdef USE_AS_STPCPY
1172 # ifdef USE_AS_STRCAT
1184 # ifdef USE_AS_STPCPY
1187 # ifdef USE_AS_STRCAT
1199 # ifdef USE_AS_STPCPY
1202 # ifdef USE_AS_STRCAT
1214 # ifdef USE_AS_STPCPY
1217 # ifdef USE_AS_STRCAT
1229 # ifdef USE_AS_STPCPY
1232 # ifdef USE_AS_STRCAT
1244 # ifdef USE_AS_STPCPY
1247 # ifdef USE_AS_STRCAT
1259 # ifdef USE_AS_STPCPY
1262 # ifdef USE_AS_STRCAT
1270 movdqu (%rsi), %xmm0
1271 movdqu %xmm0, (%rdi)
1272 # ifdef USE_AS_STPCPY
1275 # ifdef USE_AS_STRCAT
1283 movdqu (%rsi), %xmm0
1285 movdqu %xmm0, (%rdi)
1287 # ifdef USE_AS_STPCPY
1290 # ifdef USE_AS_STRCAT
1298 movdqu (%rsi), %xmm0
1300 movdqu %xmm0, (%rdi)
1302 # ifdef USE_AS_STPCPY
1305 # ifdef USE_AS_STRCAT
1313 movdqu (%rsi), %xmm0
1315 movdqu %xmm0, (%rdi)
1317 # ifdef USE_AS_STPCPY
1320 # ifdef USE_AS_STRCAT
1328 movdqu (%rsi), %xmm0
1330 movdqu %xmm0, (%rdi)
1332 # ifdef USE_AS_STPCPY
1335 # ifdef USE_AS_STRCAT
1343 movdqu (%rsi), %xmm0
1346 movdqu %xmm0, (%rdi)
1349 # ifdef USE_AS_STPCPY
1352 # ifdef USE_AS_STRCAT
1360 movdqu (%rsi), %xmm0
1362 movdqu %xmm0, (%rdi)
1364 # ifdef USE_AS_STPCPY
1367 # ifdef USE_AS_STRCAT
1375 movdqu (%rsi), %xmm0
1377 movdqu %xmm0, (%rdi)
1379 # ifdef USE_AS_STPCPY
1382 # ifdef USE_AS_STRCAT
1390 movdqu (%rsi), %xmm0
1392 movdqu %xmm0, (%rdi)
1394 # ifdef USE_AS_STPCPY
1397 # ifdef USE_AS_STRCAT
1405 movdqu (%rsi), %xmm0
1408 movdqu %xmm0, (%rdi)
1411 # ifdef USE_AS_STPCPY
1414 # ifdef USE_AS_STRCAT
1422 movdqu (%rsi), %xmm0
1425 movdqu %xmm0, (%rdi)
1428 # ifdef USE_AS_STPCPY
1431 # ifdef USE_AS_STRCAT
1439 movdqu (%rsi), %xmm0
1442 movdqu %xmm0, (%rdi)
1445 # ifdef USE_AS_STPCPY
1448 # ifdef USE_AS_STRCAT
1456 movdqu (%rsi), %xmm0
1459 movdqu %xmm0, (%rdi)
1462 # ifdef USE_AS_STPCPY
1465 # ifdef USE_AS_STRCAT
1473 movdqu (%rsi), %xmm0
1474 movdqu 13(%rsi), %xmm2
1475 movdqu %xmm0, (%rdi)
1476 movdqu %xmm2, 13(%rdi)
1477 # ifdef USE_AS_STPCPY
1480 # ifdef USE_AS_STRCAT
1488 movdqu (%rsi), %xmm0
1489 movdqu 14(%rsi), %xmm2
1490 movdqu %xmm0, (%rdi)
1491 movdqu %xmm2, 14(%rdi)
1492 # ifdef USE_AS_STPCPY
1495 # ifdef USE_AS_STRCAT
1503 movdqu (%rsi), %xmm0
1504 movdqu 15(%rsi), %xmm2
1505 movdqu %xmm0, (%rdi)
1506 movdqu %xmm2, 15(%rdi)
1507 # ifdef USE_AS_STPCPY
1510 # ifdef USE_AS_STRCAT
1518 movdqu (%rsi), %xmm0
1519 movdqu 16(%rsi), %xmm2
1520 movdqu %xmm0, (%rdi)
1521 movdqu %xmm2, 16(%rdi)
1522 # ifdef USE_AS_STPCPY
1525 # ifdef USE_AS_STRCAT
1533 movdqu (%rsi), %xmm0
1534 movdqu 16(%rsi), %xmm2
1536 movdqu %xmm0, (%rdi)
1537 movdqu %xmm2, 16(%rdi)
1539 # ifdef USE_AS_STRCAT
1545 # ifndef USE_AS_STRCAT
1631 movdqu %xmm0, -1(%rdi)
1636 movdqu %xmm0, (%rdi)
1640 L(CopyFrom1To16BytesUnalignedXmm2):
1641 movdqu %xmm2, (%rdi, %rcx)
1644 L(CopyFrom1To16BytesXmmExit):
1648 # ifdef USE_AS_STPCPY
1649 lea (%rdi, %rdx), %rax
1652 lea 1(%rdi, %rdx), %rdi
/* Tail fill used by the strncpy variants: writes the contents of %xmm0
   over the destination starting at %rdi.
   NOTE(review): %xmm0 is presumably all-zero at this point and %r8
   presumably tracks the bytes still to fill; the instructions that set
   them up, advance %rdi and set the flags tested below are not visible
   in this excerpt -- confirm.  */
1655 L(StrncpyFillTailWithZero):
1659 jbe L(StrncpyFillExit)
/* First 16 bytes go out with an unaligned store; every later store in
   this block is movdqa, which faults unless its address is 16-byte
   aligned.  */
1661 movdqu %xmm0, (%rdi)
1669 jb L(StrncpyFillLess64)
/* Bulk loop: four aligned 16-byte stores, i.e. 64 bytes per pass.  */
1671 L(StrncpyFillLoopMovdqa):
1672 movdqa %xmm0, (%rdi)
1673 movdqa %xmm0, 16(%rdi)
1674 movdqa %xmm0, 32(%rdi)
1675 movdqa %xmm0, 48(%rdi)
1678 jae L(StrncpyFillLoopMovdqa)
/* Fewer than 64 bytes left: up to three more aligned stores, then the
   remainder is dispatched through L(FillTable) indexed by %r8.  */
1680 L(StrncpyFillLess64):
1682 jl L(StrncpyFillLess32)
1683 movdqa %xmm0, (%rdi)
1684 movdqa %xmm0, 16(%rdi)
1687 jl L(StrncpyFillExit)
1688 movdqa %xmm0, (%rdi)
1690 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
/* Fewer than 32 bytes left: at most one more aligned store.  */
1692 L(StrncpyFillLess32):
1694 jl L(StrncpyFillExit)
1695 movdqa %xmm0, (%rdi)
1697 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1701 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1703 /* end of ifndef USE_AS_STRCAT */
/* Length-limited exit from the 64-byte loop.  Branches to
   L(Unaligned64LeaveCase2) when ZF is clear, otherwise falls into
   L(Unaligned64LeaveCase3).
   NOTE(review): the instruction that sets the flags for this jnz is not
   visible in this excerpt.  */
1707 L(UnalignedLeaveCase2OrCase3):
1709 jnz L(Unaligned64LeaveCase2)
/* Case3: stores %xmm4..%xmm7 to 0/16/32/48(%rdi) one at a time, leaving
   for L(CopyFrom1To16BytesCase3) as soon as one of the intervening
   checks fires.
   NOTE(review): the first check is signed (jl) while the following ones
   are unsigned (jb); the arithmetic feeding them is not visible here --
   confirm the mix is intended.  */
1710 L(Unaligned64LeaveCase3):
1714 jl L(CopyFrom1To16BytesCase3)
1715 movdqu %xmm4, (%rdi)
1717 jb L(CopyFrom1To16BytesCase3)
1718 movdqu %xmm5, 16(%rdi)
1720 jb L(CopyFrom1To16BytesCase3)
1721 movdqu %xmm6, 32(%rdi)
1723 jb L(CopyFrom1To16BytesCase3)
1724 movdqu %xmm7, 48(%rdi)
1725 # ifdef USE_AS_STPCPY
1728 # ifdef USE_AS_STRCAT
/* Case2: walks %xmm4, %xmm5, %xmm6, %xmm7 in order.  For each one,
   pcmpeqb compares it byte-wise against %xmm0 and pmovmskb gathers the
   per-byte match bits into %rdx; the previous register is stored to the
   destination only after its own check has passed.
   NOTE(review): %xmm0 is presumably zero before each compare so that the
   mask marks NUL bytes; the instructions that reset it and that set the
   flags for the conditional jumps are not visible here.  The first
   length check is signed (jle) while the later ones are unsigned (jbe)
   -- confirm against the omitted arithmetic.  */
1735 L(Unaligned64LeaveCase2):
1737 pcmpeqb %xmm4, %xmm0
1738 pmovmskb %xmm0, %rdx
1740 jle L(CopyFrom1To16BytesCase2OrCase3)
1742 # ifndef USE_AS_STRCAT
1743 jnz L(CopyFrom1To16BytesUnalignedXmm4)
1745 jnz L(CopyFrom1To16Bytes)
1747 pcmpeqb %xmm5, %xmm0
1748 pmovmskb %xmm0, %rdx
1749 movdqu %xmm4, (%rdi)
1752 jbe L(CopyFrom1To16BytesCase2OrCase3)
1754 # ifndef USE_AS_STRCAT
1755 jnz L(CopyFrom1To16BytesUnalignedXmm5)
1757 jnz L(CopyFrom1To16Bytes)
1760 pcmpeqb %xmm6, %xmm0
1761 pmovmskb %xmm0, %rdx
1762 movdqu %xmm5, 16(%rdi)
1765 jbe L(CopyFrom1To16BytesCase2OrCase3)
1767 # ifndef USE_AS_STRCAT
1768 jnz L(CopyFrom1To16BytesUnalignedXmm6)
1770 jnz L(CopyFrom1To16Bytes)
1773 pcmpeqb %xmm7, %xmm0
1774 pmovmskb %xmm0, %rdx
1775 movdqu %xmm6, 32(%rdi)
/* Step both pointers past the chunk selected by %rcx before the final
   table dispatch.  */
1776 lea 16(%rdi, %rcx), %rdi
1777 lea 16(%rsi, %rcx), %rsi
1780 jb L(CopyFrom1To16BytesExit)
1781 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
1785 # ifndef USE_AS_STRCAT
1792 # ifndef USE_AS_STRCAT
/* Entries of the L(ExitTable) jump table: 32-bit offsets of L(Exit1)
   through L(Exit32), each relative to L(ExitTable) (see JMPTBL).  Entry k,
   counting from 0, targets L(Exit<k+1>).  The table is consumed by
   BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4).  The L(ExitTable) label
   itself is not visible in this excerpt.  */
1800 .int JMPTBL(L(Exit1), L(ExitTable))
1801 .int JMPTBL(L(Exit2), L(ExitTable))
1802 .int JMPTBL(L(Exit3), L(ExitTable))
1803 .int JMPTBL(L(Exit4), L(ExitTable))
1804 .int JMPTBL(L(Exit5), L(ExitTable))
1805 .int JMPTBL(L(Exit6), L(ExitTable))
1806 .int JMPTBL(L(Exit7), L(ExitTable))
1807 .int JMPTBL(L(Exit8), L(ExitTable))
1808 .int JMPTBL(L(Exit9), L(ExitTable))
1809 .int JMPTBL(L(Exit10), L(ExitTable))
1810 .int JMPTBL(L(Exit11), L(ExitTable))
1811 .int JMPTBL(L(Exit12), L(ExitTable))
1812 .int JMPTBL(L(Exit13), L(ExitTable))
1813 .int JMPTBL(L(Exit14), L(ExitTable))
1814 .int JMPTBL(L(Exit15), L(ExitTable))
1815 .int JMPTBL(L(Exit16), L(ExitTable))
1816 .int JMPTBL(L(Exit17), L(ExitTable))
1817 .int JMPTBL(L(Exit18), L(ExitTable))
1818 .int JMPTBL(L(Exit19), L(ExitTable))
1819 .int JMPTBL(L(Exit20), L(ExitTable))
1820 .int JMPTBL(L(Exit21), L(ExitTable))
1821 .int JMPTBL(L(Exit22), L(ExitTable))
1822 .int JMPTBL(L(Exit23), L(ExitTable))
1823 .int JMPTBL(L(Exit24), L(ExitTable))
1824 .int JMPTBL(L(Exit25), L(ExitTable))
1825 .int JMPTBL(L(Exit26), L(ExitTable))
1826 .int JMPTBL(L(Exit27), L(ExitTable))
1827 .int JMPTBL(L(Exit28), L(ExitTable))
1828 .int JMPTBL(L(Exit29), L(ExitTable))
1829 .int JMPTBL(L(Exit30), L(ExitTable))
1830 .int JMPTBL(L(Exit31), L(ExitTable))
1831 .int JMPTBL(L(Exit32), L(ExitTable))
1832 # ifdef USE_AS_STRNCPY
/* Jump table for the length-limited exits: 32-bit offsets of
   L(StrncpyExit0) through L(StrncpyExit33), each relative to
   L(ExitStrncpyTable) (see JMPTBL).  Entry k targets L(StrncpyExit<k>).
   Consumed by BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4).  */
1833 L(ExitStrncpyTable):
1834 .int JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable))
1835 .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1836 .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1837 .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1838 .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1839 .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1840 .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1841 .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1842 .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1843 .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1844 .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1845 .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1846 .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1847 .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1848 .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1849 .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1850 .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1851 .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1852 .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1853 .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1854 .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1855 .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1856 .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1857 .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1858 .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1859 .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1860 .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1861 .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1862 .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1863 .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1864 .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1865 .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1866 .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1867 .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
1868 # ifndef USE_AS_STRCAT
/* Entries of the L(FillTable) jump table: 32-bit offsets of L(Fill0)
   through L(Fill16), each relative to L(FillTable) (see JMPTBL).  Entry k
   targets L(Fill<k>).  Consumed by
   BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) in the tail-fill code.
   The L(FillTable) label itself is not visible in this excerpt.  */
1871 .int JMPTBL(L(Fill0), L(FillTable))
1872 .int JMPTBL(L(Fill1), L(FillTable))
1873 .int JMPTBL(L(Fill2), L(FillTable))
1874 .int JMPTBL(L(Fill3), L(FillTable))
1875 .int JMPTBL(L(Fill4), L(FillTable))
1876 .int JMPTBL(L(Fill5), L(FillTable))
1877 .int JMPTBL(L(Fill6), L(FillTable))
1878 .int JMPTBL(L(Fill7), L(FillTable))
1879 .int JMPTBL(L(Fill8), L(FillTable))
1880 .int JMPTBL(L(Fill9), L(FillTable))
1881 .int JMPTBL(L(Fill10), L(FillTable))
1882 .int JMPTBL(L(Fill11), L(FillTable))
1883 .int JMPTBL(L(Fill12), L(FillTable))
1884 .int JMPTBL(L(Fill13), L(FillTable))
1885 .int JMPTBL(L(Fill14), L(FillTable))
1886 .int JMPTBL(L(Fill15), L(FillTable))
1887 .int JMPTBL(L(Fill16), L(FillTable))