1 /* strcpy with SSE2 and unaligned load
2 Copyright (C) 2011-2013 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
22 # ifndef USE_AS_STRCAT
26 # define STRCPY __strcpy_sse2_unaligned
31 # define JMPTBL(I, B) I - B
32 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
33 lea TABLE(%rip), %r11; \
34 movslq (%r11, INDEX, SCALE), %rcx; \
35 lea (%r11, %rcx), %rcx; \
38 # ifndef USE_AS_STRCAT
42 # ifdef USE_AS_STRNCPY
48 # ifndef USE_AS_STPCPY
49 mov %rdi, %rax /* save result */
56 jbe L(SourceStringAlignmentLess32)
67 # ifdef USE_AS_STRNCPY
68 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
77 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
80 jnz L(CopyFrom1To16BytesTail)
82 pcmpeqb 16(%rsi), %xmm0
85 # ifdef USE_AS_STRNCPY
88 jbe L(CopyFrom1To32BytesCase2OrCase3)
91 jnz L(CopyFrom1To32Bytes)
93 movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
96 /* If source address alignment != destination address alignment */
100 # ifdef USE_AS_STRNCPY
104 movdqa (%rsi, %rcx), %xmm1
105 movaps 16(%rsi, %rcx), %xmm2
106 movdqu %xmm1, (%rdi, %rcx)
110 # ifdef USE_AS_STRNCPY
112 jbe L(CopyFrom1To16BytesCase2OrCase3)
115 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
116 jnz L(CopyFrom1To16BytesUnalignedXmm2)
118 jnz L(CopyFrom1To16Bytes)
121 movaps 16(%rsi, %rcx), %xmm3
122 movdqu %xmm2, (%rdi, %rcx)
126 # ifdef USE_AS_STRNCPY
128 jbe L(CopyFrom1To16BytesCase2OrCase3)
131 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
132 jnz L(CopyFrom1To16BytesUnalignedXmm3)
134 jnz L(CopyFrom1To16Bytes)
137 movaps 16(%rsi, %rcx), %xmm4
138 movdqu %xmm3, (%rdi, %rcx)
142 # ifdef USE_AS_STRNCPY
144 jbe L(CopyFrom1To16BytesCase2OrCase3)
147 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
148 jnz L(CopyFrom1To16BytesUnalignedXmm4)
150 jnz L(CopyFrom1To16Bytes)
153 movaps 16(%rsi, %rcx), %xmm1
154 movdqu %xmm4, (%rdi, %rcx)
158 # ifdef USE_AS_STRNCPY
160 jbe L(CopyFrom1To16BytesCase2OrCase3)
163 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
164 jnz L(CopyFrom1To16BytesUnalignedXmm1)
166 jnz L(CopyFrom1To16Bytes)
169 movaps 16(%rsi, %rcx), %xmm2
170 movdqu %xmm1, (%rdi, %rcx)
174 # ifdef USE_AS_STRNCPY
176 jbe L(CopyFrom1To16BytesCase2OrCase3)
179 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
180 jnz L(CopyFrom1To16BytesUnalignedXmm2)
182 jnz L(CopyFrom1To16Bytes)
185 movaps 16(%rsi, %rcx), %xmm3
186 movdqu %xmm2, (%rdi, %rcx)
190 # ifdef USE_AS_STRNCPY
192 jbe L(CopyFrom1To16BytesCase2OrCase3)
195 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
196 jnz L(CopyFrom1To16BytesUnalignedXmm3)
198 jnz L(CopyFrom1To16Bytes)
201 movdqu %xmm3, (%rdi, %rcx)
203 lea 16(%rsi, %rcx), %rsi
207 # ifdef USE_AS_STRNCPY
208 lea 128(%r8, %rdx), %r8
213 movaps 16(%rsi), %xmm5
214 movaps 32(%rsi), %xmm3
216 movaps 48(%rsi), %xmm7
222 # ifdef USE_AS_STRNCPY
224 jbe L(UnalignedLeaveCase2OrCase3)
227 jnz L(Unaligned64Leave)
229 L(Unaligned64Loop_start):
232 movdqu %xmm4, -64(%rdi)
235 movdqu %xmm5, -48(%rdi)
236 movaps 16(%rsi), %xmm5
238 movaps 32(%rsi), %xmm3
239 movdqu %xmm6, -32(%rdi)
241 movdqu %xmm7, -16(%rdi)
242 movaps 48(%rsi), %xmm7
247 # ifdef USE_AS_STRNCPY
249 jbe L(UnalignedLeaveCase2OrCase3)
252 jz L(Unaligned64Loop_start)
262 jnz L(CopyFrom1To16BytesUnaligned_0)
264 jnz L(CopyFrom1To16BytesUnaligned_16)
271 jnz L(CopyFrom1To16BytesUnaligned_32)
275 movdqu %xmm5, 16(%rdi)
276 movdqu %xmm6, 32(%rdi)
277 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
278 # ifdef USE_AS_STPCPY
279 lea 48(%rdi, %rdx), %rax
281 movdqu %xmm7, 48(%rdi)
284 lea 49(%rdi, %rdx), %rdi
285 jmp L(StrncpyFillTailWithZero)
289 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
292 /* If source address alignment == destination address alignment */
294 L(SourceStringAlignmentLess32):
297 movdqu 16(%rsi), %xmm2
301 # ifdef USE_AS_STRNCPY
302 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
307 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
310 jnz L(CopyFrom1To16BytesTail1)
316 # ifdef USE_AS_STRNCPY
317 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
322 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
325 jnz L(CopyFrom1To32Bytes1)
331 /*------End of main part with loops---------------------*/
335 # if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
337 L(CopyFrom1To16Bytes):
341 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
344 L(CopyFrom1To16BytesTail):
347 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
350 L(CopyFrom1To32Bytes1):
353 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
356 L(CopyFrom1To16BytesTail1):
358 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
361 L(CopyFrom1To32Bytes):
366 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
369 L(CopyFrom1To16BytesUnaligned_0):
371 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
372 # ifdef USE_AS_STPCPY
373 lea (%rdi, %rdx), %rax
378 lea 1(%rdi, %rdx), %rdi
379 jmp L(StrncpyFillTailWithZero)
381 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
385 L(CopyFrom1To16BytesUnaligned_16):
388 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
389 # ifdef USE_AS_STPCPY
390 lea 16(%rdi, %rdx), %rax
392 movdqu %xmm5, 16(%rdi)
395 lea 17(%rdi, %rdx), %rdi
396 jmp L(StrncpyFillTailWithZero)
400 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
404 L(CopyFrom1To16BytesUnaligned_32):
407 movdqu %xmm5, 16(%rdi)
408 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
409 # ifdef USE_AS_STPCPY
410 lea 32(%rdi, %rdx), %rax
412 movdqu %xmm6, 32(%rdi)
415 lea 33(%rdi, %rdx), %rdi
416 jmp L(StrncpyFillTailWithZero)
420 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
423 # ifdef USE_AS_STRNCPY
424 # ifndef USE_AS_STRCAT
/* Exit stub reached by a conditional jump elsewhere in this file:
   write the 16 bytes held in %xmm6 to the address %rdi + %rcx using an
   unaligned store, then continue at L(CopyFrom1To16BytesXmmExit).  */
426 L(CopyFrom1To16BytesUnalignedXmm6):
427 movdqu %xmm6, (%rdi, %rcx)
428 jmp L(CopyFrom1To16BytesXmmExit)
/* Exit stub reached by a conditional jump elsewhere in this file:
   write the 16 bytes held in %xmm5 to the address %rdi + %rcx using an
   unaligned store, then continue at L(CopyFrom1To16BytesXmmExit).  */
431 L(CopyFrom1To16BytesUnalignedXmm5):
432 movdqu %xmm5, (%rdi, %rcx)
433 jmp L(CopyFrom1To16BytesXmmExit)
/* Exit stub reached by a conditional jump elsewhere in this file:
   write the 16 bytes held in %xmm4 to the address %rdi + %rcx using an
   unaligned store, then continue at L(CopyFrom1To16BytesXmmExit).  */
436 L(CopyFrom1To16BytesUnalignedXmm4):
437 movdqu %xmm4, (%rdi, %rcx)
438 jmp L(CopyFrom1To16BytesXmmExit)
/* Exit stub reached by a conditional jump elsewhere in this file:
   write the 16 bytes held in %xmm3 to the address %rdi + %rcx using an
   unaligned store, then continue at L(CopyFrom1To16BytesXmmExit).  */
441 L(CopyFrom1To16BytesUnalignedXmm3):
442 movdqu %xmm3, (%rdi, %rcx)
443 jmp L(CopyFrom1To16BytesXmmExit)
/* Exit stub reached by a conditional jump elsewhere in this file:
   write the 16 bytes held in %xmm1 to the address %rdi + %rcx using an
   unaligned store, then continue at L(CopyFrom1To16BytesXmmExit).  */
446 L(CopyFrom1To16BytesUnalignedXmm1):
447 movdqu %xmm1, (%rdi, %rcx)
448 jmp L(CopyFrom1To16BytesXmmExit)
452 L(CopyFrom1To16BytesExit):
453 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
458 L(CopyFrom1To16BytesCase2):
464 jb L(CopyFrom1To16BytesExit)
465 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
468 L(CopyFrom1To32BytesCase2):
474 jb L(CopyFrom1To16BytesExit)
475 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
477 L(CopyFrom1To16BytesTailCase2):
481 jb L(CopyFrom1To16BytesExit)
482 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
484 L(CopyFrom1To16BytesTail1Case2):
487 jb L(CopyFrom1To16BytesExit)
488 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
490 /* Case2 or Case3, Case3 */
493 L(CopyFrom1To16BytesCase2OrCase3):
495 jnz L(CopyFrom1To16BytesCase2)
496 L(CopyFrom1To16BytesCase3):
500 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
503 L(CopyFrom1To32BytesCase2OrCase3):
505 jnz L(CopyFrom1To32BytesCase2)
507 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
510 L(CopyFrom1To16BytesTailCase2OrCase3):
512 jnz L(CopyFrom1To16BytesTailCase2)
514 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
517 L(CopyFrom1To32Bytes1Case2OrCase3):
521 L(CopyFrom1To16BytesTail1Case2OrCase3):
523 jnz L(CopyFrom1To16BytesTail1Case2)
524 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
528 /*------------End of labels for copying 1-16 bytes and 1-32 bytes----*/
533 # ifdef USE_AS_STPCPY
536 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
539 jnz L(StrncpyFillTailWithZero)
547 # ifdef USE_AS_STPCPY
550 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
553 jnz L(StrncpyFillTailWithZero)
562 # ifdef USE_AS_STPCPY
565 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
568 jnz L(StrncpyFillTailWithZero)
576 # ifdef USE_AS_STPCPY
579 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
582 jnz L(StrncpyFillTailWithZero)
591 # ifdef USE_AS_STPCPY
594 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
597 jnz L(StrncpyFillTailWithZero)
607 # ifdef USE_AS_STPCPY
610 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
613 jnz L(StrncpyFillTailWithZero)
623 # ifdef USE_AS_STPCPY
626 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
629 jnz L(StrncpyFillTailWithZero)
637 # ifdef USE_AS_STPCPY
640 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
643 jnz L(StrncpyFillTailWithZero)
652 # ifdef USE_AS_STPCPY
655 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
658 jnz L(StrncpyFillTailWithZero)
668 # ifdef USE_AS_STPCPY
671 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
674 jnz L(StrncpyFillTailWithZero)
684 # ifdef USE_AS_STPCPY
687 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
690 jnz L(StrncpyFillTailWithZero)
700 # ifdef USE_AS_STPCPY
703 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
706 jnz L(StrncpyFillTailWithZero)
716 # ifdef USE_AS_STPCPY
719 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
722 jnz L(StrncpyFillTailWithZero)
732 # ifdef USE_AS_STPCPY
735 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
738 jnz L(StrncpyFillTailWithZero)
748 # ifdef USE_AS_STPCPY
751 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
754 jnz L(StrncpyFillTailWithZero)
762 # ifdef USE_AS_STPCPY
765 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
768 jnz L(StrncpyFillTailWithZero)
777 # ifdef USE_AS_STPCPY
780 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
783 jnz L(StrncpyFillTailWithZero)
793 # ifdef USE_AS_STPCPY
796 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
799 jnz L(StrncpyFillTailWithZero)
809 # ifdef USE_AS_STPCPY
812 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
815 jnz L(StrncpyFillTailWithZero)
825 # ifdef USE_AS_STPCPY
828 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
831 jnz L(StrncpyFillTailWithZero)
842 # ifdef USE_AS_STPCPY
845 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
848 jnz L(StrncpyFillTailWithZero)
858 # ifdef USE_AS_STPCPY
861 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
864 jnz L(StrncpyFillTailWithZero)
874 # ifdef USE_AS_STPCPY
877 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
880 jnz L(StrncpyFillTailWithZero)
890 # ifdef USE_AS_STPCPY
893 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
896 jnz L(StrncpyFillTailWithZero)
907 # ifdef USE_AS_STPCPY
910 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
913 jnz L(StrncpyFillTailWithZero)
925 # ifdef USE_AS_STPCPY
928 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
931 jnz L(StrncpyFillTailWithZero)
943 # ifdef USE_AS_STPCPY
946 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
949 jnz L(StrncpyFillTailWithZero)
961 # ifdef USE_AS_STPCPY
964 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
967 jnz L(StrncpyFillTailWithZero)
974 movdqu 13(%rsi), %xmm2
976 movdqu %xmm2, 13(%rdi)
977 # ifdef USE_AS_STPCPY
980 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
983 jnz L(StrncpyFillTailWithZero)
990 movdqu 14(%rsi), %xmm2
992 movdqu %xmm2, 14(%rdi)
993 # ifdef USE_AS_STPCPY
996 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
999 jnz L(StrncpyFillTailWithZero)
1005 movdqu (%rsi), %xmm0
1006 movdqu 15(%rsi), %xmm2
1007 movdqu %xmm0, (%rdi)
1008 movdqu %xmm2, 15(%rdi)
1009 # ifdef USE_AS_STPCPY
1012 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1015 jnz L(StrncpyFillTailWithZero)
1021 movdqu (%rsi), %xmm0
1022 movdqu 16(%rsi), %xmm2
1023 movdqu %xmm0, (%rdi)
1024 movdqu %xmm2, 16(%rdi)
1025 # ifdef USE_AS_STPCPY
1028 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1031 jnz L(StrncpyFillTailWithZero)
1035 # ifdef USE_AS_STRNCPY
1039 # ifdef USE_AS_STPCPY
1042 # ifdef USE_AS_STRCAT
1052 # ifdef USE_AS_STPCPY
1055 # ifdef USE_AS_STRCAT
1065 # ifdef USE_AS_STPCPY
1068 # ifdef USE_AS_STRCAT
1080 # ifdef USE_AS_STPCPY
1083 # ifdef USE_AS_STRCAT
1093 # ifdef USE_AS_STPCPY
1096 # ifdef USE_AS_STRCAT
1108 # ifdef USE_AS_STPCPY
1111 # ifdef USE_AS_STRCAT
1123 # ifdef USE_AS_STPCPY
1126 # ifdef USE_AS_STRCAT
1138 # ifdef USE_AS_STPCPY
1141 # ifdef USE_AS_STRCAT
1151 # ifdef USE_AS_STPCPY
1154 # ifdef USE_AS_STRCAT
1166 # ifdef USE_AS_STPCPY
1169 # ifdef USE_AS_STRCAT
1181 # ifdef USE_AS_STPCPY
1184 # ifdef USE_AS_STRCAT
1196 # ifdef USE_AS_STPCPY
1199 # ifdef USE_AS_STRCAT
1211 # ifdef USE_AS_STPCPY
1214 # ifdef USE_AS_STRCAT
1226 # ifdef USE_AS_STPCPY
1229 # ifdef USE_AS_STRCAT
1241 # ifdef USE_AS_STPCPY
1244 # ifdef USE_AS_STRCAT
1256 # ifdef USE_AS_STPCPY
1259 # ifdef USE_AS_STRCAT
1267 movdqu (%rsi), %xmm0
1268 movdqu %xmm0, (%rdi)
1269 # ifdef USE_AS_STPCPY
1272 # ifdef USE_AS_STRCAT
1280 movdqu (%rsi), %xmm0
1282 movdqu %xmm0, (%rdi)
1284 # ifdef USE_AS_STPCPY
1287 # ifdef USE_AS_STRCAT
1295 movdqu (%rsi), %xmm0
1297 movdqu %xmm0, (%rdi)
1299 # ifdef USE_AS_STPCPY
1302 # ifdef USE_AS_STRCAT
1310 movdqu (%rsi), %xmm0
1312 movdqu %xmm0, (%rdi)
1314 # ifdef USE_AS_STPCPY
1317 # ifdef USE_AS_STRCAT
1325 movdqu (%rsi), %xmm0
1327 movdqu %xmm0, (%rdi)
1329 # ifdef USE_AS_STPCPY
1332 # ifdef USE_AS_STRCAT
1340 movdqu (%rsi), %xmm0
1343 movdqu %xmm0, (%rdi)
1346 # ifdef USE_AS_STPCPY
1349 # ifdef USE_AS_STRCAT
1357 movdqu (%rsi), %xmm0
1359 movdqu %xmm0, (%rdi)
1361 # ifdef USE_AS_STPCPY
1364 # ifdef USE_AS_STRCAT
1372 movdqu (%rsi), %xmm0
1374 movdqu %xmm0, (%rdi)
1376 # ifdef USE_AS_STPCPY
1379 # ifdef USE_AS_STRCAT
1387 movdqu (%rsi), %xmm0
1389 movdqu %xmm0, (%rdi)
1391 # ifdef USE_AS_STPCPY
1394 # ifdef USE_AS_STRCAT
1402 movdqu (%rsi), %xmm0
1405 movdqu %xmm0, (%rdi)
1408 # ifdef USE_AS_STPCPY
1411 # ifdef USE_AS_STRCAT
1419 movdqu (%rsi), %xmm0
1422 movdqu %xmm0, (%rdi)
1425 # ifdef USE_AS_STPCPY
1428 # ifdef USE_AS_STRCAT
1436 movdqu (%rsi), %xmm0
1439 movdqu %xmm0, (%rdi)
1442 # ifdef USE_AS_STPCPY
1445 # ifdef USE_AS_STRCAT
1453 movdqu (%rsi), %xmm0
1456 movdqu %xmm0, (%rdi)
1459 # ifdef USE_AS_STPCPY
1462 # ifdef USE_AS_STRCAT
1470 movdqu (%rsi), %xmm0
1471 movdqu 13(%rsi), %xmm2
1472 movdqu %xmm0, (%rdi)
1473 movdqu %xmm2, 13(%rdi)
1474 # ifdef USE_AS_STPCPY
1477 # ifdef USE_AS_STRCAT
1485 movdqu (%rsi), %xmm0
1486 movdqu 14(%rsi), %xmm2
1487 movdqu %xmm0, (%rdi)
1488 movdqu %xmm2, 14(%rdi)
1489 # ifdef USE_AS_STPCPY
1492 # ifdef USE_AS_STRCAT
1500 movdqu (%rsi), %xmm0
1501 movdqu 15(%rsi), %xmm2
1502 movdqu %xmm0, (%rdi)
1503 movdqu %xmm2, 15(%rdi)
1504 # ifdef USE_AS_STPCPY
1507 # ifdef USE_AS_STRCAT
1515 movdqu (%rsi), %xmm0
1516 movdqu 16(%rsi), %xmm2
1517 movdqu %xmm0, (%rdi)
1518 movdqu %xmm2, 16(%rdi)
1519 # ifdef USE_AS_STPCPY
1522 # ifdef USE_AS_STRCAT
1530 movdqu (%rsi), %xmm0
1531 movdqu 16(%rsi), %xmm2
1533 movdqu %xmm0, (%rdi)
1534 movdqu %xmm2, 16(%rdi)
1536 # ifdef USE_AS_STRCAT
1542 # ifndef USE_AS_STRCAT
1628 movdqu %xmm0, -1(%rdi)
1633 movdqu %xmm0, (%rdi)
1637 L(CopyFrom1To16BytesUnalignedXmm2):
1638 movdqu %xmm2, (%rdi, %rcx)
1641 L(CopyFrom1To16BytesXmmExit):
1645 # ifdef USE_AS_STPCPY
1646 lea (%rdi, %rdx), %rax
1649 lea 1(%rdi, %rdx), %rdi
1652 L(StrncpyFillTailWithZero):
1656 jbe L(StrncpyFillExit)
1658 movdqu %xmm0, (%rdi)
1666 jb L(StrncpyFillLess64)
1668 L(StrncpyFillLoopMovdqa):
1669 movdqa %xmm0, (%rdi)
1670 movdqa %xmm0, 16(%rdi)
1671 movdqa %xmm0, 32(%rdi)
1672 movdqa %xmm0, 48(%rdi)
1675 jae L(StrncpyFillLoopMovdqa)
1677 L(StrncpyFillLess64):
1679 jl L(StrncpyFillLess32)
1680 movdqa %xmm0, (%rdi)
1681 movdqa %xmm0, 16(%rdi)
1684 jl L(StrncpyFillExit)
1685 movdqa %xmm0, (%rdi)
1687 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1689 L(StrncpyFillLess32):
1691 jl L(StrncpyFillExit)
1692 movdqa %xmm0, (%rdi)
1694 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1698 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1700 /* end of ifndef USE_AS_STRCAT */
1704 L(UnalignedLeaveCase2OrCase3):
1706 jnz L(Unaligned64LeaveCase2)
1707 L(Unaligned64LeaveCase3):
1711 jl L(CopyFrom1To16BytesCase3)
1712 movdqu %xmm4, (%rdi)
1714 jb L(CopyFrom1To16BytesCase3)
1715 movdqu %xmm5, 16(%rdi)
1717 jb L(CopyFrom1To16BytesCase3)
1718 movdqu %xmm6, 32(%rdi)
1720 jb L(CopyFrom1To16BytesCase3)
1721 movdqu %xmm7, 48(%rdi)
1722 # ifdef USE_AS_STPCPY
1725 # ifdef USE_AS_STRCAT
1732 L(Unaligned64LeaveCase2):
1734 pcmpeqb %xmm4, %xmm0
1735 pmovmskb %xmm0, %rdx
1737 jle L(CopyFrom1To16BytesCase2OrCase3)
1739 # ifndef USE_AS_STRCAT
1740 jnz L(CopyFrom1To16BytesUnalignedXmm4)
1742 jnz L(CopyFrom1To16Bytes)
1744 pcmpeqb %xmm5, %xmm0
1745 pmovmskb %xmm0, %rdx
1746 movdqu %xmm4, (%rdi)
1749 jbe L(CopyFrom1To16BytesCase2OrCase3)
1751 # ifndef USE_AS_STRCAT
1752 jnz L(CopyFrom1To16BytesUnalignedXmm5)
1754 jnz L(CopyFrom1To16Bytes)
1757 pcmpeqb %xmm6, %xmm0
1758 pmovmskb %xmm0, %rdx
1759 movdqu %xmm5, 16(%rdi)
1762 jbe L(CopyFrom1To16BytesCase2OrCase3)
1764 # ifndef USE_AS_STRCAT
1765 jnz L(CopyFrom1To16BytesUnalignedXmm6)
1767 jnz L(CopyFrom1To16Bytes)
1770 pcmpeqb %xmm7, %xmm0
1771 pmovmskb %xmm0, %rdx
1772 movdqu %xmm6, 32(%rdi)
1773 lea 16(%rdi, %rcx), %rdi
1774 lea 16(%rsi, %rcx), %rsi
1777 jb L(CopyFrom1To16BytesExit)
1778 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
1782 # ifndef USE_AS_STRCAT
1789 # ifndef USE_AS_STRCAT
/* Jump-table entries for L(ExitTable).  Each entry is
   JMPTBL (target, table), which the JMPTBL macro defines as
   "target - table", i.e. an offset relative to the table itself.
   BRANCH_TO_JMPTBL_ENTRY reads an entry with movslq and adds the table
   address back to form the target.  This file indexes the table with
   %rdx scaled by 4.  The entries shown run from Exit1 to Exit32 in
   order; their order must not change.
   NOTE(review): the table's label line is not visible here, so whether
   Exit1 is really the entry at index 0 should be confirmed.  */
1797 .int JMPTBL(L(Exit1), L(ExitTable))
1798 .int JMPTBL(L(Exit2), L(ExitTable))
1799 .int JMPTBL(L(Exit3), L(ExitTable))
1800 .int JMPTBL(L(Exit4), L(ExitTable))
1801 .int JMPTBL(L(Exit5), L(ExitTable))
1802 .int JMPTBL(L(Exit6), L(ExitTable))
1803 .int JMPTBL(L(Exit7), L(ExitTable))
1804 .int JMPTBL(L(Exit8), L(ExitTable))
1805 .int JMPTBL(L(Exit9), L(ExitTable))
1806 .int JMPTBL(L(Exit10), L(ExitTable))
1807 .int JMPTBL(L(Exit11), L(ExitTable))
1808 .int JMPTBL(L(Exit12), L(ExitTable))
1809 .int JMPTBL(L(Exit13), L(ExitTable))
1810 .int JMPTBL(L(Exit14), L(ExitTable))
1811 .int JMPTBL(L(Exit15), L(ExitTable))
1812 .int JMPTBL(L(Exit16), L(ExitTable))
1813 .int JMPTBL(L(Exit17), L(ExitTable))
1814 .int JMPTBL(L(Exit18), L(ExitTable))
1815 .int JMPTBL(L(Exit19), L(ExitTable))
1816 .int JMPTBL(L(Exit20), L(ExitTable))
1817 .int JMPTBL(L(Exit21), L(ExitTable))
1818 .int JMPTBL(L(Exit22), L(ExitTable))
1819 .int JMPTBL(L(Exit23), L(ExitTable))
1820 .int JMPTBL(L(Exit24), L(ExitTable))
1821 .int JMPTBL(L(Exit25), L(ExitTable))
1822 .int JMPTBL(L(Exit26), L(ExitTable))
1823 .int JMPTBL(L(Exit27), L(ExitTable))
1824 .int JMPTBL(L(Exit28), L(ExitTable))
1825 .int JMPTBL(L(Exit29), L(ExitTable))
1826 .int JMPTBL(L(Exit30), L(ExitTable))
1827 .int JMPTBL(L(Exit31), L(ExitTable))
1828 .int JMPTBL(L(Exit32), L(ExitTable))
1829 # ifdef USE_AS_STRNCPY
/* Jump table used by the USE_AS_STRNCPY variants.  Entry k is
   JMPTBL (L(StrncpyExitk), L(ExitStrncpyTable)), which the JMPTBL macro
   defines as "target - table", i.e. an offset relative to this label.
   BRANCH_TO_JMPTBL_ENTRY reads an entry with movslq and adds the table
   address back to form the target.  This file indexes the table with
   %r8 scaled by 4.  There are 34 entries, StrncpyExit0 through
   StrncpyExit33, in index order; their order must not change.  */
1830 L(ExitStrncpyTable):
1831 .int JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable))
1832 .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1833 .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1834 .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1835 .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1836 .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1837 .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1838 .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1839 .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1840 .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1841 .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1842 .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1843 .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1844 .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1845 .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1846 .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1847 .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1848 .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1849 .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1850 .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1851 .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1852 .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1853 .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1854 .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1855 .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1856 .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1857 .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1858 .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1859 .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1860 .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1861 .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1862 .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1863 .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1864 .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
1865 # ifndef USE_AS_STRCAT
/* Jump-table entries for L(FillTable).  Each entry is
   JMPTBL (target, table), which the JMPTBL macro defines as
   "target - table", i.e. an offset relative to the table itself.
   BRANCH_TO_JMPTBL_ENTRY reads an entry with movslq and adds the table
   address back to form the target.  This file indexes the table with
   %r8 scaled by 4.  The entries shown run from Fill0 to Fill16 in
   order; their order must not change.
   NOTE(review): the table's label line is not visible here, so whether
   Fill0 is really the entry at index 0, and whether more entries follow
   Fill16, should be confirmed.  */
1868 .int JMPTBL(L(Fill0), L(FillTable))
1869 .int JMPTBL(L(Fill1), L(FillTable))
1870 .int JMPTBL(L(Fill2), L(FillTable))
1871 .int JMPTBL(L(Fill3), L(FillTable))
1872 .int JMPTBL(L(Fill4), L(FillTable))
1873 .int JMPTBL(L(Fill5), L(FillTable))
1874 .int JMPTBL(L(Fill6), L(FillTable))
1875 .int JMPTBL(L(Fill7), L(FillTable))
1876 .int JMPTBL(L(Fill8), L(FillTable))
1877 .int JMPTBL(L(Fill9), L(FillTable))
1878 .int JMPTBL(L(Fill10), L(FillTable))
1879 .int JMPTBL(L(Fill11), L(FillTable))
1880 .int JMPTBL(L(Fill12), L(FillTable))
1881 .int JMPTBL(L(Fill13), L(FillTable))
1882 .int JMPTBL(L(Fill14), L(FillTable))
1883 .int JMPTBL(L(Fill15), L(FillTable))
1884 .int JMPTBL(L(Fill16), L(FillTable))