2 Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
/* Call-frame annotations that accompany a 4-byte push of REG: adjust
   the CFA offset by 4, then note REG at relative offset 0.  The
   expansion holds only these two cfi_* annotations; the PUSH macro is
   what issues the pushl itself.  */
26 # define CFI_PUSH(REG) \
27 cfi_adjust_cfa_offset (4); \
28 cfi_rel_offset (REG, 0)
30 # define CFI_POP(REG) \
31 cfi_adjust_cfa_offset (-4); \
34 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
35 # define POP(REG) popl REG; CFI_POP (REG)
38 # define JMPTBL(I, B) I - B
40 /* Load an entry in a jump table into ECX and branch to it. TABLE is a
41 jump table with relative offsets. INDEX is a register that contains the
42 index into the jump table. SCALE is the scale factor applied to INDEX. */
44 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
45 /* We first load PC into ECX. */ \
47 /* Get the address of the jump table. */ \
48 addl $(TABLE - .), %ecx; \
49 /* Get the entry and convert the relative offset to the \
50 absolute address. */ \
51 addl (%ecx,INDEX,SCALE), %ecx; \
52 /* We loaded the jump table and adjusted ECX. Go. */ \
55 # define JMPTBL(I, B) I
57 /* Branch to an entry in a jump table. TABLE is a jump table with
58 absolute addresses. INDEX is a register that contains the index into
59 the jump table. SCALE is the scale factor applied to INDEX. */
61 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
62 jmp *TABLE(,INDEX,SCALE)
66 # define STRCAT __strcat_sse2
73 # ifdef USE_AS_STRNCAT
80 # define USE_AS_STRCAT
81 # ifdef USE_AS_STRNCAT
82 # define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi);
84 # define RETURN POP(%esi); ret; CFI_PUSH(%esi);
92 # ifdef USE_AS_STRNCAT
108 ja L(alignment_prolog)
116 movdqu 16(%esi), %xmm6
133 pcmpeqb (%eax), %xmm0
135 movdqu 16(%esi), %xmm6
151 pcmpeqb 16(%eax), %xmm0
156 pcmpeqb 32(%eax), %xmm1
161 pcmpeqb 48(%eax), %xmm2
166 pcmpeqb 64(%eax), %xmm3
173 jmp L(StartStrcpyPart)
178 lea 16(%eax, %edx), %eax
179 jmp L(StartStrcpyPart)
184 lea 32(%eax, %edx), %eax
185 jmp L(StartStrcpyPart)
190 lea 48(%eax, %edx), %eax
191 jmp L(StartStrcpyPart)
198 jmp L(StartStrcpyPart)
208 # ifdef USE_AS_STRNCAT
210 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
213 jnz L(CopyFrom1To16BytesTail1)
217 # ifdef USE_AS_STRNCAT
219 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
222 jnz L(CopyFrom1To32Bytes1)
228 # ifdef USE_AS_STRNCAT
241 pcmpeqb (%eax), %xmm0
255 pcmpeqb 16(%eax), %xmm0
260 pcmpeqb 32(%eax), %xmm1
265 pcmpeqb 48(%eax), %xmm2
270 pcmpeqb 64(%eax), %xmm3
277 jmp L(StartStrcpyPart_1)
282 lea 16(%eax, %edx), %eax
283 jmp L(StartStrcpyPart_1)
288 lea 32(%eax, %edx), %eax
289 jmp L(StartStrcpyPart_1)
294 lea 48(%eax, %edx), %eax
295 jmp L(StartStrcpyPart_1)
304 L(StartStrcpyPart_1):
311 # ifdef USE_AS_STRNCAT
315 pcmpeqb (%esi), %xmm1
316 # ifdef USE_AS_STRNCAT
321 # ifdef USE_AS_STRNCAT
323 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
326 jnz L(CopyFrom1To16BytesTail)
328 pcmpeqb 16(%esi), %xmm0
330 # ifdef USE_AS_STRNCAT
332 jbe L(CopyFrom1To32BytesCase2OrCase3)
335 jnz L(CopyFrom1To32Bytes)
337 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
344 movdqa (%esi, %ecx), %xmm1
345 movaps 16(%esi, %ecx), %xmm2
346 movdqu %xmm1, (%eax, %ecx)
350 # ifdef USE_AS_STRNCAT
352 jbe L(CopyFrom1To16BytesCase2OrCase3)
355 jnz L(CopyFrom1To16Bytes)
356 L(Unalign16BothBigN):
357 movaps 16(%esi, %ecx), %xmm3
358 movdqu %xmm2, (%eax, %ecx)
362 # ifdef USE_AS_STRNCAT
364 jbe L(CopyFrom1To16BytesCase2OrCase3)
367 jnz L(CopyFrom1To16Bytes)
369 movaps 16(%esi, %ecx), %xmm4
370 movdqu %xmm3, (%eax, %ecx)
374 # ifdef USE_AS_STRNCAT
376 jbe L(CopyFrom1To16BytesCase2OrCase3)
379 jnz L(CopyFrom1To16Bytes)
381 movaps 16(%esi, %ecx), %xmm1
382 movdqu %xmm4, (%eax, %ecx)
386 # ifdef USE_AS_STRNCAT
388 jbe L(CopyFrom1To16BytesCase2OrCase3)
391 jnz L(CopyFrom1To16Bytes)
393 movaps 16(%esi, %ecx), %xmm2
394 movdqu %xmm1, (%eax, %ecx)
398 # ifdef USE_AS_STRNCAT
400 jbe L(CopyFrom1To16BytesCase2OrCase3)
403 jnz L(CopyFrom1To16Bytes)
405 movaps 16(%esi, %ecx), %xmm3
406 movdqu %xmm2, (%eax, %ecx)
410 # ifdef USE_AS_STRNCAT
412 jbe L(CopyFrom1To16BytesCase2OrCase3)
415 jnz L(CopyFrom1To16Bytes)
417 movdqu %xmm3, (%eax, %ecx)
419 lea 16(%esi, %ecx), %esi
423 # ifdef USE_AS_STRNCAT
424 lea 128(%ebx, %edx), %ebx
428 movaps 16(%esi), %xmm5
429 movaps 32(%esi), %xmm3
431 movaps 48(%esi), %xmm7
437 # ifdef USE_AS_STRNCAT
439 jbe L(UnalignedLeaveCase2OrCase3)
442 jnz L(Unaligned64Leave)
445 L(Unaligned64Loop_start):
448 movdqu %xmm4, -64(%eax)
451 movdqu %xmm5, -48(%eax)
452 movaps 16(%esi), %xmm5
454 movaps 32(%esi), %xmm3
455 movdqu %xmm6, -32(%eax)
457 movdqu %xmm7, -16(%eax)
458 movaps 48(%esi), %xmm7
463 # ifdef USE_AS_STRNCAT
465 jbe L(UnalignedLeaveCase2OrCase3)
468 jz L(Unaligned64Loop_start)
478 jnz L(CopyFrom1To16BytesUnaligned_0)
480 jnz L(CopyFrom1To16BytesUnaligned_16)
487 jnz L(CopyFrom1To16BytesUnaligned_32)
491 movdqu %xmm5, 16(%eax)
492 movdqu %xmm6, 32(%eax)
495 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
497 # ifdef USE_AS_STRNCAT
500 pcmpeqb (%esi), %xmm1
504 jnz L(CopyFrom1To16BytesTail)
506 pcmpeqb 16(%esi), %xmm0
509 jnz L(CopyFrom1To32Bytes)
511 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
518 movdqa (%esi, %ecx), %xmm1
519 movaps 16(%esi, %ecx), %xmm2
520 movdqu %xmm1, (%eax, %ecx)
525 jnz L(CopyFrom1To16Bytes)
526 jmp L(Unalign16BothBigN)
529 /*------------end of main part-------------------------------*/
533 L(CopyFrom1To16Bytes):
537 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
540 L(CopyFrom1To16BytesTail):
543 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
546 L(CopyFrom1To32Bytes1):
549 L(CopyFrom1To16BytesTail1):
551 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
554 L(CopyFrom1To32Bytes):
559 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
562 L(CopyFrom1To16BytesUnaligned_0):
564 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
567 L(CopyFrom1To16BytesUnaligned_16):
572 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
575 L(CopyFrom1To16BytesUnaligned_32):
578 movdqu %xmm5, 16(%eax)
581 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
583 # ifdef USE_AS_STRNCAT
586 L(CopyFrom1To16BytesExit):
587 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
592 L(CopyFrom1To16BytesCase2):
598 jb L(CopyFrom1To16BytesExit)
599 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
602 L(CopyFrom1To32BytesCase2):
609 jb L(CopyFrom1To16BytesExit)
610 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
612 L(CopyFrom1To16BytesTailCase2):
617 jb L(CopyFrom1To16BytesExit)
618 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
620 L(CopyFrom1To16BytesTail1Case2):
623 jb L(CopyFrom1To16BytesExit)
624 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
626 /* Case2 or Case3, Case3 */
629 L(CopyFrom1To16BytesCase2OrCase3):
631 jnz L(CopyFrom1To16BytesCase2)
632 L(CopyFrom1To16BytesCase3):
636 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
639 L(CopyFrom1To32BytesCase2OrCase3):
641 jnz L(CopyFrom1To32BytesCase2)
644 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
647 L(CopyFrom1To16BytesTailCase2OrCase3):
649 jnz L(CopyFrom1To16BytesTailCase2)
652 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
655 L(CopyFrom1To32Bytes1Case2OrCase3):
659 L(CopyFrom1To16BytesTail1Case2OrCase3):
661 jnz L(CopyFrom1To16BytesTail1Case2)
662 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
666 # ifdef USE_AS_STRNCAT
675 # ifdef USE_AS_STRNCAT
680 # ifdef USE_AS_STRNCAT
688 # ifdef USE_AS_STRNCAT
699 # ifdef USE_AS_STRNCAT
706 # ifdef USE_AS_STRNCAT
714 # ifdef USE_AS_STRNCAT
725 # ifdef USE_AS_STRNCAT
731 # ifdef USE_AS_STRNCAT
740 # ifdef USE_AS_STRNCAT
753 # ifdef USE_AS_STRNCAT
766 # ifdef USE_AS_STRNCAT
777 # ifdef USE_AS_STRNCAT
783 # ifdef USE_AS_STRNCAT
792 # ifdef USE_AS_STRNCAT
805 # ifdef USE_AS_STRNCAT
818 # ifdef USE_AS_STRNCAT
831 # ifdef USE_AS_STRNCAT
837 movlpd 5(%esi), %xmm1
839 movlpd %xmm1, 5(%eax)
844 # ifdef USE_AS_STRNCAT
850 movlpd 6(%esi), %xmm1
852 movlpd %xmm1, 6(%eax)
857 # ifdef USE_AS_STRNCAT
863 movlpd 7(%esi), %xmm1
865 movlpd %xmm1, 7(%eax)
870 # ifdef USE_AS_STRNCAT
881 # ifdef USE_AS_STRNCAT
887 # ifdef USE_AS_STRNCAT
896 # ifdef USE_AS_STRNCAT
909 # ifdef USE_AS_STRNCAT
922 # ifdef USE_AS_STRNCAT
935 # ifdef USE_AS_STRNCAT
942 # ifdef USE_AS_STRNCAT
952 # ifdef USE_AS_STRNCAT
958 movlpd 14(%esi), %xmm3
960 movlpd %xmm3, 14(%eax)
965 # ifdef USE_AS_STRNCAT
971 movlpd 15(%esi), %xmm3
973 movlpd %xmm3, 15(%eax)
978 # ifdef USE_AS_STRNCAT
984 movlpd 16(%esi), %xmm2
986 movlpd %xmm2, 16(%eax)
991 # ifdef USE_AS_STRNCAT
997 movlpd 16(%esi), %xmm2
998 # ifdef USE_AS_STRNCAT
1001 movdqu %xmm0, (%eax)
1002 movlpd %xmm2, 16(%eax)
1004 mov STR3(%esp), %eax
1008 # ifdef USE_AS_STRNCAT
1013 movdqu (%esi), %xmm0
1014 movlpd 16(%esi), %xmm2
1016 movdqu %xmm0, (%eax)
1017 movlpd %xmm2, 16(%eax)
1019 mov STR3(%esp), %eax
1023 # ifdef USE_AS_STRNCAT
1028 movdqu (%esi), %xmm0
1029 movlpd 16(%esi), %xmm2
1031 movdqu %xmm0, (%eax)
1032 movlpd %xmm2, 16(%eax)
1034 mov STR3(%esp), %eax
1038 # ifdef USE_AS_STRNCAT
1043 movdqu (%esi), %xmm0
1044 movlpd 16(%esi), %xmm2
1046 movdqu %xmm0, (%eax)
1047 movlpd %xmm2, 16(%eax)
1049 mov STR3(%esp), %eax
1053 # ifdef USE_AS_STRNCAT
1058 movdqu (%esi), %xmm0
1059 movdqu 13(%esi), %xmm2
1060 movdqu %xmm0, (%eax)
1061 movdqu %xmm2, 13(%eax)
1062 mov STR3(%esp), %eax
1066 # ifdef USE_AS_STRNCAT
1071 movdqu (%esi), %xmm0
1072 movdqu 14(%esi), %xmm2
1073 movdqu %xmm0, (%eax)
1074 movdqu %xmm2, 14(%eax)
1075 mov STR3(%esp), %eax
1079 # ifdef USE_AS_STRNCAT
1084 movdqu (%esi), %xmm0
1085 movdqu 15(%esi), %xmm2
1086 movdqu %xmm0, (%eax)
1087 movdqu %xmm2, 15(%eax)
1088 mov STR3(%esp), %eax
1092 # ifdef USE_AS_STRNCAT
1097 movdqu (%esi), %xmm0
1098 movdqu 16(%esi), %xmm2
1099 movdqu %xmm0, (%eax)
1100 movdqu %xmm2, 16(%eax)
1101 mov STR3(%esp), %eax
1104 # ifdef USE_AS_STRNCAT
1107 L(UnalignedLeaveCase2OrCase3):
1109 jnz L(Unaligned64LeaveCase2)
1110 L(Unaligned64LeaveCase3):
1114 jl L(CopyFrom1To16BytesCase3)
1115 movdqu %xmm4, (%eax)
1117 jb L(CopyFrom1To16BytesCase3)
1118 movdqu %xmm5, 16(%eax)
1120 jb L(CopyFrom1To16BytesCase3)
1121 movdqu %xmm6, 32(%eax)
1123 jb L(CopyFrom1To16BytesCase3)
1124 movdqu %xmm7, 48(%eax)
1127 mov STR3(%esp), %eax
1131 L(Unaligned64LeaveCase2):
1133 pcmpeqb %xmm4, %xmm0
1134 pmovmskb %xmm0, %edx
1136 jle L(CopyFrom1To16BytesCase2OrCase3)
1138 jnz L(CopyFrom1To16Bytes)
1140 pcmpeqb %xmm5, %xmm0
1141 pmovmskb %xmm0, %edx
1142 movdqu %xmm4, (%eax)
1145 jbe L(CopyFrom1To16BytesCase2OrCase3)
1147 jnz L(CopyFrom1To16Bytes)
1149 pcmpeqb %xmm6, %xmm0
1150 pmovmskb %xmm0, %edx
1151 movdqu %xmm5, 16(%eax)
1154 jbe L(CopyFrom1To16BytesCase2OrCase3)
1156 jnz L(CopyFrom1To16Bytes)
1158 pcmpeqb %xmm7, %xmm0
1159 pmovmskb %xmm0, %edx
1160 movdqu %xmm6, 32(%eax)
1161 lea 16(%eax, %ecx), %eax
1162 lea 16(%esi, %ecx), %esi
1165 jb L(CopyFrom1To16BytesExit)
1166 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
/* One .int entry per exit stub, L(Exit1) through L(Exit32), each built
   with JMPTBL against L(ExitTable).  The code dispatches into this
   table with BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4), so the
   order of the entries has to match the index carried in %edx.
   NOTE(review): assumes the L(ExitTable) label sits directly before
   the first entry, making index 0 select L(Exit1) -- confirm.  */
1177 .int JMPTBL(L(Exit1), L(ExitTable))
1178 .int JMPTBL(L(Exit2), L(ExitTable))
1179 .int JMPTBL(L(Exit3), L(ExitTable))
1180 .int JMPTBL(L(Exit4), L(ExitTable))
1181 .int JMPTBL(L(Exit5), L(ExitTable))
1182 .int JMPTBL(L(Exit6), L(ExitTable))
1183 .int JMPTBL(L(Exit7), L(ExitTable))
1184 .int JMPTBL(L(Exit8), L(ExitTable))
1185 .int JMPTBL(L(Exit9), L(ExitTable))
1186 .int JMPTBL(L(Exit10), L(ExitTable))
1187 .int JMPTBL(L(Exit11), L(ExitTable))
1188 .int JMPTBL(L(Exit12), L(ExitTable))
1189 .int JMPTBL(L(Exit13), L(ExitTable))
1190 .int JMPTBL(L(Exit14), L(ExitTable))
1191 .int JMPTBL(L(Exit15), L(ExitTable))
1192 .int JMPTBL(L(Exit16), L(ExitTable))
1193 .int JMPTBL(L(Exit17), L(ExitTable))
1194 .int JMPTBL(L(Exit18), L(ExitTable))
1195 .int JMPTBL(L(Exit19), L(ExitTable))
1196 .int JMPTBL(L(Exit20), L(ExitTable))
1197 .int JMPTBL(L(Exit21), L(ExitTable))
1198 .int JMPTBL(L(Exit22), L(ExitTable))
1199 .int JMPTBL(L(Exit23), L(ExitTable))
1200 .int JMPTBL(L(Exit24), L(ExitTable))
1201 .int JMPTBL(L(Exit25), L(ExitTable))
1202 .int JMPTBL(L(Exit26), L(ExitTable))
1203 .int JMPTBL(L(Exit27), L(ExitTable))
1204 .int JMPTBL(L(Exit28), L(ExitTable))
1205 .int JMPTBL(L(Exit29), L(ExitTable))
1206 .int JMPTBL(L(Exit30), L(ExitTable))
1207 .int JMPTBL(L(Exit31), L(ExitTable))
1208 .int JMPTBL(L(Exit32), L(ExitTable))
1209 # ifdef USE_AS_STRNCAT
/* Jump table for the strncat-only exit stubs.  It has 33 .int entries,
   L(StrncatExit0) through L(StrncatExit32), so entry K selects
   L(StrncatExitK).  The code dispatches into it with
   BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4), i.e. the
   index is taken from %ebx and scaled by 4.  */
1210 L(ExitStrncatTable):
1211 .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
1212 .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
1213 .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
1214 .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
1215 .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
1216 .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
1217 .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
1218 .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
1219 .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
1220 .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
1221 .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
1222 .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
1223 .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
1224 .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
1225 .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
1226 .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
1227 .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
1228 .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
1229 .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
1230 .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
1231 .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
1232 .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
1233 .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
1234 .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
1235 .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
1236 .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
1237 .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
1238 .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
1239 .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
1240 .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
1241 .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
1242 .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
1243 .int JMPTBL(L(StrncatExit32), L(ExitStrncatTable))