1 /* strcpy with SSE2 and unaligned load
2 Copyright (C) 2011-2013 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
/* CFI_PUSH (REG): unwind annotations that accompany a 4-byte push of REG.
   Expands to cfi_adjust_cfa_offset (4) followed by cfi_rel_offset (REG, 0).
   It expands only to these two cfi_* annotations; the push instruction
   itself is issued by the PUSH macro.  */
26 # define CFI_PUSH(REG) \
27 cfi_adjust_cfa_offset (4); \
28 cfi_rel_offset (REG, 0)
/* CFI_POP (REG): counterpart of CFI_PUSH; begins with
   cfi_adjust_cfa_offset (-4).
   NOTE(review): the last visible line of CFI_POP ends in a line
   continuation, so the remainder of its expansion is not visible in this
   excerpt -- confirm against the complete file.
   PUSH (REG): pushl REG followed by CFI_PUSH (REG).
   POP (REG): popl REG followed by CFI_POP (REG).  */
30 # define CFI_POP(REG) \
31 cfi_adjust_cfa_offset (-4); \
34 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
35 # define POP(REG) popl REG; CFI_POP (REG)
38 # define STRCPY __strcpy_sse2
45 # ifdef USE_AS_STRNCPY
/* Entry/exit sequences defined after the `# ifdef USE_AS_STRNCPY' above.
   ENTRANCE saves %ebx, %esi and %edi with PUSH, in that order.
   RETURN restores them in the reverse order with POP, executes ret, and
   then re-applies CFI_PUSH for the three registers -- presumably so that
   the unwind state stays correct for code assembled after the ret
   (TODO confirm).  */
47 # define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
48 # define RETURN POP(%edi); POP(%esi); POP(%ebx); ret; \
49 CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
52 # define JMPTBL(I, B) I - B
54 /* Load an entry in a jump table into ECX and branch to it. TABLE is a
55 jump table with relative offsets.
56 INDEX is a register that contains the index into the jump table.
57 SCALE is the scale of INDEX. */
59 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
60 /* We first load PC into ECX. */ \
62 /* Get the address of the jump table. */ \
63 addl $(TABLE - .), %ecx; \
64 /* Get the entry and convert the relative offset to the \
65 absolute address. */ \
66 addl (%ecx,INDEX,SCALE), %ecx; \
67 /* We loaded the jump table and adjusted ECX. Go. */ \
70 # define JMPTBL(I, B) I
72 /* Branch to an entry in a jump table. TABLE is a jump table with
73 absolute offsets. INDEX is a register that contains the index into the
74 jump table. SCALE is the scale of INDEX. */
76 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
77 jmp *TABLE(,INDEX,SCALE)
90 # ifndef USE_AS_STPCPY
91 mov %edi, %eax /* save result */
94 jz L(SourceStringAlignmentZero)
100 pcmpeqb (%esi), %xmm1
104 # ifdef USE_AS_STPCPY
106 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
109 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
112 jnz L(CopyFrom1To16BytesTail)
114 pcmpeqb 16(%esi), %xmm0
116 # ifdef USE_AS_STPCPY
118 jbe L(CopyFrom1To32BytesCase2OrCase3)
121 jbe L(CopyFrom1To32BytesCase2OrCase3)
124 jnz L(CopyFrom1To32Bytes)
126 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
131 /* If source address alignment != destination address alignment */
135 movdqa (%esi, %ecx), %xmm1
136 movaps 16(%esi, %ecx), %xmm2
137 movdqu %xmm1, (%edi, %ecx)
142 jbe L(CopyFrom1To16BytesCase2OrCase3)
144 jnz L(CopyFrom1To16BytesUnalignedXmm2)
146 movaps 16(%esi, %ecx), %xmm3
147 movdqu %xmm2, (%edi, %ecx)
152 jbe L(CopyFrom1To16BytesCase2OrCase3)
154 jnz L(CopyFrom1To16BytesUnalignedXmm3)
156 movaps 16(%esi, %ecx), %xmm4
157 movdqu %xmm3, (%edi, %ecx)
162 jbe L(CopyFrom1To16BytesCase2OrCase3)
164 jnz L(CopyFrom1To16BytesUnalignedXmm4)
166 movaps 16(%esi, %ecx), %xmm1
167 movdqu %xmm4, (%edi, %ecx)
172 jbe L(CopyFrom1To16BytesCase2OrCase3)
174 jnz L(CopyFrom1To16BytesUnalignedXmm1)
176 movaps 16(%esi, %ecx), %xmm2
177 movdqu %xmm1, (%edi, %ecx)
182 jbe L(CopyFrom1To16BytesCase2OrCase3)
184 jnz L(CopyFrom1To16BytesUnalignedXmm2)
186 movaps 16(%esi, %ecx), %xmm3
187 movdqu %xmm2, (%edi, %ecx)
192 jbe L(CopyFrom1To16BytesCase2OrCase3)
194 jnz L(CopyFrom1To16BytesUnalignedXmm3)
196 movdqu %xmm3, (%edi, %ecx)
198 lea 16(%esi, %ecx), %esi
202 lea 128(%ebx, %edx), %ebx
207 movaps 16(%esi), %xmm5
208 movaps 32(%esi), %xmm3
210 movaps 48(%esi), %xmm7
217 jbe L(UnalignedLeaveCase2OrCase3)
219 jnz L(Unaligned64Leave)
220 L(Unaligned64Loop_start):
223 movdqu %xmm4, -64(%edi)
226 movdqu %xmm5, -48(%edi)
227 movaps 16(%esi), %xmm5
229 movaps 32(%esi), %xmm3
230 movdqu %xmm6, -32(%edi)
232 movdqu %xmm7, -16(%edi)
233 movaps 48(%esi), %xmm7
239 jbe L(UnalignedLeaveCase2OrCase3)
241 jz L(Unaligned64Loop_start)
250 jnz L(CopyFrom1To16BytesUnaligned_0)
252 jnz L(CopyFrom1To16BytesUnaligned_16)
259 jnz L(CopyFrom1To16BytesUnaligned_32)
263 movdqu %xmm5, 16(%edi)
264 movdqu %xmm6, 32(%edi)
265 # ifdef USE_AS_STPCPY
266 lea 48(%edi, %edx), %eax
268 movdqu %xmm7, 48(%edi)
271 lea 49(%edi, %edx), %edi
272 jmp L(StrncpyFillTailWithZero)
274 /* If source address alignment == destination address alignment */
276 L(SourceStringAlignmentZero):
281 # ifdef USE_AS_STPCPY
283 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
286 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
289 jnz L(CopyFrom1To16BytesTail1)
291 pcmpeqb 16(%esi), %xmm0
294 # ifdef USE_AS_STPCPY
296 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
299 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
302 jnz L(CopyFrom1To32Bytes1)
306 /*-----------------End of main part---------------------------*/
310 L(CopyFrom1To16BytesTail):
314 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
317 L(CopyFrom1To32Bytes1):
321 L(CopyFrom1To16BytesTail1):
323 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
326 L(CopyFrom1To32Bytes):
332 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
335 L(CopyFrom1To16BytesUnaligned_0):
337 # ifdef USE_AS_STPCPY
338 lea (%edi, %edx), %eax
343 lea 1(%edi, %edx), %edi
344 jmp L(StrncpyFillTailWithZero)
347 L(CopyFrom1To16BytesUnaligned_16):
350 # ifdef USE_AS_STPCPY
351 lea 16(%edi, %edx), %eax
353 movdqu %xmm5, 16(%edi)
356 lea 17(%edi, %edx), %edi
357 jmp L(StrncpyFillTailWithZero)
360 L(CopyFrom1To16BytesUnaligned_32):
363 movdqu %xmm5, 16(%edi)
364 # ifdef USE_AS_STPCPY
365 lea 32(%edi, %edx), %eax
367 movdqu %xmm6, 32(%edi)
370 lea 33(%edi, %edx), %edi
371 jmp L(StrncpyFillTailWithZero)
/* Exit stubs for the unaligned copy path, one per XMM register.
   Each stub stores its 16-byte register to (%edi, %ecx) with an
   unaligned store (movdqu) and then jumps to
   L(CopyFrom1To16BytesXmmExit).  The stubs differ only in which
   register (%xmm6, %xmm5, %xmm4, %xmm3 or %xmm1) is written.  */
374 L(CopyFrom1To16BytesUnalignedXmm6):
375 movdqu %xmm6, (%edi, %ecx)
376 jmp L(CopyFrom1To16BytesXmmExit)
379 L(CopyFrom1To16BytesUnalignedXmm5):
380 movdqu %xmm5, (%edi, %ecx)
381 jmp L(CopyFrom1To16BytesXmmExit)
384 L(CopyFrom1To16BytesUnalignedXmm4):
385 movdqu %xmm4, (%edi, %ecx)
386 jmp L(CopyFrom1To16BytesXmmExit)
389 L(CopyFrom1To16BytesUnalignedXmm3):
390 movdqu %xmm3, (%edi, %ecx)
391 jmp L(CopyFrom1To16BytesXmmExit)
394 L(CopyFrom1To16BytesUnalignedXmm1):
395 movdqu %xmm1, (%edi, %ecx)
396 jmp L(CopyFrom1To16BytesXmmExit)
/* Dispatch through L(ExitTable): %edx is the table index and each
   entry is 4 bytes wide (scale 4).  */
399 L(CopyFrom1To16BytesExit):
400 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
405 L(CopyFrom1To16BytesCase2):
411 jb L(CopyFrom1To16BytesExit)
412 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
415 L(CopyFrom1To32BytesCase2):
422 jb L(CopyFrom1To16BytesExit)
423 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
425 L(CopyFrom1To16BytesTailCase2):
430 jb L(CopyFrom1To16BytesExit)
431 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
433 L(CopyFrom1To16BytesTail1Case2):
436 jb L(CopyFrom1To16BytesExit)
437 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
439 /* Case2 or Case3, Case3 */
442 L(CopyFrom1To16BytesCase2OrCase3):
444 jnz L(CopyFrom1To16BytesCase2)
445 L(CopyFrom1To16BytesCase3):
449 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
452 L(CopyFrom1To32BytesCase2OrCase3):
454 jnz L(CopyFrom1To32BytesCase2)
457 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
460 L(CopyFrom1To16BytesTailCase2OrCase3):
462 jnz L(CopyFrom1To16BytesTailCase2)
465 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
468 L(CopyFrom1To32Bytes1Case2OrCase3):
472 L(CopyFrom1To16BytesTail1Case2OrCase3):
474 jnz L(CopyFrom1To16BytesTail1Case2)
475 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
479 # ifdef USE_AS_STPCPY
487 # ifdef USE_AS_STPCPY
492 jnz L(StrncpyFillTailWithZero)
499 # ifdef USE_AS_STPCPY
504 jnz L(StrncpyFillTailWithZero)
512 # ifdef USE_AS_STPCPY
517 jnz L(StrncpyFillTailWithZero)
524 # ifdef USE_AS_STPCPY
529 jnz L(StrncpyFillTailWithZero)
537 # ifdef USE_AS_STPCPY
542 jnz L(StrncpyFillTailWithZero)
551 # ifdef USE_AS_STPCPY
556 jnz L(StrncpyFillTailWithZero)
565 # ifdef USE_AS_STPCPY
570 jnz L(StrncpyFillTailWithZero)
577 # ifdef USE_AS_STPCPY
582 jnz L(StrncpyFillTailWithZero)
590 # ifdef USE_AS_STPCPY
595 jnz L(StrncpyFillTailWithZero)
604 # ifdef USE_AS_STPCPY
609 jnz L(StrncpyFillTailWithZero)
618 # ifdef USE_AS_STPCPY
623 jnz L(StrncpyFillTailWithZero)
632 # ifdef USE_AS_STPCPY
637 jnz L(StrncpyFillTailWithZero)
643 movlpd 5(%esi), %xmm1
645 movlpd %xmm1, 5(%edi)
646 # ifdef USE_AS_STPCPY
651 jnz L(StrncpyFillTailWithZero)
657 movlpd 6(%esi), %xmm1
659 movlpd %xmm1, 6(%edi)
660 # ifdef USE_AS_STPCPY
665 jnz L(StrncpyFillTailWithZero)
671 movlpd 7(%esi), %xmm1
673 movlpd %xmm1, 7(%edi)
674 # ifdef USE_AS_STPCPY
679 jnz L(StrncpyFillTailWithZero)
686 # ifdef USE_AS_STPCPY
691 jnz L(StrncpyFillTailWithZero)
699 # ifdef USE_AS_STPCPY
704 jnz L(StrncpyFillTailWithZero)
713 # ifdef USE_AS_STPCPY
718 jnz L(StrncpyFillTailWithZero)
727 # ifdef USE_AS_STPCPY
732 jnz L(StrncpyFillTailWithZero)
741 # ifdef USE_AS_STPCPY
746 jnz L(StrncpyFillTailWithZero)
756 # ifdef USE_AS_STPCPY
761 jnz L(StrncpyFillTailWithZero)
767 movlpd 14(%esi), %xmm3
769 movlpd %xmm3, 14(%edi)
770 # ifdef USE_AS_STPCPY
775 jnz L(StrncpyFillTailWithZero)
781 movlpd 15(%esi), %xmm3
783 movlpd %xmm3, 15(%edi)
784 # ifdef USE_AS_STPCPY
789 jnz L(StrncpyFillTailWithZero)
795 movlpd 16(%esi), %xmm2
797 movlpd %xmm2, 16(%edi)
798 # ifdef USE_AS_STPCPY
803 jnz L(StrncpyFillTailWithZero)
809 movlpd 16(%esi), %xmm2
811 movlpd %xmm2, 16(%edi)
813 # ifdef USE_AS_STPCPY
818 jnz L(StrncpyFillTailWithZero)
824 movlpd 16(%esi), %xmm2
827 movlpd %xmm2, 16(%edi)
829 # ifdef USE_AS_STPCPY
834 jnz L(StrncpyFillTailWithZero)
840 movlpd 16(%esi), %xmm2
843 movlpd %xmm2, 16(%edi)
845 # ifdef USE_AS_STPCPY
850 jnz L(StrncpyFillTailWithZero)
856 movlpd 16(%esi), %xmm2
859 movlpd %xmm2, 16(%edi)
861 # ifdef USE_AS_STPCPY
866 jnz L(StrncpyFillTailWithZero)
872 movdqu 13(%esi), %xmm2
874 movdqu %xmm2, 13(%edi)
875 # ifdef USE_AS_STPCPY
880 jnz L(StrncpyFillTailWithZero)
886 movdqu 14(%esi), %xmm2
888 movdqu %xmm2, 14(%edi)
889 # ifdef USE_AS_STPCPY
894 jnz L(StrncpyFillTailWithZero)
901 movdqu 15(%esi), %xmm2
903 movdqu %xmm2, 15(%edi)
904 # ifdef USE_AS_STPCPY
909 jnz L(StrncpyFillTailWithZero)
915 movdqu 16(%esi), %xmm2
917 movdqu %xmm2, 16(%edi)
918 # ifdef USE_AS_STPCPY
923 jnz L(StrncpyFillTailWithZero)
930 # ifdef USE_AS_STPCPY
939 # ifdef USE_AS_STPCPY
950 # ifdef USE_AS_STPCPY
959 # ifdef USE_AS_STPCPY
970 # ifdef USE_AS_STPCPY
981 # ifdef USE_AS_STPCPY
992 # ifdef USE_AS_STPCPY
1000 movlpd %xmm0, (%edi)
1001 # ifdef USE_AS_STPCPY
1008 movlpd (%esi), %xmm0
1010 movlpd %xmm0, (%edi)
1012 # ifdef USE_AS_STPCPY
1019 movlpd (%esi), %xmm0
1021 movlpd %xmm0, (%edi)
1023 # ifdef USE_AS_STPCPY
1030 movlpd (%esi), %xmm0
1032 movlpd %xmm0, (%edi)
1034 # ifdef USE_AS_STPCPY
1041 movlpd (%esi), %xmm0
1043 movlpd %xmm0, (%edi)
1045 # ifdef USE_AS_STPCPY
1052 movlpd (%esi), %xmm0
1053 movlpd 5(%esi), %xmm1
1054 movlpd %xmm0, (%edi)
1055 movlpd %xmm1, 5(%edi)
1056 # ifdef USE_AS_STPCPY
1063 movlpd (%esi), %xmm0
1064 movlpd 6(%esi), %xmm1
1065 movlpd %xmm0, (%edi)
1066 movlpd %xmm1, 6(%edi)
1067 # ifdef USE_AS_STPCPY
1074 movlpd (%esi), %xmm0
1075 movlpd 7(%esi), %xmm1
1076 movlpd %xmm0, (%edi)
1077 movlpd %xmm1, 7(%edi)
1078 # ifdef USE_AS_STPCPY
1085 movdqu (%esi), %xmm0
1086 movdqu %xmm0, (%edi)
1087 # ifdef USE_AS_STPCPY
1094 movdqu (%esi), %xmm0
1096 movdqu %xmm0, (%edi)
1098 # ifdef USE_AS_STPCPY
1105 movdqu (%esi), %xmm0
1107 movdqu %xmm0, (%edi)
1109 # ifdef USE_AS_STPCPY
1116 movdqu (%esi), %xmm0
1118 movdqu %xmm0, (%edi)
1120 # ifdef USE_AS_STPCPY
1127 movdqu (%esi), %xmm0
1129 movdqu %xmm0, (%edi)
1131 # ifdef USE_AS_STPCPY
1138 movdqu (%esi), %xmm0
1141 movdqu %xmm0, (%edi)
1144 # ifdef USE_AS_STPCPY
1151 movdqu (%esi), %xmm0
1152 movlpd 14(%esi), %xmm3
1153 movdqu %xmm0, (%edi)
1154 movlpd %xmm3, 14(%edi)
1155 # ifdef USE_AS_STPCPY
1162 movdqu (%esi), %xmm0
1163 movlpd 15(%esi), %xmm3
1164 movdqu %xmm0, (%edi)
1165 movlpd %xmm3, 15(%edi)
1166 # ifdef USE_AS_STPCPY
1173 movdqu (%esi), %xmm0
1174 movlpd 16(%esi), %xmm2
1175 movdqu %xmm0, (%edi)
1176 movlpd %xmm2, 16(%edi)
1177 # ifdef USE_AS_STPCPY
1184 movdqu (%esi), %xmm0
1185 movlpd 16(%esi), %xmm2
1187 movdqu %xmm0, (%edi)
1188 movlpd %xmm2, 16(%edi)
1190 # ifdef USE_AS_STPCPY
1197 movdqu (%esi), %xmm0
1198 movlpd 16(%esi), %xmm2
1200 movdqu %xmm0, (%edi)
1201 movlpd %xmm2, 16(%edi)
1203 # ifdef USE_AS_STPCPY
1210 movdqu (%esi), %xmm0
1211 movlpd 16(%esi), %xmm2
1213 movdqu %xmm0, (%edi)
1214 movlpd %xmm2, 16(%edi)
1216 # ifdef USE_AS_STPCPY
1223 movdqu (%esi), %xmm0
1224 movlpd 16(%esi), %xmm2
1226 movdqu %xmm0, (%edi)
1227 movlpd %xmm2, 16(%edi)
1229 # ifdef USE_AS_STPCPY
1236 movdqu (%esi), %xmm0
1237 movdqu 13(%esi), %xmm2
1238 movdqu %xmm0, (%edi)
1239 movdqu %xmm2, 13(%edi)
1240 # ifdef USE_AS_STPCPY
1247 movdqu (%esi), %xmm0
1248 movdqu 14(%esi), %xmm2
1249 movdqu %xmm0, (%edi)
1250 movdqu %xmm2, 14(%edi)
1251 # ifdef USE_AS_STPCPY
1258 movdqu (%esi), %xmm0
1259 movdqu 15(%esi), %xmm2
1260 movdqu %xmm0, (%edi)
1261 movdqu %xmm2, 15(%edi)
1262 # ifdef USE_AS_STPCPY
1269 movdqu (%esi), %xmm0
1270 movdqu 16(%esi), %xmm2
1271 movdqu %xmm0, (%edi)
1272 movdqu %xmm2, 16(%edi)
1273 # ifdef USE_AS_STPCPY
1280 movdqu (%esi), %xmm0
1281 movdqu 16(%esi), %xmm2
1283 movdqu %xmm0, (%edi)
1284 movdqu %xmm2, 16(%edi)
1326 movlpd %xmm0, -1(%edi)
1331 movlpd %xmm0, (%edi)
1336 movlpd %xmm0, (%edi)
1342 movlpd %xmm0, (%edi)
1348 movlpd %xmm0, (%edi)
1354 movlpd %xmm0, (%edi)
1360 movlpd %xmm0, (%edi)
1361 movlpd %xmm0, 5(%edi)
1366 movlpd %xmm0, (%edi)
1367 movlpd %xmm0, 6(%edi)
1372 movdqu %xmm0, -1(%edi)
1377 movdqu %xmm0, (%edi)
/* Exit stub for %xmm2: stores the register to (%edi, %ecx) with an
   unaligned store.  No jump is visible after the store, so control
   appears to continue toward L(CopyFrom1To16BytesXmmExit) -- TODO
   confirm against the lines not shown in this excerpt.  */
1381 L(CopyFrom1To16BytesUnalignedXmm2):
1382 movdqu %xmm2, (%edi, %ecx)
1385 L(CopyFrom1To16BytesXmmExit):
1389 # ifdef USE_AS_STPCPY
1390 lea (%edi, %edx), %eax
1393 lea 1(%edi, %edx), %edi
1396 L(StrncpyFillTailWithZero):
1400 jbe L(StrncpyFillExit)
1402 movdqu %xmm0, (%edi)
1410 jb L(StrncpyFillLess64)
1412 L(StrncpyFillLoopMovdqa):
1413 movdqa %xmm0, (%edi)
1414 movdqa %xmm0, 16(%edi)
1415 movdqa %xmm0, 32(%edi)
1416 movdqa %xmm0, 48(%edi)
1419 jae L(StrncpyFillLoopMovdqa)
1421 L(StrncpyFillLess64):
1423 jl L(StrncpyFillLess32)
1424 movdqa %xmm0, (%edi)
1425 movdqa %xmm0, 16(%edi)
1428 jl L(StrncpyFillExit)
1429 movdqa %xmm0, (%edi)
1431 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1433 L(StrncpyFillLess32):
1435 jl L(StrncpyFillExit)
1436 movdqa %xmm0, (%edi)
1438 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1442 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1445 L(UnalignedLeaveCase2OrCase3):
1447 jnz L(Unaligned64LeaveCase2)
1448 L(Unaligned64LeaveCase3):
1452 jl L(CopyFrom1To16BytesCase3)
1453 movdqu %xmm4, (%edi)
1455 jb L(CopyFrom1To16BytesCase3)
1456 movdqu %xmm5, 16(%edi)
1458 jb L(CopyFrom1To16BytesCase3)
1459 movdqu %xmm6, 32(%edi)
1461 jb L(CopyFrom1To16BytesCase3)
1462 movdqu %xmm7, 48(%edi)
1463 # ifdef USE_AS_STPCPY
1469 L(Unaligned64LeaveCase2):
1471 pcmpeqb %xmm4, %xmm0
1472 pmovmskb %xmm0, %edx
1474 jle L(CopyFrom1To16BytesCase2OrCase3)
1476 jnz L(CopyFrom1To16BytesUnalignedXmm4)
1478 pcmpeqb %xmm5, %xmm0
1479 pmovmskb %xmm0, %edx
1480 movdqu %xmm4, (%edi)
1483 jbe L(CopyFrom1To16BytesCase2OrCase3)
1485 jnz L(CopyFrom1To16BytesUnalignedXmm5)
1487 pcmpeqb %xmm6, %xmm0
1488 pmovmskb %xmm0, %edx
1489 movdqu %xmm5, 16(%edi)
1492 jbe L(CopyFrom1To16BytesCase2OrCase3)
1494 jnz L(CopyFrom1To16BytesUnalignedXmm6)
1496 pcmpeqb %xmm7, %xmm0
1497 pmovmskb %xmm0, %edx
1498 movdqu %xmm6, 32(%edi)
1499 lea 16(%edi, %ecx), %edi
1500 lea 16(%esi, %ecx), %esi
1503 jb L(CopyFrom1To16BytesExit)
1504 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
/* Jump-table entries expressed relative to L(ExitTable) via JMPTBL.
   The entries target L(Exit1) .. L(Exit32) in ascending order, one
   4-byte .int each, so the first entry shown here is L(Exit1).
   Callers reach it with BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4).
   NOTE(review): the L(ExitTable) label line itself is not visible in
   this excerpt; presumably it immediately precedes these entries.  */
1516 .int JMPTBL(L(Exit1), L(ExitTable))
1517 .int JMPTBL(L(Exit2), L(ExitTable))
1518 .int JMPTBL(L(Exit3), L(ExitTable))
1519 .int JMPTBL(L(Exit4), L(ExitTable))
1520 .int JMPTBL(L(Exit5), L(ExitTable))
1521 .int JMPTBL(L(Exit6), L(ExitTable))
1522 .int JMPTBL(L(Exit7), L(ExitTable))
1523 .int JMPTBL(L(Exit8), L(ExitTable))
1524 .int JMPTBL(L(Exit9), L(ExitTable))
1525 .int JMPTBL(L(Exit10), L(ExitTable))
1526 .int JMPTBL(L(Exit11), L(ExitTable))
1527 .int JMPTBL(L(Exit12), L(ExitTable))
1528 .int JMPTBL(L(Exit13), L(ExitTable))
1529 .int JMPTBL(L(Exit14), L(ExitTable))
1530 .int JMPTBL(L(Exit15), L(ExitTable))
1531 .int JMPTBL(L(Exit16), L(ExitTable))
1532 .int JMPTBL(L(Exit17), L(ExitTable))
1533 .int JMPTBL(L(Exit18), L(ExitTable))
1534 .int JMPTBL(L(Exit19), L(ExitTable))
1535 .int JMPTBL(L(Exit20), L(ExitTable))
1536 .int JMPTBL(L(Exit21), L(ExitTable))
1537 .int JMPTBL(L(Exit22), L(ExitTable))
1538 .int JMPTBL(L(Exit23), L(ExitTable))
1539 .int JMPTBL(L(Exit24), L(ExitTable))
1540 .int JMPTBL(L(Exit25), L(ExitTable))
1541 .int JMPTBL(L(Exit26), L(ExitTable))
1542 .int JMPTBL(L(Exit27), L(ExitTable))
1543 .int JMPTBL(L(Exit28), L(ExitTable))
1544 .int JMPTBL(L(Exit29), L(ExitTable))
1545 .int JMPTBL(L(Exit30), L(ExitTable))
1546 .int JMPTBL(L(Exit31), L(ExitTable))
1547 .int JMPTBL(L(Exit32), L(ExitTable))
/* Jump table reached with
   BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4).
   Entry 0 targets L(Exit0); entries 1 .. 33 target L(StrncpyExit1) ..
   L(StrncpyExit33).  Each entry is a 4-byte .int built with JMPTBL
   relative to L(ExitStrncpyTable).  */
1549 L(ExitStrncpyTable):
1550 .int JMPTBL(L(Exit0), L(ExitStrncpyTable))
1551 .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1552 .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1553 .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1554 .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1555 .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1556 .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1557 .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1558 .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1559 .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1560 .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1561 .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1562 .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1563 .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1564 .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1565 .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1566 .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1567 .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1568 .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1569 .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1570 .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1571 .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1572 .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1573 .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1574 .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1575 .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1576 .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1577 .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1578 .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1579 .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1580 .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1581 .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1582 .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1583 .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
/* Jump-table entries expressed relative to L(FillTable) via JMPTBL.
   The entries target L(Fill0) .. L(Fill16) in ascending order, one
   4-byte .int each.  Callers reach it with
   BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4).
   NOTE(review): the L(FillTable) label line itself is not visible in
   this excerpt; presumably it immediately precedes these entries.  */
1587 .int JMPTBL(L(Fill0), L(FillTable))
1588 .int JMPTBL(L(Fill1), L(FillTable))
1589 .int JMPTBL(L(Fill2), L(FillTable))
1590 .int JMPTBL(L(Fill3), L(FillTable))
1591 .int JMPTBL(L(Fill4), L(FillTable))
1592 .int JMPTBL(L(Fill5), L(FillTable))
1593 .int JMPTBL(L(Fill6), L(FillTable))
1594 .int JMPTBL(L(Fill7), L(FillTable))
1595 .int JMPTBL(L(Fill8), L(FillTable))
1596 .int JMPTBL(L(Fill9), L(FillTable))
1597 .int JMPTBL(L(Fill10), L(FillTable))
1598 .int JMPTBL(L(Fill11), L(FillTable))
1599 .int JMPTBL(L(Fill12), L(FillTable))
1600 .int JMPTBL(L(Fill13), L(FillTable))
1601 .int JMPTBL(L(Fill14), L(FillTable))
1602 .int JMPTBL(L(Fill15), L(FillTable))
1603 .int JMPTBL(L(Fill16), L(FillTable))
/* RETURN: restore %edi with POP, execute ret, then re-apply
   CFI_PUSH (%edi) -- presumably so the unwind state stays correct for
   code assembled after the ret (TODO confirm).
   RETURN1: a plain ret with no register restore.
   NOTE(review): RETURN is also defined earlier in the file; the
   preprocessor conditionals that select between the two definitions
   are not visible in this excerpt.  */
1607 # define RETURN POP (%edi); ret; CFI_PUSH (%edi)
1608 # define RETURN1 ret
1613 mov STR1(%esp), %edx
1614 mov STR2(%esp), %ecx
1656 movdqu (%ecx), %xmm1
1657 movdqu %xmm1, (%edx)
1658 pcmpeqb (%ebx), %xmm0
1659 pmovmskb %xmm0, %eax
1662 jnz L(CopyFrom1To16Bytes)
1672 movdqa (%ecx), %xmm1
1673 movaps 16(%ecx), %xmm2
1674 movdqu %xmm1, (%edx)
1675 pcmpeqb %xmm2, %xmm0
1676 pmovmskb %xmm0, %eax
1679 jnz L(CopyFrom1To16Bytes)
1681 movaps 16(%ecx, %ebx), %xmm3
1682 movdqu %xmm2, (%edx, %ebx)
1683 pcmpeqb %xmm3, %xmm0
1684 pmovmskb %xmm0, %eax
1687 jnz L(CopyFrom1To16Bytes)
1689 movaps 16(%ecx, %ebx), %xmm4
1690 movdqu %xmm3, (%edx, %ebx)
1691 pcmpeqb %xmm4, %xmm0
1692 pmovmskb %xmm0, %eax
1695 jnz L(CopyFrom1To16Bytes)
1697 movaps 16(%ecx, %ebx), %xmm1
1698 movdqu %xmm4, (%edx, %ebx)
1699 pcmpeqb %xmm1, %xmm0
1700 pmovmskb %xmm0, %eax
1703 jnz L(CopyFrom1To16Bytes)
1705 movaps 16(%ecx, %ebx), %xmm2
1706 movdqu %xmm1, (%edx, %ebx)
1707 pcmpeqb %xmm2, %xmm0
1708 pmovmskb %xmm0, %eax
1711 jnz L(CopyFrom1To16Bytes)
1713 movaps 16(%ecx, %ebx), %xmm3
1714 movdqu %xmm2, (%edx, %ebx)
1715 pcmpeqb %xmm3, %xmm0
1716 pmovmskb %xmm0, %eax
1719 jnz L(CopyFrom1To16Bytes)
1721 movdqu %xmm3, (%edx, %ebx)
1723 lea 16(%ecx, %ebx), %ecx
1729 movaps (%ecx), %xmm2
1731 movaps 16(%ecx), %xmm5
1732 movaps 32(%ecx), %xmm3
1734 movaps 48(%ecx), %xmm7
1740 pcmpeqb %xmm0, %xmm3
1741 pmovmskb %xmm3, %eax
1743 jnz L(Aligned64Leave)
1744 L(Aligned64Loop_start):
1745 movdqu %xmm4, -64(%edx)
1746 movaps (%ecx), %xmm2
1748 movdqu %xmm5, -48(%edx)
1749 movaps 16(%ecx), %xmm5
1751 movaps 32(%ecx), %xmm3
1752 movdqu %xmm6, -32(%edx)
1754 movdqu %xmm7, -16(%edx)
1755 movaps 48(%ecx), %xmm7
1758 pcmpeqb %xmm3, %xmm0
1759 pmovmskb %xmm0, %eax
1763 jz L(Aligned64Loop_start)
1767 pcmpeqb %xmm4, %xmm0
1768 pmovmskb %xmm0, %eax
1770 jnz L(CopyFrom1To16Bytes)
1772 pcmpeqb %xmm5, %xmm0
1773 pmovmskb %xmm0, %eax
1774 movdqu %xmm4, -64(%edx)
1777 jnz L(CopyFrom1To16Bytes)
1779 pcmpeqb %xmm6, %xmm0
1780 pmovmskb %xmm0, %eax
1781 movdqu %xmm5, -48(%edx)
1784 jnz L(CopyFrom1To16Bytes)
1786 movdqu %xmm6, -32(%edx)
1787 pcmpeqb %xmm7, %xmm0
1788 pmovmskb %xmm0, %eax
1791 /*-----------------End of main part---------------------------*/
1794 L(CopyFrom1To16Bytes):
1820 # ifdef USE_AS_STPCPY
1844 movlpd (%ecx), %xmm0
1845 movlpd %xmm0, (%edx)
1846 movlpd 8(%ecx), %xmm0
1847 movlpd %xmm0, 8(%edx)
1848 # ifdef USE_AS_STPCPY
1859 # ifdef USE_AS_STPCPY
1870 # ifdef USE_AS_STPCPY
1883 # ifdef USE_AS_STPCPY
1894 # ifdef USE_AS_STPCPY
1907 # ifdef USE_AS_STPCPY
1920 # ifdef USE_AS_STPCPY
1933 # ifdef USE_AS_STPCPY
1948 # ifdef USE_AS_STPCPY
1963 # ifdef USE_AS_STPCPY
1978 # ifdef USE_AS_STPCPY
1993 # ifdef USE_AS_STPCPY
2002 movlpd (%ecx), %xmm0
2003 movlpd %xmm0, (%edx)
2004 movlpd 5(%ecx), %xmm0
2005 movlpd %xmm0, 5(%edx)
2006 # ifdef USE_AS_STPCPY
2015 movlpd (%ecx), %xmm0
2016 movlpd %xmm0, (%edx)
2017 movlpd 6(%ecx), %xmm0
2018 movlpd %xmm0, 6(%edx)
2019 # ifdef USE_AS_STPCPY
2028 movlpd (%ecx), %xmm0
2029 movlpd %xmm0, (%edx)
2030 movlpd 7(%ecx), %xmm0
2031 movlpd %xmm0, 7(%edx)
2032 # ifdef USE_AS_STPCPY
2052 # ifdef USE_AS_STPCPY
2065 # ifdef USE_AS_STPCPY
2076 # ifdef USE_AS_STPCPY
2089 # ifdef USE_AS_STPCPY
2102 # ifdef USE_AS_STPCPY
2115 # ifdef USE_AS_STPCPY
2128 # ifdef USE_AS_STPCPY
2143 # ifdef USE_AS_STPCPY
2158 # ifdef USE_AS_STPCPY
2173 # ifdef USE_AS_STPCPY
2188 # ifdef USE_AS_STPCPY
2197 movlpd (%ecx), %xmm0
2198 movlpd %xmm0, (%edx)
2199 movlpd 5(%ecx), %xmm0
2200 movlpd %xmm0, 5(%edx)
2201 # ifdef USE_AS_STPCPY
2210 movlpd (%ecx), %xmm0
2211 movlpd %xmm0, (%edx)
2212 movlpd 6(%ecx), %xmm0
2213 movlpd %xmm0, 6(%edx)
2214 # ifdef USE_AS_STPCPY
2223 movlpd (%ecx), %xmm0
2224 movlpd %xmm0, (%edx)
2225 movlpd 7(%ecx), %xmm0
2226 movlpd %xmm0, 7(%edx)
2227 # ifdef USE_AS_STPCPY
2236 movlpd (%ecx), %xmm0
2237 movlpd %xmm0, (%edx)
2238 movlpd 8(%ecx), %xmm0
2239 movlpd %xmm0, 8(%edx)
2240 # ifdef USE_AS_STPCPY