2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
/* CFI_PUSH(REG): unwind annotations meant to follow a 4-byte push of REG.
   Expands to two CFI helper macros: grow the CFA offset by 4, then record
   REG as saved at offset 0 from the current CFA register.  */
26 # define CFI_PUSH(REG) \
27 cfi_adjust_cfa_offset (4); \
28 cfi_rel_offset (REG, 0)
30 # define CFI_POP(REG) \
31 cfi_adjust_cfa_offset (-4); \
34 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
35 # define POP(REG) popl REG; CFI_POP (REG)
38 # define STRCPY __strcpy_ssse3
41 # ifdef USE_AS_STRNCPY
/* Entry/return macros for the USE_AS_STRNCPY build.
   ENTRANCE saves %ebx on the stack; RETURN restores it and returns;
   RETURN1 restores %edi first, then %ebx, then returns.
   The CFI_PUSH uses placed after ret expand to CFI annotations only (see
   CFI_PUSH); presumably they re-establish the unwind state for the code
   that follows each return site -- TODO confirm.  */
43 # define ENTRANCE PUSH(%ebx)
44 # define RETURN POP(%ebx); ret; CFI_PUSH(%ebx);
45 # define RETURN1 POP(%edi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi)
50 # define RETURN1 POP(%edi); ret; CFI_PUSH(%edi)
57 /* In this code following instructions are used for copying:
62 movaps - 16 byte - requires 16 byte alignment
63 of source and destination addresses.
64 16 byte alignment: address is a 32-bit value,
65 the low four bits of the address shall be 0.
73 # ifdef USE_AS_STRNCPY
78 jbe L(StrncpyExit8Bytes)
96 # ifdef USE_AS_STRNCPY
98 jb L(StrncpyExit15Bytes)
114 # ifdef USE_AS_STRNCPY
124 # ifdef USE_AS_STRNCPY
128 /* add 16 bytes ecx_shift to ebx */
134 esi = alignment_16(ecx) + ecx_shift + 16;
135 ecx_shift = ecx - alignment_16(ecx)
139 esi = alignment_16(ecx) + 16
145 check whether there is a zero byte in the next 16 bytes of the string
146 from esi to esi + 15 and form a mask in xmm0
148 pcmpeqb (%esi), %xmm0
149 movlpd 8(%ecx), %xmm1
150 movlpd %xmm1, 8(%edx)
152 /* convert byte mask in xmm0 to bit mask */
157 /* esi = 16 - ecx_shift */
159 /* eax = 0: there is no end of string between positions esi and esi+15 */
161 # ifdef USE_AS_STRNCPY
163 jbe L(CopyFrom1To16BytesCase2OrCase3)
166 jnz L(CopyFrom1To16Bytes)
171 edx = edx + 16 = alignment_16(edx) + edx_shift + 16
175 /* Now: edx = alignment_16(edx) + 16 */
179 /* Now: eax = edx_shift - 16 */
181 # ifdef USE_AS_STRNCPY
193 case ecx_shift >= edx_shift:
194 ecx = alignment_16(ecx) + (ecx_shift - edx_shift) + 16
195 case ecx_shift < edx_shift:
196 ecx = alignment_16(ecx) + (16 + ecx_shift - edx_shift)
201 case ecx_shift >= edx_shift: eax = ecx_shift - edx_shift
202 case ecx_shift < edx_shift: eax = (16 + ecx_shift - edx_shift)
203 eax can be 0, 1, ..., 15
207 /* case: ecx_shift == edx_shift */
245 movaps 16(%ecx), %xmm2
250 # ifdef USE_AS_STRNCPY
252 jbe L(CopyFrom1To16BytesCase2OrCase3)
255 jnz L(CopyFrom1To16Bytes)
257 movaps 16(%ecx, %esi), %xmm3
258 movaps %xmm2, (%edx, %esi)
262 # ifdef USE_AS_STRNCPY
264 jbe L(CopyFrom1To16BytesCase2OrCase3)
267 jnz L(CopyFrom1To16Bytes)
269 movaps 16(%ecx, %esi), %xmm4
270 movaps %xmm3, (%edx, %esi)
274 # ifdef USE_AS_STRNCPY
276 jbe L(CopyFrom1To16BytesCase2OrCase3)
279 jnz L(CopyFrom1To16Bytes)
281 movaps 16(%ecx, %esi), %xmm1
282 movaps %xmm4, (%edx, %esi)
286 # ifdef USE_AS_STRNCPY
288 jbe L(CopyFrom1To16BytesCase2OrCase3)
291 jnz L(CopyFrom1To16Bytes)
293 movaps 16(%ecx, %esi), %xmm2
294 movaps %xmm1, (%edx, %esi)
298 # ifdef USE_AS_STRNCPY
300 jbe L(CopyFrom1To16BytesCase2OrCase3)
303 jnz L(CopyFrom1To16Bytes)
305 movaps 16(%ecx, %esi), %xmm3
306 movaps %xmm2, (%edx, %esi)
310 # ifdef USE_AS_STRNCPY
312 jbe L(CopyFrom1To16BytesCase2OrCase3)
315 jnz L(CopyFrom1To16Bytes)
317 movaps %xmm3, (%edx, %esi)
319 lea 16(%ecx, %esi), %ecx
323 # ifdef USE_AS_STRNCPY
324 lea 48+64(%ebx, %eax), %ebx
330 movaps 32(%ecx), %xmm3
332 movaps 16(%ecx), %xmm5
334 movaps 48(%ecx), %xmm7
342 # ifdef USE_AS_STRNCPY
344 jbe L(StrncpyLeaveCase2OrCase3)
347 jnz L(Aligned64Leave)
348 movaps %xmm4, -64(%edx)
349 movaps %xmm5, -48(%edx)
350 movaps %xmm6, -32(%edx)
351 movaps %xmm7, -16(%edx)
355 # ifdef USE_AS_STRNCPY
361 jnz L(CopyFrom1To16Bytes)
364 # ifdef USE_AS_STRNCPY
368 movaps %xmm4, -64(%edx)
371 jnz L(CopyFrom1To16Bytes)
374 # ifdef USE_AS_STRNCPY
378 movaps %xmm5, -48(%edx)
381 jnz L(CopyFrom1To16Bytes)
383 movaps %xmm6, -32(%edx)
385 # ifdef USE_AS_STRNCPY
390 jmp L(CopyFrom1To16Bytes)
394 movaps -1(%ecx), %xmm1
395 movaps 15(%ecx), %xmm2
400 # ifdef USE_AS_STRNCPY
402 jbe L(StrncpyExit1Case2OrCase3)
407 palignr $1, %xmm1, %xmm2
410 movaps 31(%ecx), %xmm2
417 # ifdef USE_AS_STRNCPY
419 jbe L(StrncpyExit1Case2OrCase3)
424 palignr $1, %xmm1, %xmm2
426 movaps 31(%ecx), %xmm2
434 # ifdef USE_AS_STRNCPY
436 jbe L(StrncpyExit1Case2OrCase3)
441 palignr $1, %xmm1, %xmm2
444 movaps 31(%ecx), %xmm2
451 # ifdef USE_AS_STRNCPY
453 jbe L(StrncpyExit1Case2OrCase3)
458 palignr $1, %xmm1, %xmm2
469 # ifdef USE_AS_STRNCPY
472 movaps -1(%ecx), %xmm1
475 movaps 15(%ecx), %xmm2
476 movaps 31(%ecx), %xmm3
478 movaps 47(%ecx), %xmm4
480 movaps 63(%ecx), %xmm5
487 palignr $1, %xmm4, %xmm5
489 palignr $1, %xmm3, %xmm4
491 # ifdef USE_AS_STRNCPY
495 palignr $1, %xmm2, %xmm3
497 palignr $1, %xmm1, %xmm2
499 movaps %xmm5, 48(%edx)
500 movaps %xmm4, 32(%edx)
501 movaps %xmm3, 16(%edx)
510 palignr $1, %xmm1, %xmm6
512 jmp L(CopyFrom1To16Bytes)
516 movaps -2(%ecx), %xmm1
517 movaps 14(%ecx), %xmm2
522 # ifdef USE_AS_STRNCPY
524 jbe L(StrncpyExit2Case2OrCase3)
529 palignr $2, %xmm1, %xmm2
532 movaps 30(%ecx), %xmm2
539 # ifdef USE_AS_STRNCPY
541 jbe L(StrncpyExit2Case2OrCase3)
546 palignr $2, %xmm1, %xmm2
548 movaps 30(%ecx), %xmm2
556 # ifdef USE_AS_STRNCPY
558 jbe L(StrncpyExit2Case2OrCase3)
563 palignr $2, %xmm1, %xmm2
566 movaps 30(%ecx), %xmm2
573 # ifdef USE_AS_STRNCPY
575 jbe L(StrncpyExit2Case2OrCase3)
580 palignr $2, %xmm1, %xmm2
591 # ifdef USE_AS_STRNCPY
594 movaps -2(%ecx), %xmm1
597 movaps 14(%ecx), %xmm2
598 movaps 30(%ecx), %xmm3
600 movaps 46(%ecx), %xmm4
602 movaps 62(%ecx), %xmm5
609 palignr $2, %xmm4, %xmm5
611 palignr $2, %xmm3, %xmm4
613 # ifdef USE_AS_STRNCPY
617 palignr $2, %xmm2, %xmm3
619 palignr $2, %xmm1, %xmm2
621 movaps %xmm5, 48(%edx)
622 movaps %xmm4, 32(%edx)
623 movaps %xmm3, 16(%edx)
632 palignr $2, %xmm1, %xmm6
634 jmp L(CopyFrom1To16Bytes)
638 movaps -3(%ecx), %xmm1
639 movaps 13(%ecx), %xmm2
644 # ifdef USE_AS_STRNCPY
646 jbe L(StrncpyExit3Case2OrCase3)
651 palignr $3, %xmm1, %xmm2
654 movaps 29(%ecx), %xmm2
661 # ifdef USE_AS_STRNCPY
663 jbe L(StrncpyExit3Case2OrCase3)
668 palignr $3, %xmm1, %xmm2
670 movaps 29(%ecx), %xmm2
678 # ifdef USE_AS_STRNCPY
680 jbe L(StrncpyExit3Case2OrCase3)
685 palignr $3, %xmm1, %xmm2
688 movaps 29(%ecx), %xmm2
695 # ifdef USE_AS_STRNCPY
697 jbe L(StrncpyExit3Case2OrCase3)
702 palignr $3, %xmm1, %xmm2
713 # ifdef USE_AS_STRNCPY
716 movaps -3(%ecx), %xmm1
719 movaps 13(%ecx), %xmm2
720 movaps 29(%ecx), %xmm3
722 movaps 45(%ecx), %xmm4
724 movaps 61(%ecx), %xmm5
731 palignr $3, %xmm4, %xmm5
733 palignr $3, %xmm3, %xmm4
735 # ifdef USE_AS_STRNCPY
739 palignr $3, %xmm2, %xmm3
741 palignr $3, %xmm1, %xmm2
743 movaps %xmm5, 48(%edx)
744 movaps %xmm4, 32(%edx)
745 movaps %xmm3, 16(%edx)
754 palignr $3, %xmm1, %xmm6
756 jmp L(CopyFrom1To16Bytes)
760 movaps -4(%ecx), %xmm1
761 movaps 12(%ecx), %xmm2
766 # ifdef USE_AS_STRNCPY
768 jbe L(StrncpyExit4Case2OrCase3)
773 palignr $4, %xmm1, %xmm2
776 movaps 28(%ecx), %xmm2
783 # ifdef USE_AS_STRNCPY
785 jbe L(StrncpyExit4Case2OrCase3)
790 palignr $4, %xmm1, %xmm2
792 movaps 28(%ecx), %xmm2
800 # ifdef USE_AS_STRNCPY
802 jbe L(StrncpyExit4Case2OrCase3)
807 palignr $4, %xmm1, %xmm2
810 movaps 28(%ecx), %xmm2
817 # ifdef USE_AS_STRNCPY
819 jbe L(StrncpyExit4Case2OrCase3)
824 palignr $4, %xmm1, %xmm2
835 # ifdef USE_AS_STRNCPY
838 movaps -4(%ecx), %xmm1
841 movaps 12(%ecx), %xmm2
842 movaps 28(%ecx), %xmm3
844 movaps 44(%ecx), %xmm4
846 movaps 60(%ecx), %xmm5
853 palignr $4, %xmm4, %xmm5
855 palignr $4, %xmm3, %xmm4
857 # ifdef USE_AS_STRNCPY
861 palignr $4, %xmm2, %xmm3
863 palignr $4, %xmm1, %xmm2
865 movaps %xmm5, 48(%edx)
866 movaps %xmm4, 32(%edx)
867 movaps %xmm3, 16(%edx)
876 palignr $4, %xmm1, %xmm6
878 jmp L(CopyFrom1To16Bytes)
882 movaps -5(%ecx), %xmm1
883 movaps 11(%ecx), %xmm2
888 # ifdef USE_AS_STRNCPY
890 jbe L(StrncpyExit5Case2OrCase3)
895 palignr $5, %xmm1, %xmm2
898 movaps 27(%ecx), %xmm2
905 # ifdef USE_AS_STRNCPY
907 jbe L(StrncpyExit5Case2OrCase3)
912 palignr $5, %xmm1, %xmm2
914 movaps 27(%ecx), %xmm2
922 # ifdef USE_AS_STRNCPY
924 jbe L(StrncpyExit5Case2OrCase3)
929 palignr $5, %xmm1, %xmm2
932 movaps 27(%ecx), %xmm2
939 # ifdef USE_AS_STRNCPY
941 jbe L(StrncpyExit5Case2OrCase3)
946 palignr $5, %xmm1, %xmm2
957 # ifdef USE_AS_STRNCPY
960 movaps -5(%ecx), %xmm1
963 movaps 11(%ecx), %xmm2
964 movaps 27(%ecx), %xmm3
966 movaps 43(%ecx), %xmm4
968 movaps 59(%ecx), %xmm5
975 palignr $5, %xmm4, %xmm5
977 palignr $5, %xmm3, %xmm4
979 # ifdef USE_AS_STRNCPY
983 palignr $5, %xmm2, %xmm3
985 palignr $5, %xmm1, %xmm2
987 movaps %xmm5, 48(%edx)
988 movaps %xmm4, 32(%edx)
989 movaps %xmm3, 16(%edx)
998 palignr $5, %xmm1, %xmm6
1000 jmp L(CopyFrom1To16Bytes)
1004 movaps -6(%ecx), %xmm1
1005 movaps 10(%ecx), %xmm2
1007 pcmpeqb %xmm2, %xmm0
1008 pmovmskb %xmm0, %eax
1010 # ifdef USE_AS_STRNCPY
1012 jbe L(StrncpyExit6Case2OrCase3)
1017 palignr $6, %xmm1, %xmm2
1019 movaps %xmm2, (%edx)
1020 movaps 26(%ecx), %xmm2
1022 pcmpeqb %xmm2, %xmm0
1024 pmovmskb %xmm0, %eax
1027 # ifdef USE_AS_STRNCPY
1029 jbe L(StrncpyExit6Case2OrCase3)
1034 palignr $6, %xmm1, %xmm2
1035 movaps %xmm2, (%edx)
1036 movaps 26(%ecx), %xmm2
1039 pcmpeqb %xmm2, %xmm0
1041 pmovmskb %xmm0, %eax
1044 # ifdef USE_AS_STRNCPY
1046 jbe L(StrncpyExit6Case2OrCase3)
1051 palignr $6, %xmm1, %xmm2
1053 movaps %xmm2, (%edx)
1054 movaps 26(%ecx), %xmm2
1056 pcmpeqb %xmm2, %xmm0
1058 pmovmskb %xmm0, %eax
1061 # ifdef USE_AS_STRNCPY
1063 jbe L(StrncpyExit6Case2OrCase3)
1068 palignr $6, %xmm1, %xmm2
1070 movaps %xmm2, (%edx)
1079 # ifdef USE_AS_STRNCPY
1082 movaps -6(%ecx), %xmm1
1085 movaps 10(%ecx), %xmm2
1086 movaps 26(%ecx), %xmm3
1088 movaps 42(%ecx), %xmm4
1090 movaps 58(%ecx), %xmm5
1094 pcmpeqb %xmm0, %xmm7
1095 pmovmskb %xmm7, %eax
1097 palignr $6, %xmm4, %xmm5
1099 palignr $6, %xmm3, %xmm4
1101 # ifdef USE_AS_STRNCPY
1103 jbe L(StrncpyLeave6)
1105 palignr $6, %xmm2, %xmm3
1107 palignr $6, %xmm1, %xmm2
1109 movaps %xmm5, 48(%edx)
1110 movaps %xmm4, 32(%edx)
1111 movaps %xmm3, 16(%edx)
1112 movaps %xmm2, (%edx)
1114 jmp L(Shl6LoopStart)
1117 movaps (%edx), %xmm6
1120 palignr $6, %xmm1, %xmm6
1121 movaps %xmm6, (%edx)
1122 jmp L(CopyFrom1To16Bytes)
1126 movaps -7(%ecx), %xmm1
1127 movaps 9(%ecx), %xmm2
1129 pcmpeqb %xmm2, %xmm0
1130 pmovmskb %xmm0, %eax
1132 # ifdef USE_AS_STRNCPY
1134 jbe L(StrncpyExit7Case2OrCase3)
1139 palignr $7, %xmm1, %xmm2
1141 movaps %xmm2, (%edx)
1142 movaps 25(%ecx), %xmm2
1144 pcmpeqb %xmm2, %xmm0
1146 pmovmskb %xmm0, %eax
1149 # ifdef USE_AS_STRNCPY
1151 jbe L(StrncpyExit7Case2OrCase3)
1156 palignr $7, %xmm1, %xmm2
1157 movaps %xmm2, (%edx)
1158 movaps 25(%ecx), %xmm2
1161 pcmpeqb %xmm2, %xmm0
1163 pmovmskb %xmm0, %eax
1166 # ifdef USE_AS_STRNCPY
1168 jbe L(StrncpyExit7Case2OrCase3)
1173 palignr $7, %xmm1, %xmm2
1175 movaps %xmm2, (%edx)
1176 movaps 25(%ecx), %xmm2
1178 pcmpeqb %xmm2, %xmm0
1180 pmovmskb %xmm0, %eax
1183 # ifdef USE_AS_STRNCPY
1185 jbe L(StrncpyExit7Case2OrCase3)
1190 palignr $7, %xmm1, %xmm2
1192 movaps %xmm2, (%edx)
1201 # ifdef USE_AS_STRNCPY
1204 movaps -7(%ecx), %xmm1
1207 movaps 9(%ecx), %xmm2
1208 movaps 25(%ecx), %xmm3
1210 movaps 41(%ecx), %xmm4
1212 movaps 57(%ecx), %xmm5
1216 pcmpeqb %xmm0, %xmm7
1217 pmovmskb %xmm7, %eax
1219 palignr $7, %xmm4, %xmm5
1221 palignr $7, %xmm3, %xmm4
1223 # ifdef USE_AS_STRNCPY
1225 jbe L(StrncpyLeave7)
1227 palignr $7, %xmm2, %xmm3
1229 palignr $7, %xmm1, %xmm2
1231 movaps %xmm5, 48(%edx)
1232 movaps %xmm4, 32(%edx)
1233 movaps %xmm3, 16(%edx)
1234 movaps %xmm2, (%edx)
1236 jmp L(Shl7LoopStart)
1239 movaps (%edx), %xmm6
1242 palignr $7, %xmm1, %xmm6
1243 movaps %xmm6, (%edx)
1244 jmp L(CopyFrom1To16Bytes)
1248 movaps -8(%ecx), %xmm1
1249 movaps 8(%ecx), %xmm2
1251 pcmpeqb %xmm2, %xmm0
1252 pmovmskb %xmm0, %eax
1254 # ifdef USE_AS_STRNCPY
1256 jbe L(StrncpyExit8Case2OrCase3)
1261 palignr $8, %xmm1, %xmm2
1263 movaps %xmm2, (%edx)
1264 movaps 24(%ecx), %xmm2
1266 pcmpeqb %xmm2, %xmm0
1268 pmovmskb %xmm0, %eax
1271 # ifdef USE_AS_STRNCPY
1273 jbe L(StrncpyExit8Case2OrCase3)
1278 palignr $8, %xmm1, %xmm2
1279 movaps %xmm2, (%edx)
1280 movaps 24(%ecx), %xmm2
1283 pcmpeqb %xmm2, %xmm0
1285 pmovmskb %xmm0, %eax
1288 # ifdef USE_AS_STRNCPY
1290 jbe L(StrncpyExit8Case2OrCase3)
1295 palignr $8, %xmm1, %xmm2
1297 movaps %xmm2, (%edx)
1298 movaps 24(%ecx), %xmm2
1300 pcmpeqb %xmm2, %xmm0
1302 pmovmskb %xmm0, %eax
1305 # ifdef USE_AS_STRNCPY
1307 jbe L(StrncpyExit8Case2OrCase3)
1312 palignr $8, %xmm1, %xmm2
1314 movaps %xmm2, (%edx)
1323 # ifdef USE_AS_STRNCPY
1326 movaps -8(%ecx), %xmm1
1329 movaps 8(%ecx), %xmm2
1330 movaps 24(%ecx), %xmm3
1332 movaps 40(%ecx), %xmm4
1334 movaps 56(%ecx), %xmm5
1338 pcmpeqb %xmm0, %xmm7
1339 pmovmskb %xmm7, %eax
1341 palignr $8, %xmm4, %xmm5
1343 palignr $8, %xmm3, %xmm4
1345 # ifdef USE_AS_STRNCPY
1347 jbe L(StrncpyLeave8)
1349 palignr $8, %xmm2, %xmm3
1351 palignr $8, %xmm1, %xmm2
1353 movaps %xmm5, 48(%edx)
1354 movaps %xmm4, 32(%edx)
1355 movaps %xmm3, 16(%edx)
1356 movaps %xmm2, (%edx)
1358 jmp L(Shl8LoopStart)
1361 movaps (%edx), %xmm6
1364 palignr $8, %xmm1, %xmm6
1365 movaps %xmm6, (%edx)
1366 jmp L(CopyFrom1To16Bytes)
1370 movaps -9(%ecx), %xmm1
1371 movaps 7(%ecx), %xmm2
1373 pcmpeqb %xmm2, %xmm0
1374 pmovmskb %xmm0, %eax
1376 # ifdef USE_AS_STRNCPY
1378 jbe L(StrncpyExit9Case2OrCase3)
1383 palignr $9, %xmm1, %xmm2
1385 movaps %xmm2, (%edx)
1386 movaps 23(%ecx), %xmm2
1388 pcmpeqb %xmm2, %xmm0
1390 pmovmskb %xmm0, %eax
1393 # ifdef USE_AS_STRNCPY
1395 jbe L(StrncpyExit9Case2OrCase3)
1400 palignr $9, %xmm1, %xmm2
1401 movaps %xmm2, (%edx)
1402 movaps 23(%ecx), %xmm2
1405 pcmpeqb %xmm2, %xmm0
1407 pmovmskb %xmm0, %eax
1410 # ifdef USE_AS_STRNCPY
1412 jbe L(StrncpyExit9Case2OrCase3)
1417 palignr $9, %xmm1, %xmm2
1419 movaps %xmm2, (%edx)
1420 movaps 23(%ecx), %xmm2
1422 pcmpeqb %xmm2, %xmm0
1424 pmovmskb %xmm0, %eax
1427 # ifdef USE_AS_STRNCPY
1429 jbe L(StrncpyExit9Case2OrCase3)
1434 palignr $9, %xmm1, %xmm2
1436 movaps %xmm2, (%edx)
1445 # ifdef USE_AS_STRNCPY
1448 movaps -9(%ecx), %xmm1
1451 movaps 7(%ecx), %xmm2
1452 movaps 23(%ecx), %xmm3
1454 movaps 39(%ecx), %xmm4
1456 movaps 55(%ecx), %xmm5
1460 pcmpeqb %xmm0, %xmm7
1461 pmovmskb %xmm7, %eax
1463 palignr $9, %xmm4, %xmm5
1465 palignr $9, %xmm3, %xmm4
1467 # ifdef USE_AS_STRNCPY
1469 jbe L(StrncpyLeave9)
1471 palignr $9, %xmm2, %xmm3
1473 palignr $9, %xmm1, %xmm2
1475 movaps %xmm5, 48(%edx)
1476 movaps %xmm4, 32(%edx)
1477 movaps %xmm3, 16(%edx)
1478 movaps %xmm2, (%edx)
1480 jmp L(Shl9LoopStart)
1483 movaps (%edx), %xmm6
1486 palignr $9, %xmm1, %xmm6
1487 movaps %xmm6, (%edx)
1488 jmp L(CopyFrom1To16Bytes)
1492 movaps -10(%ecx), %xmm1
1493 movaps 6(%ecx), %xmm2
1495 pcmpeqb %xmm2, %xmm0
1496 pmovmskb %xmm0, %eax
1498 # ifdef USE_AS_STRNCPY
1500 jbe L(StrncpyExit10Case2OrCase3)
1503 jnz L(Shl10LoopExit)
1505 palignr $10, %xmm1, %xmm2
1507 movaps %xmm2, (%edx)
1508 movaps 22(%ecx), %xmm2
1510 pcmpeqb %xmm2, %xmm0
1512 pmovmskb %xmm0, %eax
1515 # ifdef USE_AS_STRNCPY
1517 jbe L(StrncpyExit10Case2OrCase3)
1520 jnz L(Shl10LoopExit)
1522 palignr $10, %xmm1, %xmm2
1523 movaps %xmm2, (%edx)
1524 movaps 22(%ecx), %xmm2
1527 pcmpeqb %xmm2, %xmm0
1529 pmovmskb %xmm0, %eax
1532 # ifdef USE_AS_STRNCPY
1534 jbe L(StrncpyExit10Case2OrCase3)
1537 jnz L(Shl10LoopExit)
1539 palignr $10, %xmm1, %xmm2
1541 movaps %xmm2, (%edx)
1542 movaps 22(%ecx), %xmm2
1544 pcmpeqb %xmm2, %xmm0
1546 pmovmskb %xmm0, %eax
1549 # ifdef USE_AS_STRNCPY
1551 jbe L(StrncpyExit10Case2OrCase3)
1554 jnz L(Shl10LoopExit)
1556 palignr $10, %xmm1, %xmm2
1558 movaps %xmm2, (%edx)
1567 # ifdef USE_AS_STRNCPY
1570 movaps -10(%ecx), %xmm1
1573 movaps 6(%ecx), %xmm2
1574 movaps 22(%ecx), %xmm3
1576 movaps 38(%ecx), %xmm4
1578 movaps 54(%ecx), %xmm5
1582 pcmpeqb %xmm0, %xmm7
1583 pmovmskb %xmm7, %eax
1585 palignr $10, %xmm4, %xmm5
1587 palignr $10, %xmm3, %xmm4
1589 # ifdef USE_AS_STRNCPY
1591 jbe L(StrncpyLeave10)
1593 palignr $10, %xmm2, %xmm3
1595 palignr $10, %xmm1, %xmm2
1597 movaps %xmm5, 48(%edx)
1598 movaps %xmm4, 32(%edx)
1599 movaps %xmm3, 16(%edx)
1600 movaps %xmm2, (%edx)
1602 jmp L(Shl10LoopStart)
1605 movaps (%edx), %xmm6
1608 palignr $10, %xmm1, %xmm6
1609 movaps %xmm6, (%edx)
1610 jmp L(CopyFrom1To16Bytes)
1614 movaps -11(%ecx), %xmm1
1615 movaps 5(%ecx), %xmm2
1617 pcmpeqb %xmm2, %xmm0
1618 pmovmskb %xmm0, %eax
1620 # ifdef USE_AS_STRNCPY
1622 jbe L(StrncpyExit11Case2OrCase3)
1625 jnz L(Shl11LoopExit)
1627 palignr $11, %xmm1, %xmm2
1629 movaps %xmm2, (%edx)
1630 movaps 21(%ecx), %xmm2
1632 pcmpeqb %xmm2, %xmm0
1634 pmovmskb %xmm0, %eax
1637 # ifdef USE_AS_STRNCPY
1639 jbe L(StrncpyExit11Case2OrCase3)
1642 jnz L(Shl11LoopExit)
1644 palignr $11, %xmm1, %xmm2
1645 movaps %xmm2, (%edx)
1646 movaps 21(%ecx), %xmm2
1649 pcmpeqb %xmm2, %xmm0
1651 pmovmskb %xmm0, %eax
1654 # ifdef USE_AS_STRNCPY
1656 jbe L(StrncpyExit11Case2OrCase3)
1659 jnz L(Shl11LoopExit)
1661 palignr $11, %xmm1, %xmm2
1663 movaps %xmm2, (%edx)
1664 movaps 21(%ecx), %xmm2
1666 pcmpeqb %xmm2, %xmm0
1668 pmovmskb %xmm0, %eax
1671 # ifdef USE_AS_STRNCPY
1673 jbe L(StrncpyExit11Case2OrCase3)
1676 jnz L(Shl11LoopExit)
1678 palignr $11, %xmm1, %xmm2
1680 movaps %xmm2, (%edx)
1689 # ifdef USE_AS_STRNCPY
1692 movaps -11(%ecx), %xmm1
1695 movaps 5(%ecx), %xmm2
1696 movaps 21(%ecx), %xmm3
1698 movaps 37(%ecx), %xmm4
1700 movaps 53(%ecx), %xmm5
1704 pcmpeqb %xmm0, %xmm7
1705 pmovmskb %xmm7, %eax
1707 palignr $11, %xmm4, %xmm5
1709 palignr $11, %xmm3, %xmm4
1711 # ifdef USE_AS_STRNCPY
1713 jbe L(StrncpyLeave11)
1715 palignr $11, %xmm2, %xmm3
1717 palignr $11, %xmm1, %xmm2
1719 movaps %xmm5, 48(%edx)
1720 movaps %xmm4, 32(%edx)
1721 movaps %xmm3, 16(%edx)
1722 movaps %xmm2, (%edx)
1724 jmp L(Shl11LoopStart)
1727 movaps (%edx), %xmm6
1730 palignr $11, %xmm1, %xmm6
1731 movaps %xmm6, (%edx)
1732 jmp L(CopyFrom1To16Bytes)
1736 movaps -12(%ecx), %xmm1
1737 movaps 4(%ecx), %xmm2
1739 pcmpeqb %xmm2, %xmm0
1740 pmovmskb %xmm0, %eax
1742 # ifdef USE_AS_STRNCPY
1744 jbe L(StrncpyExit12Case2OrCase3)
1747 jnz L(Shl12LoopExit)
1749 palignr $12, %xmm1, %xmm2
1751 movaps %xmm2, (%edx)
1752 movaps 20(%ecx), %xmm2
1754 pcmpeqb %xmm2, %xmm0
1756 pmovmskb %xmm0, %eax
1759 # ifdef USE_AS_STRNCPY
1761 jbe L(StrncpyExit12Case2OrCase3)
1764 jnz L(Shl12LoopExit)
1766 palignr $12, %xmm1, %xmm2
1767 movaps %xmm2, (%edx)
1768 movaps 20(%ecx), %xmm2
1771 pcmpeqb %xmm2, %xmm0
1773 pmovmskb %xmm0, %eax
1776 # ifdef USE_AS_STRNCPY
1778 jbe L(StrncpyExit12Case2OrCase3)
1781 jnz L(Shl12LoopExit)
1783 palignr $12, %xmm1, %xmm2
1785 movaps %xmm2, (%edx)
1786 movaps 20(%ecx), %xmm2
1788 pcmpeqb %xmm2, %xmm0
1790 pmovmskb %xmm0, %eax
1793 # ifdef USE_AS_STRNCPY
1795 jbe L(StrncpyExit12Case2OrCase3)
1798 jnz L(Shl12LoopExit)
1800 palignr $12, %xmm1, %xmm2
1802 movaps %xmm2, (%edx)
1811 # ifdef USE_AS_STRNCPY
1814 movaps -12(%ecx), %xmm1
1817 movaps 4(%ecx), %xmm2
1818 movaps 20(%ecx), %xmm3
1820 movaps 36(%ecx), %xmm4
1822 movaps 52(%ecx), %xmm5
1826 pcmpeqb %xmm0, %xmm7
1827 pmovmskb %xmm7, %eax
1829 palignr $12, %xmm4, %xmm5
1831 palignr $12, %xmm3, %xmm4
1833 # ifdef USE_AS_STRNCPY
1835 jbe L(StrncpyLeave12)
1837 palignr $12, %xmm2, %xmm3
1839 palignr $12, %xmm1, %xmm2
1841 movaps %xmm5, 48(%edx)
1842 movaps %xmm4, 32(%edx)
1843 movaps %xmm3, 16(%edx)
1844 movaps %xmm2, (%edx)
1846 jmp L(Shl12LoopStart)
1849 movaps (%edx), %xmm6
1852 palignr $12, %xmm1, %xmm6
1853 movaps %xmm6, (%edx)
1854 jmp L(CopyFrom1To16Bytes)
1858 movaps -13(%ecx), %xmm1
1859 movaps 3(%ecx), %xmm2
1861 pcmpeqb %xmm2, %xmm0
1862 pmovmskb %xmm0, %eax
1864 # ifdef USE_AS_STRNCPY
1866 jbe L(StrncpyExit13Case2OrCase3)
1869 jnz L(Shl13LoopExit)
1871 palignr $13, %xmm1, %xmm2
1873 movaps %xmm2, (%edx)
1874 movaps 19(%ecx), %xmm2
1876 pcmpeqb %xmm2, %xmm0
1878 pmovmskb %xmm0, %eax
1881 # ifdef USE_AS_STRNCPY
1883 jbe L(StrncpyExit13Case2OrCase3)
1886 jnz L(Shl13LoopExit)
1888 palignr $13, %xmm1, %xmm2
1889 movaps %xmm2, (%edx)
1890 movaps 19(%ecx), %xmm2
1893 pcmpeqb %xmm2, %xmm0
1895 pmovmskb %xmm0, %eax
1898 # ifdef USE_AS_STRNCPY
1900 jbe L(StrncpyExit13Case2OrCase3)
1903 jnz L(Shl13LoopExit)
1905 palignr $13, %xmm1, %xmm2
1907 movaps %xmm2, (%edx)
1908 movaps 19(%ecx), %xmm2
1910 pcmpeqb %xmm2, %xmm0
1912 pmovmskb %xmm0, %eax
1915 # ifdef USE_AS_STRNCPY
1917 jbe L(StrncpyExit13Case2OrCase3)
1920 jnz L(Shl13LoopExit)
1922 palignr $13, %xmm1, %xmm2
1924 movaps %xmm2, (%edx)
1933 # ifdef USE_AS_STRNCPY
1936 movaps -13(%ecx), %xmm1
1939 movaps 3(%ecx), %xmm2
1940 movaps 19(%ecx), %xmm3
1942 movaps 35(%ecx), %xmm4
1944 movaps 51(%ecx), %xmm5
1948 pcmpeqb %xmm0, %xmm7
1949 pmovmskb %xmm7, %eax
1951 palignr $13, %xmm4, %xmm5
1953 palignr $13, %xmm3, %xmm4
1955 # ifdef USE_AS_STRNCPY
1957 jbe L(StrncpyLeave13)
1959 palignr $13, %xmm2, %xmm3
1961 palignr $13, %xmm1, %xmm2
1963 movaps %xmm5, 48(%edx)
1964 movaps %xmm4, 32(%edx)
1965 movaps %xmm3, 16(%edx)
1966 movaps %xmm2, (%edx)
1968 jmp L(Shl13LoopStart)
1971 movaps (%edx), %xmm6
1974 palignr $13, %xmm1, %xmm6
1975 movaps %xmm6, (%edx)
1976 jmp L(CopyFrom1To16Bytes)
1980 movaps -14(%ecx), %xmm1
1981 movaps 2(%ecx), %xmm2
1983 pcmpeqb %xmm2, %xmm0
1984 pmovmskb %xmm0, %eax
1986 # ifdef USE_AS_STRNCPY
1988 jbe L(StrncpyExit14Case2OrCase3)
1991 jnz L(Shl14LoopExit)
1993 palignr $14, %xmm1, %xmm2
1995 movaps %xmm2, (%edx)
1996 movaps 18(%ecx), %xmm2
1998 pcmpeqb %xmm2, %xmm0
2000 pmovmskb %xmm0, %eax
2003 # ifdef USE_AS_STRNCPY
2005 jbe L(StrncpyExit14Case2OrCase3)
2008 jnz L(Shl14LoopExit)
2010 palignr $14, %xmm1, %xmm2
2011 movaps %xmm2, (%edx)
2012 movaps 18(%ecx), %xmm2
2015 pcmpeqb %xmm2, %xmm0
2017 pmovmskb %xmm0, %eax
2020 # ifdef USE_AS_STRNCPY
2022 jbe L(StrncpyExit14Case2OrCase3)
2025 jnz L(Shl14LoopExit)
2027 palignr $14, %xmm1, %xmm2
2029 movaps %xmm2, (%edx)
2030 movaps 18(%ecx), %xmm2
2032 pcmpeqb %xmm2, %xmm0
2034 pmovmskb %xmm0, %eax
2037 # ifdef USE_AS_STRNCPY
2039 jbe L(StrncpyExit14Case2OrCase3)
2042 jnz L(Shl14LoopExit)
2044 palignr $14, %xmm1, %xmm2
2046 movaps %xmm2, (%edx)
2055 # ifdef USE_AS_STRNCPY
2058 movaps -14(%ecx), %xmm1
2061 movaps 2(%ecx), %xmm2
2062 movaps 18(%ecx), %xmm3
2064 movaps 34(%ecx), %xmm4
2066 movaps 50(%ecx), %xmm5
2070 pcmpeqb %xmm0, %xmm7
2071 pmovmskb %xmm7, %eax
2073 palignr $14, %xmm4, %xmm5
2075 palignr $14, %xmm3, %xmm4
2077 # ifdef USE_AS_STRNCPY
2079 jbe L(StrncpyLeave14)
2081 palignr $14, %xmm2, %xmm3
2083 palignr $14, %xmm1, %xmm2
2085 movaps %xmm5, 48(%edx)
2086 movaps %xmm4, 32(%edx)
2087 movaps %xmm3, 16(%edx)
2088 movaps %xmm2, (%edx)
2090 jmp L(Shl14LoopStart)
2093 movaps (%edx), %xmm6
2096 palignr $14, %xmm1, %xmm6
2097 movaps %xmm6, (%edx)
2098 jmp L(CopyFrom1To16Bytes)
2102 movaps -15(%ecx), %xmm1
2103 movaps 1(%ecx), %xmm2
2105 pcmpeqb %xmm2, %xmm0
2106 pmovmskb %xmm0, %eax
2108 # ifdef USE_AS_STRNCPY
2110 jbe L(StrncpyExit15Case2OrCase3)
2113 jnz L(Shl15LoopExit)
2115 palignr $15, %xmm1, %xmm2
2117 movaps %xmm2, (%edx)
2118 movaps 17(%ecx), %xmm2
2120 pcmpeqb %xmm2, %xmm0
2122 pmovmskb %xmm0, %eax
2125 # ifdef USE_AS_STRNCPY
2127 jbe L(StrncpyExit15Case2OrCase3)
2130 jnz L(Shl15LoopExit)
2132 palignr $15, %xmm1, %xmm2
2133 movaps %xmm2, (%edx)
2134 movaps 17(%ecx), %xmm2
2137 pcmpeqb %xmm2, %xmm0
2139 pmovmskb %xmm0, %eax
2142 # ifdef USE_AS_STRNCPY
2144 jbe L(StrncpyExit15Case2OrCase3)
2147 jnz L(Shl15LoopExit)
2149 palignr $15, %xmm1, %xmm2
2151 movaps %xmm2, (%edx)
2152 movaps 17(%ecx), %xmm2
2154 pcmpeqb %xmm2, %xmm0
2156 pmovmskb %xmm0, %eax
2159 # ifdef USE_AS_STRNCPY
2161 jbe L(StrncpyExit15Case2OrCase3)
2164 jnz L(Shl15LoopExit)
2166 palignr $15, %xmm1, %xmm2
2168 movaps %xmm2, (%edx)
2177 # ifdef USE_AS_STRNCPY
2180 movaps -15(%ecx), %xmm1
2183 movaps 1(%ecx), %xmm2
2184 movaps 17(%ecx), %xmm3
2186 movaps 33(%ecx), %xmm4
2188 movaps 49(%ecx), %xmm5
2192 pcmpeqb %xmm0, %xmm7
2193 pmovmskb %xmm7, %eax
2195 palignr $15, %xmm4, %xmm5
2197 palignr $15, %xmm3, %xmm4
2199 # ifdef USE_AS_STRNCPY
2201 jbe L(StrncpyLeave15)
2203 palignr $15, %xmm2, %xmm3
2205 palignr $15, %xmm1, %xmm2
2207 movaps %xmm5, 48(%edx)
2208 movaps %xmm4, 32(%edx)
2209 movaps %xmm3, 16(%edx)
2210 movaps %xmm2, (%edx)
2212 jmp L(Shl15LoopStart)
2215 movaps (%edx), %xmm6
2218 palignr $15, %xmm1, %xmm6
2219 movaps %xmm6, (%edx)
2222 L(CopyFrom1To16Bytes):
2223 # ifdef USE_AS_STRNCPY
2249 movlpd (%ecx), %xmm0
2250 movlpd %xmm0, (%edx)
2251 # ifdef USE_AS_STPCPY
2256 # ifdef USE_AS_STRNCPY
2259 jnz L(StrncpyFillTailWithZero1)
2260 # ifdef USE_AS_STPCPY
2286 movlpd (%ecx), %xmm0
2287 movlpd %xmm0, (%edx)
2288 movlpd 8(%ecx), %xmm0
2289 movlpd %xmm0, 8(%edx)
2290 # ifdef USE_AS_STPCPY
2295 # ifdef USE_AS_STRNCPY
2298 jnz L(StrncpyFillTailWithZero1)
2299 # ifdef USE_AS_STPCPY
2306 # ifdef USE_AS_STRNCPY
2311 L(CopyFrom1To16BytesCase2):
2314 lea (%esi, %edx), %esi
2387 L(CopyFrom1To16BytesCase2OrCase3):
2389 jnz L(CopyFrom1To16BytesCase2)
2392 L(CopyFrom1To16BytesCase3):
2410 L(More8Case3): /* but less than 16 */
2418 L(More4Case3): /* but less than 8 */
2423 L(Less12Case3): /* but more than 8 */
2434 # ifdef USE_AS_STPCPY
2439 # ifdef USE_AS_STRNCPY
2442 jnz L(StrncpyFillTailWithZero1)
2443 # ifdef USE_AS_STPCPY
2454 # ifdef USE_AS_STPCPY
2459 # ifdef USE_AS_STRNCPY
2462 jnz L(StrncpyFillTailWithZero1)
2463 # ifdef USE_AS_STPCPY
2476 # ifdef USE_AS_STPCPY
2481 # ifdef USE_AS_STRNCPY
2484 jnz L(StrncpyFillTailWithZero1)
2485 # ifdef USE_AS_STPCPY
2496 # ifdef USE_AS_STPCPY
2501 # ifdef USE_AS_STRNCPY
2504 jnz L(StrncpyFillTailWithZero1)
2505 # ifdef USE_AS_STPCPY
2518 # ifdef USE_AS_STPCPY
2523 # ifdef USE_AS_STRNCPY
2526 jnz L(StrncpyFillTailWithZero1)
2527 # ifdef USE_AS_STPCPY
2540 # ifdef USE_AS_STPCPY
2545 # ifdef USE_AS_STRNCPY
2548 jnz L(StrncpyFillTailWithZero1)
2549 # ifdef USE_AS_STPCPY
2562 # ifdef USE_AS_STPCPY
2567 # ifdef USE_AS_STRNCPY
2570 jnz L(StrncpyFillTailWithZero1)
2571 # ifdef USE_AS_STPCPY
2580 movlpd (%ecx), %xmm0
2581 movlpd %xmm0, (%edx)
2584 # ifdef USE_AS_STPCPY
2589 # ifdef USE_AS_STRNCPY
2592 jnz L(StrncpyFillTailWithZero1)
2593 # ifdef USE_AS_STPCPY
2602 movlpd (%ecx), %xmm0
2603 movlpd %xmm0, (%edx)
2606 # ifdef USE_AS_STPCPY
2611 # ifdef USE_AS_STRNCPY
2614 jnz L(StrncpyFillTailWithZero1)
2615 # ifdef USE_AS_STPCPY
2624 movlpd (%ecx), %xmm0
2625 movlpd %xmm0, (%edx)
2628 # ifdef USE_AS_STPCPY
2633 # ifdef USE_AS_STRNCPY
2636 jnz L(StrncpyFillTailWithZero1)
2637 # ifdef USE_AS_STPCPY
2646 movlpd (%ecx), %xmm0
2647 movlpd %xmm0, (%edx)
2650 # ifdef USE_AS_STPCPY
2655 # ifdef USE_AS_STRNCPY
2658 jnz L(StrncpyFillTailWithZero1)
2659 # ifdef USE_AS_STPCPY
2668 movlpd (%ecx), %xmm0
2669 movlpd %xmm0, (%edx)
2670 movlpd 5(%ecx), %xmm0
2671 movlpd %xmm0, 5(%edx)
2672 # ifdef USE_AS_STPCPY
2677 # ifdef USE_AS_STRNCPY
2680 jnz L(StrncpyFillTailWithZero1)
2681 # ifdef USE_AS_STPCPY
2690 movlpd (%ecx), %xmm0
2691 movlpd %xmm0, (%edx)
2692 movlpd 6(%ecx), %xmm0
2693 movlpd %xmm0, 6(%edx)
2694 # ifdef USE_AS_STPCPY
2699 # ifdef USE_AS_STRNCPY
2702 jnz L(StrncpyFillTailWithZero1)
2703 # ifdef USE_AS_STPCPY
2712 movlpd (%ecx), %xmm0
2713 movlpd %xmm0, (%edx)
2714 movlpd 7(%ecx), %xmm0
2715 movlpd %xmm0, 7(%edx)
2716 # ifdef USE_AS_STPCPY
2721 # ifdef USE_AS_STRNCPY
2724 jnz L(StrncpyFillTailWithZero1)
2725 # ifdef USE_AS_STPCPY
2734 # ifdef USE_AS_STRNCPY
2780 movlpd %xmm0, (%ecx)
2785 movlpd %xmm0, (%ecx)
2791 movlpd %xmm0, (%ecx)
2797 movlpd %xmm0, (%ecx)
2803 movlpd %xmm0, (%ecx)
2809 movlpd %xmm0, (%ecx)
2810 movlpd %xmm0, 5(%ecx)
2815 movlpd %xmm0, (%ecx)
2816 movlpd %xmm0, 6(%ecx)
2821 movlpd %xmm0, (%ecx)
2822 movlpd %xmm0, 7(%ecx)
2827 movlpd %xmm0, (%ecx)
2828 movlpd %xmm0, 8(%ecx)
2832 L(StrncpyFillExit1):
2834 L(FillFrom1To16Bytes):
2849 L(FillMore8): /* but less than 16 */
2857 L(FillMore4): /* but less than 8 */
2862 L(FillLess12): /* but more than 8 */
/* Tail fill for the strncpy build: writes the contents of xmm0 to the
   memory addressed by %ecx, in progressively smaller pieces.
   NOTE(review): xmm0 is presumably all-zero here (going by the label
   names); the instruction that clears it, the pointer advances and the
   count arithmetic that sets the flags tested by the branches below are
   not visible in this extract (see the gaps in the line numbering).  */
2871 L(StrncpyFillTailWithZero1):
2873 L(StrncpyFillTailWithZero):
2877 jbe L(StrncpyFillExit1)
/* Two 8-byte movlpd stores cover the 16 bytes at %ecx; movlpd does not
   require an aligned address.  */
2879 movlpd %xmm0, (%ecx)
2880 movlpd %xmm0, 8(%ecx)
2890 jb L(StrncpyFillLess64)
/* Main loop: four movdqa stores write 64 bytes at %ecx per pass.  movdqa
   faults on an address that is not 16-byte aligned, so %ecx has to be
   aligned before this point is reached.  */
2892 L(StrncpyFillLoopMovdqa):
2893 movdqa %xmm0, (%ecx)
2894 movdqa %xmm0, 16(%ecx)
2895 movdqa %xmm0, 32(%ecx)
2896 movdqa %xmm0, 48(%ecx)
2899 jae L(StrncpyFillLoopMovdqa)
/* Remainder handling.  Unlike the unsigned jb/jae above, the branches
   from here on are signed (jl).  */
2901 L(StrncpyFillLess64):
2903 jl L(StrncpyFillLess32)
/* Store 32 bytes; if the following check passes, store one more 16-byte
   chunk (the %ecx advance in between is among the missing lines) and let
   FillFrom1To16Bytes finish.  */
2904 movdqa %xmm0, (%ecx)
2905 movdqa %xmm0, 16(%ecx)
2908 jl L(StrncpyFillExit1)
2909 movdqa %xmm0, (%ecx)
2911 jmp L(FillFrom1To16Bytes)
/* Same idea with a single 16-byte store before handing the rest to
   FillFrom1To16Bytes.  */
2913 L(StrncpyFillLess32):
2915 jl L(StrncpyFillExit1)
2916 movdqa %xmm0, (%ecx)
2918 jmp L(FillFrom1To16Bytes)
2925 # ifdef USE_AS_STPCPY
2930 # ifdef USE_AS_STRNCPY
2933 jnz L(StrncpyFillTailWithZero)
2934 # ifdef USE_AS_STPCPY
2945 # ifdef USE_AS_STPCPY
2950 # ifdef USE_AS_STRNCPY
2953 jnz L(StrncpyFillTailWithZero)
2954 # ifdef USE_AS_STPCPY
2967 # ifdef USE_AS_STPCPY
2972 # ifdef USE_AS_STRNCPY
2975 jnz L(StrncpyFillTailWithZero)
2976 # ifdef USE_AS_STPCPY
2987 # ifdef USE_AS_STPCPY
2992 # ifdef USE_AS_STRNCPY
2995 jnz L(StrncpyFillTailWithZero)
2996 # ifdef USE_AS_STPCPY
3009 # ifdef USE_AS_STPCPY
3014 # ifdef USE_AS_STRNCPY
3017 jnz L(StrncpyFillTailWithZero)
3018 # ifdef USE_AS_STPCPY
3031 # ifdef USE_AS_STPCPY
3036 # ifdef USE_AS_STRNCPY
3039 jnz L(StrncpyFillTailWithZero)
3040 # ifdef USE_AS_STPCPY
3053 # ifdef USE_AS_STPCPY
3058 # ifdef USE_AS_STRNCPY
3061 jnz L(StrncpyFillTailWithZero)
3062 # ifdef USE_AS_STPCPY
3071 movlpd (%ecx), %xmm0
3072 movlpd %xmm0, (%edx)
3075 # ifdef USE_AS_STPCPY
3080 # ifdef USE_AS_STRNCPY
3083 jnz L(StrncpyFillTailWithZero)
3084 # ifdef USE_AS_STPCPY
3093 movlpd (%ecx), %xmm0
3094 movlpd %xmm0, (%edx)
3097 # ifdef USE_AS_STPCPY
3102 # ifdef USE_AS_STRNCPY
3105 jnz L(StrncpyFillTailWithZero)
3106 # ifdef USE_AS_STPCPY
3115 movlpd (%ecx), %xmm0
3116 movlpd %xmm0, (%edx)
3119 # ifdef USE_AS_STPCPY
3124 # ifdef USE_AS_STRNCPY
3127 jnz L(StrncpyFillTailWithZero)
3128 # ifdef USE_AS_STPCPY
3137 movlpd (%ecx), %xmm0
3138 movlpd %xmm0, (%edx)
3141 # ifdef USE_AS_STPCPY
3146 # ifdef USE_AS_STRNCPY
3149 jnz L(StrncpyFillTailWithZero)
3150 # ifdef USE_AS_STPCPY
3159 movlpd (%ecx), %xmm0
3160 movlpd %xmm0, (%edx)
3161 movlpd 5(%ecx), %xmm0
3162 movlpd %xmm0, 5(%edx)
3163 # ifdef USE_AS_STPCPY
3168 # ifdef USE_AS_STRNCPY
3171 jnz L(StrncpyFillTailWithZero)
3172 # ifdef USE_AS_STPCPY
3181 movlpd (%ecx), %xmm0
3182 movlpd %xmm0, (%edx)
3183 movlpd 6(%ecx), %xmm0
3184 movlpd %xmm0, 6(%edx)
3185 # ifdef USE_AS_STPCPY
3190 # ifdef USE_AS_STRNCPY
3193 jnz L(StrncpyFillTailWithZero)
3194 # ifdef USE_AS_STPCPY
3203 movlpd (%ecx), %xmm0
3204 movlpd %xmm0, (%edx)
3205 movlpd 8(%ecx), %xmm0
3206 movlpd %xmm0, 8(%edx)
3207 # ifdef USE_AS_STPCPY
3212 # ifdef USE_AS_STRNCPY
3215 jnz L(StrncpyFillTailWithZero)
3216 # ifdef USE_AS_STPCPY
3222 # ifdef USE_AS_STRNCPY
/* Dispatcher: a non-zero result goes to Aligned64LeaveCase2, otherwise
   execution falls through to Aligned64LeaveCase3.  The instruction that
   sets the flags for this jnz is not visible in this extract.  */
3223 L(StrncpyLeaveCase2OrCase3):
3225 jnz L(Aligned64LeaveCase2)
/* Stores xmm4, xmm5 and xmm6 to -64(%edx), -48(%edx) and -32(%edx) in
   turn.  Each store is preceded by a branch that leaves for
   CopyFrom1To16BytesCase3 instead; the block also ends by jumping there.
   The first check is signed (jle), the later ones unsigned (jbe).
   NOTE(review): the counter updates that set the flags for these branches
   are among the lines missing from this extract.  */
3227 L(Aligned64LeaveCase3):
3229 jle L(CopyFrom1To16BytesCase3)
3230 movaps %xmm4, -64(%edx)
3233 jbe L(CopyFrom1To16BytesCase3)
3234 movaps %xmm5, -48(%edx)
3237 jbe L(CopyFrom1To16BytesCase3)
3238 movaps %xmm6, -32(%edx)
3241 jmp L(CopyFrom1To16BytesCase3)
/* Walks the four 16-byte chunks held in xmm4..xmm7.  For each chunk:
   pcmpeqb compares it byte-wise against xmm0 and pmovmskb gathers the
   per-byte result into a bit mask in %eax; two branches can then leave
   the block (to CopyFrom1To16BytesCase2OrCase3 or CopyFrom1To16Bytes);
   otherwise the previous chunk is stored below %edx and the next chunk
   is examined.
   NOTE(review): xmm0 is presumably zero on entry, so the mask marks NUL
   bytes.  pcmpeqb overwrites xmm0 with the compare result, which is
   all-zero again whenever no byte matched, so it can serve as the
   reference for the next chunk.  The instructions that set the flags for
   the branches are not visible in this extract -- confirm against the
   full file.  */
3243 L(Aligned64LeaveCase2):
3244 pcmpeqb %xmm4, %xmm0
3245 pmovmskb %xmm0, %eax
3247 jle L(CopyFrom1To16BytesCase2OrCase3)
3249 jnz L(CopyFrom1To16Bytes)
/* Chunk 2 (xmm5); chunk 1 (xmm4) is written out to -64(%edx).  */
3251 pcmpeqb %xmm5, %xmm0
3252 pmovmskb %xmm0, %eax
3253 movaps %xmm4, -64(%edx)
3256 jbe L(CopyFrom1To16BytesCase2OrCase3)
3258 jnz L(CopyFrom1To16Bytes)
/* Chunk 3 (xmm6); chunk 2 (xmm5) is written out to -48(%edx).  */
3260 pcmpeqb %xmm6, %xmm0
3261 pmovmskb %xmm0, %eax
3262 movaps %xmm5, -48(%edx)
3265 jbe L(CopyFrom1To16BytesCase2OrCase3)
3267 jnz L(CopyFrom1To16Bytes)
/* Chunk 4 (xmm7); chunk 3 (xmm6) is written out to -32(%edx), then the
   block always continues at CopyFrom1To16BytesCase2.  */
3269 pcmpeqb %xmm7, %xmm0
3270 pmovmskb %xmm0, %eax
3271 movaps %xmm6, -32(%edx)
3274 jmp L(CopyFrom1To16BytesCase2)
3275 /* -------------------------------------------------- */
/* L(StrncpyExit1Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3276 L(StrncpyExit1Case2OrCase3):
3277 movaps (%edx), %xmm6
/* %xmm6 = bytes 1..15 of %xmm1 followed by byte 0 of %xmm6 (as %xmm6
   stands at this point).  */
3280 palignr $1, %xmm1, %xmm6
3281 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3283 jnz L(CopyFrom1To16BytesCase2)
3284 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit2Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3286 L(StrncpyExit2Case2OrCase3):
3287 movaps (%edx), %xmm6
/* %xmm6 = bytes 2..15 of %xmm1 followed by bytes 0..1 of %xmm6 (as %xmm6
   stands at this point).  */
3290 palignr $2, %xmm1, %xmm6
3291 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3293 jnz L(CopyFrom1To16BytesCase2)
3294 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit3Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3296 L(StrncpyExit3Case2OrCase3):
3297 movaps (%edx), %xmm6
/* %xmm6 = bytes 3..15 of %xmm1 followed by bytes 0..2 of %xmm6 (as %xmm6
   stands at this point).  */
3300 palignr $3, %xmm1, %xmm6
3301 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3303 jnz L(CopyFrom1To16BytesCase2)
3304 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit4Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3306 L(StrncpyExit4Case2OrCase3):
3307 movaps (%edx), %xmm6
/* %xmm6 = bytes 4..15 of %xmm1 followed by bytes 0..3 of %xmm6 (as %xmm6
   stands at this point).  */
3310 palignr $4, %xmm1, %xmm6
3311 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3313 jnz L(CopyFrom1To16BytesCase2)
3314 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit5Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3316 L(StrncpyExit5Case2OrCase3):
3317 movaps (%edx), %xmm6
/* %xmm6 = bytes 5..15 of %xmm1 followed by bytes 0..4 of %xmm6 (as %xmm6
   stands at this point).  */
3320 palignr $5, %xmm1, %xmm6
3321 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3323 jnz L(CopyFrom1To16BytesCase2)
3324 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit6Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3326 L(StrncpyExit6Case2OrCase3):
3327 movaps (%edx), %xmm6
/* %xmm6 = bytes 6..15 of %xmm1 followed by bytes 0..5 of %xmm6 (as %xmm6
   stands at this point).  */
3330 palignr $6, %xmm1, %xmm6
3331 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3333 jnz L(CopyFrom1To16BytesCase2)
3334 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit7Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3336 L(StrncpyExit7Case2OrCase3):
3337 movaps (%edx), %xmm6
/* %xmm6 = bytes 7..15 of %xmm1 followed by bytes 0..6 of %xmm6 (as %xmm6
   stands at this point).  */
3340 palignr $7, %xmm1, %xmm6
3341 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3343 jnz L(CopyFrom1To16BytesCase2)
3344 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit8Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3346 L(StrncpyExit8Case2OrCase3):
3347 movaps (%edx), %xmm6
/* %xmm6 = bytes 8..15 of %xmm1 followed by bytes 0..7 of %xmm6 (as %xmm6
   stands at this point).  */
3350 palignr $8, %xmm1, %xmm6
3351 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3353 jnz L(CopyFrom1To16BytesCase2)
3354 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit9Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3356 L(StrncpyExit9Case2OrCase3):
3357 movaps (%edx), %xmm6
/* %xmm6 = bytes 9..15 of %xmm1 followed by bytes 0..8 of %xmm6 (as %xmm6
   stands at this point).  */
3360 palignr $9, %xmm1, %xmm6
3361 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3363 jnz L(CopyFrom1To16BytesCase2)
3364 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit10Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3366 L(StrncpyExit10Case2OrCase3):
3367 movaps (%edx), %xmm6
/* %xmm6 = bytes 10..15 of %xmm1 followed by bytes 0..9 of %xmm6 (as
   %xmm6 stands at this point).  */
3370 palignr $10, %xmm1, %xmm6
3371 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3373 jnz L(CopyFrom1To16BytesCase2)
3374 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit11Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3376 L(StrncpyExit11Case2OrCase3):
3377 movaps (%edx), %xmm6
/* %xmm6 = bytes 11..15 of %xmm1 followed by bytes 0..10 of %xmm6 (as
   %xmm6 stands at this point).  */
3380 palignr $11, %xmm1, %xmm6
3381 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3383 jnz L(CopyFrom1To16BytesCase2)
3384 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit12Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3386 L(StrncpyExit12Case2OrCase3):
3387 movaps (%edx), %xmm6
/* %xmm6 = bytes 12..15 of %xmm1 followed by bytes 0..11 of %xmm6 (as
   %xmm6 stands at this point).  */
3390 palignr $12, %xmm1, %xmm6
3391 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3393 jnz L(CopyFrom1To16BytesCase2)
3394 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit13Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3396 L(StrncpyExit13Case2OrCase3):
3397 movaps (%edx), %xmm6
/* %xmm6 = bytes 13..15 of %xmm1 followed by bytes 0..12 of %xmm6 (as
   %xmm6 stands at this point).  */
3400 palignr $13, %xmm1, %xmm6
3401 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3403 jnz L(CopyFrom1To16BytesCase2)
3404 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit14Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3406 L(StrncpyExit14Case2OrCase3):
3407 movaps (%edx), %xmm6
/* %xmm6 = bytes 14..15 of %xmm1 followed by bytes 0..13 of %xmm6 (as
   %xmm6 stands at this point).  */
3410 palignr $14, %xmm1, %xmm6
3411 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3413 jnz L(CopyFrom1To16BytesCase2)
3414 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit15Case2OrCase3): read-modify-write of the aligned 16-byte
   block at (%edx), then dispatch to Case2 or Case3.  The lines between
   the load and palignr, and the instruction that sets the flags for jnz,
   are not visible in this excerpt.  */
3416 L(StrncpyExit15Case2OrCase3):
3417 movaps (%edx), %xmm6
/* %xmm6 = byte 15 of %xmm1 followed by bytes 0..14 of %xmm6 (as %xmm6
   stands at this point).  */
3420 palignr $15, %xmm1, %xmm6
3421 movaps %xmm6, (%edx)
/* ZF clear -> Case2, otherwise Case3.  */
3423 jnz L(CopyFrom1To16BytesCase2)
3424 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $1 copy path; its labels and the instructions at
   the missing line numbers are not visible in this excerpt.  movaps needs
   a 16-byte aligned memory operand, so this path relies on %ecx + 31
   being a multiple of 16.  */
/* Bytes 1..15 of %xmm1 followed by byte 0 of %xmm2 form one aligned
   destination block.  */
3430 palignr $1, %xmm1, %xmm2
3432 movaps %xmm2, (%edx)
3433 movaps 31(%ecx), %xmm2
3438 palignr $1, %xmm1, %xmm2
3439 movaps %xmm2, 16(%edx)
3440 movaps 31+16(%ecx), %xmm2
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3446 movaps %xmm4, 32(%edx)
3451 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3456 movaps (%edx, %esi), %xmm6
3458 palignr $1, %xmm1, %xmm6
3459 movaps %xmm6, (%edx, %esi)
3461 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $2 copy path; its labels and the instructions at
   the missing line numbers are not visible in this excerpt.  movaps needs
   a 16-byte aligned memory operand, so this path relies on %ecx + 30
   being a multiple of 16.  */
/* Bytes 2..15 of %xmm1 followed by bytes 0..1 of %xmm2 form one aligned
   destination block.  */
3467 palignr $2, %xmm1, %xmm2
3469 movaps %xmm2, (%edx)
3470 movaps 30(%ecx), %xmm2
3475 palignr $2, %xmm1, %xmm2
3476 movaps %xmm2, 16(%edx)
3477 movaps 30+16(%ecx), %xmm2
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3483 movaps %xmm4, 32(%edx)
3488 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3493 movaps (%edx, %esi), %xmm6
3495 palignr $2, %xmm1, %xmm6
3496 movaps %xmm6, (%edx, %esi)
3498 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $3 copy path; its labels and the instructions at
   the missing line numbers are not visible in this excerpt.  movaps needs
   a 16-byte aligned memory operand, so this path relies on %ecx + 29
   being a multiple of 16.  */
/* Bytes 3..15 of %xmm1 followed by bytes 0..2 of %xmm2 form one aligned
   destination block.  */
3504 palignr $3, %xmm1, %xmm2
3506 movaps %xmm2, (%edx)
3507 movaps 29(%ecx), %xmm2
3512 palignr $3, %xmm1, %xmm2
3513 movaps %xmm2, 16(%edx)
3514 movaps 29+16(%ecx), %xmm2
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3520 movaps %xmm4, 32(%edx)
3525 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3530 movaps (%edx, %esi), %xmm6
3532 palignr $3, %xmm1, %xmm6
3533 movaps %xmm6, (%edx, %esi)
3535 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $4 copy path; its labels and the instructions at
   the missing line numbers are not visible in this excerpt.  movaps needs
   a 16-byte aligned memory operand, so this path relies on %ecx + 28
   being a multiple of 16.  */
/* Bytes 4..15 of %xmm1 followed by bytes 0..3 of %xmm2 form one aligned
   destination block.  */
3541 palignr $4, %xmm1, %xmm2
3543 movaps %xmm2, (%edx)
3544 movaps 28(%ecx), %xmm2
3549 palignr $4, %xmm1, %xmm2
3550 movaps %xmm2, 16(%edx)
3551 movaps 28+16(%ecx), %xmm2
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3557 movaps %xmm4, 32(%edx)
3562 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3567 movaps (%edx, %esi), %xmm6
3569 palignr $4, %xmm1, %xmm6
3570 movaps %xmm6, (%edx, %esi)
3572 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $5 copy path; its labels and the instructions at
   the missing line numbers are not visible in this excerpt.  movaps needs
   a 16-byte aligned memory operand, so this path relies on %ecx + 27
   being a multiple of 16.  */
/* Bytes 5..15 of %xmm1 followed by bytes 0..4 of %xmm2 form one aligned
   destination block.  */
3578 palignr $5, %xmm1, %xmm2
3580 movaps %xmm2, (%edx)
3581 movaps 27(%ecx), %xmm2
3586 palignr $5, %xmm1, %xmm2
3587 movaps %xmm2, 16(%edx)
3588 movaps 27+16(%ecx), %xmm2
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3594 movaps %xmm4, 32(%edx)
3599 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3604 movaps (%edx, %esi), %xmm6
3606 palignr $5, %xmm1, %xmm6
3607 movaps %xmm6, (%edx, %esi)
3609 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $6 copy path; its labels and the instructions at
   the missing line numbers are not visible in this excerpt.  movaps needs
   a 16-byte aligned memory operand, so this path relies on %ecx + 26
   being a multiple of 16.  */
/* Bytes 6..15 of %xmm1 followed by bytes 0..5 of %xmm2 form one aligned
   destination block.  */
3615 palignr $6, %xmm1, %xmm2
3617 movaps %xmm2, (%edx)
3618 movaps 26(%ecx), %xmm2
3623 palignr $6, %xmm1, %xmm2
3624 movaps %xmm2, 16(%edx)
3625 movaps 26+16(%ecx), %xmm2
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3631 movaps %xmm4, 32(%edx)
3636 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3641 movaps (%edx, %esi), %xmm6
3643 palignr $6, %xmm1, %xmm6
3644 movaps %xmm6, (%edx, %esi)
3646 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $7 copy path; its labels and the instructions at
   the missing line numbers are not visible in this excerpt.  movaps needs
   a 16-byte aligned memory operand, so this path relies on %ecx + 25
   being a multiple of 16.  */
/* Bytes 7..15 of %xmm1 followed by bytes 0..6 of %xmm2 form one aligned
   destination block.  */
3652 palignr $7, %xmm1, %xmm2
3654 movaps %xmm2, (%edx)
3655 movaps 25(%ecx), %xmm2
3660 palignr $7, %xmm1, %xmm2
3661 movaps %xmm2, 16(%edx)
3662 movaps 25+16(%ecx), %xmm2
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3668 movaps %xmm4, 32(%edx)
3673 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3678 movaps (%edx, %esi), %xmm6
3680 palignr $7, %xmm1, %xmm6
3681 movaps %xmm6, (%edx, %esi)
3683 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $8 copy path; its labels and the instructions at
   the missing line numbers are not visible in this excerpt.  movaps needs
   a 16-byte aligned memory operand, so this path relies on %ecx + 24
   being a multiple of 16.  */
/* Bytes 8..15 of %xmm1 followed by bytes 0..7 of %xmm2 form one aligned
   destination block.  */
3689 palignr $8, %xmm1, %xmm2
3691 movaps %xmm2, (%edx)
3692 movaps 24(%ecx), %xmm2
3697 palignr $8, %xmm1, %xmm2
3698 movaps %xmm2, 16(%edx)
3699 movaps 24+16(%ecx), %xmm2
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3705 movaps %xmm4, 32(%edx)
3710 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3715 movaps (%edx, %esi), %xmm6
3717 palignr $8, %xmm1, %xmm6
3718 movaps %xmm6, (%edx, %esi)
3720 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $9 copy path; its labels and the instructions at
   the missing line numbers are not visible in this excerpt.  movaps needs
   a 16-byte aligned memory operand, so this path relies on %ecx + 23
   being a multiple of 16.  */
/* Bytes 9..15 of %xmm1 followed by bytes 0..8 of %xmm2 form one aligned
   destination block.  */
3726 palignr $9, %xmm1, %xmm2
3728 movaps %xmm2, (%edx)
3729 movaps 23(%ecx), %xmm2
3734 palignr $9, %xmm1, %xmm2
3735 movaps %xmm2, 16(%edx)
3736 movaps 23+16(%ecx), %xmm2
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3742 movaps %xmm4, 32(%edx)
3747 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3752 movaps (%edx, %esi), %xmm6
3754 palignr $9, %xmm1, %xmm6
3755 movaps %xmm6, (%edx, %esi)
3757 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $10 copy path; its labels (including the
   L(StrncpyExit10) target) and the instructions at the missing line
   numbers are not visible in this excerpt.  Each jle/jbe leaves for
   L(StrncpyExit10) on flags set by one of those hidden instructions
   (jle is a signed test, jbe an unsigned one).  movaps needs a 16-byte
   aligned memory operand, so this path relies on %ecx + 22 being a
   multiple of 16.  */
3762 jle L(StrncpyExit10)
/* Bytes 10..15 of %xmm1 followed by bytes 0..9 of %xmm2 form one aligned
   destination block.  */
3763 palignr $10, %xmm1, %xmm2
3765 movaps %xmm2, (%edx)
3766 movaps 22(%ecx), %xmm2
3770 jbe L(StrncpyExit10)
3771 palignr $10, %xmm1, %xmm2
3772 movaps %xmm2, 16(%edx)
3773 movaps 22+16(%ecx), %xmm2
3777 jbe L(StrncpyExit10)
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3779 movaps %xmm4, 32(%edx)
3782 jbe L(StrncpyExit10)
3784 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3789 movaps (%edx, %esi), %xmm6
3791 palignr $10, %xmm1, %xmm6
3792 movaps %xmm6, (%edx, %esi)
3794 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $11 copy path; its labels (including the
   L(StrncpyExit11) target) and the instructions at the missing line
   numbers are not visible in this excerpt.  Each jle/jbe leaves for
   L(StrncpyExit11) on flags set by one of those hidden instructions
   (jle is a signed test, jbe an unsigned one).  movaps needs a 16-byte
   aligned memory operand, so this path relies on %ecx + 21 being a
   multiple of 16.  */
3799 jle L(StrncpyExit11)
/* Bytes 11..15 of %xmm1 followed by bytes 0..10 of %xmm2 form one aligned
   destination block.  */
3800 palignr $11, %xmm1, %xmm2
3802 movaps %xmm2, (%edx)
3803 movaps 21(%ecx), %xmm2
3807 jbe L(StrncpyExit11)
3808 palignr $11, %xmm1, %xmm2
3809 movaps %xmm2, 16(%edx)
3810 movaps 21+16(%ecx), %xmm2
3814 jbe L(StrncpyExit11)
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3816 movaps %xmm4, 32(%edx)
3819 jbe L(StrncpyExit11)
3821 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3826 movaps (%edx, %esi), %xmm6
3828 palignr $11, %xmm1, %xmm6
3829 movaps %xmm6, (%edx, %esi)
3831 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $12 copy path; its labels (including the
   L(StrncpyExit12) target) and the instructions at the missing line
   numbers are not visible in this excerpt.  Each jle/jbe leaves for
   L(StrncpyExit12) on flags set by one of those hidden instructions
   (jle is a signed test, jbe an unsigned one).  movaps needs a 16-byte
   aligned memory operand, so this path relies on %ecx + 20 being a
   multiple of 16.  */
3836 jle L(StrncpyExit12)
/* Bytes 12..15 of %xmm1 followed by bytes 0..11 of %xmm2 form one aligned
   destination block.  */
3837 palignr $12, %xmm1, %xmm2
3839 movaps %xmm2, (%edx)
3840 movaps 20(%ecx), %xmm2
3844 jbe L(StrncpyExit12)
3845 palignr $12, %xmm1, %xmm2
3846 movaps %xmm2, 16(%edx)
3847 movaps 20+16(%ecx), %xmm2
3851 jbe L(StrncpyExit12)
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3853 movaps %xmm4, 32(%edx)
3856 jbe L(StrncpyExit12)
3858 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3863 movaps (%edx, %esi), %xmm6
3865 palignr $12, %xmm1, %xmm6
3866 movaps %xmm6, (%edx, %esi)
3868 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $13 copy path; its labels (including the
   L(StrncpyExit13) target) and the instructions at the missing line
   numbers are not visible in this excerpt.  Each jle/jbe leaves for
   L(StrncpyExit13) on flags set by one of those hidden instructions
   (jle is a signed test, jbe an unsigned one).  movaps needs a 16-byte
   aligned memory operand, so this path relies on %ecx + 19 being a
   multiple of 16.  */
3873 jle L(StrncpyExit13)
/* Bytes 13..15 of %xmm1 followed by bytes 0..12 of %xmm2 form one aligned
   destination block.  */
3874 palignr $13, %xmm1, %xmm2
3876 movaps %xmm2, (%edx)
3877 movaps 19(%ecx), %xmm2
3881 jbe L(StrncpyExit13)
3882 palignr $13, %xmm1, %xmm2
3883 movaps %xmm2, 16(%edx)
3884 movaps 19+16(%ecx), %xmm2
3888 jbe L(StrncpyExit13)
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3890 movaps %xmm4, 32(%edx)
3893 jbe L(StrncpyExit13)
3895 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3900 movaps (%edx, %esi), %xmm6
3902 palignr $13, %xmm1, %xmm6
3903 movaps %xmm6, (%edx, %esi)
3905 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $14 copy path; its labels (including the
   L(StrncpyExit14) target) and the instructions at the missing line
   numbers are not visible in this excerpt.  Each jle/jbe leaves for
   L(StrncpyExit14) on flags set by one of those hidden instructions
   (jle is a signed test, jbe an unsigned one).  movaps needs a 16-byte
   aligned memory operand, so this path relies on %ecx + 18 being a
   multiple of 16.  */
3910 jle L(StrncpyExit14)
/* Bytes 14..15 of %xmm1 followed by bytes 0..13 of %xmm2 form one aligned
   destination block.  */
3911 palignr $14, %xmm1, %xmm2
3913 movaps %xmm2, (%edx)
3914 movaps 18(%ecx), %xmm2
3918 jbe L(StrncpyExit14)
3919 palignr $14, %xmm1, %xmm2
3920 movaps %xmm2, 16(%edx)
3921 movaps 18+16(%ecx), %xmm2
3925 jbe L(StrncpyExit14)
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3927 movaps %xmm4, 32(%edx)
3930 jbe L(StrncpyExit14)
3932 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3937 movaps (%edx, %esi), %xmm6
3939 palignr $14, %xmm1, %xmm6
3940 movaps %xmm6, (%edx, %esi)
3942 jmp L(CopyFrom1To16BytesCase3)
/* Fragment of a palignr $15 copy path; its labels (including the
   L(StrncpyExit15) target) and the instructions at the missing line
   numbers are not visible in this excerpt.  Each jle/jbe leaves for
   L(StrncpyExit15) on flags set by one of those hidden instructions
   (jle is a signed test, jbe an unsigned one).  movaps needs a 16-byte
   aligned memory operand, so this path relies on %ecx + 17 being a
   multiple of 16.  */
3947 jle L(StrncpyExit15)
/* Byte 15 of %xmm1 followed by bytes 0..14 of %xmm2 form one aligned
   destination block.  */
3948 palignr $15, %xmm1, %xmm2
3950 movaps %xmm2, (%edx)
3951 movaps 17(%ecx), %xmm2
3955 jbe L(StrncpyExit15)
3956 palignr $15, %xmm1, %xmm2
3957 movaps %xmm2, 16(%edx)
3958 movaps 17+16(%ecx), %xmm2
3962 jbe L(StrncpyExit15)
/* %xmm4 and %xmm5 are filled by instructions not visible here.  */
3964 movaps %xmm4, 32(%edx)
3967 jbe L(StrncpyExit15)
3969 movaps %xmm5, 48(%edx)
/* Read-modify-write of the destination block at %edx + %esi.  */
3974 movaps (%edx, %esi), %xmm6
3976 palignr $15, %xmm1, %xmm6
3977 movaps %xmm6, (%edx, %esi)
3979 jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit15Bytes): the visible part copies 15 bytes from (%ecx) to
   (%edx) as two overlapping 8-byte moves at offsets 0 and 7, so byte 7 is
   written twice.  The lines between the label and the first move, and
   the bodies of the conditional sections below, are not visible in this
   excerpt.  */
3987 L(StrncpyExit15Bytes):
4016 movlpd (%ecx), %xmm0
4017 movlpd %xmm0, (%edx)
4018 movlpd 7(%ecx), %xmm0
4019 movlpd %xmm0, 7(%edx)
4020 # ifdef USE_AS_STPCPY
4025 # ifdef USE_AS_STRNCPY
/* ZF clear -> L(StrncpyFillTailWithZero); the instruction that sets the
   flags is not visible in this excerpt.  */
4028 jnz L(StrncpyFillTailWithZero)
4029 # ifdef USE_AS_STPCPY
4036 # ifdef USE_AS_STRNCPY
4038 L(StrncpyExit8Bytes):
4070 movlpd (%ecx), %xmm0
4071 movlpd %xmm0, (%edx)
4072 # ifdef USE_AS_STPCPY
4077 # ifdef USE_AS_STRNCPY
4080 jnz L(StrncpyFillTailWithZero)
4081 # ifdef USE_AS_STPCPY