]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
d616d1a7e586ef6c2c7817a6fd5c788b96dbf73a
[thirdparty/glibc.git] / sysdeps / x86_64 / multiarch / strcpy-sse2-unaligned.S
1 /* strcpy with SSE2 and unaligned load
2 Copyright (C) 2011-2014 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #if IS_IN (libc)
21
22 # ifndef USE_AS_STRCAT
23 # include <sysdep.h>
24
25 # ifndef STRCPY
26 # define STRCPY __strcpy_sse2_unaligned
27 # endif
28
29 # endif
30
/* JMPTBL (I, B): a jump-table entry, expressed as the distance of label I
   from the table base B.  */
31 # define JMPTBL(I, B) I - B
/* BRANCH_TO_JMPTBL_ENTRY: take the address of TABLE (RIP-relative), load
   the sign-extended 32-bit entry at TABLE + INDEX * SCALE, add it to the
   table address and jump there.  Overwrites %r11 and %rcx; INDEX is left
   unchanged unless it is one of those two registers.  */
32 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
33 lea TABLE(%rip), %r11; \
34 movslq (%r11, INDEX, SCALE), %rcx; \
35 lea (%r11, %rcx), %rcx; \
36 jmp *%rcx
37
/* Function entry; compiled out when USE_AS_STRCAT is defined (the code
   that follows then expects %rcx to be prepared elsewhere -- not visible
   here).  Registers set up for the code below:
     %rdi  destination pointer, %rsi  source pointer
     %r8   byte count copied from %rdx (USE_AS_STRNCPY only)
     %rcx  copy of %rsi, used for the alignment test
     %rax  copy of %rdi as the return value (not for USE_AS_STPCPY,
	   where the exit paths compute %rax themselves).  */
38 # ifndef USE_AS_STRCAT
39
40 .text
41 ENTRY (STRCPY)
42 # ifdef USE_AS_STRNCPY
43 mov %rdx, %r8
44 test %r8, %r8
45 jz L(ExitZero) /* count of zero: handled at L(ExitZero) (outside this view) */
46 # endif
47 mov %rsi, %rcx
48 # ifndef USE_AS_STPCPY
49 mov %rdi, %rax /* save result */
50 # endif
51
52 # endif
53
/* Start of the copy.  %rcx becomes the source offset inside its 64-byte
   block.  With an offset of at most 32 the first 32 source bytes lie
   inside that block and L(SourceStringAlignmentLess32) reads them with
   unaligned loads.  Otherwise %rsi is rounded down to 16 bytes, %rcx keeps
   the offset inside that 16-byte chunk, and the first two aligned chunks
   are tested for a NUL before anything is stored.  */
54 and $63, %rcx
55 cmp $32, %rcx
56 jbe L(SourceStringAlignmentLess32)
57
58 and $-16, %rsi
59 and $15, %rcx
60 pxor %xmm0, %xmm0 /* %xmm0 = 0, reference value for the NUL compares */
61 pxor %xmm1, %xmm1
62
63 pcmpeqb (%rsi), %xmm1
64 pmovmskb %xmm1, %rdx
65 shr %cl, %rdx /* drop mask bits of the bytes in front of the string start */
66
/* STRNCPY: %r10 = 16 - %rcx (17 - %rcx for the plain variant); a count
   in %r8 that does not exceed it is handled by the Case2/Case3 code.  */
67 # ifdef USE_AS_STRNCPY
68 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
69 mov $16, %r10
70 sub %rcx, %r10
71 cmp %r10, %r8
72 # else
73 mov $17, %r10
74 sub %rcx, %r10
75 cmp %r10, %r8
76 # endif
77 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
78 # endif
79 test %rdx, %rdx
80 jnz L(CopyFrom1To16BytesTail) /* NUL inside the first chunk */
81
82 pcmpeqb 16(%rsi), %xmm0 /* %xmm0 stays all-zero when no byte matches */
83 pmovmskb %xmm0, %rdx
84
85 # ifdef USE_AS_STRNCPY
86 add $16, %r10
87 cmp %r10, %r8
88 jbe L(CopyFrom1To32BytesCase2OrCase3)
89 # endif
90 test %rdx, %rdx
91 jnz L(CopyFrom1To32Bytes) /* NUL inside the second chunk */
92
93 movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
94 movdqu %xmm1, (%rdi)
95
96 /* If source address alignment != destination address alignment */
/* L(Unalign16Both): copy with aligned 16-byte loads and unaligned stores.
   On entry %rsi is 16-byte aligned, %rcx is the offset of the string start
   from %rsi, and the first 16 destination bytes are already written.
   %rdi is moved back by %rcx so that the same displacement addresses
   matching source and destination bytes; %rcx is then reused as that
   running displacement.  Each step loads the next chunk, stores the chunk
   loaded one step earlier and tests the new chunk for a NUL; %xmm0 is
   all-zero again after every compare that finds nothing.  In the STRNCPY
   variants %r8 is rebased by the old %rcx and decremented per chunk; once
   it is exhausted (jbe) control goes to the Case2/Case3 code.  After six
   chunks the pointers are rebased for the 64-byte loop that follows.  */
97 .p2align 4
98 L(Unalign16Both):
99 sub %rcx, %rdi
100 # ifdef USE_AS_STRNCPY
101 add %rcx, %r8
102 # endif
103 mov $16, %rcx
104 movdqa (%rsi, %rcx), %xmm1
105 movaps 16(%rsi, %rcx), %xmm2
106 movdqu %xmm1, (%rdi, %rcx)
107 pcmpeqb %xmm2, %xmm0
108 pmovmskb %xmm0, %rdx
109 add $16, %rcx
110 # ifdef USE_AS_STRNCPY
111 sub $48, %r8
112 jbe L(CopyFrom1To16BytesCase2OrCase3)
113 # endif
114 test %rdx, %rdx
115 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
116 jnz L(CopyFrom1To16BytesUnalignedXmm2)
117 # else
118 jnz L(CopyFrom1To16Bytes)
119 # endif
120
121 movaps 16(%rsi, %rcx), %xmm3
122 movdqu %xmm2, (%rdi, %rcx)
123 pcmpeqb %xmm3, %xmm0
124 pmovmskb %xmm0, %rdx
125 add $16, %rcx
126 # ifdef USE_AS_STRNCPY
127 sub $16, %r8
128 jbe L(CopyFrom1To16BytesCase2OrCase3)
129 # endif
130 test %rdx, %rdx
131 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
132 jnz L(CopyFrom1To16BytesUnalignedXmm3)
133 # else
134 jnz L(CopyFrom1To16Bytes)
135 # endif
136
137 movaps 16(%rsi, %rcx), %xmm4
138 movdqu %xmm3, (%rdi, %rcx)
139 pcmpeqb %xmm4, %xmm0
140 pmovmskb %xmm0, %rdx
141 add $16, %rcx
142 # ifdef USE_AS_STRNCPY
143 sub $16, %r8
144 jbe L(CopyFrom1To16BytesCase2OrCase3)
145 # endif
146 test %rdx, %rdx
147 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
148 jnz L(CopyFrom1To16BytesUnalignedXmm4)
149 # else
150 jnz L(CopyFrom1To16Bytes)
151 # endif
152
153 movaps 16(%rsi, %rcx), %xmm1
154 movdqu %xmm4, (%rdi, %rcx)
155 pcmpeqb %xmm1, %xmm0
156 pmovmskb %xmm0, %rdx
157 add $16, %rcx
158 # ifdef USE_AS_STRNCPY
159 sub $16, %r8
160 jbe L(CopyFrom1To16BytesCase2OrCase3)
161 # endif
162 test %rdx, %rdx
163 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
164 jnz L(CopyFrom1To16BytesUnalignedXmm1)
165 # else
166 jnz L(CopyFrom1To16Bytes)
167 # endif
168
169 movaps 16(%rsi, %rcx), %xmm2
170 movdqu %xmm1, (%rdi, %rcx)
171 pcmpeqb %xmm2, %xmm0
172 pmovmskb %xmm0, %rdx
173 add $16, %rcx
174 # ifdef USE_AS_STRNCPY
175 sub $16, %r8
176 jbe L(CopyFrom1To16BytesCase2OrCase3)
177 # endif
178 test %rdx, %rdx
179 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
180 jnz L(CopyFrom1To16BytesUnalignedXmm2)
181 # else
182 jnz L(CopyFrom1To16Bytes)
183 # endif
184
185 movaps 16(%rsi, %rcx), %xmm3
186 movdqu %xmm2, (%rdi, %rcx)
187 pcmpeqb %xmm3, %xmm0
188 pmovmskb %xmm0, %rdx
189 add $16, %rcx
190 # ifdef USE_AS_STRNCPY
191 sub $16, %r8
192 jbe L(CopyFrom1To16BytesCase2OrCase3)
193 # endif
194 test %rdx, %rdx
195 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
196 jnz L(CopyFrom1To16BytesUnalignedXmm3)
197 # else
198 jnz L(CopyFrom1To16Bytes)
199 # endif
200
/* Store the last pending chunk, then round the source position down to a
   64-byte boundary and shift %rdi (and, for STRNCPY, %r8) by the same
   distance so both pointers keep describing the same byte.  */
201 movdqu %xmm3, (%rdi, %rcx)
202 mov %rsi, %rdx
203 lea 16(%rsi, %rcx), %rsi
204 and $-0x40, %rsi
205 sub %rsi, %rdx /* %rdx = old %rsi - new %rsi */
206 sub %rdx, %rdi
207 # ifdef USE_AS_STRNCPY
208 lea 128(%r8, %rdx), %r8
209 # endif
/* L(Unaligned64Loop): main loop over 64-byte source blocks (%rsi was
   rounded to a 64-byte boundary just above).  Four aligned 16-byte loads
   go to %xmm4/%xmm5/%xmm6/%xmm7 (working copies in %xmm2/%xmm3); pminub
   folds them into one vector whose byte is zero wherever any of the four
   has a zero byte, so a single compare against %xmm0 (zero) detects a NUL
   anywhere in the block.  A block is stored to the destination only on
   the next iteration, after it is known to contain no NUL.  STRNCPY
   variants leave through L(UnalignedLeaveCase2OrCase3) (outside this
   view) once %r8 is exhausted.  */
210 L(Unaligned64Loop):
211 movaps (%rsi), %xmm2
212 movaps %xmm2, %xmm4
213 movaps 16(%rsi), %xmm5
214 movaps 32(%rsi), %xmm3
215 movaps %xmm3, %xmm6
216 movaps 48(%rsi), %xmm7
217 pminub %xmm5, %xmm2
218 pminub %xmm7, %xmm3
219 pminub %xmm2, %xmm3
220 pcmpeqb %xmm0, %xmm3
221 pmovmskb %xmm3, %rdx
222 # ifdef USE_AS_STRNCPY
223 sub $64, %r8
224 jbe L(UnalignedLeaveCase2OrCase3)
225 # endif
226 test %rdx, %rdx
227 jnz L(Unaligned64Leave)
228
/* Steady state: advance both pointers, store the previous block from
   %xmm4..%xmm7 (stores interleaved with the loads of the next block).  */
229 L(Unaligned64Loop_start):
230 add $64, %rdi
231 add $64, %rsi
232 movdqu %xmm4, -64(%rdi)
233 movaps (%rsi), %xmm2
234 movdqa %xmm2, %xmm4
235 movdqu %xmm5, -48(%rdi)
236 movaps 16(%rsi), %xmm5
237 pminub %xmm5, %xmm2
238 movaps 32(%rsi), %xmm3
239 movdqu %xmm6, -32(%rdi)
240 movaps %xmm3, %xmm6
241 movdqu %xmm7, -16(%rdi)
242 movaps 48(%rsi), %xmm7
243 pminub %xmm7, %xmm3
244 pminub %xmm2, %xmm3
245 pcmpeqb %xmm0, %xmm3
246 pmovmskb %xmm3, %rdx
247 # ifdef USE_AS_STRNCPY
248 sub $64, %r8
249 jbe L(UnalignedLeaveCase2OrCase3)
250 # endif
251 test %rdx, %rdx
252 jz L(Unaligned64Loop_start)
253
253
/* L(Unaligned64Leave): the 64-byte block held in %xmm4..%xmm7 contains a
   NUL.  Test the four chunks in order and branch to the handler for the
   first one that has it; falling through the three branches means the
   NUL is in the fourth chunk (%xmm7), which is handled inline here.  */
254 L(Unaligned64Leave):
255 pxor %xmm1, %xmm1
256
257 pcmpeqb %xmm4, %xmm0
258 pcmpeqb %xmm5, %xmm1
259 pmovmskb %xmm0, %rdx
260 pmovmskb %xmm1, %rcx
261 test %rdx, %rdx
262 jnz L(CopyFrom1To16BytesUnaligned_0)
263 test %rcx, %rcx
264 jnz L(CopyFrom1To16BytesUnaligned_16)
265
266 pcmpeqb %xmm6, %xmm0
267 pcmpeqb %xmm7, %xmm1
268 pmovmskb %xmm0, %rdx
269 pmovmskb %xmm1, %rcx
270 test %rdx, %rdx
271 jnz L(CopyFrom1To16BytesUnaligned_32)
272
273 bsf %rcx, %rdx /* %rdx = index of the NUL inside the fourth chunk */
274 movdqu %xmm4, (%rdi)
275 movdqu %xmm5, 16(%rdi)
276 movdqu %xmm6, 32(%rdi)
/* STRNCPY (not STRCAT): store the whole fourth chunk, adjust %r8, point
   %rdi just past the NUL and continue at L(StrncpyFillTailWithZero)
   (outside this view).  Otherwise step both pointers to the fourth chunk
   and dispatch on the NUL index through L(ExitTable).  */
277 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
278 # ifdef USE_AS_STPCPY
279 lea 48(%rdi, %rdx), %rax
280 # endif
281 movdqu %xmm7, 48(%rdi)
282 add $15, %r8
283 sub %rdx, %r8
284 lea 49(%rdi, %rdx), %rdi
285 jmp L(StrncpyFillTailWithZero)
286 # else
287 add $48, %rsi
288 add $48, %rdi
289 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
290 # endif
291
292 /* If source address alignment == destination address alignment */
293
/* L(SourceStringAlignmentLess32): reached when the source offset inside
   its 64-byte block is at most 32, so the two unaligned 16-byte loads
   below stay inside that block.  Tests the first 16 and then the next 16
   source bytes for a NUL (and, for STRNCPY, the count in %r8 against
   16/17 and 32/33); if neither ends the copy, %rsi is rounded down to 16
   bytes, %rcx is reduced to the offset inside that chunk, and the copy
   continues at L(Unalign16Both).  */
294 L(SourceStringAlignmentLess32):
295 pxor %xmm0, %xmm0
296 movdqu (%rsi), %xmm1
297 movdqu 16(%rsi), %xmm2
298 pcmpeqb %xmm1, %xmm0
299 pmovmskb %xmm0, %rdx
300
301 # ifdef USE_AS_STRNCPY
302 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
303 cmp $16, %r8
304 # else
305 cmp $17, %r8
306 # endif
307 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
308 # endif
309 test %rdx, %rdx
310 jnz L(CopyFrom1To16BytesTail1)
311
312 pcmpeqb %xmm2, %xmm0
313 movdqu %xmm1, (%rdi) /* first 16 bytes hold no NUL: store them */
314 pmovmskb %xmm0, %rdx
315
316 # ifdef USE_AS_STRNCPY
317 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
318 cmp $32, %r8
319 # else
320 cmp $33, %r8
321 # endif
322 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
323 # endif
324 test %rdx, %rdx
325 jnz L(CopyFrom1To32Bytes1)
326
327 and $-16, %rsi
328 and $15, %rcx
329 jmp L(Unalign16Both)
330
331 /*------End of main part with loops---------------------*/
332
333 /* Case1 */
334
/* L(CopyFrom1To16Bytes): NUL found in the chunk at displacement %rcx
   (mask in %rdx).  Advance both pointers to that chunk and dispatch on
   the index of the lowest set mask bit through L(ExitTable).  Only built
   when USE_AS_STRNCPY is undefined or USE_AS_STRCAT is defined.  */
335 # if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
336 .p2align 4
337 L(CopyFrom1To16Bytes):
338 add %rcx, %rdi
339 add %rcx, %rsi
340 bsf %rdx, %rdx
341 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
342 # endif
/* L(CopyFrom1To16BytesTail): NUL found in the first aligned chunk.  %rsi
   is moved forward by %rcx (back to the string start); %rdi is used as
   is.  Dispatches on the NUL index through L(ExitTable).  */
343 .p2align 4
344 L(CopyFrom1To16BytesTail):
345 add %rcx, %rsi
346 bsf %rdx, %rdx
347 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
348
/* L(CopyFrom1To32Bytes1): NUL found in the second 16 bytes read on the
   L(SourceStringAlignmentLess32) path (the first 16 are already stored).
   Step both pointers by 16 (and %r8 down by 16 for STRNCPY without
   STRCAT), then fall into L(CopyFrom1To16BytesTail1).
   L(CopyFrom1To16BytesTail1): dispatch on the NUL index through
   L(ExitTable) with the pointers unchanged.  */
349 .p2align 4
350 L(CopyFrom1To32Bytes1):
351 add $16, %rsi
352 add $16, %rdi
353 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
354 sub $16, %r8
355 # endif
356 L(CopyFrom1To16BytesTail1):
357 bsf %rdx, %rdx
358 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
359
/* L(CopyFrom1To32Bytes): NUL found in the second aligned chunk while
   nothing has been stored yet.  %rsi is moved to the string start and the
   table index becomes (NUL index in chunk) + 16 - %rcx, i.e. the NUL
   offset measured from the string start.  */
360 .p2align 4
361 L(CopyFrom1To32Bytes):
362 bsf %rdx, %rdx
363 add %rcx, %rsi
364 add $16, %rdx
365 sub %rcx, %rdx
366 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
367
/* L(CopyFrom1To16BytesUnaligned_0): the NUL is in the first chunk (%xmm4)
   of the current 64-byte block; %rdx holds its mask.
   STRNCPY without STRCAT: store the chunk, set %rax to the NUL address
   for STPCPY, adjust %r8, point %rdi just past the NUL and continue at
   L(StrncpyFillTailWithZero) (outside this view).
   Otherwise: dispatch on the NUL index through L(ExitTable).  */
368 .p2align 4
369 L(CopyFrom1To16BytesUnaligned_0):
370 bsf %rdx, %rdx
371 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
372 # ifdef USE_AS_STPCPY
373 lea (%rdi, %rdx), %rax
374 # endif
375 movdqu %xmm4, (%rdi)
376 add $63, %r8
377 sub %rdx, %r8
378 lea 1(%rdi, %rdx), %rdi
379 jmp L(StrncpyFillTailWithZero)
380 # else
381 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
382 # endif
383
/* L(CopyFrom1To16BytesUnaligned_16): the NUL is in the second chunk
   (%xmm5) of the current 64-byte block; its mask is in %rcx.  The first
   chunk is stored in every variant.
   STRNCPY without STRCAT: also store the second chunk, set %rax for
   STPCPY, adjust %r8, point %rdi just past the NUL and continue at
   L(StrncpyFillTailWithZero) (outside this view).
   Otherwise: step both pointers by 16 and dispatch via L(ExitTable).  */
384 .p2align 4
385 L(CopyFrom1To16BytesUnaligned_16):
386 bsf %rcx, %rdx
387 movdqu %xmm4, (%rdi)
388 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
389 # ifdef USE_AS_STPCPY
390 lea 16(%rdi, %rdx), %rax
391 # endif
392 movdqu %xmm5, 16(%rdi)
393 add $47, %r8
394 sub %rdx, %r8
395 lea 17(%rdi, %rdx), %rdi
396 jmp L(StrncpyFillTailWithZero)
397 # else
398 add $16, %rsi
399 add $16, %rdi
400 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
401 # endif
402
/* L(CopyFrom1To16BytesUnaligned_32): the NUL is in the third chunk
   (%xmm6) of the current 64-byte block; its mask is in %rdx.  The first
   two chunks are stored in every variant.
   STRNCPY without STRCAT: also store the third chunk, set %rax for
   STPCPY, adjust %r8, point %rdi just past the NUL and continue at
   L(StrncpyFillTailWithZero) (outside this view).
   Otherwise: step both pointers by 32 and dispatch via L(ExitTable).  */
403 .p2align 4
404 L(CopyFrom1To16BytesUnaligned_32):
405 bsf %rdx, %rdx
406 movdqu %xmm4, (%rdi)
407 movdqu %xmm5, 16(%rdi)
408 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
409 # ifdef USE_AS_STPCPY
410 lea 32(%rdi, %rdx), %rax
411 # endif
412 movdqu %xmm6, 32(%rdi)
413 add $31, %r8
414 sub %rdx, %r8
415 lea 33(%rdi, %rdx), %rdi
416 jmp L(StrncpyFillTailWithZero)
417 # else
418 add $32, %rsi
419 add $32, %rdi
420 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
421 # endif
422
423 # ifdef USE_AS_STRNCPY
/* L(CopyFrom1To16BytesUnalignedXmmN), STRNCPY without STRCAT only: store
   the whole chunk held in %xmmN at displacement %rcx from %rdi, then
   jump to the shared L(CopyFrom1To16BytesXmmExit).  That label and the
   %xmm2 stub referenced by L(Unalign16Both) are defined outside this
   view.  */
424 # ifndef USE_AS_STRCAT
425 .p2align 4
426 L(CopyFrom1To16BytesUnalignedXmm6):
427 movdqu %xmm6, (%rdi, %rcx)
428 jmp L(CopyFrom1To16BytesXmmExit)
429
430 .p2align 4
431 L(CopyFrom1To16BytesUnalignedXmm5):
432 movdqu %xmm5, (%rdi, %rcx)
433 jmp L(CopyFrom1To16BytesXmmExit)
434
435 .p2align 4
436 L(CopyFrom1To16BytesUnalignedXmm4):
437 movdqu %xmm4, (%rdi, %rcx)
438 jmp L(CopyFrom1To16BytesXmmExit)
439
440 .p2align 4
441 L(CopyFrom1To16BytesUnalignedXmm3):
442 movdqu %xmm3, (%rdi, %rcx)
443 jmp L(CopyFrom1To16BytesXmmExit)
444
445 .p2align 4
446 L(CopyFrom1To16BytesUnalignedXmm1):
447 movdqu %xmm1, (%rdi, %rcx)
448 jmp L(CopyFrom1To16BytesXmmExit)
449 # endif
450
/* L(CopyFrom1To16BytesExit): shared tail of the Case2 paths.  %rdx
   already holds the NUL index; dispatch on it through L(ExitTable).  */
451 .p2align 4
452 L(CopyFrom1To16BytesExit):
453 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
454
455 /* Case2 */
456
/* L(CopyFrom1To16BytesCase2): the count ran out in a chunk that also
   contains a NUL.  Adding 16 undoes the last per-chunk decrement of %r8;
   both pointers are moved to the chunk.  A NUL index below %r8 takes the
   ordinary L(ExitTable) path; otherwise the count limit wins and
   L(ExitStrncpyTable) is indexed by %r8.  */
457 .p2align 4
458 L(CopyFrom1To16BytesCase2):
459 add $16, %r8
460 add %rcx, %rdi
461 add %rcx, %rsi
462 bsf %rdx, %rdx
463 cmp %r8, %rdx
464 jb L(CopyFrom1To16BytesExit)
465 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
466
/* L(CopyFrom1To32BytesCase2): count limit reached within the first two
   aligned chunks and the second chunk contains a NUL.  %rsi is moved to
   the string start and %rdx becomes the NUL offset from it (index + 16 -
   %rcx).  A NUL offset below %r8 exits through L(ExitTable), otherwise
   L(ExitStrncpyTable) is indexed by %r8.  */
467 .p2align 4
468 L(CopyFrom1To32BytesCase2):
469 add %rcx, %rsi
470 bsf %rdx, %rdx
471 add $16, %rdx
472 sub %rcx, %rdx
473 cmp %r8, %rdx
474 jb L(CopyFrom1To16BytesExit)
475 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
476
/* L(CopyFrom1To16BytesTailCase2): count limit reached within the first
   aligned chunk, which also contains a NUL.  %rsi is moved to the string
   start; a NUL index below %r8 exits through L(ExitTable), otherwise
   L(ExitStrncpyTable) is indexed by %r8.  */
477 L(CopyFrom1To16BytesTailCase2):
478 add %rcx, %rsi
479 bsf %rdx, %rdx
480 cmp %r8, %rdx
481 jb L(CopyFrom1To16BytesExit)
482 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
483
/* L(CopyFrom1To16BytesTail1Case2): same decision as the other Case2
   labels but with the pointers already in place: a NUL index below %r8
   exits through L(ExitTable), otherwise L(ExitStrncpyTable) is indexed
   by %r8.  */
484 L(CopyFrom1To16BytesTail1Case2):
485 bsf %rdx, %rdx
486 cmp %r8, %rdx
487 jb L(CopyFrom1To16BytesExit)
488 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
489
490 /* Case2 or Case3, Case3 */
491
/* L(CopyFrom1To16BytesCase2OrCase3): the count ran out in the chunk at
   displacement %rcx.  A nonzero NUL mask in %rdx means Case2.
   L(CopyFrom1To16BytesCase3): no NUL in the chunk; undo the last
   decrement of %r8, move both pointers to the chunk and copy exactly %r8
   bytes via L(ExitStrncpyTable).  */
492 .p2align 4
493 L(CopyFrom1To16BytesCase2OrCase3):
494 test %rdx, %rdx
495 jnz L(CopyFrom1To16BytesCase2)
496 L(CopyFrom1To16BytesCase3):
497 add $16, %r8
498 add %rcx, %rdi
499 add %rcx, %rsi
500 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
501
/* L(CopyFrom1To32BytesCase2OrCase3): count limit reached within the
   first two aligned chunks.  With a NUL in the second chunk go to Case2;
   otherwise move %rsi to the string start and copy exactly %r8 bytes via
   L(ExitStrncpyTable).  */
502 .p2align 4
503 L(CopyFrom1To32BytesCase2OrCase3):
504 test %rdx, %rdx
505 jnz L(CopyFrom1To32BytesCase2)
506 add %rcx, %rsi
507 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
508
/* L(CopyFrom1To16BytesTailCase2OrCase3): count limit reached within the
   first aligned chunk.  With a NUL in that chunk go to Case2; otherwise
   move %rsi to the string start and copy exactly %r8 bytes via
   L(ExitStrncpyTable).  */
509 .p2align 4
510 L(CopyFrom1To16BytesTailCase2OrCase3):
511 test %rdx, %rdx
512 jnz L(CopyFrom1To16BytesTailCase2)
513 add %rcx, %rsi
514 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
515
/* L(CopyFrom1To32Bytes1Case2OrCase3): count limit reached in the second
   16 bytes of the L(SourceStringAlignmentLess32) path (first 16 already
   stored).  Step both pointers by 16 and %r8 down by 16, then fall into
   L(CopyFrom1To16BytesTail1Case2OrCase3): with a NUL in the chunk go to
   Case2, otherwise copy exactly %r8 bytes via L(ExitStrncpyTable).  */
516 .p2align 4
517 L(CopyFrom1To32Bytes1Case2OrCase3):
518 add $16, %rdi
519 add $16, %rsi
520 sub $16, %r8
521 L(CopyFrom1To16BytesTail1Case2OrCase3):
522 test %rdx, %rdx
523 jnz L(CopyFrom1To16BytesTail1Case2)
524 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
525
526 # endif
527
528 /*------------End of labels for copying 1-16 bytes and 1-32 bytes----*/
529
/* Exit1: the string ends at its first byte, so a single zero byte is
   stored from %dh (zero while %rdx holds the small index used by the
   visible L(ExitTable) dispatch sites).  STPCPY: %rax = %rdi.
   STRNCPY without STRCAT: %r8 -= 1, %rdi += 1; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
530 .p2align 4
531 L(Exit1):
532 mov %dh, (%rdi)
533 # ifdef USE_AS_STPCPY
534 lea (%rdi), %rax
535 # endif
536 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
537 sub $1, %r8
538 lea 1(%rdi), %rdi
539 jnz L(StrncpyFillTailWithZero)
540 # endif
541 ret
542
/* Exit2: copy 2 bytes from (%rsi) to (%rdi) with one 16-bit move; byte 1
   is presumably the terminating NUL (L(ExitTable) is outside this view).
   STPCPY: %rax = 1(%rdi).  STRNCPY without STRCAT: %r8 -= 2, %rdi += 2;
   a nonzero remainder goes to L(StrncpyFillTailWithZero) (lea keeps the
   flags set by sub).  */
543 .p2align 4
544 L(Exit2):
545 mov (%rsi), %dx
546 mov %dx, (%rdi)
547 # ifdef USE_AS_STPCPY
548 lea 1(%rdi), %rax
549 # endif
550 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
551 sub $2, %r8
552 lea 2(%rdi), %rdi
553 jnz L(StrncpyFillTailWithZero)
554 # endif
555 ret
556
/* Exit3: copy 2 bytes from (%rsi), then store a zero byte at offset 2
   from %dh (zero while %rdx holds the small dispatch index).
   STPCPY: %rax = 2(%rdi).  STRNCPY without STRCAT: %r8 -= 3, %rdi += 3;
   a nonzero remainder goes to L(StrncpyFillTailWithZero) (lea keeps the
   flags set by sub).  */
557 .p2align 4
558 L(Exit3):
559 mov (%rsi), %cx
560 mov %cx, (%rdi)
561 mov %dh, 2(%rdi)
562 # ifdef USE_AS_STPCPY
563 lea 2(%rdi), %rax
564 # endif
565 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
566 sub $3, %r8
567 lea 3(%rdi), %rdi
568 jnz L(StrncpyFillTailWithZero)
569 # endif
570 ret
571
/* Exit4: copy 4 bytes from (%rsi) to (%rdi) with one 32-bit move; byte 3
   is presumably the terminating NUL (L(ExitTable) is outside this view).
   STPCPY: %rax = 3(%rdi).  STRNCPY without STRCAT: %r8 -= 4, %rdi += 4;
   a nonzero remainder goes to L(StrncpyFillTailWithZero) (lea keeps the
   flags set by sub).  */
572 .p2align 4
573 L(Exit4):
574 mov (%rsi), %edx
575 mov %edx, (%rdi)
576 # ifdef USE_AS_STPCPY
577 lea 3(%rdi), %rax
578 # endif
579 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
580 sub $4, %r8
581 lea 4(%rdi), %rdi
582 jnz L(StrncpyFillTailWithZero)
583 # endif
584 ret
585
/* Exit5: copy 4 bytes from (%rsi) and store a zero byte at offset 4 from
   %dh (zero while %rdx holds the small dispatch index).
   STPCPY: %rax = 4(%rdi).  STRNCPY without STRCAT: %r8 -= 5, %rdi += 5;
   a nonzero remainder goes to L(StrncpyFillTailWithZero) (lea keeps the
   flags set by sub).  */
586 .p2align 4
587 L(Exit5):
588 mov (%rsi), %ecx
589 mov %dh, 4(%rdi)
590 mov %ecx, (%rdi)
591 # ifdef USE_AS_STPCPY
592 lea 4(%rdi), %rax
593 # endif
594 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
595 sub $5, %r8
596 lea 5(%rdi), %rdi
597 jnz L(StrncpyFillTailWithZero)
598 # endif
599 ret
600
/* Exit6: copy 6 bytes from (%rsi) to (%rdi) as 4 bytes at offset 0 plus
   2 bytes at offset 4; byte 5 is presumably the terminating NUL
   (L(ExitTable) is outside this view).  STPCPY: %rax = 5(%rdi).
   STRNCPY without STRCAT: %r8 -= 6, %rdi += 6; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
601 .p2align 4
602 L(Exit6):
603 mov (%rsi), %ecx
604 mov 4(%rsi), %dx
605 mov %ecx, (%rdi)
606 mov %dx, 4(%rdi)
607 # ifdef USE_AS_STPCPY
608 lea 5(%rdi), %rax
609 # endif
610 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
611 sub $6, %r8
612 lea 6(%rdi), %rdi
613 jnz L(StrncpyFillTailWithZero)
614 # endif
615 ret
616
/* Exit7: copy 7 bytes from (%rsi) to (%rdi) with two overlapping 4-byte
   moves at offsets 0 and 3; byte 6 is presumably the terminating NUL
   (L(ExitTable) is outside this view).  STPCPY: %rax = 6(%rdi).
   STRNCPY without STRCAT: %r8 -= 7, %rdi += 7; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
617 .p2align 4
618 L(Exit7):
619 mov (%rsi), %ecx
620 mov 3(%rsi), %edx
621 mov %ecx, (%rdi)
622 mov %edx, 3(%rdi)
623 # ifdef USE_AS_STPCPY
624 lea 6(%rdi), %rax
625 # endif
626 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
627 sub $7, %r8
628 lea 7(%rdi), %rdi
629 jnz L(StrncpyFillTailWithZero)
630 # endif
631 ret
632
/* Exit8: copy 8 bytes from (%rsi) to (%rdi) with one 64-bit move; byte 7
   is presumably the terminating NUL (L(ExitTable) is outside this view).
   STPCPY: %rax = 7(%rdi).  STRNCPY without STRCAT: %r8 -= 8, %rdi += 8;
   a nonzero remainder goes to L(StrncpyFillTailWithZero) (lea keeps the
   flags set by sub).  */
633 .p2align 4
634 L(Exit8):
635 mov (%rsi), %rdx
636 mov %rdx, (%rdi)
637 # ifdef USE_AS_STPCPY
638 lea 7(%rdi), %rax
639 # endif
640 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
641 sub $8, %r8
642 lea 8(%rdi), %rdi
643 jnz L(StrncpyFillTailWithZero)
644 # endif
645 ret
646
/* Exit9: copy 8 bytes from (%rsi) and store a zero byte at offset 8 from
   %dh (zero while %rdx holds the small dispatch index).
   STPCPY: %rax = 8(%rdi).  STRNCPY without STRCAT: %r8 -= 9, %rdi += 9;
   a nonzero remainder goes to L(StrncpyFillTailWithZero) (lea keeps the
   flags set by sub).  */
647 .p2align 4
648 L(Exit9):
649 mov (%rsi), %rcx
650 mov %dh, 8(%rdi)
651 mov %rcx, (%rdi)
652 # ifdef USE_AS_STPCPY
653 lea 8(%rdi), %rax
654 # endif
655 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
656 sub $9, %r8
657 lea 9(%rdi), %rdi
658 jnz L(StrncpyFillTailWithZero)
659 # endif
660 ret
661
/* Exit10: copy 10 bytes from (%rsi) to (%rdi) as 8 bytes at offset 0
   plus 2 bytes at offset 8; byte 9 is presumably the terminating NUL
   (L(ExitTable) is outside this view).  STPCPY: %rax = 9(%rdi).
   STRNCPY without STRCAT: %r8 -= 10, %rdi += 10; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
662 .p2align 4
663 L(Exit10):
664 mov (%rsi), %rcx
665 mov 8(%rsi), %dx
666 mov %rcx, (%rdi)
667 mov %dx, 8(%rdi)
668 # ifdef USE_AS_STPCPY
669 lea 9(%rdi), %rax
670 # endif
671 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
672 sub $10, %r8
673 lea 10(%rdi), %rdi
674 jnz L(StrncpyFillTailWithZero)
675 # endif
676 ret
677
/* Exit11: copy 11 bytes from (%rsi) to (%rdi) as 8 bytes at offset 0 and
   an overlapping 4 bytes at offset 7; byte 10 is presumably the
   terminating NUL (L(ExitTable) is outside this view).
   STPCPY: %rax = 10(%rdi).  STRNCPY without STRCAT: %r8 -= 11,
   %rdi += 11; a nonzero remainder goes to L(StrncpyFillTailWithZero)
   (lea keeps the flags set by sub).  */
678 .p2align 4
679 L(Exit11):
680 mov (%rsi), %rcx
681 mov 7(%rsi), %edx
682 mov %rcx, (%rdi)
683 mov %edx, 7(%rdi)
684 # ifdef USE_AS_STPCPY
685 lea 10(%rdi), %rax
686 # endif
687 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
688 sub $11, %r8
689 lea 11(%rdi), %rdi
690 jnz L(StrncpyFillTailWithZero)
691 # endif
692 ret
693
/* Exit12: copy 12 bytes from (%rsi) to (%rdi) as 8 bytes at offset 0
   plus 4 bytes at offset 8; byte 11 is presumably the terminating NUL
   (L(ExitTable) is outside this view).  STPCPY: %rax = 11(%rdi).
   STRNCPY without STRCAT: %r8 -= 12, %rdi += 12; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
694 .p2align 4
695 L(Exit12):
696 mov (%rsi), %rcx
697 mov 8(%rsi), %edx
698 mov %rcx, (%rdi)
699 mov %edx, 8(%rdi)
700 # ifdef USE_AS_STPCPY
701 lea 11(%rdi), %rax
702 # endif
703 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
704 sub $12, %r8
705 lea 12(%rdi), %rdi
706 jnz L(StrncpyFillTailWithZero)
707 # endif
708 ret
709
/* Exit13: copy 13 bytes from (%rsi) to (%rdi) with two overlapping
   8-byte moves at offsets 0 and 5; byte 12 is presumably the terminating
   NUL (L(ExitTable) is outside this view).  STPCPY: %rax = 12(%rdi).
   STRNCPY without STRCAT: %r8 -= 13, %rdi += 13; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
710 .p2align 4
711 L(Exit13):
712 mov (%rsi), %rcx
713 mov 5(%rsi), %rdx
714 mov %rcx, (%rdi)
715 mov %rdx, 5(%rdi)
716 # ifdef USE_AS_STPCPY
717 lea 12(%rdi), %rax
718 # endif
719 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
720 sub $13, %r8
721 lea 13(%rdi), %rdi
722 jnz L(StrncpyFillTailWithZero)
723 # endif
724 ret
725
/* Exit14: copy 14 bytes from (%rsi) to (%rdi) with two overlapping
   8-byte moves at offsets 0 and 6; byte 13 is presumably the terminating
   NUL (L(ExitTable) is outside this view).  STPCPY: %rax = 13(%rdi).
   STRNCPY without STRCAT: %r8 -= 14, %rdi += 14; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
726 .p2align 4
727 L(Exit14):
728 mov (%rsi), %rcx
729 mov 6(%rsi), %rdx
730 mov %rcx, (%rdi)
731 mov %rdx, 6(%rdi)
732 # ifdef USE_AS_STPCPY
733 lea 13(%rdi), %rax
734 # endif
735 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
736 sub $14, %r8
737 lea 14(%rdi), %rdi
738 jnz L(StrncpyFillTailWithZero)
739 # endif
740 ret
741
/* Exit15: copy 15 bytes from (%rsi) to (%rdi) with two overlapping
   8-byte moves at offsets 0 and 7; byte 14 is presumably the terminating
   NUL (L(ExitTable) is outside this view).  STPCPY: %rax = 14(%rdi).
   STRNCPY without STRCAT: %r8 -= 15, %rdi += 15; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
742 .p2align 4
743 L(Exit15):
744 mov (%rsi), %rcx
745 mov 7(%rsi), %rdx
746 mov %rcx, (%rdi)
747 mov %rdx, 7(%rdi)
748 # ifdef USE_AS_STPCPY
749 lea 14(%rdi), %rax
750 # endif
751 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
752 sub $15, %r8
753 lea 15(%rdi), %rdi
754 jnz L(StrncpyFillTailWithZero)
755 # endif
756 ret
757
/* Exit16: copy 16 bytes from (%rsi) to (%rdi) with one unaligned SSE
   move; byte 15 is presumably the terminating NUL (L(ExitTable) is
   outside this view).  STPCPY: %rax = 15(%rdi).
   STRNCPY without STRCAT: %r8 -= 16, %rdi += 16; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
758 .p2align 4
759 L(Exit16):
760 movdqu (%rsi), %xmm0
761 movdqu %xmm0, (%rdi)
762 # ifdef USE_AS_STPCPY
763 lea 15(%rdi), %rax
764 # endif
765 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
766 sub $16, %r8
767 lea 16(%rdi), %rdi
768 jnz L(StrncpyFillTailWithZero)
769 # endif
770 ret
771
/* Exit17: copy 16 bytes from (%rsi) and store a zero byte at offset 16
   from %dh (zero while %rdx holds the small dispatch index).
   STPCPY: %rax = 16(%rdi).  STRNCPY without STRCAT: %r8 -= 17,
   %rdi += 17; a nonzero remainder goes to L(StrncpyFillTailWithZero)
   (lea keeps the flags set by sub).  */
772 .p2align 4
773 L(Exit17):
774 movdqu (%rsi), %xmm0
775 movdqu %xmm0, (%rdi)
776 mov %dh, 16(%rdi)
777 # ifdef USE_AS_STPCPY
778 lea 16(%rdi), %rax
779 # endif
780 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
781 sub $17, %r8
782 lea 17(%rdi), %rdi
783 jnz L(StrncpyFillTailWithZero)
784 # endif
785 ret
786
/* Exit18: copy 18 bytes from (%rsi) to (%rdi) as 16 bytes at offset 0
   plus 2 bytes at offset 16; byte 17 is presumably the terminating NUL
   (L(ExitTable) is outside this view).  STPCPY: %rax = 17(%rdi).
   STRNCPY without STRCAT: %r8 -= 18, %rdi += 18; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
787 .p2align 4
788 L(Exit18):
789 movdqu (%rsi), %xmm0
790 mov 16(%rsi), %cx
791 movdqu %xmm0, (%rdi)
792 mov %cx, 16(%rdi)
793 # ifdef USE_AS_STPCPY
794 lea 17(%rdi), %rax
795 # endif
796 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
797 sub $18, %r8
798 lea 18(%rdi), %rdi
799 jnz L(StrncpyFillTailWithZero)
800 # endif
801 ret
802
/* Exit19: copy 19 bytes from (%rsi) to (%rdi) as 16 bytes at offset 0
   and an overlapping 4 bytes at offset 15; byte 18 is presumably the
   terminating NUL (L(ExitTable) is outside this view).
   STPCPY: %rax = 18(%rdi).  STRNCPY without STRCAT: %r8 -= 19,
   %rdi += 19; a nonzero remainder goes to L(StrncpyFillTailWithZero)
   (lea keeps the flags set by sub).  */
803 .p2align 4
804 L(Exit19):
805 movdqu (%rsi), %xmm0
806 mov 15(%rsi), %ecx
807 movdqu %xmm0, (%rdi)
808 mov %ecx, 15(%rdi)
809 # ifdef USE_AS_STPCPY
810 lea 18(%rdi), %rax
811 # endif
812 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
813 sub $19, %r8
814 lea 19(%rdi), %rdi
815 jnz L(StrncpyFillTailWithZero)
816 # endif
817 ret
818
/* Exit20: copy 20 bytes from (%rsi) to (%rdi) as 16 bytes at offset 0
   plus 4 bytes at offset 16; byte 19 is presumably the terminating NUL
   (L(ExitTable) is outside this view).  STPCPY: %rax = 19(%rdi).
   STRNCPY without STRCAT: %r8 -= 20, %rdi += 20; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
819 .p2align 4
820 L(Exit20):
821 movdqu (%rsi), %xmm0
822 mov 16(%rsi), %ecx
823 movdqu %xmm0, (%rdi)
824 mov %ecx, 16(%rdi)
825 # ifdef USE_AS_STPCPY
826 lea 19(%rdi), %rax
827 # endif
828 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
829 sub $20, %r8
830 lea 20(%rdi), %rdi
831 jnz L(StrncpyFillTailWithZero)
832 # endif
833 ret
834
/* Exit21: copy 20 bytes from (%rsi) (16 + 4) and store a zero byte at
   offset 20 from %dh (zero while %rdx holds the small dispatch index).
   STPCPY: %rax = 20(%rdi).  STRNCPY without STRCAT: %r8 -= 21,
   %rdi += 21; a nonzero remainder goes to L(StrncpyFillTailWithZero)
   (lea keeps the flags set by sub).  */
835 .p2align 4
836 L(Exit21):
837 movdqu (%rsi), %xmm0
838 mov 16(%rsi), %ecx
839 movdqu %xmm0, (%rdi)
840 mov %ecx, 16(%rdi)
841 mov %dh, 20(%rdi)
842 # ifdef USE_AS_STPCPY
843 lea 20(%rdi), %rax
844 # endif
845 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
846 sub $21, %r8
847 lea 21(%rdi), %rdi
848 jnz L(StrncpyFillTailWithZero)
849 # endif
850 ret
851
/* Exit22: copy 22 bytes from (%rsi) to (%rdi) as 16 bytes at offset 0
   and an overlapping 8 bytes at offset 14; byte 21 is presumably the
   terminating NUL (L(ExitTable) is outside this view).
   STPCPY: %rax = 21(%rdi).  STRNCPY without STRCAT: %r8 -= 22,
   %rdi += 22; a nonzero remainder goes to L(StrncpyFillTailWithZero)
   (lea keeps the flags set by sub).  */
852 .p2align 4
853 L(Exit22):
854 movdqu (%rsi), %xmm0
855 mov 14(%rsi), %rcx
856 movdqu %xmm0, (%rdi)
857 mov %rcx, 14(%rdi)
858 # ifdef USE_AS_STPCPY
859 lea 21(%rdi), %rax
860 # endif
861 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
862 sub $22, %r8
863 lea 22(%rdi), %rdi
864 jnz L(StrncpyFillTailWithZero)
865 # endif
866 ret
867
/* Exit23: copy 23 bytes from (%rsi) to (%rdi) as 16 bytes at offset 0
   and an overlapping 8 bytes at offset 15; byte 22 is presumably the
   terminating NUL (L(ExitTable) is outside this view).
   STPCPY: %rax = 22(%rdi).  STRNCPY without STRCAT: %r8 -= 23,
   %rdi += 23; a nonzero remainder goes to L(StrncpyFillTailWithZero)
   (lea keeps the flags set by sub).  */
868 .p2align 4
869 L(Exit23):
870 movdqu (%rsi), %xmm0
871 mov 15(%rsi), %rcx
872 movdqu %xmm0, (%rdi)
873 mov %rcx, 15(%rdi)
874 # ifdef USE_AS_STPCPY
875 lea 22(%rdi), %rax
876 # endif
877 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
878 sub $23, %r8
879 lea 23(%rdi), %rdi
880 jnz L(StrncpyFillTailWithZero)
881 # endif
882 ret
883
/* Exit24: copy 24 bytes from (%rsi) to (%rdi) as 16 bytes at offset 0
   plus 8 bytes at offset 16; byte 23 is presumably the terminating NUL
   (L(ExitTable) is outside this view).  STPCPY: %rax = 23(%rdi).
   STRNCPY without STRCAT: %r8 -= 24, %rdi += 24; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
884 .p2align 4
885 L(Exit24):
886 movdqu (%rsi), %xmm0
887 mov 16(%rsi), %rcx
888 movdqu %xmm0, (%rdi)
889 mov %rcx, 16(%rdi)
890 # ifdef USE_AS_STPCPY
891 lea 23(%rdi), %rax
892 # endif
893 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
894 sub $24, %r8
895 lea 24(%rdi), %rdi
896 jnz L(StrncpyFillTailWithZero)
897 # endif
898 ret
899
/* Exit25: copy 24 bytes from (%rsi) (16 + 8) and store a zero byte at
   offset 24 from %dh (zero while %rdx holds the small dispatch index).
   STPCPY: %rax = 24(%rdi).  STRNCPY without STRCAT: %r8 -= 25,
   %rdi += 25; a nonzero remainder goes to L(StrncpyFillTailWithZero)
   (lea keeps the flags set by sub).  */
900 .p2align 4
901 L(Exit25):
902 movdqu (%rsi), %xmm0
903 mov 16(%rsi), %rcx
904 movdqu %xmm0, (%rdi)
905 mov %rcx, 16(%rdi)
906 mov %dh, 24(%rdi)
907 # ifdef USE_AS_STPCPY
908 lea 24(%rdi), %rax
909 # endif
910 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
911 sub $25, %r8
912 lea 25(%rdi), %rdi
913 jnz L(StrncpyFillTailWithZero)
914 # endif
915 ret
916
/* Exit26: copy 26 bytes from (%rsi) to (%rdi) as 16 + 8 + 2 bytes at
   offsets 0, 16 and 24; byte 25 is presumably the terminating NUL
   (L(ExitTable) is outside this view).  STPCPY: %rax = 25(%rdi).
   STRNCPY without STRCAT: %r8 -= 26, %rdi += 26; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
917 .p2align 4
918 L(Exit26):
919 movdqu (%rsi), %xmm0
920 mov 16(%rsi), %rdx
921 mov 24(%rsi), %cx
922 movdqu %xmm0, (%rdi)
923 mov %rdx, 16(%rdi)
924 mov %cx, 24(%rdi)
925 # ifdef USE_AS_STPCPY
926 lea 25(%rdi), %rax
927 # endif
928 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
929 sub $26, %r8
930 lea 26(%rdi), %rdi
931 jnz L(StrncpyFillTailWithZero)
932 # endif
933 ret
934
/* Exit27: copy 27 bytes from (%rsi) to (%rdi) as 16 bytes at offset 0,
   8 bytes at offset 16 and an overlapping 4 bytes at offset 23; byte 26
   is presumably the terminating NUL (L(ExitTable) is outside this view).
   STPCPY: %rax = 26(%rdi).  STRNCPY without STRCAT: %r8 -= 27,
   %rdi += 27; a nonzero remainder goes to L(StrncpyFillTailWithZero)
   (lea keeps the flags set by sub).  */
935 .p2align 4
936 L(Exit27):
937 movdqu (%rsi), %xmm0
938 mov 16(%rsi), %rdx
939 mov 23(%rsi), %ecx
940 movdqu %xmm0, (%rdi)
941 mov %rdx, 16(%rdi)
942 mov %ecx, 23(%rdi)
943 # ifdef USE_AS_STPCPY
944 lea 26(%rdi), %rax
945 # endif
946 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
947 sub $27, %r8
948 lea 27(%rdi), %rdi
949 jnz L(StrncpyFillTailWithZero)
950 # endif
951 ret
952
/* Exit28: copy 28 bytes from (%rsi) to (%rdi) as 16 + 8 + 4 bytes at
   offsets 0, 16 and 24; byte 27 is presumably the terminating NUL
   (L(ExitTable) is outside this view).  STPCPY: %rax = 27(%rdi).
   STRNCPY without STRCAT: %r8 -= 28, %rdi += 28; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
953 .p2align 4
954 L(Exit28):
955 movdqu (%rsi), %xmm0
956 mov 16(%rsi), %rdx
957 mov 24(%rsi), %ecx
958 movdqu %xmm0, (%rdi)
959 mov %rdx, 16(%rdi)
960 mov %ecx, 24(%rdi)
961 # ifdef USE_AS_STPCPY
962 lea 27(%rdi), %rax
963 # endif
964 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
965 sub $28, %r8
966 lea 28(%rdi), %rdi
967 jnz L(StrncpyFillTailWithZero)
968 # endif
969 ret
970
/* Exit29: copy 29 bytes from (%rsi) to (%rdi) with two overlapping
   16-byte SSE moves at offsets 0 and 13; byte 28 is presumably the
   terminating NUL (L(ExitTable) is outside this view).
   STPCPY: %rax = 28(%rdi).  STRNCPY without STRCAT: %r8 -= 29,
   %rdi += 29; a nonzero remainder goes to L(StrncpyFillTailWithZero)
   (lea keeps the flags set by sub).  */
971 .p2align 4
972 L(Exit29):
973 movdqu (%rsi), %xmm0
974 movdqu 13(%rsi), %xmm2
975 movdqu %xmm0, (%rdi)
976 movdqu %xmm2, 13(%rdi)
977 # ifdef USE_AS_STPCPY
978 lea 28(%rdi), %rax
979 # endif
980 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
981 sub $29, %r8
982 lea 29(%rdi), %rdi
983 jnz L(StrncpyFillTailWithZero)
984 # endif
985 ret
986
/* Exit30: copy 30 bytes from (%rsi) to (%rdi) with two overlapping
   16-byte SSE moves at offsets 0 and 14; byte 29 is presumably the
   terminating NUL (L(ExitTable) is outside this view).
   STPCPY: %rax = 29(%rdi).  STRNCPY without STRCAT: %r8 -= 30,
   %rdi += 30; a nonzero remainder goes to L(StrncpyFillTailWithZero)
   (lea keeps the flags set by sub).  */
987 .p2align 4
988 L(Exit30):
989 movdqu (%rsi), %xmm0
990 movdqu 14(%rsi), %xmm2
991 movdqu %xmm0, (%rdi)
992 movdqu %xmm2, 14(%rdi)
993 # ifdef USE_AS_STPCPY
994 lea 29(%rdi), %rax
995 # endif
996 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
997 sub $30, %r8
998 lea 30(%rdi), %rdi
999 jnz L(StrncpyFillTailWithZero)
1000 # endif
1001 ret
1002
/* Exit31: copy 31 bytes from (%rsi) to (%rdi) with two overlapping
   16-byte SSE moves at offsets 0 and 15; byte 30 is presumably the
   terminating NUL (L(ExitTable) is outside this view).
   STPCPY: %rax = 30(%rdi).  STRNCPY without STRCAT: %r8 -= 31,
   %rdi += 31; a nonzero remainder goes to L(StrncpyFillTailWithZero)
   (lea keeps the flags set by sub).  */
1003 .p2align 4
1004 L(Exit31):
1005 movdqu (%rsi), %xmm0
1006 movdqu 15(%rsi), %xmm2
1007 movdqu %xmm0, (%rdi)
1008 movdqu %xmm2, 15(%rdi)
1009 # ifdef USE_AS_STPCPY
1010 lea 30(%rdi), %rax
1011 # endif
1012 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1013 sub $31, %r8
1014 lea 31(%rdi), %rdi
1015 jnz L(StrncpyFillTailWithZero)
1016 # endif
1017 ret
1018
/* Exit32: copy 32 bytes from (%rsi) to (%rdi) with two 16-byte SSE moves
   at offsets 0 and 16; byte 31 is presumably the terminating NUL
   (L(ExitTable) is outside this view).  STPCPY: %rax = 31(%rdi).
   STRNCPY without STRCAT: %r8 -= 32, %rdi += 32; a nonzero remainder goes
   to L(StrncpyFillTailWithZero) (lea keeps the flags set by sub).  */
1019 .p2align 4
1020 L(Exit32):
1021 movdqu (%rsi), %xmm0
1022 movdqu 16(%rsi), %xmm2
1023 movdqu %xmm0, (%rdi)
1024 movdqu %xmm2, 16(%rdi)
1025 # ifdef USE_AS_STPCPY
1026 lea 31(%rdi), %rax
1027 # endif
1028 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1029 sub $32, %r8
1030 lea 32(%rdi), %rdi
1031 jnz L(StrncpyFillTailWithZero)
1032 # endif
1033 ret
1034
1035 # ifdef USE_AS_STRNCPY
1036
/* StrncpyExit0: nothing left to copy.  STPCPY: %rax = %rdi.
   STRCAT: store a terminating zero byte at (%rdi) (%ch is cleared
   first).  Presumably reached through L(ExitStrncpyTable) with a
   remaining count of 0 (table outside this view).  */
1037 .p2align 4
1038 L(StrncpyExit0):
1039 # ifdef USE_AS_STPCPY
1040 mov %rdi, %rax
1041 # endif
1042 # ifdef USE_AS_STRCAT
1043 xor %ch, %ch
1044 movb %ch, (%rdi)
1045 # endif
1046 ret
1047
/* StrncpyExit1: copy exactly 1 byte from (%rsi) to (%rdi); no terminator
   is taken from the source.  STPCPY: %rax = 1(%rdi).  STRCAT: store a
   zero byte at offset 1 (%ch is cleared first).  */
1048 .p2align 4
1049 L(StrncpyExit1):
1050 mov (%rsi), %dl
1051 mov %dl, (%rdi)
1052 # ifdef USE_AS_STPCPY
1053 lea 1(%rdi), %rax
1054 # endif
1055 # ifdef USE_AS_STRCAT
1056 xor %ch, %ch
1057 movb %ch, 1(%rdi)
1058 # endif
1059 ret
1060
/* StrncpyExit2: copy exactly 2 bytes from (%rsi) to (%rdi); no
   terminator is taken from the source.  STPCPY: %rax = 2(%rdi).
   STRCAT: store a zero byte at offset 2 (%ch is cleared first).  */
1061 .p2align 4
1062 L(StrncpyExit2):
1063 mov (%rsi), %dx
1064 mov %dx, (%rdi)
1065 # ifdef USE_AS_STPCPY
1066 lea 2(%rdi), %rax
1067 # endif
1068 # ifdef USE_AS_STRCAT
1069 xor %ch, %ch
1070 movb %ch, 2(%rdi)
1071 # endif
1072 ret
1073
/* StrncpyExit3: copy exactly 3 bytes from (%rsi) to (%rdi) (2 + 1); no
   terminator is taken from the source.  STPCPY: %rax = 3(%rdi).
   STRCAT: store a zero byte at offset 3 (%ch is cleared first).  */
1074 .p2align 4
1075 L(StrncpyExit3):
1076 mov (%rsi), %cx
1077 mov 2(%rsi), %dl
1078 mov %cx, (%rdi)
1079 mov %dl, 2(%rdi)
1080 # ifdef USE_AS_STPCPY
1081 lea 3(%rdi), %rax
1082 # endif
1083 # ifdef USE_AS_STRCAT
1084 xor %ch, %ch
1085 movb %ch, 3(%rdi)
1086 # endif
1087 ret
1088
/* StrncpyExit4: copy exactly 4 bytes from (%rsi) to (%rdi); no
   terminator is taken from the source.  STPCPY: %rax = 4(%rdi).
   STRCAT: store a zero byte at offset 4 (%ch is cleared first).  */
1089 .p2align 4
1090 L(StrncpyExit4):
1091 mov (%rsi), %edx
1092 mov %edx, (%rdi)
1093 # ifdef USE_AS_STPCPY
1094 lea 4(%rdi), %rax
1095 # endif
1096 # ifdef USE_AS_STRCAT
1097 xor %ch, %ch
1098 movb %ch, 4(%rdi)
1099 # endif
1100 ret
1101
/* StrncpyExit5: copy exactly 5 bytes from (%rsi) to (%rdi) (4 + 1); no
   terminator is taken from the source.  STPCPY: %rax = 5(%rdi).
   STRCAT: store a zero byte at offset 5 (%ch is cleared first).  */
1102 .p2align 4
1103 L(StrncpyExit5):
1104 mov (%rsi), %ecx
1105 mov 4(%rsi), %dl
1106 mov %ecx, (%rdi)
1107 mov %dl, 4(%rdi)
1108 # ifdef USE_AS_STPCPY
1109 lea 5(%rdi), %rax
1110 # endif
1111 # ifdef USE_AS_STRCAT
1112 xor %ch, %ch
1113 movb %ch, 5(%rdi)
1114 # endif
1115 ret
1116
/* StrncpyExit6: copy exactly 6 bytes from (%rsi) to (%rdi) (4 + 2); no
   terminator is taken from the source.  STPCPY: %rax = 6(%rdi).
   STRCAT: store a zero byte at offset 6 (%ch is cleared first).  */
1117 .p2align 4
1118 L(StrncpyExit6):
1119 mov (%rsi), %ecx
1120 mov 4(%rsi), %dx
1121 mov %ecx, (%rdi)
1122 mov %dx, 4(%rdi)
1123 # ifdef USE_AS_STPCPY
1124 lea 6(%rdi), %rax
1125 # endif
1126 # ifdef USE_AS_STRCAT
1127 xor %ch, %ch
1128 movb %ch, 6(%rdi)
1129 # endif
1130 ret
1131
/* StrncpyExit7: copy exactly 7 bytes from (%rsi) to (%rdi) with two
   overlapping 4-byte moves at offsets 0 and 3; no terminator is taken
   from the source.  STPCPY: %rax = 7(%rdi).  STRCAT: store a zero byte
   at offset 7 (%ch is cleared first).  */
1132 .p2align 4
1133 L(StrncpyExit7):
1134 mov (%rsi), %ecx
1135 mov 3(%rsi), %edx
1136 mov %ecx, (%rdi)
1137 mov %edx, 3(%rdi)
1138 # ifdef USE_AS_STPCPY
1139 lea 7(%rdi), %rax
1140 # endif
1141 # ifdef USE_AS_STRCAT
1142 xor %ch, %ch
1143 movb %ch, 7(%rdi)
1144 # endif
1145 ret
1146
/* StrncpyExit8: copy exactly 8 bytes from (%rsi) to (%rdi); no
   terminator is taken from the source.  STPCPY: %rax = 8(%rdi).
   STRCAT: store a zero byte at offset 8 (%ch is cleared first).  */
1147 .p2align 4
1148 L(StrncpyExit8):
1149 mov (%rsi), %rdx
1150 mov %rdx, (%rdi)
1151 # ifdef USE_AS_STPCPY
1152 lea 8(%rdi), %rax
1153 # endif
1154 # ifdef USE_AS_STRCAT
1155 xor %ch, %ch
1156 movb %ch, 8(%rdi)
1157 # endif
1158 ret
1159
/* StrncpyExit9: copy exactly 9 bytes from (%rsi) to (%rdi) (8 + 1); no
   terminator is taken from the source.  STPCPY: %rax = 9(%rdi).
   STRCAT: store a zero byte at offset 9 (%ch is cleared first).  */
1160 .p2align 4
1161 L(StrncpyExit9):
1162 mov (%rsi), %rcx
1163 mov 8(%rsi), %dl
1164 mov %rcx, (%rdi)
1165 mov %dl, 8(%rdi)
1166 # ifdef USE_AS_STPCPY
1167 lea 9(%rdi), %rax
1168 # endif
1169 # ifdef USE_AS_STRCAT
1170 xor %ch, %ch
1171 movb %ch, 9(%rdi)
1172 # endif
1173 ret
1174
/* StrncpyExit10: copy exactly 10 bytes from (%rsi) to (%rdi) (8 + 2); no
   terminator is taken from the source.  STPCPY: %rax = 10(%rdi).
   STRCAT: store a zero byte at offset 10 (%ch is cleared first).  */
1175 .p2align 4
1176 L(StrncpyExit10):
1177 mov (%rsi), %rcx
1178 mov 8(%rsi), %dx
1179 mov %rcx, (%rdi)
1180 mov %dx, 8(%rdi)
1181 # ifdef USE_AS_STPCPY
1182 lea 10(%rdi), %rax
1183 # endif
1184 # ifdef USE_AS_STRCAT
1185 xor %ch, %ch
1186 movb %ch, 10(%rdi)
1187 # endif
1188 ret
1189
/* StrncpyExit11: copy exactly 11 bytes from (%rsi) to (%rdi) as 8 bytes
   at offset 0 and an overlapping 4 bytes at offset 7; no terminator is
   taken from the source.  STPCPY: %rax = 11(%rdi).  STRCAT: store a zero
   byte at offset 11 (%ch is cleared first).  */
1190 .p2align 4
1191 L(StrncpyExit11):
1192 mov (%rsi), %rcx
1193 mov 7(%rsi), %edx
1194 mov %rcx, (%rdi)
1195 mov %edx, 7(%rdi)
1196 # ifdef USE_AS_STPCPY
1197 lea 11(%rdi), %rax
1198 # endif
1199 # ifdef USE_AS_STRCAT
1200 xor %ch, %ch
1201 movb %ch, 11(%rdi)
1202 # endif
1203 ret
1204
/* StrncpyExit12: copy exactly 12 bytes from (%rsi) to (%rdi) (8 + 4); no
   terminator is taken from the source.  STPCPY: %rax = 12(%rdi).
   STRCAT: store a zero byte at offset 12 (%ch is cleared first).  */
1205 .p2align 4
1206 L(StrncpyExit12):
1207 mov (%rsi), %rcx
1208 mov 8(%rsi), %edx
1209 mov %rcx, (%rdi)
1210 mov %edx, 8(%rdi)
1211 # ifdef USE_AS_STPCPY
1212 lea 12(%rdi), %rax
1213 # endif
1214 # ifdef USE_AS_STRCAT
1215 xor %ch, %ch
1216 movb %ch, 12(%rdi)
1217 # endif
1218 ret
1219
/* StrncpyExit13: copy exactly 13 bytes from (%rsi) to (%rdi) with two
   overlapping 8-byte moves at offsets 0 and 5; no terminator is taken
   from the source.  STPCPY: %rax = 13(%rdi).  STRCAT: store a zero byte
   at offset 13 (%ch is cleared first).  */
1220 .p2align 4
1221 L(StrncpyExit13):
1222 mov (%rsi), %rcx
1223 mov 5(%rsi), %rdx
1224 mov %rcx, (%rdi)
1225 mov %rdx, 5(%rdi)
1226 # ifdef USE_AS_STPCPY
1227 lea 13(%rdi), %rax
1228 # endif
1229 # ifdef USE_AS_STRCAT
1230 xor %ch, %ch
1231 movb %ch, 13(%rdi)
1232 # endif
1233 ret
1234
/* StrncpyExit14: copy exactly 14 bytes from (%rsi) to (%rdi) with two
   overlapping 8-byte moves at offsets 0 and 6; no terminator is taken
   from the source.  STPCPY: %rax = 14(%rdi).  STRCAT: store a zero byte
   at offset 14 (%ch is cleared first).  */
1235 .p2align 4
1236 L(StrncpyExit14):
1237 mov (%rsi), %rcx
1238 mov 6(%rsi), %rdx
1239 mov %rcx, (%rdi)
1240 mov %rdx, 6(%rdi)
1241 # ifdef USE_AS_STPCPY
1242 lea 14(%rdi), %rax
1243 # endif
1244 # ifdef USE_AS_STRCAT
1245 xor %ch, %ch
1246 movb %ch, 14(%rdi)
1247 # endif
1248 ret
1249
/* StrncpyExit15: copy exactly 15 bytes from (%rsi) to (%rdi) with two
   overlapping 8-byte moves at offsets 0 and 7; no terminator is taken
   from the source.  STPCPY: %rax = 15(%rdi).  STRCAT: store a zero byte
   at offset 15 (%ch is cleared first).  */
1250 .p2align 4
1251 L(StrncpyExit15):
1252 mov (%rsi), %rcx
1253 mov 7(%rsi), %rdx
1254 mov %rcx, (%rdi)
1255 mov %rdx, 7(%rdi)
1256 # ifdef USE_AS_STPCPY
1257 lea 15(%rdi), %rax
1258 # endif
1259 # ifdef USE_AS_STRCAT
1260 xor %ch, %ch
1261 movb %ch, 15(%rdi)
1262 # endif
1263 ret
1264
/* StrncpyExit16: copy exactly 16 bytes from (%rsi) to (%rdi) with one
   unaligned SSE move; no terminator is taken from the source.
   STPCPY: %rax = 16(%rdi).  STRCAT: store a zero byte at offset 16
   (%ch is cleared first).  */
1265 .p2align 4
1266 L(StrncpyExit16):
1267 movdqu (%rsi), %xmm0
1268 movdqu %xmm0, (%rdi)
1269 # ifdef USE_AS_STPCPY
1270 lea 16(%rdi), %rax
1271 # endif
1272 # ifdef USE_AS_STRCAT
1273 xor %ch, %ch
1274 movb %ch, 16(%rdi)
1275 # endif
1276 ret
1277
/* StrncpyExit17: copy exactly 17 bytes from (%rsi) to (%rdi) (16 + 1);
   no terminator is taken from the source.  STPCPY: %rax = 17(%rdi).
   STRCAT: store a zero byte at offset 17 (%ch is cleared first).  */
1278 .p2align 4
1279 L(StrncpyExit17):
1280 movdqu (%rsi), %xmm0
1281 mov 16(%rsi), %cl
1282 movdqu %xmm0, (%rdi)
1283 mov %cl, 16(%rdi)
1284 # ifdef USE_AS_STPCPY
1285 lea 17(%rdi), %rax
1286 # endif
1287 # ifdef USE_AS_STRCAT
1288 xor %ch, %ch
1289 movb %ch, 17(%rdi)
1290 # endif
1291 ret
1292
/* StrncpyExit18: copy exactly 18 bytes from (%rsi) to (%rdi) (16 + 2);
   no terminator is taken from the source.  STPCPY: %rax = 18(%rdi).
   STRCAT: store a zero byte at offset 18 (%ch is cleared first).  */
1293 .p2align 4
1294 L(StrncpyExit18):
1295 movdqu (%rsi), %xmm0
1296 mov 16(%rsi), %cx
1297 movdqu %xmm0, (%rdi)
1298 mov %cx, 16(%rdi)
1299 # ifdef USE_AS_STPCPY
1300 lea 18(%rdi), %rax
1301 # endif
1302 # ifdef USE_AS_STRCAT
1303 xor %ch, %ch
1304 movb %ch, 18(%rdi)
1305 # endif
1306 ret
1307
/* StrncpyExit19: copy exactly 19 bytes from (%rsi) to (%rdi) as 16 bytes
   at offset 0 and an overlapping 4 bytes at offset 15; no terminator is
   taken from the source.  STPCPY: %rax = 19(%rdi).  STRCAT: store a zero
   byte at offset 19 (%ch is cleared first).  */
1308 .p2align 4
1309 L(StrncpyExit19):
1310 movdqu (%rsi), %xmm0
1311 mov 15(%rsi), %ecx
1312 movdqu %xmm0, (%rdi)
1313 mov %ecx, 15(%rdi)
1314 # ifdef USE_AS_STPCPY
1315 lea 19(%rdi), %rax
1316 # endif
1317 # ifdef USE_AS_STRCAT
1318 xor %ch, %ch
1319 movb %ch, 19(%rdi)
1320 # endif
1321 ret
1322
/* StrncpyExit20: copy exactly 20 bytes from (%rsi) to (%rdi) (16 + 4);
   no terminator is taken from the source.  STPCPY: %rax = 20(%rdi).
   STRCAT: store a zero byte at offset 20 (%ch is cleared first).  */
1323 .p2align 4
1324 L(StrncpyExit20):
1325 movdqu (%rsi), %xmm0
1326 mov 16(%rsi), %ecx
1327 movdqu %xmm0, (%rdi)
1328 mov %ecx, 16(%rdi)
1329 # ifdef USE_AS_STPCPY
1330 lea 20(%rdi), %rax
1331 # endif
1332 # ifdef USE_AS_STRCAT
1333 xor %ch, %ch
1334 movb %ch, 20(%rdi)
1335 # endif
1336 ret
1337
/* StrncpyExit21: copy exactly 21 bytes from (%rsi) to (%rdi)
   (16 + 4 + 1); no terminator is taken from the source.
   STPCPY: %rax = 21(%rdi).  STRCAT: store a zero byte at offset 21
   (%ch is cleared first).  */
1338 .p2align 4
1339 L(StrncpyExit21):
1340 movdqu (%rsi), %xmm0
1341 mov 16(%rsi), %ecx
1342 mov 20(%rsi), %dl
1343 movdqu %xmm0, (%rdi)
1344 mov %ecx, 16(%rdi)
1345 mov %dl, 20(%rdi)
1346 # ifdef USE_AS_STPCPY
1347 lea 21(%rdi), %rax
1348 # endif
1349 # ifdef USE_AS_STRCAT
1350 xor %ch, %ch
1351 movb %ch, 21(%rdi)
1352 # endif
1353 ret
1354
/* StrncpyExit22: copy exactly 22 bytes from (%rsi) to (%rdi) as 16 bytes
   at offset 0 and an overlapping 8 bytes at offset 14; no terminator is
   taken from the source.  STPCPY: %rax = 22(%rdi).  STRCAT: store a zero
   byte at offset 22 (%ch is cleared first).  */
1355 .p2align 4
1356 L(StrncpyExit22):
1357 movdqu (%rsi), %xmm0
1358 mov 14(%rsi), %rcx
1359 movdqu %xmm0, (%rdi)
1360 mov %rcx, 14(%rdi)
1361 # ifdef USE_AS_STPCPY
1362 lea 22(%rdi), %rax
1363 # endif
1364 # ifdef USE_AS_STRCAT
1365 xor %ch, %ch
1366 movb %ch, 22(%rdi)
1367 # endif
1368 ret
1369
/* StrncpyExit23: copy exactly 23 bytes from (%rsi) to (%rdi) as 16 bytes
   at offset 0 and an overlapping 8 bytes at offset 15; no terminator is
   taken from the source.  STPCPY: %rax = 23(%rdi).  STRCAT: store a zero
   byte at offset 23 (%ch is cleared first).  */
1370 .p2align 4
1371 L(StrncpyExit23):
1372 movdqu (%rsi), %xmm0
1373 mov 15(%rsi), %rcx
1374 movdqu %xmm0, (%rdi)
1375 mov %rcx, 15(%rdi)
1376 # ifdef USE_AS_STPCPY
1377 lea 23(%rdi), %rax
1378 # endif
1379 # ifdef USE_AS_STRCAT
1380 xor %ch, %ch
1381 movb %ch, 23(%rdi)
1382 # endif
1383 ret
1384
/* StrncpyExit24: copy exactly 24 bytes from (%rsi) to (%rdi) (16 + 8);
   no terminator is taken from the source.  STPCPY: %rax = 24(%rdi).
   STRCAT: store a zero byte at offset 24 (%ch is cleared first).  */
1385 .p2align 4
1386 L(StrncpyExit24):
1387 movdqu (%rsi), %xmm0
1388 mov 16(%rsi), %rcx
1389 movdqu %xmm0, (%rdi)
1390 mov %rcx, 16(%rdi)
1391 # ifdef USE_AS_STPCPY
1392 lea 24(%rdi), %rax
1393 # endif
1394 # ifdef USE_AS_STRCAT
1395 xor %ch, %ch
1396 movb %ch, 24(%rdi)
1397 # endif
1398 ret
1399
/* StrncpyExit25: copy exactly 25 bytes from (%rsi) to (%rdi)
   (16 + 8 + 1); no terminator is taken from the source.
   STPCPY: %rax = 25(%rdi).  STRCAT: store a zero byte at offset 25
   (%ch is cleared first).  */
1400 .p2align 4
1401 L(StrncpyExit25):
1402 movdqu (%rsi), %xmm0
1403 mov 16(%rsi), %rdx
1404 mov 24(%rsi), %cl
1405 movdqu %xmm0, (%rdi)
1406 mov %rdx, 16(%rdi)
1407 mov %cl, 24(%rdi)
1408 # ifdef USE_AS_STPCPY
1409 lea 25(%rdi), %rax
1410 # endif
1411 # ifdef USE_AS_STRCAT
1412 xor %ch, %ch
1413 movb %ch, 25(%rdi)
1414 # endif
1415 ret
1416
/* StrncpyExit26: copy exactly 26 bytes from (%rsi) to (%rdi)
   (16 + 8 + 2); no terminator is taken from the source.
   STPCPY: %rax = 26(%rdi).  STRCAT: store a zero byte at offset 26
   (%ch is cleared first).  */
1417 .p2align 4
1418 L(StrncpyExit26):
1419 movdqu (%rsi), %xmm0
1420 mov 16(%rsi), %rdx
1421 mov 24(%rsi), %cx
1422 movdqu %xmm0, (%rdi)
1423 mov %rdx, 16(%rdi)
1424 mov %cx, 24(%rdi)
1425 # ifdef USE_AS_STPCPY
1426 lea 26(%rdi), %rax
1427 # endif
1428 # ifdef USE_AS_STRCAT
1429 xor %ch, %ch
1430 movb %ch, 26(%rdi)
1431 # endif
1432 ret
1433
/* StrncpyExit27: copy exactly 27 bytes from (%rsi) to (%rdi) as 16 bytes
   at offset 0, 8 bytes at offset 16 and an overlapping 4 bytes at offset
   23; no terminator is taken from the source.  STPCPY: %rax = 27(%rdi).
   STRCAT: store a zero byte at offset 27 (%ch is cleared first).  */
1434 .p2align 4
1435 L(StrncpyExit27):
1436 movdqu (%rsi), %xmm0
1437 mov 16(%rsi), %rdx
1438 mov 23(%rsi), %ecx
1439 movdqu %xmm0, (%rdi)
1440 mov %rdx, 16(%rdi)
1441 mov %ecx, 23(%rdi)
1442 # ifdef USE_AS_STPCPY
1443 lea 27(%rdi), %rax
1444 # endif
1445 # ifdef USE_AS_STRCAT
1446 xor %ch, %ch
1447 movb %ch, 27(%rdi)
1448 # endif
1449 ret
1450
/* StrncpyExit28: copy exactly 28 bytes from (%rsi) to (%rdi)
   (16 + 8 + 4); no terminator is taken from the source.
   STPCPY: %rax = 28(%rdi).  STRCAT: store a zero byte at offset 28
   (%ch is cleared first).  */
1451 .p2align 4
1452 L(StrncpyExit28):
1453 movdqu (%rsi), %xmm0
1454 mov 16(%rsi), %rdx
1455 mov 24(%rsi), %ecx
1456 movdqu %xmm0, (%rdi)
1457 mov %rdx, 16(%rdi)
1458 mov %ecx, 24(%rdi)
1459 # ifdef USE_AS_STPCPY
1460 lea 28(%rdi), %rax
1461 # endif
1462 # ifdef USE_AS_STRCAT
1463 xor %ch, %ch
1464 movb %ch, 28(%rdi)
1465 # endif
1466 ret
1467
1468 .p2align 4
1469 L(StrncpyExit29): /* Copy exactly 29 bytes from (%rsi) to (%rdi); target of L(ExitStrncpyTable) entry 29.  */
1470 movdqu (%rsi), %xmm0 /* Load source bytes 0..15.  */
1471 movdqu 13(%rsi), %xmm2 /* Load source bytes 13..28 (overlaps the first load by 3 bytes).  */
1472 movdqu %xmm0, (%rdi) /* Store bytes 0..15; both loads are done before any store.  */
1473 movdqu %xmm2, 13(%rdi) /* Store bytes 13..28.  */
1474 # ifdef USE_AS_STPCPY
1475 lea 29(%rdi), %rax /* %rax = destination + 29.  */
1476 # endif
1477 # ifdef USE_AS_STRCAT
1478 xor %ch, %ch /* %ch = 0.  */
1479 movb %ch, 29(%rdi) /* Write a zero byte right after the 29 copied bytes.  */
1480 # endif
1481 ret
1482
1483 .p2align 4
1484 L(StrncpyExit30): /* Copy exactly 30 bytes from (%rsi) to (%rdi); target of L(ExitStrncpyTable) entry 30.  */
1485 movdqu (%rsi), %xmm0 /* Load source bytes 0..15.  */
1486 movdqu 14(%rsi), %xmm2 /* Load source bytes 14..29 (overlaps the first load by 2 bytes).  */
1487 movdqu %xmm0, (%rdi) /* Store bytes 0..15; both loads are done before any store.  */
1488 movdqu %xmm2, 14(%rdi) /* Store bytes 14..29.  */
1489 # ifdef USE_AS_STPCPY
1490 lea 30(%rdi), %rax /* %rax = destination + 30.  */
1491 # endif
1492 # ifdef USE_AS_STRCAT
1493 xor %ch, %ch /* %ch = 0.  */
1494 movb %ch, 30(%rdi) /* Write a zero byte right after the 30 copied bytes.  */
1495 # endif
1496 ret
1497
1498 .p2align 4
1499 L(StrncpyExit31): /* Copy exactly 31 bytes from (%rsi) to (%rdi); target of L(ExitStrncpyTable) entry 31.  */
1500 movdqu (%rsi), %xmm0 /* Load source bytes 0..15.  */
1501 movdqu 15(%rsi), %xmm2 /* Load source bytes 15..30 (overlaps the first load by 1 byte).  */
1502 movdqu %xmm0, (%rdi) /* Store bytes 0..15; both loads are done before any store.  */
1503 movdqu %xmm2, 15(%rdi) /* Store bytes 15..30.  */
1504 # ifdef USE_AS_STPCPY
1505 lea 31(%rdi), %rax /* %rax = destination + 31.  */
1506 # endif
1507 # ifdef USE_AS_STRCAT
1508 xor %ch, %ch /* %ch = 0.  */
1509 movb %ch, 31(%rdi) /* Write a zero byte right after the 31 copied bytes.  */
1510 # endif
1511 ret
1512
1513 .p2align 4
1514 L(StrncpyExit32): /* Copy exactly 32 bytes from (%rsi) to (%rdi); target of L(ExitStrncpyTable) entry 32.  */
1515 movdqu (%rsi), %xmm0 /* Load source bytes 0..15.  */
1516 movdqu 16(%rsi), %xmm2 /* Load source bytes 16..31.  */
1517 movdqu %xmm0, (%rdi) /* Store bytes 0..15; both loads are done before any store.  */
1518 movdqu %xmm2, 16(%rdi) /* Store bytes 16..31.  */
1519 # ifdef USE_AS_STPCPY
1520 lea 32(%rdi), %rax /* %rax = destination + 32.  */
1521 # endif
1522 # ifdef USE_AS_STRCAT
1523 xor %ch, %ch /* %ch = 0.  */
1524 movb %ch, 32(%rdi) /* Write a zero byte right after the 32 copied bytes.  */
1525 # endif
1526 ret
1527
1528 .p2align 4
1529 L(StrncpyExit33): /* Copy exactly 33 bytes from (%rsi) to (%rdi); target of L(ExitStrncpyTable) entry 33.  */
1530 movdqu (%rsi), %xmm0 /* Load source bytes 0..15.  */
1531 movdqu 16(%rsi), %xmm2 /* Load source bytes 16..31.  */
1532 mov 32(%rsi), %cl /* Load source byte 32.  */
1533 movdqu %xmm0, (%rdi) /* Store bytes 0..15; all loads are done before any store.  */
1534 movdqu %xmm2, 16(%rdi) /* Store bytes 16..31.  */
1535 mov %cl, 32(%rdi) /* Store byte 32.  NOTE(review): unlike StrncpyExit22..32 there is no USE_AS_STPCPY block setting %rax here -- confirm this entry is never reached in STPCPY builds.  */
1536 # ifdef USE_AS_STRCAT
1537 xor %ch, %ch /* %ch = 0.  */
1538 movb %ch, 33(%rdi) /* Write a zero byte right after the 33 copied bytes.  */
1539 # endif
1540 ret
1541
1542 # ifndef USE_AS_STRCAT
1543
1544 .p2align 4
1545 L(Fill0): /* L(FillTable) entry 0: nothing left to write.  */
1546 ret
1547
1548 .p2align 4
1549 L(Fill1): /* L(FillTable) entry 1: write 1 zero byte at (%rdi).  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1550 mov %dl, (%rdi) /* 1-byte store at offset 0.  */
1551 ret
1552
1553 .p2align 4
1554 L(Fill2): /* L(FillTable) entry 2: write 2 zero bytes at (%rdi).  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1555 mov %dx, (%rdi) /* 2-byte store covering offsets 0..1.  */
1556 ret
1557
1558 .p2align 4
1559 L(Fill3): /* L(FillTable) entry 3: zero offsets 0..2 of (%rdi) with a single 4-byte store.  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1560 mov %edx, -1(%rdi) /* Covers offsets -1..2.  NOTE(review): also rewrites the byte just before %rdi with zero -- presumably that byte is already zero (terminator or earlier fill); confirm.  */
1561 ret
1562
1563 .p2align 4
1564 L(Fill4): /* L(FillTable) entry 4: write 4 zero bytes at (%rdi).  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1565 mov %edx, (%rdi) /* 4-byte store covering offsets 0..3.  */
1566 ret
1567
1568 .p2align 4
1569 L(Fill5): /* L(FillTable) entry 5: write 5 zero bytes at (%rdi).  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1570 mov %edx, (%rdi) /* 4-byte store covering offsets 0..3.  */
1571 mov %dl, 4(%rdi) /* 1-byte store at offset 4.  */
1572 ret
1573
1574 .p2align 4
1575 L(Fill6): /* L(FillTable) entry 6: write 6 zero bytes at (%rdi).  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1576 mov %edx, (%rdi) /* 4-byte store covering offsets 0..3.  */
1577 mov %dx, 4(%rdi) /* 2-byte store covering offsets 4..5.  */
1578 ret
1579
1580 .p2align 4
1581 L(Fill7): /* L(FillTable) entry 7: zero offsets 0..6 of (%rdi) with a single 8-byte store.  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1582 mov %rdx, -1(%rdi) /* Covers offsets -1..6.  NOTE(review): also rewrites the byte just before %rdi with zero -- presumably that byte is already zero (terminator or earlier fill); confirm.  */
1583 ret
1584
1585 .p2align 4
1586 L(Fill8): /* L(FillTable) entry 8: write 8 zero bytes at (%rdi).  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1587 mov %rdx, (%rdi) /* 8-byte store covering offsets 0..7.  */
1588 ret
1589
1590 .p2align 4
1591 L(Fill9): /* L(FillTable) entry 9: write 9 zero bytes at (%rdi).  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1592 mov %rdx, (%rdi) /* 8-byte store covering offsets 0..7.  */
1593 mov %dl, 8(%rdi) /* 1-byte store at offset 8.  */
1594 ret
1595
1596 .p2align 4
1597 L(Fill10): /* L(FillTable) entry 10: write 10 zero bytes at (%rdi).  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1598 mov %rdx, (%rdi) /* 8-byte store covering offsets 0..7.  */
1599 mov %dx, 8(%rdi) /* 2-byte store covering offsets 8..9.  */
1600 ret
1601
1602 .p2align 4
1603 L(Fill11): /* L(FillTable) entry 11: write 11 zero bytes at (%rdi).  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1604 mov %rdx, (%rdi) /* 8-byte store covering offsets 0..7.  */
1605 mov %edx, 7(%rdi) /* 4-byte store covering offsets 7..10 (overlaps the first store by 1 byte).  */
1606 ret
1607
1608 .p2align 4
1609 L(Fill12): /* L(FillTable) entry 12: write 12 zero bytes at (%rdi).  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1610 mov %rdx, (%rdi) /* 8-byte store covering offsets 0..7.  */
1611 mov %edx, 8(%rdi) /* 4-byte store covering offsets 8..11.  */
1612 ret
1613
1614 .p2align 4
1615 L(Fill13): /* L(FillTable) entry 13: write 13 zero bytes at (%rdi).  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1616 mov %rdx, (%rdi) /* 8-byte store covering offsets 0..7.  */
1617 mov %rdx, 5(%rdi) /* 8-byte store covering offsets 5..12 (overlaps the first store by 3 bytes).  */
1618 ret
1619
1620 .p2align 4
1621 L(Fill14): /* L(FillTable) entry 14: write 14 zero bytes at (%rdi).  Relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1622 mov %rdx, (%rdi) /* 8-byte store covering offsets 0..7.  */
1623 mov %rdx, 6(%rdi) /* 8-byte store covering offsets 6..13 (overlaps the first store by 2 bytes).  */
1624 ret
1625
1626 .p2align 4
1627 L(Fill15): /* L(FillTable) entry 15: zero offsets 0..14 of (%rdi) with a single 16-byte store.  Relies on %xmm0 == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1628 movdqu %xmm0, -1(%rdi) /* Covers offsets -1..14.  NOTE(review): also rewrites the byte just before %rdi with zero -- presumably that byte is already zero (terminator or earlier fill); confirm.  */
1629 ret
1630
1631 .p2align 4
1632 L(Fill16): /* L(FillTable) entry 16: write 16 zero bytes at (%rdi).  Relies on %xmm0 == 0, which L(StrncpyFillTailWithZero) sets before dispatching.  */
1633 movdqu %xmm0, (%rdi) /* Unaligned 16-byte store covering offsets 0..15.  */
1634 ret
1635
1636 .p2align 4
1637 L(CopyFrom1To16BytesUnalignedXmm2): /* Store the 16 bytes held in %xmm2, then continue into the code that follows (there is no ret or jmp here).  */
1638 movdqu %xmm2, (%rdi, %rcx) /* Unaligned 16-byte store at destination + %rcx.  */
1639
1640 .p2align 4
1641 L(CopyFrom1To16BytesXmmExit): /* Turn the mask in %rdx into a byte index, adjust %r8 and %rdi past that byte, then continue into the code that follows (no ret or jmp here).  */
1642 bsf %rdx, %rdx /* %rdx = index of the lowest set bit.  NOTE(review): presumably a pmovmskb mask whose lowest set bit marks the first NUL byte -- confirm at the jump sites.  */
1643 add $15, %r8
1644 add %rcx, %rdi /* Advance the destination by the block offset in %rcx.  */
1645 # ifdef USE_AS_STPCPY
1646 lea (%rdi, %rdx), %rax /* %rax = address of the byte at that index in the destination.  */
1647 # endif
1648 sub %rdx, %r8 /* Together with the add above: %r8 = %r8 + 15 - index.  */
1649 lea 1(%rdi, %rdx), %rdi /* %rdi = address one past the indexed byte.  */
1650
1651 .p2align 4
1652 L(StrncpyFillTailWithZero): /* Write %r8 zero bytes starting at (%rdi): unaligned head, aligned 64-byte loop, then a jump through L(FillTable) for the last 0..16 bytes.  */
1653 pxor %xmm0, %xmm0 /* %xmm0 = 0; source for all 16-byte zero stores and for L(Fill15)/L(Fill16).  */
1654 xor %rdx, %rdx /* %rdx = 0; source for the narrower stores in L(Fill1)..L(Fill14).  */
1655 sub $16, %r8
1656 jbe L(StrncpyFillExit) /* Count was <= 16 (unsigned): the table entry handles all of it.  */
1657
1658 movdqu %xmm0, (%rdi) /* Zero the first 16 bytes with an unaligned store.  */
1659 add $16, %rdi
1660
1661 mov %rdi, %rsi
1662 and $0xf, %rsi /* %rsi = misalignment of %rdi within a 16-byte block.  */
1663 sub %rsi, %rdi /* Round %rdi down to a 16-byte boundary (re-covers bytes already zeroed).  */
1664 add %rsi, %r8 /* Add the re-covered bytes back to the remaining count.  */
1665 sub $64, %r8
1666 jb L(StrncpyFillLess64) /* Fewer than 64 bytes remain.  */
1667
1668 L(StrncpyFillLoopMovdqa): /* Zero 64 bytes per iteration with aligned stores.  */
1669 movdqa %xmm0, (%rdi)
1670 movdqa %xmm0, 16(%rdi)
1671 movdqa %xmm0, 32(%rdi)
1672 movdqa %xmm0, 48(%rdi)
1673 add $64, %rdi
1674 sub $64, %r8
1675 jae L(StrncpyFillLoopMovdqa) /* Repeat while the subtraction did not borrow.  */
1676
1677 L(StrncpyFillLess64): /* Here %r8 = remaining - 64, with remaining < 64.  */
1678 add $32, %r8 /* %r8 = remaining - 32.  */
1679 jl L(StrncpyFillLess32) /* Fewer than 32 bytes remain.  */
1680 movdqa %xmm0, (%rdi) /* Zero 32 bytes.  */
1681 movdqa %xmm0, 16(%rdi)
1682 add $32, %rdi
1683 sub $16, %r8
1684 jl L(StrncpyFillExit) /* Fewer than 16 bytes remain after those 32.  */
1685 movdqa %xmm0, (%rdi) /* Zero 16 more bytes.  */
1686 add $16, %rdi
1687 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) /* %r8 is now 0..15: dispatch to the matching L(FillN).  */
1688
1689 L(StrncpyFillLess32): /* Here %r8 = remaining - 32, with remaining < 32.  */
1690 add $16, %r8 /* %r8 = remaining - 16.  */
1691 jl L(StrncpyFillExit) /* Fewer than 16 bytes remain.  */
1692 movdqa %xmm0, (%rdi) /* Zero 16 bytes.  */
1693 add $16, %rdi
1694 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) /* %r8 is now 0..15: dispatch to the matching L(FillN).  */
1695
1696 L(StrncpyFillExit): /* Every path arrives with %r8 = remaining - 16.  */
1697 add $16, %r8 /* Undo the bias so %r8 is the remaining byte count (0..16).  */
1698 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) /* Dispatch to the matching L(FillN).  */
1699
1700 /* end of ifndef USE_AS_STRCAT */
1701 # endif
1702
1703 .p2align 4
1704 L(UnalignedLeaveCase2OrCase3): /* Select the exit path from the mask in %rdx: non-zero goes to Case2, zero continues into Case3.  */
1705 test %rdx, %rdx
1706 jnz L(Unaligned64LeaveCase2)
1707 L(Unaligned64LeaveCase3): /* %rdx == 0: store %xmm4..%xmm7 to successive 16-byte slots at (%rdi) for as long as the count in %r8 allows.  */
1708 lea 64(%r8), %rcx
1709 and $-16, %rcx /* %rcx = (%r8 + 64) rounded down to a multiple of 16.  */
1710 add $48, %r8
1711 jl L(CopyFrom1To16BytesCase3) /* Adjusted %r8 is negative (signed): leave before storing anything.  */
1712 movdqu %xmm4, (%rdi) /* Store block 0 (offsets 0..15).  */
1713 sub $16, %r8
1714 jb L(CopyFrom1To16BytesCase3) /* Subtraction borrowed: stop after block 0.  */
1715 movdqu %xmm5, 16(%rdi) /* Store block 1 (offsets 16..31).  */
1716 sub $16, %r8
1717 jb L(CopyFrom1To16BytesCase3) /* Subtraction borrowed: stop after block 1.  */
1718 movdqu %xmm6, 32(%rdi) /* Store block 2 (offsets 32..47).  */
1719 sub $16, %r8
1720 jb L(CopyFrom1To16BytesCase3) /* Subtraction borrowed: stop after block 2.  */
1721 movdqu %xmm7, 48(%rdi) /* Store block 3 (offsets 48..63).  */
1722 # ifdef USE_AS_STPCPY
1723 lea 64(%rdi), %rax /* %rax = destination + 64.  */
1724 # endif
1725 # ifdef USE_AS_STRCAT
1726 xor %ch, %ch /* %ch = 0.  */
1727 movb %ch, 64(%rdi) /* Write a zero byte right after the 64 stored bytes.  */
1728 # endif
1729 ret
1730
1731 .p2align 4
1732 L(Unaligned64LeaveCase2): /* Entered from L(UnalignedLeaveCase2OrCase3) when %rdx != 0.  Examines %xmm4..%xmm7 one 16-byte block at a time, storing each block that is passed over.  */
1733 xor %rcx, %rcx /* %rcx = 0: byte offset of the block being examined.  */
1734 pcmpeqb %xmm4, %xmm0 /* Byte-wise compare of %xmm4 against %xmm0.  NOTE(review): this is a zero-byte test only if %xmm0 is all-zero on entry -- confirm at the jump sites.  */
1735 pmovmskb %xmm0, %rdx /* %rdx = one mask bit per matching byte of block 0.  */
1736 add $48, %r8
1737 jle L(CopyFrom1To16BytesCase2OrCase3) /* Adjusted %r8 is <= 0 (signed).  */
1738 test %rdx, %rdx
1739 # ifndef USE_AS_STRCAT
1740 jnz L(CopyFrom1To16BytesUnalignedXmm4) /* Match found in block 0.  */
1741 # else
1742 jnz L(CopyFrom1To16Bytes) /* Match found in block 0.  */
1743 # endif
1744 pcmpeqb %xmm5, %xmm0 /* The mask was zero, so %xmm0 is all-zero again here.  */
1745 pmovmskb %xmm0, %rdx /* %rdx = match mask for block 1.  */
1746 movdqu %xmm4, (%rdi) /* Block 0 had no match: store it.  */
1747 add $16, %rcx /* %rcx = 16.  */
1748 sub $16, %r8
1749 jbe L(CopyFrom1To16BytesCase2OrCase3) /* %r8 was <= 16 (unsigned).  */
1750 test %rdx, %rdx
1751 # ifndef USE_AS_STRCAT
1752 jnz L(CopyFrom1To16BytesUnalignedXmm5) /* Match found in block 1.  */
1753 # else
1754 jnz L(CopyFrom1To16Bytes) /* Match found in block 1.  */
1755 # endif
1756
1757 pcmpeqb %xmm6, %xmm0 /* The mask was zero, so %xmm0 is all-zero again here.  */
1758 pmovmskb %xmm0, %rdx /* %rdx = match mask for block 2.  */
1759 movdqu %xmm5, 16(%rdi) /* Block 1 had no match: store it.  */
1760 add $16, %rcx /* %rcx = 32.  */
1761 sub $16, %r8
1762 jbe L(CopyFrom1To16BytesCase2OrCase3) /* %r8 was <= 16 (unsigned).  */
1763 test %rdx, %rdx
1764 # ifndef USE_AS_STRCAT
1765 jnz L(CopyFrom1To16BytesUnalignedXmm6) /* Match found in block 2.  */
1766 # else
1767 jnz L(CopyFrom1To16Bytes) /* Match found in block 2.  */
1768 # endif
1769
1770 pcmpeqb %xmm7, %xmm0 /* The mask was zero, so %xmm0 is all-zero again here.  */
1771 pmovmskb %xmm0, %rdx /* %rdx = match mask for block 3.  */
1772 movdqu %xmm6, 32(%rdi) /* Block 2 had no match: store it.  */
1773 lea 16(%rdi, %rcx), %rdi /* %rcx is 32 here, so %rdi advances by 48 to block 3.  */
1774 lea 16(%rsi, %rcx), %rsi /* %rsi advances by the same 48 bytes.  */
1775 bsf %rdx, %rdx /* %rdx = index of the lowest set mask bit in block 3.  */
1776 cmp %r8, %rdx
1777 jb L(CopyFrom1To16BytesExit) /* Index < %r8 (unsigned): the match lies within the remaining count.  */
1778 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) /* Otherwise dispatch to L(StrncpyExitN) with N = %r8.  */
1779
1780 .p2align 4
1781 L(ExitZero): /* Exit without copying; the function entry jumps here in USE_AS_STRNCPY builds when the count is zero.  */
1782 # ifndef USE_AS_STRCAT
1783 mov %rdi, %rax /* Return the destination pointer unchanged.  */
1784 # endif
1785 ret
1786
1787 # endif
1788
1789 # ifndef USE_AS_STRCAT
1790 END (STRCPY)
1791 # else
1792 END (STRCAT)
1793 # endif
1794 .p2align 4 /* NOTE(review): this precedes the .section switch, so it pads the section that is current here rather than aligning L(ExitTable) -- confirm whether that is intended.  */
1795 .section .rodata
1796 L(ExitTable): /* Jump table of 32-bit offsets relative to L(ExitTable) (JMPTBL(I, B) expands to I - B).  Entry i (0-based) targets L(Exit<i+1>).  */
1797 .int JMPTBL(L(Exit1), L(ExitTable)) /* Entry 0.  */
1798 .int JMPTBL(L(Exit2), L(ExitTable))
1799 .int JMPTBL(L(Exit3), L(ExitTable))
1800 .int JMPTBL(L(Exit4), L(ExitTable))
1801 .int JMPTBL(L(Exit5), L(ExitTable))
1802 .int JMPTBL(L(Exit6), L(ExitTable))
1803 .int JMPTBL(L(Exit7), L(ExitTable))
1804 .int JMPTBL(L(Exit8), L(ExitTable))
1805 .int JMPTBL(L(Exit9), L(ExitTable))
1806 .int JMPTBL(L(Exit10), L(ExitTable))
1807 .int JMPTBL(L(Exit11), L(ExitTable))
1808 .int JMPTBL(L(Exit12), L(ExitTable))
1809 .int JMPTBL(L(Exit13), L(ExitTable))
1810 .int JMPTBL(L(Exit14), L(ExitTable))
1811 .int JMPTBL(L(Exit15), L(ExitTable))
1812 .int JMPTBL(L(Exit16), L(ExitTable))
1813 .int JMPTBL(L(Exit17), L(ExitTable))
1814 .int JMPTBL(L(Exit18), L(ExitTable))
1815 .int JMPTBL(L(Exit19), L(ExitTable))
1816 .int JMPTBL(L(Exit20), L(ExitTable))
1817 .int JMPTBL(L(Exit21), L(ExitTable))
1818 .int JMPTBL(L(Exit22), L(ExitTable))
1819 .int JMPTBL(L(Exit23), L(ExitTable))
1820 .int JMPTBL(L(Exit24), L(ExitTable))
1821 .int JMPTBL(L(Exit25), L(ExitTable))
1822 .int JMPTBL(L(Exit26), L(ExitTable))
1823 .int JMPTBL(L(Exit27), L(ExitTable))
1824 .int JMPTBL(L(Exit28), L(ExitTable))
1825 .int JMPTBL(L(Exit29), L(ExitTable))
1826 .int JMPTBL(L(Exit30), L(ExitTable))
1827 .int JMPTBL(L(Exit31), L(ExitTable))
1828 .int JMPTBL(L(Exit32), L(ExitTable)) /* Entry 31, the last one.  */
1829 # ifdef USE_AS_STRNCPY
1830 L(ExitStrncpyTable): /* Jump table of 32-bit offsets relative to L(ExitStrncpyTable) (JMPTBL(I, B) expands to I - B).  Entry i (0-based) targets L(StrncpyExit<i>), for i = 0..33.  */
1831 .int JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable)) /* Entry 0.  */
1832 .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1833 .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1834 .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1835 .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1836 .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1837 .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1838 .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1839 .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1840 .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1841 .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1842 .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1843 .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1844 .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1845 .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1846 .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1847 .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1848 .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1849 .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1850 .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1851 .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1852 .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1853 .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1854 .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1855 .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1856 .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1857 .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1858 .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1859 .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1860 .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1861 .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1862 .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1863 .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1864 .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable)) /* Entry 33, the last one.  */
1865 # ifndef USE_AS_STRCAT
1866 .p2align 4 /* Align the table start to 16 bytes.  */
1867 L(FillTable): /* Jump table of 32-bit offsets relative to L(FillTable) (JMPTBL(I, B) expands to I - B).  Entry i (0-based) targets L(Fill<i>), for i = 0..16.  */
1868 .int JMPTBL(L(Fill0), L(FillTable)) /* Entry 0.  */
1869 .int JMPTBL(L(Fill1), L(FillTable))
1870 .int JMPTBL(L(Fill2), L(FillTable))
1871 .int JMPTBL(L(Fill3), L(FillTable))
1872 .int JMPTBL(L(Fill4), L(FillTable))
1873 .int JMPTBL(L(Fill5), L(FillTable))
1874 .int JMPTBL(L(Fill6), L(FillTable))
1875 .int JMPTBL(L(Fill7), L(FillTable))
1876 .int JMPTBL(L(Fill8), L(FillTable))
1877 .int JMPTBL(L(Fill9), L(FillTable))
1878 .int JMPTBL(L(Fill10), L(FillTable))
1879 .int JMPTBL(L(Fill11), L(FillTable))
1880 .int JMPTBL(L(Fill12), L(FillTable))
1881 .int JMPTBL(L(Fill13), L(FillTable))
1882 .int JMPTBL(L(Fill14), L(FillTable))
1883 .int JMPTBL(L(Fill15), L(FillTable))
1884 .int JMPTBL(L(Fill16), L(FillTable)) /* Entry 16, the last one.  */
1885 # endif
1886 # endif
1887 #endif