]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/x86_64/multiarch/strcpy-ssse3.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / x86_64 / multiarch / strcpy-ssse3.S
1 /* strcpy with SSSE3
2 Copyright (C) 2011-2015 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #if IS_IN (libc)
21
22 # ifndef USE_AS_STRCAT
23 # include <sysdep.h>
24
25 # ifndef STRCPY
26 # define STRCPY __strcpy_ssse3
27 # endif
28
29 .section .text.ssse3,"ax",@progbits
/* STRCPY entry point (the symbol defaults to __strcpy_ssse3).
   The moves below fix the register roles used by the rest of the routine:
     rcx = value of rsi on entry
     rdx = value of rdi on entry
     r8  = value of rdx on entry (USE_AS_STRNCPY build only)
   NOTE(review): by the SysV AMD64 argument order these are presumably the
   source pointer, the destination pointer and the byte count -- confirm
   against the C prototype.  */
30 ENTRY (STRCPY)
31
32 mov %rsi, %rcx
33 # ifdef USE_AS_STRNCPY
34 mov %rdx, %r8
35 # endif
36 mov %rdi, %rdx
37 # ifdef USE_AS_STRNCPY
/* r8 == 0 and r8 <= 8 are handled by dedicated exits defined elsewhere.  */
38 test %r8, %r8
39 jz L(Exit0)
40 cmp $8, %r8
41 jbe L(StrncpyExit8Bytes)
42 # endif
/* Probe the first 16 bytes at rcx one at a time; a zero byte at offset k
   branches to L(Exit<k+1>) (defined elsewhere in the file).  */
43 cmpb $0, (%rcx)
44 jz L(Exit1)
45 cmpb $0, 1(%rcx)
46 jz L(Exit2)
47 cmpb $0, 2(%rcx)
48 jz L(Exit3)
49 cmpb $0, 3(%rcx)
50 jz L(Exit4)
51 cmpb $0, 4(%rcx)
52 jz L(Exit5)
53 cmpb $0, 5(%rcx)
54 jz L(Exit6)
55 cmpb $0, 6(%rcx)
56 jz L(Exit7)
57 cmpb $0, 7(%rcx)
58 jz L(Exit8)
59 # ifdef USE_AS_STRNCPY
/* Bytes 0..7 hold no zero; leave here if r8 < 16 (unsigned).  */
60 cmp $16, %r8
61 jb L(StrncpyExit15Bytes)
62 # endif
63 cmpb $0, 8(%rcx)
64 jz L(Exit9)
65 cmpb $0, 9(%rcx)
66 jz L(Exit10)
67 cmpb $0, 10(%rcx)
68 jz L(Exit11)
69 cmpb $0, 11(%rcx)
70 jz L(Exit12)
71 cmpb $0, 12(%rcx)
72 jz L(Exit13)
73 cmpb $0, 13(%rcx)
74 jz L(Exit14)
75 cmpb $0, 14(%rcx)
76 jz L(Exit15)
77 # ifdef USE_AS_STRNCPY
/* r8 == 16 takes the Exit16 path without looking at byte 15.  */
78 cmp $16, %r8
79 je L(Exit16)
80 # endif
81 cmpb $0, 15(%rcx)
82 jz L(Exit16)
/* This `# endif' closes the `# ifndef USE_AS_STRCAT' opened before the
   entry point: when USE_AS_STRCAT is defined everything above is compiled
   out and the code below is used with rcx/rdx (and r8) set up elsewhere.  */
83 # endif
84
85 # ifdef USE_AS_STRNCPY
/* r8 = r8 - 16 + (rcx & 0xf).  */
86 mov %rcx, %rsi
87 sub $16, %r8
88 and $0xf, %rsi
89
90 /* add 16 bytes rcx_offset to r8 */
91
92 add %rsi, %r8
93 # endif
/* rsi = first 16-byte aligned address strictly above rcx.  */
94 lea 16(%rcx), %rsi
95 and $-16, %rsi
/* xmm0 = 0.  Copy the first 16 bytes from (rcx) to (rdx) as two 8-byte
   moves through r9, interleaved with the zero-byte compare of the aligned
   16-byte chunk at (rsi).  */
96 pxor %xmm0, %xmm0
97 mov (%rcx), %r9
98 mov %r9, (%rdx)
99 pcmpeqb (%rsi), %xmm0
100 mov 8(%rcx), %r9
101 mov %r9, 8(%rdx)
102
103 /* convert byte mask in xmm0 to bit mask */
104
105 pmovmskb %xmm0, %rax
/* rsi = distance from rcx to that aligned chunk (1..16).  */
106 sub %rcx, %rsi
107
108 # ifdef USE_AS_STRNCPY
109 sub $16, %r8
110 jbe L(CopyFrom1To16BytesCase2OrCase3)
111 # endif
/* Non-zero mask: the chunk at rcx + rsi contains a zero byte.  Falling
   through means the mask is 0, i.e. xmm0 is still all-zero.  */
112 test %rax, %rax
113 jnz L(CopyFrom1To16Bytes)
114
/* Round rdx up to the next 16-byte boundary (always advancing, by 1..16).
   rax = old rdx - new rdx, a value in -16..-1.  */
115 mov %rdx, %rax
116 lea 16(%rdx), %rdx
117 and $-16, %rdx
118 sub %rdx, %rax
119
120 # ifdef USE_AS_STRNCPY
/* esi keeps only bit 31 (the 32-bit sign) of rsi + rax - 1.  When that
   value is non-negative, r8 gets 16 added back.  */
121 add %rax, %rsi
122 lea -1(%rsi), %rsi
123 and $1<<31, %esi
124 test %rsi, %rsi
125 jnz L(ContinueCopy)
126 lea 16(%r8), %r8
127
128 L(ContinueCopy):
129 # endif
/* Advance rcx by the same amount rdx advanced (rax is negative), then
   rax = rcx & 0xf.  The `mov $0, %rsi' leaves the flags alone, so the jz
   below still tests the result of the `and'.  */
130 sub %rax, %rcx
131 mov %rcx, %rax
132 and $0xf, %rax
133 mov $0, %rsi
134
135 /* case: rcx_offset == rdx_offset */
136
137 jz L(Align16Both)
138
/* rax is 1..15 here: dispatch to L(Shl<rax>).  */
139 cmp $8, %rax
140 jae L(ShlHigh8)
141 cmp $1, %rax
142 je L(Shl1)
143 cmp $2, %rax
144 je L(Shl2)
145 cmp $3, %rax
146 je L(Shl3)
147 cmp $4, %rax
148 je L(Shl4)
149 cmp $5, %rax
150 je L(Shl5)
151 cmp $6, %rax
152 je L(Shl6)
153 jmp L(Shl7)
154
155 L(ShlHigh8):
/* The flags are still those of `cmp $8, %rax' above.  */
156 je L(Shl8)
157 cmp $9, %rax
158 je L(Shl9)
159 cmp $10, %rax
160 je L(Shl10)
161 cmp $11, %rax
162 je L(Shl11)
163 cmp $12, %rax
164 je L(Shl12)
165 cmp $13, %rax
166 je L(Shl13)
167 cmp $14, %rax
168 je L(Shl14)
169 jmp L(Shl15)
170
/* L(Align16Both): copy path taken when rcx and rdx are both 16-byte
   aligned (the dispatcher jumps here when rcx & 0xf == 0 after rdx has
   been aligned).  On entry rsi == 0 and xmm0 is all-zero.

   Six unrolled steps follow.  Each step stores the chunk loaded by the
   previous step and tests the next chunk for a zero byte.  When a step
   leaves to L(CopyFrom1To16Bytes), rax holds the zero-byte mask and rsi
   the offset (from rcx and rdx) of the chunk that produced it; that chunk
   has been loaded but not stored yet.  xmm0 stays all-zero on every
   fall-through because the compare result was empty, so it is reused as
   the zero operand of the next pcmpeqb.  In the USE_AS_STRNCPY build r8
   is decremented by 16 per step and the step exits when that subtraction
   borrows or reaches zero (jbe).  */
171 L(Align16Both):
172 movaps (%rcx), %xmm1
173 movaps 16(%rcx), %xmm2
174 movaps %xmm1, (%rdx)
175 pcmpeqb %xmm2, %xmm0
176 pmovmskb %xmm0, %rax
177 lea 16(%rsi), %rsi
178 # ifdef USE_AS_STRNCPY
179 sub $16, %r8
180 jbe L(CopyFrom1To16BytesCase2OrCase3)
181 # endif
182 test %rax, %rax
183 jnz L(CopyFrom1To16Bytes)
184
185 movaps 16(%rcx, %rsi), %xmm3
186 movaps %xmm2, (%rdx, %rsi)
187 pcmpeqb %xmm3, %xmm0
188 pmovmskb %xmm0, %rax
189 lea 16(%rsi), %rsi
190 # ifdef USE_AS_STRNCPY
191 sub $16, %r8
192 jbe L(CopyFrom1To16BytesCase2OrCase3)
193 # endif
194 test %rax, %rax
195 jnz L(CopyFrom1To16Bytes)
196
197 movaps 16(%rcx, %rsi), %xmm4
198 movaps %xmm3, (%rdx, %rsi)
199 pcmpeqb %xmm4, %xmm0
200 pmovmskb %xmm0, %rax
201 lea 16(%rsi), %rsi
202 # ifdef USE_AS_STRNCPY
203 sub $16, %r8
204 jbe L(CopyFrom1To16BytesCase2OrCase3)
205 # endif
206 test %rax, %rax
207 jnz L(CopyFrom1To16Bytes)
208
209 movaps 16(%rcx, %rsi), %xmm1
210 movaps %xmm4, (%rdx, %rsi)
211 pcmpeqb %xmm1, %xmm0
212 pmovmskb %xmm0, %rax
213 lea 16(%rsi), %rsi
214 # ifdef USE_AS_STRNCPY
215 sub $16, %r8
216 jbe L(CopyFrom1To16BytesCase2OrCase3)
217 # endif
218 test %rax, %rax
219 jnz L(CopyFrom1To16Bytes)
220
221 movaps 16(%rcx, %rsi), %xmm2
222 movaps %xmm1, (%rdx, %rsi)
223 pcmpeqb %xmm2, %xmm0
224 pmovmskb %xmm0, %rax
225 lea 16(%rsi), %rsi
226 # ifdef USE_AS_STRNCPY
227 sub $16, %r8
228 jbe L(CopyFrom1To16BytesCase2OrCase3)
229 # endif
230 test %rax, %rax
231 jnz L(CopyFrom1To16Bytes)
232
233 movaps 16(%rcx, %rsi), %xmm3
234 movaps %xmm2, (%rdx, %rsi)
235 pcmpeqb %xmm3, %xmm0
236 pmovmskb %xmm0, %rax
237 lea 16(%rsi), %rsi
238 # ifdef USE_AS_STRNCPY
239 sub $16, %r8
240 jbe L(CopyFrom1To16BytesCase2OrCase3)
241 # endif
242 test %rax, %rax
243 jnz L(CopyFrom1To16Bytes)
244
/* rsi == 96 here.  Store the last tested chunk, then prepare the 64-byte
   loop: rcx = (old rcx + 112) rounded down to a multiple of 64, rax =
   old rcx - new rcx, and rdx is moved by the same distance so that both
   pointers keep referring to corresponding bytes.  */
245 movaps %xmm3, (%rdx, %rsi)
246 mov %rcx, %rax
247 lea 16(%rcx, %rsi), %rcx
248 and $-0x40, %rcx
249 sub %rcx, %rax
250 sub %rax, %rdx
251 # ifdef USE_AS_STRNCPY
/* r8 = r8 + 112 + rax, matching the pointer adjustment above.  */
252 lea 112(%r8, %rax), %r8
253 # endif
/* rsi = -64: L(Aligned64Leave) uses it as the chunk offset relative to
   the pointers after the loop has advanced them by 64.  */
254 mov $-0x40, %rsi
255
/* L(Aligned64Loop): main loop for the case where rcx and rdx share the
   same 16-byte alignment.  Each iteration loads 64 bytes from rcx as four
   aligned 16-byte vectors and reduces them with a bytewise unsigned
   minimum, so one compare against the all-zero xmm0 detects a zero byte
   anywhere in the 64 bytes.  The pminub reduction overwrites xmm2 and
   xmm3, so untouched copies are kept in xmm4 and xmm6; the data to store
   is therefore xmm4, xmm5, xmm6, xmm7.  rsi is not modified here.  */
256 .p2align 4
257 L(Aligned64Loop):
258 movaps (%rcx), %xmm2
259 movaps %xmm2, %xmm4
260 movaps 16(%rcx), %xmm5
261 movaps 32(%rcx), %xmm3
262 movaps %xmm3, %xmm6
263 movaps 48(%rcx), %xmm7
264 pminub %xmm5, %xmm2
265 pminub %xmm7, %xmm3
266 pminub %xmm2, %xmm3
267 pcmpeqb %xmm0, %xmm3
268 pmovmskb %xmm3, %rax
/* Both pointers are advanced before the exit checks, hence the negative
   store offsets below and in L(Aligned64Leave).  */
269 lea 64(%rdx), %rdx
270 lea 64(%rcx), %rcx
271 # ifdef USE_AS_STRNCPY
272 sub $64, %r8
273 jbe L(StrncpyLeaveCase2OrCase3)
274 # endif
275 test %rax, %rax
276 jnz L(Aligned64Leave)
/* No zero byte in these 64 bytes: store them and continue.  */
277 movaps %xmm4, -64(%rdx)
278 movaps %xmm5, -48(%rdx)
279 movaps %xmm6, -32(%rdx)
280 movaps %xmm7, -16(%rdx)
281 jmp L(Aligned64Loop)
282
/* L(Aligned64Leave): reached from L(Aligned64Loop) when the combined
   mask showed a zero byte somewhere in the 64 bytes held in xmm4, xmm5,
   xmm6, xmm7 (none of which has been stored yet).  rcx and rdx already
   point 64 bytes past that window and rsi starts at -64.  The vectors are
   tested in order; each clean one is stored and rsi moves on by 16, so on
   the jump to L(CopyFrom1To16Bytes) rax is the zero-byte mask of the
   vector at offset rsi from rcx/rdx.  xmm0 is all-zero on entry and
   remains so after every compare that finds nothing.  */
283 L(Aligned64Leave):
284 # ifdef USE_AS_STRNCPY
/* r8 was reduced by 64 in the loop; add 48 back here and take off 16 for
   each further vector examined below.  */
285 lea 48(%r8), %r8
286 # endif
287 pcmpeqb %xmm4, %xmm0
288 pmovmskb %xmm0, %rax
289 test %rax, %rax
290 jnz L(CopyFrom1To16Bytes)
291
292 pcmpeqb %xmm5, %xmm0
293 # ifdef USE_AS_STRNCPY
294 lea -16(%r8), %r8
295 # endif
296 pmovmskb %xmm0, %rax
297 movaps %xmm4, -64(%rdx)
298 test %rax, %rax
/* lea is used for the increment because it does not alter the flags the
   following jnz depends on.  */
299 lea 16(%rsi), %rsi
300 jnz L(CopyFrom1To16Bytes)
301
302 pcmpeqb %xmm6, %xmm0
303 # ifdef USE_AS_STRNCPY
304 lea -16(%r8), %r8
305 # endif
306 pmovmskb %xmm0, %rax
307 movaps %xmm5, -48(%rdx)
308 test %rax, %rax
309 lea 16(%rsi), %rsi
310 jnz L(CopyFrom1To16Bytes)
311
/* The first three vectors are clean, so the zero byte must be in xmm7:
   no test is needed before the final jump.  */
312 movaps %xmm6, -32(%rdx)
313 pcmpeqb %xmm7, %xmm0
314 # ifdef USE_AS_STRNCPY
315 lea -16(%r8), %r8
316 # endif
317 pmovmskb %xmm0, %rax
318 lea 16(%rsi), %rsi
319 jmp L(CopyFrom1To16Bytes)
320
/* L(Shl1): copy path for rcx & 0xf == 1 with rdx 16-byte aligned (see the
   dispatcher).  The source is only ever read with aligned loads at
   rcx - 1 + 16*k; `palignr $1' joins two neighbouring chunks and drops
   the first byte, producing the 16 source bytes that start at rcx, which
   are stored aligned at rdx.  xmm0 is all-zero on entry and stays zero
   for as long as no zero byte has been found.  In the USE_AS_STRNCPY
   build r8 is decremented alongside and the L(Strncpy...) exits are taken
   when it runs out.  */
321 .p2align 4
322 L(Shl1):
/* xmm1 = aligned chunk containing the byte at rcx, xmm2 = the one after.  */
323 movaps -1(%rcx), %xmm1
324 movaps 15(%rcx), %xmm2
/* Also the re-entry point from the 64-byte loop below once that loop has
   seen a zero byte somewhere in its window; xmm1/xmm2 have the same
   meaning then.  */
325 L(Shl1Start):
326 pcmpeqb %xmm2, %xmm0
327 pmovmskb %xmm0, %rax
328 movaps %xmm2, %xmm3
329 # ifdef USE_AS_STRNCPY
330 sub $16, %r8
331 jbe L(StrncpyExit1Case2OrCase3)
332 # endif
333 test %rax, %rax
334 jnz L(Shl1LoopExit)
335
/* No zero byte in xmm2: emit 16 bytes and fetch the next chunk.  In the
   steps below xmm1 and xmm3 take turns holding the previous chunk.  */
336 palignr $1, %xmm1, %xmm2
337 movaps %xmm2, (%rdx)
338 movaps 31(%rcx), %xmm2
339
340 pcmpeqb %xmm2, %xmm0
341 lea 16(%rdx), %rdx
342 pmovmskb %xmm0, %rax
343 lea 16(%rcx), %rcx
344 movaps %xmm2, %xmm1
345 # ifdef USE_AS_STRNCPY
346 sub $16, %r8
347 jbe L(StrncpyExit1Case2OrCase3)
348 # endif
349 test %rax, %rax
350 jnz L(Shl1LoopExit)
351
352 palignr $1, %xmm3, %xmm2
353 movaps %xmm2, (%rdx)
354 movaps 31(%rcx), %xmm2
355
356 pcmpeqb %xmm2, %xmm0
357 lea 16(%rdx), %rdx
358 pmovmskb %xmm0, %rax
359 lea 16(%rcx), %rcx
360 movaps %xmm2, %xmm3
361 # ifdef USE_AS_STRNCPY
362 sub $16, %r8
363 jbe L(StrncpyExit1Case2OrCase3)
364 # endif
365 test %rax, %rax
366 jnz L(Shl1LoopExit)
367
368 palignr $1, %xmm1, %xmm2
369 movaps %xmm2, (%rdx)
370 movaps 31(%rcx), %xmm2
371
372 pcmpeqb %xmm2, %xmm0
373 lea 16(%rdx), %rdx
374 pmovmskb %xmm0, %rax
375 lea 16(%rcx), %rcx
376 # ifdef USE_AS_STRNCPY
377 sub $16, %r8
378 jbe L(StrncpyExit1Case2OrCase3)
379 # endif
380 test %rax, %rax
381 jnz L(Shl1LoopExit)
382
383 palignr $1, %xmm3, %xmm2
384 movaps %xmm2, (%rdx)
385 lea 31(%rcx), %rcx
386 lea 16(%rdx), %rdx
387
/* Prepare the 64-byte loop: rax = (rcx + 31) & 63, a multiple of 16.
   Both pointers are moved back by rax so that the loop's first load,
   15(%rcx), is 64-byte aligned; the overlapping bytes are simply copied
   again.  The strncpy count gets the same amount added back.  */
388 mov %rcx, %rax
389 and $-0x40, %rcx
390 sub %rcx, %rax
391 lea -15(%rcx), %rcx
392 sub %rax, %rdx
393 # ifdef USE_AS_STRNCPY
394 add %rax, %r8
395 # endif
396 movaps -1(%rcx), %xmm1
397
398 /* 64 bytes loop */
399 .p2align 4
400 L(Shl1LoopStart):
/* xmm2..xmm5 = four consecutive aligned chunks.  xmm7 becomes the
   bytewise minimum of all four, so a zero byte anywhere sets a bit in
   rax.  xmm7 is then reused for an unshifted copy of the last chunk,
   which is the next iteration's xmm1.  */
401 movaps 15(%rcx), %xmm2
402 movaps 31(%rcx), %xmm3
403 movaps %xmm3, %xmm6
404 movaps 47(%rcx), %xmm4
405 movaps %xmm4, %xmm7
406 movaps 63(%rcx), %xmm5
407 pminub %xmm2, %xmm6
408 pminub %xmm5, %xmm7
409 pminub %xmm6, %xmm7
410 pcmpeqb %xmm0, %xmm7
411 pmovmskb %xmm7, %rax
412 movaps %xmm5, %xmm7
413 palignr $1, %xmm4, %xmm5
414 test %rax, %rax
/* palignr leaves the flags alone, so the jnz acts on the test above.
   xmm1 and xmm2 are still unmodified, as L(Shl1Start) requires.  */
415 palignr $1, %xmm3, %xmm4
416 jnz L(Shl1Start)
417 # ifdef USE_AS_STRNCPY
418 sub $64, %r8
419 jbe L(StrncpyLeave1)
420 # endif
421 palignr $1, %xmm2, %xmm3
422 lea 64(%rcx), %rcx
423 palignr $1, %xmm1, %xmm2
424 movaps %xmm7, %xmm1
425 movaps %xmm5, 48(%rdx)
426 movaps %xmm4, 32(%rdx)
427 movaps %xmm3, 16(%rdx)
428 movaps %xmm2, (%rdx)
429 lea 64(%rdx), %rdx
430 jmp L(Shl1LoopStart)
431
/* A zero byte was found in the chunk at 15(%rcx); rax is its mask.  Copy
   the 16 bytes at rcx - 1 to rdx - 1 with unaligned moves (this covers
   source bytes rcx .. rcx + 14) and hand over with rsi = 15, the offset
   of the chunk described by rax.  */
432 L(Shl1LoopExit):
433 movdqu -1(%rcx), %xmm1
434 mov $15, %rsi
435 movdqu %xmm1, -1(%rdx)
436 jmp L(CopyFrom1To16Bytes)
437
/* L(Shl2): copy path for rcx & 0xf == 2 with rdx 16-byte aligned (see the
   dispatcher).  The source is read with aligned loads at rcx - 2 + 16*k;
   `palignr $2' joins two neighbouring chunks and drops the first two
   bytes, giving the 16 source bytes that start at rcx, stored aligned at
   rdx.  xmm0 is all-zero on entry and stays zero while no zero byte has
   been found.  In the USE_AS_STRNCPY build r8 is decremented alongside.  */
438 .p2align 4
439 L(Shl2):
/* xmm1 = aligned chunk containing the byte at rcx, xmm2 = the one after.  */
440 movaps -2(%rcx), %xmm1
441 movaps 14(%rcx), %xmm2
/* Also re-entered from the 64-byte loop once it has seen a zero byte.  */
442 L(Shl2Start):
443 pcmpeqb %xmm2, %xmm0
444 pmovmskb %xmm0, %rax
445 movaps %xmm2, %xmm3
446 # ifdef USE_AS_STRNCPY
447 sub $16, %r8
448 jbe L(StrncpyExit2Case2OrCase3)
449 # endif
450 test %rax, %rax
451 jnz L(Shl2LoopExit)
452
/* No zero byte in xmm2: emit 16 bytes and fetch the next chunk.  xmm1 and
   xmm3 take turns holding the previous chunk in the steps below.  */
453 palignr $2, %xmm1, %xmm2
454 movaps %xmm2, (%rdx)
455 movaps 30(%rcx), %xmm2
456
457 pcmpeqb %xmm2, %xmm0
458 lea 16(%rdx), %rdx
459 pmovmskb %xmm0, %rax
460 lea 16(%rcx), %rcx
461 movaps %xmm2, %xmm1
462 # ifdef USE_AS_STRNCPY
463 sub $16, %r8
464 jbe L(StrncpyExit2Case2OrCase3)
465 # endif
466 test %rax, %rax
467 jnz L(Shl2LoopExit)
468
469 palignr $2, %xmm3, %xmm2
470 movaps %xmm2, (%rdx)
471 movaps 30(%rcx), %xmm2
472
473 pcmpeqb %xmm2, %xmm0
474 lea 16(%rdx), %rdx
475 pmovmskb %xmm0, %rax
476 lea 16(%rcx), %rcx
477 movaps %xmm2, %xmm3
478 # ifdef USE_AS_STRNCPY
479 sub $16, %r8
480 jbe L(StrncpyExit2Case2OrCase3)
481 # endif
482 test %rax, %rax
483 jnz L(Shl2LoopExit)
484
485 palignr $2, %xmm1, %xmm2
486 movaps %xmm2, (%rdx)
487 movaps 30(%rcx), %xmm2
488
489 pcmpeqb %xmm2, %xmm0
490 lea 16(%rdx), %rdx
491 pmovmskb %xmm0, %rax
492 lea 16(%rcx), %rcx
493 # ifdef USE_AS_STRNCPY
494 sub $16, %r8
495 jbe L(StrncpyExit2Case2OrCase3)
496 # endif
497 test %rax, %rax
498 jnz L(Shl2LoopExit)
499
500 palignr $2, %xmm3, %xmm2
501 movaps %xmm2, (%rdx)
502 lea 30(%rcx), %rcx
503 lea 16(%rdx), %rdx
504
/* Prepare the 64-byte loop: rax = (rcx + 30) & 63, a multiple of 16.
   Both pointers move back by rax so that 14(%rcx) is 64-byte aligned;
   the overlap is copied again and r8 (strncpy) is credited with rax.  */
505 mov %rcx, %rax
506 and $-0x40, %rcx
507 sub %rcx, %rax
508 lea -14(%rcx), %rcx
509 sub %rax, %rdx
510 # ifdef USE_AS_STRNCPY
511 add %rax, %r8
512 # endif
513 movaps -2(%rcx), %xmm1
514
515 /* 64 bytes loop */
516 .p2align 4
517 L(Shl2LoopStart):
/* xmm2..xmm5 = four consecutive aligned chunks; xmm7 = bytewise minimum
   of all four, compared against zero to build the mask in rax.  xmm7 is
   then reused for an unshifted copy of the last chunk (next xmm1).  */
518 movaps 14(%rcx), %xmm2
519 movaps 30(%rcx), %xmm3
520 movaps %xmm3, %xmm6
521 movaps 46(%rcx), %xmm4
522 movaps %xmm4, %xmm7
523 movaps 62(%rcx), %xmm5
524 pminub %xmm2, %xmm6
525 pminub %xmm5, %xmm7
526 pminub %xmm6, %xmm7
527 pcmpeqb %xmm0, %xmm7
528 pmovmskb %xmm7, %rax
529 movaps %xmm5, %xmm7
530 palignr $2, %xmm4, %xmm5
531 test %rax, %rax
/* palignr leaves the flags alone; xmm1 and xmm2 are still unmodified
   when control goes back to L(Shl2Start).  */
532 palignr $2, %xmm3, %xmm4
533 jnz L(Shl2Start)
534 # ifdef USE_AS_STRNCPY
535 sub $64, %r8
536 jbe L(StrncpyLeave2)
537 # endif
538 palignr $2, %xmm2, %xmm3
539 lea 64(%rcx), %rcx
540 palignr $2, %xmm1, %xmm2
541 movaps %xmm7, %xmm1
542 movaps %xmm5, 48(%rdx)
543 movaps %xmm4, 32(%rdx)
544 movaps %xmm3, 16(%rdx)
545 movaps %xmm2, (%rdx)
546 lea 64(%rdx), %rdx
547 jmp L(Shl2LoopStart)
548
/* A zero byte was found in the chunk at 14(%rcx); rax is its mask.  Copy
   the 16 bytes at rcx - 2 to rdx - 2 (covers source bytes rcx .. rcx + 13)
   and hand over with rsi = 14, the offset of the chunk described by rax.  */
549 L(Shl2LoopExit):
550 movdqu -2(%rcx), %xmm1
551 mov $14, %rsi
552 movdqu %xmm1, -2(%rdx)
553 jmp L(CopyFrom1To16Bytes)
554
/* L(Shl3): copy path for rcx & 0xf == 3 with rdx 16-byte aligned (see the
   dispatcher).  The source is read with aligned loads at rcx - 3 + 16*k;
   `palignr $3' joins two neighbouring chunks and drops the first three
   bytes, giving the 16 source bytes that start at rcx, stored aligned at
   rdx.  xmm0 is all-zero on entry and stays zero while no zero byte has
   been found.  In the USE_AS_STRNCPY build r8 is decremented alongside.  */
555 .p2align 4
556 L(Shl3):
/* xmm1 = aligned chunk containing the byte at rcx, xmm2 = the one after.  */
557 movaps -3(%rcx), %xmm1
558 movaps 13(%rcx), %xmm2
/* Also re-entered from the 64-byte loop once it has seen a zero byte.  */
559 L(Shl3Start):
560 pcmpeqb %xmm2, %xmm0
561 pmovmskb %xmm0, %rax
562 movaps %xmm2, %xmm3
563 # ifdef USE_AS_STRNCPY
564 sub $16, %r8
565 jbe L(StrncpyExit3Case2OrCase3)
566 # endif
567 test %rax, %rax
568 jnz L(Shl3LoopExit)
569
/* No zero byte in xmm2: emit 16 bytes and fetch the next chunk.  xmm1 and
   xmm3 take turns holding the previous chunk in the steps below.  */
570 palignr $3, %xmm1, %xmm2
571 movaps %xmm2, (%rdx)
572 movaps 29(%rcx), %xmm2
573
574 pcmpeqb %xmm2, %xmm0
575 lea 16(%rdx), %rdx
576 pmovmskb %xmm0, %rax
577 lea 16(%rcx), %rcx
578 movaps %xmm2, %xmm1
579 # ifdef USE_AS_STRNCPY
580 sub $16, %r8
581 jbe L(StrncpyExit3Case2OrCase3)
582 # endif
583 test %rax, %rax
584 jnz L(Shl3LoopExit)
585
586 palignr $3, %xmm3, %xmm2
587 movaps %xmm2, (%rdx)
588 movaps 29(%rcx), %xmm2
589
590 pcmpeqb %xmm2, %xmm0
591 lea 16(%rdx), %rdx
592 pmovmskb %xmm0, %rax
593 lea 16(%rcx), %rcx
594 movaps %xmm2, %xmm3
595 # ifdef USE_AS_STRNCPY
596 sub $16, %r8
597 jbe L(StrncpyExit3Case2OrCase3)
598 # endif
599 test %rax, %rax
600 jnz L(Shl3LoopExit)
601
602 palignr $3, %xmm1, %xmm2
603 movaps %xmm2, (%rdx)
604 movaps 29(%rcx), %xmm2
605
606 pcmpeqb %xmm2, %xmm0
607 lea 16(%rdx), %rdx
608 pmovmskb %xmm0, %rax
609 lea 16(%rcx), %rcx
610 # ifdef USE_AS_STRNCPY
611 sub $16, %r8
612 jbe L(StrncpyExit3Case2OrCase3)
613 # endif
614 test %rax, %rax
615 jnz L(Shl3LoopExit)
616
617 palignr $3, %xmm3, %xmm2
618 movaps %xmm2, (%rdx)
619 lea 29(%rcx), %rcx
620 lea 16(%rdx), %rdx
621
/* Prepare the 64-byte loop: rax = (rcx + 29) & 63, a multiple of 16.
   Both pointers move back by rax so that 13(%rcx) is 64-byte aligned;
   the overlap is copied again and r8 (strncpy) is credited with rax.  */
622 mov %rcx, %rax
623 and $-0x40, %rcx
624 sub %rcx, %rax
625 lea -13(%rcx), %rcx
626 sub %rax, %rdx
627 # ifdef USE_AS_STRNCPY
628 add %rax, %r8
629 # endif
630 movaps -3(%rcx), %xmm1
631
632 /* 64 bytes loop */
633 .p2align 4
634 L(Shl3LoopStart):
/* xmm2..xmm5 = four consecutive aligned chunks; xmm7 = bytewise minimum
   of all four, compared against zero to build the mask in rax.  xmm7 is
   then reused for an unshifted copy of the last chunk (next xmm1).  */
635 movaps 13(%rcx), %xmm2
636 movaps 29(%rcx), %xmm3
637 movaps %xmm3, %xmm6
638 movaps 45(%rcx), %xmm4
639 movaps %xmm4, %xmm7
640 movaps 61(%rcx), %xmm5
641 pminub %xmm2, %xmm6
642 pminub %xmm5, %xmm7
643 pminub %xmm6, %xmm7
644 pcmpeqb %xmm0, %xmm7
645 pmovmskb %xmm7, %rax
646 movaps %xmm5, %xmm7
647 palignr $3, %xmm4, %xmm5
648 test %rax, %rax
/* palignr leaves the flags alone; xmm1 and xmm2 are still unmodified
   when control goes back to L(Shl3Start).  */
649 palignr $3, %xmm3, %xmm4
650 jnz L(Shl3Start)
651 # ifdef USE_AS_STRNCPY
652 sub $64, %r8
653 jbe L(StrncpyLeave3)
654 # endif
655 palignr $3, %xmm2, %xmm3
656 lea 64(%rcx), %rcx
657 palignr $3, %xmm1, %xmm2
658 movaps %xmm7, %xmm1
659 movaps %xmm5, 48(%rdx)
660 movaps %xmm4, 32(%rdx)
661 movaps %xmm3, 16(%rdx)
662 movaps %xmm2, (%rdx)
663 lea 64(%rdx), %rdx
664 jmp L(Shl3LoopStart)
665
/* A zero byte was found in the chunk at 13(%rcx); rax is its mask.  Copy
   the 16 bytes at rcx - 3 to rdx - 3 (covers source bytes rcx .. rcx + 12)
   and hand over with rsi = 13, the offset of the chunk described by rax.  */
666 L(Shl3LoopExit):
667 movdqu -3(%rcx), %xmm1
668 mov $13, %rsi
669 movdqu %xmm1, -3(%rdx)
670 jmp L(CopyFrom1To16Bytes)
671
/* L(Shl4): copy path for rcx & 0xf == 4 with rdx 16-byte aligned (see the
   dispatcher).  The source is read with aligned loads at rcx - 4 + 16*k;
   `palignr $4' joins two neighbouring chunks and drops the first four
   bytes, giving the 16 source bytes that start at rcx, stored aligned at
   rdx.  xmm0 is all-zero on entry and stays zero while no zero byte has
   been found.  In the USE_AS_STRNCPY build r8 is decremented alongside.  */
672 .p2align 4
673 L(Shl4):
/* xmm1 = aligned chunk containing the byte at rcx, xmm2 = the one after.  */
674 movaps -4(%rcx), %xmm1
675 movaps 12(%rcx), %xmm2
/* Also re-entered from the 64-byte loop once it has seen a zero byte.  */
676 L(Shl4Start):
677 pcmpeqb %xmm2, %xmm0
678 pmovmskb %xmm0, %rax
679 movaps %xmm2, %xmm3
680 # ifdef USE_AS_STRNCPY
681 sub $16, %r8
682 jbe L(StrncpyExit4Case2OrCase3)
683 # endif
684 test %rax, %rax
685 jnz L(Shl4LoopExit)
686
/* No zero byte in xmm2: emit 16 bytes and fetch the next chunk.  xmm1 and
   xmm3 take turns holding the previous chunk in the steps below.  */
687 palignr $4, %xmm1, %xmm2
688 movaps %xmm2, (%rdx)
689 movaps 28(%rcx), %xmm2
690
691 pcmpeqb %xmm2, %xmm0
692 lea 16(%rdx), %rdx
693 pmovmskb %xmm0, %rax
694 lea 16(%rcx), %rcx
695 movaps %xmm2, %xmm1
696 # ifdef USE_AS_STRNCPY
697 sub $16, %r8
698 jbe L(StrncpyExit4Case2OrCase3)
699 # endif
700 test %rax, %rax
701 jnz L(Shl4LoopExit)
702
703 palignr $4, %xmm3, %xmm2
704 movaps %xmm2, (%rdx)
705 movaps 28(%rcx), %xmm2
706
707 pcmpeqb %xmm2, %xmm0
708 lea 16(%rdx), %rdx
709 pmovmskb %xmm0, %rax
710 lea 16(%rcx), %rcx
711 movaps %xmm2, %xmm3
712 # ifdef USE_AS_STRNCPY
713 sub $16, %r8
714 jbe L(StrncpyExit4Case2OrCase3)
715 # endif
716 test %rax, %rax
717 jnz L(Shl4LoopExit)
718
719 palignr $4, %xmm1, %xmm2
720 movaps %xmm2, (%rdx)
721 movaps 28(%rcx), %xmm2
722
723 pcmpeqb %xmm2, %xmm0
724 lea 16(%rdx), %rdx
725 pmovmskb %xmm0, %rax
726 lea 16(%rcx), %rcx
727 # ifdef USE_AS_STRNCPY
728 sub $16, %r8
729 jbe L(StrncpyExit4Case2OrCase3)
730 # endif
731 test %rax, %rax
732 jnz L(Shl4LoopExit)
733
734 palignr $4, %xmm3, %xmm2
735 movaps %xmm2, (%rdx)
736 lea 28(%rcx), %rcx
737 lea 16(%rdx), %rdx
738
/* Prepare the 64-byte loop: rax = (rcx + 28) & 63, a multiple of 16.
   Both pointers move back by rax so that 12(%rcx) is 64-byte aligned;
   the overlap is copied again and r8 (strncpy) is credited with rax.  */
739 mov %rcx, %rax
740 and $-0x40, %rcx
741 sub %rcx, %rax
742 lea -12(%rcx), %rcx
743 sub %rax, %rdx
744 # ifdef USE_AS_STRNCPY
745 add %rax, %r8
746 # endif
747 movaps -4(%rcx), %xmm1
748
749 /* 64 bytes loop */
750 .p2align 4
751 L(Shl4LoopStart):
/* xmm2..xmm5 = four consecutive aligned chunks; xmm7 = bytewise minimum
   of all four, compared against zero to build the mask in rax.  xmm7 is
   then reused for an unshifted copy of the last chunk (next xmm1).  */
752 movaps 12(%rcx), %xmm2
753 movaps 28(%rcx), %xmm3
754 movaps %xmm3, %xmm6
755 movaps 44(%rcx), %xmm4
756 movaps %xmm4, %xmm7
757 movaps 60(%rcx), %xmm5
758 pminub %xmm2, %xmm6
759 pminub %xmm5, %xmm7
760 pminub %xmm6, %xmm7
761 pcmpeqb %xmm0, %xmm7
762 pmovmskb %xmm7, %rax
763 movaps %xmm5, %xmm7
764 palignr $4, %xmm4, %xmm5
765 test %rax, %rax
/* palignr leaves the flags alone; xmm1 and xmm2 are still unmodified
   when control goes back to L(Shl4Start).  */
766 palignr $4, %xmm3, %xmm4
767 jnz L(Shl4Start)
768 # ifdef USE_AS_STRNCPY
769 sub $64, %r8
770 jbe L(StrncpyLeave4)
771 # endif
772 palignr $4, %xmm2, %xmm3
773 lea 64(%rcx), %rcx
774 palignr $4, %xmm1, %xmm2
775 movaps %xmm7, %xmm1
776 movaps %xmm5, 48(%rdx)
777 movaps %xmm4, 32(%rdx)
778 movaps %xmm3, 16(%rdx)
779 movaps %xmm2, (%rdx)
780 lea 64(%rdx), %rdx
781 jmp L(Shl4LoopStart)
782
/* A zero byte was found in the chunk at 12(%rcx); rax is its mask.  Copy
   the 16 bytes at rcx - 4 to rdx - 4 (covers source bytes rcx .. rcx + 11)
   and hand over with rsi = 12, the offset of the chunk described by rax.  */
783 L(Shl4LoopExit):
784 movdqu -4(%rcx), %xmm1
785 mov $12, %rsi
786 movdqu %xmm1, -4(%rdx)
787 jmp L(CopyFrom1To16Bytes)
788
/* L(Shl5): copy path for rcx & 0xf == 5 with rdx 16-byte aligned (see the
   dispatcher).  The source is read with aligned loads at rcx - 5 + 16*k;
   `palignr $5' joins two neighbouring chunks and drops the first five
   bytes, giving the 16 source bytes that start at rcx, stored aligned at
   rdx.  xmm0 is all-zero on entry and stays zero while no zero byte has
   been found.  In the USE_AS_STRNCPY build r8 is decremented alongside.  */
789 .p2align 4
790 L(Shl5):
/* xmm1 = aligned chunk containing the byte at rcx, xmm2 = the one after.  */
791 movaps -5(%rcx), %xmm1
792 movaps 11(%rcx), %xmm2
/* Also re-entered from the 64-byte loop once it has seen a zero byte.  */
793 L(Shl5Start):
794 pcmpeqb %xmm2, %xmm0
795 pmovmskb %xmm0, %rax
796 movaps %xmm2, %xmm3
797 # ifdef USE_AS_STRNCPY
798 sub $16, %r8
799 jbe L(StrncpyExit5Case2OrCase3)
800 # endif
801 test %rax, %rax
802 jnz L(Shl5LoopExit)
803
/* No zero byte in xmm2: emit 16 bytes and fetch the next chunk.  xmm1 and
   xmm3 take turns holding the previous chunk in the steps below.  */
804 palignr $5, %xmm1, %xmm2
805 movaps %xmm2, (%rdx)
806 movaps 27(%rcx), %xmm2
807
808 pcmpeqb %xmm2, %xmm0
809 lea 16(%rdx), %rdx
810 pmovmskb %xmm0, %rax
811 lea 16(%rcx), %rcx
812 movaps %xmm2, %xmm1
813 # ifdef USE_AS_STRNCPY
814 sub $16, %r8
815 jbe L(StrncpyExit5Case2OrCase3)
816 # endif
817 test %rax, %rax
818 jnz L(Shl5LoopExit)
819
820 palignr $5, %xmm3, %xmm2
821 movaps %xmm2, (%rdx)
822 movaps 27(%rcx), %xmm2
823
824 pcmpeqb %xmm2, %xmm0
825 lea 16(%rdx), %rdx
826 pmovmskb %xmm0, %rax
827 lea 16(%rcx), %rcx
828 movaps %xmm2, %xmm3
829 # ifdef USE_AS_STRNCPY
830 sub $16, %r8
831 jbe L(StrncpyExit5Case2OrCase3)
832 # endif
833 test %rax, %rax
834 jnz L(Shl5LoopExit)
835
836 palignr $5, %xmm1, %xmm2
837 movaps %xmm2, (%rdx)
838 movaps 27(%rcx), %xmm2
839
840 pcmpeqb %xmm2, %xmm0
841 lea 16(%rdx), %rdx
842 pmovmskb %xmm0, %rax
843 lea 16(%rcx), %rcx
844 # ifdef USE_AS_STRNCPY
845 sub $16, %r8
846 jbe L(StrncpyExit5Case2OrCase3)
847 # endif
848 test %rax, %rax
849 jnz L(Shl5LoopExit)
850
851 palignr $5, %xmm3, %xmm2
852 movaps %xmm2, (%rdx)
853 lea 27(%rcx), %rcx
854 lea 16(%rdx), %rdx
855
/* Prepare the 64-byte loop: rax = (rcx + 27) & 63, a multiple of 16.
   Both pointers move back by rax so that 11(%rcx) is 64-byte aligned;
   the overlap is copied again and r8 (strncpy) is credited with rax.  */
856 mov %rcx, %rax
857 and $-0x40, %rcx
858 sub %rcx, %rax
859 lea -11(%rcx), %rcx
860 sub %rax, %rdx
861 # ifdef USE_AS_STRNCPY
862 add %rax, %r8
863 # endif
864 movaps -5(%rcx), %xmm1
865
866 /* 64 bytes loop */
867 .p2align 4
868 L(Shl5LoopStart):
/* xmm2..xmm5 = four consecutive aligned chunks; xmm7 = bytewise minimum
   of all four, compared against zero to build the mask in rax.  xmm7 is
   then reused for an unshifted copy of the last chunk (next xmm1).  */
869 movaps 11(%rcx), %xmm2
870 movaps 27(%rcx), %xmm3
871 movaps %xmm3, %xmm6
872 movaps 43(%rcx), %xmm4
873 movaps %xmm4, %xmm7
874 movaps 59(%rcx), %xmm5
875 pminub %xmm2, %xmm6
876 pminub %xmm5, %xmm7
877 pminub %xmm6, %xmm7
878 pcmpeqb %xmm0, %xmm7
879 pmovmskb %xmm7, %rax
880 movaps %xmm5, %xmm7
881 palignr $5, %xmm4, %xmm5
882 test %rax, %rax
/* palignr leaves the flags alone; xmm1 and xmm2 are still unmodified
   when control goes back to L(Shl5Start).  */
883 palignr $5, %xmm3, %xmm4
884 jnz L(Shl5Start)
885 # ifdef USE_AS_STRNCPY
886 sub $64, %r8
887 jbe L(StrncpyLeave5)
888 # endif
889 palignr $5, %xmm2, %xmm3
890 lea 64(%rcx), %rcx
891 palignr $5, %xmm1, %xmm2
892 movaps %xmm7, %xmm1
893 movaps %xmm5, 48(%rdx)
894 movaps %xmm4, 32(%rdx)
895 movaps %xmm3, 16(%rdx)
896 movaps %xmm2, (%rdx)
897 lea 64(%rdx), %rdx
898 jmp L(Shl5LoopStart)
899
/* A zero byte was found in the chunk at 11(%rcx); rax is its mask.  Copy
   the 16 bytes at rcx - 5 to rdx - 5 (covers source bytes rcx .. rcx + 10)
   and hand over with rsi = 11, the offset of the chunk described by rax.  */
900 L(Shl5LoopExit):
901 movdqu -5(%rcx), %xmm1
902 mov $11, %rsi
903 movdqu %xmm1, -5(%rdx)
904 jmp L(CopyFrom1To16Bytes)
905
/* L(Shl6): copy path for rcx & 0xf == 6 with rdx 16-byte aligned (see the
   dispatcher).  The source is read with aligned loads at rcx - 6 + 16*k;
   `palignr $6' joins two neighbouring chunks and drops the first six
   bytes, giving the 16 source bytes that start at rcx, stored aligned at
   rdx.  xmm0 is all-zero on entry and stays zero while no zero byte has
   been found.  In the USE_AS_STRNCPY build r8 is decremented alongside.  */
906 .p2align 4
907 L(Shl6):
/* xmm1 = aligned chunk containing the byte at rcx, xmm2 = the one after.  */
908 movaps -6(%rcx), %xmm1
909 movaps 10(%rcx), %xmm2
/* Also re-entered from the 64-byte loop once it has seen a zero byte.  */
910 L(Shl6Start):
911 pcmpeqb %xmm2, %xmm0
912 pmovmskb %xmm0, %rax
913 movaps %xmm2, %xmm3
914 # ifdef USE_AS_STRNCPY
915 sub $16, %r8
916 jbe L(StrncpyExit6Case2OrCase3)
917 # endif
918 test %rax, %rax
919 jnz L(Shl6LoopExit)
920
/* No zero byte in xmm2: emit 16 bytes and fetch the next chunk.  xmm1 and
   xmm3 take turns holding the previous chunk in the steps below.  */
921 palignr $6, %xmm1, %xmm2
922 movaps %xmm2, (%rdx)
923 movaps 26(%rcx), %xmm2
924
925 pcmpeqb %xmm2, %xmm0
926 lea 16(%rdx), %rdx
927 pmovmskb %xmm0, %rax
928 lea 16(%rcx), %rcx
929 movaps %xmm2, %xmm1
930 # ifdef USE_AS_STRNCPY
931 sub $16, %r8
932 jbe L(StrncpyExit6Case2OrCase3)
933 # endif
934 test %rax, %rax
935 jnz L(Shl6LoopExit)
936
937 palignr $6, %xmm3, %xmm2
938 movaps %xmm2, (%rdx)
939 movaps 26(%rcx), %xmm2
940
941 pcmpeqb %xmm2, %xmm0
942 lea 16(%rdx), %rdx
943 pmovmskb %xmm0, %rax
944 lea 16(%rcx), %rcx
945 movaps %xmm2, %xmm3
946 # ifdef USE_AS_STRNCPY
947 sub $16, %r8
948 jbe L(StrncpyExit6Case2OrCase3)
949 # endif
950 test %rax, %rax
951 jnz L(Shl6LoopExit)
952
953 palignr $6, %xmm1, %xmm2
954 movaps %xmm2, (%rdx)
955 movaps 26(%rcx), %xmm2
956
957 pcmpeqb %xmm2, %xmm0
958 lea 16(%rdx), %rdx
959 pmovmskb %xmm0, %rax
960 lea 16(%rcx), %rcx
961 # ifdef USE_AS_STRNCPY
962 sub $16, %r8
963 jbe L(StrncpyExit6Case2OrCase3)
964 # endif
965 test %rax, %rax
966 jnz L(Shl6LoopExit)
967
968 palignr $6, %xmm3, %xmm2
969 movaps %xmm2, (%rdx)
970 lea 26(%rcx), %rcx
971 lea 16(%rdx), %rdx
972
/* Prepare the 64-byte loop: rax = (rcx + 26) & 63, a multiple of 16.
   Both pointers move back by rax so that 10(%rcx) is 64-byte aligned;
   the overlap is copied again and r8 (strncpy) is credited with rax.  */
973 mov %rcx, %rax
974 and $-0x40, %rcx
975 sub %rcx, %rax
976 lea -10(%rcx), %rcx
977 sub %rax, %rdx
978 # ifdef USE_AS_STRNCPY
979 add %rax, %r8
980 # endif
981 movaps -6(%rcx), %xmm1
982
983 /* 64 bytes loop */
984 .p2align 4
985 L(Shl6LoopStart):
/* xmm2..xmm5 = four consecutive aligned chunks; xmm7 = bytewise minimum
   of all four, compared against zero to build the mask in rax.  xmm7 is
   then reused for an unshifted copy of the last chunk (next xmm1).  */
986 movaps 10(%rcx), %xmm2
987 movaps 26(%rcx), %xmm3
988 movaps %xmm3, %xmm6
989 movaps 42(%rcx), %xmm4
990 movaps %xmm4, %xmm7
991 movaps 58(%rcx), %xmm5
992 pminub %xmm2, %xmm6
993 pminub %xmm5, %xmm7
994 pminub %xmm6, %xmm7
995 pcmpeqb %xmm0, %xmm7
996 pmovmskb %xmm7, %rax
997 movaps %xmm5, %xmm7
998 palignr $6, %xmm4, %xmm5
999 test %rax, %rax
/* palignr leaves the flags alone; xmm1 and xmm2 are still unmodified
   when control goes back to L(Shl6Start).  */
1000 palignr $6, %xmm3, %xmm4
1001 jnz L(Shl6Start)
1002 # ifdef USE_AS_STRNCPY
1003 sub $64, %r8
1004 jbe L(StrncpyLeave6)
1005 # endif
1006 palignr $6, %xmm2, %xmm3
1007 lea 64(%rcx), %rcx
1008 palignr $6, %xmm1, %xmm2
1009 movaps %xmm7, %xmm1
1010 movaps %xmm5, 48(%rdx)
1011 movaps %xmm4, 32(%rdx)
1012 movaps %xmm3, 16(%rdx)
1013 movaps %xmm2, (%rdx)
1014 lea 64(%rdx), %rdx
1015 jmp L(Shl6LoopStart)
1016
/* A zero byte was found in the chunk at 10(%rcx); rax is its mask.  Copy
   the ten source bytes rcx .. rcx + 9 that precede that chunk using an
   8-byte move at offset 0 and an overlapping 4-byte move at offset 6
   (rsi is borrowed as a scratch register for the latter), then hand over
   with rsi = 10, the offset of the chunk described by rax.  */
1017 L(Shl6LoopExit):
1018 mov (%rcx), %r9
1019 mov 6(%rcx), %esi
1020 mov %r9, (%rdx)
1021 mov %esi, 6(%rdx)
1022 mov $10, %rsi
1023 jmp L(CopyFrom1To16Bytes)
1024
/* L(Shl7): copy path for rcx & 0xf == 7 with rdx 16-byte aligned (see the
   dispatcher).  The source is read with aligned loads at rcx - 7 + 16*k;
   `palignr $7' joins two neighbouring chunks and drops the first seven
   bytes, giving the 16 source bytes that start at rcx, stored aligned at
   rdx.  xmm0 is all-zero on entry and stays zero while no zero byte has
   been found.  In the USE_AS_STRNCPY build r8 is decremented alongside.  */
1025 .p2align 4
1026 L(Shl7):
/* xmm1 = aligned chunk containing the byte at rcx, xmm2 = the one after.  */
1027 movaps -7(%rcx), %xmm1
1028 movaps 9(%rcx), %xmm2
/* Also re-entered from the 64-byte loop once it has seen a zero byte.  */
1029 L(Shl7Start):
1030 pcmpeqb %xmm2, %xmm0
1031 pmovmskb %xmm0, %rax
1032 movaps %xmm2, %xmm3
1033 # ifdef USE_AS_STRNCPY
1034 sub $16, %r8
1035 jbe L(StrncpyExit7Case2OrCase3)
1036 # endif
1037 test %rax, %rax
1038 jnz L(Shl7LoopExit)
1039
/* No zero byte in xmm2: emit 16 bytes and fetch the next chunk.  xmm1 and
   xmm3 take turns holding the previous chunk in the steps below.  */
1040 palignr $7, %xmm1, %xmm2
1041 movaps %xmm2, (%rdx)
1042 movaps 25(%rcx), %xmm2
1043
1044 pcmpeqb %xmm2, %xmm0
1045 lea 16(%rdx), %rdx
1046 pmovmskb %xmm0, %rax
1047 lea 16(%rcx), %rcx
1048 movaps %xmm2, %xmm1
1049 # ifdef USE_AS_STRNCPY
1050 sub $16, %r8
1051 jbe L(StrncpyExit7Case2OrCase3)
1052 # endif
1053 test %rax, %rax
1054 jnz L(Shl7LoopExit)
1055
1056 palignr $7, %xmm3, %xmm2
1057 movaps %xmm2, (%rdx)
1058 movaps 25(%rcx), %xmm2
1059
1060 pcmpeqb %xmm2, %xmm0
1061 lea 16(%rdx), %rdx
1062 pmovmskb %xmm0, %rax
1063 lea 16(%rcx), %rcx
1064 movaps %xmm2, %xmm3
1065 # ifdef USE_AS_STRNCPY
1066 sub $16, %r8
1067 jbe L(StrncpyExit7Case2OrCase3)
1068 # endif
1069 test %rax, %rax
1070 jnz L(Shl7LoopExit)
1071
1072 palignr $7, %xmm1, %xmm2
1073 movaps %xmm2, (%rdx)
1074 movaps 25(%rcx), %xmm2
1075
1076 pcmpeqb %xmm2, %xmm0
1077 lea 16(%rdx), %rdx
1078 pmovmskb %xmm0, %rax
1079 lea 16(%rcx), %rcx
1080 # ifdef USE_AS_STRNCPY
1081 sub $16, %r8
1082 jbe L(StrncpyExit7Case2OrCase3)
1083 # endif
1084 test %rax, %rax
1085 jnz L(Shl7LoopExit)
1086
1087 palignr $7, %xmm3, %xmm2
1088 movaps %xmm2, (%rdx)
1089 lea 25(%rcx), %rcx
1090 lea 16(%rdx), %rdx
1091
/* Prepare the 64-byte loop: rax = (rcx + 25) & 63, a multiple of 16.
   Both pointers move back by rax so that 9(%rcx) is 64-byte aligned;
   the overlap is copied again and r8 (strncpy) is credited with rax.  */
1092 mov %rcx, %rax
1093 and $-0x40, %rcx
1094 sub %rcx, %rax
1095 lea -9(%rcx), %rcx
1096 sub %rax, %rdx
1097 # ifdef USE_AS_STRNCPY
1098 add %rax, %r8
1099 # endif
1100 movaps -7(%rcx), %xmm1
1101
1102 /* 64 bytes loop */
1103 .p2align 4
1104 L(Shl7LoopStart):
/* xmm2..xmm5 = four consecutive aligned chunks; xmm7 = bytewise minimum
   of all four, compared against zero to build the mask in rax.  xmm7 is
   then reused for an unshifted copy of the last chunk (next xmm1).  */
1105 movaps 9(%rcx), %xmm2
1106 movaps 25(%rcx), %xmm3
1107 movaps %xmm3, %xmm6
1108 movaps 41(%rcx), %xmm4
1109 movaps %xmm4, %xmm7
1110 movaps 57(%rcx), %xmm5
1111 pminub %xmm2, %xmm6
1112 pminub %xmm5, %xmm7
1113 pminub %xmm6, %xmm7
1114 pcmpeqb %xmm0, %xmm7
1115 pmovmskb %xmm7, %rax
1116 movaps %xmm5, %xmm7
1117 palignr $7, %xmm4, %xmm5
1118 test %rax, %rax
/* palignr leaves the flags alone; xmm1 and xmm2 are still unmodified
   when control goes back to L(Shl7Start).  */
1119 palignr $7, %xmm3, %xmm4
1120 jnz L(Shl7Start)
1121 # ifdef USE_AS_STRNCPY
1122 sub $64, %r8
1123 jbe L(StrncpyLeave7)
1124 # endif
1125 palignr $7, %xmm2, %xmm3
1126 lea 64(%rcx), %rcx
1127 palignr $7, %xmm1, %xmm2
1128 movaps %xmm7, %xmm1
1129 movaps %xmm5, 48(%rdx)
1130 movaps %xmm4, 32(%rdx)
1131 movaps %xmm3, 16(%rdx)
1132 movaps %xmm2, (%rdx)
1133 lea 64(%rdx), %rdx
1134 jmp L(Shl7LoopStart)
1135
/* A zero byte was found in the chunk at 9(%rcx); rax is its mask.  Copy
   the nine source bytes rcx .. rcx + 8 that precede that chunk using an
   8-byte move at offset 0 and an overlapping 4-byte move at offset 5
   (rsi is borrowed as a scratch register for the latter), then hand over
   with rsi = 9, the offset of the chunk described by rax.  */
1136 L(Shl7LoopExit):
1137 mov (%rcx), %r9
1138 mov 5(%rcx), %esi
1139 mov %r9, (%rdx)
1140 mov %esi, 5(%rdx)
1141 mov $9, %rsi
1142 jmp L(CopyFrom1To16Bytes)
1143
/* L(Shl8): copy path for rcx & 0xf == 8 with rdx 16-byte aligned (see the
   dispatcher).  The source is read with aligned loads at rcx - 8 + 16*k;
   `palignr $8' joins two neighbouring chunks and drops the first eight
   bytes, giving the 16 source bytes that start at rcx, stored aligned at
   rdx.  xmm0 is all-zero on entry and stays zero while no zero byte has
   been found.  In the USE_AS_STRNCPY build r8 is decremented alongside.  */
1144 .p2align 4
1145 L(Shl8):
/* xmm1 = aligned chunk containing the byte at rcx, xmm2 = the one after.  */
1146 movaps -8(%rcx), %xmm1
1147 movaps 8(%rcx), %xmm2
/* Also re-entered from the 64-byte loop once it has seen a zero byte.  */
1148 L(Shl8Start):
1149 pcmpeqb %xmm2, %xmm0
1150 pmovmskb %xmm0, %rax
1151 movaps %xmm2, %xmm3
1152 # ifdef USE_AS_STRNCPY
1153 sub $16, %r8
1154 jbe L(StrncpyExit8Case2OrCase3)
1155 # endif
1156 test %rax, %rax
1157 jnz L(Shl8LoopExit)
1158
/* No zero byte in xmm2: emit 16 bytes and fetch the next chunk.  xmm1 and
   xmm3 take turns holding the previous chunk in the steps below.  */
1159 palignr $8, %xmm1, %xmm2
1160 movaps %xmm2, (%rdx)
1161 movaps 24(%rcx), %xmm2
1162
1163 pcmpeqb %xmm2, %xmm0
1164 lea 16(%rdx), %rdx
1165 pmovmskb %xmm0, %rax
1166 lea 16(%rcx), %rcx
1167 movaps %xmm2, %xmm1
1168 # ifdef USE_AS_STRNCPY
1169 sub $16, %r8
1170 jbe L(StrncpyExit8Case2OrCase3)
1171 # endif
1172 test %rax, %rax
1173 jnz L(Shl8LoopExit)
1174
1175 palignr $8, %xmm3, %xmm2
1176 movaps %xmm2, (%rdx)
1177 movaps 24(%rcx), %xmm2
1178
1179 pcmpeqb %xmm2, %xmm0
1180 lea 16(%rdx), %rdx
1181 pmovmskb %xmm0, %rax
1182 lea 16(%rcx), %rcx
1183 movaps %xmm2, %xmm3
1184 # ifdef USE_AS_STRNCPY
1185 sub $16, %r8
1186 jbe L(StrncpyExit8Case2OrCase3)
1187 # endif
1188 test %rax, %rax
1189 jnz L(Shl8LoopExit)
1190
1191 palignr $8, %xmm1, %xmm2
1192 movaps %xmm2, (%rdx)
1193 movaps 24(%rcx), %xmm2
1194
1195 pcmpeqb %xmm2, %xmm0
1196 lea 16(%rdx), %rdx
1197 pmovmskb %xmm0, %rax
1198 lea 16(%rcx), %rcx
1199 # ifdef USE_AS_STRNCPY
1200 sub $16, %r8
1201 jbe L(StrncpyExit8Case2OrCase3)
1202 # endif
1203 test %rax, %rax
1204 jnz L(Shl8LoopExit)
1205
1206 palignr $8, %xmm3, %xmm2
1207 movaps %xmm2, (%rdx)
1208 lea 24(%rcx), %rcx
1209 lea 16(%rdx), %rdx
1210
/* Prepare the 64-byte loop: rax = (rcx + 24) & 63, a multiple of 16.
   Both pointers move back by rax so that 8(%rcx) is 64-byte aligned;
   the overlap is copied again and r8 (strncpy) is credited with rax.  */
1211 mov %rcx, %rax
1212 and $-0x40, %rcx
1213 sub %rcx, %rax
1214 lea -8(%rcx), %rcx
1215 sub %rax, %rdx
1216 # ifdef USE_AS_STRNCPY
1217 add %rax, %r8
1218 # endif
1219 movaps -8(%rcx), %xmm1
1220
1221 /* 64 bytes loop */
1222 .p2align 4
1223 L(Shl8LoopStart):
/* xmm2..xmm5 = four consecutive aligned chunks; xmm7 = bytewise minimum
   of all four, compared against zero to build the mask in rax.  xmm7 is
   then reused for an unshifted copy of the last chunk (next xmm1).  */
1224 movaps 8(%rcx), %xmm2
1225 movaps 24(%rcx), %xmm3
1226 movaps %xmm3, %xmm6
1227 movaps 40(%rcx), %xmm4
1228 movaps %xmm4, %xmm7
1229 movaps 56(%rcx), %xmm5
1230 pminub %xmm2, %xmm6
1231 pminub %xmm5, %xmm7
1232 pminub %xmm6, %xmm7
1233 pcmpeqb %xmm0, %xmm7
1234 pmovmskb %xmm7, %rax
1235 movaps %xmm5, %xmm7
1236 palignr $8, %xmm4, %xmm5
1237 test %rax, %rax
/* palignr leaves the flags alone; xmm1 and xmm2 are still unmodified
   when control goes back to L(Shl8Start).  */
1238 palignr $8, %xmm3, %xmm4
1239 jnz L(Shl8Start)
1240 # ifdef USE_AS_STRNCPY
1241 sub $64, %r8
1242 jbe L(StrncpyLeave8)
1243 # endif
1244 palignr $8, %xmm2, %xmm3
1245 lea 64(%rcx), %rcx
1246 palignr $8, %xmm1, %xmm2
1247 movaps %xmm7, %xmm1
1248 movaps %xmm5, 48(%rdx)
1249 movaps %xmm4, 32(%rdx)
1250 movaps %xmm3, 16(%rdx)
1251 movaps %xmm2, (%rdx)
1252 lea 64(%rdx), %rdx
1253 jmp L(Shl8LoopStart)
1254
/* A zero byte was found in the chunk at 8(%rcx); rax is its mask.  Copy
   the eight source bytes rcx .. rcx + 7 that precede that chunk with one
   8-byte move and hand over with rsi = 8, the offset of the chunk
   described by rax.  */
1255 L(Shl8LoopExit):
1256 mov (%rcx), %r9
1257 mov $8, %rsi
1258 mov %r9, (%rdx)
1259 jmp L(CopyFrom1To16Bytes)
1260
/* L(Shl9): copy path for rcx & 0xf == 9 with rdx 16-byte aligned (see the
   dispatcher).  The source is read with aligned loads at rcx - 9 + 16*k;
   `palignr $9' joins two neighbouring chunks and drops the first nine
   bytes, giving the 16 source bytes that start at rcx, stored aligned at
   rdx.  xmm0 is all-zero on entry and stays zero while no zero byte has
   been found.  In the USE_AS_STRNCPY build r8 is decremented alongside.  */
1261 .p2align 4
1262 L(Shl9):
/* xmm1 = aligned chunk containing the byte at rcx, xmm2 = the one after.  */
1263 movaps -9(%rcx), %xmm1
1264 movaps 7(%rcx), %xmm2
/* Also re-entered from the 64-byte loop once it has seen a zero byte.  */
1265 L(Shl9Start):
1266 pcmpeqb %xmm2, %xmm0
1267 pmovmskb %xmm0, %rax
1268 movaps %xmm2, %xmm3
1269 # ifdef USE_AS_STRNCPY
1270 sub $16, %r8
1271 jbe L(StrncpyExit9Case2OrCase3)
1272 # endif
1273 test %rax, %rax
1274 jnz L(Shl9LoopExit)
1275
/* No zero byte in xmm2: emit 16 bytes and fetch the next chunk.  xmm1 and
   xmm3 take turns holding the previous chunk in the steps below.  */
1276 palignr $9, %xmm1, %xmm2
1277 movaps %xmm2, (%rdx)
1278 movaps 23(%rcx), %xmm2
1279
1280 pcmpeqb %xmm2, %xmm0
1281 lea 16(%rdx), %rdx
1282 pmovmskb %xmm0, %rax
1283 lea 16(%rcx), %rcx
1284 movaps %xmm2, %xmm1
1285 # ifdef USE_AS_STRNCPY
1286 sub $16, %r8
1287 jbe L(StrncpyExit9Case2OrCase3)
1288 # endif
1289 test %rax, %rax
1290 jnz L(Shl9LoopExit)
1291
1292 palignr $9, %xmm3, %xmm2
1293 movaps %xmm2, (%rdx)
1294 movaps 23(%rcx), %xmm2
1295
1296 pcmpeqb %xmm2, %xmm0
1297 lea 16(%rdx), %rdx
1298 pmovmskb %xmm0, %rax
1299 lea 16(%rcx), %rcx
1300 movaps %xmm2, %xmm3
1301 # ifdef USE_AS_STRNCPY
1302 sub $16, %r8
1303 jbe L(StrncpyExit9Case2OrCase3)
1304 # endif
1305 test %rax, %rax
1306 jnz L(Shl9LoopExit)
1307
1308 palignr $9, %xmm1, %xmm2
1309 movaps %xmm2, (%rdx)
1310 movaps 23(%rcx), %xmm2
1311
1312 pcmpeqb %xmm2, %xmm0
1313 lea 16(%rdx), %rdx
1314 pmovmskb %xmm0, %rax
1315 lea 16(%rcx), %rcx
1316 # ifdef USE_AS_STRNCPY
1317 sub $16, %r8
1318 jbe L(StrncpyExit9Case2OrCase3)
1319 # endif
1320 test %rax, %rax
1321 jnz L(Shl9LoopExit)
1322
1323 palignr $9, %xmm3, %xmm2
1324 movaps %xmm2, (%rdx)
1325 lea 23(%rcx), %rcx
1326 lea 16(%rdx), %rdx
1327
/* Prepare the 64-byte loop: rax = (rcx + 23) & 63, a multiple of 16.
   Both pointers move back by rax so that 7(%rcx) is 64-byte aligned;
   the overlap is copied again and r8 (strncpy) is credited with rax.  */
1328 mov %rcx, %rax
1329 and $-0x40, %rcx
1330 sub %rcx, %rax
1331 lea -7(%rcx), %rcx
1332 sub %rax, %rdx
1333 # ifdef USE_AS_STRNCPY
1334 add %rax, %r8
1335 # endif
1336 movaps -9(%rcx), %xmm1
1337
1338 /* 64 bytes loop */
1339 .p2align 4
1340 L(Shl9LoopStart):
/* xmm2..xmm5 = four consecutive aligned chunks; xmm7 = bytewise minimum
   of all four, compared against zero to build the mask in rax.  xmm7 is
   then reused for an unshifted copy of the last chunk (next xmm1).  */
1341 movaps 7(%rcx), %xmm2
1342 movaps 23(%rcx), %xmm3
1343 movaps %xmm3, %xmm6
1344 movaps 39(%rcx), %xmm4
1345 movaps %xmm4, %xmm7
1346 movaps 55(%rcx), %xmm5
1347 pminub %xmm2, %xmm6
1348 pminub %xmm5, %xmm7
1349 pminub %xmm6, %xmm7
1350 pcmpeqb %xmm0, %xmm7
1351 pmovmskb %xmm7, %rax
1352 movaps %xmm5, %xmm7
1353 palignr $9, %xmm4, %xmm5
1354 test %rax, %rax
/* palignr leaves the flags alone; xmm1 and xmm2 are still unmodified
   when control goes back to L(Shl9Start).  */
1355 palignr $9, %xmm3, %xmm4
1356 jnz L(Shl9Start)
1357 # ifdef USE_AS_STRNCPY
1358 sub $64, %r8
1359 jbe L(StrncpyLeave9)
1360 # endif
1361 palignr $9, %xmm2, %xmm3
1362 lea 64(%rcx), %rcx
1363 palignr $9, %xmm1, %xmm2
1364 movaps %xmm7, %xmm1
1365 movaps %xmm5, 48(%rdx)
1366 movaps %xmm4, 32(%rdx)
1367 movaps %xmm3, 16(%rdx)
1368 movaps %xmm2, (%rdx)
1369 lea 64(%rdx), %rdx
1370 jmp L(Shl9LoopStart)
1371
/* A zero byte was found in the chunk at 7(%rcx); rax is its mask.  Copy
   the eight bytes at rcx - 1 to rdx - 1 (this covers source bytes
   rcx .. rcx + 6, which precede that chunk) and hand over with rsi = 7,
   the offset of the chunk described by rax.  */
1372 L(Shl9LoopExit):
1373 mov -1(%rcx), %r9
1374 mov $7, %rsi
1375 mov %r9, -1(%rdx)
1376 jmp L(CopyFrom1To16Bytes)
1377
/* L(Shl10): copy path for rcx & 0xf == 10 with rdx 16-byte aligned (see
   the dispatcher).  The source is read with aligned loads at
   rcx - 10 + 16*k; `palignr $10' joins two neighbouring chunks and drops
   the first ten bytes, giving the 16 source bytes that start at rcx,
   stored aligned at rdx.  xmm0 is all-zero on entry and stays zero while
   no zero byte has been found.  In the USE_AS_STRNCPY build r8 is
   decremented alongside.  */
1378 .p2align 4
1379 L(Shl10):
/* xmm1 = aligned chunk containing the byte at rcx, xmm2 = the one after.  */
1380 movaps -10(%rcx), %xmm1
1381 movaps 6(%rcx), %xmm2
/* Also re-entered from the 64-byte loop once it has seen a zero byte.  */
1382 L(Shl10Start):
1383 pcmpeqb %xmm2, %xmm0
1384 pmovmskb %xmm0, %rax
1385 movaps %xmm2, %xmm3
1386 # ifdef USE_AS_STRNCPY
1387 sub $16, %r8
1388 jbe L(StrncpyExit10Case2OrCase3)
1389 # endif
1390 test %rax, %rax
1391 jnz L(Shl10LoopExit)
1392
/* No zero byte in xmm2: emit 16 bytes and fetch the next chunk.  xmm1 and
   xmm3 take turns holding the previous chunk in the steps below.  */
1393 palignr $10, %xmm1, %xmm2
1394 movaps %xmm2, (%rdx)
1395 movaps 22(%rcx), %xmm2
1396
1397 pcmpeqb %xmm2, %xmm0
1398 lea 16(%rdx), %rdx
1399 pmovmskb %xmm0, %rax
1400 lea 16(%rcx), %rcx
1401 movaps %xmm2, %xmm1
1402 # ifdef USE_AS_STRNCPY
1403 sub $16, %r8
1404 jbe L(StrncpyExit10Case2OrCase3)
1405 # endif
1406 test %rax, %rax
1407 jnz L(Shl10LoopExit)
1408
1409 palignr $10, %xmm3, %xmm2
1410 movaps %xmm2, (%rdx)
1411 movaps 22(%rcx), %xmm2
1412
1413 pcmpeqb %xmm2, %xmm0
1414 lea 16(%rdx), %rdx
1415 pmovmskb %xmm0, %rax
1416 lea 16(%rcx), %rcx
1417 movaps %xmm2, %xmm3
1418 # ifdef USE_AS_STRNCPY
1419 sub $16, %r8
1420 jbe L(StrncpyExit10Case2OrCase3)
1421 # endif
1422 test %rax, %rax
1423 jnz L(Shl10LoopExit)
1424
1425 palignr $10, %xmm1, %xmm2
1426 movaps %xmm2, (%rdx)
1427 movaps 22(%rcx), %xmm2
1428
1429 pcmpeqb %xmm2, %xmm0
1430 lea 16(%rdx), %rdx
1431 pmovmskb %xmm0, %rax
1432 lea 16(%rcx), %rcx
1433 # ifdef USE_AS_STRNCPY
1434 sub $16, %r8
1435 jbe L(StrncpyExit10Case2OrCase3)
1436 # endif
1437 test %rax, %rax
1438 jnz L(Shl10LoopExit)
1439
1440 palignr $10, %xmm3, %xmm2
1441 movaps %xmm2, (%rdx)
1442 lea 22(%rcx), %rcx
1443 lea 16(%rdx), %rdx
1444
/* Prepare the 64-byte loop: rax = (rcx + 22) & 63, a multiple of 16.
   Both pointers move back by rax so that 6(%rcx) is 64-byte aligned;
   the overlap is copied again and r8 (strncpy) is credited with rax.  */
1445 mov %rcx, %rax
1446 and $-0x40, %rcx
1447 sub %rcx, %rax
1448 lea -6(%rcx), %rcx
1449 sub %rax, %rdx
1450 # ifdef USE_AS_STRNCPY
1451 add %rax, %r8
1452 # endif
1453 movaps -10(%rcx), %xmm1
1454
1455 /* 64 bytes loop */
1456 .p2align 4
1457 L(Shl10LoopStart):
/* xmm2..xmm5 = four consecutive aligned chunks; xmm7 = bytewise minimum
   of all four, compared against zero to build the mask in rax.  xmm7 is
   then reused for an unshifted copy of the last chunk (next xmm1).  */
1458 movaps 6(%rcx), %xmm2
1459 movaps 22(%rcx), %xmm3
1460 movaps %xmm3, %xmm6
1461 movaps 38(%rcx), %xmm4
1462 movaps %xmm4, %xmm7
1463 movaps 54(%rcx), %xmm5
1464 pminub %xmm2, %xmm6
1465 pminub %xmm5, %xmm7
1466 pminub %xmm6, %xmm7
1467 pcmpeqb %xmm0, %xmm7
1468 pmovmskb %xmm7, %rax
1469 movaps %xmm5, %xmm7
1470 palignr $10, %xmm4, %xmm5
1471 test %rax, %rax
/* palignr leaves the flags alone; xmm1 and xmm2 are still unmodified
   when control goes back to L(Shl10Start).  */
1472 palignr $10, %xmm3, %xmm4
1473 jnz L(Shl10Start)
1474 # ifdef USE_AS_STRNCPY
1475 sub $64, %r8
1476 jbe L(StrncpyLeave10)
1477 # endif
1478 palignr $10, %xmm2, %xmm3
1479 lea 64(%rcx), %rcx
1480 palignr $10, %xmm1, %xmm2
1481 movaps %xmm7, %xmm1
1482 movaps %xmm5, 48(%rdx)
1483 movaps %xmm4, 32(%rdx)
1484 movaps %xmm3, 16(%rdx)
1485 movaps %xmm2, (%rdx)
1486 lea 64(%rdx), %rdx
1487 jmp L(Shl10LoopStart)
1488
/* A zero byte was found in the chunk at 6(%rcx); rax is its mask.  Copy
   the eight bytes at rcx - 2 to rdx - 2 (this covers source bytes
   rcx .. rcx + 5, which precede that chunk) and hand over with rsi = 6,
   the offset of the chunk described by rax.  */
1489 L(Shl10LoopExit):
1490 mov -2(%rcx), %r9
1491 mov $6, %rsi
1492 mov %r9, -2(%rdx)
1493 jmp L(CopyFrom1To16Bytes)
1494
1495 .p2align 4
/* Copy path for a source cursor %rcx that is 11 bytes past a 16-byte
   boundary: -11(%rcx) and 5(%rcx) are read with aligned movaps, and
   the destination cursor %rdx is written with aligned movaps.  Each
   step tests the next aligned source vector for a NUL and otherwise
   stores palignr $11 of the previous and current vectors.
   %xmm0 is presumably zero on entry (set up outside this chunk) and is
   zero again after every compare that finds no NUL.  */
1496 L(Shl11):
1497 movaps -11(%rcx), %xmm1
1498 movaps 5(%rcx), %xmm2
/* Also entered from the 64-byte loop below with %xmm1/%xmm2 loaded.  */
1499 L(Shl11Start):
/* %rax = NUL mask of the vector at 5(%rcx); %xmm3 keeps its copy.  */
1500 pcmpeqb %xmm2, %xmm0
1501 pmovmskb %xmm0, %rax
1502 movaps %xmm2, %xmm3
1503 # ifdef USE_AS_STRNCPY
/* Charge 16 bytes against the remaining count in %r8; leave if the
   count runs out within this vector.  */
1504 sub $16, %r8
1505 jbe L(StrncpyExit11Case2OrCase3)
1506 # endif
1507 test %rax, %rax
1508 jnz L(Shl11LoopExit)
1509
/* Step 1: previous vector in %xmm1.  */
1510 palignr $11, %xmm1, %xmm2
1511 movaps %xmm2, (%rdx)
1512 movaps 21(%rcx), %xmm2
1513
1514 pcmpeqb %xmm2, %xmm0
1515 lea 16(%rdx), %rdx
1516 pmovmskb %xmm0, %rax
1517 lea 16(%rcx), %rcx
1518 movaps %xmm2, %xmm1
1519 # ifdef USE_AS_STRNCPY
1520 sub $16, %r8
1521 jbe L(StrncpyExit11Case2OrCase3)
1522 # endif
1523 test %rax, %rax
1524 jnz L(Shl11LoopExit)
1525
/* Step 2: previous vector in %xmm3.  */
1526 palignr $11, %xmm3, %xmm2
1527 movaps %xmm2, (%rdx)
1528 movaps 21(%rcx), %xmm2
1529
1530 pcmpeqb %xmm2, %xmm0
1531 lea 16(%rdx), %rdx
1532 pmovmskb %xmm0, %rax
1533 lea 16(%rcx), %rcx
1534 movaps %xmm2, %xmm3
1535 # ifdef USE_AS_STRNCPY
1536 sub $16, %r8
1537 jbe L(StrncpyExit11Case2OrCase3)
1538 # endif
1539 test %rax, %rax
1540 jnz L(Shl11LoopExit)
1541
/* Step 3: previous vector in %xmm1.  */
1542 palignr $11, %xmm1, %xmm2
1543 movaps %xmm2, (%rdx)
1544 movaps 21(%rcx), %xmm2
1545
1546 pcmpeqb %xmm2, %xmm0
1547 lea 16(%rdx), %rdx
1548 pmovmskb %xmm0, %rax
1549 lea 16(%rcx), %rcx
1550 # ifdef USE_AS_STRNCPY
1551 sub $16, %r8
1552 jbe L(StrncpyExit11Case2OrCase3)
1553 # endif
1554 test %rax, %rax
1555 jnz L(Shl11LoopExit)
1556
/* Step 4, then point %rcx at the next unread aligned vector.  */
1557 palignr $11, %xmm3, %xmm2
1558 movaps %xmm2, (%rdx)
1559 lea 21(%rcx), %rcx
1560 lea 16(%rdx), %rdx
1561
/* Round %rcx down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx moves back by the same amount (and %r8 is credited with
   it for strncpy), so the loop may store some bytes a second time.
   The -5 restores the bias so 5(%rcx) is the 64-byte-aligned vector.  */
1562 mov %rcx, %rax
1563 and $-0x40, %rcx
1564 sub %rcx, %rax
1565 lea -5(%rcx), %rcx
1566 sub %rax, %rdx
1567 # ifdef USE_AS_STRNCPY
1568 add %rax, %r8
1569 # endif
1570 movaps -11(%rcx), %xmm1
1571
1572 /* 64 bytes loop */
1573 .p2align 4
1574 L(Shl11LoopStart):
/* Load four aligned vectors and fold them with pminub so that a zero
   byte anywhere in the 64 bytes shows up in the mask in %rax.  */
1575 movaps 5(%rcx), %xmm2
1576 movaps 21(%rcx), %xmm3
1577 movaps %xmm3, %xmm6
1578 movaps 37(%rcx), %xmm4
1579 movaps %xmm4, %xmm7
1580 movaps 53(%rcx), %xmm5
1581 pminub %xmm2, %xmm6
1582 pminub %xmm5, %xmm7
1583 pminub %xmm6, %xmm7
1584 pcmpeqb %xmm0, %xmm7
1585 pmovmskb %xmm7, %rax
/* Unshifted copy of the last vector; becomes %xmm1 next iteration.  */
1586 movaps %xmm5, %xmm7
1587 palignr $11, %xmm4, %xmm5
1588 test %rax, %rax
1589 palignr $11, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: redo them one vector at a time;
   %xmm1 and %xmm2 are still unmodified here.  */
1590 jnz L(Shl11Start)
1591 # ifdef USE_AS_STRNCPY
/* Count runs out within these 64 bytes: handled at L(StrncpyLeave11),
   defined elsewhere in the file.  */
1592 sub $64, %r8
1593 jbe L(StrncpyLeave11)
1594 # endif
1595 palignr $11, %xmm2, %xmm3
1596 lea 64(%rcx), %rcx
1597 palignr $11, %xmm1, %xmm2
1598 movaps %xmm7, %xmm1
1599 movaps %xmm5, 48(%rdx)
1600 movaps %xmm4, 32(%rdx)
1601 movaps %xmm3, 16(%rdx)
1602 movaps %xmm2, (%rdx)
1603 lea 64(%rdx), %rdx
1604 jmp L(Shl11LoopStart)
1605
/* NUL found in the vector at 5(%rcx).  Copy the 5 not-yet-stored bytes
   in front of it with one 8-byte move that starts 3 bytes early
   (rewriting 3 destination bytes, presumably already copied), and pass
   5 in %rsi for L(CopyFrom1To16Bytes) to add to both cursors.  */
1606 L(Shl11LoopExit):
1607 mov -3(%rcx), %r9
1608 mov $5, %rsi
1609 mov %r9, -3(%rdx)
1610 jmp L(CopyFrom1To16Bytes)
1611
1612 .p2align 4
/* Copy path for a source cursor %rcx that is 12 bytes past a 16-byte
   boundary: -12(%rcx) and 4(%rcx) are read with aligned movaps, and
   the destination cursor %rdx is written with aligned movaps.  Each
   step tests the next aligned source vector for a NUL and otherwise
   stores palignr $12 of the previous and current vectors.
   %xmm0 is presumably zero on entry (set up outside this chunk) and is
   zero again after every compare that finds no NUL.  */
1613 L(Shl12):
1614 movaps -12(%rcx), %xmm1
1615 movaps 4(%rcx), %xmm2
/* Also entered from the 64-byte loop below with %xmm1/%xmm2 loaded.  */
1616 L(Shl12Start):
/* %rax = NUL mask of the vector at 4(%rcx); %xmm3 keeps its copy.  */
1617 pcmpeqb %xmm2, %xmm0
1618 pmovmskb %xmm0, %rax
1619 movaps %xmm2, %xmm3
1620 # ifdef USE_AS_STRNCPY
/* Charge 16 bytes against the remaining count in %r8; leave if the
   count runs out within this vector.  */
1621 sub $16, %r8
1622 jbe L(StrncpyExit12Case2OrCase3)
1623 # endif
1624 test %rax, %rax
1625 jnz L(Shl12LoopExit)
1626
/* Step 1: previous vector in %xmm1.  */
1627 palignr $12, %xmm1, %xmm2
1628 movaps %xmm2, (%rdx)
1629 movaps 20(%rcx), %xmm2
1630
1631 pcmpeqb %xmm2, %xmm0
1632 lea 16(%rdx), %rdx
1633 pmovmskb %xmm0, %rax
1634 lea 16(%rcx), %rcx
1635 movaps %xmm2, %xmm1
1636 # ifdef USE_AS_STRNCPY
1637 sub $16, %r8
1638 jbe L(StrncpyExit12Case2OrCase3)
1639 # endif
1640 test %rax, %rax
1641 jnz L(Shl12LoopExit)
1642
/* Step 2: previous vector in %xmm3.  */
1643 palignr $12, %xmm3, %xmm2
1644 movaps %xmm2, (%rdx)
1645 movaps 20(%rcx), %xmm2
1646
1647 pcmpeqb %xmm2, %xmm0
1648 lea 16(%rdx), %rdx
1649 pmovmskb %xmm0, %rax
1650 lea 16(%rcx), %rcx
1651 movaps %xmm2, %xmm3
1652 # ifdef USE_AS_STRNCPY
1653 sub $16, %r8
1654 jbe L(StrncpyExit12Case2OrCase3)
1655 # endif
1656 test %rax, %rax
1657 jnz L(Shl12LoopExit)
1658
/* Step 3: previous vector in %xmm1.  */
1659 palignr $12, %xmm1, %xmm2
1660 movaps %xmm2, (%rdx)
1661 movaps 20(%rcx), %xmm2
1662
1663 pcmpeqb %xmm2, %xmm0
1664 lea 16(%rdx), %rdx
1665 pmovmskb %xmm0, %rax
1666 lea 16(%rcx), %rcx
1667 # ifdef USE_AS_STRNCPY
1668 sub $16, %r8
1669 jbe L(StrncpyExit12Case2OrCase3)
1670 # endif
1671 test %rax, %rax
1672 jnz L(Shl12LoopExit)
1673
/* Step 4, then point %rcx at the next unread aligned vector.  */
1674 palignr $12, %xmm3, %xmm2
1675 movaps %xmm2, (%rdx)
1676 lea 20(%rcx), %rcx
1677 lea 16(%rdx), %rdx
1678
/* Round %rcx down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx moves back by the same amount (and %r8 is credited with
   it for strncpy), so the loop may store some bytes a second time.
   The -4 restores the bias so 4(%rcx) is the 64-byte-aligned vector.  */
1679 mov %rcx, %rax
1680 and $-0x40, %rcx
1681 sub %rcx, %rax
1682 lea -4(%rcx), %rcx
1683 sub %rax, %rdx
1684 # ifdef USE_AS_STRNCPY
1685 add %rax, %r8
1686 # endif
1687 movaps -12(%rcx), %xmm1
1688
1689 /* 64 bytes loop */
1690 .p2align 4
1691 L(Shl12LoopStart):
/* Load four aligned vectors and fold them with pminub so that a zero
   byte anywhere in the 64 bytes shows up in the mask in %rax.  */
1692 movaps 4(%rcx), %xmm2
1693 movaps 20(%rcx), %xmm3
1694 movaps %xmm3, %xmm6
1695 movaps 36(%rcx), %xmm4
1696 movaps %xmm4, %xmm7
1697 movaps 52(%rcx), %xmm5
1698 pminub %xmm2, %xmm6
1699 pminub %xmm5, %xmm7
1700 pminub %xmm6, %xmm7
1701 pcmpeqb %xmm0, %xmm7
1702 pmovmskb %xmm7, %rax
/* Unshifted copy of the last vector; becomes %xmm1 next iteration.  */
1703 movaps %xmm5, %xmm7
1704 palignr $12, %xmm4, %xmm5
1705 test %rax, %rax
1706 palignr $12, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: redo them one vector at a time;
   %xmm1 and %xmm2 are still unmodified here.  */
1707 jnz L(Shl12Start)
1708 # ifdef USE_AS_STRNCPY
/* Count runs out within these 64 bytes: handled at L(StrncpyLeave12),
   defined elsewhere in the file.  */
1709 sub $64, %r8
1710 jbe L(StrncpyLeave12)
1711 # endif
1712 palignr $12, %xmm2, %xmm3
1713 lea 64(%rcx), %rcx
1714 palignr $12, %xmm1, %xmm2
1715 movaps %xmm7, %xmm1
1716 movaps %xmm5, 48(%rdx)
1717 movaps %xmm4, 32(%rdx)
1718 movaps %xmm3, 16(%rdx)
1719 movaps %xmm2, (%rdx)
1720 lea 64(%rdx), %rdx
1721 jmp L(Shl12LoopStart)
1722
/* NUL found in the vector at 4(%rcx).  Copy the 4 not-yet-stored bytes
   in front of it with a single 4-byte move and pass 4 in %rsi for
   L(CopyFrom1To16Bytes) to add to both cursors.  */
1723 L(Shl12LoopExit):
1724 mov (%rcx), %r9d
1725 mov $4, %rsi
1726 mov %r9d, (%rdx)
1727 jmp L(CopyFrom1To16Bytes)
1728
1729 .p2align 4
/* Copy path for a source cursor %rcx that is 13 bytes past a 16-byte
   boundary: -13(%rcx) and 3(%rcx) are read with aligned movaps, and
   the destination cursor %rdx is written with aligned movaps.  Each
   step tests the next aligned source vector for a NUL and otherwise
   stores palignr $13 of the previous and current vectors.
   %xmm0 is presumably zero on entry (set up outside this chunk) and is
   zero again after every compare that finds no NUL.  */
1730 L(Shl13):
1731 movaps -13(%rcx), %xmm1
1732 movaps 3(%rcx), %xmm2
/* Also entered from the 64-byte loop below with %xmm1/%xmm2 loaded.  */
1733 L(Shl13Start):
/* %rax = NUL mask of the vector at 3(%rcx); %xmm3 keeps its copy.  */
1734 pcmpeqb %xmm2, %xmm0
1735 pmovmskb %xmm0, %rax
1736 movaps %xmm2, %xmm3
1737 # ifdef USE_AS_STRNCPY
/* Charge 16 bytes against the remaining count in %r8; leave if the
   count runs out within this vector.  */
1738 sub $16, %r8
1739 jbe L(StrncpyExit13Case2OrCase3)
1740 # endif
1741 test %rax, %rax
1742 jnz L(Shl13LoopExit)
1743
/* Step 1: previous vector in %xmm1.  */
1744 palignr $13, %xmm1, %xmm2
1745 movaps %xmm2, (%rdx)
1746 movaps 19(%rcx), %xmm2
1747
1748 pcmpeqb %xmm2, %xmm0
1749 lea 16(%rdx), %rdx
1750 pmovmskb %xmm0, %rax
1751 lea 16(%rcx), %rcx
1752 movaps %xmm2, %xmm1
1753 # ifdef USE_AS_STRNCPY
1754 sub $16, %r8
1755 jbe L(StrncpyExit13Case2OrCase3)
1756 # endif
1757 test %rax, %rax
1758 jnz L(Shl13LoopExit)
1759
/* Step 2: previous vector in %xmm3.  */
1760 palignr $13, %xmm3, %xmm2
1761 movaps %xmm2, (%rdx)
1762 movaps 19(%rcx), %xmm2
1763
1764 pcmpeqb %xmm2, %xmm0
1765 lea 16(%rdx), %rdx
1766 pmovmskb %xmm0, %rax
1767 lea 16(%rcx), %rcx
1768 movaps %xmm2, %xmm3
1769 # ifdef USE_AS_STRNCPY
1770 sub $16, %r8
1771 jbe L(StrncpyExit13Case2OrCase3)
1772 # endif
1773 test %rax, %rax
1774 jnz L(Shl13LoopExit)
1775
/* Step 3: previous vector in %xmm1.  */
1776 palignr $13, %xmm1, %xmm2
1777 movaps %xmm2, (%rdx)
1778 movaps 19(%rcx), %xmm2
1779
1780 pcmpeqb %xmm2, %xmm0
1781 lea 16(%rdx), %rdx
1782 pmovmskb %xmm0, %rax
1783 lea 16(%rcx), %rcx
1784 # ifdef USE_AS_STRNCPY
1785 sub $16, %r8
1786 jbe L(StrncpyExit13Case2OrCase3)
1787 # endif
1788 test %rax, %rax
1789 jnz L(Shl13LoopExit)
1790
/* Step 4, then point %rcx at the next unread aligned vector.  */
1791 palignr $13, %xmm3, %xmm2
1792 movaps %xmm2, (%rdx)
1793 lea 19(%rcx), %rcx
1794 lea 16(%rdx), %rdx
1795
/* Round %rcx down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx moves back by the same amount (and %r8 is credited with
   it for strncpy), so the loop may store some bytes a second time.
   The -3 restores the bias so 3(%rcx) is the 64-byte-aligned vector.  */
1796 mov %rcx, %rax
1797 and $-0x40, %rcx
1798 sub %rcx, %rax
1799 lea -3(%rcx), %rcx
1800 sub %rax, %rdx
1801 # ifdef USE_AS_STRNCPY
1802 add %rax, %r8
1803 # endif
1804 movaps -13(%rcx), %xmm1
1805
1806 /* 64 bytes loop */
1807 .p2align 4
1808 L(Shl13LoopStart):
/* Load four aligned vectors and fold them with pminub so that a zero
   byte anywhere in the 64 bytes shows up in the mask in %rax.  */
1809 movaps 3(%rcx), %xmm2
1810 movaps 19(%rcx), %xmm3
1811 movaps %xmm3, %xmm6
1812 movaps 35(%rcx), %xmm4
1813 movaps %xmm4, %xmm7
1814 movaps 51(%rcx), %xmm5
1815 pminub %xmm2, %xmm6
1816 pminub %xmm5, %xmm7
1817 pminub %xmm6, %xmm7
1818 pcmpeqb %xmm0, %xmm7
1819 pmovmskb %xmm7, %rax
/* Unshifted copy of the last vector; becomes %xmm1 next iteration.  */
1820 movaps %xmm5, %xmm7
1821 palignr $13, %xmm4, %xmm5
1822 test %rax, %rax
1823 palignr $13, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: redo them one vector at a time;
   %xmm1 and %xmm2 are still unmodified here.  */
1824 jnz L(Shl13Start)
1825 # ifdef USE_AS_STRNCPY
/* Count runs out within these 64 bytes: handled at L(StrncpyLeave13),
   defined elsewhere in the file.  */
1826 sub $64, %r8
1827 jbe L(StrncpyLeave13)
1828 # endif
1829 palignr $13, %xmm2, %xmm3
1830 lea 64(%rcx), %rcx
1831 palignr $13, %xmm1, %xmm2
1832 movaps %xmm7, %xmm1
1833 movaps %xmm5, 48(%rdx)
1834 movaps %xmm4, 32(%rdx)
1835 movaps %xmm3, 16(%rdx)
1836 movaps %xmm2, (%rdx)
1837 lea 64(%rdx), %rdx
1838 jmp L(Shl13LoopStart)
1839
/* NUL found in the vector at 3(%rcx).  Copy the 3 not-yet-stored bytes
   in front of it with one 4-byte move that starts 1 byte early
   (rewriting 1 destination byte, presumably already copied), and pass
   3 in %rsi for L(CopyFrom1To16Bytes) to add to both cursors.  */
1840 L(Shl13LoopExit):
1841 mov -1(%rcx), %r9d
1842 mov $3, %rsi
1843 mov %r9d, -1(%rdx)
1844 jmp L(CopyFrom1To16Bytes)
1845
1846 .p2align 4
/* Copy path for a source cursor %rcx that is 14 bytes past a 16-byte
   boundary: -14(%rcx) and 2(%rcx) are read with aligned movaps, and
   the destination cursor %rdx is written with aligned movaps.  Each
   step tests the next aligned source vector for a NUL and otherwise
   stores palignr $14 of the previous and current vectors.
   %xmm0 is presumably zero on entry (set up outside this chunk) and is
   zero again after every compare that finds no NUL.  */
1847 L(Shl14):
1848 movaps -14(%rcx), %xmm1
1849 movaps 2(%rcx), %xmm2
/* Also entered from the 64-byte loop below with %xmm1/%xmm2 loaded.  */
1850 L(Shl14Start):
/* %rax = NUL mask of the vector at 2(%rcx); %xmm3 keeps its copy.  */
1851 pcmpeqb %xmm2, %xmm0
1852 pmovmskb %xmm0, %rax
1853 movaps %xmm2, %xmm3
1854 # ifdef USE_AS_STRNCPY
/* Charge 16 bytes against the remaining count in %r8; leave if the
   count runs out within this vector.  */
1855 sub $16, %r8
1856 jbe L(StrncpyExit14Case2OrCase3)
1857 # endif
1858 test %rax, %rax
1859 jnz L(Shl14LoopExit)
1860
/* Step 1: previous vector in %xmm1.  */
1861 palignr $14, %xmm1, %xmm2
1862 movaps %xmm2, (%rdx)
1863 movaps 18(%rcx), %xmm2
1864
1865 pcmpeqb %xmm2, %xmm0
1866 lea 16(%rdx), %rdx
1867 pmovmskb %xmm0, %rax
1868 lea 16(%rcx), %rcx
1869 movaps %xmm2, %xmm1
1870 # ifdef USE_AS_STRNCPY
1871 sub $16, %r8
1872 jbe L(StrncpyExit14Case2OrCase3)
1873 # endif
1874 test %rax, %rax
1875 jnz L(Shl14LoopExit)
1876
/* Step 2: previous vector in %xmm3.  */
1877 palignr $14, %xmm3, %xmm2
1878 movaps %xmm2, (%rdx)
1879 movaps 18(%rcx), %xmm2
1880
1881 pcmpeqb %xmm2, %xmm0
1882 lea 16(%rdx), %rdx
1883 pmovmskb %xmm0, %rax
1884 lea 16(%rcx), %rcx
1885 movaps %xmm2, %xmm3
1886 # ifdef USE_AS_STRNCPY
1887 sub $16, %r8
1888 jbe L(StrncpyExit14Case2OrCase3)
1889 # endif
1890 test %rax, %rax
1891 jnz L(Shl14LoopExit)
1892
/* Step 3: previous vector in %xmm1.  */
1893 palignr $14, %xmm1, %xmm2
1894 movaps %xmm2, (%rdx)
1895 movaps 18(%rcx), %xmm2
1896
1897 pcmpeqb %xmm2, %xmm0
1898 lea 16(%rdx), %rdx
1899 pmovmskb %xmm0, %rax
1900 lea 16(%rcx), %rcx
1901 # ifdef USE_AS_STRNCPY
1902 sub $16, %r8
1903 jbe L(StrncpyExit14Case2OrCase3)
1904 # endif
1905 test %rax, %rax
1906 jnz L(Shl14LoopExit)
1907
/* Step 4, then point %rcx at the next unread aligned vector.  */
1908 palignr $14, %xmm3, %xmm2
1909 movaps %xmm2, (%rdx)
1910 lea 18(%rcx), %rcx
1911 lea 16(%rdx), %rdx
1912
/* Round %rcx down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx moves back by the same amount (and %r8 is credited with
   it for strncpy), so the loop may store some bytes a second time.
   The -2 restores the bias so 2(%rcx) is the 64-byte-aligned vector.  */
1913 mov %rcx, %rax
1914 and $-0x40, %rcx
1915 sub %rcx, %rax
1916 lea -2(%rcx), %rcx
1917 sub %rax, %rdx
1918 # ifdef USE_AS_STRNCPY
1919 add %rax, %r8
1920 # endif
1921 movaps -14(%rcx), %xmm1
1922
1923 /* 64 bytes loop */
1924 .p2align 4
1925 L(Shl14LoopStart):
/* Load four aligned vectors and fold them with pminub so that a zero
   byte anywhere in the 64 bytes shows up in the mask in %rax.  */
1926 movaps 2(%rcx), %xmm2
1927 movaps 18(%rcx), %xmm3
1928 movaps %xmm3, %xmm6
1929 movaps 34(%rcx), %xmm4
1930 movaps %xmm4, %xmm7
1931 movaps 50(%rcx), %xmm5
1932 pminub %xmm2, %xmm6
1933 pminub %xmm5, %xmm7
1934 pminub %xmm6, %xmm7
1935 pcmpeqb %xmm0, %xmm7
1936 pmovmskb %xmm7, %rax
/* Unshifted copy of the last vector; becomes %xmm1 next iteration.  */
1937 movaps %xmm5, %xmm7
1938 palignr $14, %xmm4, %xmm5
1939 test %rax, %rax
1940 palignr $14, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: redo them one vector at a time;
   %xmm1 and %xmm2 are still unmodified here.  */
1941 jnz L(Shl14Start)
1942 # ifdef USE_AS_STRNCPY
/* Count runs out within these 64 bytes: handled at L(StrncpyLeave14),
   defined elsewhere in the file.  */
1943 sub $64, %r8
1944 jbe L(StrncpyLeave14)
1945 # endif
1946 palignr $14, %xmm2, %xmm3
1947 lea 64(%rcx), %rcx
1948 palignr $14, %xmm1, %xmm2
1949 movaps %xmm7, %xmm1
1950 movaps %xmm5, 48(%rdx)
1951 movaps %xmm4, 32(%rdx)
1952 movaps %xmm3, 16(%rdx)
1953 movaps %xmm2, (%rdx)
1954 lea 64(%rdx), %rdx
1955 jmp L(Shl14LoopStart)
1956
/* NUL found in the vector at 2(%rcx).  Copy the 2 not-yet-stored bytes
   in front of it with one 4-byte move that starts 2 bytes early
   (rewriting 2 destination bytes, presumably already copied), and pass
   2 in %rsi for L(CopyFrom1To16Bytes) to add to both cursors.  */
1957 L(Shl14LoopExit):
1958 mov -2(%rcx), %r9d
1959 mov $2, %rsi
1960 mov %r9d, -2(%rdx)
1961 jmp L(CopyFrom1To16Bytes)
1962
1963 .p2align 4
/* Copy path for a source cursor %rcx that is 15 bytes past a 16-byte
   boundary: -15(%rcx) and 1(%rcx) are read with aligned movaps, and
   the destination cursor %rdx is written with aligned movaps.  Each
   step tests the next aligned source vector for a NUL and otherwise
   stores palignr $15 of the previous and current vectors.
   %xmm0 is presumably zero on entry (set up outside this chunk) and is
   zero again after every compare that finds no NUL.  */
1964 L(Shl15):
1965 movaps -15(%rcx), %xmm1
1966 movaps 1(%rcx), %xmm2
/* Also entered from the 64-byte loop below with %xmm1/%xmm2 loaded.  */
1967 L(Shl15Start):
/* %rax = NUL mask of the vector at 1(%rcx); %xmm3 keeps its copy.  */
1968 pcmpeqb %xmm2, %xmm0
1969 pmovmskb %xmm0, %rax
1970 movaps %xmm2, %xmm3
1971 # ifdef USE_AS_STRNCPY
/* Charge 16 bytes against the remaining count in %r8; leave if the
   count runs out within this vector.  */
1972 sub $16, %r8
1973 jbe L(StrncpyExit15Case2OrCase3)
1974 # endif
1975 test %rax, %rax
1976 jnz L(Shl15LoopExit)
1977
/* Step 1: previous vector in %xmm1.  */
1978 palignr $15, %xmm1, %xmm2
1979 movaps %xmm2, (%rdx)
1980 movaps 17(%rcx), %xmm2
1981
1982 pcmpeqb %xmm2, %xmm0
1983 lea 16(%rdx), %rdx
1984 pmovmskb %xmm0, %rax
1985 lea 16(%rcx), %rcx
1986 movaps %xmm2, %xmm1
1987 # ifdef USE_AS_STRNCPY
1988 sub $16, %r8
1989 jbe L(StrncpyExit15Case2OrCase3)
1990 # endif
1991 test %rax, %rax
1992 jnz L(Shl15LoopExit)
1993
/* Step 2: previous vector in %xmm3.  */
1994 palignr $15, %xmm3, %xmm2
1995 movaps %xmm2, (%rdx)
1996 movaps 17(%rcx), %xmm2
1997
1998 pcmpeqb %xmm2, %xmm0
1999 lea 16(%rdx), %rdx
2000 pmovmskb %xmm0, %rax
2001 lea 16(%rcx), %rcx
2002 movaps %xmm2, %xmm3
2003 # ifdef USE_AS_STRNCPY
2004 sub $16, %r8
2005 jbe L(StrncpyExit15Case2OrCase3)
2006 # endif
2007 test %rax, %rax
2008 jnz L(Shl15LoopExit)
2009
/* Step 3: previous vector in %xmm1.  */
2010 palignr $15, %xmm1, %xmm2
2011 movaps %xmm2, (%rdx)
2012 movaps 17(%rcx), %xmm2
2013
2014 pcmpeqb %xmm2, %xmm0
2015 lea 16(%rdx), %rdx
2016 pmovmskb %xmm0, %rax
2017 lea 16(%rcx), %rcx
2018 # ifdef USE_AS_STRNCPY
2019 sub $16, %r8
2020 jbe L(StrncpyExit15Case2OrCase3)
2021 # endif
2022 test %rax, %rax
2023 jnz L(Shl15LoopExit)
2024
/* Step 4, then point %rcx at the next unread aligned vector.  */
2025 palignr $15, %xmm3, %xmm2
2026 movaps %xmm2, (%rdx)
2027 lea 17(%rcx), %rcx
2028 lea 16(%rdx), %rdx
2029
/* Round %rcx down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx moves back by the same amount (and %r8 is credited with
   it for strncpy), so the loop may store some bytes a second time.
   The -1 restores the bias so 1(%rcx) is the 64-byte-aligned vector.  */
2030 mov %rcx, %rax
2031 and $-0x40, %rcx
2032 sub %rcx, %rax
2033 lea -1(%rcx), %rcx
2034 sub %rax, %rdx
2035 # ifdef USE_AS_STRNCPY
2036 add %rax, %r8
2037 # endif
2038 movaps -15(%rcx), %xmm1
2039
2040 /* 64 bytes loop */
2041 .p2align 4
2042 L(Shl15LoopStart):
/* Load four aligned vectors and fold them with pminub so that a zero
   byte anywhere in the 64 bytes shows up in the mask in %rax.  */
2043 movaps 1(%rcx), %xmm2
2044 movaps 17(%rcx), %xmm3
2045 movaps %xmm3, %xmm6
2046 movaps 33(%rcx), %xmm4
2047 movaps %xmm4, %xmm7
2048 movaps 49(%rcx), %xmm5
2049 pminub %xmm2, %xmm6
2050 pminub %xmm5, %xmm7
2051 pminub %xmm6, %xmm7
2052 pcmpeqb %xmm0, %xmm7
2053 pmovmskb %xmm7, %rax
/* Unshifted copy of the last vector; becomes %xmm1 next iteration.  */
2054 movaps %xmm5, %xmm7
2055 palignr $15, %xmm4, %xmm5
2056 test %rax, %rax
2057 palignr $15, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: redo them one vector at a time;
   %xmm1 and %xmm2 are still unmodified here.  */
2058 jnz L(Shl15Start)
2059 # ifdef USE_AS_STRNCPY
/* Count runs out within these 64 bytes: handled at L(StrncpyLeave15),
   defined elsewhere in the file.  */
2060 sub $64, %r8
2061 jbe L(StrncpyLeave15)
2062 # endif
2063 palignr $15, %xmm2, %xmm3
2064 lea 64(%rcx), %rcx
2065 palignr $15, %xmm1, %xmm2
2066 movaps %xmm7, %xmm1
2067 movaps %xmm5, 48(%rdx)
2068 movaps %xmm4, 32(%rdx)
2069 movaps %xmm3, 16(%rdx)
2070 movaps %xmm2, (%rdx)
2071 lea 64(%rdx), %rdx
2072 jmp L(Shl15LoopStart)
2073
/* NUL found in the vector at 1(%rcx).  Copy the 1 not-yet-stored byte
   in front of it with one 4-byte move that starts 3 bytes early
   (rewriting 3 destination bytes, presumably already copied), and pass
   1 in %rsi for L(CopyFrom1To16Bytes) to add to both cursors.  */
2074 L(Shl15LoopExit):
2075 mov -3(%rcx), %r9d
2076 mov $1, %rsi
2077 mov %r9d, -3(%rdx)
/* Only USE_AS_STRCAT builds need an explicit jump; in the other builds
   L(CopyFrom1To16Bytes) is emitted right after this point and control
   falls through into it.  */
2078 # ifdef USE_AS_STRCAT
2079 jmp L(CopyFrom1To16Bytes)
2080 # endif
2081
2082 # ifndef USE_AS_STRCAT
2083
2084 .p2align 4
/* Finish a copy whose terminating NUL lies in the 16 bytes described
   by the mask in %rax (bit i set = byte i is NUL).
   In:  %rsi = bytes to advance both cursors by before the mask applies,
        %rcx = source cursor, %rdx = destination cursor,
        %r8  = remaining strncpy count, still reduced by the caller's
               "sub $16" (undone here).
   Dispatches to L(ExitN), N = index of the lowest set mask bit plus
   one, which copies N bytes.  */
2085 L(CopyFrom1To16Bytes):
2086 # ifdef USE_AS_STRNCPY
2087 add $16, %r8
2088 # endif
2089 add %rsi, %rdx
2090 add %rsi, %rcx
2091
/* Low mask byte empty: the NUL is among bytes 8..15.  */
2092 test %al, %al
2093 jz L(ExitHigh)
2094 test $0x01, %al
2095 jnz L(Exit1)
2096 test $0x02, %al
2097 jnz L(Exit2)
2098 test $0x04, %al
2099 jnz L(Exit3)
2100 test $0x08, %al
2101 jnz L(Exit4)
2102 test $0x10, %al
2103 jnz L(Exit5)
2104 test $0x20, %al
2105 jnz L(Exit6)
2106 test $0x40, %al
2107 jnz L(Exit7)
/* %al is non-zero with bits 0..6 clear, so bit 7 is set: fall through
   into L(Exit8), which follows.  */
2108
2109 .p2align 4
/* Copy exactly 8 bytes from (%rcx) to (%rdx) and return.
   Returns %rdi, or in USE_AS_STPCPY builds the address of the last
   byte written (%rdx + 7).  USE_AS_STRNCPY builds subtract 8 from the
   remaining count in %r8 and, if anything is left, continue at
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 8 as the fill start.
   Also the fall-through target of L(CopyFrom1To16Bytes).  */
2110 L(Exit8):
2111 mov (%rcx), %rax
2112 mov %rax, (%rdx)
2113 # ifdef USE_AS_STPCPY
2114 lea 7(%rdx), %rax
2115 # else
2116 mov %rdi, %rax
2117 # endif
2118 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2119 sub $8, %r8
2120 lea 8(%rdx), %rcx
2121 jnz L(StrncpyFillTailWithZero1)
2122 # ifdef USE_AS_STPCPY
/* cmpb sets carry only when the last byte written is NUL; sbb $-1
   then adds 1 - carry, i.e. steps %rax past a non-NUL last byte.  */
2123 cmpb $1, (%rax)
2124 sbb $-1, %rax
2125 # endif
2126 # endif
2127 ret
2128
2129 .p2align 4
/* Continuation of L(CopyFrom1To16Bytes) for a NUL among bytes 8..15:
   test the second mask byte (%ah) from the lowest bit up and jump to
   the matching L(ExitN), N = 9..15.  */
2130 L(ExitHigh):
2131 test $0x01, %ah
2132 jnz L(Exit9)
2133 test $0x02, %ah
2134 jnz L(Exit10)
2135 test $0x04, %ah
2136 jnz L(Exit11)
2137 test $0x08, %ah
2138 jnz L(Exit12)
2139 test $0x10, %ah
2140 jnz L(Exit13)
2141 test $0x20, %ah
2142 jnz L(Exit14)
2143 test $0x40, %ah
2144 jnz L(Exit15)
/* None of bits 8..14 set: fall through into L(Exit16), which follows
   (presumably bit 15 is the one set, as callers arrive with a non-zero
   mask).  */
2145
2146 .p2align 4
/* Copy exactly 16 bytes from (%rcx) to (%rdx) as two 8-byte moves and
   return.  Returns %rdi, or in USE_AS_STPCPY builds the address of the
   last byte written (%rdx + 15).  USE_AS_STRNCPY builds subtract 16
   from the remaining count in %r8 and, if anything is left, continue
   at L(StrncpyFillTailWithZero1) with %rcx = %rdx + 16.
   Also the fall-through target of L(ExitHigh).  */
2147 L(Exit16):
2148 mov (%rcx), %rax
2149 mov %rax, (%rdx)
2150 mov 8(%rcx), %rax
2151 mov %rax, 8(%rdx)
2152 # ifdef USE_AS_STPCPY
2153 lea 15(%rdx), %rax
2154 # else
2155 mov %rdi, %rax
2156 # endif
2157 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2158 sub $16, %r8
2159 lea 16(%rdx), %rcx
2160 jnz L(StrncpyFillTailWithZero1)
2161 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2162 cmpb $1, (%rax)
2163 sbb $-1, %rax
2164 # endif
2165 # endif
2166 ret
2167
2168 # ifdef USE_AS_STRNCPY
2169
2170 .p2align 4
/* Strncpy only: the current 16 bytes contain a NUL (mask in %rax) and
   the count also runs out within them.  Copies up to whichever comes
   first.
   In:  %rsi = bytes to advance both cursors by, %rcx/%rdx = cursors,
        %r8 = remaining count still reduced by the caller's "sub $16"
        (undone here).  */
2171 L(CopyFrom1To16BytesCase2):
2172 add $16, %r8
2173 add %rsi, %rcx
/* %rsi temporarily holds the advanced destination cursor while %rdx
   is used as scratch.  */
2174 lea (%rsi, %rdx), %rsi
/* %dh ends up non-zero when the count is below 9 (bit 15 of %r8 - 9
   is then set) or when the low mask byte has a NUL; either way the
   copy ends within the first 8 bytes.  */
2175 lea -9(%r8), %rdx
2176 and $1<<7, %dh
2177 or %al, %dh
2178 test %dh, %dh
/* lea restores %rdx without disturbing the flags from test.  */
2179 lea (%rsi), %rdx
2180 jz L(ExitHighCase2)
2181
/* For N = 1..7: stop at N if the count is N or byte N-1 is NUL.  */
2182 cmp $1, %r8
2183 je L(Exit1)
2184 test $0x01, %al
2185 jnz L(Exit1)
2186 cmp $2, %r8
2187 je L(Exit2)
2188 test $0x02, %al
2189 jnz L(Exit2)
2190 cmp $3, %r8
2191 je L(Exit3)
2192 test $0x04, %al
2193 jnz L(Exit3)
2194 cmp $4, %r8
2195 je L(Exit4)
2196 test $0x08, %al
2197 jnz L(Exit4)
2198 cmp $5, %r8
2199 je L(Exit5)
2200 test $0x10, %al
2201 jnz L(Exit5)
2202 cmp $6, %r8
2203 je L(Exit6)
2204 test $0x20, %al
2205 jnz L(Exit6)
2206 cmp $7, %r8
2207 je L(Exit7)
2208 test $0x40, %al
2209 jnz L(Exit7)
2210 jmp L(Exit8)
2211
2212 .p2align 4
/* Continuation of L(CopyFrom1To16BytesCase2) when the count is at
   least 9 and the first 8 bytes hold no NUL.  For N = 9..15: stop at N
   if the count in %r8 equals N or byte N-1 is NUL (bit in %ah);
   otherwise copy all 16.  */
2213 L(ExitHighCase2):
2214 cmp $9, %r8
2215 je L(Exit9)
2216 test $0x01, %ah
2217 jnz L(Exit9)
2218 cmp $10, %r8
2219 je L(Exit10)
2220 test $0x02, %ah
2221 jnz L(Exit10)
2222 cmp $11, %r8
2223 je L(Exit11)
2224 test $0x04, %ah
2225 jnz L(Exit11)
2226 cmp $12, %r8
2227 je L(Exit12)
2228 test $0x8, %ah
2229 jnz L(Exit12)
2230 cmp $13, %r8
2231 je L(Exit13)
2232 test $0x10, %ah
2233 jnz L(Exit13)
2234 cmp $14, %r8
2235 je L(Exit14)
2236 test $0x20, %ah
2237 jnz L(Exit14)
2238 cmp $15, %r8
2239 je L(Exit15)
2240 test $0x40, %ah
2241 jnz L(Exit15)
2242 jmp L(Exit16)
2243
/* Strncpy only: the count runs out within the current 16 bytes.  If
   the NUL mask in %rax is non-zero, both limits apply (Case2);
   otherwise fall through to Case3.  */
2244 L(CopyFrom1To16BytesCase2OrCase3):
2245 test %rax, %rax
2246 jnz L(CopyFrom1To16BytesCase2)
2247
2248 .p2align 4
/* Case3: no NUL in these bytes, only the count ends here.  Undo the
   caller's "sub $16" on %r8, advance both cursors by %rsi and jump to
   L(ExitN) with N = remaining count.  The compare tree below covers
   N = 1..16; callers presumably never arrive with any other value.  */
2249 L(CopyFrom1To16BytesCase3):
2250 add $16, %r8
2251 add %rsi, %rdx
2252 add %rsi, %rcx
2253
2254 cmp $16, %r8
2255 je L(Exit16)
2256 cmp $8, %r8
2257 je L(Exit8)
2258 jg L(More8Case3)
2259 cmp $4, %r8
2260 je L(Exit4)
2261 jg L(More4Case3)
2262 cmp $2, %r8
2263 jl L(Exit1)
2264 je L(Exit2)
2265 jg L(Exit3)
2266 L(More8Case3): /* but less than 16 */
2267 cmp $12, %r8
2268 je L(Exit12)
2269 jl L(Less12Case3)
2270 cmp $14, %r8
2271 jl L(Exit13)
2272 je L(Exit14)
2273 jg L(Exit15)
2274 L(More4Case3): /* but less than 8 */
2275 cmp $6, %r8
2276 jl L(Exit5)
2277 je L(Exit6)
2278 jg L(Exit7)
2279 L(Less12Case3): /* but more than 8 */
2280 cmp $10, %r8
2281 jl L(Exit9)
2282 je L(Exit10)
2283 jg L(Exit11)
2284 # endif
2285
2286 .p2align 4
/* Copy exactly 1 byte from (%rcx) to (%rdx) and return.
   Returns %rdi, or in USE_AS_STPCPY builds the address of the byte
   written (%rdx).  USE_AS_STRNCPY builds subtract 1 from the remaining
   count in %r8 and, if anything is left, continue at
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 1.  */
2287 L(Exit1):
2288 movb (%rcx), %al
2289 movb %al, (%rdx)
2290 # ifdef USE_AS_STPCPY
2291 lea (%rdx), %rax
2292 # else
2293 mov %rdi, %rax
2294 # endif
2295 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2296 sub $1, %r8
2297 lea 1(%rdx), %rcx
2298 jnz L(StrncpyFillTailWithZero1)
2299 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2300 cmpb $1, (%rax)
2301 sbb $-1, %rax
2302 # endif
2303 # endif
2304 ret
2305
2306 .p2align 4
/* Copy exactly 2 bytes from (%rcx) to (%rdx) and return.
   Returns %rdi, or in USE_AS_STPCPY builds the address of the last
   byte written (%rdx + 1).  USE_AS_STRNCPY builds subtract 2 from the
   remaining count in %r8 and, if anything is left, continue at
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 2.  */
2307 L(Exit2):
2308 movw (%rcx), %ax
2309 movw %ax, (%rdx)
2310 # ifdef USE_AS_STPCPY
2311 lea 1(%rdx), %rax
2312 # else
2313 mov %rdi, %rax
2314 # endif
2315 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2316 sub $2, %r8
2317 lea 2(%rdx), %rcx
2318 jnz L(StrncpyFillTailWithZero1)
2319 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2320 cmpb $1, (%rax)
2321 sbb $-1, %rax
2322 # endif
2323 # endif
2324 ret
2325
2326 .p2align 4
/* Copy exactly 3 bytes from (%rcx) to (%rdx) (a 2-byte and a 1-byte
   move) and return.  Returns %rdi, or in USE_AS_STPCPY builds the
   address of the last byte written (%rdx + 2).  USE_AS_STRNCPY builds
   subtract 3 from the remaining count in %r8 and, if anything is left,
   continue at L(StrncpyFillTailWithZero1) with %rcx = %rdx + 3.  */
2327 L(Exit3):
2328 movw (%rcx), %ax
2329 movw %ax, (%rdx)
2330 movb 2(%rcx), %al
2331 movb %al, 2(%rdx)
2332 # ifdef USE_AS_STPCPY
2333 lea 2(%rdx), %rax
2334 # else
2335 mov %rdi, %rax
2336 # endif
2337 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2338 sub $3, %r8
2339 lea 3(%rdx), %rcx
2340 jnz L(StrncpyFillTailWithZero1)
2341 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2342 cmpb $1, (%rax)
2343 sbb $-1, %rax
2344 # endif
2345 # endif
2346 ret
2347
2348 .p2align 4
/* Copy exactly 4 bytes from (%rcx) to (%rdx) and return.
   Returns %rdi, or in USE_AS_STPCPY builds the address of the last
   byte written (%rdx + 3).  USE_AS_STRNCPY builds subtract 4 from the
   remaining count in %r8 and, if anything is left, continue at
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 4.  */
2349 L(Exit4):
2350 movl (%rcx), %eax
2351 movl %eax, (%rdx)
2352 # ifdef USE_AS_STPCPY
2353 lea 3(%rdx), %rax
2354 # else
2355 mov %rdi, %rax
2356 # endif
2357 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2358 sub $4, %r8
2359 lea 4(%rdx), %rcx
2360 jnz L(StrncpyFillTailWithZero1)
2361 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2362 cmpb $1, (%rax)
2363 sbb $-1, %rax
2364 # endif
2365 # endif
2366 ret
2367
2368 .p2align 4
/* Copy exactly 5 bytes from (%rcx) to (%rdx) (a 4-byte and a 1-byte
   move) and return.  Returns %rdi, or in USE_AS_STPCPY builds the
   address of the last byte written (%rdx + 4).  USE_AS_STRNCPY builds
   subtract 5 from the remaining count in %r8 and, if anything is left,
   continue at L(StrncpyFillTailWithZero1) with %rcx = %rdx + 5.  */
2369 L(Exit5):
2370 movl (%rcx), %eax
2371 movl %eax, (%rdx)
2372 movb 4(%rcx), %al
2373 movb %al, 4(%rdx)
2374 # ifdef USE_AS_STPCPY
2375 lea 4(%rdx), %rax
2376 # else
2377 mov %rdi, %rax
2378 # endif
2379 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2380 sub $5, %r8
2381 lea 5(%rdx), %rcx
2382 jnz L(StrncpyFillTailWithZero1)
2383 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2384 cmpb $1, (%rax)
2385 sbb $-1, %rax
2386 # endif
2387 # endif
2388 ret
2389
2390 .p2align 4
/* Copy exactly 6 bytes from (%rcx) to (%rdx) (a 4-byte and a 2-byte
   move) and return.  Returns %rdi, or in USE_AS_STPCPY builds the
   address of the last byte written (%rdx + 5).  USE_AS_STRNCPY builds
   subtract 6 from the remaining count in %r8 and, if anything is left,
   continue at L(StrncpyFillTailWithZero1) with %rcx = %rdx + 6.  */
2391 L(Exit6):
2392 movl (%rcx), %eax
2393 movl %eax, (%rdx)
2394 movw 4(%rcx), %ax
2395 movw %ax, 4(%rdx)
2396 # ifdef USE_AS_STPCPY
2397 lea 5(%rdx), %rax
2398 # else
2399 mov %rdi, %rax
2400 # endif
2401 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2402 sub $6, %r8
2403 lea 6(%rdx), %rcx
2404 jnz L(StrncpyFillTailWithZero1)
2405 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2406 cmpb $1, (%rax)
2407 sbb $-1, %rax
2408 # endif
2409 # endif
2410 ret
2411
2412 .p2align 4
/* Copy exactly 7 bytes from (%rcx) to (%rdx) using two 4-byte moves
   that overlap by one byte (offsets 0 and 3), then return.  Returns
   %rdi, or in USE_AS_STPCPY builds the address of the last byte
   written (%rdx + 6).  USE_AS_STRNCPY builds subtract 7 from the
   remaining count in %r8 and, if anything is left, continue at
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 7.  */
2413 L(Exit7):
2414 movl (%rcx), %eax
2415 movl %eax, (%rdx)
2416 movl 3(%rcx), %eax
2417 movl %eax, 3(%rdx)
2418 # ifdef USE_AS_STPCPY
2419 lea 6(%rdx), %rax
2420 # else
2421 mov %rdi, %rax
2422 # endif
2423 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2424 sub $7, %r8
2425 lea 7(%rdx), %rcx
2426 jnz L(StrncpyFillTailWithZero1)
2427 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2428 cmpb $1, (%rax)
2429 sbb $-1, %rax
2430 # endif
2431 # endif
2432 ret
2433
2434 .p2align 4
/* Copy exactly 9 bytes from (%rcx) to (%rdx): an 8-byte move at offset
   0 plus a 4-byte move at offset 5 (overlapping the first), then
   return.  Returns %rdi, or in USE_AS_STPCPY builds the address of the
   last byte written (%rdx + 8).  USE_AS_STRNCPY builds subtract 9 from
   the remaining count in %r8 and, if anything is left, continue at
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 9.  */
2435 L(Exit9):
2436 mov (%rcx), %rax
2437 mov %rax, (%rdx)
2438 mov 5(%rcx), %eax
2439 mov %eax, 5(%rdx)
2440 # ifdef USE_AS_STPCPY
2441 lea 8(%rdx), %rax
2442 # else
2443 mov %rdi, %rax
2444 # endif
2445 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2446 sub $9, %r8
2447 lea 9(%rdx), %rcx
2448 jnz L(StrncpyFillTailWithZero1)
2449 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2450 cmpb $1, (%rax)
2451 sbb $-1, %rax
2452 # endif
2453 # endif
2454 ret
2455
2456 .p2align 4
/* Copy exactly 10 bytes from (%rcx) to (%rdx): an 8-byte move at
   offset 0 plus a 4-byte move at offset 6 (overlapping the first),
   then return.  Returns %rdi, or in USE_AS_STPCPY builds the address
   of the last byte written (%rdx + 9).  USE_AS_STRNCPY builds subtract
   10 from the remaining count in %r8 and, if anything is left,
   continue at L(StrncpyFillTailWithZero1) with %rcx = %rdx + 10.  */
2457 L(Exit10):
2458 mov (%rcx), %rax
2459 mov %rax, (%rdx)
2460 mov 6(%rcx), %eax
2461 mov %eax, 6(%rdx)
2462 # ifdef USE_AS_STPCPY
2463 lea 9(%rdx), %rax
2464 # else
2465 mov %rdi, %rax
2466 # endif
2467 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2468 sub $10, %r8
2469 lea 10(%rdx), %rcx
2470 jnz L(StrncpyFillTailWithZero1)
2471 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2472 cmpb $1, (%rax)
2473 sbb $-1, %rax
2474 # endif
2475 # endif
2476 ret
2477
2478 .p2align 4
/* Copy exactly 11 bytes from (%rcx) to (%rdx): an 8-byte move at
   offset 0 plus a 4-byte move at offset 7 (overlapping the first),
   then return.  Returns %rdi, or in USE_AS_STPCPY builds the address
   of the last byte written (%rdx + 10).  USE_AS_STRNCPY builds
   subtract 11 from the remaining count in %r8 and, if anything is
   left, continue at L(StrncpyFillTailWithZero1) with %rcx = %rdx + 11.  */
2479 L(Exit11):
2480 mov (%rcx), %rax
2481 mov %rax, (%rdx)
2482 mov 7(%rcx), %eax
2483 mov %eax, 7(%rdx)
2484 # ifdef USE_AS_STPCPY
2485 lea 10(%rdx), %rax
2486 # else
2487 mov %rdi, %rax
2488 # endif
2489 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2490 sub $11, %r8
2491 lea 11(%rdx), %rcx
2492 jnz L(StrncpyFillTailWithZero1)
2493 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2494 cmpb $1, (%rax)
2495 sbb $-1, %rax
2496 # endif
2497 # endif
2498 ret
2499
2500 .p2align 4
/* Copy exactly 12 bytes from (%rcx) to (%rdx) (an 8-byte move plus a
   4-byte move at offset 8) and return.  Returns %rdi, or in
   USE_AS_STPCPY builds the address of the last byte written
   (%rdx + 11).  USE_AS_STRNCPY builds subtract 12 from the remaining
   count in %r8 and, if anything is left, continue at
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 12.  */
2501 L(Exit12):
2502 mov (%rcx), %rax
2503 mov %rax, (%rdx)
2504 mov 8(%rcx), %eax
2505 mov %eax, 8(%rdx)
2506 # ifdef USE_AS_STPCPY
2507 lea 11(%rdx), %rax
2508 # else
2509 mov %rdi, %rax
2510 # endif
2511 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2512 sub $12, %r8
2513 lea 12(%rdx), %rcx
2514 jnz L(StrncpyFillTailWithZero1)
2515 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2516 cmpb $1, (%rax)
2517 sbb $-1, %rax
2518 # endif
2519 # endif
2520 ret
2521
2522 .p2align 4
/* Copy exactly 13 bytes from (%rcx) to (%rdx) using two 8-byte moves
   that overlap (offsets 0 and 5), then return.  Returns %rdi, or in
   USE_AS_STPCPY builds the address of the last byte written
   (%rdx + 12).  USE_AS_STRNCPY builds subtract 13 from the remaining
   count in %r8 and, if anything is left, continue at
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 13.  */
2523 L(Exit13):
2524 mov (%rcx), %rax
2525 mov %rax, (%rdx)
2526 mov 5(%rcx), %rax
2527 mov %rax, 5(%rdx)
2528 # ifdef USE_AS_STPCPY
2529 lea 12(%rdx), %rax
2530 # else
2531 mov %rdi, %rax
2532 # endif
2533 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2534 sub $13, %r8
2535 lea 13(%rdx), %rcx
2536 jnz L(StrncpyFillTailWithZero1)
2537 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2538 cmpb $1, (%rax)
2539 sbb $-1, %rax
2540 # endif
2541 # endif
2542 ret
2543
2544 .p2align 4
/* Copy exactly 14 bytes from (%rcx) to (%rdx) using two 8-byte moves
   that overlap (offsets 0 and 6), then return.  Returns %rdi, or in
   USE_AS_STPCPY builds the address of the last byte written
   (%rdx + 13).  USE_AS_STRNCPY builds subtract 14 from the remaining
   count in %r8 and, if anything is left, continue at
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 14.  */
2545 L(Exit14):
2546 mov (%rcx), %rax
2547 mov %rax, (%rdx)
2548 mov 6(%rcx), %rax
2549 mov %rax, 6(%rdx)
2550 # ifdef USE_AS_STPCPY
2551 lea 13(%rdx), %rax
2552 # else
2553 mov %rdi, %rax
2554 # endif
2555 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2556 sub $14, %r8
2557 lea 14(%rdx), %rcx
2558 jnz L(StrncpyFillTailWithZero1)
2559 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2560 cmpb $1, (%rax)
2561 sbb $-1, %rax
2562 # endif
2563 # endif
2564 ret
2565
2566 .p2align 4
/* Copy exactly 15 bytes from (%rcx) to (%rdx) using two 8-byte moves
   that overlap by one byte (offsets 0 and 7), then return.  Returns
   %rdi, or in USE_AS_STPCPY builds the address of the last byte
   written (%rdx + 14).  USE_AS_STRNCPY builds subtract 15 from the
   remaining count in %r8 and, if anything is left, continue at
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 15.  */
2567 L(Exit15):
2568 mov (%rcx), %rax
2569 mov %rax, (%rdx)
2570 mov 7(%rcx), %rax
2571 mov %rax, 7(%rdx)
2572 # ifdef USE_AS_STPCPY
2573 lea 14(%rdx), %rax
2574 # else
2575 mov %rdi, %rax
2576 # endif
2577 # ifdef USE_AS_STRNCPY
/* lea leaves the flags from sub intact for the jnz.  */
2578 sub $15, %r8
2579 lea 15(%rdx), %rcx
2580 jnz L(StrncpyFillTailWithZero1)
2581 # ifdef USE_AS_STPCPY
/* Step %rax one past the last byte written unless that byte is NUL.  */
2582 cmpb $1, (%rax)
2583 sbb $-1, %rax
2584 # endif
2585 # endif
2586 ret
2587
2588 # ifdef USE_AS_STRNCPY
2589 .p2align 4
/* L(Fill0) .. L(Fill16): store N bytes taken from %rdx at (%rcx) and
   return.  These are the final step of the strncpy zero padding:
   L(StrncpyFillTailWithZero1) clears %rdx before dispatching here, so
   the bytes stored are zeros.  %rax is not touched, so the return
   value set by the L(ExitN) path that started the fill is preserved.
   Sizes that are not a power of two use two overlapping stores.  */
2590 L(Fill0):
2591 ret
2592
2593 .p2align 4
2594 L(Fill1):
2595 movb %dl, (%rcx)
2596 ret
2597
2598 .p2align 4
2599 L(Fill2):
2600 movw %dx, (%rcx)
2601 ret
2602
2603 .p2align 4
2604 L(Fill3):
2605 movw %dx, (%rcx)
2606 movb %dl, 2(%rcx)
2607 ret
2608
2609 .p2align 4
2610 L(Fill4):
2611 movl %edx, (%rcx)
2612 ret
2613
2614 .p2align 4
2615 L(Fill5):
2616 movl %edx, (%rcx)
2617 movb %dl, 4(%rcx)
2618 ret
2619
2620 .p2align 4
2621 L(Fill6):
2622 movl %edx, (%rcx)
2623 movw %dx, 4(%rcx)
2624 ret
2625
2626 .p2align 4
/* 4 + 4 bytes overlapping at offset 3.  */
2627 L(Fill7):
2628 movl %edx, (%rcx)
2629 movl %edx, 3(%rcx)
2630 ret
2631
2632 .p2align 4
2633 L(Fill8):
2634 mov %rdx, (%rcx)
2635 ret
2636
2637 .p2align 4
2638 L(Fill9):
2639 mov %rdx, (%rcx)
2640 movb %dl, 8(%rcx)
2641 ret
2642
2643 .p2align 4
2644 L(Fill10):
2645 mov %rdx, (%rcx)
2646 movw %dx, 8(%rcx)
2647 ret
2648
2649 .p2align 4
/* 8 + 4 bytes overlapping at offset 7.  */
2650 L(Fill11):
2651 mov %rdx, (%rcx)
2652 movl %edx, 7(%rcx)
2653 ret
2654
2655 .p2align 4
2656 L(Fill12):
2657 mov %rdx, (%rcx)
2658 movl %edx, 8(%rcx)
2659 ret
2660
2661 .p2align 4
/* 8 + 8 bytes overlapping at offset 5.  */
2662 L(Fill13):
2663 mov %rdx, (%rcx)
2664 mov %rdx, 5(%rcx)
2665 ret
2666
2667 .p2align 4
/* 8 + 8 bytes overlapping at offset 6.  */
2668 L(Fill14):
2669 mov %rdx, (%rcx)
2670 mov %rdx, 6(%rcx)
2671 ret
2672
2673 .p2align 4
/* 8 + 8 bytes overlapping at offset 7.  */
2674 L(Fill15):
2675 mov %rdx, (%rcx)
2676 mov %rdx, 7(%rcx)
2677 ret
2678
2679 .p2align 4
2680 L(Fill16):
2681 mov %rdx, (%rcx)
2682 mov %rdx, 8(%rcx)
2683 ret
2684
2685 .p2align 4
/* Tail of the strncpy zero padding.  L(StrncpyFillExit1) is entered
   with %r8 reduced by 16 below the real number of bytes still to
   fill and adds the 16 back; L(FillFrom1To16Bytes) is entered with
   %r8 already exact.  The compare tree then jumps to L(FillN) with
   N = %r8; it covers N = 0..16, and callers presumably never arrive
   with any other value.  %rcx is the fill position.  */
2686 L(StrncpyFillExit1):
2687 lea 16(%r8), %r8
2688 L(FillFrom1To16Bytes):
2689 test %r8, %r8
2690 jz L(Fill0)
2691 cmp $16, %r8
2692 je L(Fill16)
2693 cmp $8, %r8
2694 je L(Fill8)
2695 jg L(FillMore8)
2696 cmp $4, %r8
2697 je L(Fill4)
2698 jg L(FillMore4)
2699 cmp $2, %r8
2700 jl L(Fill1)
2701 je L(Fill2)
2702 jg L(Fill3)
2703 L(FillMore8): /* but less than 16 */
2704 cmp $12, %r8
2705 je L(Fill12)
2706 jl L(FillLess12)
2707 cmp $14, %r8
2708 jl L(Fill13)
2709 je L(Fill14)
2710 jg L(Fill15)
2711 L(FillMore4): /* but less than 8 */
2712 cmp $6, %r8
2713 jl L(Fill5)
2714 je L(Fill6)
2715 jg L(Fill7)
2716 L(FillLess12): /* but more than 8 */
2717 cmp $10, %r8
2718 jl L(Fill9)
2719 je L(Fill10)
2720 jmp L(Fill11)
2721
2722 .p2align 4
/* Strncpy zero padding: write %r8 zero bytes starting at %rcx, then
   return through one of the L(FillN) stubs (which leave %rax, the
   return value set by the calling L(ExitN), untouched).
   Clobbers %rdx (left zero for the L(FillN) stores) and %xmm0.  */
2723 L(StrncpyFillTailWithZero1):
2724 xor %rdx, %rdx
/* 16 bytes or fewer: L(StrncpyFillExit1) adds the 16 back and
   dispatches on the exact size.  */
2725 sub $16, %r8
2726 jbe L(StrncpyFillExit1)
2727
/* More than 16: zero the first 16 bytes with two 8-byte stores that
   need no alignment.  */
2728 pxor %xmm0, %xmm0
2729 mov %rdx, (%rcx)
2730 mov %rdx, 8(%rcx)
2731
2732 lea 16(%rcx), %rcx
2733
/* Round %rcx down to a 16-byte boundary for the movdqa stores and
   credit %r8 with the bytes stepped back (they get zeroed again).  */
2734 mov %rcx, %rdx
2735 and $0xf, %rdx
2736 sub %rdx, %rcx
2737 add %rdx, %r8
2738 xor %rdx, %rdx
2739 sub $64, %r8
2740 jb L(StrncpyFillLess64)
2741
/* Zero 64 bytes per iteration while at least 64 remain.  */
2742 L(StrncpyFillLoopMovdqa):
2743 movdqa %xmm0, (%rcx)
2744 movdqa %xmm0, 16(%rcx)
2745 movdqa %xmm0, 32(%rcx)
2746 movdqa %xmm0, 48(%rcx)
2747 lea 64(%rcx), %rcx
2748 sub $64, %r8
2749 jae L(StrncpyFillLoopMovdqa)
2750
/* Fewer than 64 bytes left; %r8 holds the remainder minus 64.  After
   the add it is the remainder minus 32, negative when under 32.  */
2751 L(StrncpyFillLess64):
2752 add $32, %r8
2753 jl L(StrncpyFillLess32)
2754 movdqa %xmm0, (%rcx)
2755 movdqa %xmm0, 16(%rcx)
2756 lea 32(%rcx), %rcx
2757 sub $16, %r8
2758 jl L(StrncpyFillExit1)
2759 movdqa %xmm0, (%rcx)
2760 lea 16(%rcx), %rcx
2761 jmp L(FillFrom1To16Bytes)
2762
/* Fewer than 32 bytes left; after the add %r8 is the remainder minus
   16, negative when under 16.  */
2763 L(StrncpyFillLess32):
2764 add $16, %r8
2765 jl L(StrncpyFillExit1)
2766 movdqa %xmm0, (%rcx)
2767 lea 16(%rcx), %rcx
2768 jmp L(FillFrom1To16Bytes)
2769
2770 .p2align 4
/* Strncpy with a zero count (taken from the entry code when %r8 is 0):
   nothing is copied.  Returns %rdx, which the entry code loaded from
   %rdi, i.e. the destination pointer.  */
2771 L(Exit0):
2772 mov %rdx, %rax
2773 ret
2774
2775 .p2align 4
/* Strncpy short path, taken from the entry code when the count in %r8
   is below 16 after the first 8 source bytes were found to be
   non-NUL (counts of 8 or less go to L(StrncpyExit8Bytes) earlier).
   For N = 9..14: jump to L(ExitN) if the count is N or source byte
   N-1 is NUL.  */
2776 L(StrncpyExit15Bytes):
2777 cmp $9, %r8
2778 je L(Exit9)
2779 cmpb $0, 8(%rcx)
2780 jz L(Exit9)
2781 cmp $10, %r8
2782 je L(Exit10)
2783 cmpb $0, 9(%rcx)
2784 jz L(Exit10)
2785 cmp $11, %r8
2786 je L(Exit11)
2787 cmpb $0, 10(%rcx)
2788 jz L(Exit11)
2789 cmp $12, %r8
2790 je L(Exit12)
2791 cmpb $0, 11(%rcx)
2792 jz L(Exit12)
2793 cmp $13, %r8
2794 je L(Exit13)
2795 cmpb $0, 12(%rcx)
2796 jz L(Exit13)
2797 cmp $14, %r8
2798 je L(Exit14)
2799 cmpb $0, 13(%rcx)
2800 jz L(Exit14)
/* Count is 15 and bytes 0..13 are non-NUL: copy all 15 bytes with two
   overlapping 8-byte moves.  The count is used up, so there is no
   zero padding.  */
2801 mov (%rcx), %rax
2802 mov %rax, (%rdx)
2803 mov 7(%rcx), %rax
2804 mov %rax, 7(%rdx)
2805 # ifdef USE_AS_STPCPY
/* Return the address of byte 14 if it is NUL, otherwise one past.  */
2806 lea 14(%rdx), %rax
2807 cmpb $1, (%rax)
2808 sbb $-1, %rax
2809 # else
2810 mov %rdi, %rax
2811 # endif
2812 ret
2813
2814 .p2align 4
/* Strncpy short path, taken from the entry code when the count in %r8
   is between 1 and 8 (zero goes to L(Exit0) earlier).
   For N = 1..7: jump to L(ExitN) if the count is N or source byte N-1
   is NUL.  */
2815 L(StrncpyExit8Bytes):
2816 cmp $1, %r8
2817 je L(Exit1)
2818 cmpb $0, (%rcx)
2819 jz L(Exit1)
2820 cmp $2, %r8
2821 je L(Exit2)
2822 cmpb $0, 1(%rcx)
2823 jz L(Exit2)
2824 cmp $3, %r8
2825 je L(Exit3)
2826 cmpb $0, 2(%rcx)
2827 jz L(Exit3)
2828 cmp $4, %r8
2829 je L(Exit4)
2830 cmpb $0, 3(%rcx)
2831 jz L(Exit4)
2832 cmp $5, %r8
2833 je L(Exit5)
2834 cmpb $0, 4(%rcx)
2835 jz L(Exit5)
2836 cmp $6, %r8
2837 je L(Exit6)
2838 cmpb $0, 5(%rcx)
2839 jz L(Exit6)
2840 cmp $7, %r8
2841 je L(Exit7)
2842 cmpb $0, 6(%rcx)
2843 jz L(Exit7)
/* Count is 8 and bytes 0..6 are non-NUL: copy all 8 bytes.  The count
   is used up, so there is no zero padding.  */
2844 mov (%rcx), %rax
2845 mov %rax, (%rdx)
2846 # ifdef USE_AS_STPCPY
/* Return the address of byte 7 if it is NUL, otherwise one past.  */
2847 lea 7(%rdx), %rax
2848 cmpb $1, (%rax)
2849 sbb $-1, %rax
2850 # else
2851 mov %rdi, %rax
2852 # endif
2853 ret
2854
2855 # endif
2856 # endif
2857
2858 # ifdef USE_AS_STRNCPY
2859 .p2align 4
/* Strncpy: leave a 64-byte loop because the count ran out.  Its
   callers are outside this chunk; presumably this serves the aligned
   64-byte loop, with %xmm4..%xmm7 holding the four vectors just
   loaded, %rdx already advanced past them (stores below use negative
   offsets) and %rax the combined NUL mask -- confirm at the call
   site.  A non-zero mask means a NUL is also present (Case2).  */
2860 L(StrncpyLeaveCase2OrCase3):
2861 test %rax, %rax
2862 jnz L(Aligned64LeaveCase2)
2863
/* Case3: no NUL, only the count ends.  Restore the 64 taken off %r8,
   then store whole vectors while more than 16 bytes remain, adding 16
   to the offset in %rsi for each one.  The partial vector is finished
   by L(CopyFrom1To16BytesCase3), which expects %r8 reduced by 16.  */
2864 L(Aligned64LeaveCase3):
2865 lea 64(%r8), %r8
2866 sub $16, %r8
2867 jbe L(CopyFrom1To16BytesCase3)
2868 movaps %xmm4, -64(%rdx)
2869 lea 16(%rsi), %rsi
2870 sub $16, %r8
2871 jbe L(CopyFrom1To16BytesCase3)
2872 movaps %xmm5, -48(%rdx)
2873 lea 16(%rsi), %rsi
2874 sub $16, %r8
2875 jbe L(CopyFrom1To16BytesCase3)
2876 movaps %xmm6, -32(%rdx)
2877 lea 16(%rsi), %rsi
2878 lea -16(%r8), %r8
2879 jmp L(CopyFrom1To16BytesCase3)
2880
/* Strncpy: the count runs out within the current 64 bytes and they
   also contain a NUL.  Walk the four vectors %xmm4..%xmm7 in order;
   for each, %rax gets its own NUL mask.  If the count ends inside the
   vector go to L(CopyFrom1To16BytesCase2OrCase3); if it holds a NUL go
   to L(CopyFrom1To16Bytes); otherwise store it and add 16 to the
   offset in %rsi.  %xmm0 is presumably zero on entry and is zero again
   after each compare that finds no NUL.  */
2881 L(Aligned64LeaveCase2):
2882 pcmpeqb %xmm4, %xmm0
2883 pmovmskb %xmm0, %rax
/* +48 is the +64 that restores the count combined with the -16 charged
   for this vector.  */
2884 add $48, %r8
2885 jle L(CopyFrom1To16BytesCase2OrCase3)
2886 test %rax, %rax
2887 jnz L(CopyFrom1To16Bytes)
2888
2889 pcmpeqb %xmm5, %xmm0
2890 pmovmskb %xmm0, %rax
2891 movaps %xmm4, -64(%rdx)
2892 lea 16(%rsi), %rsi
2893 sub $16, %r8
2894 jbe L(CopyFrom1To16BytesCase2OrCase3)
2895 test %rax, %rax
2896 jnz L(CopyFrom1To16Bytes)
2897
2898 pcmpeqb %xmm6, %xmm0
2899 pmovmskb %xmm0, %rax
2900 movaps %xmm5, -48(%rdx)
2901 lea 16(%rsi), %rsi
2902 sub $16, %r8
2903 jbe L(CopyFrom1To16BytesCase2OrCase3)
2904 test %rax, %rax
2905 jnz L(CopyFrom1To16Bytes)
2906
/* Last vector: it is handed to Case2 unconditionally, with %r8 reduced
   by 16 as that label expects.  */
2907 pcmpeqb %xmm7, %xmm0
2908 pmovmskb %xmm0, %rax
2909 movaps %xmm6, -32(%rdx)
2910 lea 16(%rsi), %rsi
2911 lea -16(%r8), %r8
2912 jmp L(CopyFrom1To16BytesCase2)
2913 /*--------------------------------------------------*/
2914 .p2align 4
/* Strncpy count ran out in a shift-by-1 step (presumably reached from
   the L(Shl1) path, which is outside this chunk).  Copies 16 bytes
   starting 1 byte before both cursors, covering the 15 pending bytes
   at %rcx, and sets %rsi = 15 as the cursor advance.  Then Case2 if
   the NUL mask in %rax is non-zero, else Case3.  Clobbers %xmm0.  */
2915 L(StrncpyExit1Case2OrCase3):
2916 movdqu -1(%rcx), %xmm0
2917 movdqu %xmm0, -1(%rdx)
2918 mov $15, %rsi
2919 test %rax, %rax
2920 jnz L(CopyFrom1To16BytesCase2)
2921 jmp L(CopyFrom1To16BytesCase3)
2922
2923 .p2align 4
/* Strncpy count ran out in a shift-by-2 step (presumably reached from
   the L(Shl2) path, which is outside this chunk).  Copies 16 bytes
   starting 2 bytes before both cursors, covering the 14 pending bytes
   at %rcx, and sets %rsi = 14 as the cursor advance.  Then Case2 if
   the NUL mask in %rax is non-zero, else Case3.  Clobbers %xmm0.  */
2924 L(StrncpyExit2Case2OrCase3):
2925 movdqu -2(%rcx), %xmm0
2926 movdqu %xmm0, -2(%rdx)
2927 mov $14, %rsi
2928 test %rax, %rax
2929 jnz L(CopyFrom1To16BytesCase2)
2930 jmp L(CopyFrom1To16BytesCase3)
2931
2932 .p2align 4
/* Strncpy count ran out in a shift-by-3 step (presumably reached from
   the L(Shl3) path, which is outside this chunk).  Copies 16 bytes
   starting 3 bytes before both cursors, covering the 13 pending bytes
   at %rcx, and sets %rsi = 13 as the cursor advance.  Then Case2 if
   the NUL mask in %rax is non-zero, else Case3.  Clobbers %xmm0.  */
2933 L(StrncpyExit3Case2OrCase3):
2934 movdqu -3(%rcx), %xmm0
2935 movdqu %xmm0, -3(%rdx)
2936 mov $13, %rsi
2937 test %rax, %rax
2938 jnz L(CopyFrom1To16BytesCase2)
2939 jmp L(CopyFrom1To16BytesCase3)
2940
2941 .p2align 4
/* Strncpy count ran out in a shift-by-4 step (presumably reached from
   the L(Shl4) path, which is outside this chunk).  Copies 16 bytes
   starting 4 bytes before both cursors, covering the 12 pending bytes
   at %rcx, and sets %rsi = 12 as the cursor advance.  Then Case2 if
   the NUL mask in %rax is non-zero, else Case3.  Clobbers %xmm0.  */
2942 L(StrncpyExit4Case2OrCase3):
2943 movdqu -4(%rcx), %xmm0
2944 movdqu %xmm0, -4(%rdx)
2945 mov $12, %rsi
2946 test %rax, %rax
2947 jnz L(CopyFrom1To16BytesCase2)
2948 jmp L(CopyFrom1To16BytesCase3)
2949
2950 .p2align 4
/* Strncpy count ran out in a shift-by-5 step (presumably reached from
   the L(Shl5) path, which is outside this chunk).  Copies 16 bytes
   starting 5 bytes before both cursors, covering the 11 pending bytes
   at %rcx, and sets %rsi = 11 as the cursor advance.  Then Case2 if
   the NUL mask in %rax is non-zero, else Case3.  Clobbers %xmm0.  */
2951 L(StrncpyExit5Case2OrCase3):
2952 movdqu -5(%rcx), %xmm0
2953 movdqu %xmm0, -5(%rdx)
2954 mov $11, %rsi
2955 test %rax, %rax
2956 jnz L(CopyFrom1To16BytesCase2)
2957 jmp L(CopyFrom1To16BytesCase3)
2958
2959 .p2align 4
/* Strncpy count ran out in a shift-by-6 step (presumably reached from
   the L(Shl6) path, which is outside this chunk).  Copies the 10
   pending bytes at %rcx with an 8-byte move at offset 0 and a 4-byte
   move at offset 6, then sets %rsi = 10 as the cursor advance.
   %rsi doubles as scratch for the 8-byte move first.  Then Case2 if
   the NUL mask in %rax is non-zero, else Case3.  */
2960 L(StrncpyExit6Case2OrCase3):
2961 mov (%rcx), %rsi
2962 mov 6(%rcx), %r9d
2963 mov %r9d, 6(%rdx)
2964 mov %rsi, (%rdx)
2965 test %rax, %rax
/* mov does not change the flags set by test.  */
2966 mov $10, %rsi
2967 jnz L(CopyFrom1To16BytesCase2)
2968 jmp L(CopyFrom1To16BytesCase3)
2969
2970 .p2align 4
/* Copy 9 bytes from (%rcx) to (%rdx) with an 8-byte move at offset 0
   and an overlapping 4-byte move at offset 5, load 9 into %rsi, then
   continue at CopyFrom1To16BytesCase2 when %rax is non-zero and at
   CopyFrom1To16BytesCase3 when it is zero.
   NOTE(review): %rax is presumably the NUL-byte mask produced by the
   code that jumps here -- confirm at the jump sites.  */
2971 L(StrncpyExit7Case2OrCase3):
2972 mov (%rcx), %rsi /* %rsi is only a temporary until it is reloaded below */
2973 mov 5(%rcx), %r9d
2974 mov %r9d, 5(%rdx) /* bytes 5..8 */
2975 mov %rsi, (%rdx) /* bytes 0..7 */
2976 test %rax, %rax
2977 mov $9, %rsi /* mov does not change the flags set by test */
2978 jnz L(CopyFrom1To16BytesCase2)
2979 jmp L(CopyFrom1To16BytesCase3)
2980
2981 .p2align 4
/* Copy the 8 bytes at (%rcx) to (%rdx) through %r9, load 8 into %rsi,
   then continue at CopyFrom1To16BytesCase2 when %rax is non-zero and at
   CopyFrom1To16BytesCase3 when it is zero.
   NOTE(review): %rax is presumably the NUL-byte mask produced by the
   code that jumps here -- confirm at the jump sites.  */
2982 L(StrncpyExit8Case2OrCase3):
2983 mov (%rcx), %r9
2984 mov $8, %rsi
2985 mov %r9, (%rdx)
2986 test %rax, %rax
2987 jnz L(CopyFrom1To16BytesCase2)
2988 jmp L(CopyFrom1To16BytesCase3)
2989
2990 .p2align 4
/* Copy the 8 bytes that start 1 byte before %rcx to the 8 bytes that
   start 1 byte before %rdx (through %r9), load 7 into %rsi, then
   continue at CopyFrom1To16BytesCase2 when %rax is non-zero and at
   CopyFrom1To16BytesCase3 when it is zero.
   NOTE(review): %rax is presumably the NUL-byte mask produced by the
   code that jumps here -- confirm at the jump sites.  */
2991 L(StrncpyExit9Case2OrCase3):
2992 mov -1(%rcx), %r9
2993 mov $7, %rsi
2994 mov %r9, -1(%rdx)
2995 test %rax, %rax
2996 jnz L(CopyFrom1To16BytesCase2)
2997 jmp L(CopyFrom1To16BytesCase3)
2998
2999 .p2align 4
/* Copy the 8 bytes that start 2 bytes before %rcx to the 8 bytes that
   start 2 bytes before %rdx (through %r9), load 6 into %rsi, then
   continue at CopyFrom1To16BytesCase2 when %rax is non-zero and at
   CopyFrom1To16BytesCase3 when it is zero.
   NOTE(review): %rax is presumably the NUL-byte mask produced by the
   code that jumps here -- confirm at the jump sites.  */
3000 L(StrncpyExit10Case2OrCase3):
3001 mov -2(%rcx), %r9
3002 mov $6, %rsi
3003 mov %r9, -2(%rdx)
3004 test %rax, %rax
3005 jnz L(CopyFrom1To16BytesCase2)
3006 jmp L(CopyFrom1To16BytesCase3)
3007
3008 .p2align 4
/* Copy the 8 bytes that start 3 bytes before %rcx to the 8 bytes that
   start 3 bytes before %rdx (through %r9), load 5 into %rsi, then
   continue at CopyFrom1To16BytesCase2 when %rax is non-zero and at
   CopyFrom1To16BytesCase3 when it is zero.
   NOTE(review): %rax is presumably the NUL-byte mask produced by the
   code that jumps here -- confirm at the jump sites.  */
3009 L(StrncpyExit11Case2OrCase3):
3010 mov -3(%rcx), %r9
3011 mov $5, %rsi
3012 mov %r9, -3(%rdx)
3013 test %rax, %rax
3014 jnz L(CopyFrom1To16BytesCase2)
3015 jmp L(CopyFrom1To16BytesCase3)
3016
3017 .p2align 4
/* Copy the 4 bytes at (%rcx) to (%rdx) through %r9d, load 4 into %rsi,
   then continue at CopyFrom1To16BytesCase2 when %rax is non-zero and at
   CopyFrom1To16BytesCase3 when it is zero.
   NOTE(review): %rax is presumably the NUL-byte mask produced by the
   code that jumps here -- confirm at the jump sites.  */
3018 L(StrncpyExit12Case2OrCase3):
3019 mov (%rcx), %r9d
3020 mov $4, %rsi
3021 mov %r9d, (%rdx)
3022 test %rax, %rax
3023 jnz L(CopyFrom1To16BytesCase2)
3024 jmp L(CopyFrom1To16BytesCase3)
3025
3026 .p2align 4
/* Copy the 4 bytes that start 1 byte before %rcx to the 4 bytes that
   start 1 byte before %rdx (through %r9d), load 3 into %rsi, then
   continue at CopyFrom1To16BytesCase2 when %rax is non-zero and at
   CopyFrom1To16BytesCase3 when it is zero.
   NOTE(review): %rax is presumably the NUL-byte mask produced by the
   code that jumps here -- confirm at the jump sites.  */
3027 L(StrncpyExit13Case2OrCase3):
3028 mov -1(%rcx), %r9d
3029 mov $3, %rsi
3030 mov %r9d, -1(%rdx)
3031 test %rax, %rax
3032 jnz L(CopyFrom1To16BytesCase2)
3033 jmp L(CopyFrom1To16BytesCase3)
3034
3035 .p2align 4
/* Copy the 4 bytes that start 2 bytes before %rcx to the 4 bytes that
   start 2 bytes before %rdx (through %r9d), load 2 into %rsi, then
   continue at CopyFrom1To16BytesCase2 when %rax is non-zero and at
   CopyFrom1To16BytesCase3 when it is zero.
   NOTE(review): %rax is presumably the NUL-byte mask produced by the
   code that jumps here -- confirm at the jump sites.  */
3036 L(StrncpyExit14Case2OrCase3):
3037 mov -2(%rcx), %r9d
3038 mov $2, %rsi
3039 mov %r9d, -2(%rdx)
3040 test %rax, %rax
3041 jnz L(CopyFrom1To16BytesCase2)
3042 jmp L(CopyFrom1To16BytesCase3)
3043
3044 .p2align 4
/* Copy the 4 bytes that start 3 bytes before %rcx to the 4 bytes that
   start 3 bytes before %rdx (through %r9d), load 1 into %rsi, then
   continue at CopyFrom1To16BytesCase2 when %rax is non-zero and at
   CopyFrom1To16BytesCase3 when it is zero.
   NOTE(review): %rax is presumably the NUL-byte mask produced by the
   code that jumps here -- confirm at the jump sites.  */
3045 L(StrncpyExit15Case2OrCase3):
3046 mov -3(%rcx), %r9d
3047 mov $1, %rsi
3048 mov %r9d, -3(%rdx)
3049 test %rax, %rax
3050 jnz L(CopyFrom1To16BytesCase2)
3051 jmp L(CopyFrom1To16BytesCase3)
3052
3053 .p2align 4
/* StrncpyLeave1: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for StrncpyExit1
   as soon as %r8 is no longer positive, or by fall-through after the
   fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3054 L(StrncpyLeave1):
3055 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3056 add $48, %r8
3057 jle L(StrncpyExit1) /* signed test: %r8 + 48 <= 0 */
3058 palignr $1, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 1 byte */
3059 movaps %xmm2, (%rdx)
3060 movaps 31(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 31 */
3061 lea 16(%rsi), %rsi
3062 sub $16, %r8
3063 jbe L(StrncpyExit1) /* unsigned test: %r8 was <= 16 */
3064 palignr $1, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3065 movaps %xmm2, 16(%rdx)
3066 lea 16(%rsi), %rsi
3067 sub $16, %r8
3068 jbe L(StrncpyExit1)
3069 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3070 lea 16(%rsi), %rsi
3071 sub $16, %r8
3072 jbe L(StrncpyExit1)
3073 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3074 lea 16(%rsi), %rsi
3075 lea -16(%r8), %r8
3076
/* StrncpyExit1: advance %rdx and %rcx by %rsi + 15, copy the 15 bytes
   that end at the new %rcx with two overlapping 8-byte moves, clear
   %rsi and continue at CopyFrom1To16BytesCase3.  */
3077 L(StrncpyExit1):
3078 lea 15(%rdx, %rsi), %rdx
3079 lea 15(%rcx, %rsi), %rcx
3080 mov -15(%rcx), %rsi /* %rsi is free here; its old value is already folded in */
3081 mov -8(%rcx), %rax
3082 mov %rsi, -15(%rdx)
3083 mov %rax, -8(%rdx)
3084 xor %rsi, %rsi
3085 jmp L(CopyFrom1To16BytesCase3)
3086
3087 .p2align 4
/* StrncpyLeave2: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for StrncpyExit2
   as soon as %r8 is no longer positive, or by fall-through after the
   fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3088 L(StrncpyLeave2):
3089 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3090 add $48, %r8
3091 jle L(StrncpyExit2) /* signed test: %r8 + 48 <= 0 */
3092 palignr $2, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 2 bytes */
3093 movaps %xmm2, (%rdx)
3094 movaps 30(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 30 */
3095 lea 16(%rsi), %rsi
3096 sub $16, %r8
3097 jbe L(StrncpyExit2) /* unsigned test: %r8 was <= 16 */
3098 palignr $2, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3099 movaps %xmm2, 16(%rdx)
3100 lea 16(%rsi), %rsi
3101 sub $16, %r8
3102 jbe L(StrncpyExit2)
3103 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3104 lea 16(%rsi), %rsi
3105 sub $16, %r8
3106 jbe L(StrncpyExit2)
3107 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3108 lea 16(%rsi), %rsi
3109 lea -16(%r8), %r8
3110
/* StrncpyExit2: advance %rdx and %rcx by %rsi + 14, copy the 14 bytes
   that end at the new %rcx with two overlapping 8-byte moves, clear
   %rsi and continue at CopyFrom1To16BytesCase3.  */
3111 L(StrncpyExit2):
3112 lea 14(%rdx, %rsi), %rdx
3113 lea 14(%rcx, %rsi), %rcx
3114 mov -14(%rcx), %rsi /* %rsi is free here; its old value is already folded in */
3115 mov -8(%rcx), %rax
3116 mov %rsi, -14(%rdx)
3117 mov %rax, -8(%rdx)
3118 xor %rsi, %rsi
3119 jmp L(CopyFrom1To16BytesCase3)
3120
3121 .p2align 4
/* StrncpyLeave3: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for StrncpyExit3
   as soon as %r8 is no longer positive, or by fall-through after the
   fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3122 L(StrncpyLeave3):
3123 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3124 add $48, %r8
3125 jle L(StrncpyExit3) /* signed test: %r8 + 48 <= 0 */
3126 palignr $3, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 3 bytes */
3127 movaps %xmm2, (%rdx)
3128 movaps 29(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 29 */
3129 lea 16(%rsi), %rsi
3130 sub $16, %r8
3131 jbe L(StrncpyExit3) /* unsigned test: %r8 was <= 16 */
3132 palignr $3, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3133 movaps %xmm2, 16(%rdx)
3134 lea 16(%rsi), %rsi
3135 sub $16, %r8
3136 jbe L(StrncpyExit3)
3137 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3138 lea 16(%rsi), %rsi
3139 sub $16, %r8
3140 jbe L(StrncpyExit3)
3141 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3142 lea 16(%rsi), %rsi
3143 lea -16(%r8), %r8
3144
/* StrncpyExit3: advance %rdx and %rcx by %rsi + 13, copy the 13 bytes
   that end at the new %rcx with two overlapping 8-byte moves, clear
   %rsi and continue at CopyFrom1To16BytesCase3.  */
3145 L(StrncpyExit3):
3146 lea 13(%rdx, %rsi), %rdx
3147 lea 13(%rcx, %rsi), %rcx
3148 mov -13(%rcx), %rsi /* %rsi is free here; its old value is already folded in */
3149 mov -8(%rcx), %rax
3150 mov %rsi, -13(%rdx)
3151 mov %rax, -8(%rdx)
3152 xor %rsi, %rsi
3153 jmp L(CopyFrom1To16BytesCase3)
3154
3155 .p2align 4
/* StrncpyLeave4: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for StrncpyExit4
   as soon as %r8 is no longer positive, or by fall-through after the
   fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3156 L(StrncpyLeave4):
3157 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3158 add $48, %r8
3159 jle L(StrncpyExit4) /* signed test: %r8 + 48 <= 0 */
3160 palignr $4, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 4 bytes */
3161 movaps %xmm2, (%rdx)
3162 movaps 28(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 28 */
3163 lea 16(%rsi), %rsi
3164 sub $16, %r8
3165 jbe L(StrncpyExit4) /* unsigned test: %r8 was <= 16 */
3166 palignr $4, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3167 movaps %xmm2, 16(%rdx)
3168 lea 16(%rsi), %rsi
3169 sub $16, %r8
3170 jbe L(StrncpyExit4)
3171 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3172 lea 16(%rsi), %rsi
3173 sub $16, %r8
3174 jbe L(StrncpyExit4)
3175 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3176 lea 16(%rsi), %rsi
3177 lea -16(%r8), %r8
3178
/* StrncpyExit4: advance %rdx and %rcx by %rsi + 12, copy the 12 bytes
   that end at the new %rcx with an 8-byte move followed by a 4-byte
   move, clear %rsi and continue at CopyFrom1To16BytesCase3.  */
3179 L(StrncpyExit4):
3180 lea 12(%rdx, %rsi), %rdx
3181 lea 12(%rcx, %rsi), %rcx
3182 mov -12(%rcx), %rsi /* %rsi is free here; its old value is already folded in */
3183 mov -4(%rcx), %eax
3184 mov %rsi, -12(%rdx)
3185 mov %eax, -4(%rdx)
3186 xor %rsi, %rsi
3187 jmp L(CopyFrom1To16BytesCase3)
3188
3189 .p2align 4
/* StrncpyLeave5: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for StrncpyExit5
   as soon as %r8 is no longer positive, or by fall-through after the
   fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3190 L(StrncpyLeave5):
3191 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3192 add $48, %r8
3193 jle L(StrncpyExit5) /* signed test: %r8 + 48 <= 0 */
3194 palignr $5, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 5 bytes */
3195 movaps %xmm2, (%rdx)
3196 movaps 27(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 27 */
3197 lea 16(%rsi), %rsi
3198 sub $16, %r8
3199 jbe L(StrncpyExit5) /* unsigned test: %r8 was <= 16 */
3200 palignr $5, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3201 movaps %xmm2, 16(%rdx)
3202 lea 16(%rsi), %rsi
3203 sub $16, %r8
3204 jbe L(StrncpyExit5)
3205 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3206 lea 16(%rsi), %rsi
3207 sub $16, %r8
3208 jbe L(StrncpyExit5)
3209 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3210 lea 16(%rsi), %rsi
3211 lea -16(%r8), %r8
3212
/* StrncpyExit5: advance %rdx and %rcx by %rsi + 11, copy the 11 bytes
   that end at the new %rcx with an 8-byte move and an overlapping
   4-byte move, clear %rsi and continue at CopyFrom1To16BytesCase3.  */
3213 L(StrncpyExit5):
3214 lea 11(%rdx, %rsi), %rdx
3215 lea 11(%rcx, %rsi), %rcx
3216 mov -11(%rcx), %rsi /* %rsi is free here; its old value is already folded in */
3217 mov -4(%rcx), %eax
3218 mov %rsi, -11(%rdx)
3219 mov %eax, -4(%rdx)
3220 xor %rsi, %rsi
3221 jmp L(CopyFrom1To16BytesCase3)
3222
3223 .p2align 4
/* StrncpyLeave6: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for StrncpyExit6
   as soon as %r8 is no longer positive, or by fall-through after the
   fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3224 L(StrncpyLeave6):
3225 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3226 add $48, %r8
3227 jle L(StrncpyExit6) /* signed test: %r8 + 48 <= 0 */
3228 palignr $6, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 6 bytes */
3229 movaps %xmm2, (%rdx)
3230 movaps 26(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 26 */
3231 lea 16(%rsi), %rsi
3232 sub $16, %r8
3233 jbe L(StrncpyExit6) /* unsigned test: %r8 was <= 16 */
3234 palignr $6, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3235 movaps %xmm2, 16(%rdx)
3236 lea 16(%rsi), %rsi
3237 sub $16, %r8
3238 jbe L(StrncpyExit6)
3239 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3240 lea 16(%rsi), %rsi
3241 sub $16, %r8
3242 jbe L(StrncpyExit6)
3243 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3244 lea 16(%rsi), %rsi
3245 lea -16(%r8), %r8
3246
/* StrncpyExit6: advance %rdx and %rcx by %rsi + 10, copy the 10 bytes
   that end at the new %rcx with an 8-byte move followed by a 2-byte
   move, clear %rsi and continue at CopyFrom1To16BytesCase3.  */
3247 L(StrncpyExit6):
3248 lea 10(%rdx, %rsi), %rdx
3249 lea 10(%rcx, %rsi), %rcx
3250 mov -10(%rcx), %rsi /* %rsi is free here; its old value is already folded in */
3251 movw -2(%rcx), %ax
3252 mov %rsi, -10(%rdx)
3253 movw %ax, -2(%rdx)
3254 xor %rsi, %rsi
3255 jmp L(CopyFrom1To16BytesCase3)
3256
3257 .p2align 4
/* StrncpyLeave7: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for StrncpyExit7
   as soon as %r8 is no longer positive, or by fall-through after the
   fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3258 L(StrncpyLeave7):
3259 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3260 add $48, %r8
3261 jle L(StrncpyExit7) /* signed test: %r8 + 48 <= 0 */
3262 palignr $7, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 7 bytes */
3263 movaps %xmm2, (%rdx)
3264 movaps 25(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 25 */
3265 lea 16(%rsi), %rsi
3266 sub $16, %r8
3267 jbe L(StrncpyExit7) /* unsigned test: %r8 was <= 16 */
3268 palignr $7, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3269 movaps %xmm2, 16(%rdx)
3270 lea 16(%rsi), %rsi
3271 sub $16, %r8
3272 jbe L(StrncpyExit7)
3273 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3274 lea 16(%rsi), %rsi
3275 sub $16, %r8
3276 jbe L(StrncpyExit7)
3277 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3278 lea 16(%rsi), %rsi
3279 lea -16(%r8), %r8
3280
/* StrncpyExit7: advance %rdx and %rcx by %rsi + 9, copy the 9 bytes
   that end at the new %rcx with an 8-byte move followed by a 1-byte
   move, clear %rsi and continue at CopyFrom1To16BytesCase3.  */
3281 L(StrncpyExit7):
3282 lea 9(%rdx, %rsi), %rdx
3283 lea 9(%rcx, %rsi), %rcx
3284 mov -9(%rcx), %rsi /* %rsi is free here; its old value is already folded in */
3285 movb -1(%rcx), %ah /* %ah serves as the single-byte temporary */
3286 mov %rsi, -9(%rdx)
3287 movb %ah, -1(%rdx)
3288 xor %rsi, %rsi
3289 jmp L(CopyFrom1To16BytesCase3)
3290
3291 .p2align 4
/* StrncpyLeave8: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for StrncpyExit8
   as soon as %r8 is no longer positive, or by fall-through after the
   fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3292 L(StrncpyLeave8):
3293 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3294 add $48, %r8
3295 jle L(StrncpyExit8) /* signed test: %r8 + 48 <= 0 */
3296 palignr $8, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 8 bytes */
3297 movaps %xmm2, (%rdx)
3298 movaps 24(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 24 */
3299 lea 16(%rsi), %rsi
3300 sub $16, %r8
3301 jbe L(StrncpyExit8) /* unsigned test: %r8 was <= 16 */
3302 palignr $8, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3303 movaps %xmm2, 16(%rdx)
3304 lea 16(%rsi), %rsi
3305 sub $16, %r8
3306 jbe L(StrncpyExit8)
3307 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3308 lea 16(%rsi), %rsi
3309 sub $16, %r8
3310 jbe L(StrncpyExit8)
3311 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3312 lea 16(%rsi), %rsi
3313 lea -16(%r8), %r8
3314
/* StrncpyExit8: advance %rdx and %rcx by %rsi + 8, copy the 8 bytes
   that end at the new %rcx with one 8-byte move, clear %rsi and
   continue at CopyFrom1To16BytesCase3.  */
3315 L(StrncpyExit8):
3316 lea 8(%rdx, %rsi), %rdx
3317 lea 8(%rcx, %rsi), %rcx
3318 mov -8(%rcx), %rax
3319 xor %rsi, %rsi
3320 mov %rax, -8(%rdx)
3321 jmp L(CopyFrom1To16BytesCase3)
3322
3323 .p2align 4
/* StrncpyLeave9: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for StrncpyExit9
   as soon as %r8 is no longer positive, or by fall-through after the
   fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3324 L(StrncpyLeave9):
3325 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3326 add $48, %r8
3327 jle L(StrncpyExit9) /* signed test: %r8 + 48 <= 0 */
3328 palignr $9, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 9 bytes */
3329 movaps %xmm2, (%rdx)
3330 movaps 23(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 23 */
3331 lea 16(%rsi), %rsi
3332 sub $16, %r8
3333 jbe L(StrncpyExit9) /* unsigned test: %r8 was <= 16 */
3334 palignr $9, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3335 movaps %xmm2, 16(%rdx)
3336 lea 16(%rsi), %rsi
3337 sub $16, %r8
3338 jbe L(StrncpyExit9)
3339 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3340 lea 16(%rsi), %rsi
3341 sub $16, %r8
3342 jbe L(StrncpyExit9)
3343 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3344 lea 16(%rsi), %rsi
3345 lea -16(%r8), %r8
3346
/* StrncpyExit9: advance %rdx and %rcx by %rsi + 7, copy the 8 bytes
   that end at the new %rcx with one 8-byte move (it reaches 1 byte
   below the 7-byte advance), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3347 L(StrncpyExit9):
3348 lea 7(%rdx, %rsi), %rdx
3349 lea 7(%rcx, %rsi), %rcx
3350 mov -8(%rcx), %rax
3351 xor %rsi, %rsi
3352 mov %rax, -8(%rdx)
3353 jmp L(CopyFrom1To16BytesCase3)
3354
3355 .p2align 4
/* StrncpyLeave10: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for
   StrncpyExit10 as soon as %r8 is no longer positive, or by fall-through
   after the fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3356 L(StrncpyLeave10):
3357 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3358 add $48, %r8
3359 jle L(StrncpyExit10) /* signed test: %r8 + 48 <= 0 */
3360 palignr $10, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 10 bytes */
3361 movaps %xmm2, (%rdx)
3362 movaps 22(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 22 */
3363 lea 16(%rsi), %rsi
3364 sub $16, %r8
3365 jbe L(StrncpyExit10) /* unsigned test: %r8 was <= 16 */
3366 palignr $10, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3367 movaps %xmm2, 16(%rdx)
3368 lea 16(%rsi), %rsi
3369 sub $16, %r8
3370 jbe L(StrncpyExit10)
3371 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3372 lea 16(%rsi), %rsi
3373 sub $16, %r8
3374 jbe L(StrncpyExit10)
3375 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3376 lea 16(%rsi), %rsi
3377 lea -16(%r8), %r8
3378
/* StrncpyExit10: advance %rdx and %rcx by %rsi + 6, copy the 8 bytes
   that end at the new %rcx with one 8-byte move (it reaches 2 bytes
   below the 6-byte advance), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3379 L(StrncpyExit10):
3380 lea 6(%rdx, %rsi), %rdx
3381 lea 6(%rcx, %rsi), %rcx
3382 mov -8(%rcx), %rax
3383 xor %rsi, %rsi
3384 mov %rax, -8(%rdx)
3385 jmp L(CopyFrom1To16BytesCase3)
3386
3387 .p2align 4
/* StrncpyLeave11: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for
   StrncpyExit11 as soon as %r8 is no longer positive, or by fall-through
   after the fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3388 L(StrncpyLeave11):
3389 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3390 add $48, %r8
3391 jle L(StrncpyExit11) /* signed test: %r8 + 48 <= 0 */
3392 palignr $11, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 11 bytes */
3393 movaps %xmm2, (%rdx)
3394 movaps 21(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 21 */
3395 lea 16(%rsi), %rsi
3396 sub $16, %r8
3397 jbe L(StrncpyExit11) /* unsigned test: %r8 was <= 16 */
3398 palignr $11, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3399 movaps %xmm2, 16(%rdx)
3400 lea 16(%rsi), %rsi
3401 sub $16, %r8
3402 jbe L(StrncpyExit11)
3403 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3404 lea 16(%rsi), %rsi
3405 sub $16, %r8
3406 jbe L(StrncpyExit11)
3407 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3408 lea 16(%rsi), %rsi
3409 lea -16(%r8), %r8
3410
/* StrncpyExit11: advance %rdx and %rcx by %rsi + 5, copy the 8 bytes
   that end at the new %rcx with one 8-byte move (it reaches 3 bytes
   below the 5-byte advance), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3411 L(StrncpyExit11):
3412 lea 5(%rdx, %rsi), %rdx
3413 lea 5(%rcx, %rsi), %rcx
3414 mov -8(%rcx), %rax
3415 xor %rsi, %rsi
3416 mov %rax, -8(%rdx)
3417 jmp L(CopyFrom1To16BytesCase3)
3418
3419 .p2align 4
/* StrncpyLeave12: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for
   StrncpyExit12 as soon as %r8 is no longer positive, or by fall-through
   after the fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3420 L(StrncpyLeave12):
3421 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3422 add $48, %r8
3423 jle L(StrncpyExit12) /* signed test: %r8 + 48 <= 0 */
3424 palignr $12, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 12 bytes */
3425 movaps %xmm2, (%rdx)
3426 movaps 20(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 20 */
3427 lea 16(%rsi), %rsi
3428 sub $16, %r8
3429 jbe L(StrncpyExit12) /* unsigned test: %r8 was <= 16 */
3430 palignr $12, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3431 movaps %xmm2, 16(%rdx)
3432 lea 16(%rsi), %rsi
3433 sub $16, %r8
3434 jbe L(StrncpyExit12)
3435 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3436 lea 16(%rsi), %rsi
3437 sub $16, %r8
3438 jbe L(StrncpyExit12)
3439 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3440 lea 16(%rsi), %rsi
3441 lea -16(%r8), %r8
3442
/* StrncpyExit12: advance %rdx and %rcx by %rsi + 4, copy the 4 bytes
   that end at the new %rcx with one 4-byte move, clear %rsi and
   continue at CopyFrom1To16BytesCase3.  */
3443 L(StrncpyExit12):
3444 lea 4(%rdx, %rsi), %rdx
3445 lea 4(%rcx, %rsi), %rcx
3446 mov -4(%rcx), %eax
3447 xor %rsi, %rsi
3448 mov %eax, -4(%rdx)
3449 jmp L(CopyFrom1To16BytesCase3)
3450
3451 .p2align 4
/* StrncpyLeave13: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for
   StrncpyExit13 as soon as %r8 is no longer positive, or by fall-through
   after the fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3452 L(StrncpyLeave13):
3453 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3454 add $48, %r8
3455 jle L(StrncpyExit13) /* signed test: %r8 + 48 <= 0 */
3456 palignr $13, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 13 bytes */
3457 movaps %xmm2, (%rdx)
3458 movaps 19(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 19 */
3459 lea 16(%rsi), %rsi
3460 sub $16, %r8
3461 jbe L(StrncpyExit13) /* unsigned test: %r8 was <= 16 */
3462 palignr $13, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3463 movaps %xmm2, 16(%rdx)
3464 lea 16(%rsi), %rsi
3465 sub $16, %r8
3466 jbe L(StrncpyExit13)
3467 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3468 lea 16(%rsi), %rsi
3469 sub $16, %r8
3470 jbe L(StrncpyExit13)
3471 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3472 lea 16(%rsi), %rsi
3473 lea -16(%r8), %r8
3474
/* StrncpyExit13: advance %rdx and %rcx by %rsi + 3, copy the 4 bytes
   that end at the new %rcx with one 4-byte move (it reaches 1 byte
   below the 3-byte advance), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3475 L(StrncpyExit13):
3476 lea 3(%rdx, %rsi), %rdx
3477 lea 3(%rcx, %rsi), %rcx
3478 mov -4(%rcx), %eax
3479 xor %rsi, %rsi
3480 mov %eax, -4(%rdx)
3481 jmp L(CopyFrom1To16BytesCase3)
3482
3483 .p2align 4
/* StrncpyLeave14: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for
   StrncpyExit14 as soon as %r8 is no longer positive, or by fall-through
   after the fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3484 L(StrncpyLeave14):
3485 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3486 add $48, %r8
3487 jle L(StrncpyExit14) /* signed test: %r8 + 48 <= 0 */
3488 palignr $14, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 14 bytes */
3489 movaps %xmm2, (%rdx)
3490 movaps 18(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 18 */
3491 lea 16(%rsi), %rsi
3492 sub $16, %r8
3493 jbe L(StrncpyExit14) /* unsigned test: %r8 was <= 16 */
3494 palignr $14, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3495 movaps %xmm2, 16(%rdx)
3496 lea 16(%rsi), %rsi
3497 sub $16, %r8
3498 jbe L(StrncpyExit14)
3499 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3500 lea 16(%rsi), %rsi
3501 sub $16, %r8
3502 jbe L(StrncpyExit14)
3503 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3504 lea 16(%rsi), %rsi
3505 lea -16(%r8), %r8
3506
/* StrncpyExit14: advance %rdx and %rcx by %rsi + 2, copy the 2 bytes
   that end at the new %rcx with one 2-byte move, clear %rsi and
   continue at CopyFrom1To16BytesCase3.  */
3507 L(StrncpyExit14):
3508 lea 2(%rdx, %rsi), %rdx
3509 lea 2(%rcx, %rsi), %rcx
3510 movw -2(%rcx), %ax
3511 xor %rsi, %rsi
3512 movw %ax, -2(%rdx)
3513 jmp L(CopyFrom1To16BytesCase3)
3514
3515 .p2align 4
/* StrncpyLeave15: add 48 back to %r8, then store up to four 16-byte
   chunks at (%rdx), 16(%rdx), 32(%rdx) and 48(%rdx).  Every stored chunk
   adds 16 to %rsi and takes 16 from %r8; control leaves for
   StrncpyExit15 as soon as %r8 is no longer positive, or by fall-through
   after the fourth chunk.
   NOTE(review): %xmm1, %xmm2, %xmm4 and %xmm5 are presumably filled by
   the loop that jumps here -- confirm at the jump sites.  */
3516 L(StrncpyLeave15):
3517 movaps %xmm2, %xmm3 /* keep %xmm2; the palignr below overwrites it */
3518 add $48, %r8
3519 jle L(StrncpyExit15) /* signed test: %r8 + 48 <= 0 */
3520 palignr $15, %xmm1, %xmm2 /* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 15 bytes */
3521 movaps %xmm2, (%rdx)
3522 movaps 17(%rcx), %xmm2 /* aligned 16-byte load from %rcx + 17 */
3523 lea 16(%rsi), %rsi
3524 sub $16, %r8
3525 jbe L(StrncpyExit15) /* unsigned test: %r8 was <= 16 */
3526 palignr $15, %xmm3, %xmm2 /* combine with the copy saved in %xmm3 */
3527 movaps %xmm2, 16(%rdx)
3528 lea 16(%rsi), %rsi
3529 sub $16, %r8
3530 jbe L(StrncpyExit15)
3531 movaps %xmm4, 32(%rdx) /* stored as is, no palignr on this path */
3532 lea 16(%rsi), %rsi
3533 sub $16, %r8
3534 jbe L(StrncpyExit15)
3535 movaps %xmm5, 48(%rdx) /* stored as is, no palignr on this path */
3536 lea 16(%rsi), %rsi
3537 lea -16(%r8), %r8
3538
/* StrncpyExit15: advance %rdx and %rcx by %rsi + 1, copy the single
   byte just below the new %rcx, clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3539 L(StrncpyExit15):
3540 lea 1(%rdx, %rsi), %rdx
3541 lea 1(%rcx, %rsi), %rcx
3542 movb -1(%rcx), %ah /* %ah serves as the single-byte temporary */
3543 xor %rsi, %rsi
3544 movb %ah, -1(%rdx)
3545 jmp L(CopyFrom1To16BytesCase3)
3546
3547 # endif
3548 # ifndef USE_AS_STRCAT
3549 END (STRCPY)
3550 # endif
3551 #endif