]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/x86_64/multiarch/strcpy-ssse3.S
Fix overflow bug in optimized strncat for x86-64
[thirdparty/glibc.git] / sysdeps / x86_64 / multiarch / strcpy-ssse3.S
1 /* strcpy with SSSE3
2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21 #ifndef NOT_IN_libc
22
23 # ifndef USE_AS_STRCAT
24 # include <sysdep.h>
25
26 # ifndef STRCPY
27 # define STRCPY __strcpy_ssse3
28 # endif
29
30 .section .text.ssse3,"ax",@progbits
/* STRCPY (default name __strcpy_ssse3): string copy using SSSE3.
   The source pointer arrives in %rsi and the destination pointer in
   %rdi; with USE_AS_STRNCPY the byte limit n arrives in %rdx.
   Register roles established here:
     rcx = source pointer
     rdx = destination pointer
     r8  = byte limit / running count (USE_AS_STRNCPY only)
   The L(Exit*), L(StrncpyExit*) and L(CopyFrom1To16Bytes*) targets are
   defined outside this part of the file.  */
31 ENTRY (STRCPY)
32 mov %rsi, %rcx
33 # ifdef USE_AS_STRNCPY
34 mov %rdx, %r8
35 # endif
36 mov %rdi, %rdx
37 # ifdef USE_AS_STRNCPY
/* n == 0 goes to L(Exit0); n <= 8 (unsigned) goes to a dedicated
   short path.  */
38 test %r8, %r8
39 jz L(Exit0)
40 cmp $8, %r8
41 jbe L(StrncpyExit8Bytes)
42 # endif
/* Test source bytes 0..7 one at a time: L(ExitK) is taken when the
   byte at offset K-1 is NUL.  */
43 cmpb $0, (%rcx)
44 jz L(Exit1)
45 cmpb $0, 1(%rcx)
46 jz L(Exit2)
47 cmpb $0, 2(%rcx)
48 jz L(Exit3)
49 cmpb $0, 3(%rcx)
50 jz L(Exit4)
51 cmpb $0, 4(%rcx)
52 jz L(Exit5)
53 cmpb $0, 5(%rcx)
54 jz L(Exit6)
55 cmpb $0, 6(%rcx)
56 jz L(Exit7)
57 cmpb $0, 7(%rcx)
58 jz L(Exit8)
59 # ifdef USE_AS_STRNCPY
/* Bytes 0..7 are non-NUL and n > 8.  With n < 16 (unsigned) the rest
   is handled by L(StrncpyExit15Bytes).  */
60 cmp $16, %r8
61 jb L(StrncpyExit15Bytes)
62 # endif
/* Same byte-wise test for source bytes 8..14.  */
63 cmpb $0, 8(%rcx)
64 jz L(Exit9)
65 cmpb $0, 9(%rcx)
66 jz L(Exit10)
67 cmpb $0, 10(%rcx)
68 jz L(Exit11)
69 cmpb $0, 11(%rcx)
70 jz L(Exit12)
71 cmpb $0, 12(%rcx)
72 jz L(Exit13)
73 cmpb $0, 13(%rcx)
74 jz L(Exit14)
75 cmpb $0, 14(%rcx)
76 jz L(Exit15)
77 # ifdef USE_AS_STRNCPY
/* n == 16: go to L(Exit16) without looking at byte 15.  */
78 cmp $16, %r8
79 je L(Exit16)
80 # endif
81 cmpb $0, 15(%rcx)
82 jz L(Exit16)
/* The endif below closes "ifndef USE_AS_STRCAT": when USE_AS_STRCAT is
   defined, ENTRY and all the byte-wise checks above are compiled out.
   NOTE(review): the code that follows then depends on the including
   file having set up rcx/rdx (and r8) itself; confirm against the
   strcat sources.  */
83 # endif
84
85 # ifdef USE_AS_STRNCPY
/* r8 = n - 16 + rcx_shift, where rcx_shift = rcx & 0xf.  */
86 mov %rcx, %rsi
87 sub $16, %r8
88 and $0xf, %rsi
89
90 /* add rcx_shift to r8 (r8 was already reduced by 16 above) */
91 add %rsi, %r8
92 # endif
93 lea 16(%rcx), %rsi
94 /* Now:
95 rsi = alignment_16(rcx) + rcx_shift + 16;
96 rcx_shift = rcx - alignment_16(rcx)
97 */
98 and $-16, %rsi
99 /* Now:
100 rsi = alignment_16(rcx) + 16
101 */
/* xmm0 = 0: the reference value for the NUL compares.  The four moves
   through r9 copy source bytes 0..15 to the destination as two 8-byte
   pieces; they are interleaved with the compare of the next aligned
   block.  */
102 pxor %xmm0, %xmm0
103 mov (%rcx), %r9
104 mov %r9, (%rdx)
105 /*
106 check whether there is a zero byte in the next 16 bytes of the string
107 from rsi to rsi + 15 and form mask in xmm0
108 */
109 pcmpeqb (%rsi), %xmm0
110 mov 8(%rcx), %r9
111 mov %r9, 8(%rdx)
112
113 /* convert byte mask in xmm0 to bit mask */
114
115 pmovmskb %xmm0, %rax
116 sub %rcx, %rsi
117
118 /* rsi = 16 - rcx_shift */
119
120 /* rax = 0: there isn't end of string from position rsi to rsi+15 */
121
122 # ifdef USE_AS_STRNCPY
/* jbe after sub: taken when r8 was <= 16 (unsigned) before the
   subtraction.  */
123 sub $16, %r8
124 jbe L(CopyFrom1To16BytesCase2OrCase3)
125 # endif
/* A NUL was seen in the aligned block at rcx + rsi: rax holds its
   mask on the jump.  */
126 test %rax, %rax
127 jnz L(CopyFrom1To16Bytes)
128
/* Round the destination pointer up to the next 16-byte boundary and
   keep the (negative) distance old rdx - new rdx in rax.  */
129 mov %rdx, %rax
130 lea 16(%rdx), %rdx
131 /* Now:
132 rdx = rdx + 16 = alignment_16(rdx) + rdx_shift + 16
133 */
134 and $-16, %rdx
135
136 /* Now: rdx = alignment_16(rdx) + 16 */
137
138 sub %rdx, %rax
139
140 /* Now: rax = rdx_shift - 16 */
141
142 # ifdef USE_AS_STRNCPY
/* rsi = (16 - rcx_shift) + (rdx_shift - 16) - 1
       = rdx_shift - rcx_shift - 1.
   The and keeps only bit 31 of esi, i.e. the sign of that small
   difference; it is set when rdx_shift <= rcx_shift, and then r8 is
   left alone.  Otherwise (rdx_shift > rcx_shift) 16 is added back to
   r8 -- presumably because the adjusted rcx below then lies before
   alignment_16(rcx) + 16, so the block already charged to r8 gets
   processed again; confirm.  */
143 add %rax, %rsi
144 lea -1(%rsi), %rsi
145 and $1<<31, %esi
146 test %rsi, %rsi
147 jnz L(ContinueCopy)
148 lea 16(%r8), %r8
149
150 L(ContinueCopy):
151 # endif
/* Advance rcx by the same amount rdx was advanced (16 - rdx_shift), so
   rcx again addresses the source byte that belongs at rdx.  */
152 sub %rax, %rcx
153 /* Now:
154 case rcx_shift >= rdx_shift:
155 rcx = alignment_16(rcx) + (rcx_shift - rdx_shift) + 16
156 case rcx_shift < rdx_shift:
157 rcx = alignment_16(rcx) + (16 + rcx_shift - rdx_shift)
158 */
159 mov %rcx, %rax
160 and $0xf, %rax
161 /* Now:
162 case rcx_shift >= rdx_shift: rax = rcx_shift - rdx_shift
163 case rcx_shift < rdx_shift: rax = (16 + rcx_shift - rdx_shift)
164 rax can be 0, 1, ..., 15
165 */
/* rsi = 0.  This has to be a mov (which leaves the flags alone): the
   jz below still consumes ZF from the "and $0xf, %rax" above.  */
166 mov $0, %rsi
167
168 /* case: rcx_shift == rdx_shift */
169
170 jz L(Align16Both)
171
/* Dispatch on rax = 1..15 to the matching L(ShlN) path.  Values >= 8
   are split off first to shorten the compare chain.  */
172 cmp $8, %rax
173 jae L(ShlHigh8)
174 cmp $1, %rax
175 je L(Shl1)
176 cmp $2, %rax
177 je L(Shl2)
178 cmp $3, %rax
179 je L(Shl3)
180 cmp $4, %rax
181 je L(Shl4)
182 cmp $5, %rax
183 je L(Shl5)
184 cmp $6, %rax
185 je L(Shl6)
186 jmp L(Shl7)
187
/* Reached only through the jae above, so the flags are still those of
   "cmp $8, %rax": the first je tests rax == 8 without a new compare.  */
188 L(ShlHigh8):
189 je L(Shl8)
190 cmp $9, %rax
191 je L(Shl9)
192 cmp $10, %rax
193 je L(Shl10)
194 cmp $11, %rax
195 je L(Shl11)
196 cmp $12, %rax
197 je L(Shl12)
198 cmp $13, %rax
199 je L(Shl13)
200 cmp $14, %rax
201 je L(Shl14)
202 jmp L(Shl15)
203
/* L(Align16Both): reached from the dispatch code when (rcx & 0xf) == 0;
   rdx was rounded to a 16-byte boundary there as well, so every access
   below can use movaps.  rsi starts at 0 and is the running offset from
   rcx/rdx.
   Unrolled pattern: load the next aligned source block, store the block
   loaded in the previous step, test the new block for NUL.  After each
   "lea 16(%rsi), %rsi" rsi is the offset of the block just tested, so
   on a jump to L(CopyFrom1To16Bytes) that block is at rcx + rsi and its
   NUL mask is in rax.  xmm0 stays all zero as long as no NUL is
   found.  */
204 L(Align16Both):
205 movaps (%rcx), %xmm1
206 movaps 16(%rcx), %xmm2
207 movaps %xmm1, (%rdx)
208 pcmpeqb %xmm2, %xmm0
209 pmovmskb %xmm0, %rax
210 lea 16(%rsi), %rsi
211 # ifdef USE_AS_STRNCPY
/* strncpy builds: taken when r8 was <= 16 (unsigned) before the
   subtraction.  The same check follows every block below.  */
212 sub $16, %r8
213 jbe L(CopyFrom1To16BytesCase2OrCase3)
214 # endif
215 test %rax, %rax
216 jnz L(CopyFrom1To16Bytes)
217
218 movaps 16(%rcx, %rsi), %xmm3
219 movaps %xmm2, (%rdx, %rsi)
220 pcmpeqb %xmm3, %xmm0
221 pmovmskb %xmm0, %rax
222 lea 16(%rsi), %rsi
223 # ifdef USE_AS_STRNCPY
224 sub $16, %r8
225 jbe L(CopyFrom1To16BytesCase2OrCase3)
226 # endif
227 test %rax, %rax
228 jnz L(CopyFrom1To16Bytes)
229
230 movaps 16(%rcx, %rsi), %xmm4
231 movaps %xmm3, (%rdx, %rsi)
232 pcmpeqb %xmm4, %xmm0
233 pmovmskb %xmm0, %rax
234 lea 16(%rsi), %rsi
235 # ifdef USE_AS_STRNCPY
236 sub $16, %r8
237 jbe L(CopyFrom1To16BytesCase2OrCase3)
238 # endif
239 test %rax, %rax
240 jnz L(CopyFrom1To16Bytes)
241
242 movaps 16(%rcx, %rsi), %xmm1
243 movaps %xmm4, (%rdx, %rsi)
244 pcmpeqb %xmm1, %xmm0
245 pmovmskb %xmm0, %rax
246 lea 16(%rsi), %rsi
247 # ifdef USE_AS_STRNCPY
248 sub $16, %r8
249 jbe L(CopyFrom1To16BytesCase2OrCase3)
250 # endif
251 test %rax, %rax
252 jnz L(CopyFrom1To16Bytes)
253
254 movaps 16(%rcx, %rsi), %xmm2
255 movaps %xmm1, (%rdx, %rsi)
256 pcmpeqb %xmm2, %xmm0
257 pmovmskb %xmm0, %rax
258 lea 16(%rsi), %rsi
259 # ifdef USE_AS_STRNCPY
260 sub $16, %r8
261 jbe L(CopyFrom1To16BytesCase2OrCase3)
262 # endif
263 test %rax, %rax
264 jnz L(CopyFrom1To16Bytes)
265
266 movaps 16(%rcx, %rsi), %xmm3
267 movaps %xmm2, (%rdx, %rsi)
268 pcmpeqb %xmm3, %xmm0
269 pmovmskb %xmm0, %rax
270 lea 16(%rsi), %rsi
271 # ifdef USE_AS_STRNCPY
272 sub $16, %r8
273 jbe L(CopyFrom1To16BytesCase2OrCase3)
274 # endif
275 test %rax, %rax
276 jnz L(CopyFrom1To16Bytes)
277
/* Store the last unrolled block, then set up the 64-byte loop that
   follows:
     rcx = (rcx + rsi + 16) & -64   (source rounded down to 64 bytes)
     rax = old rcx - new rcx
     rdx = rdx - rax                (rdx moves by the same amount as rcx)
     r8  = r8 + rax + 112           (strncpy builds only)
     rsi = -64
   Rounding down means bytes between the new rcx and the data already
   stored may be copied a second time by the loop.  */
278 movaps %xmm3, (%rdx, %rsi)
279 mov %rcx, %rax
280 lea 16(%rcx, %rsi), %rcx
281 and $-0x40, %rcx
282 sub %rcx, %rax
283 sub %rax, %rdx
284 # ifdef USE_AS_STRNCPY
285 lea 48+64(%r8, %rax), %r8
286 # endif
287 mov $-0x40, %rsi
288
/* L(Aligned64Loop): main loop when source and destination share the
   same 16-byte alignment; handles 64 bytes per iteration.
   Four aligned blocks are loaded; untouched copies are kept in
   xmm4 (rcx + 0), xmm5 (rcx + 16), xmm6 (rcx + 32) and xmm7 (rcx + 48).
   pminub folds them into xmm3, which has a zero byte iff any of the 64
   source bytes is zero; comparing with xmm0 (all zero here) turns that
   into the mask in rax.
   Both pointers are advanced before the test, which is why the stores
   at the bottom use negative offsets.  */
289 L(Aligned64Loop):
290 movaps (%rcx), %xmm2
291 movaps %xmm2, %xmm4
292 movaps 16(%rcx), %xmm5
293 movaps 32(%rcx), %xmm3
294 movaps %xmm3, %xmm6
295 movaps 48(%rcx), %xmm7
296 pminub %xmm5, %xmm2
297 pminub %xmm7, %xmm3
298 pminub %xmm2, %xmm3
299 pcmpeqb %xmm0, %xmm3
300 pmovmskb %xmm3, %rax
301 lea 64(%rdx), %rdx
302 lea 64(%rcx), %rcx
303 # ifdef USE_AS_STRNCPY
/* strncpy builds: taken when r8 was <= 64 (unsigned) before the
   subtraction; nothing from this iteration has been stored yet.  */
304 sub $64, %r8
305 jbe L(StrncpyLeaveCase2OrCase3)
306 # endif
/* A NUL somewhere in these 64 bytes: let L(Aligned64Leave) locate it.  */
307 test %rax, %rax
308 jnz L(Aligned64Leave)
309 movaps %xmm4, -64(%rdx)
310 movaps %xmm5, -48(%rdx)
311 movaps %xmm6, -32(%rdx)
312 movaps %xmm7, -16(%rdx)
313 jmp L(Aligned64Loop)
314
/* L(Aligned64Leave): the 64 bytes held in xmm4..xmm7 contain a NUL and
   none of them has been stored yet (rcx and rdx already point 64 bytes
   past them).  The blocks are examined in order; a block is stored only
   once it is known to be NUL-free.  rsi enters as -64 (set before the
   loop) and is advanced by 16 per block, so on every jump to
   L(CopyFrom1To16Bytes) the block described by the mask in rax is at
   rcx + rsi.  xmm0 is all zero on entry and after every compare that
   finds nothing.
   strncpy builds: r8 is raised by 48 first and lowered by 16 for each
   further block examined.  */
315 L(Aligned64Leave):
316 # ifdef USE_AS_STRNCPY
317 lea 48(%r8), %r8
318 # endif
319 pcmpeqb %xmm4, %xmm0
320 pmovmskb %xmm0, %rax
321 test %rax, %rax
322 jnz L(CopyFrom1To16Bytes)
323
324 pcmpeqb %xmm5, %xmm0
325 # ifdef USE_AS_STRNCPY
326 lea -16(%r8), %r8
327 # endif
328 pmovmskb %xmm0, %rax
329 movaps %xmm4, -64(%rdx)
/* lea does not modify flags, so the jnz still acts on the test.  */
330 test %rax, %rax
331 lea 16(%rsi), %rsi
332 jnz L(CopyFrom1To16Bytes)
333
334 pcmpeqb %xmm6, %xmm0
335 # ifdef USE_AS_STRNCPY
336 lea -16(%r8), %r8
337 # endif
338 pmovmskb %xmm0, %rax
339 movaps %xmm5, -48(%rdx)
340 test %rax, %rax
341 lea 16(%rsi), %rsi
342 jnz L(CopyFrom1To16Bytes)
343
/* The first three blocks are clean, so the NUL must be in xmm7: no
   test is needed before the jump.  */
344 movaps %xmm6, -32(%rdx)
345 pcmpeqb %xmm7, %xmm0
346 # ifdef USE_AS_STRNCPY
347 lea -16(%r8), %r8
348 # endif
349 pmovmskb %xmm0, %rax
350 lea 16(%rsi), %rsi
351 jmp L(CopyFrom1To16Bytes)
352
353 .p2align 4
/* L(Shl1): copy path for a source pointer that is 1 byte past a
   16-byte boundary while rdx is 16-byte aligned; selected by the
   rax == 1 compare in the dispatch code.  All loads use the aligned
   addresses rcx - 1, rcx + 15, ...; each 16-byte output block is
   extracted from two neighbouring aligned blocks with palignr $1.
     xmm1 = aligned block that contains the byte at rcx (starts at rcx - 1)
     xmm2 = the aligned block after it (starts at rcx + 15)
     xmm0 = all zero going into every pcmpeqb below  */
354 L(Shl1):
355 movaps -1(%rcx), %xmm1
356 movaps 15(%rcx), %xmm2
/* Entered by fall-through and again from L(Shl1LoopStart) when the
   64-byte loop has seen a NUL.  rax = NUL mask of xmm2; xmm3 keeps an
   unshifted copy of xmm2 because palignr overwrites it.  */
357 L(Shl1Start):
358 pcmpeqb %xmm2, %xmm0
359 pmovmskb %xmm0, %rax
360 movaps %xmm2, %xmm3
361 # ifdef USE_AS_STRNCPY
/* strncpy builds: taken when r8 was <= 16 (unsigned) before the
   subtraction.  The same check follows every block below.  */
362 sub $16, %r8
363 jbe L(StrncpyExit1Case2OrCase3)
364 # endif
365 test %rax, %rax
366 jnz L(Shl1LoopExit)
367
/* xmm2 <- bytes 1..16 of xmm2:xmm1, i.e. the 16 source bytes at rcx.
   Store them, then slide the window: xmm1 <- old xmm2 (from xmm3),
   xmm2 <- next aligned block; both pointers advance by 16.  */
368 palignr $1, %xmm1, %xmm2
369 movaps %xmm3, %xmm1
370 movaps %xmm2, (%rdx)
371 movaps 31(%rcx), %xmm2
372
373 pcmpeqb %xmm2, %xmm0
374 lea 16(%rdx), %rdx
375 pmovmskb %xmm0, %rax
376 lea 16(%rcx), %rcx
377 movaps %xmm2, %xmm3
378 # ifdef USE_AS_STRNCPY
379 sub $16, %r8
380 jbe L(StrncpyExit1Case2OrCase3)
381 # endif
382 test %rax, %rax
383 jnz L(Shl1LoopExit)
384
385 palignr $1, %xmm1, %xmm2
386 movaps %xmm2, (%rdx)
387 movaps 31(%rcx), %xmm2
388 movaps %xmm3, %xmm1
389
390 pcmpeqb %xmm2, %xmm0
391 lea 16(%rdx), %rdx
392 pmovmskb %xmm0, %rax
393 lea 16(%rcx), %rcx
394 movaps %xmm2, %xmm3
395 # ifdef USE_AS_STRNCPY
396 sub $16, %r8
397 jbe L(StrncpyExit1Case2OrCase3)
398 # endif
399 test %rax, %rax
400 jnz L(Shl1LoopExit)
401
402 palignr $1, %xmm1, %xmm2
403 movaps %xmm3, %xmm1
404 movaps %xmm2, (%rdx)
405 movaps 31(%rcx), %xmm2
406
407 pcmpeqb %xmm2, %xmm0
408 lea 16(%rdx), %rdx
409 pmovmskb %xmm0, %rax
410 lea 16(%rcx), %rcx
411 movaps %xmm2, %xmm3
412 # ifdef USE_AS_STRNCPY
413 sub $16, %r8
414 jbe L(StrncpyExit1Case2OrCase3)
415 # endif
416 test %rax, %rax
417 jnz L(Shl1LoopExit)
418
/* Fourth block stored.  Prepare the 64-byte loop: rcx + 31 is the next
   aligned source block not yet loaded; round it down to a 64-byte
   boundary.  rax = number of bytes stepped back; rdx (and r8 in strncpy
   builds) are adjusted by the same amount, so some data may be copied
   twice.  "lea -15" restores rcx's 1-byte offset from alignment.  */
419 palignr $1, %xmm1, %xmm2
420 movaps %xmm3, %xmm1
421 movaps %xmm2, (%rdx)
422 lea 31(%rcx), %rcx
423 lea 16(%rdx), %rdx
424
425 mov %rcx, %rax
426 and $-0x40, %rcx
427 sub %rcx, %rax
428 lea -15(%rcx), %rcx
429 sub %rax, %rdx
430 # ifdef USE_AS_STRNCPY
431 add %rax, %r8
432 # endif
433 movaps -1(%rcx), %xmm1
434
/* Main loop: 64 source bytes per iteration.
   xmm2..xmm5 = four consecutive aligned blocks starting at rcx + 15
   (a 64-byte boundary), xmm1 = the block before them.  xmm6/xmm7
   accumulate the byte-wise minimum of all four, so comparing xmm7 with
   xmm0 gives a non-zero mask iff one of the 64 bytes is NUL.  xmm7 is
   then reused for an unshifted copy of xmm5, the next iteration's xmm1.
   palignr does not modify flags, so the jnz still sees the test.  */
435 L(Shl1LoopStart):
436 movaps 15(%rcx), %xmm2
437 movaps 31(%rcx), %xmm3
438 movaps %xmm3, %xmm6
439 movaps 47(%rcx), %xmm4
440 movaps %xmm4, %xmm7
441 movaps 63(%rcx), %xmm5
442 pminub %xmm2, %xmm6
443 pminub %xmm5, %xmm7
444 pminub %xmm6, %xmm7
445 pcmpeqb %xmm0, %xmm7
446 pmovmskb %xmm7, %rax
447 movaps %xmm5, %xmm7
448 palignr $1, %xmm4, %xmm5
449 test %rax, %rax
450 palignr $1, %xmm3, %xmm4
/* NUL found: redo these blocks one at a time from L(Shl1Start), which
   only needs xmm1 and xmm2 (both still unmodified).  */
451 jnz L(Shl1Start)
452 # ifdef USE_AS_STRNCPY
453 sub $64, %r8
454 jbe L(StrncpyLeave1)
455 # endif
456 palignr $1, %xmm2, %xmm3
457 lea 64(%rcx), %rcx
458 palignr $1, %xmm1, %xmm2
459 movaps %xmm7, %xmm1
460 movaps %xmm5, 48(%rdx)
461 movaps %xmm4, 32(%rdx)
462 movaps %xmm3, 16(%rdx)
463 movaps %xmm2, (%rdx)
464 lea 64(%rdx), %rdx
465 jmp L(Shl1LoopStart)
466
/* xmm2 contains the terminator (mask in rax).  Flush the 15 source
   bytes still pending in xmm1: xmm6 <- current destination block with
   its top byte moved to the bottom; palignr $1 then yields bytes 1..15
   of xmm1 followed by that original destination byte.  The store thus
   writes 15 new bytes and leaves the 16th as it was.
   rsi = 15 on the jump to L(CopyFrom1To16Bytes) (defined outside this
   part of the file) -- presumably the offset from rcx/rdx of the block
   that rax describes; confirm against that label.  */
467 L(Shl1LoopExit):
468 movaps (%rdx), %xmm6
469 psrldq $15, %xmm6
470 mov $15, %rsi
471 palignr $1, %xmm1, %xmm6
472 movaps %xmm6, (%rdx)
473 jmp L(CopyFrom1To16Bytes)
474
475 .p2align 4
/* L(Shl2): copy path for a source pointer that is 2 bytes past a
   16-byte boundary while rdx is 16-byte aligned; selected by the
   rax == 2 compare in the dispatch code.  All loads use the aligned
   addresses rcx - 2, rcx + 14, ...; each 16-byte output block is
   extracted from two neighbouring aligned blocks with palignr $2.
     xmm1 = aligned block that contains the byte at rcx (starts at rcx - 2)
     xmm2 = the aligned block after it (starts at rcx + 14)
     xmm0 = all zero going into every pcmpeqb below  */
476 L(Shl2):
477 movaps -2(%rcx), %xmm1
478 movaps 14(%rcx), %xmm2
/* Entered by fall-through and again from L(Shl2LoopStart) when the
   64-byte loop has seen a NUL.  rax = NUL mask of xmm2; xmm3 keeps an
   unshifted copy of xmm2 because palignr overwrites it.  */
479 L(Shl2Start):
480 pcmpeqb %xmm2, %xmm0
481 pmovmskb %xmm0, %rax
482 movaps %xmm2, %xmm3
483 # ifdef USE_AS_STRNCPY
/* strncpy builds: taken when r8 was <= 16 (unsigned) before the
   subtraction.  The same check follows every block below.  */
484 sub $16, %r8
485 jbe L(StrncpyExit2Case2OrCase3)
486 # endif
487 test %rax, %rax
488 jnz L(Shl2LoopExit)
489
/* xmm2 <- bytes 2..17 of xmm2:xmm1, i.e. the 16 source bytes at rcx.
   Store them, then slide the window: xmm1 <- old xmm2 (from xmm3),
   xmm2 <- next aligned block; both pointers advance by 16.  */
490 palignr $2, %xmm1, %xmm2
491 movaps %xmm3, %xmm1
492 movaps %xmm2, (%rdx)
493 movaps 30(%rcx), %xmm2
494
495 pcmpeqb %xmm2, %xmm0
496 lea 16(%rdx), %rdx
497 pmovmskb %xmm0, %rax
498 lea 16(%rcx), %rcx
499 movaps %xmm2, %xmm3
500 # ifdef USE_AS_STRNCPY
501 sub $16, %r8
502 jbe L(StrncpyExit2Case2OrCase3)
503 # endif
504 test %rax, %rax
505 jnz L(Shl2LoopExit)
506
507 palignr $2, %xmm1, %xmm2
508 movaps %xmm2, (%rdx)
509 movaps 30(%rcx), %xmm2
510 movaps %xmm3, %xmm1
511
512 pcmpeqb %xmm2, %xmm0
513 lea 16(%rdx), %rdx
514 pmovmskb %xmm0, %rax
515 lea 16(%rcx), %rcx
516 movaps %xmm2, %xmm3
517 # ifdef USE_AS_STRNCPY
518 sub $16, %r8
519 jbe L(StrncpyExit2Case2OrCase3)
520 # endif
521 test %rax, %rax
522 jnz L(Shl2LoopExit)
523
524 palignr $2, %xmm1, %xmm2
525 movaps %xmm3, %xmm1
526 movaps %xmm2, (%rdx)
527 movaps 30(%rcx), %xmm2
528
529 pcmpeqb %xmm2, %xmm0
530 lea 16(%rdx), %rdx
531 pmovmskb %xmm0, %rax
532 lea 16(%rcx), %rcx
533 movaps %xmm2, %xmm3
534 # ifdef USE_AS_STRNCPY
535 sub $16, %r8
536 jbe L(StrncpyExit2Case2OrCase3)
537 # endif
538 test %rax, %rax
539 jnz L(Shl2LoopExit)
540
/* Fourth block stored.  Prepare the 64-byte loop: rcx + 30 is the next
   aligned source block not yet loaded; round it down to a 64-byte
   boundary.  rax = number of bytes stepped back; rdx (and r8 in strncpy
   builds) are adjusted by the same amount, so some data may be copied
   twice.  "lea -14" restores rcx's 2-byte offset from alignment.  */
541 palignr $2, %xmm1, %xmm2
542 movaps %xmm3, %xmm1
543 movaps %xmm2, (%rdx)
544 lea 30(%rcx), %rcx
545 lea 16(%rdx), %rdx
546
547 mov %rcx, %rax
548 and $-0x40, %rcx
549 sub %rcx, %rax
550 lea -14(%rcx), %rcx
551 sub %rax, %rdx
552 # ifdef USE_AS_STRNCPY
553 add %rax, %r8
554 # endif
555 movaps -2(%rcx), %xmm1
556
/* Main loop: 64 source bytes per iteration.
   xmm2..xmm5 = four consecutive aligned blocks starting at rcx + 14
   (a 64-byte boundary), xmm1 = the block before them.  xmm6/xmm7
   accumulate the byte-wise minimum of all four, so comparing xmm7 with
   xmm0 gives a non-zero mask iff one of the 64 bytes is NUL.  xmm7 is
   then reused for an unshifted copy of xmm5, the next iteration's xmm1.
   palignr does not modify flags, so the jnz still sees the test.  */
557 L(Shl2LoopStart):
558 movaps 14(%rcx), %xmm2
559 movaps 30(%rcx), %xmm3
560 movaps %xmm3, %xmm6
561 movaps 46(%rcx), %xmm4
562 movaps %xmm4, %xmm7
563 movaps 62(%rcx), %xmm5
564 pminub %xmm2, %xmm6
565 pminub %xmm5, %xmm7
566 pminub %xmm6, %xmm7
567 pcmpeqb %xmm0, %xmm7
568 pmovmskb %xmm7, %rax
569 movaps %xmm5, %xmm7
570 palignr $2, %xmm4, %xmm5
571 test %rax, %rax
572 palignr $2, %xmm3, %xmm4
/* NUL found: redo these blocks one at a time from L(Shl2Start), which
   only needs xmm1 and xmm2 (both still unmodified).  */
573 jnz L(Shl2Start)
574 # ifdef USE_AS_STRNCPY
575 sub $64, %r8
576 jbe L(StrncpyLeave2)
577 # endif
578 palignr $2, %xmm2, %xmm3
579 lea 64(%rcx), %rcx
580 palignr $2, %xmm1, %xmm2
581 movaps %xmm7, %xmm1
582 movaps %xmm5, 48(%rdx)
583 movaps %xmm4, 32(%rdx)
584 movaps %xmm3, 16(%rdx)
585 movaps %xmm2, (%rdx)
586 lea 64(%rdx), %rdx
587 jmp L(Shl2LoopStart)
588
/* xmm2 contains the terminator (mask in rax).  Flush the 14 source
   bytes still pending in xmm1: xmm6 <- current destination block with
   its top 2 bytes moved to the bottom; palignr $2 then yields bytes
   2..15 of xmm1 followed by those 2 original destination bytes.  The
   store thus writes 14 new bytes and leaves the last 2 as they were.
   rsi = 14 on the jump to L(CopyFrom1To16Bytes) (defined outside this
   part of the file) -- presumably the offset from rcx/rdx of the block
   that rax describes; confirm against that label.  */
589 L(Shl2LoopExit):
590 movaps (%rdx), %xmm6
591 psrldq $14, %xmm6
592 mov $14, %rsi
593 palignr $2, %xmm1, %xmm6
594 movaps %xmm6, (%rdx)
595 jmp L(CopyFrom1To16Bytes)
596
597 .p2align 4
/* L(Shl3): copy path for a source pointer that is 3 bytes past a
   16-byte boundary while rdx is 16-byte aligned; selected by the
   rax == 3 compare in the dispatch code.  All loads use the aligned
   addresses rcx - 3, rcx + 13, ...; each 16-byte output block is
   extracted from two neighbouring aligned blocks with palignr $3.
     xmm1 = aligned block that contains the byte at rcx (starts at rcx - 3)
     xmm2 = the aligned block after it (starts at rcx + 13)
     xmm0 = all zero going into every pcmpeqb below  */
598 L(Shl3):
599 movaps -3(%rcx), %xmm1
600 movaps 13(%rcx), %xmm2
/* Entered by fall-through and again from L(Shl3LoopStart) when the
   64-byte loop has seen a NUL.  rax = NUL mask of xmm2; xmm3 keeps an
   unshifted copy of xmm2 because palignr overwrites it.  */
601 L(Shl3Start):
602 pcmpeqb %xmm2, %xmm0
603 pmovmskb %xmm0, %rax
604 movaps %xmm2, %xmm3
605 # ifdef USE_AS_STRNCPY
/* strncpy builds: taken when r8 was <= 16 (unsigned) before the
   subtraction.  The same check follows every block below.  */
606 sub $16, %r8
607 jbe L(StrncpyExit3Case2OrCase3)
608 # endif
609 test %rax, %rax
610 jnz L(Shl3LoopExit)
611
/* xmm2 <- bytes 3..18 of xmm2:xmm1, i.e. the 16 source bytes at rcx.
   Store them, then slide the window: xmm1 <- old xmm2 (from xmm3),
   xmm2 <- next aligned block; both pointers advance by 16.  */
612 palignr $3, %xmm1, %xmm2
613 movaps %xmm3, %xmm1
614 movaps %xmm2, (%rdx)
615 movaps 29(%rcx), %xmm2
616
617 pcmpeqb %xmm2, %xmm0
618 lea 16(%rdx), %rdx
619 pmovmskb %xmm0, %rax
620 lea 16(%rcx), %rcx
621 movaps %xmm2, %xmm3
622 # ifdef USE_AS_STRNCPY
623 sub $16, %r8
624 jbe L(StrncpyExit3Case2OrCase3)
625 # endif
626 test %rax, %rax
627 jnz L(Shl3LoopExit)
628
629 palignr $3, %xmm1, %xmm2
630 movaps %xmm2, (%rdx)
631 movaps 29(%rcx), %xmm2
632 movaps %xmm3, %xmm1
633
634 pcmpeqb %xmm2, %xmm0
635 lea 16(%rdx), %rdx
636 pmovmskb %xmm0, %rax
637 lea 16(%rcx), %rcx
638 movaps %xmm2, %xmm3
639 # ifdef USE_AS_STRNCPY
640 sub $16, %r8
641 jbe L(StrncpyExit3Case2OrCase3)
642 # endif
643 test %rax, %rax
644 jnz L(Shl3LoopExit)
645
646 palignr $3, %xmm1, %xmm2
647 movaps %xmm3, %xmm1
648 movaps %xmm2, (%rdx)
649 movaps 29(%rcx), %xmm2
650
651 pcmpeqb %xmm2, %xmm0
652 lea 16(%rdx), %rdx
653 pmovmskb %xmm0, %rax
654 lea 16(%rcx), %rcx
655 movaps %xmm2, %xmm3
656 # ifdef USE_AS_STRNCPY
657 sub $16, %r8
658 jbe L(StrncpyExit3Case2OrCase3)
659 # endif
660 test %rax, %rax
661 jnz L(Shl3LoopExit)
662
/* Fourth block stored.  Prepare the 64-byte loop: rcx + 29 is the next
   aligned source block not yet loaded; round it down to a 64-byte
   boundary.  rax = number of bytes stepped back; rdx (and r8 in strncpy
   builds) are adjusted by the same amount, so some data may be copied
   twice.  "lea -13" restores rcx's 3-byte offset from alignment.  */
663 palignr $3, %xmm1, %xmm2
664 movaps %xmm3, %xmm1
665 movaps %xmm2, (%rdx)
666 lea 29(%rcx), %rcx
667 lea 16(%rdx), %rdx
668
669 mov %rcx, %rax
670 and $-0x40, %rcx
671 sub %rcx, %rax
672 lea -13(%rcx), %rcx
673 sub %rax, %rdx
674 # ifdef USE_AS_STRNCPY
675 add %rax, %r8
676 # endif
677 movaps -3(%rcx), %xmm1
678
/* Main loop: 64 source bytes per iteration.
   xmm2..xmm5 = four consecutive aligned blocks starting at rcx + 13
   (a 64-byte boundary), xmm1 = the block before them.  xmm6/xmm7
   accumulate the byte-wise minimum of all four, so comparing xmm7 with
   xmm0 gives a non-zero mask iff one of the 64 bytes is NUL.  xmm7 is
   then reused for an unshifted copy of xmm5, the next iteration's xmm1.
   palignr does not modify flags, so the jnz still sees the test.  */
679 L(Shl3LoopStart):
680 movaps 13(%rcx), %xmm2
681 movaps 29(%rcx), %xmm3
682 movaps %xmm3, %xmm6
683 movaps 45(%rcx), %xmm4
684 movaps %xmm4, %xmm7
685 movaps 61(%rcx), %xmm5
686 pminub %xmm2, %xmm6
687 pminub %xmm5, %xmm7
688 pminub %xmm6, %xmm7
689 pcmpeqb %xmm0, %xmm7
690 pmovmskb %xmm7, %rax
691 movaps %xmm5, %xmm7
692 palignr $3, %xmm4, %xmm5
693 test %rax, %rax
694 palignr $3, %xmm3, %xmm4
/* NUL found: redo these blocks one at a time from L(Shl3Start), which
   only needs xmm1 and xmm2 (both still unmodified).  */
695 jnz L(Shl3Start)
696 # ifdef USE_AS_STRNCPY
697 sub $64, %r8
698 jbe L(StrncpyLeave3)
699 # endif
700 palignr $3, %xmm2, %xmm3
701 lea 64(%rcx), %rcx
702 palignr $3, %xmm1, %xmm2
703 movaps %xmm7, %xmm1
704 movaps %xmm5, 48(%rdx)
705 movaps %xmm4, 32(%rdx)
706 movaps %xmm3, 16(%rdx)
707 movaps %xmm2, (%rdx)
708 lea 64(%rdx), %rdx
709 jmp L(Shl3LoopStart)
710
/* xmm2 contains the terminator (mask in rax).  Flush the 13 source
   bytes still pending in xmm1: xmm6 <- current destination block with
   its top 3 bytes moved to the bottom; palignr $3 then yields bytes
   3..15 of xmm1 followed by those 3 original destination bytes.  The
   store thus writes 13 new bytes and leaves the last 3 as they were.
   rsi = 13 on the jump to L(CopyFrom1To16Bytes) (defined outside this
   part of the file) -- presumably the offset from rcx/rdx of the block
   that rax describes; confirm against that label.  */
711 L(Shl3LoopExit):
712 movaps (%rdx), %xmm6
713 psrldq $13, %xmm6
714 mov $13, %rsi
715 palignr $3, %xmm1, %xmm6
716 movaps %xmm6, (%rdx)
717 jmp L(CopyFrom1To16Bytes)
718
719 .p2align 4
/* L(Shl4): copy path for a source pointer that is 4 bytes past a
   16-byte boundary while rdx is 16-byte aligned; selected by the
   rax == 4 compare in the dispatch code.  All loads use the aligned
   addresses rcx - 4, rcx + 12, ...; each 16-byte output block is
   extracted from two neighbouring aligned blocks with palignr $4.
     xmm1 = aligned block that contains the byte at rcx (starts at rcx - 4)
     xmm2 = the aligned block after it (starts at rcx + 12)
     xmm0 = all zero going into every pcmpeqb below  */
720 L(Shl4):
721 movaps -4(%rcx), %xmm1
722 movaps 12(%rcx), %xmm2
/* Entered by fall-through and again from L(Shl4LoopStart) when the
   64-byte loop has seen a NUL.  rax = NUL mask of xmm2; xmm3 keeps an
   unshifted copy of xmm2 because palignr overwrites it.  */
723 L(Shl4Start):
724 pcmpeqb %xmm2, %xmm0
725 pmovmskb %xmm0, %rax
726 movaps %xmm2, %xmm3
727 # ifdef USE_AS_STRNCPY
/* strncpy builds: taken when r8 was <= 16 (unsigned) before the
   subtraction.  The same check follows every block below.  */
728 sub $16, %r8
729 jbe L(StrncpyExit4Case2OrCase3)
730 # endif
731 test %rax, %rax
732 jnz L(Shl4LoopExit)
733
/* xmm2 <- bytes 4..19 of xmm2:xmm1, i.e. the 16 source bytes at rcx.
   Store them, then slide the window: xmm1 <- old xmm2 (from xmm3),
   xmm2 <- next aligned block; both pointers advance by 16.  */
734 palignr $4, %xmm1, %xmm2
735 movaps %xmm3, %xmm1
736 movaps %xmm2, (%rdx)
737 movaps 28(%rcx), %xmm2
738
739 pcmpeqb %xmm2, %xmm0
740 lea 16(%rdx), %rdx
741 pmovmskb %xmm0, %rax
742 lea 16(%rcx), %rcx
743 movaps %xmm2, %xmm3
744 # ifdef USE_AS_STRNCPY
745 sub $16, %r8
746 jbe L(StrncpyExit4Case2OrCase3)
747 # endif
748 test %rax, %rax
749 jnz L(Shl4LoopExit)
750
751 palignr $4, %xmm1, %xmm2
752 movaps %xmm2, (%rdx)
753 movaps 28(%rcx), %xmm2
754 movaps %xmm3, %xmm1
755
756 pcmpeqb %xmm2, %xmm0
757 lea 16(%rdx), %rdx
758 pmovmskb %xmm0, %rax
759 lea 16(%rcx), %rcx
760 movaps %xmm2, %xmm3
761 # ifdef USE_AS_STRNCPY
762 sub $16, %r8
763 jbe L(StrncpyExit4Case2OrCase3)
764 # endif
765 test %rax, %rax
766 jnz L(Shl4LoopExit)
767
768 palignr $4, %xmm1, %xmm2
769 movaps %xmm3, %xmm1
770 movaps %xmm2, (%rdx)
771 movaps 28(%rcx), %xmm2
772
773 pcmpeqb %xmm2, %xmm0
774 lea 16(%rdx), %rdx
775 pmovmskb %xmm0, %rax
776 lea 16(%rcx), %rcx
777 movaps %xmm2, %xmm3
778 # ifdef USE_AS_STRNCPY
779 sub $16, %r8
780 jbe L(StrncpyExit4Case2OrCase3)
781 # endif
782 test %rax, %rax
783 jnz L(Shl4LoopExit)
784
/* Fourth block stored.  Prepare the 64-byte loop: rcx + 28 is the next
   aligned source block not yet loaded; round it down to a 64-byte
   boundary.  rax = number of bytes stepped back; rdx (and r8 in strncpy
   builds) are adjusted by the same amount, so some data may be copied
   twice.  "lea -12" restores rcx's 4-byte offset from alignment.  */
785 palignr $4, %xmm1, %xmm2
786 movaps %xmm3, %xmm1
787 movaps %xmm2, (%rdx)
788 lea 28(%rcx), %rcx
789 lea 16(%rdx), %rdx
790
791 mov %rcx, %rax
792 and $-0x40, %rcx
793 sub %rcx, %rax
794 lea -12(%rcx), %rcx
795 sub %rax, %rdx
796 # ifdef USE_AS_STRNCPY
797 add %rax, %r8
798 # endif
799 movaps -4(%rcx), %xmm1
800
/* Main loop: 64 source bytes per iteration.
   xmm2..xmm5 = four consecutive aligned blocks starting at rcx + 12
   (a 64-byte boundary), xmm1 = the block before them.  xmm6/xmm7
   accumulate the byte-wise minimum of all four, so comparing xmm7 with
   xmm0 gives a non-zero mask iff one of the 64 bytes is NUL.  xmm7 is
   then reused for an unshifted copy of xmm5, the next iteration's xmm1.
   palignr does not modify flags, so the jnz still sees the test.  */
801 L(Shl4LoopStart):
802 movaps 12(%rcx), %xmm2
803 movaps 28(%rcx), %xmm3
804 movaps %xmm3, %xmm6
805 movaps 44(%rcx), %xmm4
806 movaps %xmm4, %xmm7
807 movaps 60(%rcx), %xmm5
808 pminub %xmm2, %xmm6
809 pminub %xmm5, %xmm7
810 pminub %xmm6, %xmm7
811 pcmpeqb %xmm0, %xmm7
812 pmovmskb %xmm7, %rax
813 movaps %xmm5, %xmm7
814 palignr $4, %xmm4, %xmm5
815 test %rax, %rax
816 palignr $4, %xmm3, %xmm4
/* NUL found: redo these blocks one at a time from L(Shl4Start), which
   only needs xmm1 and xmm2 (both still unmodified).  */
817 jnz L(Shl4Start)
818 # ifdef USE_AS_STRNCPY
819 sub $64, %r8
820 jbe L(StrncpyLeave4)
821 # endif
822 palignr $4, %xmm2, %xmm3
823 lea 64(%rcx), %rcx
824 palignr $4, %xmm1, %xmm2
825 movaps %xmm7, %xmm1
826 movaps %xmm5, 48(%rdx)
827 movaps %xmm4, 32(%rdx)
828 movaps %xmm3, 16(%rdx)
829 movaps %xmm2, (%rdx)
830 lea 64(%rdx), %rdx
831 jmp L(Shl4LoopStart)
832
/* xmm2 contains the terminator (mask in rax).  Flush the 12 source
   bytes still pending in xmm1: xmm6 <- current destination block with
   its top 4 bytes moved to the bottom; palignr $4 then yields bytes
   4..15 of xmm1 followed by those 4 original destination bytes.  The
   store thus writes 12 new bytes and leaves the last 4 as they were.
   rsi = 12 on the jump to L(CopyFrom1To16Bytes) (defined outside this
   part of the file) -- presumably the offset from rcx/rdx of the block
   that rax describes; confirm against that label.  */
833 L(Shl4LoopExit):
834 movaps (%rdx), %xmm6
835 psrldq $12, %xmm6
836 mov $12, %rsi
837 palignr $4, %xmm1, %xmm6
838 movaps %xmm6, (%rdx)
839 jmp L(CopyFrom1To16Bytes)
840
841 .p2align 4
/* L(Shl5): copy path for a source pointer that is 5 bytes past a
   16-byte boundary while rdx is 16-byte aligned; selected by the
   rax == 5 compare in the dispatch code.  All loads use the aligned
   addresses rcx - 5, rcx + 11, ...; each 16-byte output block is
   extracted from two neighbouring aligned blocks with palignr $5.
     xmm1 = aligned block that contains the byte at rcx (starts at rcx - 5)
     xmm2 = the aligned block after it (starts at rcx + 11)
     xmm0 = all zero going into every pcmpeqb below  */
842 L(Shl5):
843 movaps -5(%rcx), %xmm1
844 movaps 11(%rcx), %xmm2
/* Entered by fall-through and again from L(Shl5LoopStart) when the
   64-byte loop has seen a NUL.  rax = NUL mask of xmm2; xmm3 keeps an
   unshifted copy of xmm2 because palignr overwrites it.  */
845 L(Shl5Start):
846 pcmpeqb %xmm2, %xmm0
847 pmovmskb %xmm0, %rax
848 movaps %xmm2, %xmm3
849 # ifdef USE_AS_STRNCPY
/* strncpy builds: taken when r8 was <= 16 (unsigned) before the
   subtraction.  The same check follows every block below.  */
850 sub $16, %r8
851 jbe L(StrncpyExit5Case2OrCase3)
852 # endif
853 test %rax, %rax
854 jnz L(Shl5LoopExit)
855
/* xmm2 <- bytes 5..20 of xmm2:xmm1, i.e. the 16 source bytes at rcx.
   Store them, then slide the window: xmm1 <- old xmm2 (from xmm3),
   xmm2 <- next aligned block; both pointers advance by 16.  */
856 palignr $5, %xmm1, %xmm2
857 movaps %xmm3, %xmm1
858 movaps %xmm2, (%rdx)
859 movaps 27(%rcx), %xmm2
860
861 pcmpeqb %xmm2, %xmm0
862 lea 16(%rdx), %rdx
863 pmovmskb %xmm0, %rax
864 lea 16(%rcx), %rcx
865 movaps %xmm2, %xmm3
866 # ifdef USE_AS_STRNCPY
867 sub $16, %r8
868 jbe L(StrncpyExit5Case2OrCase3)
869 # endif
870 test %rax, %rax
871 jnz L(Shl5LoopExit)
872
873 palignr $5, %xmm1, %xmm2
874 movaps %xmm2, (%rdx)
875 movaps 27(%rcx), %xmm2
876 movaps %xmm3, %xmm1
877
878 pcmpeqb %xmm2, %xmm0
879 lea 16(%rdx), %rdx
880 pmovmskb %xmm0, %rax
881 lea 16(%rcx), %rcx
882 movaps %xmm2, %xmm3
883 # ifdef USE_AS_STRNCPY
884 sub $16, %r8
885 jbe L(StrncpyExit5Case2OrCase3)
886 # endif
887 test %rax, %rax
888 jnz L(Shl5LoopExit)
889
890 palignr $5, %xmm1, %xmm2
891 movaps %xmm3, %xmm1
892 movaps %xmm2, (%rdx)
893 movaps 27(%rcx), %xmm2
894
895 pcmpeqb %xmm2, %xmm0
896 lea 16(%rdx), %rdx
897 pmovmskb %xmm0, %rax
898 lea 16(%rcx), %rcx
899 movaps %xmm2, %xmm3
900 # ifdef USE_AS_STRNCPY
901 sub $16, %r8
902 jbe L(StrncpyExit5Case2OrCase3)
903 # endif
904 test %rax, %rax
905 jnz L(Shl5LoopExit)
906
/* Fourth block stored.  Prepare the 64-byte loop: rcx + 27 is the next
   aligned source block not yet loaded; round it down to a 64-byte
   boundary.  rax = number of bytes stepped back; rdx (and r8 in strncpy
   builds) are adjusted by the same amount, so some data may be copied
   twice.  "lea -11" restores rcx's 5-byte offset from alignment.  */
907 palignr $5, %xmm1, %xmm2
908 movaps %xmm3, %xmm1
909 movaps %xmm2, (%rdx)
910 lea 27(%rcx), %rcx
911 lea 16(%rdx), %rdx
912
913 mov %rcx, %rax
914 and $-0x40, %rcx
915 sub %rcx, %rax
916 lea -11(%rcx), %rcx
917 sub %rax, %rdx
918 # ifdef USE_AS_STRNCPY
919 add %rax, %r8
920 # endif
921 movaps -5(%rcx), %xmm1
922
/* Main loop: 64 source bytes per iteration.
   xmm2..xmm5 = four consecutive aligned blocks starting at rcx + 11
   (a 64-byte boundary), xmm1 = the block before them.  xmm6/xmm7
   accumulate the byte-wise minimum of all four, so comparing xmm7 with
   xmm0 gives a non-zero mask iff one of the 64 bytes is NUL.  xmm7 is
   then reused for an unshifted copy of xmm5, the next iteration's xmm1.
   palignr does not modify flags, so the jnz still sees the test.  */
923 L(Shl5LoopStart):
924 movaps 11(%rcx), %xmm2
925 movaps 27(%rcx), %xmm3
926 movaps %xmm3, %xmm6
927 movaps 43(%rcx), %xmm4
928 movaps %xmm4, %xmm7
929 movaps 59(%rcx), %xmm5
930 pminub %xmm2, %xmm6
931 pminub %xmm5, %xmm7
932 pminub %xmm6, %xmm7
933 pcmpeqb %xmm0, %xmm7
934 pmovmskb %xmm7, %rax
935 movaps %xmm5, %xmm7
936 palignr $5, %xmm4, %xmm5
937 test %rax, %rax
938 palignr $5, %xmm3, %xmm4
/* NUL found: redo these blocks one at a time from L(Shl5Start), which
   only needs xmm1 and xmm2 (both still unmodified).  */
939 jnz L(Shl5Start)
940 # ifdef USE_AS_STRNCPY
941 sub $64, %r8
942 jbe L(StrncpyLeave5)
943 # endif
944 palignr $5, %xmm2, %xmm3
945 lea 64(%rcx), %rcx
946 palignr $5, %xmm1, %xmm2
947 movaps %xmm7, %xmm1
948 movaps %xmm5, 48(%rdx)
949 movaps %xmm4, 32(%rdx)
950 movaps %xmm3, 16(%rdx)
951 movaps %xmm2, (%rdx)
952 lea 64(%rdx), %rdx
953 jmp L(Shl5LoopStart)
954
/* xmm2 contains the terminator (mask in rax).  Flush the 11 source
   bytes still pending in xmm1: xmm6 <- current destination block with
   its top 5 bytes moved to the bottom; palignr $5 then yields bytes
   5..15 of xmm1 followed by those 5 original destination bytes.  The
   store thus writes 11 new bytes and leaves the last 5 as they were.
   rsi = 11 on the jump to L(CopyFrom1To16Bytes) (defined outside this
   part of the file) -- presumably the offset from rcx/rdx of the block
   that rax describes; confirm against that label.  */
955 L(Shl5LoopExit):
956 movaps (%rdx), %xmm6
957 psrldq $11, %xmm6
958 mov $11, %rsi
959 palignr $5, %xmm1, %xmm6
960 movaps %xmm6, (%rdx)
961 jmp L(CopyFrom1To16Bytes)
962
963 .p2align 4
/* L(Shl6): copy path for a source pointer that is 6 bytes past a
   16-byte boundary while rdx is 16-byte aligned; selected by the
   rax == 6 compare in the dispatch code.  All loads use the aligned
   addresses rcx - 6, rcx + 10, ...; each 16-byte output block is
   extracted from two neighbouring aligned blocks with palignr $6.
     xmm1 = aligned block that contains the byte at rcx (starts at rcx - 6)
     xmm2 = the aligned block after it (starts at rcx + 10)
     xmm0 = all zero going into every pcmpeqb below  */
964 L(Shl6):
965 movaps -6(%rcx), %xmm1
966 movaps 10(%rcx), %xmm2
/* Entered by fall-through and again from L(Shl6LoopStart) when the
   64-byte loop has seen a NUL.  rax = NUL mask of xmm2; xmm3 keeps an
   unshifted copy of xmm2 because palignr overwrites it.  */
967 L(Shl6Start):
968 pcmpeqb %xmm2, %xmm0
969 pmovmskb %xmm0, %rax
970 movaps %xmm2, %xmm3
971 # ifdef USE_AS_STRNCPY
/* strncpy builds: taken when r8 was <= 16 (unsigned) before the
   subtraction.  The same check follows every block below.  */
972 sub $16, %r8
973 jbe L(StrncpyExit6Case2OrCase3)
974 # endif
975 test %rax, %rax
976 jnz L(Shl6LoopExit)
977
/* xmm2 <- bytes 6..21 of xmm2:xmm1, i.e. the 16 source bytes at rcx.
   Store them, then slide the window: xmm1 <- old xmm2 (from xmm3),
   xmm2 <- next aligned block; both pointers advance by 16.  */
978 palignr $6, %xmm1, %xmm2
979 movaps %xmm3, %xmm1
980 movaps %xmm2, (%rdx)
981 movaps 26(%rcx), %xmm2
982
983 pcmpeqb %xmm2, %xmm0
984 lea 16(%rdx), %rdx
985 pmovmskb %xmm0, %rax
986 lea 16(%rcx), %rcx
987 movaps %xmm2, %xmm3
988 # ifdef USE_AS_STRNCPY
989 sub $16, %r8
990 jbe L(StrncpyExit6Case2OrCase3)
991 # endif
992 test %rax, %rax
993 jnz L(Shl6LoopExit)
994
995 palignr $6, %xmm1, %xmm2
996 movaps %xmm2, (%rdx)
997 movaps 26(%rcx), %xmm2
998 movaps %xmm3, %xmm1
999
1000 pcmpeqb %xmm2, %xmm0
1001 lea 16(%rdx), %rdx
1002 pmovmskb %xmm0, %rax
1003 lea 16(%rcx), %rcx
1004 movaps %xmm2, %xmm3
1005 # ifdef USE_AS_STRNCPY
1006 sub $16, %r8
1007 jbe L(StrncpyExit6Case2OrCase3)
1008 # endif
1009 test %rax, %rax
1010 jnz L(Shl6LoopExit)
1011
1012 palignr $6, %xmm1, %xmm2
1013 movaps %xmm3, %xmm1
1014 movaps %xmm2, (%rdx)
1015 movaps 26(%rcx), %xmm2
1016
1017 pcmpeqb %xmm2, %xmm0
1018 lea 16(%rdx), %rdx
1019 pmovmskb %xmm0, %rax
1020 lea 16(%rcx), %rcx
1021 movaps %xmm2, %xmm3
1022 # ifdef USE_AS_STRNCPY
1023 sub $16, %r8
1024 jbe L(StrncpyExit6Case2OrCase3)
1025 # endif
1026 test %rax, %rax
1027 jnz L(Shl6LoopExit)
1028
/* Fourth block stored.  Prepare the 64-byte loop: rcx + 26 is the next
   aligned source block not yet loaded; round it down to a 64-byte
   boundary.  rax = number of bytes stepped back; rdx (and r8 in strncpy
   builds) are adjusted by the same amount, so some data may be copied
   twice.  "lea -10" restores rcx's 6-byte offset from alignment.  */
1029 palignr $6, %xmm1, %xmm2
1030 movaps %xmm3, %xmm1
1031 movaps %xmm2, (%rdx)
1032 lea 26(%rcx), %rcx
1033 lea 16(%rdx), %rdx
1034
1035 mov %rcx, %rax
1036 and $-0x40, %rcx
1037 sub %rcx, %rax
1038 lea -10(%rcx), %rcx
1039 sub %rax, %rdx
1040 # ifdef USE_AS_STRNCPY
1041 add %rax, %r8
1042 # endif
1043 movaps -6(%rcx), %xmm1
1044
/* Main loop: 64 source bytes per iteration.
   xmm2..xmm5 = four consecutive aligned blocks starting at rcx + 10
   (a 64-byte boundary), xmm1 = the block before them.  xmm6/xmm7
   accumulate the byte-wise minimum of all four, so comparing xmm7 with
   xmm0 gives a non-zero mask iff one of the 64 bytes is NUL.  xmm7 is
   then reused for an unshifted copy of xmm5, the next iteration's xmm1.
   palignr does not modify flags, so the jnz still sees the test.  */
1045 L(Shl6LoopStart):
1046 movaps 10(%rcx), %xmm2
1047 movaps 26(%rcx), %xmm3
1048 movaps %xmm3, %xmm6
1049 movaps 42(%rcx), %xmm4
1050 movaps %xmm4, %xmm7
1051 movaps 58(%rcx), %xmm5
1052 pminub %xmm2, %xmm6
1053 pminub %xmm5, %xmm7
1054 pminub %xmm6, %xmm7
1055 pcmpeqb %xmm0, %xmm7
1056 pmovmskb %xmm7, %rax
1057 movaps %xmm5, %xmm7
1058 palignr $6, %xmm4, %xmm5
1059 test %rax, %rax
1060 palignr $6, %xmm3, %xmm4
/* NUL found: redo these blocks one at a time from L(Shl6Start), which
   only needs xmm1 and xmm2 (both still unmodified).  */
1061 jnz L(Shl6Start)
1062 # ifdef USE_AS_STRNCPY
1063 sub $64, %r8
1064 jbe L(StrncpyLeave6)
1065 # endif
1066 palignr $6, %xmm2, %xmm3
1067 lea 64(%rcx), %rcx
1068 palignr $6, %xmm1, %xmm2
1069 movaps %xmm7, %xmm1
1070 movaps %xmm5, 48(%rdx)
1071 movaps %xmm4, 32(%rdx)
1072 movaps %xmm3, 16(%rdx)
1073 movaps %xmm2, (%rdx)
1074 lea 64(%rdx), %rdx
1075 jmp L(Shl6LoopStart)
1076
/* xmm2 contains the terminator (mask in rax).  Flush the 10 source
   bytes still pending in xmm1: xmm6 <- current destination block with
   its top 6 bytes moved to the bottom; palignr $6 then yields bytes
   6..15 of xmm1 followed by those 6 original destination bytes.  The
   store thus writes 10 new bytes and leaves the last 6 as they were.
   rsi = 10 on the jump to L(CopyFrom1To16Bytes) (defined outside this
   part of the file) -- presumably the offset from rcx/rdx of the block
   that rax describes; confirm against that label.  */
1077 L(Shl6LoopExit):
1078 movaps (%rdx), %xmm6
1079 psrldq $10, %xmm6
1080 mov $10, %rsi
1081 palignr $6, %xmm1, %xmm6
1082 movaps %xmm6, (%rdx)
1083 jmp L(CopyFrom1To16Bytes)
1084
1085 .p2align 4
/* L(Shl7): copy path for a source pointer that is 7 bytes past a
   16-byte boundary while rdx is 16-byte aligned; reached through the
   final "jmp L(Shl7)" of the low half of the dispatch chain.  All loads
   use the aligned addresses rcx - 7, rcx + 9, ...; each 16-byte output
   block is extracted from two neighbouring aligned blocks with
   palignr $7.
     xmm1 = aligned block that contains the byte at rcx (starts at rcx - 7)
     xmm2 = the aligned block after it (starts at rcx + 9)
     xmm0 = all zero going into every pcmpeqb below  */
1086 L(Shl7):
1087 movaps -7(%rcx), %xmm1
1088 movaps 9(%rcx), %xmm2
/* Entered by fall-through and again from L(Shl7LoopStart) when the
   64-byte loop has seen a NUL.  rax = NUL mask of xmm2; xmm3 keeps an
   unshifted copy of xmm2 because palignr overwrites it.  */
1089 L(Shl7Start):
1090 pcmpeqb %xmm2, %xmm0
1091 pmovmskb %xmm0, %rax
1092 movaps %xmm2, %xmm3
1093 # ifdef USE_AS_STRNCPY
/* strncpy builds: taken when r8 was <= 16 (unsigned) before the
   subtraction.  The same check follows every block below.  */
1094 sub $16, %r8
1095 jbe L(StrncpyExit7Case2OrCase3)
1096 # endif
1097 test %rax, %rax
1098 jnz L(Shl7LoopExit)
1099
/* xmm2 <- bytes 7..22 of xmm2:xmm1, i.e. the 16 source bytes at rcx.
   Store them, then slide the window: xmm1 <- old xmm2 (from xmm3),
   xmm2 <- next aligned block; both pointers advance by 16.  */
1100 palignr $7, %xmm1, %xmm2
1101 movaps %xmm3, %xmm1
1102 movaps %xmm2, (%rdx)
1103 movaps 25(%rcx), %xmm2
1104
1105 pcmpeqb %xmm2, %xmm0
1106 lea 16(%rdx), %rdx
1107 pmovmskb %xmm0, %rax
1108 lea 16(%rcx), %rcx
1109 movaps %xmm2, %xmm3
1110 # ifdef USE_AS_STRNCPY
1111 sub $16, %r8
1112 jbe L(StrncpyExit7Case2OrCase3)
1113 # endif
1114 test %rax, %rax
1115 jnz L(Shl7LoopExit)
1116
1117 palignr $7, %xmm1, %xmm2
1118 movaps %xmm2, (%rdx)
1119 movaps 25(%rcx), %xmm2
1120 movaps %xmm3, %xmm1
1121
1122 pcmpeqb %xmm2, %xmm0
1123 lea 16(%rdx), %rdx
1124 pmovmskb %xmm0, %rax
1125 lea 16(%rcx), %rcx
1126 movaps %xmm2, %xmm3
1127 # ifdef USE_AS_STRNCPY
1128 sub $16, %r8
1129 jbe L(StrncpyExit7Case2OrCase3)
1130 # endif
1131 test %rax, %rax
1132 jnz L(Shl7LoopExit)
1133
1134 palignr $7, %xmm1, %xmm2
1135 movaps %xmm3, %xmm1
1136 movaps %xmm2, (%rdx)
1137 movaps 25(%rcx), %xmm2
1138
1139 pcmpeqb %xmm2, %xmm0
1140 lea 16(%rdx), %rdx
1141 pmovmskb %xmm0, %rax
1142 lea 16(%rcx), %rcx
1143 movaps %xmm2, %xmm3
1144 # ifdef USE_AS_STRNCPY
1145 sub $16, %r8
1146 jbe L(StrncpyExit7Case2OrCase3)
1147 # endif
1148 test %rax, %rax
1149 jnz L(Shl7LoopExit)
1150
/* Fourth block stored.  Prepare the 64-byte loop: rcx + 25 is the next
   aligned source block not yet loaded; round it down to a 64-byte
   boundary.  rax = number of bytes stepped back; rdx (and r8 in strncpy
   builds) are adjusted by the same amount, so some data may be copied
   twice.  "lea -9" restores rcx's 7-byte offset from alignment.  */
1151 palignr $7, %xmm1, %xmm2
1152 movaps %xmm3, %xmm1
1153 movaps %xmm2, (%rdx)
1154 lea 25(%rcx), %rcx
1155 lea 16(%rdx), %rdx
1156
1157 mov %rcx, %rax
1158 and $-0x40, %rcx
1159 sub %rcx, %rax
1160 lea -9(%rcx), %rcx
1161 sub %rax, %rdx
1162 # ifdef USE_AS_STRNCPY
1163 add %rax, %r8
1164 # endif
1165 movaps -7(%rcx), %xmm1
1166
/* Main loop: 64 source bytes per iteration.
   xmm2..xmm5 = four consecutive aligned blocks starting at rcx + 9
   (a 64-byte boundary), xmm1 = the block before them.  xmm6/xmm7
   accumulate the byte-wise minimum of all four, so comparing xmm7 with
   xmm0 gives a non-zero mask iff one of the 64 bytes is NUL.  xmm7 is
   then reused for an unshifted copy of xmm5, the next iteration's xmm1.
   palignr does not modify flags, so the jnz still sees the test.  */
1167 L(Shl7LoopStart):
1168 movaps 9(%rcx), %xmm2
1169 movaps 25(%rcx), %xmm3
1170 movaps %xmm3, %xmm6
1171 movaps 41(%rcx), %xmm4
1172 movaps %xmm4, %xmm7
1173 movaps 57(%rcx), %xmm5
1174 pminub %xmm2, %xmm6
1175 pminub %xmm5, %xmm7
1176 pminub %xmm6, %xmm7
1177 pcmpeqb %xmm0, %xmm7
1178 pmovmskb %xmm7, %rax
1179 movaps %xmm5, %xmm7
1180 palignr $7, %xmm4, %xmm5
1181 test %rax, %rax
1182 palignr $7, %xmm3, %xmm4
/* NUL found: redo these blocks one at a time from L(Shl7Start), which
   only needs xmm1 and xmm2 (both still unmodified).  */
1183 jnz L(Shl7Start)
1184 # ifdef USE_AS_STRNCPY
1185 sub $64, %r8
1186 jbe L(StrncpyLeave7)
1187 # endif
1188 palignr $7, %xmm2, %xmm3
1189 lea 64(%rcx), %rcx
1190 palignr $7, %xmm1, %xmm2
1191 movaps %xmm7, %xmm1
1192 movaps %xmm5, 48(%rdx)
1193 movaps %xmm4, 32(%rdx)
1194 movaps %xmm3, 16(%rdx)
1195 movaps %xmm2, (%rdx)
1196 lea 64(%rdx), %rdx
1197 jmp L(Shl7LoopStart)
1198
/* xmm2 contains the terminator (mask in rax).  Flush the 9 source
   bytes still pending in xmm1: xmm6 <- current destination block with
   its top 7 bytes moved to the bottom; palignr $7 then yields bytes
   7..15 of xmm1 followed by those 7 original destination bytes.  The
   store thus writes 9 new bytes and leaves the last 7 as they were.
   rsi = 9 on the jump to L(CopyFrom1To16Bytes) (defined outside this
   part of the file) -- presumably the offset from rcx/rdx of the block
   that rax describes; confirm against that label.  */
1199 L(Shl7LoopExit):
1200 movaps (%rdx), %xmm6
1201 psrldq $9, %xmm6
1202 mov $9, %rsi
1203 palignr $7, %xmm1, %xmm6
1204 movaps %xmm6, (%rdx)
1205 jmp L(CopyFrom1To16Bytes)
1206
1207 .p2align 4
/* L(Shl8): copy path for a source pointer that is 8 bytes past a
   16-byte boundary while rdx is 16-byte aligned; selected by the
   "je L(Shl8)" at L(ShlHigh8) in the dispatch code.  All loads use the
   aligned addresses rcx - 8, rcx + 8, ...; each 16-byte output block is
   extracted from two neighbouring aligned blocks with palignr $8.
     xmm1 = aligned block that contains the byte at rcx (starts at rcx - 8)
     xmm2 = the aligned block after it (starts at rcx + 8)
     xmm0 = all zero going into every pcmpeqb below  */
1208 L(Shl8):
1209 movaps -8(%rcx), %xmm1
1210 movaps 8(%rcx), %xmm2
/* Entered by fall-through and again from L(Shl8LoopStart) when the
   64-byte loop has seen a NUL.  rax = NUL mask of xmm2; xmm3 keeps an
   unshifted copy of xmm2 because palignr overwrites it.  */
1211 L(Shl8Start):
1212 pcmpeqb %xmm2, %xmm0
1213 pmovmskb %xmm0, %rax
1214 movaps %xmm2, %xmm3
1215 # ifdef USE_AS_STRNCPY
/* strncpy builds: taken when r8 was <= 16 (unsigned) before the
   subtraction.  The same check follows every block below.  */
1216 sub $16, %r8
1217 jbe L(StrncpyExit8Case2OrCase3)
1218 # endif
1219 test %rax, %rax
1220 jnz L(Shl8LoopExit)
1221
/* xmm2 <- bytes 8..23 of xmm2:xmm1, i.e. the 16 source bytes at rcx.
   Store them, then slide the window: xmm1 <- old xmm2 (from xmm3),
   xmm2 <- next aligned block; both pointers advance by 16.  */
1222 palignr $8, %xmm1, %xmm2
1223 movaps %xmm3, %xmm1
1224 movaps %xmm2, (%rdx)
1225 movaps 24(%rcx), %xmm2
1226
1227 pcmpeqb %xmm2, %xmm0
1228 lea 16(%rdx), %rdx
1229 pmovmskb %xmm0, %rax
1230 lea 16(%rcx), %rcx
1231 movaps %xmm2, %xmm3
1232 # ifdef USE_AS_STRNCPY
1233 sub $16, %r8
1234 jbe L(StrncpyExit8Case2OrCase3)
1235 # endif
1236 test %rax, %rax
1237 jnz L(Shl8LoopExit)
1238
1239 palignr $8, %xmm1, %xmm2
1240 movaps %xmm2, (%rdx)
1241 movaps 24(%rcx), %xmm2
1242 movaps %xmm3, %xmm1
1243
1244 pcmpeqb %xmm2, %xmm0
1245 lea 16(%rdx), %rdx
1246 pmovmskb %xmm0, %rax
1247 lea 16(%rcx), %rcx
1248 movaps %xmm2, %xmm3
1249 # ifdef USE_AS_STRNCPY
1250 sub $16, %r8
1251 jbe L(StrncpyExit8Case2OrCase3)
1252 # endif
1253 test %rax, %rax
1254 jnz L(Shl8LoopExit)
1255
1256 palignr $8, %xmm1, %xmm2
1257 movaps %xmm3, %xmm1
1258 movaps %xmm2, (%rdx)
1259 movaps 24(%rcx), %xmm2
1260
1261 pcmpeqb %xmm2, %xmm0
1262 lea 16(%rdx), %rdx
1263 pmovmskb %xmm0, %rax
1264 lea 16(%rcx), %rcx
1265 movaps %xmm2, %xmm3
1266 # ifdef USE_AS_STRNCPY
1267 sub $16, %r8
1268 jbe L(StrncpyExit8Case2OrCase3)
1269 # endif
1270 test %rax, %rax
1271 jnz L(Shl8LoopExit)
1272
/* Fourth block stored.  Prepare the 64-byte loop: rcx + 24 is the next
   aligned source block not yet loaded; round it down to a 64-byte
   boundary.  rax = number of bytes stepped back; rdx (and r8 in strncpy
   builds) are adjusted by the same amount, so some data may be copied
   twice.  "lea -8" restores rcx's 8-byte offset from alignment.  */
1273 palignr $8, %xmm1, %xmm2
1274 movaps %xmm3, %xmm1
1275 movaps %xmm2, (%rdx)
1276 lea 24(%rcx), %rcx
1277 lea 16(%rdx), %rdx
1278
1279 mov %rcx, %rax
1280 and $-0x40, %rcx
1281 sub %rcx, %rax
1282 lea -8(%rcx), %rcx
1283 sub %rax, %rdx
1284 # ifdef USE_AS_STRNCPY
1285 add %rax, %r8
1286 # endif
1287 movaps -8(%rcx), %xmm1
1288
/* Main loop: 64 source bytes per iteration.
   xmm2..xmm5 = four consecutive aligned blocks starting at rcx + 8
   (a 64-byte boundary), xmm1 = the block before them.  xmm6/xmm7
   accumulate the byte-wise minimum of all four, so comparing xmm7 with
   xmm0 gives a non-zero mask iff one of the 64 bytes is NUL.  xmm7 is
   then reused for an unshifted copy of xmm5, the next iteration's xmm1.
   palignr does not modify flags, so the jnz still sees the test.  */
1289 L(Shl8LoopStart):
1290 movaps 8(%rcx), %xmm2
1291 movaps 24(%rcx), %xmm3
1292 movaps %xmm3, %xmm6
1293 movaps 40(%rcx), %xmm4
1294 movaps %xmm4, %xmm7
1295 movaps 56(%rcx), %xmm5
1296 pminub %xmm2, %xmm6
1297 pminub %xmm5, %xmm7
1298 pminub %xmm6, %xmm7
1299 pcmpeqb %xmm0, %xmm7
1300 pmovmskb %xmm7, %rax
1301 movaps %xmm5, %xmm7
1302 palignr $8, %xmm4, %xmm5
1303 test %rax, %rax
1304 palignr $8, %xmm3, %xmm4
/* NUL found: redo these blocks one at a time from L(Shl8Start), which
   only needs xmm1 and xmm2 (both still unmodified).  */
1305 jnz L(Shl8Start)
1306 # ifdef USE_AS_STRNCPY
1307 sub $64, %r8
1308 jbe L(StrncpyLeave8)
1309 # endif
1310 palignr $8, %xmm2, %xmm3
1311 lea 64(%rcx), %rcx
1312 palignr $8, %xmm1, %xmm2
1313 movaps %xmm7, %xmm1
1314 movaps %xmm5, 48(%rdx)
1315 movaps %xmm4, 32(%rdx)
1316 movaps %xmm3, 16(%rdx)
1317 movaps %xmm2, (%rdx)
1318 lea 64(%rdx), %rdx
1319 jmp L(Shl8LoopStart)
1320
/* xmm2 contains the terminator (mask in rax).  Flush the 8 source
   bytes still pending in xmm1: xmm6 <- current destination block with
   its top 8 bytes moved to the bottom; palignr $8 then yields bytes
   8..15 of xmm1 followed by those 8 original destination bytes.  The
   store thus writes 8 new bytes and leaves the last 8 as they were.
   rsi = 8 on the jump to L(CopyFrom1To16Bytes) (defined outside this
   part of the file) -- presumably the offset from rcx/rdx of the block
   that rax describes; confirm against that label.  */
1321 L(Shl8LoopExit):
1322 movaps (%rdx), %xmm6
1323 psrldq $8, %xmm6
1324 mov $8, %rsi
1325 palignr $8, %xmm1, %xmm6
1326 movaps %xmm6, (%rdx)
1327 jmp L(CopyFrom1To16Bytes)
1328
1329 .p2align 4
1330 L(Shl9):
1331 movaps -9(%rcx), %xmm1
1332 movaps 7(%rcx), %xmm2
1333 L(Shl9Start):
1334 pcmpeqb %xmm2, %xmm0
1335 pmovmskb %xmm0, %rax
1336 movaps %xmm2, %xmm3
1337 # ifdef USE_AS_STRNCPY
1338 sub $16, %r8
1339 jbe L(StrncpyExit9Case2OrCase3)
1340 # endif
1341 test %rax, %rax
1342 jnz L(Shl9LoopExit)
1343
1344 palignr $9, %xmm1, %xmm2
1345 movaps %xmm3, %xmm1
1346 movaps %xmm2, (%rdx)
1347 movaps 23(%rcx), %xmm2
1348
1349 pcmpeqb %xmm2, %xmm0
1350 lea 16(%rdx), %rdx
1351 pmovmskb %xmm0, %rax
1352 lea 16(%rcx), %rcx
1353 movaps %xmm2, %xmm3
1354 # ifdef USE_AS_STRNCPY
1355 sub $16, %r8
1356 jbe L(StrncpyExit9Case2OrCase3)
1357 # endif
1358 test %rax, %rax
1359 jnz L(Shl9LoopExit)
1360
1361 palignr $9, %xmm1, %xmm2
1362 movaps %xmm2, (%rdx)
1363 movaps 23(%rcx), %xmm2
1364 movaps %xmm3, %xmm1
1365
1366 pcmpeqb %xmm2, %xmm0
1367 lea 16(%rdx), %rdx
1368 pmovmskb %xmm0, %rax
1369 lea 16(%rcx), %rcx
1370 movaps %xmm2, %xmm3
1371 # ifdef USE_AS_STRNCPY
1372 sub $16, %r8
1373 jbe L(StrncpyExit9Case2OrCase3)
1374 # endif
1375 test %rax, %rax
1376 jnz L(Shl9LoopExit)
1377
1378 palignr $9, %xmm1, %xmm2
1379 movaps %xmm3, %xmm1
1380 movaps %xmm2, (%rdx)
1381 movaps 23(%rcx), %xmm2
1382
1383 pcmpeqb %xmm2, %xmm0
1384 lea 16(%rdx), %rdx
1385 pmovmskb %xmm0, %rax
1386 lea 16(%rcx), %rcx
1387 movaps %xmm2, %xmm3
1388 # ifdef USE_AS_STRNCPY
1389 sub $16, %r8
1390 jbe L(StrncpyExit9Case2OrCase3)
1391 # endif
1392 test %rax, %rax
1393 jnz L(Shl9LoopExit)
1394
1395 palignr $9, %xmm1, %xmm2
1396 movaps %xmm3, %xmm1
1397 movaps %xmm2, (%rdx)
1398 lea 23(%rcx), %rcx
1399 lea 16(%rdx), %rdx
1400
1401 mov %rcx, %rax
1402 and $-0x40, %rcx
1403 sub %rcx, %rax
1404 lea -7(%rcx), %rcx
1405 sub %rax, %rdx
1406 # ifdef USE_AS_STRNCPY
1407 add %rax, %r8
1408 # endif
1409 movaps -9(%rcx), %xmm1
1410
/* 64-byte main loop of the shift-by-9 path.  Four aligned 16-byte source
   chunks are loaded, tested together for a byte equal to %xmm0 (presumably
   all-zero, i.e. a NUL test; %xmm0 is set outside this view), then shifted
   by 9 bytes with palignr and stored at (%rdx).  %xmm1 carries the last
   unshifted chunk from the previous iteration.  */
1411 L(Shl9LoopStart):
1412 movaps 7(%rcx), %xmm2 /* chunk 0 */
1413 movaps 23(%rcx), %xmm3 /* chunk 1 */
1414 movaps %xmm3, %xmm6
1415 movaps 39(%rcx), %xmm4 /* chunk 2 */
1416 movaps %xmm4, %xmm7
1417 movaps 55(%rcx), %xmm5 /* chunk 3 */
1418 pminub %xmm2, %xmm6 /* xmm6 = bytewise min (chunk 0, chunk 1) */
1419 pminub %xmm5, %xmm7 /* xmm7 = bytewise min (chunk 2, chunk 3) */
1420 pminub %xmm6, %xmm7 /* xmm7 = bytewise min of all four chunks */
1421 pcmpeqb %xmm0, %xmm7 /* 0xff where the minimum equals %xmm0; %xmm0 itself is unchanged */
1422 pmovmskb %xmm7, %rax /* rax != 0 iff some byte in the 64 matched */
1423 movaps %xmm5, %xmm7 /* keep unshifted chunk 3; it becomes %xmm1 below */
1424 palignr $9, %xmm4, %xmm5
1425 test %rax, %rax
1426 palignr $9, %xmm3, %xmm4 /* palignr does not modify flags, so jnz still sees the test */
1427 jnz L(Shl9Start) /* match somewhere in these 64 bytes: redo them 16 at a time */
1428 # ifdef USE_AS_STRNCPY
1429 sub $64, %r8
1430 jbe L(StrncpyLeave9) /* r8 was <= 64: length limit ends inside these 64 bytes */
1431 # endif
1432 palignr $9, %xmm2, %xmm3
1433 lea 64(%rcx), %rcx
1434 palignr $9, %xmm1, %xmm2
1435 movaps %xmm7, %xmm1 /* previous chunk for the next iteration */
1436 movaps %xmm5, 48(%rdx)
1437 movaps %xmm4, 32(%rdx)
1438 movaps %xmm3, 16(%rdx)
1439 movaps %xmm2, (%rdx)
1440 lea 64(%rdx), %rdx
1441 jmp L(Shl9LoopStart)
1442
/* A match was found in the chunk held in %xmm2.  Write the last 7 bytes of
   the previous chunk (%xmm1) to destination bytes 0..6 while leaving
   destination bytes 7..15 as they are, then let L(CopyFrom1To16Bytes)
   finish from 7 bytes further on (%rsi is the offset it adds to %rdx and
   %rcx).  */
1443 L(Shl9LoopExit):
1444 movaps (%rdx), %xmm6 /* current destination contents */
1445 psrldq $7, %xmm6 /* move destination bytes 7..15 down to 0..8 */
1446 mov $7, %rsi
1447 palignr $9, %xmm1, %xmm6 /* xmm6 = top 7 bytes of xmm1, then old destination bytes 7..15 */
1448 movaps %xmm6, (%rdx)
1449 jmp L(CopyFrom1To16Bytes)
1450
/* Shift-by-10 copy path.  movaps faults on unaligned addresses, so this path
   relies on %rcx - 10 and %rdx being 16-byte aligned.  Each step combines two
   aligned source chunks with palignr into the 16 bytes starting at %rcx and
   stores them at (%rdx).  Four chunks are handled one at a time, each checked
   for a match against %xmm0 (presumably all-zero, i.e. a NUL test; %xmm0 is
   set outside this view) and, for strncpy, against the remaining length in
   %r8; then the pointers are rewound to a 64-byte source boundary and
   execution falls into L(Shl10LoopStart).  */
1451 .p2align 4
1452 L(Shl10):
1453 movaps -10(%rcx), %xmm1 /* aligned chunk whose top 6 bytes are source bytes 0..5 */
1454 movaps 6(%rcx), %xmm2 /* next aligned chunk: source bytes 6..21 */
1455 L(Shl10Start):
1456 pcmpeqb %xmm2, %xmm0 /* xmm0 = 0xff in every byte where xmm2 equals xmm0 */
1457 pmovmskb %xmm0, %rax /* rax = 16-bit match mask */
1458 movaps %xmm2, %xmm3 /* palignr below overwrites xmm2 */
1459 # ifdef USE_AS_STRNCPY
1460 sub $16, %r8
1461 jbe L(StrncpyExit10Case2OrCase3) /* r8 was <= 16: length limit ends in this chunk */
1462 # endif
1463 test %rax, %rax
1464 jnz L(Shl10LoopExit) /* match in this chunk */
1465
1466 palignr $10, %xmm1, %xmm2 /* xmm2 = top 6 bytes of xmm1 followed by low 10 bytes of xmm2 */
1467 movaps %xmm3, %xmm1 /* current chunk becomes the previous one */
1468 movaps %xmm2, (%rdx)
1469 movaps 22(%rcx), %xmm2 /* second chunk */
1470
1471 pcmpeqb %xmm2, %xmm0 /* xmm0 is all-zero again here: the previous mask was empty */
1472 lea 16(%rdx), %rdx
1473 pmovmskb %xmm0, %rax
1474 lea 16(%rcx), %rcx
1475 movaps %xmm2, %xmm3
1476 # ifdef USE_AS_STRNCPY
1477 sub $16, %r8
1478 jbe L(StrncpyExit10Case2OrCase3)
1479 # endif
1480 test %rax, %rax
1481 jnz L(Shl10LoopExit)
1482
1483 palignr $10, %xmm1, %xmm2
1484 movaps %xmm2, (%rdx)
1485 movaps 22(%rcx), %xmm2 /* third chunk */
1486 movaps %xmm3, %xmm1
1487
1488 pcmpeqb %xmm2, %xmm0
1489 lea 16(%rdx), %rdx
1490 pmovmskb %xmm0, %rax
1491 lea 16(%rcx), %rcx
1492 movaps %xmm2, %xmm3
1493 # ifdef USE_AS_STRNCPY
1494 sub $16, %r8
1495 jbe L(StrncpyExit10Case2OrCase3)
1496 # endif
1497 test %rax, %rax
1498 jnz L(Shl10LoopExit)
1499
1500 palignr $10, %xmm1, %xmm2
1501 movaps %xmm3, %xmm1
1502 movaps %xmm2, (%rdx)
1503 movaps 22(%rcx), %xmm2 /* fourth chunk */
1504
1505 pcmpeqb %xmm2, %xmm0
1506 lea 16(%rdx), %rdx
1507 pmovmskb %xmm0, %rax
1508 lea 16(%rcx), %rcx
1509 movaps %xmm2, %xmm3
1510 # ifdef USE_AS_STRNCPY
1511 sub $16, %r8
1512 jbe L(StrncpyExit10Case2OrCase3)
1513 # endif
1514 test %rax, %rax
1515 jnz L(Shl10LoopExit)
1516
1517 palignr $10, %xmm1, %xmm2
1518 movaps %xmm3, %xmm1
1519 movaps %xmm2, (%rdx)
1520 lea 22(%rcx), %rcx /* rcx = address of the next aligned source chunk */
1521 lea 16(%rdx), %rdx
1522
1523 mov %rcx, %rax
1524 and $-0x40, %rcx /* round the source down to a 64-byte boundary */
1525 sub %rcx, %rax /* rax = number of bytes stepped back */
1526 lea -6(%rcx), %rcx /* re-apply the bias so that 6(%rcx) is the aligned chunk */
1527 sub %rax, %rdx /* step the destination back by the same amount */
1528 # ifdef USE_AS_STRNCPY
1529 add %rax, %r8 /* the stepped-back bytes count against the length again */
1530 # endif
1531 movaps -10(%rcx), %xmm1 /* chunk preceding the loop's first chunk */
1532
/* 64-byte main loop of the shift-by-10 path.  Four aligned 16-byte source
   chunks are loaded, tested together for a byte equal to %xmm0 (presumably
   all-zero; set outside this view), then shifted by 10 bytes with palignr
   and stored at (%rdx).  %xmm1 carries the last unshifted chunk from the
   previous iteration.  */
1533 L(Shl10LoopStart):
1534 movaps 6(%rcx), %xmm2 /* chunk 0 */
1535 movaps 22(%rcx), %xmm3 /* chunk 1 */
1536 movaps %xmm3, %xmm6
1537 movaps 38(%rcx), %xmm4 /* chunk 2 */
1538 movaps %xmm4, %xmm7
1539 movaps 54(%rcx), %xmm5 /* chunk 3 */
1540 pminub %xmm2, %xmm6 /* xmm6 = bytewise min (chunk 0, chunk 1) */
1541 pminub %xmm5, %xmm7 /* xmm7 = bytewise min (chunk 2, chunk 3) */
1542 pminub %xmm6, %xmm7 /* xmm7 = bytewise min of all four chunks */
1543 pcmpeqb %xmm0, %xmm7 /* 0xff where the minimum equals %xmm0; %xmm0 itself is unchanged */
1544 pmovmskb %xmm7, %rax /* rax != 0 iff some byte in the 64 matched */
1545 movaps %xmm5, %xmm7 /* keep unshifted chunk 3; it becomes %xmm1 below */
1546 palignr $10, %xmm4, %xmm5
1547 test %rax, %rax
1548 palignr $10, %xmm3, %xmm4 /* palignr does not modify flags, so jnz still sees the test */
1549 jnz L(Shl10Start) /* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
1550 # ifdef USE_AS_STRNCPY
1551 sub $64, %r8
1552 jbe L(StrncpyLeave10) /* r8 was <= 64: length limit ends inside these 64 bytes */
1553 # endif
1554 palignr $10, %xmm2, %xmm3
1555 lea 64(%rcx), %rcx
1556 palignr $10, %xmm1, %xmm2
1557 movaps %xmm7, %xmm1 /* previous chunk for the next iteration */
1558 movaps %xmm5, 48(%rdx)
1559 movaps %xmm4, 32(%rdx)
1560 movaps %xmm3, 16(%rdx)
1561 movaps %xmm2, (%rdx)
1562 lea 64(%rdx), %rdx
1563 jmp L(Shl10LoopStart)
1564
/* A match was found in the chunk held in %xmm2.  Write the last 6 bytes of
   the previous chunk (%xmm1) to destination bytes 0..5 while leaving
   destination bytes 6..15 as they are, then let L(CopyFrom1To16Bytes)
   finish from 6 bytes further on (%rsi is the offset it adds to %rdx and
   %rcx).  */
1565 L(Shl10LoopExit):
1566 movaps (%rdx), %xmm6 /* current destination contents */
1567 psrldq $6, %xmm6 /* move destination bytes 6..15 down to 0..9 */
1568 mov $6, %rsi
1569 palignr $10, %xmm1, %xmm6 /* xmm6 = top 6 bytes of xmm1, then old destination bytes 6..15 */
1570 movaps %xmm6, (%rdx)
1571 jmp L(CopyFrom1To16Bytes)
1572
/* Shift-by-11 copy path.  movaps faults on unaligned addresses, so this path
   relies on %rcx - 11 and %rdx being 16-byte aligned.  Each step combines two
   aligned source chunks with palignr into the 16 bytes starting at %rcx and
   stores them at (%rdx).  Four chunks are handled one at a time, each checked
   for a match against %xmm0 (presumably all-zero, i.e. a NUL test; %xmm0 is
   set outside this view) and, for strncpy, against the remaining length in
   %r8; then the pointers are rewound to a 64-byte source boundary and
   execution falls into L(Shl11LoopStart).  */
1573 .p2align 4
1574 L(Shl11):
1575 movaps -11(%rcx), %xmm1 /* aligned chunk whose top 5 bytes are source bytes 0..4 */
1576 movaps 5(%rcx), %xmm2 /* next aligned chunk: source bytes 5..20 */
1577 L(Shl11Start):
1578 pcmpeqb %xmm2, %xmm0 /* xmm0 = 0xff in every byte where xmm2 equals xmm0 */
1579 pmovmskb %xmm0, %rax /* rax = 16-bit match mask */
1580 movaps %xmm2, %xmm3 /* palignr below overwrites xmm2 */
1581 # ifdef USE_AS_STRNCPY
1582 sub $16, %r8
1583 jbe L(StrncpyExit11Case2OrCase3) /* r8 was <= 16: length limit ends in this chunk */
1584 # endif
1585 test %rax, %rax
1586 jnz L(Shl11LoopExit) /* match in this chunk */
1587
1588 palignr $11, %xmm1, %xmm2 /* xmm2 = top 5 bytes of xmm1 followed by low 11 bytes of xmm2 */
1589 movaps %xmm3, %xmm1 /* current chunk becomes the previous one */
1590 movaps %xmm2, (%rdx)
1591 movaps 21(%rcx), %xmm2 /* second chunk */
1592
1593 pcmpeqb %xmm2, %xmm0 /* xmm0 is all-zero again here: the previous mask was empty */
1594 lea 16(%rdx), %rdx
1595 pmovmskb %xmm0, %rax
1596 lea 16(%rcx), %rcx
1597 movaps %xmm2, %xmm3
1598 # ifdef USE_AS_STRNCPY
1599 sub $16, %r8
1600 jbe L(StrncpyExit11Case2OrCase3)
1601 # endif
1602 test %rax, %rax
1603 jnz L(Shl11LoopExit)
1604
1605 palignr $11, %xmm1, %xmm2
1606 movaps %xmm2, (%rdx)
1607 movaps 21(%rcx), %xmm2 /* third chunk */
1608 movaps %xmm3, %xmm1
1609
1610 pcmpeqb %xmm2, %xmm0
1611 lea 16(%rdx), %rdx
1612 pmovmskb %xmm0, %rax
1613 lea 16(%rcx), %rcx
1614 movaps %xmm2, %xmm3
1615 # ifdef USE_AS_STRNCPY
1616 sub $16, %r8
1617 jbe L(StrncpyExit11Case2OrCase3)
1618 # endif
1619 test %rax, %rax
1620 jnz L(Shl11LoopExit)
1621
1622 palignr $11, %xmm1, %xmm2
1623 movaps %xmm3, %xmm1
1624 movaps %xmm2, (%rdx)
1625 movaps 21(%rcx), %xmm2 /* fourth chunk */
1626
1627 pcmpeqb %xmm2, %xmm0
1628 lea 16(%rdx), %rdx
1629 pmovmskb %xmm0, %rax
1630 lea 16(%rcx), %rcx
1631 movaps %xmm2, %xmm3
1632 # ifdef USE_AS_STRNCPY
1633 sub $16, %r8
1634 jbe L(StrncpyExit11Case2OrCase3)
1635 # endif
1636 test %rax, %rax
1637 jnz L(Shl11LoopExit)
1638
1639 palignr $11, %xmm1, %xmm2
1640 movaps %xmm3, %xmm1
1641 movaps %xmm2, (%rdx)
1642 lea 21(%rcx), %rcx /* rcx = address of the next aligned source chunk */
1643 lea 16(%rdx), %rdx
1644
1645 mov %rcx, %rax
1646 and $-0x40, %rcx /* round the source down to a 64-byte boundary */
1647 sub %rcx, %rax /* rax = number of bytes stepped back */
1648 lea -5(%rcx), %rcx /* re-apply the bias so that 5(%rcx) is the aligned chunk */
1649 sub %rax, %rdx /* step the destination back by the same amount */
1650 # ifdef USE_AS_STRNCPY
1651 add %rax, %r8 /* the stepped-back bytes count against the length again */
1652 # endif
1653 movaps -11(%rcx), %xmm1 /* chunk preceding the loop's first chunk */
1654
/* 64-byte main loop of the shift-by-11 path.  Four aligned 16-byte source
   chunks are loaded, tested together for a byte equal to %xmm0 (presumably
   all-zero; set outside this view), then shifted by 11 bytes with palignr
   and stored at (%rdx).  %xmm1 carries the last unshifted chunk from the
   previous iteration.  */
1655 L(Shl11LoopStart):
1656 movaps 5(%rcx), %xmm2 /* chunk 0 */
1657 movaps 21(%rcx), %xmm3 /* chunk 1 */
1658 movaps %xmm3, %xmm6
1659 movaps 37(%rcx), %xmm4 /* chunk 2 */
1660 movaps %xmm4, %xmm7
1661 movaps 53(%rcx), %xmm5 /* chunk 3 */
1662 pminub %xmm2, %xmm6 /* xmm6 = bytewise min (chunk 0, chunk 1) */
1663 pminub %xmm5, %xmm7 /* xmm7 = bytewise min (chunk 2, chunk 3) */
1664 pminub %xmm6, %xmm7 /* xmm7 = bytewise min of all four chunks */
1665 pcmpeqb %xmm0, %xmm7 /* 0xff where the minimum equals %xmm0; %xmm0 itself is unchanged */
1666 pmovmskb %xmm7, %rax /* rax != 0 iff some byte in the 64 matched */
1667 movaps %xmm5, %xmm7 /* keep unshifted chunk 3; it becomes %xmm1 below */
1668 palignr $11, %xmm4, %xmm5
1669 test %rax, %rax
1670 palignr $11, %xmm3, %xmm4 /* palignr does not modify flags, so jnz still sees the test */
1671 jnz L(Shl11Start) /* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
1672 # ifdef USE_AS_STRNCPY
1673 sub $64, %r8
1674 jbe L(StrncpyLeave11) /* r8 was <= 64: length limit ends inside these 64 bytes */
1675 # endif
1676 palignr $11, %xmm2, %xmm3
1677 lea 64(%rcx), %rcx
1678 palignr $11, %xmm1, %xmm2
1679 movaps %xmm7, %xmm1 /* previous chunk for the next iteration */
1680 movaps %xmm5, 48(%rdx)
1681 movaps %xmm4, 32(%rdx)
1682 movaps %xmm3, 16(%rdx)
1683 movaps %xmm2, (%rdx)
1684 lea 64(%rdx), %rdx
1685 jmp L(Shl11LoopStart)
1686
/* A match was found in the chunk held in %xmm2.  Write the last 5 bytes of
   the previous chunk (%xmm1) to destination bytes 0..4 while leaving
   destination bytes 5..15 as they are, then let L(CopyFrom1To16Bytes)
   finish from 5 bytes further on (%rsi is the offset it adds to %rdx and
   %rcx).  */
1687 L(Shl11LoopExit):
1688 movaps (%rdx), %xmm6 /* current destination contents */
1689 psrldq $5, %xmm6 /* move destination bytes 5..15 down to 0..10 */
1690 mov $5, %rsi
1691 palignr $11, %xmm1, %xmm6 /* xmm6 = top 5 bytes of xmm1, then old destination bytes 5..15 */
1692 movaps %xmm6, (%rdx)
1693 jmp L(CopyFrom1To16Bytes)
1694
/* Shift-by-12 copy path.  movaps faults on unaligned addresses, so this path
   relies on %rcx - 12 and %rdx being 16-byte aligned.  Each step combines two
   aligned source chunks with palignr into the 16 bytes starting at %rcx and
   stores them at (%rdx).  Four chunks are handled one at a time, each checked
   for a match against %xmm0 (presumably all-zero, i.e. a NUL test; %xmm0 is
   set outside this view) and, for strncpy, against the remaining length in
   %r8; then the pointers are rewound to a 64-byte source boundary and
   execution falls into L(Shl12LoopStart).  */
1695 .p2align 4
1696 L(Shl12):
1697 movaps -12(%rcx), %xmm1 /* aligned chunk whose top 4 bytes are source bytes 0..3 */
1698 movaps 4(%rcx), %xmm2 /* next aligned chunk: source bytes 4..19 */
1699 L(Shl12Start):
1700 pcmpeqb %xmm2, %xmm0 /* xmm0 = 0xff in every byte where xmm2 equals xmm0 */
1701 pmovmskb %xmm0, %rax /* rax = 16-bit match mask */
1702 movaps %xmm2, %xmm3 /* palignr below overwrites xmm2 */
1703 # ifdef USE_AS_STRNCPY
1704 sub $16, %r8
1705 jbe L(StrncpyExit12Case2OrCase3) /* r8 was <= 16: length limit ends in this chunk */
1706 # endif
1707 test %rax, %rax
1708 jnz L(Shl12LoopExit) /* match in this chunk */
1709
1710 palignr $12, %xmm1, %xmm2 /* xmm2 = top 4 bytes of xmm1 followed by low 12 bytes of xmm2 */
1711 movaps %xmm3, %xmm1 /* current chunk becomes the previous one */
1712 movaps %xmm2, (%rdx)
1713 movaps 20(%rcx), %xmm2 /* second chunk */
1714
1715 pcmpeqb %xmm2, %xmm0 /* xmm0 is all-zero again here: the previous mask was empty */
1716 lea 16(%rdx), %rdx
1717 pmovmskb %xmm0, %rax
1718 lea 16(%rcx), %rcx
1719 movaps %xmm2, %xmm3
1720 # ifdef USE_AS_STRNCPY
1721 sub $16, %r8
1722 jbe L(StrncpyExit12Case2OrCase3)
1723 # endif
1724 test %rax, %rax
1725 jnz L(Shl12LoopExit)
1726
1727 palignr $12, %xmm1, %xmm2
1728 movaps %xmm2, (%rdx)
1729 movaps 20(%rcx), %xmm2 /* third chunk */
1730 movaps %xmm3, %xmm1
1731
1732 pcmpeqb %xmm2, %xmm0
1733 lea 16(%rdx), %rdx
1734 pmovmskb %xmm0, %rax
1735 lea 16(%rcx), %rcx
1736 movaps %xmm2, %xmm3
1737 # ifdef USE_AS_STRNCPY
1738 sub $16, %r8
1739 jbe L(StrncpyExit12Case2OrCase3)
1740 # endif
1741 test %rax, %rax
1742 jnz L(Shl12LoopExit)
1743
1744 palignr $12, %xmm1, %xmm2
1745 movaps %xmm3, %xmm1
1746 movaps %xmm2, (%rdx)
1747 movaps 20(%rcx), %xmm2 /* fourth chunk */
1748
1749 pcmpeqb %xmm2, %xmm0
1750 lea 16(%rdx), %rdx
1751 pmovmskb %xmm0, %rax
1752 lea 16(%rcx), %rcx
1753 movaps %xmm2, %xmm3
1754 # ifdef USE_AS_STRNCPY
1755 sub $16, %r8
1756 jbe L(StrncpyExit12Case2OrCase3)
1757 # endif
1758 test %rax, %rax
1759 jnz L(Shl12LoopExit)
1760
1761 palignr $12, %xmm1, %xmm2
1762 movaps %xmm3, %xmm1
1763 movaps %xmm2, (%rdx)
1764 lea 20(%rcx), %rcx /* rcx = address of the next aligned source chunk */
1765 lea 16(%rdx), %rdx
1766
1767 mov %rcx, %rax
1768 and $-0x40, %rcx /* round the source down to a 64-byte boundary */
1769 sub %rcx, %rax /* rax = number of bytes stepped back */
1770 lea -4(%rcx), %rcx /* re-apply the bias so that 4(%rcx) is the aligned chunk */
1771 sub %rax, %rdx /* step the destination back by the same amount */
1772 # ifdef USE_AS_STRNCPY
1773 add %rax, %r8 /* the stepped-back bytes count against the length again */
1774 # endif
1775 movaps -12(%rcx), %xmm1 /* chunk preceding the loop's first chunk */
1776
/* 64-byte main loop of the shift-by-12 path.  Four aligned 16-byte source
   chunks are loaded, tested together for a byte equal to %xmm0 (presumably
   all-zero; set outside this view), then shifted by 12 bytes with palignr
   and stored at (%rdx).  %xmm1 carries the last unshifted chunk from the
   previous iteration.  */
1777 L(Shl12LoopStart):
1778 movaps 4(%rcx), %xmm2 /* chunk 0 */
1779 movaps 20(%rcx), %xmm3 /* chunk 1 */
1780 movaps %xmm3, %xmm6
1781 movaps 36(%rcx), %xmm4 /* chunk 2 */
1782 movaps %xmm4, %xmm7
1783 movaps 52(%rcx), %xmm5 /* chunk 3 */
1784 pminub %xmm2, %xmm6 /* xmm6 = bytewise min (chunk 0, chunk 1) */
1785 pminub %xmm5, %xmm7 /* xmm7 = bytewise min (chunk 2, chunk 3) */
1786 pminub %xmm6, %xmm7 /* xmm7 = bytewise min of all four chunks */
1787 pcmpeqb %xmm0, %xmm7 /* 0xff where the minimum equals %xmm0; %xmm0 itself is unchanged */
1788 pmovmskb %xmm7, %rax /* rax != 0 iff some byte in the 64 matched */
1789 movaps %xmm5, %xmm7 /* keep unshifted chunk 3; it becomes %xmm1 below */
1790 palignr $12, %xmm4, %xmm5
1791 test %rax, %rax
1792 palignr $12, %xmm3, %xmm4 /* palignr does not modify flags, so jnz still sees the test */
1793 jnz L(Shl12Start) /* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
1794 # ifdef USE_AS_STRNCPY
1795 sub $64, %r8
1796 jbe L(StrncpyLeave12) /* r8 was <= 64: length limit ends inside these 64 bytes */
1797 # endif
1798 palignr $12, %xmm2, %xmm3
1799 lea 64(%rcx), %rcx
1800 palignr $12, %xmm1, %xmm2
1801 movaps %xmm7, %xmm1 /* previous chunk for the next iteration */
1802 movaps %xmm5, 48(%rdx)
1803 movaps %xmm4, 32(%rdx)
1804 movaps %xmm3, 16(%rdx)
1805 movaps %xmm2, (%rdx)
1806 lea 64(%rdx), %rdx
1807 jmp L(Shl12LoopStart)
1808
/* A match was found in the chunk held in %xmm2.  Write the last 4 bytes of
   the previous chunk (%xmm1) to destination bytes 0..3 while leaving
   destination bytes 4..15 as they are, then let L(CopyFrom1To16Bytes)
   finish from 4 bytes further on (%rsi is the offset it adds to %rdx and
   %rcx).  */
1809 L(Shl12LoopExit):
1810 movaps (%rdx), %xmm6 /* current destination contents */
1811 psrldq $4, %xmm6 /* move destination bytes 4..15 down to 0..11 */
1812 mov $4, %rsi
1813 palignr $12, %xmm1, %xmm6 /* xmm6 = top 4 bytes of xmm1, then old destination bytes 4..15 */
1814 movaps %xmm6, (%rdx)
1815 jmp L(CopyFrom1To16Bytes)
1816
/* Shift-by-13 copy path.  movaps faults on unaligned addresses, so this path
   relies on %rcx - 13 and %rdx being 16-byte aligned.  Each step combines two
   aligned source chunks with palignr into the 16 bytes starting at %rcx and
   stores them at (%rdx).  Four chunks are handled one at a time, each checked
   for a match against %xmm0 (presumably all-zero, i.e. a NUL test; %xmm0 is
   set outside this view) and, for strncpy, against the remaining length in
   %r8; then the pointers are rewound to a 64-byte source boundary and
   execution falls into L(Shl13LoopStart).  */
1817 .p2align 4
1818 L(Shl13):
1819 movaps -13(%rcx), %xmm1 /* aligned chunk whose top 3 bytes are source bytes 0..2 */
1820 movaps 3(%rcx), %xmm2 /* next aligned chunk: source bytes 3..18 */
1821 L(Shl13Start):
1822 pcmpeqb %xmm2, %xmm0 /* xmm0 = 0xff in every byte where xmm2 equals xmm0 */
1823 pmovmskb %xmm0, %rax /* rax = 16-bit match mask */
1824 movaps %xmm2, %xmm3 /* palignr below overwrites xmm2 */
1825 # ifdef USE_AS_STRNCPY
1826 sub $16, %r8
1827 jbe L(StrncpyExit13Case2OrCase3) /* r8 was <= 16: length limit ends in this chunk */
1828 # endif
1829 test %rax, %rax
1830 jnz L(Shl13LoopExit) /* match in this chunk */
1831
1832 palignr $13, %xmm1, %xmm2 /* xmm2 = top 3 bytes of xmm1 followed by low 13 bytes of xmm2 */
1833 movaps %xmm3, %xmm1 /* current chunk becomes the previous one */
1834 movaps %xmm2, (%rdx)
1835 movaps 19(%rcx), %xmm2 /* second chunk */
1836
1837 pcmpeqb %xmm2, %xmm0 /* xmm0 is all-zero again here: the previous mask was empty */
1838 lea 16(%rdx), %rdx
1839 pmovmskb %xmm0, %rax
1840 lea 16(%rcx), %rcx
1841 movaps %xmm2, %xmm3
1842 # ifdef USE_AS_STRNCPY
1843 sub $16, %r8
1844 jbe L(StrncpyExit13Case2OrCase3)
1845 # endif
1846 test %rax, %rax
1847 jnz L(Shl13LoopExit)
1848
1849 palignr $13, %xmm1, %xmm2
1850 movaps %xmm2, (%rdx)
1851 movaps 19(%rcx), %xmm2 /* third chunk */
1852 movaps %xmm3, %xmm1
1853
1854 pcmpeqb %xmm2, %xmm0
1855 lea 16(%rdx), %rdx
1856 pmovmskb %xmm0, %rax
1857 lea 16(%rcx), %rcx
1858 movaps %xmm2, %xmm3
1859 # ifdef USE_AS_STRNCPY
1860 sub $16, %r8
1861 jbe L(StrncpyExit13Case2OrCase3)
1862 # endif
1863 test %rax, %rax
1864 jnz L(Shl13LoopExit)
1865
1866 palignr $13, %xmm1, %xmm2
1867 movaps %xmm3, %xmm1
1868 movaps %xmm2, (%rdx)
1869 movaps 19(%rcx), %xmm2 /* fourth chunk */
1870
1871 pcmpeqb %xmm2, %xmm0
1872 lea 16(%rdx), %rdx
1873 pmovmskb %xmm0, %rax
1874 lea 16(%rcx), %rcx
1875 movaps %xmm2, %xmm3
1876 # ifdef USE_AS_STRNCPY
1877 sub $16, %r8
1878 jbe L(StrncpyExit13Case2OrCase3)
1879 # endif
1880 test %rax, %rax
1881 jnz L(Shl13LoopExit)
1882
1883 palignr $13, %xmm1, %xmm2
1884 movaps %xmm3, %xmm1
1885 movaps %xmm2, (%rdx)
1886 lea 19(%rcx), %rcx /* rcx = address of the next aligned source chunk */
1887 lea 16(%rdx), %rdx
1888
1889 mov %rcx, %rax
1890 and $-0x40, %rcx /* round the source down to a 64-byte boundary */
1891 sub %rcx, %rax /* rax = number of bytes stepped back */
1892 lea -3(%rcx), %rcx /* re-apply the bias so that 3(%rcx) is the aligned chunk */
1893 sub %rax, %rdx /* step the destination back by the same amount */
1894 # ifdef USE_AS_STRNCPY
1895 add %rax, %r8 /* the stepped-back bytes count against the length again */
1896 # endif
1897 movaps -13(%rcx), %xmm1 /* chunk preceding the loop's first chunk */
1898
/* 64-byte main loop of the shift-by-13 path.  Four aligned 16-byte source
   chunks are loaded, tested together for a byte equal to %xmm0 (presumably
   all-zero; set outside this view), then shifted by 13 bytes with palignr
   and stored at (%rdx).  %xmm1 carries the last unshifted chunk from the
   previous iteration.  */
1899 L(Shl13LoopStart):
1900 movaps 3(%rcx), %xmm2 /* chunk 0 */
1901 movaps 19(%rcx), %xmm3 /* chunk 1 */
1902 movaps %xmm3, %xmm6
1903 movaps 35(%rcx), %xmm4 /* chunk 2 */
1904 movaps %xmm4, %xmm7
1905 movaps 51(%rcx), %xmm5 /* chunk 3 */
1906 pminub %xmm2, %xmm6 /* xmm6 = bytewise min (chunk 0, chunk 1) */
1907 pminub %xmm5, %xmm7 /* xmm7 = bytewise min (chunk 2, chunk 3) */
1908 pminub %xmm6, %xmm7 /* xmm7 = bytewise min of all four chunks */
1909 pcmpeqb %xmm0, %xmm7 /* 0xff where the minimum equals %xmm0; %xmm0 itself is unchanged */
1910 pmovmskb %xmm7, %rax /* rax != 0 iff some byte in the 64 matched */
1911 movaps %xmm5, %xmm7 /* keep unshifted chunk 3; it becomes %xmm1 below */
1912 palignr $13, %xmm4, %xmm5
1913 test %rax, %rax
1914 palignr $13, %xmm3, %xmm4 /* palignr does not modify flags, so jnz still sees the test */
1915 jnz L(Shl13Start) /* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
1916 # ifdef USE_AS_STRNCPY
1917 sub $64, %r8
1918 jbe L(StrncpyLeave13) /* r8 was <= 64: length limit ends inside these 64 bytes */
1919 # endif
1920 palignr $13, %xmm2, %xmm3
1921 lea 64(%rcx), %rcx
1922 palignr $13, %xmm1, %xmm2
1923 movaps %xmm7, %xmm1 /* previous chunk for the next iteration */
1924 movaps %xmm5, 48(%rdx)
1925 movaps %xmm4, 32(%rdx)
1926 movaps %xmm3, 16(%rdx)
1927 movaps %xmm2, (%rdx)
1928 lea 64(%rdx), %rdx
1929 jmp L(Shl13LoopStart)
1930
/* A match was found in the chunk held in %xmm2.  Write the last 3 bytes of
   the previous chunk (%xmm1) to destination bytes 0..2 while leaving
   destination bytes 3..15 as they are, then let L(CopyFrom1To16Bytes)
   finish from 3 bytes further on (%rsi is the offset it adds to %rdx and
   %rcx).  */
1931 L(Shl13LoopExit):
1932 movaps (%rdx), %xmm6 /* current destination contents */
1933 psrldq $3, %xmm6 /* move destination bytes 3..15 down to 0..12 */
1934 mov $3, %rsi
1935 palignr $13, %xmm1, %xmm6 /* xmm6 = top 3 bytes of xmm1, then old destination bytes 3..15 */
1936 movaps %xmm6, (%rdx)
1937 jmp L(CopyFrom1To16Bytes)
1938
/* Shift-by-14 copy path.  movaps faults on unaligned addresses, so this path
   relies on %rcx - 14 and %rdx being 16-byte aligned.  Each step combines two
   aligned source chunks with palignr into the 16 bytes starting at %rcx and
   stores them at (%rdx).  Four chunks are handled one at a time, each checked
   for a match against %xmm0 (presumably all-zero, i.e. a NUL test; %xmm0 is
   set outside this view) and, for strncpy, against the remaining length in
   %r8; then the pointers are rewound to a 64-byte source boundary and
   execution falls into L(Shl14LoopStart).  */
1939 .p2align 4
1940 L(Shl14):
1941 movaps -14(%rcx), %xmm1 /* aligned chunk whose top 2 bytes are source bytes 0..1 */
1942 movaps 2(%rcx), %xmm2 /* next aligned chunk: source bytes 2..17 */
1943 L(Shl14Start):
1944 pcmpeqb %xmm2, %xmm0 /* xmm0 = 0xff in every byte where xmm2 equals xmm0 */
1945 pmovmskb %xmm0, %rax /* rax = 16-bit match mask */
1946 movaps %xmm2, %xmm3 /* palignr below overwrites xmm2 */
1947 # ifdef USE_AS_STRNCPY
1948 sub $16, %r8
1949 jbe L(StrncpyExit14Case2OrCase3) /* r8 was <= 16: length limit ends in this chunk */
1950 # endif
1951 test %rax, %rax
1952 jnz L(Shl14LoopExit) /* match in this chunk */
1953
1954 palignr $14, %xmm1, %xmm2 /* xmm2 = top 2 bytes of xmm1 followed by low 14 bytes of xmm2 */
1955 movaps %xmm3, %xmm1 /* current chunk becomes the previous one */
1956 movaps %xmm2, (%rdx)
1957 movaps 18(%rcx), %xmm2 /* second chunk */
1958
1959 pcmpeqb %xmm2, %xmm0 /* xmm0 is all-zero again here: the previous mask was empty */
1960 lea 16(%rdx), %rdx
1961 pmovmskb %xmm0, %rax
1962 lea 16(%rcx), %rcx
1963 movaps %xmm2, %xmm3
1964 # ifdef USE_AS_STRNCPY
1965 sub $16, %r8
1966 jbe L(StrncpyExit14Case2OrCase3)
1967 # endif
1968 test %rax, %rax
1969 jnz L(Shl14LoopExit)
1970
1971 palignr $14, %xmm1, %xmm2
1972 movaps %xmm2, (%rdx)
1973 movaps 18(%rcx), %xmm2 /* third chunk */
1974 movaps %xmm3, %xmm1
1975
1976 pcmpeqb %xmm2, %xmm0
1977 lea 16(%rdx), %rdx
1978 pmovmskb %xmm0, %rax
1979 lea 16(%rcx), %rcx
1980 movaps %xmm2, %xmm3
1981 # ifdef USE_AS_STRNCPY
1982 sub $16, %r8
1983 jbe L(StrncpyExit14Case2OrCase3)
1984 # endif
1985 test %rax, %rax
1986 jnz L(Shl14LoopExit)
1987
1988 palignr $14, %xmm1, %xmm2
1989 movaps %xmm3, %xmm1
1990 movaps %xmm2, (%rdx)
1991 movaps 18(%rcx), %xmm2 /* fourth chunk */
1992
1993 pcmpeqb %xmm2, %xmm0
1994 lea 16(%rdx), %rdx
1995 pmovmskb %xmm0, %rax
1996 lea 16(%rcx), %rcx
1997 movaps %xmm2, %xmm3
1998 # ifdef USE_AS_STRNCPY
1999 sub $16, %r8
2000 jbe L(StrncpyExit14Case2OrCase3)
2001 # endif
2002 test %rax, %rax
2003 jnz L(Shl14LoopExit)
2004
2005 palignr $14, %xmm1, %xmm2
2006 movaps %xmm3, %xmm1
2007 movaps %xmm2, (%rdx)
2008 lea 18(%rcx), %rcx /* rcx = address of the next aligned source chunk */
2009 lea 16(%rdx), %rdx
2010
2011 mov %rcx, %rax
2012 and $-0x40, %rcx /* round the source down to a 64-byte boundary */
2013 sub %rcx, %rax /* rax = number of bytes stepped back */
2014 lea -2(%rcx), %rcx /* re-apply the bias so that 2(%rcx) is the aligned chunk */
2015 sub %rax, %rdx /* step the destination back by the same amount */
2016 # ifdef USE_AS_STRNCPY
2017 add %rax, %r8 /* the stepped-back bytes count against the length again */
2018 # endif
2019 movaps -14(%rcx), %xmm1 /* chunk preceding the loop's first chunk */
2020
/* 64-byte main loop of the shift-by-14 path.  Four aligned 16-byte source
   chunks are loaded, tested together for a byte equal to %xmm0 (presumably
   all-zero; set outside this view), then shifted by 14 bytes with palignr
   and stored at (%rdx).  %xmm1 carries the last unshifted chunk from the
   previous iteration.  */
2021 L(Shl14LoopStart):
2022 movaps 2(%rcx), %xmm2 /* chunk 0 */
2023 movaps 18(%rcx), %xmm3 /* chunk 1 */
2024 movaps %xmm3, %xmm6
2025 movaps 34(%rcx), %xmm4 /* chunk 2 */
2026 movaps %xmm4, %xmm7
2027 movaps 50(%rcx), %xmm5 /* chunk 3 */
2028 pminub %xmm2, %xmm6 /* xmm6 = bytewise min (chunk 0, chunk 1) */
2029 pminub %xmm5, %xmm7 /* xmm7 = bytewise min (chunk 2, chunk 3) */
2030 pminub %xmm6, %xmm7 /* xmm7 = bytewise min of all four chunks */
2031 pcmpeqb %xmm0, %xmm7 /* 0xff where the minimum equals %xmm0; %xmm0 itself is unchanged */
2032 pmovmskb %xmm7, %rax /* rax != 0 iff some byte in the 64 matched */
2033 movaps %xmm5, %xmm7 /* keep unshifted chunk 3; it becomes %xmm1 below */
2034 palignr $14, %xmm4, %xmm5
2035 test %rax, %rax
2036 palignr $14, %xmm3, %xmm4 /* palignr does not modify flags, so jnz still sees the test */
2037 jnz L(Shl14Start) /* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
2038 # ifdef USE_AS_STRNCPY
2039 sub $64, %r8
2040 jbe L(StrncpyLeave14) /* r8 was <= 64: length limit ends inside these 64 bytes */
2041 # endif
2042 palignr $14, %xmm2, %xmm3
2043 lea 64(%rcx), %rcx
2044 palignr $14, %xmm1, %xmm2
2045 movaps %xmm7, %xmm1 /* previous chunk for the next iteration */
2046 movaps %xmm5, 48(%rdx)
2047 movaps %xmm4, 32(%rdx)
2048 movaps %xmm3, 16(%rdx)
2049 movaps %xmm2, (%rdx)
2050 lea 64(%rdx), %rdx
2051 jmp L(Shl14LoopStart)
2052
/* A match was found in the chunk held in %xmm2.  Write the last 2 bytes of
   the previous chunk (%xmm1) to destination bytes 0..1 while leaving
   destination bytes 2..15 as they are, then let L(CopyFrom1To16Bytes)
   finish from 2 bytes further on (%rsi is the offset it adds to %rdx and
   %rcx).  */
2053 L(Shl14LoopExit):
2054 movaps (%rdx), %xmm6 /* current destination contents */
2055 psrldq $2, %xmm6 /* move destination bytes 2..15 down to 0..13 */
2056 mov $2, %rsi
2057 palignr $14, %xmm1, %xmm6 /* xmm6 = top 2 bytes of xmm1, then old destination bytes 2..15 */
2058 movaps %xmm6, (%rdx)
2059 jmp L(CopyFrom1To16Bytes)
2060
/* Shift-by-15 copy path.  movaps faults on unaligned addresses, so this path
   relies on %rcx - 15 and %rdx being 16-byte aligned.  Each step combines two
   aligned source chunks with palignr into the 16 bytes starting at %rcx and
   stores them at (%rdx).  Four chunks are handled one at a time, each checked
   for a match against %xmm0 (presumably all-zero, i.e. a NUL test; %xmm0 is
   set outside this view) and, for strncpy, against the remaining length in
   %r8; then the pointers are rewound to a 64-byte source boundary and
   execution falls into L(Shl15LoopStart).  */
2061 .p2align 4
2062 L(Shl15):
2063 movaps -15(%rcx), %xmm1 /* aligned chunk whose top byte is source byte 0 */
2064 movaps 1(%rcx), %xmm2 /* next aligned chunk: source bytes 1..16 */
2065 L(Shl15Start):
2066 pcmpeqb %xmm2, %xmm0 /* xmm0 = 0xff in every byte where xmm2 equals xmm0 */
2067 pmovmskb %xmm0, %rax /* rax = 16-bit match mask */
2068 movaps %xmm2, %xmm3 /* palignr below overwrites xmm2 */
2069 # ifdef USE_AS_STRNCPY
2070 sub $16, %r8
2071 jbe L(StrncpyExit15Case2OrCase3) /* r8 was <= 16: length limit ends in this chunk */
2072 # endif
2073 test %rax, %rax
2074 jnz L(Shl15LoopExit) /* match in this chunk */
2075
2076 palignr $15, %xmm1, %xmm2 /* xmm2 = top byte of xmm1 followed by low 15 bytes of xmm2 */
2077 movaps %xmm3, %xmm1 /* current chunk becomes the previous one */
2078 movaps %xmm2, (%rdx)
2079 movaps 17(%rcx), %xmm2 /* second chunk */
2080
2081 pcmpeqb %xmm2, %xmm0 /* xmm0 is all-zero again here: the previous mask was empty */
2082 lea 16(%rdx), %rdx
2083 pmovmskb %xmm0, %rax
2084 lea 16(%rcx), %rcx
2085 movaps %xmm2, %xmm3
2086 # ifdef USE_AS_STRNCPY
2087 sub $16, %r8
2088 jbe L(StrncpyExit15Case2OrCase3)
2089 # endif
2090 test %rax, %rax
2091 jnz L(Shl15LoopExit)
2092
2093 palignr $15, %xmm1, %xmm2
2094 movaps %xmm2, (%rdx)
2095 movaps 17(%rcx), %xmm2 /* third chunk */
2096 movaps %xmm3, %xmm1
2097
2098 pcmpeqb %xmm2, %xmm0
2099 lea 16(%rdx), %rdx
2100 pmovmskb %xmm0, %rax
2101 lea 16(%rcx), %rcx
2102 movaps %xmm2, %xmm3
2103 # ifdef USE_AS_STRNCPY
2104 sub $16, %r8
2105 jbe L(StrncpyExit15Case2OrCase3)
2106 # endif
2107 test %rax, %rax
2108 jnz L(Shl15LoopExit)
2109
2110 palignr $15, %xmm1, %xmm2
2111 movaps %xmm3, %xmm1
2112 movaps %xmm2, (%rdx)
2113 movaps 17(%rcx), %xmm2 /* fourth chunk */
2114
2115 pcmpeqb %xmm2, %xmm0
2116 lea 16(%rdx), %rdx
2117 pmovmskb %xmm0, %rax
2118 lea 16(%rcx), %rcx
2119 movaps %xmm2, %xmm3
2120 # ifdef USE_AS_STRNCPY
2121 sub $16, %r8
2122 jbe L(StrncpyExit15Case2OrCase3)
2123 # endif
2124 test %rax, %rax
2125 jnz L(Shl15LoopExit)
2126
2127 palignr $15, %xmm1, %xmm2
2128 movaps %xmm3, %xmm1
2129 movaps %xmm2, (%rdx)
2130 lea 17(%rcx), %rcx /* rcx = address of the next aligned source chunk */
2131 lea 16(%rdx), %rdx
2132
2133 mov %rcx, %rax
2134 and $-0x40, %rcx /* round the source down to a 64-byte boundary */
2135 sub %rcx, %rax /* rax = number of bytes stepped back */
2136 lea -1(%rcx), %rcx /* re-apply the bias so that 1(%rcx) is the aligned chunk */
2137 sub %rax, %rdx /* step the destination back by the same amount */
2138 # ifdef USE_AS_STRNCPY
2139 add %rax, %r8 /* the stepped-back bytes count against the length again */
2140 # endif
2141 movaps -15(%rcx), %xmm1 /* chunk preceding the loop's first chunk */
2142
/* 64-byte main loop of the shift-by-15 path.  Four aligned 16-byte source
   chunks are loaded, tested together for a byte equal to %xmm0 (presumably
   all-zero; set outside this view), then shifted by 15 bytes with palignr
   and stored at (%rdx).  %xmm1 carries the last unshifted chunk from the
   previous iteration.  */
2143 L(Shl15LoopStart):
2144 movaps 1(%rcx), %xmm2 /* chunk 0 */
2145 movaps 17(%rcx), %xmm3 /* chunk 1 */
2146 movaps %xmm3, %xmm6
2147 movaps 33(%rcx), %xmm4 /* chunk 2 */
2148 movaps %xmm4, %xmm7
2149 movaps 49(%rcx), %xmm5 /* chunk 3 */
2150 pminub %xmm2, %xmm6 /* xmm6 = bytewise min (chunk 0, chunk 1) */
2151 pminub %xmm5, %xmm7 /* xmm7 = bytewise min (chunk 2, chunk 3) */
2152 pminub %xmm6, %xmm7 /* xmm7 = bytewise min of all four chunks */
2153 pcmpeqb %xmm0, %xmm7 /* 0xff where the minimum equals %xmm0; %xmm0 itself is unchanged */
2154 pmovmskb %xmm7, %rax /* rax != 0 iff some byte in the 64 matched */
2155 movaps %xmm5, %xmm7 /* keep unshifted chunk 3; it becomes %xmm1 below */
2156 palignr $15, %xmm4, %xmm5
2157 test %rax, %rax
2158 palignr $15, %xmm3, %xmm4 /* palignr does not modify flags, so jnz still sees the test */
2159 jnz L(Shl15Start) /* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
2160 # ifdef USE_AS_STRNCPY
2161 sub $64, %r8
2162 jbe L(StrncpyLeave15) /* r8 was <= 64: length limit ends inside these 64 bytes */
2163 # endif
2164 palignr $15, %xmm2, %xmm3
2165 lea 64(%rcx), %rcx
2166 palignr $15, %xmm1, %xmm2
2167 movaps %xmm7, %xmm1 /* previous chunk for the next iteration */
2168 movaps %xmm5, 48(%rdx)
2169 movaps %xmm4, 32(%rdx)
2170 movaps %xmm3, 16(%rdx)
2171 movaps %xmm2, (%rdx)
2172 lea 64(%rdx), %rdx
2173 jmp L(Shl15LoopStart)
2174
/* A match was found in the chunk held in %xmm2.  Write the last byte of the
   previous chunk (%xmm1) to destination byte 0 while leaving destination
   bytes 1..15 as they are, then continue in L(CopyFrom1To16Bytes) from 1
   byte further on (%rsi is the offset it adds to %rdx and %rcx).  When
   USE_AS_STRCAT is not defined there is no jump: L(CopyFrom1To16Bytes)
   follows directly and is reached by fall-through.  */
2175 L(Shl15LoopExit):
2176 movaps (%rdx), %xmm6 /* current destination contents */
2177 psrldq $1, %xmm6 /* move destination bytes 1..15 down to 0..14 */
2178 mov $1, %rsi
2179 palignr $15, %xmm1, %xmm6 /* xmm6 = top byte of xmm1, then old destination bytes 1..15 */
2180 movaps %xmm6, (%rdx)
2181 # ifdef USE_AS_STRCAT
2182 jmp L(CopyFrom1To16Bytes) /* NOTE(review): presumably the strcat build defines this label elsewhere -- confirm */
2183 # endif
2184
2185 # ifndef USE_AS_STRCAT
/* Final copy when the current 16-byte chunk contains the terminator.
   In:  %rax = pmovmskb match mask for the chunk (bit i set = byte i matched),
        %rsi = offset to add to %rdx (destination) and %rcx (source) to reach
               the chunk,
        %r8  = remaining strncpy length minus 16 (strncpy variants only).
   Dispatches to L(ExitN) where N - 1 is the index of the lowest set mask
   bit, so the terminator is the last byte copied.  */
2186 .p2align 4
2187 L(CopyFrom1To16Bytes):
2188 # ifdef USE_AS_STRNCPY
2189 add $16, %r8 /* undo the sub $16 done before jumping here */
2190 # endif
2191 add %rsi, %rdx
2192 add %rsi, %rcx
2193
2194 test %al, %al
2195 jz L(ExitHigh) /* no match in bytes 0..7: look at bytes 8..15 */
2196 test $0x01, %al
2197 jnz L(Exit1)
2198 test $0x02, %al
2199 jnz L(Exit2)
2200 test $0x04, %al
2201 jnz L(Exit3)
2202 test $0x08, %al
2203 jnz L(Exit4)
2204 test $0x10, %al
2205 jnz L(Exit5)
2206 test $0x20, %al
2207 jnz L(Exit6)
2208 test $0x40, %al
2209 jnz L(Exit7) /* otherwise only bit 7 can be set: fall through into L(Exit8) */
2210
/* Copy 8 bytes from (%rcx) to (%rdx) and finish.  Returns %rdi (the original
   destination) or, for stpcpy variants, the address of the last byte
   written.  For strncpy variants %r8 is reduced by 8 and, if anything is
   left, the rest is zero-filled starting at %rcx = %rdx + 8.  */
2211 .p2align 4
2212 L(Exit8):
2213 mov (%rcx), %rax
2214 mov %rax, (%rdx)
2215 # ifdef USE_AS_STPCPY
2216 lea 7(%rdx), %rax
2217 # else
2218 mov %rdi, %rax
2219 # endif
2220 # ifdef USE_AS_STRNCPY
2221 sub $8, %r8
2222 lea 8(%rdx), %rcx /* lea leaves the flags from sub untouched */
2223 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2224 # ifdef USE_AS_STPCPY
2225 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2226 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2227 # endif
2228 # endif
2229 ret
2230
/* Continuation of L(CopyFrom1To16Bytes) when mask bits 0..7 are clear:
   %ah holds the match bits for bytes 8..15.  Dispatches to L(Exit9) ..
   L(Exit15) on the lowest set bit and falls through into L(Exit16) when
   none of bits 8..14 is set.  */
2231 .p2align 4
2232 L(ExitHigh):
2233 test $0x01, %ah
2234 jnz L(Exit9)
2235 test $0x02, %ah
2236 jnz L(Exit10)
2237 test $0x04, %ah
2238 jnz L(Exit11)
2239 test $0x08, %ah
2240 jnz L(Exit12)
2241 test $0x10, %ah
2242 jnz L(Exit13)
2243 test $0x20, %ah
2244 jnz L(Exit14)
2245 test $0x40, %ah
2246 jnz L(Exit15)
2247
/* Copy 16 bytes from (%rcx) to (%rdx) with two 8-byte moves and finish.
   Returns %rdi (the original destination) or, for stpcpy variants, the
   address of the last byte written.  For strncpy variants %r8 is reduced by
   16 and, if anything is left, the rest is zero-filled starting at
   %rcx = %rdx + 16.  */
2248 .p2align 4
2249 L(Exit16):
2250 mov (%rcx), %rax
2251 mov %rax, (%rdx)
2252 mov 8(%rcx), %rax
2253 mov %rax, 8(%rdx)
2254 # ifdef USE_AS_STPCPY
2255 lea 15(%rdx), %rax
2256 # else
2257 mov %rdi, %rax
2258 # endif
2259 # ifdef USE_AS_STRNCPY
2260 sub $16, %r8
2261 lea 16(%rdx), %rcx /* lea leaves the flags from sub untouched */
2262 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2263 # ifdef USE_AS_STPCPY
2264 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2265 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2266 # endif
2267 # endif
2268 ret
2269
2270 # ifdef USE_AS_STRNCPY
2271
/* strncpy only.  The current chunk contains a match (mask in %rax) and the
   length limit also ends inside it; copy up to whichever comes first.
   In:  %rax = match mask, %rsi = offset of the chunk from %rdx/%rcx,
        %r8 = remaining length minus 16.
   After the adjustment below %r8 is the number of bytes still allowed
   (expected to be 1..16 here).  */
2272 .p2align 4
2273 L(CopyFrom1To16BytesCase2):
2274 add $16, %r8 /* undo the sub $16 done before jumping here */
2275 add %rsi, %rcx /* rcx = source of this chunk */
2276 lea (%rsi, %rdx), %rsi /* rsi = destination of this chunk; rdx is used as scratch next */
2277 lea -9(%r8), %rdx /* negative iff r8 < 9 */
2278 and $1<<7, %dh /* keep only bit 15 of (r8 - 9): set when r8 < 9 for the small counts expected here */
2279 or %al, %dh /* ... combined with the match bits for bytes 0..7 */
2280 test %dh, %dh
2281 lea (%rsi), %rdx /* rdx = destination again; lea keeps the flags from test */
2282 jz L(ExitHighCase2) /* limit is at least 9 and no match in bytes 0..7 */
2283
/* For k = 1..7: stop after k bytes if the limit is k or byte k - 1 matched.  */
2284 cmp $1, %r8
2285 je L(Exit1)
2286 test $0x01, %al
2287 jnz L(Exit1)
2288 cmp $2, %r8
2289 je L(Exit2)
2290 test $0x02, %al
2291 jnz L(Exit2)
2292 cmp $3, %r8
2293 je L(Exit3)
2294 test $0x04, %al
2295 jnz L(Exit3)
2296 cmp $4, %r8
2297 je L(Exit4)
2298 test $0x08, %al
2299 jnz L(Exit4)
2300 cmp $5, %r8
2301 je L(Exit5)
2302 test $0x10, %al
2303 jnz L(Exit5)
2304 cmp $6, %r8
2305 je L(Exit6)
2306 test $0x20, %al
2307 jnz L(Exit6)
2308 cmp $7, %r8
2309 je L(Exit7)
2310 test $0x40, %al
2311 jnz L(Exit7)
2312 jmp L(Exit8) /* limit is 8, or byte 7 is the first match */
2313
/* strncpy only.  Continuation of L(CopyFrom1To16BytesCase2) when the limit
   in %r8 is at least 9 and bytes 0..7 did not match.  For k = 9..15: stop
   after k bytes if the limit is k or byte k - 1 matched (%ah holds the
   match bits for bytes 8..15); otherwise copy all 16.  */
2314 .p2align 4
2315 L(ExitHighCase2):
2316 cmp $9, %r8
2317 je L(Exit9)
2318 test $0x01, %ah
2319 jnz L(Exit9)
2320 cmp $10, %r8
2321 je L(Exit10)
2322 test $0x02, %ah
2323 jnz L(Exit10)
2324 cmp $11, %r8
2325 je L(Exit11)
2326 test $0x04, %ah
2327 jnz L(Exit11)
2328 cmp $12, %r8
2329 je L(Exit12)
2330 test $0x8, %ah
2331 jnz L(Exit12)
2332 cmp $13, %r8
2333 je L(Exit13)
2334 test $0x10, %ah
2335 jnz L(Exit13)
2336 cmp $14, %r8
2337 je L(Exit14)
2338 test $0x20, %ah
2339 jnz L(Exit14)
2340 cmp $15, %r8
2341 je L(Exit15)
2342 test $0x40, %ah
2343 jnz L(Exit15)
2344 jmp L(Exit16)
2345
/* strncpy only.  The length limit ends in the current chunk.  A non-zero
   match mask in %rax selects Case2 (match and limit in the same chunk);
   otherwise execution falls through into L(CopyFrom1To16BytesCase3), which
   follows.  */
2346 L(CopyFrom1To16BytesCase2OrCase3):
2347 test %rax, %rax
2348 jnz L(CopyFrom1To16BytesCase2)
2349
/* strncpy only.  The length limit ends in the current chunk and the chunk
   has no match: copy exactly the remaining bytes.
   In:  %rsi = offset of the chunk from %rdx/%rcx, %r8 = remaining length
        minus 16.
   After the adjustment %r8 is the byte count (expected to be 1..16 here)
   and selects L(Exit<r8>) through a compare tree.  The tree uses signed
   conditions (jg/jl), which is equivalent for counts in that range.  */
2350 .p2align 4
2351 L(CopyFrom1To16BytesCase3):
2352 add $16, %r8 /* undo the sub $16 done before jumping here */
2353 add %rsi, %rdx
2354 add %rsi, %rcx
2355
2356 cmp $16, %r8
2357 je L(Exit16)
2358 cmp $8, %r8
2359 je L(Exit8)
2360 jg L(More8Case3)
2361 cmp $4, %r8
2362 je L(Exit4)
2363 jg L(More4Case3)
2364 cmp $2, %r8
2365 jl L(Exit1)
2366 je L(Exit2)
2367 jg L(Exit3)
2368 L(More8Case3): /* but less than 16 */
2369 cmp $12, %r8
2370 je L(Exit12)
2371 jl L(Less12Case3)
2372 cmp $14, %r8
2373 jl L(Exit13)
2374 je L(Exit14)
2375 jg L(Exit15)
2376 L(More4Case3): /* but less than 8 */
2377 cmp $6, %r8
2378 jl L(Exit5)
2379 je L(Exit6)
2380 jg L(Exit7)
2381 L(Less12Case3): /* but more than 8 */
2382 cmp $10, %r8
2383 jl L(Exit9)
2384 je L(Exit10)
2385 jg L(Exit11)
2386 # endif
2387
/* Copy 1 byte from (%rcx) to (%rdx) and finish.  Returns %rdi (the original
   destination) or, for stpcpy variants, the address of the byte written.
   For strncpy variants %r8 is reduced by 1 and, if anything is left, the
   rest is zero-filled starting at %rcx = %rdx + 1.  */
2388 .p2align 4
2389 L(Exit1):
2390 movb (%rcx), %al
2391 movb %al, (%rdx)
2392 # ifdef USE_AS_STPCPY
2393 lea (%rdx), %rax
2394 # else
2395 mov %rdi, %rax
2396 # endif
2397 # ifdef USE_AS_STRNCPY
2398 sub $1, %r8
2399 lea 1(%rdx), %rcx /* lea leaves the flags from sub untouched */
2400 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2401 # ifdef USE_AS_STPCPY
2402 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2403 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2404 # endif
2405 # endif
2406 ret
2407
/* Copy 2 bytes from (%rcx) to (%rdx) and finish.  Returns %rdi (the original
   destination) or, for stpcpy variants, the address of the last byte
   written.  For strncpy variants %r8 is reduced by 2 and, if anything is
   left, the rest is zero-filled starting at %rcx = %rdx + 2.  */
2408 .p2align 4
2409 L(Exit2):
2410 movw (%rcx), %ax
2411 movw %ax, (%rdx)
2412 # ifdef USE_AS_STPCPY
2413 lea 1(%rdx), %rax
2414 # else
2415 mov %rdi, %rax
2416 # endif
2417 # ifdef USE_AS_STRNCPY
2418 sub $2, %r8
2419 lea 2(%rdx), %rcx /* lea leaves the flags from sub untouched */
2420 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2421 # ifdef USE_AS_STPCPY
2422 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2423 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2424 # endif
2425 # endif
2426 ret
2427
/* Copy 3 bytes from (%rcx) to (%rdx) (2 + 1) and finish.  Returns %rdi (the
   original destination) or, for stpcpy variants, the address of the last
   byte written.  For strncpy variants %r8 is reduced by 3 and, if anything
   is left, the rest is zero-filled starting at %rcx = %rdx + 3.  */
2428 .p2align 4
2429 L(Exit3):
2430 movw (%rcx), %ax
2431 movw %ax, (%rdx)
2432 movb 2(%rcx), %al
2433 movb %al, 2(%rdx)
2434 # ifdef USE_AS_STPCPY
2435 lea 2(%rdx), %rax
2436 # else
2437 mov %rdi, %rax
2438 # endif
2439 # ifdef USE_AS_STRNCPY
2440 sub $3, %r8
2441 lea 3(%rdx), %rcx /* lea leaves the flags from sub untouched */
2442 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2443 # ifdef USE_AS_STPCPY
2444 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2445 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2446 # endif
2447 # endif
2448 ret
2449
/* Copy 4 bytes from (%rcx) to (%rdx) and finish.  Returns %rdi (the original
   destination) or, for stpcpy variants, the address of the last byte
   written.  For strncpy variants %r8 is reduced by 4 and, if anything is
   left, the rest is zero-filled starting at %rcx = %rdx + 4.  */
2450 .p2align 4
2451 L(Exit4):
2452 movl (%rcx), %eax
2453 movl %eax, (%rdx)
2454 # ifdef USE_AS_STPCPY
2455 lea 3(%rdx), %rax
2456 # else
2457 mov %rdi, %rax
2458 # endif
2459 # ifdef USE_AS_STRNCPY
2460 sub $4, %r8
2461 lea 4(%rdx), %rcx /* lea leaves the flags from sub untouched */
2462 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2463 # ifdef USE_AS_STPCPY
2464 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2465 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2466 # endif
2467 # endif
2468 ret
2469
/* Copy 5 bytes from (%rcx) to (%rdx) (4 + 1) and finish.  Returns %rdi (the
   original destination) or, for stpcpy variants, the address of the last
   byte written.  For strncpy variants %r8 is reduced by 5 and, if anything
   is left, the rest is zero-filled starting at %rcx = %rdx + 5.  */
2470 .p2align 4
2471 L(Exit5):
2472 movl (%rcx), %eax
2473 movl %eax, (%rdx)
2474 movb 4(%rcx), %al
2475 movb %al, 4(%rdx)
2476 # ifdef USE_AS_STPCPY
2477 lea 4(%rdx), %rax
2478 # else
2479 mov %rdi, %rax
2480 # endif
2481 # ifdef USE_AS_STRNCPY
2482 sub $5, %r8
2483 lea 5(%rdx), %rcx /* lea leaves the flags from sub untouched */
2484 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2485 # ifdef USE_AS_STPCPY
2486 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2487 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2488 # endif
2489 # endif
2490 ret
2491
/* Copy 6 bytes from (%rcx) to (%rdx) (4 + 2) and finish.  Returns %rdi (the
   original destination) or, for stpcpy variants, the address of the last
   byte written.  For strncpy variants %r8 is reduced by 6 and, if anything
   is left, the rest is zero-filled starting at %rcx = %rdx + 6.  */
2492 .p2align 4
2493 L(Exit6):
2494 movl (%rcx), %eax
2495 movl %eax, (%rdx)
2496 movw 4(%rcx), %ax
2497 movw %ax, 4(%rdx)
2498 # ifdef USE_AS_STPCPY
2499 lea 5(%rdx), %rax
2500 # else
2501 mov %rdi, %rax
2502 # endif
2503 # ifdef USE_AS_STRNCPY
2504 sub $6, %r8
2505 lea 6(%rdx), %rcx /* lea leaves the flags from sub untouched */
2506 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2507 # ifdef USE_AS_STPCPY
2508 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2509 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2510 # endif
2511 # endif
2512 ret
2513
/* Copy 7 bytes from (%rcx) to (%rdx) with two overlapping 4-byte moves
   (bytes 0..3 and 3..6) and finish.  Returns %rdi (the original
   destination) or, for stpcpy variants, the address of the last byte
   written.  For strncpy variants %r8 is reduced by 7 and, if anything is
   left, the rest is zero-filled starting at %rcx = %rdx + 7.  */
2514 .p2align 4
2515 L(Exit7):
2516 movl (%rcx), %eax
2517 movl %eax, (%rdx)
2518 movl 3(%rcx), %eax
2519 movl %eax, 3(%rdx)
2520 # ifdef USE_AS_STPCPY
2521 lea 6(%rdx), %rax
2522 # else
2523 mov %rdi, %rax
2524 # endif
2525 # ifdef USE_AS_STRNCPY
2526 sub $7, %r8
2527 lea 7(%rdx), %rcx /* lea leaves the flags from sub untouched */
2528 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2529 # ifdef USE_AS_STPCPY
2530 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2531 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2532 # endif
2533 # endif
2534 ret
2535
/* Copy 9 bytes from (%rcx) to (%rdx) with an 8-byte move (bytes 0..7) and an
   overlapping 4-byte move (bytes 5..8), then finish.  Returns %rdi (the
   original destination) or, for stpcpy variants, the address of the last
   byte written.  For strncpy variants %r8 is reduced by 9 and, if anything
   is left, the rest is zero-filled starting at %rcx = %rdx + 9.  */
2536 .p2align 4
2537 L(Exit9):
2538 mov (%rcx), %rax
2539 mov %rax, (%rdx)
2540 mov 5(%rcx), %eax
2541 mov %eax, 5(%rdx)
2542 # ifdef USE_AS_STPCPY
2543 lea 8(%rdx), %rax
2544 # else
2545 mov %rdi, %rax
2546 # endif
2547 # ifdef USE_AS_STRNCPY
2548 sub $9, %r8
2549 lea 9(%rdx), %rcx /* lea leaves the flags from sub untouched */
2550 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2551 # ifdef USE_AS_STPCPY
2552 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2553 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2554 # endif
2555 # endif
2556 ret
2557
/* Copy 10 bytes from (%rcx) to (%rdx) with an 8-byte move (bytes 0..7) and
   an overlapping 4-byte move (bytes 6..9), then finish.  Returns %rdi (the
   original destination) or, for stpcpy variants, the address of the last
   byte written.  For strncpy variants %r8 is reduced by 10 and, if anything
   is left, the rest is zero-filled starting at %rcx = %rdx + 10.  */
2558 .p2align 4
2559 L(Exit10):
2560 mov (%rcx), %rax
2561 mov %rax, (%rdx)
2562 mov 6(%rcx), %eax
2563 mov %eax, 6(%rdx)
2564 # ifdef USE_AS_STPCPY
2565 lea 9(%rdx), %rax
2566 # else
2567 mov %rdi, %rax
2568 # endif
2569 # ifdef USE_AS_STRNCPY
2570 sub $10, %r8
2571 lea 10(%rdx), %rcx /* lea leaves the flags from sub untouched */
2572 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2573 # ifdef USE_AS_STPCPY
2574 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2575 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2576 # endif
2577 # endif
2578 ret
2579
/* Copy 11 bytes from (%rcx) to (%rdx) with an 8-byte move (bytes 0..7) and
   an overlapping 4-byte move (bytes 7..10), then finish.  Returns %rdi (the
   original destination) or, for stpcpy variants, the address of the last
   byte written.  For strncpy variants %r8 is reduced by 11 and, if anything
   is left, the rest is zero-filled starting at %rcx = %rdx + 11.  */
2580 .p2align 4
2581 L(Exit11):
2582 mov (%rcx), %rax
2583 mov %rax, (%rdx)
2584 mov 7(%rcx), %eax
2585 mov %eax, 7(%rdx)
2586 # ifdef USE_AS_STPCPY
2587 lea 10(%rdx), %rax
2588 # else
2589 mov %rdi, %rax
2590 # endif
2591 # ifdef USE_AS_STRNCPY
2592 sub $11, %r8
2593 lea 11(%rdx), %rcx /* lea leaves the flags from sub untouched */
2594 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2595 # ifdef USE_AS_STPCPY
2596 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2597 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2598 # endif
2599 # endif
2600 ret
2601
/* Copy 12 bytes from (%rcx) to (%rdx) with an 8-byte move (bytes 0..7) and a
   4-byte move (bytes 8..11), then finish.  Returns %rdi (the original
   destination) or, for stpcpy variants, the address of the last byte
   written.  For strncpy variants %r8 is reduced by 12 and, if anything is
   left, the rest is zero-filled starting at %rcx = %rdx + 12.  */
2602 .p2align 4
2603 L(Exit12):
2604 mov (%rcx), %rax
2605 mov %rax, (%rdx)
2606 mov 8(%rcx), %eax
2607 mov %eax, 8(%rdx)
2608 # ifdef USE_AS_STPCPY
2609 lea 11(%rdx), %rax
2610 # else
2611 mov %rdi, %rax
2612 # endif
2613 # ifdef USE_AS_STRNCPY
2614 sub $12, %r8
2615 lea 12(%rdx), %rcx /* lea leaves the flags from sub untouched */
2616 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2617 # ifdef USE_AS_STPCPY
2618 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2619 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2620 # endif
2621 # endif
2622 ret
2623
/* Copy 13 bytes from (%rcx) to (%rdx) with two overlapping 8-byte moves
   (bytes 0..7 and 5..12), then finish.  Returns %rdi (the original
   destination) or, for stpcpy variants, the address of the last byte
   written.  For strncpy variants %r8 is reduced by 13 and, if anything is
   left, the rest is zero-filled starting at %rcx = %rdx + 13.  */
2624 .p2align 4
2625 L(Exit13):
2626 mov (%rcx), %rax
2627 mov %rax, (%rdx)
2628 mov 5(%rcx), %rax
2629 mov %rax, 5(%rdx)
2630 # ifdef USE_AS_STPCPY
2631 lea 12(%rdx), %rax
2632 # else
2633 mov %rdi, %rax
2634 # endif
2635 # ifdef USE_AS_STRNCPY
2636 sub $13, %r8
2637 lea 13(%rdx), %rcx /* lea leaves the flags from sub untouched */
2638 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2639 # ifdef USE_AS_STPCPY
2640 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2641 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2642 # endif
2643 # endif
2644 ret
2645
/* Copy 14 bytes from (%rcx) to (%rdx) with two overlapping 8-byte moves
   (bytes 0..7 and 6..13), then finish.  Returns %rdi (the original
   destination) or, for stpcpy variants, the address of the last byte
   written.  For strncpy variants %r8 is reduced by 14 and, if anything is
   left, the rest is zero-filled starting at %rcx = %rdx + 14.  */
2646 .p2align 4
2647 L(Exit14):
2648 mov (%rcx), %rax
2649 mov %rax, (%rdx)
2650 mov 6(%rcx), %rax
2651 mov %rax, 6(%rdx)
2652 # ifdef USE_AS_STPCPY
2653 lea 13(%rdx), %rax
2654 # else
2655 mov %rdi, %rax
2656 # endif
2657 # ifdef USE_AS_STRNCPY
2658 sub $14, %r8
2659 lea 14(%rdx), %rcx /* lea leaves the flags from sub untouched */
2660 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2661 # ifdef USE_AS_STPCPY
2662 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2663 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2664 # endif
2665 # endif
2666 ret
2667
/* Copy 15 bytes from (%rcx) to (%rdx) with two overlapping 8-byte moves
   (bytes 0..7 and 7..14), then finish.  Returns %rdi (the original
   destination) or, for stpcpy variants, the address of the last byte
   written.  For strncpy variants %r8 is reduced by 15 and, if anything is
   left, the rest is zero-filled starting at %rcx = %rdx + 15.  */
2668 .p2align 4
2669 L(Exit15):
2670 mov (%rcx), %rax
2671 mov %rax, (%rdx)
2672 mov 7(%rcx), %rax
2673 mov %rax, 7(%rdx)
2674 # ifdef USE_AS_STPCPY
2675 lea 14(%rdx), %rax
2676 # else
2677 mov %rdi, %rax
2678 # endif
2679 # ifdef USE_AS_STRNCPY
2680 sub $15, %r8
2681 lea 15(%rdx), %rcx /* lea leaves the flags from sub untouched */
2682 jnz L(StrncpyFillTailWithZero1) /* count not exhausted: pad with zeros */
2683 # ifdef USE_AS_STPCPY
2684 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2685 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2686 # endif
2687 # endif
2688 ret
2689
2690 # ifdef USE_AS_STRNCPY
/* Zero-fill tails for strncpy: L(FillN) stores N bytes taken from %rdx at
   (%rcx) and returns.  L(StrncpyFillTailWithZero1) clears %rdx before
   control is dispatched here, so the bytes stored are zeros.  %rax (the
   return value set by the L(ExitN) block) is not touched.  Sizes that are
   not a single power of two use two stores, overlapping where needed.  */
2691 .p2align 4
2692 L(Fill0):
2693 ret
2694
2695 .p2align 4
2696 L(Fill1):
2697 movb %dl, (%rcx)
2698 ret
2699
2700 .p2align 4
2701 L(Fill2):
2702 movw %dx, (%rcx)
2703 ret
2704
2705 .p2align 4
2706 L(Fill3):
2707 movw %dx, (%rcx)
2708 movb %dl, 2(%rcx)
2709 ret
2710
2711 .p2align 4
2712 L(Fill4):
2713 movl %edx, (%rcx)
2714 ret
2715
2716 .p2align 4
2717 L(Fill5):
2718 movl %edx, (%rcx)
2719 movb %dl, 4(%rcx)
2720 ret
2721
2722 .p2align 4
2723 L(Fill6):
2724 movl %edx, (%rcx)
2725 movw %dx, 4(%rcx)
2726 ret
2727
2728 .p2align 4
2729 L(Fill7):
2730 movl %edx, (%rcx)
2731 movl %edx, 3(%rcx) /* bytes 3..6, overlapping byte 3 */
2732 ret
2733
2734 .p2align 4
2735 L(Fill8):
2736 mov %rdx, (%rcx)
2737 ret
2738
2739 .p2align 4
2740 L(Fill9):
2741 mov %rdx, (%rcx)
2742 movb %dl, 8(%rcx)
2743 ret
2744
2745 .p2align 4
2746 L(Fill10):
2747 mov %rdx, (%rcx)
2748 movw %dx, 8(%rcx)
2749 ret
2750
2751 .p2align 4
2752 L(Fill11):
2753 mov %rdx, (%rcx)
2754 movl %edx, 7(%rcx) /* bytes 7..10, overlapping byte 7 */
2755 ret
2756
2757 .p2align 4
2758 L(Fill12):
2759 mov %rdx, (%rcx)
2760 movl %edx, 8(%rcx)
2761 ret
2762
2763 .p2align 4
2764 L(Fill13):
2765 mov %rdx, (%rcx)
2766 mov %rdx, 5(%rcx) /* bytes 5..12, overlapping bytes 5..7 */
2767 ret
2768
2769 .p2align 4
2770 L(Fill14):
2771 mov %rdx, (%rcx)
2772 mov %rdx, 6(%rcx) /* bytes 6..13, overlapping bytes 6..7 */
2773 ret
2774
2775 .p2align 4
2776 L(Fill15):
2777 mov %rdx, (%rcx)
2778 mov %rdx, 7(%rcx) /* bytes 7..14, overlapping byte 7 */
2779 ret
2780
2781 .p2align 4
2782 L(Fill16):
2783 mov %rdx, (%rcx)
2784 mov %rdx, 8(%rcx)
2785 ret
2786
/* Tail dispatch for the strncpy zero fill.
   L(StrncpyFillExit1): entered with %r8 holding 16 less than the number of
   bytes still to fill; adds the 16 back and falls into the dispatcher.
   L(FillFrom1To16Bytes): %r8 = bytes still to fill (0..16 on the paths
   visible in this file), %rcx = where to write them.  Jumps to L(Fill<r8>)
   through a compare tree; the signed conditions (jg/jl) are equivalent for
   counts in that range.  */
2787 .p2align 4
2788 L(StrncpyFillExit1):
2789 lea 16(%r8), %r8 /* undo the caller's last subtraction of 16 */
2790 L(FillFrom1To16Bytes):
2791 test %r8, %r8
2792 jz L(Fill0)
2793 cmp $16, %r8
2794 je L(Fill16)
2795 cmp $8, %r8
2796 je L(Fill8)
2797 jg L(FillMore8)
2798 cmp $4, %r8
2799 je L(Fill4)
2800 jg L(FillMore4)
2801 cmp $2, %r8
2802 jl L(Fill1)
2803 je L(Fill2)
2804 jg L(Fill3)
2805 L(FillMore8): /* but less than 16 */
2806 cmp $12, %r8
2807 je L(Fill12)
2808 jl L(FillLess12)
2809 cmp $14, %r8
2810 jl L(Fill13)
2811 je L(Fill14)
2812 jg L(Fill15)
2813 L(FillMore4): /* but less than 8 */
2814 cmp $6, %r8
2815 jl L(Fill5)
2816 je L(Fill6)
2817 jg L(Fill7)
2818 L(FillLess12): /* but more than 8 */
2819 cmp $10, %r8
2820 jl L(Fill9)
2821 je L(Fill10)
2822 jmp L(Fill11)
2823
/* strncpy padding: write %r8 zero bytes starting at %rcx.  The L(ExitN)
   blocks jump here only when their "sub $N, %r8" left a non-zero count.
   Up to 16 bytes go straight to the L(FillN) dispatcher.  Otherwise 16
   bytes are zeroed with unaligned stores, %rcx is rounded down to a 16-byte
   boundary (re-counting the overlap), and aligned movdqa stores do the
   bulk, 64 bytes per loop iteration, before the last 0..15 bytes are
   dispatched.  %rax is left untouched.  */
2824 .p2align 4
2825 L(StrncpyFillTailWithZero1):
2826 xor %rdx, %rdx /* rdx = 0: source of zeros for the scalar stores */
2827 sub $16, %r8
2828 jbe L(StrncpyFillExit1) /* at most 16 bytes to fill */
2829
2830 pxor %xmm0, %xmm0 /* xmm0 = 0 for the vector stores */
2831 mov %rdx, (%rcx)
2832 mov %rdx, 8(%rcx) /* first 16 bytes, alignment unknown */
2833
2834 lea 16(%rcx), %rcx
2835
2836 mov %rcx, %rdx
2837 and $0xf, %rdx /* rdx = misalignment of rcx */
2838 sub %rdx, %rcx /* round rcx down to a 16-byte boundary */
2839 add %rdx, %r8 /* the bytes stepped back over are counted (and zeroed) again */
2840 xor %rdx, %rdx /* rdx back to 0 for the L(FillN) stores */
2841 sub $64, %r8
2842 jb L(StrncpyFillLess64)
2843
2844 L(StrncpyFillLoopMovdqa):
2845 movdqa %xmm0, (%rcx)
2846 movdqa %xmm0, 16(%rcx)
2847 movdqa %xmm0, 32(%rcx)
2848 movdqa %xmm0, 48(%rcx)
2849 lea 64(%rcx), %rcx
2850 sub $64, %r8
2851 jae L(StrncpyFillLoopMovdqa) /* another full 64 bytes remain */
2852
2853 L(StrncpyFillLess64):
2854 add $32, %r8 /* r8 = bytes left - 32 */
2855 jl L(StrncpyFillLess32)
2856 movdqa %xmm0, (%rcx)
2857 movdqa %xmm0, 16(%rcx)
2858 lea 32(%rcx), %rcx
2859 sub $16, %r8 /* r8 = bytes left - 16 */
2860 jl L(StrncpyFillExit1)
2861 movdqa %xmm0, (%rcx)
2862 lea 16(%rcx), %rcx
2863 jmp L(FillFrom1To16Bytes)
2864
2865 L(StrncpyFillLess32):
2866 add $16, %r8 /* r8 = bytes left - 16 */
2867 jl L(StrncpyFillExit1)
2868 movdqa %xmm0, (%rcx)
2869 lea 16(%rcx), %rcx
2870 jmp L(FillFrom1To16Bytes)
2871
/* strncpy with a zero length (the entry code jumps here when %r8 is 0):
   nothing is copied and %rdx, which the entry code set to a copy of the
   destination pointer %rdi, is returned.  */
2872 .p2align 4
2873 L(Exit0):
2874 mov %rdx, %rax
2875 ret
2876
/* strncpy short path.  The entry code jumps here when source bytes 0..7 are
   all non-zero and the length %r8 is below 16 (it has already handled
   lengths up to 8, so 9..15 remain).  For k = 9..14: stop after k bytes if
   the length is k or byte k - 1 is NUL.  Otherwise 15 bytes are copied
   inline with two overlapping 8-byte moves; in that case the length is
   exactly 15, so no zero fill is needed.  */
2877 .p2align 4
2878 L(StrncpyExit15Bytes):
2879 cmp $9, %r8
2880 je L(Exit9)
2881 cmpb $0, 8(%rcx)
2882 jz L(Exit9)
2883 cmp $10, %r8
2884 je L(Exit10)
2885 cmpb $0, 9(%rcx)
2886 jz L(Exit10)
2887 cmp $11, %r8
2888 je L(Exit11)
2889 cmpb $0, 10(%rcx)
2890 jz L(Exit11)
2891 cmp $12, %r8
2892 je L(Exit12)
2893 cmpb $0, 11(%rcx)
2894 jz L(Exit12)
2895 cmp $13, %r8
2896 je L(Exit13)
2897 cmpb $0, 12(%rcx)
2898 jz L(Exit13)
2899 cmp $14, %r8
2900 je L(Exit14)
2901 cmpb $0, 13(%rcx)
2902 jz L(Exit14)
2903 mov (%rcx), %rax
2904 mov %rax, (%rdx)
2905 mov 7(%rcx), %rax
2906 mov %rax, 7(%rdx) /* bytes 7..14, overlapping byte 7 */
2907 # ifdef USE_AS_STPCPY
2908 lea 14(%rdx), %rax
2909 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2910 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2911 # else
2912 mov %rdi, %rax
2913 # endif
2914 ret
2915
/* strncpy short path.  The entry code jumps here when the length %r8 is
   1..8 (zero was handled before).  For k = 1..7: stop after k bytes if the
   length is k or byte k - 1 is NUL.  Otherwise 8 bytes are copied inline;
   in that case the length is exactly 8, so no zero fill is needed.  */
2916 .p2align 4
2917 L(StrncpyExit8Bytes):
2918 cmp $1, %r8
2919 je L(Exit1)
2920 cmpb $0, (%rcx)
2921 jz L(Exit1)
2922 cmp $2, %r8
2923 je L(Exit2)
2924 cmpb $0, 1(%rcx)
2925 jz L(Exit2)
2926 cmp $3, %r8
2927 je L(Exit3)
2928 cmpb $0, 2(%rcx)
2929 jz L(Exit3)
2930 cmp $4, %r8
2931 je L(Exit4)
2932 cmpb $0, 3(%rcx)
2933 jz L(Exit4)
2934 cmp $5, %r8
2935 je L(Exit5)
2936 cmpb $0, 4(%rcx)
2937 jz L(Exit5)
2938 cmp $6, %r8
2939 je L(Exit6)
2940 cmpb $0, 5(%rcx)
2941 jz L(Exit6)
2942 cmp $7, %r8
2943 je L(Exit7)
2944 cmpb $0, 6(%rcx)
2945 jz L(Exit7)
2946 mov (%rcx), %rax
2947 mov %rax, (%rdx)
2948 # ifdef USE_AS_STPCPY
2949 lea 7(%rdx), %rax
2950 cmpb $1, (%rax) /* CF = 1 iff the last byte written is 0 */
2951 sbb $-1, %rax /* rax += 1 - CF: step past that byte unless it is the terminator */
2952 # else
2953 mov %rdi, %rax
2954 # endif
2955 ret
2956
2957 # endif
2958
2959 # endif
2960
2961 # ifdef USE_AS_STRNCPY
2962
2963 L(StrncpyLeaveCase2OrCase3): /* Dispatch on %rax: non-zero goes to L(Aligned64LeaveCase2), zero falls through to L(Aligned64LeaveCase3). NOTE(review): %rax is presumably a pmovmskb NUL mask set before this label -- confirm. */
2964 test %rax, %rax
2965 jnz L(Aligned64LeaveCase2)
2966
2967 L(Aligned64LeaveCase3): /* Store xmm4, xmm5 and xmm6 to -64/-48/-32(%rdx) one at a time, stopping as soon as the count in %r8 is used up; every exit goes to L(CopyFrom1To16BytesCase3). */
2968 lea 64(%r8), %r8 /* %r8 += 64; lea leaves the flags untouched */
2969 sub $16, %r8 /* charge one 16-byte chunk against the count */
2970 jbe L(CopyFrom1To16BytesCase3) /* borrow or zero: the count ends within this chunk */
2971 movaps %xmm4, -64(%rdx) /* whole chunk fits: store it */
2972 lea 16(%rsi), %rsi /* %rsi += 16 */
2973 sub $16, %r8
2974 jbe L(CopyFrom1To16BytesCase3)
2975 movaps %xmm5, -48(%rdx)
2976 lea 16(%rsi), %rsi
2977 sub $16, %r8
2978 jbe L(CopyFrom1To16BytesCase3)
2979 movaps %xmm6, -32(%rdx)
2980 lea 16(%rsi), %rsi
2981 lea -16(%r8), %r8 /* %r8 -= 16 without a flag test; the last chunk is left to the target */
2982 jmp L(CopyFrom1To16BytesCase3)
2983
2984 L(Aligned64LeaveCase2): /* Walk xmm4..xmm7 one 16-byte chunk at a time: compare the chunk with xmm0, store the previous chunk, and leave when the count in %r8 runs out or the byte mask in %rax is non-zero. */
2985 pcmpeqb %xmm4, %xmm0 /* per-byte equality of xmm4 against xmm0, result in xmm0. NOTE(review): xmm0 is presumably all-zero on entry, so this finds NUL bytes -- confirm. */
2986 pmovmskb %xmm0, %rax /* %rax = one bit per matching byte */
2987 add $48, %r8 /* re-bias the count; the flags drive the jle below */
2988 jle L(CopyFrom1To16BytesCase2OrCase3) /* count <= 0 (signed): the limit falls in this chunk */
2989 test %rax, %rax
2990 jnz L(CopyFrom1To16Bytes) /* a byte of xmm4 matched and the count still covers the chunk */
2991
2992 pcmpeqb %xmm5, %xmm0 /* xmm0 is all-zero here because the previous mask was zero */
2993 pmovmskb %xmm0, %rax
2994 movaps %xmm4, -64(%rdx) /* the previous chunk had no match: store it whole */
2995 lea 16(%rsi), %rsi /* %rsi += 16 */
2996 sub $16, %r8
2997 jbe L(CopyFrom1To16BytesCase2OrCase3) /* borrow or zero: the count ends within this chunk */
2998 test %rax, %rax
2999 jnz L(CopyFrom1To16Bytes)
3000
3001 pcmpeqb %xmm6, %xmm0
3002 pmovmskb %xmm0, %rax
3003 movaps %xmm5, -48(%rdx)
3004 lea 16(%rsi), %rsi
3005 sub $16, %r8
3006 jbe L(CopyFrom1To16BytesCase2OrCase3)
3007 test %rax, %rax
3008 jnz L(CopyFrom1To16Bytes)
3009
3010 pcmpeqb %xmm7, %xmm0
3011 pmovmskb %xmm0, %rax /* mask for the last chunk; it is not tested here but left to the jump target */
3012 movaps %xmm6, -32(%rdx)
3013 lea 16(%rsi), %rsi
3014 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3015 jmp L(CopyFrom1To16BytesCase2)
3016 /*--------------------------------------------------*/
3017 L(StrncpyExit1Case2OrCase3): /* Merge the 15 bytes xmm1[1..15] into the 16-byte block at (%rdx), keeping that block's last byte, then branch on %rax. */
3018 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3019 psrldq $15, %xmm6 /* move its last byte down to byte 0 and zero the rest */
3020 mov $15, %rsi /* %rsi = 15, the number of bytes taken from xmm1 */
3021 palignr $1, %xmm1, %xmm6 /* xmm6 = xmm1[1..15] followed by the preserved byte */
3022 movaps %xmm6, (%rdx) /* write the merged block back */
3023 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3024 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3025 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3026
3027 L(StrncpyExit2Case2OrCase3): /* Merge the 14 bytes xmm1[2..15] into the 16-byte block at (%rdx), keeping that block's last 2 bytes, then branch on %rax. */
3028 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3029 psrldq $14, %xmm6 /* move its last 2 bytes down to the low end and zero the rest */
3030 mov $14, %rsi /* %rsi = 14, the number of bytes taken from xmm1 */
3031 palignr $2, %xmm1, %xmm6 /* xmm6 = xmm1[2..15] followed by the 2 preserved bytes */
3032 movaps %xmm6, (%rdx) /* write the merged block back */
3033 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3034 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3035 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3036
3037 L(StrncpyExit3Case2OrCase3): /* Merge the 13 bytes xmm1[3..15] into the 16-byte block at (%rdx), keeping that block's last 3 bytes, then branch on %rax. */
3038 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3039 psrldq $13, %xmm6 /* move its last 3 bytes down to the low end and zero the rest */
3040 mov $13, %rsi /* %rsi = 13, the number of bytes taken from xmm1 */
3041 palignr $3, %xmm1, %xmm6 /* xmm6 = xmm1[3..15] followed by the 3 preserved bytes */
3042 movaps %xmm6, (%rdx) /* write the merged block back */
3043 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3044 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3045 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3046
3047 L(StrncpyExit4Case2OrCase3): /* Merge the 12 bytes xmm1[4..15] into the 16-byte block at (%rdx), keeping that block's last 4 bytes, then branch on %rax. */
3048 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3049 psrldq $12, %xmm6 /* move its last 4 bytes down to the low end and zero the rest */
3050 mov $12, %rsi /* %rsi = 12, the number of bytes taken from xmm1 */
3051 palignr $4, %xmm1, %xmm6 /* xmm6 = xmm1[4..15] followed by the 4 preserved bytes */
3052 movaps %xmm6, (%rdx) /* write the merged block back */
3053 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3054 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3055 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3056
3057 L(StrncpyExit5Case2OrCase3): /* Merge the 11 bytes xmm1[5..15] into the 16-byte block at (%rdx), keeping that block's last 5 bytes, then branch on %rax. */
3058 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3059 psrldq $11, %xmm6 /* move its last 5 bytes down to the low end and zero the rest */
3060 mov $11, %rsi /* %rsi = 11, the number of bytes taken from xmm1 */
3061 palignr $5, %xmm1, %xmm6 /* xmm6 = xmm1[5..15] followed by the 5 preserved bytes */
3062 movaps %xmm6, (%rdx) /* write the merged block back */
3063 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3064 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3065 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3066
3067 L(StrncpyExit6Case2OrCase3): /* Merge the 10 bytes xmm1[6..15] into the 16-byte block at (%rdx), keeping that block's last 6 bytes, then branch on %rax. */
3068 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3069 psrldq $10, %xmm6 /* move its last 6 bytes down to the low end and zero the rest */
3070 mov $10, %rsi /* %rsi = 10, the number of bytes taken from xmm1 */
3071 palignr $6, %xmm1, %xmm6 /* xmm6 = xmm1[6..15] followed by the 6 preserved bytes */
3072 movaps %xmm6, (%rdx) /* write the merged block back */
3073 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3074 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3075 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3076
3077 L(StrncpyExit7Case2OrCase3): /* Merge the 9 bytes xmm1[7..15] into the 16-byte block at (%rdx), keeping that block's last 7 bytes, then branch on %rax. */
3078 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3079 psrldq $9, %xmm6 /* move its last 7 bytes down to the low end and zero the rest */
3080 mov $9, %rsi /* %rsi = 9, the number of bytes taken from xmm1 */
3081 palignr $7, %xmm1, %xmm6 /* xmm6 = xmm1[7..15] followed by the 7 preserved bytes */
3082 movaps %xmm6, (%rdx) /* write the merged block back */
3083 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3084 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3085 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3086
3087 L(StrncpyExit8Case2OrCase3): /* Merge the 8 bytes xmm1[8..15] into the 16-byte block at (%rdx), keeping that block's last 8 bytes, then branch on %rax. */
3088 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3089 psrldq $8, %xmm6 /* move its last 8 bytes down to the low end and zero the rest */
3090 mov $8, %rsi /* %rsi = 8, the number of bytes taken from xmm1 */
3091 palignr $8, %xmm1, %xmm6 /* xmm6 = xmm1[8..15] followed by the 8 preserved bytes */
3092 movaps %xmm6, (%rdx) /* write the merged block back */
3093 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3094 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3095 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3096
3097 L(StrncpyExit9Case2OrCase3): /* Merge the 7 bytes xmm1[9..15] into the 16-byte block at (%rdx), keeping that block's last 9 bytes, then branch on %rax. */
3098 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3099 psrldq $7, %xmm6 /* move its last 9 bytes down to the low end and zero the rest */
3100 mov $7, %rsi /* %rsi = 7, the number of bytes taken from xmm1 */
3101 palignr $9, %xmm1, %xmm6 /* xmm6 = xmm1[9..15] followed by the 9 preserved bytes */
3102 movaps %xmm6, (%rdx) /* write the merged block back */
3103 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3104 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3105 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3106
3107 L(StrncpyExit10Case2OrCase3): /* Merge the 6 bytes xmm1[10..15] into the 16-byte block at (%rdx), keeping that block's last 10 bytes, then branch on %rax. */
3108 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3109 psrldq $6, %xmm6 /* move its last 10 bytes down to the low end and zero the rest */
3110 mov $6, %rsi /* %rsi = 6, the number of bytes taken from xmm1 */
3111 palignr $10, %xmm1, %xmm6 /* xmm6 = xmm1[10..15] followed by the 10 preserved bytes */
3112 movaps %xmm6, (%rdx) /* write the merged block back */
3113 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3114 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3115 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3116
3117 L(StrncpyExit11Case2OrCase3): /* Merge the 5 bytes xmm1[11..15] into the 16-byte block at (%rdx), keeping that block's last 11 bytes, then branch on %rax. */
3118 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3119 psrldq $5, %xmm6 /* move its last 11 bytes down to the low end and zero the rest */
3120 mov $5, %rsi /* %rsi = 5, the number of bytes taken from xmm1 */
3121 palignr $11, %xmm1, %xmm6 /* xmm6 = xmm1[11..15] followed by the 11 preserved bytes */
3122 movaps %xmm6, (%rdx) /* write the merged block back */
3123 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3124 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3125 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3126
3127 L(StrncpyExit12Case2OrCase3): /* Merge the 4 bytes xmm1[12..15] into the 16-byte block at (%rdx), keeping that block's last 12 bytes, then branch on %rax. */
3128 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3129 psrldq $4, %xmm6 /* move its last 12 bytes down to the low end and zero the rest */
3130 mov $4, %rsi /* %rsi = 4, the number of bytes taken from xmm1 */
3131 palignr $12, %xmm1, %xmm6 /* xmm6 = xmm1[12..15] followed by the 12 preserved bytes */
3132 movaps %xmm6, (%rdx) /* write the merged block back */
3133 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3134 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3135 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3136
3137 L(StrncpyExit13Case2OrCase3): /* Merge the 3 bytes xmm1[13..15] into the 16-byte block at (%rdx), keeping that block's last 13 bytes, then branch on %rax. */
3138 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3139 psrldq $3, %xmm6 /* move its last 13 bytes down to the low end and zero the rest */
3140 mov $3, %rsi /* %rsi = 3, the number of bytes taken from xmm1 */
3141 palignr $13, %xmm1, %xmm6 /* xmm6 = xmm1[13..15] followed by the 13 preserved bytes */
3142 movaps %xmm6, (%rdx) /* write the merged block back */
3143 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3144 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3145 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3146
3147 L(StrncpyExit14Case2OrCase3): /* Merge the 2 bytes xmm1[14..15] into the 16-byte block at (%rdx), keeping that block's last 14 bytes, then branch on %rax. */
3148 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3149 psrldq $2, %xmm6 /* move its last 14 bytes down to the low end and zero the rest */
3150 mov $2, %rsi /* %rsi = 2, the number of bytes taken from xmm1 */
3151 palignr $14, %xmm1, %xmm6 /* xmm6 = xmm1[14..15] followed by the 14 preserved bytes */
3152 movaps %xmm6, (%rdx) /* write the merged block back */
3153 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3154 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3155 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3156
3157 L(StrncpyExit15Case2OrCase3): /* Merge the single byte xmm1[15] into the 16-byte block at (%rdx), keeping that block's last 15 bytes, then branch on %rax. */
3158 movaps (%rdx), %xmm6 /* load the current block; movaps requires %rdx to be 16-byte aligned */
3159 psrldq $1, %xmm6 /* move its last 15 bytes down to the low end and zero the top byte */
3160 mov $1, %rsi /* %rsi = 1, the number of bytes taken from xmm1 */
3161 palignr $15, %xmm1, %xmm6 /* xmm6 = xmm1[15] followed by the 15 preserved bytes */
3162 movaps %xmm6, (%rdx) /* write the merged block back */
3163 test %rax, %rax /* NOTE(review): %rax is presumably a pmovmskb NUL mask set by the code that jumps here -- confirm */
3164 jnz L(CopyFrom1To16BytesCase2) /* %rax != 0 */
3165 jmp L(CopyFrom1To16BytesCase3) /* %rax == 0 */
3166
3167 L(StrncpyLeave1): /* Shift-by-1 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit1) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit1). */
3168 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3169 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3170 jle L(StrncpyExit1) /* count <= 0 (signed) */
3171 palignr $1, %xmm1, %xmm2 /* xmm2 = xmm1[1..15] followed by the low byte of xmm2 */
3172 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3173 movaps %xmm2, (%rdx) /* store chunk 0 */
3174 movaps 31(%rcx), %xmm2 /* next source load; movaps requires %rcx+31 to be 16-byte aligned */
3175 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit1) uses %rsi as an offset from %rdx */
3176 movaps %xmm2, %xmm3
3177 sub $16, %r8 /* charge one chunk against the count */
3178 jbe L(StrncpyExit1) /* borrow or zero: count exhausted */
3179 palignr $1, %xmm1, %xmm2
3180 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3181 movaps 31+16(%rcx), %xmm2
3182 movaps %xmm3, %xmm1
3183 lea 16(%rsi), %rsi
3184 sub $16, %r8
3185 jbe L(StrncpyExit1)
3186 movaps %xmm2, %xmm1
3187 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3188 lea 16(%rsi), %rsi
3189 sub $16, %r8
3190 jbe L(StrncpyExit1)
3191 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3192 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3193 lea 16(%rsi), %rsi
3194 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3195
3196 L(StrncpyExit1): /* Merge the 15 bytes xmm1[1..15] into the 16-byte block at %rdx+%rsi, keeping that block's last byte, then continue at L(CopyFrom1To16BytesCase3). */
3197 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3198 psrldq $15, %xmm6 /* move its last byte down to byte 0 and zero the rest */
3199 palignr $1, %xmm1, %xmm6 /* xmm6 = xmm1[1..15] followed by the preserved byte */
3200 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3201 lea 15(%rsi), %rsi /* advance the offset by the 15 bytes taken from xmm1 */
3202 jmp L(CopyFrom1To16BytesCase3)
3203
3204 L(StrncpyLeave2): /* Shift-by-2 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit2) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit2). */
3205 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3206 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3207 jle L(StrncpyExit2) /* count <= 0 (signed) */
3208 palignr $2, %xmm1, %xmm2 /* xmm2 = xmm1[2..15] followed by the low 2 bytes of xmm2 */
3209 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3210 movaps %xmm2, (%rdx) /* store chunk 0 */
3211 movaps 30(%rcx), %xmm2 /* next source load; movaps requires %rcx+30 to be 16-byte aligned */
3212 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit2) uses %rsi as an offset from %rdx */
3213 movaps %xmm2, %xmm3
3214 sub $16, %r8 /* charge one chunk against the count */
3215 jbe L(StrncpyExit2) /* borrow or zero: count exhausted */
3216 palignr $2, %xmm1, %xmm2
3217 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3218 movaps 30+16(%rcx), %xmm2
3219 movaps %xmm3, %xmm1
3220 lea 16(%rsi), %rsi
3221 sub $16, %r8
3222 jbe L(StrncpyExit2)
3223 movaps %xmm2, %xmm1
3224 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3225 lea 16(%rsi), %rsi
3226 sub $16, %r8
3227 jbe L(StrncpyExit2)
3228 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3229 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3230 lea 16(%rsi), %rsi
3231 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3232
3233 L(StrncpyExit2): /* Merge the 14 bytes xmm1[2..15] into the 16-byte block at %rdx+%rsi, keeping that block's last 2 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3234 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3235 psrldq $14, %xmm6 /* move its last 2 bytes down to the low end and zero the rest */
3236 palignr $2, %xmm1, %xmm6 /* xmm6 = xmm1[2..15] followed by the 2 preserved bytes */
3237 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3238 lea 14(%rsi), %rsi /* advance the offset by the 14 bytes taken from xmm1 */
3239 jmp L(CopyFrom1To16BytesCase3)
3240
3241 L(StrncpyLeave3): /* Shift-by-3 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit3) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit3). */
3242 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3243 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3244 jle L(StrncpyExit3) /* count <= 0 (signed) */
3245 palignr $3, %xmm1, %xmm2 /* xmm2 = xmm1[3..15] followed by the low 3 bytes of xmm2 */
3246 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3247 movaps %xmm2, (%rdx) /* store chunk 0 */
3248 movaps 29(%rcx), %xmm2 /* next source load; movaps requires %rcx+29 to be 16-byte aligned */
3249 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit3) uses %rsi as an offset from %rdx */
3250 movaps %xmm2, %xmm3
3251 sub $16, %r8 /* charge one chunk against the count */
3252 jbe L(StrncpyExit3) /* borrow or zero: count exhausted */
3253 palignr $3, %xmm1, %xmm2
3254 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3255 movaps 29+16(%rcx), %xmm2
3256 movaps %xmm3, %xmm1
3257 lea 16(%rsi), %rsi
3258 sub $16, %r8
3259 jbe L(StrncpyExit3)
3260 movaps %xmm2, %xmm1
3261 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3262 lea 16(%rsi), %rsi
3263 sub $16, %r8
3264 jbe L(StrncpyExit3)
3265 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3266 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3267 lea 16(%rsi), %rsi
3268 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3269
3270 L(StrncpyExit3): /* Merge the 13 bytes xmm1[3..15] into the 16-byte block at %rdx+%rsi, keeping that block's last 3 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3271 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3272 psrldq $13, %xmm6 /* move its last 3 bytes down to the low end and zero the rest */
3273 palignr $3, %xmm1, %xmm6 /* xmm6 = xmm1[3..15] followed by the 3 preserved bytes */
3274 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3275 lea 13(%rsi), %rsi /* advance the offset by the 13 bytes taken from xmm1 */
3276 jmp L(CopyFrom1To16BytesCase3)
3277
3278 L(StrncpyLeave4): /* Shift-by-4 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit4) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit4). */
3279 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3280 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3281 jle L(StrncpyExit4) /* count <= 0 (signed) */
3282 palignr $4, %xmm1, %xmm2 /* xmm2 = xmm1[4..15] followed by the low 4 bytes of xmm2 */
3283 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3284 movaps %xmm2, (%rdx) /* store chunk 0 */
3285 movaps 28(%rcx), %xmm2 /* next source load; movaps requires %rcx+28 to be 16-byte aligned */
3286 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit4) uses %rsi as an offset from %rdx */
3287 movaps %xmm2, %xmm3
3288 sub $16, %r8 /* charge one chunk against the count */
3289 jbe L(StrncpyExit4) /* borrow or zero: count exhausted */
3290 palignr $4, %xmm1, %xmm2
3291 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3292 movaps 28+16(%rcx), %xmm2
3293 movaps %xmm3, %xmm1
3294 lea 16(%rsi), %rsi
3295 sub $16, %r8
3296 jbe L(StrncpyExit4)
3297 movaps %xmm2, %xmm1
3298 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3299 lea 16(%rsi), %rsi
3300 sub $16, %r8
3301 jbe L(StrncpyExit4)
3302 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3303 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3304 lea 16(%rsi), %rsi
3305 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3306
3307 L(StrncpyExit4): /* Merge the 12 bytes xmm1[4..15] into the 16-byte block at %rdx+%rsi, keeping that block's last 4 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3308 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3309 psrldq $12, %xmm6 /* move its last 4 bytes down to the low end and zero the rest */
3310 palignr $4, %xmm1, %xmm6 /* xmm6 = xmm1[4..15] followed by the 4 preserved bytes */
3311 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3312 lea 12(%rsi), %rsi /* advance the offset by the 12 bytes taken from xmm1 */
3313 jmp L(CopyFrom1To16BytesCase3)
3314
3315 L(StrncpyLeave5): /* Shift-by-5 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit5) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit5). */
3316 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3317 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3318 jle L(StrncpyExit5) /* count <= 0 (signed) */
3319 palignr $5, %xmm1, %xmm2 /* xmm2 = xmm1[5..15] followed by the low 5 bytes of xmm2 */
3320 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3321 movaps %xmm2, (%rdx) /* store chunk 0 */
3322 movaps 27(%rcx), %xmm2 /* next source load; movaps requires %rcx+27 to be 16-byte aligned */
3323 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit5) uses %rsi as an offset from %rdx */
3324 movaps %xmm2, %xmm3
3325 sub $16, %r8 /* charge one chunk against the count */
3326 jbe L(StrncpyExit5) /* borrow or zero: count exhausted */
3327 palignr $5, %xmm1, %xmm2
3328 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3329 movaps 27+16(%rcx), %xmm2
3330 movaps %xmm3, %xmm1
3331 lea 16(%rsi), %rsi
3332 sub $16, %r8
3333 jbe L(StrncpyExit5)
3334 movaps %xmm2, %xmm1
3335 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3336 lea 16(%rsi), %rsi
3337 sub $16, %r8
3338 jbe L(StrncpyExit5)
3339 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3340 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3341 lea 16(%rsi), %rsi
3342 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3343
3344 L(StrncpyExit5): /* Merge the 11 bytes xmm1[5..15] into the 16-byte block at %rdx+%rsi, keeping that block's last 5 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3345 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3346 psrldq $11, %xmm6 /* move its last 5 bytes down to the low end and zero the rest */
3347 palignr $5, %xmm1, %xmm6 /* xmm6 = xmm1[5..15] followed by the 5 preserved bytes */
3348 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3349 lea 11(%rsi), %rsi /* advance the offset by the 11 bytes taken from xmm1 */
3350 jmp L(CopyFrom1To16BytesCase3)
3351
3352 L(StrncpyLeave6): /* Shift-by-6 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit6) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit6). */
3353 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3354 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3355 jle L(StrncpyExit6) /* count <= 0 (signed) */
3356 palignr $6, %xmm1, %xmm2 /* xmm2 = xmm1[6..15] followed by the low 6 bytes of xmm2 */
3357 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3358 movaps %xmm2, (%rdx) /* store chunk 0 */
3359 movaps 26(%rcx), %xmm2 /* next source load; movaps requires %rcx+26 to be 16-byte aligned */
3360 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit6) uses %rsi as an offset from %rdx */
3361 movaps %xmm2, %xmm3
3362 sub $16, %r8 /* charge one chunk against the count */
3363 jbe L(StrncpyExit6) /* borrow or zero: count exhausted */
3364 palignr $6, %xmm1, %xmm2
3365 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3366 movaps 26+16(%rcx), %xmm2
3367 movaps %xmm3, %xmm1
3368 lea 16(%rsi), %rsi
3369 sub $16, %r8
3370 jbe L(StrncpyExit6)
3371 movaps %xmm2, %xmm1
3372 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3373 lea 16(%rsi), %rsi
3374 sub $16, %r8
3375 jbe L(StrncpyExit6)
3376 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3377 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3378 lea 16(%rsi), %rsi
3379 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3380
3381 L(StrncpyExit6): /* Merge the 10 bytes xmm1[6..15] into the 16-byte block at %rdx+%rsi, keeping that block's last 6 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3382 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3383 psrldq $10, %xmm6 /* move its last 6 bytes down to the low end and zero the rest */
3384 palignr $6, %xmm1, %xmm6 /* xmm6 = xmm1[6..15] followed by the 6 preserved bytes */
3385 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3386 lea 10(%rsi), %rsi /* advance the offset by the 10 bytes taken from xmm1 */
3387 jmp L(CopyFrom1To16BytesCase3)
3388
3389 L(StrncpyLeave7): /* Shift-by-7 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit7) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit7). */
3390 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3391 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3392 jle L(StrncpyExit7) /* count <= 0 (signed) */
3393 palignr $7, %xmm1, %xmm2 /* xmm2 = xmm1[7..15] followed by the low 7 bytes of xmm2 */
3394 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3395 movaps %xmm2, (%rdx) /* store chunk 0 */
3396 movaps 25(%rcx), %xmm2 /* next source load; movaps requires %rcx+25 to be 16-byte aligned */
3397 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit7) uses %rsi as an offset from %rdx */
3398 movaps %xmm2, %xmm3
3399 sub $16, %r8 /* charge one chunk against the count */
3400 jbe L(StrncpyExit7) /* borrow or zero: count exhausted */
3401 palignr $7, %xmm1, %xmm2
3402 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3403 movaps 25+16(%rcx), %xmm2
3404 movaps %xmm3, %xmm1
3405 lea 16(%rsi), %rsi
3406 sub $16, %r8
3407 jbe L(StrncpyExit7)
3408 movaps %xmm2, %xmm1
3409 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3410 lea 16(%rsi), %rsi
3411 sub $16, %r8
3412 jbe L(StrncpyExit7)
3413 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3414 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3415 lea 16(%rsi), %rsi
3416 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3417
3418 L(StrncpyExit7): /* Merge the 9 bytes xmm1[7..15] into the 16-byte block at %rdx+%rsi, keeping that block's last 7 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3419 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3420 psrldq $9, %xmm6 /* move its last 7 bytes down to the low end and zero the rest */
3421 palignr $7, %xmm1, %xmm6 /* xmm6 = xmm1[7..15] followed by the 7 preserved bytes */
3422 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3423 lea 9(%rsi), %rsi /* advance the offset by the 9 bytes taken from xmm1 */
3424 jmp L(CopyFrom1To16BytesCase3)
3425
3426 L(StrncpyLeave8): /* Shift-by-8 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit8) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit8). */
3427 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3428 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3429 jle L(StrncpyExit8) /* count <= 0 (signed) */
3430 palignr $8, %xmm1, %xmm2 /* xmm2 = xmm1[8..15] followed by the low 8 bytes of xmm2 */
3431 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3432 movaps %xmm2, (%rdx) /* store chunk 0 */
3433 movaps 24(%rcx), %xmm2 /* next source load; movaps requires %rcx+24 to be 16-byte aligned */
3434 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit8) uses %rsi as an offset from %rdx */
3435 movaps %xmm2, %xmm3
3436 sub $16, %r8 /* charge one chunk against the count */
3437 jbe L(StrncpyExit8) /* borrow or zero: count exhausted */
3438 palignr $8, %xmm1, %xmm2
3439 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3440 movaps 24+16(%rcx), %xmm2
3441 movaps %xmm3, %xmm1
3442 lea 16(%rsi), %rsi
3443 sub $16, %r8
3444 jbe L(StrncpyExit8)
3445 movaps %xmm2, %xmm1
3446 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3447 lea 16(%rsi), %rsi
3448 sub $16, %r8
3449 jbe L(StrncpyExit8)
3450 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3451 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3452 lea 16(%rsi), %rsi
3453 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3454
3455 L(StrncpyExit8): /* Merge the 8 bytes xmm1[8..15] into the 16-byte block at %rdx+%rsi, keeping that block's last 8 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3456 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3457 psrldq $8, %xmm6 /* move its last 8 bytes down to the low end and zero the rest */
3458 palignr $8, %xmm1, %xmm6 /* xmm6 = xmm1[8..15] followed by the 8 preserved bytes */
3459 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3460 lea 8(%rsi), %rsi /* advance the offset by the 8 bytes taken from xmm1 */
3461 jmp L(CopyFrom1To16BytesCase3)
3462
3463 L(StrncpyLeave9): /* Shift-by-9 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit9) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit9). */
3464 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3465 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3466 jle L(StrncpyExit9) /* count <= 0 (signed) */
3467 palignr $9, %xmm1, %xmm2 /* xmm2 = xmm1[9..15] followed by the low 9 bytes of xmm2 */
3468 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3469 movaps %xmm2, (%rdx) /* store chunk 0 */
3470 movaps 23(%rcx), %xmm2 /* next source load; movaps requires %rcx+23 to be 16-byte aligned */
3471 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit9) uses %rsi as an offset from %rdx */
3472 movaps %xmm2, %xmm3
3473 sub $16, %r8 /* charge one chunk against the count */
3474 jbe L(StrncpyExit9) /* borrow or zero: count exhausted */
3475 palignr $9, %xmm1, %xmm2
3476 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3477 movaps 23+16(%rcx), %xmm2
3478 movaps %xmm3, %xmm1
3479 lea 16(%rsi), %rsi
3480 sub $16, %r8
3481 jbe L(StrncpyExit9)
3482 movaps %xmm2, %xmm1
3483 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3484 lea 16(%rsi), %rsi
3485 sub $16, %r8
3486 jbe L(StrncpyExit9)
3487 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3488 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3489 lea 16(%rsi), %rsi
3490 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3491
3492 L(StrncpyExit9): /* Merge the 7 bytes xmm1[9..15] into the 16-byte block at %rdx+%rsi, keeping that block's last 9 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3493 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3494 psrldq $7, %xmm6 /* move its last 9 bytes down to the low end and zero the rest */
3495 palignr $9, %xmm1, %xmm6 /* xmm6 = xmm1[9..15] followed by the 9 preserved bytes */
3496 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3497 lea 7(%rsi), %rsi /* advance the offset by the 7 bytes taken from xmm1 */
3498 jmp L(CopyFrom1To16BytesCase3)
3499
3500 L(StrncpyLeave10): /* Shift-by-10 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit10) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit10). */
3501 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3502 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3503 jle L(StrncpyExit10) /* count <= 0 (signed) */
3504 palignr $10, %xmm1, %xmm2 /* xmm2 = xmm1[10..15] followed by the low 10 bytes of xmm2 */
3505 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3506 movaps %xmm2, (%rdx) /* store chunk 0 */
3507 movaps 22(%rcx), %xmm2 /* next source load; movaps requires %rcx+22 to be 16-byte aligned */
3508 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit10) uses %rsi as an offset from %rdx */
3509 movaps %xmm2, %xmm3
3510 sub $16, %r8 /* charge one chunk against the count */
3511 jbe L(StrncpyExit10) /* borrow or zero: count exhausted */
3512 palignr $10, %xmm1, %xmm2
3513 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3514 movaps 22+16(%rcx), %xmm2
3515 movaps %xmm3, %xmm1
3516 lea 16(%rsi), %rsi
3517 sub $16, %r8
3518 jbe L(StrncpyExit10)
3519 movaps %xmm2, %xmm1
3520 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3521 lea 16(%rsi), %rsi
3522 sub $16, %r8
3523 jbe L(StrncpyExit10)
3524 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3525 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3526 lea 16(%rsi), %rsi
3527 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3528
3529 L(StrncpyExit10): /* Merge the 6 bytes xmm1[10..15] into the 16-byte block at %rdx+%rsi, keeping that block's last 10 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3530 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3531 psrldq $6, %xmm6 /* move its last 10 bytes down to the low end and zero the rest */
3532 palignr $10, %xmm1, %xmm6 /* xmm6 = xmm1[10..15] followed by the 10 preserved bytes */
3533 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3534 lea 6(%rsi), %rsi /* advance the offset by the 6 bytes taken from xmm1 */
3535 jmp L(CopyFrom1To16BytesCase3)
3536
3537 L(StrncpyLeave11): /* Shift-by-11 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit11) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit11). */
3538 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3539 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3540 jle L(StrncpyExit11) /* count <= 0 (signed) */
3541 palignr $11, %xmm1, %xmm2 /* xmm2 = xmm1[11..15] followed by the low 11 bytes of xmm2 */
3542 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3543 movaps %xmm2, (%rdx) /* store chunk 0 */
3544 movaps 21(%rcx), %xmm2 /* next source load; movaps requires %rcx+21 to be 16-byte aligned */
3545 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit11) uses %rsi as an offset from %rdx */
3546 movaps %xmm2, %xmm3
3547 sub $16, %r8 /* charge one chunk against the count */
3548 jbe L(StrncpyExit11) /* borrow or zero: count exhausted */
3549 palignr $11, %xmm1, %xmm2
3550 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3551 movaps 21+16(%rcx), %xmm2
3552 movaps %xmm3, %xmm1
3553 lea 16(%rsi), %rsi
3554 sub $16, %r8
3555 jbe L(StrncpyExit11)
3556 movaps %xmm2, %xmm1
3557 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3558 lea 16(%rsi), %rsi
3559 sub $16, %r8
3560 jbe L(StrncpyExit11)
3561 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3562 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3563 lea 16(%rsi), %rsi
3564 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3565
3566 L(StrncpyExit11): /* Merge the 5 bytes xmm1[11..15] into the 16-byte block at %rdx+%rsi, keeping that block's last 11 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3567 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3568 psrldq $5, %xmm6 /* move its last 11 bytes down to the low end and zero the rest */
3569 palignr $11, %xmm1, %xmm6 /* xmm6 = xmm1[11..15] followed by the 11 preserved bytes */
3570 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3571 lea 5(%rsi), %rsi /* advance the offset by the 5 bytes taken from xmm1 */
3572 jmp L(CopyFrom1To16BytesCase3)
3573
3574 L(StrncpyLeave12): /* Shift-by-12 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit12) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit12). */
3575 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3576 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3577 jle L(StrncpyExit12) /* count <= 0 (signed) */
3578 palignr $12, %xmm1, %xmm2 /* xmm2 = xmm1[12..15] followed by the low 12 bytes of xmm2 */
3579 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3580 movaps %xmm2, (%rdx) /* store chunk 0 */
3581 movaps 20(%rcx), %xmm2 /* next source load; movaps requires %rcx+20 to be 16-byte aligned */
3582 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit12) uses %rsi as an offset from %rdx */
3583 movaps %xmm2, %xmm3
3584 sub $16, %r8 /* charge one chunk against the count */
3585 jbe L(StrncpyExit12) /* borrow or zero: count exhausted */
3586 palignr $12, %xmm1, %xmm2
3587 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3588 movaps 20+16(%rcx), %xmm2
3589 movaps %xmm3, %xmm1
3590 lea 16(%rsi), %rsi
3591 sub $16, %r8
3592 jbe L(StrncpyExit12)
3593 movaps %xmm2, %xmm1
3594 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3595 lea 16(%rsi), %rsi
3596 sub $16, %r8
3597 jbe L(StrncpyExit12)
3598 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3599 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3600 lea 16(%rsi), %rsi
3601 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3602
3603 L(StrncpyExit12): /* Merge the 4 bytes xmm1[12..15] into the 16-byte block at %rdx+%rsi, keeping that block's last 12 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3604 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3605 psrldq $4, %xmm6 /* move its last 12 bytes down to the low end and zero the rest */
3606 palignr $12, %xmm1, %xmm6 /* xmm6 = xmm1[12..15] followed by the 12 preserved bytes */
3607 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3608 lea 4(%rsi), %rsi /* advance the offset by the 4 bytes taken from xmm1 */
3609 jmp L(CopyFrom1To16BytesCase3)
3610
3611 L(StrncpyLeave13): /* Shift-by-13 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit13) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit13). */
3612 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3613 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3614 jle L(StrncpyExit13) /* count <= 0 (signed) */
3615 palignr $13, %xmm1, %xmm2 /* xmm2 = xmm1[13..15] followed by the low 13 bytes of xmm2 */
3616 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3617 movaps %xmm2, (%rdx) /* store chunk 0 */
3618 movaps 19(%rcx), %xmm2 /* next source load; movaps requires %rcx+19 to be 16-byte aligned */
3619 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit13) uses %rsi as an offset from %rdx */
3620 movaps %xmm2, %xmm3
3621 sub $16, %r8 /* charge one chunk against the count */
3622 jbe L(StrncpyExit13) /* borrow or zero: count exhausted */
3623 palignr $13, %xmm1, %xmm2
3624 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3625 movaps 19+16(%rcx), %xmm2
3626 movaps %xmm3, %xmm1
3627 lea 16(%rsi), %rsi
3628 sub $16, %r8
3629 jbe L(StrncpyExit13)
3630 movaps %xmm2, %xmm1
3631 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3632 lea 16(%rsi), %rsi
3633 sub $16, %r8
3634 jbe L(StrncpyExit13)
3635 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3636 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3637 lea 16(%rsi), %rsi
3638 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3639
3640 L(StrncpyExit13): /* Merge the 3 bytes xmm1[13..15] into the 16-byte block at %rdx+%rsi, keeping that block's last 13 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3641 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3642 psrldq $3, %xmm6 /* move its last 13 bytes down to the low end and zero the rest */
3643 palignr $13, %xmm1, %xmm6 /* xmm6 = xmm1[13..15] followed by the 13 preserved bytes */
3644 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3645 lea 3(%rsi), %rsi /* advance the offset by the 3 bytes taken from xmm1 */
3646 jmp L(CopyFrom1To16BytesCase3)
3647
3648 L(StrncpyLeave14): /* Shift-by-14 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit14) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit14). */
3649 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3650 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3651 jle L(StrncpyExit14) /* count <= 0 (signed) */
3652 palignr $14, %xmm1, %xmm2 /* xmm2 = xmm1[14..15] followed by the low 14 bytes of xmm2 */
3653 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3654 movaps %xmm2, (%rdx) /* store chunk 0 */
3655 movaps 18(%rcx), %xmm2 /* next source load; movaps requires %rcx+18 to be 16-byte aligned */
3656 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit14) uses %rsi as an offset from %rdx */
3657 movaps %xmm2, %xmm3
3658 sub $16, %r8 /* charge one chunk against the count */
3659 jbe L(StrncpyExit14) /* borrow or zero: count exhausted */
3660 palignr $14, %xmm1, %xmm2
3661 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3662 movaps 18+16(%rcx), %xmm2
3663 movaps %xmm3, %xmm1
3664 lea 16(%rsi), %rsi
3665 sub $16, %r8
3666 jbe L(StrncpyExit14)
3667 movaps %xmm2, %xmm1
3668 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3669 lea 16(%rsi), %rsi
3670 sub $16, %r8
3671 jbe L(StrncpyExit14)
3672 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3673 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3674 lea 16(%rsi), %rsi
3675 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3676
3677 L(StrncpyExit14): /* Merge the 2 bytes xmm1[14..15] into the 16-byte block at %rdx+%rsi, keeping that block's last 14 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3678 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3679 psrldq $2, %xmm6 /* move its last 14 bytes down to the low end and zero the rest */
3680 palignr $14, %xmm1, %xmm6 /* xmm6 = xmm1[14..15] followed by the 14 preserved bytes */
3681 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3682 lea 2(%rsi), %rsi /* advance the offset by the 2 bytes taken from xmm1 */
3683 jmp L(CopyFrom1To16BytesCase3)
3684
3685 L(StrncpyLeave15): /* Shift-by-15 leave path: store up to four 16-byte chunks at (%rdx)..48(%rdx), jumping to L(StrncpyExit15) as soon as the count in %r8 runs out; the end of the block falls through into L(StrncpyExit15). */
3686 movaps %xmm2, %xmm3 /* keep a copy of xmm2 before palignr overwrites it */
3687 add $48, %r8 /* re-bias the count; the flags drive the jle below */
3688 jle L(StrncpyExit15) /* count <= 0 (signed) */
3689 palignr $15, %xmm1, %xmm2 /* xmm2 = xmm1[15] followed by the low 15 bytes of xmm2 */
3690 movaps %xmm3, %xmm1 /* the saved copy becomes the low half for the next palignr */
3691 movaps %xmm2, (%rdx) /* store chunk 0 */
3692 movaps 17(%rcx), %xmm2 /* next source load; movaps requires %rcx+17 to be 16-byte aligned */
3693 lea 16(%rsi), %rsi /* %rsi += 16; L(StrncpyExit15) uses %rsi as an offset from %rdx */
3694 movaps %xmm2, %xmm3
3695 sub $16, %r8 /* charge one chunk against the count */
3696 jbe L(StrncpyExit15) /* borrow or zero: count exhausted */
3697 palignr $15, %xmm1, %xmm2
3698 movaps %xmm2, 16(%rdx) /* store chunk 1 */
3699 movaps 17+16(%rcx), %xmm2
3700 movaps %xmm3, %xmm1
3701 lea 16(%rsi), %rsi
3702 sub $16, %r8
3703 jbe L(StrncpyExit15)
3704 movaps %xmm2, %xmm1
3705 movaps %xmm4, 32(%rdx) /* store chunk 2. NOTE(review): xmm4 and xmm5 presumably hold chunks already shifted before entry -- confirm */
3706 lea 16(%rsi), %rsi
3707 sub $16, %r8
3708 jbe L(StrncpyExit15)
3709 movaps %xmm7, %xmm1 /* NOTE(review): xmm7 presumably holds the most recent raw source block -- confirm */
3710 movaps %xmm5, 48(%rdx) /* store chunk 3 */
3711 lea 16(%rsi), %rsi
3712 lea -16(%r8), %r8 /* %r8 -= 16 without touching the flags */
3713
3714 L(StrncpyExit15): /* Merge the single byte xmm1[15] into the 16-byte block at %rdx+%rsi, keeping that block's last 15 bytes, then continue at L(CopyFrom1To16BytesCase3). */
3715 movaps (%rdx, %rsi), %xmm6 /* load the current block; movaps requires the address to be 16-byte aligned */
3716 psrldq $1, %xmm6 /* move its last 15 bytes down to the low end and zero the top byte */
3717 palignr $15, %xmm1, %xmm6 /* xmm6 = xmm1[15] followed by the 15 preserved bytes */
3718 movaps %xmm6, (%rdx, %rsi) /* write the merged block back */
3719 lea 1(%rsi), %rsi /* advance the offset by the 1 byte taken from xmm1 */
3720 jmp L(CopyFrom1To16BytesCase3)
3721 # endif
3722 # ifndef USE_AS_STRCAT
3723 END (STRCPY)
3724 # endif
3725 #endif