]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/multiarch/strcpy-ssse3.S
Optimized st{r,p}{,n}cpy for SSE2/SSSE3 on x86-32
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / strcpy-ssse3.S
1 /* strcpy with SSSE3
2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21
22 #ifndef NOT_IN_libc
23
24 # include <sysdep.h>
25
26 # define CFI_PUSH(REG) \
27 cfi_adjust_cfa_offset (4); \
28 cfi_rel_offset (REG, 0) /* CFI bookkeeping for a 4-byte push of REG: CFA offset +4, REG recorded at offset 0. */
29
30 # define CFI_POP(REG) \
31 cfi_adjust_cfa_offset (-4); \
32 cfi_restore (REG) /* CFI bookkeeping for a 4-byte pop of REG: CFA offset -4, REG marked as restored. */
33
34 # define PUSH(REG) pushl REG; CFI_PUSH (REG) /* push REG and emit the matching CFI annotations */
35 # define POP(REG) popl REG; CFI_POP (REG) /* pop REG and emit the matching CFI annotations */
36
37 # ifndef STRCPY
38 # define STRCPY __strcpy_ssse3 /* default entry-point name, used only when STRCPY was not already defined */
39 # endif
40
41 # ifdef USE_AS_STRNCPY
42 # define PARMS 8 /* ENTRANCE pushes %ebx, so the first argument is read from 8(%esp) */
43 # define ENTRANCE PUSH(%ebx)
44 # define RETURN POP(%ebx); ret; CFI_PUSH(%ebx); /* the CFI_PUSH after ret re-applies the CFI state that POP just undid, for the code that follows this return site */
45 # define RETURN1 POP(%edi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi) /* as RETURN, but pops %edi first; CFI state is re-applied in reverse order after ret */
46 # else
47 # define PARMS 4 /* nothing is pushed on entry, so the first argument is read from 4(%esp) */
48 # define ENTRANCE /* empty: no register is saved on entry */
49 # define RETURN ret
50 # define RETURN1 POP(%edi); ret; CFI_PUSH(%edi) /* pops %edi before returning; CFI state is re-applied after ret */
51 # endif
52
53 # define STR1 PARMS /* stack offset of the first argument; loaded into %edx, which the copy stores through (destination) */
54 # define STR2 STR1+4 /* stack offset of the second argument; loaded into %ecx, which the copy reads from (source) */
55 # define LEN STR2+4 /* stack offset of the third argument; loaded into %ebx only under USE_AS_STRNCPY */
56
57 /* In this code the following instructions are used for copying:
58 movb - 1 byte
59 movw - 2 bytes
60 movl - 4 bytes
61 movlpd - 8 bytes
62 movaps - 16 bytes - requires 16-byte alignment
63 of the source and destination addresses.
64 16-byte alignment: the address is a 32-bit value
65 whose low four bits are 0.
66 */
67
68 .text
69 ENTRY (STRCPY)
70 ENTRANCE
71 mov STR1(%esp), %edx
72 mov STR2(%esp), %ecx
73 # ifdef USE_AS_STRNCPY
74 movl LEN(%esp), %ebx
75 test %ebx, %ebx
76 jz L(ExitTail0)
77 cmp $8, %ebx
78 jbe L(StrncpyExit8Bytes)
79 # endif
80 cmpb $0, (%ecx)
81 jz L(ExitTail1)
82 cmpb $0, 1(%ecx)
83 jz L(ExitTail2)
84 cmpb $0, 2(%ecx)
85 jz L(ExitTail3)
86 cmpb $0, 3(%ecx)
87 jz L(ExitTail4)
88 cmpb $0, 4(%ecx)
89 jz L(ExitTail5)
90 cmpb $0, 5(%ecx)
91 jz L(ExitTail6)
92 cmpb $0, 6(%ecx)
93 jz L(ExitTail7)
94 cmpb $0, 7(%ecx)
95 jz L(ExitTail8)
96 # ifdef USE_AS_STRNCPY
97 cmp $16, %ebx
98 jb L(StrncpyExit15Bytes)
99 # endif
100 cmpb $0, 8(%ecx)
101 jz L(ExitTail9)
102 cmpb $0, 9(%ecx)
103 jz L(ExitTail10)
104 cmpb $0, 10(%ecx)
105 jz L(ExitTail11)
106 cmpb $0, 11(%ecx)
107 jz L(ExitTail12)
108 cmpb $0, 12(%ecx)
109 jz L(ExitTail13)
110 cmpb $0, 13(%ecx)
111 jz L(ExitTail14)
112 cmpb $0, 14(%ecx)
113 jz L(ExitTail15)
114 # ifdef USE_AS_STRNCPY
115 cmp $16, %ebx
116 je L(ExitTail16)
117 # endif
118 cmpb $0, 15(%ecx)
119 jz L(ExitTail16)
120
121 PUSH (%edi)
122 mov %edx, %edi
123 PUSH (%esi)
124 # ifdef USE_AS_STRNCPY
125 mov %ecx, %esi
126 and $0xf, %esi
127
128 /* add ecx_shift (ecx & 0xf) to ebx */
129
130 add %esi, %ebx
131 # endif
132 lea 16(%ecx), %esi
133 /* Now:
134 esi = alignment_16(ecx) + ecx_shift + 16;
135 ecx_shift = ecx - alignment_16(ecx)
136 */
137 and $-16, %esi
138 /* Now:
139 esi = alignment_16(ecx) + 16
140 */
141 pxor %xmm0, %xmm0
142 movlpd (%ecx), %xmm1
143 movlpd %xmm1, (%edx)
144 /*
145 check whether there is a zero byte in the next 16 bytes of the string,
146 from esi to esi + 15, and form a byte mask in xmm0
147 */
148 pcmpeqb (%esi), %xmm0
149 movlpd 8(%ecx), %xmm1
150 movlpd %xmm1, 8(%edx)
151
152 /* convert byte mask in xmm0 to bit mask */
153
154 pmovmskb %xmm0, %eax
155 sub %ecx, %esi
156
157 /* esi = 16 - ecx_shift */
158
159 /* eax = 0: there isn't end of string from position esi to esi+15 */
160
161 # ifdef USE_AS_STRNCPY
162 sub $32, %ebx
163 jbe L(CopyFrom1To16BytesCase2OrCase3)
164 # endif
165 test %eax, %eax
166 jnz L(CopyFrom1To16Bytes)
167
168 mov %edx, %eax
169 lea 16(%edx), %edx
170 /* Now:
171 edx = edx + 16 = alignment_16(edx) + edx_shift + 16
172 */
173 and $-16, %edx
174
175 /* Now: edx = alignment_16(edx) + 16 */
176
177 sub %edx, %eax
178
179 /* Now: eax = edx_shift - 16 */
180
181 # ifdef USE_AS_STRNCPY
182 add %eax, %esi
183 lea -1(%esi), %esi
184 and $1<<31, %esi
185 test %esi, %esi
186 jnz L(ContinueCopy)
187 lea 16(%ebx), %ebx
188
189 L(ContinueCopy):
190 # endif
191 sub %eax, %ecx
192 /* Now:
193 case ecx_shift >= edx_shift:
194 ecx = alignment_16(ecx) + (ecx_shift - edx_shift) + 16
195 case ecx_shift < edx_shift:
196 ecx = alignment_16(ecx) + (16 + ecx_shift - edx_shift)
197 */
198 mov %ecx, %eax
199 and $0xf, %eax
200 /* Now:
201 case ecx_shift >= edx_shift: eax = ecx_shift - edx_shift
202 case ecx_shift < edx_shift: eax = (16 + ecx_shift - edx_shift)
203 eax can be 0, 1, ..., 15
204 */
205 mov $0, %esi
206
207 /* case: ecx_shift == edx_shift (ZF is still the result of the `and' above; mov leaves the flags untouched) */
208
209 jz L(Align16Both)
210
211 cmp $8, %eax
212 jae L(ShlHigh8)
213 cmp $1, %eax
214 je L(Shl1)
215 cmp $2, %eax
216 je L(Shl2)
217 cmp $3, %eax
218 je L(Shl3)
219 cmp $4, %eax
220 je L(Shl4)
221 cmp $5, %eax
222 je L(Shl5)
223 cmp $6, %eax
224 je L(Shl6)
225 jmp L(Shl7)
226
227 L(ShlHigh8):
228 je L(Shl8)
229 cmp $9, %eax
230 je L(Shl9)
231 cmp $10, %eax
232 je L(Shl10)
233 cmp $11, %eax
234 je L(Shl11)
235 cmp $12, %eax
236 je L(Shl12)
237 cmp $13, %eax
238 je L(Shl13)
239 cmp $14, %eax
240 je L(Shl14)
241 jmp L(Shl15)
242
243 L(Align16Both):
244 movaps (%ecx), %xmm1
245 movaps 16(%ecx), %xmm2
246 movaps %xmm1, (%edx)
247 pcmpeqb %xmm2, %xmm0
248 pmovmskb %xmm0, %eax
249 lea 16(%esi), %esi
250 # ifdef USE_AS_STRNCPY
251 sub $16, %ebx
252 jbe L(CopyFrom1To16BytesCase2OrCase3)
253 # endif
254 test %eax, %eax
255 jnz L(CopyFrom1To16Bytes)
256
257 movaps 16(%ecx, %esi), %xmm3
258 movaps %xmm2, (%edx, %esi)
259 pcmpeqb %xmm3, %xmm0
260 pmovmskb %xmm0, %eax
261 lea 16(%esi), %esi
262 # ifdef USE_AS_STRNCPY
263 sub $16, %ebx
264 jbe L(CopyFrom1To16BytesCase2OrCase3)
265 # endif
266 test %eax, %eax
267 jnz L(CopyFrom1To16Bytes)
268
269 movaps 16(%ecx, %esi), %xmm4
270 movaps %xmm3, (%edx, %esi)
271 pcmpeqb %xmm4, %xmm0
272 pmovmskb %xmm0, %eax
273 lea 16(%esi), %esi
274 # ifdef USE_AS_STRNCPY
275 sub $16, %ebx
276 jbe L(CopyFrom1To16BytesCase2OrCase3)
277 # endif
278 test %eax, %eax
279 jnz L(CopyFrom1To16Bytes)
280
281 movaps 16(%ecx, %esi), %xmm1
282 movaps %xmm4, (%edx, %esi)
283 pcmpeqb %xmm1, %xmm0
284 pmovmskb %xmm0, %eax
285 lea 16(%esi), %esi
286 # ifdef USE_AS_STRNCPY
287 sub $16, %ebx
288 jbe L(CopyFrom1To16BytesCase2OrCase3)
289 # endif
290 test %eax, %eax
291 jnz L(CopyFrom1To16Bytes)
292
293 movaps 16(%ecx, %esi), %xmm2
294 movaps %xmm1, (%edx, %esi)
295 pcmpeqb %xmm2, %xmm0
296 pmovmskb %xmm0, %eax
297 lea 16(%esi), %esi
298 # ifdef USE_AS_STRNCPY
299 sub $16, %ebx
300 jbe L(CopyFrom1To16BytesCase2OrCase3)
301 # endif
302 test %eax, %eax
303 jnz L(CopyFrom1To16Bytes)
304
305 movaps 16(%ecx, %esi), %xmm3
306 movaps %xmm2, (%edx, %esi)
307 pcmpeqb %xmm3, %xmm0
308 pmovmskb %xmm0, %eax
309 lea 16(%esi), %esi
310 # ifdef USE_AS_STRNCPY
311 sub $16, %ebx
312 jbe L(CopyFrom1To16BytesCase2OrCase3)
313 # endif
314 test %eax, %eax
315 jnz L(CopyFrom1To16Bytes)
316
317 movaps %xmm3, (%edx, %esi)
318 mov %ecx, %eax
319 lea 16(%ecx, %esi), %ecx
320 and $-0x40, %ecx
321 sub %ecx, %eax
322 sub %eax, %edx
323 # ifdef USE_AS_STRNCPY
324 lea 48+64(%ebx, %eax), %ebx
325 # endif
326 mov $-0x40, %esi
327
328 L(Aligned64Loop):
329 movaps (%ecx), %xmm2
330 movaps 32(%ecx), %xmm3
331 movaps %xmm2, %xmm4
332 movaps 16(%ecx), %xmm5
333 movaps %xmm3, %xmm6
334 movaps 48(%ecx), %xmm7
335 pminub %xmm5, %xmm2
336 pminub %xmm7, %xmm3
337 pminub %xmm2, %xmm3
338 lea 64(%edx), %edx
339 pcmpeqb %xmm0, %xmm3
340 lea 64(%ecx), %ecx
341 pmovmskb %xmm3, %eax
342 # ifdef USE_AS_STRNCPY
343 sub $64, %ebx
344 jbe L(StrncpyLeaveCase2OrCase3)
345 # endif
346 test %eax, %eax
347 jnz L(Aligned64Leave)
348 movaps %xmm4, -64(%edx)
349 movaps %xmm5, -48(%edx)
350 movaps %xmm6, -32(%edx)
351 movaps %xmm7, -16(%edx)
352 jmp L(Aligned64Loop)
353
354 L(Aligned64Leave):
355 # ifdef USE_AS_STRNCPY
356 lea 48(%ebx), %ebx
357 # endif
358 pcmpeqb %xmm4, %xmm0
359 pmovmskb %xmm0, %eax
360 test %eax, %eax
361 jnz L(CopyFrom1To16Bytes)
362
363 pcmpeqb %xmm5, %xmm0
364 # ifdef USE_AS_STRNCPY
365 lea -16(%ebx), %ebx
366 # endif
367 pmovmskb %xmm0, %eax
368 movaps %xmm4, -64(%edx)
369 test %eax, %eax
370 lea 16(%esi), %esi
371 jnz L(CopyFrom1To16Bytes)
372
373 pcmpeqb %xmm6, %xmm0
374 # ifdef USE_AS_STRNCPY
375 lea -16(%ebx), %ebx
376 # endif
377 pmovmskb %xmm0, %eax
378 movaps %xmm5, -48(%edx)
379 test %eax, %eax
380 lea 16(%esi), %esi
381 jnz L(CopyFrom1To16Bytes)
382
383 movaps %xmm6, -32(%edx)
384 pcmpeqb %xmm7, %xmm0
385 # ifdef USE_AS_STRNCPY
386 lea -16(%ebx), %ebx
387 # endif
388 pmovmskb %xmm0, %eax
389 lea 16(%esi), %esi
390 jmp L(CopyFrom1To16Bytes)
391
392 .p2align 4
393 L(Shl1):
394 movaps -1(%ecx), %xmm1
395 movaps 15(%ecx), %xmm2
396 L(Shl1Start):
397 pcmpeqb %xmm2, %xmm0
398 pmovmskb %xmm0, %eax
399 movaps %xmm2, %xmm3
400 # ifdef USE_AS_STRNCPY
401 sub $16, %ebx
402 jbe L(StrncpyExit1Case2OrCase3)
403 # endif
404 test %eax, %eax
405 jnz L(Shl1LoopExit)
406
407 palignr $1, %xmm1, %xmm2
408 movaps %xmm3, %xmm1
409 movaps %xmm2, (%edx)
410 movaps 31(%ecx), %xmm2
411
412 pcmpeqb %xmm2, %xmm0
413 lea 16(%edx), %edx
414 pmovmskb %xmm0, %eax
415 lea 16(%ecx), %ecx
416 movaps %xmm2, %xmm3
417 # ifdef USE_AS_STRNCPY
418 sub $16, %ebx
419 jbe L(StrncpyExit1Case2OrCase3)
420 # endif
421 test %eax, %eax
422 jnz L(Shl1LoopExit)
423
424 palignr $1, %xmm1, %xmm2
425 movaps %xmm2, (%edx)
426 movaps 31(%ecx), %xmm2
427 movaps %xmm3, %xmm1
428
429 pcmpeqb %xmm2, %xmm0
430 lea 16(%edx), %edx
431 pmovmskb %xmm0, %eax
432 lea 16(%ecx), %ecx
433 movaps %xmm2, %xmm3
434 # ifdef USE_AS_STRNCPY
435 sub $16, %ebx
436 jbe L(StrncpyExit1Case2OrCase3)
437 # endif
438 test %eax, %eax
439 jnz L(Shl1LoopExit)
440
441 palignr $1, %xmm1, %xmm2
442 movaps %xmm3, %xmm1
443 movaps %xmm2, (%edx)
444 movaps 31(%ecx), %xmm2
445
446 pcmpeqb %xmm2, %xmm0
447 lea 16(%edx), %edx
448 pmovmskb %xmm0, %eax
449 lea 16(%ecx), %ecx
450 movaps %xmm2, %xmm3
451 # ifdef USE_AS_STRNCPY
452 sub $16, %ebx
453 jbe L(StrncpyExit1Case2OrCase3)
454 # endif
455 test %eax, %eax
456 jnz L(Shl1LoopExit)
457
458 palignr $1, %xmm1, %xmm2
459 movaps %xmm3, %xmm1
460 movaps %xmm2, (%edx)
461 lea 31(%ecx), %ecx
462 lea 16(%edx), %edx
463
464 mov %ecx, %eax
465 and $-0x40, %ecx
466 sub %ecx, %eax
467 lea -15(%ecx), %ecx
468 sub %eax, %edx
469 # ifdef USE_AS_STRNCPY
470 add %eax, %ebx
471 # endif
472 movaps -1(%ecx), %xmm1
473
474 L(Shl1LoopStart):
475 movaps 15(%ecx), %xmm2
476 movaps 31(%ecx), %xmm3
477 movaps %xmm3, %xmm6
478 movaps 47(%ecx), %xmm4
479 movaps %xmm4, %xmm7
480 movaps 63(%ecx), %xmm5
481 pminub %xmm2, %xmm6
482 pminub %xmm5, %xmm7
483 pminub %xmm6, %xmm7
484 pcmpeqb %xmm0, %xmm7
485 pmovmskb %xmm7, %eax
486 movaps %xmm5, %xmm7
487 palignr $1, %xmm4, %xmm5
488 test %eax, %eax
489 palignr $1, %xmm3, %xmm4
490 jnz L(Shl1Start)
491 # ifdef USE_AS_STRNCPY
492 sub $64, %ebx
493 jbe L(StrncpyLeave1)
494 # endif
495 palignr $1, %xmm2, %xmm3
496 lea 64(%ecx), %ecx
497 palignr $1, %xmm1, %xmm2
498 movaps %xmm7, %xmm1
499 movaps %xmm5, 48(%edx)
500 movaps %xmm4, 32(%edx)
501 movaps %xmm3, 16(%edx)
502 movaps %xmm2, (%edx)
503 lea 64(%edx), %edx
504 jmp L(Shl1LoopStart)
505
506 L(Shl1LoopExit):
507 movaps (%edx), %xmm6
508 psrldq $15, %xmm6
509 mov $15, %esi
510 palignr $1, %xmm1, %xmm6
511 movaps %xmm6, (%edx)
512 jmp L(CopyFrom1To16Bytes)
513
514 .p2align 4
515 L(Shl2):
516 movaps -2(%ecx), %xmm1
517 movaps 14(%ecx), %xmm2
518 L(Shl2Start):
519 pcmpeqb %xmm2, %xmm0
520 pmovmskb %xmm0, %eax
521 movaps %xmm2, %xmm3
522 # ifdef USE_AS_STRNCPY
523 sub $16, %ebx
524 jbe L(StrncpyExit2Case2OrCase3)
525 # endif
526 test %eax, %eax
527 jnz L(Shl2LoopExit)
528
529 palignr $2, %xmm1, %xmm2
530 movaps %xmm3, %xmm1
531 movaps %xmm2, (%edx)
532 movaps 30(%ecx), %xmm2
533
534 pcmpeqb %xmm2, %xmm0
535 lea 16(%edx), %edx
536 pmovmskb %xmm0, %eax
537 lea 16(%ecx), %ecx
538 movaps %xmm2, %xmm3
539 # ifdef USE_AS_STRNCPY
540 sub $16, %ebx
541 jbe L(StrncpyExit2Case2OrCase3)
542 # endif
543 test %eax, %eax
544 jnz L(Shl2LoopExit)
545
546 palignr $2, %xmm1, %xmm2
547 movaps %xmm2, (%edx)
548 movaps 30(%ecx), %xmm2
549 movaps %xmm3, %xmm1
550
551 pcmpeqb %xmm2, %xmm0
552 lea 16(%edx), %edx
553 pmovmskb %xmm0, %eax
554 lea 16(%ecx), %ecx
555 movaps %xmm2, %xmm3
556 # ifdef USE_AS_STRNCPY
557 sub $16, %ebx
558 jbe L(StrncpyExit2Case2OrCase3)
559 # endif
560 test %eax, %eax
561 jnz L(Shl2LoopExit)
562
563 palignr $2, %xmm1, %xmm2
564 movaps %xmm3, %xmm1
565 movaps %xmm2, (%edx)
566 movaps 30(%ecx), %xmm2
567
568 pcmpeqb %xmm2, %xmm0
569 lea 16(%edx), %edx
570 pmovmskb %xmm0, %eax
571 lea 16(%ecx), %ecx
572 movaps %xmm2, %xmm3
573 # ifdef USE_AS_STRNCPY
574 sub $16, %ebx
575 jbe L(StrncpyExit2Case2OrCase3)
576 # endif
577 test %eax, %eax
578 jnz L(Shl2LoopExit)
579
580 palignr $2, %xmm1, %xmm2
581 movaps %xmm3, %xmm1
582 movaps %xmm2, (%edx)
583 lea 30(%ecx), %ecx
584 lea 16(%edx), %edx
585
586 mov %ecx, %eax
587 and $-0x40, %ecx
588 sub %ecx, %eax
589 lea -14(%ecx), %ecx
590 sub %eax, %edx
591 # ifdef USE_AS_STRNCPY
592 add %eax, %ebx
593 # endif
594 movaps -2(%ecx), %xmm1
595
596 L(Shl2LoopStart):
597 movaps 14(%ecx), %xmm2
598 movaps 30(%ecx), %xmm3
599 movaps %xmm3, %xmm6
600 movaps 46(%ecx), %xmm4
601 movaps %xmm4, %xmm7
602 movaps 62(%ecx), %xmm5
603 pminub %xmm2, %xmm6
604 pminub %xmm5, %xmm7
605 pminub %xmm6, %xmm7
606 pcmpeqb %xmm0, %xmm7
607 pmovmskb %xmm7, %eax
608 movaps %xmm5, %xmm7
609 palignr $2, %xmm4, %xmm5
610 test %eax, %eax
611 palignr $2, %xmm3, %xmm4
612 jnz L(Shl2Start)
613 # ifdef USE_AS_STRNCPY
614 sub $64, %ebx
615 jbe L(StrncpyLeave2)
616 # endif
617 palignr $2, %xmm2, %xmm3
618 lea 64(%ecx), %ecx
619 palignr $2, %xmm1, %xmm2
620 movaps %xmm7, %xmm1
621 movaps %xmm5, 48(%edx)
622 movaps %xmm4, 32(%edx)
623 movaps %xmm3, 16(%edx)
624 movaps %xmm2, (%edx)
625 lea 64(%edx), %edx
626 jmp L(Shl2LoopStart)
627
628 L(Shl2LoopExit):
629 movaps (%edx), %xmm6
630 psrldq $14, %xmm6
631 mov $14, %esi
632 palignr $2, %xmm1, %xmm6
633 movaps %xmm6, (%edx)
634 jmp L(CopyFrom1To16Bytes)
635
636 .p2align 4
637 L(Shl3):
638 movaps -3(%ecx), %xmm1
639 movaps 13(%ecx), %xmm2
640 L(Shl3Start):
641 pcmpeqb %xmm2, %xmm0
642 pmovmskb %xmm0, %eax
643 movaps %xmm2, %xmm3
644 # ifdef USE_AS_STRNCPY
645 sub $16, %ebx
646 jbe L(StrncpyExit3Case2OrCase3)
647 # endif
648 test %eax, %eax
649 jnz L(Shl3LoopExit)
650
651 palignr $3, %xmm1, %xmm2
652 movaps %xmm3, %xmm1
653 movaps %xmm2, (%edx)
654 movaps 29(%ecx), %xmm2
655
656 pcmpeqb %xmm2, %xmm0
657 lea 16(%edx), %edx
658 pmovmskb %xmm0, %eax
659 lea 16(%ecx), %ecx
660 movaps %xmm2, %xmm3
661 # ifdef USE_AS_STRNCPY
662 sub $16, %ebx
663 jbe L(StrncpyExit3Case2OrCase3)
664 # endif
665 test %eax, %eax
666 jnz L(Shl3LoopExit)
667
668 palignr $3, %xmm1, %xmm2
669 movaps %xmm2, (%edx)
670 movaps 29(%ecx), %xmm2
671 movaps %xmm3, %xmm1
672
673 pcmpeqb %xmm2, %xmm0
674 lea 16(%edx), %edx
675 pmovmskb %xmm0, %eax
676 lea 16(%ecx), %ecx
677 movaps %xmm2, %xmm3
678 # ifdef USE_AS_STRNCPY
679 sub $16, %ebx
680 jbe L(StrncpyExit3Case2OrCase3)
681 # endif
682 test %eax, %eax
683 jnz L(Shl3LoopExit)
684
685 palignr $3, %xmm1, %xmm2
686 movaps %xmm3, %xmm1
687 movaps %xmm2, (%edx)
688 movaps 29(%ecx), %xmm2
689
690 pcmpeqb %xmm2, %xmm0
691 lea 16(%edx), %edx
692 pmovmskb %xmm0, %eax
693 lea 16(%ecx), %ecx
694 movaps %xmm2, %xmm3
695 # ifdef USE_AS_STRNCPY
696 sub $16, %ebx
697 jbe L(StrncpyExit3Case2OrCase3)
698 # endif
699 test %eax, %eax
700 jnz L(Shl3LoopExit)
701
702 palignr $3, %xmm1, %xmm2
703 movaps %xmm3, %xmm1
704 movaps %xmm2, (%edx)
705 lea 29(%ecx), %ecx
706 lea 16(%edx), %edx
707
708 mov %ecx, %eax
709 and $-0x40, %ecx
710 sub %ecx, %eax
711 lea -13(%ecx), %ecx
712 sub %eax, %edx
713 # ifdef USE_AS_STRNCPY
714 add %eax, %ebx
715 # endif
716 movaps -3(%ecx), %xmm1
717
718 L(Shl3LoopStart):
719 movaps 13(%ecx), %xmm2
720 movaps 29(%ecx), %xmm3
721 movaps %xmm3, %xmm6
722 movaps 45(%ecx), %xmm4
723 movaps %xmm4, %xmm7
724 movaps 61(%ecx), %xmm5
725 pminub %xmm2, %xmm6
726 pminub %xmm5, %xmm7
727 pminub %xmm6, %xmm7
728 pcmpeqb %xmm0, %xmm7
729 pmovmskb %xmm7, %eax
730 movaps %xmm5, %xmm7
731 palignr $3, %xmm4, %xmm5
732 test %eax, %eax
733 palignr $3, %xmm3, %xmm4
734 jnz L(Shl3Start)
735 # ifdef USE_AS_STRNCPY
736 sub $64, %ebx
737 jbe L(StrncpyLeave3)
738 # endif
739 palignr $3, %xmm2, %xmm3
740 lea 64(%ecx), %ecx
741 palignr $3, %xmm1, %xmm2
742 movaps %xmm7, %xmm1
743 movaps %xmm5, 48(%edx)
744 movaps %xmm4, 32(%edx)
745 movaps %xmm3, 16(%edx)
746 movaps %xmm2, (%edx)
747 lea 64(%edx), %edx
748 jmp L(Shl3LoopStart)
749
750 L(Shl3LoopExit):
751 movaps (%edx), %xmm6
752 psrldq $13, %xmm6
753 mov $13, %esi
754 palignr $3, %xmm1, %xmm6
755 movaps %xmm6, (%edx)
756 jmp L(CopyFrom1To16Bytes)
757
758 .p2align 4
759 L(Shl4):
760 movaps -4(%ecx), %xmm1
761 movaps 12(%ecx), %xmm2
762 L(Shl4Start):
763 pcmpeqb %xmm2, %xmm0
764 pmovmskb %xmm0, %eax
765 movaps %xmm2, %xmm3
766 # ifdef USE_AS_STRNCPY
767 sub $16, %ebx
768 jbe L(StrncpyExit4Case2OrCase3)
769 # endif
770 test %eax, %eax
771 jnz L(Shl4LoopExit)
772
773 palignr $4, %xmm1, %xmm2
774 movaps %xmm3, %xmm1
775 movaps %xmm2, (%edx)
776 movaps 28(%ecx), %xmm2
777
778 pcmpeqb %xmm2, %xmm0
779 lea 16(%edx), %edx
780 pmovmskb %xmm0, %eax
781 lea 16(%ecx), %ecx
782 movaps %xmm2, %xmm3
783 # ifdef USE_AS_STRNCPY
784 sub $16, %ebx
785 jbe L(StrncpyExit4Case2OrCase3)
786 # endif
787 test %eax, %eax
788 jnz L(Shl4LoopExit)
789
790 palignr $4, %xmm1, %xmm2
791 movaps %xmm2, (%edx)
792 movaps 28(%ecx), %xmm2
793 movaps %xmm3, %xmm1
794
795 pcmpeqb %xmm2, %xmm0
796 lea 16(%edx), %edx
797 pmovmskb %xmm0, %eax
798 lea 16(%ecx), %ecx
799 movaps %xmm2, %xmm3
800 # ifdef USE_AS_STRNCPY
801 sub $16, %ebx
802 jbe L(StrncpyExit4Case2OrCase3)
803 # endif
804 test %eax, %eax
805 jnz L(Shl4LoopExit)
806
807 palignr $4, %xmm1, %xmm2
808 movaps %xmm3, %xmm1
809 movaps %xmm2, (%edx)
810 movaps 28(%ecx), %xmm2
811
812 pcmpeqb %xmm2, %xmm0
813 lea 16(%edx), %edx
814 pmovmskb %xmm0, %eax
815 lea 16(%ecx), %ecx
816 movaps %xmm2, %xmm3
817 # ifdef USE_AS_STRNCPY
818 sub $16, %ebx
819 jbe L(StrncpyExit4Case2OrCase3)
820 # endif
821 test %eax, %eax
822 jnz L(Shl4LoopExit)
823
824 palignr $4, %xmm1, %xmm2
825 movaps %xmm3, %xmm1
826 movaps %xmm2, (%edx)
827 lea 28(%ecx), %ecx
828 lea 16(%edx), %edx
829
830 mov %ecx, %eax
831 and $-0x40, %ecx
832 sub %ecx, %eax
833 lea -12(%ecx), %ecx
834 sub %eax, %edx
835 # ifdef USE_AS_STRNCPY
836 add %eax, %ebx
837 # endif
838 movaps -4(%ecx), %xmm1
839
840 L(Shl4LoopStart):
841 movaps 12(%ecx), %xmm2
842 movaps 28(%ecx), %xmm3
843 movaps %xmm3, %xmm6
844 movaps 44(%ecx), %xmm4
845 movaps %xmm4, %xmm7
846 movaps 60(%ecx), %xmm5
847 pminub %xmm2, %xmm6
848 pminub %xmm5, %xmm7
849 pminub %xmm6, %xmm7
850 pcmpeqb %xmm0, %xmm7
851 pmovmskb %xmm7, %eax
852 movaps %xmm5, %xmm7
853 palignr $4, %xmm4, %xmm5
854 test %eax, %eax
855 palignr $4, %xmm3, %xmm4
856 jnz L(Shl4Start)
857 # ifdef USE_AS_STRNCPY
858 sub $64, %ebx
859 jbe L(StrncpyLeave4)
860 # endif
861 palignr $4, %xmm2, %xmm3
862 lea 64(%ecx), %ecx
863 palignr $4, %xmm1, %xmm2
864 movaps %xmm7, %xmm1
865 movaps %xmm5, 48(%edx)
866 movaps %xmm4, 32(%edx)
867 movaps %xmm3, 16(%edx)
868 movaps %xmm2, (%edx)
869 lea 64(%edx), %edx
870 jmp L(Shl4LoopStart)
871
872 L(Shl4LoopExit):
873 movaps (%edx), %xmm6
874 psrldq $12, %xmm6
875 mov $12, %esi
876 palignr $4, %xmm1, %xmm6
877 movaps %xmm6, (%edx)
878 jmp L(CopyFrom1To16Bytes)
879
880 .p2align 4
881 L(Shl5):
882 movaps -5(%ecx), %xmm1
883 movaps 11(%ecx), %xmm2
884 L(Shl5Start):
885 pcmpeqb %xmm2, %xmm0
886 pmovmskb %xmm0, %eax
887 movaps %xmm2, %xmm3
888 # ifdef USE_AS_STRNCPY
889 sub $16, %ebx
890 jbe L(StrncpyExit5Case2OrCase3)
891 # endif
892 test %eax, %eax
893 jnz L(Shl5LoopExit)
894
895 palignr $5, %xmm1, %xmm2
896 movaps %xmm3, %xmm1
897 movaps %xmm2, (%edx)
898 movaps 27(%ecx), %xmm2
899
900 pcmpeqb %xmm2, %xmm0
901 lea 16(%edx), %edx
902 pmovmskb %xmm0, %eax
903 lea 16(%ecx), %ecx
904 movaps %xmm2, %xmm3
905 # ifdef USE_AS_STRNCPY
906 sub $16, %ebx
907 jbe L(StrncpyExit5Case2OrCase3)
908 # endif
909 test %eax, %eax
910 jnz L(Shl5LoopExit)
911
912 palignr $5, %xmm1, %xmm2
913 movaps %xmm2, (%edx)
914 movaps 27(%ecx), %xmm2
915 movaps %xmm3, %xmm1
916
917 pcmpeqb %xmm2, %xmm0
918 lea 16(%edx), %edx
919 pmovmskb %xmm0, %eax
920 lea 16(%ecx), %ecx
921 movaps %xmm2, %xmm3
922 # ifdef USE_AS_STRNCPY
923 sub $16, %ebx
924 jbe L(StrncpyExit5Case2OrCase3)
925 # endif
926 test %eax, %eax
927 jnz L(Shl5LoopExit)
928
929 palignr $5, %xmm1, %xmm2
930 movaps %xmm3, %xmm1
931 movaps %xmm2, (%edx)
932 movaps 27(%ecx), %xmm2
933
934 pcmpeqb %xmm2, %xmm0
935 lea 16(%edx), %edx
936 pmovmskb %xmm0, %eax
937 lea 16(%ecx), %ecx
938 movaps %xmm2, %xmm3
939 # ifdef USE_AS_STRNCPY
940 sub $16, %ebx
941 jbe L(StrncpyExit5Case2OrCase3)
942 # endif
943 test %eax, %eax
944 jnz L(Shl5LoopExit)
945
946 palignr $5, %xmm1, %xmm2
947 movaps %xmm3, %xmm1
948 movaps %xmm2, (%edx)
949 lea 27(%ecx), %ecx
950 lea 16(%edx), %edx
951
952 mov %ecx, %eax
953 and $-0x40, %ecx
954 sub %ecx, %eax
955 lea -11(%ecx), %ecx
956 sub %eax, %edx
957 # ifdef USE_AS_STRNCPY
958 add %eax, %ebx
959 # endif
960 movaps -5(%ecx), %xmm1
961
962 L(Shl5LoopStart):
963 movaps 11(%ecx), %xmm2
964 movaps 27(%ecx), %xmm3
965 movaps %xmm3, %xmm6
966 movaps 43(%ecx), %xmm4
967 movaps %xmm4, %xmm7
968 movaps 59(%ecx), %xmm5
969 pminub %xmm2, %xmm6
970 pminub %xmm5, %xmm7
971 pminub %xmm6, %xmm7
972 pcmpeqb %xmm0, %xmm7
973 pmovmskb %xmm7, %eax
974 movaps %xmm5, %xmm7
975 palignr $5, %xmm4, %xmm5
976 test %eax, %eax
977 palignr $5, %xmm3, %xmm4
978 jnz L(Shl5Start)
979 # ifdef USE_AS_STRNCPY
980 sub $64, %ebx
981 jbe L(StrncpyLeave5)
982 # endif
983 palignr $5, %xmm2, %xmm3
984 lea 64(%ecx), %ecx
985 palignr $5, %xmm1, %xmm2
986 movaps %xmm7, %xmm1
987 movaps %xmm5, 48(%edx)
988 movaps %xmm4, 32(%edx)
989 movaps %xmm3, 16(%edx)
990 movaps %xmm2, (%edx)
991 lea 64(%edx), %edx
992 jmp L(Shl5LoopStart)
993
994 L(Shl5LoopExit):
995 movaps (%edx), %xmm6
996 psrldq $11, %xmm6
997 mov $11, %esi
998 palignr $5, %xmm1, %xmm6
999 movaps %xmm6, (%edx)
1000 jmp L(CopyFrom1To16Bytes)
1001
1002 .p2align 4
1003 L(Shl6):
1004 movaps -6(%ecx), %xmm1
1005 movaps 10(%ecx), %xmm2
1006 L(Shl6Start):
1007 pcmpeqb %xmm2, %xmm0
1008 pmovmskb %xmm0, %eax
1009 movaps %xmm2, %xmm3
1010 # ifdef USE_AS_STRNCPY
1011 sub $16, %ebx
1012 jbe L(StrncpyExit6Case2OrCase3)
1013 # endif
1014 test %eax, %eax
1015 jnz L(Shl6LoopExit)
1016
1017 palignr $6, %xmm1, %xmm2
1018 movaps %xmm3, %xmm1
1019 movaps %xmm2, (%edx)
1020 movaps 26(%ecx), %xmm2
1021
1022 pcmpeqb %xmm2, %xmm0
1023 lea 16(%edx), %edx
1024 pmovmskb %xmm0, %eax
1025 lea 16(%ecx), %ecx
1026 movaps %xmm2, %xmm3
1027 # ifdef USE_AS_STRNCPY
1028 sub $16, %ebx
1029 jbe L(StrncpyExit6Case2OrCase3)
1030 # endif
1031 test %eax, %eax
1032 jnz L(Shl6LoopExit)
1033
1034 palignr $6, %xmm1, %xmm2
1035 movaps %xmm2, (%edx)
1036 movaps 26(%ecx), %xmm2
1037 movaps %xmm3, %xmm1
1038
1039 pcmpeqb %xmm2, %xmm0
1040 lea 16(%edx), %edx
1041 pmovmskb %xmm0, %eax
1042 lea 16(%ecx), %ecx
1043 movaps %xmm2, %xmm3
1044 # ifdef USE_AS_STRNCPY
1045 sub $16, %ebx
1046 jbe L(StrncpyExit6Case2OrCase3)
1047 # endif
1048 test %eax, %eax
1049 jnz L(Shl6LoopExit)
1050
1051 palignr $6, %xmm1, %xmm2
1052 movaps %xmm3, %xmm1
1053 movaps %xmm2, (%edx)
1054 movaps 26(%ecx), %xmm2
1055
1056 pcmpeqb %xmm2, %xmm0
1057 lea 16(%edx), %edx
1058 pmovmskb %xmm0, %eax
1059 lea 16(%ecx), %ecx
1060 movaps %xmm2, %xmm3
1061 # ifdef USE_AS_STRNCPY
1062 sub $16, %ebx
1063 jbe L(StrncpyExit6Case2OrCase3)
1064 # endif
1065 test %eax, %eax
1066 jnz L(Shl6LoopExit)
1067
1068 palignr $6, %xmm1, %xmm2
1069 movaps %xmm3, %xmm1
1070 movaps %xmm2, (%edx)
1071 lea 26(%ecx), %ecx
1072 lea 16(%edx), %edx
1073
1074 mov %ecx, %eax
1075 and $-0x40, %ecx
1076 sub %ecx, %eax
1077 lea -10(%ecx), %ecx
1078 sub %eax, %edx
1079 # ifdef USE_AS_STRNCPY
1080 add %eax, %ebx
1081 # endif
1082 movaps -6(%ecx), %xmm1
1083
1084 L(Shl6LoopStart):
1085 movaps 10(%ecx), %xmm2
1086 movaps 26(%ecx), %xmm3
1087 movaps %xmm3, %xmm6
1088 movaps 42(%ecx), %xmm4
1089 movaps %xmm4, %xmm7
1090 movaps 58(%ecx), %xmm5
1091 pminub %xmm2, %xmm6
1092 pminub %xmm5, %xmm7
1093 pminub %xmm6, %xmm7
1094 pcmpeqb %xmm0, %xmm7
1095 pmovmskb %xmm7, %eax
1096 movaps %xmm5, %xmm7
1097 palignr $6, %xmm4, %xmm5
1098 test %eax, %eax
1099 palignr $6, %xmm3, %xmm4
1100 jnz L(Shl6Start)
1101 # ifdef USE_AS_STRNCPY
1102 sub $64, %ebx
1103 jbe L(StrncpyLeave6)
1104 # endif
1105 palignr $6, %xmm2, %xmm3
1106 lea 64(%ecx), %ecx
1107 palignr $6, %xmm1, %xmm2
1108 movaps %xmm7, %xmm1
1109 movaps %xmm5, 48(%edx)
1110 movaps %xmm4, 32(%edx)
1111 movaps %xmm3, 16(%edx)
1112 movaps %xmm2, (%edx)
1113 lea 64(%edx), %edx
1114 jmp L(Shl6LoopStart)
1115
1116 L(Shl6LoopExit):
1117 movaps (%edx), %xmm6
1118 psrldq $10, %xmm6
1119 mov $10, %esi
1120 palignr $6, %xmm1, %xmm6
1121 movaps %xmm6, (%edx)
1122 jmp L(CopyFrom1To16Bytes)
1123
1124 .p2align 4
1125 L(Shl7):
1126 movaps -7(%ecx), %xmm1
1127 movaps 9(%ecx), %xmm2
1128 L(Shl7Start):
1129 pcmpeqb %xmm2, %xmm0
1130 pmovmskb %xmm0, %eax
1131 movaps %xmm2, %xmm3
1132 # ifdef USE_AS_STRNCPY
1133 sub $16, %ebx
1134 jbe L(StrncpyExit7Case2OrCase3)
1135 # endif
1136 test %eax, %eax
1137 jnz L(Shl7LoopExit)
1138
1139 palignr $7, %xmm1, %xmm2
1140 movaps %xmm3, %xmm1
1141 movaps %xmm2, (%edx)
1142 movaps 25(%ecx), %xmm2
1143
1144 pcmpeqb %xmm2, %xmm0
1145 lea 16(%edx), %edx
1146 pmovmskb %xmm0, %eax
1147 lea 16(%ecx), %ecx
1148 movaps %xmm2, %xmm3
1149 # ifdef USE_AS_STRNCPY
1150 sub $16, %ebx
1151 jbe L(StrncpyExit7Case2OrCase3)
1152 # endif
1153 test %eax, %eax
1154 jnz L(Shl7LoopExit)
1155
1156 palignr $7, %xmm1, %xmm2
1157 movaps %xmm2, (%edx)
1158 movaps 25(%ecx), %xmm2
1159 movaps %xmm3, %xmm1
1160
1161 pcmpeqb %xmm2, %xmm0
1162 lea 16(%edx), %edx
1163 pmovmskb %xmm0, %eax
1164 lea 16(%ecx), %ecx
1165 movaps %xmm2, %xmm3
1166 # ifdef USE_AS_STRNCPY
1167 sub $16, %ebx
1168 jbe L(StrncpyExit7Case2OrCase3)
1169 # endif
1170 test %eax, %eax
1171 jnz L(Shl7LoopExit)
1172
1173 palignr $7, %xmm1, %xmm2
1174 movaps %xmm3, %xmm1
1175 movaps %xmm2, (%edx)
1176 movaps 25(%ecx), %xmm2
1177
1178 pcmpeqb %xmm2, %xmm0
1179 lea 16(%edx), %edx
1180 pmovmskb %xmm0, %eax
1181 lea 16(%ecx), %ecx
1182 movaps %xmm2, %xmm3
1183 # ifdef USE_AS_STRNCPY
1184 sub $16, %ebx
1185 jbe L(StrncpyExit7Case2OrCase3)
1186 # endif
1187 test %eax, %eax
1188 jnz L(Shl7LoopExit)
1189
1190 palignr $7, %xmm1, %xmm2
1191 movaps %xmm3, %xmm1
1192 movaps %xmm2, (%edx)
1193 lea 25(%ecx), %ecx
1194 lea 16(%edx), %edx
1195
1196 mov %ecx, %eax
1197 and $-0x40, %ecx
1198 sub %ecx, %eax
1199 lea -9(%ecx), %ecx
1200 sub %eax, %edx
1201 # ifdef USE_AS_STRNCPY
1202 add %eax, %ebx
1203 # endif
1204 movaps -7(%ecx), %xmm1
1205
1206 L(Shl7LoopStart):
1207 movaps 9(%ecx), %xmm2
1208 movaps 25(%ecx), %xmm3
1209 movaps %xmm3, %xmm6
1210 movaps 41(%ecx), %xmm4
1211 movaps %xmm4, %xmm7
1212 movaps 57(%ecx), %xmm5
1213 pminub %xmm2, %xmm6
1214 pminub %xmm5, %xmm7
1215 pminub %xmm6, %xmm7
1216 pcmpeqb %xmm0, %xmm7
1217 pmovmskb %xmm7, %eax
1218 movaps %xmm5, %xmm7
1219 palignr $7, %xmm4, %xmm5
1220 test %eax, %eax
1221 palignr $7, %xmm3, %xmm4
1222 jnz L(Shl7Start)
1223 # ifdef USE_AS_STRNCPY
1224 sub $64, %ebx
1225 jbe L(StrncpyLeave7)
1226 # endif
1227 palignr $7, %xmm2, %xmm3
1228 lea 64(%ecx), %ecx
1229 palignr $7, %xmm1, %xmm2
1230 movaps %xmm7, %xmm1
1231 movaps %xmm5, 48(%edx)
1232 movaps %xmm4, 32(%edx)
1233 movaps %xmm3, 16(%edx)
1234 movaps %xmm2, (%edx)
1235 lea 64(%edx), %edx
1236 jmp L(Shl7LoopStart)
1237
1238 L(Shl7LoopExit):
1239 movaps (%edx), %xmm6
1240 psrldq $9, %xmm6
1241 mov $9, %esi
1242 palignr $7, %xmm1, %xmm6
1243 movaps %xmm6, (%edx)
1244 jmp L(CopyFrom1To16Bytes)
1245
1246 .p2align 4
1247 L(Shl8):
1248 movaps -8(%ecx), %xmm1
1249 movaps 8(%ecx), %xmm2
1250 L(Shl8Start):
1251 pcmpeqb %xmm2, %xmm0
1252 pmovmskb %xmm0, %eax
1253 movaps %xmm2, %xmm3
1254 # ifdef USE_AS_STRNCPY
1255 sub $16, %ebx
1256 jbe L(StrncpyExit8Case2OrCase3)
1257 # endif
1258 test %eax, %eax
1259 jnz L(Shl8LoopExit)
1260
1261 palignr $8, %xmm1, %xmm2
1262 movaps %xmm3, %xmm1
1263 movaps %xmm2, (%edx)
1264 movaps 24(%ecx), %xmm2
1265
1266 pcmpeqb %xmm2, %xmm0
1267 lea 16(%edx), %edx
1268 pmovmskb %xmm0, %eax
1269 lea 16(%ecx), %ecx
1270 movaps %xmm2, %xmm3
1271 # ifdef USE_AS_STRNCPY
1272 sub $16, %ebx
1273 jbe L(StrncpyExit8Case2OrCase3)
1274 # endif
1275 test %eax, %eax
1276 jnz L(Shl8LoopExit)
1277
1278 palignr $8, %xmm1, %xmm2
1279 movaps %xmm2, (%edx)
1280 movaps 24(%ecx), %xmm2
1281 movaps %xmm3, %xmm1
1282
1283 pcmpeqb %xmm2, %xmm0
1284 lea 16(%edx), %edx
1285 pmovmskb %xmm0, %eax
1286 lea 16(%ecx), %ecx
1287 movaps %xmm2, %xmm3
1288 # ifdef USE_AS_STRNCPY
1289 sub $16, %ebx
1290 jbe L(StrncpyExit8Case2OrCase3)
1291 # endif
1292 test %eax, %eax
1293 jnz L(Shl8LoopExit)
1294
1295 palignr $8, %xmm1, %xmm2
1296 movaps %xmm3, %xmm1
1297 movaps %xmm2, (%edx)
1298 movaps 24(%ecx), %xmm2
1299
1300 pcmpeqb %xmm2, %xmm0
1301 lea 16(%edx), %edx
1302 pmovmskb %xmm0, %eax
1303 lea 16(%ecx), %ecx
1304 movaps %xmm2, %xmm3
1305 # ifdef USE_AS_STRNCPY
1306 sub $16, %ebx
1307 jbe L(StrncpyExit8Case2OrCase3)
1308 # endif
1309 test %eax, %eax
1310 jnz L(Shl8LoopExit)
1311
1312 palignr $8, %xmm1, %xmm2
1313 movaps %xmm3, %xmm1
1314 movaps %xmm2, (%edx)
1315 lea 24(%ecx), %ecx
1316 lea 16(%edx), %edx
1317
1318 mov %ecx, %eax
1319 and $-0x40, %ecx
1320 sub %ecx, %eax
1321 lea -8(%ecx), %ecx
1322 sub %eax, %edx
1323 # ifdef USE_AS_STRNCPY
1324 add %eax, %ebx
1325 # endif
1326 movaps -8(%ecx), %xmm1
1327
1328 L(Shl8LoopStart):
1329 movaps 8(%ecx), %xmm2
1330 movaps 24(%ecx), %xmm3
1331 movaps %xmm3, %xmm6
1332 movaps 40(%ecx), %xmm4
1333 movaps %xmm4, %xmm7
1334 movaps 56(%ecx), %xmm5
1335 pminub %xmm2, %xmm6
1336 pminub %xmm5, %xmm7
1337 pminub %xmm6, %xmm7
1338 pcmpeqb %xmm0, %xmm7
1339 pmovmskb %xmm7, %eax
1340 movaps %xmm5, %xmm7
1341 palignr $8, %xmm4, %xmm5
1342 test %eax, %eax
1343 palignr $8, %xmm3, %xmm4
1344 jnz L(Shl8Start)
1345 # ifdef USE_AS_STRNCPY
1346 sub $64, %ebx
1347 jbe L(StrncpyLeave8)
1348 # endif
1349 palignr $8, %xmm2, %xmm3
1350 lea 64(%ecx), %ecx
1351 palignr $8, %xmm1, %xmm2
1352 movaps %xmm7, %xmm1
1353 movaps %xmm5, 48(%edx)
1354 movaps %xmm4, 32(%edx)
1355 movaps %xmm3, 16(%edx)
1356 movaps %xmm2, (%edx)
1357 lea 64(%edx), %edx
1358 jmp L(Shl8LoopStart)
1359
1360 L(Shl8LoopExit):
1361 movaps (%edx), %xmm6
1362 psrldq $8, %xmm6
1363 mov $8, %esi
1364 palignr $8, %xmm1, %xmm6
1365 movaps %xmm6, (%edx)
1366 jmp L(CopyFrom1To16Bytes)
1367
1368 .p2align 4
/* L(Shl9): copy entry for a source that sits 9 bytes past a 16-byte
   boundary.  -9(%ecx) and 7(%ecx) are read with movaps, which requires
   16-byte alignment; stores to (%edx) use movaps as well.
   %ecx = source, %edx = destination; with USE_AS_STRNCPY %ebx is the
   remaining byte budget.  %xmm0 is presumably all-zero on entry (set
   up outside this section -- TODO confirm), so %eax becomes a bit mask
   of the NUL bytes in the newest source block.  */
1369 L(Shl9):
1370 movaps -9(%ecx), %xmm1
1371 movaps 7(%ecx), %xmm2
1372 L(Shl9Start):
/* Step 1 of 4 (unrolled).  %xmm1 = previous aligned block, %xmm2 =
   newest block, %xmm3 = unshifted copy of the newest block.  */
1373 pcmpeqb %xmm2, %xmm0
1374 pmovmskb %xmm0, %eax
1375 movaps %xmm2, %xmm3
1376 # ifdef USE_AS_STRNCPY
/* 16 or fewer bytes of budget left: finish in the strncpy exit path.  */
1377 sub $16, %ebx
1378 jbe L(StrncpyExit9Case2OrCase3)
1379 # endif
1380 test %eax, %eax
1381 jnz L(Shl9LoopExit)
1382
/* No match: merge the top 7 bytes of %xmm1 with the low 9 bytes of
   %xmm2 (the 16 source bytes at (%ecx)), store them, and fetch the
   next aligned block.  */
1383 palignr $9, %xmm1, %xmm2
1384 movaps %xmm3, %xmm1
1385 movaps %xmm2, (%edx)
1386 movaps 23(%ecx), %xmm2
1387
/* Step 2 of 4: same check on the next block; both pointers advance
   by 16.  */
1388 pcmpeqb %xmm2, %xmm0
1389 lea 16(%edx), %edx
1390 pmovmskb %xmm0, %eax
1391 lea 16(%ecx), %ecx
1392 movaps %xmm2, %xmm3
1393 # ifdef USE_AS_STRNCPY
1394 sub $16, %ebx
1395 jbe L(StrncpyExit9Case2OrCase3)
1396 # endif
1397 test %eax, %eax
1398 jnz L(Shl9LoopExit)
1399
1400 palignr $9, %xmm1, %xmm2
1401 movaps %xmm2, (%edx)
1402 movaps 23(%ecx), %xmm2
1403 movaps %xmm3, %xmm1
1404
/* Step 3 of 4.  */
1405 pcmpeqb %xmm2, %xmm0
1406 lea 16(%edx), %edx
1407 pmovmskb %xmm0, %eax
1408 lea 16(%ecx), %ecx
1409 movaps %xmm2, %xmm3
1410 # ifdef USE_AS_STRNCPY
1411 sub $16, %ebx
1412 jbe L(StrncpyExit9Case2OrCase3)
1413 # endif
1414 test %eax, %eax
1415 jnz L(Shl9LoopExit)
1416
1417 palignr $9, %xmm1, %xmm2
1418 movaps %xmm3, %xmm1
1419 movaps %xmm2, (%edx)
1420 movaps 23(%ecx), %xmm2
1421
/* Step 4 of 4.  */
1422 pcmpeqb %xmm2, %xmm0
1423 lea 16(%edx), %edx
1424 pmovmskb %xmm0, %eax
1425 lea 16(%ecx), %ecx
1426 movaps %xmm2, %xmm3
1427 # ifdef USE_AS_STRNCPY
1428 sub $16, %ebx
1429 jbe L(StrncpyExit9Case2OrCase3)
1430 # endif
1431 test %eax, %eax
1432 jnz L(Shl9LoopExit)
1433
/* Store the fourth block; %ecx is then set to the address of the next
   aligned source block (nothing is loaded from it yet).  */
1434 palignr $9, %xmm1, %xmm2
1435 movaps %xmm3, %xmm1
1436 movaps %xmm2, (%edx)
1437 lea 23(%ecx), %ecx
1438 lea 16(%edx), %edx
1439
/* Align for the main loop: round %ecx down to a 64-byte boundary
   (%eax = distance stepped back), re-bias it by -7 so that 7(%ecx) is
   that boundary, and step %edx back by the same %eax.  With
   USE_AS_STRNCPY the budget grows by %eax, since those bytes get
   copied again.  Finally reload %xmm1 with the block that precedes the
   boundary.  */
1440 mov %ecx, %eax
1441 and $-0x40, %ecx
1442 sub %ecx, %eax
1443 lea -7(%ecx), %ecx
1444 sub %eax, %edx
1445 # ifdef USE_AS_STRNCPY
1446 add %eax, %ebx
1447 # endif
1448 movaps -9(%ecx), %xmm1
1449
1450 L(Shl9LoopStart):
/* Main loop for the offset-9 case: one pass reads the four aligned
   16-byte source blocks at 7/23/39/55(%ecx) and writes 64 bytes to
   (%edx).  %xmm1 holds the aligned block just before them.  */
1451 movaps 7(%ecx), %xmm2
1452 movaps 23(%ecx), %xmm3
1453 movaps %xmm3, %xmm6
1454 movaps 39(%ecx), %xmm4
1455 movaps %xmm4, %xmm7
1456 movaps 55(%ecx), %xmm5
/* Unsigned byte-wise minimum of the four blocks, compared with %xmm0
   (presumably all-zero; it is not written in this loop): %eax ends up
   non-zero when one of the 64 bytes matches, i.e. is NUL.  */
1457 pminub %xmm2, %xmm6
1458 pminub %xmm5, %xmm7
1459 pminub %xmm6, %xmm7
1460 pcmpeqb %xmm0, %xmm7
1461 pmovmskb %xmm7, %eax
/* Keep the unshifted last block for the next pass and start the
   palignr merges early; palignr leaves EFLAGS alone, so the jnz still
   sees the result of test.  On a match, go back to L(Shl9Start), which
   re-examines the blocks one at a time from the still unshifted
   %xmm1/%xmm2.  */
1462 movaps %xmm5, %xmm7
1463 palignr $9, %xmm4, %xmm5
1464 test %eax, %eax
1465 palignr $9, %xmm3, %xmm4
1466 jnz L(Shl9Start)
1467 # ifdef USE_AS_STRNCPY
/* Leave through L(StrncpyLeave9) when 64 or fewer bytes of budget
   remain in %ebx.  */
1468 sub $64, %ebx
1469 jbe L(StrncpyLeave9)
1470 # endif
/* Finish the two remaining merges, advance the source, store the four
   merged vectors and advance the destination.  */
1471 palignr $9, %xmm2, %xmm3
1472 lea 64(%ecx), %ecx
1473 palignr $9, %xmm1, %xmm2
1474 movaps %xmm7, %xmm1
1475 movaps %xmm5, 48(%edx)
1476 movaps %xmm4, 32(%edx)
1477 movaps %xmm3, 16(%edx)
1478 movaps %xmm2, (%edx)
1479 lea 64(%edx), %edx
1480 jmp L(Shl9LoopStart)
1481
1482 L(Shl9LoopExit):
/* A match was found in the newest block (mask in %eax).  Merge the 7
   source bytes still pending in the top of %xmm1 into the first 7
   bytes at (%edx), keeping the other 9 destination bytes as they were.
   Then continue in L(CopyFrom1To16Bytes), which adds %esi = 7 to both
   pointers.  */
1483 movaps (%edx), %xmm6
1484 psrldq $7, %xmm6
1485 mov $7, %esi
1486 palignr $9, %xmm1, %xmm6
1487 movaps %xmm6, (%edx)
1488 jmp L(CopyFrom1To16Bytes)
1489
1490 .p2align 4
/* L(Shl10): copy entry for a source that sits 10 bytes past a 16-byte
   boundary.  -10(%ecx) and 6(%ecx) are read with movaps, which
   requires 16-byte alignment; stores to (%edx) use movaps as well.
   %ecx = source, %edx = destination; with USE_AS_STRNCPY %ebx is the
   remaining byte budget.  %xmm0 is presumably all-zero on entry (set
   up outside this section -- TODO confirm), so %eax becomes a bit mask
   of the NUL bytes in the newest source block.  */
1491 L(Shl10):
1492 movaps -10(%ecx), %xmm1
1493 movaps 6(%ecx), %xmm2
1494 L(Shl10Start):
/* Step 1 of 4 (unrolled).  %xmm1 = previous aligned block, %xmm2 =
   newest block, %xmm3 = unshifted copy of the newest block.  */
1495 pcmpeqb %xmm2, %xmm0
1496 pmovmskb %xmm0, %eax
1497 movaps %xmm2, %xmm3
1498 # ifdef USE_AS_STRNCPY
/* 16 or fewer bytes of budget left: finish in the strncpy exit path.  */
1499 sub $16, %ebx
1500 jbe L(StrncpyExit10Case2OrCase3)
1501 # endif
1502 test %eax, %eax
1503 jnz L(Shl10LoopExit)
1504
/* No match: merge the top 6 bytes of %xmm1 with the low 10 bytes of
   %xmm2 (the 16 source bytes at (%ecx)), store them, and fetch the
   next aligned block.  */
1505 palignr $10, %xmm1, %xmm2
1506 movaps %xmm3, %xmm1
1507 movaps %xmm2, (%edx)
1508 movaps 22(%ecx), %xmm2
1509
/* Step 2 of 4: same check on the next block; both pointers advance
   by 16.  */
1510 pcmpeqb %xmm2, %xmm0
1511 lea 16(%edx), %edx
1512 pmovmskb %xmm0, %eax
1513 lea 16(%ecx), %ecx
1514 movaps %xmm2, %xmm3
1515 # ifdef USE_AS_STRNCPY
1516 sub $16, %ebx
1517 jbe L(StrncpyExit10Case2OrCase3)
1518 # endif
1519 test %eax, %eax
1520 jnz L(Shl10LoopExit)
1521
1522 palignr $10, %xmm1, %xmm2
1523 movaps %xmm2, (%edx)
1524 movaps 22(%ecx), %xmm2
1525 movaps %xmm3, %xmm1
1526
/* Step 3 of 4.  */
1527 pcmpeqb %xmm2, %xmm0
1528 lea 16(%edx), %edx
1529 pmovmskb %xmm0, %eax
1530 lea 16(%ecx), %ecx
1531 movaps %xmm2, %xmm3
1532 # ifdef USE_AS_STRNCPY
1533 sub $16, %ebx
1534 jbe L(StrncpyExit10Case2OrCase3)
1535 # endif
1536 test %eax, %eax
1537 jnz L(Shl10LoopExit)
1538
1539 palignr $10, %xmm1, %xmm2
1540 movaps %xmm3, %xmm1
1541 movaps %xmm2, (%edx)
1542 movaps 22(%ecx), %xmm2
1543
/* Step 4 of 4.  */
1544 pcmpeqb %xmm2, %xmm0
1545 lea 16(%edx), %edx
1546 pmovmskb %xmm0, %eax
1547 lea 16(%ecx), %ecx
1548 movaps %xmm2, %xmm3
1549 # ifdef USE_AS_STRNCPY
1550 sub $16, %ebx
1551 jbe L(StrncpyExit10Case2OrCase3)
1552 # endif
1553 test %eax, %eax
1554 jnz L(Shl10LoopExit)
1555
/* Store the fourth block; %ecx is then set to the address of the next
   aligned source block (nothing is loaded from it yet).  */
1556 palignr $10, %xmm1, %xmm2
1557 movaps %xmm3, %xmm1
1558 movaps %xmm2, (%edx)
1559 lea 22(%ecx), %ecx
1560 lea 16(%edx), %edx
1561
/* Align for the main loop: round %ecx down to a 64-byte boundary
   (%eax = distance stepped back), re-bias it by -6 so that 6(%ecx) is
   that boundary, and step %edx back by the same %eax.  With
   USE_AS_STRNCPY the budget grows by %eax, since those bytes get
   copied again.  Finally reload %xmm1 with the block that precedes the
   boundary.  */
1562 mov %ecx, %eax
1563 and $-0x40, %ecx
1564 sub %ecx, %eax
1565 lea -6(%ecx), %ecx
1566 sub %eax, %edx
1567 # ifdef USE_AS_STRNCPY
1568 add %eax, %ebx
1569 # endif
1570 movaps -10(%ecx), %xmm1
1571
1572 L(Shl10LoopStart):
/* Main loop for the offset-10 case: one pass reads the four aligned
   16-byte source blocks at 6/22/38/54(%ecx) and writes 64 bytes to
   (%edx).  %xmm1 holds the aligned block just before them.  */
1573 movaps 6(%ecx), %xmm2
1574 movaps 22(%ecx), %xmm3
1575 movaps %xmm3, %xmm6
1576 movaps 38(%ecx), %xmm4
1577 movaps %xmm4, %xmm7
1578 movaps 54(%ecx), %xmm5
/* Unsigned byte-wise minimum of the four blocks, compared with %xmm0
   (presumably all-zero; it is not written in this loop): %eax ends up
   non-zero when one of the 64 bytes matches, i.e. is NUL.  */
1579 pminub %xmm2, %xmm6
1580 pminub %xmm5, %xmm7
1581 pminub %xmm6, %xmm7
1582 pcmpeqb %xmm0, %xmm7
1583 pmovmskb %xmm7, %eax
/* Keep the unshifted last block for the next pass and start the
   palignr merges early; palignr leaves EFLAGS alone, so the jnz still
   sees the result of test.  On a match, go back to L(Shl10Start),
   which re-examines the blocks one at a time from the still unshifted
   %xmm1/%xmm2.  */
1584 movaps %xmm5, %xmm7
1585 palignr $10, %xmm4, %xmm5
1586 test %eax, %eax
1587 palignr $10, %xmm3, %xmm4
1588 jnz L(Shl10Start)
1589 # ifdef USE_AS_STRNCPY
/* Leave through L(StrncpyLeave10) when 64 or fewer bytes of budget
   remain in %ebx.  */
1590 sub $64, %ebx
1591 jbe L(StrncpyLeave10)
1592 # endif
/* Finish the two remaining merges, advance the source, store the four
   merged vectors and advance the destination.  */
1593 palignr $10, %xmm2, %xmm3
1594 lea 64(%ecx), %ecx
1595 palignr $10, %xmm1, %xmm2
1596 movaps %xmm7, %xmm1
1597 movaps %xmm5, 48(%edx)
1598 movaps %xmm4, 32(%edx)
1599 movaps %xmm3, 16(%edx)
1600 movaps %xmm2, (%edx)
1601 lea 64(%edx), %edx
1602 jmp L(Shl10LoopStart)
1603
1604 L(Shl10LoopExit):
/* A match was found in the newest block (mask in %eax).  Merge the 6
   source bytes still pending in the top of %xmm1 into the first 6
   bytes at (%edx), keeping the other 10 destination bytes as they
   were.  Then continue in L(CopyFrom1To16Bytes), which adds %esi = 6
   to both pointers.  */
1605 movaps (%edx), %xmm6
1606 psrldq $6, %xmm6
1607 mov $6, %esi
1608 palignr $10, %xmm1, %xmm6
1609 movaps %xmm6, (%edx)
1610 jmp L(CopyFrom1To16Bytes)
1611
1612 .p2align 4
/* L(Shl11): copy entry for a source that sits 11 bytes past a 16-byte
   boundary.  -11(%ecx) and 5(%ecx) are read with movaps, which
   requires 16-byte alignment; stores to (%edx) use movaps as well.
   %ecx = source, %edx = destination; with USE_AS_STRNCPY %ebx is the
   remaining byte budget.  %xmm0 is presumably all-zero on entry (set
   up outside this section -- TODO confirm), so %eax becomes a bit mask
   of the NUL bytes in the newest source block.  */
1613 L(Shl11):
1614 movaps -11(%ecx), %xmm1
1615 movaps 5(%ecx), %xmm2
1616 L(Shl11Start):
/* Step 1 of 4 (unrolled).  %xmm1 = previous aligned block, %xmm2 =
   newest block, %xmm3 = unshifted copy of the newest block.  */
1617 pcmpeqb %xmm2, %xmm0
1618 pmovmskb %xmm0, %eax
1619 movaps %xmm2, %xmm3
1620 # ifdef USE_AS_STRNCPY
/* 16 or fewer bytes of budget left: finish in the strncpy exit path.  */
1621 sub $16, %ebx
1622 jbe L(StrncpyExit11Case2OrCase3)
1623 # endif
1624 test %eax, %eax
1625 jnz L(Shl11LoopExit)
1626
/* No match: merge the top 5 bytes of %xmm1 with the low 11 bytes of
   %xmm2 (the 16 source bytes at (%ecx)), store them, and fetch the
   next aligned block.  */
1627 palignr $11, %xmm1, %xmm2
1628 movaps %xmm3, %xmm1
1629 movaps %xmm2, (%edx)
1630 movaps 21(%ecx), %xmm2
1631
/* Step 2 of 4: same check on the next block; both pointers advance
   by 16.  */
1632 pcmpeqb %xmm2, %xmm0
1633 lea 16(%edx), %edx
1634 pmovmskb %xmm0, %eax
1635 lea 16(%ecx), %ecx
1636 movaps %xmm2, %xmm3
1637 # ifdef USE_AS_STRNCPY
1638 sub $16, %ebx
1639 jbe L(StrncpyExit11Case2OrCase3)
1640 # endif
1641 test %eax, %eax
1642 jnz L(Shl11LoopExit)
1643
1644 palignr $11, %xmm1, %xmm2
1645 movaps %xmm2, (%edx)
1646 movaps 21(%ecx), %xmm2
1647 movaps %xmm3, %xmm1
1648
/* Step 3 of 4.  */
1649 pcmpeqb %xmm2, %xmm0
1650 lea 16(%edx), %edx
1651 pmovmskb %xmm0, %eax
1652 lea 16(%ecx), %ecx
1653 movaps %xmm2, %xmm3
1654 # ifdef USE_AS_STRNCPY
1655 sub $16, %ebx
1656 jbe L(StrncpyExit11Case2OrCase3)
1657 # endif
1658 test %eax, %eax
1659 jnz L(Shl11LoopExit)
1660
1661 palignr $11, %xmm1, %xmm2
1662 movaps %xmm3, %xmm1
1663 movaps %xmm2, (%edx)
1664 movaps 21(%ecx), %xmm2
1665
/* Step 4 of 4.  */
1666 pcmpeqb %xmm2, %xmm0
1667 lea 16(%edx), %edx
1668 pmovmskb %xmm0, %eax
1669 lea 16(%ecx), %ecx
1670 movaps %xmm2, %xmm3
1671 # ifdef USE_AS_STRNCPY
1672 sub $16, %ebx
1673 jbe L(StrncpyExit11Case2OrCase3)
1674 # endif
1675 test %eax, %eax
1676 jnz L(Shl11LoopExit)
1677
/* Store the fourth block; %ecx is then set to the address of the next
   aligned source block (nothing is loaded from it yet).  */
1678 palignr $11, %xmm1, %xmm2
1679 movaps %xmm3, %xmm1
1680 movaps %xmm2, (%edx)
1681 lea 21(%ecx), %ecx
1682 lea 16(%edx), %edx
1683
/* Align for the main loop: round %ecx down to a 64-byte boundary
   (%eax = distance stepped back), re-bias it by -5 so that 5(%ecx) is
   that boundary, and step %edx back by the same %eax.  With
   USE_AS_STRNCPY the budget grows by %eax, since those bytes get
   copied again.  Finally reload %xmm1 with the block that precedes the
   boundary.  */
1684 mov %ecx, %eax
1685 and $-0x40, %ecx
1686 sub %ecx, %eax
1687 lea -5(%ecx), %ecx
1688 sub %eax, %edx
1689 # ifdef USE_AS_STRNCPY
1690 add %eax, %ebx
1691 # endif
1692 movaps -11(%ecx), %xmm1
1693
1694 L(Shl11LoopStart):
/* Main loop for the offset-11 case: one pass reads the four aligned
   16-byte source blocks at 5/21/37/53(%ecx) and writes 64 bytes to
   (%edx).  %xmm1 holds the aligned block just before them.  */
1695 movaps 5(%ecx), %xmm2
1696 movaps 21(%ecx), %xmm3
1697 movaps %xmm3, %xmm6
1698 movaps 37(%ecx), %xmm4
1699 movaps %xmm4, %xmm7
1700 movaps 53(%ecx), %xmm5
/* Unsigned byte-wise minimum of the four blocks, compared with %xmm0
   (presumably all-zero; it is not written in this loop): %eax ends up
   non-zero when one of the 64 bytes matches, i.e. is NUL.  */
1701 pminub %xmm2, %xmm6
1702 pminub %xmm5, %xmm7
1703 pminub %xmm6, %xmm7
1704 pcmpeqb %xmm0, %xmm7
1705 pmovmskb %xmm7, %eax
/* Keep the unshifted last block for the next pass and start the
   palignr merges early; palignr leaves EFLAGS alone, so the jnz still
   sees the result of test.  On a match, go back to L(Shl11Start),
   which re-examines the blocks one at a time from the still unshifted
   %xmm1/%xmm2.  */
1706 movaps %xmm5, %xmm7
1707 palignr $11, %xmm4, %xmm5
1708 test %eax, %eax
1709 palignr $11, %xmm3, %xmm4
1710 jnz L(Shl11Start)
1711 # ifdef USE_AS_STRNCPY
/* Leave through L(StrncpyLeave11) when 64 or fewer bytes of budget
   remain in %ebx.  */
1712 sub $64, %ebx
1713 jbe L(StrncpyLeave11)
1714 # endif
/* Finish the two remaining merges, advance the source, store the four
   merged vectors and advance the destination.  */
1715 palignr $11, %xmm2, %xmm3
1716 lea 64(%ecx), %ecx
1717 palignr $11, %xmm1, %xmm2
1718 movaps %xmm7, %xmm1
1719 movaps %xmm5, 48(%edx)
1720 movaps %xmm4, 32(%edx)
1721 movaps %xmm3, 16(%edx)
1722 movaps %xmm2, (%edx)
1723 lea 64(%edx), %edx
1724 jmp L(Shl11LoopStart)
1725
1726 L(Shl11LoopExit):
/* A match was found in the newest block (mask in %eax).  Merge the 5
   source bytes still pending in the top of %xmm1 into the first 5
   bytes at (%edx), keeping the other 11 destination bytes as they
   were.  Then continue in L(CopyFrom1To16Bytes), which adds %esi = 5
   to both pointers.  */
1727 movaps (%edx), %xmm6
1728 psrldq $5, %xmm6
1729 mov $5, %esi
1730 palignr $11, %xmm1, %xmm6
1731 movaps %xmm6, (%edx)
1732 jmp L(CopyFrom1To16Bytes)
1733
1734 .p2align 4
/* L(Shl12): copy entry for a source that sits 12 bytes past a 16-byte
   boundary.  -12(%ecx) and 4(%ecx) are read with movaps, which
   requires 16-byte alignment; stores to (%edx) use movaps as well.
   %ecx = source, %edx = destination; with USE_AS_STRNCPY %ebx is the
   remaining byte budget.  %xmm0 is presumably all-zero on entry (set
   up outside this section -- TODO confirm), so %eax becomes a bit mask
   of the NUL bytes in the newest source block.  */
1735 L(Shl12):
1736 movaps -12(%ecx), %xmm1
1737 movaps 4(%ecx), %xmm2
1738 L(Shl12Start):
/* Step 1 of 4 (unrolled).  %xmm1 = previous aligned block, %xmm2 =
   newest block, %xmm3 = unshifted copy of the newest block.  */
1739 pcmpeqb %xmm2, %xmm0
1740 pmovmskb %xmm0, %eax
1741 movaps %xmm2, %xmm3
1742 # ifdef USE_AS_STRNCPY
/* 16 or fewer bytes of budget left: finish in the strncpy exit path.  */
1743 sub $16, %ebx
1744 jbe L(StrncpyExit12Case2OrCase3)
1745 # endif
1746 test %eax, %eax
1747 jnz L(Shl12LoopExit)
1748
/* No match: merge the top 4 bytes of %xmm1 with the low 12 bytes of
   %xmm2 (the 16 source bytes at (%ecx)), store them, and fetch the
   next aligned block.  */
1749 palignr $12, %xmm1, %xmm2
1750 movaps %xmm3, %xmm1
1751 movaps %xmm2, (%edx)
1752 movaps 20(%ecx), %xmm2
1753
/* Step 2 of 4: same check on the next block; both pointers advance
   by 16.  */
1754 pcmpeqb %xmm2, %xmm0
1755 lea 16(%edx), %edx
1756 pmovmskb %xmm0, %eax
1757 lea 16(%ecx), %ecx
1758 movaps %xmm2, %xmm3
1759 # ifdef USE_AS_STRNCPY
1760 sub $16, %ebx
1761 jbe L(StrncpyExit12Case2OrCase3)
1762 # endif
1763 test %eax, %eax
1764 jnz L(Shl12LoopExit)
1765
1766 palignr $12, %xmm1, %xmm2
1767 movaps %xmm2, (%edx)
1768 movaps 20(%ecx), %xmm2
1769 movaps %xmm3, %xmm1
1770
/* Step 3 of 4.  */
1771 pcmpeqb %xmm2, %xmm0
1772 lea 16(%edx), %edx
1773 pmovmskb %xmm0, %eax
1774 lea 16(%ecx), %ecx
1775 movaps %xmm2, %xmm3
1776 # ifdef USE_AS_STRNCPY
1777 sub $16, %ebx
1778 jbe L(StrncpyExit12Case2OrCase3)
1779 # endif
1780 test %eax, %eax
1781 jnz L(Shl12LoopExit)
1782
1783 palignr $12, %xmm1, %xmm2
1784 movaps %xmm3, %xmm1
1785 movaps %xmm2, (%edx)
1786 movaps 20(%ecx), %xmm2
1787
/* Step 4 of 4.  */
1788 pcmpeqb %xmm2, %xmm0
1789 lea 16(%edx), %edx
1790 pmovmskb %xmm0, %eax
1791 lea 16(%ecx), %ecx
1792 movaps %xmm2, %xmm3
1793 # ifdef USE_AS_STRNCPY
1794 sub $16, %ebx
1795 jbe L(StrncpyExit12Case2OrCase3)
1796 # endif
1797 test %eax, %eax
1798 jnz L(Shl12LoopExit)
1799
/* Store the fourth block; %ecx is then set to the address of the next
   aligned source block (nothing is loaded from it yet).  */
1800 palignr $12, %xmm1, %xmm2
1801 movaps %xmm3, %xmm1
1802 movaps %xmm2, (%edx)
1803 lea 20(%ecx), %ecx
1804 lea 16(%edx), %edx
1805
/* Align for the main loop: round %ecx down to a 64-byte boundary
   (%eax = distance stepped back), re-bias it by -4 so that 4(%ecx) is
   that boundary, and step %edx back by the same %eax.  With
   USE_AS_STRNCPY the budget grows by %eax, since those bytes get
   copied again.  Finally reload %xmm1 with the block that precedes the
   boundary.  */
1806 mov %ecx, %eax
1807 and $-0x40, %ecx
1808 sub %ecx, %eax
1809 lea -4(%ecx), %ecx
1810 sub %eax, %edx
1811 # ifdef USE_AS_STRNCPY
1812 add %eax, %ebx
1813 # endif
1814 movaps -12(%ecx), %xmm1
1815
1816 L(Shl12LoopStart):
/* Main loop for the offset-12 case: one pass reads the four aligned
   16-byte source blocks at 4/20/36/52(%ecx) and writes 64 bytes to
   (%edx).  %xmm1 holds the aligned block just before them.  */
1817 movaps 4(%ecx), %xmm2
1818 movaps 20(%ecx), %xmm3
1819 movaps %xmm3, %xmm6
1820 movaps 36(%ecx), %xmm4
1821 movaps %xmm4, %xmm7
1822 movaps 52(%ecx), %xmm5
/* Unsigned byte-wise minimum of the four blocks, compared with %xmm0
   (presumably all-zero; it is not written in this loop): %eax ends up
   non-zero when one of the 64 bytes matches, i.e. is NUL.  */
1823 pminub %xmm2, %xmm6
1824 pminub %xmm5, %xmm7
1825 pminub %xmm6, %xmm7
1826 pcmpeqb %xmm0, %xmm7
1827 pmovmskb %xmm7, %eax
/* Keep the unshifted last block for the next pass and start the
   palignr merges early; palignr leaves EFLAGS alone, so the jnz still
   sees the result of test.  On a match, go back to L(Shl12Start),
   which re-examines the blocks one at a time from the still unshifted
   %xmm1/%xmm2.  */
1828 movaps %xmm5, %xmm7
1829 palignr $12, %xmm4, %xmm5
1830 test %eax, %eax
1831 palignr $12, %xmm3, %xmm4
1832 jnz L(Shl12Start)
1833 # ifdef USE_AS_STRNCPY
/* Leave through L(StrncpyLeave12) when 64 or fewer bytes of budget
   remain in %ebx.  */
1834 sub $64, %ebx
1835 jbe L(StrncpyLeave12)
1836 # endif
/* Finish the two remaining merges, advance the source, store the four
   merged vectors and advance the destination.  */
1837 palignr $12, %xmm2, %xmm3
1838 lea 64(%ecx), %ecx
1839 palignr $12, %xmm1, %xmm2
1840 movaps %xmm7, %xmm1
1841 movaps %xmm5, 48(%edx)
1842 movaps %xmm4, 32(%edx)
1843 movaps %xmm3, 16(%edx)
1844 movaps %xmm2, (%edx)
1845 lea 64(%edx), %edx
1846 jmp L(Shl12LoopStart)
1847
1848 L(Shl12LoopExit):
/* A match was found in the newest block (mask in %eax).  Merge the 4
   source bytes still pending in the top of %xmm1 into the first 4
   bytes at (%edx), keeping the other 12 destination bytes as they
   were.  Then continue in L(CopyFrom1To16Bytes), which adds %esi = 4
   to both pointers.  */
1849 movaps (%edx), %xmm6
1850 psrldq $4, %xmm6
1851 mov $4, %esi
1852 palignr $12, %xmm1, %xmm6
1853 movaps %xmm6, (%edx)
1854 jmp L(CopyFrom1To16Bytes)
1855
1856 .p2align 4
/* L(Shl13): copy entry for a source that sits 13 bytes past a 16-byte
   boundary.  -13(%ecx) and 3(%ecx) are read with movaps, which
   requires 16-byte alignment; stores to (%edx) use movaps as well.
   %ecx = source, %edx = destination; with USE_AS_STRNCPY %ebx is the
   remaining byte budget.  %xmm0 is presumably all-zero on entry (set
   up outside this section -- TODO confirm), so %eax becomes a bit mask
   of the NUL bytes in the newest source block.  */
1857 L(Shl13):
1858 movaps -13(%ecx), %xmm1
1859 movaps 3(%ecx), %xmm2
1860 L(Shl13Start):
/* Step 1 of 4 (unrolled).  %xmm1 = previous aligned block, %xmm2 =
   newest block, %xmm3 = unshifted copy of the newest block.  */
1861 pcmpeqb %xmm2, %xmm0
1862 pmovmskb %xmm0, %eax
1863 movaps %xmm2, %xmm3
1864 # ifdef USE_AS_STRNCPY
/* 16 or fewer bytes of budget left: finish in the strncpy exit path.  */
1865 sub $16, %ebx
1866 jbe L(StrncpyExit13Case2OrCase3)
1867 # endif
1868 test %eax, %eax
1869 jnz L(Shl13LoopExit)
1870
/* No match: merge the top 3 bytes of %xmm1 with the low 13 bytes of
   %xmm2 (the 16 source bytes at (%ecx)), store them, and fetch the
   next aligned block.  */
1871 palignr $13, %xmm1, %xmm2
1872 movaps %xmm3, %xmm1
1873 movaps %xmm2, (%edx)
1874 movaps 19(%ecx), %xmm2
1875
/* Step 2 of 4: same check on the next block; both pointers advance
   by 16.  */
1876 pcmpeqb %xmm2, %xmm0
1877 lea 16(%edx), %edx
1878 pmovmskb %xmm0, %eax
1879 lea 16(%ecx), %ecx
1880 movaps %xmm2, %xmm3
1881 # ifdef USE_AS_STRNCPY
1882 sub $16, %ebx
1883 jbe L(StrncpyExit13Case2OrCase3)
1884 # endif
1885 test %eax, %eax
1886 jnz L(Shl13LoopExit)
1887
1888 palignr $13, %xmm1, %xmm2
1889 movaps %xmm2, (%edx)
1890 movaps 19(%ecx), %xmm2
1891 movaps %xmm3, %xmm1
1892
/* Step 3 of 4.  */
1893 pcmpeqb %xmm2, %xmm0
1894 lea 16(%edx), %edx
1895 pmovmskb %xmm0, %eax
1896 lea 16(%ecx), %ecx
1897 movaps %xmm2, %xmm3
1898 # ifdef USE_AS_STRNCPY
1899 sub $16, %ebx
1900 jbe L(StrncpyExit13Case2OrCase3)
1901 # endif
1902 test %eax, %eax
1903 jnz L(Shl13LoopExit)
1904
1905 palignr $13, %xmm1, %xmm2
1906 movaps %xmm3, %xmm1
1907 movaps %xmm2, (%edx)
1908 movaps 19(%ecx), %xmm2
1909
/* Step 4 of 4.  */
1910 pcmpeqb %xmm2, %xmm0
1911 lea 16(%edx), %edx
1912 pmovmskb %xmm0, %eax
1913 lea 16(%ecx), %ecx
1914 movaps %xmm2, %xmm3
1915 # ifdef USE_AS_STRNCPY
1916 sub $16, %ebx
1917 jbe L(StrncpyExit13Case2OrCase3)
1918 # endif
1919 test %eax, %eax
1920 jnz L(Shl13LoopExit)
1921
/* Store the fourth block; %ecx is then set to the address of the next
   aligned source block (nothing is loaded from it yet).  */
1922 palignr $13, %xmm1, %xmm2
1923 movaps %xmm3, %xmm1
1924 movaps %xmm2, (%edx)
1925 lea 19(%ecx), %ecx
1926 lea 16(%edx), %edx
1927
/* Align for the main loop: round %ecx down to a 64-byte boundary
   (%eax = distance stepped back), re-bias it by -3 so that 3(%ecx) is
   that boundary, and step %edx back by the same %eax.  With
   USE_AS_STRNCPY the budget grows by %eax, since those bytes get
   copied again.  Finally reload %xmm1 with the block that precedes the
   boundary.  */
1928 mov %ecx, %eax
1929 and $-0x40, %ecx
1930 sub %ecx, %eax
1931 lea -3(%ecx), %ecx
1932 sub %eax, %edx
1933 # ifdef USE_AS_STRNCPY
1934 add %eax, %ebx
1935 # endif
1936 movaps -13(%ecx), %xmm1
1937
1938 L(Shl13LoopStart):
/* Main loop for the offset-13 case: one pass reads the four aligned
   16-byte source blocks at 3/19/35/51(%ecx) and writes 64 bytes to
   (%edx).  %xmm1 holds the aligned block just before them.  */
1939 movaps 3(%ecx), %xmm2
1940 movaps 19(%ecx), %xmm3
1941 movaps %xmm3, %xmm6
1942 movaps 35(%ecx), %xmm4
1943 movaps %xmm4, %xmm7
1944 movaps 51(%ecx), %xmm5
/* Unsigned byte-wise minimum of the four blocks, compared with %xmm0
   (presumably all-zero; it is not written in this loop): %eax ends up
   non-zero when one of the 64 bytes matches, i.e. is NUL.  */
1945 pminub %xmm2, %xmm6
1946 pminub %xmm5, %xmm7
1947 pminub %xmm6, %xmm7
1948 pcmpeqb %xmm0, %xmm7
1949 pmovmskb %xmm7, %eax
/* Keep the unshifted last block for the next pass and start the
   palignr merges early; palignr leaves EFLAGS alone, so the jnz still
   sees the result of test.  On a match, go back to L(Shl13Start),
   which re-examines the blocks one at a time from the still unshifted
   %xmm1/%xmm2.  */
1950 movaps %xmm5, %xmm7
1951 palignr $13, %xmm4, %xmm5
1952 test %eax, %eax
1953 palignr $13, %xmm3, %xmm4
1954 jnz L(Shl13Start)
1955 # ifdef USE_AS_STRNCPY
/* Leave through L(StrncpyLeave13) when 64 or fewer bytes of budget
   remain in %ebx.  */
1956 sub $64, %ebx
1957 jbe L(StrncpyLeave13)
1958 # endif
/* Finish the two remaining merges, advance the source, store the four
   merged vectors and advance the destination.  */
1959 palignr $13, %xmm2, %xmm3
1960 lea 64(%ecx), %ecx
1961 palignr $13, %xmm1, %xmm2
1962 movaps %xmm7, %xmm1
1963 movaps %xmm5, 48(%edx)
1964 movaps %xmm4, 32(%edx)
1965 movaps %xmm3, 16(%edx)
1966 movaps %xmm2, (%edx)
1967 lea 64(%edx), %edx
1968 jmp L(Shl13LoopStart)
1969
1970 L(Shl13LoopExit):
/* A match was found in the newest block (mask in %eax).  Merge the 3
   source bytes still pending in the top of %xmm1 into the first 3
   bytes at (%edx), keeping the other 13 destination bytes as they
   were.  Then continue in L(CopyFrom1To16Bytes), which adds %esi = 3
   to both pointers.  */
1971 movaps (%edx), %xmm6
1972 psrldq $3, %xmm6
1973 mov $3, %esi
1974 palignr $13, %xmm1, %xmm6
1975 movaps %xmm6, (%edx)
1976 jmp L(CopyFrom1To16Bytes)
1977
1978 .p2align 4
/* L(Shl14): copy entry for a source that sits 14 bytes past a 16-byte
   boundary.  -14(%ecx) and 2(%ecx) are read with movaps, which
   requires 16-byte alignment; stores to (%edx) use movaps as well.
   %ecx = source, %edx = destination; with USE_AS_STRNCPY %ebx is the
   remaining byte budget.  %xmm0 is presumably all-zero on entry (set
   up outside this section -- TODO confirm), so %eax becomes a bit mask
   of the NUL bytes in the newest source block.  */
1979 L(Shl14):
1980 movaps -14(%ecx), %xmm1
1981 movaps 2(%ecx), %xmm2
1982 L(Shl14Start):
/* Step 1 of 4 (unrolled).  %xmm1 = previous aligned block, %xmm2 =
   newest block, %xmm3 = unshifted copy of the newest block.  */
1983 pcmpeqb %xmm2, %xmm0
1984 pmovmskb %xmm0, %eax
1985 movaps %xmm2, %xmm3
1986 # ifdef USE_AS_STRNCPY
/* 16 or fewer bytes of budget left: finish in the strncpy exit path.  */
1987 sub $16, %ebx
1988 jbe L(StrncpyExit14Case2OrCase3)
1989 # endif
1990 test %eax, %eax
1991 jnz L(Shl14LoopExit)
1992
/* No match: merge the top 2 bytes of %xmm1 with the low 14 bytes of
   %xmm2 (the 16 source bytes at (%ecx)), store them, and fetch the
   next aligned block.  */
1993 palignr $14, %xmm1, %xmm2
1994 movaps %xmm3, %xmm1
1995 movaps %xmm2, (%edx)
1996 movaps 18(%ecx), %xmm2
1997
/* Step 2 of 4: same check on the next block; both pointers advance
   by 16.  */
1998 pcmpeqb %xmm2, %xmm0
1999 lea 16(%edx), %edx
2000 pmovmskb %xmm0, %eax
2001 lea 16(%ecx), %ecx
2002 movaps %xmm2, %xmm3
2003 # ifdef USE_AS_STRNCPY
2004 sub $16, %ebx
2005 jbe L(StrncpyExit14Case2OrCase3)
2006 # endif
2007 test %eax, %eax
2008 jnz L(Shl14LoopExit)
2009
2010 palignr $14, %xmm1, %xmm2
2011 movaps %xmm2, (%edx)
2012 movaps 18(%ecx), %xmm2
2013 movaps %xmm3, %xmm1
2014
/* Step 3 of 4.  */
2015 pcmpeqb %xmm2, %xmm0
2016 lea 16(%edx), %edx
2017 pmovmskb %xmm0, %eax
2018 lea 16(%ecx), %ecx
2019 movaps %xmm2, %xmm3
2020 # ifdef USE_AS_STRNCPY
2021 sub $16, %ebx
2022 jbe L(StrncpyExit14Case2OrCase3)
2023 # endif
2024 test %eax, %eax
2025 jnz L(Shl14LoopExit)
2026
2027 palignr $14, %xmm1, %xmm2
2028 movaps %xmm3, %xmm1
2029 movaps %xmm2, (%edx)
2030 movaps 18(%ecx), %xmm2
2031
/* Step 4 of 4.  */
2032 pcmpeqb %xmm2, %xmm0
2033 lea 16(%edx), %edx
2034 pmovmskb %xmm0, %eax
2035 lea 16(%ecx), %ecx
2036 movaps %xmm2, %xmm3
2037 # ifdef USE_AS_STRNCPY
2038 sub $16, %ebx
2039 jbe L(StrncpyExit14Case2OrCase3)
2040 # endif
2041 test %eax, %eax
2042 jnz L(Shl14LoopExit)
2043
/* Store the fourth block; %ecx is then set to the address of the next
   aligned source block (nothing is loaded from it yet).  */
2044 palignr $14, %xmm1, %xmm2
2045 movaps %xmm3, %xmm1
2046 movaps %xmm2, (%edx)
2047 lea 18(%ecx), %ecx
2048 lea 16(%edx), %edx
2049
/* Align for the main loop: round %ecx down to a 64-byte boundary
   (%eax = distance stepped back), re-bias it by -2 so that 2(%ecx) is
   that boundary, and step %edx back by the same %eax.  With
   USE_AS_STRNCPY the budget grows by %eax, since those bytes get
   copied again.  Finally reload %xmm1 with the block that precedes the
   boundary.  */
2050 mov %ecx, %eax
2051 and $-0x40, %ecx
2052 sub %ecx, %eax
2053 lea -2(%ecx), %ecx
2054 sub %eax, %edx
2055 # ifdef USE_AS_STRNCPY
2056 add %eax, %ebx
2057 # endif
2058 movaps -14(%ecx), %xmm1
2059
2060 L(Shl14LoopStart):
/* Main loop for the offset-14 case: one pass reads the four aligned
   16-byte source blocks at 2/18/34/50(%ecx) and writes 64 bytes to
   (%edx).  %xmm1 holds the aligned block just before them.  */
2061 movaps 2(%ecx), %xmm2
2062 movaps 18(%ecx), %xmm3
2063 movaps %xmm3, %xmm6
2064 movaps 34(%ecx), %xmm4
2065 movaps %xmm4, %xmm7
2066 movaps 50(%ecx), %xmm5
/* Unsigned byte-wise minimum of the four blocks, compared with %xmm0
   (presumably all-zero; it is not written in this loop): %eax ends up
   non-zero when one of the 64 bytes matches, i.e. is NUL.  */
2067 pminub %xmm2, %xmm6
2068 pminub %xmm5, %xmm7
2069 pminub %xmm6, %xmm7
2070 pcmpeqb %xmm0, %xmm7
2071 pmovmskb %xmm7, %eax
/* Keep the unshifted last block for the next pass and start the
   palignr merges early; palignr leaves EFLAGS alone, so the jnz still
   sees the result of test.  On a match, go back to L(Shl14Start),
   which re-examines the blocks one at a time from the still unshifted
   %xmm1/%xmm2.  */
2072 movaps %xmm5, %xmm7
2073 palignr $14, %xmm4, %xmm5
2074 test %eax, %eax
2075 palignr $14, %xmm3, %xmm4
2076 jnz L(Shl14Start)
2077 # ifdef USE_AS_STRNCPY
/* Leave through L(StrncpyLeave14) when 64 or fewer bytes of budget
   remain in %ebx.  */
2078 sub $64, %ebx
2079 jbe L(StrncpyLeave14)
2080 # endif
/* Finish the two remaining merges, advance the source, store the four
   merged vectors and advance the destination.  */
2081 palignr $14, %xmm2, %xmm3
2082 lea 64(%ecx), %ecx
2083 palignr $14, %xmm1, %xmm2
2084 movaps %xmm7, %xmm1
2085 movaps %xmm5, 48(%edx)
2086 movaps %xmm4, 32(%edx)
2087 movaps %xmm3, 16(%edx)
2088 movaps %xmm2, (%edx)
2089 lea 64(%edx), %edx
2090 jmp L(Shl14LoopStart)
2091
2092 L(Shl14LoopExit):
/* A match was found in the newest block (mask in %eax).  Merge the 2
   source bytes still pending in the top of %xmm1 into the first 2
   bytes at (%edx), keeping the other 14 destination bytes as they
   were.  Then continue in L(CopyFrom1To16Bytes), which adds %esi = 2
   to both pointers.  */
2093 movaps (%edx), %xmm6
2094 psrldq $2, %xmm6
2095 mov $2, %esi
2096 palignr $14, %xmm1, %xmm6
2097 movaps %xmm6, (%edx)
2098 jmp L(CopyFrom1To16Bytes)
2099
2100 .p2align 4
/* L(Shl15): copy entry for a source that sits 15 bytes past a 16-byte
   boundary.  -15(%ecx) and 1(%ecx) are read with movaps, which
   requires 16-byte alignment; stores to (%edx) use movaps as well.
   %ecx = source, %edx = destination; with USE_AS_STRNCPY %ebx is the
   remaining byte budget.  %xmm0 is presumably all-zero on entry (set
   up outside this section -- TODO confirm), so %eax becomes a bit mask
   of the NUL bytes in the newest source block.  */
2101 L(Shl15):
2102 movaps -15(%ecx), %xmm1
2103 movaps 1(%ecx), %xmm2
2104 L(Shl15Start):
/* Step 1 of 4 (unrolled).  %xmm1 = previous aligned block, %xmm2 =
   newest block, %xmm3 = unshifted copy of the newest block.  */
2105 pcmpeqb %xmm2, %xmm0
2106 pmovmskb %xmm0, %eax
2107 movaps %xmm2, %xmm3
2108 # ifdef USE_AS_STRNCPY
/* 16 or fewer bytes of budget left: finish in the strncpy exit path.  */
2109 sub $16, %ebx
2110 jbe L(StrncpyExit15Case2OrCase3)
2111 # endif
2112 test %eax, %eax
2113 jnz L(Shl15LoopExit)
2114
/* No match: merge the top byte of %xmm1 with the low 15 bytes of
   %xmm2 (the 16 source bytes at (%ecx)), store them, and fetch the
   next aligned block.  */
2115 palignr $15, %xmm1, %xmm2
2116 movaps %xmm3, %xmm1
2117 movaps %xmm2, (%edx)
2118 movaps 17(%ecx), %xmm2
2119
/* Step 2 of 4: same check on the next block; both pointers advance
   by 16.  */
2120 pcmpeqb %xmm2, %xmm0
2121 lea 16(%edx), %edx
2122 pmovmskb %xmm0, %eax
2123 lea 16(%ecx), %ecx
2124 movaps %xmm2, %xmm3
2125 # ifdef USE_AS_STRNCPY
2126 sub $16, %ebx
2127 jbe L(StrncpyExit15Case2OrCase3)
2128 # endif
2129 test %eax, %eax
2130 jnz L(Shl15LoopExit)
2131
2132 palignr $15, %xmm1, %xmm2
2133 movaps %xmm2, (%edx)
2134 movaps 17(%ecx), %xmm2
2135 movaps %xmm3, %xmm1
2136
/* Step 3 of 4.  */
2137 pcmpeqb %xmm2, %xmm0
2138 lea 16(%edx), %edx
2139 pmovmskb %xmm0, %eax
2140 lea 16(%ecx), %ecx
2141 movaps %xmm2, %xmm3
2142 # ifdef USE_AS_STRNCPY
2143 sub $16, %ebx
2144 jbe L(StrncpyExit15Case2OrCase3)
2145 # endif
2146 test %eax, %eax
2147 jnz L(Shl15LoopExit)
2148
2149 palignr $15, %xmm1, %xmm2
2150 movaps %xmm3, %xmm1
2151 movaps %xmm2, (%edx)
2152 movaps 17(%ecx), %xmm2
2153
/* Step 4 of 4.  */
2154 pcmpeqb %xmm2, %xmm0
2155 lea 16(%edx), %edx
2156 pmovmskb %xmm0, %eax
2157 lea 16(%ecx), %ecx
2158 movaps %xmm2, %xmm3
2159 # ifdef USE_AS_STRNCPY
2160 sub $16, %ebx
2161 jbe L(StrncpyExit15Case2OrCase3)
2162 # endif
2163 test %eax, %eax
2164 jnz L(Shl15LoopExit)
2165
/* Store the fourth block; %ecx is then set to the address of the next
   aligned source block (nothing is loaded from it yet).  */
2166 palignr $15, %xmm1, %xmm2
2167 movaps %xmm3, %xmm1
2168 movaps %xmm2, (%edx)
2169 lea 17(%ecx), %ecx
2170 lea 16(%edx), %edx
2171
/* Align for the main loop: round %ecx down to a 64-byte boundary
   (%eax = distance stepped back), re-bias it by -1 so that 1(%ecx) is
   that boundary, and step %edx back by the same %eax.  With
   USE_AS_STRNCPY the budget grows by %eax, since those bytes get
   copied again.  Finally reload %xmm1 with the block that precedes the
   boundary.  */
2172 mov %ecx, %eax
2173 and $-0x40, %ecx
2174 sub %ecx, %eax
2175 lea -1(%ecx), %ecx
2176 sub %eax, %edx
2177 # ifdef USE_AS_STRNCPY
2178 add %eax, %ebx
2179 # endif
2180 movaps -15(%ecx), %xmm1
2181
2182 L(Shl15LoopStart):
/* Main loop for the offset-15 case: one pass reads the four aligned
   16-byte source blocks at 1/17/33/49(%ecx) and writes 64 bytes to
   (%edx).  %xmm1 holds the aligned block just before them.  */
2183 movaps 1(%ecx), %xmm2
2184 movaps 17(%ecx), %xmm3
2185 movaps %xmm3, %xmm6
2186 movaps 33(%ecx), %xmm4
2187 movaps %xmm4, %xmm7
2188 movaps 49(%ecx), %xmm5
/* Unsigned byte-wise minimum of the four blocks, compared with %xmm0
   (presumably all-zero; it is not written in this loop): %eax ends up
   non-zero when one of the 64 bytes matches, i.e. is NUL.  */
2189 pminub %xmm2, %xmm6
2190 pminub %xmm5, %xmm7
2191 pminub %xmm6, %xmm7
2192 pcmpeqb %xmm0, %xmm7
2193 pmovmskb %xmm7, %eax
/* Keep the unshifted last block for the next pass and start the
   palignr merges early; palignr leaves EFLAGS alone, so the jnz still
   sees the result of test.  On a match, go back to L(Shl15Start),
   which re-examines the blocks one at a time from the still unshifted
   %xmm1/%xmm2.  */
2194 movaps %xmm5, %xmm7
2195 palignr $15, %xmm4, %xmm5
2196 test %eax, %eax
2197 palignr $15, %xmm3, %xmm4
2198 jnz L(Shl15Start)
2199 # ifdef USE_AS_STRNCPY
/* Leave through L(StrncpyLeave15) when 64 or fewer bytes of budget
   remain in %ebx.  */
2200 sub $64, %ebx
2201 jbe L(StrncpyLeave15)
2202 # endif
/* Finish the two remaining merges, advance the source, store the four
   merged vectors and advance the destination.  */
2203 palignr $15, %xmm2, %xmm3
2204 lea 64(%ecx), %ecx
2205 palignr $15, %xmm1, %xmm2
2206 movaps %xmm7, %xmm1
2207 movaps %xmm5, 48(%edx)
2208 movaps %xmm4, 32(%edx)
2209 movaps %xmm3, 16(%edx)
2210 movaps %xmm2, (%edx)
2211 lea 64(%edx), %edx
2212 jmp L(Shl15LoopStart)
2213
2214 L(Shl15LoopExit):
/* A match was found in the newest block (mask in %eax).  Merge the
   single source byte still pending in the top of %xmm1 into the first
   byte at (%edx), keeping the other 15 destination bytes as they were,
   and set %esi = 1.  There is no jump here: execution falls through
   into the code that follows this block.  */
2215 movaps (%edx), %xmm6
2216 psrldq $1, %xmm6
2217 mov $1, %esi
2218 palignr $15, %xmm1, %xmm6
2219 movaps %xmm6, (%edx)
2220
2221 .p2align 4
/* Final copy once a match has been found.  On entry %eax holds the
   pmovmskb mask and %esi the offset to add to both pointers.  With
   USE_AS_STRNCPY the 16 subtracted from %ebx before the mask test is
   added back.  After popping the saved %esi, control goes to L(ExitN)
   where bit N-1 is the lowest set bit of the mask: bits 0..6 of %al
   are tested here, an all-zero %al is handled by L(ExitHigh), and if
   none of bits 0..6 is set execution falls through to the code that
   follows (bit 7 case).  */
2222 L(CopyFrom1To16Bytes):
2223 # ifdef USE_AS_STRNCPY
2224 add $16, %ebx
2225 # endif
2226 add %esi, %edx
2227 add %esi, %ecx
2228
2229 POP (%esi)
2230 test %al, %al
2231 jz L(ExitHigh)
2232 test $0x01, %al
2233 jnz L(Exit1)
2234 test $0x02, %al
2235 jnz L(Exit2)
2236 test $0x04, %al
2237 jnz L(Exit3)
2238 test $0x08, %al
2239 jnz L(Exit4)
2240 test $0x10, %al
2241 jnz L(Exit5)
2242 test $0x20, %al
2243 jnz L(Exit6)
2244 test $0x40, %al
2245 jnz L(Exit7)
2246
2247 .p2align 4
/* Copy exactly 8 bytes from (%ecx) to (%edx) with one 8-byte move.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 8 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2248 L(Exit8):
2249 movlpd (%ecx), %xmm0
2250 movlpd %xmm0, (%edx)
2251 # ifdef USE_AS_STPCPY
2252 lea 7(%edx), %eax
2253 # else
2254 movl %edi, %eax
2255 # endif
2256 # ifdef USE_AS_STRNCPY
2257 sub $8, %ebx
2258 lea 8(%edx), %ecx
2259 jnz L(StrncpyFillTailWithZero1)
2260 # ifdef USE_AS_STPCPY
2261 cmpb $1, (%eax)
2262 sbb $-1, %eax
2263 # endif
2264 # endif
2265 RETURN1
2266
2267 .p2align 4
/* Reached when the low mask byte (%al) is zero: test bits 0..6 of %ah
   in ascending order and go to L(Exit9)..L(Exit15) for the first one
   that is set.  If none is set, execution falls through to the code
   that follows (bit 7 case).  */
2268 L(ExitHigh):
2269 test $0x01, %ah
2270 jnz L(Exit9)
2271 test $0x02, %ah
2272 jnz L(Exit10)
2273 test $0x04, %ah
2274 jnz L(Exit11)
2275 test $0x08, %ah
2276 jnz L(Exit12)
2277 test $0x10, %ah
2278 jnz L(Exit13)
2279 test $0x20, %ah
2280 jnz L(Exit14)
2281 test $0x40, %ah
2282 jnz L(Exit15)
2283
2284 .p2align 4
/* Copy exactly 16 bytes from (%ecx) to (%edx) with two 8-byte moves.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 16 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2285 L(Exit16):
2286 movlpd (%ecx), %xmm0
2287 movlpd %xmm0, (%edx)
2288 movlpd 8(%ecx), %xmm0
2289 movlpd %xmm0, 8(%edx)
2290 # ifdef USE_AS_STPCPY
2291 lea 15(%edx), %eax
2292 # else
2293 movl %edi, %eax
2294 # endif
2295 # ifdef USE_AS_STRNCPY
2296 sub $16, %ebx
2297 lea 16(%edx), %ecx
2298 jnz L(StrncpyFillTailWithZero1)
2299 # ifdef USE_AS_STPCPY
2300 cmpb $1, (%eax)
2301 sbb $-1, %eax
2302 # endif
2303 # endif
2304 RETURN1
2305
2306 # ifdef USE_AS_STRNCPY
2307
2308 CFI_PUSH(%esi)
2309
2310 .p2align 4
/* strncpy exit where the mask in %eax is non-zero and the budget is
   running out as well.  The CFI_PUSH above emits unwind directives
   only; this code pops %esi itself.
   %ebx gets its 16 back, %esi is added to %ecx, and the new
   destination (%edx + %esi) is parked in %esi while %edx is used as
   scratch: bit 7 of %dh is bit 15 of %ebx - 9 (set when %ebx <= 8,
   assuming %ebx is in 1..16 here -- TODO confirm), OR-ed with %al.
   A zero result means both the limit and the match lie in the upper
   8 bytes, handled by L(ExitHighCase2).  lea and pop do not change
   the flags set by test.  */
2311 L(CopyFrom1To16BytesCase2):
2312 add $16, %ebx
2313 add %esi, %ecx
2314 lea (%esi, %edx), %esi
2315 lea -9(%ebx), %edx
2316 and $1<<7, %dh
2317 or %al, %dh
2318 test %dh, %dh
2319 lea (%esi), %edx
2320 POP (%esi)
2321 jz L(ExitHighCase2)
2322
/* For N = 1..7 in order: go to L(ExitN) if the budget is exactly N or
   mask bit N-1 is set, i.e. whichever limit comes first.  Otherwise
   copy 8 bytes.  */
2323 cmp $1, %ebx
2324 je L(Exit1)
2325 test $0x01, %al
2326 jnz L(Exit1)
2327 cmp $2, %ebx
2328 je L(Exit2)
2329 test $0x02, %al
2330 jnz L(Exit2)
2331 cmp $3, %ebx
2332 je L(Exit3)
2333 test $0x04, %al
2334 jnz L(Exit3)
2335 cmp $4, %ebx
2336 je L(Exit4)
2337 test $0x08, %al
2338 jnz L(Exit4)
2339 cmp $5, %ebx
2340 je L(Exit5)
2341 test $0x10, %al
2342 jnz L(Exit5)
2343 cmp $6, %ebx
2344 je L(Exit6)
2345 test $0x20, %al
2346 jnz L(Exit6)
2347 cmp $7, %ebx
2348 je L(Exit7)
2349 test $0x40, %al
2350 jnz L(Exit7)
2351 jmp L(Exit8)
2352
2353 .p2align 4
/* Upper-half counterpart of the Case2 dispatch: for N = 9..15 in
   order, go to L(ExitN) if the budget in %ebx is exactly N or bit N-9
   of %ah is set, i.e. whichever limit comes first.  Otherwise copy 16
   bytes.  */
2354 L(ExitHighCase2):
2355 cmp $9, %ebx
2356 je L(Exit9)
2357 test $0x01, %ah
2358 jnz L(Exit9)
2359 cmp $10, %ebx
2360 je L(Exit10)
2361 test $0x02, %ah
2362 jnz L(Exit10)
2363 cmp $11, %ebx
2364 je L(Exit11)
2365 test $0x04, %ah
2366 jnz L(Exit11)
2367 cmp $12, %ebx
2368 je L(Exit12)
2369 test $0x8, %ah
2370 jnz L(Exit12)
2371 cmp $13, %ebx
2372 je L(Exit13)
2373 test $0x10, %ah
2374 jnz L(Exit13)
2375 cmp $14, %ebx
2376 je L(Exit14)
2377 test $0x20, %ah
2378 jnz L(Exit14)
2379 cmp $15, %ebx
2380 je L(Exit15)
2381 test $0x40, %ah
2382 jnz L(Exit15)
2383 jmp L(Exit16)
2384
2385 CFI_PUSH(%esi)
2386
/* strncpy exit selector.  The CFI_PUSH above emits unwind directives
   only; the code below pops %esi itself.  A non-zero mask in %eax goes
   to L(CopyFrom1To16BytesCase2); otherwise execution falls through to
   Case3.  */
2387 L(CopyFrom1To16BytesCase2OrCase3):
2388 test %eax, %eax
2389 jnz L(CopyFrom1To16BytesCase2)
2390
2391 .p2align 4
/* Case3: no match in this block, so the copy length is the budget
   alone.  %ebx gets its 16 back, %esi is added to both pointers and
   popped, then a compare tree selects L(ExitN) with N = %ebx.  The
   tree distinguishes 1..16 only (anything below 2 goes to L(Exit1))
   and uses signed jl/jg.  */
2392 L(CopyFrom1To16BytesCase3):
2393 add $16, %ebx
2394 add %esi, %edx
2395 add %esi, %ecx
2396
2397 POP (%esi)
2398 cmp $16, %ebx
2399 je L(Exit16)
2400 cmp $8, %ebx
2401 je L(Exit8)
2402 jg L(More8Case3)
2403 cmp $4, %ebx
2404 je L(Exit4)
2405 jg L(More4Case3)
2406 cmp $2, %ebx
2407 jl L(Exit1)
2408 je L(Exit2)
2409 jg L(Exit3)
2410 L(More8Case3): /* but less than 16 */
2411 cmp $12, %ebx
2412 je L(Exit12)
2413 jl L(Less12Case3)
2414 cmp $14, %ebx
2415 jl L(Exit13)
2416 je L(Exit14)
2417 jg L(Exit15)
2418 L(More4Case3): /* but less than 8 */
2419 cmp $6, %ebx
2420 jl L(Exit5)
2421 je L(Exit6)
2422 jg L(Exit7)
2423 L(Less12Case3): /* but more than 8 */
2424 cmp $10, %ebx
2425 jl L(Exit9)
2426 je L(Exit10)
2427 jg L(Exit11)
2428 # endif
2429
2430 .p2align 4
/* Copy exactly 1 byte from (%ecx) to (%edx).
   %eax = return value: address of the byte written with USE_AS_STPCPY,
   else %edi (presumably the original destination; it is set outside
   this section).  With USE_AS_STRNCPY, %ebx -= 1 and %ecx is set just
   past the copy; a non-zero remainder is zero-filled via
   L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In the
   STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the byte written
   is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2431 L(Exit1):
2432 movb (%ecx), %al
2433 movb %al, (%edx)
2434 # ifdef USE_AS_STPCPY
2435 lea (%edx), %eax
2436 # else
2437 movl %edi, %eax
2438 # endif
2439 # ifdef USE_AS_STRNCPY
2440 sub $1, %ebx
2441 lea 1(%edx), %ecx
2442 jnz L(StrncpyFillTailWithZero1)
2443 # ifdef USE_AS_STPCPY
2444 cmpb $1, (%eax)
2445 sbb $-1, %eax
2446 # endif
2447 # endif
2448 RETURN1
2449
2450 .p2align 4
/* Copy exactly 2 bytes from (%ecx) to (%edx) with one 2-byte move.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 2 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2451 L(Exit2):
2452 movw (%ecx), %ax
2453 movw %ax, (%edx)
2454 # ifdef USE_AS_STPCPY
2455 lea 1(%edx), %eax
2456 # else
2457 movl %edi, %eax
2458 # endif
2459 # ifdef USE_AS_STRNCPY
2460 sub $2, %ebx
2461 lea 2(%edx), %ecx
2462 jnz L(StrncpyFillTailWithZero1)
2463 # ifdef USE_AS_STPCPY
2464 cmpb $1, (%eax)
2465 sbb $-1, %eax
2466 # endif
2467 # endif
2468 RETURN1
2469
2470 .p2align 4
/* Copy exactly 3 bytes from (%ecx) to (%edx): a 2-byte move plus a
   1-byte move at offset 2.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 3 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2471 L(Exit3):
2472 movw (%ecx), %ax
2473 movw %ax, (%edx)
2474 movb 2(%ecx), %al
2475 movb %al, 2(%edx)
2476 # ifdef USE_AS_STPCPY
2477 lea 2(%edx), %eax
2478 # else
2479 movl %edi, %eax
2480 # endif
2481 # ifdef USE_AS_STRNCPY
2482 sub $3, %ebx
2483 lea 3(%edx), %ecx
2484 jnz L(StrncpyFillTailWithZero1)
2485 # ifdef USE_AS_STPCPY
2486 cmpb $1, (%eax)
2487 sbb $-1, %eax
2488 # endif
2489 # endif
2490 RETURN1
2491
2492 .p2align 4
/* Copy exactly 4 bytes from (%ecx) to (%edx) with one 4-byte move.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 4 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2493 L(Exit4):
2494 movl (%ecx), %eax
2495 movl %eax, (%edx)
2496 # ifdef USE_AS_STPCPY
2497 lea 3(%edx), %eax
2498 # else
2499 movl %edi, %eax
2500 # endif
2501 # ifdef USE_AS_STRNCPY
2502 sub $4, %ebx
2503 lea 4(%edx), %ecx
2504 jnz L(StrncpyFillTailWithZero1)
2505 # ifdef USE_AS_STPCPY
2506 cmpb $1, (%eax)
2507 sbb $-1, %eax
2508 # endif
2509 # endif
2510 RETURN1
2511
2512 .p2align 4
/* Copy exactly 5 bytes from (%ecx) to (%edx): a 4-byte move plus a
   1-byte move at offset 4.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 5 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2513 L(Exit5):
2514 movl (%ecx), %eax
2515 movl %eax, (%edx)
2516 movb 4(%ecx), %al
2517 movb %al, 4(%edx)
2518 # ifdef USE_AS_STPCPY
2519 lea 4(%edx), %eax
2520 # else
2521 movl %edi, %eax
2522 # endif
2523 # ifdef USE_AS_STRNCPY
2524 sub $5, %ebx
2525 lea 5(%edx), %ecx
2526 jnz L(StrncpyFillTailWithZero1)
2527 # ifdef USE_AS_STPCPY
2528 cmpb $1, (%eax)
2529 sbb $-1, %eax
2530 # endif
2531 # endif
2532 RETURN1
2533
2534 .p2align 4
/* Copy exactly 6 bytes from (%ecx) to (%edx): a 4-byte move plus a
   2-byte move at offset 4.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 6 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2535 L(Exit6):
2536 movl (%ecx), %eax
2537 movl %eax, (%edx)
2538 movw 4(%ecx), %ax
2539 movw %ax, 4(%edx)
2540 # ifdef USE_AS_STPCPY
2541 lea 5(%edx), %eax
2542 # else
2543 movl %edi, %eax
2544 # endif
2545 # ifdef USE_AS_STRNCPY
2546 sub $6, %ebx
2547 lea 6(%edx), %ecx
2548 jnz L(StrncpyFillTailWithZero1)
2549 # ifdef USE_AS_STPCPY
2550 cmpb $1, (%eax)
2551 sbb $-1, %eax
2552 # endif
2553 # endif
2554 RETURN1
2555
2556 .p2align 4
/* Copy exactly 7 bytes from (%ecx) to (%edx) with two overlapping
   4-byte moves at offsets 0 and 3.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 7 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2557 L(Exit7):
2558 movl (%ecx), %eax
2559 movl %eax, (%edx)
2560 movl 3(%ecx), %eax
2561 movl %eax, 3(%edx)
2562 # ifdef USE_AS_STPCPY
2563 lea 6(%edx), %eax
2564 # else
2565 movl %edi, %eax
2566 # endif
2567 # ifdef USE_AS_STRNCPY
2568 sub $7, %ebx
2569 lea 7(%edx), %ecx
2570 jnz L(StrncpyFillTailWithZero1)
2571 # ifdef USE_AS_STPCPY
2572 cmpb $1, (%eax)
2573 sbb $-1, %eax
2574 # endif
2575 # endif
2576 RETURN1
2577
2578 .p2align 4
/* Copy exactly 9 bytes from (%ecx) to (%edx): an 8-byte move plus a
   1-byte move at offset 8.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 9 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2579 L(Exit9):
2580 movlpd (%ecx), %xmm0
2581 movlpd %xmm0, (%edx)
2582 movb 8(%ecx), %al
2583 movb %al, 8(%edx)
2584 # ifdef USE_AS_STPCPY
2585 lea 8(%edx), %eax
2586 # else
2587 movl %edi, %eax
2588 # endif
2589 # ifdef USE_AS_STRNCPY
2590 sub $9, %ebx
2591 lea 9(%edx), %ecx
2592 jnz L(StrncpyFillTailWithZero1)
2593 # ifdef USE_AS_STPCPY
2594 cmpb $1, (%eax)
2595 sbb $-1, %eax
2596 # endif
2597 # endif
2598 RETURN1
2599
2600 .p2align 4
/* Copy exactly 10 bytes from (%ecx) to (%edx): an 8-byte move plus a
   2-byte move at offset 8.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 10 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2601 L(Exit10):
2602 movlpd (%ecx), %xmm0
2603 movlpd %xmm0, (%edx)
2604 movw 8(%ecx), %ax
2605 movw %ax, 8(%edx)
2606 # ifdef USE_AS_STPCPY
2607 lea 9(%edx), %eax
2608 # else
2609 movl %edi, %eax
2610 # endif
2611 # ifdef USE_AS_STRNCPY
2612 sub $10, %ebx
2613 lea 10(%edx), %ecx
2614 jnz L(StrncpyFillTailWithZero1)
2615 # ifdef USE_AS_STPCPY
2616 cmpb $1, (%eax)
2617 sbb $-1, %eax
2618 # endif
2619 # endif
2620 RETURN1
2621
2622 .p2align 4
/* Copy exactly 11 bytes from (%ecx) to (%edx): an 8-byte move plus an
   overlapping 4-byte move at offset 7.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 11 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2623 L(Exit11):
2624 movlpd (%ecx), %xmm0
2625 movlpd %xmm0, (%edx)
2626 movl 7(%ecx), %eax
2627 movl %eax, 7(%edx)
2628 # ifdef USE_AS_STPCPY
2629 lea 10(%edx), %eax
2630 # else
2631 movl %edi, %eax
2632 # endif
2633 # ifdef USE_AS_STRNCPY
2634 sub $11, %ebx
2635 lea 11(%edx), %ecx
2636 jnz L(StrncpyFillTailWithZero1)
2637 # ifdef USE_AS_STPCPY
2638 cmpb $1, (%eax)
2639 sbb $-1, %eax
2640 # endif
2641 # endif
2642 RETURN1
2643
2644 .p2align 4
/* Copy exactly 12 bytes from (%ecx) to (%edx): an 8-byte move plus a
   4-byte move at offset 8.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 12 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2645 L(Exit12):
2646 movlpd (%ecx), %xmm0
2647 movlpd %xmm0, (%edx)
2648 movl 8(%ecx), %eax
2649 movl %eax, 8(%edx)
2650 # ifdef USE_AS_STPCPY
2651 lea 11(%edx), %eax
2652 # else
2653 movl %edi, %eax
2654 # endif
2655 # ifdef USE_AS_STRNCPY
2656 sub $12, %ebx
2657 lea 12(%edx), %ecx
2658 jnz L(StrncpyFillTailWithZero1)
2659 # ifdef USE_AS_STPCPY
2660 cmpb $1, (%eax)
2661 sbb $-1, %eax
2662 # endif
2663 # endif
2664 RETURN1
2665
2666 .p2align 4
/* Copy exactly 13 bytes from (%ecx) to (%edx) with two overlapping
   8-byte moves at offsets 0 and 5.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 13 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2667 L(Exit13):
2668 movlpd (%ecx), %xmm0
2669 movlpd %xmm0, (%edx)
2670 movlpd 5(%ecx), %xmm0
2671 movlpd %xmm0, 5(%edx)
2672 # ifdef USE_AS_STPCPY
2673 lea 12(%edx), %eax
2674 # else
2675 movl %edi, %eax
2676 # endif
2677 # ifdef USE_AS_STRNCPY
2678 sub $13, %ebx
2679 lea 13(%edx), %ecx
2680 jnz L(StrncpyFillTailWithZero1)
2681 # ifdef USE_AS_STPCPY
2682 cmpb $1, (%eax)
2683 sbb $-1, %eax
2684 # endif
2685 # endif
2686 RETURN1
2687
2688 .p2align 4
/* Copy exactly 14 bytes from (%ecx) to (%edx) with two overlapping
   8-byte moves at offsets 0 and 6.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 14 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2689 L(Exit14):
2690 movlpd (%ecx), %xmm0
2691 movlpd %xmm0, (%edx)
2692 movlpd 6(%ecx), %xmm0
2693 movlpd %xmm0, 6(%edx)
2694 # ifdef USE_AS_STPCPY
2695 lea 13(%edx), %eax
2696 # else
2697 movl %edi, %eax
2698 # endif
2699 # ifdef USE_AS_STRNCPY
2700 sub $14, %ebx
2701 lea 14(%edx), %ecx
2702 jnz L(StrncpyFillTailWithZero1)
2703 # ifdef USE_AS_STPCPY
2704 cmpb $1, (%eax)
2705 sbb $-1, %eax
2706 # endif
2707 # endif
2708 RETURN1
2709
2710 .p2align 4
/* Copy exactly 15 bytes from (%ecx) to (%edx) with two overlapping
   8-byte moves at offsets 0 and 7.
   %eax = return value: address of the last byte written with
   USE_AS_STPCPY, else %edi (presumably the original destination; it
   is set outside this section).  With USE_AS_STRNCPY, %ebx -= 15 and
   %ecx is set just past the copy; a non-zero remainder is zero-filled
   via L(StrncpyFillTailWithZero1) (lea keeps the flags from sub).  In
   the STPCPY+STRNCPY build, cmpb/sbb add 1 to %eax unless the last
   byte written is NUL.  RETURN1 pops %edi (and %ebx for strncpy).  */
2711 L(Exit15):
2712 movlpd (%ecx), %xmm0
2713 movlpd %xmm0, (%edx)
2714 movlpd 7(%ecx), %xmm0
2715 movlpd %xmm0, 7(%edx)
2716 # ifdef USE_AS_STPCPY
2717 lea 14(%edx), %eax
2718 # else
2719 movl %edi, %eax
2720 # endif
2721 # ifdef USE_AS_STRNCPY
2722 sub $15, %ebx
2723 lea 15(%edx), %ecx
2724 jnz L(StrncpyFillTailWithZero1)
2725 # ifdef USE_AS_STPCPY
2726 cmpb $1, (%eax)
2727 sbb $-1, %eax
2728 # endif
2729 # endif
2730 RETURN1
2731
2732 CFI_POP (%edi)
2733
2734 # ifdef USE_AS_STRNCPY
2735 .p2align 4
/* L(Fill0)..L(Fill16): store N bytes at (%ecx) from %edx / %xmm0 and
   return through RETURN (which pops %ebx in the strncpy build).  %edx
   and %xmm0 are expected to be zero here; L(StrncpyFillTailWithZero)
   clears both before dispatching to these stubs.  Sizes that are not a
   single move width use two stores, overlapping where needed (for
   example Fill7 = 4 bytes at offset 0 plus 4 bytes at offset 3).  */
2736 L(Fill0):
2737 RETURN
2738
2739 .p2align 4
2740 L(Fill1):
2741 movb %dl, (%ecx)
2742 RETURN
2743
2744 .p2align 4
2745 L(Fill2):
2746 movw %dx, (%ecx)
2747 RETURN
2748
2749 .p2align 4
2750 L(Fill3):
2751 movw %dx, (%ecx)
2752 movb %dl, 2(%ecx)
2753 RETURN
2754
2755 .p2align 4
2756 L(Fill4):
2757 movl %edx, (%ecx)
2758 RETURN
2759
2760 .p2align 4
2761 L(Fill5):
2762 movl %edx, (%ecx)
2763 movb %dl, 4(%ecx)
2764 RETURN
2765
2766 .p2align 4
2767 L(Fill6):
2768 movl %edx, (%ecx)
2769 movw %dx, 4(%ecx)
2770 RETURN
2771
2772 .p2align 4
2773 L(Fill7):
2774 movl %edx, (%ecx)
2775 movl %edx, 3(%ecx)
2776 RETURN
2777
2778 .p2align 4
2779 L(Fill8):
2780 movlpd %xmm0, (%ecx)
2781 RETURN
2782
2783 .p2align 4
2784 L(Fill9):
2785 movlpd %xmm0, (%ecx)
2786 movb %dl, 8(%ecx)
2787 RETURN
2788
2789 .p2align 4
2790 L(Fill10):
2791 movlpd %xmm0, (%ecx)
2792 movw %dx, 8(%ecx)
2793 RETURN
2794
2795 .p2align 4
2796 L(Fill11):
2797 movlpd %xmm0, (%ecx)
2798 movl %edx, 7(%ecx)
2799 RETURN
2800
2801 .p2align 4
2802 L(Fill12):
2803 movlpd %xmm0, (%ecx)
2804 movl %edx, 8(%ecx)
2805 RETURN
2806
2807 .p2align 4
2808 L(Fill13):
2809 movlpd %xmm0, (%ecx)
2810 movlpd %xmm0, 5(%ecx)
2811 RETURN
2812
2813 .p2align 4
2814 L(Fill14):
2815 movlpd %xmm0, (%ecx)
2816 movlpd %xmm0, 6(%ecx)
2817 RETURN
2818
2819 .p2align 4
2820 L(Fill15):
2821 movlpd %xmm0, (%ecx)
2822 movlpd %xmm0, 7(%ecx)
2823 RETURN
2824
2825 .p2align 4
2826 L(Fill16):
2827 movlpd %xmm0, (%ecx)
2828 movlpd %xmm0, 8(%ecx)
2829 RETURN
2830
2831 .p2align 4
/* L(StrncpyFillExit1) adds back the 16 that its callers subtracted
   from %ebx, then falls into L(FillFrom1To16Bytes), which selects
   L(FillN) with N = %ebx through a compare tree.  The tree
   distinguishes 0..16 only and uses signed jl/jg.  */
2832 L(StrncpyFillExit1):
2833 lea 16(%ebx), %ebx
2834 L(FillFrom1To16Bytes):
2835 test %ebx, %ebx
2836 jz L(Fill0)
2837 cmp $16, %ebx
2838 je L(Fill16)
2839 cmp $8, %ebx
2840 je L(Fill8)
2841 jg L(FillMore8)
2842 cmp $4, %ebx
2843 je L(Fill4)
2844 jg L(FillMore4)
2845 cmp $2, %ebx
2846 jl L(Fill1)
2847 je L(Fill2)
2848 jg L(Fill3)
2849 L(FillMore8): /* but less than 16 */
2850 cmp $12, %ebx
2851 je L(Fill12)
2852 jl L(FillLess12)
2853 cmp $14, %ebx
2854 jl L(Fill13)
2855 je L(Fill14)
2856 jg L(Fill15)
2857 L(FillMore4): /* but less than 8 */
2858 cmp $6, %ebx
2859 jl L(Fill5)
2860 je L(Fill6)
2861 jg L(Fill7)
2862 L(FillLess12): /* but more than 8 */
2863 cmp $10, %ebx
2864 jl L(Fill9)
2865 je L(Fill10)
2866 jmp L(Fill11)
2867
2868 CFI_PUSH(%edi)
2869
2870 .p2align 4
/* Zero-fill the rest of the strncpy destination: %ecx = first byte to
   clear, %ebx = number of bytes to clear.  The CFI_PUSH above emits
   unwind directives only.  The ...1 entry pops %edi first; the plain
   entry is for callers that have no %edi on the stack.  */
2871 L(StrncpyFillTailWithZero1):
2872 POP (%edi)
2873 L(StrncpyFillTailWithZero):
/* %xmm0 and %edx become the zero sources for every store below and
   for the L(FillN) stubs.  16 or fewer bytes are handled by the stubs
   directly.  */
2874 pxor %xmm0, %xmm0
2875 xor %edx, %edx
2876 sub $16, %ebx
2877 jbe L(StrncpyFillExit1)
2878
/* Clear 16 bytes with two unaligned 8-byte stores.  */
2879 movlpd %xmm0, (%ecx)
2880 movlpd %xmm0, 8(%ecx)
2881
2882 lea 16(%ecx), %ecx
2883
/* Round %ecx down to a 16-byte boundary so movdqa can be used; the
   bytes stepped back over are added to the count and cleared again.
   %edx is re-zeroed afterwards.  */
2884 mov %ecx, %edx
2885 and $0xf, %edx
2886 sub %edx, %ecx
2887 add %edx, %ebx
2888 xor %edx, %edx
2889 sub $64, %ebx
2890 jb L(StrncpyFillLess64)
2891
/* 64 bytes per pass while at least 64 remain.  */
2892 L(StrncpyFillLoopMovdqa):
2893 movdqa %xmm0, (%ecx)
2894 movdqa %xmm0, 16(%ecx)
2895 movdqa %xmm0, 32(%ecx)
2896 movdqa %xmm0, 48(%ecx)
2897 lea 64(%ecx), %ecx
2898 sub $64, %ebx
2899 jae L(StrncpyFillLoopMovdqa)
2900
/* Fewer than 64 left (%ebx = remaining - 64).  Clear 32 bytes if at
   least 32 remain, then 16 more if at least 16 remain; the final
   0..15 bytes go through L(StrncpyFillExit1) (which adds 16 back to
   %ebx) or L(FillFrom1To16Bytes).  */
2901 L(StrncpyFillLess64):
2902 add $32, %ebx
2903 jl L(StrncpyFillLess32)
2904 movdqa %xmm0, (%ecx)
2905 movdqa %xmm0, 16(%ecx)
2906 lea 32(%ecx), %ecx
2907 sub $16, %ebx
2908 jl L(StrncpyFillExit1)
2909 movdqa %xmm0, (%ecx)
2910 lea 16(%ecx), %ecx
2911 jmp L(FillFrom1To16Bytes)
2912
/* Fewer than 32 left (%ebx = remaining - 32): clear 16 bytes if at
   least 16 remain, then hand the rest to the L(FillN) dispatch.  */
2913 L(StrncpyFillLess32):
2914 add $16, %ebx
2915 jl L(StrncpyFillExit1)
2916 movdqa %xmm0, (%ecx)
2917 lea 16(%ecx), %ecx
2918 jmp L(FillFrom1To16Bytes)
2919 # endif
2920
/* ExitTail1: copy 1 byte from (%ecx) to (%edx) and return.
   %eax = %edx, the destination; under USE_AS_STPCPY it is the
   address of the last byte copied instead.  With USE_AS_STRNCPY the
   count in %ebx is reduced by the bytes copied and any remainder is
   zero-filled starting at %ecx = destination + 1.  */
2921 .p2align 4
2922 L(ExitTail1):
2923 movb (%ecx), %al
2924 movb %al, (%edx)
2925 # ifdef USE_AS_STPCPY
2926 lea (%edx), %eax
2927 # else
2928 movl %edx, %eax
2929 # endif
2930 # ifdef USE_AS_STRNCPY
2931 sub $1, %ebx
2932 lea 1(%edx), %ecx /* lea keeps the flags of sub for the jnz */
2933 jnz L(StrncpyFillTailWithZero)
2934 # ifdef USE_AS_STPCPY
2935 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
2936 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
2937 # endif
2938 # endif
2939 RETURN
2940
/* ExitTail2: copy 2 bytes from (%ecx) to (%edx) and return.
   %eax = %edx, the destination; under USE_AS_STPCPY it is the
   address of the last byte copied instead.  With USE_AS_STRNCPY the
   count in %ebx is reduced by the bytes copied and any remainder is
   zero-filled starting at %ecx = destination + 2.  */
2941 .p2align 4
2942 L(ExitTail2):
2943 movw (%ecx), %ax
2944 movw %ax, (%edx)
2945 # ifdef USE_AS_STPCPY
2946 lea 1(%edx), %eax
2947 # else
2948 movl %edx, %eax
2949 # endif
2950 # ifdef USE_AS_STRNCPY
2951 sub $2, %ebx
2952 lea 2(%edx), %ecx /* lea keeps the flags of sub for the jnz */
2953 jnz L(StrncpyFillTailWithZero)
2954 # ifdef USE_AS_STPCPY
2955 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
2956 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
2957 # endif
2958 # endif
2959 RETURN
2960
/* ExitTail3: copy 3 bytes from (%ecx) to (%edx) as a 2-byte plus a
   1-byte move, and return.  %eax = %edx, the destination; under
   USE_AS_STPCPY it is the address of the last byte copied instead.
   With USE_AS_STRNCPY the count in %ebx is reduced by the bytes copied
   and any remainder is zero-filled starting at %ecx = destination + 3.  */
2961 .p2align 4
2962 L(ExitTail3):
2963 movw (%ecx), %ax
2964 movw %ax, (%edx)
2965 movb 2(%ecx), %al
2966 movb %al, 2(%edx)
2967 # ifdef USE_AS_STPCPY
2968 lea 2(%edx), %eax
2969 # else
2970 movl %edx, %eax
2971 # endif
2972 # ifdef USE_AS_STRNCPY
2973 sub $3, %ebx
2974 lea 3(%edx), %ecx /* lea keeps the flags of sub for the jnz */
2975 jnz L(StrncpyFillTailWithZero)
2976 # ifdef USE_AS_STPCPY
2977 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
2978 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
2979 # endif
2980 # endif
2981 RETURN
2982
/* ExitTail4: copy 4 bytes from (%ecx) to (%edx) and return.
   %eax = %edx, the destination; under USE_AS_STPCPY it is the
   address of the last byte copied instead.  With USE_AS_STRNCPY the
   count in %ebx is reduced by the bytes copied and any remainder is
   zero-filled starting at %ecx = destination + 4.  */
2983 .p2align 4
2984 L(ExitTail4):
2985 movl (%ecx), %eax
2986 movl %eax, (%edx)
2987 # ifdef USE_AS_STPCPY
2988 lea 3(%edx), %eax
2989 # else
2990 movl %edx, %eax
2991 # endif
2992 # ifdef USE_AS_STRNCPY
2993 sub $4, %ebx
2994 lea 4(%edx), %ecx /* lea keeps the flags of sub for the jnz */
2995 jnz L(StrncpyFillTailWithZero)
2996 # ifdef USE_AS_STPCPY
2997 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
2998 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
2999 # endif
3000 # endif
3001 RETURN
3002
/* ExitTail5: copy 5 bytes from (%ecx) to (%edx) as a 4-byte plus a
   1-byte move, and return.  %eax = %edx, the destination; under
   USE_AS_STPCPY it is the address of the last byte copied instead.
   With USE_AS_STRNCPY the count in %ebx is reduced by the bytes copied
   and any remainder is zero-filled starting at %ecx = destination + 5.  */
3003 .p2align 4
3004 L(ExitTail5):
3005 movl (%ecx), %eax
3006 movl %eax, (%edx)
3007 movb 4(%ecx), %al
3008 movb %al, 4(%edx)
3009 # ifdef USE_AS_STPCPY
3010 lea 4(%edx), %eax
3011 # else
3012 movl %edx, %eax
3013 # endif
3014 # ifdef USE_AS_STRNCPY
3015 sub $5, %ebx
3016 lea 5(%edx), %ecx /* lea keeps the flags of sub for the jnz */
3017 jnz L(StrncpyFillTailWithZero)
3018 # ifdef USE_AS_STPCPY
3019 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
3020 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
3021 # endif
3022 # endif
3023 RETURN
3024
/* ExitTail6: copy 6 bytes from (%ecx) to (%edx) as a 4-byte plus a
   2-byte move, and return.  %eax = %edx, the destination; under
   USE_AS_STPCPY it is the address of the last byte copied instead.
   With USE_AS_STRNCPY the count in %ebx is reduced by the bytes copied
   and any remainder is zero-filled starting at %ecx = destination + 6.  */
3025 .p2align 4
3026 L(ExitTail6):
3027 movl (%ecx), %eax
3028 movl %eax, (%edx)
3029 movw 4(%ecx), %ax
3030 movw %ax, 4(%edx)
3031 # ifdef USE_AS_STPCPY
3032 lea 5(%edx), %eax
3033 # else
3034 movl %edx, %eax
3035 # endif
3036 # ifdef USE_AS_STRNCPY
3037 sub $6, %ebx
3038 lea 6(%edx), %ecx /* lea keeps the flags of sub for the jnz */
3039 jnz L(StrncpyFillTailWithZero)
3040 # ifdef USE_AS_STPCPY
3041 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
3042 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
3043 # endif
3044 # endif
3045 RETURN
3046
/* ExitTail7: copy 7 bytes from (%ecx) to (%edx) with two 4-byte moves
   that overlap at byte 3, and return.  %eax = %edx, the destination;
   under USE_AS_STPCPY it is the address of the last byte copied
   instead.  With USE_AS_STRNCPY the count in %ebx is reduced by the
   bytes copied and any remainder is zero-filled starting at
   %ecx = destination + 7.  */
3047 .p2align 4
3048 L(ExitTail7):
3049 movl (%ecx), %eax
3050 movl %eax, (%edx)
3051 movl 3(%ecx), %eax
3052 movl %eax, 3(%edx)
3053 # ifdef USE_AS_STPCPY
3054 lea 6(%edx), %eax
3055 # else
3056 movl %edx, %eax
3057 # endif
3058 # ifdef USE_AS_STRNCPY
3059 sub $7, %ebx
3060 lea 7(%edx), %ecx /* lea keeps the flags of sub for the jnz */
3061 jnz L(StrncpyFillTailWithZero)
3062 # ifdef USE_AS_STPCPY
3063 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
3064 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
3065 # endif
3066 # endif
3067 RETURN
3068
/* ExitTail9: copy 9 bytes from (%ecx) to (%edx) as an 8-byte plus a
   1-byte move, and return.  %eax = %edx, the destination; under
   USE_AS_STPCPY it is the address of the last byte copied instead.
   With USE_AS_STRNCPY the count in %ebx is reduced by the bytes copied
   and any remainder is zero-filled starting at %ecx = destination + 9.  */
3069 .p2align 4
3070 L(ExitTail9):
3071 movlpd (%ecx), %xmm0
3072 movlpd %xmm0, (%edx)
3073 movb 8(%ecx), %al
3074 movb %al, 8(%edx)
3075 # ifdef USE_AS_STPCPY
3076 lea 8(%edx), %eax
3077 # else
3078 movl %edx, %eax
3079 # endif
3080 # ifdef USE_AS_STRNCPY
3081 sub $9, %ebx
3082 lea 9(%edx), %ecx /* lea keeps the flags of sub for the jnz */
3083 jnz L(StrncpyFillTailWithZero)
3084 # ifdef USE_AS_STPCPY
3085 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
3086 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
3087 # endif
3088 # endif
3089 RETURN
3090
/* ExitTail10: copy 10 bytes from (%ecx) to (%edx) as an 8-byte plus a
   2-byte move, and return.  %eax = %edx, the destination; under
   USE_AS_STPCPY it is the address of the last byte copied instead.
   With USE_AS_STRNCPY the count in %ebx is reduced by the bytes copied
   and any remainder is zero-filled starting at %ecx = destination + 10.  */
3091 .p2align 4
3092 L(ExitTail10):
3093 movlpd (%ecx), %xmm0
3094 movlpd %xmm0, (%edx)
3095 movw 8(%ecx), %ax
3096 movw %ax, 8(%edx)
3097 # ifdef USE_AS_STPCPY
3098 lea 9(%edx), %eax
3099 # else
3100 movl %edx, %eax
3101 # endif
3102 # ifdef USE_AS_STRNCPY
3103 sub $10, %ebx
3104 lea 10(%edx), %ecx /* lea keeps the flags of sub for the jnz */
3105 jnz L(StrncpyFillTailWithZero)
3106 # ifdef USE_AS_STPCPY
3107 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
3108 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
3109 # endif
3110 # endif
3111 RETURN
3112
/* ExitTail11: copy 11 bytes from (%ecx) to (%edx) as an 8-byte move
   plus a 4-byte move at offset 7 (overlapping byte 7), and return.
   %eax = %edx, the destination; under USE_AS_STPCPY it is the address
   of the last byte copied instead.  With USE_AS_STRNCPY the count in
   %ebx is reduced by the bytes copied and any remainder is zero-filled
   starting at %ecx = destination + 11.  */
3113 .p2align 4
3114 L(ExitTail11):
3115 movlpd (%ecx), %xmm0
3116 movlpd %xmm0, (%edx)
3117 movl 7(%ecx), %eax
3118 movl %eax, 7(%edx)
3119 # ifdef USE_AS_STPCPY
3120 lea 10(%edx), %eax
3121 # else
3122 movl %edx, %eax
3123 # endif
3124 # ifdef USE_AS_STRNCPY
3125 sub $11, %ebx
3126 lea 11(%edx), %ecx /* lea keeps the flags of sub for the jnz */
3127 jnz L(StrncpyFillTailWithZero)
3128 # ifdef USE_AS_STPCPY
3129 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
3130 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
3131 # endif
3132 # endif
3133 RETURN
3134
/* ExitTail12: copy 12 bytes from (%ecx) to (%edx) as an 8-byte plus a
   4-byte move, and return.  %eax = %edx, the destination; under
   USE_AS_STPCPY it is the address of the last byte copied instead.
   With USE_AS_STRNCPY the count in %ebx is reduced by the bytes copied
   and any remainder is zero-filled starting at %ecx = destination + 12.  */
3135 .p2align 4
3136 L(ExitTail12):
3137 movlpd (%ecx), %xmm0
3138 movlpd %xmm0, (%edx)
3139 movl 8(%ecx), %eax
3140 movl %eax, 8(%edx)
3141 # ifdef USE_AS_STPCPY
3142 lea 11(%edx), %eax
3143 # else
3144 movl %edx, %eax
3145 # endif
3146 # ifdef USE_AS_STRNCPY
3147 sub $12, %ebx
3148 lea 12(%edx), %ecx /* lea keeps the flags of sub for the jnz */
3149 jnz L(StrncpyFillTailWithZero)
3150 # ifdef USE_AS_STPCPY
3151 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
3152 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
3153 # endif
3154 # endif
3155 RETURN
3156
/* ExitTail13: copy 13 bytes from (%ecx) to (%edx) with two 8-byte
   moves at offsets 0 and 5 (overlapping bytes 5-7), and return.
   %eax = %edx, the destination; under USE_AS_STPCPY it is the address
   of the last byte copied instead.  With USE_AS_STRNCPY the count in
   %ebx is reduced by the bytes copied and any remainder is zero-filled
   starting at %ecx = destination + 13.  */
3157 .p2align 4
3158 L(ExitTail13):
3159 movlpd (%ecx), %xmm0
3160 movlpd %xmm0, (%edx)
3161 movlpd 5(%ecx), %xmm0
3162 movlpd %xmm0, 5(%edx)
3163 # ifdef USE_AS_STPCPY
3164 lea 12(%edx), %eax
3165 # else
3166 movl %edx, %eax
3167 # endif
3168 # ifdef USE_AS_STRNCPY
3169 sub $13, %ebx
3170 lea 13(%edx), %ecx /* lea keeps the flags of sub for the jnz */
3171 jnz L(StrncpyFillTailWithZero)
3172 # ifdef USE_AS_STPCPY
3173 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
3174 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
3175 # endif
3176 # endif
3177 RETURN
3178
/* ExitTail14: copy 14 bytes from (%ecx) to (%edx) with two 8-byte
   moves at offsets 0 and 6 (overlapping bytes 6-7), and return.
   %eax = %edx, the destination; under USE_AS_STPCPY it is the address
   of the last byte copied instead.  With USE_AS_STRNCPY the count in
   %ebx is reduced by the bytes copied and any remainder is zero-filled
   starting at %ecx = destination + 14.  */
3179 .p2align 4
3180 L(ExitTail14):
3181 movlpd (%ecx), %xmm0
3182 movlpd %xmm0, (%edx)
3183 movlpd 6(%ecx), %xmm0
3184 movlpd %xmm0, 6(%edx)
3185 # ifdef USE_AS_STPCPY
3186 lea 13(%edx), %eax
3187 # else
3188 movl %edx, %eax
3189 # endif
3190 # ifdef USE_AS_STRNCPY
3191 sub $14, %ebx
3192 lea 14(%edx), %ecx /* lea keeps the flags of sub for the jnz */
3193 jnz L(StrncpyFillTailWithZero)
3194 # ifdef USE_AS_STPCPY
3195 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
3196 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
3197 # endif
3198 # endif
3199 RETURN
3200
/* ExitTail16: copy 16 bytes from (%ecx) to (%edx) with two adjacent
   8-byte moves (no alignment needed), and return.  %eax = %edx, the
   destination; under USE_AS_STPCPY it is the address of the last byte
   copied instead.  With USE_AS_STRNCPY the count in %ebx is reduced by
   the bytes copied and any remainder is zero-filled starting at
   %ecx = destination + 16.  */
3201 .p2align 4
3202 L(ExitTail16):
3203 movlpd (%ecx), %xmm0
3204 movlpd %xmm0, (%edx)
3205 movlpd 8(%ecx), %xmm0
3206 movlpd %xmm0, 8(%edx)
3207 # ifdef USE_AS_STPCPY
3208 lea 15(%edx), %eax
3209 # else
3210 movl %edx, %eax
3211 # endif
3212 # ifdef USE_AS_STRNCPY
3213 sub $16, %ebx
3214 lea 16(%edx), %ecx /* lea keeps the flags of sub for the jnz */
3215 jnz L(StrncpyFillTailWithZero)
3216 # ifdef USE_AS_STPCPY
3217 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
3218 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
3219 # endif
3220 # endif
3221 RETURN
3222 # ifdef USE_AS_STRNCPY
/* StrncpyLeaveCase2OrCase3: strncpy-only exit that selects on %eax.
   Non-zero %eax goes to L(Aligned64LeaveCase2); zero falls through to
   L(Aligned64LeaveCase3), which stores up to three of the pending
   blocks %xmm4..%xmm6 at -64/-48/-32(%edx) while the biased count in
   %ebx stays positive, advancing %esi by 16 per block, and then jumps
   to L(CopyFrom1To16BytesCase3) (defined outside this chunk).
   NOTE(review): presumably %eax is the NUL mask produced by the 64-byte
   loop and %ebx is biased by that loop -- confirm at the jump site.  */
3223 L(StrncpyLeaveCase2OrCase3):
3224 test %eax, %eax
3225 jnz L(Aligned64LeaveCase2)
3226
3227 L(Aligned64LeaveCase3):
3228 add $48, %ebx
3229 jle L(CopyFrom1To16BytesCase3)
3230 movaps %xmm4, -64(%edx)
3231 lea 16(%esi), %esi
3232 sub $16, %ebx
3233 jbe L(CopyFrom1To16BytesCase3)
3234 movaps %xmm5, -48(%edx)
3235 lea 16(%esi), %esi
3236 sub $16, %ebx
3237 jbe L(CopyFrom1To16BytesCase3)
3238 movaps %xmm6, -32(%edx)
3239 lea 16(%esi), %esi
3240 lea -16(%ebx), %ebx /* last step: no flags needed, jump is unconditional */
3241 jmp L(CopyFrom1To16BytesCase3)
3242
/* Aligned64LeaveCase2: strncpy-only exit reached with non-zero %eax.
   Walks the pending blocks %xmm4..%xmm7 in order: each is compared
   bytewise against %xmm0 and the result mask is put in %eax; a block
   is stored below %edx only after its own mask tested zero and the
   count in %ebx is still positive.  The first block with a non-zero
   mask goes to L(CopyFrom1To16Bytes); running out of count goes to
   L(CopyFrom1To16BytesCase2OrCase3).  Each later pcmpeqb may reuse
   %xmm0 because it is only reached when the previous mask was zero,
   i.e. %xmm0 is all-zero again.
   NOTE(review): assumes %xmm0 is all-zero on entry so that pcmpeqb
   detects NUL bytes -- confirm at the jump site.  */
3243 L(Aligned64LeaveCase2):
3244 pcmpeqb %xmm4, %xmm0
3245 pmovmskb %xmm0, %eax
3246 add $48, %ebx
3247 jle L(CopyFrom1To16BytesCase2OrCase3)
3248 test %eax, %eax
3249 jnz L(CopyFrom1To16Bytes)
3250
3251 pcmpeqb %xmm5, %xmm0
3252 pmovmskb %xmm0, %eax
3253 movaps %xmm4, -64(%edx) /* %xmm4 tested clean above */
3254 lea 16(%esi), %esi
3255 sub $16, %ebx
3256 jbe L(CopyFrom1To16BytesCase2OrCase3)
3257 test %eax, %eax
3258 jnz L(CopyFrom1To16Bytes)
3259
3260 pcmpeqb %xmm6, %xmm0
3261 pmovmskb %xmm0, %eax
3262 movaps %xmm5, -48(%edx) /* %xmm5 tested clean above */
3263 lea 16(%esi), %esi
3264 sub $16, %ebx
3265 jbe L(CopyFrom1To16BytesCase2OrCase3)
3266 test %eax, %eax
3267 jnz L(CopyFrom1To16Bytes)
3268
3269 pcmpeqb %xmm7, %xmm0
3270 pmovmskb %xmm0, %eax
3271 movaps %xmm6, -32(%edx) /* %xmm6 tested clean above */
3272 lea 16(%esi), %esi
3273 lea -16(%ebx), %ebx
3274 jmp L(CopyFrom1To16BytesCase2)
3275 /* -------------------------------------------------- */
/* StrncpyExit1Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   1..15 of %xmm1 followed by the block's existing last 1 byte, set
   %esi = 15, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 1 destination byte with the value
   just loaded from it.  */
3276 L(StrncpyExit1Case2OrCase3):
3277 movaps (%edx), %xmm6
3278 psrldq $15, %xmm6 /* keep the last 1 byte, moved to the bottom */
3279 mov $15, %esi
3280 palignr $1, %xmm1, %xmm6 /* %xmm6 = %xmm1[1..15] : kept byte */
3281 movaps %xmm6, (%edx)
3282 test %eax, %eax
3283 jnz L(CopyFrom1To16BytesCase2)
3284 jmp L(CopyFrom1To16BytesCase3)
3285
/* StrncpyExit2Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   2..15 of %xmm1 followed by the block's existing last 2 bytes, set
   %esi = 14, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 2 destination bytes with the values
   just loaded from them.  */
3286 L(StrncpyExit2Case2OrCase3):
3287 movaps (%edx), %xmm6
3288 psrldq $14, %xmm6 /* keep the last 2 bytes, moved to the bottom */
3289 mov $14, %esi
3290 palignr $2, %xmm1, %xmm6 /* %xmm6 = %xmm1[2..15] : kept bytes */
3291 movaps %xmm6, (%edx)
3292 test %eax, %eax
3293 jnz L(CopyFrom1To16BytesCase2)
3294 jmp L(CopyFrom1To16BytesCase3)
3295
/* StrncpyExit3Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   3..15 of %xmm1 followed by the block's existing last 3 bytes, set
   %esi = 13, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 3 destination bytes with the values
   just loaded from them.  */
3296 L(StrncpyExit3Case2OrCase3):
3297 movaps (%edx), %xmm6
3298 psrldq $13, %xmm6 /* keep the last 3 bytes, moved to the bottom */
3299 mov $13, %esi
3300 palignr $3, %xmm1, %xmm6 /* %xmm6 = %xmm1[3..15] : kept bytes */
3301 movaps %xmm6, (%edx)
3302 test %eax, %eax
3303 jnz L(CopyFrom1To16BytesCase2)
3304 jmp L(CopyFrom1To16BytesCase3)
3305
/* StrncpyExit4Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   4..15 of %xmm1 followed by the block's existing last 4 bytes, set
   %esi = 12, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 4 destination bytes with the values
   just loaded from them.  */
3306 L(StrncpyExit4Case2OrCase3):
3307 movaps (%edx), %xmm6
3308 psrldq $12, %xmm6 /* keep the last 4 bytes, moved to the bottom */
3309 mov $12, %esi
3310 palignr $4, %xmm1, %xmm6 /* %xmm6 = %xmm1[4..15] : kept bytes */
3311 movaps %xmm6, (%edx)
3312 test %eax, %eax
3313 jnz L(CopyFrom1To16BytesCase2)
3314 jmp L(CopyFrom1To16BytesCase3)
3315
/* StrncpyExit5Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   5..15 of %xmm1 followed by the block's existing last 5 bytes, set
   %esi = 11, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 5 destination bytes with the values
   just loaded from them.  */
3316 L(StrncpyExit5Case2OrCase3):
3317 movaps (%edx), %xmm6
3318 psrldq $11, %xmm6 /* keep the last 5 bytes, moved to the bottom */
3319 mov $11, %esi
3320 palignr $5, %xmm1, %xmm6 /* %xmm6 = %xmm1[5..15] : kept bytes */
3321 movaps %xmm6, (%edx)
3322 test %eax, %eax
3323 jnz L(CopyFrom1To16BytesCase2)
3324 jmp L(CopyFrom1To16BytesCase3)
3325
/* StrncpyExit6Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   6..15 of %xmm1 followed by the block's existing last 6 bytes, set
   %esi = 10, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 6 destination bytes with the values
   just loaded from them.  */
3326 L(StrncpyExit6Case2OrCase3):
3327 movaps (%edx), %xmm6
3328 psrldq $10, %xmm6 /* keep the last 6 bytes, moved to the bottom */
3329 mov $10, %esi
3330 palignr $6, %xmm1, %xmm6 /* %xmm6 = %xmm1[6..15] : kept bytes */
3331 movaps %xmm6, (%edx)
3332 test %eax, %eax
3333 jnz L(CopyFrom1To16BytesCase2)
3334 jmp L(CopyFrom1To16BytesCase3)
3335
/* StrncpyExit7Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   7..15 of %xmm1 followed by the block's existing last 7 bytes, set
   %esi = 9, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 7 destination bytes with the values
   just loaded from them.  */
3336 L(StrncpyExit7Case2OrCase3):
3337 movaps (%edx), %xmm6
3338 psrldq $9, %xmm6 /* keep the last 7 bytes, moved to the bottom */
3339 mov $9, %esi
3340 palignr $7, %xmm1, %xmm6 /* %xmm6 = %xmm1[7..15] : kept bytes */
3341 movaps %xmm6, (%edx)
3342 test %eax, %eax
3343 jnz L(CopyFrom1To16BytesCase2)
3344 jmp L(CopyFrom1To16BytesCase3)
3345
/* StrncpyExit8Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   8..15 of %xmm1 followed by the block's existing last 8 bytes, set
   %esi = 8, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 8 destination bytes with the values
   just loaded from them.  */
3346 L(StrncpyExit8Case2OrCase3):
3347 movaps (%edx), %xmm6
3348 psrldq $8, %xmm6 /* keep the last 8 bytes, moved to the bottom */
3349 mov $8, %esi
3350 palignr $8, %xmm1, %xmm6 /* %xmm6 = %xmm1[8..15] : kept bytes */
3351 movaps %xmm6, (%edx)
3352 test %eax, %eax
3353 jnz L(CopyFrom1To16BytesCase2)
3354 jmp L(CopyFrom1To16BytesCase3)
3355
/* StrncpyExit9Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   9..15 of %xmm1 followed by the block's existing last 9 bytes, set
   %esi = 7, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 9 destination bytes with the values
   just loaded from them.  */
3356 L(StrncpyExit9Case2OrCase3):
3357 movaps (%edx), %xmm6
3358 psrldq $7, %xmm6 /* keep the last 9 bytes, moved to the bottom */
3359 mov $7, %esi
3360 palignr $9, %xmm1, %xmm6 /* %xmm6 = %xmm1[9..15] : kept bytes */
3361 movaps %xmm6, (%edx)
3362 test %eax, %eax
3363 jnz L(CopyFrom1To16BytesCase2)
3364 jmp L(CopyFrom1To16BytesCase3)
3365
/* StrncpyExit10Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   10..15 of %xmm1 followed by the block's existing last 10 bytes, set
   %esi = 6, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 10 destination bytes with the
   values just loaded from them.  */
3366 L(StrncpyExit10Case2OrCase3):
3367 movaps (%edx), %xmm6
3368 psrldq $6, %xmm6 /* keep the last 10 bytes, moved to the bottom */
3369 mov $6, %esi
3370 palignr $10, %xmm1, %xmm6 /* %xmm6 = %xmm1[10..15] : kept bytes */
3371 movaps %xmm6, (%edx)
3372 test %eax, %eax
3373 jnz L(CopyFrom1To16BytesCase2)
3374 jmp L(CopyFrom1To16BytesCase3)
3375
/* StrncpyExit11Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   11..15 of %xmm1 followed by the block's existing last 11 bytes, set
   %esi = 5, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 11 destination bytes with the
   values just loaded from them.  */
3376 L(StrncpyExit11Case2OrCase3):
3377 movaps (%edx), %xmm6
3378 psrldq $5, %xmm6 /* keep the last 11 bytes, moved to the bottom */
3379 mov $5, %esi
3380 palignr $11, %xmm1, %xmm6 /* %xmm6 = %xmm1[11..15] : kept bytes */
3381 movaps %xmm6, (%edx)
3382 test %eax, %eax
3383 jnz L(CopyFrom1To16BytesCase2)
3384 jmp L(CopyFrom1To16BytesCase3)
3385
/* StrncpyExit12Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   12..15 of %xmm1 followed by the block's existing last 12 bytes, set
   %esi = 4, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 12 destination bytes with the
   values just loaded from them.  */
3386 L(StrncpyExit12Case2OrCase3):
3387 movaps (%edx), %xmm6
3388 psrldq $4, %xmm6 /* keep the last 12 bytes, moved to the bottom */
3389 mov $4, %esi
3390 palignr $12, %xmm1, %xmm6 /* %xmm6 = %xmm1[12..15] : kept bytes */
3391 movaps %xmm6, (%edx)
3392 test %eax, %eax
3393 jnz L(CopyFrom1To16BytesCase2)
3394 jmp L(CopyFrom1To16BytesCase3)
3395
/* StrncpyExit13Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   13..15 of %xmm1 followed by the block's existing last 13 bytes, set
   %esi = 3, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 13 destination bytes with the
   values just loaded from them.  */
3396 L(StrncpyExit13Case2OrCase3):
3397 movaps (%edx), %xmm6
3398 psrldq $3, %xmm6 /* keep the last 13 bytes, moved to the bottom */
3399 mov $3, %esi
3400 palignr $13, %xmm1, %xmm6 /* %xmm6 = %xmm1[13..15] : kept bytes */
3401 movaps %xmm6, (%edx)
3402 test %eax, %eax
3403 jnz L(CopyFrom1To16BytesCase2)
3404 jmp L(CopyFrom1To16BytesCase3)
3405
/* StrncpyExit14Case2OrCase3: rewrite the 16 bytes at (%edx) as bytes
   14..15 of %xmm1 followed by the block's existing last 14 bytes, set
   %esi = 2, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 14 destination bytes with the
   values just loaded from them.  */
3406 L(StrncpyExit14Case2OrCase3):
3407 movaps (%edx), %xmm6
3408 psrldq $2, %xmm6 /* keep the last 14 bytes, moved to the bottom */
3409 mov $2, %esi
3410 palignr $14, %xmm1, %xmm6 /* %xmm6 = %xmm1[14..15] : kept bytes */
3411 movaps %xmm6, (%edx)
3412 test %eax, %eax
3413 jnz L(CopyFrom1To16BytesCase2)
3414 jmp L(CopyFrom1To16BytesCase3)
3415
/* StrncpyExit15Case2OrCase3: rewrite the 16 bytes at (%edx) as byte 15
   of %xmm1 followed by the block's existing last 15 bytes, set
   %esi = 1, then branch on %eax (non-zero: Case2, zero: Case3; both
   targets are outside this chunk).
   NOTE(review): the store rewrites 15 destination bytes with the
   values just loaded from them.  */
3416 L(StrncpyExit15Case2OrCase3):
3417 movaps (%edx), %xmm6
3418 psrldq $1, %xmm6 /* keep the last 15 bytes, moved to the bottom */
3419 mov $1, %esi
3420 palignr $15, %xmm1, %xmm6 /* %xmm6 = %xmm1[15] : kept bytes */
3421 movaps %xmm6, (%edx)
3422 test %eax, %eax
3423 jnz L(CopyFrom1To16BytesCase2)
3424 jmp L(CopyFrom1To16BytesCase3)
3425
/* StrncpyLeave1 / StrncpyExit1: strncpy-only drain for palignr shift 1.
   While the biased count in %ebx stays positive, up to four 16-byte
   blocks are stored at %edx+0/16/32/48 and %esi advances by 16 per
   block; %xmm1 is kept as the source block that L(StrncpyExit1)
   finishes from.  L(StrncpyExit1) then merges bytes 1..15 of %xmm1
   into the block at (%edx,%esi), adds 15 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3426 L(StrncpyLeave1):
3427 movaps %xmm2, %xmm3
3428 add $48, %ebx
3429 jle L(StrncpyExit1)
3430 palignr $1, %xmm1, %xmm2 /* %xmm2 = %xmm1[1..15] : %xmm2[0] */
3431 movaps %xmm3, %xmm1
3432 movaps %xmm2, (%edx)
3433 movaps 31(%ecx), %xmm2 /* aligned load of the next source block */
3434 lea 16(%esi), %esi
3435 movaps %xmm2, %xmm3
3436 sub $16, %ebx
3437 jbe L(StrncpyExit1)
3438 palignr $1, %xmm1, %xmm2
3439 movaps %xmm2, 16(%edx)
3440 movaps 31+16(%ecx), %xmm2
3441 movaps %xmm3, %xmm1
3442 lea 16(%esi), %esi
3443 sub $16, %ebx
3444 jbe L(StrncpyExit1)
3445 movaps %xmm2, %xmm1
3446 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3447 lea 16(%esi), %esi
3448 sub $16, %ebx
3449 jbe L(StrncpyExit1)
3450 movaps %xmm7, %xmm1
3451 movaps %xmm5, 48(%edx)
3452 lea 16(%esi), %esi
3453 lea -16(%ebx), %ebx
3454
3455 L(StrncpyExit1):
3456 movaps (%edx, %esi), %xmm6
3457 psrldq $15, %xmm6 /* keep the block's last 1 byte, moved to the bottom */
3458 palignr $1, %xmm1, %xmm6 /* %xmm6 = %xmm1[1..15] : kept byte */
3459 movaps %xmm6, (%edx, %esi)
3460 lea 15(%esi), %esi
3461 jmp L(CopyFrom1To16BytesCase3)
3462
/* StrncpyLeave2 / StrncpyExit2: strncpy-only drain for palignr shift 2.
   While the biased count in %ebx stays positive, up to four 16-byte
   blocks are stored at %edx+0/16/32/48 and %esi advances by 16 per
   block; %xmm1 is kept as the source block that L(StrncpyExit2)
   finishes from.  L(StrncpyExit2) then merges bytes 2..15 of %xmm1
   into the block at (%edx,%esi), adds 14 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3463 L(StrncpyLeave2):
3464 movaps %xmm2, %xmm3
3465 add $48, %ebx
3466 jle L(StrncpyExit2)
3467 palignr $2, %xmm1, %xmm2 /* %xmm2 = %xmm1[2..15] : %xmm2[0..1] */
3468 movaps %xmm3, %xmm1
3469 movaps %xmm2, (%edx)
3470 movaps 30(%ecx), %xmm2 /* aligned load of the next source block */
3471 lea 16(%esi), %esi
3472 movaps %xmm2, %xmm3
3473 sub $16, %ebx
3474 jbe L(StrncpyExit2)
3475 palignr $2, %xmm1, %xmm2
3476 movaps %xmm2, 16(%edx)
3477 movaps 30+16(%ecx), %xmm2
3478 movaps %xmm3, %xmm1
3479 lea 16(%esi), %esi
3480 sub $16, %ebx
3481 jbe L(StrncpyExit2)
3482 movaps %xmm2, %xmm1
3483 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3484 lea 16(%esi), %esi
3485 sub $16, %ebx
3486 jbe L(StrncpyExit2)
3487 movaps %xmm7, %xmm1
3488 movaps %xmm5, 48(%edx)
3489 lea 16(%esi), %esi
3490 lea -16(%ebx), %ebx
3491
3492 L(StrncpyExit2):
3493 movaps (%edx, %esi), %xmm6
3494 psrldq $14, %xmm6 /* keep the block's last 2 bytes, moved to the bottom */
3495 palignr $2, %xmm1, %xmm6 /* %xmm6 = %xmm1[2..15] : kept bytes */
3496 movaps %xmm6, (%edx, %esi)
3497 lea 14(%esi), %esi
3498 jmp L(CopyFrom1To16BytesCase3)
3499
/* StrncpyLeave3 / StrncpyExit3: strncpy-only drain for palignr shift 3.
   While the biased count in %ebx stays positive, up to four 16-byte
   blocks are stored at %edx+0/16/32/48 and %esi advances by 16 per
   block; %xmm1 is kept as the source block that L(StrncpyExit3)
   finishes from.  L(StrncpyExit3) then merges bytes 3..15 of %xmm1
   into the block at (%edx,%esi), adds 13 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3500 L(StrncpyLeave3):
3501 movaps %xmm2, %xmm3
3502 add $48, %ebx
3503 jle L(StrncpyExit3)
3504 palignr $3, %xmm1, %xmm2 /* %xmm2 = %xmm1[3..15] : %xmm2[0..2] */
3505 movaps %xmm3, %xmm1
3506 movaps %xmm2, (%edx)
3507 movaps 29(%ecx), %xmm2 /* aligned load of the next source block */
3508 lea 16(%esi), %esi
3509 movaps %xmm2, %xmm3
3510 sub $16, %ebx
3511 jbe L(StrncpyExit3)
3512 palignr $3, %xmm1, %xmm2
3513 movaps %xmm2, 16(%edx)
3514 movaps 29+16(%ecx), %xmm2
3515 movaps %xmm3, %xmm1
3516 lea 16(%esi), %esi
3517 sub $16, %ebx
3518 jbe L(StrncpyExit3)
3519 movaps %xmm2, %xmm1
3520 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3521 lea 16(%esi), %esi
3522 sub $16, %ebx
3523 jbe L(StrncpyExit3)
3524 movaps %xmm7, %xmm1
3525 movaps %xmm5, 48(%edx)
3526 lea 16(%esi), %esi
3527 lea -16(%ebx), %ebx
3528
3529 L(StrncpyExit3):
3530 movaps (%edx, %esi), %xmm6
3531 psrldq $13, %xmm6 /* keep the block's last 3 bytes, moved to the bottom */
3532 palignr $3, %xmm1, %xmm6 /* %xmm6 = %xmm1[3..15] : kept bytes */
3533 movaps %xmm6, (%edx, %esi)
3534 lea 13(%esi), %esi
3535 jmp L(CopyFrom1To16BytesCase3)
3536
/* StrncpyLeave4 / StrncpyExit4: strncpy-only drain for palignr shift 4.
   While the biased count in %ebx stays positive, up to four 16-byte
   blocks are stored at %edx+0/16/32/48 and %esi advances by 16 per
   block; %xmm1 is kept as the source block that L(StrncpyExit4)
   finishes from.  L(StrncpyExit4) then merges bytes 4..15 of %xmm1
   into the block at (%edx,%esi), adds 12 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3537 L(StrncpyLeave4):
3538 movaps %xmm2, %xmm3
3539 add $48, %ebx
3540 jle L(StrncpyExit4)
3541 palignr $4, %xmm1, %xmm2 /* %xmm2 = %xmm1[4..15] : %xmm2[0..3] */
3542 movaps %xmm3, %xmm1
3543 movaps %xmm2, (%edx)
3544 movaps 28(%ecx), %xmm2 /* aligned load of the next source block */
3545 lea 16(%esi), %esi
3546 movaps %xmm2, %xmm3
3547 sub $16, %ebx
3548 jbe L(StrncpyExit4)
3549 palignr $4, %xmm1, %xmm2
3550 movaps %xmm2, 16(%edx)
3551 movaps 28+16(%ecx), %xmm2
3552 movaps %xmm3, %xmm1
3553 lea 16(%esi), %esi
3554 sub $16, %ebx
3555 jbe L(StrncpyExit4)
3556 movaps %xmm2, %xmm1
3557 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3558 lea 16(%esi), %esi
3559 sub $16, %ebx
3560 jbe L(StrncpyExit4)
3561 movaps %xmm7, %xmm1
3562 movaps %xmm5, 48(%edx)
3563 lea 16(%esi), %esi
3564 lea -16(%ebx), %ebx
3565
3566 L(StrncpyExit4):
3567 movaps (%edx, %esi), %xmm6
3568 psrldq $12, %xmm6 /* keep the block's last 4 bytes, moved to the bottom */
3569 palignr $4, %xmm1, %xmm6 /* %xmm6 = %xmm1[4..15] : kept bytes */
3570 movaps %xmm6, (%edx, %esi)
3571 lea 12(%esi), %esi
3572 jmp L(CopyFrom1To16BytesCase3)
3573
/* StrncpyLeave5 / StrncpyExit5: strncpy-only drain for palignr shift 5.
   While the biased count in %ebx stays positive, up to four 16-byte
   blocks are stored at %edx+0/16/32/48 and %esi advances by 16 per
   block; %xmm1 is kept as the source block that L(StrncpyExit5)
   finishes from.  L(StrncpyExit5) then merges bytes 5..15 of %xmm1
   into the block at (%edx,%esi), adds 11 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3574 L(StrncpyLeave5):
3575 movaps %xmm2, %xmm3
3576 add $48, %ebx
3577 jle L(StrncpyExit5)
3578 palignr $5, %xmm1, %xmm2 /* %xmm2 = %xmm1[5..15] : %xmm2[0..4] */
3579 movaps %xmm3, %xmm1
3580 movaps %xmm2, (%edx)
3581 movaps 27(%ecx), %xmm2 /* aligned load of the next source block */
3582 lea 16(%esi), %esi
3583 movaps %xmm2, %xmm3
3584 sub $16, %ebx
3585 jbe L(StrncpyExit5)
3586 palignr $5, %xmm1, %xmm2
3587 movaps %xmm2, 16(%edx)
3588 movaps 27+16(%ecx), %xmm2
3589 movaps %xmm3, %xmm1
3590 lea 16(%esi), %esi
3591 sub $16, %ebx
3592 jbe L(StrncpyExit5)
3593 movaps %xmm2, %xmm1
3594 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3595 lea 16(%esi), %esi
3596 sub $16, %ebx
3597 jbe L(StrncpyExit5)
3598 movaps %xmm7, %xmm1
3599 movaps %xmm5, 48(%edx)
3600 lea 16(%esi), %esi
3601 lea -16(%ebx), %ebx
3602
3603 L(StrncpyExit5):
3604 movaps (%edx, %esi), %xmm6
3605 psrldq $11, %xmm6 /* keep the block's last 5 bytes, moved to the bottom */
3606 palignr $5, %xmm1, %xmm6 /* %xmm6 = %xmm1[5..15] : kept bytes */
3607 movaps %xmm6, (%edx, %esi)
3608 lea 11(%esi), %esi
3609 jmp L(CopyFrom1To16BytesCase3)
3610
/* StrncpyLeave6 / StrncpyExit6: strncpy-only drain for palignr shift 6.
   While the biased count in %ebx stays positive, up to four 16-byte
   blocks are stored at %edx+0/16/32/48 and %esi advances by 16 per
   block; %xmm1 is kept as the source block that L(StrncpyExit6)
   finishes from.  L(StrncpyExit6) then merges bytes 6..15 of %xmm1
   into the block at (%edx,%esi), adds 10 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3611 L(StrncpyLeave6):
3612 movaps %xmm2, %xmm3
3613 add $48, %ebx
3614 jle L(StrncpyExit6)
3615 palignr $6, %xmm1, %xmm2 /* %xmm2 = %xmm1[6..15] : %xmm2[0..5] */
3616 movaps %xmm3, %xmm1
3617 movaps %xmm2, (%edx)
3618 movaps 26(%ecx), %xmm2 /* aligned load of the next source block */
3619 lea 16(%esi), %esi
3620 movaps %xmm2, %xmm3
3621 sub $16, %ebx
3622 jbe L(StrncpyExit6)
3623 palignr $6, %xmm1, %xmm2
3624 movaps %xmm2, 16(%edx)
3625 movaps 26+16(%ecx), %xmm2
3626 movaps %xmm3, %xmm1
3627 lea 16(%esi), %esi
3628 sub $16, %ebx
3629 jbe L(StrncpyExit6)
3630 movaps %xmm2, %xmm1
3631 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3632 lea 16(%esi), %esi
3633 sub $16, %ebx
3634 jbe L(StrncpyExit6)
3635 movaps %xmm7, %xmm1
3636 movaps %xmm5, 48(%edx)
3637 lea 16(%esi), %esi
3638 lea -16(%ebx), %ebx
3639
3640 L(StrncpyExit6):
3641 movaps (%edx, %esi), %xmm6
3642 psrldq $10, %xmm6 /* keep the block's last 6 bytes, moved to the bottom */
3643 palignr $6, %xmm1, %xmm6 /* %xmm6 = %xmm1[6..15] : kept bytes */
3644 movaps %xmm6, (%edx, %esi)
3645 lea 10(%esi), %esi
3646 jmp L(CopyFrom1To16BytesCase3)
3647
/* StrncpyLeave7 / StrncpyExit7: strncpy-only drain for palignr shift 7.
   While the biased count in %ebx stays positive, up to four 16-byte
   blocks are stored at %edx+0/16/32/48 and %esi advances by 16 per
   block; %xmm1 is kept as the source block that L(StrncpyExit7)
   finishes from.  L(StrncpyExit7) then merges bytes 7..15 of %xmm1
   into the block at (%edx,%esi), adds 9 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3648 L(StrncpyLeave7):
3649 movaps %xmm2, %xmm3
3650 add $48, %ebx
3651 jle L(StrncpyExit7)
3652 palignr $7, %xmm1, %xmm2 /* %xmm2 = %xmm1[7..15] : %xmm2[0..6] */
3653 movaps %xmm3, %xmm1
3654 movaps %xmm2, (%edx)
3655 movaps 25(%ecx), %xmm2 /* aligned load of the next source block */
3656 lea 16(%esi), %esi
3657 movaps %xmm2, %xmm3
3658 sub $16, %ebx
3659 jbe L(StrncpyExit7)
3660 palignr $7, %xmm1, %xmm2
3661 movaps %xmm2, 16(%edx)
3662 movaps 25+16(%ecx), %xmm2
3663 movaps %xmm3, %xmm1
3664 lea 16(%esi), %esi
3665 sub $16, %ebx
3666 jbe L(StrncpyExit7)
3667 movaps %xmm2, %xmm1
3668 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3669 lea 16(%esi), %esi
3670 sub $16, %ebx
3671 jbe L(StrncpyExit7)
3672 movaps %xmm7, %xmm1
3673 movaps %xmm5, 48(%edx)
3674 lea 16(%esi), %esi
3675 lea -16(%ebx), %ebx
3676
3677 L(StrncpyExit7):
3678 movaps (%edx, %esi), %xmm6
3679 psrldq $9, %xmm6 /* keep the block's last 7 bytes, moved to the bottom */
3680 palignr $7, %xmm1, %xmm6 /* %xmm6 = %xmm1[7..15] : kept bytes */
3681 movaps %xmm6, (%edx, %esi)
3682 lea 9(%esi), %esi
3683 jmp L(CopyFrom1To16BytesCase3)
3684
/* StrncpyLeave8 / StrncpyExit8: strncpy-only drain for palignr shift 8.
   While the biased count in %ebx stays positive, up to four 16-byte
   blocks are stored at %edx+0/16/32/48 and %esi advances by 16 per
   block; %xmm1 is kept as the source block that L(StrncpyExit8)
   finishes from.  L(StrncpyExit8) then merges bytes 8..15 of %xmm1
   into the block at (%edx,%esi), adds 8 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3685 L(StrncpyLeave8):
3686 movaps %xmm2, %xmm3
3687 add $48, %ebx
3688 jle L(StrncpyExit8)
3689 palignr $8, %xmm1, %xmm2 /* %xmm2 = %xmm1[8..15] : %xmm2[0..7] */
3690 movaps %xmm3, %xmm1
3691 movaps %xmm2, (%edx)
3692 movaps 24(%ecx), %xmm2 /* aligned load of the next source block */
3693 lea 16(%esi), %esi
3694 movaps %xmm2, %xmm3
3695 sub $16, %ebx
3696 jbe L(StrncpyExit8)
3697 palignr $8, %xmm1, %xmm2
3698 movaps %xmm2, 16(%edx)
3699 movaps 24+16(%ecx), %xmm2
3700 movaps %xmm3, %xmm1
3701 lea 16(%esi), %esi
3702 sub $16, %ebx
3703 jbe L(StrncpyExit8)
3704 movaps %xmm2, %xmm1
3705 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3706 lea 16(%esi), %esi
3707 sub $16, %ebx
3708 jbe L(StrncpyExit8)
3709 movaps %xmm7, %xmm1
3710 movaps %xmm5, 48(%edx)
3711 lea 16(%esi), %esi
3712 lea -16(%ebx), %ebx
3713
3714 L(StrncpyExit8):
3715 movaps (%edx, %esi), %xmm6
3716 psrldq $8, %xmm6 /* keep the block's last 8 bytes, moved to the bottom */
3717 palignr $8, %xmm1, %xmm6 /* %xmm6 = %xmm1[8..15] : kept bytes */
3718 movaps %xmm6, (%edx, %esi)
3719 lea 8(%esi), %esi
3720 jmp L(CopyFrom1To16BytesCase3)
3721
/* StrncpyLeave9 / StrncpyExit9: strncpy-only drain for palignr shift 9.
   While the biased count in %ebx stays positive, up to four 16-byte
   blocks are stored at %edx+0/16/32/48 and %esi advances by 16 per
   block; %xmm1 is kept as the source block that L(StrncpyExit9)
   finishes from.  L(StrncpyExit9) then merges bytes 9..15 of %xmm1
   into the block at (%edx,%esi), adds 7 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3722 L(StrncpyLeave9):
3723 movaps %xmm2, %xmm3
3724 add $48, %ebx
3725 jle L(StrncpyExit9)
3726 palignr $9, %xmm1, %xmm2 /* %xmm2 = %xmm1[9..15] : %xmm2[0..8] */
3727 movaps %xmm3, %xmm1
3728 movaps %xmm2, (%edx)
3729 movaps 23(%ecx), %xmm2 /* aligned load of the next source block */
3730 lea 16(%esi), %esi
3731 movaps %xmm2, %xmm3
3732 sub $16, %ebx
3733 jbe L(StrncpyExit9)
3734 palignr $9, %xmm1, %xmm2
3735 movaps %xmm2, 16(%edx)
3736 movaps 23+16(%ecx), %xmm2
3737 movaps %xmm3, %xmm1
3738 lea 16(%esi), %esi
3739 sub $16, %ebx
3740 jbe L(StrncpyExit9)
3741 movaps %xmm2, %xmm1
3742 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3743 lea 16(%esi), %esi
3744 sub $16, %ebx
3745 jbe L(StrncpyExit9)
3746 movaps %xmm7, %xmm1
3747 movaps %xmm5, 48(%edx)
3748 lea 16(%esi), %esi
3749 lea -16(%ebx), %ebx
3750
3751 L(StrncpyExit9):
3752 movaps (%edx, %esi), %xmm6
3753 psrldq $7, %xmm6 /* keep the block's last 9 bytes, moved to the bottom */
3754 palignr $9, %xmm1, %xmm6 /* %xmm6 = %xmm1[9..15] : kept bytes */
3755 movaps %xmm6, (%edx, %esi)
3756 lea 7(%esi), %esi
3757 jmp L(CopyFrom1To16BytesCase3)
3758
/* StrncpyLeave10 / StrncpyExit10: strncpy-only drain for palignr shift
   10.  While the biased count in %ebx stays positive, up to four
   16-byte blocks are stored at %edx+0/16/32/48 and %esi advances by 16
   per block; %xmm1 is kept as the source block that L(StrncpyExit10)
   finishes from.  L(StrncpyExit10) then merges bytes 10..15 of %xmm1
   into the block at (%edx,%esi), adds 6 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3759 L(StrncpyLeave10):
3760 movaps %xmm2, %xmm3
3761 add $48, %ebx
3762 jle L(StrncpyExit10)
3763 palignr $10, %xmm1, %xmm2 /* %xmm2 = %xmm1[10..15] : %xmm2[0..9] */
3764 movaps %xmm3, %xmm1
3765 movaps %xmm2, (%edx)
3766 movaps 22(%ecx), %xmm2 /* aligned load of the next source block */
3767 lea 16(%esi), %esi
3768 movaps %xmm2, %xmm3
3769 sub $16, %ebx
3770 jbe L(StrncpyExit10)
3771 palignr $10, %xmm1, %xmm2
3772 movaps %xmm2, 16(%edx)
3773 movaps 22+16(%ecx), %xmm2
3774 movaps %xmm3, %xmm1
3775 lea 16(%esi), %esi
3776 sub $16, %ebx
3777 jbe L(StrncpyExit10)
3778 movaps %xmm2, %xmm1
3779 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3780 lea 16(%esi), %esi
3781 sub $16, %ebx
3782 jbe L(StrncpyExit10)
3783 movaps %xmm7, %xmm1
3784 movaps %xmm5, 48(%edx)
3785 lea 16(%esi), %esi
3786 lea -16(%ebx), %ebx
3787
3788 L(StrncpyExit10):
3789 movaps (%edx, %esi), %xmm6
3790 psrldq $6, %xmm6 /* keep the block's last 10 bytes, moved to the bottom */
3791 palignr $10, %xmm1, %xmm6 /* %xmm6 = %xmm1[10..15] : kept bytes */
3792 movaps %xmm6, (%edx, %esi)
3793 lea 6(%esi), %esi
3794 jmp L(CopyFrom1To16BytesCase3)
3795
/* StrncpyLeave11 / StrncpyExit11: strncpy-only drain for palignr shift
   11.  While the biased count in %ebx stays positive, up to four
   16-byte blocks are stored at %edx+0/16/32/48 and %esi advances by 16
   per block; %xmm1 is kept as the source block that L(StrncpyExit11)
   finishes from.  L(StrncpyExit11) then merges bytes 11..15 of %xmm1
   into the block at (%edx,%esi), adds 5 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3796 L(StrncpyLeave11):
3797 movaps %xmm2, %xmm3
3798 add $48, %ebx
3799 jle L(StrncpyExit11)
3800 palignr $11, %xmm1, %xmm2 /* %xmm2 = %xmm1[11..15] : %xmm2[0..10] */
3801 movaps %xmm3, %xmm1
3802 movaps %xmm2, (%edx)
3803 movaps 21(%ecx), %xmm2 /* aligned load of the next source block */
3804 lea 16(%esi), %esi
3805 movaps %xmm2, %xmm3
3806 sub $16, %ebx
3807 jbe L(StrncpyExit11)
3808 palignr $11, %xmm1, %xmm2
3809 movaps %xmm2, 16(%edx)
3810 movaps 21+16(%ecx), %xmm2
3811 movaps %xmm3, %xmm1
3812 lea 16(%esi), %esi
3813 sub $16, %ebx
3814 jbe L(StrncpyExit11)
3815 movaps %xmm2, %xmm1
3816 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3817 lea 16(%esi), %esi
3818 sub $16, %ebx
3819 jbe L(StrncpyExit11)
3820 movaps %xmm7, %xmm1
3821 movaps %xmm5, 48(%edx)
3822 lea 16(%esi), %esi
3823 lea -16(%ebx), %ebx
3824
3825 L(StrncpyExit11):
3826 movaps (%edx, %esi), %xmm6
3827 psrldq $5, %xmm6 /* keep the block's last 11 bytes, moved to the bottom */
3828 palignr $11, %xmm1, %xmm6 /* %xmm6 = %xmm1[11..15] : kept bytes */
3829 movaps %xmm6, (%edx, %esi)
3830 lea 5(%esi), %esi
3831 jmp L(CopyFrom1To16BytesCase3)
3832
/* StrncpyLeave12 / StrncpyExit12: strncpy-only drain for palignr shift
   12.  While the biased count in %ebx stays positive, up to four
   16-byte blocks are stored at %edx+0/16/32/48 and %esi advances by 16
   per block; %xmm1 is kept as the source block that L(StrncpyExit12)
   finishes from.  L(StrncpyExit12) then merges bytes 12..15 of %xmm1
   into the block at (%edx,%esi), adds 4 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3833 L(StrncpyLeave12):
3834 movaps %xmm2, %xmm3
3835 add $48, %ebx
3836 jle L(StrncpyExit12)
3837 palignr $12, %xmm1, %xmm2 /* %xmm2 = %xmm1[12..15] : %xmm2[0..11] */
3838 movaps %xmm3, %xmm1
3839 movaps %xmm2, (%edx)
3840 movaps 20(%ecx), %xmm2 /* aligned load of the next source block */
3841 lea 16(%esi), %esi
3842 movaps %xmm2, %xmm3
3843 sub $16, %ebx
3844 jbe L(StrncpyExit12)
3845 palignr $12, %xmm1, %xmm2
3846 movaps %xmm2, 16(%edx)
3847 movaps 20+16(%ecx), %xmm2
3848 movaps %xmm3, %xmm1
3849 lea 16(%esi), %esi
3850 sub $16, %ebx
3851 jbe L(StrncpyExit12)
3852 movaps %xmm2, %xmm1
3853 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3854 lea 16(%esi), %esi
3855 sub $16, %ebx
3856 jbe L(StrncpyExit12)
3857 movaps %xmm7, %xmm1
3858 movaps %xmm5, 48(%edx)
3859 lea 16(%esi), %esi
3860 lea -16(%ebx), %ebx
3861
3862 L(StrncpyExit12):
3863 movaps (%edx, %esi), %xmm6
3864 psrldq $4, %xmm6 /* keep the block's last 12 bytes, moved to the bottom */
3865 palignr $12, %xmm1, %xmm6 /* %xmm6 = %xmm1[12..15] : kept bytes */
3866 movaps %xmm6, (%edx, %esi)
3867 lea 4(%esi), %esi
3868 jmp L(CopyFrom1To16BytesCase3)
3869
/* StrncpyLeave13 / StrncpyExit13: strncpy-only drain for palignr shift
   13.  While the biased count in %ebx stays positive, up to four
   16-byte blocks are stored at %edx+0/16/32/48 and %esi advances by 16
   per block; %xmm1 is kept as the source block that L(StrncpyExit13)
   finishes from.  L(StrncpyExit13) then merges bytes 13..15 of %xmm1
   into the block at (%edx,%esi), adds 3 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3870 L(StrncpyLeave13):
3871 movaps %xmm2, %xmm3
3872 add $48, %ebx
3873 jle L(StrncpyExit13)
3874 palignr $13, %xmm1, %xmm2 /* %xmm2 = %xmm1[13..15] : %xmm2[0..12] */
3875 movaps %xmm3, %xmm1
3876 movaps %xmm2, (%edx)
3877 movaps 19(%ecx), %xmm2 /* aligned load of the next source block */
3878 lea 16(%esi), %esi
3879 movaps %xmm2, %xmm3
3880 sub $16, %ebx
3881 jbe L(StrncpyExit13)
3882 palignr $13, %xmm1, %xmm2
3883 movaps %xmm2, 16(%edx)
3884 movaps 19+16(%ecx), %xmm2
3885 movaps %xmm3, %xmm1
3886 lea 16(%esi), %esi
3887 sub $16, %ebx
3888 jbe L(StrncpyExit13)
3889 movaps %xmm2, %xmm1
3890 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3891 lea 16(%esi), %esi
3892 sub $16, %ebx
3893 jbe L(StrncpyExit13)
3894 movaps %xmm7, %xmm1
3895 movaps %xmm5, 48(%edx)
3896 lea 16(%esi), %esi
3897 lea -16(%ebx), %ebx
3898
3899 L(StrncpyExit13):
3900 movaps (%edx, %esi), %xmm6
3901 psrldq $3, %xmm6 /* keep the block's last 13 bytes, moved to the bottom */
3902 palignr $13, %xmm1, %xmm6 /* %xmm6 = %xmm1[13..15] : kept bytes */
3903 movaps %xmm6, (%edx, %esi)
3904 lea 3(%esi), %esi
3905 jmp L(CopyFrom1To16BytesCase3)
3906
/* StrncpyLeave14 / StrncpyExit14: strncpy-only drain for palignr shift
   14.  While the biased count in %ebx stays positive, up to four
   16-byte blocks are stored at %edx+0/16/32/48 and %esi advances by 16
   per block; %xmm1 is kept as the source block that L(StrncpyExit14)
   finishes from.  L(StrncpyExit14) then merges bytes 14..15 of %xmm1
   into the block at (%edx,%esi), adds 2 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3907 L(StrncpyLeave14):
3908 movaps %xmm2, %xmm3
3909 add $48, %ebx
3910 jle L(StrncpyExit14)
3911 palignr $14, %xmm1, %xmm2 /* %xmm2 = %xmm1[14..15] : %xmm2[0..13] */
3912 movaps %xmm3, %xmm1
3913 movaps %xmm2, (%edx)
3914 movaps 18(%ecx), %xmm2 /* aligned load of the next source block */
3915 lea 16(%esi), %esi
3916 movaps %xmm2, %xmm3
3917 sub $16, %ebx
3918 jbe L(StrncpyExit14)
3919 palignr $14, %xmm1, %xmm2
3920 movaps %xmm2, 16(%edx)
3921 movaps 18+16(%ecx), %xmm2
3922 movaps %xmm3, %xmm1
3923 lea 16(%esi), %esi
3924 sub $16, %ebx
3925 jbe L(StrncpyExit14)
3926 movaps %xmm2, %xmm1
3927 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3928 lea 16(%esi), %esi
3929 sub $16, %ebx
3930 jbe L(StrncpyExit14)
3931 movaps %xmm7, %xmm1
3932 movaps %xmm5, 48(%edx)
3933 lea 16(%esi), %esi
3934 lea -16(%ebx), %ebx
3935
3936 L(StrncpyExit14):
3937 movaps (%edx, %esi), %xmm6
3938 psrldq $2, %xmm6 /* keep the block's last 14 bytes, moved to the bottom */
3939 palignr $14, %xmm1, %xmm6 /* %xmm6 = %xmm1[14..15] : kept bytes */
3940 movaps %xmm6, (%edx, %esi)
3941 lea 2(%esi), %esi
3942 jmp L(CopyFrom1To16BytesCase3)
3943
/* StrncpyLeave15 / StrncpyExit15: strncpy-only drain for palignr shift
   15.  While the biased count in %ebx stays positive, up to four
   16-byte blocks are stored at %edx+0/16/32/48 and %esi advances by 16
   per block; %xmm1 is kept as the source block that L(StrncpyExit15)
   finishes from.  L(StrncpyExit15) then merges byte 15 of %xmm1 into
   the block at (%edx,%esi), adds 1 to %esi and jumps to
   L(CopyFrom1To16BytesCase3) (outside this chunk).
   NOTE(review): the entry values of %xmm1/%xmm2/%xmm4/%xmm5/%xmm7 and
   the bias of %ebx come from code outside this chunk -- confirm.  */
3944 L(StrncpyLeave15):
3945 movaps %xmm2, %xmm3
3946 add $48, %ebx
3947 jle L(StrncpyExit15)
3948 palignr $15, %xmm1, %xmm2 /* %xmm2 = %xmm1[15] : %xmm2[0..14] */
3949 movaps %xmm3, %xmm1
3950 movaps %xmm2, (%edx)
3951 movaps 17(%ecx), %xmm2 /* aligned load of the next source block */
3952 lea 16(%esi), %esi
3953 movaps %xmm2, %xmm3
3954 sub $16, %ebx
3955 jbe L(StrncpyExit15)
3956 palignr $15, %xmm1, %xmm2
3957 movaps %xmm2, 16(%edx)
3958 movaps 17+16(%ecx), %xmm2
3959 movaps %xmm3, %xmm1
3960 lea 16(%esi), %esi
3961 sub $16, %ebx
3962 jbe L(StrncpyExit15)
3963 movaps %xmm2, %xmm1
3964 movaps %xmm4, 32(%edx) /* NOTE(review): presumably already shifted -- confirm */
3965 lea 16(%esi), %esi
3966 sub $16, %ebx
3967 jbe L(StrncpyExit15)
3968 movaps %xmm7, %xmm1
3969 movaps %xmm5, 48(%edx)
3970 lea 16(%esi), %esi
3971 lea -16(%ebx), %ebx
3972
3973 L(StrncpyExit15):
3974 movaps (%edx, %esi), %xmm6
3975 psrldq $1, %xmm6 /* keep the block's last 15 bytes, moved to the bottom */
3976 palignr $15, %xmm1, %xmm6 /* %xmm6 = %xmm1[15] : kept bytes */
3977 movaps %xmm6, (%edx, %esi)
3978 lea 1(%esi), %esi
3979 jmp L(CopyFrom1To16BytesCase3)
3980
/* ExitTail0: strncpy-only exit that copies nothing; returns with
   %eax = %edx, the destination pointer.  */
3981 .p2align 4
3982 L(ExitTail0):
3983 movl %edx, %eax
3984 RETURN
3985
/* StrncpyExit15Bytes: strncpy-only choice of the tail copy for 9..15
   bytes.  For k = 9..14 in turn: if the count in %ebx equals k, or
   source byte k-1 at (%ecx) is NUL, L(ExitTailk) copies exactly k
   bytes.  If none matches, control falls through into L(ExitTail15),
   which follows directly.
   NOTE(review): source bytes 0..7 are not examined here; presumably
   the caller has already excluded a NUL there and %ebx >= 9 --
   confirm at the jump site.  */
3986 .p2align 4
3987 L(StrncpyExit15Bytes):
3988 cmp $9, %ebx
3989 je L(ExitTail9)
3990 cmpb $0, 8(%ecx)
3991 jz L(ExitTail9)
3992 cmp $10, %ebx
3993 je L(ExitTail10)
3994 cmpb $0, 9(%ecx)
3995 jz L(ExitTail10)
3996 cmp $11, %ebx
3997 je L(ExitTail11)
3998 cmpb $0, 10(%ecx)
3999 jz L(ExitTail11)
4000 cmp $12, %ebx
4001 je L(ExitTail12)
4002 cmpb $0, 11(%ecx)
4003 jz L(ExitTail12)
4004 cmp $13, %ebx
4005 je L(ExitTail13)
4006 cmpb $0, 12(%ecx)
4007 jz L(ExitTail13)
4008 cmp $14, %ebx
4009 je L(ExitTail14)
4010 cmpb $0, 13(%ecx)
4011 jz L(ExitTail14)
4012 # endif
4013
/* ExitTail15: copy 15 bytes from (%ecx) to (%edx) with two 8-byte
   moves at offsets 0 and 7 (overlapping byte 7), and return.  Also
   reached by fall-through from L(StrncpyExit15Bytes) in the strncpy
   build.  %eax = %edx, the destination; under USE_AS_STPCPY it is the
   address of the last byte copied instead.  With USE_AS_STRNCPY the
   count in %ebx is reduced by the bytes copied and any remainder is
   zero-filled starting at %ecx = destination + 15.  */
4014 .p2align 4
4015 L(ExitTail15):
4016 movlpd (%ecx), %xmm0
4017 movlpd %xmm0, (%edx)
4018 movlpd 7(%ecx), %xmm0
4019 movlpd %xmm0, 7(%edx)
4020 # ifdef USE_AS_STPCPY
4021 lea 14(%edx), %eax
4022 # else
4023 movl %edx, %eax
4024 # endif
4025 # ifdef USE_AS_STRNCPY
4026 sub $15, %ebx
4027 lea 15(%edx), %ecx /* lea keeps the flags of sub for the jnz */
4028 jnz L(StrncpyFillTailWithZero)
4029 # ifdef USE_AS_STPCPY
4030 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
4031 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
4032 # endif
4033 # endif
4034 RETURN
4035
4036 # ifdef USE_AS_STRNCPY
/* StrncpyExit8Bytes: strncpy-only choice of the tail copy for 1..8
   bytes.  For k = 1..7 in turn: if the count in %ebx equals k, or
   source byte k-1 at (%ecx) is NUL, L(ExitTailk) copies exactly k
   bytes.  If none matches, control falls through into L(ExitTail8),
   which follows directly.
   NOTE(review): presumably entered with 1 <= %ebx <= 8 or a NUL known
   to lie in the first 8 source bytes -- confirm at the jump site.  */
4037 .p2align 4
4038 L(StrncpyExit8Bytes):
4039 cmp $1, %ebx
4040 je L(ExitTail1)
4041 cmpb $0, (%ecx)
4042 jz L(ExitTail1)
4043 cmp $2, %ebx
4044 je L(ExitTail2)
4045 cmpb $0, 1(%ecx)
4046 jz L(ExitTail2)
4047 cmp $3, %ebx
4048 je L(ExitTail3)
4049 cmpb $0, 2(%ecx)
4050 jz L(ExitTail3)
4051 cmp $4, %ebx
4052 je L(ExitTail4)
4053 cmpb $0, 3(%ecx)
4054 jz L(ExitTail4)
4055 cmp $5, %ebx
4056 je L(ExitTail5)
4057 cmpb $0, 4(%ecx)
4058 jz L(ExitTail5)
4059 cmp $6, %ebx
4060 je L(ExitTail6)
4061 cmpb $0, 5(%ecx)
4062 jz L(ExitTail6)
4063 cmp $7, %ebx
4064 je L(ExitTail7)
4065 cmpb $0, 6(%ecx)
4066 jz L(ExitTail7)
4067 # endif
/* ExitTail8: copy 8 bytes from (%ecx) to (%edx) with a single 8-byte
   move, and return.  Also reached by fall-through from
   L(StrncpyExit8Bytes) in the strncpy build.  %eax = %edx, the
   destination; under USE_AS_STPCPY it is the address of the last byte
   copied instead.  With USE_AS_STRNCPY the count in %ebx is reduced by
   the bytes copied and any remainder is zero-filled starting at
   %ecx = destination + 8.  */
4068 .p2align 4
4069 L(ExitTail8):
4070 movlpd (%ecx), %xmm0
4071 movlpd %xmm0, (%edx)
4072 # ifdef USE_AS_STPCPY
4073 lea 7(%edx), %eax
4074 # else
4075 movl %edx, %eax
4076 # endif
4077 # ifdef USE_AS_STRNCPY
4078 sub $8, %ebx
4079 lea 8(%edx), %ecx /* lea keeps the flags of sub for the jnz */
4080 jnz L(StrncpyFillTailWithZero)
4081 # ifdef USE_AS_STPCPY
4082 cmpb $1, (%eax) /* CF = 1 iff the last byte copied is NUL */
4083 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL byte */
4084 # endif
4085 # endif
4086 RETURN
4087
4088 END (STRCPY)
4089
4090 #endif