]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/multiarch/strcpy-ssse3.S
Update copyright notices with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / strcpy-ssse3.S
1 /* strcpy with SSSE3
2 Copyright (C) 2011-2014 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20
21 #ifndef NOT_IN_libc
22
23 # ifndef USE_AS_STRCAT
24 # include <sysdep.h>
25
/* CFI bookkeeping for a 4-byte push of REG: the CFA offset grows by 4 and
   REG is recorded as saved at offset 0.  cfi_adjust_cfa_offset and
   cfi_rel_offset come from <sysdep.h> (not shown here); presumably they
   expand to the matching .cfi_* assembler directives -- confirm.  */
26 # define CFI_PUSH(REG) \
27 cfi_adjust_cfa_offset (4); \
28 cfi_rel_offset (REG, 0)
29
/* CFI bookkeeping for a 4-byte pop of REG: the inverse of CFI_PUSH.  */
30 # define CFI_POP(REG) \
31 cfi_adjust_cfa_offset (-4); \
32 cfi_restore (REG)
33
/* pushl/popl paired with the CFI annotation above, so every stack
   adjustment made through these macros is also described for unwinding.  */
34 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
35 # define POP(REG) popl REG; CFI_POP (REG)
36
/* Default symbol name for this implementation; a file that defines STRCPY
   before this point keeps its own name.  */
37 # ifndef STRCPY
38 # define STRCPY __strcpy_ssse3
39 # endif
40
/* Entry/exit sequences.
   strncpy variant: ENTRANCE pushes %ebx (used below to hold the length), so
   the first argument is at 8(%esp) instead of 4(%esp) and every return has
   to pop %ebx again.
   RETURN1 is the exit for code that has additionally pushed %edi.
   The CFI_PUSH after each `ret' emits no instruction; it re-establishes the
   unwind description for the code that textually follows the macro, which
   is reached with the registers still on the stack.  */
41 # ifdef USE_AS_STRNCPY
42 # define PARMS 8
43 # define ENTRANCE PUSH (%ebx)
44 # define RETURN POP (%ebx); ret; CFI_PUSH (%ebx);
45 # define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
46 # else
47 # define PARMS 4
48 # define ENTRANCE
49 # define RETURN ret
50 # define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
51 # endif
52
/* Load the return value into %eax.
   stpcpy variant: both macros return %edx + n.
   Otherwise the argument n is ignored: SAVE_RESULT returns %edi (the entry
   code copies the destination pointer into %edi) and SAVE_RESULT_TAIL
   returns %edx.  */
53 # ifdef USE_AS_STPCPY
54 # define SAVE_RESULT(n) lea n(%edx), %eax
55 # define SAVE_RESULT_TAIL(n) lea n(%edx), %eax
56 # else
57 # define SAVE_RESULT(n) movl %edi, %eax
58 # define SAVE_RESULT_TAIL(n) movl %edx, %eax
59 # endif
60
/* Offsets of the stack arguments relative to %esp at function entry (after
   ENTRANCE): STR1 is loaded into %edx and written through (destination),
   STR2 is loaded into %ecx and read from (source), LEN is only read in the
   strncpy variant.  */
61 # define STR1 PARMS
62 # define STR2 STR1+4
63 # define LEN STR2+4
64
65 /* In this code the following instructions are used for copying:
66 movb - 1 byte
67 movw - 2 bytes
68 movl - 4 bytes
69 movlpd - 8 bytes
70 movaps - 16 bytes - requires 16-byte alignment
71 of source and destination addresses.
72 */
73
74 .text
/* Function entry.  %edx = destination, %ecx = source; in the strncpy
   variant %ebx = maximum byte count.  The first 16 source bytes are probed
   one at a time so that short strings leave through the ExitTailN labels
   (N = index of the NUL byte + 1) without touching any SSE state.  */
75 ENTRY (STRCPY)
76 ENTRANCE
77 mov STR1(%esp), %edx
78 mov STR2(%esp), %ecx
79 # ifdef USE_AS_STRNCPY
80 movl LEN(%esp), %ebx
/* Counts of 8 or less are handled entirely by a separate exit.  */
81 cmp $8, %ebx
82 jbe L(StrncpyExit8Bytes)
83 # endif
/* Bytes 0..7.  */
84 cmpb $0, (%ecx)
85 jz L(ExitTail1)
86 cmpb $0, 1(%ecx)
87 jz L(ExitTail2)
88 cmpb $0, 2(%ecx)
89 jz L(ExitTail3)
90 cmpb $0, 3(%ecx)
91 jz L(ExitTail4)
92 cmpb $0, 4(%ecx)
93 jz L(ExitTail5)
94 cmpb $0, 5(%ecx)
95 jz L(ExitTail6)
96 cmpb $0, 6(%ecx)
97 jz L(ExitTail7)
98 cmpb $0, 7(%ecx)
99 jz L(ExitTail8)
100 # ifdef USE_AS_STRNCPY
/* No NUL in the first 8 bytes and fewer than 16 bytes allowed.  */
101 cmp $16, %ebx
102 jb L(StrncpyExit15Bytes)
103 # endif
/* Bytes 8..14.  */
104 cmpb $0, 8(%ecx)
105 jz L(ExitTail9)
106 cmpb $0, 9(%ecx)
107 jz L(ExitTail10)
108 cmpb $0, 10(%ecx)
109 jz L(ExitTail11)
110 cmpb $0, 11(%ecx)
111 jz L(ExitTail12)
112 cmpb $0, 12(%ecx)
113 jz L(ExitTail13)
114 cmpb $0, 13(%ecx)
115 jz L(ExitTail14)
116 cmpb $0, 14(%ecx)
117 jz L(ExitTail15)
118 # ifdef USE_AS_STRNCPY
/* Exactly 16 bytes allowed: byte 15 need not be inspected, the 16-byte
   exit is taken either way.  */
119 cmp $16, %ebx
120 je L(ExitTail16)
121 # endif
122 cmpb $0, 15(%ecx)
123 jz L(ExitTail16)
124
/* Long path: keep the original destination pointer in %edi (this is the
   value SAVE_RESULT returns in the non-stpcpy build).  */
125 PUSH (%edi)
126 mov %edx, %edi
/* Closes the `# ifndef USE_AS_STRCAT' opened before the macro definitions;
   presumably a strcat build supplies its own entry code and joins the
   common code below -- confirm against the including file.  */
127 # endif
/* Copy the first 16 bytes unaligned, align the destination to 16 bytes,
   then dispatch on the resulting source misalignment (0..15).  */
128 PUSH (%esi)
129 # ifdef USE_AS_STRNCPY
130 mov %ecx, %esi
131 sub $16, %ebx
132 and $0xf, %esi
133
134 /* %ebx = count - 16 + (source address & 0xf).  */
135
136 add %esi, %ebx
137 # endif
/* %esi = first 16-byte boundary strictly above the source pointer.  */
138 lea 16(%ecx), %esi
139 and $-16, %esi
/* %xmm0 is the all-zero comparand for the NUL searches.  After a pcmpeqb
   that finds no NUL it is all-zero again, so it is reused without being
   cleared.  */
140 pxor %xmm0, %xmm0
/* Bytes 0..7 (already known to contain no NUL).  */
141 movlpd (%ecx), %xmm1
142 movlpd %xmm1, (%edx)
143
/* Search the first aligned source block for a NUL.  */
144 pcmpeqb (%esi), %xmm0
/* Bytes 8..15.  */
145 movlpd 8(%ecx), %xmm1
146 movlpd %xmm1, 8(%edx)
147
148 pmovmskb %xmm0, %eax
/* %esi = distance from the source pointer to that aligned block (1..16).  */
149 sub %ecx, %esi
150
151 # ifdef USE_AS_STRNCPY
152 sub $16, %ebx
153 jbe L(CopyFrom1To16BytesCase2OrCase3)
154 # endif
155 test %eax, %eax
156 jnz L(CopyFrom1To16Bytes)
157
/* Advance %edx to the next 16-byte boundary; %eax = old %edx - new %edx,
   i.e. minus the number of bytes skipped (-16..-1).  */
158 mov %edx, %eax
159 lea 16(%edx), %edx
160 and $-16, %edx
161 sub %edx, %eax
162
163 # ifdef USE_AS_STRNCPY
/* If (%esi + %eax - 1) is not negative, give 16 bytes back to the count in
   %ebx; otherwise leave it unchanged.  */
164 add %eax, %esi
165 lea -1(%esi), %esi
166 and $1<<31, %esi
167 test %esi, %esi
168 jnz L(ContinueCopy)
169 lea 16(%ebx), %ebx
170
171 L(ContinueCopy):
172 # endif
/* Advance the source by the same distance as the destination; %eax becomes
   the source misalignment relative to a 16-byte boundary.  */
173 sub %eax, %ecx
174 mov %ecx, %eax
175 and $0xf, %eax
/* mov (not xor) is used so the flags produced by the `and' above survive
   for the jz below.  %esi = 0 is the running offset for Align16Both.  */
176 mov $0, %esi
177
178 /* case: ecx_offset == edx_offset */
179
180 jz L(Align16Both)
181
/* Source misaligned by %eax bytes: pick the matching palignr path.  */
182 cmp $8, %eax
183 jae L(ShlHigh8)
184 cmp $1, %eax
185 je L(Shl1)
186 cmp $2, %eax
187 je L(Shl2)
188 cmp $3, %eax
189 je L(Shl3)
190 cmp $4, %eax
191 je L(Shl4)
192 cmp $5, %eax
193 je L(Shl5)
194 cmp $6, %eax
195 je L(Shl6)
196 jmp L(Shl7)
197
198 L(ShlHigh8):
/* Flags are still those of `cmp $8, %eax' (jae does not change them).  */
199 je L(Shl8)
200 cmp $9, %eax
201 je L(Shl9)
202 cmp $10, %eax
203 je L(Shl10)
204 cmp $11, %eax
205 je L(Shl11)
206 cmp $12, %eax
207 je L(Shl12)
208 cmp $13, %eax
209 je L(Shl13)
210 cmp $14, %eax
211 je L(Shl14)
212 jmp L(Shl15)
213
214 L(Align16Both):
/* Source and destination are both 16-byte aligned.  Six unrolled steps:
   each step loads the next 16 bytes, stores the previous block (already
   known to be NUL-free), and checks the new block for a NUL.  %esi is the
   running byte offset (0 on entry) and is advanced by 16 before each
   possible exit.  In the strncpy variant %ebx is charged 16 bytes per
   step.  */
215 movaps (%ecx), %xmm1
216 movaps 16(%ecx), %xmm2
217 movaps %xmm1, (%edx)
218 pcmpeqb %xmm2, %xmm0
219 pmovmskb %xmm0, %eax
220 lea 16(%esi), %esi
221 # ifdef USE_AS_STRNCPY
222 sub $16, %ebx
223 jbe L(CopyFrom1To16BytesCase2OrCase3)
224 # endif
225 test %eax, %eax
226 jnz L(CopyFrom1To16Bytes)
227
228 movaps 16(%ecx, %esi), %xmm3
229 movaps %xmm2, (%edx, %esi)
230 pcmpeqb %xmm3, %xmm0
231 pmovmskb %xmm0, %eax
232 lea 16(%esi), %esi
233 # ifdef USE_AS_STRNCPY
234 sub $16, %ebx
235 jbe L(CopyFrom1To16BytesCase2OrCase3)
236 # endif
237 test %eax, %eax
238 jnz L(CopyFrom1To16Bytes)
239
240 movaps 16(%ecx, %esi), %xmm4
241 movaps %xmm3, (%edx, %esi)
242 pcmpeqb %xmm4, %xmm0
243 pmovmskb %xmm0, %eax
244 lea 16(%esi), %esi
245 # ifdef USE_AS_STRNCPY
246 sub $16, %ebx
247 jbe L(CopyFrom1To16BytesCase2OrCase3)
248 # endif
249 test %eax, %eax
250 jnz L(CopyFrom1To16Bytes)
251
252 movaps 16(%ecx, %esi), %xmm1
253 movaps %xmm4, (%edx, %esi)
254 pcmpeqb %xmm1, %xmm0
255 pmovmskb %xmm0, %eax
256 lea 16(%esi), %esi
257 # ifdef USE_AS_STRNCPY
258 sub $16, %ebx
259 jbe L(CopyFrom1To16BytesCase2OrCase3)
260 # endif
261 test %eax, %eax
262 jnz L(CopyFrom1To16Bytes)
263
264 movaps 16(%ecx, %esi), %xmm2
265 movaps %xmm1, (%edx, %esi)
266 pcmpeqb %xmm2, %xmm0
267 pmovmskb %xmm0, %eax
268 lea 16(%esi), %esi
269 # ifdef USE_AS_STRNCPY
270 sub $16, %ebx
271 jbe L(CopyFrom1To16BytesCase2OrCase3)
272 # endif
273 test %eax, %eax
274 jnz L(CopyFrom1To16Bytes)
275
276 movaps 16(%ecx, %esi), %xmm3
277 movaps %xmm2, (%edx, %esi)
278 pcmpeqb %xmm3, %xmm0
279 pmovmskb %xmm0, %eax
280 lea 16(%esi), %esi
281 # ifdef USE_AS_STRNCPY
282 sub $16, %ebx
283 jbe L(CopyFrom1To16BytesCase2OrCase3)
284 # endif
285 test %eax, %eax
286 jnz L(CopyFrom1To16Bytes)
287
/* Store the last checked block, then re-base for the 64-byte loop:
   %ecx = (%ecx + %esi + 16) rounded down to a 64-byte boundary,
   %eax = old %ecx - new %ecx, and %edx is moved by the same distance.  */
288 movaps %xmm3, (%edx, %esi)
289 mov %ecx, %eax
290 lea 16(%ecx, %esi), %ecx
291 and $-0x40, %ecx
292 sub %ecx, %eax
293 sub %eax, %edx
294 # ifdef USE_AS_STRNCPY
/* Adjust the remaining count to the new base: %ebx = %ebx + %eax + 112.  */
295 lea 112(%ebx, %eax), %ebx
296 # endif
/* %esi = -64; the loop below does not change it.  */
297 mov $-0x40, %esi
298
299 L(Aligned64Loop):
/* Aligned main loop, 64 bytes per iteration.  The four blocks are kept
   intact in %xmm4..%xmm7 for the stores, while pminub folds them into one
   register so a single compare against zero detects a NUL in any of the
   64 bytes.  %edx and %ecx are advanced before the exit tests, so the
   stores use negative offsets.  */
300 movaps (%ecx), %xmm2
301 movaps 32(%ecx), %xmm3
302 movaps %xmm2, %xmm4
303 movaps 16(%ecx), %xmm5
304 movaps %xmm3, %xmm6
305 movaps 48(%ecx), %xmm7
306 pminub %xmm5, %xmm2
307 pminub %xmm7, %xmm3
308 pminub %xmm2, %xmm3
309 lea 64(%edx), %edx
310 pcmpeqb %xmm0, %xmm3
311 lea 64(%ecx), %ecx
312 pmovmskb %xmm3, %eax
313 # ifdef USE_AS_STRNCPY
/* strncpy variant: leave once the count cannot cover another 64 bytes.  */
314 sub $64, %ebx
315 jbe L(StrncpyLeaveCase2OrCase3)
316 # endif
317 test %eax, %eax
318 jnz L(Aligned64Leave)
/* No NUL in these 64 bytes: store them and continue.  */
319 movaps %xmm4, -64(%edx)
320 movaps %xmm5, -48(%edx)
321 movaps %xmm6, -32(%edx)
322 movaps %xmm7, -16(%edx)
323 jmp L(Aligned64Loop)
324
325 L(Aligned64Leave):
/* A NUL is somewhere in the 64 bytes held in %xmm4..%xmm7 (none stored
   yet).  Test the blocks in order, storing each one found NUL-free; the
   first block containing a NUL is handed to CopyFrom1To16Bytes with its
   mask in %eax.  %esi starts at -64 and is advanced by 16 per block
   skipped, so (%edx,%esi) tracks the current block (%edx was already
   advanced by 64); presumably CopyFrom1To16Bytes (not shown) uses %esi
   that way -- confirm.  */
326 # ifdef USE_AS_STRNCPY
/* Re-balance the count: +48 here, then -16 for each further block.  */
327 lea 48(%ebx), %ebx
328 # endif
329 pcmpeqb %xmm4, %xmm0
330 pmovmskb %xmm0, %eax
331 test %eax, %eax
332 jnz L(CopyFrom1To16Bytes)
333
334 pcmpeqb %xmm5, %xmm0
335 # ifdef USE_AS_STRNCPY
336 lea -16(%ebx), %ebx
337 # endif
338 pmovmskb %xmm0, %eax
339 movaps %xmm4, -64(%edx)
340 test %eax, %eax
/* lea leaves the flags from `test' untouched for the jnz.  */
341 lea 16(%esi), %esi
342 jnz L(CopyFrom1To16Bytes)
343
344 pcmpeqb %xmm6, %xmm0
345 # ifdef USE_AS_STRNCPY
346 lea -16(%ebx), %ebx
347 # endif
348 pmovmskb %xmm0, %eax
349 movaps %xmm5, -48(%edx)
350 test %eax, %eax
351 lea 16(%esi), %esi
352 jnz L(CopyFrom1To16Bytes)
353
/* The NUL must be in the last block, so no test is needed.  */
354 movaps %xmm6, -32(%edx)
355 pcmpeqb %xmm7, %xmm0
356 # ifdef USE_AS_STRNCPY
357 lea -16(%ebx), %ebx
358 # endif
359 pmovmskb %xmm0, %eax
360 lea 16(%esi), %esi
361 jmp L(CopyFrom1To16Bytes)
362
363 .p2align 4
364 L(Shl1):
/* Shifted-copy path: destination is 16-byte aligned and the source is
   1 byte past a 16-byte boundary.  All loads are aligned movaps loads at
   offsets -1, 15, 31, ... from %ecx; palignr $1 stitches two neighbouring
   blocks into the 16 source bytes that belong at (%edx).
   %xmm0 = zero comparand (all-zero again after every compare that finds no
   NUL), %xmm1 = previous aligned block, %xmm2 = current aligned block.
   In the strncpy variant %ebx is charged for every block consumed.  */
365 movaps -1(%ecx), %xmm1
366 movaps 15(%ecx), %xmm2
367 L(Shl1Start):
/* Check the current block for a NUL; keep a copy in %xmm3 because palignr
   overwrites %xmm2.  */
368 pcmpeqb %xmm2, %xmm0
369 pmovmskb %xmm0, %eax
370 movaps %xmm2, %xmm3
371 # ifdef USE_AS_STRNCPY
372 sub $16, %ebx
373 jbe L(StrncpyExit1Case2OrCase3)
374 # endif
375 test %eax, %eax
376 jnz L(Shl1LoopExit)
377
/* No NUL: merge previous and current block, store 16 bytes, fetch the next
   aligned block.  */
378 palignr $1, %xmm1, %xmm2
379 movaps %xmm3, %xmm1
380 movaps %xmm2, (%edx)
381 movaps 31(%ecx), %xmm2
382
383 pcmpeqb %xmm2, %xmm0
384 lea 16(%edx), %edx
385 pmovmskb %xmm0, %eax
386 lea 16(%ecx), %ecx
387 movaps %xmm2, %xmm3
388 # ifdef USE_AS_STRNCPY
389 sub $16, %ebx
390 jbe L(StrncpyExit1Case2OrCase3)
391 # endif
392 test %eax, %eax
393 jnz L(Shl1LoopExit)
394
395 palignr $1, %xmm1, %xmm2
396 movaps %xmm2, (%edx)
397 movaps 31(%ecx), %xmm2
398 movaps %xmm3, %xmm1
399
400 pcmpeqb %xmm2, %xmm0
401 lea 16(%edx), %edx
402 pmovmskb %xmm0, %eax
403 lea 16(%ecx), %ecx
404 movaps %xmm2, %xmm3
405 # ifdef USE_AS_STRNCPY
406 sub $16, %ebx
407 jbe L(StrncpyExit1Case2OrCase3)
408 # endif
409 test %eax, %eax
410 jnz L(Shl1LoopExit)
411
412 palignr $1, %xmm1, %xmm2
413 movaps %xmm2, (%edx)
414 movaps 31(%ecx), %xmm2
415
416 pcmpeqb %xmm2, %xmm0
417 lea 16(%edx), %edx
418 pmovmskb %xmm0, %eax
419 lea 16(%ecx), %ecx
420 # ifdef USE_AS_STRNCPY
421 sub $16, %ebx
422 jbe L(StrncpyExit1Case2OrCase3)
423 # endif
424 test %eax, %eax
425 jnz L(Shl1LoopExit)
426
/* Fourth block: %xmm3 still holds the previous aligned block (it was not
   refreshed in the step above).  */
427 palignr $1, %xmm3, %xmm2
428 movaps %xmm2, (%edx)
429 lea 31(%ecx), %ecx
430 lea 16(%edx), %edx
431
/* Re-base for the 64-byte loop: %eax = (%ecx mod 64); round %ecx down to a
   64-byte boundary and bias it by -15 so that 15(%ecx) is that boundary.
   %edx is moved back by %eax, and the strncpy count grows by %eax.  */
432 mov %ecx, %eax
433 and $-0x40, %ecx
434 sub %ecx, %eax
435 lea -15(%ecx), %ecx
436 sub %eax, %edx
437 # ifdef USE_AS_STRNCPY
438 add %eax, %ebx
439 # endif
440 movaps -1(%ecx), %xmm1
441
442 L(Shl1LoopStart):
/* Main loop, 64 source bytes per iteration.  pminub folds the four blocks
   so one compare detects a NUL in any of them; %xmm7 then keeps a copy of
   the last block to become %xmm1 of the next iteration.  */
443 movaps 15(%ecx), %xmm2
444 movaps 31(%ecx), %xmm3
445 movaps %xmm3, %xmm6
446 movaps 47(%ecx), %xmm4
447 movaps %xmm4, %xmm7
448 movaps 63(%ecx), %xmm5
449 pminub %xmm2, %xmm6
450 pminub %xmm5, %xmm7
451 pminub %xmm6, %xmm7
452 pcmpeqb %xmm0, %xmm7
453 pmovmskb %xmm7, %eax
454 movaps %xmm5, %xmm7
455 palignr $1, %xmm4, %xmm5
456 test %eax, %eax
/* palignr does not modify the flags set by `test'.  */
457 palignr $1, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: redo them in 16-byte steps (%xmm1 and
   %xmm2 are still the previous and first block).  */
458 jnz L(Shl1Start)
459 # ifdef USE_AS_STRNCPY
460 sub $64, %ebx
461 jbe L(StrncpyLeave1)
462 # endif
463 palignr $1, %xmm2, %xmm3
464 lea 64(%ecx), %ecx
465 palignr $1, %xmm1, %xmm2
466 movaps %xmm7, %xmm1
467 movaps %xmm5, 48(%edx)
468 movaps %xmm4, 32(%edx)
469 movaps %xmm3, 16(%edx)
470 movaps %xmm2, (%edx)
471 lea 64(%edx), %edx
472 jmp L(Shl1LoopStart)
473
474 L(Shl1LoopExit):
/* The NUL is in the current aligned block, which starts at 15(%ecx).  Copy
   the 15 source bytes before it with two overlapping 8-byte moves (bytes
   0..7 and 7..14), then finish in CopyFrom1To16Bytes with %esi = 15 and
   the NUL mask in %eax.  */
475 movlpd (%ecx), %xmm0
476 movlpd %xmm0, (%edx)
477 movlpd 7(%ecx), %xmm0
478 movlpd %xmm0, 7(%edx)
479 mov $15, %esi
480 jmp L(CopyFrom1To16Bytes)
481
482 .p2align 4
483 L(Shl2):
/* Shifted-copy path: destination 16-byte aligned, source 2 bytes past a
   16-byte boundary.  Aligned loads at -2, 14, 30, ... from %ecx are merged
   with palignr $2.  %xmm0 = zero comparand, %xmm1 = previous aligned
   block, %xmm2 = current aligned block; %ebx = strncpy count.  */
484 movaps -2(%ecx), %xmm1
485 movaps 14(%ecx), %xmm2
486 L(Shl2Start):
/* Check the current block for a NUL; %xmm3 keeps a copy because palignr
   overwrites %xmm2.  */
487 pcmpeqb %xmm2, %xmm0
488 pmovmskb %xmm0, %eax
489 movaps %xmm2, %xmm3
490 # ifdef USE_AS_STRNCPY
491 sub $16, %ebx
492 jbe L(StrncpyExit2Case2OrCase3)
493 # endif
494 test %eax, %eax
495 jnz L(Shl2LoopExit)
496
/* No NUL: merge, store 16 bytes, fetch the next aligned block.  */
497 palignr $2, %xmm1, %xmm2
498 movaps %xmm3, %xmm1
499 movaps %xmm2, (%edx)
500 movaps 30(%ecx), %xmm2
501
502 pcmpeqb %xmm2, %xmm0
503 lea 16(%edx), %edx
504 pmovmskb %xmm0, %eax
505 lea 16(%ecx), %ecx
506 movaps %xmm2, %xmm3
507 # ifdef USE_AS_STRNCPY
508 sub $16, %ebx
509 jbe L(StrncpyExit2Case2OrCase3)
510 # endif
511 test %eax, %eax
512 jnz L(Shl2LoopExit)
513
514 palignr $2, %xmm1, %xmm2
515 movaps %xmm2, (%edx)
516 movaps 30(%ecx), %xmm2
517 movaps %xmm3, %xmm1
518
519 pcmpeqb %xmm2, %xmm0
520 lea 16(%edx), %edx
521 pmovmskb %xmm0, %eax
522 lea 16(%ecx), %ecx
523 movaps %xmm2, %xmm3
524 # ifdef USE_AS_STRNCPY
525 sub $16, %ebx
526 jbe L(StrncpyExit2Case2OrCase3)
527 # endif
528 test %eax, %eax
529 jnz L(Shl2LoopExit)
530
531 palignr $2, %xmm1, %xmm2
532 movaps %xmm2, (%edx)
533 movaps 30(%ecx), %xmm2
534
535 pcmpeqb %xmm2, %xmm0
536 lea 16(%edx), %edx
537 pmovmskb %xmm0, %eax
538 lea 16(%ecx), %ecx
539 # ifdef USE_AS_STRNCPY
540 sub $16, %ebx
541 jbe L(StrncpyExit2Case2OrCase3)
542 # endif
543 test %eax, %eax
544 jnz L(Shl2LoopExit)
545
/* Fourth block: %xmm3 still holds the previous aligned block.  */
546 palignr $2, %xmm3, %xmm2
547 movaps %xmm2, (%edx)
548 lea 30(%ecx), %ecx
549 lea 16(%edx), %edx
550
/* Re-base so that 14(%ecx) is 64-byte aligned; %edx is moved back by
   %eax = (%ecx mod 64) and the strncpy count grows by the same amount.  */
551 mov %ecx, %eax
552 and $-0x40, %ecx
553 sub %ecx, %eax
554 lea -14(%ecx), %ecx
555 sub %eax, %edx
556 # ifdef USE_AS_STRNCPY
557 add %eax, %ebx
558 # endif
559 movaps -2(%ecx), %xmm1
560
561 L(Shl2LoopStart):
/* Main loop, 64 source bytes per iteration; pminub folds the four blocks
   for a single NUL test, %xmm7 carries the last block to the next pass.  */
562 movaps 14(%ecx), %xmm2
563 movaps 30(%ecx), %xmm3
564 movaps %xmm3, %xmm6
565 movaps 46(%ecx), %xmm4
566 movaps %xmm4, %xmm7
567 movaps 62(%ecx), %xmm5
568 pminub %xmm2, %xmm6
569 pminub %xmm5, %xmm7
570 pminub %xmm6, %xmm7
571 pcmpeqb %xmm0, %xmm7
572 pmovmskb %xmm7, %eax
573 movaps %xmm5, %xmm7
574 palignr $2, %xmm4, %xmm5
575 test %eax, %eax
/* palignr does not modify the flags set by `test'.  */
576 palignr $2, %xmm3, %xmm4
/* NUL in these 64 bytes: redo them in 16-byte steps.  */
577 jnz L(Shl2Start)
578 # ifdef USE_AS_STRNCPY
579 sub $64, %ebx
580 jbe L(StrncpyLeave2)
581 # endif
582 palignr $2, %xmm2, %xmm3
583 lea 64(%ecx), %ecx
584 palignr $2, %xmm1, %xmm2
585 movaps %xmm7, %xmm1
586 movaps %xmm5, 48(%edx)
587 movaps %xmm4, 32(%edx)
588 movaps %xmm3, 16(%edx)
589 movaps %xmm2, (%edx)
590 lea 64(%edx), %edx
591 jmp L(Shl2LoopStart)
592
593 L(Shl2LoopExit):
/* NUL is in the aligned block at 14(%ecx).  Copy the 14 bytes before it
   (two overlapping 8-byte moves: 0..7 and 6..13) and finish in
   CopyFrom1To16Bytes with %esi = 14.  */
594 movlpd (%ecx), %xmm0
595 movlpd 6(%ecx), %xmm1
596 movlpd %xmm0, (%edx)
597 movlpd %xmm1, 6(%edx)
598 mov $14, %esi
599 jmp L(CopyFrom1To16Bytes)
600
601 .p2align 4
602 L(Shl3):
/* Shifted-copy path: destination 16-byte aligned, source 3 bytes past a
   16-byte boundary.  Aligned loads at -3, 13, 29, ... from %ecx are merged
   with palignr $3.  %xmm0 = zero comparand, %xmm1 = previous aligned
   block, %xmm2 = current aligned block; %ebx = strncpy count.  */
603 movaps -3(%ecx), %xmm1
604 movaps 13(%ecx), %xmm2
605 L(Shl3Start):
/* Check the current block for a NUL; %xmm3 keeps a copy because palignr
   overwrites %xmm2.  */
606 pcmpeqb %xmm2, %xmm0
607 pmovmskb %xmm0, %eax
608 movaps %xmm2, %xmm3
609 # ifdef USE_AS_STRNCPY
610 sub $16, %ebx
611 jbe L(StrncpyExit3Case2OrCase3)
612 # endif
613 test %eax, %eax
614 jnz L(Shl3LoopExit)
615
/* No NUL: merge, store 16 bytes, fetch the next aligned block.  */
616 palignr $3, %xmm1, %xmm2
617 movaps %xmm3, %xmm1
618 movaps %xmm2, (%edx)
619 movaps 29(%ecx), %xmm2
620
621 pcmpeqb %xmm2, %xmm0
622 lea 16(%edx), %edx
623 pmovmskb %xmm0, %eax
624 lea 16(%ecx), %ecx
625 movaps %xmm2, %xmm3
626 # ifdef USE_AS_STRNCPY
627 sub $16, %ebx
628 jbe L(StrncpyExit3Case2OrCase3)
629 # endif
630 test %eax, %eax
631 jnz L(Shl3LoopExit)
632
633 palignr $3, %xmm1, %xmm2
634 movaps %xmm2, (%edx)
635 movaps 29(%ecx), %xmm2
636 movaps %xmm3, %xmm1
637
638 pcmpeqb %xmm2, %xmm0
639 lea 16(%edx), %edx
640 pmovmskb %xmm0, %eax
641 lea 16(%ecx), %ecx
642 movaps %xmm2, %xmm3
643 # ifdef USE_AS_STRNCPY
644 sub $16, %ebx
645 jbe L(StrncpyExit3Case2OrCase3)
646 # endif
647 test %eax, %eax
648 jnz L(Shl3LoopExit)
649
650 palignr $3, %xmm1, %xmm2
651 movaps %xmm2, (%edx)
652 movaps 29(%ecx), %xmm2
653
654 pcmpeqb %xmm2, %xmm0
655 lea 16(%edx), %edx
656 pmovmskb %xmm0, %eax
657 lea 16(%ecx), %ecx
658 # ifdef USE_AS_STRNCPY
659 sub $16, %ebx
660 jbe L(StrncpyExit3Case2OrCase3)
661 # endif
662 test %eax, %eax
663 jnz L(Shl3LoopExit)
664
/* Fourth block: %xmm3 still holds the previous aligned block.  */
665 palignr $3, %xmm3, %xmm2
666 movaps %xmm2, (%edx)
667 lea 29(%ecx), %ecx
668 lea 16(%edx), %edx
669
/* Re-base so that 13(%ecx) is 64-byte aligned; %edx is moved back by
   %eax = (%ecx mod 64) and the strncpy count grows by the same amount.  */
670 mov %ecx, %eax
671 and $-0x40, %ecx
672 sub %ecx, %eax
673 lea -13(%ecx), %ecx
674 sub %eax, %edx
675 # ifdef USE_AS_STRNCPY
676 add %eax, %ebx
677 # endif
678 movaps -3(%ecx), %xmm1
679
680 L(Shl3LoopStart):
/* Main loop, 64 source bytes per iteration; pminub folds the four blocks
   for a single NUL test, %xmm7 carries the last block to the next pass.  */
681 movaps 13(%ecx), %xmm2
682 movaps 29(%ecx), %xmm3
683 movaps %xmm3, %xmm6
684 movaps 45(%ecx), %xmm4
685 movaps %xmm4, %xmm7
686 movaps 61(%ecx), %xmm5
687 pminub %xmm2, %xmm6
688 pminub %xmm5, %xmm7
689 pminub %xmm6, %xmm7
690 pcmpeqb %xmm0, %xmm7
691 pmovmskb %xmm7, %eax
692 movaps %xmm5, %xmm7
693 palignr $3, %xmm4, %xmm5
694 test %eax, %eax
/* palignr does not modify the flags set by `test'.  */
695 palignr $3, %xmm3, %xmm4
/* NUL in these 64 bytes: redo them in 16-byte steps.  */
696 jnz L(Shl3Start)
697 # ifdef USE_AS_STRNCPY
698 sub $64, %ebx
699 jbe L(StrncpyLeave3)
700 # endif
701 palignr $3, %xmm2, %xmm3
702 lea 64(%ecx), %ecx
703 palignr $3, %xmm1, %xmm2
704 movaps %xmm7, %xmm1
705 movaps %xmm5, 48(%edx)
706 movaps %xmm4, 32(%edx)
707 movaps %xmm3, 16(%edx)
708 movaps %xmm2, (%edx)
709 lea 64(%edx), %edx
710 jmp L(Shl3LoopStart)
711
712 L(Shl3LoopExit):
/* NUL is in the aligned block at 13(%ecx).  Copy the 13 bytes before it
   (two overlapping 8-byte moves: 0..7 and 5..12) and finish in
   CopyFrom1To16Bytes with %esi = 13.  */
713 movlpd (%ecx), %xmm0
714 movlpd 5(%ecx), %xmm1
715 movlpd %xmm0, (%edx)
716 movlpd %xmm1, 5(%edx)
717 mov $13, %esi
718 jmp L(CopyFrom1To16Bytes)
719
720 .p2align 4
721 L(Shl4):
/* Shifted-copy path: destination 16-byte aligned, source 4 bytes past a
   16-byte boundary.  Aligned loads at -4, 12, 28, ... from %ecx are merged
   with palignr $4.  %xmm0 = zero comparand, %xmm1 = previous aligned
   block, %xmm2 = current aligned block; %ebx = strncpy count.  */
722 movaps -4(%ecx), %xmm1
723 movaps 12(%ecx), %xmm2
724 L(Shl4Start):
/* Check the current block for a NUL; %xmm3 keeps a copy because palignr
   overwrites %xmm2.  */
725 pcmpeqb %xmm2, %xmm0
726 pmovmskb %xmm0, %eax
727 movaps %xmm2, %xmm3
728 # ifdef USE_AS_STRNCPY
729 sub $16, %ebx
730 jbe L(StrncpyExit4Case2OrCase3)
731 # endif
732 test %eax, %eax
733 jnz L(Shl4LoopExit)
734
/* No NUL: merge, store 16 bytes, fetch the next aligned block.  */
735 palignr $4, %xmm1, %xmm2
736 movaps %xmm3, %xmm1
737 movaps %xmm2, (%edx)
738 movaps 28(%ecx), %xmm2
739
740 pcmpeqb %xmm2, %xmm0
741 lea 16(%edx), %edx
742 pmovmskb %xmm0, %eax
743 lea 16(%ecx), %ecx
744 movaps %xmm2, %xmm3
745 # ifdef USE_AS_STRNCPY
746 sub $16, %ebx
747 jbe L(StrncpyExit4Case2OrCase3)
748 # endif
749 test %eax, %eax
750 jnz L(Shl4LoopExit)
751
752 palignr $4, %xmm1, %xmm2
753 movaps %xmm2, (%edx)
754 movaps 28(%ecx), %xmm2
755 movaps %xmm3, %xmm1
756
757 pcmpeqb %xmm2, %xmm0
758 lea 16(%edx), %edx
759 pmovmskb %xmm0, %eax
760 lea 16(%ecx), %ecx
761 movaps %xmm2, %xmm3
762 # ifdef USE_AS_STRNCPY
763 sub $16, %ebx
764 jbe L(StrncpyExit4Case2OrCase3)
765 # endif
766 test %eax, %eax
767 jnz L(Shl4LoopExit)
768
769 palignr $4, %xmm1, %xmm2
770 movaps %xmm2, (%edx)
771 movaps 28(%ecx), %xmm2
772
773 pcmpeqb %xmm2, %xmm0
774 lea 16(%edx), %edx
775 pmovmskb %xmm0, %eax
776 lea 16(%ecx), %ecx
777 # ifdef USE_AS_STRNCPY
778 sub $16, %ebx
779 jbe L(StrncpyExit4Case2OrCase3)
780 # endif
781 test %eax, %eax
782 jnz L(Shl4LoopExit)
783
/* Fourth block: %xmm3 still holds the previous aligned block.  */
784 palignr $4, %xmm3, %xmm2
785 movaps %xmm2, (%edx)
786 lea 28(%ecx), %ecx
787 lea 16(%edx), %edx
788
/* Re-base so that 12(%ecx) is 64-byte aligned; %edx is moved back by
   %eax = (%ecx mod 64) and the strncpy count grows by the same amount.  */
789 mov %ecx, %eax
790 and $-0x40, %ecx
791 sub %ecx, %eax
792 lea -12(%ecx), %ecx
793 sub %eax, %edx
794 # ifdef USE_AS_STRNCPY
795 add %eax, %ebx
796 # endif
797 movaps -4(%ecx), %xmm1
798
799 L(Shl4LoopStart):
/* Main loop, 64 source bytes per iteration; pminub folds the four blocks
   for a single NUL test, %xmm7 carries the last block to the next pass.  */
800 movaps 12(%ecx), %xmm2
801 movaps 28(%ecx), %xmm3
802 movaps %xmm3, %xmm6
803 movaps 44(%ecx), %xmm4
804 movaps %xmm4, %xmm7
805 movaps 60(%ecx), %xmm5
806 pminub %xmm2, %xmm6
807 pminub %xmm5, %xmm7
808 pminub %xmm6, %xmm7
809 pcmpeqb %xmm0, %xmm7
810 pmovmskb %xmm7, %eax
811 movaps %xmm5, %xmm7
812 palignr $4, %xmm4, %xmm5
813 test %eax, %eax
/* palignr does not modify the flags set by `test'.  */
814 palignr $4, %xmm3, %xmm4
/* NUL in these 64 bytes: redo them in 16-byte steps.  */
815 jnz L(Shl4Start)
816 # ifdef USE_AS_STRNCPY
817 sub $64, %ebx
818 jbe L(StrncpyLeave4)
819 # endif
820 palignr $4, %xmm2, %xmm3
821 lea 64(%ecx), %ecx
822 palignr $4, %xmm1, %xmm2
823 movaps %xmm7, %xmm1
824 movaps %xmm5, 48(%edx)
825 movaps %xmm4, 32(%edx)
826 movaps %xmm3, 16(%edx)
827 movaps %xmm2, (%edx)
828 lea 64(%edx), %edx
829 jmp L(Shl4LoopStart)
830
831 L(Shl4LoopExit):
/* NUL is in the aligned block at 12(%ecx).  Copy the 12 bytes before it
   (8 bytes at 0 plus 4 bytes at 8, using %esi as scratch) and finish in
   CopyFrom1To16Bytes with %esi = 12.  */
832 movlpd (%ecx), %xmm0
833 movl 8(%ecx), %esi
834 movlpd %xmm0, (%edx)
835 movl %esi, 8(%edx)
836 mov $12, %esi
837 jmp L(CopyFrom1To16Bytes)
838
839 .p2align 4
840 L(Shl5):
/* Shifted-copy path: destination 16-byte aligned, source 5 bytes past a
   16-byte boundary.  Aligned loads at -5, 11, 27, ... from %ecx are merged
   with palignr $5.  %xmm0 = zero comparand, %xmm1 = previous aligned
   block, %xmm2 = current aligned block; %ebx = strncpy count.  */
841 movaps -5(%ecx), %xmm1
842 movaps 11(%ecx), %xmm2
843 L(Shl5Start):
/* Check the current block for a NUL; %xmm3 keeps a copy because palignr
   overwrites %xmm2.  */
844 pcmpeqb %xmm2, %xmm0
845 pmovmskb %xmm0, %eax
846 movaps %xmm2, %xmm3
847 # ifdef USE_AS_STRNCPY
848 sub $16, %ebx
849 jbe L(StrncpyExit5Case2OrCase3)
850 # endif
851 test %eax, %eax
852 jnz L(Shl5LoopExit)
853
/* No NUL: merge, store 16 bytes, fetch the next aligned block.  */
854 palignr $5, %xmm1, %xmm2
855 movaps %xmm3, %xmm1
856 movaps %xmm2, (%edx)
857 movaps 27(%ecx), %xmm2
858
859 pcmpeqb %xmm2, %xmm0
860 lea 16(%edx), %edx
861 pmovmskb %xmm0, %eax
862 lea 16(%ecx), %ecx
863 movaps %xmm2, %xmm3
864 # ifdef USE_AS_STRNCPY
865 sub $16, %ebx
866 jbe L(StrncpyExit5Case2OrCase3)
867 # endif
868 test %eax, %eax
869 jnz L(Shl5LoopExit)
870
871 palignr $5, %xmm1, %xmm2
872 movaps %xmm2, (%edx)
873 movaps 27(%ecx), %xmm2
874 movaps %xmm3, %xmm1
875
876 pcmpeqb %xmm2, %xmm0
877 lea 16(%edx), %edx
878 pmovmskb %xmm0, %eax
879 lea 16(%ecx), %ecx
880 movaps %xmm2, %xmm3
881 # ifdef USE_AS_STRNCPY
882 sub $16, %ebx
883 jbe L(StrncpyExit5Case2OrCase3)
884 # endif
885 test %eax, %eax
886 jnz L(Shl5LoopExit)
887
888 palignr $5, %xmm1, %xmm2
889 movaps %xmm2, (%edx)
890 movaps 27(%ecx), %xmm2
891
892 pcmpeqb %xmm2, %xmm0
893 lea 16(%edx), %edx
894 pmovmskb %xmm0, %eax
895 lea 16(%ecx), %ecx
896 # ifdef USE_AS_STRNCPY
897 sub $16, %ebx
898 jbe L(StrncpyExit5Case2OrCase3)
899 # endif
900 test %eax, %eax
901 jnz L(Shl5LoopExit)
902
/* Fourth block: %xmm3 still holds the previous aligned block.  */
903 palignr $5, %xmm3, %xmm2
904 movaps %xmm2, (%edx)
905 lea 27(%ecx), %ecx
906 lea 16(%edx), %edx
907
/* Re-base so that 11(%ecx) is 64-byte aligned; %edx is moved back by
   %eax = (%ecx mod 64) and the strncpy count grows by the same amount.  */
908 mov %ecx, %eax
909 and $-0x40, %ecx
910 sub %ecx, %eax
911 lea -11(%ecx), %ecx
912 sub %eax, %edx
913 # ifdef USE_AS_STRNCPY
914 add %eax, %ebx
915 # endif
916 movaps -5(%ecx), %xmm1
917
918 L(Shl5LoopStart):
/* Main loop, 64 source bytes per iteration; pminub folds the four blocks
   for a single NUL test, %xmm7 carries the last block to the next pass.  */
919 movaps 11(%ecx), %xmm2
920 movaps 27(%ecx), %xmm3
921 movaps %xmm3, %xmm6
922 movaps 43(%ecx), %xmm4
923 movaps %xmm4, %xmm7
924 movaps 59(%ecx), %xmm5
925 pminub %xmm2, %xmm6
926 pminub %xmm5, %xmm7
927 pminub %xmm6, %xmm7
928 pcmpeqb %xmm0, %xmm7
929 pmovmskb %xmm7, %eax
930 movaps %xmm5, %xmm7
931 palignr $5, %xmm4, %xmm5
932 test %eax, %eax
/* palignr does not modify the flags set by `test'.  */
933 palignr $5, %xmm3, %xmm4
/* NUL in these 64 bytes: redo them in 16-byte steps.  */
934 jnz L(Shl5Start)
935 # ifdef USE_AS_STRNCPY
936 sub $64, %ebx
937 jbe L(StrncpyLeave5)
938 # endif
939 palignr $5, %xmm2, %xmm3
940 lea 64(%ecx), %ecx
941 palignr $5, %xmm1, %xmm2
942 movaps %xmm7, %xmm1
943 movaps %xmm5, 48(%edx)
944 movaps %xmm4, 32(%edx)
945 movaps %xmm3, 16(%edx)
946 movaps %xmm2, (%edx)
947 lea 64(%edx), %edx
948 jmp L(Shl5LoopStart)
949
950 L(Shl5LoopExit):
/* NUL is in the aligned block at 11(%ecx).  Copy the 11 bytes before it
   (8 bytes at 0 plus an overlapping 4 bytes at 7, using %esi as scratch)
   and finish in CopyFrom1To16Bytes with %esi = 11.  */
951 movlpd (%ecx), %xmm0
952 movl 7(%ecx), %esi
953 movlpd %xmm0, (%edx)
954 movl %esi, 7(%edx)
955 mov $11, %esi
956 jmp L(CopyFrom1To16Bytes)
957
958 .p2align 4
959 L(Shl6):
/* Shifted-copy path: destination 16-byte aligned, source 6 bytes past a
   16-byte boundary.  Aligned loads at -6, 10, 26, ... from %ecx are merged
   with palignr $6.  %xmm0 = zero comparand, %xmm1 = previous aligned
   block, %xmm2 = current aligned block; %ebx = strncpy count.  */
960 movaps -6(%ecx), %xmm1
961 movaps 10(%ecx), %xmm2
962 L(Shl6Start):
/* Check the current block for a NUL; %xmm3 keeps a copy because palignr
   overwrites %xmm2.  */
963 pcmpeqb %xmm2, %xmm0
964 pmovmskb %xmm0, %eax
965 movaps %xmm2, %xmm3
966 # ifdef USE_AS_STRNCPY
967 sub $16, %ebx
968 jbe L(StrncpyExit6Case2OrCase3)
969 # endif
970 test %eax, %eax
971 jnz L(Shl6LoopExit)
972
/* No NUL: merge, store 16 bytes, fetch the next aligned block.  */
973 palignr $6, %xmm1, %xmm2
974 movaps %xmm3, %xmm1
975 movaps %xmm2, (%edx)
976 movaps 26(%ecx), %xmm2
977
978 pcmpeqb %xmm2, %xmm0
979 lea 16(%edx), %edx
980 pmovmskb %xmm0, %eax
981 lea 16(%ecx), %ecx
982 movaps %xmm2, %xmm3
983 # ifdef USE_AS_STRNCPY
984 sub $16, %ebx
985 jbe L(StrncpyExit6Case2OrCase3)
986 # endif
987 test %eax, %eax
988 jnz L(Shl6LoopExit)
989
990 palignr $6, %xmm1, %xmm2
991 movaps %xmm2, (%edx)
992 movaps 26(%ecx), %xmm2
993 movaps %xmm3, %xmm1
994
995 pcmpeqb %xmm2, %xmm0
996 lea 16(%edx), %edx
997 pmovmskb %xmm0, %eax
998 lea 16(%ecx), %ecx
999 movaps %xmm2, %xmm3
1000 # ifdef USE_AS_STRNCPY
1001 sub $16, %ebx
1002 jbe L(StrncpyExit6Case2OrCase3)
1003 # endif
1004 test %eax, %eax
1005 jnz L(Shl6LoopExit)
1006
1007 palignr $6, %xmm1, %xmm2
1008 movaps %xmm2, (%edx)
1009 movaps 26(%ecx), %xmm2
1010
1011 pcmpeqb %xmm2, %xmm0
1012 lea 16(%edx), %edx
1013 pmovmskb %xmm0, %eax
1014 lea 16(%ecx), %ecx
1015 # ifdef USE_AS_STRNCPY
1016 sub $16, %ebx
1017 jbe L(StrncpyExit6Case2OrCase3)
1018 # endif
1019 test %eax, %eax
1020 jnz L(Shl6LoopExit)
1021
/* Fourth block: %xmm3 still holds the previous aligned block.  */
1022 palignr $6, %xmm3, %xmm2
1023 movaps %xmm2, (%edx)
1024 lea 26(%ecx), %ecx
1025 lea 16(%edx), %edx
1026
/* Re-base so that 10(%ecx) is 64-byte aligned; %edx is moved back by
   %eax = (%ecx mod 64) and the strncpy count grows by the same amount.  */
1027 mov %ecx, %eax
1028 and $-0x40, %ecx
1029 sub %ecx, %eax
1030 lea -10(%ecx), %ecx
1031 sub %eax, %edx
1032 # ifdef USE_AS_STRNCPY
1033 add %eax, %ebx
1034 # endif
1035 movaps -6(%ecx), %xmm1
1036
1037 L(Shl6LoopStart):
/* Main loop, 64 source bytes per iteration; pminub folds the four blocks
   for a single NUL test, %xmm7 carries the last block to the next pass.  */
1038 movaps 10(%ecx), %xmm2
1039 movaps 26(%ecx), %xmm3
1040 movaps %xmm3, %xmm6
1041 movaps 42(%ecx), %xmm4
1042 movaps %xmm4, %xmm7
1043 movaps 58(%ecx), %xmm5
1044 pminub %xmm2, %xmm6
1045 pminub %xmm5, %xmm7
1046 pminub %xmm6, %xmm7
1047 pcmpeqb %xmm0, %xmm7
1048 pmovmskb %xmm7, %eax
1049 movaps %xmm5, %xmm7
1050 palignr $6, %xmm4, %xmm5
1051 test %eax, %eax
/* palignr does not modify the flags set by `test'.  */
1052 palignr $6, %xmm3, %xmm4
/* NUL in these 64 bytes: redo them in 16-byte steps.  */
1053 jnz L(Shl6Start)
1054 # ifdef USE_AS_STRNCPY
1055 sub $64, %ebx
1056 jbe L(StrncpyLeave6)
1057 # endif
1058 palignr $6, %xmm2, %xmm3
1059 lea 64(%ecx), %ecx
1060 palignr $6, %xmm1, %xmm2
1061 movaps %xmm7, %xmm1
1062 movaps %xmm5, 48(%edx)
1063 movaps %xmm4, 32(%edx)
1064 movaps %xmm3, 16(%edx)
1065 movaps %xmm2, (%edx)
1066 lea 64(%edx), %edx
1067 jmp L(Shl6LoopStart)
1068
1069 L(Shl6LoopExit):
/* NUL is in the aligned block at 10(%ecx).  Copy the 10 bytes before it
   (8 bytes at 0 plus an overlapping 4 bytes at 6, using %esi as scratch)
   and finish in CopyFrom1To16Bytes with %esi = 10.  */
1070 movlpd (%ecx), %xmm0
1071 movl 6(%ecx), %esi
1072 movlpd %xmm0, (%edx)
1073 movl %esi, 6(%edx)
1074 mov $10, %esi
1075 jmp L(CopyFrom1To16Bytes)
1076
1077 .p2align 4
1078 L(Shl7):
/* Shifted-copy path: destination 16-byte aligned, source 7 bytes past a
   16-byte boundary.  Aligned loads at -7, 9, 25, ... from %ecx are merged
   with palignr $7.  %xmm0 = zero comparand, %xmm1 = previous aligned
   block, %xmm2 = current aligned block; %ebx = strncpy count.  */
1079 movaps -7(%ecx), %xmm1
1080 movaps 9(%ecx), %xmm2
1081 L(Shl7Start):
/* Check the current block for a NUL; %xmm3 keeps a copy because palignr
   overwrites %xmm2.  */
1082 pcmpeqb %xmm2, %xmm0
1083 pmovmskb %xmm0, %eax
1084 movaps %xmm2, %xmm3
1085 # ifdef USE_AS_STRNCPY
1086 sub $16, %ebx
1087 jbe L(StrncpyExit7Case2OrCase3)
1088 # endif
1089 test %eax, %eax
1090 jnz L(Shl7LoopExit)
1091
/* No NUL: merge, store 16 bytes, fetch the next aligned block.  */
1092 palignr $7, %xmm1, %xmm2
1093 movaps %xmm3, %xmm1
1094 movaps %xmm2, (%edx)
1095 movaps 25(%ecx), %xmm2
1096
1097 pcmpeqb %xmm2, %xmm0
1098 lea 16(%edx), %edx
1099 pmovmskb %xmm0, %eax
1100 lea 16(%ecx), %ecx
1101 movaps %xmm2, %xmm3
1102 # ifdef USE_AS_STRNCPY
1103 sub $16, %ebx
1104 jbe L(StrncpyExit7Case2OrCase3)
1105 # endif
1106 test %eax, %eax
1107 jnz L(Shl7LoopExit)
1108
1109 palignr $7, %xmm1, %xmm2
1110 movaps %xmm2, (%edx)
1111 movaps 25(%ecx), %xmm2
1112 movaps %xmm3, %xmm1
1113
1114 pcmpeqb %xmm2, %xmm0
1115 lea 16(%edx), %edx
1116 pmovmskb %xmm0, %eax
1117 lea 16(%ecx), %ecx
1118 movaps %xmm2, %xmm3
1119 # ifdef USE_AS_STRNCPY
1120 sub $16, %ebx
1121 jbe L(StrncpyExit7Case2OrCase3)
1122 # endif
1123 test %eax, %eax
1124 jnz L(Shl7LoopExit)
1125
1126 palignr $7, %xmm1, %xmm2
1127 movaps %xmm2, (%edx)
1128 movaps 25(%ecx), %xmm2
1129
1130 pcmpeqb %xmm2, %xmm0
1131 lea 16(%edx), %edx
1132 pmovmskb %xmm0, %eax
1133 lea 16(%ecx), %ecx
1134 # ifdef USE_AS_STRNCPY
1135 sub $16, %ebx
1136 jbe L(StrncpyExit7Case2OrCase3)
1137 # endif
1138 test %eax, %eax
1139 jnz L(Shl7LoopExit)
1140
/* Fourth block: %xmm3 still holds the previous aligned block.  */
1141 palignr $7, %xmm3, %xmm2
1142 movaps %xmm2, (%edx)
1143 lea 25(%ecx), %ecx
1144 lea 16(%edx), %edx
1145
/* Re-base so that 9(%ecx) is 64-byte aligned; %edx is moved back by
   %eax = (%ecx mod 64) and the strncpy count grows by the same amount.  */
1146 mov %ecx, %eax
1147 and $-0x40, %ecx
1148 sub %ecx, %eax
1149 lea -9(%ecx), %ecx
1150 sub %eax, %edx
1151 # ifdef USE_AS_STRNCPY
1152 add %eax, %ebx
1153 # endif
1154 movaps -7(%ecx), %xmm1
1155
1156 L(Shl7LoopStart):
/* Main loop, 64 source bytes per iteration; pminub folds the four blocks
   for a single NUL test, %xmm7 carries the last block to the next pass.  */
1157 movaps 9(%ecx), %xmm2
1158 movaps 25(%ecx), %xmm3
1159 movaps %xmm3, %xmm6
1160 movaps 41(%ecx), %xmm4
1161 movaps %xmm4, %xmm7
1162 movaps 57(%ecx), %xmm5
1163 pminub %xmm2, %xmm6
1164 pminub %xmm5, %xmm7
1165 pminub %xmm6, %xmm7
1166 pcmpeqb %xmm0, %xmm7
1167 pmovmskb %xmm7, %eax
1168 movaps %xmm5, %xmm7
1169 palignr $7, %xmm4, %xmm5
1170 test %eax, %eax
/* palignr does not modify the flags set by `test'.  */
1171 palignr $7, %xmm3, %xmm4
/* NUL in these 64 bytes: redo them in 16-byte steps.  */
1172 jnz L(Shl7Start)
1173 # ifdef USE_AS_STRNCPY
1174 sub $64, %ebx
1175 jbe L(StrncpyLeave7)
1176 # endif
1177 palignr $7, %xmm2, %xmm3
1178 lea 64(%ecx), %ecx
1179 palignr $7, %xmm1, %xmm2
1180 movaps %xmm7, %xmm1
1181 movaps %xmm5, 48(%edx)
1182 movaps %xmm4, 32(%edx)
1183 movaps %xmm3, 16(%edx)
1184 movaps %xmm2, (%edx)
1185 lea 64(%edx), %edx
1186 jmp L(Shl7LoopStart)
1187
1188 L(Shl7LoopExit):
/* NUL is in the aligned block at 9(%ecx).  Copy the 9 bytes before it
   (8 bytes at 0 plus an overlapping 4 bytes at 5, using %esi as scratch)
   and finish in CopyFrom1To16Bytes with %esi = 9.  */
1189 movlpd (%ecx), %xmm0
1190 movl 5(%ecx), %esi
1191 movlpd %xmm0, (%edx)
1192 movl %esi, 5(%edx)
1193 mov $9, %esi
1194 jmp L(CopyFrom1To16Bytes)
1195
1196 .p2align 4
1197 L(Shl8):
/* Shifted-copy path: destination 16-byte aligned, source 8 bytes past a
   16-byte boundary.  Aligned loads at -8, 8, 24, ... from %ecx are merged
   with palignr $8.  %xmm0 = zero comparand, %xmm1 = previous aligned
   block, %xmm2 = current aligned block; %ebx = strncpy count.  */
1198 movaps -8(%ecx), %xmm1
1199 movaps 8(%ecx), %xmm2
1200 L(Shl8Start):
/* Check the current block for a NUL; %xmm3 keeps a copy because palignr
   overwrites %xmm2.  */
1201 pcmpeqb %xmm2, %xmm0
1202 pmovmskb %xmm0, %eax
1203 movaps %xmm2, %xmm3
1204 # ifdef USE_AS_STRNCPY
1205 sub $16, %ebx
1206 jbe L(StrncpyExit8Case2OrCase3)
1207 # endif
1208 test %eax, %eax
1209 jnz L(Shl8LoopExit)
1210
/* No NUL: merge, store 16 bytes, fetch the next aligned block.  */
1211 palignr $8, %xmm1, %xmm2
1212 movaps %xmm3, %xmm1
1213 movaps %xmm2, (%edx)
1214 movaps 24(%ecx), %xmm2
1215
1216 pcmpeqb %xmm2, %xmm0
1217 lea 16(%edx), %edx
1218 pmovmskb %xmm0, %eax
1219 lea 16(%ecx), %ecx
1220 movaps %xmm2, %xmm3
1221 # ifdef USE_AS_STRNCPY
1222 sub $16, %ebx
1223 jbe L(StrncpyExit8Case2OrCase3)
1224 # endif
1225 test %eax, %eax
1226 jnz L(Shl8LoopExit)
1227
1228 palignr $8, %xmm1, %xmm2
1229 movaps %xmm2, (%edx)
1230 movaps 24(%ecx), %xmm2
1231 movaps %xmm3, %xmm1
1232
1233 pcmpeqb %xmm2, %xmm0
1234 lea 16(%edx), %edx
1235 pmovmskb %xmm0, %eax
1236 lea 16(%ecx), %ecx
1237 movaps %xmm2, %xmm3
1238 # ifdef USE_AS_STRNCPY
1239 sub $16, %ebx
1240 jbe L(StrncpyExit8Case2OrCase3)
1241 # endif
1242 test %eax, %eax
1243 jnz L(Shl8LoopExit)
1244
1245 palignr $8, %xmm1, %xmm2
1246 movaps %xmm2, (%edx)
1247 movaps 24(%ecx), %xmm2
1248
1249 pcmpeqb %xmm2, %xmm0
1250 lea 16(%edx), %edx
1251 pmovmskb %xmm0, %eax
1252 lea 16(%ecx), %ecx
1253 # ifdef USE_AS_STRNCPY
1254 sub $16, %ebx
1255 jbe L(StrncpyExit8Case2OrCase3)
1256 # endif
1257 test %eax, %eax
1258 jnz L(Shl8LoopExit)
1259
/* Fourth block: %xmm3 still holds the previous aligned block.  */
1260 palignr $8, %xmm3, %xmm2
1261 movaps %xmm2, (%edx)
1262 lea 24(%ecx), %ecx
1263 lea 16(%edx), %edx
1264
/* Re-base so that 8(%ecx) is 64-byte aligned; %edx is moved back by
   %eax = (%ecx mod 64) and the strncpy count grows by the same amount.  */
1265 mov %ecx, %eax
1266 and $-0x40, %ecx
1267 sub %ecx, %eax
1268 lea -8(%ecx), %ecx
1269 sub %eax, %edx
1270 # ifdef USE_AS_STRNCPY
1271 add %eax, %ebx
1272 # endif
1273 movaps -8(%ecx), %xmm1
1274
1275 L(Shl8LoopStart):
/* Main loop, 64 source bytes per iteration; pminub folds the four blocks
   for a single NUL test, %xmm7 carries the last block to the next pass.  */
1276 movaps 8(%ecx), %xmm2
1277 movaps 24(%ecx), %xmm3
1278 movaps %xmm3, %xmm6
1279 movaps 40(%ecx), %xmm4
1280 movaps %xmm4, %xmm7
1281 movaps 56(%ecx), %xmm5
1282 pminub %xmm2, %xmm6
1283 pminub %xmm5, %xmm7
1284 pminub %xmm6, %xmm7
1285 pcmpeqb %xmm0, %xmm7
1286 pmovmskb %xmm7, %eax
1287 movaps %xmm5, %xmm7
1288 palignr $8, %xmm4, %xmm5
1289 test %eax, %eax
/* palignr does not modify the flags set by `test'.  */
1290 palignr $8, %xmm3, %xmm4
/* NUL in these 64 bytes: redo them in 16-byte steps.  */
1291 jnz L(Shl8Start)
1292 # ifdef USE_AS_STRNCPY
1293 sub $64, %ebx
1294 jbe L(StrncpyLeave8)
1295 # endif
1296 palignr $8, %xmm2, %xmm3
1297 lea 64(%ecx), %ecx
1298 palignr $8, %xmm1, %xmm2
1299 movaps %xmm7, %xmm1
1300 movaps %xmm5, 48(%edx)
1301 movaps %xmm4, 32(%edx)
1302 movaps %xmm3, 16(%edx)
1303 movaps %xmm2, (%edx)
1304 lea 64(%edx), %edx
1305 jmp L(Shl8LoopStart)
1306
1307 L(Shl8LoopExit):
/* NUL is in the aligned block at 8(%ecx).  Copy the 8 bytes before it with
   one 8-byte move and finish in CopyFrom1To16Bytes with %esi = 8.  */
1308 movlpd (%ecx), %xmm0
1309 movlpd %xmm0, (%edx)
1310 mov $8, %esi
1311 jmp L(CopyFrom1To16Bytes)
1312
1313 .p2align 4
1314 L(Shl9):
/* Shifted-copy path: destination 16-byte aligned, source 9 bytes past a
   16-byte boundary.  Aligned loads at -9, 7, 23, ... from %ecx are merged
   with palignr $9.  %xmm0 = zero comparand, %xmm1 = previous aligned
   block, %xmm2 = current aligned block; %ebx = strncpy count.  */
1315 movaps -9(%ecx), %xmm1
1316 movaps 7(%ecx), %xmm2
1317 L(Shl9Start):
/* Check the current block for a NUL; %xmm3 keeps a copy because palignr
   overwrites %xmm2.  */
1318 pcmpeqb %xmm2, %xmm0
1319 pmovmskb %xmm0, %eax
1320 movaps %xmm2, %xmm3
1321 # ifdef USE_AS_STRNCPY
1322 sub $16, %ebx
1323 jbe L(StrncpyExit9Case2OrCase3)
1324 # endif
1325 test %eax, %eax
1326 jnz L(Shl9LoopExit)
1327
/* No NUL: merge, store 16 bytes, fetch the next aligned block.  */
1328 palignr $9, %xmm1, %xmm2
1329 movaps %xmm3, %xmm1
1330 movaps %xmm2, (%edx)
1331 movaps 23(%ecx), %xmm2
1332
1333 pcmpeqb %xmm2, %xmm0
1334 lea 16(%edx), %edx
1335 pmovmskb %xmm0, %eax
1336 lea 16(%ecx), %ecx
1337 movaps %xmm2, %xmm3
1338 # ifdef USE_AS_STRNCPY
1339 sub $16, %ebx
1340 jbe L(StrncpyExit9Case2OrCase3)
1341 # endif
1342 test %eax, %eax
1343 jnz L(Shl9LoopExit)
1344
1345 palignr $9, %xmm1, %xmm2
1346 movaps %xmm2, (%edx)
1347 movaps 23(%ecx), %xmm2
1348 movaps %xmm3, %xmm1
1349
1350 pcmpeqb %xmm2, %xmm0
1351 lea 16(%edx), %edx
1352 pmovmskb %xmm0, %eax
1353 lea 16(%ecx), %ecx
1354 movaps %xmm2, %xmm3
1355 # ifdef USE_AS_STRNCPY
1356 sub $16, %ebx
1357 jbe L(StrncpyExit9Case2OrCase3)
1358 # endif
1359 test %eax, %eax
1360 jnz L(Shl9LoopExit)
1361
1362 palignr $9, %xmm1, %xmm2
1363 movaps %xmm2, (%edx)
1364 movaps 23(%ecx), %xmm2
1365
1366 pcmpeqb %xmm2, %xmm0
1367 lea 16(%edx), %edx
1368 pmovmskb %xmm0, %eax
1369 lea 16(%ecx), %ecx
1370 # ifdef USE_AS_STRNCPY
1371 sub $16, %ebx
1372 jbe L(StrncpyExit9Case2OrCase3)
1373 # endif
1374 test %eax, %eax
1375 jnz L(Shl9LoopExit)
1376
/* Fourth block: %xmm3 still holds the previous aligned block.  */
1377 palignr $9, %xmm3, %xmm2
1378 movaps %xmm2, (%edx)
1379 lea 23(%ecx), %ecx
1380 lea 16(%edx), %edx
1381
/* Re-base so that 7(%ecx) is 64-byte aligned; %edx is moved back by
   %eax = (%ecx mod 64) and the strncpy count grows by the same amount.  */
1382 mov %ecx, %eax
1383 and $-0x40, %ecx
1384 sub %ecx, %eax
1385 lea -7(%ecx), %ecx
1386 sub %eax, %edx
1387 # ifdef USE_AS_STRNCPY
1388 add %eax, %ebx
1389 # endif
1390 movaps -9(%ecx), %xmm1
1391
1392 L(Shl9LoopStart):
/* Main loop, 64 source bytes per iteration; pminub folds the four blocks
   for a single NUL test, %xmm7 carries the last block to the next pass.  */
1393 movaps 7(%ecx), %xmm2
1394 movaps 23(%ecx), %xmm3
1395 movaps %xmm3, %xmm6
1396 movaps 39(%ecx), %xmm4
1397 movaps %xmm4, %xmm7
1398 movaps 55(%ecx), %xmm5
1399 pminub %xmm2, %xmm6
1400 pminub %xmm5, %xmm7
1401 pminub %xmm6, %xmm7
1402 pcmpeqb %xmm0, %xmm7
1403 pmovmskb %xmm7, %eax
1404 movaps %xmm5, %xmm7
1405 palignr $9, %xmm4, %xmm5
1406 test %eax, %eax
/* palignr does not modify the flags set by `test'.  */
1407 palignr $9, %xmm3, %xmm4
/* NUL in these 64 bytes: redo them in 16-byte steps.  */
1408 jnz L(Shl9Start)
1409 # ifdef USE_AS_STRNCPY
1410 sub $64, %ebx
1411 jbe L(StrncpyLeave9)
1412 # endif
1413 palignr $9, %xmm2, %xmm3
1414 lea 64(%ecx), %ecx
1415 palignr $9, %xmm1, %xmm2
1416 movaps %xmm7, %xmm1
1417 movaps %xmm5, 48(%edx)
1418 movaps %xmm4, 32(%edx)
1419 movaps %xmm3, 16(%edx)
1420 movaps %xmm2, (%edx)
1421 lea 64(%edx), %edx
1422 jmp L(Shl9LoopStart)
1423
1424 L(Shl9LoopExit):
/* NUL is in the aligned block at 7(%ecx).  Only 7 bytes precede it, so a
   single 8-byte move starting one byte early (offset -1) covers them; the
   extra byte at -1 is rewritten with the same source byte.  Finish in
   CopyFrom1To16Bytes with %esi = 7.  */
1425 movlpd -1(%ecx), %xmm0
1426 movlpd %xmm0, -1(%edx)
1427 mov $7, %esi
1428 jmp L(CopyFrom1To16Bytes)
1429
/* Shl10: copy path for a source pointer %ecx that sits 10 bytes past a
   16-byte boundary (-10(%ecx) and 6(%ecx) are both read with movaps).
   %edx is the destination and is written with aligned 16-byte stores.
   Every stored chunk is assembled from two neighbouring aligned source
   chunks with palignr $10.
   NOTE(review): pcmpeqb against %xmm0 only detects NUL bytes if %xmm0 is
   all-zero on entry; that is set up outside this view -- confirm.  */
1430 .p2align 4
1431 L(Shl10):
1432 movaps -10(%ecx), %xmm1
1433 movaps 6(%ecx), %xmm2
/* Round 1 of 4.  %xmm1 = previous aligned chunk, %xmm2 = current one.
   %eax gets the zero-byte mask of the current chunk; %xmm3 keeps an
   unshifted copy because palignr overwrites %xmm2.  */
1434 L(Shl10Start):
1435 pcmpeqb %xmm2, %xmm0
1436 pmovmskb %xmm0, %eax
1437 movaps %xmm2, %xmm3
1438 # ifdef USE_AS_STRNCPY
1439 sub $16, %ebx
1440 jbe L(StrncpyExit10Case2OrCase3)
1441 # endif
1442 test %eax, %eax
1443 jnz L(Shl10LoopExit)
1444
1445 palignr $10, %xmm1, %xmm2
1446 movaps %xmm3, %xmm1
1447 movaps %xmm2, (%edx)
1448 movaps 22(%ecx), %xmm2
1449
/* Round 2: same test on the next chunk; both pointers advance by 16.  */
1450 pcmpeqb %xmm2, %xmm0
1451 lea 16(%edx), %edx
1452 pmovmskb %xmm0, %eax
1453 lea 16(%ecx), %ecx
1454 movaps %xmm2, %xmm3
1455 # ifdef USE_AS_STRNCPY
1456 sub $16, %ebx
1457 jbe L(StrncpyExit10Case2OrCase3)
1458 # endif
1459 test %eax, %eax
1460 jnz L(Shl10LoopExit)
1461
1462 palignr $10, %xmm1, %xmm2
1463 movaps %xmm2, (%edx)
1464 movaps 22(%ecx), %xmm2
1465 movaps %xmm3, %xmm1
1466
/* Round 3.  */
1467 pcmpeqb %xmm2, %xmm0
1468 lea 16(%edx), %edx
1469 pmovmskb %xmm0, %eax
1470 lea 16(%ecx), %ecx
1471 movaps %xmm2, %xmm3
1472 # ifdef USE_AS_STRNCPY
1473 sub $16, %ebx
1474 jbe L(StrncpyExit10Case2OrCase3)
1475 # endif
1476 test %eax, %eax
1477 jnz L(Shl10LoopExit)
1478
1479 palignr $10, %xmm1, %xmm2
1480 movaps %xmm2, (%edx)
1481 movaps 22(%ecx), %xmm2
1482
/* Round 4.  %xmm3 is not refreshed here, so it still holds the chunk
   loaded in round 3 and serves as the "previous" operand below.  */
1483 pcmpeqb %xmm2, %xmm0
1484 lea 16(%edx), %edx
1485 pmovmskb %xmm0, %eax
1486 lea 16(%ecx), %ecx
1487 # ifdef USE_AS_STRNCPY
1488 sub $16, %ebx
1489 jbe L(StrncpyExit10Case2OrCase3)
1490 # endif
1491 test %eax, %eax
1492 jnz L(Shl10LoopExit)
1493
1494 palignr $10, %xmm3, %xmm2
1495 movaps %xmm2, (%edx)
1496 lea 22(%ecx), %ecx
1497 lea 16(%edx), %edx
1498
/* Round the source down to a 64-byte boundary.  %eax = distance the
   source was past that boundary; the destination is moved back by the
   same amount (and the strncpy count moved up), then %ecx is biased by
   -6 so the loop below can keep using the 6/22/38/54 displacements.  */
1499 mov %ecx, %eax
1500 and $-0x40, %ecx
1501 sub %ecx, %eax
1502 lea -6(%ecx), %ecx
1503 sub %eax, %edx
1504 # ifdef USE_AS_STRNCPY
1505 add %eax, %ebx
1506 # endif
1507 movaps -10(%ecx), %xmm1
1508
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   chunks into %xmm7 so one pcmpeqb/pmovmskb reports a zero byte anywhere
   in the 64 bytes.  If one is found, nothing is stored and control goes
   back to L(Shl10Start) (with %xmm1/%xmm2 already loaded) to locate it
   in 16-byte steps.  %xmm7 is then reused to keep the unshifted last
   chunk, which becomes %xmm1 for the next iteration.  */
1509 L(Shl10LoopStart):
1510 movaps 6(%ecx), %xmm2
1511 movaps 22(%ecx), %xmm3
1512 movaps %xmm3, %xmm6
1513 movaps 38(%ecx), %xmm4
1514 movaps %xmm4, %xmm7
1515 movaps 54(%ecx), %xmm5
1516 pminub %xmm2, %xmm6
1517 pminub %xmm5, %xmm7
1518 pminub %xmm6, %xmm7
1519 pcmpeqb %xmm0, %xmm7
1520 pmovmskb %xmm7, %eax
1521 movaps %xmm5, %xmm7
1522 palignr $10, %xmm4, %xmm5
1523 test %eax, %eax
1524 palignr $10, %xmm3, %xmm4
1525 jnz L(Shl10Start)
1526 # ifdef USE_AS_STRNCPY
1527 sub $64, %ebx
1528 jbe L(StrncpyLeave10)
1529 # endif
1530 palignr $10, %xmm2, %xmm3
1531 lea 64(%ecx), %ecx
1532 palignr $10, %xmm1, %xmm2
1533 movaps %xmm7, %xmm1
1534 movaps %xmm5, 48(%edx)
1535 movaps %xmm4, 32(%edx)
1536 movaps %xmm3, 16(%edx)
1537 movaps %xmm2, (%edx)
1538 lea 64(%edx), %edx
1539 jmp L(Shl10LoopStart)
1540
/* A zero byte was found in the aligned chunk at 6(%ecx).  Copy the
   8 bytes at -2(%ecx), which cover the 6 bytes in front of that chunk,
   then hand over with %esi = 6 so L(CopyFrom1To16Bytes) can step both
   pointers onto the chunk and finish according to the mask in %eax.  */
1541 L(Shl10LoopExit):
1542 movlpd -2(%ecx), %xmm0
1543 movlpd %xmm0, -2(%edx)
1544 mov $6, %esi
1545 jmp L(CopyFrom1To16Bytes)
1546
/* Shl11: copy path for a source pointer %ecx that sits 11 bytes past a
   16-byte boundary (-11(%ecx) and 5(%ecx) are both read with movaps).
   %edx is the destination and is written with aligned 16-byte stores.
   Every stored chunk is assembled from two neighbouring aligned source
   chunks with palignr $11.
   NOTE(review): pcmpeqb against %xmm0 only detects NUL bytes if %xmm0 is
   all-zero on entry; that is set up outside this view -- confirm.  */
1547 .p2align 4
1548 L(Shl11):
1549 movaps -11(%ecx), %xmm1
1550 movaps 5(%ecx), %xmm2
/* Round 1 of 4.  %xmm1 = previous aligned chunk, %xmm2 = current one.
   %eax gets the zero-byte mask of the current chunk; %xmm3 keeps an
   unshifted copy because palignr overwrites %xmm2.  */
1551 L(Shl11Start):
1552 pcmpeqb %xmm2, %xmm0
1553 pmovmskb %xmm0, %eax
1554 movaps %xmm2, %xmm3
1555 # ifdef USE_AS_STRNCPY
1556 sub $16, %ebx
1557 jbe L(StrncpyExit11Case2OrCase3)
1558 # endif
1559 test %eax, %eax
1560 jnz L(Shl11LoopExit)
1561
1562 palignr $11, %xmm1, %xmm2
1563 movaps %xmm3, %xmm1
1564 movaps %xmm2, (%edx)
1565 movaps 21(%ecx), %xmm2
1566
/* Round 2: same test on the next chunk; both pointers advance by 16.  */
1567 pcmpeqb %xmm2, %xmm0
1568 lea 16(%edx), %edx
1569 pmovmskb %xmm0, %eax
1570 lea 16(%ecx), %ecx
1571 movaps %xmm2, %xmm3
1572 # ifdef USE_AS_STRNCPY
1573 sub $16, %ebx
1574 jbe L(StrncpyExit11Case2OrCase3)
1575 # endif
1576 test %eax, %eax
1577 jnz L(Shl11LoopExit)
1578
1579 palignr $11, %xmm1, %xmm2
1580 movaps %xmm2, (%edx)
1581 movaps 21(%ecx), %xmm2
1582 movaps %xmm3, %xmm1
1583
/* Round 3.  */
1584 pcmpeqb %xmm2, %xmm0
1585 lea 16(%edx), %edx
1586 pmovmskb %xmm0, %eax
1587 lea 16(%ecx), %ecx
1588 movaps %xmm2, %xmm3
1589 # ifdef USE_AS_STRNCPY
1590 sub $16, %ebx
1591 jbe L(StrncpyExit11Case2OrCase3)
1592 # endif
1593 test %eax, %eax
1594 jnz L(Shl11LoopExit)
1595
1596 palignr $11, %xmm1, %xmm2
1597 movaps %xmm2, (%edx)
1598 movaps 21(%ecx), %xmm2
1599
/* Round 4.  %xmm3 is not refreshed here, so it still holds the chunk
   loaded in round 3 and serves as the "previous" operand below.  */
1600 pcmpeqb %xmm2, %xmm0
1601 lea 16(%edx), %edx
1602 pmovmskb %xmm0, %eax
1603 lea 16(%ecx), %ecx
1604 # ifdef USE_AS_STRNCPY
1605 sub $16, %ebx
1606 jbe L(StrncpyExit11Case2OrCase3)
1607 # endif
1608 test %eax, %eax
1609 jnz L(Shl11LoopExit)
1610
1611 palignr $11, %xmm3, %xmm2
1612 movaps %xmm2, (%edx)
1613 lea 21(%ecx), %ecx
1614 lea 16(%edx), %edx
1615
/* Round the source down to a 64-byte boundary.  %eax = distance the
   source was past that boundary; the destination is moved back by the
   same amount (and the strncpy count moved up), then %ecx is biased by
   -5 so the loop below can keep using the 5/21/37/53 displacements.  */
1616 mov %ecx, %eax
1617 and $-0x40, %ecx
1618 sub %ecx, %eax
1619 lea -5(%ecx), %ecx
1620 sub %eax, %edx
1621 # ifdef USE_AS_STRNCPY
1622 add %eax, %ebx
1623 # endif
1624 movaps -11(%ecx), %xmm1
1625
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   chunks into %xmm7 so one pcmpeqb/pmovmskb reports a zero byte anywhere
   in the 64 bytes.  If one is found, nothing is stored and control goes
   back to L(Shl11Start) (with %xmm1/%xmm2 already loaded) to locate it
   in 16-byte steps.  %xmm7 is then reused to keep the unshifted last
   chunk, which becomes %xmm1 for the next iteration.  */
1626 L(Shl11LoopStart):
1627 movaps 5(%ecx), %xmm2
1628 movaps 21(%ecx), %xmm3
1629 movaps %xmm3, %xmm6
1630 movaps 37(%ecx), %xmm4
1631 movaps %xmm4, %xmm7
1632 movaps 53(%ecx), %xmm5
1633 pminub %xmm2, %xmm6
1634 pminub %xmm5, %xmm7
1635 pminub %xmm6, %xmm7
1636 pcmpeqb %xmm0, %xmm7
1637 pmovmskb %xmm7, %eax
1638 movaps %xmm5, %xmm7
1639 palignr $11, %xmm4, %xmm5
1640 test %eax, %eax
1641 palignr $11, %xmm3, %xmm4
1642 jnz L(Shl11Start)
1643 # ifdef USE_AS_STRNCPY
1644 sub $64, %ebx
1645 jbe L(StrncpyLeave11)
1646 # endif
1647 palignr $11, %xmm2, %xmm3
1648 lea 64(%ecx), %ecx
1649 palignr $11, %xmm1, %xmm2
1650 movaps %xmm7, %xmm1
1651 movaps %xmm5, 48(%edx)
1652 movaps %xmm4, 32(%edx)
1653 movaps %xmm3, 16(%edx)
1654 movaps %xmm2, (%edx)
1655 lea 64(%edx), %edx
1656 jmp L(Shl11LoopStart)
1657
/* A zero byte was found in the aligned chunk at 5(%ecx).  Copy the
   8 bytes at -3(%ecx), which cover the 5 bytes in front of that chunk,
   then hand over with %esi = 5 so L(CopyFrom1To16Bytes) can step both
   pointers onto the chunk and finish according to the mask in %eax.  */
1658 L(Shl11LoopExit):
1659 movlpd -3(%ecx), %xmm0
1660 movlpd %xmm0, -3(%edx)
1661 mov $5, %esi
1662 jmp L(CopyFrom1To16Bytes)
1663
/* Shl12: copy path for a source pointer %ecx that sits 12 bytes past a
   16-byte boundary (-12(%ecx) and 4(%ecx) are both read with movaps).
   %edx is the destination and is written with aligned 16-byte stores.
   Every stored chunk is assembled from two neighbouring aligned source
   chunks with palignr $12.
   NOTE(review): pcmpeqb against %xmm0 only detects NUL bytes if %xmm0 is
   all-zero on entry; that is set up outside this view -- confirm.  */
1664 .p2align 4
1665 L(Shl12):
1666 movaps -12(%ecx), %xmm1
1667 movaps 4(%ecx), %xmm2
/* Round 1 of 4.  %xmm1 = previous aligned chunk, %xmm2 = current one.
   %eax gets the zero-byte mask of the current chunk; %xmm3 keeps an
   unshifted copy because palignr overwrites %xmm2.  */
1668 L(Shl12Start):
1669 pcmpeqb %xmm2, %xmm0
1670 pmovmskb %xmm0, %eax
1671 movaps %xmm2, %xmm3
1672 # ifdef USE_AS_STRNCPY
1673 sub $16, %ebx
1674 jbe L(StrncpyExit12Case2OrCase3)
1675 # endif
1676 test %eax, %eax
1677 jnz L(Shl12LoopExit)
1678
1679 palignr $12, %xmm1, %xmm2
1680 movaps %xmm3, %xmm1
1681 movaps %xmm2, (%edx)
1682 movaps 20(%ecx), %xmm2
1683
/* Round 2: same test on the next chunk; both pointers advance by 16.  */
1684 pcmpeqb %xmm2, %xmm0
1685 lea 16(%edx), %edx
1686 pmovmskb %xmm0, %eax
1687 lea 16(%ecx), %ecx
1688 movaps %xmm2, %xmm3
1689 # ifdef USE_AS_STRNCPY
1690 sub $16, %ebx
1691 jbe L(StrncpyExit12Case2OrCase3)
1692 # endif
1693 test %eax, %eax
1694 jnz L(Shl12LoopExit)
1695
1696 palignr $12, %xmm1, %xmm2
1697 movaps %xmm2, (%edx)
1698 movaps 20(%ecx), %xmm2
1699 movaps %xmm3, %xmm1
1700
/* Round 3.  */
1701 pcmpeqb %xmm2, %xmm0
1702 lea 16(%edx), %edx
1703 pmovmskb %xmm0, %eax
1704 lea 16(%ecx), %ecx
1705 movaps %xmm2, %xmm3
1706 # ifdef USE_AS_STRNCPY
1707 sub $16, %ebx
1708 jbe L(StrncpyExit12Case2OrCase3)
1709 # endif
1710 test %eax, %eax
1711 jnz L(Shl12LoopExit)
1712
1713 palignr $12, %xmm1, %xmm2
1714 movaps %xmm2, (%edx)
1715 movaps 20(%ecx), %xmm2
1716
/* Round 4.  %xmm3 is not refreshed here, so it still holds the chunk
   loaded in round 3 and serves as the "previous" operand below.  */
1717 pcmpeqb %xmm2, %xmm0
1718 lea 16(%edx), %edx
1719 pmovmskb %xmm0, %eax
1720 lea 16(%ecx), %ecx
1721 # ifdef USE_AS_STRNCPY
1722 sub $16, %ebx
1723 jbe L(StrncpyExit12Case2OrCase3)
1724 # endif
1725 test %eax, %eax
1726 jnz L(Shl12LoopExit)
1727
1728 palignr $12, %xmm3, %xmm2
1729 movaps %xmm2, (%edx)
1730 lea 20(%ecx), %ecx
1731 lea 16(%edx), %edx
1732
/* Round the source down to a 64-byte boundary.  %eax = distance the
   source was past that boundary; the destination is moved back by the
   same amount (and the strncpy count moved up), then %ecx is biased by
   -4 so the loop below can keep using the 4/20/36/52 displacements.  */
1733 mov %ecx, %eax
1734 and $-0x40, %ecx
1735 sub %ecx, %eax
1736 lea -4(%ecx), %ecx
1737 sub %eax, %edx
1738 # ifdef USE_AS_STRNCPY
1739 add %eax, %ebx
1740 # endif
1741 movaps -12(%ecx), %xmm1
1742
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   chunks into %xmm7 so one pcmpeqb/pmovmskb reports a zero byte anywhere
   in the 64 bytes.  If one is found, nothing is stored and control goes
   back to L(Shl12Start) (with %xmm1/%xmm2 already loaded) to locate it
   in 16-byte steps.  %xmm7 is then reused to keep the unshifted last
   chunk, which becomes %xmm1 for the next iteration.  */
1743 L(Shl12LoopStart):
1744 movaps 4(%ecx), %xmm2
1745 movaps 20(%ecx), %xmm3
1746 movaps %xmm3, %xmm6
1747 movaps 36(%ecx), %xmm4
1748 movaps %xmm4, %xmm7
1749 movaps 52(%ecx), %xmm5
1750 pminub %xmm2, %xmm6
1751 pminub %xmm5, %xmm7
1752 pminub %xmm6, %xmm7
1753 pcmpeqb %xmm0, %xmm7
1754 pmovmskb %xmm7, %eax
1755 movaps %xmm5, %xmm7
1756 palignr $12, %xmm4, %xmm5
1757 test %eax, %eax
1758 palignr $12, %xmm3, %xmm4
1759 jnz L(Shl12Start)
1760 # ifdef USE_AS_STRNCPY
1761 sub $64, %ebx
1762 jbe L(StrncpyLeave12)
1763 # endif
1764 palignr $12, %xmm2, %xmm3
1765 lea 64(%ecx), %ecx
1766 palignr $12, %xmm1, %xmm2
1767 movaps %xmm7, %xmm1
1768 movaps %xmm5, 48(%edx)
1769 movaps %xmm4, 32(%edx)
1770 movaps %xmm3, 16(%edx)
1771 movaps %xmm2, (%edx)
1772 lea 64(%edx), %edx
1773 jmp L(Shl12LoopStart)
1774
/* A zero byte was found in the aligned chunk at 4(%ecx).  Copy the
   4 bytes in front of that chunk (using %esi as scratch), then hand
   over with %esi = 4 so L(CopyFrom1To16Bytes) can step both pointers
   onto the chunk and finish according to the mask in %eax.  */
1775 L(Shl12LoopExit):
1776 movl (%ecx), %esi
1777 movl %esi, (%edx)
1778 mov $4, %esi
1779 jmp L(CopyFrom1To16Bytes)
1780
/* Shl13: copy path for a source pointer %ecx that sits 13 bytes past a
   16-byte boundary (-13(%ecx) and 3(%ecx) are both read with movaps).
   %edx is the destination and is written with aligned 16-byte stores.
   Every stored chunk is assembled from two neighbouring aligned source
   chunks with palignr $13.
   NOTE(review): pcmpeqb against %xmm0 only detects NUL bytes if %xmm0 is
   all-zero on entry; that is set up outside this view -- confirm.  */
1781 .p2align 4
1782 L(Shl13):
1783 movaps -13(%ecx), %xmm1
1784 movaps 3(%ecx), %xmm2
/* Round 1 of 4.  %xmm1 = previous aligned chunk, %xmm2 = current one.
   %eax gets the zero-byte mask of the current chunk; %xmm3 keeps an
   unshifted copy because palignr overwrites %xmm2.  */
1785 L(Shl13Start):
1786 pcmpeqb %xmm2, %xmm0
1787 pmovmskb %xmm0, %eax
1788 movaps %xmm2, %xmm3
1789 # ifdef USE_AS_STRNCPY
1790 sub $16, %ebx
1791 jbe L(StrncpyExit13Case2OrCase3)
1792 # endif
1793 test %eax, %eax
1794 jnz L(Shl13LoopExit)
1795
1796 palignr $13, %xmm1, %xmm2
1797 movaps %xmm3, %xmm1
1798 movaps %xmm2, (%edx)
1799 movaps 19(%ecx), %xmm2
1800
/* Round 2: same test on the next chunk; both pointers advance by 16.  */
1801 pcmpeqb %xmm2, %xmm0
1802 lea 16(%edx), %edx
1803 pmovmskb %xmm0, %eax
1804 lea 16(%ecx), %ecx
1805 movaps %xmm2, %xmm3
1806 # ifdef USE_AS_STRNCPY
1807 sub $16, %ebx
1808 jbe L(StrncpyExit13Case2OrCase3)
1809 # endif
1810 test %eax, %eax
1811 jnz L(Shl13LoopExit)
1812
1813 palignr $13, %xmm1, %xmm2
1814 movaps %xmm2, (%edx)
1815 movaps 19(%ecx), %xmm2
1816 movaps %xmm3, %xmm1
1817
/* Round 3.  */
1818 pcmpeqb %xmm2, %xmm0
1819 lea 16(%edx), %edx
1820 pmovmskb %xmm0, %eax
1821 lea 16(%ecx), %ecx
1822 movaps %xmm2, %xmm3
1823 # ifdef USE_AS_STRNCPY
1824 sub $16, %ebx
1825 jbe L(StrncpyExit13Case2OrCase3)
1826 # endif
1827 test %eax, %eax
1828 jnz L(Shl13LoopExit)
1829
1830 palignr $13, %xmm1, %xmm2
1831 movaps %xmm2, (%edx)
1832 movaps 19(%ecx), %xmm2
1833
/* Round 4.  %xmm3 is not refreshed here, so it still holds the chunk
   loaded in round 3 and serves as the "previous" operand below.  */
1834 pcmpeqb %xmm2, %xmm0
1835 lea 16(%edx), %edx
1836 pmovmskb %xmm0, %eax
1837 lea 16(%ecx), %ecx
1838 # ifdef USE_AS_STRNCPY
1839 sub $16, %ebx
1840 jbe L(StrncpyExit13Case2OrCase3)
1841 # endif
1842 test %eax, %eax
1843 jnz L(Shl13LoopExit)
1844
1845 palignr $13, %xmm3, %xmm2
1846 movaps %xmm2, (%edx)
1847 lea 19(%ecx), %ecx
1848 lea 16(%edx), %edx
1849
/* Round the source down to a 64-byte boundary.  %eax = distance the
   source was past that boundary; the destination is moved back by the
   same amount (and the strncpy count moved up), then %ecx is biased by
   -3 so the loop below can keep using the 3/19/35/51 displacements.  */
1850 mov %ecx, %eax
1851 and $-0x40, %ecx
1852 sub %ecx, %eax
1853 lea -3(%ecx), %ecx
1854 sub %eax, %edx
1855 # ifdef USE_AS_STRNCPY
1856 add %eax, %ebx
1857 # endif
1858 movaps -13(%ecx), %xmm1
1859
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   chunks into %xmm7 so one pcmpeqb/pmovmskb reports a zero byte anywhere
   in the 64 bytes.  If one is found, nothing is stored and control goes
   back to L(Shl13Start) (with %xmm1/%xmm2 already loaded) to locate it
   in 16-byte steps.  %xmm7 is then reused to keep the unshifted last
   chunk, which becomes %xmm1 for the next iteration.  */
1860 L(Shl13LoopStart):
1861 movaps 3(%ecx), %xmm2
1862 movaps 19(%ecx), %xmm3
1863 movaps %xmm3, %xmm6
1864 movaps 35(%ecx), %xmm4
1865 movaps %xmm4, %xmm7
1866 movaps 51(%ecx), %xmm5
1867 pminub %xmm2, %xmm6
1868 pminub %xmm5, %xmm7
1869 pminub %xmm6, %xmm7
1870 pcmpeqb %xmm0, %xmm7
1871 pmovmskb %xmm7, %eax
1872 movaps %xmm5, %xmm7
1873 palignr $13, %xmm4, %xmm5
1874 test %eax, %eax
1875 palignr $13, %xmm3, %xmm4
1876 jnz L(Shl13Start)
1877 # ifdef USE_AS_STRNCPY
1878 sub $64, %ebx
1879 jbe L(StrncpyLeave13)
1880 # endif
1881 palignr $13, %xmm2, %xmm3
1882 lea 64(%ecx), %ecx
1883 palignr $13, %xmm1, %xmm2
1884 movaps %xmm7, %xmm1
1885 movaps %xmm5, 48(%edx)
1886 movaps %xmm4, 32(%edx)
1887 movaps %xmm3, 16(%edx)
1888 movaps %xmm2, (%edx)
1889 lea 64(%edx), %edx
1890 jmp L(Shl13LoopStart)
1891
/* A zero byte was found in the aligned chunk at 3(%ecx).  Copy the
   4 bytes at -1(%ecx), which cover the 3 bytes in front of that chunk
   (%esi is scratch), then hand over with %esi = 3 so
   L(CopyFrom1To16Bytes) can step both pointers onto the chunk and
   finish according to the mask in %eax.  */
1892 L(Shl13LoopExit):
1893 movl -1(%ecx), %esi
1894 movl %esi, -1(%edx)
1895 mov $3, %esi
1896 jmp L(CopyFrom1To16Bytes)
1897
/* Shl14: copy path for a source pointer %ecx that sits 14 bytes past a
   16-byte boundary (-14(%ecx) and 2(%ecx) are both read with movaps).
   %edx is the destination and is written with aligned 16-byte stores.
   Every stored chunk is assembled from two neighbouring aligned source
   chunks with palignr $14.
   NOTE(review): pcmpeqb against %xmm0 only detects NUL bytes if %xmm0 is
   all-zero on entry; that is set up outside this view -- confirm.  */
1898 .p2align 4
1899 L(Shl14):
1900 movaps -14(%ecx), %xmm1
1901 movaps 2(%ecx), %xmm2
/* Round 1 of 4.  %xmm1 = previous aligned chunk, %xmm2 = current one.
   %eax gets the zero-byte mask of the current chunk; %xmm3 keeps an
   unshifted copy because palignr overwrites %xmm2.  */
1902 L(Shl14Start):
1903 pcmpeqb %xmm2, %xmm0
1904 pmovmskb %xmm0, %eax
1905 movaps %xmm2, %xmm3
1906 # ifdef USE_AS_STRNCPY
1907 sub $16, %ebx
1908 jbe L(StrncpyExit14Case2OrCase3)
1909 # endif
1910 test %eax, %eax
1911 jnz L(Shl14LoopExit)
1912
1913 palignr $14, %xmm1, %xmm2
1914 movaps %xmm3, %xmm1
1915 movaps %xmm2, (%edx)
1916 movaps 18(%ecx), %xmm2
1917
/* Round 2: same test on the next chunk; both pointers advance by 16.  */
1918 pcmpeqb %xmm2, %xmm0
1919 lea 16(%edx), %edx
1920 pmovmskb %xmm0, %eax
1921 lea 16(%ecx), %ecx
1922 movaps %xmm2, %xmm3
1923 # ifdef USE_AS_STRNCPY
1924 sub $16, %ebx
1925 jbe L(StrncpyExit14Case2OrCase3)
1926 # endif
1927 test %eax, %eax
1928 jnz L(Shl14LoopExit)
1929
1930 palignr $14, %xmm1, %xmm2
1931 movaps %xmm2, (%edx)
1932 movaps 18(%ecx), %xmm2
1933 movaps %xmm3, %xmm1
1934
/* Round 3.  */
1935 pcmpeqb %xmm2, %xmm0
1936 lea 16(%edx), %edx
1937 pmovmskb %xmm0, %eax
1938 lea 16(%ecx), %ecx
1939 movaps %xmm2, %xmm3
1940 # ifdef USE_AS_STRNCPY
1941 sub $16, %ebx
1942 jbe L(StrncpyExit14Case2OrCase3)
1943 # endif
1944 test %eax, %eax
1945 jnz L(Shl14LoopExit)
1946
1947 palignr $14, %xmm1, %xmm2
1948 movaps %xmm2, (%edx)
1949 movaps 18(%ecx), %xmm2
1950
/* Round 4.  %xmm3 is not refreshed here, so it still holds the chunk
   loaded in round 3 and serves as the "previous" operand below.  */
1951 pcmpeqb %xmm2, %xmm0
1952 lea 16(%edx), %edx
1953 pmovmskb %xmm0, %eax
1954 lea 16(%ecx), %ecx
1955 # ifdef USE_AS_STRNCPY
1956 sub $16, %ebx
1957 jbe L(StrncpyExit14Case2OrCase3)
1958 # endif
1959 test %eax, %eax
1960 jnz L(Shl14LoopExit)
1961
1962 palignr $14, %xmm3, %xmm2
1963 movaps %xmm2, (%edx)
1964 lea 18(%ecx), %ecx
1965 lea 16(%edx), %edx
1966
/* Round the source down to a 64-byte boundary.  %eax = distance the
   source was past that boundary; the destination is moved back by the
   same amount (and the strncpy count moved up), then %ecx is biased by
   -2 so the loop below can keep using the 2/18/34/50 displacements.  */
1967 mov %ecx, %eax
1968 and $-0x40, %ecx
1969 sub %ecx, %eax
1970 lea -2(%ecx), %ecx
1971 sub %eax, %edx
1972 # ifdef USE_AS_STRNCPY
1973 add %eax, %ebx
1974 # endif
1975 movaps -14(%ecx), %xmm1
1976
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   chunks into %xmm7 so one pcmpeqb/pmovmskb reports a zero byte anywhere
   in the 64 bytes.  If one is found, nothing is stored and control goes
   back to L(Shl14Start) (with %xmm1/%xmm2 already loaded) to locate it
   in 16-byte steps.  %xmm7 is then reused to keep the unshifted last
   chunk, which becomes %xmm1 for the next iteration.  */
1977 L(Shl14LoopStart):
1978 movaps 2(%ecx), %xmm2
1979 movaps 18(%ecx), %xmm3
1980 movaps %xmm3, %xmm6
1981 movaps 34(%ecx), %xmm4
1982 movaps %xmm4, %xmm7
1983 movaps 50(%ecx), %xmm5
1984 pminub %xmm2, %xmm6
1985 pminub %xmm5, %xmm7
1986 pminub %xmm6, %xmm7
1987 pcmpeqb %xmm0, %xmm7
1988 pmovmskb %xmm7, %eax
1989 movaps %xmm5, %xmm7
1990 palignr $14, %xmm4, %xmm5
1991 test %eax, %eax
1992 palignr $14, %xmm3, %xmm4
1993 jnz L(Shl14Start)
1994 # ifdef USE_AS_STRNCPY
1995 sub $64, %ebx
1996 jbe L(StrncpyLeave14)
1997 # endif
1998 palignr $14, %xmm2, %xmm3
1999 lea 64(%ecx), %ecx
2000 palignr $14, %xmm1, %xmm2
2001 movaps %xmm7, %xmm1
2002 movaps %xmm5, 48(%edx)
2003 movaps %xmm4, 32(%edx)
2004 movaps %xmm3, 16(%edx)
2005 movaps %xmm2, (%edx)
2006 lea 64(%edx), %edx
2007 jmp L(Shl14LoopStart)
2008
/* A zero byte was found in the aligned chunk at 2(%ecx).  Copy the
   4 bytes at -2(%ecx), which cover the 2 bytes in front of that chunk
   (%esi is scratch), then hand over with %esi = 2 so
   L(CopyFrom1To16Bytes) can step both pointers onto the chunk and
   finish according to the mask in %eax.  */
2009 L(Shl14LoopExit):
2010 movl -2(%ecx), %esi
2011 movl %esi, -2(%edx)
2012 mov $2, %esi
2013 jmp L(CopyFrom1To16Bytes)
2014
/* Shl15: copy path for a source pointer %ecx that sits 15 bytes past a
   16-byte boundary (-15(%ecx) and 1(%ecx) are both read with movaps).
   %edx is the destination and is written with aligned 16-byte stores.
   Every stored chunk is assembled from two neighbouring aligned source
   chunks with palignr $15.
   NOTE(review): pcmpeqb against %xmm0 only detects NUL bytes if %xmm0 is
   all-zero on entry; that is set up outside this view -- confirm.  */
2015 .p2align 4
2016 L(Shl15):
2017 movaps -15(%ecx), %xmm1
2018 movaps 1(%ecx), %xmm2
/* Round 1 of 4.  %xmm1 = previous aligned chunk, %xmm2 = current one.
   %eax gets the zero-byte mask of the current chunk; %xmm3 keeps an
   unshifted copy because palignr overwrites %xmm2.  */
2019 L(Shl15Start):
2020 pcmpeqb %xmm2, %xmm0
2021 pmovmskb %xmm0, %eax
2022 movaps %xmm2, %xmm3
2023 # ifdef USE_AS_STRNCPY
2024 sub $16, %ebx
2025 jbe L(StrncpyExit15Case2OrCase3)
2026 # endif
2027 test %eax, %eax
2028 jnz L(Shl15LoopExit)
2029
2030 palignr $15, %xmm1, %xmm2
2031 movaps %xmm3, %xmm1
2032 movaps %xmm2, (%edx)
2033 movaps 17(%ecx), %xmm2
2034
/* Round 2: same test on the next chunk; both pointers advance by 16.  */
2035 pcmpeqb %xmm2, %xmm0
2036 lea 16(%edx), %edx
2037 pmovmskb %xmm0, %eax
2038 lea 16(%ecx), %ecx
2039 movaps %xmm2, %xmm3
2040 # ifdef USE_AS_STRNCPY
2041 sub $16, %ebx
2042 jbe L(StrncpyExit15Case2OrCase3)
2043 # endif
2044 test %eax, %eax
2045 jnz L(Shl15LoopExit)
2046
2047 palignr $15, %xmm1, %xmm2
2048 movaps %xmm2, (%edx)
2049 movaps 17(%ecx), %xmm2
2050 movaps %xmm3, %xmm1
2051
/* Round 3.  */
2052 pcmpeqb %xmm2, %xmm0
2053 lea 16(%edx), %edx
2054 pmovmskb %xmm0, %eax
2055 lea 16(%ecx), %ecx
2056 movaps %xmm2, %xmm3
2057 # ifdef USE_AS_STRNCPY
2058 sub $16, %ebx
2059 jbe L(StrncpyExit15Case2OrCase3)
2060 # endif
2061 test %eax, %eax
2062 jnz L(Shl15LoopExit)
2063
2064 palignr $15, %xmm1, %xmm2
2065 movaps %xmm2, (%edx)
2066 movaps 17(%ecx), %xmm2
2067
/* Round 4.  %xmm3 is not refreshed here, so it still holds the chunk
   loaded in round 3 and serves as the "previous" operand below.  */
2068 pcmpeqb %xmm2, %xmm0
2069 lea 16(%edx), %edx
2070 pmovmskb %xmm0, %eax
2071 lea 16(%ecx), %ecx
2072 # ifdef USE_AS_STRNCPY
2073 sub $16, %ebx
2074 jbe L(StrncpyExit15Case2OrCase3)
2075 # endif
2076 test %eax, %eax
2077 jnz L(Shl15LoopExit)
2078
2079 palignr $15, %xmm3, %xmm2
2080 movaps %xmm2, (%edx)
2081 lea 17(%ecx), %ecx
2082 lea 16(%edx), %edx
2083
/* Round the source down to a 64-byte boundary.  %eax = distance the
   source was past that boundary; the destination is moved back by the
   same amount (and the strncpy count moved up), then %ecx is biased by
   -1 so the loop below can keep using the 1/17/33/49 displacements.  */
2084 mov %ecx, %eax
2085 and $-0x40, %ecx
2086 sub %ecx, %eax
2087 lea -1(%ecx), %ecx
2088 sub %eax, %edx
2089 # ifdef USE_AS_STRNCPY
2090 add %eax, %ebx
2091 # endif
2092 movaps -15(%ecx), %xmm1
2093
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   chunks into %xmm7 so one pcmpeqb/pmovmskb reports a zero byte anywhere
   in the 64 bytes.  If one is found, nothing is stored and control goes
   back to L(Shl15Start) (with %xmm1/%xmm2 already loaded) to locate it
   in 16-byte steps.  %xmm7 is then reused to keep the unshifted last
   chunk, which becomes %xmm1 for the next iteration.  */
2094 L(Shl15LoopStart):
2095 movaps 1(%ecx), %xmm2
2096 movaps 17(%ecx), %xmm3
2097 movaps %xmm3, %xmm6
2098 movaps 33(%ecx), %xmm4
2099 movaps %xmm4, %xmm7
2100 movaps 49(%ecx), %xmm5
2101 pminub %xmm2, %xmm6
2102 pminub %xmm5, %xmm7
2103 pminub %xmm6, %xmm7
2104 pcmpeqb %xmm0, %xmm7
2105 pmovmskb %xmm7, %eax
2106 movaps %xmm5, %xmm7
2107 palignr $15, %xmm4, %xmm5
2108 test %eax, %eax
2109 palignr $15, %xmm3, %xmm4
2110 jnz L(Shl15Start)
2111 # ifdef USE_AS_STRNCPY
2112 sub $64, %ebx
2113 jbe L(StrncpyLeave15)
2114 # endif
2115 palignr $15, %xmm2, %xmm3
2116 lea 64(%ecx), %ecx
2117 palignr $15, %xmm1, %xmm2
2118 movaps %xmm7, %xmm1
2119 movaps %xmm5, 48(%edx)
2120 movaps %xmm4, 32(%edx)
2121 movaps %xmm3, 16(%edx)
2122 movaps %xmm2, (%edx)
2123 lea 64(%edx), %edx
2124 jmp L(Shl15LoopStart)
2125
/* A zero byte was found in the aligned chunk at 1(%ecx).  Copy the
   4 bytes at -3(%ecx), which cover the single byte in front of that
   chunk (%esi is scratch), and set %esi = 1 for L(CopyFrom1To16Bytes).
   Only USE_AS_STRCAT builds need the explicit jump; otherwise
   L(CopyFrom1To16Bytes) follows immediately and is reached by
   falling through.  */
2126 L(Shl15LoopExit):
2127 movl -3(%ecx), %esi
2128 movl %esi, -3(%edx)
2129 mov $1, %esi
2130 # ifdef USE_AS_STRCAT
2131 jmp L(CopyFrom1To16Bytes)
2132 # endif
2133
2134
2135 # ifndef USE_AS_STRCAT
2136
/* CopyFrom1To16Bytes: common tail once a zero byte has been located.
   In:  %eax = pmovmskb mask of the 16-byte chunk holding the zero byte,
        %esi = distance to add to %ecx (source) and %edx (destination)
               so that they point at that chunk.
   The saved %esi is popped here.  strncpy builds first add back the 16
   that the caller already subtracted from %ebx.  The lowest set mask
   bit selects L(ExitN), which copies N bytes.  */
2137 .p2align 4
2138 L(CopyFrom1To16Bytes):
2139 # ifdef USE_AS_STRNCPY
2140 add $16, %ebx
2141 # endif
2142 add %esi, %edx
2143 add %esi, %ecx
2144
2145 POP (%esi)
/* Low mask byte empty: the zero byte is among bytes 8..15.  */
2146 test %al, %al
2147 jz L(ExitHigh8)
2148
/* Zero byte is among bytes 0..7.  %ah is used as scratch for the low
   nibble; if that is empty the zero byte is among bytes 4..7.  When
   bits 0..2 are all clear, execution falls through into L(Exit4).  */
2149 L(CopyFrom1To16BytesLess8):
2150 mov %al, %ah
2151 and $15, %ah
2152 jz L(ExitHigh4)
2153
2154 test $0x01, %al
2155 jnz L(Exit1)
2156 test $0x02, %al
2157 jnz L(Exit2)
2158 test $0x04, %al
2159 jnz L(Exit3)
2160
/* Exit4: copy the last 4 bytes from (%ecx) to (%edx), set the return
   value with SAVE_RESULT and return through RETURN1.  In strncpy builds
   %ebx is reduced by 4 and, if it is still non-zero, the tail starting
   at 4(%edx) is zero-filled instead of returning here (lea leaves the
   flags from sub intact).  With USE_AS_STPCPY as well, cmpb/sbb add 1
   to %eax unless it already points at a zero byte.  */
2161 .p2align 4
2162 L(Exit4):
2163 movl (%ecx), %eax
2164 movl %eax, (%edx)
2165 SAVE_RESULT (3)
2166 # ifdef USE_AS_STRNCPY
2167 sub $4, %ebx
2168 lea 4(%edx), %ecx
2169 jnz L(StrncpyFillTailWithZero1)
2170 # ifdef USE_AS_STPCPY
2171 cmpb $1, (%eax)
2172 sbb $-1, %eax
2173 # endif
2174 # endif
2175 RETURN1
2176
/* ExitHigh4: the zero byte is among bytes 4..7 of the chunk.  Mask bits
   4, 5 and 6 select Exit5, Exit6 and Exit7; if none is set, execution
   falls through into L(Exit8).  */
2177 .p2align 4
2178 L(ExitHigh4):
2179 test $0x10, %al
2180 jnz L(Exit5)
2181 test $0x20, %al
2182 jnz L(Exit6)
2183 test $0x40, %al
2184 jnz L(Exit7)
2185
/* Exit8: copy the last 8 bytes from (%ecx) to (%edx) with one movlpd
   pair, set the return value with SAVE_RESULT and return through
   RETURN1.  In strncpy builds %ebx is reduced by 8 and, if it is still
   non-zero, the tail starting at 8(%edx) is zero-filled instead.  With
   USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless it already
   points at a zero byte.  */
2186 .p2align 4
2187 L(Exit8):
2188 movlpd (%ecx), %xmm0
2189 movlpd %xmm0, (%edx)
2190 SAVE_RESULT (7)
2191 # ifdef USE_AS_STRNCPY
2192 sub $8, %ebx
2193 lea 8(%edx), %ecx
2194 jnz L(StrncpyFillTailWithZero1)
2195 # ifdef USE_AS_STPCPY
2196 cmpb $1, (%eax)
2197 sbb $-1, %eax
2198 # endif
2199 # endif
2200 RETURN1
2201
/* ExitHigh8: the zero byte is among bytes 8..15, so only %ah (mask bits
   8..15) matters; %al is reused as scratch for its low nibble.  An
   empty low nibble means bytes 12..15.  Otherwise bits 0..2 of %ah
   select Exit9..Exit11, and if none is set execution falls through
   into L(Exit12).  */
2202 .p2align 4
2203 L(ExitHigh8):
2204 mov %ah, %al
2205 and $15, %al
2206 jz L(ExitHigh12)
2207
2208 test $0x01, %ah
2209 jnz L(Exit9)
2210 test $0x02, %ah
2211 jnz L(Exit10)
2212 test $0x04, %ah
2213 jnz L(Exit11)
2214
/* Exit12: copy the last 12 bytes from (%ecx) to (%edx) as 8 + 4 bytes,
   set the return value with SAVE_RESULT and return through RETURN1.
   In strncpy builds %ebx is reduced by 12 and, if it is still non-zero,
   the tail starting at 12(%edx) is zero-filled instead.  With
   USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless it already
   points at a zero byte.  */
2215 .p2align 4
2216 L(Exit12):
2217 movlpd (%ecx), %xmm0
2218 movl 8(%ecx), %eax
2219 movlpd %xmm0, (%edx)
2220 movl %eax, 8(%edx)
2221 SAVE_RESULT (11)
2222 # ifdef USE_AS_STRNCPY
2223 sub $12, %ebx
2224 lea 12(%edx), %ecx
2225 jnz L(StrncpyFillTailWithZero1)
2226 # ifdef USE_AS_STPCPY
2227 cmpb $1, (%eax)
2228 sbb $-1, %eax
2229 # endif
2230 # endif
2231 RETURN1
2232
/* ExitHigh12: the zero byte is among bytes 12..15 of the chunk.  Bits
   4, 5 and 6 of %ah select Exit13, Exit14 and Exit15; if none is set,
   execution falls through into L(Exit16).  */
2233 .p2align 4
2234 L(ExitHigh12):
2235 test $0x10, %ah
2236 jnz L(Exit13)
2237 test $0x20, %ah
2238 jnz L(Exit14)
2239 test $0x40, %ah
2240 jnz L(Exit15)
2241
/* Exit16: copy the last 16 bytes from (%ecx) to (%edx) with unaligned
   movdqu, set the return value with SAVE_RESULT and return through
   RETURN1.  In strncpy builds %ebx is reduced by 16 and, if it is still
   non-zero, the tail starting at 16(%edx) is zero-filled instead.  With
   USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless it already
   points at a zero byte.  */
2242 .p2align 4
2243 L(Exit16):
2244 movdqu (%ecx), %xmm0
2245 movdqu %xmm0, (%edx)
2246 SAVE_RESULT (15)
2247 # ifdef USE_AS_STRNCPY
2248 sub $16, %ebx
2249 lea 16(%edx), %ecx
2250 jnz L(StrncpyFillTailWithZero1)
2251 # ifdef USE_AS_STPCPY
2252 cmpb $1, (%eax)
2253 sbb $-1, %eax
2254 # endif
2255 # endif
2256 RETURN1
2257
2258 # ifdef USE_AS_STRNCPY
2259
2260 CFI_PUSH(%esi)
2261
/* CopyFrom1To16BytesCase2 (strncpy only): entered with a non-zero
   zero-byte mask in %eax after the count in %ebx had 16 subtracted.
   Adds the 16 back, advances %ecx/%edx by %esi and pops the saved %esi.
   The copy must then stop at whichever comes first, the zero byte
   (mask bit k-1) or the count (%ebx == k); L(Exitk) handles both.  */
2262 .p2align 4
2263 L(CopyFrom1To16BytesCase2):
2264 add $16, %ebx
2265 add %esi, %ecx
2266 add %esi, %edx
2267
2268 POP (%esi)
2269
/* No zero byte in bytes 0..7: continue with the high half.  */
2270 test %al, %al
2271 jz L(ExitHighCase2)
2272
/* Zero byte in bytes 0..7 and more than 8 bytes of count left: the
   count cannot cut in first, so use the plain mask dispatch.  */
2273 cmp $8, %ebx
2274 ja L(CopyFrom1To16BytesLess8)
2275
/* Otherwise test mask bit and count alternately for k = 1..7; if
   neither matches, 8 bytes are copied.  */
2276 test $0x01, %al
2277 jnz L(Exit1)
2278 cmp $1, %ebx
2279 je L(Exit1)
2280 test $0x02, %al
2281 jnz L(Exit2)
2282 cmp $2, %ebx
2283 je L(Exit2)
2284 test $0x04, %al
2285 jnz L(Exit3)
2286 cmp $3, %ebx
2287 je L(Exit3)
2288 test $0x08, %al
2289 jnz L(Exit4)
2290 cmp $4, %ebx
2291 je L(Exit4)
2292 test $0x10, %al
2293 jnz L(Exit5)
2294 cmp $5, %ebx
2295 je L(Exit5)
2296 test $0x20, %al
2297 jnz L(Exit6)
2298 cmp $6, %ebx
2299 je L(Exit6)
2300 test $0x40, %al
2301 jnz L(Exit7)
2302 cmp $7, %ebx
2303 je L(Exit7)
2304 jmp L(Exit8)
2305
/* ExitHighCase2 (strncpy only): the zero byte is among bytes 8..15
   (mask bits in %ah).  If 8 or fewer bytes of count remain, the count
   ends first and the count-only dispatch is used.  Otherwise mask bit
   and count are tested alternately for k = 9..15; if neither matches,
   16 bytes are copied.  */
2306 .p2align 4
2307 L(ExitHighCase2):
2308 cmp $8, %ebx
2309 jbe L(CopyFrom1To16BytesLess8Case3)
2310
2311 test $0x01, %ah
2312 jnz L(Exit9)
2313 cmp $9, %ebx
2314 je L(Exit9)
2315 test $0x02, %ah
2316 jnz L(Exit10)
2317 cmp $10, %ebx
2318 je L(Exit10)
2319 test $0x04, %ah
2320 jnz L(Exit11)
2321 cmp $11, %ebx
2322 je L(Exit11)
2323 test $0x8, %ah
2324 jnz L(Exit12)
2325 cmp $12, %ebx
2326 je L(Exit12)
2327 test $0x10, %ah
2328 jnz L(Exit13)
2329 cmp $13, %ebx
2330 je L(Exit13)
2331 test $0x20, %ah
2332 jnz L(Exit14)
2333 cmp $14, %ebx
2334 je L(Exit14)
2335 test $0x40, %ah
2336 jnz L(Exit15)
2337 cmp $15, %ebx
2338 je L(Exit15)
2339 jmp L(Exit16)
2340
2341 CFI_PUSH(%esi)
2342
/* CopyFrom1To16BytesCase2OrCase3 (strncpy only): a non-zero zero-byte
   mask in %eax selects Case2; with an empty mask execution falls
   through into L(CopyFrom1To16BytesCase3), where only the count
   decides how much is copied.  */
2343 .p2align 4
2344 L(CopyFrom1To16BytesCase2OrCase3):
2345 test %eax, %eax
2346 jnz L(CopyFrom1To16BytesCase2)
2347
/* CopyFrom1To16BytesCase3 (strncpy only): no zero byte in this chunk,
   so the count in %ebx alone selects the exit.  Adds back the 16
   already subtracted from %ebx, advances %ecx/%edx by %esi and pops
   the saved %esi.  */
2348 .p2align 4
2349 L(CopyFrom1To16BytesCase3):
2350 add $16, %ebx
2351 add %esi, %edx
2352 add %esi, %ecx
2353
2354 POP (%esi)
2355
2356 cmp $8, %ebx
2357 ja L(ExitHigh8Case3)
2358
/* Count is at most 8.  Counts 1..3 go to the shared exits; above 4 goes
   to L(ExitHigh4Case3); otherwise 4 bytes are copied and the function
   returns here without any zero fill.  */
2359 L(CopyFrom1To16BytesLess8Case3):
2360 cmp $4, %ebx
2361 ja L(ExitHigh4Case3)
2362
2363 cmp $1, %ebx
2364 je L(Exit1)
2365 cmp $2, %ebx
2366 je L(Exit2)
2367 cmp $3, %ebx
2368 je L(Exit3)
2369 movl (%ecx), %eax
2370 movl %eax, (%edx)
2371 SAVE_RESULT (4)
2372 RETURN1
2373
/* ExitHigh4Case3 (strncpy only): count-only dispatch for a count above
   4.  Counts 5..7 go to the shared exits; otherwise 8 bytes are copied
   and the function returns here without any zero fill.  */
2374 .p2align 4
2375 L(ExitHigh4Case3):
2376 cmp $5, %ebx
2377 je L(Exit5)
2378 cmp $6, %ebx
2379 je L(Exit6)
2380 cmp $7, %ebx
2381 je L(Exit7)
2382 movlpd (%ecx), %xmm0
2383 movlpd %xmm0, (%edx)
2384 SAVE_RESULT (8)
2385 RETURN1
2386
/* ExitHigh8Case3 (strncpy only): count-only dispatch for a count above
   8.  Above 12 goes to L(ExitHigh12Case3); counts 9..11 go to the
   shared exits; otherwise 12 bytes (8 + 4) are copied and the function
   returns here without any zero fill.  */
2387 .p2align 4
2388 L(ExitHigh8Case3):
2389 cmp $12, %ebx
2390 ja L(ExitHigh12Case3)
2391
2392 cmp $9, %ebx
2393 je L(Exit9)
2394 cmp $10, %ebx
2395 je L(Exit10)
2396 cmp $11, %ebx
2397 je L(Exit11)
2398 movlpd (%ecx), %xmm0
2399 movl 8(%ecx), %eax
2400 movlpd %xmm0, (%edx)
2401 movl %eax, 8(%edx)
2402 SAVE_RESULT (12)
2403 RETURN1
2404
/* ExitHigh12Case3 (strncpy only): count-only dispatch for a count above
   12.  Counts 13..15 go to the shared exits; otherwise 16 bytes (8 + 8)
   are copied and the function returns here without any zero fill.  */
2405 .p2align 4
2406 L(ExitHigh12Case3):
2407 cmp $13, %ebx
2408 je L(Exit13)
2409 cmp $14, %ebx
2410 je L(Exit14)
2411 cmp $15, %ebx
2412 je L(Exit15)
2413 movlpd (%ecx), %xmm0
2414 movlpd 8(%ecx), %xmm1
2415 movlpd %xmm0, (%edx)
2416 movlpd %xmm1, 8(%edx)
2417 SAVE_RESULT (16)
2418 RETURN1
2419
2420 # endif
2421
/* Exit1: copy the last byte from (%ecx) to (%edx), set the return value
   with SAVE_RESULT and return through RETURN1.  In strncpy builds %ebx
   is reduced by 1 and, if it is still non-zero, the tail starting at
   1(%edx) is zero-filled instead.  With USE_AS_STPCPY as well,
   cmpb/sbb add 1 to %eax unless it already points at a zero byte.  */
2422 .p2align 4
2423 L(Exit1):
2424 movb (%ecx), %al
2425 movb %al, (%edx)
2426 SAVE_RESULT (0)
2427 # ifdef USE_AS_STRNCPY
2428 sub $1, %ebx
2429 lea 1(%edx), %ecx
2430 jnz L(StrncpyFillTailWithZero1)
2431 # ifdef USE_AS_STPCPY
2432 cmpb $1, (%eax)
2433 sbb $-1, %eax
2434 # endif
2435 # endif
2436 RETURN1
2437
/* Exit2: copy the last 2 bytes from (%ecx) to (%edx), set the return
   value with SAVE_RESULT and return through RETURN1.  In strncpy builds
   %ebx is reduced by 2 and, if it is still non-zero, the tail starting
   at 2(%edx) is zero-filled instead.  With USE_AS_STPCPY as well,
   cmpb/sbb add 1 to %eax unless it already points at a zero byte.  */
2438 .p2align 4
2439 L(Exit2):
2440 movw (%ecx), %ax
2441 movw %ax, (%edx)
2442 SAVE_RESULT (1)
2443 # ifdef USE_AS_STRNCPY
2444 sub $2, %ebx
2445 lea 2(%edx), %ecx
2446 jnz L(StrncpyFillTailWithZero1)
2447 # ifdef USE_AS_STPCPY
2448 cmpb $1, (%eax)
2449 sbb $-1, %eax
2450 # endif
2451 # endif
2452 RETURN1
2453
/* Exit3: copy the last 3 bytes from (%ecx) to (%edx) as 2 + 1 bytes,
   set the return value with SAVE_RESULT and return through RETURN1.
   In strncpy builds %ebx is reduced by 3 and, if it is still non-zero,
   the tail starting at 3(%edx) is zero-filled instead.  With
   USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless it already
   points at a zero byte.  */
2454 .p2align 4
2455 L(Exit3):
2456 movw (%ecx), %ax
2457 movw %ax, (%edx)
2458 movb 2(%ecx), %al
2459 movb %al, 2(%edx)
2460 SAVE_RESULT (2)
2461 # ifdef USE_AS_STRNCPY
2462 sub $3, %ebx
2463 lea 3(%edx), %ecx
2464 jnz L(StrncpyFillTailWithZero1)
2465 # ifdef USE_AS_STPCPY
2466 cmpb $1, (%eax)
2467 sbb $-1, %eax
2468 # endif
2469 # endif
2470 RETURN1
2471
/* Exit5: copy the last 5 bytes from (%ecx) to (%edx) as 4 + 1 bytes,
   set the return value with SAVE_RESULT and return through RETURN1.
   In strncpy builds %ebx is reduced by 5 and, if it is still non-zero,
   the tail starting at 5(%edx) is zero-filled instead.  With
   USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless it already
   points at a zero byte.  */
2472 .p2align 4
2473 L(Exit5):
2474 movl (%ecx), %eax
2475 movl %eax, (%edx)
2476 movb 4(%ecx), %al
2477 movb %al, 4(%edx)
2478 SAVE_RESULT (4)
2479 # ifdef USE_AS_STRNCPY
2480 sub $5, %ebx
2481 lea 5(%edx), %ecx
2482 jnz L(StrncpyFillTailWithZero1)
2483 # ifdef USE_AS_STPCPY
2484 cmpb $1, (%eax)
2485 sbb $-1, %eax
2486 # endif
2487 # endif
2488 RETURN1
2489
/* Exit6: copy the last 6 bytes from (%ecx) to (%edx) as 4 + 2 bytes,
   set the return value with SAVE_RESULT and return through RETURN1.
   In strncpy builds %ebx is reduced by 6 and, if it is still non-zero,
   the tail starting at 6(%edx) is zero-filled instead.  With
   USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless it already
   points at a zero byte.  */
2490 .p2align 4
2491 L(Exit6):
2492 movl (%ecx), %eax
2493 movl %eax, (%edx)
2494 movw 4(%ecx), %ax
2495 movw %ax, 4(%edx)
2496 SAVE_RESULT (5)
2497 # ifdef USE_AS_STRNCPY
2498 sub $6, %ebx
2499 lea 6(%edx), %ecx
2500 jnz L(StrncpyFillTailWithZero1)
2501 # ifdef USE_AS_STPCPY
2502 cmpb $1, (%eax)
2503 sbb $-1, %eax
2504 # endif
2505 # endif
2506 RETURN1
2507
/* Exit7: copy the last 7 bytes from (%ecx) to (%edx) with two 4-byte
   moves that overlap by one byte (offsets 0 and 3), set the return
   value with SAVE_RESULT and return through RETURN1.  In strncpy builds
   %ebx is reduced by 7 and, if it is still non-zero, the tail starting
   at 7(%edx) is zero-filled instead.  With USE_AS_STPCPY as well,
   cmpb/sbb add 1 to %eax unless it already points at a zero byte.  */
2508 .p2align 4
2509 L(Exit7):
2510 movl (%ecx), %eax
2511 movl %eax, (%edx)
2512 movl 3(%ecx), %eax
2513 movl %eax, 3(%edx)
2514 SAVE_RESULT (6)
2515 # ifdef USE_AS_STRNCPY
2516 sub $7, %ebx
2517 lea 7(%edx), %ecx
2518 jnz L(StrncpyFillTailWithZero1)
2519 # ifdef USE_AS_STPCPY
2520 cmpb $1, (%eax)
2521 sbb $-1, %eax
2522 # endif
2523 # endif
2524 RETURN1
2525
/* Exit9: copy the last 9 bytes from (%ecx) to (%edx) as 8 + 1 bytes,
   set the return value with SAVE_RESULT and return through RETURN1.
   In strncpy builds %ebx is reduced by 9 and, if it is still non-zero,
   the tail starting at 9(%edx) is zero-filled instead.  With
   USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless it already
   points at a zero byte.  */
2526 .p2align 4
2527 L(Exit9):
2528 movlpd (%ecx), %xmm0
2529 movb 8(%ecx), %al
2530 movlpd %xmm0, (%edx)
2531 movb %al, 8(%edx)
2532 SAVE_RESULT (8)
2533 # ifdef USE_AS_STRNCPY
2534 sub $9, %ebx
2535 lea 9(%edx), %ecx
2536 jnz L(StrncpyFillTailWithZero1)
2537 # ifdef USE_AS_STPCPY
2538 cmpb $1, (%eax)
2539 sbb $-1, %eax
2540 # endif
2541 # endif
2542 RETURN1
2543
/* Exit10: copy the last 10 bytes from (%ecx) to (%edx) as 8 + 2 bytes,
   set the return value with SAVE_RESULT and return through RETURN1.
   In strncpy builds %ebx is reduced by 10 and, if it is still non-zero,
   the tail starting at 10(%edx) is zero-filled instead.  With
   USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless it already
   points at a zero byte.  */
2544 .p2align 4
2545 L(Exit10):
2546 movlpd (%ecx), %xmm0
2547 movw 8(%ecx), %ax
2548 movlpd %xmm0, (%edx)
2549 movw %ax, 8(%edx)
2550 SAVE_RESULT (9)
2551 # ifdef USE_AS_STRNCPY
2552 sub $10, %ebx
2553 lea 10(%edx), %ecx
2554 jnz L(StrncpyFillTailWithZero1)
2555 # ifdef USE_AS_STPCPY
2556 cmpb $1, (%eax)
2557 sbb $-1, %eax
2558 # endif
2559 # endif
2560 RETURN1
2561
/* Exit11: copy the last 11 bytes from (%ecx) to (%edx) as an 8-byte and
   a 4-byte move that overlap by one byte (offsets 0 and 7), set the
   return value with SAVE_RESULT and return through RETURN1.  In strncpy
   builds %ebx is reduced by 11 and, if it is still non-zero, the tail
   starting at 11(%edx) is zero-filled instead.  With USE_AS_STPCPY as
   well, cmpb/sbb add 1 to %eax unless it already points at a zero
   byte.  */
2562 .p2align 4
2563 L(Exit11):
2564 movlpd (%ecx), %xmm0
2565 movl 7(%ecx), %eax
2566 movlpd %xmm0, (%edx)
2567 movl %eax, 7(%edx)
2568 SAVE_RESULT (10)
2569 # ifdef USE_AS_STRNCPY
2570 sub $11, %ebx
2571 lea 11(%edx), %ecx
2572 jnz L(StrncpyFillTailWithZero1)
2573 # ifdef USE_AS_STPCPY
2574 cmpb $1, (%eax)
2575 sbb $-1, %eax
2576 # endif
2577 # endif
2578 RETURN1
2579
/* Exit13: copy the last 13 bytes from (%ecx) to (%edx) with two 8-byte
   moves that overlap (offsets 0 and 5), set the return value with
   SAVE_RESULT and return through RETURN1.  In strncpy builds %ebx is
   reduced by 13 and, if it is still non-zero, the tail starting at
   13(%edx) is zero-filled instead.  With USE_AS_STPCPY as well,
   cmpb/sbb add 1 to %eax unless it already points at a zero byte.  */
2580 .p2align 4
2581 L(Exit13):
2582 movlpd (%ecx), %xmm0
2583 movlpd 5(%ecx), %xmm1
2584 movlpd %xmm0, (%edx)
2585 movlpd %xmm1, 5(%edx)
2586 SAVE_RESULT (12)
2587 # ifdef USE_AS_STRNCPY
2588 sub $13, %ebx
2589 lea 13(%edx), %ecx
2590 jnz L(StrncpyFillTailWithZero1)
2591 # ifdef USE_AS_STPCPY
2592 cmpb $1, (%eax)
2593 sbb $-1, %eax
2594 # endif
2595 # endif
2596 RETURN1
2597
/* Exit14: copy the last 14 bytes from (%ecx) to (%edx) with two 8-byte
   moves that overlap (offsets 0 and 6), set the return value with
   SAVE_RESULT and return through RETURN1.  In strncpy builds %ebx is
   reduced by 14 and, if it is still non-zero, the tail starting at
   14(%edx) is zero-filled instead.  With USE_AS_STPCPY as well,
   cmpb/sbb add 1 to %eax unless it already points at a zero byte.  */
2598 .p2align 4
2599 L(Exit14):
2600 movlpd (%ecx), %xmm0
2601 movlpd 6(%ecx), %xmm1
2602 movlpd %xmm0, (%edx)
2603 movlpd %xmm1, 6(%edx)
2604 SAVE_RESULT (13)
2605 # ifdef USE_AS_STRNCPY
2606 sub $14, %ebx
2607 lea 14(%edx), %ecx
2608 jnz L(StrncpyFillTailWithZero1)
2609 # ifdef USE_AS_STPCPY
2610 cmpb $1, (%eax)
2611 sbb $-1, %eax
2612 # endif
2613 # endif
2614 RETURN1
2615
/* Exit15: copy the last 15 bytes from (%ecx) to (%edx) with two 8-byte
   moves that overlap by one byte (offsets 0 and 7), set the return
   value with SAVE_RESULT and return through RETURN1.  In strncpy builds
   %ebx is reduced by 15 and, if it is still non-zero, the tail starting
   at 15(%edx) is zero-filled instead.  With USE_AS_STPCPY as well,
   cmpb/sbb add 1 to %eax unless it already points at a zero byte.  */
2616 .p2align 4
2617 L(Exit15):
2618 movlpd (%ecx), %xmm0
2619 movlpd 7(%ecx), %xmm1
2620 movlpd %xmm0, (%edx)
2621 movlpd %xmm1, 7(%edx)
2622 SAVE_RESULT (14)
2623 # ifdef USE_AS_STRNCPY
2624 sub $15, %ebx
2625 lea 15(%edx), %ecx
2626 jnz L(StrncpyFillTailWithZero1)
2627 # ifdef USE_AS_STPCPY
2628 cmpb $1, (%eax)
2629 sbb $-1, %eax
2630 # endif
2631 # endif
2632 RETURN1
2633
2634 CFI_POP (%edi)
2635
2636 # ifdef USE_AS_STRNCPY
/* Fill0 .. Fill16 (strncpy only): store exactly N zero bytes at (%ecx)
   and return through RETURN.  The stored values come from %edx/%dx/%dl
   and %xmm0, which L(StrncpyFillTailWithZero) clears before these
   labels are reached.  Sizes that are not a power of two are written
   as two overlapping stores (e.g. Fill7 = 4 bytes at offsets 0 and 3),
   so no byte beyond N is touched.  */
2637 .p2align 4
2638 L(Fill0):
2639 RETURN
2640
2641 .p2align 4
2642 L(Fill1):
2643 movb %dl, (%ecx)
2644 RETURN
2645
2646 .p2align 4
2647 L(Fill2):
2648 movw %dx, (%ecx)
2649 RETURN
2650
2651 .p2align 4
2652 L(Fill3):
2653 movw %dx, (%ecx)
2654 movb %dl, 2(%ecx)
2655 RETURN
2656
2657 .p2align 4
2658 L(Fill4):
2659 movl %edx, (%ecx)
2660 RETURN
2661
2662 .p2align 4
2663 L(Fill5):
2664 movl %edx, (%ecx)
2665 movb %dl, 4(%ecx)
2666 RETURN
2667
2668 .p2align 4
2669 L(Fill6):
2670 movl %edx, (%ecx)
2671 movw %dx, 4(%ecx)
2672 RETURN
2673
2674 .p2align 4
2675 L(Fill7):
2676 movl %edx, (%ecx)
2677 movl %edx, 3(%ecx)
2678 RETURN
2679
2680 .p2align 4
2681 L(Fill8):
2682 movlpd %xmm0, (%ecx)
2683 RETURN
2684
2685 .p2align 4
2686 L(Fill9):
2687 movlpd %xmm0, (%ecx)
2688 movb %dl, 8(%ecx)
2689 RETURN
2690
2691 .p2align 4
2692 L(Fill10):
2693 movlpd %xmm0, (%ecx)
2694 movw %dx, 8(%ecx)
2695 RETURN
2696
2697 .p2align 4
2698 L(Fill11):
2699 movlpd %xmm0, (%ecx)
2700 movl %edx, 7(%ecx)
2701 RETURN
2702
2703 .p2align 4
2704 L(Fill12):
2705 movlpd %xmm0, (%ecx)
2706 movl %edx, 8(%ecx)
2707 RETURN
2708
2709 .p2align 4
2710 L(Fill13):
2711 movlpd %xmm0, (%ecx)
2712 movlpd %xmm0, 5(%ecx)
2713 RETURN
2714
2715 .p2align 4
2716 L(Fill14):
2717 movlpd %xmm0, (%ecx)
2718 movlpd %xmm0, 6(%ecx)
2719 RETURN
2720
2721 .p2align 4
2722 L(Fill15):
2723 movlpd %xmm0, (%ecx)
2724 movlpd %xmm0, 7(%ecx)
2725 RETURN
2726
2727 .p2align 4
2728 L(Fill16):
2729 movlpd %xmm0, (%ecx)
2730 movlpd %xmm0, 8(%ecx)
2731 RETURN
2732
/* StrncpyFillExit1 / FillFrom1To16Bytes (strncpy only): final step of
   the zero fill.  L(StrncpyFillExit1) first adds back the 16 that its
   callers subtracted from %ebx (lea, so no flags are changed).
   L(FillFrom1To16Bytes) then branches to the L(FillN) routine matching
   the byte count in %ebx using a small compare tree.
   NOTE(review): the tree uses signed jg/jl and has no case above 16, so
   it relies on %ebx being in 0..16 here -- confirm against callers.  */
2733 .p2align 4
2734 L(StrncpyFillExit1):
2735 lea 16(%ebx), %ebx
2736 L(FillFrom1To16Bytes):
2737 test %ebx, %ebx
2738 jz L(Fill0)
2739 cmp $16, %ebx
2740 je L(Fill16)
2741 cmp $8, %ebx
2742 je L(Fill8)
2743 jg L(FillMore8)
2744 cmp $4, %ebx
2745 je L(Fill4)
2746 jg L(FillMore4)
2747 cmp $2, %ebx
2748 jl L(Fill1)
2749 je L(Fill2)
2750 jg L(Fill3)
2751 L(FillMore8): /* but less than 16 */
2752 cmp $12, %ebx
2753 je L(Fill12)
2754 jl L(FillLess12)
2755 cmp $14, %ebx
2756 jl L(Fill13)
2757 je L(Fill14)
2758 jg L(Fill15)
2759 L(FillMore4): /* but less than 8 */
2760 cmp $6, %ebx
2761 jl L(Fill5)
2762 je L(Fill6)
2763 jg L(Fill7)
2764 L(FillLess12): /* but more than 8 */
2765 cmp $10, %ebx
2766 jl L(Fill9)
2767 je L(Fill10)
2768 jmp L(Fill11)
2769
2770 CFI_PUSH(%edi)
2771
/* StrncpyFillTailWithZero1 / StrncpyFillTailWithZero (strncpy only):
   write %ebx zero bytes starting at %ecx.  The "1" entry first pops the
   saved %edi and is the one used by the L(ExitN) blocks; the plain
   entry is used by the L(ExitTailN) blocks.  %xmm0 and %edx are cleared
   here and serve as the zero source for the L(FillN) routines.  */
2772 .p2align 4
2773 L(StrncpyFillTailWithZero1):
2774 POP (%edi)
2775 L(StrncpyFillTailWithZero):
2776 pxor %xmm0, %xmm0
2777 xor %edx, %edx
/* 16 bytes or fewer: go straight to the small-size dispatch.  */
2778 sub $16, %ebx
2779 jbe L(StrncpyFillExit1)
2780
/* Write the first 16 bytes with two unaligned 8-byte stores.  */
2781 movlpd %xmm0, (%ecx)
2782 movlpd %xmm0, 8(%ecx)
2783
2784 lea 16(%ecx), %ecx
2785
/* Round %ecx down to a 16-byte boundary so movdqa can be used; the
   bytes stepped back over are added to the count (they are already
   zero and simply get rewritten).  %edx is cleared again afterwards
   because the L(FillN) routines store from it.  */
2786 mov %ecx, %edx
2787 and $0xf, %edx
2788 sub %edx, %ecx
2789 add %edx, %ebx
2790 xor %edx, %edx
2791 sub $64, %ebx
2792 jb L(StrncpyFillLess64)
2793
/* Bulk loop: 64 aligned zero bytes per iteration.  */
2794 L(StrncpyFillLoopMovdqa):
2795 movdqa %xmm0, (%ecx)
2796 movdqa %xmm0, 16(%ecx)
2797 movdqa %xmm0, 32(%ecx)
2798 movdqa %xmm0, 48(%ecx)
2799 lea 64(%ecx), %ecx
2800 sub $64, %ebx
2801 jae L(StrncpyFillLoopMovdqa)
2802
/* Fewer than 64 bytes left; %ebx currently holds (remaining - 64).
   Peel off 32 and/or 16 bytes with aligned stores, then let the
   small-size dispatch write what is left.  */
2803 L(StrncpyFillLess64):
2804 add $32, %ebx
2805 jl L(StrncpyFillLess32)
2806 movdqa %xmm0, (%ecx)
2807 movdqa %xmm0, 16(%ecx)
2808 lea 32(%ecx), %ecx
2809 sub $16, %ebx
2810 jl L(StrncpyFillExit1)
2811 movdqa %xmm0, (%ecx)
2812 lea 16(%ecx), %ecx
2813 jmp L(FillFrom1To16Bytes)
2814
2815 L(StrncpyFillLess32):
2816 add $16, %ebx
2817 jl L(StrncpyFillExit1)
2818 movdqa %xmm0, (%ecx)
2819 lea 16(%ecx), %ecx
2820 jmp L(FillFrom1To16Bytes)
2821 # endif
2822
/* ExitTail1: copy 1 byte from (%ecx) to (%edx), set the return value
   with SAVE_RESULT_TAIL and return through RETURN (no %edi to pop on
   this path).  In strncpy builds %ebx is reduced by 1 and, if it is
   still non-zero, the tail starting at 1(%edx) is zero-filled instead.
   With USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless it already
   points at a zero byte.  */
2823 .p2align 4
2824 L(ExitTail1):
2825 movb (%ecx), %al
2826 movb %al, (%edx)
2827 SAVE_RESULT_TAIL (0)
2828 # ifdef USE_AS_STRNCPY
2829 sub $1, %ebx
2830 lea 1(%edx), %ecx
2831 jnz L(StrncpyFillTailWithZero)
2832 # ifdef USE_AS_STPCPY
2833 cmpb $1, (%eax)
2834 sbb $-1, %eax
2835 # endif
2836 # endif
2837 RETURN
2838
/* ExitTail2: copy 2 bytes from (%ecx) to (%edx), set the return value
   with SAVE_RESULT_TAIL and return through RETURN (no %edi to pop on
   this path).  In strncpy builds %ebx is reduced by 2 and, if it is
   still non-zero, the tail starting at 2(%edx) is zero-filled instead.
   With USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless it already
   points at a zero byte.  */
2839 .p2align 4
2840 L(ExitTail2):
2841 movw (%ecx), %ax
2842 movw %ax, (%edx)
2843 SAVE_RESULT_TAIL (1)
2844 # ifdef USE_AS_STRNCPY
2845 sub $2, %ebx
2846 lea 2(%edx), %ecx
2847 jnz L(StrncpyFillTailWithZero)
2848 # ifdef USE_AS_STPCPY
2849 cmpb $1, (%eax)
2850 sbb $-1, %eax
2851 # endif
2852 # endif
2853 RETURN
2854
/* ExitTail3: copy 3 bytes from (%ecx) to (%edx) as 2 + 1 bytes, set the
   return value with SAVE_RESULT_TAIL and return through RETURN (no
   %edi to pop on this path).  In strncpy builds %ebx is reduced by 3
   and, if it is still non-zero, the tail starting at 3(%edx) is
   zero-filled instead.  With USE_AS_STPCPY as well, cmpb/sbb add 1 to
   %eax unless it already points at a zero byte.  */
2855 .p2align 4
2856 L(ExitTail3):
2857 movw (%ecx), %ax
2858 movw %ax, (%edx)
2859 movb 2(%ecx), %al
2860 movb %al, 2(%edx)
2861 SAVE_RESULT_TAIL (2)
2862 # ifdef USE_AS_STRNCPY
2863 sub $3, %ebx
2864 lea 3(%edx), %ecx
2865 jnz L(StrncpyFillTailWithZero)
2866 # ifdef USE_AS_STPCPY
2867 cmpb $1, (%eax)
2868 sbb $-1, %eax
2869 # endif
2870 # endif
2871 RETURN
2872
/* ExitTail4: copy 4 bytes from (%ecx) to (%edx), set the return value
   with SAVE_RESULT_TAIL and return through RETURN (no %edi to pop on
   this path).  In strncpy builds %ebx is reduced by 4 and, if it is
   still non-zero, the tail starting at 4(%edx) is zero-filled instead.
   With USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless it already
   points at a zero byte.  */
2873 .p2align 4
2874 L(ExitTail4):
2875 movl (%ecx), %eax
2876 movl %eax, (%edx)
2877 SAVE_RESULT_TAIL (3)
2878 # ifdef USE_AS_STRNCPY
2879 sub $4, %ebx
2880 lea 4(%edx), %ecx
2881 jnz L(StrncpyFillTailWithZero)
2882 # ifdef USE_AS_STPCPY
2883 cmpb $1, (%eax)
2884 sbb $-1, %eax
2885 # endif
2886 # endif
2887 RETURN
2888
/* ExitTail5: copy 5 bytes from (%ecx) to (%edx) as 4 + 1 bytes, set the
   return value with SAVE_RESULT_TAIL and return through RETURN (no
   %edi to pop on this path).  In strncpy builds %ebx is reduced by 5
   and, if it is still non-zero, the tail starting at 5(%edx) is
   zero-filled instead.  With USE_AS_STPCPY as well, cmpb/sbb add 1 to
   %eax unless it already points at a zero byte.  */
2889 .p2align 4
2890 L(ExitTail5):
2891 movl (%ecx), %eax
2892 movl %eax, (%edx)
2893 movb 4(%ecx), %al
2894 movb %al, 4(%edx)
2895 SAVE_RESULT_TAIL (4)
2896 # ifdef USE_AS_STRNCPY
2897 sub $5, %ebx
2898 lea 5(%edx), %ecx
2899 jnz L(StrncpyFillTailWithZero)
2900 # ifdef USE_AS_STPCPY
2901 cmpb $1, (%eax)
2902 sbb $-1, %eax
2903 # endif
2904 # endif
2905 RETURN
2906
/* ExitTail6: copy 6 bytes from (%ecx) to (%edx) as 4 + 2 bytes, set the
   return value with SAVE_RESULT_TAIL and return through RETURN (no
   %edi to pop on this path).  In strncpy builds %ebx is reduced by 6
   and, if it is still non-zero, the tail starting at 6(%edx) is
   zero-filled instead.  With USE_AS_STPCPY as well, cmpb/sbb add 1 to
   %eax unless it already points at a zero byte.  */
2907 .p2align 4
2908 L(ExitTail6):
2909 movl (%ecx), %eax
2910 movl %eax, (%edx)
2911 movw 4(%ecx), %ax
2912 movw %ax, 4(%edx)
2913 SAVE_RESULT_TAIL (5)
2914 # ifdef USE_AS_STRNCPY
2915 sub $6, %ebx
2916 lea 6(%edx), %ecx
2917 jnz L(StrncpyFillTailWithZero)
2918 # ifdef USE_AS_STPCPY
2919 cmpb $1, (%eax)
2920 sbb $-1, %eax
2921 # endif
2922 # endif
2923 RETURN
2924
/* ExitTail7: copy 7 bytes from (%ecx) to (%edx) with two 4-byte moves
   that overlap by one byte (offsets 0 and 3), set the return value
   with SAVE_RESULT_TAIL and return through RETURN (no %edi to pop on
   this path).  In strncpy builds %ebx is reduced by 7 and, if it is
   still non-zero, the tail starting at 7(%edx) is zero-filled instead.
   With USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless it already
   points at a zero byte.  */
2925 .p2align 4
2926 L(ExitTail7):
2927 movl (%ecx), %eax
2928 movl %eax, (%edx)
2929 movl 3(%ecx), %eax
2930 movl %eax, 3(%edx)
2931 SAVE_RESULT_TAIL (6)
2932 # ifdef USE_AS_STRNCPY
2933 sub $7, %ebx
2934 lea 7(%edx), %ecx
2935 jnz L(StrncpyFillTailWithZero)
2936 # ifdef USE_AS_STPCPY
2937 cmpb $1, (%eax)
2938 sbb $-1, %eax
2939 # endif
2940 # endif
2941 RETURN
2942
/* ExitTail8: copy 8 bytes from (%ecx) to (%edx) with one 8-byte move
   through %xmm0 and produce the return value in %eax.
   SAVE_RESULT_TAIL (7) is `lea 7(%edx), %eax` when USE_AS_STPCPY is
   defined (macro at the top of the file); its other expansion is not
   visible from this part of the file.
   strncpy build: 8 is subtracted from %ebx (presumably the remaining
   byte count -- confirm at the entry code); a non-zero remainder jumps
   to L(StrncpyFillTailWithZero) with %ecx = %edx + 8.
   NOTE(review): unlike most sibling ExitTail blocks there is no
   cmpb/sbb adjustment of %eax here.  Presumably this label is only
   reached when byte 7 is the terminating NUL, so %eax already points
   at it -- confirm against every jump to L(ExitTail8).  */
2943 .p2align 4
2944 L(ExitTail8):
2945 movlpd (%ecx), %xmm0
2946 movlpd %xmm0, (%edx)
2947 SAVE_RESULT_TAIL (7)
2948 # ifdef USE_AS_STRNCPY
2949 sub $8, %ebx
2950 lea 8(%edx), %ecx /* lea leaves the flags from sub untouched for the jnz below */
2951 jnz L(StrncpyFillTailWithZero)
2952 # endif
2953 RETURN
2954
/* ExitTail9: copy 9 bytes from (%ecx) to (%edx) (8-byte move through
   %xmm0 plus a 1-byte move at offset 8; both loads are issued before
   either store) and produce the return value in %eax.
   SAVE_RESULT_TAIL (8) is `lea 8(%edx), %eax` when USE_AS_STPCPY is
   defined (macro at the top of the file); its other expansion is not
   visible from this part of the file.
   strncpy build: 9 is subtracted from %ebx (presumably the remaining
   byte count -- confirm at the entry code); a non-zero remainder jumps
   to L(StrncpyFillTailWithZero) with %ecx = %edx + 9.  */
2955 .p2align 4
2956 L(ExitTail9):
2957 movlpd (%ecx), %xmm0
2958 movb 8(%ecx), %al
2959 movlpd %xmm0, (%edx)
2960 movb %al, 8(%edx)
2961 SAVE_RESULT_TAIL (8)
2962 # ifdef USE_AS_STRNCPY
2963 sub $9, %ebx
2964 lea 9(%edx), %ecx /* lea leaves the flags from sub untouched for the jnz below */
2965 jnz L(StrncpyFillTailWithZero)
2966 # ifdef USE_AS_STPCPY
2967 cmpb $1, (%eax) /* CF = 1 iff the byte at (%eax) is 0 */
2968 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL last byte */
2969 # endif
2970 # endif
2971 RETURN
2972
/* ExitTail10: copy 10 bytes from (%ecx) to (%edx) (8-byte move through
   %xmm0 plus a 2-byte move at offset 8; both loads are issued before
   either store) and produce the return value in %eax.
   SAVE_RESULT_TAIL (9) is `lea 9(%edx), %eax` when USE_AS_STPCPY is
   defined (macro at the top of the file); its other expansion is not
   visible from this part of the file.
   strncpy build: 10 is subtracted from %ebx (presumably the remaining
   byte count -- confirm at the entry code); a non-zero remainder jumps
   to L(StrncpyFillTailWithZero) with %ecx = %edx + 10.  */
2973 .p2align 4
2974 L(ExitTail10):
2975 movlpd (%ecx), %xmm0
2976 movw 8(%ecx), %ax
2977 movlpd %xmm0, (%edx)
2978 movw %ax, 8(%edx)
2979 SAVE_RESULT_TAIL (9)
2980 # ifdef USE_AS_STRNCPY
2981 sub $10, %ebx
2982 lea 10(%edx), %ecx /* lea leaves the flags from sub untouched for the jnz below */
2983 jnz L(StrncpyFillTailWithZero)
2984 # ifdef USE_AS_STPCPY
2985 cmpb $1, (%eax) /* CF = 1 iff the byte at (%eax) is 0 */
2986 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL last byte */
2987 # endif
2988 # endif
2989 RETURN
2990
/* ExitTail11: copy 11 bytes from (%ecx) to (%edx) using an 8-byte move
   at offset 0 and an overlapping 4-byte move at offset 7 (byte 7 is
   written twice) and produce the return value in %eax.
   SAVE_RESULT_TAIL (10) is `lea 10(%edx), %eax` when USE_AS_STPCPY is
   defined (macro at the top of the file); its other expansion is not
   visible from this part of the file.
   strncpy build: 11 is subtracted from %ebx (presumably the remaining
   byte count -- confirm at the entry code); a non-zero remainder jumps
   to L(StrncpyFillTailWithZero) with %ecx = %edx + 11.  */
2991 .p2align 4
2992 L(ExitTail11):
2993 movlpd (%ecx), %xmm0
2994 movl 7(%ecx), %eax
2995 movlpd %xmm0, (%edx)
2996 movl %eax, 7(%edx)
2997 SAVE_RESULT_TAIL (10)
2998 # ifdef USE_AS_STRNCPY
2999 sub $11, %ebx
3000 lea 11(%edx), %ecx /* lea leaves the flags from sub untouched for the jnz below */
3001 jnz L(StrncpyFillTailWithZero)
3002 # ifdef USE_AS_STPCPY
3003 cmpb $1, (%eax) /* CF = 1 iff the byte at (%eax) is 0 */
3004 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL last byte */
3005 # endif
3006 # endif
3007 RETURN
3008
/* ExitTail12: copy 12 bytes from (%ecx) to (%edx) (8-byte move through
   %xmm0 plus a 4-byte move at offset 8) and produce the return value
   in %eax.  SAVE_RESULT_TAIL (11) is `lea 11(%edx), %eax` when
   USE_AS_STPCPY is defined (macro at the top of the file); its other
   expansion is not visible from this part of the file.
   strncpy build: 12 is subtracted from %ebx (presumably the remaining
   byte count -- confirm at the entry code); a non-zero remainder jumps
   to L(StrncpyFillTailWithZero) with %ecx = %edx + 12.  */
3009 .p2align 4
3010 L(ExitTail12):
3011 movlpd (%ecx), %xmm0
3012 movl 8(%ecx), %eax
3013 movlpd %xmm0, (%edx)
3014 movl %eax, 8(%edx)
3015 SAVE_RESULT_TAIL (11)
3016 # ifdef USE_AS_STRNCPY
3017 sub $12, %ebx
3018 lea 12(%edx), %ecx /* lea leaves the flags from sub untouched for the jnz below */
3019 jnz L(StrncpyFillTailWithZero)
3020 # ifdef USE_AS_STPCPY
3021 cmpb $1, (%eax) /* CF = 1 iff the byte at (%eax) is 0 */
3022 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL last byte */
3023 # endif
3024 # endif
3025 RETURN
3026
/* ExitTail13: copy 13 bytes from (%ecx) to (%edx) using two
   overlapping 8-byte moves at offsets 0 and 5 (bytes 5..7 are written
   twice) and produce the return value in %eax.
   SAVE_RESULT_TAIL (12) is `lea 12(%edx), %eax` when USE_AS_STPCPY is
   defined (macro at the top of the file); its other expansion is not
   visible from this part of the file.
   strncpy build: 13 is subtracted from %ebx (presumably the remaining
   byte count -- confirm at the entry code); a non-zero remainder jumps
   to L(StrncpyFillTailWithZero) with %ecx = %edx + 13.  */
3027 .p2align 4
3028 L(ExitTail13):
3029 movlpd (%ecx), %xmm0
3030 movlpd 5(%ecx), %xmm1
3031 movlpd %xmm0, (%edx)
3032 movlpd %xmm1, 5(%edx)
3033 SAVE_RESULT_TAIL (12)
3034 # ifdef USE_AS_STRNCPY
3035 sub $13, %ebx
3036 lea 13(%edx), %ecx /* lea leaves the flags from sub untouched for the jnz below */
3037 jnz L(StrncpyFillTailWithZero)
3038 # ifdef USE_AS_STPCPY
3039 cmpb $1, (%eax) /* CF = 1 iff the byte at (%eax) is 0 */
3040 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL last byte */
3041 # endif
3042 # endif
3043 RETURN
3044
/* ExitTail14: copy 14 bytes from (%ecx) to (%edx) using two
   overlapping 8-byte moves at offsets 0 and 6 (bytes 6..7 are written
   twice) and produce the return value in %eax.
   SAVE_RESULT_TAIL (13) is `lea 13(%edx), %eax` when USE_AS_STPCPY is
   defined (macro at the top of the file); its other expansion is not
   visible from this part of the file.
   strncpy build: 14 is subtracted from %ebx (presumably the remaining
   byte count -- confirm at the entry code); a non-zero remainder jumps
   to L(StrncpyFillTailWithZero) with %ecx = %edx + 14.  */
3045 .p2align 4
3046 L(ExitTail14):
3047 movlpd (%ecx), %xmm0
3048 movlpd 6(%ecx), %xmm1
3049 movlpd %xmm0, (%edx)
3050 movlpd %xmm1, 6(%edx)
3051 SAVE_RESULT_TAIL (13)
3052 # ifdef USE_AS_STRNCPY
3053 sub $14, %ebx
3054 lea 14(%edx), %ecx /* lea leaves the flags from sub untouched for the jnz below */
3055 jnz L(StrncpyFillTailWithZero)
3056 # ifdef USE_AS_STPCPY
3057 cmpb $1, (%eax) /* CF = 1 iff the byte at (%eax) is 0 */
3058 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL last byte */
3059 # endif
3060 # endif
3061 RETURN
3062
/* ExitTail15: copy 15 bytes from (%ecx) to (%edx) using two
   overlapping 8-byte moves at offsets 0 and 7 (byte 7 is written
   twice) and produce the return value in %eax.
   SAVE_RESULT_TAIL (14) is `lea 14(%edx), %eax` when USE_AS_STPCPY is
   defined (macro at the top of the file); its other expansion is not
   visible from this part of the file.
   strncpy build: 15 is subtracted from %ebx (presumably the remaining
   byte count -- confirm at the entry code); a non-zero remainder jumps
   to L(StrncpyFillTailWithZero) with %ecx = %edx + 15.
   NOTE(review): unlike most sibling ExitTail blocks there is no
   cmpb/sbb adjustment of %eax here.  Presumably this label is only
   reached when byte 14 is the terminating NUL, so %eax already points
   at it -- confirm against every jump to L(ExitTail15).  */
3063 .p2align 4
3064 L(ExitTail15):
3065 movlpd (%ecx), %xmm0
3066 movlpd 7(%ecx), %xmm1
3067 movlpd %xmm0, (%edx)
3068 movlpd %xmm1, 7(%edx)
3069 SAVE_RESULT_TAIL (14)
3070 # ifdef USE_AS_STRNCPY
3071 sub $15, %ebx
3072 lea 15(%edx), %ecx /* lea leaves the flags from sub untouched for the jnz below */
3073 jnz L(StrncpyFillTailWithZero)
3074 # endif
3075 RETURN
3076
/* ExitTail16: copy 16 bytes from (%ecx) to (%edx) with one unaligned
   16-byte load/store pair (movdqu) and produce the return value in
   %eax.  SAVE_RESULT_TAIL (15) is `lea 15(%edx), %eax` when
   USE_AS_STPCPY is defined (macro at the top of the file); its other
   expansion is not visible from this part of the file.
   strncpy build: 16 is subtracted from %ebx (presumably the remaining
   byte count -- confirm at the entry code); a non-zero remainder jumps
   to L(StrncpyFillTailWithZero) with %ecx = %edx + 16.  */
3077 .p2align 4
3078 L(ExitTail16):
3079 movdqu (%ecx), %xmm0
3080 movdqu %xmm0, (%edx)
3081 SAVE_RESULT_TAIL (15)
3082 # ifdef USE_AS_STRNCPY
3083 sub $16, %ebx
3084 lea 16(%edx), %ecx /* lea leaves the flags from sub untouched for the jnz below */
3085 jnz L(StrncpyFillTailWithZero)
3086 # ifdef USE_AS_STPCPY
3087 cmpb $1, (%eax) /* CF = 1 iff the byte at (%eax) is 0 */
3088 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL last byte */
3089 # endif
3090 # endif
3091 RETURN
3092 # endif
3093
3094 # ifdef USE_AS_STRNCPY
/* Unwind annotations only; no machine instructions are emitted here.
   CFI_PUSH (defined at the top of the file) adds 4 to the CFA offset
   and records the register as saved at the top of the stack.
   NOTE(review): presumably the labels that follow are entered with
   %esi and %edi already pushed by code outside this part of the file,
   and this re-establishes that state for the unwinder -- confirm.  */
3095 # ifndef USE_AS_STRCAT
3096 CFI_PUSH (%esi)
3097 CFI_PUSH (%edi)
3098 # endif
/* StrncpyLeaveCase2OrCase3: dispatch on %eax.  A non-zero %eax goes to
   L(Aligned64LeaveCase2); zero falls through to
   L(Aligned64LeaveCase3).
   NOTE(review): %eax looks like a pmovmskb NUL mask and %ebx like a
   byte count that a 64-byte loop has driven to zero or below; both are
   set up outside this part of the file -- confirm there.

   Aligned64LeaveCase3: add 48 to %ebx and then, 16 bytes at a time,
   store %xmm4, %xmm5 and %xmm6 to -64/-48/-32(%edx), adding 16 to %esi
   after each store.  Control leaves for L(CopyFrom1To16BytesCase3) as
   soon as %ebx is used up, or unconditionally after the third store.  */
3099 .p2align 4
3100 L(StrncpyLeaveCase2OrCase3):
3101 test %eax, %eax
3102 jnz L(Aligned64LeaveCase2)
3103
3104 L(Aligned64LeaveCase3):
3105 add $48, %ebx
3106 jle L(CopyFrom1To16BytesCase3) /* signed test: result of the add is <= 0 */
3107 movaps %xmm4, -64(%edx)
3108 lea 16(%esi), %esi
3109 sub $16, %ebx
3110 jbe L(CopyFrom1To16BytesCase3) /* borrow or zero: fewer than 17 were left */
3111 movaps %xmm5, -48(%edx)
3112 lea 16(%esi), %esi
3113 sub $16, %ebx
3114 jbe L(CopyFrom1To16BytesCase3)
3115 movaps %xmm6, -32(%edx)
3116 lea 16(%esi), %esi
3117 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3118 jmp L(CopyFrom1To16BytesCase3)
3119
/* Aligned64LeaveCase2: walk %xmm4, %xmm5, %xmm6, %xmm7 in order.  For
   each vector: byte-compare it with %xmm0 and move the result mask to
   %eax, then check the count in %ebx first (exhausted ->
   L(CopyFrom1To16BytesCase2OrCase3)) and the mask second (non-zero ->
   L(CopyFrom1To16Bytes)).  The previous vector is stored to
   -64/-48/-32(%edx) and %esi advanced by 16 only after the next
   compare has been issued.  The %xmm7 step ends with an unconditional
   jump to L(CopyFrom1To16BytesCase2).
   NOTE(review): the mask only marks NUL bytes if %xmm0 is all-zero on
   entry (it stays all-zero along the fall-through path because that
   path is taken only when the mask in %eax was 0) -- confirm the
   entry state with the code that jumps here.  */
3120 L(Aligned64LeaveCase2):
3121 pcmpeqb %xmm4, %xmm0
3122 pmovmskb %xmm0, %eax
3123 add $48, %ebx
3124 jle L(CopyFrom1To16BytesCase2OrCase3)
3125 test %eax, %eax
3126 jnz L(CopyFrom1To16Bytes)
3127
3128 pcmpeqb %xmm5, %xmm0
3129 pmovmskb %xmm0, %eax
3130 movaps %xmm4, -64(%edx)
3131 lea 16(%esi), %esi
3132 sub $16, %ebx
3133 jbe L(CopyFrom1To16BytesCase2OrCase3)
3134 test %eax, %eax
3135 jnz L(CopyFrom1To16Bytes)
3136
3137 pcmpeqb %xmm6, %xmm0
3138 pmovmskb %xmm0, %eax
3139 movaps %xmm5, -48(%edx)
3140 lea 16(%esi), %esi
3141 sub $16, %ebx
3142 jbe L(CopyFrom1To16BytesCase2OrCase3)
3143 test %eax, %eax
3144 jnz L(CopyFrom1To16Bytes)
3145
3146 pcmpeqb %xmm7, %xmm0
3147 pmovmskb %xmm0, %eax
3148 movaps %xmm6, -32(%edx)
3149 lea 16(%esi), %esi
3150 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3151 jmp L(CopyFrom1To16BytesCase2)
3152
3153 /*--------------------------------------------------*/
/* StrncpyExit1Case2OrCase3: copy 15 bytes from (%ecx) to (%edx) with
   two overlapping 8-byte moves (offsets 0 and 7), set %esi to 15, then
   go to L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably a NUL mask computed by the caller
   and %esi the offset at which the targets resume -- confirm.  */
3154 .p2align 4
3155 L(StrncpyExit1Case2OrCase3):
3156 movlpd (%ecx), %xmm0
3157 movlpd 7(%ecx), %xmm1
3158 movlpd %xmm0, (%edx)
3159 movlpd %xmm1, 7(%edx)
3160 mov $15, %esi
3161 test %eax, %eax
3162 jnz L(CopyFrom1To16BytesCase2)
3163 jmp L(CopyFrom1To16BytesCase3)
3164
/* StrncpyExit2Case2OrCase3: copy 14 bytes from (%ecx) to (%edx) with
   two overlapping 8-byte moves (offsets 0 and 6), set %esi to 14, then
   go to L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably a NUL mask computed by the caller
   and %esi the offset at which the targets resume -- confirm.  */
3165 .p2align 4
3166 L(StrncpyExit2Case2OrCase3):
3167 movlpd (%ecx), %xmm0
3168 movlpd 6(%ecx), %xmm1
3169 movlpd %xmm0, (%edx)
3170 movlpd %xmm1, 6(%edx)
3171 mov $14, %esi
3172 test %eax, %eax
3173 jnz L(CopyFrom1To16BytesCase2)
3174 jmp L(CopyFrom1To16BytesCase3)
3175
/* StrncpyExit3Case2OrCase3: copy 13 bytes from (%ecx) to (%edx) with
   two overlapping 8-byte moves (offsets 0 and 5), set %esi to 13, then
   go to L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably a NUL mask computed by the caller
   and %esi the offset at which the targets resume -- confirm.  */
3176 .p2align 4
3177 L(StrncpyExit3Case2OrCase3):
3178 movlpd (%ecx), %xmm0
3179 movlpd 5(%ecx), %xmm1
3180 movlpd %xmm0, (%edx)
3181 movlpd %xmm1, 5(%edx)
3182 mov $13, %esi
3183 test %eax, %eax
3184 jnz L(CopyFrom1To16BytesCase2)
3185 jmp L(CopyFrom1To16BytesCase3)
3186
/* StrncpyExit4Case2OrCase3: copy 12 bytes from (%ecx) to (%edx) (8-byte
   move at offset 0 plus a 4-byte move at offset 8 that borrows %esi as
   scratch), then set %esi to 12 and go to L(CopyFrom1To16BytesCase2)
   if %eax is non-zero, L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably a NUL mask computed by the caller
   and %esi the offset at which the targets resume -- confirm.  */
3187 .p2align 4
3188 L(StrncpyExit4Case2OrCase3):
3189 movlpd (%ecx), %xmm0
3190 movl 8(%ecx), %esi
3191 movlpd %xmm0, (%edx)
3192 movl %esi, 8(%edx)
3193 mov $12, %esi
3194 test %eax, %eax
3195 jnz L(CopyFrom1To16BytesCase2)
3196 jmp L(CopyFrom1To16BytesCase3)
3197
/* StrncpyExit5Case2OrCase3: copy 11 bytes from (%ecx) to (%edx) (8-byte
   move at offset 0 plus an overlapping 4-byte move at offset 7 that
   borrows %esi as scratch), then set %esi to 11 and go to
   L(CopyFrom1To16BytesCase2) if %eax is non-zero,
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably a NUL mask computed by the caller
   and %esi the offset at which the targets resume -- confirm.  */
3198 .p2align 4
3199 L(StrncpyExit5Case2OrCase3):
3200 movlpd (%ecx), %xmm0
3201 movl 7(%ecx), %esi
3202 movlpd %xmm0, (%edx)
3203 movl %esi, 7(%edx)
3204 mov $11, %esi
3205 test %eax, %eax
3206 jnz L(CopyFrom1To16BytesCase2)
3207 jmp L(CopyFrom1To16BytesCase3)
3208
/* StrncpyExit6Case2OrCase3: copy 10 bytes from (%ecx) to (%edx) (8-byte
   move at offset 0 plus an overlapping 4-byte move at offset 6 that
   borrows %esi as scratch), then set %esi to 10 and go to
   L(CopyFrom1To16BytesCase2) if %eax is non-zero,
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably a NUL mask computed by the caller
   and %esi the offset at which the targets resume -- confirm.  */
3209 .p2align 4
3210 L(StrncpyExit6Case2OrCase3):
3211 movlpd (%ecx), %xmm0
3212 movl 6(%ecx), %esi
3213 movlpd %xmm0, (%edx)
3214 movl %esi, 6(%edx)
3215 mov $10, %esi
3216 test %eax, %eax
3217 jnz L(CopyFrom1To16BytesCase2)
3218 jmp L(CopyFrom1To16BytesCase3)
3219
/* StrncpyExit7Case2OrCase3: copy 9 bytes from (%ecx) to (%edx) (8-byte
   move at offset 0 plus an overlapping 4-byte move at offset 5 that
   borrows %esi as scratch), then set %esi to 9 and go to
   L(CopyFrom1To16BytesCase2) if %eax is non-zero,
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably a NUL mask computed by the caller
   and %esi the offset at which the targets resume -- confirm.  */
3220 .p2align 4
3221 L(StrncpyExit7Case2OrCase3):
3222 movlpd (%ecx), %xmm0
3223 movl 5(%ecx), %esi
3224 movlpd %xmm0, (%edx)
3225 movl %esi, 5(%edx)
3226 mov $9, %esi
3227 test %eax, %eax
3228 jnz L(CopyFrom1To16BytesCase2)
3229 jmp L(CopyFrom1To16BytesCase3)
3230
/* StrncpyExit8Case2OrCase3: copy 8 bytes from (%ecx) to (%edx) with one
   8-byte move, set %esi to 8, then go to L(CopyFrom1To16BytesCase2) if
   %eax is non-zero and to L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably a NUL mask computed by the caller
   and %esi the offset at which the targets resume -- confirm.  */
3231 .p2align 4
3232 L(StrncpyExit8Case2OrCase3):
3233 movlpd (%ecx), %xmm0
3234 movlpd %xmm0, (%edx)
3235 mov $8, %esi
3236 test %eax, %eax
3237 jnz L(CopyFrom1To16BytesCase2)
3238 jmp L(CopyFrom1To16BytesCase3)
3239
/* StrncpyExit9Case2OrCase3: copy bytes 0..7 from (%ecx) to (%edx) with
   one 8-byte move, set %esi to 7, then go to
   L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): the move covers 8 bytes while %esi is set to 7, so
   byte 7 is read and written here although it lies at the offset
   handed on in %esi.  Presumably the targets rewrite that byte and the
   destination bound still allows it -- confirm against the targets.
   %eax is presumably a NUL mask computed by the caller -- confirm.  */
3240 .p2align 4
3241 L(StrncpyExit9Case2OrCase3):
3242 movlpd (%ecx), %xmm0
3243 movlpd %xmm0, (%edx)
3244 mov $7, %esi
3245 test %eax, %eax
3246 jnz L(CopyFrom1To16BytesCase2)
3247 jmp L(CopyFrom1To16BytesCase3)
3248
/* StrncpyExit10Case2OrCase3: copy 8 bytes starting one byte before
   (%ecx) (offsets -1..6) to the same offsets from (%edx), set %esi to
   6, then go to L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): the move reaches offset 6, which is the offset handed
   on in %esi, and also rewrites offset -1.  Presumably offset -1 was
   already copied and the targets rewrite offset 6 within the
   destination bound -- confirm against the callers and targets.
   %eax is presumably a NUL mask computed by the caller -- confirm.  */
3249 .p2align 4
3250 L(StrncpyExit10Case2OrCase3):
3251 movlpd -1(%ecx), %xmm0
3252 movlpd %xmm0, -1(%edx)
3253 mov $6, %esi
3254 test %eax, %eax
3255 jnz L(CopyFrom1To16BytesCase2)
3256 jmp L(CopyFrom1To16BytesCase3)
3257
/* StrncpyExit11Case2OrCase3: copy 8 bytes starting two bytes before
   (%ecx) (offsets -2..5) to the same offsets from (%edx), set %esi to
   5, then go to L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): the move reaches offset 5, which is the offset handed
   on in %esi, and also rewrites offsets -2..-1.  Presumably those were
   already copied and the targets rewrite offset 5 within the
   destination bound -- confirm against the callers and targets.
   %eax is presumably a NUL mask computed by the caller -- confirm.  */
3258 .p2align 4
3259 L(StrncpyExit11Case2OrCase3):
3260 movlpd -2(%ecx), %xmm0
3261 movlpd %xmm0, -2(%edx)
3262 mov $5, %esi
3263 test %eax, %eax
3264 jnz L(CopyFrom1To16BytesCase2)
3265 jmp L(CopyFrom1To16BytesCase3)
3266
/* StrncpyExit12Case2OrCase3: copy 4 bytes from (%ecx) to (%edx) using
   %esi as scratch, then set %esi to 4 and go to
   L(CopyFrom1To16BytesCase2) if %eax is non-zero,
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably a NUL mask computed by the caller
   and %esi the offset at which the targets resume -- confirm.  */
3267 .p2align 4
3268 L(StrncpyExit12Case2OrCase3):
3269 movl (%ecx), %esi
3270 movl %esi, (%edx)
3271 mov $4, %esi
3272 test %eax, %eax
3273 jnz L(CopyFrom1To16BytesCase2)
3274 jmp L(CopyFrom1To16BytesCase3)
3275
/* StrncpyExit13Case2OrCase3: copy 4 bytes at offsets -1..2 from %ecx to
   the same offsets from %edx using %esi as scratch, then set %esi to 3
   and go to L(CopyFrom1To16BytesCase2) if %eax is non-zero,
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): offset -1 is rewritten here; presumably it was already
   copied by the caller.  %eax is presumably a NUL mask computed by the
   caller -- confirm both.  */
3276 .p2align 4
3277 L(StrncpyExit13Case2OrCase3):
3278 movl -1(%ecx), %esi
3279 movl %esi, -1(%edx)
3280 mov $3, %esi
3281 test %eax, %eax
3282 jnz L(CopyFrom1To16BytesCase2)
3283 jmp L(CopyFrom1To16BytesCase3)
3284
/* StrncpyExit14Case2OrCase3: copy 4 bytes at offsets -2..1 from %ecx to
   the same offsets from %edx using %esi as scratch, then set %esi to 2
   and go to L(CopyFrom1To16BytesCase2) if %eax is non-zero,
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): offsets -2..-1 are rewritten here; presumably they
   were already copied by the caller.  %eax is presumably a NUL mask
   computed by the caller -- confirm both.  */
3285 .p2align 4
3286 L(StrncpyExit14Case2OrCase3):
3287 movl -2(%ecx), %esi
3288 movl %esi, -2(%edx)
3289 mov $2, %esi
3290 test %eax, %eax
3291 jnz L(CopyFrom1To16BytesCase2)
3292 jmp L(CopyFrom1To16BytesCase3)
3293
/* StrncpyExit15Case2OrCase3: copy 4 bytes at offsets -3..0 from %ecx to
   the same offsets from %edx using %esi as scratch, then set %esi to 1
   and go to L(CopyFrom1To16BytesCase2) if %eax is non-zero,
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): offsets -3..-1 are rewritten here; presumably they
   were already copied by the caller.  %eax is presumably a NUL mask
   computed by the caller -- confirm both.  */
3294 .p2align 4
3295 L(StrncpyExit15Case2OrCase3):
3296 movl -3(%ecx), %esi
3297 movl %esi, -3(%edx)
3298 mov $1, %esi
3299 test %eax, %eax
3300 jnz L(CopyFrom1To16BytesCase2)
3301 jmp L(CopyFrom1To16BytesCase3)
3302
/* StrncpyLeave1 / StrncpyExit1.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit1) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $1 (16 bytes taken
   from byte 1 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-1 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 15, copy the 16 bytes
   that end at the new pointers with unaligned moves, clear %esi and
   continue at L(CopyFrom1To16BytesCase3).  */
3303 L(StrncpyLeave1):
3304 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3305 add $48, %ebx
3306 jle L(StrncpyExit1)
3307 palignr $1, %xmm1, %xmm2
3308 movaps %xmm2, (%edx)
3309 movaps 31(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 31 must be aligned */
3310 lea 16(%esi), %esi
3311 sub $16, %ebx
3312 jbe L(StrncpyExit1)
3313 palignr $1, %xmm3, %xmm2
3314 movaps %xmm2, 16(%edx)
3315 lea 16(%esi), %esi
3316 sub $16, %ebx
3317 jbe L(StrncpyExit1)
3318 movaps %xmm4, 32(%edx)
3319 lea 16(%esi), %esi
3320 sub $16, %ebx
3321 jbe L(StrncpyExit1)
3322 movaps %xmm5, 48(%edx)
3323 lea 16(%esi), %esi
3324 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3325 L(StrncpyExit1):
3326 lea 15(%edx, %esi), %edx
3327 lea 15(%ecx, %esi), %ecx
3328 movdqu -16(%ecx), %xmm0
3329 xor %esi, %esi
3330 movdqu %xmm0, -16(%edx)
3331 jmp L(CopyFrom1To16BytesCase3)
3332
/* StrncpyLeave2 / StrncpyExit2.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit2) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $2 (16 bytes taken
   from byte 2 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-2 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 14, copy the 16 bytes
   that end at the new pointers with unaligned moves, clear %esi and
   continue at L(CopyFrom1To16BytesCase3).  */
3333 L(StrncpyLeave2):
3334 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3335 add $48, %ebx
3336 jle L(StrncpyExit2)
3337 palignr $2, %xmm1, %xmm2
3338 movaps %xmm2, (%edx)
3339 movaps 30(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 30 must be aligned */
3340 lea 16(%esi), %esi
3341 sub $16, %ebx
3342 jbe L(StrncpyExit2)
3343 palignr $2, %xmm3, %xmm2
3344 movaps %xmm2, 16(%edx)
3345 lea 16(%esi), %esi
3346 sub $16, %ebx
3347 jbe L(StrncpyExit2)
3348 movaps %xmm4, 32(%edx)
3349 lea 16(%esi), %esi
3350 sub $16, %ebx
3351 jbe L(StrncpyExit2)
3352 movaps %xmm5, 48(%edx)
3353 lea 16(%esi), %esi
3354 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3355 L(StrncpyExit2):
3356 lea 14(%edx, %esi), %edx
3357 lea 14(%ecx, %esi), %ecx
3358 movdqu -16(%ecx), %xmm0
3359 xor %esi, %esi
3360 movdqu %xmm0, -16(%edx)
3361 jmp L(CopyFrom1To16BytesCase3)
3362
/* StrncpyLeave3 / StrncpyExit3.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit3) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $3 (16 bytes taken
   from byte 3 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-3 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 13, copy the 16 bytes
   that end at the new pointers with unaligned moves, clear %esi and
   continue at L(CopyFrom1To16BytesCase3).  */
3363 L(StrncpyLeave3):
3364 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3365 add $48, %ebx
3366 jle L(StrncpyExit3)
3367 palignr $3, %xmm1, %xmm2
3368 movaps %xmm2, (%edx)
3369 movaps 29(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 29 must be aligned */
3370 lea 16(%esi), %esi
3371 sub $16, %ebx
3372 jbe L(StrncpyExit3)
3373 palignr $3, %xmm3, %xmm2
3374 movaps %xmm2, 16(%edx)
3375 lea 16(%esi), %esi
3376 sub $16, %ebx
3377 jbe L(StrncpyExit3)
3378 movaps %xmm4, 32(%edx)
3379 lea 16(%esi), %esi
3380 sub $16, %ebx
3381 jbe L(StrncpyExit3)
3382 movaps %xmm5, 48(%edx)
3383 lea 16(%esi), %esi
3384 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3385 L(StrncpyExit3):
3386 lea 13(%edx, %esi), %edx
3387 lea 13(%ecx, %esi), %ecx
3388 movdqu -16(%ecx), %xmm0
3389 xor %esi, %esi
3390 movdqu %xmm0, -16(%edx)
3391 jmp L(CopyFrom1To16BytesCase3)
3392
/* StrncpyLeave4 / StrncpyExit4.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit4) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $4 (16 bytes taken
   from byte 4 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-4 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 12, copy the 12 bytes
   that end at the new pointers (8-byte move at -12 plus 4-byte move at
   -4, with %eax as scratch), clear %esi and continue at
   L(CopyFrom1To16BytesCase3).  */
3393 L(StrncpyLeave4):
3394 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3395 add $48, %ebx
3396 jle L(StrncpyExit4)
3397 palignr $4, %xmm1, %xmm2
3398 movaps %xmm2, (%edx)
3399 movaps 28(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 28 must be aligned */
3400 lea 16(%esi), %esi
3401 sub $16, %ebx
3402 jbe L(StrncpyExit4)
3403 palignr $4, %xmm3, %xmm2
3404 movaps %xmm2, 16(%edx)
3405 lea 16(%esi), %esi
3406 sub $16, %ebx
3407 jbe L(StrncpyExit4)
3408 movaps %xmm4, 32(%edx)
3409 lea 16(%esi), %esi
3410 sub $16, %ebx
3411 jbe L(StrncpyExit4)
3412 movaps %xmm5, 48(%edx)
3413 lea 16(%esi), %esi
3414 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3415 L(StrncpyExit4):
3416 lea 12(%edx, %esi), %edx
3417 lea 12(%ecx, %esi), %ecx
3418 movlpd -12(%ecx), %xmm0
3419 movl -4(%ecx), %eax
3420 movlpd %xmm0, -12(%edx)
3421 movl %eax, -4(%edx)
3422 xor %esi, %esi
3423 jmp L(CopyFrom1To16BytesCase3)
3424
/* StrncpyLeave5 / StrncpyExit5.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit5) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $5 (16 bytes taken
   from byte 5 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-5 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 11, copy the 11 bytes
   that end at the new pointers (8-byte move at -11 plus an overlapping
   4-byte move at -4, with %eax as scratch), clear %esi and continue at
   L(CopyFrom1To16BytesCase3).  */
3425 L(StrncpyLeave5):
3426 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3427 add $48, %ebx
3428 jle L(StrncpyExit5)
3429 palignr $5, %xmm1, %xmm2
3430 movaps %xmm2, (%edx)
3431 movaps 27(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 27 must be aligned */
3432 lea 16(%esi), %esi
3433 sub $16, %ebx
3434 jbe L(StrncpyExit5)
3435 palignr $5, %xmm3, %xmm2
3436 movaps %xmm2, 16(%edx)
3437 lea 16(%esi), %esi
3438 sub $16, %ebx
3439 jbe L(StrncpyExit5)
3440 movaps %xmm4, 32(%edx)
3441 lea 16(%esi), %esi
3442 sub $16, %ebx
3443 jbe L(StrncpyExit5)
3444 movaps %xmm5, 48(%edx)
3445 lea 16(%esi), %esi
3446 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3447 L(StrncpyExit5):
3448 lea 11(%edx, %esi), %edx
3449 lea 11(%ecx, %esi), %ecx
3450 movlpd -11(%ecx), %xmm0
3451 movl -4(%ecx), %eax
3452 movlpd %xmm0, -11(%edx)
3453 movl %eax, -4(%edx)
3454 xor %esi, %esi
3455 jmp L(CopyFrom1To16BytesCase3)
3456
/* StrncpyLeave6 / StrncpyExit6.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit6) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $6 (16 bytes taken
   from byte 6 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-6 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 10, copy the 10 bytes
   that end at the new pointers (8-byte move at -10 plus 2-byte move at
   -2, with %ax as scratch), clear %esi and continue at
   L(CopyFrom1To16BytesCase3).  */
3457 L(StrncpyLeave6):
3458 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3459 add $48, %ebx
3460 jle L(StrncpyExit6)
3461 palignr $6, %xmm1, %xmm2
3462 movaps %xmm2, (%edx)
3463 movaps 26(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 26 must be aligned */
3464 lea 16(%esi), %esi
3465 sub $16, %ebx
3466 jbe L(StrncpyExit6)
3467 palignr $6, %xmm3, %xmm2
3468 movaps %xmm2, 16(%edx)
3469 lea 16(%esi), %esi
3470 sub $16, %ebx
3471 jbe L(StrncpyExit6)
3472 movaps %xmm4, 32(%edx)
3473 lea 16(%esi), %esi
3474 sub $16, %ebx
3475 jbe L(StrncpyExit6)
3476 movaps %xmm5, 48(%edx)
3477 lea 16(%esi), %esi
3478 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3479 L(StrncpyExit6):
3480 lea 10(%edx, %esi), %edx
3481 lea 10(%ecx, %esi), %ecx
3482
3483 movlpd -10(%ecx), %xmm0
3484 movw -2(%ecx), %ax
3485 movlpd %xmm0, -10(%edx)
3486 movw %ax, -2(%edx)
3487 xor %esi, %esi
3488 jmp L(CopyFrom1To16BytesCase3)
3489
/* StrncpyLeave7 / StrncpyExit7.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit7) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $7 (16 bytes taken
   from byte 7 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-7 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 9, copy the 9 bytes that
   end at the new pointers (8-byte move at -9 plus 1-byte move at -1,
   with %ah as scratch), clear %esi and continue at
   L(CopyFrom1To16BytesCase3).  */
3490 L(StrncpyLeave7):
3491 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3492 add $48, %ebx
3493 jle L(StrncpyExit7)
3494 palignr $7, %xmm1, %xmm2
3495 movaps %xmm2, (%edx)
3496 movaps 25(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 25 must be aligned */
3497 lea 16(%esi), %esi
3498 sub $16, %ebx
3499 jbe L(StrncpyExit7)
3500 palignr $7, %xmm3, %xmm2
3501 movaps %xmm2, 16(%edx)
3502 lea 16(%esi), %esi
3503 sub $16, %ebx
3504 jbe L(StrncpyExit7)
3505 movaps %xmm4, 32(%edx)
3506 lea 16(%esi), %esi
3507 sub $16, %ebx
3508 jbe L(StrncpyExit7)
3509 movaps %xmm5, 48(%edx)
3510 lea 16(%esi), %esi
3511 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3512 L(StrncpyExit7):
3513 lea 9(%edx, %esi), %edx
3514 lea 9(%ecx, %esi), %ecx
3515
3516 movlpd -9(%ecx), %xmm0
3517 movb -1(%ecx), %ah
3518 movlpd %xmm0, -9(%edx)
3519 movb %ah, -1(%edx)
3520 xor %esi, %esi
3521 jmp L(CopyFrom1To16BytesCase3)
3522
/* StrncpyLeave8 / StrncpyExit8.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit8) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $8 (16 bytes taken
   from byte 8 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-8 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 8, copy the 8 bytes that
   end at the new pointers with one 8-byte move, clear %esi and
   continue at L(CopyFrom1To16BytesCase3).  */
3523 L(StrncpyLeave8):
3524 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3525 add $48, %ebx
3526 jle L(StrncpyExit8)
3527 palignr $8, %xmm1, %xmm2
3528 movaps %xmm2, (%edx)
3529 movaps 24(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 24 must be aligned */
3530 lea 16(%esi), %esi
3531 sub $16, %ebx
3532 jbe L(StrncpyExit8)
3533 palignr $8, %xmm3, %xmm2
3534 movaps %xmm2, 16(%edx)
3535 lea 16(%esi), %esi
3536 sub $16, %ebx
3537 jbe L(StrncpyExit8)
3538 movaps %xmm4, 32(%edx)
3539 lea 16(%esi), %esi
3540 sub $16, %ebx
3541 jbe L(StrncpyExit8)
3542 movaps %xmm5, 48(%edx)
3543 lea 16(%esi), %esi
3544 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3545 L(StrncpyExit8):
3546 lea 8(%edx, %esi), %edx
3547 lea 8(%ecx, %esi), %ecx
3548 movlpd -8(%ecx), %xmm0
3549 movlpd %xmm0, -8(%edx)
3550 xor %esi, %esi
3551 jmp L(CopyFrom1To16BytesCase3)
3552
/* StrncpyLeave9 / StrncpyExit9.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit9) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $9 (16 bytes taken
   from byte 9 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-9 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 7, copy the 8 bytes that
   end at the new pointers with one 8-byte move (one byte more than the
   7 added, so the byte just before the old position is rewritten --
   presumably with the value already stored there; confirm), clear
   %esi and continue at L(CopyFrom1To16BytesCase3).  */
3553 L(StrncpyLeave9):
3554 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3555 add $48, %ebx
3556 jle L(StrncpyExit9)
3557 palignr $9, %xmm1, %xmm2
3558 movaps %xmm2, (%edx)
3559 movaps 23(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 23 must be aligned */
3560 lea 16(%esi), %esi
3561 sub $16, %ebx
3562 jbe L(StrncpyExit9)
3563 palignr $9, %xmm3, %xmm2
3564 movaps %xmm2, 16(%edx)
3565 lea 16(%esi), %esi
3566 sub $16, %ebx
3567 jbe L(StrncpyExit9)
3568 movaps %xmm4, 32(%edx)
3569 lea 16(%esi), %esi
3570 sub $16, %ebx
3571 jbe L(StrncpyExit9)
3572 movaps %xmm5, 48(%edx)
3573 lea 16(%esi), %esi
3574 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3575 L(StrncpyExit9):
3576 lea 7(%edx, %esi), %edx
3577 lea 7(%ecx, %esi), %ecx
3578
3579 movlpd -8(%ecx), %xmm0
3580 movlpd %xmm0, -8(%edx)
3581 xor %esi, %esi
3582 jmp L(CopyFrom1To16BytesCase3)
3583
/* StrncpyLeave10 / StrncpyExit10.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit10) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $10 (16 bytes taken
   from byte 10 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-10 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 6, copy the 8 bytes that
   end at the new pointers with one 8-byte move (two bytes more than
   the 6 added, so the two bytes just before the old position are
   rewritten -- presumably with the values already stored there;
   confirm), clear %esi and continue at L(CopyFrom1To16BytesCase3).  */
3584 L(StrncpyLeave10):
3585 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3586 add $48, %ebx
3587 jle L(StrncpyExit10)
3588 palignr $10, %xmm1, %xmm2
3589 movaps %xmm2, (%edx)
3590 movaps 22(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 22 must be aligned */
3591 lea 16(%esi), %esi
3592 sub $16, %ebx
3593 jbe L(StrncpyExit10)
3594 palignr $10, %xmm3, %xmm2
3595 movaps %xmm2, 16(%edx)
3596 lea 16(%esi), %esi
3597 sub $16, %ebx
3598 jbe L(StrncpyExit10)
3599 movaps %xmm4, 32(%edx)
3600 lea 16(%esi), %esi
3601 sub $16, %ebx
3602 jbe L(StrncpyExit10)
3603 movaps %xmm5, 48(%edx)
3604 lea 16(%esi), %esi
3605 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3606 L(StrncpyExit10):
3607 lea 6(%edx, %esi), %edx
3608 lea 6(%ecx, %esi), %ecx
3609
3610 movlpd -8(%ecx), %xmm0
3611 movlpd %xmm0, -8(%edx)
3612 xor %esi, %esi
3613 jmp L(CopyFrom1To16BytesCase3)
3614
/* StrncpyLeave11 / StrncpyExit11.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit11) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $11 (16 bytes taken
   from byte 11 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-11 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 5, copy the 5 bytes that
   end at the new pointers (4-byte move at -5 through %esi, which is
   free once the pointers are updated, plus 1-byte move at -1 through
   %ah), clear %esi and continue at L(CopyFrom1To16BytesCase3).  */
3615 L(StrncpyLeave11):
3616 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3617 add $48, %ebx
3618 jle L(StrncpyExit11)
3619 palignr $11, %xmm1, %xmm2
3620 movaps %xmm2, (%edx)
3621 movaps 21(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 21 must be aligned */
3622 lea 16(%esi), %esi
3623 sub $16, %ebx
3624 jbe L(StrncpyExit11)
3625 palignr $11, %xmm3, %xmm2
3626 movaps %xmm2, 16(%edx)
3627 lea 16(%esi), %esi
3628 sub $16, %ebx
3629 jbe L(StrncpyExit11)
3630 movaps %xmm4, 32(%edx)
3631 lea 16(%esi), %esi
3632 sub $16, %ebx
3633 jbe L(StrncpyExit11)
3634 movaps %xmm5, 48(%edx)
3635 lea 16(%esi), %esi
3636 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3637 L(StrncpyExit11):
3638 lea 5(%edx, %esi), %edx
3639 lea 5(%ecx, %esi), %ecx
3640 movl -5(%ecx), %esi
3641 movb -1(%ecx), %ah
3642 movl %esi, -5(%edx)
3643 movb %ah, -1(%edx)
3644 xor %esi, %esi
3645 jmp L(CopyFrom1To16BytesCase3)
3646
/* StrncpyLeave12 / StrncpyExit12.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit12) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $12 (16 bytes taken
   from byte 12 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-12 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 4, copy the 4 bytes that
   end at the new pointers through %eax, clear %esi and continue at
   L(CopyFrom1To16BytesCase3).  */
3647 L(StrncpyLeave12):
3648 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3649 add $48, %ebx
3650 jle L(StrncpyExit12)
3651 palignr $12, %xmm1, %xmm2
3652 movaps %xmm2, (%edx)
3653 movaps 20(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 20 must be aligned */
3654 lea 16(%esi), %esi
3655 sub $16, %ebx
3656 jbe L(StrncpyExit12)
3657 palignr $12, %xmm3, %xmm2
3658 movaps %xmm2, 16(%edx)
3659 lea 16(%esi), %esi
3660 sub $16, %ebx
3661 jbe L(StrncpyExit12)
3662 movaps %xmm4, 32(%edx)
3663 lea 16(%esi), %esi
3664 sub $16, %ebx
3665 jbe L(StrncpyExit12)
3666 movaps %xmm5, 48(%edx)
3667 lea 16(%esi), %esi
3668 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3669 L(StrncpyExit12):
3670 lea 4(%edx, %esi), %edx
3671 lea 4(%ecx, %esi), %ecx
3672 movl -4(%ecx), %eax
3673 movl %eax, -4(%edx)
3674 xor %esi, %esi
3675 jmp L(CopyFrom1To16BytesCase3)
3676
/* StrncpyLeave13 / StrncpyExit13.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit13) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $13 (16 bytes taken
   from byte 13 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-13 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 3, copy the 4 bytes that
   end at the new pointers through %eax (one byte more than the 3
   added, so the byte just before the old position is rewritten --
   presumably with the value already stored there; confirm), clear
   %esi and continue at L(CopyFrom1To16BytesCase3).  */
3677 L(StrncpyLeave13):
3678 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3679 add $48, %ebx
3680 jle L(StrncpyExit13)
3681 palignr $13, %xmm1, %xmm2
3682 movaps %xmm2, (%edx)
3683 movaps 19(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 19 must be aligned */
3684 lea 16(%esi), %esi
3685 sub $16, %ebx
3686 jbe L(StrncpyExit13)
3687 palignr $13, %xmm3, %xmm2
3688 movaps %xmm2, 16(%edx)
3689 lea 16(%esi), %esi
3690 sub $16, %ebx
3691 jbe L(StrncpyExit13)
3692 movaps %xmm4, 32(%edx)
3693 lea 16(%esi), %esi
3694 sub $16, %ebx
3695 jbe L(StrncpyExit13)
3696 movaps %xmm5, 48(%edx)
3697 lea 16(%esi), %esi
3698 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3699 L(StrncpyExit13):
3700 lea 3(%edx, %esi), %edx
3701 lea 3(%ecx, %esi), %ecx
3702
3703 movl -4(%ecx), %eax
3704 movl %eax, -4(%edx)
3705 xor %esi, %esi
3706 jmp L(CopyFrom1To16BytesCase3)
3707
/* StrncpyLeave14 / StrncpyExit14.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit14) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $14 (16 bytes taken
   from byte 14 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-14 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 2, copy the 2 bytes that
   end at the new pointers through %ax, clear %esi and continue at
   L(CopyFrom1To16BytesCase3).  */
3708 L(StrncpyLeave14):
3709 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3710 add $48, %ebx
3711 jle L(StrncpyExit14)
3712 palignr $14, %xmm1, %xmm2
3713 movaps %xmm2, (%edx)
3714 movaps 18(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 18 must be aligned */
3715 lea 16(%esi), %esi
3716 sub $16, %ebx
3717 jbe L(StrncpyExit14)
3718 palignr $14, %xmm3, %xmm2
3719 movaps %xmm2, 16(%edx)
3720 lea 16(%esi), %esi
3721 sub $16, %ebx
3722 jbe L(StrncpyExit14)
3723 movaps %xmm4, 32(%edx)
3724 lea 16(%esi), %esi
3725 sub $16, %ebx
3726 jbe L(StrncpyExit14)
3727 movaps %xmm5, 48(%edx)
3728 lea 16(%esi), %esi
3729 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3730 L(StrncpyExit14):
3731 lea 2(%edx, %esi), %edx
3732 lea 2(%ecx, %esi), %ecx
3733 movw -2(%ecx), %ax
3734 movw %ax, -2(%edx)
3735 xor %esi, %esi
3736 jmp L(CopyFrom1To16BytesCase3)
3737
/* StrncpyLeave15 / StrncpyExit15.
   Leave part: add 48 to %ebx, then perform up to four aligned 16-byte
   stores at 0/16/32/48(%edx), adding 16 to %esi after each one and
   dropping to L(StrncpyExit15) as soon as %ebx is no longer positive.
   The first two vectors are assembled with palignr $15 (16 bytes taken
   from byte 15 of the %xmm2:%xmm1 pair, then of the reloaded
   %xmm2:%xmm3 pair); the last two are %xmm4 and %xmm5 unchanged.
   NOTE(review): the contents of %xmm1-%xmm5 and the meaning of
   %ebx/%esi are set up outside this part of the file, presumably by a
   64-byte shift-by-15 loop -- confirm there.
   Exit part: advance %edx and %ecx by %esi + 1, copy the single byte
   that ends at the new pointers through %ah, clear %esi and continue
   at L(CopyFrom1To16BytesCase3).  */
3738 L(StrncpyLeave15):
3739 movaps %xmm2, %xmm3 /* keep a copy: %xmm2 is overwritten by palignr, %xmm3 feeds the second one */
3740 add $48, %ebx
3741 jle L(StrncpyExit15)
3742 palignr $15, %xmm1, %xmm2
3743 movaps %xmm2, (%edx)
3744 movaps 17(%ecx), %xmm2 /* movaps needs a 16-byte aligned address, so %ecx + 17 must be aligned */
3745 lea 16(%esi), %esi
3746 sub $16, %ebx
3747 jbe L(StrncpyExit15)
3748 palignr $15, %xmm3, %xmm2
3749 movaps %xmm2, 16(%edx)
3750 lea 16(%esi), %esi
3751 sub $16, %ebx
3752 jbe L(StrncpyExit15)
3753 movaps %xmm4, 32(%edx)
3754 lea 16(%esi), %esi
3755 sub $16, %ebx
3756 jbe L(StrncpyExit15)
3757 movaps %xmm5, 48(%edx)
3758 lea 16(%esi), %esi
3759 lea -16(%ebx), %ebx /* %ebx -= 16 without modifying the flags */
3760 L(StrncpyExit15):
3761 lea 1(%edx, %esi), %edx
3762 lea 1(%ecx, %esi), %ecx
3763 movb -1(%ecx), %ah
3764 movb %ah, -1(%edx)
3765 xor %esi, %esi
3766 jmp L(CopyFrom1To16BytesCase3)
3767 # endif
3768
3769 # ifndef USE_AS_STRCAT
3770 # ifdef USE_AS_STRNCPY
/* Unwind annotations only; no machine instructions are emitted here.
   CFI_POP (defined at the top of the file) subtracts 4 from the CFA
   offset and marks the register as restored.
   NOTE(review): presumably the labels that follow are reached without
   %esi and %edi on the stack, and this tells the unwinder so --
   confirm against the code that jumps to them.  */
3771 CFI_POP (%esi)
3772 CFI_POP (%edi)
3773
/* ExitTail0: nothing is copied; return with %eax = %edx.  In this
   chunk it is reached from L(StrncpyExit4Bytes) when %ebx is zero.
   RETURN pops %ebx before `ret' in the strncpy build (macro at the top
   of the file).  */
3774 .p2align 4
3775 L(ExitTail0):
3776 movl %edx, %eax
3777 RETURN
3778
/* StrncpyExit15Bytes: short-length path that dispatches on the count
   in %ebx and on the first NUL among source bytes 8..13.
   %ebx <= 12 is delegated to L(StrncpyExit12Bytes).  Otherwise bytes
   8..11 are tested for NUL (no count test is needed: %ebx is above 12
   here), then for each of 13 and 14 the code leaves through the
   matching L(ExitTailN) if either the count equals N or byte N-1 is
   NUL.  If nothing matched, 15 bytes are copied with two overlapping
   8-byte moves and the function returns directly.
   NOTE(review): bytes 0..7 are not examined and counts above 15 are
   not handled here; presumably the code that jumps here has already
   ruled out a NUL in bytes 0..7 and guarantees %ebx <= 15 -- confirm.  */
3779 .p2align 4
3780 L(StrncpyExit15Bytes):
3781 cmp $12, %ebx
3782 jbe L(StrncpyExit12Bytes)
3783 cmpb $0, 8(%ecx)
3784 jz L(ExitTail9)
3785 cmpb $0, 9(%ecx)
3786 jz L(ExitTail10)
3787 cmpb $0, 10(%ecx)
3788 jz L(ExitTail11)
3789 cmpb $0, 11(%ecx)
3790 jz L(ExitTail12)
3791 cmp $13, %ebx
3792 je L(ExitTail13)
3793 cmpb $0, 12(%ecx)
3794 jz L(ExitTail13)
3795 cmp $14, %ebx
3796 je L(ExitTail14)
3797 cmpb $0, 13(%ecx)
3798 jz L(ExitTail14)
3799 movlpd (%ecx), %xmm0
3800 movlpd 7(%ecx), %xmm1
3801 movlpd %xmm0, (%edx)
3802 movlpd %xmm1, 7(%edx)
3803 # ifdef USE_AS_STPCPY
3804 lea 14(%edx), %eax /* address of the 15th byte just stored */
3805 cmpb $1, (%eax) /* CF = 1 iff that byte is 0 */
3806 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL last byte */
3807 # else
3808 movl %edx, %eax
3809 # endif
3810 RETURN
3811
/* StrncpyExit12Bytes: short-length path entered from
   L(StrncpyExit15Bytes) when %ebx <= 12.  For each of 9, 10 and 11 the
   code leaves through the matching L(ExitTailN) if either the count in
   %ebx equals N or source byte N-1 is NUL.  If nothing matched, 12
   bytes are copied (8-byte move plus 4-byte move at offset 8, with
   %eax as scratch) and the function returns directly.
   SAVE_RESULT_TAIL (11) is `lea 11(%edx), %eax` when USE_AS_STPCPY is
   defined (macro at the top of the file); its other expansion is not
   visible from this part of the file.
   NOTE(review): counts below 9 and a NUL in bytes 0..7 are not handled
   here; presumably the callers exclude both -- confirm.  */
3812 .p2align 4
3813 L(StrncpyExit12Bytes):
3814 cmp $9, %ebx
3815 je L(ExitTail9)
3816 cmpb $0, 8(%ecx)
3817 jz L(ExitTail9)
3818 cmp $10, %ebx
3819 je L(ExitTail10)
3820 cmpb $0, 9(%ecx)
3821 jz L(ExitTail10)
3822 cmp $11, %ebx
3823 je L(ExitTail11)
3824 cmpb $0, 10(%ecx)
3825 jz L(ExitTail11)
3826 movlpd (%ecx), %xmm0
3827 movl 8(%ecx), %eax
3828 movlpd %xmm0, (%edx)
3829 movl %eax, 8(%edx)
3830 SAVE_RESULT_TAIL (11)
3831 # ifdef USE_AS_STPCPY
3832 cmpb $1, (%eax) /* CF = 1 iff the byte at (%eax) is 0 */
3833 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL last byte */
3834 # endif
3835 RETURN
3836
/* StrncpyExit8Bytes: short-length path that dispatches on the count in
   %ebx and on the first NUL among source bytes 0..6.
   %ebx <= 4 is delegated to L(StrncpyExit4Bytes).  Otherwise bytes
   0..3 are tested for NUL (no count test is needed: %ebx is above 4
   here), then for each of 5, 6 and 7 the code leaves through the
   matching L(ExitTailN) if either the count equals N or byte N-1 is
   NUL.  If nothing matched, 8 bytes are copied with one 8-byte move
   and the function returns directly.
   NOTE(review): counts above 8 are not handled here; presumably the
   code that jumps here guarantees %ebx <= 8 -- confirm.  */
3837 .p2align 4
3838 L(StrncpyExit8Bytes):
3839 cmp $4, %ebx
3840 jbe L(StrncpyExit4Bytes)
3841 cmpb $0, (%ecx)
3842 jz L(ExitTail1)
3843 cmpb $0, 1(%ecx)
3844 jz L(ExitTail2)
3845 cmpb $0, 2(%ecx)
3846 jz L(ExitTail3)
3847 cmpb $0, 3(%ecx)
3848 jz L(ExitTail4)
3849
3850 cmp $5, %ebx
3851 je L(ExitTail5)
3852 cmpb $0, 4(%ecx)
3853 jz L(ExitTail5)
3854 cmp $6, %ebx
3855 je L(ExitTail6)
3856 cmpb $0, 5(%ecx)
3857 jz L(ExitTail6)
3858 cmp $7, %ebx
3859 je L(ExitTail7)
3860 cmpb $0, 6(%ecx)
3861 jz L(ExitTail7)
3862 movlpd (%ecx), %xmm0
3863 movlpd %xmm0, (%edx)
3864 # ifdef USE_AS_STPCPY
3865 lea 7(%edx), %eax /* address of the 8th byte just stored */
3866 cmpb $1, (%eax) /* CF = 1 iff that byte is 0 */
3867 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL last byte */
3868 # else
3869 movl %edx, %eax
3870 # endif
3871 RETURN
3872
/* StrncpyExit4Bytes: short-length path entered from
   L(StrncpyExit8Bytes) when %ebx <= 4.  A zero count leaves through
   L(ExitTail0).  For each of 1, 2 and 3 the code leaves through the
   matching L(ExitTailN) if either the count in %ebx equals N or source
   byte N-1 is NUL; each count test precedes the byte test, so no
   source byte at or beyond the count is read.  If nothing matched, 4
   bytes are copied with one 4-byte move (using %eax as scratch) and
   the function returns directly.
   SAVE_RESULT_TAIL (3) is `lea 3(%edx), %eax` when USE_AS_STPCPY is
   defined (macro at the top of the file); its other expansion is not
   visible from this part of the file.  */
3873 .p2align 4
3874 L(StrncpyExit4Bytes):
3875 test %ebx, %ebx
3876 jz L(ExitTail0)
3877 cmp $1, %ebx
3878 je L(ExitTail1)
3879 cmpb $0, (%ecx)
3880 jz L(ExitTail1)
3881 cmp $2, %ebx
3882 je L(ExitTail2)
3883 cmpb $0, 1(%ecx)
3884 jz L(ExitTail2)
3885 cmp $3, %ebx
3886 je L(ExitTail3)
3887 cmpb $0, 2(%ecx)
3888 jz L(ExitTail3)
3889 movl (%ecx), %eax
3890 movl %eax, (%edx)
3891 SAVE_RESULT_TAIL (3)
3892 # ifdef USE_AS_STPCPY
3893 cmpb $1, (%eax) /* CF = 1 iff the byte at (%eax) is 0 */
3894 sbb $-1, %eax /* %eax += 1 - CF: step past a non-NUL last byte */
3895 # endif
3896 RETURN
3897 # endif
3898
3899 END (STRCPY)
3900 # endif
3901 #endif