]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/i686/multiarch/strcat-sse2.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / strcat-sse2.S
CommitLineData
5fa16e9b 1/* strcat with SSE2
b168057a 2 Copyright (C) 2011-2015 Free Software Foundation, Inc.
5fa16e9b
LD
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
5fa16e9b
LD
19
20
4f41c682 21#if IS_IN (libc)
5fa16e9b
LD
22
23# include <sysdep.h>
24
25
/* Unwind-info helpers for 4-byte pushes and pops.
   CFI_PUSH grows the CFA offset by 4 and records REG as saved at
   offset 0; CFI_POP shrinks the CFA offset by 4 and marks REG as
   restored.  PUSH and POP combine the pushl/popl instruction with the
   matching annotation.  */
26# define CFI_PUSH(REG) \
27 cfi_adjust_cfa_offset (4); \
28 cfi_rel_offset (REG, 0)
29
30# define CFI_POP(REG) \
31 cfi_adjust_cfa_offset (-4); \
32 cfi_restore (REG)
33
34# define PUSH(REG) pushl REG; CFI_PUSH (REG)
35# define POP(REG) popl REG; CFI_POP (REG)
36
37# ifdef SHARED
/* SHARED build: a table entry is stored as the distance of its target
   I from the table base B.  */
38# define JMPTBL(I, B) I - B
39
40/* Load an entry in a jump table into ECX and branch to it. TABLE is a
41 jump table with relative offsets. INDEX is a register that contains
42 the index into the jump table. SCALE is the scale of INDEX.
   ECX is overwritten by this macro.  */
43
44# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
45 /* We first load PC into ECX. */ \
9a1d9254 46 SETUP_PIC_REG(cx); \
5fa16e9b
LD
47 /* Get the address of the jump table. */ \
48 addl $(TABLE - .), %ecx; \
49 /* Get the entry and convert the relative offset to the \
50 absolute address. */ \
51 addl (%ecx,INDEX,SCALE), %ecx; \
c0c3f78a 52 /* We loaded the jump table and adjusted ECX. Go. */ \
5fa16e9b
LD
53 jmp *%ecx
54# else
/* Non-SHARED build: a table entry is the target I itself; B is
   unused.  */
55# define JMPTBL(I, B) I
56
57/* Branch to an entry in a jump table. TABLE is a jump table with
58 absolute offsets. INDEX is a register that contains the index into
59 the jump table. SCALE is the scale of INDEX. */
60
61# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
62 jmp *TABLE(,INDEX,SCALE)
63# endif
64
/* Name of the entry point; STRCAT may be predefined to override the
   default.  */
65# ifndef STRCAT
66# define STRCAT __strcat_sse2
67# endif
68
/* Displacements from %esp used to reach the stack arguments.
   STR1 (8) and STR2 (12) are used by the entry code right after
   PUSH (%esi).  In the strncat build LEN (20) and STR3 (12) are used
   after the additional PUSH (%ebx); in the strcat build STR3 equals
   STR1.  STR3 is what the exit code reloads into %eax before
   returning.  */
69# define PARMS 4
70# define STR1 PARMS+4
71# define STR2 STR1+4
72
73# ifdef USE_AS_STRNCAT
74# define LEN STR2+8
75# define STR3 STR1+4
76# else
77# define STR3 STR1
78# endif
79
/* Defined unconditionally; nothing in this file tests it.
   NOTE(review): presumably consumed by code that shares these
   sources -- confirm.  */
# define USE_AS_STRCAT

/* RETURN restores the saved registers and returns to the caller.
   The code that textually follows a RETURN is only ever reached with
   the registers still pushed, so the CFI_PUSH annotations after the
   ret re-establish that unwind state for the following instructions.
   They must be listed in the order in which the registers were really
   pushed by the entry code (%esi first, then %ebx for strncat);
   otherwise the save slots of the two registers are recorded
   swapped.  */
# ifdef USE_AS_STRNCAT
#  define RETURN	POP(%ebx); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%ebx);
# else
#  define RETURN	POP(%esi); ret; CFI_PUSH(%esi);
# endif
86
/* Entry point.  Register use set up here:
     %eax = first stack argument (the string that is appended to)
     %esi = second stack argument (the string to append); the caller's
            %esi is saved by PUSH
     %ebx = third stack argument, the length limit (strncat build
            only); the caller's %ebx is saved by PUSH  */
87.text
88ENTRY (STRCAT)
89 PUSH (%esi)
90 mov STR1(%esp), %eax
91 mov STR2(%esp), %esi
92# ifdef USE_AS_STRNCAT
93 PUSH (%ebx)
94 movl LEN(%esp), %ebx
/* A zero length limit: return at once, %eax already holds the first
   argument.  */
95 test %ebx, %ebx
96 jz L(ExitZero)
97# endif
/* Same early return when the source string is empty.  The flags set
   by cmpb are still valid at the jz because mov does not change
   them.  */
98 cmpb $0, (%esi)
99 mov %esi, %ecx
100 mov %eax, %edx
101 jz L(ExitZero)
102
/* %ecx = source address & 63, %edx = destination address & 63.
   The fast path below does unaligned loads of 32 source bytes and 16
   destination bytes; it is used only when the source offset is <= 32
   and the destination offset is <= 48, i.e. when those loads stay
   inside the current 64-byte block.  */
103 and $63, %ecx
104 and $63, %edx
105 cmp $32, %ecx
106 ja L(StrlenCore7_1)
107 cmp $48, %edx
108 ja L(alignment_prolog)
109
/* Fast path.  xmm5/xmm6 keep the first 32 source bytes and xmm4/xmm7
   their compare-with-zero results; they are consumed at
   L(StartStrcpyPart).  %ecx receives the NUL mask of the first 16
   destination bytes.  */
110 pxor %xmm0, %xmm0
111 pxor %xmm4, %xmm4
112 pxor %xmm7, %xmm7
113 movdqu (%eax), %xmm1
114 movdqu (%esi), %xmm5
115 pcmpeqb %xmm1, %xmm0
116 movdqu 16(%esi), %xmm6
117 pmovmskb %xmm0, %ecx
118 pcmpeqb %xmm5, %xmm4
119 pcmpeqb %xmm6, %xmm7
120 test %ecx, %ecx
121 jnz L(exit_less16_)
/* No NUL in those 16 bytes, so xmm0 is still all zero.  Continue the
   scan from the 16-byte boundary at or below the destination.  */
122 mov %eax, %ecx
123 and $-16, %eax
124 jmp L(loop_prolog)
/* Destination offset inside its 64-byte block is > 48: check the
   enclosing aligned 16-byte block instead of doing an unaligned load.
   %ecx = destination & 15; the mask in %edx is shifted right by that
   amount so that bytes in front of the string are ignored.  The
   source is preloaded into xmm5/xmm6 (data) and xmm4/xmm7 (NUL
   compare results) exactly as on the fast path.  */
126L(alignment_prolog):
127 pxor %xmm0, %xmm0
128 pxor %xmm4, %xmm4
129 mov %edx, %ecx
130 pxor %xmm7, %xmm7
131 and $15, %ecx
132 and $-16, %eax
133 pcmpeqb (%eax), %xmm0
134 movdqu (%esi), %xmm5
135 movdqu 16(%esi), %xmm6
136 pmovmskb %xmm0, %edx
137 pcmpeqb %xmm5, %xmm4
138 shr %cl, %edx
139 pcmpeqb %xmm6, %xmm7
140 test %edx, %edx
141 jnz L(exit_less16)
142 add %eax, %ecx
143
/* xmm0 may hold matches for the ignored leading bytes; clear it.  */
144 pxor %xmm0, %xmm0
145L(loop_prolog):
146 pxor %xmm1, %xmm1
147 pxor %xmm2, %xmm2
148 pxor %xmm3, %xmm3
149 .p2align 4
/* Scan the destination for its terminating NUL, 64 bytes per
   iteration, as four aligned 16-byte compares at offsets 16, 32, 48
   and 64 from %eax.  A compare that finds no NUL leaves its register
   all zero, so xmm0-xmm3 can be reused as the zero operand on the
   next iteration.  */
150L(align16_loop):
151 pcmpeqb 16(%eax), %xmm0
152 pmovmskb %xmm0, %edx
153 test %edx, %edx
154 jnz L(exit16)
155
156 pcmpeqb 32(%eax), %xmm1
157 pmovmskb %xmm1, %edx
158 test %edx, %edx
159 jnz L(exit32)
160
161 pcmpeqb 48(%eax), %xmm2
162 pmovmskb %xmm2, %edx
163 test %edx, %edx
164 jnz L(exit48)
165
166 pcmpeqb 64(%eax), %xmm3
167 pmovmskb %xmm3, %edx
168 lea 64(%eax), %eax
169 test %edx, %edx
170 jz L(align16_loop)
/* NUL found in the fourth block; %eax was already advanced by 64.  */
171 bsf %edx, %edx
172 add %edx, %eax
173 jmp L(StartStrcpyPart)
/* Exits of the destination scan.  Each one leaves in %eax the address
   of the destination's terminating NUL: the block offset plus the
   index of the lowest set bit of the NUL mask.  */
175 .p2align 4
176L(exit16):
177 bsf %edx, %edx
178 lea 16(%eax, %edx), %eax
179 jmp L(StartStrcpyPart)
180
181 .p2align 4
182L(exit32):
183 bsf %edx, %edx
184 lea 32(%eax, %edx), %eax
185 jmp L(StartStrcpyPart)
186
187 .p2align 4
188L(exit48):
189 bsf %edx, %edx
190 lea 48(%eax, %edx), %eax
191 jmp L(StartStrcpyPart)
192
/* From L(alignment_prolog): %eax is the aligned block address, %ecx
   the destination's offset inside it, and the mask in %edx was
   already shifted by that offset.  */
193 .p2align 4
194L(exit_less16):
195 bsf %edx, %edx
196 add %ecx, %eax
197 add %edx, %eax
198 jmp L(StartStrcpyPart)
199
/* From the fast path: the mask in %ecx comes from an unaligned load
   at %eax itself.  Control continues with the code that follows.  */
200 .p2align 4
201L(exit_less16_):
202 bsf %ecx, %ecx
203 add %ecx, %eax
204
/* Start of the copy when the first 32 source bytes were preloaded.
   On entry: %eax = address of the destination's NUL, %esi = source,
   xmm5/xmm6 = source bytes 0-15/16-31, xmm4/xmm7 = their
   compare-with-zero results, %ebx = length limit (strncat build).  */
205 .p2align 4
206L(StartStrcpyPart):
207 pmovmskb %xmm4, %edx
208# ifdef USE_AS_STRNCAT
/* Limit of at most 16 bytes: finish in the length-aware tail code.  */
209 cmp $16, %ebx
210 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
211# endif
212 test %edx, %edx
213 jnz L(CopyFrom1To16BytesTail1)
214
/* No NUL in source bytes 0-15: store them, then examine bytes
   16-31.  */
215 movdqu %xmm5, (%eax)
216 pmovmskb %xmm7, %edx
217# ifdef USE_AS_STRNCAT
218 cmp $32, %ebx
219 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
220# endif
221 test %edx, %edx
222 jnz L(CopyFrom1To32Bytes1)
223
/* Switch to aligned source reads: %ecx = source & 15, %esi is rounded
   down to a 16-byte boundary, and %eax (plus the length limit, in the
   opposite direction) is adjusted by the same amount so that
   (%eax, x) still corresponds to (%esi, x).  */
224 mov %esi, %ecx
225 and $-16, %esi
226 and $15, %ecx
227 pxor %xmm0, %xmm0
228# ifdef USE_AS_STRNCAT
229 add %ecx, %ebx
230# endif
231 sub %ecx, %eax
232 jmp L(Unalign16Both)
233
/* Destination scan used when the source offset inside its 64-byte
   block is > 32.  Same scheme as L(alignment_prolog) and
   L(align16_loop), but the source is not preloaded: the first aligned
   block is compared with the mask shifted right by %ecx
   (= destination & 15), then 64 bytes are scanned per iteration.  */
234L(StrlenCore7_1):
235 mov %eax, %ecx
236 pxor %xmm0, %xmm0
237 and $15, %ecx
238 and $-16, %eax
239 pcmpeqb (%eax), %xmm0
240 pmovmskb %xmm0, %edx
241 shr %cl, %edx
242 test %edx, %edx
243 jnz L(exit_less16_1)
244 add %eax, %ecx
245
246 pxor %xmm0, %xmm0
247 pxor %xmm1, %xmm1
248 pxor %xmm2, %xmm2
249 pxor %xmm3, %xmm3
250
251 .p2align 4
/* A compare that finds no NUL leaves its register all zero, so
   xmm0-xmm3 serve as the zero operand again on the next pass.  */
252L(align16_loop_1):
253 pcmpeqb 16(%eax), %xmm0
254 pmovmskb %xmm0, %edx
255 test %edx, %edx
256 jnz L(exit16_1)
257
258 pcmpeqb 32(%eax), %xmm1
259 pmovmskb %xmm1, %edx
260 test %edx, %edx
261 jnz L(exit32_1)
262
263 pcmpeqb 48(%eax), %xmm2
264 pmovmskb %xmm2, %edx
265 test %edx, %edx
266 jnz L(exit48_1)
267
268 pcmpeqb 64(%eax), %xmm3
269 pmovmskb %xmm3, %edx
270 lea 64(%eax), %eax
271 test %edx, %edx
272 jz L(align16_loop_1)
/* NUL found in the fourth block; %eax was already advanced by 64.  */
273 bsf %edx, %edx
274 add %edx, %eax
275 jmp L(StartStrcpyPart_1)
276
/* Exits of the L(StrlenCore7_1) scan.  Each one leaves in %eax the
   address of the destination's terminating NUL (block offset plus
   index of the lowest set mask bit).  */
277 .p2align 4
278L(exit16_1):
279 bsf %edx, %edx
280 lea 16(%eax, %edx), %eax
281 jmp L(StartStrcpyPart_1)
282
283 .p2align 4
284L(exit32_1):
285 bsf %edx, %edx
286 lea 32(%eax, %edx), %eax
287 jmp L(StartStrcpyPart_1)
288
289 .p2align 4
290L(exit48_1):
291 bsf %edx, %edx
292 lea 48(%eax, %edx), %eax
293 jmp L(StartStrcpyPart_1)
294
/* First-block case: %eax is the aligned block address, %ecx the
   destination's offset inside it, and the mask was already shifted by
   that offset.  Control continues with the code that follows.  */
295 .p2align 4
296L(exit_less16_1):
297 bsf %edx, %edx
298 add %ecx, %eax
299 add %edx, %eax
300
/* Start of the copy when nothing of the source was preloaded.
   On entry %eax = address of the destination's NUL.  %ecx becomes
   source & 15 and %esi is rounded down to a 16-byte boundary so that
   the source can be examined with aligned compares.  */
301 .p2align 4
302L(StartStrcpyPart_1):
303 mov %esi, %ecx
304 and $15, %ecx
305 and $-16, %esi
306 pxor %xmm0, %xmm0
307 pxor %xmm1, %xmm1
308
309# ifdef USE_AS_STRNCAT
/* With a limit above 48 the first steps need no length checks.  */
310 cmp $48, %ebx
311 ja L(BigN)
312# endif
313 pcmpeqb (%esi), %xmm1
314# ifdef USE_AS_STRNCAT
/* From here on the limit is counted from the aligned source
   address.  */
315 add %ecx, %ebx
316# endif
317 pmovmskb %xmm1, %edx
/* Drop the mask bits of the bytes in front of the source string.  */
318 shr %cl, %edx
319# ifdef USE_AS_STRNCAT
320 cmp $16, %ebx
321 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
322# endif
323 test %edx, %edx
324 jnz L(CopyFrom1To16BytesTail)
325
326 pcmpeqb 16(%esi), %xmm0
327 pmovmskb %xmm0, %edx
328# ifdef USE_AS_STRNCAT
329 cmp $32, %ebx
330 jbe L(CopyFrom1To32BytesCase2OrCase3)
331# endif
332 test %edx, %edx
333 jnz L(CopyFrom1To32Bytes)
334
/* No NUL up to the end of the second aligned block: copy the first 16
   bytes of the string itself, then bias %eax by -%ecx so that
   (%eax, x) corresponds to (%esi, x) for the aligned source
   pointer.  */
335 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
336 movdqu %xmm1, (%eax)
337 sub %ecx, %eax
338
/* Unrolled copy with aligned source reads and unaligned destination
   stores.  %ecx is the running offset.  Every step loads the next
   aligned 16 source bytes, stores the block loaded by the previous
   step and tests the new block for NUL.  A block that contains the
   NUL is not stored here; L(CopyFrom1To16Bytes) copies its used part.
   xmm0 stays all zero as long as no NUL is found.  In the strncat
   build %ebx is decremented per step and the Case2OrCase3 code takes
   over once it reaches zero or wraps below it.  */
339 .p2align 4
340L(Unalign16Both):
341 mov $16, %ecx
342 movdqa (%esi, %ecx), %xmm1
343 movaps 16(%esi, %ecx), %xmm2
344 movdqu %xmm1, (%eax, %ecx)
345 pcmpeqb %xmm2, %xmm0
346 pmovmskb %xmm0, %edx
347 add $16, %ecx
348# ifdef USE_AS_STRNCAT
349 sub $48, %ebx
350 jbe L(CopyFrom1To16BytesCase2OrCase3)
351# endif
352 test %edx, %edx
353 jnz L(CopyFrom1To16Bytes)
/* L(BigN) joins here after doing the first step itself.  */
354L(Unalign16BothBigN):
355 movaps 16(%esi, %ecx), %xmm3
356 movdqu %xmm2, (%eax, %ecx)
357 pcmpeqb %xmm3, %xmm0
358 pmovmskb %xmm0, %edx
359 add $16, %ecx
360# ifdef USE_AS_STRNCAT
361 sub $16, %ebx
362 jbe L(CopyFrom1To16BytesCase2OrCase3)
363# endif
364 test %edx, %edx
365 jnz L(CopyFrom1To16Bytes)
366
367 movaps 16(%esi, %ecx), %xmm4
368 movdqu %xmm3, (%eax, %ecx)
369 pcmpeqb %xmm4, %xmm0
370 pmovmskb %xmm0, %edx
371 add $16, %ecx
372# ifdef USE_AS_STRNCAT
373 sub $16, %ebx
374 jbe L(CopyFrom1To16BytesCase2OrCase3)
375# endif
376 test %edx, %edx
377 jnz L(CopyFrom1To16Bytes)
378
379 movaps 16(%esi, %ecx), %xmm1
380 movdqu %xmm4, (%eax, %ecx)
381 pcmpeqb %xmm1, %xmm0
382 pmovmskb %xmm0, %edx
383 add $16, %ecx
384# ifdef USE_AS_STRNCAT
385 sub $16, %ebx
386 jbe L(CopyFrom1To16BytesCase2OrCase3)
387# endif
388 test %edx, %edx
389 jnz L(CopyFrom1To16Bytes)
390
391 movaps 16(%esi, %ecx), %xmm2
392 movdqu %xmm1, (%eax, %ecx)
393 pcmpeqb %xmm2, %xmm0
394 pmovmskb %xmm0, %edx
395 add $16, %ecx
396# ifdef USE_AS_STRNCAT
397 sub $16, %ebx
398 jbe L(CopyFrom1To16BytesCase2OrCase3)
399# endif
400 test %edx, %edx
401 jnz L(CopyFrom1To16Bytes)
402
403 movaps 16(%esi, %ecx), %xmm3
404 movdqu %xmm2, (%eax, %ecx)
405 pcmpeqb %xmm3, %xmm0
406 pmovmskb %xmm0, %edx
407 add $16, %ecx
408# ifdef USE_AS_STRNCAT
409 sub $16, %ebx
410 jbe L(CopyFrom1To16BytesCase2OrCase3)
411# endif
412 test %edx, %edx
413 jnz L(CopyFrom1To16Bytes)
414
/* Prepare the 64-byte loop: store the last loaded block, round the
   source pointer (%esi + %ecx + 16) down to a multiple of 64 and move
   %eax back by the same distance (%edx) that %esi moved, so that
   (%eax) keeps corresponding to (%esi).  */
415 movdqu %xmm3, (%eax, %ecx)
416 mov %esi, %edx
417 lea 16(%esi, %ecx), %esi
418 and $-0x40, %esi
419 sub %esi, %edx
420 sub %edx, %eax
421# ifdef USE_AS_STRNCAT
422 lea 128(%ebx, %edx), %ebx
423# endif
/* Load 64 source bytes into xmm4-xmm7 (xmm2/xmm3 are scratch copies).
   The byte-wise minimum of the four blocks has a zero byte exactly
   when one of the blocks has one, so one compare tests all 64
   bytes.  */
424 movaps (%esi), %xmm2
425 movaps %xmm2, %xmm4
426 movaps 16(%esi), %xmm5
427 movaps 32(%esi), %xmm3
428 movaps %xmm3, %xmm6
429 movaps 48(%esi), %xmm7
430 pminub %xmm5, %xmm2
431 pminub %xmm7, %xmm3
432 pminub %xmm2, %xmm3
433 pcmpeqb %xmm0, %xmm3
434 pmovmskb %xmm3, %edx
435# ifdef USE_AS_STRNCAT
436 sub $64, %ebx
437 jbe L(UnalignedLeaveCase2OrCase3)
438# endif
439 test %edx, %edx
440 jnz L(Unaligned64Leave)
441
/* Main loop, 64 bytes per iteration.  On entry xmm4-xmm7 hold 64
   source bytes already known to contain no NUL.  They are stored to
   the destination while the next 64 bytes are loaded into the same
   registers and tested with the pminub reduction (a zero byte in the
   minimum means a NUL somewhere in the 64 bytes).  */
442 .p2align 4
443L(Unaligned64Loop_start):
444 add $64, %eax
445 add $64, %esi
446 movdqu %xmm4, -64(%eax)
447 movaps (%esi), %xmm2
448 movdqa %xmm2, %xmm4
449 movdqu %xmm5, -48(%eax)
450 movaps 16(%esi), %xmm5
451 pminub %xmm5, %xmm2
452 movaps 32(%esi), %xmm3
453 movdqu %xmm6, -32(%eax)
454 movaps %xmm3, %xmm6
455 movdqu %xmm7, -16(%eax)
456 movaps 48(%esi), %xmm7
457 pminub %xmm7, %xmm3
458 pminub %xmm2, %xmm3
459 pcmpeqb %xmm0, %xmm3
460 pmovmskb %xmm3, %edx
461# ifdef USE_AS_STRNCAT
462 sub $64, %ebx
463 jbe L(UnalignedLeaveCase2OrCase3)
464# endif
465 test %edx, %edx
466 jz L(Unaligned64Loop_start)
467
/* One of the four blocks in xmm4-xmm7 contains the NUL.  Find the
   first such block, store the blocks in front of it and dispatch on
   the NUL's index inside that block through ExitTable, with %esi and
   %eax advanced to that block.  */
468L(Unaligned64Leave):
469 pxor %xmm1, %xmm1
470
471 pcmpeqb %xmm4, %xmm0
472 pcmpeqb %xmm5, %xmm1
473 pmovmskb %xmm0, %edx
474 pmovmskb %xmm1, %ecx
475 test %edx, %edx
476 jnz L(CopyFrom1To16BytesUnaligned_0)
477 test %ecx, %ecx
478 jnz L(CopyFrom1To16BytesUnaligned_16)
479
/* xmm0 and xmm1 are all zero again here because neither compare
   matched.  */
480 pcmpeqb %xmm6, %xmm0
481 pcmpeqb %xmm7, %xmm1
482 pmovmskb %xmm0, %edx
483 pmovmskb %xmm1, %ecx
484 test %edx, %edx
485 jnz L(CopyFrom1To16BytesUnaligned_32)
486
/* The NUL is in the fourth block: store the first three.  */
487 bsf %ecx, %edx
488 movdqu %xmm4, (%eax)
489 movdqu %xmm5, 16(%eax)
490 movdqu %xmm6, 32(%eax)
491 add $48, %esi
492 add $48, %eax
493 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
494
495# ifdef USE_AS_STRNCAT
/* strncat with a length limit above 48, entered from
   L(StartStrcpyPart_1): the same first steps as there and as the
   first step of L(Unalign16Both), but without length checks.  %ebx is
   adjusted once (minus 48, plus the source misalignment %ecx) before
   control joins the unrolled copy at L(Unalign16BothBigN).  */
496 .p2align 4
497L(BigN):
498 pcmpeqb (%esi), %xmm1
499 pmovmskb %xmm1, %edx
500 shr %cl, %edx
501 test %edx, %edx
502 jnz L(CopyFrom1To16BytesTail)
503
504 pcmpeqb 16(%esi), %xmm0
505 pmovmskb %xmm0, %edx
506 test %edx, %edx
507 jnz L(CopyFrom1To32Bytes)
508
509 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
510 movdqu %xmm1, (%eax)
511 sub %ecx, %eax
512 sub $48, %ebx
513 add %ecx, %ebx
514
515 mov $16, %ecx
516 movdqa (%esi, %ecx), %xmm1
517 movaps 16(%esi, %ecx), %xmm2
518 movdqu %xmm1, (%eax, %ecx)
519 pcmpeqb %xmm2, %xmm0
520 pmovmskb %xmm0, %edx
521 add $16, %ecx
522 test %edx, %edx
523 jnz L(CopyFrom1To16Bytes)
524 jmp L(Unalign16BothBigN)
525# endif
526
527/*------------end of main part-------------------------------*/
528
529/* Case1: the source's NUL was found (and, for strncat, the length
   limit was not reached first).  Each helper makes %esi/%eax point at
   the first bytes still to be copied, converts the NUL mask into an
   index with bsf and dispatches through ExitTable, whose entry k
   copies k + 1 bytes.  */
530 .p2align 4
/* %ecx = offset of the block containing the NUL.  */
531L(CopyFrom1To16Bytes):
532 add %ecx, %eax
533 add %ecx, %esi
534 bsf %edx, %edx
535 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
536
537 .p2align 4
/* %esi is the aligned source address and %ecx the source
   misalignment; %eax already points at the destination's NUL.  */
538L(CopyFrom1To16BytesTail):
539 add %ecx, %esi
540 bsf %edx, %edx
541 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
542
543 .p2align 4
/* The first 16 bytes were already stored; skip them.  */
544L(CopyFrom1To32Bytes1):
545 add $16, %esi
546 add $16, %eax
547L(CopyFrom1To16BytesTail1):
548 bsf %edx, %edx
549 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
550
551 .p2align 4
/* The NUL is in the second aligned block: its index from the start of
   the string is the bsf result + 16 - %ecx.  */
552L(CopyFrom1To32Bytes):
553 bsf %edx, %edx
554 add %ecx, %esi
555 add $16, %edx
556 sub %ecx, %edx
557 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
558
/* Exits of L(Unaligned64Leave): the NUL is in block 0, 1 or 2 of the
   64 bytes held in xmm4-xmm7; the blocks in front of it are stored
   first.  */
559 .p2align 4
560L(CopyFrom1To16BytesUnaligned_0):
561 bsf %edx, %edx
562 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
563
564 .p2align 4
565L(CopyFrom1To16BytesUnaligned_16):
566 bsf %ecx, %edx
567 movdqu %xmm4, (%eax)
568 add $16, %esi
569 add $16, %eax
570 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
571
572 .p2align 4
573L(CopyFrom1To16BytesUnaligned_32):
574 bsf %edx, %edx
575 movdqu %xmm4, (%eax)
576 movdqu %xmm5, 16(%eax)
577 add $32, %esi
578 add $32, %eax
579 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
580
581# ifdef USE_AS_STRNCAT
582
/* strncat only.  Shared tail: %edx holds the index of the NUL, which
   comes before the length limit, so finish as in Case1.  */
583 .p2align 4
584L(CopyFrom1To16BytesExit):
585 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
586
587/* Case2: a NUL was found in the block in which the length limit also
   ends.  %ebx is turned into the number of bytes still allowed and
   %edx into the index of the NUL.  If the NUL comes first (%edx below
   %ebx) the string is finished through ExitTable; otherwise %ebx
   bytes are copied and terminated through ExitStrncatTable.  */
588
589 .p2align 4
590L(CopyFrom1To16BytesCase2):
591 add $16, %ebx
592 add %ecx, %eax
593 add %ecx, %esi
594 bsf %edx, %edx
595 cmp %ebx, %edx
596 jb L(CopyFrom1To16BytesExit)
597 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
598
599 .p2align 4
600L(CopyFrom1To32BytesCase2):
601 sub %ecx, %ebx
602 add %ecx, %esi
603 bsf %edx, %edx
604 add $16, %edx
605 sub %ecx, %edx
606 cmp %ebx, %edx
607 jb L(CopyFrom1To16BytesExit)
608 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
609
610L(CopyFrom1To16BytesTailCase2):
611 sub %ecx, %ebx
612 add %ecx, %esi
613 bsf %edx, %edx
614 cmp %ebx, %edx
615 jb L(CopyFrom1To16BytesExit)
616 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
617
618L(CopyFrom1To16BytesTail1Case2):
619 bsf %edx, %edx
620 cmp %ebx, %edx
621 jb L(CopyFrom1To16BytesExit)
622 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
623
624/* Case2 or Case3, Case3: the length limit ends in this block.  A
   non-zero NUL mask in %edx selects Case2; otherwise (Case3) exactly
   the remaining %ebx bytes are copied and terminated through
   ExitStrncatTable.  */
625
626 .p2align 4
627L(CopyFrom1To16BytesCase2OrCase3):
628 test %edx, %edx
629 jnz L(CopyFrom1To16BytesCase2)
630L(CopyFrom1To16BytesCase3):
631 add $16, %ebx
632 add %ecx, %eax
633 add %ecx, %esi
634 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
635
636 .p2align 4
637L(CopyFrom1To32BytesCase2OrCase3):
638 test %edx, %edx
639 jnz L(CopyFrom1To32BytesCase2)
640 sub %ecx, %ebx
641 add %ecx, %esi
642 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
643
644 .p2align 4
645L(CopyFrom1To16BytesTailCase2OrCase3):
646 test %edx, %edx
647 jnz L(CopyFrom1To16BytesTailCase2)
648 sub %ecx, %ebx
649 add %ecx, %esi
650 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
651
652 .p2align 4
/* The first 16 bytes were already stored; skip them and reduce the
   remaining count accordingly.  */
653L(CopyFrom1To32Bytes1Case2OrCase3):
654 add $16, %eax
655 add $16, %esi
656 sub $16, %ebx
657L(CopyFrom1To16BytesTail1Case2OrCase3):
658 test %edx, %edx
659 jnz L(CopyFrom1To16BytesTail1Case2)
660 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
661
662# endif
663
/* Exit stubs.  L(ExitN) copies N bytes from (%esi) to (%eax), reloads
   the function's return value from the stack slot STR3 into %eax and
   returns.  Sizes that do not match one move are covered by two
   overlapping moves.  L(StrncatExitN) (strncat build) first stores
   %bh at offset N as the terminating NUL and then falls into
   L(ExitN); %bh is zero there because the stubs are reached with %ebx
   as an ExitStrncatTable index, i.e. at most 32.
   In the strcat build the odd-sized stubs store %dh as their last
   byte: %edx is the ExitTable index (at most 31), so %dh is zero and
   is the NUL.  In the strncat build the stub can also be entered from
   L(StrncatExitN), so that byte is loaded from the source instead.  */
664# ifdef USE_AS_STRNCAT
665 .p2align 4
666L(StrncatExit0):
667 movb %bh, (%eax)
668 mov STR3(%esp), %eax
669 RETURN
670# endif
671
672 .p2align 4
673# ifdef USE_AS_STRNCAT
674L(StrncatExit1):
675 movb %bh, 1(%eax)
676# endif
677L(Exit1):
678# ifdef USE_AS_STRNCAT
679 movb (%esi), %dh
680# endif
681 movb %dh, (%eax)
682 mov STR3(%esp), %eax
683 RETURN
684
685 .p2align 4
686# ifdef USE_AS_STRNCAT
687L(StrncatExit2):
688 movb %bh, 2(%eax)
689# endif
690L(Exit2):
691 movw (%esi), %dx
692 movw %dx, (%eax)
693 mov STR3(%esp), %eax
694 RETURN
695
696 .p2align 4
697# ifdef USE_AS_STRNCAT
698L(StrncatExit3):
699 movb %bh, 3(%eax)
700# endif
701L(Exit3):
/* %cx is used for the first two bytes so that %dh stays intact.  */
702 movw (%esi), %cx
703 movw %cx, (%eax)
704# ifdef USE_AS_STRNCAT
705 movb 2(%esi), %dh
706# endif
707 movb %dh, 2(%eax)
708 mov STR3(%esp), %eax
709 RETURN
710
711 .p2align 4
712# ifdef USE_AS_STRNCAT
713L(StrncatExit4):
714 movb %bh, 4(%eax)
715# endif
716L(Exit4):
717 movl (%esi), %edx
718 movl %edx, (%eax)
719 mov STR3(%esp), %eax
720 RETURN
721
722 .p2align 4
723# ifdef USE_AS_STRNCAT
724L(StrncatExit5):
725 movb %bh, 5(%eax)
726# endif
727L(Exit5):
728 movl (%esi), %ecx
729# ifdef USE_AS_STRNCAT
730 movb 4(%esi), %dh
731# endif
732 movb %dh, 4(%eax)
733 movl %ecx, (%eax)
734 mov STR3(%esp), %eax
735 RETURN
736
737 .p2align 4
738# ifdef USE_AS_STRNCAT
739L(StrncatExit6):
740 movb %bh, 6(%eax)
741# endif
742L(Exit6):
743 movl (%esi), %ecx
744 movw 4(%esi), %dx
745 movl %ecx, (%eax)
746 movw %dx, 4(%eax)
747 mov STR3(%esp), %eax
748 RETURN
749
750 .p2align 4
751# ifdef USE_AS_STRNCAT
752L(StrncatExit7):
753 movb %bh, 7(%eax)
754# endif
755L(Exit7):
/* Two overlapping 4-byte moves cover bytes 0-6.  */
756 movl (%esi), %ecx
757 movl 3(%esi), %edx
758 movl %ecx, (%eax)
759 movl %edx, 3(%eax)
760 mov STR3(%esp), %eax
761 RETURN
762
763 .p2align 4
764# ifdef USE_AS_STRNCAT
765L(StrncatExit8):
766 movb %bh, 8(%eax)
767# endif
768L(Exit8):
769 movlpd (%esi), %xmm0
770 movlpd %xmm0, (%eax)
771 mov STR3(%esp), %eax
772 RETURN
773
/* Exit stubs for 9 to 16 bytes.  L(ExitN) copies N bytes from (%esi)
   to (%eax) (8-byte movlpd plus a second, possibly overlapping, move;
   one movdqu for 16), reloads the return value from STR3 and returns.
   L(StrncatExitN) (strncat build) first stores %bh, which is zero
   when %ebx is an ExitStrncatTable index, at offset N as the
   terminator.  */
774 .p2align 4
775# ifdef USE_AS_STRNCAT
776L(StrncatExit9):
777 movb %bh, 9(%eax)
778# endif
779L(Exit9):
780 movlpd (%esi), %xmm0
/* strcat build: %dh is zero (ExitTable index in %edx) and serves as
   the NUL; strncat build: the byte is taken from the source.  */
781# ifdef USE_AS_STRNCAT
782 movb 8(%esi), %dh
783# endif
784 movb %dh, 8(%eax)
785 movlpd %xmm0, (%eax)
786 mov STR3(%esp), %eax
787 RETURN
788
789 .p2align 4
790# ifdef USE_AS_STRNCAT
791L(StrncatExit10):
792 movb %bh, 10(%eax)
793# endif
794L(Exit10):
795 movlpd (%esi), %xmm0
796 movw 8(%esi), %dx
797 movlpd %xmm0, (%eax)
798 movw %dx, 8(%eax)
799 mov STR3(%esp), %eax
800 RETURN
801
802 .p2align 4
803# ifdef USE_AS_STRNCAT
804L(StrncatExit11):
805 movb %bh, 11(%eax)
806# endif
807L(Exit11):
808 movlpd (%esi), %xmm0
809 movl 7(%esi), %edx
810 movlpd %xmm0, (%eax)
811 movl %edx, 7(%eax)
812 mov STR3(%esp), %eax
813 RETURN
814
815 .p2align 4
816# ifdef USE_AS_STRNCAT
817L(StrncatExit12):
818 movb %bh, 12(%eax)
819# endif
820L(Exit12):
821 movlpd (%esi), %xmm0
822 movl 8(%esi), %edx
823 movlpd %xmm0, (%eax)
824 movl %edx, 8(%eax)
825 mov STR3(%esp), %eax
826 RETURN
827
828 .p2align 4
829# ifdef USE_AS_STRNCAT
830L(StrncatExit13):
831 movb %bh, 13(%eax)
832# endif
833L(Exit13):
834 movlpd (%esi), %xmm0
835 movlpd 5(%esi), %xmm1
836 movlpd %xmm0, (%eax)
837 movlpd %xmm1, 5(%eax)
838 mov STR3(%esp), %eax
839 RETURN
840
841 .p2align 4
842# ifdef USE_AS_STRNCAT
843L(StrncatExit14):
844 movb %bh, 14(%eax)
845# endif
846L(Exit14):
847 movlpd (%esi), %xmm0
848 movlpd 6(%esi), %xmm1
849 movlpd %xmm0, (%eax)
850 movlpd %xmm1, 6(%eax)
851 mov STR3(%esp), %eax
852 RETURN
853
854 .p2align 4
855# ifdef USE_AS_STRNCAT
856L(StrncatExit15):
857 movb %bh, 15(%eax)
858# endif
859L(Exit15):
860 movlpd (%esi), %xmm0
861 movlpd 7(%esi), %xmm1
862 movlpd %xmm0, (%eax)
863 movlpd %xmm1, 7(%eax)
864 mov STR3(%esp), %eax
865 RETURN
866
867 .p2align 4
868# ifdef USE_AS_STRNCAT
869L(StrncatExit16):
870 movb %bh, 16(%eax)
871# endif
872L(Exit16):
873 movdqu (%esi), %xmm0
874 movdqu %xmm0, (%eax)
875 mov STR3(%esp), %eax
876 RETURN
877
/* Exit stubs for 17 to 24 bytes.  L(ExitN) copies N bytes from (%esi)
   to (%eax): one 16-byte movdqu plus a second, possibly overlapping,
   move for the rest.  It then reloads the return value from STR3 and
   returns.  L(StrncatExitN) (strncat build) first stores %bh, which
   is zero when %ebx is an ExitStrncatTable index, at offset N as the
   terminator.  */
878 .p2align 4
879# ifdef USE_AS_STRNCAT
880L(StrncatExit17):
881 movb %bh, 17(%eax)
882# endif
883L(Exit17):
884 movdqu (%esi), %xmm0
/* strcat build: %dh is zero (ExitTable index in %edx) and serves as
   the NUL; strncat build: the byte is taken from the source.  */
885# ifdef USE_AS_STRNCAT
886 movb 16(%esi), %dh
887# endif
888 movdqu %xmm0, (%eax)
889 movb %dh, 16(%eax)
890 mov STR3(%esp), %eax
891 RETURN
892
893 .p2align 4
894# ifdef USE_AS_STRNCAT
895L(StrncatExit18):
896 movb %bh, 18(%eax)
897# endif
898L(Exit18):
899 movdqu (%esi), %xmm0
900 movw 16(%esi), %cx
901 movdqu %xmm0, (%eax)
902 movw %cx, 16(%eax)
903 mov STR3(%esp), %eax
904 RETURN
905
906 .p2align 4
907# ifdef USE_AS_STRNCAT
908L(StrncatExit19):
909 movb %bh, 19(%eax)
910# endif
911L(Exit19):
912 movdqu (%esi), %xmm0
913 movl 15(%esi), %ecx
914 movdqu %xmm0, (%eax)
915 movl %ecx, 15(%eax)
916 mov STR3(%esp), %eax
917 RETURN
918
919 .p2align 4
920# ifdef USE_AS_STRNCAT
921L(StrncatExit20):
922 movb %bh, 20(%eax)
923# endif
924L(Exit20):
925 movdqu (%esi), %xmm0
926 movl 16(%esi), %ecx
927 movdqu %xmm0, (%eax)
928 movl %ecx, 16(%eax)
929 mov STR3(%esp), %eax
930 RETURN
931
932 .p2align 4
933# ifdef USE_AS_STRNCAT
934L(StrncatExit21):
935 movb %bh, 21(%eax)
936# endif
937L(Exit21):
938 movdqu (%esi), %xmm0
939 movl 16(%esi), %ecx
940# ifdef USE_AS_STRNCAT
941 movb 20(%esi), %dh
942# endif
943 movdqu %xmm0, (%eax)
944 movl %ecx, 16(%eax)
945 movb %dh, 20(%eax)
946 mov STR3(%esp), %eax
947 RETURN
948
949 .p2align 4
950# ifdef USE_AS_STRNCAT
951L(StrncatExit22):
952 movb %bh, 22(%eax)
953# endif
954L(Exit22):
955 movdqu (%esi), %xmm0
956 movlpd 14(%esi), %xmm3
957 movdqu %xmm0, (%eax)
958 movlpd %xmm3, 14(%eax)
959 mov STR3(%esp), %eax
960 RETURN
961
962 .p2align 4
963# ifdef USE_AS_STRNCAT
964L(StrncatExit23):
965 movb %bh, 23(%eax)
966# endif
967L(Exit23):
968 movdqu (%esi), %xmm0
969 movlpd 15(%esi), %xmm3
970 movdqu %xmm0, (%eax)
971 movlpd %xmm3, 15(%eax)
972 mov STR3(%esp), %eax
973 RETURN
974
975 .p2align 4
976# ifdef USE_AS_STRNCAT
977L(StrncatExit24):
978 movb %bh, 24(%eax)
979# endif
980L(Exit24):
981 movdqu (%esi), %xmm0
982 movlpd 16(%esi), %xmm2
983 movdqu %xmm0, (%eax)
984 movlpd %xmm2, 16(%eax)
985 mov STR3(%esp), %eax
986 RETURN
987
/* Exit stubs for 25 to 32 bytes.  L(ExitN) copies N bytes from (%esi)
   to (%eax): a 16-byte movdqu followed by an 8-byte move plus a small
   move (25-28), or by a second, overlapping 16-byte move (29-32).  It
   then reloads the return value from STR3 and returns.
   L(StrncatExitN) (strncat build) first stores %bh, which is zero
   when %ebx is an ExitStrncatTable index, at offset N as the
   terminator.  */
988 .p2align 4
989# ifdef USE_AS_STRNCAT
990L(StrncatExit25):
991 movb %bh, 25(%eax)
992# endif
993L(Exit25):
994 movdqu (%esi), %xmm0
995 movlpd 16(%esi), %xmm2
/* strcat build: %dh is zero (ExitTable index in %edx) and serves as
   the NUL; strncat build: the byte is taken from the source.  */
996# ifdef USE_AS_STRNCAT
997 movb 24(%esi), %dh
998# endif
999 movdqu %xmm0, (%eax)
1000 movlpd %xmm2, 16(%eax)
1001 movb %dh, 24(%eax)
1002 mov STR3(%esp), %eax
1003 RETURN
1004
1005 .p2align 4
1006# ifdef USE_AS_STRNCAT
1007L(StrncatExit26):
1008 movb %bh, 26(%eax)
1009# endif
1010L(Exit26):
1011 movdqu (%esi), %xmm0
1012 movlpd 16(%esi), %xmm2
1013 movw 24(%esi), %cx
1014 movdqu %xmm0, (%eax)
1015 movlpd %xmm2, 16(%eax)
1016 movw %cx, 24(%eax)
1017 mov STR3(%esp), %eax
1018 RETURN
1019
1020 .p2align 4
1021# ifdef USE_AS_STRNCAT
1022L(StrncatExit27):
1023 movb %bh, 27(%eax)
1024# endif
1025L(Exit27):
1026 movdqu (%esi), %xmm0
1027 movlpd 16(%esi), %xmm2
1028 movl 23(%esi), %ecx
1029 movdqu %xmm0, (%eax)
1030 movlpd %xmm2, 16(%eax)
1031 movl %ecx, 23(%eax)
1032 mov STR3(%esp), %eax
1033 RETURN
1034
1035 .p2align 4
1036# ifdef USE_AS_STRNCAT
1037L(StrncatExit28):
1038 movb %bh, 28(%eax)
1039# endif
1040L(Exit28):
1041 movdqu (%esi), %xmm0
1042 movlpd 16(%esi), %xmm2
1043 movl 24(%esi), %ecx
1044 movdqu %xmm0, (%eax)
1045 movlpd %xmm2, 16(%eax)
1046 movl %ecx, 24(%eax)
1047 mov STR3(%esp), %eax
1048 RETURN
1049
1050 .p2align 4
1051# ifdef USE_AS_STRNCAT
1052L(StrncatExit29):
1053 movb %bh, 29(%eax)
1054# endif
1055L(Exit29):
1056 movdqu (%esi), %xmm0
1057 movdqu 13(%esi), %xmm2
1058 movdqu %xmm0, (%eax)
1059 movdqu %xmm2, 13(%eax)
1060 mov STR3(%esp), %eax
1061 RETURN
1062
1063 .p2align 4
1064# ifdef USE_AS_STRNCAT
1065L(StrncatExit30):
1066 movb %bh, 30(%eax)
1067# endif
1068L(Exit30):
1069 movdqu (%esi), %xmm0
1070 movdqu 14(%esi), %xmm2
1071 movdqu %xmm0, (%eax)
1072 movdqu %xmm2, 14(%eax)
1073 mov STR3(%esp), %eax
1074 RETURN
1075
1076 .p2align 4
1077# ifdef USE_AS_STRNCAT
1078L(StrncatExit31):
1079 movb %bh, 31(%eax)
1080# endif
1081L(Exit31):
1082 movdqu (%esi), %xmm0
1083 movdqu 15(%esi), %xmm2
1084 movdqu %xmm0, (%eax)
1085 movdqu %xmm2, 15(%eax)
1086 mov STR3(%esp), %eax
1087 RETURN
1088
1089 .p2align 4
1090# ifdef USE_AS_STRNCAT
1091L(StrncatExit32):
1092 movb %bh, 32(%eax)
1093# endif
1094L(Exit32):
1095 movdqu (%esi), %xmm0
1096 movdqu 16(%esi), %xmm2
1097 movdqu %xmm0, (%eax)
1098 movdqu %xmm2, 16(%eax)
1099 mov STR3(%esp), %eax
1100 RETURN
1101
1102# ifdef USE_AS_STRNCAT
1103
/* strncat only: the length limit ends inside the 64 source bytes held
   in xmm4-xmm7.  A non-zero mask in %edx means these bytes also
   contain a NUL (Case2); otherwise only the limit applies (Case3).  */
1104 .p2align 4
1105L(UnalignedLeaveCase2OrCase3):
1106 test %edx, %edx
1107 jnz L(Unaligned64LeaveCase2)
/* Case3: store whole 16-byte blocks while the remaining count allows
   it, then let L(CopyFrom1To16BytesCase3) copy the partial block.
   %ecx is the offset of that partial block.  When all four blocks
   fit, they are stored and the terminator is written at offset
   64.  */
1108L(Unaligned64LeaveCase3):
1109 lea 64(%ebx), %ecx
1110 and $-16, %ecx
1111 add $48, %ebx
1112 jl L(CopyFrom1To16BytesCase3)
1113 movdqu %xmm4, (%eax)
1114 sub $16, %ebx
1115 jb L(CopyFrom1To16BytesCase3)
1116 movdqu %xmm5, 16(%eax)
1117 sub $16, %ebx
1118 jb L(CopyFrom1To16BytesCase3)
1119 movdqu %xmm6, 32(%eax)
1120 sub $16, %ebx
1121 jb L(CopyFrom1To16BytesCase3)
1122 movdqu %xmm7, 48(%eax)
1123 xor %bh, %bh
1124 movb %bh, 64(%eax)
1125 mov STR3(%esp), %eax
1126 RETURN
1127
/* Case2: walk the four blocks in order with %ecx as the block offset.
   For each block: if the limit ends in it, continue at
   L(CopyFrom1To16BytesCase2OrCase3); if it contains the NUL, continue
   at L(CopyFrom1To16Bytes); otherwise store it and go on.  For the
   fourth block the NUL index is compared with the remaining count
   directly.  */
1128 .p2align 4
1129L(Unaligned64LeaveCase2):
1130 xor %ecx, %ecx
1131 pcmpeqb %xmm4, %xmm0
1132 pmovmskb %xmm0, %edx
1133 add $48, %ebx
1134 jle L(CopyFrom1To16BytesCase2OrCase3)
1135 test %edx, %edx
1136 jnz L(CopyFrom1To16Bytes)
1137
1138 pcmpeqb %xmm5, %xmm0
1139 pmovmskb %xmm0, %edx
1140 movdqu %xmm4, (%eax)
1141 add $16, %ecx
1142 sub $16, %ebx
1143 jbe L(CopyFrom1To16BytesCase2OrCase3)
1144 test %edx, %edx
1145 jnz L(CopyFrom1To16Bytes)
1146
1147 pcmpeqb %xmm6, %xmm0
1148 pmovmskb %xmm0, %edx
1149 movdqu %xmm5, 16(%eax)
1150 add $16, %ecx
1151 sub $16, %ebx
1152 jbe L(CopyFrom1To16BytesCase2OrCase3)
1153 test %edx, %edx
1154 jnz L(CopyFrom1To16Bytes)
1155
1156 pcmpeqb %xmm7, %xmm0
1157 pmovmskb %xmm0, %edx
1158 movdqu %xmm6, 32(%eax)
1159 lea 16(%eax, %ecx), %eax
1160 lea 16(%esi, %ecx), %esi
1161 bsf %edx, %edx
1162 cmp %ebx, %edx
1163 jb L(CopyFrom1To16BytesExit)
1164 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
1165# endif
/* Nothing to append (empty source, or a zero length limit for
   strncat).  %eax still holds the first argument loaded by the entry
   code, so only the saved registers are restored before returning.  */
1166 .p2align 4
1167L(ExitZero):
1168 RETURN
1169
1170END (STRCAT)
1171
/* Jump tables used by BRANCH_TO_JMPTBL_ENTRY.

   ExitTable is indexed by the position of the source's NUL inside the
   bytes still to be copied: entry k branches to the stub that copies
   k + 1 bytes (the NUL included).

   ExitStrncatTable (strncat only) is indexed by the number of bytes
   still allowed by the length limit: entry k branches to the stub that
   copies k bytes and stores the terminator after them.

   The alignment directive has to follow the section switch; issued
   before it, it only pads .text and leaves the tables in .rodata
   without any alignment request.  */
	.section .rodata
	.p2align 4
L(ExitTable):
	.int	JMPTBL(L(Exit1), L(ExitTable))
	.int	JMPTBL(L(Exit2), L(ExitTable))
	.int	JMPTBL(L(Exit3), L(ExitTable))
	.int	JMPTBL(L(Exit4), L(ExitTable))
	.int	JMPTBL(L(Exit5), L(ExitTable))
	.int	JMPTBL(L(Exit6), L(ExitTable))
	.int	JMPTBL(L(Exit7), L(ExitTable))
	.int	JMPTBL(L(Exit8), L(ExitTable))
	.int	JMPTBL(L(Exit9), L(ExitTable))
	.int	JMPTBL(L(Exit10), L(ExitTable))
	.int	JMPTBL(L(Exit11), L(ExitTable))
	.int	JMPTBL(L(Exit12), L(ExitTable))
	.int	JMPTBL(L(Exit13), L(ExitTable))
	.int	JMPTBL(L(Exit14), L(ExitTable))
	.int	JMPTBL(L(Exit15), L(ExitTable))
	.int	JMPTBL(L(Exit16), L(ExitTable))
	.int	JMPTBL(L(Exit17), L(ExitTable))
	.int	JMPTBL(L(Exit18), L(ExitTable))
	.int	JMPTBL(L(Exit19), L(ExitTable))
	.int	JMPTBL(L(Exit20), L(ExitTable))
	.int	JMPTBL(L(Exit21), L(ExitTable))
	.int	JMPTBL(L(Exit22), L(ExitTable))
	.int	JMPTBL(L(Exit23), L(ExitTable))
	.int	JMPTBL(L(Exit24), L(ExitTable))
	.int	JMPTBL(L(Exit25), L(ExitTable))
	.int	JMPTBL(L(Exit26), L(ExitTable))
	.int	JMPTBL(L(Exit27), L(ExitTable))
	.int	JMPTBL(L(Exit28), L(ExitTable))
	.int	JMPTBL(L(Exit29), L(ExitTable))
	.int	JMPTBL(L(Exit30), L(ExitTable))
	.int	JMPTBL(L(Exit31), L(ExitTable))
	.int	JMPTBL(L(Exit32), L(ExitTable))
# ifdef USE_AS_STRNCAT
L(ExitStrncatTable):
	.int	JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit32), L(ExitStrncatTable))
# endif
1243#endif