]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/multiarch/strcat-ssse3.S
0c506c1a685f2fc4f9a771f6756a10e938f6462a
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / strcat-ssse3.S
1 /* strcat with SSSE3
2 Copyright (C) 2011-2014 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20
21 #if IS_IN (libc)
22
23 # include <sysdep.h>
24
/* CFI_PUSH (REG): unwind annotations describing a 4-byte push of REG --
   the CFA offset grows by 4 and REG is recorded as saved at offset 0 from
   the current CFA register.  Emits no machine instruction.  The cfi_*
   helpers are not defined in this file (presumably they come from
   <sysdep.h>).  */
25 # define CFI_PUSH(REG) \
26 cfi_adjust_cfa_offset (4); \
27 cfi_rel_offset (REG, 0)
28
/* CFI_POP (REG): the inverse annotations -- the CFA offset shrinks by 4 and
   REG is marked as restored.  Emits no machine instruction.  */
29 # define CFI_POP(REG) \
30 cfi_adjust_cfa_offset (-4); \
31 cfi_restore (REG)
32
/* PUSH/POP: the real pushl/popl paired with the matching annotations.  */
33 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
34 # define POP(REG) popl REG; CFI_POP (REG)
35
/* Symbol name of the routine; a file that includes this one may define
   STRCAT beforehand to choose a different name.  */
36 # ifndef STRCAT
37 # define STRCAT __strcat_ssse3
38 # endif
39
/* Offsets of the stack arguments relative to %esp at the point each one is
   read.  PARMS (4) is the offset of the first argument at function entry.
   STR1 and STR2 add 4 because %edi has been pushed before they are read.
   LEN steps 8 past STR2 instead of 4 because %ebx has also been pushed by
   the time it is read.  */
40 # define PARMS 4
41 # define STR1 PARMS+4
42 # define STR2 STR1+4
43
44 # ifdef USE_AS_STRNCAT
45 # define LEN STR2+8
46 # endif
47
/* Not tested anywhere in this file; presumably consumed by the included
   strlen-sse2.S / strcpy-ssse3.S sources -- confirm there.  */
48 # define USE_AS_STRCAT
49
/* Entry point of the SSSE3 strcat (strncat when USE_AS_STRNCAT is defined).
   Arguments are read from the stack.  %edi is saved here and holds the first
   argument (the destination string) for the rest of the routine: every exit
   stub below returns it in %eax and RETURN1 pops the saved value back.  */
50 .text
51 ENTRY (STRCAT)
52 PUSH (%edi)
53 mov STR1(%esp), %edi /* %edi = first argument */
54 mov %edi, %edx /* second copy of the pointer; presumably what the included strlen code scans -- confirm in strlen-sse2.S */
55
/* The strlen implementation is included textually at this point.  RETURN is
   defined so that wherever the included code uses it, control jumps to
   L(StartStrcpyPart) instead of leaving the function.  */
56 # define RETURN jmp L(StartStrcpyPart)
57 # include "strlen-sse2.S"
58
/* L(StartStrcpyPart): reached through RETURN from the included strlen code.
   Sets up the copy registers:
     %ecx = second argument (source string)
     %edx = %edi + %eax, the position where appending starts -- this assumes
            the included strlen code leaves the length of the string at %edi
            in %eax (not visible in this file; confirm in strlen-sse2.S)
   and then probes the first 16 source bytes one at a time.  A zero byte at
   index k branches to L(Exit<k+1>), which copies k+1 bytes (terminator
   included) and returns.  */
59 L(StartStrcpyPart):
60 mov STR2(%esp), %ecx
61 lea (%edi, %eax), %edx
62 # ifdef USE_AS_STRNCAT
/* strncat only: %ebx is saved and then holds the length limit (third
   argument).  */
63 PUSH (%ebx)
64 mov LEN(%esp), %ebx
65 test %ebx, %ebx
66 jz L(StrncatExit0) /* limit 0: nothing to append */
67 cmp $8, %ebx
68 jbe L(StrncatExit8Bytes) /* limit 1..8 (unsigned compare): bounded byte-wise path */
69 # endif
/* Source bytes 0..8.  */
70 cmpb $0, (%ecx)
71 jz L(Exit1)
72 cmpb $0, 1(%ecx)
73 jz L(Exit2)
74 cmpb $0, 2(%ecx)
75 jz L(Exit3)
76 cmpb $0, 3(%ecx)
77 jz L(Exit4)
78 cmpb $0, 4(%ecx)
79 jz L(Exit5)
80 cmpb $0, 5(%ecx)
81 jz L(Exit6)
82 cmpb $0, 6(%ecx)
83 jz L(Exit7)
84 cmpb $0, 7(%ecx)
85 jz L(Exit8)
86 cmpb $0, 8(%ecx)
87 jz L(Exit9)
88 # ifdef USE_AS_STRNCAT
/* strncat only: the limit is above 8 here, so "below 16" means 9..15.  The
   first nine source bytes are already known to be non-zero.  */
89 cmp $16, %ebx
90 jb L(StrncatExit15Bytes)
91 # endif
/* Source bytes 9..15.  */
92 cmpb $0, 9(%ecx)
93 jz L(Exit10)
94 cmpb $0, 10(%ecx)
95 jz L(Exit11)
96 cmpb $0, 11(%ecx)
97 jz L(Exit12)
98 cmpb $0, 12(%ecx)
99 jz L(Exit13)
100 cmpb $0, 13(%ecx)
101 jz L(Exit14)
102 cmpb $0, 14(%ecx)
103 jz L(Exit15)
104 cmpb $0, 15(%ecx)
105 jz L(Exit16)
106 # ifdef USE_AS_STRNCAT
/* strncat only: the first 16 source bytes are all non-zero at this point.
   If the limit is exactly 16, L(StrncatExit16) copies them and stores the
   terminator.  */
107 cmp $16, %ebx
108 je L(StrncatExit16)
109
/* RETURN1: common function exit used by every stub below and made available
   to the included strcpy code.  It pops the registers saved by this routine
   and returns.  The CFI_PUSH annotations after the ret emit no instructions;
   they re-describe the saved registers for whatever code follows the macro,
   where those registers are still on the stack.  They are listed in the
   order the registers were actually pushed (%edi at entry, then %ebx in
   L(StartStrcpyPart)), so that %edi is recorded in the slot nearer the
   return address and %ebx in the slot at the top of the stack, matching the
   real layout.  */
110 # define RETURN1 \
111 POP (%ebx); \
112 POP (%edi); \
113 ret; \
114 CFI_PUSH (%edi); \
115 CFI_PUSH (%ebx)
/* The L(...Case2)/L(...Case3) handlers further down are assembled under this
   macro; presumably it also selects the length-limited paths of the included
   strcpy code -- confirm in strcpy-ssse3.S.  */
116 # define USE_AS_STRNCPY
117 # else
/* Plain strcat: only %edi was saved.  */
118 # define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
119 # endif
/* Bulk copy for sources whose first 16 bytes contain no terminator; the
   implementation is included textually.  */
120 # include "strcpy-ssse3.S"
/* L(CopyFrom1To16Bytes): copy the final 1..16 bytes and return.
   Not referenced by the code visible in this file; presumably entered from
   the included strcpy-ssse3.S -- confirm there.
   As used below:
     %esi = offset added to both %edx (destination) and %ecx (source); the
            saved %esi is then popped (its push is not visible in this file)
     %eax = bit mask; bit k of %al selects L(Exit<k+1>), which copies k+1
            bytes.  A zero %al defers to L(ExitHigh), which examines %ah.  */
121 .p2align 4
122 L(CopyFrom1To16Bytes):
123 add %esi, %edx
124 add %esi, %ecx
125
126 POP (%esi)
127 test %al, %al
128 jz L(ExitHigh)
129 test $0x01, %al
130 jnz L(Exit1)
131 test $0x02, %al
132 jnz L(Exit2)
133 test $0x04, %al
134 jnz L(Exit3)
135 test $0x08, %al
136 jnz L(Exit4)
137 test $0x10, %al
138 jnz L(Exit5)
139 test $0x20, %al
140 jnz L(Exit6)
141 test $0x40, %al
142 jnz L(Exit7)
/* %al is non-zero but bits 0..6 are clear, so only bit 7 is left: copy
   8 bytes.  */
143 movlpd (%ecx), %xmm0
144 movlpd %xmm0, (%edx)
145 movl %edi, %eax /* return value: the first argument */
146 RETURN1
147
/* L(ExitHigh): continuation of L(CopyFrom1To16Bytes) for a zero %al.
   Bit k of %ah selects L(Exit<k+9>), which copies k+9 bytes.  */
148 .p2align 4
149 L(ExitHigh):
150 test $0x01, %ah
151 jnz L(Exit9)
152 test $0x02, %ah
153 jnz L(Exit10)
154 test $0x04, %ah
155 jnz L(Exit11)
156 test $0x08, %ah
157 jnz L(Exit12)
158 test $0x10, %ah
159 jnz L(Exit13)
160 test $0x20, %ah
161 jnz L(Exit14)
162 test $0x40, %ah
163 jnz L(Exit15)
/* None of bits 0..6 of %ah is set: copy all 16 bytes as two 8-byte moves.  */
164 movlpd (%ecx), %xmm0
165 movlpd 8(%ecx), %xmm1
166 movlpd %xmm0, (%edx)
167 movlpd %xmm1, 8(%edx)
168 movl %edi, %eax /* return value: the first argument */
169 RETURN1
170
/* L(StrncatExit1): store a zero byte at offset 1 of the destination, then
   fall into L(Exit1).  %bh serves as the zero; every jump to this label that
   is visible in this file follows a successful compare of %ebx with 1.
   L(Exit1): copy 1 byte from (%ecx) to (%edx) and return %edi.  */
171 .p2align 4
172 L(StrncatExit1):
173 movb %bh, 1(%edx)
174 L(Exit1):
175 movb (%ecx), %al
176 movb %al, (%edx)
177 movl %edi, %eax
178 RETURN1
179
/* L(StrncatExit2): store a zero byte at offset 2 of the destination, then
   fall into L(Exit2).  %bh serves as the zero; every jump to this label that
   is visible in this file follows a successful compare of %ebx with 2.
   L(Exit2): copy 2 bytes from (%ecx) to (%edx) and return %edi.  */
180 .p2align 4
181 L(StrncatExit2):
182 movb %bh, 2(%edx)
183 L(Exit2):
184 movw (%ecx), %ax
185 movw %ax, (%edx)
186 movl %edi, %eax
187 RETURN1
188
/* L(StrncatExit3): store a zero byte at offset 3 of the destination, then
   fall into L(Exit3).  %bh serves as the zero; every jump to this label that
   is visible in this file follows a successful compare of %ebx with 3.
   L(Exit3): copy 3 bytes (a 2-byte move plus a 1-byte move) from (%ecx) to
   (%edx) and return %edi.  */
189 .p2align 4
190 L(StrncatExit3):
191 movb %bh, 3(%edx)
192 L(Exit3):
193 movw (%ecx), %ax
194 movw %ax, (%edx)
195 movb 2(%ecx), %al
196 movb %al, 2(%edx)
197 movl %edi, %eax
198 RETURN1
199
/* L(StrncatExit4): store a zero byte at offset 4 of the destination, then
   fall into L(Exit4).  %bh serves as the zero; every jump to this label that
   is visible in this file follows a successful compare of %ebx with 4.
   L(Exit4): copy 4 bytes from (%ecx) to (%edx) and return %edi.  */
200 .p2align 4
201 L(StrncatExit4):
202 movb %bh, 4(%edx)
203 L(Exit4):
204 movl (%ecx), %eax
205 movl %eax, (%edx)
206 movl %edi, %eax
207 RETURN1
208
/* L(StrncatExit5): store a zero byte at offset 5 of the destination, then
   fall into L(Exit5).  %bh serves as the zero; every jump to this label that
   is visible in this file follows a successful compare of %ebx with 5.
   L(Exit5): copy 5 bytes (a 4-byte move plus a 1-byte move) from (%ecx) to
   (%edx) and return %edi.  */
209 .p2align 4
210 L(StrncatExit5):
211 movb %bh, 5(%edx)
212 L(Exit5):
213 movl (%ecx), %eax
214 movl %eax, (%edx)
215 movb 4(%ecx), %al
216 movb %al, 4(%edx)
217 movl %edi, %eax
218 RETURN1
219
/* L(StrncatExit6): store a zero byte at offset 6 of the destination, then
   fall into L(Exit6).  %bh serves as the zero; every jump to this label that
   is visible in this file follows a successful compare of %ebx with 6.
   L(Exit6): copy 6 bytes (a 4-byte move plus a 2-byte move) from (%ecx) to
   (%edx) and return %edi.  */
220 .p2align 4
221 L(StrncatExit6):
222 movb %bh, 6(%edx)
223 L(Exit6):
224 movl (%ecx), %eax
225 movl %eax, (%edx)
226 movw 4(%ecx), %ax
227 movw %ax, 4(%edx)
228 movl %edi, %eax
229 RETURN1
230
/* L(StrncatExit7): store a zero byte at offset 7 of the destination, then
   fall into L(Exit7).  %bh serves as the zero; every jump to this label that
   is visible in this file follows a successful compare of %ebx with 7.
   L(Exit7): copy 7 bytes from (%ecx) to (%edx) using two 4-byte moves that
   overlap by one byte (offsets 0 and 3), then return %edi.  */
231 .p2align 4
232 L(StrncatExit7):
233 movb %bh, 7(%edx)
234 L(Exit7):
235 movl (%ecx), %eax
236 movl %eax, (%edx)
237 movl 3(%ecx), %eax
238 movl %eax, 3(%edx)
239 movl %edi, %eax
240 RETURN1
241
/* L(StrncatExit8): store a zero byte at offset 8 of the destination, then
   fall into L(Exit8).  %bh serves as the zero.  No jump to this label is
   visible in this file.
   L(Exit8): copy 8 bytes from (%ecx) to (%edx) through %xmm0 and return
   %edi.  */
242 .p2align 4
243 L(StrncatExit8):
244 movb %bh, 8(%edx)
245 L(Exit8):
246 movlpd (%ecx), %xmm0
247 movlpd %xmm0, (%edx)
248 movl %edi, %eax
249 RETURN1
250
/* L(StrncatExit9): store a zero byte at offset 9 of the destination, then
   fall into L(Exit9).  %bh serves as the zero; every jump to this label that
   is visible in this file follows a successful compare of %ebx with 9.
   L(Exit9): copy 9 bytes (an 8-byte move plus a 1-byte move) from (%ecx) to
   (%edx) and return %edi.  */
251 .p2align 4
252 L(StrncatExit9):
253 movb %bh, 9(%edx)
254 L(Exit9):
255 movlpd (%ecx), %xmm0
256 movlpd %xmm0, (%edx)
257 movb 8(%ecx), %al
258 movb %al, 8(%edx)
259 movl %edi, %eax
260 RETURN1
261
/* L(StrncatExit10): store a zero byte at offset 10 of the destination, then
   fall into L(Exit10).  %bh serves as the zero; every jump to this label
   that is visible in this file follows a successful compare of %ebx with 10.
   L(Exit10): copy 10 bytes (an 8-byte move plus a 2-byte move) from (%ecx)
   to (%edx) and return %edi.  */
262 .p2align 4
263 L(StrncatExit10):
264 movb %bh, 10(%edx)
265 L(Exit10):
266 movlpd (%ecx), %xmm0
267 movlpd %xmm0, (%edx)
268 movw 8(%ecx), %ax
269 movw %ax, 8(%edx)
270 movl %edi, %eax
271 RETURN1
272
/* L(StrncatExit11): store a zero byte at offset 11 of the destination, then
   fall into L(Exit11).  %bh serves as the zero; every jump to this label
   that is visible in this file follows a successful compare of %ebx with 11.
   L(Exit11): copy 11 bytes from (%ecx) to (%edx) using an 8-byte move at
   offset 0 and a 4-byte move at offset 7 (one byte of overlap), then return
   %edi.  */
273 .p2align 4
274 L(StrncatExit11):
275 movb %bh, 11(%edx)
276 L(Exit11):
277 movlpd (%ecx), %xmm0
278 movlpd %xmm0, (%edx)
279 movl 7(%ecx), %eax
280 movl %eax, 7(%edx)
281 movl %edi, %eax
282 RETURN1
283
/* L(StrncatExit12): store a zero byte at offset 12 of the destination, then
   fall into L(Exit12).  %bh serves as the zero; every jump to this label
   that is visible in this file follows a successful compare of %ebx with 12.
   L(Exit12): copy 12 bytes (an 8-byte move plus a 4-byte move) from (%ecx)
   to (%edx) and return %edi.  */
284 .p2align 4
285 L(StrncatExit12):
286 movb %bh, 12(%edx)
287 L(Exit12):
288 movlpd (%ecx), %xmm0
289 movlpd %xmm0, (%edx)
290 movl 8(%ecx), %eax
291 movl %eax, 8(%edx)
292 movl %edi, %eax
293 RETURN1
294
/* L(StrncatExit13): store a zero byte at offset 13 of the destination, then
   fall into L(Exit13).  %bh serves as the zero; every jump to this label
   that is visible in this file follows a successful compare of %ebx with 13.
   L(Exit13): copy 13 bytes from (%ecx) to (%edx) using two 8-byte moves at
   offsets 0 and 5 (three bytes of overlap), then return %edi.  */
295 .p2align 4
296 L(StrncatExit13):
297 movb %bh, 13(%edx)
298 L(Exit13):
299 movlpd (%ecx), %xmm0
300 movlpd %xmm0, (%edx)
301 movlpd 5(%ecx), %xmm0
302 movlpd %xmm0, 5(%edx)
303 movl %edi, %eax
304 RETURN1
305
/* L(StrncatExit14): store a zero byte at offset 14 of the destination, then
   fall into L(Exit14).  %bh serves as the zero; every jump to this label
   that is visible in this file follows a successful compare of %ebx with 14.
   L(Exit14): copy 14 bytes from (%ecx) to (%edx) using two 8-byte moves at
   offsets 0 and 6 (two bytes of overlap), then return %edi.  */
306 .p2align 4
307 L(StrncatExit14):
308 movb %bh, 14(%edx)
309 L(Exit14):
310 movlpd (%ecx), %xmm0
311 movlpd %xmm0, (%edx)
312 movlpd 6(%ecx), %xmm0
313 movlpd %xmm0, 6(%edx)
314 movl %edi, %eax
315 RETURN1
316
/* L(StrncatExit15): store a zero byte at offset 15 of the destination, then
   fall into L(Exit15).  %bh serves as the zero; every jump to this label
   that is visible in this file follows a successful compare of %ebx with 15.
   L(Exit15): copy 15 bytes from (%ecx) to (%edx) using two 8-byte moves at
   offsets 0 and 7 (one byte of overlap), then return %edi.  */
317 .p2align 4
318 L(StrncatExit15):
319 movb %bh, 15(%edx)
320 L(Exit15):
321 movlpd (%ecx), %xmm0
322 movlpd %xmm0, (%edx)
323 movlpd 7(%ecx), %xmm0
324 movlpd %xmm0, 7(%edx)
325 movl %edi, %eax
326 RETURN1
327
/* L(StrncatExit16): store a zero byte at offset 16 of the destination, then
   fall into L(Exit16).  %bh serves as the zero; the jump to this label that
   is visible in this file follows a successful compare of %ebx with 16.
   L(Exit16): copy 16 bytes from (%ecx) to (%edx) as two 8-byte moves and
   return %edi.  */
328 .p2align 4
329 L(StrncatExit16):
330 movb %bh, 16(%edx)
331 L(Exit16):
332 movlpd (%ecx), %xmm0
333 movlpd 8(%ecx), %xmm1
334 movlpd %xmm0, (%edx)
335 movlpd %xmm1, 8(%edx)
336 movl %edi, %eax
337 RETURN1
338
339 # ifdef USE_AS_STRNCPY
340
/* Annotation only (no instruction): the code below runs with a saved %esi on
   the stack, which the preceding RETURN1 does not describe.  The push itself
   is not visible in this file.  */
341 CFI_PUSH(%esi)
342
/* L(CopyFrom1To16BytesCase2): final copy when the bit mask in %eax is
   non-zero (see the test in L(CopyFrom1To16BytesCase2OrCase3)) and a length
   limit is in force.
   As used below:
     %ebx = remaining limit minus 16 -- 16 is added back first; presumably
            the included strcpy code applied that bias, confirm there
     %esi = offset added to both %ecx (source) and %edx (destination)
     %eax = bit mask; bit k of %al selects L(Exit<k+1>)
   Limit checks are interleaved with the mask tests, so whichever comes
   first -- a mask bit or the limit -- decides how many bytes are copied.  */
343 .p2align 4
344 L(CopyFrom1To16BytesCase2):
345 add $16, %ebx
346 add %esi, %ecx
347 lea (%esi, %edx), %esi /* %esi = adjusted destination; %edx is needed as scratch next */
/* Build in %dh a value that is non-zero when %al has a bit set or when
   bit 15 of (%ebx - 9) is set, i.e. the limit is below 9 for the small
   counts expected here.  */
348 lea -9(%ebx), %edx
349 and $1<<7, %dh
350 or %al, %dh
351 test %dh, %dh
/* Neither the lea nor the pop changes the flags, so the jz below still acts
   on the test above.  */
352 lea (%esi), %edx /* %edx = adjusted destination */
353 POP (%esi)
354 jz L(ExitHighCase2)
355
356 test $0x01, %al
357 jnz L(Exit1)
358 cmp $1, %ebx
359 je L(StrncatExit1)
360 test $0x02, %al
361 jnz L(Exit2)
362 cmp $2, %ebx
363 je L(StrncatExit2)
364 test $0x04, %al
365 jnz L(Exit3)
366 cmp $3, %ebx
367 je L(StrncatExit3)
368 test $0x08, %al
369 jnz L(Exit4)
370 cmp $4, %ebx
371 je L(StrncatExit4)
372 test $0x10, %al
373 jnz L(Exit5)
374 cmp $5, %ebx
375 je L(StrncatExit5)
376 test $0x20, %al
377 jnz L(Exit6)
378 cmp $6, %ebx
379 je L(StrncatExit6)
380 test $0x40, %al
381 jnz L(Exit7)
382 cmp $7, %ebx
383 je L(StrncatExit7)
/* Copy 8 bytes, then place the terminator.  cmpb $1 sets the carry flag only
   when the byte at offset 7 is zero, and sbb $-1 adds (1 - carry): %eax ends
   up at offset 7 if that byte is already the terminator, otherwise at
   offset 8.  */
384 movlpd (%ecx), %xmm0
385 movlpd %xmm0, (%edx)
386 lea 7(%edx), %eax
387 cmpb $1, (%eax)
388 sbb $-1, %eax
389 xor %cl, %cl /* the source pointer is no longer needed; %cl becomes the zero byte */
390 movb %cl, (%eax)
391 movl %edi, %eax /* return value: the first argument */
392 RETURN1
393
/* L(ExitHighCase2): continuation of L(CopyFrom1To16BytesCase2) when %al is
   zero and the limit test there did not fire.  Bit k of %ah selects
   L(Exit<k+9>); each mask test is followed by the matching limit check,
   which selects L(StrncatExit<k+9>) instead.  */
394 .p2align 4
395 L(ExitHighCase2):
396 test $0x01, %ah
397 jnz L(Exit9)
398 cmp $9, %ebx
399 je L(StrncatExit9)
400 test $0x02, %ah
401 jnz L(Exit10)
402 cmp $10, %ebx
403 je L(StrncatExit10)
404 test $0x04, %ah
405 jnz L(Exit11)
406 cmp $11, %ebx
407 je L(StrncatExit11)
408 test $0x8, %ah
409 jnz L(Exit12)
410 cmp $12, %ebx
411 je L(StrncatExit12)
412 test $0x10, %ah
413 jnz L(Exit13)
414 cmp $13, %ebx
415 je L(StrncatExit13)
416 test $0x20, %ah
417 jnz L(Exit14)
418 cmp $14, %ebx
419 je L(StrncatExit14)
420 test $0x40, %ah
421 jnz L(Exit15)
422 cmp $15, %ebx
423 je L(StrncatExit15)
/* Copy 16 bytes; no separate terminator is stored on this path.
   NOTE(review): this relies on the 16th copied byte being the terminator
   (bit 7 of %ah), which holds if %eax is non-zero with only its low 16 bits
   significant -- confirm against the code that jumps here.  */
424 movlpd (%ecx), %xmm0
425 movlpd %xmm0, (%edx)
426 movlpd 8(%ecx), %xmm1
427 movlpd %xmm1, 8(%edx)
428 movl %edi, %eax /* return value: the first argument */
429 RETURN1
430
/* Annotation only (no instruction): the code below again runs with a saved
   %esi on the stack.  */
431 CFI_PUSH(%esi)
432
/* L(CopyFrom1To16BytesCase2OrCase3): dispatch on the bit mask in %eax.
   A non-zero mask goes to L(CopyFrom1To16BytesCase2); a zero mask falls
   through into L(CopyFrom1To16BytesCase3).  */
433 L(CopyFrom1To16BytesCase2OrCase3):
434 test %eax, %eax
435 jnz L(CopyFrom1To16BytesCase2)
436
/* L(CopyFrom1To16BytesCase3): the copy is bounded by the limit alone.
   As used below:
     %ebx = remaining limit minus 16 -- 16 is added back first; presumably
            the included strcpy code applied that bias, confirm there
     %esi = offset added to both %edx (destination) and %ecx (source); the
            saved %esi is then popped
   A limit of N (1..7) goes to L(StrncatExit<N>), which copies N bytes and
   stores a terminator after them; limits above 8 go to L(ExitHighCase3).  */
437 .p2align 4
438 L(CopyFrom1To16BytesCase3):
439 add $16, %ebx
440 add %esi, %edx
441 add %esi, %ecx
442
443 POP (%esi)
444
445 cmp $8, %ebx
446 ja L(ExitHighCase3)
447 cmp $1, %ebx
448 je L(StrncatExit1)
449 cmp $2, %ebx
450 je L(StrncatExit2)
451 cmp $3, %ebx
452 je L(StrncatExit3)
453 cmp $4, %ebx
454 je L(StrncatExit4)
455 cmp $5, %ebx
456 je L(StrncatExit5)
457 cmp $6, %ebx
458 je L(StrncatExit6)
459 cmp $7, %ebx
460 je L(StrncatExit7)
/* Any other value not above 8 lands here; the expected one is exactly 8.
   Copy 8 bytes and store the terminator at offset 8 (%bh is zero for such a
   limit).  */
461 movlpd (%ecx), %xmm0
462 movlpd %xmm0, (%edx)
463 movb %bh, 8(%edx)
464 movl %edi, %eax /* return value: the first argument */
465 RETURN1
466
/* L(ExitHighCase3): continuation of L(CopyFrom1To16BytesCase3) for a limit
   above 8.  A limit of N (9..15) goes to L(StrncatExit<N>), which copies
   N bytes and stores a terminator after them.  */
467 .p2align 4
468 L(ExitHighCase3):
469 cmp $9, %ebx
470 je L(StrncatExit9)
471 cmp $10, %ebx
472 je L(StrncatExit10)
473 cmp $11, %ebx
474 je L(StrncatExit11)
475 cmp $12, %ebx
476 je L(StrncatExit12)
477 cmp $13, %ebx
478 je L(StrncatExit13)
479 cmp $14, %ebx
480 je L(StrncatExit14)
481 cmp $15, %ebx
482 je L(StrncatExit15)
/* Any other value above 8 lands here; the expected one is exactly 16.
   Copy 16 bytes and store the terminator at offset 16 (%bh is zero for such
   a limit).  */
483 movlpd (%ecx), %xmm0
484 movlpd %xmm0, (%edx)
485 movlpd 8(%ecx), %xmm1
486 movlpd %xmm1, 8(%edx)
487 movb %bh, 16(%edx)
488 movl %edi, %eax /* return value: the first argument */
489 RETURN1
490
/* L(StrncatExit0): taken from L(StartStrcpyPart) when the length limit is 0.
   Nothing is copied or stored; the first argument is returned unchanged.  */
491 .p2align 4
492 L(StrncatExit0):
493 movl %edi, %eax
494 RETURN1
495
/* L(StrncatExit15Bytes): taken from L(StartStrcpyPart) when the limit in
   %ebx is 9..15 and source bytes 0..8 are already known to be non-zero.
   Limit checks and byte probes alternate: reaching the limit N first goes to
   L(StrncatExit<N>) (copy N bytes, then terminate); finding a zero byte at
   index k first goes to L(Exit<k+1>) (copy through the terminator).  */
496 .p2align 4
497 L(StrncatExit15Bytes):
498 cmp $9, %ebx
499 je L(StrncatExit9)
500 cmpb $0, 9(%ecx)
501 jz L(Exit10)
502 cmp $10, %ebx
503 je L(StrncatExit10)
504 cmpb $0, 10(%ecx)
505 jz L(Exit11)
506 cmp $11, %ebx
507 je L(StrncatExit11)
508 cmpb $0, 11(%ecx)
509 jz L(Exit12)
510 cmp $12, %ebx
511 je L(StrncatExit12)
512 cmpb $0, 12(%ecx)
513 jz L(Exit13)
514 cmp $13, %ebx
515 je L(StrncatExit13)
516 cmpb $0, 13(%ecx)
517 jz L(Exit14)
518 cmp $14, %ebx
519 je L(StrncatExit14)
/* The limit is 15 and source bytes 0..13 are non-zero.  Copy 15 bytes with
   two overlapping 8-byte moves, then place the terminator: cmpb $1 sets the
   carry flag only when the byte at offset 14 is zero, and sbb $-1 adds
   (1 - carry), so %eax ends up at offset 14 if that byte is already the
   terminator, otherwise at offset 15.  %bh is zero for this limit.  */
520 movlpd (%ecx), %xmm0
521 movlpd %xmm0, (%edx)
522 movlpd 7(%ecx), %xmm0
523 movlpd %xmm0, 7(%edx)
524 lea 14(%edx), %eax
525 cmpb $1, (%eax)
526 sbb $-1, %eax
527 movb %bh, (%eax)
528 movl %edi, %eax /* return value: the first argument */
529 RETURN1
530
/* L(StrncatExit8Bytes): taken from L(StartStrcpyPart) when the limit in %ebx
   is 1..8 (zero was handled before the jump).
   Byte probes and limit checks alternate: finding a zero byte at index k
   first goes to L(Exit<k+1>) (copy through the terminator); reaching the
   limit N first goes to L(StrncatExit<N>) (copy N bytes, then terminate).  */
531 .p2align 4
532 L(StrncatExit8Bytes):
533 cmpb $0, (%ecx)
534 jz L(Exit1)
535 cmp $1, %ebx
536 je L(StrncatExit1)
537 cmpb $0, 1(%ecx)
538 jz L(Exit2)
539 cmp $2, %ebx
540 je L(StrncatExit2)
541 cmpb $0, 2(%ecx)
542 jz L(Exit3)
543 cmp $3, %ebx
544 je L(StrncatExit3)
545 cmpb $0, 3(%ecx)
546 jz L(Exit4)
547 cmp $4, %ebx
548 je L(StrncatExit4)
549 cmpb $0, 4(%ecx)
550 jz L(Exit5)
551 cmp $5, %ebx
552 je L(StrncatExit5)
553 cmpb $0, 5(%ecx)
554 jz L(Exit6)
555 cmp $6, %ebx
556 je L(StrncatExit6)
557 cmpb $0, 6(%ecx)
558 jz L(Exit7)
559 cmp $7, %ebx
560 je L(StrncatExit7)
/* The limit is 8 and source bytes 0..6 are non-zero.  Copy 8 bytes, then
   place the terminator: cmpb $1 sets the carry flag only when the byte at
   offset 7 is zero, and sbb $-1 adds (1 - carry), so %eax ends up at
   offset 7 if that byte is already the terminator, otherwise at offset 8.
   %bh is zero for this limit.  */
561 movlpd (%ecx), %xmm0
562 movlpd %xmm0, (%edx)
563 lea 7(%edx), %eax
564 cmpb $1, (%eax)
565 sbb $-1, %eax
566 movb %bh, (%eax)
567 movl %edi, %eax /* return value: the first argument */
568 RETURN1
569
570 # endif
571 END (STRCAT)
572 #endif