]>
Commit | Line | Data |
---|---|---|
99710781 | 1 | /* strcat with SSSE3 |
2b778ceb | 2 | Copyright (C) 2011-2021 Free Software Foundation, Inc. |
99710781 LD |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 17 | License along with the GNU C Library; if not, see |
5a82c748 | 18 | <https://www.gnu.org/licenses/>. */ |
99710781 | 19 | |
4f41c682 | 20 | #if IS_IN (libc) |
99710781 LD |
21 | |
22 | # include <sysdep.h> | |
23 | ||
24 | # ifndef STRCAT | |
25 | # define STRCAT __strcat_ssse3 | |
26 | # endif | |
27 | ||
28 | # define USE_AS_STRCAT | |
29 | ||
30 | .text | |
/* Entry point.  The code below first scans the string at %rdi for its
   terminating NUL (an inlined strlen), then copies the string at %rsi
   to that position and returns the original %rdi in %rax.  */
31 | ENTRY (STRCAT) | |
32 | # ifdef USE_AS_STRNCAT | |
/* strncat build: keep the third argument (%rdx, the length limit) in
   %r8, because %rdx is overwritten by the scan and copy code below.  */
33 | mov %rdx, %r8 | |
34 | # endif | |
35 | ||
99710781 | 36 | |
37bb363f OB |
37 | /* Inline corresponding strlen file, temporary until new strcpy |
38 | implementation gets merged. */ | |
99710781 | 39 | |
37bb363f OB |
/* %eax = 0 is the running offset of the NUL inside (%rdi); each
   L(exit_tailN) label adds N to it.  */
40 | xor %eax, %eax |
/* Check the first 16 bytes of (%rdi) one byte at a time.  A NUL at
   offset N branches to L(exit_tailN).  */
41 | cmpb $0, (%rdi) | |
42 | jz L(exit_tail0) | |
43 | cmpb $0, 1(%rdi) | |
44 | jz L(exit_tail1) | |
45 | cmpb $0, 2(%rdi) | |
46 | jz L(exit_tail2) | |
47 | cmpb $0, 3(%rdi) | |
48 | jz L(exit_tail3) | |
49 | ||
50 | cmpb $0, 4(%rdi) | |
51 | jz L(exit_tail4) | |
52 | cmpb $0, 5(%rdi) | |
53 | jz L(exit_tail5) | |
54 | cmpb $0, 6(%rdi) | |
55 | jz L(exit_tail6) | |
56 | cmpb $0, 7(%rdi) | |
57 | jz L(exit_tail7) | |
58 | ||
59 | cmpb $0, 8(%rdi) | |
60 | jz L(exit_tail8) | |
61 | cmpb $0, 9(%rdi) | |
62 | jz L(exit_tail9) | |
63 | cmpb $0, 10(%rdi) | |
64 | jz L(exit_tail10) | |
65 | cmpb $0, 11(%rdi) | |
66 | jz L(exit_tail11) | |
67 | ||
68 | cmpb $0, 12(%rdi) | |
69 | jz L(exit_tail12) | |
70 | cmpb $0, 13(%rdi) | |
71 | jz L(exit_tail13) | |
72 | cmpb $0, 14(%rdi) | |
73 | jz L(exit_tail14) | |
74 | cmpb $0, 15(%rdi) | |
75 | jz L(exit_tail15) | |
/* Unrolled scan of sixteen 16-byte chunks, starting at the first
   16-byte aligned address above %rdi.

   %xmm0 is zeroed so that pcmpeqb marks bytes equal to NUL.
   %rcx = %rdi + 16 is kept for L(exit), which subtracts it from %rax.
   %rax = (%rdi + 16) & -16 is the aligned scan pointer; bytes between
   %rdi and that address were already checked one by one above.  */
76 | pxor %xmm0, %xmm0 | |
77 | lea 16(%rdi), %rcx | |
78 | lea 16(%rdi), %rax | |
79 | and $-16, %rax | |
80 | ||
/* Pattern repeated for every chunk: compare 16 bytes against zero,
   collect the per-byte result as a 16-bit mask in %edx, test it,
   advance %rax by 16 with lea (which leaves the flags from test
   intact) and leave through L(exit) if any bit is set.  On exit %rax
   therefore points 16 bytes past the chunk that holds the NUL.
   The pxor of the next register is interleaved with each step.  */
81 | pcmpeqb (%rax), %xmm0 | |
82 | pmovmskb %xmm0, %edx | |
83 | pxor %xmm1, %xmm1 | |
84 | test %edx, %edx | |
85 | lea 16(%rax), %rax | |
86 | jnz L(exit) | |
87 | ||
88 | pcmpeqb (%rax), %xmm1 | |
89 | pmovmskb %xmm1, %edx | |
90 | pxor %xmm2, %xmm2 | |
91 | test %edx, %edx | |
92 | lea 16(%rax), %rax | |
93 | jnz L(exit) | |
94 | ||
95 | pcmpeqb (%rax), %xmm2 | |
96 | pmovmskb %xmm2, %edx | |
97 | pxor %xmm3, %xmm3 | |
98 | test %edx, %edx | |
99 | lea 16(%rax), %rax | |
100 | jnz L(exit) | |
101 | ||
102 | pcmpeqb (%rax), %xmm3 | |
103 | pmovmskb %xmm3, %edx | |
104 | test %edx, %edx | |
105 | lea 16(%rax), %rax | |
106 | jnz L(exit) | |
107 | ||
/* From here on %xmm0-%xmm3 are reused without another pxor: a
   register whose compare found no NUL holds all zero bits again.  */
108 | pcmpeqb (%rax), %xmm0 | |
109 | pmovmskb %xmm0, %edx | |
110 | test %edx, %edx | |
111 | lea 16(%rax), %rax | |
112 | jnz L(exit) | |
113 | ||
114 | pcmpeqb (%rax), %xmm1 | |
115 | pmovmskb %xmm1, %edx | |
116 | test %edx, %edx | |
117 | lea 16(%rax), %rax | |
118 | jnz L(exit) | |
119 | ||
120 | pcmpeqb (%rax), %xmm2 | |
121 | pmovmskb %xmm2, %edx | |
122 | test %edx, %edx | |
123 | lea 16(%rax), %rax | |
124 | jnz L(exit) | |
125 | ||
126 | pcmpeqb (%rax), %xmm3 | |
127 | pmovmskb %xmm3, %edx | |
128 | test %edx, %edx | |
129 | lea 16(%rax), %rax | |
130 | jnz L(exit) | |
131 | ||
132 | pcmpeqb (%rax), %xmm0 | |
133 | pmovmskb %xmm0, %edx | |
134 | test %edx, %edx | |
135 | lea 16(%rax), %rax | |
136 | jnz L(exit) | |
137 | ||
138 | pcmpeqb (%rax), %xmm1 | |
139 | pmovmskb %xmm1, %edx | |
140 | test %edx, %edx | |
141 | lea 16(%rax), %rax | |
142 | jnz L(exit) | |
143 | ||
144 | pcmpeqb (%rax), %xmm2 | |
145 | pmovmskb %xmm2, %edx | |
146 | test %edx, %edx | |
147 | lea 16(%rax), %rax | |
148 | jnz L(exit) | |
149 | ||
150 | pcmpeqb (%rax), %xmm3 | |
151 | pmovmskb %xmm3, %edx | |
152 | test %edx, %edx | |
153 | lea 16(%rax), %rax | |
154 | jnz L(exit) | |
155 | ||
156 | pcmpeqb (%rax), %xmm0 | |
157 | pmovmskb %xmm0, %edx | |
158 | test %edx, %edx | |
159 | lea 16(%rax), %rax | |
160 | jnz L(exit) | |
161 | ||
162 | pcmpeqb (%rax), %xmm1 | |
163 | pmovmskb %xmm1, %edx | |
164 | test %edx, %edx | |
165 | lea 16(%rax), %rax | |
166 | jnz L(exit) | |
167 | ||
168 | pcmpeqb (%rax), %xmm2 | |
169 | pmovmskb %xmm2, %edx | |
170 | test %edx, %edx | |
171 | lea 16(%rax), %rax | |
172 | jnz L(exit) | |
173 | ||
174 | pcmpeqb (%rax), %xmm3 | |
175 | pmovmskb %xmm3, %edx | |
176 | test %edx, %edx | |
177 | lea 16(%rax), %rax | |
178 | jnz L(exit) | |
179 | ||
/* Round the scan pointer down to a 64-byte boundary for the loop that
   follows.  This can step back over chunks already checked above;
   they contain no NUL, so scanning them again is harmless.  */
180 | and $-0x40, %rax | |
181 | ||
/* Main scan loop: 64 bytes (four 16-byte chunks) per iteration.
   %xmm0-%xmm3 are all zero on entry to every iteration, because the
   previous compares found no NUL.  */
182 | .p2align 4 | |
183 | L(aligned_64): | |
184 | pcmpeqb (%rax), %xmm0 | |
185 | pcmpeqb 16(%rax), %xmm1 | |
186 | pcmpeqb 32(%rax), %xmm2 | |
187 | pcmpeqb 48(%rax), %xmm3 | |
/* Per-chunk NUL masks: %edx, %r11d, %r10d, %r9d for chunks 1 to 4.  */
188 | pmovmskb %xmm0, %edx | |
189 | pmovmskb %xmm1, %r11d | |
190 | pmovmskb %xmm2, %r10d | |
191 | pmovmskb %xmm3, %r9d | |
/* Fold all four masks into %r9d; ZF from the last `or' tells whether
   any chunk holds a NUL.  The lea advances %rax without touching the
   flags.  */
192 | or %edx, %r9d | |
193 | or %r11d, %r9d | |
194 | or %r10d, %r9d | |
195 | lea 64(%rax), %rax | |
196 | jz L(aligned_64) | |
197 | ||
/* A NUL was found; %rax is now 64 past the start of the group.  Find
   the first chunk with a non-zero mask.  L(exit) expects %edx to hold
   that chunk's mask and %rax to point 16 bytes past that chunk.  */
198 | test %edx, %edx | |
199 | jnz L(aligned_64_exit_16) | |
200 | test %r11d, %r11d | |
201 | jnz L(aligned_64_exit_32) | |
202 | test %r10d, %r10d | |
203 | jnz L(aligned_64_exit_48) | |
204 | ||
/* Fourth chunk: %rax needs no adjustment.  The mask is re-read from
   %xmm3 because %r9d was overwritten by the `or' sequence above.  */
205 | L(aligned_64_exit_64): | |
206 | pmovmskb %xmm3, %edx | |
207 | jmp L(exit) | |
208 | ||
/* Third chunk: step %rax back by 16 and use its mask from %r10d.  */
209 | L(aligned_64_exit_48): | |
210 | lea -16(%rax), %rax | |
211 | mov %r10d, %edx | |
212 | jmp L(exit) | |
213 | ||
/* Second chunk: step %rax back by 32 and use its mask from %r11d.  */
214 | L(aligned_64_exit_32): | |
215 | lea -32(%rax), %rax | |
216 | mov %r11d, %edx | |
217 | jmp L(exit) | |
218 | ||
/* First chunk: %edx already holds its mask; step %rax back by 48 and
   fall through into L(exit).  */
219 | L(aligned_64_exit_16): | |
220 | lea -48(%rax), %rax | |
221 | ||
/* Turn the scan result into the offset of the NUL from %rdi.
   On entry %rax points 16 bytes past the chunk that contains the NUL,
   %rcx = %rdi + 16 and %edx is that chunk's 16-bit NUL mask (bit N set
   when byte N of the chunk is NUL).  After the subtraction %rax is the
   offset of the chunk start from %rdi; the lowest set mask bit is then
   added through the L(exit_tailN) labels.  */
222 | L(exit): | |
223 | sub %rcx, %rax | |
/* Low mask byte empty: the NUL is in bytes 8-15 of the chunk.  */
224 | test %dl, %dl | |
225 | jz L(exit_high) | |
226 | test $0x01, %dl | |
227 | jnz L(exit_tail0) | |
228 | ||
229 | test $0x02, %dl | |
230 | jnz L(exit_tail1) | |
231 | ||
232 | test $0x04, %dl | |
233 | jnz L(exit_tail2) | |
234 | ||
235 | test $0x08, %dl | |
236 | jnz L(exit_tail3) | |
237 | ||
238 | test $0x10, %dl | |
239 | jnz L(exit_tail4) | |
240 | ||
241 | test $0x20, %dl | |
242 | jnz L(exit_tail5) | |
243 | ||
244 | test $0x40, %dl | |
245 | jnz L(exit_tail6) | |
/* %dl is non-zero and bits 0-6 are clear, so bit 7 is set: NUL at
   byte 7.  Falls through into L(exit_tail0), which adds nothing.  */
246 | add $7, %eax | |
247 | L(exit_tail0): | |
248 | jmp L(StartStrcpyPart) | |
249 | ||
250 | .p2align 4 | |
/* NUL is in the upper half of the chunk: bias the offset by 8 and
   decode the high mask byte the same way.  */
251 | L(exit_high): | |
252 | add $8, %eax | |
253 | test $0x01, %dh | |
254 | jnz L(exit_tail0) | |
255 | ||
256 | test $0x02, %dh | |
257 | jnz L(exit_tail1) | |
258 | ||
259 | test $0x04, %dh | |
260 | jnz L(exit_tail2) | |
261 | ||
262 | test $0x08, %dh | |
263 | jnz L(exit_tail3) | |
264 | ||
265 | test $0x10, %dh | |
266 | jnz L(exit_tail4) | |
267 | ||
268 | test $0x20, %dh | |
269 | jnz L(exit_tail5) | |
270 | ||
271 | test $0x40, %dh | |
272 | jnz L(exit_tail6) | |
/* None of bits 8-14 matched: treat the NUL as byte 15 (8 + 7).  */
273 | add $7, %eax | |
274 | jmp L(StartStrcpyPart) | |
275 | ||
/* L(exit_tailN): add N to the running NUL offset in %eax and continue
   at L(StartStrcpyPart).  These are reached both from the byte-wise
   checks at function entry and from the mask decoding in L(exit) and
   L(exit_high).  */
276 | .p2align 4 | |
277 | L(exit_tail1): | |
278 | add $1, %eax | |
279 | jmp L(StartStrcpyPart) | |
280 | ||
281 | .p2align 4 | |
282 | L(exit_tail2): | |
283 | add $2, %eax | |
284 | jmp L(StartStrcpyPart) | |
285 | ||
286 | .p2align 4 | |
287 | L(exit_tail3): | |
288 | add $3, %eax | |
289 | jmp L(StartStrcpyPart) | |
290 | ||
291 | .p2align 4 | |
292 | L(exit_tail4): | |
293 | add $4, %eax | |
294 | jmp L(StartStrcpyPart) | |
295 | ||
296 | .p2align 4 | |
297 | L(exit_tail5): | |
298 | add $5, %eax | |
299 | jmp L(StartStrcpyPart) | |
300 | ||
301 | .p2align 4 | |
302 | L(exit_tail6): | |
303 | add $6, %eax | |
304 | jmp L(StartStrcpyPart) | |
305 | ||
306 | .p2align 4 | |
307 | L(exit_tail7): | |
308 | add $7, %eax | |
309 | jmp L(StartStrcpyPart) | |
310 | ||
311 | .p2align 4 | |
312 | L(exit_tail8): | |
313 | add $8, %eax | |
314 | jmp L(StartStrcpyPart) | |
315 | ||
316 | .p2align 4 | |
317 | L(exit_tail9): | |
318 | add $9, %eax | |
319 | jmp L(StartStrcpyPart) | |
320 | ||
321 | .p2align 4 | |
322 | L(exit_tail10): | |
323 | add $10, %eax | |
324 | jmp L(StartStrcpyPart) | |
325 | ||
326 | .p2align 4 | |
327 | L(exit_tail11): | |
328 | add $11, %eax | |
329 | jmp L(StartStrcpyPart) | |
330 | ||
331 | .p2align 4 | |
332 | L(exit_tail12): | |
333 | add $12, %eax | |
334 | jmp L(StartStrcpyPart) | |
335 | ||
336 | .p2align 4 | |
337 | L(exit_tail13): | |
338 | add $13, %eax | |
339 | jmp L(StartStrcpyPart) | |
340 | ||
341 | .p2align 4 | |
342 | L(exit_tail14): | |
343 | add $14, %eax | |
344 | jmp L(StartStrcpyPart) | |
345 | ||
346 | .p2align 4 | |
/* The last tail has no jmp: it falls through the alignment padding
   into L(StartStrcpyPart).  */
347 | L(exit_tail15): | |
348 | add $15, %eax | |
349 | ||
350 | .p2align 4 | |
99710781 LD |
/* Start of the copy phase.  On entry %rax is the offset of the NUL in
   the string at %rdi.
   %rcx = %rsi (source pointer) and %rdx = %rdi + %rax (address of the
   destination's NUL, where the source is appended).  */
351 | L(StartStrcpyPart): |
352 | mov %rsi, %rcx | |
353 | lea (%rdi, %rax), %rdx | |
354 | # ifdef USE_AS_STRNCAT | |
/* strncat: %r8 is the length limit saved at entry.  A limit of zero
   returns immediately; limits of 1-8 use a dedicated path that checks
   the limit after every byte.  */
355 | test %r8, %r8 | |
356 | jz L(StrncatExit0) | |
357 | cmp $8, %r8 | |
358 | jbe L(StrncatExit8Bytes) | |
359 | # endif | |
/* Look for the source NUL in the first 16 bytes one byte at a time.
   A NUL at offset N-1 branches to L(ExitN), which copies N bytes
   (the NUL included) and returns.  */
360 | cmpb $0, (%rcx) | |
361 | jz L(Exit1) | |
362 | cmpb $0, 1(%rcx) | |
363 | jz L(Exit2) | |
364 | cmpb $0, 2(%rcx) | |
365 | jz L(Exit3) | |
366 | cmpb $0, 3(%rcx) | |
367 | jz L(Exit4) | |
368 | cmpb $0, 4(%rcx) | |
369 | jz L(Exit5) | |
370 | cmpb $0, 5(%rcx) | |
371 | jz L(Exit6) | |
372 | cmpb $0, 6(%rcx) | |
373 | jz L(Exit7) | |
374 | cmpb $0, 7(%rcx) | |
375 | jz L(Exit8) | |
376 | cmpb $0, 8(%rcx) | |
377 | jz L(Exit9) | |
378 | # ifdef USE_AS_STRNCAT | |
/* strncat: the first 9 source bytes are non-NUL.  With a limit below
   16 (so 9-15 here) switch to the path that interleaves limit checks
   with the remaining byte checks.  */
379 | cmp $16, %r8 | |
380 | jb L(StrncatExit15Bytes) | |
381 | # endif | |
382 | cmpb $0, 9(%rcx) | |
383 | jz L(Exit10) | |
384 | cmpb $0, 10(%rcx) | |
385 | jz L(Exit11) | |
386 | cmpb $0, 11(%rcx) | |
387 | jz L(Exit12) | |
388 | cmpb $0, 12(%rcx) | |
389 | jz L(Exit13) | |
390 | cmpb $0, 13(%rcx) | |
391 | jz L(Exit14) | |
392 | cmpb $0, 14(%rcx) | |
393 | jz L(Exit15) | |
394 | cmpb $0, 15(%rcx) | |
395 | jz L(Exit16) | |
396 | # ifdef USE_AS_STRNCAT | |
/* strncat: no NUL in the first 16 source bytes and the limit is
   exactly 16: copy 16 bytes and append a NUL.  */
397 | cmp $16, %r8 | |
398 | je L(StrncatExit16) | |
/* NOTE(review): presumably selects the length-limited code paths of
   the file included below; its contents are not visible here.  */
399 | # define USE_AS_STRNCPY | |
400 | # endif | |
401 | ||
/* The copy for sources longer than 16 bytes comes from the included
   file.  NOTE(review): it presumably branches back to the
   L(CopyFrom1To16Bytes*) labels defined in this file; confirm
   against strcpy-ssse3.S.  */
402 | # include "strcpy-ssse3.S" | |
403 | ||
/* Copy the final 1-16 source bytes, selected by a NUL mask in %ax.
   NOTE(review): no branch to this label is visible in this file;
   it is presumably entered from the included strcpy-ssse3.S with %rsi
   holding the offset of the final chunk, %rcx/%rdx the source and
   destination bases, and %ax the pmovmskb NUL mask - confirm there.  */
404 | .p2align 4 | |
405 | L(CopyFrom1To16Bytes): | |
/* Form the addresses of the final chunk in source and destination.  */
406 | add %rsi, %rdx | |
407 | add %rsi, %rcx | |
408 | ||
/* Mask bit N set selects L(ExitN+1), which copies N+1 bytes.  An
   empty low byte means the set bit is in %ah.  */
409 | test %al, %al | |
410 | jz L(ExitHigh) | |
411 | test $0x01, %al | |
412 | jnz L(Exit1) | |
413 | test $0x02, %al | |
414 | jnz L(Exit2) | |
415 | test $0x04, %al | |
416 | jnz L(Exit3) | |
417 | test $0x08, %al | |
418 | jnz L(Exit4) | |
419 | test $0x10, %al | |
420 | jnz L(Exit5) | |
421 | test $0x20, %al | |
422 | jnz L(Exit6) | |
423 | test $0x40, %al | |
424 | jnz L(Exit7) | |
/* %al non-zero with bits 0-6 clear: bit 7 is set, copy 8 bytes.
   Return value is the original destination pointer.  */
425 | movlpd (%rcx), %xmm0 | |
426 | movlpd %xmm0, (%rdx) | |
427 | mov %rdi, %rax | |
428 | ret | |
429 | ||
430 | .p2align 4 | |
/* Same decoding for mask bits 8-14 (L(Exit9) to L(Exit15)).  */
431 | L(ExitHigh): | |
432 | test $0x01, %ah | |
433 | jnz L(Exit9) | |
434 | test $0x02, %ah | |
435 | jnz L(Exit10) | |
436 | test $0x04, %ah | |
437 | jnz L(Exit11) | |
438 | test $0x08, %ah | |
439 | jnz L(Exit12) | |
440 | test $0x10, %ah | |
441 | jnz L(Exit13) | |
442 | test $0x20, %ah | |
443 | jnz L(Exit14) | |
444 | test $0x40, %ah | |
445 | jnz L(Exit15) | |
/* None of bits 8-14 matched: copy all 16 bytes as two 8-byte moves.  */
446 | movlpd (%rcx), %xmm0 | |
447 | movlpd 8(%rcx), %xmm1 | |
448 | movlpd %xmm0, (%rdx) | |
449 | movlpd %xmm1, 8(%rdx) | |
450 | mov %rdi, %rax | |
451 | ret | |
452 | ||
/* Fixed-size copy stubs.
   L(ExitN) copies exactly N bytes from (%rcx) to (%rdx) and returns
   the original destination pointer (%rdi) in %rax.
   L(StrncatExitN) first stores a zero byte at N(%rdx) and then falls
   through into L(ExitN); it is the entry used when the copy stops at
   the length limit instead of at a source NUL.
   `xor %ah, %ah' only produces the zero byte for that store; %rax is
   reloaded from %rdi before every ret.  */
453 | .p2align 4 | |
454 | L(StrncatExit1): | |
455 | xor %ah, %ah | |
456 | movb %ah, 1(%rdx) | |
457 | L(Exit1): | |
458 | movb (%rcx), %al | |
459 | movb %al, (%rdx) | |
460 | mov %rdi, %rax | |
461 | ret | |
462 | ||
463 | .p2align 4 | |
464 | L(StrncatExit2): | |
465 | xor %ah, %ah | |
466 | movb %ah, 2(%rdx) | |
467 | L(Exit2): | |
468 | movw (%rcx), %ax | |
469 | movw %ax, (%rdx) | |
470 | mov %rdi, %rax | |
471 | ret | |
472 | ||
473 | .p2align 4 | |
474 | L(StrncatExit3): | |
475 | xor %ah, %ah | |
476 | movb %ah, 3(%rdx) | |
477 | L(Exit3): | |
/* 3 bytes = 2-byte move + 1-byte move.  */
478 | movw (%rcx), %ax | |
479 | movw %ax, (%rdx) | |
480 | movb 2(%rcx), %al | |
481 | movb %al, 2(%rdx) | |
482 | mov %rdi, %rax | |
483 | ret | |
484 | ||
485 | .p2align 4 | |
486 | L(StrncatExit4): | |
487 | xor %ah, %ah | |
488 | movb %ah, 4(%rdx) | |
489 | L(Exit4): | |
490 | mov (%rcx), %eax | |
491 | mov %eax, (%rdx) | |
492 | mov %rdi, %rax | |
493 | ret | |
494 | ||
495 | .p2align 4 | |
496 | L(StrncatExit5): | |
497 | xor %ah, %ah | |
498 | movb %ah, 5(%rdx) | |
499 | L(Exit5): | |
/* 5 bytes = 4-byte move + 1-byte move.  */
500 | mov (%rcx), %eax | |
501 | mov %eax, (%rdx) | |
502 | movb 4(%rcx), %al | |
503 | movb %al, 4(%rdx) | |
504 | mov %rdi, %rax | |
505 | ret | |
506 | ||
507 | .p2align 4 | |
508 | L(StrncatExit6): | |
509 | xor %ah, %ah | |
510 | movb %ah, 6(%rdx) | |
511 | L(Exit6): | |
/* 6 bytes = 4-byte move + 2-byte move.  */
512 | mov (%rcx), %eax | |
513 | mov %eax, (%rdx) | |
514 | movw 4(%rcx), %ax | |
515 | movw %ax, 4(%rdx) | |
516 | mov %rdi, %rax | |
517 | ret | |
518 | ||
519 | .p2align 4 | |
520 | L(StrncatExit7): | |
521 | xor %ah, %ah | |
522 | movb %ah, 7(%rdx) | |
523 | L(Exit7): | |
/* 7 bytes = two 4-byte moves overlapping by one byte (offsets 0
   and 3).  */
524 | mov (%rcx), %eax | |
525 | mov %eax, (%rdx) | |
526 | mov 3(%rcx), %eax | |
527 | mov %eax, 3(%rdx) | |
528 | mov %rdi, %rax | |
529 | ret | |
530 | ||
531 | .p2align 4 | |
532 | L(StrncatExit8): | |
533 | xor %ah, %ah | |
534 | movb %ah, 8(%rdx) | |
535 | L(Exit8): | |
/* 8 bytes through the low half of %xmm0.  */
536 | movlpd (%rcx), %xmm0 | |
537 | movlpd %xmm0, (%rdx) | |
538 | mov %rdi, %rax | |
539 | ret | |
540 | ||
541 | .p2align 4 | |
542 | L(StrncatExit9): | |
543 | xor %ah, %ah | |
544 | movb %ah, 9(%rdx) | |
545 | L(Exit9): | |
/* 9 bytes = 8-byte move + 1-byte move.  */
546 | movlpd (%rcx), %xmm0 | |
547 | movlpd %xmm0, (%rdx) | |
548 | movb 8(%rcx), %al | |
549 | movb %al, 8(%rdx) | |
550 | mov %rdi, %rax | |
551 | ret | |
552 | ||
553 | .p2align 4 | |
554 | L(StrncatExit10): | |
555 | xor %ah, %ah | |
556 | movb %ah, 10(%rdx) | |
557 | L(Exit10): | |
/* 10 bytes = 8-byte move + 2-byte move.  */
558 | movlpd (%rcx), %xmm0 | |
559 | movlpd %xmm0, (%rdx) | |
560 | movw 8(%rcx), %ax | |
561 | movw %ax, 8(%rdx) | |
562 | mov %rdi, %rax | |
563 | ret | |
564 | ||
565 | .p2align 4 | |
566 | L(StrncatExit11): | |
567 | xor %ah, %ah | |
568 | movb %ah, 11(%rdx) | |
569 | L(Exit11): | |
/* 11 bytes = 8-byte move + 4-byte move at offset 7 (one byte of
   overlap).  */
570 | movlpd (%rcx), %xmm0 | |
571 | movlpd %xmm0, (%rdx) | |
572 | mov 7(%rcx), %eax | |
573 | mov %eax, 7(%rdx) | |
574 | mov %rdi, %rax | |
575 | ret | |
576 | ||
577 | .p2align 4 | |
578 | L(StrncatExit12): | |
579 | xor %ah, %ah | |
580 | movb %ah, 12(%rdx) | |
581 | L(Exit12): | |
/* 12 bytes = 8-byte move + 4-byte move.  */
582 | movlpd (%rcx), %xmm0 | |
583 | movlpd %xmm0, (%rdx) | |
584 | mov 8(%rcx), %eax | |
585 | mov %eax, 8(%rdx) | |
586 | mov %rdi, %rax | |
587 | ret | |
588 | ||
589 | .p2align 4 | |
590 | L(StrncatExit13): | |
591 | xor %ah, %ah | |
592 | movb %ah, 13(%rdx) | |
593 | L(Exit13): | |
/* 13 bytes = two overlapping 8-byte moves (offsets 0 and 5).  */
594 | movlpd (%rcx), %xmm0 | |
595 | movlpd %xmm0, (%rdx) | |
596 | movlpd 5(%rcx), %xmm1 | |
597 | movlpd %xmm1, 5(%rdx) | |
598 | mov %rdi, %rax | |
599 | ret | |
600 | ||
601 | .p2align 4 | |
602 | L(StrncatExit14): | |
603 | xor %ah, %ah | |
604 | movb %ah, 14(%rdx) | |
605 | L(Exit14): | |
/* 14 bytes = two overlapping 8-byte moves (offsets 0 and 6).  */
606 | movlpd (%rcx), %xmm0 | |
607 | movlpd %xmm0, (%rdx) | |
608 | movlpd 6(%rcx), %xmm1 | |
609 | movlpd %xmm1, 6(%rdx) | |
610 | mov %rdi, %rax | |
611 | ret | |
612 | ||
613 | .p2align 4 | |
614 | L(StrncatExit15): | |
615 | xor %ah, %ah | |
616 | movb %ah, 15(%rdx) | |
617 | L(Exit15): | |
/* 15 bytes = two overlapping 8-byte moves (offsets 0 and 7).  */
618 | movlpd (%rcx), %xmm0 | |
619 | movlpd %xmm0, (%rdx) | |
620 | movlpd 7(%rcx), %xmm1 | |
621 | movlpd %xmm1, 7(%rdx) | |
622 | mov %rdi, %rax | |
623 | ret | |
624 | ||
625 | .p2align 4 | |
626 | L(StrncatExit16): | |
627 | xor %ah, %ah | |
628 | movb %ah, 16(%rdx) | |
629 | L(Exit16): | |
/* 16 bytes = two adjacent 8-byte moves.  */
630 | movlpd (%rcx), %xmm0 | |
631 | movlpd 8(%rcx), %xmm1 | |
632 | movlpd %xmm0, (%rdx) | |
633 | movlpd %xmm1, 8(%rdx) | |
634 | mov %rdi, %rax | |
635 | ret | |
636 | ||
637 | # ifdef USE_AS_STRNCPY | |
638 | ||
/* Length-limited tail copy when the final chunk contains a source NUL
   (%ax is a non-zero NUL mask) and the length limit may also end
   inside the chunk.  Whichever comes first decides how many bytes are
   copied.
   NOTE(review): entered from L(CopyFrom1To16BytesCase2OrCase3) and
   presumably from the included strcpy-ssse3.S; the meaning of %rsi
   (chunk offset) and of the `add $16' to %r8 (presumably undoing an
   earlier subtraction so %r8 is the number of bytes still allowed)
   depends on that file - confirm there.  */
639 | .p2align 4 | |
640 | L(CopyFrom1To16BytesCase2): | |
641 | add $16, %r8 | |
642 | add %rsi, %rcx | |
643 | lea (%rsi, %rdx), %rsi | |
/* Decide between the low (bytes 0-7) and high (bytes 8-15) half
   without disturbing %al: %dh receives bit 15 of (%r8 - 9), which is
   set when %r8 < 9 for the small values handled here, OR-ed with the
   low NUL mask.  %dh is zero only when the low half has no NUL and at
   least 9 bytes are still allowed.  The final lea rebuilds the
   destination pointer in %rdx (old %rdx + old %rsi) and leaves the
   flags from `test' untouched for the jz.  */
644 | lea -9(%r8), %rdx | |
645 | and $1<<7, %dh | |
646 | or %al, %dh | |
647 | test %dh, %dh | |
648 | lea (%rsi), %rdx | |
649 | jz L(ExitHighCase2) | |
650 | ||
/* For each N in ascending order: a NUL at byte N-1 ends the copy via
   L(ExitN); otherwise a limit of exactly N ends it via
   L(StrncatExitN), which also appends the NUL.  */
651 | test $0x01, %al | |
652 | jnz L(Exit1) | |
653 | cmp $1, %r8 | |
654 | je L(StrncatExit1) | |
655 | test $0x02, %al | |
656 | jnz L(Exit2) | |
657 | cmp $2, %r8 | |
658 | je L(StrncatExit2) | |
659 | test $0x04, %al | |
660 | jnz L(Exit3) | |
661 | cmp $3, %r8 | |
662 | je L(StrncatExit3) | |
663 | test $0x08, %al | |
664 | jnz L(Exit4) | |
665 | cmp $4, %r8 | |
666 | je L(StrncatExit4) | |
667 | test $0x10, %al | |
668 | jnz L(Exit5) | |
669 | cmp $5, %r8 | |
670 | je L(StrncatExit5) | |
671 | test $0x20, %al | |
672 | jnz L(Exit6) | |
673 | cmp $6, %r8 | |
674 | je L(StrncatExit6) | |
675 | test $0x40, %al | |
676 | jnz L(Exit7) | |
677 | cmp $7, %r8 | |
678 | je L(StrncatExit7) | |
/* Remaining case: copy 8 bytes.  Then place the terminator: cmpb $1
   sets CF only when the byte at offset 7 is zero, and `sbb $-1' adds
   (1 - CF), so %rax stays at offset 7 if the NUL was already copied
   there and moves to offset 8 otherwise.  */
679 | movlpd (%rcx), %xmm0 | |
680 | movlpd %xmm0, (%rdx) | |
681 | lea 7(%rdx), %rax | |
682 | cmpb $1, (%rax) | |
683 | sbb $-1, %rax | |
684 | xor %cl, %cl | |
685 | movb %cl, (%rax) | |
686 | mov %rdi, %rax | |
687 | ret | |
688 | ||
689 | .p2align 4 | |
/* Same NUL-versus-limit race for bytes 8-15, using the high mask
   byte in %ah.  */
690 | L(ExitHighCase2): | |
691 | test $0x01, %ah | |
692 | jnz L(Exit9) | |
693 | cmp $9, %r8 | |
694 | je L(StrncatExit9) | |
695 | test $0x02, %ah | |
696 | jnz L(Exit10) | |
697 | cmp $10, %r8 | |
698 | je L(StrncatExit10) | |
699 | test $0x04, %ah | |
700 | jnz L(Exit11) | |
701 | cmp $11, %r8 | |
702 | je L(StrncatExit11) | |
703 | test $0x8, %ah | |
704 | jnz L(Exit12) | |
705 | cmp $12, %r8 | |
706 | je L(StrncatExit12) | |
707 | test $0x10, %ah | |
708 | jnz L(Exit13) | |
709 | cmp $13, %r8 | |
710 | je L(StrncatExit13) | |
711 | test $0x20, %ah | |
712 | jnz L(Exit14) | |
713 | cmp $14, %r8 | |
714 | je L(StrncatExit14) | |
715 | test $0x40, %ah | |
716 | jnz L(Exit15) | |
717 | cmp $15, %r8 | |
718 | je L(StrncatExit15) | |
/* Copy all 16 bytes.  No terminator is stored here.
   NOTE(review): presumably the NUL is the 16th source byte in this
   case (mask bit 15) and is copied with the data - confirm.  */
719 | movlpd (%rcx), %xmm0 | |
720 | movlpd %xmm0, (%rdx) | |
721 | movlpd 8(%rcx), %xmm1 | |
722 | movlpd %xmm1, 8(%rdx) | |
723 | mov %rdi, %rax | |
724 | ret | |
725 | ||
/* Dispatcher: a non-zero %rax (NUL mask) selects Case2 (NUL and
   length limit both possible in the final chunk); zero falls through
   into Case3 (only the length limit ends the copy).  */
726 | L(CopyFrom1To16BytesCase2OrCase3): | |
727 | test %rax, %rax | |
728 | jnz L(CopyFrom1To16BytesCase2) | |
729 | ||
730 | .p2align 4 | |
/* Case3: copy exactly %r8 bytes of the final chunk and append a NUL.
   NOTE(review): as in Case2, %rsi is presumably the chunk offset and
   the `add $16' presumably restores %r8 to the bytes still allowed;
   both are set up by code not visible in this file.  */
731 | L(CopyFrom1To16BytesCase3): | |
732 | add $16, %r8 | |
733 | add %rsi, %rdx | |
734 | add %rsi, %rcx | |
735 | ||
/* Unsigned compare: more than 8 bytes left goes to the high half.  */
736 | cmp $8, %r8 | |
737 | ja L(ExitHighCase3) | |
738 | cmp $1, %r8 | |
739 | je L(StrncatExit1) | |
740 | cmp $2, %r8 | |
741 | je L(StrncatExit2) | |
742 | cmp $3, %r8 | |
743 | je L(StrncatExit3) | |
744 | cmp $4, %r8 | |
745 | je L(StrncatExit4) | |
746 | cmp $5, %r8 | |
747 | je L(StrncatExit5) | |
748 | cmp $6, %r8 | |
749 | je L(StrncatExit6) | |
750 | cmp $7, %r8 | |
751 | je L(StrncatExit7) | |
/* Not 1-7: copy 8 bytes and store the terminator at offset 8.  */
752 | movlpd (%rcx), %xmm0 | |
753 | movlpd %xmm0, (%rdx) | |
754 | xor %ah, %ah | |
755 | movb %ah, 8(%rdx) | |
756 | mov %rdi, %rax | |
757 | ret | |
758 | ||
759 | .p2align 4 | |
760 | L(ExitHighCase3): | |
761 | cmp $9, %r8 | |
762 | je L(StrncatExit9) | |
763 | cmp $10, %r8 | |
764 | je L(StrncatExit10) | |
765 | cmp $11, %r8 | |
766 | je L(StrncatExit11) | |
767 | cmp $12, %r8 | |
768 | je L(StrncatExit12) | |
769 | cmp $13, %r8 | |
770 | je L(StrncatExit13) | |
771 | cmp $14, %r8 | |
772 | je L(StrncatExit14) | |
773 | cmp $15, %r8 | |
774 | je L(StrncatExit15) | |
/* Not 9-15: copy 16 bytes and store the terminator at offset 16.  */
775 | movlpd (%rcx), %xmm0 | |
776 | movlpd %xmm0, (%rdx) | |
777 | movlpd 8(%rcx), %xmm1 | |
778 | movlpd %xmm1, 8(%rdx) | |
779 | xor %ah, %ah | |
780 | movb %ah, 16(%rdx) | |
781 | mov %rdi, %rax | |
782 | ret | |
783 | ||
/* strncat with a length limit of zero: nothing is copied; return the
   destination pointer unchanged.  */
784 | .p2align 4 | |
785 | L(StrncatExit0): | |
786 | mov %rdi, %rax | |
787 | ret | |
788 | ||
/* strncat path for a length limit of 9-15.  L(StartStrcpyPart)
   branches here after finding source bytes 0-8 non-NUL.
   %rcx = source, %rdx = append position, %r8 = limit.
   For each further byte, the limit is checked first (limit == N ends
   the copy via L(StrncatExitN), which appends the NUL), then the
   source byte (a NUL at offset N ends it via L(ExitN+1)).  */
789 | .p2align 4 | |
790 | L(StrncatExit15Bytes): | |
791 | cmp $9, %r8 | |
792 | je L(StrncatExit9) | |
793 | cmpb $0, 9(%rcx) | |
794 | jz L(Exit10) | |
795 | cmp $10, %r8 | |
796 | je L(StrncatExit10) | |
797 | cmpb $0, 10(%rcx) | |
798 | jz L(Exit11) | |
799 | cmp $11, %r8 | |
800 | je L(StrncatExit11) | |
801 | cmpb $0, 11(%rcx) | |
802 | jz L(Exit12) | |
803 | cmp $12, %r8 | |
804 | je L(StrncatExit12) | |
805 | cmpb $0, 12(%rcx) | |
806 | jz L(Exit13) | |
807 | cmp $13, %r8 | |
808 | je L(StrncatExit13) | |
809 | cmpb $0, 13(%rcx) | |
810 | jz L(Exit14) | |
811 | cmp $14, %r8 | |
812 | je L(StrncatExit14) | |
/* Limit is 15 and bytes 0-13 are non-NUL: copy 15 bytes with two
   overlapping 8-byte moves (offsets 0 and 7).  */
813 | movlpd (%rcx), %xmm0 | |
814 | movlpd %xmm0, (%rdx) | |
815 | movlpd 7(%rcx), %xmm1 | |
816 | movlpd %xmm1, 7(%rdx) | |
/* Place the terminator: cmpb $1 sets CF only when the copied byte at
   offset 14 is zero, and `sbb $-1' adds (1 - CF), so the NUL goes to
   offset 14 if that byte is already NUL and to offset 15 otherwise.  */
817 | lea 14(%rdx), %rax | |
818 | cmpb $1, (%rax) | |
819 | sbb $-1, %rax | |
820 | xor %cl, %cl | |
821 | movb %cl, (%rax) | |
822 | mov %rdi, %rax | |
823 | ret | |
824 | ||
/* strncat path for a length limit of 1-8.  L(StartStrcpyPart)
   branches here before looking at any source byte.
   %rcx = source, %rdx = append position, %r8 = limit.
   For each offset the source byte is checked first (a NUL at offset
   N-1 ends the copy via L(ExitN)), then the limit (limit == N ends it
   via L(StrncatExitN), which appends the NUL).  */
825 | .p2align 4 | |
826 | L(StrncatExit8Bytes): | |
827 | cmpb $0, (%rcx) | |
828 | jz L(Exit1) | |
829 | cmp $1, %r8 | |
830 | je L(StrncatExit1) | |
831 | cmpb $0, 1(%rcx) | |
832 | jz L(Exit2) | |
833 | cmp $2, %r8 | |
834 | je L(StrncatExit2) | |
835 | cmpb $0, 2(%rcx) | |
836 | jz L(Exit3) | |
837 | cmp $3, %r8 | |
838 | je L(StrncatExit3) | |
839 | cmpb $0, 3(%rcx) | |
840 | jz L(Exit4) | |
841 | cmp $4, %r8 | |
842 | je L(StrncatExit4) | |
843 | cmpb $0, 4(%rcx) | |
844 | jz L(Exit5) | |
845 | cmp $5, %r8 | |
846 | je L(StrncatExit5) | |
847 | cmpb $0, 5(%rcx) | |
848 | jz L(Exit6) | |
849 | cmp $6, %r8 | |
850 | je L(StrncatExit6) | |
851 | cmpb $0, 6(%rcx) | |
852 | jz L(Exit7) | |
853 | cmp $7, %r8 | |
854 | je L(StrncatExit7) | |
/* Limit is 8 and bytes 0-6 are non-NUL: copy 8 bytes.  */
855 | movlpd (%rcx), %xmm0 | |
856 | movlpd %xmm0, (%rdx) | |
/* Place the terminator: cmpb $1 sets CF only when the copied byte at
   offset 7 is zero, and `sbb $-1' adds (1 - CF), so the NUL goes to
   offset 7 if that byte is already NUL and to offset 8 otherwise.  */
857 | lea 7(%rdx), %rax | |
858 | cmpb $1, (%rax) | |
859 | sbb $-1, %rax | |
860 | xor %cl, %cl | |
861 | movb %cl, (%rax) | |
862 | mov %rdi, %rax | |
863 | ret | |
864 | ||
865 | # endif | |
866 | END (STRCAT) | |
867 | #endif |