]>
Commit | Line | Data |
---|---|---|
5fa16e9b | 1 | /* strcat with SSE2 |
b168057a | 2 | Copyright (C) 2011-2015 Free Software Foundation, Inc. |
5fa16e9b LD |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 PE |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ | |
5fa16e9b LD |
19 | |
20 | ||
4f41c682 | 21 | #if IS_IN (libc) |
5fa16e9b LD |
22 | |
23 | # include <sysdep.h> | |
24 | ||
25 | ||
26 | # define CFI_PUSH(REG) /* Unwind annotations that accompany a 4-byte push of REG: grow the CFA offset by 4 and record REG as saved at offset 0 from the stack pointer. */ \ | |
27 | cfi_adjust_cfa_offset (4); \ | |
28 | cfi_rel_offset (REG, 0) | |
29 | ||
30 | # define CFI_POP(REG) /* Unwind annotations that accompany a 4-byte pop of REG: shrink the CFA offset by 4 and mark REG as restored. */ \ | |
31 | cfi_adjust_cfa_offset (-4); \ | |
32 | cfi_restore (REG) | |
33 | ||
34 | # define PUSH(REG) pushl REG; CFI_PUSH (REG) /* push REG and emit the matching unwind annotations */ | |
35 | # define POP(REG) popl REG; CFI_POP (REG) /* pop REG and emit the matching unwind annotations */ | |
36 | ||
37 | # ifdef SHARED | |
38 | # define JMPTBL(I, B) I - B | |
39 | ||
40 | /* Load an entry in a jump table into ECX and branch to it. TABLE is a | |
41 | jump table with relative offsets. INDEX is a register that contains the | |
42 | index into the jump table. SCALE is the scale of INDEX. */ | |
43 | ||
44 | # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ | |
45 | /* We first load PC into ECX. */ \ | |
9a1d9254 | 46 | SETUP_PIC_REG(cx); \ |
5fa16e9b LD |
47 | /* Get the address of the jump table. */ \ |
48 | addl $(TABLE - .), %ecx; \ | |
49 | /* Get the entry and convert the relative offset to the \ | |
50 | absolute address. */ \ | |
51 | addl (%ecx,INDEX,SCALE), %ecx; \ | |
c0c3f78a | 52 | /* We loaded the jump table and adjusted ECX. Go. */ \ |
5fa16e9b LD |
53 | jmp *%ecx |
54 | # else | |
55 | # define JMPTBL(I, B) I | |
56 | ||
57 | /* Branch to an entry in a jump table. TABLE is a jump table with | |
58 | absolute addresses. INDEX is a register that contains the index into the | |
59 | jump table. SCALE is the scale of INDEX. */ | |
60 | ||
61 | # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ | |
62 | jmp *TABLE(,INDEX,SCALE) | |
63 | # endif | |
64 | ||
65 | # ifndef STRCAT | |
66 | # define STRCAT __strcat_sse2 | |
67 | # endif | |
68 | ||
69 | # define PARMS 4 | |
70 | # define STR1 PARMS+4 | |
71 | # define STR2 STR1+4 | |
72 | ||
73 | # ifdef USE_AS_STRNCAT | |
74 | # define LEN STR2+8 | |
75 | # define STR3 STR1+4 | |
76 | # else | |
77 | # define STR3 STR1 | |
78 | # endif | |
79 | ||
80 | # define USE_AS_STRCAT | |
81 | # ifdef USE_AS_STRNCAT /* The strncat variant pushes %esi and then %ebx at entry, so both are popped here; plain strcat only saves %esi. */ | |
82 | # define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%ebx); /* The CFI_PUSHes after ret re-describe the frame for the code that follows the macro; they are listed in push order (%esi, then %ebx) so each register is recorded at the slot it really occupies. */ | |
83 | # else | |
84 | # define RETURN POP(%esi); ret; CFI_PUSH(%esi); /* Same pattern with only %esi saved. */ | |
85 | # endif | |
86 | ||
87 | .text | |
88 | ENTRY (STRCAT) /* Appends the string at the second argument to the end of the string at the first argument and returns the first argument. With USE_AS_STRNCAT a third argument is loaded into ebx as a byte limit. */ | |
89 | PUSH (%esi) /* esi is used as the source pointer; save the caller's value */ | |
90 | mov STR1(%esp), %eax /* eax = first argument (destination string) */ | |
91 | mov STR2(%esp), %esi /* esi = second argument (source string) */ | |
92 | # ifdef USE_AS_STRNCAT | |
93 | PUSH (%ebx) /* ebx holds the length limit; save the caller's value */ | |
94 | movl LEN(%esp), %ebx /* ebx = third argument; LEN accounts for the two pushes */ | |
95 | test %ebx, %ebx | |
96 | jz L(ExitZero) /* limit of 0: return with eax still equal to the destination */ | |
97 | # endif | |
98 | cmpb $0, (%esi) /* is the source string empty? (flags consumed by the jz three lines below) */ | |
99 | mov %esi, %ecx /* mov does not modify flags */ | |
100 | mov %eax, %edx | |
101 | jz L(ExitZero) /* empty source: nothing to append */ | |
102 | ||
103 | and $63, %ecx /* ecx = source offset within its 64-byte block */ | |
104 | and $63, %edx /* edx = destination offset within its 64-byte block */ | |
105 | cmp $32, %ecx | |
106 | ja L(StrlenCore7_1) /* the 32 source bytes preloaded below would run past the 64-byte block: use the path that does not preload */ | |
107 | cmp $48, %edx | |
108 | ja L(alignment_prolog) /* an unaligned 16-byte load from the destination would run past the 64-byte block: use an aligned load instead */ | |
109 | ||
110 | pxor %xmm0, %xmm0 /* zero xmm0/xmm4/xmm7 so pcmpeqb against them marks NUL bytes */ | |
111 | pxor %xmm4, %xmm4 | |
112 | pxor %xmm7, %xmm7 | |
113 | movdqu (%eax), %xmm1 /* first 16 destination bytes */ | |
114 | movdqu (%esi), %xmm5 /* source bytes 0..15, kept in xmm5 for the copy phase */ | |
115 | pcmpeqb %xmm1, %xmm0 | |
116 | movdqu 16(%esi), %xmm6 /* source bytes 16..31 */ | |
117 | pmovmskb %xmm0, %ecx /* ecx = bitmask of NUL bytes in the first 16 destination bytes */ | |
118 | pcmpeqb %xmm5, %xmm4 /* xmm4 = NUL markers for source bytes 0..15 (read at StartStrcpyPart) */ | |
119 | pcmpeqb %xmm6, %xmm7 /* xmm7 = NUL markers for source bytes 16..31 (read at StartStrcpyPart) */ | |
120 | test %ecx, %ecx | |
121 | jnz L(exit_less16_) /* destination terminator lies in its first 16 bytes */ | |
122 | mov %eax, %ecx | |
123 | and $-16, %eax /* round destination down to a 16-byte boundary; the loop starts scanning at 16(%eax) */ | |
124 | jmp L(loop_prolog) | |
125 | ||
126 | L(alignment_prolog): /* Same job as the fast path at entry, but the first destination chunk is read with an aligned load and the bits belonging to bytes before the string start are shifted away. */ | |
127 | pxor %xmm0, %xmm0 | |
128 | pxor %xmm4, %xmm4 | |
129 | mov %edx, %ecx | |
130 | pxor %xmm7, %xmm7 | |
131 | and $15, %ecx /* ecx = destination offset within its 16-byte chunk */ | |
132 | and $-16, %eax /* eax = destination rounded down to a 16-byte boundary */ | |
133 | pcmpeqb (%eax), %xmm0 /* aligned compare of the chunk containing the destination start */ | |
134 | movdqu (%esi), %xmm5 /* source bytes 0..15 */ | |
135 | movdqu 16(%esi), %xmm6 /* source bytes 16..31 */ | |
136 | pmovmskb %xmm0, %edx | |
137 | pcmpeqb %xmm5, %xmm4 /* xmm4 = NUL markers for source bytes 0..15 */ | |
138 | shr %cl, %edx /* drop mask bits for bytes that precede the destination start */ | |
139 | pcmpeqb %xmm6, %xmm7 /* xmm7 = NUL markers for source bytes 16..31 */ | |
140 | test %edx, %edx | |
141 | jnz L(exit_less16) /* terminator found in this first chunk */ | |
142 | add %eax, %ecx | |
143 | ||
144 | pxor %xmm0, %xmm0 /* clear xmm0 again: it may still mark NUL bytes located before the string start */ | |
145 | L(loop_prolog): /* Scan the destination for its terminating NUL, 64 bytes per iteration using four aligned 16-byte compares. eax is 16-byte aligned here. */ | |
146 | pxor %xmm1, %xmm1 | |
147 | pxor %xmm2, %xmm2 | |
148 | pxor %xmm3, %xmm3 | |
149 | .p2align 4 | |
150 | L(align16_loop): | |
151 | pcmpeqb 16(%eax), %xmm0 /* a compare with no match leaves the register all zero, so it can be reused on the next iteration */ | |
152 | pmovmskb %xmm0, %edx | |
153 | test %edx, %edx | |
154 | jnz L(exit16) | |
155 | ||
156 | pcmpeqb 32(%eax), %xmm1 | |
157 | pmovmskb %xmm1, %edx | |
158 | test %edx, %edx | |
159 | jnz L(exit32) | |
160 | ||
161 | pcmpeqb 48(%eax), %xmm2 | |
162 | pmovmskb %xmm2, %edx | |
163 | test %edx, %edx | |
164 | jnz L(exit48) | |
165 | ||
166 | pcmpeqb 64(%eax), %xmm3 | |
167 | pmovmskb %xmm3, %edx | |
168 | lea 64(%eax), %eax /* advance by 64; lea keeps the flags untouched (they are set by the test below anyway) */ | |
169 | test %edx, %edx | |
170 | jz L(align16_loop) | |
171 | bsf %edx, %edx /* index of the first NUL within the chunk now at (%eax) */ | |
172 | add %edx, %eax /* eax = address of the destination's terminating NUL */ | |
173 | jmp L(StartStrcpyPart) | |
174 | ||
175 | .p2align 4 | |
176 | L(exit16): /* NUL found in the chunk at 16(%eax): eax = address of that NUL */ | |
177 | bsf %edx, %edx | |
178 | lea 16(%eax, %edx), %eax | |
179 | jmp L(StartStrcpyPart) | |
180 | ||
181 | .p2align 4 | |
182 | L(exit32): /* NUL found in the chunk at 32(%eax) */ | |
183 | bsf %edx, %edx | |
184 | lea 32(%eax, %edx), %eax | |
185 | jmp L(StartStrcpyPart) | |
186 | ||
187 | .p2align 4 | |
188 | L(exit48): /* NUL found in the chunk at 48(%eax) */ | |
189 | bsf %edx, %edx | |
190 | lea 48(%eax, %edx), %eax | |
191 | jmp L(StartStrcpyPart) | |
192 | ||
193 | .p2align 4 | |
194 | L(exit_less16): /* reached from alignment_prolog: eax = aligned base, ecx = start offset in the chunk, edx = shifted NUL mask */ | |
195 | bsf %edx, %edx | |
196 | add %ecx, %eax | |
197 | add %edx, %eax /* eax = aligned base + start offset + index of NUL */ | |
198 | jmp L(StartStrcpyPart) | |
199 | ||
200 | .p2align 4 | |
201 | L(exit_less16_): /* reached from the entry fast path: eax = unmodified destination, ecx = NUL mask of its first 16 bytes */ | |
202 | bsf %ecx, %ecx | |
203 | add %ecx, %eax /* falls through into StartStrcpyPart */ | |
204 | ||
205 | .p2align 4 | |
206 | L(StartStrcpyPart): /* Copy phase when the first 32 source bytes were preloaded: eax = end of destination, xmm5 = source bytes 0..15, xmm4/xmm7 = NUL markers for source bytes 0..15 / 16..31. */ | |
207 | pmovmskb %xmm4, %edx /* edx = NUL mask of source bytes 0..15 */ | |
208 | # ifdef USE_AS_STRNCAT | |
209 | cmp $16, %ebx | |
210 | jbe L(CopyFrom1To16BytesTail1Case2OrCase3) /* limit is at most 16 bytes: bounded tail copy */ | |
211 | # endif | |
212 | test %edx, %edx | |
213 | jnz L(CopyFrom1To16BytesTail1) /* source ends within its first 16 bytes */ | |
214 | ||
215 | movdqu %xmm5, (%eax) /* store source bytes 0..15 */ | |
216 | pmovmskb %xmm7, %edx /* edx = NUL mask of source bytes 16..31 */ | |
217 | # ifdef USE_AS_STRNCAT | |
218 | cmp $32, %ebx | |
219 | jbe L(CopyFrom1To32Bytes1Case2OrCase3) /* limit is at most 32 bytes: bounded tail copy */ | |
220 | # endif | |
221 | test %edx, %edx | |
222 | jnz L(CopyFrom1To32Bytes1) /* source ends within bytes 16..31 */ | |
223 | ||
224 | mov %esi, %ecx | |
225 | and $-16, %esi /* esi = source rounded down to a 16-byte boundary */ | |
226 | and $15, %ecx /* ecx = number of bytes the source was moved back */ | |
227 | pxor %xmm0, %xmm0 | |
228 | # ifdef USE_AS_STRNCAT | |
229 | add %ecx, %ebx /* compensate the limit for moving the source back */ | |
230 | # endif | |
231 | sub %ecx, %eax /* move the destination back by the same amount so equal offsets from esi and eax correspond */ | |
232 | jmp L(Unalign16Both) | |
233 | ||
234 | L(StrlenCore7_1): /* Destination scan used when the source offset within its 64-byte block is above 32; unlike the other entry paths, no source bytes are preloaded here. */ | |
235 | mov %eax, %ecx | |
236 | pxor %xmm0, %xmm0 | |
237 | and $15, %ecx /* ecx = destination offset within its 16-byte chunk */ | |
238 | and $-16, %eax /* eax = destination rounded down to a 16-byte boundary */ | |
239 | pcmpeqb (%eax), %xmm0 | |
240 | pmovmskb %xmm0, %edx | |
241 | shr %cl, %edx /* drop mask bits for bytes that precede the destination start */ | |
242 | test %edx, %edx | |
243 | jnz L(exit_less16_1) /* terminator found in the first chunk */ | |
244 | add %eax, %ecx | |
245 | ||
246 | pxor %xmm0, %xmm0 /* zero the four compare registers used by the scan loop that follows */ | |
247 | pxor %xmm1, %xmm1 | |
248 | pxor %xmm2, %xmm2 | |
249 | pxor %xmm3, %xmm3 | |
250 | ||
251 | .p2align 4 | |
252 | L(align16_loop_1): /* Scan the destination for its NUL, 64 bytes per iteration; same structure as align16_loop but exits to StartStrcpyPart_1. */ | |
253 | pcmpeqb 16(%eax), %xmm0 | |
254 | pmovmskb %xmm0, %edx | |
255 | test %edx, %edx | |
256 | jnz L(exit16_1) | |
257 | ||
258 | pcmpeqb 32(%eax), %xmm1 | |
259 | pmovmskb %xmm1, %edx | |
260 | test %edx, %edx | |
261 | jnz L(exit32_1) | |
262 | ||
263 | pcmpeqb 48(%eax), %xmm2 | |
264 | pmovmskb %xmm2, %edx | |
265 | test %edx, %edx | |
266 | jnz L(exit48_1) | |
267 | ||
268 | pcmpeqb 64(%eax), %xmm3 | |
269 | pmovmskb %xmm3, %edx | |
270 | lea 64(%eax), %eax /* advance to the chunk just compared */ | |
271 | test %edx, %edx | |
272 | jz L(align16_loop_1) | |
273 | bsf %edx, %edx /* index of the first NUL within the chunk at (%eax) */ | |
274 | add %edx, %eax /* eax = address of the destination's terminating NUL */ | |
275 | jmp L(StartStrcpyPart_1) | |
276 | ||
277 | .p2align 4 | |
278 | L(exit16_1): /* NUL found in the chunk at 16(%eax): eax = address of that NUL */ | |
279 | bsf %edx, %edx | |
280 | lea 16(%eax, %edx), %eax | |
281 | jmp L(StartStrcpyPart_1) | |
282 | ||
283 | .p2align 4 | |
284 | L(exit32_1): /* NUL found in the chunk at 32(%eax) */ | |
285 | bsf %edx, %edx | |
286 | lea 32(%eax, %edx), %eax | |
287 | jmp L(StartStrcpyPart_1) | |
288 | ||
289 | .p2align 4 | |
290 | L(exit48_1): /* NUL found in the chunk at 48(%eax) */ | |
291 | bsf %edx, %edx | |
292 | lea 48(%eax, %edx), %eax | |
293 | jmp L(StartStrcpyPart_1) | |
294 | ||
295 | .p2align 4 | |
296 | L(exit_less16_1): /* reached from StrlenCore7_1: eax = aligned base, ecx = start offset in the chunk, edx = shifted NUL mask */ | |
297 | bsf %edx, %edx | |
298 | add %ecx, %eax | |
299 | add %edx, %eax /* eax = address of the NUL; falls through into StartStrcpyPart_1 */ | |
300 | ||
301 | .p2align 4 | |
302 | L(StartStrcpyPart_1): /* Copy phase when no source bytes were preloaded: eax = end of destination, esi = source. The source is examined through aligned 16-byte compares. */ | |
303 | mov %esi, %ecx | |
304 | and $15, %ecx /* ecx = source offset within its 16-byte chunk */ | |
305 | and $-16, %esi /* esi = source rounded down to a 16-byte boundary */ | |
306 | pxor %xmm0, %xmm0 | |
307 | pxor %xmm1, %xmm1 | |
308 | ||
309 | # ifdef USE_AS_STRNCAT | |
310 | cmp $48, %ebx | |
311 | ja L(BigN) /* limit above 48: take the variant without per-chunk limit checks */ | |
312 | # endif | |
313 | pcmpeqb (%esi), %xmm1 /* NUL markers for the aligned chunk containing the source start */ | |
314 | # ifdef USE_AS_STRNCAT | |
315 | add %ecx, %ebx /* count the limit from the aligned base instead of the real start */ | |
316 | # endif | |
317 | pmovmskb %xmm1, %edx | |
318 | shr %cl, %edx /* drop mask bits for bytes that precede the source start */ | |
319 | # ifdef USE_AS_STRNCAT | |
320 | cmp $16, %ebx | |
321 | jbe L(CopyFrom1To16BytesTailCase2OrCase3) /* limit ends within this chunk */ | |
322 | # endif | |
323 | test %edx, %edx | |
324 | jnz L(CopyFrom1To16BytesTail) /* source ends within this chunk */ | |
325 | ||
326 | pcmpeqb 16(%esi), %xmm0 /* NUL markers for the next aligned chunk */ | |
327 | pmovmskb %xmm0, %edx | |
328 | # ifdef USE_AS_STRNCAT | |
329 | cmp $32, %ebx | |
330 | jbe L(CopyFrom1To32BytesCase2OrCase3) /* limit ends within the second chunk */ | |
331 | # endif | |
332 | test %edx, %edx | |
333 | jnz L(CopyFrom1To32Bytes) /* source ends within the second chunk */ | |
334 | ||
335 | movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ | |
336 | movdqu %xmm1, (%eax) | |
337 | sub %ecx, %eax /* move the destination back by the source misalignment so equal offsets from esi and eax correspond; falls through into Unalign16Both */ | |
338 | ||
339 | .p2align 4 | |
340 | L(Unalign16Both): | |
341 | mov $16, %ecx | |
342 | movdqa (%esi, %ecx), %xmm1 | |
343 | movaps 16(%esi, %ecx), %xmm2 | |
344 | movdqu %xmm1, (%eax, %ecx) | |
345 | pcmpeqb %xmm2, %xmm0 | |
346 | pmovmskb %xmm0, %edx | |
347 | add $16, %ecx | |
348 | # ifdef USE_AS_STRNCAT | |
349 | sub $48, %ebx | |
350 | jbe L(CopyFrom1To16BytesCase2OrCase3) | |
351 | # endif | |
352 | test %edx, %edx | |
353 | jnz L(CopyFrom1To16Bytes) | |
354 | L(Unalign16BothBigN): | |
355 | movaps 16(%esi, %ecx), %xmm3 | |
356 | movdqu %xmm2, (%eax, %ecx) | |
357 | pcmpeqb %xmm3, %xmm0 | |
358 | pmovmskb %xmm0, %edx | |
359 | add $16, %ecx | |
360 | # ifdef USE_AS_STRNCAT | |
361 | sub $16, %ebx | |
362 | jbe L(CopyFrom1To16BytesCase2OrCase3) | |
363 | # endif | |
364 | test %edx, %edx | |
365 | jnz L(CopyFrom1To16Bytes) | |
366 | ||
367 | movaps 16(%esi, %ecx), %xmm4 | |
368 | movdqu %xmm3, (%eax, %ecx) | |
369 | pcmpeqb %xmm4, %xmm0 | |
370 | pmovmskb %xmm0, %edx | |
371 | add $16, %ecx | |
372 | # ifdef USE_AS_STRNCAT | |
373 | sub $16, %ebx | |
374 | jbe L(CopyFrom1To16BytesCase2OrCase3) | |
375 | # endif | |
376 | test %edx, %edx | |
377 | jnz L(CopyFrom1To16Bytes) | |
378 | ||
379 | movaps 16(%esi, %ecx), %xmm1 | |
380 | movdqu %xmm4, (%eax, %ecx) | |
381 | pcmpeqb %xmm1, %xmm0 | |
382 | pmovmskb %xmm0, %edx | |
383 | add $16, %ecx | |
384 | # ifdef USE_AS_STRNCAT | |
385 | sub $16, %ebx | |
386 | jbe L(CopyFrom1To16BytesCase2OrCase3) | |
387 | # endif | |
388 | test %edx, %edx | |
389 | jnz L(CopyFrom1To16Bytes) | |
390 | ||
391 | movaps 16(%esi, %ecx), %xmm2 | |
392 | movdqu %xmm1, (%eax, %ecx) | |
393 | pcmpeqb %xmm2, %xmm0 | |
394 | pmovmskb %xmm0, %edx | |
395 | add $16, %ecx | |
396 | # ifdef USE_AS_STRNCAT | |
397 | sub $16, %ebx | |
398 | jbe L(CopyFrom1To16BytesCase2OrCase3) | |
399 | # endif | |
400 | test %edx, %edx | |
401 | jnz L(CopyFrom1To16Bytes) | |
402 | ||
403 | movaps 16(%esi, %ecx), %xmm3 | |
404 | movdqu %xmm2, (%eax, %ecx) | |
405 | pcmpeqb %xmm3, %xmm0 | |
406 | pmovmskb %xmm0, %edx | |
407 | add $16, %ecx | |
408 | # ifdef USE_AS_STRNCAT | |
409 | sub $16, %ebx | |
410 | jbe L(CopyFrom1To16BytesCase2OrCase3) | |
411 | # endif | |
412 | test %edx, %edx | |
413 | jnz L(CopyFrom1To16Bytes) | |
414 | ||
415 | movdqu %xmm3, (%eax, %ecx) | |
416 | mov %esi, %edx | |
417 | lea 16(%esi, %ecx), %esi | |
418 | and $-0x40, %esi | |
419 | sub %esi, %edx | |
420 | sub %edx, %eax | |
421 | # ifdef USE_AS_STRNCAT | |
422 | lea 128(%ebx, %edx), %ebx | |
423 | # endif | |
424 | movaps (%esi), %xmm2 | |
425 | movaps %xmm2, %xmm4 | |
426 | movaps 16(%esi), %xmm5 | |
427 | movaps 32(%esi), %xmm3 | |
428 | movaps %xmm3, %xmm6 | |
429 | movaps 48(%esi), %xmm7 | |
430 | pminub %xmm5, %xmm2 | |
431 | pminub %xmm7, %xmm3 | |
432 | pminub %xmm2, %xmm3 | |
433 | pcmpeqb %xmm0, %xmm3 | |
434 | pmovmskb %xmm3, %edx | |
435 | # ifdef USE_AS_STRNCAT | |
436 | sub $64, %ebx | |
437 | jbe L(UnalignedLeaveCase2OrCase3) | |
438 | # endif | |
439 | test %edx, %edx | |
440 | jnz L(Unaligned64Leave) | |
441 | ||
442 | .p2align 4 | |
443 | L(Unaligned64Loop_start): /* Main loop: store the 64 source bytes held in xmm4..xmm7 from the previous round, load the next 64 bytes with aligned loads, and test them for a NUL. */ | |
444 | add $64, %eax | |
445 | add $64, %esi | |
446 | movdqu %xmm4, -64(%eax) /* store previous block, bytes 0..15 */ | |
447 | movaps (%esi), %xmm2 | |
448 | movdqa %xmm2, %xmm4 /* keep an unmodified copy; xmm2 is overwritten by pminub */ | |
449 | movdqu %xmm5, -48(%eax) /* store previous block, bytes 16..31 */ | |
450 | movaps 16(%esi), %xmm5 | |
451 | pminub %xmm5, %xmm2 /* byte-wise minimum: a byte is 0 if either input byte is 0 */ | |
452 | movaps 32(%esi), %xmm3 | |
453 | movdqu %xmm6, -32(%eax) /* store previous block, bytes 32..47 */ | |
454 | movaps %xmm3, %xmm6 /* keep an unmodified copy; xmm3 is overwritten by pminub */ | |
455 | movdqu %xmm7, -16(%eax) /* store previous block, bytes 48..63 */ | |
456 | movaps 48(%esi), %xmm7 | |
457 | pminub %xmm7, %xmm3 | |
458 | pminub %xmm2, %xmm3 /* xmm3 = byte-wise minimum over all four chunks */ | |
459 | pcmpeqb %xmm0, %xmm3 /* NOTE(review): relies on xmm0 being all zero here - confirm on every path into the loop */ | |
460 | pmovmskb %xmm3, %edx /* nonzero if any of the 64 bytes is NUL */ | |
461 | # ifdef USE_AS_STRNCAT | |
462 | sub $64, %ebx | |
463 | jbe L(UnalignedLeaveCase2OrCase3) /* length limit falls within this 64-byte block */ | |
464 | # endif | |
465 | test %edx, %edx | |
466 | jz L(Unaligned64Loop_start) /* no NUL in this block: keep going */ | |
467 | ||
468 | L(Unaligned64Leave): /* A NUL lies somewhere in the 64 bytes held in xmm4..xmm7 (not yet stored). Find the 16-byte chunk containing it, store the chunks before it, and finish through the exit table. */ | |
469 | pxor %xmm1, %xmm1 | |
470 | ||
471 | pcmpeqb %xmm4, %xmm0 | |
472 | pcmpeqb %xmm5, %xmm1 | |
473 | pmovmskb %xmm0, %edx /* edx = NUL mask of chunk 0 */ | |
474 | pmovmskb %xmm1, %ecx /* ecx = NUL mask of chunk 1 */ | |
475 | test %edx, %edx | |
476 | jnz L(CopyFrom1To16BytesUnaligned_0) | |
477 | test %ecx, %ecx | |
478 | jnz L(CopyFrom1To16BytesUnaligned_16) | |
479 | ||
480 | pcmpeqb %xmm6, %xmm0 /* xmm0 and xmm1 are all zero here because neither compare above matched */ | |
481 | pcmpeqb %xmm7, %xmm1 | |
482 | pmovmskb %xmm0, %edx /* edx = NUL mask of chunk 2 */ | |
483 | pmovmskb %xmm1, %ecx /* ecx = NUL mask of chunk 3 */ | |
484 | test %edx, %edx | |
485 | jnz L(CopyFrom1To16BytesUnaligned_32) | |
486 | ||
487 | bsf %ecx, %edx /* NUL is in chunk 3: edx = its index within that chunk */ | |
488 | movdqu %xmm4, (%eax) /* store the three complete chunks */ | |
489 | movdqu %xmm5, 16(%eax) | |
490 | movdqu %xmm6, 32(%eax) | |
491 | add $48, %esi /* point both pointers at chunk 3 for the exit stub */ | |
492 | add $48, %eax | |
493 | BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) | |
494 | ||
495 | # ifdef USE_AS_STRNCAT | |
496 | .p2align 4 | |
497 | L(BigN): | |
498 | pcmpeqb (%esi), %xmm1 | |
499 | pmovmskb %xmm1, %edx | |
500 | shr %cl, %edx | |
501 | test %edx, %edx | |
502 | jnz L(CopyFrom1To16BytesTail) | |
503 | ||
504 | pcmpeqb 16(%esi), %xmm0 | |
505 | pmovmskb %xmm0, %edx | |
506 | test %edx, %edx | |
507 | jnz L(CopyFrom1To32Bytes) | |
508 | ||
509 | movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ | |
510 | movdqu %xmm1, (%eax) | |
511 | sub %ecx, %eax | |
512 | sub $48, %ebx | |
513 | add %ecx, %ebx | |
514 | ||
515 | mov $16, %ecx | |
516 | movdqa (%esi, %ecx), %xmm1 | |
517 | movaps 16(%esi, %ecx), %xmm2 | |
518 | movdqu %xmm1, (%eax, %ecx) | |
519 | pcmpeqb %xmm2, %xmm0 | |
520 | pmovmskb %xmm0, %edx | |
521 | add $16, %ecx | |
522 | test %edx, %edx | |
523 | jnz L(CopyFrom1To16Bytes) | |
524 | jmp L(Unalign16BothBigN) | |
525 | # endif | |
526 | ||
527 | /*------------end of main part-------------------------------*/ | |
528 | ||
529 | /* Case1 */ | |
530 | .p2align 4 | |
531 | L(CopyFrom1To16Bytes): /* Tail dispatchers: each one makes esi/eax point at the remaining bytes, sets edx to the index of the source NUL, and jumps through ExitTable (entry 0 is Exit1, entry 1 is Exit2, ...). */ | |
532 | add %ecx, %eax /* ecx = offset of the chunk holding the NUL, relative to both pointers */ | |
533 | add %ecx, %esi | |
534 | bsf %edx, %edx | |
535 | BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) | |
536 | ||
537 | .p2align 4 | |
538 | L(CopyFrom1To16BytesTail): /* esi was rounded down by ecx bytes; undo that, eax is already in place */ | |
539 | add %ecx, %esi | |
540 | bsf %edx, %edx | |
541 | BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) | |
542 | ||
543 | .p2align 4 | |
544 | L(CopyFrom1To32Bytes1): /* the first 16 bytes are already stored: skip them on both sides */ | |
545 | add $16, %esi | |
546 | add $16, %eax | |
547 | L(CopyFrom1To16BytesTail1): | |
548 | bsf %edx, %edx | |
549 | BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) | |
550 | ||
551 | .p2align 4 | |
552 | L(CopyFrom1To32Bytes): /* NUL is in the second aligned chunk and nothing has been stored yet */ | |
553 | bsf %edx, %edx | |
554 | add %ecx, %esi /* esi = original (unaligned) source start */ | |
555 | add $16, %edx | |
556 | sub %ecx, %edx /* edx = 16 + index in second chunk - misalignment = NUL index from the source start */ | |
557 | BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) | |
558 | ||
559 | .p2align 4 | |
560 | L(CopyFrom1To16BytesUnaligned_0): /* NUL is in chunk 0 of the pending 64-byte block */ | |
561 | bsf %edx, %edx | |
562 | BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) | |
563 | ||
564 | .p2align 4 | |
565 | L(CopyFrom1To16BytesUnaligned_16): /* NUL is in chunk 1: store chunk 0, then finish from chunk 1 */ | |
566 | bsf %ecx, %edx | |
567 | movdqu %xmm4, (%eax) | |
568 | add $16, %esi | |
569 | add $16, %eax | |
570 | BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) | |
571 | ||
572 | .p2align 4 | |
573 | L(CopyFrom1To16BytesUnaligned_32): /* NUL is in chunk 2: store chunks 0 and 1, then finish from chunk 2 */ | |
574 | bsf %edx, %edx | |
575 | movdqu %xmm4, (%eax) | |
576 | movdqu %xmm5, 16(%eax) | |
577 | add $32, %esi | |
578 | add $32, %eax | |
579 | BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) | |
580 | ||
581 | # ifdef USE_AS_STRNCAT | |
582 | ||
583 | .p2align 4 | |
584 | L(CopyFrom1To16BytesExit): | |
585 | BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) | |
586 | ||
587 | /* Case2 */ | |
588 | ||
589 | .p2align 4 | |
590 | L(CopyFrom1To16BytesCase2): | |
591 | add $16, %ebx | |
592 | add %ecx, %eax | |
593 | add %ecx, %esi | |
594 | bsf %edx, %edx | |
595 | cmp %ebx, %edx | |
596 | jb L(CopyFrom1To16BytesExit) | |
597 | BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) | |
598 | ||
599 | .p2align 4 | |
600 | L(CopyFrom1To32BytesCase2): | |
601 | sub %ecx, %ebx | |
602 | add %ecx, %esi | |
603 | bsf %edx, %edx | |
604 | add $16, %edx | |
605 | sub %ecx, %edx | |
606 | cmp %ebx, %edx | |
607 | jb L(CopyFrom1To16BytesExit) | |
608 | BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) | |
609 | ||
610 | L(CopyFrom1To16BytesTailCase2): | |
611 | sub %ecx, %ebx | |
612 | add %ecx, %esi | |
613 | bsf %edx, %edx | |
614 | cmp %ebx, %edx | |
615 | jb L(CopyFrom1To16BytesExit) | |
616 | BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) | |
617 | ||
618 | L(CopyFrom1To16BytesTail1Case2): | |
619 | bsf %edx, %edx | |
620 | cmp %ebx, %edx | |
621 | jb L(CopyFrom1To16BytesExit) | |
622 | BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) | |
623 | ||
624 | /* Case2 or Case3, Case3 */ | |
625 | ||
626 | .p2align 4 | |
627 | L(CopyFrom1To16BytesCase2OrCase3): | |
628 | test %edx, %edx | |
629 | jnz L(CopyFrom1To16BytesCase2) | |
630 | L(CopyFrom1To16BytesCase3): | |
631 | add $16, %ebx | |
632 | add %ecx, %eax | |
633 | add %ecx, %esi | |
634 | BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) | |
635 | ||
636 | .p2align 4 | |
637 | L(CopyFrom1To32BytesCase2OrCase3): | |
638 | test %edx, %edx | |
639 | jnz L(CopyFrom1To32BytesCase2) | |
640 | sub %ecx, %ebx | |
641 | add %ecx, %esi | |
642 | BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) | |
643 | ||
644 | .p2align 4 | |
645 | L(CopyFrom1To16BytesTailCase2OrCase3): | |
646 | test %edx, %edx | |
647 | jnz L(CopyFrom1To16BytesTailCase2) | |
648 | sub %ecx, %ebx | |
649 | add %ecx, %esi | |
650 | BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) | |
651 | ||
652 | .p2align 4 | |
653 | L(CopyFrom1To32Bytes1Case2OrCase3): | |
654 | add $16, %eax | |
655 | add $16, %esi | |
656 | sub $16, %ebx | |
657 | L(CopyFrom1To16BytesTail1Case2OrCase3): | |
658 | test %edx, %edx | |
659 | jnz L(CopyFrom1To16BytesTail1Case2) | |
660 | BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) | |
661 | ||
662 | # endif | |
663 | ||
664 | # ifdef USE_AS_STRNCAT | |
665 | .p2align 4 | |
666 | L(StrncatExit0): | |
667 | movb %bh, (%eax) | |
668 | mov STR3(%esp), %eax | |
669 | RETURN | |
670 | # endif | |
671 | ||
672 | .p2align 4 | |
673 | # ifdef USE_AS_STRNCAT | |
674 | L(StrncatExit1): | |
675 | movb %bh, 1(%eax) | |
676 | # endif | |
677 | L(Exit1): | |
678 | # ifdef USE_AS_STRNCAT | |
679 | movb (%esi), %dh | |
680 | # endif | |
681 | movb %dh, (%eax) | |
682 | mov STR3(%esp), %eax | |
683 | RETURN | |
684 | ||
685 | .p2align 4 | |
686 | # ifdef USE_AS_STRNCAT | |
687 | L(StrncatExit2): | |
688 | movb %bh, 2(%eax) | |
689 | # endif | |
690 | L(Exit2): | |
691 | movw (%esi), %dx | |
692 | movw %dx, (%eax) | |
693 | mov STR3(%esp), %eax | |
694 | RETURN | |
695 | ||
696 | .p2align 4 | |
697 | # ifdef USE_AS_STRNCAT | |
698 | L(StrncatExit3): | |
699 | movb %bh, 3(%eax) | |
700 | # endif | |
701 | L(Exit3): | |
702 | movw (%esi), %cx | |
703 | movw %cx, (%eax) | |
704 | # ifdef USE_AS_STRNCAT | |
705 | movb 2(%esi), %dh | |
706 | # endif | |
707 | movb %dh, 2(%eax) | |
708 | mov STR3(%esp), %eax | |
709 | RETURN | |
710 | ||
711 | .p2align 4 | |
712 | # ifdef USE_AS_STRNCAT | |
713 | L(StrncatExit4): | |
714 | movb %bh, 4(%eax) | |
715 | # endif | |
716 | L(Exit4): | |
717 | movl (%esi), %edx | |
718 | movl %edx, (%eax) | |
719 | mov STR3(%esp), %eax | |
720 | RETURN | |
721 | ||
722 | .p2align 4 | |
723 | # ifdef USE_AS_STRNCAT | |
724 | L(StrncatExit5): | |
725 | movb %bh, 5(%eax) | |
726 | # endif | |
727 | L(Exit5): | |
728 | movl (%esi), %ecx | |
729 | # ifdef USE_AS_STRNCAT | |
730 | movb 4(%esi), %dh | |
731 | # endif | |
732 | movb %dh, 4(%eax) | |
733 | movl %ecx, (%eax) | |
734 | mov STR3(%esp), %eax | |
735 | RETURN | |
736 | ||
737 | .p2align 4 | |
738 | # ifdef USE_AS_STRNCAT | |
739 | L(StrncatExit6): | |
740 | movb %bh, 6(%eax) | |
741 | # endif | |
742 | L(Exit6): | |
743 | movl (%esi), %ecx | |
744 | movw 4(%esi), %dx | |
745 | movl %ecx, (%eax) | |
746 | movw %dx, 4(%eax) | |
747 | mov STR3(%esp), %eax | |
748 | RETURN | |
749 | ||
750 | .p2align 4 | |
751 | # ifdef USE_AS_STRNCAT | |
752 | L(StrncatExit7): | |
753 | movb %bh, 7(%eax) | |
754 | # endif | |
755 | L(Exit7): | |
756 | movl (%esi), %ecx | |
757 | movl 3(%esi), %edx | |
758 | movl %ecx, (%eax) | |
759 | movl %edx, 3(%eax) | |
760 | mov STR3(%esp), %eax | |
761 | RETURN | |
762 | ||
763 | .p2align 4 | |
764 | # ifdef USE_AS_STRNCAT | |
765 | L(StrncatExit8): | |
766 | movb %bh, 8(%eax) | |
767 | # endif | |
768 | L(Exit8): | |
769 | movlpd (%esi), %xmm0 | |
770 | movlpd %xmm0, (%eax) | |
771 | mov STR3(%esp), %eax | |
772 | RETURN | |
773 | ||
774 | .p2align 4 | |
775 | # ifdef USE_AS_STRNCAT | |
776 | L(StrncatExit9): | |
777 | movb %bh, 9(%eax) | |
778 | # endif | |
779 | L(Exit9): | |
780 | movlpd (%esi), %xmm0 | |
781 | # ifdef USE_AS_STRNCAT | |
782 | movb 8(%esi), %dh | |
783 | # endif | |
784 | movb %dh, 8(%eax) | |
785 | movlpd %xmm0, (%eax) | |
786 | mov STR3(%esp), %eax | |
787 | RETURN | |
788 | ||
789 | .p2align 4 | |
790 | # ifdef USE_AS_STRNCAT | |
791 | L(StrncatExit10): | |
792 | movb %bh, 10(%eax) | |
793 | # endif | |
794 | L(Exit10): | |
795 | movlpd (%esi), %xmm0 | |
796 | movw 8(%esi), %dx | |
797 | movlpd %xmm0, (%eax) | |
798 | movw %dx, 8(%eax) | |
799 | mov STR3(%esp), %eax | |
800 | RETURN | |
801 | ||
802 | .p2align 4 | |
803 | # ifdef USE_AS_STRNCAT | |
804 | L(StrncatExit11): | |
805 | movb %bh, 11(%eax) | |
806 | # endif | |
807 | L(Exit11): | |
808 | movlpd (%esi), %xmm0 | |
809 | movl 7(%esi), %edx | |
810 | movlpd %xmm0, (%eax) | |
811 | movl %edx, 7(%eax) | |
812 | mov STR3(%esp), %eax | |
813 | RETURN | |
814 | ||
815 | .p2align 4 | |
816 | # ifdef USE_AS_STRNCAT | |
817 | L(StrncatExit12): | |
818 | movb %bh, 12(%eax) | |
819 | # endif | |
820 | L(Exit12): | |
821 | movlpd (%esi), %xmm0 | |
822 | movl 8(%esi), %edx | |
823 | movlpd %xmm0, (%eax) | |
824 | movl %edx, 8(%eax) | |
825 | mov STR3(%esp), %eax | |
826 | RETURN | |
827 | ||
828 | .p2align 4 | |
829 | # ifdef USE_AS_STRNCAT | |
830 | L(StrncatExit13): | |
831 | movb %bh, 13(%eax) | |
832 | # endif | |
833 | L(Exit13): | |
834 | movlpd (%esi), %xmm0 | |
835 | movlpd 5(%esi), %xmm1 | |
836 | movlpd %xmm0, (%eax) | |
837 | movlpd %xmm1, 5(%eax) | |
838 | mov STR3(%esp), %eax | |
839 | RETURN | |
840 | ||
841 | .p2align 4 | |
842 | # ifdef USE_AS_STRNCAT | |
843 | L(StrncatExit14): | |
844 | movb %bh, 14(%eax) | |
845 | # endif | |
846 | L(Exit14): | |
847 | movlpd (%esi), %xmm0 | |
848 | movlpd 6(%esi), %xmm1 | |
849 | movlpd %xmm0, (%eax) | |
850 | movlpd %xmm1, 6(%eax) | |
851 | mov STR3(%esp), %eax | |
852 | RETURN | |
853 | ||
854 | .p2align 4 | |
855 | # ifdef USE_AS_STRNCAT | |
856 | L(StrncatExit15): | |
857 | movb %bh, 15(%eax) | |
858 | # endif | |
859 | L(Exit15): | |
860 | movlpd (%esi), %xmm0 | |
861 | movlpd 7(%esi), %xmm1 | |
862 | movlpd %xmm0, (%eax) | |
863 | movlpd %xmm1, 7(%eax) | |
864 | mov STR3(%esp), %eax | |
865 | RETURN | |
866 | ||
867 | .p2align 4 | |
868 | # ifdef USE_AS_STRNCAT | |
869 | L(StrncatExit16): | |
870 | movb %bh, 16(%eax) | |
871 | # endif | |
872 | L(Exit16): | |
873 | movdqu (%esi), %xmm0 | |
874 | movdqu %xmm0, (%eax) | |
875 | mov STR3(%esp), %eax | |
876 | RETURN | |
877 | ||
878 | .p2align 4 | |
879 | # ifdef USE_AS_STRNCAT | |
880 | L(StrncatExit17): | |
881 | movb %bh, 17(%eax) | |
882 | # endif | |
883 | L(Exit17): | |
884 | movdqu (%esi), %xmm0 | |
885 | # ifdef USE_AS_STRNCAT | |
886 | movb 16(%esi), %dh | |
887 | # endif | |
888 | movdqu %xmm0, (%eax) | |
889 | movb %dh, 16(%eax) | |
890 | mov STR3(%esp), %eax | |
891 | RETURN | |
892 | ||
893 | .p2align 4 | |
894 | # ifdef USE_AS_STRNCAT | |
895 | L(StrncatExit18): | |
896 | movb %bh, 18(%eax) | |
897 | # endif | |
898 | L(Exit18): | |
899 | movdqu (%esi), %xmm0 | |
900 | movw 16(%esi), %cx | |
901 | movdqu %xmm0, (%eax) | |
902 | movw %cx, 16(%eax) | |
903 | mov STR3(%esp), %eax | |
904 | RETURN | |
905 | ||
906 | .p2align 4 | |
907 | # ifdef USE_AS_STRNCAT | |
908 | L(StrncatExit19): | |
909 | movb %bh, 19(%eax) | |
910 | # endif | |
911 | L(Exit19): | |
912 | movdqu (%esi), %xmm0 | |
913 | movl 15(%esi), %ecx | |
914 | movdqu %xmm0, (%eax) | |
915 | movl %ecx, 15(%eax) | |
916 | mov STR3(%esp), %eax | |
917 | RETURN | |
918 | ||
919 | .p2align 4 | |
920 | # ifdef USE_AS_STRNCAT | |
921 | L(StrncatExit20): | |
922 | movb %bh, 20(%eax) | |
923 | # endif | |
924 | L(Exit20): | |
925 | movdqu (%esi), %xmm0 | |
926 | movl 16(%esi), %ecx | |
927 | movdqu %xmm0, (%eax) | |
928 | movl %ecx, 16(%eax) | |
929 | mov STR3(%esp), %eax | |
930 | RETURN | |
931 | ||
932 | .p2align 4 | |
933 | # ifdef USE_AS_STRNCAT | |
934 | L(StrncatExit21): | |
935 | movb %bh, 21(%eax) | |
936 | # endif | |
937 | L(Exit21): | |
938 | movdqu (%esi), %xmm0 | |
939 | movl 16(%esi), %ecx | |
940 | # ifdef USE_AS_STRNCAT | |
941 | movb 20(%esi), %dh | |
942 | # endif | |
943 | movdqu %xmm0, (%eax) | |
944 | movl %ecx, 16(%eax) | |
945 | movb %dh, 20(%eax) | |
946 | mov STR3(%esp), %eax | |
947 | RETURN | |
948 | ||
949 | .p2align 4 | |
950 | # ifdef USE_AS_STRNCAT | |
951 | L(StrncatExit22): | |
952 | movb %bh, 22(%eax) | |
953 | # endif | |
954 | L(Exit22): | |
955 | movdqu (%esi), %xmm0 | |
956 | movlpd 14(%esi), %xmm3 | |
957 | movdqu %xmm0, (%eax) | |
958 | movlpd %xmm3, 14(%eax) | |
959 | mov STR3(%esp), %eax | |
960 | RETURN | |
961 | ||
962 | .p2align 4 | |
963 | # ifdef USE_AS_STRNCAT | |
964 | L(StrncatExit23): | |
965 | movb %bh, 23(%eax) | |
966 | # endif | |
967 | L(Exit23): | |
968 | movdqu (%esi), %xmm0 | |
969 | movlpd 15(%esi), %xmm3 | |
970 | movdqu %xmm0, (%eax) | |
971 | movlpd %xmm3, 15(%eax) | |
972 | mov STR3(%esp), %eax | |
973 | RETURN | |
974 | ||
975 | .p2align 4 | |
976 | # ifdef USE_AS_STRNCAT | |
977 | L(StrncatExit24): | |
978 | movb %bh, 24(%eax) | |
979 | # endif | |
980 | L(Exit24): | |
981 | movdqu (%esi), %xmm0 | |
982 | movlpd 16(%esi), %xmm2 | |
983 | movdqu %xmm0, (%eax) | |
984 | movlpd %xmm2, 16(%eax) | |
985 | mov STR3(%esp), %eax | |
986 | RETURN | |
987 | ||
988 | .p2align 4 | |
989 | # ifdef USE_AS_STRNCAT | |
990 | L(StrncatExit25): | |
991 | movb %bh, 25(%eax) | |
992 | # endif | |
993 | L(Exit25): | |
994 | movdqu (%esi), %xmm0 | |
995 | movlpd 16(%esi), %xmm2 | |
996 | # ifdef USE_AS_STRNCAT | |
997 | movb 24(%esi), %dh | |
998 | # endif | |
999 | movdqu %xmm0, (%eax) | |
1000 | movlpd %xmm2, 16(%eax) | |
1001 | movb %dh, 24(%eax) | |
1002 | mov STR3(%esp), %eax | |
1003 | RETURN | |
1004 | ||
1005 | .p2align 4 | |
1006 | # ifdef USE_AS_STRNCAT | |
1007 | L(StrncatExit26): | |
1008 | movb %bh, 26(%eax) | |
1009 | # endif | |
1010 | L(Exit26): | |
1011 | movdqu (%esi), %xmm0 | |
1012 | movlpd 16(%esi), %xmm2 | |
1013 | movw 24(%esi), %cx | |
1014 | movdqu %xmm0, (%eax) | |
1015 | movlpd %xmm2, 16(%eax) | |
1016 | movw %cx, 24(%eax) | |
1017 | mov STR3(%esp), %eax | |
1018 | RETURN | |
1019 | ||
1020 | .p2align 4 | |
1021 | # ifdef USE_AS_STRNCAT | |
1022 | L(StrncatExit27): | |
1023 | movb %bh, 27(%eax) | |
1024 | # endif | |
1025 | L(Exit27): | |
1026 | movdqu (%esi), %xmm0 | |
1027 | movlpd 16(%esi), %xmm2 | |
1028 | movl 23(%esi), %ecx | |
1029 | movdqu %xmm0, (%eax) | |
1030 | movlpd %xmm2, 16(%eax) | |
1031 | movl %ecx, 23(%eax) | |
1032 | mov STR3(%esp), %eax | |
1033 | RETURN | |
1034 | ||
1035 | .p2align 4 | |
1036 | # ifdef USE_AS_STRNCAT | |
1037 | L(StrncatExit28): | |
1038 | movb %bh, 28(%eax) | |
1039 | # endif | |
1040 | L(Exit28): | |
1041 | movdqu (%esi), %xmm0 | |
1042 | movlpd 16(%esi), %xmm2 | |
1043 | movl 24(%esi), %ecx | |
1044 | movdqu %xmm0, (%eax) | |
1045 | movlpd %xmm2, 16(%eax) | |
1046 | movl %ecx, 24(%eax) | |
1047 | mov STR3(%esp), %eax | |
1048 | RETURN | |
1049 | ||
1050 | .p2align 4 | |
1051 | # ifdef USE_AS_STRNCAT | |
1052 | L(StrncatExit29): | |
1053 | movb %bh, 29(%eax) | |
1054 | # endif | |
1055 | L(Exit29): | |
1056 | movdqu (%esi), %xmm0 | |
1057 | movdqu 13(%esi), %xmm2 | |
1058 | movdqu %xmm0, (%eax) | |
1059 | movdqu %xmm2, 13(%eax) | |
1060 | mov STR3(%esp), %eax | |
1061 | RETURN | |
1062 | ||
1063 | .p2align 4 | |
1064 | # ifdef USE_AS_STRNCAT | |
1065 | L(StrncatExit30): | |
1066 | movb %bh, 30(%eax) | |
1067 | # endif | |
1068 | L(Exit30): | |
1069 | movdqu (%esi), %xmm0 | |
1070 | movdqu 14(%esi), %xmm2 | |
1071 | movdqu %xmm0, (%eax) | |
1072 | movdqu %xmm2, 14(%eax) | |
1073 | mov STR3(%esp), %eax | |
1074 | RETURN | |
1075 | ||
1076 | .p2align 4 | |
1077 | # ifdef USE_AS_STRNCAT | |
1078 | L(StrncatExit31): | |
1079 | movb %bh, 31(%eax) | |
1080 | # endif | |
1081 | L(Exit31): | |
1082 | movdqu (%esi), %xmm0 | |
1083 | movdqu 15(%esi), %xmm2 | |
1084 | movdqu %xmm0, (%eax) | |
1085 | movdqu %xmm2, 15(%eax) | |
1086 | mov STR3(%esp), %eax | |
1087 | RETURN | |
1088 | ||
1089 | .p2align 4 | |
1090 | # ifdef USE_AS_STRNCAT | |
1091 | L(StrncatExit32): | |
1092 | movb %bh, 32(%eax) | |
1093 | # endif | |
1094 | L(Exit32): | |
1095 | movdqu (%esi), %xmm0 | |
1096 | movdqu 16(%esi), %xmm2 | |
1097 | movdqu %xmm0, (%eax) | |
1098 | movdqu %xmm2, 16(%eax) | |
1099 | mov STR3(%esp), %eax | |
1100 | RETURN | |
1101 | ||
1102 | # ifdef USE_AS_STRNCAT | |
1103 | ||
1104 | .p2align 4 | |
1105 | L(UnalignedLeaveCase2OrCase3): | |
1106 | test %edx, %edx | |
1107 | jnz L(Unaligned64LeaveCase2) | |
1108 | L(Unaligned64LeaveCase3): | |
1109 | lea 64(%ebx), %ecx | |
1110 | and $-16, %ecx | |
1111 | add $48, %ebx | |
1112 | jl L(CopyFrom1To16BytesCase3) | |
1113 | movdqu %xmm4, (%eax) | |
1114 | sub $16, %ebx | |
1115 | jb L(CopyFrom1To16BytesCase3) | |
1116 | movdqu %xmm5, 16(%eax) | |
1117 | sub $16, %ebx | |
1118 | jb L(CopyFrom1To16BytesCase3) | |
1119 | movdqu %xmm6, 32(%eax) | |
1120 | sub $16, %ebx | |
1121 | jb L(CopyFrom1To16BytesCase3) | |
1122 | movdqu %xmm7, 48(%eax) | |
1123 | xor %bh, %bh | |
1124 | movb %bh, 64(%eax) | |
1125 | mov STR3(%esp), %eax | |
1126 | RETURN | |
1127 | ||
/* Case 2 (%edx != 0 at L(UnalignedLeaveCase2OrCase3)).  Walks %xmm4..%xmm7
   16 bytes at a time.  For each piece it builds a byte-equality mask
   against %xmm0 in %edx, updates the counter in %ebx, and leaves through
   L(CopyFrom1To16BytesCase2OrCase3) when the counter runs out or through
   L(CopyFrom1To16Bytes) when the mask is non-zero.  %ecx is the running
   byte offset of the piece being examined (0, 16, 32).
   Presumably %xmm0 is all-zero on entry, so the mask marks NUL bytes --
   TODO confirm at the caller.  On every fall-through path %edx was just
   tested zero, so the pcmpeqb result left in %xmm0 is all-zero again for
   the next comparison.  */
1128 | .p2align 4 | |
1129 | L(Unaligned64LeaveCase2): | |
1130 | xor %ecx, %ecx | |
1131 | pcmpeqb %xmm4, %xmm0 | |
1132 | pmovmskb %xmm0, %edx | |
/* Signed test: leave when the re-biased counter is zero or negative.  */
1133 | add $48, %ebx | |
1134 | jle L(CopyFrom1To16BytesCase2OrCase3) | |
1135 | test %edx, %edx | |
1136 | jnz L(CopyFrom1To16Bytes) | |
1137 | ||
/* Second piece.  The first piece is stored only after it passed both
   checks above.  jbe is unsigned: taken when %ebx was 16 or less.  */
1138 | pcmpeqb %xmm5, %xmm0 | |
1139 | pmovmskb %xmm0, %edx | |
1140 | movdqu %xmm4, (%eax) | |
1141 | add $16, %ecx | |
1142 | sub $16, %ebx | |
1143 | jbe L(CopyFrom1To16BytesCase2OrCase3) | |
1144 | test %edx, %edx | |
1145 | jnz L(CopyFrom1To16Bytes) | |
1146 | ||
/* Third piece, same pattern.  */
1147 | pcmpeqb %xmm6, %xmm0 | |
1148 | pmovmskb %xmm0, %edx | |
1149 | movdqu %xmm5, 16(%eax) | |
1150 | add $16, %ecx | |
1151 | sub $16, %ebx | |
1152 | jbe L(CopyFrom1To16BytesCase2OrCase3) | |
1153 | test %edx, %edx | |
1154 | jnz L(CopyFrom1To16Bytes) | |
1155 | ||
/* Last piece.  %ecx is 32 here, so the two lea instructions advance
   destination and source by 48 bytes to the start of the fourth piece.
   %ebx is not decremented again before the comparison below.  */
1156 | pcmpeqb %xmm7, %xmm0 | |
1157 | pmovmskb %xmm0, %edx | |
1158 | movdqu %xmm6, 32(%eax) | |
1159 | lea 16(%eax, %ecx), %eax | |
1160 | lea 16(%esi, %ecx), %esi | |
/* %edx = index of the lowest set mask bit.  NOTE(review): bsf leaves its
   destination undefined for a zero source; presumably a match is
   guaranteed in this piece because the earlier pieces had none --
   confirm.  */
1161 | bsf %edx, %edx | |
/* Unsigned compare: if the match index is below %ebx take the
   L(CopyFrom1To16BytesExit) path; otherwise dispatch through
   L(ExitStrncatTable) using %ebx as the index (4-byte entries).  */
1162 | cmp %ebx, %edx | |
1163 | jb L(CopyFrom1To16BytesExit) | |
1164 | BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) | |
1165 | # endif | |
/* Exit that does nothing but expand RETURN; no register is modified in
   this block.  Presumably reached when there is nothing to append, with
   the result already in %eax -- TODO confirm at the jump sites.  */
1166 | .p2align 4 | |
1167 | L(ExitZero): | |
1168 | RETURN | |
1169 | ||
1170 | END (STRCAT) | |
1171 | ||
/* Jump table of the L(ExitN) stubs: entry K (counting from 0) refers to
   L(Exit<K+1>), 32 entries of 4 bytes each.  With SHARED defined, JMPTBL
   stores the target as an offset relative to L(ExitTable).
   Presumably indexed by the position of the terminating NUL within the
   bytes still to be copied -- TODO confirm at the dispatch sites.

   The section switch must come before the alignment directive: .p2align
   pads the section that is current when it is assembled, so issuing it
   first would only pad the preceding section and leave this table
   without any alignment guarantee.  */
1172 | .section .rodata | |
1173 | .p2align 4 | |
1174 | L(ExitTable): | |
1175 | .int JMPTBL(L(Exit1), L(ExitTable)) | |
1176 | .int JMPTBL(L(Exit2), L(ExitTable)) | |
1177 | .int JMPTBL(L(Exit3), L(ExitTable)) | |
1178 | .int JMPTBL(L(Exit4), L(ExitTable)) | |
1179 | .int JMPTBL(L(Exit5), L(ExitTable)) | |
1180 | .int JMPTBL(L(Exit6), L(ExitTable)) | |
1181 | .int JMPTBL(L(Exit7), L(ExitTable)) | |
1182 | .int JMPTBL(L(Exit8), L(ExitTable)) | |
1183 | .int JMPTBL(L(Exit9), L(ExitTable)) | |
1184 | .int JMPTBL(L(Exit10), L(ExitTable)) | |
1185 | .int JMPTBL(L(Exit11), L(ExitTable)) | |
1186 | .int JMPTBL(L(Exit12), L(ExitTable)) | |
1187 | .int JMPTBL(L(Exit13), L(ExitTable)) | |
1188 | .int JMPTBL(L(Exit14), L(ExitTable)) | |
1189 | .int JMPTBL(L(Exit15), L(ExitTable)) | |
1190 | .int JMPTBL(L(Exit16), L(ExitTable)) | |
1191 | .int JMPTBL(L(Exit17), L(ExitTable)) | |
1192 | .int JMPTBL(L(Exit18), L(ExitTable)) | |
1193 | .int JMPTBL(L(Exit19), L(ExitTable)) | |
1194 | .int JMPTBL(L(Exit20), L(ExitTable)) | |
1195 | .int JMPTBL(L(Exit21), L(ExitTable)) | |
1196 | .int JMPTBL(L(Exit22), L(ExitTable)) | |
1197 | .int JMPTBL(L(Exit23), L(ExitTable)) | |
1198 | .int JMPTBL(L(Exit24), L(ExitTable)) | |
1199 | .int JMPTBL(L(Exit25), L(ExitTable)) | |
1200 | .int JMPTBL(L(Exit26), L(ExitTable)) | |
1201 | .int JMPTBL(L(Exit27), L(ExitTable)) | |
1202 | .int JMPTBL(L(Exit28), L(ExitTable)) | |
1203 | .int JMPTBL(L(Exit29), L(ExitTable)) | |
1204 | .int JMPTBL(L(Exit30), L(ExitTable)) | |
1205 | .int JMPTBL(L(Exit31), L(ExitTable)) | |
1206 | .int JMPTBL(L(Exit32), L(ExitTable)) | |
1207 | # ifdef USE_AS_STRNCAT | |
/* Jump table of the strncat exit stubs: entry K refers to
   L(StrncatExit<K>) for K = 0..32 (33 entries of 4 bytes each).  With
   SHARED defined, JMPTBL stores the target as an offset relative to
   L(ExitStrncatTable).  L(Unaligned64LeaveCase2) dispatches through it
   with %ebx as the index and a scale of 4.  The table follows
   L(ExitTable) directly, with no alignment directive of its own.  */
1208 | L(ExitStrncatTable): | |
1209 | .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable)) | |
1210 | .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable)) | |
1211 | .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable)) | |
1212 | .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable)) | |
1213 | .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable)) | |
1214 | .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable)) | |
1215 | .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable)) | |
1216 | .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable)) | |
1217 | .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable)) | |
1218 | .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable)) | |
1219 | .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable)) | |
1220 | .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable)) | |
1221 | .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable)) | |
1222 | .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable)) | |
1223 | .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable)) | |
1224 | .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable)) | |
1225 | .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable)) | |
1226 | .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable)) | |
1227 | .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable)) | |
1228 | .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable)) | |
1229 | .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable)) | |
1230 | .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable)) | |
1231 | .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable)) | |
1232 | .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable)) | |
1233 | .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable)) | |
1234 | .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable)) | |
1235 | .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable)) | |
1236 | .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable)) | |
1237 | .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable)) | |
1238 | .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable)) | |
1239 | .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable)) | |
1240 | .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable)) | |
1241 | .int JMPTBL(L(StrncatExit32), L(ExitStrncatTable)) | |
1242 | # endif | |
1243 | #endif |