]>
Commit | Line | Data |
---|---|---|
951fbcec | 1 | /* Optimized memrchr with sse2 without bsf |
dff8da6b | 2 | Copyright (C) 2011-2024 Free Software Foundation, Inc. |
951fbcec LD |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 16 | License along with the GNU C Library; if not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
951fbcec | 18 | |
4f41c682 | 19 | #if IS_IN (libc) |
951fbcec LD |
20 | |
21 | # include <sysdep.h> | |
22 | # define CFI_PUSH(REG) /* unwind bookkeeping that goes with a 4-byte push of REG; the cfi_* helpers are not defined in this file (presumably they come from sysdep.h) */ \ | |
23 | cfi_adjust_cfa_offset (4); \ | |
24 | cfi_rel_offset (REG, 0) | |
25 | ||
26 | # define CFI_POP(REG) /* unwind bookkeeping that goes with a 4-byte pop of REG */ \ | |
27 | cfi_adjust_cfa_offset (-4); \ | |
28 | cfi_restore (REG) | |
29 | ||
30 | # define PUSH(REG) pushl REG; CFI_PUSH (REG) /* push REG and record it */ | |
31 | # define POP(REG) popl REG; CFI_POP (REG) /* pop REG and record it */ | |
32 | ||
33 | # define PARMS 4 /* %esp-relative offset of the first argument as used at function entry */ | |
34 | # define STR1 PARMS /* argument 1: start of the buffer */ | |
35 | # define STR2 STR1+4 /* argument 2: value to search for (only its low byte is used) */ | |
36 | # define LEN STR2+4 /* argument 3: number of bytes in the buffer */ | |
37 | ||
38 | atom_text_section | |
39 | ENTRY (__memrchr_sse2) /* Scans the LEN bytes at STR1 from the end towards the start for the low byte of STR2; %eax returns the address of the last matching byte, or 0 when there is none. */ | |
40 | mov STR1(%esp), %ecx /* ecx = buffer start */ | |
41 | movd STR2(%esp), %xmm1 /* low dword of xmm1 = value to search for */ | |
42 | mov LEN(%esp), %edx /* edx = byte count */ | |
43 | ||
44 | sub $16, %edx /* edx = LEN - 16 */ | |
45 | jbe L(length_less16) /* LEN <= 16 takes the short-buffer path */ | |
46 | ||
47 | punpcklbw %xmm1, %xmm1 /* the two punpcklbw plus the pshufd below copy the low byte into all 16 bytes of xmm1 */ | |
48 | add %edx, %ecx /* ecx = address of the final 16 bytes of the buffer */ | |
49 | punpcklbw %xmm1, %xmm1 | |
50 | ||
51 | movdqu (%ecx), %xmm0 /* the tail chunk may be unaligned, hence movdqu */ | |
52 | pshufd $0, %xmm1, %xmm1 | |
53 | pcmpeqb %xmm1, %xmm0 | |
54 | ||
55 | pmovmskb %xmm0, %eax /* eax bit i is set when byte i of the chunk matched */ | |
56 | test %eax, %eax | |
f9e12320 | 57 | jnz L(exit_dispatch) /* match in the tail chunk; ecx is its base */ |
951fbcec LD |
58 | |
59 | sub $64, %ecx /* move down to the 64-byte window just below the tail chunk */ | |
60 | mov %ecx, %eax | |
61 | and $15, %eax /* eax = ecx modulo 16 */ | |
62 | jz L(loop_prolog) /* window is already 16-byte aligned */ | |
63 | ||
64 | lea 16(%ecx), %ecx /* with the and below: round ecx up to a 16-byte boundary (the overlap re-reads bytes already checked) */ | |
65 | lea 16(%edx), %edx /* with the sub below: grow edx by the same amount ecx moved up */ | |
66 | sub %eax, %edx | |
67 | and $-16, %ecx | |
68 | ||
69 | .p2align 4 | |
70 | /* Loop start on aligned string. */ | |
71 | L(loop_prolog): /* ecx = 16-byte aligned base of a 64-byte window, scanned from its highest 16-byte chunk down; edx = distance from the buffer start to the top of that window */ | |
72 | sub $64, %edx /* edx = distance from the buffer start to ecx */ | |
73 | jbe L(exit_loop) /* the window reaches the buffer start: finish with the bounds-aware code */ | |
74 | ||
75 | movdqa 48(%ecx), %xmm0 | |
76 | pcmpeqb %xmm1, %xmm0 | |
77 | pmovmskb %xmm0, %eax | |
78 | test %eax, %eax | |
79 | jnz L(matches48) | |
80 | ||
81 | movdqa 32(%ecx), %xmm2 | |
82 | pcmpeqb %xmm1, %xmm2 | |
83 | pmovmskb %xmm2, %eax | |
84 | test %eax, %eax | |
85 | jnz L(matches32) | |
86 | ||
87 | movdqa 16(%ecx), %xmm3 | |
88 | pcmpeqb %xmm1, %xmm3 | |
89 | pmovmskb %xmm3, %eax | |
90 | test %eax, %eax | |
91 | jnz L(matches16) | |
92 | ||
93 | movdqa (%ecx), %xmm4 | |
94 | pcmpeqb %xmm1, %xmm4 | |
95 | pmovmskb %xmm4, %eax | |
96 | test %eax, %eax | |
97 | jnz L(exit_dispatch) /* lowest chunk: ecx is already its base */ | |
98 | ||
99 | sub $64, %ecx /* second unrolled window, 64 bytes lower */ | |
100 | sub $64, %edx | |
101 | jbe L(exit_loop) | |
102 | ||
103 | movdqa 48(%ecx), %xmm0 | |
104 | pcmpeqb %xmm1, %xmm0 | |
105 | pmovmskb %xmm0, %eax | |
106 | test %eax, %eax | |
107 | jnz L(matches48) | |
108 | ||
109 | movdqa 32(%ecx), %xmm2 | |
110 | pcmpeqb %xmm1, %xmm2 | |
111 | pmovmskb %xmm2, %eax | |
112 | test %eax, %eax | |
113 | jnz L(matches32) | |
114 | ||
115 | movdqa 16(%ecx), %xmm3 | |
116 | pcmpeqb %xmm1, %xmm3 | |
117 | pmovmskb %xmm3, %eax | |
118 | test %eax, %eax | |
119 | jnz L(matches16) | |
120 | ||
121 | movdqa (%ecx), %xmm3 | |
122 | pcmpeqb %xmm1, %xmm3 | |
123 | pmovmskb %xmm3, %eax | |
124 | test %eax, %eax | |
125 | jnz L(exit_dispatch) | |
126 | ||
127 | mov %ecx, %eax | |
128 | and $63, %eax /* eax = ecx modulo 64 */ | |
129 | test %eax, %eax | |
f9e12320 | 130 | jz L(align64_loop) /* already 64-byte aligned */ |
951fbcec LD |
131 | |
132 | lea 64(%ecx), %ecx /* with the and below: round ecx up to a 64-byte boundary for the main loop */ | |
133 | lea 64(%edx), %edx /* with the sub below: move edx by the same amount as ecx */ | |
134 | and $-64, %ecx | |
135 | sub %eax, %edx | |
136 | ||
137 | .p2align 4 | |
138 | L(align64_loop): /* main loop: one aligned 64-byte window per iteration, walking down in memory */ | |
139 | sub $64, %ecx | |
140 | sub $64, %edx | |
141 | jbe L(exit_loop) /* the window reaches the buffer start: leave the loop */ | |
142 | ||
143 | movdqa (%ecx), %xmm0 | |
144 | movdqa 16(%ecx), %xmm2 | |
145 | movdqa 32(%ecx), %xmm3 | |
146 | movdqa 48(%ecx), %xmm4 | |
147 | ||
148 | pcmpeqb %xmm1, %xmm0 | |
149 | pcmpeqb %xmm1, %xmm2 | |
150 | pcmpeqb %xmm1, %xmm3 | |
151 | pcmpeqb %xmm1, %xmm4 | |
152 | ||
153 | pmaxub %xmm3, %xmm0 /* each compare result byte is 0x00 or 0xff, so the three pmaxub merge the four results into xmm2 */ | |
154 | pmaxub %xmm4, %xmm2 | |
155 | pmaxub %xmm0, %xmm2 | |
156 | pmovmskb %xmm2, %eax | |
157 | ||
158 | test %eax, %eax | |
159 | jz L(align64_loop) /* no match anywhere in this window */ | |
160 | ||
161 | pmovmskb %xmm4, %eax /* xmm4 and xmm3 still hold the unmerged results for offsets 48 and 32 */ | |
162 | test %eax, %eax | |
163 | jnz L(matches48) | |
164 | ||
165 | pmovmskb %xmm3, %eax | |
166 | test %eax, %eax | |
167 | jnz L(matches32) | |
168 | ||
169 | movdqa 16(%ecx), %xmm2 /* xmm2 was overwritten by the merge, so redo the compare for offset 16 */ | |
170 | ||
171 | pcmpeqb %xmm1, %xmm2 | |
172 | pcmpeqb (%ecx), %xmm1 /* overwrites the search pattern in xmm1; every path from here returns */ | |
173 | ||
174 | pmovmskb %xmm2, %eax | |
175 | test %eax, %eax | |
176 | jnz L(matches16) | |
177 | ||
178 | pmovmskb %xmm1, %eax /* mask of the lowest chunk; the bit tests below mirror L(exit_dispatch) */ | |
179 | test %ah, %ah | |
180 | jnz L(exit_dispatch_high) | |
181 | mov %al, %dl | |
182 | and $15 << 4, %dl | |
183 | jnz L(exit_dispatch_8) | |
184 | test $0x08, %al | |
185 | jnz L(exit_4) | |
186 | test $0x04, %al | |
187 | jnz L(exit_3) | |
188 | test $0x02, %al | |
189 | jnz L(exit_2) | |
190 | mov %ecx, %eax /* none of bits 1-15 set: return the chunk base */ | |
191 | ret | |
192 | ||
193 | .p2align 4 | |
194 | L(exit_loop): /* final window: only its upper part is guaranteed to lie inside the buffer */ | |
195 | add $64, %edx /* undo the sub that led here: edx = number of bytes at the top of the window that belong to the buffer */ | |
196 | cmp $32, %edx | |
197 | jbe L(exit_loop_32) /* at most the two highest chunks are involved */ | |
198 | ||
199 | movdqa 48(%ecx), %xmm0 | |
200 | pcmpeqb %xmm1, %xmm0 | |
201 | pmovmskb %xmm0, %eax | |
202 | test %eax, %eax | |
203 | jnz L(matches48) /* with edx > 32 the chunks at 48 and 32 are wholly inside the buffer: unchecked dispatch */ | |
204 | ||
205 | movdqa 32(%ecx), %xmm2 | |
206 | pcmpeqb %xmm1, %xmm2 | |
207 | pmovmskb %xmm2, %eax | |
208 | test %eax, %eax | |
209 | jnz L(matches32) | |
210 | ||
211 | movdqa 16(%ecx), %xmm3 | |
212 | pcmpeqb %xmm1, %xmm3 | |
213 | pmovmskb %xmm3, %eax | |
214 | test %eax, %eax | |
215 | jnz L(matches16_1) /* the chunk at 16 may start before the buffer: bounds-checked dispatch */ | |
216 | cmp $48, %edx | |
217 | jbe L(return_null) /* the chunk at 0 lies entirely before the buffer */ | |
218 | ||
219 | pcmpeqb (%ecx), %xmm1 | |
220 | pmovmskb %xmm1, %eax | |
221 | test %eax, %eax | |
222 | jnz L(matches0_1) | |
223 | xor %eax, %eax | |
224 | ret | |
225 | ||
226 | .p2align 4 | |
227 | L(exit_loop_32): /* edx <= 32: only the chunks at 48 and 32 can contain buffer bytes */ | |
228 | movdqa 48(%ecx), %xmm0 | |
229 | pcmpeqb %xmm1, %xmm0 | |
230 | pmovmskb %xmm0, %eax | |
231 | test %eax, %eax | |
232 | jnz L(matches48_1) /* the chunk at 48 may start before the buffer */ | |
233 | cmp $16, %edx | |
234 | jbe L(return_null) /* the chunk at 32 lies entirely before the buffer */ | |
235 | ||
236 | pcmpeqb 32(%ecx), %xmm1 | |
237 | pmovmskb %xmm1, %eax | |
238 | test %eax, %eax | |
239 | jnz L(matches32_1) | |
240 | xor %eax, %eax | |
241 | ret | |
242 | ||
243 | .p2align 4 | |
244 | L(matches16): /* eax = non-zero match mask for the chunk at 16(%ecx) */ | |
f9e12320 | 245 | lea 16(%ecx), %ecx /* ecx = base of the matching chunk; the bit tests below mirror L(exit_dispatch) */ |
951fbcec LD |
246 | test %ah, %ah |
247 | jnz L(exit_dispatch_high) | |
248 | mov %al, %dl | |
249 | and $15 << 4, %dl | |
250 | jnz L(exit_dispatch_8) | |
251 | test $0x08, %al | |
252 | jnz L(exit_4) | |
253 | test $0x04, %al | |
254 | jnz L(exit_3) | |
255 | test $0x02, %al | |
256 | jnz L(exit_2) | |
257 | mov %ecx, %eax | |
258 | ret | |
259 | ||
260 | .p2align 4 | |
261 | L(matches32): /* eax = non-zero match mask for the chunk at 32(%ecx) */ | |
f9e12320 | 262 | lea 32(%ecx), %ecx /* ecx = base of the matching chunk */ |
951fbcec LD |
263 | test %ah, %ah |
264 | jnz L(exit_dispatch_high) | |
265 | mov %al, %dl | |
266 | and $15 << 4, %dl | |
267 | jnz L(exit_dispatch_8) | |
268 | test $0x08, %al | |
269 | jnz L(exit_4) | |
270 | test $0x04, %al | |
271 | jnz L(exit_3) | |
272 | test $0x02, %al | |
273 | jnz L(exit_2) | |
274 | mov %ecx, %eax | |
275 | ret | |
276 | ||
277 | .p2align 4 | |
278 | L(matches48): /* eax = non-zero match mask for the chunk at 48(%ecx) */ | |
279 | lea 48(%ecx), %ecx /* ecx = base of the matching chunk, then fall through */ | |
280 | ||
281 | .p2align 4 | |
282 | L(exit_dispatch): /* ecx = chunk base, eax = 16-bit match mask; returns ecx + index of the highest set bit, found with a chain of bit tests */ | |
283 | test %ah, %ah | |
284 | jnz L(exit_dispatch_high) /* some bit in 8-15 is set */ | |
285 | mov %al, %dl /* dl is scratch here */ | |
286 | and $15 << 4, %dl | |
287 | jnz L(exit_dispatch_8) /* some bit in 4-7 is set */ | |
288 | test $0x08, %al | |
289 | jnz L(exit_4) /* bit 3: offset 3 */ | |
290 | test $0x04, %al | |
291 | jnz L(exit_3) /* bit 2: offset 2 */ | |
292 | test $0x02, %al | |
293 | jnz L(exit_2) /* bit 1: offset 1 */ | |
294 | mov %ecx, %eax /* otherwise offset 0 */ | |
295 | ret | |
296 | ||
297 | .p2align 4 | |
f9e12320 | 298 | L(exit_dispatch_8): /* highest match is among bits 4-7 of al */ |
951fbcec LD |
299 | test $0x80, %al |
300 | jnz L(exit_8) | |
301 | test $0x40, %al | |
302 | jnz L(exit_7) | |
303 | test $0x20, %al | |
304 | jnz L(exit_6) | |
305 | lea 4(%ecx), %eax /* only bit 4 is left: offset 4 */ | |
306 | ret | |
307 | ||
308 | .p2align 4 | |
309 | L(exit_dispatch_high): /* ah is non-zero: highest match is among bits 8-15 */ | |
310 | mov %ah, %dh /* dh is scratch here */ | |
311 | and $15 << 4, %dh | |
312 | jnz L(exit_dispatch_high_8) /* some bit in 12-15 is set */ | |
313 | test $0x08, %ah | |
314 | jnz L(exit_12) | |
315 | test $0x04, %ah | |
316 | jnz L(exit_11) | |
317 | test $0x02, %ah | |
318 | jnz L(exit_10) | |
319 | lea 8(%ecx), %eax /* only bit 8 is left: offset 8 */ | |
320 | ret | |
321 | ||
322 | .p2align 4 | |
323 | L(exit_dispatch_high_8): /* highest match is among bits 12-15 */ | |
324 | test $0x80, %ah | |
325 | jnz L(exit_16) | |
326 | test $0x40, %ah | |
327 | jnz L(exit_15) | |
328 | test $0x20, %ah | |
329 | jnz L(exit_14) | |
330 | lea 12(%ecx), %eax /* only bit 12 is left: offset 12 */ | |
331 | ret | |
332 | ||
333 | .p2align 4 | |
334 | L(exit_2): /* each L(exit_N) returns ecx + (N - 1), the address of the N-th byte of the chunk */ | |
335 | lea 1(%ecx), %eax | |
336 | ret | |
337 | ||
338 | .p2align 4 | |
339 | L(exit_3): | |
340 | lea 2(%ecx), %eax | |
341 | ret | |
342 | ||
343 | .p2align 4 | |
344 | L(exit_4): | |
345 | lea 3(%ecx), %eax | |
346 | ret | |
347 | ||
348 | .p2align 4 | |
349 | L(exit_6): | |
350 | lea 5(%ecx), %eax | |
351 | ret | |
352 | ||
353 | .p2align 4 | |
354 | L(exit_7): | |
355 | lea 6(%ecx), %eax | |
356 | ret | |
357 | ||
358 | .p2align 4 | |
359 | L(exit_8): | |
360 | lea 7(%ecx), %eax | |
361 | ret | |
362 | ||
363 | .p2align 4 | |
364 | L(exit_10): | |
365 | lea 9(%ecx), %eax | |
366 | ret | |
367 | ||
368 | .p2align 4 | |
369 | L(exit_11): | |
370 | lea 10(%ecx), %eax | |
371 | ret | |
372 | ||
373 | .p2align 4 | |
374 | L(exit_12): | |
375 | lea 11(%ecx), %eax | |
376 | ret | |
377 | ||
378 | .p2align 4 | |
379 | L(exit_14): | |
380 | lea 13(%ecx), %eax | |
381 | ret | |
382 | ||
383 | .p2align 4 | |
384 | L(exit_15): | |
385 | lea 14(%ecx), %eax | |
386 | ret | |
387 | ||
388 | .p2align 4 | |
389 | L(exit_16): | |
390 | lea 15(%ecx), %eax | |
391 | ret | |
392 | ||
393 | .p2align 4 | |
394 | L(matches0_1): /* bounds-checked variant: match in the chunk at 0(%ecx), which may start before the buffer */ | |
395 | lea -64(%edx), %edx /* edx = (buffer bytes in the window) - 64: negative by the number of bytes of this chunk that precede the buffer */ | |
396 | ||
397 | test %ah, %ah | |
398 | jnz L(exit_dispatch_1_high) | |
399 | mov %al, %ah /* ah is zero here, so it serves as scratch; edx must stay intact for the bound check */ | |
400 | and $15 << 4, %ah | |
401 | jnz L(exit_dispatch_1_8) | |
402 | test $0x08, %al | |
403 | jnz L(exit_1_4) | |
404 | test $0x04, %al | |
405 | jnz L(exit_1_3) | |
406 | test $0x02, %al | |
407 | jnz L(exit_1_2) | |
408 | add $0, %edx /* sets the flags from edx: match offset is 0 */ | |
409 | jl L(return_null) /* offset 0 precedes the buffer start */ | |
410 | mov %ecx, %eax | |
411 | ret | |
412 | ||
413 | .p2align 4 | |
414 | L(matches16_1): /* same as above for the chunk at 16(%ecx) */ | |
415 | lea -48(%edx), %edx /* edx = bound relative to this chunk (negative when the chunk starts before the buffer) */ | |
416 | lea 16(%ecx), %ecx /* ecx = base of the matching chunk */ | |
417 | ||
418 | test %ah, %ah | |
419 | jnz L(exit_dispatch_1_high) | |
420 | mov %al, %ah | |
421 | and $15 << 4, %ah | |
422 | jnz L(exit_dispatch_1_8) | |
423 | test $0x08, %al | |
424 | jnz L(exit_1_4) | |
425 | test $0x04, %al | |
426 | jnz L(exit_1_3) | |
427 | test $0x02, %al | |
428 | jnz L(exit_1_2) | |
429 | add $0, %edx | |
430 | jl L(return_null) | |
431 | mov %ecx, %eax | |
432 | ret | |
433 | ||
434 | .p2align 4 | |
435 | L(matches32_1): /* same as above for the chunk at 32(%ecx) */ | |
436 | lea -32(%edx), %edx | |
437 | lea 32(%ecx), %ecx | |
f9e12320 | 438 | |
951fbcec LD |
439 | test %ah, %ah |
440 | jnz L(exit_dispatch_1_high) | |
441 | mov %al, %ah | |
442 | and $15 << 4, %ah | |
443 | jnz L(exit_dispatch_1_8) | |
444 | test $0x08, %al | |
445 | jnz L(exit_1_4) | |
446 | test $0x04, %al | |
447 | jnz L(exit_1_3) | |
448 | test $0x02, %al | |
449 | jnz L(exit_1_2) | |
450 | add $0, %edx | |
451 | jl L(return_null) | |
452 | mov %ecx, %eax | |
453 | ret | |
454 | ||
455 | .p2align 4 | |
456 | L(matches48_1): /* same as above for the chunk at 48(%ecx); falls through into the dispatch */ | |
457 | lea -16(%edx), %edx | |
458 | lea 48(%ecx), %ecx | |
459 | ||
460 | .p2align 4 | |
461 | L(exit_dispatch_1): /* ecx = chunk base, eax = match mask, edx = bound; returns ecx + index of the highest set bit, or 0 if edx + index is negative */ | |
462 | test %ah, %ah | |
463 | jnz L(exit_dispatch_1_high) | |
464 | mov %al, %ah | |
465 | and $15 << 4, %ah | |
466 | jnz L(exit_dispatch_1_8) | |
467 | test $0x08, %al | |
468 | jnz L(exit_1_4) | |
469 | test $0x04, %al | |
470 | jnz L(exit_1_3) | |
471 | test $0x02, %al | |
472 | jnz L(exit_1_2) | |
473 | add $0, %edx | |
474 | jl L(return_null) | |
475 | mov %ecx, %eax | |
476 | ret | |
477 | ||
478 | .p2align 4 | |
f9e12320 | 479 | L(exit_dispatch_1_8): /* bounds-checked: highest match is among bits 4-7 of al */ |
951fbcec LD |
480 | test $0x80, %al |
481 | jnz L(exit_1_8) | |
482 | test $0x40, %al | |
483 | jnz L(exit_1_7) | |
484 | test $0x20, %al | |
485 | jnz L(exit_1_6) | |
486 | add $4, %edx /* only bit 4 is left: offset 4; a negative sum means that byte precedes the buffer */ | |
487 | jl L(return_null) | |
488 | lea 4(%ecx), %eax | |
489 | ret | |
490 | ||
491 | .p2align 4 | |
492 | L(exit_dispatch_1_high): /* bounds-checked: ah is non-zero, highest match is among bits 8-15 */ | |
493 | mov %ah, %al /* al is no longer needed once ah is known to be non-zero: use it as scratch */ | |
494 | and $15 << 4, %al | |
495 | jnz L(exit_dispatch_1_high_8) | |
496 | test $0x08, %ah | |
497 | jnz L(exit_1_12) | |
498 | test $0x04, %ah | |
499 | jnz L(exit_1_11) | |
500 | test $0x02, %ah | |
501 | jnz L(exit_1_10) | |
502 | add $8, %edx | |
503 | jl L(return_null) | |
504 | lea 8(%ecx), %eax | |
505 | ret | |
506 | ||
507 | .p2align 4 | |
508 | L(exit_dispatch_1_high_8): /* bounds-checked: highest match is among bits 12-15 */ | |
509 | test $0x80, %ah | |
510 | jnz L(exit_1_16) | |
511 | test $0x40, %ah | |
512 | jnz L(exit_1_15) | |
513 | test $0x20, %ah | |
514 | jnz L(exit_1_14) | |
515 | add $12, %edx | |
516 | jl L(return_null) | |
517 | lea 12(%ecx), %eax | |
518 | ret | |
519 | ||
520 | .p2align 4 | |
521 | L(exit_1_2): /* each L(exit_1_N) returns ecx + (N - 1), or 0 when edx + (N - 1) is negative */ | |
522 | add $1, %edx | |
523 | jl L(return_null) | |
524 | lea 1(%ecx), %eax | |
525 | ret | |
526 | ||
527 | .p2align 4 | |
528 | L(exit_1_3): | |
529 | add $2, %edx | |
530 | jl L(return_null) | |
531 | lea 2(%ecx), %eax | |
532 | ret | |
533 | ||
534 | .p2align 4 | |
535 | L(exit_1_4): | |
536 | add $3, %edx | |
537 | jl L(return_null) | |
538 | lea 3(%ecx), %eax | |
539 | ret | |
540 | ||
541 | .p2align 4 | |
542 | L(exit_1_6): | |
543 | add $5, %edx | |
544 | jl L(return_null) | |
545 | lea 5(%ecx), %eax | |
546 | ret | |
547 | ||
548 | .p2align 4 | |
549 | L(exit_1_7): | |
550 | add $6, %edx | |
551 | jl L(return_null) | |
552 | lea 6(%ecx), %eax | |
553 | ret | |
554 | ||
555 | .p2align 4 | |
556 | L(exit_1_8): | |
557 | add $7, %edx | |
558 | jl L(return_null) | |
559 | lea 7(%ecx), %eax | |
560 | ret | |
561 | ||
562 | .p2align 4 | |
563 | L(exit_1_10): | |
564 | add $9, %edx | |
565 | jl L(return_null) | |
566 | lea 9(%ecx), %eax | |
567 | ret | |
568 | ||
569 | .p2align 4 | |
570 | L(exit_1_11): | |
571 | add $10, %edx | |
572 | jl L(return_null) | |
573 | lea 10(%ecx), %eax | |
574 | ret | |
575 | ||
576 | .p2align 4 | |
577 | L(exit_1_12): | |
578 | add $11, %edx | |
579 | jl L(return_null) | |
580 | lea 11(%ecx), %eax | |
581 | ret | |
582 | ||
583 | .p2align 4 | |
584 | L(exit_1_14): | |
585 | add $13, %edx | |
586 | jl L(return_null) | |
587 | lea 13(%ecx), %eax | |
588 | ret | |
589 | ||
590 | .p2align 4 | |
591 | L(exit_1_15): | |
592 | add $14, %edx | |
593 | jl L(return_null) | |
594 | lea 14(%ecx), %eax | |
595 | ret | |
596 | ||
597 | .p2align 4 | |
598 | L(exit_1_16): | |
599 | add $15, %edx | |
600 | jl L(return_null) | |
601 | lea 15(%ecx), %eax | |
602 | ret | |
603 | ||
604 | .p2align 4 | |
605 | L(return_null): /* no match: return 0 (used only on paths where nothing is pushed) */ | |
606 | xor %eax, %eax | |
607 | ret | |
608 | ||
609 | .p2align 4 | |
610 | L(length_less16_offset0): /* short buffer that starts on a 16-byte boundary: eax = buffer, edx = length (1 to 16), xmm1 = search byte in every position */ | |
611 | mov %dl, %cl /* cl = length, used as the shift count below */ | |
612 | pcmpeqb (%eax), %xmm1 /* compare the whole aligned 16-byte block that holds the buffer */ | |
613 | ||
614 | mov $1, %edx | |
615 | sal %cl, %edx | |
616 | sub $1, %edx /* edx = (1 << length) - 1: one bit per byte that belongs to the buffer */ | |
617 | ||
618 | mov %eax, %ecx /* ecx = chunk base, as L(exit_dispatch) expects */ | |
619 | pmovmskb %xmm1, %eax | |
620 | ||
621 | and %edx, %eax /* drop matches that lie past the end of the buffer */ | |
622 | test %eax, %eax | |
623 | jnz L(exit_dispatch) | |
624 | ||
625 | xor %eax, %eax /* nothing found */ | |
626 | ret | |
627 | ||
628 | .p2align 4 | |
629 | L(length_less16): /* LEN <= 16; entered with ecx = buffer and edx = LEN - 16 */ | |
630 | punpcklbw %xmm1, %xmm1 | |
631 | add $16, %edx /* edx = LEN again */ | |
632 | je L(return_null) /* LEN == 0: nothing to search */ | |
633 | punpcklbw %xmm1, %xmm1 | |
634 | ||
635 | mov %ecx, %eax | |
636 | pshufd $0, %xmm1, %xmm1 /* xmm1 = search byte in all 16 positions */ | |
637 | ||
638 | and $15, %ecx /* ecx = offset of the buffer inside its aligned 16-byte block */ | |
639 | jz L(length_less16_offset0) | |
f9e12320 | 640 | |
951fbcec LD |
641 | PUSH (%edi) /* edi is used as scratch below and popped again before every ret */ |
642 | ||
643 | mov %cl, %dh | |
644 | add %dl, %dh /* dh = offset + length */ | |
645 | and $-16, %eax /* eax = aligned block containing the buffer start */ | |
646 | ||
647 | sub $16, %dh | |
648 | ja L(length_less16_part2) /* offset + length > 16: the buffer continues into the next block for dh bytes */ | |
649 | ||
650 | pcmpeqb (%eax), %xmm1 | |
651 | pmovmskb %xmm1, %edi | |
652 | ||
653 | sar %cl, %edi /* shift out the bytes that precede the buffer; bit 0 now stands for the first buffer byte */ | |
654 | add %ecx, %eax /* eax = buffer start */ | |
655 | mov %dl, %cl | |
656 | ||
657 | mov $1, %edx | |
658 | sal %cl, %edx | |
659 | sub $1, %edx /* edx = (1 << length) - 1 */ | |
660 | ||
661 | and %edx, %edi /* keep only matches inside the buffer */ | |
662 | test %edi, %edi | |
663 | jz L(ret_null) | |
664 | ||
665 | bsr %edi, %edi /* index of the highest set bit = offset of the last match */ | |
666 | add %edi, %eax | |
667 | POP (%edi) | |
668 | ret | |
669 | ||
670 | CFI_PUSH (%edi) /* the code that follows is reached with edi still pushed: redo the bookkeeping that the CFI_POP inside POP just undid */ | |
671 | ||
672 | .p2align 4 | |
673 | L(length_less16_part2): /* buffer straddles two aligned blocks: eax = lower block, cl = offset in it, dh = number of buffer bytes in the upper block */ | |
674 | movdqa 16(%eax), %xmm2 /* upper block first, because the last match is wanted */ | |
675 | pcmpeqb %xmm1, %xmm2 | |
676 | pmovmskb %xmm2, %edi | |
677 | ||
678 | mov %cl, %ch /* keep the offset in ch while cl serves as shift count */ | |
679 | ||
680 | mov %dh, %cl | |
681 | mov $1, %edx | |
682 | sal %cl, %edx | |
683 | sub $1, %edx /* edx = (1 << dh) - 1 */ | |
684 | ||
685 | and %edx, %edi /* keep only the upper-block bytes that belong to the buffer */ | |
686 | ||
687 | test %edi, %edi | |
688 | jnz L(length_less16_part2_return) | |
689 | ||
690 | pcmpeqb (%eax), %xmm1 /* no match above: check the lower block */ | |
691 | pmovmskb %xmm1, %edi | |
692 | ||
693 | mov %ch, %cl | |
694 | sar %cl, %edi /* discard the bytes before the buffer start */ | |
695 | test %edi, %edi | |
696 | jz L(ret_null) | |
697 | ||
698 | bsr %edi, %edi | |
699 | add %edi, %eax | |
700 | xor %ch, %ch /* ecx = offset again */ | |
701 | add %ecx, %eax /* eax = block base + offset + index of the last match */ | |
702 | POP (%edi) | |
703 | ret | |
704 | ||
705 | CFI_PUSH (%edi) /* same bookkeeping as above for the next path */ | |
706 | ||
707 | .p2align 4 | |
708 | L(length_less16_part2_return): /* last match is in the upper block */ | |
709 | bsr %edi, %edi | |
710 | lea 16(%eax, %edi), %eax | |
711 | POP (%edi) | |
712 | ret | |
713 | ||
714 | CFI_PUSH (%edi) | |
715 | ||
716 | .p2align 4 | |
717 | L(ret_null): /* no match on a path where edi was pushed: return 0 after restoring it */ | |
718 | xor %eax, %eax | |
719 | POP (%edi) | |
720 | ret | |
721 | ||
722 | END (__memrchr_sse2) | |
22999b2f | 723 | strong_alias (__memrchr_sse2, __GI___memrchr) /* strong_alias is defined outside this file; presumably it makes __GI___memrchr another name for this routine */ |
951fbcec | 724 | #endif |