1 /* Optimized memrchr with sse2 without bsf
2 Copyright (C) 2011-2015 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
/* Unwind bookkeeping for a 4-byte push: the CFA is now 4 bytes further
   from the stack pointer, and REG is saved at offset 0 from the current
   stack pointer.  */
# define CFI_PUSH(REG) \
 cfi_adjust_cfa_offset (4); \
 cfi_rel_offset (REG, 0)

/* Unwind bookkeeping for a 4-byte pop: undo the CFA adjustment made by
   CFI_PUSH and record that REG holds its value from function entry again.
   The last line must NOT end in a backslash, otherwise the macro would
   absorb the definition that follows it.  */
# define CFI_POP(REG) \
 cfi_adjust_cfa_offset (-4); \
 cfi_restore (REG)

/* Push/pop a 32-bit register together with the matching CFI annotation,
   so the call-frame information stays correct at every instruction.  */
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
40 ENTRY (__memrchr_sse2)
42 movd STR2(%esp), %xmm1
48 punpcklbw %xmm1, %xmm1
50 punpcklbw %xmm1, %xmm1
53 pshufd $0, %xmm1, %xmm1
71 /* Loop start on aligned string. */
76 movdqa 48(%ecx), %xmm0
82 movdqa 32(%ecx), %xmm2
88 movdqa 16(%ecx), %xmm3
104 movdqa 48(%ecx), %xmm0
110 movdqa 32(%ecx), %xmm2
116 movdqa 16(%ecx), %xmm3
145 movdqa 16(%ecx), %xmm2
146 movdqa 32(%ecx), %xmm3
147 movdqa 48(%ecx), %xmm4
170 movdqa 16(%ecx), %xmm2
173 pcmpeqb (%ecx), %xmm1
181 jnz L(exit_dispatch_high)
184 jnz L(exit_dispatch_8)
200 movdqa 48(%ecx), %xmm0
206 movdqa 32(%ecx), %xmm2
212 movdqa 16(%ecx), %xmm3
220 pcmpeqb (%ecx), %xmm1
229 movdqa 48(%ecx), %xmm0
237 pcmpeqb 32(%ecx), %xmm1
248 jnz L(exit_dispatch_high)
251 jnz L(exit_dispatch_8)
265 jnz L(exit_dispatch_high)
268 jnz L(exit_dispatch_8)
285 jnz L(exit_dispatch_high)
288 jnz L(exit_dispatch_8)
310 L(exit_dispatch_high):
313 jnz L(exit_dispatch_high_8)
324 L(exit_dispatch_high_8):
399 jnz L(exit_dispatch_1_high)
402 jnz L(exit_dispatch_1_8)
420 jnz L(exit_dispatch_1_high)
423 jnz L(exit_dispatch_1_8)
441 jnz L(exit_dispatch_1_high)
444 jnz L(exit_dispatch_1_8)
464 jnz L(exit_dispatch_1_high)
467 jnz L(exit_dispatch_1_8)
480 L(exit_dispatch_1_8):
493 L(exit_dispatch_1_high):
496 jnz L(exit_dispatch_1_high_8)
509 L(exit_dispatch_1_high_8):
611 L(length_less16_offset0):
613 pcmpeqb (%eax), %xmm1
631 punpcklbw %xmm1, %xmm1
634 punpcklbw %xmm1, %xmm1
637 pshufd $0, %xmm1, %xmm1
640 jz L(length_less16_offset0)
649 ja L(length_less16_part2)
651 pcmpeqb (%eax), %xmm1
674 L(length_less16_part2):
675 movdqa 16(%eax), %xmm2
689 jnz L(length_less16_part2_return)
691 pcmpeqb (%eax), %xmm1
709 L(length_less16_part2_return):
711 lea 16(%eax, %edi), %eax