1 /* Optimized memrchr with sse2 without bsf
2 Copyright (C) 2011-2021 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
/* Unwind-info bookkeeping for a 4-byte push of REG: the CFA offset
   grows by 4 and REG is recorded as saved at offset 0 from the
   current stack pointer.  */
# define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

/* Unwind-info bookkeeping for the matching 4-byte pop: the CFA offset
   shrinks by 4 and REG is marked as holding its entry value again.
   The macro must end on a line without a trailing backslash;
   otherwise the continuation splices the following "# define PUSH"
   line into this macro's body and PUSH is never defined.
   cfi_restore is expected to come from the same header that provides
   the other cfi_* wrappers used here -- confirm it is in scope.  */
# define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

/* Push/pop a 32-bit register and keep the unwind info in step with
   the stack adjustment.  */
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
/* Entry point of the SSE2 memrchr variant.  The numbering embedded in
   each line skips, so only some instructions of the routine are
   visible here; the notes below describe just those instructions.  */
39 ENTRY (__memrchr_sse2)
/* Load 32 bits from the stack slot STR2(%esp) into the low dword of
   %xmm1.  STR2 is defined outside this view; presumably it is the
   offset of the byte-to-find argument -- TODO confirm.  */
41 movd STR2(%esp), %xmm1
/* Two byte-wise self-interleaves followed by a broadcast of dword 0
   replicate the lowest byte of %xmm1 into all 16 byte lanes.  */
47 punpcklbw %xmm1, %xmm1
49 punpcklbw %xmm1, %xmm1
52 pshufd $0, %xmm1, %xmm1
70 /* Loop start on aligned string. */
/* 16-byte loads at descending offsets (48, 32, 16) from %ecx.  movdqa
   requires a 16-byte aligned address, so %ecx must be aligned at this
   point.  The instructions that consume %xmm0/%xmm2/%xmm3 are not
   visible in this view.  */
75 movdqa 48(%ecx), %xmm0
81 movdqa 32(%ecx), %xmm2
87 movdqa 16(%ecx), %xmm3
/* Same three loads again (a second copy of the step above; whether
   %ecx was adjusted in between is not visible).  */
103 movdqa 48(%ecx), %xmm0
109 movdqa 32(%ecx), %xmm2
115 movdqa 16(%ecx), %xmm3
/* Here the loads are issued in ascending offset order and go to
   %xmm2..%xmm4, leaving %xmm0 untouched by these three lines.  */
144 movdqa 16(%ecx), %xmm2
145 movdqa 32(%ecx), %xmm3
146 movdqa 48(%ecx), %xmm4
169 movdqa 16(%ecx), %xmm2
/* Byte-wise equality compare of the 16 bytes at (%ecx) with %xmm1.
   %xmm1 is the destination, so the 0xff/0x00 result mask overwrites
   the broadcast byte pattern held there.  */
172 pcmpeqb (%ecx), %xmm1
/* Taken when ZF is clear; the flag-setting instructions are not
   visible.  NOTE(review): the target names suggest a split between
   the high part and the low 8 bits of a match mask -- confirm.  */
180 jnz L(exit_dispatch_high)
183 jnz L(exit_dispatch_8)
/* Another descending 48/32/16 load sequence, ending with a compare of
   the block at (%ecx) that again replaces %xmm1 with the mask.  */
199 movdqa 48(%ecx), %xmm0
205 movdqa 32(%ecx), %xmm2
211 movdqa 16(%ecx), %xmm3
219 pcmpeqb (%ecx), %xmm1
228 movdqa 48(%ecx), %xmm0
/* Compare the block at 32(%ecx) with %xmm1; result mask lands in
   %xmm1 as above.  */
236 pcmpeqb 32(%ecx), %xmm1
/* Three more pairs of the same two-way branch to the exit dispatch
   labels; the tests feeding them are outside this view.  */
247 jnz L(exit_dispatch_high)
250 jnz L(exit_dispatch_8)
264 jnz L(exit_dispatch_high)
267 jnz L(exit_dispatch_8)
284 jnz L(exit_dispatch_high)
287 jnz L(exit_dispatch_8)
/* Target of the "jnz L(exit_dispatch_high)" branches.  Only the label
   and one conditional branch are visible; the test that sets the
   flags for the jnz below is not shown.  */
309 L(exit_dispatch_high):
312 jnz L(exit_dispatch_high_8)
/* Reached from the jnz directly above when ZF is clear.  Body not
   visible in this view.  */
323 L(exit_dispatch_high_8):
/* Four pairs of branches to the "_1" flavour of the dispatch labels.
   Each jnz is taken when ZF is clear; the instructions producing the
   flags are outside this view.  NOTE(review): how the "_1" labels
   differ from the unsuffixed ones cannot be determined from the
   visible lines -- check the full file.  */
398 jnz L(exit_dispatch_1_high)
401 jnz L(exit_dispatch_1_8)
419 jnz L(exit_dispatch_1_high)
422 jnz L(exit_dispatch_1_8)
440 jnz L(exit_dispatch_1_high)
443 jnz L(exit_dispatch_1_8)
463 jnz L(exit_dispatch_1_high)
466 jnz L(exit_dispatch_1_8)
/* Target of the "jnz L(exit_dispatch_1_8)" branches.  Body not
   visible.  */
479 L(exit_dispatch_1_8):
/* Target of the "jnz L(exit_dispatch_1_high)" branches; branches on
   to the "_high_8" label when ZF is clear after a test that is not
   shown.  */
492 L(exit_dispatch_1_high):
495 jnz L(exit_dispatch_1_high_8)
508 L(exit_dispatch_1_high_8):
/* Target of the "jz L(length_less16_offset0)" branch further down.
   NOTE(review): the label name suggests a short-length case with zero
   misalignment -- confirm against the full file.  */
610 L(length_less16_offset0):
/* Byte-wise compare of the 16 bytes at (%eax) with %xmm1; the result
   mask replaces %xmm1.  */
612 pcmpeqb (%eax), %xmm1
/* The embedded numbering jumps here (612 -> 630), so the lines below
   may belong to a different label that is not visible.  They repeat
   the broadcast idiom: the lowest byte of %xmm1 is replicated into
   all 16 byte lanes.  */
630 punpcklbw %xmm1, %xmm1
633 punpcklbw %xmm1, %xmm1
636 pshufd $0, %xmm1, %xmm1
/* Taken when ZF is set; the instruction that sets it is not shown.  */
639 jz L(length_less16_offset0)
/* Unsigned "above" branch (CF=0 and ZF=0); the compare feeding it is
   not shown.  */
648 ja L(length_less16_part2)
650 pcmpeqb (%eax), %xmm1
673 L(length_less16_part2):
/* Aligned 16-byte load of the block at 16(%eax); requires %eax to be
   16-byte aligned.  */
674 movdqa 16(%eax), %xmm2
688 jnz L(length_less16_part2_return)
/* Compare the block at (%eax) with %xmm1; result mask replaces
   %xmm1.  */
690 pcmpeqb (%eax), %xmm1
708 L(length_less16_part2_return):
/* %eax = %eax + %edi + 16 (lea leaves the flags untouched).
   Presumably %edi holds a byte index within the block at 16(%eax) --
   TODO confirm, the instruction that sets %edi is not visible.  */
710 lea 16(%eax, %edi), %eax