1 /* Optimized memrchr with sse2
2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.  */
/* Unwind bookkeeping for a 4-byte pushl on i386: the CFA moves down by 4
   and REG is recorded as saved at the new top of stack (offset 0 from
   the CFA).  Pair every use with CFI_POP.
   NOTE(review): the leading numerals on each line are artifacts of this
   numbered excerpt, not part of the original source.  */
25 # define CFI_PUSH(REG) \
26 cfi_adjust_cfa_offset (4); \
27 cfi_rel_offset (REG, 0)
/* Companion to CFI_PUSH for popl: the CFA moves back up by 4.
   NOTE(review): the macro's continuation line (presumably
   `cfi_restore (REG)`, original line 31) is elided from this excerpt —
   confirm against the full source before editing.  */
29 # define CFI_POP(REG) \
30 cfi_adjust_cfa_offset (-4); \
/* pushl/popl wrappers that keep the DWARF call-frame information in
   sync with the actual stack adjustment, so unwinding through this
   hand-written code works.  */
33 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
34 # define POP(REG) popl REG; CFI_POP (REG)
/* Export this implementation under a variant-specific name — presumably
   selected at runtime by glibc's multiarch/IFUNC machinery (the guarding
   #ifdef is not visible in this excerpt; confirm).  "bsf" refers to the
   bit-scan instruction used to turn a pcmpeqb mask into a byte index.  */
41 # define MEMCHR __memrchr_sse2_bsf
/* NOTE(review): the remainder of this excerpt is a sparse sampling of
   the __memrchr_sse2_bsf body.  The ENTRY line, most labels, all
   pmovmskb/bsf steps, conditional branches, and the epilogue are elided
   (the embedded numbering jumps from 46 to 405 with large gaps).  No
   behavior-preserving rewrite is attempted; comments only, and each
   claim below is limited to what the visible instructions show.  */
/* Broadcast the search byte (2nd stack argument, STR2) into all 16
   lanes of %xmm1: movd, then two punpcklbw self-unpacks (1 -> 2 -> 4
   identical bytes per dword) and pshufd $0 to replicate that dword.  */
46 movd STR2(%esp), %xmm1
52 punpcklbw %xmm1, %xmm1
54 punpcklbw %xmm1, %xmm1
57 pshufd $0, %xmm1, %xmm1
60 /* Check if there is a match. */
76 /* Loop start on aligned string. */
/* Aligned 64-byte block scan: movdqa-load 16-byte chunks at offsets
   48/32/16 from %ecx (the descending order suggests the backward search
   direction of memrchr — confirm against the elided branch logic; the
   pcmpeqb/pmovmskb steps for these loads are not visible here).  */
81 movdqa 48(%ecx), %xmm0
87 movdqa 32(%ecx), %xmm2
93 movdqa 16(%ecx), %xmm3
109 movdqa 48(%ecx), %xmm0
115 movdqa 32(%ecx), %xmm2
121 movdqa 16(%ecx), %xmm3
/* A variant of the 64-byte loop loading ascending offsets into
   xmm2/xmm3/xmm4; the chunk at offset 0 is handled by the pcmpeqb
   against (%ecx) below.  */
150 movdqa 16(%ecx), %xmm2
151 movdqa 32(%ecx), %xmm3
152 movdqa 48(%ecx), %xmm4
175 movdqa 16(%ecx), %xmm2
/* pcmpeqb with a memory operand: compare the chunk at %ecx against the
   broadcast byte in %xmm1 (destroys the broadcast value in %xmm1, so
   the elided code presumably re-creates or no longer needs it).  */
178 pcmpeqb (%ecx), %xmm1
196 movdqa 48(%ecx), %xmm0
202 movdqa 32(%ecx), %xmm2
208 movdqa 16(%ecx), %xmm3
216 pcmpeqb (%ecx), %xmm1
225 movdqa 48(%ecx), %xmm0
233 pcmpeqb 32(%ecx), %xmm1
/* Match-address reconstruction: %eax presumably holds the bit index of
   the last set bit within a 16-byte chunk (from an elided bsr/bsf), and
   these lea's add the chunk base %ecx plus the chunk's offset
   (16/32/48) to form the returned pointer — TODO confirm against the
   elided scan code.  */
249 lea 16(%eax, %ecx), %eax
255 lea 32(%eax, %ecx), %eax
261 lea 48(%eax, %ecx), %eax
279 lea 16(%ecx, %eax), %eax
288 lea 32(%ecx, %eax), %eax
297 lea 48(%ecx, %eax), %eax
/* Tail handling for lengths < 16: compare the (aligned) chunk at %eax
   directly against the broadcast byte.  */
306 L(length_less16_offset0):
308 pcmpeqb (%eax), %xmm1
/* Re-broadcast of the search byte for the short-length path (same
   punpcklbw/pshufd idiom as at the top).  */
327 punpcklbw %xmm1, %xmm1
329 punpcklbw %xmm1, %xmm1
333 pshufd $0, %xmm1, %xmm1
335 jz L(length_less16_offset0)
343 ja L(length_less16_part2)
345 pcmpeqb (%eax), %xmm1
/* Misaligned short-length case split across two 16-byte chunks: scan
   the second chunk first (backward search), fall back to the first.  */
368 L(length_less16_part2):
369 movdqa 16(%eax), %xmm2
383 jnz L(length_less16_part2_return)
385 pcmpeqb (%eax), %xmm1
403 L(length_less16_part2_return):
/* %edi presumably carries the bit index within the second chunk; form
   the final pointer as base + 16 + index — TODO confirm.  */
405 lea 16(%eax, %edi), %eax