1 /* strrchr/wcsrchr optimized with AVX2.
2 Copyright (C) 2017-2019 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
/* STRRCHR expands to __strrchr_avx2, the AVX2-specific name used for this
   implementation. */
24 # define STRRCHR __strrchr_avx2
/* Element-width selection for the vector broadcast and compare macros.
   When USE_AS_WCSRCHR is defined they map to the dword (32-bit element)
   instructions. */
27 # ifdef USE_AS_WCSRCHR
28 # define VPBROADCAST vpbroadcastd
29 # define VPCMPEQ vpcmpeqd
/* Byte-element forms of the same two macros.
   NOTE(review): presumably the alternative used when USE_AS_WCSRCHR is not
   defined -- confirm against the enclosing conditional. */
31 # define VPBROADCAST vpbroadcastb
32 # define VPCMPEQ vpcmpeqb
/* VZEROUPPER expands to the plain vzeroupper instruction. */
36 # define VZEROUPPER vzeroupper
/* Place the code that follows in the allocatable, executable section
   .text.avx. */
41 .section .text.avx,"ax",@progbits
45 /* Broadcast CHAR to every element of YMM4. VPBROADCAST replicates the
   lowest element of XMM4 across the whole of YMM4.
   NOTE(review): XMM4 is assumed to already hold CHAR in its lowest
   element -- the instruction that loads it is not visible here, confirm. */
46 VPBROADCAST %xmm4, %ymm4
/* YMM0 = all zero bits. It is the comparand used by the VPCMPEQ below to
   detect nul CHARs. */
47 vpxor %ymm0, %ymm0, %ymm0
49 /* Check if we may cross page boundary with one vector load. The mask
   keeps only the low bits of ECX, that is ECX modulo 2 * VEC_SIZE when
   VEC_SIZE is a power of two.
   NOTE(review): ECX presumably holds the low bits of the string address
   at this point -- confirm. */
50 andl $(2 * VEC_SIZE - 1), %ecx
/* Unsigned above branch to the page-crossing path.
   NOTE(review): the compare that sets the flags for this branch is not
   visible here -- confirm the threshold it uses. */
52 ja L(cros_page_boundary)
/* Element-wise compares of the vector in YMM1. YMM2 gets all-one bits in
   every element of YMM1 that equals YMM0 (a nul CHAR), and YMM3 gets
   all-one bits in every element that equals YMM4 (a CHAR match). */
55 VPCMPEQ %ymm1, %ymm0, %ymm2
56 VPCMPEQ %ymm1, %ymm4, %ymm3
73 /* Check if there is a nul CHAR. */
/* Jump when the preceding flag-setting instruction produced a non-zero
   result (ZF clear). */
75 jnz L(char_and_nul_in_first_vec)
77 /* Remember the match and keep searching. */
/* Path taken from the page-boundary check when a single vector load could
   cross a page. */
84 L(cros_page_boundary):
/* Keep only the low bits of ECX, that is ECX modulo VEC_SIZE when VEC_SIZE
   is a power of two.
   NOTE(review): presumably the offset of the string start inside its
   aligned vector -- confirm. */
85 andl $(VEC_SIZE - 1), %ecx
/* YMM2 marks the elements of YMM1 equal to YMM0 (nul CHARs), YMM3 marks
   the elements equal to YMM4 (CHAR matches). */
88 VPCMPEQ %ymm1, %ymm0, %ymm2
89 VPCMPEQ %ymm1, %ymm4, %ymm3
96 /* Check if there is a CHAR. */
110 /* Remember the match and keep searching. */
/* RSI = RDI + RCX. */
112 leaq (%rdi, %rcx), %rsi
/* Four identical vector checks follow. Each one loads a vector from the
   address in RDI with vmovdqa, the aligned load form, so RDI must be
   32-byte aligned here. The vector is compared against zero (YMM0) and
   against CHAR (YMM4), and vpmovmskb then packs one bit per byte of each
   compare result into a general register: ECX = nul mask, EAX = CHAR mask.
   NOTE(review): the instructions that advance RDI and branch on the two
   masks between the checks are not visible here -- confirm. */
116 vmovdqa (%rdi), %ymm1
117 VPCMPEQ %ymm1, %ymm0, %ymm2
119 VPCMPEQ %ymm1, %ymm4, %ymm3
120 vpmovmskb %ymm2, %ecx
121 vpmovmskb %ymm3, %eax
/* Second vector check, same register roles as above. */
125 vmovdqa (%rdi), %ymm1
126 VPCMPEQ %ymm1, %ymm0, %ymm2
128 VPCMPEQ %ymm1, %ymm4, %ymm3
129 vpmovmskb %ymm2, %ecx
130 vpmovmskb %ymm3, %eax
/* Third vector check. */
134 vmovdqa (%rdi), %ymm1
135 VPCMPEQ %ymm1, %ymm0, %ymm2
137 VPCMPEQ %ymm1, %ymm4, %ymm3
138 vpmovmskb %ymm2, %ecx
139 vpmovmskb %ymm3, %eax
/* Fourth vector check. */
143 vmovdqa (%rdi), %ymm1
144 VPCMPEQ %ymm1, %ymm0, %ymm2
146 VPCMPEQ %ymm1, %ymm4, %ymm3
147 vpmovmskb %ymm2, %ecx
148 vpmovmskb %ymm3, %eax
154 /* Find a CHAR or a nul CHAR in a loop. */
163 # ifdef USE_AS_WCSRCHR
164 /* Keep the first bit for each matching CHAR for bsr. The mask from
   vpmovmskb has one bit per byte, so a matching dword sets four adjacent
   bits. Keeping only the lowest bit of each group of four makes the
   highest set bit correspond to the first byte of the last match. */
165 andl $0x11111111, %eax
/* RAX = RDI + RAX - VEC_SIZE.
   NOTE(review): presumably RAX holds the bit index found by bsr and RDI
   already points one vector past the vector that was examined --
   confirm. */
168 leaq -VEC_SIZE(%rdi, %rax), %rax
174 /* Find a CHAR. Check if there is a nul CHAR. */
/* ECX = one bit per byte of the nul compare result in YMM2. */
175 vpmovmskb %ymm2, %ecx
179 /* Remember the match and keep searching. */
186 # ifdef USE_AS_WCSRCHR
187 /* Keep the first bit for each matching CHAR for bsr. Both the nul mask
   in ECX and the CHAR mask in EAX are reduced to one bit per dword so that
   the two masks stay directly comparable. */
188 andl $0x11111111, %ecx
189 andl $0x11111111, %eax
191 /* Mask out any matching bits after the nul CHAR. */
197 /* If there is no CHAR here, return the remembered one. */
/* RAX = RDI + RAX - VEC_SIZE.
   NOTE(review): presumably converts the bit index of the last match into
   a pointer, with RDI one vector past the examined vector -- confirm. */
200 leaq -VEC_SIZE(%rdi, %rax), %rax
206 /* Find both a CHAR and a nul CHAR. */
/* Branch target of the jnz that follows the nul check on the first vector.
   On entry EAX is used as the CHAR match mask and ECX as the nul mask, as
   the masking below shows. */
209 L(char_and_nul_in_first_vec):
210 # ifdef USE_AS_WCSRCHR
211 /* Keep the first bit for each matching CHAR for bsr. A byte mask has
   four adjacent bits per dword element, so only the lowest bit of each
   group of four is kept in both masks. */
212 andl $0x11111111, %ecx
213 andl $0x11111111, %eax
215 /* Mask out any matching bits after the nul CHAR. */
221 /* Return null pointer if the nul CHAR comes first. */
/* RAX = RDI + RAX - VEC_SIZE.
   NOTE(review): presumably RAX is the bit index of the last match before
   the nul CHAR and RDI points one vector past the first vector --
   confirm. */
224 leaq -VEC_SIZE(%rdi, %rax), %rax