/* strrchr/wcsrchr optimized with AVX2.
   Copyright (C) 2017-2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# include <sysdep.h>

# ifndef STRRCHR
#  define STRRCHR	__strrchr_avx2
# endif

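/* For wcsrchr, CHAR is a 4-byte wchar_t, so compare double words;
   for strrchr, compare single bytes.  */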
# ifdef USE_AS_WCSRCHR
#  define VPBROADCAST	vpbroadcastd
#  define VPCMPEQ	vpcmpeqd
# else
#  define VPBROADCAST	vpbroadcastb
#  define VPCMPEQ	vpcmpeqb
# endif

# ifndef VZEROUPPER
#  define VZEROUPPER	vzeroupper
# endif

# define VEC_SIZE	32

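/* Overview: the code below implements the classic strrchr strategy
   one VEC_SIZE block at a time: remember the most recent CHAR match
   and stop at the first nul.  A rough scalar C sketch of that logic
   (illustrative only; strrchr_sketch is a hypothetical helper, the
   byte case; the real code vectorizes it and unrolls the aligned
   loop 4x):

       #include <stddef.h>

       static char *
       strrchr_sketch (const char *s, int c)
       {
	 const char *last = NULL;	/* Last match seen so far.  */
	 for (;; s++)
	   {
	     if (*s == (char) c)
	       last = s;		/* Remember and keep going.  */
	     if (*s == '\0')
	       return (char *) last;	/* nul ends the search.  */
	   }
       }
*/
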
	.section .text.avx,"ax",@progbits
ENTRY (STRRCHR)
	movd	%esi, %xmm4
	movl	%edi, %ecx
	/* Broadcast CHAR to YMM4.  */
	VPBROADCAST %xmm4, %ymm4
	vpxor	%ymm0, %ymm0, %ymm0

	/* Check if we may cross a page boundary with one vector load.
	   ECX = S & (2 * VEC_SIZE - 1); if the offset within a
	   64-byte aligned block is at most VEC_SIZE, the 32-byte load
	   stays inside that block and therefore inside the page.  */
	andl	$(2 * VEC_SIZE - 1), %ecx
	cmpl	$VEC_SIZE, %ecx
	ja	L(cross_page_boundary)

	vmovdqu	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	addq	$VEC_SIZE, %rdi

	testl	%eax, %eax
	jnz	L(first_vec)

	testl	%ecx, %ecx
	jnz	L(return_null)

	andq	$-VEC_SIZE, %rdi
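	/* No match remembered yet; EDX == 0 encodes that state.  */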
	xorl	%edx, %edx
	jmp	L(aligned_loop)

	.p2align 4
L(first_vec):
	/* Check if there is a nul CHAR.  */
	testl	%ecx, %ecx
	jnz	L(char_and_nul_in_first_vec)

	/* Remember the match and keep searching.  */
	movl	%eax, %edx
	movq	%rdi, %rsi
	andq	$-VEC_SIZE, %rdi
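	/* The aligned loop may re-scan the tail of the first vector;
	   a match found again resolves to the same or a later
	   address, so the result is unchanged.  */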
	jmp	L(aligned_loop)

	.p2align 4
L(cross_page_boundary):
	andl	$(VEC_SIZE - 1), %ecx
	andq	$-VEC_SIZE, %rdi
	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %edx
	vpmovmskb %ymm3, %eax
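	/* Shift out mask bits for bytes before the start of the
	   string (CL = misalignment within the aligned load).  */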
	shrl	%cl, %edx
	shrl	%cl, %eax
	addq	$VEC_SIZE, %rdi

	/* Check if there is a CHAR.  */
	testl	%eax, %eax
	jnz	L(found_char)

	testl	%edx, %edx
	jnz	L(return_null)

	jmp	L(aligned_loop)

	.p2align 4
L(found_char):
	testl	%edx, %edx
	jnz	L(char_and_nul)

	/* Remember the match and keep searching.  */
	movl	%eax, %edx
	leaq	(%rdi, %rcx), %rsi
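	/* Fall through into the aligned loop; EDX/RSI hold the
	   remembered match.  */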

	.p2align 4
L(aligned_loop):
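	/* Main loop: scan four VEC_SIZE blocks per pass from a
	   VEC_SIZE-aligned address, looking for CHAR or nul.  */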
	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	addq	$VEC_SIZE, %rdi
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	orl	%eax, %ecx
	jnz	L(char_nor_null)

	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	addq	$VEC_SIZE, %rdi
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	orl	%eax, %ecx
	jnz	L(char_nor_null)

	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	addq	$VEC_SIZE, %rdi
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	orl	%eax, %ecx
	jnz	L(char_nor_null)

	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	addq	$VEC_SIZE, %rdi
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	orl	%eax, %ecx
	jz	L(aligned_loop)

	.p2align 4
L(char_nor_null):
	/* The loop found a CHAR, a nul CHAR, or both.  */
	testl	%eax, %eax
	jnz	L(match)
L(return_value):
	/* No CHAR here; return the remembered match, if any.  */
	testl	%edx, %edx
	jz	L(return_null)
	movl	%edx, %eax
	movq	%rsi, %rdi

# ifdef USE_AS_WCSRCHR
	/* Keep the first bit for each matching CHAR for bsr.  */
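	/* Each wchar_t match sets four mask bits; keeping only the
	   lowest bit per element makes BSR yield the byte offset of
	   the last matching wchar_t.  */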
	andl	$0x11111111, %eax
# endif
	bsrl	%eax, %eax
	leaq	-VEC_SIZE(%rdi, %rax), %rax
	VZEROUPPER
	ret

	.p2align 4
L(match):
	/* Found a CHAR.  Check if there is a nul CHAR.  */
	vpmovmskb %ymm2, %ecx
	testl	%ecx, %ecx
	jnz	L(find_nul)

	/* Remember the match and keep searching.  */
	movl	%eax, %edx
	movq	%rdi, %rsi
	jmp	L(aligned_loop)

	.p2align 4
L(find_nul):
# ifdef USE_AS_WCSRCHR
	/* Keep the first bit for each matching CHAR for bsr.  */
	andl	$0x11111111, %ecx
	andl	$0x11111111, %eax
# endif
	/* Mask out any matching bits after the nul CHAR.  */
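	/* (ECX - 1) ^ ECX sets every bit up to and including the
	   lowest set bit of ECX (the first nul), so the AND below
	   keeps only matches at or before it.  */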
	movl	%ecx, %r8d
	subl	$1, %r8d
	xorl	%ecx, %r8d
	andl	%r8d, %eax
	testl	%eax, %eax
	/* If there is no CHAR here, return the remembered one.  */
	jz	L(return_value)
	bsrl	%eax, %eax
	leaq	-VEC_SIZE(%rdi, %rax), %rax
	VZEROUPPER
	ret

	.p2align 4
L(char_and_nul):
	/* Both a CHAR and a nul CHAR were found.  */
	addq	%rcx, %rdi
	movl	%edx, %ecx
L(char_and_nul_in_first_vec):
# ifdef USE_AS_WCSRCHR
	/* Keep the first bit for each matching CHAR for bsr.  */
	andl	$0x11111111, %ecx
	andl	$0x11111111, %eax
# endif
	/* Mask out any matching bits after the nul CHAR.  */
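	/* Same (mask - 1) ^ mask trick as in L(find_nul).  */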
	movl	%ecx, %r8d
	subl	$1, %r8d
	xorl	%ecx, %r8d
	andl	%r8d, %eax
	testl	%eax, %eax
	/* Return null pointer if the nul CHAR comes first.  */
	jz	L(return_null)
	bsrl	%eax, %eax
	leaq	-VEC_SIZE(%rdi, %rax), %rax
	VZEROUPPER
	ret

	.p2align 4
L(return_null):
	xorl	%eax, %eax
	VZEROUPPER
	ret

END (STRRCHR)
#endif