/* strchr with SSE2 without bsf
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
a5f524e4 | 18 | |
ceabdcd1 NG |
19 | #include <isa-level.h> |
20 | ||
/* NB: Atom builds with ISA level == 1, so there is no reason to keep
   this implementation at ISA level >= 2.  */
23 | #if ISA_SHOULD_BUILD (1) | |
a5f524e4 LD |
24 | |
25 | # include <sysdep.h> | |
26 | # include "asm-syntax.h" | |
27 | ||
ceaa0c5d | 28 | atom_text_section |
a5f524e4 LD |
/* strchr (s, c) using SSE2 byte compares.  The position of the lowest
   set mask bit is found with explicit bit tests instead of bsf.

   In:	%rdi = s
	%esi = c (only the low byte is used)
   Out:	%rax = address of the first byte equal to c that is not
	preceded by a NUL byte, or 0 if a NUL byte comes first.
	When c is 0 the address of the terminating NUL is returned,
	because at each position the match bit is tested before the
	NUL bit.
   Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm3, flags.

   Register roles:
	%xmm1 = c replicated into all 16 bytes
	%xmm2 = zero
	%eax  = mask of bytes equal to c in the current 16-byte block
	%edx  = mask of NUL bytes in the current 16-byte block
	(bit i of a mask corresponds to byte i of the block)

   All loads are 16-byte aligned (movdqa from an address rounded down
   to a multiple of 16).  */
ENTRY (__strchr_sse2_no_bsf)
	movd	%esi, %xmm1
	movq	%rdi, %rcx		/* Keep the original s.  */
	punpcklbw %xmm1, %xmm1
	andq	$~15, %rdi		/* Round s down to a 16-byte boundary.  */
	pxor	%xmm2, %xmm2
	punpcklbw %xmm1, %xmm1		/* Low 4 bytes of %xmm1 are now c.  */
	orl	$0xffffffff, %esi	/* %esi = all ones.  */
	movdqa	(%rdi), %xmm0
	pshufd	$0, %xmm1, %xmm1	/* c in every byte of %xmm1.  */
	subq	%rdi, %rcx		/* %rcx = s - aligned s (0..15).  */
	movdqa	%xmm0, %xmm3
	leaq	16(%rdi), %rdi
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm3
	shl	%cl, %esi		/* Clear the bits of bytes before s.  */
	pmovmskb %xmm0, %eax
	pmovmskb %xmm3, %edx
	andl	%esi, %eax		/* Ignore hits in front of s.  */
	andl	%esi, %edx
	test	%eax, %eax
	jnz	L(matches)
	test	%edx, %edx
	jnz	L(return_null)

	/* Scan 16 bytes per iteration until a block contains either a
	   byte equal to c or a NUL byte.  %rdi always points one block
	   past the block just examined.  */
L(loop):
	movdqa	(%rdi), %xmm0
	leaq	16(%rdi), %rdi
	movdqa	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm3
	pmovmskb %xmm0, %eax
	pmovmskb %xmm3, %edx
	or	%eax, %edx
	jz	L(loop)

	/* The OR above overwrote %edx; rebuild the NUL mask.  */
	pmovmskb %xmm3, %edx
	test	%eax, %eax
	jnz	L(matches)

/* Return NULL.  Reached by fall-through when the block holds a NUL
   byte but no byte equal to c.  */
	.p2align 4
L(return_null):
	/* Writing %eax zero-extends into %rax, so this clears the whole
	   register with a shorter encoding than the 64-bit form.  */
	xorl	%eax, %eax
	ret

L(matches):
	/* There is a match in the block.  Point %rdi back at that block
	   and check whether the block also contains a NUL byte.  */
	leaq	-16(%rdi), %rdi
	test	%edx, %edx
	jz	L(match_case1)

	/* Case 2: the block contains both a match and a NUL byte.  Walk
	   the positions in ascending order, testing the match bit before
	   the NUL bit at each one; whichever is hit first decides.  */
	.p2align 4
L(match_case2):
	test	%al, %al
	jz	L(match_high_case2)	/* No match in bytes 0-7.  */

	mov	%al, %cl
	and	$15, %cl
	jnz	L(match_case2_4)	/* A match lies in bytes 0-3.  */

	mov	%dl, %ch
	and	$15, %ch
	jnz	L(return_null)		/* NUL in bytes 0-3, match later.  */

	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x10, %dl
	jnz	L(return_null)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x20, %dl
	jnz	L(return_null)
	test	$0x40, %al
	jnz	L(Exit7)
	test	$0x40, %dl
	jnz	L(return_null)
	/* %al is non-zero and bits 0-6 are clear, so byte 7 matches.  */
	lea	7(%rdi), %rax
	ret

	.p2align 4
L(match_case2_4):
	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x01, %dl
	jnz	L(return_null)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x02, %dl
	jnz	L(return_null)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x04, %dl
	jnz	L(return_null)
	/* Low nibble of %al is non-zero and bits 0-2 are clear, so
	   byte 3 matches.  */
	lea	3(%rdi), %rax
	ret

	.p2align 4
L(match_high_case2):
	/* The match is in bytes 8-15 (%ah).  Any NUL in bytes 0-7
	   precedes it.  */
	test	%dl, %dl
	jnz	L(return_null)

	mov	%ah, %cl
	and	$15, %cl
	jnz	L(match_case2_12)	/* A match lies in bytes 8-11.  */

	mov	%dh, %ch
	and	$15, %ch
	jnz	L(return_null)		/* NUL in bytes 8-11, match later.  */

	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x10, %dh
	jnz	L(return_null)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x20, %dh
	jnz	L(return_null)
	test	$0x40, %ah
	jnz	L(Exit15)
	test	$0x40, %dh
	jnz	L(return_null)
	/* Only bit 7 of %ah is left, so byte 15 matches.  */
	lea	15(%rdi), %rax
	ret

	.p2align 4
L(match_case2_12):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x01, %dh
	jnz	L(return_null)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x02, %dh
	jnz	L(return_null)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x04, %dh
	jnz	L(return_null)
	/* Only bit 3 of %ah is left, so byte 11 matches.  */
	lea	11(%rdi), %rax
	ret

	/* Case 1: the block contains a match and no NUL byte.  Return
	   the position of the lowest set bit of the match mask.  */
	.p2align 4
L(match_case1):
	test	%al, %al
	jz	L(match_high_case1)

	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x40, %al
	jnz	L(Exit7)
	/* Only bit 7 of %al is left.  */
	lea	7(%rdi), %rax
	ret

	.p2align 4
L(match_high_case1):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	/* Only bit 7 of %ah is left.  */
	lea	15(%rdi), %rax
	ret

	/* L(ExitN) returns the address of byte N - 1 of the block at
	   %rdi.  Bytes 7 and 15 have no label; they are handled inline
	   by the fall-through paths above.  */
	.p2align 4
L(Exit1):
	lea	(%rdi), %rax
	ret

	.p2align 4
L(Exit2):
	lea	1(%rdi), %rax
	ret

	.p2align 4
L(Exit3):
	lea	2(%rdi), %rax
	ret

	.p2align 4
L(Exit4):
	lea	3(%rdi), %rax
	ret

	.p2align 4
L(Exit5):
	lea	4(%rdi), %rax
	ret

	.p2align 4
L(Exit6):
	lea	5(%rdi), %rax
	ret

	.p2align 4
L(Exit7):
	lea	6(%rdi), %rax
	ret

	.p2align 4
L(Exit9):
	lea	8(%rdi), %rax
	ret

	.p2align 4
L(Exit10):
	lea	9(%rdi), %rax
	ret

	.p2align 4
L(Exit11):
	lea	10(%rdi), %rax
	ret

	.p2align 4
L(Exit12):
	lea	11(%rdi), %rax
	ret

	.p2align 4
L(Exit13):
	lea	12(%rdi), %rax
	ret

	.p2align 4
L(Exit14):
	lea	13(%rdi), %rax
	ret

	.p2align 4
L(Exit15):
	lea	14(%rdi), %rax
	ret

END (__strchr_sse2_no_bsf)
283 | #endif |