]>
Commit | Line | Data |
---|---|---|
693fb948 | 1 | /* strchr SSE2 without bsf |
f7a9f785 | 2 | Copyright (C) 2011-2016 Free Software Foundation, Inc. |
693fb948 LD |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 PE |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ | |
693fb948 | 19 | |
4f41c682 | 20 | #if IS_IN (libc) |
693fb948 LD |
21 | |
22 | # include <sysdep.h> | |
23 | ||
24 | # define CFI_PUSH(REG) \ | |
25 | cfi_adjust_cfa_offset (4); \ | |
26 | cfi_rel_offset (REG, 0) /* Unwind info matching a 4-byte push of REG: the CFA offset grows by 4 and REG is recorded as saved at offset 0. */ | |
27 | ||
28 | # define CFI_POP(REG) \ | |
29 | cfi_adjust_cfa_offset (-4); \ | |
30 | cfi_restore (REG) /* Unwind info matching a 4-byte pop of REG: the CFA offset shrinks by 4 and REG is no longer marked as saved. */ | |
31 | ||
32 | # define PUSH(REG) pushl REG; CFI_PUSH (REG) /* Push REG and emit the matching unwind info. */ | |
33 | # define POP(REG) popl REG; CFI_POP (REG) /* Pop REG and emit the matching unwind info. */ | |
34 | ||
35 | # define PARMS 8 /* Offset from %esp to the first argument after ENTRANCE: 4 bytes of return address plus the 4 bytes pushed for %edi. */ | |
36 | # define ENTRANCE PUSH(%edi) /* Prologue: %edi is the only register saved. */ | |
37 | # define RETURN POP(%edi); ret; CFI_PUSH(%edi); /* Epilogue. The CFI_PUSH after ret puts the unwind state back to "%edi pushed" for the code assembled after this return. */ | |
38 | ||
39 | # define STR1 PARMS /* Stack offset of the first argument, used below as the string pointer. */ | |
40 | # define STR2 STR1+4 /* Stack offset of the second argument, used below as the character to find. */ | |
41 | ||
48882a1a | 42 | atom_text_section |
693fb948 LD |
43 | ENTRY (__strchr_sse2) |
44 | /* __strchr_sse2: scan the string at STR1(%esp) for the byte held in the low 8 bits of STR2(%esp), examining 16 bytes per step. Returns in EAX the address of the first matching byte, or 0 if a NUL byte is found before any match. Works in EAX, ECX, EDX, EDI (saved by ENTRANCE, restored by RETURN) and XMM0-XMM2. Bit N of the masks in EAX (matches) and EDX (NUL bytes) corresponds to the byte at EDI + N. */ ||
45 | ENTRANCE /* Save EDI; every exit path restores it through RETURN. */ | |
46 | mov STR1(%esp), %ecx /* ECX = string pointer. */ | |
47 | movd STR2(%esp), %xmm1 /* Byte 0 of XMM1 = character to find. */ | |
48 | ||
49 | pxor %xmm2, %xmm2 /* XMM2 = 0, compared against the data to detect NUL bytes. */ | |
50 | mov %ecx, %edi /* EDI = string pointer; it becomes the scan cursor. */ | |
51 | punpcklbw %xmm1, %xmm1 /* Character now in bytes 0-1 of XMM1 ... */ | |
52 | punpcklbw %xmm1, %xmm1 /* ... and now in bytes 0-3. */ | |
53 | /* ECX has OFFSET: the low 4 bits of the string address, i.e. its misalignment within a 16-byte block. */ | |
54 | and $15, %ecx | |
55 | pshufd $0, %xmm1, %xmm1 /* Broadcast the character to all 16 bytes of XMM1. The flags from the AND above are still intact for the JE below. */ | |
56 | je L(loop) /* OFFSET is 0: the string is already 16-byte aligned. */ | |
57 | ||
58 | /* Handle unaligned string: load the aligned 16-byte block that contains the string start and drop the mask bits belonging to the bytes before it. */ | |
59 | and $-16, %edi | |
60 | movdqa (%edi), %xmm0 | |
61 | pcmpeqb %xmm0, %xmm2 /* XMM2 byte = 0xff where the block holds NUL. */ | |
62 | pcmpeqb %xmm1, %xmm0 /* XMM0 byte = 0xff where the block holds the character. */ | |
63 | /* Find where NULL is. */ | |
64 | pmovmskb %xmm2, %edx | |
65 | /* Check if there is a match. */ | |
66 | pmovmskb %xmm0, %eax | |
67 | /* Remove the leading bytes. */ | |
68 | sarl %cl, %edx | |
69 | sarl %cl, %eax | |
70 | test %eax, %eax | |
71 | jz L(unaligned_no_match) | |
72 | /* Check which byte is a match. */ | |
73 | /* Is there a NULL? */ | |
74 | add %ecx, %edi /* EDI = original string pointer again, so bit N of the shifted masks corresponds to EDI + N. */ | |
75 | test %edx, %edx | |
76 | jz L(match_case1) | |
77 | jmp L(match_case2) | |
78 | ||
79 | .p2align 4 | |
80 | L(unaligned_no_match): | |
81 | test %edx, %edx /* No match in the first block; a NUL here ends the search. */ | |
82 | jne L(return_null) | |
83 | ||
84 | pxor %xmm2, %xmm2 /* Clear XMM2 again: it may still flag NUL bytes that lie before the string start. */ | |
85 | add $16, %edi /* EDI still holds the aligned block address here; advance to the next block. */ | |
86 | ||
87 | .p2align 4 | |
88 | /* Loop start on aligned string. The body is unrolled four times. XMM2 is all-zero at the top of every step, because a step only continues when its NUL mask was zero. */ | |
89 | L(loop): | |
90 | movdqa (%edi), %xmm0 | |
91 | pcmpeqb %xmm0, %xmm2 | |
92 | pcmpeqb %xmm1, %xmm0 | |
93 | pmovmskb %xmm2, %edx | |
94 | pmovmskb %xmm0, %eax | |
95 | test %eax, %eax | |
96 | jnz L(matches) | |
97 | test %edx, %edx | |
98 | jnz L(return_null) | |
99 | add $16, %edi | |
100 | ||
101 | movdqa (%edi), %xmm0 | |
102 | pcmpeqb %xmm0, %xmm2 | |
103 | pcmpeqb %xmm1, %xmm0 | |
104 | pmovmskb %xmm2, %edx | |
105 | pmovmskb %xmm0, %eax | |
106 | test %eax, %eax | |
107 | jnz L(matches) | |
108 | test %edx, %edx | |
109 | jnz L(return_null) | |
110 | add $16, %edi | |
111 | ||
112 | movdqa (%edi), %xmm0 | |
113 | pcmpeqb %xmm0, %xmm2 | |
114 | pcmpeqb %xmm1, %xmm0 | |
115 | pmovmskb %xmm2, %edx | |
116 | pmovmskb %xmm0, %eax | |
117 | test %eax, %eax | |
118 | jnz L(matches) | |
119 | test %edx, %edx | |
120 | jnz L(return_null) | |
121 | add $16, %edi | |
122 | ||
123 | movdqa (%edi), %xmm0 | |
124 | pcmpeqb %xmm0, %xmm2 | |
125 | pcmpeqb %xmm1, %xmm0 | |
126 | pmovmskb %xmm2, %edx | |
127 | pmovmskb %xmm0, %eax | |
128 | test %eax, %eax | |
129 | jnz L(matches) | |
130 | test %edx, %edx | |
131 | jnz L(return_null) | |
132 | add $16, %edi | |
133 | jmp L(loop) | |
134 | ||
135 | L(matches): | |
136 | /* There is a match. First find where NULL is. */ | |
137 | test %edx, %edx | |
138 | jz L(match_case1) | |
139 | ||
140 | .p2align 4 | |
141 | L(match_case2): /* The block holds both a match (EAX) and a NUL (EDX). Walk the bits from low to high and stop at whichever is set first; at each position the match bit is tested before the NUL bit. */ | |
142 | test %al, %al | |
143 | jz L(match_higth_case2) /* No match in bytes 0-7. */ | |
144 | ||
145 | mov %al, %cl | |
146 | and $15, %cl | |
147 | jnz L(match_case2_4) /* A match lies in bytes 0-3. */ | |
148 | ||
149 | mov %dl, %ch | |
150 | and $15, %ch | |
151 | jnz L(return_null) /* The first match is in bytes 4-7 but a NUL lies in bytes 0-3. */ | |
152 | ||
153 | test $0x10, %al | |
154 | jnz L(Exit5) | |
155 | test $0x10, %dl | |
156 | jnz L(return_null) | |
157 | test $0x20, %al | |
158 | jnz L(Exit6) | |
159 | test $0x20, %dl | |
160 | jnz L(return_null) | |
161 | test $0x40, %al | |
162 | jnz L(Exit7) | |
163 | test $0x40, %dl | |
164 | jnz L(return_null) | |
165 | lea 7(%edi), %eax /* Bits 4-6 are clear in both masks, so the match must be bit 7. */ | |
166 | RETURN | |
167 | ||
168 | .p2align 4 | |
169 | L(match_case2_4): | |
170 | test $0x01, %al | |
171 | jnz L(Exit1) | |
172 | test $0x01, %dl | |
173 | jnz L(return_null) | |
174 | test $0x02, %al | |
175 | jnz L(Exit2) | |
176 | test $0x02, %dl | |
177 | jnz L(return_null) | |
178 | test $0x04, %al | |
179 | jnz L(Exit3) | |
180 | test $0x04, %dl | |
181 | jnz L(return_null) | |
182 | lea 3(%edi), %eax /* Bits 0-2 are clear in both masks, so the match must be bit 3. */ | |
183 | RETURN | |
184 | ||
185 | .p2align 4 | |
186 | L(match_higth_case2): /* Match and NUL both present; the first match is in bytes 8-15 (AH). */ | |
187 | test %dl, %dl | |
188 | jnz L(return_null) /* A NUL in bytes 0-7 comes before it. */ | |
189 | ||
190 | mov %ah, %cl | |
191 | and $15, %cl | |
192 | jnz L(match_case2_12) /* A match lies in bytes 8-11. */ | |
193 | ||
194 | mov %dh, %ch | |
195 | and $15, %ch | |
196 | jnz L(return_null) /* The first match is in bytes 12-15 but a NUL lies in bytes 8-11. */ | |
197 | ||
198 | test $0x10, %ah | |
199 | jnz L(Exit13) | |
200 | test $0x10, %dh | |
201 | jnz L(return_null) | |
202 | test $0x20, %ah | |
203 | jnz L(Exit14) | |
204 | test $0x20, %dh | |
205 | jnz L(return_null) | |
206 | test $0x40, %ah | |
207 | jnz L(Exit15) | |
208 | test $0x40, %dh | |
209 | jnz L(return_null) | |
210 | lea 15(%edi), %eax /* Bits 12-14 are clear in both masks, so the match must be bit 15. */ | |
211 | RETURN | |
212 | ||
213 | .p2align 4 | |
214 | L(match_case2_12): | |
215 | test $0x01, %ah | |
216 | jnz L(Exit9) | |
217 | test $0x01, %dh | |
218 | jnz L(return_null) | |
219 | test $0x02, %ah | |
220 | jnz L(Exit10) | |
221 | test $0x02, %dh | |
222 | jnz L(return_null) | |
223 | test $0x04, %ah | |
224 | jnz L(Exit11) | |
225 | test $0x04, %dh | |
226 | jnz L(return_null) | |
227 | lea 11(%edi), %eax /* Bits 8-10 are clear in both masks, so the match must be bit 11. */ | |
228 | RETURN | |
229 | ||
230 | .p2align 4 | |
231 | L(match_case1): /* The block holds a match and no NUL: return the position of the lowest set bit of EAX. */ | |
232 | test %al, %al | |
233 | jz L(match_higth_case1) | |
234 | ||
235 | test $0x01, %al | |
236 | jnz L(Exit1) | |
237 | test $0x02, %al | |
238 | jnz L(Exit2) | |
239 | test $0x04, %al | |
240 | jnz L(Exit3) | |
241 | test $0x08, %al | |
242 | jnz L(Exit4) | |
243 | test $0x10, %al | |
244 | jnz L(Exit5) | |
245 | test $0x20, %al | |
246 | jnz L(Exit6) | |
247 | test $0x40, %al | |
248 | jnz L(Exit7) | |
249 | lea 7(%edi), %eax /* AL is non-zero with bits 0-6 clear, so the match is bit 7. */ | |
250 | RETURN | |
251 | ||
252 | .p2align 4 | |
253 | L(match_higth_case1): /* No NUL, and the match is in bytes 8-15 (AH). */ | |
254 | test $0x01, %ah | |
255 | jnz L(Exit9) | |
256 | test $0x02, %ah | |
257 | jnz L(Exit10) | |
258 | test $0x04, %ah | |
259 | jnz L(Exit11) | |
260 | test $0x08, %ah | |
261 | jnz L(Exit12) | |
262 | test $0x10, %ah | |
263 | jnz L(Exit13) | |
264 | test $0x20, %ah | |
265 | jnz L(Exit14) | |
266 | test $0x40, %ah | |
267 | jnz L(Exit15) | |
268 | lea 15(%edi), %eax /* Bits 8-14 are clear, so the match is bit 15. */ | |
269 | RETURN | |
270 | ||
271 | .p2align 4 | |
272 | L(Exit1): /* Each L(ExitN) returns EDI + N - 1. There are no Exit8 and Exit16 labels; those positions are returned inline by the fall-through paths above. */ | |
273 | lea (%edi), %eax | |
274 | RETURN | |
275 | ||
276 | .p2align 4 | |
277 | L(Exit2): | |
278 | lea 1(%edi), %eax | |
279 | RETURN | |
280 | ||
281 | .p2align 4 | |
282 | L(Exit3): | |
283 | lea 2(%edi), %eax | |
284 | RETURN | |
285 | ||
286 | .p2align 4 | |
287 | L(Exit4): | |
288 | lea 3(%edi), %eax | |
289 | RETURN | |
290 | ||
291 | .p2align 4 | |
292 | L(Exit5): | |
293 | lea 4(%edi), %eax | |
294 | RETURN | |
295 | ||
296 | .p2align 4 | |
297 | L(Exit6): | |
298 | lea 5(%edi), %eax | |
299 | RETURN | |
300 | ||
301 | .p2align 4 | |
302 | L(Exit7): | |
303 | lea 6(%edi), %eax | |
304 | RETURN | |
305 | ||
306 | .p2align 4 | |
307 | L(Exit9): | |
308 | lea 8(%edi), %eax | |
309 | RETURN | |
310 | ||
311 | .p2align 4 | |
312 | L(Exit10): | |
313 | lea 9(%edi), %eax | |
314 | RETURN | |
315 | ||
316 | .p2align 4 | |
317 | L(Exit11): | |
318 | lea 10(%edi), %eax | |
319 | RETURN | |
320 | ||
321 | .p2align 4 | |
322 | L(Exit12): | |
323 | lea 11(%edi), %eax | |
324 | RETURN | |
325 | ||
326 | .p2align 4 | |
327 | L(Exit13): | |
328 | lea 12(%edi), %eax | |
329 | RETURN | |
330 | ||
331 | .p2align 4 | |
332 | L(Exit14): | |
333 | lea 13(%edi), %eax | |
334 | RETURN | |
335 | ||
336 | .p2align 4 | |
337 | L(Exit15): | |
338 | lea 14(%edi), %eax | |
339 | RETURN | |
340 | ||
341 | /* Return NULL: a NUL byte was reached before any match. */ | |
342 | .p2align 4 | |
343 | L(return_null): | |
344 | xor %eax, %eax | |
345 | RETURN | |
346 | ||
347 | END (__strchr_sse2) | |
348 | #endif |