]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/multiarch/strchr-sse2.S
d1abeaa8e1f4abdc2c6227c713f1a64c69a3af0f
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / strchr-sse2.S
1 /* strchr SSE2 without bsf
2 Copyright (C) 2011-2019 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #if IS_IN (libc)
21
22 # include <sysdep.h>
23
/* Stack and unwind-info helpers for __strchr_sse2.

   CFI_PUSH (REG) is the unwind annotation that goes with a 4-byte push
   of REG: it grows the CFA offset by 4 and records REG as saved at
   offset 0 from the current stack pointer.  */
24 # define CFI_PUSH(REG) \
25 cfi_adjust_cfa_offset (4); \
26 cfi_rel_offset (REG, 0)
27
/* CFI_POP (REG) is the inverse annotation: it shrinks the CFA offset by
   4 and marks REG as restored.  */
28 # define CFI_POP(REG) \
29 cfi_adjust_cfa_offset (-4); \
30 cfi_restore (REG)
31
/* PUSH/POP emit the instruction together with its matching unwind
   annotation so the two cannot get out of sync.  */
32 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
33 # define POP(REG) popl REG; CFI_POP (REG)
34
/* PARMS is the offset from %esp of the first stack argument once
   ENTRANCE has run: 4 bytes of return address plus the 4 bytes of %edi
   pushed by ENTRANCE.  */
35 # define PARMS 8
/* ENTRANCE saves %edi, the only register the routine saves and
   restores.  */
36 # define ENTRANCE PUSH(%edi)
/* RETURN restores %edi and returns.  The CFI_PUSH after the ret emits
   no instruction; it re-applies the "%edi is pushed" unwind state for
   the code assembled after this return site, which is reached by jumps
   from paths where %edi is still on the stack.  */
37 # define RETURN POP(%edi); ret; CFI_PUSH(%edi);
38
/* Stack offsets of the two arguments: STR1 is loaded as the string
   pointer and STR2 as the character to search for.  */
39 # define STR1 PARMS
40 # define STR2 STR1+4
41
/* __strchr_sse2: locate the first occurrence of a byte in a
   NUL-terminated string, using SSE2 and no bit-scan instruction.

   Arguments (read from the stack after ENTRANCE):
     STR1(%esp)  pointer to the string
     STR2(%esp)  character to look for; only its low byte is used
   Result:
     %eax = address of the first matching byte, or 0 if the terminating
            NUL comes first.  When the character is 0 the address of the
            terminator is returned, because at every position the match
            bit is examined before the NUL bit.

   Register roles:
     %edi   address of the 16-byte chunk being examined (saved by
            ENTRANCE, restored by RETURN)
     %ecx   misalignment of the string pointer (0..15); %cl/%ch are
            reused as scratch while decoding the masks
     %xmm1  the search byte replicated into all 16 lanes
     %xmm2  all-zero vector, overwritten with the per-byte NUL compare
     %xmm0  the loaded chunk, overwritten with the per-byte match compare
     %eax   16-bit mask, bit i set when byte i equals the search byte
     %edx   16-bit mask, bit i set when byte i is NUL

   Every load is a 16-byte aligned movdqa.  For an unaligned string the
   first load starts before the string; the mask bits of those leading
   bytes are shifted out before they are looked at.

   NOTE(review): atom_text_section, ENTRY, END and L() are not defined
   in this file; presumably they come from <sysdep.h> -- confirm.  */
42 atom_text_section
43 ENTRY (__strchr_sse2)
44
45 ENTRANCE
46 mov STR1(%esp), %ecx
47 movd STR2(%esp), %xmm1
48
/* XMM2 = 0 for the NUL compare.  EDI = string pointer.  The two
   punpcklbw plus the pshufd below replicate the low byte of XMM1 into
   all 16 byte lanes.  */
49 pxor %xmm2, %xmm2
50 mov %ecx, %edi
51 punpcklbw %xmm1, %xmm1
52 punpcklbw %xmm1, %xmm1
53 /* ECX has OFFSET. */
54 and $15, %ecx
/* pshufd leaves EFLAGS alone, so the je below still tests the result of
   the "and": an already aligned string goes straight to the loop.  */
55 pshufd $0, %xmm1, %xmm1
56 je L(loop)
57
58 /* Handle unaligned string. */
/* Round EDI down to the enclosing aligned chunk and compare all 16
   bytes, including the ECX bytes that precede the string.  */
59 and $-16, %edi
60 movdqa (%edi), %xmm0
61 pcmpeqb %xmm0, %xmm2
62 pcmpeqb %xmm1, %xmm0
63 /* Find where NULL is. */
64 pmovmskb %xmm2, %edx
65 /* Check if there is a match. */
66 pmovmskb %xmm0, %eax
67 /* Remove the leading bytes. */
/* After the shifts bit 0 of each mask describes the first byte of the
   string.  */
68 sarl %cl, %edx
69 sarl %cl, %eax
70 test %eax, %eax
71 jz L(unaligned_no_match)
72 /* Check which byte is a match. */
73 /* Is there a NULL? */
/* Put EDI back on the original (unaligned) string pointer so that bit i
   of the shifted masks corresponds to address EDI + i, as the decoding
   code below expects.  */
74 add %ecx, %edi
75 test %edx, %edx
76 jz L(match_case1)
77 jmp L(match_case2)
78
79 .p2align 4
80 L(unaligned_no_match):
/* No match in the first chunk: a NUL there means the character is not
   in the string.  */
81 test %edx, %edx
82 jne L(return_null)
83
/* XMM2 may still hold compare results for the bytes in front of the
   string (their bits were shifted out of EDX), so clear it again.  EDI
   is still the rounded-down address here; step to the next chunk.  */
84 pxor %xmm2, %xmm2
85 add $16, %edi
86
87 .p2align 4
88 /* Loop start on aligned string. */
/* The loop body is unrolled four times.  Each copy loads one aligned
   chunk, builds the match mask (EAX) and the NUL mask (EDX), leaves the
   loop on a match, returns NULL on a NUL without a match, and otherwise
   advances EDI by 16.  XMM2 needs no re-zeroing between copies: falling
   through requires EDX == 0, i.e. the NUL compare left XMM2 all zero.  */
89 L(loop):
90 movdqa (%edi), %xmm0
91 pcmpeqb %xmm0, %xmm2
92 pcmpeqb %xmm1, %xmm0
93 pmovmskb %xmm2, %edx
94 pmovmskb %xmm0, %eax
95 test %eax, %eax
96 jnz L(matches)
97 test %edx, %edx
98 jnz L(return_null)
99 add $16, %edi
100
/* Second unrolled copy.  */
101 movdqa (%edi), %xmm0
102 pcmpeqb %xmm0, %xmm2
103 pcmpeqb %xmm1, %xmm0
104 pmovmskb %xmm2, %edx
105 pmovmskb %xmm0, %eax
106 test %eax, %eax
107 jnz L(matches)
108 test %edx, %edx
109 jnz L(return_null)
110 add $16, %edi
111
/* Third unrolled copy.  */
112 movdqa (%edi), %xmm0
113 pcmpeqb %xmm0, %xmm2
114 pcmpeqb %xmm1, %xmm0
115 pmovmskb %xmm2, %edx
116 pmovmskb %xmm0, %eax
117 test %eax, %eax
118 jnz L(matches)
119 test %edx, %edx
120 jnz L(return_null)
121 add $16, %edi
122
/* Fourth unrolled copy.  */
123 movdqa (%edi), %xmm0
124 pcmpeqb %xmm0, %xmm2
125 pcmpeqb %xmm1, %xmm0
126 pmovmskb %xmm2, %edx
127 pmovmskb %xmm0, %eax
128 test %eax, %eax
129 jnz L(matches)
130 test %edx, %edx
131 jnz L(return_null)
132 add $16, %edi
133 jmp L(loop)
134
135 L(matches):
136 /* There is a match. First find where NULL is. */
/* EAX != 0 here.  Without a NUL in the chunk (EDX == 0) only the lowest
   set bit of EAX matters: case 1.  Otherwise fall through to case 2.  */
137 test %edx, %edx
138 jz L(match_case1)
139
140 .p2align 4
/* Case 2: the chunk holds both a match and a NUL.  Walk the bit
   positions from low to high, testing the match bit and then the NUL
   bit of each one; whichever is found first decides between returning
   the match address and returning NULL.  */
141 L(match_case2):
/* AL == 0: no match in bytes 0..7, look at the high byte of the mask.  */
142 test %al, %al
143 jz L(match_higth_case2)
144
/* A match in bytes 0..3 is decoded bit by bit in match_case2_4.  */
145 mov %al, %cl
146 and $15, %cl
147 jnz L(match_case2_4)
148
/* No match in bytes 0..3, so a NUL there ends the string first.  */
149 mov %dl, %ch
150 and $15, %ch
151 jnz L(return_null)
152
153 test $0x10, %al
154 jnz L(Exit5)
155 test $0x10, %dl
156 jnz L(return_null)
157 test $0x20, %al
158 jnz L(Exit6)
159 test $0x20, %dl
160 jnz L(return_null)
161 test $0x40, %al
162 jnz L(Exit7)
163 test $0x40, %dl
164 jnz L(return_null)
/* AL is nonzero with bits 0..6 clear, so the match is byte 7, and no
   NUL was seen in bytes 0..6.  */
165 lea 7(%edi), %eax
166 RETURN
167
168 .p2align 4
/* Case 2, match somewhere in bytes 0..3.  */
169 L(match_case2_4):
170 test $0x01, %al
171 jnz L(Exit1)
172 test $0x01, %dl
173 jnz L(return_null)
174 test $0x02, %al
175 jnz L(Exit2)
176 test $0x02, %dl
177 jnz L(return_null)
178 test $0x04, %al
179 jnz L(Exit3)
180 test $0x04, %dl
181 jnz L(return_null)
/* Bits 0..2 of AL are clear but its low nibble is not: byte 3 matches.  */
182 lea 3(%edi), %eax
183 RETURN
184
185 .p2align 4
/* Case 2, first match in bytes 8..15 (AL == 0).  A NUL anywhere in
   bytes 0..7 therefore precedes the match.  The rest mirrors
   match_case2, using the high mask bytes AH and DH.  */
186 L(match_higth_case2):
187 test %dl, %dl
188 jnz L(return_null)
189
190 mov %ah, %cl
191 and $15, %cl
192 jnz L(match_case2_12)
193
194 mov %dh, %ch
195 and $15, %ch
196 jnz L(return_null)
197
198 test $0x10, %ah
199 jnz L(Exit13)
200 test $0x10, %dh
201 jnz L(return_null)
202 test $0x20, %ah
203 jnz L(Exit14)
204 test $0x20, %dh
205 jnz L(return_null)
206 test $0x40, %ah
207 jnz L(Exit15)
208 test $0x40, %dh
209 jnz L(return_null)
/* Only bit 7 of AH can be the match: byte 15.  */
210 lea 15(%edi), %eax
211 RETURN
212
213 .p2align 4
/* Case 2, match somewhere in bytes 8..11.  */
214 L(match_case2_12):
215 test $0x01, %ah
216 jnz L(Exit9)
217 test $0x01, %dh
218 jnz L(return_null)
219 test $0x02, %ah
220 jnz L(Exit10)
221 test $0x02, %dh
222 jnz L(return_null)
223 test $0x04, %ah
224 jnz L(Exit11)
225 test $0x04, %dh
226 jnz L(return_null)
/* Bits 0..2 of AH are clear but its low nibble is not: byte 11 matches.  */
227 lea 11(%edi), %eax
228 RETURN
229
230 .p2align 4
/* Case 1: a match and no NUL in the chunk.  Find the lowest set bit of
   the match mask by testing the bits one at a time.  */
231 L(match_case1):
232 test %al, %al
233 jz L(match_higth_case1)
234
235 test $0x01, %al
236 jnz L(Exit1)
237 test $0x02, %al
238 jnz L(Exit2)
239 test $0x04, %al
240 jnz L(Exit3)
241 test $0x08, %al
242 jnz L(Exit4)
243 test $0x10, %al
244 jnz L(Exit5)
245 test $0x20, %al
246 jnz L(Exit6)
247 test $0x40, %al
248 jnz L(Exit7)
/* AL is nonzero with bits 0..6 clear: byte 7 matches.  */
249 lea 7(%edi), %eax
250 RETURN
251
252 .p2align 4
/* Case 1 with AL == 0: the match is in bytes 8..15, decode AH.  */
253 L(match_higth_case1):
254 test $0x01, %ah
255 jnz L(Exit9)
256 test $0x02, %ah
257 jnz L(Exit10)
258 test $0x04, %ah
259 jnz L(Exit11)
260 test $0x08, %ah
261 jnz L(Exit12)
262 test $0x10, %ah
263 jnz L(Exit13)
264 test $0x20, %ah
265 jnz L(Exit14)
266 test $0x40, %ah
267 jnz L(Exit15)
/* Bits 0..6 of AH are clear, leaving bit 7: byte 15 matches.  */
268 lea 15(%edi), %eax
269 RETURN
270
/* Return stubs.  L(ExitN) returns EDI + (N - 1), the address of the
   N-th byte of the current chunk.  There is no Exit8 or Exit16; those
   positions are returned by the fall-through "lea 7" / "lea 15" code
   above.  */
271 .p2align 4
272 L(Exit1):
273 lea (%edi), %eax
274 RETURN
275
276 .p2align 4
277 L(Exit2):
278 lea 1(%edi), %eax
279 RETURN
280
281 .p2align 4
282 L(Exit3):
283 lea 2(%edi), %eax
284 RETURN
285
286 .p2align 4
287 L(Exit4):
288 lea 3(%edi), %eax
289 RETURN
290
291 .p2align 4
292 L(Exit5):
293 lea 4(%edi), %eax
294 RETURN
295
296 .p2align 4
297 L(Exit6):
298 lea 5(%edi), %eax
299 RETURN
300
301 .p2align 4
302 L(Exit7):
303 lea 6(%edi), %eax
304 RETURN
305
306 .p2align 4
307 L(Exit9):
308 lea 8(%edi), %eax
309 RETURN
310
311 .p2align 4
312 L(Exit10):
313 lea 9(%edi), %eax
314 RETURN
315
316 .p2align 4
317 L(Exit11):
318 lea 10(%edi), %eax
319 RETURN
320
321 .p2align 4
322 L(Exit12):
323 lea 11(%edi), %eax
324 RETURN
325
326 .p2align 4
327 L(Exit13):
328 lea 12(%edi), %eax
329 RETURN
330
331 .p2align 4
332 L(Exit14):
333 lea 13(%edi), %eax
334 RETURN
335
336 .p2align 4
337 L(Exit15):
338 lea 14(%edi), %eax
339 RETURN
340
341 /* Return NULL. */
/* Reached when the terminating NUL precedes any match.  */
342 .p2align 4
343 L(return_null):
344 xor %eax, %eax
345 RETURN
346
347 END (__strchr_sse2)
348 #endif