]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / memchr-sse2-bsf.S
1 /* Optimized memchr with sse2
2 Copyright (C) 2011-2015 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #if IS_IN (libc)
21
22 # include <sysdep.h>
23
/* Unwind-info bookkeeping for a 4-byte push of REG: the CFA offset
   grows by 4 and REG is recorded as saved at relative offset 0.
   cfi_adjust_cfa_offset / cfi_rel_offset are not defined in this file;
   presumably they come from <sysdep.h>.  */
24 # define CFI_PUSH(REG) \
25 cfi_adjust_cfa_offset (4); \
26 cfi_rel_offset (REG, 0)
27
/* Inverse of CFI_PUSH: the CFA offset shrinks by 4 and REG is marked
   as restored.  */
28 # define CFI_POP(REG) \
29 cfi_adjust_cfa_offset (-4); \
30 cfi_restore (REG)
31
/* Push / pop a 32-bit register and emit the matching unwind
   annotation right after the instruction.  */
32 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
33 # define POP(REG) popl REG; CFI_POP (REG)
34
/* Offsets of the stack arguments relative to %esp at function entry.
   STR1 is loaded as the buffer pointer and STR2 as the byte to search
   for (see the loads right after ENTRY).  PARMS is 4, presumably to
   skip the return address pushed by the caller.  */
35 # define PARMS 4
36 # define STR1 PARMS
37 # define STR2 STR1+4
38
/* Only the length-bounded variant has a third argument and saves
   %edi.  RETURN pops %edi and returns; the CFI_PUSH after the `ret'
   re-applies the "%edi is pushed" unwind state for the instructions
   that textually follow the expansion.  */
39 # ifndef USE_AS_RAWMEMCHR
40 # define LEN STR2+4
41 # define RETURN POP(%edi); ret; CFI_PUSH(%edi);
42 # endif
43
/* Default symbol name; an including file may predefine MEMCHR.  */
44 # ifndef MEMCHR
45 # define MEMCHR __memchr_sse2_bsf
46 # endif
47
48 .text
/* MEMCHR: search for a byte using SSE2 16-byte compares.
   Stack arguments: STR1 = buffer pointer, STR2 = byte to find and,
   unless USE_AS_RAWMEMCHR is defined, LEN = byte count.
   Result in %eax: address of the first match, or 0 when the bounded
   variant finds none within LEN bytes.
   Register roles in the code below (bounded variant):
     %xmm1 = search byte replicated into all 16 lanes
     %edx  = bytes remaining, %edi = current 16-byte aligned pointer
   In the USE_AS_RAWMEMCHR variant %edx holds the aligned pointer
   instead and %edi is not used.  */
49 ENTRY (MEMCHR)
50
51 mov STR1(%esp), %ecx
52 movd STR2(%esp), %xmm1
53
54 # ifndef USE_AS_RAWMEMCHR
55 mov LEN(%esp), %edx
/* A zero length can never match.  */
56 test %edx, %edx
57 jz L(return_null_1)
58 # endif
/* Keep the original pointer in %eax; %ecx becomes scratch.  */
59 mov %ecx, %eax
60
/* Two byte-interleaves plus the dword shuffle below replicate the low
   byte of %xmm1 into all 16 bytes.  */
61 punpcklbw %xmm1, %xmm1
62 punpcklbw %xmm1, %xmm1
63
/* %ecx = offset of the pointer inside its 64-byte block.  */
64 and $63, %ecx
65 pshufd $0, %xmm1, %xmm1
66
/* With an offset above 48, a 16-byte load from the pointer would run
   past the end of the 64-byte block; take the aligned-load path.  */
67 cmp $48, %ecx
68 ja L(crosscache)
69
/* Unaligned load of the first 16 bytes and byte-wise compare.  */
70 movdqu (%eax), %xmm0
71 pcmpeqb %xmm1, %xmm0
72 /* Check if there is a match. */
73 pmovmskb %xmm0, %ecx
74 test %ecx, %ecx
75 je L(unaligned_no_match_1)
76 /* Check which byte is a match. */
77 bsf %ecx, %ecx
78
79 # ifndef USE_AS_RAWMEMCHR
/* A match at index >= length lies outside the buffer.  */
80 sub %ecx, %edx
81 jbe L(return_null_1)
82 # endif
/* Result = start pointer + index of the first matching byte.  */
83 add %ecx, %eax
84 ret
85
86 .p2align 4
/* The first (unaligned) 16 bytes contain no match.  Set up the aligned
   scan that starts at the next 16-byte boundary.  */
87 L(unaligned_no_match_1):
88 # ifndef USE_AS_RAWMEMCHR
/* 16 bytes or fewer in total: everything has been checked already.  */
89 sub $16, %edx
90 jbe L(return_null_1)
91 PUSH (%edi)
/* %edi = (start + 16) rounded down to a multiple of 16.  */
92 lea 16(%eax), %edi
93 and $15, %eax
94 and $-16, %edi
/* The aligned pointer sits (start & 15) bytes before start + 16, so
   that many bytes are added back to the remaining count.  */
95 add %eax, %edx
96 # else
/* Unbounded variant: %edx = (start + 16) rounded down to 16.  */
97 lea 16(%eax), %edx
98 and $-16, %edx
99 # endif
100 jmp L(loop_prolog)
101
102 .p2align 4
/* "Not found" exit for the paths that have not pushed %edi: every jump
   to this label in this file happens before any PUSH (%edi), so a
   plain `ret' is used.  */
103 L(return_null_1):
104 xor %eax, %eax
105 ret
106
/* Unwind annotation only: undo the CFI state left by the PUSH in the
   code above, since the code that follows starts with its own PUSH.  */
107 # ifndef USE_AS_RAWMEMCHR
108 CFI_POP (%edi)
109 # endif
110
111 .p2align 4
/* Entered when the start pointer lies in the last 15 bytes of a
   64-byte block.  Loads the enclosing aligned 16-byte block instead of
   reading across the boundary and discards the bytes before the
   start.  */
112 L(crosscache):
113 /* Handle unaligned string. */
114
115 # ifndef USE_AS_RAWMEMCHR
116 PUSH (%edi)
/* %edi = start rounded down to 16, %ecx = start & 15.  */
117 mov %eax, %edi
118 and $15, %ecx
119 and $-16, %edi
120 movdqa (%edi), %xmm0
121 # else
/* %edx = start rounded down to 16, %ecx = start & 15.  */
122 mov %eax, %edx
123 and $15, %ecx
124 and $-16, %edx
125 movdqa (%edx), %xmm0
126 # endif
127 pcmpeqb %xmm1, %xmm0
128 /* Check if there is a match. */
129 pmovmskb %xmm0, %eax
130 /* Remove the leading bytes. */
/* After the shift, bit 0 of the mask corresponds to the start byte.  */
131 sar %cl, %eax
132 test %eax, %eax
133 je L(unaligned_no_match)
134 /* Check which byte is a match. */
135 bsf %eax, %eax
136
137 # ifndef USE_AS_RAWMEMCHR
/* %eax is the match index relative to the start pointer; an index at
   or beyond the length is outside the buffer.  */
138 sub %eax, %edx
139 jbe L(return_null)
/* Result = aligned base + (start & 15) + index.  */
140 add %edi, %eax
141 add %ecx, %eax
142 RETURN
143 # else
/* Result = aligned base + (start & 15) + index.  */
144 add %edx, %eax
145 add %ecx, %eax
146 ret
147 # endif
148
/* The aligned 16-byte block that contains the start of the buffer held
   no match at or after the start.  Advance to the next block and fall
   through into L(loop_prolog).
   On entry (bounded variant): %edx = n, %ecx = s & 15, %edi = s & -16.
   %eax is zero and dead here: this label is reached only through the
   `je' that directly follows `test %eax, %eax'.  */
	.p2align 4
L(unaligned_no_match):
# ifndef USE_AS_RAWMEMCHR
	/* Bytes remaining after this block: edx = n + (s & 15) - 16.
	   n is an unsigned size, so the test must use an unsigned
	   condition: with n >= 0x80000000 a signed `jle' sees a negative
	   count and wrongly reports "not found".  The addition is done
	   first, with saturation, so that a carry out of n + (s & 15)
	   cannot wrap around to a small count:
	     edx = n + ecx
	     edx |= -(carry)  */
	add	%ecx, %edx
	sbb	%eax, %eax
	or	%eax, %edx
	sub	$16, %edx
	/* n + (s & 15) <= 16: the buffer ends inside this block.  */
	jbe	L(return_null)
	add	$16, %edi
# else
	/* Unbounded variant: %edx is the aligned pointer; just advance.  */
	add	$16, %edx
# endif
159
160 .p2align 4
161 /* Loop start on aligned string. */
/* The pointer (%edi, or %edx in the unbounded variant) is 16-byte
   aligned here.  Up to two groups of four 16-byte blocks are checked
   one block at a time before the 64-byte main loop is entered.  */
162 L(loop_prolog):
163 # ifndef USE_AS_RAWMEMCHR
/* 64 bytes or fewer remain: let the tail code handle them.  */
164 sub $64, %edx
165 jbe L(exit_loop)
166 movdqa (%edi), %xmm0
167 # else
168 movdqa (%edx), %xmm0
169 # endif
/* Block at offset 0.  */
170 pcmpeqb %xmm1, %xmm0
171 pmovmskb %xmm0, %eax
172 test %eax, %eax
173 jnz L(matches)
174
/* Block at offset 16.  */
175 # ifndef USE_AS_RAWMEMCHR
176 movdqa 16(%edi), %xmm2
177 # else
178 movdqa 16(%edx), %xmm2
179 # endif
180 pcmpeqb %xmm1, %xmm2
181 pmovmskb %xmm2, %eax
182 test %eax, %eax
183 jnz L(matches16)
184
/* Block at offset 32.  */
185 # ifndef USE_AS_RAWMEMCHR
186 movdqa 32(%edi), %xmm3
187 # else
188 movdqa 32(%edx), %xmm3
189 # endif
190 pcmpeqb %xmm1, %xmm3
191 pmovmskb %xmm3, %eax
192 test %eax, %eax
193 jnz L(matches32)
194
/* Block at offset 48.  The pointer is advanced by 64 before the
   branch, which is why L(matches0) uses a displacement of -16.  */
195 # ifndef USE_AS_RAWMEMCHR
196 movdqa 48(%edi), %xmm4
197 # else
198 movdqa 48(%edx), %xmm4
199 # endif
200 pcmpeqb %xmm1, %xmm4
201
202 # ifndef USE_AS_RAWMEMCHR
203 add $64, %edi
204 # else
205 add $64, %edx
206 # endif
207 pmovmskb %xmm4, %eax
208 test %eax, %eax
209 jnz L(matches0)
210
/* Pointer already a multiple of 64: enter the main loop directly.  */
211 # ifndef USE_AS_RAWMEMCHR
212 test $0x3f, %edi
213 # else
214 test $0x3f, %edx
215 # endif
216 jz L(align64_loop)
217
/* Otherwise check a second group of four blocks the same way.  */
218 # ifndef USE_AS_RAWMEMCHR
219 sub $64, %edx
220 jbe L(exit_loop)
221 movdqa (%edi), %xmm0
222 # else
223 movdqa (%edx), %xmm0
224 # endif
225 pcmpeqb %xmm1, %xmm0
226 pmovmskb %xmm0, %eax
227 test %eax, %eax
228 jnz L(matches)
229
230 # ifndef USE_AS_RAWMEMCHR
231 movdqa 16(%edi), %xmm2
232 # else
233 movdqa 16(%edx), %xmm2
234 # endif
235 pcmpeqb %xmm1, %xmm2
236 pmovmskb %xmm2, %eax
237 test %eax, %eax
238 jnz L(matches16)
239
240 # ifndef USE_AS_RAWMEMCHR
241 movdqa 32(%edi), %xmm3
242 # else
243 movdqa 32(%edx), %xmm3
244 # endif
245 pcmpeqb %xmm1, %xmm3
246 pmovmskb %xmm3, %eax
247 test %eax, %eax
248 jnz L(matches32)
249
250 # ifndef USE_AS_RAWMEMCHR
251 movdqa 48(%edi), %xmm3
252 # else
253 movdqa 48(%edx), %xmm3
254 # endif
255 pcmpeqb %xmm1, %xmm3
256 pmovmskb %xmm3, %eax
257
/* The add changes the flags, so the mask is tested after it.  */
258 # ifndef USE_AS_RAWMEMCHR
259 add $64, %edi
260 # else
261 add $64, %edx
262 # endif
263 test %eax, %eax
264 jnz L(matches0)
265
/* Round the pointer down to a multiple of 64 for the main loop.  In
   the bounded variant the bytes stepped back over (pointer & 63) are
   added to the remaining count.  */
266 # ifndef USE_AS_RAWMEMCHR
267 mov %edi, %ecx
268 and $-64, %edi
269 and $63, %ecx
270 add %ecx, %edx
271 # else
272 and $-64, %edx
273 # endif
274
275 .p2align 4
/* Main loop: 64 bytes per iteration from a 64-byte aligned pointer.
   The four compare results are merged so that only one mask test is
   needed per iteration.  */
276 L(align64_loop):
277 # ifndef USE_AS_RAWMEMCHR
/* 64 bytes or fewer remain: let the tail code handle them.  */
278 sub $64, %edx
279 jbe L(exit_loop)
280 movdqa (%edi), %xmm0
281 movdqa 16(%edi), %xmm2
282 movdqa 32(%edi), %xmm3
283 movdqa 48(%edi), %xmm4
284 # else
285 movdqa (%edx), %xmm0
286 movdqa 16(%edx), %xmm2
287 movdqa 32(%edx), %xmm3
288 movdqa 48(%edx), %xmm4
289 # endif
290 pcmpeqb %xmm1, %xmm0
291 pcmpeqb %xmm1, %xmm2
292 pcmpeqb %xmm1, %xmm3
293 pcmpeqb %xmm1, %xmm4
294
/* Compare results are 0x00 / 0xff per byte, so the unsigned byte
   maximum merges them like an OR.  %xmm3 and %xmm4 are overwritten
   with merged data; %xmm0 and %xmm2 keep their own results.  */
295 pmaxub %xmm0, %xmm3
296 pmaxub %xmm2, %xmm4
297 pmaxub %xmm3, %xmm4
298 pmovmskb %xmm4, %eax
299
300 # ifndef USE_AS_RAWMEMCHR
301 add $64, %edi
302 # else
303 add $64, %edx
304 # endif
305
306 test %eax, %eax
307 jz L(align64_loop)
308
/* A match exists somewhere in the 64 bytes just scanned: step the
   pointer back to their start and find the block that holds it.  */
309 # ifndef USE_AS_RAWMEMCHR
310 sub $64, %edi
311 # else
312 sub $64, %edx
313 # endif
314
315 pmovmskb %xmm0, %eax
316 test %eax, %eax
317 jnz L(matches)
318
319 pmovmskb %xmm2, %eax
320 test %eax, %eax
321 jnz L(matches16)
322
/* The individual result for offset 32 was overwritten by the merge
   above, so that block is loaded and compared again.  */
323 # ifndef USE_AS_RAWMEMCHR
324 movdqa 32(%edi), %xmm3
325 # else
326 movdqa 32(%edx), %xmm3
327 # endif
328
329 pcmpeqb %xmm1, %xmm3
330
/* Compare the block at offset 48 straight into %xmm1.  This destroys
   the replicated search byte, which is not used again before every
   path from here returns.  */
331 # ifndef USE_AS_RAWMEMCHR
332 pcmpeqb 48(%edi), %xmm1
333 # else
334 pcmpeqb 48(%edx), %xmm1
335 # endif
336 pmovmskb %xmm3, %eax
337 test %eax, %eax
338 jnz L(matches32)
339
/* The merged mask was non-zero and the first three blocks had no
   match, so the match is in the block at offset 48; no test is made
   before the bit scan.  */
340 pmovmskb %xmm1, %eax
341 bsf %eax, %eax
342
343 # ifndef USE_AS_RAWMEMCHR
344 lea 48(%edi, %eax), %eax
345 RETURN
346 # else
347 lea 48(%edx, %eax), %eax
348 ret
349 # endif
350
351 # ifndef USE_AS_RAWMEMCHR
352 .p2align 4
/* Tail handling, bounded variant only.  Reached after `sub $64, %edx'
   produced a result <= 0; adding 64 back gives the number of bytes
   still to be checked starting at %edi.  */
353 L(exit_loop):
354 add $64, %edx
355 cmp $32, %edx
356 jbe L(exit_loop_32)
357
/* More than 32 bytes remain, so the blocks at offsets 0 and 16 lie
   completely inside the buffer and need no length check.  */
358 movdqa (%edi), %xmm0
359 pcmpeqb %xmm1, %xmm0
360 pmovmskb %xmm0, %eax
361 test %eax, %eax
362 jnz L(matches)
363
364 movdqa 16(%edi), %xmm2
365 pcmpeqb %xmm1, %xmm2
366 pmovmskb %xmm2, %eax
367 test %eax, %eax
368 jnz L(matches16)
369
/* The block at offset 32 may extend past the end of the buffer; the
   _1 handlers validate the match position against %edx.  */
370 movdqa 32(%edi), %xmm3
371 pcmpeqb %xmm1, %xmm3
372 pmovmskb %xmm3, %eax
373 test %eax, %eax
374 jnz L(matches32_1)
/* Nothing left beyond offset 48.  */
375 cmp $48, %edx
376 jbe L(return_null)
377
/* Last block; %xmm1 is overwritten because it is not needed again.  */
378 pcmpeqb 48(%edi), %xmm1
379 pmovmskb %xmm1, %eax
380 test %eax, %eax
381 jnz L(matches48_1)
382 xor %eax, %eax
383 RETURN
384
385 .p2align 4
/* At most 32 bytes remain: both blocks may extend past the end of the
   buffer, so both use the length-checking handlers.  */
386 L(exit_loop_32):
387 movdqa (%edi), %xmm0
388 pcmpeqb %xmm1, %xmm0
389 pmovmskb %xmm0, %eax
390 test %eax, %eax
391 jnz L(matches_1)
/* Nothing left beyond offset 16.  */
392 cmp $16, %edx
393 jbe L(return_null)
394
395 pcmpeqb 16(%edi), %xmm1
396 pmovmskb %xmm1, %eax
397 test %eax, %eax
398 jnz L(matches16_1)
399 xor %eax, %eax
400 RETURN
401 # endif
/* Match exits without a length check.  On entry %eax holds a non-zero
   pmovmskb mask; bsf turns it into the index of the first matching
   byte, which is added to the block address.  */
402 .p2align 4
/* Match in the block at offset 48, reached after the pointer has
   already been advanced by 64: 48 - 64 = -16.  */
403 L(matches0):
404 bsf %eax, %eax
405 # ifndef USE_AS_RAWMEMCHR
406 lea -16(%eax, %edi), %eax
407 RETURN
408 # else
409 lea -16(%eax, %edx), %eax
410 ret
411 # endif
412
413 .p2align 4
/* Match in the block at offset 0.  */
414 L(matches):
415 bsf %eax, %eax
416 # ifndef USE_AS_RAWMEMCHR
417 add %edi, %eax
418 RETURN
419 # else
420 add %edx, %eax
421 ret
422 # endif
423
424 .p2align 4
/* Match in the block at offset 16.  */
425 L(matches16):
426 bsf %eax, %eax
427 # ifndef USE_AS_RAWMEMCHR
428 lea 16(%eax, %edi), %eax
429 RETURN
430 # else
431 lea 16(%eax, %edx), %eax
432 ret
433 # endif
434
435 .p2align 4
/* Match in the block at offset 32.  */
436 L(matches32):
437 bsf %eax, %eax
438 # ifndef USE_AS_RAWMEMCHR
439 lea 32(%eax, %edi), %eax
440 RETURN
441 # else
442 lea 32(%eax, %edx), %eax
443 ret
444 # endif
445
/* Length-checking match exits used by the tail code (bounded variant
   only).  %edx holds the bytes remaining from %edi and %eax a non-zero
   match mask.  Each handler subtracts the block offset and the match
   index from %edx; a result <= 0 (unsigned borrow or zero) means the
   match lies at or beyond the end of the buffer.  */
446 # ifndef USE_AS_RAWMEMCHR
447 .p2align 4
/* Block at offset 0.  */
448 L(matches_1):
449 bsf %eax, %eax
450 sub %eax, %edx
451 jbe L(return_null)
452
453 add %edi, %eax
454 RETURN
455
456 .p2align 4
/* Block at offset 16.  */
457 L(matches16_1):
458 sub $16, %edx
459 bsf %eax, %eax
460 sub %eax, %edx
461 jbe L(return_null)
462
463 lea 16(%edi, %eax), %eax
464 RETURN
465
466 .p2align 4
/* Block at offset 32.  */
467 L(matches32_1):
468 sub $32, %edx
469 bsf %eax, %eax
470 sub %eax, %edx
471 jbe L(return_null)
472
473 lea 32(%edi, %eax), %eax
474 RETURN
475
476 .p2align 4
/* Block at offset 48.  */
477 L(matches48_1):
478 sub $48, %edx
479 bsf %eax, %eax
480 sub %eax, %edx
481 jbe L(return_null)
482
483 lea 48(%edi, %eax), %eax
484 RETURN
485 # endif
486 .p2align 4
/* "Not found" exit for the paths that run after %edi has been pushed:
   returns 0 in %eax.  The bounded variant restores %edi through
   RETURN; the unbounded variant never pushes it and uses `ret'.  */
487 L(return_null):
488 xor %eax, %eax
489 # ifndef USE_AS_RAWMEMCHR
490 RETURN
491 # else
492 ret
493 # endif
494
495 END (MEMCHR)
496 #endif