/* sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S  (GNU C Library)  */
/* Optimized memchr with sse2
   Copyright (C) 2011-2020 Free Software Foundation, Inc.
   Contributed by Intel Corporation.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# include <sysdep.h>

/* Unwind bookkeeping for a 4-byte push/pop of a callee-saved register.  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)

/* i386 stack-argument offsets (return address occupies [0,4)).  */
# define PARMS  4
# define STR1  PARMS
# define STR2  STR1+4

# ifndef USE_AS_RAWMEMCHR
#  define LEN  STR2+4
/* RETURN is used on paths where %edi has been pushed; the trailing
   CFI_PUSH rebalances the unwind state for the code that follows.  */
#  define RETURN  POP (%edi); ret; CFI_PUSH (%edi);
# endif

# ifndef MEMCHR
#  define MEMCHR __memchr_sse2_bsf
# endif

/* void *MEMCHR (const void *s, int c, size_t n)
   (no N argument when built as rawmemchr: USE_AS_RAWMEMCHR)

   Register roles:
     %xmm1 = C replicated into all 16 bytes.
     memchr variant:    %edi = aligned scan pointer (callee-saved, pushed
			       before use), %edx = bytes remaining.
     rawmemchr variant: %edx = aligned scan pointer (no length check).  */

	.text
ENTRY (MEMCHR)

	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

# ifndef USE_AS_RAWMEMCHR
	mov	LEN(%esp), %edx
	test	%edx, %edx
	jz	L(return_null_1)
# endif
	mov	%ecx, %eax

/* Broadcast the byte C to all 16 lanes of %xmm1.  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1

	and	$63, %ecx
	pshufd	$0, %xmm1, %xmm1

/* If S is within 48 bytes of a 64-byte boundary, an unaligned 16-byte
   load could cross a cache line; take the slow path.  */
	cmp	$48, %ecx
	ja	L(crosscache)

	movdqu	(%eax), %xmm0
	pcmpeqb	%xmm1, %xmm0
/* Check if there is a match.  */
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	je	L(unaligned_no_match_1)
/* Check which byte is a match.  */
	bsf	%ecx, %ecx

# ifndef USE_AS_RAWMEMCHR
	sub	%ecx, %edx
	jbe	L(return_null_1)
# endif
	add	%ecx, %eax
	ret

	.p2align 4
L(unaligned_no_match_1):
# ifndef USE_AS_RAWMEMCHR
	sub	$16, %edx
	jbe	L(return_null_1)
	PUSH	(%edi)
/* Advance to the next 16-byte boundary; fold the skipped misalignment
   bytes back into the remaining length.  */
	lea	16(%eax), %edi
	and	$15, %eax
	and	$-16, %edi
	add	%eax, %edx
# else
	lea	16(%eax), %edx
	and	$-16, %edx
# endif
	jmp	L(loop_prolog)

	.p2align 4
L(return_null_1):
	xor	%eax, %eax
	ret

# ifndef USE_AS_RAWMEMCHR
/* Rebalance unwind info: code below here runs with %edi pushed.  */
	CFI_POP	(%edi)
# endif

	.p2align 4
L(crosscache):
/* Handle unaligned string: load from the enclosing aligned 16-byte
   block and shift out the leading bytes before S.  */

# ifndef USE_AS_RAWMEMCHR
	PUSH	(%edi)
	mov	%eax, %edi
	and	$15, %ecx
	and	$-16, %edi
	movdqa	(%edi), %xmm0
# else
	mov	%eax, %edx
	and	$15, %ecx
	and	$-16, %edx
	movdqa	(%edx), %xmm0
# endif
	pcmpeqb	%xmm1, %xmm0
/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
/* Remove the leading bytes.  */
	sar	%cl, %eax
	test	%eax, %eax
	je	L(unaligned_no_match)
/* Check which byte is a match.  */
	bsf	%eax, %eax

# ifndef USE_AS_RAWMEMCHR
	sub	%eax, %edx
	jbe	L(return_null)
	add	%edi, %eax
	add	%ecx, %eax
	RETURN
# else
	add	%edx, %eax
	add	%ecx, %eax
	ret
# endif

	.p2align 4
L(unaligned_no_match):
# ifndef USE_AS_RAWMEMCHR
	/* Calculate the last acceptable address and check for possible
	   addition overflow by using saturated math:
	   edx = ecx + edx
	   edx |= -(edx < ecx)  */
	add	%ecx, %edx
	sbb	%eax, %eax
	or	%eax, %edx
	sub	$16, %edx
	jbe	L(return_null)
	add	$16, %edi
# else
	add	$16, %edx
# endif

	.p2align 4
/* Loop start on aligned string.  Unrolled 4x16 bytes per pass until the
   pointer reaches a 64-byte boundary, then the main align64 loop.  */
L(loop_prolog):
# ifndef USE_AS_RAWMEMCHR
	sub	$64, %edx
	jbe	L(exit_loop)
	movdqa	(%edi), %xmm0
# else
	movdqa	(%edx), %xmm0
# endif
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)

# ifndef USE_AS_RAWMEMCHR
	movdqa	16(%edi), %xmm2
# else
	movdqa	16(%edx), %xmm2
# endif
	pcmpeqb	%xmm1, %xmm2
	pmovmskb %xmm2, %eax
	test	%eax, %eax
	jnz	L(matches16)

# ifndef USE_AS_RAWMEMCHR
	movdqa	32(%edi), %xmm3
# else
	movdqa	32(%edx), %xmm3
# endif
	pcmpeqb	%xmm1, %xmm3
	pmovmskb %xmm3, %eax
	test	%eax, %eax
	jnz	L(matches32)

# ifndef USE_AS_RAWMEMCHR
	movdqa	48(%edi), %xmm4
# else
	movdqa	48(%edx), %xmm4
# endif
	pcmpeqb	%xmm1, %xmm4

# ifndef USE_AS_RAWMEMCHR
	add	$64, %edi
# else
	add	$64, %edx
# endif
	pmovmskb %xmm4, %eax
	test	%eax, %eax
	jnz	L(matches0)

# ifndef USE_AS_RAWMEMCHR
	test	$0x3f, %edi
# else
	test	$0x3f, %edx
# endif
	jz	L(align64_loop)

# ifndef USE_AS_RAWMEMCHR
	sub	$64, %edx
	jbe	L(exit_loop)
	movdqa	(%edi), %xmm0
# else
	movdqa	(%edx), %xmm0
# endif
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)

# ifndef USE_AS_RAWMEMCHR
	movdqa	16(%edi), %xmm2
# else
	movdqa	16(%edx), %xmm2
# endif
	pcmpeqb	%xmm1, %xmm2
	pmovmskb %xmm2, %eax
	test	%eax, %eax
	jnz	L(matches16)

# ifndef USE_AS_RAWMEMCHR
	movdqa	32(%edi), %xmm3
# else
	movdqa	32(%edx), %xmm3
# endif
	pcmpeqb	%xmm1, %xmm3
	pmovmskb %xmm3, %eax
	test	%eax, %eax
	jnz	L(matches32)

# ifndef USE_AS_RAWMEMCHR
	movdqa	48(%edi), %xmm3
# else
	movdqa	48(%edx), %xmm3
# endif
	pcmpeqb	%xmm1, %xmm3
	pmovmskb %xmm3, %eax

# ifndef USE_AS_RAWMEMCHR
	add	$64, %edi
# else
	add	$64, %edx
# endif
	test	%eax, %eax
	jnz	L(matches0)

# ifndef USE_AS_RAWMEMCHR
/* Round the pointer down to a 64-byte boundary; credit the rounded-off
   (already scanned) bytes back to the remaining length.  */
	mov	%edi, %ecx
	and	$-64, %edi
	and	$63, %ecx
	add	%ecx, %edx
# else
	and	$-64, %edx
# endif

	.p2align 4
L(align64_loop):
# ifndef USE_AS_RAWMEMCHR
	sub	$64, %edx
	jbe	L(exit_loop)
	movdqa	(%edi), %xmm0
	movdqa	16(%edi), %xmm2
	movdqa	32(%edi), %xmm3
	movdqa	48(%edi), %xmm4
# else
	movdqa	(%edx), %xmm0
	movdqa	16(%edx), %xmm2
	movdqa	32(%edx), %xmm3
	movdqa	48(%edx), %xmm4
# endif
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm1, %xmm2
	pcmpeqb	%xmm1, %xmm3
	pcmpeqb	%xmm1, %xmm4

/* pcmpeqb lanes are 0x00/0xff, so pmaxub ORs the four match vectors
   into %xmm4: any nonzero mask byte means some chunk matched.  */
	pmaxub	%xmm0, %xmm3
	pmaxub	%xmm2, %xmm4
	pmaxub	%xmm3, %xmm4
	pmovmskb %xmm4, %eax

# ifndef USE_AS_RAWMEMCHR
	add	$64, %edi
# else
	add	$64, %edx
# endif

	test	%eax, %eax
	jz	L(align64_loop)

/* A match exists in the previous 64 bytes; back up and locate it.  */
# ifndef USE_AS_RAWMEMCHR
	sub	$64, %edi
# else
	sub	$64, %edx
# endif

	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)

	pmovmskb %xmm2, %eax
	test	%eax, %eax
	jnz	L(matches16)

/* xmm3/xmm4 were clobbered by the pmaxub reduction; recompute.  */
# ifndef USE_AS_RAWMEMCHR
	movdqa	32(%edi), %xmm3
# else
	movdqa	32(%edx), %xmm3
# endif

	pcmpeqb	%xmm1, %xmm3

# ifndef USE_AS_RAWMEMCHR
	pcmpeqb	48(%edi), %xmm1
# else
	pcmpeqb	48(%edx), %xmm1
# endif
	pmovmskb %xmm3, %eax
	test	%eax, %eax
	jnz	L(matches32)

	pmovmskb %xmm1, %eax
	bsf	%eax, %eax

# ifndef USE_AS_RAWMEMCHR
	lea	48(%edi, %eax), %eax
	RETURN
# else
	lea	48(%edx, %eax), %eax
	ret
# endif

# ifndef USE_AS_RAWMEMCHR
	.p2align 4
/* Fewer than 64 bytes remain (%edx went <= 0 after sub $64); restore
   the count and scan the 0-63 tail bytes with length checks.  */
L(exit_loop):
	add	$64, %edx
	cmp	$32, %edx
	jbe	L(exit_loop_32)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)

	movdqa	16(%edi), %xmm2
	pcmpeqb	%xmm1, %xmm2
	pmovmskb %xmm2, %eax
	test	%eax, %eax
	jnz	L(matches16)

	movdqa	32(%edi), %xmm3
	pcmpeqb	%xmm1, %xmm3
	pmovmskb %xmm3, %eax
	test	%eax, %eax
	jnz	L(matches32_1)
	cmp	$48, %edx
	jbe	L(return_null)

	pcmpeqb	48(%edi), %xmm1
	pmovmskb %xmm1, %eax
	test	%eax, %eax
	jnz	L(matches48_1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(exit_loop_32):
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches_1)
	cmp	$16, %edx
	jbe	L(return_null)

	pcmpeqb	16(%edi), %xmm1
	pmovmskb %xmm1, %eax
	test	%eax, %eax
	jnz	L(matches16_1)
	xor	%eax, %eax
	RETURN
# endif

/* Match found at chunk offset 48 after the pointer was advanced by 64,
   hence the -16 bias.  */
	.p2align 4
L(matches0):
	bsf	%eax, %eax
# ifndef USE_AS_RAWMEMCHR
	lea	-16(%eax, %edi), %eax
	RETURN
# else
	lea	-16(%eax, %edx), %eax
	ret
# endif

	.p2align 4
L(matches):
	bsf	%eax, %eax
# ifndef USE_AS_RAWMEMCHR
	add	%edi, %eax
	RETURN
# else
	add	%edx, %eax
	ret
# endif

	.p2align 4
L(matches16):
	bsf	%eax, %eax
# ifndef USE_AS_RAWMEMCHR
	lea	16(%eax, %edi), %eax
	RETURN
# else
	lea	16(%eax, %edx), %eax
	ret
# endif

	.p2align 4
L(matches32):
	bsf	%eax, %eax
# ifndef USE_AS_RAWMEMCHR
	lea	32(%eax, %edi), %eax
	RETURN
# else
	lea	32(%eax, %edx), %eax
	ret
# endif

# ifndef USE_AS_RAWMEMCHR
/* *_1 variants: match found in the tail; verify the match offset is
   still within the remaining N bytes before returning it.  */
	.p2align 4
L(matches_1):
	bsf	%eax, %eax
	sub	%eax, %edx
	jbe	L(return_null)

	add	%edi, %eax
	RETURN

	.p2align 4
L(matches16_1):
	sub	$16, %edx
	bsf	%eax, %eax
	sub	%eax, %edx
	jbe	L(return_null)

	lea	16(%edi, %eax), %eax
	RETURN

	.p2align 4
L(matches32_1):
	sub	$32, %edx
	bsf	%eax, %eax
	sub	%eax, %edx
	jbe	L(return_null)

	lea	32(%edi, %eax), %eax
	RETURN

	.p2align 4
L(matches48_1):
	sub	$48, %edx
	bsf	%eax, %eax
	sub	%eax, %edx
	jbe	L(return_null)

	lea	48(%edi, %eax), %eax
	RETURN
# endif
	.p2align 4
L(return_null):
	xor	%eax, %eax
# ifndef USE_AS_RAWMEMCHR
	RETURN
# else
	ret
# endif

END (MEMCHR)
#endif