]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/i686/multiarch/memchr-sse2.S
Update copyright dates with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / memchr-sse2.S
CommitLineData
951fbcec 1/* Optimized memchr with sse2 without bsf
2b778ceb 2 Copyright (C) 2011-2021 Free Software Foundation, Inc.
951fbcec
LD
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6 17 License along with the GNU C Library; if not, see
5a82c748 18 <https://www.gnu.org/licenses/>. */
951fbcec 19
4f41c682 20#if IS_IN (libc)
951fbcec
LD
21
22# include <sysdep.h>
23
/* Unwind (CFI) bookkeeping to place right after a 4-byte push of REG:
   the CFA offset grows by 4 and REG is recorded as saved at offset 0
   from the current stack pointer.  */
24# define CFI_PUSH(REG) \
25 cfi_adjust_cfa_offset (4); \
26 cfi_rel_offset (REG, 0)
27
/* Inverse of CFI_PUSH, to place right after a 4-byte pop of REG.  */
28# define CFI_POP(REG) \
29 cfi_adjust_cfa_offset (-4); \
30 cfi_restore (REG)
31
/* pushl/popl combined with the matching CFI annotation.  */
32# define PUSH(REG) pushl REG; CFI_PUSH (REG)
33# define POP(REG) popl REG; CFI_POP (REG)
34
/* memchr build (USE_AS_RAWMEMCHR undefined): %edi is saved on entry, so
   the first stack argument sits at 8(%esp) (return address + saved
   %edi).  RETURN restores %edi and returns; the CFI_PUSH after the
   "ret" re-applies the "%edi is pushed" annotation for the code that is
   assembled after this return point.
   rawmemchr build: nothing is pushed, the first argument is at
   4(%esp), and no RETURN macro is defined (the code uses plain "ret").  */
35# ifndef USE_AS_RAWMEMCHR
36# define ENTRANCE PUSH(%edi);
37# define PARMS 8
38# define RETURN POP(%edi); ret; CFI_PUSH(%edi);
39# else
40# define ENTRANCE
41# define PARMS 4
42# endif
43
/* Offsets from %esp of the arguments: STR1 is loaded as the start
   pointer and STR2 as the byte value to look for.  */
44# define STR1 PARMS
45# define STR2 STR1+4
46
/* Third argument (byte count); only the memchr build has it.  */
47# ifndef USE_AS_RAWMEMCHR
48# define LEN STR2+4
49# endif
50
/* Symbol name of the routine; can be overridden before this point.  */
51# ifndef MEMCHR
52# define MEMCHR __memchr_sse2
53# endif
54
/* Entry of the routine.  Register roles used throughout:
     %xmm1  the byte searched for, replicated into all 16 lanes
     memchr build:     %edi = current pointer, %edx = bytes remaining
     rawmemchr build:  %edx = current pointer (there is no length)
     %eax   result of pmovmskb (one bit per matching byte), later the
	    return value
     %ecx   scratch: misalignment, or the offset of the vector being
	    examined inside the current 64-byte group.
   atom_text_section is not defined in this file (presumably a section
   selector coming from <sysdep.h>).  */
55 atom_text_section
56ENTRY (MEMCHR)
57 ENTRANCE
58 mov STR1(%esp), %ecx
59 movd STR2(%esp), %xmm1
60# ifndef USE_AS_RAWMEMCHR
/* A zero length can never match: return NULL immediately.  */
61 mov LEN(%esp), %edx
62 test %edx, %edx
63 jz L(return_null)
64# endif
65
/* The two punpcklbw plus the pshufd below replicate the low byte of
   %xmm1 into all 16 bytes; they are interleaved with integer work.  */
66 punpcklbw %xmm1, %xmm1
67# ifndef USE_AS_RAWMEMCHR
68 mov %ecx, %edi
69# else
70 mov %ecx, %edx
71# endif
72 punpcklbw %xmm1, %xmm1
73
/* %ecx = start address modulo 64.  If it is above 48, a 16-byte load
   from the start address would extend past the end of its 64-byte
   block, so that case is handled by L(crosscache) with an aligned
   load instead.  */
74 and $63, %ecx
75 pshufd $0, %xmm1, %xmm1
76 cmp $48, %ecx
77 ja L(crosscache)
78
/* Unaligned probe of the first 16 bytes.  */
79# ifndef USE_AS_RAWMEMCHR
80 movdqu (%edi), %xmm0
81# else
82 movdqu (%edx), %xmm0
83# endif
84 pcmpeqb %xmm1, %xmm0
85 pmovmskb %xmm0, %eax
86 test %eax, %eax
87# ifndef USE_AS_RAWMEMCHR
/* A hit here still has to be validated against the length, hence the
   "case2" (length-checked) decoder.  */
88 jnz L(match_case2_prolog)
89
/* No hit in the first 16 bytes.  If the length was at most 16 we are
   done.  Otherwise move to the next 16-byte boundary; rounding down
   steps back over (start & 15) bytes, so that amount is added to the
   remaining count to keep %edx relative to the new %edi.  */
90 sub $16, %edx
91 jbe L(return_null)
92 lea 16(%edi), %edi
93 and $15, %ecx
94 and $-16, %edi
95 add %ecx, %edx
96# else
97 jnz L(match_case1_prolog)
/* rawmemchr: advance to the next 16-byte boundary.  */
98 lea 16(%edx), %edx
99 and $-16, %edx
100# endif
101 jmp L(loop_prolog)
102
/* First probe when the start address is within the last 15 bytes of a
   64-byte block.  The pointer is rounded down to 16 bytes and read with
   an aligned load; %ecx = start & 15 is the number of leading bytes of
   that vector that lie before the start.  Execution falls through into
   L(loop_prolog) when nothing is found.  */
103 .p2align 4
104L(crosscache):
105 and $15, %ecx
106# ifndef USE_AS_RAWMEMCHR
107 and $-16, %edi
108 movdqa (%edi), %xmm0
109# else
110 and $-16, %edx
111 movdqa (%edx), %xmm0
112# endif
113 pcmpeqb %xmm1, %xmm0
114 pmovmskb %xmm0, %eax
/* Shift out the mask bits of the bytes that precede the start, so that
   bit 0 corresponds to the first byte of the buffer.  */
115 sar %cl, %eax
116 test %eax, %eax
117
118# ifndef USE_AS_RAWMEMCHR
/* On a hit, L(match_case2_prolog1) adds %ecx back to %edi and then
   decodes the mask with length checks.  */
119 jnz L(match_case2_prolog1)
402bf069
L
120 /* "ecx" is less than 16. Calculate "edx + ecx - 16" by using
121 "edx - (16 - ecx)" instead of "(edx + ecx) - 16" to avoid
122 possible addition overflow. */
123 neg %ecx
124 add $16, %ecx
125 sub %ecx, %edx
23d27709 126 jbe L(return_null)
951fbcec
LD
127 lea 16(%edi), %edi
128# else
/* rawmemchr: on a hit, L(match_case1_prolog1) adds %ecx back to %edx;
   otherwise step to the next aligned vector.  */
129 jnz L(match_case1_prolog1)
130 lea 16(%edx), %edx
131# endif
132
/* Prologue of the main loop.  The pointer is 16-byte aligned here.  Two
   groups of four 16-byte vectors are examined one vector at a time;
   %ecx holds the offset (0, 16, 32, 48) of the vector being examined
   within its group, which L(match_case1) adds to the pointer.
   memchr build: 64 is subtracted from %edx before each group, and the
   group is only scanned here when more than 64 bytes remained, so a hit
   needs no length check; otherwise L(exit_loop) handles the tail.  */
133 .p2align 4
134L(loop_prolog):
135# ifndef USE_AS_RAWMEMCHR
136 sub $64, %edx
137 jbe L(exit_loop)
138 movdqa (%edi), %xmm0
139# else
140 movdqa (%edx), %xmm0
141# endif
/* First group, vector at offset 0.  */
142 pcmpeqb %xmm1, %xmm0
143 xor %ecx, %ecx
144 pmovmskb %xmm0, %eax
145 test %eax, %eax
146 jnz L(match_case1)
147
/* First group, vector at offset 16.  */
148# ifndef USE_AS_RAWMEMCHR
149 movdqa 16(%edi), %xmm2
150# else
151 movdqa 16(%edx), %xmm2
152# endif
153 pcmpeqb %xmm1, %xmm2
154 lea 16(%ecx), %ecx
155 pmovmskb %xmm2, %eax
156 test %eax, %eax
157 jnz L(match_case1)
158
/* First group, vector at offset 32.  */
159# ifndef USE_AS_RAWMEMCHR
160 movdqa 32(%edi), %xmm3
161# else
162 movdqa 32(%edx), %xmm3
163# endif
164 pcmpeqb %xmm1, %xmm3
165 lea 16(%ecx), %ecx
166 pmovmskb %xmm3, %eax
167 test %eax, %eax
168 jnz L(match_case1)
169
/* First group, vector at offset 48.  */
170# ifndef USE_AS_RAWMEMCHR
171 movdqa 48(%edi), %xmm4
172# else
173 movdqa 48(%edx), %xmm4
174# endif
175 pcmpeqb %xmm1, %xmm4
176 lea 16(%ecx), %ecx
177 pmovmskb %xmm4, %eax
178 test %eax, %eax
179 jnz L(match_case1)
180
/* Second group: advance by 64 and repeat the same four probes.  */
181# ifndef USE_AS_RAWMEMCHR
182 lea 64(%edi), %edi
183 sub $64, %edx
184 jbe L(exit_loop)
185
186 movdqa (%edi), %xmm0
187# else
188 lea 64(%edx), %edx
189 movdqa (%edx), %xmm0
190# endif
191 pcmpeqb %xmm1, %xmm0
192 xor %ecx, %ecx
193 pmovmskb %xmm0, %eax
194 test %eax, %eax
195 jnz L(match_case1)
196
197# ifndef USE_AS_RAWMEMCHR
198 movdqa 16(%edi), %xmm2
199# else
200 movdqa 16(%edx), %xmm2
201# endif
202 pcmpeqb %xmm1, %xmm2
203 lea 16(%ecx), %ecx
204 pmovmskb %xmm2, %eax
205 test %eax, %eax
206 jnz L(match_case1)
207
208# ifndef USE_AS_RAWMEMCHR
209 movdqa 32(%edi), %xmm3
210# else
211 movdqa 32(%edx), %xmm3
212# endif
213 pcmpeqb %xmm1, %xmm3
214 lea 16(%ecx), %ecx
215 pmovmskb %xmm3, %eax
216 test %eax, %eax
217 jnz L(match_case1)
218
219# ifndef USE_AS_RAWMEMCHR
220 movdqa 48(%edi), %xmm4
221# else
222 movdqa 48(%edx), %xmm4
223# endif
224 pcmpeqb %xmm1, %xmm4
225 lea 16(%ecx), %ecx
226 pmovmskb %xmm4, %eax
227 test %eax, %eax
228 jnz L(match_case1)
229
/* Nothing in these 128 bytes.  Step past them and round the pointer
   down to a 64-byte boundary for the main loop.  In the memchr build
   the bytes stepped back over (pointer & 63) are added to %edx so the
   count stays relative to the new %edi; those bytes were already
   examined above.  */
230# ifndef USE_AS_RAWMEMCHR
231 lea 64(%edi), %edi
232 mov %edi, %ecx
233 and $-64, %edi
234 and $63, %ecx
235 add %ecx, %edx
236# else
237 lea 64(%edx), %edx
238 and $-64, %edx
239# endif
240
/* Main loop: 64 bytes per iteration from a 64-byte aligned pointer.
   The four compare results are merged with pmaxub so one pmovmskb and
   one test decide whether any of the 64 bytes matched.  */
241 .p2align 4
242L(align64_loop):
243
244# ifndef USE_AS_RAWMEMCHR
/* memchr: leave for the length-checked tail code once 64 bytes or
   fewer remain.  */
245 sub $64, %edx
246 jbe L(exit_loop)
247 movdqa (%edi), %xmm0
248 movdqa 16(%edi), %xmm2
249 movdqa 32(%edi), %xmm3
250 movdqa 48(%edi), %xmm4
251# else
252 movdqa (%edx), %xmm0
253 movdqa 16(%edx), %xmm2
254 movdqa 32(%edx), %xmm3
255 movdqa 48(%edx), %xmm4
256# endif
257 pcmpeqb %xmm1, %xmm0
258 pcmpeqb %xmm1, %xmm2
259 pcmpeqb %xmm1, %xmm3
260 pcmpeqb %xmm1, %xmm4
261
/* %xmm4 = bytewise maximum of all four results; %xmm3 and %xmm4 are
   overwritten, %xmm0 and %xmm2 keep their individual results.  */
262 pmaxub %xmm0, %xmm3
263 pmaxub %xmm2, %xmm4
264 pmaxub %xmm3, %xmm4
265# ifndef USE_AS_RAWMEMCHR
266 add $64, %edi
267# else
268 add $64, %edx
269# endif
270 pmovmskb %xmm4, %eax
271
272 test %eax, %eax
273 jz L(align64_loop)
274
/* Some byte of the group matched: undo the pointer advance and find
   the first matching vector, tracking its offset in %ecx.  */
275# ifndef USE_AS_RAWMEMCHR
276 sub $64, %edi
277# else
278 sub $64, %edx
279# endif
280
281 pmovmskb %xmm0, %eax
282 xor %ecx, %ecx
283 test %eax, %eax
284 jnz L(match_case1)
285
286 pmovmskb %xmm2, %eax
287 lea 16(%ecx), %ecx
288 test %eax, %eax
289 jnz L(match_case1)
290
/* The result for offset 32 was destroyed by pmaxub: reload and compare
   again.  */
291# ifndef USE_AS_RAWMEMCHR
292 movdqa 32(%edi), %xmm3
293# else
294 movdqa 32(%edx), %xmm3
295# endif
296 pcmpeqb %xmm1, %xmm3
297 pmovmskb %xmm3, %eax
298 lea 16(%ecx), %ecx
299 test %eax, %eax
300 jnz L(match_case1)
301
/* Otherwise the hit is in the vector at offset 48.  The compare
   overwrites %xmm1 (the replicated byte), which is not read again on
   this path; control then falls through into L(match_case1).  */
302# ifndef USE_AS_RAWMEMCHR
303 pcmpeqb 48(%edi), %xmm1
304# else
305 pcmpeqb 48(%edx), %xmm1
306# endif
307 pmovmskb %xmm1, %eax
308 lea 16(%ecx), %ecx
309
/* "Case 1" decoder: a match is known to exist in the current vector and
   no length check is performed.  On entry %eax holds the non-zero
   16-bit match mask and %ecx the offset of the vector from the pointer.
   The index of the lowest set bit is found with a tree of byte/nibble
   tests instead of bsf, and pointer + index is returned.
   rawmemchr build only: L(match_case1_prolog1) adds %ecx to %edx,
   L(match_case1_prolog) enters with the pointer already adjusted.  */
310 .p2align 4
311L(match_case1):
312# ifndef USE_AS_RAWMEMCHR
313 add %ecx, %edi
314# else
315L(match_case1_prolog1):
316 add %ecx, %edx
317L(match_case1_prolog):
318# endif
/* Low mask byte zero: the first match is in bytes 8..15.  */
319 test %al, %al
320 jz L(match_case1_high)
/* Low nibble zero: the first match is in bytes 4..7.  */
321 mov %al, %cl
322 and $15, %cl
323 jz L(match_case1_8)
324 test $0x01, %al
325 jnz L(ExitCase1_1)
326 test $0x02, %al
327 jnz L(ExitCase1_2)
328 test $0x04, %al
329 jnz L(ExitCase1_3)
/* Only bit 3 is left: byte 3.  */
330# ifndef USE_AS_RAWMEMCHR
331 lea 3(%edi), %eax
332 RETURN
333# else
334 lea 3(%edx), %eax
335 ret
336# endif
337
/* First match is in bytes 4..7 (bits 0..3 of %al are clear).  */
338 .p2align 4
339L(match_case1_8):
340 test $0x10, %al
341 jnz L(ExitCase1_5)
342 test $0x20, %al
343 jnz L(ExitCase1_6)
344 test $0x40, %al
345 jnz L(ExitCase1_7)
346# ifndef USE_AS_RAWMEMCHR
347 lea 7(%edi), %eax
348 RETURN
349# else
350 lea 7(%edx), %eax
351 ret
352# endif
353
/* First match is in bytes 8..15: same decoding on %ah.  */
354 .p2align 4
355L(match_case1_high):
356 mov %ah, %ch
357 and $15, %ch
358 jz L(match_case1_high_8)
359 test $0x01, %ah
360 jnz L(ExitCase1_9)
361 test $0x02, %ah
362 jnz L(ExitCase1_10)
363 test $0x04, %ah
364 jnz L(ExitCase1_11)
365# ifndef USE_AS_RAWMEMCHR
366 lea 11(%edi), %eax
367 RETURN
368# else
369 lea 11(%edx), %eax
370 ret
371# endif
372
/* First match is in bytes 12..15.  */
373 .p2align 4
374L(match_case1_high_8):
375 test $0x10, %ah
376 jnz L(ExitCase1_13)
377 test $0x20, %ah
378 jnz L(ExitCase1_14)
379 test $0x40, %ah
380 jnz L(ExitCase1_15)
381# ifndef USE_AS_RAWMEMCHR
382 lea 15(%edi), %eax
383 RETURN
384# else
385 lea 15(%edx), %eax
386 ret
387# endif
388
/* memchr build only: tail handling, reached when a "sub $64, %edx"
   left no more than 64 bytes to examine from %edi.  The subtraction is
   undone first, so %edx is the number of bytes remaining.  Up to four
   vectors are probed; after each miss the scan stops as soon as the
   remaining count does not reach the next vector.  Hits go through
   L(match_case2), which validates the match position against %edx.  */
389# ifndef USE_AS_RAWMEMCHR
390 .p2align 4
391L(exit_loop):
392 add $64, %edx
393
394 movdqa (%edi), %xmm0
395 pcmpeqb %xmm1, %xmm0
396 xor %ecx, %ecx
397 pmovmskb %xmm0, %eax
398 test %eax, %eax
399 jnz L(match_case2)
400 cmp $16, %edx
401 jbe L(return_null)
402
403 movdqa 16(%edi), %xmm2
404 pcmpeqb %xmm1, %xmm2
405 lea 16(%ecx), %ecx
406 pmovmskb %xmm2, %eax
407 test %eax, %eax
408 jnz L(match_case2)
409 cmp $32, %edx
410 jbe L(return_null)
411
412 movdqa 32(%edi), %xmm3
413 pcmpeqb %xmm1, %xmm3
414 lea 16(%ecx), %ecx
415 pmovmskb %xmm3, %eax
416 test %eax, %eax
417 jnz L(match_case2)
418 cmp $48, %edx
419 jbe L(return_null)
420
/* Last vector: the compare overwrites %xmm1, which is not needed any
   more.  */
421 pcmpeqb 48(%edi), %xmm1
422 lea 16(%ecx), %ecx
423 pmovmskb %xmm1, %eax
424 test %eax, %eax
425 jnz L(match_case2)
426
/* Nothing found in the whole buffer.  */
427 xor %eax, %eax
428 RETURN
429# endif
430
/* Return stubs of the "case 1" decoder (no length check).  The labels
   are numbered from 1: L(ExitCase1_N) returns the address of byte N-1
   of the matching vector.  The base pointer is %edi in the memchr
   build and %edx in the rawmemchr build.  Bytes 3, 7, 11 and 15 have no
   stub here; the decoder returns them inline.  */
431 .p2align 4
432L(ExitCase1_1):
433# ifndef USE_AS_RAWMEMCHR
434 mov %edi, %eax
435 RETURN
436# else
437 mov %edx, %eax
438 ret
439# endif
440
441 .p2align 4
442L(ExitCase1_2):
443# ifndef USE_AS_RAWMEMCHR
444 lea 1(%edi), %eax
445 RETURN
446# else
447 lea 1(%edx), %eax
448 ret
449# endif
450
451 .p2align 4
452L(ExitCase1_3):
453# ifndef USE_AS_RAWMEMCHR
454 lea 2(%edi), %eax
455 RETURN
456# else
457 lea 2(%edx), %eax
458 ret
459# endif
460
461 .p2align 4
462L(ExitCase1_5):
463# ifndef USE_AS_RAWMEMCHR
464 lea 4(%edi), %eax
465 RETURN
466# else
467 lea 4(%edx), %eax
468 ret
469# endif
470
471 .p2align 4
472L(ExitCase1_6):
473# ifndef USE_AS_RAWMEMCHR
474 lea 5(%edi), %eax
475 RETURN
476# else
477 lea 5(%edx), %eax
478 ret
479# endif
480
481 .p2align 4
482L(ExitCase1_7):
483# ifndef USE_AS_RAWMEMCHR
484 lea 6(%edi), %eax
485 RETURN
486# else
487 lea 6(%edx), %eax
488 ret
489# endif
490
491 .p2align 4
492L(ExitCase1_9):
493# ifndef USE_AS_RAWMEMCHR
494 lea 8(%edi), %eax
495 RETURN
496# else
497 lea 8(%edx), %eax
498 ret
499# endif
500
501 .p2align 4
502L(ExitCase1_10):
503# ifndef USE_AS_RAWMEMCHR
504 lea 9(%edi), %eax
505 RETURN
506# else
507 lea 9(%edx), %eax
508 ret
509# endif
510
511 .p2align 4
512L(ExitCase1_11):
513# ifndef USE_AS_RAWMEMCHR
514 lea 10(%edi), %eax
515 RETURN
516# else
517 lea 10(%edx), %eax
518 ret
519# endif
520
521 .p2align 4
522L(ExitCase1_13):
523# ifndef USE_AS_RAWMEMCHR
524 lea 12(%edi), %eax
525 RETURN
526# else
527 lea 12(%edx), %eax
528 ret
529# endif
530
531 .p2align 4
532L(ExitCase1_14):
533# ifndef USE_AS_RAWMEMCHR
534 lea 13(%edi), %eax
535 RETURN
536# else
537 lea 13(%edx), %eax
538 ret
539# endif
540
541 .p2align 4
542L(ExitCase1_15):
543# ifndef USE_AS_RAWMEMCHR
544 lea 14(%edi), %eax
545 RETURN
546# else
547 lea 14(%edx), %eax
548 ret
549# endif
550
/* memchr build only: "case 2" decoder, used where a match in the
   vector may lie beyond the end of the buffer.  The mask in %eax is
   decoded exactly like in L(match_case1), but before returning byte
   index K each path subtracts K+1 from the remaining count in %edx and
   returns NULL if that borrows (fewer than K+1 bytes were left).
   Entry points:
     L(match_case2)          %ecx = offset of the vector; it is removed
			     from %edx and added to %edi
     L(match_case2_prolog1)  only %edi needs %ecx added
     L(match_case2_prolog)   %edi and %edx are already correct.  */
551# ifndef USE_AS_RAWMEMCHR
552 .p2align 4
553L(match_case2):
554 sub %ecx, %edx
555L(match_case2_prolog1):
556 add %ecx, %edi
557L(match_case2_prolog):
/* Low mask byte zero: the first match is in bytes 8..15.  */
558 test %al, %al
559 jz L(match_case2_high)
/* Low nibble zero: the first match is in bytes 4..7.  */
560 mov %al, %cl
561 and $15, %cl
562 jz L(match_case2_8)
563 test $0x01, %al
564 jnz L(ExitCase2_1)
565 test $0x02, %al
566 jnz L(ExitCase2_2)
567 test $0x04, %al
568 jnz L(ExitCase2_3)
/* Byte 3: valid only if at least 4 bytes remain.  */
569 sub $4, %edx
570 jb L(return_null)
571 lea 3(%edi), %eax
572 RETURN
573
574 .p2align 4
575L(match_case2_8):
576 test $0x10, %al
577 jnz L(ExitCase2_5)
578 test $0x20, %al
579 jnz L(ExitCase2_6)
580 test $0x40, %al
581 jnz L(ExitCase2_7)
582 sub $8, %edx
583 jb L(return_null)
584 lea 7(%edi), %eax
585 RETURN
586
587 .p2align 4
588L(match_case2_high):
589 mov %ah, %ch
590 and $15, %ch
591 jz L(match_case2_high_8)
592 test $0x01, %ah
593 jnz L(ExitCase2_9)
594 test $0x02, %ah
595 jnz L(ExitCase2_10)
596 test $0x04, %ah
597 jnz L(ExitCase2_11)
598 sub $12, %edx
599 jb L(return_null)
600 lea 11(%edi), %eax
601 RETURN
602
603 .p2align 4
604L(match_case2_high_8):
605 test $0x10, %ah
606 jnz L(ExitCase2_13)
607 test $0x20, %ah
608 jnz L(ExitCase2_14)
609 test $0x40, %ah
610 jnz L(ExitCase2_15)
611 sub $16, %edx
612 jb L(return_null)
613 lea 15(%edi), %eax
614 RETURN
615
/* Return stubs: L(ExitCase2_N) returns byte N-1 if at least N bytes
   remain.  Byte 0 is returned without a check; the visible paths into
   this decoder all arrive with a non-zero remaining count.  */
616 .p2align 4
617L(ExitCase2_1):
618 mov %edi, %eax
619 RETURN
620
621 .p2align 4
622L(ExitCase2_2):
623 sub $2, %edx
624 jb L(return_null)
625 lea 1(%edi), %eax
626 RETURN
627
628 .p2align 4
629L(ExitCase2_3):
630 sub $3, %edx
631 jb L(return_null)
632 lea 2(%edi), %eax
633 RETURN
634
635 .p2align 4
636L(ExitCase2_5):
637 sub $5, %edx
638 jb L(return_null)
639 lea 4(%edi), %eax
640 RETURN
641
642 .p2align 4
643L(ExitCase2_6):
644 sub $6, %edx
645 jb L(return_null)
646 lea 5(%edi), %eax
647 RETURN
648
649 .p2align 4
650L(ExitCase2_7):
651 sub $7, %edx
652 jb L(return_null)
653 lea 6(%edi), %eax
654 RETURN
655
656 .p2align 4
657L(ExitCase2_9):
658 sub $9, %edx
659 jb L(return_null)
660 lea 8(%edi), %eax
661 RETURN
662
663 .p2align 4
664L(ExitCase2_10):
665 sub $10, %edx
666 jb L(return_null)
667 lea 9(%edi), %eax
668 RETURN
669
670 .p2align 4
671L(ExitCase2_11):
672 sub $11, %edx
673 jb L(return_null)
674 lea 10(%edi), %eax
675 RETURN
676
677 .p2align 4
678L(ExitCase2_13):
679 sub $13, %edx
680 jb L(return_null)
681 lea 12(%edi), %eax
682 RETURN
683
684 .p2align 4
685L(ExitCase2_14):
686 sub $14, %edx
687 jb L(return_null)
688 lea 13(%edi), %eax
689 RETURN
690
691 .p2align 4
692L(ExitCase2_15):
693 sub $15, %edx
694 jb L(return_null)
695 lea 14(%edi), %eax
696 RETURN
697# endif
698
/* Common "not found" exit: return 0 (NULL) in %eax.  The memchr build
   restores %edi through RETURN; the rawmemchr build pushed nothing and
   returns directly.  */
699 .p2align 4
700L(return_null):
701 xor %eax, %eax
702# ifndef USE_AS_RAWMEMCHR
703 RETURN
704# else
705 ret
706# endif
707
708END (MEMCHR)
/* Closes the "#if IS_IN (libc)" at the top of the file.  */
709#endif