]>
Commit | Line | Data |
---|---|---|
693fb948 | 1 | /* strchr with SSE2 with bsf |
2b778ceb | 2 | Copyright (C) 2011-2021 Free Software Foundation, Inc. |
693fb948 LD |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 16 | License along with the GNU C Library; if not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
693fb948 | 18 | |
4f41c682 | 19 | #if IS_IN (libc) |
693fb948 LD |
20 | |
21 | # include <sysdep.h> | |
22 | ||
23 | # define CFI_PUSH(REG) /* Unwind annotation for a 4-byte push: the CFA offset grows by 4 and REG is recorded as saved at relative offset 0. */ \ | |
24 | cfi_adjust_cfa_offset (4); \ | |
25 | cfi_rel_offset (REG, 0) | |
26 | /* CFI_POP is the inverse annotation: the CFA offset shrinks by 4 and REG is marked as restored. */ ||
27 | # define CFI_POP(REG) \ | |
28 | cfi_adjust_cfa_offset (-4); \ | |
29 | cfi_restore (REG) | |
30 | /* PUSH and POP pair the 32-bit stack instruction with its matching unwind annotation. */ ||
31 | # define PUSH(REG) pushl REG; CFI_PUSH (REG) | |
32 | # define POP(REG) popl REG; CFI_POP (REG) | |
33 | /* PARMS is the %esp offset of the first argument once ENTRANCE has run: the 4 bytes pushed for %edi plus the 4-byte return address. RETURN pops %edi and returns, and its trailing CFI_PUSH re-describes the frame for the code placed after the ret, where %edi is still on the stack. */ ||
34 | # define PARMS 8 | |
35 | # define ENTRANCE PUSH(%edi) | |
36 | # define RETURN POP(%edi); ret; CFI_PUSH(%edi); | |
37 | /* Stack offsets of the two arguments: STR1 is read as the string pointer and STR2 as the character to search for. */ ||
38 | # define STR1 PARMS | |
39 | # define STR2 STR1+4 | |
40 | ||
48882a1a | 41 | .text |
693fb948 LD |
42 | ENTRY (__strchr_sse2_bsf) |
43 | /* strchr: reads the string pointer from STR1(%esp) and the character from STR2(%esp). Returns in %eax the address of the first byte equal to the character, or 0 when the terminating NUL is reached first. Scans 16 bytes at a time with aligned loads. Uses %ecx, %edx, %xmm0-%xmm2 and %edi (saved by ENTRANCE, restored by RETURN). */ ||
44 | ENTRANCE | |
45 | mov STR1(%esp), %ecx /* ecx = string pointer */ | |
46 | movd STR2(%esp), %xmm1 /* low dword of xmm1 = character argument */ | |
47 | ||
48 | pxor %xmm2, %xmm2 /* xmm2 = all zero, used to detect NUL bytes */ | |
49 | mov %ecx, %edi /* edi = scan pointer */ | |
50 | punpcklbw %xmm1, %xmm1 /* low byte duplicated into the low word */ | |
51 | punpcklbw %xmm1, %xmm1 /* low byte now fills the low dword */ | |
52 | /* ECX has OFFSET: the position of the string start inside its 16-byte block. */ | |
53 | and $15, %ecx | |
54 | pshufd $0, %xmm1, %xmm1 /* character broadcast to all 16 bytes, flags from the AND are left untouched */ | |
55 | je L(loop) /* offset 0: the string is already 16-byte aligned */ | |
56 | ||
57 | /* Handle unaligned string: load the enclosing aligned block and ignore the bytes that precede the string. */ | |
58 | and $-16, %edi /* round the scan pointer down to a 16-byte boundary */ | |
59 | movdqa (%edi), %xmm0 | |
60 | pcmpeqb %xmm0, %xmm2 /* xmm2 byte = 0xff where the data byte is NUL */ | |
61 | pcmpeqb %xmm1, %xmm0 /* xmm0 byte = 0xff where the data byte equals the character */ | |
62 | /* Find where NULL is. */ | |
63 | pmovmskb %xmm2, %edx | |
64 | /* Check if there is a match. */ | |
65 | pmovmskb %xmm0, %eax | |
66 | /* Remove the leading bytes: shift both masks right by OFFSET so that bit 0 is the first byte of the string. */ | |
67 | sarl %cl, %edx | |
68 | sarl %cl, %eax | |
69 | test %eax, %eax | |
70 | je L(unaligned_no_match) | |
71 | /* Check which byte is a match. */ | |
72 | bsf %eax, %eax | |
73 | /* Is there a NULL? */ | |
74 | test %edx, %edx | |
75 | je L(unaligned_match) | |
76 | bsf %edx, %edx | |
77 | cmpl %edx, %eax | |
78 | /* Return NULL if NULL comes first. Unsigned compare of the two bit indices, equal indices still count as a match. */ | |
79 | ja L(return_null) | |
80 | L(unaligned_match): | |
81 | add %edi, %eax /* result = aligned block address + match index ... */ | |
82 | add %ecx, %eax /* ... + OFFSET that was shifted out of the mask */ | |
83 | RETURN | |
84 | ||
85 | .p2align 4 | |
86 | L(unaligned_no_match): | |
87 | test %edx, %edx | |
88 | jne L(return_null) /* the string ends in the first block without a match */ | |
89 | pxor %xmm2, %xmm2 /* clear xmm2 again: it holds the compare result and may flag NUL bytes located before the string start */ | |
90 | ||
91 | add $16, %edi /* step to the next aligned block */ | |
92 | ||
93 | .p2align 4 | |
94 | /* Loop start on aligned string. Four identical steps per iteration. xmm2 is all zero on entry to every step because the previous step found no NUL. */ | |
95 | L(loop): | |
96 | movdqa (%edi), %xmm0 | |
97 | pcmpeqb %xmm0, %xmm2 /* NUL positions */ | |
98 | add $16, %edi /* edi now points one block past the data under test */ | |
99 | pcmpeqb %xmm1, %xmm0 /* character positions */ | |
100 | pmovmskb %xmm2, %edx | |
101 | pmovmskb %xmm0, %eax | |
102 | or %eax, %edx /* nonzero when the block holds a match or a NUL, eax keeps the match mask */ | |
103 | jnz L(matches) | |
104 | ||
105 | movdqa (%edi), %xmm0 | |
106 | pcmpeqb %xmm0, %xmm2 | |
107 | add $16, %edi | |
108 | pcmpeqb %xmm1, %xmm0 | |
109 | pmovmskb %xmm2, %edx | |
110 | pmovmskb %xmm0, %eax | |
111 | or %eax, %edx | |
112 | jnz L(matches) | |
113 | ||
114 | movdqa (%edi), %xmm0 | |
115 | pcmpeqb %xmm0, %xmm2 | |
116 | add $16, %edi | |
117 | pcmpeqb %xmm1, %xmm0 | |
118 | pmovmskb %xmm2, %edx | |
119 | pmovmskb %xmm0, %eax | |
120 | or %eax, %edx | |
121 | jnz L(matches) | |
122 | ||
123 | movdqa (%edi), %xmm0 | |
124 | pcmpeqb %xmm0, %xmm2 | |
125 | add $16, %edi | |
126 | pcmpeqb %xmm1, %xmm0 | |
127 | pmovmskb %xmm2, %edx | |
128 | pmovmskb %xmm0, %eax | |
129 | or %eax, %edx | |
130 | jnz L(matches) | |
131 | jmp L(loop) | |
132 | ||
133 | L(matches): | |
134 | pmovmskb %xmm2, %edx /* reload the NUL mask, the OR above overwrote edx */ | |
135 | test %eax, %eax | |
136 | jz L(return_null) /* no character match, so the hit was the terminating NUL */ | |
137 | bsf %eax, %eax | |
138 | /* There is a match. First find where NULL is. */ | |
139 | test %edx, %edx | |
140 | je L(match) | |
141 | bsf %edx, %ecx | |
142 | /* Check if NULL comes first. */ | |
143 | cmpl %ecx, %eax | |
144 | ja L(return_null) | |
145 | L(match): | |
146 | sub $16, %edi /* undo the advance done right after the load */ | |
147 | add %edi, %eax /* result = block address + match index */ | |
148 | RETURN | |
149 | ||
150 | /* Return NULL: the string ended before the character was found. */ | |
151 | .p2align 4 | |
152 | L(return_null): | |
153 | xor %eax, %eax | |
154 | RETURN | |
155 | ||
156 | END (__strchr_sse2_bsf) | |
157 | #endif |