]>
Commit | Line | Data |
---|---|---|
a5f524e4 | 1 | /* strchr with SSE2 without bsf |
04277e02 | 2 | Copyright (C) 2011-2019 Free Software Foundation, Inc. |
a5f524e4 LD |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 17 | License along with the GNU C Library; if not, see |
5a82c748 | 18 | <https://www.gnu.org/licenses/>. */ |
a5f524e4 | 19 | |
4f41c682 | 20 | #if IS_IN (libc) |
a5f524e4 LD |
21 | |
22 | # include <sysdep.h> | |
23 | # include "asm-syntax.h" | |
24 | ||
ceaa0c5d | 25 | atom_text_section |
a5f524e4 LD |
26 | ENTRY (__strchr_sse2_no_bsf) /* In: %rdi = string, %esi = byte to find.  Out: %rax = address of the first match, or 0 if a NUL byte comes first. */ |
27 | movd %esi, %xmm1 /* low dword of xmm1 = search byte */ | |
28 | movq %rdi, %rcx /* keep the unaligned start address */ | |
29 | punpcklbw %xmm1, %xmm1 /* search byte now fills the low 2 bytes */ | |
30 | andq $~15, %rdi /* round the pointer down to a 16-byte boundary */ | |
31 | pxor %xmm2, %xmm2 /* xmm2 = 0, reference for the NUL compare */ | |
32 | punpcklbw %xmm1, %xmm1 /* search byte now fills the low 4 bytes */ | |
33 | orl $0xffffffff, %esi /* esi = all ones, basis of the start mask */ | |
34 | movdqa (%rdi), %xmm0 /* aligned load of the first 16-byte chunk */ | |
35 | pshufd $0, %xmm1, %xmm1 /* broadcast the search byte to all 16 lanes */ | |
36 | subq %rdi, %rcx /* rcx = offset of the start within the chunk */ | |
37 | movdqa %xmm0, %xmm3 /* second copy for the NUL compare */ | |
38 | leaq 16(%rdi), %rdi /* rdi = address of the next chunk */ | |
39 | pcmpeqb %xmm1, %xmm0 /* 0xff in lanes equal to the search byte */ | |
40 | pcmpeqb %xmm2, %xmm3 /* 0xff in lanes equal to zero */ | |
41 | shl %cl, %esi /* clear the mask bits below the start offset */ | |
42 | pmovmskb %xmm0, %eax /* eax = per-byte match bitmask */ | |
43 | pmovmskb %xmm3, %edx /* edx = per-byte NUL bitmask */ | |
44 | andl %esi, %eax /* ignore matches located before the start */ | |
45 | andl %esi, %edx /* ignore NULs located before the start */ | |
46 | test %eax, %eax | |
47 | jnz L(matches) /* match at or after the start in this chunk */ | |
48 | test %edx, %edx | |
49 | jnz L(return_null) /* NUL found and no match in this chunk */ | |
50 | ||
51 | L(loop): | |
52 | movdqa (%rdi), %xmm0 /* load the next aligned chunk */ | |
53 | leaq 16(%rdi), %rdi /* advance past it */ | |
54 | movdqa %xmm0, %xmm3 /* second copy for the NUL compare */ | |
55 | pcmpeqb %xmm1, %xmm0 /* lanes equal to the search byte */ | |
56 | pcmpeqb %xmm2, %xmm3 /* lanes equal to zero */ | |
57 | pmovmskb %xmm0, %eax /* eax = match bitmask */ | |
58 | pmovmskb %xmm3, %edx /* edx = NUL bitmask */ | |
59 | or %eax, %edx /* edx = match | NUL bits (overwrites the NUL mask) */ | |
60 | jz L(loop) /* neither found: keep scanning */ | |
61 | ||
62 | pmovmskb %xmm3, %edx /* recompute the NUL mask lost to the or above */ | |
63 | test %eax, %eax | |
64 | jnz L(matches) /* otherwise only a NUL was found: fall into return_null */ | |
65 | ||
66 | /* Return NULL. */ | |
67 | .p2align 4 | |
68 | L(return_null): | |
69 | xor %rax, %rax /* return value 0 */ | |
70 | ret | |
71 | ||
72 | L(matches): | |
73 | /* There is a match. First find where NULL is. */ | |
74 | leaq -16(%rdi), %rdi /* rdi = base of the chunk just examined */ | |
75 | test %edx, %edx | |
76 | jz L(match_case1) /* no NUL in the chunk: return the first match */ | |
77 | ||
78 | .p2align 4 | |
79 | L(match_case2): /* chunk holds both a match and a NUL; at each byte the match bit is tested before the NUL bit */ | |
80 | test %al, %al | |
81 | jz L(match_high_case2) /* no match in bytes 0-7 */ | |
82 | ||
83 | mov %al, %cl | |
84 | and $15, %cl | |
85 | jnz L(match_case2_4) /* a match lies in bytes 0-3 */ | |
86 | ||
87 | mov %dl, %ch | |
88 | and $15, %ch | |
89 | jnz L(return_null) /* NUL in bytes 0-3 precedes the match in bytes 4-7 */ | |
90 | ||
91 | test $0x10, %al | |
92 | jnz L(Exit5) | |
93 | test $0x10, %dl | |
94 | jnz L(return_null) | |
95 | test $0x20, %al | |
96 | jnz L(Exit6) | |
97 | test $0x20, %dl | |
98 | jnz L(return_null) | |
99 | test $0x40, %al | |
100 | jnz L(Exit7) | |
101 | test $0x40, %dl | |
102 | jnz L(return_null) | |
103 | lea 7(%rdi), %rax /* bits 0-6 ruled out: the match is at byte 7 */ | |
104 | ret | |
105 | ||
106 | .p2align 4 | |
107 | L(match_case2_4): /* match somewhere in bytes 0-3, NUL somewhere in the chunk */ | |
108 | test $0x01, %al | |
109 | jnz L(Exit1) | |
110 | test $0x01, %dl | |
111 | jnz L(return_null) | |
112 | test $0x02, %al | |
113 | jnz L(Exit2) | |
114 | test $0x02, %dl | |
115 | jnz L(return_null) | |
116 | test $0x04, %al | |
117 | jnz L(Exit3) | |
118 | test $0x04, %dl | |
119 | jnz L(return_null) | |
120 | lea 3(%rdi), %rax /* bits 0-2 ruled out: the match is at byte 3 */ | |
121 | ret | |
122 | ||
123 | .p2align 4 | |
124 | L(match_high_case2): /* first match is in bytes 8-15, NUL somewhere in the chunk */ | |
125 | test %dl, %dl | |
126 | jnz L(return_null) /* NUL in bytes 0-7 precedes the match */ | |
127 | ||
128 | mov %ah, %cl | |
129 | and $15, %cl | |
130 | jnz L(match_case2_12) /* a match lies in bytes 8-11 */ | |
131 | ||
132 | mov %dh, %ch | |
133 | and $15, %ch | |
134 | jnz L(return_null) /* NUL in bytes 8-11 precedes the match in bytes 12-15 */ | |
135 | ||
136 | test $0x10, %ah | |
137 | jnz L(Exit13) | |
138 | test $0x10, %dh | |
139 | jnz L(return_null) | |
140 | test $0x20, %ah | |
141 | jnz L(Exit14) | |
142 | test $0x20, %dh | |
143 | jnz L(return_null) | |
144 | test $0x40, %ah | |
145 | jnz L(Exit15) | |
146 | test $0x40, %dh | |
147 | jnz L(return_null) | |
148 | lea 15(%rdi), %rax /* bits 8-14 ruled out: the match is at byte 15 */ | |
149 | ret | |
150 | ||
151 | .p2align 4 | |
152 | L(match_case2_12): /* match somewhere in bytes 8-11, NUL somewhere in bytes 8-15 */ | |
153 | test $0x01, %ah | |
154 | jnz L(Exit9) | |
155 | test $0x01, %dh | |
156 | jnz L(return_null) | |
157 | test $0x02, %ah | |
158 | jnz L(Exit10) | |
159 | test $0x02, %dh | |
160 | jnz L(return_null) | |
161 | test $0x04, %ah | |
162 | jnz L(Exit11) | |
163 | test $0x04, %dh | |
164 | jnz L(return_null) | |
165 | lea 11(%rdi), %rax /* bits 8-10 ruled out: the match is at byte 11 */ | |
166 | ret | |
167 | ||
168 | .p2align 4 | |
169 | L(match_case1): /* match present and no NUL in the chunk: find the lowest set bit of eax */ | |
170 | test %al, %al | |
171 | jz L(match_high_case1) /* no match in bytes 0-7 */ | |
172 | ||
173 | test $0x01, %al | |
174 | jnz L(Exit1) | |
175 | test $0x02, %al | |
176 | jnz L(Exit2) | |
177 | test $0x04, %al | |
178 | jnz L(Exit3) | |
179 | test $0x08, %al | |
180 | jnz L(Exit4) | |
181 | test $0x10, %al | |
182 | jnz L(Exit5) | |
183 | test $0x20, %al | |
184 | jnz L(Exit6) | |
185 | test $0x40, %al | |
186 | jnz L(Exit7) | |
187 | lea 7(%rdi), %rax /* al nonzero with bits 0-6 clear: the match is at byte 7 */ | |
188 | ret | |
189 | ||
190 | .p2align 4 | |
191 | L(match_high_case1): /* no NUL in the chunk and the first match is in bytes 8-15 */ | |
192 | test $0x01, %ah | |
193 | jnz L(Exit9) | |
194 | test $0x02, %ah | |
195 | jnz L(Exit10) | |
196 | test $0x04, %ah | |
197 | jnz L(Exit11) | |
198 | test $0x08, %ah | |
199 | jnz L(Exit12) | |
200 | test $0x10, %ah | |
201 | jnz L(Exit13) | |
202 | test $0x20, %ah | |
203 | jnz L(Exit14) | |
204 | test $0x40, %ah | |
205 | jnz L(Exit15) | |
206 | lea 15(%rdi), %rax /* bits 8-14 clear: the match is at byte 15 */ | |
207 | ret | |
208 | ||
209 | .p2align 4 | |
210 | L(Exit1): /* each L(ExitN) returns the address of byte N-1 of the chunk at rdi */ | |
211 | lea (%rdi), %rax | |
212 | ret | |
213 | ||
214 | .p2align 4 | |
215 | L(Exit2): | |
216 | lea 1(%rdi), %rax | |
217 | ret | |
218 | ||
219 | .p2align 4 | |
220 | L(Exit3): | |
221 | lea 2(%rdi), %rax | |
222 | ret | |
223 | ||
224 | .p2align 4 | |
225 | L(Exit4): | |
226 | lea 3(%rdi), %rax | |
227 | ret | |
228 | ||
229 | .p2align 4 | |
230 | L(Exit5): | |
231 | lea 4(%rdi), %rax | |
232 | ret | |
233 | ||
234 | .p2align 4 | |
235 | L(Exit6): | |
236 | lea 5(%rdi), %rax | |
237 | ret | |
238 | ||
239 | .p2align 4 | |
240 | L(Exit7): | |
241 | lea 6(%rdi), %rax | |
242 | ret | |
243 | ||
244 | .p2align 4 | |
245 | L(Exit9): | |
246 | lea 8(%rdi), %rax | |
247 | ret | |
248 | ||
249 | .p2align 4 | |
250 | L(Exit10): | |
251 | lea 9(%rdi), %rax | |
252 | ret | |
253 | ||
254 | .p2align 4 | |
255 | L(Exit11): | |
256 | lea 10(%rdi), %rax | |
257 | ret | |
258 | ||
259 | .p2align 4 | |
260 | L(Exit12): | |
261 | lea 11(%rdi), %rax | |
262 | ret | |
263 | ||
264 | .p2align 4 | |
265 | L(Exit13): | |
266 | lea 12(%rdi), %rax | |
267 | ret | |
268 | ||
269 | .p2align 4 | |
270 | L(Exit14): | |
271 | lea 13(%rdi), %rax | |
272 | ret | |
273 | ||
274 | .p2align 4 | |
275 | L(Exit15): | |
276 | lea 14(%rdi), %rax | |
277 | ret | |
278 | ||
279 | END (__strchr_sse2_no_bsf) | |
280 | #endif |