]>
Commit | Line | Data |
---|---|---|
693fb948 | 1 | /* strrchr with SSE2 with bsf and bsr |
04277e02 | 2 | Copyright (C) 2011-2019 Free Software Foundation, Inc. |
693fb948 LD |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 17 | License along with the GNU C Library; if not, see |
5a82c748 | 18 | <https://www.gnu.org/licenses/>. */ |
693fb948 | 19 | |
4f41c682 | 20 | #if IS_IN (libc) |
693fb948 LD |
21 | |
22 | # include <sysdep.h> | |
23 | ||
/* Unwind-info bookkeeping for one 4-byte register save: the CFA offset
   grows by 4 and REG is recorded as saved at offset 0 from the current
   CFA register.  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Inverse of CFI_PUSH: the CFA offset shrinks by 4 and REG is marked
   as holding its entry value again.  */
# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push/pop a 32-bit register and emit the matching unwind info.  */
# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

/* Displacements from %esp, valid at function entry before anything is
   pushed: STR1 is used to load the string pointer, STR2 to load the
   character to search for.  */
# define PARMS	4
# define STR1	PARMS
# define STR2	STR1+4
38 | ||
48882a1a | 39 | .text |
693fb948 LD |
/* char *strrchr (const char *s, int c) for i386, using SSE2, bsf and bsr.

   In:	STR1(%esp) = s, STR2(%esp) = c (only the low byte is used).
   Out:	%eax = address of the last byte equal to c that lies at or
	before the terminating NUL, or 0 if there is none.
   Clobbers: %ecx, %edx, %xmm0-%xmm3, flags.

   Register roles:
     %xmm1  c replicated into all 16 byte lanes.
     %xmm2  zero comparand for NUL detection.  pcmpeqb overwrites it
	    with the NUL mask, but that mask is all-zero whenever the
	    scan continues, so it never needs to be cleared again.
     %edi   scan pointer; inside L(loop) it is advanced past a chunk
	    before that chunk's masks are tested.
     %ebx   match mask of the most recent chunk that contained c
	    (0 while no match has been seen).
     %esi   address 16 bytes past the start of the chunk that %ebx
	    describes, so the candidate result is %esi - 16 + bsr (%ebx).

   %edi is saved on entry.  %esi and %ebx are saved only on the paths
   that enter L(loop); they are pushed in the order %esi, %ebx.  */
ENTRY (__strrchr_sse2_bsf)

	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	PUSH (%edi)
	pxor	%xmm2, %xmm2
	mov	%ecx, %edi
	/* Two byte-interleaves followed by the pshufd below copy the low
	   byte of %xmm1 into every lane.  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* ECX has OFFSET.  An offset above 48 within a 64-byte block
	   means a 16-byte load from S would run past that block, so
	   such strings start with an aligned load instead.  */
	and	$63, %ecx
	cmp	$48, %ecx
	pshufd	$0, %xmm1, %xmm1
	ja	L(crosscache)

	/* Unaligned string: examine the first 16 bytes directly.  */
	movdqu	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm2, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax

	test	%eax, %eax
	jnz	L(unaligned_match1)

	test	%edx, %edx
	jnz	L(return_null)

	/* No match and no NUL: continue at the next 16-byte boundary.
	   Bytes already examined may be examined again.  */
	and	$-16, %edi
	add	$16, %edi

	PUSH (%esi)
	PUSH (%ebx)

	/* No match recorded yet.  */
	xor	%ebx, %ebx
	jmp	L(loop)

	/* Not executed: the code below runs with only %edi saved.  */
	CFI_POP (%ebx)
	CFI_POP (%esi)

	.p2align 4
L(unaligned_return_value1):
	/* Match and NUL in the first (unaligned) chunk.  Keep only the
	   match bits at or below the NUL position:
	   mask = (2 << bsf (NUL mask)) - 1.  */
	bsf	%edx, %ecx
	mov	$2, %edx
	shl	%cl, %edx
	sub	$1, %edx
	and	%edx, %eax
	jz	L(return_null)
	/* Highest surviving bit is the last match.  */
	bsr	%eax, %eax
	add	%edi, %eax
	POP (%edi)
	ret
	/* Not executed: %edi is saved again for the code below.  */
	CFI_PUSH (%edi)

	.p2align 4
L(unaligned_match1):
	test	%edx, %edx
	jnz	L(unaligned_return_value1)

	PUSH (%esi)
	PUSH (%ebx)

	/* Record the match: %ebx = mask, %esi = chunk start + 16, then
	   continue at the next 16-byte boundary.  */
	mov	%eax, %ebx
	lea	16(%edi), %esi
	and	$-16, %edi
	add	$16, %edi
	jmp	L(loop)

	/* Not executed: the code below runs with only %edi saved.  */
	CFI_POP (%ebx)
	CFI_POP (%esi)

	.p2align 4
L(crosscache):
	/* Handle unaligned string: load the enclosing aligned 16 bytes
	   and discard the bytes that precede S.  %xmm3 holds the NUL
	   comparand here, so %xmm2 stays zero for L(loop).  */
	and	$15, %ecx
	and	$-16, %edi
	pxor	%xmm3, %xmm3
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm3, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	/* Remove the leading bytes.  */
	shr	%cl, %edx
	shr	%cl, %eax

	test	%eax, %eax
	jnz	L(unaligned_match)

	test	%edx, %edx
	jnz	L(return_null)

	add	$16, %edi

	PUSH (%esi)
	PUSH (%ebx)

	/* No match recorded yet.  */
	xor	%ebx, %ebx
	jmp	L(loop)

	/* Not executed: the code below runs with only %edi saved.  */
	CFI_POP (%ebx)
	CFI_POP (%esi)

	.p2align 4
L(unaligned_return_value):
	/* Match and NUL in the first (aligned) chunk.  The masks were
	   shifted right by %ecx, so rebase %edi to S before adding the
	   bit index.  */
	add	%ecx, %edi
	bsf	%edx, %ecx
	/* mask = (2 << bsf (NUL mask)) - 1: matches up to the NUL.  */
	mov	$2, %edx
	shl	%cl, %edx
	sub	$1, %edx
	and	%edx, %eax
	jz	L(return_null)
	bsr	%eax, %eax
	add	%edi, %eax
	POP (%edi)
	ret
	/* Not executed: %edi is saved again for the code below.  */
	CFI_PUSH (%edi)

	.p2align 4
L(unaligned_match):
	test	%edx, %edx
	jnz	L(unaligned_return_value)

	PUSH (%esi)
	PUSH (%ebx)

	/* Record the match.  %ebx is relative to S (the masks were
	   shifted), hence %esi = next chunk + offset of S in its chunk.  */
	mov	%eax, %ebx
	add	$16, %edi
	lea	(%edi, %ecx), %esi

/* Loop start on aligned string.  Four chunks are examined per
   iteration; %ecx ends up as NUL mask | match mask.  */
	.p2align 4
L(loop):
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jz	L(loop)

L(matches):
	/* The chunk just examined has a match, a NUL, or both.  */
	test	%eax, %eax
	jnz	L(match)
L(return_value):
	/* The string ends in this chunk with no usable match in it:
	   fall back to the match recorded in %ebx/%esi, if any.  */
	test	%ebx, %ebx
	jz	L(return_null_1)
	bsr	%ebx, %eax
	add	%esi, %eax

	POP (%ebx)
	POP (%esi)

	/* %esi pointed 16 bytes past the recorded chunk.  */
	sub	$16, %eax
	POP (%edi)
	ret

	/* Not executed: the code below runs with all three registers
	   saved.  Record them in the order they were pushed so that the
	   unwind info matches the real stack layout.  */
	CFI_PUSH (%edi)
	CFI_PUSH (%esi)
	CFI_PUSH (%ebx)

	.p2align 4
L(match):
	/* %ecx was merged with the match mask; fetch the NUL mask again.  */
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(return_value_1)
	/* Match without NUL: record it and keep scanning.  %edi already
	   points 16 bytes past this chunk.  */
	mov	%eax, %ebx
	mov	%edi, %esi
	jmp	L(loop)

	.p2align 4
L(return_value_1):
	/* Match and NUL in the same chunk: keep the match bits at or
	   below the NUL position.  If none remain, use the recorded
	   match instead.  */
	bsf	%ecx, %ecx
	mov	$2, %edx
	shl	%cl, %edx
	sub	$1, %edx
	and	%edx, %eax
	jz	L(return_value)

	POP (%ebx)
	POP (%esi)

	bsr	%eax, %eax
	add	%edi, %eax
	/* %edi was advanced past the chunk before the test.  */
	sub	$16, %eax
	POP (%edi)
	ret

	/* Not executed: the code below runs with only %edi saved.  */
	CFI_PUSH (%edi)
/* Return NULL; only %edi has been saved.  */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	POP (%edi)
	ret

	/* Not executed: the code below runs with all three registers
	   saved, recorded in push order.  */
	CFI_PUSH (%edi)
	CFI_PUSH (%esi)
	CFI_PUSH (%ebx)
/* Return NULL; %edi, %esi and %ebx have been saved.  */
	.p2align 4
L(return_null_1):
	POP (%ebx)
	POP (%esi)
	POP (%edi)
	xor	%eax, %eax
	ret

END (__strrchr_sse2_bsf)
282 | #endif |