]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
Prefer https to http for gnu.org and fsf.org URLs
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / strrchr-sse2-bsf.S
CommitLineData
693fb948 1/* strrchr with SSE2 with bsf and bsr
04277e02 2 Copyright (C) 2011-2019 Free Software Foundation, Inc.
693fb948
LD
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6 17 License along with the GNU C Library; if not, see
5a82c748 18 <https://www.gnu.org/licenses/>. */
693fb948 19
4f41c682 20#if IS_IN (libc)
693fb948
LD
21
22# include <sysdep.h>
23
/* Unwind-info and stack helpers used by the routine below.  The cfi_*
   helpers are not defined in this file; presumably they come from
   <sysdep.h> and wrap the matching .cfi_* assembler directives.  */

/* CFI_PUSH (REG): unwind annotations that accompany a 4-byte push of
   REG (CFA offset grows by 4, REG recorded at relative offset 0).
   Emits no machine instruction by itself.  */
24# define CFI_PUSH(REG) \
25 cfi_adjust_cfa_offset (4); \
26 cfi_rel_offset (REG, 0)
27
/* CFI_POP (REG): the inverse annotations for a 4-byte pop of REG
   (CFA offset shrinks by 4, REG marked as restored).  */
28# define CFI_POP(REG) \
29 cfi_adjust_cfa_offset (-4); \
30 cfi_restore (REG)
31
/* PUSH / POP: the real pushl / popl followed by its annotation.  */
32# define PUSH(REG) pushl REG; CFI_PUSH (REG)
33# define POP(REG) popl REG; CFI_POP (REG)
34
/* Offsets from %esp, valid at function entry before anything is pushed:
   STR1 is read as the string pointer and STR2 as the byte to search
   for.  The initial 4 presumably skips the return address.  */
35# define PARMS 4
36# define STR1 PARMS
37# define STR2 STR1+4
38
48882a1a 39 .text
693fb948
LD
/* __strrchr_sse2_bsf

   Input:  STR1(%esp) = pointer to a NUL-terminated string,
           STR2(%esp) = byte to look for (only the low byte is used; it
                        is replicated into all 16 bytes of %xmm1).
   Output: %eax = address of the last matching byte located at or
           before the first NUL byte, or 0 when there is no such byte.

   Register roles once L(loop) has been entered:
     %edi   16-byte aligned address of the next chunk to load
     %ebx   match bitmask of the most recent chunk that held a match
            and no NUL (0 = nothing recorded yet)
     %esi   address that bit 0 of %ebx refers to, plus 16 (the return
            paths subtract the 16 again)
     %xmm1  search byte replicated 16 times
     %xmm2  all zero on entry to every loop step; receives the NUL
            compare result

   %edi is pushed on every path; %esi and %ebx only on paths that reach
   L(loop).  Each is popped again before the corresponding ret.  */
40ENTRY (__strrchr_sse2_bsf)
41
42 mov STR1(%esp), %ecx
43 movd STR2(%esp), %xmm1
44
45 PUSH (%edi)
46 pxor %xmm2, %xmm2
47 mov %ecx, %edi
/* Two byte-interleaves plus the pshufd below spread byte 0 of %xmm1
   over all 16 byte lanes.  */
48 punpcklbw %xmm1, %xmm1
49 punpcklbw %xmm1, %xmm1
50 /* ECX has OFFSET. */
/* OFFSET is the string address modulo 64.  When it exceeds 48, a
   16-byte load starting at the string would run past the end of its
   64-byte aligned block, so the aligned-load path is taken instead.  */
51 and $63, %ecx
52 cmp $48, %ecx
53 pshufd $0, %xmm1, %xmm1
54 ja L(crosscashe)
55
56/* unaligned string. */
57 movdqu (%edi), %xmm0
58 pcmpeqb %xmm0, %xmm2
59 pcmpeqb %xmm1, %xmm0
60 /* Find where NULL is. */
61 pmovmskb %xmm2, %edx
62 /* Check if there is a match. */
63 pmovmskb %xmm0, %eax
64
65 test %eax, %eax
66 jnz L(unaligned_match1)
67
/* No match in the first 16 bytes; a NUL here means there is none.  */
68 test %edx, %edx
69 jnz L(return_null)
70
/* No match and no NUL (so %xmm2 is all zero again): continue at the
   next 16-byte boundary above the string start.  */
71 and $-16, %edi
72 add $16, %edi
73
74 PUSH (%esi)
75 PUSH (%ebx)
76
/* %ebx = 0: no match recorded so far.  */
77 xor %ebx, %ebx
78 jmp L(loop)
79
/* Not executed (placed after an unconditional jmp).  CFI_POP emits
   annotations only; these undo the two CFI_PUSHes above for the code
   that follows, which runs with only %edi pushed.  */
80 CFI_POP (%esi)
81 CFI_POP (%ebx)
82
83 .p2align 4
/* First 16 bytes hold both a match (%eax) and a NUL (%edx).
   %edi = string start.  */
84L(unaligned_return_value1):
/* %ecx = index of the first NUL; (2 << %cl) - 1 has bits 0..%cl set,
   so the AND keeps only matches at or before that NUL.  */
85 bsf %edx, %ecx
86 mov $2, %edx
87 shl %cl, %edx
88 sub $1, %edx
89 and %edx, %eax
90 jz L(return_null)
/* Highest surviving bit = last match; turn it into an address.  */
91 bsr %eax, %eax
92 add %edi, %eax
93 POP (%edi)
94 ret
/* Annotation only: the code below again runs with %edi pushed.  */
95 CFI_PUSH (%edi)
96
97 .p2align 4
/* First 16 bytes hold a match.  %eax = match mask, %edx = NUL mask.  */
98L(unaligned_match1):
99 test %edx, %edx
100 jnz L(unaligned_return_value1)
101
102 PUSH (%esi)
103 PUSH (%ebx)
104
/* Record the match: %ebx = mask, %esi = string start + 16.  Then move
   %edi to the next 16-byte boundary above the string start.  */
105 mov %eax, %ebx
106 lea 16(%edi), %esi
107 and $-16, %edi
108 add $16, %edi
109 jmp L(loop)
110
/* Not executed; annotation only (see the identical pair above).  */
111 CFI_POP (%esi)
112 CFI_POP (%ebx)
113
114 .p2align 4
115 L(crosscashe):
116/* Handle unaligned string. */
/* %ecx = offset of the string inside its 16-byte chunk; %edi is rounded
   down so an aligned load can be used.  %xmm3 serves as the zero
   operand here and %xmm2 is left untouched -- presumably so that NUL
   bytes located before the string start cannot leave %xmm2 non-zero
   for L(loop).  */
117 and $15, %ecx
118 and $-16, %edi
119 pxor %xmm3, %xmm3
120 movdqa (%edi), %xmm0
121 pcmpeqb %xmm0, %xmm3
122 pcmpeqb %xmm1, %xmm0
123 /* Find where NULL is. */
124 pmovmskb %xmm3, %edx
125 /* Check if there is a match. */
126 pmovmskb %xmm0, %eax
127 /* Remove the leading bytes. */
/* After the shifts, bit 0 of both masks is the first string byte.  */
128 shr %cl, %edx
129 shr %cl, %eax
130
131 test %eax, %eax
132 jnz L(unaligned_match)
133
134 test %edx, %edx
135 jnz L(return_null)
136
137 add $16, %edi
138
139 PUSH (%esi)
140 PUSH (%ebx)
141
/* %ebx = 0: no match recorded so far.  */
142 xor %ebx, %ebx
143 jmp L(loop)
144
/* Not executed; annotation only.  */
145 CFI_POP (%esi)
146 CFI_POP (%ebx)
147
148 .p2align 4
/* First chunk holds both a match and a NUL.  %edi = aligned chunk
   address, %ecx = offset of the string inside it.  */
149L(unaligned_return_value):
/* %edi = string start again, matching the shifted masks.  */
150 add %ecx, %edi
/* Keep only matches at or before the first NUL (bits 0..%cl).  */
151 bsf %edx, %ecx
152 mov $2, %edx
153 shl %cl, %edx
154 sub $1, %edx
155 and %edx, %eax
156 jz L(return_null)
157 bsr %eax, %eax
158 add %edi, %eax
159 POP (%edi)
160 ret
/* Annotation only: the code below again runs with %edi pushed.  */
161 CFI_PUSH (%edi)
162
163 .p2align 4
/* First chunk holds a match.  %eax = shifted match mask,
   %edx = shifted NUL mask.  */
164L(unaligned_match):
165 test %edx, %edx
166 jnz L(unaligned_return_value)
167
168 PUSH (%esi)
169 PUSH (%ebx)
170
/* Record the match: %ebx = mask, %edi = next aligned chunk,
   %esi = %edi + %ecx = string start + 16.  Falls through to L(loop).  */
171 mov %eax, %ebx
172 add $16, %edi
173 lea (%edi, %ecx), %esi
174
175/* Loop start on aligned string. */
/* Unrolled four times.  Each step loads 16 aligned bytes, compares
   them with zero (result in %xmm2) and with the search byte (result in
   %xmm0), advances %edi, and leaves the loop when either mask is
   non-zero.  A step that falls through found no NUL, so %xmm2 is all
   zero again for the next step.  */
176 .p2align 4
177L(loop):
178 movdqa (%edi), %xmm0
179 pcmpeqb %xmm0, %xmm2
180 add $16, %edi
181 pcmpeqb %xmm1, %xmm0
182 pmovmskb %xmm2, %ecx
183 pmovmskb %xmm0, %eax
184 or %eax, %ecx
185 jnz L(matches)
186
187 movdqa (%edi), %xmm0
188 pcmpeqb %xmm0, %xmm2
189 add $16, %edi
190 pcmpeqb %xmm1, %xmm0
191 pmovmskb %xmm2, %ecx
192 pmovmskb %xmm0, %eax
193 or %eax, %ecx
194 jnz L(matches)
195
196 movdqa (%edi), %xmm0
197 pcmpeqb %xmm0, %xmm2
198 add $16, %edi
199 pcmpeqb %xmm1, %xmm0
200 pmovmskb %xmm2, %ecx
201 pmovmskb %xmm0, %eax
202 or %eax, %ecx
203 jnz L(matches)
204
205 movdqa (%edi), %xmm0
206 pcmpeqb %xmm0, %xmm2
207 add $16, %edi
208 pcmpeqb %xmm1, %xmm0
209 pmovmskb %xmm2, %ecx
210 pmovmskb %xmm0, %eax
211 or %eax, %ecx
212 jz L(loop)
213
/* The chunk at %edi - 16 holds a match (%eax != 0), a NUL, or both.  */
214L(matches):
215 test %eax, %eax
216 jnz L(match)
/* NUL reached with no usable match in the current chunk: answer from
   the recorded %ebx / %esi pair, or NULL when nothing was recorded.  */
217L(return_value):
218 test %ebx, %ebx
219 jz L(return_null_1)
/* Highest bit of %ebx = last match in the recorded chunk.  */
220 bsr %ebx, %eax
221 add %esi, %eax
222
223 POP (%ebx)
224 POP (%esi)
225
/* Remove the 16 that %esi carries.  */
226 sub $16, %eax
227 POP (%edi)
228 ret
229
/* Annotation only: the code below runs with all three registers
   pushed.  */
230 CFI_PUSH (%edi)
231 CFI_PUSH (%ebx)
232 CFI_PUSH (%esi)
233
234 .p2align 4
/* Current chunk holds a match.  %ecx was merged with %eax in the loop,
   so the NUL mask is read from %xmm2 again.  */
235L(match):
236 pmovmskb %xmm2, %ecx
237 test %ecx, %ecx
238 jnz L(return_value_1)
/* No NUL: record this chunk (%edi is already 16 past it) and go on.  */
239 mov %eax, %ebx
240 mov %edi, %esi
241 jmp L(loop)
242
243 .p2align 4
/* Current chunk holds both a match and a NUL.  */
244L(return_value_1):
/* Keep only matches at or before the first NUL (bits 0..%cl).  */
245 bsf %ecx, %ecx
246 mov $2, %edx
247 shl %cl, %edx
248 sub $1, %edx
249 and %edx, %eax
/* All matches lie after the NUL: use the recorded one instead.  */
250 jz L(return_value)
251
252 POP (%ebx)
253 POP (%esi)
254
/* Result = (%edi - 16) + index of the last surviving match.  */
255 bsr %eax, %eax
256 add %edi, %eax
257 sub $16, %eax
258 POP (%edi)
259 ret
260
/* Annotation only: L(return_null) is reached with only %edi pushed.  */
261 CFI_PUSH (%edi)
262/* Return NULL. */
263 .p2align 4
264L(return_null):
265 xor %eax, %eax
266 POP (%edi)
267 ret
268
/* Annotation only: L(return_null_1) is reached with %edi, %esi and
   %ebx pushed.  */
269 CFI_PUSH (%edi)
270 CFI_PUSH (%ebx)
271 CFI_PUSH (%esi)
272/* Return NULL. */
273 .p2align 4
274L(return_null_1):
275 POP (%ebx)
276 POP (%esi)
277 POP (%edi)
278 xor %eax, %eax
279 ret
280
281END (__strrchr_sse2_bsf)
282#endif