]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
Remove "Contributed by" lines
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / strrchr-sse2-bsf.S
CommitLineData
693fb948 1/* strrchr with SSE2 with bsf and bsr
2b778ceb 2 Copyright (C) 2011-2021 Free Software Foundation, Inc.
693fb948
LD
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
59ba27a6 16 License along with the GNU C Library; if not, see
5a82c748 17 <https://www.gnu.org/licenses/>. */
693fb948 18
4f41c682 19#if IS_IN (libc)
693fb948
LD
20
21# include <sysdep.h>
22
/* CFI_PUSH (REG): unwind-info bookkeeping for a 4-byte push of REG.
   Grows the CFA offset by 4 and records REG as saved at offset 0 from
   the current stack pointer.  Emits no instruction by itself, so it is
   also used after a `ret' to re-describe the frame for code that is
   reached by a jump.  (cfi_adjust_cfa_offset/cfi_rel_offset come from
   <sysdep.h>; presumably thin wrappers around the .cfi_* directives.)  */
23# define CFI_PUSH(REG) \
24 cfi_adjust_cfa_offset (4); \
25 cfi_rel_offset (REG, 0)
26
/* CFI_POP (REG): inverse bookkeeping of CFI_PUSH.  Shrinks the CFA
   offset by 4 and marks REG as holding its original value again.  */
27# define CFI_POP(REG) \
28 cfi_adjust_cfa_offset (-4); \
29 cfi_restore (REG)
30
/* PUSH/POP: the real pushl/popl instruction followed by the matching
   unwind annotation, so the two cannot get out of step.  */
31# define PUSH(REG) pushl REG; CFI_PUSH (REG)
32# define POP(REG) popl REG; CFI_POP (REG)
33
/* Stack offsets of the arguments relative to %esp at function entry
   (before any PUSH): PARMS skips the 4-byte return address, STR1 is the
   first argument and STR2 the second.  Both are read before the first
   PUSH, so no adjustment for saved registers is needed.  */
34# define PARMS 4
35# define STR1 PARMS
36# define STR2 STR1+4
37
48882a1a 38 .text
693fb948
LD
/* __strrchr_sse2_bsf -- find the last occurrence of a byte in a
   NUL-terminated string, scanning 16 bytes at a time with SSE2.

   Arguments (on the stack, read via STR1/STR2 before any push):
     STR1(%esp)  address of the string
     STR2(%esp)  byte to look for (only the low 8 bits are used)
   Result:
     %eax        address of the last matching byte at or before the
                 terminating NUL, or 0 if there is none.  The NUL itself
                 takes part in the comparison, so searching for 0
                 yields the address of the terminator.

   %edi, %esi and %ebx are saved on the stack and restored before every
   `ret'.  %ecx, %edx, %xmm0-%xmm3 and the flags are overwritten.

   Register roles inside the main loop:
     %edi   address of the next 16-byte block to load (16-byte aligned)
     %xmm1  the search byte replicated into all 16 lanes
     %xmm2  all-zero on entry to every block; receives the NUL-compare
            result, which is again all-zero whenever the loop continues
     %ebx   match bitmask of the most recent block that contained a
            match, or 0 if no match has been seen yet
     %esi   16 + the address that bit 0 of %ebx corresponds to

   Stack layout once the loop is entered (top last): return address,
   saved %edi, saved %esi, saved %ebx.  The CFI_PUSH sequences that
   re-describe this frame after a `ret' must list the registers in that
   same order, otherwise the unwinder swaps the %esi and %ebx slots.  */
ENTRY (__strrchr_sse2_bsf)

	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	PUSH	(%edi)
	pxor	%xmm2, %xmm2
	mov	%ecx, %edi
	/* Replicate the low byte of XMM1 into its low dword; the pshufd
	   below then copies that dword into all four dwords.  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* ECX has OFFSET: the string address modulo 64.  If it is above
	   48, a 16-byte load from the string start would run past the
	   end of its 64-byte block, so take the aligned-load path.  */
	and	$63, %ecx
	cmp	$48, %ecx
	pshufd	$0, %xmm1, %xmm1
	ja	L(crosscache)

/* Unaligned string: the first 16 bytes can be loaded directly.  */
	movdqu	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm2, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax

	test	%eax, %eax
	jnz	L(unaligned_match1)

	test	%edx, %edx
	jnz	L(return_null)

	/* No match and no NUL: continue at the next 16-byte aligned
	   address.  Bytes between there and the end of the block just
	   examined get scanned a second time, which is harmless.  */
	and	$-16, %edi
	add	$16, %edi

	PUSH	(%esi)
	PUSH	(%ebx)

	xor	%ebx, %ebx		/* No match remembered yet.  */
	jmp	L(loop)

	/* The code below is reached with only EDI saved.  */
	CFI_POP	(%ebx)
	CFI_POP	(%esi)

	.p2align 4
L(unaligned_return_value1):
	/* Both a match and the NUL are in the first block.  Keep only
	   the match bits at or below the NUL position:
	   EDX = (2 << index_of_NUL) - 1.  */
	bsf	%edx, %ecx
	mov	$2, %edx
	shl	%cl, %edx
	sub	$1, %edx
	and	%edx, %eax
	jz	L(return_null)
	bsr	%eax, %eax		/* Index of the last match.  */
	add	%edi, %eax
	POP	(%edi)
	ret
	CFI_PUSH (%edi)

	.p2align 4
L(unaligned_match1):
	test	%edx, %edx
	jnz	L(unaligned_return_value1)

	PUSH	(%esi)
	PUSH	(%ebx)

	/* Remember the match mask and, in ESI, the block start + 16
	   (the form L(return_value) expects), then go on scanning from
	   the next aligned address.  */
	mov	%eax, %ebx
	lea	16(%edi), %esi
	and	$-16, %edi
	add	$16, %edi
	jmp	L(loop)

	/* The code below is reached with only EDI saved.  */
	CFI_POP	(%ebx)
	CFI_POP	(%esi)

	.p2align 4
L(crosscache):
/* Handle unaligned string near the end of a 64-byte block: load the
   enclosing aligned 16 bytes and discard the bytes that precede the
   string.  XMM3 is used for the NUL compare instead of XMM2 because
   those leading bytes may contain zeros, and XMM2 has to stay all-zero
   for the main loop.  */
	and	$15, %ecx
	and	$-16, %edi
	pxor	%xmm3, %xmm3
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm3, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	/* Remove the leading bytes.  */
	shr	%cl, %edx
	shr	%cl, %eax

	test	%eax, %eax
	jnz	L(unaligned_match)

	test	%edx, %edx
	jnz	L(return_null)

	add	$16, %edi

	PUSH	(%esi)
	PUSH	(%ebx)

	xor	%ebx, %ebx		/* No match remembered yet.  */
	jmp	L(loop)

	/* The code below is reached with only EDI saved.  */
	CFI_POP	(%ebx)
	CFI_POP	(%esi)

	.p2align 4
L(unaligned_return_value):
	/* EDI = aligned base + leading-byte count = start of string, so
	   bit 0 of the shifted masks corresponds to (%edi).  */
	add	%ecx, %edi
	bsf	%edx, %ecx
	mov	$2, %edx
	shl	%cl, %edx
	sub	$1, %edx
	and	%edx, %eax
	jz	L(return_null)
	bsr	%eax, %eax
	add	%edi, %eax
	POP	(%edi)
	ret
	CFI_PUSH (%edi)

	.p2align 4
L(unaligned_match):
	test	%edx, %edx
	jnz	L(unaligned_return_value)

	PUSH	(%esi)
	PUSH	(%ebx)

	/* Remember the shifted match mask; ESI = string start + 16.
	   Execution falls through into the loop.  */
	mov	%eax, %ebx
	add	$16, %edi
	lea	(%edi, %ecx), %esi

/* Loop start on aligned string.  Unrolled four times; each step loads
   one block, advances EDI past it, and leaves to L(matches) as soon as
   the block contains a match or a NUL.  */
	.p2align 4
L(loop):
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jz	L(loop)

L(matches):
	test	%eax, %eax
	jnz	L(match)
L(return_value):
	/* The string ended without a usable match in the current block:
	   fall back to the remembered match, if there is one.  */
	test	%ebx, %ebx
	jz	L(return_null_1)
	bsr	%ebx, %eax
	add	%esi, %eax

	POP	(%ebx)
	POP	(%esi)

	sub	$16, %eax		/* ESI was biased by 16.  */
	POP	(%edi)
	ret

	/* Re-describe the full frame, in actual push order.  */
	CFI_PUSH (%edi)
	CFI_PUSH (%esi)
	CFI_PUSH (%ebx)

	.p2align 4
L(match):
	/* ECX was merged with EAX above; fetch the NUL mask again.  */
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(return_value_1)
	/* Match without NUL: remember it and keep scanning.  EDI has
	   already been advanced, hence the bias of 16 in ESI.  */
	mov	%eax, %ebx
	mov	%edi, %esi
	jmp	L(loop)

	.p2align 4
L(return_value_1):
	/* Match and NUL in the same block: drop the matches that lie
	   beyond the NUL.  */
	bsf	%ecx, %ecx
	mov	$2, %edx
	shl	%cl, %edx
	sub	$1, %edx
	and	%edx, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)

	bsr	%eax, %eax
	add	%edi, %eax
	sub	$16, %eax		/* EDI already points past the block.  */
	POP	(%edi)
	ret

	CFI_PUSH (%edi)
/* Return NULL; only EDI is on the stack.  */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	POP	(%edi)
	ret

	/* Re-describe the full frame, in actual push order.  */
	CFI_PUSH (%edi)
	CFI_PUSH (%esi)
	CFI_PUSH (%ebx)
/* Return NULL; EDI, ESI and EBX are on the stack.  */
	.p2align 4
L(return_null_1):
	POP	(%ebx)
	POP	(%esi)
	POP	(%edi)
	xor	%eax, %eax
	ret

END (__strrchr_sse2_bsf)
281#endif