]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/x86_64/wcsrchr.S
Use <> for include of kernel-features.h.
[thirdparty/glibc.git] / sysdeps / x86_64 / wcsrchr.S
CommitLineData
1d3e4b61
UD
/* wcsrchr with SSE2
   Copyright (C) 2011 Free Software Foundation, Inc.
   Contributed by Intel Corporation.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA. */

#include <sysdep.h>

/* wchar_t *wcsrchr (const wchar_t *s, wchar_t c)

   Return a pointer to the last wide character equal to C in the
   NUL-terminated wide string S, or NULL if there is none.  The
   terminating NUL takes part in the comparison, so C == 0 yields a
   pointer to the terminator.

   Syntax: GAS AT&T, preprocessed (.S).
   In:     %rdi = s, low 32 bits of %rsi = c.
   Out:    %rax = result pointer, or 0.
   Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %xmm0-%xmm5, flags.
   No stack is used and no calls are made.

   The string is examined 16 bytes (four 32-bit wide characters) at a
   time.  For each block two byte masks are produced with
   pcmpeqd + pmovmskb: a "match" mask (lanes equal to C) and a "NUL"
   mask (lanes equal to 0).  Each wide character owns four consecutive
   mask bits: bits 0-3 the first, 4-7 the second, 8-11 the third,
   12-15 the fourth.

   Register roles:
     %xmm1  C broadcast to all four dword lanes.
     %xmm2  NUL comparand.  It starts as zero; "pcmpeqd blk, %xmm2"
	    turns it into the NUL mask of the block, which is again
	    all-zero whenever the block held no NUL, so it never needs
	    to be cleared while scanning continues.
     %rdi   Address just past the block being examined.
     %r8    Match mask of the most recent block that contained a match
	    (0 = no match seen yet).
     %rsi   Address just past that block, i.e. the match in lane k of
	    %r8 lives at %rsi - 16 + 4*k.  */

	.text
ENTRY (wcsrchr)

	/* Broadcast C: the two punpckldq turn lane 0 into all four lanes.  */
	movd	%rsi, %xmm1
	mov	%rdi, %rcx
	punpckldq	%xmm1, %xmm1
	pxor	%xmm2, %xmm2
	punpckldq	%xmm1, %xmm1
	/* A 16-byte load that starts more than 48 bytes into a 64-byte
	   aligned block would run past the end of that block; take the
	   aligned-load path in that case.  */
	and	$63, %rcx
	cmp	$48, %rcx
	ja	L(crosscache)

	/* First block, read unaligned straight from S.
	   %rcx = NUL mask, %rax = match mask, both relative to S.  */
	movdqu	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb	%xmm2, %rcx
	pmovmskb	%xmm0, %rax
	add	$16, %rdi

	test	%rax, %rax
	jnz	L(unaligned_match1)

	/* No match in the first block; if it holds the NUL we are done.  */
	test	%rcx, %rcx
	jnz	L(return_null)

	/* Round down to the 16-byte block containing S + 16.  The loop may
	   re-examine bytes of the first block; that block held no NUL and
	   no match, so this is harmless.  */
	and	$-16, %rdi
	xor	%r8, %r8
	jmp	L(loop)

	.p2align 4
L(unaligned_match1):
	/* Match in the first (unaligned) block.  */
	test	%rcx, %rcx
	jnz	L(prolog_find_zero_1)

	/* No NUL yet: remember this match (mask relative to S, end address
	   S + 16) and continue with aligned blocks.  Bytes re-examined by
	   the loop can only reproduce the same match addresses.  */
	mov	%rax, %r8
	mov	%rdi, %rsi
	and	$-16, %rdi
	jmp	L(loop)

	.p2align 4
L(crosscache):
	/* First block, read aligned from the 16-byte block containing S.
	   %rcx = S & 15 is the number of leading bytes that precede S;
	   shifting both masks right by it discards those bytes and makes
	   bit 0 correspond to the byte at S.  */
	and	$15, %rcx
	and	$-16, %rdi
	pxor	%xmm3, %xmm3
	movdqa	(%rdi), %xmm0
	/* %xmm3 is the NUL comparand here, so %xmm2 stays zero for the loop.  */
	pcmpeqd	%xmm0, %xmm3
	pcmpeqd	%xmm1, %xmm0
	pmovmskb	%xmm3, %rdx
	pmovmskb	%xmm0, %rax
	shr	%cl, %rdx
	shr	%cl, %rax
	add	$16, %rdi

	test	%rax, %rax
	jnz	L(unaligned_match)

	test	%rdx, %rdx
	jnz	L(return_null)

	xor	%r8, %r8
	jmp	L(loop)

	.p2align 4
L(unaligned_match):
	test	%rdx, %rdx
	jnz	L(prolog_find_zero)

	/* The mask was shifted to be relative to S, so the matching end
	   address is S + 16 = %rdi + (S & 15).  Execution then falls
	   through (across the alignment padding) into the loop.  */
	mov	%rax, %r8
	lea	(%rdi, %rcx), %rsi

/* Loop start on aligned string.  */
	.p2align 4
L(loop):
	/* Four aligned blocks per iteration.  For each one:
	   %rax = match mask, %rcx = NUL mask | match mask; leave the loop
	   as soon as either is non-zero.  */
	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb	%xmm2, %rcx
	pmovmskb	%xmm0, %rax
	or	%rax, %rcx
	jnz	L(matches)

	movdqa	(%rdi), %xmm3
	pcmpeqd	%xmm3, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm3
	pmovmskb	%xmm2, %rcx
	pmovmskb	%xmm3, %rax
	or	%rax, %rcx
	jnz	L(matches)

	movdqa	(%rdi), %xmm4
	pcmpeqd	%xmm4, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm4
	pmovmskb	%xmm2, %rcx
	pmovmskb	%xmm4, %rax
	or	%rax, %rcx
	jnz	L(matches)

	movdqa	(%rdi), %xmm5
	pcmpeqd	%xmm5, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm5
	pmovmskb	%xmm2, %rcx
	pmovmskb	%xmm5, %rax
	or	%rax, %rcx
	jz	L(loop)

	.p2align 4
L(matches):
	/* The block ending at %rdi has a match, a NUL, or both.  */
	test	%rax, %rax
	jnz	L(match)
L(return_value):
	/* No usable match in the final block: answer from the saved
	   (%r8, %rsi) pair, or NULL if nothing was ever saved.  */
	test	%r8, %r8
	jz	L(return_null)
	mov	%r8, %rax
	mov	%rsi, %rdi

	/* Pick the highest matching lane: bits 12-15, then 8-11 (any
	   remaining bit of %ah), then 4-7, else lane 0.  */
	test	$15 << 4, %ah
	jnz	L(match_fourth_wchar)
	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(match):
	/* %rcx was merged with %rax above; fetch the pure NUL mask again.  */
	pmovmskb	%xmm2, %rcx
	test	%rcx, %rcx
	jnz	L(find_zero)
	/* Match but no NUL: this block becomes the best candidate.  */
	mov	%rax, %r8
	mov	%rdi, %rsi
	jmp	L(loop)

	.p2align 4
L(find_zero):
	/* Final block with at least one match.  Locate the first NUL lane
	   and keep only match bits up to and including the first byte of
	   that lane (so that C == 0 still matches the terminator itself).
	   In the masks below GAS evaluates << before -, e.g.
	   1 << 13 - 1 is (1 << 13) - 1 = 0x1fff.  */
	test	$15, %cl
	jnz	L(find_zero_in_first_wchar)
	test	%cl, %cl
	jnz	L(find_zero_in_second_wchar)
	test	$15, %ch
	jnz	L(find_zero_in_third_wchar)

	/* NUL is in the fourth lane: keep mask bits 0-12.  */
	and	$1 << 13 - 1, %rax
	jz	L(return_value)

	test	$15 << 4, %ah
	jnz	L(match_fourth_wchar)
	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(find_zero_in_first_wchar):
	/* NUL in lane 0: only a match on lane 0 itself counts.  */
	test	$1, %rax
	jz	L(return_value)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(find_zero_in_second_wchar):
	/* NUL in lane 1: keep mask bits 0-4.  */
	and	$1 << 5 - 1, %rax
	jz	L(return_value)

	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(find_zero_in_third_wchar):
	/* NUL in lane 2: keep mask bits 0-8.  */
	and	$1 << 9 - 1, %rax
	jz	L(return_value)

	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	/* The prolog_* paths mirror the find_zero_* paths for the very first
	   block, where no earlier candidate exists: an empty masked result
	   means NULL instead of falling back to (%r8, %rsi).  */
	.p2align 4
L(prolog_find_zero):
	/* Aligned-load prologue: make %rdi = S + 16 so that %rdi - 16 is
	   the byte for mask bit 0, and move the NUL mask into %rcx.  */
	add	%rcx, %rdi
	mov	%rdx, %rcx
L(prolog_find_zero_1):
	test	$15, %cl
	jnz	L(prolog_find_zero_in_first_wchar)
	test	%cl, %cl
	jnz	L(prolog_find_zero_in_second_wchar)
	test	$15, %ch
	jnz	L(prolog_find_zero_in_third_wchar)

	/* NUL is in the fourth lane: keep mask bits 0-12.  */
	and	$1 << 13 - 1, %rax
	jz	L(return_null)

	test	$15 << 4, %ah
	jnz	L(match_fourth_wchar)
	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(prolog_find_zero_in_first_wchar):
	test	$1, %rax
	jz	L(return_null)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(prolog_find_zero_in_second_wchar):
	and	$1 << 5 - 1, %rax
	jz	L(return_null)

	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(prolog_find_zero_in_third_wchar):
	and	$1 << 9 - 1, %rax
	jz	L(return_null)

	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	/* Shared exits: %rdi is the end of the block holding the result.  */
	.p2align 4
L(match_second_wchar):
	lea	-12(%rdi), %rax
	ret

	.p2align 4
L(match_third_wchar):
	lea	-8(%rdi), %rax
	ret

	.p2align 4
L(match_fourth_wchar):
	lea	-4(%rdi), %rax
	ret

	.p2align 4
L(return_null):
	xor	%rax, %rax
	ret

END (wcsrchr)