]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/multiarch/wcschr-sse2.S
Update copyright notices with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / wcschr-sse2.S
1 /* wcschr with SSE2, without using bsf instructions
2 Copyright (C) 2011-2014 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #ifndef NOT_IN_libc
21 # include <sysdep.h>
22
/* Unwind (CFI) bookkeeping for a 4-byte register push: the CFA offset
   grows by 4 and REG is recorded as saved at offset 0.  The cfi_*
   helpers are not defined in this file (presumably they come from
   <sysdep.h>).  */
23 # define CFI_PUSH(REG) \
24 cfi_adjust_cfa_offset (4); \
25 cfi_rel_offset (REG, 0)
26
/* Inverse of CFI_PUSH: the CFA offset shrinks by 4 and REG is marked as
   restored.  */
27 # define CFI_POP(REG) \
28 cfi_adjust_cfa_offset (-4); \
29 cfi_restore (REG)
30
/* Push/pop a register and emit the matching CFI annotation in one
   statement.  */
31 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
32 # define POP(REG) popl REG; CFI_POP (REG)
33
/* Displacements from %esp at function entry used to fetch the two
   arguments: STR1 expands to 4 and STR2 to 4+4.  PARMS is presumably
   the size of the return address that sits below the arguments.  */
34 # define PARMS 4
35 # define STR1 PARMS
36 # define STR2 STR1+4
37
/* __wcschr_sse2 -- search a string of 4-byte elements for a value, 16
   bytes (four elements) at a time, using SSE2 compares and byte masks.

   In:    STR1(%esp) = address of the string.
          STR2(%esp) = 4-byte value to search for.
   Out:   %eax = address of the first element equal to the value, or 0
          if a zero element (the terminator) is met first.  When the
          value searched for is itself 0, the address of the terminator
          is returned.
   Uses:  %eax, %ecx, %edx, %xmm0-%xmm2 and the flags; %edi is used on
          one path and is saved and restored there.

   Register roles:
          %ecx  = base address of the 16-byte block being examined.
          %xmm1 = search value replicated into all four dwords.
          %xmm2 = zero before each compare, terminator lane mask after.
          %eax  = byte mask of matching lanes (from pmovmskb).
          %edx  = byte mask of zero lanes (from pmovmskb).

   Mask layout: pmovmskb yields one bit per byte and pcmpeqd sets or
   clears whole dwords, so every element owns four adjacent mask bits:
   element 0 -> low nibble of %al/%dl, element 1 -> high nibble of
   %al/%dl, element 2 -> low nibble of %ah/%dh, element 3 -> high
   nibble of %ah/%dh.  The tail code decodes these nibbles with plain
   tests instead of a bit-scan instruction.  */
38 atom_text_section
39 ENTRY (__wcschr_sse2)
40
41 mov STR1(%esp), %ecx /* %ecx = string address */
42 movd STR2(%esp), %xmm1 /* low dword of %xmm1 = search value */
43
44 mov %ecx, %eax
45 punpckldq %xmm1, %xmm1 /* first of two unpacks replicating the value into all four dwords */
46 pxor %xmm2, %xmm2 /* %xmm2 = 0, compared against the data to find the terminator */
47 punpckldq %xmm1, %xmm1
48
49 and $63, %eax /* %eax = offset of the string inside its 64-byte block */
50 cmp $48, %eax
51 ja L(cross_cache) /* a 16-byte load from here would run past that 64-byte block */
52
53 movdqu (%ecx), %xmm0 /* unaligned load of the first four elements */
54 pcmpeqd %xmm0, %xmm2 /* %xmm2 = lanes equal to 0 */
55 pcmpeqd %xmm1, %xmm0 /* %xmm0 = lanes equal to the search value */
56 pmovmskb %xmm2, %edx
57 pmovmskb %xmm0, %eax
58 or %eax, %edx /* destroys %edx; L(matches) rebuilds it from %xmm2 */
59 jnz L(matches) /* match or terminator seen; %ecx is still the string start */
60 and $-16, %ecx /* round down to 16 bytes; L(loop) begins by adding 16 */
61 jmp L(loop)
62
63 .p2align 4
64 L(cross_cache):
65 PUSH (%edi) /* %edi serves as scratch here; restored by POP on both exits */
66 mov %ecx, %edi
67 mov %eax, %ecx
68 and $-16, %edi /* %edi = 16-byte aligned block containing the string start */
69 and $15, %ecx /* %ecx = byte offset of the string start inside that block */
70 movdqa (%edi), %xmm0 /* aligned load, so it cannot extend past the block */
71 pcmpeqd %xmm0, %xmm2
72 pcmpeqd %xmm1, %xmm0
73 pmovmskb %xmm2, %edx
74 pmovmskb %xmm0, %eax
75
76 sarl %cl, %edx /* shift out mask bits of bytes that precede the string */
77 sarl %cl, %eax /* bit 0 of both masks now corresponds to the string start */
78 test %eax, %eax
79 jz L(unaligned_no_match)
80
81 add %edi, %ecx /* %ecx = original string address, matching the shifted masks */
82 POP (%edi)
83
84 test %edx, %edx /* a match exists; was a terminator seen as well? */
85 jz L(match_case1)
86 test %al, %al /* from here on the same decoding as L(match_case2) */
87 jz L(match_higth_case2)
88 test $15, %al
89 jnz L(match_case2_4)
90 test $15, %dl
91 jnz L(return_null)
92 lea 4(%ecx), %eax
93 ret
94
95 CFI_PUSH (%edi) /* unwind info only: the code below is entered with %edi still pushed */
96
97 .p2align 4
98 L(unaligned_no_match):
99 mov %edi, %ecx /* %ecx = aligned block base, as L(loop) expects */
100 POP (%edi)
101
102 test %edx, %edx
103 jnz L(return_null) /* terminator in the rest of the block and no match */
104
105 pxor %xmm2, %xmm2 /* %xmm2 may hold lanes set by bytes before the string start */
106
107 /* Loop start on aligned string. */
108 .p2align 4
109 L(loop):
110 add $16, %ecx /* unrolled four times: each step checks the next aligned 16 bytes */
111 movdqa (%ecx), %xmm0
112 pcmpeqd %xmm0, %xmm2
113 pcmpeqd %xmm1, %xmm0
114 pmovmskb %xmm2, %edx
115 pmovmskb %xmm0, %eax
116 or %eax, %edx
117 jnz L(matches)
118 add $16, %ecx /* second block; %xmm2 is still all zero because nothing was found */
119
120 movdqa (%ecx), %xmm0
121 pcmpeqd %xmm0, %xmm2
122 pcmpeqd %xmm1, %xmm0
123 pmovmskb %xmm2, %edx
124 pmovmskb %xmm0, %eax
125 or %eax, %edx
126 jnz L(matches)
127 add $16, %ecx /* third block */
128
129 movdqa (%ecx), %xmm0
130 pcmpeqd %xmm0, %xmm2
131 pcmpeqd %xmm1, %xmm0
132 pmovmskb %xmm2, %edx
133 pmovmskb %xmm0, %eax
134 or %eax, %edx
135 jnz L(matches)
136 add $16, %ecx /* fourth block */
137
138 movdqa (%ecx), %xmm0
139 pcmpeqd %xmm0, %xmm2
140 pcmpeqd %xmm1, %xmm0
141 pmovmskb %xmm2, %edx
142 pmovmskb %xmm0, %eax
143 or %eax, %edx
144 jz L(loop) /* nothing in these 64 bytes; otherwise fall through to L(matches) */
145
146 .p2align 4
147 L(matches):
148 pmovmskb %xmm2, %edx /* rebuild the terminator mask that the "or" overwrote */
149 test %eax, %eax
150 jz L(return_null) /* only a terminator was found */
151 test %edx, %edx
152 jz L(match_case1) /* match and no terminator in this block */
153
154 .p2align 4
155 L(match_case2): /* block holds both a match and a terminator: earliest one decides */
156 test %al, %al
157 jz L(match_higth_case2) /* no match in elements 0-1 */
158 test $15, %al
159 jnz L(match_case2_4) /* element 0 matches */
160 test $15, %dl
161 jnz L(return_null) /* element 0 is the terminator, the match comes after it */
162 lea 4(%ecx), %eax /* element 1 matches */
163 ret
164
165 .p2align 4
166 L(match_case2_4):
167 mov %ecx, %eax /* return the address of element 0 */
168 ret
169
170 .p2align 4
171 L(match_higth_case2): /* match is in elements 2-3 and a terminator is present */
172 test %dl, %dl
173 jnz L(return_null) /* terminator in elements 0-1 precedes the match */
174 test $15, %ah
175 jnz L(match_case2_12) /* element 2 matches */
176 test $15, %dh
177 jnz L(return_null) /* element 2 is the terminator */
178 lea 12(%ecx), %eax /* element 3 matches */
179 ret
180
181 .p2align 4
182 L(match_case2_12):
183 lea 8(%ecx), %eax /* return the address of element 2 */
184 ret
185
186 .p2align 4
187 L(match_case1): /* match with no terminator in the block: return the first match */
188 test %al, %al
189 jz L(match_higth_case1) /* no match in elements 0-1 */
190
191 test $0x01, %al /* bit 0 set means element 0 matched */
192 jnz L(exit0)
193 lea 4(%ecx), %eax /* otherwise element 1 matched */
194 ret
195
196 .p2align 4
197 L(match_higth_case1):
198 test $0x01, %ah /* bit 8 set means element 2 matched */
199 jnz L(exit3)
200 lea 12(%ecx), %eax /* otherwise element 3 matched */
201 ret
202
203 .p2align 4
204 L(exit0):
205 mov %ecx, %eax /* return the address of element 0 */
206 ret
207
208 .p2align 4
209 L(exit3):
210 lea 8(%ecx), %eax /* return the address of element 2 */
211 ret
212
213 .p2align 4
214 L(return_null):
215 xor %eax, %eax /* not found: return 0 */
216 ret
217
218 END (__wcschr_sse2)
219 #endif