]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/multiarch/wcslen-sse2.S
d0a0fbb3bfa4f55685a78777a17ded5174b2b555
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / wcslen-sse2.S
1 /* wcslen with SSE2
2 Copyright (C) 2011-2018 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #if IS_IN (libc)
21 # include <sysdep.h>
22 # define STR 4
23
24 .text
/* size_t __wcslen_sse2 (const wchar_t *s) -- wcslen using SSE2 (i386).

   In:     STR(%esp) = s, the first stack argument.  The code treats s
	   as an array of 4-byte elements ended by a zero element.
   Out:    eax = number of elements that precede the first zero element.
   Clobb:  ecx, edx, xmm0-xmm3, xmm6, flags.  No stack is used.

   NOTE(review): the vector path compares aligned 16-byte blocks lane by
   lane, so it presumably relies on s being 4-byte aligned (elements
   coincide with dword lanes) -- confirm against the callers.

   Register roles once the vector path is entered:
     eax  address 16 (probes) or 64 (loop) bytes past the start of the
	  block being examined
     ecx  s + 16; biased so that at L(exit) eax - ecx is the byte offset
	  from s of the 16-byte vector that holds the terminator
     edx  pmovmskb byte mask of the last comparison (4 bits per lane)  */
ENTRY (__wcslen_sse2)
	mov	STR(%esp), %edx

	/* Scalar check of the first eight elements (32 bytes).  The operand
	   size is spelled out (cmpl): with an immediate and a memory operand
	   there is no register to infer it from.  */
	cmpl	$0, (%edx)
	jz	L(exit_tail0)
	cmpl	$0, 4(%edx)
	jz	L(exit_tail1)
	cmpl	$0, 8(%edx)
	jz	L(exit_tail2)
	cmpl	$0, 12(%edx)
	jz	L(exit_tail3)
	cmpl	$0, 16(%edx)
	jz	L(exit_tail4)
	cmpl	$0, 20(%edx)
	jz	L(exit_tail5)
	cmpl	$0, 24(%edx)
	jz	L(exit_tail6)
	cmpl	$0, 28(%edx)
	jz	L(exit_tail7)

	pxor	%xmm0, %xmm0

	/* eax = (s + 32) rounded down to a 16-byte boundary, so the first
	   vector read starts no later than s + 32; every element below
	   s + 32 is already known to be non-zero.  */
	lea	32(%edx), %eax
	lea	16(%edx), %ecx
	and	$-16, %eax

	/* Four single-vector probes.  Each compares one aligned vector with
	   a zeroed register; lea leaves the flags alone, so it can sit
	   between test and jnz.  */
	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* Falling through this probe leaves xmm3 all-zero (no lane matched);
	   the loop below uses it as its zero constant.  */
	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* Round down to a 64-byte boundary for the main loop.  This can step
	   back over vectors that were just probed; rescanning them is
	   harmless because they hold no zero element.  */
	and	$-0x40, %eax

	.p2align 4
L(aligned_64_loop):
	movaps	(%eax), %xmm0
	movaps	16(%eax), %xmm1
	movaps	32(%eax), %xmm2
	movaps	48(%eax), %xmm6

	/* Fold the four vectors with a byte-wise minimum: a zero element in
	   any of them gives a zero dword lane in xmm2.  */
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqd	%xmm3, %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	64(%eax), %eax
	jz	L(aligned_64_loop)

	/* The folded test can also fire when zero bytes of different
	   elements line up, so each vector is rechecked on its own.  xmm0
	   and xmm2 were overwritten by pminub, hence vectors 0 and 2 are
	   re-read from memory; xmm1 and xmm6 still hold vectors 1 and 3.
	   xmm3 is all-zero on entry to every recheck, because the previous
	   one is only left by fall-through when nothing matched.

	   eax is now 64 past the block start.  ecx is raised by 48 for
	   vector 0 and lowered by 16 per step, so eax - ecx at L(exit) is
	   always the offset of the matching vector, and ecx is back at
	   s + 16 if all four rechecks fail.  */
	pcmpeqd	-64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	48(%ecx), %ecx
	jnz	L(exit)

	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	pcmpeqd	-32(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	pcmpeqd	%xmm6, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	/* False positive: no vector holds a zero element.  */
	jmp	L(aligned_64_loop)

	/* In: eax/ecx as described above, edx = non-zero mask with four
	   bits per dword lane.  eax becomes the element index of lane 0 of
	   the matching vector; the lowest matching lane (0-3) is added.  */
	.p2align 4
L(exit):
	sub	%ecx, %eax
	shr	$2, %eax
	test	%dl, %dl
	jz	L(exit_high)		/* Lanes 0 and 1 are clear.  */

	/* test sets ZF exactly like the and it replaces, without copying the
	   mask byte into a scratch register first.  */
	test	$0x0f, %dl
	jz	L(exit_1)
	ret				/* Lane 0.  */

	.p2align 4
L(exit_high):
	test	$0x0f, %dh
	jz	L(exit_3)
	add	$2, %eax		/* Lane 2.  */
	ret

	.p2align 4
L(exit_1):
	add	$1, %eax		/* Lane 1.  */
	ret

	.p2align 4
L(exit_3):
	add	$3, %eax		/* Lane 3.  */
	ret

	/* Results for a terminator found by the scalar checks: the length
	   is the index of the element that compared equal to zero.  */
	.p2align 4
L(exit_tail0):
	xor	%eax, %eax
	ret

	.p2align 4
L(exit_tail1):
	mov	$1, %eax
	ret

	.p2align 4
L(exit_tail2):
	mov	$2, %eax
	ret

	.p2align 4
L(exit_tail3):
	mov	$3, %eax
	ret

	.p2align 4
L(exit_tail4):
	mov	$4, %eax
	ret

	.p2align 4
L(exit_tail5):
	mov	$5, %eax
	ret

	.p2align 4
L(exit_tail6):
	mov	$6, %eax
	ret

	.p2align 4
L(exit_tail7):
	mov	$7, %eax
	ret

END (__wcslen_sse2)
193 #endif