]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/sparc/sparc64/stpcpy.S
Update copyright notices with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / sparc / sparc64 / stpcpy.S
1 /* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
2 For SPARC v9.
3 Copyright (C) 1998-2014 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
6 Jakub Jelinek <jj@ultra.linux.cz>.
7
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
12
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sysdep.h>
23 #include <asm/asi.h>
/* Declare to the assembler that the global registers %g2, %g3 and %g6
   are used as scratch registers; the routine below writes all three.
   NOTE(review): XCC is not defined anywhere in this file, so the reason
   for the guard cannot be read from here - presumably an including file
   that defines XCC issues these directives itself; confirm.  */
24 #ifndef XCC
25 .register %g2, #scratch
26 .register %g3, #scratch
27 .register %g6, #scratch
28 #endif
29
30 /* Normally, this uses the test
31 ((xword - 0x0101010101010101) & 0x8080808080808080)
32 to find out whether any byte in xword could be zero. This is fast,
33 but it also gives a false alarm for any byte in the range 0x81-0xff.
34 That does not matter for correctness: whenever the test reports a
35 possible zero byte, the word is checked byte by byte. However, if
36 bytes with the high bit set are common in the strings, performance
37 will be poor. You can #define EIGHTBIT_NOT_RARE and the algorithm
38 will use a test that is one tick slower but more precise,
39 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
40 which does not give any false alarms (but if some bits are set,
41 one cannot tell from the result which bytes are zero and which are not).
42 It has yet to be measured which default is correct for glibc
43 for an average user these days.
44 */
45
/* __stpcpy: copy the NUL-terminated string at SRC to DEST and return
   the address of the terminating NUL byte written to DEST.

   Registers as used by the code below:
     %o0  DEST cursor; holds the return value at every retl
     %o1  SRC cursor
     %g1  0x0101010101010101 (built from 0x01010101)
     %g2  0x8080808080808080 (%g1 shifted left by 7)
     %g3  aligned path: the 8-byte word being tested and stored;
          unaligned-source path: low three bits of SRC
     %o3  aligned path: the following word, loaded ahead of time;
          unaligned-source path: the word assembled for DEST
     %o5  unaligned-source path: most recently loaded source word
     %g6  tail code: address of the NUL, moved to %o0 on return
     %g4, %g5, %o2, %o4  temporaries

   Structure:
     entry          DEST and SRC both 8-byte aligned: word loop 1/2/3.
     12, 13         DEST not aligned: copy bytes until it is.
     14, 15         DEST aligned, SRC not: merge two source words per
                    stored word.
     5-11, 23-25    tails of the aligned word loop.
     16-22, 26-30   tails of the unaligned-source loop.

   In both tail groups the byte at shift 56 is the one stored at the
   lowest address (%o0 - 8), i.e. the code relies on big-endian byte
   order within a loaded word.

   An instruction that follows a branch sits in its delay slot.  With
   the ",a" (annul) suffix on a conditional branch the delay slot is
   executed only when the branch is taken.

   NOTE(review): the trailing IEU0/IEU1/CTI/Load/Store/Group remarks on
   each line look like hand-scheduling notes (execution unit and issue
   group) - confirm against the target CPU manual.
   NOTE(review): ASI_PNF comes from asm/asi.h, which is not visible
   here; presumably it is the non-faulting ASI that makes it safe to
   read ahead of the part of SRC already known to be valid - confirm.  */
46 .text
47 .align 32
48 ENTRY(__stpcpy)
/* Build 0x01010101 in %g1 and a copy shifted into the upper half in
   %g2, testing the alignment of DEST on the way.  */
49 sethi %hi(0x01010101), %g1 /* IEU0 Group */
50 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
51 andcc %o0, 7, %g0 /* IEU1 */
52 sllx %g1, 32, %g2 /* IEU0 Group */
53
/* DEST not 8-byte aligned: go to the byte loop at 12.  The delay slot
   puts the low three bits of SRC in %g3 and sets the flags that the
   second branch below tests.  */
54 bne,pn %icc, 12f /* CTI */
55 andcc %o1, 7, %g3 /* IEU1 */
56 or %g1, %g2, %g1 /* IEU0 Group */
/* SRC not 8-byte aligned: go to 14.  The delay slot finishes %g2.  */
57 bne,pn %icc, 14f /* CTI */
58
59 sllx %g1, 7, %g2 /* IEU0 Group */
/* Aligned word loop.
   1: load the first word.
   2: make the loaded word the current one (%g3) and start its test.
   3: load the following word through ASI_PNF while the current word is
      being tested, and advance both cursors by 8.  */
60 1: ldx [%o1], %o3 /* Load */
61 add %o1, 8, %o1 /* IEU1 */
62 2: mov %o3, %g3 /* IEU0 Group */
63
64 sub %o3, %g1, %o2 /* IEU1 */
65 3: ldxa [%o1] ASI_PNF, %o3 /* Load */
66 #ifdef EIGHTBIT_NOT_RARE
67 andn %o2, %g3, %o2 /* IEU0 Group */
68 #endif
69 add %o0, 8, %o0 /* IEU0 Group */
70 andcc %o2, %g2, %g0 /* IEU1 */
71
72 add %o1, 8, %o1 /* IEU0 Group */
/* No candidate zero byte: store the word (annulled delay slot, runs
   only when the branch is taken) and continue at 2.  */
73 be,a,pt %xcc, 2b /* CTI */
74 stx %g3, [%o0 - 8] /* Store */
/* A zero byte is possible.  Test the bytes of %g3 one at a time, from
   shift 56 down to shift 8; each branch delay slot extracts the byte
   for the next test, alternating between %g5 and %g4.  */
75 srlx %g3, 56, %g5 /* IEU0 Group */
76
77 andcc %g5, 0xff, %g0 /* IEU1 Group */
78 be,pn %icc, 11f /* CTI */
79 srlx %g3, 48, %g4 /* IEU0 */
80 andcc %g4, 0xff, %g0 /* IEU1 Group */
81
82 be,pn %icc, 10f /* CTI */
83 srlx %g3, 40, %g5 /* IEU0 */
84 andcc %g5, 0xff, %g0 /* IEU1 Group */
85 be,pn %icc, 9f /* CTI */
86
87 srlx %g3, 32, %g4 /* IEU0 */
88 andcc %g4, 0xff, %g0 /* IEU1 Group */
89 be,pn %icc, 8f /* CTI */
90 srlx %g3, 24, %g5 /* IEU0 */
91
92 andcc %g5, 0xff, %g0 /* IEU1 Group */
93 be,pn %icc, 7f /* CTI */
94 srlx %g3, 16, %g4 /* IEU0 */
95 andcc %g4, 0xff, %g0 /* IEU1 Group */
96
97 be,pn %icc, 6f /* CTI */
98 srlx %g3, 8, %g5 /* IEU0 */
99 andcc %g5, 0xff, %g0 /* IEU1 Group */
100 be,pn %icc, 5f /* CTI */
101
/* Delay slot: begin the test of the following word (%o3) in case the
   loop is re-entered at 3 below.  */
102 sub %o3, %g1, %o2 /* IEU0 */
/* The bytes at shifts 56..8 are all non-zero, so the whole word can be
   stored.  If the last byte is non-zero too this was a false alarm:
   continue at 3, with the delay slot making %o3 the current word.  */
103 stx %g3, [%o0 - 8] /* Store Group */
104 andcc %g3, 0xff, %g0 /* IEU1 */
105 bne,pt %icc, 3b /* CTI */
106
107 mov %o3, %g3 /* IEU0 Group */
/* The byte stored last was the NUL: return %o0 - 1.  Also the exit of
   the byte loop at 13.  */
108 4: retl /* CTI+IEU1 Group */
109 sub %o0, 1, %o0 /* IEU0 */
110
/* Tails of the aligned word loop.  Each entry point sets %g6 to the
   address of the NUL and stores the bytes of %g3 up to and including
   it; %g6 is moved to %o0 in the retl delay slot.  On entry %g5 or %g4
   still holds the shifted value left by the test that branched here.
     6: NUL at %o0 - 3      5: NUL at %o0 - 2  */
111 .align 16
112 6: ba,pt %xcc, 23f /* CTI Group */
113 sub %o0, 3, %g6 /* IEU0 */
114 5: sub %o0, 2, %g6 /* IEU0 Group */
115 stb %g5, [%o0 - 2] /* Store */
116
117 srlx %g3, 16, %g4 /* IEU0 Group */
118 23: sth %g4, [%o0 - 4] /* Store */
119 srlx %g3, 32, %g4 /* IEU0 Group */
120 stw %g4, [%o0 - 8] /* Store */
121
122 retl /* CTI+IEU1 Group */
123 mov %g6, %o0 /* IEU0 */
/*   8: NUL at %o0 - 5      7: NUL at %o0 - 4  */
124 8: ba,pt %xcc, 24f /* CTI Group */
125 sub %o0, 5, %g6 /* IEU0 */
126
127 7: sub %o0, 4, %g6 /* IEU0 Group */
128 stb %g5, [%o0 - 4] /* Store */
129 srlx %g3, 32, %g4 /* IEU0 Group */
130 24: stw %g4, [%o0 - 8] /* Store */
131
132 retl /* CTI+IEU1 Group */
133 mov %g6, %o0 /* IEU0 */
/*   10: NUL at %o0 - 7     9: NUL at %o0 - 6  */
134 10: ba,pt %xcc, 25f /* CTI Group */
135 sub %o0, 7, %g6 /* IEU0 */
136
137 9: sub %o0, 6, %g6 /* IEU0 Group */
138 stb %g5, [%o0 - 6] /* Store */
139 srlx %g3, 48, %g4 /* IEU0 */
140 25: sth %g4, [%o0 - 8] /* Store Group */
141
142 retl /* CTI+IEU1 Group */
143 mov %g6, %o0 /* IEU0 */
/*   11: the first byte of the word is the NUL; store it and return
         %o0 - 8.  */
144 11: stb %g5, [%o0 - 8] /* Store Group */
145 retl /* CTI+IEU1 Group */
146
147 sub %o0, 8, %o0 /* IEU0 */
148
/* DEST is not 8-byte aligned.  Finish %g1 and %g2 here, because the
   instructions that complete them on the aligned path were skipped,
   then copy the first byte.  */
149 .align 16
150 12: or %g1, %g2, %g1 /* IEU0 Group */
151 ldub [%o1], %o3 /* Load */
152 sllx %g1, 7, %g2 /* IEU0 Group */
153 stb %o3, [%o0] /* Store Group */
154
/* 13: byte loop.  Advance both cursors; if the byte just stored was the
   NUL, leave through 4, which returns %o0 - 1.  The delay slot loads
   the next byte through ASI_PNF whether or not the branch is taken.  */
155 13: add %o0, 1, %o0 /* IEU0 */
156 add %o1, 1, %o1 /* IEU1 */
157 andcc %o3, 0xff, %g0 /* IEU1 Group */
158 be,pn %icc, 4b /* CTI */
159
160 lduba [%o1] ASI_PNF, %o3 /* Load */
/* While DEST is still unaligned, store the byte (annulled delay slot)
   and loop.  */
161 andcc %o0, 7, %g0 /* IEU1 Group */
162 bne,a,pt %icc, 13b /* CTI */
163 stb %o3, [%o0] /* Store */
164
/* DEST is now aligned.  If SRC is aligned as well, enter the word loop
   at 1; the annulled delay slot issues the same load as the
   instruction at 1.  Otherwise fall through to 14 with the low three
   bits of SRC in %g3.  */
165 andcc %o1, 7, %g3 /* IEU1 Group */
166 be,a,pt %icc, 1b /* CTI */
167 ldx [%o1], %o3 /* Load */
/* 14: DEST is 8-byte aligned, SRC is not.  Round SRC down by %g3 bytes
   and compute the two shift counts: %g5 = 8 * %g3 and %g4 = 64 - %g5.  */
168 14: orcc %g0, 64, %g4 /* IEU1 Group */
169
170 sllx %g3, 3, %g5 /* IEU0 */
171 sub %o1, %g3, %o1 /* IEU0 Group */
172 sub %g4, %g5, %g4 /* IEU1 */
173 /* %g1 = 0101010101010101 *
174 * %g2 = 8080808080808080 *
175 * %g3 = source alignment *
176 * %g5 = number of bits to shift left *
177 * %g4 = number of bits to shift right */
178 ldxa [%o1] ASI_PNF, %o5 /* Load Group */
179
180 addcc %o1, 8, %o1 /* IEU1 */
/* 15: merge loop.  The word for DEST is the previous source word
   shifted left by %g5, OR-ed with the newly loaded source word shifted
   right by %g4.  It is tested with the same mask test as on the
   aligned path.  */
181 15: sllx %o5, %g5, %o3 /* IEU0 Group */
182 ldxa [%o1] ASI_PNF, %o5 /* Load */
183 srlx %o5, %g4, %o4 /* IEU0 Group */
184
185 add %o0, 8, %o0 /* IEU1 */
186 or %o3, %o4, %o3 /* IEU0 Group */
187 add %o1, 8, %o1 /* IEU1 */
188 sub %o3, %g1, %o4 /* IEU0 Group */
189
190 #ifdef EIGHTBIT_NOT_RARE
191 andn %o4, %o3, %o4 /* IEU0 Group */
192 #endif
193 andcc %o4, %g2, %g0 /* IEU1 Group */
/* No candidate zero byte: store the word (annulled delay slot) and
   loop.  */
194 be,a,pt %xcc, 15b /* CTI */
195 stx %o3, [%o0 - 8] /* Store */
/* A zero byte is possible.  Test the bytes of %o3 from shift 56 down to
   shift 8, extracting each one into %o4 in the preceding delay slot.  */
196 srlx %o3, 56, %o4 /* IEU0 Group */
197
198 andcc %o4, 0xff, %g0 /* IEU1 Group */
199 be,pn %icc, 22f /* CTI */
200 srlx %o3, 48, %o4 /* IEU0 */
201 andcc %o4, 0xff, %g0 /* IEU1 Group */
202
203 be,pn %icc, 21f /* CTI */
204 srlx %o3, 40, %o4 /* IEU0 */
205 andcc %o4, 0xff, %g0 /* IEU1 Group */
206 be,pn %icc, 20f /* CTI */
207
208 srlx %o3, 32, %o4 /* IEU0 */
209 andcc %o4, 0xff, %g0 /* IEU1 Group */
210 be,pn %icc, 19f /* CTI */
211 srlx %o3, 24, %o4 /* IEU0 */
212
213 andcc %o4, 0xff, %g0 /* IEU1 Group */
214 be,pn %icc, 18f /* CTI */
215 srlx %o3, 16, %o4 /* IEU0 */
216 andcc %o4, 0xff, %g0 /* IEU1 Group */
217
218 be,pn %icc, 17f /* CTI */
219 srlx %o3, 8, %o4 /* IEU0 */
220 andcc %o4, 0xff, %g0 /* IEU1 Group */
221 be,pn %icc, 16f /* CTI */
222
/* Delay slot of the branch to 16: test the last byte.  The stx in the
   delay slot of the next branch is not annulled, so the whole word is
   stored both when looping (false alarm) and when falling through to
   return %o0 - 1, the address of the NUL in the last byte.  */
223 andcc %o3, 0xff, %g0 /* IEU1 Group */
224 bne,pn %icc, 15b /* CTI */
225 stx %o3, [%o0 - 8] /* Store */
226 retl /* CTI+IEU1 Group */
227
228 sub %o0, 1, %o0 /* IEU0 */
229
/* Tails of the unaligned-source loop, NUL in the lower half of the
   word.  Each entry sets %g6 to the address of the NUL and then falls
   through a chain of byte stores that ends with a word store of the
   upper half.
     17: NUL at %o0 - 3    18: NUL at %o0 - 4
     19: NUL at %o0 - 5    16: NUL at %o0 - 2  */
230 .align 16
231 17: ba,pt %xcc, 26f /* CTI Group */
232 subcc %o0, 3, %g6 /* IEU1 */
233 18: ba,pt %xcc, 27f /* CTI Group */
234 subcc %o0, 4, %g6 /* IEU1 */
235
236 19: ba,pt %xcc, 28f /* CTI Group */
237 subcc %o0, 5, %g6 /* IEU1 */
238 16: subcc %o0, 2, %g6 /* IEU1 Group */
239 srlx %o3, 8, %o4 /* IEU0 */
240
241 stb %o4, [%o0 - 2] /* Store */
242 26: srlx %o3, 16, %o4 /* IEU0 Group */
243 stb %o4, [%o0 - 3] /* Store */
244 27: srlx %o3, 24, %o4 /* IEU0 Group */
245
246 stb %o4, [%o0 - 4] /* Store */
247 28: srlx %o3, 32, %o4 /* IEU0 Group */
248 stw %o4, [%o0 - 8] /* Store */
249 retl /* CTI+IEU1 Group */
250
251 mov %g6, %o0 /* IEU0 */
252
/* Tails of the unaligned-source loop, NUL in the upper half of the
   word; only byte stores are used.
     21: NUL at %o0 - 7    22: NUL at %o0 - 8    20: NUL at %o0 - 6  */
253 .align 16
254 21: ba,pt %xcc, 29f /* CTI Group */
255 subcc %o0, 7, %g6 /* IEU1 */
256 22: ba,pt %xcc, 30f /* CTI Group */
257 subcc %o0, 8, %g6 /* IEU1 */
258
259 20: subcc %o0, 6, %g6 /* IEU1 Group */
260 srlx %o3, 40, %o4 /* IEU0 */
261 stb %o4, [%o0 - 6] /* Store */
262 29: srlx %o3, 48, %o4 /* IEU0 Group */
263
264 stb %o4, [%o0 - 7] /* Store */
265 30: srlx %o3, 56, %o4 /* IEU0 Group */
266 stb %o4, [%o0 - 8] /* Store */
267 retl /* CTI+IEU1 Group */
268
269 mov %g6, %o0 /* IEU0 */
270 END(__stpcpy)
271
/* Publish the routine under its public name.  The three macros are not
   defined in this file.
   NOTE(review): presumably weak_alias makes stpcpy a weak alias of
   __stpcpy and the libc_hidden_* macros provide the hidden-visibility
   definitions used for calls from inside the library - confirm against
   the headers that define them.  */
272 weak_alias (__stpcpy, stpcpy)
273 libc_hidden_def (__stpcpy)
274 libc_hidden_builtin_def (stpcpy)