]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/sparc/sparc64/strcat.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / sparc / sparc64 / strcat.S
1 /* strcat (dest, src) -- Append SRC on the end of DEST.
2 For SPARC v9.
3 Copyright (C) 1998-2019 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
6 Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>.
7
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
12
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sysdep.h>
23 #include <asm/asi.h>
/* Defaults applied only when XCC has not been defined before this point:
   XCC expands to xcc, USE_BPR is defined, and %g2, %g3 and %g6 are
   declared to the assembler as scratch registers.
   NOTE(review): the code visible in this file spells %xcc directly and
   never tests USE_BPR; presumably these exist for a wrapper that defines
   XCC itself before including this file -- confirm.  */
24 #ifndef XCC
25 #define XCC xcc
26 #define USE_BPR
27 .register %g2, #scratch
28 .register %g3, #scratch
29 .register %g6, #scratch
30 #endif
31
32 /* Normally, this uses
33 ((xword - 0x0101010101010101) & 0x8080808080808080) test
34 to find out if any byte in xword could be zero. This is fast, but
35 also gives false alarm for any byte in range 0x81-0xff. It does
36 not matter for correctness, as if this test tells us there could
37 be some zero byte, we check it byte by byte, but if bytes with
38 high bits set are common in the strings, then this will give poor
39 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
40 will use a more precise test that is one tick slower,
41 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
42 which does not give any false alarms (but if some bits are set,
43 one cannot assume from it which bytes are zero and which are not).
44 It has yet to be measured which default is the right one for an
45 average glibc user these days.
46 */
47
48 .text
49 .align 32
/* strcat (dest, src): append the string at src to the string at dest.
   In:   %o0 = dest, %o1 = src.
   Out:  %o0 = the dest pointer as passed in (kept in %g6 throughout).
   Registers written: %g1-%g6, %o0-%o5 and the integer condition codes.
   Constants: %g1 = 0x0101010101010101, %g2 = 0x8080808080808080.
   Phases:
     1. Find the terminating NUL of dest: byte by byte at 32: while the
        pointer is not 8-byte aligned, then 8 bytes at a time at 48:/49:.
     2. If that NUL is not 8-byte aligned, copy src byte by byte at
        12:/13: until the write pointer is aligned (or src ends).
     3. Copy 8 bytes per iteration: at 1:-3: when src is 8-byte aligned
        as well, otherwise at 14:/15: by merging two shifted src words.
   Within a 64-bit word, bits 63:56 hold the lowest-addressed byte, so
   the per-byte NUL checks run from shift 56 down to shift 0.
   The instruction following each branch is its delay slot; after a
   conditional branch written with ",a" it executes only if the branch
   is taken.
   Look-ahead loads use ASI_PNF from <asm/asi.h>.
   NOTE(review): presumably the no-fault ASI, so that reading past the
   end of a string cannot trap -- confirm.  */
50 ENTRY(strcat)
/* Build %g1, save dest in %g6, fetch the first dest byte and test the
   alignment of dest.  */
51 sethi %hi(0x01010101), %g1 /* IEU0 Group */
52 ldub [%o0], %o3 /* Load */
53 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
54 mov %o0, %g6 /* IEU1 */
55
56 sllx %g1, 32, %g2 /* IEU0 Group */
57 andcc %o0, 7, %g0 /* IEU1 */
58 or %g1, %g2, %g1 /* IEU0 Group */
/* dest not 8-byte aligned -> byte-wise scan at 32:.  */
59 bne,pn %icc, 32f /* CTI */
60
/* Delay slot, always executed: %g2 = %g1 << 7 = 0x8080808080808080.  */
61 sllx %g1, 7, %g2 /* IEU0 Group */
/* dest is aligned: if its first byte is NUL, dest is empty -> 30:.
   The delay slot loads the first dest word either way.  */
62 brz,pn %o3, 30f /* CTI+IEU1 */
63 ldx [%o0], %o3 /* Load */
/* Word-wise search of dest for its NUL.  %o3 = word being tested; the
   next word is loaded ahead.  After the delay-slot add, %o0 is the
   address of the tested word plus 16.  */
64 48: add %o0, 8, %o0 /* IEU0 Group */
65
66 49: sub %o3, %g1, %o2 /* IEU0 Group */
67 #ifdef EIGHTBIT_NOT_RARE
68 andn %o2, %o3, %g5 /* IEU0 Group */
69 ldxa [%o0] ASI_PNF, %o3 /* Load */
70 andcc %g5, %g2, %g0 /* IEU1 Group */
71 #else
72 ldxa [%o0] ASI_PNF, %o3 /* Load */
73 andcc %o2, %g2, %g0 /* IEU1 Group */
74 #endif
/* No flag bit set -> no NUL in this word, keep scanning.  */
75 be,pt %xcc, 49b /* CTI */
76
77 add %o0, 8, %o0 /* IEU0 */
/* Possible NUL: %g3 = the tested word again (subtraction undone),
   %o2 = flag bits of its four lowest-addressed bytes.  */
78 addcc %o2, %g1, %g3 /* IEU1 Group */
79 srlx %o2, 32, %o2 /* IEU0 */
80 50: andcc %o2, %g2, %g0 /* IEU1 Group */
81
/* No flag in those four bytes -> skip to 51: and test the other four.
   Otherwise test them one at a time; the first zero byte selects the
   matching rewind stub (29:, 28:, 27:, 26:).  */
82 be,pn %xcc, 51f /* CTI */
83 srlx %g3, 56, %o2 /* IEU0 */
84 andcc %o2, 0xff, %g0 /* IEU1 Group */
85 be,pn %icc, 29f /* CTI */
86
87 srlx %g3, 48, %o2 /* IEU0 */
88 andcc %o2, 0xff, %g0 /* IEU1 Group */
89 be,pn %icc, 28f /* CTI */
90 srlx %g3, 40, %o2 /* IEU0 */
91
92 andcc %o2, 0xff, %g0 /* IEU1 Group */
93 be,pn %icc, 27f /* CTI */
94 srlx %g3, 32, %o2 /* IEU0 */
95 andcc %o2, 0xff, %g0 /* IEU1 Group */
96
97 be,pn %icc, 26f /* CTI */
/* 51: is both the delay slot of the branch above and the entry point
   for testing the four highest-addressed bytes (25:, 24:, 23:, 52:).  */
98 51: srlx %g3, 24, %o2 /* IEU0 */
99 andcc %o2, 0xff, %g0 /* IEU1 Group */
100 be,pn %icc, 25f /* CTI */
101
102 srlx %g3, 16, %o2 /* IEU0 */
103 andcc %o2, 0xff, %g0 /* IEU1 Group */
104 be,pn %icc, 24f /* CTI */
105 srlx %g3, 8, %o2 /* IEU0 */
106
107 andcc %o2, 0xff, %g0 /* IEU1 Group */
108 be,pn %icc, 23f /* CTI */
/* Delay slot: already start the zero test of the next word (%o3) in
   case the current word turns out to be a false alarm.  */
109 sub %o3, %g1, %o2 /* IEU0 */
110 andcc %g3, 0xff, %g0 /* IEU1 Group */
111
112 be,pn %icc, 52f /* CTI */
113 ldxa [%o0] ASI_PNF, %o3 /* Load */
/* False alarm: no byte of %g3 was zero.  Continue with the next word,
   either back in the fast loop (no flag) or at 50: (flag set).  */
114 andcc %o2, %g2, %g0 /* IEU1 Group */
115 be,pt %xcc, 49b /* CTI */
116
117 add %o0, 8, %o0 /* IEU0 */
118 addcc %o2, %g1, %g3 /* IEU1 Group */
119 ba,pt %xcc, 50b /* CTI */
120 srlx %o2, 32, %o2 /* IEU0 */
121
122 .align 16
/* NUL of dest found.  %o0 is 16 past the start of the word holding it,
   so each stub rewinds %o0 to the NUL's address in its delay slot
   (-9 = last byte of the word ... -16 = first byte) and continues
   with the byte copy at 12:.  */
123 52: ba,pt %xcc, 12f /* CTI Group */
124 add %o0, -9, %o0 /* IEU0 */
125 23: ba,pt %xcc, 12f /* CTI Group */
126 add %o0, -10, %o0 /* IEU0 */
127
128 24: ba,pt %xcc, 12f /* CTI Group */
129 add %o0, -11, %o0 /* IEU0 */
130 25: ba,pt %xcc, 12f /* CTI Group */
131 add %o0, -12, %o0 /* IEU0 */
132
133 26: ba,pt %xcc, 12f /* CTI Group */
134 add %o0, -13, %o0 /* IEU0 */
135 27: ba,pt %xcc, 12f /* CTI Group */
136 add %o0, -14, %o0 /* IEU0 */
137
138 28: ba,pt %xcc, 12f /* CTI Group */
139 add %o0, -15, %o0 /* IEU0 */
/* 29: the NUL is the first byte of the word, so the write pointer is
   8-byte aligned and no byte copy is needed.
   30: %g3 = src & 7.  */
140 29: add %o0, -16, %o0 /* IEU0 Group */
141 30: andcc %o1, 7, %g3 /* IEU1 */
142
/* src not 8-byte aligned -> shifted copy at 14:.  The delay slot sets
   %g4 = 64, which 14: uses to compute the right-shift count.  */
143 31: bne,pn %icc, 14f /* CTI */
144 orcc %g0, 64, %g4 /* IEU1 Group */
/* Aligned word copy.  %g3 = word to store, %o3 = next src word loaded
   ahead; %o0 and %o1 are advanced before the store, hence the
   [%o0 - 8] addressing.  */
145 1: ldx [%o1], %o3 /* Load */
146 add %o1, 8, %o1 /* IEU1 */
147
148 2: mov %o3, %g3 /* IEU0 Group */
149 3: sub %o3, %g1, %o2 /* IEU1 */
150 ldxa [%o1] ASI_PNF, %o3 /* Load */
151 #ifdef EIGHTBIT_NOT_RARE
152 andn %o2, %g3, %o2 /* IEU0 Group */
153 #endif
154 add %o0, 8, %o0 /* IEU0 Group */
155
156 andcc %o2, %g2, %g0 /* IEU1 */
157 add %o1, 8, %o1 /* IEU0 Group */
/* No flag bit: store the whole word (annulled delay slot, runs only
   when the branch is taken) and loop.  */
158 be,a,pt %xcc, 2b /* CTI */
159 stx %g3, [%o0 - 8] /* Store */
160
/* Possible NUL in %g3: test its bytes from lowest address upward.
   The first zero byte selects a tail (11: ... 5:) that stores only the
   bytes up to and including the NUL.  */
161 srlx %g3, 56, %g5 /* IEU0 Group */
162 andcc %g5, 0xff, %g0 /* IEU1 Group */
163 be,pn %icc, 11f /* CTI */
164 srlx %g3, 48, %g4 /* IEU0 */
165
166 andcc %g4, 0xff, %g0 /* IEU1 Group */
167 be,pn %icc, 10f /* CTI */
168 srlx %g3, 40, %g5 /* IEU0 */
169 andcc %g5, 0xff, %g0 /* IEU1 Group */
170
171 be,pn %icc, 9f /* CTI */
172 srlx %g3, 32, %g4 /* IEU0 */
173 andcc %g4, 0xff, %g0 /* IEU1 Group */
174 be,pn %icc, 8f /* CTI */
175
176 srlx %g3, 24, %g5 /* IEU0 */
177 andcc %g5, 0xff, %g0 /* IEU1 Group */
178 be,pn %icc, 7f /* CTI */
179 srlx %g3, 16, %g4 /* IEU0 */
180
181 andcc %g4, 0xff, %g0 /* IEU1 Group */
182 be,pn %icc, 6f /* CTI */
183 srlx %g3, 8, %g5 /* IEU0 */
184 andcc %g5, 0xff, %g0 /* IEU1 Group */
185
186 be,pn %icc, 5f /* CTI */
187 sub %o3, %g1, %o2 /* IEU0 */
/* The first seven bytes are non-zero: store the full word.  If its
   last byte is non-zero too it was a false alarm, so continue at 3:
   with the next word; otherwise the NUL has been stored and we are
   done.  */
188 stx %g3, [%o0 - 8] /* Store Group */
189 andcc %g3, 0xff, %g0 /* IEU1 */
190
191 bne,pt %icc, 3b /* CTI */
192 mov %o3, %g3 /* IEU0 Group */
/* Common exit: return with %o0 = original dest (set in the delay slot).  */
193 4: retl /* CTI+IEU1 Group */
194 mov %g6, %o0 /* IEU0 */
195
196 .align 16
/* Tails of the aligned copy.  Label N is entered when the first zero
   byte of %g3 is at offset 6 (5:), 5 (6:), 4 (7:), 3 (8:), 2 (9:),
   1 (10:) or 0 (11:) from the start of the word at [%o0 - 8].  Each
   tail writes the leading bytes through that NUL with stb/sth/stw
   pieces, falling through into the next label where shown, and then
   returns the original dest.  */
197 5: stb %g5, [%o0 - 2] /* Store Group */
198 srlx %g3, 16, %g4 /* IEU0 */
199 6: sth %g4, [%o0 - 4] /* Store Group */
200 srlx %g3, 32, %g4 /* IEU0 */
201
202 stw %g4, [%o0 - 8] /* Store Group */
203 retl /* CTI+IEU1 Group */
204 mov %g6, %o0 /* IEU0 */
205 7: stb %g5, [%o0 - 4] /* Store Group */
206
207 srlx %g3, 32, %g4 /* IEU0 */
208 8: stw %g4, [%o0 - 8] /* Store Group */
209 retl /* CTI+IEU1 Group */
210 mov %g6, %o0 /* IEU0 */
211
212 9: stb %g5, [%o0 - 6] /* Store Group */
213 srlx %g3, 48, %g4 /* IEU0 */
214 10: sth %g4, [%o0 - 8] /* Store Group */
215 retl /* CTI+IEU1 Group */
216
217 mov %g6, %o0 /* IEU0 */
218 11: stb %g5, [%o0 - 8] /* Store Group */
219 retl /* CTI+IEU1 Group */
220 mov %g6, %o0 /* IEU0 */
221
222 .align 16
/* Byte-wise search of dest for its NUL, used while %o0 is not 8-byte
   aligned.  %o3 holds the byte at the current %o0.  Once %o0 becomes
   aligned, the annulled delay slot loads the word there and the
   word-wise scan at 48: takes over.  */
223 32: andcc %o0, 7, %g0 /* IEU1 Group */
224 be,a,pn %icc, 48b /* CTI */
225 ldx [%o0], %o3 /* Load */
226 add %o0, 1, %o0 /* IEU0 Group */
227
/* Current byte non-zero: load the next one (delay slot, executed only
   when the branch is taken) and loop.  */
228 brnz,a,pt %o3, 32b /* CTI+IEU1 */
229 lduba [%o0] ASI_PNF, %o3 /* Load */
/* Current byte was the NUL: step %o0 back onto it.  */
230 add %o0, -1, %o0 /* IEU0 Group */
231 andcc %o0, 7, %g0 /* IEU1 Group */
232
/* If the NUL's address is 8-byte aligned go directly to the word copy
   at 31:, with the delay slot supplying %g3 = src & 7 and the
   condition codes that 31: tests.  */
233 be,a,pn %icc, 31b /* CTI */
234 andcc %o1, 7, %g3 /* IEU1 Group */
/* Byte copy: %o0 = write pointer (starts at dest's NUL), %o1 = src.
   Copies one byte per iteration until the byte just stored was NUL
   (-> return at 4:) or the write pointer becomes 8-byte aligned.  */
235 12: ldub [%o1], %o3 /* Load */
236 stb %o3, [%o0] /* Store Group */
237
238 13: add %o0, 1, %o0 /* IEU0 */
239 add %o1, 1, %o1 /* IEU1 */
240 andcc %o3, 0xff, %g0 /* IEU1 Group */
241 be,pn %icc, 4b /* CTI */
242
/* Delay slot, always executed: fetch the next src byte.  */
243 lduba [%o1] ASI_PNF, %o3 /* Load */
244 andcc %o0, 7, %g0 /* IEU1 Group */
245 bne,a,pt %icc, 13b /* CTI */
246 stb %o3, [%o0] /* Store */
247
/* Write pointer is now aligned.  If src is aligned too, load its next
   word (annulled delay slot) and use the aligned copy at 1:.  Otherwise
   set %g4 = 64 and fall into the shifted copy, with %g3 = src & 7.  */
248 andcc %o1, 7, %g3 /* IEU1 Group */
249 be,a,pt %icc, 1b /* CTI */
250 ldx [%o1], %o3 /* Load */
251 orcc %g0, 64, %g4 /* IEU1 Group */
252
/* Shifted copy for src not 8-byte aligned: round %o1 down to an 8-byte
   boundary and build each output word from two consecutive aligned src
   words, (%o5 << %g5) | (next word >> %g4).  */
253 14: sllx %g3, 3, %g5 /* IEU0 */
254 sub %o1, %g3, %o1 /* IEU0 Group */
255 sub %g4, %g5, %g4 /* IEU1 */
256 /* %g1 = 0101010101010101 *
257 * %g2 = 8080808080808080 *
258 * %g3 = source alignment *
259 * %g5 = number of bits to shift left *
260 * %g4 = number of bits to shift right */
261 ldxa [%o1] ASI_PNF, %o5 /* Load Group */
262
263 addcc %o1, 8, %o1 /* IEU1 */
/* Loop: %o3 = merged word to store at [%o0 - 8], %o4 = scratch used
   for the zero test.  */
264 15: sllx %o5, %g5, %o3 /* IEU0 Group */
265 ldxa [%o1] ASI_PNF, %o5 /* Load */
266 srlx %o5, %g4, %o4 /* IEU0 Group */
267
268 add %o0, 8, %o0 /* IEU1 */
269 or %o3, %o4, %o3 /* IEU0 Group */
270 add %o1, 8, %o1 /* IEU1 */
271 sub %o3, %g1, %o4 /* IEU0 Group */
272
273 #ifdef EIGHTBIT_NOT_RARE
274 andn %o4, %o3, %o4 /* IEU0 Group */
275 #endif
276 andcc %o4, %g2, %g0 /* IEU1 Group */
/* No flag bit: store the merged word (annulled delay slot) and loop.  */
277 be,a,pt %xcc, 15b /* CTI */
278 stx %o3, [%o0 - 8] /* Store */
/* Possible NUL in %o3: test its bytes from lowest address upward; the
   first zero byte selects the tail 22: ... 16:.  */
279 srlx %o3, 56, %o4 /* IEU0 Group */
280
281 andcc %o4, 0xff, %g0 /* IEU1 Group */
282 be,pn %icc, 22f /* CTI */
283 srlx %o3, 48, %o4 /* IEU0 */
284 andcc %o4, 0xff, %g0 /* IEU1 Group */
285
286 be,pn %icc, 21f /* CTI */
287 srlx %o3, 40, %o4 /* IEU0 */
288 andcc %o4, 0xff, %g0 /* IEU1 Group */
289 be,pn %icc, 20f /* CTI */
290
291 srlx %o3, 32, %o4 /* IEU0 */
292 andcc %o4, 0xff, %g0 /* IEU1 Group */
293 be,pn %icc, 19f /* CTI */
294 srlx %o3, 24, %o4 /* IEU0 */
295
296 andcc %o4, 0xff, %g0 /* IEU1 Group */
297 be,pn %icc, 18f /* CTI */
298 srlx %o3, 16, %o4 /* IEU0 */
299 andcc %o4, 0xff, %g0 /* IEU1 Group */
300
301 be,pn %icc, 17f /* CTI */
302 srlx %o3, 8, %o4 /* IEU0 */
303 andcc %o4, 0xff, %g0 /* IEU1 Group */
304 be,pn %icc, 16f /* CTI */
305
/* Delay slot of the branch above: test the last byte.  The full word
   is stored in either case (non-annulled delay slot of the bne); if
   the last byte is non-zero it was a false alarm and the loop
   continues, otherwise return the original dest.  */
306 andcc %o3, 0xff, %g0 /* IEU1 Group */
307 bne,pn %icc, 15b /* CTI */
308 stx %o3, [%o0 - 8] /* Store */
309 retl /* CTI+IEU1 Group */
310
311 mov %g6, %o0 /* IEU0 */
312
313 .align 16
/* Tails of the shifted copy.  16:, 17: and 18: store the bytes at
   offsets 6, 5 and 4 of the word at [%o0 - 8] one at a time and fall
   through to 19:, which stores the four lowest-addressed bytes with
   one stw and returns.  Entry is at the label matching the first zero
   byte (16: = offset 6 ... 19: = offset 3).  */
314 16: srlx %o3, 8, %o4 /* IEU0 Group */
315 stb %o4, [%o0 - 2] /* Store */
316 17: srlx %o3, 16, %o4 /* IEU0 Group */
317 stb %o4, [%o0 - 3] /* Store */
318
319 18: srlx %o3, 24, %o4 /* IEU0 Group */
320 stb %o4, [%o0 - 4] /* Store */
321 19: srlx %o3, 32, %o4 /* IEU0 Group */
322 stw %o4, [%o0 - 8] /* Store */
323
324 retl /* CTI+IEU1 Group */
325 mov %g6, %o0 /* IEU0 */
326 nop
327 nop
328
/* 20:, 21:, 22: first zero byte at offset 2, 1 or 0: store bytes
   2, 1, 0 individually (falling through) and return.  */
329 20: srlx %o3, 40, %o4 /* IEU0 Group */
330 stb %o4, [%o0 - 6] /* Store */
331 21: srlx %o3, 48, %o4 /* IEU0 Group */
332 stb %o4, [%o0 - 7] /* Store */
333
334 22: srlx %o3, 56, %o4 /* IEU0 Group */
335 stb %o4, [%o0 - 8] /* Store */
336 retl /* CTI+IEU1 Group */
337 mov %g6, %o0 /* IEU0 */
338 END(strcat)
339 libc_hidden_builtin_def (strcat)