]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/sparc/sparc64/strcat.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / sparc / sparc64 / strcat.S
CommitLineData
ae6b8730
RH
1/* strcat (dest, src) -- Append SRC on the end of DEST.
2 For SPARC v9.
f7a9f785 3 Copyright (C) 1998-2016 Free Software Foundation, Inc.
ae6b8730
RH
4 This file is part of the GNU C Library.
5 Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
6 Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>.
7
8 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
ae6b8730
RH
12
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 16 Lesser General Public License for more details.
ae6b8730 17
41bdb6e2 18 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
ae6b8730
RH
21
22#include <sysdep.h>
23#include <asm/asi.h>
/* Default configuration, applied only when XCC has not been defined
   before this point: define XCC as xcc, define USE_BPR, and declare
   %g2, %g3 and %g6 to the assembler as scratch registers.
   NOTE(review): neither XCC nor USE_BPR is referenced in the visible
   code; presumably the guard lets another file pre-define XCC and then
   include this one -- confirm against the other sparc strcat variants.  */
24#ifndef XCC
25#define XCC xcc
26#define USE_BPR
8cb079d4
UD
27 .register %g2, #scratch
28 .register %g3, #scratch
62f29da7 29 .register %g6, #scratch
ae6b8730
RH
30#endif
31
32 /* Normally, this uses
33 ((xword - 0x0101010101010101) & 0x8080808080808080) test
34 to find out if any byte in xword could be zero. This is fast, but
35 also gives false alarm for any byte in range 0x81-0xff. It does
36 not matter for correctness, as if this test tells us there could
37 be some zero byte, we check it byte by byte, but if bytes with
38 high bits set are common in the strings, then this will give poor
39 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
40 will use a test that is one tick slower but more precise,
41 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
42 which does not give any false alarms (but if some bits are set,
43 one cannot assume from it which bytes are zero and which are not).
44 It has yet to be measured which default is correct for glibc
45 these days for an average user.
46 */
47
48 .text
49 .align 32
/* strcat (dest, src)
   In:  %o0 = dest, %o1 = src.
   Out: %o0 = the original dest (saved in %g6 on entry and copied back
        in the delay slot of every retl).
   Constants built on entry:
     %g1 = 0x0101010101010101
     %g2 = 0x8080808080808080 (%g1 << 7)
   Outline:
     1. Find the terminating NUL of dest: word scan at 48/49 when dest
        is 8-byte aligned, byte scan at 32 otherwise.
     2. Copy src onto that NUL: bytewise at 12/13 until dest is 8-byte
        aligned, then 8 bytes at a time at 1/2/3 (src aligned as well)
        or at 14/15 (src misaligned, words assembled with shifts).
   NOTE(review): ASI_PNF is defined in <asm/asi.h>, not here; presumably
   it selects non-faulting loads so that reading a whole word beyond the
   terminator cannot trap -- confirm against that header.  */
50ENTRY(strcat)
51 sethi %hi(0x01010101), %g1 /* IEU0 Group */
52 ldub [%o0], %o3 /* Load */
53 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
62f29da7 54 mov %o0, %g6 /* IEU1 */
ae6b8730
RH
55
56 sllx %g1, 32, %g2 /* IEU0 Group */
57 andcc %o0, 7, %g0 /* IEU1 */
58 or %g1, %g2, %g1 /* IEU0 Group */
/* Dest not 8-byte aligned: scan it bytewise at 32.  The sllx below sits
   in the (non-annulled) delay slot, so %g2 is set up on both paths.  */
59 bne,pn %icc, 32f /* CTI */
60
61 sllx %g1, 7, %g2 /* IEU0 Group */
/* First byte of dest is NUL: dest is empty, go straight to the copy at
   30.  The ldx in the delay slot is executed either way.  */
62 brz,pn %o3, 30f /* CTI+IEU1 */
63 ldx [%o0], %o3 /* Load */
6448: add %o0, 8, %o0 /* IEU0 Group */
65
/* Word scan of dest.  %o3 is the word under test, %o2 = %o3 - %g1, and
   the next word is loaded while testing.  %o0 runs ahead: when the loop
   is left, the tested word is at %o0 - 16.  */
6649: sub %o3, %g1, %o2 /* IEU0 Group */
67#ifdef EIGHTBIT_NOT_RARE
68 andn %o2, %o3, %g5 /* IEU0 Group */
69 ldxa [%o0] ASI_PNF, %o3 /* Load */
70 andcc %g5, %g2, %g0 /* IEU1 Group */
71#else
72 ldxa [%o0] ASI_PNF, %o3 /* Load */
73 andcc %o2, %g2, %g0 /* IEU1 Group */
74#endif
75 be,pt %xcc, 49b /* CTI */
76
77 add %o0, 8, %o0 /* IEU0 */
/* The test flagged a possible zero byte.  %g3 = the tested word again
   (the subtraction of %g1 is undone); %o2 >> 32 lets label 50 look at
   the flags of the upper four bytes only.  */
78 addcc %o2, %g1, %g3 /* IEU1 Group */
79 srlx %o2, 32, %o2 /* IEU0 */
8050: andcc %o2, %g2, %g0 /* IEU1 Group */
81
/* No flag in the upper half: continue with the lower four bytes at 51.
   Otherwise test the bytes one at a time, most significant first; each
   hit branches to the label that rewinds %o0 onto that byte.  */
82 be,pn %xcc, 51f /* CTI */
83 srlx %g3, 56, %o2 /* IEU0 */
84 andcc %o2, 0xff, %g0 /* IEU1 Group */
85 be,pn %icc, 29f /* CTI */
86
87 srlx %g3, 48, %o2 /* IEU0 */
88 andcc %o2, 0xff, %g0 /* IEU1 Group */
89 be,pn %icc, 28f /* CTI */
90 srlx %g3, 40, %o2 /* IEU0 */
91
92 andcc %o2, 0xff, %g0 /* IEU1 Group */
93 be,pn %icc, 27f /* CTI */
94 srlx %g3, 32, %o2 /* IEU0 */
95 andcc %o2, 0xff, %g0 /* IEU1 Group */
96
97 be,pn %icc, 26f /* CTI */
9851: srlx %g3, 24, %o2 /* IEU0 */
99 andcc %o2, 0xff, %g0 /* IEU1 Group */
100 be,pn %icc, 25f /* CTI */
101
102 srlx %g3, 16, %o2 /* IEU0 */
103 andcc %o2, 0xff, %g0 /* IEU1 Group */
104 be,pn %icc, 24f /* CTI */
105 srlx %g3, 8, %o2 /* IEU0 */
106
107 andcc %o2, 0xff, %g0 /* IEU1 Group */
108 be,pn %icc, 23f /* CTI */
109 sub %o3, %g1, %o2 /* IEU0 */
110 andcc %g3, 0xff, %g0 /* IEU1 Group */
111
/* The bytes tested above are non-zero.  If the last byte is zero go to
   52; otherwise it was a false alarm, so resume the scan with the next
   word (already in %o3, with %o2 = %o3 - %g1 computed above).  */
112 be,pn %icc, 52f /* CTI */
113 ldxa [%o0] ASI_PNF, %o3 /* Load */
114 andcc %o2, %g2, %g0 /* IEU1 Group */
115 be,pt %xcc, 49b /* CTI */
116
117 add %o0, 8, %o0 /* IEU0 */
118 addcc %o2, %g1, %g3 /* IEU1 Group */
119 ba,pt %xcc, 50b /* CTI */
120 srlx %o2, 32, %o2 /* IEU0 */
121
122 .align 16
/* A NUL was found in dest.  The tested word is at %o0 - 16, so each
   entry below rewinds %o0 onto the NUL byte (-9 for the last byte of
   the word up to -16 for the first) and starts the byte copy at 12.  */
12352: ba,pt %xcc, 12f /* CTI Group */
124 add %o0, -9, %o0 /* IEU0 */
12523: ba,pt %xcc, 12f /* CTI Group */
126 add %o0, -10, %o0 /* IEU0 */
127
12824: ba,pt %xcc, 12f /* CTI Group */
129 add %o0, -11, %o0 /* IEU0 */
13025: ba,pt %xcc, 12f /* CTI Group */
131 add %o0, -12, %o0 /* IEU0 */
132
13326: ba,pt %xcc, 12f /* CTI Group */
134 add %o0, -13, %o0 /* IEU0 */
13527: ba,pt %xcc, 12f /* CTI Group */
136 add %o0, -14, %o0 /* IEU0 */
137
13828: ba,pt %xcc, 12f /* CTI Group */
139 add %o0, -15, %o0 /* IEU0 */
/* NUL is the first byte of the word, so %o0 stays 8-byte aligned and
   control falls through into the word copy.  */
14029: add %o0, -16, %o0 /* IEU0 Group */
/* Dest is 8-byte aligned here.  %g3 = src & 7; if non-zero the source
   is misaligned and handled at 14, with %g4 = 64 set in the delay
   slot.  */
14130: andcc %o1, 7, %g3 /* IEU1 */
142
14331: bne,pn %icc, 14f /* CTI */
144 orcc %g0, 64, %g4 /* IEU1 Group */
/* Both pointers 8-byte aligned: copy a word per iteration.
   %g3 = word about to be stored, %o3 = next source word.  */
1451: ldx [%o1], %o3 /* Load */
146 add %o1, 8, %o1 /* IEU1 */
147
1482: mov %o3, %g3 /* IEU0 Group */
1493: sub %o3, %g1, %o2 /* IEU1 */
150 ldxa [%o1] ASI_PNF, %o3 /* Load */
151#ifdef EIGHTBIT_NOT_RARE
152 andn %o2, %g3, %o2 /* IEU0 Group */
153#endif
154 add %o0, 8, %o0 /* IEU0 Group */
155
156 andcc %o2, %g2, %g0 /* IEU1 */
157 add %o1, 8, %o1 /* IEU0 Group */
/* Annulled delay slot: the stx happens only when the branch is taken,
   i.e. when the test found no candidate zero byte in %g3.  */
158 be,a,pt %xcc, 2b /* CTI */
159 stx %g3, [%o0 - 8] /* Store */
160
/* Candidate zero byte in %g3: examine its bytes from the most
   significant down and branch to the matching tail store (11 .. 5).  */
161 srlx %g3, 56, %g5 /* IEU0 Group */
162 andcc %g5, 0xff, %g0 /* IEU1 Group */
163 be,pn %icc, 11f /* CTI */
164 srlx %g3, 48, %g4 /* IEU0 */
165
166 andcc %g4, 0xff, %g0 /* IEU1 Group */
167 be,pn %icc, 10f /* CTI */
168 srlx %g3, 40, %g5 /* IEU0 */
169 andcc %g5, 0xff, %g0 /* IEU1 Group */
170
171 be,pn %icc, 9f /* CTI */
172 srlx %g3, 32, %g4 /* IEU0 */
173 andcc %g4, 0xff, %g0 /* IEU1 Group */
174 be,pn %icc, 8f /* CTI */
175
176 srlx %g3, 24, %g5 /* IEU0 */
177 andcc %g5, 0xff, %g0 /* IEU1 Group */
178 be,pn %icc, 7f /* CTI */
179 srlx %g3, 16, %g4 /* IEU0 */
180
181 andcc %g4, 0xff, %g0 /* IEU1 Group */
182 be,pn %icc, 6f /* CTI */
183 srlx %g3, 8, %g5 /* IEU0 */
184 andcc %g5, 0xff, %g0 /* IEU1 Group */
185
186 be,pn %icc, 5f /* CTI */
187 sub %o3, %g1, %o2 /* IEU0 */
/* First seven bytes are non-zero: store the whole word, then either
   resume the loop at 3 (last byte non-zero, false alarm) or fall
   through to the return at 4 (last byte is the NUL).  */
188 stx %g3, [%o0 - 8] /* Store Group */
189 andcc %g3, 0xff, %g0 /* IEU1 */
190
191 bne,pt %icc, 3b /* CTI */
192 mov %o3, %g3 /* IEU0 Group */
/* Common return: hand back the original dest saved in %g6.  */
1934: retl /* CTI+IEU1 Group */
62f29da7 194 mov %g6, %o0 /* IEU0 */
ae6b8730
RH
195
196 .align 16
/* Tails of the aligned copy.  Each entry stores the bytes of %g3 up to
   and including the NUL using byte, halfword and word stores at
   [%o0 - 8 ..], then returns the original dest.  */
1975: stb %g5, [%o0 - 2] /* Store Group */
198 srlx %g3, 16, %g4 /* IEU0 */
1996: sth %g4, [%o0 - 4] /* Store Group */
200 srlx %g3, 32, %g4 /* IEU0 */
201
202 stw %g4, [%o0 - 8] /* Store Group */
203 retl /* CTI+IEU1 Group */
62f29da7 204 mov %g6, %o0 /* IEU0 */
ae6b8730
RH
2057: stb %g5, [%o0 - 4] /* Store Group */
206
207 srlx %g3, 32, %g4 /* IEU0 */
2088: stw %g4, [%o0 - 8] /* Store Group */
209 retl /* CTI+IEU1 Group */
62f29da7 210 mov %g6, %o0 /* IEU0 */
ae6b8730
RH
211
2129: stb %g5, [%o0 - 6] /* Store Group */
213 srlx %g3, 48, %g4 /* IEU0 */
21410: sth %g4, [%o0 - 8] /* Store Group */
215 retl /* CTI+IEU1 Group */
216
62f29da7 217 mov %g6, %o0 /* IEU0 */
ae6b8730
RH
21811: stb %g5, [%o0 - 8] /* Store Group */
219 retl /* CTI+IEU1 Group */
62f29da7 220 mov %g6, %o0 /* IEU0 */
ae6b8730
RH
221
222 .align 16
/* Dest is not 8-byte aligned: scan it one byte at a time.  As soon as
   %o0 becomes aligned, load a full word (annulled delay slot) and
   continue with the word scan at 48.  */
22332: andcc %o0, 7, %g0 /* IEU1 Group */
224 be,a,pn %icc, 48b /* CTI */
225 ldx [%o0], %o3 /* Load */
226 add %o0, 1, %o0 /* IEU0 Group */
227
228 brnz,a,pt %o3, 32b /* CTI+IEU1 */
229 lduba [%o0] ASI_PNF, %o3 /* Load */
/* NUL found: %o0 was already advanced past it, so step back onto it.
   If that address is 8-byte aligned use the word copy via 31.  */
230 add %o0, -1, %o0 /* IEU0 Group */
231 andcc %o0, 7, %g0 /* IEU1 Group */
232
233 be,a,pn %icc, 31b /* CTI */
234 andcc %o1, 7, %g3 /* IEU1 Group */
/* Byte copy from src to dest.  Ends either by returning through 4 once
   the NUL has been copied, or when dest becomes 8-byte aligned.  The
   stb in the annulled delay slot at the bottom runs only when the loop
   repeats.  */
23512: ldub [%o1], %o3 /* Load */
236 stb %o3, [%o0] /* Store Group */
237
23813: add %o0, 1, %o0 /* IEU0 */
239 add %o1, 1, %o1 /* IEU1 */
240 andcc %o3, 0xff, %g0 /* IEU1 Group */
241 be,pn %icc, 4b /* CTI */
242
243 lduba [%o1] ASI_PNF, %o3 /* Load */
244 andcc %o0, 7, %g0 /* IEU1 Group */
245 bne,a,pt %icc, 13b /* CTI */
246 stb %o3, [%o0] /* Store */
247
/* Dest is now aligned: pick the aligned word copy (1) when src & 7 is
   zero, otherwise set %g4 = 64 and fall into the misaligned copy.  */
248 andcc %o1, 7, %g3 /* IEU1 Group */
249 be,a,pt %icc, 1b /* CTI */
250 ldx [%o1], %o3 /* Load */
251 orcc %g0, 64, %g4 /* IEU1 Group */
252
/* Misaligned source: round %o1 down to an 8-byte boundary and build
   every output word from two neighbouring aligned source words.  */
25314: sllx %g3, 3, %g5 /* IEU0 */
254 sub %o1, %g3, %o1 /* IEU0 Group */
255 sub %g4, %g5, %g4 /* IEU1 */
256 /* %g1 = 0101010101010101 *
257 * %g2 = 8080808080808080 *
258 * %g3 = source alignment *
259 * %g5 = number of bits to shift left *
260 * %g4 = number of bits to shift right */
261 ldxa [%o1] ASI_PNF, %o5 /* Load Group */
262
263 addcc %o1, 8, %o1 /* IEU1 */
/* %o3 = (previous word << %g5) | (next word >> %g4); the annulled stx
   below stores it only when no candidate zero byte was found.  */
26415: sllx %o5, %g5, %o3 /* IEU0 Group */
265 ldxa [%o1] ASI_PNF, %o5 /* Load */
266 srlx %o5, %g4, %o4 /* IEU0 Group */
267
268 add %o0, 8, %o0 /* IEU1 */
269 or %o3, %o4, %o3 /* IEU0 Group */
270 add %o1, 8, %o1 /* IEU1 */
271 sub %o3, %g1, %o4 /* IEU0 Group */
272
273#ifdef EIGHTBIT_NOT_RARE
274 andn %o4, %o3, %o4 /* IEU0 Group */
275#endif
276 andcc %o4, %g2, %g0 /* IEU1 Group */
277 be,a,pt %xcc, 15b /* CTI */
278 stx %o3, [%o0 - 8] /* Store */
/* Candidate zero byte in %o3: examine its bytes from the most
   significant down and branch to the matching tail (22 .. 16).  */
279 srlx %o3, 56, %o4 /* IEU0 Group */
280
281 andcc %o4, 0xff, %g0 /* IEU1 Group */
282 be,pn %icc, 22f /* CTI */
283 srlx %o3, 48, %o4 /* IEU0 */
284 andcc %o4, 0xff, %g0 /* IEU1 Group */
285
286 be,pn %icc, 21f /* CTI */
287 srlx %o3, 40, %o4 /* IEU0 */
288 andcc %o4, 0xff, %g0 /* IEU1 Group */
289 be,pn %icc, 20f /* CTI */
290
291 srlx %o3, 32, %o4 /* IEU0 */
292 andcc %o4, 0xff, %g0 /* IEU1 Group */
293 be,pn %icc, 19f /* CTI */
294 srlx %o3, 24, %o4 /* IEU0 */
295
296 andcc %o4, 0xff, %g0 /* IEU1 Group */
297 be,pn %icc, 18f /* CTI */
298 srlx %o3, 16, %o4 /* IEU0 */
299 andcc %o4, 0xff, %g0 /* IEU1 Group */
300
301 be,pn %icc, 17f /* CTI */
302 srlx %o3, 8, %o4 /* IEU0 */
303 andcc %o4, 0xff, %g0 /* IEU1 Group */
304 be,pn %icc, 16f /* CTI */
305
/* First seven bytes are non-zero.  The stx is in a non-annulled delay
   slot, so the word is stored whether the loop continues (last byte
   non-zero) or the function returns (last byte is the NUL).  */
306 andcc %o3, 0xff, %g0 /* IEU1 Group */
307 bne,pn %icc, 15b /* CTI */
308 stx %o3, [%o0 - 8] /* Store */
309 retl /* CTI+IEU1 Group */
310
62f29da7 311 mov %g6, %o0 /* IEU0 */
ae6b8730
RH
312
313 .align 16
/* Tails of the misaligned copy.  Entries fall through one another so
   that every byte of %o3 up to and including the NUL is written:
   16 -> 17 -> 18 -> 19 covers a NUL in the lower half (upper four
   bytes go out with one stw), 20 -> 21 -> 22 a NUL in the upper
   half.  */
31416: srlx %o3, 8, %o4 /* IEU0 Group */
315 stb %o4, [%o0 - 2] /* Store */
31617: srlx %o3, 16, %o4 /* IEU0 Group */
317 stb %o4, [%o0 - 3] /* Store */
318
31918: srlx %o3, 24, %o4 /* IEU0 Group */
320 stb %o4, [%o0 - 4] /* Store */
32119: srlx %o3, 32, %o4 /* IEU0 Group */
322 stw %o4, [%o0 - 8] /* Store */
323
324 retl /* CTI+IEU1 Group */
62f29da7 325 mov %g6, %o0 /* IEU0 */
ae6b8730
RH
326 nop
327 nop
328
32920: srlx %o3, 40, %o4 /* IEU0 Group */
330 stb %o4, [%o0 - 6] /* Store */
33121: srlx %o3, 48, %o4 /* IEU0 Group */
332 stb %o4, [%o0 - 7] /* Store */
333
33422: srlx %o3, 56, %o4 /* IEU0 Group */
335 stb %o4, [%o0 - 8] /* Store */
336 retl /* CTI+IEU1 Group */
62f29da7 337 mov %g6, %o0 /* IEU0 */
ae6b8730 338END(strcat)
85dd1003 339libc_hidden_builtin_def (strcat)