]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/sparc/sparc64/strcat.S
Update.
[thirdparty/glibc.git] / sysdeps / sparc / sparc64 / strcat.S
1 /* strcat (dest, src) -- Append SRC on the end of DEST.
2 For SPARC v9.
3 Copyright (C) 1998, 1999 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
6 Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>.
7
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Library General Public License as
10 published by the Free Software Foundation; either version 2 of the
11 License, or (at your option) any later version.
12
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Library General Public License for more details.
17
18 You should have received a copy of the GNU Library General Public
19 License along with the GNU C Library; see the file COPYING.LIB. If not,
20 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23 #include <sysdep.h>
24 #include <asm/asi.h>
/* Defaults applied only when XCC has not been defined already: XCC is
   set to xcc and USE_BPR is defined.  The three .register directives
   declare %g2, %g3 and %g7 as scratch registers.
   NOTE(review): neither XCC nor USE_BPR is referenced in the visible
   part of this file (the code below spells %xcc and %icc directly).
   Confirm whether the two macros are still needed.  */
25 #ifndef XCC
26 #define XCC xcc
27 #define USE_BPR
28 .register %g2, #scratch
29 .register %g3, #scratch
30 .register %g7, #scratch
31 #endif
32
33 /* Normally, this uses
34 ((xword - 0x0101010101010101) & 0x8080808080808080) test
35 to find out if any byte in xword could be zero. This is fast, but
36 also gives a false alarm for any byte in the range 0x81-0xff. It does
37 not matter for correctness, as if this test tells us there could
38 be some zero byte, we check it byte by byte, but if bytes with
39 high bits set are common in the strings, then this will give poor
40 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
41 will use a one tick slower, but more precise, test
42 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
43 which does not give any false alarms (but if some bits are set,
44 one cannot assume from it which bytes are zero and which are not).
45 It has yet to be measured what the correct default for glibc is
46 these days for an average user.
47 */
48
49 .text
50 .align 32
/* strcat (dest, src): append the string at src to the end of the string
   at dest and return dest.

   Register use in this routine:
     %o0   in: dest, then the running destination cursor.
           out: the original dest (restored from %g7 before each retl)
     %o1   in: src, then the running source cursor
     %g7   copy of the original dest
     %g1   0x0101010101010101
     %g2   0x8080808080808080 (%g1 shifted left by 7)
     %o2-%o5, %g3-%g5   temporaries

   Phases:
     1. Find the zero byte that ends dest: bytewise until %o0 is 8-byte
        aligned (label 32), then one xword per iteration (labels 48-51).
        The stubs 52 and 23-29 rewind %o0 to the address of that byte.
     2. Copy src bytewise until the destination cursor is 8-byte aligned
        (labels 12-13).
     3. Copy one xword per iteration, either straight from an aligned
        source (labels 1-3) or, for a misaligned source, by merging two
        shifted aligned loads (labels 14-15).
     4. Store the last partial xword up to and including the zero byte
        (labels 5-11 and 16-22) and return.

   Most branches carry useful work in their delay slot.  The forms with
   the ,a suffix annul that slot when the branch is not taken.

   Look-ahead loads, issued before the previously loaded data has been
   tested for a zero byte, go through ASI_PNF.
   NOTE(review): presumably ASI_PNF is the no-fault ASI, so that reading
   beyond the end of a string cannot trap.  Confirm against asm/asi.h.

   NOTE(review): the flags written by addcc and orcc below are never
   read, another flag-setting instruction always precedes the next
   conditional branch.  Presumably these forms were chosen for
   instruction grouping (see the IEU1 annotations).

   NOTE(review): %g7 is clobbered here.  Confirm that no ABI this file
   is built for reserves %g7, for example as a thread pointer.  */
51 ENTRY(strcat)
/* Build the constant in %g1, fetch the first dest byte into %o3 and
   remember the original dest in %g7.  */
52 sethi %hi(0x01010101), %g1 /* IEU0 Group */
53 ldub [%o0], %o3 /* Load */
54 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
55 mov %o0, %g7 /* IEU1 */
56
57 sllx %g1, 32, %g2 /* IEU0 Group */
58 andcc %o0, 7, %g0 /* IEU1 */
59 or %g1, %g2, %g1 /* IEU0 Group */
/* dest is not 8-byte aligned: scan bytewise at 32.  The sllx in the delay
   slot runs on both paths, so %g2 is valid either way.  */
60 bne,pn %icc, 32f /* CTI */
61
62 sllx %g1, 7, %g2 /* IEU0 Group */
/* dest is aligned and its first byte is zero, so dest is empty: start
   copying at 30.  Otherwise the delay slot has loaded the first xword.  */
63 brz,pn %o3, 30f /* CTI+IEU1 */
64 ldx [%o0], %o3 /* Load */
65 48: add %o0, 8, %o0 /* IEU0 Group */
66
/* Scan loop.  At 49, %o3 holds the xword at %o0 - 8.  The next xword is
   loaded while the current one is being tested.  */
67 49: sub %o3, %g1, %o2 /* IEU0 Group */
68 #ifdef EIGHTBIT_NOT_RARE
69 andn %o2, %o3, %g5 /* IEU0 Group */
70 ldxa [%o0] ASI_PNF, %o3 /* Load */
71 andcc %g5, %g2, %g0 /* IEU1 Group */
72 #else
73 ldxa [%o0] ASI_PNF, %o3 /* Load */
74 andcc %o2, %g2, %g0 /* IEU1 Group */
75 #endif
76 be,pt %xcc, 49b /* CTI */
77
78 add %o0, 8, %o0 /* IEU0 */
/* A zero byte is possible.  %g3 = the suspect xword (the subtraction
   undone), %o2 = upper half of the test value.  %o0 is now 16 past the
   start of the suspect xword and %o3 holds the xword that follows it.  */
79 addcc %o2, %g1, %g3 /* IEU1 Group */
80 srlx %o2, 32, %o2 /* IEU0 */
/* Nothing flagged in the upper four bytes: skip to the lower four at 51.
   Otherwise test the bytes one by one, lowest address first.  */
81 50: andcc %o2, %g2, %g0 /* IEU1 Group */
82
83 be,pn %xcc, 51f /* CTI */
84 srlx %g3, 56, %o2 /* IEU0 */
85 andcc %o2, 0xff, %g0 /* IEU1 Group */
86 be,pn %icc, 29f /* CTI */
87
88 srlx %g3, 48, %o2 /* IEU0 */
89 andcc %o2, 0xff, %g0 /* IEU1 Group */
90 be,pn %icc, 28f /* CTI */
91 srlx %g3, 40, %o2 /* IEU0 */
92
93 andcc %o2, 0xff, %g0 /* IEU1 Group */
94 be,pn %icc, 27f /* CTI */
95 srlx %g3, 32, %o2 /* IEU0 */
96 andcc %o2, 0xff, %g0 /* IEU1 Group */
97
98 be,pn %icc, 26f /* CTI */
99 51: srlx %g3, 24, %o2 /* IEU0 */
100 andcc %o2, 0xff, %g0 /* IEU1 Group */
101 be,pn %icc, 25f /* CTI */
102
103 srlx %g3, 16, %o2 /* IEU0 */
104 andcc %o2, 0xff, %g0 /* IEU1 Group */
105 be,pn %icc, 24f /* CTI */
106 srlx %g3, 8, %o2 /* IEU0 */
107
/* Test of byte 6.  Its delay slot already computes the test value for the
   look-ahead xword held in %o3.  */
108 andcc %o2, 0xff, %g0 /* IEU1 Group */
109 be,pn %icc, 23f /* CTI */
110 sub %o3, %g1, %o2 /* IEU0 */
/* Byte 7 zero goes to 52.  Otherwise the alarm was false: test the
   look-ahead xword and either resume the loop at 49 or re-enter the
   byte tests at 50 with %g3 and %o2 set up for that xword.  */
111 andcc %g3, 0xff, %g0 /* IEU1 Group */
112
113 be,pn %icc, 52f /* CTI */
114 ldxa [%o0] ASI_PNF, %o3 /* Load */
115 andcc %o2, %g2, %g0 /* IEU1 Group */
116 be,pt %xcc, 49b /* CTI */
117
118 add %o0, 8, %o0 /* IEU0 */
119 addcc %o2, %g1, %g3 /* IEU1 Group */
120 ba,pt %xcc, 50b /* CTI */
121 srlx %o2, 32, %o2 /* IEU0 */
122
123 .align 16
/* Rewind stubs.  Each sets %o0 to the address of the zero byte found in
   dest: 52 for byte 7 (offset -9) up to 29 for byte 0 (offset -16).
   52 and 23-28 continue with the bytewise copy at 12.  29 leaves %o0
   8-byte aligned and falls into 30.  */
124 52: ba,pt %xcc, 12f /* CTI Group */
125 add %o0, -9, %o0 /* IEU0 */
126 23: ba,pt %xcc, 12f /* CTI Group */
127 add %o0, -10, %o0 /* IEU0 */
128
129 24: ba,pt %xcc, 12f /* CTI Group */
130 add %o0, -11, %o0 /* IEU0 */
131 25: ba,pt %xcc, 12f /* CTI Group */
132 add %o0, -12, %o0 /* IEU0 */
133
134 26: ba,pt %xcc, 12f /* CTI Group */
135 add %o0, -13, %o0 /* IEU0 */
136 27: ba,pt %xcc, 12f /* CTI Group */
137 add %o0, -14, %o0 /* IEU0 */
138
139 28: ba,pt %xcc, 12f /* CTI Group */
140 add %o0, -15, %o0 /* IEU0 */
141 29: add %o0, -16, %o0 /* IEU0 Group */
/* Here %o0 is 8-byte aligned and points at the zero byte of dest.
   %g3 = misalignment of src.  A misaligned src goes to 14, with the
   delay slot setting %g4 = 64 for it.  */
142 30: andcc %o1, 7, %g3 /* IEU1 */
143
144 31: bne,pn %icc, 14f /* CTI */
145 orcc %g0, 64, %g4 /* IEU1 Group */
/* Aligned copy loop.  %g3 = xword to store, %o3 = look-ahead xword,
   %o2 = zero-byte test value for %g3.  */
146 1: ldx [%o1], %o3 /* Load */
147 add %o1, 8, %o1 /* IEU1 */
148
149 2: mov %o3, %g3 /* IEU0 Group */
150 3: sub %o3, %g1, %o2 /* IEU1 */
151 ldxa [%o1] ASI_PNF, %o3 /* Load */
152 #ifdef EIGHTBIT_NOT_RARE
153 andn %o2, %g3, %o2 /* IEU0 Group */
154 #endif
155 add %o0, 8, %o0 /* IEU0 Group */
156
157 andcc %o2, %g2, %g0 /* IEU1 */
158 add %o1, 8, %o1 /* IEU0 Group */
/* Nothing flagged: the annulled delay slot stores the xword and the loop
   continues.  When something is flagged the store is skipped.  */
159 be,a,pt %xcc, 2b /* CTI */
160 stx %g3, [%o0 - 8] /* Store */
161
/* A zero byte is possible in %g3.  Test byte 0 (lowest address, bits
   63-56) through byte 6 and branch to the matching tail store.  */
162 srlx %g3, 56, %g5 /* IEU0 Group */
163 andcc %g5, 0xff, %g0 /* IEU1 Group */
164 be,pn %icc, 11f /* CTI */
165 srlx %g3, 48, %g4 /* IEU0 */
166
167 andcc %g4, 0xff, %g0 /* IEU1 Group */
168 be,pn %icc, 10f /* CTI */
169 srlx %g3, 40, %g5 /* IEU0 */
170 andcc %g5, 0xff, %g0 /* IEU1 Group */
171
172 be,pn %icc, 9f /* CTI */
173 srlx %g3, 32, %g4 /* IEU0 */
174 andcc %g4, 0xff, %g0 /* IEU1 Group */
175 be,pn %icc, 8f /* CTI */
176
177 srlx %g3, 24, %g5 /* IEU0 */
178 andcc %g5, 0xff, %g0 /* IEU1 Group */
179 be,pn %icc, 7f /* CTI */
180 srlx %g3, 16, %g4 /* IEU0 */
181
182 andcc %g4, 0xff, %g0 /* IEU1 Group */
183 be,pn %icc, 6f /* CTI */
184 srlx %g3, 8, %g5 /* IEU0 */
185 andcc %g5, 0xff, %g0 /* IEU1 Group */
186
/* Bytes 0-6 are nonzero once the branch to 5 is not taken: store the
   whole xword.  If byte 7 is zero the copy is complete.  Otherwise the
   alarm was false and the loop resumes at 3 with the look-ahead xword
   moved into %g3 by the delay slot.  */
187 be,pn %icc, 5f /* CTI */
188 sub %o3, %g1, %o2 /* IEU0 */
189 stx %g3, [%o0 - 8] /* Store Group */
190 andcc %g3, 0xff, %g0 /* IEU1 */
191
192 bne,pt %icc, 3b /* CTI */
193 mov %o3, %g3 /* IEU0 Group */
/* Common exit: return the original dest.  */
194 4: retl /* CTI+IEU1 Group */
195 mov %g7, %o0 /* IEU0 */
196
197 .align 16
/* Tail stores for the aligned loop.  The xword belongs at %o0 - 8.
   Entry by position of the zero byte: 5 = byte 6, 6 = byte 5,
   7 = byte 4, 8 = byte 3, 9 = byte 2, 10 = byte 1, 11 = byte 0.
   Each entry writes the bytes up to and including the zero byte with
   byte, halfword and word stores, then returns the original dest.  */
198 5: stb %g5, [%o0 - 2] /* Store Group */
199 srlx %g3, 16, %g4 /* IEU0 */
200 6: sth %g4, [%o0 - 4] /* Store Group */
201 srlx %g3, 32, %g4 /* IEU0 */
202
203 stw %g4, [%o0 - 8] /* Store Group */
204 retl /* CTI+IEU1 Group */
205 mov %g7, %o0 /* IEU0 */
206 7: stb %g5, [%o0 - 4] /* Store Group */
207
208 srlx %g3, 32, %g4 /* IEU0 */
209 8: stw %g4, [%o0 - 8] /* Store Group */
210 retl /* CTI+IEU1 Group */
211 mov %g7, %o0 /* IEU0 */
212
213 9: stb %g5, [%o0 - 6] /* Store Group */
214 srlx %g3, 48, %g4 /* IEU0 */
215 10: sth %g4, [%o0 - 8] /* Store Group */
216 retl /* CTI+IEU1 Group */
217
218 mov %g7, %o0 /* IEU0 */
219 11: stb %g5, [%o0 - 8] /* Store Group */
220 retl /* CTI+IEU1 Group */
221 mov %g7, %o0 /* IEU0 */
222
223 .align 16
/* Bytewise scan of a misaligned dest.  %o3 holds the byte at %o0.  Once
   %o0 becomes 8-byte aligned, switch to the xword scan at 48 with the
   xword at %o0 loaded by the annulled delay slot.  */
224 32: andcc %o0, 7, %g0 /* IEU1 Group */
225 be,a,pn %icc, 48b /* CTI */
226 ldx [%o0], %o3 /* Load */
227 add %o0, 1, %o0 /* IEU0 Group */
228
229 brnz,a,pt %o3, 32b /* CTI+IEU1 */
230 lduba [%o0] ASI_PNF, %o3 /* Load */
/* The byte was zero: undo the increment so %o0 points at it again.  */
231 add %o0, -1, %o0 /* IEU0 Group */
232 andcc %o0, 7, %g0 /* IEU1 Group */
233
/* If %o0 is 8-byte aligned, go to the xword copy at 31 with %g3 set to the
   misalignment of src.
   NOTE(review): this branch looks unreachable, since 32 already sends
   an aligned %o0 to 48 before the byte is examined.  Confirm.  */
234 be,a,pn %icc, 31b /* CTI */
235 andcc %o1, 7, %g3 /* IEU1 Group */
/* Bytewise copy until the destination cursor is 8-byte aligned.  At 13,
   %o3 is the byte just stored.  If it was zero the copy is complete and
   control goes to 4.  */
236 12: ldub [%o1], %o3 /* Load */
237 stb %o3, [%o0] /* Store Group */
238
239 13: add %o0, 1, %o0 /* IEU0 */
240 add %o1, 1, %o1 /* IEU1 */
241 andcc %o3, 0xff, %g0 /* IEU1 Group */
242 be,pn %icc, 4b /* CTI */
243
244 lduba [%o1] ASI_PNF, %o3 /* Load */
245 andcc %o0, 7, %g0 /* IEU1 Group */
246 bne,a,pt %icc, 13b /* CTI */
247 stb %o3, [%o0] /* Store */
248
/* Destination now aligned.  An aligned source continues at 1.  Otherwise
   %g3 = source misalignment, %g4 = 64 and control falls into 14.  */
249 andcc %o1, 7, %g3 /* IEU1 Group */
250 be,a,pt %icc, 1b /* CTI */
251 ldx [%o1], %o3 /* Load */
252 orcc %g0, 64, %g4 /* IEU1 Group */
253
/* Misaligned source: round %o1 down to an 8-byte boundary and derive the
   two shift counts from the misalignment.  */
254 14: sllx %g3, 3, %g5 /* IEU0 */
255 sub %o1, %g3, %o1 /* IEU0 Group */
256 sub %g4, %g5, %g4 /* IEU1 */
257 /* %g1 = 0101010101010101 *
258 * %g2 = 8080808080808080 *
259 * %g3 = source alignment *
260 * %g5 = number of bits to shift left *
261 * %g4 = number of bits to shift right */
262 ldxa [%o1] ASI_PNF, %o5 /* Load Group */
263
264 addcc %o1, 8, %o1 /* IEU1 */
/* Shift-and-merge loop.  %o3 = (previous aligned xword shifted left by
   %g5) or-ed with (next aligned xword shifted right by %g4), which is
   the next eight source bytes.  %o4 = zero-byte test value for %o3.  */
265 15: sllx %o5, %g5, %o3 /* IEU0 Group */
266 ldxa [%o1] ASI_PNF, %o5 /* Load */
267 srlx %o5, %g4, %o4 /* IEU0 Group */
268
269 add %o0, 8, %o0 /* IEU1 */
270 or %o3, %o4, %o3 /* IEU0 Group */
271 add %o1, 8, %o1 /* IEU1 */
272 sub %o3, %g1, %o4 /* IEU0 Group */
273
274 #ifdef EIGHTBIT_NOT_RARE
275 andn %o4, %o3, %o4 /* IEU0 Group */
276 #endif
277 andcc %o4, %g2, %g0 /* IEU1 Group */
/* Nothing flagged: the annulled delay slot stores the xword and the loop
   continues.  Otherwise test byte 0 through byte 6 in address order.  */
278 be,a,pt %xcc, 15b /* CTI */
279 stx %o3, [%o0 - 8] /* Store */
280 srlx %o3, 56, %o4 /* IEU0 Group */
281
282 andcc %o4, 0xff, %g0 /* IEU1 Group */
283 be,pn %icc, 22f /* CTI */
284 srlx %o3, 48, %o4 /* IEU0 */
285 andcc %o4, 0xff, %g0 /* IEU1 Group */
286
287 be,pn %icc, 21f /* CTI */
288 srlx %o3, 40, %o4 /* IEU0 */
289 andcc %o4, 0xff, %g0 /* IEU1 Group */
290 be,pn %icc, 20f /* CTI */
291
292 srlx %o3, 32, %o4 /* IEU0 */
293 andcc %o4, 0xff, %g0 /* IEU1 Group */
294 be,pn %icc, 19f /* CTI */
295 srlx %o3, 24, %o4 /* IEU0 */
296
297 andcc %o4, 0xff, %g0 /* IEU1 Group */
298 be,pn %icc, 18f /* CTI */
299 srlx %o3, 16, %o4 /* IEU0 */
300 andcc %o4, 0xff, %g0 /* IEU1 Group */
301
302 be,pn %icc, 17f /* CTI */
303 srlx %o3, 8, %o4 /* IEU0 */
304 andcc %o4, 0xff, %g0 /* IEU1 Group */
305 be,pn %icc, 16f /* CTI */
306
/* Bytes 0-6 are nonzero.  The delay slot stores the whole xword on both
   paths.  A nonzero byte 7 means a false alarm and the loop resumes,
   otherwise the copy is complete.  */
307 andcc %o3, 0xff, %g0 /* IEU1 Group */
308 bne,pn %icc, 15b /* CTI */
309 stx %o3, [%o0 - 8] /* Store */
310 retl /* CTI+IEU1 Group */
311
312 mov %g7, %o0 /* IEU0 */
313
314 .align 16
/* Tail stores for the shift-and-merge loop.  The xword belongs at
   %o0 - 8.  Entry by position of the zero byte: 16 = byte 6,
   17 = byte 5, 18 = byte 4, 19 = byte 3, 20 = byte 2, 21 = byte 1,
   22 = byte 0.  Each entry stores its byte and falls through towards
   lower addresses.  16-18 end in the word store at 19 that writes
   bytes 0-3, and 20-21 end in the byte store at 22.  */
315 16: srlx %o3, 8, %o4 /* IEU0 Group */
316 stb %o4, [%o0 - 2] /* Store */
317 17: srlx %o3, 16, %o4 /* IEU0 Group */
318 stb %o4, [%o0 - 3] /* Store */
319
320 18: srlx %o3, 24, %o4 /* IEU0 Group */
321 stb %o4, [%o0 - 4] /* Store */
322 19: srlx %o3, 32, %o4 /* IEU0 Group */
323 stw %o4, [%o0 - 8] /* Store */
324
325 retl /* CTI+IEU1 Group */
326 mov %g7, %o0 /* IEU0 */
327 nop
328 nop
329
330 20: srlx %o3, 40, %o4 /* IEU0 Group */
331 stb %o4, [%o0 - 6] /* Store */
332 21: srlx %o3, 48, %o4 /* IEU0 Group */
333 stb %o4, [%o0 - 7] /* Store */
334
335 22: srlx %o3, 56, %o4 /* IEU0 Group */
336 stb %o4, [%o0 - 8] /* Store */
337 retl /* CTI+IEU1 Group */
338 mov %g7, %o0 /* IEU0 */
339 END(strcat)