1 /* strcat (dest, src) -- Append SRC on the end of DEST.
3 Copyright (C) 1998-2017 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
6 Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>.
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
/* Tell the assembler that %g2, %g3 and %g6 are used as scratch
   registers; all three are written by the code below.  */
27 .register %g2, #scratch
28 .register %g3, #scratch
29 .register %g6, #scratch
32 /* Normally, this uses the
33 ((xword - 0x0101010101010101) & 0x8080808080808080) test
34 to find out if any byte in xword could be zero. This is fast, but
35 also gives a false alarm for any byte in range 0x81-0xff. It does
36 not matter for correctness, as if this test tells us there could
37 be some zero byte, we check it byte by byte, but if bytes with
38 high bits set are common in the strings, then this will give poor
39 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
40 will use a one tick slower, but more precise test
41 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
42 which does not give any false alarms (but if some bits are set,
43 one cannot assume from it which bytes are zero and which are not).
44 It has yet to be measured which default is the right one for glibc
45 these days for an average user. */
/* Entry setup.  %o0 = dest, %o1 = src.
   Builds the two test masks (%g1 = 0x0101010101010101,
   %g2 = 0x8080808080808080), keeps the original dest in %g6 for the
   return value, loads the first byte of dest, and then chooses how to
   look for the end of dest:
     - dest not 8-byte aligned   -> 32f (byte-wise search)
     - first byte of dest is NUL -> 30f (dest is empty, start copying)
     - otherwise fall through to 48 with the first doubleword of dest
       in %o3.  */
51 sethi %hi(0x01010101), %g1 /* IEU0 Group */
52 ldub [%o0], %o3 /* Load */
53 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
/* Remember dest; every return path moves %g6 back into %o0.  */
54 mov %o0, %g6 /* IEU1 */
/* Replicate the 32-bit pattern into the upper half.  */
56 sllx %g1, 32, %g2 /* IEU0 Group */
57 andcc %o0, 7, %g0 /* IEU1 */
58 or %g1, %g2, %g1 /* IEU0 Group */
59 bne,pn %icc, 32f /* CTI */
/* Delay slot (not annulled, so it runs on both paths):
   %g2 = %g1 << 7 = 0x8080808080808080.  */
61 sllx %g1, 7, %g2 /* IEU0 Group */
62 brz,pn %o3, 30f /* CTI+IEU1 */
/* Delay slot: %o0 is 8-byte aligned here, so the doubleword load is
   well-formed even when the branch to 30f is taken.  */
63 ldx [%o0], %o3 /* Load */
/* Locate the NUL that ends dest, one aligned doubleword at a time.
   At 49, %o3 holds the doubleword at [%o0 - 8].  Each pass tests it for
   a possible zero byte while the next doubleword is being loaded; the
   add in the branch delay slot runs on every pass, so once the loop is
   left %o0 is 16 bytes past the start of the doubleword under test and
   %o3 already holds the doubleword that follows it.
   Exactly one of the two load/test pairs below is assembled.
   NOTE(review): ASI_PNF is defined outside this file; presumably the
   no-fault address space, so that loading ahead of the terminator
   cannot trap -- confirm.  */
64 48: add %o0, 8, %o0 /* IEU0 Group */
66 49: sub %o3, %g1, %o2 /* IEU0 Group */
67 #ifdef EIGHTBIT_NOT_RARE
/* Precise test: (x - 0x01..01) & ~x & 0x80..80.  */
68 andn %o2, %o3, %g5 /* IEU0 Group */
69 ldxa [%o0] ASI_PNF, %o3 /* Load */
70 andcc %g5, %g2, %g0 /* IEU1 Group */
#else
/* Fast test: (x - 0x01..01) & 0x80..80.  Bytes in 0x81-0xff can raise a
   false alarm, which the byte-by-byte checks below sort out.  */
72 ldxa [%o0] ASI_PNF, %o3 /* Load */
73 andcc %o2, %g2, %g0 /* IEU1 Group */
#endif
75 be,pt %xcc, 49b /* CTI */
/* Delay slot, executed whether or not the branch is taken.  */
77 add %o0, 8, %o0 /* IEU0 */
/* Possible zero byte.  %g3 = the doubleword itself (adding %g1 back
   undoes the subtraction); %o2 = upper 32 bits of the difference, for
   a quick test of the four most significant bytes.  */
78 addcc %o2, %g1, %g3 /* IEU1 Group */
79 srlx %o2, 32, %o2 /* IEU0 */
80 50: andcc %o2, %g2, %g0 /* IEU1 Group */
/* Nothing flagged in the upper four bytes: go straight to 51.  */
82 be,pn %xcc, 51f /* CTI */
83 srlx %g3, 56, %o2 /* IEU0 */
/* Check the bytes from most to least significant.  Each delay slot
   extracts the byte for the following check.  The branch targets
   (29f ... 23f, 52f) rewind %o0 onto the zero byte that was found.  */
84 andcc %o2, 0xff, %g0 /* IEU1 Group */
85 be,pn %icc, 29f /* CTI */
87 srlx %g3, 48, %o2 /* IEU0 */
88 andcc %o2, 0xff, %g0 /* IEU1 Group */
89 be,pn %icc, 28f /* CTI */
90 srlx %g3, 40, %o2 /* IEU0 */
92 andcc %o2, 0xff, %g0 /* IEU1 Group */
93 be,pn %icc, 27f /* CTI */
94 srlx %g3, 32, %o2 /* IEU0 */
95 andcc %o2, 0xff, %g0 /* IEU1 Group */
97 be,pn %icc, 26f /* CTI */
/* 51 doubles as the delay slot of the branch to 26f above.  */
98 51: srlx %g3, 24, %o2 /* IEU0 */
99 andcc %o2, 0xff, %g0 /* IEU1 Group */
100 be,pn %icc, 25f /* CTI */
102 srlx %g3, 16, %o2 /* IEU0 */
103 andcc %o2, 0xff, %g0 /* IEU1 Group */
104 be,pn %icc, 24f /* CTI */
105 srlx %g3, 8, %o2 /* IEU0 */
107 andcc %o2, 0xff, %g0 /* IEU1 Group */
108 be,pn %icc, 23f /* CTI */
/* Delay slot: start the test of the next doubleword, already in %o3.  */
109 sub %o3, %g1, %o2 /* IEU0 */
110 andcc %g3, 0xff, %g0 /* IEU1 Group */
112 be,pn %icc, 52f /* CTI */
/* Delay slot: load the doubleword after the next one.  */
113 ldxa [%o0] ASI_PNF, %o3 /* Load */
/* No byte was zero, so the alarm was false.  Continue with the next
   doubleword: back to the main loop if it passes the fast test,
   otherwise examine it byte by byte from 50.  */
114 andcc %o2, %g2, %g0 /* IEU1 Group */
115 be,pt %xcc, 49b /* CTI */
117 add %o0, 8, %o0 /* IEU0 */
118 addcc %o2, %g1, %g3 /* IEU1 Group */
119 ba,pt %xcc, 50b /* CTI */
120 srlx %o2, 32, %o2 /* IEU0 */
/* Fix-up stubs for the dest search.  They are entered with %o0 16 bytes
   past the start of the dest doubleword that contains the NUL.  Each
   stub rewinds %o0 (in the delay slot of its ba, or directly at 29) so
   that it points at the NUL itself:
     52: NUL is byte 7 (least significant)   23: byte 6   24: byte 5
     25: byte 4   26: byte 3   27: byte 2    28: byte 1
       -> all continue at 12f, the byte-wise copy.
     29: NUL is byte 0, so %o0 ends up on the doubleword boundary and
         execution falls through to label 30.  */
123 52: ba,pt %xcc, 12f /* CTI Group */
124 add %o0, -9, %o0 /* IEU0 */
125 23: ba,pt %xcc, 12f /* CTI Group */
126 add %o0, -10, %o0 /* IEU0 */
128 24: ba,pt %xcc, 12f /* CTI Group */
129 add %o0, -11, %o0 /* IEU0 */
130 25: ba,pt %xcc, 12f /* CTI Group */
131 add %o0, -12, %o0 /* IEU0 */
133 26: ba,pt %xcc, 12f /* CTI Group */
134 add %o0, -13, %o0 /* IEU0 */
135 27: ba,pt %xcc, 12f /* CTI Group */
136 add %o0, -14, %o0 /* IEU0 */
138 28: ba,pt %xcc, 12f /* CTI Group */
139 add %o0, -15, %o0 /* IEU0 */
140 29: add %o0, -16, %o0 /* IEU0 Group */
/* Copy src onto the end of dest when dest (%o0) is 8-byte aligned.
   30: compute %g3 = src & 7.
   31: src not 8-byte aligned -> 14f, with %g4 = 64 set in the delay slot.
   1/2/3: both pointers aligned.  %g3 is the doubleword being examined
   and %o3 the one loaded ahead of it; a doubleword that passes the
   zero-byte test is stored whole at [%o0 - 8] and the loop continues.
   4: common return, %o0 = original dest saved in %g6.
   NOTE(review): ASI_PNF is defined outside this file; presumably the
   no-fault address space, so that loading ahead of the terminator
   cannot trap -- confirm.  */
141 30: andcc %o1, 7, %g3 /* IEU1 */
143 31: bne,pn %icc, 14f /* CTI */
144 orcc %g0, 64, %g4 /* IEU1 Group */
145 1: ldx [%o1], %o3 /* Load */
146 add %o1, 8, %o1 /* IEU1 */
148 2: mov %o3, %g3 /* IEU0 Group */
149 3: sub %o3, %g1, %o2 /* IEU1 */
150 ldxa [%o1] ASI_PNF, %o3 /* Load */
151 #ifdef EIGHTBIT_NOT_RARE
/* Precise variant only: also mask with ~x.  */
152 andn %o2, %g3, %o2 /* IEU0 Group */
#endif
/* Needed in both configurations: the stores below address [%o0 - 8].  */
154 add %o0, 8, %o0 /* IEU0 Group */
156 andcc %o2, %g2, %g0 /* IEU1 */
157 add %o1, 8, %o1 /* IEU0 Group */
158 be,a,pt %xcc, 2b /* CTI */
/* Annulled delay slot: the store happens only when the branch is taken,
   i.e. when the doubleword cannot contain a zero byte.  */
159 stx %g3, [%o0 - 8] /* Store */
/* Possible zero byte: check from most to least significant byte.  The
   shifted value for each check is left in %g5 or %g4, where the tail
   stubs 11f ... 5f expect to find it.  */
161 srlx %g3, 56, %g5 /* IEU0 Group */
162 andcc %g5, 0xff, %g0 /* IEU1 Group */
163 be,pn %icc, 11f /* CTI */
164 srlx %g3, 48, %g4 /* IEU0 */
166 andcc %g4, 0xff, %g0 /* IEU1 Group */
167 be,pn %icc, 10f /* CTI */
168 srlx %g3, 40, %g5 /* IEU0 */
169 andcc %g5, 0xff, %g0 /* IEU1 Group */
171 be,pn %icc, 9f /* CTI */
172 srlx %g3, 32, %g4 /* IEU0 */
173 andcc %g4, 0xff, %g0 /* IEU1 Group */
174 be,pn %icc, 8f /* CTI */
176 srlx %g3, 24, %g5 /* IEU0 */
177 andcc %g5, 0xff, %g0 /* IEU1 Group */
178 be,pn %icc, 7f /* CTI */
179 srlx %g3, 16, %g4 /* IEU0 */
181 andcc %g4, 0xff, %g0 /* IEU1 Group */
182 be,pn %icc, 6f /* CTI */
183 srlx %g3, 8, %g5 /* IEU0 */
184 andcc %g5, 0xff, %g0 /* IEU1 Group */
186 be,pn %icc, 5f /* CTI */
187 sub %o3, %g1, %o2 /* IEU0 */
/* Bytes 0-6 are non-zero, so the whole doubleword is stored: either its
   last byte is the NUL, or the alarm was false and copying resumes at 3
   with the look-ahead doubleword as the current one.  */
188 stx %g3, [%o0 - 8] /* Store Group */
189 andcc %g3, 0xff, %g0 /* IEU1 */
191 bne,pt %icc, 3b /* CTI */
192 mov %o3, %g3 /* IEU0 Group */
/* Return the original dest.  */
193 4: retl /* CTI+IEU1 Group */
194 mov %g6, %o0 /* IEU0 */
/* Tail stores for the aligned copy.  %g3 is the source doubleword that
   contains the NUL and [%o0 - 8] is its slot in dest.  Only the bytes up
   to and including the NUL are written, then the original dest (%g6) is
   returned.  Byte 0 is the most significant byte, stored at [%o0 - 8].
   Entry point by position of the NUL:
     5: byte 6   6: byte 5   7: byte 4   8: byte 3
     9: byte 2   10: byte 1  11: byte 0
   On entry %g5 (odd labels) or %g4 (even labels) already holds %g3
   shifted so that the first store of the stub writes the right bytes.  */
197 5: stb %g5, [%o0 - 2] /* Store Group */
198 srlx %g3, 16, %g4 /* IEU0 */
/* Bytes 4-5, then bytes 0-3.  */
199 6: sth %g4, [%o0 - 4] /* Store Group */
200 srlx %g3, 32, %g4 /* IEU0 */
202 stw %g4, [%o0 - 8] /* Store Group */
203 retl /* CTI+IEU1 Group */
204 mov %g6, %o0 /* IEU0 */
/* Byte 4, then bytes 0-3.  */
205 7: stb %g5, [%o0 - 4] /* Store Group */
207 srlx %g3, 32, %g4 /* IEU0 */
208 8: stw %g4, [%o0 - 8] /* Store Group */
209 retl /* CTI+IEU1 Group */
210 mov %g6, %o0 /* IEU0 */
/* Byte 2, then bytes 0-1.  */
212 9: stb %g5, [%o0 - 6] /* Store Group */
213 srlx %g3, 48, %g4 /* IEU0 */
214 10: sth %g4, [%o0 - 8] /* Store Group */
215 retl /* CTI+IEU1 Group */
217 mov %g6, %o0 /* IEU0 */
/* The NUL is the very first byte of the doubleword.  */
218 11: stb %g5, [%o0 - 8] /* Store Group */
219 retl /* CTI+IEU1 Group */
220 mov %g6, %o0 /* IEU0 */
/* 32: dest is not 8-byte aligned, so look for its NUL one byte at a
   time.  %o3 holds the byte at [%o0] whenever 32 is reached.  As soon as
   %o0 becomes 8-byte aligned the search switches to the doubleword loop
   at 48b; the annulled delay slot loads the first doubleword for it.
   NOTE(review): ASI_PNF is defined outside this file; presumably the
   no-fault address space -- confirm.  */
223 32: andcc %o0, 7, %g0 /* IEU1 Group */
224 be,a,pn %icc, 48b /* CTI */
225 ldx [%o0], %o3 /* Load */
226 add %o0, 1, %o0 /* IEU0 Group */
/* Byte was non-zero: fetch the next one (annulled delay slot, runs only
   when the branch is taken) and loop.  */
228 brnz,a,pt %o3, 32b /* CTI+IEU1 */
229 lduba [%o0] ASI_PNF, %o3 /* Load */
/* NUL found: step %o0 back onto it.  */
230 add %o0, -1, %o0 /* IEU0 Group */
231 andcc %o0, 7, %g0 /* IEU1 Group */
/* If that address is 8-byte aligned, go to 31b; the annulled delay slot
   performs the source-alignment test that the branch at 31 consumes.  */
233 be,a,pn %icc, 31b /* CTI */
234 andcc %o1, 7, %g3 /* IEU1 Group */
/* 12/13: copy single bytes from src to dest until either the NUL has
   been copied or dest has become 8-byte aligned.  */
235 12: ldub [%o1], %o3 /* Load */
236 stb %o3, [%o0] /* Store Group */
238 13: add %o0, 1, %o0 /* IEU0 */
239 add %o1, 1, %o1 /* IEU1 */
240 andcc %o3, 0xff, %g0 /* IEU1 Group */
/* The byte just stored was the NUL: return through 4b.  The delay slot
   is not annulled, so the next source byte is loaded either way.  */
241 be,pn %icc, 4b /* CTI */
243 lduba [%o1] ASI_PNF, %o3 /* Load */
244 andcc %o0, 7, %g0 /* IEU1 Group */
/* dest still unaligned: store the byte (annulled delay slot, runs only
   when the branch is taken) and loop.  */
245 bne,a,pt %icc, 13b /* CTI */
246 stb %o3, [%o0] /* Store */
/* dest is now 8-byte aligned.  If src is aligned too, continue with the
   doubleword copy at 1b; otherwise set %g4 = 64 and fall through to
   label 14 with %g3 = src & 7.  */
248 andcc %o1, 7, %g3 /* IEU1 Group */
249 be,a,pt %icc, 1b /* CTI */
250 ldx [%o1], %o3 /* Load */
251 orcc %g0, 64, %g4 /* IEU1 Group */
/* 14: dest is 8-byte aligned but src is not.  Entered with
   %g3 = src & 7 and %g4 = 64.  src is rounded down to a doubleword
   boundary, and every output doubleword is assembled from two
   consecutive aligned source doublewords: the current one shifted left
   by %g5 bits, OR-ed with the next one shifted right by %g4 bits.
   NOTE(review): ASI_PNF is defined outside this file; presumably the
   no-fault address space, which would make the aligned loads before the
   start and past the end of src safe -- confirm.  */
253 14: sllx %g3, 3, %g5 /* IEU0 */
254 sub %o1, %g3, %o1 /* IEU0 Group */
255 sub %g4, %g5, %g4 /* IEU1 */
256 /* %g1 = 0101010101010101 *
257 * %g2 = 8080808080808080 *
258 * %g3 = source alignment *
259 * %g5 = number of bits to shift left *
260 * %g4 = number of bits to shift right */
261 ldxa [%o1] ASI_PNF, %o5 /* Load Group */
263 addcc %o1, 8, %o1 /* IEU1 */
/* 15: %o5 = current aligned source doubleword.  Build the next eight
   source bytes in %o3 and advance both pointers.  */
264 15: sllx %o5, %g5, %o3 /* IEU0 Group */
265 ldxa [%o1] ASI_PNF, %o5 /* Load */
266 srlx %o5, %g4, %o4 /* IEU0 Group */
268 add %o0, 8, %o0 /* IEU1 */
269 or %o3, %o4, %o3 /* IEU0 Group */
270 add %o1, 8, %o1 /* IEU1 */
271 sub %o3, %g1, %o4 /* IEU0 Group */
273 #ifdef EIGHTBIT_NOT_RARE
/* Precise variant only: also mask with ~x.  */
274 andn %o4, %o3, %o4 /* IEU0 Group */
#endif
/* Needed in both configurations: it sets the condition codes for the
   branch that follows.  */
276 andcc %o4, %g2, %g0 /* IEU1 Group */
277 be,a,pt %xcc, 15b /* CTI */
/* Annulled delay slot: store only when the branch is taken, i.e. when
   the doubleword cannot contain a zero byte.  */
278 stx %o3, [%o0 - 8] /* Store */
/* Possible zero byte: check from most to least significant byte; the
   targets 22f ... 16f store the partial doubleword and return.  */
279 srlx %o3, 56, %o4 /* IEU0 Group */
281 andcc %o4, 0xff, %g0 /* IEU1 Group */
282 be,pn %icc, 22f /* CTI */
283 srlx %o3, 48, %o4 /* IEU0 */
284 andcc %o4, 0xff, %g0 /* IEU1 Group */
286 be,pn %icc, 21f /* CTI */
287 srlx %o3, 40, %o4 /* IEU0 */
288 andcc %o4, 0xff, %g0 /* IEU1 Group */
289 be,pn %icc, 20f /* CTI */
291 srlx %o3, 32, %o4 /* IEU0 */
292 andcc %o4, 0xff, %g0 /* IEU1 Group */
293 be,pn %icc, 19f /* CTI */
294 srlx %o3, 24, %o4 /* IEU0 */
296 andcc %o4, 0xff, %g0 /* IEU1 Group */
297 be,pn %icc, 18f /* CTI */
298 srlx %o3, 16, %o4 /* IEU0 */
299 andcc %o4, 0xff, %g0 /* IEU1 Group */
301 be,pn %icc, 17f /* CTI */
302 srlx %o3, 8, %o4 /* IEU0 */
303 andcc %o4, 0xff, %g0 /* IEU1 Group */
304 be,pn %icc, 16f /* CTI */
/* Delay slot of the branch to 16f: test the last byte.  */
306 andcc %o3, 0xff, %g0 /* IEU1 Group */
/* Last byte non-zero means a false alarm: loop.  The store in the delay
   slot is not annulled, so the full doubleword is written whether the
   loop continues or the function returns below.  */
307 bne,pn %icc, 15b /* CTI */
308 stx %o3, [%o0 - 8] /* Store */
/* Return the original dest.  */
309 retl /* CTI+IEU1 Group */
311 mov %g6, %o0 /* IEU0 */
/* Tail stores for the unaligned-source copy.  %o3 is the assembled
   doubleword that contains the NUL and [%o0 - 8] is its slot in dest.
   Byte 0 is the most significant byte, stored at [%o0 - 8].  Each stub
   shifts its byte down itself and falls through to the next one, so the
   bytes are written from the NUL back towards the start of the slot.
   Entry point by position of the NUL:
     16: byte 6   17: byte 5   18: byte 4   19: byte 3
     20: byte 2   21: byte 1   22: byte 0
   Both chains end by returning the original dest saved in %g6.  */
314 16: srlx %o3, 8, %o4 /* IEU0 Group */
315 stb %o4, [%o0 - 2] /* Store */
316 17: srlx %o3, 16, %o4 /* IEU0 Group */
317 stb %o4, [%o0 - 3] /* Store */
319 18: srlx %o3, 24, %o4 /* IEU0 Group */
320 stb %o4, [%o0 - 4] /* Store */
/* Bytes 0-3 go out as one 32-bit store.  */
321 19: srlx %o3, 32, %o4 /* IEU0 Group */
322 stw %o4, [%o0 - 8] /* Store */
324 retl /* CTI+IEU1 Group */
325 mov %g6, %o0 /* IEU0 */
/* NUL within the first three bytes: single byte stores only.  */
329 20: srlx %o3, 40, %o4 /* IEU0 Group */
330 stb %o4, [%o0 - 6] /* Store */
331 21: srlx %o3, 48, %o4 /* IEU0 Group */
332 stb %o4, [%o0 - 7] /* Store */
334 22: srlx %o3, 56, %o4 /* IEU0 Group */
335 stb %o4, [%o0 - 8] /* Store */
336 retl /* CTI+IEU1 Group */
337 mov %g6, %o0 /* IEU0 */
/* NOTE(review): libc_hidden_builtin_def is a macro defined outside this
   file; presumably it sets up the hidden alias used for calls to strcat
   from inside libc -- confirm against its definition.  */
339 libc_hidden_builtin_def (strcat)