]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/sparc/sparc64/strcpy.S
f4a14ae9cdd34ba7e3bbaef1ed7826fd735bc768
[thirdparty/glibc.git] / sysdeps / sparc / sparc64 / strcpy.S
1 /* Copy SRC to DEST returning DEST.
2 For SPARC v9.
3 Copyright (C) 1998-2014 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
6 Jakub Jelinek <jj@ultra.linux.cz>.
7
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
12
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sysdep.h>
23 #include <asm/asi.h>
24 #ifndef XCC
25 .register %g2, #scratch
26 .register %g3, #scratch
27 .register %g6, #scratch
28 #endif
29
30 /* Normally, this uses
31 ((xword - 0x0101010101010101) & 0x8080808080808080) test
32 to find out if any byte in xword could be zero. This is fast, but
33 also gives a false alarm for any byte in the range 0x81-0xff. It does
34 not matter for correctness, as if this test tells us there could
35 be some zero byte, we check it byte by byte, but if bytes with
36 high bits set are common in the strings, then this will give poor
37 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
38 will use a test that is one tick slower, but more precise,
39 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
40 which does not give any false alarms (but if some bits are set,
41 one cannot assume from it which bytes are zero and which are not).
42 It has yet to be measured which of the two is the right default for
43 glibc these days for an average user.
44 */
45
46 .text
47 .align 32
/* strcpy: copy the string at SRC, including its terminating zero byte, to
   DEST and return DEST.

   In:  %o0 = DEST, %o1 = SRC.
   Out: %o0 = the incoming DEST (kept in %g6 while %o0 is used as the
        running store pointer).

   Constants built at entry:
     %g1 = 0x0101010101010101
     %g2 = 0x8080808080808080 (%g1 << 7)

   Three code paths:
     labels 1-11   DEST and SRC both 8-byte aligned: 8 bytes per iteration.
     labels 12-13  DEST not 8-byte aligned: single bytes until it is.
     labels 14-22  DEST aligned, SRC not: each 8-byte word is assembled from
                   two aligned loads with shifts.

   Every word is screened with ((word - %g1) & %g2), additionally masked
   with ~word when EIGHTBIT_NOT_RARE is defined. On a hit the bytes are
   tested one by one starting with bits 63..56, which the stores below
   place at the lowest address.

   The instruction written after each branch or retl occupies its delay
   slot. For branches spelled with ",a" (annul) the delay-slot instruction
   takes effect only when the branch is taken; for the others it always
   executes.

   NOTE(review): ASI_PNF comes from <asm/asi.h>; it is understood to be the
   SPARC V9 primary no-fault ASI, which would let the read-ahead loads
   touch memory beyond the terminator without trapping - confirm against
   the header.

   NOTE(review): the trailing IEU0/IEU1/Load/Store/CTI/Group remarks are
   kept from the original and look like per-instruction issue-slot
   scheduling notes - confirm the target pipeline before reordering.  */
48 ENTRY(strcpy)
/* Keep DEST for the return value, start building %g1, and test whether
   DEST is 8-byte aligned.  */
49 sethi %hi(0x01010101), %g1 /* IEU0 Group */
50 mov %o0, %g6 /* IEU1 */
51 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
52 andcc %o0, 7, %g0 /* IEU1 */
53
54 sllx %g1, 32, %g2 /* IEU0 Group */
/* DEST misaligned -> byte loop at 12. The andcc in the (non-annulled)
   delay slot leaves %g3 = SRC & 7 on both outcomes.  */
55 bne,pn %icc, 12f /* CTI */
56 andcc %o1, 7, %g3 /* IEU1 */
/* %g1 = 0x0101010101010101.  */
57 or %g1, %g2, %g1 /* IEU0 Group */
58
/* SRC misaligned -> shifting loop at 14. Delay slot: %g2 = %g1 << 7.  */
59 bne,pn %icc, 14f /* CTI */
60 sllx %g1, 7, %g2 /* IEU0 Group */
/* Aligned loop. %g3 = word being examined, %o3 = next word, loaded ahead
   through ASI_PNF. %o0 and %o1 are advanced before the screen, hence the
   [%o0 - 8] addressing of the stores.  */
61 1: ldx [%o1], %o3 /* Load */
62 add %o1, 8, %o1 /* IEU1 */
63
64 2: mov %o3, %g3 /* IEU0 Group */
65 3: sub %o3, %g1, %o2 /* IEU1 */
66 ldxa [%o1] ASI_PNF, %o3 /* Load */
67 #ifdef EIGHTBIT_NOT_RARE
68 andn %o2, %g3, %o2 /* IEU0 Group */
69 #endif
70 add %o0, 8, %o0 /* IEU0 Group */
71
72 andcc %o2, %g2, %g0 /* IEU1 */
73 add %o1, 8, %o1 /* IEU0 Group */
/* Screen clear: store the word (annulled delay slot, runs only when the
   branch is taken) and continue with the next one.  */
74 be,a,pt %xcc, 2b /* CTI */
75 stx %g3, [%o0 - 8] /* Store */
76
/* Screen hit. Test the bytes of %g3 from bits 63..56 downwards; each delay
   slot pre-computes the shift used by the following test and by the tail
   store it may branch to. Branch targets by position of the zero byte:
   11 = [%o0 - 8], 10 = [%o0 - 7], 9 = [%o0 - 6], 8 = [%o0 - 5],
   7 = [%o0 - 4], 6 = [%o0 - 3], 5 = [%o0 - 2].  */
77 srlx %g3, 56, %g5 /* IEU0 Group */
78 andcc %g5, 0xff, %g0 /* IEU1 Group */
79 be,pn %icc, 11f /* CTI */
80 srlx %g3, 48, %g4 /* IEU0 */
81
82 andcc %g4, 0xff, %g0 /* IEU1 Group */
83 be,pn %icc, 10f /* CTI */
84 srlx %g3, 40, %g5 /* IEU0 */
85 andcc %g5, 0xff, %g0 /* IEU1 Group */
86
87 be,pn %icc, 9f /* CTI */
88 srlx %g3, 32, %g4 /* IEU0 */
89 andcc %g4, 0xff, %g0 /* IEU1 Group */
90 be,pn %icc, 8f /* CTI */
91
92 srlx %g3, 24, %g5 /* IEU0 */
93 andcc %g5, 0xff, %g0 /* IEU1 Group */
94 be,pn %icc, 7f /* CTI */
95 srlx %g3, 16, %g4 /* IEU0 */
96
97 andcc %g4, 0xff, %g0 /* IEU1 Group */
98 be,pn %icc, 6f /* CTI */
99 srlx %g3, 8, %g5 /* IEU0 */
100 andcc %g5, 0xff, %g0 /* IEU1 Group */
101
102 be,pn %icc, 5f /* CTI */
103 sub %o3, %g1, %o2 /* IEU0 */
/* The bytes at [%o0 - 8] .. [%o0 - 2] are all non-zero: store the whole
   word. If its lowest byte is non-zero too, the screen was a false alarm;
   resume at 3 with the next word moved into %g3 by the delay slot (label 3
   performs the subtraction into %o2 again). Otherwise the terminator has
   just been stored and we fall into the return at 4.  */
104 stx %g3, [%o0 - 8] /* Store Group */
105 andcc %g3, 0xff, %g0 /* IEU1 */
106
107 bne,pt %icc, 3b /* CTI */
108 mov %o3, %g3 /* IEU0 Group */
/* Common return: %o0 = original DEST, restored in the retl delay slot.  */
109 4: retl /* CTI+IEU1 Group */
110 mov %g6, %o0 /* IEU0 */
111
/* Tail stores for the aligned path. Each entry writes the bytes from
   [%o0 - 8] up to and including the zero byte with a byte/halfword/word
   store combination that ends exactly at the zero byte, then returns
   DEST. %g5 or %g4 already holds the shift computed in the delay slot of
   the branch that got here.  */
112 .align 16
/* 5: zero byte at [%o0 - 2].  */
113 5: stb %g5, [%o0 - 2] /* Store Group */
114 srlx %g3, 16, %g4 /* IEU0 */
/* 6: zero byte at [%o0 - 3].  */
115 6: sth %g4, [%o0 - 4] /* Store Group */
116 srlx %g3, 32, %g4 /* IEU0 */
117
118 stw %g4, [%o0 - 8] /* Store Group */
119 retl /* CTI+IEU1 Group */
120 mov %g6, %o0 /* IEU0 */
/* 7: zero byte at [%o0 - 4].  */
121 7: stb %g5, [%o0 - 4] /* Store Group */
122
123 srlx %g3, 32, %g4 /* IEU0 */
/* 8: zero byte at [%o0 - 5].  */
124 8: stw %g4, [%o0 - 8] /* Store Group */
125 retl /* CTI+IEU1 Group */
126 mov %g6, %o0 /* IEU0 */
127
/* 9: zero byte at [%o0 - 6].  */
128 9: stb %g5, [%o0 - 6] /* Store Group */
129 srlx %g3, 48, %g4 /* IEU0 */
/* 10: zero byte at [%o0 - 7].  */
130 10: sth %g4, [%o0 - 8] /* Store Group */
131 retl /* CTI+IEU1 Group */
132
133 mov %g6, %o0 /* IEU0 */
/* 11: zero byte at [%o0 - 8].  */
134 11: stb %g5, [%o0 - 8] /* Store Group */
135 retl /* CTI+IEU1 Group */
136 mov %g6, %o0 /* IEU0 */
137
/* DEST is not 8-byte aligned. Finish the constants (the "or" after the
   branch at the top was not executed on this path), then copy one byte at
   a time.  */
138 12: or %g1, %g2, %g1 /* IEU0 Group */
139 ldub [%o1], %o3 /* Load */
140 sllx %g1, 7, %g2 /* IEU0 Group */
141 stb %o3, [%o0] /* Store Group */
142
/* 13: advance both pointers. If the byte just stored was zero, return via
   4. The lduba in the non-annulled delay slot fetches the next byte
   through ASI_PNF on both outcomes.  */
143 13: add %o0, 1, %o0 /* IEU0 */
144 add %o1, 1, %o1 /* IEU1 */
145 andcc %o3, 0xff, %g0 /* IEU1 Group */
146 be,pn %icc, 4b /* CTI */
147
148 lduba [%o1] ASI_PNF, %o3 /* Load */
/* While DEST is still misaligned, store that byte (annulled delay slot)
   and loop.  */
149 andcc %o0, 7, %g0 /* IEU1 Group */
150 bne,a,pt %icc, 13b /* CTI */
151 stb %o3, [%o0] /* Store */
152
/* DEST is now aligned. If SRC is aligned as well, join the aligned loop at
   1 (the annulled delay-slot ldx runs only when the branch is taken, and
   label 1 issues the same load again); otherwise fall through to 14 with
   %g3 = SRC & 7.  */
153 andcc %o1, 7, %g3 /* IEU1 Group */
154 be,a,pt %icc, 1b /* CTI */
155 ldx [%o1], %o3 /* Load */
/* DEST aligned, SRC not. %g5 = 8 * %g3, %g4 = 64 - %g5, and %o1 is rounded
   down to an 8-byte boundary so that every load below is aligned.  */
156 14: orcc %g0, 64, %g4 /* IEU1 Group */
157
158 sllx %g3, 3, %g5 /* IEU0 */
159 sub %o1, %g3, %o1 /* IEU0 Group */
160 sub %g4, %g5, %g4 /* IEU1 */
161 /* %g1 = 0101010101010101 *
162 * %g2 = 8080808080808080 *
163 * %g3 = source alignment *
164 * %g5 = number of bits to shift left *
165 * %g4 = number of bits to shift right */
166 ldxa [%o1] ASI_PNF, %o5 /* Load Group */
167
168 addcc %o1, 8, %o1 /* IEU1 */
/* 15: %o5 holds the previously loaded aligned word. Shift its remaining
   bytes up, load the next aligned word into %o5, and OR in that word
   shifted down; %o3 then holds the next 8 source bytes. %o4 receives the
   screening value %o3 - %g1 (masked with ~%o3 under EIGHTBIT_NOT_RARE).  */
169 15: sllx %o5, %g5, %o3 /* IEU0 Group */
170 ldxa [%o1] ASI_PNF, %o5 /* Load */
171 srlx %o5, %g4, %o4 /* IEU0 Group */
172
173 add %o0, 8, %o0 /* IEU1 */
174 or %o3, %o4, %o3 /* IEU0 Group */
175 add %o1, 8, %o1 /* IEU1 */
176 sub %o3, %g1, %o4 /* IEU0 Group */
177
178 #ifdef EIGHTBIT_NOT_RARE
179 andn %o4, %o3, %o4 /* IEU0 Group */
180 #endif
181 andcc %o4, %g2, %g0 /* IEU1 Group */
/* Screen clear: store the assembled word (annulled delay slot) and loop.  */
182 be,a,pt %xcc, 15b /* CTI */
183 stx %o3, [%o0 - 8] /* Store */
/* Screen hit. Test the bytes of %o3 from bits 63..56 downwards. Every delay
   slot overwrites %o4 with the next shift, so the targets 16-22 recompute
   the shift they need. Branch targets by position of the zero byte:
   22 = [%o0 - 8], 21 = [%o0 - 7], 20 = [%o0 - 6], 19 = [%o0 - 5],
   18 = [%o0 - 4], 17 = [%o0 - 3], 16 = [%o0 - 2].  */
184 srlx %o3, 56, %o4 /* IEU0 Group */
185
186 andcc %o4, 0xff, %g0 /* IEU1 Group */
187 be,pn %icc, 22f /* CTI */
188 srlx %o3, 48, %o4 /* IEU0 */
189 andcc %o4, 0xff, %g0 /* IEU1 Group */
190
191 be,pn %icc, 21f /* CTI */
192 srlx %o3, 40, %o4 /* IEU0 */
193 andcc %o4, 0xff, %g0 /* IEU1 Group */
194 be,pn %icc, 20f /* CTI */
195
196 srlx %o3, 32, %o4 /* IEU0 */
197 andcc %o4, 0xff, %g0 /* IEU1 Group */
198 be,pn %icc, 19f /* CTI */
199 srlx %o3, 24, %o4 /* IEU0 */
200
201 andcc %o4, 0xff, %g0 /* IEU1 Group */
202 be,pn %icc, 18f /* CTI */
203 srlx %o3, 16, %o4 /* IEU0 */
204 andcc %o4, 0xff, %g0 /* IEU1 Group */
205
206 be,pn %icc, 17f /* CTI */
207 srlx %o3, 8, %o4 /* IEU0 */
208 andcc %o4, 0xff, %g0 /* IEU1 Group */
/* Last explicit test. Its delay slot tests the lowest byte of %o3. The stx
   is in the non-annulled delay slot of the bne, so the full word is stored
   both when looping (false alarm) and when returning (the lowest byte was
   the terminator).  */
209 be,pn %icc, 16f /* CTI */
210
211 andcc %o3, 0xff, %g0 /* IEU1 Group */
212 bne,pn %icc, 15b /* CTI */
213 stx %o3, [%o0 - 8] /* Store */
214 retl /* CTI+IEU1 Group */
215
216 mov %g6, %o0 /* IEU0 */
217
/* Tail stores for the shifted path. Entries 16-18 each store one byte and
   fall through towards the start of the word; 19 writes the upper four
   bytes with a single stw and returns DEST.  */
218 .align 16
219 16: srlx %o3, 8, %o4 /* IEU0 Group */
220 stb %o4, [%o0 - 2] /* Store */
221 17: srlx %o3, 16, %o4 /* IEU0 Group */
222 stb %o4, [%o0 - 3] /* Store */
223
224 18: srlx %o3, 24, %o4 /* IEU0 Group */
225 stb %o4, [%o0 - 4] /* Store */
226 19: srlx %o3, 32, %o4 /* IEU0 Group */
227 stw %o4, [%o0 - 8] /* Store */
228
229 retl /* CTI+IEU1 Group */
230 mov %g6, %o0 /* IEU0 */
/* NOTE(review): the two nops look like padding before label 20; their
   purpose is not stated in the source.  */
231 nop
232 nop
233
/* Zero byte within the upper three bytes: 20-22 store one byte each,
   falling through down to [%o0 - 8], then return DEST.  */
234 20: srlx %o3, 40, %o4 /* IEU0 Group */
235 stb %o4, [%o0 - 6] /* Store */
236 21: srlx %o3, 48, %o4 /* IEU0 Group */
237 stb %o4, [%o0 - 7] /* Store */
238
239 22: srlx %o3, 56, %o4 /* IEU0 Group */
240 stb %o4, [%o0 - 8] /* Store */
241 retl /* CTI+IEU1 Group */
242 mov %g6, %o0 /* IEU0 */
243 END(strcpy)
244 libc_hidden_builtin_def (strcpy)