]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/sparc/sparc64/memchr.S
Update.
[thirdparty/glibc.git] / sysdeps / sparc / sparc64 / memchr.S
1 /* memchr (str, ch, n) -- Return a pointer to the first occurrence of CH
2 within the first N bytes of STR.
3 For SPARC v9.
4 Copyright (C) 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
5 This file is part of the GNU C Library.
6 Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
7 Jakub Jelinek <jj@ultra.linux.cz>.
8 This version is developed using the same algorithm as the fast C
9 version which carries the following introduction:
10 Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
11 with help from Dan Sahlin (dan@sics.se) and
12 commentary by Jim Blandy (jimb@ai.mit.edu);
13 adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
14 and implemented by Roland McGrath (roland@ai.mit.edu).
15
16 The GNU C Library is free software; you can redistribute it and/or
17 modify it under the terms of the GNU Lesser General Public
18 License as published by the Free Software Foundation; either
19 version 2.1 of the License, or (at your option) any later version.
20
21 The GNU C Library is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 Lesser General Public License for more details.
25
26 You should have received a copy of the GNU Lesser General Public
27 License along with the GNU C Library; if not, write to the Free
28 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 02111-1307 USA. */
30
31 #include <sysdep.h>
32 #include <asm/asi.h>
/* Defaults used when XCC has not been defined before this point
   (presumably another file may define XCC and then include this one --
   TODO confirm).  XCC names the condition-code set used by the pointer
   and length branches in __memchr; USE_BPR selects the brz
   (branch-on-register) form of the "n == 0" test there.  The .register
   directives declare %g2 and %g3 as scratch registers to the
   assembler; __memchr writes both.  */
33 #ifndef XCC
34 #define XCC xcc
35 #define USE_BPR
36 .register %g2, #scratch
37 .register %g3, #scratch
38 #endif
39
40 /* Normally, this uses
41 ((xword - 0x0101010101010101) & 0x8080808080808080) test
42 to find out if any byte in xword could be zero. This is fast, but
43 also gives false alarm for any byte in range 0x81-0xff. It does
44 not matter for correctness, as if this test tells us there could
45 be some zero byte, we check it byte by byte, but if bytes with
46 high bits set are common in the strings, then this will give poor
47 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
48 will use one tick slower, but more precise test
49 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
50 which does not give any false alarms (but if some bits are set,
51 one cannot assume from it which bytes are zero and which are not).
52 It has not yet been measured which of the two tests is the better
53 default for an average glibc user these days.
54 */
55
56 .text
57 .align 32
/* __memchr (str, ch, n)
   On entry %o0 = str, %o1 = ch, %o2 = n.  On return %o0 points at the
   first byte equal to (ch & 0xff) among the n bytes starting at str,
   or is 0 if there is none.

   Register roles set up below:
     %o2  end pointer (str + n); the tail code at 11 turns it into the
          number of bytes left
     %g1  0x0101010101010101
     %g2  0x8080808080808080 (briefly holds %g1 << 32 while %g1 is built)
     %g3  ch replicated into all eight bytes
     %g5  scratch
     %o3  most recently loaded byte or xword
     %o4  xword XOR %g3, so a byte equal to ch becomes 0x00
     %o5  result of the "could some byte be zero" test on %o4

   Shape of the code: test the first byte; if str is not 8-byte aligned,
   walk byte by byte (21/22) until it is; then scan one xword per
   iteration (1/2), loading the next xword while the current one is
   tested; finally handle a last, partial xword at 11.  The most
   significant byte of an xword is treated as the byte at the lowest
   address.

   Delay slots: the instruction after a branch or retl is executed too,
   except that after a conditional branch written with ",a" it is
   executed only when the branch is taken.  The IEU0/IEU1/CTI/Load/Group
   notes are the original annotations (presumably the intended
   instruction grouping) and are kept unchanged.  */
58 ENTRY(__memchr)
/* Keep only the low 8 bits of ch.  */
59 and %o1, 0xff, %o1 /* IEU0 Group */
/* n == 0: return 0 through 12.  */
60 #ifdef USE_BPR
61 brz,pn %o2, 12f /* CTI+IEU1 */
62 #else
63 tst %o2 /* IEU1 */
64 be,pn %XCC, 12f /* CTI */
65 #endif
/* Delay slot, always executed: start replicating ch into %g3.  */
66 sll %o1, 8, %g3 /* IEU0 Group */
/* %o2 = str + n, the end pointer.  */
67 add %o0, %o2, %o2 /* IEU1 */
68
/* Interleaved: build %g1 (0x01 in every byte), build %g3 (ch in every
   byte; the upper half waits in %g5 until it is OR-ed in later), and
   load the first byte.  */
69 sethi %hi(0x01010101), %g1 /* IEU0 Group */
70 or %g3, %o1, %g3 /* IEU1 */
71 ldub [%o0], %o3 /* Load */
72 sllx %g3, 16, %g5 /* IEU0 Group */
73
74 or %g1, %lo(0x01010101), %g1 /* IEU1 */
75 sllx %g1, 32, %g2 /* IEU0 Group */
76 or %g3, %g5, %g3 /* IEU1 */
77 sllx %g3, 32, %g5 /* IEU0 Group */
78
/* First byte equals ch: return str unchanged through 13.  */
79 cmp %o3, %o1 /* IEU1 */
80 be,pn %xcc, 13f /* CTI */
/* Delay slot, always executed: %g1 is now complete.  */
81 or %g1, %g2, %g1 /* IEU0 Group */
/* Is str 8-byte aligned?  */
82 andcc %o0, 7, %g0 /* IEU1 */
83
/* Not aligned: go to the byte loop at 21.  The annulled delay slot
   steps past the byte already tested, only when the branch is taken.  */
84 bne,a,pn %icc, 21f /* CTI */
85 add %o0, 1, %o0 /* IEU0 Group */
/* Aligned: load the first xword, finish %g2 and %g3.  */
86 ldx [%o0], %o3 /* Load Group */
87 sllx %g1, 7, %g2 /* IEU0 */
88
89 or %g3, %g5, %g3 /* IEU1 */
/* 1: entry to the xword loop with a fresh xword in %o3 loaded from
   [%o0].  After the add, %o0 is 8 past the start of that xword.  */
90 1: add %o0, 8, %o0 /* IEU0 Group */
91 xor %o3, %g3, %o4 /* IEU1 */
92 /* %g1 = 0101010101010101 *
93 * %g2 = 8080808080808080 *
94 * %g3 = c c c c c c c c *
95 * %o3 = value *
96 * %o4 = value XOR c */
/* 2: loop head.  Here %o0 is 8 past the start of the xword held
   (XOR-ed) in %o4.  If that is beyond the end pointer, the xword is
   only partly inside the buffer and 11 handles it.
   NOTE(review): bg is a signed comparison and %o2 was formed as
   str + n with no overflow check -- confirm callers never pass an n
   for which str + n wraps or changes sign.  */
97 2: cmp %o0, %o2 /* IEU1 Group */
98
99 bg,pn %XCC, 11f /* CTI */
/* Delay slot, always executed: preload the next xword.  ASI_PNF comes
   from <asm/asi.h>; presumably the no-fault ASI, so that this read
   past the current xword cannot trap -- TODO confirm.  */
100 ldxa [%o0] ASI_PNF, %o3 /* Load */
101 sub %o4, %g1, %o5 /* IEU0 Group */
/* %o0 is now 16 past the start of the xword in %o4.  */
102 add %o0, 8, %o0 /* IEU1 */
103 #ifdef EIGHTBIT_NOT_RARE
/* Precise variant: also require the byte's own top bit to be clear.  */
104 andn %o5, %o4, %o5 /* IEU0 Group */
105 #endif
106
/* No candidate byte: loop.  The annulled delay slot forms the next
   %o4 only when the branch is taken.  */
107 andcc %o5, %g2, %g0 /* IEU1 Group */
108 be,a,pt %xcc, 2b /* CTI */
109 xor %o3, %g3, %o4 /* IEU0 */
/* Possible match: test the eight bytes of %o4 from most significant
   to least significant.  In this chain each srlx for the next byte
   sits in the delay slot of the preceding be.  */
110 srlx %o4, 56, %g5 /* IEU0 */
111
112 andcc %g5, 0xff, %g0 /* IEU1 Group */
113 be,pn %icc, 3f /* CTI */
114 srlx %o4, 48, %g5 /* IEU0 */
115 andcc %g5, 0xff, %g0 /* IEU1 Group */
116
117 be,pn %icc, 4f /* CTI */
118 srlx %o4, 40, %g5 /* IEU0 */
119 andcc %g5, 0xff, %g0 /* IEU1 Group */
120 be,pn %icc, 5f /* CTI */
121
122 srlx %o4, 32, %g5 /* IEU0 */
123 andcc %g5, 0xff, %g0 /* IEU1 Group */
124 be,pn %icc, 6f /* CTI */
125 srlx %o4, 24, %g5 /* IEU0 */
126
127 andcc %g5, 0xff, %g0 /* IEU1 Group */
128 be,pn %icc, 7f /* CTI */
129 srlx %o4, 16, %g5 /* IEU0 */
130 andcc %g5, 0xff, %g0 /* IEU1 Group */
131
132 be,pn %icc, 8f /* CTI */
133 srlx %o4, 8, %g5 /* IEU0 */
134 andcc %g5, 0xff, %g0 /* IEU1 Group */
135 be,pn %icc, 9f /* CTI */
136
/* The andcc is the delay slot of the be above and tests the last
   byte.  If it is nonzero too, the alarm was false: continue at 2
   with the next xword (the xor is a non-annulled delay slot, so it
   also runs on fall-through, where %o4 is no longer needed).  */
137 andcc %o4, 0xff, %g0 /* IEU1 Group */
138 bne,pt %icc, 2b /* CTI */
139 xor %o3, %g3, %o4 /* IEU0 */
/* Last byte matched: it lies 9 below the current %o0.  */
140 retl /* CTI+IEU1 Group */
141
142 add %o0, -9, %o0 /* IEU0 */
143
/* Returns for a match in bytes 0..6 of the xword.  %o0 is 16 past the
   start of that xword, hence offsets -16 .. -10.  */
144 .align 16
145 3: retl /* CTI+IEU1 Group */
146 add %o0, -16, %o0 /* IEU0 */
147 4: retl /* CTI+IEU1 Group */
148 add %o0, -15, %o0 /* IEU0 */
149
150 5: retl /* CTI+IEU1 Group */
151 add %o0, -14, %o0 /* IEU0 */
152 6: retl /* CTI+IEU1 Group */
153 add %o0, -13, %o0 /* IEU0 */
154
155 7: retl /* CTI+IEU1 Group */
156 add %o0, -12, %o0 /* IEU0 */
157 8: retl /* CTI+IEU1 Group */
158 add %o0, -11, %o0 /* IEU0 */
159
160 9: retl /* CTI+IEU1 Group */
161 add %o0, -10, %o0 /* IEU0 */
/* 11: tail.  The xword in %o4 reaches past the end pointer.  Move %o0
   back to the start of that xword.  */
162 11: sub %o4, %g1, %o5 /* IEU0 Group */
163 sub %o0, 8, %o0 /* IEU1 */
164
/* No candidate byte at all: return 0.  */
165 andcc %o5, %g2, %g0 /* IEU1 Group */
166 be,pt %xcc, 12f /* CTI */
/* Delay slot, always executed: %o2 = bytes left from %o0 to the end.  */
167 sub %o2, %o0, %o2 /* IEU0 */
168 tst %o2 /* IEU1 Group */
169
/* Test bytes from most significant down, stopping with 0 as soon as
   the number of bytes left is used up.  Every instruction that
   follows a branch here is a non-annulled delay slot: each
   "be ..., 13f/14f/..." is followed by the cmp for the next length
   check, and each "be ..., 12f" by the srlx for the next byte.  */
170 be,pn %XCC, 12f /* CTI */
171 srlx %o4, 56, %g5 /* IEU0 */
172 andcc %g5, 0xff, %g0 /* IEU1 Group */
173 be,pn %icc, 13f /* CTI */
174
175 cmp %o2, 1 /* IEU0 */
176 be,pn %XCC, 12f /* CTI Group */
177 srlx %o4, 48, %g5 /* IEU0 */
178 andcc %g5, 0xff, %g0 /* IEU1 Group */
179
180 be,pn %icc, 14f /* CTI */
181 cmp %o2, 2 /* IEU1 Group */
182 be,pn %XCC, 12f /* CTI */
183 srlx %o4, 40, %g5 /* IEU0 */
184
185 andcc %g5, 0xff, %g0 /* IEU1 Group */
186 be,pn %icc, 15f /* CTI */
187 cmp %o2, 3 /* IEU1 Group */
188 be,pn %XCC, 12f /* CTI */
189
190 srlx %o4, 32, %g5 /* IEU0 */
191 andcc %g5, 0xff, %g0 /* IEU1 Group */
192 be,pn %icc, 16f /* CTI */
193 cmp %o2, 4 /* IEU1 Group */
194
195 be,pn %XCC, 12f /* CTI */
196 srlx %o4, 24, %g5 /* IEU0 */
197 andcc %g5, 0xff, %g0 /* IEU1 Group */
198 be,pn %icc, 17f /* CTI */
199
200 cmp %o2, 5 /* IEU1 Group */
201 be,pn %XCC, 12f /* CTI */
202 srlx %o4, 16, %g5 /* IEU0 */
203 andcc %g5, 0xff, %g0 /* IEU1 Group */
204
205 be,pn %icc, 18f /* CTI */
206 cmp %o2, 6 /* IEU1 Group */
207 be,pn %XCC, 12f /* CTI */
208 srlx %o4, 8, %g5 /* IEU0 */
209
210 andcc %g5, 0xff, %g0 /* IEU1 Group */
211 be,pn %icc, 19f /* CTI */
212 nop /* IEU0 */
/* 12: not found (also reached for n == 0): return 0.  */
213 12: retl /* CTI+IEU1 Group */
214
215 clr %o0 /* IEU0 */
216 nop /* Stub */
/* 13: match at the address already in %o0.  */
217 13: retl /* CTI+IEU1 Group */
218 nop /* IEU0 */
219
/* 14..19: match in bytes 1..6 of the tail xword; %o0 is its start.  */
220 14: retl /* CTI+IEU1 Group */
221 add %o0, 1, %o0 /* IEU0 */
222 15: retl /* CTI+IEU1 Group */
223 add %o0, 2, %o0 /* IEU0 */
224
225 16: retl /* CTI+IEU1 Group */
226 add %o0, 3, %o0 /* IEU0 */
227 17: retl /* CTI+IEU1 Group */
228 add %o0, 4, %o0 /* IEU0 */
229
230 18: retl /* CTI+IEU1 Group */
231 add %o0, 5, %o0 /* IEU0 */
232 19: retl /* CTI+IEU1 Group */
233 add %o0, 6, %o0 /* IEU0 */
234
/* 21: unaligned start.  %o0 already points at the second byte.  If
   that is the end pointer, return 0 through 12.  */
235 21: cmp %o0, %o2 /* IEU1 */
236 be,pn %XCC, 12b /* CTI */
/* Delay slot, always executed: finish %g2.  */
237 sllx %g1, 7, %g2 /* IEU0 Group */
238 ldub [%o0], %o3 /* Load */
239
/* Finish %g3 (ch in all eight bytes).  */
240 or %g3, %g5, %g3 /* IEU1 */
/* 22: byte loop; %o3 holds the byte at %o0.  Once %o0 is 8-byte
   aligned, switch to the xword loop at 1; the annulled delay slot
   loads the xword only when the branch is taken.  */
241 22: andcc %o0, 7, %g0 /* IEU1 Group */
242 be,a,pn %icc, 1b /* CTI */
243 ldx [%o0], %o3 /* Load */
244
/* Byte equals ch: return through 23.  The add is a non-annulled delay
   slot, so %o0 is advanced on both paths.  */
245 cmp %o3, %o1 /* IEU1 Group */
246 be,pn %xcc, 23f /* CTI */
247 add %o0, 1, %o0 /* IEU0 */
/* More bytes before the end pointer: load the next one (annulled
   delay slot, only when looping) and repeat.  Otherwise return 0.  */
248 cmp %o0, %o2 /* IEU1 Group */
249
250 bne,a,pt %XCC, 22b /* CTI */
251 ldub [%o0], %o3 /* Load */
252 retl /* CTI+IEU1 Group */
253 clr %o0 /* IEU0 */
254
/* 23: match in the byte loop; undo the increment done in the delay
   slot of the branch that came here.  */
255 23: retl /* CTI+IEU1 Group */
256 add %o0, -1, %o0 /* IEU0 */
257 END(__memchr)
258
/* Symbol aliases for the routine above.  The macros used here are not
   defined in this file (presumably they come from <sysdep.h> or a
   header it includes -- TODO confirm).  Going by their names, they
   make memchr, and __ubp_memchr when __BOUNDED_POINTERS__ is false,
   weak aliases of __memchr, and emit the hidden-builtin definition
   for memchr.  */
259 weak_alias (__memchr, memchr)
260 #if !__BOUNDED_POINTERS__
261 weak_alias (__memchr, __ubp_memchr)
262 #endif
263 libc_hidden_builtin_def (memchr)