]>
Commit | Line | Data |
---|---|---|
cfc91acd | 1 | /* Optimized memset implementation for PowerPC64. |
124dcac8 | 2 | Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc. |
cfc91acd RM |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, write to the Free | |
17 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
18 | 02111-1307 USA. */ | |
19 | ||
20 | #include <sysdep.h> | |
21 | #include <bp-sym.h> | |
22 | #include <bp-asm.h> | |
23 | ||
/* Define a global static that can hold the cache line size.  The
   assumption is that startup code will access the "aux vector" to
   obtain the value set by the kernel and store it into this
   variable.  */
	.globl __cache_line_size
	.lcomm __cache_line_size,4,4
/* TOC entry so the code below can load the address of
   __cache_line_size through r2 (the TOC pointer).  */
	.section	".toc","aw"
.LC0:
	.tc __cache_line_size[TC],__cache_line_size
	.section	".text"
	.align 2
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
   Returns 's'.

   The memset is done in three sizes: byte (8 bits), word (32 bits),
   cache line (256 bits).  There is a special case for setting cache lines
   to 0, to take advantage of the dcbz instruction.  */

EALIGN (BP_SYM (memset), 5, 0)
	CALL_MCOUNT 3

#define rTMP	r0
#define rRTN	r3	/* Initial value of 1st argument.  */
#if __BOUNDED_POINTERS__
# define rMEMP0	r4	/* Original value of 1st arg.  */
# define rCHR	r5	/* Char to set in each byte.  */
# define rLEN	r6	/* Length of region to set.  */
# define rMEMP	r10	/* Address at which we are storing.  */
#else
# define rMEMP0	r3	/* Original value of 1st arg.  */
# define rCHR	r4	/* Char to set in each byte.  */
# define rLEN	r5	/* Length of region to set.  */
# define rMEMP	r6	/* Address at which we are storing.  */
#endif
#define rALIGN	r7	/* Number of bytes we are setting now (when aligning).  */
#define rMEMP2	r8

#define rNEG64	r8	/* Constant -64 for clearing with dcbz.  */
#define rCLS	r8	/* Cache line size obtained from static.  */
#define rCLM	r9	/* Cache line size mask to check for cache alignment.  */

/* Local entry point used by __bzero below; branching here rather than
   to the global symbol keeps the linker from inserting a call stub
   between bzero and memset.  */
___memset:
#if __BOUNDED_POINTERS__
	cmpldi	cr1, rRTN, 0
	CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN)
	beq	cr1, L(b0)
	STORE_RETURN_VALUE (rMEMP0)
	STORE_RETURN_BOUNDS (rTMP, rTMP2)
L(b0):
#endif
/* Take care of case for size <= 8 (handled byte-by-byte in L(small)).  */
	cmpldi	cr1, rLEN, 8
	andi.	rALIGN, rMEMP0, 7
	mr	rMEMP, rMEMP0
	ble-	cr1, L(small)

/* Align to doubleword boundary.  */
	cmpldi	cr5, rLEN, 31
	rlwimi	rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword.  */
	beq+	L(aligned2)
	mtcrf	0x01, rMEMP0	/* Low 4 address bits -> CR7 for bt/bf tests.  */
	subfic	rALIGN, rALIGN, 8
	cror	28,30,31	/* Detect odd word aligned.  */
	add	rMEMP, rMEMP, rALIGN
	sub	rLEN, rLEN, rALIGN
	rlwimi	rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word.  */
	bt	29, L(g4)
/* Process the even word of doubleword.  */
	bf+	31, L(g2)
	stb	rCHR, 0(rMEMP0)
	bt	30, L(g4x)
L(g2):
	sth	rCHR, -6(rMEMP)
L(g4x):
	stw	rCHR, -4(rMEMP)
	b	L(aligned)
/* Process the odd word of doubleword.  */
L(g4):
	bf	28, L(g4x) /* If false, word aligned on odd word.  */
	bf+	31, L(g0)
	stb	rCHR, 0(rMEMP0)
	bt	30, L(aligned)
L(g0):
	sth	rCHR, -2(rMEMP)

/* Handle the case of size < 31.  */
L(aligned2):
	rlwimi	rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word.  */
L(aligned):
	mtcrf	0x01, rLEN
	ble	cr5, L(medium)
/* Align to 32-byte boundary.  */
	andi.	rALIGN, rMEMP, 0x18
	subfic	rALIGN, rALIGN, 0x20
	insrdi	rCHR,rCHR,32,0 /* Replicate word to double word.  */
	beq	L(caligned)
	mtcrf	0x01, rALIGN
	add	rMEMP, rMEMP, rALIGN
	sub	rLEN, rLEN, rALIGN
	cmplwi	cr1, rALIGN, 0x10
	mr	rMEMP2, rMEMP
	bf	28, L(a1)
	stdu	rCHR, -8(rMEMP2)
L(a1):	blt	cr1, L(a2)
	std	rCHR, -8(rMEMP2)
	stdu	rCHR, -16(rMEMP2)
L(a2):

/* Now aligned to a 32 byte boundary.  */
L(caligned):
	cmpldi	cr1, rCHR, 0
	clrrdi.	rALIGN, rLEN, 5	/* rALIGN = length rounded down to 32.  */
	mtcrf	0x01, rLEN
	beq	cr1, L(zloopstart) /* Special case for clearing memory using dcbz.  */
L(nondcbz):
	srdi	rTMP, rALIGN, 5	/* Loop count = full 32-byte chunks.  */
	mtctr	rTMP
	beq	L(medium)	/* We may not actually get to do a full line.  */
	clrldi.	rLEN, rLEN, 59	/* rLEN = residual bytes (mod 32).  */
	add	rMEMP, rMEMP, rALIGN
	li	rNEG64, -0x40
	bdz	L(cloopdone)

/* Store 32 bytes per iteration, touching the line 64 bytes back
   (dcbtst) to prime the cache for the following store.  */
L(c3):	dcbtst	rNEG64, rMEMP
	std	rCHR, -8(rMEMP)
	std	rCHR, -16(rMEMP)
	std	rCHR, -24(rMEMP)
	stdu	rCHR, -32(rMEMP)
	bdnz	L(c3)
L(cloopdone):
	std	rCHR, -8(rMEMP)
	std	rCHR, -16(rMEMP)
	cmpldi	cr1, rLEN, 16
	std	rCHR, -24(rMEMP)
	stdu	rCHR, -32(rMEMP)
	beqlr			/* No residual bytes -> done.  */
	add	rMEMP, rMEMP, rALIGN
	b	L(medium_tail2)

	.align 5
/* Clear lines of memory in 128-byte chunks.  */
L(zloopstart):
/* If the remaining length is less than 32 bytes, don't bother getting
   the cache line size.  */
	beq	L(medium)
	ld	rCLS,.LC0@toc(r2)
	lwz	rCLS,0(rCLS)
/* If the cache line size was not set just go to L(nondcbz), which is
   safe for any cache line size.  */
	cmpldi	cr1,rCLS,0
	beq	cr1,L(nondcbz)


/* Now we know the cache line size, and it is not 32-bytes, but
   we may not yet be aligned to the cache line.  May have a partial
   line to fill, so touch it 1st.  */
	dcbt	0,rMEMP
	addi	rCLM,rCLS,-1	/* Mask assumes line size is a power of 2.  */
L(getCacheAligned):
	cmpldi	cr1,rLEN,32
	and.	rTMP,rCLM,rMEMP
	blt	cr1,L(handletail32)
	beq	L(cacheAligned)
/* Fill 32 bytes at a time until rMEMP reaches a cache-line boundary.  */
	addi	rMEMP,rMEMP,32
	addi	rLEN,rLEN,-32
	std	rCHR,-32(rMEMP)
	std	rCHR,-24(rMEMP)
	std	rCHR,-16(rMEMP)
	std	rCHR,-8(rMEMP)
	b	L(getCacheAligned)

/* Now we are aligned to the cache line and can use dcbz.  */
L(cacheAligned):
	cmpld	cr1,rLEN,rCLS
	blt	cr1,L(handletail32)
	dcbz	0,rMEMP		/* Zero a whole cache line in one shot.  */
	subf	rLEN,rCLS,rLEN
	add	rMEMP,rMEMP,rCLS
	b	L(cacheAligned)

/* We are here because the cache line size was set and was not 32-bytes
   and the remainder (rLEN) is less than the actual cache line size.
   So set up the preconditions for L(nondcbz) and go there.  */
L(handletail32):
	clrrwi.	rALIGN, rLEN, 5
	b	L(nondcbz)

	.align 5
L(small):
/* Memset of 8 bytes or less.  */
	cmpldi	cr6, rLEN, 4
	cmpldi	cr5, rLEN, 1
	ble	cr6,L(le4)
	subi	rLEN, rLEN, 4
	stb	rCHR,0(rMEMP)
	stb	rCHR,1(rMEMP)
	stb	rCHR,2(rMEMP)
	stb	rCHR,3(rMEMP)
	addi	rMEMP,rMEMP, 4
	cmpldi	cr5, rLEN, 1
L(le4):
	cmpldi	cr1, rLEN, 3
	bltlr	cr5		/* 0 bytes left -> return.  */
	stb	rCHR, 0(rMEMP)
	beqlr	cr5		/* 1 byte -> done.  */
	stb	rCHR, 1(rMEMP)
	bltlr	cr1		/* 2 bytes -> done.  */
	stb	rCHR, 2(rMEMP)
	beqlr	cr1		/* 3 bytes -> done.  */
	stb	rCHR, 3(rMEMP)
	blr

/* Memset of 0-31 bytes.  CR7 (from mtcrf 0x01,rLEN) holds the low
   length bits; stores walk backward from the end of the region.  */
	.align 5
L(medium):
	insrdi	rCHR,rCHR,32,0 /* Replicate word to double word.  */
	cmpldi	cr1, rLEN, 16
L(medium_tail2):
	add	rMEMP, rMEMP, rLEN
L(medium_tail):
	bt-	31, L(medium_31t)	/* Length bit 0 set -> odd byte.  */
	bt-	30, L(medium_30t)	/* Length bit 1 set -> halfword.  */
L(medium_30f):
	bt-	29, L(medium_29t)	/* Length bit 2 set -> word.  */
L(medium_29f):
	bge-	cr1, L(medium_27t)
	bflr-	28
	std	rCHR, -8(rMEMP)
	blr

L(medium_31t):
	stbu	rCHR, -1(rMEMP)
	bf-	30, L(medium_30f)
L(medium_30t):
	sthu	rCHR, -2(rMEMP)
	bf-	29, L(medium_29f)
L(medium_29t):
	stwu	rCHR, -4(rMEMP)
	blt-	cr1, L(medium_27f)
L(medium_27t):
	std	rCHR, -8(rMEMP)
	stdu	rCHR, -16(rMEMP)
L(medium_27f):
	bflr-	28
L(medium_28t):
	std	rCHR, -8(rMEMP)
	blr
END_GEN_TB (BP_SYM (memset),TB_TOCLESS)
libc_hidden_builtin_def (memset)
cfc91acd | 274 | |
/* void __bzero (void *s [r3], size_t n [r4])
   Implemented by shuffling the arguments into memset's registers and
   branching to the local ___memset entry.  Copied from bzero.S to
   prevent the linker from inserting a stub between bzero and memset.  */
ENTRY (BP_SYM (__bzero))
	CALL_MCOUNT 3
#if __BOUNDED_POINTERS__
	mr	r6,r4	/* Length -> rLEN (r6 under bounded pointers).  */
	li	r5,0	/* Fill char = 0 -> rCHR.  */
	mr	r4,r3	/* Buffer -> rMEMP0.  */
	/* Tell memset that we don't want a return value.  */
	li	r3,0
	b	___memset
#else
	mr	r5,r4	/* Length -> rLEN (r5).  */
	li	r4,0	/* Fill char = 0 -> rCHR; buffer stays in r3.  */
	b	___memset
#endif
END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS)

weak_alias (BP_SYM (__bzero), BP_SYM (bzero))