]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/powerpc/powerpc64/memset.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / powerpc / powerpc64 / memset.S
1 /* Optimized memset implementation for PowerPC64.
2 Copyright (C) 1997-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19 #include <sysdep.h>
20
/* TOC entry holding the address of the global __cache_line_size
   variable (initialized by startup/loader code).  It is loaded below
   via .LC0@toc(r2) so the dcbz fast path can use the run-time cache
   line size.  */
21 .section ".toc","aw"
22 .LC0:
23 .tc __cache_line_size[TC],__cache_line_size
24 .section ".text"
25 .align 2
26
27 /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
28 Returns 's'.
29
30 The memset is done in three sizes: byte (8 bits), word (32 bits),
31 cache line (256 bits). There is a special case for setting cache lines
32 to 0, to take advantage of the dcbz instruction. */
33
34 EALIGN (memset, 5, 0)
35 CALL_MCOUNT 3
36
37 #define rTMP r0
38 #define rRTN r3 /* Initial value of 1st argument. */
39 #define rMEMP0 r3 /* Original value of 1st arg. */
40 #define rCHR r4 /* Char to set in each byte. */
41 #define rLEN r5 /* Length of region to set. */
42 #define rMEMP r6 /* Address at which we are storing. */
43 #define rALIGN r7 /* Number of bytes we are setting now (when aligning). */
44 #define rMEMP2 r8
45
46 #define rNEG64 r8 /* Constant -64 for clearing with dcbz. */
47 #define rCLS r8 /* Cache line size obtained from static. */
48 #define rCLM r9 /* Cache line size mask to check for cache alignment. */
/* Entry point shared with __bzero below; rRTN/rMEMP0 (r3) is never
   modified so it is still the return value at every blr.  */
49 L(_memset):
50 /* Take care of case for size <= 8. */
51 cmpldi cr1, rLEN, 8
52 andi. rALIGN, rMEMP0, 7
53 mr rMEMP, rMEMP0
54 ble- cr1, L(small)
55
56 /* Align to doubleword boundary. */
57 cmpldi cr5, rLEN, 31
58 insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
59 beq+ L(aligned2)
/* cr7 gets the low 4 bits of the original address; bits 31/30/29 of
   the CR then reflect address bits 0/1/2 for the bt/bf tests below.  */
60 mtcrf 0x01, rMEMP0
61 subfic rALIGN, rALIGN, 8
62 cror 28,30,31 /* Detect odd word aligned. */
63 add rMEMP, rMEMP, rALIGN
64 sub rLEN, rLEN, rALIGN
65 insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
66 bt 29, L(g4)
67 /* Process the even word of doubleword. */
68 bf+ 31, L(g2)
69 stb rCHR, 0(rMEMP0)
70 bt 30, L(g4x)
71 L(g2):
72 sth rCHR, -6(rMEMP)
73 L(g4x):
74 stw rCHR, -4(rMEMP)
75 b L(aligned)
76 /* Process the odd word of doubleword. */
77 L(g4):
78 bf 28, L(g4x) /* If false, word aligned on odd word. */
79 bf+ 31, L(g0)
80 stb rCHR, 0(rMEMP0)
81 bt 30, L(aligned)
82 L(g0):
83 sth rCHR, -2(rMEMP)
84
85 /* Handle the case of size < 31. */
86 L(aligned2):
87 insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
88 L(aligned):
/* cr7 = low 4 bits of the remaining length, consumed by L(medium).  */
89 mtcrf 0x01, rLEN
90 ble cr5, L(medium)
91 /* Align to 32-byte boundary. */
92 andi. rALIGN, rMEMP, 0x18
93 subfic rALIGN, rALIGN, 0x20
94 insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
95 beq L(caligned)
96 mtcrf 0x01, rALIGN
97 add rMEMP, rMEMP, rALIGN
98 sub rLEN, rLEN, rALIGN
99 cmplwi cr1, rALIGN, 0x10
100 mr rMEMP2, rMEMP
101 bf 28, L(a1)
102 stdu rCHR, -8(rMEMP2)
103 L(a1): blt cr1, L(a2)
104 std rCHR, -8(rMEMP2)
105 stdu rCHR, -16(rMEMP2)
106 L(a2):
107
108 /* Now aligned to a 32 byte boundary. */
109 L(caligned):
110 cmpldi cr1, rCHR, 0
111 clrrdi. rALIGN, rLEN, 5 /* rALIGN = length rounded down to 32. */
112 mtcrf 0x01, rLEN
113 beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */
/* Store path for a nonzero fill byte: 32 bytes (four doublewords)
   per iteration, with a dcbtst touch one 64-byte block ahead.  */
114 L(nondcbz):
115 srdi rTMP, rALIGN, 5
116 mtctr rTMP
117 beq L(medium) /* We may not actually get to do a full line. */
118 clrldi. rLEN, rLEN, 59 /* rLEN = remainder below 32 bytes. */
119 add rMEMP, rMEMP, rALIGN
120 li rNEG64, -0x40
121 bdz L(cloopdone)
122
123 L(c3): dcbtst rNEG64, rMEMP
124 std rCHR, -8(rMEMP)
125 std rCHR, -16(rMEMP)
126 std rCHR, -24(rMEMP)
127 stdu rCHR, -32(rMEMP)
128 bdnz L(c3)
129 L(cloopdone):
130 std rCHR, -8(rMEMP)
131 std rCHR, -16(rMEMP)
132 cmpldi cr1, rLEN, 16
133 std rCHR, -24(rMEMP)
134 stdu rCHR, -32(rMEMP)
135 beqlr
136 add rMEMP, rMEMP, rALIGN
137 b L(medium_tail2)
138
139 .align 5
140 /* Clear full cache lines with dcbz, one line (rCLS bytes) per
   iteration; the line size is read at run time below. */
141 L(zloopstart):
142 /* If the remaining length is less than 32 bytes, don't bother getting
143 the cache line size. */
144 beq L(medium)
145 ld rCLS,.LC0@toc(r2) /* Load &__cache_line_size via the TOC. */
146 lwz rCLS,0(rCLS)
147 /* If the cache line size was not set just go to L(nondcbz) which is
148 safe for any cache line size. */
149 cmpldi cr1,rCLS,0
150 beq cr1,L(nondcbz)
151
152
153 /* Now we know the cache line size, and it is not 32-bytes, but
154 we may not yet be aligned to the cache line. May have a partial
155 line to fill, so touch it 1st. */
156 dcbt 0,rMEMP
157 addi rCLM,rCLS,-1 /* Line-size mask (line size is a power of 2). */
158 L(getCacheAligned):
/* Store 32 bytes at a time until rMEMP lands on a cache-line
   boundary (or fewer than 32 bytes remain).  */
159 cmpldi cr1,rLEN,32
160 and. rTMP,rCLM,rMEMP
161 blt cr1,L(handletail32)
162 beq L(cacheAligned)
163 addi rMEMP,rMEMP,32
164 addi rLEN,rLEN,-32
165 std rCHR,-32(rMEMP)
166 std rCHR,-24(rMEMP)
167 std rCHR,-16(rMEMP)
168 std rCHR,-8(rMEMP)
169 b L(getCacheAligned)
170
171 /* Now we are aligned to the cache line and can use dcbz. */
172 L(cacheAligned):
173 cmpld cr1,rLEN,rCLS
174 blt cr1,L(handletail32)
175 dcbz 0,rMEMP /* Zero one whole cache line in the data cache. */
176 subf rLEN,rCLS,rLEN
177 add rMEMP,rMEMP,rCLS
178 b L(cacheAligned)
179
180 /* We are here because the cache line size was set and was not 32-bytes
181 and the remainder (rLEN) is less than the actual cache line size.
182 So set up the preconditions for L(nondcbz) and go there. */
183 L(handletail32):
184 clrrwi. rALIGN, rLEN, 5
185 b L(nondcbz)
186
187 .align 5
188 L(small):
189 /* Memset of 8 bytes or less. */
190 cmpldi cr6, rLEN, 4
191 cmpldi cr5, rLEN, 1
192 ble cr6,L(le4)
193 subi rLEN, rLEN, 4
194 stb rCHR,0(rMEMP)
195 stb rCHR,1(rMEMP)
196 stb rCHR,2(rMEMP)
197 stb rCHR,3(rMEMP)
198 addi rMEMP,rMEMP, 4
199 cmpldi cr5, rLEN, 1
200 L(le4):
201 cmpldi cr1, rLEN, 3
202 bltlr cr5
203 stb rCHR, 0(rMEMP)
204 beqlr cr5
205 stb rCHR, 1(rMEMP)
206 bltlr cr1
207 stb rCHR, 2(rMEMP)
208 beqlr cr1
209 stb rCHR, 3(rMEMP)
210 blr
211
212 /* Memset of 0-31 bytes. */
213 .align 5
214 L(medium):
215 insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
216 cmpldi cr1, rLEN, 16
/* Entered with cr7 = low 4 bits of rLEN (from mtcrf 0x01, rLEN):
   CR bit 31/30/29/28 set means 1/2/4/8 trailing bytes respectively.
   Stores work backward from the end of the region.  */
217 L(medium_tail2):
218 add rMEMP, rMEMP, rLEN
219 L(medium_tail):
220 bt- 31, L(medium_31t)
221 bt- 30, L(medium_30t)
222 L(medium_30f):
223 bt- 29, L(medium_29t)
224 L(medium_29f):
225 bge- cr1, L(medium_27t)
226 bflr- 28
227 std rCHR, -8(rMEMP)
228 blr
229
230 L(medium_31t):
231 stbu rCHR, -1(rMEMP)
232 bf- 30, L(medium_30f)
233 L(medium_30t):
234 sthu rCHR, -2(rMEMP)
235 bf- 29, L(medium_29f)
236 L(medium_29t):
237 stwu rCHR, -4(rMEMP)
238 blt- cr1, L(medium_27f)
239 L(medium_27t):
240 std rCHR, -8(rMEMP)
241 stdu rCHR, -16(rMEMP)
242 L(medium_27f):
243 bflr- 28
244 L(medium_28t):
245 std rCHR, -8(rMEMP)
246 blr
247 END_GEN_TB (memset,TB_TOCLESS)
248 libc_hidden_builtin_def (memset)
249
250 #ifndef NO_BZERO_IMPL
251 /* Copied from bzero.S to prevent the linker from inserting a stub
252 between bzero and memset. */
253 ENTRY (__bzero)
254 CALL_MCOUNT 3
/* bzero (s [r3], n [r4]) is memset (s, 0, n): move the length from
   r4 into r5 (must happen before r4 is clobbered), load 0 as the
   fill byte, and tail-branch into the shared memset body above.
   r3 (the buffer pointer) is already in place as both the first
   argument and the return value.  */
255 mr r5,r4
256 li r4,0
257 b L(_memset)
258 END_GEN_TB (__bzero,TB_TOCLESS)
259
260 weak_alias (__bzero, bzero)
261 #endif