]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/powerpc/powerpc64/memset.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / powerpc / powerpc64 / memset.S
CommitLineData
cfc91acd 1/* Optimized memset implementation for PowerPC64.
bfff8b1b 2 Copyright (C) 1997-2017 Free Software Foundation, Inc.
cfc91acd
RM
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
cfc91acd
RM
18
19#include <sysdep.h>
cfc91acd 20
cfc91acd
RM
21 .section ".toc","aw"
22.LC0:
23 .tc __cache_line_size[TC],__cache_line_size
24 .section ".text"
25 .align 2
26
27/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
28 Returns 's'.
29
30 The memset is done in three sizes: byte (8 bits), word (32 bits),
31 cache line (256 bits). There is a special case for setting cache lines
32 to 0, to take advantage of the dcbz instruction. */
33
2d67d91a 34EALIGN (memset, 5, 0)
d7d06f79 35 CALL_MCOUNT 3
cfc91acd
RM
36
/* Register roles.  Note that rMEMP2, rNEG64 and rCLS all name r8; each
   is used in a different phase of the routine.  */
37#define rTMP r0
38#define rRTN r3 /* Initial value of 1st argument. */
2d67d91a
JM
39#define rMEMP0 r3 /* Original value of 1st arg. */
40#define rCHR r4 /* Char to set in each byte. */
41#define rLEN r5 /* Length of region to set. */
42#define rMEMP r6 /* Address at which we are storing. */
cfc91acd
RM
43#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */
44#define rMEMP2 r8 /* Store pointer used while aligning to a 32-byte boundary. */
45
46#define rNEG64 r8 /* Constant -64, used as the dcbtst offset in the store loop. */
47#define rCLS r8 /* Cache line size loaded from __cache_line_size. */
48#define rCLM r9 /* Cache line size mask to check for cache alignment. */
/* L(_memset) is also the branch target of __bzero (when it is built).  */
0e03d562 49L(_memset):
cfc91acd
RM
50/* Take care of case for size <= 8. */
51 cmpldi cr1, rLEN, 8
52 andi. rALIGN, rMEMP0, 7
53 mr rMEMP, rMEMP0
54 ble- cr1, L(small)
124dcac8 55
cfc91acd
RM
56/* Align to doubleword boundary. */
57 cmpldi cr5, rLEN, 31
3be87c77 58 insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
cfc91acd
RM
59 beq+ L(aligned2)
60 mtcrf 0x01, rMEMP0
61 subfic rALIGN, rALIGN, 8
62 cror 28,30,31 /* Detect odd word aligned. */
63 add rMEMP, rMEMP, rALIGN
64 sub rLEN, rLEN, rALIGN
3be87c77 65 insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
cfc91acd
RM
66 bt 29, L(g4)
67/* Process the even word of doubleword. */
68 bf+ 31, L(g2)
69 stb rCHR, 0(rMEMP0)
70 bt 30, L(g4x)
71L(g2):
72 sth rCHR, -6(rMEMP)
73L(g4x):
74 stw rCHR, -4(rMEMP)
75 b L(aligned)
76/* Process the odd word of doubleword. */
77L(g4):
78 bf 28, L(g4x) /* If false, word aligned on odd word. */
79 bf+ 31, L(g0)
80 stb rCHR, 0(rMEMP0)
81 bt 30, L(aligned)
124dcac8
RM
82L(g0):
83 sth rCHR, -2(rMEMP)
84
cfc91acd
RM
85/* Handle the case of size <= 31 (cr5 was set from the length before
   the alignment bytes were subtracted). */
86L(aligned2):
3be87c77 87 insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
cfc91acd
RM
88L(aligned):
/* Copy the low 4 bits of rLEN into CR bits 28-31; L(medium_tail) tests
   them to pick the tail stores. */
89 mtcrf 0x01, rLEN
90 ble cr5, L(medium)
91/* Align to 32-byte boundary. */
92 andi. rALIGN, rMEMP, 0x18
93 subfic rALIGN, rALIGN, 0x20
3be87c77 94 insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cfc91acd
RM
95 beq L(caligned)
96 mtcrf 0x01, rALIGN
97 add rMEMP, rMEMP, rALIGN
98 sub rLEN, rLEN, rALIGN
99 cmplwi cr1, rALIGN, 0x10
100 mr rMEMP2, rMEMP
101 bf 28, L(a1)
102 stdu rCHR, -8(rMEMP2)
103L(a1): blt cr1, L(a2)
104 std rCHR, -8(rMEMP2)
105 stdu rCHR, -16(rMEMP2)
106L(a2):
107
108/* Now aligned to a 32 byte boundary. */
109L(caligned):
110 cmpldi cr1, rCHR, 0
/* rALIGN = rLEN rounded down to a multiple of 32; cr0 EQ means there is
   no full 32-byte chunk left. */
111 clrrdi. rALIGN, rLEN, 5
124dcac8 112 mtcrf 0x01, rLEN
cfc91acd 113 beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */
124dcac8 114L(nondcbz):
cfc91acd
RM
115 srdi rTMP, rALIGN, 5
116 mtctr rTMP
117 beq L(medium) /* We may not actually get to do a full line. */
118 clrldi. rLEN, rLEN, 59
119 add rMEMP, rMEMP, rALIGN
120 li rNEG64, -0x40
121 bdz L(cloopdone)
122
/* Store 32 bytes per iteration, walking rMEMP downwards. */
123L(c3): dcbtst rNEG64, rMEMP
124 std rCHR, -8(rMEMP)
125 std rCHR, -16(rMEMP)
126 std rCHR, -24(rMEMP)
127 stdu rCHR, -32(rMEMP)
128 bdnz L(c3)
129L(cloopdone):
130 std rCHR, -8(rMEMP)
131 std rCHR, -16(rMEMP)
132 cmpldi cr1, rLEN, 16
133 std rCHR, -24(rMEMP)
134 stdu rCHR, -32(rMEMP)
/* Return if no tail bytes remain (cr0 was set by the clrldi. above). */
135 beqlr
/* The stores stepped rMEMP back down by rALIGN; point it past the
   filled chunks again before handling the tail. */
136 add rMEMP, rMEMP, rALIGN
137 b L(medium_tail2)
138
139 .align 5
140/* Clear memory with dcbz, advancing by the cache line size (rCLS) each time. */
141L(zloopstart):
124dcac8 142/* If the remaining length is less than 32 bytes, don't bother getting
cfc91acd
RM
143 the cache line size. */
144 beq L(medium)
145 ld rCLS,.LC0@toc(r2)
124dcac8
RM
146 lwz rCLS,0(rCLS)
147/* If the cache line size was not set just go to L(nondcbz) which is
148 safe for any cache line size. */
cfc91acd
RM
149 cmpldi cr1,rCLS,0
150 beq cr1,L(nondcbz)
124dcac8
RM
151
152
cfc91acd 153/* Now we know the cache line size is non-zero, but
124dcac8
RM
154 we may not yet be aligned to the cache line. May have a partial
155 line to fill, so touch it 1st. */
156 dcbt 0,rMEMP
cfc91acd
RM
157 addi rCLM,rCLS,-1
/* Store 32 bytes at a time until rMEMP is cache-line aligned or fewer
   than 32 bytes remain. */
158L(getCacheAligned):
159 cmpldi cr1,rLEN,32
160 and. rTMP,rCLM,rMEMP
161 blt cr1,L(handletail32)
162 beq L(cacheAligned)
163 addi rMEMP,rMEMP,32
164 addi rLEN,rLEN,-32
165 std rCHR,-32(rMEMP)
166 std rCHR,-24(rMEMP)
167 std rCHR,-16(rMEMP)
168 std rCHR,-8(rMEMP)
169 b L(getCacheAligned)
124dcac8
RM
170
171/* Now we are aligned to the cache line and can use dcbz. */
cfc91acd
RM
172L(cacheAligned):
173 cmpld cr1,rLEN,rCLS
174 blt cr1,L(handletail32)
175 dcbz 0,rMEMP
176 subf rLEN,rCLS,rLEN
177 add rMEMP,rMEMP,rCLS
178 b L(cacheAligned)
179
180/* We are here because the cache line size was set and the remainder
181 (rLEN) is less than 32 bytes or less than the actual cache line size.
124dcac8 182 So set up the preconditions for L(nondcbz) and go there. */
cfc91acd
RM
183L(handletail32):
/* rALIGN = rLEN rounded down to a multiple of 32; the record form sets
   cr0, which L(nondcbz) tests with beq. */
184 clrrwi. rALIGN, rLEN, 5
185 b L(nondcbz)
186
187 .align 5
188L(small):
189/* Memset of 8 bytes or less. */
190 cmpldi cr6, rLEN, 4
191 cmpldi cr5, rLEN, 1
192 ble cr6,L(le4)
193 subi rLEN, rLEN, 4
194 stb rCHR,0(rMEMP)
195 stb rCHR,1(rMEMP)
196 stb rCHR,2(rMEMP)
197 stb rCHR,3(rMEMP)
198 addi rMEMP,rMEMP, 4
199 cmpldi cr5, rLEN, 1
200L(le4):
201 cmpldi cr1, rLEN, 3
202 bltlr cr5
203 stb rCHR, 0(rMEMP)
204 beqlr cr5
205 stb rCHR, 1(rMEMP)
206 bltlr cr1
207 stb rCHR, 2(rMEMP)
208 beqlr cr1
209 stb rCHR, 3(rMEMP)
210 blr
211
212/* Memset of 0-31 bytes. */
213 .align 5
214L(medium):
3be87c77 215 insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cfc91acd
RM
216 cmpldi cr1, rLEN, 16
217L(medium_tail2):
/* Point rMEMP at the end of the region; the tail is stored backwards. */
218 add rMEMP, rMEMP, rLEN
219L(medium_tail):
220 bt- 31, L(medium_31t)
221 bt- 30, L(medium_30t)
222L(medium_30f):
223 bt- 29, L(medium_29t)
224L(medium_29f):
225 bge- cr1, L(medium_27t)
226 bflr- 28
227 std rCHR, -8(rMEMP)
228 blr
229
230L(medium_31t):
231 stbu rCHR, -1(rMEMP)
232 bf- 30, L(medium_30f)
233L(medium_30t):
234 sthu rCHR, -2(rMEMP)
235 bf- 29, L(medium_29f)
236L(medium_29t):
237 stwu rCHR, -4(rMEMP)
124dcac8 238 blt- cr1, L(medium_27f)
cfc91acd
RM
239L(medium_27t):
240 std rCHR, -8(rMEMP)
241 stdu rCHR, -16(rMEMP)
242L(medium_27f):
243 bflr- 28
244L(medium_28t):
245 std rCHR, -8(rMEMP)
246 blr
2d67d91a 247END_GEN_TB (memset,TB_TOCLESS)
85dd1003 248libc_hidden_builtin_def (memset)
cfc91acd 249
8a29a3d0 250#ifndef NO_BZERO_IMPL
124dcac8 251/* Copied from bzero.S to prevent the linker from inserting a stub
cfc91acd 252 between bzero and memset.

   __bzero receives its second argument in r4; it is moved into r5
   (rLEN), r4 (rCHR) is set to 0, and control branches to the shared
   L(_memset) body, which returns to __bzero's caller. */
2d67d91a 253ENTRY (__bzero)
d7d06f79 254 CALL_MCOUNT 3
cfc91acd
RM
255 mr r5,r4
256 li r4,0
0e03d562 257 b L(_memset)
2d67d91a 258END_GEN_TB (__bzero,TB_TOCLESS)
cfc91acd 259
2d67d91a 260weak_alias (__bzero, bzero)
8a29a3d0 261#endif