/* Optimized memset implementation for PowerPC64.
   Copyright (C) 1997-2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
#include <sysdep.h>

/* void * [r3] memset (void *s [r3], int c [r4], size_t n [r5]));
   Returns 's'.

   The memset is done in three sizes: byte (8 bits), word (32 bits),
   cache line (256 bits).  There is a special case for setting cache
   lines to 0, to take advantage of the dcbz instruction.  */

#ifndef MEMSET
# define MEMSET memset
#endif
	.machine power4
ENTRY_TOCLESS (MEMSET, 5)
	CALL_MCOUNT 3

/* Register roles.  Note rMEMP0/rRTN alias r3, and rMEMP2, rNEG64 and
   rCLS all alias r8 — their live ranges do not overlap.  */
#define rTMP	r0
#define rRTN	r3	/* Initial value of 1st argument.  */
#define rMEMP0	r3	/* Original value of 1st arg.  */
#define rCHR	r4	/* Char to set in each byte.  */
#define rLEN	r5	/* Length of region to set.  */
#define rMEMP	r6	/* Address at which we are storing.  */
#define rALIGN	r7	/* Number of bytes we are setting now (when aligning).  */
#define rMEMP2	r8

#define rNEG64	r8	/* Constant -64 for clearing with dcbz.  */
#define rCLS	r8	/* Cache line size obtained from static.  */
#define rCLM	r9	/* Cache line size mask to check for cache alignment.  */
L(_memset):
/* Take care of case for size <= 4.  */
	cmpldi	cr1, rLEN, 8
	andi.	rALIGN, rMEMP0, 7
	mr	rMEMP, rMEMP0
	ble-	cr1, L(small)

/* Align to doubleword boundary.  */
	cmpldi	cr5, rLEN, 31
	insrdi	rCHR, rCHR, 8, 48	/* Replicate byte to halfword.  */
	beq+	L(aligned2)
	mtcrf	0x01, rMEMP0
	subfic	rALIGN, rALIGN, 8
	cror	28,30,31		/* Detect odd word aligned.  */
	add	rMEMP, rMEMP, rALIGN
	sub	rLEN, rLEN, rALIGN
	insrdi	rCHR, rCHR, 16, 32	/* Replicate halfword to word.  */
	bt	29, L(g4)
/* Process the even word of doubleword.  */
	bf+	31, L(g2)
	stb	rCHR, 0(rMEMP0)
	bt	30, L(g4x)
L(g2):
	sth	rCHR, -6(rMEMP)
L(g4x):
	stw	rCHR, -4(rMEMP)
	b	L(aligned)
/* Process the odd word of doubleword.  */
L(g4):
	bf	28, L(g4x)	/* If false, word aligned on odd word.  */
	bf+	31, L(g0)
	stb	rCHR, 0(rMEMP0)
	bt	30, L(aligned)
L(g0):
	sth	rCHR, -2(rMEMP)

/* Handle the case of size < 31.  */
L(aligned2):
	insrdi	rCHR, rCHR, 16, 32	/* Replicate halfword to word.  */
L(aligned):
	mtcrf	0x01, rLEN
	ble	cr5, L(medium)
/* Align to 32-byte boundary.  */
	andi.	rALIGN, rMEMP, 0x18
	subfic	rALIGN, rALIGN, 0x20
	insrdi	rCHR, rCHR, 32, 0	/* Replicate word to double word.  */
	beq	L(caligned)
	mtcrf	0x01, rALIGN
	add	rMEMP, rMEMP, rALIGN
	sub	rLEN, rLEN, rALIGN
	cmplwi	cr1, rALIGN, 0x10
	mr	rMEMP2, rMEMP
	bf	28, L(a1)
	stdu	rCHR, -8(rMEMP2)
L(a1):	blt	cr1, L(a2)
	std	rCHR, -8(rMEMP2)
	stdu	rCHR, -16(rMEMP2)
L(a2):

/* Now aligned to a 32 byte boundary.  */
L(caligned):
	cmpldi	cr1, rCHR, 0
	clrrdi.	rALIGN, rLEN, 5
	mtcrf	0x01, rLEN
	beq	cr1, L(zloopstart) /* Special case for clearing memory using dcbz.  */
L(nondcbz):
	srdi	rTMP, rALIGN, 5
	mtctr	rTMP
	beq	L(medium)	/* We may not actually get to do a full line.  */
	clrldi.	rLEN, rLEN, 59
	add	rMEMP, rMEMP, rALIGN
	li	rNEG64, -0x40
	bdz	L(cloopdone)

L(c3):	dcbtst	rNEG64, rMEMP
	std	rCHR, -8(rMEMP)
	std	rCHR, -16(rMEMP)
	std	rCHR, -24(rMEMP)
	stdu	rCHR, -32(rMEMP)
	bdnz	L(c3)
L(cloopdone):
	std	rCHR, -8(rMEMP)
	std	rCHR, -16(rMEMP)
	cmpldi	cr1, rLEN, 16
	std	rCHR, -24(rMEMP)
	stdu	rCHR, -32(rMEMP)
	beqlr
	add	rMEMP, rMEMP, rALIGN
	b	L(medium_tail2)

	.align 5
/* Clear lines of memory in 128-byte chunks.  */
L(zloopstart):
/* If the remaining length is less the 32 bytes, don't bother getting
   the cache line size.  */
	beq	L(medium)
	li	rCLS,128  /* cache line size is 128 */

/* Now we know the cache line size, and it is not 32-bytes, but
   we may not yet be aligned to the cache line.  May have a partial
   line to fill, so touch it 1st.  */
	dcbt	0,rMEMP
L(getCacheAligned):
	cmpldi	cr1,rLEN,32
	andi.	rTMP,rMEMP,127
	blt	cr1,L(handletail32)
	beq	L(cacheAligned)
	addi	rMEMP,rMEMP,32
	addi	rLEN,rLEN,-32
	std	rCHR,-32(rMEMP)
	std	rCHR,-24(rMEMP)
	std	rCHR,-16(rMEMP)
	std	rCHR,-8(rMEMP)
	b	L(getCacheAligned)

/* Now we are aligned to the cache line and can use dcbz.  */
L(cacheAligned):
	cmpld	cr1,rLEN,rCLS
	blt	cr1,L(handletail32)
	dcbz	0,rMEMP
	subf	rLEN,rCLS,rLEN
	add	rMEMP,rMEMP,rCLS
	b	L(cacheAligned)

/* We are here because the cache line size was set and was not 32-bytes
   and the remainder (rLEN) is less than the actual cache line size.
   So set up the preconditions for L(nondcbz) and go there.  */
L(handletail32):
	clrrwi.	rALIGN, rLEN, 5
	b	L(nondcbz)

	.align 5
L(small):
/* Memset of 8 bytes or less.  */
	cmpldi	cr6, rLEN, 4
	cmpldi	cr5, rLEN, 1
	ble	cr6,L(le4)
	subi	rLEN, rLEN, 4
	stb	rCHR,0(rMEMP)
	stb	rCHR,1(rMEMP)
	stb	rCHR,2(rMEMP)
	stb	rCHR,3(rMEMP)
	addi	rMEMP,rMEMP, 4
	cmpldi	cr5, rLEN, 1
L(le4):
	cmpldi	cr1, rLEN, 3
	bltlr	cr5
	stb	rCHR, 0(rMEMP)
	beqlr	cr5
	stb	rCHR, 1(rMEMP)
	bltlr	cr1
	stb	rCHR, 2(rMEMP)
	beqlr	cr1
	stb	rCHR, 3(rMEMP)
	blr

/* Memset of 0-31 bytes.  */
	.align 5
L(medium):
	insrdi	rCHR, rCHR, 32, 0	/* Replicate word to double word.  */
	cmpldi	cr1, rLEN, 16
L(medium_tail2):
	add	rMEMP, rMEMP, rLEN
L(medium_tail):
	bt-	31, L(medium_31t)
	bt-	30, L(medium_30t)
L(medium_30f):
	bt-	29, L(medium_29t)
L(medium_29f):
	bge-	cr1, L(medium_27t)
	bflr-	28
	std	rCHR, -8(rMEMP)
	blr

L(medium_31t):
	stbu	rCHR, -1(rMEMP)
	bf-	30, L(medium_30f)
L(medium_30t):
	sthu	rCHR, -2(rMEMP)
	bf-	29, L(medium_29f)
L(medium_29t):
	stwu	rCHR, -4(rMEMP)
	blt-	cr1, L(medium_27f)
L(medium_27t):
	std	rCHR, -8(rMEMP)
	stdu	rCHR, -16(rMEMP)
L(medium_27f):
	bflr-	28
L(medium_28t):
	std	rCHR, -8(rMEMP)
	blr
END_GEN_TB (MEMSET,TB_TOCLESS)

libc_hidden_builtin_def (memset)
240
/* Copied from bzero.S to prevent the linker from inserting a stub
   between bzero and memset.  __bzero (s [r3], n [r4]) is implemented
   by rearranging the arguments into memset's convention and falling
   into the memset code above.  */
ENTRY_TOCLESS (__bzero)
	CALL_MCOUNT 3
	mr	r5,r4		/* Length moves to memset's n (r5).  */
	li	r4,0		/* Fill byte is zero.  */
	b	L(_memset)	/* Tail-branch into memset's body.  */
END (__bzero)
#ifndef __bzero
weak_alias (__bzero, bzero)
#endif