/* Copyright (C) 2013-2014 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifdef ANDROID_CHANGES
# include "machine/asm.h"
# include "machine/regdef.h"
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#elif _COMPILING_NEWLIB
# include "machine/asm.h"
# include "machine/regdef.h"
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

/* Check to see if the MIPS architecture we are compiling for supports
   prefetching.  */

#if (__mips == 4) || (__mips == 5) || (__mips == 32) || (__mips == 64)
# ifndef DISABLE_PREFETCH
#  define USE_PREFETCH
# endif
#endif

#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32))
# ifndef DISABLE_DOUBLE
#  define USE_DOUBLE
# endif
#endif

#ifndef USE_DOUBLE
# ifndef DISABLE_DOUBLE_ALIGN
#  define DOUBLE_ALIGN
# endif
#endif

/* Some asm.h files do not have the L macro definition.  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.  */
#ifndef PTR_ADDIU
# ifdef USE_DOUBLE
#  define PTR_ADDIU	daddiu
# else
#  define PTR_ADDIU	addiu
# endif
#endif

/* Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
   or PREFETCH_STORE_STREAMED offers a large performance advantage
   but PREPAREFORSTORE has some special restrictions to consider.

   Prefetch with the 'prepare for store' hint does not copy a memory
   location into the cache; it just allocates a cache line and zeros
   it out.  This means that if you do not write to the entire cache
   line before it is written back to memory, the unwritten bytes get
   zeroed and that data is lost.

   There are ifdef'ed sections of this memset to make sure that it does not
   do prefetches on cache lines that are not going to be completely written.
   This code is only needed and only used when PREFETCH_STORE_HINT is set to
   PREFETCH_HINT_PREPAREFORSTORE.  This code assumes that cache lines are
   no larger than MAX_PREFETCH_SIZE bytes; if the cache line is larger the
   code will not work correctly.  */
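
/* As an illustration (assuming a 32-byte cache line): a PREPAREFORSTORE
   prefetch of the line at 0x1000 allocates and zeros bytes 0x1000-0x101f
   without reading memory.  If the code then stores only to 0x1000-0x100f,
   the eventual writeback still covers the whole line, so bytes
   0x1010-0x101f are silently zeroed.  This is why prefetches near the
   end of the buffer must be suppressed below.  */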

#ifdef USE_PREFETCH
# define PREFETCH_HINT_STORE		1
# define PREFETCH_HINT_STORE_STREAMED	5
# define PREFETCH_HINT_STORE_RETAINED	7
# define PREFETCH_HINT_PREPAREFORSTORE	30

/* If we have not picked out what hints to use at this point use the
   standard load and store prefetch hints.  */
# ifndef PREFETCH_STORE_HINT
#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE
# endif

/* We double everything when USE_DOUBLE is true so we do 2 prefetches to
   get 64 bytes in that case.  The assumption is that each individual
   prefetch brings in 32 bytes.  */
# ifdef USE_DOUBLE
#  define PREFETCH_CHUNK 64
#  define PREFETCH_FOR_STORE(chunk, reg) \
	pref PREFETCH_STORE_HINT, (chunk)*64(reg); \
	pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg)
# else
#  define PREFETCH_CHUNK 32
#  define PREFETCH_FOR_STORE(chunk, reg) \
	pref PREFETCH_STORE_HINT, (chunk)*32(reg)
# endif
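
/* For instance, with USE_DOUBLE defined and PREFETCH_STORE_HINT equal to
   PREFETCH_HINT_PREPAREFORSTORE (30), PREFETCH_FOR_STORE (4, a0) expands to:
	pref 30, 256(a0); pref 30, 288(a0)
   i.e. the two 32-byte halves of the 64-byte chunk four chunks ahead.  */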

/* MAX_PREFETCH_SIZE is the maximum size of a prefetch; it must not be less
   than PREFETCH_CHUNK, the assumed size of each prefetch.  If the real size
   of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE
   hint is used, the code will not work correctly.  If PREPAREFORSTORE is not
   used then MAX_PREFETCH_SIZE does not matter.  */
# define MAX_PREFETCH_SIZE 128
/* PREFETCH_LIMIT is set based on the fact that we never use an offset greater
   than 5 on a STORE prefetch and that a single prefetch can never be larger
   than MAX_PREFETCH_SIZE.  We add the extra 32 when USE_DOUBLE is set because
   we actually do two prefetches in that case, one 32 bytes after the other.  */
# ifdef USE_DOUBLE
#  define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE
# else
#  define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE
# endif
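
/* Numerically: with USE_DOUBLE, PREFETCH_LIMIT is (5 * 64) + 32 + 128 = 480
   bytes; without it, (5 * 32) + 128 = 288 bytes.  */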

# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \
    && ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE)
/* We cannot handle this because the initial prefetches may fetch bytes that
   are before the buffer being copied.  We start copies with an offset
   of 4, so we avoid this situation when using PREPAREFORSTORE.  */
#  error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small."
# endif
#else /* USE_PREFETCH not defined */
# define PREFETCH_FOR_STORE(offset, reg)
#endif

/* Allow the routine to be named something else if desired.  */
#ifndef MEMSET_NAME
# define MEMSET_NAME memset
#endif

/* We load/store 64 bits at a time when USE_DOUBLE is true.
   The C_ prefix stands for CHUNK and is used to avoid macro name
   conflicts with system header files.  */

#ifdef USE_DOUBLE
# define C_ST	sd
# if __MIPSEB
#  define C_STHI	sdl	/* high part is left in big-endian */
# else
#  define C_STHI	sdr	/* high part is right in little-endian */
# endif
#else
# define C_ST	sw
# if __MIPSEB
#  define C_STHI	swl	/* high part is left in big-endian */
# else
#  define C_STHI	swr	/* high part is right in little-endian */
# endif
#endif

/* Bookkeeping values for 32 vs. 64 bit mode.  */
#ifdef USE_DOUBLE
# define NSIZE 8
# define NSIZEMASK 0x3f
# define NSIZEDMASK 0x7f
#else
# define NSIZE 4
# define NSIZEMASK 0x1f
# define NSIZEDMASK 0x3f
#endif
#define UNIT(unit) ((unit)*NSIZE)
#define UNITM1(unit) (((unit)*NSIZE)-1)
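
/* For example, UNIT(4) is 32 bytes with USE_DOUBLE and 16 bytes without;
   UNITM1(4) is 31 and 15 respectively.  */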

#ifdef ANDROID_CHANGES
LEAF(MEMSET_NAME,0)
#else
LEAF(MEMSET_NAME)
#endif

	.set	nomips16
	.set	noreorder
/* If the size is less than 2*NSIZE (8 or 16), go to L(lastb).  Regardless of
   size, copy the dst pointer to v0 for the return value.  */
	slti	t2,a2,(2 * NSIZE)
	bne	t2,zero,L(lastb)
	move	v0,a0

/* If the memset value is not zero, we copy it to all the bytes in a 32 or 64
   bit word.  */
	beq	a1,zero,L(set0)	/* If memset value is zero no smear */
	PTR_SUBU a3,zero,a0
	nop

	/* smear byte into 32 or 64 bit word */
#if ((__mips == 64) || (__mips == 32)) && (__mips_isa_rev >= 2)
# ifdef USE_DOUBLE
	dins	a1, a1, 8, 8	/* Replicate fill byte into half-word.  */
	dins	a1, a1, 16, 16	/* Replicate fill byte into word.  */
	dins	a1, a1, 32, 32	/* Replicate fill byte into dbl word.  */
# else
	ins	a1, a1, 8, 8	/* Replicate fill byte into half-word.  */
	ins	a1, a1, 16, 16	/* Replicate fill byte into word.  */
# endif
#else
# ifdef USE_DOUBLE
	and	a1,0xff
	dsll	t2,a1,8
	or	a1,t2
	dsll	t2,a1,16
	or	a1,t2
	dsll	t2,a1,32
	or	a1,t2
# else
	and	a1,0xff
	sll	t2,a1,8
	or	a1,t2
	sll	t2,a1,16
	or	a1,t2
# endif
#endif
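
/* For example, smearing the fill byte 0xab in the 32-bit case:
   a1 = 0x000000ab -> 0x0000abab -> 0xabababab.  The 64-bit path continues
   one more step to 0xabababababababab.  */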

/* If the destination address is not aligned, do a partial store to get it
   aligned.  If it is already aligned, just jump to L(aligned).  */
L(set0):
	andi	t2,a3,(NSIZE-1)	/* word-unaligned address?  */
	beq	t2,zero,L(aligned)	/* t2 is the unalignment count */
	PTR_SUBU a2,a2,t2
	C_STHI	a1,0(a0)
	PTR_ADDU a0,a0,t2
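
/* For instance, on a little-endian 32-bit target with a0 = 0x1001:
   a3 = -a0, so t2 = 3, and C_STHI (swr here) stores the three low bytes
   of a1 at 0x1001-0x1003, leaving a0 word-aligned at 0x1004.  */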

L(aligned):
/* If USE_DOUBLE is not set we may still want to align the data on a 16
   byte boundary instead of an 8 byte boundary to maximize the opportunity
   of proAptiv chips to do memory bonding (combining two sequential 4
   byte stores into one 8 byte store).  We know there are at least 4 bytes
   left to store or we would have jumped to L(lastb) earlier in the code.  */
#ifdef DOUBLE_ALIGN
	andi	t2,a3,4
	beq	t2,zero,L(double_aligned)
	PTR_SUBU a2,a2,t2
	sw	a1,0(a0)
	PTR_ADDU a0,a0,t2
L(double_aligned):
#endif

/* Now the destination is aligned to a word (or double word) boundary.
   Set a2 to count how many bytes we have to store after all the 64/128 byte
   chunks are done and a3 to the dst pointer after all the 64/128 byte
   chunks have been stored.  We will loop, incrementing a0, until it equals
   a3.  */
	andi	t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
	beq	a2,t8,L(chkw)	 /* if a2==t8, no 64-byte/128-byte chunks */
	PTR_SUBU a3,a2,t8	/* subtract from a2 the remainder */
	PTR_ADDU a3,a0,a3	/* Now a3 is the final dst after loop */
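
/* For example, in 32-bit mode with a2 = 300: t8 = 300 & 0x3f = 44, so
   a3 = a0 + 256 and the loop below runs four times, 64 bytes per pass.  */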

/* When in the loop we may prefetch with the 'prepare to store' hint.
   In this case the a0+x should not be past the "t0-32" address.  This
   means: for x=128 the last "safe" a0 address is "t0-160".  Alternatively,
   for x=64 the last "safe" a0 address is "t0-96".  In the current version we
   will use "prefetch hint,128(a0)", so "t0-160" is the limit.  */
#if defined(USE_PREFETCH) \
    && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
	PTR_ADDU t0,a0,a2		/* t0 is the "past the end" address */
	PTR_SUBU t9,t0,PREFETCH_LIMIT	/* t9 is the "last safe pref" address */
#endif
#if defined(USE_PREFETCH) \
    && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
	PREFETCH_FOR_STORE (1, a0)
	PREFETCH_FOR_STORE (2, a0)
	PREFETCH_FOR_STORE (3, a0)
#endif

L(loop16w):
#if defined(USE_PREFETCH) \
    && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
	sltu	v1,t9,a0	/* If a0 > t9 don't use next prefetch */
	bgtz	v1,L(skip_pref)
	nop
#endif
	PREFETCH_FOR_STORE (4, a0)
	PREFETCH_FOR_STORE (5, a0)
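/* Each pass stores UNIT(16) bytes, i.e. two prefetch chunks, and the two
   prefetches above target chunks 4 and 5 ahead of a0, so a chunk is
   prefetched two iterations before it is stored.  */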
L(skip_pref):
	C_ST	a1,UNIT(0)(a0)
	C_ST	a1,UNIT(1)(a0)
	C_ST	a1,UNIT(2)(a0)
	C_ST	a1,UNIT(3)(a0)
	C_ST	a1,UNIT(4)(a0)
	C_ST	a1,UNIT(5)(a0)
	C_ST	a1,UNIT(6)(a0)
	C_ST	a1,UNIT(7)(a0)
	C_ST	a1,UNIT(8)(a0)
	C_ST	a1,UNIT(9)(a0)
	C_ST	a1,UNIT(10)(a0)
	C_ST	a1,UNIT(11)(a0)
	C_ST	a1,UNIT(12)(a0)
	C_ST	a1,UNIT(13)(a0)
	C_ST	a1,UNIT(14)(a0)
	C_ST	a1,UNIT(15)(a0)
	PTR_ADDIU a0,a0,UNIT(16)	/* adding 64/128 to dest */
	bne	a0,a3,L(loop16w)
	nop
	move	a2,t8

/* Here we have dest word-aligned but with less than 64 (or 128) bytes to go.
   Check for a 32 (or 64) byte chunk and copy it if there is one.  Otherwise
   jump down to L(chk1w) to handle the tail end of the copy.  */
L(chkw):
	andi	t8,a2,NSIZEMASK	/* is there a 32-byte/64-byte chunk?  */
				/* t8 is the remainder count past 32-bytes */
	beq	a2,t8,L(chk1w)	/* when a2==t8, no 32-byte chunk */
	nop
	C_ST	a1,UNIT(0)(a0)
	C_ST	a1,UNIT(1)(a0)
	C_ST	a1,UNIT(2)(a0)
	C_ST	a1,UNIT(3)(a0)
	C_ST	a1,UNIT(4)(a0)
	C_ST	a1,UNIT(5)(a0)
	C_ST	a1,UNIT(6)(a0)
	C_ST	a1,UNIT(7)(a0)
	PTR_ADDIU a0,a0,UNIT(8)

/* Here we have less than 32 (or 64) bytes to set.  Set up for a loop to
   store one word (or double word) at a time.  Set a2 to count how many
   bytes we have to store after all the word (or double word) chunks are
   done and a3 to the dst pointer after all the (d)word chunks have
   been stored.  We will loop, incrementing a0, until a0 equals a3.  */
L(chk1w):
	andi	a2,t8,(NSIZE-1)	/* a2 is the remainder past one (d)word chunks */
	beq	a2,t8,L(lastb)
	PTR_SUBU a3,t8,a2	/* a3 is count of bytes in one (d)word chunks */
	PTR_ADDU a3,a0,a3	/* a3 is the dst address after loop */
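
/* Continuing the a2 = 300 example: after the main loop a2 = 44, L(chkw)
   stores one 32-byte chunk leaving t8 = 12, and here a2 = 12 & 3 = 0, so
   the word loop below stores three words and L(lastb) exits at once.  */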

/* copying in words (4-byte or 8-byte chunks) */
L(wordCopy_loop):
	PTR_ADDIU a0,a0,UNIT(1)
	bne	a0,a3,L(wordCopy_loop)
	C_ST	a1,UNIT(-1)(a0)

/* Copy the last 8 (or 16) bytes */
L(lastb):
	blez	a2,L(leave)
	PTR_ADDU a3,a0,a2	/* a3 is the last dst address */
L(lastbloop):
	PTR_ADDIU a0,a0,1
	bne	a0,a3,L(lastbloop)
	sb	a1,-1(a0)
L(leave):
	j	ra
	nop

	.set	at
	.set	reorder
END(MEMSET_NAME)
#ifndef ANDROID_CHANGES
# ifdef _LIBC
libc_hidden_builtin_def (MEMSET_NAME)
# endif
#endif