/* memcpy - copy a block from source to destination.  31/64 bit S/390 version.
   Copyright (C) 2012-2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */


#include <sysdep.h>
#include "asm-syntax.h"
#include <ifunc-memcpy.h>

/* INPUT PARAMETERS
     %r2 = address of destination memory area
     %r3 = address of source memory area
     %r4 = number of bytes to copy.  */
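
/* Illustrative only, not assembled: all entry points below implement the
   standard C contracts, where memcpy returns dest and mempcpy returns
   dest + n.  A minimal C sketch of those contracts (the sketch_* names are
   hypothetical and exist only in this comment):

       #include <stddef.h>

       static void *sketch_memcpy (void *dest, const void *src, size_t n)
       {
         unsigned char *d = dest;
         const unsigned char *s = src;
         for (size_t i = 0; i < n; i++)
           d[i] = s[i];
         return dest;                  // memcpy returns the destination
       }

       static void *sketch_mempcpy (void *dest, const void *src, size_t n)
       {
         sketch_memcpy (dest, src, n);
         return (char *) dest + n;     // mempcpy returns dest + n
       }

   The MEMPCPY_* entry points compute dest + n into %r2 up front and then
   share the copy path with the corresponding MEMCPY_* entry point.  */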

        .text

#if defined __s390x__
# define LTGR   ltgr
# define CGHI   cghi
# define LGR    lgr
# define AGHI   aghi
# define BRCTG  brctg
#else
# define LTGR   ltr
# define CGHI   chi
# define LGR    lr
# define AGHI   ahi
# define BRCTG  brct
#endif /* ! defined __s390x__ */

#if HAVE_MEMCPY_Z900_G5
ENTRY(MEMPCPY_Z900_G5)
# if defined __s390x__
        .machine "z900"
# else
        .machine "g5"
# endif /* ! defined __s390x__ */
        LGR     %r1,%r2                # Use as dest
        la      %r2,0(%r4,%r2)         # Return dest + n
        j       .L_Z900_G5_start
END(MEMPCPY_Z900_G5)

ENTRY(MEMCPY_Z900_G5)
# if defined __s390x__
        .machine "z900"
# else
        .machine "g5"
# endif /* ! defined __s390x__ */
        LGR     %r1,%r2                # r1: Use as dest ; r2: Return dest
.L_Z900_G5_start:
        LTGR    %r4,%r4
        je      .L_Z900_G5_4
        AGHI    %r4,-1
# if defined __s390x__
        srlg    %r5,%r4,8
# else
        lr      %r5,%r4
        srl     %r5,8
# endif /* ! defined __s390x__ */
        LTGR    %r5,%r5
        jne     .L_Z900_G5_13
.L_Z900_G5_3:
# if defined __s390x__
        larl    %r5,.L_Z900_G5_15
#  define Z900_G5_EX_D 0
# else
        basr    %r5,0
.L_Z900_G5_14:
#  define Z900_G5_EX_D .L_Z900_G5_15-.L_Z900_G5_14
# endif /* ! defined __s390x__ */
        ex      %r4,Z900_G5_EX_D(%r5)
.L_Z900_G5_4:
        br      %r14
.L_Z900_G5_13:
        CGHI    %r5,4096               # Switch to mvcle for copies >1MB
        jh      __memcpy_mvcle
.L_Z900_G5_12:
        mvc     0(256,%r1),0(%r3)
        la      %r1,256(%r1)
        la      %r3,256(%r3)
        BRCTG   %r5,.L_Z900_G5_12
        j       .L_Z900_G5_3
.L_Z900_G5_15:
        mvc     0(1,%r1),0(%r3)
END(MEMCPY_Z900_G5)
#endif /* HAVE_MEMCPY_Z900_G5 */
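
/* Illustrative only, not assembled: the Z900/G5 variant above (and the z10
   and z196 variants below) copy n bytes as (n - 1) >> 8 full 256-byte MVC
   blocks plus one final MVC, which EX/EXRL executes with its length field
   OR-ed with the low 8 bits of n - 1, so it moves ((n - 1) & 255) + 1
   bytes.  A rough C sketch of that blocking (sketch_* is hypothetical;
   library memcpy stands in for a single MVC):

       #include <stddef.h>
       #include <string.h>

       static void sketch_copy_blocked (unsigned char *d,
                                        const unsigned char *s, size_t n)
       {
         if (n == 0)
           return;
         size_t idx = n - 1;           // AGHI %r4,-1
         size_t blocks = idx >> 8;     // number of full 256-byte blocks
         while (blocks-- > 0)          // the .L_Z900_G5_12 loop
           {
             memcpy (d, s, 256);       // mvc 0(256,%r1),0(%r3)
             d += 256;
             s += 256;
           }
         // EX/EXRL ORs the low 8 bits of idx into the MVC length field,
         // so the template "mvc 0(1,...)" copies (idx & 255) + 1 bytes.
         memcpy (d, s, (idx & 255) + 1);
       }

   The totals add up: blocks * 256 + ((n - 1) & 255) + 1 == n.  */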

ENTRY(__memcpy_mvcle)
        # Using this as a standalone function will produce unexpected
        # results, since the length is incremented by 1 here in order to
        # compensate for the decrement already done in the functions above.
        LGR     %r0,%r2                # backup return dest [ + n ]
        AGHI    %r4,1                  # length + 1
        LGR     %r5,%r4                # source length
        LGR     %r4,%r3                # source address
        LGR     %r2,%r1                # destination address
        LGR     %r3,%r5                # destination length = source length
.L_MVCLE_1:
        mvcle   %r2,%r4,0              # that's it, MVCLE is your friend
        jo      .L_MVCLE_1             # resume while MVCLE is interrupted (cc 3)
        LGR     %r2,%r0                # return destination address
        br      %r14
END(__memcpy_mvcle)
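
/* Illustrative only, not assembled: __memcpy_mvcle is entered with the
   length already decremented by 1 (the callers above did AGHI %r4,-1), so
   it adds 1 back before loading the MVCLE register pairs.  A rough C view
   of the register shuffle (sketch_* is hypothetical):

       #include <stddef.h>

       // ret corresponds to %r2 on entry (dest, or dest + n for mempcpy),
       // dest_area to %r1, src to %r3 and len_minus_1 to %r4.
       static void *sketch_memcpy_mvcle (void *ret, void *dest_area,
                                         const void *src, size_t len_minus_1)
       {
         size_t len = len_minus_1 + 1;      // AGHI %r4,1
         unsigned char *d = dest_area;      // after the shuffle: %r2/%r3
                                            //   = dest address/length
         const unsigned char *s = src;      // %r4/%r5 = src address/length
         for (size_t i = 0; i < len; i++)   // mvcle %r2,%r4,0 + jo loop
           d[i] = s[i];
         return ret;                        // LGR %r2,%r0
       }
*/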

#undef LTGR
#undef CGHI
#undef LGR
#undef AGHI
#undef BRCTG

#if HAVE_MEMCPY_Z10
ENTRY(MEMPCPY_Z10)
        .machine "z10"
        .machinemode "zarch_nohighgprs"
        lgr     %r1,%r2                # Use as dest
        la      %r2,0(%r4,%r2)         # Return dest + n
        j       .L_Z10_start
END(MEMPCPY_Z10)

ENTRY(MEMCPY_Z10)
        .machine "z10"
        .machinemode "zarch_nohighgprs"
        lgr     %r1,%r2                # r1: Use as dest ; r2: Return dest
.L_Z10_start:
# if !defined __s390x__
        llgfr   %r4,%r4
# endif /* !defined __s390x__ */
        cgije   %r4,0,.L_Z10_4
        aghi    %r4,-1
        srlg    %r5,%r4,8
        cgijlh  %r5,0,.L_Z10_13
.L_Z10_3:
        exrl    %r4,.L_Z10_15
.L_Z10_4:
        br      %r14
.L_Z10_13:
        cgfi    %r5,65535              # Switch to mvcle for copies >16MB
        jh      __memcpy_mvcle
.L_Z10_12:
        pfd     1,768(%r3)             # Prefetch source for reading
        pfd     2,768(%r1)             # Prefetch destination for storing
        mvc     0(256,%r1),0(%r3)
        la      %r1,256(%r1)
        la      %r3,256(%r3)
        brctg   %r5,.L_Z10_12
        j       .L_Z10_3
.L_Z10_15:
        mvc     0(1,%r1),0(%r3)
END(MEMCPY_Z10)
#endif /* HAVE_MEMCPY_Z10 */

#if HAVE_MEMCPY_Z196
ENTRY(MEMPCPY_Z196)
        .machine "z196"
        .machinemode "zarch_nohighgprs"
        lgr     %r1,%r2                # Use as dest
        la      %r2,0(%r4,%r2)         # Return dest + n
        j       .L_Z196_start
END(MEMPCPY_Z196)

ENTRY(MEMCPY_Z196)
        .machine "z196"
        .machinemode "zarch_nohighgprs"
        lgr     %r1,%r2                # r1: Use as dest ; r2: Return dest
.L_Z196_start:
# if !defined __s390x__
        llgfr   %r4,%r4
# endif /* !defined __s390x__ */
        ltgr    %r4,%r4
        je      .L_Z196_4
.L_Z196_start2:
        aghi    %r4,-1
        srlg    %r5,%r4,8
        ltgr    %r5,%r5
        jne     .L_Z196_5
.L_Z196_3:
        exrl    %r4,.L_Z196_14
.L_Z196_4:
        br      %r14
.L_Z196_5:
        cgfi    %r5,262144             # Switch to mvcle for copies >64MB
        jh      __memcpy_mvcle
.L_Z196_2:
        pfd     1,768(%r3)             # Prefetch source for reading
        pfd     2,768(%r1)             # Prefetch destination for storing
        mvc     0(256,%r1),0(%r3)
        aghi    %r5,-1
        la      %r1,256(%r1)
        la      %r3,256(%r3)
        jne     .L_Z196_2
        j       .L_Z196_3
.L_Z196_14:
        mvc     0(1,%r1),0(%r3)
END(MEMCPY_Z196)
#endif /* HAVE_MEMCPY_Z196 */

#if HAVE_MEMMOVE_Z13
ENTRY(MEMMOVE_Z13)
        .machine "z13"
        .machinemode "zarch_nohighgprs"
# if !defined __s390x__
        /* Note: The 31bit dst and src pointers are prefixed with zeroes.  */
        llgfr   %r4,%r4
        llgfr   %r3,%r3
        llgfr   %r2,%r2
# endif /* !defined __s390x__ */
        sgrk    %r0,%r2,%r3
        clgijh  %r4,16,.L_MEMMOVE_Z13_LARGE
        aghik   %r5,%r4,-1
.L_MEMMOVE_Z13_SMALL:
        jl      .L_MEMMOVE_Z13_END     /* Jump away if len was zero.  */
        /* Store up to 16 bytes with vll/vstl, which need the highest index
           instead of the length.  */
        vll     %v16,%r5,0(%r3)
        vstl    %v16,%r5,0(%r2)
.L_MEMMOVE_Z13_END:
        br      %r14
.L_MEMMOVE_Z13_LARGE:
        lgr     %r1,%r2                /* For memcpy: r1: Use as dest ;
                                          r2: Return dest  */
        /* The unsigned comparison (dst - src >= len) determines if we can
           execute the forward case with memcpy.  */
#if ! HAVE_MEMCPY_Z196
# error The z13 variant of memmove needs the z196 variant of memcpy!
#endif
        clgrjhe %r0,%r4,.L_Z196_start2
        risbgn  %r5,%r4,4,128+63,60    /* r5 = r4 / 16  */
        aghi    %r4,-16
        clgijhe %r5,8,.L_MEMMOVE_Z13_LARGE_64B
.L_MEMMOVE_Z13_LARGE_16B_LOOP:
        /* Store at least 16 bytes with vl/vst.  The number of 16-byte
           blocks is stored in r5.  */
        vl      %v16,0(%r4,%r3)
        vst     %v16,0(%r4,%r2)
        aghi    %r4,-16
        brctg   %r5,.L_MEMMOVE_Z13_LARGE_16B_LOOP
        aghik   %r5,%r4,15
        j       .L_MEMMOVE_Z13_SMALL
.L_MEMMOVE_Z13_LARGE_64B:
        /* Store at least 128 bytes with 4x vl/vst.  The number of 64-byte
           blocks will be stored in r0.  */
        aghi    %r4,-48
        srlg    %r0,%r5,2              /* r0 = r5 / 4
                                          => Number of 64-byte blocks.  */
.L_MEMMOVE_Z13_LARGE_64B_LOOP:
        vl      %v20,48(%r4,%r3)
        vl      %v19,32(%r4,%r3)
        vl      %v18,16(%r4,%r3)
        vl      %v17,0(%r4,%r3)
        vst     %v20,48(%r4,%r2)
        vst     %v19,32(%r4,%r2)
        vst     %v18,16(%r4,%r2)
        vst     %v17,0(%r4,%r2)
        aghi    %r4,-64
        brctg   %r0,.L_MEMMOVE_Z13_LARGE_64B_LOOP
        aghi    %r4,48
        /* Recalculate the number of 16-byte blocks.  */
        risbg   %r5,%r5,62,128+63,0    /* r5 = r5 & 3
                                          => Remaining 16-byte blocks.  */
        jne     .L_MEMMOVE_Z13_LARGE_16B_LOOP
        aghik   %r5,%r4,15
        j       .L_MEMMOVE_Z13_SMALL
END(MEMMOVE_Z13)
#endif /* HAVE_MEMMOVE_Z13 */
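
/* Illustrative only, not assembled: both the z13 memmove above and the
   arch13 memmove below decide between the forward (memcpy) path and the
   backward path with a single unsigned comparison, dst - src >= len.  If
   dst is at least len bytes above src, or anywhere below src (the
   subtraction wraps to a huge value), a forward copy never overwrites a
   byte before it has been read.  A minimal C sketch (sketch_* is
   hypothetical):

       #include <stddef.h>
       #include <stdint.h>

       static void sketch_memmove_dispatch (void *dst, const void *src,
                                            size_t len)
       {
         unsigned char *d = dst;
         const unsigned char *s = src;
         if ((uintptr_t) d - (uintptr_t) s >= len)
           {
             // forward case: the assembly branches to .L_Z196_start2
             for (size_t i = 0; i < len; i++)
               d[i] = s[i];
           }
         else
           {
             // backward case: copy from the end so each byte is read
             // before it is overwritten
             for (size_t i = len; i > 0; i--)
               d[i - 1] = s[i - 1];
           }
       }
*/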

#if HAVE_MEMMOVE_ARCH13
ENTRY(MEMMOVE_ARCH13)
        .machine "arch13"
        .machinemode "zarch_nohighgprs"
# if ! defined __s390x__
        /* Note: The 31bit dst and src pointers are prefixed with zeroes.  */
        llgfr   %r4,%r4
        llgfr   %r3,%r3
        llgfr   %r2,%r2
# endif /* ! defined __s390x__ */
        sgrk    %r5,%r2,%r3
        aghik   %r0,%r4,-1             /* Both vstl and mvcrl need the
                                          highest index.  */
        clgijh  %r4,16,.L_MEMMOVE_ARCH13_LARGE
.L_MEMMOVE_ARCH13_SMALL:
        jl      .L_MEMMOVE_ARCH13_END  /* Return if len was zero (cc of aghik).  */
        /* Store up to 16 bytes with vll/vstl (they need the highest index).  */
        vll     %v16,%r0,0(%r3)
        vstl    %v16,%r0,0(%r2)
.L_MEMMOVE_ARCH13_END:
        br      %r14
.L_MEMMOVE_ARCH13_LARGE:
        lgr     %r1,%r2      /* For memcpy: r1: Use as dest ; r2: Return dest  */
        /* The unsigned comparison (dst - src >= len) determines if we can
           execute the forward case with memcpy.  */
#if ! HAVE_MEMCPY_Z196
# error The arch13 variant of memmove needs the z196 variant of memcpy!
#endif
        /* Backward case.  */
        clgrjhe %r5,%r4,.L_Z196_start2
        clgijh  %r0,255,.L_MEMMOVE_ARCH13_LARGER_256B
        /* Move up to 256 bytes with mvcrl (move right to left).  */
        mvcrl   0(%r1),0(%r3)          /* Move (r0 + 1) bytes from r3 to r1.  */
        br      %r14
.L_MEMMOVE_ARCH13_LARGER_256B:
        /* First move the "remaining" block of up to 256 bytes at the end of
           the src/dst buffers.  Then move blocks of 256 bytes in a loop,
           starting with the block at the end.
           (If the src/dst pointers are aligned, e.g. to 256 bytes, then the
           pointers passed to the mvcrl instructions are aligned, too.)  */
        risbgn  %r5,%r0,8,128+63,56    /* r5 = r0 / 256  */
        risbgn  %r0,%r0,56,128+63,0    /* r0 = r0 & 0xFF  */
        slgr    %r4,%r0
        lay     %r1,-1(%r4,%r1)
        lay     %r3,-1(%r4,%r3)
        mvcrl   0(%r1),0(%r3)          /* Move (r0 + 1) bytes from r3 to r1.  */
        lghi    %r0,255                /* Always copy 256 bytes in the loop below!  */
.L_MEMMOVE_ARCH13_LARGE_256B_LOOP:
        aghi    %r1,-256
        aghi    %r3,-256
        mvcrl   0(%r1),0(%r3)          /* Move (r0 + 1) bytes from r3 to r1.  */
        brctg   %r5,.L_MEMMOVE_ARCH13_LARGE_256B_LOOP
        br      %r14
END(MEMMOVE_ARCH13)
#endif /* HAVE_MEMMOVE_ARCH13 */
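
/* Illustrative only, not assembled: in the arch13 backward path above,
   %r0 holds len - 1, so %r0 & 0xFF is the size minus one of the partial
   block at the end of the buffers and %r0 / 256 the number of full
   256-byte blocks.  The partial block at the very end is moved first with
   mvcrl, then full 256-byte blocks are moved back towards the start.  A
   rough C sketch of that blocking (sketch_* is hypothetical):

       #include <stddef.h>

       static void sketch_backward_blocks (unsigned char *dst,
                                           const unsigned char *src,
                                           size_t len)   // len >= 257 here
       {
         size_t idx = len - 1;
         size_t blocks = idx >> 8;          // full 256-byte blocks
         size_t tail = (idx & 0xFF) + 1;    // 1..256 bytes at the end
         size_t off = len - tail;           // lay %r1,-1(%r4,%r1) etc.
         for (size_t i = tail; i > 0; i--)  // mvcrl copies right to left
           dst[off + i - 1] = src[off + i - 1];
         while (blocks-- > 0)
           {
             off -= 256;
             for (size_t i = 256; i > 0; i--)   // one 256-byte mvcrl
               dst[off + i - 1] = src[off + i - 1];
           }
       }

   The totals add up: tail + blocks * 256 == len.  */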

#if ! HAVE_MEMCPY_IFUNC
/* If we don't use ifunc, define an alias for mem[p]cpy here.
   Otherwise see sysdeps/s390/mem[p]cpy.c.  */
strong_alias (MEMCPY_DEFAULT, memcpy)
strong_alias (MEMPCPY_DEFAULT, __mempcpy)
weak_alias (__mempcpy, mempcpy)
#endif
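
/* The strong_alias/weak_alias macros above come from glibc's symbol
   machinery; in this assembly context they roughly define an additional
   global (respectively weak) symbol for the same code.  A hypothetical
   C-level sketch of the same idea using GCC alias attributes (the my_*
   names are illustrative only):

       #include <stddef.h>
       #include <string.h>

       void *my_memcpy_default (void *dest, const void *src, size_t n)
       { return memcpy (dest, src, n); }

       void *my_mempcpy_default (void *dest, const void *src, size_t n)
       { return (char *) memcpy (dest, src, n) + n; }

       // strong_alias (original, alias): a second global name, same code.
       extern __typeof (my_memcpy_default) my_memcpy
         __attribute__ ((alias ("my_memcpy_default")));

       // weak_alias (original, alias): same, but the new name is weak.
       extern __typeof (my_mempcpy_default) my_mempcpy
         __attribute__ ((weak, alias ("my_mempcpy_default")));

   MEMCPY_DEFAULT and MEMPCPY_DEFAULT themselves are selected in
   <ifunc-memcpy.h>, included at the top of this file.  */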

#if ! HAVE_MEMMOVE_IFUNC
/* If we don't use ifunc, define an alias for memmove here.
   Otherwise see sysdeps/s390/memmove.c.  */
# if ! HAVE_MEMMOVE_C
/* If the C variant is needed, then sysdeps/s390/memmove-c.c
   defines memmove.
   Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it.  */
strong_alias (MEMMOVE_DEFAULT, memmove)
# endif
#endif

#if defined SHARED && IS_IN (libc)
/* Defines the internal symbols.
   Compare to libc_hidden_[builtin_]def (mem[p]cpy) in string/mem[p]cpy.c.  */
strong_alias (MEMCPY_DEFAULT, __GI_memcpy)
strong_alias (MEMPCPY_DEFAULT, __GI_mempcpy)
strong_alias (MEMPCPY_DEFAULT, __GI___mempcpy)
# if ! HAVE_MEMMOVE_C
/* If the C variant is needed, then sysdeps/s390/memmove-c.c
   defines the internal symbol.
   Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it.  */
strong_alias (MEMMOVE_DEFAULT, __GI_memmove)
# endif
#endif