/* Mirrored from git.ipfire.org (thirdparty/glibc.git),
   blob cba8609813c4cf0ff6f28ca5ff28c8d0e91d3c30:
   sysdeps/arm/memcpy.S.  */
1 /* Copyright (C) 2006-2018 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library. If not, see
18 <http://www.gnu.org/licenses/>. */
19
20 /* Thumb requires excessive IT insns here. */
21 #define NO_THUMB
22 #include <sysdep.h>
23 #include <arm-features.h>
24
25 /*
26 * Data preload for architectures that support it (ARM V5TE and above)
27 */
/* PLD(x) expands to its argument only when none of the pre-V5TE
   architecture macros below is defined, i.e. only when the pld
   prefetch instruction exists; on the older architectures listed it
   expands to nothing, so every PLD(...) line in the code assembles
   to nothing there.  */
28 #if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
29 && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
30 && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
31 && !defined (__ARM_ARCH_5T__))
32 #define PLD(code...) code
33 #else
34 #define PLD(code...)
35 #endif
36
37 /*
38 * This can be used to enable code to cacheline align the source pointer.
39 * Experiments on tested architectures (StrongARM and XScale) didn't show
40 * this a worthwhile thing to do. That might be different in the future.
41 */
/* CALGN(x) is currently a no-op: the source cache-line-alignment code
   it guards is disabled (see the comment above).  Switch to the
   commented-out definition to enable it.  */
42 //#define CALGN(code...) code
43 #define CALGN(code...)
44
45 /*
46 * Endian independent macros for shifting bytes within registers.
47 */
/* PULL shifts loaded data toward the byte that is copied first, PUSH
   toward the byte copied last; lsr/lsl swap between little- and
   big-endian so the word-merging code in forward_copy_shift below is
   endian independent.  */
48 #ifndef __ARMEB__
49 #define PULL lsr
50 #define PUSH lsl
51 #else
52 #define PULL lsl
53 #define PUSH lsr
54 #endif
55
56 .text
57 .syntax unified
58
59 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
60
	@ On entry: r0 = dest, r1 = src, r2 = n.  The original r0 is
	@ pushed here and popped again at exit as the return value;
	@ meanwhile r0 and r1 serve as the running dest/src pointers.
61 ENTRY(memcpy)
62
63 push {r0, r4, lr}
64 cfi_adjust_cfa_offset (12)
65 cfi_rel_offset (r4, 4)
66 cfi_rel_offset (lr, 8)
67
68 cfi_remember_state
69
	@ r2 -= 4; fewer than 4 bytes total goes straight to the
	@ 0-3 byte tail copy at label 8.
70 subs r2, r2, #4
71 blt 8f
	@ Destination not word aligned: align it first at label 9.
72 ands ip, r0, #3
73 PLD( pld [r1, #0] )
74 bne 9f
	@ Source not word aligned (relative to the aligned dest):
	@ use the shifting copy at label 10.
75 ands ip, r1, #3
76 bne 10f
77
	@ Fully word-aligned copy.  Another 28 is taken off r2 so the
	@ main loop can test for a whole 32-byte chunk with one subs;
	@ r3-r8, ip and lr serve as the eight data registers.
78 1: subs r2, r2, #(28)
79 push {r5 - r8}
80 cfi_adjust_cfa_offset (16)
81 cfi_rel_offset (r5, 0)
82 cfi_rel_offset (r6, 4)
83 cfi_rel_offset (r7, 8)
84 cfi_rel_offset (r8, 12)
85 blt 5f
86
	@ Disabled (CALGN is empty): would consume ip bytes via a
	@ computed jump into the load ladder at label 6 so that r1
	@ becomes 32-byte (cache line) aligned before the main loop.
87 CALGN( ands ip, r1, #31 )
88 CALGN( rsb r3, ip, #32 )
89 CALGN( sbcsne r4, r3, r2 ) @ C is always set here
90 CALGN( bcs 2f )
91 CALGN( adr r4, 6f )
92 CALGN( subs r2, r2, r3 ) @ C gets set
93 #ifndef ARM_ALWAYS_BX
94 CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
95 #else
96 CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
97 CALGN( bx r4 )
98 #endif
99
	@ Prime the prefetch: with at least 96 further bytes to go,
	@ the loop below keeps pld running 124 bytes ahead of the
	@ loads; otherwise enter the loop at 4 without prefetching.
100 PLD( pld [r1, #0] )
101 2: PLD( subs r2, r2, #96 )
102 PLD( pld [r1, #28] )
103 PLD( blt 4f )
104 PLD( pld [r1, #60] )
105 PLD( pld [r1, #92] )
106
	@ Main loop: 32 bytes per iteration via ldmia/stmia.
107 3: PLD( pld [r1, #124] )
108 4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
109 subs r2, r2, #32
110 stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
111 bge 3b
112 PLD( cmn r2, #96 )
113 PLD( bge 4b )
114
	@ 4-28 bytes of whole words remain (bits 2-4 of r2).
	@ ip = 32 - that amount gives the distance to jump into the
	@ ldr ladder below: each slot is padded to 1 << ARM_BX_ALIGN_LOG2
	@ bytes by .p2align, so exactly the needed loads execute, and
	@ the same offset then selects the matching stores.
115 5: ands ip, r2, #28
116 rsb ip, ip, #32
117 #ifndef ARM_ALWAYS_BX
118 /* C is always clear here. */
119 addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
120 b 7f
121 #else
	@ BX-only variant: the same computed jump, but built in r10
	@ and taken with bx (pc arithmetic into a branch register).
122 beq 7f
123 push {r10}
124 cfi_adjust_cfa_offset (4)
125 cfi_rel_offset (r10, 0)
126 0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
127 /* If alignment is not perfect, then there will be some
128 padding (nop) instructions between this BX and label 6.
129 The computation above assumed that two instructions
130 later is exactly the right spot. */
131 add r10, #(6f - (0b + PC_OFS))
132 bx r10
133 #endif
134 .p2align ARM_BX_ALIGN_LOG2
135 6: nop
136 .p2align ARM_BX_ALIGN_LOG2
137 ldr r3, [r1], #4
138 .p2align ARM_BX_ALIGN_LOG2
139 ldr r4, [r1], #4
140 .p2align ARM_BX_ALIGN_LOG2
141 ldr r5, [r1], #4
142 .p2align ARM_BX_ALIGN_LOG2
143 ldr r6, [r1], #4
144 .p2align ARM_BX_ALIGN_LOG2
145 ldr r7, [r1], #4
146 .p2align ARM_BX_ALIGN_LOG2
147 ldr r8, [r1], #4
148 .p2align ARM_BX_ALIGN_LOG2
149 ldr lr, [r1], #4
150
	@ Second computed jump, same offset in ip, into the str
	@ ladder at 66 so the stores mirror the loads just done.
151 #ifndef ARM_ALWAYS_BX
152 add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
153 nop
154 #else
155 0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
156 /* If alignment is not perfect, then there will be some
157 padding (nop) instructions between this BX and label 66.
158 The computation above assumed that two instructions
159 later is exactly the right spot. */
160 add r10, #(66f - (0b + PC_OFS))
161 bx r10
162 #endif
163 .p2align ARM_BX_ALIGN_LOG2
164 66: nop
165 .p2align ARM_BX_ALIGN_LOG2
166 str r3, [r0], #4
167 .p2align ARM_BX_ALIGN_LOG2
168 str r4, [r0], #4
169 .p2align ARM_BX_ALIGN_LOG2
170 str r5, [r0], #4
171 .p2align ARM_BX_ALIGN_LOG2
172 str r6, [r0], #4
173 .p2align ARM_BX_ALIGN_LOG2
174 str r7, [r0], #4
175 .p2align ARM_BX_ALIGN_LOG2
176 str r8, [r0], #4
177 .p2align ARM_BX_ALIGN_LOG2
178 str lr, [r0], #4
179
180 #ifdef ARM_ALWAYS_BX
181 pop {r10}
182 cfi_adjust_cfa_offset (-4)
183 cfi_restore (r10)
184 #endif
185
186 CALGN( bcs 2b )
187
188 7: pop {r5 - r8}
189 cfi_adjust_cfa_offset (-16)
190 cfi_restore (r5)
191 cfi_restore (r6)
192 cfi_restore (r7)
193 cfi_restore (r8)
194
	@ Tail: 0-3 bytes remain in the low bits of r2.  lsl #31
	@ leaves only bit 0 in the result (NE iff bit 0 set) and
	@ shifts bit 1 out into carry (CS iff bit 1 set): NE copies
	@ one byte, CS copies two more.
195 8: movs r2, r2, lsl #31
196 ldrbne r3, [r1], #1
197 ldrbcs r4, [r1], #1
198 ldrbcs ip, [r1]
199 strbne r3, [r0], #1
200 strbcs r4, [r0], #1
201 strbcs ip, [r0]
202
	@ Return.  The pop restores the saved dest pointer into r0 as
	@ the return value.  On ARMv4T with Thumb interworking, or
	@ when BX is mandatory, the return must go through bx lr;
	@ otherwise lr is popped directly into pc.
203 #if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
204 || defined (ARM_ALWAYS_BX))
205 pop {r0, r4, lr}
206 cfi_adjust_cfa_offset (-12)
207 cfi_restore (r4)
208 cfi_restore (lr)
209 bx lr
210 #else
211 pop {r0, r4, pc}
212 #endif
213
	@ Re-establish the CFI state remembered after the entry push
	@ for the out-of-line alignment paths that follow.
214 cfi_restore_state
215
	@ Destination misaligned: on entry ip = dest & 3 (nonzero);
	@ after the rsb, ip = 4 - (dest & 3) bytes to copy (GT: 3rd
	@ byte, GE: 2nd, unconditional: 1st).  Then re-dispatch on
	@ what the source alignment has become.
216 9: rsb ip, ip, #4
217 cmp ip, #2
218 ldrbgt r3, [r1], #1
219 ldrbge r4, [r1], #1
220 ldrb lr, [r1], #1
221 strbgt r3, [r0], #1
222 strbge r4, [r0], #1
223 subs r2, r2, ip
224 strb lr, [r0], #1
225 blt 8b
226 ands ip, r1, #3
227 beq 1b
228
	@ Source offset from word alignment by ip = 1, 2 or 3 bytes.
	@ Word-align r1, preload the first word into lr, and pick the
	@ matching shift/merge variant: label 17 for offset 2, label
	@ 18 for offset 3, or fall through to the pull=8 expansion
	@ below for offset 1.
229 10: bic r1, r1, #3
230 cmp ip, #2
231 ldr lr, [r1], #4
232 beq 17f
233 bgt 18f
234
235
	@ Copy words from a source that is \pull/8 bytes past word
	@ alignment.  Each output word merges the remainder of the
	@ previously loaded word (shifted by \pull bits) with the
	@ start of the next one (shifted by \push bits); the PULL/PUSH
	@ shift directions swap on big-endian (see macros above).
	@ On entry lr holds the first, partially consumed source word
	@ and r2 the byte count less 4.
236 .macro forward_copy_shift pull push
237
238 subs r2, r2, #28
239 blt 14f
240
	@ Disabled source cache-alignment, as at label 1.
241 CALGN( ands ip, r1, #31 )
242 CALGN( rsb ip, ip, #32 )
243 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
244 CALGN( subcc r2, r2, ip )
245 CALGN( bcc 15f )
246
	@ Bulk loop: produce 32 output bytes per iteration in
	@ r3-r8, r10, ip from two 16-byte ldmia bursts, with the
	@ same 124-bytes-ahead prefetch scheme as the aligned loop.
247 11: push {r5 - r8, r10}
248 cfi_adjust_cfa_offset (20)
249 cfi_rel_offset (r5, 0)
250 cfi_rel_offset (r6, 4)
251 cfi_rel_offset (r7, 8)
252 cfi_rel_offset (r8, 12)
253 cfi_rel_offset (r10, 16)
254
255 PLD( pld [r1, #0] )
256 PLD( subs r2, r2, #96 )
257 PLD( pld [r1, #28] )
258 PLD( blt 13f )
259 PLD( pld [r1, #60] )
260 PLD( pld [r1, #92] )
261
262 12: PLD( pld [r1, #124] )
263 13: ldmia r1!, {r4, r5, r6, r7}
264 mov r3, lr, PULL #\pull
265 subs r2, r2, #32
266 ldmia r1!, {r8, r10, ip, lr}
267 orr r3, r3, r4, PUSH #\push
268 mov r4, r4, PULL #\pull
269 orr r4, r4, r5, PUSH #\push
270 mov r5, r5, PULL #\pull
271 orr r5, r5, r6, PUSH #\push
272 mov r6, r6, PULL #\pull
273 orr r6, r6, r7, PUSH #\push
274 mov r7, r7, PULL #\pull
275 orr r7, r7, r8, PUSH #\push
276 mov r8, r8, PULL #\pull
277 orr r8, r8, r10, PUSH #\push
278 mov r10, r10, PULL #\pull
279 orr r10, r10, ip, PUSH #\push
280 mov ip, ip, PULL #\pull
281 orr ip, ip, lr, PUSH #\push
282 stmia r0!, {r3, r4, r5, r6, r7, r8, r10, ip}
283 bge 12b
284 PLD( cmn r2, #96 )
285 PLD( bge 13b )
286
287 pop {r5 - r8, r10}
288 cfi_adjust_cfa_offset (-20)
289 cfi_restore (r5)
290 cfi_restore (r6)
291 cfi_restore (r7)
292 cfi_restore (r8)
293 cfi_restore (r10)
294
	@ 4-28 bytes of whole words remain: merge one word at a time.
295 14: ands ip, r2, #28
296 beq 16f
297
298 15: mov r3, lr, PULL #\pull
299 ldr lr, [r1], #4
300 subs ip, ip, #4
301 orr r3, r3, lr, PUSH #\push
302 str r3, [r0], #4
303 bgt 15b
304 CALGN( cmp r2, #0 )
305 CALGN( bge 11b )
306
	@ r1 was read one word ahead: step it back over the \push/8
	@ bytes of lr that were never consumed, then finish with the
	@ 0-3 byte tail copy at label 8.
307 16: sub r1, r1, #(\push / 8)
308 b 8b
309
310 .endm
311
312
	@ Instantiate the three source-offset variants; the pull=8
	@ (offset-1) copy is reached by falling through from label 10.
313 forward_copy_shift pull=8 push=24
314
315 17: forward_copy_shift pull=16 push=16
316
317 18: forward_copy_shift pull=24 push=8
318
319 END(memcpy)
320 libc_hidden_builtin_def (memcpy)