]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/arm/memcpy.S
3e985dad68dfaf72b8b805118dde407f909ec19c
[thirdparty/glibc.git] / sysdeps / arm / memcpy.S
1 /* Copyright (C) 2006-2014 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library. If not, see
18 <http://www.gnu.org/licenses/>. */
19
20 /* Thumb requires excessive IT insns here. */
21 #define NO_THUMB
22 #include <sysdep.h>
23 #include <arm-features.h>
24
/*
 * PLD(code) wraps the data-preload instructions used by the copy loops.
 * It expands to nothing when the build targets one of the architectures
 * listed below (everything older than ARM V5TE) and to its argument
 * unchanged on every other target.
 */
#if (defined (__ARM_ARCH_2__) || defined (__ARM_ARCH_3__) \
     || defined (__ARM_ARCH_3M__) || defined (__ARM_ARCH_4__) \
     || defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_5__) \
     || defined (__ARM_ARCH_5T__))
#define PLD(code...)
#else
#define PLD(code...) code
#endif
36
37 /*
38 * CALGN(code) wraps the optional code that cache-line aligns the source
39 * pointer. Experiments on the tested architectures (StrongARM and XScale)
40 * did not show this to be worthwhile, so the macro is defined empty and
   * every CALGN line in this file is dropped by the preprocessor. That might
   * be different in the future: to enable the alignment code, use the
   * commented-out definition below instead of the empty one.
41 */
42 //#define CALGN(code...) code
43 #define CALGN(code...)
44
/*
 * Byte-order neutral names for the two shift directions used when words
 * from a misaligned source are stitched together.  Little-endian builds
 * (__ARMEB__ not defined) get PULL = lsr and PUSH = lsl; big-endian
 * builds get the two swapped.
 */
#ifdef __ARMEB__
#define PULL lsl
#define PUSH lsr
#else
#define PULL lsr
#define PUSH lsl
#endif
55
56 .text
57 .syntax unified
58
59 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
/* Register roles, as used by the code below:
     r0  destination pointer, post-incremented by the stores.  Its entry
         value is the first register pushed and is popped back into r0 by
         the return sequence, so the caller gets the original dest back.
     r1  source pointer, post-incremented by the loads.
     r2  remaining byte count, kept biased: 4 is subtracted on entry and a
         further 28 before the bulk loop (plus 96 more inside PLD builds),
         so that a plain blt means not enough bytes left for this stage.
         Every bias is a multiple of 4 and the biases in force after the
         bulk loop add up to a multiple of 32, so the low bits of r2 always
         equal the low bits of the true remaining count.
     r3 to r8, r10, ip, lr  data scratch.  r4 and lr are saved on entry,
         r5 to r8 and r10 are saved around the sections that use them.
   The sfi_breg and sfi_pld wrappers are macros defined outside this file.
   Judging from their use here, sfi_breg takes a base register and an
   instruction in which the B placeholder stands for that register, and the
   code assumes that neither wrapper changes the condition flags.
   Numeric labels are assembler local labels: Nf is the next definition of
   N further down, Nb the closest one above.  */
60
61 ENTRY(memcpy)
62
63 push {r0, r4, lr}
64 cfi_adjust_cfa_offset (12)
65 cfi_rel_offset (r4, 4)
66 cfi_rel_offset (lr, 8)
67
/* Record the unwind state with just r0, r4 and lr on the stack.  It is
   reinstated by cfi_restore_state right after the return sequence, for the
   code that is placed after the return but runs with this same frame.  */
68 cfi_remember_state
69
/* Fewer than 4 bytes in total: go straight to the byte tail at 8.  */
70 subs r2, r2, #4
71 blt 8f
/* Destination not word aligned: 9 copies 1 to 3 bytes to align it.  The bne
   tests the flags of the ands above it; the preload sits in between.  */
72 ands ip, r0, #3
73 PLD( sfi_pld r1, #0 )
74 bne 9f
/* Destination aligned but source not: 10 handles it, with ip holding the
   source offset within its word (1 to 3).  */
75 ands ip, r1, #3
76 bne 10f
77
/* Both pointers are word aligned from here on.  Bias the count by another
   28 (32 in total) so that blt 5f means fewer than 32 bytes remain.  The
   push between the subs and the blt leaves the flags alone.  */
78 1: subs r2, r2, #(28)
79 push {r5 - r8}
80 cfi_adjust_cfa_offset (16)
81 cfi_rel_offset (r5, 0)
82 cfi_rel_offset (r6, 4)
83 cfi_rel_offset (r7, 8)
84 cfi_rel_offset (r8, 12)
85 blt 5f
86
/* Optional source cache-line alignment.  All CALGN lines expand to nothing
   while CALGN is defined empty, as it is in this file.  */
87 CALGN( ands ip, r1, #31 )
88 CALGN( rsb r3, ip, #32 )
89 CALGN( sbcsne r4, r3, r2 ) @ C is always set here
90 CALGN( bcs 2f )
91 CALGN( adr r4, 6f )
92 CALGN( subs r2, r2, r3 ) @ C gets set
93 #ifndef ARM_ALWAYS_BX
94 CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
95 #else
96 CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
97 CALGN( bx r4 )
98 #endif
99
/* Preload set-up, present only when PLD expands to its argument.  The count
   is biased by a further 96; when that makes it negative the loop is
   entered at 4, skipping the preloads at offsets 60, 92 and 124.  */
100 PLD( sfi_pld r1, #0 )
101 2: PLD( subs r2, r2, #96 )
102 PLD( sfi_pld r1, #28 )
103 PLD( blt 4f )
104 PLD( sfi_pld r1, #60 )
105 PLD( sfi_pld r1, #92 )
106
/* Bulk loop: 32 bytes per iteration through eight registers.  The stmia
   does not touch the flags, so bge 3b tests the subs.  In PLD builds the
   cmn and bge 4b pair then keeps looping, without the preload at 3, until
   the extra bias of 96 has been used up.  */
107 3: PLD( sfi_pld r1, #124 )
108 4: sfi_breg r1, \
109 ldmia \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
110 subs r2, r2, #32
111 sfi_breg r0, \
112 stmia \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
113 bge 3b
114 PLD( cmn r2, #96 )
115 PLD( bge 4b )
116
/* 0 to 7 whole words remain.  ip becomes 32 minus the number of bytes in
   those words, which is 4 times the number of ladder slots to skip.  Each
   slot below is padded by .p2align to 1 << ARM_BX_ALIGN_LOG2 bytes, hence
   the shift by ARM_BX_ALIGN_LOG2 - 2.  The rsb leaves the flags of the ands
   in place, so ne and eq still mean some words remain and none remain.
   The jump base is the value read from pc, which is presumably arranged to
   coincide with label 6.  With ARM_ALWAYS_BX the target is formed in r10,
   which is saved first, and reached with bx.  */
117 5: ands ip, r2, #28
118 rsb ip, ip, #32
119 #ifndef ARM_ALWAYS_BX
120 /* C is always clear here. */
121 addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
122 b 7f
123 #else
124 beq 7f
125 push {r10}
126 cfi_adjust_cfa_offset (4)
127 cfi_rel_offset (r10, 0)
128 add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
129 bx r10
130 #endif
/* Load ladder: slot 0 is a nop, slots 1 to 7 load one word each.  Entering
   at slot k runs the last 8 - k slots, so exactly the remaining words are
   loaded, always ending in lr.  */
131 .p2align ARM_BX_ALIGN_LOG2
132 6: nop
133 .p2align ARM_BX_ALIGN_LOG2
134 sfi_breg r1, \
135 ldr r3, [\B], #4
136 .p2align ARM_BX_ALIGN_LOG2
137 sfi_breg r1, \
138 ldr r4, [\B], #4
139 .p2align ARM_BX_ALIGN_LOG2
140 sfi_breg r1, \
141 ldr r5, [\B], #4
142 .p2align ARM_BX_ALIGN_LOG2
143 sfi_breg r1, \
144 ldr r6, [\B], #4
145 .p2align ARM_BX_ALIGN_LOG2
146 sfi_breg r1, \
147 ldr r7, [\B], #4
148 .p2align ARM_BX_ALIGN_LOG2
149 sfi_breg r1, \
150 ldr r8, [\B], #4
151 .p2align ARM_BX_ALIGN_LOG2
152 sfi_breg r1, \
153 ldr lr, [\B], #4
154
/* Second computed jump with the same ip, this time into the matching store
   ladder, so the registers just loaded are the ones written out.  */
155 #ifndef ARM_ALWAYS_BX
156 add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
157 nop
158 #else
159 add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
160 bx r10
161 #endif
162 .p2align ARM_BX_ALIGN_LOG2
163 nop
164 .p2align ARM_BX_ALIGN_LOG2
165 sfi_breg r0, \
166 str r3, [\B], #4
167 .p2align ARM_BX_ALIGN_LOG2
168 sfi_breg r0, \
169 str r4, [\B], #4
170 .p2align ARM_BX_ALIGN_LOG2
171 sfi_breg r0, \
172 str r5, [\B], #4
173 .p2align ARM_BX_ALIGN_LOG2
174 sfi_breg r0, \
175 str r6, [\B], #4
176 .p2align ARM_BX_ALIGN_LOG2
177 sfi_breg r0, \
178 str r7, [\B], #4
179 .p2align ARM_BX_ALIGN_LOG2
180 sfi_breg r0, \
181 str r8, [\B], #4
182 .p2align ARM_BX_ALIGN_LOG2
183 sfi_breg r0, \
184 str lr, [\B], #4
185
/* Undo the r10 save made before the first computed jump.  The beq 7f path
   above never pushed r10 and also skips this pop.  */
186 #ifdef ARM_ALWAYS_BX
187 pop {r10}
188 cfi_adjust_cfa_offset (-4)
189 cfi_restore (r10)
190 #endif
191
192 CALGN( bcs 2b )
193
194 7: pop {r5 - r8}
195 cfi_adjust_cfa_offset (-16)
196 cfi_restore (r5)
197 cfi_restore (r6)
198 cfi_restore (r7)
199 cfi_restore (r8)
200
/* Byte tail, 0 to 3 bytes.  Shifting r2 left by 31 moves bit 0 of the count
   into the result, so ne means one odd byte, and shifts bit 1 out into the
   carry, so cs means two more bytes.  The last cs load and store omit the
   pointer update because nothing follows them.  */
201 8: movs r2, r2, lsl #31
202 sfi_breg r1, \
203 ldrbne r3, [\B], #1
204 sfi_breg r1, \
205 ldrbcs r4, [\B], #1
206 sfi_breg r1, \
207 ldrbcs ip, [\B]
208 sfi_breg r0, \
209 strbne r3, [\B], #1
210 sfi_breg r0, \
211 strbcs r4, [\B], #1
212 sfi_breg r0, \
213 strbcs ip, [\B]
214
/* Return.  r0 is reloaded with the value pushed on entry.  The first
   variant returns with bx lr, the second pops the saved lr straight into
   pc.  */
215 #if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
216 || defined (ARM_ALWAYS_BX))
217 pop {r0, r4, lr}
218 cfi_adjust_cfa_offset (-12)
219 cfi_restore (r4)
220 cfi_restore (lr)
221 bx lr
222 #else
223 pop {r0, r4, pc}
224 #endif
225
/* The code below runs with r0, r4 and lr still on the stack, so go back to
   the unwind state remembered near the top.  */
226 cfi_restore_state
227
/* Destination misaligned; ip holds dest & 3 (1 to 3).  After the rsb ip is
   the number of bytes needed to reach a word boundary: gt copies a byte
   when that is 3, ge when it is 2 or 3, and the unconditional pair always
   copies one.  The final strb leaves the flags of the subs in place, so
   blt 8b fires when fewer than 4 bytes are left after aligning.  */
228 9: rsb ip, ip, #4
229 cmp ip, #2
230 sfi_breg r1, \
231 ldrbgt r3, [\B], #1
232 sfi_breg r1, \
233 ldrbge r4, [\B], #1
234 sfi_breg r1, \
235 ldrb lr, [\B], #1
236 sfi_breg r0, \
237 strbgt r3, [\B], #1
238 sfi_breg r0, \
239 strbge r4, [\B], #1
240 subs r2, r2, ip
241 sfi_breg r0, \
242 strb lr, [\B], #1
243 blt 8b
/* Destination is aligned now.  If the source is aligned as well, use the
   aligned path at 1; otherwise fall into 10 with ip = src & 3.  */
244 ands ip, r1, #3
245 beq 1b
246
/* Source misaligned by ip bytes (1 to 3) relative to an aligned dest.
   Round the source down to a word boundary and load that first word into
   lr, advancing r1 past it.  The branches test the cmp: offset 2 goes to
   17, offset 3 to 18, and offset 1 falls through into the code that
   follows.  */
247 10: bic r1, r1, #3
248 cmp ip, #2
249 sfi_breg r1, \
250 ldr lr, [\B], #4
251 beq 17f
252 bgt 18f
253
254
/* forward_copy_shift pull push
   Copies from a source that is not word aligned to a word-aligned
   destination, using only aligned word loads and stores.  Each output word
   is the previous source word shifted by PULL pull bits, merged with the
   next source word shifted by PUSH push bits.  PULL and PUSH are the
   endian-dependent shift names defined near the top of this file.  The
   expansions in this file pass pull and push values that add up to 32.

   State expected on entry, as set up at label 10 in this file:
     r0  destination pointer.
     r1  source pointer rounded down to a word boundary and already advanced
         past the word held in lr.
     lr  the most recently loaded source word, part of which is still to be
         written out.
     r2  remaining byte count, biased by 4.
   The macro ends by branching back to label 8, the byte tail of memcpy.
   Labels 11 to 16 are assembler local labels, so every expansion gets its
   own set.  */
255 .macro forward_copy_shift pull push
256
/* Bias the count by another 28 (32 in total).  With fewer than 32 bytes
   left, skip the block loop and go to the word loop set-up at 14.  */
257 subs r2, r2, #28
258 blt 14f
259
/* Optional source cache-line alignment, dropped while CALGN is empty.  */
260 CALGN( ands ip, r1, #31 )
261 CALGN( rsb ip, ip, #32 )
262 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
263 CALGN( subcc r2, r2, ip )
264 CALGN( bcc 15f )
265
/* The block loop needs r5 to r8 and r10 in addition to the registers that
   are already free, so save them for its duration.  */
266 11: push {r5 - r8, r10}
267 cfi_adjust_cfa_offset (20)
268 cfi_rel_offset (r5, 0)
269 cfi_rel_offset (r6, 4)
270 cfi_rel_offset (r7, 8)
271 cfi_rel_offset (r8, 12)
272 cfi_rel_offset (r10, 16)
273
/* Preload set-up for PLD builds, with the same extra bias of 96 and the
   same entry at the second loop label as in the aligned bulk loop.  */
274 PLD( sfi_pld r1, #0 )
275 PLD( subs r2, r2, #96 )
276 PLD( sfi_pld r1, #28 )
277 PLD( blt 13f )
278 PLD( sfi_pld r1, #60 )
279 PLD( sfi_pld r1, #92 )
280
/* Block loop: 32 bytes per iteration.  Eight new source words are loaded
   into r4 to r7 and r8, r10, ip, lr.  Output word k is source word k - 1
   shifted by PULL, merged with source word k shifted by PUSH, where the
   word before r4 is the lr carried in from the previous iteration.  The
   new lr is only used for its PUSH part here and is carried over to the
   next iteration or to the code after the loop.  None of the mov and orr
   instructions set flags, so bge 12b tests the subs.  */
281 12: PLD( sfi_pld r1, #124 )
282 13: sfi_breg r1, \
283 ldmia \B!, {r4, r5, r6, r7}
284 mov r3, lr, PULL #\pull
285 subs r2, r2, #32
286 sfi_breg r1, \
287 ldmia \B!, {r8, r10, ip, lr}
288 orr r3, r3, r4, PUSH #\push
289 mov r4, r4, PULL #\pull
290 orr r4, r4, r5, PUSH #\push
291 mov r5, r5, PULL #\pull
292 orr r5, r5, r6, PUSH #\push
293 mov r6, r6, PULL #\pull
294 orr r6, r6, r7, PUSH #\push
295 mov r7, r7, PULL #\pull
296 orr r7, r7, r8, PUSH #\push
297 mov r8, r8, PULL #\pull
298 orr r8, r8, r10, PUSH #\push
299 mov r10, r10, PULL #\pull
300 orr r10, r10, ip, PUSH #\push
301 mov ip, ip, PULL #\pull
302 orr ip, ip, lr, PUSH #\push
303 sfi_breg r0, \
304 stmia \B!, {r3, r4, r5, r6, r7, r8, r10, ip}
305 bge 12b
306 PLD( cmn r2, #96 )
307 PLD( bge 13b )
308
309 pop {r5 - r8, r10}
310 cfi_adjust_cfa_offset (-20)
311 cfi_restore (r5)
312 cfi_restore (r6)
313 cfi_restore (r7)
314 cfi_restore (r8)
315 cfi_restore (r10)
316
/* ip = number of bytes in the whole words that remain (0, 4, ... 28).  The
   biases applied to r2 so far do not disturb these bits.  Nothing left
   means straight to the pointer fix-up at 16.  */
316 14: ands ip, r2, #28
318 beq 16f
319
/* Word loop: one merged word per iteration, with ip counting down by 4.
   The orr and str leave the flags of the subs in place for bgt.  */
320 15: mov r3, lr, PULL #\pull
321 sfi_breg r1, \
322 ldr lr, [\B], #4
323 subs ip, ip, #4
324 orr r3, r3, lr, PUSH #\push
325 sfi_breg r0, \
326 str r3, [\B], #4
327 bgt 15b
328 CALGN( cmp r2, #0 )
329 CALGN( bge 11b )
330
/* r1 points just past the word in lr, of which push / 8 bytes have not been
   written out yet.  Step r1 back by that many bytes so that it addresses
   the next byte to copy, then let the byte tail of memcpy finish and
   return.  */
331 16: sub r1, r1, #(\push / 8)
332 b 8b
333
334 .endm
335
336
/* The three expansions of forward_copy_shift, one per possible source
   offset within a word.  The dispatch at label 10 falls through into the
   first one for offset 1 and branches to 17 for offset 2 and to 18 for
   offset 3.  In each case pull is 8 times the offset and push is 32 minus
   pull.  Every expansion ends with a branch back to label 8, so control
   never runs from one expansion into the next.  */
337 forward_copy_shift pull=8 push=24
338
339 17: forward_copy_shift pull=16 push=16
340
341 18: forward_copy_shift pull=24 push=8
342
/* END and libc_hidden_builtin_def are macros defined outside this file;
   END closes the function opened by ENTRY(memcpy) above.  */
343 END(memcpy)
344 libc_hidden_builtin_def (memcpy)