Provenance: gitweb view from the git.ipfire.org mirror of thirdparty/glibc.git,
blob sysdeps/arm/memmove.S.  (The page header also showed the then-current
commit subject: "RISC-V: Fix `test' operand error with soft-float ABI being
configured" — unrelated to this file's contents.)
1 /* Copyright (C) 2006-2019 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library. If not, see
18 <http://www.gnu.org/licenses/>. */
19
20 /* Thumb requires excessive IT insns here. */
21 #define NO_THUMB
22 #include <sysdep.h>
23 #include <arm-features.h>
24
25 /*
26  * Data preload for architectures that support it (ARM V5TE and above)
27  */
@ PLD(x) expands its argument only when the target architecture has the
@ pld (preload) instruction; on older cores it expands to nothing, so the
@ copy loops below assemble with or without prefetch hints.
28 #if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
29 && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
30 && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
31 && !defined (__ARM_ARCH_5T__))
32 #define PLD(code...) code
33 #else
34 #define PLD(code...)
35 #endif
36
37 /*
38 * This can be used to enable code to cacheline align the source pointer.
39 * Experiments on tested architectures (StrongARM and XScale) didn't show
40 * this a worthwhile thing to do. That might be different in the future.
41 */
@ CALGN is disabled by default: the cache-alignment sequences guarded by
@ it assemble to nothing unless the first definition is uncommented.
42 //#define CALGN(code...) code
43 #define CALGN(code...)
44
45 /*
46  * Endian independent macros for shifting bytes within registers.
47 */
@ PULL/PUSH abstract the shift direction used when merging bytes that
@ straddle a word boundary (see backward_copy_shift below).  Swapping
@ lsr/lsl between little- and big-endian lets the same merge code work
@ for both byte orders.
48 #ifndef __ARMEB__
49 #define PULL lsr
50 #define PUSH lsl
51 #else
52 #define PULL lsl
53 #define PUSH lsr
54 #endif
55
56 .text
57 .syntax unified
58
59 /*
60 * Prototype: void *memmove(void *dest, const void *src, size_t n);
61 *
62 * Note:
63 *
64 * If the memory regions don't overlap, we simply branch to memcpy which is
65 * normally a bit faster. Otherwise the copy is done going downwards.
66 */
67
68 ENTRY(memmove)
69
@ Overlap test.  ip = dest - src (unsigned, mod 2^32).  If dest <= src
@ (subs yields ls), cmphi is skipped and we branch on the subs flags;
@ otherwise branch if n <= dest - src.  In either case a plain forward
@ memcpy cannot clobber source bytes it has not yet read, so tail-call it.
@ Only genuinely destructive overlaps fall through to the backward copy.
70 subs ip, r0, r1
71 cmphi r2, ip
72 #if !IS_IN (libc)
73 bls memcpy
74 #else
75 bls HIDDEN_JUMPTARGET(memcpy)
76 #endif
77
@ r0 must be returned unchanged, so save it (popped into r0/pc at exit).
78 push {r0, r4, lr}
79 cfi_adjust_cfa_offset (12)
80 cfi_rel_offset (r4, 4)
81 cfi_rel_offset (lr, 8)
82
83 cfi_remember_state
84
@ Copy backwards: point r0/r1 one past the end of each buffer; every
@ load/store below uses pre-decrement addressing.
85 add r1, r1, r2
86 add r0, r0, r2
@ r2 -= 4: fewer than 4 bytes total -> byte tail at 8:.
87 subs r2, r2, #4
88 blt 8f
@ Destination not word-aligned -> fix up 1-3 bytes at 9:.
89 ands ip, r0, #3
90 PLD( pld [r1, #-4] )
91 bne 9f
@ Source not word-aligned (dest now is) -> shifted copy at 10:.
92 ands ip, r1, #3
93 bne 10f
94
@ --- Both pointers word-aligned. ---
@ r2 -= 28 (so r2 >= 0 means at least 32 bytes remain for the big loop).
95 1: subs r2, r2, #(28)
96 push {r5 - r8}
97 cfi_adjust_cfa_offset (16)
98 cfi_rel_offset (r5, 0)
99 cfi_rel_offset (r6, 4)
100 cfi_rel_offset (r7, 8)
101 cfi_rel_offset (r8, 12)
102 blt 5f
103
@ Optional (disabled) cache-line alignment of the source: jump into the
@ ldr ladder at 6: to consume ip bytes so r1 becomes 32-byte aligned.
104 CALGN( ands ip, r1, #31 )
105 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
106 CALGN( bcs 2f )
107 CALGN( adr r4, 6f )
108 CALGN( subs r2, r2, ip ) @ C is set here
109 #ifndef ARM_ALWAYS_BX
110 CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
111 #else
112 CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
113 CALGN( bx r4 )
114 #endif
115
@ Prime the prefetch window (3 lines ahead) before entering the loop.
116 PLD( pld [r1, #-4] )
117 2: PLD( subs r2, r2, #96 )
118 PLD( pld [r1, #-32] )
119 PLD( blt 4f )
120 PLD( pld [r1, #-64] )
121 PLD( pld [r1, #-96] )
122
@ Main loop: move 32 bytes per iteration with one ldmdb/stmdb pair.
@ 3: is the prefetching entry, 4: the drained (no more pld) entry.
123 3: PLD( pld [r1, #-128] )
124 4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
125 subs r2, r2, #32
126 stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
127 bge 3b
128 PLD( cmn r2, #96 )
129 PLD( bge 4b )
130
@ 0-28 bytes of whole words remain.  Compute a jump into the ldr ladder
@ at 6: so exactly (ip/4) single-word loads execute (Duff's-device style;
@ each ladder slot is padded to 1<<ARM_BX_ALIGN_LOG2 bytes by .p2align).
131 5: ands ip, r2, #28
132 rsb ip, ip, #32
133 #ifndef ARM_ALWAYS_BX
134 /* C is always clear here. */
135 addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
136 b 7f
137 #else
@ ARM_ALWAYS_BX targets must not write pc directly; build the target
@ address in r10 and bx to it instead.
138 beq 7f
139 push {r10}
140 cfi_adjust_cfa_offset (4)
141 cfi_rel_offset (r10, 0)
142 0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
143 /* If alignment is not perfect, then there will be some
144 padding (nop) instructions between this BX and label 6.
145 The computation above assumed that two instructions
146 later is exactly the right spot. */
147 add r10, #(6f - (0b + PC_OFS))
148 bx r10
149 #endif
150 .p2align ARM_BX_ALIGN_LOG2
151 6: nop
152 .p2align ARM_BX_ALIGN_LOG2
153 ldr r3, [r1, #-4]!
154 .p2align ARM_BX_ALIGN_LOG2
155 ldr r4, [r1, #-4]!
156 .p2align ARM_BX_ALIGN_LOG2
157 ldr r5, [r1, #-4]!
158 .p2align ARM_BX_ALIGN_LOG2
159 ldr r6, [r1, #-4]!
160 .p2align ARM_BX_ALIGN_LOG2
161 ldr r7, [r1, #-4]!
162 .p2align ARM_BX_ALIGN_LOG2
163 ldr r8, [r1, #-4]!
164 .p2align ARM_BX_ALIGN_LOG2
165 ldr lr, [r1, #-4]!
166
@ Same computed jump into the matching str ladder at 66: so the words
@ just loaded are stored (ip is unchanged, so the counts agree).
167 #ifndef ARM_ALWAYS_BX
168 add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
169 nop
170 #else
171 0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
172 /* If alignment is not perfect, then there will be some
173 padding (nop) instructions between this BX and label 66.
174 The computation above assumed that two instructions
175 later is exactly the right spot. */
176 add r10, #(66f - (0b + PC_OFS))
177 bx r10
178 #endif
179 .p2align ARM_BX_ALIGN_LOG2
180 66: nop
181 .p2align ARM_BX_ALIGN_LOG2
182 str r3, [r0, #-4]!
183 .p2align ARM_BX_ALIGN_LOG2
184 str r4, [r0, #-4]!
185 .p2align ARM_BX_ALIGN_LOG2
186 str r5, [r0, #-4]!
187 .p2align ARM_BX_ALIGN_LOG2
188 str r6, [r0, #-4]!
189 .p2align ARM_BX_ALIGN_LOG2
190 str r7, [r0, #-4]!
191 .p2align ARM_BX_ALIGN_LOG2
192 str r8, [r0, #-4]!
193 .p2align ARM_BX_ALIGN_LOG2
194 str lr, [r0, #-4]!
195
196 #ifdef ARM_ALWAYS_BX
197 pop {r10}
198 cfi_adjust_cfa_offset (-4)
199 cfi_restore (r10)
200 #endif
201
@ Only reachable with CALGN enabled: after the alignment fix-up, resume
@ the main loop.
202 CALGN( bcs 2b )
203
204 7: pop {r5 - r8}
205 cfi_adjust_cfa_offset (-16)
206 cfi_restore (r5)
207 cfi_restore (r6)
208 cfi_restore (r7)
209 cfi_restore (r8)
210
@ Byte tail: r2 holds (remaining - 4).  Shifting left by 31 puts bit 0
@ into N and bit 1 into C, so ne copies one byte and cs copies two —
@ together handling the final 0-3 bytes without a loop.
211 8: movs r2, r2, lsl #31
212 ldrbne r3, [r1, #-1]!
213 ldrbcs r4, [r1, #-1]!
214 ldrbcs ip, [r1, #-1]
215 strbne r3, [r0, #-1]!
216 strbcs r4, [r0, #-1]!
217 strbcs ip, [r0, #-1]
218
@ Return: restore the saved dest pointer into r0.  Targets needing
@ interworking (or ARM_ALWAYS_BX) must return via bx lr rather than
@ popping directly into pc.
219 #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
220 || defined (ARM_ALWAYS_BX))
221 pop {r0, r4, lr}
222 cfi_adjust_cfa_offset (-12)
223 cfi_restore (r4)
224 cfi_restore (lr)
225 bx lr
226 #else
227 pop {r0, r4, pc}
228 #endif
229
230 cfi_restore_state
231
@ Align the destination: ip = r0 & 3 bytes must be copied first.
@ gt/ge/always conditions copy 3, 2 or 1 bytes respectively (flags set
@ by the cmp), then re-test the count and the source's alignment.
232 9: cmp ip, #2
233 ldrbgt r3, [r1, #-1]!
234 ldrbge r4, [r1, #-1]!
235 ldrb lr, [r1, #-1]!
236 strbgt r3, [r0, #-1]!
237 strbge r4, [r0, #-1]!
238 subs r2, r2, ip
239 strb lr, [r0, #-1]!
240 blt 8b
241 ands ip, r1, #3
242 beq 1b
243
@ Dest aligned, source off by ip = 1, 2 or 3 bytes.  Round r1 down to a
@ word boundary, preload the partial word into r3, and dispatch to the
@ shifted-copy variant for this offset (fall-through: ip == 3;
@ 17: ip == 2; 18: ip == 1 — see the instantiations after the macro).
244 10: bic r1, r1, #3
245 cmp ip, #2
246 ldr r3, [r1, #0]
247 beq 17f
248 blt 18f
249
250
@ Backward copy for a source that is misaligned by a fixed byte offset
@ relative to the (word-aligned) destination.  All loads use the
@ word-aligned r1; each output word is assembled from two adjacent
@ source words with PUSH #\push / PULL #\pull shifts (\push + \pull = 32).
@ On entry r3 already holds the partially-consumed source word (loaded
@ at label 10) and r2 holds remaining-bytes - 4.
251 .macro backward_copy_shift push pull
252
@ Fewer than 32 bytes left -> skip the unrolled loop.
253 subs r2, r2, #28
254 blt 14f
255
256 CALGN( ands ip, r1, #31 )
257 CALGN( rsb ip, ip, #32 )
258 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
259 CALGN( subcc r2, r2, ip )
260 CALGN( bcc 15f )
261
262 11: push {r5 - r8, r10}
263 cfi_adjust_cfa_offset (20)
264 cfi_rel_offset (r5, 0)
265 cfi_rel_offset (r6, 4)
266 cfi_rel_offset (r7, 8)
267 cfi_rel_offset (r8, 12)
268 cfi_rel_offset (r10, 16)
269
270 PLD( pld [r1, #-4] )
271 PLD( subs r2, r2, #96 )
272 PLD( pld [r1, #-32] )
273 PLD( blt 13f )
274 PLD( pld [r1, #-64] )
275 PLD( pld [r1, #-96] )
276
@ 32-bytes-per-iteration loop: load 8 source words in two ldmdb bursts,
@ then merge each neighbouring pair into an output word.  The merge runs
@ high-to-low so each register is combined with its lower neighbour
@ before being overwritten; r3 carries the straddling word into the
@ next iteration.
277 12: PLD( pld [r1, #-128] )
278 13: ldmdb r1!, {r7, r8, r10, ip}
279 mov lr, r3, PUSH #\push
280 subs r2, r2, #32
281 ldmdb r1!, {r3, r4, r5, r6}
282 orr lr, lr, ip, PULL #\pull
283 mov ip, ip, PUSH #\push
284 orr ip, ip, r10, PULL #\pull
285 mov r10, r10, PUSH #\push
286 orr r10, r10, r8, PULL #\pull
287 mov r8, r8, PUSH #\push
288 orr r8, r8, r7, PULL #\pull
289 mov r7, r7, PUSH #\push
290 orr r7, r7, r6, PULL #\pull
291 mov r6, r6, PUSH #\push
292 orr r6, r6, r5, PULL #\pull
293 mov r5, r5, PUSH #\push
294 orr r5, r5, r4, PULL #\pull
295 mov r4, r4, PUSH #\push
296 orr r4, r4, r3, PULL #\pull
297 stmdb r0!, {r4 - r8, r10, ip, lr}
298 bge 12b
299 PLD( cmn r2, #96 )
300 PLD( bge 13b )
301
302 pop {r5 - r8, r10}
303 cfi_adjust_cfa_offset (-20)
304 cfi_restore (r5)
305 cfi_restore (r6)
306 cfi_restore (r7)
307 cfi_restore (r8)
308 cfi_restore (r10)
309
@ Remaining whole words (0-28 bytes), one shifted word per iteration.
310 14: ands ip, r2, #28
311 beq 16f
312
313 15: mov lr, r3, PUSH #\push
314 ldr r3, [r1, #-4]!
315 subs ip, ip, #4
316 orr lr, lr, r3, PULL #\pull
317 str lr, [r0, #-4]!
318 bgt 15b
319 CALGN( cmp r2, #0 )
320 CALGN( bge 11b )
321
@ Undo the word rounding: advance r1 by \pull/8 bytes so it points at
@ the true (unaligned) source position, then share the byte tail at 8:.
322 16: add r1, r1, #(\pull / 8)
323 b 8b
324
325 .endm
326
327
@ One instantiation per source misalignment, dispatched from label 10
@ above: fall-through (ip == 3) uses push=8/pull=24, label 17 (ip == 2)
@ uses push=16/pull=16, label 18 (ip == 1) uses push=24/pull=8.
328 backward_copy_shift push=8 pull=24
329
330 17: backward_copy_shift push=16 pull=16
331
332 18: backward_copy_shift push=24 pull=8
333
334
335 END(memmove)
336 libc_hidden_builtin_def (memmove)