]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/arm/memcpy.S
RISC-V: Fix `test' operand error with soft-float ABI being configured
[thirdparty/glibc.git] / sysdeps / arm / memcpy.S
CommitLineData
04277e02 1/* Copyright (C) 2006-2019 Free Software Foundation, Inc.
0572b91b
DJ
2 This file is part of the GNU C Library.
3
4 Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
ab84e3ff
PE
17 License along with the GNU C Library. If not, see
18 <http://www.gnu.org/licenses/>. */
0572b91b 19
365261c3
RH
20/* Thumb requires excessive IT insns here. */
21#define NO_THUMB
0572b91b 22#include <sysdep.h>
9e1d4ac9 23#include <arm-features.h>
0572b91b
DJ
24
25/*
26 * Data preload for architectures that support it (ARM V5TE and above)
27 */
/* PLD(insn...) wraps the data-preload instructions used below.  It
   expands to nothing when any of the older architecture macros listed
   here is defined, and to its argument unchanged otherwise.  */
#if (defined (__ARM_ARCH_2__) || defined (__ARM_ARCH_3__) \
     || defined (__ARM_ARCH_3M__) || defined (__ARM_ARCH_4__) \
     || defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_5__) \
     || defined (__ARM_ARCH_5T__))
# define PLD(insn...)
#else
# define PLD(insn...) insn
#endif
36
37/*
38 * This can be used to enable code to cacheline align the source pointer.
39 * Experiments on tested architectures (StrongARM and XScale) didn't show
40 * this to be a worthwhile thing to do. That might be different in the future.
41 */
/* Cache-line alignment of the source pointer is switched off: CALGN
   discards its argument.  The enabling form would be
       #define CALGN(insn...) insn  */
#define CALGN(insn...)
44
45/*
46 * Endian independent macros for shifting bytes within registers.
47 */
/* PULL and PUSH are the two opposite shift mnemonics used when merging
   neighbouring words in the unaligned-source copy.  Which one is lsl and
   which is lsr depends on whether __ARMEB__ is defined.  */
#ifdef __ARMEB__
# define PULL lsl
# define PUSH lsr
#else
# define PULL lsr
# define PUSH lsl
#endif
55
/* Place the code that follows in the .text section.  */
56 .text
/* Select unified assembler syntax; the conditional mnemonics in this
   file (ldrbne, strbcs, sbcsne, ...) are spelled in that form.  */
38435a9a 57 .syntax unified
0572b91b
DJ
58
59/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
60
/* memcpy: copy n bytes forward from src to dest (see the prototype
   comment just above).

   Register roles as used by the code below:
     r0   store pointer, advanced as data is written.  Its entry value
          is pushed first and popped back into r0 on return.
     r1   load pointer, advanced as data is read.
     r2   remaining byte count, kept biased by the subs instructions
          (-4 at entry, a further -28 in the bulk paths, a further -96
          while the PLD loops run).
     ip   alignment remainder / jump-table offset / scratch.
     r3-r8, r10, lr   data registers.  r4 and lr are saved at entry,
          r5-r8 and r10 are saved just before the paths that use them.

   Layout (numeric local labels):
     entry  fewer than 4 bytes -> 8; dest unaligned -> 9;
            src unaligned -> 10; otherwise fall into 1.
     1-4    word-aligned copy, 32 bytes per ldmia/stmia pair.
     5-7    up to seven leftover words via computed jumps into a
            table of loads (6) and a table of stores (66).
     8      last 0-3 bytes, then return.
     9      copy 1-3 bytes so that dest becomes word aligned.
     10     src not word aligned: one of three expansions of
            forward_copy_shift, chosen by src & 3.

   The cfi_* lines are unwind annotations whose macros are not defined
   in this file (presumably provided via <sysdep.h>).  */
61ENTRY(memcpy)
62
/* Save the original dest (r0) plus r4 and lr.  */
55668624 63 push {r0, r4, lr}
01b32e73
TS
64 cfi_adjust_cfa_offset (12)
65 cfi_rel_offset (r4, 4)
66 cfi_rel_offset (lr, 8)
67
/* Remember the unwind state with only {r0, r4, lr} pushed; it is
   reinstated by cfi_restore_state after the return sequence.  */
68 cfi_remember_state
0572b91b
DJ
69
/* r2 = n - 4.  Negative means fewer than 4 bytes in total: go straight
   to the byte tail at 8.  */
70 subs r2, r2, #4
71 blt 8f
/* ip = dest & 3.  The bne after the preload tests this result.  */
72 ands ip, r0, #3
81cb7a0b 73 PLD( pld [r1, #0] )
0572b91b
DJ
74 bne 9f
/* ip = src & 3; nonzero selects the shifted-copy path at 10.  */
75 ands ip, r1, #3
76 bne 10f
77
/* 1: dest and src are both word aligned here.  r2 becomes
   (remaining - 32).  r5-r8 are saved because the block loop and the
   word tables below load data into them.  Negative r2 means fewer than
   32 bytes remain: skip the block loop.  */
781: subs r2, r2, #(28)
55668624 79 push {r5 - r8}
01b32e73
TS
80 cfi_adjust_cfa_offset (16)
81 cfi_rel_offset (r5, 0)
82 cfi_rel_offset (r6, 4)
83 cfi_rel_offset (r7, 8)
84 cfi_rel_offset (r8, 12)
0572b91b
DJ
85 blt 5f
86
/* Optional source cache-line alignment.  CALGN is defined earlier in
   this file to expand to nothing, so with that definition the CALGN
   lines below produce no code.  */
87 CALGN( ands ip, r1, #31 )
88 CALGN( rsb r3, ip, #32 )
38435a9a 89 CALGN( sbcsne r4, r3, r2 ) @ C is always set here
0572b91b
DJ
90 CALGN( bcs 2f )
91 CALGN( adr r4, 6f )
92 CALGN( subs r2, r2, r3 ) @ C gets set
9e1d4ac9 93#ifndef ARM_ALWAYS_BX
bb48a26a 94 CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
9e1d4ac9 95#else
bb48a26a 96 CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
9e1d4ac9
RM
97 CALGN( bx r4 )
98#endif
0572b91b 99
/* Block loop.  When PLD is enabled, r2 is biased by a further 96 at
   label 2.  If that makes it negative the extra preloads are skipped
   and the loop is entered at 4.  Otherwise it is entered at 3, which
   issues one preload per 32-byte block.  */
81cb7a0b 100 PLD( pld [r1, #0] )
0572b91b 1012: PLD( subs r2, r2, #96 )
81cb7a0b 102 PLD( pld [r1, #28] )
0572b91b 103 PLD( blt 4f )
81cb7a0b
ZW
104 PLD( pld [r1, #60] )
105 PLD( pld [r1, #92] )
0572b91b 106
81cb7a0b
ZW
1073: PLD( pld [r1, #124] )
/* 4: move 32 bytes (eight registers) and advance both pointers.
   Loop to 3 while the biased count is still >= 0.  With PLD, once it
   goes negative, keep looping to 4 (no preload) while r2 + 96 >= 0.  */
1084: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
0572b91b 109 subs r2, r2, #32
81cb7a0b 110 stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
0572b91b
DJ
111 bge 3b
112 PLD( cmn r2, #96 )
113 PLD( bge 4b )
114
/* 5: fewer than 32 bytes remain.  Bits 2-4 of r2 give the bytes held
   in whole words still to copy (0, 4, ..., 28).  The biases applied to
   r2 at 1 and 2 total a multiple of 32 and do not disturb these bits.
   ip becomes 32 minus that amount; the Z flag still reflects the ands
   because rsb does not set flags.  ip scaled by
   2^(ARM_BX_ALIGN_LOG2 - 2) is then the byte offset into the tables
   below, whose slots are each aligned by .p2align ARM_BX_ALIGN_LOG2.
   A larger offset skips more slots, so fewer words are moved.  */
1155: ands ip, r2, #28
116 rsb ip, ip, #32
/* No whole words left (Z set): go to 7.  Otherwise jump into the load
   table.  This variant adds directly to pc; the ARM_ALWAYS_BX variant
   forms the target in r10 (saved first) and uses bx.  */
9e1d4ac9 117#ifndef ARM_ALWAYS_BX
bb48a26a
RM
118 /* C is always clear here. */
119 addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
0572b91b 120 b 7f
9e1d4ac9
RM
121#else
122 beq 7f
123 push {r10}
124 cfi_adjust_cfa_offset (4)
125 cfi_rel_offset (r10, 0)
298e5d56
RM
1260: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
127 /* If alignment is not perfect, then there will be some
128 padding (nop) instructions between this BX and label 6.
129 The computation above assumed that two instructions
130 later is exactly the right spot. */
131 add r10, #(6f - (0b + PC_OFS))
9e1d4ac9
RM
132 bx r10
133#endif
/* Load table: slot 0 is a nop, slots 1-7 each load one word with
   post-increment.  Execution enters at slot ip/4 and falls through to
   the end, so the last (8 - ip/4) loads are executed.  */
bb48a26a 134 .p2align ARM_BX_ALIGN_LOG2
0572b91b 1356: nop
bb48a26a 136 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 137 ldr r3, [r1], #4
bb48a26a 138 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 139 ldr r4, [r1], #4
bb48a26a 140 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 141 ldr r5, [r1], #4
bb48a26a 142 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 143 ldr r6, [r1], #4
bb48a26a 144 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 145 ldr r7, [r1], #4
bb48a26a 146 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 147 ldr r8, [r1], #4
bb48a26a 148 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 149 ldr lr, [r1], #4
0572b91b 150
/* Second computed jump, with the same ip (the loads above do not write
   it), into the matching slot of the store table at 66.  */
9e1d4ac9 151#ifndef ARM_ALWAYS_BX
bb48a26a 152 add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
0572b91b 153 nop
9e1d4ac9 154#else
298e5d56
RM
1550: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
156 /* If alignment is not perfect, then there will be some
157 padding (nop) instructions between this BX and label 66.
158 The computation above assumed that two instructions
159 later is exactly the right spot. */
160 add r10, #(66f - (0b + PC_OFS))
9e1d4ac9
RM
161 bx r10
162#endif
/* Store table: same layout as the load table, storing r3-r8 and lr in
   the order they were loaded.  */
bb48a26a 163 .p2align ARM_BX_ALIGN_LOG2
298e5d56 16466: nop
bb48a26a 165 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 166 str r3, [r0], #4
bb48a26a 167 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 168 str r4, [r0], #4
bb48a26a 169 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 170 str r5, [r0], #4
bb48a26a 171 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 172 str r6, [r0], #4
bb48a26a 173 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 174 str r7, [r0], #4
bb48a26a 175 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 176 str r8, [r0], #4
bb48a26a 177 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 178 str lr, [r0], #4
0572b91b 179
/* Undo the r10 push made before the first computed jump.  */
9e1d4ac9
RM
180#ifdef ARM_ALWAYS_BX
181 pop {r10}
182 cfi_adjust_cfa_offset (-4)
183 cfi_restore (r10)
184#endif
185
0572b91b
DJ
186 CALGN( bcs 2b )
187
/* 7: the word-aligned path is finished; restore r5-r8.  */
55668624 1887: pop {r5 - r8}
01b32e73
TS
189 cfi_adjust_cfa_offset (-16)
190 cfi_restore (r5)
191 cfi_restore (r6)
192 cfi_restore (r7)
193 cfi_restore (r8)
0572b91b
DJ
194
/* 8: byte tail.  Shifting r2 left by 31 leaves bit 0 of the count in
   the result (NE when set) and moves bit 1 into the carry (CS when
   set).  One byte is copied on NE and two more on CS, which is
   (r2 & 3) bytes in total.  The last load/store pair does not advance
   the pointers.  */
1958: movs r2, r2, lsl #31
81cb7a0b
ZW
196 ldrbne r3, [r1], #1
197 ldrbcs r4, [r1], #1
198 ldrbcs ip, [r1]
199 strbne r3, [r0], #1
200 strbcs r4, [r0], #1
201 strbcs ip, [r0]
0572b91b 202
/* Return.  r0 gets back the value pushed at entry and r4 is restored.
   One variant pops lr and returns with bx lr; the other pops the saved
   lr value straight into pc.  */
9e1d4ac9
RM
203#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
204 || defined (ARM_ALWAYS_BX))
55668624 205 pop {r0, r4, lr}
01b32e73
TS
206 cfi_adjust_cfa_offset (-12)
207 cfi_restore (r4)
208 cfi_restore (lr)
b2b2415f
PB
209 bx lr
210#else
55668624 211 pop {r0, r4, pc}
b2b2415f 212#endif
0572b91b 213
/* Everything below is reached only by branches taken while just
   {r0, r4, lr} were on the stack, so reinstate the unwind state
   remembered after the entry push.  */
01b32e73
TS
214 cfi_restore_state
215
0572b91b
DJ
/* 9: dest is not word aligned; ip = dest & 3 (1-3) on arrival.
   ip becomes 4 - ip, the number of bytes needed to align dest.
   The cmp against 2 selects the loads and stores: GT (3 bytes) uses
   r3, GE (2 or 3) uses r4, and lr is always copied.  At least 4 bytes
   are known to remain here because the entry check passed.  */
2169: rsb ip, ip, #4
217 cmp ip, #2
81cb7a0b
ZW
218 ldrbgt r3, [r1], #1
219 ldrbge r4, [r1], #1
220 ldrb lr, [r1], #1
221 strbgt r3, [r0], #1
222 strbge r4, [r0], #1
/* Account for the bytes just copied.  If fewer than 4 bytes now remain
   (biased r2 negative), finish in the byte tail.  */
0572b91b 223 subs r2, r2, ip
81cb7a0b 224 strb lr, [r0], #1
0572b91b
DJ
225 blt 8b
/* dest is now aligned.  If src is aligned too, use the fast path at 1;
   otherwise fall through to 10 with ip = src & 3.  */
226 ands ip, r1, #3
227 beq 1b
228
/* 10: src is not word aligned (ip = src & 3, 1-3).  Round r1 down to a
   word boundary and load the word containing the first source bytes
   into lr.  ip selects the expansion: 1 falls through, 2 goes to 17,
   3 goes to 18.  The ldr leaves the flags from the cmp intact.  */
22910: bic r1, r1, #3
230 cmp ip, #2
81cb7a0b 231 ldr lr, [r1], #4
0572b91b
DJ
232 beq 17f
233 bgt 18f
234
235
/* forward_copy_shift pull push
   Copy from a source that is not word aligned using aligned word loads
   only.  On entry lr holds the most recently loaded source word.
   Each stored word is (previous word PULL pull) | (next word PUSH push);
   the three expansions below use pull + push = 32.  Ends by branching
   to the byte tail at 8.  Numeric labels 11-16 are defined afresh in
   every expansion.  */
236 .macro forward_copy_shift pull push
237
/* r2 = remaining - 32; negative skips the 32-byte loop.  */
238 subs r2, r2, #28
239 blt 14f
240
241 CALGN( ands ip, r1, #31 )
242 CALGN( rsb ip, ip, #32 )
38435a9a 243 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
0572b91b
DJ
244 CALGN( subcc r2, r2, ip )
245 CALGN( bcc 15f )
246
/* 11: save r5-r8 and r10, which the loop uses as data registers.  */
791de446 24711: push {r5 - r8, r10}
01b32e73
TS
248 cfi_adjust_cfa_offset (20)
249 cfi_rel_offset (r5, 0)
250 cfi_rel_offset (r6, 4)
251 cfi_rel_offset (r7, 8)
252 cfi_rel_offset (r8, 12)
791de446 253 cfi_rel_offset (r10, 16)
0572b91b 254
/* Same preload scheme and extra -96 bias as the aligned block loop.  */
81cb7a0b 255 PLD( pld [r1, #0] )
0572b91b 256 PLD( subs r2, r2, #96 )
81cb7a0b 257 PLD( pld [r1, #28] )
0572b91b 258 PLD( blt 13f )
81cb7a0b
ZW
259 PLD( pld [r1, #60] )
260 PLD( pld [r1, #92] )
0572b91b 261
81cb7a0b
ZW
26212: PLD( pld [r1, #124] )
/* 13: load eight new words in two ldmia groups.  r3 starts from the
   part of the old lr word not yet stored.  Each register is then
   combined with its successor, so r3, r4-r8, r10 and ip end up holding
   eight output words, while lr keeps the newest source word for the
   next round.  */
26313: ldmia r1!, {r4, r5, r6, r7}
55668624 264 mov r3, lr, PULL #\pull
0572b91b 265 subs r2, r2, #32
81cb7a0b 266 ldmia r1!, {r8, r10, ip, lr}
55668624
RH
267 orr r3, r3, r4, PUSH #\push
268 mov r4, r4, PULL #\pull
269 orr r4, r4, r5, PUSH #\push
270 mov r5, r5, PULL #\pull
271 orr r5, r5, r6, PUSH #\push
272 mov r6, r6, PULL #\pull
273 orr r6, r6, r7, PUSH #\push
274 mov r7, r7, PULL #\pull
275 orr r7, r7, r8, PUSH #\push
276 mov r8, r8, PULL #\pull
791de446
RM
277 orr r8, r8, r10, PUSH #\push
278 mov r10, r10, PULL #\pull
279 orr r10, r10, ip, PUSH #\push
55668624
RH
280 mov ip, ip, PULL #\pull
281 orr ip, ip, lr, PUSH #\push
81cb7a0b 282 stmia r0!, {r3, r4, r5, r6, r7, r8, r10, ip}
0572b91b
DJ
/* Loop control tests the flags from the subs above; none of the
   mov/orr/stmia instructions in between set flags.  */
283 bge 12b
284 PLD( cmn r2, #96 )
285 PLD( bge 13b )
286
791de446 287 pop {r5 - r8, r10}
01b32e73
TS
288 cfi_adjust_cfa_offset (-20)
289 cfi_restore (r5)
290 cfi_restore (r6)
291 cfi_restore (r7)
292 cfi_restore (r8)
791de446 293 cfi_restore (r10)
0572b91b
DJ
294
/* 14: ip = bytes held in whole words still to copy (0-28); none left
   goes to 16.  */
29514: ands ip, r2, #28
296 beq 16f
297
/* 15: one output word per iteration, built from the old lr word and a
   freshly loaded one; ip counts down by 4 until it reaches zero.  */
55668624 29815: mov r3, lr, PULL #\pull
81cb7a0b 299 ldr lr, [r1], #4
0572b91b 300 subs ip, ip, #4
55668624 301 orr r3, r3, lr, PUSH #\push
81cb7a0b 302 str r3, [r0], #4
0572b91b
DJ
303 bgt 15b
304 CALGN( cmp r2, #0 )
305 CALGN( bge 11b )
306
/* 16: r1 points past the whole word held in lr, but push/8 bytes of
   that word have not been stored yet.  Step r1 back by that many bytes
   so the byte tail at 8 reads them from memory.  */
30716: sub r1, r1, #(\push / 8)
308 b 8b
309
310 .endm
311
312
/* The three expansions, in the order selected at label 10: fall-through
   for src & 3 == 1, label 17 for 2, label 18 for 3.  */
313 forward_copy_shift pull=8 push=24
314
31517: forward_copy_shift pull=16 push=16
316
31718: forward_copy_shift pull=24 push=8
318
319END(memcpy)
320libc_hidden_builtin_def (memcpy)