]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/arm/memmove.S
iconv, localedef: avoid floating point rounding differences [BZ #24372]
[thirdparty/glibc.git] / sysdeps / arm / memmove.S
CommitLineData
04277e02 1/* Copyright (C) 2006-2019 Free Software Foundation, Inc.
0572b91b
DJ
2 This file is part of the GNU C Library.
3
4 Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
ab84e3ff
PE
17 License along with the GNU C Library. If not, see
18 <http://www.gnu.org/licenses/>. */
0572b91b 19
365261c3
RH
20/* Thumb requires excessive IT insns here. */
21#define NO_THUMB
0572b91b 22#include <sysdep.h>
9e1d4ac9 23#include <arm-features.h>
0572b91b
DJ
24
25/*
26 * Data preload for architectures that support it (ARM V5TE and above)
 *
 * PLD(code...) expands to its argument unless one of the architecture
 * macros tested below (__ARM_ARCH_2__ through __ARM_ARCH_5T__) is defined,
 * in which case it expands to nothing. Every instruction wrapped in
 * PLD() -- the pld hints and the extra counter adjustments that go with
 * them -- therefore disappears on those older architectures.
27 */
28#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
29 && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
30 && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
31 && !defined (__ARM_ARCH_5T__))
32#define PLD(code...) code
33#else
34#define PLD(code...)
35#endif
36
37/*
38 * This can be used to enable code to cacheline align the source pointer.
39 * Experiments on tested architectures (StrongARM and XScale) didn't show
40 * this to be a worthwhile thing to do. That might be different in the future.
 *
 * With the empty definition that is active below, every CALGN(...) line
 * in this file expands to nothing; switch to the commented-out definition
 * to assemble the alignment code.
41 */
42//#define CALGN(code...) code
43#define CALGN(code...)
44
45/*
46 * Endian independent macros for shifting bytes within registers.
 *
 * When __ARMEB__ is not defined (little-endian), PULL is a logical shift
 * right and PUSH a logical shift left; when it is defined (big-endian) the
 * two are swapped. The misaligned copy code combines two adjacent source
 * words as (word PUSH #push) | (other PULL #pull).
47 */
48#ifndef __ARMEB__
55668624
RH
49#define PULL lsr
50#define PUSH lsl
0572b91b 51#else
55668624
RH
52#define PULL lsl
53#define PUSH lsr
0572b91b
DJ
54#endif
55
56 .text
38435a9a 57 .syntax unified
0572b91b
DJ
58
59/*
60 * Prototype: void *memmove(void *dest, const void *src, size_t n);
61 *
62 * Note:
63 *
64 * If dest is at or below src, or the memory regions don't overlap, we simply
65 * branch to memcpy which is normally a bit faster. Otherwise the copy is done
 * going downwards, from the end of the buffers towards their start.
66 */
67
68ENTRY(memmove)
69
/* Presumably r0 = dest, r1 = src, r2 = n (standard ARM argument registers
   for the prototype above). ip = dest - src. The branch to memcpy is taken
   when dest <= src (unsigned), or when dest > src and n <= dest - src. */
70 subs ip, r0, r1
71 cmphi r2, ip
4f41c682 72#if !IS_IN (libc)
0572b91b 73 bls memcpy
5de92c17
JM
74#else
75 bls HIDDEN_JUMPTARGET(memcpy)
76#endif
0572b91b 77
/* Backward copy. The original r0 is saved here and popped back into r0 on
   return; r4 and lr are saved because they are used as copy registers. */
55668624 78 push {r0, r4, lr}
01b32e73
TS
79 cfi_adjust_cfa_offset (12)
80 cfi_rel_offset (r4, 4)
81 cfi_rel_offset (lr, 8)
82
83 cfi_remember_state
84
0572b91b
DJ
/* Advance both pointers to one past the end of their buffers. r2 = n - 4;
   if that is negative only the 0-3 byte tail at 8 is needed. Otherwise
   dispatch on the low two bits of the dest end (9) and the src end (10). */
85 add r1, r1, r2
86 add r0, r0, r2
87 subs r2, r2, #4
88 blt 8f
89 ands ip, r0, #3
81cb7a0b 90 PLD( pld [r1, #-4] )
0572b91b
DJ
91 bne 9f
92 ands ip, r1, #3
93 bne 10f
94
/* 1: both pointers are word aligned. r2 is lowered by a further 28, so it
   is negative when fewer than 32 bytes remain and the block loop is skipped.
   r5-r8 are saved so that eight registers are available for the copy. */
951: subs r2, r2, #(28)
55668624 96 push {r5 - r8}
01b32e73
TS
97 cfi_adjust_cfa_offset (16)
98 cfi_rel_offset (r5, 0)
99 cfi_rel_offset (r6, 4)
100 cfi_rel_offset (r7, 8)
101 cfi_rel_offset (r8, 12)
0572b91b
DJ
102 blt 5f
103
/* Optional source cache-line alignment; assembled only when CALGN is
   defined to emit its argument. */
104 CALGN( ands ip, r1, #31 )
38435a9a 105 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
0572b91b
DJ
106 CALGN( bcs 2f )
107 CALGN( adr r4, 6f )
108 CALGN( subs r2, r2, ip ) @ C is set here
9e1d4ac9 109#ifndef ARM_ALWAYS_BX
bb48a26a 110 CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
9e1d4ac9 111#else
bb48a26a 112 CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
9e1d4ac9
RM
113 CALGN( bx r4 )
114#endif
0572b91b 115
/* With preload enabled r2 is biased by a further 96 so that the loop at 3
   keeps issuing pld while at least 96 more bytes follow; the cmn/bge after
   the loop then finishes the remaining blocks from 4 without preloading. */
81cb7a0b 116 PLD( pld [r1, #-4] )
0572b91b 1172: PLD( subs r2, r2, #96 )
81cb7a0b 118 PLD( pld [r1, #-32] )
0572b91b 119 PLD( blt 4f )
81cb7a0b
ZW
120 PLD( pld [r1, #-64] )
121 PLD( pld [r1, #-96] )
0572b91b 122
81cb7a0b
ZW
/* Block loop: move 32 bytes per iteration, descending, with one ldmdb and
   one stmdb of eight registers. */
1233: PLD( pld [r1, #-128] )
1244: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
0572b91b 125 subs r2, r2, #32
81cb7a0b 126 stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
0572b91b
DJ
127 bge 3b
128 PLD( cmn r2, #96 )
129 PLD( bge 4b )
130
/* 5: r2 & 28 is the number of bytes left in whole words (0, 4, ..., 28).
   If it is zero go straight to 7. Otherwise ip = 32 - that count, and the
   computed jump skips ip / 4 slots of the load ladder at 6 (the nop plus
   the loads that are not needed), so exactly the remaining words are
   loaded. Each slot is padded to 1 << ARM_BX_ALIGN_LOG2 bytes by the
   .p2align directives, hence the lsl #(ARM_BX_ALIGN_LOG2 - 2) scaling. */
1315: ands ip, r2, #28
132 rsb ip, ip, #32
9e1d4ac9 133#ifndef ARM_ALWAYS_BX
bb48a26a
RM
134 /* C is always clear here. */
135 addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
0572b91b 136 b 7f
9e1d4ac9
RM
137#else
138 beq 7f
139 push {r10}
140 cfi_adjust_cfa_offset (4)
141 cfi_rel_offset (r10, 0)
298e5d56
RM
1420: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
143 /* If alignment is not perfect, then there will be some
144 padding (nop) instructions between this BX and label 6.
145 The computation above assumed that two instructions
146 later is exactly the right spot. */
147 add r10, #(6f - (0b + PC_OFS))
9e1d4ac9
RM
148 bx r10
149#endif
/* Load ladder: one word per slot, descending, into r3-r8 and lr. */
bb48a26a 150 .p2align ARM_BX_ALIGN_LOG2
0572b91b 1516: nop
bb48a26a 152 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 153 ldr r3, [r1, #-4]!
bb48a26a 154 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 155 ldr r4, [r1, #-4]!
bb48a26a 156 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 157 ldr r5, [r1, #-4]!
bb48a26a 158 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 159 ldr r6, [r1, #-4]!
bb48a26a 160 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 161 ldr r7, [r1, #-4]!
bb48a26a 162 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 163 ldr r8, [r1, #-4]!
bb48a26a 164 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 165 ldr lr, [r1, #-4]!
0572b91b 166
/* Second computed jump, using the same ip, into the matching store ladder
   so that the registers just loaded are written out in the same order. */
9e1d4ac9 167#ifndef ARM_ALWAYS_BX
bb48a26a 168 add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
0572b91b 169 nop
9e1d4ac9 170#else
298e5d56
RM
1710: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
172 /* If alignment is not perfect, then there will be some
173 padding (nop) instructions between this BX and label 66.
174 The computation above assumed that two instructions
175 later is exactly the right spot. */
176 add r10, #(66f - (0b + PC_OFS))
9e1d4ac9
RM
177 bx r10
178#endif
bb48a26a 179 .p2align ARM_BX_ALIGN_LOG2
298e5d56 18066: nop
bb48a26a 181 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 182 str r3, [r0, #-4]!
bb48a26a 183 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 184 str r4, [r0, #-4]!
bb48a26a 185 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 186 str r5, [r0, #-4]!
bb48a26a 187 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 188 str r6, [r0, #-4]!
bb48a26a 189 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 190 str r7, [r0, #-4]!
bb48a26a 191 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 192 str r8, [r0, #-4]!
bb48a26a 193 .p2align ARM_BX_ALIGN_LOG2
81cb7a0b 194 str lr, [r0, #-4]!
0572b91b 195
9e1d4ac9
RM
/* r10 was only pushed on the ARM_ALWAYS_BX path above, to hold the
   computed branch target. */
196#ifdef ARM_ALWAYS_BX
197 pop {r10}
198 cfi_adjust_cfa_offset (-4)
199 cfi_restore (r10)
200#endif
201
0572b91b
DJ
202 CALGN( bcs 2b )
203
55668624 2047: pop {r5 - r8}
01b32e73
TS
205 cfi_adjust_cfa_offset (-16)
206 cfi_restore (r5)
207 cfi_restore (r6)
208 cfi_restore (r7)
209 cfi_restore (r8)
0572b91b
DJ
210
/* 8: byte tail. The shift leaves a non-zero result (NE) when bit 0 of r2
   is set and puts bit 1 of r2 into the carry flag (CS); NE copies one
   byte and CS copies two more, so 0-3 bytes are moved in total. */
2118: movs r2, r2, lsl #31
81cb7a0b
ZW
212 ldrbne r3, [r1, #-1]!
213 ldrbcs r4, [r1, #-1]!
214 ldrbcs ip, [r1, #-1]
215 strbne r3, [r0, #-1]!
216 strbcs r4, [r0, #-1]!
217 strbcs ip, [r0, #-1]
01b32e73 218
/* Return: the value pushed from r0 at entry is popped back into r0. The
   first variant restores lr and returns with bx; the second pops the saved
   lr directly into pc. */
9e1d4ac9
RM
219#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
220 || defined (ARM_ALWAYS_BX))
55668624 221 pop {r0, r4, lr}
01b32e73
TS
222 cfi_adjust_cfa_offset (-12)
223 cfi_restore (r4)
224 cfi_restore (lr)
b2b2415f
PB
225 bx lr
226#else
55668624 227 pop {r0, r4, pc}
b2b2415f 228#endif
0572b91b 229
01b32e73
TS
230 cfi_restore_state
231
/* 9: the dest end is not word aligned; ip = dest end & 3 (1, 2 or 3).
   Copy exactly ip bytes singly (GT: ip == 3, GE: ip >= 2, plus one
   unconditional byte) so that dest becomes word aligned, and take them off
   r2. A negative r2 means fewer than 4 bytes remain: finish at 8.
   Otherwise re-test the src alignment and use the aligned path at 1 when
   it is word aligned too, else fall into 10. */
0572b91b 2329: cmp ip, #2
81cb7a0b
ZW
233 ldrbgt r3, [r1, #-1]!
234 ldrbge r4, [r1, #-1]!
235 ldrb lr, [r1, #-1]!
236 strbgt r3, [r0, #-1]!
237 strbge r4, [r0, #-1]!
0572b91b 238 subs r2, r2, ip
81cb7a0b 239 strb lr, [r0, #-1]!
0572b91b
DJ
240 blt 8b
241 ands ip, r1, #3
242 beq 1b
243
/* 10: dest is word aligned but src is not; ip = src end & 3 (1, 2 or 3).
   Round r1 down to a word boundary and load the partially used word into
   r3, then select the shift variant: ip == 3 falls through to the
   push=8/pull=24 expansion, ip == 2 goes to 17 and ip == 1 goes to 18. */
24410: bic r1, r1, #3
245 cmp ip, #2
81cb7a0b 246 ldr r3, [r1, #0]
0572b91b
DJ
247 beq 17f
248 blt 18f
249
250
/* backward_copy_shift push pull

   Descending copy for a source that sits pull / 8 bytes above a word
   boundary (push + pull is 32 in all three expansions below). On entry r1
   is the rounded-down source pointer and r3 holds the word loaded from it.
   Throughout, r3 is the most recently loaded source word; each word stored
   is (r3 PUSH #push) | (next lower source word PULL #pull). At 16 the
   byte offset pull / 8 is added back to r1 and control returns to the
   byte tail at 8. */
251 .macro backward_copy_shift push pull
252
253 subs r2, r2, #28
254 blt 14f
255
256 CALGN( ands ip, r1, #31 )
257 CALGN( rsb ip, ip, #32 )
38435a9a 258 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
0572b91b
DJ
259 CALGN( subcc r2, r2, ip )
260 CALGN( bcc 15f )
261
/* 32-byte blocks: r5-r8 and r10 are saved for use as extra registers. */
791de446 26211: push {r5 - r8, r10}
01b32e73
TS
263 cfi_adjust_cfa_offset (20)
264 cfi_rel_offset (r5, 0)
265 cfi_rel_offset (r6, 4)
266 cfi_rel_offset (r7, 8)
267 cfi_rel_offset (r8, 12)
791de446 268 cfi_rel_offset (r10, 16)
0572b91b 269
81cb7a0b 270 PLD( pld [r1, #-4] )
0572b91b 271 PLD( subs r2, r2, #96 )
81cb7a0b 272 PLD( pld [r1, #-32] )
0572b91b 273 PLD( blt 13f )
81cb7a0b
ZW
274 PLD( pld [r1, #-64] )
275 PLD( pld [r1, #-96] )
0572b91b 276
81cb7a0b
ZW
/* Load eight source words with two ldmdb, merge each adjacent pair from
   the highest address downwards (starting with the carried-over r3), and
   store the eight merged words with one stmdb. The newly loaded r3 is the
   lowest word and carries over to the next iteration. */
27712: PLD( pld [r1, #-128] )
27813: ldmdb r1!, {r7, r8, r10, ip}
55668624 279 mov lr, r3, PUSH #\push
0572b91b 280 subs r2, r2, #32
81cb7a0b 281 ldmdb r1!, {r3, r4, r5, r6}
55668624
RH
282 orr lr, lr, ip, PULL #\pull
283 mov ip, ip, PUSH #\push
791de446
RM
284 orr ip, ip, r10, PULL #\pull
285 mov r10, r10, PUSH #\push
286 orr r10, r10, r8, PULL #\pull
55668624
RH
287 mov r8, r8, PUSH #\push
288 orr r8, r8, r7, PULL #\pull
289 mov r7, r7, PUSH #\push
290 orr r7, r7, r6, PULL #\pull
291 mov r6, r6, PUSH #\push
292 orr r6, r6, r5, PULL #\pull
293 mov r5, r5, PUSH #\push
294 orr r5, r5, r4, PULL #\pull
295 mov r4, r4, PUSH #\push
296 orr r4, r4, r3, PULL #\pull
81cb7a0b 297 stmdb r0!, {r4 - r8, r10, ip, lr}
0572b91b
DJ
298 bge 12b
299 PLD( cmn r2, #96 )
300 PLD( bge 13b )
301
791de446 302 pop {r5 - r8, r10}
01b32e73
TS
303 cfi_adjust_cfa_offset (-20)
304 cfi_restore (r5)
305 cfi_restore (r6)
306 cfi_restore (r7)
307 cfi_restore (r8)
791de446 308 cfi_restore (r10)
0572b91b
DJ
309
/* 14: ip = bytes left in whole words (0, 4, ..., 28); none means skip to
   16. The loop at 15 merges and stores one word per iteration. */
31014: ands ip, r2, #28
311 beq 16f
312
55668624 31315: mov lr, r3, PUSH #\push
81cb7a0b 314 ldr r3, [r1, #-4]!
0572b91b 315 subs ip, ip, #4
55668624 316 orr lr, lr, r3, PULL #\pull
81cb7a0b 317 str lr, [r0, #-4]!
0572b91b
DJ
318 bgt 15b
319 CALGN( cmp r2, #0 )
320 CALGN( bge 11b )
321
/* 16: turn r1 back into a byte-exact pointer for the tail copy at 8. */
32216: add r1, r1, #(\pull / 8)
323 b 8b
324
325 .endm
326
327
/* Three expansions, one per source misalignment: fall-through from 10
   (ip == 3), label 17 (ip == 2) and label 18 (ip == 1). */
328 backward_copy_shift push=8 pull=24
329
33017: backward_copy_shift push=16 pull=16
331
33218: backward_copy_shift push=24 pull=8
333
334
335END(memmove)
336libc_hidden_builtin_def (memmove)