]>
Commit | Line | Data |
---|---|---|
04277e02 | 1 | /* Copyright (C) 2006-2019 Free Software Foundation, Inc. |
0572b91b DJ |
2 | This file is part of the GNU C Library. |
3 | ||
4 | Contributed by MontaVista Software, Inc. (written by Nicolas Pitre) | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
ab84e3ff PE |
17 | License along with the GNU C Library. If not, see |
18 | <http://www.gnu.org/licenses/>. */ | |
0572b91b | 19 | |
365261c3 RH |
20 | /* Thumb requires excessive IT insns here. */ |
21 | #define NO_THUMB | |
0572b91b | 22 | #include <sysdep.h> |
9e1d4ac9 | 23 | #include <arm-features.h> |
0572b91b DJ |
24 | |
25 | /* | |
26 | * Data preload for architectures that support it (ARM V5TE and above) | |
27 | */ | |
28 | #if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \ | |
29 | && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \ | |
30 | && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \ | |
31 | && !defined (__ARM_ARCH_5T__)) | |
32 | #define PLD(code...) code /* none of the listed older architectures: keep the wrapped instruction */ | |
33 | #else | |
34 | #define PLD(code...) /* one of the listed older architectures: the wrapped instruction is dropped */ | |
35 | #endif /* PLD */ | |
36 | ||
37 | /* | |
38 | * This can be used to enable code to cacheline align the source pointer. | |
39 | * Experiments on tested architectures (StrongARM and XScale) didn't show | |
40 | * this to be a worthwhile thing to do. That might be different in the future. | |
41 | */ | |
42 | //#define CALGN(code...) code | |
43 | #define CALGN(code...) /* active definition: every CALGN( ... ) line below expands to nothing */ | |
44 | ||
45 | /* | |
46 | * Endian independent macros for shifting bytes within registers. | |
47 | */ | |
48 | #ifndef __ARMEB__ /* little-endian build (GCC defines __ARMEB__ only for big-endian ARM) */ | |
55668624 RH |
49 | #define PULL lsr |
50 | #define PUSH lsl | |
0572b91b | 51 | #else /* __ARMEB__: same roles, opposite shift directions */ |
55668624 RH |
52 | #define PULL lsl |
53 | #define PUSH lsr | |
0572b91b DJ |
54 | #endif /* __ARMEB__ */ |
55 | ||
56 | .text | |
38435a9a | 57 | .syntax unified /* unified syntax: condition suffix goes last, e.g. ldrbne, sbcsne as used below */ |
0572b91b DJ |
58 | |
59 | /* | |
60 | * Prototype: void *memmove(void *dest, const void *src, size_t n); | |
61 | * | |
62 | * Note: | |
63 | * | |
64 | * If dest is not above src or the regions don't overlap, we branch to memcpy, | |
65 | * which is normally a bit faster. Otherwise the copy is done going downwards. | |
66 | */ | |
67 | ||
68 | ENTRY(memmove) /* in: r0 = dest, r1 = src, r2 = n; returns the original dest in r0 */ | |
69 | ||
70 | subs ip, r0, r1 /* ip = dest - src; HI iff dest > src (unsigned) */ | |
71 | cmphi r2, ip /* only if dest > src: compare n with (dest - src) */ | |
4f41c682 | 72 | #if !IS_IN (libc) |
0572b91b | 73 | bls memcpy /* dest <= src, or n <= dest - src: hand the copy over to memcpy */ |
5de92c17 JM |
74 | #else |
75 | bls HIDDEN_JUMPTARGET(memcpy) | |
76 | #endif | |
0572b91b | 77 | |
55668624 | 78 | push {r0, r4, lr} /* r0 is saved so the return sequence can pop the original dest */ |
01b32e73 TS |
79 | cfi_adjust_cfa_offset (12) |
80 | cfi_rel_offset (r4, 4) | |
81 | cfi_rel_offset (lr, 8) | |
82 | ||
83 | cfi_remember_state /* unwind state with {r0, r4, lr} pushed; re-applied by cfi_restore_state after the return */ | |
84 | ||
0572b91b DJ |
85 | add r1, r1, r2 /* r1 = src + n (one past the last source byte) */ |
86 | add r0, r0, r2 /* r0 = dest + n; all copying below moves downwards */ | |
87 | subs r2, r2, #4 /* r2 = n - 4 */ | |
88 | blt 8f /* fewer than 4 bytes: byte tail only */ | |
89 | ands ip, r0, #3 /* ip = r0 & 3 */ | |
81cb7a0b | 90 | PLD( pld [r1, #-4] ) |
0572b91b DJ |
91 | bne 9f /* dest end not word aligned: copy ip bytes first (flags from the ands) */ |
92 | ands ip, r1, #3 /* ip = r1 & 3 */ | |
93 | bne 10f /* source not word aligned: shifted copy */ | |
94 | ||
95 | 1: subs r2, r2, #(28) /* r0 and r1 both word aligned here; r2 = bytes left - 32 */ | |
55668624 | 96 | push {r5 - r8} |
01b32e73 TS |
97 | cfi_adjust_cfa_offset (16) |
98 | cfi_rel_offset (r5, 0) | |
99 | cfi_rel_offset (r6, 4) | |
100 | cfi_rel_offset (r7, 8) | |
101 | cfi_rel_offset (r8, 12) | |
0572b91b DJ |
102 | blt 5f /* fewer than 32 bytes left: skip the 32-byte block loop */ |
103 | ||
104 | CALGN( ands ip, r1, #31 ) | |
38435a9a | 105 | CALGN( sbcsne r4, ip, r2 ) @ C is always set here |
0572b91b DJ |
106 | CALGN( bcs 2f ) |
107 | CALGN( adr r4, 6f ) | |
108 | CALGN( subs r2, r2, ip ) @ C is set here | |
9e1d4ac9 | 109 | #ifndef ARM_ALWAYS_BX |
bb48a26a | 110 | CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)) |
9e1d4ac9 | 111 | #else |
bb48a26a | 112 | CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)) |
9e1d4ac9 RM |
113 | CALGN( bx r4 ) |
114 | #endif | |
0572b91b | 115 | |
81cb7a0b | 116 | PLD( pld [r1, #-4] ) |
0572b91b | 117 | 2: PLD( subs r2, r2, #96 ) |
81cb7a0b | 118 | PLD( pld [r1, #-32] ) |
0572b91b | 119 | PLD( blt 4f ) |
81cb7a0b ZW |
120 | PLD( pld [r1, #-64] ) |
121 | PLD( pld [r1, #-96] ) | |
0572b91b | 122 | |
81cb7a0b ZW |
123 | 3: PLD( pld [r1, #-128] ) |
124 | 4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} /* load 32 bytes, r1 moves down by 32 */ | |
0572b91b | 125 | subs r2, r2, #32 |
81cb7a0b | 126 | stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} /* store them, r0 moves down by 32 */ |
0572b91b DJ |
127 | bge 3b |
128 | PLD( cmn r2, #96 ) | |
129 | PLD( bge 4b ) | |
130 | ||
131 | 5: ands ip, r2, #28 /* ip = r2 & 28: remaining whole words, counted in bytes (0..28) */ | |
132 | rsb ip, ip, #32 /* ip = 32 - ip; no S suffix, so the flags are still those of the ands */ | |
9e1d4ac9 | 133 | #ifndef ARM_ALWAYS_BX |
bb48a26a RM |
134 | /* C is always clear here. */ |
135 | addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) /* computed jump into the ldr ladder at 6, skipping the slots that are not needed */ | |
0572b91b | 136 | b 7f /* reached only when the ands gave zero: no whole words left */ |
9e1d4ac9 RM |
137 | #else |
138 | beq 7f | |
139 | push {r10} | |
140 | cfi_adjust_cfa_offset (4) | |
141 | cfi_rel_offset (r10, 0) | |
298e5d56 RM |
142 | 0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) |
143 | /* If alignment is not perfect, then there will be some | |
144 | padding (nop) instructions between this BX and label 6. | |
145 | The computation above assumed that two instructions | |
146 | later is exactly the right spot. */ | |
147 | add r10, #(6f - (0b + PC_OFS)) | |
9e1d4ac9 RM |
148 | bx r10 |
149 | #endif | |
bb48a26a | 150 | .p2align ARM_BX_ALIGN_LOG2 |
0572b91b | 151 | 6: nop /* slot 0 of the load ladder; every slot is aligned to 1 << ARM_BX_ALIGN_LOG2 bytes */ |
bb48a26a | 152 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 153 | ldr r3, [r1, #-4]! |
bb48a26a | 154 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 155 | ldr r4, [r1, #-4]! |
bb48a26a | 156 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 157 | ldr r5, [r1, #-4]! |
bb48a26a | 158 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 159 | ldr r6, [r1, #-4]! |
bb48a26a | 160 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 161 | ldr r7, [r1, #-4]! |
bb48a26a | 162 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 163 | ldr r8, [r1, #-4]! |
bb48a26a | 164 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 165 | ldr lr, [r1, #-4]! |
0572b91b | 166 | |
9e1d4ac9 | 167 | #ifndef ARM_ALWAYS_BX |
bb48a26a | 168 | add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) /* same offset again, now into the matching str ladder at 66 */ |
0572b91b | 169 | nop |
9e1d4ac9 | 170 | #else |
298e5d56 RM |
171 | 0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) |
172 | /* If alignment is not perfect, then there will be some | |
173 | padding (nop) instructions between this BX and label 66. | |
174 | The computation above assumed that two instructions | |
175 | later is exactly the right spot. */ | |
176 | add r10, #(66f - (0b + PC_OFS)) | |
9e1d4ac9 RM |
177 | bx r10 |
178 | #endif | |
bb48a26a | 179 | .p2align ARM_BX_ALIGN_LOG2 |
298e5d56 | 180 | 66: nop /* slot 0 of the store ladder; stores use the same registers, in the same order, as the loads */ |
bb48a26a | 181 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 182 | str r3, [r0, #-4]! |
bb48a26a | 183 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 184 | str r4, [r0, #-4]! |
bb48a26a | 185 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 186 | str r5, [r0, #-4]! |
bb48a26a | 187 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 188 | str r6, [r0, #-4]! |
bb48a26a | 189 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 190 | str r7, [r0, #-4]! |
bb48a26a | 191 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 192 | str r8, [r0, #-4]! |
bb48a26a | 193 | .p2align ARM_BX_ALIGN_LOG2 |
81cb7a0b | 194 | str lr, [r0, #-4]! |
0572b91b | 195 | |
9e1d4ac9 RM |
196 | #ifdef ARM_ALWAYS_BX |
197 | pop {r10} | |
198 | cfi_adjust_cfa_offset (-4) | |
199 | cfi_restore (r10) | |
200 | #endif | |
201 | ||
0572b91b DJ |
202 | CALGN( bcs 2b ) |
203 | ||
55668624 | 204 | 7: pop {r5 - r8} /* undo the push done at label 1 */ |
01b32e73 TS |
205 | cfi_adjust_cfa_offset (-16) |
206 | cfi_restore (r5) | |
207 | cfi_restore (r6) | |
208 | cfi_restore (r7) | |
209 | cfi_restore (r8) | |
0572b91b DJ |
210 | |
211 | 8: movs r2, r2, lsl #31 /* byte tail: bit 0 of r2 -> NE, bit 1 of r2 -> C */ | |
81cb7a0b ZW |
212 | ldrbne r3, [r1, #-1]! /* one byte if bit 0 was set */ |
213 | ldrbcs r4, [r1, #-1]! /* two more bytes if bit 1 was set */ | |
214 | ldrbcs ip, [r1, #-1] | |
215 | strbne r3, [r0, #-1]! | |
216 | strbcs r4, [r0, #-1]! | |
217 | strbcs ip, [r0, #-1] | |
01b32e73 | 218 | |
9e1d4ac9 RM |
219 | #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \ |
220 | || defined (ARM_ALWAYS_BX)) | |
55668624 | 221 | pop {r0, r4, lr} |
01b32e73 TS |
222 | cfi_adjust_cfa_offset (-12) |
223 | cfi_restore (r4) | |
224 | cfi_restore (lr) | |
b2b2415f PB |
225 | bx lr |
226 | #else | |
55668624 | 227 | pop {r0, r4, pc} /* return: r0 = the dest value pushed on entry */ |
b2b2415f | 228 | #endif |
0572b91b | 229 | |
01b32e73 TS |
230 | cfi_restore_state |
231 | ||
0572b91b | 232 | 9: cmp ip, #2 /* ip = r0 & 3 (1..3): bytes to copy until r0 is word aligned */ |
81cb7a0b ZW |
233 | ldrbgt r3, [r1, #-1]! /* only when ip == 3 */ |
234 | ldrbge r4, [r1, #-1]! /* when ip >= 2 */ | |
235 | ldrb lr, [r1, #-1]! /* always */ | |
236 | strbgt r3, [r0, #-1]! | |
237 | strbge r4, [r0, #-1]! | |
0572b91b | 238 | subs r2, r2, ip /* account for the ip bytes copied here */ |
81cb7a0b | 239 | strb lr, [r0, #-1]! |
0572b91b DJ |
240 | blt 8b /* fewer than 4 bytes left (flags from the subs): byte tail */ |
241 | ands ip, r1, #3 | |
242 | beq 1b /* source is word aligned as well: plain word copy */ | |
243 | ||
244 | 10: bic r1, r1, #3 /* round r1 down to a word boundary; ip (1..3) holds the byte offset removed */ | |
245 | cmp ip, #2 | |
81cb7a0b | 246 | ldr r3, [r1, #0] /* r3 = word at the rounded-down address; the shifted copy starts from it */ |
0572b91b DJ |
247 | beq 17f /* ip == 2 */ |
248 | blt 18f /* ip == 1; ip == 3 falls through to the first macro expansion */ | |
249 | ||
250 | ||
251 | .macro backward_copy_shift push pull /* descending copy from a word-rounded r1: each output word is merged from two adjacent source words using the PUSH/PULL shifts; on entry r3 holds the word already loaded at r1 */ | |
252 | ||
253 | subs r2, r2, #28 /* r2 = bytes left - 32 */ | |
254 | blt 14f /* fewer than 32 bytes: word-at-a-time loop only */ | |
255 | ||
256 | CALGN( ands ip, r1, #31 ) | |
257 | CALGN( rsb ip, ip, #32 ) | |
38435a9a | 258 | CALGN( sbcsne r4, ip, r2 ) @ C is always set here |
0572b91b DJ |
259 | CALGN( subcc r2, r2, ip ) |
260 | CALGN( bcc 15f ) | |
261 | ||
791de446 | 262 | 11: push {r5 - r8, r10} |
01b32e73 TS |
263 | cfi_adjust_cfa_offset (20) |
264 | cfi_rel_offset (r5, 0) | |
265 | cfi_rel_offset (r6, 4) | |
266 | cfi_rel_offset (r7, 8) | |
267 | cfi_rel_offset (r8, 12) | |
791de446 | 268 | cfi_rel_offset (r10, 16) |
0572b91b | 269 | |
81cb7a0b | 270 | PLD( pld [r1, #-4] ) |
0572b91b | 271 | PLD( subs r2, r2, #96 ) |
81cb7a0b | 272 | PLD( pld [r1, #-32] ) |
0572b91b | 273 | PLD( blt 13f ) |
81cb7a0b ZW |
274 | PLD( pld [r1, #-64] ) |
275 | PLD( pld [r1, #-96] ) | |
0572b91b | 276 | |
81cb7a0b ZW |
277 | 12: PLD( pld [r1, #-128] ) |
278 | 13: ldmdb r1!, {r7, r8, r10, ip} /* upper four of the next eight source words */ | |
55668624 | 279 | mov lr, r3, PUSH #\push /* begin the top output word from the word carried in r3 */ |
0572b91b | 280 | subs r2, r2, #32 |
81cb7a0b | 281 | ldmdb r1!, {r3, r4, r5, r6} /* lower four source words; the new r3 is carried into the next round */ |
55668624 RH |
282 | orr lr, lr, ip, PULL #\pull |
283 | mov ip, ip, PUSH #\push | |
791de446 RM |
284 | orr ip, ip, r10, PULL #\pull |
285 | mov r10, r10, PUSH #\push | |
286 | orr r10, r10, r8, PULL #\pull | |
55668624 RH |
287 | mov r8, r8, PUSH #\push |
288 | orr r8, r8, r7, PULL #\pull | |
289 | mov r7, r7, PUSH #\push | |
290 | orr r7, r7, r6, PULL #\pull | |
291 | mov r6, r6, PUSH #\push | |
292 | orr r6, r6, r5, PULL #\pull | |
293 | mov r5, r5, PUSH #\push | |
294 | orr r5, r5, r4, PULL #\pull | |
295 | mov r4, r4, PUSH #\push | |
296 | orr r4, r4, r3, PULL #\pull | |
81cb7a0b | 297 | stmdb r0!, {r4 - r8, r10, ip, lr} /* store the eight merged words, r0 moves down by 32 */ |
0572b91b DJ |
298 | bge 12b |
299 | PLD( cmn r2, #96 ) | |
300 | PLD( bge 13b ) | |
301 | ||
791de446 | 302 | pop {r5 - r8, r10} |
01b32e73 TS |
303 | cfi_adjust_cfa_offset (-20) |
304 | cfi_restore (r5) | |
305 | cfi_restore (r6) | |
306 | cfi_restore (r7) | |
307 | cfi_restore (r8) | |
791de446 | 308 | cfi_restore (r10) |
0572b91b DJ |
309 | |
310 | 14: ands ip, r2, #28 /* ip = r2 & 28: remaining whole words, counted in bytes */ | |
311 | beq 16f | |
312 | ||
55668624 | 313 | 15: mov lr, r3, PUSH #\push /* one output word per iteration: carried r3 merged with the next lower source word */ |
81cb7a0b | 314 | ldr r3, [r1, #-4]! |
0572b91b | 315 | subs ip, ip, #4 |
55668624 | 316 | orr lr, lr, r3, PULL #\pull |
81cb7a0b | 317 | str lr, [r0, #-4]! |
0572b91b DJ |
318 | bgt 15b |
319 | CALGN( cmp r2, #0 ) | |
320 | CALGN( bge 11b ) | |
321 | ||
322 | 16: add r1, r1, #(\pull / 8) /* add pull/8 bytes back so r1 is a byte-exact pointer again for the tail */ | |
323 | b 8b /* finish with the byte tail at label 8 */ | |
324 | ||
325 | .endm | |
326 | ||
327 | ||
328 | backward_copy_shift push=8 pull=24 /* fall-through from label 10: source byte offset 3 */ | |
329 | ||
330 | 17: backward_copy_shift push=16 pull=16 /* source byte offset 2 */ | |
331 | ||
332 | 18: backward_copy_shift push=24 pull=8 /* source byte offset 1 */ | |
333 | ||
334 | ||
335 | END(memmove) | |
336 | libc_hidden_builtin_def (memmove) |