Provenance: gitweb view from the git.ipfire.org mirror of thirdparty/glibc.git,
blob sysdeps/arm/memmove.S.  (The page header also showed the then-current
commit subject: "RISC-V: Fix `test' operand error with soft-float ABI being
configured" — unrelated to this file's contents.)
1 /* Copyright (C) 2006-2019 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library. If not, see
18 <http://www.gnu.org/licenses/>. */
19
20 /* Thumb requires excessive IT insns here. */
21 #define NO_THUMB
22 #include <sysdep.h>
23 #include <arm-features.h>
24
25 /*
26  * Data preload for architectures that support it (ARM V5TE and above)
27  */
@ PLD(x) expands its argument only when the target architecture has the
@ pld (preload) instruction; on older cores it expands to nothing, so the
@ copy loops below assemble with or without prefetch hints.
28 #if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
29 && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
30 && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
31 && !defined (__ARM_ARCH_5T__))
32 #define PLD(code...) code
33 #else
34 #define PLD(code...)
35 #endif
36
37 /*
38 * This can be used to enable code to cacheline align the source pointer.
39 * Experiments on tested architectures (StrongARM and XScale) didn't show
40 * this a worthwhile thing to do. That might be different in the future.
41 */
@ CALGN is disabled by default: the cache-alignment sequences guarded by
@ it assemble to nothing unless the first definition is uncommented.
42 //#define CALGN(code...) code
43 #define CALGN(code...)
44
45 /*
46  * Endian independent macros for shifting bytes within registers.
47 */
@ PULL/PUSH abstract the shift direction used when merging bytes that
@ straddle a word boundary (see backward_copy_shift below).  Swapping
@ lsr/lsl between little- and big-endian lets the same merge code work
@ for both byte orders.
48 #ifndef __ARMEB__
49 #define PULL lsr
50 #define PUSH lsl
51 #else
52 #define PULL lsl
53 #define PUSH lsr
54 #endif
55
56 .text
57 .syntax unified
58
59 /*
60 * Prototype: void *memmove(void *dest, const void *src, size_t n);
61 *
62 * Note:
63 *
64 * If the memory regions don't overlap, we simply branch to memcpy which is
65 * normally a bit faster. Otherwise the copy is done going downwards.
66 */
67
68 ENTRY(memmove)
69
@ Overlap test.  ip = dest - src (unsigned, mod 2^32).  If dest <= src
@ (subs yields ls), cmphi is skipped and we branch on the subs flags;
@ otherwise branch if n <= dest - src.  In either case a plain forward
@ memcpy cannot clobber source bytes it has not yet read, so tail-call it.
@ Only genuinely destructive overlaps fall through to the backward copy.
70 subs ip, r0, r1
71 cmphi r2, ip
72 #if !IS_IN (libc)
73 bls memcpy
74 #else
75 bls HIDDEN_JUMPTARGET(memcpy)
76 #endif
77
@ r0 must be returned unchanged, so save it (popped into r0/pc at exit).
78 push {r0, r4, lr}
79 cfi_adjust_cfa_offset (12)
80 cfi_rel_offset (r4, 4)
81 cfi_rel_offset (lr, 8)
82
83 cfi_remember_state
84
@ Copy backwards: point r0/r1 one past the end of each buffer; every
@ load/store below uses pre-decrement addressing.
85 add r1, r1, r2
86 add r0, r0, r2
@ r2 -= 4: fewer than 4 bytes total -> byte tail at 8:.
87 subs r2, r2, #4
88 blt 8f
@ Destination not word-aligned -> fix up 1-3 bytes at 9:.
89 ands ip, r0, #3
90 PLD( pld [r1, #-4] )
91 bne 9f
@ Source not word-aligned (dest now is) -> shifted copy at 10:.
92 ands ip, r1, #3
93 bne 10f
94
@ --- Both pointers word-aligned. ---
@ r2 -= 28 (so r2 >= 0 means at least 32 bytes remain for the big loop).
95 1: subs r2, r2, #(28)
96 push {r5 - r8}
97 cfi_adjust_cfa_offset (16)
98 cfi_rel_offset (r5, 0)
99 cfi_rel_offset (r6, 4)
100 cfi_rel_offset (r7, 8)
101 cfi_rel_offset (r8, 12)
102 blt 5f
103
@ Optional (disabled) cache-line alignment of the source: jump into the
@ ldr ladder at 6: to consume ip bytes so r1 becomes 32-byte aligned.
104 CALGN( ands ip, r1, #31 )
105 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
106 CALGN( bcs 2f )
107 CALGN( adr r4, 6f )
108 CALGN( subs r2, r2, ip ) @ C is set here
109 #ifndef ARM_ALWAYS_BX
110 CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
111 #else
112 CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
113 CALGN( bx r4 )
114 #endif
115
@ Prime the prefetch window (3 lines ahead) before entering the loop.
116 PLD( pld [r1, #-4] )
117 2: PLD( subs r2, r2, #96 )
118 PLD( pld [r1, #-32] )
119 PLD( blt 4f )
120 PLD( pld [r1, #-64] )
121 PLD( pld [r1, #-96] )
122
@ Main loop: move 32 bytes per iteration with one ldmdb/stmdb pair.
@ 3: is the prefetching entry, 4: the drained (no more pld) entry.
123 3: PLD( pld [r1, #-128] )
124 4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
125 subs r2, r2, #32
126 stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
127 bge 3b
128 PLD( cmn r2, #96 )
129 PLD( bge 4b )
130
@ 0-28 bytes of whole words remain.  Compute a jump into the ldr ladder
@ at 6: so exactly (ip/4) single-word loads execute (Duff's-device style;
@ each ladder slot is padded to 1<<ARM_BX_ALIGN_LOG2 bytes by .p2align).
131 5: ands ip, r2, #28
132 rsb ip, ip, #32
133 #ifndef ARM_ALWAYS_BX
134 /* C is always clear here. */
135 addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
136 b 7f
137 #else
@ ARM_ALWAYS_BX targets must not write pc directly; build the target
@ address in r10 and bx to it instead.
138 beq 7f
139 push {r10}
140 cfi_adjust_cfa_offset (4)
141 cfi_rel_offset (r10, 0)
142 0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
143 /* If alignment is not perfect, then there will be some
144 padding (nop) instructions between this BX and label 6.
145 The computation above assumed that two instructions
146 later is exactly the right spot. */
147 add r10, #(6f - (0b + PC_OFS))
148 bx r10
149 #endif
150 .p2align ARM_BX_ALIGN_LOG2
151 6: nop
152 .p2align ARM_BX_ALIGN_LOG2
153 ldr r3, [r1, #-4]!
154 .p2align ARM_BX_ALIGN_LOG2
155 ldr r4, [r1, #-4]!
156 .p2align ARM_BX_ALIGN_LOG2
157 ldr r5, [r1, #-4]!
158 .p2align ARM_BX_ALIGN_LOG2
159 ldr r6, [r1, #-4]!
160 .p2align ARM_BX_ALIGN_LOG2
161 ldr r7, [r1, #-4]!
162 .p2align ARM_BX_ALIGN_LOG2
163 ldr r8, [r1, #-4]!
164 .p2align ARM_BX_ALIGN_LOG2
165 ldr lr, [r1, #-4]!
166
@ Same computed jump into the matching str ladder at 66: so the words
@ just loaded are stored (ip is unchanged, so the counts agree).
167 #ifndef ARM_ALWAYS_BX
168 add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
169 nop
170 #else
171 0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
172 /* If alignment is not perfect, then there will be some
173 padding (nop) instructions between this BX and label 66.
174 The computation above assumed that two instructions
175 later is exactly the right spot. */
176 add r10, #(66f - (0b + PC_OFS))
177 bx r10
178 #endif
179 .p2align ARM_BX_ALIGN_LOG2
180 66: nop
181 .p2align ARM_BX_ALIGN_LOG2
182 str r3, [r0, #-4]!
183 .p2align ARM_BX_ALIGN_LOG2
184 str r4, [r0, #-4]!
185 .p2align ARM_BX_ALIGN_LOG2
186 str r5, [r0, #-4]!
187 .p2align ARM_BX_ALIGN_LOG2
188 str r6, [r0, #-4]!
189 .p2align ARM_BX_ALIGN_LOG2
190 str r7, [r0, #-4]!
191 .p2align ARM_BX_ALIGN_LOG2
192 str r8, [r0, #-4]!
193 .p2align ARM_BX_ALIGN_LOG2
194 str lr, [r0, #-4]!
195
196 #ifdef ARM_ALWAYS_BX
197 pop {r10}
198 cfi_adjust_cfa_offset (-4)
199 cfi_restore (r10)
200 #endif
201
@ Only reachable with CALGN enabled: after the alignment fix-up, resume
@ the main loop.
202 CALGN( bcs 2b )
203
204 7: pop {r5 - r8}
205 cfi_adjust_cfa_offset (-16)
206 cfi_restore (r5)
207 cfi_restore (r6)
208 cfi_restore (r7)
209 cfi_restore (r8)
210
@ Byte tail: r2 holds (remaining - 4).  Shifting left by 31 puts bit 0
@ into N and bit 1 into C, so ne copies one byte and cs copies two —
@ together handling the final 0-3 bytes without a loop.
211 8: movs r2, r2, lsl #31
212 ldrbne r3, [r1, #-1]!
213 ldrbcs r4, [r1, #-1]!
214 ldrbcs ip, [r1, #-1]
215 strbne r3, [r0, #-1]!
216 strbcs r4, [r0, #-1]!
217 strbcs ip, [r0, #-1]
218
@ Return: restore the saved dest pointer into r0.  Targets needing
@ interworking (or ARM_ALWAYS_BX) must return via bx lr rather than
@ popping directly into pc.
219 #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
220 || defined (ARM_ALWAYS_BX))
221 pop {r0, r4, lr}
222 cfi_adjust_cfa_offset (-12)
223 cfi_restore (r4)
224 cfi_restore (lr)
225 bx lr
226 #else
227 pop {r0, r4, pc}
228 #endif
229
230 cfi_restore_state
231
@ Align the destination: ip = r0 & 3 bytes must be copied first.
@ gt/ge/always conditions copy 3, 2 or 1 bytes respectively (flags set
@ by the cmp), then re-test the count and the source's alignment.
232 9: cmp ip, #2
233 ldrbgt r3, [r1, #-1]!
234 ldrbge r4, [r1, #-1]!
235 ldrb lr, [r1, #-1]!
236 strbgt r3, [r0, #-1]!
237 strbge r4, [r0, #-1]!
238 subs r2, r2, ip
239 strb lr, [r0, #-1]!
240 blt 8b
241 ands ip, r1, #3
242 beq 1b
243
@ Dest aligned, source off by ip = 1, 2 or 3 bytes.  Round r1 down to a
@ word boundary, preload the partial word into r3, and dispatch to the
@ shifted-copy variant for this offset (fall-through: ip == 3;
@ 17: ip == 2; 18: ip == 1 — see the instantiations after the macro).
244 10: bic r1, r1, #3
245 cmp ip, #2
246 ldr r3, [r1, #0]
247 beq 17f
248 blt 18f
249
250
@ Backward copy for a source that is misaligned by a fixed byte offset
@ relative to the (word-aligned) destination.  All loads use the
@ word-aligned r1; each output word is assembled from two adjacent
@ source words with PUSH #\push / PULL #\pull shifts (\push + \pull = 32).
@ On entry r3 already holds the partially-consumed source word (loaded
@ at label 10) and r2 holds remaining-bytes - 4.
251 .macro backward_copy_shift push pull
252
@ Fewer than 32 bytes left -> skip the unrolled loop.
253 subs r2, r2, #28
254 blt 14f
255
256 CALGN( ands ip, r1, #31 )
257 CALGN( rsb ip, ip, #32 )
258 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
259 CALGN( subcc r2, r2, ip )
260 CALGN( bcc 15f )
261
262 11: push {r5 - r8, r10}
263 cfi_adjust_cfa_offset (20)
264 cfi_rel_offset (r5, 0)
265 cfi_rel_offset (r6, 4)
266 cfi_rel_offset (r7, 8)
267 cfi_rel_offset (r8, 12)
268 cfi_rel_offset (r10, 16)
269
270 PLD( pld [r1, #-4] )
271 PLD( subs r2, r2, #96 )
272 PLD( pld [r1, #-32] )
273 PLD( blt 13f )
274 PLD( pld [r1, #-64] )
275 PLD( pld [r1, #-96] )
276
@ 32-bytes-per-iteration loop: load 8 source words in two ldmdb bursts,
@ then merge each neighbouring pair into an output word.  The merge runs
@ high-to-low so each register is combined with its lower neighbour
@ before being overwritten; r3 carries the straddling word into the
@ next iteration.
277 12: PLD( pld [r1, #-128] )
278 13: ldmdb r1!, {r7, r8, r10, ip}
279 mov lr, r3, PUSH #\push
280 subs r2, r2, #32
281 ldmdb r1!, {r3, r4, r5, r6}
282 orr lr, lr, ip, PULL #\pull
283 mov ip, ip, PUSH #\push
284 orr ip, ip, r10, PULL #\pull
285 mov r10, r10, PUSH #\push
286 orr r10, r10, r8, PULL #\pull
287 mov r8, r8, PUSH #\push
288 orr r8, r8, r7, PULL #\pull
289 mov r7, r7, PUSH #\push
290 orr r7, r7, r6, PULL #\pull
291 mov r6, r6, PUSH #\push
292 orr r6, r6, r5, PULL #\pull
293 mov r5, r5, PUSH #\push
294 orr r5, r5, r4, PULL #\pull
295 mov r4, r4, PUSH #\push
296 orr r4, r4, r3, PULL #\pull
297 stmdb r0!, {r4 - r8, r10, ip, lr}
298 bge 12b
299 PLD( cmn r2, #96 )
300 PLD( bge 13b )
301
302 pop {r5 - r8, r10}
303 cfi_adjust_cfa_offset (-20)
304 cfi_restore (r5)
305 cfi_restore (r6)
306 cfi_restore (r7)
307 cfi_restore (r8)
308 cfi_restore (r10)
309
@ Remaining whole words (0-28 bytes), one shifted word per iteration.
310 14: ands ip, r2, #28
311 beq 16f
312
313 15: mov lr, r3, PUSH #\push
314 ldr r3, [r1, #-4]!
315 subs ip, ip, #4
316 orr lr, lr, r3, PULL #\pull
317 str lr, [r0, #-4]!
318 bgt 15b
319 CALGN( cmp r2, #0 )
320 CALGN( bge 11b )
321
@ Undo the word rounding: advance r1 by \pull/8 bytes so it points at
@ the true (unaligned) source position, then share the byte tail at 8:.
322 16: add r1, r1, #(\pull / 8)
323 b 8b
324
325 .endm
326
327
@ One instantiation per source misalignment, dispatched from label 10
@ above: fall-through (ip == 3) uses push=8/pull=24, label 17 (ip == 2)
@ uses push=16/pull=16, label 18 (ip == 1) uses push=24/pull=8.
328 backward_copy_shift push=8 pull=24
329
330 17: backward_copy_shift push=16 pull=16
331
332 18: backward_copy_shift push=24 pull=8
333
334
335 END(memmove)
336 libc_hidden_builtin_def (memmove)