/* Copyright (C) 2006-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Thumb requires excessive IT insns here.  */
#define NO_THUMB
#include <sysdep.h>
#include <arm-features.h>

/*
 * Data preload for architectures that support it (ARM V5TE and above)
 */
#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
     && !defined (__ARM_ARCH_5T__))
#define PLD(code...)    code
#else
#define PLD(code...)
#endif

/*
 * This can be used to enable code to cacheline align the source pointer.
 * Experiments on the tested architectures (StrongARM and XScale) didn't
 * show this to be worthwhile.  That might change in the future.
 */
//#define CALGN(code...) code
#define CALGN(code...)

/*
 * Endian independent macros for shifting bytes within registers.
 */
#ifndef __ARMEB__
#define PULL    lsr
#define PUSH    lsl
#else
#define PULL    lsl
#define PUSH    lsr
#endif
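
/*
 * PULL/PUSH select lsr/lsl on little-endian and lsl/lsr on big-endian, so
 * the word-merging arithmetic in backward_copy_shift below is written
 * once and works for either byte order.
 */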

        .text
        .syntax unified

/*
 * Prototype:  void *memmove(void *dest, const void *src, size_t n);
 *
 * Note:
 *
 * If the memory regions don't overlap, we simply branch to memcpy which is
 * normally a bit faster.  Otherwise the copy is done going downwards.
 */
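
/*
 * Rough C equivalent of the dispatch at the top of memmove below
 * (illustrative sketch only):
 *
 *	if (dest <= src || n <= (size_t) (dest - src))
 *	  return memcpy (dest, src, n);
 *
 * Otherwise dest overlaps the top of src and the copy is done backwards,
 * starting from the end of both regions.
 */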

ENTRY(memmove)

        subs    ip, r0, r1
        cmphi   r2, ip
#if !IS_IN (libc)
        bls     memcpy
#else
        bls     HIDDEN_JUMPTARGET(memcpy)
#endif

        push    {r0, r4, lr}
        cfi_adjust_cfa_offset (12)
        cfi_rel_offset (r4, 4)
        cfi_rel_offset (lr, 8)

        cfi_remember_state

        add     r1, r1, r2
        add     r0, r0, r2
        subs    r2, r2, #4
        blt     8f
        ands    ip, r0, #3
        PLD(    sfi_pld r1, #-4         )
        bne     9f
        ands    ip, r1, #3
        bne     10f

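/*
 * Both pointers are now word aligned; the loop below moves 32 bytes per
 * iteration with ldmdb/stmdb, working downwards, with optional cache-line
 * alignment (CALGN) and prefetch (PLD) hooks.
 */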
1:      subs    r2, r2, #(28)
        push    {r5 - r8}
        cfi_adjust_cfa_offset (16)
        cfi_rel_offset (r5, 0)
        cfi_rel_offset (r6, 4)
        cfi_rel_offset (r7, 8)
        cfi_rel_offset (r8, 12)
        blt     5f

        CALGN(  ands    ip, r1, #31             )
        CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
        CALGN(  bcs     2f                      )
        CALGN(  adr     r4, 6f                  )
        CALGN(  subs    r2, r2, ip              )  @ C is set here
#ifndef ARM_ALWAYS_BX
        CALGN(  add     pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
#else
        CALGN(  add     r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
        CALGN(  bx      r4                      )
#endif

        PLD(    sfi_pld r1, #-4                 )
2:      PLD(    subs    r2, r2, #96             )
        PLD(    sfi_pld r1, #-32                )
        PLD(    blt     4f                      )
        PLD(    sfi_pld r1, #-64                )
        PLD(    sfi_pld r1, #-96                )

3:      PLD(    sfi_pld r1, #-128               )
4:      sfi_breg r1, \
        ldmdb   \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
        subs    r2, r2, #32
        sfi_breg r0, \
        stmdb   \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
        bge     3b
        PLD(    cmn     r2, #96                 )
        PLD(    bge     4b                      )

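/*
 * Fewer than 32 bytes left in whole words: ip is set to 32 minus the
 * remaining byte count rounded down to a multiple of 4, and the computed
 * branch jumps that far into the slot table at 6: so that only as many of
 * the single-word loads (and then the matching stores) as there are words
 * left are executed.  Each slot is padded to 1 << ARM_BX_ALIGN_LOG2 bytes
 * so the scaled index always lands on a slot boundary.
 */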
5:      ands    ip, r2, #28
        rsb     ip, ip, #32
#ifndef ARM_ALWAYS_BX
        /* C is always clear here.  */
        addne   pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
        b       7f
#else
        beq     7f
        push    {r10}
        cfi_adjust_cfa_offset (4)
        cfi_rel_offset (r10, 0)
        add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
        bx      r10
#endif
        .p2align ARM_BX_ALIGN_LOG2
6:      nop
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r1, \
        ldr     r3, [\B, #-4]!
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r1, \
        ldr     r4, [\B, #-4]!
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r1, \
        ldr     r5, [\B, #-4]!
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r1, \
        ldr     r6, [\B, #-4]!
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r1, \
        ldr     r7, [\B, #-4]!
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r1, \
        ldr     r8, [\B, #-4]!
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r1, \
        ldr     lr, [\B, #-4]!

#ifndef ARM_ALWAYS_BX
        add     pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
        nop
#else
        add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
        bx      r10
#endif
        .p2align ARM_BX_ALIGN_LOG2
        nop
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r0, \
        str     r3, [\B, #-4]!
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r0, \
        str     r4, [\B, #-4]!
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r0, \
        str     r5, [\B, #-4]!
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r0, \
        str     r6, [\B, #-4]!
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r0, \
        str     r7, [\B, #-4]!
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r0, \
        str     r8, [\B, #-4]!
        .p2align ARM_BX_ALIGN_LOG2
        sfi_breg r0, \
        str     lr, [\B, #-4]!

#ifdef ARM_ALWAYS_BX
        pop     {r10}
        cfi_adjust_cfa_offset (-4)
        cfi_restore (r10)
#endif

        CALGN(  bcs     2b                      )

7:      pop     {r5 - r8}
        cfi_adjust_cfa_offset (-16)
        cfi_restore (r5)
        cfi_restore (r6)
        cfi_restore (r7)
        cfi_restore (r8)

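/*
 * Copy the final 0-3 bytes.  The lsl #31 puts bit 1 of the remaining
 * count into the carry flag and leaves Z clear exactly when bit 0 is set,
 * so the ne-conditional pair copies one byte and the cs-conditional pairs
 * copy two more.
 */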
8:      movs    r2, r2, lsl #31
        sfi_breg r1, \
        ldrbne  r3, [\B, #-1]!
        sfi_breg r1, \
        ldrbcs  r4, [\B, #-1]!
        sfi_breg r1, \
        ldrbcs  ip, [\B, #-1]
        sfi_breg r0, \
        strbne  r3, [\B, #-1]!
        sfi_breg r0, \
        strbcs  r4, [\B, #-1]!
        sfi_breg r0, \
        strbcs  ip, [\B, #-1]

#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
     || defined (ARM_ALWAYS_BX))
        pop     {r0, r4, lr}
        cfi_adjust_cfa_offset (-12)
        cfi_restore (r4)
        cfi_restore (lr)
        bx      lr
#else
        pop     {r0, r4, pc}
#endif

        cfi_restore_state

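/*
 * Destination pointer not word aligned: copy the 1-3 bytes needed to
 * word-align it (ip = low bits of the destination), adjust the count,
 * then either finish the tail at 8b, rejoin the aligned word copy at 1b,
 * or fall into the shifting copy at 10:.
 */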
9:      cmp     ip, #2
        sfi_breg r1, \
        ldrbgt  r3, [\B, #-1]!
        sfi_breg r1, \
        ldrbge  r4, [\B, #-1]!
        sfi_breg r1, \
        ldrb    lr, [\B, #-1]!
        sfi_breg r0, \
        strbgt  r3, [\B, #-1]!
        sfi_breg r0, \
        strbge  r4, [\B, #-1]!
        subs    r2, r2, ip
        sfi_breg r0, \
        strb    lr, [\B, #-1]!
        blt     8b
        ands    ip, r1, #3
        beq     1b

10:     bic     r1, r1, #3
        cmp     ip, #2
        sfi_breg r1, \
        ldr     r3, [\B, #0]
        beq     17f
        blt     18f

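/*
 * backward_copy_shift: backward word copy for the case where source and
 * destination have different sub-word alignments.  Each output word is
 * merged from two adjacent source words using complementary shifts
 * (push + pull == 32); e.g. on little-endian, (hi PUSH #8) | (lo PULL #24)
 * gives the four consecutive bytes starting one byte below the word hi.
 * The three instantiations after the macro body handle the three possible
 * offsets.
 */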
.macro backward_copy_shift push pull

        subs    r2, r2, #28
        blt     14f

        CALGN(  ands    ip, r1, #31             )
        CALGN(  rsb     ip, ip, #32             )
        CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
        CALGN(  subcc   r2, r2, ip              )
        CALGN(  bcc     15f                     )

11:     push    {r5 - r8, r10}
        cfi_adjust_cfa_offset (20)
        cfi_rel_offset (r5, 0)
        cfi_rel_offset (r6, 4)
        cfi_rel_offset (r7, 8)
        cfi_rel_offset (r8, 12)
        cfi_rel_offset (r10, 16)

        PLD(    sfi_pld r1, #-4                 )
        PLD(    subs    r2, r2, #96             )
        PLD(    sfi_pld r1, #-32                )
        PLD(    blt     13f                     )
        PLD(    sfi_pld r1, #-64                )
        PLD(    sfi_pld r1, #-96                )

12:     PLD(    sfi_pld r1, #-128               )
13:     sfi_breg r1, \
        ldmdb   \B!, {r7, r8, r10, ip}
        mov     lr, r3, PUSH #\push
        subs    r2, r2, #32
        sfi_breg r1, \
        ldmdb   \B!, {r3, r4, r5, r6}
        orr     lr, lr, ip, PULL #\pull
        mov     ip, ip, PUSH #\push
        orr     ip, ip, r10, PULL #\pull
        mov     r10, r10, PUSH #\push
        orr     r10, r10, r8, PULL #\pull
        mov     r8, r8, PUSH #\push
        orr     r8, r8, r7, PULL #\pull
        mov     r7, r7, PUSH #\push
        orr     r7, r7, r6, PULL #\pull
        mov     r6, r6, PUSH #\push
        orr     r6, r6, r5, PULL #\pull
        mov     r5, r5, PUSH #\push
        orr     r5, r5, r4, PULL #\pull
        mov     r4, r4, PUSH #\push
        orr     r4, r4, r3, PULL #\pull
        sfi_breg r0, \
        stmdb   \B!, {r4 - r8, r10, ip, lr}
        bge     12b
        PLD(    cmn     r2, #96                 )
        PLD(    bge     13b                     )

        pop     {r5 - r8, r10}
        cfi_adjust_cfa_offset (-20)
        cfi_restore (r5)
        cfi_restore (r6)
        cfi_restore (r7)
        cfi_restore (r8)
        cfi_restore (r10)

14:     ands    ip, r2, #28
        beq     16f

15:     mov     lr, r3, PUSH #\push
        sfi_breg r1, \
        ldr     r3, [\B, #-4]!
        subs    ip, ip, #4
        orr     lr, lr, r3, PULL #\pull
        sfi_breg r0, \
        str     lr, [\B, #-4]!
        bgt     15b
        CALGN(  cmp     r2, #0                  )
        CALGN(  bge     11b                     )

16:     add     r1, r1, #(\pull / 8)
        b       8b

.endm


        backward_copy_shift     push=8  pull=24

17:     backward_copy_shift     push=16 pull=16

18:     backward_copy_shift     push=24 pull=8


END(memmove)
libc_hidden_builtin_def (memmove)