git.ipfire.org Git - thirdparty/glibc.git/commitdiff
author    Daniel Jacobowitz <dan@codesourcery.com>  Tue, 31 Oct 2006 17:07:54 +0000 (17:07 +0000)
committer Daniel Jacobowitz <dan@codesourcery.com>  Tue, 31 Oct 2006 17:07:54 +0000 (17:07 +0000)

* sysdeps/arm/memcpy.S: New file.
* sysdeps/arm/memmove.S: Likewise.

ChangeLog.arm
sysdeps/arm/memcpy.S [new file with mode: 0644]
sysdeps/arm/memmove.S [new file with mode: 0644]

diff --git a/ChangeLog.arm b/ChangeLog.arm
index b28f7b5cf253deea4b1901a477334881edbc997e..6d49bde2dac580bcd20ca0444c98f50fe8e2415b 100644
--- a/ChangeLog.arm
+++ b/ChangeLog.arm
@@ -1,3 +1,9 @@
+2006-10-31  Nicolas Pitre  <nico@cam.org>
+           Joseph Myers  <joseph@codesourcery.com>
+
+       * sysdeps/arm/memcpy.S: New file.
+       * sysdeps/arm/memmove.S: Likewise.
+
 2006-09-22  Khem Raj  <kraj@mvista.com>
 
        * sysdeps/unix/sysv/linux/arm/fxstatat.c: New file.
diff --git a/sysdeps/arm/memcpy.S b/sysdeps/arm/memcpy.S
new file mode 100644
index 0000000..61cf33c
--- /dev/null
+++ b/sysdeps/arm/memcpy.S
@@ -0,0 +1,227 @@
+/* Copyright (C) 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+/*
+ * Data preload for architectures that support it (ARM V5TE and above)
+ */
+#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
+     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
+     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
+     && !defined (__ARM_ARCH_5T__))
+#define PLD(code...)    code
+#else
+#define PLD(code...)
+#endif
+
+/*
+ * This can be used to enable code to cacheline align the source pointer.
+ * Experiments on tested architectures (StrongARM and XScale) didn't show
+ * this a worthwhile thing to do.  That might be different in the future.
+ */
+//#define CALGN(code...)        code
+#define CALGN(code...)
+
+/*
+ * Endian independent macros for shifting bytes within registers.
+ */
+#ifndef __ARMEB__
+#define pull            lsr
+#define push            lsl
+#else
+#define pull            lsl
+#define push            lsr
+#endif
+
+               .text
+
+/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
+
+ENTRY(memcpy)
+
+               stmfd   sp!, {r0, r4, lr}
+
+               subs    r2, r2, #4
+               blt     8f
+               ands    ip, r0, #3
+       PLD(    pld     [r1, #0]                )
+               bne     9f
+               ands    ip, r1, #3
+               bne     10f
+
+1:             subs    r2, r2, #(28)
+               stmfd   sp!, {r5 - r8}
+               blt     5f
+
+       CALGN(  ands    ip, r1, #31             )
+       CALGN(  rsb     r3, ip, #32             )
+       CALGN(  sbcnes  r4, r3, r2              )  @ C is always set here
+       CALGN(  bcs     2f                      )
+       CALGN(  adr     r4, 6f                  )
+       CALGN(  subs    r2, r2, r3              )  @ C gets set
+       CALGN(  add     pc, r4, ip              )
+
+       PLD(    pld     [r1, #0]                )
+2:     PLD(    subs    r2, r2, #96             )
+       PLD(    pld     [r1, #28]               )
+       PLD(    blt     4f                      )
+       PLD(    pld     [r1, #60]               )
+       PLD(    pld     [r1, #92]               )
+
+3:     PLD(    pld     [r1, #124]              )
+4:             ldmia   r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
+               subs    r2, r2, #32
+               stmia   r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
+               bge     3b
+       PLD(    cmn     r2, #96                 )
+       PLD(    bge     4b                      )
+
+5:             ands    ip, r2, #28
+               rsb     ip, ip, #32
+               addne   pc, pc, ip              @ C is always clear here
+               b       7f
+6:             nop
+               ldr     r3, [r1], #4
+               ldr     r4, [r1], #4
+               ldr     r5, [r1], #4
+               ldr     r6, [r1], #4
+               ldr     r7, [r1], #4
+               ldr     r8, [r1], #4
+               ldr     lr, [r1], #4
+
+               add     pc, pc, ip
+               nop
+               nop
+               str     r3, [r0], #4
+               str     r4, [r0], #4
+               str     r5, [r0], #4
+               str     r6, [r0], #4
+               str     r7, [r0], #4
+               str     r8, [r0], #4
+               str     lr, [r0], #4
+
+       CALGN(  bcs     2b                      )
+
+7:             ldmfd   sp!, {r5 - r8}
+
+8:             movs    r2, r2, lsl #31
+               ldrneb  r3, [r1], #1
+               ldrcsb  r4, [r1], #1
+               ldrcsb  ip, [r1]
+               strneb  r3, [r0], #1
+               strcsb  r4, [r0], #1
+               strcsb  ip, [r0]
+
+               ldmfd   sp!, {r0, r4, pc}
+
+9:             rsb     ip, ip, #4
+               cmp     ip, #2
+               ldrgtb  r3, [r1], #1
+               ldrgeb  r4, [r1], #1
+               ldrb    lr, [r1], #1
+               strgtb  r3, [r0], #1
+               strgeb  r4, [r0], #1
+               subs    r2, r2, ip
+               strb    lr, [r0], #1
+               blt     8b
+               ands    ip, r1, #3
+               beq     1b
+
+10:            bic     r1, r1, #3
+               cmp     ip, #2
+               ldr     lr, [r1], #4
+               beq     17f
+               bgt     18f
+
+
+               .macro  forward_copy_shift pull push
+
+               subs    r2, r2, #28
+               blt     14f
+
+       CALGN(  ands    ip, r1, #31             )
+       CALGN(  rsb     ip, ip, #32             )
+       CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
+       CALGN(  subcc   r2, r2, ip              )
+       CALGN(  bcc     15f                     )
+
+11:            stmfd   sp!, {r5 - r9}
+
+       PLD(    pld     [r1, #0]                )
+       PLD(    subs    r2, r2, #96             )
+       PLD(    pld     [r1, #28]               )
+       PLD(    blt     13f                     )
+       PLD(    pld     [r1, #60]               )
+       PLD(    pld     [r1, #92]               )
+
+12:    PLD(    pld     [r1, #124]              )
+13:            ldmia   r1!, {r4, r5, r6, r7}
+               mov     r3, lr, pull #\pull
+               subs    r2, r2, #32
+               ldmia   r1!, {r8, r9, ip, lr}
+               orr     r3, r3, r4, push #\push
+               mov     r4, r4, pull #\pull
+               orr     r4, r4, r5, push #\push
+               mov     r5, r5, pull #\pull
+               orr     r5, r5, r6, push #\push
+               mov     r6, r6, pull #\pull
+               orr     r6, r6, r7, push #\push
+               mov     r7, r7, pull #\pull
+               orr     r7, r7, r8, push #\push
+               mov     r8, r8, pull #\pull
+               orr     r8, r8, r9, push #\push
+               mov     r9, r9, pull #\pull
+               orr     r9, r9, ip, push #\push
+               mov     ip, ip, pull #\pull
+               orr     ip, ip, lr, push #\push
+               stmia   r0!, {r3, r4, r5, r6, r7, r8, r9, ip}
+               bge     12b
+       PLD(    cmn     r2, #96                 )
+       PLD(    bge     13b                     )
+
+               ldmfd   sp!, {r5 - r9}
+
+14:            ands    ip, r2, #28
+               beq     16f
+
+15:            mov     r3, lr, pull #\pull
+               ldr     lr, [r1], #4
+               subs    ip, ip, #4
+               orr     r3, r3, lr, push #\push
+               str     r3, [r0], #4
+               bgt     15b
+       CALGN(  cmp     r2, #0                  )
+       CALGN(  bge     11b                     )
+
+16:            sub     r1, r1, #(\push / 8)
+               b       8b
+
+               .endm
+
+
+               forward_copy_shift      pull=8  push=24
+
+17:            forward_copy_shift      pull=16 push=16
+
+18:            forward_copy_shift      pull=24 push=8
+
+END(memcpy)
+libc_hidden_builtin_def (memcpy)
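
Editorial note on the memcpy.S hunk above: when the destination is word aligned but the
source is not, the forward_copy_shift macro loads aligned source words and merges each
consecutive pair with the "pull"/"push" shifts (lsr/lsl on little-endian, swapped on
big-endian). Below is a minimal C sketch of that merge, assuming little-endian byte
order and a source misaligned by 1, 2 or 3 bytes; copy_words_shifted is a hypothetical
name for illustration and is not part of the commit.

    #include <stdint.h>
    #include <stddef.h>

    /* Sketch only: build one aligned destination word from two aligned
       source loads, as "mov ..., pull #\pull" / "orr ..., push #\push" do. */
    static void copy_words_shifted(uint32_t *dst, const unsigned char *src,
                                   size_t nwords)
    {
        unsigned off  = (uintptr_t)src & 3;   /* 1, 2 or 3 on this code path   */
        unsigned pull = 8 * off;              /* right shift ("pull" on LE)    */
        unsigned push = 32 - pull;            /* left shift  ("push" on LE)    */
        const uint32_t *s = (const uint32_t *)((uintptr_t)src & ~(uintptr_t)3);

        uint32_t cur = *s++;                  /* like the initial "ldr lr, [r1], #4" */
        while (nwords--) {
            uint32_t next = *s++;
            *dst++ = (cur >> pull) | (next << push);
            cur = next;
        }
    }

The assembly unrolls this merge eight words at a time and adds the PLD prefetches; the
sketch only shows the per-word shift-and-or step.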
diff --git a/sysdeps/arm/memmove.S b/sysdeps/arm/memmove.S
new file mode 100644
index 0000000..2dd0790
--- /dev/null
+++ b/sysdeps/arm/memmove.S
@@ -0,0 +1,237 @@
+/* Copyright (C) 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+/*
+ * Data preload for architectures that support it (ARM V5TE and above)
+ */
+#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
+     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
+     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
+     && !defined (__ARM_ARCH_5T__))
+#define PLD(code...)    code
+#else
+#define PLD(code...)
+#endif
+
+/*
+ * This can be used to enable code to cacheline align the source pointer.
+ * Experiments on tested architectures (StrongARM and XScale) didn't show
+ * this a worthwhile thing to do.  That might be different in the future.
+ */
+//#define CALGN(code...)        code
+#define CALGN(code...)
+
+/*
+ * Endian independent macros for shifting bytes within registers.
+ */
+#ifndef __ARMEB__
+#define pull            lsr
+#define push            lsl
+#else
+#define pull            lsl
+#define push            lsr
+#endif
+
+               .text
+
+/*
+ * Prototype: void *memmove(void *dest, const void *src, size_t n);
+ *
+ * Note:
+ *
+ * If the memory regions don't overlap, we simply branch to memcpy which is
+ * normally a bit faster. Otherwise the copy is done going downwards.
+ */
+
+ENTRY(memmove)
+
+               subs    ip, r0, r1
+               cmphi   r2, ip
+               bls     memcpy
+
+               stmfd   sp!, {r0, r4, lr}
+               add     r1, r1, r2
+               add     r0, r0, r2
+               subs    r2, r2, #4
+               blt     8f
+               ands    ip, r0, #3
+       PLD(    pld     [r1, #-4]               )
+               bne     9f
+               ands    ip, r1, #3
+               bne     10f
+
+1:             subs    r2, r2, #(28)
+               stmfd   sp!, {r5 - r8}
+               blt     5f
+
+       CALGN(  ands    ip, r1, #31             )
+       CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
+       CALGN(  bcs     2f                      )
+       CALGN(  adr     r4, 6f                  )
+       CALGN(  subs    r2, r2, ip              )  @ C is set here
+       CALGN(  add     pc, r4, ip              )
+
+       PLD(    pld     [r1, #-4]               )
+2:     PLD(    subs    r2, r2, #96             )
+       PLD(    pld     [r1, #-32]              )
+       PLD(    blt     4f                      )
+       PLD(    pld     [r1, #-64]              )
+       PLD(    pld     [r1, #-96]              )
+
+3:     PLD(    pld     [r1, #-128]             )
+4:             ldmdb   r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
+               subs    r2, r2, #32
+               stmdb   r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
+               bge     3b
+       PLD(    cmn     r2, #96                 )
+       PLD(    bge     4b                      )
+
+5:             ands    ip, r2, #28
+               rsb     ip, ip, #32
+               addne   pc, pc, ip              @ C is always clear here
+               b       7f
+6:             nop
+               ldr     r3, [r1, #-4]!
+               ldr     r4, [r1, #-4]!
+               ldr     r5, [r1, #-4]!
+               ldr     r6, [r1, #-4]!
+               ldr     r7, [r1, #-4]!
+               ldr     r8, [r1, #-4]!
+               ldr     lr, [r1, #-4]!
+
+               add     pc, pc, ip
+               nop
+               nop
+               str     r3, [r0, #-4]!
+               str     r4, [r0, #-4]!
+               str     r5, [r0, #-4]!
+               str     r6, [r0, #-4]!
+               str     r7, [r0, #-4]!
+               str     r8, [r0, #-4]!
+               str     lr, [r0, #-4]!
+
+       CALGN(  bcs     2b                      )
+
+7:             ldmfd   sp!, {r5 - r8}
+
+8:             movs    r2, r2, lsl #31
+               ldrneb  r3, [r1, #-1]!
+               ldrcsb  r4, [r1, #-1]!
+               ldrcsb  ip, [r1, #-1]
+               strneb  r3, [r0, #-1]!
+               strcsb  r4, [r0, #-1]!
+               strcsb  ip, [r0, #-1]
+               ldmfd   sp!, {r0, r4, pc}
+
+9:             cmp     ip, #2
+               ldrgtb  r3, [r1, #-1]!
+               ldrgeb  r4, [r1, #-1]!
+               ldrb    lr, [r1, #-1]!
+               strgtb  r3, [r0, #-1]!
+               strgeb  r4, [r0, #-1]!
+               subs    r2, r2, ip
+               strb    lr, [r0, #-1]!
+               blt     8b
+               ands    ip, r1, #3
+               beq     1b
+
+10:            bic     r1, r1, #3
+               cmp     ip, #2
+               ldr     r3, [r1, #0]
+               beq     17f
+               blt     18f
+
+
+               .macro  backward_copy_shift push pull
+
+               subs    r2, r2, #28
+               blt     14f
+
+       CALGN(  ands    ip, r1, #31             )
+       CALGN(  rsb     ip, ip, #32             )
+       CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
+       CALGN(  subcc   r2, r2, ip              )
+       CALGN(  bcc     15f                     )
+
+11:            stmfd   sp!, {r5 - r9}
+
+       PLD(    pld     [r1, #-4]               )
+       PLD(    subs    r2, r2, #96             )
+       PLD(    pld     [r1, #-32]              )
+       PLD(    blt     13f                     )
+       PLD(    pld     [r1, #-64]              )
+       PLD(    pld     [r1, #-96]              )
+
+12:    PLD(    pld     [r1, #-128]             )
+13:            ldmdb   r1!, {r7, r8, r9, ip}
+               mov     lr, r3, push #\push
+               subs    r2, r2, #32
+               ldmdb   r1!, {r3, r4, r5, r6}
+               orr     lr, lr, ip, pull #\pull
+               mov     ip, ip, push #\push
+               orr     ip, ip, r9, pull #\pull
+               mov     r9, r9, push #\push
+               orr     r9, r9, r8, pull #\pull
+               mov     r8, r8, push #\push
+               orr     r8, r8, r7, pull #\pull
+               mov     r7, r7, push #\push
+               orr     r7, r7, r6, pull #\pull
+               mov     r6, r6, push #\push
+               orr     r6, r6, r5, pull #\pull
+               mov     r5, r5, push #\push
+               orr     r5, r5, r4, pull #\pull
+               mov     r4, r4, push #\push
+               orr     r4, r4, r3, pull #\pull
+               stmdb   r0!, {r4 - r9, ip, lr}
+               bge     12b
+       PLD(    cmn     r2, #96                 )
+       PLD(    bge     13b                     )
+
+               ldmfd   sp!, {r5 - r9}
+
+14:            ands    ip, r2, #28
+               beq     16f
+
+15:            mov     lr, r3, push #\push
+               ldr     r3, [r1, #-4]!
+               subs    ip, ip, #4
+               orr     lr, lr, r3, pull #\pull
+               str     lr, [r0, #-4]!
+               bgt     15b
+       CALGN(  cmp     r2, #0                  )
+       CALGN(  bge     11b                     )
+
+16:            add     r1, r1, #(\pull / 8)
+               b       8b
+
+               .endm
+
+
+               backward_copy_shift     push=8  pull=24
+
+17:            backward_copy_shift     push=16 pull=16
+
+18:            backward_copy_shift     push=24 pull=8
+
+
+END(memmove)
+libc_hidden_builtin_def (memmove)
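
Editorial note on the memmove.S hunk above: the comment "if the memory regions don't
overlap, we simply branch to memcpy ... otherwise the copy is done going downwards"
corresponds to the subs/cmphi/bls sequence at the entry point. A rough C equivalent of
that dispatch is sketched below; memmove_sketch is a made-up name, and where the sketch
copies forward byte by byte the real code branches to the forward-copying memcpy added
in this same commit.

    #include <stdint.h>
    #include <stddef.h>

    void *memmove_sketch(void *dest, const void *src, size_t n)
    {
        unsigned char *d = dest;
        const unsigned char *s = src;

        /* "subs ip, r0, r1; cmphi r2, ip; bls memcpy":
           a forward copy is safe unless dest lies inside (src, src + n). */
        if ((uintptr_t)d <= (uintptr_t)s || (uintptr_t)d - (uintptr_t)s >= n) {
            while (n--)
                *d++ = *s++;          /* the role memcpy plays in the .S file */
        } else {
            d += n;                   /* "add r0, r0, r2" */
            s += n;                   /* "add r1, r1, r2" */
            while (n--)
                *--d = *--s;          /* descending copy, as in memmove.S     */
        }
        return dest;
    }

The assembly then applies the same alignment, word-at-a-time and prefetch machinery as
memcpy.S, only with descending (ldmdb/stmdb) accesses instead of ascending ones.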