]> git.ipfire.org Git - thirdparty/glibc.git/commitdiff
PowerPC: Add 64-bit multiarch implementation of memcpy
author Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Mon, 11 Mar 2013 20:29:43 +0000 (17:29 -0300)
committer Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
Fri, 15 Mar 2013 12:48:31 +0000 (09:48 -0300)
Move and rename the specialized memcpy implementations to the multiarch
folder and add the IFUNC memcpy source.

ChangeLog
sysdeps/powerpc/powerpc64/multiarch/Makefile [new file with mode: 0644]
sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c [new file with mode: 0644]
sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S [moved from sysdeps/powerpc/powerpc64/a2/memcpy.S with 99% similarity]
sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S [moved from sysdeps/powerpc/powerpc64/cell/memcpy.S with 98% similarity]
sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S [moved from sysdeps/powerpc/powerpc64/power4/memcpy.S with 98% similarity]
sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S [moved from sysdeps/powerpc/powerpc64/power6/memcpy.S with 99% similarity]
sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S [moved from sysdeps/powerpc/powerpc64/power7/memcpy.S with 99% similarity]
sysdeps/powerpc/powerpc64/multiarch/memcpy.S [new file with mode: 0644]

index d4cb6786384ede876e56bb737d117db0568b7c30..098973b4c5ebd0260e43d59bf72a8d0973c6aedf 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2013-03-07  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
+           Tulio Magno Quites Machado Filho  <tuliom@linux.vnet.ibm.com>
+
+     * sysdeps/powerpc/powerpc64/a2/memcpy.S: Moved to multiarch folder.
+     * sysdeps/powerpc/powerpc64/cell/memcpy.S: Moved to multiarch folder.
+     * sysdeps/powerpc/powerpc64/power4/memcpy.S: Moved to multiarch folder.
+     * sysdeps/powerpc/powerpc64/power6/memcpy.S: Moved to multiarch folder.
+     * sysdeps/powerpc/powerpc64/power7/memcpy.S: Moved to multiarch folder.
+     * sysdeps/powerpc/powerpc64/multiarch/Makefile: Multiarch makefile.
+     * sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S: Moved from a2 folder.
+     * sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S: Moved from cell
+     folder.
+     * sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S: Moved from power4
+     folder.
+     * sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S: Moved from power6
+     folder.
+     * sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S: Moved from power7
+     folder.
+     * sysdeps/powerpc/powerpc64/multiarch/memcpy.S: Multiarch implementation
+     using IFUNC extension.
+
 2013-03-07  Tulio Magno Quites Machado Filho  <tuliom@linux.vnet.ibm.com>
 
        * sysdeps/powerpc/powerpc32/multiarch/Makefile (sysdep_routines):
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
new file mode 100644 (file)
index 0000000..5ba03ca
--- /dev/null
@@ -0,0 +1,4 @@
+ifeq ($(subdir),string)
+sysdep_routines += memcpy-a2 memcpy-cell memcpy-power4 memcpy-power6 \
+                  memcpy-power7
+endif # $(subdir) == string: per-CPU memcpy variants
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
new file mode 100644 (file)
index 0000000..760f46c
--- /dev/null
@@ -0,0 +1,62 @@
+/* Enumerate available IFUNC implementations of a function.  PowerPC64 version.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <assert.h>
+#include <string.h>
+#include <wchar.h>
+#include <ldsodefs.h>
+#include <ifunc-impl-list.h>
+
+/* Maximum number of IFUNC implementations (six memcpy variants below).  */
+#define MAX_IFUNC      6
+
+/* Combined masks: also match when a later ISA level bit (2.05/2.06) is set.  */
+#define PPC_POWER4 (PPC_FEATURE_POWER4|PPC_FEATURE_ARCH_2_05|       \
+                   PPC_FEATURE_ARCH_2_06)
+#define PPC_POWER6 (PPC_FEATURE_ARCH_2_05|PPC_FEATURE_ARCH_2_06)
+
+/* List the memcpy variants through the IFUNC_IMPL macros, each with the
+   hwcap test that enables it; returns the count left in I.  */
+size_t
+__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+                       size_t max)
+{
+  assert (max >= MAX_IFUNC);
+
+  size_t i = 0;
+
+  /* dl_hwcap is read as a doubleword by memcpy.S; keep its full width.  */
+  unsigned long int hwcap = GLRO(dl_hwcap);
+
+#ifdef SHARED
+  IFUNC_IMPL (i, name, memcpy,
+             IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_HAS_VSX,
+                             __memcpy_power7)
+             IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_06,
+                             __memcpy_power_a2)
+             IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_POWER6,
+                             __memcpy_power6)
+             IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_CELL_BE,
+                             __memcpy_cell)
+             IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_POWER4,
+                             __memcpy_power4)
+             IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc64))
+#endif
+
+  return i;
+}
similarity index 99%
rename from sysdeps/powerpc/powerpc64/a2/memcpy.S
rename to sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S
index 84c82bb768342a3abea3d5c1aea3e2e1e26ce295..a2834f33456c645241fe2a389f900da19a5a159e 100644 (file)
@@ -30,7 +30,7 @@
 
 
        .machine  a2
-EALIGN (memcpy, 5, 0)
+EALIGN (__memcpy_power_a2, 5, 0)
        CALL_MCOUNT 3
 
        dcbt    0,r4            /* Prefetch ONE SRC cacheline  */
@@ -520,5 +520,4 @@ L(endloop2_128):
        b       L(lessthancacheline)
 
 
-END_GEN_TB (memcpy,TB_TOCLESS)
-libc_hidden_builtin_def (memcpy)
+END_GEN_TB (__memcpy_power_a2,TB_TOCLESS)
similarity index 98%
rename from sysdeps/powerpc/powerpc64/cell/memcpy.S
rename to sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S
index a271965dd73a471923dd13d859cee49a58d28f8a..c8dd87fa030dacac921acbe90367d360e43149d3 100644 (file)
@@ -39,7 +39,7 @@
 
 .align  7
 
-EALIGN (memcpy, 5, 0)
+EALIGN (__memcpy_cell, 5, 0)
        CALL_MCOUNT 3
 
        dcbt    0,r4            /* Prefetch ONE SRC cacheline  */
@@ -238,5 +238,4 @@ EALIGN (memcpy, 5, 0)
        stb     r0,0(r6)
 1:     blr
 
-END_GEN_TB (memcpy,TB_TOCLESS)
-libc_hidden_builtin_def (memcpy)
+END_GEN_TB (__memcpy_cell,TB_TOCLESS)
similarity index 98%
rename from sysdeps/powerpc/powerpc64/power4/memcpy.S
rename to sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S
index c43d1d2e4ed83d2d9f9c62f59899a83005541e68..cd167e253c734accd4b8c52e6f079d081135471f 100644 (file)
@@ -34,7 +34,7 @@
    Each case has a optimized unrolled loop.   */
 
        .machine power4
-EALIGN (memcpy, 5, 0)
+EALIGN (__memcpy_power4, 5, 0)
        CALL_MCOUNT 3
 
     cmpldi cr1,5,31
@@ -93,7 +93,7 @@ EALIGN (memcpy, 5, 0)
 
   /* Move doublewords where destination and source are DW aligned.
      Use a unrolled loop to copy 4 doubleword (32-bytes) per iteration.
-     If the copy is not an exact multiple of 32 bytes, 1-3
+     If the copy is not an exact multiple of 32 bytes, 1-3 
      doublewords are copied as needed to set up the main loop.  After
      the main loop exits there may be a tail of 1-7 bytes. These byte are 
      copied a word/halfword/byte at a time as needed to preserve alignment.  */
@@ -411,5 +411,4 @@ EALIGN (memcpy, 5, 0)
     ld 31,-8(1)
     ld 3,-16(1)
     blr
-END_GEN_TB (memcpy,TB_TOCLESS)
-libc_hidden_builtin_def (memcpy)
+END_GEN_TB (__memcpy_power4,TB_TOCLESS)
similarity index 99%
rename from sysdeps/powerpc/powerpc64/power6/memcpy.S
rename to sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S
index 55c0d71184f97e4b79c08b7f6bd0d0386660067b..e0eba0ac2734aa1429bf92f11642c3b01a09d4be 100644 (file)
@@ -41,7 +41,7 @@
    for the destination.  */
 
        .machine        "power6"
-EALIGN (memcpy, 7, 0)
+EALIGN (__memcpy_power6, 7, 0)
        CALL_MCOUNT 3
 
     cmpldi cr1,5,31
@@ -1163,5 +1163,4 @@ L(du_done):
     ld 31,-8(1)
     ld 3,-16(1)
     blr
-END_GEN_TB (memcpy,TB_TOCLESS)
-libc_hidden_builtin_def (memcpy)
+END_GEN_TB (__memcpy_power6,TB_TOCLESS)
similarity index 99%
rename from sysdeps/powerpc/powerpc64/power7/memcpy.S
rename to sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S
index 800a9f1bb1c200fb25040a133236f219fa2d8510..e6ee4e2570fef7bba1e0fde976334f07aa70e622 100644 (file)
@@ -24,7 +24,7 @@
    Returns 'dst'.  */
 
        .machine power7
-EALIGN (memcpy, 5, 0)
+EALIGN (__memcpy_power7, 5, 0)
        CALL_MCOUNT 3
 
        cmpldi  cr1,5,31
@@ -500,5 +500,4 @@ L(end_unaligned_loop):
        ld      3,-16(1)
        blr
 
-END_GEN_TB (memcpy,TB_TOCLESS)
-libc_hidden_builtin_def (memcpy)
+END_GEN_TB (__memcpy_power7,TB_TOCLESS)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcpy.S b/sysdeps/powerpc/powerpc64/multiarch/memcpy.S
new file mode 100644 (file)
index 0000000..92c2e2a
--- /dev/null
@@ -0,0 +1,92 @@
+/* Multiple versions of memcpy PowerPC64.
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <rtld-global-offsets.h>
+
+/* Define multiple versions only for the definition in libc and for
+   the DSO.  In static binaries memcpy is needed before the
+   initialization has happened.  */
+#if defined SHARED && !defined NOT_IN_libc
+       .section        ".toc","aw"
+.LC__dl_hwcap:  /* TOC entry through which the resolver reaches dl_hwcap.  */
+# ifdef SHARED
+       .tc _rtld_global_ro[TC],_rtld_global_ro
+# else
+       .tc _dl_hwcap[TC],_dl_hwcap
+# endif
+       .section ".text"
+
+ENTRY(memcpy)  /* IFUNC resolver: leaves the chosen variant's address in r3.  */
+       .type   memcpy, @gnu_indirect_function
+       ld      r5,.LC__dl_hwcap@toc(r2)  /* r5 = contents of the TOC entry.  */
+# ifdef SHARED
+       /* Load _rtld_global_ro._dl_hwcap (64-bit load).  */
+       ld      r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5)
+# else
+       ld      r5,0(r5) /* Load extern _dl_hwcap. */
+# endif
+       rldicl. r0,r5,57,63  /* Test hwcap & 0x00000080.  */
+       bne- L(power7)
+       rldicl. r0,r5,56,63  /* Test hwcap & 0x00000100.  */
+       bne- L(powerA2)
+       rldicl. r0,r5,52,63  /* Test hwcap & 0x00001000.  */
+       bne- L(power6)
+       rldicl. r0,r5,48,63  /* Test hwcap & 0x00010000.  */
+       bne- L(powercell)
+       rldicl. r0,r5,45,63  /* Test hwcap & 0x00080000.  */
+       bne- L(power4)
+       ld      r3,__memcpy_ppc64@got(r2)  /* No bit matched: baseline.  */
+       blr
+L(power7):
+       ld      r3,__memcpy_power7@got(r2)
+       blr
+L(powerA2):
+       ld      r3,__memcpy_power_a2@got(r2)
+       blr
+L(power6):
+       ld      r3,__memcpy_power6@got(r2)
+       blr
+L(powercell):
+       ld      r3,__memcpy_cell@got(r2)
+       blr
+L(power4):
+       ld      r3,__memcpy_power4@got(r2)
+       blr
+END(memcpy)
+
+# undef EALIGN  /* Redefined: NAME is ignored, entry is __memcpy_ppc64.  */
+# define EALIGN(name, alignt, words)                  \
+  ENTRY_2(__memcpy_ppc64)                             \
+BODY_LABEL(__memcpy_ppc64):                           \
+  cfi_startproc;
+
+# undef END_GEN_TB  /* Redefined: always closes __memcpy_ppc64.  */
+# define END_GEN_TB(name, mask)                       \
+  cfi_endproc;                                        \
+  TRACEBACK_MASK(__memcpy_ppc64,mask)                 \
+  END_2(__memcpy_ppc64)
+
+# undef libc_hidden_builtin_def  /* Redefined: __GI_memcpy = __memcpy_ppc64.  */
+# define libc_hidden_builtin_def(name)                \
+  .globl __GI_memcpy; __GI_memcpy = __memcpy_ppc64
+
+#endif
+
+#include "../memcpy.S"