+2013-03-07 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
+ Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
+
+ * sysdeps/powerpc/powerpc32/multiarch/Makefile: New file.
+ * sysdeps/powerpc/powerpc32/multiarch/ifunc-impl-list.c: Likewise.
+ * sysdeps/powerpc/powerpc32/multiarch/memcpy.S: Likewise.
+ * sysdeps/powerpc/powerpc32/a2/memcpy.S: Moved to...
+ * sysdeps/powerpc/powerpc32/multiarch/memcpy-a2.S: ... here.
+ * sysdeps/powerpc/powerpc32/cell/memcpy.S: Moved to...
+ * sysdeps/powerpc/powerpc32/multiarch/memcpy-cell.S: ... here.
+ * sysdeps/powerpc/powerpc32/power4/memcpy.S: Moved to...
+ * sysdeps/powerpc/powerpc32/memcpy.S: ... here.
+ * sysdeps/powerpc/powerpc32/power6/memcpy.S: Moved to...
+ * sysdeps/powerpc/powerpc32/multiarch/memcpy-power6.S: ... here.
+ * sysdeps/powerpc/powerpc32/power7/memcpy.S: Moved to...
+ * sysdeps/powerpc/powerpc32/multiarch/memcpy-power7.S: ... here.
+
2013-03-07 Andreas Jaeger <aj@suse.de>
* sysdeps/unix/sysv/linux/Makefile (sysdep_headers): Add
--- /dev/null
+# When building the string subdirectory, append the CPU-specific memcpy
+# variants to sysdep_routines.
+ifeq ($(subdir),string)
+sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell
+endif
--- /dev/null
+/* Enumerate available IFUNC implementations of a function. PowerPC32 version.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <assert.h>
+#include <string.h>
+#include <wchar.h>
+#include <ldsodefs.h>
+#include <ifunc-impl-list.h>
+
+/* Maximum number of IFUNC implementations.  */
+#define MAX_IFUNC 5
+
+/* Record in ARRAY the IFUNC implementations registered below for the
+   function NAME and return how many entries were recorded.  MAX is the
+   capacity of ARRAY and must be at least MAX_IFUNC.  The IFUNC_IMPL and
+   IFUNC_IMPL_ADD macros come from <ifunc-impl-list.h>; each
+   IFUNC_IMPL_ADD pairs an implementation symbol with the hwcap test
+   under which it is usable.  */
+size_t
+__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+			size_t max)
+{
+  assert (max >= MAX_IFUNC);
+
+  size_t i = 0;
+
+  /* NOTE(review): storing dl_hwcap in a uint32_t keeps only its low 32
+     bits if the field is wider -- confirm every feature bit tested
+     below lives there.  */
+  uint32_t hwcap = GLRO(dl_hwcap);
+
+#ifdef SHARED
+  /* Keep this list in the same priority order as the assembly selector
+     in multiarch/memcpy.S.  PPC_FEATURE_CELL_BE is tested unshifted
+     here: the ">> 16" form belongs only to the assembly selector, where
+     it is the immediate of an andis. instruction (which ANDs against
+     the upper halfword).  Shifting the mask in C would test an
+     unrelated low-order bit.  */
+  IFUNC_IMPL (i, name, memcpy,
+	      IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_HAS_VSX,
+			      __memcpy_power7)
+	      IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_06,
+			      __memcpy_a2)
+	      IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_05,
+			      __memcpy_power6)
+	      IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_CELL_BE,
+			      __memcpy_cell)
+	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc32))
+#endif
+
+  return i;
+}
#define ZERO_AHEAD 2 /* no cache lines DST zeroing ahead */
.machine a2
-EALIGN (memcpy, 5, 0)
+EALIGN (__memcpy_a2, 5, 0)
CALL_MCOUNT
dcbt 0,r4 /* Prefetch ONE SRC cacheline */
b L(lessthancacheline)
-END (memcpy)
-libc_hidden_builtin_def (memcpy)
+END (__memcpy_a2)
.align 7
-EALIGN (memcpy, 5, 0)
+EALIGN (__memcpy_cell, 5, 0)
CALL_MCOUNT
dcbt 0,r4 /* Prefetch ONE SRC cacheline */
stb r0,0(r6)
1: blr
-END (memcpy)
-libc_hidden_builtin_def (memcpy)
+END (__memcpy_cell)
/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
Returns 'dst'.
- Memcpy handles short copies (< 32-bytes) using a binary move blocks
+ Memcpy handles short copies (< 32-bytes) using a binary move blocks
(no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled
with the appropriate combination of byte and halfword load/stores.
There is minimal effort to optimize the alignment of short moves.
Each case has an optimized unrolled loop. */
.machine power6
-EALIGN (memcpy, 5, 0)
+EALIGN (__memcpy_power6, 5, 0)
CALL_MCOUNT
stwu 1,-32(1)
lwz 31,24(1)
addi 1,1,32
blr
-END (memcpy)
-
-libc_hidden_builtin_def (memcpy)
+END (__memcpy_power6)
Returns 'dst'. */
.machine power7
-EALIGN (memcpy, 5, 0)
+EALIGN (__memcpy_power7, 5, 0)
CALL_MCOUNT
stwu 1,-32(1)
addi 1,1,32
blr
-END (memcpy)
-libc_hidden_builtin_def (memcpy)
+END (__memcpy_power7)
--- /dev/null
+/* Multiple versions of memcpy (IFUNC selector).  PowerPC32 version.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <rtld-global-offsets.h>
+
+/* Define multiple versions only for the definition in libc. */
+#if defined SHARED && !defined NOT_IN_libc
+ .text
+/* IFUNC selector for memcpy.  Reads a word of the hwcap value, tests the
+   feature bits in priority order (VSX, ISA 2.06, ISA 2.05, Cell BE) and
+   returns with the address of the chosen implementation in r3, falling
+   back to __memcpy_ppc32 when none of the bits is set.
+   Registers used: r3 (result), r5 (GOT pointer, PIC only), r6 (hwcap
+   word), r7 (scratch), r11 (saved LR, PIC only), cr0.  */
+ENTRY(memcpy)
+	.type	memcpy, @gnu_indirect_function
+# ifdef PIC
+	/* Save LR in r11, then use a branch-and-link to the next
+	   instruction to obtain the current address and derive the GOT
+	   pointer from it.  */
+	mflr	r11
+	cfi_register (lr,r11)
+	bcl	20,31,1f
+1:	mflr	r5
+	addis	r5,r5,_GLOBAL_OFFSET_TABLE_-1b@ha
+	addi	r5,r5,_GLOBAL_OFFSET_TABLE_-1b@l
+	lwz	r6,_rtld_global_ro@got(r5)
+	mtlr	r11			/* Restore the caller's LR.  */
+	cfi_same_value (lr)
+	/* The +4 selects the second word of the hwcap field; presumably
+	   the low word of a 64-bit big-endian value -- TODO confirm.  */
+	lwz	r6,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r6)
+# else
+	lis	r6,(_dl_hwcap+4)@ha
+	lwz	r6,(_dl_hwcap+4)@l(r6)
+# endif
+	/* r5 - got pointer (PIC only) | r6 - _dl_hwcap  */
+	andi.	r7,r6,PPC_FEATURE_HAS_VSX
+	bne-	L(power7)
+	andi.	r7,r6,PPC_FEATURE_ARCH_2_06
+	bne-	L(powerA2)
+	andi.	r7,r6,PPC_FEATURE_ARCH_2_05
+	bne-	L(power6)
+	/* andis. ANDs against the upper halfword, hence the shift.  */
+	andis.	r7,r6,(PPC_FEATURE_CELL_BE >> 16)
+	bne-	L(powerCELL)
+	/* In the PIC case the address is loaded from the GOT.  In the
+	   non-PIC case it is materialised with lis/addi; an lwz there
+	   would load the first instruction word of the function instead
+	   of its address.  The same applies to every case below.  */
+# ifdef PIC
+	lwz	r3,__memcpy_ppc32@got(r5)
+# else
+	lis	r3,__memcpy_ppc32@ha
+	addi	r3,r3,__memcpy_ppc32@l
+# endif
+	blr
+L(power7):
+# ifdef PIC
+	lwz	r3,__memcpy_power7@got(r5)
+# else
+	lis	r3,__memcpy_power7@ha
+	addi	r3,r3,__memcpy_power7@l
+# endif
+	blr
+L(powerA2):
+# ifdef PIC
+	lwz	r3,__memcpy_a2@got(r5)
+# else
+	lis	r3,__memcpy_a2@ha
+	addi	r3,r3,__memcpy_a2@l
+# endif
+	blr
+L(power6):
+# ifdef PIC
+	lwz	r3,__memcpy_power6@got(r5)
+# else
+	lis	r3,__memcpy_power6@ha
+	addi	r3,r3,__memcpy_power6@l
+# endif
+	blr
+L(powerCELL):
+# ifdef PIC
+	lwz	r3,__memcpy_cell@got(r5)
+# else
+	lis	r3,__memcpy_cell@ha
+	addi	r3,r3,__memcpy_cell@l
+# endif
+	blr
+END(memcpy)
+
+/* Override EALIGN so that, whatever NAME is passed, the function that
+   follows is emitted as the global symbol __memcpy_ppc32.  Presumably
+   this renames the generic implementation pulled in by the #include of
+   "../memcpy.S" at the end of this file -- verify against that file.  */
+# undef EALIGN
+# define EALIGN(name, alignt, words) \
+ .globl C_SYMBOL_NAME(__memcpy_ppc32); \
+ .type C_SYMBOL_NAME(__memcpy_ppc32),@function; \
+ .align ALIGNARG(alignt); \
+ EALIGN_W_##words; \
+ C_LABEL(__memcpy_ppc32) \
+ cfi_startproc;
+
+/* Matching END: closes the CFI region and sizes __memcpy_ppc32,
+   ignoring NAME.  */
+# undef END
+# define END(name) \
+ cfi_endproc; \
+ ASM_SIZE_DIRECTIVE(__memcpy_ppc32)
+
+/* Define the global symbol __GI_memcpy as an alias of __memcpy_ppc32,
+   ignoring NAME.  */
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_memcpy; __GI_memcpy = __memcpy_ppc32
+
+#endif
+
+#include "../memcpy.S"