/* Optimized mempcpy implementation for POWER7.
- Copyright (C) 2010 Free Software Foundation, Inc.
+ Copyright (C) 2010-2021 Free Software Foundation, Inc.
Contributed by Luis Machado <luisgpm@br.ibm.com>.
This file is part of the GNU C Library.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
+ <https://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
-/* __ptr_t [r3] __mempcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+/* void * [r3] __mempcpy (void *dst [r3], void *src [r4], size_t len [r5]);
Returns 'dst' + 'len'. */
+#ifndef MEMPCPY
+# define MEMPCPY __mempcpy
+#endif
.machine power7
-EALIGN (BP_SYM (__mempcpy), 5, 0)
+ENTRY_TOCLESS (MEMPCPY, 5)
CALL_MCOUNT 3
cmpldi cr1,5,31
mr 11,12
mtcrf 0x01,9
cmpldi cr6,9,1
- lvsl 5,0,12
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,12
+#else
+ lvsl 5,0,12
+#endif
lvx 3,0,12
bf 31,L(setup_unaligned_loop)
/* Copy another 16 bytes to align to 32-bytes due to the loop . */
lvx 4,12,6
- vperm 6,3,4,5
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
addi 11,12,16
addi 10,3,16
stvx 6,0,3
vector instructions though. */
lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
addi 11,11,32
stvx 6,0,10
stvx 10,10,6
add 3,3,5
blr
-END_GEN_TB (BP_SYM (__mempcpy),TB_TOCLESS)
-libc_hidden_def (BP_SYM (__mempcpy))
-weak_alias (BP_SYM (__mempcpy), BP_SYM (mempcpy))
+END_GEN_TB (MEMPCPY,TB_TOCLESS)
+libc_hidden_def (__mempcpy)
+weak_alias (__mempcpy, mempcpy)
libc_hidden_builtin_def (mempcpy)