x86-64: Fix memcpy IFUNC selection
author    H.J. Lu <hjl.tools@gmail.com>
          Fri, 4 Mar 2016 16:37:40 +0000 (08:37 -0800)
committer H.J. Lu <hjl.tools@gmail.com>
          Fri, 4 Mar 2016 16:39:07 +0000 (08:39 -0800)
Check Fast_Unaligned_Load, instead of Slow_BSF, and also check for
Fast_Copy_Backward to enable __memcpy_ssse3_back.  The existing
selection order is replaced with the following selection order,
sketched in C after the log entry below:

1. __memcpy_avx_unaligned if AVX_Fast_Unaligned_Load bit is set.
2. __memcpy_sse2_unaligned if Fast_Unaligned_Load bit is set.
3. __memcpy_sse2 if SSSE3 isn't available.
4. __memcpy_ssse3_back if Fast_Copy_Backward bit is set.
5. __memcpy_ssse3

[BZ #18880]
* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
instead of Slow_BSF, and also check for Fast_Copy_Backward to
enable __memcpy_ssse3_back.
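
A minimal C sketch of the new selection order.  It omits the AVX-512
branch guarded by HAVE_AVX512_ASM_SUPPORT and Prefer_No_VZEROUPPER at
the top of the resolver; select_memcpy, has_arch_feature and
has_cpu_feature are hypothetical stand-ins for the assembly resolver
and glibc's HAS_ARCH_FEATURE/HAS_CPU_FEATURE macros, and only the
__memcpy_* names are real glibc internals:

#include <stddef.h>

/* Hypothetical stand-ins for glibc's HAS_ARCH_FEATURE and
   HAS_CPU_FEATURE macros; the enumerators mirror the feature bits
   tested in memcpy.S.  */
enum feature { AVX_Fast_Unaligned_Load, Fast_Unaligned_Load,
               Fast_Copy_Backward, SSSE3 };
extern int has_arch_feature (enum feature);
extern int has_cpu_feature (enum feature);

/* The real implementations dispatched by memcpy.S.  */
extern void *__memcpy_avx_unaligned (void *, const void *, size_t);
extern void *__memcpy_sse2_unaligned (void *, const void *, size_t);
extern void *__memcpy_sse2 (void *, const void *, size_t);
extern void *__memcpy_ssse3_back (void *, const void *, size_t);
extern void *__memcpy_ssse3 (void *, const void *, size_t);

typedef void *(*memcpy_t) (void *, const void *, size_t);

/* C model of the jnz/jz chain in __new_memcpy below.  */
static memcpy_t
select_memcpy (void)
{
  if (has_arch_feature (AVX_Fast_Unaligned_Load))
    return __memcpy_avx_unaligned;           /* 1 */
  if (has_arch_feature (Fast_Unaligned_Load))
    return __memcpy_sse2_unaligned;          /* 2 */
  if (!has_cpu_feature (SSSE3))
    return __memcpy_sse2;                    /* 3 */
  if (has_arch_feature (Fast_Copy_Backward))
    return __memcpy_ssse3_back;              /* 4 */
  return __memcpy_ssse3;                     /* 5 */
}

Each test falls through to the next, so a CPU with SSSE3 but neither
fast unaligned loads nor fast backward copies still ends up on
__memcpy_ssse3.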

ChangeLog
sysdeps/x86_64/multiarch/memcpy.S

index 7c5ee2dd19c9a8cbc89377717161f2a178339f75..7b36bd732dea8c498618a8244e4c490edb56eb55 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2016-03-04  Amit Pawar  <Amit.Pawar@amd.com>
+           H.J. Lu  <hongjiu.lu@intel.com>
+
+       [BZ #18880]
+       * sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
+       instead of Slow_BSF, and also check for Fast_Copy_Backward to
+       enable __memcpy_ssse3_back.
+
 2016-03-03  H.J. Lu  <hongjiu.lu@intel.com>
 
        [BZ #19758]
index 64a1bcd137d3db85e23d0132aec47ad3b0476a50..8882590e51196e87fa95cd7f29da20371d641ffa 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -35,22 +35,23 @@ ENTRY(__new_memcpy)
        jz      1f
        HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
        jz      1f
-       leaq    __memcpy_avx512_no_vzeroupper(%rip), %rax
+       lea     __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
        ret
 #endif
-1:     leaq    __memcpy_avx_unaligned(%rip), %rax
+1:     lea     __memcpy_avx_unaligned(%rip), %RAX_LP
        HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-       jz 2f
-       ret
-2:     leaq    __memcpy_sse2(%rip), %rax
-       HAS_ARCH_FEATURE (Slow_BSF)
-       jnz     3f
-       leaq    __memcpy_sse2_unaligned(%rip), %rax
-       ret
-3:     HAS_CPU_FEATURE (SSSE3)
-       jz 4f
-       leaq    __memcpy_ssse3(%rip), %rax
-4:     ret
+       jnz     2f
+       lea     __memcpy_sse2_unaligned(%rip), %RAX_LP
+       HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+       jnz     2f
+       lea     __memcpy_sse2(%rip), %RAX_LP
+       HAS_CPU_FEATURE (SSSE3)
+       jz      2f
+       lea     __memcpy_ssse3_back(%rip), %RAX_LP
+       HAS_ARCH_FEATURE (Fast_Copy_Backward)
+       jnz     2f
+       lea     __memcpy_ssse3(%rip), %RAX_LP
+2:     ret
 END(__new_memcpy)
 
 # undef ENTRY