Check Prefer_ERMS in memmove/memcpy/mempcpy/memset
author    H.J. Lu <hjl.tools@gmail.com>
          Thu, 30 Jun 2016 14:57:07 +0000 (07:57 -0700)
committer H.J. Lu <hjl.tools@gmail.com>
          Thu, 30 Jun 2016 14:58:11 +0000 (07:58 -0700)
Although the Enhanced REP MOVSB/STOSB (ERMS) implementations of memmove,
memcpy, mempcpy and memset aren't selected for current processors, this
patch adds a Prefer_ERMS check to memmove, memcpy, mempcpy and memset so
that the ERMS variants can be selected in the future.
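
For context, an ERMS implementation is essentially a bare rep movsb (or
rep stosb), which microcode on ERMS-capable processors expands into an
optimized block operation.  A minimal sketch of the memcpy case, using
GCC/Clang-style x86-64 inline asm (an illustration, not glibc's code):

#include <stddef.h>

/* rep movsb copies RCX bytes from (RSI) to (RDI).  The constraints pin
   dst, src and n to RDI, RSI and RCX respectively.  */
static void *
memcpy_erms_sketch (void *dst, const void *src, size_t n)
{
  void *ret = dst;
  __asm__ volatile ("rep movsb"
                    : "+D" (dst), "+S" (src), "+c" (n)
                    :
                    : "memory");
  return ret;
}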

* sysdeps/x86/cpu-features.h (bit_arch_Prefer_ERMS): New.
(index_arch_Prefer_ERMS): Likewise.
* sysdeps/x86_64/multiarch/memcpy.S (__new_memcpy): Return
__memcpy_erms for Prefer_ERMS.
* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
(__memmove_erms): Enabled for libc.a.
* sysdeps/x86_64/multiarch/memmove.S (__libc_memmove): Return
__memmove_erms for Prefer_ERMS.
* sysdeps/x86_64/multiarch/mempcpy.S (__mempcpy): Return
__mempcpy_erms for Prefer_ERMS.
* sysdeps/x86_64/multiarch/memset.S (memset): Return
__memset_erms for Prefer_ERMS.
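
All four resolver changes follow the same pattern, visible in the hunks
below: load the address of the ERMS variant first, then fall through to
the existing feature checks only when Prefer_ERMS is clear.  In C terms,
roughly (function names other than __memcpy_erms are illustrative
stand-ins for the assembly's HAS_ARCH_FEATURE checks and existing
selections):

#include <stddef.h>

typedef void *(*memcpy_fn) (void *, const void *, size_t);

extern void *__memcpy_erms (void *, const void *, size_t);
extern void *__memcpy_avx512_no_vzeroupper (void *, const void *, size_t);
extern void *__memcpy_sse2_unaligned (void *, const void *, size_t);

extern int prefer_erms (void);     /* stand-in: HAS_ARCH_FEATURE (Prefer_ERMS) */
extern int avx512f_usable (void);  /* stand-in: HAS_ARCH_FEATURE (AVX512F_Usable) */

static memcpy_fn
resolve_memcpy (void)
{
  if (prefer_erms ())              /* the new early return */
    return __memcpy_erms;
  if (avx512f_usable ())           /* existing selection logic */
    return __memcpy_avx512_no_vzeroupper;
  return __memcpy_sse2_unaligned;  /* simplified fallback */
}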

ChangeLog
sysdeps/x86/cpu-features.h
sysdeps/x86_64/multiarch/memcpy.S
sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
sysdeps/x86_64/multiarch/memmove.S
sysdeps/x86_64/multiarch/mempcpy.S
sysdeps/x86_64/multiarch/memset.S

index 9131b1f88977ecd82cd1bb763f2f6681ec7cd5d1..082422866dd3d315518d2f141893c9fe26b450ee 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2016-06-30  H.J. Lu  <hongjiu.lu@intel.com>
+
+       * sysdeps/x86/cpu-features.h (bit_arch_Prefer_ERMS): New.
+       (index_arch_Prefer_ERMS): Likewise.
+       * sysdeps/x86_64/multiarch/memcpy.S (__new_memcpy): Return
+       __memcpy_erms for Prefer_ERMS.
+       * sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+       (__memmove_erms): Enabled for libc.a.
+       * sysdeps/x86_64/multiarch/memmove.S (__libc_memmove): Return
+       __memmove_erms for Prefer_ERMS.
+       * sysdeps/x86_64/multiarch/mempcpy.S (__mempcpy): Return
+       __mempcpy_erms for Prefer_ERMS.
+       * sysdeps/x86_64/multiarch/memset.S (memset): Return
+       __memset_erms for Prefer_ERMS.
+
 2016-06-30  Andreas Schwab  <schwab@suse.de>
 
        [BZ #20262]
index 2bd93713a105583946205e1cc806a76220706f0d..97ffe765f4e021be6b67da750bbbcf22ba41507b 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -36,6 +36,7 @@
 #define bit_arch_Prefer_MAP_32BIT_EXEC         (1 << 16)
 #define bit_arch_Prefer_No_VZEROUPPER          (1 << 17)
 #define bit_arch_Fast_Unaligned_Copy           (1 << 18)
+#define bit_arch_Prefer_ERMS                   (1 << 19)
 
 /* CPUID Feature flags.  */
 
 # define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Fast_Unaligned_Copy        FEATURE_INDEX_1*FEATURE_SIZE
+# define index_arch_Prefer_ERMS                FEATURE_INDEX_1*FEATURE_SIZE
 
 
 # if defined (_LIBC) && !IS_IN (nonlib)
@@ -274,6 +276,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1
 # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1
 # define index_arch_Fast_Unaligned_Copy        FEATURE_INDEX_1
+# define index_arch_Prefer_ERMS                FEATURE_INDEX_1
 
 #endif /* !__ASSEMBLER__ */
 
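The two index_arch_Prefer_ERMS definitions serve the same test in two
contexts: in assembly the index is a byte offset (hence the
FEATURE_INDEX_1*FEATURE_SIZE form), while in C it indexes the feature
array directly.  A simplified sketch of what the HAS_ARCH_FEATURE test
boils down to in C (the struct layout here is a stand-in, not glibc's
exact definition):

#include <stdbool.h>

#define bit_arch_Prefer_ERMS   (1 << 19)
#define index_arch_Prefer_ERMS 0            /* FEATURE_INDEX_1 */

struct cpu_features_sketch
{
  unsigned int feature[1];                  /* arch-feature bitmap words */
};

/* Pick the feature word, test the bit.  */
static bool
prefer_erms_p (const struct cpu_features_sketch *cf)
{
  return (cf->feature[index_arch_Prefer_ERMS] & bit_arch_Prefer_ERMS) != 0;
}
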
index f6771a4696fa31dc371ae4da7d7ca5f1a666795e..df7fbacd8ab5f368e63873181e6b1f2ef12bbd4d 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -29,6 +29,9 @@
 ENTRY(__new_memcpy)
        .type   __new_memcpy, @gnu_indirect_function
        LOAD_RTLD_GLOBAL_RO_RDX
+       lea     __memcpy_erms(%rip), %RAX_LP
+       HAS_ARCH_FEATURE (Prefer_ERMS)
+       jnz     2f
 # ifdef HAVE_AVX512_ASM_SUPPORT
        HAS_ARCH_FEATURE (AVX512F_Usable)
        jz      1f
index a2cce39a160157ab385a433b06f1fd10ec307dcb..4893ea46b4068c55649e979eae5172a316d51eef 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -150,13 +150,15 @@ L(nop):
 #if defined USE_MULTIARCH && IS_IN (libc)
 END (MEMMOVE_SYMBOL (__memmove, unaligned))
 
-# if VEC_SIZE == 16 && defined SHARED
+# if VEC_SIZE == 16
+#  if defined SHARED
 /* Only used to measure performance of REP MOVSB.  */
 ENTRY (__mempcpy_erms)
        movq    %rdi, %rax
        addq    %rdx, %rax
        jmp     L(start_movsb)
 END (__mempcpy_erms)
+#  endif
 
 ENTRY (__memmove_erms)
        movq    %rdi, %rax
@@ -181,7 +183,9 @@ L(movsb_backward):
        cld
        ret
 END (__memmove_erms)
+#  if defined SHARED
 strong_alias (__memmove_erms, __memcpy_erms)
+#  endif
 # endif
 
 # ifdef SHARED
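
The body of __memmove_erms above decides direction with one unsigned
compare: if the destination does not start inside the source, it copies
forward with rep movsb; otherwise it sets the direction flag and copies
backward (the std; rep movsb; cld path at L(movsb_backward)).  A rough C
equivalent of that logic, as a sketch:

#include <stddef.h>
#include <stdint.h>

static void *
memmove_erms_sketch (void *dst, const void *src, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *s = src;
  /* Forward copy is safe unless dst lands inside [src, src + n);
     the unsigned subtraction wraps when d < s, so it also covers
     that case, exactly like the sub/cmp/jb in the assembly.  */
  if ((uintptr_t) d - (uintptr_t) s >= n)
    while (n--)
      *d++ = *s++;      /* forward: what rep movsb does */
  else
    {
      d += n;
      s += n;
      while (n--)
        *--d = *--s;    /* backward: what std; rep movsb does */
    }
  return dst;
}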
index 25c3586ee96b2831f0f252f15d09c830e551162b..8e1c6ac8e8717cbad88188114b3665ee644beecd 100644
--- a/sysdeps/x86_64/multiarch/memmove.S
+++ b/sysdeps/x86_64/multiarch/memmove.S
@@ -27,6 +27,9 @@
 ENTRY(__libc_memmove)
        .type   __libc_memmove, @gnu_indirect_function
        LOAD_RTLD_GLOBAL_RO_RDX
+       lea     __memmove_erms(%rip), %RAX_LP
+       HAS_ARCH_FEATURE (Prefer_ERMS)
+       jnz     2f
 # ifdef HAVE_AVX512_ASM_SUPPORT
        HAS_ARCH_FEATURE (AVX512F_Usable)
        jz      1f
index f9c6df301c37fc198d85b333498febe59f645c9a..4011a1a4f0ca0f8276edd59cbd57b2bdbbf21473 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -29,6 +29,9 @@
 ENTRY(__mempcpy)
        .type   __mempcpy, @gnu_indirect_function
        LOAD_RTLD_GLOBAL_RO_RDX
+       lea     __mempcpy_erms(%rip), %RAX_LP
+       HAS_ARCH_FEATURE (Prefer_ERMS)
+       jnz     2f
 # ifdef HAVE_AVX512_ASM_SUPPORT
        HAS_ARCH_FEATURE (AVX512F_Usable)
        jz      1f
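
The __mempcpy_erms returned here computes its result as RDI + RDX
(movq %rdi, %rax; addq %rdx, %rax in the memmove-vec-unaligned-erms.S
hunk above) before jumping into the shared movsb path: mempcpy differs
from memcpy only in returning dst + n rather than dst.  Sketched in C:

#include <string.h>

static void *
mempcpy_sketch (void *dst, const void *src, size_t n)
{
  memcpy (dst, src, n);
  return (unsigned char *) dst + n;   /* RAX = RDI + RDX */
}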
index 4e52d8f8c4795c1b4e2dd77a51c2e82a5520bce7..2b964a0398780e33eac03dd77915876e36726abb 100644
--- a/sysdeps/x86_64/multiarch/memset.S
+++ b/sysdeps/x86_64/multiarch/memset.S
@@ -26,6 +26,9 @@
 ENTRY(memset)
        .type   memset, @gnu_indirect_function
        LOAD_RTLD_GLOBAL_RO_RDX
+       lea     __memset_erms(%rip), %RAX_LP
+       HAS_ARCH_FEATURE (Prefer_ERMS)
+       jnz     2f
        lea     __memset_sse2_unaligned_erms(%rip), %RAX_LP
        HAS_CPU_FEATURE (ERMS)
        jnz     1f
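
__memset_erms, selected by the new check above, is built on rep stosb,
the store counterpart of rep movsb: it writes AL into RCX consecutive
bytes at RDI.  A minimal sketch (GCC/Clang-style x86-64 inline asm, not
glibc's code):

#include <stddef.h>

static void *
memset_erms_sketch (void *dst, int c, size_t n)
{
  void *ret = dst;
  /* "a" (c) places the fill byte in AL; dst and n are pinned to
     RDI and RCX for rep stosb.  */
  __asm__ volatile ("rep stosb"
                    : "+D" (dst), "+c" (n)
                    : "a" (c)
                    : "memory");
  return ret;
}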