x86-64: Add memmove family functions with 256-bit EVEX
author     H.J. Lu <hjl.tools@gmail.com>
           Fri, 5 Mar 2021 14:46:08 +0000 (06:46 -0800)
committer  H.J. Lu <hjl.tools@gmail.com>
           Mon, 29 Mar 2021 14:40:17 +0000 (07:40 -0700)
Update ifunc-memmove.h to select the functions optimized with 256-bit
EVEX instructions when AVX512VL is usable.  Because these functions use
only the YMM16-YMM31 registers, VZEROUPPER isn't needed at function
exit, which avoids RTM aborts.
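
[Not part of the commit -- an illustrative sketch.]  The idea behind the
EVEX variants: ymm16-ymm31 have no VEX encoding and can only be reached
through EVEX-encoded instructions, so code that stays in those registers
doesn't need VZEROUPPER before returning -- and VZEROUPPER is the
instruction that can abort an RTM transaction.  A minimal standalone
example (the name copy32_evex is hypothetical, not glibc code; requires
AVX512VL):

	.text
	.globl	copy32_evex
	.type	copy32_evex, @function
copy32_evex:
	/* Copy 32 bytes from (%rsi) to (%rdi) with EVEX-encoded
	   256-bit moves through %ymm16.  No VZEROUPPER is required
	   before ret, unlike the same sequence through %ymm0.  */
	vmovdqu64	(%rsi), %ymm16
	vmovdqu64	%ymm16, (%rdi)
	ret
	.size	copy32_evex, .-copy32_evex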

sysdeps/x86_64/multiarch/Makefile
sysdeps/x86_64/multiarch/ifunc-impl-list.c
sysdeps/x86_64/multiarch/ifunc-memmove.h
sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S [new file with mode: 0644]
sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S

diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 46783cd14bb5708012fc1f9a6c3c81c1fcb7e871..4563fc56f5f961d5d4f8e1b4acd1e59b5e96b4b8 100644
@@ -41,6 +41,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c \
                   memset-avx2-unaligned-erms \
                   memset-avx512-unaligned-erms \
                   memchr-evex \
+                  memmove-evex-unaligned-erms \
                   memrchr-evex \
                   rawmemchr-evex \
                   stpcpy-evex \
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 74b20d8bd1062bbec2ca5740ba47773996a46ede..e1c39d58d68d7c70cf00a8ec475ce204feaa315f 100644
@@ -80,6 +80,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, __memmove_chk,
                              CPU_FEATURE_USABLE (AVX),
                              __memmove_chk_avx_unaligned_erms)
+             IFUNC_IMPL_ADD (array, i, __memmove_chk,
+                             CPU_FEATURE_USABLE (AVX512VL),
+                             __memmove_chk_evex_unaligned)
+             IFUNC_IMPL_ADD (array, i, __memmove_chk,
+                             CPU_FEATURE_USABLE (AVX512VL),
+                             __memmove_chk_evex_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __memmove_chk,
                              CPU_FEATURE_USABLE (SSSE3),
                              __memmove_chk_ssse3_back)
@@ -102,6 +108,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, memmove,
                              CPU_FEATURE_USABLE (AVX),
                              __memmove_avx_unaligned_erms)
+             IFUNC_IMPL_ADD (array, i, memmove,
+                             CPU_FEATURE_USABLE (AVX512VL),
+                             __memmove_evex_unaligned)
+             IFUNC_IMPL_ADD (array, i, memmove,
+                             CPU_FEATURE_USABLE (AVX512VL),
+                             __memmove_evex_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, memmove,
                              CPU_FEATURE_USABLE (AVX512F),
                              __memmove_avx512_no_vzeroupper)
@@ -565,6 +577,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, __memcpy_chk,
                              CPU_FEATURE_USABLE (AVX),
                              __memcpy_chk_avx_unaligned_erms)
+             IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+                             CPU_FEATURE_USABLE (AVX512VL),
+                             __memcpy_chk_evex_unaligned)
+             IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+                             CPU_FEATURE_USABLE (AVX512VL),
+                             __memcpy_chk_evex_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __memcpy_chk,
                              CPU_FEATURE_USABLE (SSSE3),
                              __memcpy_chk_ssse3_back)
@@ -587,6 +605,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, memcpy,
                              CPU_FEATURE_USABLE (AVX),
                              __memcpy_avx_unaligned_erms)
+             IFUNC_IMPL_ADD (array, i, memcpy,
+                             CPU_FEATURE_USABLE (AVX512VL),
+                             __memcpy_evex_unaligned)
+             IFUNC_IMPL_ADD (array, i, memcpy,
+                             CPU_FEATURE_USABLE (AVX512VL),
+                             __memcpy_evex_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3),
                              __memcpy_ssse3_back)
              IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3),
@@ -623,6 +647,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
                              CPU_FEATURE_USABLE (AVX),
                              __mempcpy_chk_avx_unaligned_erms)
+             IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+                             CPU_FEATURE_USABLE (AVX512VL),
+                             __mempcpy_chk_evex_unaligned)
+             IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+                             CPU_FEATURE_USABLE (AVX512VL),
+                             __mempcpy_chk_evex_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
                              CPU_FEATURE_USABLE (SSSE3),
                              __mempcpy_chk_ssse3_back)
@@ -654,6 +684,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, mempcpy,
                              CPU_FEATURE_USABLE (AVX),
                              __mempcpy_avx_unaligned_erms)
+             IFUNC_IMPL_ADD (array, i, mempcpy,
+                             CPU_FEATURE_USABLE (AVX512VL),
+                             __mempcpy_evex_unaligned)
+             IFUNC_IMPL_ADD (array, i, mempcpy,
+                             CPU_FEATURE_USABLE (AVX512VL),
+                             __mempcpy_evex_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3),
                              __mempcpy_ssse3_back)
              IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3),
diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h
index bf42a555dc43ad1f72373790eb876a41d980ad65..517b332bfc2b069358bf0d91188c18b8b36c930e 100644
@@ -29,6 +29,10 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms)
   attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
+  attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
   attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
@@ -59,10 +63,21 @@ IFUNC_SELECTOR (void)
 
   if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
     {
-      if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
-       return OPTIMIZE (avx_unaligned_erms);
+      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
+       {
+         if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+           return OPTIMIZE (evex_unaligned_erms);
+
+         return OPTIMIZE (evex_unaligned);
+       }
+
+      if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+       {
+         if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+           return OPTIMIZE (avx_unaligned_erms);
 
-      return OPTIMIZE (avx_unaligned);
+         return OPTIMIZE (avx_unaligned);
+       }
     }
 
   if (!CPU_FEATURE_USABLE_P (cpu_features, SSSE3)
diff --git a/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S
new file mode 100644
index 0000000..0cbce8f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S
@@ -0,0 +1,33 @@
+#if IS_IN (libc)
+# define VEC_SIZE      32
+# define XMM0          xmm16
+# define XMM1          xmm17
+# define YMM0          ymm16
+# define YMM1          ymm17
+# define VEC0          ymm16
+# define VEC1          ymm17
+# define VEC2          ymm18
+# define VEC3          ymm19
+# define VEC4          ymm20
+# define VEC5          ymm21
+# define VEC6          ymm22
+# define VEC7          ymm23
+# define VEC8          ymm24
+# define VEC9          ymm25
+# define VEC10         ymm26
+# define VEC11         ymm27
+# define VEC12         ymm28
+# define VEC13         ymm29
+# define VEC14         ymm30
+# define VEC15         ymm31
+# define VEC(i)                VEC##i
+# define VMOVNT                vmovntdq
+# define VMOVU         vmovdqu64
+# define VMOVA         vmovdqa64
+# define VZEROUPPER
+
+# define SECTION(p)            p##.evex
+# define MEMMOVE_SYMBOL(p,s)   p##_evex_##s
+
+# include "memmove-vec-unaligned-erms.S"
+#endif
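
[Not part of the commit -- tracing the defines above.]  Including
memmove-vec-unaligned-erms.S with these defines produces the EVEX
variants.  For example, the VMOVU/%YMM0 lines visible in the hunk below
expand as:

	VMOVU	(%rsi), %YMM0		# as written in the shared source
	vmovdqu64	(%rsi), %ymm16	# what the assembler actually sees

and MEMMOVE_SYMBOL (__memmove, unaligned_erms) names the variant
__memmove_evex_unaligned_erms, while VZEROUPPER expands to nothing, so
these functions return without any AVX state cleanup.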
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 50bb1fccb2674c26afe3242b51b01f012250f25a..d713d7d6792897be397aaff8914e780373fbc224 100644
 # define MEMMOVE_CHK_SYMBOL(p,s)       MEMMOVE_SYMBOL(p, s)
 #endif
 
+#ifndef XMM0
+# define XMM0                          xmm0
+#endif
+
+#ifndef YMM0
+# define YMM0                          ymm0
+#endif
+
 #ifndef VZEROUPPER
 # if VEC_SIZE > 16
 #  define VZEROUPPER vzeroupper
@@ -301,20 +309,20 @@ L(less_vec):
 #if VEC_SIZE > 32
 L(between_32_63):
        /* From 32 to 63.  No branch when size == 32.  */
-       vmovdqu (%rsi), %ymm0
-       vmovdqu -32(%rsi,%rdx), %ymm1
-       vmovdqu %ymm0, (%rdi)
-       vmovdqu %ymm1, -32(%rdi,%rdx)
+       VMOVU   (%rsi), %YMM0
+       VMOVU   -32(%rsi,%rdx), %YMM1
+       VMOVU   %YMM0, (%rdi)
+       VMOVU   %YMM1, -32(%rdi,%rdx)
        VZEROUPPER
        ret
 #endif
 #if VEC_SIZE > 16
        /* From 16 to 31.  No branch when size == 16.  */
 L(between_16_31):
-       vmovdqu (%rsi), %xmm0
-       vmovdqu -16(%rsi,%rdx), %xmm1
-       vmovdqu %xmm0, (%rdi)
-       vmovdqu %xmm1, -16(%rdi,%rdx)
+       VMOVU   (%rsi), %XMM0
+       VMOVU   -16(%rsi,%rdx), %XMM1
+       VMOVU   %XMM0, (%rdi)
+       VMOVU   %XMM1, -16(%rdi,%rdx)
        ret
 #endif
 L(between_8_15):