git.ipfire.org Git - thirdparty/glibc.git/commitdiff
x86-64: Use ZMM16-ZMM31 in AVX512 memmove family functions
author: H.J. Lu <hjl.tools@gmail.com>
Sun, 7 Mar 2021 17:45:23 +0000 (09:45 -0800)
committer: H.J. Lu <hjl.tools@gmail.com>
Thu, 27 Jan 2022 20:47:19 +0000 (12:47 -0800)
Update ifunc-memmove.h to select, when AVX512VL is usable, the functions
optimized with AVX512 instructions using the ZMM16-ZMM31 registers.  Since
these functions do not need VZEROUPPER at function exit, this avoids RTM
aborts.

(cherry picked from commit e4fda4631017e49d4ee5a2755db34289b6860fa4)

sysdeps/x86_64/multiarch/ifunc-impl-list.c
sysdeps/x86_64/multiarch/ifunc-memmove.h
sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S

index 80b4db8d0542f1840b8ac8b1f8d25ea46ccc5dc3..ed25b4497c6d6561bb0d6b9981abc5f33e98e223 100644 (file)
@@ -83,10 +83,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX512F_Usable),
                              __memmove_chk_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, __memmove_chk,
-                             HAS_ARCH_FEATURE (AVX512F_Usable),
+                             HAS_ARCH_FEATURE (AVX512VL_Usable),
                              __memmove_chk_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, __memmove_chk,
-                             HAS_ARCH_FEATURE (AVX512F_Usable),
+                             HAS_ARCH_FEATURE (AVX512VL_Usable),
                              __memmove_chk_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __memmove_chk,
                              HAS_ARCH_FEATURE (AVX_Usable),
@@ -148,10 +148,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX512F_Usable),
                              __memmove_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, memmove,
-                             HAS_ARCH_FEATURE (AVX512F_Usable),
+                             HAS_ARCH_FEATURE (AVX512VL_Usable),
                              __memmove_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, memmove,
-                             HAS_ARCH_FEATURE (AVX512F_Usable),
+                             HAS_ARCH_FEATURE (AVX512VL_Usable),
                              __memmove_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
                              __memmove_ssse3_back)
@@ -697,10 +697,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX512F_Usable),
                              __memcpy_chk_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-                             HAS_ARCH_FEATURE (AVX512F_Usable),
+                             HAS_ARCH_FEATURE (AVX512VL_Usable),
                              __memcpy_chk_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-                             HAS_ARCH_FEATURE (AVX512F_Usable),
+                             HAS_ARCH_FEATURE (AVX512VL_Usable),
                              __memcpy_chk_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __memcpy_chk,
                              HAS_ARCH_FEATURE (AVX_Usable),
@@ -766,10 +766,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX512F_Usable),
                              __memcpy_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, memcpy,
-                             HAS_ARCH_FEATURE (AVX512F_Usable),
+                             HAS_ARCH_FEATURE (AVX512VL_Usable),
                              __memcpy_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, memcpy,
-                             HAS_ARCH_FEATURE (AVX512F_Usable),
+                             HAS_ARCH_FEATURE (AVX512VL_Usable),
                              __memcpy_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
              IFUNC_IMPL_ADD (array, i, memcpy, 1,
@@ -783,10 +783,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX512F_Usable),
                              __mempcpy_chk_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-                             HAS_ARCH_FEATURE (AVX512F_Usable),
+                             HAS_ARCH_FEATURE (AVX512VL_Usable),
                              __mempcpy_chk_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-                             HAS_ARCH_FEATURE (AVX512F_Usable),
+                             HAS_ARCH_FEATURE (AVX512VL_Usable),
                              __mempcpy_chk_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
                              HAS_ARCH_FEATURE (AVX_Usable),
@@ -828,10 +828,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX512F_Usable),
                              __mempcpy_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, mempcpy,
-                             HAS_ARCH_FEATURE (AVX512F_Usable),
+                             HAS_ARCH_FEATURE (AVX512VL_Usable),
                              __mempcpy_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, mempcpy,
-                             HAS_ARCH_FEATURE (AVX512F_Usable),
+                             HAS_ARCH_FEATURE (AVX512VL_Usable),
                              __mempcpy_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, mempcpy,
                              HAS_ARCH_FEATURE (AVX_Usable),
index fe003b28e1fe4d0ec2e4f65ece6828bce68b723d..554a3f8be8ab9b85eea541b912e6d179c2c92fac 100644 (file)
@@ -56,13 +56,15 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
       && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
     {
-      if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
-       return OPTIMIZE (avx512_no_vzeroupper);
+      if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable))
+       {
+       if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+           return OPTIMIZE (avx512_unaligned_erms);
 
-      if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
-       return OPTIMIZE (avx512_unaligned_erms);
+         return OPTIMIZE (avx512_unaligned);
+       }
 
-      return OPTIMIZE (avx512_unaligned);
+      return OPTIMIZE (avx512_no_vzeroupper);
     }
 
   if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
index aac1515cf6b3bb5c29377de6ffdafa89641cd7ad..7dad1ad74c9afe1952d1dc98a3970f15df990c66 100644 (file)
@@ -1,11 +1,25 @@
 #if IS_IN (libc)
 # define VEC_SIZE      64
-# define VEC(i)                zmm##i
+# define XMM0          xmm16
+# define XMM1          xmm17
+# define YMM0          ymm16
+# define YMM1          ymm17
+# define VEC0          zmm16
+# define VEC1          zmm17
+# define VEC2          zmm18
+# define VEC3          zmm19
+# define VEC4          zmm20
+# define VEC5          zmm21
+# define VEC6          zmm22
+# define VEC7          zmm23
+# define VEC8          zmm24
+# define VEC(i)                VEC##i
 # define VMOVNT                vmovntdq
 # define VMOVU         vmovdqu64
 # define VMOVA         vmovdqa64
+# define VZEROUPPER
 
-# define SECTION(p)            p##.avx512
+# define SECTION(p)            p##.evex512
 # define MEMMOVE_SYMBOL(p,s)   p##_avx512_##s
 
 # include "memmove-vec-unaligned-erms.S"