git.ipfire.org Git - thirdparty/glibc.git/commitdiff
x86-64: Use ZMM16-ZMM31 in AVX512 memmove family functions
author H.J. Lu <hjl.tools@gmail.com>
Sun, 7 Mar 2021 17:45:23 +0000 (09:45 -0800)
committer H.J. Lu <hjl.tools@gmail.com>
Mon, 29 Mar 2021 14:40:17 +0000 (07:40 -0700)
Update ifunc-memmove.h to select, when AVX512VL is usable, the functions
optimized with AVX512 instructions that use the ZMM16-ZMM31 registers.
Because these functions do not need VZEROUPPER at function exit, this
avoids RTM aborts.

sysdeps/x86_64/multiarch/ifunc-impl-list.c
sysdeps/x86_64/multiarch/ifunc-memmove.h
sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S

index 37f17075faf3e355515a59aaefa8eda27cda36fc..770986e41a6180038bf05dd6f332b8eb7be47bd7 100644 (file)
@@ -83,10 +83,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              CPU_FEATURE_USABLE (AVX512F),
                              __memmove_chk_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, __memmove_chk,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             CPU_FEATURE_USABLE (AVX512VL),
                              __memmove_chk_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, __memmove_chk,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             CPU_FEATURE_USABLE (AVX512VL),
                              __memmove_chk_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __memmove_chk,
                              CPU_FEATURE_USABLE (AVX),
@@ -148,10 +148,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              CPU_FEATURE_USABLE (AVX512F),
                              __memmove_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, memmove,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             CPU_FEATURE_USABLE (AVX512VL),
                              __memmove_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, memmove,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             CPU_FEATURE_USABLE (AVX512VL),
                              __memmove_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, memmove, CPU_FEATURE_USABLE (SSSE3),
                              __memmove_ssse3_back)
@@ -733,10 +733,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              CPU_FEATURE_USABLE (AVX512F),
                              __memcpy_chk_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             CPU_FEATURE_USABLE (AVX512VL),
                              __memcpy_chk_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             CPU_FEATURE_USABLE (AVX512VL),
                              __memcpy_chk_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __memcpy_chk,
                              CPU_FEATURE_USABLE (AVX),
@@ -802,10 +802,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              CPU_FEATURE_USABLE (AVX512F),
                              __memcpy_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, memcpy,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             CPU_FEATURE_USABLE (AVX512VL),
                              __memcpy_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, memcpy,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             CPU_FEATURE_USABLE (AVX512VL),
                              __memcpy_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
              IFUNC_IMPL_ADD (array, i, memcpy, 1,
@@ -819,10 +819,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              CPU_FEATURE_USABLE (AVX512F),
                              __mempcpy_chk_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             CPU_FEATURE_USABLE (AVX512VL),
                              __mempcpy_chk_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             CPU_FEATURE_USABLE (AVX512VL),
                              __mempcpy_chk_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
                              CPU_FEATURE_USABLE (AVX),
@@ -864,10 +864,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              CPU_FEATURE_USABLE (AVX512F),
                              __mempcpy_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, mempcpy,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             CPU_FEATURE_USABLE (AVX512VL),
                              __mempcpy_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, mempcpy,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             CPU_FEATURE_USABLE (AVX512VL),
                              __mempcpy_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, mempcpy,
                              CPU_FEATURE_USABLE (AVX),
index 4eba926eca4679a166e768defe02a3d3fffd61fb..a14718a9705e860cca7bcb4b836d5bc62f37df8e 100644 (file)
@@ -56,13 +56,15 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
       && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
     {
-      if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
-       return OPTIMIZE (avx512_no_vzeroupper);
+      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
+       {
+         if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+           return OPTIMIZE (avx512_unaligned_erms);
 
-      if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
-       return OPTIMIZE (avx512_unaligned_erms);
+         return OPTIMIZE (avx512_unaligned);
+       }
 
-      return OPTIMIZE (avx512_unaligned);
+      return OPTIMIZE (avx512_no_vzeroupper);
     }
 
   if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
index aac1515cf6b3bb5c29377de6ffdafa89641cd7ad..848848ab39ff9326270196952874f9e4b2577e0b 100644 (file)
@@ -1,11 +1,32 @@
 #if IS_IN (libc)
 # define VEC_SIZE      64
-# define VEC(i)                zmm##i
+# define XMM0          xmm16
+# define XMM1          xmm17
+# define YMM0          ymm16
+# define YMM1          ymm17
+# define VEC0          zmm16
+# define VEC1          zmm17
+# define VEC2          zmm18
+# define VEC3          zmm19
+# define VEC4          zmm20
+# define VEC5          zmm21
+# define VEC6          zmm22
+# define VEC7          zmm23
+# define VEC8          zmm24
+# define VEC9          zmm25
+# define VEC10         zmm26
+# define VEC11         zmm27
+# define VEC12         zmm28
+# define VEC13         zmm29
+# define VEC14         zmm30
+# define VEC15         zmm31
+# define VEC(i)                VEC##i
 # define VMOVNT                vmovntdq
 # define VMOVU         vmovdqu64
 # define VMOVA         vmovdqa64
+# define VZEROUPPER
 
-# define SECTION(p)            p##.avx512
+# define SECTION(p)            p##.evex512
 # define MEMMOVE_SYMBOL(p,s)   p##_avx512_##s
 
 # include "memmove-vec-unaligned-erms.S"