x86-64: Use ZMM16-ZMM31 in AVX512 memset family functions
author     H.J. Lu <hjl.tools@gmail.com>
           Sun, 7 Mar 2021 17:44:18 +0000 (09:44 -0800)
committer  H.J. Lu <hjl.tools@gmail.com>
           Thu, 27 Jan 2022 19:33:05 +0000 (11:33 -0800)
Update ifunc-memset.h and ifunc-wmemset.h to select the functions
optimized with AVX512 instructions using the ZMM16-ZMM31 registers
when AVX512VL and AVX512BW are usable.  Since these functions leave
ZMM0-ZMM15 untouched, VZEROUPPER isn't needed at function exit, and
the RTM aborts that VZEROUPPER can cause inside a transaction are
avoided.

(cherry picked from commit 4e2d8f352774b56078c34648b14a2412c38384f4)
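
Restated outside glibc's IFUNC machinery: the new variants need AVX512VL
(so the 128- and 256-bit tail moves can use EVEX encodings that reach
xmm16/ymm16) and AVX512BW (for the byte broadcast from a general-purpose
register), not merely AVX512F.  The stand-alone sketch below mirrors that
selection rule with GCC's __builtin_cpu_supports; the my_* functions are
hypothetical stand-ins, and glibc itself makes this choice in an IFUNC
selector at relocation time rather than through a helper like this.

#include <stdio.h>
#include <string.h>
#include <stddef.h>

/* Hypothetical stand-ins for glibc's real memset variants.  */
static void *my_memset_avx512 (void *d, int c, size_t n)
{ return memset (d, c, n); }   /* would use ZMM16-ZMM31 internally */

static void *my_memset_fallback (void *d, int c, size_t n)
{ return memset (d, c, n); }

/* Mirror of the commit's selection rule: the ZMM16-ZMM31 variant is
   eligible only when AVX512VL and AVX512BW are both usable.  */
static void *(*pick_memset (void)) (void *, int, size_t)
{
  if (__builtin_cpu_supports ("avx512vl")
      && __builtin_cpu_supports ("avx512bw"))
    return my_memset_avx512;
  return my_memset_fallback;
}

int main (void)
{
  char buf[64];
  pick_memset () (buf, 0xab, sizeof buf);
  printf ("AVX512VL+BW %s, buf[0] = 0x%02x\n",
          __builtin_cpu_supports ("avx512bw") ? "usable" : "not usable",
          (unsigned char) buf[0]);
  return 0;
}

The diffs below make exactly this change in three places: the list of
advertised implementations, the memset selector, and the wmemset
selector.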

sysdeps/x86_64/multiarch/ifunc-impl-list.c
sysdeps/x86_64/multiarch/ifunc-memset.h
sysdeps/x86_64/multiarch/ifunc-wmemset.h
sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index f78318e01a1950183c7dd238ba4a9132ddc94873..a51f9b59116cf84345a2860c7343f9a0daa61516 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -211,10 +211,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                               && CPU_FEATURE_USABLE (AVX512BW)),
                              __memset_chk_evex_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __memset_chk,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             (CPU_FEATURE_USABLE (AVX512VL)
+                              && CPU_FEATURE_USABLE (AVX512BW)),
                              __memset_chk_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __memset_chk,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             (CPU_FEATURE_USABLE (AVX512VL)
+                              && CPU_FEATURE_USABLE (AVX512BW)),
                              __memset_chk_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, __memset_chk,
                              CPU_FEATURE_USABLE (AVX512F),
@@ -252,10 +254,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                               && CPU_FEATURE_USABLE (AVX512BW)),
                              __memset_evex_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, memset,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             (CPU_FEATURE_USABLE (AVX512VL)
+                              && CPU_FEATURE_USABLE (AVX512BW)),
                              __memset_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, memset,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             (CPU_FEATURE_USABLE (AVX512VL)
+                              && CPU_FEATURE_USABLE (AVX512BW)),
                              __memset_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, memset,
                              CPU_FEATURE_USABLE (AVX512F),
@@ -719,7 +723,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              CPU_FEATURE_USABLE (AVX512VL),
                              __wmemset_evex_unaligned)
              IFUNC_IMPL_ADD (array, i, wmemset,
-                             CPU_FEATURE_USABLE (AVX512F),
+                             CPU_FEATURE_USABLE (AVX512VL),
                              __wmemset_avx512_unaligned))
 
 #ifdef SHARED
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
index c5eb92121bcd1e37b73dea3772699c7f12f301bd..57029fc17bc243b7489694daf2298cd3e5abd4e5 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
@@ -53,13 +53,16 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
       && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
     {
-      if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
-       return OPTIMIZE (avx512_no_vzeroupper);
+      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+         && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+       {
+         if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+           return OPTIMIZE (avx512_unaligned_erms);
 
-      if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
-       return OPTIMIZE (avx512_unaligned_erms);
+         return OPTIMIZE (avx512_unaligned);
+       }
 
-      return OPTIMIZE (avx512_unaligned);
+      return OPTIMIZE (avx512_no_vzeroupper);
     }
 
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
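
For readability, here is the AVX512 branch of the memset selector as it
reads after the hunk above is applied (a straight reassembly of the
diff, with the surrounding function elided):

  if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
      && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
    {
      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
          && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
        {
          if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
            return OPTIMIZE (avx512_unaligned_erms);

          return OPTIMIZE (avx512_unaligned);
        }

      return OPTIMIZE (avx512_no_vzeroupper);
    }

Note the inversion: the ZMM16-ZMM31 variants are now tried first, and
avx512_no_vzeroupper, previously chosen behind a Prefer_No_VZEROUPPER
check, becomes the fallback for AVX512F machines lacking VL/BW, since
the new variants never need VZEROUPPER at all.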
diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
index e81b96c69fe38f7c4ea8b71510adddbc60e1cd3f..e06e8b4d8030def089903377b46663e3089a0c15 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
@@ -33,13 +33,13 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
     {
-      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
-         && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
-         && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
-       return OPTIMIZE (avx512_unaligned);
-
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
-       return OPTIMIZE (evex_unaligned);
+       {
+         if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+           return OPTIMIZE (avx512_unaligned);
+
+         return OPTIMIZE (evex_unaligned);
+       }
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
        return OPTIMIZE (avx2_unaligned_rtm);
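
Likewise reassembled, the wmemset selector now keys the 512-bit variant
off AVX512VL alone: wmemset broadcasts a 32-bit wchar_t with
vpbroadcastd, and dword broadcasts from a GPR are AVX512F instructions,
so AVX512BW isn't required here.

  if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
    {
      if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
        return OPTIMIZE (avx512_unaligned);

      return OPTIMIZE (evex_unaligned);
    }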
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
index 0783979ca5d5d77c65c688a44596f6bdd75350d3..22e7b187c84920df3711cbc422b7e2d65a591e61 100644
--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
@@ -1,22 +1,22 @@
 #if IS_IN (libc)
 # define VEC_SIZE      64
-# define VEC(i)                zmm##i
+# define XMM0          xmm16
+# define YMM0          ymm16
+# define VEC0          zmm16
+# define VEC(i)                VEC##i
 # define VMOVU         vmovdqu64
 # define VMOVA         vmovdqa64
+# define VZEROUPPER
 
 # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
-  vmovd d, %xmm0; \
   movq r, %rax; \
-  vpbroadcastb %xmm0, %xmm0; \
-  vpbroadcastq %xmm0, %zmm0
+  vpbroadcastb d, %VEC0
 
 # define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
-  vmovd d, %xmm0; \
   movq r, %rax; \
-  vpbroadcastd %xmm0, %xmm0; \
-  vpbroadcastq %xmm0, %zmm0
+  vpbroadcastd d, %VEC0
 
-# define SECTION(p)            p##.avx512
+# define SECTION(p)            p##.evex512
 # define MEMSET_SYMBOL(p,s)    p##_avx512_##s
 # define WMEMSET_SYMBOL(p,s)   p##_avx512_##s
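
The payoff of the macro rewrite is visible in the broadcast sequence:
the old code staged the byte in %xmm0 (vmovd plus two vpbroadcasts),
dirtying the ZMM0-ZMM15 range and forcing a VZEROUPPER before return,
while the new code issues a single AVX512BW vpbroadcastb from the GPR
straight into %zmm16, which is why the diff defines VZEROUPPER to
nothing.  Below is a small stand-alone demo of the new sequence; it is
an illustration, not glibc code, and assumes GCC on x86-64 built with
something like gcc -mavx512bw so the zmm16 clobber is accepted.

#include <stdio.h>

int main (void)
{
  if (!(__builtin_cpu_supports ("avx512vl")
        && __builtin_cpu_supports ("avx512bw")))
    {
      puts ("AVX512VL/BW not usable; skipping demo");
      return 0;
    }

  unsigned char out[64] __attribute__ ((aligned (64)));
  int c = 0xab;

  /* One AVX512BW instruction broadcasts the byte from a GPR straight
     into zmm16.  Since zmm16 is outside ZMM0-ZMM15, no VZEROUPPER is
     needed afterwards.  */
  __asm__ volatile ("vpbroadcastb %1, %%zmm16\n\t"
                    "vmovdqu64 %%zmm16, %0"
                    : "=m" (out)
                    : "r" (c)
                    : "zmm16");

  printf ("out[0] = 0x%02x, out[63] = 0x%02x\n", out[0], out[63]);
  return 0;
}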