]> git.ipfire.org Git - thirdparty/glibc.git/commitdiff
Use AVX_Fast_Unaligned_Load from Zen onwards.
authorAmit Pawar <Amit.Pawar@amd.com>
Fri, 6 Jul 2018 13:55:36 +0000 (09:55 -0400)
committerCarlos O'Donell <carlos@redhat.com>
Fri, 6 Jul 2018 13:55:36 +0000 (09:55 -0400)
AVX_Fast_Unaligned_Load is now enabled on AMD processors from Zen
onwards.  It was disabled for Excavator and remains disabled there.

Reviewed-by: Carlos O'Donell <carlos@redhat.com>
ChangeLog
sysdeps/x86/cpu-features.c

index 5a1f291b858e5a9723881083309d99c4c57ecdc6..7fe8e2463e3b4f5c0d89f6a62a6a0f522b083bc2 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2018-07-06  Amit Pawar  <amit.pawar@amd.com>
+
+       * sysdeps/x86/cpu-features.c (get_common_indeces):
+       AVX_Fast_Unaligned_Load is enabled when AVX2 is detected.
+       * sysdeps/x86/cpu-features.c (init_cpu_features):
+       AVX_Fast_Unaligned_Load is disabled for Excavator core.
+
 2018-07-05  Florian Weimer  <fweimer@redhat.com>
 
        * csu/Makefile (CFLAGS-static-reloc.os): Build with stack
index 0fc3674c4b81f85af9a19228ed2b0e83e41511ab..d41ebde823c52f2da97c215ea7f18ecb2074c2f0 100644 (file)
@@ -78,8 +78,15 @@ get_common_indeces (struct cpu_features *cpu_features,
              /* The following features depend on AVX being usable.  */
              /* Determine if AVX2 is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
+             {
                cpu_features->feature[index_arch_AVX2_Usable]
                  |= bit_arch_AVX2_Usable;
+
+               /* Unaligned load with 256-bit AVX registers are faster on
+                  Intel/AMD processors with AVX2.  */
+               cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
+                 |= bit_arch_AVX_Fast_Unaligned_Load;
+             }
              /* Determine if FMA is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, FMA))
                cpu_features->feature[index_arch_FMA_Usable]
@@ -298,11 +305,6 @@ init_cpu_features (struct cpu_features *cpu_features)
            }
        }
 
-      /* Unaligned load with 256-bit AVX registers are faster on
-        Intel processors with AVX2.  */
-      if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
-       cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
-         |= bit_arch_AVX_Fast_Unaligned_Load;
 
       /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
          if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
@@ -351,9 +353,15 @@ init_cpu_features (struct cpu_features *cpu_features)
 #endif
          /* "Excavator"   */
          if (model >= 0x60 && model <= 0x7f)
+         {
            cpu_features->feature[index_arch_Fast_Unaligned_Load]
              |= (bit_arch_Fast_Unaligned_Load
                  | bit_arch_Fast_Copy_Backward);
+
+           /* Unaligned AVX loads are slower.*/
+           cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
+                 &= ~bit_arch_AVX_Fast_Unaligned_Load;
+         }
        }
     }
   else