git.ipfire.org Git - thirdparty/glibc.git/commitdiff
x86: Do not prefer ERMS for memset on Zen3+
author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Thu, 8 Feb 2024 13:08:39 +0000 (10:08 -0300)
committer: H.J. Lu <hjl.tools@gmail.com>
Tue, 13 Feb 2024 16:49:13 +0000 (08:49 -0800)
For AMD Zen3+ architecture, the performance of the vectorized loop is
slightly better than ERMS.

Checked on x86_64-linux-gnu on Zen3.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
sysdeps/x86/dl-cacheinfo.h

index f34d12846caf9422c07264e744baf20e45742a12..5a98f70364220da48b0984cfba94a6ba59b43a10 100644 (file)
@@ -1021,6 +1021,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
      minimum value is fixed.  */
   rep_stosb_threshold = TUNABLE_GET (x86_rep_stosb_threshold,
                                     long int, NULL);
+  if (cpu_features->basic.kind == arch_kind_amd
+      && !TUNABLE_IS_INITIALIZED (x86_rep_stosb_threshold))
+    /* For AMD Zen3+ architecture, the performance of the vectorized loop is
+       slightly better than ERMS.  */
+    rep_stosb_threshold = SIZE_MAX;
 
   TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX);
   TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX);