x86: Optimizing memcpy for AMD Zen architecture.

author Sajan Karumanchi <sajan.karumanchi@amd.com>

Wed, 28 Oct 2020 07:35:33 +0000 (13:05 +0530)

committer Florian Weimer <fweimer@redhat.com>

Fri, 30 Oct 2020 12:04:09 +0000 (13:04 +0100)
author Sajan Karumanchi <sajan.karumanchi@amd.com>
Wed, 28 Oct 2020 07:35:33 +0000 (13:05 +0530)
committer Florian Weimer <fweimer@redhat.com>
Fri, 30 Oct 2020 12:04:09 +0000 (13:04 +0100)
diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c

index 43f2e7445ce476a52dddc0f86cff7aa03e6bad24..39c13b7195c77a69e2f3bab9c718aafdaad453a6 100644 (file)
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -722,7 +722,7 @@ intel_bug_no_cache_info:
               threads = 1 << ((ecx >> 12) & 0x0f);
             }
  
-         if (threads == 0)
+         if (threads == 0 || cpu_features->basic.family >= 0x17)
             {
               /* If APIC ID width is not available, use logical
                  processor count.  */
@@ -737,8 +737,22 @@ intel_bug_no_cache_info:
           if (threads > 0)
             shared /= threads;
  
-         /* Account for exclusive L2 and L3 caches.  */
-         shared += core;
+         /* Get the shared cache size per CCX for Zen architectures.  */
+         if (cpu_features->basic.family >= 0x17)
+           {
+             unsigned int eax;
+
+             /* Get the number of threads sharing the L3 cache in a CCX.  */
+             __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
+
+             unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
+             shared *= threads_per_ccx;
+           }
+         else
+           {
+             /* Account for exclusive L2 and L3 caches.  */
+             shared += core;
+            }
         }
  
  #ifndef DISABLE_PREFETCHW
author	Sajan Karumanchi <sajan.karumanchi@amd.com>
	Wed, 28 Oct 2020 07:35:33 +0000 (13:05 +0530)
committer	Florian Weimer <fweimer@redhat.com>
	Fri, 30 Oct 2020 12:04:09 +0000 (13:04 +0100)