+++ /dev/null
-From ced101ed9d3b7cfd12d97ef24940cb00b8658c81 Mon Sep 17 00:00:00 2001
-From: Sajan Karumanchi <sajan.karumanchi@amd.com>
-Date: Tue, 1 Aug 2023 15:20:55 +0000
-Subject: [PATCH 02/44] x86: Fix for cache computation on AMD legacy cpus.
-
-Some legacy AMD CPUs and hypervisors have the _cpuid_ '0x8000_001D'
-set to Zero, thus resulting in zeroed-out computed cache values.
-This patch reintroduces the old way of cache computation as a
-fail-safe option to handle these exceptions.
-Fixed 'level4_cache_size' value through handle_amd().
-
-Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>
-Tested-by: Florian Weimer <fweimer@redhat.com>
----
- sysdeps/x86/dl-cacheinfo.h | 226 ++++++++++++++++++++++++++++++++-----
- 1 file changed, 199 insertions(+), 27 deletions(-)
-
-diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
-index cd4d0351ae..285773039f 100644
---- a/sysdeps/x86/dl-cacheinfo.h
-+++ b/sysdeps/x86/dl-cacheinfo.h
-@@ -315,40 +315,206 @@ handle_amd (int name)
- {
- unsigned int eax;
- unsigned int ebx;
-- unsigned int ecx;
-+ unsigned int ecx = 0;
- unsigned int edx;
-- unsigned int count = 0x1;
-+ unsigned int max_cpuid = 0;
-+ unsigned int fn = 0;
-
- /* No level 4 cache (yet). */
- if (name > _SC_LEVEL3_CACHE_LINESIZE)
- return 0;
-
-- if (name >= _SC_LEVEL3_CACHE_SIZE)
-- count = 0x3;
-- else if (name >= _SC_LEVEL2_CACHE_SIZE)
-- count = 0x2;
-- else if (name >= _SC_LEVEL1_DCACHE_SIZE)
-- count = 0x0;
-+ __cpuid (0x80000000, max_cpuid, ebx, ecx, edx);
-+
-+ if (max_cpuid >= 0x8000001D)
-+ /* Use __cpuid__ '0x8000_001D' to compute cache details. */
-+ {
-+ unsigned int count = 0x1;
-+
-+ if (name >= _SC_LEVEL3_CACHE_SIZE)
-+ count = 0x3;
-+ else if (name >= _SC_LEVEL2_CACHE_SIZE)
-+ count = 0x2;
-+ else if (name >= _SC_LEVEL1_DCACHE_SIZE)
-+ count = 0x0;
-+
-+ __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
-+
-+ if (ecx != 0)
-+ {
-+ switch (name)
-+ {
-+ case _SC_LEVEL1_ICACHE_ASSOC:
-+ case _SC_LEVEL1_DCACHE_ASSOC:
-+ case _SC_LEVEL2_CACHE_ASSOC:
-+ case _SC_LEVEL3_CACHE_ASSOC:
-+ return ((ebx >> 22) & 0x3ff) + 1;
-+ case _SC_LEVEL1_ICACHE_LINESIZE:
-+ case _SC_LEVEL1_DCACHE_LINESIZE:
-+ case _SC_LEVEL2_CACHE_LINESIZE:
-+ case _SC_LEVEL3_CACHE_LINESIZE:
-+ return (ebx & 0xfff) + 1;
-+ case _SC_LEVEL1_ICACHE_SIZE:
-+ case _SC_LEVEL1_DCACHE_SIZE:
-+ case _SC_LEVEL2_CACHE_SIZE:
-+ case _SC_LEVEL3_CACHE_SIZE:
-+ return (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1);
-+ default:
-+ __builtin_unreachable ();
-+ }
-+ return -1;
-+ }
-+ }
-+
-+ /* Legacy cache computation for CPUs prior to Bulldozer family.
-+ This is also a fail-safe mechanism for some hypervisors that
-+ accidentally configure __cpuid__ '0x8000_001D' to Zero. */
-
-- __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
-+ fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
-+
-+ if (max_cpuid < fn)
-+ return 0;
-+
-+ __cpuid (fn, eax, ebx, ecx, edx);
-+
-+ if (name < _SC_LEVEL1_DCACHE_SIZE)
-+ {
-+ name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
-+ ecx = edx;
-+ }
-
- switch (name)
- {
-- case _SC_LEVEL1_ICACHE_ASSOC:
-- case _SC_LEVEL1_DCACHE_ASSOC:
-- case _SC_LEVEL2_CACHE_ASSOC:
-+ case _SC_LEVEL1_DCACHE_SIZE:
-+ return (ecx >> 14) & 0x3fc00;
-+
-+ case _SC_LEVEL1_DCACHE_ASSOC:
-+ ecx >>= 16;
-+ if ((ecx & 0xff) == 0xff)
-+ {
-+ /* Fully associative. */
-+ return (ecx << 2) & 0x3fc00;
-+ }
-+ return ecx & 0xff;
-+
-+ case _SC_LEVEL1_DCACHE_LINESIZE:
-+ return ecx & 0xff;
-+
-+ case _SC_LEVEL2_CACHE_SIZE:
-+ return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
-+
-+ case _SC_LEVEL2_CACHE_ASSOC:
-+ switch ((ecx >> 12) & 0xf)
-+ {
-+ case 0:
-+ case 1:
-+ case 2:
-+ case 4:
-+ return (ecx >> 12) & 0xf;
-+ case 6:
-+ return 8;
-+ case 8:
-+ return 16;
-+ case 10:
-+ return 32;
-+ case 11:
-+ return 48;
-+ case 12:
-+ return 64;
-+ case 13:
-+ return 96;
-+ case 14:
-+ return 128;
-+ case 15:
-+ return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
-+ default:
-+ return 0;
-+ }
-+
-+ case _SC_LEVEL2_CACHE_LINESIZE:
-+ return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
-+
-+ case _SC_LEVEL3_CACHE_SIZE:
-+ {
-+ long int total_l3_cache = 0, l3_cache_per_thread = 0;
-+ unsigned int threads = 0;
-+ const struct cpu_features *cpu_features;
-+
-+ if ((edx & 0xf000) == 0)
-+ return 0;
-+
-+ total_l3_cache = (edx & 0x3ffc0000) << 1;
-+ cpu_features = __get_cpu_features ();
-+
-+ /* Figure out the number of logical threads that share L3. */
-+ if (max_cpuid >= 0x80000008)
-+ {
-+ /* Get width of APIC ID. */
-+ __cpuid (0x80000008, eax, ebx, ecx, edx);
-+ threads = (ecx & 0xff) + 1;
-+ }
-+
-+ if (threads == 0)
-+ {
-+ /* If APIC ID width is not available, use logical
-+ processor count. */
-+ __cpuid (0x00000001, eax, ebx, ecx, edx);
-+ if ((edx & (1 << 28)) != 0)
-+ threads = (ebx >> 16) & 0xff;
-+ }
-+
-+ /* Cap usage of highest cache level to the number of
-+ supported threads. */
-+ if (threads > 0)
-+ l3_cache_per_thread = total_l3_cache/threads;
-+
-+ /* Get shared cache per ccx for Zen architectures. */
-+ if (cpu_features->basic.family >= 0x17)
-+ {
-+ long int l3_cache_per_ccx = 0;
-+ /* Get number of threads share the L3 cache in CCX. */
-+ __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
-+ unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
-+ l3_cache_per_ccx = l3_cache_per_thread * threads_per_ccx;
-+ return l3_cache_per_ccx;
-+ }
-+ else
-+ {
-+ return l3_cache_per_thread;
-+ }
-+ }
-+
- case _SC_LEVEL3_CACHE_ASSOC:
-- return ecx ? ((ebx >> 22) & 0x3ff) + 1 : 0;
-- case _SC_LEVEL1_ICACHE_LINESIZE:
-- case _SC_LEVEL1_DCACHE_LINESIZE:
-- case _SC_LEVEL2_CACHE_LINESIZE:
-+ switch ((edx >> 12) & 0xf)
-+ {
-+ case 0:
-+ case 1:
-+ case 2:
-+ case 4:
-+ return (edx >> 12) & 0xf;
-+ case 6:
-+ return 8;
-+ case 8:
-+ return 16;
-+ case 10:
-+ return 32;
-+ case 11:
-+ return 48;
-+ case 12:
-+ return 64;
-+ case 13:
-+ return 96;
-+ case 14:
-+ return 128;
-+ case 15:
-+ return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
-+ default:
-+ return 0;
-+ }
-+
- case _SC_LEVEL3_CACHE_LINESIZE:
-- return ecx ? (ebx & 0xfff) + 1 : 0;
-- case _SC_LEVEL1_ICACHE_SIZE:
-- case _SC_LEVEL1_DCACHE_SIZE:
-- case _SC_LEVEL2_CACHE_SIZE:
-- case _SC_LEVEL3_CACHE_SIZE:
-- return ecx ? (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1): 0;
-+ return (edx & 0xf000) == 0 ? 0 : edx & 0xff;
-+
- default:
- __builtin_unreachable ();
- }
-@@ -703,7 +869,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
- data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
- core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
- shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
-- shared_per_thread = shared;
-
- level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
- level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE);
-@@ -716,13 +881,20 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
- level3_cache_size = shared;
- level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC);
- level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE);
-+ level4_cache_size = handle_amd (_SC_LEVEL4_CACHE_SIZE);
-
- if (shared <= 0)
-- /* No shared L3 cache. All we have is the L2 cache. */
-- shared = core;
-+ {
-+ /* No shared L3 cache. All we have is the L2 cache. */
-+ shared = core;
-+ }
-+ else if (cpu_features->basic.family < 0x17)
-+ {
-+ /* Account for exclusive L2 and L3 caches. */
-+ shared += core;
-+ }
-
-- if (shared_per_thread <= 0)
-- shared_per_thread = shared;
-+ shared_per_thread = shared;
- }
-
- cpu_features->level1_icache_size = level1_icache_size;
---
-2.39.2
-