]> git.ipfire.org Git - people/mlorenz/ipfire-2.x.git/blame - src/patches/glibc-2.38/0002-x86-Fix-for-cache-computation-on-AMD-legacy-cpus.patch
glibc: Import patches from upstream
[people/mlorenz/ipfire-2.x.git] / src / patches / glibc-2.38 / 0002-x86-Fix-for-cache-computation-on-AMD-legacy-cpus.patch
CommitLineData
b9215da1
MT
1From ced101ed9d3b7cfd12d97ef24940cb00b8658c81 Mon Sep 17 00:00:00 2001
2From: Sajan Karumanchi <sajan.karumanchi@amd.com>
3Date: Tue, 1 Aug 2023 15:20:55 +0000
4Subject: [PATCH 02/27] x86: Fix for cache computation on AMD legacy cpus.
5
6Some legacy AMD CPUs and hypervisors have _cpuid_ leaf '0x8000_001D'
7set to zero, which results in zeroed-out computed cache values.
8This patch reintroduces the old way of cache computation as a
9fail-safe option to handle these cases.
10Also set 'level4_cache_size' through handle_amd().
11
12Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>
13Tested-by: Florian Weimer <fweimer@redhat.com>
14---
15 sysdeps/x86/dl-cacheinfo.h | 226 ++++++++++++++++++++++++++++++++-----
16 1 file changed, 199 insertions(+), 27 deletions(-)
17
18diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
19index cd4d0351ae..285773039f 100644
20--- a/sysdeps/x86/dl-cacheinfo.h
21+++ b/sysdeps/x86/dl-cacheinfo.h
22@@ -315,40 +315,206 @@ handle_amd (int name)
23 {
24 unsigned int eax;
25 unsigned int ebx;
26- unsigned int ecx;
27+ unsigned int ecx = 0;
28 unsigned int edx;
29- unsigned int count = 0x1;
30+ unsigned int max_cpuid = 0;
31+ unsigned int fn = 0;
32
33 /* No level 4 cache (yet). */
34 if (name > _SC_LEVEL3_CACHE_LINESIZE)
35 return 0;
36
37- if (name >= _SC_LEVEL3_CACHE_SIZE)
38- count = 0x3;
39- else if (name >= _SC_LEVEL2_CACHE_SIZE)
40- count = 0x2;
41- else if (name >= _SC_LEVEL1_DCACHE_SIZE)
42- count = 0x0;
43+ __cpuid (0x80000000, max_cpuid, ebx, ecx, edx);
44+
45+ if (max_cpuid >= 0x8000001D)
46+ /* Use __cpuid__ '0x8000_001D' to compute cache details. */
47+ {
48+ unsigned int count = 0x1;
49+
50+ if (name >= _SC_LEVEL3_CACHE_SIZE)
51+ count = 0x3;
52+ else if (name >= _SC_LEVEL2_CACHE_SIZE)
53+ count = 0x2;
54+ else if (name >= _SC_LEVEL1_DCACHE_SIZE)
55+ count = 0x0;
56+
57+ __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
58+
59+ if (ecx != 0)
60+ {
61+ switch (name)
62+ {
63+ case _SC_LEVEL1_ICACHE_ASSOC:
64+ case _SC_LEVEL1_DCACHE_ASSOC:
65+ case _SC_LEVEL2_CACHE_ASSOC:
66+ case _SC_LEVEL3_CACHE_ASSOC:
67+ return ((ebx >> 22) & 0x3ff) + 1;
68+ case _SC_LEVEL1_ICACHE_LINESIZE:
69+ case _SC_LEVEL1_DCACHE_LINESIZE:
70+ case _SC_LEVEL2_CACHE_LINESIZE:
71+ case _SC_LEVEL3_CACHE_LINESIZE:
72+ return (ebx & 0xfff) + 1;
73+ case _SC_LEVEL1_ICACHE_SIZE:
74+ case _SC_LEVEL1_DCACHE_SIZE:
75+ case _SC_LEVEL2_CACHE_SIZE:
76+ case _SC_LEVEL3_CACHE_SIZE:
77+ return (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1);
78+ default:
79+ __builtin_unreachable ();
80+ }
81+ return -1;
82+ }
83+ }
84+
85+ /* Legacy cache computation for CPUs prior to Bulldozer family.
86+ This is also a fail-safe mechanism for some hypervisors that
87+ accidentally configure __cpuid__ '0x8000_001D' to Zero. */
88
89- __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
90+ fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
91+
92+ if (max_cpuid < fn)
93+ return 0;
94+
95+ __cpuid (fn, eax, ebx, ecx, edx);
96+
97+ if (name < _SC_LEVEL1_DCACHE_SIZE)
98+ {
99+ name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
100+ ecx = edx;
101+ }
102
103 switch (name)
104 {
105- case _SC_LEVEL1_ICACHE_ASSOC:
106- case _SC_LEVEL1_DCACHE_ASSOC:
107- case _SC_LEVEL2_CACHE_ASSOC:
108+ case _SC_LEVEL1_DCACHE_SIZE:
109+ return (ecx >> 14) & 0x3fc00;
110+
111+ case _SC_LEVEL1_DCACHE_ASSOC:
112+ ecx >>= 16;
113+ if ((ecx & 0xff) == 0xff)
114+ {
115+ /* Fully associative. */
116+ return (ecx << 2) & 0x3fc00;
117+ }
118+ return ecx & 0xff;
119+
120+ case _SC_LEVEL1_DCACHE_LINESIZE:
121+ return ecx & 0xff;
122+
123+ case _SC_LEVEL2_CACHE_SIZE:
124+ return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
125+
126+ case _SC_LEVEL2_CACHE_ASSOC:
127+ switch ((ecx >> 12) & 0xf)
128+ {
129+ case 0:
130+ case 1:
131+ case 2:
132+ case 4:
133+ return (ecx >> 12) & 0xf;
134+ case 6:
135+ return 8;
136+ case 8:
137+ return 16;
138+ case 10:
139+ return 32;
140+ case 11:
141+ return 48;
142+ case 12:
143+ return 64;
144+ case 13:
145+ return 96;
146+ case 14:
147+ return 128;
148+ case 15:
149+ return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
150+ default:
151+ return 0;
152+ }
153+
154+ case _SC_LEVEL2_CACHE_LINESIZE:
155+ return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
156+
157+ case _SC_LEVEL3_CACHE_SIZE:
158+ {
159+ long int total_l3_cache = 0, l3_cache_per_thread = 0;
160+ unsigned int threads = 0;
161+ const struct cpu_features *cpu_features;
162+
163+ if ((edx & 0xf000) == 0)
164+ return 0;
165+
166+ total_l3_cache = (edx & 0x3ffc0000) << 1;
167+ cpu_features = __get_cpu_features ();
168+
169+ /* Figure out the number of logical threads that share L3. */
170+ if (max_cpuid >= 0x80000008)
171+ {
172+ /* Get the thread count; ECX[7:0] holds the number of threads minus one. */
173+ __cpuid (0x80000008, eax, ebx, ecx, edx);
174+ threads = (ecx & 0xff) + 1;
175+ }
176+
177+ if (threads == 0)
178+ {
179+ /* If the thread count is not available from leaf 0x80000008,
180+ use the logical processor count from leaf 1. */
181+ __cpuid (0x00000001, eax, ebx, ecx, edx);
182+ if ((edx & (1 << 28)) != 0)
183+ threads = (ebx >> 16) & 0xff;
184+ }
185+
186+ /* Cap usage of highest cache level to the number of
187+ supported threads. */
188+ if (threads > 0)
189+ l3_cache_per_thread = total_l3_cache/threads;
190+
191+ /* Get shared cache per ccx for Zen architectures. */
192+ if (cpu_features->basic.family >= 0x17)
193+ {
194+ long int l3_cache_per_ccx = 0;
195+ /* Get the number of threads that share the L3 cache in a CCX. */
196+ __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
197+ unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
198+ l3_cache_per_ccx = l3_cache_per_thread * threads_per_ccx;
199+ return l3_cache_per_ccx;
200+ }
201+ else
202+ {
203+ return l3_cache_per_thread;
204+ }
205+ }
206+
207 case _SC_LEVEL3_CACHE_ASSOC:
208- return ecx ? ((ebx >> 22) & 0x3ff) + 1 : 0;
209- case _SC_LEVEL1_ICACHE_LINESIZE:
210- case _SC_LEVEL1_DCACHE_LINESIZE:
211- case _SC_LEVEL2_CACHE_LINESIZE:
212+ switch ((edx >> 12) & 0xf)
213+ {
214+ case 0:
215+ case 1:
216+ case 2:
217+ case 4:
218+ return (edx >> 12) & 0xf;
219+ case 6:
220+ return 8;
221+ case 8:
222+ return 16;
223+ case 10:
224+ return 32;
225+ case 11:
226+ return 48;
227+ case 12:
228+ return 64;
229+ case 13:
230+ return 96;
231+ case 14:
232+ return 128;
233+ case 15:
234+ return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
235+ default:
236+ return 0;
237+ }
238+
239 case _SC_LEVEL3_CACHE_LINESIZE:
240- return ecx ? (ebx & 0xfff) + 1 : 0;
241- case _SC_LEVEL1_ICACHE_SIZE:
242- case _SC_LEVEL1_DCACHE_SIZE:
243- case _SC_LEVEL2_CACHE_SIZE:
244- case _SC_LEVEL3_CACHE_SIZE:
245- return ecx ? (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1): 0;
246+ return (edx & 0xf000) == 0 ? 0 : edx & 0xff;
247+
248 default:
249 __builtin_unreachable ();
250 }
251@@ -703,7 +869,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
252 data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
253 core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
254 shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
255- shared_per_thread = shared;
256
257 level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
258 level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE);
259@@ -716,13 +881,20 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
260 level3_cache_size = shared;
261 level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC);
262 level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE);
263+ level4_cache_size = handle_amd (_SC_LEVEL4_CACHE_SIZE);
264
265 if (shared <= 0)
266- /* No shared L3 cache. All we have is the L2 cache. */
267- shared = core;
268+ {
269+ /* No shared L3 cache. All we have is the L2 cache. */
270+ shared = core;
271+ }
272+ else if (cpu_features->basic.family < 0x17)
273+ {
274+ /* Account for exclusive L2 and L3 caches. */
275+ shared += core;
276+ }
277
278- if (shared_per_thread <= 0)
279- shared_per_thread = shared;
280+ shared_per_thread = shared;
281 }
282
283 cpu_features->level1_icache_size = level1_icache_size;
284--
2852.39.2
286