]>
Commit | Line | Data |
---|---|---|
b9215da1 MT |
1 | From ced101ed9d3b7cfd12d97ef24940cb00b8658c81 Mon Sep 17 00:00:00 2001 |
2 | From: Sajan Karumanchi <sajan.karumanchi@amd.com> | |
3 | Date: Tue, 1 Aug 2023 15:20:55 +0000 | |
4 | Subject: [PATCH 02/27] x86: Fix for cache computation on AMD legacy cpus. | |
5 | ||
6 | Some legacy AMD CPUs and hypervisors report CPUID leaf '0x8000_001D' | |
7 | as all zeros, which results in zeroed-out computed cache values. | |
8 | This patch reintroduces the old way of cache computation as a | |
9 | fail-safe option to handle these cases. | |
10 | Also fix the 'level4_cache_size' value by obtaining it through handle_amd(). | |
11 | ||
12 | Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com> | |
13 | Tested-by: Florian Weimer <fweimer@redhat.com> | |
14 | --- | |
15 | sysdeps/x86/dl-cacheinfo.h | 226 ++++++++++++++++++++++++++++++++----- | |
16 | 1 file changed, 199 insertions(+), 27 deletions(-) | |
17 | ||
18 | diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h | |
19 | index cd4d0351ae..285773039f 100644 | |
20 | --- a/sysdeps/x86/dl-cacheinfo.h | |
21 | +++ b/sysdeps/x86/dl-cacheinfo.h | |
22 | @@ -315,40 +315,206 @@ handle_amd (int name) | |
23 | { | |
24 | unsigned int eax; | |
25 | unsigned int ebx; | |
26 | - unsigned int ecx; | |
27 | + unsigned int ecx = 0; | |
28 | unsigned int edx; | |
29 | - unsigned int count = 0x1; | |
30 | + unsigned int max_cpuid = 0; | |
31 | + unsigned int fn = 0; | |
32 | ||
33 | /* No level 4 cache (yet). */ | |
34 | if (name > _SC_LEVEL3_CACHE_LINESIZE) | |
35 | return 0; | |
36 | ||
37 | - if (name >= _SC_LEVEL3_CACHE_SIZE) | |
38 | - count = 0x3; | |
39 | - else if (name >= _SC_LEVEL2_CACHE_SIZE) | |
40 | - count = 0x2; | |
41 | - else if (name >= _SC_LEVEL1_DCACHE_SIZE) | |
42 | - count = 0x0; | |
43 | + __cpuid (0x80000000, max_cpuid, ebx, ecx, edx); | |
44 | + | |
45 | + if (max_cpuid >= 0x8000001D) | |
46 | + /* Use __cpuid__ '0x8000_001D' to compute cache details. */ | |
47 | + { | |
48 | + unsigned int count = 0x1; | |
49 | + | |
50 | + if (name >= _SC_LEVEL3_CACHE_SIZE) | |
51 | + count = 0x3; | |
52 | + else if (name >= _SC_LEVEL2_CACHE_SIZE) | |
53 | + count = 0x2; | |
54 | + else if (name >= _SC_LEVEL1_DCACHE_SIZE) | |
55 | + count = 0x0; | |
56 | + | |
57 | + __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx); | |
58 | + | |
59 | + if (ecx != 0) | |
60 | + { | |
61 | + switch (name) | |
62 | + { | |
63 | + case _SC_LEVEL1_ICACHE_ASSOC: | |
64 | + case _SC_LEVEL1_DCACHE_ASSOC: | |
65 | + case _SC_LEVEL2_CACHE_ASSOC: | |
66 | + case _SC_LEVEL3_CACHE_ASSOC: | |
67 | + return ((ebx >> 22) & 0x3ff) + 1; | |
68 | + case _SC_LEVEL1_ICACHE_LINESIZE: | |
69 | + case _SC_LEVEL1_DCACHE_LINESIZE: | |
70 | + case _SC_LEVEL2_CACHE_LINESIZE: | |
71 | + case _SC_LEVEL3_CACHE_LINESIZE: | |
72 | + return (ebx & 0xfff) + 1; | |
73 | + case _SC_LEVEL1_ICACHE_SIZE: | |
74 | + case _SC_LEVEL1_DCACHE_SIZE: | |
75 | + case _SC_LEVEL2_CACHE_SIZE: | |
76 | + case _SC_LEVEL3_CACHE_SIZE: | |
77 | + return (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1); | |
78 | + default: | |
79 | + __builtin_unreachable (); | |
80 | + } | |
81 | + return -1; | |
82 | + } | |
83 | + } | |
84 | + | |
85 | + /* Legacy cache computation for CPUs prior to Bulldozer family. | |
86 | + This is also a fail-safe mechanism for some hypervisors that | |
87 | + accidentally configure __cpuid__ '0x8000_001D' to Zero. */ | |
88 | ||
89 | - __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx); | |
90 | + fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE); | |
91 | + | |
92 | + if (max_cpuid < fn) | |
93 | + return 0; | |
94 | + | |
95 | + __cpuid (fn, eax, ebx, ecx, edx); | |
96 | + | |
97 | + if (name < _SC_LEVEL1_DCACHE_SIZE) | |
98 | + { | |
99 | + name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE; | |
100 | + ecx = edx; | |
101 | + } | |
102 | ||
103 | switch (name) | |
104 | { | |
105 | - case _SC_LEVEL1_ICACHE_ASSOC: | |
106 | - case _SC_LEVEL1_DCACHE_ASSOC: | |
107 | - case _SC_LEVEL2_CACHE_ASSOC: | |
108 | + case _SC_LEVEL1_DCACHE_SIZE: | |
109 | + return (ecx >> 14) & 0x3fc00; | |
110 | + | |
111 | + case _SC_LEVEL1_DCACHE_ASSOC: | |
112 | + ecx >>= 16; | |
113 | + if ((ecx & 0xff) == 0xff) | |
114 | + { | |
115 | + /* Fully associative. */ | |
116 | + return (ecx << 2) & 0x3fc00; | |
117 | + } | |
118 | + return ecx & 0xff; | |
119 | + | |
120 | + case _SC_LEVEL1_DCACHE_LINESIZE: | |
121 | + return ecx & 0xff; | |
122 | + | |
123 | + case _SC_LEVEL2_CACHE_SIZE: | |
124 | + return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00; | |
125 | + | |
126 | + case _SC_LEVEL2_CACHE_ASSOC: | |
127 | + switch ((ecx >> 12) & 0xf) | |
128 | + { | |
129 | + case 0: | |
130 | + case 1: | |
131 | + case 2: | |
132 | + case 4: | |
133 | + return (ecx >> 12) & 0xf; | |
134 | + case 6: | |
135 | + return 8; | |
136 | + case 8: | |
137 | + return 16; | |
138 | + case 10: | |
139 | + return 32; | |
140 | + case 11: | |
141 | + return 48; | |
142 | + case 12: | |
143 | + return 64; | |
144 | + case 13: | |
145 | + return 96; | |
146 | + case 14: | |
147 | + return 128; | |
148 | + case 15: | |
149 | + return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff); | |
150 | + default: | |
151 | + return 0; | |
152 | + } | |
153 | + | |
154 | + case _SC_LEVEL2_CACHE_LINESIZE: | |
155 | + return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff; | |
156 | + | |
157 | + case _SC_LEVEL3_CACHE_SIZE: | |
158 | + { | |
159 | + long int total_l3_cache = 0, l3_cache_per_thread = 0; | |
160 | + unsigned int threads = 0; | |
161 | + const struct cpu_features *cpu_features; | |
162 | + | |
163 | + if ((edx & 0xf000) == 0) | |
164 | + return 0; | |
165 | + | |
166 | + total_l3_cache = (edx & 0x3ffc0000) << 1; | |
167 | + cpu_features = __get_cpu_features (); | |
168 | + | |
169 | + /* Figure out the number of logical threads that share L3. */ | |
170 | + if (max_cpuid >= 0x80000008) | |
171 | + { | |
172 | + /* Get width of APIC ID. */ | |
173 | + __cpuid (0x80000008, eax, ebx, ecx, edx); | |
174 | + threads = (ecx & 0xff) + 1; | |
175 | + } | |
176 | + | |
177 | + if (threads == 0) | |
178 | + { | |
179 | + /* If APIC ID width is not available, use logical | |
180 | + processor count. */ | |
181 | + __cpuid (0x00000001, eax, ebx, ecx, edx); | |
182 | + if ((edx & (1 << 28)) != 0) | |
183 | + threads = (ebx >> 16) & 0xff; | |
184 | + } | |
185 | + | |
186 | + /* Cap usage of highest cache level to the number of | |
187 | + supported threads. */ | |
188 | + if (threads > 0) | |
189 | + l3_cache_per_thread = total_l3_cache/threads; | |
190 | + | |
191 | + /* Get shared cache per ccx for Zen architectures. */ | |
192 | + if (cpu_features->basic.family >= 0x17) | |
193 | + { | |
194 | + long int l3_cache_per_ccx = 0; | |
195 | + /* Get the number of threads that share the L3 cache in a CCX. */ | |
196 | + __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx); | |
197 | + unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1; | |
198 | + l3_cache_per_ccx = l3_cache_per_thread * threads_per_ccx; | |
199 | + return l3_cache_per_ccx; | |
200 | + } | |
201 | + else | |
202 | + { | |
203 | + return l3_cache_per_thread; | |
204 | + } | |
205 | + } | |
206 | + | |
207 | case _SC_LEVEL3_CACHE_ASSOC: | |
208 | - return ecx ? ((ebx >> 22) & 0x3ff) + 1 : 0; | |
209 | - case _SC_LEVEL1_ICACHE_LINESIZE: | |
210 | - case _SC_LEVEL1_DCACHE_LINESIZE: | |
211 | - case _SC_LEVEL2_CACHE_LINESIZE: | |
212 | + switch ((edx >> 12) & 0xf) | |
213 | + { | |
214 | + case 0: | |
215 | + case 1: | |
216 | + case 2: | |
217 | + case 4: | |
218 | + return (edx >> 12) & 0xf; | |
219 | + case 6: | |
220 | + return 8; | |
221 | + case 8: | |
222 | + return 16; | |
223 | + case 10: | |
224 | + return 32; | |
225 | + case 11: | |
226 | + return 48; | |
227 | + case 12: | |
228 | + return 64; | |
229 | + case 13: | |
230 | + return 96; | |
231 | + case 14: | |
232 | + return 128; | |
233 | + case 15: | |
234 | + return ((edx & 0x3ffc0000) << 1) / (edx & 0xff); | |
235 | + default: | |
236 | + return 0; | |
237 | + } | |
238 | + | |
239 | case _SC_LEVEL3_CACHE_LINESIZE: | |
240 | - return ecx ? (ebx & 0xfff) + 1 : 0; | |
241 | - case _SC_LEVEL1_ICACHE_SIZE: | |
242 | - case _SC_LEVEL1_DCACHE_SIZE: | |
243 | - case _SC_LEVEL2_CACHE_SIZE: | |
244 | - case _SC_LEVEL3_CACHE_SIZE: | |
245 | - return ecx ? (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1): 0; | |
246 | + return (edx & 0xf000) == 0 ? 0 : edx & 0xff; | |
247 | + | |
248 | default: | |
249 | __builtin_unreachable (); | |
250 | } | |
251 | @@ -703,7 +869,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) | |
252 | data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); | |
253 | core = handle_amd (_SC_LEVEL2_CACHE_SIZE); | |
254 | shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); | |
255 | - shared_per_thread = shared; | |
256 | ||
257 | level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE); | |
258 | level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE); | |
259 | @@ -716,13 +881,20 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) | |
260 | level3_cache_size = shared; | |
261 | level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC); | |
262 | level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE); | |
263 | + level4_cache_size = handle_amd (_SC_LEVEL4_CACHE_SIZE); | |
264 | ||
265 | if (shared <= 0) | |
266 | - /* No shared L3 cache. All we have is the L2 cache. */ | |
267 | - shared = core; | |
268 | + { | |
269 | + /* No shared L3 cache. All we have is the L2 cache. */ | |
270 | + shared = core; | |
271 | + } | |
272 | + else if (cpu_features->basic.family < 0x17) | |
273 | + { | |
274 | + /* Account for exclusive L2 and L3 caches. */ | |
275 | + shared += core; | |
276 | + } | |
277 | ||
278 | - if (shared_per_thread <= 0) | |
279 | - shared_per_thread = shared; | |
280 | + shared_per_thread = shared; | |
281 | } | |
282 | ||
283 | cpu_features->level1_icache_size = level1_icache_size; | |
284 | -- | |
285 | 2.39.2 | |
286 |