]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/x86/dl-cacheinfo.h
Update copyright dates with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / x86 / dl-cacheinfo.h
CommitLineData
/* Initialize x86 cache info.
   Copyright (C) 2020-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
/* Table of the one-byte cache descriptors looked up by
   intel_check_word.  Each entry maps a descriptor byte (IDX) to the
   associativity, line size and total size of the cache it describes.
   REL_NAME identifies which cache that is: it is the matching
   _SC_LEVEL*_SIZE constant expressed relative to
   _SC_LEVEL1_ICACHE_SIZE (see the M macro below), so that it fits in
   an unsigned char.

   The entries MUST remain sorted by strictly ascending IDX because the
   table is searched with bsearch.  */
static const struct intel_02_cache_info
{
  unsigned char idx;
  unsigned char assoc;
  unsigned char linesize;
  unsigned char rel_name;
  unsigned int size;
} intel_02_known [] =
  {
/* Express an _SC_ cache constant relative to the first one.  This
   macro stays defined after the table; the lookup code uses it too.  */
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),   24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),  3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),  4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),  6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
  };

/* Number of entries in intel_02_known.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
100
101static int
102intel_02_known_compare (const void *p1, const void *p2)
103{
104 const struct intel_02_cache_info *i1;
105 const struct intel_02_cache_info *i2;
106
107 i1 = (const struct intel_02_cache_info *) p1;
108 i2 = (const struct intel_02_cache_info *) p2;
109
110 if (i1->idx == i2->idx)
111 return 0;
112
113 return i1->idx < i2->idx ? -1 : 1;
114}
115
116
117static long int
118__attribute__ ((noinline))
119intel_check_word (int name, unsigned int value, bool *has_level_2,
120 bool *no_level_2_or_3,
121 const struct cpu_features *cpu_features)
122{
123 if ((value & 0x80000000) != 0)
124 /* The register value is reserved. */
125 return 0;
126
127 /* Fold the name. The _SC_ constants are always in the order SIZE,
128 ASSOC, LINESIZE. */
129 int folded_rel_name = (M(name) / 3) * 3;
130
131 while (value != 0)
132 {
133 unsigned int byte = value & 0xff;
134
135 if (byte == 0x40)
136 {
137 *no_level_2_or_3 = true;
138
139 if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
140 /* No need to look further. */
141 break;
142 }
143 else if (byte == 0xff)
144 {
145 /* CPUID leaf 0x4 contains all the information. We need to
146 iterate over it. */
147 unsigned int eax;
148 unsigned int ebx;
149 unsigned int ecx;
150 unsigned int edx;
151
152 unsigned int round = 0;
153 while (1)
154 {
155 __cpuid_count (4, round, eax, ebx, ecx, edx);
156
157 enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
158 if (type == null)
159 /* That was the end. */
160 break;
161
162 unsigned int level = (eax >> 5) & 0x7;
163
164 if ((level == 1 && type == data
165 && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
166 || (level == 1 && type == inst
167 && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
168 || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
169 || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
170 || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
171 {
172 unsigned int offset = M(name) - folded_rel_name;
173
174 if (offset == 0)
175 /* Cache size. */
176 return (((ebx >> 22) + 1)
177 * (((ebx >> 12) & 0x3ff) + 1)
178 * ((ebx & 0xfff) + 1)
179 * (ecx + 1));
180 if (offset == 1)
181 return (ebx >> 22) + 1;
182
183 assert (offset == 2);
184 return (ebx & 0xfff) + 1;
185 }
186
187 ++round;
188 }
189 /* There is no other cache information anywhere else. */
190 break;
191 }
192 else
193 {
194 if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
195 {
196 /* Intel reused this value. For family 15, model 6 it
197 specifies the 3rd level cache. Otherwise the 2nd
198 level cache. */
199 unsigned int family = cpu_features->basic.family;
200 unsigned int model = cpu_features->basic.model;
201
202 if (family == 15 && model == 6)
203 {
204 /* The level 3 cache is encoded for this model like
205 the level 2 cache is for other models. Pretend
206 the caller asked for the level 2 cache. */
207 name = (_SC_LEVEL2_CACHE_SIZE
208 + (name - _SC_LEVEL3_CACHE_SIZE));
209 folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
210 }
211 }
212
213 struct intel_02_cache_info *found;
214 struct intel_02_cache_info search;
215
216 search.idx = byte;
217 found = bsearch (&search, intel_02_known, nintel_02_known,
218 sizeof (intel_02_known[0]), intel_02_known_compare);
219 if (found != NULL)
220 {
221 if (found->rel_name == folded_rel_name)
222 {
223 unsigned int offset = M(name) - folded_rel_name;
224
225 if (offset == 0)
226 /* Cache size. */
227 return found->size;
228 if (offset == 1)
229 return found->assoc;
230
231 assert (offset == 2);
232 return found->linesize;
233 }
234
235 if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
236 *has_level_2 = true;
237 }
238 }
239
240 /* Next byte for the next round. */
241 value >>= 8;
242 }
243
244 /* Nothing found. */
245 return 0;
246}
247
248
249static long int __attribute__ ((noinline))
250handle_intel (int name, const struct cpu_features *cpu_features)
251{
252 unsigned int maxidx = cpu_features->basic.max_cpuid;
253
254 /* Return -1 for older CPUs. */
255 if (maxidx < 2)
256 return -1;
257
258 /* OK, we can use the CPUID instruction to get all info about the
259 caches. */
260 unsigned int cnt = 0;
261 unsigned int max = 1;
262 long int result = 0;
263 bool no_level_2_or_3 = false;
264 bool has_level_2 = false;
265
266 while (cnt++ < max)
267 {
268 unsigned int eax;
269 unsigned int ebx;
270 unsigned int ecx;
271 unsigned int edx;
272 __cpuid (2, eax, ebx, ecx, edx);
273
274 /* The low byte of EAX in the first round contain the number of
275 rounds we have to make. At least one, the one we are already
276 doing. */
277 if (cnt == 1)
278 {
279 max = eax & 0xff;
280 eax &= 0xffffff00;
281 }
282
283 /* Process the individual registers' value. */
284 result = intel_check_word (name, eax, &has_level_2,
285 &no_level_2_or_3, cpu_features);
286 if (result != 0)
287 return result;
288
289 result = intel_check_word (name, ebx, &has_level_2,
290 &no_level_2_or_3, cpu_features);
291 if (result != 0)
292 return result;
293
294 result = intel_check_word (name, ecx, &has_level_2,
295 &no_level_2_or_3, cpu_features);
296 if (result != 0)
297 return result;
298
299 result = intel_check_word (name, edx, &has_level_2,
300 &no_level_2_or_3, cpu_features);
301 if (result != 0)
302 return result;
303 }
304
305 if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
306 && no_level_2_or_3)
307 return -1;
308
309 return 0;
310}
311
312
/* Translate the 4-bit associativity ENCODING found in bits 12-15 of
   the level 2 / level 3 cache registers of CPUID leaf 0x80000006 into
   a number of ways.  SIZE and LINESIZE are only used for encoding 15
   (fully associative), where the result is SIZE / LINESIZE; a zero
   LINESIZE yields 0 instead of dividing by zero.  Encodings without a
   mapping yield 0.  */
static long int
handle_amd_assoc (unsigned int encoding, unsigned int size,
                  unsigned int linesize)
{
  switch (encoding)
    {
    case 0:
    case 1:
    case 2:
    case 4:
      return encoding;
    case 6:
      return 8;
    case 8:
      return 16;
    case 10:
      return 32;
    case 11:
      return 48;
    case 12:
      return 64;
    case 13:
      return 96;
    case 14:
      return 128;
    case 15:
      /* Fully associative.  Guard against a zero line size field so
         that odd CPUID data cannot trigger a division by zero.  */
      return linesize == 0 ? 0 : size / linesize;
    default:
      return 0;
    }
}

/* Answer the cache query NAME (an _SC_LEVEL*_CACHE_* constant) using
   the extended CPUID leaves 0x80000005 (level 1 caches) and
   0x80000006 (level 2 and level 3 caches).

   Returns 0 for anything beyond the level 3 cache, when the required
   leaf is above the maximum reported by leaf 0x80000000, or when the
   registers indicate that the cache is absent; otherwise the decoded
   value.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  /* Leaf 0x80000005 for the level 1 caches, 0x80000006 otherwise.  */
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* Level 1 instruction cache: decode EDX with the same code that
         decodes ECX for the level 1 data cache.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
        /* Fully associative.  */
        return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      return handle_amd_assoc ((ecx >> 12) & 0xf,
                               (ecx >> 6) & 0x3fffc00,
                               ecx & 0xff);

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      return handle_amd_assoc ((edx >> 12) & 0xf,
                               (edx & 0x3ffc0000) << 1,
                               edx & 0xff);

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}
428
429
/* Answer the cache query NAME (an _SC_LEVEL*_CACHE_* constant) by
   walking the sub-leaves of CPUID leaf 4 until one reports cache type
   0 (no more caches).  Levels 1 (data and instruction separately), 2
   and 3 are recognized.

   Returns the size, associativity or line size of the first matching
   cache, or 0 if no sub-leaf matches.  */
static long int __attribute__ ((noinline))
handle_zhaoxin (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;

  /* Fold NAME to the SIZE constant of its cache; the _SC_ constants
     come in groups of three (SIZE, ASSOC, LINESIZE).  */
  int folded_rel_name = (M(name) / 3) * 3;

  for (unsigned int round = 0; ; ++round)
    {
      __cpuid_count (4, round, eax, ebx, ecx, edx);

      enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
      if (type == null)
        /* That was the last sub-leaf.  */
        break;

      unsigned int level = (eax >> 5) & 0x7;

      if ((level == 1 && type == data
           && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
          || (level == 1 && type == inst
              && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
          || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
          || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE)))
        {
          /* 0 = SIZE, 1 = ASSOC, 2 = LINESIZE.  */
          unsigned int offset = M(name) - folded_rel_name;

          if (offset == 0)
            /* Cache size.  */
            return (((ebx >> 22) + 1)
                    * (((ebx >> 12) & 0x3ff) + 1)
                    * ((ebx & 0xfff) + 1)
                    * (ecx + 1));
          if (offset == 1)
            return (ebx >> 22) + 1;

          assert (offset == 2);
          return (ebx & 0xfff) + 1;
        }
    }

  /* Nothing found.  */
  return 0;
}