]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/x86/cacheinfo.c
Prefer https to http for gnu.org and fsf.org URLs
[thirdparty/glibc.git] / sysdeps / x86 / cacheinfo.c
CommitLineData
6d59823c 1/* x86_64 cache info.
04277e02 2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
bfe6f5fa
UD
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
59ba27a6 16 License along with the GNU C Library; if not, see
5a82c748 17 <https://www.gnu.org/licenses/>. */
bfe6f5fa 18
9c450f6f
L
19#if IS_IN (libc)
20
bfe6f5fa
UD
21#include <assert.h>
22#include <stdbool.h>
23#include <stdlib.h>
24#include <unistd.h>
6f6f1215 25#include <cpuid.h>
1ae6c72d 26#include <init-arch.h>
6f6f1215 27
bfe6f5fa
UD
/* Table of known Intel CPUID-leaf-2 cache descriptor bytes.  Each
   entry maps a descriptor byte IDX to the cache it describes:
   ASSOC is the associativity, LINESIZE the line size in bytes, SIZE
   the total cache size in bytes, and REL_NAME the _SC_* sysconf name
   of the cache encoded relative to _SC_LEVEL1_ICACHE_SIZE via the M
   macro (so it fits in an unsigned char).

   NOTE: the array must stay sorted by ascending IDX —
   intel_check_word looks entries up with bsearch using
   intel_02_known_compare.  */
static const struct intel_02_cache_info
{
  unsigned char idx;		/* CPUID leaf 2 descriptor byte.  */
  unsigned char assoc;		/* Associativity.  */
  unsigned char linesize;	/* Line size in bytes.  */
  unsigned char rel_name;	/* M(_SC_LEVEL*_CACHE_SIZE) of this cache.  */
  unsigned int size;		/* Total size in bytes.  */
} intel_02_known [] =
  {
    /* Encode an _SC_* name as a small offset from the first cache
       sysconf constant.  */
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),   24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),  3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),  4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),  6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
  };

/* Number of entries in the descriptor table.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
109
110static int
111intel_02_known_compare (const void *p1, const void *p2)
112{
113 const struct intel_02_cache_info *i1;
114 const struct intel_02_cache_info *i2;
115
116 i1 = (const struct intel_02_cache_info *) p1;
117 i2 = (const struct intel_02_cache_info *) p2;
118
119 if (i1->idx == i2->idx)
120 return 0;
121
122 return i1->idx < i2->idx ? -1 : 1;
123}
124
125
/* Decode one CPUID-leaf-2 register VALUE looking for the cache
   parameter requested by NAME (one of the _SC_LEVEL*_CACHE_*
   sysconf constants).  Returns the parameter if one of the four
   descriptor bytes in VALUE provides it, otherwise 0.  Sets
   *HAS_LEVEL_2 when a level-2 descriptor is recognized and
   *NO_LEVEL_2_OR_3 when descriptor byte 0x40 ("no L2, or no L3 if
   there is an L2") is seen.  CPU_FEATURES supplies family/model to
   disambiguate the reused descriptor byte 0x49.  */
static long int
__attribute__ ((noinline))
intel_check_word (int name, unsigned int value, bool *has_level_2,
		  bool *no_level_2_or_3,
		  const struct cpu_features *cpu_features)
{
  if ((value & 0x80000000) != 0)
    /* The register value is reserved.  */
    return 0;

  /* Fold the name.  The _SC_ constants are always in the order SIZE,
     ASSOC, LINESIZE, so folding to a multiple of 3 yields the SIZE
     constant for the same cache level.  */
  int folded_rel_name = (M(name) / 3) * 3;

  /* Examine the four descriptor bytes one at a time, low byte
     first.  */
  while (value != 0)
    {
      unsigned int byte = value & 0xff;

      if (byte == 0x40)
	{
	  *no_level_2_or_3 = true;

	  if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
	    /* No need to look further.  */
	    break;
	}
      else if (byte == 0xff)
	{
	  /* CPUID leaf 0x4 contains all the information.  We need to
	     iterate over it.  */
	  unsigned int eax;
	  unsigned int ebx;
	  unsigned int ecx;
	  unsigned int edx;

	  unsigned int round = 0;
	  while (1)
	    {
	      __cpuid_count (4, round, eax, ebx, ecx, edx);

	      /* Leaf 4 cache type in EAX bits 0-4; 0 terminates the
		 enumeration.  */
	      enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
	      if (type == null)
		/* That was the end.  */
		break;

	      /* Cache level in EAX bits 5-7.  */
	      unsigned int level = (eax >> 5) & 0x7;

	      if ((level == 1 && type == data
		   && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
		  || (level == 1 && type == inst
		      && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
		  || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
		  || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
		  || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
		{
		  /* OFFSET selects SIZE (0), ASSOC (1) or LINESIZE (2)
		     within the matched cache level.  */
		  unsigned int offset = M(name) - folded_rel_name;

		  if (offset == 0)
		    /* Cache size = ways * partitions * line size * sets,
		       each field stored minus one in EBX/ECX.  */
		    return (((ebx >> 22) + 1)
			    * (((ebx >> 12) & 0x3ff) + 1)
			    * ((ebx & 0xfff) + 1)
			    * (ecx + 1));
		  if (offset == 1)
		    /* Associativity (ways).  */
		    return (ebx >> 22) + 1;

		  assert (offset == 2);
		  /* Line size.  */
		  return (ebx & 0xfff) + 1;
		}

	      ++round;
	    }
	  /* There is no other cache information anywhere else.  */
	  break;
	}
      else
	{
	  if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
	    {
	      /* Intel reused this value.  For family 15, model 6 it
		 specifies the 3rd level cache.  Otherwise the 2nd
		 level cache.  */
	      unsigned int family = cpu_features->basic.family;
	      unsigned int model = cpu_features->basic.model;

	      if (family == 15 && model == 6)
		{
		  /* The level 3 cache is encoded for this model like
		     the level 2 cache is for other models.  Pretend
		     the caller asked for the level 2 cache.  */
		  name = (_SC_LEVEL2_CACHE_SIZE
			  + (name - _SC_LEVEL3_CACHE_SIZE));
		  folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
		}
	    }

	  /* Look the descriptor byte up in the sorted table.  */
	  struct intel_02_cache_info *found;
	  struct intel_02_cache_info search;

	  search.idx = byte;
	  found = bsearch (&search, intel_02_known, nintel_02_known,
			   sizeof (intel_02_known[0]), intel_02_known_compare);
	  if (found != NULL)
	    {
	      if (found->rel_name == folded_rel_name)
		{
		  unsigned int offset = M(name) - folded_rel_name;

		  if (offset == 0)
		    /* Cache size.  */
		    return found->size;
		  if (offset == 1)
		    return found->assoc;

		  assert (offset == 2);
		  return found->linesize;
		}

	      if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
		*has_level_2 = true;
	    }
	}

      /* Next byte for the next round.  */
      value >>= 8;
    }

  /* Nothing found.  */
  return 0;
}
256
257
/* Return the value of the cache-related sysconf variable NAME on an
   Intel CPU by iterating CPUID leaf 2 and decoding each register
   with intel_check_word.  Returns the value, 0 if unknown, or -1
   when the CPU is too old or the cache level is known to be
   absent.  */
static long int __attribute__ ((noinline))
handle_intel (int name, const struct cpu_features *cpu_features)
{
  unsigned int maxidx = cpu_features->basic.max_cpuid;

  /* Return -1 for older CPUs.  */
  if (maxidx < 2)
    return -1;

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  unsigned int cnt = 0;
  unsigned int max = 1;
  long int result = 0;
  bool no_level_2_or_3 = false;
  bool has_level_2 = false;

  while (cnt++ < max)
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __cpuid (2, eax, ebx, ecx, edx);

      /* The low byte of EAX in the first round contain the number of
	 rounds we have to make.  At least one, the one we are already
	 doing.  Clear that byte so it is not misread as a
	 descriptor.  */
      if (cnt == 1)
	{
	  max = eax & 0xff;
	  eax &= 0xffffff00;
	}

      /* Process the individual registers' value.  The first nonzero
	 result wins.  */
      result = intel_check_word (name, eax, &has_level_2,
				 &no_level_2_or_3, cpu_features);
      if (result != 0)
	return result;

      result = intel_check_word (name, ebx, &has_level_2,
				 &no_level_2_or_3, cpu_features);
      if (result != 0)
	return result;

      result = intel_check_word (name, ecx, &has_level_2,
				 &no_level_2_or_3, cpu_features);
      if (result != 0)
	return result;

      result = intel_check_word (name, edx, &has_level_2,
				 &no_level_2_or_3, cpu_features);
      if (result != 0)
	return result;
    }

  /* If descriptor 0x40 told us there is no L2/L3, report -1 for any
     L2 or L3 query.  */
  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}
320
321
/* Return the value of the cache-related sysconf variable NAME on an
   AMD CPU, using the extended CPUID leaves 0x80000005 (L1) and
   0x80000006 (L2/L3).  Returns the value or 0 when the information
   is not available.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  /* EAX receives the highest supported extended function number.  */
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  /* L1 queries use leaf 0x80000005, L2/L3 queries leaf 0x80000006.  */
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* Instruction-cache queries: map the name onto the data-cache
	 name and decode EDX (L1 instruction cache) with the same
	 code that decodes ECX (L1 data cache).  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      /* Size in KiB in bits 24-31; this extracts it scaled to
	 bytes.  */
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
	/* Fully associative.  */
	return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      /* Associativity field 0 means the L2 cache is disabled.  Size
	 in KiB is in bits 16-31 of ECX.  */
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      /* 4-bit encoded associativity, bits 12-15.  */
      switch ((ecx >> 12) & 0xf)
	{
	case 0:
	case 1:
	case 2:
	case 4:
	  return (ecx >> 12) & 0xf;
	case 6:
	  return 8;
	case 8:
	  return 16;
	case 10:
	  return 32;
	case 11:
	  return 48;
	case 12:
	  return 64;
	case 13:
	  return 96;
	case 14:
	  return 128;
	case 15:
	  /* Fully associative: size divided by line size.  */
	  return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
	default:
	  return 0;
	}
      /* NOTREACHED */

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      /* L3 information lives in EDX; size field is in 512 KiB
	 units.  */
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      /* Same 4-bit associativity encoding as for L2.  */
      switch ((edx >> 12) & 0xf)
	{
	case 0:
	case 1:
	case 2:
	case 4:
	  return (edx >> 12) & 0xf;
	case 6:
	  return 8;
	case 8:
	  return 16;
	case 10:
	  return 32;
	case 11:
	  return 48;
	case 12:
	  return 64;
	case 13:
	  return 96;
	case 14:
	  return 128;
	case 15:
	  /* Fully associative: size divided by line size.  */
	  return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
	default:
	  return 0;
	}
      /* NOTREACHED */

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}
437
438
439/* Get the value of the system variable NAME. */
440long int
441attribute_hidden
442__cache_sysconf (int name)
443{
7c1d7225
L
444 const struct cpu_features *cpu_features = __get_cpu_features ();
445
c22e4c2a 446 if (cpu_features->basic.kind == arch_kind_intel)
48e7bc7a 447 return handle_intel (name, cpu_features);
bfe6f5fa 448
c22e4c2a 449 if (cpu_features->basic.kind == arch_kind_amd)
bfe6f5fa
UD
450 return handle_amd (name);
451
452 // XXX Fill in more vendors.
453
454 /* CPU not known, we have no information. */
455 return 0;
456}
457
458
/* Data cache size for use in memory and string routines, typically
   L1 size, rounded to multiple of 256 bytes.  The defaults below are
   used until init_cacheinfo runs.  */
long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
long int __x86_data_cache_size attribute_hidden = 32 * 1024;
/* Similar to __x86_data_cache_size_half, but not rounded.  */
long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
/* Similar to __x86_data_cache_size, but not rounded.  */
long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
/* Shared cache size for use in memory and string routines, typically
   L2 or L3 size, rounded to multiple of 256 bytes.  */
long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
/* Similar to __x86_shared_cache_size_half, but not rounded.  */
long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
/* Similar to __x86_shared_cache_size, but not rounded.  */
long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;

/* Threshold to use non temporal store.  Set by init_cacheinfo.  */
long int __x86_shared_non_temporal_threshold attribute_hidden;

#ifndef DISABLE_PREFETCHW
/* PREFETCHW support flag for use in memory and string routines.
   -1 when available, 0 otherwise.  */
int __x86_prefetchw attribute_hidden;
#endif
bfe6f5fa
UD
483
484
485static void
486__attribute__((constructor))
487init_cacheinfo (void)
488{
489 /* Find out what brand of processor. */
490 unsigned int eax;
491 unsigned int ebx;
492 unsigned int ecx;
493 unsigned int edx;
bfe6f5fa 494 int max_cpuid_ex;
0435403c 495 long int data = -1;
bfe6f5fa
UD
496 long int shared = -1;
497 unsigned int level;
498 unsigned int threads = 0;
7c1d7225 499 const struct cpu_features *cpu_features = __get_cpu_features ();
c22e4c2a 500 int max_cpuid = cpu_features->basic.max_cpuid;
bfe6f5fa 501
c22e4c2a 502 if (cpu_features->basic.kind == arch_kind_intel)
bfe6f5fa 503 {
48e7bc7a 504 data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
6d59823c 505
48e7bc7a 506 long int core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
9e4ec3e8
L
507 bool inclusive_cache = true;
508
0435403c 509 /* Try L3 first. */
bfe6f5fa 510 level = 3;
48e7bc7a 511 shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
6d59823c 512
d6af2388
L
513 /* Number of logical processors sharing L2 cache. */
514 int threads_l2;
515
516 /* Number of logical processors sharing L3 cache. */
517 int threads_l3;
518
bfe6f5fa 519 if (shared <= 0)
7e4ba49c 520 {
0435403c 521 /* Try L2 otherwise. */
7e4ba49c 522 level = 2;
9e4ec3e8 523 shared = core;
d6af2388
L
524 threads_l2 = 0;
525 threads_l3 = -1;
526 }
527 else
528 {
529 threads_l2 = 0;
530 threads_l3 = 0;
bfe6f5fa 531 }
6d59823c 532
7c08d791
L
533 /* A value of 0 for the HTT bit indicates there is only a single
534 logical processor. */
535 if (HAS_CPU_FEATURE (HTT))
7e4ba49c 536 {
7c08d791
L
537 /* Figure out the number of logical threads that share the
538 highest cache level. */
539 if (max_cpuid >= 4)
540 {
c22e4c2a
L
541 unsigned int family = cpu_features->basic.family;
542 unsigned int model = cpu_features->basic.model;
e2e4f560 543
7c08d791 544 int i = 0;
6d59823c 545
d6af2388
L
546 /* Query until cache level 2 and 3 are enumerated. */
547 int check = 0x1 | (threads_l3 == 0) << 1;
7c08d791
L
548 do
549 {
550 __cpuid_count (4, i++, eax, ebx, ecx, edx);
551
552 /* There seems to be a bug in at least some Pentium Ds
553 which sometimes fail to iterate all cache parameters.
554 Do not loop indefinitely here, stop in this case and
555 assume there is no such information. */
556 if ((eax & 0x1f) == 0)
557 goto intel_bug_no_cache_info;
6d59823c 558
d6af2388
L
559 switch ((eax >> 5) & 0x7)
560 {
561 default:
562 break;
563 case 2:
564 if ((check & 0x1))
565 {
566 /* Get maximum number of logical processors
567 sharing L2 cache. */
568 threads_l2 = (eax >> 14) & 0x3ff;
569 check &= ~0x1;
570 }
571 break;
572 case 3:
573 if ((check & (0x1 << 1)))
574 {
575 /* Get maximum number of logical processors
576 sharing L3 cache. */
577 threads_l3 = (eax >> 14) & 0x3ff;
9e4ec3e8 578
d6af2388
L
579 /* Check if L2 and L3 caches are inclusive. */
580 inclusive_cache = (edx & 0x2) != 0;
581 check &= ~(0x1 << 1);
582 }
583 break;
584 }
585 }
586 while (check);
3aa2588d 587
d6af2388
L
588 /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum
589 numbers of addressable IDs for logical processors sharing
590 the cache, instead of the maximum number of threads
7c08d791 591 sharing the cache. */
d6af2388 592 if (max_cpuid >= 11)
a546baa9 593 {
7c08d791
L
594 /* Find the number of logical processors shipped in
595 one core and apply count mask. */
596 i = 0;
d6af2388
L
597
598 /* Count SMT only if there is L3 cache. Always count
599 core if there is no L3 cache. */
600 int count = ((threads_l2 > 0 && level == 3)
601 | ((threads_l3 > 0
602 || (threads_l2 > 0 && level == 2)) << 1));
603
604 while (count)
a546baa9 605 {
7c08d791
L
606 __cpuid_count (11, i++, eax, ebx, ecx, edx);
607
608 int shipped = ebx & 0xff;
de71e042 609 int type = ecx & 0xff00;
7c08d791
L
610 if (shipped == 0 || type == 0)
611 break;
d6af2388
L
612 else if (type == 0x100)
613 {
614 /* Count SMT. */
615 if ((count & 0x1))
616 {
617 int count_mask;
618
619 /* Compute count mask. */
620 asm ("bsr %1, %0"
621 : "=r" (count_mask) : "g" (threads_l2));
622 count_mask = ~(-1 << (count_mask + 1));
623 threads_l2 = (shipped - 1) & count_mask;
624 count &= ~0x1;
625 }
626 }
7c08d791
L
627 else if (type == 0x200)
628 {
d6af2388
L
629 /* Count core. */
630 if ((count & (0x1 << 1)))
631 {
632 int count_mask;
633 int threads_core
634 = (level == 2 ? threads_l2 : threads_l3);
635
636 /* Compute count mask. */
637 asm ("bsr %1, %0"
638 : "=r" (count_mask) : "g" (threads_core));
639 count_mask = ~(-1 << (count_mask + 1));
640 threads_core = (shipped - 1) & count_mask;
641 if (level == 2)
642 threads_l2 = threads_core;
643 else
644 threads_l3 = threads_core;
645 count &= ~(0x1 << 1);
646 }
7c08d791
L
647 }
648 }
649 }
d6af2388
L
650 if (threads_l2 > 0)
651 threads_l2 += 1;
652 if (threads_l3 > 0)
653 threads_l3 += 1;
654 if (level == 2)
7c08d791 655 {
d6af2388 656 if (threads_l2)
7c08d791 657 {
d6af2388
L
658 threads = threads_l2;
659 if (threads > 2 && family == 6)
660 switch (model)
661 {
662 case 0x37:
663 case 0x4a:
664 case 0x4d:
665 case 0x5a:
666 case 0x5d:
667 /* Silvermont has L2 cache shared by 2 cores. */
668 threads = 2;
669 break;
670 default:
671 break;
672 }
a546baa9
L
673 }
674 }
d6af2388
L
675 else if (threads_l3)
676 threads = threads_l3;
a546baa9 677 }
7c08d791 678 else
a3d9ab50 679 {
7c08d791
L
680intel_bug_no_cache_info:
681 /* Assume that all logical threads share the highest cache
682 level. */
683
684 threads
48e7bc7a 685 = ((cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx
7c08d791 686 >> 16) & 0xff);
a3d9ab50 687 }
6d59823c 688
7c08d791
L
689 /* Cap usage of highest cache level to the number of supported
690 threads. */
691 if (shared > 0 && threads > 0)
692 shared /= threads;
bfe6f5fa 693 }
6d59823c 694
9e4ec3e8 695 /* Account for non-inclusive L2 and L3 caches. */
d6af2388
L
696 if (!inclusive_cache)
697 {
698 if (threads_l2 > 0)
699 core /= threads_l2;
700 shared += core;
701 }
bfe6f5fa 702 }
c22e4c2a 703 else if (cpu_features->basic.kind == arch_kind_amd)
bfe6f5fa 704 {
0435403c
UD
705 data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
706 long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
707 shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
6d59823c 708
0435403c 709 /* Get maximum extended function. */
6f6f1215 710 __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
bfe6f5fa 711
0435403c
UD
712 if (shared <= 0)
713 /* No shared L3 cache. All we have is the L2 cache. */
714 shared = core;
715 else
716 {
717 /* Figure out the number of logical threads that share L3. */
718 if (max_cpuid_ex >= 0x80000008)
719 {
720 /* Get width of APIC ID. */
6f6f1215 721 __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
0435403c
UD
722 threads = 1 << ((ecx >> 12) & 0x0f);
723 }
724
725 if (threads == 0)
726 {
727 /* If APIC ID width is not available, use logical
728 processor count. */
6f6f1215 729 __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
0435403c
UD
730
731 if ((edx & (1 << 28)) != 0)
732 threads = (ebx >> 16) & 0xff;
733 }
734
735 /* Cap usage of highest cache level to the number of
736 supported threads. */
737 if (threads > 0)
738 shared /= threads;
739
740 /* Account for exclusive L2 and L3 caches. */
741 shared += core;
742 }
743
6f6f1215 744#ifndef DISABLE_PREFETCHW
bfe6f5fa
UD
745 if (max_cpuid_ex >= 0x80000001)
746 {
6f6f1215 747 __cpuid (0x80000001, eax, ebx, ecx, edx);
0435403c 748 /* PREFETCHW || 3DNow! */
bfe6f5fa 749 if ((ecx & 0x100) || (edx & 0x80000000))
afec409a 750 __x86_prefetchw = -1;
bfe6f5fa 751 }
6f6f1215 752#endif
bfe6f5fa
UD
753 }
754
905947c3
L
755 if (cpu_features->data_cache_size != 0)
756 data = cpu_features->data_cache_size;
757
0435403c 758 if (data > 0)
3af48cbd 759 {
afec409a
L
760 __x86_raw_data_cache_size_half = data / 2;
761 __x86_raw_data_cache_size = data;
c0dde15b
UD
762 /* Round data cache size to multiple of 256 bytes. */
763 data = data & ~255L;
afec409a
L
764 __x86_data_cache_size_half = data / 2;
765 __x86_data_cache_size = data;
3af48cbd 766 }
bfe6f5fa 767
905947c3
L
768 if (cpu_features->shared_cache_size != 0)
769 shared = cpu_features->shared_cache_size;
770
bfe6f5fa 771 if (shared > 0)
e2b393bc 772 {
afec409a
L
773 __x86_raw_shared_cache_size_half = shared / 2;
774 __x86_raw_shared_cache_size = shared;
c0dde15b
UD
775 /* Round shared cache size to multiple of 256 bytes. */
776 shared = shared & ~255L;
afec409a
L
777 __x86_shared_cache_size_half = shared / 2;
778 __x86_shared_cache_size = shared;
e2b393bc 779 }
a057f5f8
L
780
781 /* The large memcpy micro benchmark in glibc shows that 6 times of
782 shared cache size is the approximate value above which non-temporal
808fd9e6
L
783 store becomes faster on a 8-core processor. This is the 3/4 of the
784 total shared cache size. */
785 __x86_shared_non_temporal_threshold
905947c3
L
786 = (cpu_features->non_temporal_threshold != 0
787 ? cpu_features->non_temporal_threshold
788 : __x86_shared_cache_size * threads * 3 / 4);
bfe6f5fa 789}
9c450f6f
L
790
791#endif