/* Copyright (C) 2003-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
#include <cpuid.h>
#include <init-arch.h>
static const struct intel_02_cache_info
{
  unsigned char idx;
  unsigned char assoc;
  unsigned char linesize;
  unsigned char rel_name;
  unsigned int size;
} intel_02_known [] =
  {
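/* rel_name stores each _SC_* cache constant as an offset from
   _SC_LEVEL1_ICACHE_SIZE so that it fits in an unsigned char; the M()
   macro below does that folding.  E.g. M(_SC_LEVEL1_ICACHE_SIZE) is 0.  */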
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),     8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),    24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),   3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),   4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),   6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  25165824 },
  };
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
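/* NB: the intel_02_known table must remain sorted by ascending idx,
   since intel_check_word looks descriptors up with bsearch using the
   comparison function below.  */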
static int
intel_02_known_compare (const void *p1, const void *p2)
{
  const struct intel_02_cache_info *i1;
  const struct intel_02_cache_info *i2;

  i1 = (const struct intel_02_cache_info *) p1;
  i2 = (const struct intel_02_cache_info *) p2;

  if (i1->idx == i2->idx)
    return 0;

  return i1->idx < i2->idx ? -1 : 1;
}
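/* CPUID leaf 2 packs one-byte cache and TLB descriptors into EAX, EBX,
   ECX and EDX; a register with bit 31 set contains no valid
   descriptors.  intel_check_word decodes one such register, looking
   for the descriptor that answers the query NAME.  */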
static long int
__attribute__ ((noinline))
intel_check_word (int name, unsigned int value, bool *has_level_2,
                  bool *no_level_2_or_3,
                  const struct cpu_features *cpu_features)
{
  if ((value & 0x80000000) != 0)
    /* The register value is reserved.  */
    return 0;

  /* Fold the name.  The _SC_ constants are always in the order SIZE,
     ASSOC, LINESIZE.  */
  int folded_rel_name = (M(name) / 3) * 3;

  while (value != 0)
    {
      unsigned int byte = value & 0xff;

      if (byte == 0x40)
        {
          *no_level_2_or_3 = true;

          if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
            /* No need to look further.  */
            break;
        }
      else if (byte == 0xff)
        {
          /* CPUID leaf 0x4 contains all the information.  We need to
             iterate over it.  */
          unsigned int eax;
          unsigned int ebx;
          unsigned int ecx;
          unsigned int edx;
          unsigned int round = 0;
          while (1)
            {
              __cpuid_count (4, round, eax, ebx, ecx, edx);

              enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
              if (type == null)
                /* That was the end.  */
                break;

              unsigned int level = (eax >> 5) & 0x7;

              if ((level == 1 && type == data
                   && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
                  || (level == 1 && type == inst
                      && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
                  || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
                  || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
                  || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
                {
                  unsigned int offset = M(name) - folded_rel_name;

                  if (offset == 0)
                    /* Cache size.  */
                    return (((ebx >> 22) + 1)
                            * (((ebx >> 12) & 0x3ff) + 1)
                            * ((ebx & 0xfff) + 1)
                            * (ecx + 1));
                  if (offset == 1)
                    return (ebx >> 22) + 1;

                  assert (offset == 2);
                  return (ebx & 0xfff) + 1;
                }

              ++round;
            }
          /* There is no other cache information anywhere else.  */
          break;
        }
      else
        {
          if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
            {
              /* Intel reused this value.  For family 15, model 6 it
                 specifies the 3rd level cache.  Otherwise the 2nd
                 level cache.  */
              unsigned int family = cpu_features->basic.family;
              unsigned int model = cpu_features->basic.model;

              if (family == 15 && model == 6)
                {
                  /* The level 3 cache is encoded for this model like
                     the level 2 cache is for other models.  Pretend
                     the caller asked for the level 2 cache.  */
                  name = (_SC_LEVEL2_CACHE_SIZE
                          + (name - _SC_LEVEL3_CACHE_SIZE));
                  folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
                }
            }

          struct intel_02_cache_info *found;
          struct intel_02_cache_info search;

          search.idx = byte;
          found = bsearch (&search, intel_02_known, nintel_02_known,
                           sizeof (intel_02_known [0]), intel_02_known_compare);
          if (found != NULL)
            {
              if (found->rel_name == folded_rel_name)
                {
                  unsigned int offset = M(name) - folded_rel_name;

                  if (offset == 0)
                    /* Cache size.  */
                    return found->size;
                  if (offset == 1)
                    return found->assoc;

                  assert (offset == 2);
                  return found->linesize;
                }

              if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
                *has_level_2 = true;
            }
        }

      /* Next byte for the next round.  */
      value >>= 8;
    }

  /* Nothing found.  */
  return 0;
}
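/* For reference, the CPUID leaf 4 geometry decoded above is:
   ways = (EBX >> 22) + 1, partitions = ((EBX >> 12) & 0x3ff) + 1,
   line size = (EBX & 0xfff) + 1, sets = ECX + 1; the total cache
   size is the product of all four.  */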
static long int __attribute__ ((noinline))
handle_intel (int name, const struct cpu_features *cpu_features)
{
  unsigned int maxidx = cpu_features->basic.max_cpuid;

  /* Return -1 for older CPUs.  */
  if (maxidx < 2)
    return -1;

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  unsigned int cnt = 0;
  unsigned int max = 1;
  long int result = 0;
  bool no_level_2_or_3 = false;
  bool has_level_2 = false;

  while (cnt++ < max)
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __cpuid (2, eax, ebx, ecx, edx);

      /* The low byte of EAX in the first round contains the number of
         rounds we have to make.  At least one, the one we are already
         doing.  */
      if (cnt == 1)
        {
          max = eax & 0xff;
          eax &= 0xffffff00;
        }

      /* Process the individual registers' value.  */
      result = intel_check_word (name, eax, &has_level_2,
                                 &no_level_2_or_3, cpu_features);
      if (result != 0)
        return result;

      result = intel_check_word (name, ebx, &has_level_2,
                                 &no_level_2_or_3, cpu_features);
      if (result != 0)
        return result;

      result = intel_check_word (name, ecx, &has_level_2,
                                 &no_level_2_or_3, cpu_features);
      if (result != 0)
        return result;

      result = intel_check_word (name, edx, &has_level_2,
                                 &no_level_2_or_3, cpu_features);
      if (result != 0)
        return result;
    }

  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}
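/* AMD CPUs report the L1 cache geometry in extended CPUID leaf
   0x80000005 and the L2/L3 geometry in leaf 0x80000006.  */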
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
        /* Fully associative.  */
        return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      switch ((ecx >> 12) & 0xf)
        {
        case 0:
        case 1:
        case 2:
        case 4:
          return (ecx >> 12) & 0xf;
        case 6:
          return 8;
        case 8:
          return 16;
        case 10:
          return 32;
        case 11:
          return 48;
        case 12:
          return 64;
        case 13:
          return 96;
        case 14:
          return 128;
        case 15:
          /* Fully associative.  */
          return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
        default:
          return 0;
        }

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      switch ((edx >> 12) & 0xf)
        {
        case 0:
        case 1:
        case 2:
        case 4:
          return (edx >> 12) & 0xf;
        case 6:
          return 8;
        case 8:
          return 16;
        case 10:
          return 32;
        case 11:
          return 48;
        case 12:
          return 64;
        case 13:
          return 96;
        case 14:
          return 128;
        case 15:
          /* Fully associative.  */
          return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
        default:
          return 0;
        }

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}
/* Get the value of the system variable NAME.  */
long int
attribute_hidden
__cache_sysconf (int name)
{
  const struct cpu_features *cpu_features = __get_cpu_features ();

  if (cpu_features->basic.kind == arch_kind_intel)
    return handle_intel (name, cpu_features);

  if (cpu_features->basic.kind == arch_kind_amd)
    return handle_amd (name);

  // XXX Fill in more vendors.

  /* CPU not known, we have no information.  */
  return 0;
}
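/* This is what backs e.g. sysconf (_SC_LEVEL1_DCACHE_LINESIZE) in
   glibc on x86.  */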
/* Data cache size for use in memory and string routines, typically
   L1 size, rounded to multiple of 256 bytes.  */
long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
long int __x86_data_cache_size attribute_hidden = 32 * 1024;
/* Similar to __x86_data_cache_size_half, but not rounded.  */
long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
/* Similar to __x86_data_cache_size, but not rounded.  */
long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
/* Shared cache size for use in memory and string routines, typically
   L2 or L3 size, rounded to multiple of 256 bytes.  */
long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
/* Similar to __x86_shared_cache_size_half, but not rounded.  */
long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
/* Similar to __x86_shared_cache_size, but not rounded.  */
long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;

/* Threshold above which non-temporal stores are used.  */
long int __x86_shared_non_temporal_threshold attribute_hidden;
#ifndef DISABLE_PREFETCHW
/* PREFETCHW support flag for use in memory and string routines.  */
int __x86_prefetchw attribute_hidden;
#endif
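/* init_cacheinfo below runs as an ELF constructor, so the exported
   variables above are already set up by the time memcpy and friends
   are first called.  */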
static void
__attribute__((constructor))
init_cacheinfo (void)
{
  /* Find out what brand of processor.  */
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  int max_cpuid_ex;
  long int data = -1;
  long int shared = -1;
  unsigned int threads = 0;
  const struct cpu_features *cpu_features = __get_cpu_features ();
  int max_cpuid = cpu_features->basic.max_cpuid;
  if (cpu_features->basic.kind == arch_kind_intel)
    {
      data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);

      long int core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
      bool inclusive_cache = true;

      /* Try L3 first.  */
      unsigned int level = 3;
      shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);

      /* Number of logical processors sharing L2 cache.  */
      int threads_l2;

      /* Number of logical processors sharing L3 cache.  */
      int threads_l3;

      if (shared <= 0)
        {
          /* Try L2 otherwise.  */
          level = 2;
          shared = core;
          threads_l2 = 0;
          threads_l3 = -1;
        }
      else
        {
          threads_l2 = 0;
          threads_l3 = 0;
        }

      /* A value of 0 for the HTT bit indicates there is only a single
         logical processor.  */
      if (HAS_CPU_FEATURE (HTT))
        {
          /* Figure out the number of logical threads that share the
             highest cache level.  */
          if (max_cpuid >= 4)
            {
              unsigned int family = cpu_features->basic.family;
              unsigned int model = cpu_features->basic.model;

              int i = 0;

              /* Query until cache level 2 and 3 are enumerated.  */
              int check = 0x1 | (threads_l3 == 0) << 1;
              do
                {
                  __cpuid_count (4, i++, eax, ebx, ecx, edx);

                  /* There seems to be a bug in at least some Pentium Ds
                     which sometimes fail to iterate all cache parameters.
                     Do not loop indefinitely here, stop in this case and
                     assume there is no such information.  */
                  if ((eax & 0x1f) == 0)
                    goto intel_bug_no_cache_info;

                  switch ((eax >> 5) & 0x7)
                    {
                    default:
                      break;
                    case 2:
                      if ((check & 0x1))
                        {
                          /* Get maximum number of logical processors
                             sharing L2 cache.  */
                          threads_l2 = (eax >> 14) & 0x3ff;
                          check &= ~0x1;
                        }
                      break;
                    case 3:
                      if ((check & (0x1 << 1)))
                        {
                          /* Get maximum number of logical processors
                             sharing L3 cache.  */
                          threads_l3 = (eax >> 14) & 0x3ff;

                          /* Check if L2 and L3 caches are inclusive.  */
                          inclusive_cache = (edx & 0x2) != 0;
                          check &= ~(0x1 << 1);
                        }
                      break;
                    }
                }
              while (check);

              /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum
                 numbers of addressable IDs for logical processors sharing
                 the cache, instead of the maximum number of threads
                 sharing the cache.  */
              if (max_cpuid >= 11)
                {
                  /* Find the number of logical processors shipped in
                     one core and apply count mask.  */
                  i = 0;

                  /* Count SMT only if there is L3 cache.  Always count
                     core if there is no L3 cache.  */
                  int count = ((threads_l2 > 0 && level == 3)
                               | ((threads_l3 > 0
                                   || (threads_l2 > 0 && level == 2)) << 1));

                  while (count)
                    {
                      __cpuid_count (11, i++, eax, ebx, ecx, edx);

                      int shipped = ebx & 0xff;
                      int type = ecx & 0xff00;
                      if (shipped == 0 || type == 0)
                        break;
                      else if (type == 0x100)
                        {
                          /* Count SMT.  */
                          if ((count & 0x1))
                            {
                              int count_mask;

                              /* Compute count mask.  */
                              asm ("bsr %1, %0"
                                   : "=r" (count_mask) : "g" (threads_l2));
                              count_mask = ~(-1 << (count_mask + 1));
                              threads_l2 = (shipped - 1) & count_mask;
                              count &= ~0x1;
                            }
                        }
                      else if (type == 0x200)
                        {
                          /* Count core.  */
                          if ((count & (0x1 << 1)))
                            {
                              int count_mask;
                              int threads_core
                                = (level == 2 ? threads_l2 : threads_l3);

                              /* Compute count mask.  */
                              asm ("bsr %1, %0"
                                   : "=r" (count_mask) : "g" (threads_core));
                              count_mask = ~(-1 << (count_mask + 1));
                              threads_core = (shipped - 1) & count_mask;
                              if (level == 2)
                                threads_l2 = threads_core;
                              else
                                threads_l3 = threads_core;
                              count &= ~(0x1 << 1);
                            }
                        }
                    }
                }
              if (threads_l2 > 0)
                threads_l2 += 1;
              if (threads_l3 > 0)
                threads_l3 += 1;

              if (level == 2)
                {
                  if (threads_l2)
                    {
                      threads = threads_l2;
                      if (threads > 2 && family == 6)
                        switch (model)
                          {
                          case 0x37:
                          case 0x4a:
                          case 0x4d:
                          case 0x5a:
                          case 0x5d:
                            /* Silvermont has L2 cache shared by 2 cores.  */
                            threads = 2;
                            break;
                          }
                    }
                }
              else if (threads_l3)
                threads = threads_l3;
            }
          else
            {
            intel_bug_no_cache_info:
              /* Assume that all logical threads share the highest cache
                 level.  */
              threads
                = ((cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx
                    >> 16) & 0xff);
            }

          /* Cap usage of highest cache level to the number of supported
             threads.  */
          if (shared > 0 && threads > 0)
            shared /= threads;
        }

      /* Account for non-inclusive L2 and L3 caches.  */
      if (!inclusive_cache)
        shared += core;
    }
  else if (cpu_features->basic.kind == arch_kind_amd)
    {
      data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
      long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);

      /* Get maximum extended function.  */
      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);

      if (shared <= 0)
        /* No shared L3 cache.  All we have is the L2 cache.  */
        shared = core;
      else
        {
          /* Figure out the number of logical threads that share L3.  */
          if (max_cpuid_ex >= 0x80000008)
            {
              /* Get width of APIC ID.  */
              __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
              threads = 1 << ((ecx >> 12) & 0x0f);
            }

          if (threads == 0)
            {
              /* If APIC ID width is not available, use logical
                 processor count.  */
              __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);

              if ((edx & (1 << 28)) != 0)
                threads = (ebx >> 16) & 0xff;
            }

          /* Cap usage of highest cache level to the number of
             supported threads.  */
          if (threads > 0)
            shared /= threads;

          /* Account for exclusive L2 and L3 caches.  */
          shared += core;
        }

#ifndef DISABLE_PREFETCHW
      if (max_cpuid_ex >= 0x80000001)
        {
          __cpuid (0x80000001, eax, ebx, ecx, edx);
          /* PREFETCHW || 3DNow!  */
          if ((ecx & 0x100) || (edx & 0x80000000))
            __x86_prefetchw = -1;
        }
#endif
    }
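  /* cpu_features->data_cache_size and ->shared_cache_size are
     presumably seeded from the glibc tunables; when nonzero they
     override the values detected above.  */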
  if (cpu_features->data_cache_size != 0)
    data = cpu_features->data_cache_size;

  if (data > 0)
    {
      __x86_raw_data_cache_size_half = data / 2;
      __x86_raw_data_cache_size = data;
      /* Round data cache size to multiple of 256 bytes.  */
      data = data & ~255L;
      __x86_data_cache_size_half = data / 2;
      __x86_data_cache_size = data;
    }

  if (cpu_features->shared_cache_size != 0)
    shared = cpu_features->shared_cache_size;

  if (shared > 0)
    {
      __x86_raw_shared_cache_size_half = shared / 2;
      __x86_raw_shared_cache_size = shared;
      /* Round shared cache size to multiple of 256 bytes.  */
      shared = shared & ~255L;
      __x86_shared_cache_size_half = shared / 2;
      __x86_shared_cache_size = shared;
    }

  /* The large memcpy micro benchmark in glibc shows that, on an 8-core
     processor, non-temporal stores become faster above roughly six
     times the per-thread shared cache size, which is 3/4 of the total
     shared cache size.  */
  __x86_shared_non_temporal_threshold
    = (cpu_features->non_temporal_threshold != 0
       ? cpu_features->non_temporal_threshold
       : __x86_shared_cache_size * threads * 3 / 4);
}