/* Source: glibc, sysdeps/x86_64/cacheinfo.c
   (mirror: git.ipfire.org thirdparty/glibc.git,
   blob ca13a53f253ea27bfb77a94080c3708d42abb556).  */
1 /* x86_64 cache info.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19 #include <assert.h>
20 #include <stdbool.h>
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <cpuid.h>
24
#ifndef __cpuid_count
/* FIXME: Provide __cpuid_count if it isn't defined.  Copied from gcc
   4.4.0.  Remove this if gcc 4.4 is the minimum requirement.
   Executes CPUID with EAX=LEVEL and ECX=COUNT and stores the four
   result registers into A, B, C, D.  */
# if defined(__i386__) && defined(__PIC__)
/* %ebx may be the PIC register.  Swap it with a scratch register
   around the CPUID so the PIC base is preserved.  */
#  define __cpuid_count(level, count, a, b, c, d) \
  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
	   "cpuid\n\t" \
	   "xchg{l}\t{%%}ebx, %1\n\t" \
	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
	   : "0" (level), "2" (count))
# else
#  define __cpuid_count(level, count, a, b, c, d) \
  __asm__ ("cpuid\n\t" \
	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
	   : "0" (level), "2" (count))
# endif
#endif
43
#ifdef USE_MULTIARCH
# include "multiarch/init-arch.h"

/* With multiarch the vendor was already determined by
   __init_cpu_features; just test the cached kind.  */
# define is_intel __cpu_features.kind == arch_kind_intel
# define is_amd __cpu_features.kind == arch_kind_amd
# define max_cpuid __cpu_features.max_cpuid
#else
/* Without multiarch these macros expand to comparisons against local
   variables EBX/ECX/EDX which the *caller* must have filled in with
   __cpuid (0, ...) — they test the CPUID vendor string in place.  */
/* This spells out "GenuineIntel".  */
# define is_intel \
  ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
/* This spells out "AuthenticAMD".  */
# define is_amd \
  ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
#endif
58
/* One entry per Intel CPUID-leaf-2 cache descriptor byte: the
   descriptor value, its associativity, line size, which cache it
   describes (REL_NAME, stored relative to _SC_LEVEL1_ICACHE_SIZE via
   the M macro so it fits in a byte), and the cache size in bytes.
   The array MUST stay sorted by ascending IDX: intel_check_word looks
   descriptors up with bsearch using intel_02_known_compare.  */
static const struct intel_02_cache_info
{
  unsigned char idx;
  unsigned char assoc;
  unsigned char linesize;
  unsigned char rel_name;
  unsigned int size;
} intel_02_known [] =
  {
    /* Fold an _SC_LEVEL*_CACHE_* constant into a small offset.  */
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),   24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),  3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),  4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),  6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
  };

/* Number of entries in the descriptor table.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
140
141 static int
142 intel_02_known_compare (const void *p1, const void *p2)
143 {
144 const struct intel_02_cache_info *i1;
145 const struct intel_02_cache_info *i2;
146
147 i1 = (const struct intel_02_cache_info *) p1;
148 i2 = (const struct intel_02_cache_info *) p2;
149
150 if (i1->idx == i2->idx)
151 return 0;
152
153 return i1->idx < i2->idx ? -1 : 1;
154 }
155
156
/* Decode one register VALUE as returned by CPUID leaf 2 and, if one
   of its descriptor bytes describes the cache requested by NAME (an
   _SC_LEVEL*_CACHE_{SIZE,ASSOC,LINESIZE} constant), return that
   parameter; otherwise return 0.  Sets *HAS_LEVEL_2 when a level-2
   descriptor is seen and *NO_LEVEL_2_OR_3 when descriptor 0x40
   ("no L2, or no L3 if an L2 exists") is seen.  */
static long int
__attribute__ ((noinline))
intel_check_word (int name, unsigned int value, bool *has_level_2,
		  bool *no_level_2_or_3)
{
  if ((value & 0x80000000) != 0)
    /* The register value is reserved.  */
    return 0;

  /* Fold the name.  The _SC_ constants are always in the order SIZE,
     ASSOC, LINESIZE, so rounding M(name) down to a multiple of 3
     yields the SIZE constant for the same cache level.  */
  int folded_rel_name = (M(name) / 3) * 3;

  /* Walk the four descriptor bytes, least significant first.  */
  while (value != 0)
    {
      unsigned int byte = value & 0xff;

      if (byte == 0x40)
	{
	  *no_level_2_or_3 = true;

	  if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
	    /* No need to look further.  */
	    break;
	}
      else if (byte == 0xff)
	{
	  /* CPUID leaf 0x4 contains all the information.  We need to
	     iterate over it.  */
	  unsigned int eax;
	  unsigned int ebx;
	  unsigned int ecx;
	  unsigned int edx;

	  unsigned int round = 0;
	  while (1)
	    {
	      __cpuid_count (4, round, eax, ebx, ecx, edx);

	      /* EAX[4:0] is the cache type; 0 terminates the list.  */
	      enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
	      if (type == null)
		/* That was the end.  */
		break;

	      /* EAX[7:5] is the cache level (1-based).  */
	      unsigned int level = (eax >> 5) & 0x7;

	      if ((level == 1 && type == data
		   && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
		  || (level == 1 && type == inst
		      && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
		  || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
		  || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
		  || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
		{
		  /* OFFSET selects SIZE (0), ASSOC (1) or LINESIZE (2).  */
		  unsigned int offset = M(name) - folded_rel_name;

		  if (offset == 0)
		    /* Cache size = ways * partitions * line size * sets
		       (each CPUID field is stored minus one).  */
		    return (((ebx >> 22) + 1)
			    * (((ebx >> 12) & 0x3ff) + 1)
			    * ((ebx & 0xfff) + 1)
			    * (ecx + 1));
		  if (offset == 1)
		    /* Associativity: EBX[31:22] + 1.  */
		    return (ebx >> 22) + 1;

		  assert (offset == 2);
		  /* Line size: EBX[11:0] + 1.  */
		  return (ebx & 0xfff) + 1;
		}

	      ++round;
	    }
	  /* There is no other cache information anywhere else.  */
	  break;
	}
      else
	{
	  if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
	    {
	      /* Intel reused this value.  For family 15, model 6 it
		 specifies the 3rd level cache.  Otherwise the 2nd
		 level cache.  */
	      unsigned int family;
	      unsigned int model;
#ifdef USE_MULTIARCH
	      family = __cpu_features.family;
	      model = __cpu_features.model;
#else
	      unsigned int eax;
	      unsigned int ebx;
	      unsigned int ecx;
	      unsigned int edx;
	      __cpuid (1, eax, ebx, ecx, edx);

	      /* Combine extended and base family/model fields.  */
	      family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
	      model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf);
#endif

	      if (family == 15 && model == 6)
		{
		  /* The level 3 cache is encoded for this model like
		     the level 2 cache is for other models.  Pretend
		     the caller asked for the level 2 cache.  */
		  name = (_SC_LEVEL2_CACHE_SIZE
			  + (name - _SC_LEVEL3_CACHE_SIZE));
		  folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
		}
	    }

	  /* Look the descriptor byte up in the sorted table.  */
	  struct intel_02_cache_info *found;
	  struct intel_02_cache_info search;

	  search.idx = byte;
	  found = bsearch (&search, intel_02_known, nintel_02_known,
			   sizeof (intel_02_known[0]), intel_02_known_compare);
	  if (found != NULL)
	    {
	      if (found->rel_name == folded_rel_name)
		{
		  unsigned int offset = M(name) - folded_rel_name;

		  if (offset == 0)
		    /* Cache size.  */
		    return found->size;
		  if (offset == 1)
		    return found->assoc;

		  assert (offset == 2);
		  return found->linesize;
		}

	      /* Remember that an L2 cache exists even when the caller
		 asked about a different level.  */
	      if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
		*has_level_2 = true;
	    }
	}

      /* Next byte for the next round.  */
      value >>= 8;
    }

  /* Nothing found.  */
  return 0;
}
299
300
/* Return the cache parameter NAME (an _SC_LEVEL*_CACHE_* constant)
   for an Intel CPU by decoding CPUID leaf 2 with intel_check_word
   (which falls back to leaf 4 for descriptor 0xff).  MAXIDX is the
   highest supported basic CPUID leaf.  Returns 0 when no information
   was found, and -1 when descriptor 0x40 showed the requested L2/L3
   cache does not exist.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  /* Leaf 2 is only defined when the maximum basic leaf is >= 2.  */
  assert (maxidx >= 2);

  bool no_level_2_or_3 = false;
  bool has_level_2 = false;
  unsigned int rounds = 1;

  for (unsigned int iter = 0; iter < rounds; ++iter)
    {
      unsigned int regs[4];
      __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

      if (iter == 0)
	{
	  /* On the first round the low byte of EAX holds the number
	     of times leaf 2 must be queried (at least the one we are
	     doing).  It is not a cache descriptor, so mask it out
	     before decoding the register.  */
	  rounds = regs[0] & 0xff;
	  regs[0] &= 0xffffff00;
	}

      /* Decode the descriptor bytes of all four registers.  */
      for (int r = 0; r < 4; ++r)
	{
	  long int result
	    = intel_check_word (name, regs[r], &has_level_2,
				&no_level_2_or_3);
	  if (result != 0)
	    return result;
	}
    }

  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}
355
356
/* Decode the 4-bit associativity field used by AMD's extended CPUID
   leaf 0x80000006.  Codes 0, 1, 2 and 4 encode themselves; reserved
   codes (3, 5, 7, 9) map to 0.  Code 0xf ("fully associative") is
   handled by the caller since it needs the cache size.  */
static const unsigned char amd_assoc_table[16] =
  {
    0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0
  };

/* Return the cache parameter NAME (an _SC_LEVEL*_CACHE_* constant)
   for an AMD CPU from extended CPUID leaves 0x80000005 (L1) and
   0x80000006 (L2/L3).  Returns 0 when the information is not
   available.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;

  /* EAX receives the highest supported extended function.  */
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  /* L1 parameters come from leaf 0x80000005, L2/L3 from 0x80000006.  */
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* The L1 instruction-cache layout in EDX mirrors the L1
	 data-cache layout in ECX; remap the request and fall through
	 to the data-cache cases.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      /* ECX[31:24] is the size in KiB; shift/mask scales to bytes.  */
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      {
	unsigned int ways = (ecx >> 16) & 0xff;
	if (ways == 0xff)
	  /* Fully associative.  */
	  return ((ecx >> 16) << 2) & 0x3fc00;
	return ways;
      }

    case _SC_LEVEL1_DCACHE_LINESIZE:
      /* ECX[7:0] is the line size in bytes.  */
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      /* An associativity field of 0 means the L2 cache is disabled.  */
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      {
	unsigned int code = (ecx >> 12) & 0xf;
	if (code == 0xf)
	  /* Fully associative: size / line size.  */
	  return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
	return amd_assoc_table[code];
      }

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      /* EDX[31:18] is the L3 size in 512 KiB units.  */
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      {
	unsigned int code = (edx >> 12) & 0xf;
	if (code == 0xf)
	  /* Fully associative: size / line size.  */
	  return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
	return amd_assoc_table[code];
      }

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}
472
473
/* Get the value of the system variable NAME (an _SC_LEVEL*_CACHE_*
   constant).  Backend for sysconf.  Returns 0 when the CPU vendor is
   unknown or the parameter is unavailable.  */
long int
attribute_hidden
__cache_sysconf (int name)
{
#ifdef USE_MULTIARCH
  if (__cpu_features.kind == arch_kind_unknown)
    __init_cpu_features ();
#else
  /* Find out what brand of processor.  NB: the is_intel / is_amd
     macros below expand to tests of these exact local variables.  */
  unsigned int max_cpuid;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0, max_cpuid, ebx, ecx, edx);
#endif

  if (is_intel)
    return handle_intel (name, max_cpuid);

  if (is_amd)
    return handle_amd (name);

  // XXX Fill in more vendors.

  /* CPU not known, we have no information.  */
  return 0;
}
502
503
/* Tuning variables consumed by the memory and string routines.  The
   initializers are conservative fallbacks; init_cacheinfo below
   overwrites them at startup with values derived from CPUID.  */

/* Data cache size for use in memory and string routines, typically
   L1 size, rounded to multiple of 256 bytes.  */
long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
long int __x86_data_cache_size attribute_hidden = 32 * 1024;
/* Similar to __x86_data_cache_size_half, but not rounded.  */
long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
/* Similar to __x86_data_cache_size, but not rounded.  */
long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
/* Shared cache size for use in memory and string routines, typically
   L2 or L3 size divided by the number of threads sharing it, rounded
   to multiple of 256 bytes.  */
long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
/* Similar to __x86_shared_cache_size_half, but not rounded.  */
long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
/* Similar to __x86_shared_cache_size, but not rounded.  */
long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;

#ifndef DISABLE_PREFETCHW
/* PREFETCHW support flag for use in memory and string routines.
   Zero-initialized; init_cacheinfo sets it to -1 when the CPU
   advertises PREFETCHW or 3DNow!.  */
int __x86_prefetchw attribute_hidden;
#endif

#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
/* Instructions preferred for memory and string routines.

     0: Regular instructions
     1: MMX instructions
     2: SSE2 instructions
     3: SSSE3 instructions

   Zero-initialized; set by init_cacheinfo for Intel and AMD CPUs.  */
int __x86_preferred_memory_instruction attribute_hidden;
#endif
537
538
/* ELF constructor: runs at startup and fills in the __x86_* tuning
   variables above from CPUID information for Intel and AMD CPUs.  */
static void
__attribute__((constructor))
init_cacheinfo (void)
{
  /* Find out what brand of processor.  */
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  int max_cpuid_ex;
  long int data = -1;		/* L1 data cache size; -1 = unknown.  */
  long int shared = -1;		/* Shared (L2/L3) cache size; -1 = unknown.  */
  unsigned int level;		/* Cache level SHARED refers to (2 or 3).  */
  unsigned int threads = 0;	/* Logical threads sharing that level.  */

#ifdef USE_MULTIARCH
  if (__cpu_features.kind == arch_kind_unknown)
    __init_cpu_features ();
#else
  int max_cpuid;
  __cpuid (0, max_cpuid, ebx, ecx, edx);
#endif

  if (is_intel)
    {
      data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);

      /* Try L3 first.  */
      level = 3;
      shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);

      if (shared <= 0)
	{
	  /* Try L2 otherwise.  */
	  level = 2;
	  shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
	}

      /* Leaf 1's EBX is kept in a separate variable because EBX is
	 clobbered by the __cpuid_count calls below and is still
	 needed at the intel_bug_no_cache_info label.  */
      unsigned int ebx_1;

#ifdef USE_MULTIARCH
      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
      ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
#else
      __cpuid (1, eax, ebx_1, ecx, edx);
#endif

#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
      /* Intel prefers SSSE3 instructions for memory/string routines
	 if they are available (leaf-1 ECX bit 9 = SSSE3).  */
      if ((ecx & 0x200))
	__x86_preferred_memory_instruction = 3;
      else
	__x86_preferred_memory_instruction = 2;
#endif

      /* Figure out the number of logical threads that share the
	 highest cache level.  */
      if (max_cpuid >= 4)
	{
	  int i = 0;

	  /* Query until desired cache level is enumerated.  */
	  do
	    {
	      __cpuid_count (4, i++, eax, ebx, ecx, edx);

	      /* There seems to be a bug in at least some Pentium Ds
		 which sometimes fail to iterate all cache parameters.
		 Do not loop indefinitely here, stop in this case and
		 assume there is no such information.  */
	      if ((eax & 0x1f) == 0)
		goto intel_bug_no_cache_info;
	    }
	  while (((eax >> 5) & 0x7) != level);

	  threads = (eax >> 14) & 0x3ff;

	  /* If max_cpuid >= 11, THREADS is the maximum number of
	     addressable IDs for logical processors sharing the
	     cache, instead of the maximum number of threads
	     sharing the cache.  */
	  if (threads && max_cpuid >= 11)
	    {
	      /* Find the number of logical processors shipped in
		 one core and apply count mask.  */
	      i = 0;
	      while (1)
		{
		  __cpuid_count (11, i++, eax, ebx, ecx, edx);

		  int shipped = ebx & 0xff;
		  int type = ecx & 0xff0;
		  if (shipped == 0 || type == 0)
		    break;
		  else if (type == 0x200)	/* Core level.  */
		    {
		      int count_mask;

		      /* Compute count mask: lowest set bits up to and
			 including the highest bit of THREADS.  */
		      asm ("bsr %1, %0"
			   : "=r" (count_mask) : "g" (threads));
		      count_mask = ~(-1 << (count_mask + 1));
		      threads = (shipped - 1) & count_mask;
		      break;
		    }
		}
	    }
	  threads += 1;
	}
      else
	{
	  /* NB: also reached via goto from the leaf-4 loop above when
	     the CPU fails to enumerate its cache parameters.  */
	intel_bug_no_cache_info:
	  /* Assume that all logical threads share the highest cache
	     level (leaf-1 EBX[23:16] = logical processor count).  */

	  threads = (ebx_1 >> 16) & 0xff;
	}

      /* Cap usage of highest cache level to the number of supported
	 threads.  */
      if (shared > 0 && threads > 0)
	shared /= threads;
    }
  /* AMD processors (see the is_amd macro above).  */
  else if (is_amd)
    {
      data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
      long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);

#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
# ifdef USE_MULTIARCH
      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
      ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
# else
      __cpuid (1, eax, ebx, ecx, edx);
# endif

      /* AMD prefers SSSE3 instructions for memory/string routines
	 if they are available, otherwise it prefers integer
	 instructions.  */
      if ((ecx & 0x200))
	__x86_preferred_memory_instruction = 3;
      else
	__x86_preferred_memory_instruction = 0;
#endif

      /* Get maximum extended function.  */
      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);

      if (shared <= 0)
	/* No shared L3 cache.  All we have is the L2 cache.  */
	shared = core;
      else
	{
	  /* Figure out the number of logical threads that share L3.  */
	  if (max_cpuid_ex >= 0x80000008)
	    {
	      /* Get width of APIC ID (leaf 0x80000008 ECX[15:12]).  */
	      __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
	      threads = 1 << ((ecx >> 12) & 0x0f);
	    }

	  if (threads == 0)
	    {
	      /* If APIC ID width is not available, use logical
		 processor count (valid only when EDX bit 28, HTT,
		 is set).  */
	      __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);

	      if ((edx & (1 << 28)) != 0)
		threads = (ebx >> 16) & 0xff;
	    }

	  /* Cap usage of highest cache level to the number of
	     supported threads.  */
	  if (threads > 0)
	    shared /= threads;

	  /* Account for exclusive L2 and L3 caches.  */
	  shared += core;
	}

#ifndef DISABLE_PREFETCHW
      if (max_cpuid_ex >= 0x80000001)
	{
	  __cpuid (0x80000001, eax, ebx, ecx, edx);
	  /* PREFETCHW (ECX bit 8) || 3DNow! (EDX bit 31).  */
	  if ((ecx & 0x100) || (edx & 0x80000000))
	    __x86_prefetchw = -1;
	}
#endif
    }

  if (data > 0)
    {
      __x86_raw_data_cache_size_half = data / 2;
      __x86_raw_data_cache_size = data;
      /* Round data cache size to multiple of 256 bytes.  */
      data = data & ~255L;
      __x86_data_cache_size_half = data / 2;
      __x86_data_cache_size = data;
    }

  if (shared > 0)
    {
      __x86_raw_shared_cache_size_half = shared / 2;
      __x86_raw_shared_cache_size = shared;
      /* Round shared cache size to multiple of 256 bytes.  */
      shared = shared & ~255L;
      __x86_shared_cache_size_half = shared / 2;
      __x86_shared_cache_size = shared;
    }
}