/* Source: sysdeps/x86_64/cacheinfo.c from glibc
   (thirdparty/glibc.git, commit 9aed28f854c7375c1757bd916ce0802ff5c8ab94).  */
1 /* x86_64 cache info.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19 #include <assert.h>
20 #include <stdbool.h>
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <cpuid.h>
24
25 #ifndef __cpuid_count
26 /* FIXME: Provide __cpuid_count if it isn't defined. Copied from gcc
27 4.4.0. Remove this if gcc 4.4 is the minimum requirement. */
28 # if defined(__i386__) && defined(__PIC__)
29 /* %ebx may be the PIC register. */
30 # define __cpuid_count(level, count, a, b, c, d) \
31 __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
32 "cpuid\n\t" \
33 "xchg{l}\t{%%}ebx, %1\n\t" \
34 : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
35 : "0" (level), "2" (count))
36 # else
37 # define __cpuid_count(level, count, a, b, c, d) \
38 __asm__ ("cpuid\n\t" \
39 : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
40 : "0" (level), "2" (count))
41 # endif
42 #endif
43
44 #ifdef USE_MULTIARCH
45 # include "multiarch/init-arch.h"
46
47 # define is_intel __cpu_features.kind == arch_kind_intel
48 # define is_amd __cpu_features.kind == arch_kind_amd
49 # define max_cpuid __cpu_features.max_cpuid
50 #else
51 /* This spells out "GenuineIntel". */
52 # define is_intel \
53 ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
54 /* This spells out "AuthenticAMD". */
55 # define is_amd \
56 ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
57 #endif
58
/* Table of known cache descriptors returned by Intel CPUID leaf 2.
   Each entry maps a one-byte descriptor to the cache it names: which
   _SC_* cache it applies to (stored relative to _SC_LEVEL1_ICACHE_SIZE
   so it fits in a byte), the ways of associativity, the line size in
   bytes, and the total size in bytes.  TLB descriptors and other
   non-cache descriptors are deliberately absent.
   NB: the table must stay sorted by IDX; intel_check_word looks
   descriptors up with bsearch using intel_02_known_compare.  */
static const struct intel_02_cache_info
{
  unsigned char idx;		/* Descriptor byte from CPUID leaf 2.  */
  unsigned char assoc;		/* Ways of associativity.  */
  unsigned char linesize;	/* Line size in bytes.  */
  unsigned char rel_name;	/* M(_SC_LEVEL*_CACHE_SIZE) this describes.  */
  unsigned int size;		/* Total cache size in bytes.  */
} intel_02_known [] =
  {
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),   24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),  3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),  4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),  6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
  };

/* Number of entries in the descriptor table.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
140
141 static int
142 intel_02_known_compare (const void *p1, const void *p2)
143 {
144 const struct intel_02_cache_info *i1;
145 const struct intel_02_cache_info *i2;
146
147 i1 = (const struct intel_02_cache_info *) p1;
148 i2 = (const struct intel_02_cache_info *) p2;
149
150 if (i1->idx == i2->idx)
151 return 0;
152
153 return i1->idx < i2->idx ? -1 : 1;
154 }
155
156
/* Decode one register VALUE returned by Intel CPUID leaf 2.  NAME is
   the _SC_* cache attribute being looked up.  Each of the four bytes
   packed into VALUE is a cache/TLB descriptor which is looked up in
   intel_02_known.  Sets *HAS_LEVEL_2 when a level 2 cache descriptor
   is seen and *NO_LEVEL_2_OR_3 when descriptor 0x40 is seen.  Returns
   the requested value, or 0 if this register does not provide it.  */
static long int
__attribute__ ((noinline))
intel_check_word (int name, unsigned int value, bool *has_level_2,
		  bool *no_level_2_or_3)
{
  if ((value & 0x80000000) != 0)
    /* The register value is reserved.  */
    return 0;

  /* Fold the name.  The _SC_ constants are always in the order SIZE,
     ASSOC, LINESIZE.  */
  int folded_rel_name = (M(name) / 3) * 3;

  /* Walk the four descriptor bytes, low byte first.  */
  while (value != 0)
    {
      unsigned int byte = value & 0xff;

      if (byte == 0x40)
	{
	  /* Descriptor 0x40: no L2 cache, or, if an L2 cache is
	     present, no L3 cache.  */
	  *no_level_2_or_3 = true;

	  if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
	    /* No need to look further.  */
	    break;
	}
      else if (byte == 0xff)
	{
	  /* CPUID leaf 0x4 contains all the information.  We need to
	     iterate over it.  */
	  unsigned int eax;
	  unsigned int ebx;
	  unsigned int ecx;
	  unsigned int edx;

	  unsigned int round = 0;
	  while (1)
	    {
	      /* Issue CPUID leaf 4, subleaf ROUND.  %ebx is swapped
		 out around the instruction so this also assembles
		 when %ebx is the PIC register.  */
	      asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
			    : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
			    : "0" (4), "2" (round));

	      /* EAX bits 4:0 are the cache type; 0 ends the list.  */
	      enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
	      if (type == null)
		/* That was the end.  */
		break;

	      /* EAX bits 7:5 are the cache level.  */
	      unsigned int level = (eax >> 5) & 0x7;

	      if ((level == 1 && type == data
		   && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
		  || (level == 1 && type == inst
		      && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
		  || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
		  || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
		  || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
		{
		  /* OFFSET selects SIZE (0), ASSOC (1) or LINESIZE (2)
		     within the matched cache level.  */
		  unsigned int offset = M(name) - folded_rel_name;

		  if (offset == 0)
		    /* Cache size = ways * partitions * line size * sets;
		       each field holds the value minus one.  */
		    return (((ebx >> 22) + 1)
			    * (((ebx >> 12) & 0x3ff) + 1)
			    * ((ebx & 0xfff) + 1)
			    * (ecx + 1));
		  if (offset == 1)
		    /* Ways of associativity (EBX bits 31:22, plus one).  */
		    return (ebx >> 22) + 1;

		  assert (offset == 2);
		  /* Line size (EBX bits 11:0, plus one).  */
		  return (ebx & 0xfff) + 1;
		}

	      ++round;
	    }
	  /* There is no other cache information anywhere else.  */
	  break;
	}
      else
	{
	  if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
	    {
	      /* Intel reused this value.  For family 15, model 6 it
		 specifies the 3rd level cache.  Otherwise the 2nd
		 level cache.  */
	      unsigned int family;
	      unsigned int model;
#ifdef USE_MULTIARCH
	      family = __cpu_features.family;
	      model = __cpu_features.model;
#else
	      unsigned int eax;
	      unsigned int ebx;
	      unsigned int ecx;
	      unsigned int edx;
	      __cpuid (1, eax, ebx, ecx, edx);

	      /* Combine the base and extended family/model fields of
		 the CPUID leaf 1 signature in EAX.  */
	      family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
	      model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf);
#endif

	      if (family == 15 && model == 6)
		{
		  /* The level 3 cache is encoded for this model like
		     the level 2 cache is for other models.  Pretend
		     the caller asked for the level 2 cache.  */
		  name = (_SC_LEVEL2_CACHE_SIZE
			  + (name - _SC_LEVEL3_CACHE_SIZE));
		  folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
		}
	    }

	  /* Look the descriptor byte up in the sorted table.  */
	  struct intel_02_cache_info *found;
	  struct intel_02_cache_info search;

	  search.idx = byte;
	  found = bsearch (&search, intel_02_known, nintel_02_known,
			   sizeof (intel_02_known[0]), intel_02_known_compare);
	  if (found != NULL)
	    {
	      if (found->rel_name == folded_rel_name)
		{
		  /* OFFSET selects SIZE (0), ASSOC (1) or LINESIZE (2).  */
		  unsigned int offset = M(name) - folded_rel_name;

		  if (offset == 0)
		    /* Cache size.  */
		    return found->size;
		  if (offset == 1)
		    return found->assoc;

		  assert (offset == 2);
		  return found->linesize;
		}

	      /* Not the level asked for, but remember that an L2
		 cache exists.  */
	      if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
		*has_level_2 = true;
	    }
	}

      /* Next byte for the next round.  */
      value >>= 8;
    }

  /* Nothing found.  */
  return 0;
}
301
302
/* Answer the cache request NAME (an _SC_LEVEL*_* constant) on a
   GenuineIntel processor.  MAXIDX is the highest supported basic CPUID
   leaf; leaf 2 must be available.  Returns the requested value, 0 if
   it could not be determined, or -1 for an L2/L3 request on a CPU
   which announced it has no such cache.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  assert (maxidx >= 2);

  bool no_level_2_or_3 = false;
  bool has_level_2 = false;
  unsigned int rounds = 1;
  unsigned int round = 0;

  /* CPUID leaf 2 packs cache descriptors into EAX..EDX; the low byte
     of EAX on the first invocation says how many times the leaf must
     be queried (at least once, which we are already doing).  */
  do
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __cpuid (2, eax, ebx, ecx, edx);

      if (round == 0)
	{
	  /* First round: record the iteration count and strip it
	     from EAX before decoding the register.  */
	  rounds = eax & 0xff;
	  eax &= 0xffffff00;
	}

      /* Decode the four registers in architectural order.  */
      unsigned int words[4] = { eax, ebx, ecx, edx };
      for (int i = 0; i < 4; ++i)
	{
	  long int result = intel_check_word (name, words[i], &has_level_2,
					      &no_level_2_or_3);
	  if (result != 0)
	    return result;
	}
    }
  while (++round < rounds);

  /* Descriptor 0x40 announced the absence of the requested L2/L3
     cache.  */
  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}
357
358
/* Answer the cache request NAME (an _SC_LEVEL*_* constant) on an
   AuthenticAMD processor, using the extended CPUID leaves:
   0x80000005 describes the L1 caches and 0x80000006 the L2/L3 caches.
   Returns the requested value, or 0 when it is not available.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  /* EAX of leaf 0x80000000 is the highest supported extended leaf.  */
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  /* L1 requests need leaf 0x80000005, L2/L3 requests 0x80000006.  */
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* L1 instruction cache request: leaf 0x80000005 reports the
	 I-cache in EDX with the same layout as the D-cache in ECX,
	 so remap the request onto the D-cache cases below.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      /* ECX bits 31:24 give the size in KiB; scale to bytes.  */
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      /* ECX bits 23:16 give the associativity.  */
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
	/* Fully associative.  */
	return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      /* ECX bits 7:0 give the line size in bytes.  */
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      /* Associativity field 0 means no/disabled L2 cache.
	 ECX bits 31:16 give the size in KiB; scale to bytes.  */
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      /* The 4-bit L2 associativity field is encoded.  */
      switch ((ecx >> 12) & 0xf)
	{
	case 0:
	case 1:
	case 2:
	case 4:
	  /* These values encode themselves.  */
	  return (ecx >> 12) & 0xf;
	case 6:
	  return 8;
	case 8:
	  return 16;
	case 10:
	  return 32;
	case 11:
	  return 48;
	case 12:
	  return 64;
	case 13:
	  return 96;
	case 14:
	  return 128;
	case 15:
	  /* Fully associative: size in bytes / line size.  */
	  return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
	default:
	  return 0;
	}
      /* NOTREACHED */

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      /* EDX bits 31:18 give the size in 512 KiB units; scale to
	 bytes.  Associativity field 0 means no L3 cache.  */
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      /* Same encoded associativity field as for L2, but in EDX.  */
      switch ((edx >> 12) & 0xf)
	{
	case 0:
	case 1:
	case 2:
	case 4:
	  return (edx >> 12) & 0xf;
	case 6:
	  return 8;
	case 8:
	  return 16;
	case 10:
	  return 32;
	case 11:
	  return 48;
	case 12:
	  return 64;
	case 13:
	  return 96;
	case 14:
	  return 128;
	case 15:
	  /* Fully associative: size in bytes / line size.  */
	  return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
	default:
	  return 0;
	}
      /* NOTREACHED */

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      /* All valid NAME values were handled above.  */
      assert (! "cannot happen");
    }
  return -1;
}
474
475
/* Get the value of the system variable NAME (an _SC_LEVEL*_* cache
   constant) for sysconf.  Dispatches to the vendor-specific handler;
   returns 0 when the processor vendor is unknown.  */
long int
attribute_hidden
__cache_sysconf (int name)
{
#ifdef USE_MULTIARCH
  if (__cpu_features.kind == arch_kind_unknown)
    __init_cpu_features ();
#else
  /* Find out what brand of processor.  CPUID leaf 0 puts the highest
     supported leaf in EAX and the vendor string in EBX/EDX/ECX, which
     the is_intel/is_amd macros examine.  */
  unsigned int max_cpuid;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0, max_cpuid, ebx, ecx, edx);
#endif

  if (is_intel)
    return handle_intel (name, max_cpuid);

  if (is_amd)
    return handle_amd (name);

  // XXX Fill in more vendors.

  /* CPU not known, we have no information.  */
  return 0;
}
504
505
/* Tunables exported to the memory and string routines; all are given
   conservative defaults here and overwritten by init_cacheinfo below
   when the CPU provides the information.  */

/* Data cache size for use in memory and string routines, typically
   L1 size, rounded to multiple of 256 bytes.  */
long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
long int __x86_data_cache_size attribute_hidden = 32 * 1024;
/* Similar to __x86_data_cache_size_half, but not rounded.  */
long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
/* Similar to __x86_data_cache_size, but not rounded.  */
long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
/* Shared cache size for use in memory and string routines, typically
   L2 or L3 size, rounded to multiple of 256 bytes.  */
long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
/* Similar to __x86_shared_cache_size_half, but not rounded.  */
long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
/* Similar to __x86_shared_cache_size, but not rounded.  */
long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;

#ifndef DISABLE_PREFETCHW
/* PREFETCHW support flag for use in memory and string routines.
   Set to -1 by init_cacheinfo when the AMD CPU reports PREFETCHW or
   3DNow!; zero-initialized otherwise.  */
int __x86_prefetchw attribute_hidden;
#endif

#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
/* Instructions preferred for memory and string routines.

     0: Regular instructions
     1: MMX instructions
     2: SSE2 instructions
     3: SSSE3 instructions

   Filled in by init_cacheinfo from CPUID feature bits (Intel and AMD
   paths only; stays 0 for unknown vendors).  */
int __x86_preferred_memory_instruction attribute_hidden;
#endif
539
540
/* ELF constructor: probe the CPU's cache hierarchy and thread topology
   and fill in the __x86_* tunables above before any string/memory
   routine uses them.  */
static void
__attribute__((constructor))
init_cacheinfo (void)
{
  /* Find out what brand of processor.  */
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  int max_cpuid_ex;
  long int data = -1;		/* Per-thread (L1 data) cache size.  */
  long int shared = -1;		/* Shared (L2/L3) cache size.  */
  unsigned int level;		/* Cache level SHARED refers to.  */
  unsigned int threads = 0;	/* Threads sharing that cache level.  */

#ifdef USE_MULTIARCH
  if (__cpu_features.kind == arch_kind_unknown)
    __init_cpu_features ();
#else
  int max_cpuid;
  __cpuid (0, max_cpuid, ebx, ecx, edx);
#endif

  if (is_intel)
    {
      data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);

      /* Try L3 first.  */
      level = 3;
      shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);

      if (shared <= 0)
	{
	  /* Try L2 otherwise.  */
	  level = 2;
	  shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
	}

      /* Keep CPUID leaf 1 EBX apart from the scratch EBX: it is still
	 needed in the intel_bug_no_cache_info fallback below.  */
      unsigned int ebx_1;

#ifdef USE_MULTIARCH
      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
      ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
#else
      __cpuid (1, eax, ebx_1, ecx, edx);
#endif

#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
      /* Intel prefers SSSE3 instructions for memory/string routines
	 if they are available.  ECX bit 9 of leaf 1 is SSSE3.  */
      if ((ecx & 0x200))
	__x86_preferred_memory_instruction = 3;
      else
	__x86_preferred_memory_instruction = 2;
#endif

      /* Figure out the number of logical threads that share the
	 highest cache level.  */
      if (max_cpuid >= 4)
	{
	  int i = 0;

	  /* Query until desired cache level is enumerated.  */
	  do
	    {
	      __cpuid_count (4, i++, eax, ebx, ecx, edx);

	      /* There seems to be a bug in at least some Pentium Ds
		 which sometimes fail to iterate all cache parameters.
		 Do not loop indefinitely here, stop in this case and
		 assume there is no such information.  */
	      if ((eax & 0x1f) == 0)
		goto intel_bug_no_cache_info;
	    }
	  while (((eax >> 5) & 0x7) != level);

	  /* EAX bits 25:14 of leaf 4: maximum number of addressable
	     IDs for logical processors sharing this cache, minus 1
	     (the final "threads += 1" accounts for that).  */
	  threads = (eax >> 14) & 0x3ff;

	  /* If max_cpuid >= 11, THREADS is the maximum number of
	     addressable IDs for logical processors sharing the
	     cache, instead of the maximum number of threads
	     sharing the cache.  */
	  if (threads && max_cpuid >= 11)
	    {
	      /* Find the number of logical processors shipped in
		 one core and apply count mask.  */
	      i = 0;
	      while (1)
		{
		  /* Leaf 11 enumerates topology levels; ECX bits 15:8
		     carry the level type, EBX the logical processor
		     count at that level.  */
		  __cpuid_count (11, i++, eax, ebx, ecx, edx);

		  int shipped = ebx & 0xff;
		  int type = ecx & 0xff0;
		  if (shipped == 0 || type == 0)
		    break;
		  else if (type == 0x200)	/* Core level.  */
		    {
		      int count_mask;

		      /* Compute count mask: a mask covering up to the
			 highest set bit of THREADS.  */
		      asm ("bsr %1, %0"
			   : "=r" (count_mask) : "g" (threads));
		      count_mask = ~(-1 << (count_mask + 1));
		      threads = (shipped - 1) & count_mask;
		      break;
		    }
		}
	    }
	  threads += 1;
	}
      else
	{
	  /* NB: reached via goto from the leaf-4 loop above as well
	     as via the else.  */
	intel_bug_no_cache_info:
	  /* Assume that all logical threads share the highest cache level.
	     Leaf 1 EBX bits 23:16: logical processor count.  */

	  threads = (ebx_1 >> 16) & 0xff;
	}

      /* Cap usage of highest cache level to the number of supported
	 threads.  */
      if (shared > 0 && threads > 0)
	shared /= threads;
    }
  /* This spells out "AuthenticAMD".  */
  else if (is_amd)
    {
      data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
      long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);

#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
# ifdef USE_MULTIARCH
      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
      ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
# else
      __cpuid (1, eax, ebx, ecx, edx);
# endif

      /* AMD prefers SSSE3 instructions for memory/string routines
	 if they are avaiable, otherwise it prefers integer
	 instructions.  ECX bit 9 of leaf 1 is SSSE3.  */
      if ((ecx & 0x200))
	__x86_preferred_memory_instruction = 3;
      else
	__x86_preferred_memory_instruction = 0;
#endif

      /* Get maximum extended function.  */
      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);

      if (shared <= 0)
	/* No shared L3 cache.  All we have is the L2 cache.  */
	shared = core;
      else
	{
	  /* Figure out the number of logical threads that share L3.  */
	  if (max_cpuid_ex >= 0x80000008)
	    {
	      /* Get width of APIC ID.  ECX bits 15:12 of leaf
		 0x80000008 give log2 of the core count.  */
	      __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
	      threads = 1 << ((ecx >> 12) & 0x0f);
	    }

	  if (threads == 0)
	    {
	      /* If APIC ID width is not available, use logical
		 processor count.  EDX bit 28 of leaf 1 signals HTT,
		 EBX bits 23:16 the logical processor count.  */
	      __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);

	      if ((edx & (1 << 28)) != 0)
		threads = (ebx >> 16) & 0xff;
	    }

	  /* Cap usage of highest cache level to the number of
	     supported threads.  */
	  if (threads > 0)
	    shared /= threads;

	  /* Account for exclusive L2 and L3 caches.  */
	  shared += core;
	}

#ifndef DISABLE_PREFETCHW
      if (max_cpuid_ex >= 0x80000001)
	{
	  __cpuid (0x80000001, eax, ebx, ecx, edx);
	  /* PREFETCHW || 3DNow!  */
	  if ((ecx & 0x100) || (edx & 0x80000000))
	    __x86_prefetchw = -1;
	}
#endif
    }

  /* Publish the probed sizes; keep the defaults when probing failed.  */
  if (data > 0)
    {
      __x86_raw_data_cache_size_half = data / 2;
      __x86_raw_data_cache_size = data;
      /* Round data cache size to multiple of 256 bytes.  */
      data = data & ~255L;
      __x86_data_cache_size_half = data / 2;
      __x86_data_cache_size = data;
    }

  if (shared > 0)
    {
      __x86_raw_shared_cache_size_half = shared / 2;
      __x86_raw_shared_cache_size = shared;
      /* Round shared cache size to multiple of 256 bytes.  */
      shared = shared & ~255L;
      __x86_shared_cache_size_half = shared / 2;
      __x86_shared_cache_size = shared;
    }
}