]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/x86_64/cacheinfo.c
X86-64: Use non-temporal store in memcpy on large data
[thirdparty/glibc.git] / sysdeps / x86_64 / cacheinfo.c
CommitLineData
6d59823c 1/* x86_64 cache info.
f7a9f785 2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
bfe6f5fa
UD
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
bfe6f5fa
UD
18
19#include <assert.h>
20#include <stdbool.h>
21#include <stdlib.h>
22#include <unistd.h>
6f6f1215 23#include <cpuid.h>
1ae6c72d 24#include <init-arch.h>
6f6f1215 25
e2e4f560
L
26#define is_intel GLRO(dl_x86_cpu_features).kind == arch_kind_intel
27#define is_amd GLRO(dl_x86_cpu_features).kind == arch_kind_amd
28#define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid
425ce2ed 29
bfe6f5fa
UD
/* Cache descriptor bytes returned by Intel CPUID leaf 2, sorted in
   ascending order of the descriptor value so the table can be searched
   with bsearch.  The rel_name member stores the _SC_* parameter the
   entry describes, made relative to _SC_LEVEL1_ICACHE_SIZE so that it
   fits into an unsigned char.  */
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)

static const struct intel_02_cache_info
{
  unsigned char idx;		/* Descriptor byte from CPUID leaf 2.  */
  unsigned char assoc;		/* Associativity.  */
  unsigned char linesize;	/* Cache line size in bytes.  */
  unsigned char rel_name;	/* Relative _SC_* name, see M above.  */
  unsigned int size;		/* Total cache size in bytes.  */
} intel_02_known [] =
  {
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),     8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),    24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),   3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),   4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),   6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  25165824 },
  };

#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))

/* bsearch comparison callback: order two table entries by their
   descriptor byte.  Returns -1, 0 or 1.  */
static int
intel_02_known_compare (const void *p1, const void *p2)
{
  const struct intel_02_cache_info *lhs = p1;
  const struct intel_02_cache_info *rhs = p2;

  return (lhs->idx > rhs->idx) - (lhs->idx < rhs->idx);
}
126
127
/* Scan one CPUID-leaf-2 register VALUE for cache descriptor bytes and
   return the value of sysconf parameter NAME if a matching descriptor
   is found, 0 otherwise.  Sets *NO_LEVEL_2_OR_3 when descriptor 0x40
   (no L2, or no L3 if an L2 exists) is seen and *HAS_LEVEL_2 when any
   L2 descriptor is seen.  May chain to CPUID leaf 4 when descriptor
   0xff is present.  */
static long int
__attribute__ ((noinline))
intel_check_word (int name, unsigned int value, bool *has_level_2,
		  bool *no_level_2_or_3)
{
  if ((value & 0x80000000) != 0)
    /* The register value is reserved.  */
    return 0;

  /* Fold the name.  The _SC_ constants are always in the order SIZE,
     ASSOC, LINESIZE.  */
  int folded_rel_name = (M(name) / 3) * 3;

  /* Consume the register one descriptor byte at a time, low byte
     first.  */
  while (value != 0)
    {
      unsigned int byte = value & 0xff;

      if (byte == 0x40)
	{
	  *no_level_2_or_3 = true;

	  if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
	    /* No need to look further.  */
	    break;
	}
      else if (byte == 0xff)
	{
	  /* CPUID leaf 0x4 contains all the information.  We need to
	     iterate over it.  */
	  unsigned int eax;
	  unsigned int ebx;
	  unsigned int ecx;
	  unsigned int edx;

	  unsigned int round = 0;
	  while (1)
	    {
	      __cpuid_count (4, round, eax, ebx, ecx, edx);

	      /* Low 5 bits of EAX give the cache type; 0 terminates
		 the enumeration.  */
	      enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
	      if (type == null)
		/* That was the end.  */
		break;

	      unsigned int level = (eax >> 5) & 0x7;

	      if ((level == 1 && type == data
		   && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
		  || (level == 1 && type == inst
		      && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
		  || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
		  || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
		  || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
		{
		  /* OFFSET selects SIZE (0), ASSOC (1) or LINESIZE (2)
		     within the folded triple.  */
		  unsigned int offset = M(name) - folded_rel_name;

		  if (offset == 0)
		    /* Cache size: (ways) * (partitions) * (line size)
		       * (sets), each field stored minus one (leaf-4
		       encoding per the Intel SDM).  */
		    return (((ebx >> 22) + 1)
			    * (((ebx >> 12) & 0x3ff) + 1)
			    * ((ebx & 0xfff) + 1)
			    * (ecx + 1));
		  if (offset == 1)
		    /* Associativity (ways).  */
		    return (ebx >> 22) + 1;

		  assert (offset == 2);
		  /* Line size.  */
		  return (ebx & 0xfff) + 1;
		}

	      ++round;
	    }
	  /* There is no other cache information anywhere else.  */
	  break;
	}
      else
	{
	  if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
	    {
	      /* Intel reused this value.  For family 15, model 6 it
		 specifies the 3rd level cache.  Otherwise the 2nd
		 level cache.  */
	      unsigned int family = GLRO(dl_x86_cpu_features).family;
	      unsigned int model = GLRO(dl_x86_cpu_features).model;

	      if (family == 15 && model == 6)
		{
		  /* The level 3 cache is encoded for this model like
		     the level 2 cache is for other models.  Pretend
		     the caller asked for the level 2 cache.  */
		  name = (_SC_LEVEL2_CACHE_SIZE
			  + (name - _SC_LEVEL3_CACHE_SIZE));
		  folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
		}
	    }

	  /* Ordinary descriptor byte: look it up in the sorted table.  */
	  struct intel_02_cache_info *found;
	  struct intel_02_cache_info search;

	  search.idx = byte;
	  found = bsearch (&search, intel_02_known, nintel_02_known,
			   sizeof (intel_02_known[0]), intel_02_known_compare);
	  if (found != NULL)
	    {
	      if (found->rel_name == folded_rel_name)
		{
		  unsigned int offset = M(name) - folded_rel_name;

		  if (offset == 0)
		    /* Cache size.  */
		    return found->size;
		  if (offset == 1)
		    return found->assoc;

		  assert (offset == 2);
		  return found->linesize;
		}

	      /* Not the requested parameter, but remember that an L2
		 cache exists.  */
	      if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
		*has_level_2 = true;
	    }
	}

      /* Next byte for the next round.  */
      value >>= 8;
    }

  /* Nothing found.  */
  return 0;
}
257
258
/* Return the value of sysconf parameter NAME for an Intel CPU by
   walking the CPUID leaf 2 descriptors.  MAXIDX is the highest
   supported basic CPUID leaf; the caller guarantees it is at least 2.
   Returns 0 when nothing is known and -1 when a level 2/3 request is
   made on a CPU that reported it has no L2/L3 cache.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  assert (maxidx >= 2);

  bool no_level_2_or_3 = false;
  bool has_level_2 = false;
  unsigned int rounds = 1;

  for (unsigned int iter = 0; iter < rounds; ++iter)
    {
      unsigned int regs[4];
      __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

      if (iter == 0)
	{
	  /* The low byte of EAX in the first round contains the total
	     number of rounds to make — at least one, the one we are
	     already doing.  Mask it out so it is not mistaken for a
	     descriptor byte.  */
	  rounds = regs[0] & 0xff;
	  regs[0] &= 0xffffff00;
	}

      /* Each register carries up to four descriptor bytes; scan all
	 four registers the same way.  (has_level_2 is filled in by the
	 helper but not consulted here.)  */
      for (int r = 0; r < 4; ++r)
	{
	  long int result
	    = intel_check_word (name, regs[r], &has_level_2,
				&no_level_2_or_3);
	  if (result != 0)
	    return result;
	}
    }

  /* Descriptor 0x40 announced the absence of an L2/L3 cache.  */
  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}
313
314
/* Return the value of sysconf parameter NAME for an AMD CPU, decoding
   the extended CPUID leaves 0x80000005 (L1 caches) and 0x80000006
   (L2/L3 caches).  Returns 0 when the information is unavailable.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  /* EAX receives the highest supported extended leaf; compared
     against FN below.  */
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  /* Leaf 0x80000005 for L1, 0x80000006 for L2/L3.  */
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* L1 icache requests: the icache data arrives in EDX laid out
	 like the dcache data in ECX, so rewrite NAME to the dcache
	 equivalent and reuse the ECX decoding below.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      /* Size in KB lives in the top byte; (>> 14) & 0x3fc00 is
	 (ecx >> 24) * 1024 in one step.  */
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
	/* Fully associative.  */
	return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      /* Associativity field of zero means the L2 cache is disabled.  */
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      /* 4-bit encoded associativity; 15 means fully associative, for
	 which we compute lines = size / linesize.  */
      switch ((ecx >> 12) & 0xf)
	{
	case 0:
	case 1:
	case 2:
	case 4:
	  return (ecx >> 12) & 0xf;
	case 6:
	  return 8;
	case 8:
	  return 16;
	case 10:
	  return 32;
	case 11:
	  return 48;
	case 12:
	  return 64;
	case 13:
	  return 96;
	case 14:
	  return 128;
	case 15:
	  return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
	default:
	  return 0;
	}
      /* NOTREACHED */

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      /* L3 data is in EDX; size is reported in 512 KB units.  */
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      /* Same 4-bit associativity encoding as L2 above.  */
      switch ((edx >> 12) & 0xf)
	{
	case 0:
	case 1:
	case 2:
	case 4:
	  return (edx >> 12) & 0xf;
	case 6:
	  return 8;
	case 8:
	  return 16;
	case 10:
	  return 32;
	case 11:
	  return 48;
	case 12:
	  return 64;
	case 13:
	  return 96;
	case 14:
	  return 128;
	case 15:
	  return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
	default:
	  return 0;
	}
      /* NOTREACHED */

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}
430
431
432/* Get the value of the system variable NAME. */
433long int
434attribute_hidden
435__cache_sysconf (int name)
436{
b2509a1e
UD
437 if (is_intel)
438 return handle_intel (name, max_cpuid);
bfe6f5fa 439
b2509a1e 440 if (is_amd)
bfe6f5fa
UD
441 return handle_amd (name);
442
443 // XXX Fill in more vendors.
444
445 /* CPU not known, we have no information. */
446 return 0;
447}
448
449
/* Tuning variables consumed by the memory and string routines.  The
   initializers below are conservative fallbacks; init_cacheinfo
   overwrites them at startup with values measured from CPUID.  */

/* Data cache size for use in memory and string routines, typically
   L1 size, rounded to multiple of 256 bytes.  */
long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
long int __x86_data_cache_size attribute_hidden = 32 * 1024;
/* Similar to __x86_data_cache_size_half, but not rounded.  */
long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
/* Similar to __x86_data_cache_size, but not rounded.  */
long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
/* Shared cache size for use in memory and string routines, typically
   L2 or L3 size, rounded to multiple of 256 bytes.  */
long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
/* Similar to __x86_shared_cache_size_half, but not rounded.  */
long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
/* Similar to __x86_shared_cache_size, but not rounded.  */
long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;

/* Threshold to use non temporal store.  Set by init_cacheinfo from
   the shared cache size.  */
long int __x86_shared_non_temporal_threshold attribute_hidden;

#ifndef DISABLE_PREFETCHW
/* PREFETCHW support flag for use in memory and string routines;
   init_cacheinfo sets it to -1 when the CPU advertises PREFETCHW or
   3DNow!.  */
int __x86_prefetchw attribute_hidden;
#endif
bfe6f5fa
UD
474
475
476static void
477__attribute__((constructor))
478init_cacheinfo (void)
479{
480 /* Find out what brand of processor. */
481 unsigned int eax;
482 unsigned int ebx;
483 unsigned int ecx;
484 unsigned int edx;
bfe6f5fa 485 int max_cpuid_ex;
0435403c 486 long int data = -1;
bfe6f5fa
UD
487 long int shared = -1;
488 unsigned int level;
489 unsigned int threads = 0;
490
425ce2ed 491 if (is_intel)
bfe6f5fa 492 {
0435403c 493 data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
6d59823c 494
0435403c 495 /* Try L3 first. */
bfe6f5fa 496 level = 3;
6d59823c
UD
497 shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
498
bfe6f5fa 499 if (shared <= 0)
7e4ba49c 500 {
0435403c 501 /* Try L2 otherwise. */
7e4ba49c
HJ
502 level = 2;
503 shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
bfe6f5fa 504 }
6d59823c
UD
505
506 /* Figure out the number of logical threads that share the
0435403c 507 highest cache level. */
bfe6f5fa 508 if (max_cpuid >= 4)
7e4ba49c 509 {
e2e4f560
L
510 unsigned int family = GLRO(dl_x86_cpu_features).family;
511 unsigned int model = GLRO(dl_x86_cpu_features).model;
512
bfe6f5fa 513 int i = 0;
6d59823c 514
0435403c 515 /* Query until desired cache level is enumerated. */
bfe6f5fa
UD
516 do
517 {
6f6f1215 518 __cpuid_count (4, i++, eax, ebx, ecx, edx);
5a01ab7b
UD
519
520 /* There seems to be a bug in at least some Pentium Ds
521 which sometimes fail to iterate all cache parameters.
522 Do not loop indefinitely here, stop in this case and
523 assume there is no such information. */
524 if ((eax & 0x1f) == 0)
525 goto intel_bug_no_cache_info;
bfe6f5fa 526 }
7e4ba49c 527 while (((eax >> 5) & 0x7) != level);
6d59823c 528
a546baa9 529 threads = (eax >> 14) & 0x3ff;
3aa2588d 530
a546baa9
L
531 /* If max_cpuid >= 11, THREADS is the maximum number of
532 addressable IDs for logical processors sharing the
533 cache, instead of the maximum number of threads
534 sharing the cache. */
535 if (threads && max_cpuid >= 11)
536 {
537 /* Find the number of logical processors shipped in
538 one core and apply count mask. */
539 i = 0;
540 while (1)
541 {
542 __cpuid_count (11, i++, eax, ebx, ecx, edx);
543
544 int shipped = ebx & 0xff;
545 int type = ecx & 0xff0;
546 if (shipped == 0 || type == 0)
547 break;
548 else if (type == 0x200)
549 {
550 int count_mask;
551
552 /* Compute count mask. */
553 asm ("bsr %1, %0"
554 : "=r" (count_mask) : "g" (threads));
555 count_mask = ~(-1 << (count_mask + 1));
556 threads = (shipped - 1) & count_mask;
557 break;
558 }
559 }
560 }
561 threads += 1;
a3d9ab50
L
562 if (threads > 2 && level == 2 && family == 6)
563 {
a3d9ab50
L
564 switch (model)
565 {
566 case 0x57:
567 /* Knights Landing has L2 cache shared by 2 cores. */
568 case 0x37:
569 case 0x4a:
570 case 0x4d:
571 case 0x5a:
572 case 0x5d:
573 /* Silvermont has L2 cache shared by 2 cores. */
574 threads = 2;
575 break;
576 default:
577 break;
578 }
579 }
bfe6f5fa
UD
580 }
581 else
7e4ba49c 582 {
5a01ab7b 583 intel_bug_no_cache_info:
0435403c 584 /* Assume that all logical threads share the highest cache level. */
6d59823c 585
e2e4f560
L
586 threads
587 = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
588 >> 16) & 0xff);
bfe6f5fa 589 }
6d59823c
UD
590
591 /* Cap usage of highest cache level to the number of supported
0435403c 592 threads. */
bfe6f5fa 593 if (shared > 0 && threads > 0)
7e4ba49c 594 shared /= threads;
bfe6f5fa
UD
595 }
596 /* This spells out "AuthenticAMD". */
425ce2ed 597 else if (is_amd)
bfe6f5fa 598 {
0435403c
UD
599 data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
600 long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
601 shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
6d59823c 602
0435403c 603 /* Get maximum extended function. */
6f6f1215 604 __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
bfe6f5fa 605
0435403c
UD
606 if (shared <= 0)
607 /* No shared L3 cache. All we have is the L2 cache. */
608 shared = core;
609 else
610 {
611 /* Figure out the number of logical threads that share L3. */
612 if (max_cpuid_ex >= 0x80000008)
613 {
614 /* Get width of APIC ID. */
6f6f1215 615 __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
0435403c
UD
616 threads = 1 << ((ecx >> 12) & 0x0f);
617 }
618
619 if (threads == 0)
620 {
621 /* If APIC ID width is not available, use logical
622 processor count. */
6f6f1215 623 __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
0435403c
UD
624
625 if ((edx & (1 << 28)) != 0)
626 threads = (ebx >> 16) & 0xff;
627 }
628
629 /* Cap usage of highest cache level to the number of
630 supported threads. */
631 if (threads > 0)
632 shared /= threads;
633
634 /* Account for exclusive L2 and L3 caches. */
635 shared += core;
636 }
637
6f6f1215 638#ifndef DISABLE_PREFETCHW
bfe6f5fa
UD
639 if (max_cpuid_ex >= 0x80000001)
640 {
6f6f1215 641 __cpuid (0x80000001, eax, ebx, ecx, edx);
0435403c 642 /* PREFETCHW || 3DNow! */
bfe6f5fa 643 if ((ecx & 0x100) || (edx & 0x80000000))
afec409a 644 __x86_prefetchw = -1;
bfe6f5fa 645 }
6f6f1215 646#endif
bfe6f5fa
UD
647 }
648
0435403c 649 if (data > 0)
3af48cbd 650 {
afec409a
L
651 __x86_raw_data_cache_size_half = data / 2;
652 __x86_raw_data_cache_size = data;
c0dde15b
UD
653 /* Round data cache size to multiple of 256 bytes. */
654 data = data & ~255L;
afec409a
L
655 __x86_data_cache_size_half = data / 2;
656 __x86_data_cache_size = data;
3af48cbd 657 }
bfe6f5fa
UD
658
659 if (shared > 0)
e2b393bc 660 {
afec409a
L
661 __x86_raw_shared_cache_size_half = shared / 2;
662 __x86_raw_shared_cache_size = shared;
c0dde15b
UD
663 /* Round shared cache size to multiple of 256 bytes. */
664 shared = shared & ~255L;
afec409a
L
665 __x86_shared_cache_size_half = shared / 2;
666 __x86_shared_cache_size = shared;
e2b393bc 667 }
a057f5f8
L
668
669 /* The large memcpy micro benchmark in glibc shows that 6 times of
670 shared cache size is the approximate value above which non-temporal
671 store becomes faster. */
672 __x86_shared_non_temporal_threshold = __x86_shared_cache_size * 6;
bfe6f5fa 673}