]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/driver-i386.c
Add TIGERLAKE and COOPERLAKE to GCC.
[thirdparty/gcc.git] / gcc / config / i386 / driver-i386.c
1 /* Subroutines for the gcc driver.
2 Copyright (C) 2006-2019 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #define IN_TARGET_CODE 1
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26
27 const char *host_detect_local_cpu (int argc, const char **argv);
28
29 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
30 #include "cpuid.h"
31
/* Parameters of a single cache level as used by this file.  */
struct cache_desc
{
  /* Total capacity, in kilobytes.  */
  unsigned sizekb;
  /* Associativity (number of ways).  */
  unsigned assoc;
  /* Cache line size, in bytes.  */
  unsigned line;
};
38
39 /* Returns command line parameters that describe size and
40 cache line size of the processor caches. */
41
42 static char *
43 describe_cache (struct cache_desc level1, struct cache_desc level2)
44 {
45 char size[100], line[100], size2[100];
46
47 /* At the moment, gcc does not use the information
48 about the associativity of the cache. */
49
50 snprintf (size, sizeof (size),
51 "--param l1-cache-size=%u ", level1.sizekb);
52 snprintf (line, sizeof (line),
53 "--param l1-cache-line-size=%u ", level1.line);
54
55 snprintf (size2, sizeof (size2),
56 "--param l2-cache-size=%u ", level2.sizekb);
57
58 return concat (size, line, size2, NULL);
59 }
60
61 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
62
63 static void
64 detect_l2_cache (struct cache_desc *level2)
65 {
66 unsigned eax, ebx, ecx, edx;
67 unsigned assoc;
68
69 __cpuid (0x80000006, eax, ebx, ecx, edx);
70
71 level2->sizekb = (ecx >> 16) & 0xffff;
72 level2->line = ecx & 0xff;
73
74 assoc = (ecx >> 12) & 0xf;
75 if (assoc == 6)
76 assoc = 8;
77 else if (assoc == 8)
78 assoc = 16;
79 else if (assoc >= 0xa && assoc <= 0xc)
80 assoc = 32 + (assoc - 0xa) * 16;
81 else if (assoc >= 0xd && assoc <= 0xe)
82 assoc = 96 + (assoc - 0xd) * 32;
83
84 level2->assoc = assoc;
85 }
86
87 /* Returns the description of caches for an AMD processor. */
88
89 static const char *
90 detect_caches_amd (unsigned max_ext_level)
91 {
92 unsigned eax, ebx, ecx, edx;
93
94 struct cache_desc level1, level2 = {0, 0, 0};
95
96 if (max_ext_level < 0x80000005)
97 return "";
98
99 __cpuid (0x80000005, eax, ebx, ecx, edx);
100
101 level1.sizekb = (ecx >> 24) & 0xff;
102 level1.assoc = (ecx >> 16) & 0xff;
103 level1.line = ecx & 0xff;
104
105 if (max_ext_level >= 0x80000006)
106 detect_l2_cache (&level2);
107
108 return describe_cache (level1, level2);
109 }
110
111 /* Decodes the size, the associativity and the cache line size of
112 L1/L2 caches of an Intel processor. Values are based on
113 "Intel Processor Identification and the CPUID Instruction"
114 [Application Note 485], revision -032, December 2007. */
115
116 static void
117 decode_caches_intel (unsigned reg, bool xeon_mp,
118 struct cache_desc *level1, struct cache_desc *level2)
119 {
120 int i;
121
122 for (i = 24; i >= 0; i -= 8)
123 switch ((reg >> i) & 0xff)
124 {
125 case 0x0a:
126 level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
127 break;
128 case 0x0c:
129 level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
130 break;
131 case 0x0d:
132 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
133 break;
134 case 0x0e:
135 level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
136 break;
137 case 0x21:
138 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
139 break;
140 case 0x24:
141 level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
142 break;
143 case 0x2c:
144 level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
145 break;
146 case 0x39:
147 level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
148 break;
149 case 0x3a:
150 level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
151 break;
152 case 0x3b:
153 level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
154 break;
155 case 0x3c:
156 level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
157 break;
158 case 0x3d:
159 level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
160 break;
161 case 0x3e:
162 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
163 break;
164 case 0x41:
165 level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
166 break;
167 case 0x42:
168 level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
169 break;
170 case 0x43:
171 level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
172 break;
173 case 0x44:
174 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
175 break;
176 case 0x45:
177 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
178 break;
179 case 0x48:
180 level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
181 break;
182 case 0x49:
183 if (xeon_mp)
184 break;
185 level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
186 break;
187 case 0x4e:
188 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
189 break;
190 case 0x60:
191 level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
192 break;
193 case 0x66:
194 level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
195 break;
196 case 0x67:
197 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
198 break;
199 case 0x68:
200 level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
201 break;
202 case 0x78:
203 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
204 break;
205 case 0x79:
206 level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
207 break;
208 case 0x7a:
209 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
210 break;
211 case 0x7b:
212 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
213 break;
214 case 0x7c:
215 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
216 break;
217 case 0x7d:
218 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
219 break;
220 case 0x7f:
221 level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
222 break;
223 case 0x80:
224 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
225 break;
226 case 0x82:
227 level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
228 break;
229 case 0x83:
230 level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
231 break;
232 case 0x84:
233 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
234 break;
235 case 0x85:
236 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
237 break;
238 case 0x86:
239 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
240 break;
241 case 0x87:
242 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
243
244 default:
245 break;
246 }
247 }
248
/* Detect cache parameters using CPUID function 2.  The low four bits of
   EAX are taken as the number of times the function has to be executed;
   every register whose bit 31 is clear is handed to decode_caches_intel
   together with XEON_MP, LEVEL1 and LEVEL2.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int remaining, r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* Extract the repeat count, then clear it out of EAX so that it is not
     mistaken for a descriptor.  */
  remaining = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  for (; remaining > 0; remaining--)
    {
      for (r = 0; r < 4; r++)
        {
          /* Skip empty registers and those flagged invalid by bit 31.  */
          if (regs[r] == 0 || ((regs[r] >> 31) & 1) != 0)
            continue;
          decode_caches_intel (regs[r], xeon_mp, level1, level2);
        }

      /* Fetch the next batch only if another round is still due.  */
      if (remaining > 1)
        __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
273
274 /* Detect cache parameters using CPUID function 4. This
275 method doesn't require hardcoded tables. */
276
/* Cache type values compared against the low five bits of EAX returned
   by CPUID function 4.  */
enum cache_type
{
  /* No more caches to enumerate.  */
  CACHE_END = 0,
  /* Data cache.  */
  CACHE_DATA = 1,
  /* Instruction cache.  */
  CACHE_INST = 2,
  /* Unified (data and instruction) cache.  */
  CACHE_UNIFIED = 3
};
284
285 static void
286 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
287 struct cache_desc *level3)
288 {
289 struct cache_desc *cache;
290
291 unsigned eax, ebx, ecx, edx;
292 int count;
293
294 for (count = 0;; count++)
295 {
296 __cpuid_count(4, count, eax, ebx, ecx, edx);
297 switch (eax & 0x1f)
298 {
299 case CACHE_END:
300 return;
301 case CACHE_DATA:
302 case CACHE_UNIFIED:
303 {
304 switch ((eax >> 5) & 0x07)
305 {
306 case 1:
307 cache = level1;
308 break;
309 case 2:
310 cache = level2;
311 break;
312 case 3:
313 cache = level3;
314 break;
315 default:
316 cache = NULL;
317 }
318
319 if (cache)
320 {
321 unsigned sets = ecx + 1;
322 unsigned part = ((ebx >> 12) & 0x03ff) + 1;
323
324 cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
325 cache->line = (ebx & 0x0fff) + 1;
326
327 cache->sizekb = (cache->assoc * part
328 * cache->line * sets) / 1024;
329 }
330 }
331 default:
332 break;
333 }
334 }
335 }
336
337 /* Returns the description of caches for an Intel processor. */
338
339 static const char *
340 detect_caches_intel (bool xeon_mp, unsigned max_level,
341 unsigned max_ext_level, unsigned *l2sizekb)
342 {
343 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
344
345 if (max_level >= 4)
346 detect_caches_cpuid4 (&level1, &level2, &level3);
347 else if (max_level >= 2)
348 detect_caches_cpuid2 (xeon_mp, &level1, &level2);
349 else
350 return "";
351
352 if (level1.sizekb == 0)
353 return "";
354
355 /* Let the L3 replace the L2. This assumes inclusive caches
356 and single threaded program for now. */
357 if (level3.sizekb)
358 level2 = level3;
359
360 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
361 method if other methods fail to provide L2 cache parameters. */
362 if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
363 detect_l2_cache (&level2);
364
365 *l2sizekb = level2.sizekb;
366
367 return describe_cache (level1, level2);
368 }
369
370 /* This will be called by the spec parser in gcc.c when it sees
371 a %:local_cpu_detect(args) construct. Currently it will be called
372 with either "arch" or "tune" as argument depending on if -march=native
373 or -mtune=native is to be substituted.
374
375 It returns a string containing new command line parameters to be
376 put at the place of the above two options, depending on what CPU
377 this is executed. E.g. "-march=k8" on an AMD64 machine
378 for -march=native.
379
380 ARGC and ARGV are set depending on the actual arguments given
381 in the spec. */
382
383 const char *host_detect_local_cpu (int argc, const char **argv)
384 {
385 enum processor_type processor = PROCESSOR_I386;
386 const char *cpu = "i386";
387
388 const char *cache = "";
389 const char *options = "";
390
391 unsigned int eax, ebx, ecx, edx;
392
393 unsigned int max_level, ext_level;
394
395 unsigned int vendor;
396 unsigned int model, family;
397
398 unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
399 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
400
401 /* Extended features */
402 unsigned int has_lahf_lm = 0, has_sse4a = 0;
403 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
404 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
405 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
406 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
407 unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
408 unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
409 unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0;
410 unsigned int has_pconfig = 0, has_wbnoinvd = 0;
411 unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
412 unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
413 unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
414 unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
415 unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
416 unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
417 unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
418 unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
419 unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
420 unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
421 unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
422 unsigned int has_avx512bitalg = 0;
423 unsigned int has_shstk = 0;
424 unsigned int has_avx512vnni = 0, has_vaes = 0;
425 unsigned int has_vpclmulqdq = 0;
426 unsigned int has_avx512vp2intersect = 0;
427 unsigned int has_movdiri = 0, has_movdir64b = 0;
428 unsigned int has_enqcmd = 0;
429 unsigned int has_waitpkg = 0;
430 unsigned int has_cldemote = 0;
431 unsigned int has_avx512bf16 = 0;
432
433 unsigned int has_ptwrite = 0;
434
435 bool arch;
436
437 unsigned int l2sizekb = 0;
438
439 if (argc < 1)
440 return NULL;
441
442 arch = !strcmp (argv[0], "arch");
443
444 if (!arch && strcmp (argv[0], "tune"))
445 return NULL;
446
447 max_level = __get_cpuid_max (0, &vendor);
448 if (max_level < 1)
449 goto done;
450
451 __cpuid (1, eax, ebx, ecx, edx);
452
453 model = (eax >> 4) & 0x0f;
454 family = (eax >> 8) & 0x0f;
455 if (vendor == signature_INTEL_ebx
456 || vendor == signature_AMD_ebx)
457 {
458 unsigned int extended_model, extended_family;
459
460 extended_model = (eax >> 12) & 0xf0;
461 extended_family = (eax >> 20) & 0xff;
462 if (family == 0x0f)
463 {
464 family += extended_family;
465 model += extended_model;
466 }
467 else if (family == 0x06)
468 model += extended_model;
469 }
470
471 has_sse3 = ecx & bit_SSE3;
472 has_ssse3 = ecx & bit_SSSE3;
473 has_sse4_1 = ecx & bit_SSE4_1;
474 has_sse4_2 = ecx & bit_SSE4_2;
475 has_avx = ecx & bit_AVX;
476 has_osxsave = ecx & bit_OSXSAVE;
477 has_cmpxchg16b = ecx & bit_CMPXCHG16B;
478 has_movbe = ecx & bit_MOVBE;
479 has_popcnt = ecx & bit_POPCNT;
480 has_aes = ecx & bit_AES;
481 has_pclmul = ecx & bit_PCLMUL;
482 has_fma = ecx & bit_FMA;
483 has_f16c = ecx & bit_F16C;
484 has_rdrnd = ecx & bit_RDRND;
485 has_xsave = ecx & bit_XSAVE;
486
487 has_cmpxchg8b = edx & bit_CMPXCHG8B;
488 has_cmov = edx & bit_CMOV;
489 has_mmx = edx & bit_MMX;
490 has_fxsr = edx & bit_FXSAVE;
491 has_sse = edx & bit_SSE;
492 has_sse2 = edx & bit_SSE2;
493
494 if (max_level >= 7)
495 {
496 __cpuid_count (7, 0, eax, ebx, ecx, edx);
497
498 has_bmi = ebx & bit_BMI;
499 has_sgx = ebx & bit_SGX;
500 has_hle = ebx & bit_HLE;
501 has_rtm = ebx & bit_RTM;
502 has_avx2 = ebx & bit_AVX2;
503 has_bmi2 = ebx & bit_BMI2;
504 has_fsgsbase = ebx & bit_FSGSBASE;
505 has_rdseed = ebx & bit_RDSEED;
506 has_adx = ebx & bit_ADX;
507 has_avx512f = ebx & bit_AVX512F;
508 has_avx512er = ebx & bit_AVX512ER;
509 has_avx512pf = ebx & bit_AVX512PF;
510 has_avx512cd = ebx & bit_AVX512CD;
511 has_sha = ebx & bit_SHA;
512 has_clflushopt = ebx & bit_CLFLUSHOPT;
513 has_clwb = ebx & bit_CLWB;
514 has_avx512dq = ebx & bit_AVX512DQ;
515 has_avx512bw = ebx & bit_AVX512BW;
516 has_avx512vl = ebx & bit_AVX512VL;
517 has_avx512ifma = ebx & bit_AVX512IFMA;
518
519 has_prefetchwt1 = ecx & bit_PREFETCHWT1;
520 has_avx512vbmi = ecx & bit_AVX512VBMI;
521 has_pku = ecx & bit_OSPKE;
522 has_avx512vbmi2 = ecx & bit_AVX512VBMI2;
523 has_avx512vnni = ecx & bit_AVX512VNNI;
524 has_rdpid = ecx & bit_RDPID;
525 has_gfni = ecx & bit_GFNI;
526 has_vaes = ecx & bit_VAES;
527 has_vpclmulqdq = ecx & bit_VPCLMULQDQ;
528 has_avx512bitalg = ecx & bit_AVX512BITALG;
529 has_movdiri = ecx & bit_MOVDIRI;
530 has_movdir64b = ecx & bit_MOVDIR64B;
531 has_enqcmd = ecx & bit_ENQCMD;
532 has_cldemote = ecx & bit_CLDEMOTE;
533
534 has_avx5124vnniw = edx & bit_AVX5124VNNIW;
535 has_avx5124fmaps = edx & bit_AVX5124FMAPS;
536 has_avx512vp2intersect = edx & bit_AVX512VP2INTERSECT;
537
538 has_shstk = ecx & bit_SHSTK;
539 has_pconfig = edx & bit_PCONFIG;
540 has_waitpkg = ecx & bit_WAITPKG;
541
542 __cpuid_count (7, 1, eax, ebx, ecx, edx);
543 has_avx512bf16 = eax & bit_AVX512BF16;
544 }
545
546 if (max_level >= 13)
547 {
548 __cpuid_count (13, 1, eax, ebx, ecx, edx);
549
550 has_xsaveopt = eax & bit_XSAVEOPT;
551 has_xsavec = eax & bit_XSAVEC;
552 has_xsaves = eax & bit_XSAVES;
553 }
554
555 if (max_level >= 0x14)
556 {
557 __cpuid_count (0x14, 0, eax, ebx, ecx, edx);
558
559 has_ptwrite = ebx & bit_PTWRITE;
560 }
561
562 /* Check cpuid level of extended features. */
563 __cpuid (0x80000000, ext_level, ebx, ecx, edx);
564
565 if (ext_level >= 0x80000001)
566 {
567 __cpuid (0x80000001, eax, ebx, ecx, edx);
568
569 has_lahf_lm = ecx & bit_LAHF_LM;
570 has_sse4a = ecx & bit_SSE4a;
571 has_abm = ecx & bit_ABM;
572 has_lwp = ecx & bit_LWP;
573 has_fma4 = ecx & bit_FMA4;
574 has_xop = ecx & bit_XOP;
575 has_tbm = ecx & bit_TBM;
576 has_lzcnt = ecx & bit_LZCNT;
577 has_prfchw = ecx & bit_PRFCHW;
578
579 has_longmode = edx & bit_LM;
580 has_3dnowp = edx & bit_3DNOWP;
581 has_3dnow = edx & bit_3DNOW;
582 has_mwaitx = ecx & bit_MWAITX;
583 }
584
585 if (ext_level >= 0x80000008)
586 {
587 __cpuid (0x80000008, eax, ebx, ecx, edx);
588 has_clzero = ebx & bit_CLZERO;
589 has_wbnoinvd = ebx & bit_WBNOINVD;
590 }
591
592 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
593 #define XCR_XFEATURE_ENABLED_MASK 0x0
594 #define XSTATE_FP 0x1
595 #define XSTATE_SSE 0x2
596 #define XSTATE_YMM 0x4
597 #define XSTATE_OPMASK 0x20
598 #define XSTATE_ZMM 0x40
599 #define XSTATE_HI_ZMM 0x80
600
601 #define XCR_AVX_ENABLED_MASK \
602 (XSTATE_SSE | XSTATE_YMM)
603 #define XCR_AVX512F_ENABLED_MASK \
604 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
605
606 if (has_osxsave)
607 asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
608 : "=a" (eax), "=d" (edx)
609 : "c" (XCR_XFEATURE_ENABLED_MASK));
610 else
611 eax = 0;
612
613 /* Check if AVX registers are supported. */
614 if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK)
615 {
616 has_avx = 0;
617 has_avx2 = 0;
618 has_fma = 0;
619 has_fma4 = 0;
620 has_f16c = 0;
621 has_xop = 0;
622 has_xsave = 0;
623 has_xsaveopt = 0;
624 has_xsaves = 0;
625 has_xsavec = 0;
626 }
627
628 /* Check if AVX512F registers are supported. */
629 if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK)
630 {
631 has_avx512f = 0;
632 has_avx512er = 0;
633 has_avx512pf = 0;
634 has_avx512cd = 0;
635 has_avx512dq = 0;
636 has_avx512bw = 0;
637 has_avx512vl = 0;
638 }
639
640 if (!arch)
641 {
642 if (vendor == signature_AMD_ebx
643 || vendor == signature_CENTAUR_ebx
644 || vendor == signature_CYRIX_ebx
645 || vendor == signature_NSC_ebx)
646 cache = detect_caches_amd (ext_level);
647 else if (vendor == signature_INTEL_ebx)
648 {
649 bool xeon_mp = (family == 15 && model == 6);
650 cache = detect_caches_intel (xeon_mp, max_level,
651 ext_level, &l2sizekb);
652 }
653 }
654
655 if (vendor == signature_AMD_ebx)
656 {
657 unsigned int name;
658
659 /* Detect geode processor by its processor signature. */
660 if (ext_level >= 0x80000002)
661 __cpuid (0x80000002, name, ebx, ecx, edx);
662 else
663 name = 0;
664
665 if (name == signature_NSC_ebx)
666 processor = PROCESSOR_GEODE;
667 else if (has_movbe && family == 22)
668 processor = PROCESSOR_BTVER2;
669 else if (has_clwb)
670 processor = PROCESSOR_ZNVER2;
671 else if (has_clzero)
672 processor = PROCESSOR_ZNVER1;
673 else if (has_avx2)
674 processor = PROCESSOR_BDVER4;
675 else if (has_xsaveopt)
676 processor = PROCESSOR_BDVER3;
677 else if (has_bmi)
678 processor = PROCESSOR_BDVER2;
679 else if (has_xop)
680 processor = PROCESSOR_BDVER1;
681 else if (has_sse4a && has_ssse3)
682 processor = PROCESSOR_BTVER1;
683 else if (has_sse4a)
684 processor = PROCESSOR_AMDFAM10;
685 else if (has_sse2 || has_longmode)
686 processor = PROCESSOR_K8;
687 else if (has_3dnowp && family == 6)
688 processor = PROCESSOR_ATHLON;
689 else if (has_mmx)
690 processor = PROCESSOR_K6;
691 else
692 processor = PROCESSOR_PENTIUM;
693 }
694 else if (vendor == signature_CENTAUR_ebx)
695 {
696 processor = PROCESSOR_GENERIC;
697
698 switch (family)
699 {
700 default:
701 /* We have no idea. */
702 break;
703
704 case 5:
705 if (has_3dnow || has_mmx)
706 processor = PROCESSOR_I486;
707 break;
708
709 case 6:
710 if (has_longmode)
711 processor = PROCESSOR_K8;
712 else if (model >= 9)
713 processor = PROCESSOR_PENTIUMPRO;
714 else if (model >= 6)
715 processor = PROCESSOR_I486;
716 }
717 }
718 else
719 {
720 switch (family)
721 {
722 case 4:
723 processor = PROCESSOR_I486;
724 break;
725 case 5:
726 processor = PROCESSOR_PENTIUM;
727 break;
728 case 6:
729 processor = PROCESSOR_PENTIUMPRO;
730 break;
731 case 15:
732 processor = PROCESSOR_PENTIUM4;
733 break;
734 default:
735 /* We have no idea. */
736 processor = PROCESSOR_GENERIC;
737 }
738 }
739
740 switch (processor)
741 {
742 case PROCESSOR_I386:
743 /* Default. */
744 break;
745 case PROCESSOR_I486:
746 if (arch && vendor == signature_CENTAUR_ebx)
747 {
748 if (model >= 6)
749 cpu = "c3";
750 else if (has_3dnow)
751 cpu = "winchip2";
752 else
753 /* Assume WinChip C6. */
754 cpu = "winchip-c6";
755 }
756 else
757 cpu = "i486";
758 break;
759 case PROCESSOR_PENTIUM:
760 if (arch && has_mmx)
761 cpu = "pentium-mmx";
762 else
763 cpu = "pentium";
764 break;
765 case PROCESSOR_PENTIUMPRO:
766 switch (model)
767 {
768 case 0x1c:
769 case 0x26:
770 /* Bonnell. */
771 cpu = "bonnell";
772 break;
773 case 0x37:
774 case 0x4a:
775 case 0x4d:
776 case 0x5a:
777 case 0x5d:
778 /* Silvermont. */
779 cpu = "silvermont";
780 break;
781 case 0x5c:
782 case 0x5f:
783 /* Goldmont. */
784 cpu = "goldmont";
785 break;
786 case 0x7a:
787 /* Goldmont Plus. */
788 cpu = "goldmont-plus";
789 break;
790 case 0x0f:
791 /* Merom. */
792 case 0x17:
793 case 0x1d:
794 /* Penryn. */
795 cpu = "core2";
796 break;
797 case 0x1a:
798 case 0x1e:
799 case 0x1f:
800 case 0x2e:
801 /* Nehalem. */
802 cpu = "nehalem";
803 break;
804 case 0x25:
805 case 0x2c:
806 case 0x2f:
807 /* Westmere. */
808 cpu = "westmere";
809 break;
810 case 0x2a:
811 case 0x2d:
812 /* Sandy Bridge. */
813 cpu = "sandybridge";
814 break;
815 case 0x3a:
816 case 0x3e:
817 /* Ivy Bridge. */
818 cpu = "ivybridge";
819 break;
820 case 0x3c:
821 case 0x3f:
822 case 0x45:
823 case 0x46:
824 /* Haswell. */
825 cpu = "haswell";
826 break;
827 case 0x3d:
828 case 0x47:
829 case 0x4f:
830 case 0x56:
831 /* Broadwell. */
832 cpu = "broadwell";
833 break;
834 case 0x4e:
835 case 0x5e:
836 /* Skylake. */
837 case 0x8e:
838 case 0x9e:
839 /* Kaby Lake. */
840 cpu = "skylake";
841 break;
842 case 0x55:
843 if (has_avx512vnni)
844 /* Cascade Lake. */
845 cpu = "cascadelake";
846 else
847 /* Skylake with AVX-512. */
848 cpu = "skylake-avx512";
849 break;
850 case 0x57:
851 /* Knights Landing. */
852 cpu = "knl";
853 break;
854 case 0x66:
855 /* Cannon Lake. */
856 cpu = "cannonlake";
857 break;
858 case 0x85:
859 /* Knights Mill. */
860 cpu = "knm";
861 break;
862 default:
863 if (arch)
864 {
865 /* This is unknown family 0x6 CPU. */
866 if (has_avx)
867 {
868 /* Assume Tiger Lake */
869 if (has_avx512vp2intersect)
870 cpu = "tigerlake";
871 /* Assume Cooper Lake */
872 else if (has_avx512bf16)
873 cpu = "cooperlake";
874 /* Assume Ice Lake Server. */
875 else if (has_wbnoinvd)
876 cpu = "icelake-server";
877 /* Assume Ice Lake. */
878 else if (has_avx512bitalg)
879 cpu = "icelake-client";
880 /* Assume Cannon Lake. */
881 else if (has_avx512vbmi)
882 cpu = "cannonlake";
883 /* Assume Knights Mill. */
884 else if (has_avx5124vnniw)
885 cpu = "knm";
886 /* Assume Knights Landing. */
887 else if (has_avx512er)
888 cpu = "knl";
889 /* Assume Skylake with AVX-512. */
890 else if (has_avx512f)
891 cpu = "skylake-avx512";
892 /* Assume Skylake. */
893 else if (has_clflushopt)
894 cpu = "skylake";
895 /* Assume Broadwell. */
896 else if (has_adx)
897 cpu = "broadwell";
898 else if (has_avx2)
899 /* Assume Haswell. */
900 cpu = "haswell";
901 else
902 /* Assume Sandy Bridge. */
903 cpu = "sandybridge";
904 }
905 else if (has_sse4_2)
906 {
907 if (has_gfni)
908 /* Assume Tremont. */
909 cpu = "tremont";
910 else if (has_sgx)
911 /* Assume Goldmont Plus. */
912 cpu = "goldmont-plus";
913 else if (has_xsave)
914 /* Assume Goldmont. */
915 cpu = "goldmont";
916 else if (has_movbe)
917 /* Assume Silvermont. */
918 cpu = "silvermont";
919 else
920 /* Assume Nehalem. */
921 cpu = "nehalem";
922 }
923 else if (has_ssse3)
924 {
925 if (has_movbe)
926 /* Assume Bonnell. */
927 cpu = "bonnell";
928 else
929 /* Assume Core 2. */
930 cpu = "core2";
931 }
932 else if (has_longmode)
933 /* Perhaps some emulator? Assume x86-64, otherwise gcc
934 -march=native would be unusable for 64-bit compilations,
935 as all the CPUs below are 32-bit only. */
936 cpu = "x86-64";
937 else if (has_sse3)
938 {
939 if (vendor == signature_CENTAUR_ebx)
940 /* C7 / Eden "Esther" */
941 cpu = "c7";
942 else
943 /* It is Core Duo. */
944 cpu = "pentium-m";
945 }
946 else if (has_sse2)
947 /* It is Pentium M. */
948 cpu = "pentium-m";
949 else if (has_sse)
950 {
951 if (vendor == signature_CENTAUR_ebx)
952 {
953 if (model >= 9)
954 /* Eden "Nehemiah" */
955 cpu = "nehemiah";
956 else
957 cpu = "c3-2";
958 }
959 else
960 /* It is Pentium III. */
961 cpu = "pentium3";
962 }
963 else if (has_mmx)
964 /* It is Pentium II. */
965 cpu = "pentium2";
966 else
967 /* Default to Pentium Pro. */
968 cpu = "pentiumpro";
969 }
970 else
971 /* For -mtune, we default to -mtune=generic. */
972 cpu = "generic";
973 break;
974 }
975 break;
976 case PROCESSOR_PENTIUM4:
977 if (has_sse3)
978 {
979 if (has_longmode)
980 cpu = "nocona";
981 else
982 cpu = "prescott";
983 }
984 else
985 cpu = "pentium4";
986 break;
987 case PROCESSOR_GEODE:
988 cpu = "geode";
989 break;
990 case PROCESSOR_K6:
991 if (arch && has_3dnow)
992 cpu = "k6-3";
993 else
994 cpu = "k6";
995 break;
996 case PROCESSOR_ATHLON:
997 if (arch && has_sse)
998 cpu = "athlon-4";
999 else
1000 cpu = "athlon";
1001 break;
1002 case PROCESSOR_K8:
1003 if (arch)
1004 {
1005 if (vendor == signature_CENTAUR_ebx)
1006 {
1007 if (has_sse4_1)
1008 /* Nano 3000 | Nano dual / quad core | Eden X4 */
1009 cpu = "nano-3000";
1010 else if (has_ssse3)
1011 /* Nano 1000 | Nano 2000 */
1012 cpu = "nano";
1013 else if (has_sse3)
1014 /* Eden X2 */
1015 cpu = "eden-x2";
1016 else
1017 /* Default to k8 */
1018 cpu = "k8";
1019 }
1020 else if (has_sse3)
1021 cpu = "k8-sse3";
1022 else
1023 cpu = "k8";
1024 }
1025 else
1026 /* For -mtune, we default to -mtune=k8 */
1027 cpu = "k8";
1028 break;
1029 case PROCESSOR_AMDFAM10:
1030 cpu = "amdfam10";
1031 break;
1032 case PROCESSOR_BDVER1:
1033 cpu = "bdver1";
1034 break;
1035 case PROCESSOR_BDVER2:
1036 cpu = "bdver2";
1037 break;
1038 case PROCESSOR_BDVER3:
1039 cpu = "bdver3";
1040 break;
1041 case PROCESSOR_BDVER4:
1042 cpu = "bdver4";
1043 break;
1044 case PROCESSOR_ZNVER1:
1045 cpu = "znver1";
1046 break;
1047 case PROCESSOR_ZNVER2:
1048 cpu = "znver2";
1049 break;
1050 case PROCESSOR_BTVER1:
1051 cpu = "btver1";
1052 break;
1053 case PROCESSOR_BTVER2:
1054 cpu = "btver2";
1055 break;
1056
1057 default:
1058 /* Use something reasonable. */
1059 if (arch)
1060 {
1061 if (has_ssse3)
1062 cpu = "core2";
1063 else if (has_sse3)
1064 {
1065 if (has_longmode)
1066 cpu = "nocona";
1067 else
1068 cpu = "prescott";
1069 }
1070 else if (has_longmode)
1071 /* Perhaps some emulator? Assume x86-64, otherwise gcc
1072 -march=native would be unusable for 64-bit compilations,
1073 as all the CPUs below are 32-bit only. */
1074 cpu = "x86-64";
1075 else if (has_sse2)
1076 cpu = "pentium4";
1077 else if (has_cmov)
1078 cpu = "pentiumpro";
1079 else if (has_mmx)
1080 cpu = "pentium-mmx";
1081 else if (has_cmpxchg8b)
1082 cpu = "pentium";
1083 }
1084 else
1085 cpu = "generic";
1086 }
1087
1088 if (arch)
1089 {
1090 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
1091 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
1092 const char *sse = has_sse ? " -msse" : " -mno-sse";
1093 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
1094 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
1095 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
1096 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
1097 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
1098 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
1099 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
1100 const char *aes = has_aes ? " -maes" : " -mno-aes";
1101 const char *sha = has_sha ? " -msha" : " -mno-sha";
1102 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
1103 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
1104 const char *abm = has_abm ? " -mabm" : " -mno-abm";
1105 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
1106 const char *fma = has_fma ? " -mfma" : " -mno-fma";
1107 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
1108 const char *xop = has_xop ? " -mxop" : " -mno-xop";
1109 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
1110 const char *pconfig = has_pconfig ? " -mpconfig" : " -mno-pconfig";
1111 const char *wbnoinvd = has_wbnoinvd ? " -mwbnoinvd" : " -mno-wbnoinvd";
1112 const char *sgx = has_sgx ? " -msgx" : " -mno-sgx";
1113 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
1114 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
1115 const char *avx = has_avx ? " -mavx" : " -mno-avx";
1116 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
1117 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
1118 const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
1119 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
1120 const char *hle = has_hle ? " -mhle" : " -mno-hle";
1121 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
1122 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
1123 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
1124 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
1125 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
1126 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
1127 const char *adx = has_adx ? " -madx" : " -mno-adx";
1128 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
1129 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
1130 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
1131 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
1132 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
1133 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
1134 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
1135 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
1136 const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
1137 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
1138 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
1139 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
1140 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
1141 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
1142 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
1143 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
1144 const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1145 const char *avx512vbmi2 = has_avx512vbmi2 ? " -mavx512vbmi2" : " -mno-avx512vbmi2";
1146 const char *avx512vnni = has_avx512vnni ? " -mavx512vnni" : " -mno-avx512vnni";
1147 const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
1148 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
1149 const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
1150 const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
1151 const char *pku = has_pku ? " -mpku" : " -mno-pku";
1152 const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid";
1153 const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni";
1154 const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
1155 const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
1156 const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
1157 const char *avx512vp2intersect = has_avx512vp2intersect ? " -mavx512vp2intersect" : " -mno-avx512vp2intersect";
1158 const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
1159 const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
1160 const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
1161 const char *enqcmd = has_enqcmd ? " -menqcmd" : " -mno-enqcmd";
1162 const char *waitpkg = has_waitpkg ? " -mwaitpkg" : " -mno-waitpkg";
1163 const char *cldemote = has_cldemote ? " -mcldemote" : " -mno-cldemote";
1164 const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
1165 const char *avx512bf16 = has_avx512bf16 ? " -mavx512bf16" : " -mno-avx512bf16";
1166
1167 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
1168 sse4a, cx16, sahf, movbe, aes, sha, pclmul,
1169 popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
1170 pconfig, wbnoinvd,
1171 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
1172 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
1173 fxsr, xsave, xsaveopt, avx512f, avx512er,
1174 avx512cd, avx512pf, prefetchwt1, clflushopt,
1175 xsavec, xsaves, avx512dq, avx512bw, avx512vl,
1176 avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
1177 clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
1178 avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
1179 avx512bitalg, movdiri, movdir64b, waitpkg, cldemote,
1180 ptwrite, avx512bf16, enqcmd, avx512vp2intersect,
1181 NULL);
1182 }
1183
1184 done:
1185 return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
1186 }
1187 #else
1188
/* Fallback used when the host compiler keeps %EBX fixed and CPUID cannot
   be issued from here: no detection is attempted and NULL is returned,
   so the driver just ignores the "native" -march / -mtune target and the
   newly built compiler generates code for its default target.  */

const char *
host_detect_local_cpu (int argc, const char **argv)
{
  /* Neither argument is looked at in this configuration.  */
  (void) argc;
  (void) argv;

  return NULL;
}
1197 #endif /* __GNUC__ */