]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/driver-i386.c
pr79683.c: Disable costmodel.
[thirdparty/gcc.git] / gcc / config / i386 / driver-i386.c
CommitLineData
/* Subroutines for the gcc driver.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
fa959ce4
MM
19
20#include "config.h"
21#include "system.h"
edccdcb1
L
22#include "coretypes.h"
23#include "tm.h"
fa959ce4 24
895016f6
UB
25const char *host_detect_local_cpu (int argc, const char **argv);
26
02147868 27#if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
b3172cab 28#include "cpuid.h"
fa959ce4 29
cb0dee88
UB
/* Parameters of one cache level as gathered by the CPUID probes in
   this file.  */
struct cache_desc
{
  /* Total size of the cache, in kilobytes.  */
  unsigned int sizekb;
  /* Associativity (number of ways).  */
  unsigned int assoc;
  /* Size of a cache line.  */
  unsigned int line;
};
36
37/* Returns command line parameters that describe size and
38 cache line size of the processor caches. */
2711355f
ZD
39
40static char *
cb0dee88 41describe_cache (struct cache_desc level1, struct cache_desc level2)
2711355f 42{
f4a1dd0d 43 char size[100], line[100], size2[100];
2711355f 44
cb0dee88
UB
45 /* At the moment, gcc does not use the information
46 about the associativity of the cache. */
47
f3afc8a7
UB
48 snprintf (size, sizeof (size),
49 "--param l1-cache-size=%u ", level1.sizekb);
50 snprintf (line, sizeof (line),
51 "--param l1-cache-line-size=%u ", level1.line);
2711355f 52
f3afc8a7
UB
53 snprintf (size2, sizeof (size2),
54 "--param l2-cache-size=%u ", level2.sizekb);
2711355f 55
f3afc8a7 56 return concat (size, line, size2, NULL);
f4a1dd0d
ZM
57}
58
cb0dee88
UB
59/* Detect L2 cache parameters using CPUID extended function 0x80000006. */
60
f4a1dd0d 61static void
cb0dee88 62detect_l2_cache (struct cache_desc *level2)
f4a1dd0d 63{
cb0dee88
UB
64 unsigned eax, ebx, ecx, edx;
65 unsigned assoc;
f4a1dd0d
ZM
66
67 __cpuid (0x80000006, eax, ebx, ecx, edx);
68
cb0dee88
UB
69 level2->sizekb = (ecx >> 16) & 0xffff;
70 level2->line = ecx & 0xff;
71
f4a1dd0d
ZM
72 assoc = (ecx >> 12) & 0xf;
73 if (assoc == 6)
74 assoc = 8;
75 else if (assoc == 8)
76 assoc = 16;
77 else if (assoc >= 0xa && assoc <= 0xc)
78 assoc = 32 + (assoc - 0xa) * 16;
79 else if (assoc >= 0xd && assoc <= 0xe)
80 assoc = 96 + (assoc - 0xd) * 32;
cb0dee88
UB
81
82 level2->assoc = assoc;
2711355f
ZD
83}
84
85/* Returns the description of caches for an AMD processor. */
86
d3bfe4de 87static const char *
2711355f
ZD
88detect_caches_amd (unsigned max_ext_level)
89{
90 unsigned eax, ebx, ecx, edx;
cb0dee88
UB
91
92 struct cache_desc level1, level2 = {0, 0, 0};
2711355f
ZD
93
94 if (max_ext_level < 0x80000005)
d3bfe4de 95 return "";
2711355f 96
b3172cab 97 __cpuid (0x80000005, eax, ebx, ecx, edx);
2711355f 98
cb0dee88
UB
99 level1.sizekb = (ecx >> 24) & 0xff;
100 level1.assoc = (ecx >> 16) & 0xff;
101 level1.line = ecx & 0xff;
2711355f 102
f4a1dd0d 103 if (max_ext_level >= 0x80000006)
cb0dee88 104 detect_l2_cache (&level2);
f4a1dd0d 105
cb0dee88 106 return describe_cache (level1, level2);
2711355f
ZD
107}
108
cb0dee88
UB
109/* Decodes the size, the associativity and the cache line size of
110 L1/L2 caches of an Intel processor. Values are based on
111 "Intel Processor Identification and the CPUID Instruction"
112 [Application Note 485], revision -032, December 2007. */
2711355f
ZD
113
114static void
cb0dee88
UB
115decode_caches_intel (unsigned reg, bool xeon_mp,
116 struct cache_desc *level1, struct cache_desc *level2)
2711355f 117{
cb0dee88
UB
118 int i;
119
120 for (i = 24; i >= 0; i -= 8)
121 switch ((reg >> i) & 0xff)
122 {
123 case 0x0a:
124 level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
125 break;
126 case 0x0c:
127 level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
128 break;
f313cce5
UB
129 case 0x0d:
130 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
131 break;
132 case 0x0e:
133 level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
134 break;
135 case 0x21:
136 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
137 break;
138 case 0x24:
139 level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
140 break;
cb0dee88
UB
141 case 0x2c:
142 level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
143 break;
144 case 0x39:
145 level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
146 break;
147 case 0x3a:
148 level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
149 break;
150 case 0x3b:
151 level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
152 break;
153 case 0x3c:
154 level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
155 break;
156 case 0x3d:
157 level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
158 break;
159 case 0x3e:
160 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
161 break;
162 case 0x41:
163 level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
164 break;
165 case 0x42:
166 level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
167 break;
168 case 0x43:
169 level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
170 break;
171 case 0x44:
172 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
173 break;
174 case 0x45:
175 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
176 break;
f313cce5
UB
177 case 0x48:
178 level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
179 break;
cb0dee88
UB
180 case 0x49:
181 if (xeon_mp)
182 break;
183 level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
184 break;
185 case 0x4e:
186 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
187 break;
188 case 0x60:
189 level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
190 break;
191 case 0x66:
192 level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
193 break;
194 case 0x67:
195 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
196 break;
197 case 0x68:
198 level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
199 break;
200 case 0x78:
201 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
202 break;
203 case 0x79:
204 level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
205 break;
206 case 0x7a:
207 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
208 break;
209 case 0x7b:
210 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
211 break;
212 case 0x7c:
213 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
214 break;
215 case 0x7d:
216 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
217 break;
218 case 0x7f:
219 level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
220 break;
f313cce5
UB
221 case 0x80:
222 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
223 break;
cb0dee88
UB
224 case 0x82:
225 level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
226 break;
227 case 0x83:
228 level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
229 break;
230 case 0x84:
231 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
232 break;
233 case 0x85:
234 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
235 break;
236 case 0x86:
237 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
238 break;
239 case 0x87:
240 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
241
242 default:
243 break;
244 }
245}
2711355f 246
/* Detect cache parameters using CPUID function 2.  The low four bits
   of EAX from the first call give the number of rounds; in every
   round each non-zero register whose bit 31 is clear is handed to
   decode_caches_intel.  XEON_MP is passed through unchanged.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int rounds_left;
  int r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* Take the round count out of EAX and clear those bits so they are
     not decoded as part of a descriptor.  */
  rounds_left = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  for (; rounds_left > 0; rounds_left--)
    {
      for (r = 0; r < 4; r++)
        {
          if (regs[r] == 0)
            continue;
          if ((regs[r] >> 31) & 1)
            continue;
          decode_caches_intel (regs[r], xeon_mp, level1, level2);
        }

      /* Query again unless this was the final round.  */
      if (rounds_left > 1)
        __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
2711355f 271
cb0dee88
UB
/* Detect cache parameters using CPUID function 4.  This
   method doesn't require hardcoded tables.  */

/* Cache type codes compared against the low five bits of EAX
   returned by CPUID function 4.  */
enum cache_type
{
  CACHE_END = 0,        /* Ends the enumeration.  */
  CACHE_DATA,           /* 1: recorded by detect_caches_cpuid4.  */
  CACHE_INST,           /* 2: not recorded by detect_caches_cpuid4.  */
  CACHE_UNIFIED         /* 3: recorded by detect_caches_cpuid4.  */
};
282
283static void
a0463099
AK
284detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
285 struct cache_desc *level3)
cb0dee88
UB
286{
287 struct cache_desc *cache;
288
289 unsigned eax, ebx, ecx, edx;
290 int count;
291
292 for (count = 0;; count++)
293 {
294 __cpuid_count(4, count, eax, ebx, ecx, edx);
295 switch (eax & 0x1f)
296 {
297 case CACHE_END:
298 return;
299 case CACHE_DATA:
300 case CACHE_UNIFIED:
301 {
302 switch ((eax >> 5) & 0x07)
303 {
304 case 1:
305 cache = level1;
306 break;
307 case 2:
308 cache = level2;
309 break;
a0463099
AK
310 case 3:
311 cache = level3;
312 break;
cb0dee88
UB
313 default:
314 cache = NULL;
315 }
316
317 if (cache)
318 {
319 unsigned sets = ecx + 1;
dc8bd8d9 320 unsigned part = ((ebx >> 12) & 0x03ff) + 1;
cb0dee88 321
dc8bd8d9 322 cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
cb0dee88 323 cache->line = (ebx & 0x0fff) + 1;
cb0dee88
UB
324
325 cache->sizekb = (cache->assoc * part
326 * cache->line * sets) / 1024;
a0463099 327 }
cb0dee88 328 }
2711355f
ZD
329 default:
330 break;
331 }
332 }
333}
334
cb0dee88 335/* Returns the description of caches for an Intel processor. */
2711355f 336
d3bfe4de 337static const char *
a0463099
AK
338detect_caches_intel (bool xeon_mp, unsigned max_level,
339 unsigned max_ext_level, unsigned *l2sizekb)
2711355f 340{
a0463099 341 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
2711355f 342
cb0dee88 343 if (max_level >= 4)
a0463099 344 detect_caches_cpuid4 (&level1, &level2, &level3);
cb0dee88
UB
345 else if (max_level >= 2)
346 detect_caches_cpuid2 (xeon_mp, &level1, &level2);
347 else
d3bfe4de 348 return "";
2711355f 349
cb0dee88 350 if (level1.sizekb == 0)
d3bfe4de 351 return "";
2711355f 352
a0463099
AK
353 /* Let the L3 replace the L2. This assumes inclusive caches
354 and single threaded program for now. */
355 if (level3.sizekb)
356 level2 = level3;
357
cb0dee88
UB
358 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
359 method if other methods fail to provide L2 cache parameters. */
360 if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
361 detect_l2_cache (&level2);
f4a1dd0d 362
a0463099
AK
363 *l2sizekb = level2.sizekb;
364
cb0dee88 365 return describe_cache (level1, level2);
2711355f
ZD
366}
367
fa959ce4
MM
368/* This will be called by the spec parser in gcc.c when it sees
369 a %:local_cpu_detect(args) construct. Currently it will be called
370 with either "arch" or "tune" as argument depending on if -march=native
371 or -mtune=native is to be substituted.
372
373 It returns a string containing new command line parameters to be
374 put at the place of the above two options, depending on what CPU
375 this is executed. E.g. "-march=k8" on an AMD64 machine
376 for -march=native.
377
378 ARGC and ARGV are set depending on the actual arguments given
379 in the spec. */
b3172cab 380
fa959ce4
MM
381const char *host_detect_local_cpu (int argc, const char **argv)
382{
b3172cab
UB
383 enum processor_type processor = PROCESSOR_I386;
384 const char *cpu = "i386";
385
2711355f 386 const char *cache = "";
5be6cb59 387 const char *options = "";
b3172cab 388
cb0dee88 389 unsigned int eax, ebx, ecx, edx;
b3172cab
UB
390
391 unsigned int max_level, ext_level;
cb0dee88 392
fa959ce4 393 unsigned int vendor;
cb0dee88 394 unsigned int model, family;
b3172cab
UB
395
396 unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
397 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
398
399 /* Extended features */
400 unsigned int has_lahf_lm = 0, has_sse4a = 0;
401 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
634fa334 402 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
7afac110 403 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
8ad9d49e 404 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
5eed4f27 405 unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
82feeb8d 406 unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
73e32c47 407 unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0;
d1925759 408 unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
d05e383b 409 unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
3a0d99bb 410 unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
3f97cb0b 411 unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
43b3f52f 412 unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
9cdea277 413 unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
f4af595f 414 unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
9c3bca11 415 unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
1d516992 416 unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
5fbb13a7 417 unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
b8cca31c 418 unsigned int has_gfni = 0;
b3172cab 419
edccdcb1
L
420 bool arch;
421
a0463099
AK
422 unsigned int l2sizekb = 0;
423
edccdcb1
L
424 if (argc < 1)
425 return NULL;
426
b3172cab
UB
427 arch = !strcmp (argv[0], "arch");
428
edccdcb1 429 if (!arch && strcmp (argv[0], "tune"))
fa959ce4
MM
430 return NULL;
431
b3172cab
UB
432 max_level = __get_cpuid_max (0, &vendor);
433 if (max_level < 1)
fa959ce4 434 goto done;
fa959ce4 435
b3172cab 436 __cpuid (1, eax, ebx, ecx, edx);
fa959ce4 437
cb0dee88 438 model = (eax >> 4) & 0x0f;
b3172cab 439 family = (eax >> 8) & 0x0f;
d478ac59
GG
440 if (vendor == signature_INTEL_ebx
441 || vendor == signature_AMD_ebx)
37c50435
L
442 {
443 unsigned int extended_model, extended_family;
444
445 extended_model = (eax >> 12) & 0xf0;
446 extended_family = (eax >> 20) & 0xff;
447 if (family == 0x0f)
448 {
449 family += extended_family;
450 model += extended_model;
451 }
452 else if (family == 0x06)
453 model += extended_model;
454 }
b3172cab
UB
455
456 has_sse3 = ecx & bit_SSE3;
457 has_ssse3 = ecx & bit_SSSE3;
634fa334
L
458 has_sse4_1 = ecx & bit_SSE4_1;
459 has_sse4_2 = ecx & bit_SSE4_2;
460 has_avx = ecx & bit_AVX;
a91529c4 461 has_osxsave = ecx & bit_OSXSAVE;
b3172cab 462 has_cmpxchg16b = ecx & bit_CMPXCHG16B;
cabf85c3 463 has_movbe = ecx & bit_MOVBE;
634fa334
L
464 has_popcnt = ecx & bit_POPCNT;
465 has_aes = ecx & bit_AES;
466 has_pclmul = ecx & bit_PCLMUL;
5eed4f27 467 has_fma = ecx & bit_FMA;
d1925759
L
468 has_f16c = ecx & bit_F16C;
469 has_rdrnd = ecx & bit_RDRND;
3a0d99bb 470 has_xsave = ecx & bit_XSAVE;
fa959ce4 471
b3172cab
UB
472 has_cmpxchg8b = edx & bit_CMPXCHG8B;
473 has_cmov = edx & bit_CMOV;
474 has_mmx = edx & bit_MMX;
3a0d99bb 475 has_fxsr = edx & bit_FXSAVE;
b3172cab
UB
476 has_sse = edx & bit_SSE;
477 has_sse2 = edx & bit_SSE2;
478
2c9b39ef
L
479 if (max_level >= 7)
480 {
481 __cpuid_count (7, 0, eax, ebx, ecx, edx);
482
483 has_bmi = ebx & bit_BMI;
73e32c47 484 has_sgx = ebx & bit_SGX;
5dcfdccd 485 has_hle = ebx & bit_HLE;
76a02e42 486 has_rtm = ebx & bit_RTM;
2c9b39ef
L
487 has_avx2 = ebx & bit_AVX2;
488 has_bmi2 = ebx & bit_BMI2;
d1925759 489 has_fsgsbase = ebx & bit_FSGSBASE;
4c340b5d 490 has_rdseed = ebx & bit_RDSEED;
d05e383b 491 has_adx = ebx & bit_ADX;
3f97cb0b
AI
492 has_avx512f = ebx & bit_AVX512F;
493 has_avx512er = ebx & bit_AVX512ER;
494 has_avx512pf = ebx & bit_AVX512PF;
495 has_avx512cd = ebx & bit_AVX512CD;
c1618f82 496 has_sha = ebx & bit_SHA;
9cdea277 497 has_clflushopt = ebx & bit_CLFLUSHOPT;
9c3bca11 498 has_clwb = ebx & bit_CLWB;
07165dd7 499 has_avx512dq = ebx & bit_AVX512DQ;
b525d943 500 has_avx512bw = ebx & bit_AVX512BW;
f4af595f 501 has_avx512vl = ebx & bit_AVX512VL;
21272090 502 has_avx512ifma = ebx & bit_AVX512IFMA;
43b3f52f
IT
503
504 has_prefetchwt1 = ecx & bit_PREFETCHWT1;
41a4ef22
KY
505 has_avx512vbmi = ecx & bit_AVX512VBMI;
506 has_pku = ecx & bit_OSPKE;
1d516992 507 has_rdpid = ecx & bit_RDPID;
b8cca31c 508 has_gfni = ecx & bit_GFNI;
1d516992 509
5fbb13a7
KY
510 has_avx5124vnniw = edx & bit_AVX5124VNNIW;
511 has_avx5124fmaps = edx & bit_AVX5124FMAPS;
2c9b39ef
L
512 }
513
3a0d99bb
AI
514 if (max_level >= 13)
515 {
516 __cpuid_count (13, 1, eax, ebx, ecx, edx);
517
518 has_xsaveopt = eax & bit_XSAVEOPT;
9cdea277
IT
519 has_xsavec = eax & bit_XSAVEC;
520 has_xsaves = eax & bit_XSAVES;
3a0d99bb
AI
521 }
522
d0b50387
JJ
523 /* Check cpuid level of extended features. */
524 __cpuid (0x80000000, ext_level, ebx, ecx, edx);
525
0a2d7bc0 526 if (ext_level >= 0x80000001)
d0b50387
JJ
527 {
528 __cpuid (0x80000001, eax, ebx, ecx, edx);
529
530 has_lahf_lm = ecx & bit_LAHF_LM;
531 has_sse4a = ecx & bit_SSE4a;
532 has_abm = ecx & bit_ABM;
533 has_lwp = ecx & bit_LWP;
534 has_fma4 = ecx & bit_FMA4;
535 has_xop = ecx & bit_XOP;
536 has_tbm = ecx & bit_TBM;
537 has_lzcnt = ecx & bit_LZCNT;
538 has_prfchw = ecx & bit_PRFCHW;
539
540 has_longmode = edx & bit_LM;
541 has_3dnowp = edx & bit_3DNOWP;
542 has_3dnow = edx & bit_3DNOW;
500a08b2 543 has_mwaitx = ecx & bit_MWAITX;
0a2d7bc0 544 }
9ce29eb0 545
0a2d7bc0
UB
546 if (ext_level >= 0x80000008)
547 {
9ce29eb0
VK
548 __cpuid (0x80000008, eax, ebx, ecx, edx);
549 has_clzero = ebx & bit_CLZERO;
d0b50387
JJ
550 }
551
a91529c4
L
552 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
553#define XCR_XFEATURE_ENABLED_MASK 0x0
554#define XSTATE_FP 0x1
555#define XSTATE_SSE 0x2
556#define XSTATE_YMM 0x4
2c12f2f4
IT
557#define XSTATE_OPMASK 0x20
558#define XSTATE_ZMM 0x40
559#define XSTATE_HI_ZMM 0x80
0a2d7bc0
UB
560
561#define XCR_AVX_ENABLED_MASK \
562 (XSTATE_SSE | XSTATE_YMM)
563#define XCR_AVX512F_ENABLED_MASK \
564 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
565
a91529c4
L
566 if (has_osxsave)
567 asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
568 : "=a" (eax), "=d" (edx)
569 : "c" (XCR_XFEATURE_ENABLED_MASK));
0a2d7bc0
UB
570 else
571 eax = 0;
a91529c4 572
0a2d7bc0
UB
573 /* Check if AVX registers are supported. */
574 if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK)
a91529c4
L
575 {
576 has_avx = 0;
577 has_avx2 = 0;
578 has_fma = 0;
579 has_fma4 = 0;
d0b50387 580 has_f16c = 0;
a91529c4 581 has_xop = 0;
3a0d99bb
AI
582 has_xsave = 0;
583 has_xsaveopt = 0;
9cdea277
IT
584 has_xsaves = 0;
585 has_xsavec = 0;
a91529c4
L
586 }
587
0a2d7bc0
UB
588 /* Check if AVX512F registers are supported. */
589 if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK)
2c12f2f4
IT
590 {
591 has_avx512f = 0;
592 has_avx512er = 0;
593 has_avx512pf = 0;
594 has_avx512cd = 0;
595 has_avx512dq = 0;
596 has_avx512bw = 0;
597 has_avx512vl = 0;
598 }
599
2711355f
ZD
600 if (!arch)
601 {
19db293a 602 if (vendor == signature_AMD_ebx
af0e415b
UB
603 || vendor == signature_CENTAUR_ebx
604 || vendor == signature_CYRIX_ebx
7b9d1bd8 605 || vendor == signature_NSC_ebx)
2711355f 606 cache = detect_caches_amd (ext_level);
ef64d158 607 else if (vendor == signature_INTEL_ebx)
cb0dee88
UB
608 {
609 bool xeon_mp = (family == 15 && model == 6);
a0463099
AK
610 cache = detect_caches_intel (xeon_mp, max_level,
611 ext_level, &l2sizekb);
cb0dee88 612 }
2711355f
ZD
613 }
614
ef64d158 615 if (vendor == signature_AMD_ebx)
fa959ce4 616 {
fbdf817d 617 unsigned int name;
b3172cab 618
fbdf817d 619 /* Detect geode processor by its processor signature. */
0a2d7bc0 620 if (ext_level >= 0x80000002)
fbdf817d
UB
621 __cpuid (0x80000002, name, ebx, ecx, edx);
622 else
623 name = 0;
624
ef64d158 625 if (name == signature_NSC_ebx)
fbdf817d 626 processor = PROCESSOR_GEODE;
d478ac59 627 else if (has_movbe && family == 22)
e32bfc16 628 processor = PROCESSOR_BTVER2;
9ce29eb0
VK
629 else if (has_clzero)
630 processor = PROCESSOR_ZNVER1;
ed97ad47
GG
631 else if (has_avx2)
632 processor = PROCESSOR_BDVER4;
eb2f2b44
GG
633 else if (has_xsaveopt)
634 processor = PROCESSOR_BDVER3;
4d652a18
HJ
635 else if (has_bmi)
636 processor = PROCESSOR_BDVER2;
1133125e
HJ
637 else if (has_xop)
638 processor = PROCESSOR_BDVER1;
14b52538
CF
639 else if (has_sse4a && has_ssse3)
640 processor = PROCESSOR_BTVER1;
fbdf817d 641 else if (has_sse4a)
35a63f21 642 processor = PROCESSOR_AMDFAM10;
fbdf817d
UB
643 else if (has_sse2 || has_longmode)
644 processor = PROCESSOR_K8;
f7593cb4 645 else if (has_3dnowp && family == 6)
fbdf817d
UB
646 processor = PROCESSOR_ATHLON;
647 else if (has_mmx)
648 processor = PROCESSOR_K6;
649 else
650 processor = PROCESSOR_PENTIUM;
fa959ce4 651 }
19db293a
UB
652 else if (vendor == signature_CENTAUR_ebx)
653 {
4bdf739d
UB
654 processor = PROCESSOR_GENERIC;
655
656 switch (family)
19db293a 657 {
4bdf739d
UB
658 default:
659 /* We have no idea. */
660 break;
661
662 case 5:
663 if (has_3dnow || has_mmx)
664 processor = PROCESSOR_I486;
665 break;
666
667 case 6:
d3606ee3
JM
668 if (has_longmode)
669 processor = PROCESSOR_K8;
a239d460 670 else if (model >= 9)
4bdf739d
UB
671 processor = PROCESSOR_PENTIUMPRO;
672 else if (model >= 6)
673 processor = PROCESSOR_I486;
19db293a
UB
674 }
675 }
fa959ce4
MM
676 else
677 {
edccdcb1
L
678 switch (family)
679 {
b3172cab
UB
680 case 4:
681 processor = PROCESSOR_I486;
682 break;
edccdcb1 683 case 5:
b3172cab 684 processor = PROCESSOR_PENTIUM;
edccdcb1
L
685 break;
686 case 6:
687 processor = PROCESSOR_PENTIUMPRO;
688 break;
689 case 15:
690 processor = PROCESSOR_PENTIUM4;
691 break;
692 default:
b3172cab 693 /* We have no idea. */
9d532162 694 processor = PROCESSOR_GENERIC;
edccdcb1
L
695 }
696 }
697
698 switch (processor)
699 {
700 case PROCESSOR_I386:
b3172cab 701 /* Default. */
edccdcb1
L
702 break;
703 case PROCESSOR_I486:
4bdf739d
UB
704 if (arch && vendor == signature_CENTAUR_ebx)
705 {
706 if (model >= 6)
707 cpu = "c3";
708 else if (has_3dnow)
709 cpu = "winchip2";
710 else
711 /* Assume WinChip C6. */
712 cpu = "winchip-c6";
713 }
714 else
715 cpu = "i486";
edccdcb1
L
716 break;
717 case PROCESSOR_PENTIUM:
b3172cab 718 if (arch && has_mmx)
edccdcb1
L
719 cpu = "pentium-mmx";
720 else
721 cpu = "pentium";
722 break;
723 case PROCESSOR_PENTIUMPRO:
44f276c6 724 switch (model)
edccdcb1 725 {
44f276c6
L
726 case 0x1c:
727 case 0x26:
d3c11974
L
728 /* Bonnell. */
729 cpu = "bonnell";
44f276c6 730 break;
e5287671 731 case 0x37:
c8f2dff2 732 case 0x4a:
e5287671 733 case 0x4d:
c8f2dff2
L
734 case 0x5a:
735 case 0x5d:
e5287671 736 /* Silvermont. */
d3c11974 737 cpu = "silvermont";
e5287671 738 break;
992592ec
CW
739 case 0x0f:
740 /* Merom. */
741 case 0x17:
742 case 0x1d:
743 /* Penryn. */
744 cpu = "core2";
745 break;
44f276c6
L
746 case 0x1a:
747 case 0x1e:
748 case 0x1f:
749 case 0x2e:
eefe143b 750 /* Nehalem. */
d3c11974
L
751 cpu = "nehalem";
752 break;
44f276c6 753 case 0x25:
12bbb78f 754 case 0x2c:
44f276c6 755 case 0x2f:
eefe143b 756 /* Westmere. */
d3c11974 757 cpu = "westmere";
44f276c6 758 break;
35758e5b 759 case 0x2a:
815cecbe 760 case 0x2d:
35758e5b 761 /* Sandy Bridge. */
d3c11974 762 cpu = "sandybridge";
35758e5b 763 break;
992592ec
CW
764 case 0x3a:
765 case 0x3e:
766 /* Ivy Bridge. */
d3c11974 767 cpu = "ivybridge";
44f276c6 768 break;
992592ec 769 case 0x3c:
c8f2dff2 770 case 0x3f:
d0cf4e84
L
771 case 0x45:
772 case 0x46:
992592ec 773 /* Haswell. */
d3c11974 774 cpu = "haswell";
44f276c6 775 break;
c8f2dff2 776 case 0x3d:
dc04bc84 777 case 0x47:
c8f2dff2
L
778 case 0x4f:
779 case 0x56:
780 /* Broadwell. */
781 cpu = "broadwell";
782 break;
3e0f3349
YR
783 case 0x4e:
784 case 0x5e:
785 /* Skylake. */
60edf8bb
MT
786 case 0x8e:
787 case 0x9e:
788 /* Kaby Lake. */
3e0f3349
YR
789 cpu = "skylake";
790 break;
c8f2dff2
L
791 case 0x57:
792 /* Knights Landing. */
793 cpu = "knl";
794 break;
cace2309
SP
795 case 0x85:
796 /* Knights Mill. */
797 cpu = "knm";
798 break;
44f276c6
L
799 default:
800 if (arch)
801 {
4ffae7ff 802 /* This is unknown family 0x6 CPU. */
52747219
IT
803 /* Assume Knights Landing. */
804 if (has_avx512f)
805 cpu = "knl";
cace2309
SP
806 /* Assume Knights Mill */
807 else if (has_avx5124vnniw)
808 cpu = "knm";
60edf8bb
MT
809 /* Assume Skylake. */
810 else if (has_clflushopt)
811 cpu = "skylake";
52747219
IT
812 /* Assume Broadwell. */
813 else if (has_adx)
19ac6899
TI
814 cpu = "broadwell";
815 else if (has_avx2)
992592ec 816 /* Assume Haswell. */
d3c11974 817 cpu = "haswell";
992592ec 818 else if (has_avx)
4ffae7ff 819 /* Assume Sandy Bridge. */
d3c11974 820 cpu = "sandybridge";
4ffae7ff 821 else if (has_sse4_2)
0b871ccf
YR
822 {
823 if (has_movbe)
d3c11974
L
824 /* Assume Silvermont. */
825 cpu = "silvermont";
0b871ccf 826 else
d3c11974
L
827 /* Assume Nehalem. */
828 cpu = "nehalem";
0b871ccf 829 }
4ffae7ff
L
830 else if (has_ssse3)
831 {
832 if (has_movbe)
d3c11974
L
833 /* Assume Bonnell. */
834 cpu = "bonnell";
4ffae7ff
L
835 else
836 /* Assume Core 2. */
837 cpu = "core2";
838 }
8d37375b
JJ
839 else if (has_longmode)
840 /* Perhaps some emulator? Assume x86-64, otherwise gcc
841 -march=native would be unusable for 64-bit compilations,
842 as all the CPUs below are 32-bit only. */
843 cpu = "x86-64";
fb112177 844 else if (has_sse3)
a239d460
JM
845 {
846 if (vendor == signature_CENTAUR_ebx)
847 /* C7 / Eden "Esther" */
848 cpu = "c7";
849 else
850 /* It is Core Duo. */
851 cpu = "pentium-m";
852 }
fb112177
L
853 else if (has_sse2)
854 /* It is Pentium M. */
855 cpu = "pentium-m";
856 else if (has_sse)
4bdf739d
UB
857 {
858 if (vendor == signature_CENTAUR_ebx)
a239d460
JM
859 {
860 if (model >= 9)
861 /* Eden "Nehemiah" */
862 cpu = "nehemiah";
863 else
864 cpu = "c3-2";
865 }
4bdf739d
UB
866 else
867 /* It is Pentium III. */
868 cpu = "pentium3";
869 }
fb112177
L
870 else if (has_mmx)
871 /* It is Pentium II. */
872 cpu = "pentium2";
44f276c6 873 else
fb112177
L
874 /* Default to Pentium Pro. */
875 cpu = "pentiumpro";
44f276c6 876 }
b3172cab 877 else
44f276c6
L
878 /* For -mtune, we default to -mtune=generic. */
879 cpu = "generic";
880 break;
fa959ce4 881 }
b3172cab
UB
882 break;
883 case PROCESSOR_PENTIUM4:
884 if (has_sse3)
fa959ce4 885 {
b3172cab
UB
886 if (has_longmode)
887 cpu = "nocona";
fa959ce4 888 else
fb112177 889 cpu = "prescott";
fa959ce4 890 }
b3172cab 891 else
fb112177 892 cpu = "pentium4";
edccdcb1
L
893 break;
894 case PROCESSOR_GEODE:
895 cpu = "geode";
896 break;
897 case PROCESSOR_K6:
b3172cab
UB
898 if (arch && has_3dnow)
899 cpu = "k6-3";
edccdcb1
L
900 else
901 cpu = "k6";
902 break;
903 case PROCESSOR_ATHLON:
b3172cab 904 if (arch && has_sse)
edccdcb1
L
905 cpu = "athlon-4";
906 else
907 cpu = "athlon";
908 break;
edccdcb1 909 case PROCESSOR_K8:
d3606ee3
JM
910 if (arch)
911 {
912 if (vendor == signature_CENTAUR_ebx)
913 {
914 if (has_sse4_1)
915 /* Nano 3000 | Nano dual / quad core | Eden X4 */
916 cpu = "nano-3000";
917 else if (has_ssse3)
918 /* Nano 1000 | Nano 2000 */
919 cpu = "nano";
920 else if (has_sse3)
921 /* Eden X2 */
922 cpu = "eden-x2";
923 else
924 /* Default to k8 */
925 cpu = "k8";
926 }
927 else if (has_sse3)
928 cpu = "k8-sse3";
929 else
930 cpu = "k8";
931 }
b3172cab 932 else
d3606ee3 933 /* For -mtune, we default to -mtune=k8 */
b3172cab 934 cpu = "k8";
edccdcb1 935 break;
35a63f21
DR
936 case PROCESSOR_AMDFAM10:
937 cpu = "amdfam10";
938 break;
1133125e
HJ
939 case PROCESSOR_BDVER1:
940 cpu = "bdver1";
941 break;
4d652a18
HJ
942 case PROCESSOR_BDVER2:
943 cpu = "bdver2";
944 break;
eb2f2b44
GG
945 case PROCESSOR_BDVER3:
946 cpu = "bdver3";
947 break;
ed97ad47
GG
948 case PROCESSOR_BDVER4:
949 cpu = "bdver4";
950 break;
9ce29eb0
VK
951 case PROCESSOR_ZNVER1:
952 cpu = "znver1";
953 break;
14b52538
CF
954 case PROCESSOR_BTVER1:
955 cpu = "btver1";
956 break;
e32bfc16
VK
957 case PROCESSOR_BTVER2:
958 cpu = "btver2";
959 break;
b3172cab 960
edccdcb1 961 default:
b3172cab
UB
962 /* Use something reasonable. */
963 if (arch)
964 {
965 if (has_ssse3)
966 cpu = "core2";
967 else if (has_sse3)
968 {
969 if (has_longmode)
970 cpu = "nocona";
971 else
972 cpu = "prescott";
973 }
4bdf739d
UB
974 else if (has_longmode)
975 /* Perhaps some emulator? Assume x86-64, otherwise gcc
976 -march=native would be unusable for 64-bit compilations,
977 as all the CPUs below are 32-bit only. */
978 cpu = "x86-64";
b3172cab
UB
979 else if (has_sse2)
980 cpu = "pentium4";
981 else if (has_cmov)
982 cpu = "pentiumpro";
983 else if (has_mmx)
984 cpu = "pentium-mmx";
985 else if (has_cmpxchg8b)
986 cpu = "pentium";
987 }
988 else
989 cpu = "generic";
fa959ce4
MM
990 }
991
5be6cb59
UB
992 if (arch)
993 {
11c2aa39
UB
994 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
995 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
996 const char *sse = has_sse ? " -msse" : " -mno-sse";
997 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
998 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
999 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
1000 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
5eed4f27
L
1001 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
1002 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
1003 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
11c2aa39 1004 const char *aes = has_aes ? " -maes" : " -mno-aes";
c1618f82 1005 const char *sha = has_sha ? " -msha" : " -mno-sha";
5eed4f27
L
1006 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
1007 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
1008 const char *abm = has_abm ? " -mabm" : " -mno-abm";
1009 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
1010 const char *fma = has_fma ? " -mfma" : " -mno-fma";
1011 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
1012 const char *xop = has_xop ? " -mxop" : " -mno-xop";
1013 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
73e32c47 1014 const char *sgx = has_sgx ? " -msgx" : " -mno-sgx";
82feeb8d 1015 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
5eed4f27
L
1016 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
1017 const char *avx = has_avx ? " -mavx" : " -mno-avx";
7afac110 1018 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
642a011d 1019 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
5eed4f27 1020 const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
3ed2c643 1021 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
38d7f26e 1022 const char *hle = has_hle ? " -mhle" : " -mno-hle";
76a02e42 1023 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
d1925759
L
1024 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
1025 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
1026 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
4c340b5d 1027 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
e61c94dd 1028 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
d05e383b 1029 const char *adx = has_adx ? " -madx" : " -mno-adx";
3a0d99bb
AI
1030 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
1031 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
1032 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
3f97cb0b
AI
1033 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
1034 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
1035 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
1036 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
43b3f52f 1037 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
9cdea277
IT
1038 const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
1039 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
1040 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
07165dd7 1041 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
b525d943 1042 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
f4af595f 1043 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
4190ea38 1044 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
3dcc8af5 1045 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
5fbb13a7
KY
1046 const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1047 const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
9c3bca11 1048 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
500a08b2 1049 const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
9ce29eb0 1050 const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
41a4ef22 1051 const char *pku = has_pku ? " -mpku" : " -mno-pku";
1d516992 1052 const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid";
b8cca31c 1053 const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni";
11c2aa39 1054 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
c1618f82 1055 sse4a, cx16, sahf, movbe, aes, sha, pclmul,
73e32c47 1056 popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
76a02e42 1057 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
3a0d99bb 1058 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
3f97cb0b 1059 fxsr, xsave, xsaveopt, avx512f, avx512er,
9cdea277 1060 avx512cd, avx512pf, prefetchwt1, clflushopt,
f4af595f 1061 xsavec, xsaves, avx512dq, avx512bw, avx512vl,
5fbb13a7 1062 avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
b8cca31c 1063 clwb, mwaitx, clzero, pku, rdpid, gfni, NULL);
5be6cb59
UB
1064 }
1065
fa959ce4 1066done:
f3afc8a7 1067 return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
fa959ce4
MM
1068}
1069#else
b3172cab 1070
02147868
UB
/* Fallback used when the preprocessor condition guarding the CPUID
   based implementation does not hold (per the original note: GCC
   where the %EBX register is fixed).  No detection is attempted and
   NULL is returned, so the driver will just ignore -march and -mtune
   "native" and leave it to the newly built compiler to generate code
   for its default target.  */

const char *
host_detect_local_cpu (int, const char **)
{
  const char *no_detection = NULL;

  return no_detection;
}
a6ecb05c 1079#endif /* __GNUC__ */