]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/driver-i386.c
i386-common.c (OPTION_MASK_ISA_MOVDIRI_SET, [...]): New defines.
[thirdparty/gcc.git] / gcc / config / i386 / driver-i386.c
1 /* Subroutines for the gcc driver.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #define IN_TARGET_CODE 1
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26
27 const char *host_detect_local_cpu (int argc, const char **argv);
28
29 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
30 #include "cpuid.h"
31
/* Parameters of one cache level as gathered from CPUID.  */
struct cache_desc
{
  unsigned int sizekb;  /* Cache size in kilobytes.  */
  unsigned int assoc;   /* Associativity (number of ways).  */
  unsigned int line;    /* Cache line size.  */
};
38
39 /* Returns command line parameters that describe size and
40 cache line size of the processor caches. */
41
42 static char *
43 describe_cache (struct cache_desc level1, struct cache_desc level2)
44 {
45 char size[100], line[100], size2[100];
46
47 /* At the moment, gcc does not use the information
48 about the associativity of the cache. */
49
50 snprintf (size, sizeof (size),
51 "--param l1-cache-size=%u ", level1.sizekb);
52 snprintf (line, sizeof (line),
53 "--param l1-cache-line-size=%u ", level1.line);
54
55 snprintf (size2, sizeof (size2),
56 "--param l2-cache-size=%u ", level2.sizekb);
57
58 return concat (size, line, size2, NULL);
59 }
60
61 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
62
63 static void
64 detect_l2_cache (struct cache_desc *level2)
65 {
66 unsigned eax, ebx, ecx, edx;
67 unsigned assoc;
68
69 __cpuid (0x80000006, eax, ebx, ecx, edx);
70
71 level2->sizekb = (ecx >> 16) & 0xffff;
72 level2->line = ecx & 0xff;
73
74 assoc = (ecx >> 12) & 0xf;
75 if (assoc == 6)
76 assoc = 8;
77 else if (assoc == 8)
78 assoc = 16;
79 else if (assoc >= 0xa && assoc <= 0xc)
80 assoc = 32 + (assoc - 0xa) * 16;
81 else if (assoc >= 0xd && assoc <= 0xe)
82 assoc = 96 + (assoc - 0xd) * 32;
83
84 level2->assoc = assoc;
85 }
86
87 /* Returns the description of caches for an AMD processor. */
88
89 static const char *
90 detect_caches_amd (unsigned max_ext_level)
91 {
92 unsigned eax, ebx, ecx, edx;
93
94 struct cache_desc level1, level2 = {0, 0, 0};
95
96 if (max_ext_level < 0x80000005)
97 return "";
98
99 __cpuid (0x80000005, eax, ebx, ecx, edx);
100
101 level1.sizekb = (ecx >> 24) & 0xff;
102 level1.assoc = (ecx >> 16) & 0xff;
103 level1.line = ecx & 0xff;
104
105 if (max_ext_level >= 0x80000006)
106 detect_l2_cache (&level2);
107
108 return describe_cache (level1, level2);
109 }
110
111 /* Decodes the size, the associativity and the cache line size of
112 L1/L2 caches of an Intel processor. Values are based on
113 "Intel Processor Identification and the CPUID Instruction"
114 [Application Note 485], revision -032, December 2007. */
115
116 static void
117 decode_caches_intel (unsigned reg, bool xeon_mp,
118 struct cache_desc *level1, struct cache_desc *level2)
119 {
120 int i;
121
122 for (i = 24; i >= 0; i -= 8)
123 switch ((reg >> i) & 0xff)
124 {
125 case 0x0a:
126 level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
127 break;
128 case 0x0c:
129 level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
130 break;
131 case 0x0d:
132 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
133 break;
134 case 0x0e:
135 level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
136 break;
137 case 0x21:
138 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
139 break;
140 case 0x24:
141 level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
142 break;
143 case 0x2c:
144 level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
145 break;
146 case 0x39:
147 level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
148 break;
149 case 0x3a:
150 level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
151 break;
152 case 0x3b:
153 level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
154 break;
155 case 0x3c:
156 level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
157 break;
158 case 0x3d:
159 level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
160 break;
161 case 0x3e:
162 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
163 break;
164 case 0x41:
165 level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
166 break;
167 case 0x42:
168 level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
169 break;
170 case 0x43:
171 level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
172 break;
173 case 0x44:
174 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
175 break;
176 case 0x45:
177 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
178 break;
179 case 0x48:
180 level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
181 break;
182 case 0x49:
183 if (xeon_mp)
184 break;
185 level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
186 break;
187 case 0x4e:
188 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
189 break;
190 case 0x60:
191 level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
192 break;
193 case 0x66:
194 level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
195 break;
196 case 0x67:
197 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
198 break;
199 case 0x68:
200 level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
201 break;
202 case 0x78:
203 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
204 break;
205 case 0x79:
206 level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
207 break;
208 case 0x7a:
209 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
210 break;
211 case 0x7b:
212 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
213 break;
214 case 0x7c:
215 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
216 break;
217 case 0x7d:
218 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
219 break;
220 case 0x7f:
221 level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
222 break;
223 case 0x80:
224 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
225 break;
226 case 0x82:
227 level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
228 break;
229 case 0x83:
230 level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
231 break;
232 case 0x84:
233 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
234 break;
235 case 0x85:
236 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
237 break;
238 case 0x86:
239 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
240 break;
241 case 0x87:
242 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
243
244 default:
245 break;
246 }
247 }
248
/* Detect cache parameters using CPUID function 2.

   The low four bits of EAX from the first query are used as the number
   of CPUID(2) rounds to perform and are then cleared from the register
   value.  In every round each of the four registers is handed to
   decode_caches_intel unless it is zero or has bit 31 set.  XEON_MP is
   passed through to the decoder; results go to *LEVEL1 and *LEVEL2.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned r[4];
  int rounds_left, idx;

  __cpuid (2, r[0], r[1], r[2], r[3]);

  rounds_left = r[0] & 0x0f;
  r[0] &= ~0x0f;

  for (; rounds_left > 0; rounds_left--)
    {
      for (idx = 0; idx < 4; idx++)
        {
          if (r[idx] == 0 || ((r[idx] >> 31) & 1) != 0)
            continue;

          decode_caches_intel (r[idx], xeon_mp, level1, level2);
        }

      /* Fetch the next set of descriptors unless this was the last
         round.  */
      if (rounds_left > 1)
        __cpuid (2, r[0], r[1], r[2], r[3]);
    }
}
273
274 /* Detect cache parameters using CPUID function 4. This
275 method doesn't require hardcoded tables. */
276
/* Values of the cache type field (EAX[4:0]) as tested by
   detect_caches_cpuid4.  */
enum cache_type
{
  CACHE_END = 0,   /* No more caches to enumerate.  */
  CACHE_DATA,      /* 1 */
  CACHE_INST,      /* 2 */
  CACHE_UNIFIED    /* 3 */
};
284
285 static void
286 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
287 struct cache_desc *level3)
288 {
289 struct cache_desc *cache;
290
291 unsigned eax, ebx, ecx, edx;
292 int count;
293
294 for (count = 0;; count++)
295 {
296 __cpuid_count(4, count, eax, ebx, ecx, edx);
297 switch (eax & 0x1f)
298 {
299 case CACHE_END:
300 return;
301 case CACHE_DATA:
302 case CACHE_UNIFIED:
303 {
304 switch ((eax >> 5) & 0x07)
305 {
306 case 1:
307 cache = level1;
308 break;
309 case 2:
310 cache = level2;
311 break;
312 case 3:
313 cache = level3;
314 break;
315 default:
316 cache = NULL;
317 }
318
319 if (cache)
320 {
321 unsigned sets = ecx + 1;
322 unsigned part = ((ebx >> 12) & 0x03ff) + 1;
323
324 cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
325 cache->line = (ebx & 0x0fff) + 1;
326
327 cache->sizekb = (cache->assoc * part
328 * cache->line * sets) / 1024;
329 }
330 }
331 default:
332 break;
333 }
334 }
335 }
336
337 /* Returns the description of caches for an Intel processor. */
338
339 static const char *
340 detect_caches_intel (bool xeon_mp, unsigned max_level,
341 unsigned max_ext_level, unsigned *l2sizekb)
342 {
343 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
344
345 if (max_level >= 4)
346 detect_caches_cpuid4 (&level1, &level2, &level3);
347 else if (max_level >= 2)
348 detect_caches_cpuid2 (xeon_mp, &level1, &level2);
349 else
350 return "";
351
352 if (level1.sizekb == 0)
353 return "";
354
355 /* Let the L3 replace the L2. This assumes inclusive caches
356 and single threaded program for now. */
357 if (level3.sizekb)
358 level2 = level3;
359
360 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
361 method if other methods fail to provide L2 cache parameters. */
362 if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
363 detect_l2_cache (&level2);
364
365 *l2sizekb = level2.sizekb;
366
367 return describe_cache (level1, level2);
368 }
369
370 /* This will be called by the spec parser in gcc.c when it sees
371 a %:local_cpu_detect(args) construct. Currently it will be called
372 with either "arch" or "tune" as argument depending on if -march=native
373 or -mtune=native is to be substituted.
374
375 It returns a string containing new command line parameters to be
376 put at the place of the above two options, depending on what CPU
377 this is executed. E.g. "-march=k8" on an AMD64 machine
378 for -march=native.
379
380 ARGC and ARGV are set depending on the actual arguments given
381 in the spec. */
382
383 const char *host_detect_local_cpu (int argc, const char **argv)
384 {
385 enum processor_type processor = PROCESSOR_I386;
386 const char *cpu = "i386";
387
388 const char *cache = "";
389 const char *options = "";
390
391 unsigned int eax, ebx, ecx, edx;
392
393 unsigned int max_level, ext_level;
394
395 unsigned int vendor;
396 unsigned int model, family;
397
398 unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
399 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
400
401 /* Extended features */
402 unsigned int has_lahf_lm = 0, has_sse4a = 0;
403 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
404 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
405 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
406 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
407 unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
408 unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
409 unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0;
410 unsigned int has_pconfig = 0, has_wbnoinvd = 0;
411 unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
412 unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
413 unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
414 unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
415 unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
416 unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
417 unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
418 unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
419 unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
420 unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
421 unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
422 unsigned int has_avx512bitalg = 0;
423 unsigned int has_ibt = 0, has_shstk = 0;
424 unsigned int has_avx512vnni = 0, has_vaes = 0;
425 unsigned int has_vpclmulqdq = 0;
426 unsigned int has_movdiri = 0, has_movdir64b = 0;
427
428 bool arch;
429
430 unsigned int l2sizekb = 0;
431
432 if (argc < 1)
433 return NULL;
434
435 arch = !strcmp (argv[0], "arch");
436
437 if (!arch && strcmp (argv[0], "tune"))
438 return NULL;
439
440 max_level = __get_cpuid_max (0, &vendor);
441 if (max_level < 1)
442 goto done;
443
444 __cpuid (1, eax, ebx, ecx, edx);
445
446 model = (eax >> 4) & 0x0f;
447 family = (eax >> 8) & 0x0f;
448 if (vendor == signature_INTEL_ebx
449 || vendor == signature_AMD_ebx)
450 {
451 unsigned int extended_model, extended_family;
452
453 extended_model = (eax >> 12) & 0xf0;
454 extended_family = (eax >> 20) & 0xff;
455 if (family == 0x0f)
456 {
457 family += extended_family;
458 model += extended_model;
459 }
460 else if (family == 0x06)
461 model += extended_model;
462 }
463
464 has_sse3 = ecx & bit_SSE3;
465 has_ssse3 = ecx & bit_SSSE3;
466 has_sse4_1 = ecx & bit_SSE4_1;
467 has_sse4_2 = ecx & bit_SSE4_2;
468 has_avx = ecx & bit_AVX;
469 has_osxsave = ecx & bit_OSXSAVE;
470 has_cmpxchg16b = ecx & bit_CMPXCHG16B;
471 has_movbe = ecx & bit_MOVBE;
472 has_popcnt = ecx & bit_POPCNT;
473 has_aes = ecx & bit_AES;
474 has_pclmul = ecx & bit_PCLMUL;
475 has_fma = ecx & bit_FMA;
476 has_f16c = ecx & bit_F16C;
477 has_rdrnd = ecx & bit_RDRND;
478 has_xsave = ecx & bit_XSAVE;
479
480 has_cmpxchg8b = edx & bit_CMPXCHG8B;
481 has_cmov = edx & bit_CMOV;
482 has_mmx = edx & bit_MMX;
483 has_fxsr = edx & bit_FXSAVE;
484 has_sse = edx & bit_SSE;
485 has_sse2 = edx & bit_SSE2;
486
487 if (max_level >= 7)
488 {
489 __cpuid_count (7, 0, eax, ebx, ecx, edx);
490
491 has_bmi = ebx & bit_BMI;
492 has_sgx = ebx & bit_SGX;
493 has_hle = ebx & bit_HLE;
494 has_rtm = ebx & bit_RTM;
495 has_avx2 = ebx & bit_AVX2;
496 has_bmi2 = ebx & bit_BMI2;
497 has_fsgsbase = ebx & bit_FSGSBASE;
498 has_rdseed = ebx & bit_RDSEED;
499 has_adx = ebx & bit_ADX;
500 has_avx512f = ebx & bit_AVX512F;
501 has_avx512er = ebx & bit_AVX512ER;
502 has_avx512pf = ebx & bit_AVX512PF;
503 has_avx512cd = ebx & bit_AVX512CD;
504 has_sha = ebx & bit_SHA;
505 has_clflushopt = ebx & bit_CLFLUSHOPT;
506 has_clwb = ebx & bit_CLWB;
507 has_avx512dq = ebx & bit_AVX512DQ;
508 has_avx512bw = ebx & bit_AVX512BW;
509 has_avx512vl = ebx & bit_AVX512VL;
510 has_avx512ifma = ebx & bit_AVX512IFMA;
511
512 has_prefetchwt1 = ecx & bit_PREFETCHWT1;
513 has_avx512vbmi = ecx & bit_AVX512VBMI;
514 has_pku = ecx & bit_OSPKE;
515 has_avx512vbmi2 = ecx & bit_AVX512VBMI2;
516 has_avx512vnni = ecx & bit_AVX512VNNI;
517 has_rdpid = ecx & bit_RDPID;
518 has_gfni = ecx & bit_GFNI;
519 has_vaes = ecx & bit_VAES;
520 has_vpclmulqdq = ecx & bit_VPCLMULQDQ;
521 has_avx512bitalg = ecx & bit_AVX512BITALG;
522 has_movdiri = ecx & bit_MOVDIRI;
523 has_movdir64b = ecx & bit_MOVDIR64B;
524
525 has_avx5124vnniw = edx & bit_AVX5124VNNIW;
526 has_avx5124fmaps = edx & bit_AVX5124FMAPS;
527
528 has_shstk = ecx & bit_SHSTK;
529 has_ibt = edx & bit_IBT;
530 has_pconfig = edx & bit_PCONFIG;
531 }
532
533 if (max_level >= 13)
534 {
535 __cpuid_count (13, 1, eax, ebx, ecx, edx);
536
537 has_xsaveopt = eax & bit_XSAVEOPT;
538 has_xsavec = eax & bit_XSAVEC;
539 has_xsaves = eax & bit_XSAVES;
540 }
541
542 /* Check cpuid level of extended features. */
543 __cpuid (0x80000000, ext_level, ebx, ecx, edx);
544
545 if (ext_level >= 0x80000001)
546 {
547 __cpuid (0x80000001, eax, ebx, ecx, edx);
548
549 has_lahf_lm = ecx & bit_LAHF_LM;
550 has_sse4a = ecx & bit_SSE4a;
551 has_abm = ecx & bit_ABM;
552 has_lwp = ecx & bit_LWP;
553 has_fma4 = ecx & bit_FMA4;
554 has_xop = ecx & bit_XOP;
555 has_tbm = ecx & bit_TBM;
556 has_lzcnt = ecx & bit_LZCNT;
557 has_prfchw = ecx & bit_PRFCHW;
558
559 has_longmode = edx & bit_LM;
560 has_3dnowp = edx & bit_3DNOWP;
561 has_3dnow = edx & bit_3DNOW;
562 has_mwaitx = ecx & bit_MWAITX;
563 }
564
565 if (ext_level >= 0x80000008)
566 {
567 __cpuid (0x80000008, eax, ebx, ecx, edx);
568 has_clzero = ebx & bit_CLZERO;
569 has_wbnoinvd = ebx & bit_WBNOINVD;
570 }
571
572 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
573 #define XCR_XFEATURE_ENABLED_MASK 0x0
574 #define XSTATE_FP 0x1
575 #define XSTATE_SSE 0x2
576 #define XSTATE_YMM 0x4
577 #define XSTATE_OPMASK 0x20
578 #define XSTATE_ZMM 0x40
579 #define XSTATE_HI_ZMM 0x80
580
581 #define XCR_AVX_ENABLED_MASK \
582 (XSTATE_SSE | XSTATE_YMM)
583 #define XCR_AVX512F_ENABLED_MASK \
584 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
585
586 if (has_osxsave)
587 asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
588 : "=a" (eax), "=d" (edx)
589 : "c" (XCR_XFEATURE_ENABLED_MASK));
590 else
591 eax = 0;
592
593 /* Check if AVX registers are supported. */
594 if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK)
595 {
596 has_avx = 0;
597 has_avx2 = 0;
598 has_fma = 0;
599 has_fma4 = 0;
600 has_f16c = 0;
601 has_xop = 0;
602 has_xsave = 0;
603 has_xsaveopt = 0;
604 has_xsaves = 0;
605 has_xsavec = 0;
606 }
607
608 /* Check if AVX512F registers are supported. */
609 if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK)
610 {
611 has_avx512f = 0;
612 has_avx512er = 0;
613 has_avx512pf = 0;
614 has_avx512cd = 0;
615 has_avx512dq = 0;
616 has_avx512bw = 0;
617 has_avx512vl = 0;
618 }
619
620 if (!arch)
621 {
622 if (vendor == signature_AMD_ebx
623 || vendor == signature_CENTAUR_ebx
624 || vendor == signature_CYRIX_ebx
625 || vendor == signature_NSC_ebx)
626 cache = detect_caches_amd (ext_level);
627 else if (vendor == signature_INTEL_ebx)
628 {
629 bool xeon_mp = (family == 15 && model == 6);
630 cache = detect_caches_intel (xeon_mp, max_level,
631 ext_level, &l2sizekb);
632 }
633 }
634
635 if (vendor == signature_AMD_ebx)
636 {
637 unsigned int name;
638
639 /* Detect geode processor by its processor signature. */
640 if (ext_level >= 0x80000002)
641 __cpuid (0x80000002, name, ebx, ecx, edx);
642 else
643 name = 0;
644
645 if (name == signature_NSC_ebx)
646 processor = PROCESSOR_GEODE;
647 else if (has_movbe && family == 22)
648 processor = PROCESSOR_BTVER2;
649 else if (has_clzero)
650 processor = PROCESSOR_ZNVER1;
651 else if (has_avx2)
652 processor = PROCESSOR_BDVER4;
653 else if (has_xsaveopt)
654 processor = PROCESSOR_BDVER3;
655 else if (has_bmi)
656 processor = PROCESSOR_BDVER2;
657 else if (has_xop)
658 processor = PROCESSOR_BDVER1;
659 else if (has_sse4a && has_ssse3)
660 processor = PROCESSOR_BTVER1;
661 else if (has_sse4a)
662 processor = PROCESSOR_AMDFAM10;
663 else if (has_sse2 || has_longmode)
664 processor = PROCESSOR_K8;
665 else if (has_3dnowp && family == 6)
666 processor = PROCESSOR_ATHLON;
667 else if (has_mmx)
668 processor = PROCESSOR_K6;
669 else
670 processor = PROCESSOR_PENTIUM;
671 }
672 else if (vendor == signature_CENTAUR_ebx)
673 {
674 processor = PROCESSOR_GENERIC;
675
676 switch (family)
677 {
678 default:
679 /* We have no idea. */
680 break;
681
682 case 5:
683 if (has_3dnow || has_mmx)
684 processor = PROCESSOR_I486;
685 break;
686
687 case 6:
688 if (has_longmode)
689 processor = PROCESSOR_K8;
690 else if (model >= 9)
691 processor = PROCESSOR_PENTIUMPRO;
692 else if (model >= 6)
693 processor = PROCESSOR_I486;
694 }
695 }
696 else
697 {
698 switch (family)
699 {
700 case 4:
701 processor = PROCESSOR_I486;
702 break;
703 case 5:
704 processor = PROCESSOR_PENTIUM;
705 break;
706 case 6:
707 processor = PROCESSOR_PENTIUMPRO;
708 break;
709 case 15:
710 processor = PROCESSOR_PENTIUM4;
711 break;
712 default:
713 /* We have no idea. */
714 processor = PROCESSOR_GENERIC;
715 }
716 }
717
718 switch (processor)
719 {
720 case PROCESSOR_I386:
721 /* Default. */
722 break;
723 case PROCESSOR_I486:
724 if (arch && vendor == signature_CENTAUR_ebx)
725 {
726 if (model >= 6)
727 cpu = "c3";
728 else if (has_3dnow)
729 cpu = "winchip2";
730 else
731 /* Assume WinChip C6. */
732 cpu = "winchip-c6";
733 }
734 else
735 cpu = "i486";
736 break;
737 case PROCESSOR_PENTIUM:
738 if (arch && has_mmx)
739 cpu = "pentium-mmx";
740 else
741 cpu = "pentium";
742 break;
743 case PROCESSOR_PENTIUMPRO:
744 switch (model)
745 {
746 case 0x1c:
747 case 0x26:
748 /* Bonnell. */
749 cpu = "bonnell";
750 break;
751 case 0x37:
752 case 0x4a:
753 case 0x4d:
754 case 0x5a:
755 case 0x5d:
756 /* Silvermont. */
757 cpu = "silvermont";
758 break;
759 case 0x0f:
760 /* Merom. */
761 case 0x17:
762 case 0x1d:
763 /* Penryn. */
764 cpu = "core2";
765 break;
766 case 0x1a:
767 case 0x1e:
768 case 0x1f:
769 case 0x2e:
770 /* Nehalem. */
771 cpu = "nehalem";
772 break;
773 case 0x25:
774 case 0x2c:
775 case 0x2f:
776 /* Westmere. */
777 cpu = "westmere";
778 break;
779 case 0x2a:
780 case 0x2d:
781 /* Sandy Bridge. */
782 cpu = "sandybridge";
783 break;
784 case 0x3a:
785 case 0x3e:
786 /* Ivy Bridge. */
787 cpu = "ivybridge";
788 break;
789 case 0x3c:
790 case 0x3f:
791 case 0x45:
792 case 0x46:
793 /* Haswell. */
794 cpu = "haswell";
795 break;
796 case 0x3d:
797 case 0x47:
798 case 0x4f:
799 case 0x56:
800 /* Broadwell. */
801 cpu = "broadwell";
802 break;
803 case 0x4e:
804 case 0x5e:
805 /* Skylake. */
806 case 0x8e:
807 case 0x9e:
808 /* Kaby Lake. */
809 cpu = "skylake";
810 break;
811 case 0x55:
812 /* Skylake with AVX-512. */
813 cpu = "skylake-avx512";
814 break;
815 case 0x57:
816 /* Knights Landing. */
817 cpu = "knl";
818 break;
819 case 0x66:
820 /* Cannon Lake. */
821 cpu = "cannonlake";
822 break;
823 case 0x85:
824 /* Knights Mill. */
825 cpu = "knm";
826 break;
827 default:
828 if (arch)
829 {
830 /* This is unknown family 0x6 CPU. */
831 /* Assume Ice Lake Server. */
832 if (has_wbnoinvd)
833 cpu = "icelake-server";
834 /* Assume Ice Lake. */
835 else if (has_gfni)
836 cpu = "icelake-client";
837 /* Assume Cannon Lake. */
838 else if (has_avx512vbmi)
839 cpu = "cannonlake";
840 /* Assume Knights Mill. */
841 else if (has_avx5124vnniw)
842 cpu = "knm";
843 /* Assume Knights Landing. */
844 else if (has_avx512er)
845 cpu = "knl";
846 /* Assume Skylake with AVX-512. */
847 else if (has_avx512f)
848 cpu = "skylake-avx512";
849 /* Assume Skylake. */
850 else if (has_clflushopt)
851 cpu = "skylake";
852 /* Assume Broadwell. */
853 else if (has_adx)
854 cpu = "broadwell";
855 else if (has_avx2)
856 /* Assume Haswell. */
857 cpu = "haswell";
858 else if (has_avx)
859 /* Assume Sandy Bridge. */
860 cpu = "sandybridge";
861 else if (has_sse4_2)
862 {
863 if (has_movbe)
864 /* Assume Silvermont. */
865 cpu = "silvermont";
866 else
867 /* Assume Nehalem. */
868 cpu = "nehalem";
869 }
870 else if (has_ssse3)
871 {
872 if (has_movbe)
873 /* Assume Bonnell. */
874 cpu = "bonnell";
875 else
876 /* Assume Core 2. */
877 cpu = "core2";
878 }
879 else if (has_longmode)
880 /* Perhaps some emulator? Assume x86-64, otherwise gcc
881 -march=native would be unusable for 64-bit compilations,
882 as all the CPUs below are 32-bit only. */
883 cpu = "x86-64";
884 else if (has_sse3)
885 {
886 if (vendor == signature_CENTAUR_ebx)
887 /* C7 / Eden "Esther" */
888 cpu = "c7";
889 else
890 /* It is Core Duo. */
891 cpu = "pentium-m";
892 }
893 else if (has_sse2)
894 /* It is Pentium M. */
895 cpu = "pentium-m";
896 else if (has_sse)
897 {
898 if (vendor == signature_CENTAUR_ebx)
899 {
900 if (model >= 9)
901 /* Eden "Nehemiah" */
902 cpu = "nehemiah";
903 else
904 cpu = "c3-2";
905 }
906 else
907 /* It is Pentium III. */
908 cpu = "pentium3";
909 }
910 else if (has_mmx)
911 /* It is Pentium II. */
912 cpu = "pentium2";
913 else
914 /* Default to Pentium Pro. */
915 cpu = "pentiumpro";
916 }
917 else
918 /* For -mtune, we default to -mtune=generic. */
919 cpu = "generic";
920 break;
921 }
922 break;
923 case PROCESSOR_PENTIUM4:
924 if (has_sse3)
925 {
926 if (has_longmode)
927 cpu = "nocona";
928 else
929 cpu = "prescott";
930 }
931 else
932 cpu = "pentium4";
933 break;
934 case PROCESSOR_GEODE:
935 cpu = "geode";
936 break;
937 case PROCESSOR_K6:
938 if (arch && has_3dnow)
939 cpu = "k6-3";
940 else
941 cpu = "k6";
942 break;
943 case PROCESSOR_ATHLON:
944 if (arch && has_sse)
945 cpu = "athlon-4";
946 else
947 cpu = "athlon";
948 break;
949 case PROCESSOR_K8:
950 if (arch)
951 {
952 if (vendor == signature_CENTAUR_ebx)
953 {
954 if (has_sse4_1)
955 /* Nano 3000 | Nano dual / quad core | Eden X4 */
956 cpu = "nano-3000";
957 else if (has_ssse3)
958 /* Nano 1000 | Nano 2000 */
959 cpu = "nano";
960 else if (has_sse3)
961 /* Eden X2 */
962 cpu = "eden-x2";
963 else
964 /* Default to k8 */
965 cpu = "k8";
966 }
967 else if (has_sse3)
968 cpu = "k8-sse3";
969 else
970 cpu = "k8";
971 }
972 else
973 /* For -mtune, we default to -mtune=k8 */
974 cpu = "k8";
975 break;
976 case PROCESSOR_AMDFAM10:
977 cpu = "amdfam10";
978 break;
979 case PROCESSOR_BDVER1:
980 cpu = "bdver1";
981 break;
982 case PROCESSOR_BDVER2:
983 cpu = "bdver2";
984 break;
985 case PROCESSOR_BDVER3:
986 cpu = "bdver3";
987 break;
988 case PROCESSOR_BDVER4:
989 cpu = "bdver4";
990 break;
991 case PROCESSOR_ZNVER1:
992 cpu = "znver1";
993 break;
994 case PROCESSOR_BTVER1:
995 cpu = "btver1";
996 break;
997 case PROCESSOR_BTVER2:
998 cpu = "btver2";
999 break;
1000
1001 default:
1002 /* Use something reasonable. */
1003 if (arch)
1004 {
1005 if (has_ssse3)
1006 cpu = "core2";
1007 else if (has_sse3)
1008 {
1009 if (has_longmode)
1010 cpu = "nocona";
1011 else
1012 cpu = "prescott";
1013 }
1014 else if (has_longmode)
1015 /* Perhaps some emulator? Assume x86-64, otherwise gcc
1016 -march=native would be unusable for 64-bit compilations,
1017 as all the CPUs below are 32-bit only. */
1018 cpu = "x86-64";
1019 else if (has_sse2)
1020 cpu = "pentium4";
1021 else if (has_cmov)
1022 cpu = "pentiumpro";
1023 else if (has_mmx)
1024 cpu = "pentium-mmx";
1025 else if (has_cmpxchg8b)
1026 cpu = "pentium";
1027 }
1028 else
1029 cpu = "generic";
1030 }
1031
1032 if (arch)
1033 {
1034 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
1035 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
1036 const char *sse = has_sse ? " -msse" : " -mno-sse";
1037 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
1038 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
1039 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
1040 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
1041 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
1042 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
1043 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
1044 const char *aes = has_aes ? " -maes" : " -mno-aes";
1045 const char *sha = has_sha ? " -msha" : " -mno-sha";
1046 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
1047 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
1048 const char *abm = has_abm ? " -mabm" : " -mno-abm";
1049 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
1050 const char *fma = has_fma ? " -mfma" : " -mno-fma";
1051 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
1052 const char *xop = has_xop ? " -mxop" : " -mno-xop";
1053 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
1054 const char *pconfig = has_pconfig ? " -mpconfig" : " -mno-pconfig";
1055 const char *wbnoinvd = has_wbnoinvd ? " -mwbnoinvd" : " -mno-wbnoinvd";
1056 const char *sgx = has_sgx ? " -msgx" : " -mno-sgx";
1057 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
1058 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
1059 const char *avx = has_avx ? " -mavx" : " -mno-avx";
1060 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
1061 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
1062 const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
1063 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
1064 const char *hle = has_hle ? " -mhle" : " -mno-hle";
1065 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
1066 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
1067 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
1068 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
1069 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
1070 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
1071 const char *adx = has_adx ? " -madx" : " -mno-adx";
1072 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
1073 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
1074 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
1075 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
1076 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
1077 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
1078 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
1079 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
1080 const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
1081 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
1082 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
1083 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
1084 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
1085 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
1086 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
1087 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
1088 const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1089 const char *avx512vbmi2 = has_avx512vbmi2 ? " -mavx512vbmi2" : " -mno-avx512vbmi2";
1090 const char *avx512vnni = has_avx512vnni ? " -mavx512vnni" : " -mno-avx512vnni";
1091 const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
1092 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
1093 const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
1094 const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
1095 const char *pku = has_pku ? " -mpku" : " -mno-pku";
1096 const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid";
1097 const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni";
1098 const char *ibt = has_ibt ? " -mibt" : " -mno-ibt";
1099 const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
1100 const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
1101 const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
1102 const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
1103 const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
1104 const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
1105 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
1106 sse4a, cx16, sahf, movbe, aes, sha, pclmul,
1107 popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
1108 pconfig, wbnoinvd,
1109 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
1110 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
1111 fxsr, xsave, xsaveopt, avx512f, avx512er,
1112 avx512cd, avx512pf, prefetchwt1, clflushopt,
1113 xsavec, xsaves, avx512dq, avx512bw, avx512vl,
1114 avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
1115 clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk,
1116 avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
1117 avx512bitalg, movdiri, movdir64b, NULL);
1118 }
1119
1120 done:
1121 return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
1122 }
1123 #else
1124
/* If we are compiling with GCC where %EBX register is fixed, then the
   driver will just ignore -march and -mtune "native" target and will leave
   to the newly built compiler to generate code for its default target.

   ARGC and ARGV are accepted only to match the prototype and are not
   used; the function always returns NULL.  */

const char *host_detect_local_cpu (int argc, const char **argv)
{
  (void) argc;
  (void) argv;
  return NULL;
}
1133 #endif /* __GNUC__ */