]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/driver-i386.c
Enable AVX512_4FMAPS and AVX512_4VNNIW instructions
[thirdparty/gcc.git] / gcc / config / i386 / driver-i386.c
1 /* Subroutines for the gcc driver.
2 Copyright (C) 2006-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24
25 const char *host_detect_local_cpu (int argc, const char **argv);
26
27 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
28 #include "cpuid.h"
29
/* Geometry of a single cache level as gathered from CPUID.  */
struct cache_desc
{
  unsigned int sizekb;  /* Total size, in kilobytes.  */
  unsigned int assoc;   /* Associativity (number of ways).  */
  unsigned int line;    /* Cache line size.  */
};
36
37 /* Returns command line parameters that describe size and
38 cache line size of the processor caches. */
39
40 static char *
41 describe_cache (struct cache_desc level1, struct cache_desc level2)
42 {
43 char size[100], line[100], size2[100];
44
45 /* At the moment, gcc does not use the information
46 about the associativity of the cache. */
47
48 snprintf (size, sizeof (size),
49 "--param l1-cache-size=%u ", level1.sizekb);
50 snprintf (line, sizeof (line),
51 "--param l1-cache-line-size=%u ", level1.line);
52
53 snprintf (size2, sizeof (size2),
54 "--param l2-cache-size=%u ", level2.sizekb);
55
56 return concat (size, line, size2, NULL);
57 }
58
59 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
60
61 static void
62 detect_l2_cache (struct cache_desc *level2)
63 {
64 unsigned eax, ebx, ecx, edx;
65 unsigned assoc;
66
67 __cpuid (0x80000006, eax, ebx, ecx, edx);
68
69 level2->sizekb = (ecx >> 16) & 0xffff;
70 level2->line = ecx & 0xff;
71
72 assoc = (ecx >> 12) & 0xf;
73 if (assoc == 6)
74 assoc = 8;
75 else if (assoc == 8)
76 assoc = 16;
77 else if (assoc >= 0xa && assoc <= 0xc)
78 assoc = 32 + (assoc - 0xa) * 16;
79 else if (assoc >= 0xd && assoc <= 0xe)
80 assoc = 96 + (assoc - 0xd) * 32;
81
82 level2->assoc = assoc;
83 }
84
85 /* Returns the description of caches for an AMD processor. */
86
87 static const char *
88 detect_caches_amd (unsigned max_ext_level)
89 {
90 unsigned eax, ebx, ecx, edx;
91
92 struct cache_desc level1, level2 = {0, 0, 0};
93
94 if (max_ext_level < 0x80000005)
95 return "";
96
97 __cpuid (0x80000005, eax, ebx, ecx, edx);
98
99 level1.sizekb = (ecx >> 24) & 0xff;
100 level1.assoc = (ecx >> 16) & 0xff;
101 level1.line = ecx & 0xff;
102
103 if (max_ext_level >= 0x80000006)
104 detect_l2_cache (&level2);
105
106 return describe_cache (level1, level2);
107 }
108
109 /* Decodes the size, the associativity and the cache line size of
110 L1/L2 caches of an Intel processor. Values are based on
111 "Intel Processor Identification and the CPUID Instruction"
112 [Application Note 485], revision -032, December 2007. */
113
114 static void
115 decode_caches_intel (unsigned reg, bool xeon_mp,
116 struct cache_desc *level1, struct cache_desc *level2)
117 {
118 int i;
119
120 for (i = 24; i >= 0; i -= 8)
121 switch ((reg >> i) & 0xff)
122 {
123 case 0x0a:
124 level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
125 break;
126 case 0x0c:
127 level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
128 break;
129 case 0x0d:
130 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
131 break;
132 case 0x0e:
133 level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
134 break;
135 case 0x21:
136 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
137 break;
138 case 0x24:
139 level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
140 break;
141 case 0x2c:
142 level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
143 break;
144 case 0x39:
145 level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
146 break;
147 case 0x3a:
148 level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
149 break;
150 case 0x3b:
151 level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
152 break;
153 case 0x3c:
154 level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
155 break;
156 case 0x3d:
157 level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
158 break;
159 case 0x3e:
160 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
161 break;
162 case 0x41:
163 level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
164 break;
165 case 0x42:
166 level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
167 break;
168 case 0x43:
169 level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
170 break;
171 case 0x44:
172 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
173 break;
174 case 0x45:
175 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
176 break;
177 case 0x48:
178 level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
179 break;
180 case 0x49:
181 if (xeon_mp)
182 break;
183 level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
184 break;
185 case 0x4e:
186 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
187 break;
188 case 0x60:
189 level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
190 break;
191 case 0x66:
192 level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
193 break;
194 case 0x67:
195 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
196 break;
197 case 0x68:
198 level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
199 break;
200 case 0x78:
201 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
202 break;
203 case 0x79:
204 level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
205 break;
206 case 0x7a:
207 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
208 break;
209 case 0x7b:
210 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
211 break;
212 case 0x7c:
213 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
214 break;
215 case 0x7d:
216 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
217 break;
218 case 0x7f:
219 level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
220 break;
221 case 0x80:
222 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
223 break;
224 case 0x82:
225 level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
226 break;
227 case 0x83:
228 level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
229 break;
230 case 0x84:
231 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
232 break;
233 case 0x85:
234 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
235 break;
236 case 0x86:
237 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
238 break;
239 case 0x87:
240 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
241
242 default:
243 break;
244 }
245 }
246
/* Detect cache parameters using CPUID function 2.  Every register that
   is non-zero and has bit 31 clear is handed to decode_caches_intel,
   which updates *LEVEL1 and *LEVEL2; XEON_MP is passed through.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int remaining, r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* The low nibble of EAX is used as the number of times the function
     has to be queried; strip it so that it is not decoded as part of a
     descriptor.  */
  remaining = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  for (; remaining > 0; remaining--)
    {
      for (r = 0; r < 4; r++)
        {
          if (regs[r] == 0 || ((regs[r] >> 31) & 1) != 0)
            continue;

          decode_caches_intel (regs[r], xeon_mp, level1, level2);
        }

      /* Query again unless this was the last round.  */
      if (remaining > 1)
        __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
271
272 /* Detect cache parameters using CPUID function 4. This
273 method doesn't require hardcoded tables. */
274
/* Cache type codes; detect_caches_cpuid4 compares them against the
   low five bits of EAX returned by CPUID function 4.  */
enum cache_type
{
  CACHE_END     = 0x0,  /* Terminates the enumeration.  */
  CACHE_DATA    = 0x1,
  CACHE_INST    = 0x2,
  CACHE_UNIFIED = 0x3
};
282
283 static void
284 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
285 struct cache_desc *level3)
286 {
287 struct cache_desc *cache;
288
289 unsigned eax, ebx, ecx, edx;
290 int count;
291
292 for (count = 0;; count++)
293 {
294 __cpuid_count(4, count, eax, ebx, ecx, edx);
295 switch (eax & 0x1f)
296 {
297 case CACHE_END:
298 return;
299 case CACHE_DATA:
300 case CACHE_UNIFIED:
301 {
302 switch ((eax >> 5) & 0x07)
303 {
304 case 1:
305 cache = level1;
306 break;
307 case 2:
308 cache = level2;
309 break;
310 case 3:
311 cache = level3;
312 break;
313 default:
314 cache = NULL;
315 }
316
317 if (cache)
318 {
319 unsigned sets = ecx + 1;
320 unsigned part = ((ebx >> 12) & 0x03ff) + 1;
321
322 cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
323 cache->line = (ebx & 0x0fff) + 1;
324
325 cache->sizekb = (cache->assoc * part
326 * cache->line * sets) / 1024;
327 }
328 }
329 default:
330 break;
331 }
332 }
333 }
334
335 /* Returns the description of caches for an Intel processor. */
336
337 static const char *
338 detect_caches_intel (bool xeon_mp, unsigned max_level,
339 unsigned max_ext_level, unsigned *l2sizekb)
340 {
341 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
342
343 if (max_level >= 4)
344 detect_caches_cpuid4 (&level1, &level2, &level3);
345 else if (max_level >= 2)
346 detect_caches_cpuid2 (xeon_mp, &level1, &level2);
347 else
348 return "";
349
350 if (level1.sizekb == 0)
351 return "";
352
353 /* Let the L3 replace the L2. This assumes inclusive caches
354 and single threaded program for now. */
355 if (level3.sizekb)
356 level2 = level3;
357
358 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
359 method if other methods fail to provide L2 cache parameters. */
360 if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
361 detect_l2_cache (&level2);
362
363 *l2sizekb = level2.sizekb;
364
365 return describe_cache (level1, level2);
366 }
367
368 /* This will be called by the spec parser in gcc.c when it sees
369 a %:local_cpu_detect(args) construct. Currently it will be called
370 with either "arch" or "tune" as argument depending on if -march=native
371 or -mtune=native is to be substituted.
372
373 It returns a string containing new command line parameters to be
374 put at the place of the above two options, depending on what CPU
375 this is executed. E.g. "-march=k8" on an AMD64 machine
376 for -march=native.
377
378 ARGC and ARGV are set depending on the actual arguments given
379 in the spec. */
380
381 const char *host_detect_local_cpu (int argc, const char **argv)
382 {
383 enum processor_type processor = PROCESSOR_I386;
384 const char *cpu = "i386";
385
386 const char *cache = "";
387 const char *options = "";
388
389 unsigned int eax, ebx, ecx, edx;
390
391 unsigned int max_level, ext_level;
392
393 unsigned int vendor;
394 unsigned int model, family;
395
396 unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
397 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
398
399 /* Extended features */
400 unsigned int has_lahf_lm = 0, has_sse4a = 0;
401 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
402 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
403 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
404 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
405 unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
406 unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
407 unsigned int has_hle = 0, has_rtm = 0;
408 unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
409 unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
410 unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
411 unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
412 unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
413 unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
414 unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
415 unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
416 unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0;
417 unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
418
419 bool arch;
420
421 unsigned int l2sizekb = 0;
422
423 if (argc < 1)
424 return NULL;
425
426 arch = !strcmp (argv[0], "arch");
427
428 if (!arch && strcmp (argv[0], "tune"))
429 return NULL;
430
431 max_level = __get_cpuid_max (0, &vendor);
432 if (max_level < 1)
433 goto done;
434
435 __cpuid (1, eax, ebx, ecx, edx);
436
437 model = (eax >> 4) & 0x0f;
438 family = (eax >> 8) & 0x0f;
439 if (vendor == signature_INTEL_ebx
440 || vendor == signature_AMD_ebx)
441 {
442 unsigned int extended_model, extended_family;
443
444 extended_model = (eax >> 12) & 0xf0;
445 extended_family = (eax >> 20) & 0xff;
446 if (family == 0x0f)
447 {
448 family += extended_family;
449 model += extended_model;
450 }
451 else if (family == 0x06)
452 model += extended_model;
453 }
454
455 has_sse3 = ecx & bit_SSE3;
456 has_ssse3 = ecx & bit_SSSE3;
457 has_sse4_1 = ecx & bit_SSE4_1;
458 has_sse4_2 = ecx & bit_SSE4_2;
459 has_avx = ecx & bit_AVX;
460 has_osxsave = ecx & bit_OSXSAVE;
461 has_cmpxchg16b = ecx & bit_CMPXCHG16B;
462 has_movbe = ecx & bit_MOVBE;
463 has_popcnt = ecx & bit_POPCNT;
464 has_aes = ecx & bit_AES;
465 has_pclmul = ecx & bit_PCLMUL;
466 has_fma = ecx & bit_FMA;
467 has_f16c = ecx & bit_F16C;
468 has_rdrnd = ecx & bit_RDRND;
469 has_xsave = ecx & bit_XSAVE;
470
471 has_cmpxchg8b = edx & bit_CMPXCHG8B;
472 has_cmov = edx & bit_CMOV;
473 has_mmx = edx & bit_MMX;
474 has_fxsr = edx & bit_FXSAVE;
475 has_sse = edx & bit_SSE;
476 has_sse2 = edx & bit_SSE2;
477
478 if (max_level >= 7)
479 {
480 __cpuid_count (7, 0, eax, ebx, ecx, edx);
481
482 has_bmi = ebx & bit_BMI;
483 has_hle = ebx & bit_HLE;
484 has_rtm = ebx & bit_RTM;
485 has_avx2 = ebx & bit_AVX2;
486 has_bmi2 = ebx & bit_BMI2;
487 has_fsgsbase = ebx & bit_FSGSBASE;
488 has_rdseed = ebx & bit_RDSEED;
489 has_adx = ebx & bit_ADX;
490 has_avx512f = ebx & bit_AVX512F;
491 has_avx512er = ebx & bit_AVX512ER;
492 has_avx512pf = ebx & bit_AVX512PF;
493 has_avx512cd = ebx & bit_AVX512CD;
494 has_sha = ebx & bit_SHA;
495 has_clflushopt = ebx & bit_CLFLUSHOPT;
496 has_clwb = ebx & bit_CLWB;
497 has_avx512dq = ebx & bit_AVX512DQ;
498 has_avx512bw = ebx & bit_AVX512BW;
499 has_avx512vl = ebx & bit_AVX512VL;
500 has_avx512ifma = ebx & bit_AVX512IFMA;
501
502 has_prefetchwt1 = ecx & bit_PREFETCHWT1;
503 has_avx512vbmi = ecx & bit_AVX512VBMI;
504 has_pku = ecx & bit_OSPKE;
505 has_avx5124vnniw = edx & bit_AVX5124VNNIW;
506 has_avx5124fmaps = edx & bit_AVX5124FMAPS;
507 }
508
509 if (max_level >= 13)
510 {
511 __cpuid_count (13, 1, eax, ebx, ecx, edx);
512
513 has_xsaveopt = eax & bit_XSAVEOPT;
514 has_xsavec = eax & bit_XSAVEC;
515 has_xsaves = eax & bit_XSAVES;
516 }
517
518 /* Check cpuid level of extended features. */
519 __cpuid (0x80000000, ext_level, ebx, ecx, edx);
520
521 if (ext_level >= 0x80000001)
522 {
523 __cpuid (0x80000001, eax, ebx, ecx, edx);
524
525 has_lahf_lm = ecx & bit_LAHF_LM;
526 has_sse4a = ecx & bit_SSE4a;
527 has_abm = ecx & bit_ABM;
528 has_lwp = ecx & bit_LWP;
529 has_fma4 = ecx & bit_FMA4;
530 has_xop = ecx & bit_XOP;
531 has_tbm = ecx & bit_TBM;
532 has_lzcnt = ecx & bit_LZCNT;
533 has_prfchw = ecx & bit_PRFCHW;
534
535 has_longmode = edx & bit_LM;
536 has_3dnowp = edx & bit_3DNOWP;
537 has_3dnow = edx & bit_3DNOW;
538 has_mwaitx = ecx & bit_MWAITX;
539 }
540
541 if (ext_level >= 0x80000008)
542 {
543 __cpuid (0x80000008, eax, ebx, ecx, edx);
544 has_clzero = ebx & bit_CLZERO;
545 }
546
547 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
548 #define XCR_XFEATURE_ENABLED_MASK 0x0
549 #define XSTATE_FP 0x1
550 #define XSTATE_SSE 0x2
551 #define XSTATE_YMM 0x4
552 #define XSTATE_OPMASK 0x20
553 #define XSTATE_ZMM 0x40
554 #define XSTATE_HI_ZMM 0x80
555
556 #define XCR_AVX_ENABLED_MASK \
557 (XSTATE_SSE | XSTATE_YMM)
558 #define XCR_AVX512F_ENABLED_MASK \
559 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
560
561 if (has_osxsave)
562 asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
563 : "=a" (eax), "=d" (edx)
564 : "c" (XCR_XFEATURE_ENABLED_MASK));
565 else
566 eax = 0;
567
568 /* Check if AVX registers are supported. */
569 if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK)
570 {
571 has_avx = 0;
572 has_avx2 = 0;
573 has_fma = 0;
574 has_fma4 = 0;
575 has_f16c = 0;
576 has_xop = 0;
577 has_xsave = 0;
578 has_xsaveopt = 0;
579 has_xsaves = 0;
580 has_xsavec = 0;
581 }
582
583 /* Check if AVX512F registers are supported. */
584 if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK)
585 {
586 has_avx512f = 0;
587 has_avx512er = 0;
588 has_avx512pf = 0;
589 has_avx512cd = 0;
590 has_avx512dq = 0;
591 has_avx512bw = 0;
592 has_avx512vl = 0;
593 }
594
595 if (!arch)
596 {
597 if (vendor == signature_AMD_ebx
598 || vendor == signature_CENTAUR_ebx
599 || vendor == signature_CYRIX_ebx
600 || vendor == signature_NSC_ebx)
601 cache = detect_caches_amd (ext_level);
602 else if (vendor == signature_INTEL_ebx)
603 {
604 bool xeon_mp = (family == 15 && model == 6);
605 cache = detect_caches_intel (xeon_mp, max_level,
606 ext_level, &l2sizekb);
607 }
608 }
609
610 if (vendor == signature_AMD_ebx)
611 {
612 unsigned int name;
613
614 /* Detect geode processor by its processor signature. */
615 if (ext_level >= 0x80000002)
616 __cpuid (0x80000002, name, ebx, ecx, edx);
617 else
618 name = 0;
619
620 if (name == signature_NSC_ebx)
621 processor = PROCESSOR_GEODE;
622 else if (has_movbe && family == 22)
623 processor = PROCESSOR_BTVER2;
624 else if (has_clzero)
625 processor = PROCESSOR_ZNVER1;
626 else if (has_avx2)
627 processor = PROCESSOR_BDVER4;
628 else if (has_xsaveopt)
629 processor = PROCESSOR_BDVER3;
630 else if (has_bmi)
631 processor = PROCESSOR_BDVER2;
632 else if (has_xop)
633 processor = PROCESSOR_BDVER1;
634 else if (has_sse4a && has_ssse3)
635 processor = PROCESSOR_BTVER1;
636 else if (has_sse4a)
637 processor = PROCESSOR_AMDFAM10;
638 else if (has_sse2 || has_longmode)
639 processor = PROCESSOR_K8;
640 else if (has_3dnowp && family == 6)
641 processor = PROCESSOR_ATHLON;
642 else if (has_mmx)
643 processor = PROCESSOR_K6;
644 else
645 processor = PROCESSOR_PENTIUM;
646 }
647 else if (vendor == signature_CENTAUR_ebx)
648 {
649 processor = PROCESSOR_GENERIC;
650
651 switch (family)
652 {
653 default:
654 /* We have no idea. */
655 break;
656
657 case 5:
658 if (has_3dnow || has_mmx)
659 processor = PROCESSOR_I486;
660 break;
661
662 case 6:
663 if (has_longmode)
664 processor = PROCESSOR_K8;
665 else if (model >= 9)
666 processor = PROCESSOR_PENTIUMPRO;
667 else if (model >= 6)
668 processor = PROCESSOR_I486;
669 }
670 }
671 else
672 {
673 switch (family)
674 {
675 case 4:
676 processor = PROCESSOR_I486;
677 break;
678 case 5:
679 processor = PROCESSOR_PENTIUM;
680 break;
681 case 6:
682 processor = PROCESSOR_PENTIUMPRO;
683 break;
684 case 15:
685 processor = PROCESSOR_PENTIUM4;
686 break;
687 default:
688 /* We have no idea. */
689 processor = PROCESSOR_GENERIC;
690 }
691 }
692
693 switch (processor)
694 {
695 case PROCESSOR_I386:
696 /* Default. */
697 break;
698 case PROCESSOR_I486:
699 if (arch && vendor == signature_CENTAUR_ebx)
700 {
701 if (model >= 6)
702 cpu = "c3";
703 else if (has_3dnow)
704 cpu = "winchip2";
705 else
706 /* Assume WinChip C6. */
707 cpu = "winchip-c6";
708 }
709 else
710 cpu = "i486";
711 break;
712 case PROCESSOR_PENTIUM:
713 if (arch && has_mmx)
714 cpu = "pentium-mmx";
715 else
716 cpu = "pentium";
717 break;
718 case PROCESSOR_PENTIUMPRO:
719 switch (model)
720 {
721 case 0x1c:
722 case 0x26:
723 /* Bonnell. */
724 cpu = "bonnell";
725 break;
726 case 0x37:
727 case 0x4a:
728 case 0x4d:
729 case 0x5a:
730 case 0x5d:
731 /* Silvermont. */
732 cpu = "silvermont";
733 break;
734 case 0x0f:
735 /* Merom. */
736 case 0x17:
737 case 0x1d:
738 /* Penryn. */
739 cpu = "core2";
740 break;
741 case 0x1a:
742 case 0x1e:
743 case 0x1f:
744 case 0x2e:
745 /* Nehalem. */
746 cpu = "nehalem";
747 break;
748 case 0x25:
749 case 0x2c:
750 case 0x2f:
751 /* Westmere. */
752 cpu = "westmere";
753 break;
754 case 0x2a:
755 case 0x2d:
756 /* Sandy Bridge. */
757 cpu = "sandybridge";
758 break;
759 case 0x3a:
760 case 0x3e:
761 /* Ivy Bridge. */
762 cpu = "ivybridge";
763 break;
764 case 0x3c:
765 case 0x3f:
766 case 0x45:
767 case 0x46:
768 /* Haswell. */
769 cpu = "haswell";
770 break;
771 case 0x3d:
772 case 0x47:
773 case 0x4f:
774 case 0x56:
775 /* Broadwell. */
776 cpu = "broadwell";
777 break;
778 case 0x4e:
779 case 0x5e:
780 /* Skylake. */
781 cpu = "skylake";
782 break;
783 case 0x57:
784 /* Knights Landing. */
785 cpu = "knl";
786 break;
787 default:
788 if (arch)
789 {
790 /* This is unknown family 0x6 CPU. */
791 /* Assume Knights Landing. */
792 if (has_avx512f)
793 cpu = "knl";
794 /* Assume Broadwell. */
795 else if (has_adx)
796 cpu = "broadwell";
797 else if (has_avx2)
798 /* Assume Haswell. */
799 cpu = "haswell";
800 else if (has_avx)
801 /* Assume Sandy Bridge. */
802 cpu = "sandybridge";
803 else if (has_sse4_2)
804 {
805 if (has_movbe)
806 /* Assume Silvermont. */
807 cpu = "silvermont";
808 else
809 /* Assume Nehalem. */
810 cpu = "nehalem";
811 }
812 else if (has_ssse3)
813 {
814 if (has_movbe)
815 /* Assume Bonnell. */
816 cpu = "bonnell";
817 else
818 /* Assume Core 2. */
819 cpu = "core2";
820 }
821 else if (has_longmode)
822 /* Perhaps some emulator? Assume x86-64, otherwise gcc
823 -march=native would be unusable for 64-bit compilations,
824 as all the CPUs below are 32-bit only. */
825 cpu = "x86-64";
826 else if (has_sse3)
827 {
828 if (vendor == signature_CENTAUR_ebx)
829 /* C7 / Eden "Esther" */
830 cpu = "c7";
831 else
832 /* It is Core Duo. */
833 cpu = "pentium-m";
834 }
835 else if (has_sse2)
836 /* It is Pentium M. */
837 cpu = "pentium-m";
838 else if (has_sse)
839 {
840 if (vendor == signature_CENTAUR_ebx)
841 {
842 if (model >= 9)
843 /* Eden "Nehemiah" */
844 cpu = "nehemiah";
845 else
846 cpu = "c3-2";
847 }
848 else
849 /* It is Pentium III. */
850 cpu = "pentium3";
851 }
852 else if (has_mmx)
853 /* It is Pentium II. */
854 cpu = "pentium2";
855 else
856 /* Default to Pentium Pro. */
857 cpu = "pentiumpro";
858 }
859 else
860 /* For -mtune, we default to -mtune=generic. */
861 cpu = "generic";
862 break;
863 }
864 break;
865 case PROCESSOR_PENTIUM4:
866 if (has_sse3)
867 {
868 if (has_longmode)
869 cpu = "nocona";
870 else
871 cpu = "prescott";
872 }
873 else
874 cpu = "pentium4";
875 break;
876 case PROCESSOR_GEODE:
877 cpu = "geode";
878 break;
879 case PROCESSOR_K6:
880 if (arch && has_3dnow)
881 cpu = "k6-3";
882 else
883 cpu = "k6";
884 break;
885 case PROCESSOR_ATHLON:
886 if (arch && has_sse)
887 cpu = "athlon-4";
888 else
889 cpu = "athlon";
890 break;
891 case PROCESSOR_K8:
892 if (arch)
893 {
894 if (vendor == signature_CENTAUR_ebx)
895 {
896 if (has_sse4_1)
897 /* Nano 3000 | Nano dual / quad core | Eden X4 */
898 cpu = "nano-3000";
899 else if (has_ssse3)
900 /* Nano 1000 | Nano 2000 */
901 cpu = "nano";
902 else if (has_sse3)
903 /* Eden X2 */
904 cpu = "eden-x2";
905 else
906 /* Default to k8 */
907 cpu = "k8";
908 }
909 else if (has_sse3)
910 cpu = "k8-sse3";
911 else
912 cpu = "k8";
913 }
914 else
915 /* For -mtune, we default to -mtune=k8 */
916 cpu = "k8";
917 break;
918 case PROCESSOR_AMDFAM10:
919 cpu = "amdfam10";
920 break;
921 case PROCESSOR_BDVER1:
922 cpu = "bdver1";
923 break;
924 case PROCESSOR_BDVER2:
925 cpu = "bdver2";
926 break;
927 case PROCESSOR_BDVER3:
928 cpu = "bdver3";
929 break;
930 case PROCESSOR_BDVER4:
931 cpu = "bdver4";
932 break;
933 case PROCESSOR_ZNVER1:
934 cpu = "znver1";
935 break;
936 case PROCESSOR_BTVER1:
937 cpu = "btver1";
938 break;
939 case PROCESSOR_BTVER2:
940 cpu = "btver2";
941 break;
942
943 default:
944 /* Use something reasonable. */
945 if (arch)
946 {
947 if (has_ssse3)
948 cpu = "core2";
949 else if (has_sse3)
950 {
951 if (has_longmode)
952 cpu = "nocona";
953 else
954 cpu = "prescott";
955 }
956 else if (has_longmode)
957 /* Perhaps some emulator? Assume x86-64, otherwise gcc
958 -march=native would be unusable for 64-bit compilations,
959 as all the CPUs below are 32-bit only. */
960 cpu = "x86-64";
961 else if (has_sse2)
962 cpu = "pentium4";
963 else if (has_cmov)
964 cpu = "pentiumpro";
965 else if (has_mmx)
966 cpu = "pentium-mmx";
967 else if (has_cmpxchg8b)
968 cpu = "pentium";
969 }
970 else
971 cpu = "generic";
972 }
973
974 if (arch)
975 {
976 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
977 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
978 const char *sse = has_sse ? " -msse" : " -mno-sse";
979 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
980 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
981 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
982 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
983 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
984 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
985 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
986 const char *aes = has_aes ? " -maes" : " -mno-aes";
987 const char *sha = has_sha ? " -msha" : " -mno-sha";
988 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
989 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
990 const char *abm = has_abm ? " -mabm" : " -mno-abm";
991 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
992 const char *fma = has_fma ? " -mfma" : " -mno-fma";
993 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
994 const char *xop = has_xop ? " -mxop" : " -mno-xop";
995 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
996 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
997 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
998 const char *avx = has_avx ? " -mavx" : " -mno-avx";
999 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
1000 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
1001 const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
1002 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
1003 const char *hle = has_hle ? " -mhle" : " -mno-hle";
1004 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
1005 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
1006 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
1007 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
1008 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
1009 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
1010 const char *adx = has_adx ? " -madx" : " -mno-adx";
1011 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
1012 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
1013 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
1014 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
1015 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
1016 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
1017 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
1018 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
1019 const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
1020 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
1021 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
1022 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
1023 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
1024 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
1025 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
1026 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
1027 const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1028 const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
1029 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
1030 const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
1031 const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
1032 const char *pku = has_pku ? " -mpku" : " -mno-pku";
1033 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
1034 sse4a, cx16, sahf, movbe, aes, sha, pclmul,
1035 popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
1036 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
1037 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
1038 fxsr, xsave, xsaveopt, avx512f, avx512er,
1039 avx512cd, avx512pf, prefetchwt1, clflushopt,
1040 xsavec, xsaves, avx512dq, avx512bw, avx512vl,
1041 avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
1042 clwb, mwaitx, clzero, pku, NULL);
1043 }
1044
1045 done:
1046 return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
1047 }
1048 #else
1049
/* Fallback used when the host compiler keeps the %EBX register fixed:
   the driver then simply ignores the "native" target of -march and
   -mtune and leaves it to the newly built compiler to generate code for
   its default target.  Both arguments are ignored and NULL is always
   returned.  */

const char *
host_detect_local_cpu (int argc, const char **argv)
{
  (void) argc;
  (void) argv;

  return NULL;
}
1058 #endif /* __GNUC__ */