]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/driver-i386.c
Enable VAES support [1/5]
[thirdparty/gcc.git] / gcc / config / i386 / driver-i386.c
1 /* Subroutines for the gcc driver.
2 Copyright (C) 2006-2017 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24
25 const char *host_detect_local_cpu (int argc, const char **argv);
26
27 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
28 #include "cpuid.h"
29
/* Description of a single cache as gathered from CPUID.  The field
   order matters: the file initializes objects of this type
   positionally with {0, 0, 0}.  */
struct cache_desc
{
  unsigned int sizekb;  /* Cache size in kilobytes.  */
  unsigned int assoc;   /* Associativity (number of ways).  */
  unsigned int line;    /* Cache line size.  */
};
36
37 /* Returns command line parameters that describe size and
38 cache line size of the processor caches. */
39
40 static char *
41 describe_cache (struct cache_desc level1, struct cache_desc level2)
42 {
43 char size[100], line[100], size2[100];
44
45 /* At the moment, gcc does not use the information
46 about the associativity of the cache. */
47
48 snprintf (size, sizeof (size),
49 "--param l1-cache-size=%u ", level1.sizekb);
50 snprintf (line, sizeof (line),
51 "--param l1-cache-line-size=%u ", level1.line);
52
53 snprintf (size2, sizeof (size2),
54 "--param l2-cache-size=%u ", level2.sizekb);
55
56 return concat (size, line, size2, NULL);
57 }
58
59 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
60
61 static void
62 detect_l2_cache (struct cache_desc *level2)
63 {
64 unsigned eax, ebx, ecx, edx;
65 unsigned assoc;
66
67 __cpuid (0x80000006, eax, ebx, ecx, edx);
68
69 level2->sizekb = (ecx >> 16) & 0xffff;
70 level2->line = ecx & 0xff;
71
72 assoc = (ecx >> 12) & 0xf;
73 if (assoc == 6)
74 assoc = 8;
75 else if (assoc == 8)
76 assoc = 16;
77 else if (assoc >= 0xa && assoc <= 0xc)
78 assoc = 32 + (assoc - 0xa) * 16;
79 else if (assoc >= 0xd && assoc <= 0xe)
80 assoc = 96 + (assoc - 0xd) * 32;
81
82 level2->assoc = assoc;
83 }
84
85 /* Returns the description of caches for an AMD processor. */
86
87 static const char *
88 detect_caches_amd (unsigned max_ext_level)
89 {
90 unsigned eax, ebx, ecx, edx;
91
92 struct cache_desc level1, level2 = {0, 0, 0};
93
94 if (max_ext_level < 0x80000005)
95 return "";
96
97 __cpuid (0x80000005, eax, ebx, ecx, edx);
98
99 level1.sizekb = (ecx >> 24) & 0xff;
100 level1.assoc = (ecx >> 16) & 0xff;
101 level1.line = ecx & 0xff;
102
103 if (max_ext_level >= 0x80000006)
104 detect_l2_cache (&level2);
105
106 return describe_cache (level1, level2);
107 }
108
109 /* Decodes the size, the associativity and the cache line size of
110 L1/L2 caches of an Intel processor. Values are based on
111 "Intel Processor Identification and the CPUID Instruction"
112 [Application Note 485], revision -032, December 2007. */
113
114 static void
115 decode_caches_intel (unsigned reg, bool xeon_mp,
116 struct cache_desc *level1, struct cache_desc *level2)
117 {
118 int i;
119
120 for (i = 24; i >= 0; i -= 8)
121 switch ((reg >> i) & 0xff)
122 {
123 case 0x0a:
124 level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
125 break;
126 case 0x0c:
127 level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
128 break;
129 case 0x0d:
130 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
131 break;
132 case 0x0e:
133 level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
134 break;
135 case 0x21:
136 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
137 break;
138 case 0x24:
139 level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
140 break;
141 case 0x2c:
142 level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
143 break;
144 case 0x39:
145 level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
146 break;
147 case 0x3a:
148 level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
149 break;
150 case 0x3b:
151 level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
152 break;
153 case 0x3c:
154 level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
155 break;
156 case 0x3d:
157 level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
158 break;
159 case 0x3e:
160 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
161 break;
162 case 0x41:
163 level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
164 break;
165 case 0x42:
166 level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
167 break;
168 case 0x43:
169 level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
170 break;
171 case 0x44:
172 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
173 break;
174 case 0x45:
175 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
176 break;
177 case 0x48:
178 level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
179 break;
180 case 0x49:
181 if (xeon_mp)
182 break;
183 level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
184 break;
185 case 0x4e:
186 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
187 break;
188 case 0x60:
189 level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
190 break;
191 case 0x66:
192 level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
193 break;
194 case 0x67:
195 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
196 break;
197 case 0x68:
198 level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
199 break;
200 case 0x78:
201 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
202 break;
203 case 0x79:
204 level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
205 break;
206 case 0x7a:
207 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
208 break;
209 case 0x7b:
210 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
211 break;
212 case 0x7c:
213 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
214 break;
215 case 0x7d:
216 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
217 break;
218 case 0x7f:
219 level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
220 break;
221 case 0x80:
222 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
223 break;
224 case 0x82:
225 level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
226 break;
227 case 0x83:
228 level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
229 break;
230 case 0x84:
231 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
232 break;
233 case 0x85:
234 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
235 break;
236 case 0x86:
237 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
238 break;
239 case 0x87:
240 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
241
242 default:
243 break;
244 }
245 }
246
/* Gather L1/L2 cache descriptions through CPUID function 2.  The low
   four bits of the first EAX value are used as the number of times
   the function has to be executed; every returned register whose top
   bit is clear is handed to decode_caches_intel.  XEON_MP is passed
   through to the decoder unchanged.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int remaining, r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* Take the repeat count out of EAX, then clear those bits so they
     are not decoded as part of a descriptor.  */
  remaining = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  for (; remaining > 0; remaining--)
    {
      for (r = 0; r < 4; r++)
        {
          if (regs[r] == 0)
            continue;
          /* A set bit 31 means the register is skipped.  */
          if ((regs[r] >> 31) & 1)
            continue;
          decode_caches_intel (regs[r], xeon_mp, level1, level2);
        }

      /* Fetch the next set of descriptors unless this was the last
         round.  */
      if (remaining > 1)
        __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
271
272 /* Detect cache parameters using CPUID function 4. This
273 method doesn't require hardcoded tables. */
274
/* Values of the cache type field (EAX bits 4:0) reported by CPUID
   function 4, as tested in detect_caches_cpuid4.  */
enum cache_type
{
  CACHE_END = 0,      /* Terminates the enumeration of caches.  */
  CACHE_DATA = 1,     /* Recorded by detect_caches_cpuid4.  */
  CACHE_INST = 2,     /* Not recorded by detect_caches_cpuid4.  */
  CACHE_UNIFIED = 3   /* Recorded by detect_caches_cpuid4.  */
};
282
283 static void
284 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
285 struct cache_desc *level3)
286 {
287 struct cache_desc *cache;
288
289 unsigned eax, ebx, ecx, edx;
290 int count;
291
292 for (count = 0;; count++)
293 {
294 __cpuid_count(4, count, eax, ebx, ecx, edx);
295 switch (eax & 0x1f)
296 {
297 case CACHE_END:
298 return;
299 case CACHE_DATA:
300 case CACHE_UNIFIED:
301 {
302 switch ((eax >> 5) & 0x07)
303 {
304 case 1:
305 cache = level1;
306 break;
307 case 2:
308 cache = level2;
309 break;
310 case 3:
311 cache = level3;
312 break;
313 default:
314 cache = NULL;
315 }
316
317 if (cache)
318 {
319 unsigned sets = ecx + 1;
320 unsigned part = ((ebx >> 12) & 0x03ff) + 1;
321
322 cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
323 cache->line = (ebx & 0x0fff) + 1;
324
325 cache->sizekb = (cache->assoc * part
326 * cache->line * sets) / 1024;
327 }
328 }
329 default:
330 break;
331 }
332 }
333 }
334
335 /* Returns the description of caches for an Intel processor. */
336
337 static const char *
338 detect_caches_intel (bool xeon_mp, unsigned max_level,
339 unsigned max_ext_level, unsigned *l2sizekb)
340 {
341 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
342
343 if (max_level >= 4)
344 detect_caches_cpuid4 (&level1, &level2, &level3);
345 else if (max_level >= 2)
346 detect_caches_cpuid2 (xeon_mp, &level1, &level2);
347 else
348 return "";
349
350 if (level1.sizekb == 0)
351 return "";
352
353 /* Let the L3 replace the L2. This assumes inclusive caches
354 and single threaded program for now. */
355 if (level3.sizekb)
356 level2 = level3;
357
358 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
359 method if other methods fail to provide L2 cache parameters. */
360 if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
361 detect_l2_cache (&level2);
362
363 *l2sizekb = level2.sizekb;
364
365 return describe_cache (level1, level2);
366 }
367
368 /* This will be called by the spec parser in gcc.c when it sees
369 a %:local_cpu_detect(args) construct. Currently it will be called
370 with either "arch" or "tune" as argument depending on if -march=native
371 or -mtune=native is to be substituted.
372
373 It returns a string containing new command line parameters to be
374 put at the place of the above two options, depending on what CPU
375 this is executed. E.g. "-march=k8" on an AMD64 machine
376 for -march=native.
377
378 ARGC and ARGV are set depending on the actual arguments given
379 in the spec. */
380
381 const char *host_detect_local_cpu (int argc, const char **argv)
382 {
383 enum processor_type processor = PROCESSOR_I386;
384 const char *cpu = "i386";
385
386 const char *cache = "";
387 const char *options = "";
388
389 unsigned int eax, ebx, ecx, edx;
390
391 unsigned int max_level, ext_level;
392
393 unsigned int vendor;
394 unsigned int model, family;
395
396 unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
397 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
398
399 /* Extended features */
400 unsigned int has_lahf_lm = 0, has_sse4a = 0;
401 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
402 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
403 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
404 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
405 unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
406 unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
407 unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0;
408 unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
409 unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
410 unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
411 unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
412 unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
413 unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
414 unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
415 unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
416 unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
417 unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
418 unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
419 unsigned int has_ibt = 0, has_shstk = 0;
420 unsigned int has_avx512vnni = 0, has_vaes = 0;
421
422 bool arch;
423
424 unsigned int l2sizekb = 0;
425
426 if (argc < 1)
427 return NULL;
428
429 arch = !strcmp (argv[0], "arch");
430
431 if (!arch && strcmp (argv[0], "tune"))
432 return NULL;
433
434 max_level = __get_cpuid_max (0, &vendor);
435 if (max_level < 1)
436 goto done;
437
438 __cpuid (1, eax, ebx, ecx, edx);
439
440 model = (eax >> 4) & 0x0f;
441 family = (eax >> 8) & 0x0f;
442 if (vendor == signature_INTEL_ebx
443 || vendor == signature_AMD_ebx)
444 {
445 unsigned int extended_model, extended_family;
446
447 extended_model = (eax >> 12) & 0xf0;
448 extended_family = (eax >> 20) & 0xff;
449 if (family == 0x0f)
450 {
451 family += extended_family;
452 model += extended_model;
453 }
454 else if (family == 0x06)
455 model += extended_model;
456 }
457
458 has_sse3 = ecx & bit_SSE3;
459 has_ssse3 = ecx & bit_SSSE3;
460 has_sse4_1 = ecx & bit_SSE4_1;
461 has_sse4_2 = ecx & bit_SSE4_2;
462 has_avx = ecx & bit_AVX;
463 has_osxsave = ecx & bit_OSXSAVE;
464 has_cmpxchg16b = ecx & bit_CMPXCHG16B;
465 has_movbe = ecx & bit_MOVBE;
466 has_popcnt = ecx & bit_POPCNT;
467 has_aes = ecx & bit_AES;
468 has_pclmul = ecx & bit_PCLMUL;
469 has_fma = ecx & bit_FMA;
470 has_f16c = ecx & bit_F16C;
471 has_rdrnd = ecx & bit_RDRND;
472 has_xsave = ecx & bit_XSAVE;
473
474 has_cmpxchg8b = edx & bit_CMPXCHG8B;
475 has_cmov = edx & bit_CMOV;
476 has_mmx = edx & bit_MMX;
477 has_fxsr = edx & bit_FXSAVE;
478 has_sse = edx & bit_SSE;
479 has_sse2 = edx & bit_SSE2;
480
481 if (max_level >= 7)
482 {
483 __cpuid_count (7, 0, eax, ebx, ecx, edx);
484
485 has_bmi = ebx & bit_BMI;
486 has_sgx = ebx & bit_SGX;
487 has_hle = ebx & bit_HLE;
488 has_rtm = ebx & bit_RTM;
489 has_avx2 = ebx & bit_AVX2;
490 has_bmi2 = ebx & bit_BMI2;
491 has_fsgsbase = ebx & bit_FSGSBASE;
492 has_rdseed = ebx & bit_RDSEED;
493 has_adx = ebx & bit_ADX;
494 has_avx512f = ebx & bit_AVX512F;
495 has_avx512er = ebx & bit_AVX512ER;
496 has_avx512pf = ebx & bit_AVX512PF;
497 has_avx512cd = ebx & bit_AVX512CD;
498 has_sha = ebx & bit_SHA;
499 has_clflushopt = ebx & bit_CLFLUSHOPT;
500 has_clwb = ebx & bit_CLWB;
501 has_avx512dq = ebx & bit_AVX512DQ;
502 has_avx512bw = ebx & bit_AVX512BW;
503 has_avx512vl = ebx & bit_AVX512VL;
504 has_avx512ifma = ebx & bit_AVX512IFMA;
505
506 has_prefetchwt1 = ecx & bit_PREFETCHWT1;
507 has_avx512vbmi = ecx & bit_AVX512VBMI;
508 has_pku = ecx & bit_OSPKE;
509 has_avx512vbmi2 = ecx & bit_AVX512VBMI2;
510 has_avx512vnni = ecx & bit_AVX512VNNI;
511 has_rdpid = ecx & bit_RDPID;
512 has_gfni = ecx & bit_GFNI;
513 has_vaes = ecx & bit_VAES;
514
515 has_avx5124vnniw = edx & bit_AVX5124VNNIW;
516 has_avx5124fmaps = edx & bit_AVX5124FMAPS;
517
518 has_shstk = ecx & bit_SHSTK;
519 has_ibt = edx & bit_IBT;
520 }
521
522 if (max_level >= 13)
523 {
524 __cpuid_count (13, 1, eax, ebx, ecx, edx);
525
526 has_xsaveopt = eax & bit_XSAVEOPT;
527 has_xsavec = eax & bit_XSAVEC;
528 has_xsaves = eax & bit_XSAVES;
529 }
530
531 /* Check cpuid level of extended features. */
532 __cpuid (0x80000000, ext_level, ebx, ecx, edx);
533
534 if (ext_level >= 0x80000001)
535 {
536 __cpuid (0x80000001, eax, ebx, ecx, edx);
537
538 has_lahf_lm = ecx & bit_LAHF_LM;
539 has_sse4a = ecx & bit_SSE4a;
540 has_abm = ecx & bit_ABM;
541 has_lwp = ecx & bit_LWP;
542 has_fma4 = ecx & bit_FMA4;
543 has_xop = ecx & bit_XOP;
544 has_tbm = ecx & bit_TBM;
545 has_lzcnt = ecx & bit_LZCNT;
546 has_prfchw = ecx & bit_PRFCHW;
547
548 has_longmode = edx & bit_LM;
549 has_3dnowp = edx & bit_3DNOWP;
550 has_3dnow = edx & bit_3DNOW;
551 has_mwaitx = ecx & bit_MWAITX;
552 }
553
554 if (ext_level >= 0x80000008)
555 {
556 __cpuid (0x80000008, eax, ebx, ecx, edx);
557 has_clzero = ebx & bit_CLZERO;
558 }
559
560 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
561 #define XCR_XFEATURE_ENABLED_MASK 0x0
562 #define XSTATE_FP 0x1
563 #define XSTATE_SSE 0x2
564 #define XSTATE_YMM 0x4
565 #define XSTATE_OPMASK 0x20
566 #define XSTATE_ZMM 0x40
567 #define XSTATE_HI_ZMM 0x80
568
569 #define XCR_AVX_ENABLED_MASK \
570 (XSTATE_SSE | XSTATE_YMM)
571 #define XCR_AVX512F_ENABLED_MASK \
572 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
573
574 if (has_osxsave)
575 asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
576 : "=a" (eax), "=d" (edx)
577 : "c" (XCR_XFEATURE_ENABLED_MASK));
578 else
579 eax = 0;
580
581 /* Check if AVX registers are supported. */
582 if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK)
583 {
584 has_avx = 0;
585 has_avx2 = 0;
586 has_fma = 0;
587 has_fma4 = 0;
588 has_f16c = 0;
589 has_xop = 0;
590 has_xsave = 0;
591 has_xsaveopt = 0;
592 has_xsaves = 0;
593 has_xsavec = 0;
594 }
595
596 /* Check if AVX512F registers are supported. */
597 if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK)
598 {
599 has_avx512f = 0;
600 has_avx512er = 0;
601 has_avx512pf = 0;
602 has_avx512cd = 0;
603 has_avx512dq = 0;
604 has_avx512bw = 0;
605 has_avx512vl = 0;
606 }
607
608 if (!arch)
609 {
610 if (vendor == signature_AMD_ebx
611 || vendor == signature_CENTAUR_ebx
612 || vendor == signature_CYRIX_ebx
613 || vendor == signature_NSC_ebx)
614 cache = detect_caches_amd (ext_level);
615 else if (vendor == signature_INTEL_ebx)
616 {
617 bool xeon_mp = (family == 15 && model == 6);
618 cache = detect_caches_intel (xeon_mp, max_level,
619 ext_level, &l2sizekb);
620 }
621 }
622
623 if (vendor == signature_AMD_ebx)
624 {
625 unsigned int name;
626
627 /* Detect geode processor by its processor signature. */
628 if (ext_level >= 0x80000002)
629 __cpuid (0x80000002, name, ebx, ecx, edx);
630 else
631 name = 0;
632
633 if (name == signature_NSC_ebx)
634 processor = PROCESSOR_GEODE;
635 else if (has_movbe && family == 22)
636 processor = PROCESSOR_BTVER2;
637 else if (has_clzero)
638 processor = PROCESSOR_ZNVER1;
639 else if (has_avx2)
640 processor = PROCESSOR_BDVER4;
641 else if (has_xsaveopt)
642 processor = PROCESSOR_BDVER3;
643 else if (has_bmi)
644 processor = PROCESSOR_BDVER2;
645 else if (has_xop)
646 processor = PROCESSOR_BDVER1;
647 else if (has_sse4a && has_ssse3)
648 processor = PROCESSOR_BTVER1;
649 else if (has_sse4a)
650 processor = PROCESSOR_AMDFAM10;
651 else if (has_sse2 || has_longmode)
652 processor = PROCESSOR_K8;
653 else if (has_3dnowp && family == 6)
654 processor = PROCESSOR_ATHLON;
655 else if (has_mmx)
656 processor = PROCESSOR_K6;
657 else
658 processor = PROCESSOR_PENTIUM;
659 }
660 else if (vendor == signature_CENTAUR_ebx)
661 {
662 processor = PROCESSOR_GENERIC;
663
664 switch (family)
665 {
666 default:
667 /* We have no idea. */
668 break;
669
670 case 5:
671 if (has_3dnow || has_mmx)
672 processor = PROCESSOR_I486;
673 break;
674
675 case 6:
676 if (has_longmode)
677 processor = PROCESSOR_K8;
678 else if (model >= 9)
679 processor = PROCESSOR_PENTIUMPRO;
680 else if (model >= 6)
681 processor = PROCESSOR_I486;
682 }
683 }
684 else
685 {
686 switch (family)
687 {
688 case 4:
689 processor = PROCESSOR_I486;
690 break;
691 case 5:
692 processor = PROCESSOR_PENTIUM;
693 break;
694 case 6:
695 processor = PROCESSOR_PENTIUMPRO;
696 break;
697 case 15:
698 processor = PROCESSOR_PENTIUM4;
699 break;
700 default:
701 /* We have no idea. */
702 processor = PROCESSOR_GENERIC;
703 }
704 }
705
706 switch (processor)
707 {
708 case PROCESSOR_I386:
709 /* Default. */
710 break;
711 case PROCESSOR_I486:
712 if (arch && vendor == signature_CENTAUR_ebx)
713 {
714 if (model >= 6)
715 cpu = "c3";
716 else if (has_3dnow)
717 cpu = "winchip2";
718 else
719 /* Assume WinChip C6. */
720 cpu = "winchip-c6";
721 }
722 else
723 cpu = "i486";
724 break;
725 case PROCESSOR_PENTIUM:
726 if (arch && has_mmx)
727 cpu = "pentium-mmx";
728 else
729 cpu = "pentium";
730 break;
731 case PROCESSOR_PENTIUMPRO:
732 switch (model)
733 {
734 case 0x1c:
735 case 0x26:
736 /* Bonnell. */
737 cpu = "bonnell";
738 break;
739 case 0x37:
740 case 0x4a:
741 case 0x4d:
742 case 0x5a:
743 case 0x5d:
744 /* Silvermont. */
745 cpu = "silvermont";
746 break;
747 case 0x0f:
748 /* Merom. */
749 case 0x17:
750 case 0x1d:
751 /* Penryn. */
752 cpu = "core2";
753 break;
754 case 0x1a:
755 case 0x1e:
756 case 0x1f:
757 case 0x2e:
758 /* Nehalem. */
759 cpu = "nehalem";
760 break;
761 case 0x25:
762 case 0x2c:
763 case 0x2f:
764 /* Westmere. */
765 cpu = "westmere";
766 break;
767 case 0x2a:
768 case 0x2d:
769 /* Sandy Bridge. */
770 cpu = "sandybridge";
771 break;
772 case 0x3a:
773 case 0x3e:
774 /* Ivy Bridge. */
775 cpu = "ivybridge";
776 break;
777 case 0x3c:
778 case 0x3f:
779 case 0x45:
780 case 0x46:
781 /* Haswell. */
782 cpu = "haswell";
783 break;
784 case 0x3d:
785 case 0x47:
786 case 0x4f:
787 case 0x56:
788 /* Broadwell. */
789 cpu = "broadwell";
790 break;
791 case 0x4e:
792 case 0x5e:
793 /* Skylake. */
794 case 0x8e:
795 case 0x9e:
796 /* Kaby Lake. */
797 cpu = "skylake";
798 break;
799 case 0x55:
800 /* Skylake with AVX-512. */
801 cpu = "skylake-avx512";
802 break;
803 case 0x57:
804 /* Knights Landing. */
805 cpu = "knl";
806 break;
807 case 0x66:
808 /* Cannon Lake. */
809 cpu = "cannonlake";
810 break;
811 case 0x85:
812 /* Knights Mill. */
813 cpu = "knm";
814 break;
815 default:
816 if (arch)
817 {
818 /* This is unknown family 0x6 CPU. */
819 /* Assume Cannon Lake. */
820 if (has_avx512vbmi)
821 cpu = "cannonlake";
822 /* Assume Knights Mill. */
823 else if (has_avx5124vnniw)
824 cpu = "knm";
825 /* Assume Knights Landing. */
826 else if (has_avx512er)
827 cpu = "knl";
828 /* Assume Skylake with AVX-512. */
829 else if (has_avx512f)
830 cpu = "skylake-avx512";
831 /* Assume Skylake. */
832 else if (has_clflushopt)
833 cpu = "skylake";
834 /* Assume Broadwell. */
835 else if (has_adx)
836 cpu = "broadwell";
837 else if (has_avx2)
838 /* Assume Haswell. */
839 cpu = "haswell";
840 else if (has_avx)
841 /* Assume Sandy Bridge. */
842 cpu = "sandybridge";
843 else if (has_sse4_2)
844 {
845 if (has_movbe)
846 /* Assume Silvermont. */
847 cpu = "silvermont";
848 else
849 /* Assume Nehalem. */
850 cpu = "nehalem";
851 }
852 else if (has_ssse3)
853 {
854 if (has_movbe)
855 /* Assume Bonnell. */
856 cpu = "bonnell";
857 else
858 /* Assume Core 2. */
859 cpu = "core2";
860 }
861 else if (has_longmode)
862 /* Perhaps some emulator? Assume x86-64, otherwise gcc
863 -march=native would be unusable for 64-bit compilations,
864 as all the CPUs below are 32-bit only. */
865 cpu = "x86-64";
866 else if (has_sse3)
867 {
868 if (vendor == signature_CENTAUR_ebx)
869 /* C7 / Eden "Esther" */
870 cpu = "c7";
871 else
872 /* It is Core Duo. */
873 cpu = "pentium-m";
874 }
875 else if (has_sse2)
876 /* It is Pentium M. */
877 cpu = "pentium-m";
878 else if (has_sse)
879 {
880 if (vendor == signature_CENTAUR_ebx)
881 {
882 if (model >= 9)
883 /* Eden "Nehemiah" */
884 cpu = "nehemiah";
885 else
886 cpu = "c3-2";
887 }
888 else
889 /* It is Pentium III. */
890 cpu = "pentium3";
891 }
892 else if (has_mmx)
893 /* It is Pentium II. */
894 cpu = "pentium2";
895 else
896 /* Default to Pentium Pro. */
897 cpu = "pentiumpro";
898 }
899 else
900 /* For -mtune, we default to -mtune=generic. */
901 cpu = "generic";
902 break;
903 }
904 break;
905 case PROCESSOR_PENTIUM4:
906 if (has_sse3)
907 {
908 if (has_longmode)
909 cpu = "nocona";
910 else
911 cpu = "prescott";
912 }
913 else
914 cpu = "pentium4";
915 break;
916 case PROCESSOR_GEODE:
917 cpu = "geode";
918 break;
919 case PROCESSOR_K6:
920 if (arch && has_3dnow)
921 cpu = "k6-3";
922 else
923 cpu = "k6";
924 break;
925 case PROCESSOR_ATHLON:
926 if (arch && has_sse)
927 cpu = "athlon-4";
928 else
929 cpu = "athlon";
930 break;
931 case PROCESSOR_K8:
932 if (arch)
933 {
934 if (vendor == signature_CENTAUR_ebx)
935 {
936 if (has_sse4_1)
937 /* Nano 3000 | Nano dual / quad core | Eden X4 */
938 cpu = "nano-3000";
939 else if (has_ssse3)
940 /* Nano 1000 | Nano 2000 */
941 cpu = "nano";
942 else if (has_sse3)
943 /* Eden X2 */
944 cpu = "eden-x2";
945 else
946 /* Default to k8 */
947 cpu = "k8";
948 }
949 else if (has_sse3)
950 cpu = "k8-sse3";
951 else
952 cpu = "k8";
953 }
954 else
955 /* For -mtune, we default to -mtune=k8 */
956 cpu = "k8";
957 break;
958 case PROCESSOR_AMDFAM10:
959 cpu = "amdfam10";
960 break;
961 case PROCESSOR_BDVER1:
962 cpu = "bdver1";
963 break;
964 case PROCESSOR_BDVER2:
965 cpu = "bdver2";
966 break;
967 case PROCESSOR_BDVER3:
968 cpu = "bdver3";
969 break;
970 case PROCESSOR_BDVER4:
971 cpu = "bdver4";
972 break;
973 case PROCESSOR_ZNVER1:
974 cpu = "znver1";
975 break;
976 case PROCESSOR_BTVER1:
977 cpu = "btver1";
978 break;
979 case PROCESSOR_BTVER2:
980 cpu = "btver2";
981 break;
982
983 default:
984 /* Use something reasonable. */
985 if (arch)
986 {
987 if (has_ssse3)
988 cpu = "core2";
989 else if (has_sse3)
990 {
991 if (has_longmode)
992 cpu = "nocona";
993 else
994 cpu = "prescott";
995 }
996 else if (has_longmode)
997 /* Perhaps some emulator? Assume x86-64, otherwise gcc
998 -march=native would be unusable for 64-bit compilations,
999 as all the CPUs below are 32-bit only. */
1000 cpu = "x86-64";
1001 else if (has_sse2)
1002 cpu = "pentium4";
1003 else if (has_cmov)
1004 cpu = "pentiumpro";
1005 else if (has_mmx)
1006 cpu = "pentium-mmx";
1007 else if (has_cmpxchg8b)
1008 cpu = "pentium";
1009 }
1010 else
1011 cpu = "generic";
1012 }
1013
1014 if (arch)
1015 {
1016 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
1017 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
1018 const char *sse = has_sse ? " -msse" : " -mno-sse";
1019 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
1020 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
1021 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
1022 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
1023 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
1024 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
1025 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
1026 const char *aes = has_aes ? " -maes" : " -mno-aes";
1027 const char *sha = has_sha ? " -msha" : " -mno-sha";
1028 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
1029 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
1030 const char *abm = has_abm ? " -mabm" : " -mno-abm";
1031 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
1032 const char *fma = has_fma ? " -mfma" : " -mno-fma";
1033 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
1034 const char *xop = has_xop ? " -mxop" : " -mno-xop";
1035 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
1036 const char *sgx = has_sgx ? " -msgx" : " -mno-sgx";
1037 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
1038 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
1039 const char *avx = has_avx ? " -mavx" : " -mno-avx";
1040 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
1041 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
1042 const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
1043 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
1044 const char *hle = has_hle ? " -mhle" : " -mno-hle";
1045 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
1046 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
1047 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
1048 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
1049 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
1050 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
1051 const char *adx = has_adx ? " -madx" : " -mno-adx";
1052 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
1053 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
1054 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
1055 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
1056 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
1057 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
1058 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
1059 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
1060 const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
1061 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
1062 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
1063 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
1064 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
1065 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
1066 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
1067 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
1068 const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1069 const char *avx512vbmi2 = has_avx512vbmi2 ? " -mavx512vbmi2" : " -mno-avx512vbmi2";
1070 const char *avx512vnni = has_avx512vnni ? " -mavx512vnni" : " -mno-avx512vnni";
1071 const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
1072 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
1073 const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
1074 const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
1075 const char *pku = has_pku ? " -mpku" : " -mno-pku";
1076 const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid";
1077 const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni";
1078 const char *ibt = has_ibt ? " -mibt" : " -mno-ibt";
1079 const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
1080 const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
1081 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
1082 sse4a, cx16, sahf, movbe, aes, sha, pclmul,
1083 popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
1084 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
1085 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
1086 fxsr, xsave, xsaveopt, avx512f, avx512er,
1087 avx512cd, avx512pf, prefetchwt1, clflushopt,
1088 xsavec, xsaves, avx512dq, avx512bw, avx512vl,
1089 avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
1090 clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk,
1091 avx512vbmi2, avx512vnni, vaes, NULL);
1092 }
1093
1094 done:
1095 return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
1096 }
1097 #else
1098
/* Fallback used when the CPUID-based implementation is compiled out
   (for instance when compiling with GCC where the %EBX register is
   fixed).  Both arguments are ignored and NULL is returned, so the
   driver just ignores the "native" -march and -mtune targets and
   leaves it to the newly built compiler to generate code for its
   default target.  */

const char *
host_detect_local_cpu (int, const char **)
{
  return NULL;
}
1107 #endif /* __GNUC__ */