]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/driver-i386.c
Add PTWRITE builtins for x86
[thirdparty/gcc.git] / gcc / config / i386 / driver-i386.c
1 /* Subroutines for the gcc driver.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #define IN_TARGET_CODE 1
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26
27 const char *host_detect_local_cpu (int argc, const char **argv);
28
29 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
30 #include "cpuid.h"
31
/* Parameters of a single cache level, as filled in by the CPUID-based
   detection routines in this file.  */
struct cache_desc
{
  unsigned sizekb;	/* Cache size; emitted as the l1/l2-cache-size param.  */
  unsigned assoc;	/* Associativity; detected but not emitted.  */
  unsigned line;	/* Line size; emitted as l1-cache-line-size.  */
};
38
39 /* Returns command line parameters that describe size and
40 cache line size of the processor caches. */
41
42 static char *
43 describe_cache (struct cache_desc level1, struct cache_desc level2)
44 {
45 char size[100], line[100], size2[100];
46
47 /* At the moment, gcc does not use the information
48 about the associativity of the cache. */
49
50 snprintf (size, sizeof (size),
51 "--param l1-cache-size=%u ", level1.sizekb);
52 snprintf (line, sizeof (line),
53 "--param l1-cache-line-size=%u ", level1.line);
54
55 snprintf (size2, sizeof (size2),
56 "--param l2-cache-size=%u ", level2.sizekb);
57
58 return concat (size, line, size2, NULL);
59 }
60
61 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
62
63 static void
64 detect_l2_cache (struct cache_desc *level2)
65 {
66 unsigned eax, ebx, ecx, edx;
67 unsigned assoc;
68
69 __cpuid (0x80000006, eax, ebx, ecx, edx);
70
71 level2->sizekb = (ecx >> 16) & 0xffff;
72 level2->line = ecx & 0xff;
73
74 assoc = (ecx >> 12) & 0xf;
75 if (assoc == 6)
76 assoc = 8;
77 else if (assoc == 8)
78 assoc = 16;
79 else if (assoc >= 0xa && assoc <= 0xc)
80 assoc = 32 + (assoc - 0xa) * 16;
81 else if (assoc >= 0xd && assoc <= 0xe)
82 assoc = 96 + (assoc - 0xd) * 32;
83
84 level2->assoc = assoc;
85 }
86
87 /* Returns the description of caches for an AMD processor. */
88
89 static const char *
90 detect_caches_amd (unsigned max_ext_level)
91 {
92 unsigned eax, ebx, ecx, edx;
93
94 struct cache_desc level1, level2 = {0, 0, 0};
95
96 if (max_ext_level < 0x80000005)
97 return "";
98
99 __cpuid (0x80000005, eax, ebx, ecx, edx);
100
101 level1.sizekb = (ecx >> 24) & 0xff;
102 level1.assoc = (ecx >> 16) & 0xff;
103 level1.line = ecx & 0xff;
104
105 if (max_ext_level >= 0x80000006)
106 detect_l2_cache (&level2);
107
108 return describe_cache (level1, level2);
109 }
110
111 /* Decodes the size, the associativity and the cache line size of
112 L1/L2 caches of an Intel processor. Values are based on
113 "Intel Processor Identification and the CPUID Instruction"
114 [Application Note 485], revision -032, December 2007. */
115
116 static void
117 decode_caches_intel (unsigned reg, bool xeon_mp,
118 struct cache_desc *level1, struct cache_desc *level2)
119 {
120 int i;
121
122 for (i = 24; i >= 0; i -= 8)
123 switch ((reg >> i) & 0xff)
124 {
125 case 0x0a:
126 level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
127 break;
128 case 0x0c:
129 level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
130 break;
131 case 0x0d:
132 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
133 break;
134 case 0x0e:
135 level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
136 break;
137 case 0x21:
138 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
139 break;
140 case 0x24:
141 level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
142 break;
143 case 0x2c:
144 level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
145 break;
146 case 0x39:
147 level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
148 break;
149 case 0x3a:
150 level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
151 break;
152 case 0x3b:
153 level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
154 break;
155 case 0x3c:
156 level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
157 break;
158 case 0x3d:
159 level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
160 break;
161 case 0x3e:
162 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
163 break;
164 case 0x41:
165 level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
166 break;
167 case 0x42:
168 level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
169 break;
170 case 0x43:
171 level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
172 break;
173 case 0x44:
174 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
175 break;
176 case 0x45:
177 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
178 break;
179 case 0x48:
180 level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
181 break;
182 case 0x49:
183 if (xeon_mp)
184 break;
185 level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
186 break;
187 case 0x4e:
188 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
189 break;
190 case 0x60:
191 level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
192 break;
193 case 0x66:
194 level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
195 break;
196 case 0x67:
197 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
198 break;
199 case 0x68:
200 level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
201 break;
202 case 0x78:
203 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
204 break;
205 case 0x79:
206 level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
207 break;
208 case 0x7a:
209 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
210 break;
211 case 0x7b:
212 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
213 break;
214 case 0x7c:
215 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
216 break;
217 case 0x7d:
218 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
219 break;
220 case 0x7f:
221 level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
222 break;
223 case 0x80:
224 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
225 break;
226 case 0x82:
227 level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
228 break;
229 case 0x83:
230 level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
231 break;
232 case 0x84:
233 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
234 break;
235 case 0x85:
236 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
237 break;
238 case 0x86:
239 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
240 break;
241 case 0x87:
242 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
243
244 default:
245 break;
246 }
247 }
248
/* Detect cache parameters using CPUID function 2.  Each of the four
   result registers carries descriptor bytes that are handed to
   decode_caches_intel, which updates *LEVEL1 and *LEVEL2.  XEON_MP is
   passed through unchanged.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
		      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int remaining, r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* The low four bits of EAX are taken as the number of CPUID(2)
     queries needed and are then cleared from the descriptor data.
     NOTE(review): only four bits are used as the count here; confirm
     against the CPUID documentation whether the whole low byte is
     meant.  */
  remaining = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  for (; remaining > 0; remaining--)
    {
      for (r = 0; r < 4; r++)
	{
	  /* Skip empty registers and those with bit 31 set.  */
	  if (regs[r] == 0)
	    continue;
	  if (((regs[r] >> 31) & 1) != 0)
	    continue;

	  decode_caches_intel (regs[r], xeon_mp, level1, level2);
	}

      /* Fetch the next batch unless this was the final round.  */
      if (remaining > 1)
	__cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
273
/* Detect cache parameters using CPUID function 4.  This
   method doesn't require hardcoded tables.  */

/* Values of the cache type field (EAX bits 4:0) compared against by
   detect_caches_cpuid4.  */
enum cache_type
{
  CACHE_END = 0,	/* Terminates the enumeration of caches.  */
  CACHE_DATA = 1,
  CACHE_INST = 2,
  CACHE_UNIFIED = 3
};
284
285 static void
286 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
287 struct cache_desc *level3)
288 {
289 struct cache_desc *cache;
290
291 unsigned eax, ebx, ecx, edx;
292 int count;
293
294 for (count = 0;; count++)
295 {
296 __cpuid_count(4, count, eax, ebx, ecx, edx);
297 switch (eax & 0x1f)
298 {
299 case CACHE_END:
300 return;
301 case CACHE_DATA:
302 case CACHE_UNIFIED:
303 {
304 switch ((eax >> 5) & 0x07)
305 {
306 case 1:
307 cache = level1;
308 break;
309 case 2:
310 cache = level2;
311 break;
312 case 3:
313 cache = level3;
314 break;
315 default:
316 cache = NULL;
317 }
318
319 if (cache)
320 {
321 unsigned sets = ecx + 1;
322 unsigned part = ((ebx >> 12) & 0x03ff) + 1;
323
324 cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
325 cache->line = (ebx & 0x0fff) + 1;
326
327 cache->sizekb = (cache->assoc * part
328 * cache->line * sets) / 1024;
329 }
330 }
331 default:
332 break;
333 }
334 }
335 }
336
337 /* Returns the description of caches for an Intel processor. */
338
339 static const char *
340 detect_caches_intel (bool xeon_mp, unsigned max_level,
341 unsigned max_ext_level, unsigned *l2sizekb)
342 {
343 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
344
345 if (max_level >= 4)
346 detect_caches_cpuid4 (&level1, &level2, &level3);
347 else if (max_level >= 2)
348 detect_caches_cpuid2 (xeon_mp, &level1, &level2);
349 else
350 return "";
351
352 if (level1.sizekb == 0)
353 return "";
354
355 /* Let the L3 replace the L2. This assumes inclusive caches
356 and single threaded program for now. */
357 if (level3.sizekb)
358 level2 = level3;
359
360 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
361 method if other methods fail to provide L2 cache parameters. */
362 if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
363 detect_l2_cache (&level2);
364
365 *l2sizekb = level2.sizekb;
366
367 return describe_cache (level1, level2);
368 }
369
370 /* This will be called by the spec parser in gcc.c when it sees
371 a %:local_cpu_detect(args) construct. Currently it will be called
372 with either "arch" or "tune" as argument depending on if -march=native
373 or -mtune=native is to be substituted.
374
375 It returns a string containing new command line parameters to be
376 put at the place of the above two options, depending on what CPU
377 this is executed. E.g. "-march=k8" on an AMD64 machine
378 for -march=native.
379
380 ARGC and ARGV are set depending on the actual arguments given
381 in the spec. */
382
383 const char *host_detect_local_cpu (int argc, const char **argv)
384 {
385 enum processor_type processor = PROCESSOR_I386;
386 const char *cpu = "i386";
387
388 const char *cache = "";
389 const char *options = "";
390
391 unsigned int eax, ebx, ecx, edx;
392
393 unsigned int max_level, ext_level;
394
395 unsigned int vendor;
396 unsigned int model, family;
397
398 unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
399 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
400
401 /* Extended features */
402 unsigned int has_lahf_lm = 0, has_sse4a = 0;
403 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
404 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
405 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
406 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
407 unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
408 unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
409 unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0;
410 unsigned int has_pconfig = 0, has_wbnoinvd = 0;
411 unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
412 unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
413 unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
414 unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
415 unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
416 unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
417 unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
418 unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
419 unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
420 unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
421 unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
422 unsigned int has_avx512bitalg = 0;
423 unsigned int has_shstk = 0;
424 unsigned int has_avx512vnni = 0, has_vaes = 0;
425 unsigned int has_vpclmulqdq = 0;
426 unsigned int has_movdiri = 0, has_movdir64b = 0;
427 unsigned int has_waitpkg = 0;
428 unsigned int has_cldemote = 0;
429
430 unsigned int has_ptwrite = 0;
431
432 bool arch;
433
434 unsigned int l2sizekb = 0;
435
436 if (argc < 1)
437 return NULL;
438
439 arch = !strcmp (argv[0], "arch");
440
441 if (!arch && strcmp (argv[0], "tune"))
442 return NULL;
443
444 max_level = __get_cpuid_max (0, &vendor);
445 if (max_level < 1)
446 goto done;
447
448 __cpuid (1, eax, ebx, ecx, edx);
449
450 model = (eax >> 4) & 0x0f;
451 family = (eax >> 8) & 0x0f;
452 if (vendor == signature_INTEL_ebx
453 || vendor == signature_AMD_ebx)
454 {
455 unsigned int extended_model, extended_family;
456
457 extended_model = (eax >> 12) & 0xf0;
458 extended_family = (eax >> 20) & 0xff;
459 if (family == 0x0f)
460 {
461 family += extended_family;
462 model += extended_model;
463 }
464 else if (family == 0x06)
465 model += extended_model;
466 }
467
468 has_sse3 = ecx & bit_SSE3;
469 has_ssse3 = ecx & bit_SSSE3;
470 has_sse4_1 = ecx & bit_SSE4_1;
471 has_sse4_2 = ecx & bit_SSE4_2;
472 has_avx = ecx & bit_AVX;
473 has_osxsave = ecx & bit_OSXSAVE;
474 has_cmpxchg16b = ecx & bit_CMPXCHG16B;
475 has_movbe = ecx & bit_MOVBE;
476 has_popcnt = ecx & bit_POPCNT;
477 has_aes = ecx & bit_AES;
478 has_pclmul = ecx & bit_PCLMUL;
479 has_fma = ecx & bit_FMA;
480 has_f16c = ecx & bit_F16C;
481 has_rdrnd = ecx & bit_RDRND;
482 has_xsave = ecx & bit_XSAVE;
483
484 has_cmpxchg8b = edx & bit_CMPXCHG8B;
485 has_cmov = edx & bit_CMOV;
486 has_mmx = edx & bit_MMX;
487 has_fxsr = edx & bit_FXSAVE;
488 has_sse = edx & bit_SSE;
489 has_sse2 = edx & bit_SSE2;
490
491 if (max_level >= 7)
492 {
493 __cpuid_count (7, 0, eax, ebx, ecx, edx);
494
495 has_bmi = ebx & bit_BMI;
496 has_sgx = ebx & bit_SGX;
497 has_hle = ebx & bit_HLE;
498 has_rtm = ebx & bit_RTM;
499 has_avx2 = ebx & bit_AVX2;
500 has_bmi2 = ebx & bit_BMI2;
501 has_fsgsbase = ebx & bit_FSGSBASE;
502 has_rdseed = ebx & bit_RDSEED;
503 has_adx = ebx & bit_ADX;
504 has_avx512f = ebx & bit_AVX512F;
505 has_avx512er = ebx & bit_AVX512ER;
506 has_avx512pf = ebx & bit_AVX512PF;
507 has_avx512cd = ebx & bit_AVX512CD;
508 has_sha = ebx & bit_SHA;
509 has_clflushopt = ebx & bit_CLFLUSHOPT;
510 has_clwb = ebx & bit_CLWB;
511 has_avx512dq = ebx & bit_AVX512DQ;
512 has_avx512bw = ebx & bit_AVX512BW;
513 has_avx512vl = ebx & bit_AVX512VL;
514 has_avx512ifma = ebx & bit_AVX512IFMA;
515
516 has_prefetchwt1 = ecx & bit_PREFETCHWT1;
517 has_avx512vbmi = ecx & bit_AVX512VBMI;
518 has_pku = ecx & bit_OSPKE;
519 has_avx512vbmi2 = ecx & bit_AVX512VBMI2;
520 has_avx512vnni = ecx & bit_AVX512VNNI;
521 has_rdpid = ecx & bit_RDPID;
522 has_gfni = ecx & bit_GFNI;
523 has_vaes = ecx & bit_VAES;
524 has_vpclmulqdq = ecx & bit_VPCLMULQDQ;
525 has_avx512bitalg = ecx & bit_AVX512BITALG;
526 has_movdiri = ecx & bit_MOVDIRI;
527 has_movdir64b = ecx & bit_MOVDIR64B;
528 has_cldemote = ecx & bit_CLDEMOTE;
529
530 has_avx5124vnniw = edx & bit_AVX5124VNNIW;
531 has_avx5124fmaps = edx & bit_AVX5124FMAPS;
532
533 has_shstk = ecx & bit_SHSTK;
534 has_pconfig = edx & bit_PCONFIG;
535 has_waitpkg = ecx & bit_WAITPKG;
536 }
537
538 if (max_level >= 13)
539 {
540 __cpuid_count (13, 1, eax, ebx, ecx, edx);
541
542 has_xsaveopt = eax & bit_XSAVEOPT;
543 has_xsavec = eax & bit_XSAVEC;
544 has_xsaves = eax & bit_XSAVES;
545 }
546
547 if (max_level >= 0x14)
548 {
549 __cpuid_count (0x14, 0, eax, ebx, ecx, edx);
550
551 has_ptwrite = ebx & bit_PTWRITE;
552 }
553
554 /* Check cpuid level of extended features. */
555 __cpuid (0x80000000, ext_level, ebx, ecx, edx);
556
557 if (ext_level >= 0x80000001)
558 {
559 __cpuid (0x80000001, eax, ebx, ecx, edx);
560
561 has_lahf_lm = ecx & bit_LAHF_LM;
562 has_sse4a = ecx & bit_SSE4a;
563 has_abm = ecx & bit_ABM;
564 has_lwp = ecx & bit_LWP;
565 has_fma4 = ecx & bit_FMA4;
566 has_xop = ecx & bit_XOP;
567 has_tbm = ecx & bit_TBM;
568 has_lzcnt = ecx & bit_LZCNT;
569 has_prfchw = ecx & bit_PRFCHW;
570
571 has_longmode = edx & bit_LM;
572 has_3dnowp = edx & bit_3DNOWP;
573 has_3dnow = edx & bit_3DNOW;
574 has_mwaitx = ecx & bit_MWAITX;
575 }
576
577 if (ext_level >= 0x80000008)
578 {
579 __cpuid (0x80000008, eax, ebx, ecx, edx);
580 has_clzero = ebx & bit_CLZERO;
581 has_wbnoinvd = ebx & bit_WBNOINVD;
582 }
583
584 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
585 #define XCR_XFEATURE_ENABLED_MASK 0x0
586 #define XSTATE_FP 0x1
587 #define XSTATE_SSE 0x2
588 #define XSTATE_YMM 0x4
589 #define XSTATE_OPMASK 0x20
590 #define XSTATE_ZMM 0x40
591 #define XSTATE_HI_ZMM 0x80
592
593 #define XCR_AVX_ENABLED_MASK \
594 (XSTATE_SSE | XSTATE_YMM)
595 #define XCR_AVX512F_ENABLED_MASK \
596 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
597
598 if (has_osxsave)
599 asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
600 : "=a" (eax), "=d" (edx)
601 : "c" (XCR_XFEATURE_ENABLED_MASK));
602 else
603 eax = 0;
604
605 /* Check if AVX registers are supported. */
606 if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK)
607 {
608 has_avx = 0;
609 has_avx2 = 0;
610 has_fma = 0;
611 has_fma4 = 0;
612 has_f16c = 0;
613 has_xop = 0;
614 has_xsave = 0;
615 has_xsaveopt = 0;
616 has_xsaves = 0;
617 has_xsavec = 0;
618 }
619
620 /* Check if AVX512F registers are supported. */
621 if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK)
622 {
623 has_avx512f = 0;
624 has_avx512er = 0;
625 has_avx512pf = 0;
626 has_avx512cd = 0;
627 has_avx512dq = 0;
628 has_avx512bw = 0;
629 has_avx512vl = 0;
630 }
631
632 if (!arch)
633 {
634 if (vendor == signature_AMD_ebx
635 || vendor == signature_CENTAUR_ebx
636 || vendor == signature_CYRIX_ebx
637 || vendor == signature_NSC_ebx)
638 cache = detect_caches_amd (ext_level);
639 else if (vendor == signature_INTEL_ebx)
640 {
641 bool xeon_mp = (family == 15 && model == 6);
642 cache = detect_caches_intel (xeon_mp, max_level,
643 ext_level, &l2sizekb);
644 }
645 }
646
647 if (vendor == signature_AMD_ebx)
648 {
649 unsigned int name;
650
651 /* Detect geode processor by its processor signature. */
652 if (ext_level >= 0x80000002)
653 __cpuid (0x80000002, name, ebx, ecx, edx);
654 else
655 name = 0;
656
657 if (name == signature_NSC_ebx)
658 processor = PROCESSOR_GEODE;
659 else if (has_movbe && family == 22)
660 processor = PROCESSOR_BTVER2;
661 else if (has_clwb)
662 processor = PROCESSOR_ZNVER2;
663 else if (has_clzero)
664 processor = PROCESSOR_ZNVER1;
665 else if (has_avx2)
666 processor = PROCESSOR_BDVER4;
667 else if (has_xsaveopt)
668 processor = PROCESSOR_BDVER3;
669 else if (has_bmi)
670 processor = PROCESSOR_BDVER2;
671 else if (has_xop)
672 processor = PROCESSOR_BDVER1;
673 else if (has_sse4a && has_ssse3)
674 processor = PROCESSOR_BTVER1;
675 else if (has_sse4a)
676 processor = PROCESSOR_AMDFAM10;
677 else if (has_sse2 || has_longmode)
678 processor = PROCESSOR_K8;
679 else if (has_3dnowp && family == 6)
680 processor = PROCESSOR_ATHLON;
681 else if (has_mmx)
682 processor = PROCESSOR_K6;
683 else
684 processor = PROCESSOR_PENTIUM;
685 }
686 else if (vendor == signature_CENTAUR_ebx)
687 {
688 processor = PROCESSOR_GENERIC;
689
690 switch (family)
691 {
692 default:
693 /* We have no idea. */
694 break;
695
696 case 5:
697 if (has_3dnow || has_mmx)
698 processor = PROCESSOR_I486;
699 break;
700
701 case 6:
702 if (has_longmode)
703 processor = PROCESSOR_K8;
704 else if (model >= 9)
705 processor = PROCESSOR_PENTIUMPRO;
706 else if (model >= 6)
707 processor = PROCESSOR_I486;
708 }
709 }
710 else
711 {
712 switch (family)
713 {
714 case 4:
715 processor = PROCESSOR_I486;
716 break;
717 case 5:
718 processor = PROCESSOR_PENTIUM;
719 break;
720 case 6:
721 processor = PROCESSOR_PENTIUMPRO;
722 break;
723 case 15:
724 processor = PROCESSOR_PENTIUM4;
725 break;
726 default:
727 /* We have no idea. */
728 processor = PROCESSOR_GENERIC;
729 }
730 }
731
732 switch (processor)
733 {
734 case PROCESSOR_I386:
735 /* Default. */
736 break;
737 case PROCESSOR_I486:
738 if (arch && vendor == signature_CENTAUR_ebx)
739 {
740 if (model >= 6)
741 cpu = "c3";
742 else if (has_3dnow)
743 cpu = "winchip2";
744 else
745 /* Assume WinChip C6. */
746 cpu = "winchip-c6";
747 }
748 else
749 cpu = "i486";
750 break;
751 case PROCESSOR_PENTIUM:
752 if (arch && has_mmx)
753 cpu = "pentium-mmx";
754 else
755 cpu = "pentium";
756 break;
757 case PROCESSOR_PENTIUMPRO:
758 switch (model)
759 {
760 case 0x1c:
761 case 0x26:
762 /* Bonnell. */
763 cpu = "bonnell";
764 break;
765 case 0x37:
766 case 0x4a:
767 case 0x4d:
768 case 0x5a:
769 case 0x5d:
770 /* Silvermont. */
771 cpu = "silvermont";
772 break;
773 case 0x5c:
774 case 0x5f:
775 /* Goldmont. */
776 cpu = "goldmont";
777 break;
778 case 0x7a:
779 /* Goldmont Plus. */
780 cpu = "goldmont-plus";
781 break;
782 case 0x0f:
783 /* Merom. */
784 case 0x17:
785 case 0x1d:
786 /* Penryn. */
787 cpu = "core2";
788 break;
789 case 0x1a:
790 case 0x1e:
791 case 0x1f:
792 case 0x2e:
793 /* Nehalem. */
794 cpu = "nehalem";
795 break;
796 case 0x25:
797 case 0x2c:
798 case 0x2f:
799 /* Westmere. */
800 cpu = "westmere";
801 break;
802 case 0x2a:
803 case 0x2d:
804 /* Sandy Bridge. */
805 cpu = "sandybridge";
806 break;
807 case 0x3a:
808 case 0x3e:
809 /* Ivy Bridge. */
810 cpu = "ivybridge";
811 break;
812 case 0x3c:
813 case 0x3f:
814 case 0x45:
815 case 0x46:
816 /* Haswell. */
817 cpu = "haswell";
818 break;
819 case 0x3d:
820 case 0x47:
821 case 0x4f:
822 case 0x56:
823 /* Broadwell. */
824 cpu = "broadwell";
825 break;
826 case 0x4e:
827 case 0x5e:
828 /* Skylake. */
829 case 0x8e:
830 case 0x9e:
831 /* Kaby Lake. */
832 cpu = "skylake";
833 break;
834 case 0x55:
835 /* Skylake with AVX-512. */
836 cpu = "skylake-avx512";
837 break;
838 case 0x57:
839 /* Knights Landing. */
840 cpu = "knl";
841 break;
842 case 0x66:
843 /* Cannon Lake. */
844 cpu = "cannonlake";
845 break;
846 case 0x85:
847 /* Knights Mill. */
848 cpu = "knm";
849 break;
850 default:
851 if (arch)
852 {
853 /* This is unknown family 0x6 CPU. */
854 /* Assume Ice Lake Server. */
855 if (has_wbnoinvd)
856 cpu = "icelake-server";
857 /* Assume Ice Lake. */
858 else if (has_gfni)
859 cpu = "icelake-client";
860 /* Assume Cannon Lake. */
861 else if (has_avx512vbmi)
862 cpu = "cannonlake";
863 /* Assume Knights Mill. */
864 else if (has_avx5124vnniw)
865 cpu = "knm";
866 /* Assume Knights Landing. */
867 else if (has_avx512er)
868 cpu = "knl";
869 /* Assume Skylake with AVX-512. */
870 else if (has_avx512f)
871 cpu = "skylake-avx512";
872 /* Assume Skylake. */
873 else if (has_clflushopt)
874 cpu = "skylake";
875 /* Assume Broadwell. */
876 else if (has_adx)
877 cpu = "broadwell";
878 else if (has_avx2)
879 /* Assume Haswell. */
880 cpu = "haswell";
881 else if (has_avx)
882 /* Assume Sandy Bridge. */
883 cpu = "sandybridge";
884 else if (has_sse4_2)
885 {
886 if (has_gfni)
887 /* Assume Tremont. */
888 cpu = "tremont";
889 else if (has_sgx)
890 /* Assume Goldmont Plus. */
891 cpu = "goldmont-plus";
892 else if (has_xsave)
893 /* Assume Goldmont. */
894 cpu = "goldmont";
895 else if (has_movbe)
896 /* Assume Silvermont. */
897 cpu = "silvermont";
898 else
899 /* Assume Nehalem. */
900 cpu = "nehalem";
901 }
902 else if (has_ssse3)
903 {
904 if (has_movbe)
905 /* Assume Bonnell. */
906 cpu = "bonnell";
907 else
908 /* Assume Core 2. */
909 cpu = "core2";
910 }
911 else if (has_longmode)
912 /* Perhaps some emulator? Assume x86-64, otherwise gcc
913 -march=native would be unusable for 64-bit compilations,
914 as all the CPUs below are 32-bit only. */
915 cpu = "x86-64";
916 else if (has_sse3)
917 {
918 if (vendor == signature_CENTAUR_ebx)
919 /* C7 / Eden "Esther" */
920 cpu = "c7";
921 else
922 /* It is Core Duo. */
923 cpu = "pentium-m";
924 }
925 else if (has_sse2)
926 /* It is Pentium M. */
927 cpu = "pentium-m";
928 else if (has_sse)
929 {
930 if (vendor == signature_CENTAUR_ebx)
931 {
932 if (model >= 9)
933 /* Eden "Nehemiah" */
934 cpu = "nehemiah";
935 else
936 cpu = "c3-2";
937 }
938 else
939 /* It is Pentium III. */
940 cpu = "pentium3";
941 }
942 else if (has_mmx)
943 /* It is Pentium II. */
944 cpu = "pentium2";
945 else
946 /* Default to Pentium Pro. */
947 cpu = "pentiumpro";
948 }
949 else
950 /* For -mtune, we default to -mtune=generic. */
951 cpu = "generic";
952 break;
953 }
954 break;
955 case PROCESSOR_PENTIUM4:
956 if (has_sse3)
957 {
958 if (has_longmode)
959 cpu = "nocona";
960 else
961 cpu = "prescott";
962 }
963 else
964 cpu = "pentium4";
965 break;
966 case PROCESSOR_GEODE:
967 cpu = "geode";
968 break;
969 case PROCESSOR_K6:
970 if (arch && has_3dnow)
971 cpu = "k6-3";
972 else
973 cpu = "k6";
974 break;
975 case PROCESSOR_ATHLON:
976 if (arch && has_sse)
977 cpu = "athlon-4";
978 else
979 cpu = "athlon";
980 break;
981 case PROCESSOR_K8:
982 if (arch)
983 {
984 if (vendor == signature_CENTAUR_ebx)
985 {
986 if (has_sse4_1)
987 /* Nano 3000 | Nano dual / quad core | Eden X4 */
988 cpu = "nano-3000";
989 else if (has_ssse3)
990 /* Nano 1000 | Nano 2000 */
991 cpu = "nano";
992 else if (has_sse3)
993 /* Eden X2 */
994 cpu = "eden-x2";
995 else
996 /* Default to k8 */
997 cpu = "k8";
998 }
999 else if (has_sse3)
1000 cpu = "k8-sse3";
1001 else
1002 cpu = "k8";
1003 }
1004 else
1005 /* For -mtune, we default to -mtune=k8 */
1006 cpu = "k8";
1007 break;
1008 case PROCESSOR_AMDFAM10:
1009 cpu = "amdfam10";
1010 break;
1011 case PROCESSOR_BDVER1:
1012 cpu = "bdver1";
1013 break;
1014 case PROCESSOR_BDVER2:
1015 cpu = "bdver2";
1016 break;
1017 case PROCESSOR_BDVER3:
1018 cpu = "bdver3";
1019 break;
1020 case PROCESSOR_BDVER4:
1021 cpu = "bdver4";
1022 break;
1023 case PROCESSOR_ZNVER1:
1024 cpu = "znver1";
1025 break;
1026 case PROCESSOR_ZNVER2:
1027 cpu = "znver2";
1028 break;
1029 case PROCESSOR_BTVER1:
1030 cpu = "btver1";
1031 break;
1032 case PROCESSOR_BTVER2:
1033 cpu = "btver2";
1034 break;
1035
1036 default:
1037 /* Use something reasonable. */
1038 if (arch)
1039 {
1040 if (has_ssse3)
1041 cpu = "core2";
1042 else if (has_sse3)
1043 {
1044 if (has_longmode)
1045 cpu = "nocona";
1046 else
1047 cpu = "prescott";
1048 }
1049 else if (has_longmode)
1050 /* Perhaps some emulator? Assume x86-64, otherwise gcc
1051 -march=native would be unusable for 64-bit compilations,
1052 as all the CPUs below are 32-bit only. */
1053 cpu = "x86-64";
1054 else if (has_sse2)
1055 cpu = "pentium4";
1056 else if (has_cmov)
1057 cpu = "pentiumpro";
1058 else if (has_mmx)
1059 cpu = "pentium-mmx";
1060 else if (has_cmpxchg8b)
1061 cpu = "pentium";
1062 }
1063 else
1064 cpu = "generic";
1065 }
1066
1067 if (arch)
1068 {
1069 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
1070 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
1071 const char *sse = has_sse ? " -msse" : " -mno-sse";
1072 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
1073 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
1074 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
1075 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
1076 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
1077 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
1078 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
1079 const char *aes = has_aes ? " -maes" : " -mno-aes";
1080 const char *sha = has_sha ? " -msha" : " -mno-sha";
1081 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
1082 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
1083 const char *abm = has_abm ? " -mabm" : " -mno-abm";
1084 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
1085 const char *fma = has_fma ? " -mfma" : " -mno-fma";
1086 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
1087 const char *xop = has_xop ? " -mxop" : " -mno-xop";
1088 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
1089 const char *pconfig = has_pconfig ? " -mpconfig" : " -mno-pconfig";
1090 const char *wbnoinvd = has_wbnoinvd ? " -mwbnoinvd" : " -mno-wbnoinvd";
1091 const char *sgx = has_sgx ? " -msgx" : " -mno-sgx";
1092 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
1093 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
1094 const char *avx = has_avx ? " -mavx" : " -mno-avx";
1095 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
1096 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
1097 const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
1098 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
1099 const char *hle = has_hle ? " -mhle" : " -mno-hle";
1100 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
1101 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
1102 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
1103 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
1104 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
1105 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
1106 const char *adx = has_adx ? " -madx" : " -mno-adx";
1107 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
1108 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
1109 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
1110 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
1111 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
1112 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
1113 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
1114 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
1115 const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
1116 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
1117 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
1118 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
1119 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
1120 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
1121 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
1122 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
1123 const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1124 const char *avx512vbmi2 = has_avx512vbmi2 ? " -mavx512vbmi2" : " -mno-avx512vbmi2";
1125 const char *avx512vnni = has_avx512vnni ? " -mavx512vnni" : " -mno-avx512vnni";
1126 const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
1127 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
1128 const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
1129 const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
1130 const char *pku = has_pku ? " -mpku" : " -mno-pku";
1131 const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid";
1132 const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni";
1133 const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
1134 const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
1135 const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
1136 const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
1137 const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
1138 const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
1139 const char *waitpkg = has_waitpkg ? " -mwaitpkg" : " -mno-waitpkg";
1140 const char *cldemote = has_cldemote ? " -mcldemote" : " -mno-cldemote";
1141 const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
1142
1143 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
1144 sse4a, cx16, sahf, movbe, aes, sha, pclmul,
1145 popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
1146 pconfig, wbnoinvd,
1147 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
1148 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
1149 fxsr, xsave, xsaveopt, avx512f, avx512er,
1150 avx512cd, avx512pf, prefetchwt1, clflushopt,
1151 xsavec, xsaves, avx512dq, avx512bw, avx512vl,
1152 avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
1153 clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
1154 avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
1155 avx512bitalg, movdiri, movdir64b, waitpkg, cldemote,
1156 ptwrite,
1157 NULL);
1158 }
1159
1160 done:
1161 return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
1162 }
1163 #else
1164
/* Fallback used when the compiler building this file keeps the %EBX
   register fixed.  No detection is attempted: NULL is returned, so
   the driver will just ignore the -march and -mtune "native" target
   and leave it to the newly built compiler to generate code for its
   default target.  ARGC and ARGV are not examined.  */

const char *host_detect_local_cpu (int argc, const char **argv)
{
  (void) argc;
  (void) argv;

  return NULL;
}
1173 #endif /* __GNUC__ */