1 /* Subroutines for the gcc driver.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
/* Forward declaration of host_detect_local_cpu, which is defined further
   down in this file.  */
25 const char *host_detect_local_cpu (int argc
, const char **argv
);
37 /* Returns command line parameters that describe size and
38 cache line size of the processor caches. */
41 describe_cache (struct cache_desc level1
, struct cache_desc level2
)
43 char size
[100], line
[100], size2
[100];
45 /* At the moment, gcc does not use the information
46 about the associativity of the cache. */
48 snprintf (size
, sizeof (size
),
49 "--param l1-cache-size=%u ", level1
.sizekb
);
50 snprintf (line
, sizeof (line
),
51 "--param l1-cache-line-size=%u ", level1
.line
);
53 snprintf (size2
, sizeof (size2
),
54 "--param l2-cache-size=%u ", level2
.sizekb
);
56 return concat (size
, line
, size2
, NULL
);
59 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
/* Fills the sizekb, line and assoc fields of *LEVEL2 from the ECX value
   returned by that CPUID query.
   NOTE(review): the declaration of assoc and the leading branches of the
   if/else chain below are not present in this listing; confirm against
   the upstream file.  */
62 detect_l2_cache (struct cache_desc
*level2
)
64 unsigned eax
, ebx
, ecx
, edx
;
67 __cpuid (0x80000006, eax
, ebx
, ecx
, edx
);
/* ECX bits 31..16 are stored in sizekb and bits 7..0 in line.  */
69 level2
->sizekb
= (ecx
>> 16) & 0xffff;
70 level2
->line
= ecx
& 0xff;
/* ECX bits 15..12 hold an encoded associativity.  The visible branches
   remap 0xa..0xc to 32, 48, 64 and 0xd..0xe to 96, 128.  */
72 assoc
= (ecx
>> 12) & 0xf;
77 else if (assoc
>= 0xa && assoc
<= 0xc)
78 assoc
= 32 + (assoc
- 0xa) * 16;
79 else if (assoc
>= 0xd && assoc
<= 0xe)
80 assoc
= 96 + (assoc
- 0xd) * 32;
82 level2
->assoc
= assoc
;
85 /* Returns the description of caches for an AMD processor. */
/* Queries CPUID function 0x80000005 for the L1 description and, when
   MAX_EXT_LEVEL is at least 0x80000006, calls detect_l2_cache for the L2
   description.  The result is whatever describe_cache returns for the
   two descriptions.
   NOTE(review): the statement guarded by the
   "max_ext_level < 0x80000005" test is not present in this listing.  */
88 detect_caches_amd (unsigned max_ext_level
)
90 unsigned eax
, ebx
, ecx
, edx
;
/* level2 starts zeroed so it is well defined when the L2 query below is
   not performed.  */
92 struct cache_desc level1
, level2
= {0, 0, 0};
94 if (max_ext_level
< 0x80000005)
97 __cpuid (0x80000005, eax
, ebx
, ecx
, edx
);
/* L1 fields come from ECX: bits 31..24 -> sizekb, bits 23..16 -> assoc,
   bits 7..0 -> line.  */
99 level1
.sizekb
= (ecx
>> 24) & 0xff;
100 level1
.assoc
= (ecx
>> 16) & 0xff;
101 level1
.line
= ecx
& 0xff;
103 if (max_ext_level
>= 0x80000006)
104 detect_l2_cache (&level2
);
106 return describe_cache (level1
, level2
);
109 /* Decodes the size, the associativity and the cache line size of
110 L1/L2 caches of an Intel processor. Values are based on
111 "Intel Processor Identification and the CPUID Instruction"
112 [Application Note 485], revision -032, December 2007. */
/* Walks the four bytes of REG from the most significant (shift 24) to the
   least significant (shift 0) and switches on each byte value.  The
   assignments below record sizekb, assoc and line for *LEVEL1 or
   *LEVEL2.
   NOTE(review): the case labels, the break statements and any use of
   XEON_MP are not present in this listing; confirm against the upstream
   file before relying on which byte value selects which assignment.  */
115 decode_caches_intel (unsigned reg
, bool xeon_mp
,
116 struct cache_desc
*level1
, struct cache_desc
*level2
)
120 for (i
= 24; i
>= 0; i
-= 8)
121 switch ((reg
>> i
) & 0xff)
124 level1
->sizekb
= 8; level1
->assoc
= 2; level1
->line
= 32;
127 level1
->sizekb
= 16; level1
->assoc
= 4; level1
->line
= 32;
130 level1
->sizekb
= 32; level1
->assoc
= 8; level1
->line
= 64;
133 level2
->sizekb
= 128; level2
->assoc
= 4; level2
->line
= 64;
136 level2
->sizekb
= 192; level2
->assoc
= 6; level2
->line
= 64;
139 level2
->sizekb
= 128; level2
->assoc
= 2; level2
->line
= 64;
142 level2
->sizekb
= 256; level2
->assoc
= 4; level2
->line
= 64;
145 level2
->sizekb
= 384; level2
->assoc
= 6; level2
->line
= 64;
148 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 64;
151 level2
->sizekb
= 128; level2
->assoc
= 4; level2
->line
= 32;
154 level2
->sizekb
= 256; level2
->assoc
= 4; level2
->line
= 32;
157 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 32;
160 level2
->sizekb
= 1024; level2
->assoc
= 4; level2
->line
= 32;
163 level2
->sizekb
= 2048; level2
->assoc
= 4; level2
->line
= 32;
168 level2
->sizekb
= 4096; level2
->assoc
= 16; level2
->line
= 64;
171 level2
->sizekb
= 6144; level2
->assoc
= 24; level2
->line
= 64;
174 level1
->sizekb
= 16; level1
->assoc
= 8; level1
->line
= 64;
177 level1
->sizekb
= 8; level1
->assoc
= 4; level1
->line
= 64;
180 level1
->sizekb
= 16; level1
->assoc
= 4; level1
->line
= 64;
183 level1
->sizekb
= 32; level1
->assoc
= 4; level1
->line
= 64;
186 level2
->sizekb
= 1024; level2
->assoc
= 4; level2
->line
= 64;
189 level2
->sizekb
= 128; level2
->assoc
= 8; level2
->line
= 64;
192 level2
->sizekb
= 256; level2
->assoc
= 8; level2
->line
= 64;
195 level2
->sizekb
= 512; level2
->assoc
= 8; level2
->line
= 64;
198 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 64;
201 level2
->sizekb
= 2048; level2
->assoc
= 8; level2
->line
= 64;
204 level2
->sizekb
= 512; level2
->assoc
= 2; level2
->line
= 64;
207 level2
->sizekb
= 256; level2
->assoc
= 8; level2
->line
= 32;
210 level2
->sizekb
= 512; level2
->assoc
= 8; level2
->line
= 32;
213 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 32;
216 level2
->sizekb
= 2048; level2
->assoc
= 8; level2
->line
= 32;
219 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 64;
222 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 64;
229 /* Detect cache parameters using CPUID function 2. */
/* Passes each register returned by CPUID function 2 to
   decode_caches_intel, which updates *LEVEL1 and *LEVEL2; XEON_MP is
   forwarded unchanged.
   NOTE(review): the declarations of regs, nreps and i, and the loop that
   consumes nreps, are not present in this listing.  */
232 detect_caches_cpuid2 (bool xeon_mp
,
233 struct cache_desc
*level1
, struct cache_desc
*level2
)
238 __cpuid (2, regs
[0], regs
[1], regs
[2], regs
[3]);
/* The low four bits of regs[0] are kept in nreps.  */
240 nreps
= regs
[0] & 0x0f;
/* Only non-zero registers whose top bit (bit 31) is clear are decoded.  */
245 for (i
= 0; i
< 4; i
++)
246 if (regs
[i
] && !((regs
[i
] >> 31) & 1))
247 decode_caches_intel (regs
[i
], xeon_mp
, level1
, level2
);
/* The same CPUID query is issued again after the registers have been
   processed.  */
250 __cpuid (2, regs
[0], regs
[1], regs
[2], regs
[3]);
254 /* Detect cache parameters using CPUID function 4. This
255 method doesn't require hardcoded tables. */
/* Issues CPUID function 4 with an increasing sub-function index COUNT and
   computes assoc, line and sizekb for the description that CACHE points
   to.
   NOTE(review): the declaration of count, the loop exit condition, the
   case labels of the switch and the code that points CACHE at LEVEL1,
   LEVEL2 or LEVEL3 are not present in this listing.  */
266 detect_caches_cpuid4 (struct cache_desc
*level1
, struct cache_desc
*level2
,
267 struct cache_desc
*level3
)
269 struct cache_desc
*cache
;
271 unsigned eax
, ebx
, ecx
, edx
;
/* The for header itself has no termination test.  */
274 for (count
= 0;; count
++)
276 __cpuid_count(4, count
, eax
, ebx
, ecx
, edx
);
/* The switch selector is the 3-bit field in EAX bits 7..5.  */
284 switch ((eax
>> 5) & 0x07)
/* One is added to each decoded field: sets come from ECX, part from EBX
   bits 21..12, assoc from EBX bits 31..22 and line from EBX bits 11..0.  */
301 unsigned sets
= ecx
+ 1;
302 unsigned part
= ((ebx
>> 12) & 0x03ff) + 1;
304 cache
->assoc
= ((ebx
>> 22) & 0x03ff) + 1;
305 cache
->line
= (ebx
& 0x0fff) + 1;
/* The product assoc * part * line * sets is divided by 1024 before being
   stored in sizekb.  */
307 cache
->sizekb
= (cache
->assoc
* part
308 * cache
->line
* sets
) / 1024;
317 /* Returns the description of caches for an Intel processor. */
/* Fills three zero-initialized cache descriptions via
   detect_caches_cpuid4 or, when MAX_LEVEL is at least 2,
   detect_caches_cpuid2.  If no L2 size was found it falls back to
   detect_l2_cache.  It then stores the L2 size through L2SIZEKB and
   returns what describe_cache produces for level1 and level2.
   NOTE(review): the condition that selects detect_caches_cpuid4, the
   action taken when level1.sizekb is 0 and the code that lets L3 replace
   L2 are not present in this listing.  */
320 detect_caches_intel (bool xeon_mp
, unsigned max_level
,
321 unsigned max_ext_level
, unsigned *l2sizekb
)
323 struct cache_desc level1
= {0, 0, 0}, level2
= {0, 0, 0}, level3
= {0, 0, 0};
326 detect_caches_cpuid4 (&level1
, &level2
, &level3
);
327 else if (max_level
>= 2)
328 detect_caches_cpuid2 (xeon_mp
, &level1
, &level2
);
332 if (level1
.sizekb
== 0)
335 /* Let the L3 replace the L2. This assumes inclusive caches
336 and single threaded program for now. */
340 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
341 method if other methods fail to provide L2 cache parameters. */
342 if (level2
.sizekb
== 0 && max_ext_level
>= 0x80000006)
343 detect_l2_cache (&level2
);
/* The caller receives the final L2 size through the out-parameter.  */
345 *l2sizekb
= level2
.sizekb
;
347 return describe_cache (level1
, level2
);
350 /* This will be called by the spec parser in gcc.c when it sees
351 a %:local_cpu_detect(args) construct. Currently it will be called
352 with either "arch" or "tune" as argument, depending on whether
353 -march=native or -mtune=native is to be substituted.
355 It returns a string containing new command line parameters to be
356 put in place of the above two options, depending on the CPU
357 this is executed on, e.g. "-march=k8" on an AMD64 machine.
360 ARGC and ARGV are set depending on the actual arguments given. */
/* Visible behavior in this listing: ARGV[0] is compared against "arch" and
   "tune", CPUID results are decoded into the has_* feature flags, cache
   options are obtained from detect_caches_amd or detect_caches_intel
   depending on the vendor signature, and the result is built with concat
   from the cache options, "-m", ARGV[0], "=", the selected cpu name and
   the accumulated feature options.
   NOTE(review): many statements of this function (braces, several
   conditions and the cpu-name assignments among them) are not present in
   this listing; confirm details against the upstream file.  */
363 const char *host_detect_local_cpu (int argc
, const char **argv
)
365 enum processor_type processor
= PROCESSOR_I386
;
366 const char *cpu
= "i386";
368 const char *cache
= "";
369 const char *options
= "";
371 unsigned int eax
, ebx
, ecx
, edx
;
373 unsigned int max_level
, ext_level
;
376 unsigned int model
, family
;
378 unsigned int has_sse3
, has_ssse3
, has_cmpxchg16b
;
379 unsigned int has_cmpxchg8b
, has_cmov
, has_mmx
, has_sse
, has_sse2
;
381 /* Extended features */
382 unsigned int has_lahf_lm
= 0, has_sse4a
= 0;
383 unsigned int has_longmode
= 0, has_3dnowp
= 0, has_3dnow
= 0;
384 unsigned int has_movbe
= 0, has_sse4_1
= 0, has_sse4_2
= 0;
385 unsigned int has_popcnt
= 0, has_aes
= 0, has_avx
= 0, has_avx2
= 0;
386 unsigned int has_pclmul
= 0, has_abm
= 0, has_lwp
= 0;
387 unsigned int has_fma
= 0, has_fma4
= 0, has_xop
= 0;
388 unsigned int has_bmi
= 0, has_bmi2
= 0, has_tbm
= 0, has_lzcnt
= 0;
389 unsigned int has_hle
= 0, has_rtm
= 0;
390 unsigned int has_rdrnd
= 0, has_f16c
= 0, has_fsgsbase
= 0;
391 unsigned int has_rdseed
= 0, has_prfchw
= 0, has_adx
= 0;
392 unsigned int has_osxsave
= 0, has_fxsr
= 0, has_xsave
= 0, has_xsaveopt
= 0;
393 unsigned int has_avx512er
= 0, has_avx512pf
= 0, has_avx512cd
= 0;
394 unsigned int has_avx512f
= 0;
398 unsigned int l2sizekb
= 0;
/* arch is non-zero exactly when ARGV[0] equals "arch".  */
403 arch
= !strcmp (argv
[0], "arch");
405 if (!arch
&& strcmp (argv
[0], "tune"))
408 max_level
= __get_cpuid_max (0, &vendor
);
/* CPUID query 1: EAX bits 7..4 give the model and bits 11..8 the family;
   ECX and EDX supply the feature bits tested below.  */
412 __cpuid (1, eax
, ebx
, ecx
, edx
);
414 model
= (eax
>> 4) & 0x0f;
415 family
= (eax
>> 8) & 0x0f;
416 if (vendor
== signature_INTEL_ebx
)
418 unsigned int extended_model
, extended_family
;
/* (eax >> 12) & 0xf0 places EAX bits 19..16 in the high nibble, so adding
   it to model extends the 4-bit model value.  The extended family is EAX
   bits 27..20.  */
420 extended_model
= (eax
>> 12) & 0xf0;
421 extended_family
= (eax
>> 20) & 0xff;
424 family
+= extended_family
;
425 model
+= extended_model
;
427 else if (family
== 0x06)
428 model
+= extended_model
;
431 has_sse3
= ecx
& bit_SSE3
;
432 has_ssse3
= ecx
& bit_SSSE3
;
433 has_sse4_1
= ecx
& bit_SSE4_1
;
434 has_sse4_2
= ecx
& bit_SSE4_2
;
435 has_avx
= ecx
& bit_AVX
;
436 has_osxsave
= ecx
& bit_OSXSAVE
;
437 has_cmpxchg16b
= ecx
& bit_CMPXCHG16B
;
438 has_movbe
= ecx
& bit_MOVBE
;
439 has_popcnt
= ecx
& bit_POPCNT
;
440 has_aes
= ecx
& bit_AES
;
441 has_pclmul
= ecx
& bit_PCLMUL
;
442 has_fma
= ecx
& bit_FMA
;
443 has_f16c
= ecx
& bit_F16C
;
444 has_rdrnd
= ecx
& bit_RDRND
;
445 has_xsave
= ecx
& bit_XSAVE
;
447 has_cmpxchg8b
= edx
& bit_CMPXCHG8B
;
448 has_cmov
= edx
& bit_CMOV
;
449 has_mmx
= edx
& bit_MMX
;
450 has_fxsr
= edx
& bit_FXSAVE
;
451 has_sse
= edx
& bit_SSE
;
452 has_sse2
= edx
& bit_SSE2
;
/* CPUID query 7, sub-query 0: the feature bits below are taken from EBX.  */
456 __cpuid_count (7, 0, eax
, ebx
, ecx
, edx
);
458 has_bmi
= ebx
& bit_BMI
;
459 has_hle
= ebx
& bit_HLE
;
460 has_rtm
= ebx
& bit_RTM
;
461 has_avx2
= ebx
& bit_AVX2
;
462 has_bmi2
= ebx
& bit_BMI2
;
463 has_fsgsbase
= ebx
& bit_FSGSBASE
;
464 has_rdseed
= ebx
& bit_RDSEED
;
465 has_adx
= ebx
& bit_ADX
;
466 has_avx512f
= ebx
& bit_AVX512F
;
467 has_avx512er
= ebx
& bit_AVX512ER
;
468 has_avx512pf
= ebx
& bit_AVX512PF
;
469 has_avx512cd
= ebx
& bit_AVX512CD
;
/* CPUID query 13, sub-query 1: XSAVEOPT is read from EAX.  */
474 __cpuid_count (13, 1, eax
, ebx
, ecx
, edx
);
476 has_xsaveopt
= eax
& bit_XSAVEOPT
;
479 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
480 #define XCR_XFEATURE_ENABLED_MASK 0x0
481 #define XSTATE_FP 0x1
482 #define XSTATE_SSE 0x2
483 #define XSTATE_YMM 0x4
485 asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
486 : "=a" (eax
), "=d" (edx
)
487 : "c" (XCR_XFEATURE_ENABLED_MASK
));
489 /* Check if SSE and YMM states are supported. */
491 || (eax
& (XSTATE_SSE
| XSTATE_YMM
)) != (XSTATE_SSE
| XSTATE_YMM
))
502 /* Check cpuid level of extended features. */
503 __cpuid (0x80000000, ext_level
, ebx
, ecx
, edx
);
505 if (ext_level
> 0x80000000)
507 __cpuid (0x80000001, eax
, ebx
, ecx
, edx
);
509 has_lahf_lm
= ecx
& bit_LAHF_LM
;
510 has_sse4a
= ecx
& bit_SSE4a
;
511 has_abm
= ecx
& bit_ABM
;
512 has_lwp
= ecx
& bit_LWP
;
513 has_fma4
= ecx
& bit_FMA4
;
514 has_xop
= ecx
& bit_XOP
;
515 has_tbm
= ecx
& bit_TBM
;
516 has_lzcnt
= ecx
& bit_LZCNT
;
517 has_prfchw
= ecx
& bit_PRFCHW
;
519 has_longmode
= edx
& bit_LM
;
520 has_3dnowp
= edx
& bit_3DNOWP
;
521 has_3dnow
= edx
& bit_3DNOW
;
/* AMD, Centaur, Cyrix and NSC vendor signatures use the AMD-style cache
   query; the Intel signature uses detect_caches_intel.  */
526 if (vendor
== signature_AMD_ebx
527 || vendor
== signature_CENTAUR_ebx
528 || vendor
== signature_CYRIX_ebx
529 || vendor
== signature_NSC_ebx
)
530 cache
= detect_caches_amd (ext_level
);
531 else if (vendor
== signature_INTEL_ebx
)
/* xeon_mp is set only for family 15, model 6.  */
533 bool xeon_mp
= (family
== 15 && model
== 6);
534 cache
= detect_caches_intel (xeon_mp
, max_level
,
535 ext_level
, &l2sizekb
);
539 if (vendor
== signature_AMD_ebx
)
543 /* Detect geode processor by its processor signature. */
544 if (ext_level
> 0x80000001)
545 __cpuid (0x80000002, name
, ebx
, ecx
, edx
);
549 if (name
== signature_NSC_ebx
)
550 processor
= PROCESSOR_GEODE
;
552 processor
= PROCESSOR_BTVER2
;
553 else if (has_xsaveopt
)
554 processor
= PROCESSOR_BDVER3
;
556 processor
= PROCESSOR_BDVER2
;
558 processor
= PROCESSOR_BDVER1
;
559 else if (has_sse4a
&& has_ssse3
)
560 processor
= PROCESSOR_BTVER1
;
562 processor
= PROCESSOR_AMDFAM10
;
563 else if (has_sse2
|| has_longmode
)
564 processor
= PROCESSOR_K8
;
565 else if (has_3dnowp
&& family
== 6)
566 processor
= PROCESSOR_ATHLON
;
568 processor
= PROCESSOR_K6
;
570 processor
= PROCESSOR_PENTIUM
;
572 else if (vendor
== signature_CENTAUR_ebx
)
580 /* Use the default detection procedure. */
581 processor
= PROCESSOR_GENERIC
;
587 processor
= PROCESSOR_GENERIC
;
595 processor
= PROCESSOR_GENERIC
;
598 /* We have no idea. */
599 processor
= PROCESSOR_GENERIC
;
608 processor
= PROCESSOR_I486
;
611 processor
= PROCESSOR_PENTIUM
;
614 processor
= PROCESSOR_PENTIUMPRO
;
617 processor
= PROCESSOR_PENTIUM4
;
620 /* We have no idea. */
621 processor
= PROCESSOR_GENERIC
;
633 case PROCESSOR_PENTIUM
:
639 case PROCESSOR_PENTIUMPRO
:
684 /* This is unknown family 0x6 CPU. */
686 /* Assume Haswell. */
689 /* Assume Sandy Bridge. */
697 /* Assume Core i7. */
710 /* It is Core Duo. */
713 /* It is Pentium M. */
716 /* It is Pentium III. */
719 /* It is Pentium II. */
722 /* Default to Pentium Pro. */
726 /* For -mtune, we default to -mtune=generic. */
731 case PROCESSOR_PENTIUM4
:
742 case PROCESSOR_GEODE
:
746 if (arch
&& has_3dnow
)
751 case PROCESSOR_ATHLON
:
758 if (arch
&& has_sse3
)
763 case PROCESSOR_AMDFAM10
:
766 case PROCESSOR_BDVER1
:
769 case PROCESSOR_BDVER2
:
772 case PROCESSOR_BDVER3
:
775 case PROCESSOR_BTVER1
:
778 case PROCESSOR_BTVER2
:
783 /* Use something reasonable. */
801 else if (has_cmpxchg8b
)
/* Each feature flag selects either the enabling " -m..." option or its
   " -mno-..." counterpart; the chosen strings are appended to OPTIONS by
   the concat call that follows.  */
810 const char *mmx
= has_mmx
? " -mmmx" : " -mno-mmx";
811 const char *mmx3dnow
= has_3dnow
? " -m3dnow" : " -mno-3dnow";
812 const char *sse
= has_sse
? " -msse" : " -mno-sse";
813 const char *sse2
= has_sse2
? " -msse2" : " -mno-sse2";
814 const char *sse3
= has_sse3
? " -msse3" : " -mno-sse3";
815 const char *ssse3
= has_ssse3
? " -mssse3" : " -mno-ssse3";
816 const char *sse4a
= has_sse4a
? " -msse4a" : " -mno-sse4a";
817 const char *cx16
= has_cmpxchg16b
? " -mcx16" : " -mno-cx16";
818 const char *sahf
= has_lahf_lm
? " -msahf" : " -mno-sahf";
819 const char *movbe
= has_movbe
? " -mmovbe" : " -mno-movbe";
820 const char *aes
= has_aes
? " -maes" : " -mno-aes";
821 const char *pclmul
= has_pclmul
? " -mpclmul" : " -mno-pclmul";
822 const char *popcnt
= has_popcnt
? " -mpopcnt" : " -mno-popcnt";
823 const char *abm
= has_abm
? " -mabm" : " -mno-abm";
824 const char *lwp
= has_lwp
? " -mlwp" : " -mno-lwp";
825 const char *fma
= has_fma
? " -mfma" : " -mno-fma";
826 const char *fma4
= has_fma4
? " -mfma4" : " -mno-fma4";
827 const char *xop
= has_xop
? " -mxop" : " -mno-xop";
828 const char *bmi
= has_bmi
? " -mbmi" : " -mno-bmi";
829 const char *bmi2
= has_bmi2
? " -mbmi2" : " -mno-bmi2";
830 const char *tbm
= has_tbm
? " -mtbm" : " -mno-tbm";
831 const char *avx
= has_avx
? " -mavx" : " -mno-avx";
832 const char *avx2
= has_avx2
? " -mavx2" : " -mno-avx2";
833 const char *sse4_2
= has_sse4_2
? " -msse4.2" : " -mno-sse4.2";
834 const char *sse4_1
= has_sse4_1
? " -msse4.1" : " -mno-sse4.1";
835 const char *lzcnt
= has_lzcnt
? " -mlzcnt" : " -mno-lzcnt";
836 const char *hle
= has_hle
? " -mhle" : " -mno-hle";
837 const char *rtm
= has_rtm
? " -mrtm" : " -mno-rtm";
838 const char *rdrnd
= has_rdrnd
? " -mrdrnd" : " -mno-rdrnd";
839 const char *f16c
= has_f16c
? " -mf16c" : " -mno-f16c";
840 const char *fsgsbase
= has_fsgsbase
? " -mfsgsbase" : " -mno-fsgsbase";
841 const char *rdseed
= has_rdseed
? " -mrdseed" : " -mno-rdseed";
842 const char *prfchw
= has_prfchw
? " -mprfchw" : " -mno-prfchw";
843 const char *adx
= has_adx
? " -madx" : " -mno-adx";
844 const char *fxsr
= has_fxsr
? " -mfxsr" : " -mno-fxsr";
845 const char *xsave
= has_xsave
? " -mxsave" : " -mno-xsave";
846 const char *xsaveopt
= has_xsaveopt
? " -mxsaveopt" : " -mno-xsaveopt";
847 const char *avx512f
= has_avx512f
? " -mavx512f" : " -mno-avx512f";
848 const char *avx512er
= has_avx512er
? " -mavx512er" : " -mno-avx512er";
849 const char *avx512cd
= has_avx512cd
? " -mavx512cd" : " -mno-avx512cd";
850 const char *avx512pf
= has_avx512pf
? " -mavx512pf" : " -mno-avx512pf";
852 options
= concat (options
, mmx
, mmx3dnow
, sse
, sse2
, sse3
, ssse3
,
853 sse4a
, cx16
, sahf
, movbe
, aes
, pclmul
,
854 popcnt
, abm
, lwp
, fma
, fma4
, xop
, bmi
, bmi2
,
855 tbm
, avx
, avx2
, sse4_2
, sse4_1
, lzcnt
, rtm
,
856 hle
, rdrnd
, f16c
, fsgsbase
, rdseed
, prfchw
, adx
,
857 fxsr
, xsave
, xsaveopt
, avx512f
, avx512er
,
858 avx512cd
, avx512pf
, NULL
);
/* The returned string is the cache options followed by "-m", ARGV[0],
   "=", the cpu name and the feature options.  */
862 return concat (cache
, "-m", argv
[0], "=", cpu
, options
, NULL
);
866 /* If we aren't compiling with GCC then the driver will just ignore
867 -march and -mtune "native" target and will leave to the newly
868 built compiler to generate code for its default target. */
/* Fallback definition with the same signature as the one above; both
   parameters are marked ATTRIBUTE_UNUSED.
   NOTE(review): the body of this definition and the preprocessor
   directive that opens this alternative are not present in this
   listing.  */
870 const char *host_detect_local_cpu (int argc ATTRIBUTE_UNUSED
,
871 const char **argv ATTRIBUTE_UNUSED
)
875 #endif /* __GNUC__ */