]>
Commit | Line | Data |
---|---|---|
fa959ce4 | 1 | /* Subroutines for the gcc driver. |
9006f7f3 GG |
2 | Copyright (C) 2006, 2007, 2008, 2010, 2011, 2012 |
3 | Free Software Foundation, Inc. | |
fa959ce4 MM |
4 | |
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
2f83c7d6 | 9 | the Free Software Foundation; either version 3, or (at your option) |
fa959ce4 MM |
10 | any later version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
2f83c7d6 NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
fa959ce4 MM |
20 | |
21 | #include "config.h" | |
22 | #include "system.h" | |
edccdcb1 L |
23 | #include "coretypes.h" |
24 | #include "tm.h" | |
fa959ce4 | 25 | |
895016f6 UB |
26 | const char *host_detect_local_cpu (int argc, const char **argv); |
27 | ||
a6ecb05c | 28 | #ifdef __GNUC__ |
b3172cab | 29 | #include "cpuid.h" |
fa959ce4 | 30 | |
cb0dee88 UB |
/* Geometry of one cache level.  The field order matters: the rest of
   this file initializes objects of this type positionally.  */
struct cache_desc
{
  unsigned int sizekb;	/* Cache size, in kilobytes.  */
  unsigned int assoc;	/* Associativity (number of ways).  */
  unsigned int line;	/* Cache line size.  */
};
37 | ||
38 | /* Returns command line parameters that describe size and | |
39 | cache line size of the processor caches. */ | |
2711355f ZD |
40 | |
41 | static char * | |
cb0dee88 | 42 | describe_cache (struct cache_desc level1, struct cache_desc level2) |
2711355f | 43 | { |
f4a1dd0d | 44 | char size[100], line[100], size2[100]; |
2711355f | 45 | |
cb0dee88 UB |
46 | /* At the moment, gcc does not use the information |
47 | about the associativity of the cache. */ | |
48 | ||
f3afc8a7 UB |
49 | snprintf (size, sizeof (size), |
50 | "--param l1-cache-size=%u ", level1.sizekb); | |
51 | snprintf (line, sizeof (line), | |
52 | "--param l1-cache-line-size=%u ", level1.line); | |
2711355f | 53 | |
f3afc8a7 UB |
54 | snprintf (size2, sizeof (size2), |
55 | "--param l2-cache-size=%u ", level2.sizekb); | |
2711355f | 56 | |
f3afc8a7 | 57 | return concat (size, line, size2, NULL); |
f4a1dd0d ZM |
58 | } |
59 | ||
cb0dee88 UB |
60 | /* Detect L2 cache parameters using CPUID extended function 0x80000006. */ |
61 | ||
f4a1dd0d | 62 | static void |
cb0dee88 | 63 | detect_l2_cache (struct cache_desc *level2) |
f4a1dd0d | 64 | { |
cb0dee88 UB |
65 | unsigned eax, ebx, ecx, edx; |
66 | unsigned assoc; | |
f4a1dd0d ZM |
67 | |
68 | __cpuid (0x80000006, eax, ebx, ecx, edx); | |
69 | ||
cb0dee88 UB |
70 | level2->sizekb = (ecx >> 16) & 0xffff; |
71 | level2->line = ecx & 0xff; | |
72 | ||
f4a1dd0d ZM |
73 | assoc = (ecx >> 12) & 0xf; |
74 | if (assoc == 6) | |
75 | assoc = 8; | |
76 | else if (assoc == 8) | |
77 | assoc = 16; | |
78 | else if (assoc >= 0xa && assoc <= 0xc) | |
79 | assoc = 32 + (assoc - 0xa) * 16; | |
80 | else if (assoc >= 0xd && assoc <= 0xe) | |
81 | assoc = 96 + (assoc - 0xd) * 32; | |
cb0dee88 UB |
82 | |
83 | level2->assoc = assoc; | |
2711355f ZD |
84 | } |
85 | ||
86 | /* Returns the description of caches for an AMD processor. */ | |
87 | ||
d3bfe4de | 88 | static const char * |
2711355f ZD |
89 | detect_caches_amd (unsigned max_ext_level) |
90 | { | |
91 | unsigned eax, ebx, ecx, edx; | |
cb0dee88 UB |
92 | |
93 | struct cache_desc level1, level2 = {0, 0, 0}; | |
2711355f ZD |
94 | |
95 | if (max_ext_level < 0x80000005) | |
d3bfe4de | 96 | return ""; |
2711355f | 97 | |
b3172cab | 98 | __cpuid (0x80000005, eax, ebx, ecx, edx); |
2711355f | 99 | |
cb0dee88 UB |
100 | level1.sizekb = (ecx >> 24) & 0xff; |
101 | level1.assoc = (ecx >> 16) & 0xff; | |
102 | level1.line = ecx & 0xff; | |
2711355f | 103 | |
f4a1dd0d | 104 | if (max_ext_level >= 0x80000006) |
cb0dee88 | 105 | detect_l2_cache (&level2); |
f4a1dd0d | 106 | |
cb0dee88 | 107 | return describe_cache (level1, level2); |
2711355f ZD |
108 | } |
109 | ||
cb0dee88 UB |
110 | /* Decodes the size, the associativity and the cache line size of |
111 | L1/L2 caches of an Intel processor. Values are based on | |
112 | "Intel Processor Identification and the CPUID Instruction" | |
113 | [Application Note 485], revision -032, December 2007. */ | |
2711355f ZD |
114 | |
115 | static void | |
cb0dee88 UB |
116 | decode_caches_intel (unsigned reg, bool xeon_mp, |
117 | struct cache_desc *level1, struct cache_desc *level2) | |
2711355f | 118 | { |
cb0dee88 UB |
119 | int i; |
120 | ||
121 | for (i = 24; i >= 0; i -= 8) | |
122 | switch ((reg >> i) & 0xff) | |
123 | { | |
124 | case 0x0a: | |
125 | level1->sizekb = 8; level1->assoc = 2; level1->line = 32; | |
126 | break; | |
127 | case 0x0c: | |
128 | level1->sizekb = 16; level1->assoc = 4; level1->line = 32; | |
129 | break; | |
130 | case 0x2c: | |
131 | level1->sizekb = 32; level1->assoc = 8; level1->line = 64; | |
132 | break; | |
133 | case 0x39: | |
134 | level2->sizekb = 128; level2->assoc = 4; level2->line = 64; | |
135 | break; | |
136 | case 0x3a: | |
137 | level2->sizekb = 192; level2->assoc = 6; level2->line = 64; | |
138 | break; | |
139 | case 0x3b: | |
140 | level2->sizekb = 128; level2->assoc = 2; level2->line = 64; | |
141 | break; | |
142 | case 0x3c: | |
143 | level2->sizekb = 256; level2->assoc = 4; level2->line = 64; | |
144 | break; | |
145 | case 0x3d: | |
146 | level2->sizekb = 384; level2->assoc = 6; level2->line = 64; | |
147 | break; | |
148 | case 0x3e: | |
149 | level2->sizekb = 512; level2->assoc = 4; level2->line = 64; | |
150 | break; | |
151 | case 0x41: | |
152 | level2->sizekb = 128; level2->assoc = 4; level2->line = 32; | |
153 | break; | |
154 | case 0x42: | |
155 | level2->sizekb = 256; level2->assoc = 4; level2->line = 32; | |
156 | break; | |
157 | case 0x43: | |
158 | level2->sizekb = 512; level2->assoc = 4; level2->line = 32; | |
159 | break; | |
160 | case 0x44: | |
161 | level2->sizekb = 1024; level2->assoc = 4; level2->line = 32; | |
162 | break; | |
163 | case 0x45: | |
164 | level2->sizekb = 2048; level2->assoc = 4; level2->line = 32; | |
165 | break; | |
166 | case 0x49: | |
167 | if (xeon_mp) | |
168 | break; | |
169 | level2->sizekb = 4096; level2->assoc = 16; level2->line = 64; | |
170 | break; | |
171 | case 0x4e: | |
172 | level2->sizekb = 6144; level2->assoc = 24; level2->line = 64; | |
173 | break; | |
174 | case 0x60: | |
175 | level1->sizekb = 16; level1->assoc = 8; level1->line = 64; | |
176 | break; | |
177 | case 0x66: | |
178 | level1->sizekb = 8; level1->assoc = 4; level1->line = 64; | |
179 | break; | |
180 | case 0x67: | |
181 | level1->sizekb = 16; level1->assoc = 4; level1->line = 64; | |
182 | break; | |
183 | case 0x68: | |
184 | level1->sizekb = 32; level1->assoc = 4; level1->line = 64; | |
185 | break; | |
186 | case 0x78: | |
187 | level2->sizekb = 1024; level2->assoc = 4; level2->line = 64; | |
188 | break; | |
189 | case 0x79: | |
190 | level2->sizekb = 128; level2->assoc = 8; level2->line = 64; | |
191 | break; | |
192 | case 0x7a: | |
193 | level2->sizekb = 256; level2->assoc = 8; level2->line = 64; | |
194 | break; | |
195 | case 0x7b: | |
196 | level2->sizekb = 512; level2->assoc = 8; level2->line = 64; | |
197 | break; | |
198 | case 0x7c: | |
199 | level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; | |
200 | break; | |
201 | case 0x7d: | |
202 | level2->sizekb = 2048; level2->assoc = 8; level2->line = 64; | |
203 | break; | |
204 | case 0x7f: | |
205 | level2->sizekb = 512; level2->assoc = 2; level2->line = 64; | |
206 | break; | |
207 | case 0x82: | |
208 | level2->sizekb = 256; level2->assoc = 8; level2->line = 32; | |
209 | break; | |
210 | case 0x83: | |
211 | level2->sizekb = 512; level2->assoc = 8; level2->line = 32; | |
212 | break; | |
213 | case 0x84: | |
214 | level2->sizekb = 1024; level2->assoc = 8; level2->line = 32; | |
215 | break; | |
216 | case 0x85: | |
217 | level2->sizekb = 2048; level2->assoc = 8; level2->line = 32; | |
218 | break; | |
219 | case 0x86: | |
220 | level2->sizekb = 512; level2->assoc = 4; level2->line = 64; | |
221 | break; | |
222 | case 0x87: | |
223 | level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; | |
224 | ||
225 | default: | |
226 | break; | |
227 | } | |
228 | } | |
2711355f | 229 | |
/* Detect cache parameters using CPUID function 2 and record them in
   *LEVEL1 and *LEVEL2.  XEON_MP is forwarded to the descriptor
   decoder.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
		      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int remaining, r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* The low bits of EAX give the number of times the function has to
     be executed; strip them so they are not decoded as a descriptor.  */
  remaining = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  for (; remaining > 0; remaining--)
    {
      /* A register carries descriptors only when it is non-zero and
	 its most significant bit is clear.  */
      for (r = 0; r < 4; r++)
	if (regs[r] != 0 && (regs[r] & 0x80000000u) == 0)
	  decode_caches_intel (regs[r], xeon_mp, level1, level2);

      /* Query again unless this was the last required round.  */
      if (remaining > 1)
	__cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
2711355f | 254 | |
cb0dee88 UB |
255 | /* Detect cache parameters using CPUID function 4. This |
256 | method doesn't require hardcoded tables. */ | |
2711355f | 257 | |
cb0dee88 UB |
/* Values of the cache type field, the low five bits of EAX returned
   by CPUID function 4.  */
enum cache_type
{
  CACHE_END = 0,	/* No more caches to enumerate.  */
  CACHE_DATA,		/* 1: data cache.  */
  CACHE_INST,		/* 2: instruction cache.  */
  CACHE_UNIFIED		/* 3: unified cache.  */
};
265 | ||
266 | static void | |
a0463099 AK |
267 | detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2, |
268 | struct cache_desc *level3) | |
cb0dee88 UB |
269 | { |
270 | struct cache_desc *cache; | |
271 | ||
272 | unsigned eax, ebx, ecx, edx; | |
273 | int count; | |
274 | ||
275 | for (count = 0;; count++) | |
276 | { | |
277 | __cpuid_count(4, count, eax, ebx, ecx, edx); | |
278 | switch (eax & 0x1f) | |
279 | { | |
280 | case CACHE_END: | |
281 | return; | |
282 | case CACHE_DATA: | |
283 | case CACHE_UNIFIED: | |
284 | { | |
285 | switch ((eax >> 5) & 0x07) | |
286 | { | |
287 | case 1: | |
288 | cache = level1; | |
289 | break; | |
290 | case 2: | |
291 | cache = level2; | |
292 | break; | |
a0463099 AK |
293 | case 3: |
294 | cache = level3; | |
295 | break; | |
cb0dee88 UB |
296 | default: |
297 | cache = NULL; | |
298 | } | |
299 | ||
300 | if (cache) | |
301 | { | |
302 | unsigned sets = ecx + 1; | |
dc8bd8d9 | 303 | unsigned part = ((ebx >> 12) & 0x03ff) + 1; |
cb0dee88 | 304 | |
dc8bd8d9 | 305 | cache->assoc = ((ebx >> 22) & 0x03ff) + 1; |
cb0dee88 | 306 | cache->line = (ebx & 0x0fff) + 1; |
cb0dee88 UB |
307 | |
308 | cache->sizekb = (cache->assoc * part | |
309 | * cache->line * sets) / 1024; | |
a0463099 | 310 | } |
cb0dee88 | 311 | } |
2711355f ZD |
312 | default: |
313 | break; | |
314 | } | |
315 | } | |
316 | } | |
317 | ||
cb0dee88 | 318 | /* Returns the description of caches for an Intel processor. */ |
2711355f | 319 | |
d3bfe4de | 320 | static const char * |
a0463099 AK |
321 | detect_caches_intel (bool xeon_mp, unsigned max_level, |
322 | unsigned max_ext_level, unsigned *l2sizekb) | |
2711355f | 323 | { |
a0463099 | 324 | struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0}; |
2711355f | 325 | |
cb0dee88 | 326 | if (max_level >= 4) |
a0463099 | 327 | detect_caches_cpuid4 (&level1, &level2, &level3); |
cb0dee88 UB |
328 | else if (max_level >= 2) |
329 | detect_caches_cpuid2 (xeon_mp, &level1, &level2); | |
330 | else | |
d3bfe4de | 331 | return ""; |
2711355f | 332 | |
cb0dee88 | 333 | if (level1.sizekb == 0) |
d3bfe4de | 334 | return ""; |
2711355f | 335 | |
a0463099 AK |
336 | /* Let the L3 replace the L2. This assumes inclusive caches |
337 | and single threaded program for now. */ | |
338 | if (level3.sizekb) | |
339 | level2 = level3; | |
340 | ||
cb0dee88 UB |
341 | /* Intel CPUs are equipped with AMD style L2 cache info. Try this |
342 | method if other methods fail to provide L2 cache parameters. */ | |
343 | if (level2.sizekb == 0 && max_ext_level >= 0x80000006) | |
344 | detect_l2_cache (&level2); | |
f4a1dd0d | 345 | |
a0463099 AK |
346 | *l2sizekb = level2.sizekb; |
347 | ||
cb0dee88 | 348 | return describe_cache (level1, level2); |
2711355f ZD |
349 | } |
350 | ||
fa959ce4 MM |
351 | /* This will be called by the spec parser in gcc.c when it sees |
352 | a %:local_cpu_detect(args) construct. Currently it will be called | |
353 | with either "arch" or "tune" as argument depending on if -march=native | |
354 | or -mtune=native is to be substituted. | |
355 | ||
356 | It returns a string containing new command line parameters to be | |
357 | put at the place of the above two options, depending on what CPU | |
358 | this is executed. E.g. "-march=k8" on an AMD64 machine | |
359 | for -march=native. | |
360 | ||
361 | ARGC and ARGV are set depending on the actual arguments given | |
362 | in the spec. */ | |
b3172cab | 363 | |
fa959ce4 MM |
364 | const char *host_detect_local_cpu (int argc, const char **argv) |
365 | { | |
b3172cab UB |
366 | enum processor_type processor = PROCESSOR_I386; |
367 | const char *cpu = "i386"; | |
368 | ||
2711355f | 369 | const char *cache = ""; |
5be6cb59 | 370 | const char *options = ""; |
b3172cab | 371 | |
cb0dee88 | 372 | unsigned int eax, ebx, ecx, edx; |
b3172cab UB |
373 | |
374 | unsigned int max_level, ext_level; | |
cb0dee88 | 375 | |
fa959ce4 | 376 | unsigned int vendor; |
cb0dee88 | 377 | unsigned int model, family; |
b3172cab UB |
378 | |
379 | unsigned int has_sse3, has_ssse3, has_cmpxchg16b; | |
380 | unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2; | |
381 | ||
382 | /* Extended features */ | |
383 | unsigned int has_lahf_lm = 0, has_sse4a = 0; | |
384 | unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; | |
634fa334 | 385 | unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0; |
7afac110 | 386 | unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0; |
8ad9d49e | 387 | unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0; |
5eed4f27 | 388 | unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0; |
82feeb8d | 389 | unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0; |
76a02e42 | 390 | unsigned int has_hle = 0, has_rtm = 0; |
d1925759 | 391 | unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0; |
d05e383b | 392 | unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0; |
a91529c4 | 393 | unsigned int has_osxsave = 0; |
b3172cab | 394 | |
edccdcb1 L |
395 | bool arch; |
396 | ||
a0463099 AK |
397 | unsigned int l2sizekb = 0; |
398 | ||
edccdcb1 L |
399 | if (argc < 1) |
400 | return NULL; | |
401 | ||
b3172cab UB |
402 | arch = !strcmp (argv[0], "arch"); |
403 | ||
edccdcb1 | 404 | if (!arch && strcmp (argv[0], "tune")) |
fa959ce4 MM |
405 | return NULL; |
406 | ||
b3172cab UB |
407 | max_level = __get_cpuid_max (0, &vendor); |
408 | if (max_level < 1) | |
fa959ce4 | 409 | goto done; |
fa959ce4 | 410 | |
b3172cab | 411 | __cpuid (1, eax, ebx, ecx, edx); |
fa959ce4 | 412 | |
cb0dee88 | 413 | model = (eax >> 4) & 0x0f; |
b3172cab | 414 | family = (eax >> 8) & 0x0f; |
ef64d158 | 415 | if (vendor == signature_INTEL_ebx) |
37c50435 L |
416 | { |
417 | unsigned int extended_model, extended_family; | |
418 | ||
419 | extended_model = (eax >> 12) & 0xf0; | |
420 | extended_family = (eax >> 20) & 0xff; | |
421 | if (family == 0x0f) | |
422 | { | |
423 | family += extended_family; | |
424 | model += extended_model; | |
425 | } | |
426 | else if (family == 0x06) | |
427 | model += extended_model; | |
428 | } | |
b3172cab UB |
429 | |
430 | has_sse3 = ecx & bit_SSE3; | |
431 | has_ssse3 = ecx & bit_SSSE3; | |
634fa334 L |
432 | has_sse4_1 = ecx & bit_SSE4_1; |
433 | has_sse4_2 = ecx & bit_SSE4_2; | |
434 | has_avx = ecx & bit_AVX; | |
a91529c4 | 435 | has_osxsave = ecx & bit_OSXSAVE; |
b3172cab | 436 | has_cmpxchg16b = ecx & bit_CMPXCHG16B; |
cabf85c3 | 437 | has_movbe = ecx & bit_MOVBE; |
634fa334 L |
438 | has_popcnt = ecx & bit_POPCNT; |
439 | has_aes = ecx & bit_AES; | |
440 | has_pclmul = ecx & bit_PCLMUL; | |
5eed4f27 | 441 | has_fma = ecx & bit_FMA; |
d1925759 L |
442 | has_f16c = ecx & bit_F16C; |
443 | has_rdrnd = ecx & bit_RDRND; | |
fa959ce4 | 444 | |
b3172cab UB |
445 | has_cmpxchg8b = edx & bit_CMPXCHG8B; |
446 | has_cmov = edx & bit_CMOV; | |
447 | has_mmx = edx & bit_MMX; | |
448 | has_sse = edx & bit_SSE; | |
449 | has_sse2 = edx & bit_SSE2; | |
450 | ||
2c9b39ef L |
451 | if (max_level >= 7) |
452 | { | |
453 | __cpuid_count (7, 0, eax, ebx, ecx, edx); | |
454 | ||
455 | has_bmi = ebx & bit_BMI; | |
5dcfdccd | 456 | has_hle = ebx & bit_HLE; |
76a02e42 | 457 | has_rtm = ebx & bit_RTM; |
2c9b39ef L |
458 | has_avx2 = ebx & bit_AVX2; |
459 | has_bmi2 = ebx & bit_BMI2; | |
d1925759 | 460 | has_fsgsbase = ebx & bit_FSGSBASE; |
4c340b5d | 461 | has_rdseed = ebx & bit_RDSEED; |
d05e383b | 462 | has_adx = ebx & bit_ADX; |
2c9b39ef L |
463 | } |
464 | ||
a91529c4 L |
465 | /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */ |
466 | #define XCR_XFEATURE_ENABLED_MASK 0x0 | |
467 | #define XSTATE_FP 0x1 | |
468 | #define XSTATE_SSE 0x2 | |
469 | #define XSTATE_YMM 0x4 | |
470 | if (has_osxsave) | |
471 | asm (".byte 0x0f; .byte 0x01; .byte 0xd0" | |
472 | : "=a" (eax), "=d" (edx) | |
473 | : "c" (XCR_XFEATURE_ENABLED_MASK)); | |
474 | ||
475 | /* Check if SSE and YMM states are supported. */ | |
953ac966 AN |
476 | if (!has_osxsave |
477 | || (eax & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) | |
a91529c4 L |
478 | { |
479 | has_avx = 0; | |
480 | has_avx2 = 0; | |
481 | has_fma = 0; | |
482 | has_fma4 = 0; | |
483 | has_xop = 0; | |
484 | } | |
485 | ||
b3172cab UB |
486 | /* Check cpuid level of extended features. */ |
487 | __cpuid (0x80000000, ext_level, ebx, ecx, edx); | |
488 | ||
489 | if (ext_level > 0x80000000) | |
fa959ce4 | 490 | { |
b3172cab | 491 | __cpuid (0x80000001, eax, ebx, ecx, edx); |
fa959ce4 | 492 | |
b3172cab UB |
493 | has_lahf_lm = ecx & bit_LAHF_LM; |
494 | has_sse4a = ecx & bit_SSE4a; | |
c3d34a78 | 495 | has_abm = ecx & bit_ABM; |
8ad9d49e | 496 | has_lwp = ecx & bit_LWP; |
1133125e HJ |
497 | has_fma4 = ecx & bit_FMA4; |
498 | has_xop = ecx & bit_XOP; | |
94d13ad1 | 499 | has_tbm = ecx & bit_TBM; |
5fcafa60 | 500 | has_lzcnt = ecx & bit_LZCNT; |
9006f7f3 | 501 | has_prfchw = ecx & bit_PRFCHW; |
b3172cab UB |
502 | |
503 | has_longmode = edx & bit_LM; | |
504 | has_3dnowp = edx & bit_3DNOWP; | |
505 | has_3dnow = edx & bit_3DNOW; | |
506 | } | |
fa959ce4 | 507 | |
2711355f ZD |
508 | if (!arch) |
509 | { | |
ef64d158 | 510 | if (vendor == signature_AMD_ebx) |
2711355f | 511 | cache = detect_caches_amd (ext_level); |
ef64d158 | 512 | else if (vendor == signature_INTEL_ebx) |
cb0dee88 UB |
513 | { |
514 | bool xeon_mp = (family == 15 && model == 6); | |
a0463099 AK |
515 | cache = detect_caches_intel (xeon_mp, max_level, |
516 | ext_level, &l2sizekb); | |
cb0dee88 | 517 | } |
2711355f ZD |
518 | } |
519 | ||
ef64d158 | 520 | if (vendor == signature_AMD_ebx) |
fa959ce4 | 521 | { |
fbdf817d | 522 | unsigned int name; |
b3172cab | 523 | |
fbdf817d UB |
524 | /* Detect geode processor by its processor signature. */ |
525 | if (ext_level > 0x80000001) | |
526 | __cpuid (0x80000002, name, ebx, ecx, edx); | |
527 | else | |
528 | name = 0; | |
529 | ||
ef64d158 | 530 | if (name == signature_NSC_ebx) |
fbdf817d | 531 | processor = PROCESSOR_GEODE; |
e32bfc16 VK |
532 | else if (has_movbe) |
533 | processor = PROCESSOR_BTVER2; | |
4d652a18 HJ |
534 | else if (has_bmi) |
535 | processor = PROCESSOR_BDVER2; | |
1133125e HJ |
536 | else if (has_xop) |
537 | processor = PROCESSOR_BDVER1; | |
14b52538 CF |
538 | else if (has_sse4a && has_ssse3) |
539 | processor = PROCESSOR_BTVER1; | |
fbdf817d | 540 | else if (has_sse4a) |
35a63f21 | 541 | processor = PROCESSOR_AMDFAM10; |
fbdf817d UB |
542 | else if (has_sse2 || has_longmode) |
543 | processor = PROCESSOR_K8; | |
f7593cb4 | 544 | else if (has_3dnowp && family == 6) |
fbdf817d UB |
545 | processor = PROCESSOR_ATHLON; |
546 | else if (has_mmx) | |
547 | processor = PROCESSOR_K6; | |
548 | else | |
549 | processor = PROCESSOR_PENTIUM; | |
fa959ce4 MM |
550 | } |
551 | else | |
552 | { | |
edccdcb1 L |
553 | switch (family) |
554 | { | |
b3172cab UB |
555 | case 4: |
556 | processor = PROCESSOR_I486; | |
557 | break; | |
edccdcb1 | 558 | case 5: |
b3172cab | 559 | processor = PROCESSOR_PENTIUM; |
edccdcb1 L |
560 | break; |
561 | case 6: | |
562 | processor = PROCESSOR_PENTIUMPRO; | |
563 | break; | |
564 | case 15: | |
565 | processor = PROCESSOR_PENTIUM4; | |
566 | break; | |
567 | default: | |
b3172cab UB |
568 | /* We have no idea. */ |
569 | processor = PROCESSOR_GENERIC32; | |
edccdcb1 L |
570 | } |
571 | } | |
572 | ||
573 | switch (processor) | |
574 | { | |
575 | case PROCESSOR_I386: | |
b3172cab | 576 | /* Default. */ |
edccdcb1 L |
577 | break; |
578 | case PROCESSOR_I486: | |
579 | cpu = "i486"; | |
580 | break; | |
581 | case PROCESSOR_PENTIUM: | |
b3172cab | 582 | if (arch && has_mmx) |
edccdcb1 L |
583 | cpu = "pentium-mmx"; |
584 | else | |
585 | cpu = "pentium"; | |
586 | break; | |
587 | case PROCESSOR_PENTIUMPRO: | |
44f276c6 | 588 | switch (model) |
edccdcb1 | 589 | { |
44f276c6 L |
590 | case 0x1c: |
591 | case 0x26: | |
592 | /* Atom. */ | |
593 | cpu = "atom"; | |
594 | break; | |
595 | case 0x1a: | |
596 | case 0x1e: | |
597 | case 0x1f: | |
598 | case 0x2e: | |
eefe143b L |
599 | /* Nehalem. */ |
600 | cpu = "corei7"; | |
44f276c6 L |
601 | break; |
602 | case 0x25: | |
12bbb78f | 603 | case 0x2c: |
44f276c6 | 604 | case 0x2f: |
eefe143b L |
605 | /* Westmere. */ |
606 | cpu = "corei7"; | |
44f276c6 | 607 | break; |
35758e5b | 608 | case 0x2a: |
815cecbe | 609 | case 0x2d: |
35758e5b L |
610 | /* Sandy Bridge. */ |
611 | cpu = "corei7-avx"; | |
612 | break; | |
44f276c6 L |
613 | case 0x17: |
614 | case 0x1d: | |
eefe143b | 615 | /* Penryn. */ |
44f276c6 L |
616 | cpu = "core2"; |
617 | break; | |
618 | case 0x0f: | |
eefe143b | 619 | /* Merom. */ |
44f276c6 L |
620 | cpu = "core2"; |
621 | break; | |
622 | default: | |
623 | if (arch) | |
624 | { | |
4ffae7ff L |
625 | /* This is unknown family 0x6 CPU. */ |
626 | if (has_avx) | |
627 | /* Assume Sandy Bridge. */ | |
628 | cpu = "corei7-avx"; | |
629 | else if (has_sse4_2) | |
630 | /* Assume Core i7. */ | |
631 | cpu = "corei7"; | |
632 | else if (has_ssse3) | |
633 | { | |
634 | if (has_movbe) | |
635 | /* Assume Atom. */ | |
636 | cpu = "atom"; | |
637 | else | |
638 | /* Assume Core 2. */ | |
639 | cpu = "core2"; | |
640 | } | |
44f276c6 L |
641 | else if (has_sse3) |
642 | /* It is Core Duo. */ | |
643 | cpu = "pentium-m"; | |
644 | else if (has_sse2) | |
645 | /* It is Pentium M. */ | |
646 | cpu = "pentium-m"; | |
647 | else if (has_sse) | |
648 | /* It is Pentium III. */ | |
649 | cpu = "pentium3"; | |
650 | else if (has_mmx) | |
651 | /* It is Pentium II. */ | |
652 | cpu = "pentium2"; | |
653 | else | |
654 | /* Default to Pentium Pro. */ | |
655 | cpu = "pentiumpro"; | |
656 | } | |
b3172cab | 657 | else |
44f276c6 L |
658 | /* For -mtune, we default to -mtune=generic. */ |
659 | cpu = "generic"; | |
660 | break; | |
fa959ce4 | 661 | } |
b3172cab UB |
662 | break; |
663 | case PROCESSOR_PENTIUM4: | |
664 | if (has_sse3) | |
fa959ce4 | 665 | { |
b3172cab UB |
666 | if (has_longmode) |
667 | cpu = "nocona"; | |
fa959ce4 | 668 | else |
b3172cab | 669 | cpu = "prescott"; |
fa959ce4 | 670 | } |
b3172cab UB |
671 | else |
672 | cpu = "pentium4"; | |
edccdcb1 L |
673 | break; |
674 | case PROCESSOR_GEODE: | |
675 | cpu = "geode"; | |
676 | break; | |
677 | case PROCESSOR_K6: | |
b3172cab UB |
678 | if (arch && has_3dnow) |
679 | cpu = "k6-3"; | |
edccdcb1 L |
680 | else |
681 | cpu = "k6"; | |
682 | break; | |
683 | case PROCESSOR_ATHLON: | |
b3172cab | 684 | if (arch && has_sse) |
edccdcb1 L |
685 | cpu = "athlon-4"; |
686 | else | |
687 | cpu = "athlon"; | |
688 | break; | |
edccdcb1 | 689 | case PROCESSOR_K8: |
b3172cab UB |
690 | if (arch && has_sse3) |
691 | cpu = "k8-sse3"; | |
692 | else | |
693 | cpu = "k8"; | |
edccdcb1 | 694 | break; |
35a63f21 DR |
695 | case PROCESSOR_AMDFAM10: |
696 | cpu = "amdfam10"; | |
697 | break; | |
1133125e HJ |
698 | case PROCESSOR_BDVER1: |
699 | cpu = "bdver1"; | |
700 | break; | |
4d652a18 HJ |
701 | case PROCESSOR_BDVER2: |
702 | cpu = "bdver2"; | |
703 | break; | |
14b52538 CF |
704 | case PROCESSOR_BTVER1: |
705 | cpu = "btver1"; | |
706 | break; | |
e32bfc16 VK |
707 | case PROCESSOR_BTVER2: |
708 | cpu = "btver2"; | |
709 | break; | |
b3172cab | 710 | |
edccdcb1 | 711 | default: |
b3172cab UB |
712 | /* Use something reasonable. */ |
713 | if (arch) | |
714 | { | |
715 | if (has_ssse3) | |
716 | cpu = "core2"; | |
717 | else if (has_sse3) | |
718 | { | |
719 | if (has_longmode) | |
720 | cpu = "nocona"; | |
721 | else | |
722 | cpu = "prescott"; | |
723 | } | |
724 | else if (has_sse2) | |
725 | cpu = "pentium4"; | |
726 | else if (has_cmov) | |
727 | cpu = "pentiumpro"; | |
728 | else if (has_mmx) | |
729 | cpu = "pentium-mmx"; | |
730 | else if (has_cmpxchg8b) | |
731 | cpu = "pentium"; | |
732 | } | |
733 | else | |
734 | cpu = "generic"; | |
fa959ce4 MM |
735 | } |
736 | ||
5be6cb59 UB |
737 | if (arch) |
738 | { | |
5eed4f27 L |
739 | const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16"; |
740 | const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf"; | |
741 | const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe"; | |
742 | const char *ase = has_aes ? " -maes" : " -mno-aes"; | |
743 | const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul"; | |
744 | const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt"; | |
745 | const char *abm = has_abm ? " -mabm" : " -mno-abm"; | |
746 | const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp"; | |
747 | const char *fma = has_fma ? " -mfma" : " -mno-fma"; | |
748 | const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4"; | |
749 | const char *xop = has_xop ? " -mxop" : " -mno-xop"; | |
750 | const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi"; | |
82feeb8d | 751 | const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2"; |
5eed4f27 L |
752 | const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm"; |
753 | const char *avx = has_avx ? " -mavx" : " -mno-avx"; | |
7afac110 | 754 | const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2"; |
642a011d | 755 | const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2"; |
5eed4f27 | 756 | const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1"; |
3ed2c643 | 757 | const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt"; |
38d7f26e | 758 | const char *hle = has_hle ? " -mhle" : " -mno-hle"; |
76a02e42 | 759 | const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm"; |
d1925759 L |
760 | const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd"; |
761 | const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c"; | |
762 | const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase"; | |
4c340b5d | 763 | const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed"; |
e61c94dd | 764 | const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw"; |
d05e383b | 765 | const char *adx = has_adx ? " -madx" : " -mno-adx"; |
5eed4f27 L |
766 | |
767 | options = concat (options, cx16, sahf, movbe, ase, pclmul, | |
82feeb8d | 768 | popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2, |
76a02e42 | 769 | tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm, |
d05e383b | 770 | hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx, NULL); |
5be6cb59 UB |
771 | } |
772 | ||
fa959ce4 | 773 | done: |
f3afc8a7 | 774 | return concat (cache, "-m", argv[0], "=", cpu, options, NULL); |
fa959ce4 MM |
775 | } |
776 | #else | |
b3172cab | 777 | |
f3afc8a7 UB |
/* Fallback used when this file is not compiled with GCC: no host
   detection is attempted and NULL is returned, so the driver will just
   ignore the "native" -march and -mtune target and leave it to the
   newly built compiler to generate code for its default target.  */

const char *
host_detect_local_cpu (int argc, const char **argv)
{
  /* Both arguments are intentionally unused.  */
  (void) argc;
  (void) argv;

  return NULL;
}
a6ecb05c | 787 | #endif /* __GNUC__ */ |