]>
Commit | Line | Data |
---|---|---|
fa959ce4 | 1 | /* Subroutines for the gcc driver. |
35a63f21 | 2 | Copyright (C) 2006, 2007 Free Software Foundation, Inc. |
fa959ce4 MM |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
2f83c7d6 | 8 | the Free Software Foundation; either version 3, or (at your option) |
fa959ce4 MM |
9 | any later version. |
10 | ||
11 | GCC is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
2f83c7d6 NC |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
fa959ce4 MM |
19 | |
20 | #include "config.h" | |
21 | #include "system.h" | |
edccdcb1 L |
22 | #include "coretypes.h" |
23 | #include "tm.h" | |
fa959ce4 MM |
24 | #include <stdlib.h> |
25 | ||
895016f6 UB |
26 | const char *host_detect_local_cpu (int argc, const char **argv); |
27 | ||
fa959ce4 MM |
28 | #ifdef GCC_VERSION |
/* Execute the CPUID instruction for leaf NUM and store the resulting
   eax, ebx, ecx and edx values in A, B, C and D.  ebx is swapped with a
   scratch register around the instruction instead of being named as an
   output (presumably because ebx may be reserved, e.g. as the PIC
   register -- confirm before changing the constraints).  */
#define cpuid(num,a,b,c,d) \
  asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" \
		: "=a" (a), "=r" (b), "=c" (c), "=d" (d)  \
		: "0" (num))

/* All feature masks are unsigned: they are only ever and-ed with unsigned
   register values, and shifting a signed 1 into bit 31 is undefined.  */

/* Tested against edx of cpuid leaf 1.  */
#define bit_CMPXCHG8B (1u << 8)
#define bit_CMOV (1u << 15)
#define bit_MMX (1u << 23)
#define bit_SSE (1u << 25)
#define bit_SSE2 (1u << 26)

/* Tested against ecx of cpuid leaf 1.  */
#define bit_SSE3 (1u << 0)
#define bit_SSSE3 (1u << 9)
#define bit_CMPXCHG16B (1u << 13)

/* Tested against ecx of cpuid leaf 0x80000001.  */
#define bit_LAHF_LM (1u << 0)
#define bit_SSE4a (1u << 6)

/* Tested against edx of cpuid leaf 0x80000001.  */
#define bit_3DNOW (1u << 31)
#define bit_3DNOWP (1u << 30)
#define bit_LM (1u << 29)
49 | ||
2711355f ZD |
50 | /* Returns parameters that describe L1_ASSOC associative cache of size |
51 | L1_SIZEKB with lines of size L1_LINE. */ | |
52 | ||
53 | static char * | |
54 | describe_cache (unsigned l1_sizekb, unsigned l1_line, | |
55 | unsigned l1_assoc ATTRIBUTE_UNUSED) | |
56 | { | |
57 | char size[1000], line[1000]; | |
2711355f ZD |
58 | |
59 | /* At the moment, gcc middle-end does not use the information about the | |
60 | associativity of the cache. */ | |
61 | ||
46cb0441 | 62 | sprintf (size, "--param l1-cache-size=%u", l1_sizekb); |
2711355f ZD |
63 | sprintf (line, "--param l1-cache-line-size=%u", l1_line); |
64 | ||
65 | return concat (size, " ", line, " ", NULL); | |
66 | } | |
67 | ||
/* Returns the description of caches for an AMD processor.

   MAX_EXT_LEVEL is the highest extended cpuid leaf the processor
   supports.  When leaf 0x80000005 is not available an empty string is
   returned rather than NULL: the result is used as the first argument of
   a NULL-terminated concat call, where a NULL would silently drop every
   option that follows it.  */

static char *
detect_caches_amd (unsigned max_ext_level)
{
  unsigned eax, ebx, ecx, edx;
  unsigned l1_sizekb, l1_line, l1_assoc;

  if (max_ext_level < 0x80000005)
    return (char *) "";

  cpuid (0x80000005, eax, ebx, ecx, edx);

  /* Only ecx is decoded: bits 7:0 give the line size, bits 23:16 the
     associativity and bits 31:24 the size in kilobytes.  */
  l1_line = ecx & 0xff;
  l1_sizekb = (ecx >> 24) & 0xff;
  l1_assoc = (ecx >> 16) & 0xff;

  return describe_cache (l1_sizekb, l1_line, l1_assoc);
}
87 | ||
/* Scan the four descriptor bytes packed into REG, lowest byte first, and
   for every byte that names a known L1 cache store its size in kilobytes,
   line size and associativity to L1_SIZEKB, L1_LINE and L1_ASSOC.  A later
   matching byte overwrites the values stored for an earlier one.  Nothing
   is stored when bit 31 of REG is set or when no byte matches.  */

static void
decode_caches_intel (unsigned reg, unsigned *l1_sizekb, unsigned *l1_line,
		     unsigned *l1_assoc)
{
  /* Known descriptor bytes and the cache geometry each one stands for.  */
  static const struct
  {
    unsigned char descriptor;
    unsigned char sizekb;
    unsigned char line;
    unsigned char assoc;
  } known_l1[] =
    {
      { 0x0a,  8, 32, 2 },
      { 0x0c, 16, 32, 4 },
      { 0x2c, 32, 64, 8 },
      { 0x60, 16, 64, 8 },
      { 0x66,  8, 64, 4 },
      { 0x67, 16, 64, 4 },
      { 0x68, 32, 64, 4 }
    };
  const unsigned n_known = sizeof (known_l1) / sizeof (known_l1[0]);
  unsigned byte_no, k;

  /* A register with its top bit set is skipped entirely.  */
  if ((reg >> 31) & 1u)
    return;

  for (byte_no = 0; byte_no < 4; byte_no++, reg >>= 8)
    {
      const unsigned descriptor = reg & 0xff;

      for (k = 0; k < n_known; k++)
	if (known_l1[k].descriptor == descriptor)
	  {
	    *l1_sizekb = known_l1[k].sizekb;
	    *l1_line = known_l1[k].line;
	    *l1_assoc = known_l1[k].assoc;
	    break;
	  }
    }
}
148 | ||
/* Returns the description of caches for an intel processor.

   MAX_LEVEL is the highest basic cpuid leaf the processor supports.  An
   empty string (never NULL) is returned when leaf 2 is unavailable or
   when none of its descriptors names a known L1 cache: the result is used
   as the first argument of a NULL-terminated concat call, where a NULL
   would silently drop every option that follows it.  */

static char *
detect_caches_intel (unsigned max_level)
{
  unsigned eax, ebx, ecx, edx;
  unsigned l1_sizekb = 0, l1_line = 0, assoc = 0;

  if (max_level < 2)
    return (char *) "";

  cpuid (2, eax, ebx, ecx, edx);

  /* Each of the four registers may carry descriptor bytes.  */
  decode_caches_intel (eax, &l1_sizekb, &l1_line, &assoc);
  decode_caches_intel (ebx, &l1_sizekb, &l1_line, &assoc);
  decode_caches_intel (ecx, &l1_sizekb, &l1_line, &assoc);
  decode_caches_intel (edx, &l1_sizekb, &l1_line, &assoc);

  /* A size of zero means no descriptor was recognized.  */
  if (!l1_sizekb)
    return (char *) "";

  return describe_cache (l1_sizekb, l1_line, assoc);
}
171 | ||
fa959ce4 MM |
172 | /* This will be called by the spec parser in gcc.c when it sees |
173 | a %:local_cpu_detect(args) construct. Currently it will be called | |
174 | with either "arch" or "tune" as argument depending on if -march=native | |
175 | or -mtune=native is to be substituted. | |
176 | ||
177 | It returns a string containing new command line parameters to be | |
178 | put at the place of the above two options, depending on what CPU | |
179 | this is executed. E.g. "-march=k8" on an AMD64 machine | |
180 | for -march=native. | |
181 | ||
182 | ARGC and ARGV are set depending on the actual arguments given | |
183 | in the spec. */ | |
184 | const char *host_detect_local_cpu (int argc, const char **argv) | |
185 | { | |
edccdcb1 | 186 | const char *cpu = NULL; |
2711355f | 187 | const char *cache = ""; |
5be6cb59 | 188 | const char *options = ""; |
edccdcb1 | 189 | enum processor_type processor = PROCESSOR_I386; |
fa959ce4 MM |
190 | unsigned int eax, ebx, ecx, edx; |
191 | unsigned int max_level; | |
192 | unsigned int vendor; | |
193 | unsigned int ext_level; | |
194 | unsigned char has_mmx = 0, has_3dnow = 0, has_3dnowp = 0, has_sse = 0; | |
edccdcb1 | 195 | unsigned char has_sse2 = 0, has_sse3 = 0, has_ssse3 = 0, has_cmov = 0; |
5be6cb59 | 196 | unsigned char has_cmpxchg16b = 0, has_lahf_lm = 0; |
35a63f21 | 197 | unsigned char has_longmode = 0, has_cmpxchg8b = 0, has_sse4a = 0; |
fa959ce4 MM |
198 | unsigned char is_amd = 0; |
199 | unsigned int family = 0; | |
edccdcb1 L |
200 | bool arch; |
201 | ||
202 | if (argc < 1) | |
203 | return NULL; | |
204 | ||
205 | arch = strcmp (argv[0], "arch") == 0; | |
206 | if (!arch && strcmp (argv[0], "tune")) | |
fa959ce4 MM |
207 | return NULL; |
208 | ||
209 | #ifndef __x86_64__ | |
210 | /* See if we can use cpuid. */ | |
211 | asm volatile ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;" | |
212 | "pushl %0; popfl; pushfl; popl %0; popfl" | |
213 | : "=&r" (eax), "=&r" (ebx) | |
214 | : "i" (0x00200000)); | |
215 | ||
216 | if (((eax ^ ebx) & 0x00200000) == 0) | |
217 | goto done; | |
218 | #endif | |
219 | ||
edccdcb1 | 220 | processor = PROCESSOR_PENTIUM; |
fa959ce4 MM |
221 | |
222 | /* Check the highest input value for eax. */ | |
223 | cpuid (0, eax, ebx, ecx, edx); | |
224 | max_level = eax; | |
225 | /* We only look at the first four characters. */ | |
226 | vendor = ebx; | |
227 | if (max_level == 0) | |
228 | goto done; | |
229 | ||
230 | cpuid (1, eax, ebx, ecx, edx); | |
edccdcb1 | 231 | has_cmpxchg8b = !!(edx & bit_CMPXCHG8B); |
fa959ce4 MM |
232 | has_cmov = !!(edx & bit_CMOV); |
233 | has_mmx = !!(edx & bit_MMX); | |
234 | has_sse = !!(edx & bit_SSE); | |
235 | has_sse2 = !!(edx & bit_SSE2); | |
236 | has_sse3 = !!(ecx & bit_SSE3); | |
edccdcb1 | 237 | has_ssse3 = !!(ecx & bit_SSSE3); |
5be6cb59 | 238 | has_cmpxchg16b = !!(ecx & bit_CMPXCHG16B); |
fa959ce4 MM |
239 | /* We don't care for extended family. */ |
240 | family = (eax >> 8) & ~(1 << 4); | |
241 | ||
242 | cpuid (0x80000000, eax, ebx, ecx, edx); | |
243 | ext_level = eax; | |
244 | if (ext_level >= 0x80000000) | |
245 | { | |
246 | cpuid (0x80000001, eax, ebx, ecx, edx); | |
5be6cb59 | 247 | has_lahf_lm = !!(ecx & bit_LAHF_LM); |
fa959ce4 MM |
248 | has_3dnow = !!(edx & bit_3DNOW); |
249 | has_3dnowp = !!(edx & bit_3DNOWP); | |
250 | has_longmode = !!(edx & bit_LM); | |
35a63f21 | 251 | has_sse4a = !!(ecx & bit_SSE4a); |
fa959ce4 MM |
252 | } |
253 | ||
254 | is_amd = vendor == *(unsigned int*)"Auth"; | |
255 | ||
2711355f ZD |
256 | if (!arch) |
257 | { | |
258 | if (is_amd) | |
259 | cache = detect_caches_amd (ext_level); | |
260 | else if (vendor == *(unsigned int*)"Genu") | |
261 | cache = detect_caches_intel (max_level); | |
262 | } | |
263 | ||
fa959ce4 MM |
264 | if (is_amd) |
265 | { | |
266 | if (has_mmx) | |
edccdcb1 | 267 | processor = PROCESSOR_K6; |
fa959ce4 | 268 | if (has_3dnowp) |
edccdcb1 | 269 | processor = PROCESSOR_ATHLON; |
fa959ce4 | 270 | if (has_sse2 || has_longmode) |
edccdcb1 | 271 | processor = PROCESSOR_K8; |
35a63f21 DR |
272 | if (has_sse4a) |
273 | processor = PROCESSOR_AMDFAM10; | |
fa959ce4 MM |
274 | } |
275 | else | |
276 | { | |
edccdcb1 L |
277 | switch (family) |
278 | { | |
279 | case 5: | |
280 | /* Default is PROCESSOR_PENTIUM. */ | |
281 | break; | |
282 | case 6: | |
283 | processor = PROCESSOR_PENTIUMPRO; | |
284 | break; | |
285 | case 15: | |
286 | processor = PROCESSOR_PENTIUM4; | |
287 | break; | |
288 | default: | |
289 | /* We have no idea. Use something reasonable. */ | |
290 | if (arch) | |
291 | { | |
292 | if (has_ssse3) | |
293 | cpu = "core2"; | |
294 | else if (has_sse3) | |
295 | { | |
296 | if (has_longmode) | |
297 | cpu = "nocona"; | |
298 | else | |
299 | cpu = "prescott"; | |
300 | } | |
301 | else if (has_sse2) | |
302 | cpu = "pentium4"; | |
303 | else if (has_cmov) | |
304 | cpu = "pentiumpro"; | |
305 | else if (has_mmx) | |
306 | cpu = "pentium-mmx"; | |
307 | else if (has_cmpxchg8b) | |
308 | cpu = "pentium"; | |
309 | else | |
310 | cpu = "i386"; | |
311 | } | |
312 | else | |
313 | cpu = "generic"; | |
314 | goto done; | |
315 | break; | |
316 | } | |
317 | } | |
318 | ||
319 | switch (processor) | |
320 | { | |
321 | case PROCESSOR_I386: | |
322 | cpu = "i386"; | |
323 | break; | |
324 | case PROCESSOR_I486: | |
325 | cpu = "i486"; | |
326 | break; | |
327 | case PROCESSOR_PENTIUM: | |
328 | if (has_mmx) | |
329 | cpu = "pentium-mmx"; | |
330 | else | |
331 | cpu = "pentium"; | |
332 | break; | |
333 | case PROCESSOR_PENTIUMPRO: | |
334 | if (has_longmode) | |
335 | { | |
336 | /* It is Core 2 Duo. */ | |
337 | cpu = "core2"; | |
fa959ce4 | 338 | } |
edccdcb1 | 339 | else |
fa959ce4 | 340 | { |
edccdcb1 L |
341 | if (arch) |
342 | { | |
343 | if (has_sse3) | |
344 | { | |
345 | /* It is Core Duo. */ | |
346 | cpu = "prescott"; | |
347 | } | |
348 | else if (has_sse2) | |
349 | { | |
350 | /* It is Pentium M. */ | |
351 | cpu = "pentium4"; | |
352 | } | |
353 | else if (has_sse) | |
354 | { | |
355 | /* It is Pentium III. */ | |
356 | cpu = "pentium3"; | |
357 | } | |
358 | else if (has_mmx) | |
359 | { | |
360 | /* It is Pentium II. */ | |
361 | cpu = "pentium2"; | |
362 | } | |
363 | else | |
364 | { | |
365 | /* Default to Pentium Pro. */ | |
366 | cpu = "pentiumpro"; | |
367 | } | |
368 | } | |
fa959ce4 | 369 | else |
edccdcb1 L |
370 | { |
371 | /* For -mtune, we default to -mtune=generic. */ | |
372 | cpu = "generic"; | |
373 | } | |
fa959ce4 | 374 | } |
edccdcb1 L |
375 | break; |
376 | case PROCESSOR_GEODE: | |
377 | cpu = "geode"; | |
378 | break; | |
379 | case PROCESSOR_K6: | |
380 | if (has_3dnow) | |
381 | cpu = "k6-3"; | |
382 | else | |
383 | cpu = "k6"; | |
384 | break; | |
385 | case PROCESSOR_ATHLON: | |
386 | if (has_sse) | |
387 | cpu = "athlon-4"; | |
388 | else | |
389 | cpu = "athlon"; | |
390 | break; | |
391 | case PROCESSOR_PENTIUM4: | |
fa959ce4 | 392 | if (has_sse3) |
edccdcb1 | 393 | { |
fa959ce4 MM |
394 | if (has_longmode) |
395 | cpu = "nocona"; | |
edccdcb1 L |
396 | else |
397 | cpu = "prescott"; | |
fa959ce4 | 398 | } |
edccdcb1 L |
399 | else |
400 | cpu = "pentium4"; | |
401 | break; | |
402 | case PROCESSOR_K8: | |
403 | cpu = "k8"; | |
404 | break; | |
405 | case PROCESSOR_NOCONA: | |
406 | cpu = "nocona"; | |
407 | break; | |
35a63f21 DR |
408 | case PROCESSOR_AMDFAM10: |
409 | cpu = "amdfam10"; | |
410 | break; | |
edccdcb1 L |
411 | case PROCESSOR_GENERIC32: |
412 | case PROCESSOR_GENERIC64: | |
413 | cpu = "generic"; | |
414 | break; | |
415 | default: | |
416 | abort (); | |
417 | break; | |
fa959ce4 MM |
418 | } |
419 | ||
5be6cb59 UB |
420 | if (arch) |
421 | { | |
422 | if (has_cmpxchg16b) | |
423 | options = concat (options, "-mcx16 ", NULL); | |
424 | if (has_lahf_lm) | |
425 | options = concat (options, "-msahf ", NULL); | |
426 | } | |
427 | ||
fa959ce4 | 428 | done: |
5be6cb59 | 429 | return concat (cache, "-m", argv[0], "=", cpu, " ", options, NULL); |
fa959ce4 MM |
430 | } |
431 | #else | |
/* If we aren't compiling with GCC we cannot probe the processor, so we
   just provide a minimal default value: "-march=i386" when the first
   argument is "arch" and "-mtune=generic" when it is "tune".  NULL is
   returned when no argument is given or the argument is anything else.  */
const char *host_detect_local_cpu (int argc, const char **argv)
{
  const char *kind;

  if (argc < 1)
    return NULL;

  kind = argv[0];

  if (strcmp (kind, "arch") == 0)
    {
      /* FIXME: i386 is wrong for 64bit compiler.  How can we tell if
	 we are generating 64bit or 32bit code?  */
      return concat ("-m", kind, "=", "i386", NULL);
    }

  if (strcmp (kind, "tune") == 0)
    return concat ("-m", kind, "=", "generic", NULL);

  return NULL;
}
682cd442 | 457 | #endif /* GCC_VERSION */ |