/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2016 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <cpuid.h>
#include <cpu-features.h>

static inline void
get_common_indeces (struct cpu_features *cpu_features,
                    unsigned int *family, unsigned int *model,
                    unsigned int *extended_model)
{
  unsigned int eax;
  __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
           cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
           cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
  cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax;
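  /* CPUID leaf 1 EAX layout: stepping in bits 3:0, model in bits 7:4,
     family in bits 11:8, extended model in bits 19:16 and extended
     family in bits 27:20.  "(eax >> 12) & 0xf0" leaves the extended
     model pre-shifted by 4 so it can be added directly to the base
     model.  */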
  *family = (eax >> 8) & 0x0f;
  *model = (eax >> 4) & 0x0f;
  *extended_model = (eax >> 12) & 0xf0;
  if (*family == 0x0f)
    {
      *family += (eax >> 20) & 0xff;
      *model += *extended_model;
    }
}

static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
  unsigned int ebx, ecx, edx;
  unsigned int family = 0;
  unsigned int model = 0;
  enum cpu_features_kind kind;

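  /* When the compile-time target does not guarantee the CPUID
     instruction (HAS_CPUID is 0), probe for it at run time and fall
     back to arch_kind_other if it is absent.  */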
#if !HAS_CPUID
  if (__get_cpuid_max (0, 0) == 0)
    {
      kind = arch_kind_other;
      goto no_cpuid;
    }
#endif

  __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx);

  /* This spells out "GenuineIntel".  */
  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
    {
      unsigned int extended_model;

      kind = arch_kind_intel;

      get_common_indeces (cpu_features, &family, &model, &extended_model);

      if (family == 0x06)
        {
          ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx;
          model += extended_model;
          switch (model)
            {
            case 0x1c:
            case 0x26:
              /* BSF is slow on Atom.  */
              cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF;
              break;

            case 0x57:
              /* Knights Landing.  Enable Silvermont optimizations.  */
              cpu_features->feature[index_Prefer_No_VZEROUPPER]
                |= bit_Prefer_No_VZEROUPPER;

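              /* Fall through: Knights Landing also takes the
                 Silvermont string tuning set below.  */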
            case 0x37:
            case 0x4a:
            case 0x4d:
            case 0x5a:
            case 0x5d:
              /* Unaligned load versions are faster than SSSE3
                 on Silvermont.  */
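              /* These three features must live in the same feature
                 word so a single OR can set all of them; the
                 preprocessor checks below verify that at compile
                 time.  */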
#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
#endif
#if index_Fast_Unaligned_Load != index_Slow_SSE4_2
# error index_Fast_Unaligned_Load != index_Slow_SSE4_2
#endif
              cpu_features->feature[index_Fast_Unaligned_Load]
                |= (bit_Fast_Unaligned_Load
                    | bit_Prefer_PMINUB_for_stringop
                    | bit_Slow_SSE4_2);
              break;

            default:
              /* Unknown family 0x06 processors.  Assume this is one
                 of the Core i3/i5/i7 processors if AVX is available.  */
              if ((ecx & bit_AVX) == 0)
                break;

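              /* Fall through: AVX-capable unknown models get the
                 Core i3/i5/i7 tuning below.  */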
            case 0x1a:
            case 0x1e:
            case 0x1f:
            case 0x25:
            case 0x2c:
            case 0x2e:
            case 0x2f:
              /* Rep string instructions, copy backward, unaligned loads
                 and pminub are fast on Intel Core i3, i5 and i7.  */
#if index_Fast_Rep_String != index_Fast_Copy_Backward
# error index_Fast_Rep_String != index_Fast_Copy_Backward
#endif
#if index_Fast_Rep_String != index_Fast_Unaligned_Load
# error index_Fast_Rep_String != index_Fast_Unaligned_Load
#endif
#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
#endif
              cpu_features->feature[index_Fast_Rep_String]
                |= (bit_Fast_Rep_String
                    | bit_Fast_Copy_Backward
                    | bit_Fast_Unaligned_Load
                    | bit_Prefer_PMINUB_for_stringop);
              break;
            }
        }
    }
  /* This spells out "AuthenticAMD".  */
  else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
    {
      unsigned int extended_model;

      kind = arch_kind_amd;

      get_common_indeces (cpu_features, &family, &model, &extended_model);

      ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx;

      unsigned int eax;
      __cpuid (0x80000000, eax, ebx, ecx, edx);
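      /* Leaf 0x80000000 returns the highest supported extended leaf
         in EAX; read the extended feature flags from leaf 0x80000001
         only if it exists.  */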
      if (eax >= 0x80000001)
        __cpuid (0x80000001,
                 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax,
                 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx,
                 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx,
                 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx);
    }
  else
    kind = arch_kind_other;

  /* Support i586 if CX8 is available.  */
  if (HAS_CPU_FEATURE (CX8))
    cpu_features->feature[index_I586] |= bit_I586;

  /* Support i686 if CMOV is available.  */
  if (HAS_CPU_FEATURE (CMOV))
    cpu_features->feature[index_I686] |= bit_I686;

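  /* Leaf 7 subleaf 0 carries the structured extended feature flags,
     including the AVX2 and AVX-512 bits tested below.  */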
  if (cpu_features->max_cpuid >= 7)
    __cpuid_count (7, 0,
                   cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
                   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
                   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
                   cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);

  /* Can we call xgetbv?  */
  if (HAS_CPU_FEATURE (OSXSAVE))
    {
      unsigned int xcrlow;
      unsigned int xcrhigh;
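      /* XGETBV with ECX = 0 reads XCR0, whose bits tell which register
         states the OS saves and restores on context switch; a CPU
         feature bit alone does not mean the state is usable.  */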
181 asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
182 /* Is YMM and XMM state usable? */
183 if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
184 (bit_YMM_state | bit_XMM_state))
185 {
186 /* Determine if AVX is usable. */
187 if (HAS_CPU_FEATURE (AVX))
188 cpu_features->feature[index_AVX_Usable] |= bit_AVX_Usable;
189 #if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
190 # error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
191 #endif
          /* Determine if AVX2 is usable.  Unaligned loads with 256-bit
             AVX registers are faster on processors with AVX2.  */
          if (HAS_CPU_FEATURE (AVX2))
            cpu_features->feature[index_AVX2_Usable]
              |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load;
          /* Check if OPMASK state, the upper 256 bits of ZMM0-ZMM15
             and the ZMM16-ZMM31 state are enabled.  */
          if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
                         | bit_ZMM16_31_state)) ==
              (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
            {
              /* Determine if AVX512F is usable.  */
              if (HAS_CPU_FEATURE (AVX512F))
                {
                  cpu_features->feature[index_AVX512F_Usable]
                    |= bit_AVX512F_Usable;
                  /* Determine if AVX512DQ is usable.  */
                  if (HAS_CPU_FEATURE (AVX512DQ))
                    cpu_features->feature[index_AVX512DQ_Usable]
                      |= bit_AVX512DQ_Usable;
                }
            }
          /* Determine if FMA is usable.  */
          if (HAS_CPU_FEATURE (FMA))
            cpu_features->feature[index_FMA_Usable] |= bit_FMA_Usable;
          /* Determine if FMA4 is usable.  */
          if (HAS_CPU_FEATURE (FMA4))
            cpu_features->feature[index_FMA4_Usable] |= bit_FMA4_Usable;
        }
    }

#if !HAS_CPUID
no_cpuid:
#endif

  cpu_features->family = family;
  cpu_features->model = model;
  cpu_features->kind = kind;
}