/*
 * Source: tools/power/x86/turbostat/turbostat.c
 * (git blob bc103851df70b9a68db7c34124a710294980aa07, thirdparty/kernel/stable.git)
 */
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * turbostat -- show CPU frequency and C-state residency
4 * on modern Intel and AMD processors.
5 *
6 * Copyright (c) 2023 Intel Corporation.
7 * Len Brown <len.brown@intel.com>
8 */
9
10 #define _GNU_SOURCE
11 #include MSRHEADER
12 #include INTEL_FAMILY_HEADER
13 #include <stdarg.h>
14 #include <stdio.h>
15 #include <err.h>
16 #include <unistd.h>
17 #include <sys/types.h>
18 #include <sys/wait.h>
19 #include <sys/stat.h>
20 #include <sys/select.h>
21 #include <sys/resource.h>
22 #include <fcntl.h>
23 #include <signal.h>
24 #include <sys/time.h>
25 #include <stdlib.h>
26 #include <getopt.h>
27 #include <dirent.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <sched.h>
31 #include <time.h>
32 #include <cpuid.h>
33 #include <sys/capability.h>
34 #include <errno.h>
35 #include <math.h>
36 #include <linux/perf_event.h>
37 #include <asm/unistd.h>
38 #include <stdbool.h>
39 #include <assert.h>
40 #include <linux/kernel.h>
41
/* Silence "unused parameter" warnings for intentionally-ignored arguments. */
#define UNUSED(x) (void)(x)

/*
 * This list matches the column headers, except
 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
 * 2. Core and CPU are moved to the end, we can't have strings that contain them
 *    matching on them for --show and --hide.
 */

/*
 * buffer size used by sscanf() for added column names
 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
 */
#define NAME_BYTES 20
#define PATH_BYTES 128

/* Ceiling requested via setrlimit() for open fds; per-CPU fd arrays below can exceed the default. */
#define MAX_NOFILE 0x8000
59
/* Granularity at which a counter is collected: per-CPU (thread), per-core, or per-package. */
enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
/* What a counter's raw value represents (events, cycles, seconds, microseconds). */
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
/* How a counter is displayed: raw value, interval delta, or percentage. */
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
/* Which backend supplies APERF/MPERF: a perf event group or direct MSR reads. */
enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR };
/* Which backend supplies RAPL energy counters, if any. */
enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR };
65
/*
 * One displayed column/counter: either a built-in counter (the bic[] table
 * below) or an MSR/sysfs counter added at run-time (chained via ->next).
 */
struct msr_counter {
	unsigned int msr_num;	/* MSR address; 0 for non-MSR counters */
	char name[NAME_BYTES];	/* column header text */
	char path[PATH_BYTES];	/* sysfs path, for sysfs-backed counters */
	unsigned int width;	/* printed column width */
	enum counter_type type;
	enum counter_format format;
	struct msr_counter *next;	/* singly-linked list of run-time-added counters */
	unsigned int flags;
#define FLAGS_HIDE (1 << 0)
#define FLAGS_SHOW (1 << 1)
#define SYSFS_PERCPU (1 << 1)	/* NOTE(review): shares bit 1 with FLAGS_SHOW -- confirm intentional */
};
79
/*
 * Built-in counter (BIC) table.
 * Entry order MUST match the BIC_* bit positions defined below:
 * bic[0] <-> BIC_USEC (bit 0), bic[1] <-> BIC_TOD (bit 1), and so on.
 */
struct msr_counter bic[] = {
	{ 0x0, "usec", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Time_Of_Day_Seconds", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Package", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Node", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Avg_MHz", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Busy%", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Bzy_MHz", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "TSC_MHz", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "IRQ", "", 0, 0, 0, NULL, 0 },
	/* SMI is the only entry with explicit width/format; the rest default to 0. */
	{ 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL, 0 },
	{ 0x0, "sysfs", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c1", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c3", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c6", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c7", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "ThreadC", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreTmp", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreCnt", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgTmp", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX%rc6", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXMHz", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc2", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc3", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc6", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc7", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc8", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc9", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Pk%pc10", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%LPI", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "SYS%LPI", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgWatt", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "CorWatt", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXWatt", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgCnt", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "RAMWatt", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "PKG_%", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "RAM_%", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg_J", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Cor_J", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX_J", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "RAM_J", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Mod%c6", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Totl%C0", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Any%C0", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX%C0", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "CPUGFX%", "", 0, 0, 0, NULL, 0 },
	/* Core and CPU are deliberately last; see the comment above NAME_BYTES. */
	{ 0x0, "Core", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "APIC", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "X2APIC", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "Die", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "SAM%mc6", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "SAMMHz", "", 0, 0, 0, NULL, 0 },
	{ 0x0, "SAMAMHz", "", 0, 0, 0, NULL, 0 },
};
140
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
/*
 * One bit per built-in counter; bit N corresponds to bic[N] above.
 * Keep these in sync with the bic[] table order.
 */
#define BIC_USEC (1ULL << 0)
#define BIC_TOD (1ULL << 1)
#define BIC_Package (1ULL << 2)
#define BIC_Node (1ULL << 3)
#define BIC_Avg_MHz (1ULL << 4)
#define BIC_Busy (1ULL << 5)
#define BIC_Bzy_MHz (1ULL << 6)
#define BIC_TSC_MHz (1ULL << 7)
#define BIC_IRQ (1ULL << 8)
#define BIC_SMI (1ULL << 9)
#define BIC_sysfs (1ULL << 10)
#define BIC_CPU_c1 (1ULL << 11)
#define BIC_CPU_c3 (1ULL << 12)
#define BIC_CPU_c6 (1ULL << 13)
#define BIC_CPU_c7 (1ULL << 14)
#define BIC_ThreadC (1ULL << 15)
#define BIC_CoreTmp (1ULL << 16)
#define BIC_CoreCnt (1ULL << 17)
#define BIC_PkgTmp (1ULL << 18)
#define BIC_GFX_rc6 (1ULL << 19)
#define BIC_GFXMHz (1ULL << 20)
#define BIC_Pkgpc2 (1ULL << 21)
#define BIC_Pkgpc3 (1ULL << 22)
#define BIC_Pkgpc6 (1ULL << 23)
#define BIC_Pkgpc7 (1ULL << 24)
#define BIC_Pkgpc8 (1ULL << 25)
#define BIC_Pkgpc9 (1ULL << 26)
#define BIC_Pkgpc10 (1ULL << 27)
#define BIC_CPU_LPI (1ULL << 28)
#define BIC_SYS_LPI (1ULL << 29)
#define BIC_PkgWatt (1ULL << 30)
#define BIC_CorWatt (1ULL << 31)
#define BIC_GFXWatt (1ULL << 32)
#define BIC_PkgCnt (1ULL << 33)
#define BIC_RAMWatt (1ULL << 34)
#define BIC_PKG__ (1ULL << 35)
#define BIC_RAM__ (1ULL << 36)
#define BIC_Pkg_J (1ULL << 37)
#define BIC_Cor_J (1ULL << 38)
#define BIC_GFX_J (1ULL << 39)
#define BIC_RAM_J (1ULL << 40)
#define BIC_Mod_c6 (1ULL << 41)
#define BIC_Totl_c0 (1ULL << 42)
#define BIC_Any_c0 (1ULL << 43)
#define BIC_GFX_c0 (1ULL << 44)
#define BIC_CPUGFX (1ULL << 45)
#define BIC_Core (1ULL << 46)
#define BIC_CPU (1ULL << 47)
#define BIC_APIC (1ULL << 48)
#define BIC_X2APIC (1ULL << 49)
#define BIC_Die (1ULL << 50)
#define BIC_GFXACTMHz (1ULL << 51)
#define BIC_IPC (1ULL << 52)
#define BIC_CORE_THROT_CNT (1ULL << 53)
#define BIC_UNCORE_MHZ (1ULL << 54)
#define BIC_SAM_mc6 (1ULL << 55)
#define BIC_SAMMHz (1ULL << 56)
#define BIC_SAMACTMHz (1ULL << 57)

/* Named column groups accepted by --show/--hide. */
#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6)
#define BIC_OTHER (BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)

/* Columns that must be explicitly requested with --show/--enable. */
#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
208
209 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
210 unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
211
212 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
213 #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
214 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
215 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
216 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
217 #define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
218
struct amperf_group_fd;

char *proc_stat = "/proc/stat";
FILE *outf;			/* all output goes through this stream */
int *fd_percpu;			/* per-CPU MSR file descriptors */
int *fd_instr_count_percpu;	/* per-CPU perf fds for the instruction counter (IPC) */
struct amperf_group_fd *fd_amperf_percpu;	/* File descriptors for perf group with APERF and MPERF counters. */
struct timeval interval_tv = { 5, 0 };	/* default measurement interval: 5 seconds */
struct timespec interval_ts = { 5, 0 };

unsigned int num_iterations;
unsigned int header_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;		/* report energy (Joules) instead of power (Watts) */
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int has_turbo;
unsigned int is_hybrid;
unsigned int units = 1000000;	/* MHz etc */
/* CPU vendor, from CPUID leaf 0 */
unsigned int genuine_intel;
unsigned int authentic_amd;
unsigned int hygon_genuine;
unsigned int max_level, max_extended_level;	/* max CPUID basic/extended leaf */
unsigned int has_invariant_tsc;
unsigned int aperf_mperf_multiplier = 1;
double bclk;			/* base clock, MHz */
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;		/* base_hz/tsc_hz correction; see enable_tsc_tweak */
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;	/* buffered output and its write cursor */
unsigned int do_dts;		/* Digital Thermal Sensor (per-core temperature) */
unsigned int do_ptm;		/* Package Thermal Management (per-package temperature) */
unsigned int do_ipc;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int tj_max;		/* thermal throttle point, degrees C */
unsigned int tj_max_override;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;			/* CPU used for system-wide MSR reads */
unsigned int has_hwp;		/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;	/* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;	/* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;	/* IA32_HWP_REQUEST_PKG */
unsigned int first_counter_read = 1;
int ignore_stdin;
bool no_msr;			/* --no-msr: never open /dev/cpu/.../msr */
bool no_perf;			/* --no-perf: never use the perf subsystem */
enum amperf_source amperf_source;
281
/* Indices into gfx_info[] for the graphics / SA-media sysfs counters. */
enum gfx_sysfs_idx {
	GFX_rc6,
	GFX_MHz,
	GFX_ACTMHz,
	SAM_mc6,
	SAM_MHz,
	SAM_ACTMHz,
	GFX_MAX
};

/* sysfs path, open file handle, and value storage for one graphics counter. */
struct gfx_sysfs_info {
	const char *path;
	FILE *fp;
	unsigned int val;
	unsigned long long val_ull;
};

static struct gfx_sysfs_info gfx_info[GFX_MAX];

int get_msr(int cpu, off_t offset, unsigned long long *msr);
302
/* Model specific support Start */

/*
 * List of features that may diverge among different platforms.
 * Each supported CPU model maps to one (often shared) instance of this
 * struct via the platform_data table at the bottom of this section.
 */
struct platform_features {
	bool has_msr_misc_feature_control;	/* MSR_MISC_FEATURE_CONTROL */
	bool has_msr_misc_pwr_mgmt;	/* MSR_MISC_PWR_MGMT */
	bool has_nhm_msrs;	/* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */
	bool has_config_tdp;	/* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */
	int bclk_freq;		/* CPU base clock */
	int crystal_freq;	/* Crystal clock to use when not available from CPUID.15 */
	int supported_cstates;	/* Core cstates and Package cstates supported */
	int cst_limit;		/* MSR_PKG_CST_CONFIG_CONTROL */
	bool has_cst_auto_convension;	/* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */
	bool has_irtl_msrs;	/* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */
	bool has_msr_core_c1_res;	/* MSR_CORE_C1_RES */
	bool has_msr_module_c6_res_ms;	/* MSR_MODULE_C6_RES_MS */
	bool has_msr_c6_demotion_policy_config;	/* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */
	bool has_msr_atom_pkg_c6_residency;	/* MSR_ATOM_PKG_C6_RESIDENCY */
	bool has_msr_knl_core_c6_residency;	/* MSR_KNL_CORE_C6_RESIDENCY */
	bool has_ext_cst_msrs;	/* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */
	bool has_cst_prewake_bit;	/* Cstate prewake bit in MSR_IA32_POWER_CTL */
	int trl_msrs;		/* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
	int plr_msrs;		/* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
	int rapl_msrs;		/* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
	bool has_per_core_rapl;	/* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
	bool has_rapl_divisor;	/* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
	bool has_fixed_rapl_unit;	/* Fixed Energy Unit used for DRAM RAPL Domain */
	int rapl_quirk_tdp;	/* Hardcoded TDP value when cannot be retrieved from hardware */
	int tcc_offset_bits;	/* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
	bool enable_tsc_tweak;	/* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
	bool need_perf_multiplier;	/* mperf/aperf multiplier */
};

/* Maps one CPU model number to its feature table. */
struct platform_data {
	unsigned int model;
	const struct platform_features *features;
};
340
/* For BCLK */
enum bclk_freq {
	BCLK_100MHZ = 1,
	BCLK_133MHZ,
	BCLK_SLV,		/* Silvermont: decode MSR_FSB_FREQ via slm_bclk() */
};

#define SLM_BCLK_FREQS 5
/* Silvermont BCLK values in MHz, indexed by MSR_FSB_FREQ[3:0]. */
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
350
351 double slm_bclk(void)
352 {
353 unsigned long long msr = 3;
354 unsigned int i;
355 double freq;
356
357 if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
358 fprintf(outf, "SLM BCLK: unknown\n");
359
360 i = msr & 0xf;
361 if (i >= SLM_BCLK_FREQS) {
362 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
363 i = 3;
364 }
365 freq = slm_freq_table[i];
366
367 if (!quiet)
368 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
369
370 return freq;
371 }
372
/* For Package cstate limit */
/* Decodings of MSR_PKG_CST_CONFIG_CONTROL's package C-state limit field. */
enum package_cstate_limit {
	CST_LIMIT_NHM = 1,
	CST_LIMIT_SNB,
	CST_LIMIT_HSW,
	CST_LIMIT_SKX,
	CST_LIMIT_ICX,
	CST_LIMIT_SLV,
	CST_LIMIT_AMT,
	CST_LIMIT_KNL,
	CST_LIMIT_GMT,
};

/* For Turbo Ratio Limit MSRs */
/* Bitmask: which TRL MSR flavors a platform implements. */
enum turbo_ratio_limit_msrs {
	TRL_BASE = BIT(0),
	TRL_LIMIT1 = BIT(1),
	TRL_LIMIT2 = BIT(2),
	TRL_ATOM = BIT(3),
	TRL_KNL = BIT(4),
	TRL_CORECOUNT = BIT(5),
};

/* For Perf Limit Reason MSRs */
/* Bitmask: which PERF_LIMIT_REASONS MSRs a platform implements. */
enum perf_limit_reason_msrs {
	PLR_CORE = BIT(0),
	PLR_GFX = BIT(1),
	PLR_RING = BIT(2),
};
402
403 /* For RAPL MSRs */
404 enum rapl_msrs {
405 RAPL_PKG_POWER_LIMIT = BIT(0), /* 0x610 MSR_PKG_POWER_LIMIT */
406 RAPL_PKG_ENERGY_STATUS = BIT(1), /* 0x611 MSR_PKG_ENERGY_STATUS */
407 RAPL_PKG_PERF_STATUS = BIT(2), /* 0x613 MSR_PKG_PERF_STATUS */
408 RAPL_PKG_POWER_INFO = BIT(3), /* 0x614 MSR_PKG_POWER_INFO */
409 RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */
410 RAPL_DRAM_ENERGY_STATUS = BIT(5), /* 0x619 MSR_DRAM_ENERGY_STATUS */
411 RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */
412 RAPL_DRAM_POWER_INFO = BIT(7), /* 0x61c MSR_DRAM_POWER_INFO */
413 RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */
414 RAPL_CORE_ENERGY_STATUS = BIT(9), /* 0x639 MSR_PP0_ENERGY_STATUS */
415 RAPL_CORE_POLICY = BIT(10), /* 0x63a MSR_PP0_POLICY */
416 RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */
417 RAPL_GFX_ENERGY_STATUS = BIT(12), /* 0x641 MSR_PP1_ENERGY_STATUS */
418 RAPL_GFX_POLICY = BIT(13), /* 0x642 MSR_PP1_POLICY */
419 RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */
420 RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */
421 RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */
422 };
423
424 #define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT)
425 #define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT)
426 #define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT)
427 #define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS)
428
429 #define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
430 #define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
431 #define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY)
432 #define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLIGY)
433
434 #define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
435
/* For Cstates */
/* Bitmask: core (CCn) and package (PCn) C-states a platform supports. */
enum cstates {
	CC1 = BIT(0),
	CC3 = BIT(1),
	CC6 = BIT(2),
	CC7 = BIT(3),
	PC2 = BIT(4),
	PC3 = BIT(5),
	PC6 = BIT(6),
	PC7 = BIT(7),
	PC8 = BIT(8),
	PC9 = BIT(9),
	PC10 = BIT(10),
};
450
/* Nehalem / Westmere client and -EP (see turbostat_pdata[] for the model map) */
static const struct platform_features nhm_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_NHM,
	.trl_msrs = TRL_BASE,
};

/* Nehalem-EX / Westmere-EX (no turbo-ratio-limit MSRs) */
static const struct platform_features nhx_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_NHM,
};

/* Sandy Bridge client */
static const struct platform_features snb_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Sandy Bridge-X (server: DRAM RAPL instead of GFX) */
static const struct platform_features snx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};

/* Ivy Bridge client */
static const struct platform_features ivb_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Ivy Bridge-X */
static const struct platform_features ivx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_LIMIT1,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};
516
/* Haswell client */
static const struct platform_features hsw_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Haswell-X */
static const struct platform_features hsx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
	.plr_msrs = PLR_CORE | PLR_RING,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};

/* Haswell-L (low power: deeper package C-states) */
static const struct platform_features hswl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Haswell-G */
static const struct platform_features hswg_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Broadwell client */
static const struct platform_features bdw_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Broadwell-G */
static const struct platform_features bdwg_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Broadwell-X and Broadwell-D */
static const struct platform_features bdx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_cst_auto_convension = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};
614
/* Skylake / Kaby Lake / Comet Lake client */
static const struct platform_features skl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 24000000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_ext_cst_msrs = 1,
	.trl_msrs = TRL_BASE,
	.tcc_offset_bits = 6,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
	.enable_tsc_tweak = 1,
};

/* Cannon Lake-L; also used for Ice/Tiger/Rocket/Meteor Lake client and others */
static const struct platform_features cnl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_msr_core_c1_res = 1,
	.has_ext_cst_msrs = 1,
	.trl_msrs = TRL_BASE,
	.tcc_offset_bits = 6,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
	.enable_tsc_tweak = 1,
};

/* Alder Lake / Raptor Lake / Gracemont Atom */
static const struct platform_features adl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_msr_core_c1_res = 1,
	.has_ext_cst_msrs = 1,
	.trl_msrs = TRL_BASE,
	.tcc_offset_bits = 6,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
	.enable_tsc_tweak = 1,
};
665
/* Skylake-X */
static const struct platform_features skx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_irtl_msrs = 1,
	.has_cst_auto_convension = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};

/* Ice Lake-X / Ice Lake-D */
static const struct platform_features icx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_ICX,
	.has_msr_core_c1_res = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};

/* Sapphire Rapids / Emerald Rapids / Granite Rapids */
static const struct platform_features spr_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
};

/* Sierra Forest (Crestmont-X Atom server) */
static const struct platform_features srf_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
};

/* Grand Ridge (Crestmont Atom) */
static const struct platform_features grr_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
};
743
/* Silvermont Atom (variable BCLK; see slm_bclk()) */
static const struct platform_features slv_features = {
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_SLV,
	.supported_cstates = CC1 | CC6 | PC6,
	.cst_limit = CST_LIMIT_SLV,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_msr_c6_demotion_policy_config = 1,
	.has_msr_atom_pkg_c6_residency = 1,
	.trl_msrs = TRL_ATOM,
	.rapl_msrs = RAPL_PKG | RAPL_CORE,
	.has_rapl_divisor = 1,
	.rapl_quirk_tdp = 30,
};

/* Silvermont-D Atom */
static const struct platform_features slvd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_SLV,
	.supported_cstates = CC1 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_SLV,
	.has_msr_atom_pkg_c6_residency = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE,
	.rapl_quirk_tdp = 30,
};

/* Airmont Atom */
static const struct platform_features amt_features = {
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_AMT,
	.trl_msrs = TRL_BASE,
};

/* Goldmont Atom */
static const struct platform_features gmt_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 19200000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};

/* Goldmont-D Atom */
static const struct platform_features gmtd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 25000000,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.has_msr_core_c1_res = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
};

/* Goldmont Plus Atom */
static const struct platform_features gmtp_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 19200000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};

/* Tremont Atom */
static const struct platform_features tmt_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
	.enable_tsc_tweak = 1,
};

/* Tremont-D Atom */
static const struct platform_features tmtd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL,
};
838
/* Xeon Phi Knights Landing / Knights Mill */
static const struct platform_features knl_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_KNL,
	.has_msr_knl_core_c6_residency = 1,
	.trl_msrs = TRL_KNL,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
	.need_perf_multiplier = 1,
};

/* Fallback for models with no entry in turbostat_pdata[]: no features assumed. */
static const struct platform_features default_features = {
};

/* AMD Fam17h+ parts with RAPL MSRs */
static const struct platform_features amd_features_with_rapl = {
	.rapl_msrs = RAPL_AMD_F17H,
	.has_per_core_rapl = 1,
	.rapl_quirk_tdp = 280,	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
};
861
/*
 * Intel family-6 model -> feature-set table, terminated by a NULL entry.
 * Scanned linearly by probe_platform_features().
 */
static const struct platform_data turbostat_pdata[] = {
	{ INTEL_FAM6_NEHALEM, &nhm_features },
	{ INTEL_FAM6_NEHALEM_G, &nhm_features },
	{ INTEL_FAM6_NEHALEM_EP, &nhm_features },
	{ INTEL_FAM6_NEHALEM_EX, &nhx_features },
	{ INTEL_FAM6_WESTMERE, &nhm_features },
	{ INTEL_FAM6_WESTMERE_EP, &nhm_features },
	{ INTEL_FAM6_WESTMERE_EX, &nhx_features },
	{ INTEL_FAM6_SANDYBRIDGE, &snb_features },
	{ INTEL_FAM6_SANDYBRIDGE_X, &snx_features },
	{ INTEL_FAM6_IVYBRIDGE, &ivb_features },
	{ INTEL_FAM6_IVYBRIDGE_X, &ivx_features },
	{ INTEL_FAM6_HASWELL, &hsw_features },
	{ INTEL_FAM6_HASWELL_X, &hsx_features },
	{ INTEL_FAM6_HASWELL_L, &hswl_features },
	{ INTEL_FAM6_HASWELL_G, &hswg_features },
	{ INTEL_FAM6_BROADWELL, &bdw_features },
	{ INTEL_FAM6_BROADWELL_G, &bdwg_features },
	{ INTEL_FAM6_BROADWELL_X, &bdx_features },
	{ INTEL_FAM6_BROADWELL_D, &bdx_features },
	{ INTEL_FAM6_SKYLAKE_L, &skl_features },
	{ INTEL_FAM6_SKYLAKE, &skl_features },
	{ INTEL_FAM6_SKYLAKE_X, &skx_features },
	{ INTEL_FAM6_KABYLAKE_L, &skl_features },
	{ INTEL_FAM6_KABYLAKE, &skl_features },
	{ INTEL_FAM6_COMETLAKE, &skl_features },
	{ INTEL_FAM6_COMETLAKE_L, &skl_features },
	{ INTEL_FAM6_CANNONLAKE_L, &cnl_features },
	{ INTEL_FAM6_ICELAKE_X, &icx_features },
	{ INTEL_FAM6_ICELAKE_D, &icx_features },
	{ INTEL_FAM6_ICELAKE_L, &cnl_features },
	{ INTEL_FAM6_ICELAKE_NNPI, &cnl_features },
	{ INTEL_FAM6_ROCKETLAKE, &cnl_features },
	{ INTEL_FAM6_TIGERLAKE_L, &cnl_features },
	{ INTEL_FAM6_TIGERLAKE, &cnl_features },
	{ INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features },
	{ INTEL_FAM6_EMERALDRAPIDS_X, &spr_features },
	{ INTEL_FAM6_GRANITERAPIDS_X, &spr_features },
	{ INTEL_FAM6_LAKEFIELD, &cnl_features },
	{ INTEL_FAM6_ALDERLAKE, &adl_features },
	{ INTEL_FAM6_ALDERLAKE_L, &adl_features },
	{ INTEL_FAM6_RAPTORLAKE, &adl_features },
	{ INTEL_FAM6_RAPTORLAKE_P, &adl_features },
	{ INTEL_FAM6_RAPTORLAKE_S, &adl_features },
	{ INTEL_FAM6_METEORLAKE, &cnl_features },
	{ INTEL_FAM6_METEORLAKE_L, &cnl_features },
	{ INTEL_FAM6_ARROWLAKE, &cnl_features },
	{ INTEL_FAM6_LUNARLAKE_M, &cnl_features },
	{ INTEL_FAM6_ATOM_SILVERMONT, &slv_features },
	{ INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features },
	{ INTEL_FAM6_ATOM_AIRMONT, &amt_features },
	{ INTEL_FAM6_ATOM_GOLDMONT, &gmt_features },
	{ INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features },
	{ INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features },
	{ INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features },
	{ INTEL_FAM6_ATOM_TREMONT, &tmt_features },
	{ INTEL_FAM6_ATOM_TREMONT_L, &tmt_features },
	{ INTEL_FAM6_ATOM_GRACEMONT, &adl_features },
	{ INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features },
	{ INTEL_FAM6_ATOM_CRESTMONT, &grr_features },
	{ INTEL_FAM6_XEON_PHI_KNL, &knl_features },
	{ INTEL_FAM6_XEON_PHI_KNM, &knl_features },
	/*
	 * Missing support for
	 * INTEL_FAM6_ICELAKE
	 * INTEL_FAM6_ATOM_SILVERMONT_MID
	 * INTEL_FAM6_ATOM_AIRMONT_MID
	 * INTEL_FAM6_ATOM_AIRMONT_NP
	 */
	{ 0, NULL },
};

/* Feature set for the running CPU; set once by probe_platform_features() */
static const struct platform_features *platform;
935
936 void probe_platform_features(unsigned int family, unsigned int model)
937 {
938 int i;
939
940 platform = &default_features;
941
942 if (authentic_amd || hygon_genuine) {
943 if (max_extended_level >= 0x80000007) {
944 unsigned int eax, ebx, ecx, edx;
945
946 __cpuid(0x80000007, eax, ebx, ecx, edx);
947 /* RAPL (Fam 17h+) */
948 if ((edx & (1 << 14)) && family >= 0x17)
949 platform = &amd_features_with_rapl;
950 }
951 return;
952 }
953
954 if (!genuine_intel || family != 6)
955 return;
956
957 for (i = 0; turbostat_pdata[i].features; i++) {
958 if (turbostat_pdata[i].model == model) {
959 platform = turbostat_pdata[i].features;
960 return;
961 }
962 }
963 }
964
/* Model specific support End */

#define TJMAX_DEFAULT 100

/* MSRs that are not yet in the kernel-provided header. */
#define MSR_RAPL_PWR_UNIT 0xc0010299
#define MSR_CORE_ENERGY_STAT 0xc001029a
#define MSR_PKG_ENERGY_STAT 0xc001029b

#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* NOTE(review): presumably counts intervals where a counter decreased; confirm against usage */
int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_COUNTERS 8
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32

/* Indexes used to map data read from perf and MSRs into global variables */
enum rapl_rci_index {
	RAPL_RCI_INDEX_ENERGY_PKG = 0,
	RAPL_RCI_INDEX_ENERGY_CORES = 1,
	RAPL_RCI_INDEX_DRAM = 2,
	RAPL_RCI_INDEX_GFX = 3,
	RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
	RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
	RAPL_RCI_INDEX_CORE_ENERGY = 6,
	NUM_RAPL_COUNTERS,
};

/* Unit in which a RAPL reading is reported */
enum rapl_unit {
	RAPL_UNIT_INVALID,
	RAPL_UNIT_JOULES,
	RAPL_UNIT_WATTS,
};
1003
/*
 * Per-domain RAPL read state: one slot per rapl_rci_index, regardless of
 * whether the value comes from perf or from a raw MSR read.
 */
struct rapl_counter_info_t {
	unsigned long long data[NUM_RAPL_COUNTERS];	/* latest raw readings */
	enum rapl_source source[NUM_RAPL_COUNTERS];
	unsigned long long flags[NUM_RAPL_COUNTERS];
	double scale[NUM_RAPL_COUNTERS];
	enum rapl_unit unit[NUM_RAPL_COUNTERS];

	union {
		/* Active when source == RAPL_SOURCE_MSR */
		struct {
			unsigned long long msr[NUM_RAPL_COUNTERS];
			unsigned long long msr_mask[NUM_RAPL_COUNTERS];
			int msr_shift[NUM_RAPL_COUNTERS];
		};
	};

	int fd_perf;	/* perf group leader fd for this domain */
};

/* struct rapl_counter_info_t for each RAPL domain */
struct rapl_counter_info_t *rapl_counter_info_perdomain;

/* Counter participates in the 64-bit accumulated MSR sum (see get_msr_sum) */
#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1)
1027
/* Static description of one RAPL counter: how to read it and where it lands */
struct rapl_counter_arch_info {
	int feature_mask;	/* Mask for testing if the counter is supported on host */
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned long long msr_mask;
	int msr_shift;		/* Positive mean shift right, negative mean shift left */
	double *platform_rapl_msr_scale;	/* Scale applied to values read by MSR (platform dependent, filled at runtime) */
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	unsigned long long bic;
	double compat_scale;	/* Some counters require constant scaling to be in the same range as other, similar ones */
	unsigned long long flags;
};

/* One entry per known RAPL counter; entries with NULL perf_subsys are MSR-only */
static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
	{
		.feature_mask = RAPL_PKG,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic = BIC_PkgWatt | BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic = BIC_PkgWatt | BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_CORE_ENERGY_STATUS,
		.perf_subsys = "power",
		.perf_name = "energy-cores",
		.msr = MSR_PP0_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
		.bic = BIC_CorWatt | BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_DRAM,
		.perf_subsys = "power",
		.perf_name = "energy-ram",
		.msr = MSR_DRAM_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_dram_energy_units,
		.rci_index = RAPL_RCI_INDEX_DRAM,
		.bic = BIC_RAMWatt | BIC_RAM_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_GFX,
		.perf_subsys = "power",
		.perf_name = "energy-gpu",
		.msr = MSR_PP1_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_GFX,
		.bic = BIC_GFXWatt | BIC_GFX_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_PKG_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_PKG_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
		.bic = BIC_PKG__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_DRAM_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_DRAM_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
		.bic = BIC_RAM__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_CORE_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFF,	/* AMD core energy MSR is 32 bits wide */
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
		.bic = BIC_CorWatt | BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = 0,
	},
};
1148
/* One RAPL reading plus the scale/unit needed to interpret it */
struct rapl_counter {
	unsigned long long raw_value;
	enum rapl_unit unit;
	double scale;
};

/* Per-logical-CPU sample; even/odd buffers are swapped between intervals */
struct thread_data {
	struct timeval tv_begin;
	struct timeval tv_end;
	struct timeval tv_delta;
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;
	unsigned long long instr_count;
	unsigned long long irq_count;
	unsigned int smi_count;
	unsigned int cpu_id;
	unsigned int apic_id;
	unsigned int x2apic_id;
	unsigned int flags;
	bool is_atom;
	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];	/* user-added counters */
} *thread_even, *thread_odd;
1173
/* Per-core sample; even/odd buffers are swapped between intervals */
struct core_data {
	int base_cpu;		/* first CPU seen in this core; -1 if none */
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned long long mc6_us;	/* duplicate as per-core for now, even though per module */
	unsigned int core_temp_c;
	struct rapl_counter core_energy;	/* MSR_CORE_ENERGY_STAT */
	unsigned int core_id;
	unsigned long long core_throt_cnt;
	unsigned long long counter[MAX_ADDED_COUNTERS];
} *core_even, *core_odd;

/* Per-package sample; even/odd buffers are swapped between intervals */
struct pkg_data {
	int base_cpu;		/* first CPU seen in this package; -1 if none */
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned long long pc8;
	unsigned long long pc9;
	unsigned long long pc10;
	long long cpu_lpi;
	long long sys_lpi;
	unsigned long long pkg_wtd_core_c0;
	unsigned long long pkg_any_core_c0;
	unsigned long long pkg_any_gfxe_c0;
	unsigned long long pkg_both_core_gfxe_c0;
	long long gfx_rc6_ms;
	unsigned int gfx_mhz;
	unsigned int gfx_act_mhz;
	long long sam_mc6_ms;
	unsigned int sam_mhz;
	unsigned int sam_act_mhz;
	unsigned int package_id;
	struct rapl_counter energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
	struct rapl_counter energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
	struct rapl_counter energy_cores;	/* MSR_PP0_ENERGY_STATUS */
	struct rapl_counter energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
	struct rapl_counter rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	struct rapl_counter rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
	unsigned int pkg_temp_c;
	unsigned int uncore_mhz;
	unsigned long long counter[MAX_ADDED_COUNTERS];
} *package_even, *package_odd;
1219
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/* Index into the flat thread array by (pkg, node, core, thread) coordinates */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)	      \
	((thread_base) +						      \
	 ((pkg_no) *							      \
	  topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
	 ((node_no) * topo.cores_per_node * topo.threads_per_core) +	      \
	 ((core_no) * topo.threads_per_core) +				      \
	 (thread_no))

/* Index into the flat core array by (pkg, node, core) coordinates */
#define GET_CORE(core_base, core_no, node_no, pkg_no)			\
	((core_base) +							\
	 ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) +	\
	 ((node_no) * topo.cores_per_node) +				\
	 (core_no))

#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)

/*
 * The accumulated sum of MSR is defined as a monotonic
 * increasing MSR, it will be accumulated periodically,
 * despite its register's bit width.
 */
enum {
	IDX_PKG_ENERGY,
	IDX_DRAM_ENERGY,
	IDX_PP0_ENERGY,
	IDX_PP1_ENERGY,
	IDX_PKG_PERF,
	IDX_DRAM_PERF,
	IDX_COUNT,
};

int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);

struct msr_sum_array {
	/* get_msr_sum() = sum + (get_msr() - last) */
	struct {
		/*The accumulated MSR value is updated by the timer */
		unsigned long long sum;
		/*The MSR footprint recorded in last timer */
		unsigned long long last;
	} entries[IDX_COUNT];
};

/* The percpu MSR sum array.*/
struct msr_sum_array *per_cpu_msr_sum;
1268
1269 off_t idx_to_offset(int idx)
1270 {
1271 off_t offset;
1272
1273 switch (idx) {
1274 case IDX_PKG_ENERGY:
1275 if (platform->rapl_msrs & RAPL_AMD_F17H)
1276 offset = MSR_PKG_ENERGY_STAT;
1277 else
1278 offset = MSR_PKG_ENERGY_STATUS;
1279 break;
1280 case IDX_DRAM_ENERGY:
1281 offset = MSR_DRAM_ENERGY_STATUS;
1282 break;
1283 case IDX_PP0_ENERGY:
1284 offset = MSR_PP0_ENERGY_STATUS;
1285 break;
1286 case IDX_PP1_ENERGY:
1287 offset = MSR_PP1_ENERGY_STATUS;
1288 break;
1289 case IDX_PKG_PERF:
1290 offset = MSR_PKG_PERF_STATUS;
1291 break;
1292 case IDX_DRAM_PERF:
1293 offset = MSR_DRAM_PERF_STATUS;
1294 break;
1295 default:
1296 offset = -1;
1297 }
1298 return offset;
1299 }
1300
1301 int offset_to_idx(off_t offset)
1302 {
1303 int idx;
1304
1305 switch (offset) {
1306 case MSR_PKG_ENERGY_STATUS:
1307 case MSR_PKG_ENERGY_STAT:
1308 idx = IDX_PKG_ENERGY;
1309 break;
1310 case MSR_DRAM_ENERGY_STATUS:
1311 idx = IDX_DRAM_ENERGY;
1312 break;
1313 case MSR_PP0_ENERGY_STATUS:
1314 idx = IDX_PP0_ENERGY;
1315 break;
1316 case MSR_PP1_ENERGY_STATUS:
1317 idx = IDX_PP1_ENERGY;
1318 break;
1319 case MSR_PKG_PERF_STATUS:
1320 idx = IDX_PKG_PERF;
1321 break;
1322 case MSR_DRAM_PERF_STATUS:
1323 idx = IDX_DRAM_PERF;
1324 break;
1325 default:
1326 idx = -1;
1327 }
1328 return idx;
1329 }
1330
1331 int idx_valid(int idx)
1332 {
1333 switch (idx) {
1334 case IDX_PKG_ENERGY:
1335 return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
1336 case IDX_DRAM_ENERGY:
1337 return platform->rapl_msrs & RAPL_DRAM;
1338 case IDX_PP0_ENERGY:
1339 return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS;
1340 case IDX_PP1_ENERGY:
1341 return platform->rapl_msrs & RAPL_GFX;
1342 case IDX_PKG_PERF:
1343 return platform->rapl_msrs & RAPL_PKG_PERF_STATUS;
1344 case IDX_DRAM_PERF:
1345 return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS;
1346 default:
1347 return 0;
1348 }
1349 }
1350
/* Linked lists of user-added (--add) counters, one list per scope */
struct sys_counters {
	unsigned int added_thread_counters;
	unsigned int added_core_counters;
	unsigned int added_package_counters;
	struct msr_counter *tp;	/* thread-scope list head */
	struct msr_counter *cp;	/* core-scope list head */
	struct msr_counter *pp;	/* package-scope list head */
} sys;
1359
1360 void free_sys_counters(void)
1361 {
1362 struct msr_counter *p = sys.tp, *pnext = NULL;
1363 while (p) {
1364 pnext = p->next;
1365 free(p);
1366 p = pnext;
1367 }
1368
1369 p = sys.cp, pnext = NULL;
1370 while (p) {
1371 pnext = p->next;
1372 free(p);
1373 p = pnext;
1374 }
1375
1376 p = sys.pp, pnext = NULL;
1377 while (p) {
1378 pnext = p->next;
1379 free(p);
1380 p = pnext;
1381 }
1382
1383 sys.added_thread_counters = 0;
1384 sys.added_core_counters = 0;
1385 sys.added_package_counters = 0;
1386 sys.tp = NULL;
1387 sys.cp = NULL;
1388 sys.pp = NULL;
1389 }
1390
/* Whole-system averages printed on the summary row */
struct system_summary {
	struct thread_data threads;
	struct core_data cores;
	struct pkg_data packages;
} average;

/* Per-CPU topology as read from sysfs */
struct cpu_topology {
	int physical_package_id;
	int die_id;
	int logical_cpu_id;
	int physical_node_id;
	int logical_node_id;	/* 0-based count within the package */
	int physical_core_id;
	int thread_id;
	cpu_set_t *put_ids;	/* Processing Unit/Thread IDs */
} *cpus;

/* System-wide topology dimensions discovered at startup */
struct topo_params {
	int num_packages;
	int num_die;
	int num_cpus;
	int num_cores;
	int allowed_packages;
	int allowed_cpus;
	int allowed_cores;
	int max_cpu_num;
	int max_node_num;
	int nodes_per_pkg;
	int cores_per_node;
	int threads_per_core;
} topo;

struct timeval tv_even, tv_odd, tv_delta;

int *irq_column_2_cpu;		/* /proc/interrupts column numbers */
int *irqs_per_cpu;		/* indexed by cpu_num */

void setup_all_buffers(bool startup);

/* Low-power-idle residency source: sysfs preferred, debugfs fallback */
char *sys_lpi_file;
char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
1433
1434 int cpu_is_not_present(int cpu)
1435 {
1436 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
1437 }
1438
1439 int cpu_is_not_allowed(int cpu)
1440 {
1441 return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set);
1442 }
1443
1444 /*
1445 * run func(thread, core, package) in topology order
1446 * skip non-present cpus
1447 */
1448
1449 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
1450 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
1451 {
1452 int retval, pkg_no, core_no, thread_no, node_no;
1453
1454 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
1455 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
1456 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
1457 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
1458 struct thread_data *t;
1459 struct core_data *c;
1460 struct pkg_data *p;
1461 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
1462
1463 if (cpu_is_not_allowed(t->cpu_id))
1464 continue;
1465
1466 c = GET_CORE(core_base, core_no, node_no, pkg_no);
1467 p = GET_PKG(pkg_base, pkg_no);
1468
1469 retval = func(t, c, p);
1470 if (retval)
1471 return retval;
1472 }
1473 }
1474 }
1475 }
1476 return 0;
1477 }
1478
1479 int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1480 {
1481 UNUSED(p);
1482
1483 return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0);
1484 }
1485
1486 int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1487 {
1488 UNUSED(c);
1489
1490 return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0);
1491 }
1492
/* True only for the single CPU that is first in both its core and its package */
int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	if (!is_cpu_first_thread_in_core(t, c, p))
		return 0;

	return is_cpu_first_core_in_package(t, c, p);
}
1497
1498 int cpu_migrate(int cpu)
1499 {
1500 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
1501 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
1502 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
1503 return -1;
1504 else
1505 return 0;
1506 }
1507
1508 int get_msr_fd(int cpu)
1509 {
1510 char pathname[32];
1511 int fd;
1512
1513 fd = fd_percpu[cpu];
1514
1515 if (fd)
1516 return fd;
1517
1518 sprintf(pathname, "/dev/cpu/%d/msr", cpu);
1519 fd = open(pathname, O_RDONLY);
1520 if (fd < 0)
1521 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
1522 "or run with --no-msr, or run as root", pathname);
1523
1524 fd_percpu[cpu] = fd;
1525
1526 return fd;
1527 }
1528
1529 static void bic_disable_msr_access(void)
1530 {
1531 const unsigned long bic_msrs =
1532 BIC_SMI |
1533 BIC_CPU_c1 |
1534 BIC_CPU_c3 |
1535 BIC_CPU_c6 |
1536 BIC_CPU_c7 |
1537 BIC_Mod_c6 |
1538 BIC_CoreTmp |
1539 BIC_Totl_c0 |
1540 BIC_Any_c0 |
1541 BIC_GFX_c0 |
1542 BIC_CPUGFX |
1543 BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_PkgTmp;
1544
1545 bic_enabled &= ~bic_msrs;
1546
1547 free_sys_counters();
1548 }
1549
/*
 * Thin wrapper for the perf_event_open(2) syscall (glibc provides no
 * wrapper).  Must never be reached when --no-perf was requested.
 */
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
	assert(!no_perf);

	return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
}
1556
1557 static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format)
1558 {
1559 struct perf_event_attr attr;
1560 const pid_t pid = -1;
1561 const unsigned long flags = 0;
1562
1563 assert(!no_perf);
1564
1565 memset(&attr, 0, sizeof(struct perf_event_attr));
1566
1567 attr.type = type;
1568 attr.size = sizeof(struct perf_event_attr);
1569 attr.config = config;
1570 attr.disabled = 0;
1571 attr.sample_type = PERF_SAMPLE_IDENTIFIER;
1572 attr.read_format = read_format;
1573
1574 const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
1575
1576 return fd;
1577 }
1578
1579 int get_instr_count_fd(int cpu)
1580 {
1581 if (fd_instr_count_percpu[cpu])
1582 return fd_instr_count_percpu[cpu];
1583
1584 fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
1585
1586 return fd_instr_count_percpu[cpu];
1587 }
1588
1589 int get_msr(int cpu, off_t offset, unsigned long long *msr)
1590 {
1591 ssize_t retval;
1592
1593 assert(!no_msr);
1594
1595 retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
1596
1597 if (retval != sizeof *msr)
1598 err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);
1599
1600 return 0;
1601 }
1602
1603 int probe_msr(int cpu, off_t offset)
1604 {
1605 ssize_t retval;
1606 unsigned long long dummy;
1607
1608 assert(!no_msr);
1609
1610 retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);
1611
1612 if (retval != sizeof(dummy))
1613 return 1;
1614
1615 return 0;
1616 }
1617
#define MAX_DEFERRED 16
/* Column names from --show/--hide not recognized yet; resolved after probing */
char *deferred_add_names[MAX_DEFERRED];
char *deferred_skip_names[MAX_DEFERRED];
int deferred_add_index;
int deferred_skip_index;

/*
 * HIDE_LIST - hide this list of counters, show the rest [default]
 * SHOW_LIST - show this list of counters, hide the rest
 */
enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
1629
/* Print command-line usage to the output stream and return */
void help(void)
{
	fprintf(outf,
		"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
		"\n"
		"Turbostat forks the specified COMMAND and prints statistics\n"
		"when COMMAND completes.\n"
		"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
		"to print statistics, until interrupted.\n"
		"  -a, --add	add a counter\n"
		"		  eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
		"  -c, --cpu	cpu-set	limit output to summary plus cpu-set:\n"
		"		  {core | package | j,k,l..m,n-p }\n"
		"  -d, --debug	displays usec, Time_Of_Day_Seconds and more debugging\n"
		"  -D, --Dump	displays the raw counter values\n"
		"  -e, --enable	[all | column]\n"
		"		  shows all or the specified disabled column\n"
		"  -H, --hide [column|column,column,...]\n"
		"		  hide the specified column(s)\n"
		"  -i, --interval sec.subsec\n"
		"		  Override default 5-second measurement interval\n"
		"  -J, --Joules	displays energy in Joules instead of Watts\n"
		"  -l, --list	list column headers only\n"
		"  -M, --no-msr Disable all uses of the MSR driver\n"
		"  -P, --no-perf Disable all uses of the perf API\n"
		"  -n, --num_iterations num\n"
		"		  number of the measurement iterations\n"
		"  -N, --header_iterations num\n"
		"		  print header every num iterations\n"
		"  -o, --out file\n"
		"		  create or truncate \"file\" for all output\n"
		"  -q, --quiet	skip decoding system configuration header\n"
		"  -s, --show [column|column,column,...]\n"
		"		  show only the specified column(s)\n"
		"  -S, --Summary\n"
		"		  limits output to 1-line system summary per interval\n"
		"  -T, --TCC temperature\n"
		"		  sets the Thermal Control Circuit temperature in\n"
		"		  degrees Celsius\n"
		"  -h, --help	print this help message\n"
		"  -v, --version	print version information\n" "\n" "For more help, run \"man turbostat\"\n");
}
1672
1673 /*
1674 * bic_lookup
1675 * for all the strings in comma separate name_list,
1676 * set the approprate bit in return value.
1677 */
1678 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
1679 {
1680 unsigned int i;
1681 unsigned long long retval = 0;
1682
1683 while (name_list) {
1684 char *comma;
1685
1686 comma = strchr(name_list, ',');
1687
1688 if (comma)
1689 *comma = '\0';
1690
1691 for (i = 0; i < MAX_BIC; ++i) {
1692 if (!strcmp(name_list, bic[i].name)) {
1693 retval |= (1ULL << i);
1694 break;
1695 }
1696 if (!strcmp(name_list, "all")) {
1697 retval |= ~0;
1698 break;
1699 } else if (!strcmp(name_list, "topology")) {
1700 retval |= BIC_TOPOLOGY;
1701 break;
1702 } else if (!strcmp(name_list, "power")) {
1703 retval |= BIC_THERMAL_PWR;
1704 break;
1705 } else if (!strcmp(name_list, "idle")) {
1706 retval |= BIC_IDLE;
1707 break;
1708 } else if (!strcmp(name_list, "frequency")) {
1709 retval |= BIC_FREQUENCY;
1710 break;
1711 } else if (!strcmp(name_list, "other")) {
1712 retval |= BIC_OTHER;
1713 break;
1714 }
1715
1716 }
1717 if (i == MAX_BIC) {
1718 if (mode == SHOW_LIST) {
1719 deferred_add_names[deferred_add_index++] = name_list;
1720 if (deferred_add_index >= MAX_DEFERRED) {
1721 fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
1722 MAX_DEFERRED, name_list);
1723 help();
1724 exit(1);
1725 }
1726 } else {
1727 deferred_skip_names[deferred_skip_index++] = name_list;
1728 if (debug)
1729 fprintf(stderr, "deferred \"%s\"\n", name_list);
1730 if (deferred_skip_index >= MAX_DEFERRED) {
1731 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
1732 MAX_DEFERRED, name_list);
1733 help();
1734 exit(1);
1735 }
1736 }
1737 }
1738
1739 name_list = comma;
1740 if (name_list)
1741 name_list++;
1742
1743 }
1744 return retval;
1745 }
1746
/*
 * Emit the column-header row into the output buffer 'outp', separating
 * columns with 'delim'.  Column order here must match the order the data
 * rows are formatted elsewhere.
 */
void print_header(char *delim)
{
	struct msr_counter *mp;
	int printed = 0;	/* suppress the delimiter before the first column */

	if (DO_BIC(BIC_USEC))
		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
	if (DO_BIC(BIC_TOD))
		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Package))
		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Die))
		outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Node))
		outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Core))
		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU))
		outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
	if (DO_BIC(BIC_APIC))
		outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
	if (DO_BIC(BIC_X2APIC))
		outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Avg_MHz))
		outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Busy))
		outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Bzy_MHz))
		outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
	if (DO_BIC(BIC_TSC_MHz))
		outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_IPC))
		outp += sprintf(outp, "%sIPC", (printed++ ? delim : ""));

	if (DO_BIC(BIC_IRQ)) {
		if (sums_need_wide_columns)
			outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
		else
			outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
	}

	if (DO_BIC(BIC_SMI))
		outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));

	/* user-added thread-scope counters */
	for (mp = sys.tp; mp; mp = mp->next) {

		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
			else
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
		} else {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
			else
				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
		}
	}

	if (DO_BIC(BIC_CPU_c1))
		outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_c3))
		outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_c6))
		outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_c7))
		outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));

	if (DO_BIC(BIC_Mod_c6))
		outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));

	if (DO_BIC(BIC_CoreTmp))
		outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));

	if (DO_BIC(BIC_CORE_THROT_CNT))
		outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));

	/* per-core RAPL columns appear here only on platforms with per-core RAPL */
	if (platform->rapl_msrs && !rapl_joules) {
		if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
	} else if (platform->rapl_msrs && rapl_joules) {
		if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
	}

	/* user-added core-scope counters */
	/* NOTE(review): these use bare 'delim' rather than (printed++ ? delim : "") — confirm intentional */
	for (mp = sys.cp; mp; mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
		} else {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%s", delim, mp->name);
		}
	}

	if (DO_BIC(BIC_PkgTmp))
		outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));

	if (DO_BIC(BIC_GFX_rc6))
		outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));

	if (DO_BIC(BIC_GFXMHz))
		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_GFXACTMHz))
		outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_SAM_mc6))
		outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : ""));

	if (DO_BIC(BIC_SAMMHz))
		outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_SAMACTMHz))
		outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_Totl_c0))
		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Any_c0))
		outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
	if (DO_BIC(BIC_GFX_c0))
		outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPUGFX))
		outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));

	if (DO_BIC(BIC_Pkgpc2))
		outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc3))
		outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc6))
		outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc7))
		outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc8))
		outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc9))
		outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc10))
		outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_LPI))
		outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
	if (DO_BIC(BIC_SYS_LPI))
		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));

	if (platform->rapl_msrs && !rapl_joules) {
		if (DO_BIC(BIC_PkgWatt))
			outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_GFXWatt))
			outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAMWatt))
			outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_PKG__))
			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAM__))
			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
	} else if (platform->rapl_msrs && rapl_joules) {
		if (DO_BIC(BIC_Pkg_J))
			outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_GFX_J))
			outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAM_J))
			outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_PKG__))
			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAM__))
			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
	}
	if (DO_BIC(BIC_UNCORE_MHZ))
		outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : ""));

	/* user-added package-scope counters */
	for (mp = sys.pp; mp; mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
		} else {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%s", delim, mp->name);
		}
	}

	outp += sprintf(outp, "\n");
}
1942
/*
 * dump_counters() - emit raw (unformatted) counter values for debugging.
 *
 * Prints the contents of the thread, core and package counter structures
 * into the global output buffer via outp.  Core data is emitted only for
 * the first thread in a core, package data only for the first core in a
 * package.  Always returns 0 so it can serve as a for_all_cpus() callback.
 */
int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i;
	struct msr_counter *mp;

	outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);

	if (t) {
		outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
		outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
		outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
		outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
		outp += sprintf(outp, "c1: %016llX\n", t->c1);

		if (DO_BIC(BIC_IPC))
			outp += sprintf(outp, "IPC: %lld\n", t->instr_count);

		if (DO_BIC(BIC_IRQ))
			outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
		if (DO_BIC(BIC_SMI))
			outp += sprintf(outp, "SMI: %d\n", t->smi_count);

		/* user-added per-thread counters (--add) */
		for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
			outp +=
			    sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
				    t->counter[i], mp->path);
		}
	}

	if (c && is_cpu_first_thread_in_core(t, c, p)) {
		outp += sprintf(outp, "core: %d\n", c->core_id);
		outp += sprintf(outp, "c3: %016llX\n", c->c3);
		outp += sprintf(outp, "c6: %016llX\n", c->c6);
		outp += sprintf(outp, "c7: %016llX\n", c->c7);
		outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
		outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);

		/*
		 * NOTE(review): the scaled energy (raw * scale) is truncated to an
		 * integer and printed in hex here — presumably acceptable for a raw
		 * debug dump, but confirm the fractional joules are intentionally
		 * dropped.
		 */
		const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale;
		const double energy_scale = c->core_energy.scale;
		if (c->core_energy.unit == RAPL_UNIT_JOULES)
			outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);

		/* user-added per-core counters (--add) */
		for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
			outp +=
			    sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
				    c->counter[i], mp->path);
		}
		outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
	}

	if (p && is_cpu_first_core_in_package(t, c, p)) {
		outp += sprintf(outp, "package: %d\n", p->package_id);

		outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
		outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
		outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
		outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);

		outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
		if (DO_BIC(BIC_Pkgpc3))
			outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
		if (DO_BIC(BIC_Pkgpc6))
			outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
		if (DO_BIC(BIC_Pkgpc7))
			outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
		outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
		outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
		outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
		outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
		/* RAPL counters are dumped raw (unscaled) */
		outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value);
		outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value);
		outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value);
		outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value);
		outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value);
		outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value);
		outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);

		/* user-added per-package counters (--add) */
		for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
			outp +=
			    sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
				    p->counter[i], mp->path);
		}
	}

	outp += sprintf(outp, "\n");

	return 0;
}
2032
2033 double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval)
2034 {
2035 assert(desired_unit != RAPL_UNIT_INVALID);
2036
2037 /*
2038 * For now we don't expect anything other than joules,
2039 * so just simplify the logic.
2040 */
2041 assert(c->unit == RAPL_UNIT_JOULES);
2042
2043 const double scaled = c->raw_value * c->scale;
2044
2045 if (desired_unit == RAPL_UNIT_WATTS)
2046 return scaled / interval;
2047 return scaled;
2048 }
2049
/*
 * column formatting convention & formats
 *
 * format_counters() - emit one formatted row of output for a given CPU,
 * or the system-summary row when t == &average.threads.
 *
 * Column order must stay in sync with print_header().  Per-core columns
 * are emitted only by the first thread in a core, per-package columns
 * only by the first core in a package.  Always returns 0 so it can be
 * used as a for_all_cpus() callback.
 */
int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	double interval_float, tsc;
	char *fmt8;
	int i;
	struct msr_counter *mp;
	char *delim = "\t";
	int printed = 0;

	/* if showing only 1st thread in core and this isn't one, bail out */
	if (show_core_only && !is_cpu_first_thread_in_core(t, c, p))
		return 0;

	/* if showing only 1st thread in pkg and this isn't one, bail out */
	if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p))
		return 0;

	/* if not summary line and --cpu is used, skip CPUs outside the subset */
	if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
		return 0;

	if (DO_BIC(BIC_USEC)) {
		/* on each row, print how many usec each timestamp took to gather */
		struct timeval tv;

		timersub(&t->tv_end, &t->tv_begin, &tv);
		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
	}

	/* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
	if (DO_BIC(BIC_TOD))
		outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);

	/* measurement interval in seconds, used for all rate columns */
	interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0;

	/* tsc_tweak compensates for TSC vs. base-clock ratio differences */
	tsc = t->tsc * tsc_tweak;

	/* topo columns, print blanks on 1st (average) line */
	if (t == &average.threads) {
		if (DO_BIC(BIC_Package))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_Die))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_Node))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_Core))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_CPU))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_APIC))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_X2APIC))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
	} else {
		if (DO_BIC(BIC_Package)) {
			if (p)
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
			else
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		}
		if (DO_BIC(BIC_Die)) {
			if (c)
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
			else
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		}
		if (DO_BIC(BIC_Node)) {
			if (t)
				outp += sprintf(outp, "%s%d",
						(printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id);
			else
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		}
		if (DO_BIC(BIC_Core)) {
			if (c)
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
			else
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		}
		if (DO_BIC(BIC_CPU))
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
		if (DO_BIC(BIC_APIC))
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
		if (DO_BIC(BIC_X2APIC))
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
	}

	/* Avg_MHz: average frequency over the whole interval (aperf cycles / wall time) */
	if (DO_BIC(BIC_Avg_MHz))
		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float);

	/* Busy%: non-idle fraction, mperf relative to TSC */
	if (DO_BIC(BIC_Busy))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc);

	/* Bzy_MHz: frequency while busy (aperf/mperf ratio) */
	if (DO_BIC(BIC_Bzy_MHz)) {
		if (has_base_hz)
			outp +=
			    sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
		else
			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
					tsc / units * t->aperf / t->mperf / interval_float);
	}

	if (DO_BIC(BIC_TSC_MHz))
		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float);

	/* instructions retired per aperf cycle */
	if (DO_BIC(BIC_IPC))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf);

	/* IRQ */
	if (DO_BIC(BIC_IRQ)) {
		if (sums_need_wide_columns)
			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
		else
			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
	}

	/* SMI */
	if (DO_BIC(BIC_SMI))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);

	/* Added counters (per-thread --add columns) */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
		} else if (mp->format == FORMAT_DELTA) {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
		} else if (mp->format == FORMAT_PERCENT) {
			/* usec counters: percent of wall-clock; others: percent of TSC */
			if (mp->type == COUNTER_USEC)
				outp +=
				    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
					    t->counter[i] / interval_float / 10000);
			else
				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc);
		}
	}

	/* C1 */
	if (DO_BIC(BIC_CPU_c1))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);

	/* print per-core data only for 1st thread in core */
	if (!is_cpu_first_thread_in_core(t, c, p))
		goto done;

	if (DO_BIC(BIC_CPU_c3))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc);
	if (DO_BIC(BIC_CPU_c6))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc);
	if (DO_BIC(BIC_CPU_c7))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc);

	/* Mod%c6 */
	if (DO_BIC(BIC_Mod_c6))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);

	if (DO_BIC(BIC_CoreTmp))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);

	/* Core throttle count */
	if (DO_BIC(BIC_CORE_THROT_CNT))
		outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt);

	/* Added counters (per-core --add columns) */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
		} else if (mp->format == FORMAT_DELTA) {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
		} else if (mp->format == FORMAT_PERCENT) {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc);
		}
	}

	/* shared format for all RAPL watt/joule columns below */
	fmt8 = "%s%.2f";

	if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float));

	/* print per-package data only for 1st core in package */
	if (!is_cpu_first_core_in_package(t, c, p))
		goto done;

	/* PkgTmp */
	if (DO_BIC(BIC_PkgTmp))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);

	/* GFXrc6 */
	if (DO_BIC(BIC_GFX_rc6)) {
		if (p->gfx_rc6_ms == -1) {	/* detect GFX counter reset */
			outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
		} else {
			/* ms / 10 / s == percent of interval */
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
					p->gfx_rc6_ms / 10.0 / interval_float);
		}
	}

	/* GFXMHz */
	if (DO_BIC(BIC_GFXMHz))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);

	/* GFXACTMHz */
	if (DO_BIC(BIC_GFXACTMHz))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);

	/* SAMmc6 */
	if (DO_BIC(BIC_SAM_mc6)) {
		if (p->sam_mc6_ms == -1) {	/* detect SAM counter reset */
			outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
		} else {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
					p->sam_mc6_ms / 10.0 / interval_float);
		}
	}

	/* SAMMHz */
	if (DO_BIC(BIC_SAMMHz))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz);

	/* SAMACTMHz */
	if (DO_BIC(BIC_SAMACTMHz))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz);

	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
	if (DO_BIC(BIC_Totl_c0))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
	if (DO_BIC(BIC_Any_c0))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc);
	if (DO_BIC(BIC_GFX_c0))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc);
	if (DO_BIC(BIC_CPUGFX))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc);

	/* package C-state residencies, percent of TSC */
	if (DO_BIC(BIC_Pkgpc2))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc);
	if (DO_BIC(BIC_Pkgpc3))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc);
	if (DO_BIC(BIC_Pkgpc6))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc);
	if (DO_BIC(BIC_Pkgpc7))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc);
	if (DO_BIC(BIC_Pkgpc8))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc);
	if (DO_BIC(BIC_Pkgpc9))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc);
	if (DO_BIC(BIC_Pkgpc10))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);

	/*
	 * NOTE(review): if cpu_lpi/sys_lpi are unsigned fields, the >= 0 tests
	 * below are always true and "(neg)" is unreachable — confirm field types.
	 */
	if (DO_BIC(BIC_CPU_LPI)) {
		if (p->cpu_lpi >= 0)
			outp +=
			    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
				    100.0 * p->cpu_lpi / 1000000.0 / interval_float);
		else
			outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
	}
	if (DO_BIC(BIC_SYS_LPI)) {
		if (p->sys_lpi >= 0)
			outp +=
			    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
				    100.0 * p->sys_lpi / 1000000.0 / interval_float);
		else
			outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
	}

	/* package RAPL columns */
	if (DO_BIC(BIC_PkgWatt))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_GFXWatt))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_RAMWatt))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_Pkg_J))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float));
	if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float));
	if (DO_BIC(BIC_GFX_J))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float));
	if (DO_BIC(BIC_RAM_J))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float));
	if (DO_BIC(BIC_PKG__))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_RAM__))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float));
	/* UncMHz */
	if (DO_BIC(BIC_UNCORE_MHZ))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);

	/* Added counters (per-package --add columns) */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
		} else if (mp->format == FORMAT_DELTA) {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
		} else if (mp->format == FORMAT_PERCENT) {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc);
		}
	}

done:
	/* terminate the row unless something above already did */
	if (*(outp - 1) != '\n')
		outp += sprintf(outp, "\n");

	return 0;
}
2398
2399 void flush_output_stdout(void)
2400 {
2401 FILE *filep;
2402
2403 if (outf == stderr)
2404 filep = stdout;
2405 else
2406 filep = outf;
2407
2408 fputs(output_buffer, filep);
2409 fflush(filep);
2410
2411 outp = output_buffer;
2412 }
2413
/*
 * flush_output_stderr() - write the accumulated row buffer to the
 * configured output stream (outf) and reset the accumulation pointer.
 */
void flush_output_stderr(void)
{
	fputs(output_buffer, outf);
	fflush(outf);
	outp = output_buffer;
}
2420
2421 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2422 {
2423 static int count;
2424
2425 if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only)
2426 print_header("\t");
2427
2428 format_counters(&average.threads, &average.cores, &average.packages);
2429
2430 count++;
2431
2432 if (summary_only)
2433 return;
2434
2435 for_all_cpus(format_counters, t, c, p);
2436 }
2437
/*
 * DELTA_WRAP32(new, old): old = new - old, computed modulo 2^32, so a
 * 32-bit hardware counter that wrapped between samples still yields the
 * correct (positive) delta.
 */
#define DELTA_WRAP32(new, old) \
	old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32);
2440
/*
 * delta_package() - compute per-package deltas in place: old = new - old.
 *
 * Cumulative counters become interval deltas; instantaneous readings
 * (temperature, frequencies) are simply copied forward.  Counters that
 * can reset between samples (rc6/mc6) are flagged with -1 so the
 * formatter prints "**.**" instead of a bogus value.  Returns 0.
 */
int delta_package(struct pkg_data *new, struct pkg_data *old)
{
	int i;
	struct msr_counter *mp;

	if (DO_BIC(BIC_Totl_c0))
		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
	if (DO_BIC(BIC_Any_c0))
		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
	if (DO_BIC(BIC_GFX_c0))
		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
	if (DO_BIC(BIC_CPUGFX))
		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;

	old->pc2 = new->pc2 - old->pc2;
	if (DO_BIC(BIC_Pkgpc3))
		old->pc3 = new->pc3 - old->pc3;
	if (DO_BIC(BIC_Pkgpc6))
		old->pc6 = new->pc6 - old->pc6;
	if (DO_BIC(BIC_Pkgpc7))
		old->pc7 = new->pc7 - old->pc7;
	old->pc8 = new->pc8 - old->pc8;
	old->pc9 = new->pc9 - old->pc9;
	old->pc10 = new->pc10 - old->pc10;
	old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
	old->sys_lpi = new->sys_lpi - old->sys_lpi;
	/* temperature is a snapshot, not a delta */
	old->pkg_temp_c = new->pkg_temp_c;

	/* flag an error when rc6 counter resets/wraps */
	if (old->gfx_rc6_ms > new->gfx_rc6_ms)
		old->gfx_rc6_ms = -1;
	else
		old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;

	/* frequencies are snapshots, not deltas */
	old->uncore_mhz = new->uncore_mhz;
	old->gfx_mhz = new->gfx_mhz;
	old->gfx_act_mhz = new->gfx_act_mhz;

	/* flag an error when mc6 counter resets/wraps */
	if (old->sam_mc6_ms > new->sam_mc6_ms)
		old->sam_mc6_ms = -1;
	else
		old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms;

	old->sam_mhz = new->sam_mhz;
	old->sam_act_mhz = new->sam_act_mhz;

	/* RAPL energy/perf-status counters: raw deltas, scale applied later */
	old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value;
	old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value;
	old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value;
	old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value;
	old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value;
	old->rapl_dram_perf_status.raw_value =
	    new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;

	/* user-added counters: RAW columns are snapshots, others deltas */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}

	return 0;
}
2505
/*
 * delta_core() - compute per-core deltas in place: old = new - old.
 *
 * Temperature and throttle count are carried forward as snapshots.
 * The core RAPL energy counter is 32 bits wide in hardware, so its
 * delta is computed modulo 2^32 via DELTA_WRAP32().
 */
void delta_core(struct core_data *new, struct core_data *old)
{
	int i;
	struct msr_counter *mp;

	old->c3 = new->c3 - old->c3;
	old->c6 = new->c6 - old->c6;
	old->c7 = new->c7 - old->c7;
	old->core_temp_c = new->core_temp_c;
	old->core_throt_cnt = new->core_throt_cnt;
	old->mc6_us = new->mc6_us - old->mc6_us;

	DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);

	/* user-added counters: RAW columns are snapshots, others deltas */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}
}
2527
2528 int soft_c1_residency_display(int bic)
2529 {
2530 if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res)
2531 return 0;
2532
2533 return DO_BIC_READ(bic);
2534 }
2535
/*
 * delta_thread() - compute per-thread deltas in place: old = new - old.
 *
 * core_delta supplies this core's already-computed C-state deltas, used
 * to derive C1 residency when no dedicated C1 MSR exists.  Returns 0 on
 * success, -1 when aperf/mperf went backwards (sample must be discarded).
 */
int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta)
{
	int i;
	struct msr_counter *mp;

	/* we run cpuid just the 1st time, copy the results */
	if (DO_BIC(BIC_APIC))
		new->apic_id = old->apic_id;
	if (DO_BIC(BIC_X2APIC))
		new->x2apic_id = old->x2apic_id;

	/*
	 * the timestamps from start of measurement interval are in "old"
	 * the timestamp from end of measurement interval are in "new"
	 * over-write old w/ new so we can print end of interval values
	 */

	timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
	old->tv_begin = new->tv_begin;
	old->tv_end = new->tv_end;

	old->tsc = new->tsc - old->tsc;

	/* check for TSC < 1 Mcycles over interval */
	if (old->tsc < (1000 * 1000))
		errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
		     "You can disable all c-states by booting with \"idle=poll\"\n"
		     "or just the deep ones with \"processor.max_cstate=1\"");

	old->c1 = new->c1 - old->c1;

	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
	    || soft_c1_residency_display(BIC_Avg_MHz)) {
		if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
			old->aperf = new->aperf - old->aperf;
			old->mperf = new->mperf - old->mperf;
		} else {
			/* non-monotonic aperf/mperf: reject this sample */
			return -1;
		}
	}

	if (platform->has_msr_core_c1_res) {
		/*
		 * Some models have a dedicated C1 residency MSR,
		 * which should be more accurate than the derivation below.
		 */
	} else {
		/*
		 * As counter collection is not atomic,
		 * it is possible for mperf's non-halted cycles + idle states
		 * to exceed TSC's all cycles: show c1 = 0% in that case.
		 */
		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
			old->c1 = 0;
		else {
			/* normal case, derive c1 */
			old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
			    - core_delta->c6 - core_delta->c7;
		}
	}

	if (old->mperf == 0) {
		if (debug > 1)
			fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
		old->mperf = 1;	/* divide by 0 protection */
	}

	if (DO_BIC(BIC_IPC))
		old->instr_count = new->instr_count - old->instr_count;

	if (DO_BIC(BIC_IRQ))
		old->irq_count = new->irq_count - old->irq_count;

	if (DO_BIC(BIC_SMI))
		old->smi_count = new->smi_count - old->smi_count;

	/* user-added counters: RAW columns are snapshots, others deltas */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}
	return 0;
}
2623
/*
 * delta_cpu() - compute all deltas for one CPU: core (first thread in
 * core only), thread (always), and package (first core in package only).
 *
 * The "2" structures hold the previous sample and receive the deltas.
 * Returns 0 on success, or delta_thread()/delta_package()'s error code.
 */
int delta_cpu(struct thread_data *t, struct core_data *c,
	      struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
{
	int rc;

	/* per-core deltas are computed once, by the core's first thread */
	if (is_cpu_first_thread_in_core(t, c, p))
		delta_core(c, c2);

	/* every thread computes its own delta; c2 now holds the core delta */
	rc = delta_thread(t, t2, c2);
	if (rc)
		return rc;

	/* per-package deltas are computed once, by the package's first core */
	if (is_cpu_first_core_in_package(t, c, p))
		return delta_package(p, p2);

	return 0;
}
2644
2645 void rapl_counter_clear(struct rapl_counter *c)
2646 {
2647 c->raw_value = 0;
2648 c->scale = 0.0;
2649 c->unit = RAPL_UNIT_INVALID;
2650 }
2651
/*
 * clear_counters() - zero every field of the given thread/core/package
 * counter structures, including user-added (--add) counters.
 *
 * Used to reset the "average" accumulators before summing all CPUs.
 */
void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i;
	struct msr_counter *mp;

	t->tv_begin.tv_sec = 0;
	t->tv_begin.tv_usec = 0;
	t->tv_end.tv_sec = 0;
	t->tv_end.tv_usec = 0;
	t->tv_delta.tv_sec = 0;
	t->tv_delta.tv_usec = 0;

	t->tsc = 0;
	t->aperf = 0;
	t->mperf = 0;
	t->c1 = 0;

	t->instr_count = 0;

	t->irq_count = 0;
	t->smi_count = 0;

	c->c3 = 0;
	c->c6 = 0;
	c->c7 = 0;
	c->mc6_us = 0;
	c->core_temp_c = 0;
	rapl_counter_clear(&c->core_energy);
	c->core_throt_cnt = 0;

	p->pkg_wtd_core_c0 = 0;
	p->pkg_any_core_c0 = 0;
	p->pkg_any_gfxe_c0 = 0;
	p->pkg_both_core_gfxe_c0 = 0;

	p->pc2 = 0;
	if (DO_BIC(BIC_Pkgpc3))
		p->pc3 = 0;
	if (DO_BIC(BIC_Pkgpc6))
		p->pc6 = 0;
	if (DO_BIC(BIC_Pkgpc7))
		p->pc7 = 0;
	p->pc8 = 0;
	p->pc9 = 0;
	p->pc10 = 0;
	p->cpu_lpi = 0;
	p->sys_lpi = 0;

	rapl_counter_clear(&p->energy_pkg);
	rapl_counter_clear(&p->energy_dram);
	rapl_counter_clear(&p->energy_cores);
	rapl_counter_clear(&p->energy_gfx);
	rapl_counter_clear(&p->rapl_pkg_perf_status);
	rapl_counter_clear(&p->rapl_dram_perf_status);
	p->pkg_temp_c = 0;

	p->gfx_rc6_ms = 0;
	p->uncore_mhz = 0;
	p->gfx_mhz = 0;
	p->gfx_act_mhz = 0;
	p->sam_mc6_ms = 0;
	p->sam_mhz = 0;
	p->sam_act_mhz = 0;
	/* user-added (--add) counters at each topology level */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
		t->counter[i] = 0;

	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
		c->counter[i] = 0;

	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
		p->counter[i] = 0;
}
2724
/*
 * rapl_counter_accumulate() - add src's raw value into dst (for the
 * system-wide averages).
 *
 * dst inherits src's unit and scale on first use (after a clear).
 * Mixing counters with different units or scales is a programming
 * error, caught by the asserts.
 */
void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src)
{
	/* Copy unit and scale from src if dst is not initialized */
	if (dst->unit == RAPL_UNIT_INVALID) {
		dst->unit = src->unit;
		dst->scale = src->scale;
	}

	assert(dst->unit == src->unit);
	assert(dst->scale == src->scale);

	dst->raw_value += src->raw_value;
}
2738
/*
 * sum_counters() - accumulate one CPU's counters into the global
 * "average" structures (for_all_cpus() callback; always returns 0).
 *
 * Per-core values are added only by the first thread in each core,
 * per-package values only by the first core in each package.
 * compute_average() later divides the sums by the CPU/core counts.
 */
int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i;
	struct msr_counter *mp;

	/* copy un-changing apic_id's */
	if (DO_BIC(BIC_APIC))
		average.threads.apic_id = t->apic_id;
	if (DO_BIC(BIC_X2APIC))
		average.threads.x2apic_id = t->x2apic_id;

	/* remember first tv_begin */
	if (average.threads.tv_begin.tv_sec == 0)
		average.threads.tv_begin = t->tv_begin;

	/* remember last tv_end */
	average.threads.tv_end = t->tv_end;

	average.threads.tsc += t->tsc;
	average.threads.aperf += t->aperf;
	average.threads.mperf += t->mperf;
	average.threads.c1 += t->c1;

	average.threads.instr_count += t->instr_count;

	average.threads.irq_count += t->irq_count;
	average.threads.smi_count += t->smi_count;

	/* RAW user-added columns are not summed (a sum is meaningless) */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.threads.counter[i] += t->counter[i];
	}

	/* sum per-core values only for 1st thread in core */
	if (!is_cpu_first_thread_in_core(t, c, p))
		return 0;

	average.cores.c3 += c->c3;
	average.cores.c6 += c->c6;
	average.cores.c7 += c->c7;
	average.cores.mc6_us += c->mc6_us;

	/* report the hottest core / highest throttle count, not an average */
	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
	average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);

	rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy);

	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.cores.counter[i] += c->counter[i];
	}

	/* sum per-pkg values only for 1st core in pkg */
	if (!is_cpu_first_core_in_package(t, c, p))
		return 0;

	if (DO_BIC(BIC_Totl_c0))
		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
	if (DO_BIC(BIC_Any_c0))
		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
	if (DO_BIC(BIC_GFX_c0))
		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
	if (DO_BIC(BIC_CPUGFX))
		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;

	average.packages.pc2 += p->pc2;
	if (DO_BIC(BIC_Pkgpc3))
		average.packages.pc3 += p->pc3;
	if (DO_BIC(BIC_Pkgpc6))
		average.packages.pc6 += p->pc6;
	if (DO_BIC(BIC_Pkgpc7))
		average.packages.pc7 += p->pc7;
	average.packages.pc8 += p->pc8;
	average.packages.pc9 += p->pc9;
	average.packages.pc10 += p->pc10;

	/* system-wide (not per-package) counters: last value wins */
	average.packages.cpu_lpi = p->cpu_lpi;
	average.packages.sys_lpi = p->sys_lpi;

	rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg);
	rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram);
	rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores);
	rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx);

	/* graphics/SAM values: snapshots, last package's reading wins */
	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
	average.packages.uncore_mhz = p->uncore_mhz;
	average.packages.gfx_mhz = p->gfx_mhz;
	average.packages.gfx_act_mhz = p->gfx_act_mhz;
	average.packages.sam_mc6_ms = p->sam_mc6_ms;
	average.packages.sam_mhz = p->sam_mhz;
	average.packages.sam_act_mhz = p->sam_act_mhz;

	/* report the hottest package */
	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);

	rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status);
	rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status);

	/*
	 * NOTE(review): unlike the thread/core loops above, RAW package
	 * columns are summed unless topo.num_packages == 0 — that guard looks
	 * like it can never be true once topology is enumerated; confirm the
	 * intended condition.
	 */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0))
			average.packages.counter[i] = p->counter[i];
		else
			average.packages.counter[i] += p->counter[i];
	}
	return 0;
}
2846
/*
 * sum the counters for all cpus in the system
 * compute the weighted average
 */
void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i;
	struct msr_counter *mp;

	/* Zero the accumulators, then total every allowed CPU into them. */
	clear_counters(&average.threads, &average.cores, &average.packages);

	for_all_cpus(sum_counters, t, c, p);

	/* Use the global time delta for the average. */
	average.threads.tv_delta = tv_delta;

	/* Thread-scope sums become per-CPU means. */
	average.threads.tsc /= topo.allowed_cpus;
	average.threads.aperf /= topo.allowed_cpus;
	average.threads.mperf /= topo.allowed_cpus;
	average.threads.instr_count /= topo.allowed_cpus;
	average.threads.c1 /= topo.allowed_cpus;

	/* widen the IRQ column if the system-wide sum no longer fits */
	if (average.threads.irq_count > 9999999)
		sums_need_wide_columns = 1;

	/* Core-scope sums become per-core means. */
	average.cores.c3 /= topo.allowed_cores;
	average.cores.c6 /= topo.allowed_cores;
	average.cores.c7 /= topo.allowed_cores;
	average.cores.mc6_us /= topo.allowed_cores;

	/* Package-scope sums become per-package means. */
	if (DO_BIC(BIC_Totl_c0))
		average.packages.pkg_wtd_core_c0 /= topo.allowed_packages;
	if (DO_BIC(BIC_Any_c0))
		average.packages.pkg_any_core_c0 /= topo.allowed_packages;
	if (DO_BIC(BIC_GFX_c0))
		average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages;
	if (DO_BIC(BIC_CPUGFX))
		average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages;

	average.packages.pc2 /= topo.allowed_packages;
	if (DO_BIC(BIC_Pkgpc3))
		average.packages.pc3 /= topo.allowed_packages;
	if (DO_BIC(BIC_Pkgpc6))
		average.packages.pc6 /= topo.allowed_packages;
	if (DO_BIC(BIC_Pkgpc7))
		average.packages.pc7 /= topo.allowed_packages;

	average.packages.pc8 /= topo.allowed_packages;
	average.packages.pc9 /= topo.allowed_packages;
	average.packages.pc10 /= topo.allowed_packages;

	/* User-added thread counters: RAW stays a raw sum, ITEMS only
	 * updates column width, everything else becomes a per-CPU mean. */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		if (mp->type == COUNTER_ITEMS) {
			if (average.threads.counter[i] > 9999999)
				sums_need_wide_columns = 1;
			continue;
		}
		average.threads.counter[i] /= topo.allowed_cpus;
	}
	/* NOTE(review): unlike the thread loop above, the core/package loops
	 * below divide COUNTER_ITEMS counters too (no "continue" after the
	 * width check) -- confirm whether that asymmetry is intentional. */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		if (mp->type == COUNTER_ITEMS) {
			if (average.cores.counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
		average.cores.counter[i] /= topo.allowed_cores;
	}
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		if (mp->type == COUNTER_ITEMS) {
			if (average.packages.counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
		average.packages.counter[i] /= topo.allowed_packages;
	}
}
2927
/* Return the CPU's 64-bit time-stamp counter (RDTSC returns it in EDX:EAX). */
static unsigned long long rdtsc(void)
{
	unsigned int lo, hi;

	__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));

	return ((unsigned long long)hi << 32) | lo;
}
2936
/*
 * Open a file, and exit on failure
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream = fopen(path, mode);

	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
2948
/*
 * snapshot_sysfs_counter()
 *
 * Read and return one unsigned decimal counter value from the sysfs
 * file at @path.  Exits via err() on open or parse failure.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/* %llu: counter is unsigned long long; %lld was a signed/unsigned mismatch */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
2970
/*
 * get_mp(cpu, mp, counterp)
 *
 * Snapshot one user-added counter for @cpu into *counterp: from an MSR
 * when mp->msr_num is set, otherwise from sysfs (a per-cpu node when
 * SYSFS_PERCPU is flagged, else a single system-wide path).
 * Returns 0 on success, -1 on MSR read failure.
 */
int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
{
	if (mp->msr_num != 0) {
		/* MSR-backed counter requires MSR access to be enabled */
		assert(!no_msr);
		if (get_msr(cpu, mp->msr_num, counterp))
			return -1;
	} else {
		char path[128 + PATH_BYTES];

		if (mp->flags & SYSFS_PERCPU) {
			/* per-cpu node: /sys/devices/system/cpu/cpuN/<path> */
			sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->path);

			*counterp = snapshot_sysfs_counter(path);
		} else {
			*counterp = snapshot_sysfs_counter(mp->path);
		}
	}

	return 0;
}
2991
/*
 * get_uncore_mhz(package, die)
 *
 * Return the current uncore frequency in MHz for the given package/die,
 * via the intel_uncore_frequency sysfs interface (which reports kHz).
 */
unsigned long long get_uncore_mhz(int package, int die)
{
	char path[128];

	/* snprintf: never overflow the fixed-size path buffer */
	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz",
		 package, die);

	return (snapshot_sysfs_counter(path) / 1000);
}
3001
3002 int get_epb(int cpu)
3003 {
3004 char path[128 + PATH_BYTES];
3005 unsigned long long msr;
3006 int ret, epb = -1;
3007 FILE *fp;
3008
3009 sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
3010
3011 fp = fopen(path, "r");
3012 if (!fp)
3013 goto msr_fallback;
3014
3015 ret = fscanf(fp, "%d", &epb);
3016 if (ret != 1)
3017 err(1, "%s(%s)", __func__, path);
3018
3019 fclose(fp);
3020
3021 return epb;
3022
3023 msr_fallback:
3024 if (no_msr)
3025 return -1;
3026
3027 get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
3028
3029 return msr & 0xf;
3030 }
3031
/*
 * get_apic_id(t)
 *
 * Record this thread's APIC id (CPUID.1 EBX[31:24]) and, when x2APIC
 * display is enabled, its x2APIC id: CPUID leaf 0x8000001e on AMD/Hygon
 * (when topology extensions are present), or leaf 0xb on Intel.
 * Must be called while running on the CPU in question.
 */
void get_apic_id(struct thread_data *t)
{
	unsigned int eax, ebx, ecx, edx;

	if (DO_BIC(BIC_APIC)) {
		eax = ebx = ecx = edx = 0;
		__cpuid(1, eax, ebx, ecx, edx);

		/* initial (8-bit) APIC id is in EBX[31:24] */
		t->apic_id = (ebx >> 24) & 0xff;
	}

	if (!DO_BIC(BIC_X2APIC))
		return;

	if (authentic_amd || hygon_genuine) {
		unsigned int topology_extensions;

		/* leaf 0x8000001e must be supported */
		if (max_extended_level < 0x8000001e)
			return;

		eax = ebx = ecx = edx = 0;
		__cpuid(0x80000001, eax, ebx, ecx, edx);
		/* ECX bit 22: topology extensions, prerequisite for leaf 0x8000001e */
		topology_extensions = ecx & (1 << 22);

		if (topology_extensions == 0)
			return;

		eax = ebx = ecx = edx = 0;
		__cpuid(0x8000001e, eax, ebx, ecx, edx);

		t->x2apic_id = eax;
		return;
	}

	if (!genuine_intel)
		return;

	if (max_level < 0xb)
		return;

	/* Intel: extended topology leaf 0xb returns the x2APIC id in EDX */
	ecx = 0;
	__cpuid(0xb, eax, ebx, ecx, edx);
	t->x2apic_id = edx;

	/* the low byte of the x2APIC id should equal the legacy APIC id */
	if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
		fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
}
3079
3080 int get_core_throt_cnt(int cpu, unsigned long long *cnt)
3081 {
3082 char path[128 + PATH_BYTES];
3083 unsigned long long tmp;
3084 FILE *fp;
3085 int ret;
3086
3087 sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu);
3088 fp = fopen(path, "r");
3089 if (!fp)
3090 return -1;
3091 ret = fscanf(fp, "%lld", &tmp);
3092 fclose(fp);
3093 if (ret != 1)
3094 return -1;
3095 *cnt = tmp;
3096
3097 return 0;
3098 }
3099
/* Pair of perf fds for one CPU's APERF/MPERF event group. */
struct amperf_group_fd {
	int aperf;		/* Also the group descriptor */
	int mperf;
};
3104
3105 static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr)
3106 {
3107 int fdmt;
3108 int bytes_read;
3109 char buf[64];
3110 int ret = -1;
3111
3112 fdmt = open(path, O_RDONLY, 0);
3113 if (fdmt == -1) {
3114 if (debug)
3115 fprintf(stderr, "Failed to parse perf counter info %s\n", path);
3116 ret = -1;
3117 goto cleanup_and_exit;
3118 }
3119
3120 bytes_read = read(fdmt, buf, sizeof(buf) - 1);
3121 if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) {
3122 if (debug)
3123 fprintf(stderr, "Failed to parse perf counter info %s\n", path);
3124 ret = -1;
3125 goto cleanup_and_exit;
3126 }
3127
3128 buf[bytes_read] = '\0';
3129
3130 if (sscanf(buf, parse_format, value_ptr) != 1) {
3131 if (debug)
3132 fprintf(stderr, "Failed to parse perf counter info %s\n", path);
3133 ret = -1;
3134 goto cleanup_and_exit;
3135 }
3136
3137 ret = 0;
3138
3139 cleanup_and_exit:
3140 close(fdmt);
3141 return ret;
3142 }
3143
/*
 * Numeric wrapper around read_perf_counter_info(): parse one unsigned
 * value from @path, or return (unsigned)-1 on any failure.
 */
static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format)
{
	unsigned int value;

	if (read_perf_counter_info(path, parse_format, &value) != 0)
		return (unsigned int)-1;

	return value;
}
3155
/* Return the perf event_source type id of the "msr" PMU. */
static unsigned read_msr_type(void)
{
	return read_perf_counter_info_n("/sys/bus/event_source/devices/msr/type", "%u");
}
3163
/* Return the "msr" PMU event encoding for APERF. */
static unsigned read_aperf_config(void)
{
	return read_perf_counter_info_n("/sys/bus/event_source/devices/msr/events/aperf", "event=%x");
}
3171
/* Return the "msr" PMU event encoding for MPERF. */
static unsigned read_mperf_config(void)
{
	return read_perf_counter_info_n("/sys/bus/event_source/devices/msr/events/mperf", "event=%x");
}
3179
/* Return the perf event_source type id of the @subsys PMU. */
static unsigned read_perf_type(const char *subsys)
{
	char path[128];

	snprintf(path, sizeof(path), "/sys/bus/event_source/devices/%s/type", subsys);

	return read_perf_counter_info_n(path, "%u");
}
3190
/* Return the event encoding of @event_name under the @subsys PMU. */
static unsigned read_rapl_config(const char *subsys, const char *event_name)
{
	char path[128];

	snprintf(path, sizeof(path), "/sys/bus/event_source/devices/%s/events/%s", subsys, event_name);

	return read_perf_counter_info_n(path, "event=%x");
}
3201
3202 static unsigned read_perf_rapl_unit(const char *subsys, const char *event_name)
3203 {
3204 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit";
3205 const char *const format = "%s";
3206 char path[128];
3207 char unit_buffer[16];
3208
3209 snprintf(path, sizeof(path), path_format, subsys, event_name);
3210
3211 read_perf_counter_info(path, format, &unit_buffer);
3212 if (strcmp("Joules", unit_buffer) == 0)
3213 return RAPL_UNIT_JOULES;
3214
3215 return RAPL_UNIT_INVALID;
3216 }
3217
/* Return the perf-advertised scale for @subsys/@event_name, or 0.0 on failure. */
static double read_perf_rapl_scale(const char *subsys, const char *event_name)
{
	char path[128];
	double scale;

	snprintf(path, sizeof(path), "/sys/bus/event_source/devices/%s/events/%s.scale", subsys, event_name);

	if (read_perf_counter_info(path, "%lf", &scale) != 0)
		return 0.0;

	return scale;
}
3232
/*
 * open_amperf_fd(cpu)
 *
 * Open APERF and MPERF on @cpu as one perf event group (aperf is the
 * group leader), so both can be fetched atomically with a single
 * PERF_FORMAT_GROUP read().
 */
static struct amperf_group_fd open_amperf_fd(int cpu)
{
	const unsigned int msr_type = read_msr_type();
	const unsigned int aperf_config = read_aperf_config();
	const unsigned int mperf_config = read_mperf_config();
	struct amperf_group_fd fds = {.aperf = -1,.mperf = -1 };

	/* aperf first with group fd -1 (leader), then mperf joins its group */
	fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP);
	fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP);

	return fds;
}
3245
/*
 * get_amperf_fd(cpu)
 *
 * Return @cpu's cached APERF group fd, opening the APERF/MPERF
 * group on first use.
 * NOTE(review): a zero fd is treated as "not yet opened" -- assumes
 * open_amperf_fd() never returns fd 0; confirm.
 */
static int get_amperf_fd(int cpu)
{
	assert(fd_amperf_percpu);

	if (fd_amperf_percpu[cpu].aperf)
		return fd_amperf_percpu[cpu].aperf;

	fd_amperf_percpu[cpu] = open_amperf_fd(cpu);

	return fd_amperf_percpu[cpu].aperf;
}
3257
/* Read APERF, MPERF and TSC using the perf API. */
static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu)
{
	/* Layout of a PERF_FORMAT_GROUP read: nr_entries, then one value
	 * per group member -- aperf (the leader) first, then mperf. */
	union {
		struct {
			unsigned long nr_entries;
			unsigned long aperf;
			unsigned long mperf;
		};

		unsigned long as_array[3];
	} cnt;

	const int fd_amperf = get_amperf_fd(cpu);

	/*
	 * Read the TSC with rdtsc, because we want the absolute value and not
	 * the offset from the start of the counter.
	 */
	t->tsc = rdtsc();

	/* one read() fetches both counters atomically as a group */
	const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array));
	if (n != sizeof(cnt.as_array))
		return -2;

	t->aperf = cnt.aperf * aperf_mperf_multiplier;
	t->mperf = cnt.mperf * aperf_mperf_multiplier;

	return 0;
}
3288
/* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */
static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu)
{
	unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
	int aperf_mperf_retry_count = 0;

	/*
	 * The TSC, APERF and MPERF must be read together for
	 * APERF/MPERF and MPERF/TSC to give accurate results.
	 *
	 * Unfortunately, APERF and MPERF are read by
	 * individual system call, so delays may occur
	 * between them.  If the time to read them
	 * varies by a large amount, we re-read them.
	 */

	/*
	 * This initial dummy APERF read has been seen to
	 * reduce jitter in the subsequent reads.
	 */

	if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
		return -3;

retry:
	t->tsc = rdtsc();	/* re-read close to APERF */

	tsc_before = t->tsc;

	if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
		return -3;

	tsc_between = rdtsc();

	if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
		return -4;

	tsc_after = rdtsc();

	/* how long each MSR read took, in TSC ticks */
	aperf_time = tsc_between - tsc_before;
	mperf_time = tsc_after - tsc_between;

	/*
	 * If the system call latency to read APERF and MPERF
	 * differ by more than 2x, then try again.
	 */
	if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
		aperf_mperf_retry_count++;
		if (aperf_mperf_retry_count < 5)
			goto retry;
		else
			warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
	}
	aperf_mperf_retry_count = 0;

	t->aperf = t->aperf * aperf_mperf_multiplier;
	t->mperf = t->mperf * aperf_mperf_multiplier;

	return 0;
}
3349
3350 size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci)
3351 {
3352 size_t ret = 0;
3353
3354 for (int i = 0; i < NUM_RAPL_COUNTERS; ++i)
3355 if (rci->source[i] == RAPL_SOURCE_PERF)
3356 ++ret;
3357
3358 return ret;
3359 }
3360
/* Publish RAPL counter slot @idx of the domain snapshot @rci into @rc. */
void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx)
{
	rc->raw_value = rci->data[idx];
	rc->unit = rci->unit[idx];
	rc->scale = rci->scale[idx];
}
3367
3368 int get_rapl_counters(int cpu, int domain, struct core_data *c, struct pkg_data *p)
3369 {
3370 unsigned long long perf_data[NUM_RAPL_COUNTERS + 1];
3371 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain];
3372
3373 if (debug)
3374 fprintf(stderr, "get_rapl_counters: cpu%d domain%d\n", cpu, domain);
3375
3376 assert(rapl_counter_info_perdomain);
3377
3378 /*
3379 * If we have any perf counters to read, read them all now, in bulk
3380 */
3381 if (rci->fd_perf != -1) {
3382 size_t num_perf_counters = rapl_counter_info_count_perf(rci);
3383 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
3384 const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data));
3385 if (actual_read_size != expected_read_size)
3386 err(-1, "get_rapl_counters: failed to read perf_data (%zu %zu)", expected_read_size,
3387 actual_read_size);
3388 }
3389
3390 for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) {
3391 switch (rci->source[i]) {
3392 case RAPL_SOURCE_NONE:
3393 break;
3394
3395 case RAPL_SOURCE_PERF:
3396 assert(pi < ARRAY_SIZE(perf_data));
3397 assert(rci->fd_perf != -1);
3398
3399 if (debug)
3400 fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n",
3401 i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]);
3402
3403 rci->data[i] = perf_data[pi];
3404
3405 ++pi;
3406 break;
3407
3408 case RAPL_SOURCE_MSR:
3409 if (debug)
3410 fprintf(stderr, "Reading rapl counter via msr at %u\n", i);
3411
3412 assert(!no_msr);
3413 if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) {
3414 if (get_msr_sum(cpu, rci->msr[i], &rci->data[i]))
3415 return -13 - i;
3416 } else {
3417 if (get_msr(cpu, rci->msr[i], &rci->data[i]))
3418 return -13 - i;
3419 }
3420
3421 rci->data[i] &= rci->msr_mask[i];
3422 if (rci->msr_shift[i] >= 0)
3423 rci->data[i] >>= abs(rci->msr_shift[i]);
3424 else
3425 rci->data[i] <<= abs(rci->msr_shift[i]);
3426
3427 break;
3428 }
3429 }
3430
3431 _Static_assert(NUM_RAPL_COUNTERS == 7);
3432 write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG);
3433 write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES);
3434 write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM);
3435 write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX);
3436 write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS);
3437 write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS);
3438 write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY);
3439
3440 return 0;
3441 }
3442
/*
 * get_counters(...)
 * migrate to cpu
 * acquire and record local counters for that cpu
 */
int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int cpu = t->cpu_id;
	unsigned long long msr;
	struct msr_counter *mp;
	int i;
	int status;

	/* all rdtsc/MSR reads below must run on the CPU being measured */
	if (cpu_migrate(cpu)) {
		fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	gettimeofday(&t->tv_begin, (struct timezone *)NULL);

	if (first_counter_read)
		get_apic_id(t);

	t->tsc = rdtsc();	/* we are running on local CPU of interest */

	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
	    || soft_c1_residency_display(BIC_Avg_MHz)) {
		int status = -1;

		/* need at least one of perf or MSR access for APERF/MPERF */
		assert(!no_perf || !no_msr);

		switch (amperf_source) {
		case AMPERF_SOURCE_PERF:
			status = read_aperf_mperf_tsc_perf(t, cpu);
			break;
		case AMPERF_SOURCE_MSR:
			status = read_aperf_mperf_tsc_msr(t, cpu);
			break;
		}

		if (status != 0)
			return status;
	}

	if (DO_BIC(BIC_IPC))
		if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
			return -4;

	if (DO_BIC(BIC_IRQ))
		t->irq_count = irqs_per_cpu[cpu];
	if (DO_BIC(BIC_SMI)) {
		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
			return -5;
		/* SMI count is in the low 32 bits of the MSR */
		t->smi_count = msr & 0xFFFFFFFF;
	}
	if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) {
		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
			return -6;
	}

	/* user-added thread-scope counters */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &t->counter[i]))
			return -10;
	}

	/* collect core counters only for 1st thread in core */
	if (!is_cpu_first_thread_in_core(t, c, p))
		goto done;

	if (platform->has_per_core_rapl) {
		status = get_rapl_counters(cpu, c->core_id, c, p);
		if (status != 0)
			return status;
	}

	if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
			return -6;
	}

	if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) {
		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
			return -7;
	} else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) {
		/* KNL keeps core C6 residency in a different MSR */
		if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
			return -7;
	}

	if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
			return -8;
		else if (t->is_atom) {
			/*
			 * For Atom CPUs that has core cstate deeper than c6,
			 * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
			 * Minus CC7 (and deeper cstates) residency to get
			 * accurate cc6 residency.
			 */
			c->c6 -= c->c7;
		}
	}

	if (DO_BIC(BIC_Mod_c6))
		if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
			return -8;

	if (DO_BIC(BIC_CoreTmp)) {
		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return -9;
		/* THERM_STATUS[22:16] is the margin below TjMax */
		c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
	}

	/* best effort: the sysfs node may be absent, failure is ignored */
	if (DO_BIC(BIC_CORE_THROT_CNT))
		get_core_throt_cnt(cpu, &c->core_throt_cnt);

	/* user-added core-scope counters */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &c->counter[i]))
			return -10;
	}

	/* collect package counters only for 1st core in package */
	if (!is_cpu_first_core_in_package(t, c, p))
		goto done;

	if (DO_BIC(BIC_Totl_c0)) {
		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
			return -10;
	}
	if (DO_BIC(BIC_Any_c0)) {
		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
			return -11;
	}
	if (DO_BIC(BIC_GFX_c0)) {
		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
			return -12;
	}
	if (DO_BIC(BIC_CPUGFX)) {
		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
			return -13;
	}
	if (DO_BIC(BIC_Pkgpc3))
		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
			return -9;
	if (DO_BIC(BIC_Pkgpc6)) {
		/* Atom platforms report package C6 in a different MSR */
		if (platform->has_msr_atom_pkg_c6_residency) {
			if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
				return -10;
		} else {
			if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
				return -10;
		}
	}

	if (DO_BIC(BIC_Pkgpc2))
		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
			return -11;
	if (DO_BIC(BIC_Pkgpc7))
		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
			return -12;
	if (DO_BIC(BIC_Pkgpc8))
		if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
			return -13;
	if (DO_BIC(BIC_Pkgpc9))
		if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
			return -13;
	if (DO_BIC(BIC_Pkgpc10))
		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
			return -13;

	/* LPI residencies are copied from the cpuidle_cur_* globals */
	if (DO_BIC(BIC_CPU_LPI))
		p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
	if (DO_BIC(BIC_SYS_LPI))
		p->sys_lpi = cpuidle_cur_sys_lpi_us;

	if (!platform->has_per_core_rapl) {
		status = get_rapl_counters(cpu, p->package_id, c, p);
		if (status != 0)
			return status;
	}

	if (DO_BIC(BIC_PkgTmp)) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return -17;
		p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
	}

	/* n.b. assume die0 uncore frequency applies to whole package */
	if (DO_BIC(BIC_UNCORE_MHZ))
		p->uncore_mhz = get_uncore_mhz(p->package_id, 0);

	/* graphics / SA-media values are copied from the gfx_info[] cache */
	if (DO_BIC(BIC_GFX_rc6))
		p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull;

	if (DO_BIC(BIC_GFXMHz))
		p->gfx_mhz = gfx_info[GFX_MHz].val;

	if (DO_BIC(BIC_GFXACTMHz))
		p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val;

	if (DO_BIC(BIC_SAM_mc6))
		p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull;

	if (DO_BIC(BIC_SAMMHz))
		p->sam_mhz = gfx_info[SAM_MHz].val;

	if (DO_BIC(BIC_SAMACTMHz))
		p->sam_act_mhz = gfx_info[SAM_ACTMHz].val;

	/* user-added package-scope counters */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &p->counter[i]))
			return -10;
	}
done:
	gettimeofday(&t->tv_end, (struct timezone *)NULL);

	return 0;
}
3660
3661 /*
3662 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
3663 * If you change the values, note they are used both in comparisons
3664 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
3665 */
3666
#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCL_10 14 /* PC10 */
#define PCLUNL 15 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Indexed by the PCL* values above, so strings[PCLUKN] is "unknown"
 * and strings[PCLRSV] is "reserved".  (The first two entries were
 * previously swapped relative to the PCLUKN/PCLRSV definitions.)
 */
char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2",
	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"
};
3688
/*
 * Per-platform decode tables for MSR_PKG_CST_CONFIG_CONTROL[3:0]:
 * each maps the 4-bit hardware limit field to a PCL_* index.
 * Selected by probe_cst_limit() via platform->cst_limit.
 */

/* CST_LIMIT_NHM */
int nhm_pkg_cstate_limits[16] =
    { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* CST_LIMIT_SNB */
int snb_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* CST_LIMIT_HSW */
int hsw_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* CST_LIMIT_SLV */
int slv_pkg_cstate_limits[16] =
    { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCL__6, PCL__7
};

/* CST_LIMIT_AMT */
int amt_pkg_cstate_limits[16] =
    { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* CST_LIMIT_KNL */
int phi_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* CST_LIMIT_GMT */
int glm_pkg_cstate_limits[16] =
    { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* CST_LIMIT_SKX */
int skx_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* CST_LIMIT_ICX */
int icx_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};
3733
3734 void probe_cst_limit(void)
3735 {
3736 unsigned long long msr;
3737 int *pkg_cstate_limits;
3738
3739 if (!platform->has_nhm_msrs || no_msr)
3740 return;
3741
3742 switch (platform->cst_limit) {
3743 case CST_LIMIT_NHM:
3744 pkg_cstate_limits = nhm_pkg_cstate_limits;
3745 break;
3746 case CST_LIMIT_SNB:
3747 pkg_cstate_limits = snb_pkg_cstate_limits;
3748 break;
3749 case CST_LIMIT_HSW:
3750 pkg_cstate_limits = hsw_pkg_cstate_limits;
3751 break;
3752 case CST_LIMIT_SKX:
3753 pkg_cstate_limits = skx_pkg_cstate_limits;
3754 break;
3755 case CST_LIMIT_ICX:
3756 pkg_cstate_limits = icx_pkg_cstate_limits;
3757 break;
3758 case CST_LIMIT_SLV:
3759 pkg_cstate_limits = slv_pkg_cstate_limits;
3760 break;
3761 case CST_LIMIT_AMT:
3762 pkg_cstate_limits = amt_pkg_cstate_limits;
3763 break;
3764 case CST_LIMIT_KNL:
3765 pkg_cstate_limits = phi_pkg_cstate_limits;
3766 break;
3767 case CST_LIMIT_GMT:
3768 pkg_cstate_limits = glm_pkg_cstate_limits;
3769 break;
3770 default:
3771 return;
3772 }
3773
3774 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3775 pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3776 }
3777
3778 static void dump_platform_info(void)
3779 {
3780 unsigned long long msr;
3781 unsigned int ratio;
3782
3783 if (!platform->has_nhm_msrs || no_msr)
3784 return;
3785
3786 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3787
3788 fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
3789
3790 ratio = (msr >> 40) & 0xFF;
3791 fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk);
3792
3793 ratio = (msr >> 8) & 0xFF;
3794 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
3795 }
3796
3797 static void dump_power_ctl(void)
3798 {
3799 unsigned long long msr;
3800
3801 if (!platform->has_nhm_msrs || no_msr)
3802 return;
3803
3804 get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
3805 fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
3806 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
3807
3808 /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
3809 if (platform->has_cst_prewake_bit)
3810 fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
3811
3812 return;
3813 }
3814
3815 static void dump_turbo_ratio_limit2(void)
3816 {
3817 unsigned long long msr;
3818 unsigned int ratio;
3819
3820 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
3821
3822 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
3823
3824 ratio = (msr >> 8) & 0xFF;
3825 if (ratio)
3826 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk);
3827
3828 ratio = (msr >> 0) & 0xFF;
3829 if (ratio)
3830 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk);
3831 return;
3832 }
3833
3834 static void dump_turbo_ratio_limit1(void)
3835 {
3836 unsigned long long msr;
3837 unsigned int ratio;
3838
3839 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
3840
3841 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
3842
3843 ratio = (msr >> 56) & 0xFF;
3844 if (ratio)
3845 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk);
3846
3847 ratio = (msr >> 48) & 0xFF;
3848 if (ratio)
3849 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk);
3850
3851 ratio = (msr >> 40) & 0xFF;
3852 if (ratio)
3853 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk);
3854
3855 ratio = (msr >> 32) & 0xFF;
3856 if (ratio)
3857 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk);
3858
3859 ratio = (msr >> 24) & 0xFF;
3860 if (ratio)
3861 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk);
3862
3863 ratio = (msr >> 16) & 0xFF;
3864 if (ratio)
3865 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk);
3866
3867 ratio = (msr >> 8) & 0xFF;
3868 if (ratio)
3869 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk);
3870
3871 ratio = (msr >> 0) & 0xFF;
3872 if (ratio)
3873 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk);
3874 return;
3875 }
3876
/*
 * dump_turbo_ratio_limits(trl_msr_offset)
 *
 * Print the 8 ratio bytes of a TURBO_RATIO_LIMIT MSR, pairing each
 * byte with its active-core-count group size: from MSR_TURBO_RATIO_LIMIT1
 * on TRL_CORECOUNT platforms, otherwise the default 1..8 mapping.
 */
static void dump_turbo_ratio_limits(int trl_msr_offset)
{
	unsigned long long msr, core_counts;
	int shift;

	get_msr(base_cpu, trl_msr_offset, &msr);
	fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
		base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr);

	if (platform->trl_msrs & TRL_CORECOUNT) {
		/* per-byte group sizes come from MSR_TURBO_RATIO_LIMIT1 */
		get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
		fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
	} else {
		/* default mapping: byte N covers N+1 active cores */
		core_counts = 0x0807060504030201;
	}

	/* walk the 8 ratio bytes from most to least significant */
	for (shift = 56; shift >= 0; shift -= 8) {
		unsigned int ratio, group_size;

		ratio = (msr >> shift) & 0xFF;
		group_size = (core_counts >> shift) & 0xFF;
		if (ratio)
			fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
				ratio, bclk, ratio * bclk, group_size);
	}

	return;
}
3905
/*
 * dump_atom_turbo_ratio_limits()
 *
 * Print Atom-specific operating and turbo ratios.  Both MSRs use
 * 6-bit ratio fields; only the low 32 bits are meaningful, so the
 * raw dump is masked to 32 bits.
 */
static void dump_atom_turbo_ratio_limits(void)
{
	unsigned long long msr;
	unsigned int ratio;

	get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
	fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);

	/* bits 5:0 -- minimum operating ratio */
	ratio = (msr >> 0) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk);

	/* bits 13:8 -- low frequency mode ratio */
	ratio = (msr >> 8) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk);

	/* bits 21:16 -- base (guaranteed) ratio */
	ratio = (msr >> 16) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);

	get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
	fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);

	/* one 6-bit field per active-core count, 4 cores down to 1 */
	ratio = (msr >> 24) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 16) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 0) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk);
}
3945
/*
 * dump_knl_turbo_ratio_limits()
 *
 * Decode Knights Landing's delta-encoded TURBO_RATIO_LIMIT: bucket 1
 * carries absolute core-count and ratio values, buckets 2..7 are
 * expressed as deltas from the previous bucket (see field map below).
 * Buckets whose ratio equals the next-lower bucket's are skipped when
 * printing to avoid duplicate lines.
 */
static void dump_knl_turbo_ratio_limits(void)
{
	const unsigned int buckets_no = 7;

	unsigned long long msr;
	int delta_cores, delta_ratio;
	int i, b_nr;
	unsigned int cores[buckets_no];
	unsigned int ratio[buckets_no];

	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);

	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);

	/*
	 * Turbo encoding in KNL is as follows:
	 * [0] -- Reserved
	 * [7:1] -- Base value of number of active cores of bucket 1.
	 * [15:8] -- Base value of freq ratio of bucket 1.
	 * [20:16] -- +ve delta of number of active cores of bucket 2.
	 * i.e. active cores of bucket 2 =
	 * active cores of bucket 1 + delta
	 * [23:21] -- Negative delta of freq ratio of bucket 2.
	 * i.e. freq ratio of bucket 2 =
	 * freq ratio of bucket 1 - delta
	 * [28:24]-- +ve delta of number of active cores of bucket 3.
	 * [31:29]-- -ve delta of freq ratio of bucket 3.
	 * [36:32]-- +ve delta of number of active cores of bucket 4.
	 * [39:37]-- -ve delta of freq ratio of bucket 4.
	 * [44:40]-- +ve delta of number of active cores of bucket 5.
	 * [47:45]-- -ve delta of freq ratio of bucket 5.
	 * [52:48]-- +ve delta of number of active cores of bucket 6.
	 * [55:53]-- -ve delta of freq ratio of bucket 6.
	 * [60:56]-- +ve delta of number of active cores of bucket 7.
	 * [63:61]-- -ve delta of freq ratio of bucket 7.
	 */

	/* bucket 1 is absolute */
	b_nr = 0;
	cores[b_nr] = (msr & 0xFF) >> 1;
	ratio[b_nr] = (msr >> 8) & 0xFF;

	/* buckets 2..7: 8-bit delta field per bucket, starting at bit 16 */
	for (i = 16; i < 64; i += 8) {
		delta_cores = (msr >> i) & 0x1F;
		delta_ratio = (msr >> (i + 5)) & 0x7;

		cores[b_nr + 1] = cores[b_nr] + delta_cores;
		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
		b_nr++;
	}

	/* print highest bucket first; suppress buckets with repeated ratios */
	for (i = buckets_no - 1; i >= 0; i--)
		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
			fprintf(outf,
				"%d * %.1f = %.1f MHz max turbo %d active cores\n",
				ratio[i], bclk, ratio[i] * bclk, cores[i]);
}
4002
/*
 * dump_cst_cfg()
 *
 * Print MSR_PKG_CST_CONFIG_CONTROL: C-state demotion/undemotion flags,
 * the lock bit (bit 15), and the package C-state limit (bits 3:0,
 * translated via pkg_cstate_limit_strings[]).  No-op when the platform
 * lacks NHM-era MSRs or MSR access is disabled.
 */
static void dump_cst_cfg(void)
{
	unsigned long long msr;

	if (!platform->has_nhm_msrs || no_msr)
		return;

	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);

	fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);

	fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
		(msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);

#define AUTOMATIC_CSTATE_CONVERSION (1UL << 16)
	if (platform->has_cst_auto_convension) {
		fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
	}

	fprintf(outf, ")\n");

	return;
}
4030
/*
 * dump_config_tdp()
 *
 * Print the configurable-TDP MSRs: the nominal base ratio, the two
 * alternate TDP levels (power limits and ratio), the level-select
 * control, and the turbo activation ratio.  Level MSRs are decoded
 * only when non-zero.
 */
static void dump_config_tdp(void)
{
	unsigned long long msr;

	get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
	fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);

	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
	if (msr) {
		/* 15-bit power fields, 8-bit ratio field */
		fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
		fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
		fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
		fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
	}
	fprintf(outf, ")\n");

	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
	if (msr) {
		fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
		fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
		fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
		fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
	}
	fprintf(outf, ")\n");

	get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
	if ((msr) & 0x3)
		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
	fprintf(outf, ")\n");

	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
	fprintf(outf, ")\n");
}
4072
/* IRTL time-unit multipliers in ns, indexed by the MSR's time-unit field */
unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

/*
 * print_irtl()
 *
 * Print the package C-state Interrupt Response Time Limit MSRs for
 * each PC-state the platform supports.  Each MSR encodes a valid bit
 * (bit 15), a 10-bit time value (bits 9:0) and a time-unit selector
 * starting at bit 10.
 *
 * NOTE(review): irtl_time_units[] has 8 entries (suggesting a 3-bit
 * unit field) but the index is masked with 0x3, so only the first 4
 * units are ever selected -- confirm the field width against the SDM.
 */
void print_irtl(void)
{
	unsigned long long msr;

	if (!platform->has_irtl_msrs || no_msr)
		return;

	if (platform->supported_cstates & PC3) {
		get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
	}

	if (platform->supported_cstates & PC6) {
		get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
	}

	if (platform->supported_cstates & PC7) {
		get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
	}

	if (platform->supported_cstates & PC8) {
		get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
	}

	if (platform->supported_cstates & PC9) {
		get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
	}

	if (platform->supported_cstates & PC10) {
		get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
	}
}
4124
4125 void free_fd_percpu(void)
4126 {
4127 int i;
4128
4129 if (!fd_percpu)
4130 return;
4131
4132 for (i = 0; i < topo.max_cpu_num + 1; ++i) {
4133 if (fd_percpu[i] != 0)
4134 close(fd_percpu[i]);
4135 }
4136
4137 free(fd_percpu);
4138 fd_percpu = NULL;
4139 }
4140
4141 void free_fd_amperf_percpu(void)
4142 {
4143 int i;
4144
4145 if (!fd_amperf_percpu)
4146 return;
4147
4148 for (i = 0; i < topo.max_cpu_num + 1; ++i) {
4149 if (fd_amperf_percpu[i].mperf != 0)
4150 close(fd_amperf_percpu[i].mperf);
4151
4152 if (fd_amperf_percpu[i].aperf != 0)
4153 close(fd_amperf_percpu[i].aperf);
4154 }
4155
4156 free(fd_amperf_percpu);
4157 fd_amperf_percpu = NULL;
4158 }
4159
4160 void free_fd_instr_count_percpu(void)
4161 {
4162 if (!fd_instr_count_percpu)
4163 return;
4164
4165 for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
4166 if (fd_instr_count_percpu[i] != 0)
4167 close(fd_instr_count_percpu[i]);
4168 }
4169
4170 free(fd_instr_count_percpu);
4171 fd_instr_count_percpu = NULL;
4172 }
4173
4174 void free_fd_rapl_percpu(void)
4175 {
4176 if (!rapl_counter_info_perdomain)
4177 return;
4178
4179 const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
4180
4181 for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
4182 if (rapl_counter_info_perdomain[domain_id].fd_perf != -1)
4183 close(rapl_counter_info_perdomain[domain_id].fd_perf);
4184 }
4185
4186 free(rapl_counter_info_perdomain);
4187 }
4188
/*
 * free_all_buffers()
 *
 * Release every dynamically allocated data structure turbostat owns:
 * cpu sets, the even/odd counter arrays, the output buffer, the
 * per-cpu fd tables, irq bookkeeping, and the cpu topology array.
 * Called from re_initialize() before the buffers are rebuilt, so each
 * pointer is reset to NULL (or freed via a helper that does so).
 */
void free_all_buffers(void)
{
	int i;

	CPU_FREE(cpu_present_set);
	cpu_present_set = NULL;
	cpu_present_setsize = 0;

	CPU_FREE(cpu_effective_set);
	cpu_effective_set = NULL;
	cpu_effective_setsize = 0;

	CPU_FREE(cpu_allowed_set);
	cpu_allowed_set = NULL;
	cpu_allowed_setsize = 0;

	CPU_FREE(cpu_affinity_set);
	cpu_affinity_set = NULL;
	cpu_affinity_setsize = 0;

	/* even/odd counter snapshots used for delta computation */
	free(thread_even);
	free(core_even);
	free(package_even);

	thread_even = NULL;
	core_even = NULL;
	package_even = NULL;

	free(thread_odd);
	free(core_odd);
	free(package_odd);

	thread_odd = NULL;
	core_odd = NULL;
	package_odd = NULL;

	free(output_buffer);
	output_buffer = NULL;
	outp = NULL;

	free_fd_percpu();
	free_fd_instr_count_percpu();
	free_fd_amperf_percpu();
	free_fd_rapl_percpu();

	free(irq_column_2_cpu);
	free(irqs_per_cpu);

	/* per-cpu thread-sibling sets allocated by get_thread_siblings() */
	for (i = 0; i <= topo.max_cpu_num; ++i) {
		if (cpus[i].put_ids)
			CPU_FREE(cpus[i].put_ids);
	}
	free(cpus);
}
4243
4244 /*
4245 * Parse a file containing a single int.
4246 * Return 0 if file can not be opened
4247 * Exit if file can be opened, but can not be parsed
4248 */
4249 int parse_int_file(const char *fmt, ...)
4250 {
4251 va_list args;
4252 char path[PATH_MAX];
4253 FILE *filep;
4254 int value;
4255
4256 va_start(args, fmt);
4257 vsnprintf(path, sizeof(path), fmt, args);
4258 va_end(args);
4259 filep = fopen(path, "r");
4260 if (!filep)
4261 return 0;
4262 if (fscanf(filep, "%d", &value) != 1)
4263 err(1, "%s: failed to parse number from file", path);
4264 fclose(filep);
4265 return value;
4266 }
4267
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if given CPU is 1st core in package
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_sibling;

	/* the first entry of core_siblings_list is the lowest cpu in the package */
	first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_sibling == cpu;
}
4276
/* Read this cpu's package id from sysfs topology. */
int get_physical_package_id(int cpu)
{
	int pkg_id;

	pkg_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return pkg_id;
}
4281
/* Read this cpu's die id from sysfs topology. */
int get_die_id(int cpu)
{
	int die_id;

	die_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);

	return die_id;
}
4286
/* Read this cpu's core id from sysfs topology. */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
4291
/*
 * set_node_data()
 *
 * Assign a package-relative logical_node_id to every cpu: within each
 * package, the first unseen physical node becomes logical node 0, the
 * next becomes 1, and so on.  Also records the maximum nodes-per-package
 * observed in topo.nodes_per_pkg.
 */
void set_node_data(void)
{
	int pkg, node, lnode, cpu, cpux;
	int cpu_count;

	/* initialize logical_node_id */
	for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
		cpus[cpu].logical_node_id = -1;

	cpu_count = 0;
	for (pkg = 0; pkg < topo.num_packages; pkg++) {
		lnode = 0;
		for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
			if (cpus[cpu].physical_package_id != pkg)
				continue;
			/* find a cpu with an unset logical_node_id */
			if (cpus[cpu].logical_node_id != -1)
				continue;
			cpus[cpu].logical_node_id = lnode;
			node = cpus[cpu].physical_node_id;
			cpu_count++;
			/*
			 * find all matching cpus on this pkg and set
			 * the logical_node_id
			 */
			for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
				if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) {
					cpus[cpux].logical_node_id = lnode;
					cpu_count++;
				}
			}
			lnode++;
			if (lnode > topo.nodes_per_pkg)
				topo.nodes_per_pkg = lnode;
		}
		/* all cpus accounted for -- no need to scan remaining packages */
		if (cpu_count >= topo.max_cpu_num)
			break;
	}
}
4331
4332 int get_physical_node_id(struct cpu_topology *thiscpu)
4333 {
4334 char path[80];
4335 FILE *filep;
4336 int i;
4337 int cpu = thiscpu->logical_cpu_id;
4338
4339 for (i = 0; i <= topo.max_cpu_num; i++) {
4340 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i);
4341 filep = fopen(path, "r");
4342 if (!filep)
4343 continue;
4344 fclose(filep);
4345 return i;
4346 }
4347 return -1;
4348 }
4349
/*
 * parse_cpu_str()
 *
 * Parse a cpu-list string (e.g. "0,3,5-7") into cpu_set, accepting
 * both "A-B" and "A..B" range syntax.  Returns 0 on success, 1 on any
 * malformed input (negative numbers, out-of-range cpus, reversed
 * ranges, stray characters).
 */
static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size)
{
	unsigned int start, end;
	char *next = cpu_str;

	while (next && *next) {

		if (*next == '-')	/* no negative cpu numbers */
			return 1;

		start = strtoul(next, &next, 10);

		if (start >= CPU_SUBSET_MAXCPUS)
			return 1;
		CPU_SET_S(start, cpu_set_size, cpu_set);

		if (*next == '\0' || *next == '\n')
			break;

		if (*next == ',') {
			next += 1;
			continue;
		}

		/* range syntax: "-" or ".." introduces the range end */
		if (*next == '-') {
			next += 1;	/* start range */
		} else if (*next == '.') {
			next += 1;
			if (*next == '.')
				next += 1;	/* start range */
			else
				return 1;
		}

		end = strtoul(next, &next, 10);
		if (end <= start)
			return 1;

		/* start itself was already set above; add the rest of the range */
		while (++start <= end) {
			if (start >= CPU_SUBSET_MAXCPUS)
				return 1;
			CPU_SET_S(start, cpu_set_size, cpu_set);
		}

		if (*next == ',')
			next += 1;
		else if (*next != '\0' && *next != '\n')
			return 1;
	}

	return 0;
}
4402
/*
 * get_thread_siblings()
 *
 * Parse this cpu's thread_siblings bitmask from sysfs, record the
 * siblings that share this cpu's physical core in thiscpu->put_ids,
 * and assign ascending thread_ids to previously unnumbered siblings.
 * Returns the number of cpus in the sibling set, or -1 on failure.
 */
int get_thread_siblings(struct cpu_topology *thiscpu)
{
	char path[80], character;
	FILE *filep;
	unsigned long map;
	int so, shift, sib_core;
	int cpu = thiscpu->logical_cpu_id;
	int offset = topo.max_cpu_num + 1;
	size_t size;
	int thread_id = 0;

	thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
	if (thiscpu->thread_id < 0)
		thiscpu->thread_id = thread_id++;
	if (!thiscpu->put_ids)
		return -1;

	size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(size, thiscpu->put_ids);

	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
	filep = fopen(path, "r");

	if (!filep) {
		warnx("%s: open failed", path);
		return -1;
	}
	/*
	 * The mask is comma-separated 32-bit hex words, most significant
	 * first; walk words left to right, decrementing the bit offset.
	 */
	do {
		offset -= BITMASK_SIZE;
		if (fscanf(filep, "%lx%c", &map, &character) != 2)
			err(1, "%s: failed to parse file", path);
		for (shift = 0; shift < BITMASK_SIZE; shift++) {
			if ((map >> shift) & 0x1) {
				so = shift + offset;
				sib_core = get_core_id(so);
				/* only siblings on the same physical core count */
				if (sib_core == thiscpu->physical_core_id) {
					CPU_SET_S(so, size, thiscpu->put_ids);
					if ((so != cpu) && (cpus[so].thread_id < 0))
						cpus[so].thread_id = thread_id++;
				}
			}
		}
	} while (character == ',');
	fclose(filep);

	return CPU_COUNT_S(size, thiscpu->put_ids);
}
4450
/*
 * run func(thread, core, package) in topology order
 * skip non-present cpus
 *
 * Like for_all_cpus(), but passes two parallel sets of counter
 * structures (e.g. ODD and EVEN snapshots) so func can compute deltas.
 * Stops and returns func's value on the first non-zero return.
 */
int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
			       struct pkg_data *, struct thread_data *, struct core_data *,
			       struct pkg_data *), struct thread_data *thread_base,
		   struct core_data *core_base, struct pkg_data *pkg_base,
		   struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2)
{
	int retval, pkg_no, node_no, core_no, thread_no;

	/* iterate package -> node -> core -> thread */
	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
				for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
					struct thread_data *t, *t2;
					struct core_data *c, *c2;
					struct pkg_data *p, *p2;

					t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);

					if (cpu_is_not_allowed(t->cpu_id))
						continue;

					t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);

					c = GET_CORE(core_base, core_no, node_no, pkg_no);
					c2 = GET_CORE(core_base2, core_no, node_no, pkg_no);

					p = GET_PKG(pkg_base, pkg_no);
					p2 = GET_PKG(pkg_base2, pkg_no);

					retval = func(t, c, p, t2, c2, p2);
					if (retval)
						return retval;
				}
			}
		}
	}
	return 0;
}
4494
4495 /*
4496 * run func(cpu) on every cpu in /proc/stat
4497 * return max_cpu number
4498 */
4499 int for_all_proc_cpus(int (func) (int))
4500 {
4501 FILE *fp;
4502 int cpu_num;
4503 int retval;
4504
4505 fp = fopen_or_die(proc_stat, "r");
4506
4507 retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
4508 if (retval != 0)
4509 err(1, "%s: failed to parse format", proc_stat);
4510
4511 while (1) {
4512 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
4513 if (retval != 1)
4514 break;
4515
4516 retval = func(cpu_num);
4517 if (retval) {
4518 fclose(fp);
4519 return (retval);
4520 }
4521 }
4522 fclose(fp);
4523 return 0;
4524 }
4525
#define PATH_EFFECTIVE_CPUS	"/sys/fs/cgroup/cpuset.cpus.effective"

static char cpu_effective_str[1024];

/*
 * Re-read the cgroup effective-cpus file into cpu_effective_str.
 * Returns 1 if the cached string changed, 0 otherwise (including when
 * the file is absent, or when there was never anything cached and
 * this is not the startup call).
 */
static int update_effective_str(bool startup)
{
	char buf[1024];
	char *line;
	FILE *fp;

	/* nothing cached and not starting up: cgroup cpuset not in use */
	if (!startup && cpu_effective_str[0] == '\0')
		return 0;

	fp = fopen(PATH_EFFECTIVE_CPUS, "r");
	if (fp == NULL)
		return 0;

	line = fgets(buf, 1024, fp);
	if (line == NULL)
		err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS);

	fclose(fp);

	if (strncmp(cpu_effective_str, buf, 1024) == 0)
		return 0;

	strncpy(cpu_effective_str, buf, 1024);
	return 1;
}
4557
/*
 * update_effective_set()
 *
 * Refresh cpu_effective_str from the cgroup file, then re-parse it
 * into cpu_effective_set.  Exits on a malformed cpu list.
 */
static void update_effective_set(bool startup)
{
	update_effective_str(startup);

	if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize))
		err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
}
4565
void linux_perf_init(void);
void rapl_perf_init(void);

/*
 * re_initialize()
 *
 * Tear down and rebuild all per-cpu state after a topology change
 * (cpu hotplug or cpuset change detected in turbostat_loop()).
 */
void re_initialize(void)
{
	free_all_buffers();
	setup_all_buffers(false);
	linux_perf_init();
	rapl_perf_init();
	fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
		topo.allowed_cpus);
}
4578
4579 void set_max_cpu_num(void)
4580 {
4581 FILE *filep;
4582 int base_cpu;
4583 unsigned long dummy;
4584 char pathname[64];
4585
4586 base_cpu = sched_getcpu();
4587 if (base_cpu < 0)
4588 err(1, "cannot find calling cpu ID");
4589 sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu);
4590
4591 filep = fopen_or_die(pathname, "r");
4592 topo.max_cpu_num = 0;
4593 while (fscanf(filep, "%lx,", &dummy) == 1)
4594 topo.max_cpu_num += BITMASK_SIZE;
4595 fclose(filep);
4596 topo.max_cpu_num--; /* 0 based */
4597 }
4598
/*
 * count_cpus()
 * remember the last one seen, it will be the max
 *
 * Callback for for_all_proc_cpus(): bump the cpu tally for each
 * "cpuN" line seen in /proc/stat.
 */
int count_cpus(int cpu)
{
	UNUSED(cpu);

	topo.num_cpus++;
	return 0;
}
4610
/* Callback for for_all_proc_cpus(): record cpu in cpu_present_set. */
int mark_cpu_present(int cpu)
{
	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
	return 0;
}
4616
/* Callback: mark cpu's thread_id unassigned; get_thread_siblings() fills it in. */
int init_thread_id(int cpu)
{
	cpus[cpu].thread_id = -1;
	return 0;
}
4622
4623 /*
4624 * snapshot_proc_interrupts()
4625 *
4626 * read and record summary of /proc/interrupts
4627 *
4628 * return 1 if config change requires a restart, else return 0
4629 */
4630 int snapshot_proc_interrupts(void)
4631 {
4632 static FILE *fp;
4633 int column, retval;
4634
4635 if (fp == NULL)
4636 fp = fopen_or_die("/proc/interrupts", "r");
4637 else
4638 rewind(fp);
4639
4640 /* read 1st line of /proc/interrupts to get cpu* name for each column */
4641 for (column = 0; column < topo.num_cpus; ++column) {
4642 int cpu_number;
4643
4644 retval = fscanf(fp, " CPU%d", &cpu_number);
4645 if (retval != 1)
4646 break;
4647
4648 if (cpu_number > topo.max_cpu_num) {
4649 warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
4650 return 1;
4651 }
4652
4653 irq_column_2_cpu[column] = cpu_number;
4654 irqs_per_cpu[cpu_number] = 0;
4655 }
4656
4657 /* read /proc/interrupt count lines and sum up irqs per cpu */
4658 while (1) {
4659 int column;
4660 char buf[64];
4661
4662 retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */
4663 if (retval != 1)
4664 break;
4665
4666 /* read the count per cpu */
4667 for (column = 0; column < topo.num_cpus; ++column) {
4668
4669 int cpu_number, irq_count;
4670
4671 retval = fscanf(fp, " %d", &irq_count);
4672 if (retval != 1)
4673 break;
4674
4675 cpu_number = irq_column_2_cpu[column];
4676 irqs_per_cpu[cpu_number] += irq_count;
4677
4678 }
4679
4680 while (getc(fp) != '\n') ; /* flush interrupt description */
4681
4682 }
4683 return 0;
4684 }
4685
/*
 * snapshot_graphics()
 *
 * record snapshot of specified graphics sysfs knob
 *
 * Residency counters (rc6/mc6) are read via open/close each time;
 * frequency knobs keep a cached FILE* that is rewound and flushed.
 *
 * return 1 if config change requires a restart, else return 0
 */
int snapshot_graphics(int idx)
{
	FILE *fp;
	int retval;

	switch (idx) {
	case GFX_rc6:
	case SAM_mc6:
		/* 64-bit residency value, fresh open each snapshot */
		fp = fopen_or_die(gfx_info[idx].path, "r");
		retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull);
		if (retval != 1)
			err(1, "rc6");
		fclose(fp);
		return 0;
	case GFX_MHz:
	case GFX_ACTMHz:
	case SAM_MHz:
	case SAM_ACTMHz:
		/* int MHz value, cached FILE* rewound between reads */
		if (gfx_info[idx].fp == NULL) {
			gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r");
		} else {
			rewind(gfx_info[idx].fp);
			fflush(gfx_info[idx].fp);
		}
		retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val);
		if (retval != 1)
			err(1, "MHz");
		return 0;
	default:
		return -EINVAL;
	}
}
4725
4726 /*
4727 * snapshot_cpu_lpi()
4728 *
4729 * record snapshot of
4730 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
4731 */
4732 int snapshot_cpu_lpi_us(void)
4733 {
4734 FILE *fp;
4735 int retval;
4736
4737 fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
4738
4739 retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
4740 if (retval != 1) {
4741 fprintf(stderr, "Disabling Low Power Idle CPU output\n");
4742 BIC_NOT_PRESENT(BIC_CPU_LPI);
4743 fclose(fp);
4744 return -1;
4745 }
4746
4747 fclose(fp);
4748
4749 return 0;
4750 }
4751
4752 /*
4753 * snapshot_sys_lpi()
4754 *
4755 * record snapshot of sys_lpi_file
4756 */
4757 int snapshot_sys_lpi_us(void)
4758 {
4759 FILE *fp;
4760 int retval;
4761
4762 fp = fopen_or_die(sys_lpi_file, "r");
4763
4764 retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
4765 if (retval != 1) {
4766 fprintf(stderr, "Disabling Low Power Idle System output\n");
4767 BIC_NOT_PRESENT(BIC_SYS_LPI);
4768 fclose(fp);
4769 return -1;
4770 }
4771 fclose(fp);
4772
4773 return 0;
4774 }
4775
/*
 * snapshot /proc and /sys files
 *
 * Take one snapshot of every enabled /proc and /sys derived counter,
 * gated on the corresponding built-in column being selected.
 *
 * return 1 if configuration restart needed, else return 0
 */
int snapshot_proc_sysfs_files(void)
{
	/* only /proc/interrupts can signal a needed restart */
	if (DO_BIC(BIC_IRQ))
		if (snapshot_proc_interrupts())
			return 1;

	if (DO_BIC(BIC_GFX_rc6))
		snapshot_graphics(GFX_rc6);

	if (DO_BIC(BIC_GFXMHz))
		snapshot_graphics(GFX_MHz);

	if (DO_BIC(BIC_GFXACTMHz))
		snapshot_graphics(GFX_ACTMHz);

	if (DO_BIC(BIC_SAM_mc6))
		snapshot_graphics(SAM_mc6);

	if (DO_BIC(BIC_SAMMHz))
		snapshot_graphics(SAM_MHz);

	if (DO_BIC(BIC_SAMACTMHz))
		snapshot_graphics(SAM_ACTMHz);

	if (DO_BIC(BIC_CPU_LPI))
		snapshot_cpu_lpi_us();

	if (DO_BIC(BIC_SYS_LPI))
		snapshot_sys_lpi_us();

	return 0;
}
4813
/* set by the SIGINT handler; checked at the bottom of each measurement loop */
int exit_requested;

/*
 * signal_handler()
 *
 * SIGINT requests a clean exit after the current interval; SIGUSR1 is
 * used to interrupt do_sleep() early.
 *
 * NOTE(review): fprintf() is not async-signal-safe; the debug prints
 * here are best-effort only.
 */
static void signal_handler(int signal)
{
	switch (signal) {
	case SIGINT:
		exit_requested = 1;
		if (debug)
			fprintf(stderr, " SIGINT\n");
		break;
	case SIGUSR1:
		if (debug > 1)
			fprintf(stderr, "SIGUSR1\n");
		break;
	}
}
4830
4831 void setup_signal_handler(void)
4832 {
4833 struct sigaction sa;
4834
4835 memset(&sa, 0, sizeof(sa));
4836
4837 sa.sa_handler = &signal_handler;
4838
4839 if (sigaction(SIGINT, &sa, NULL) < 0)
4840 err(1, "sigaction SIGINT");
4841 if (sigaction(SIGUSR1, &sa, NULL) < 0)
4842 err(1, "sigaction SIGUSR1");
4843 }
4844
/*
 * do_sleep()
 *
 * Sleep for one measurement interval.  Normally select()s on stdin so
 * the user can type 'q' to quit; once stdin is known to be a closed
 * pipe (EOF), falls back to plain nanosleep() for the whole interval.
 */
void do_sleep(void)
{
	struct timeval tout;
	struct timespec rest;
	fd_set readfds;
	int retval;

	FD_ZERO(&readfds);
	FD_SET(0, &readfds);

	if (ignore_stdin) {
		nanosleep(&interval_ts, NULL);
		return;
	}

	tout = interval_tv;
	retval = select(1, &readfds, NULL, NULL, &tout);

	if (retval == 1) {
		switch (getc(stdin)) {
		case 'q':
			exit_requested = 1;
			break;
		case EOF:
			/*
			 * 'stdin' is a pipe closed on the other end. There
			 * won't be any further input.
			 */
			ignore_stdin = 1;
			/* Sleep the rest of the time */
			rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
			rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
			nanosleep(&rest, NULL);
		}
	}
}
4881
/*
 * get_msr_sum()
 *
 * Return a wrap-corrected, accumulated value for a 32-bit-wide MSR:
 * the background timer's running sum plus the delta between the
 * current raw read and the last recorded value.  Returns non-zero if
 * the sum infrastructure is absent, the offset unknown, or the MSR
 * read fails.
 */
int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
{
	int ret, idx;
	unsigned long long msr_cur, msr_last;

	assert(!no_msr);

	if (!per_cpu_msr_sum)
		return 1;

	idx = offset_to_idx(offset);
	if (idx < 0)
		return idx;
	/* get_msr_sum() = sum + (get_msr() - last) */
	ret = get_msr(cpu, offset, &msr_cur);
	if (ret)
		return ret;
	msr_last = per_cpu_msr_sum[cpu].entries[idx].last;
	DELTA_WRAP32(msr_cur, msr_last);
	*msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum;

	return 0;
}
4905
/* POSIX timer used to periodically accumulate 32-bit MSRs before they wrap */
timer_t timerid;

/* Timer callback, update the sum of MSRs periodically. */
static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i, ret;
	int cpu = t->cpu_id;

	UNUSED(c);
	UNUSED(p);

	assert(!no_msr);

	for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
		unsigned long long msr_cur, msr_last;
		off_t offset;

		if (!idx_valid(i))
			continue;
		offset = idx_to_offset(i);
		if (offset < 0)
			continue;
		ret = get_msr(cpu, offset, &msr_cur);
		if (ret) {
			fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset);
			continue;
		}

		/* remember raw low 32 bits; accumulate the wrap-corrected delta */
		msr_last = per_cpu_msr_sum[cpu].entries[i].last;
		per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff;

		DELTA_WRAP32(msr_cur, msr_last);
		per_cpu_msr_sum[cpu].entries[i].sum += msr_last;
	}
	return 0;
}
4942
/* SIGEV_THREAD notification function: refresh the MSR sums on every cpu. */
static void msr_record_handler(union sigval v)
{
	UNUSED(v);

	for_all_cpus(update_msr_sum, EVEN_COUNTERS);
}
4949
4950 void msr_sum_record(void)
4951 {
4952 struct itimerspec its;
4953 struct sigevent sev;
4954
4955 per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array));
4956 if (!per_cpu_msr_sum) {
4957 fprintf(outf, "Can not allocate memory for long time MSR.\n");
4958 return;
4959 }
4960 /*
4961 * Signal handler might be restricted, so use thread notifier instead.
4962 */
4963 memset(&sev, 0, sizeof(struct sigevent));
4964 sev.sigev_notify = SIGEV_THREAD;
4965 sev.sigev_notify_function = msr_record_handler;
4966
4967 sev.sigev_value.sival_ptr = &timerid;
4968 if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) {
4969 fprintf(outf, "Can not create timer.\n");
4970 goto release_msr;
4971 }
4972
4973 its.it_value.tv_sec = 0;
4974 its.it_value.tv_nsec = 1;
4975 /*
4976 * A wraparound time has been calculated early.
4977 * Some sources state that the peak power for a
4978 * microprocessor is usually 1.5 times the TDP rating,
4979 * use 2 * TDP for safety.
4980 */
4981 its.it_interval.tv_sec = rapl_joule_counter_range / 2;
4982 its.it_interval.tv_nsec = 0;
4983
4984 if (timer_settime(timerid, 0, &its, NULL) == -1) {
4985 fprintf(outf, "Can not set timer.\n");
4986 goto release_timer;
4987 }
4988 return;
4989
4990 release_timer:
4991 timer_delete(timerid);
4992 release_msr:
4993 free(per_cpu_msr_sum);
4994 }
4995
/*
 * set_my_sched_priority(pri)
 * return previous priority on success
 * return value < -20 on failure
 */
int set_my_sched_priority(int priority)
{
	int previous;

	/* getpriority() may legitimately return -1; disambiguate via errno */
	errno = 0;
	previous = getpriority(PRIO_PROCESS, 0);
	if (previous == -1 && errno)
		return -21;

	if (setpriority(PRIO_PROCESS, 0, priority) != 0)
		return -21;

	/* read back to confirm the kernel accepted the new priority */
	errno = 0;
	if (getpriority(PRIO_PROCESS, 0) != priority)
		return -21;

	return previous;
}
5022
/*
 * turbostat_loop()
 *
 * Main interval-mode measurement loop.  Alternates between EVEN and
 * ODD counter snapshots: sleep, snapshot, compute EVEN/ODD deltas,
 * print.  Any counter-read failure (-1) or detected topology/cpuset
 * change triggers re_initialize() and a restart; more than 10
 * back-to-back restarts is fatal.
 */
void turbostat_loop()
{
	int retval;
	int restarted = 0;
	unsigned int done_iters = 0;

	setup_signal_handler();

	/*
	 * elevate own priority for interval mode
	 *
	 * ignore on error - we probably don't have permission to set it, but
	 * it's not a big deal
	 */
	set_my_sched_priority(-20);

restart:
	restarted++;

	snapshot_proc_sysfs_files();
	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
	first_counter_read = 0;
	if (retval < -1) {
		/* hard failure */
		exit(retval);
	} else if (retval == -1) {
		/* soft failure (e.g. cpu went offline): re-init, give up after 10 tries */
		if (restarted > 10) {
			exit(retval);
		}
		re_initialize();
		goto restart;
	}
	restarted = 0;
	done_iters = 0;
	gettimeofday(&tv_even, (struct timezone *)NULL);

	while (1) {
		/* restart if the set of present cpus changed */
		if (for_all_proc_cpus(cpu_is_not_present)) {
			re_initialize();
			goto restart;
		}
		/* restart if the cgroup effective cpuset changed */
		if (update_effective_str(false)) {
			re_initialize();
			goto restart;
		}
		do_sleep();
		if (snapshot_proc_sysfs_files())
			goto restart;
		retval = for_all_cpus(get_counters, ODD_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_odd, (struct timezone *)NULL);
		timersub(&tv_odd, &tv_even, &tv_delta);
		if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
			re_initialize();
			goto restart;
		}
		compute_average(EVEN_COUNTERS);
		format_all_counters(EVEN_COUNTERS);
		flush_output_stdout();
		if (exit_requested)
			break;
		if (num_iterations && ++done_iters >= num_iterations)
			break;
		do_sleep();
		if (snapshot_proc_sysfs_files())
			goto restart;
		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_even, (struct timezone *)NULL);
		timersub(&tv_even, &tv_odd, &tv_delta);
		if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
			re_initialize();
			goto restart;
		}
		compute_average(ODD_COUNTERS);
		format_all_counters(ODD_COUNTERS);
		flush_output_stdout();
		if (exit_requested)
			break;
		if (num_iterations && ++done_iters >= num_iterations)
			break;
	}
}
5115
5116 void check_dev_msr()
5117 {
5118 struct stat sb;
5119 char pathname[32];
5120
5121 if (no_msr)
5122 return;
5123
5124 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
5125 if (stat(pathname, &sb))
5126 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
5127 no_msr = 1;
5128 }
5129
/*
 * check for CAP_SYS_RAWIO
 * return 0 on success
 * return 1 on fail
 */
int check_for_cap_sys_rawio(void)
{
        cap_flag_value_t flag = CAP_CLEAR;
        cap_t caps;
        int ret;

        caps = cap_get_proc();
        if (caps == NULL)
                return 1;

        /* success only when the flag is both readable and set */
        if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &flag) == 0 && flag == CAP_SET)
                ret = 0;
        else
                ret = 1;

        if (cap_free(caps) == -1)
                err(-6, "cap_free\n");

        return ret;
}
5161
5162 void check_msr_permission(void)
5163 {
5164 int failed = 0;
5165 char pathname[32];
5166
5167 if (no_msr)
5168 return;
5169
5170 /* check for CAP_SYS_RAWIO */
5171 failed += check_for_cap_sys_rawio();
5172
5173 /* test file permissions */
5174 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
5175 if (euidaccess(pathname, R_OK)) {
5176 failed++;
5177 }
5178
5179 if (failed) {
5180 warnx("Failed to access %s. Some of the counters may not be available\n"
5181 "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr");
5182 no_msr = 1;
5183 }
5184 }
5185
5186 void probe_bclk(void)
5187 {
5188 unsigned long long msr;
5189 unsigned int base_ratio;
5190
5191 if (!platform->has_nhm_msrs || no_msr)
5192 return;
5193
5194 if (platform->bclk_freq == BCLK_100MHZ)
5195 bclk = 100.00;
5196 else if (platform->bclk_freq == BCLK_133MHZ)
5197 bclk = 133.33;
5198 else if (platform->bclk_freq == BCLK_SLV)
5199 bclk = slm_bclk();
5200 else
5201 return;
5202
5203 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
5204 base_ratio = (msr >> 8) & 0xFF;
5205
5206 base_hz = base_ratio * bclk * 1000000;
5207 has_base_hz = 1;
5208
5209 if (platform->enable_tsc_tweak)
5210 tsc_tweak = base_hz / tsc_hz;
5211 }
5212
/* Strip every '_' character from the string s, in place. */
static void remove_underbar(char *s)
{
        char *src, *dst;

        for (src = s, dst = s; *src; src++) {
                if (*src != '_')
                        *dst++ = *src;
        }

        *dst = '\0';
}
5225
5226 static void dump_turbo_ratio_info(void)
5227 {
5228 if (!has_turbo)
5229 return;
5230
5231 if (!platform->has_nhm_msrs || no_msr)
5232 return;
5233
5234 if (platform->trl_msrs & TRL_LIMIT2)
5235 dump_turbo_ratio_limit2();
5236
5237 if (platform->trl_msrs & TRL_LIMIT1)
5238 dump_turbo_ratio_limit1();
5239
5240 if (platform->trl_msrs & TRL_BASE) {
5241 dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT);
5242
5243 if (is_hybrid)
5244 dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT);
5245 }
5246
5247 if (platform->trl_msrs & TRL_ATOM)
5248 dump_atom_turbo_ratio_limits();
5249
5250 if (platform->trl_msrs & TRL_KNL)
5251 dump_knl_turbo_ratio_limits();
5252
5253 if (platform->has_config_tdp)
5254 dump_config_tdp();
5255 }
5256
5257 static int read_sysfs_int(char *path)
5258 {
5259 FILE *input;
5260 int retval = -1;
5261
5262 input = fopen(path, "r");
5263 if (input == NULL) {
5264 if (debug)
5265 fprintf(outf, "NSFOD %s\n", path);
5266 return (-1);
5267 }
5268 if (fscanf(input, "%d", &retval) != 1)
5269 err(1, "%s: failed to read int from file", path);
5270 fclose(input);
5271
5272 return (retval);
5273 }
5274
5275 static void dump_sysfs_file(char *path)
5276 {
5277 FILE *input;
5278 char cpuidle_buf[64];
5279
5280 input = fopen(path, "r");
5281 if (input == NULL) {
5282 if (debug)
5283 fprintf(outf, "NSFOD %s\n", path);
5284 return;
5285 }
5286 if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
5287 err(1, "%s: failed to read file", path);
5288 fclose(input);
5289
5290 fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
5291 }
5292
/*
 * probe_intel_uncore_frequency()
 * Probe the intel_uncore_frequency sysfs interface: first the legacy
 * per-package/die layout ("package_XX_die_YY"), then the per-cluster
 * layout ("uncoreNN").  Unless --quiet, print current limits,
 * initial (hardware) limits, and the current frequency for each unit.
 */
static void probe_intel_uncore_frequency(void)
{
	int i, j;
	char path[256];

	if (!genuine_intel)
		return;

	if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
		goto probe_cluster;

	/* NOTE(review): only the legacy path sets BIC_UNCORE_MHZ -- confirm the cluster path is meant to skip it */
	BIC_PRESENT(BIC_UNCORE_MHZ);

	if (quiet)
		return;

	for (i = 0; i < topo.num_packages; ++i) {
		for (j = 0; j < topo.num_die; ++j) {
			int k, l;
			char path_base[128];

			sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
				j);

			/* current min/max limits */
			sprintf(path, "%s/min_freq_khz", path_base);
			k = read_sysfs_int(path);
			sprintf(path, "%s/max_freq_khz", path_base);
			l = read_sysfs_int(path);
			fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);

			/* initial (hardware) limits */
			sprintf(path, "%s/initial_min_freq_khz", path_base);
			k = read_sysfs_int(path);
			sprintf(path, "%s/initial_max_freq_khz", path_base);
			l = read_sysfs_int(path);
			fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);

			sprintf(path, "%s/current_freq_khz", path_base);
			k = read_sysfs_int(path);
			fprintf(outf, " %d MHz\n", k / 1000);
		}
	}
	return;

probe_cluster:
	if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
		return;

	if (quiet)
		return;

	/* iterate uncore00, uncore01, ... until a directory is missing */
	for (i = 0;; ++i) {
		int k, l;
		char path_base[128];
		int package_id, domain_id, cluster_id;

		sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i);

		if (access(path_base, R_OK))
			break;

		sprintf(path, "%s/package_id", path_base);
		package_id = read_sysfs_int(path);

		sprintf(path, "%s/domain_id", path_base);
		domain_id = read_sysfs_int(path);

		sprintf(path, "%s/fabric_cluster_id", path_base);
		cluster_id = read_sysfs_int(path);

		/* current min/max limits */
		sprintf(path, "%s/min_freq_khz", path_base);
		k = read_sysfs_int(path);
		sprintf(path, "%s/max_freq_khz", path_base);
		l = read_sysfs_int(path);
		fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id,
			cluster_id, k / 1000, l / 1000);

		/* initial (hardware) limits */
		sprintf(path, "%s/initial_min_freq_khz", path_base);
		k = read_sysfs_int(path);
		sprintf(path, "%s/initial_max_freq_khz", path_base);
		l = read_sysfs_int(path);
		fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);

		sprintf(path, "%s/current_freq_khz", path_base);
		k = read_sysfs_int(path);
		fprintf(outf, " %d MHz\n", k / 1000);
	}
}
5380
/*
 * probe_graphics()
 * Locate graphics idle-residency and frequency files in sysfs, trying
 * the Xe driver layout first, then the newer per-gt i915 layout, then
 * the legacy i915 files.  Each discovered path is recorded in
 * gfx_info[] and its built-in counter is marked present at the end.
 */
static void probe_graphics(void)
{
	/* Xe graphics sysfs knobs */
	if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) {
		FILE *fp;
		char buf[8];
		bool gt0_is_gt;
		int idx;

		/* decide from gt0's name whether it is the graphics GT or the media GT */
		fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r");
		if (!fp)
			goto next;

		if (!fread(buf, sizeof(char), 7, fp)) {
			fclose(fp);
			goto next;
		}
		fclose(fp);

		if (!strncmp(buf, "gt0-rc", strlen("gt0-rc")))
			gt0_is_gt = true;	/* gt0 -> GFX counters, gt1 -> SAM counters */
		else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc")))
			gt0_is_gt = false;	/* gt0 -> SAM counters, gt1 -> GFX counters */
		else
			goto next;

		idx = gt0_is_gt ? GFX_rc6 : SAM_mc6;
		gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms";

		idx = gt0_is_gt ? GFX_MHz : SAM_MHz;
		if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK))
			gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq";

		idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz;
		if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK))
			gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq";

		/* gt1 gets whichever counter set gt0 did not */
		idx = gt0_is_gt ? SAM_mc6 : GFX_rc6;
		if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK))
			gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms";

		idx = gt0_is_gt ? SAM_MHz : GFX_MHz;
		if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK))
			gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq";

		idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz;
		if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK))
			gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq";

		goto end;
	}

next:
	/* New i915 graphics sysfs knobs */
	if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) {
		gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms";

		if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK))
			gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz";

		if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK))
			gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz";

		if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK))
			gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms";

		if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK))
			gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz";

		if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK))
			gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz";

		goto end;
	}

	/* Fall back to traditional i915 graphics sysfs knobs */
	if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
		gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms";

	if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK))
		gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz";
	else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
		gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz";

	if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK))
		gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz";
	else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
		gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz";

end:
	/* advertise whichever counters we found a path for */
	if (gfx_info[GFX_rc6].path)
		BIC_PRESENT(BIC_GFX_rc6);
	if (gfx_info[GFX_MHz].path)
		BIC_PRESENT(BIC_GFXMHz);
	if (gfx_info[GFX_ACTMHz].path)
		BIC_PRESENT(BIC_GFXACTMHz);
	if (gfx_info[SAM_mc6].path)
		BIC_PRESENT(BIC_SAM_mc6);
	if (gfx_info[SAM_MHz].path)
		BIC_PRESENT(BIC_SAMMHz);
	if (gfx_info[SAM_ACTMHz].path)
		BIC_PRESENT(BIC_SAMACTMHz);
}
5485
/*
 * dump_sysfs_cstate_config()
 * Print the cpuidle driver/governor and, for each of the first 10
 * idle states of base_cpu, a shortened state name and its description.
 */
static void dump_sysfs_cstate_config(void)
{
	char path[64];
	char name_buf[16];
	char desc[64];
	FILE *input;
	int state;
	char *sp;

	if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
		fprintf(outf, "cpuidle not loaded\n");
		return;
	}

	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");

	for (state = 0; state < 10; ++state) {

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;	/* no such state: skip it */
		if (!fgets(name_buf, sizeof(name_buf), input))
			err(1, "%s: failed to read file", path);

		/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		*sp = '\0';
		fclose(input);

		remove_underbar(name_buf);

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;	/* name but no desc: skip silently */
		if (!fgets(desc, sizeof(desc), input))
			err(1, "%s: failed to read file", path);

		/* desc retains its trailing newline from sysfs */
		fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
		fclose(input);
	}
}
5533
/*
 * dump_sysfs_pstate_config()
 * Print base_cpu's cpufreq scaling driver and governor, plus the
 * global "boost" (acpi-cpufreq style) and intel_pstate "no_turbo"
 * knobs when they exist.
 */
static void dump_sysfs_pstate_config(void)
{
	char path[64];
	char driver_buf[64];
	char governor_buf[64];
	FILE *input;
	int turbo;

	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu);
	input = fopen(path, "r");
	if (input == NULL) {
		fprintf(outf, "NSFOD %s\n", path);
		return;
	}
	if (!fgets(driver_buf, sizeof(driver_buf), input))
		err(1, "%s: failed to read file", path);
	fclose(input);

	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu);
	input = fopen(path, "r");
	if (input == NULL) {
		fprintf(outf, "NSFOD %s\n", path);
		return;
	}
	if (!fgets(governor_buf, sizeof(governor_buf), input))
		err(1, "%s: failed to read file", path);
	fclose(input);

	/* both buffers still carry the trailing newline from sysfs */
	fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
	fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);

	sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
	input = fopen(path, "r");
	if (input != NULL) {
		if (fscanf(input, "%d", &turbo) != 1)
			err(1, "%s: failed to parse number from file", path);
		fprintf(outf, "cpufreq boost: %d\n", turbo);
		fclose(input);
	}

	sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
	input = fopen(path, "r");
	if (input != NULL) {
		if (fscanf(input, "%d", &turbo) != 1)
			err(1, "%s: failed to parse number from file", path);
		fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
		fclose(input);
	}
}
5583
5584 /*
5585 * print_epb()
5586 * Decode the ENERGY_PERF_BIAS MSR
5587 */
5588 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
5589 {
5590 char *epb_string;
5591 int cpu, epb;
5592
5593 UNUSED(c);
5594 UNUSED(p);
5595
5596 if (!has_epb)
5597 return 0;
5598
5599 cpu = t->cpu_id;
5600
5601 /* EPB is per-package */
5602 if (!is_cpu_first_thread_in_package(t, c, p))
5603 return 0;
5604
5605 if (cpu_migrate(cpu)) {
5606 fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
5607 return -1;
5608 }
5609
5610 epb = get_epb(cpu);
5611 if (epb < 0)
5612 return 0;
5613
5614 switch (epb) {
5615 case ENERGY_PERF_BIAS_PERFORMANCE:
5616 epb_string = "performance";
5617 break;
5618 case ENERGY_PERF_BIAS_NORMAL:
5619 epb_string = "balanced";
5620 break;
5621 case ENERGY_PERF_BIAS_POWERSAVE:
5622 epb_string = "powersave";
5623 break;
5624 default:
5625 epb_string = "custom";
5626 break;
5627 }
5628 fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);
5629
5630 return 0;
5631 }
5632
/*
 * print_hwp()
 * Decode the MSR_HWP_CAPABILITIES
 *
 * Also dumps MSR_PM_ENABLE, MSR_HWP_REQUEST and, when supported,
 * MSR_HWP_REQUEST_PKG, MSR_HWP_INTERRUPT and MSR_HWP_STATUS.
 * Per-package; returns 0 (even if an MSR read fails part-way),
 * or -1 on migrate failure.
 */
int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (no_msr)
		return 0;

	if (!has_hwp)
		return 0;

	cpu = t->cpu_id;

	/* MSR_HWP_CAPABILITIES is per-package */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
		return 0;

	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-");

	/* MSR_PM_ENABLE[0] == 1 if HWP is enabled and MSRs visible */
	if ((msr & (1 << 0)) == 0)
		return 0;

	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
		return 0;

	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
		"(high %d guar %d eff %d low %d)\n",
		cpu, msr,
		(unsigned int)HWP_HIGHEST_PERF(msr),
		(unsigned int)HWP_GUARANTEED_PERF(msr),
		(unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr));

	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
		return 0;

	/*
	 * NOTE(review): the "window" mask 0xff3 looks odd -- the
	 * Activity_Window field at bits 41:32 is 10 bits wide, so
	 * 0x3ff may have been intended.  Confirm before changing.
	 */
	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
		"(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
		cpu, msr,
		(unsigned int)(((msr) >> 0) & 0xff),
		(unsigned int)(((msr) >> 8) & 0xff),
		(unsigned int)(((msr) >> 16) & 0xff),
		(unsigned int)(((msr) >> 24) & 0xff),
		(unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1));

	if (has_hwp_pkg) {
		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
			return 0;

		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
			"(min %d max %d des %d epp 0x%x window 0x%x)\n",
			cpu, msr,
			(unsigned int)(((msr) >> 0) & 0xff),
			(unsigned int)(((msr) >> 8) & 0xff),
			(unsigned int)(((msr) >> 16) & 0xff),
			(unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3));
	}
	if (has_hwp_notify) {
		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
			return 0;

		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
			cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis");
	}
	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
		return 0;

	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
		"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
		cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-");

	return 0;
}
5722
/*
 * print_perf_limit()
 * Decode and print the performance-limit-reasons MSRs (core,
 * graphics, ring), each with live "Active" bits in the low half and
 * sticky "Logged" bits in the high half.  Per-package; returns 0,
 * or -1 on migrate failure.
 */
int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (no_msr)
		return 0;

	cpu = t->cpu_id;

	/* per-package */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (platform->plr_msrs & PLR_CORE) {
		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		/* bits 15:0 are the currently-active reasons */
		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
			(msr & 1 << 15) ? "bit15, " : "",
			(msr & 1 << 14) ? "bit14, " : "",
			(msr & 1 << 13) ? "Transitions, " : "",
			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
			(msr & 1 << 11) ? "PkgPwrL2, " : "",
			(msr & 1 << 10) ? "PkgPwrL1, " : "",
			(msr & 1 << 9) ? "CorePwr, " : "",
			(msr & 1 << 8) ? "Amps, " : "",
			(msr & 1 << 6) ? "VR-Therm, " : "",
			(msr & 1 << 5) ? "Auto-HWP, " : "",
			(msr & 1 << 4) ? "Graphics, " : "",
			(msr & 1 << 2) ? "bit2, " : "",
			(msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : "");
		/*
		 * bits 31:16 are the sticky logged reasons.
		 * NOTE(review): "1 << 31" shifts into the sign bit of int;
		 * 1U << 31 would be cleaner.
		 */
		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
			(msr & 1 << 31) ? "bit31, " : "",
			(msr & 1 << 30) ? "bit30, " : "",
			(msr & 1 << 29) ? "Transitions, " : "",
			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
			(msr & 1 << 27) ? "PkgPwrL2, " : "",
			(msr & 1 << 26) ? "PkgPwrL1, " : "",
			(msr & 1 << 25) ? "CorePwr, " : "",
			(msr & 1 << 24) ? "Amps, " : "",
			(msr & 1 << 22) ? "VR-Therm, " : "",
			(msr & 1 << 21) ? "Auto-HWP, " : "",
			(msr & 1 << 20) ? "Graphics, " : "",
			(msr & 1 << 18) ? "bit18, " : "",
			(msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");

	}
	if (platform->plr_msrs & PLR_GFX) {
		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
			(msr & 1 << 0) ? "PROCHOT, " : "",
			(msr & 1 << 1) ? "ThermStatus, " : "",
			(msr & 1 << 4) ? "Graphics, " : "",
			(msr & 1 << 6) ? "VR-Therm, " : "",
			(msr & 1 << 8) ? "Amps, " : "",
			(msr & 1 << 9) ? "GFXPwr, " : "",
			(msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
			(msr & 1 << 16) ? "PROCHOT, " : "",
			(msr & 1 << 17) ? "ThermStatus, " : "",
			(msr & 1 << 20) ? "Graphics, " : "",
			(msr & 1 << 22) ? "VR-Therm, " : "",
			(msr & 1 << 24) ? "Amps, " : "",
			(msr & 1 << 25) ? "GFXPwr, " : "",
			(msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
	}
	if (platform->plr_msrs & PLR_RING) {
		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		fprintf(outf, " (Active: %s%s%s%s%s%s)",
			(msr & 1 << 0) ? "PROCHOT, " : "",
			(msr & 1 << 1) ? "ThermStatus, " : "",
			(msr & 1 << 6) ? "VR-Therm, " : "",
			(msr & 1 << 8) ? "Amps, " : "",
			(msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
		fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
			(msr & 1 << 16) ? "PROCHOT, " : "",
			(msr & 1 << 17) ? "ThermStatus, " : "",
			(msr & 1 << 22) ? "VR-Therm, " : "",
			(msr & 1 << 24) ? "Amps, " : "",
			(msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
	}
	return 0;
}
5819
5820 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
5821 #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
5822
5823 double get_quirk_tdp(void)
5824 {
5825 if (platform->rapl_quirk_tdp)
5826 return platform->rapl_quirk_tdp;
5827
5828 return 135.0;
5829 }
5830
5831 double get_tdp_intel(void)
5832 {
5833 unsigned long long msr;
5834
5835 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO)
5836 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
5837 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
5838 return get_quirk_tdp();
5839 }
5840
/* The AMD path always uses the quirk/default TDP value. */
double get_tdp_amd(void)
{
        return get_quirk_tdp();
}
5845
/*
 * rapl_probe_intel()
 * Decode MSR_RAPL_POWER_UNIT into the global RAPL scale factors
 * (rapl_power_units, rapl_energy_units, rapl_dram_energy_units,
 * rapl_time_units) and compute rapl_joule_counter_range.
 * Also trims the built-in column set: Watts or Joules (per
 * --Joules), and the PKG_/RAM_ throttle columns if unsupported.
 */
void rapl_probe_intel(void)
{
	unsigned long long msr;
	unsigned int time_unit;
	double tdp;
	const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt;
	const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J;

	/* show either Watts or Joules, never both */
	if (rapl_joules)
		bic_enabled &= ~bic_watt_bits;
	else
		bic_enabled &= ~bic_joules_bits;

	if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS))
		bic_enabled &= ~BIC_PKG__;
	if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS))
		bic_enabled &= ~BIC_RAM__;

	/* units on package 0, verify later other packages match */
	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
		return;

	rapl_power_units = 1.0 / (1 << (msr & 0xF));
	if (platform->has_rapl_divisor)
		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
	else
		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));

	/* some platforms use a fixed 15.3 uJ DRAM energy unit */
	if (platform->has_fixed_rapl_unit)
		rapl_dram_energy_units = (15.3 / 1000000);
	else
		rapl_dram_energy_units = rapl_energy_units;

	time_unit = msr >> 16 & 0xF;
	if (time_unit == 0)
		time_unit = 0xA;	/* treat 0 as the 2^-10 sec default */

	rapl_time_units = 1.0 / (1 << (time_unit));

	tdp = get_tdp_intel();

	/* seconds until the 32-bit energy counter wraps, at TDP draw */
	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
	if (!quiet)
		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
}
5891
5892 void rapl_probe_amd(void)
5893 {
5894 unsigned long long msr;
5895 double tdp;
5896 const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt;
5897 const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J;
5898
5899 if (rapl_joules)
5900 bic_enabled &= ~bic_watt_bits;
5901 else
5902 bic_enabled &= ~bic_joules_bits;
5903
5904 if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
5905 return;
5906
5907 rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
5908 rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
5909 rapl_power_units = ldexp(1.0, -(msr & 0xf));
5910
5911 tdp = get_tdp_amd();
5912
5913 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
5914 if (!quiet)
5915 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
5916 }
5917
5918 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
5919 {
5920 fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
5921 cpu, label,
5922 ((msr >> 15) & 1) ? "EN" : "DIS",
5923 ((msr >> 0) & 0x7FFF) * rapl_power_units,
5924 (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
5925 (((msr >> 16) & 1) ? "EN" : "DIS"));
5926
5927 return;
5928 }
5929
/*
 * print_rapl()
 * Dump the RAPL configuration MSRs (units, package/DRAM power-info,
 * package/DRAM/core/graphics limits and policies) for the first
 * thread of each package.  Returns 0 on success, -1 on migrate or
 * unit-MSR failure, and other negative codes on later read failures.
 */
int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	const char *msr_name;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (!platform->rapl_msrs)
		return 0;

	/* RAPL counters are per package, so print only for 1st thread/package */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	/* AMD and Intel keep the unit definitions in different MSRs */
	if (platform->rapl_msrs & RAPL_AMD_F17H) {
		msr_name = "MSR_RAPL_PWR_UNIT";
		if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
			return -1;
	} else {
		msr_name = "MSR_RAPL_POWER_UNIT";
		if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
			return -1;
	}

	fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
		rapl_power_units, rapl_energy_units, rapl_time_units);

	if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) {

		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
			return -5;

		/* TDP, min/max power limits, and max time window */
		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
			cpu, msr,
			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);

	}
	if (platform->rapl_msrs & RAPL_PKG) {

		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
			return -9;

		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 63) & 1 ? "" : "UN");

		print_power_limit_msr(cpu, msr, "PKG Limit #1");
		/* PKG Limit #2 lives in the upper 32 bits of the same MSR */
		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
			cpu,
			((msr >> 47) & 1) ? "EN" : "DIS",
			((msr >> 32) & 0x7FFF) * rapl_power_units,
			(1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
			((msr >> 48) & 1) ? "EN" : "DIS");

		if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr))
			return -9;

		/* PKG Limit #4, a 13-bit power field in MSR_VR_CURRENT_CONFIG */
		fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr);
		fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n",
			cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
	}

	if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) {
		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
			return -6;

		/* NOTE(review): the stray ',' in "MSR_DRAM_POWER_INFO,:" output looks unintended */
		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
			cpu, msr,
			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
	}
	if (platform->rapl_msrs & RAPL_DRAM) {
		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 31) & 1 ? "" : "UN");

		print_power_limit_msr(cpu, msr, "DRAM Limit");
	}
	if (platform->rapl_msrs & RAPL_CORE_POLICY) {
		if (get_msr(cpu, MSR_PP0_POLICY, &msr))
			return -7;

		fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
	}
	if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) {
		if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
		print_power_limit_msr(cpu, msr, "Cores Limit");
	}
	if (platform->rapl_msrs & RAPL_GFX) {
		if (get_msr(cpu, MSR_PP1_POLICY, &msr))
			return -8;

		fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);

		if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
		print_power_limit_msr(cpu, msr, "GFX Limit");
	}
	return 0;
}
6048
6049 /*
6050 * probe_rapl()
6051 *
6052 * sets rapl_power_units, rapl_energy_units, rapl_time_units
6053 */
6054 void probe_rapl(void)
6055 {
6056 if (!platform->rapl_msrs || no_msr)
6057 return;
6058
6059 if (genuine_intel)
6060 rapl_probe_intel();
6061 if (authentic_amd || hygon_genuine)
6062 rapl_probe_amd();
6063
6064 if (quiet)
6065 return;
6066
6067 for_all_cpus(print_rapl, ODD_COUNTERS);
6068 }
6069
/*
 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
 * the Thermal Control Circuit (TCC) activates.
 * This is usually equal to tjMax.
 *
 * Older processors do not have this MSR, so there we guess,
 * but also allow cmdline over-ride with -T.
 *
 * Several MSR temperature values are in units of degrees-C
 * below this value, including the Digital Thermal Sensor (DTS),
 * Package Thermal Management Sensor (PTM), and thermal event thresholds.
 */
int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	unsigned int tcc_default, tcc_offset;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	/* tj_max is used only for dts or ptm */
	if (!(do_dts || do_ptm))
		return 0;

	/* this is a per-package concept */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	/* -T on the command line overrides everything */
	if (tj_max_override != 0) {
		tj_max = tj_max_override;
		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max);
		return 0;
	}

	/* Temperature Target MSR is Nehalem and newer only */
	if (!platform->has_nhm_msrs || no_msr)
		goto guess;

	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
		goto guess;

	/* TCC activation temperature, degrees C, bits 23:16 */
	tcc_default = (msr >> 16) & 0xFF;

	if (!quiet) {
		int bits = platform->tcc_offset_bits;
		unsigned long long enabled = 0;

		/* bit 30 of MSR_PLATFORM_INFO is used here as the TCC-offset enable */
		if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled))
			enabled = (enabled >> 30) & 1;

		if (bits && enabled) {
			tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0);
			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
				cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
		} else {
			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
		}
	}

	/* a zero target is not credible: fall back to guessing */
	if (!tcc_default)
		goto guess;

	tj_max = tcc_default;

	return 0;

guess:
	tj_max = TJMAX_DEFAULT;
	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);

	return 0;
}
6149
/*
 * Dump DTS/PTM temperature MSRs: per-package PTM status/interrupt
 * thresholds, and (with --debug) the per-core DTS equivalents.
 * Readings are stored as degrees below tj_max, hence the subtraction.
 */
int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	unsigned int dts, dts2;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (no_msr)
		return 0;

	if (!(do_dts || do_ptm))
		return 0;

	cpu = t->cpu_id;

	/* DTS is per-core, no need to print for each thread */
	if (!is_cpu_first_thread_in_core(t, c, p))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	/* Package Thermal Management: printed once per package. */
	if (do_ptm && is_cpu_first_core_in_package(t, c, p)) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return 0;

		/* Bits 22:16: degrees below tj_max. */
		dts = (msr >> 16) & 0x7F;
		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);

		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
			return 0;

		/* Two programmable interrupt thresholds, bits 22:16 and 14:8. */
		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
			cpu, msr, tj_max - dts, tj_max - dts2);
	}

	/* Digital Thermal Sensor: per-core detail, only under --debug. */
	if (do_dts && debug) {
		unsigned int resolution;

		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		/* Bits 30:27: reading resolution in degrees C. */
		resolution = (msr >> 27) & 0xF;
		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
			cpu, msr, tj_max - dts, resolution);

		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
			cpu, msr, tj_max - dts, tj_max - dts2);
	}

	return 0;
}
6214
6215 void probe_thermal(void)
6216 {
6217 if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
6218 BIC_PRESENT(BIC_CORE_THROT_CNT);
6219 else
6220 BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
6221
6222 for_all_cpus(set_temperature_target, ODD_COUNTERS);
6223
6224 if (quiet)
6225 return;
6226
6227 for_all_cpus(print_thermal, ODD_COUNTERS);
6228 }
6229
6230 int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
6231 {
6232 unsigned int eax, ebx, ecx, edx;
6233
6234 UNUSED(c);
6235 UNUSED(p);
6236
6237 if (!genuine_intel)
6238 return 0;
6239
6240 if (cpu_migrate(t->cpu_id)) {
6241 fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
6242 return -1;
6243 }
6244
6245 if (max_level < 0x1a)
6246 return 0;
6247
6248 __cpuid(0x1a, eax, ebx, ecx, edx);
6249 eax = (eax >> 24) & 0xFF;
6250 if (eax == 0x20)
6251 t->is_atom = true;
6252 return 0;
6253 }
6254
6255 void decode_feature_control_msr(void)
6256 {
6257 unsigned long long msr;
6258
6259 if (no_msr)
6260 return;
6261
6262 if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
6263 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
6264 base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
6265 }
6266
6267 void decode_misc_enable_msr(void)
6268 {
6269 unsigned long long msr;
6270
6271 if (no_msr)
6272 return;
6273
6274 if (!genuine_intel)
6275 return;
6276
6277 if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
6278 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
6279 base_cpu, msr,
6280 msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
6281 msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
6282 msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
6283 msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
6284 msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
6285 }
6286
6287 void decode_misc_feature_control(void)
6288 {
6289 unsigned long long msr;
6290
6291 if (no_msr)
6292 return;
6293
6294 if (!platform->has_msr_misc_feature_control)
6295 return;
6296
6297 if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
6298 fprintf(outf,
6299 "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
6300 base_cpu, msr, msr & (0 << 0) ? "No-" : "", msr & (1 << 0) ? "No-" : "",
6301 msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : "");
6302 }
6303
6304 /*
6305 * Decode MSR_MISC_PWR_MGMT
6306 *
6307 * Decode the bits according to the Nehalem documentation
6308 * bit[0] seems to continue to have same meaning going forward
6309 * bit[1] less so...
6310 */
6311 void decode_misc_pwr_mgmt_msr(void)
6312 {
6313 unsigned long long msr;
6314
6315 if (no_msr)
6316 return;
6317
6318 if (!platform->has_msr_misc_pwr_mgmt)
6319 return;
6320
6321 if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
6322 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
6323 base_cpu, msr,
6324 msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS");
6325 }
6326
6327 /*
6328 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
6329 *
6330 * This MSRs are present on Silvermont processors,
6331 * Intel Atom processor E3000 series (Baytrail), and friends.
6332 */
6333 void decode_c6_demotion_policy_msr(void)
6334 {
6335 unsigned long long msr;
6336
6337 if (no_msr)
6338 return;
6339
6340 if (!platform->has_msr_c6_demotion_policy_config)
6341 return;
6342
6343 if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
6344 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
6345 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
6346
6347 if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
6348 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
6349 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
6350 }
6351
6352 void print_dev_latency(void)
6353 {
6354 char *path = "/dev/cpu_dma_latency";
6355 int fd;
6356 int value;
6357 int retval;
6358
6359 fd = open(path, O_RDONLY);
6360 if (fd < 0) {
6361 if (debug)
6362 warnx("Read %s failed", path);
6363 return;
6364 }
6365
6366 retval = read(fd, (void *)&value, sizeof(int));
6367 if (retval != sizeof(int)) {
6368 warn("read failed %s", path);
6369 close(fd);
6370 return;
6371 }
6372 fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained");
6373
6374 close(fd);
6375 }
6376
6377 static int has_instr_count_access(void)
6378 {
6379 int fd;
6380 int has_access;
6381
6382 if (no_perf)
6383 return 0;
6384
6385 fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
6386 has_access = fd != -1;
6387
6388 if (fd != -1)
6389 close(fd);
6390
6391 if (!has_access)
6392 warnx("Failed to access %s. Some of the counters may not be available\n"
6393 "\tRun as root to enable them or use %s to disable the access explicitly",
6394 "instructions retired perf counter", "--no-perf");
6395
6396 return has_access;
6397 }
6398
6399 bool is_aperf_access_required(void)
6400 {
6401 return BIC_IS_ENABLED(BIC_Avg_MHz)
6402 || BIC_IS_ENABLED(BIC_Busy)
6403 || BIC_IS_ENABLED(BIC_Bzy_MHz)
6404 || BIC_IS_ENABLED(BIC_IPC);
6405 }
6406
6407 int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
6408 double *scale_, enum rapl_unit *unit_)
6409 {
6410 if (no_perf)
6411 return -1;
6412
6413 const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name);
6414 if (scale == 0.0)
6415 return -1;
6416
6417 const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);
6418 if (unit == RAPL_UNIT_INVALID)
6419 return -1;
6420
6421 const unsigned rapl_type = read_perf_type(cai->perf_subsys);
6422 const unsigned rapl_energy_pkg_config = read_rapl_config(cai->perf_subsys, cai->perf_name);
6423
6424 const int fd_counter =
6425 open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
6426 if (fd_counter == -1)
6427 return -1;
6428
6429 /* If it's the first counter opened, make it a group descriptor */
6430 if (rci->fd_perf == -1)
6431 rci->fd_perf = fd_counter;
6432
6433 *scale_ = scale;
6434 *unit_ = unit;
6435 return fd_counter;
6436 }
6437
6438 int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
6439 double *scale, enum rapl_unit *unit)
6440 {
6441 int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit);
6442
6443 if (debug)
6444 fprintf(stderr, "add_rapl_perf_counter: %d (cpu: %d)\n", ret, cpu);
6445
6446 return ret;
6447 }
6448
6449 /*
6450 * Linux-perf manages the HW instructions-retired counter
6451 * by enabling when requested, and hiding rollover
6452 */
6453 void linux_perf_init(void)
6454 {
6455 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
6456 return;
6457
6458 if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) {
6459 fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
6460 if (fd_instr_count_percpu == NULL)
6461 err(-1, "calloc fd_instr_count_percpu");
6462 }
6463
6464 const bool aperf_required = is_aperf_access_required();
6465 if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) {
6466 fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu));
6467 if (fd_amperf_percpu == NULL)
6468 err(-1, "calloc fd_amperf_percpu");
6469 }
6470 }
6471
/*
 * Probe every architectural RAPL counter for every RAPL domain
 * (per-core or per-package depending on the platform), preferring the
 * perf interface and falling back to direct MSR access.
 */
void rapl_perf_init(void)
{
	/* RAPL domain granularity differs by platform. */
	const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
	bool *domain_visited = calloc(num_domains, sizeof(bool));

	rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
	if (rapl_counter_info_perdomain == NULL)
		err(-1, "calloc rapl_counter_info_percpu");

	/*
	 * Initialize rapl_counter_info_percpu
	 */
	for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
		struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id];
		rci->fd_perf = -1;
		for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) {
			rci->data[i] = 0;
			rci->source[i] = RAPL_SOURCE_NONE;
		}
	}

	/*
	 * Open/probe the counters
	 * If can't get it via perf, fallback to MSR
	 */
	for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) {

		const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i];
		bool has_counter = 0;
		double scale;
		enum rapl_unit unit;
		int next_domain;

		/* Reset visitation per counter: each domain is probed once per counter. */
		memset(domain_visited, 0, num_domains * sizeof(*domain_visited));

		for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {

			if (cpu_is_not_allowed(cpu))
				continue;

			/* Skip already seen and handled RAPL domains */
			next_domain =
			    platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id;

			if (domain_visited[next_domain])
				continue;

			domain_visited[next_domain] = 1;

			struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];

			/* Check if the counter is enabled and accessible */
			if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) {

				/* Use perf API for this counter */
				if (!no_perf && cai->perf_name
				    && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
					rci->source[cai->rci_index] = RAPL_SOURCE_PERF;
					rci->scale[cai->rci_index] = scale * cai->compat_scale;
					rci->unit[cai->rci_index] = unit;
					rci->flags[cai->rci_index] = cai->flags;

					/* Use MSR for this counter */
				} else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
					rci->source[cai->rci_index] = RAPL_SOURCE_MSR;
					rci->msr[cai->rci_index] = cai->msr;
					rci->msr_mask[cai->rci_index] = cai->msr_mask;
					rci->msr_shift[cai->rci_index] = cai->msr_shift;
					rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
					rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
					rci->flags[cai->rci_index] = cai->flags;
				}
			}

			if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE)
				has_counter = 1;
		}

		/* If any CPU has access to the counter, make it present */
		if (has_counter)
			BIC_PRESENT(cai->bic);
	}

	free(domain_visited);
}
6557
6558 static int has_amperf_access_via_msr(void)
6559 {
6560 if (no_msr)
6561 return 0;
6562
6563 if (probe_msr(base_cpu, MSR_IA32_APERF))
6564 return 0;
6565
6566 if (probe_msr(base_cpu, MSR_IA32_MPERF))
6567 return 0;
6568
6569 return 1;
6570 }
6571
/* Return 1 when APERF and MPERF can both be read via the perf API. */
static int has_amperf_access_via_perf(void)
{
	struct amperf_group_fd fds;

	/*
	 * Cache the last result, so we don't warn the user multiple times
	 *
	 * Negative means cached, no access
	 * Zero means not cached
	 * Positive means cached, has access
	 */
	static int has_access_cached;

	if (no_perf)
		return 0;

	if (has_access_cached != 0)
		return has_access_cached > 0;

	/* Try to open both counters as one perf group. */
	fds = open_amperf_fd(base_cpu);
	has_access_cached = (fds.aperf != -1) && (fds.mperf != -1);

	/* Warn (once, thanks to the cache) about whichever counter failed. */
	if (fds.aperf == -1)
		warnx("Failed to access %s. Some of the counters may not be available\n"
		      "\tRun as root to enable them or use %s to disable the access explicitly",
		      "APERF perf counter", "--no-perf");
	else
		close(fds.aperf);

	if (fds.mperf == -1)
		warnx("Failed to access %s. Some of the counters may not be available\n"
		      "\tRun as root to enable them or use %s to disable the access explicitly",
		      "MPERF perf counter", "--no-perf");
	else
		close(fds.mperf);

	/* Encode "no access" as negative so zero keeps meaning "not probed". */
	if (has_access_cached == 0)
		has_access_cached = -1;

	return has_access_cached > 0;
}
6613
6614 /* Check if we can access APERF and MPERF */
6615 static int has_amperf_access(void)
6616 {
6617 if (!is_aperf_access_required())
6618 return 0;
6619
6620 if (!no_msr && has_amperf_access_via_msr())
6621 return 1;
6622
6623 if (!no_perf && has_amperf_access_via_perf())
6624 return 1;
6625
6626 return 0;
6627 }
6628
/*
 * Mark the C-state residency columns present according to the platform's
 * supported-cstates mask; package C-states additionally require the
 * configured package C-state limit to permit them.
 */
void probe_cstates(void)
{
	probe_cst_limit();

	/* Core C-states: gated only by platform support. */
	if (platform->supported_cstates & CC1)
		BIC_PRESENT(BIC_CPU_c1);

	if (platform->supported_cstates & CC3)
		BIC_PRESENT(BIC_CPU_c3);

	if (platform->supported_cstates & CC6)
		BIC_PRESENT(BIC_CPU_c6);

	if (platform->supported_cstates & CC7)
		BIC_PRESENT(BIC_CPU_c7);

	/* Package C-states: also gated by the pkg C-state limit (set above). */
	if (platform->supported_cstates & PC2 && (pkg_cstate_limit >= PCL__2))
		BIC_PRESENT(BIC_Pkgpc2);

	if (platform->supported_cstates & PC3 && (pkg_cstate_limit >= PCL__3))
		BIC_PRESENT(BIC_Pkgpc3);

	if (platform->supported_cstates & PC6 && (pkg_cstate_limit >= PCL__6))
		BIC_PRESENT(BIC_Pkgpc6);

	if (platform->supported_cstates & PC7 && (pkg_cstate_limit >= PCL__7))
		BIC_PRESENT(BIC_Pkgpc7);

	if (platform->supported_cstates & PC8 && (pkg_cstate_limit >= PCL__8))
		BIC_PRESENT(BIC_Pkgpc8);

	if (platform->supported_cstates & PC9 && (pkg_cstate_limit >= PCL__9))
		BIC_PRESENT(BIC_Pkgpc9);

	if (platform->supported_cstates & PC10 && (pkg_cstate_limit >= PCL_10))
		BIC_PRESENT(BIC_Pkgpc10);

	if (platform->has_msr_module_c6_res_ms)
		BIC_PRESENT(BIC_Mod_c6);

	/* Extended C-state counters require MSR access. */
	if (platform->has_ext_cst_msrs && !no_msr) {
		BIC_PRESENT(BIC_Totl_c0);
		BIC_PRESENT(BIC_Any_c0);
		BIC_PRESENT(BIC_GFX_c0);
		BIC_PRESENT(BIC_CPUGFX);
	}

	if (quiet)
		return;

	/* Verbose dump of the C-state related configuration. */
	dump_power_ctl();
	dump_cst_cfg();
	decode_c6_demotion_policy_msr();
	print_dev_latency();
	dump_sysfs_cstate_config();
	print_irtl();
}
6686
6687 void probe_lpi(void)
6688 {
6689 if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
6690 BIC_PRESENT(BIC_CPU_LPI);
6691 else
6692 BIC_NOT_PRESENT(BIC_CPU_LPI);
6693
6694 if (!access(sys_lpi_file_sysfs, R_OK)) {
6695 sys_lpi_file = sys_lpi_file_sysfs;
6696 BIC_PRESENT(BIC_SYS_LPI);
6697 } else if (!access(sys_lpi_file_debugfs, R_OK)) {
6698 sys_lpi_file = sys_lpi_file_debugfs;
6699 BIC_PRESENT(BIC_SYS_LPI);
6700 } else {
6701 sys_lpi_file_sysfs = NULL;
6702 BIC_NOT_PRESENT(BIC_SYS_LPI);
6703 }
6704
6705 }
6706
/* Probe P-state features; with !quiet, also dump related configuration. */
void probe_pstates(void)
{
	probe_bclk();

	if (quiet)
		return;

	/* Verbose dump -- order determines the output layout. */
	dump_platform_info();
	dump_turbo_ratio_info();
	dump_sysfs_pstate_config();
	decode_misc_pwr_mgmt_msr();

	for_all_cpus(print_hwp, ODD_COUNTERS);
	for_all_cpus(print_epb, ODD_COUNTERS);
	for_all_cpus(print_perf_limit, ODD_COUNTERS);
}
6723
/*
 * Enumerate CPU features via CPUID (vendor, family/model/stepping,
 * APERF/MPERF, DTS/PTM, HWP, TSC/crystal frequency) and mark the
 * corresponding built-in columns present.
 */
void process_cpuid()
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
	unsigned long long ucode_patch = 0;
	bool ucode_patch_valid = false;

	eax = ebx = ecx = edx = 0;

	__cpuid(0, max_level, ebx, ecx, edx);

	/* Vendor ID string, compared as the three 4-byte register chunks. */
	if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
		genuine_intel = 1;
	else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
		authentic_amd = 1;
	else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
		hygon_genuine = 1;

	if (!quiet)
		fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n",
			(char *)&ebx, (char *)&edx, (char *)&ecx, max_level);

	__cpuid(1, fms, ebx, ecx, edx);
	family = (fms >> 8) & 0xf;
	model = (fms >> 4) & 0xf;
	stepping = fms & 0xf;
	/* Extended family/model fields per the CPUID specification. */
	if (family == 0xf)
		family += (fms >> 20) & 0xff;
	if (family >= 6)
		model += ((fms >> 16) & 0xf) << 4;
	ecx_flags = ecx;
	edx_flags = edx;

	if (!no_msr) {
		if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
			warnx("get_msr(UCODE)");
		else
			ucode_patch_valid = true;
	}

	/*
	 * check max extended function levels of CPUID.
	 * This is needed to check for invariant TSC.
	 * This check is valid for both Intel and AMD.
	 */
	ebx = ecx = edx = 0;
	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);

	if (!quiet) {
		fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
			family, model, stepping, family, model, stepping);
		if (ucode_patch_valid)
			fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
		fputc('\n', outf);

		fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
			ecx_flags & (1 << 0) ? "SSE3" : "-",
			ecx_flags & (1 << 3) ? "MONITOR" : "-",
			ecx_flags & (1 << 6) ? "SMX" : "-",
			ecx_flags & (1 << 7) ? "EIST" : "-",
			ecx_flags & (1 << 8) ? "TM2" : "-",
			edx_flags & (1 << 4) ? "TSC" : "-",
			edx_flags & (1 << 5) ? "MSR" : "-",
			edx_flags & (1 << 22) ? "ACPI-TM" : "-",
			edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
	}

	probe_platform_features(family, model);

	/* turbostat cannot function at all without MSR support. */
	if (!(edx_flags & (1 << 5)))
		errx(1, "CPUID: no MSR");

	if (max_extended_level >= 0x80000007) {

		/*
		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
		 * this check is valid for both Intel and AMD
		 */
		__cpuid(0x80000007, eax, ebx, ecx, edx);
		has_invariant_tsc = edx & (1 << 8);
	}

	/*
	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
	 * this check is valid for both Intel and AMD
	 */

	__cpuid(0x6, eax, ebx, ecx, edx);
	has_aperf = ecx & (1 << 0);
	if (has_aperf && has_amperf_access()) {
		BIC_PRESENT(BIC_Avg_MHz);
		BIC_PRESENT(BIC_Busy);
		BIC_PRESENT(BIC_Bzy_MHz);
		BIC_PRESENT(BIC_IPC);
	}
	do_dts = eax & (1 << 0);
	if (do_dts)
		BIC_PRESENT(BIC_CoreTmp);
	has_turbo = eax & (1 << 1);
	do_ptm = eax & (1 << 6);
	if (do_ptm)
		BIC_PRESENT(BIC_PkgTmp);
	has_hwp = eax & (1 << 7);
	has_hwp_notify = eax & (1 << 8);
	has_hwp_activity_window = eax & (1 << 9);
	has_hwp_epp = eax & (1 << 10);
	has_hwp_pkg = eax & (1 << 11);
	has_epb = ecx & (1 << 3);

	if (!quiet)
		fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
			has_aperf ? "" : "No-",
			has_turbo ? "" : "No-",
			do_dts ? "" : "No-",
			do_ptm ? "" : "No-",
			has_hwp ? "" : "No-",
			has_hwp_notify ? "" : "No-",
			has_hwp_activity_window ? "" : "No-",
			has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-");

	if (!quiet)
		decode_misc_enable_msr();

	if (max_level >= 0x7 && !quiet) {
		int has_sgx;

		ecx = 0;

		__cpuid_count(0x7, 0, eax, ebx, ecx, edx);

		has_sgx = ebx & (1 << 2);

		is_hybrid = edx & (1 << 15);

		fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-");

		if (has_sgx)
			decode_feature_control_msr();
	}

	if (max_level >= 0x15) {
		unsigned int eax_crystal;
		unsigned int ebx_tsc;

		/*
		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
		 */
		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);

		if (ebx_tsc != 0) {
			/*
			 * NOTE(review): 'ebx' here is stale (last written by the
			 * CPUID.7 call above); presumably 'ebx_tsc' was intended,
			 * and it is already known non-zero -- verify.
			 */
			if (!quiet && (ebx != 0))
				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
					eax_crystal, ebx_tsc, crystal_hz);

			/* CPUID may report 0 crystal Hz; use the platform's known value. */
			if (crystal_hz == 0)
				crystal_hz = platform->crystal_freq;

			if (crystal_hz) {
				tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
				if (!quiet)
					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
						tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
			}
		}
	}
	if (max_level >= 0x16) {
		unsigned int base_mhz, max_mhz, bus_mhz, edx;

		/*
		 * CPUID 16H Base MHz, Max MHz, Bus MHz
		 */
		base_mhz = max_mhz = bus_mhz = edx = 0;

		__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);

		bclk = bus_mhz;

		base_hz = base_mhz * 1000000;
		has_base_hz = 1;

		if (platform->enable_tsc_tweak)
			tsc_tweak = base_hz / tsc_hz;

		if (!quiet)
			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
				base_mhz, max_mhz, bus_mhz);
	}

	if (has_aperf)
		aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;

	BIC_PRESENT(BIC_IRQ);
	BIC_PRESENT(BIC_TSC_MHz);
}
6921
/* Top-level power-management feature probe, run once at startup. */
void probe_pm_features(void)
{
	probe_pstates();

	probe_cstates();

	probe_lpi();

	probe_intel_uncore_frequency();

	probe_graphics();

	probe_rapl();

	probe_thermal();

	/* SMI counter lives in an MSR present on Nehalem and later. */
	if (platform->has_nhm_msrs && !no_msr)
		BIC_PRESENT(BIC_SMI);

	if (!quiet)
		decode_misc_feature_control();
}
6944
6945 /*
6946 * in /dev/cpu/ return success for names that are numbers
6947 * ie. filter out ".", "..", "microcode".
6948 */
int dir_filter(const struct dirent *dirp)
{
	/* Keep only entries whose name starts with a digit (cpu numbers). */
	return isdigit(dirp->d_name[0]) ? 1 : 0;
}
6956
/*
 * Discover the system topology: count cpus, build the present/effective/
 * allowed cpu sets, and record per-cpu package/die/node/core/thread ids
 * in the global cpus[] array.  Sizes topo.* for all later allocations.
 */
void topology_probe(bool startup)
{
	int i;
	int max_core_id = 0;
	int max_package_id = 0;
	int max_die_id = 0;
	int max_siblings = 0;

	/* Initialize num_cpus, max_cpu_num */
	set_max_cpu_num();
	topo.num_cpus = 0;
	for_all_proc_cpus(count_cpus);
	if (!summary_only && topo.num_cpus > 1)
		BIC_PRESENT(BIC_CPU);

	if (debug > 1)
		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);

	cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
	if (cpus == NULL)
		err(1, "calloc cpus");

	/*
	 * Allocate and initialize cpu_present_set
	 */
	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_present_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
	for_all_proc_cpus(mark_cpu_present);

	/*
	 * Allocate and initialize cpu_effective_set
	 */
	cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_effective_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set);
	update_effective_set(startup);

	/*
	 * Allocate and initialize cpu_allowed_set
	 */
	cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_allowed_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set);

	/*
	 * Validate and update cpu_allowed_set.
	 *
	 * Make sure all cpus in cpu_subset are also in cpu_present_set during startup.
	 * Give a warning when cpus in cpu_subset become unavailable at runtime.
	 * Give a warning when cpus are not effective because of cgroup setting.
	 *
	 * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset.
	 */
	for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
		if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
			continue;

		if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) {
			if (cpu_subset) {
				/* cpus in cpu_subset must be in cpu_present_set during startup */
				if (startup)
					err(1, "cpu%d not present", i);
				else
					fprintf(stderr, "cpu%d not present\n", i);
			}
			continue;
		}

		if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) {
			if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) {
				fprintf(stderr, "cpu%d not effective\n", i);
				continue;
			}
		}

		CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set);
	}

	if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set))
		err(-ENODEV, "No valid cpus found");
	/* Constrain this process to the cpus it is actually monitoring. */
	sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set);

	/*
	 * Allocate and initialize cpu_affinity_set
	 */
	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_affinity_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);

	for_all_proc_cpus(init_thread_id);

	/*
	 * For online cpus
	 * find max_core_id, max_package_id
	 */
	for (i = 0; i <= topo.max_cpu_num; ++i) {
		int siblings;

		if (cpu_is_not_present(i)) {
			if (debug > 1)
				fprintf(outf, "cpu%d NOT PRESENT\n", i);
			continue;
		}

		cpus[i].logical_cpu_id = i;

		/* get package information */
		cpus[i].physical_package_id = get_physical_package_id(i);
		if (cpus[i].physical_package_id > max_package_id)
			max_package_id = cpus[i].physical_package_id;

		/* get die information */
		cpus[i].die_id = get_die_id(i);
		if (cpus[i].die_id > max_die_id)
			max_die_id = cpus[i].die_id;

		/* get numa node information */
		cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
		if (cpus[i].physical_node_id > topo.max_node_num)
			topo.max_node_num = cpus[i].physical_node_id;

		/* get core information */
		cpus[i].physical_core_id = get_core_id(i);
		if (cpus[i].physical_core_id > max_core_id)
			max_core_id = cpus[i].physical_core_id;

		/* get thread information */
		siblings = get_thread_siblings(&cpus[i]);
		if (siblings > max_siblings)
			max_siblings = siblings;
		/* Count each core once, via its thread 0. */
		if (cpus[i].thread_id == 0)
			topo.num_cores++;
	}

	/* Ids are assumed dense: sizing is max id + 1. */
	topo.cores_per_node = max_core_id + 1;
	if (debug > 1)
		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node);
	if (!summary_only && topo.cores_per_node > 1)
		BIC_PRESENT(BIC_Core);

	topo.num_die = max_die_id + 1;
	if (debug > 1)
		fprintf(outf, "max_die_id %d, sizing for %d die\n", max_die_id, topo.num_die);
	if (!summary_only && topo.num_die > 1)
		BIC_PRESENT(BIC_Die);

	topo.num_packages = max_package_id + 1;
	if (debug > 1)
		fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages);
	if (!summary_only && topo.num_packages > 1)
		BIC_PRESENT(BIC_Package);

	set_node_data();
	if (debug > 1)
		fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
	if (!summary_only && topo.nodes_per_pkg > 1)
		BIC_PRESENT(BIC_Node);

	topo.threads_per_core = max_siblings;
	if (debug > 1)
		fprintf(outf, "max_siblings %d\n", max_siblings);

	if (debug < 1)
		return;

	/* Full per-cpu topology dump for debugging. */
	for (i = 0; i <= topo.max_cpu_num; ++i) {
		if (cpu_is_not_present(i))
			continue;
		fprintf(outf,
			"cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
			i, cpus[i].physical_package_id, cpus[i].die_id,
			cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id);
	}

}
7141
7142 void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
7143 {
7144 int i;
7145 int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages;
7146 int num_threads = topo.threads_per_core * num_cores;
7147
7148 *t = calloc(num_threads, sizeof(struct thread_data));
7149 if (*t == NULL)
7150 goto error;
7151
7152 for (i = 0; i < num_threads; i++)
7153 (*t)[i].cpu_id = -1;
7154
7155 *c = calloc(num_cores, sizeof(struct core_data));
7156 if (*c == NULL)
7157 goto error;
7158
7159 for (i = 0; i < num_cores; i++) {
7160 (*c)[i].core_id = -1;
7161 (*c)[i].base_cpu = -1;
7162 }
7163
7164 *p = calloc(topo.num_packages, sizeof(struct pkg_data));
7165 if (*p == NULL)
7166 goto error;
7167
7168 for (i = 0; i < topo.num_packages; i++) {
7169 (*p)[i].package_id = i;
7170 (*p)[i].base_cpu = -1;
7171 }
7172
7173 return;
7174 error:
7175 err(1, "calloc counters");
7176 }
7177
7178 /*
7179 * init_counter()
7180 *
7181 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
7182 */
7183 void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id)
7184 {
7185 int pkg_id = cpus[cpu_id].physical_package_id;
7186 int node_id = cpus[cpu_id].logical_node_id;
7187 int core_id = cpus[cpu_id].physical_core_id;
7188 int thread_id = cpus[cpu_id].thread_id;
7189 struct thread_data *t;
7190 struct core_data *c;
7191 struct pkg_data *p;
7192
7193 /* Workaround for systems where physical_node_id==-1
7194 * and logical_node_id==(-1 - topo.num_cpus)
7195 */
7196 if (node_id < 0)
7197 node_id = 0;
7198
7199 t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
7200 c = GET_CORE(core_base, core_id, node_id, pkg_id);
7201 p = GET_PKG(pkg_base, pkg_id);
7202
7203 t->cpu_id = cpu_id;
7204 if (!cpu_is_not_allowed(cpu_id)) {
7205 if (c->base_cpu < 0)
7206 c->base_cpu = t->cpu_id;
7207 if (p->base_cpu < 0)
7208 p->base_cpu = t->cpu_id;
7209 }
7210
7211 c->core_id = core_id;
7212 p->package_id = pkg_id;
7213 }
7214
/* Wire cpu_id into both the even and odd counter sets. */
int initialize_counters(int cpu_id)
{
	init_counter(EVEN_COUNTERS, cpu_id);
	init_counter(ODD_COUNTERS, cpu_id);
	return 0;
}
7221
7222 void allocate_output_buffer()
7223 {
7224 output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
7225 outp = output_buffer;
7226 if (outp == NULL)
7227 err(-1, "calloc output buffer");
7228 }
7229
7230 void allocate_fd_percpu(void)
7231 {
7232 fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
7233 if (fd_percpu == NULL)
7234 err(-1, "calloc fd_percpu");
7235 }
7236
7237 void allocate_irq_buffers(void)
7238 {
7239 irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
7240 if (irq_column_2_cpu == NULL)
7241 err(-1, "calloc %d", topo.num_cpus);
7242
7243 irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
7244 if (irqs_per_cpu == NULL)
7245 err(-1, "calloc %d", topo.max_cpu_num + 1);
7246 }
7247
7248 int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p)
7249 {
7250 topo.allowed_cpus++;
7251 if ((int)t->cpu_id == c->base_cpu)
7252 topo.allowed_cores++;
7253 if ((int)t->cpu_id == p->base_cpu)
7254 topo.allowed_packages++;
7255
7256 return 0;
7257 }
7258
7259 void topology_update(void)
7260 {
7261 topo.allowed_cpus = 0;
7262 topo.allowed_cores = 0;
7263 topo.allowed_packages = 0;
7264 for_all_cpus(update_topo, ODD_COUNTERS);
7265 }
7266
/*
 * setup_all_buffers()
 *
 * Probe the topology and allocate all per-CPU/core/package state.
 * Order matters: topology_probe() must run first so the topo.* counts
 * used by the allocators below are valid, and the counter arrays must
 * exist before initialize_counters() indexes into them.
 */
void setup_all_buffers(bool startup)
{
	topology_probe(startup);
	allocate_irq_buffers();
	allocate_fd_percpu();
	allocate_counters(&thread_even, &core_even, &package_even);
	allocate_counters(&thread_odd, &core_odd, &package_odd);
	allocate_output_buffer();
	for_all_proc_cpus(initialize_counters);
	topology_update();
}
7278
7279 void set_base_cpu(void)
7280 {
7281 int i;
7282
7283 for (i = 0; i < topo.max_cpu_num + 1; ++i) {
7284 if (cpu_is_not_allowed(i))
7285 continue;
7286 base_cpu = i;
7287 if (debug > 1)
7288 fprintf(outf, "base_cpu = %d\n", base_cpu);
7289 return;
7290 }
7291 err(-ENODEV, "No valid cpus found");
7292 }
7293
7294 static void set_amperf_source(void)
7295 {
7296 amperf_source = AMPERF_SOURCE_PERF;
7297
7298 const bool aperf_required = is_aperf_access_required();
7299 if (no_perf || !aperf_required || !has_amperf_access_via_perf())
7300 amperf_source = AMPERF_SOURCE_MSR;
7301
7302 if (quiet || !debug)
7303 return;
7304
7305 fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf");
7306 }
7307
7308 bool has_added_counters(void)
7309 {
7310 /*
7311 * It only makes sense to call this after the command line is parsed,
7312 * otherwise sys structure is not populated.
7313 */
7314
7315 return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters;
7316 }
7317
7318 bool is_msr_access_required(void)
7319 {
7320 if (no_msr)
7321 return false;
7322
7323 if (has_added_counters())
7324 return true;
7325
7326 return BIC_IS_ENABLED(BIC_SMI)
7327 || BIC_IS_ENABLED(BIC_CPU_c1)
7328 || BIC_IS_ENABLED(BIC_CPU_c3)
7329 || BIC_IS_ENABLED(BIC_CPU_c6)
7330 || BIC_IS_ENABLED(BIC_CPU_c7)
7331 || BIC_IS_ENABLED(BIC_Mod_c6)
7332 || BIC_IS_ENABLED(BIC_CoreTmp)
7333 || BIC_IS_ENABLED(BIC_Totl_c0)
7334 || BIC_IS_ENABLED(BIC_Any_c0)
7335 || BIC_IS_ENABLED(BIC_GFX_c0)
7336 || BIC_IS_ENABLED(BIC_CPUGFX)
7337 || BIC_IS_ENABLED(BIC_Pkgpc3)
7338 || BIC_IS_ENABLED(BIC_Pkgpc6)
7339 || BIC_IS_ENABLED(BIC_Pkgpc2)
7340 || BIC_IS_ENABLED(BIC_Pkgpc7)
7341 || BIC_IS_ENABLED(BIC_Pkgpc8)
7342 || BIC_IS_ENABLED(BIC_Pkgpc9)
7343 || BIC_IS_ENABLED(BIC_Pkgpc10)
7344 /* TODO: Multiplex access with perf */
7345 || BIC_IS_ENABLED(BIC_CorWatt)
7346 || BIC_IS_ENABLED(BIC_Cor_J)
7347 || BIC_IS_ENABLED(BIC_PkgWatt)
7348 || BIC_IS_ENABLED(BIC_CorWatt)
7349 || BIC_IS_ENABLED(BIC_GFXWatt)
7350 || BIC_IS_ENABLED(BIC_RAMWatt)
7351 || BIC_IS_ENABLED(BIC_Pkg_J)
7352 || BIC_IS_ENABLED(BIC_Cor_J)
7353 || BIC_IS_ENABLED(BIC_GFX_J)
7354 || BIC_IS_ENABLED(BIC_RAM_J)
7355 || BIC_IS_ENABLED(BIC_PKG__)
7356 || BIC_IS_ENABLED(BIC_RAM__)
7357 || BIC_IS_ENABLED(BIC_PkgTmp)
7358 || (is_aperf_access_required() && !has_amperf_access_via_perf());
7359 }
7360
/*
 * check_msr_access()
 *
 * Decide whether this run needs MSR access, verify /dev/cpu/.../msr
 * availability and permissions, and disable MSR-backed columns if not.
 */
void check_msr_access(void)
{
	/* nothing enabled needs an MSR: behave as if --no-msr was given */
	if (!is_msr_access_required())
		no_msr = 1;

	check_dev_msr();
	check_msr_permission();

	/* drop every MSR-backed column when MSR access is off */
	if (no_msr)
		bic_disable_msr_access();
}
7372
7373 void check_perf_access(void)
7374 {
7375 const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC);
7376 if (no_perf || !intrcount_required || !has_instr_count_access())
7377 bic_enabled &= ~BIC_IPC;
7378
7379 const bool aperf_required = is_aperf_access_required();
7380 if (!aperf_required || !has_amperf_access()) {
7381 bic_enabled &= ~BIC_Avg_MHz;
7382 bic_enabled &= ~BIC_Busy;
7383 bic_enabled &= ~BIC_Bzy_MHz;
7384 bic_enabled &= ~BIC_IPC;
7385 }
7386 }
7387
/*
 * turbostat_init()
 *
 * One-time startup: probe topology and allocate buffers, verify
 * MSR/perf access, probe CPU features, and disable any columns
 * that cannot be populated on this system.
 */
void turbostat_init()
{
	setup_all_buffers(true);
	set_base_cpu();
	check_msr_access();
	check_perf_access();
	process_cpuid();
	probe_pm_features();
	set_amperf_source();
	linux_perf_init();
	rapl_perf_init();

	for_all_cpus(get_cpu_type, ODD_COUNTERS);
	for_all_cpus(get_cpu_type, EVEN_COUNTERS);

	/* open the instructions-retired fd up front so IPC can be read later */
	if (DO_BIC(BIC_IPC))
		(void)get_instr_count_fd(base_cpu);

	/*
	 * If TSC tweak is needed, but couldn't get it,
	 * disable more BICs, since it can't be reported accurately.
	 */
	if (platform->enable_tsc_tweak && !has_base_hz) {
		bic_enabled &= ~BIC_Busy;
		bic_enabled &= ~BIC_Bzy_MHz;
	}
}
7415
7416 int fork_it(char **argv)
7417 {
7418 pid_t child_pid;
7419 int status;
7420
7421 snapshot_proc_sysfs_files();
7422 status = for_all_cpus(get_counters, EVEN_COUNTERS);
7423 first_counter_read = 0;
7424 if (status)
7425 exit(status);
7426 gettimeofday(&tv_even, (struct timezone *)NULL);
7427
7428 child_pid = fork();
7429 if (!child_pid) {
7430 /* child */
7431 execvp(argv[0], argv);
7432 err(errno, "exec %s", argv[0]);
7433 } else {
7434
7435 /* parent */
7436 if (child_pid == -1)
7437 err(1, "fork");
7438
7439 signal(SIGINT, SIG_IGN);
7440 signal(SIGQUIT, SIG_IGN);
7441 if (waitpid(child_pid, &status, 0) == -1)
7442 err(status, "waitpid");
7443
7444 if (WIFEXITED(status))
7445 status = WEXITSTATUS(status);
7446 }
7447 /*
7448 * n.b. fork_it() does not check for errors from for_all_cpus()
7449 * because re-starting is problematic when forking
7450 */
7451 snapshot_proc_sysfs_files();
7452 for_all_cpus(get_counters, ODD_COUNTERS);
7453 gettimeofday(&tv_odd, (struct timezone *)NULL);
7454 timersub(&tv_odd, &tv_even, &tv_delta);
7455 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
7456 fprintf(outf, "%s: Counter reset detected\n", progname);
7457 else {
7458 compute_average(EVEN_COUNTERS);
7459 format_all_counters(EVEN_COUNTERS);
7460 }
7461
7462 fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);
7463
7464 flush_output_stderr();
7465
7466 return status;
7467 }
7468
7469 int get_and_dump_counters(void)
7470 {
7471 int status;
7472
7473 snapshot_proc_sysfs_files();
7474 status = for_all_cpus(get_counters, ODD_COUNTERS);
7475 if (status)
7476 return status;
7477
7478 status = for_all_cpus(dump_counters, ODD_COUNTERS);
7479 if (status)
7480 return status;
7481
7482 flush_output_stdout();
7483
7484 return status;
7485 }
7486
7487 void print_version()
7488 {
7489 fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
7490 }
7491
7492 #define COMMAND_LINE_SIZE 2048
7493
7494 void print_bootcmd(void)
7495 {
7496 char bootcmd[COMMAND_LINE_SIZE];
7497 FILE *fp;
7498 int ret;
7499
7500 memset(bootcmd, 0, COMMAND_LINE_SIZE);
7501 fp = fopen("/proc/cmdline", "r");
7502 if (!fp)
7503 return;
7504
7505 ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp);
7506 if (ret) {
7507 bootcmd[ret] = '\0';
7508 /* the last character is already '\n' */
7509 fprintf(outf, "Kernel command line: %s", bootcmd);
7510 }
7511
7512 fclose(fp);
7513 }
7514
7515 int add_counter(unsigned int msr_num, char *path, char *name,
7516 unsigned int width, enum counter_scope scope,
7517 enum counter_type type, enum counter_format format, int flags)
7518 {
7519 struct msr_counter *msrp;
7520
7521 if (no_msr && msr_num)
7522 errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num);
7523
7524 msrp = calloc(1, sizeof(struct msr_counter));
7525 if (msrp == NULL) {
7526 perror("calloc");
7527 exit(1);
7528 }
7529
7530 msrp->msr_num = msr_num;
7531 strncpy(msrp->name, name, NAME_BYTES - 1);
7532 if (path)
7533 strncpy(msrp->path, path, PATH_BYTES - 1);
7534 msrp->width = width;
7535 msrp->type = type;
7536 msrp->format = format;
7537 msrp->flags = flags;
7538
7539 switch (scope) {
7540
7541 case SCOPE_CPU:
7542 msrp->next = sys.tp;
7543 sys.tp = msrp;
7544 sys.added_thread_counters++;
7545 if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
7546 fprintf(stderr, "exceeded max %d added thread counters\n", MAX_ADDED_COUNTERS);
7547 exit(-1);
7548 }
7549 break;
7550
7551 case SCOPE_CORE:
7552 msrp->next = sys.cp;
7553 sys.cp = msrp;
7554 sys.added_core_counters++;
7555 if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
7556 fprintf(stderr, "exceeded max %d added core counters\n", MAX_ADDED_COUNTERS);
7557 exit(-1);
7558 }
7559 break;
7560
7561 case SCOPE_PACKAGE:
7562 msrp->next = sys.pp;
7563 sys.pp = msrp;
7564 sys.added_package_counters++;
7565 if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
7566 fprintf(stderr, "exceeded max %d added package counters\n", MAX_ADDED_COUNTERS);
7567 exit(-1);
7568 }
7569 break;
7570 }
7571
7572 return 0;
7573 }
7574
/*
 * parse_add_command()
 *
 * Parse one --add argument: a comma-separated token list naming the
 * counter source (msrDDD, msr0xXXX, or a /sysfs path) plus optional
 * width (u32/u64), scope (cpu/core/package), type (cycles/seconds/usec),
 * format (raw/delta/percent), and a column name.  Unrecognized tokens
 * of up to 18 chars become the column name.  Exits via help() when no
 * source was given or add_counter() fails.
 */
void parse_add_command(char *add_command)
{
	int msr_num = 0;
	char *path = NULL;
	char name_buffer[NAME_BYTES] = "";
	int width = 64;
	int fail = 0;
	enum counter_scope scope = SCOPE_CPU;
	enum counter_type type = COUNTER_CYCLES;
	enum counter_format format = FORMAT_DELTA;

	/* each iteration consumes one comma-separated token */
	while (add_command) {

		if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
			goto next;

		if (sscanf(add_command, "msr%d", &msr_num) == 1)
			goto next;

		/* a leading '/' marks a sysfs path instead of an MSR */
		if (*add_command == '/') {
			path = add_command;
			goto next;
		}

		if (sscanf(add_command, "u%d", &width) == 1) {
			if ((width == 32) || (width == 64))
				goto next;
			/* unsupported width: fall back to 64, keep scanning token */
			width = 64;
		}
		if (!strncmp(add_command, "cpu", strlen("cpu"))) {
			scope = SCOPE_CPU;
			goto next;
		}
		if (!strncmp(add_command, "core", strlen("core"))) {
			scope = SCOPE_CORE;
			goto next;
		}
		if (!strncmp(add_command, "package", strlen("package"))) {
			scope = SCOPE_PACKAGE;
			goto next;
		}
		if (!strncmp(add_command, "cycles", strlen("cycles"))) {
			type = COUNTER_CYCLES;
			goto next;
		}
		if (!strncmp(add_command, "seconds", strlen("seconds"))) {
			type = COUNTER_SECONDS;
			goto next;
		}
		if (!strncmp(add_command, "usec", strlen("usec"))) {
			type = COUNTER_USEC;
			goto next;
		}
		if (!strncmp(add_command, "raw", strlen("raw"))) {
			format = FORMAT_RAW;
			goto next;
		}
		if (!strncmp(add_command, "delta", strlen("delta"))) {
			format = FORMAT_DELTA;
			goto next;
		}
		if (!strncmp(add_command, "percent", strlen("percent"))) {
			format = FORMAT_PERCENT;
			goto next;
		}

		/* anything else is taken as the user-supplied column name */
		if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {	/* 18 < NAME_BYTES */
			char *eos;

			eos = strchr(name_buffer, ',');
			if (eos)
				*eos = '\0';
			goto next;
		}

next:
		/* advance to the next comma-separated token, if any */
		add_command = strchr(add_command, ',');
		if (add_command) {
			*add_command = '\0';
			add_command++;
		}

	}
	if ((msr_num == 0) && (path == NULL)) {
		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
		fail++;
	}

	/* generate default column header */
	if (*name_buffer == '\0') {
		if (width == 32)
			sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
		else
			sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
	}

	if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
		fail++;

	if (fail) {
		help();
		exit(1);
	}
}
7679
7680 int is_deferred_add(char *name)
7681 {
7682 int i;
7683
7684 for (i = 0; i < deferred_add_index; ++i)
7685 if (!strcmp(name, deferred_add_names[i]))
7686 return 1;
7687 return 0;
7688 }
7689
7690 int is_deferred_skip(char *name)
7691 {
7692 int i;
7693
7694 for (i = 0; i < deferred_skip_index; ++i)
7695 if (!strcmp(name, deferred_skip_names[i]))
7696 return 1;
7697 return 0;
7698 }
7699
/*
 * probe_sysfs()
 *
 * Discover cpuidle states under /sys on base_cpu and register up to two
 * counters per state: residency time (shown as a percentage, pass 1)
 * and usage count (shown as a delta, pass 2).  States are scanned from
 * state10 down to state0; missing states are simply skipped.
 */
void probe_sysfs(void)
{
	char path[64];
	char name_buf[16];
	FILE *input;
	int state;
	char *sp;

	/* pass 1: per-state residency time, FORMAT_PERCENT columns */
	for (state = 10; state >= 0; --state) {

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
		if (!fgets(name_buf, sizeof(name_buf), input))
			err(1, "%s: failed to read file", path);

		/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		/* the '%' suffix marks this column as a percentage */
		*sp = '%';
		*(sp + 1) = '\0';

		remove_underbar(name_buf);

		fclose(input);

		/* path is relative; presumably resolved per-CPU by the reader */
		sprintf(path, "cpuidle/state%d/time", state);

		if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
			continue;

		if (is_deferred_skip(name_buf))
			continue;

		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU);
	}

	/* pass 2: per-state usage counts, FORMAT_DELTA columns */
	for (state = 10; state >= 0; --state) {

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
		if (!fgets(name_buf, sizeof(name_buf), input))
			err(1, "%s: failed to read file", path);
		/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		*sp = '\0';
		fclose(input);

		remove_underbar(name_buf);

		sprintf(path, "cpuidle/state%d/usage", state);

		if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
			continue;

		if (is_deferred_skip(name_buf))
			continue;

		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU);
	}

}
7768
7769 /*
7770 * parse cpuset with following syntax
7771 * 1,2,4..6,8-10 and set bits in cpu_subset
7772 */
7773 void parse_cpu_command(char *optarg)
7774 {
7775 if (!strcmp(optarg, "core")) {
7776 if (cpu_subset)
7777 goto error;
7778 show_core_only++;
7779 return;
7780 }
7781 if (!strcmp(optarg, "package")) {
7782 if (cpu_subset)
7783 goto error;
7784 show_pkg_only++;
7785 return;
7786 }
7787 if (show_core_only || show_pkg_only)
7788 goto error;
7789
7790 cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
7791 if (cpu_subset == NULL)
7792 err(3, "CPU_ALLOC");
7793 cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
7794
7795 CPU_ZERO_S(cpu_subset_size, cpu_subset);
7796
7797 if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size))
7798 goto error;
7799
7800 return;
7801
7802 error:
7803 fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
7804 help();
7805 exit(-1);
7806 }
7807
/*
 * cmdline()
 *
 * Parse command-line options.  getopt runs twice: the first pass picks
 * up only --no-msr/--no-perf, because they invalidate other options
 * (e.g. --add msr... combined with --no-msr); the second pass handles
 * everything else.
 */
void cmdline(int argc, char **argv)
{
	int opt;
	int option_index = 0;
	static struct option long_options[] = {
		{ "add", required_argument, 0, 'a' },
		{ "cpu", required_argument, 0, 'c' },
		{ "Dump", no_argument, 0, 'D' },
		{ "debug", no_argument, 0, 'd' },	/* internal, not documented */
		{ "enable", required_argument, 0, 'e' },
		{ "interval", required_argument, 0, 'i' },
		{ "IPC", no_argument, 0, 'I' },
		{ "num_iterations", required_argument, 0, 'n' },
		{ "header_iterations", required_argument, 0, 'N' },
		{ "help", no_argument, 0, 'h' },
		{ "hide", required_argument, 0, 'H' },	// meh, -h taken by --help
		{ "Joules", no_argument, 0, 'J' },
		{ "list", no_argument, 0, 'l' },
		{ "out", required_argument, 0, 'o' },
		{ "quiet", no_argument, 0, 'q' },
		{ "no-msr", no_argument, 0, 'M' },
		{ "no-perf", no_argument, 0, 'P' },
		{ "show", required_argument, 0, 's' },
		{ "Summary", no_argument, 0, 'S' },
		{ "TCC", required_argument, 0, 'T' },
		{ "version", no_argument, 0, 'v' },
		{ 0, 0, 0, 0 }
	};

	progname = argv[0];

	/*
	 * Parse some options early, because they may make other options invalid,
	 * like adding the MSR counter with --add and at the same time using --no-msr.
	 */
	while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) {
		switch (opt) {
		case 'M':
			no_msr = 1;
			break;
		case 'P':
			no_perf = 1;
			break;
		default:
			break;
		}
	}
	/* rewind so the main pass re-scans all arguments */
	optind = 0;

	while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) {
		switch (opt) {
		case 'a':
			parse_add_command(optarg);
			break;
		case 'c':
			parse_cpu_command(optarg);
			break;
		case 'D':
			dump_only++;
			break;
		case 'e':
			/* --enable specified counter */
			bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
			break;
		case 'd':
			debug++;
			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
			break;
		case 'H':
			/*
			 * --hide: do not show those specified
			 * multiple invocations simply clear more bits in enabled mask
			 */
			bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
			break;
		case 'h':
		default:
			help();
			exit(1);
		case 'i':
			{
				double interval = strtod(optarg, NULL);

				if (interval < 0.001) {
					fprintf(outf, "interval %f seconds is too small\n", interval);
					exit(2);
				}

				/* keep timeval and timespec forms of the same interval */
				interval_tv.tv_sec = interval_ts.tv_sec = interval;
				interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
			}
			break;
		case 'J':
			rapl_joules++;
			break;
		case 'l':
			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
			list_header_only++;
			quiet++;
			break;
		case 'o':
			outf = fopen_or_die(optarg, "w");
			break;
		case 'q':
			quiet = 1;
			break;
		case 'M':
		case 'P':
			/* Parsed earlier */
			break;
		case 'n':
			num_iterations = strtod(optarg, NULL);

			if (num_iterations <= 0) {
				fprintf(outf, "iterations %d should be positive number\n", num_iterations);
				exit(2);
			}
			break;
		case 'N':
			header_iterations = strtod(optarg, NULL);

			if (header_iterations <= 0) {
				fprintf(outf, "iterations %d should be positive number\n", header_iterations);
				exit(2);
			}
			break;
		case 's':
			/*
			 * --show: show only those specified
			 * The 1st invocation will clear and replace the enabled mask
			 * subsequent invocations can add to it.
			 */
			if (shown == 0)
				bic_enabled = bic_lookup(optarg, SHOW_LIST);
			else
				bic_enabled |= bic_lookup(optarg, SHOW_LIST);
			shown = 1;
			break;
		case 'S':
			summary_only++;
			break;
		case 'T':
			tj_max_override = atoi(optarg);
			break;
		case 'v':
			print_version();
			exit(0);
			break;
		}
	}
}
7960
7961 void set_rlimit(void)
7962 {
7963 struct rlimit limit;
7964
7965 if (getrlimit(RLIMIT_NOFILE, &limit) < 0)
7966 err(1, "Failed to get rlimit");
7967
7968 if (limit.rlim_max < MAX_NOFILE)
7969 limit.rlim_max = MAX_NOFILE;
7970 if (limit.rlim_cur < MAX_NOFILE)
7971 limit.rlim_cur = MAX_NOFILE;
7972
7973 if (setrlimit(RLIMIT_NOFILE, &limit) < 0)
7974 err(1, "Failed to set rlimit");
7975 }
7976
/*
 * main()
 *
 * Best-effort cgroup placement, option parsing, initialization, then
 * one of three modes: --Dump, --list, fork-a-command, or periodic loop.
 */
int main(int argc, char **argv)
{
	int fd, ret;

	/* move this process to the cgroup root; silently skipped if absent */
	fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY);
	if (fd < 0)
		goto skip_cgroup_setting;

	ret = write(fd, "0\n", 2);
	if (ret == -1)
		perror("Can't update cgroup\n");

	close(fd);

skip_cgroup_setting:
	outf = stderr;
	cmdline(argc, argv);

	if (!quiet) {
		print_version();
		print_bootcmd();
	}

	probe_sysfs();

	/* only root may raise the fd limit for the many per-CPU files */
	if (!getuid())
		set_rlimit();

	turbostat_init();

	if (!no_msr)
		msr_sum_record();

	/* dump counters and exit */
	if (dump_only)
		return get_and_dump_counters();

	/* list header and exit */
	if (list_header_only) {
		print_header(",");
		flush_output_stdout();
		return 0;
	}

	/*
	 * if any params left, it must be a command to fork
	 */
	if (argc - optind)
		return fork_it(argv + optind);
	else
		turbostat_loop();

	return 0;
}