1 // SPDX-License-Identifier: GPL-2.0-only
3 * turbostat -- show CPU frequency and C-state residency
4 * on modern Intel and AMD processors.
6 * Copyright (c) 2023 Intel Corporation.
7 * Len Brown <len.brown@intel.com>
12 #include INTEL_FAMILY_HEADER
17 #include <sys/types.h>
20 #include <sys/select.h>
21 #include <sys/resource.h>
33 #include <sys/capability.h>
36 #include <linux/perf_event.h>
37 #include <asm/unistd.h>
40 #include <linux/kernel.h>
42 #define UNUSED(x) (void)(x)
45 * This list matches the column headers, except
46 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
47 * 2. Core and CPU are moved to the end, we can't have strings that contain them
48 * matching on them for --show and --hide.
52 * buffer size used by sscanf() for added column names
53 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
56 #define PATH_BYTES 128
58 #define MAX_NOFILE 0x8000
/*
 * Scope at which a counter is collected: once per CPU (thread),
 * once per core, or once per package.
 */
enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };

/*
 * What a raw counter value represents: a plain item count, a cycle
 * count, seconds, or microseconds.
 */
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };

/* How a counter column is rendered: raw value, delta over the interval, or percentage. */
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };

/* Where APERF/MPERF values are read from: the perf subsystem, or direct MSR access. */
enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR };

/* Where RAPL energy values are read from: unavailable, perf subsystem, or direct MSR access. */
enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR };
68 char name
[NAME_BYTES
];
69 char path
[PATH_BYTES
];
71 enum counter_type type
;
72 enum counter_format format
;
73 struct msr_counter
*next
;
/* Flag bits for struct msr_counter */
#define FLAGS_HIDE (1 << 0)	/* column suppressed (e.g. via --hide) */
#define FLAGS_SHOW (1 << 1)	/* column explicitly requested (e.g. via --show) */
/*
 * NOTE(review): SYSFS_PERCPU shares bit 1 with FLAGS_SHOW -- presumably the
 * two are used on disjoint sets of counters (sysfs-discovered vs built-in);
 * confirm they are never tested on the same object.
 */
#define SYSFS_PERCPU (1 << 1)	/* sysfs counter is per-cpu, not per-package */
80 struct msr_counter bic
[] = {
81 { 0x0, "usec", "", 0, 0, 0, NULL
, 0 },
82 { 0x0, "Time_Of_Day_Seconds", "", 0, 0, 0, NULL
, 0 },
83 { 0x0, "Package", "", 0, 0, 0, NULL
, 0 },
84 { 0x0, "Node", "", 0, 0, 0, NULL
, 0 },
85 { 0x0, "Avg_MHz", "", 0, 0, 0, NULL
, 0 },
86 { 0x0, "Busy%", "", 0, 0, 0, NULL
, 0 },
87 { 0x0, "Bzy_MHz", "", 0, 0, 0, NULL
, 0 },
88 { 0x0, "TSC_MHz", "", 0, 0, 0, NULL
, 0 },
89 { 0x0, "IRQ", "", 0, 0, 0, NULL
, 0 },
90 { 0x0, "SMI", "", 32, 0, FORMAT_DELTA
, NULL
, 0 },
91 { 0x0, "sysfs", "", 0, 0, 0, NULL
, 0 },
92 { 0x0, "CPU%c1", "", 0, 0, 0, NULL
, 0 },
93 { 0x0, "CPU%c3", "", 0, 0, 0, NULL
, 0 },
94 { 0x0, "CPU%c6", "", 0, 0, 0, NULL
, 0 },
95 { 0x0, "CPU%c7", "", 0, 0, 0, NULL
, 0 },
96 { 0x0, "ThreadC", "", 0, 0, 0, NULL
, 0 },
97 { 0x0, "CoreTmp", "", 0, 0, 0, NULL
, 0 },
98 { 0x0, "CoreCnt", "", 0, 0, 0, NULL
, 0 },
99 { 0x0, "PkgTmp", "", 0, 0, 0, NULL
, 0 },
100 { 0x0, "GFX%rc6", "", 0, 0, 0, NULL
, 0 },
101 { 0x0, "GFXMHz", "", 0, 0, 0, NULL
, 0 },
102 { 0x0, "Pkg%pc2", "", 0, 0, 0, NULL
, 0 },
103 { 0x0, "Pkg%pc3", "", 0, 0, 0, NULL
, 0 },
104 { 0x0, "Pkg%pc6", "", 0, 0, 0, NULL
, 0 },
105 { 0x0, "Pkg%pc7", "", 0, 0, 0, NULL
, 0 },
106 { 0x0, "Pkg%pc8", "", 0, 0, 0, NULL
, 0 },
107 { 0x0, "Pkg%pc9", "", 0, 0, 0, NULL
, 0 },
108 { 0x0, "Pk%pc10", "", 0, 0, 0, NULL
, 0 },
109 { 0x0, "CPU%LPI", "", 0, 0, 0, NULL
, 0 },
110 { 0x0, "SYS%LPI", "", 0, 0, 0, NULL
, 0 },
111 { 0x0, "PkgWatt", "", 0, 0, 0, NULL
, 0 },
112 { 0x0, "CorWatt", "", 0, 0, 0, NULL
, 0 },
113 { 0x0, "GFXWatt", "", 0, 0, 0, NULL
, 0 },
114 { 0x0, "PkgCnt", "", 0, 0, 0, NULL
, 0 },
115 { 0x0, "RAMWatt", "", 0, 0, 0, NULL
, 0 },
116 { 0x0, "PKG_%", "", 0, 0, 0, NULL
, 0 },
117 { 0x0, "RAM_%", "", 0, 0, 0, NULL
, 0 },
118 { 0x0, "Pkg_J", "", 0, 0, 0, NULL
, 0 },
119 { 0x0, "Cor_J", "", 0, 0, 0, NULL
, 0 },
120 { 0x0, "GFX_J", "", 0, 0, 0, NULL
, 0 },
121 { 0x0, "RAM_J", "", 0, 0, 0, NULL
, 0 },
122 { 0x0, "Mod%c6", "", 0, 0, 0, NULL
, 0 },
123 { 0x0, "Totl%C0", "", 0, 0, 0, NULL
, 0 },
124 { 0x0, "Any%C0", "", 0, 0, 0, NULL
, 0 },
125 { 0x0, "GFX%C0", "", 0, 0, 0, NULL
, 0 },
126 { 0x0, "CPUGFX%", "", 0, 0, 0, NULL
, 0 },
127 { 0x0, "Core", "", 0, 0, 0, NULL
, 0 },
128 { 0x0, "CPU", "", 0, 0, 0, NULL
, 0 },
129 { 0x0, "APIC", "", 0, 0, 0, NULL
, 0 },
130 { 0x0, "X2APIC", "", 0, 0, 0, NULL
, 0 },
131 { 0x0, "Die", "", 0, 0, 0, NULL
, 0 },
132 { 0x0, "GFXAMHz", "", 0, 0, 0, NULL
, 0 },
133 { 0x0, "IPC", "", 0, 0, 0, NULL
, 0 },
134 { 0x0, "CoreThr", "", 0, 0, 0, NULL
, 0 },
135 { 0x0, "UncMHz", "", 0, 0, 0, NULL
, 0 },
136 { 0x0, "SAM%mc6", "", 0, 0, 0, NULL
, 0 },
137 { 0x0, "SAMMHz", "", 0, 0, 0, NULL
, 0 },
138 { 0x0, "SAMAMHz", "", 0, 0, 0, NULL
, 0 },
/* Number of entries in the built-in-counter (bic[]) table above */
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))

/*
 * Built-In Counter (BIC) bit masks: one bit per bic[] table entry, in the
 * same order.  These are OR-ed into the 64-bit bic_enabled / bic_present
 * masks and tested via the DO_BIC()/BIC_IS_ENABLED() macros below.
 */
#define BIC_USEC (1ULL << 0)
#define BIC_TOD (1ULL << 1)
#define BIC_Package (1ULL << 2)
#define BIC_Node (1ULL << 3)
#define BIC_Avg_MHz (1ULL << 4)
#define BIC_Busy (1ULL << 5)
#define BIC_Bzy_MHz (1ULL << 6)
#define BIC_TSC_MHz (1ULL << 7)
#define BIC_IRQ (1ULL << 8)
#define BIC_SMI (1ULL << 9)
#define BIC_sysfs (1ULL << 10)
#define BIC_CPU_c1 (1ULL << 11)
#define BIC_CPU_c3 (1ULL << 12)
#define BIC_CPU_c6 (1ULL << 13)
#define BIC_CPU_c7 (1ULL << 14)
#define BIC_ThreadC (1ULL << 15)
#define BIC_CoreTmp (1ULL << 16)
#define BIC_CoreCnt (1ULL << 17)
#define BIC_PkgTmp (1ULL << 18)
#define BIC_GFX_rc6 (1ULL << 19)
#define BIC_GFXMHz (1ULL << 20)
#define BIC_Pkgpc2 (1ULL << 21)
#define BIC_Pkgpc3 (1ULL << 22)
#define BIC_Pkgpc6 (1ULL << 23)
#define BIC_Pkgpc7 (1ULL << 24)
#define BIC_Pkgpc8 (1ULL << 25)
#define BIC_Pkgpc9 (1ULL << 26)
#define BIC_Pkgpc10 (1ULL << 27)
#define BIC_CPU_LPI (1ULL << 28)
#define BIC_SYS_LPI (1ULL << 29)
#define BIC_PkgWatt (1ULL << 30)
#define BIC_CorWatt (1ULL << 31)
#define BIC_GFXWatt (1ULL << 32)
#define BIC_PkgCnt (1ULL << 33)
#define BIC_RAMWatt (1ULL << 34)
#define BIC_PKG__ (1ULL << 35)
#define BIC_RAM__ (1ULL << 36)
#define BIC_Pkg_J (1ULL << 37)
#define BIC_Cor_J (1ULL << 38)
#define BIC_GFX_J (1ULL << 39)
#define BIC_RAM_J (1ULL << 40)
#define BIC_Mod_c6 (1ULL << 41)
#define BIC_Totl_c0 (1ULL << 42)
#define BIC_Any_c0 (1ULL << 43)
#define BIC_GFX_c0 (1ULL << 44)
#define BIC_CPUGFX (1ULL << 45)
#define BIC_Core (1ULL << 46)
#define BIC_CPU (1ULL << 47)
#define BIC_APIC (1ULL << 48)
#define BIC_X2APIC (1ULL << 49)
#define BIC_Die (1ULL << 50)
#define BIC_GFXACTMHz (1ULL << 51)
#define BIC_IPC (1ULL << 52)
#define BIC_CORE_THROT_CNT (1ULL << 53)
#define BIC_UNCORE_MHZ (1ULL << 54)
#define BIC_SAM_mc6 (1ULL << 55)
#define BIC_SAMMHz (1ULL << 56)
#define BIC_SAMACTMHz (1ULL << 57)

/* Convenience groups of related columns (usable as --show/--hide categories) */
#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6)
#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)

/* Columns that exist but are not printed unless explicitly requested */
#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
/* Columns the user wants shown: everything except those disabled by default */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
/* Columns this platform/kernel can actually supply; the initial set is always available */
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;

/* A column is displayed only when it is both enabled (wanted) and present (available) */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
/* Counter should be read when present, even if not displayed */
#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
/* Forward declaration; holds perf fds for the APERF+MPERF event group */
struct amperf_group_fd;

char *proc_stat = "/proc/stat";
int *fd_instr_count_percpu;	/* per-CPU perf fds for the instruction counter */
struct amperf_group_fd *fd_amperf_percpu;	/* File descriptors for perf group with APERF and MPERF counters. */
struct timeval interval_tv = { 5, 0 };	/* measurement interval, default 5 seconds */
struct timespec interval_ts = { 5, 0 };	/* same interval, timespec form */

unsigned int num_iterations;	/* NOTE(review): presumably 0 means "run until interrupted" -- confirm */
unsigned int header_iterations;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;	/* report RAPL energy in Joules instead of Watts */
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int has_turbo;
unsigned int is_hybrid;
unsigned int units = 1000000;	/* MHz etc */
unsigned int genuine_intel;	/* CPUID vendor string checks */
unsigned int authentic_amd;
unsigned int hygon_genuine;
unsigned int max_level, max_extended_level;	/* highest supported CPUID basic/extended leaves */
unsigned int has_invariant_tsc;
unsigned int aperf_mperf_multiplier = 1;
unsigned int has_base_hz;
double tsc_tweak = 1.0;	/* scale factor applied when base clock != TSC clock (see enable_tsc_tweak) */
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;	/* NOTE(review): outp looks like the write cursor into output_buffer -- confirm */
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int tj_max_override;	/* user-supplied TjMax override, degrees C */
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
unsigned int crystal_hz;
unsigned long long tsc_hz;
unsigned int has_hwp;	/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;	/* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;	/* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;	/* IA32_HWP_REQUEST_PKG */
unsigned int first_counter_read = 1;	/* set until the first sample has been taken */
enum amperf_source amperf_source;
292 struct gfx_sysfs_info
{
296 unsigned long long val_ull
;
299 static struct gfx_sysfs_info gfx_info
[GFX_MAX
];
/*
 * Read the 64-bit MSR at @offset on @cpu into *@msr.
 * A non-zero return indicates failure (callers treat it as "value unknown").
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr);
303 /* Model specific support Start */
305 /* List of features that may diverge among different platforms */
306 struct platform_features
{
307 bool has_msr_misc_feature_control
; /* MSR_MISC_FEATURE_CONTROL */
308 bool has_msr_misc_pwr_mgmt
; /* MSR_MISC_PWR_MGMT */
309 bool has_nhm_msrs
; /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */
310 bool has_config_tdp
; /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */
311 int bclk_freq
; /* CPU base clock */
312 int crystal_freq
; /* Crystal clock to use when not available from CPUID.15 */
313 int supported_cstates
; /* Core cstates and Package cstates supported */
314 int cst_limit
; /* MSR_PKG_CST_CONFIG_CONTROL */
315 bool has_cst_auto_convension
; /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */
316 bool has_irtl_msrs
; /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */
317 bool has_msr_core_c1_res
; /* MSR_CORE_C1_RES */
318 bool has_msr_module_c6_res_ms
; /* MSR_MODULE_C6_RES_MS */
319 bool has_msr_c6_demotion_policy_config
; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */
320 bool has_msr_atom_pkg_c6_residency
; /* MSR_ATOM_PKG_C6_RESIDENCY */
321 bool has_msr_knl_core_c6_residency
; /* MSR_KNL_CORE_C6_RESIDENCY */
322 bool has_ext_cst_msrs
; /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */
323 bool has_cst_prewake_bit
; /* Cstate prewake bit in MSR_IA32_POWER_CTL */
324 int trl_msrs
; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
325 int plr_msrs
; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
326 int rapl_msrs
; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
327 bool has_per_core_rapl
; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
328 bool has_rapl_divisor
; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
329 bool has_fixed_rapl_unit
; /* Fixed Energy Unit used for DRAM RAPL Domain */
330 int rapl_quirk_tdp
; /* Hardcoded TDP value when cannot be retrieved from hardware */
331 int tcc_offset_bits
; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
332 bool enable_tsc_tweak
; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
333 bool need_perf_multiplier
; /* mperf/aperf multiplier */
336 struct platform_data
{
338 const struct platform_features
*features
;
#define SLM_BCLK_FREQS 5
/*
 * Silvermont base-clock frequencies in MHz, indexed by the low bits of
 * MSR_FSB_FREQ.  NOTE(review): the ordering/values are assumed to follow
 * the MSR encoding -- confirm against the Intel SDM MSR_FSB_FREQ entry.
 */
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
351 double slm_bclk(void)
353 unsigned long long msr
= 3;
357 if (get_msr(base_cpu
, MSR_FSB_FREQ
, &msr
))
358 fprintf(outf
, "SLM BCLK: unknown\n");
361 if (i
>= SLM_BCLK_FREQS
) {
362 fprintf(outf
, "SLM BCLK[%d] invalid\n", i
);
365 freq
= slm_freq_table
[i
];
368 fprintf(outf
, "SLM BCLK: %.1f Mhz\n", freq
);
373 /* For Package cstate limit */
374 enum package_cstate_limit
{
386 /* For Turbo Ratio Limit MSRs */
387 enum turbo_ratio_limit_msrs
{
393 TRL_CORECOUNT
= BIT(5),
396 /* For Perf Limit Reason MSRs */
397 enum perf_limit_reason_msrs
{
405 RAPL_PKG_POWER_LIMIT
= BIT(0), /* 0x610 MSR_PKG_POWER_LIMIT */
406 RAPL_PKG_ENERGY_STATUS
= BIT(1), /* 0x611 MSR_PKG_ENERGY_STATUS */
407 RAPL_PKG_PERF_STATUS
= BIT(2), /* 0x613 MSR_PKG_PERF_STATUS */
408 RAPL_PKG_POWER_INFO
= BIT(3), /* 0x614 MSR_PKG_POWER_INFO */
409 RAPL_DRAM_POWER_LIMIT
= BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */
410 RAPL_DRAM_ENERGY_STATUS
= BIT(5), /* 0x619 MSR_DRAM_ENERGY_STATUS */
411 RAPL_DRAM_PERF_STATUS
= BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */
412 RAPL_DRAM_POWER_INFO
= BIT(7), /* 0x61c MSR_DRAM_POWER_INFO */
413 RAPL_CORE_POWER_LIMIT
= BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */
414 RAPL_CORE_ENERGY_STATUS
= BIT(9), /* 0x639 MSR_PP0_ENERGY_STATUS */
415 RAPL_CORE_POLICY
= BIT(10), /* 0x63a MSR_PP0_POLICY */
416 RAPL_GFX_POWER_LIMIT
= BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */
417 RAPL_GFX_ENERGY_STATUS
= BIT(12), /* 0x641 MSR_PP1_ENERGY_STATUS */
418 RAPL_GFX_POLICY
= BIT(13), /* 0x642 MSR_PP1_POLICY */
419 RAPL_AMD_PWR_UNIT
= BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */
420 RAPL_AMD_CORE_ENERGY_STAT
= BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */
421 RAPL_AMD_PKG_ENERGY_STAT
= BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */
/* Commonly-probed groups of RAPL MSRs (bits from the rapl_msrs enum above) */
#define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT)
#define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT)
#define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT)
#define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS)

#define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
#define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
#define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY)
/* Fixed: was RAPL_GFX_POLIGY, an undefined identifier; the enum constant is RAPL_GFX_POLICY */
#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLICY)

/* AMD Family 17h+ RAPL MSRs */
#define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
451 static const struct platform_features nhm_features
= {
452 .has_msr_misc_pwr_mgmt
= 1,
454 .bclk_freq
= BCLK_133MHZ
,
455 .supported_cstates
= CC1
| CC3
| CC6
| PC3
| PC6
,
456 .cst_limit
= CST_LIMIT_NHM
,
457 .trl_msrs
= TRL_BASE
,
460 static const struct platform_features nhx_features
= {
461 .has_msr_misc_pwr_mgmt
= 1,
463 .bclk_freq
= BCLK_133MHZ
,
464 .supported_cstates
= CC1
| CC3
| CC6
| PC3
| PC6
,
465 .cst_limit
= CST_LIMIT_NHM
,
468 static const struct platform_features snb_features
= {
469 .has_msr_misc_feature_control
= 1,
470 .has_msr_misc_pwr_mgmt
= 1,
472 .bclk_freq
= BCLK_100MHZ
,
473 .supported_cstates
= CC1
| CC3
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
,
474 .cst_limit
= CST_LIMIT_SNB
,
476 .trl_msrs
= TRL_BASE
,
477 .rapl_msrs
= RAPL_PKG
| RAPL_CORE_ALL
| RAPL_GFX
| RAPL_PKG_POWER_INFO
,
480 static const struct platform_features snx_features
= {
481 .has_msr_misc_feature_control
= 1,
482 .has_msr_misc_pwr_mgmt
= 1,
484 .bclk_freq
= BCLK_100MHZ
,
485 .supported_cstates
= CC1
| CC3
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
,
486 .cst_limit
= CST_LIMIT_SNB
,
488 .trl_msrs
= TRL_BASE
,
489 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_CORE_ALL
| RAPL_DRAM_ALL
,
492 static const struct platform_features ivb_features
= {
493 .has_msr_misc_feature_control
= 1,
494 .has_msr_misc_pwr_mgmt
= 1,
497 .bclk_freq
= BCLK_100MHZ
,
498 .supported_cstates
= CC1
| CC3
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
,
499 .cst_limit
= CST_LIMIT_SNB
,
501 .trl_msrs
= TRL_BASE
,
502 .rapl_msrs
= RAPL_PKG
| RAPL_CORE_ALL
| RAPL_GFX
| RAPL_PKG_POWER_INFO
,
505 static const struct platform_features ivx_features
= {
506 .has_msr_misc_feature_control
= 1,
507 .has_msr_misc_pwr_mgmt
= 1,
509 .bclk_freq
= BCLK_100MHZ
,
510 .supported_cstates
= CC1
| CC3
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
,
511 .cst_limit
= CST_LIMIT_SNB
,
513 .trl_msrs
= TRL_BASE
| TRL_LIMIT1
,
514 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_CORE_ALL
| RAPL_DRAM_ALL
,
517 static const struct platform_features hsw_features
= {
518 .has_msr_misc_feature_control
= 1,
519 .has_msr_misc_pwr_mgmt
= 1,
522 .bclk_freq
= BCLK_100MHZ
,
523 .supported_cstates
= CC1
| CC3
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
,
524 .cst_limit
= CST_LIMIT_HSW
,
526 .trl_msrs
= TRL_BASE
,
527 .plr_msrs
= PLR_CORE
| PLR_GFX
| PLR_RING
,
528 .rapl_msrs
= RAPL_PKG
| RAPL_CORE_ALL
| RAPL_GFX
| RAPL_PKG_POWER_INFO
,
531 static const struct platform_features hsx_features
= {
532 .has_msr_misc_feature_control
= 1,
533 .has_msr_misc_pwr_mgmt
= 1,
536 .bclk_freq
= BCLK_100MHZ
,
537 .supported_cstates
= CC1
| CC3
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
,
538 .cst_limit
= CST_LIMIT_HSW
,
540 .trl_msrs
= TRL_BASE
| TRL_LIMIT1
| TRL_LIMIT2
,
541 .plr_msrs
= PLR_CORE
| PLR_RING
,
542 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_DRAM_ALL
,
543 .has_fixed_rapl_unit
= 1,
546 static const struct platform_features hswl_features
= {
547 .has_msr_misc_feature_control
= 1,
548 .has_msr_misc_pwr_mgmt
= 1,
551 .bclk_freq
= BCLK_100MHZ
,
552 .supported_cstates
= CC1
| CC3
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
| PC8
| PC9
| PC10
,
553 .cst_limit
= CST_LIMIT_HSW
,
555 .trl_msrs
= TRL_BASE
,
556 .plr_msrs
= PLR_CORE
| PLR_GFX
| PLR_RING
,
557 .rapl_msrs
= RAPL_PKG
| RAPL_CORE_ALL
| RAPL_GFX
| RAPL_PKG_POWER_INFO
,
560 static const struct platform_features hswg_features
= {
561 .has_msr_misc_feature_control
= 1,
562 .has_msr_misc_pwr_mgmt
= 1,
565 .bclk_freq
= BCLK_100MHZ
,
566 .supported_cstates
= CC1
| CC3
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
,
567 .cst_limit
= CST_LIMIT_HSW
,
569 .trl_msrs
= TRL_BASE
,
570 .plr_msrs
= PLR_CORE
| PLR_GFX
| PLR_RING
,
571 .rapl_msrs
= RAPL_PKG
| RAPL_CORE_ALL
| RAPL_GFX
| RAPL_PKG_POWER_INFO
,
574 static const struct platform_features bdw_features
= {
575 .has_msr_misc_feature_control
= 1,
576 .has_msr_misc_pwr_mgmt
= 1,
579 .bclk_freq
= BCLK_100MHZ
,
580 .supported_cstates
= CC1
| CC3
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
| PC8
| PC9
| PC10
,
581 .cst_limit
= CST_LIMIT_HSW
,
583 .trl_msrs
= TRL_BASE
,
584 .rapl_msrs
= RAPL_PKG
| RAPL_CORE_ALL
| RAPL_GFX
| RAPL_PKG_POWER_INFO
,
587 static const struct platform_features bdwg_features
= {
588 .has_msr_misc_feature_control
= 1,
589 .has_msr_misc_pwr_mgmt
= 1,
592 .bclk_freq
= BCLK_100MHZ
,
593 .supported_cstates
= CC1
| CC3
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
,
594 .cst_limit
= CST_LIMIT_HSW
,
596 .trl_msrs
= TRL_BASE
,
597 .rapl_msrs
= RAPL_PKG
| RAPL_CORE_ALL
| RAPL_GFX
| RAPL_PKG_POWER_INFO
,
600 static const struct platform_features bdx_features
= {
601 .has_msr_misc_feature_control
= 1,
602 .has_msr_misc_pwr_mgmt
= 1,
605 .bclk_freq
= BCLK_100MHZ
,
606 .supported_cstates
= CC1
| CC3
| CC6
| PC2
| PC3
| PC6
,
607 .cst_limit
= CST_LIMIT_HSW
,
609 .has_cst_auto_convension
= 1,
610 .trl_msrs
= TRL_BASE
,
611 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_DRAM_ALL
,
612 .has_fixed_rapl_unit
= 1,
615 static const struct platform_features skl_features
= {
616 .has_msr_misc_feature_control
= 1,
617 .has_msr_misc_pwr_mgmt
= 1,
620 .bclk_freq
= BCLK_100MHZ
,
621 .crystal_freq
= 24000000,
622 .supported_cstates
= CC1
| CC3
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
| PC8
| PC9
| PC10
,
623 .cst_limit
= CST_LIMIT_HSW
,
625 .has_ext_cst_msrs
= 1,
626 .trl_msrs
= TRL_BASE
,
627 .tcc_offset_bits
= 6,
628 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_CORE_ALL
| RAPL_DRAM
| RAPL_DRAM_PERF_STATUS
| RAPL_GFX
,
629 .enable_tsc_tweak
= 1,
632 static const struct platform_features cnl_features
= {
633 .has_msr_misc_feature_control
= 1,
634 .has_msr_misc_pwr_mgmt
= 1,
637 .bclk_freq
= BCLK_100MHZ
,
638 .supported_cstates
= CC1
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
| PC8
| PC9
| PC10
,
639 .cst_limit
= CST_LIMIT_HSW
,
641 .has_msr_core_c1_res
= 1,
642 .has_ext_cst_msrs
= 1,
643 .trl_msrs
= TRL_BASE
,
644 .tcc_offset_bits
= 6,
645 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_CORE_ALL
| RAPL_DRAM
| RAPL_DRAM_PERF_STATUS
| RAPL_GFX
,
646 .enable_tsc_tweak
= 1,
649 static const struct platform_features adl_features
= {
650 .has_msr_misc_feature_control
= 1,
651 .has_msr_misc_pwr_mgmt
= 1,
654 .bclk_freq
= BCLK_100MHZ
,
655 .supported_cstates
= CC1
| CC6
| CC7
| PC2
| PC3
| PC6
| PC8
| PC10
,
656 .cst_limit
= CST_LIMIT_HSW
,
658 .has_msr_core_c1_res
= 1,
659 .has_ext_cst_msrs
= 1,
660 .trl_msrs
= TRL_BASE
,
661 .tcc_offset_bits
= 6,
662 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_CORE_ALL
| RAPL_DRAM
| RAPL_DRAM_PERF_STATUS
| RAPL_GFX
,
663 .enable_tsc_tweak
= 1,
666 static const struct platform_features skx_features
= {
667 .has_msr_misc_feature_control
= 1,
668 .has_msr_misc_pwr_mgmt
= 1,
671 .bclk_freq
= BCLK_100MHZ
,
672 .supported_cstates
= CC1
| CC6
| PC2
| PC6
,
673 .cst_limit
= CST_LIMIT_SKX
,
675 .has_cst_auto_convension
= 1,
676 .trl_msrs
= TRL_BASE
| TRL_CORECOUNT
,
677 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_DRAM_ALL
,
678 .has_fixed_rapl_unit
= 1,
681 static const struct platform_features icx_features
= {
682 .has_msr_misc_feature_control
= 1,
683 .has_msr_misc_pwr_mgmt
= 1,
686 .bclk_freq
= BCLK_100MHZ
,
687 .supported_cstates
= CC1
| CC6
| PC2
| PC6
,
688 .cst_limit
= CST_LIMIT_ICX
,
689 .has_msr_core_c1_res
= 1,
691 .has_cst_prewake_bit
= 1,
692 .trl_msrs
= TRL_BASE
| TRL_CORECOUNT
,
693 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_DRAM_ALL
,
694 .has_fixed_rapl_unit
= 1,
697 static const struct platform_features spr_features
= {
698 .has_msr_misc_feature_control
= 1,
699 .has_msr_misc_pwr_mgmt
= 1,
702 .bclk_freq
= BCLK_100MHZ
,
703 .supported_cstates
= CC1
| CC6
| PC2
| PC6
,
704 .cst_limit
= CST_LIMIT_SKX
,
705 .has_msr_core_c1_res
= 1,
707 .has_cst_prewake_bit
= 1,
708 .trl_msrs
= TRL_BASE
| TRL_CORECOUNT
,
709 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_DRAM_ALL
,
712 static const struct platform_features srf_features
= {
713 .has_msr_misc_feature_control
= 1,
714 .has_msr_misc_pwr_mgmt
= 1,
717 .bclk_freq
= BCLK_100MHZ
,
718 .supported_cstates
= CC1
| CC6
| PC2
| PC6
,
719 .cst_limit
= CST_LIMIT_SKX
,
720 .has_msr_core_c1_res
= 1,
721 .has_msr_module_c6_res_ms
= 1,
723 .has_cst_prewake_bit
= 1,
724 .trl_msrs
= TRL_BASE
| TRL_CORECOUNT
,
725 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_DRAM_ALL
,
728 static const struct platform_features grr_features
= {
729 .has_msr_misc_feature_control
= 1,
730 .has_msr_misc_pwr_mgmt
= 1,
733 .bclk_freq
= BCLK_100MHZ
,
734 .supported_cstates
= CC1
| CC6
,
735 .cst_limit
= CST_LIMIT_SKX
,
736 .has_msr_core_c1_res
= 1,
737 .has_msr_module_c6_res_ms
= 1,
739 .has_cst_prewake_bit
= 1,
740 .trl_msrs
= TRL_BASE
| TRL_CORECOUNT
,
741 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_DRAM_ALL
,
744 static const struct platform_features slv_features
= {
746 .bclk_freq
= BCLK_SLV
,
747 .supported_cstates
= CC1
| CC6
| PC6
,
748 .cst_limit
= CST_LIMIT_SLV
,
749 .has_msr_core_c1_res
= 1,
750 .has_msr_module_c6_res_ms
= 1,
751 .has_msr_c6_demotion_policy_config
= 1,
752 .has_msr_atom_pkg_c6_residency
= 1,
753 .trl_msrs
= TRL_ATOM
,
754 .rapl_msrs
= RAPL_PKG
| RAPL_CORE
,
755 .has_rapl_divisor
= 1,
756 .rapl_quirk_tdp
= 30,
759 static const struct platform_features slvd_features
= {
760 .has_msr_misc_pwr_mgmt
= 1,
762 .bclk_freq
= BCLK_SLV
,
763 .supported_cstates
= CC1
| CC6
| PC3
| PC6
,
764 .cst_limit
= CST_LIMIT_SLV
,
765 .has_msr_atom_pkg_c6_residency
= 1,
766 .trl_msrs
= TRL_BASE
,
767 .rapl_msrs
= RAPL_PKG
| RAPL_CORE
,
768 .rapl_quirk_tdp
= 30,
771 static const struct platform_features amt_features
= {
773 .bclk_freq
= BCLK_133MHZ
,
774 .supported_cstates
= CC1
| CC3
| CC6
| PC3
| PC6
,
775 .cst_limit
= CST_LIMIT_AMT
,
776 .trl_msrs
= TRL_BASE
,
779 static const struct platform_features gmt_features
= {
780 .has_msr_misc_pwr_mgmt
= 1,
782 .bclk_freq
= BCLK_100MHZ
,
783 .crystal_freq
= 19200000,
784 .supported_cstates
= CC1
| CC3
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
| PC8
| PC9
| PC10
,
785 .cst_limit
= CST_LIMIT_GMT
,
787 .trl_msrs
= TRL_BASE
| TRL_CORECOUNT
,
788 .rapl_msrs
= RAPL_PKG
| RAPL_PKG_POWER_INFO
,
791 static const struct platform_features gmtd_features
= {
792 .has_msr_misc_pwr_mgmt
= 1,
794 .bclk_freq
= BCLK_100MHZ
,
795 .crystal_freq
= 25000000,
796 .supported_cstates
= CC1
| CC6
| PC2
| PC6
,
797 .cst_limit
= CST_LIMIT_GMT
,
799 .has_msr_core_c1_res
= 1,
800 .trl_msrs
= TRL_BASE
| TRL_CORECOUNT
,
801 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_DRAM_ALL
| RAPL_CORE_ENERGY_STATUS
,
804 static const struct platform_features gmtp_features
= {
805 .has_msr_misc_pwr_mgmt
= 1,
807 .bclk_freq
= BCLK_100MHZ
,
808 .crystal_freq
= 19200000,
809 .supported_cstates
= CC1
| CC3
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
| PC8
| PC9
| PC10
,
810 .cst_limit
= CST_LIMIT_GMT
,
812 .trl_msrs
= TRL_BASE
,
813 .rapl_msrs
= RAPL_PKG
| RAPL_PKG_POWER_INFO
,
816 static const struct platform_features tmt_features
= {
817 .has_msr_misc_pwr_mgmt
= 1,
819 .bclk_freq
= BCLK_100MHZ
,
820 .supported_cstates
= CC1
| CC6
| CC7
| PC2
| PC3
| PC6
| PC7
| PC8
| PC9
| PC10
,
821 .cst_limit
= CST_LIMIT_GMT
,
823 .trl_msrs
= TRL_BASE
,
824 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_CORE_ALL
| RAPL_DRAM
| RAPL_DRAM_PERF_STATUS
| RAPL_GFX
,
825 .enable_tsc_tweak
= 1,
828 static const struct platform_features tmtd_features
= {
829 .has_msr_misc_pwr_mgmt
= 1,
831 .bclk_freq
= BCLK_100MHZ
,
832 .supported_cstates
= CC1
| CC6
,
833 .cst_limit
= CST_LIMIT_GMT
,
835 .trl_msrs
= TRL_BASE
| TRL_CORECOUNT
,
836 .rapl_msrs
= RAPL_PKG_ALL
,
839 static const struct platform_features knl_features
= {
840 .has_msr_misc_pwr_mgmt
= 1,
843 .bclk_freq
= BCLK_100MHZ
,
844 .supported_cstates
= CC1
| CC6
| PC3
| PC6
,
845 .cst_limit
= CST_LIMIT_KNL
,
846 .has_msr_knl_core_c6_residency
= 1,
848 .rapl_msrs
= RAPL_PKG_ALL
| RAPL_DRAM_ALL
,
849 .has_fixed_rapl_unit
= 1,
850 .need_perf_multiplier
= 1,
853 static const struct platform_features default_features
= {
856 static const struct platform_features amd_features_with_rapl
= {
857 .rapl_msrs
= RAPL_AMD_F17H
,
858 .has_per_core_rapl
= 1,
859 .rapl_quirk_tdp
= 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
862 static const struct platform_data turbostat_pdata
[] = {
863 { INTEL_FAM6_NEHALEM
, &nhm_features
},
864 { INTEL_FAM6_NEHALEM_G
, &nhm_features
},
865 { INTEL_FAM6_NEHALEM_EP
, &nhm_features
},
866 { INTEL_FAM6_NEHALEM_EX
, &nhx_features
},
867 { INTEL_FAM6_WESTMERE
, &nhm_features
},
868 { INTEL_FAM6_WESTMERE_EP
, &nhm_features
},
869 { INTEL_FAM6_WESTMERE_EX
, &nhx_features
},
870 { INTEL_FAM6_SANDYBRIDGE
, &snb_features
},
871 { INTEL_FAM6_SANDYBRIDGE_X
, &snx_features
},
872 { INTEL_FAM6_IVYBRIDGE
, &ivb_features
},
873 { INTEL_FAM6_IVYBRIDGE_X
, &ivx_features
},
874 { INTEL_FAM6_HASWELL
, &hsw_features
},
875 { INTEL_FAM6_HASWELL_X
, &hsx_features
},
876 { INTEL_FAM6_HASWELL_L
, &hswl_features
},
877 { INTEL_FAM6_HASWELL_G
, &hswg_features
},
878 { INTEL_FAM6_BROADWELL
, &bdw_features
},
879 { INTEL_FAM6_BROADWELL_G
, &bdwg_features
},
880 { INTEL_FAM6_BROADWELL_X
, &bdx_features
},
881 { INTEL_FAM6_BROADWELL_D
, &bdx_features
},
882 { INTEL_FAM6_SKYLAKE_L
, &skl_features
},
883 { INTEL_FAM6_SKYLAKE
, &skl_features
},
884 { INTEL_FAM6_SKYLAKE_X
, &skx_features
},
885 { INTEL_FAM6_KABYLAKE_L
, &skl_features
},
886 { INTEL_FAM6_KABYLAKE
, &skl_features
},
887 { INTEL_FAM6_COMETLAKE
, &skl_features
},
888 { INTEL_FAM6_COMETLAKE_L
, &skl_features
},
889 { INTEL_FAM6_CANNONLAKE_L
, &cnl_features
},
890 { INTEL_FAM6_ICELAKE_X
, &icx_features
},
891 { INTEL_FAM6_ICELAKE_D
, &icx_features
},
892 { INTEL_FAM6_ICELAKE_L
, &cnl_features
},
893 { INTEL_FAM6_ICELAKE_NNPI
, &cnl_features
},
894 { INTEL_FAM6_ROCKETLAKE
, &cnl_features
},
895 { INTEL_FAM6_TIGERLAKE_L
, &cnl_features
},
896 { INTEL_FAM6_TIGERLAKE
, &cnl_features
},
897 { INTEL_FAM6_SAPPHIRERAPIDS_X
, &spr_features
},
898 { INTEL_FAM6_EMERALDRAPIDS_X
, &spr_features
},
899 { INTEL_FAM6_GRANITERAPIDS_X
, &spr_features
},
900 { INTEL_FAM6_LAKEFIELD
, &cnl_features
},
901 { INTEL_FAM6_ALDERLAKE
, &adl_features
},
902 { INTEL_FAM6_ALDERLAKE_L
, &adl_features
},
903 { INTEL_FAM6_RAPTORLAKE
, &adl_features
},
904 { INTEL_FAM6_RAPTORLAKE_P
, &adl_features
},
905 { INTEL_FAM6_RAPTORLAKE_S
, &adl_features
},
906 { INTEL_FAM6_METEORLAKE
, &cnl_features
},
907 { INTEL_FAM6_METEORLAKE_L
, &cnl_features
},
908 { INTEL_FAM6_ARROWLAKE
, &cnl_features
},
909 { INTEL_FAM6_LUNARLAKE_M
, &cnl_features
},
910 { INTEL_FAM6_ATOM_SILVERMONT
, &slv_features
},
911 { INTEL_FAM6_ATOM_SILVERMONT_D
, &slvd_features
},
912 { INTEL_FAM6_ATOM_AIRMONT
, &amt_features
},
913 { INTEL_FAM6_ATOM_GOLDMONT
, &gmt_features
},
914 { INTEL_FAM6_ATOM_GOLDMONT_D
, &gmtd_features
},
915 { INTEL_FAM6_ATOM_GOLDMONT_PLUS
, &gmtp_features
},
916 { INTEL_FAM6_ATOM_TREMONT_D
, &tmtd_features
},
917 { INTEL_FAM6_ATOM_TREMONT
, &tmt_features
},
918 { INTEL_FAM6_ATOM_TREMONT_L
, &tmt_features
},
919 { INTEL_FAM6_ATOM_GRACEMONT
, &adl_features
},
920 { INTEL_FAM6_ATOM_CRESTMONT_X
, &srf_features
},
921 { INTEL_FAM6_ATOM_CRESTMONT
, &grr_features
},
922 { INTEL_FAM6_XEON_PHI_KNL
, &knl_features
},
923 { INTEL_FAM6_XEON_PHI_KNM
, &knl_features
},
925 * Missing support for
927 * INTEL_FAM6_ATOM_SILVERMONT_MID
928 * INTEL_FAM6_ATOM_AIRMONT_MID
929 * INTEL_FAM6_ATOM_AIRMONT_NP
934 static const struct platform_features
*platform
;
936 void probe_platform_features(unsigned int family
, unsigned int model
)
940 platform
= &default_features
;
942 if (authentic_amd
|| hygon_genuine
) {
943 if (max_extended_level
>= 0x80000007) {
944 unsigned int eax
, ebx
, ecx
, edx
;
946 __cpuid(0x80000007, eax
, ebx
, ecx
, edx
);
947 /* RAPL (Fam 17h+) */
948 if ((edx
& (1 << 14)) && family
>= 0x17)
949 platform
= &amd_features_with_rapl
;
954 if (!genuine_intel
|| family
!= 6)
957 for (i
= 0; turbostat_pdata
[i
].features
; i
++) {
958 if (turbostat_pdata
[i
].model
== model
) {
959 platform
= turbostat_pdata
[i
].features
;
965 /* Model specific support End */
#define TJMAX_DEFAULT 100	/* degrees C, used when TjMax cannot be read from hardware */

/* MSRs that are not yet in the kernel-provided header. */
#define MSR_RAPL_PWR_UNIT 0xc0010299
#define MSR_CORE_ENERGY_STAT 0xc001029a
#define MSR_PKG_ENERGY_STAT 0xc001029b

/*
 * Single-evaluation MAX.  The naive ((a) > (b) ? (a) : (b)) form evaluates
 * the winning argument twice, which is unsafe for arguments with side
 * effects.  GNU statement expressions and __typeof__ are already relied on
 * elsewhere in this file (e.g. __cpuid()), so they are safe to use here.
 */
#define MAX(a, b)					\
	({ __typeof__(a) _max_a = (a);			\
	   __typeof__(b) _max_b = (b);			\
	   _max_a > _max_b ? _max_a : _max_b; })
#define CPU_SUBSET_MAXCPUS 1024	/* need to use before probe... */
/* Dynamically-allocated CPU bitmaps: present/effective/allowed CPUs, affinity mask, and the user-selected --cpu subset */
cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
/* Byte sizes of the corresponding cpu_set_t allocations above */
size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_COUNTERS 8
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32
986 /* Indexes used to map data read from perf and MSRs into global variables */
987 enum rapl_rci_index
{
988 RAPL_RCI_INDEX_ENERGY_PKG
= 0,
989 RAPL_RCI_INDEX_ENERGY_CORES
= 1,
990 RAPL_RCI_INDEX_DRAM
= 2,
991 RAPL_RCI_INDEX_GFX
= 3,
992 RAPL_RCI_INDEX_PKG_PERF_STATUS
= 4,
993 RAPL_RCI_INDEX_DRAM_PERF_STATUS
= 5,
994 RAPL_RCI_INDEX_CORE_ENERGY
= 6,
1004 struct rapl_counter_info_t
{
1005 unsigned long long data
[NUM_RAPL_COUNTERS
];
1006 enum rapl_source source
[NUM_RAPL_COUNTERS
];
1007 unsigned long long flags
[NUM_RAPL_COUNTERS
];
1008 double scale
[NUM_RAPL_COUNTERS
];
1009 enum rapl_unit unit
[NUM_RAPL_COUNTERS
];
1012 /* Active when source == RAPL_SOURCE_MSR */
1014 unsigned long long msr
[NUM_RAPL_COUNTERS
];
1015 unsigned long long msr_mask
[NUM_RAPL_COUNTERS
];
1016 int msr_shift
[NUM_RAPL_COUNTERS
];
/* struct rapl_counter_info_t for each RAPL domain */
struct rapl_counter_info_t *rapl_counter_info_perdomain;

/*
 * Counter flag: accumulate this counter via the MSR-sum mechanism.
 * NOTE(review): presumably guards against raw MSR counter wraparound
 * between samples -- confirm at the usage sites.
 */
#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1)
1028 struct rapl_counter_arch_info
{
1029 int feature_mask
; /* Mask for testing if the counter is supported on host */
1030 const char *perf_subsys
;
1031 const char *perf_name
;
1032 unsigned long long msr
;
1033 unsigned long long msr_mask
;
1034 int msr_shift
; /* Positive mean shift right, negative mean shift left */
1035 double *platform_rapl_msr_scale
; /* Scale applied to values read by MSR (platform dependent, filled at runtime) */
1036 unsigned int rci_index
; /* Maps data from perf counters to global variables */
1037 unsigned long long bic
;
1038 double compat_scale
; /* Some counters require constant scaling to be in the same range as other, similar ones */
1039 unsigned long long flags
;
1042 static const struct rapl_counter_arch_info rapl_counter_arch_infos
[] = {
1044 .feature_mask
= RAPL_PKG
,
1045 .perf_subsys
= "power",
1046 .perf_name
= "energy-pkg",
1047 .msr
= MSR_PKG_ENERGY_STATUS
,
1048 .msr_mask
= 0xFFFFFFFFFFFFFFFF,
1050 .platform_rapl_msr_scale
= &rapl_energy_units
,
1051 .rci_index
= RAPL_RCI_INDEX_ENERGY_PKG
,
1052 .bic
= BIC_PkgWatt
| BIC_Pkg_J
,
1053 .compat_scale
= 1.0,
1054 .flags
= RAPL_COUNTER_FLAG_USE_MSR_SUM
,
1057 .feature_mask
= RAPL_AMD_F17H
,
1058 .perf_subsys
= "power",
1059 .perf_name
= "energy-pkg",
1060 .msr
= MSR_PKG_ENERGY_STAT
,
1061 .msr_mask
= 0xFFFFFFFFFFFFFFFF,
1063 .platform_rapl_msr_scale
= &rapl_energy_units
,
1064 .rci_index
= RAPL_RCI_INDEX_ENERGY_PKG
,
1065 .bic
= BIC_PkgWatt
| BIC_Pkg_J
,
1066 .compat_scale
= 1.0,
1067 .flags
= RAPL_COUNTER_FLAG_USE_MSR_SUM
,
1070 .feature_mask
= RAPL_CORE_ENERGY_STATUS
,
1071 .perf_subsys
= "power",
1072 .perf_name
= "energy-cores",
1073 .msr
= MSR_PP0_ENERGY_STATUS
,
1074 .msr_mask
= 0xFFFFFFFFFFFFFFFF,
1076 .platform_rapl_msr_scale
= &rapl_energy_units
,
1077 .rci_index
= RAPL_RCI_INDEX_ENERGY_CORES
,
1078 .bic
= BIC_CorWatt
| BIC_Cor_J
,
1079 .compat_scale
= 1.0,
1080 .flags
= RAPL_COUNTER_FLAG_USE_MSR_SUM
,
1083 .feature_mask
= RAPL_DRAM
,
1084 .perf_subsys
= "power",
1085 .perf_name
= "energy-ram",
1086 .msr
= MSR_DRAM_ENERGY_STATUS
,
1087 .msr_mask
= 0xFFFFFFFFFFFFFFFF,
1089 .platform_rapl_msr_scale
= &rapl_dram_energy_units
,
1090 .rci_index
= RAPL_RCI_INDEX_DRAM
,
1091 .bic
= BIC_RAMWatt
| BIC_RAM_J
,
1092 .compat_scale
= 1.0,
1093 .flags
= RAPL_COUNTER_FLAG_USE_MSR_SUM
,
1096 .feature_mask
= RAPL_GFX
,
1097 .perf_subsys
= "power",
1098 .perf_name
= "energy-gpu",
1099 .msr
= MSR_PP1_ENERGY_STATUS
,
1100 .msr_mask
= 0xFFFFFFFFFFFFFFFF,
1102 .platform_rapl_msr_scale
= &rapl_energy_units
,
1103 .rci_index
= RAPL_RCI_INDEX_GFX
,
1104 .bic
= BIC_GFXWatt
| BIC_GFX_J
,
1105 .compat_scale
= 1.0,
1106 .flags
= RAPL_COUNTER_FLAG_USE_MSR_SUM
,
1109 .feature_mask
= RAPL_PKG_PERF_STATUS
,
1110 .perf_subsys
= NULL
,
1112 .msr
= MSR_PKG_PERF_STATUS
,
1113 .msr_mask
= 0xFFFFFFFFFFFFFFFF,
1115 .platform_rapl_msr_scale
= &rapl_time_units
,
1116 .rci_index
= RAPL_RCI_INDEX_PKG_PERF_STATUS
,
1118 .compat_scale
= 100.0,
1119 .flags
= RAPL_COUNTER_FLAG_USE_MSR_SUM
,
1122 .feature_mask
= RAPL_DRAM_PERF_STATUS
,
1123 .perf_subsys
= NULL
,
1125 .msr
= MSR_DRAM_PERF_STATUS
,
1126 .msr_mask
= 0xFFFFFFFFFFFFFFFF,
1128 .platform_rapl_msr_scale
= &rapl_time_units
,
1129 .rci_index
= RAPL_RCI_INDEX_DRAM_PERF_STATUS
,
1131 .compat_scale
= 100.0,
1132 .flags
= RAPL_COUNTER_FLAG_USE_MSR_SUM
,
1135 .feature_mask
= RAPL_AMD_F17H
,
1136 .perf_subsys
= NULL
,
1138 .msr
= MSR_CORE_ENERGY_STAT
,
1139 .msr_mask
= 0xFFFFFFFF,
1141 .platform_rapl_msr_scale
= &rapl_energy_units
,
1142 .rci_index
= RAPL_RCI_INDEX_CORE_ENERGY
,
1143 .bic
= BIC_CorWatt
| BIC_Cor_J
,
1144 .compat_scale
= 1.0,
1149 struct rapl_counter
{
1150 unsigned long long raw_value
;
1151 enum rapl_unit unit
;
1155 struct thread_data
{
1156 struct timeval tv_begin
;
1157 struct timeval tv_end
;
1158 struct timeval tv_delta
;
1159 unsigned long long tsc
;
1160 unsigned long long aperf
;
1161 unsigned long long mperf
;
1162 unsigned long long c1
;
1163 unsigned long long instr_count
;
1164 unsigned long long irq_count
;
1165 unsigned int smi_count
;
1166 unsigned int cpu_id
;
1167 unsigned int apic_id
;
1168 unsigned int x2apic_id
;
1171 unsigned long long counter
[MAX_ADDED_THREAD_COUNTERS
];
1172 } *thread_even
, *thread_odd
;
1176 unsigned long long c3
;
1177 unsigned long long c6
;
1178 unsigned long long c7
;
1179 unsigned long long mc6_us
; /* duplicate as per-core for now, even though per module */
1180 unsigned int core_temp_c
;
1181 struct rapl_counter core_energy
; /* MSR_CORE_ENERGY_STAT */
1182 unsigned int core_id
;
1183 unsigned long long core_throt_cnt
;
1184 unsigned long long counter
[MAX_ADDED_COUNTERS
];
1185 } *core_even
, *core_odd
;
1189 unsigned long long pc2
;
1190 unsigned long long pc3
;
1191 unsigned long long pc6
;
1192 unsigned long long pc7
;
1193 unsigned long long pc8
;
1194 unsigned long long pc9
;
1195 unsigned long long pc10
;
1198 unsigned long long pkg_wtd_core_c0
;
1199 unsigned long long pkg_any_core_c0
;
1200 unsigned long long pkg_any_gfxe_c0
;
1201 unsigned long long pkg_both_core_gfxe_c0
;
1202 long long gfx_rc6_ms
;
1203 unsigned int gfx_mhz
;
1204 unsigned int gfx_act_mhz
;
1205 long long sam_mc6_ms
;
1206 unsigned int sam_mhz
;
1207 unsigned int sam_act_mhz
;
1208 unsigned int package_id
;
1209 struct rapl_counter energy_pkg
; /* MSR_PKG_ENERGY_STATUS */
1210 struct rapl_counter energy_dram
; /* MSR_DRAM_ENERGY_STATUS */
1211 struct rapl_counter energy_cores
; /* MSR_PP0_ENERGY_STATUS */
1212 struct rapl_counter energy_gfx
; /* MSR_PP1_ENERGY_STATUS */
1213 struct rapl_counter rapl_pkg_perf_status
; /* MSR_PKG_PERF_STATUS */
1214 struct rapl_counter rapl_dram_perf_status
; /* MSR_DRAM_PERF_STATUS */
1215 unsigned int pkg_temp_c
;
1216 unsigned int uncore_mhz
;
1217 unsigned long long counter
[MAX_ADDED_COUNTERS
];
1218 } *package_even
, *package_odd
;
1220 #define ODD_COUNTERS thread_odd, core_odd, package_odd
1221 #define EVEN_COUNTERS thread_even, core_even, package_even
1223 #define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \
1226 topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
1227 ((node_no) * topo.cores_per_node * topo.threads_per_core) + \
1228 ((core_no) * topo.threads_per_core) + \
1231 #define GET_CORE(core_base, core_no, node_no, pkg_no) \
1233 ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \
1234 ((node_no) * topo.cores_per_node) + \
1237 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
1240 * The accumulated sum of MSR is defined as a monotonic
1241 * increasing MSR, it will be accumulated periodically,
1242 * despite its register's bit width.
1254 int get_msr_sum(int cpu
, off_t offset
, unsigned long long *msr
);
1256 struct msr_sum_array
{
1257 /* get_msr_sum() = sum + (get_msr() - last) */
1259 /*The accumulated MSR value is updated by the timer */
1260 unsigned long long sum
;
1261 /*The MSR footprint recorded in last timer */
1262 unsigned long long last
;
1263 } entries
[IDX_COUNT
];
1266 /* The percpu MSR sum array.*/
1267 struct msr_sum_array
*per_cpu_msr_sum
;
1269 off_t
idx_to_offset(int idx
)
1274 case IDX_PKG_ENERGY
:
1275 if (platform
->rapl_msrs
& RAPL_AMD_F17H
)
1276 offset
= MSR_PKG_ENERGY_STAT
;
1278 offset
= MSR_PKG_ENERGY_STATUS
;
1280 case IDX_DRAM_ENERGY
:
1281 offset
= MSR_DRAM_ENERGY_STATUS
;
1283 case IDX_PP0_ENERGY
:
1284 offset
= MSR_PP0_ENERGY_STATUS
;
1286 case IDX_PP1_ENERGY
:
1287 offset
= MSR_PP1_ENERGY_STATUS
;
1290 offset
= MSR_PKG_PERF_STATUS
;
1293 offset
= MSR_DRAM_PERF_STATUS
;
1301 int offset_to_idx(off_t offset
)
1306 case MSR_PKG_ENERGY_STATUS
:
1307 case MSR_PKG_ENERGY_STAT
:
1308 idx
= IDX_PKG_ENERGY
;
1310 case MSR_DRAM_ENERGY_STATUS
:
1311 idx
= IDX_DRAM_ENERGY
;
1313 case MSR_PP0_ENERGY_STATUS
:
1314 idx
= IDX_PP0_ENERGY
;
1316 case MSR_PP1_ENERGY_STATUS
:
1317 idx
= IDX_PP1_ENERGY
;
1319 case MSR_PKG_PERF_STATUS
:
1322 case MSR_DRAM_PERF_STATUS
:
1323 idx
= IDX_DRAM_PERF
;
1331 int idx_valid(int idx
)
1334 case IDX_PKG_ENERGY
:
1335 return platform
->rapl_msrs
& (RAPL_PKG
| RAPL_AMD_F17H
);
1336 case IDX_DRAM_ENERGY
:
1337 return platform
->rapl_msrs
& RAPL_DRAM
;
1338 case IDX_PP0_ENERGY
:
1339 return platform
->rapl_msrs
& RAPL_CORE_ENERGY_STATUS
;
1340 case IDX_PP1_ENERGY
:
1341 return platform
->rapl_msrs
& RAPL_GFX
;
1343 return platform
->rapl_msrs
& RAPL_PKG_PERF_STATUS
;
1345 return platform
->rapl_msrs
& RAPL_DRAM_PERF_STATUS
;
1351 struct sys_counters
{
1352 unsigned int added_thread_counters
;
1353 unsigned int added_core_counters
;
1354 unsigned int added_package_counters
;
1355 struct msr_counter
*tp
;
1356 struct msr_counter
*cp
;
1357 struct msr_counter
*pp
;
1360 void free_sys_counters(void)
1362 struct msr_counter
*p
= sys
.tp
, *pnext
= NULL
;
1369 p
= sys
.cp
, pnext
= NULL
;
1376 p
= sys
.pp
, pnext
= NULL
;
1383 sys
.added_thread_counters
= 0;
1384 sys
.added_core_counters
= 0;
1385 sys
.added_package_counters
= 0;
1391 struct system_summary
{
1392 struct thread_data threads
;
1393 struct core_data cores
;
1394 struct pkg_data packages
;
1397 struct cpu_topology
{
1398 int physical_package_id
;
1401 int physical_node_id
;
1402 int logical_node_id
; /* 0-based count within the package */
1403 int physical_core_id
;
1405 cpu_set_t
*put_ids
; /* Processing Unit/Thread IDs */
1408 struct topo_params
{
1413 int allowed_packages
;
1420 int threads_per_core
;
1423 struct timeval tv_even
, tv_odd
, tv_delta
;
1425 int *irq_column_2_cpu
; /* /proc/interrupts column numbers */
1426 int *irqs_per_cpu
; /* indexed by cpu_num */
1428 void setup_all_buffers(bool startup
);
1431 char *sys_lpi_file_sysfs
= "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
1432 char *sys_lpi_file_debugfs
= "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
1434 int cpu_is_not_present(int cpu
)
1436 return !CPU_ISSET_S(cpu
, cpu_present_setsize
, cpu_present_set
);
1439 int cpu_is_not_allowed(int cpu
)
1441 return !CPU_ISSET_S(cpu
, cpu_allowed_setsize
, cpu_allowed_set
);
1445 * run func(thread, core, package) in topology order
1446 * skip non-present cpus
1449 int for_all_cpus(int (func
) (struct thread_data
*, struct core_data
*, struct pkg_data
*),
1450 struct thread_data
*thread_base
, struct core_data
*core_base
, struct pkg_data
*pkg_base
)
1452 int retval
, pkg_no
, core_no
, thread_no
, node_no
;
1454 for (pkg_no
= 0; pkg_no
< topo
.num_packages
; ++pkg_no
) {
1455 for (node_no
= 0; node_no
< topo
.nodes_per_pkg
; node_no
++) {
1456 for (core_no
= 0; core_no
< topo
.cores_per_node
; ++core_no
) {
1457 for (thread_no
= 0; thread_no
< topo
.threads_per_core
; ++thread_no
) {
1458 struct thread_data
*t
;
1459 struct core_data
*c
;
1461 t
= GET_THREAD(thread_base
, thread_no
, core_no
, node_no
, pkg_no
);
1463 if (cpu_is_not_allowed(t
->cpu_id
))
1466 c
= GET_CORE(core_base
, core_no
, node_no
, pkg_no
);
1467 p
= GET_PKG(pkg_base
, pkg_no
);
1469 retval
= func(t
, c
, p
);
1479 int is_cpu_first_thread_in_core(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
1483 return ((int)t
->cpu_id
== c
->base_cpu
|| c
->base_cpu
< 0);
1486 int is_cpu_first_core_in_package(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
1490 return ((int)t
->cpu_id
== p
->base_cpu
|| p
->base_cpu
< 0);
/* First thread of the first core of the package: both tests must hold. */
int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p);
}
1498 int cpu_migrate(int cpu
)
1500 CPU_ZERO_S(cpu_affinity_setsize
, cpu_affinity_set
);
1501 CPU_SET_S(cpu
, cpu_affinity_setsize
, cpu_affinity_set
);
1502 if (sched_setaffinity(0, cpu_affinity_setsize
, cpu_affinity_set
) == -1)
1508 int get_msr_fd(int cpu
)
1513 fd
= fd_percpu
[cpu
];
1518 sprintf(pathname
, "/dev/cpu/%d/msr", cpu
);
1519 fd
= open(pathname
, O_RDONLY
);
1521 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
1522 "or run with --no-msr, or run as root", pathname
);
1524 fd_percpu
[cpu
] = fd
;
1529 static void bic_disable_msr_access(void)
1531 const unsigned long bic_msrs
=
1543 BIC_Pkgpc2
| BIC_Pkgpc3
| BIC_Pkgpc6
| BIC_Pkgpc7
| BIC_Pkgpc8
| BIC_Pkgpc9
| BIC_Pkgpc10
| BIC_PkgTmp
;
1545 bic_enabled
&= ~bic_msrs
;
1547 free_sys_counters();
1550 static long perf_event_open(struct perf_event_attr
*hw_event
, pid_t pid
, int cpu
, int group_fd
, unsigned long flags
)
1554 return syscall(__NR_perf_event_open
, hw_event
, pid
, cpu
, group_fd
, flags
);
1557 static long open_perf_counter(int cpu
, unsigned int type
, unsigned int config
, int group_fd
, __u64 read_format
)
1559 struct perf_event_attr attr
;
1560 const pid_t pid
= -1;
1561 const unsigned long flags
= 0;
1565 memset(&attr
, 0, sizeof(struct perf_event_attr
));
1568 attr
.size
= sizeof(struct perf_event_attr
);
1569 attr
.config
= config
;
1571 attr
.sample_type
= PERF_SAMPLE_IDENTIFIER
;
1572 attr
.read_format
= read_format
;
1574 const int fd
= perf_event_open(&attr
, pid
, cpu
, group_fd
, flags
);
1579 int get_instr_count_fd(int cpu
)
1581 if (fd_instr_count_percpu
[cpu
])
1582 return fd_instr_count_percpu
[cpu
];
1584 fd_instr_count_percpu
[cpu
] = open_perf_counter(cpu
, PERF_TYPE_HARDWARE
, PERF_COUNT_HW_INSTRUCTIONS
, -1, 0);
1586 return fd_instr_count_percpu
[cpu
];
/*
 * Read the 8-byte MSR at @offset on @cpu into *msr via the /dev/cpu
 * msr device. Exits via err() on a short or failed read; returns 0 on
 * success.
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	ssize_t retval;

	retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);

	if (retval != sizeof(*msr))
		err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}
/*
 * Probe whether the MSR at @offset is readable on @cpu by attempting a
 * full-width read into a scratch variable. Returns 0 when readable,
 * 1 otherwise (unlike get_msr(), a failure here is not fatal).
 */
int probe_msr(int cpu, off_t offset)
{
	ssize_t retval;
	unsigned long long dummy;

	retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);

	if (retval != sizeof(dummy))
		return 1;

	return 0;
}
1618 #define MAX_DEFERRED 16
1619 char *deferred_add_names
[MAX_DEFERRED
];
1620 char *deferred_skip_names
[MAX_DEFERRED
];
1621 int deferred_add_index
;
1622 int deferred_skip_index
;
1625 * HIDE_LIST - hide this list of counters, show the rest [default]
1626 * SHOW_LIST - show this list of counters, hide the rest
1628 enum show_hide_mode
{ SHOW_LIST
, HIDE_LIST
} global_show_hide_mode
= HIDE_LIST
;
1633 "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
1635 "Turbostat forks the specified COMMAND and prints statistics\n"
1636 "when COMMAND completes.\n"
1637 "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
1638 "to print statistics, until interrupted.\n"
1639 " -a, --add add a counter\n"
1640 " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
1641 " -c, --cpu cpu-set limit output to summary plus cpu-set:\n"
1642 " {core | package | j,k,l..m,n-p }\n"
1643 " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n"
1644 " -D, --Dump displays the raw counter values\n"
1645 " -e, --enable [all | column]\n"
1646 " shows all or the specified disabled column\n"
1647 " -H, --hide [column|column,column,...]\n"
1648 " hide the specified column(s)\n"
1649 " -i, --interval sec.subsec\n"
1650 " Override default 5-second measurement interval\n"
1651 " -J, --Joules displays energy in Joules instead of Watts\n"
1652 " -l, --list list column headers only\n"
1653 " -M, --no-msr Disable all uses of the MSR driver\n"
1654 " -P, --no-perf Disable all uses of the perf API\n"
1655 " -n, --num_iterations num\n"
1656 " number of the measurement iterations\n"
1657 " -N, --header_iterations num\n"
1658 " print header every num iterations\n"
1660 " create or truncate \"file\" for all output\n"
1661 " -q, --quiet skip decoding system configuration header\n"
1662 " -s, --show [column|column,column,...]\n"
1663 " show only the specified column(s)\n"
1665 " limits output to 1-line system summary per interval\n"
1666 " -T, --TCC temperature\n"
1667 " sets the Thermal Control Circuit temperature in\n"
1668 " degrees Celsius\n"
1669 " -h, --help print this help message\n"
1670 " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n");
1675 * for all the strings in comma separate name_list,
1676 * set the approprate bit in return value.
1678 unsigned long long bic_lookup(char *name_list
, enum show_hide_mode mode
)
1681 unsigned long long retval
= 0;
1686 comma
= strchr(name_list
, ',');
1691 for (i
= 0; i
< MAX_BIC
; ++i
) {
1692 if (!strcmp(name_list
, bic
[i
].name
)) {
1693 retval
|= (1ULL << i
);
1696 if (!strcmp(name_list
, "all")) {
1699 } else if (!strcmp(name_list
, "topology")) {
1700 retval
|= BIC_TOPOLOGY
;
1702 } else if (!strcmp(name_list
, "power")) {
1703 retval
|= BIC_THERMAL_PWR
;
1705 } else if (!strcmp(name_list
, "idle")) {
1708 } else if (!strcmp(name_list
, "frequency")) {
1709 retval
|= BIC_FREQUENCY
;
1711 } else if (!strcmp(name_list
, "other")) {
1712 retval
|= BIC_OTHER
;
1718 if (mode
== SHOW_LIST
) {
1719 deferred_add_names
[deferred_add_index
++] = name_list
;
1720 if (deferred_add_index
>= MAX_DEFERRED
) {
1721 fprintf(stderr
, "More than max %d un-recognized --add options '%s'\n",
1722 MAX_DEFERRED
, name_list
);
1727 deferred_skip_names
[deferred_skip_index
++] = name_list
;
1729 fprintf(stderr
, "deferred \"%s\"\n", name_list
);
1730 if (deferred_skip_index
>= MAX_DEFERRED
) {
1731 fprintf(stderr
, "More than max %d un-recognized --skip options '%s'\n",
1732 MAX_DEFERRED
, name_list
);
1747 void print_header(char *delim
)
1749 struct msr_counter
*mp
;
1752 if (DO_BIC(BIC_USEC
))
1753 outp
+= sprintf(outp
, "%susec", (printed
++ ? delim
: ""));
1754 if (DO_BIC(BIC_TOD
))
1755 outp
+= sprintf(outp
, "%sTime_Of_Day_Seconds", (printed
++ ? delim
: ""));
1756 if (DO_BIC(BIC_Package
))
1757 outp
+= sprintf(outp
, "%sPackage", (printed
++ ? delim
: ""));
1758 if (DO_BIC(BIC_Die
))
1759 outp
+= sprintf(outp
, "%sDie", (printed
++ ? delim
: ""));
1760 if (DO_BIC(BIC_Node
))
1761 outp
+= sprintf(outp
, "%sNode", (printed
++ ? delim
: ""));
1762 if (DO_BIC(BIC_Core
))
1763 outp
+= sprintf(outp
, "%sCore", (printed
++ ? delim
: ""));
1764 if (DO_BIC(BIC_CPU
))
1765 outp
+= sprintf(outp
, "%sCPU", (printed
++ ? delim
: ""));
1766 if (DO_BIC(BIC_APIC
))
1767 outp
+= sprintf(outp
, "%sAPIC", (printed
++ ? delim
: ""));
1768 if (DO_BIC(BIC_X2APIC
))
1769 outp
+= sprintf(outp
, "%sX2APIC", (printed
++ ? delim
: ""));
1770 if (DO_BIC(BIC_Avg_MHz
))
1771 outp
+= sprintf(outp
, "%sAvg_MHz", (printed
++ ? delim
: ""));
1772 if (DO_BIC(BIC_Busy
))
1773 outp
+= sprintf(outp
, "%sBusy%%", (printed
++ ? delim
: ""));
1774 if (DO_BIC(BIC_Bzy_MHz
))
1775 outp
+= sprintf(outp
, "%sBzy_MHz", (printed
++ ? delim
: ""));
1776 if (DO_BIC(BIC_TSC_MHz
))
1777 outp
+= sprintf(outp
, "%sTSC_MHz", (printed
++ ? delim
: ""));
1779 if (DO_BIC(BIC_IPC
))
1780 outp
+= sprintf(outp
, "%sIPC", (printed
++ ? delim
: ""));
1782 if (DO_BIC(BIC_IRQ
)) {
1783 if (sums_need_wide_columns
)
1784 outp
+= sprintf(outp
, "%s IRQ", (printed
++ ? delim
: ""));
1786 outp
+= sprintf(outp
, "%sIRQ", (printed
++ ? delim
: ""));
1789 if (DO_BIC(BIC_SMI
))
1790 outp
+= sprintf(outp
, "%sSMI", (printed
++ ? delim
: ""));
1792 for (mp
= sys
.tp
; mp
; mp
= mp
->next
) {
1794 if (mp
->format
== FORMAT_RAW
) {
1795 if (mp
->width
== 64)
1796 outp
+= sprintf(outp
, "%s%18.18s", (printed
++ ? delim
: ""), mp
->name
);
1798 outp
+= sprintf(outp
, "%s%10.10s", (printed
++ ? delim
: ""), mp
->name
);
1800 if ((mp
->type
== COUNTER_ITEMS
) && sums_need_wide_columns
)
1801 outp
+= sprintf(outp
, "%s%8s", (printed
++ ? delim
: ""), mp
->name
);
1803 outp
+= sprintf(outp
, "%s%s", (printed
++ ? delim
: ""), mp
->name
);
1807 if (DO_BIC(BIC_CPU_c1
))
1808 outp
+= sprintf(outp
, "%sCPU%%c1", (printed
++ ? delim
: ""));
1809 if (DO_BIC(BIC_CPU_c3
))
1810 outp
+= sprintf(outp
, "%sCPU%%c3", (printed
++ ? delim
: ""));
1811 if (DO_BIC(BIC_CPU_c6
))
1812 outp
+= sprintf(outp
, "%sCPU%%c6", (printed
++ ? delim
: ""));
1813 if (DO_BIC(BIC_CPU_c7
))
1814 outp
+= sprintf(outp
, "%sCPU%%c7", (printed
++ ? delim
: ""));
1816 if (DO_BIC(BIC_Mod_c6
))
1817 outp
+= sprintf(outp
, "%sMod%%c6", (printed
++ ? delim
: ""));
1819 if (DO_BIC(BIC_CoreTmp
))
1820 outp
+= sprintf(outp
, "%sCoreTmp", (printed
++ ? delim
: ""));
1822 if (DO_BIC(BIC_CORE_THROT_CNT
))
1823 outp
+= sprintf(outp
, "%sCoreThr", (printed
++ ? delim
: ""));
1825 if (platform
->rapl_msrs
&& !rapl_joules
) {
1826 if (DO_BIC(BIC_CorWatt
) && platform
->has_per_core_rapl
)
1827 outp
+= sprintf(outp
, "%sCorWatt", (printed
++ ? delim
: ""));
1828 } else if (platform
->rapl_msrs
&& rapl_joules
) {
1829 if (DO_BIC(BIC_Cor_J
) && platform
->has_per_core_rapl
)
1830 outp
+= sprintf(outp
, "%sCor_J", (printed
++ ? delim
: ""));
1833 for (mp
= sys
.cp
; mp
; mp
= mp
->next
) {
1834 if (mp
->format
== FORMAT_RAW
) {
1835 if (mp
->width
== 64)
1836 outp
+= sprintf(outp
, "%s%18.18s", delim
, mp
->name
);
1838 outp
+= sprintf(outp
, "%s%10.10s", delim
, mp
->name
);
1840 if ((mp
->type
== COUNTER_ITEMS
) && sums_need_wide_columns
)
1841 outp
+= sprintf(outp
, "%s%8s", delim
, mp
->name
);
1843 outp
+= sprintf(outp
, "%s%s", delim
, mp
->name
);
1847 if (DO_BIC(BIC_PkgTmp
))
1848 outp
+= sprintf(outp
, "%sPkgTmp", (printed
++ ? delim
: ""));
1850 if (DO_BIC(BIC_GFX_rc6
))
1851 outp
+= sprintf(outp
, "%sGFX%%rc6", (printed
++ ? delim
: ""));
1853 if (DO_BIC(BIC_GFXMHz
))
1854 outp
+= sprintf(outp
, "%sGFXMHz", (printed
++ ? delim
: ""));
1856 if (DO_BIC(BIC_GFXACTMHz
))
1857 outp
+= sprintf(outp
, "%sGFXAMHz", (printed
++ ? delim
: ""));
1859 if (DO_BIC(BIC_SAM_mc6
))
1860 outp
+= sprintf(outp
, "%sSAM%%mc6", (printed
++ ? delim
: ""));
1862 if (DO_BIC(BIC_SAMMHz
))
1863 outp
+= sprintf(outp
, "%sSAMMHz", (printed
++ ? delim
: ""));
1865 if (DO_BIC(BIC_SAMACTMHz
))
1866 outp
+= sprintf(outp
, "%sSAMAMHz", (printed
++ ? delim
: ""));
1868 if (DO_BIC(BIC_Totl_c0
))
1869 outp
+= sprintf(outp
, "%sTotl%%C0", (printed
++ ? delim
: ""));
1870 if (DO_BIC(BIC_Any_c0
))
1871 outp
+= sprintf(outp
, "%sAny%%C0", (printed
++ ? delim
: ""));
1872 if (DO_BIC(BIC_GFX_c0
))
1873 outp
+= sprintf(outp
, "%sGFX%%C0", (printed
++ ? delim
: ""));
1874 if (DO_BIC(BIC_CPUGFX
))
1875 outp
+= sprintf(outp
, "%sCPUGFX%%", (printed
++ ? delim
: ""));
1877 if (DO_BIC(BIC_Pkgpc2
))
1878 outp
+= sprintf(outp
, "%sPkg%%pc2", (printed
++ ? delim
: ""));
1879 if (DO_BIC(BIC_Pkgpc3
))
1880 outp
+= sprintf(outp
, "%sPkg%%pc3", (printed
++ ? delim
: ""));
1881 if (DO_BIC(BIC_Pkgpc6
))
1882 outp
+= sprintf(outp
, "%sPkg%%pc6", (printed
++ ? delim
: ""));
1883 if (DO_BIC(BIC_Pkgpc7
))
1884 outp
+= sprintf(outp
, "%sPkg%%pc7", (printed
++ ? delim
: ""));
1885 if (DO_BIC(BIC_Pkgpc8
))
1886 outp
+= sprintf(outp
, "%sPkg%%pc8", (printed
++ ? delim
: ""));
1887 if (DO_BIC(BIC_Pkgpc9
))
1888 outp
+= sprintf(outp
, "%sPkg%%pc9", (printed
++ ? delim
: ""));
1889 if (DO_BIC(BIC_Pkgpc10
))
1890 outp
+= sprintf(outp
, "%sPk%%pc10", (printed
++ ? delim
: ""));
1891 if (DO_BIC(BIC_CPU_LPI
))
1892 outp
+= sprintf(outp
, "%sCPU%%LPI", (printed
++ ? delim
: ""));
1893 if (DO_BIC(BIC_SYS_LPI
))
1894 outp
+= sprintf(outp
, "%sSYS%%LPI", (printed
++ ? delim
: ""));
1896 if (platform
->rapl_msrs
&& !rapl_joules
) {
1897 if (DO_BIC(BIC_PkgWatt
))
1898 outp
+= sprintf(outp
, "%sPkgWatt", (printed
++ ? delim
: ""));
1899 if (DO_BIC(BIC_CorWatt
) && !platform
->has_per_core_rapl
)
1900 outp
+= sprintf(outp
, "%sCorWatt", (printed
++ ? delim
: ""));
1901 if (DO_BIC(BIC_GFXWatt
))
1902 outp
+= sprintf(outp
, "%sGFXWatt", (printed
++ ? delim
: ""));
1903 if (DO_BIC(BIC_RAMWatt
))
1904 outp
+= sprintf(outp
, "%sRAMWatt", (printed
++ ? delim
: ""));
1905 if (DO_BIC(BIC_PKG__
))
1906 outp
+= sprintf(outp
, "%sPKG_%%", (printed
++ ? delim
: ""));
1907 if (DO_BIC(BIC_RAM__
))
1908 outp
+= sprintf(outp
, "%sRAM_%%", (printed
++ ? delim
: ""));
1909 } else if (platform
->rapl_msrs
&& rapl_joules
) {
1910 if (DO_BIC(BIC_Pkg_J
))
1911 outp
+= sprintf(outp
, "%sPkg_J", (printed
++ ? delim
: ""));
1912 if (DO_BIC(BIC_Cor_J
) && !platform
->has_per_core_rapl
)
1913 outp
+= sprintf(outp
, "%sCor_J", (printed
++ ? delim
: ""));
1914 if (DO_BIC(BIC_GFX_J
))
1915 outp
+= sprintf(outp
, "%sGFX_J", (printed
++ ? delim
: ""));
1916 if (DO_BIC(BIC_RAM_J
))
1917 outp
+= sprintf(outp
, "%sRAM_J", (printed
++ ? delim
: ""));
1918 if (DO_BIC(BIC_PKG__
))
1919 outp
+= sprintf(outp
, "%sPKG_%%", (printed
++ ? delim
: ""));
1920 if (DO_BIC(BIC_RAM__
))
1921 outp
+= sprintf(outp
, "%sRAM_%%", (printed
++ ? delim
: ""));
1923 if (DO_BIC(BIC_UNCORE_MHZ
))
1924 outp
+= sprintf(outp
, "%sUncMHz", (printed
++ ? delim
: ""));
1926 for (mp
= sys
.pp
; mp
; mp
= mp
->next
) {
1927 if (mp
->format
== FORMAT_RAW
) {
1928 if (mp
->width
== 64)
1929 outp
+= sprintf(outp
, "%s%18.18s", delim
, mp
->name
);
1931 outp
+= sprintf(outp
, "%s%10.10s", delim
, mp
->name
);
1933 if ((mp
->type
== COUNTER_ITEMS
) && sums_need_wide_columns
)
1934 outp
+= sprintf(outp
, "%s%8s", delim
, mp
->name
);
1936 outp
+= sprintf(outp
, "%s%s", delim
, mp
->name
);
1940 outp
+= sprintf(outp
, "\n");
1943 int dump_counters(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
1946 struct msr_counter
*mp
;
1948 outp
+= sprintf(outp
, "t %p, c %p, p %p\n", t
, c
, p
);
1951 outp
+= sprintf(outp
, "CPU: %d flags 0x%x\n", t
->cpu_id
, t
->flags
);
1952 outp
+= sprintf(outp
, "TSC: %016llX\n", t
->tsc
);
1953 outp
+= sprintf(outp
, "aperf: %016llX\n", t
->aperf
);
1954 outp
+= sprintf(outp
, "mperf: %016llX\n", t
->mperf
);
1955 outp
+= sprintf(outp
, "c1: %016llX\n", t
->c1
);
1957 if (DO_BIC(BIC_IPC
))
1958 outp
+= sprintf(outp
, "IPC: %lld\n", t
->instr_count
);
1960 if (DO_BIC(BIC_IRQ
))
1961 outp
+= sprintf(outp
, "IRQ: %lld\n", t
->irq_count
);
1962 if (DO_BIC(BIC_SMI
))
1963 outp
+= sprintf(outp
, "SMI: %d\n", t
->smi_count
);
1965 for (i
= 0, mp
= sys
.tp
; mp
; i
++, mp
= mp
->next
) {
1967 sprintf(outp
, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i
, mp
->name
, mp
->msr_num
,
1968 t
->counter
[i
], mp
->path
);
1972 if (c
&& is_cpu_first_thread_in_core(t
, c
, p
)) {
1973 outp
+= sprintf(outp
, "core: %d\n", c
->core_id
);
1974 outp
+= sprintf(outp
, "c3: %016llX\n", c
->c3
);
1975 outp
+= sprintf(outp
, "c6: %016llX\n", c
->c6
);
1976 outp
+= sprintf(outp
, "c7: %016llX\n", c
->c7
);
1977 outp
+= sprintf(outp
, "DTS: %dC\n", c
->core_temp_c
);
1978 outp
+= sprintf(outp
, "cpu_throt_count: %016llX\n", c
->core_throt_cnt
);
1980 const unsigned long long energy_value
= c
->core_energy
.raw_value
* c
->core_energy
.scale
;
1981 const double energy_scale
= c
->core_energy
.scale
;
1982 if (c
->core_energy
.unit
== RAPL_UNIT_JOULES
)
1983 outp
+= sprintf(outp
, "Joules: %0llX (scale: %lf)\n", energy_value
, energy_scale
);
1985 for (i
= 0, mp
= sys
.cp
; mp
; i
++, mp
= mp
->next
) {
1987 sprintf(outp
, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i
, mp
->name
, mp
->msr_num
,
1988 c
->counter
[i
], mp
->path
);
1990 outp
+= sprintf(outp
, "mc6_us: %016llX\n", c
->mc6_us
);
1993 if (p
&& is_cpu_first_core_in_package(t
, c
, p
)) {
1994 outp
+= sprintf(outp
, "package: %d\n", p
->package_id
);
1996 outp
+= sprintf(outp
, "Weighted cores: %016llX\n", p
->pkg_wtd_core_c0
);
1997 outp
+= sprintf(outp
, "Any cores: %016llX\n", p
->pkg_any_core_c0
);
1998 outp
+= sprintf(outp
, "Any GFX: %016llX\n", p
->pkg_any_gfxe_c0
);
1999 outp
+= sprintf(outp
, "CPU + GFX: %016llX\n", p
->pkg_both_core_gfxe_c0
);
2001 outp
+= sprintf(outp
, "pc2: %016llX\n", p
->pc2
);
2002 if (DO_BIC(BIC_Pkgpc3
))
2003 outp
+= sprintf(outp
, "pc3: %016llX\n", p
->pc3
);
2004 if (DO_BIC(BIC_Pkgpc6
))
2005 outp
+= sprintf(outp
, "pc6: %016llX\n", p
->pc6
);
2006 if (DO_BIC(BIC_Pkgpc7
))
2007 outp
+= sprintf(outp
, "pc7: %016llX\n", p
->pc7
);
2008 outp
+= sprintf(outp
, "pc8: %016llX\n", p
->pc8
);
2009 outp
+= sprintf(outp
, "pc9: %016llX\n", p
->pc9
);
2010 outp
+= sprintf(outp
, "pc10: %016llX\n", p
->pc10
);
2011 outp
+= sprintf(outp
, "cpu_lpi: %016llX\n", p
->cpu_lpi
);
2012 outp
+= sprintf(outp
, "sys_lpi: %016llX\n", p
->sys_lpi
);
2013 outp
+= sprintf(outp
, "Joules PKG: %0llX\n", p
->energy_pkg
.raw_value
);
2014 outp
+= sprintf(outp
, "Joules COR: %0llX\n", p
->energy_cores
.raw_value
);
2015 outp
+= sprintf(outp
, "Joules GFX: %0llX\n", p
->energy_gfx
.raw_value
);
2016 outp
+= sprintf(outp
, "Joules RAM: %0llX\n", p
->energy_dram
.raw_value
);
2017 outp
+= sprintf(outp
, "Throttle PKG: %0llX\n", p
->rapl_pkg_perf_status
.raw_value
);
2018 outp
+= sprintf(outp
, "Throttle RAM: %0llX\n", p
->rapl_dram_perf_status
.raw_value
);
2019 outp
+= sprintf(outp
, "PTM: %dC\n", p
->pkg_temp_c
);
2021 for (i
= 0, mp
= sys
.pp
; mp
; i
++, mp
= mp
->next
) {
2023 sprintf(outp
, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i
, mp
->name
, mp
->msr_num
,
2024 p
->counter
[i
], mp
->path
);
2028 outp
+= sprintf(outp
, "\n");
2033 double rapl_counter_get_value(const struct rapl_counter
*c
, enum rapl_unit desired_unit
, double interval
)
2035 assert(desired_unit
!= RAPL_UNIT_INVALID
);
2038 * For now we don't expect anything other than joules,
2039 * so just simplify the logic.
2041 assert(c
->unit
== RAPL_UNIT_JOULES
);
2043 const double scaled
= c
->raw_value
* c
->scale
;
2045 if (desired_unit
== RAPL_UNIT_WATTS
)
2046 return scaled
/ interval
;
2051 * column formatting convention & formats
2053 int format_counters(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
2055 double interval_float
, tsc
;
2058 struct msr_counter
*mp
;
2062 /* if showing only 1st thread in core and this isn't one, bail out */
2063 if (show_core_only
&& !is_cpu_first_thread_in_core(t
, c
, p
))
2066 /* if showing only 1st thread in pkg and this isn't one, bail out */
2067 if (show_pkg_only
&& !is_cpu_first_core_in_package(t
, c
, p
))
2070 /*if not summary line and --cpu is used */
2071 if ((t
!= &average
.threads
) && (cpu_subset
&& !CPU_ISSET_S(t
->cpu_id
, cpu_subset_size
, cpu_subset
)))
2074 if (DO_BIC(BIC_USEC
)) {
2075 /* on each row, print how many usec each timestamp took to gather */
2078 timersub(&t
->tv_end
, &t
->tv_begin
, &tv
);
2079 outp
+= sprintf(outp
, "%5ld\t", tv
.tv_sec
* 1000000 + tv
.tv_usec
);
2082 /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
2083 if (DO_BIC(BIC_TOD
))
2084 outp
+= sprintf(outp
, "%10ld.%06ld\t", t
->tv_end
.tv_sec
, t
->tv_end
.tv_usec
);
2086 interval_float
= t
->tv_delta
.tv_sec
+ t
->tv_delta
.tv_usec
/ 1000000.0;
2088 tsc
= t
->tsc
* tsc_tweak
;
2090 /* topo columns, print blanks on 1st (average) line */
2091 if (t
== &average
.threads
) {
2092 if (DO_BIC(BIC_Package
))
2093 outp
+= sprintf(outp
, "%s-", (printed
++ ? delim
: ""));
2094 if (DO_BIC(BIC_Die
))
2095 outp
+= sprintf(outp
, "%s-", (printed
++ ? delim
: ""));
2096 if (DO_BIC(BIC_Node
))
2097 outp
+= sprintf(outp
, "%s-", (printed
++ ? delim
: ""));
2098 if (DO_BIC(BIC_Core
))
2099 outp
+= sprintf(outp
, "%s-", (printed
++ ? delim
: ""));
2100 if (DO_BIC(BIC_CPU
))
2101 outp
+= sprintf(outp
, "%s-", (printed
++ ? delim
: ""));
2102 if (DO_BIC(BIC_APIC
))
2103 outp
+= sprintf(outp
, "%s-", (printed
++ ? delim
: ""));
2104 if (DO_BIC(BIC_X2APIC
))
2105 outp
+= sprintf(outp
, "%s-", (printed
++ ? delim
: ""));
2107 if (DO_BIC(BIC_Package
)) {
2109 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), p
->package_id
);
2111 outp
+= sprintf(outp
, "%s-", (printed
++ ? delim
: ""));
2113 if (DO_BIC(BIC_Die
)) {
2115 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), cpus
[t
->cpu_id
].die_id
);
2117 outp
+= sprintf(outp
, "%s-", (printed
++ ? delim
: ""));
2119 if (DO_BIC(BIC_Node
)) {
2121 outp
+= sprintf(outp
, "%s%d",
2122 (printed
++ ? delim
: ""), cpus
[t
->cpu_id
].physical_node_id
);
2124 outp
+= sprintf(outp
, "%s-", (printed
++ ? delim
: ""));
2126 if (DO_BIC(BIC_Core
)) {
2128 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), c
->core_id
);
2130 outp
+= sprintf(outp
, "%s-", (printed
++ ? delim
: ""));
2132 if (DO_BIC(BIC_CPU
))
2133 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), t
->cpu_id
);
2134 if (DO_BIC(BIC_APIC
))
2135 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), t
->apic_id
);
2136 if (DO_BIC(BIC_X2APIC
))
2137 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), t
->x2apic_id
);
2140 if (DO_BIC(BIC_Avg_MHz
))
2141 outp
+= sprintf(outp
, "%s%.0f", (printed
++ ? delim
: ""), 1.0 / units
* t
->aperf
/ interval_float
);
2143 if (DO_BIC(BIC_Busy
))
2144 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * t
->mperf
/ tsc
);
2146 if (DO_BIC(BIC_Bzy_MHz
)) {
2149 sprintf(outp
, "%s%.0f", (printed
++ ? delim
: ""), base_hz
/ units
* t
->aperf
/ t
->mperf
);
2151 outp
+= sprintf(outp
, "%s%.0f", (printed
++ ? delim
: ""),
2152 tsc
/ units
* t
->aperf
/ t
->mperf
/ interval_float
);
2155 if (DO_BIC(BIC_TSC_MHz
))
2156 outp
+= sprintf(outp
, "%s%.0f", (printed
++ ? delim
: ""), 1.0 * t
->tsc
/ units
/ interval_float
);
2158 if (DO_BIC(BIC_IPC
))
2159 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 1.0 * t
->instr_count
/ t
->aperf
);
2162 if (DO_BIC(BIC_IRQ
)) {
2163 if (sums_need_wide_columns
)
2164 outp
+= sprintf(outp
, "%s%8lld", (printed
++ ? delim
: ""), t
->irq_count
);
2166 outp
+= sprintf(outp
, "%s%lld", (printed
++ ? delim
: ""), t
->irq_count
);
2170 if (DO_BIC(BIC_SMI
))
2171 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), t
->smi_count
);
2173 /* Added counters */
2174 for (i
= 0, mp
= sys
.tp
; mp
; i
++, mp
= mp
->next
) {
2175 if (mp
->format
== FORMAT_RAW
) {
2176 if (mp
->width
== 32)
2178 sprintf(outp
, "%s0x%08x", (printed
++ ? delim
: ""), (unsigned int)t
->counter
[i
]);
2180 outp
+= sprintf(outp
, "%s0x%016llx", (printed
++ ? delim
: ""), t
->counter
[i
]);
2181 } else if (mp
->format
== FORMAT_DELTA
) {
2182 if ((mp
->type
== COUNTER_ITEMS
) && sums_need_wide_columns
)
2183 outp
+= sprintf(outp
, "%s%8lld", (printed
++ ? delim
: ""), t
->counter
[i
]);
2185 outp
+= sprintf(outp
, "%s%lld", (printed
++ ? delim
: ""), t
->counter
[i
]);
2186 } else if (mp
->format
== FORMAT_PERCENT
) {
2187 if (mp
->type
== COUNTER_USEC
)
2189 sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""),
2190 t
->counter
[i
] / interval_float
/ 10000);
2192 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * t
->counter
[i
] / tsc
);
2197 if (DO_BIC(BIC_CPU_c1
))
2198 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * t
->c1
/ tsc
);
2200 /* print per-core data only for 1st thread in core */
2201 if (!is_cpu_first_thread_in_core(t
, c
, p
))
2204 if (DO_BIC(BIC_CPU_c3
))
2205 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * c
->c3
/ tsc
);
2206 if (DO_BIC(BIC_CPU_c6
))
2207 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * c
->c6
/ tsc
);
2208 if (DO_BIC(BIC_CPU_c7
))
2209 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * c
->c7
/ tsc
);
2212 if (DO_BIC(BIC_Mod_c6
))
2213 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * c
->mc6_us
/ tsc
);
2215 if (DO_BIC(BIC_CoreTmp
))
2216 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), c
->core_temp_c
);
2218 /* Core throttle count */
2219 if (DO_BIC(BIC_CORE_THROT_CNT
))
2220 outp
+= sprintf(outp
, "%s%lld", (printed
++ ? delim
: ""), c
->core_throt_cnt
);
2222 for (i
= 0, mp
= sys
.cp
; mp
; i
++, mp
= mp
->next
) {
2223 if (mp
->format
== FORMAT_RAW
) {
2224 if (mp
->width
== 32)
2226 sprintf(outp
, "%s0x%08x", (printed
++ ? delim
: ""), (unsigned int)c
->counter
[i
]);
2228 outp
+= sprintf(outp
, "%s0x%016llx", (printed
++ ? delim
: ""), c
->counter
[i
]);
2229 } else if (mp
->format
== FORMAT_DELTA
) {
2230 if ((mp
->type
== COUNTER_ITEMS
) && sums_need_wide_columns
)
2231 outp
+= sprintf(outp
, "%s%8lld", (printed
++ ? delim
: ""), c
->counter
[i
]);
2233 outp
+= sprintf(outp
, "%s%lld", (printed
++ ? delim
: ""), c
->counter
[i
]);
2234 } else if (mp
->format
== FORMAT_PERCENT
) {
2235 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * c
->counter
[i
] / tsc
);
2241 if (DO_BIC(BIC_CorWatt
) && platform
->has_per_core_rapl
)
2243 sprintf(outp
, fmt8
, (printed
++ ? delim
: ""),
2244 rapl_counter_get_value(&c
->core_energy
, RAPL_UNIT_WATTS
, interval_float
));
2245 if (DO_BIC(BIC_Cor_J
) && platform
->has_per_core_rapl
)
2246 outp
+= sprintf(outp
, fmt8
, (printed
++ ? delim
: ""),
2247 rapl_counter_get_value(&c
->core_energy
, RAPL_UNIT_JOULES
, interval_float
));
2249 /* print per-package data only for 1st core in package */
2250 if (!is_cpu_first_core_in_package(t
, c
, p
))
2254 if (DO_BIC(BIC_PkgTmp
))
2255 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), p
->pkg_temp_c
);
2258 if (DO_BIC(BIC_GFX_rc6
)) {
2259 if (p
->gfx_rc6_ms
== -1) { /* detect GFX counter reset */
2260 outp
+= sprintf(outp
, "%s**.**", (printed
++ ? delim
: ""));
2262 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""),
2263 p
->gfx_rc6_ms
/ 10.0 / interval_float
);
2268 if (DO_BIC(BIC_GFXMHz
))
2269 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), p
->gfx_mhz
);
2272 if (DO_BIC(BIC_GFXACTMHz
))
2273 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), p
->gfx_act_mhz
);
2276 if (DO_BIC(BIC_SAM_mc6
)) {
2277 if (p
->sam_mc6_ms
== -1) { /* detect GFX counter reset */
2278 outp
+= sprintf(outp
, "%s**.**", (printed
++ ? delim
: ""));
2280 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""),
2281 p
->sam_mc6_ms
/ 10.0 / interval_float
);
2286 if (DO_BIC(BIC_SAMMHz
))
2287 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), p
->sam_mhz
);
2290 if (DO_BIC(BIC_SAMACTMHz
))
2291 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), p
->sam_act_mhz
);
2293 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
2294 if (DO_BIC(BIC_Totl_c0
))
2295 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * p
->pkg_wtd_core_c0
/ tsc
);
2296 if (DO_BIC(BIC_Any_c0
))
2297 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * p
->pkg_any_core_c0
/ tsc
);
2298 if (DO_BIC(BIC_GFX_c0
))
2299 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * p
->pkg_any_gfxe_c0
/ tsc
);
2300 if (DO_BIC(BIC_CPUGFX
))
2301 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * p
->pkg_both_core_gfxe_c0
/ tsc
);
2303 if (DO_BIC(BIC_Pkgpc2
))
2304 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * p
->pc2
/ tsc
);
2305 if (DO_BIC(BIC_Pkgpc3
))
2306 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * p
->pc3
/ tsc
);
2307 if (DO_BIC(BIC_Pkgpc6
))
2308 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * p
->pc6
/ tsc
);
2309 if (DO_BIC(BIC_Pkgpc7
))
2310 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * p
->pc7
/ tsc
);
2311 if (DO_BIC(BIC_Pkgpc8
))
2312 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * p
->pc8
/ tsc
);
2313 if (DO_BIC(BIC_Pkgpc9
))
2314 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * p
->pc9
/ tsc
);
2315 if (DO_BIC(BIC_Pkgpc10
))
2316 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * p
->pc10
/ tsc
);
2318 if (DO_BIC(BIC_CPU_LPI
)) {
2319 if (p
->cpu_lpi
>= 0)
2321 sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""),
2322 100.0 * p
->cpu_lpi
/ 1000000.0 / interval_float
);
2324 outp
+= sprintf(outp
, "%s(neg)", (printed
++ ? delim
: ""));
2326 if (DO_BIC(BIC_SYS_LPI
)) {
2327 if (p
->sys_lpi
>= 0)
2329 sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""),
2330 100.0 * p
->sys_lpi
/ 1000000.0 / interval_float
);
2332 outp
+= sprintf(outp
, "%s(neg)", (printed
++ ? delim
: ""));
2335 if (DO_BIC(BIC_PkgWatt
))
2337 sprintf(outp
, fmt8
, (printed
++ ? delim
: ""),
2338 rapl_counter_get_value(&p
->energy_pkg
, RAPL_UNIT_WATTS
, interval_float
));
2339 if (DO_BIC(BIC_CorWatt
) && !platform
->has_per_core_rapl
)
2341 sprintf(outp
, fmt8
, (printed
++ ? delim
: ""),
2342 rapl_counter_get_value(&p
->energy_cores
, RAPL_UNIT_WATTS
, interval_float
));
2343 if (DO_BIC(BIC_GFXWatt
))
2345 sprintf(outp
, fmt8
, (printed
++ ? delim
: ""),
2346 rapl_counter_get_value(&p
->energy_gfx
, RAPL_UNIT_WATTS
, interval_float
));
2347 if (DO_BIC(BIC_RAMWatt
))
2349 sprintf(outp
, fmt8
, (printed
++ ? delim
: ""),
2350 rapl_counter_get_value(&p
->energy_dram
, RAPL_UNIT_WATTS
, interval_float
));
2351 if (DO_BIC(BIC_Pkg_J
))
2352 outp
+= sprintf(outp
, fmt8
, (printed
++ ? delim
: ""),
2353 rapl_counter_get_value(&p
->energy_pkg
, RAPL_UNIT_JOULES
, interval_float
));
2354 if (DO_BIC(BIC_Cor_J
) && !platform
->has_per_core_rapl
)
2355 outp
+= sprintf(outp
, fmt8
, (printed
++ ? delim
: ""),
2356 rapl_counter_get_value(&p
->energy_cores
, RAPL_UNIT_JOULES
, interval_float
));
2357 if (DO_BIC(BIC_GFX_J
))
2358 outp
+= sprintf(outp
, fmt8
, (printed
++ ? delim
: ""),
2359 rapl_counter_get_value(&p
->energy_gfx
, RAPL_UNIT_JOULES
, interval_float
));
2360 if (DO_BIC(BIC_RAM_J
))
2361 outp
+= sprintf(outp
, fmt8
, (printed
++ ? delim
: ""),
2362 rapl_counter_get_value(&p
->energy_dram
, RAPL_UNIT_JOULES
, interval_float
));
2363 if (DO_BIC(BIC_PKG__
))
2365 sprintf(outp
, fmt8
, (printed
++ ? delim
: ""),
2366 rapl_counter_get_value(&p
->rapl_pkg_perf_status
, RAPL_UNIT_WATTS
, interval_float
));
2367 if (DO_BIC(BIC_RAM__
))
2369 sprintf(outp
, fmt8
, (printed
++ ? delim
: ""),
2370 rapl_counter_get_value(&p
->rapl_dram_perf_status
, RAPL_UNIT_WATTS
, interval_float
));
2372 if (DO_BIC(BIC_UNCORE_MHZ
))
2373 outp
+= sprintf(outp
, "%s%d", (printed
++ ? delim
: ""), p
->uncore_mhz
);
2375 for (i
= 0, mp
= sys
.pp
; mp
; i
++, mp
= mp
->next
) {
2376 if (mp
->format
== FORMAT_RAW
) {
2377 if (mp
->width
== 32)
2379 sprintf(outp
, "%s0x%08x", (printed
++ ? delim
: ""), (unsigned int)p
->counter
[i
]);
2381 outp
+= sprintf(outp
, "%s0x%016llx", (printed
++ ? delim
: ""), p
->counter
[i
]);
2382 } else if (mp
->format
== FORMAT_DELTA
) {
2383 if ((mp
->type
== COUNTER_ITEMS
) && sums_need_wide_columns
)
2384 outp
+= sprintf(outp
, "%s%8lld", (printed
++ ? delim
: ""), p
->counter
[i
]);
2386 outp
+= sprintf(outp
, "%s%lld", (printed
++ ? delim
: ""), p
->counter
[i
]);
2387 } else if (mp
->format
== FORMAT_PERCENT
) {
2388 outp
+= sprintf(outp
, "%s%.2f", (printed
++ ? delim
: ""), 100.0 * p
->counter
[i
] / tsc
);
2393 if (*(outp
- 1) != '\n')
2394 outp
+= sprintf(outp
, "\n");
2399 void flush_output_stdout(void)
2408 fputs(output_buffer
, filep
);
2411 outp
= output_buffer
;
2414 void flush_output_stderr(void)
2416 fputs(output_buffer
, outf
);
2418 outp
= output_buffer
;
2421 void format_all_counters(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
2425 if ((!count
|| (header_iterations
&& !(count
% header_iterations
))) || !summary_only
)
2428 format_counters(&average
.threads
, &average
.cores
, &average
.packages
);
2435 for_all_cpus(format_counters
, t
, c
, p
);
/*
 * Compute (new - old) modulo 2^32 for counters that are only 32 bits
 * wide in hardware: shifting both operands into the upper 32 bits makes
 * the subtraction wrap naturally, then shift the result back down.
 */
#define DELTA_WRAP32(new, old) \
	old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32);
2441 int delta_package(struct pkg_data
*new, struct pkg_data
*old
)
2444 struct msr_counter
*mp
;
2446 if (DO_BIC(BIC_Totl_c0
))
2447 old
->pkg_wtd_core_c0
= new->pkg_wtd_core_c0
- old
->pkg_wtd_core_c0
;
2448 if (DO_BIC(BIC_Any_c0
))
2449 old
->pkg_any_core_c0
= new->pkg_any_core_c0
- old
->pkg_any_core_c0
;
2450 if (DO_BIC(BIC_GFX_c0
))
2451 old
->pkg_any_gfxe_c0
= new->pkg_any_gfxe_c0
- old
->pkg_any_gfxe_c0
;
2452 if (DO_BIC(BIC_CPUGFX
))
2453 old
->pkg_both_core_gfxe_c0
= new->pkg_both_core_gfxe_c0
- old
->pkg_both_core_gfxe_c0
;
2455 old
->pc2
= new->pc2
- old
->pc2
;
2456 if (DO_BIC(BIC_Pkgpc3
))
2457 old
->pc3
= new->pc3
- old
->pc3
;
2458 if (DO_BIC(BIC_Pkgpc6
))
2459 old
->pc6
= new->pc6
- old
->pc6
;
2460 if (DO_BIC(BIC_Pkgpc7
))
2461 old
->pc7
= new->pc7
- old
->pc7
;
2462 old
->pc8
= new->pc8
- old
->pc8
;
2463 old
->pc9
= new->pc9
- old
->pc9
;
2464 old
->pc10
= new->pc10
- old
->pc10
;
2465 old
->cpu_lpi
= new->cpu_lpi
- old
->cpu_lpi
;
2466 old
->sys_lpi
= new->sys_lpi
- old
->sys_lpi
;
2467 old
->pkg_temp_c
= new->pkg_temp_c
;
2469 /* flag an error when rc6 counter resets/wraps */
2470 if (old
->gfx_rc6_ms
> new->gfx_rc6_ms
)
2471 old
->gfx_rc6_ms
= -1;
2473 old
->gfx_rc6_ms
= new->gfx_rc6_ms
- old
->gfx_rc6_ms
;
2475 old
->uncore_mhz
= new->uncore_mhz
;
2476 old
->gfx_mhz
= new->gfx_mhz
;
2477 old
->gfx_act_mhz
= new->gfx_act_mhz
;
2479 /* flag an error when mc6 counter resets/wraps */
2480 if (old
->sam_mc6_ms
> new->sam_mc6_ms
)
2481 old
->sam_mc6_ms
= -1;
2483 old
->sam_mc6_ms
= new->sam_mc6_ms
- old
->sam_mc6_ms
;
2485 old
->sam_mhz
= new->sam_mhz
;
2486 old
->sam_act_mhz
= new->sam_act_mhz
;
2488 old
->energy_pkg
.raw_value
= new->energy_pkg
.raw_value
- old
->energy_pkg
.raw_value
;
2489 old
->energy_cores
.raw_value
= new->energy_cores
.raw_value
- old
->energy_cores
.raw_value
;
2490 old
->energy_gfx
.raw_value
= new->energy_gfx
.raw_value
- old
->energy_gfx
.raw_value
;
2491 old
->energy_dram
.raw_value
= new->energy_dram
.raw_value
- old
->energy_dram
.raw_value
;
2492 old
->rapl_pkg_perf_status
.raw_value
= new->rapl_pkg_perf_status
.raw_value
- old
->rapl_pkg_perf_status
.raw_value
;
2493 old
->rapl_dram_perf_status
.raw_value
=
2494 new->rapl_dram_perf_status
.raw_value
- old
->rapl_dram_perf_status
.raw_value
;
2496 for (i
= 0, mp
= sys
.pp
; mp
; i
++, mp
= mp
->next
) {
2497 if (mp
->format
== FORMAT_RAW
)
2498 old
->counter
[i
] = new->counter
[i
];
2500 old
->counter
[i
] = new->counter
[i
] - old
->counter
[i
];
2506 void delta_core(struct core_data
*new, struct core_data
*old
)
2509 struct msr_counter
*mp
;
2511 old
->c3
= new->c3
- old
->c3
;
2512 old
->c6
= new->c6
- old
->c6
;
2513 old
->c7
= new->c7
- old
->c7
;
2514 old
->core_temp_c
= new->core_temp_c
;
2515 old
->core_throt_cnt
= new->core_throt_cnt
;
2516 old
->mc6_us
= new->mc6_us
- old
->mc6_us
;
2518 DELTA_WRAP32(new->core_energy
.raw_value
, old
->core_energy
.raw_value
);
2520 for (i
= 0, mp
= sys
.cp
; mp
; i
++, mp
= mp
->next
) {
2521 if (mp
->format
== FORMAT_RAW
)
2522 old
->counter
[i
] = new->counter
[i
];
2524 old
->counter
[i
] = new->counter
[i
] - old
->counter
[i
];
2528 int soft_c1_residency_display(int bic
)
2530 if (!DO_BIC(BIC_CPU_c1
) || platform
->has_msr_core_c1_res
)
2533 return DO_BIC_READ(bic
);
2539 int delta_thread(struct thread_data
*new, struct thread_data
*old
, struct core_data
*core_delta
)
2542 struct msr_counter
*mp
;
2544 /* we run cpuid just the 1st time, copy the results */
2545 if (DO_BIC(BIC_APIC
))
2546 new->apic_id
= old
->apic_id
;
2547 if (DO_BIC(BIC_X2APIC
))
2548 new->x2apic_id
= old
->x2apic_id
;
2551 * the timestamps from start of measurement interval are in "old"
2552 * the timestamp from end of measurement interval are in "new"
2553 * over-write old w/ new so we can print end of interval values
2556 timersub(&new->tv_begin
, &old
->tv_begin
, &old
->tv_delta
);
2557 old
->tv_begin
= new->tv_begin
;
2558 old
->tv_end
= new->tv_end
;
2560 old
->tsc
= new->tsc
- old
->tsc
;
2562 /* check for TSC < 1 Mcycles over interval */
2563 if (old
->tsc
< (1000 * 1000))
2564 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
2565 "You can disable all c-states by booting with \"idle=poll\"\n"
2566 "or just the deep ones with \"processor.max_cstate=1\"");
2568 old
->c1
= new->c1
- old
->c1
;
2570 if (DO_BIC(BIC_Avg_MHz
) || DO_BIC(BIC_Busy
) || DO_BIC(BIC_Bzy_MHz
) || DO_BIC(BIC_IPC
)
2571 || soft_c1_residency_display(BIC_Avg_MHz
)) {
2572 if ((new->aperf
> old
->aperf
) && (new->mperf
> old
->mperf
)) {
2573 old
->aperf
= new->aperf
- old
->aperf
;
2574 old
->mperf
= new->mperf
- old
->mperf
;
2580 if (platform
->has_msr_core_c1_res
) {
2582 * Some models have a dedicated C1 residency MSR,
2583 * which should be more accurate than the derivation below.
2587 * As counter collection is not atomic,
2588 * it is possible for mperf's non-halted cycles + idle states
2589 * to exceed TSC's all cycles: show c1 = 0% in that case.
2591 if ((old
->mperf
+ core_delta
->c3
+ core_delta
->c6
+ core_delta
->c7
) > (old
->tsc
* tsc_tweak
))
2594 /* normal case, derive c1 */
2595 old
->c1
= (old
->tsc
* tsc_tweak
) - old
->mperf
- core_delta
->c3
2596 - core_delta
->c6
- core_delta
->c7
;
2600 if (old
->mperf
== 0) {
2602 fprintf(outf
, "cpu%d MPERF 0!\n", old
->cpu_id
);
2603 old
->mperf
= 1; /* divide by 0 protection */
2606 if (DO_BIC(BIC_IPC
))
2607 old
->instr_count
= new->instr_count
- old
->instr_count
;
2609 if (DO_BIC(BIC_IRQ
))
2610 old
->irq_count
= new->irq_count
- old
->irq_count
;
2612 if (DO_BIC(BIC_SMI
))
2613 old
->smi_count
= new->smi_count
- old
->smi_count
;
2615 for (i
= 0, mp
= sys
.tp
; mp
; i
++, mp
= mp
->next
) {
2616 if (mp
->format
== FORMAT_RAW
)
2617 old
->counter
[i
] = new->counter
[i
];
2619 old
->counter
[i
] = new->counter
[i
] - old
->counter
[i
];
/*
 * Compute all deltas for one CPU: core delta (1st thread in core only),
 * thread delta (always), package delta (1st core in package only).
 * Returns non-zero when the thread delta was invalid.
 */
int delta_cpu(struct thread_data *t, struct core_data *c,
	      struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
{
	int retval = 0;

	/* calculate core delta only for 1st thread in core */
	if (is_cpu_first_thread_in_core(t, c, p))
		delta_core(c, c2);

	/* always calculate thread delta */
	retval = delta_thread(t, t2, c2);	/* c2 is core delta */
	if (retval)
		return retval;

	/* calculate package delta only for 1st core in package */
	if (is_cpu_first_core_in_package(t, c, p))
		retval = delta_package(p, p2);

	return retval;
}
2645 void rapl_counter_clear(struct rapl_counter
*c
)
2649 c
->unit
= RAPL_UNIT_INVALID
;
2652 void clear_counters(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
2655 struct msr_counter
*mp
;
2657 t
->tv_begin
.tv_sec
= 0;
2658 t
->tv_begin
.tv_usec
= 0;
2659 t
->tv_end
.tv_sec
= 0;
2660 t
->tv_end
.tv_usec
= 0;
2661 t
->tv_delta
.tv_sec
= 0;
2662 t
->tv_delta
.tv_usec
= 0;
2679 rapl_counter_clear(&c
->core_energy
);
2680 c
->core_throt_cnt
= 0;
2682 p
->pkg_wtd_core_c0
= 0;
2683 p
->pkg_any_core_c0
= 0;
2684 p
->pkg_any_gfxe_c0
= 0;
2685 p
->pkg_both_core_gfxe_c0
= 0;
2688 if (DO_BIC(BIC_Pkgpc3
))
2690 if (DO_BIC(BIC_Pkgpc6
))
2692 if (DO_BIC(BIC_Pkgpc7
))
2700 rapl_counter_clear(&p
->energy_pkg
);
2701 rapl_counter_clear(&p
->energy_dram
);
2702 rapl_counter_clear(&p
->energy_cores
);
2703 rapl_counter_clear(&p
->energy_gfx
);
2704 rapl_counter_clear(&p
->rapl_pkg_perf_status
);
2705 rapl_counter_clear(&p
->rapl_dram_perf_status
);
2715 for (i
= 0, mp
= sys
.tp
; mp
; i
++, mp
= mp
->next
)
2718 for (i
= 0, mp
= sys
.cp
; mp
; i
++, mp
= mp
->next
)
2721 for (i
= 0, mp
= sys
.pp
; mp
; i
++, mp
= mp
->next
)
2725 void rapl_counter_accumulate(struct rapl_counter
*dst
, const struct rapl_counter
*src
)
2727 /* Copy unit and scale from src if dst is not initialized */
2728 if (dst
->unit
== RAPL_UNIT_INVALID
) {
2729 dst
->unit
= src
->unit
;
2730 dst
->scale
= src
->scale
;
2733 assert(dst
->unit
== src
->unit
);
2734 assert(dst
->scale
== src
->scale
);
2736 dst
->raw_value
+= src
->raw_value
;
2739 int sum_counters(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
2742 struct msr_counter
*mp
;
2744 /* copy un-changing apic_id's */
2745 if (DO_BIC(BIC_APIC
))
2746 average
.threads
.apic_id
= t
->apic_id
;
2747 if (DO_BIC(BIC_X2APIC
))
2748 average
.threads
.x2apic_id
= t
->x2apic_id
;
2750 /* remember first tv_begin */
2751 if (average
.threads
.tv_begin
.tv_sec
== 0)
2752 average
.threads
.tv_begin
= t
->tv_begin
;
2754 /* remember last tv_end */
2755 average
.threads
.tv_end
= t
->tv_end
;
2757 average
.threads
.tsc
+= t
->tsc
;
2758 average
.threads
.aperf
+= t
->aperf
;
2759 average
.threads
.mperf
+= t
->mperf
;
2760 average
.threads
.c1
+= t
->c1
;
2762 average
.threads
.instr_count
+= t
->instr_count
;
2764 average
.threads
.irq_count
+= t
->irq_count
;
2765 average
.threads
.smi_count
+= t
->smi_count
;
2767 for (i
= 0, mp
= sys
.tp
; mp
; i
++, mp
= mp
->next
) {
2768 if (mp
->format
== FORMAT_RAW
)
2770 average
.threads
.counter
[i
] += t
->counter
[i
];
2773 /* sum per-core values only for 1st thread in core */
2774 if (!is_cpu_first_thread_in_core(t
, c
, p
))
2777 average
.cores
.c3
+= c
->c3
;
2778 average
.cores
.c6
+= c
->c6
;
2779 average
.cores
.c7
+= c
->c7
;
2780 average
.cores
.mc6_us
+= c
->mc6_us
;
2782 average
.cores
.core_temp_c
= MAX(average
.cores
.core_temp_c
, c
->core_temp_c
);
2783 average
.cores
.core_throt_cnt
= MAX(average
.cores
.core_throt_cnt
, c
->core_throt_cnt
);
2785 rapl_counter_accumulate(&average
.cores
.core_energy
, &c
->core_energy
);
2787 for (i
= 0, mp
= sys
.cp
; mp
; i
++, mp
= mp
->next
) {
2788 if (mp
->format
== FORMAT_RAW
)
2790 average
.cores
.counter
[i
] += c
->counter
[i
];
2793 /* sum per-pkg values only for 1st core in pkg */
2794 if (!is_cpu_first_core_in_package(t
, c
, p
))
2797 if (DO_BIC(BIC_Totl_c0
))
2798 average
.packages
.pkg_wtd_core_c0
+= p
->pkg_wtd_core_c0
;
2799 if (DO_BIC(BIC_Any_c0
))
2800 average
.packages
.pkg_any_core_c0
+= p
->pkg_any_core_c0
;
2801 if (DO_BIC(BIC_GFX_c0
))
2802 average
.packages
.pkg_any_gfxe_c0
+= p
->pkg_any_gfxe_c0
;
2803 if (DO_BIC(BIC_CPUGFX
))
2804 average
.packages
.pkg_both_core_gfxe_c0
+= p
->pkg_both_core_gfxe_c0
;
2806 average
.packages
.pc2
+= p
->pc2
;
2807 if (DO_BIC(BIC_Pkgpc3
))
2808 average
.packages
.pc3
+= p
->pc3
;
2809 if (DO_BIC(BIC_Pkgpc6
))
2810 average
.packages
.pc6
+= p
->pc6
;
2811 if (DO_BIC(BIC_Pkgpc7
))
2812 average
.packages
.pc7
+= p
->pc7
;
2813 average
.packages
.pc8
+= p
->pc8
;
2814 average
.packages
.pc9
+= p
->pc9
;
2815 average
.packages
.pc10
+= p
->pc10
;
2817 average
.packages
.cpu_lpi
= p
->cpu_lpi
;
2818 average
.packages
.sys_lpi
= p
->sys_lpi
;
2820 rapl_counter_accumulate(&average
.packages
.energy_pkg
, &p
->energy_pkg
);
2821 rapl_counter_accumulate(&average
.packages
.energy_dram
, &p
->energy_dram
);
2822 rapl_counter_accumulate(&average
.packages
.energy_cores
, &p
->energy_cores
);
2823 rapl_counter_accumulate(&average
.packages
.energy_gfx
, &p
->energy_gfx
);
2825 average
.packages
.gfx_rc6_ms
= p
->gfx_rc6_ms
;
2826 average
.packages
.uncore_mhz
= p
->uncore_mhz
;
2827 average
.packages
.gfx_mhz
= p
->gfx_mhz
;
2828 average
.packages
.gfx_act_mhz
= p
->gfx_act_mhz
;
2829 average
.packages
.sam_mc6_ms
= p
->sam_mc6_ms
;
2830 average
.packages
.sam_mhz
= p
->sam_mhz
;
2831 average
.packages
.sam_act_mhz
= p
->sam_act_mhz
;
2833 average
.packages
.pkg_temp_c
= MAX(average
.packages
.pkg_temp_c
, p
->pkg_temp_c
);
2835 rapl_counter_accumulate(&average
.packages
.rapl_pkg_perf_status
, &p
->rapl_pkg_perf_status
);
2836 rapl_counter_accumulate(&average
.packages
.rapl_dram_perf_status
, &p
->rapl_dram_perf_status
);
2838 for (i
= 0, mp
= sys
.pp
; mp
; i
++, mp
= mp
->next
) {
2839 if ((mp
->format
== FORMAT_RAW
) && (topo
.num_packages
== 0))
2840 average
.packages
.counter
[i
] = p
->counter
[i
];
2842 average
.packages
.counter
[i
] += p
->counter
[i
];
2848 * sum the counters for all cpus in the system
2849 * compute the weighted average
2851 void compute_average(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
2854 struct msr_counter
*mp
;
2856 clear_counters(&average
.threads
, &average
.cores
, &average
.packages
);
2858 for_all_cpus(sum_counters
, t
, c
, p
);
2860 /* Use the global time delta for the average. */
2861 average
.threads
.tv_delta
= tv_delta
;
2863 average
.threads
.tsc
/= topo
.allowed_cpus
;
2864 average
.threads
.aperf
/= topo
.allowed_cpus
;
2865 average
.threads
.mperf
/= topo
.allowed_cpus
;
2866 average
.threads
.instr_count
/= topo
.allowed_cpus
;
2867 average
.threads
.c1
/= topo
.allowed_cpus
;
2869 if (average
.threads
.irq_count
> 9999999)
2870 sums_need_wide_columns
= 1;
2872 average
.cores
.c3
/= topo
.allowed_cores
;
2873 average
.cores
.c6
/= topo
.allowed_cores
;
2874 average
.cores
.c7
/= topo
.allowed_cores
;
2875 average
.cores
.mc6_us
/= topo
.allowed_cores
;
2877 if (DO_BIC(BIC_Totl_c0
))
2878 average
.packages
.pkg_wtd_core_c0
/= topo
.allowed_packages
;
2879 if (DO_BIC(BIC_Any_c0
))
2880 average
.packages
.pkg_any_core_c0
/= topo
.allowed_packages
;
2881 if (DO_BIC(BIC_GFX_c0
))
2882 average
.packages
.pkg_any_gfxe_c0
/= topo
.allowed_packages
;
2883 if (DO_BIC(BIC_CPUGFX
))
2884 average
.packages
.pkg_both_core_gfxe_c0
/= topo
.allowed_packages
;
2886 average
.packages
.pc2
/= topo
.allowed_packages
;
2887 if (DO_BIC(BIC_Pkgpc3
))
2888 average
.packages
.pc3
/= topo
.allowed_packages
;
2889 if (DO_BIC(BIC_Pkgpc6
))
2890 average
.packages
.pc6
/= topo
.allowed_packages
;
2891 if (DO_BIC(BIC_Pkgpc7
))
2892 average
.packages
.pc7
/= topo
.allowed_packages
;
2894 average
.packages
.pc8
/= topo
.allowed_packages
;
2895 average
.packages
.pc9
/= topo
.allowed_packages
;
2896 average
.packages
.pc10
/= topo
.allowed_packages
;
2898 for (i
= 0, mp
= sys
.tp
; mp
; i
++, mp
= mp
->next
) {
2899 if (mp
->format
== FORMAT_RAW
)
2901 if (mp
->type
== COUNTER_ITEMS
) {
2902 if (average
.threads
.counter
[i
] > 9999999)
2903 sums_need_wide_columns
= 1;
2906 average
.threads
.counter
[i
] /= topo
.allowed_cpus
;
2908 for (i
= 0, mp
= sys
.cp
; mp
; i
++, mp
= mp
->next
) {
2909 if (mp
->format
== FORMAT_RAW
)
2911 if (mp
->type
== COUNTER_ITEMS
) {
2912 if (average
.cores
.counter
[i
] > 9999999)
2913 sums_need_wide_columns
= 1;
2915 average
.cores
.counter
[i
] /= topo
.allowed_cores
;
2917 for (i
= 0, mp
= sys
.pp
; mp
; i
++, mp
= mp
->next
) {
2918 if (mp
->format
== FORMAT_RAW
)
2920 if (mp
->type
== COUNTER_ITEMS
) {
2921 if (average
.packages
.counter
[i
] > 9999999)
2922 sums_need_wide_columns
= 1;
2924 average
.packages
.counter
[i
] /= topo
.allowed_packages
;
2928 static unsigned long long rdtsc(void)
2930 unsigned int low
, high
;
2932 asm volatile ("rdtsc":"=a" (low
), "=d"(high
));
2934 return low
| ((unsigned long long)high
) << 32;
/*
 * Open a file, and exit on failure
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *filep = fopen(path, mode);

	if (!filep)
		err(1, "%s: open failed", path);
	return filep;
}
/*
 * snapshot_sysfs_counter()
 *
 * return snapshot of given counter
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	retval = fscanf(fp, "%lld", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
2971 int get_mp(int cpu
, struct msr_counter
*mp
, unsigned long long *counterp
)
2973 if (mp
->msr_num
!= 0) {
2975 if (get_msr(cpu
, mp
->msr_num
, counterp
))
2978 char path
[128 + PATH_BYTES
];
2980 if (mp
->flags
& SYSFS_PERCPU
) {
2981 sprintf(path
, "/sys/devices/system/cpu/cpu%d/%s", cpu
, mp
->path
);
2983 *counterp
= snapshot_sysfs_counter(path
);
2985 *counterp
= snapshot_sysfs_counter(mp
->path
);
/* Return the current uncore frequency in MHz for the given package/die. */
unsigned long long get_uncore_mhz(int package, int die)
{
	char path[128];

	sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package,
		die);

	return (snapshot_sysfs_counter(path) / 1000);
}
3002 int get_epb(int cpu
)
3004 char path
[128 + PATH_BYTES
];
3005 unsigned long long msr
;
3009 sprintf(path
, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu
);
3011 fp
= fopen(path
, "r");
3015 ret
= fscanf(fp
, "%d", &epb
);
3017 err(1, "%s(%s)", __func__
, path
);
3027 get_msr(cpu
, MSR_IA32_ENERGY_PERF_BIAS
, &msr
);
3032 void get_apic_id(struct thread_data
*t
)
3034 unsigned int eax
, ebx
, ecx
, edx
;
3036 if (DO_BIC(BIC_APIC
)) {
3037 eax
= ebx
= ecx
= edx
= 0;
3038 __cpuid(1, eax
, ebx
, ecx
, edx
);
3040 t
->apic_id
= (ebx
>> 24) & 0xff;
3043 if (!DO_BIC(BIC_X2APIC
))
3046 if (authentic_amd
|| hygon_genuine
) {
3047 unsigned int topology_extensions
;
3049 if (max_extended_level
< 0x8000001e)
3052 eax
= ebx
= ecx
= edx
= 0;
3053 __cpuid(0x80000001, eax
, ebx
, ecx
, edx
);
3054 topology_extensions
= ecx
& (1 << 22);
3056 if (topology_extensions
== 0)
3059 eax
= ebx
= ecx
= edx
= 0;
3060 __cpuid(0x8000001e, eax
, ebx
, ecx
, edx
);
3069 if (max_level
< 0xb)
3073 __cpuid(0xb, eax
, ebx
, ecx
, edx
);
3076 if (debug
&& (t
->apic_id
!= (t
->x2apic_id
& 0xff)))
3077 fprintf(outf
, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t
->cpu_id
, t
->apic_id
, t
->x2apic_id
);
3080 int get_core_throt_cnt(int cpu
, unsigned long long *cnt
)
3082 char path
[128 + PATH_BYTES
];
3083 unsigned long long tmp
;
3087 sprintf(path
, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu
);
3088 fp
= fopen(path
, "r");
3091 ret
= fscanf(fp
, "%lld", &tmp
);
/* Pair of perf fds for one CPU's APERF/MPERF event group. */
struct amperf_group_fd {
	int aperf;		/* Also the group descriptor */
	int mperf;
};
3105 static int read_perf_counter_info(const char *const path
, const char *const parse_format
, void *value_ptr
)
3112 fdmt
= open(path
, O_RDONLY
, 0);
3115 fprintf(stderr
, "Failed to parse perf counter info %s\n", path
);
3117 goto cleanup_and_exit
;
3120 bytes_read
= read(fdmt
, buf
, sizeof(buf
) - 1);
3121 if (bytes_read
<= 0 || bytes_read
>= (int)sizeof(buf
)) {
3123 fprintf(stderr
, "Failed to parse perf counter info %s\n", path
);
3125 goto cleanup_and_exit
;
3128 buf
[bytes_read
] = '\0';
3130 if (sscanf(buf
, parse_format
, value_ptr
) != 1) {
3132 fprintf(stderr
, "Failed to parse perf counter info %s\n", path
);
3134 goto cleanup_and_exit
;
/* As read_perf_counter_info(), but return the parsed unsigned value
 * directly, or (unsigned)-1 on failure. */
static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format)
{
	unsigned int v;
	int status;

	status = read_perf_counter_info(path, parse_format, &v);
	if (status)
		v = -1;

	return v;
}
/* Return the perf PMU type id of the "msr" event source. */
static unsigned read_msr_type(void)
{
	const char *const path = "/sys/bus/event_source/devices/msr/type";
	const char *const format = "%u";

	return read_perf_counter_info_n(path, format);
}
/* Return the perf event config value for the msr PMU's "aperf" event. */
static unsigned read_aperf_config(void)
{
	const char *const path = "/sys/bus/event_source/devices/msr/events/aperf";
	const char *const format = "event=%x";

	return read_perf_counter_info_n(path, format);
}
/* Return the perf event config value for the msr PMU's "mperf" event. */
static unsigned read_mperf_config(void)
{
	const char *const path = "/sys/bus/event_source/devices/msr/events/mperf";
	const char *const format = "event=%x";

	return read_perf_counter_info_n(path, format);
}
/* Return the perf PMU type id of the given event-source subsystem. */
static unsigned read_perf_type(const char *subsys)
{
	const char *const path_format = "/sys/bus/event_source/devices/%s/type";
	const char *const format = "%u";
	char path[128];

	snprintf(path, sizeof(path), path_format, subsys);

	return read_perf_counter_info_n(path, format);
}
/* Return the perf event config value for a named RAPL event. */
static unsigned read_rapl_config(const char *subsys, const char *event_name)
{
	const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s";
	const char *const format = "event=%x";
	char path[128];

	snprintf(path, sizeof(path), path_format, subsys, event_name);

	return read_perf_counter_info_n(path, format);
}
3202 static unsigned read_perf_rapl_unit(const char *subsys
, const char *event_name
)
3204 const char *const path_format
= "/sys/bus/event_source/devices/%s/events/%s.unit";
3205 const char *const format
= "%s";
3207 char unit_buffer
[16];
3209 snprintf(path
, sizeof(path
), path_format
, subsys
, event_name
);
3211 read_perf_counter_info(path
, format
, &unit_buffer
);
3212 if (strcmp("Joules", unit_buffer
) == 0)
3213 return RAPL_UNIT_JOULES
;
3215 return RAPL_UNIT_INVALID
;
/* Return the scale factor from a RAPL perf event's sysfs ".scale" file,
 * or 0.0 when it cannot be read. */
static double read_perf_rapl_scale(const char *subsys, const char *event_name)
{
	const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale";
	const char *const format = "%lf";
	char path[128];
	double scale;

	snprintf(path, sizeof(path), path_format, subsys, event_name);

	if (read_perf_counter_info(path, format, &scale))
		return 0.0;

	return scale;
}
3233 static struct amperf_group_fd
open_amperf_fd(int cpu
)
3235 const unsigned int msr_type
= read_msr_type();
3236 const unsigned int aperf_config
= read_aperf_config();
3237 const unsigned int mperf_config
= read_mperf_config();
3238 struct amperf_group_fd fds
= {.aperf
= -1,.mperf
= -1 };
3240 fds
.aperf
= open_perf_counter(cpu
, msr_type
, aperf_config
, -1, PERF_FORMAT_GROUP
);
3241 fds
.mperf
= open_perf_counter(cpu
, msr_type
, mperf_config
, fds
.aperf
, PERF_FORMAT_GROUP
);
3246 static int get_amperf_fd(int cpu
)
3248 assert(fd_amperf_percpu
);
3250 if (fd_amperf_percpu
[cpu
].aperf
)
3251 return fd_amperf_percpu
[cpu
].aperf
;
3253 fd_amperf_percpu
[cpu
] = open_amperf_fd(cpu
);
3255 return fd_amperf_percpu
[cpu
].aperf
;
3258 /* Read APERF, MPERF and TSC using the perf API. */
3259 static int read_aperf_mperf_tsc_perf(struct thread_data
*t
, int cpu
)
3263 unsigned long nr_entries
;
3264 unsigned long aperf
;
3265 unsigned long mperf
;
3268 unsigned long as_array
[3];
3271 const int fd_amperf
= get_amperf_fd(cpu
);
3274 * Read the TSC with rdtsc, because we want the absolute value and not
3275 * the offset from the start of the counter.
3279 const int n
= read(fd_amperf
, &cnt
.as_array
[0], sizeof(cnt
.as_array
));
3280 if (n
!= sizeof(cnt
.as_array
))
3283 t
->aperf
= cnt
.aperf
* aperf_mperf_multiplier
;
3284 t
->mperf
= cnt
.mperf
* aperf_mperf_multiplier
;
3289 /* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */
3290 static int read_aperf_mperf_tsc_msr(struct thread_data
*t
, int cpu
)
3292 unsigned long long tsc_before
, tsc_between
, tsc_after
, aperf_time
, mperf_time
;
3293 int aperf_mperf_retry_count
= 0;
3296 * The TSC, APERF and MPERF must be read together for
3297 * APERF/MPERF and MPERF/TSC to give accurate results.
3299 * Unfortunately, APERF and MPERF are read by
3300 * individual system call, so delays may occur
3301 * between them. If the time to read them
3302 * varies by a large amount, we re-read them.
3306 * This initial dummy APERF read has been seen to
3307 * reduce jitter in the subsequent reads.
3310 if (get_msr(cpu
, MSR_IA32_APERF
, &t
->aperf
))
3314 t
->tsc
= rdtsc(); /* re-read close to APERF */
3316 tsc_before
= t
->tsc
;
3318 if (get_msr(cpu
, MSR_IA32_APERF
, &t
->aperf
))
3321 tsc_between
= rdtsc();
3323 if (get_msr(cpu
, MSR_IA32_MPERF
, &t
->mperf
))
3326 tsc_after
= rdtsc();
3328 aperf_time
= tsc_between
- tsc_before
;
3329 mperf_time
= tsc_after
- tsc_between
;
3332 * If the system call latency to read APERF and MPERF
3333 * differ by more than 2x, then try again.
3335 if ((aperf_time
> (2 * mperf_time
)) || (mperf_time
> (2 * aperf_time
))) {
3336 aperf_mperf_retry_count
++;
3337 if (aperf_mperf_retry_count
< 5)
3340 warnx("cpu%d jitter %lld %lld", cpu
, aperf_time
, mperf_time
);
3342 aperf_mperf_retry_count
= 0;
3344 t
->aperf
= t
->aperf
* aperf_mperf_multiplier
;
3345 t
->mperf
= t
->mperf
* aperf_mperf_multiplier
;
3350 size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t
*rci
)
3354 for (int i
= 0; i
< NUM_RAPL_COUNTERS
; ++i
)
3355 if (rci
->source
[i
] == RAPL_SOURCE_PERF
)
3361 void write_rapl_counter(struct rapl_counter
*rc
, struct rapl_counter_info_t
*rci
, unsigned int idx
)
3363 rc
->raw_value
= rci
->data
[idx
];
3364 rc
->unit
= rci
->unit
[idx
];
3365 rc
->scale
= rci
->scale
[idx
];
3368 int get_rapl_counters(int cpu
, int domain
, struct core_data
*c
, struct pkg_data
*p
)
3370 unsigned long long perf_data
[NUM_RAPL_COUNTERS
+ 1];
3371 struct rapl_counter_info_t
*rci
= &rapl_counter_info_perdomain
[domain
];
3374 fprintf(stderr
, "get_rapl_counters: cpu%d domain%d\n", cpu
, domain
);
3376 assert(rapl_counter_info_perdomain
);
3379 * If we have any perf counters to read, read them all now, in bulk
3381 if (rci
->fd_perf
!= -1) {
3382 size_t num_perf_counters
= rapl_counter_info_count_perf(rci
);
3383 const ssize_t expected_read_size
= (num_perf_counters
+ 1) * sizeof(unsigned long long);
3384 const ssize_t actual_read_size
= read(rci
->fd_perf
, &perf_data
[0], sizeof(perf_data
));
3385 if (actual_read_size
!= expected_read_size
)
3386 err(-1, "get_rapl_counters: failed to read perf_data (%zu %zu)", expected_read_size
,
3390 for (unsigned int i
= 0, pi
= 1; i
< NUM_RAPL_COUNTERS
; ++i
) {
3391 switch (rci
->source
[i
]) {
3392 case RAPL_SOURCE_NONE
:
3395 case RAPL_SOURCE_PERF
:
3396 assert(pi
< ARRAY_SIZE(perf_data
));
3397 assert(rci
->fd_perf
!= -1);
3400 fprintf(stderr
, "Reading rapl counter via perf at %u (%llu %e %lf)\n",
3401 i
, perf_data
[pi
], rci
->scale
[i
], perf_data
[pi
] * rci
->scale
[i
]);
3403 rci
->data
[i
] = perf_data
[pi
];
3408 case RAPL_SOURCE_MSR
:
3410 fprintf(stderr
, "Reading rapl counter via msr at %u\n", i
);
3413 if (rci
->flags
[i
] & RAPL_COUNTER_FLAG_USE_MSR_SUM
) {
3414 if (get_msr_sum(cpu
, rci
->msr
[i
], &rci
->data
[i
]))
3417 if (get_msr(cpu
, rci
->msr
[i
], &rci
->data
[i
]))
3421 rci
->data
[i
] &= rci
->msr_mask
[i
];
3422 if (rci
->msr_shift
[i
] >= 0)
3423 rci
->data
[i
] >>= abs(rci
->msr_shift
[i
]);
3425 rci
->data
[i
] <<= abs(rci
->msr_shift
[i
]);
3431 _Static_assert(NUM_RAPL_COUNTERS
== 7);
3432 write_rapl_counter(&p
->energy_pkg
, rci
, RAPL_RCI_INDEX_ENERGY_PKG
);
3433 write_rapl_counter(&p
->energy_cores
, rci
, RAPL_RCI_INDEX_ENERGY_CORES
);
3434 write_rapl_counter(&p
->energy_dram
, rci
, RAPL_RCI_INDEX_DRAM
);
3435 write_rapl_counter(&p
->energy_gfx
, rci
, RAPL_RCI_INDEX_GFX
);
3436 write_rapl_counter(&p
->rapl_pkg_perf_status
, rci
, RAPL_RCI_INDEX_PKG_PERF_STATUS
);
3437 write_rapl_counter(&p
->rapl_dram_perf_status
, rci
, RAPL_RCI_INDEX_DRAM_PERF_STATUS
);
3438 write_rapl_counter(&c
->core_energy
, rci
, RAPL_RCI_INDEX_CORE_ENERGY
);
3446 * acquire and record local counters for that cpu
3448 int get_counters(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
3450 int cpu
= t
->cpu_id
;
3451 unsigned long long msr
;
3452 struct msr_counter
*mp
;
3456 if (cpu_migrate(cpu
)) {
3457 fprintf(outf
, "get_counters: Could not migrate to CPU %d\n", cpu
);
3461 gettimeofday(&t
->tv_begin
, (struct timezone
*)NULL
);
3463 if (first_counter_read
)
3466 t
->tsc
= rdtsc(); /* we are running on local CPU of interest */
3468 if (DO_BIC(BIC_Avg_MHz
) || DO_BIC(BIC_Busy
) || DO_BIC(BIC_Bzy_MHz
) || DO_BIC(BIC_IPC
)
3469 || soft_c1_residency_display(BIC_Avg_MHz
)) {
3472 assert(!no_perf
|| !no_msr
);
3474 switch (amperf_source
) {
3475 case AMPERF_SOURCE_PERF
:
3476 status
= read_aperf_mperf_tsc_perf(t
, cpu
);
3478 case AMPERF_SOURCE_MSR
:
3479 status
= read_aperf_mperf_tsc_msr(t
, cpu
);
3487 if (DO_BIC(BIC_IPC
))
3488 if (read(get_instr_count_fd(cpu
), &t
->instr_count
, sizeof(long long)) != sizeof(long long))
3491 if (DO_BIC(BIC_IRQ
))
3492 t
->irq_count
= irqs_per_cpu
[cpu
];
3493 if (DO_BIC(BIC_SMI
)) {
3494 if (get_msr(cpu
, MSR_SMI_COUNT
, &msr
))
3496 t
->smi_count
= msr
& 0xFFFFFFFF;
3498 if (DO_BIC(BIC_CPU_c1
) && platform
->has_msr_core_c1_res
) {
3499 if (get_msr(cpu
, MSR_CORE_C1_RES
, &t
->c1
))
3503 for (i
= 0, mp
= sys
.tp
; mp
; i
++, mp
= mp
->next
) {
3504 if (get_mp(cpu
, mp
, &t
->counter
[i
]))
3508 /* collect core counters only for 1st thread in core */
3509 if (!is_cpu_first_thread_in_core(t
, c
, p
))
3512 if (platform
->has_per_core_rapl
) {
3513 status
= get_rapl_counters(cpu
, c
->core_id
, c
, p
);
3518 if (DO_BIC(BIC_CPU_c3
) || soft_c1_residency_display(BIC_CPU_c3
)) {
3519 if (get_msr(cpu
, MSR_CORE_C3_RESIDENCY
, &c
->c3
))
3523 if ((DO_BIC(BIC_CPU_c6
) || soft_c1_residency_display(BIC_CPU_c6
)) && !platform
->has_msr_knl_core_c6_residency
) {
3524 if (get_msr(cpu
, MSR_CORE_C6_RESIDENCY
, &c
->c6
))
3526 } else if (platform
->has_msr_knl_core_c6_residency
&& soft_c1_residency_display(BIC_CPU_c6
)) {
3527 if (get_msr(cpu
, MSR_KNL_CORE_C6_RESIDENCY
, &c
->c6
))
3531 if (DO_BIC(BIC_CPU_c7
) || soft_c1_residency_display(BIC_CPU_c7
)) {
3532 if (get_msr(cpu
, MSR_CORE_C7_RESIDENCY
, &c
->c7
))
3534 else if (t
->is_atom
) {
3536 * For Atom CPUs that has core cstate deeper than c6,
3537 * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
3538 * Minus CC7 (and deeper cstates) residency to get
3539 * accturate cc6 residency.
3545 if (DO_BIC(BIC_Mod_c6
))
3546 if (get_msr(cpu
, MSR_MODULE_C6_RES_MS
, &c
->mc6_us
))
3549 if (DO_BIC(BIC_CoreTmp
)) {
3550 if (get_msr(cpu
, MSR_IA32_THERM_STATUS
, &msr
))
3552 c
->core_temp_c
= tj_max
- ((msr
>> 16) & 0x7F);
3555 if (DO_BIC(BIC_CORE_THROT_CNT
))
3556 get_core_throt_cnt(cpu
, &c
->core_throt_cnt
);
3558 for (i
= 0, mp
= sys
.cp
; mp
; i
++, mp
= mp
->next
) {
3559 if (get_mp(cpu
, mp
, &c
->counter
[i
]))
3563 /* collect package counters only for 1st core in package */
3564 if (!is_cpu_first_core_in_package(t
, c
, p
))
3567 if (DO_BIC(BIC_Totl_c0
)) {
3568 if (get_msr(cpu
, MSR_PKG_WEIGHTED_CORE_C0_RES
, &p
->pkg_wtd_core_c0
))
3571 if (DO_BIC(BIC_Any_c0
)) {
3572 if (get_msr(cpu
, MSR_PKG_ANY_CORE_C0_RES
, &p
->pkg_any_core_c0
))
3575 if (DO_BIC(BIC_GFX_c0
)) {
3576 if (get_msr(cpu
, MSR_PKG_ANY_GFXE_C0_RES
, &p
->pkg_any_gfxe_c0
))
3579 if (DO_BIC(BIC_CPUGFX
)) {
3580 if (get_msr(cpu
, MSR_PKG_BOTH_CORE_GFXE_C0_RES
, &p
->pkg_both_core_gfxe_c0
))
3583 if (DO_BIC(BIC_Pkgpc3
))
3584 if (get_msr(cpu
, MSR_PKG_C3_RESIDENCY
, &p
->pc3
))
3586 if (DO_BIC(BIC_Pkgpc6
)) {
3587 if (platform
->has_msr_atom_pkg_c6_residency
) {
3588 if (get_msr(cpu
, MSR_ATOM_PKG_C6_RESIDENCY
, &p
->pc6
))
3591 if (get_msr(cpu
, MSR_PKG_C6_RESIDENCY
, &p
->pc6
))
3596 if (DO_BIC(BIC_Pkgpc2
))
3597 if (get_msr(cpu
, MSR_PKG_C2_RESIDENCY
, &p
->pc2
))
3599 if (DO_BIC(BIC_Pkgpc7
))
3600 if (get_msr(cpu
, MSR_PKG_C7_RESIDENCY
, &p
->pc7
))
3602 if (DO_BIC(BIC_Pkgpc8
))
3603 if (get_msr(cpu
, MSR_PKG_C8_RESIDENCY
, &p
->pc8
))
3605 if (DO_BIC(BIC_Pkgpc9
))
3606 if (get_msr(cpu
, MSR_PKG_C9_RESIDENCY
, &p
->pc9
))
3608 if (DO_BIC(BIC_Pkgpc10
))
3609 if (get_msr(cpu
, MSR_PKG_C10_RESIDENCY
, &p
->pc10
))
3612 if (DO_BIC(BIC_CPU_LPI
))
3613 p
->cpu_lpi
= cpuidle_cur_cpu_lpi_us
;
3614 if (DO_BIC(BIC_SYS_LPI
))
3615 p
->sys_lpi
= cpuidle_cur_sys_lpi_us
;
3617 if (!platform
->has_per_core_rapl
) {
3618 status
= get_rapl_counters(cpu
, p
->package_id
, c
, p
);
3623 if (DO_BIC(BIC_PkgTmp
)) {
3624 if (get_msr(cpu
, MSR_IA32_PACKAGE_THERM_STATUS
, &msr
))
3626 p
->pkg_temp_c
= tj_max
- ((msr
>> 16) & 0x7F);
3629 /* n.b. assume die0 uncore frequency applies to whole package */
3630 if (DO_BIC(BIC_UNCORE_MHZ
))
3631 p
->uncore_mhz
= get_uncore_mhz(p
->package_id
, 0);
3633 if (DO_BIC(BIC_GFX_rc6
))
3634 p
->gfx_rc6_ms
= gfx_info
[GFX_rc6
].val_ull
;
3636 if (DO_BIC(BIC_GFXMHz
))
3637 p
->gfx_mhz
= gfx_info
[GFX_MHz
].val
;
3639 if (DO_BIC(BIC_GFXACTMHz
))
3640 p
->gfx_act_mhz
= gfx_info
[GFX_ACTMHz
].val
;
3642 if (DO_BIC(BIC_SAM_mc6
))
3643 p
->sam_mc6_ms
= gfx_info
[SAM_mc6
].val_ull
;
3645 if (DO_BIC(BIC_SAMMHz
))
3646 p
->sam_mhz
= gfx_info
[SAM_MHz
].val
;
3648 if (DO_BIC(BIC_SAMACTMHz
))
3649 p
->sam_act_mhz
= gfx_info
[SAM_ACTMHz
].val
;
3651 for (i
= 0, mp
= sys
.pp
; mp
; i
++, mp
= mp
->next
) {
3652 if (get_mp(cpu
, mp
, &p
->counter
[i
]))
3656 gettimeofday(&t
->tv_end
, (struct timezone
*)NULL
);
3662 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
3663 * If you change the values, note they are used both in comparisons
3664 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
#define PCLUKN 0		/* Unknown */
#define PCLRSV 1		/* Reserved */
#define PCL__0 2		/* PC0 */
#define PCL__1 3		/* PC1 */
#define PCL__2 4		/* PC2 */
#define PCL__3 5		/* PC3 */
#define PCL__4 6		/* PC4 */
#define PCL__6 7		/* PC6 */
#define PCL_6N 8		/* PC6 No Retention */
#define PCL_6R 9		/* PC6 Retention */
#define PCL__7 10		/* PC7 */
#define PCL_7S 11		/* PC7 Shrink */
#define PCL__8 12		/* PC8 */
#define PCL__9 13		/* PC9 */
#define PCL_10 14		/* PC10 */
#define PCLUNL 15		/* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Indexed by the PCL* values above.
 * BUGFIX: entries 0 and 1 were swapped -- PCLUKN (0) must print "unknown"
 * and PCLRSV (1) must print "reserved".
 */
char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2",
	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"
};
3689 int nhm_pkg_cstate_limits
[16] =
3690 { PCL__0
, PCL__1
, PCL__3
, PCL__6
, PCL__7
, PCLRSV
, PCLRSV
, PCLUNL
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
,
3694 int snb_pkg_cstate_limits
[16] =
3695 { PCL__0
, PCL__2
, PCL_6N
, PCL_6R
, PCL__7
, PCL_7S
, PCLRSV
, PCLUNL
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
,
3699 int hsw_pkg_cstate_limits
[16] =
3700 { PCL__0
, PCL__2
, PCL__3
, PCL__6
, PCL__7
, PCL_7S
, PCL__8
, PCL__9
, PCLUNL
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
,
3704 int slv_pkg_cstate_limits
[16] =
3705 { PCL__0
, PCL__1
, PCLRSV
, PCLRSV
, PCL__4
, PCLRSV
, PCL__6
, PCL__7
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
,
3709 int amt_pkg_cstate_limits
[16] =
3710 { PCLUNL
, PCL__1
, PCL__2
, PCLRSV
, PCLRSV
, PCLRSV
, PCL__6
, PCL__7
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
,
3714 int phi_pkg_cstate_limits
[16] =
3715 { PCL__0
, PCL__2
, PCL_6N
, PCL_6R
, PCLRSV
, PCLRSV
, PCLRSV
, PCLUNL
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
,
3719 int glm_pkg_cstate_limits
[16] =
3720 { PCLUNL
, PCL__1
, PCL__3
, PCL__6
, PCL__7
, PCL_7S
, PCL__8
, PCL__9
, PCL_10
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
,
3724 int skx_pkg_cstate_limits
[16] =
3725 { PCL__0
, PCL__2
, PCL_6N
, PCL_6R
, PCLRSV
, PCLRSV
, PCLRSV
, PCLUNL
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
,
3729 int icx_pkg_cstate_limits
[16] =
3730 { PCL__0
, PCL__2
, PCL__6
, PCL__6
, PCLRSV
, PCLRSV
, PCLRSV
, PCLUNL
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
, PCLRSV
,
3734 void probe_cst_limit(void)
3736 unsigned long long msr
;
3737 int *pkg_cstate_limits
;
3739 if (!platform
->has_nhm_msrs
|| no_msr
)
3742 switch (platform
->cst_limit
) {
3744 pkg_cstate_limits
= nhm_pkg_cstate_limits
;
3747 pkg_cstate_limits
= snb_pkg_cstate_limits
;
3750 pkg_cstate_limits
= hsw_pkg_cstate_limits
;
3753 pkg_cstate_limits
= skx_pkg_cstate_limits
;
3756 pkg_cstate_limits
= icx_pkg_cstate_limits
;
3759 pkg_cstate_limits
= slv_pkg_cstate_limits
;
3762 pkg_cstate_limits
= amt_pkg_cstate_limits
;
3765 pkg_cstate_limits
= phi_pkg_cstate_limits
;
3768 pkg_cstate_limits
= glm_pkg_cstate_limits
;
3774 get_msr(base_cpu
, MSR_PKG_CST_CONFIG_CONTROL
, &msr
);
3775 pkg_cstate_limit
= pkg_cstate_limits
[msr
& 0xF];
3778 static void dump_platform_info(void)
3780 unsigned long long msr
;
3783 if (!platform
->has_nhm_msrs
|| no_msr
)
3786 get_msr(base_cpu
, MSR_PLATFORM_INFO
, &msr
);
3788 fprintf(outf
, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu
, msr
);
3790 ratio
= (msr
>> 40) & 0xFF;
3791 fprintf(outf
, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio
, bclk
, ratio
* bclk
);
3793 ratio
= (msr
>> 8) & 0xFF;
3794 fprintf(outf
, "%d * %.1f = %.1f MHz base frequency\n", ratio
, bclk
, ratio
* bclk
);
3797 static void dump_power_ctl(void)
3799 unsigned long long msr
;
3801 if (!platform
->has_nhm_msrs
|| no_msr
)
3804 get_msr(base_cpu
, MSR_IA32_POWER_CTL
, &msr
);
3805 fprintf(outf
, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
3806 base_cpu
, msr
, msr
& 0x2 ? "EN" : "DIS");
3808 /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
3809 if (platform
->has_cst_prewake_bit
)
3810 fprintf(outf
, "C-state Pre-wake: %sabled\n", msr
& 0x40000000 ? "DIS" : "EN");
3815 static void dump_turbo_ratio_limit2(void)
3817 unsigned long long msr
;
3820 get_msr(base_cpu
, MSR_TURBO_RATIO_LIMIT2
, &msr
);
3822 fprintf(outf
, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu
, msr
);
3824 ratio
= (msr
>> 8) & 0xFF;
3826 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio
, bclk
, ratio
* bclk
);
3828 ratio
= (msr
>> 0) & 0xFF;
3830 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio
, bclk
, ratio
* bclk
);
3834 static void dump_turbo_ratio_limit1(void)
3836 unsigned long long msr
;
3839 get_msr(base_cpu
, MSR_TURBO_RATIO_LIMIT1
, &msr
);
3841 fprintf(outf
, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu
, msr
);
3843 ratio
= (msr
>> 56) & 0xFF;
3845 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio
, bclk
, ratio
* bclk
);
3847 ratio
= (msr
>> 48) & 0xFF;
3849 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio
, bclk
, ratio
* bclk
);
3851 ratio
= (msr
>> 40) & 0xFF;
3853 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio
, bclk
, ratio
* bclk
);
3855 ratio
= (msr
>> 32) & 0xFF;
3857 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio
, bclk
, ratio
* bclk
);
3859 ratio
= (msr
>> 24) & 0xFF;
3861 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio
, bclk
, ratio
* bclk
);
3863 ratio
= (msr
>> 16) & 0xFF;
3865 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio
, bclk
, ratio
* bclk
);
3867 ratio
= (msr
>> 8) & 0xFF;
3869 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio
, bclk
, ratio
* bclk
);
3871 ratio
= (msr
>> 0) & 0xFF;
3873 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio
, bclk
, ratio
* bclk
);
3877 static void dump_turbo_ratio_limits(int trl_msr_offset
)
3879 unsigned long long msr
, core_counts
;
3882 get_msr(base_cpu
, trl_msr_offset
, &msr
);
3883 fprintf(outf
, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
3884 base_cpu
, trl_msr_offset
== MSR_SECONDARY_TURBO_RATIO_LIMIT
? "SECONDARY_" : "", msr
);
3886 if (platform
->trl_msrs
& TRL_CORECOUNT
) {
3887 get_msr(base_cpu
, MSR_TURBO_RATIO_LIMIT1
, &core_counts
);
3888 fprintf(outf
, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu
, core_counts
);
3890 core_counts
= 0x0807060504030201;
3893 for (shift
= 56; shift
>= 0; shift
-= 8) {
3894 unsigned int ratio
, group_size
;
3896 ratio
= (msr
>> shift
) & 0xFF;
3897 group_size
= (core_counts
>> shift
) & 0xFF;
3899 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
3900 ratio
, bclk
, ratio
* bclk
, group_size
);
3906 static void dump_atom_turbo_ratio_limits(void)
3908 unsigned long long msr
;
3911 get_msr(base_cpu
, MSR_ATOM_CORE_RATIOS
, &msr
);
3912 fprintf(outf
, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu
, msr
& 0xFFFFFFFF);
3914 ratio
= (msr
>> 0) & 0x3F;
3916 fprintf(outf
, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio
, bclk
, ratio
* bclk
);
3918 ratio
= (msr
>> 8) & 0x3F;
3920 fprintf(outf
, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio
, bclk
, ratio
* bclk
);
3922 ratio
= (msr
>> 16) & 0x3F;
3924 fprintf(outf
, "%d * %.1f = %.1f MHz base frequency\n", ratio
, bclk
, ratio
* bclk
);
3926 get_msr(base_cpu
, MSR_ATOM_CORE_TURBO_RATIOS
, &msr
);
3927 fprintf(outf
, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu
, msr
& 0xFFFFFFFF);
3929 ratio
= (msr
>> 24) & 0x3F;
3931 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio
, bclk
, ratio
* bclk
);
3933 ratio
= (msr
>> 16) & 0x3F;
3935 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio
, bclk
, ratio
* bclk
);
3937 ratio
= (msr
>> 8) & 0x3F;
3939 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio
, bclk
, ratio
* bclk
);
3941 ratio
= (msr
>> 0) & 0x3F;
3943 fprintf(outf
, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio
, bclk
, ratio
* bclk
);
3946 static void dump_knl_turbo_ratio_limits(void)
3948 const unsigned int buckets_no
= 7;
3950 unsigned long long msr
;
3951 int delta_cores
, delta_ratio
;
3953 unsigned int cores
[buckets_no
];
3954 unsigned int ratio
[buckets_no
];
3956 get_msr(base_cpu
, MSR_TURBO_RATIO_LIMIT
, &msr
);
3958 fprintf(outf
, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu
, msr
);
3961 * Turbo encoding in KNL is as follows:
3963 * [7:1] -- Base value of number of active cores of bucket 1.
3964 * [15:8] -- Base value of freq ratio of bucket 1.
3965 * [20:16] -- +ve delta of number of active cores of bucket 2.
3966 * i.e. active cores of bucket 2 =
3967 * active cores of bucket 1 + delta
3968 * [23:21] -- Negative delta of freq ratio of bucket 2.
3969 * i.e. freq ratio of bucket 2 =
3970 * freq ratio of bucket 1 - delta
3971 * [28:24]-- +ve delta of number of active cores of bucket 3.
3972 * [31:29]-- -ve delta of freq ratio of bucket 3.
3973 * [36:32]-- +ve delta of number of active cores of bucket 4.
3974 * [39:37]-- -ve delta of freq ratio of bucket 4.
3975 * [44:40]-- +ve delta of number of active cores of bucket 5.
3976 * [47:45]-- -ve delta of freq ratio of bucket 5.
3977 * [52:48]-- +ve delta of number of active cores of bucket 6.
3978 * [55:53]-- -ve delta of freq ratio of bucket 6.
3979 * [60:56]-- +ve delta of number of active cores of bucket 7.
3980 * [63:61]-- -ve delta of freq ratio of bucket 7.
3984 cores
[b_nr
] = (msr
& 0xFF) >> 1;
3985 ratio
[b_nr
] = (msr
>> 8) & 0xFF;
3987 for (i
= 16; i
< 64; i
+= 8) {
3988 delta_cores
= (msr
>> i
) & 0x1F;
3989 delta_ratio
= (msr
>> (i
+ 5)) & 0x7;
3991 cores
[b_nr
+ 1] = cores
[b_nr
] + delta_cores
;
3992 ratio
[b_nr
+ 1] = ratio
[b_nr
] - delta_ratio
;
3996 for (i
= buckets_no
- 1; i
>= 0; i
--)
3997 if (i
> 0 ? ratio
[i
] != ratio
[i
- 1] : 1)
3999 "%d * %.1f = %.1f MHz max turbo %d active cores\n",
4000 ratio
[i
], bclk
, ratio
[i
] * bclk
, cores
[i
]);
4003 static void dump_cst_cfg(void)
4005 unsigned long long msr
;
4007 if (!platform
->has_nhm_msrs
|| no_msr
)
4010 get_msr(base_cpu
, MSR_PKG_CST_CONFIG_CONTROL
, &msr
);
4012 fprintf(outf
, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu
, msr
);
4014 fprintf(outf
, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
4015 (msr
& SNB_C3_AUTO_UNDEMOTE
) ? "UNdemote-C3, " : "",
4016 (msr
& SNB_C1_AUTO_UNDEMOTE
) ? "UNdemote-C1, " : "",
4017 (msr
& NHM_C3_AUTO_DEMOTE
) ? "demote-C3, " : "",
4018 (msr
& NHM_C1_AUTO_DEMOTE
) ? "demote-C1, " : "",
4019 (msr
& (1 << 15)) ? "" : "UN", (unsigned int)msr
& 0xF, pkg_cstate_limit_strings
[pkg_cstate_limit
]);
4021 #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16)
4022 if (platform
->has_cst_auto_convension
) {
4023 fprintf(outf
, ", automatic c-state conversion=%s", (msr
& AUTOMATIC_CSTATE_CONVERSION
) ? "on" : "off");
4026 fprintf(outf
, ")\n");
4031 static void dump_config_tdp(void)
4033 unsigned long long msr
;
4035 get_msr(base_cpu
, MSR_CONFIG_TDP_NOMINAL
, &msr
);
4036 fprintf(outf
, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu
, msr
);
4037 fprintf(outf
, " (base_ratio=%d)\n", (unsigned int)msr
& 0xFF);
4039 get_msr(base_cpu
, MSR_CONFIG_TDP_LEVEL_1
, &msr
);
4040 fprintf(outf
, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu
, msr
);
4042 fprintf(outf
, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr
>> 48) & 0x7FFF);
4043 fprintf(outf
, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr
>> 32) & 0x7FFF);
4044 fprintf(outf
, "LVL1_RATIO=%d ", (unsigned int)(msr
>> 16) & 0xFF);
4045 fprintf(outf
, "PKG_TDP_LVL1=%d", (unsigned int)(msr
) & 0x7FFF);
4047 fprintf(outf
, ")\n");
4049 get_msr(base_cpu
, MSR_CONFIG_TDP_LEVEL_2
, &msr
);
4050 fprintf(outf
, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu
, msr
);
4052 fprintf(outf
, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr
>> 48) & 0x7FFF);
4053 fprintf(outf
, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr
>> 32) & 0x7FFF);
4054 fprintf(outf
, "LVL2_RATIO=%d ", (unsigned int)(msr
>> 16) & 0xFF);
4055 fprintf(outf
, "PKG_TDP_LVL2=%d", (unsigned int)(msr
) & 0x7FFF);
4057 fprintf(outf
, ")\n");
4059 get_msr(base_cpu
, MSR_CONFIG_TDP_CONTROL
, &msr
);
4060 fprintf(outf
, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu
, msr
);
4062 fprintf(outf
, "TDP_LEVEL=%d ", (unsigned int)(msr
) & 0x3);
4063 fprintf(outf
, " lock=%d", (unsigned int)(msr
>> 31) & 1);
4064 fprintf(outf
, ")\n");
4066 get_msr(base_cpu
, MSR_TURBO_ACTIVATION_RATIO
, &msr
);
4067 fprintf(outf
, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu
, msr
);
4068 fprintf(outf
, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr
) & 0xFF);
4069 fprintf(outf
, " lock=%d", (unsigned int)(msr
>> 31) & 1);
4070 fprintf(outf
, ")\n");
/* IRTL time units, indexed by the 3-bit unit field of the MSR_PKGCn_IRTL MSRs */
unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
4075 void print_irtl(void)
4077 unsigned long long msr
;
4079 if (!platform
->has_irtl_msrs
|| no_msr
)
4082 if (platform
->supported_cstates
& PC3
) {
4083 get_msr(base_cpu
, MSR_PKGC3_IRTL
, &msr
);
4084 fprintf(outf
, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu
, msr
);
4085 fprintf(outf
, "%svalid, %lld ns)\n", msr
& (1 << 15) ? "" : "NOT",
4086 (msr
& 0x3FF) * irtl_time_units
[(msr
>> 10) & 0x3]);
4089 if (platform
->supported_cstates
& PC6
) {
4090 get_msr(base_cpu
, MSR_PKGC6_IRTL
, &msr
);
4091 fprintf(outf
, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu
, msr
);
4092 fprintf(outf
, "%svalid, %lld ns)\n", msr
& (1 << 15) ? "" : "NOT",
4093 (msr
& 0x3FF) * irtl_time_units
[(msr
>> 10) & 0x3]);
4096 if (platform
->supported_cstates
& PC7
) {
4097 get_msr(base_cpu
, MSR_PKGC7_IRTL
, &msr
);
4098 fprintf(outf
, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu
, msr
);
4099 fprintf(outf
, "%svalid, %lld ns)\n", msr
& (1 << 15) ? "" : "NOT",
4100 (msr
& 0x3FF) * irtl_time_units
[(msr
>> 10) & 0x3]);
4103 if (platform
->supported_cstates
& PC8
) {
4104 get_msr(base_cpu
, MSR_PKGC8_IRTL
, &msr
);
4105 fprintf(outf
, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu
, msr
);
4106 fprintf(outf
, "%svalid, %lld ns)\n", msr
& (1 << 15) ? "" : "NOT",
4107 (msr
& 0x3FF) * irtl_time_units
[(msr
>> 10) & 0x3]);
4110 if (platform
->supported_cstates
& PC9
) {
4111 get_msr(base_cpu
, MSR_PKGC9_IRTL
, &msr
);
4112 fprintf(outf
, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu
, msr
);
4113 fprintf(outf
, "%svalid, %lld ns)\n", msr
& (1 << 15) ? "" : "NOT",
4114 (msr
& 0x3FF) * irtl_time_units
[(msr
>> 10) & 0x3]);
4117 if (platform
->supported_cstates
& PC10
) {
4118 get_msr(base_cpu
, MSR_PKGC10_IRTL
, &msr
);
4119 fprintf(outf
, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu
, msr
);
4120 fprintf(outf
, "%svalid, %lld ns)\n", msr
& (1 << 15) ? "" : "NOT",
4121 (msr
& 0x3FF) * irtl_time_units
[(msr
>> 10) & 0x3]);
4125 void free_fd_percpu(void)
4132 for (i
= 0; i
< topo
.max_cpu_num
+ 1; ++i
) {
4133 if (fd_percpu
[i
] != 0)
4134 close(fd_percpu
[i
]);
4141 void free_fd_amperf_percpu(void)
4145 if (!fd_amperf_percpu
)
4148 for (i
= 0; i
< topo
.max_cpu_num
+ 1; ++i
) {
4149 if (fd_amperf_percpu
[i
].mperf
!= 0)
4150 close(fd_amperf_percpu
[i
].mperf
);
4152 if (fd_amperf_percpu
[i
].aperf
!= 0)
4153 close(fd_amperf_percpu
[i
].aperf
);
4156 free(fd_amperf_percpu
);
4157 fd_amperf_percpu
= NULL
;
4160 void free_fd_instr_count_percpu(void)
4162 if (!fd_instr_count_percpu
)
4165 for (int i
= 0; i
< topo
.max_cpu_num
+ 1; ++i
) {
4166 if (fd_instr_count_percpu
[i
] != 0)
4167 close(fd_instr_count_percpu
[i
]);
4170 free(fd_instr_count_percpu
);
4171 fd_instr_count_percpu
= NULL
;
4174 void free_fd_rapl_percpu(void)
4176 if (!rapl_counter_info_perdomain
)
4179 const int num_domains
= platform
->has_per_core_rapl
? topo
.num_cores
: topo
.num_packages
;
4181 for (int domain_id
= 0; domain_id
< num_domains
; ++domain_id
) {
4182 if (rapl_counter_info_perdomain
[domain_id
].fd_perf
!= -1)
4183 close(rapl_counter_info_perdomain
[domain_id
].fd_perf
);
4186 free(rapl_counter_info_perdomain
);
4189 void free_all_buffers(void)
4193 CPU_FREE(cpu_present_set
);
4194 cpu_present_set
= NULL
;
4195 cpu_present_setsize
= 0;
4197 CPU_FREE(cpu_effective_set
);
4198 cpu_effective_set
= NULL
;
4199 cpu_effective_setsize
= 0;
4201 CPU_FREE(cpu_allowed_set
);
4202 cpu_allowed_set
= NULL
;
4203 cpu_allowed_setsize
= 0;
4205 CPU_FREE(cpu_affinity_set
);
4206 cpu_affinity_set
= NULL
;
4207 cpu_affinity_setsize
= 0;
4215 package_even
= NULL
;
4225 free(output_buffer
);
4226 output_buffer
= NULL
;
4230 free_fd_instr_count_percpu();
4231 free_fd_amperf_percpu();
4232 free_fd_rapl_percpu();
4234 free(irq_column_2_cpu
);
4237 for (i
= 0; i
<= topo
.max_cpu_num
; ++i
) {
4238 if (cpus
[i
].put_ids
)
4239 CPU_FREE(cpus
[i
].put_ids
);
4245 * Parse a file containing a single int.
4246 * Return 0 if file can not be opened
4247 * Exit if file can be opened, but can not be parsed
4249 int parse_int_file(const char *fmt
, ...)
4252 char path
[PATH_MAX
];
4256 va_start(args
, fmt
);
4257 vsnprintf(path
, sizeof(path
), fmt
, args
);
4259 filep
= fopen(path
, "r");
4262 if (fscanf(filep
, "%d", &value
) != 1)
4263 err(1, "%s: failed to parse number from file", path
);
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if given CPU is 1st core in package
 */
int cpu_is_first_core_in_package(int cpu)
{
	/* first entry of core_siblings_list is the lowest-numbered sibling */
	return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
}
/* Return the package id of @cpu, read from sysfs topology. */
int get_physical_package_id(int cpu)
{
	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
}
/* Return the die id of @cpu, read from sysfs topology. */
int get_die_id(int cpu)
{
	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);
}
/* Return the core id of @cpu, read from sysfs topology. */
int get_core_id(int cpu)
{
	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
}
4292 void set_node_data(void)
4294 int pkg
, node
, lnode
, cpu
, cpux
;
4297 /* initialize logical_node_id */
4298 for (cpu
= 0; cpu
<= topo
.max_cpu_num
; ++cpu
)
4299 cpus
[cpu
].logical_node_id
= -1;
4302 for (pkg
= 0; pkg
< topo
.num_packages
; pkg
++) {
4304 for (cpu
= 0; cpu
<= topo
.max_cpu_num
; ++cpu
) {
4305 if (cpus
[cpu
].physical_package_id
!= pkg
)
4307 /* find a cpu with an unset logical_node_id */
4308 if (cpus
[cpu
].logical_node_id
!= -1)
4310 cpus
[cpu
].logical_node_id
= lnode
;
4311 node
= cpus
[cpu
].physical_node_id
;
4314 * find all matching cpus on this pkg and set
4315 * the logical_node_id
4317 for (cpux
= cpu
; cpux
<= topo
.max_cpu_num
; cpux
++) {
4318 if ((cpus
[cpux
].physical_package_id
== pkg
) && (cpus
[cpux
].physical_node_id
== node
)) {
4319 cpus
[cpux
].logical_node_id
= lnode
;
4324 if (lnode
> topo
.nodes_per_pkg
)
4325 topo
.nodes_per_pkg
= lnode
;
4327 if (cpu_count
>= topo
.max_cpu_num
)
4332 int get_physical_node_id(struct cpu_topology
*thiscpu
)
4337 int cpu
= thiscpu
->logical_cpu_id
;
4339 for (i
= 0; i
<= topo
.max_cpu_num
; i
++) {
4340 sprintf(path
, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu
, i
);
4341 filep
= fopen(path
, "r");
4350 static int parse_cpu_str(char *cpu_str
, cpu_set_t
*cpu_set
, int cpu_set_size
)
4352 unsigned int start
, end
;
4353 char *next
= cpu_str
;
4355 while (next
&& *next
) {
4357 if (*next
== '-') /* no negative cpu numbers */
4360 start
= strtoul(next
, &next
, 10);
4362 if (start
>= CPU_SUBSET_MAXCPUS
)
4364 CPU_SET_S(start
, cpu_set_size
, cpu_set
);
4366 if (*next
== '\0' || *next
== '\n')
4375 next
+= 1; /* start range */
4376 } else if (*next
== '.') {
4379 next
+= 1; /* start range */
4384 end
= strtoul(next
, &next
, 10);
4388 while (++start
<= end
) {
4389 if (start
>= CPU_SUBSET_MAXCPUS
)
4391 CPU_SET_S(start
, cpu_set_size
, cpu_set
);
4396 else if (*next
!= '\0' && *next
!= '\n')
4403 int get_thread_siblings(struct cpu_topology
*thiscpu
)
4405 char path
[80], character
;
4408 int so
, shift
, sib_core
;
4409 int cpu
= thiscpu
->logical_cpu_id
;
4410 int offset
= topo
.max_cpu_num
+ 1;
4414 thiscpu
->put_ids
= CPU_ALLOC((topo
.max_cpu_num
+ 1));
4415 if (thiscpu
->thread_id
< 0)
4416 thiscpu
->thread_id
= thread_id
++;
4417 if (!thiscpu
->put_ids
)
4420 size
= CPU_ALLOC_SIZE((topo
.max_cpu_num
+ 1));
4421 CPU_ZERO_S(size
, thiscpu
->put_ids
);
4423 sprintf(path
, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu
);
4424 filep
= fopen(path
, "r");
4427 warnx("%s: open failed", path
);
4431 offset
-= BITMASK_SIZE
;
4432 if (fscanf(filep
, "%lx%c", &map
, &character
) != 2)
4433 err(1, "%s: failed to parse file", path
);
4434 for (shift
= 0; shift
< BITMASK_SIZE
; shift
++) {
4435 if ((map
>> shift
) & 0x1) {
4436 so
= shift
+ offset
;
4437 sib_core
= get_core_id(so
);
4438 if (sib_core
== thiscpu
->physical_core_id
) {
4439 CPU_SET_S(so
, size
, thiscpu
->put_ids
);
4440 if ((so
!= cpu
) && (cpus
[so
].thread_id
< 0))
4441 cpus
[so
].thread_id
= thread_id
++;
4445 } while (character
== ',');
4448 return CPU_COUNT_S(size
, thiscpu
->put_ids
);
4452 * run func(thread, core, package) in topology order
4453 * skip non-present cpus
4456 int for_all_cpus_2(int (func
) (struct thread_data
*, struct core_data
*,
4457 struct pkg_data
*, struct thread_data
*, struct core_data
*,
4458 struct pkg_data
*), struct thread_data
*thread_base
,
4459 struct core_data
*core_base
, struct pkg_data
*pkg_base
,
4460 struct thread_data
*thread_base2
, struct core_data
*core_base2
, struct pkg_data
*pkg_base2
)
4462 int retval
, pkg_no
, node_no
, core_no
, thread_no
;
4464 for (pkg_no
= 0; pkg_no
< topo
.num_packages
; ++pkg_no
) {
4465 for (node_no
= 0; node_no
< topo
.nodes_per_pkg
; ++node_no
) {
4466 for (core_no
= 0; core_no
< topo
.cores_per_node
; ++core_no
) {
4467 for (thread_no
= 0; thread_no
< topo
.threads_per_core
; ++thread_no
) {
4468 struct thread_data
*t
, *t2
;
4469 struct core_data
*c
, *c2
;
4470 struct pkg_data
*p
, *p2
;
4472 t
= GET_THREAD(thread_base
, thread_no
, core_no
, node_no
, pkg_no
);
4474 if (cpu_is_not_allowed(t
->cpu_id
))
4477 t2
= GET_THREAD(thread_base2
, thread_no
, core_no
, node_no
, pkg_no
);
4479 c
= GET_CORE(core_base
, core_no
, node_no
, pkg_no
);
4480 c2
= GET_CORE(core_base2
, core_no
, node_no
, pkg_no
);
4482 p
= GET_PKG(pkg_base
, pkg_no
);
4483 p2
= GET_PKG(pkg_base2
, pkg_no
);
4485 retval
= func(t
, c
, p
, t2
, c2
, p2
);
4496 * run func(cpu) on every cpu in /proc/stat
4497 * return max_cpu number
4499 int for_all_proc_cpus(int (func
) (int))
4505 fp
= fopen_or_die(proc_stat
, "r");
4507 retval
= fscanf(fp
, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
4509 err(1, "%s: failed to parse format", proc_stat
);
4512 retval
= fscanf(fp
, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num
);
4516 retval
= func(cpu_num
);
4526 #define PATH_EFFECTIVE_CPUS "/sys/fs/cgroup/cpuset.cpus.effective"
4528 static char cpu_effective_str
[1024];
4530 static int update_effective_str(bool startup
)
4537 if (cpu_effective_str
[0] == '\0' && !startup
)
4540 fp
= fopen(PATH_EFFECTIVE_CPUS
, "r");
4544 pos
= fgets(buf
, 1024, fp
);
4546 err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS
);
4550 ret
= strncmp(cpu_effective_str
, buf
, 1024);
4554 strncpy(cpu_effective_str
, buf
, 1024);
4558 static void update_effective_set(bool startup
)
4560 update_effective_str(startup
);
4562 if (parse_cpu_str(cpu_effective_str
, cpu_effective_set
, cpu_effective_setsize
))
4563 err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS
, cpu_effective_str
);
4566 void linux_perf_init(void);
4567 void rapl_perf_init(void);
4569 void re_initialize(void)
4572 setup_all_buffers(false);
4575 fprintf(outf
, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo
.num_cpus
,
4579 void set_max_cpu_num(void)
4583 unsigned long dummy
;
4586 base_cpu
= sched_getcpu();
4588 err(1, "cannot find calling cpu ID");
4589 sprintf(pathname
, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu
);
4591 filep
= fopen_or_die(pathname
, "r");
4592 topo
.max_cpu_num
= 0;
4593 while (fscanf(filep
, "%lx,", &dummy
) == 1)
4594 topo
.max_cpu_num
+= BITMASK_SIZE
;
4596 topo
.max_cpu_num
--; /* 0 based */
4601 * remember the last one seen, it will be the max
4603 int count_cpus(int cpu
)
4611 int mark_cpu_present(int cpu
)
4613 CPU_SET_S(cpu
, cpu_present_setsize
, cpu_present_set
);
4617 int init_thread_id(int cpu
)
4619 cpus
[cpu
].thread_id
= -1;
4624 * snapshot_proc_interrupts()
4626 * read and record summary of /proc/interrupts
4628 * return 1 if config change requires a restart, else return 0
4630 int snapshot_proc_interrupts(void)
4636 fp
= fopen_or_die("/proc/interrupts", "r");
4640 /* read 1st line of /proc/interrupts to get cpu* name for each column */
4641 for (column
= 0; column
< topo
.num_cpus
; ++column
) {
4644 retval
= fscanf(fp
, " CPU%d", &cpu_number
);
4648 if (cpu_number
> topo
.max_cpu_num
) {
4649 warn("/proc/interrupts: cpu%d: > %d", cpu_number
, topo
.max_cpu_num
);
4653 irq_column_2_cpu
[column
] = cpu_number
;
4654 irqs_per_cpu
[cpu_number
] = 0;
4657 /* read /proc/interrupt count lines and sum up irqs per cpu */
4662 retval
= fscanf(fp
, " %s:", buf
); /* flush irq# "N:" */
4666 /* read the count per cpu */
4667 for (column
= 0; column
< topo
.num_cpus
; ++column
) {
4669 int cpu_number
, irq_count
;
4671 retval
= fscanf(fp
, " %d", &irq_count
);
4675 cpu_number
= irq_column_2_cpu
[column
];
4676 irqs_per_cpu
[cpu_number
] += irq_count
;
4680 while (getc(fp
) != '\n') ; /* flush interrupt description */
4687 * snapshot_graphics()
4689 * record snapshot of specified graphics sysfs knob
4691 * return 1 if config change requires a restart, else return 0
4693 int snapshot_graphics(int idx
)
4701 fp
= fopen_or_die(gfx_info
[idx
].path
, "r");
4702 retval
= fscanf(fp
, "%lld", &gfx_info
[idx
].val_ull
);
4711 if (gfx_info
[idx
].fp
== NULL
) {
4712 gfx_info
[idx
].fp
= fopen_or_die(gfx_info
[idx
].path
, "r");
4714 rewind(gfx_info
[idx
].fp
);
4715 fflush(gfx_info
[idx
].fp
);
4717 retval
= fscanf(gfx_info
[idx
].fp
, "%d", &gfx_info
[idx
].val
);
4727 * snapshot_cpu_lpi()
4729 * record snapshot of
4730 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
4732 int snapshot_cpu_lpi_us(void)
4737 fp
= fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
4739 retval
= fscanf(fp
, "%lld", &cpuidle_cur_cpu_lpi_us
);
4741 fprintf(stderr
, "Disabling Low Power Idle CPU output\n");
4742 BIC_NOT_PRESENT(BIC_CPU_LPI
);
4753 * snapshot_sys_lpi()
4755 * record snapshot of sys_lpi_file
4757 int snapshot_sys_lpi_us(void)
4762 fp
= fopen_or_die(sys_lpi_file
, "r");
4764 retval
= fscanf(fp
, "%lld", &cpuidle_cur_sys_lpi_us
);
4766 fprintf(stderr
, "Disabling Low Power Idle System output\n");
4767 BIC_NOT_PRESENT(BIC_SYS_LPI
);
4777 * snapshot /proc and /sys files
4779 * return 1 if configuration restart needed, else return 0
4781 int snapshot_proc_sysfs_files(void)
4783 if (DO_BIC(BIC_IRQ
))
4784 if (snapshot_proc_interrupts())
4787 if (DO_BIC(BIC_GFX_rc6
))
4788 snapshot_graphics(GFX_rc6
);
4790 if (DO_BIC(BIC_GFXMHz
))
4791 snapshot_graphics(GFX_MHz
);
4793 if (DO_BIC(BIC_GFXACTMHz
))
4794 snapshot_graphics(GFX_ACTMHz
);
4796 if (DO_BIC(BIC_SAM_mc6
))
4797 snapshot_graphics(SAM_mc6
);
4799 if (DO_BIC(BIC_SAMMHz
))
4800 snapshot_graphics(SAM_MHz
);
4802 if (DO_BIC(BIC_SAMACTMHz
))
4803 snapshot_graphics(SAM_ACTMHz
);
4805 if (DO_BIC(BIC_CPU_LPI
))
4806 snapshot_cpu_lpi_us();
4808 if (DO_BIC(BIC_SYS_LPI
))
4809 snapshot_sys_lpi_us();
4816 static void signal_handler(int signal
)
4822 fprintf(stderr
, " SIGINT\n");
4826 fprintf(stderr
, "SIGUSR1\n");
4831 void setup_signal_handler(void)
4833 struct sigaction sa
;
4835 memset(&sa
, 0, sizeof(sa
));
4837 sa
.sa_handler
= &signal_handler
;
4839 if (sigaction(SIGINT
, &sa
, NULL
) < 0)
4840 err(1, "sigaction SIGINT");
4841 if (sigaction(SIGUSR1
, &sa
, NULL
) < 0)
4842 err(1, "sigaction SIGUSR1");
4847 struct timeval tout
;
4848 struct timespec rest
;
4853 FD_SET(0, &readfds
);
4856 nanosleep(&interval_ts
, NULL
);
4861 retval
= select(1, &readfds
, NULL
, NULL
, &tout
);
4864 switch (getc(stdin
)) {
4870 * 'stdin' is a pipe closed on the other end. There
4871 * won't be any further input.
4874 /* Sleep the rest of the time */
4875 rest
.tv_sec
= (tout
.tv_sec
+ tout
.tv_usec
/ 1000000);
4876 rest
.tv_nsec
= (tout
.tv_usec
% 1000000) * 1000;
4877 nanosleep(&rest
, NULL
);
4882 int get_msr_sum(int cpu
, off_t offset
, unsigned long long *msr
)
4885 unsigned long long msr_cur
, msr_last
;
4889 if (!per_cpu_msr_sum
)
4892 idx
= offset_to_idx(offset
);
4895 /* get_msr_sum() = sum + (get_msr() - last) */
4896 ret
= get_msr(cpu
, offset
, &msr_cur
);
4899 msr_last
= per_cpu_msr_sum
[cpu
].entries
[idx
].last
;
4900 DELTA_WRAP32(msr_cur
, msr_last
);
4901 *msr
= msr_last
+ per_cpu_msr_sum
[cpu
].entries
[idx
].sum
;
4908 /* Timer callback, update the sum of MSRs periodically. */
4909 static int update_msr_sum(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
4912 int cpu
= t
->cpu_id
;
4919 for (i
= IDX_PKG_ENERGY
; i
< IDX_COUNT
; i
++) {
4920 unsigned long long msr_cur
, msr_last
;
4925 offset
= idx_to_offset(i
);
4928 ret
= get_msr(cpu
, offset
, &msr_cur
);
4930 fprintf(outf
, "Can not update msr(0x%llx)\n", (unsigned long long)offset
);
4934 msr_last
= per_cpu_msr_sum
[cpu
].entries
[i
].last
;
4935 per_cpu_msr_sum
[cpu
].entries
[i
].last
= msr_cur
& 0xffffffff;
4937 DELTA_WRAP32(msr_cur
, msr_last
);
4938 per_cpu_msr_sum
[cpu
].entries
[i
].sum
+= msr_last
;
4943 static void msr_record_handler(union sigval v
)
4947 for_all_cpus(update_msr_sum
, EVEN_COUNTERS
);
4950 void msr_sum_record(void)
4952 struct itimerspec its
;
4953 struct sigevent sev
;
4955 per_cpu_msr_sum
= calloc(topo
.max_cpu_num
+ 1, sizeof(struct msr_sum_array
));
4956 if (!per_cpu_msr_sum
) {
4957 fprintf(outf
, "Can not allocate memory for long time MSR.\n");
4961 * Signal handler might be restricted, so use thread notifier instead.
4963 memset(&sev
, 0, sizeof(struct sigevent
));
4964 sev
.sigev_notify
= SIGEV_THREAD
;
4965 sev
.sigev_notify_function
= msr_record_handler
;
4967 sev
.sigev_value
.sival_ptr
= &timerid
;
4968 if (timer_create(CLOCK_REALTIME
, &sev
, &timerid
) == -1) {
4969 fprintf(outf
, "Can not create timer.\n");
4973 its
.it_value
.tv_sec
= 0;
4974 its
.it_value
.tv_nsec
= 1;
4976 * A wraparound time has been calculated early.
4977 * Some sources state that the peak power for a
4978 * microprocessor is usually 1.5 times the TDP rating,
4979 * use 2 * TDP for safety.
4981 its
.it_interval
.tv_sec
= rapl_joule_counter_range
/ 2;
4982 its
.it_interval
.tv_nsec
= 0;
4984 if (timer_settime(timerid
, 0, &its
, NULL
) == -1) {
4985 fprintf(outf
, "Can not set timer.\n");
4991 timer_delete(timerid
);
4993 free(per_cpu_msr_sum
);
/*
 * set_my_sched_priority(pri)
 * return previous priority on success
 * return value < -20 on failure
 *
 * getpriority() legitimately returns -1, so errno must be cleared first
 * and checked together with the -1 result.
 */
int set_my_sched_priority(int priority)
{
	int retval;
	int original_priority;

	errno = 0;
	original_priority = getpriority(PRIO_PROCESS, 0);
	if (errno && (original_priority == -1))
		return -21;

	retval = setpriority(PRIO_PROCESS, 0, priority);
	if (retval)
		return -21;

	errno = 0;
	retval = getpriority(PRIO_PROCESS, 0);
	if (retval != priority)
		return -21;

	return original_priority;
}
5023 void turbostat_loop()
5027 unsigned int done_iters
= 0;
5029 setup_signal_handler();
5032 * elevate own priority for interval mode
5034 * ignore on error - we probably don't have permission to set it, but
5035 * it's not a big deal
5037 set_my_sched_priority(-20);
5042 snapshot_proc_sysfs_files();
5043 retval
= for_all_cpus(get_counters
, EVEN_COUNTERS
);
5044 first_counter_read
= 0;
5047 } else if (retval
== -1) {
5048 if (restarted
> 10) {
5056 gettimeofday(&tv_even
, (struct timezone
*)NULL
);
5059 if (for_all_proc_cpus(cpu_is_not_present
)) {
5063 if (update_effective_str(false)) {
5068 if (snapshot_proc_sysfs_files())
5070 retval
= for_all_cpus(get_counters
, ODD_COUNTERS
);
5073 } else if (retval
== -1) {
5077 gettimeofday(&tv_odd
, (struct timezone
*)NULL
);
5078 timersub(&tv_odd
, &tv_even
, &tv_delta
);
5079 if (for_all_cpus_2(delta_cpu
, ODD_COUNTERS
, EVEN_COUNTERS
)) {
5083 compute_average(EVEN_COUNTERS
);
5084 format_all_counters(EVEN_COUNTERS
);
5085 flush_output_stdout();
5088 if (num_iterations
&& ++done_iters
>= num_iterations
)
5091 if (snapshot_proc_sysfs_files())
5093 retval
= for_all_cpus(get_counters
, EVEN_COUNTERS
);
5096 } else if (retval
== -1) {
5100 gettimeofday(&tv_even
, (struct timezone
*)NULL
);
5101 timersub(&tv_even
, &tv_odd
, &tv_delta
);
5102 if (for_all_cpus_2(delta_cpu
, EVEN_COUNTERS
, ODD_COUNTERS
)) {
5106 compute_average(ODD_COUNTERS
);
5107 format_all_counters(ODD_COUNTERS
);
5108 flush_output_stdout();
5111 if (num_iterations
&& ++done_iters
>= num_iterations
)
5116 void check_dev_msr()
5124 sprintf(pathname
, "/dev/cpu/%d/msr", base_cpu
);
5125 if (stat(pathname
, &sb
))
5126 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
5131 * check for CAP_SYS_RAWIO
5132 * return 0 on success
5135 int check_for_cap_sys_rawio(void)
5138 cap_flag_value_t cap_flag_value
;
5141 caps
= cap_get_proc();
5145 if (cap_get_flag(caps
, CAP_SYS_RAWIO
, CAP_EFFECTIVE
, &cap_flag_value
)) {
5150 if (cap_flag_value
!= CAP_SET
) {
5156 if (cap_free(caps
) == -1)
5157 err(-6, "cap_free\n");
5162 void check_msr_permission(void)
5170 /* check for CAP_SYS_RAWIO */
5171 failed
+= check_for_cap_sys_rawio();
5173 /* test file permissions */
5174 sprintf(pathname
, "/dev/cpu/%d/msr", base_cpu
);
5175 if (euidaccess(pathname
, R_OK
)) {
5180 warnx("Failed to access %s. Some of the counters may not be available\n"
5181 "\tRun as root to enable them or use %s to disable the access explicitly", pathname
, "--no-msr");
5186 void probe_bclk(void)
5188 unsigned long long msr
;
5189 unsigned int base_ratio
;
5191 if (!platform
->has_nhm_msrs
|| no_msr
)
5194 if (platform
->bclk_freq
== BCLK_100MHZ
)
5196 else if (platform
->bclk_freq
== BCLK_133MHZ
)
5198 else if (platform
->bclk_freq
== BCLK_SLV
)
5203 get_msr(base_cpu
, MSR_PLATFORM_INFO
, &msr
);
5204 base_ratio
= (msr
>> 8) & 0xFF;
5206 base_hz
= base_ratio
* bclk
* 1000000;
5209 if (platform
->enable_tsc_tweak
)
5210 tsc_tweak
= base_hz
/ tsc_hz
;
/* Strip every '_' from s in place (used to normalize C-state names). */
static void remove_underbar(char *s)
{
	char *to = s;

	while (*s) {
		if (*s != '_')
			*to++ = *s;
		s++;
	}

	*to = 0;
}
5226 static void dump_turbo_ratio_info(void)
5231 if (!platform
->has_nhm_msrs
|| no_msr
)
5234 if (platform
->trl_msrs
& TRL_LIMIT2
)
5235 dump_turbo_ratio_limit2();
5237 if (platform
->trl_msrs
& TRL_LIMIT1
)
5238 dump_turbo_ratio_limit1();
5240 if (platform
->trl_msrs
& TRL_BASE
) {
5241 dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT
);
5244 dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT
);
5247 if (platform
->trl_msrs
& TRL_ATOM
)
5248 dump_atom_turbo_ratio_limits();
5250 if (platform
->trl_msrs
& TRL_KNL
)
5251 dump_knl_turbo_ratio_limits();
5253 if (platform
->has_config_tdp
)
5257 static int read_sysfs_int(char *path
)
5262 input
= fopen(path
, "r");
5263 if (input
== NULL
) {
5265 fprintf(outf
, "NSFOD %s\n", path
);
5268 if (fscanf(input
, "%d", &retval
) != 1)
5269 err(1, "%s: failed to read int from file", path
);
5275 static void dump_sysfs_file(char *path
)
5278 char cpuidle_buf
[64];
5280 input
= fopen(path
, "r");
5281 if (input
== NULL
) {
5283 fprintf(outf
, "NSFOD %s\n", path
);
5286 if (!fgets(cpuidle_buf
, sizeof(cpuidle_buf
), input
))
5287 err(1, "%s: failed to read file", path
);
5290 fprintf(outf
, "%s: %s", strrchr(path
, '/') + 1, cpuidle_buf
);
5293 static void probe_intel_uncore_frequency(void)
5301 if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK
))
5304 BIC_PRESENT(BIC_UNCORE_MHZ
);
5309 for (i
= 0; i
< topo
.num_packages
; ++i
) {
5310 for (j
= 0; j
< topo
.num_die
; ++j
) {
5312 char path_base
[128];
5314 sprintf(path_base
, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i
,
5317 sprintf(path
, "%s/min_freq_khz", path_base
);
5318 k
= read_sysfs_int(path
);
5319 sprintf(path
, "%s/max_freq_khz", path_base
);
5320 l
= read_sysfs_int(path
);
5321 fprintf(outf
, "Uncore Frequency package%d die%d: %d - %d MHz ", i
, j
, k
/ 1000, l
/ 1000);
5323 sprintf(path
, "%s/initial_min_freq_khz", path_base
);
5324 k
= read_sysfs_int(path
);
5325 sprintf(path
, "%s/initial_max_freq_khz", path_base
);
5326 l
= read_sysfs_int(path
);
5327 fprintf(outf
, "(%d - %d MHz)", k
/ 1000, l
/ 1000);
5329 sprintf(path
, "%s/current_freq_khz", path_base
);
5330 k
= read_sysfs_int(path
);
5331 fprintf(outf
, " %d MHz\n", k
/ 1000);
5337 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK
))
5345 char path_base
[128];
5346 int package_id
, domain_id
, cluster_id
;
5348 sprintf(path_base
, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i
);
5350 if (access(path_base
, R_OK
))
5353 sprintf(path
, "%s/package_id", path_base
);
5354 package_id
= read_sysfs_int(path
);
5356 sprintf(path
, "%s/domain_id", path_base
);
5357 domain_id
= read_sysfs_int(path
);
5359 sprintf(path
, "%s/fabric_cluster_id", path_base
);
5360 cluster_id
= read_sysfs_int(path
);
5362 sprintf(path
, "%s/min_freq_khz", path_base
);
5363 k
= read_sysfs_int(path
);
5364 sprintf(path
, "%s/max_freq_khz", path_base
);
5365 l
= read_sysfs_int(path
);
5366 fprintf(outf
, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id
, domain_id
,
5367 cluster_id
, k
/ 1000, l
/ 1000);
5369 sprintf(path
, "%s/initial_min_freq_khz", path_base
);
5370 k
= read_sysfs_int(path
);
5371 sprintf(path
, "%s/initial_max_freq_khz", path_base
);
5372 l
= read_sysfs_int(path
);
5373 fprintf(outf
, "(%d - %d MHz)", k
/ 1000, l
/ 1000);
5375 sprintf(path
, "%s/current_freq_khz", path_base
);
5376 k
= read_sysfs_int(path
);
5377 fprintf(outf
, " %d MHz\n", k
/ 1000);
5381 static void probe_graphics(void)
5383 /* Xe graphics sysfs knobs */
5384 if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK
)) {
5390 fp
= fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r");
5394 if (!fread(buf
, sizeof(char), 7, fp
)) {
5400 if (!strncmp(buf
, "gt0-rc", strlen("gt0-rc")))
5402 else if (!strncmp(buf
, "gt0-mc", strlen("gt0-mc")))
5407 idx
= gt0_is_gt
? GFX_rc6
: SAM_mc6
;
5408 gfx_info
[idx
].path
= "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms";
5410 idx
= gt0_is_gt
? GFX_MHz
: SAM_MHz
;
5411 if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK
))
5412 gfx_info
[idx
].path
= "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq";
5414 idx
= gt0_is_gt
? GFX_ACTMHz
: SAM_ACTMHz
;
5415 if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK
))
5416 gfx_info
[idx
].path
= "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq";
5418 idx
= gt0_is_gt
? SAM_mc6
: GFX_rc6
;
5419 if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK
))
5420 gfx_info
[idx
].path
= "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms";
5422 idx
= gt0_is_gt
? SAM_MHz
: GFX_MHz
;
5423 if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK
))
5424 gfx_info
[idx
].path
= "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq";
5426 idx
= gt0_is_gt
? SAM_ACTMHz
: GFX_ACTMHz
;
5427 if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK
))
5428 gfx_info
[idx
].path
= "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq";
5434 /* New i915 graphics sysfs knobs */
5435 if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK
)) {
5436 gfx_info
[GFX_rc6
].path
= "/sys/class/drm/card0/gt/gt0/rc6_residency_ms";
5438 if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK
))
5439 gfx_info
[GFX_MHz
].path
= "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz";
5441 if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK
))
5442 gfx_info
[GFX_ACTMHz
].path
= "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz";
5444 if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK
))
5445 gfx_info
[SAM_mc6
].path
= "/sys/class/drm/card0/gt/gt1/rc6_residency_ms";
5447 if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK
))
5448 gfx_info
[SAM_MHz
].path
= "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz";
5450 if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK
))
5451 gfx_info
[SAM_ACTMHz
].path
= "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz";
5456 /* Fall back to traditional i915 graphics sysfs knobs */
5457 if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK
))
5458 gfx_info
[GFX_rc6
].path
= "/sys/class/drm/card0/power/rc6_residency_ms";
5460 if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK
))
5461 gfx_info
[GFX_MHz
].path
= "/sys/class/drm/card0/gt_cur_freq_mhz";
5462 else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK
))
5463 gfx_info
[GFX_MHz
].path
= "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz";
5466 if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK
))
5467 gfx_info
[GFX_ACTMHz
].path
= "/sys/class/drm/card0/gt_act_freq_mhz";
5468 else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK
))
5469 gfx_info
[GFX_ACTMHz
].path
= "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz";
5472 if (gfx_info
[GFX_rc6
].path
)
5473 BIC_PRESENT(BIC_GFX_rc6
);
5474 if (gfx_info
[GFX_MHz
].path
)
5475 BIC_PRESENT(BIC_GFXMHz
);
5476 if (gfx_info
[GFX_ACTMHz
].path
)
5477 BIC_PRESENT(BIC_GFXACTMHz
);
5478 if (gfx_info
[SAM_mc6
].path
)
5479 BIC_PRESENT(BIC_SAM_mc6
);
5480 if (gfx_info
[SAM_MHz
].path
)
5481 BIC_PRESENT(BIC_SAMMHz
);
5482 if (gfx_info
[SAM_ACTMHz
].path
)
5483 BIC_PRESENT(BIC_SAMACTMHz
);
5486 static void dump_sysfs_cstate_config(void)
5495 if (access("/sys/devices/system/cpu/cpuidle", R_OK
)) {
5496 fprintf(outf
, "cpuidle not loaded\n");
5500 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
5501 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
5502 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
5504 for (state
= 0; state
< 10; ++state
) {
5506 sprintf(path
, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu
, state
);
5507 input
= fopen(path
, "r");
5510 if (!fgets(name_buf
, sizeof(name_buf
), input
))
5511 err(1, "%s: failed to read file", path
);
5513 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5514 sp
= strchr(name_buf
, '-');
5516 sp
= strchrnul(name_buf
, '\n');
5520 remove_underbar(name_buf
);
5522 sprintf(path
, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu
, state
);
5523 input
= fopen(path
, "r");
5526 if (!fgets(desc
, sizeof(desc
), input
))
5527 err(1, "%s: failed to read file", path
);
5529 fprintf(outf
, "cpu%d: %s: %s", base_cpu
, name_buf
, desc
);
5534 static void dump_sysfs_pstate_config(void)
5537 char driver_buf
[64];
5538 char governor_buf
[64];
5542 sprintf(path
, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu
);
5543 input
= fopen(path
, "r");
5544 if (input
== NULL
) {
5545 fprintf(outf
, "NSFOD %s\n", path
);
5548 if (!fgets(driver_buf
, sizeof(driver_buf
), input
))
5549 err(1, "%s: failed to read file", path
);
5552 sprintf(path
, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu
);
5553 input
= fopen(path
, "r");
5554 if (input
== NULL
) {
5555 fprintf(outf
, "NSFOD %s\n", path
);
5558 if (!fgets(governor_buf
, sizeof(governor_buf
), input
))
5559 err(1, "%s: failed to read file", path
);
5562 fprintf(outf
, "cpu%d: cpufreq driver: %s", base_cpu
, driver_buf
);
5563 fprintf(outf
, "cpu%d: cpufreq governor: %s", base_cpu
, governor_buf
);
5565 sprintf(path
, "/sys/devices/system/cpu/cpufreq/boost");
5566 input
= fopen(path
, "r");
5567 if (input
!= NULL
) {
5568 if (fscanf(input
, "%d", &turbo
) != 1)
5569 err(1, "%s: failed to parse number from file", path
);
5570 fprintf(outf
, "cpufreq boost: %d\n", turbo
);
5574 sprintf(path
, "/sys/devices/system/cpu/intel_pstate/no_turbo");
5575 input
= fopen(path
, "r");
5576 if (input
!= NULL
) {
5577 if (fscanf(input
, "%d", &turbo
) != 1)
5578 err(1, "%s: failed to parse number from file", path
);
5579 fprintf(outf
, "cpufreq intel_pstate no_turbo: %d\n", turbo
);
5586 * Decode the ENERGY_PERF_BIAS MSR
5588 int print_epb(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
5601 /* EPB is per-package */
5602 if (!is_cpu_first_thread_in_package(t
, c
, p
))
5605 if (cpu_migrate(cpu
)) {
5606 fprintf(outf
, "print_epb: Could not migrate to CPU %d\n", cpu
);
5615 case ENERGY_PERF_BIAS_PERFORMANCE
:
5616 epb_string
= "performance";
5618 case ENERGY_PERF_BIAS_NORMAL
:
5619 epb_string
= "balanced";
5621 case ENERGY_PERF_BIAS_POWERSAVE
:
5622 epb_string
= "powersave";
5625 epb_string
= "custom";
5628 fprintf(outf
, "cpu%d: EPB: %d (%s)\n", cpu
, epb
, epb_string
);
5635 * Decode the MSR_HWP_CAPABILITIES
5637 int print_hwp(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
5639 unsigned long long msr
;
5653 /* MSR_HWP_CAPABILITIES is per-package */
5654 if (!is_cpu_first_thread_in_package(t
, c
, p
))
5657 if (cpu_migrate(cpu
)) {
5658 fprintf(outf
, "print_hwp: Could not migrate to CPU %d\n", cpu
);
5662 if (get_msr(cpu
, MSR_PM_ENABLE
, &msr
))
5665 fprintf(outf
, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu
, msr
, (msr
& (1 << 0)) ? "" : "No-");
5667 /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
5668 if ((msr
& (1 << 0)) == 0)
5671 if (get_msr(cpu
, MSR_HWP_CAPABILITIES
, &msr
))
5674 fprintf(outf
, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
5675 "(high %d guar %d eff %d low %d)\n",
5677 (unsigned int)HWP_HIGHEST_PERF(msr
),
5678 (unsigned int)HWP_GUARANTEED_PERF(msr
),
5679 (unsigned int)HWP_MOSTEFFICIENT_PERF(msr
), (unsigned int)HWP_LOWEST_PERF(msr
));
5681 if (get_msr(cpu
, MSR_HWP_REQUEST
, &msr
))
5684 fprintf(outf
, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
5685 "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
5687 (unsigned int)(((msr
) >> 0) & 0xff),
5688 (unsigned int)(((msr
) >> 8) & 0xff),
5689 (unsigned int)(((msr
) >> 16) & 0xff),
5690 (unsigned int)(((msr
) >> 24) & 0xff),
5691 (unsigned int)(((msr
) >> 32) & 0xff3), (unsigned int)(((msr
) >> 42) & 0x1));
5694 if (get_msr(cpu
, MSR_HWP_REQUEST_PKG
, &msr
))
5697 fprintf(outf
, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
5698 "(min %d max %d des %d epp 0x%x window 0x%x)\n",
5700 (unsigned int)(((msr
) >> 0) & 0xff),
5701 (unsigned int)(((msr
) >> 8) & 0xff),
5702 (unsigned int)(((msr
) >> 16) & 0xff),
5703 (unsigned int)(((msr
) >> 24) & 0xff), (unsigned int)(((msr
) >> 32) & 0xff3));
5705 if (has_hwp_notify
) {
5706 if (get_msr(cpu
, MSR_HWP_INTERRUPT
, &msr
))
5709 fprintf(outf
, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
5710 "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
5711 cpu
, msr
, ((msr
) & 0x1) ? "EN" : "Dis", ((msr
) & 0x2) ? "EN" : "Dis");
5713 if (get_msr(cpu
, MSR_HWP_STATUS
, &msr
))
5716 fprintf(outf
, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
5717 "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
5718 cpu
, msr
, ((msr
) & 0x1) ? "" : "No-", ((msr
) & 0x4) ? "" : "No-");
5724 * print_perf_limit()
5726 int print_perf_limit(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
5728 unsigned long long msr
;
5740 if (!is_cpu_first_thread_in_package(t
, c
, p
))
5743 if (cpu_migrate(cpu
)) {
5744 fprintf(outf
, "print_perf_limit: Could not migrate to CPU %d\n", cpu
);
5748 if (platform
->plr_msrs
& PLR_CORE
) {
5749 get_msr(cpu
, MSR_CORE_PERF_LIMIT_REASONS
, &msr
);
5750 fprintf(outf
, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu
, msr
);
5751 fprintf(outf
, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
5752 (msr
& 1 << 15) ? "bit15, " : "",
5753 (msr
& 1 << 14) ? "bit14, " : "",
5754 (msr
& 1 << 13) ? "Transitions, " : "",
5755 (msr
& 1 << 12) ? "MultiCoreTurbo, " : "",
5756 (msr
& 1 << 11) ? "PkgPwrL2, " : "",
5757 (msr
& 1 << 10) ? "PkgPwrL1, " : "",
5758 (msr
& 1 << 9) ? "CorePwr, " : "",
5759 (msr
& 1 << 8) ? "Amps, " : "",
5760 (msr
& 1 << 6) ? "VR-Therm, " : "",
5761 (msr
& 1 << 5) ? "Auto-HWP, " : "",
5762 (msr
& 1 << 4) ? "Graphics, " : "",
5763 (msr
& 1 << 2) ? "bit2, " : "",
5764 (msr
& 1 << 1) ? "ThermStatus, " : "", (msr
& 1 << 0) ? "PROCHOT, " : "");
5765 fprintf(outf
, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
5766 (msr
& 1 << 31) ? "bit31, " : "",
5767 (msr
& 1 << 30) ? "bit30, " : "",
5768 (msr
& 1 << 29) ? "Transitions, " : "",
5769 (msr
& 1 << 28) ? "MultiCoreTurbo, " : "",
5770 (msr
& 1 << 27) ? "PkgPwrL2, " : "",
5771 (msr
& 1 << 26) ? "PkgPwrL1, " : "",
5772 (msr
& 1 << 25) ? "CorePwr, " : "",
5773 (msr
& 1 << 24) ? "Amps, " : "",
5774 (msr
& 1 << 22) ? "VR-Therm, " : "",
5775 (msr
& 1 << 21) ? "Auto-HWP, " : "",
5776 (msr
& 1 << 20) ? "Graphics, " : "",
5777 (msr
& 1 << 18) ? "bit18, " : "",
5778 (msr
& 1 << 17) ? "ThermStatus, " : "", (msr
& 1 << 16) ? "PROCHOT, " : "");
5781 if (platform
->plr_msrs
& PLR_GFX
) {
5782 get_msr(cpu
, MSR_GFX_PERF_LIMIT_REASONS
, &msr
);
5783 fprintf(outf
, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu
, msr
);
5784 fprintf(outf
, " (Active: %s%s%s%s%s%s%s%s)",
5785 (msr
& 1 << 0) ? "PROCHOT, " : "",
5786 (msr
& 1 << 1) ? "ThermStatus, " : "",
5787 (msr
& 1 << 4) ? "Graphics, " : "",
5788 (msr
& 1 << 6) ? "VR-Therm, " : "",
5789 (msr
& 1 << 8) ? "Amps, " : "",
5790 (msr
& 1 << 9) ? "GFXPwr, " : "",
5791 (msr
& 1 << 10) ? "PkgPwrL1, " : "", (msr
& 1 << 11) ? "PkgPwrL2, " : "");
5792 fprintf(outf
, " (Logged: %s%s%s%s%s%s%s%s)\n",
5793 (msr
& 1 << 16) ? "PROCHOT, " : "",
5794 (msr
& 1 << 17) ? "ThermStatus, " : "",
5795 (msr
& 1 << 20) ? "Graphics, " : "",
5796 (msr
& 1 << 22) ? "VR-Therm, " : "",
5797 (msr
& 1 << 24) ? "Amps, " : "",
5798 (msr
& 1 << 25) ? "GFXPwr, " : "",
5799 (msr
& 1 << 26) ? "PkgPwrL1, " : "", (msr
& 1 << 27) ? "PkgPwrL2, " : "");
5801 if (platform
->plr_msrs
& PLR_RING
) {
5802 get_msr(cpu
, MSR_RING_PERF_LIMIT_REASONS
, &msr
);
5803 fprintf(outf
, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu
, msr
);
5804 fprintf(outf
, " (Active: %s%s%s%s%s%s)",
5805 (msr
& 1 << 0) ? "PROCHOT, " : "",
5806 (msr
& 1 << 1) ? "ThermStatus, " : "",
5807 (msr
& 1 << 6) ? "VR-Therm, " : "",
5808 (msr
& 1 << 8) ? "Amps, " : "",
5809 (msr
& 1 << 10) ? "PkgPwrL1, " : "", (msr
& 1 << 11) ? "PkgPwrL2, " : "");
5810 fprintf(outf
, " (Logged: %s%s%s%s%s%s)\n",
5811 (msr
& 1 << 16) ? "PROCHOT, " : "",
5812 (msr
& 1 << 17) ? "ThermStatus, " : "",
5813 (msr
& 1 << 22) ? "VR-Therm, " : "",
5814 (msr
& 1 << 24) ? "Amps, " : "",
5815 (msr
& 1 << 26) ? "PkgPwrL1, " : "", (msr
& 1 << 27) ? "PkgPwrL2, " : "");
5820 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
5821 #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
5823 double get_quirk_tdp(void)
5825 if (platform
->rapl_quirk_tdp
)
5826 return platform
->rapl_quirk_tdp
;
5831 double get_tdp_intel(void)
5833 unsigned long long msr
;
5835 if (platform
->rapl_msrs
& RAPL_PKG_POWER_INFO
)
5836 if (!get_msr(base_cpu
, MSR_PKG_POWER_INFO
, &msr
))
5837 return ((msr
>> 0) & RAPL_POWER_GRANULARITY
) * rapl_power_units
;
5838 return get_quirk_tdp();
/* AMD has no TDP-reporting MSR here; use the platform quirk value. */
double get_tdp_amd(void)
{
	return get_quirk_tdp();
}
5846 void rapl_probe_intel(void)
5848 unsigned long long msr
;
5849 unsigned int time_unit
;
5851 const unsigned long long bic_watt_bits
= BIC_PkgWatt
| BIC_CorWatt
| BIC_RAMWatt
| BIC_GFXWatt
;
5852 const unsigned long long bic_joules_bits
= BIC_Pkg_J
| BIC_Cor_J
| BIC_RAM_J
| BIC_GFX_J
;
5855 bic_enabled
&= ~bic_watt_bits
;
5857 bic_enabled
&= ~bic_joules_bits
;
5859 if (!(platform
->rapl_msrs
& RAPL_PKG_PERF_STATUS
))
5860 bic_enabled
&= ~BIC_PKG__
;
5861 if (!(platform
->rapl_msrs
& RAPL_DRAM_PERF_STATUS
))
5862 bic_enabled
&= ~BIC_RAM__
;
5864 /* units on package 0, verify later other packages match */
5865 if (get_msr(base_cpu
, MSR_RAPL_POWER_UNIT
, &msr
))
5868 rapl_power_units
= 1.0 / (1 << (msr
& 0xF));
5869 if (platform
->has_rapl_divisor
)
5870 rapl_energy_units
= 1.0 * (1 << (msr
>> 8 & 0x1F)) / 1000000;
5872 rapl_energy_units
= 1.0 / (1 << (msr
>> 8 & 0x1F));
5874 if (platform
->has_fixed_rapl_unit
)
5875 rapl_dram_energy_units
= (15.3 / 1000000);
5877 rapl_dram_energy_units
= rapl_energy_units
;
5879 time_unit
= msr
>> 16 & 0xF;
5883 rapl_time_units
= 1.0 / (1 << (time_unit
));
5885 tdp
= get_tdp_intel();
5887 rapl_joule_counter_range
= 0xFFFFFFFF * rapl_energy_units
/ tdp
;
5889 fprintf(outf
, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range
, tdp
);
5892 void rapl_probe_amd(void)
5894 unsigned long long msr
;
5896 const unsigned long long bic_watt_bits
= BIC_PkgWatt
| BIC_CorWatt
;
5897 const unsigned long long bic_joules_bits
= BIC_Pkg_J
| BIC_Cor_J
;
5900 bic_enabled
&= ~bic_watt_bits
;
5902 bic_enabled
&= ~bic_joules_bits
;
5904 if (get_msr(base_cpu
, MSR_RAPL_PWR_UNIT
, &msr
))
5907 rapl_time_units
= ldexp(1.0, -(msr
>> 16 & 0xf));
5908 rapl_energy_units
= ldexp(1.0, -(msr
>> 8 & 0x1f));
5909 rapl_power_units
= ldexp(1.0, -(msr
& 0xf));
5911 tdp
= get_tdp_amd();
5913 rapl_joule_counter_range
= 0xFFFFFFFF * rapl_energy_units
/ tdp
;
5915 fprintf(outf
, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range
, tdp
);
5918 void print_power_limit_msr(int cpu
, unsigned long long msr
, char *label
)
5920 fprintf(outf
, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
5922 ((msr
>> 15) & 1) ? "EN" : "DIS",
5923 ((msr
>> 0) & 0x7FFF) * rapl_power_units
,
5924 (1.0 + (((msr
>> 22) & 0x3) / 4.0)) * (1 << ((msr
>> 17) & 0x1F)) * rapl_time_units
,
5925 (((msr
>> 16) & 1) ? "EN" : "DIS"));
5930 int print_rapl(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
5932 unsigned long long msr
;
5933 const char *msr_name
;
5939 if (!platform
->rapl_msrs
)
5942 /* RAPL counters are per package, so print only for 1st thread/package */
5943 if (!is_cpu_first_thread_in_package(t
, c
, p
))
5947 if (cpu_migrate(cpu
)) {
5948 fprintf(outf
, "print_rapl: Could not migrate to CPU %d\n", cpu
);
5952 if (platform
->rapl_msrs
& RAPL_AMD_F17H
) {
5953 msr_name
= "MSR_RAPL_PWR_UNIT";
5954 if (get_msr(cpu
, MSR_RAPL_PWR_UNIT
, &msr
))
5957 msr_name
= "MSR_RAPL_POWER_UNIT";
5958 if (get_msr(cpu
, MSR_RAPL_POWER_UNIT
, &msr
))
5962 fprintf(outf
, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu
, msr_name
, msr
,
5963 rapl_power_units
, rapl_energy_units
, rapl_time_units
);
5965 if (platform
->rapl_msrs
& RAPL_PKG_POWER_INFO
) {
5967 if (get_msr(cpu
, MSR_PKG_POWER_INFO
, &msr
))
5970 fprintf(outf
, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
5972 ((msr
>> 0) & RAPL_POWER_GRANULARITY
) * rapl_power_units
,
5973 ((msr
>> 16) & RAPL_POWER_GRANULARITY
) * rapl_power_units
,
5974 ((msr
>> 32) & RAPL_POWER_GRANULARITY
) * rapl_power_units
,
5975 ((msr
>> 48) & RAPL_TIME_GRANULARITY
) * rapl_time_units
);
5978 if (platform
->rapl_msrs
& RAPL_PKG
) {
5980 if (get_msr(cpu
, MSR_PKG_POWER_LIMIT
, &msr
))
5983 fprintf(outf
, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
5984 cpu
, msr
, (msr
>> 63) & 1 ? "" : "UN");
5986 print_power_limit_msr(cpu
, msr
, "PKG Limit #1");
5987 fprintf(outf
, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
5989 ((msr
>> 47) & 1) ? "EN" : "DIS",
5990 ((msr
>> 32) & 0x7FFF) * rapl_power_units
,
5991 (1.0 + (((msr
>> 54) & 0x3) / 4.0)) * (1 << ((msr
>> 49) & 0x1F)) * rapl_time_units
,
5992 ((msr
>> 48) & 1) ? "EN" : "DIS");
5994 if (get_msr(cpu
, MSR_VR_CURRENT_CONFIG
, &msr
))
5997 fprintf(outf
, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu
, msr
);
5998 fprintf(outf
, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n",
5999 cpu
, ((msr
>> 0) & 0x1FFF) * rapl_power_units
, (msr
>> 31) & 1 ? "" : "UN");
6002 if (platform
->rapl_msrs
& RAPL_DRAM_POWER_INFO
) {
6003 if (get_msr(cpu
, MSR_DRAM_POWER_INFO
, &msr
))
6006 fprintf(outf
, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
6008 ((msr
>> 0) & RAPL_POWER_GRANULARITY
) * rapl_power_units
,
6009 ((msr
>> 16) & RAPL_POWER_GRANULARITY
) * rapl_power_units
,
6010 ((msr
>> 32) & RAPL_POWER_GRANULARITY
) * rapl_power_units
,
6011 ((msr
>> 48) & RAPL_TIME_GRANULARITY
) * rapl_time_units
);
6013 if (platform
->rapl_msrs
& RAPL_DRAM
) {
6014 if (get_msr(cpu
, MSR_DRAM_POWER_LIMIT
, &msr
))
6016 fprintf(outf
, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
6017 cpu
, msr
, (msr
>> 31) & 1 ? "" : "UN");
6019 print_power_limit_msr(cpu
, msr
, "DRAM Limit");
6021 if (platform
->rapl_msrs
& RAPL_CORE_POLICY
) {
6022 if (get_msr(cpu
, MSR_PP0_POLICY
, &msr
))
6025 fprintf(outf
, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu
, msr
& 0xF);
6027 if (platform
->rapl_msrs
& RAPL_CORE_POWER_LIMIT
) {
6028 if (get_msr(cpu
, MSR_PP0_POWER_LIMIT
, &msr
))
6030 fprintf(outf
, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
6031 cpu
, msr
, (msr
>> 31) & 1 ? "" : "UN");
6032 print_power_limit_msr(cpu
, msr
, "Cores Limit");
6034 if (platform
->rapl_msrs
& RAPL_GFX
) {
6035 if (get_msr(cpu
, MSR_PP1_POLICY
, &msr
))
6038 fprintf(outf
, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu
, msr
& 0xF);
6040 if (get_msr(cpu
, MSR_PP1_POWER_LIMIT
, &msr
))
6042 fprintf(outf
, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
6043 cpu
, msr
, (msr
>> 31) & 1 ? "" : "UN");
6044 print_power_limit_msr(cpu
, msr
, "GFX Limit");
6052 * sets rapl_power_units, rapl_energy_units, rapl_time_units
6054 void probe_rapl(void)
6056 if (!platform
->rapl_msrs
|| no_msr
)
6061 if (authentic_amd
|| hygon_genuine
)
6067 for_all_cpus(print_rapl
, ODD_COUNTERS
);
6071 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
6072 * the Thermal Control Circuit (TCC) activates.
6073 * This is usually equal to tjMax.
6075 * Older processors do not have this MSR, so there we guess,
6076 * but also allow cmdline over-ride with -T.
6078 * Several MSR temperature values are in units of degrees-C
6079 * below this value, including the Digital Thermal Sensor (DTS),
6080 * Package Thermal Management Sensor (PTM), and thermal event thresholds.
6082 int set_temperature_target(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
6084 unsigned long long msr
;
6085 unsigned int tcc_default
, tcc_offset
;
6091 /* tj_max is used only for dts or ptm */
6092 if (!(do_dts
|| do_ptm
))
6095 /* this is a per-package concept */
6096 if (!is_cpu_first_thread_in_package(t
, c
, p
))
6100 if (cpu_migrate(cpu
)) {
6101 fprintf(outf
, "Could not migrate to CPU %d\n", cpu
);
6105 if (tj_max_override
!= 0) {
6106 tj_max
= tj_max_override
;
6107 fprintf(outf
, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu
, tj_max
);
6111 /* Temperature Target MSR is Nehalem and newer only */
6112 if (!platform
->has_nhm_msrs
|| no_msr
)
6115 if (get_msr(base_cpu
, MSR_IA32_TEMPERATURE_TARGET
, &msr
))
6118 tcc_default
= (msr
>> 16) & 0xFF;
6121 int bits
= platform
->tcc_offset_bits
;
6122 unsigned long long enabled
= 0;
6124 if (bits
&& !get_msr(base_cpu
, MSR_PLATFORM_INFO
, &enabled
))
6125 enabled
= (enabled
>> 30) & 1;
6127 if (bits
&& enabled
) {
6128 tcc_offset
= (msr
>> 24) & GENMASK(bits
- 1, 0);
6129 fprintf(outf
, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
6130 cpu
, msr
, tcc_default
- tcc_offset
, tcc_default
, tcc_offset
);
6132 fprintf(outf
, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu
, msr
, tcc_default
);
6139 tj_max
= tcc_default
;
6144 tj_max
= TJMAX_DEFAULT
;
6145 fprintf(outf
, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu
, tj_max
);
6150 int print_thermal(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
6152 unsigned long long msr
;
6153 unsigned int dts
, dts2
;
6162 if (!(do_dts
|| do_ptm
))
6167 /* DTS is per-core, no need to print for each thread */
6168 if (!is_cpu_first_thread_in_core(t
, c
, p
))
6171 if (cpu_migrate(cpu
)) {
6172 fprintf(outf
, "print_thermal: Could not migrate to CPU %d\n", cpu
);
6176 if (do_ptm
&& is_cpu_first_core_in_package(t
, c
, p
)) {
6177 if (get_msr(cpu
, MSR_IA32_PACKAGE_THERM_STATUS
, &msr
))
6180 dts
= (msr
>> 16) & 0x7F;
6181 fprintf(outf
, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu
, msr
, tj_max
- dts
);
6183 if (get_msr(cpu
, MSR_IA32_PACKAGE_THERM_INTERRUPT
, &msr
))
6186 dts
= (msr
>> 16) & 0x7F;
6187 dts2
= (msr
>> 8) & 0x7F;
6188 fprintf(outf
, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
6189 cpu
, msr
, tj_max
- dts
, tj_max
- dts2
);
6192 if (do_dts
&& debug
) {
6193 unsigned int resolution
;
6195 if (get_msr(cpu
, MSR_IA32_THERM_STATUS
, &msr
))
6198 dts
= (msr
>> 16) & 0x7F;
6199 resolution
= (msr
>> 27) & 0xF;
6200 fprintf(outf
, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
6201 cpu
, msr
, tj_max
- dts
, resolution
);
6203 if (get_msr(cpu
, MSR_IA32_THERM_INTERRUPT
, &msr
))
6206 dts
= (msr
>> 16) & 0x7F;
6207 dts2
= (msr
>> 8) & 0x7F;
6208 fprintf(outf
, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
6209 cpu
, msr
, tj_max
- dts
, tj_max
- dts2
);
6215 void probe_thermal(void)
6217 if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK
))
6218 BIC_PRESENT(BIC_CORE_THROT_CNT
);
6220 BIC_NOT_PRESENT(BIC_CORE_THROT_CNT
);
6222 for_all_cpus(set_temperature_target
, ODD_COUNTERS
);
6227 for_all_cpus(print_thermal
, ODD_COUNTERS
);
6230 int get_cpu_type(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
6232 unsigned int eax
, ebx
, ecx
, edx
;
6240 if (cpu_migrate(t
->cpu_id
)) {
6241 fprintf(outf
, "Could not migrate to CPU %d\n", t
->cpu_id
);
6245 if (max_level
< 0x1a)
6248 __cpuid(0x1a, eax
, ebx
, ecx
, edx
);
6249 eax
= (eax
>> 24) & 0xFF;
6255 void decode_feature_control_msr(void)
6257 unsigned long long msr
;
6262 if (!get_msr(base_cpu
, MSR_IA32_FEAT_CTL
, &msr
))
6263 fprintf(outf
, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
6264 base_cpu
, msr
, msr
& FEAT_CTL_LOCKED
? "" : "UN-", msr
& (1 << 18) ? "SGX" : "");
6267 void decode_misc_enable_msr(void)
6269 unsigned long long msr
;
6277 if (!get_msr(base_cpu
, MSR_IA32_MISC_ENABLE
, &msr
))
6278 fprintf(outf
, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
6280 msr
& MSR_IA32_MISC_ENABLE_TM1
? "" : "No-",
6281 msr
& MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP
? "" : "No-",
6282 msr
& MSR_IA32_MISC_ENABLE_MWAIT
? "" : "No-",
6283 msr
& MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE
? "No-" : "",
6284 msr
& MSR_IA32_MISC_ENABLE_TURBO_DISABLE
? "No-" : "");
6287 void decode_misc_feature_control(void)
6289 unsigned long long msr
;
6294 if (!platform
->has_msr_misc_feature_control
)
6297 if (!get_msr(base_cpu
, MSR_MISC_FEATURE_CONTROL
, &msr
))
6299 "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
6300 base_cpu
, msr
, msr
& (0 << 0) ? "No-" : "", msr
& (1 << 0) ? "No-" : "",
6301 msr
& (2 << 0) ? "No-" : "", msr
& (3 << 0) ? "No-" : "");
6305 * Decode MSR_MISC_PWR_MGMT
6307 * Decode the bits according to the Nehalem documentation
6308 * bit[0] seems to continue to have same meaning going forward
6311 void decode_misc_pwr_mgmt_msr(void)
6313 unsigned long long msr
;
6318 if (!platform
->has_msr_misc_pwr_mgmt
)
6321 if (!get_msr(base_cpu
, MSR_MISC_PWR_MGMT
, &msr
))
6322 fprintf(outf
, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
6324 msr
& (1 << 0) ? "DIS" : "EN", msr
& (1 << 1) ? "EN" : "DIS", msr
& (1 << 8) ? "EN" : "DIS");
6328 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
6330 * This MSRs are present on Silvermont processors,
6331 * Intel Atom processor E3000 series (Baytrail), and friends.
6333 void decode_c6_demotion_policy_msr(void)
6335 unsigned long long msr
;
6340 if (!platform
->has_msr_c6_demotion_policy_config
)
6343 if (!get_msr(base_cpu
, MSR_CC6_DEMOTION_POLICY_CONFIG
, &msr
))
6344 fprintf(outf
, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
6345 base_cpu
, msr
, msr
& (1 << 0) ? "EN" : "DIS");
6347 if (!get_msr(base_cpu
, MSR_MC6_DEMOTION_POLICY_CONFIG
, &msr
))
6348 fprintf(outf
, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
6349 base_cpu
, msr
, msr
& (1 << 0) ? "EN" : "DIS");
6352 void print_dev_latency(void)
6354 char *path
= "/dev/cpu_dma_latency";
6359 fd
= open(path
, O_RDONLY
);
6362 warnx("Read %s failed", path
);
6366 retval
= read(fd
, (void *)&value
, sizeof(int));
6367 if (retval
!= sizeof(int)) {
6368 warn("read failed %s", path
);
6372 fprintf(outf
, "/dev/cpu_dma_latency: %d usec (%s)\n", value
, value
== 2000000000 ? "default" : "constrained");
6377 static int has_instr_count_access(void)
6385 fd
= open_perf_counter(base_cpu
, PERF_TYPE_HARDWARE
, PERF_COUNT_HW_INSTRUCTIONS
, -1, 0);
6386 has_access
= fd
!= -1;
6392 warnx("Failed to access %s. Some of the counters may not be available\n"
6393 "\tRun as root to enable them or use %s to disable the access explicitly",
6394 "instructions retired perf counter", "--no-perf");
6399 bool is_aperf_access_required(void)
6401 return BIC_IS_ENABLED(BIC_Avg_MHz
)
6402 || BIC_IS_ENABLED(BIC_Busy
)
6403 || BIC_IS_ENABLED(BIC_Bzy_MHz
)
6404 || BIC_IS_ENABLED(BIC_IPC
);
6407 int add_rapl_perf_counter_(int cpu
, struct rapl_counter_info_t
*rci
, const struct rapl_counter_arch_info
*cai
,
6408 double *scale_
, enum rapl_unit
*unit_
)
6413 const double scale
= read_perf_rapl_scale(cai
->perf_subsys
, cai
->perf_name
);
6417 const enum rapl_unit unit
= read_perf_rapl_unit(cai
->perf_subsys
, cai
->perf_name
);
6418 if (unit
== RAPL_UNIT_INVALID
)
6421 const unsigned rapl_type
= read_perf_type(cai
->perf_subsys
);
6422 const unsigned rapl_energy_pkg_config
= read_rapl_config(cai
->perf_subsys
, cai
->perf_name
);
6424 const int fd_counter
=
6425 open_perf_counter(cpu
, rapl_type
, rapl_energy_pkg_config
, rci
->fd_perf
, PERF_FORMAT_GROUP
);
6426 if (fd_counter
== -1)
6429 /* If it's the first counter opened, make it a group descriptor */
6430 if (rci
->fd_perf
== -1)
6431 rci
->fd_perf
= fd_counter
;
6438 int add_rapl_perf_counter(int cpu
, struct rapl_counter_info_t
*rci
, const struct rapl_counter_arch_info
*cai
,
6439 double *scale
, enum rapl_unit
*unit
)
6441 int ret
= add_rapl_perf_counter_(cpu
, rci
, cai
, scale
, unit
);
6444 fprintf(stderr
, "add_rapl_perf_counter: %d (cpu: %d)\n", ret
, cpu
);
6450 * Linux-perf manages the HW instructions-retired counter
6451 * by enabling when requested, and hiding rollover
6453 void linux_perf_init(void)
6455 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK
))
6458 if (BIC_IS_ENABLED(BIC_IPC
) && has_aperf
) {
6459 fd_instr_count_percpu
= calloc(topo
.max_cpu_num
+ 1, sizeof(int));
6460 if (fd_instr_count_percpu
== NULL
)
6461 err(-1, "calloc fd_instr_count_percpu");
6464 const bool aperf_required
= is_aperf_access_required();
6465 if (aperf_required
&& has_aperf
&& amperf_source
== AMPERF_SOURCE_PERF
) {
6466 fd_amperf_percpu
= calloc(topo
.max_cpu_num
+ 1, sizeof(*fd_amperf_percpu
));
6467 if (fd_amperf_percpu
== NULL
)
6468 err(-1, "calloc fd_amperf_percpu");
6472 void rapl_perf_init(void)
6474 const int num_domains
= platform
->has_per_core_rapl
? topo
.num_cores
: topo
.num_packages
;
6475 bool *domain_visited
= calloc(num_domains
, sizeof(bool));
6477 rapl_counter_info_perdomain
= calloc(num_domains
, sizeof(*rapl_counter_info_perdomain
));
6478 if (rapl_counter_info_perdomain
== NULL
)
6479 err(-1, "calloc rapl_counter_info_percpu");
6482 * Initialize rapl_counter_info_percpu
6484 for (int domain_id
= 0; domain_id
< num_domains
; ++domain_id
) {
6485 struct rapl_counter_info_t
*rci
= &rapl_counter_info_perdomain
[domain_id
];
6487 for (size_t i
= 0; i
< NUM_RAPL_COUNTERS
; ++i
) {
6489 rci
->source
[i
] = RAPL_SOURCE_NONE
;
6494 * Open/probe the counters
6495 * If can't get it via perf, fallback to MSR
6497 for (size_t i
= 0; i
< ARRAY_SIZE(rapl_counter_arch_infos
); ++i
) {
6499 const struct rapl_counter_arch_info
*const cai
= &rapl_counter_arch_infos
[i
];
6500 bool has_counter
= 0;
6502 enum rapl_unit unit
;
6505 memset(domain_visited
, 0, num_domains
* sizeof(*domain_visited
));
6507 for (int cpu
= 0; cpu
< topo
.max_cpu_num
+ 1; ++cpu
) {
6509 if (cpu_is_not_allowed(cpu
))
6512 /* Skip already seen and handled RAPL domains */
6514 platform
->has_per_core_rapl
? cpus
[cpu
].physical_core_id
: cpus
[cpu
].physical_package_id
;
6516 if (domain_visited
[next_domain
])
6519 domain_visited
[next_domain
] = 1;
6521 struct rapl_counter_info_t
*rci
= &rapl_counter_info_perdomain
[next_domain
];
6523 /* Check if the counter is enabled and accessible */
6524 if (BIC_IS_ENABLED(cai
->bic
) && (platform
->rapl_msrs
& cai
->feature_mask
)) {
6526 /* Use perf API for this counter */
6527 if (!no_perf
&& cai
->perf_name
6528 && add_rapl_perf_counter(cpu
, rci
, cai
, &scale
, &unit
) != -1) {
6529 rci
->source
[cai
->rci_index
] = RAPL_SOURCE_PERF
;
6530 rci
->scale
[cai
->rci_index
] = scale
* cai
->compat_scale
;
6531 rci
->unit
[cai
->rci_index
] = unit
;
6532 rci
->flags
[cai
->rci_index
] = cai
->flags
;
6534 /* Use MSR for this counter */
6535 } else if (!no_msr
&& cai
->msr
&& probe_msr(cpu
, cai
->msr
) == 0) {
6536 rci
->source
[cai
->rci_index
] = RAPL_SOURCE_MSR
;
6537 rci
->msr
[cai
->rci_index
] = cai
->msr
;
6538 rci
->msr_mask
[cai
->rci_index
] = cai
->msr_mask
;
6539 rci
->msr_shift
[cai
->rci_index
] = cai
->msr_shift
;
6540 rci
->unit
[cai
->rci_index
] = RAPL_UNIT_JOULES
;
6541 rci
->scale
[cai
->rci_index
] = *cai
->platform_rapl_msr_scale
* cai
->compat_scale
;
6542 rci
->flags
[cai
->rci_index
] = cai
->flags
;
6546 if (rci
->source
[cai
->rci_index
] != RAPL_SOURCE_NONE
)
6550 /* If any CPU has access to the counter, make it present */
6552 BIC_PRESENT(cai
->bic
);
6555 free(domain_visited
);
6558 static int has_amperf_access_via_msr(void)
6563 if (probe_msr(base_cpu
, MSR_IA32_APERF
))
6566 if (probe_msr(base_cpu
, MSR_IA32_MPERF
))
6572 static int has_amperf_access_via_perf(void)
6574 struct amperf_group_fd fds
;
6577 * Cache the last result, so we don't warn the user multiple times
6579 * Negative means cached, no access
6580 * Zero means not cached
6581 * Positive means cached, has access
6583 static int has_access_cached
;
6588 if (has_access_cached
!= 0)
6589 return has_access_cached
> 0;
6591 fds
= open_amperf_fd(base_cpu
);
6592 has_access_cached
= (fds
.aperf
!= -1) && (fds
.mperf
!= -1);
6594 if (fds
.aperf
== -1)
6595 warnx("Failed to access %s. Some of the counters may not be available\n"
6596 "\tRun as root to enable them or use %s to disable the access explicitly",
6597 "APERF perf counter", "--no-perf");
6601 if (fds
.mperf
== -1)
6602 warnx("Failed to access %s. Some of the counters may not be available\n"
6603 "\tRun as root to enable them or use %s to disable the access explicitly",
6604 "MPERF perf counter", "--no-perf");
6608 if (has_access_cached
== 0)
6609 has_access_cached
= -1;
6611 return has_access_cached
> 0;
6614 /* Check if we can access APERF and MPERF */
6615 static int has_amperf_access(void)
6617 if (!is_aperf_access_required())
6620 if (!no_msr
&& has_amperf_access_via_msr())
6623 if (!no_perf
&& has_amperf_access_via_perf())
6629 void probe_cstates(void)
6633 if (platform
->supported_cstates
& CC1
)
6634 BIC_PRESENT(BIC_CPU_c1
);
6636 if (platform
->supported_cstates
& CC3
)
6637 BIC_PRESENT(BIC_CPU_c3
);
6639 if (platform
->supported_cstates
& CC6
)
6640 BIC_PRESENT(BIC_CPU_c6
);
6642 if (platform
->supported_cstates
& CC7
)
6643 BIC_PRESENT(BIC_CPU_c7
);
6645 if (platform
->supported_cstates
& PC2
&& (pkg_cstate_limit
>= PCL__2
))
6646 BIC_PRESENT(BIC_Pkgpc2
);
6648 if (platform
->supported_cstates
& PC3
&& (pkg_cstate_limit
>= PCL__3
))
6649 BIC_PRESENT(BIC_Pkgpc3
);
6651 if (platform
->supported_cstates
& PC6
&& (pkg_cstate_limit
>= PCL__6
))
6652 BIC_PRESENT(BIC_Pkgpc6
);
6654 if (platform
->supported_cstates
& PC7
&& (pkg_cstate_limit
>= PCL__7
))
6655 BIC_PRESENT(BIC_Pkgpc7
);
6657 if (platform
->supported_cstates
& PC8
&& (pkg_cstate_limit
>= PCL__8
))
6658 BIC_PRESENT(BIC_Pkgpc8
);
6660 if (platform
->supported_cstates
& PC9
&& (pkg_cstate_limit
>= PCL__9
))
6661 BIC_PRESENT(BIC_Pkgpc9
);
6663 if (platform
->supported_cstates
& PC10
&& (pkg_cstate_limit
>= PCL_10
))
6664 BIC_PRESENT(BIC_Pkgpc10
);
6666 if (platform
->has_msr_module_c6_res_ms
)
6667 BIC_PRESENT(BIC_Mod_c6
);
6669 if (platform
->has_ext_cst_msrs
&& !no_msr
) {
6670 BIC_PRESENT(BIC_Totl_c0
);
6671 BIC_PRESENT(BIC_Any_c0
);
6672 BIC_PRESENT(BIC_GFX_c0
);
6673 BIC_PRESENT(BIC_CPUGFX
);
6681 decode_c6_demotion_policy_msr();
6682 print_dev_latency();
6683 dump_sysfs_cstate_config();
6687 void probe_lpi(void)
6689 if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK
))
6690 BIC_PRESENT(BIC_CPU_LPI
);
6692 BIC_NOT_PRESENT(BIC_CPU_LPI
);
6694 if (!access(sys_lpi_file_sysfs
, R_OK
)) {
6695 sys_lpi_file
= sys_lpi_file_sysfs
;
6696 BIC_PRESENT(BIC_SYS_LPI
);
6697 } else if (!access(sys_lpi_file_debugfs
, R_OK
)) {
6698 sys_lpi_file
= sys_lpi_file_debugfs
;
6699 BIC_PRESENT(BIC_SYS_LPI
);
6701 sys_lpi_file_sysfs
= NULL
;
6702 BIC_NOT_PRESENT(BIC_SYS_LPI
);
6707 void probe_pstates(void)
6714 dump_platform_info();
6715 dump_turbo_ratio_info();
6716 dump_sysfs_pstate_config();
6717 decode_misc_pwr_mgmt_msr();
6719 for_all_cpus(print_hwp
, ODD_COUNTERS
);
6720 for_all_cpus(print_epb
, ODD_COUNTERS
);
6721 for_all_cpus(print_perf_limit
, ODD_COUNTERS
);
6724 void process_cpuid()
6726 unsigned int eax
, ebx
, ecx
, edx
;
6727 unsigned int fms
, family
, model
, stepping
, ecx_flags
, edx_flags
;
6728 unsigned long long ucode_patch
= 0;
6729 bool ucode_patch_valid
= false;
6731 eax
= ebx
= ecx
= edx
= 0;
6733 __cpuid(0, max_level
, ebx
, ecx
, edx
);
6735 if (ebx
== 0x756e6547 && ecx
== 0x6c65746e && edx
== 0x49656e69)
6737 else if (ebx
== 0x68747541 && ecx
== 0x444d4163 && edx
== 0x69746e65)
6739 else if (ebx
== 0x6f677948 && ecx
== 0x656e6975 && edx
== 0x6e65476e)
6743 fprintf(outf
, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n",
6744 (char *)&ebx
, (char *)&edx
, (char *)&ecx
, max_level
);
6746 __cpuid(1, fms
, ebx
, ecx
, edx
);
6747 family
= (fms
>> 8) & 0xf;
6748 model
= (fms
>> 4) & 0xf;
6749 stepping
= fms
& 0xf;
6751 family
+= (fms
>> 20) & 0xff;
6753 model
+= ((fms
>> 16) & 0xf) << 4;
6758 if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV
, &ucode_patch
))
6759 warnx("get_msr(UCODE)");
6761 ucode_patch_valid
= true;
6765 * check max extended function levels of CPUID.
6766 * This is needed to check for invariant TSC.
6767 * This check is valid for both Intel and AMD.
6769 ebx
= ecx
= edx
= 0;
6770 __cpuid(0x80000000, max_extended_level
, ebx
, ecx
, edx
);
6773 fprintf(outf
, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
6774 family
, model
, stepping
, family
, model
, stepping
);
6775 if (ucode_patch_valid
)
6776 fprintf(outf
, " microcode 0x%x", (unsigned int)((ucode_patch
>> 32) & 0xFFFFFFFF));
6779 fprintf(outf
, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level
);
6780 fprintf(outf
, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
6781 ecx_flags
& (1 << 0) ? "SSE3" : "-",
6782 ecx_flags
& (1 << 3) ? "MONITOR" : "-",
6783 ecx_flags
& (1 << 6) ? "SMX" : "-",
6784 ecx_flags
& (1 << 7) ? "EIST" : "-",
6785 ecx_flags
& (1 << 8) ? "TM2" : "-",
6786 edx_flags
& (1 << 4) ? "TSC" : "-",
6787 edx_flags
& (1 << 5) ? "MSR" : "-",
6788 edx_flags
& (1 << 22) ? "ACPI-TM" : "-",
6789 edx_flags
& (1 << 28) ? "HT" : "-", edx_flags
& (1 << 29) ? "TM" : "-");
6792 probe_platform_features(family
, model
);
6794 if (!(edx_flags
& (1 << 5)))
6795 errx(1, "CPUID: no MSR");
6797 if (max_extended_level
>= 0x80000007) {
6800 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
6801 * this check is valid for both Intel and AMD
6803 __cpuid(0x80000007, eax
, ebx
, ecx
, edx
);
6804 has_invariant_tsc
= edx
& (1 << 8);
6808 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
6809 * this check is valid for both Intel and AMD
6812 __cpuid(0x6, eax
, ebx
, ecx
, edx
);
6813 has_aperf
= ecx
& (1 << 0);
6814 if (has_aperf
&& has_amperf_access()) {
6815 BIC_PRESENT(BIC_Avg_MHz
);
6816 BIC_PRESENT(BIC_Busy
);
6817 BIC_PRESENT(BIC_Bzy_MHz
);
6818 BIC_PRESENT(BIC_IPC
);
6820 do_dts
= eax
& (1 << 0);
6822 BIC_PRESENT(BIC_CoreTmp
);
6823 has_turbo
= eax
& (1 << 1);
6824 do_ptm
= eax
& (1 << 6);
6826 BIC_PRESENT(BIC_PkgTmp
);
6827 has_hwp
= eax
& (1 << 7);
6828 has_hwp_notify
= eax
& (1 << 8);
6829 has_hwp_activity_window
= eax
& (1 << 9);
6830 has_hwp_epp
= eax
& (1 << 10);
6831 has_hwp_pkg
= eax
& (1 << 11);
6832 has_epb
= ecx
& (1 << 3);
6835 fprintf(outf
, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
6836 "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
6837 has_aperf
? "" : "No-",
6838 has_turbo
? "" : "No-",
6839 do_dts
? "" : "No-",
6840 do_ptm
? "" : "No-",
6841 has_hwp
? "" : "No-",
6842 has_hwp_notify
? "" : "No-",
6843 has_hwp_activity_window
? "" : "No-",
6844 has_hwp_epp
? "" : "No-", has_hwp_pkg
? "" : "No-", has_epb
? "" : "No-");
6847 decode_misc_enable_msr();
6849 if (max_level
>= 0x7 && !quiet
) {
6854 __cpuid_count(0x7, 0, eax
, ebx
, ecx
, edx
);
6856 has_sgx
= ebx
& (1 << 2);
6858 is_hybrid
= edx
& (1 << 15);
6860 fprintf(outf
, "CPUID(7): %sSGX %sHybrid\n", has_sgx
? "" : "No-", is_hybrid
? "" : "No-");
6863 decode_feature_control_msr();
6866 if (max_level
>= 0x15) {
6867 unsigned int eax_crystal
;
6868 unsigned int ebx_tsc
;
6871 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
6873 eax_crystal
= ebx_tsc
= crystal_hz
= edx
= 0;
6874 __cpuid(0x15, eax_crystal
, ebx_tsc
, crystal_hz
, edx
);
6877 if (!quiet
&& (ebx
!= 0))
6878 fprintf(outf
, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
6879 eax_crystal
, ebx_tsc
, crystal_hz
);
6881 if (crystal_hz
== 0)
6882 crystal_hz
= platform
->crystal_freq
;
6885 tsc_hz
= (unsigned long long)crystal_hz
*ebx_tsc
/ eax_crystal
;
6887 fprintf(outf
, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
6888 tsc_hz
/ 1000000, crystal_hz
, ebx_tsc
, eax_crystal
);
6892 if (max_level
>= 0x16) {
6893 unsigned int base_mhz
, max_mhz
, bus_mhz
, edx
;
6896 * CPUID 16H Base MHz, Max MHz, Bus MHz
6898 base_mhz
= max_mhz
= bus_mhz
= edx
= 0;
6900 __cpuid(0x16, base_mhz
, max_mhz
, bus_mhz
, edx
);
6904 base_hz
= base_mhz
* 1000000;
6907 if (platform
->enable_tsc_tweak
)
6908 tsc_tweak
= base_hz
/ tsc_hz
;
6911 fprintf(outf
, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
6912 base_mhz
, max_mhz
, bus_mhz
);
6916 aperf_mperf_multiplier
= platform
->need_perf_multiplier
? 1024 : 1;
6918 BIC_PRESENT(BIC_IRQ
);
6919 BIC_PRESENT(BIC_TSC_MHz
);
6922 void probe_pm_features(void)
6930 probe_intel_uncore_frequency();
6938 if (platform
->has_nhm_msrs
&& !no_msr
)
6939 BIC_PRESENT(BIC_SMI
);
6942 decode_misc_feature_control();
/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 */
int dir_filter(const struct dirent *dirp)
{
	if (isdigit(dirp->d_name[0]))
		return 1;
	else
		return 0;
}
6957 void topology_probe(bool startup
)
6960 int max_core_id
= 0;
6961 int max_package_id
= 0;
6963 int max_siblings
= 0;
6965 /* Initialize num_cpus, max_cpu_num */
6968 for_all_proc_cpus(count_cpus
);
6969 if (!summary_only
&& topo
.num_cpus
> 1)
6970 BIC_PRESENT(BIC_CPU
);
6973 fprintf(outf
, "num_cpus %d max_cpu_num %d\n", topo
.num_cpus
, topo
.max_cpu_num
);
6975 cpus
= calloc(1, (topo
.max_cpu_num
+ 1) * sizeof(struct cpu_topology
));
6977 err(1, "calloc cpus");
6980 * Allocate and initialize cpu_present_set
6982 cpu_present_set
= CPU_ALLOC((topo
.max_cpu_num
+ 1));
6983 if (cpu_present_set
== NULL
)
6984 err(3, "CPU_ALLOC");
6985 cpu_present_setsize
= CPU_ALLOC_SIZE((topo
.max_cpu_num
+ 1));
6986 CPU_ZERO_S(cpu_present_setsize
, cpu_present_set
);
6987 for_all_proc_cpus(mark_cpu_present
);
6990 * Allocate and initialize cpu_effective_set
6992 cpu_effective_set
= CPU_ALLOC((topo
.max_cpu_num
+ 1));
6993 if (cpu_effective_set
== NULL
)
6994 err(3, "CPU_ALLOC");
6995 cpu_effective_setsize
= CPU_ALLOC_SIZE((topo
.max_cpu_num
+ 1));
6996 CPU_ZERO_S(cpu_effective_setsize
, cpu_effective_set
);
6997 update_effective_set(startup
);
7000 * Allocate and initialize cpu_allowed_set
7002 cpu_allowed_set
= CPU_ALLOC((topo
.max_cpu_num
+ 1));
7003 if (cpu_allowed_set
== NULL
)
7004 err(3, "CPU_ALLOC");
7005 cpu_allowed_setsize
= CPU_ALLOC_SIZE((topo
.max_cpu_num
+ 1));
7006 CPU_ZERO_S(cpu_allowed_setsize
, cpu_allowed_set
);
7009 * Validate and update cpu_allowed_set.
7011 * Make sure all cpus in cpu_subset are also in cpu_present_set during startup.
7012 * Give a warning when cpus in cpu_subset become unavailable at runtime.
7013 * Give a warning when cpus are not effective because of cgroup setting.
7015 * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset.
7017 for (i
= 0; i
< CPU_SUBSET_MAXCPUS
; ++i
) {
7018 if (cpu_subset
&& !CPU_ISSET_S(i
, cpu_subset_size
, cpu_subset
))
7021 if (!CPU_ISSET_S(i
, cpu_present_setsize
, cpu_present_set
)) {
7023 /* cpus in cpu_subset must be in cpu_present_set during startup */
7025 err(1, "cpu%d not present", i
);
7027 fprintf(stderr
, "cpu%d not present\n", i
);
7032 if (CPU_COUNT_S(cpu_effective_setsize
, cpu_effective_set
)) {
7033 if (!CPU_ISSET_S(i
, cpu_effective_setsize
, cpu_effective_set
)) {
7034 fprintf(stderr
, "cpu%d not effective\n", i
);
7039 CPU_SET_S(i
, cpu_allowed_setsize
, cpu_allowed_set
);
7042 if (!CPU_COUNT_S(cpu_allowed_setsize
, cpu_allowed_set
))
7043 err(-ENODEV
, "No valid cpus found");
7044 sched_setaffinity(0, cpu_allowed_setsize
, cpu_allowed_set
);
7047 * Allocate and initialize cpu_affinity_set
7049 cpu_affinity_set
= CPU_ALLOC((topo
.max_cpu_num
+ 1));
7050 if (cpu_affinity_set
== NULL
)
7051 err(3, "CPU_ALLOC");
7052 cpu_affinity_setsize
= CPU_ALLOC_SIZE((topo
.max_cpu_num
+ 1));
7053 CPU_ZERO_S(cpu_affinity_setsize
, cpu_affinity_set
);
7055 for_all_proc_cpus(init_thread_id
);
7059 * find max_core_id, max_package_id
7061 for (i
= 0; i
<= topo
.max_cpu_num
; ++i
) {
7064 if (cpu_is_not_present(i
)) {
7066 fprintf(outf
, "cpu%d NOT PRESENT\n", i
);
7070 cpus
[i
].logical_cpu_id
= i
;
7072 /* get package information */
7073 cpus
[i
].physical_package_id
= get_physical_package_id(i
);
7074 if (cpus
[i
].physical_package_id
> max_package_id
)
7075 max_package_id
= cpus
[i
].physical_package_id
;
7077 /* get die information */
7078 cpus
[i
].die_id
= get_die_id(i
);
7079 if (cpus
[i
].die_id
> max_die_id
)
7080 max_die_id
= cpus
[i
].die_id
;
7082 /* get numa node information */
7083 cpus
[i
].physical_node_id
= get_physical_node_id(&cpus
[i
]);
7084 if (cpus
[i
].physical_node_id
> topo
.max_node_num
)
7085 topo
.max_node_num
= cpus
[i
].physical_node_id
;
7087 /* get core information */
7088 cpus
[i
].physical_core_id
= get_core_id(i
);
7089 if (cpus
[i
].physical_core_id
> max_core_id
)
7090 max_core_id
= cpus
[i
].physical_core_id
;
7092 /* get thread information */
7093 siblings
= get_thread_siblings(&cpus
[i
]);
7094 if (siblings
> max_siblings
)
7095 max_siblings
= siblings
;
7096 if (cpus
[i
].thread_id
== 0)
7100 topo
.cores_per_node
= max_core_id
+ 1;
7102 fprintf(outf
, "max_core_id %d, sizing for %d cores per package\n", max_core_id
, topo
.cores_per_node
);
7103 if (!summary_only
&& topo
.cores_per_node
> 1)
7104 BIC_PRESENT(BIC_Core
);
7106 topo
.num_die
= max_die_id
+ 1;
7108 fprintf(outf
, "max_die_id %d, sizing for %d die\n", max_die_id
, topo
.num_die
);
7109 if (!summary_only
&& topo
.num_die
> 1)
7110 BIC_PRESENT(BIC_Die
);
7112 topo
.num_packages
= max_package_id
+ 1;
7114 fprintf(outf
, "max_package_id %d, sizing for %d packages\n", max_package_id
, topo
.num_packages
);
7115 if (!summary_only
&& topo
.num_packages
> 1)
7116 BIC_PRESENT(BIC_Package
);
7120 fprintf(outf
, "nodes_per_pkg %d\n", topo
.nodes_per_pkg
);
7121 if (!summary_only
&& topo
.nodes_per_pkg
> 1)
7122 BIC_PRESENT(BIC_Node
);
7124 topo
.threads_per_core
= max_siblings
;
7126 fprintf(outf
, "max_siblings %d\n", max_siblings
);
7131 for (i
= 0; i
<= topo
.max_cpu_num
; ++i
) {
7132 if (cpu_is_not_present(i
))
7135 "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
7136 i
, cpus
[i
].physical_package_id
, cpus
[i
].die_id
,
7137 cpus
[i
].physical_node_id
, cpus
[i
].logical_node_id
, cpus
[i
].physical_core_id
, cpus
[i
].thread_id
);
7142 void allocate_counters(struct thread_data
**t
, struct core_data
**c
, struct pkg_data
**p
)
7145 int num_cores
= topo
.cores_per_node
* topo
.nodes_per_pkg
* topo
.num_packages
;
7146 int num_threads
= topo
.threads_per_core
* num_cores
;
7148 *t
= calloc(num_threads
, sizeof(struct thread_data
));
7152 for (i
= 0; i
< num_threads
; i
++)
7153 (*t
)[i
].cpu_id
= -1;
7155 *c
= calloc(num_cores
, sizeof(struct core_data
));
7159 for (i
= 0; i
< num_cores
; i
++) {
7160 (*c
)[i
].core_id
= -1;
7161 (*c
)[i
].base_cpu
= -1;
7164 *p
= calloc(topo
.num_packages
, sizeof(struct pkg_data
));
7168 for (i
= 0; i
< topo
.num_packages
; i
++) {
7169 (*p
)[i
].package_id
= i
;
7170 (*p
)[i
].base_cpu
= -1;
7175 err(1, "calloc counters");
7181 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
7183 void init_counter(struct thread_data
*thread_base
, struct core_data
*core_base
, struct pkg_data
*pkg_base
, int cpu_id
)
7185 int pkg_id
= cpus
[cpu_id
].physical_package_id
;
7186 int node_id
= cpus
[cpu_id
].logical_node_id
;
7187 int core_id
= cpus
[cpu_id
].physical_core_id
;
7188 int thread_id
= cpus
[cpu_id
].thread_id
;
7189 struct thread_data
*t
;
7190 struct core_data
*c
;
7193 /* Workaround for systems where physical_node_id==-1
7194 * and logical_node_id==(-1 - topo.num_cpus)
7199 t
= GET_THREAD(thread_base
, thread_id
, core_id
, node_id
, pkg_id
);
7200 c
= GET_CORE(core_base
, core_id
, node_id
, pkg_id
);
7201 p
= GET_PKG(pkg_base
, pkg_id
);
7204 if (!cpu_is_not_allowed(cpu_id
)) {
7205 if (c
->base_cpu
< 0)
7206 c
->base_cpu
= t
->cpu_id
;
7207 if (p
->base_cpu
< 0)
7208 p
->base_cpu
= t
->cpu_id
;
7211 c
->core_id
= core_id
;
7212 p
->package_id
= pkg_id
;
7215 int initialize_counters(int cpu_id
)
7217 init_counter(EVEN_COUNTERS
, cpu_id
);
7218 init_counter(ODD_COUNTERS
, cpu_id
);
7222 void allocate_output_buffer()
7224 output_buffer
= calloc(1, (1 + topo
.num_cpus
) * 2048);
7225 outp
= output_buffer
;
7227 err(-1, "calloc output buffer");
7230 void allocate_fd_percpu(void)
7232 fd_percpu
= calloc(topo
.max_cpu_num
+ 1, sizeof(int));
7233 if (fd_percpu
== NULL
)
7234 err(-1, "calloc fd_percpu");
7237 void allocate_irq_buffers(void)
7239 irq_column_2_cpu
= calloc(topo
.num_cpus
, sizeof(int));
7240 if (irq_column_2_cpu
== NULL
)
7241 err(-1, "calloc %d", topo
.num_cpus
);
7243 irqs_per_cpu
= calloc(topo
.max_cpu_num
+ 1, sizeof(int));
7244 if (irqs_per_cpu
== NULL
)
7245 err(-1, "calloc %d", topo
.max_cpu_num
+ 1);
7248 int update_topo(struct thread_data
*t
, struct core_data
*c
, struct pkg_data
*p
)
7250 topo
.allowed_cpus
++;
7251 if ((int)t
->cpu_id
== c
->base_cpu
)
7252 topo
.allowed_cores
++;
7253 if ((int)t
->cpu_id
== p
->base_cpu
)
7254 topo
.allowed_packages
++;
7259 void topology_update(void)
7261 topo
.allowed_cpus
= 0;
7262 topo
.allowed_cores
= 0;
7263 topo
.allowed_packages
= 0;
7264 for_all_cpus(update_topo
, ODD_COUNTERS
);
/*
 * setup_all_buffers() - probe topology, then allocate all per-CPU state.
 * Order matters: topology_probe() must run first so the topo.* counts
 * are valid for the allocators below; initialize_counters() runs last
 * because it indexes into the freshly allocated even/odd arrays.
 * NOTE(review): extraction dropped some surrounding lines; code kept
 * byte-identical, comments only.
 */
7267 void setup_all_buffers(bool startup
)
7269 topology_probe(startup
);
/* IRQ accounting and per-CPU MSR fd table, sized from topo */
7270 allocate_irq_buffers();
7271 allocate_fd_percpu();
/* two counter snapshots ("even"/"odd") used for delta computation */
7272 allocate_counters(&thread_even
, &core_even
, &package_even
);
7273 allocate_counters(&thread_odd
, &core_odd
, &package_odd
);
7274 allocate_output_buffer();
/* wire every present CPU into both snapshots */
7275 for_all_proc_cpus(initialize_counters
);
7279 void set_base_cpu(void)
7283 for (i
= 0; i
< topo
.max_cpu_num
+ 1; ++i
) {
7284 if (cpu_is_not_allowed(i
))
7288 fprintf(outf
, "base_cpu = %d\n", base_cpu
);
7291 err(-ENODEV
, "No valid cpus found");
7294 static void set_amperf_source(void)
7296 amperf_source
= AMPERF_SOURCE_PERF
;
7298 const bool aperf_required
= is_aperf_access_required();
7299 if (no_perf
|| !aperf_required
|| !has_amperf_access_via_perf())
7300 amperf_source
= AMPERF_SOURCE_MSR
;
7302 if (quiet
|| !debug
)
7305 fprintf(outf
, "aperf/mperf source preference: %s\n", amperf_source
== AMPERF_SOURCE_MSR
? "msr" : "perf");
7308 bool has_added_counters(void)
7311 * It only makes sense to call this after the command line is parsed,
7312 * otherwise sys structure is not populated.
7315 return sys
.added_core_counters
| sys
.added_thread_counters
| sys
.added_package_counters
;
7318 bool is_msr_access_required(void)
7323 if (has_added_counters())
7326 return BIC_IS_ENABLED(BIC_SMI
)
7327 || BIC_IS_ENABLED(BIC_CPU_c1
)
7328 || BIC_IS_ENABLED(BIC_CPU_c3
)
7329 || BIC_IS_ENABLED(BIC_CPU_c6
)
7330 || BIC_IS_ENABLED(BIC_CPU_c7
)
7331 || BIC_IS_ENABLED(BIC_Mod_c6
)
7332 || BIC_IS_ENABLED(BIC_CoreTmp
)
7333 || BIC_IS_ENABLED(BIC_Totl_c0
)
7334 || BIC_IS_ENABLED(BIC_Any_c0
)
7335 || BIC_IS_ENABLED(BIC_GFX_c0
)
7336 || BIC_IS_ENABLED(BIC_CPUGFX
)
7337 || BIC_IS_ENABLED(BIC_Pkgpc3
)
7338 || BIC_IS_ENABLED(BIC_Pkgpc6
)
7339 || BIC_IS_ENABLED(BIC_Pkgpc2
)
7340 || BIC_IS_ENABLED(BIC_Pkgpc7
)
7341 || BIC_IS_ENABLED(BIC_Pkgpc8
)
7342 || BIC_IS_ENABLED(BIC_Pkgpc9
)
7343 || BIC_IS_ENABLED(BIC_Pkgpc10
)
7344 /* TODO: Multiplex access with perf */
7345 || BIC_IS_ENABLED(BIC_CorWatt
)
7346 || BIC_IS_ENABLED(BIC_Cor_J
)
7347 || BIC_IS_ENABLED(BIC_PkgWatt
)
7348 || BIC_IS_ENABLED(BIC_CorWatt
)
7349 || BIC_IS_ENABLED(BIC_GFXWatt
)
7350 || BIC_IS_ENABLED(BIC_RAMWatt
)
7351 || BIC_IS_ENABLED(BIC_Pkg_J
)
7352 || BIC_IS_ENABLED(BIC_Cor_J
)
7353 || BIC_IS_ENABLED(BIC_GFX_J
)
7354 || BIC_IS_ENABLED(BIC_RAM_J
)
7355 || BIC_IS_ENABLED(BIC_PKG__
)
7356 || BIC_IS_ENABLED(BIC_RAM__
)
7357 || BIC_IS_ENABLED(BIC_PkgTmp
)
7358 || (is_aperf_access_required() && !has_amperf_access_via_perf());
/*
 * check_msr_access() - verify MSR access is available when any enabled
 * column needs it; otherwise disable the MSR-backed columns.
 * NOTE(review): the extraction is missing this function's interior
 * lines (7364-7366, 7368-7369) — presumably the no_msr fallback and
 * its guard; code kept byte-identical, comments only.
 */
7361 void check_msr_access(void)
7363 if (!is_msr_access_required())
7367 check_msr_permission();
/* when MSR access is unavailable, hide the MSR-backed columns */
7370 bic_disable_msr_access();
7373 void check_perf_access(void)
7375 const bool intrcount_required
= BIC_IS_ENABLED(BIC_IPC
);
7376 if (no_perf
|| !intrcount_required
|| !has_instr_count_access())
7377 bic_enabled
&= ~BIC_IPC
;
7379 const bool aperf_required
= is_aperf_access_required();
7380 if (!aperf_required
|| !has_amperf_access()) {
7381 bic_enabled
&= ~BIC_Avg_MHz
;
7382 bic_enabled
&= ~BIC_Busy
;
7383 bic_enabled
&= ~BIC_Bzy_MHz
;
7384 bic_enabled
&= ~BIC_IPC
;
/*
 * turbostat_init() - one-time startup: probe topology, allocate all
 * buffers, verify counter-access methods, and trim the enabled-column
 * mask to what the platform can actually report.
 * NOTE(review): the extraction is missing several interior lines
 * (7389, 7391-7394, 7397-7399, …); code kept byte-identical,
 * comments only.
 */
7388 void turbostat_init()
7390 setup_all_buffers(true);
7393 check_perf_access();
7395 probe_pm_features();
7396 set_amperf_source();
/* classify each CPU (P-core/E-core) in both snapshots */
7400 for_all_cpus(get_cpu_type
, ODD_COUNTERS
);
7401 for_all_cpus(get_cpu_type
, EVEN_COUNTERS
);
/* IPC column needs a perf instructions-counter fd on the base CPU */
7403 if (DO_BIC(BIC_IPC
))
7404 (void)get_instr_count_fd(base_cpu
);
7407 * If TSC tweak is needed, but couldn't get it,
7408 * disable more BICs, since it can't be reported accurately.
7410 if (platform
->enable_tsc_tweak
&& !has_base_hz
) {
7411 bic_enabled
&= ~BIC_Busy
;
7412 bic_enabled
&= ~BIC_Bzy_MHz
;
7416 int fork_it(char **argv
)
7421 snapshot_proc_sysfs_files();
7422 status
= for_all_cpus(get_counters
, EVEN_COUNTERS
);
7423 first_counter_read
= 0;
7426 gettimeofday(&tv_even
, (struct timezone
*)NULL
);
7431 execvp(argv
[0], argv
);
7432 err(errno
, "exec %s", argv
[0]);
7436 if (child_pid
== -1)
7439 signal(SIGINT
, SIG_IGN
);
7440 signal(SIGQUIT
, SIG_IGN
);
7441 if (waitpid(child_pid
, &status
, 0) == -1)
7442 err(status
, "waitpid");
7444 if (WIFEXITED(status
))
7445 status
= WEXITSTATUS(status
);
7448 * n.b. fork_it() does not check for errors from for_all_cpus()
7449 * because re-starting is problematic when forking
7451 snapshot_proc_sysfs_files();
7452 for_all_cpus(get_counters
, ODD_COUNTERS
);
7453 gettimeofday(&tv_odd
, (struct timezone
*)NULL
);
7454 timersub(&tv_odd
, &tv_even
, &tv_delta
);
7455 if (for_all_cpus_2(delta_cpu
, ODD_COUNTERS
, EVEN_COUNTERS
))
7456 fprintf(outf
, "%s: Counter reset detected\n", progname
);
7458 compute_average(EVEN_COUNTERS
);
7459 format_all_counters(EVEN_COUNTERS
);
7462 fprintf(outf
, "%.6f sec\n", tv_delta
.tv_sec
+ tv_delta
.tv_usec
/ 1000000.0);
7464 flush_output_stderr();
7469 int get_and_dump_counters(void)
7473 snapshot_proc_sysfs_files();
7474 status
= for_all_cpus(get_counters
, ODD_COUNTERS
);
7478 status
= for_all_cpus(dump_counters
, ODD_COUNTERS
);
7482 flush_output_stdout();
7487 void print_version()
7489 fprintf(outf
, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
7492 #define COMMAND_LINE_SIZE 2048
7494 void print_bootcmd(void)
7496 char bootcmd
[COMMAND_LINE_SIZE
];
7500 memset(bootcmd
, 0, COMMAND_LINE_SIZE
);
7501 fp
= fopen("/proc/cmdline", "r");
7505 ret
= fread(bootcmd
, sizeof(char), COMMAND_LINE_SIZE
- 1, fp
);
7507 bootcmd
[ret
] = '\0';
7508 /* the last character is already '\n' */
7509 fprintf(outf
, "Kernel command line: %s", bootcmd
);
7515 int add_counter(unsigned int msr_num
, char *path
, char *name
,
7516 unsigned int width
, enum counter_scope scope
,
7517 enum counter_type type
, enum counter_format format
, int flags
)
7519 struct msr_counter
*msrp
;
7521 if (no_msr
&& msr_num
)
7522 errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num
);
7524 msrp
= calloc(1, sizeof(struct msr_counter
));
7530 msrp
->msr_num
= msr_num
;
7531 strncpy(msrp
->name
, name
, NAME_BYTES
- 1);
7533 strncpy(msrp
->path
, path
, PATH_BYTES
- 1);
7534 msrp
->width
= width
;
7536 msrp
->format
= format
;
7537 msrp
->flags
= flags
;
7542 msrp
->next
= sys
.tp
;
7544 sys
.added_thread_counters
++;
7545 if (sys
.added_thread_counters
> MAX_ADDED_THREAD_COUNTERS
) {
7546 fprintf(stderr
, "exceeded max %d added thread counters\n", MAX_ADDED_COUNTERS
);
7552 msrp
->next
= sys
.cp
;
7554 sys
.added_core_counters
++;
7555 if (sys
.added_core_counters
> MAX_ADDED_COUNTERS
) {
7556 fprintf(stderr
, "exceeded max %d added core counters\n", MAX_ADDED_COUNTERS
);
7562 msrp
->next
= sys
.pp
;
7564 sys
.added_package_counters
++;
7565 if (sys
.added_package_counters
> MAX_ADDED_COUNTERS
) {
7566 fprintf(stderr
, "exceeded max %d added package counters\n", MAX_ADDED_COUNTERS
);
7575 void parse_add_command(char *add_command
)
7579 char name_buffer
[NAME_BYTES
] = "";
7582 enum counter_scope scope
= SCOPE_CPU
;
7583 enum counter_type type
= COUNTER_CYCLES
;
7584 enum counter_format format
= FORMAT_DELTA
;
7586 while (add_command
) {
7588 if (sscanf(add_command
, "msr0x%x", &msr_num
) == 1)
7591 if (sscanf(add_command
, "msr%d", &msr_num
) == 1)
7594 if (*add_command
== '/') {
7599 if (sscanf(add_command
, "u%d", &width
) == 1) {
7600 if ((width
== 32) || (width
== 64))
7604 if (!strncmp(add_command
, "cpu", strlen("cpu"))) {
7608 if (!strncmp(add_command
, "core", strlen("core"))) {
7612 if (!strncmp(add_command
, "package", strlen("package"))) {
7613 scope
= SCOPE_PACKAGE
;
7616 if (!strncmp(add_command
, "cycles", strlen("cycles"))) {
7617 type
= COUNTER_CYCLES
;
7620 if (!strncmp(add_command
, "seconds", strlen("seconds"))) {
7621 type
= COUNTER_SECONDS
;
7624 if (!strncmp(add_command
, "usec", strlen("usec"))) {
7625 type
= COUNTER_USEC
;
7628 if (!strncmp(add_command
, "raw", strlen("raw"))) {
7629 format
= FORMAT_RAW
;
7632 if (!strncmp(add_command
, "delta", strlen("delta"))) {
7633 format
= FORMAT_DELTA
;
7636 if (!strncmp(add_command
, "percent", strlen("percent"))) {
7637 format
= FORMAT_PERCENT
;
7641 if (sscanf(add_command
, "%18s,%*s", name_buffer
) == 1) { /* 18 < NAME_BYTES */
7644 eos
= strchr(name_buffer
, ',');
7651 add_command
= strchr(add_command
, ',');
7653 *add_command
= '\0';
7658 if ((msr_num
== 0) && (path
== NULL
)) {
7659 fprintf(stderr
, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
7663 /* generate default column header */
7664 if (*name_buffer
== '\0') {
7666 sprintf(name_buffer
, "M0x%x%s", msr_num
, format
== FORMAT_PERCENT
? "%" : "");
7668 sprintf(name_buffer
, "M0X%x%s", msr_num
, format
== FORMAT_PERCENT
? "%" : "");
7671 if (add_counter(msr_num
, path
, name_buffer
, width
, scope
, type
, format
, 0))
7680 int is_deferred_add(char *name
)
7684 for (i
= 0; i
< deferred_add_index
; ++i
)
7685 if (!strcmp(name
, deferred_add_names
[i
]))
7690 int is_deferred_skip(char *name
)
7694 for (i
= 0; i
< deferred_skip_index
; ++i
)
7695 if (!strcmp(name
, deferred_skip_names
[i
]))
7700 void probe_sysfs(void)
7708 for (state
= 10; state
>= 0; --state
) {
7710 sprintf(path
, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu
, state
);
7711 input
= fopen(path
, "r");
7714 if (!fgets(name_buf
, sizeof(name_buf
), input
))
7715 err(1, "%s: failed to read file", path
);
7717 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
7718 sp
= strchr(name_buf
, '-');
7720 sp
= strchrnul(name_buf
, '\n');
7724 remove_underbar(name_buf
);
7728 sprintf(path
, "cpuidle/state%d/time", state
);
7730 if (!DO_BIC(BIC_sysfs
) && !is_deferred_add(name_buf
))
7733 if (is_deferred_skip(name_buf
))
7736 add_counter(0, path
, name_buf
, 64, SCOPE_CPU
, COUNTER_USEC
, FORMAT_PERCENT
, SYSFS_PERCPU
);
7739 for (state
= 10; state
>= 0; --state
) {
7741 sprintf(path
, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu
, state
);
7742 input
= fopen(path
, "r");
7745 if (!fgets(name_buf
, sizeof(name_buf
), input
))
7746 err(1, "%s: failed to read file", path
);
7747 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
7748 sp
= strchr(name_buf
, '-');
7750 sp
= strchrnul(name_buf
, '\n');
7754 remove_underbar(name_buf
);
7756 sprintf(path
, "cpuidle/state%d/usage", state
);
7758 if (!DO_BIC(BIC_sysfs
) && !is_deferred_add(name_buf
))
7761 if (is_deferred_skip(name_buf
))
7764 add_counter(0, path
, name_buf
, 64, SCOPE_CPU
, COUNTER_ITEMS
, FORMAT_DELTA
, SYSFS_PERCPU
);
7770 * parse cpuset with following syntax
7771 * 1,2,4..6,8-10 and set bits in cpu_subset
7773 void parse_cpu_command(char *optarg
)
7775 if (!strcmp(optarg
, "core")) {
7781 if (!strcmp(optarg
, "package")) {
7787 if (show_core_only
|| show_pkg_only
)
7790 cpu_subset
= CPU_ALLOC(CPU_SUBSET_MAXCPUS
);
7791 if (cpu_subset
== NULL
)
7792 err(3, "CPU_ALLOC");
7793 cpu_subset_size
= CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS
);
7795 CPU_ZERO_S(cpu_subset_size
, cpu_subset
);
7797 if (parse_cpu_str(optarg
, cpu_subset
, cpu_subset_size
))
7803 fprintf(stderr
, "\"--cpu %s\" malformed\n", optarg
);
7808 void cmdline(int argc
, char **argv
)
7811 int option_index
= 0;
7812 static struct option long_options
[] = {
7813 { "add", required_argument
, 0, 'a' },
7814 { "cpu", required_argument
, 0, 'c' },
7815 { "Dump", no_argument
, 0, 'D' },
7816 { "debug", no_argument
, 0, 'd' }, /* internal, not documented */
7817 { "enable", required_argument
, 0, 'e' },
7818 { "interval", required_argument
, 0, 'i' },
7819 { "IPC", no_argument
, 0, 'I' },
7820 { "num_iterations", required_argument
, 0, 'n' },
7821 { "header_iterations", required_argument
, 0, 'N' },
7822 { "help", no_argument
, 0, 'h' },
7823 { "hide", required_argument
, 0, 'H' }, // meh, -h taken by --help
7824 { "Joules", no_argument
, 0, 'J' },
7825 { "list", no_argument
, 0, 'l' },
7826 { "out", required_argument
, 0, 'o' },
7827 { "quiet", no_argument
, 0, 'q' },
7828 { "no-msr", no_argument
, 0, 'M' },
7829 { "no-perf", no_argument
, 0, 'P' },
7830 { "show", required_argument
, 0, 's' },
7831 { "Summary", no_argument
, 0, 'S' },
7832 { "TCC", required_argument
, 0, 'T' },
7833 { "version", no_argument
, 0, 'v' },
7840 * Parse some options early, because they may make other options invalid,
7841 * like adding the MSR counter with --add and at the same time using --no-msr.
7843 while ((opt
= getopt_long_only(argc
, argv
, "MP", long_options
, &option_index
)) != -1) {
7857 while ((opt
= getopt_long_only(argc
, argv
, "+C:c:Dde:hi:Jn:o:qMST:v", long_options
, &option_index
)) != -1) {
7860 parse_add_command(optarg
);
7863 parse_cpu_command(optarg
);
7869 /* --enable specified counter */
7870 bic_enabled
= bic_enabled
| bic_lookup(optarg
, SHOW_LIST
);
7874 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT
);
7878 * --hide: do not show those specified
7879 * multiple invocations simply clear more bits in enabled mask
7881 bic_enabled
&= ~bic_lookup(optarg
, HIDE_LIST
);
7889 double interval
= strtod(optarg
, NULL
);
7891 if (interval
< 0.001) {
7892 fprintf(outf
, "interval %f seconds is too small\n", interval
);
7896 interval_tv
.tv_sec
= interval_ts
.tv_sec
= interval
;
7897 interval_tv
.tv_usec
= (interval
- interval_tv
.tv_sec
) * 1000000;
7898 interval_ts
.tv_nsec
= (interval
- interval_ts
.tv_sec
) * 1000000000;
7905 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT
);
7910 outf
= fopen_or_die(optarg
, "w");
7917 /* Parsed earlier */
7920 num_iterations
= strtod(optarg
, NULL
);
7922 if (num_iterations
<= 0) {
7923 fprintf(outf
, "iterations %d should be positive number\n", num_iterations
);
7928 header_iterations
= strtod(optarg
, NULL
);
7930 if (header_iterations
<= 0) {
7931 fprintf(outf
, "iterations %d should be positive number\n", header_iterations
);
7937 * --show: show only those specified
7938 * The 1st invocation will clear and replace the enabled mask
7939 * subsequent invocations can add to it.
7942 bic_enabled
= bic_lookup(optarg
, SHOW_LIST
);
7944 bic_enabled
|= bic_lookup(optarg
, SHOW_LIST
);
7951 tj_max_override
= atoi(optarg
);
7961 void set_rlimit(void)
7963 struct rlimit limit
;
7965 if (getrlimit(RLIMIT_NOFILE
, &limit
) < 0)
7966 err(1, "Failed to get rlimit");
7968 if (limit
.rlim_max
< MAX_NOFILE
)
7969 limit
.rlim_max
= MAX_NOFILE
;
7970 if (limit
.rlim_cur
< MAX_NOFILE
)
7971 limit
.rlim_cur
= MAX_NOFILE
;
7973 if (setrlimit(RLIMIT_NOFILE
, &limit
) < 0)
7974 err(1, "Failed to set rlimit");
7977 int main(int argc
, char **argv
)
7981 fd
= open("/sys/fs/cgroup/cgroup.procs", O_WRONLY
);
7983 goto skip_cgroup_setting
;
7985 ret
= write(fd
, "0\n", 2);
7987 perror("Can't update cgroup\n");
7991 skip_cgroup_setting
:
7993 cmdline(argc
, argv
);
8010 /* dump counters and exit */
8012 return get_and_dump_counters();
8014 /* list header and exit */
8015 if (list_header_only
) {
8017 flush_output_stdout();
8022 * if any params left, it must be a command to fork
8025 return fork_it(argv
+ optind
);