From fc807b6bde6ad332c333ae5be65e8bd1b5611be7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Apr 2025 10:41:33 -0700 Subject: [PATCH] perf pmu-events: Add retirement latency to JSON events inside of perf MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The updated Intel vendor events add retirement latency for graniterapids: https://lore.kernel.org/lkml/20250322063403.364981-14-irogers@google.com/ This change makes those values available within an alias/event within a PMU and saves them into the evsel at event parse time. When no TPEBS data is available the default values are substituted in for TMA metrics that are using retirement latency events - currently just those on graniterapids. Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Tested-by: Weilin Wang Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexandre Torgue Cc: Andreas Färber Cc: Caleb Biggers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Manivannan Sadhasivam Cc: Mark Rutland Cc: Maxime Coquelin Cc: Perry Taylor Cc: Peter Zijlstra Cc: Thomas Falcon Link: https://lore.kernel.org/r/20250414174134.3095492-16-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/empty-pmu-events.c | 216 ++++++++++++----------- tools/perf/pmu-events/jevents.py | 6 + tools/perf/pmu-events/pmu-events.h | 3 + tools/perf/util/evsel.h | 6 + tools/perf/util/intel-tpebs.c | 52 ++++-- tools/perf/util/parse-events.c | 4 + tools/perf/util/pmu.c | 52 +++++- tools/perf/util/pmu.h | 3 + 8 files changed, 220 insertions(+), 122 deletions(-) diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c index 0cb7ba7912e8a..0361bcc1eb586 100644 --- a/tools/perf/pmu-events/empty-pmu-events.c +++ b/tools/perf/pmu-events/empty-pmu-events.c @@ -20,73 +20,73 @@ struct pmu_table_entry { static const char *const big_c_string = /* offset=0 */ "tool\000" -/* offset=5 */ "duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000" -/* offset=78 */ "user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000" -/* offset=145 */ "system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000" -/* offset=210 */ "has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000" -/* offset=283 */ "num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000" -/* offset=425 */ "num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000" -/* offset=525 */ "num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000" -/* offset=639 */ "num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000" -/* offset=712 */ "num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000" -/* offset=795 */ "slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000" -/* offset=902 */ "smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000" -/* offset=1006 */ "system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000" -/* offset=1102 */ "default_core\000" -/* offset=1115 */ "bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000" -/* offset=1174 */ "bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000" -/* offset=1233 */ "l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000" -/* offset=1328 */ "segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000" -/* offset=1427 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000" -/* offset=1557 */ "eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000" -/* offset=1672 */ "hisi_sccl,ddrc\000" -/* offset=1687 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000" -/* offset=1773 */ "uncore_cbox\000" -/* offset=1785 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000" -/* offset=2016 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000" -/* offset=2081 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000" -/* offset=2152 */ "hisi_sccl,l3c\000" -/* offset=2166 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000" -/* offset=2246 */ "uncore_imc_free_running\000" -/* offset=2270 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000" -/* offset=2365 */ "uncore_imc\000" -/* offset=2376 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000" -/* offset=2454 */ "uncore_sys_ddr_pmu\000" -/* offset=2473 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000" -/* offset=2546 */ "uncore_sys_ccn_pmu\000" -/* offset=2565 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000" -/* offset=2639 */ "uncore_sys_cmn_pmu\000" -/* offset=2658 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000" -/* offset=2798 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" -/* offset=2820 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" -/* offset=2883 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" -/* offset=3049 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=3113 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=3180 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" -/* offset=3251 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" -/* offset=3345 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" -/* offset=3479 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" -/* offset=3543 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=3611 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=3681 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" -/* offset=3703 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" -/* offset=3725 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000" -/* offset=3745 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" +/* offset=5 */ "duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000\000\000\000" +/* offset=81 */ "user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000\000\000\000" +/* offset=151 */ "system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000\000\000\000" +/* offset=219 */ "has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000\000\000\000" +/* offset=295 */ "num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000\000\000\000" +/* offset=440 */ "num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000\000\000\000" +/* offset=543 */ "num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000\000\000\000" +/* offset=660 */ "num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000\000\000\000" +/* offset=736 */ "num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000\000\000\000" +/* offset=822 */ "slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000\000\000\000" +/* offset=932 */ "smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000\000\000\000" +/* offset=1039 */ "system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000\000\000\000" +/* offset=1138 */ "default_core\000" +/* offset=1151 */ "bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000\000\000\000" +/* offset=1213 */ "bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000\000\000\000" +/* offset=1275 */ "l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000\000\000\000Attributable Level 3 cache access, read\000" +/* offset=1373 */ "segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000\000\000\000" +/* offset=1475 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000\000\000\000" +/* offset=1608 */ "eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000\000\000\000" +/* offset=1726 */ "hisi_sccl,ddrc\000" +/* offset=1741 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000DDRC write commands\000" +/* offset=1830 */ "uncore_cbox\000" +/* offset=1842 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000" +/* offset=2076 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000UNC_CBO_HYPHEN\000" +/* offset=2144 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000UNC_CBO_TWO_HYPH\000" +/* offset=2218 */ "hisi_sccl,l3c\000" +/* offset=2232 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000Total read hits\000" +/* offset=2315 */ "uncore_imc_free_running\000" +/* offset=2339 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000Total cache misses\000" +/* offset=2437 */ "uncore_imc\000" +/* offset=2448 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000Total cache hits\000" +/* offset=2529 */ "uncore_sys_ddr_pmu\000" +/* offset=2548 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000" +/* offset=2624 */ "uncore_sys_ccn_pmu\000" +/* offset=2643 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000" +/* offset=2720 */ "uncore_sys_cmn_pmu\000" +/* offset=2739 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000" +/* offset=2882 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" +/* offset=2904 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" +/* offset=2967 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" +/* offset=3133 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" +/* offset=3197 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" +/* offset=3264 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" +/* offset=3335 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" +/* offset=3429 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" +/* offset=3563 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" +/* offset=3627 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" +/* offset=3695 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" +/* offset=3765 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" +/* offset=3787 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" +/* offset=3809 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000" +/* offset=3829 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" ; static const struct compact_pmu_event pmu_events__common_tool[] = { -{ 5 }, /* duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000 */ -{ 210 }, /* has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000 */ -{ 283 }, /* num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000 */ -{ 425 }, /* num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000 */ -{ 525 }, /* num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000 */ -{ 639 }, /* num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000 */ -{ 712 }, /* num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000 */ -{ 795 }, /* slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000 */ -{ 902 }, /* smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000 */ -{ 145 }, /* system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000 */ -{ 1006 }, /* system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000 */ -{ 78 }, /* user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000 */ +{ 5 }, /* duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000\000\000\000 */ +{ 219 }, /* has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000\000\000\000 */ +{ 295 }, /* num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000\000\000\000 */ +{ 440 }, /* num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000\000\000\000 */ +{ 543 }, /* num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000\000\000\000 */ +{ 660 }, /* num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000\000\000\000 */ +{ 736 }, /* num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000\000\000\000 */ +{ 822 }, /* slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000\000\000\000 */ +{ 932 }, /* smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000\000\000\000 */ +{ 151 }, /* system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000\000\000\000 */ +{ 1039 }, /* system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000\000\000\000 */ +{ 81 }, /* user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000\000\000\000 */ }; @@ -99,29 +99,29 @@ const struct pmu_table_entry pmu_events__common[] = { }; static const struct compact_pmu_event pmu_events__test_soc_cpu_default_core[] = { -{ 1115 }, /* bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000 */ -{ 1174 }, /* bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000 */ -{ 1427 }, /* dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000 */ -{ 1557 }, /* eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000 */ -{ 1233 }, /* l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000 */ -{ 1328 }, /* segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000 */ +{ 1151 }, /* bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000\000\000\000 */ +{ 1213 }, /* bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000\000\000\000 */ +{ 1475 }, /* dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000\000\000\000 */ +{ 1608 }, /* eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000\000\000\000 */ +{ 1275 }, /* l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000\000\000\000Attributable Level 3 cache access, read\000 */ +{ 1373 }, /* segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_ddrc[] = { -{ 1687 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000 */ +{ 1741 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000DDRC write commands\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_l3c[] = { -{ 2166 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000 */ +{ 2232 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000Total read hits\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_cbox[] = { -{ 2016 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000 */ -{ 2081 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000 */ -{ 1785 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000 */ +{ 2076 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000UNC_CBO_HYPHEN\000 */ +{ 2144 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000UNC_CBO_TWO_HYPH\000 */ +{ 1842 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc[] = { -{ 2376 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000 */ +{ 2448 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000Total cache hits\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc_free_running[] = { -{ 2270 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000 */ +{ 2339 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000Total cache misses\000 */ }; @@ -129,51 +129,51 @@ const struct pmu_table_entry pmu_events__test_soc_cpu[] = { { .entries = pmu_events__test_soc_cpu_default_core, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_default_core), - .pmu_name = { 1102 /* default_core\000 */ }, + .pmu_name = { 1138 /* default_core\000 */ }, }, { .entries = pmu_events__test_soc_cpu_hisi_sccl_ddrc, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_ddrc), - .pmu_name = { 1672 /* hisi_sccl,ddrc\000 */ }, + .pmu_name = { 1726 /* hisi_sccl,ddrc\000 */ }, }, { .entries = pmu_events__test_soc_cpu_hisi_sccl_l3c, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_l3c), - .pmu_name = { 2152 /* hisi_sccl,l3c\000 */ }, + .pmu_name = { 2218 /* hisi_sccl,l3c\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_cbox, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_cbox), - .pmu_name = { 1773 /* uncore_cbox\000 */ }, + .pmu_name = { 1830 /* uncore_cbox\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_imc, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc), - .pmu_name = { 2365 /* uncore_imc\000 */ }, + .pmu_name = { 2437 /* uncore_imc\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_imc_free_running, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc_free_running), - .pmu_name = { 2246 /* uncore_imc_free_running\000 */ }, + .pmu_name = { 2315 /* uncore_imc_free_running\000 */ }, }, }; static const struct compact_pmu_event pmu_metrics__test_soc_cpu_default_core[] = { -{ 2798 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */ -{ 3479 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ -{ 3251 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ -{ 3345 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ -{ 3543 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 3611 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 2883 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ -{ 2820 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ -{ 3745 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ -{ 3681 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ -{ 3703 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */ -{ 3725 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ -{ 3180 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ -{ 3049 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ -{ 3113 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ +{ 2882 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */ +{ 3563 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ +{ 3335 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ +{ 3429 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ +{ 3627 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ +{ 3695 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ +{ 2967 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ +{ 2904 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ +{ 3829 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ +{ 3765 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ +{ 3787 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */ +{ 3809 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ +{ 3264 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ +{ 3133 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ +{ 3197 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ }; @@ -181,18 +181,18 @@ const struct pmu_table_entry pmu_metrics__test_soc_cpu[] = { { .entries = pmu_metrics__test_soc_cpu_default_core, .num_entries = ARRAY_SIZE(pmu_metrics__test_soc_cpu_default_core), - .pmu_name = { 1102 /* default_core\000 */ }, + .pmu_name = { 1138 /* default_core\000 */ }, }, }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ccn_pmu[] = { -{ 2565 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000 */ +{ 2643 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_cmn_pmu[] = { -{ 2658 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000 */ +{ 2739 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ddr_pmu[] = { -{ 2473 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000 */ +{ 2548 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000 */ }; @@ -200,17 +200,17 @@ const struct pmu_table_entry pmu_events__test_soc_sys[] = { { .entries = pmu_events__test_soc_sys_uncore_sys_ccn_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ccn_pmu), - .pmu_name = { 2546 /* uncore_sys_ccn_pmu\000 */ }, + .pmu_name = { 2624 /* uncore_sys_ccn_pmu\000 */ }, }, { .entries = pmu_events__test_soc_sys_uncore_sys_cmn_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_cmn_pmu), - .pmu_name = { 2639 /* uncore_sys_cmn_pmu\000 */ }, + .pmu_name = { 2720 /* uncore_sys_cmn_pmu\000 */ }, }, { .entries = pmu_events__test_soc_sys_uncore_sys_ddr_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ddr_pmu), - .pmu_name = { 2454 /* uncore_sys_ddr_pmu\000 */ }, + .pmu_name = { 2529 /* uncore_sys_ddr_pmu\000 */ }, }, }; @@ -316,6 +316,12 @@ static void decompress_event(int offset, struct pmu_event *pe) p++; pe->unit = (*p == '\0' ? NULL : p); while (*p++); + pe->retirement_latency_mean = (*p == '\0' ? NULL : p); + while (*p++); + pe->retirement_latency_min = (*p == '\0' ? NULL : p); + while (*p++); + pe->retirement_latency_max = (*p == '\0' ? NULL : p); + while (*p++); pe->long_desc = (*p == '\0' ? NULL : p); } diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 7499a35bfadd0..e3a55486c08e4 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -47,6 +47,9 @@ _json_event_attributes = [ 'event', # Short things in alphabetical order. 'compat', 'deprecated', 'perpkg', 'unit', + # Retirement latency specific to Intel granite rapids currently. + 'retirement_latency_mean', 'retirement_latency_min', + 'retirement_latency_max', # Longer things (the last won't be iterated over during decompress). 'long_desc' ] @@ -341,6 +344,9 @@ class JsonEvent: self.perpkg = jd.get('PerPkg') self.aggr_mode = convert_aggr_mode(jd.get('AggregationMode')) self.deprecated = jd.get('Deprecated') + self.retirement_latency_mean = jd.get('RetirementLatencyMean') + self.retirement_latency_min = jd.get('RetirementLatencyMin') + self.retirement_latency_max = jd.get('RetirementLatencyMax') self.metric_name = jd.get('MetricName') self.metric_group = jd.get('MetricGroup') self.metricgroup_no_group = jd.get('MetricgroupNoGroup') diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 675562e6f7704..a95fee5616229 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -47,6 +47,9 @@ struct pmu_event { const char *long_desc; const char *pmu; const char *unit; + const char *retirement_latency_mean; + const char *retirement_latency_min; + const char *retirement_latency_max; bool perpkg; bool deprecated; }; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index aae431d63d647..42dcadfef8cee 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -177,6 +177,12 @@ struct evsel { /* For tool events */ /* Beginning time subtracted when the counter is read. */ union { + /* Defaults for retirement latency events. */ + struct _retirement_latency { + double mean; + double min; + double max; + } retirement_latency; /* duration_time is a single global time. */ __u64 start_time; /* diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c index 5a158395c7faa..7fd6cae1063e6 100644 --- a/tools/perf/util/intel-tpebs.c +++ b/tools/perf/util/intel-tpebs.c @@ -514,27 +514,49 @@ int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread) * process. Allow the sample reader a chance to read by releasing and * reacquiring the lock. */ - if (&t->nd == tpebs_results.next) { + if (t && &t->nd == tpebs_results.next) { ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_PING_TAG); mutex_unlock(tpebs_mtx_get()); if (ret) return ret; mutex_lock(tpebs_mtx_get()); } - switch (tpebs_mode) { - case TPEBS_MODE__MIN: - val = rint(t->stats.min); - break; - case TPEBS_MODE__MAX: - val = rint(t->stats.max); - break; - case TPEBS_MODE__LAST: - val = t->last; - break; - default: - case TPEBS_MODE__MEAN: - val = rint(t->stats.mean); - break; + if (t == NULL || t->stats.n == 0) { + /* No sample data, use default. */ + if (tpebs_recording) { + pr_warning_once( + "Using precomputed retirement latency data as no samples\n"); + } + val = 0; + switch (tpebs_mode) { + case TPEBS_MODE__MIN: + val = rint(evsel->retirement_latency.min); + break; + case TPEBS_MODE__MAX: + val = rint(evsel->retirement_latency.max); + break; + default: + case TPEBS_MODE__LAST: + case TPEBS_MODE__MEAN: + val = rint(evsel->retirement_latency.mean); + break; + } + } else { + switch (tpebs_mode) { + case TPEBS_MODE__MIN: + val = t->stats.min; + break; + case TPEBS_MODE__MAX: + val = t->stats.max; + break; + case TPEBS_MODE__LAST: + val = t->last; + break; + default: + case TPEBS_MODE__MEAN: + val = rint(t->stats.mean); + break; + } } mutex_unlock(tpebs_mtx_get()); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5152fd5a6eada..89708d1769c87 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1539,6 +1539,10 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; + evsel->retirement_latency.mean = info.retirement_latency_mean; + evsel->retirement_latency.min = info.retirement_latency_min; + evsel->retirement_latency.max = info.retirement_latency_max; + return 0; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b7ebac5ab1d11..bbb906bb2159f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -77,6 +77,12 @@ struct perf_pmu_alias { char unit[UNIT_MAX_LEN+1]; /** @scale: Value to scale read counter values by. */ double scale; + /** @retirement_latency_mean: Value to be given for unsampled retirement latency mean. */ + double retirement_latency_mean; + /** @retirement_latency_min: Value to be given for unsampled retirement latency min. */ + double retirement_latency_min; + /** @retirement_latency_max: Value to be given for unsampled retirement latency max. */ + double retirement_latency_max; /** * @per_pkg: Does the file * /bus/event_source/devices//events/.per-pkg or @@ -257,7 +263,7 @@ static int pmu_format(struct perf_pmu *pmu, int dirfd, const char *name, bool ea return 0; } -int perf_pmu__convert_scale(const char *scale, char **end, double *sval) +static int parse_double(const char *scale, char **end, double *sval) { char *lc; int ret = 0; @@ -294,6 +300,11 @@ out: return ret; } +int perf_pmu__convert_scale(const char *scale, char **end, double *sval) +{ + return parse_double(scale, end, sval); +} + static int perf_pmu__parse_scale(struct perf_pmu *pmu, struct perf_pmu_alias *alias) { struct stat st; @@ -525,6 +536,18 @@ static int update_alias(const struct pmu_event *pe, if (!ret) snprintf(data->alias->unit, sizeof(data->alias->unit), "%s", unit); } + if (!ret && pe->retirement_latency_mean) { + ret = parse_double(pe->retirement_latency_mean, NULL, + &data->alias->retirement_latency_mean); + } + if (!ret && pe->retirement_latency_min) { + ret = parse_double(pe->retirement_latency_min, NULL, + &data->alias->retirement_latency_min); + } + if (!ret && pe->retirement_latency_max) { + ret = parse_double(pe->retirement_latency_max, NULL, + &data->alias->retirement_latency_max); + } return ret; } @@ -533,7 +556,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, const struct pmu_event *pe, enum event_source src) { struct perf_pmu_alias *alias; - int ret; + int ret = 0; const char *long_desc = NULL, *topic = NULL, *unit = NULL, *pmu_name = NULL; bool deprecated = false, perpkg = false; @@ -562,6 +585,24 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, alias->per_pkg = perpkg; alias->snapshot = false; alias->deprecated = deprecated; + alias->retirement_latency_mean = 0.0; + alias->retirement_latency_min = 0.0; + alias->retirement_latency_max = 0.0; + + if (!ret && pe && pe->retirement_latency_mean) { + ret = parse_double(pe->retirement_latency_mean, NULL, + &alias->retirement_latency_mean); + } + if (!ret && pe && pe->retirement_latency_min) { + ret = parse_double(pe->retirement_latency_min, NULL, + &alias->retirement_latency_min); + } + if (!ret && pe && pe->retirement_latency_max) { + ret = parse_double(pe->retirement_latency_max, NULL, + &alias->retirement_latency_max); + } + if (ret) + return ret; ret = parse_events_terms(&alias->terms, val, val_fd); if (ret) { @@ -1678,6 +1719,9 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ info->unit = NULL; info->scale = 0.0; info->snapshot = false; + info->retirement_latency_mean = 0.0; + info->retirement_latency_min = 0.0; + info->retirement_latency_max = 0.0; if (perf_pmu__is_hwmon(pmu)) { ret = hwmon_pmu__check_alias(head_terms, info, err); @@ -1711,6 +1755,10 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ if (term->alternate_hw_config) *alternate_hw_config = term->val.num; + info->retirement_latency_mean = alias->retirement_latency_mean; + info->retirement_latency_min = alias->retirement_latency_min; + info->retirement_latency_max = alias->retirement_latency_max; + list_del_init(&term->list); parse_events_term__delete(term); } diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index b93014cc3670e..13dd3511f5042 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -194,6 +194,9 @@ struct perf_pmu { struct perf_pmu_info { const char *unit; double scale; + double retirement_latency_mean; + double retirement_latency_min; + double retirement_latency_max; bool per_pkg; bool snapshot; }; -- 2.39.5