From a3248b5b5427dc2126c19aa9c32f1e840b65024f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 11 Nov 2025 13:21:52 -0800 Subject: [PATCH] perf jevents: Add metric DefaultShowEvents Some Default group metrics require their events to be shown for consistency with perf's previous behavior. Add a flag to indicate when this is the case and use it in stat-display. As the events now come from the Default metrics, remove the default hardware and software events from perf stat. Following this change the default perf stat output on an alderlake looks like: ``` $ perf stat -a -- sleep 1 Performance counter stats for 'system wide': 20,550 context-switches # nan cs/sec cs_per_second TopdownL1 (cpu_core) # 9.0 % tma_bad_speculation # 28.1 % tma_frontend_bound TopdownL1 (cpu_core) # 29.2 % tma_backend_bound # 33.7 % tma_retiring 6,685 page-faults # nan faults/sec page_faults_per_second 790,091,064 cpu_atom/cpu-cycles/ # nan GHz cycles_frequency (49.83%) 2,563,918,366 cpu_core/cpu-cycles/ # nan GHz cycles_frequency # 12.3 % tma_bad_speculation # 14.5 % tma_retiring (50.20%) # 33.8 % tma_frontend_bound (50.24%) 76,390,322 cpu_atom/branches/ # nan M/sec branch_frequency (60.20%) 1,015,173,047 cpu_core/branches/ # nan M/sec branch_frequency 1,325 cpu-migrations # nan migrations/sec migrations_per_second # 39.3 % tma_backend_bound (60.17%) 0.00 msec cpu-clock # 0.000 CPUs utilized # 0.0 CPUs CPUs_utilized 554,347,072 cpu_atom/instructions/ # 0.64 insn per cycle # 0.6 instructions insn_per_cycle (60.14%) 5,228,931,991 cpu_core/instructions/ # 2.04 insn per cycle # 2.0 instructions insn_per_cycle 4,308,874 cpu_atom/branch-misses/ # 5.65% of all branches # 5.6 % branch_miss_rate (49.76%) 9,890,606 cpu_core/branch-misses/ # 0.97% of all branches # 1.0 % branch_miss_rate 1.005477803 seconds time elapsed ``` Signed-off-by: Ian Rogers Signed-off-by: Namhyung Kim --- tools/perf/builtin-stat.c | 42 +------ .../arch/common/common/metrics.json | 33 ++++-- tools/perf/pmu-events/empty-pmu-events.c | 
106 +++++++++--------- tools/perf/pmu-events/jevents.py | 7 +- tools/perf/pmu-events/pmu-events.h | 1 + tools/perf/util/evsel.h | 1 + tools/perf/util/metricgroup.c | 13 +++ tools/perf/util/stat-display.c | 4 +- tools/perf/util/stat-shadow.c | 2 +- 9 files changed, 102 insertions(+), 107 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3c46b92a53ab6..31c762695d4b0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1857,16 +1857,6 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) return 0; } -/* Add given software event to evlist without wildcarding. */ -static int parse_software_event(struct evlist *evlist, const char *event, - struct parse_events_error *err) -{ - char buf[256]; - - snprintf(buf, sizeof(buf), "software/%s,name=%s/", event, event); - return parse_events(evlist, buf, err); -} - /* Add legacy hardware/hardware-cache event to evlist for all core PMUs without wildcarding. */ static int parse_hardware_event(struct evlist *evlist, const char *event, struct parse_events_error *err) @@ -2011,36 +2001,10 @@ static int add_default_events(void) stat_config.topdown_level = 1; if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) { - /* No events so add defaults. */ - const char *sw_events[] = { - target__has_cpu(&target) ? "cpu-clock" : "task-clock", - "context-switches", - "cpu-migrations", - "page-faults", - }; - const char *hw_events[] = { - "instructions", - "cycles", - "stalled-cycles-frontend", - "stalled-cycles-backend", - "branches", - "branch-misses", - }; - - for (size_t i = 0; i < ARRAY_SIZE(sw_events); i++) { - ret = parse_software_event(evlist, sw_events[i], &err); - if (ret) - goto out; - } - for (size_t i = 0; i < ARRAY_SIZE(hw_events); i++) { - ret = parse_hardware_event(evlist, hw_events[i], &err); - if (ret) - goto out; - } - /* - * Add TopdownL1 metrics if they exist. To minimize - * multiplexing, don't request threshold computation. + * Add Default metrics. 
To minimize multiplexing, don't request + * threshold computation, but it will be computed if the events + * are present. */ if (metricgroup__has_metric_or_groups(pmu, "Default")) { struct evlist *metric_evlist = evlist__new(); diff --git a/tools/perf/pmu-events/arch/common/common/metrics.json b/tools/perf/pmu-events/arch/common/common/metrics.json index d915be51e300b..d6ea967a40453 100644 --- a/tools/perf/pmu-events/arch/common/common/metrics.json +++ b/tools/perf/pmu-events/arch/common/common/metrics.json @@ -5,7 +5,8 @@ "MetricGroup": "Default", "MetricName": "CPUs_utilized", "ScaleUnit": "1CPUs", - "MetricConstraint": "NO_GROUP_EVENTS" + "MetricConstraint": "NO_GROUP_EVENTS", + "DefaultShowEvents": "1" }, { "BriefDescription": "Context switches per CPU second", @@ -13,7 +14,8 @@ "MetricGroup": "Default", "MetricName": "cs_per_second", "ScaleUnit": "1cs/sec", - "MetricConstraint": "NO_GROUP_EVENTS" + "MetricConstraint": "NO_GROUP_EVENTS", + "DefaultShowEvents": "1" }, { "BriefDescription": "Process migrations to a new CPU per CPU second", @@ -21,7 +23,8 @@ "MetricGroup": "Default", "MetricName": "migrations_per_second", "ScaleUnit": "1migrations/sec", - "MetricConstraint": "NO_GROUP_EVENTS" + "MetricConstraint": "NO_GROUP_EVENTS", + "DefaultShowEvents": "1" }, { "BriefDescription": "Page faults per CPU second", @@ -29,7 +32,8 @@ "MetricGroup": "Default", "MetricName": "page_faults_per_second", "ScaleUnit": "1faults/sec", - "MetricConstraint": "NO_GROUP_EVENTS" + "MetricConstraint": "NO_GROUP_EVENTS", + "DefaultShowEvents": "1" }, { "BriefDescription": "Instructions Per Cycle", @@ -37,27 +41,31 @@ "MetricGroup": "Default", "MetricName": "insn_per_cycle", "MetricThreshold": "insn_per_cycle < 1", - "ScaleUnit": "1instructions" + "ScaleUnit": "1instructions", + "DefaultShowEvents": "1" }, { "BriefDescription": "Max front or backend stalls per instruction", "MetricExpr": "max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions", "MetricGroup": 
"Default", - "MetricName": "stalled_cycles_per_instruction" + "MetricName": "stalled_cycles_per_instruction", + "DefaultShowEvents": "1" }, { "BriefDescription": "Frontend stalls per cycle", "MetricExpr": "stalled\\-cycles\\-frontend / cpu\\-cycles", "MetricGroup": "Default", "MetricName": "frontend_cycles_idle", - "MetricThreshold": "frontend_cycles_idle > 0.1" + "MetricThreshold": "frontend_cycles_idle > 0.1", + "DefaultShowEvents": "1" }, { "BriefDescription": "Backend stalls per cycle", "MetricExpr": "stalled\\-cycles\\-backend / cpu\\-cycles", "MetricGroup": "Default", "MetricName": "backend_cycles_idle", - "MetricThreshold": "backend_cycles_idle > 0.2" + "MetricThreshold": "backend_cycles_idle > 0.2", + "DefaultShowEvents": "1" }, { "BriefDescription": "Cycles per CPU second", @@ -65,7 +73,8 @@ "MetricGroup": "Default", "MetricName": "cycles_frequency", "ScaleUnit": "1GHz", - "MetricConstraint": "NO_GROUP_EVENTS" + "MetricConstraint": "NO_GROUP_EVENTS", + "DefaultShowEvents": "1" }, { "BriefDescription": "Branches per CPU second", @@ -73,7 +82,8 @@ "MetricGroup": "Default", "MetricName": "branch_frequency", "ScaleUnit": "1000M/sec", - "MetricConstraint": "NO_GROUP_EVENTS" + "MetricConstraint": "NO_GROUP_EVENTS", + "DefaultShowEvents": "1" }, { "BriefDescription": "Branch miss rate", @@ -81,6 +91,7 @@ "MetricGroup": "Default", "MetricName": "branch_miss_rate", "MetricThreshold": "branch_miss_rate > 0.05", - "ScaleUnit": "100%" + "ScaleUnit": "100%", + "DefaultShowEvents": "1" } ] diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c index e4d00f6b2b5d9..333a449309103 100644 --- a/tools/perf/pmu-events/empty-pmu-events.c +++ b/tools/perf/pmu-events/empty-pmu-events.c @@ -1303,32 +1303,32 @@ static const char *const big_c_string = /* offset=127519 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000" /* offset=127596 */ "uncore_sys_cmn_pmu\000" /* offset=127615 
*/ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000" -/* offset=127758 */ "CPUs_utilized\000Default\000(software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@) / (duration_time * 1e9)\000\000Average CPU utilization\000\0001CPUs\000\000\000\00001" -/* offset=127943 */ "cs_per_second\000Default\000software@context\\-switches\\,name\\=context\\-switches@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Context switches per CPU second\000\0001cs/sec\000\000\000\00001" -/* offset=128175 */ "migrations_per_second\000Default\000software@cpu\\-migrations\\,name\\=cpu\\-migrations@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Process migrations to a new CPU per CPU second\000\0001migrations/sec\000\000\000\00001" -/* offset=128434 */ "page_faults_per_second\000Default\000software@page\\-faults\\,name\\=page\\-faults@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Page faults per CPU second\000\0001faults/sec\000\000\000\00001" -/* offset=128664 */ "insn_per_cycle\000Default\000instructions / cpu\\-cycles\000insn_per_cycle < 1\000Instructions Per Cycle\000\0001instructions\000\000\000\00000" -/* offset=128776 */ "stalled_cycles_per_instruction\000Default\000max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions\000\000Max front or backend stalls per instruction\000\000\000\000\000\00000" -/* offset=128939 */ "frontend_cycles_idle\000Default\000stalled\\-cycles\\-frontend / cpu\\-cycles\000frontend_cycles_idle > 0.1\000Frontend stalls per cycle\000\000\000\000\000\00000" -/* offset=129068 */ 
"backend_cycles_idle\000Default\000stalled\\-cycles\\-backend / cpu\\-cycles\000backend_cycles_idle > 0.2\000Backend stalls per cycle\000\000\000\000\000\00000" -/* offset=129193 */ "cycles_frequency\000Default\000cpu\\-cycles / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Cycles per CPU second\000\0001GHz\000\000\000\00001" -/* offset=129368 */ "branch_frequency\000Default\000branches / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Branches per CPU second\000\0001000M/sec\000\000\000\00001" -/* offset=129547 */ "branch_miss_rate\000Default\000branch\\-misses / branches\000branch_miss_rate > 0.05\000Branch miss rate\000\000100%\000\000\000\00000" -/* offset=129650 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" -/* offset=129672 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" -/* offset=129735 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" -/* offset=129901 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=129965 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=130032 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" -/* offset=130103 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" -/* offset=130197 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" -/* offset=130331 */ "DCache_L2_All\000\000DCache_L2_All_Hits + 
DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" -/* offset=130395 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=130463 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=130533 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" -/* offset=130555 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" -/* offset=130577 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000" -/* offset=130597 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" +/* offset=127758 */ "CPUs_utilized\000Default\000(software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@) / (duration_time * 1e9)\000\000Average CPU utilization\000\0001CPUs\000\000\000\000011" +/* offset=127944 */ "cs_per_second\000Default\000software@context\\-switches\\,name\\=context\\-switches@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Context switches per CPU second\000\0001cs/sec\000\000\000\000011" +/* offset=128177 */ "migrations_per_second\000Default\000software@cpu\\-migrations\\,name\\=cpu\\-migrations@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Process migrations to a new CPU per CPU second\000\0001migrations/sec\000\000\000\000011" +/* offset=128437 */ "page_faults_per_second\000Default\000software@page\\-faults\\,name\\=page\\-faults@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Page faults per CPU second\000\0001faults/sec\000\000\000\000011" +/* offset=128668 */ "insn_per_cycle\000Default\000instructions / cpu\\-cycles\000insn_per_cycle < 1\000Instructions Per Cycle\000\0001instructions\000\000\000\000001" +/* 
offset=128781 */ "stalled_cycles_per_instruction\000Default\000max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions\000\000Max front or backend stalls per instruction\000\000\000\000\000\000001" +/* offset=128945 */ "frontend_cycles_idle\000Default\000stalled\\-cycles\\-frontend / cpu\\-cycles\000frontend_cycles_idle > 0.1\000Frontend stalls per cycle\000\000\000\000\000\000001" +/* offset=129075 */ "backend_cycles_idle\000Default\000stalled\\-cycles\\-backend / cpu\\-cycles\000backend_cycles_idle > 0.2\000Backend stalls per cycle\000\000\000\000\000\000001" +/* offset=129201 */ "cycles_frequency\000Default\000cpu\\-cycles / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Cycles per CPU second\000\0001GHz\000\000\000\000011" +/* offset=129377 */ "branch_frequency\000Default\000branches / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Branches per CPU second\000\0001000M/sec\000\000\000\000011" +/* offset=129557 */ "branch_miss_rate\000Default\000branch\\-misses / branches\000branch_miss_rate > 0.05\000Branch miss rate\000\000100%\000\000\000\000001" +/* offset=129661 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\000000" +/* offset=129684 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\000000" +/* offset=129748 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\000000" +/* offset=129915 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\000000" +/* offset=129980 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\000000" +/* offset=130048 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + 
icache_miss_cycles\000\000\000\000\000\000\000\000000" +/* offset=130120 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\000000" +/* offset=130215 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\000000" +/* offset=130350 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\000000" +/* offset=130415 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\000000" +/* offset=130484 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\000000" +/* offset=130555 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\000000" +/* offset=130578 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\000000" +/* offset=130601 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\000000" +/* offset=130622 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\000000" ; static const struct compact_pmu_event pmu_events__common_default_core[] = { @@ -2615,17 +2615,17 @@ static const struct pmu_table_entry pmu_events__common[] = { }; static const struct compact_pmu_event pmu_metrics__common_default_core[] = { -{ 127758 }, /* CPUs_utilized\000Default\000(software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@) / (duration_time * 1e9)\000\000Average CPU utilization\000\0001CPUs\000\000\000\00001 */ -{ 129068 }, /* backend_cycles_idle\000Default\000stalled\\-cycles\\-backend / cpu\\-cycles\000backend_cycles_idle > 0.2\000Backend stalls per cycle\000\000\000\000\000\00000 */ -{ 129368 }, /* branch_frequency\000Default\000branches / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Branches per CPU 
second\000\0001000M/sec\000\000\000\00001 */ -{ 129547 }, /* branch_miss_rate\000Default\000branch\\-misses / branches\000branch_miss_rate > 0.05\000Branch miss rate\000\000100%\000\000\000\00000 */ -{ 127943 }, /* cs_per_second\000Default\000software@context\\-switches\\,name\\=context\\-switches@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Context switches per CPU second\000\0001cs/sec\000\000\000\00001 */ -{ 129193 }, /* cycles_frequency\000Default\000cpu\\-cycles / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Cycles per CPU second\000\0001GHz\000\000\000\00001 */ -{ 128939 }, /* frontend_cycles_idle\000Default\000stalled\\-cycles\\-frontend / cpu\\-cycles\000frontend_cycles_idle > 0.1\000Frontend stalls per cycle\000\000\000\000\000\00000 */ -{ 128664 }, /* insn_per_cycle\000Default\000instructions / cpu\\-cycles\000insn_per_cycle < 1\000Instructions Per Cycle\000\0001instructions\000\000\000\00000 */ -{ 128175 }, /* migrations_per_second\000Default\000software@cpu\\-migrations\\,name\\=cpu\\-migrations@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Process migrations to a new CPU per CPU second\000\0001migrations/sec\000\000\000\00001 */ -{ 128434 }, /* page_faults_per_second\000Default\000software@page\\-faults\\,name\\=page\\-faults@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Page faults per CPU second\000\0001faults/sec\000\000\000\00001 */ -{ 128776 }, /* stalled_cycles_per_instruction\000Default\000max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions\000\000Max front or backend stalls per instruction\000\000\000\000\000\00000 */ +{ 127758 }, /* CPUs_utilized\000Default\000(software@cpu\\-clock\\,name\\=cpu\\-clock@ if 
#target_cpu else software@task\\-clock\\,name\\=task\\-clock@) / (duration_time * 1e9)\000\000Average CPU utilization\000\0001CPUs\000\000\000\000011 */ +{ 129075 }, /* backend_cycles_idle\000Default\000stalled\\-cycles\\-backend / cpu\\-cycles\000backend_cycles_idle > 0.2\000Backend stalls per cycle\000\000\000\000\000\000001 */ +{ 129377 }, /* branch_frequency\000Default\000branches / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Branches per CPU second\000\0001000M/sec\000\000\000\000011 */ +{ 129557 }, /* branch_miss_rate\000Default\000branch\\-misses / branches\000branch_miss_rate > 0.05\000Branch miss rate\000\000100%\000\000\000\000001 */ +{ 127944 }, /* cs_per_second\000Default\000software@context\\-switches\\,name\\=context\\-switches@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Context switches per CPU second\000\0001cs/sec\000\000\000\000011 */ +{ 129201 }, /* cycles_frequency\000Default\000cpu\\-cycles / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Cycles per CPU second\000\0001GHz\000\000\000\000011 */ +{ 128945 }, /* frontend_cycles_idle\000Default\000stalled\\-cycles\\-frontend / cpu\\-cycles\000frontend_cycles_idle > 0.1\000Frontend stalls per cycle\000\000\000\000\000\000001 */ +{ 128668 }, /* insn_per_cycle\000Default\000instructions / cpu\\-cycles\000insn_per_cycle < 1\000Instructions Per Cycle\000\0001instructions\000\000\000\000001 */ +{ 128177 }, /* migrations_per_second\000Default\000software@cpu\\-migrations\\,name\\=cpu\\-migrations@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Process migrations to a new CPU per CPU second\000\0001migrations/sec\000\000\000\000011 */ +{ 128437 }, /* 
page_faults_per_second\000Default\000software@page\\-faults\\,name\\=page\\-faults@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Page faults per CPU second\000\0001faults/sec\000\000\000\000011 */ +{ 128781 }, /* stalled_cycles_per_instruction\000Default\000max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions\000\000Max front or backend stalls per instruction\000\000\000\000\000\000001 */ }; @@ -2698,21 +2698,21 @@ static const struct pmu_table_entry pmu_events__test_soc_cpu[] = { }; static const struct compact_pmu_event pmu_metrics__test_soc_cpu_default_core[] = { -{ 129650 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */ -{ 130331 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ -{ 130103 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ -{ 130197 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ -{ 130395 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 130463 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 129735 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ -{ 129672 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ -{ 130597 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ -{ 130533 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ -{ 130555 }, /* M2\000\000ipc + 
M1\000\000\000\000\000\000\000\00000 */ -{ 130577 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ -{ 130032 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ -{ 129901 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ -{ 129965 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ +{ 129661 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\000000 */ +{ 130350 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\000000 */ +{ 130120 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\000000 */ +{ 130215 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\000000 */ +{ 130415 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\000000 */ +{ 130484 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\000000 */ +{ 129748 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\000000 */ +{ 129684 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\000000 */ +{ 130622 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\000000 */ +{ 130555 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\000000 */ +{ 130578 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\000000 */ +{ 130601 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\000000 */ +{ 130048 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + 
icache_miss_cycles\000\000\000\000\000\000\000\000000 */ +{ 129915 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\000000 */ +{ 129980 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\000000 */ }; @@ -2894,6 +2894,8 @@ static void decompress_metric(int offset, struct pmu_metric *pm) pm->aggr_mode = *p - '0'; p++; pm->event_grouping = *p - '0'; + p++; + pm->default_show_events = *p - '0'; } static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table, diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 5d3f4b44cfb7f..3413ee5d02274 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -58,10 +58,12 @@ _json_event_attributes = [ _json_metric_attributes = [ 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold', 'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group', - 'default_metricgroup_name', 'aggr_mode', 'event_grouping' + 'default_metricgroup_name', 'aggr_mode', 'event_grouping', + 'default_show_events' ] # Attributes that are bools or enum int values, encoded as '0', '1',... 
-_json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg'] +_json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg', + 'default_show_events'] def removesuffix(s: str, suffix: str) -> str: """Remove the suffix from a string @@ -356,6 +358,7 @@ class JsonEvent: self.metricgroup_no_group = jd.get('MetricgroupNoGroup') self.default_metricgroup_name = jd.get('DefaultMetricgroupName') self.event_grouping = convert_metric_constraint(jd.get('MetricConstraint')) + self.default_show_events = jd.get('DefaultShowEvents') self.metric_expr = None if 'MetricExpr' in jd: self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify() diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 559265a903c85..d3b24014c6ff1 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -74,6 +74,7 @@ struct pmu_metric { const char *default_metricgroup_name; enum aggr_mode_class aggr_mode; enum metric_event_groups event_grouping; + bool default_show_events; }; struct pmu_events_table; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 71f74c7036efb..3ae4ac8f9a37e 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -122,6 +122,7 @@ struct evsel { bool reset_group; bool needs_auxtrace_mmap; bool default_metricgroup; /* A member of the Default metricgroup */ + bool default_show_events; /* If a default group member, show the event */ bool needs_uniquify; struct hashmap *per_pkg_mask; int err; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index e67e04ce01c99..25c75fdbfc525 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -152,6 +152,8 @@ struct metric { * Should events of the metric be grouped? */ bool group_events; + /** Show events even if in the Default metric group. */ + bool default_show_events; /** * Parsed events for the metric. 
Optional as events may be taken from a * different metric whose group contains all the IDs necessary for this @@ -255,6 +257,7 @@ static struct metric *metric__new(const struct pmu_metric *pm, m->pctx->sctx.runtime = runtime; m->pctx->sctx.system_wide = system_wide; m->group_events = !metric_no_group && metric__group_events(pm, metric_no_threshold); + m->default_show_events = pm->default_show_events; m->metric_refs = NULL; m->evlist = NULL; @@ -1513,6 +1516,16 @@ static int parse_groups(struct evlist *perf_evlist, free(metric_events); goto out; } + if (m->default_show_events) { + struct evsel *pos; + + for (int i = 0; metric_events[i]; i++) + metric_events[i]->default_show_events = true; + evlist__for_each_entry(metric_evlist, pos) { + if (pos->metric_leader && pos->metric_leader->default_show_events) + pos->default_show_events = true; + } + } expr->metric_threshold = m->metric_threshold; expr->metric_unit = m->metric_unit; expr->metric_events = metric_events; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a67b991f4e810..4d0e353846ead 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -872,7 +872,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, out.ctx = os; out.force_header = false; - if (!config->metric_only && !counter->default_metricgroup) { + if (!config->metric_only && (!counter->default_metricgroup || counter->default_show_events)) { abs_printout(config, os, os->id, os->aggr_nr, counter, uval, ok); print_noise(config, os, counter, noise, /*before_metric=*/true); @@ -880,7 +880,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, } if (ok) { - if (!config->metric_only && counter->default_metricgroup) { + if (!config->metric_only && counter->default_metricgroup && !counter->default_show_events) { void *from = NULL; aggr_printout(config, os, os->evsel, os->id, os->aggr_nr); diff --git a/tools/perf/util/stat-shadow.c 
b/tools/perf/util/stat-shadow.c index abaf6b579bfc2..4df614f8e2005 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -665,7 +665,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, if (strcmp(name, mexp->default_metricgroup_name)) return (void *)mexp; /* Only print the name of the metricgroup once */ - if (!header_printed) { + if (!header_printed && !evsel->default_show_events) { header_printed = true; perf_stat__print_metricgroup_header(config, evsel, ctxp, name, out); -- 2.47.3