This is an actual example of the he_mem_stat based sample breakdown. It
uses 'mem_op' field of union perf_mem_data_src which means memory
operations.
It'd have basically 'load' or 'store' which can be useful if PMU doesn't
have separate events for them like IBS or SPE. In addition, there's an
entry in case load and store happen at the same time. Also adds entries
for prefetching and execution.
$ perf mem report -F +op -s comm --stdio
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 4K of event 'ibs_op//'
# Total weight : 9559
# Sort order : comm
#
# --------------------- Mem Op ----------------------
# Overhead Samples Load Store Ld+St Pfetch Exec Other N/A N/A Command
# ........ ....... ................................................... ...............
#
44.85% 4077 21.1% 30.7% 0.0% 0.0% 0.0% 48.3% 0.0% 0.0% swapper
26.82% 45 98.8% 0.3% 0.0% 0.0% 0.0% 0.9% 0.0% 0.0% netsli-prober
7.19% 442 51.7% 13.7% 0.0% 0.0% 0.0% 34.6% 0.0% 0.0% perf
5.81% 75 89.7% 2.2% 0.0% 0.0% 0.0% 8.1% 0.0% 0.0% qemu-system-ppc
4.77% 1 100.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% notifications_c
1.77% 10 95.9% 1.2% 0.0% 0.0% 0.0% 3.0% 0.0% 0.0% MemoryReleaser
0.77% 32 71.6% 4.1% 0.0% 0.0% 0.0% 24.3% 0.0% 0.0% DefaultEventMan
0.19% 10 66.7% 22.2% 0.0% 0.0% 0.0% 11.1% 0.0% 0.0% gnome-shell
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20250430205548.789750-8-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us, PERF_HPP_FMT_TYPE__PERCENT)
__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period, PERF_HPP_FMT_TYPE__PERCENT)
__HPP_COLOR_ACC_PERCENT_FN(latency_acc, latency, PERF_HPP_FMT_TYPE__LATENCY)
+__HPP_COLOR_MEM_STAT_FN(op, OP)
#undef __HPP_COLOR_PERCENT_FN
#undef __HPP_COLOR_ACC_PERCENT_FN
hist_browser__hpp_color_overhead_acc;
perf_hpp__format[PERF_HPP__LATENCY_ACC].color =
hist_browser__hpp_color_latency_acc;
+ perf_hpp__format[PERF_HPP__MEM_STAT_OP].color =
+ hist_browser__hpp_color_mem_stat_op;
res_sample_init();
}
if (!perf_hpp__is_mem_stat_entry(fmt))
return -1;
+ switch (fmt->idx) {
+ case PERF_HPP__MEM_STAT_OP:
+ return PERF_MEM_STAT_OP;
+ default:
+ break;
+ }
pr_debug("Should not reach here\n");
return -1;
}
HPP_AVERAGE_FNS(weight2, weight2)
HPP_AVERAGE_FNS(weight3, weight3)
-HPP_MEM_STAT_FNS(unknown, UNKNOWN) /* placeholder */
+HPP_MEM_STAT_FNS(op, OP)
static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *a __maybe_unused,
HPP__PRINT_FNS("Weight1", weight1, WEIGHT1),
HPP__PRINT_FNS("Weight2", weight2, WEIGHT2),
HPP__PRINT_FNS("Weight3", weight3, WEIGHT3),
- HPP__MEM_STAT_PRINT_FNS("Unknown", unknown, UNKNOWN), /* placeholder */
+ HPP__MEM_STAT_PRINT_FNS("Mem Op", op, OP),
};
struct perf_hpp_list perf_hpp_list = {
fmt->len = 8;
break;
- case PERF_HPP__MEM_STAT_UNKNOWN: /* placeholder */
+ case PERF_HPP__MEM_STAT_OP:
fmt->len = MEM_STAT_LEN * MEM_STAT_PRINT_LEN;
break;
PERF_HPP__WEIGHT1,
PERF_HPP__WEIGHT2,
PERF_HPP__WEIGHT3,
- PERF_HPP__MEM_STAT_UNKNOWN, /* placeholder */
+ PERF_HPP__MEM_STAT_OP,
PERF_HPP__MAX_INDEX
};
}
if (cpu_map) {
- struct perf_cpu_map *online = cpu_map__online();
-
- if (!perf_cpu_map__equal(cpu_map, online)) {
+ if (!perf_cpu_map__equal(cpu_map, cpu_map__online())) {
char buf[200];
cpu_map__snprint(cpu_map, buf, sizeof(buf));
pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf);
}
- perf_cpu_map__put(online);
perf_cpu_map__put(cpu_map);
}
/*
* It returns an index in hist_entry->mem_stat array for the given val which
* represents a data-src based on the mem_stat_type.
- *
- * For example, when mst is about cache level, the index can be 1 for L1, 2 for
- * L2 and so on.
*/
int mem_stat_index(const enum mem_stat_type mst, const u64 val)
{
+ union perf_mem_data_src src = {
+ .val = val,
+ };
+
switch (mst) {
- case PERF_MEM_STAT_UNKNOWN: /* placeholder */
+ case PERF_MEM_STAT_OP:
+ switch (src.mem_op) {
+ case PERF_MEM_OP_LOAD:
+ return MEM_STAT_OP_LOAD;
+ case PERF_MEM_OP_STORE:
+ return MEM_STAT_OP_STORE;
+ case PERF_MEM_OP_LOAD | PERF_MEM_OP_STORE:
+ return MEM_STAT_OP_LDST;
+ default:
+ if (src.mem_op & PERF_MEM_OP_PFETCH)
+ return MEM_STAT_OP_PFETCH;
+ if (src.mem_op & PERF_MEM_OP_EXEC)
+ return MEM_STAT_OP_EXEC;
+ return MEM_STAT_OP_OTHER;
+ }
default:
break;
}
- (void)val;
return -1;
}
const char *mem_stat_name(const enum mem_stat_type mst, const int idx)
{
switch (mst) {
- case PERF_MEM_STAT_UNKNOWN:
+ case PERF_MEM_STAT_OP:
+ switch (idx) {
+ case MEM_STAT_OP_LOAD:
+ return "Load";
+ case MEM_STAT_OP_STORE:
+ return "Store";
+ case MEM_STAT_OP_LDST:
+ return "Ld+St";
+ case MEM_STAT_OP_PFETCH:
+ return "Pfetch";
+ case MEM_STAT_OP_EXEC:
+ return "Exec";
+ case MEM_STAT_OP_OTHER:
+ return "Other";
+ default:
+ break;
+ }
default:
break;
}
- (void)idx;
return "N/A";
}
void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add);
enum mem_stat_type {
- PERF_MEM_STAT_UNKNOWN, /* placeholder */
+ PERF_MEM_STAT_OP,
+};
+
+enum mem_stat_op {
+ MEM_STAT_OP_LOAD,
+ MEM_STAT_OP_STORE,
+ MEM_STAT_OP_LDST,
+ MEM_STAT_OP_PFETCH,
+ MEM_STAT_OP_EXEC,
+ MEM_STAT_OP_OTHER,
};
#define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 digits + 1 percent sign */
DIM(PERF_HPP__WEIGHT3, "retire_lat"),
DIM(PERF_HPP__WEIGHT3, "p_stage_cyc"),
/* used for output only when SORT_MODE__MEM */
- DIM_MEM(PERF_HPP__MEM_STAT_UNKNOWN, "unknown"), /* placeholder */
+ DIM_MEM(PERF_HPP__MEM_STAT_OP, "op"),
};
#undef DIM_MEM