1 // SPDX-License-Identifier: GPL-2.0
10 #include "metricgroup.h"
11 #include <linux/zalloc.h>
14 * AGGR_GLOBAL: Use CPU 0
15 * AGGR_SOCKET: Use first CPU of socket
16 * AGGR_DIE: Use first CPU of die
17 * AGGR_CORE: Use first CPU of core
18 * AGGR_NONE: Use matching CPU
19 * AGGR_THREAD: Not supported?
/* Default runtime stats instance, used when callers pass no 'st'. */
22 struct runtime_stat rt_stat
;
/* Wall-clock run time of the whole workload, in nanoseconds. */
23 struct stats walltime_nsecs_stats
;
26 struct rb_node rb_node
;
31 struct runtime_stat
*stat
;
37 static int saved_value_cmp(struct rb_node
*rb_node
, const void *entry
)
39 struct saved_value
*a
= container_of(rb_node
,
42 const struct saved_value
*b
= entry
;
45 return a
->cpu
- b
->cpu
;
48 * Previously the rbtree was used to link generic metrics.
49 * The keys were evsel/cpu. Now the rbtree is extended to support
50 * per-thread shadow stats. For shadow stats case, the keys
51 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
52 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
54 if (a
->type
!= b
->type
)
55 return a
->type
- b
->type
;
58 return a
->ctx
- b
->ctx
;
60 if (a
->evsel
== NULL
&& b
->evsel
== NULL
) {
61 if (a
->stat
== b
->stat
)
64 if ((char *)a
->stat
< (char *)b
->stat
)
70 if (a
->evsel
== b
->evsel
)
72 if ((char *)a
->evsel
< (char *)b
->evsel
)
77 static struct rb_node
*saved_value_new(struct rblist
*rblist __maybe_unused
,
80 struct saved_value
*nd
= malloc(sizeof(struct saved_value
));
84 memcpy(nd
, entry
, sizeof(struct saved_value
));
88 static void saved_value_delete(struct rblist
*rblist __maybe_unused
,
89 struct rb_node
*rb_node
)
91 struct saved_value
*v
;
94 v
= container_of(rb_node
, struct saved_value
, rb_node
);
98 static struct saved_value
*saved_value_lookup(struct evsel
*evsel
,
103 struct runtime_stat
*st
)
105 struct rblist
*rblist
;
107 struct saved_value dm
= {
115 rblist
= &st
->value_list
;
117 nd
= rblist__find(rblist
, &dm
);
119 return container_of(nd
, struct saved_value
, rb_node
);
121 rblist__add_node(rblist
, &dm
);
122 nd
= rblist__find(rblist
, &dm
);
124 return container_of(nd
, struct saved_value
, rb_node
);
129 void runtime_stat__init(struct runtime_stat
*st
)
131 struct rblist
*rblist
= &st
->value_list
;
133 rblist__init(rblist
);
134 rblist
->node_cmp
= saved_value_cmp
;
135 rblist
->node_new
= saved_value_new
;
136 rblist
->node_delete
= saved_value_delete
;
139 void runtime_stat__exit(struct runtime_stat
*st
)
141 rblist__exit(&st
->value_list
);
144 void perf_stat__init_shadow_stats(void)
146 runtime_stat__init(&rt_stat
);
149 static int evsel_context(struct evsel
*evsel
)
153 if (evsel
->core
.attr
.exclude_kernel
)
154 ctx
|= CTX_BIT_KERNEL
;
155 if (evsel
->core
.attr
.exclude_user
)
157 if (evsel
->core
.attr
.exclude_hv
)
159 if (evsel
->core
.attr
.exclude_host
)
161 if (evsel
->core
.attr
.exclude_idle
)
167 static void reset_stat(struct runtime_stat
*st
)
169 struct rblist
*rblist
;
170 struct rb_node
*pos
, *next
;
172 rblist
= &st
->value_list
;
173 next
= rb_first_cached(&rblist
->entries
);
177 memset(&container_of(pos
, struct saved_value
, rb_node
)->stats
,
179 sizeof(struct stats
));
183 void perf_stat__reset_shadow_stats(void)
185 reset_stat(&rt_stat
);
186 memset(&walltime_nsecs_stats
, 0, sizeof(walltime_nsecs_stats
));
/* Reset one specific (per-thread) shadow-stat store. */
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}
194 static void update_runtime_stat(struct runtime_stat
*st
,
196 int ctx
, int cpu
, u64 count
)
198 struct saved_value
*v
= saved_value_lookup(NULL
, cpu
, true,
202 update_stats(&v
->stats
, count
);
206 * Update various tracking values we maintain to print
207 * more semantic information such as miss/hit ratios,
208 * instruction rates, etc:
210 void perf_stat__update_shadow_stats(struct evsel
*counter
, u64 count
,
211 int cpu
, struct runtime_stat
*st
)
213 int ctx
= evsel_context(counter
);
214 u64 count_ns
= count
;
215 struct saved_value
*v
;
217 count
*= counter
->scale
;
219 if (perf_evsel__is_clock(counter
))
220 update_runtime_stat(st
, STAT_NSECS
, 0, cpu
, count_ns
);
221 else if (perf_evsel__match(counter
, HARDWARE
, HW_CPU_CYCLES
))
222 update_runtime_stat(st
, STAT_CYCLES
, ctx
, cpu
, count
);
223 else if (perf_stat_evsel__is(counter
, CYCLES_IN_TX
))
224 update_runtime_stat(st
, STAT_CYCLES_IN_TX
, ctx
, cpu
, count
);
225 else if (perf_stat_evsel__is(counter
, TRANSACTION_START
))
226 update_runtime_stat(st
, STAT_TRANSACTION
, ctx
, cpu
, count
);
227 else if (perf_stat_evsel__is(counter
, ELISION_START
))
228 update_runtime_stat(st
, STAT_ELISION
, ctx
, cpu
, count
);
229 else if (perf_stat_evsel__is(counter
, TOPDOWN_TOTAL_SLOTS
))
230 update_runtime_stat(st
, STAT_TOPDOWN_TOTAL_SLOTS
,
232 else if (perf_stat_evsel__is(counter
, TOPDOWN_SLOTS_ISSUED
))
233 update_runtime_stat(st
, STAT_TOPDOWN_SLOTS_ISSUED
,
235 else if (perf_stat_evsel__is(counter
, TOPDOWN_SLOTS_RETIRED
))
236 update_runtime_stat(st
, STAT_TOPDOWN_SLOTS_RETIRED
,
238 else if (perf_stat_evsel__is(counter
, TOPDOWN_FETCH_BUBBLES
))
239 update_runtime_stat(st
, STAT_TOPDOWN_FETCH_BUBBLES
,
241 else if (perf_stat_evsel__is(counter
, TOPDOWN_RECOVERY_BUBBLES
))
242 update_runtime_stat(st
, STAT_TOPDOWN_RECOVERY_BUBBLES
,
244 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
))
245 update_runtime_stat(st
, STAT_STALLED_CYCLES_FRONT
,
247 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
))
248 update_runtime_stat(st
, STAT_STALLED_CYCLES_BACK
,
250 else if (perf_evsel__match(counter
, HARDWARE
, HW_BRANCH_INSTRUCTIONS
))
251 update_runtime_stat(st
, STAT_BRANCHES
, ctx
, cpu
, count
);
252 else if (perf_evsel__match(counter
, HARDWARE
, HW_CACHE_REFERENCES
))
253 update_runtime_stat(st
, STAT_CACHEREFS
, ctx
, cpu
, count
);
254 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1D
))
255 update_runtime_stat(st
, STAT_L1_DCACHE
, ctx
, cpu
, count
);
256 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1I
))
257 update_runtime_stat(st
, STAT_L1_ICACHE
, ctx
, cpu
, count
);
258 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_LL
))
259 update_runtime_stat(st
, STAT_LL_CACHE
, ctx
, cpu
, count
);
260 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_DTLB
))
261 update_runtime_stat(st
, STAT_DTLB_CACHE
, ctx
, cpu
, count
);
262 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_ITLB
))
263 update_runtime_stat(st
, STAT_ITLB_CACHE
, ctx
, cpu
, count
);
264 else if (perf_stat_evsel__is(counter
, SMI_NUM
))
265 update_runtime_stat(st
, STAT_SMI_NUM
, ctx
, cpu
, count
);
266 else if (perf_stat_evsel__is(counter
, APERF
))
267 update_runtime_stat(st
, STAT_APERF
, ctx
, cpu
, count
);
269 if (counter
->collect_stat
) {
270 v
= saved_value_lookup(counter
, cpu
, true, STAT_NONE
, 0, st
);
271 update_stats(&v
->stats
, count
);
272 if (counter
->metric_leader
)
273 v
->metric_total
+= count
;
274 } else if (counter
->metric_leader
) {
275 v
= saved_value_lookup(counter
->metric_leader
,
276 cpu
, true, STAT_NONE
, 0, st
);
277 v
->metric_total
+= count
;
282 /* used for get_ratio_color() */
284 GRC_STALLED_CYCLES_FE
,
285 GRC_STALLED_CYCLES_BE
,
290 static const char *get_ratio_color(enum grc_type type
, double ratio
)
292 static const double grc_table
[GRC_MAX_NR
][3] = {
293 [GRC_STALLED_CYCLES_FE
] = { 50.0, 30.0, 10.0 },
294 [GRC_STALLED_CYCLES_BE
] = { 75.0, 50.0, 20.0 },
295 [GRC_CACHE_MISSES
] = { 20.0, 10.0, 5.0 },
297 const char *color
= PERF_COLOR_NORMAL
;
299 if (ratio
> grc_table
[type
][0])
300 color
= PERF_COLOR_RED
;
301 else if (ratio
> grc_table
[type
][1])
302 color
= PERF_COLOR_MAGENTA
;
303 else if (ratio
> grc_table
[type
][2])
304 color
= PERF_COLOR_YELLOW
;
309 static struct evsel
*perf_stat__find_event(struct evlist
*evsel_list
,
314 evlist__for_each_entry (evsel_list
, c2
) {
315 if (!strcasecmp(c2
->name
, name
) && !c2
->collect_stat
)
321 /* Mark MetricExpr target events and link events using them to them. */
322 void perf_stat__collect_metric_expr(struct evlist
*evsel_list
)
324 struct evsel
*counter
, *leader
, **metric_events
, *oc
;
326 const char **metric_names
;
328 int num_metric_names
;
330 evlist__for_each_entry(evsel_list
, counter
) {
331 bool invalid
= false;
333 leader
= counter
->leader
;
334 if (!counter
->metric_expr
)
336 metric_events
= counter
->metric_events
;
337 if (!metric_events
) {
338 if (expr__find_other(counter
->metric_expr
, counter
->name
,
339 &metric_names
, &num_metric_names
) < 0)
342 metric_events
= calloc(sizeof(struct evsel
*),
343 num_metric_names
+ 1);
346 counter
->metric_events
= metric_events
;
349 for (i
= 0; i
< num_metric_names
; i
++) {
352 /* Search in group */
353 for_each_group_member (oc
, leader
) {
354 if (!strcasecmp(oc
->name
, metric_names
[i
]) &&
362 /* Search ignoring groups */
363 oc
= perf_stat__find_event(evsel_list
, metric_names
[i
]);
366 /* Deduping one is good enough to handle duplicated PMUs. */
367 static char *printed
;
370 * Adding events automatically would be difficult, because
371 * it would risk creating groups that are not schedulable.
372 * perf stat doesn't understand all the scheduling constraints
373 * of events. So we ask the user instead to add the missing
376 if (!printed
|| strcasecmp(printed
, metric_names
[i
])) {
378 "Add %s event to groups to get metric expression for %s\n",
381 printed
= strdup(metric_names
[i
]);
386 metric_events
[i
] = oc
;
387 oc
->collect_stat
= true;
389 metric_events
[i
] = NULL
;
393 counter
->metric_events
= NULL
;
394 counter
->metric_expr
= NULL
;
399 static double runtime_stat_avg(struct runtime_stat
*st
,
400 enum stat_type type
, int ctx
, int cpu
)
402 struct saved_value
*v
;
404 v
= saved_value_lookup(NULL
, cpu
, false, type
, ctx
, st
);
408 return avg_stats(&v
->stats
);
411 static double runtime_stat_n(struct runtime_stat
*st
,
412 enum stat_type type
, int ctx
, int cpu
)
414 struct saved_value
*v
;
416 v
= saved_value_lookup(NULL
, cpu
, false, type
, ctx
, st
);
423 static void print_stalled_cycles_frontend(struct perf_stat_config
*config
,
425 struct evsel
*evsel
, double avg
,
426 struct perf_stat_output_ctx
*out
,
427 struct runtime_stat
*st
)
429 double total
, ratio
= 0.0;
431 int ctx
= evsel_context(evsel
);
433 total
= runtime_stat_avg(st
, STAT_CYCLES
, ctx
, cpu
);
436 ratio
= avg
/ total
* 100.0;
438 color
= get_ratio_color(GRC_STALLED_CYCLES_FE
, ratio
);
441 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "frontend cycles idle",
444 out
->print_metric(config
, out
->ctx
, NULL
, NULL
, "frontend cycles idle", 0);
447 static void print_stalled_cycles_backend(struct perf_stat_config
*config
,
449 struct evsel
*evsel
, double avg
,
450 struct perf_stat_output_ctx
*out
,
451 struct runtime_stat
*st
)
453 double total
, ratio
= 0.0;
455 int ctx
= evsel_context(evsel
);
457 total
= runtime_stat_avg(st
, STAT_CYCLES
, ctx
, cpu
);
460 ratio
= avg
/ total
* 100.0;
462 color
= get_ratio_color(GRC_STALLED_CYCLES_BE
, ratio
);
464 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "backend cycles idle", ratio
);
467 static void print_branch_misses(struct perf_stat_config
*config
,
471 struct perf_stat_output_ctx
*out
,
472 struct runtime_stat
*st
)
474 double total
, ratio
= 0.0;
476 int ctx
= evsel_context(evsel
);
478 total
= runtime_stat_avg(st
, STAT_BRANCHES
, ctx
, cpu
);
481 ratio
= avg
/ total
* 100.0;
483 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
485 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "of all branches", ratio
);
488 static void print_l1_dcache_misses(struct perf_stat_config
*config
,
492 struct perf_stat_output_ctx
*out
,
493 struct runtime_stat
*st
)
496 double total
, ratio
= 0.0;
498 int ctx
= evsel_context(evsel
);
500 total
= runtime_stat_avg(st
, STAT_L1_DCACHE
, ctx
, cpu
);
503 ratio
= avg
/ total
* 100.0;
505 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
507 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "of all L1-dcache hits", ratio
);
510 static void print_l1_icache_misses(struct perf_stat_config
*config
,
514 struct perf_stat_output_ctx
*out
,
515 struct runtime_stat
*st
)
518 double total
, ratio
= 0.0;
520 int ctx
= evsel_context(evsel
);
522 total
= runtime_stat_avg(st
, STAT_L1_ICACHE
, ctx
, cpu
);
525 ratio
= avg
/ total
* 100.0;
527 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
528 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "of all L1-icache hits", ratio
);
531 static void print_dtlb_cache_misses(struct perf_stat_config
*config
,
535 struct perf_stat_output_ctx
*out
,
536 struct runtime_stat
*st
)
538 double total
, ratio
= 0.0;
540 int ctx
= evsel_context(evsel
);
542 total
= runtime_stat_avg(st
, STAT_DTLB_CACHE
, ctx
, cpu
);
545 ratio
= avg
/ total
* 100.0;
547 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
548 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "of all dTLB cache hits", ratio
);
551 static void print_itlb_cache_misses(struct perf_stat_config
*config
,
555 struct perf_stat_output_ctx
*out
,
556 struct runtime_stat
*st
)
558 double total
, ratio
= 0.0;
560 int ctx
= evsel_context(evsel
);
562 total
= runtime_stat_avg(st
, STAT_ITLB_CACHE
, ctx
, cpu
);
565 ratio
= avg
/ total
* 100.0;
567 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
568 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "of all iTLB cache hits", ratio
);
571 static void print_ll_cache_misses(struct perf_stat_config
*config
,
575 struct perf_stat_output_ctx
*out
,
576 struct runtime_stat
*st
)
578 double total
, ratio
= 0.0;
580 int ctx
= evsel_context(evsel
);
582 total
= runtime_stat_avg(st
, STAT_LL_CACHE
, ctx
, cpu
);
585 ratio
= avg
/ total
* 100.0;
587 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
588 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "of all LL-cache hits", ratio
);
592 * High level "TopDown" CPU core pipe line bottleneck break down.
594 * Basic concept following
595 * Yasin, A Top Down Method for Performance analysis and Counter architecture
598 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
600 * Frontend -> Backend -> Retiring
601 * BadSpeculation in addition means out of order execution that is thrown away
602 * (for example branch mispredictions)
603 * Frontend is instruction decoding.
604 * Backend is execution, like computation and accessing data in memory
605 * Retiring is good execution that is not directly bottlenecked
607 * The formulas are computed in slots.
608 * A slot is an entry in the pipeline each for the pipeline width
609 * (for example a 4-wide pipeline has 4 slots for each cycle)
612 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
614 * Retiring = SlotsRetired / TotalSlots
615 * FrontendBound = FetchBubbles / TotalSlots
616 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
618 * The kernel provides the mapping to the low level CPU events and any scaling
619 * needed for the CPU pipeline width, for example:
621 * TotalSlots = Cycles * 4
623 * The scaling factor is communicated in the sysfs unit.
625 * In some cases the CPU may not be able to measure all the formulas due to
626 * missing events. In this case multiple formulas are combined, as possible.
628 * Full TopDown supports more levels to sub-divide each area: for example
629 * BackendBound into computing bound and memory bound. For now we only
630 * support Level 1 TopDown.
/*
 * Clamp tiny negative TopDown fractions (rounding artifacts from the
 * formulas above) to zero; anything below -0.02 is left visible as a
 * real anomaly.
 */
static double sanitize_val(double x)
{
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}
640 static double td_total_slots(int ctx
, int cpu
, struct runtime_stat
*st
)
642 return runtime_stat_avg(st
, STAT_TOPDOWN_TOTAL_SLOTS
, ctx
, cpu
);
645 static double td_bad_spec(int ctx
, int cpu
, struct runtime_stat
*st
)
651 total
= runtime_stat_avg(st
, STAT_TOPDOWN_SLOTS_ISSUED
, ctx
, cpu
) -
652 runtime_stat_avg(st
, STAT_TOPDOWN_SLOTS_RETIRED
, ctx
, cpu
) +
653 runtime_stat_avg(st
, STAT_TOPDOWN_RECOVERY_BUBBLES
, ctx
, cpu
);
655 total_slots
= td_total_slots(ctx
, cpu
, st
);
657 bad_spec
= total
/ total_slots
;
658 return sanitize_val(bad_spec
);
661 static double td_retiring(int ctx
, int cpu
, struct runtime_stat
*st
)
664 double total_slots
= td_total_slots(ctx
, cpu
, st
);
665 double ret_slots
= runtime_stat_avg(st
, STAT_TOPDOWN_SLOTS_RETIRED
,
669 retiring
= ret_slots
/ total_slots
;
673 static double td_fe_bound(int ctx
, int cpu
, struct runtime_stat
*st
)
676 double total_slots
= td_total_slots(ctx
, cpu
, st
);
677 double fetch_bub
= runtime_stat_avg(st
, STAT_TOPDOWN_FETCH_BUBBLES
,
681 fe_bound
= fetch_bub
/ total_slots
;
/*
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound.
 * When nothing was measured (sum == 0) report 0 rather than 1.
 */
static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double sum = (td_fe_bound(ctx, cpu, st) +
		      td_bad_spec(ctx, cpu, st) +
		      td_retiring(ctx, cpu, st));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}
695 static void print_smi_cost(struct perf_stat_config
*config
,
696 int cpu
, struct evsel
*evsel
,
697 struct perf_stat_output_ctx
*out
,
698 struct runtime_stat
*st
)
700 double smi_num
, aperf
, cycles
, cost
= 0.0;
701 int ctx
= evsel_context(evsel
);
702 const char *color
= NULL
;
704 smi_num
= runtime_stat_avg(st
, STAT_SMI_NUM
, ctx
, cpu
);
705 aperf
= runtime_stat_avg(st
, STAT_APERF
, ctx
, cpu
);
706 cycles
= runtime_stat_avg(st
, STAT_CYCLES
, ctx
, cpu
);
708 if ((cycles
== 0) || (aperf
== 0))
712 cost
= (aperf
- cycles
) / aperf
* 100.00;
715 color
= PERF_COLOR_RED
;
716 out
->print_metric(config
, out
->ctx
, color
, "%8.1f%%", "SMI cycles%", cost
);
717 out
->print_metric(config
, out
->ctx
, NULL
, "%4.0f", "SMI#", smi_num
);
720 static void generic_metric(struct perf_stat_config
*config
,
721 const char *metric_expr
,
722 struct evsel
**metric_events
,
724 const char *metric_name
,
725 const char *metric_unit
,
728 struct perf_stat_output_ctx
*out
,
729 struct runtime_stat
*st
)
731 print_metric_t print_metric
= out
->print_metric
;
732 struct parse_ctx pctx
;
735 void *ctxp
= out
->ctx
;
738 expr__ctx_init(&pctx
);
739 /* Must be first id entry */
740 expr__add_id(&pctx
, name
, avg
);
741 for (i
= 0; metric_events
[i
]; i
++) {
742 struct saved_value
*v
;
744 u64 metric_total
= 0;
746 if (!strcmp(metric_events
[i
]->name
, "duration_time")) {
747 stats
= &walltime_nsecs_stats
;
750 v
= saved_value_lookup(metric_events
[i
], cpu
, false,
758 metric_total
= v
->metric_total
;
761 n
= strdup(metric_events
[i
]->name
);
765 * This display code with --no-merge adds [cpu] postfixes.
766 * These are not supported by the parser. Remove everything
774 expr__add_id(&pctx
, n
, metric_total
);
776 expr__add_id(&pctx
, n
, avg_stats(stats
)*scale
);
779 if (!metric_events
[i
]) {
780 if (expr__parse(&ratio
, &pctx
, metric_expr
) == 0) {
784 if (metric_unit
&& metric_name
) {
785 if (perf_pmu__convert_scale(metric_unit
,
786 &unit
, &scale
) >= 0) {
790 scnprintf(metric_bf
, sizeof(metric_bf
),
791 "%s %s", unit
, metric_name
);
792 print_metric(config
, ctxp
, NULL
, "%8.1f",
795 print_metric(config
, ctxp
, NULL
, "%8.1f",
798 out
->force_header
? name
: "",
802 print_metric(config
, ctxp
, NULL
, NULL
,
804 (metric_name
? metric_name
: name
) : "", 0);
807 print_metric(config
, ctxp
, NULL
, NULL
,
809 (metric_name
? metric_name
: name
) : "", 0);
812 for (i
= 1; i
< pctx
.num_ids
; i
++)
813 zfree(&pctx
.ids
[i
].name
);
816 void perf_stat__print_shadow_stats(struct perf_stat_config
*config
,
819 struct perf_stat_output_ctx
*out
,
820 struct rblist
*metric_events
,
821 struct runtime_stat
*st
)
823 void *ctxp
= out
->ctx
;
824 print_metric_t print_metric
= out
->print_metric
;
825 double total
, ratio
= 0.0, total2
;
826 const char *color
= NULL
;
827 int ctx
= evsel_context(evsel
);
828 struct metric_event
*me
;
831 if (perf_evsel__match(evsel
, HARDWARE
, HW_INSTRUCTIONS
)) {
832 total
= runtime_stat_avg(st
, STAT_CYCLES
, ctx
, cpu
);
836 print_metric(config
, ctxp
, NULL
, "%7.2f ",
837 "insn per cycle", ratio
);
839 print_metric(config
, ctxp
, NULL
, NULL
, "insn per cycle", 0);
842 total
= runtime_stat_avg(st
, STAT_STALLED_CYCLES_FRONT
,
845 total
= max(total
, runtime_stat_avg(st
,
846 STAT_STALLED_CYCLES_BACK
,
850 out
->new_line(config
, ctxp
);
852 print_metric(config
, ctxp
, NULL
, "%7.2f ",
853 "stalled cycles per insn",
856 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_BRANCH_MISSES
)) {
857 if (runtime_stat_n(st
, STAT_BRANCHES
, ctx
, cpu
) != 0)
858 print_branch_misses(config
, cpu
, evsel
, avg
, out
, st
);
860 print_metric(config
, ctxp
, NULL
, NULL
, "of all branches", 0);
862 evsel
->core
.attr
.type
== PERF_TYPE_HW_CACHE
&&
863 evsel
->core
.attr
.config
== ( PERF_COUNT_HW_CACHE_L1D
|
864 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
865 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
867 if (runtime_stat_n(st
, STAT_L1_DCACHE
, ctx
, cpu
) != 0)
868 print_l1_dcache_misses(config
, cpu
, evsel
, avg
, out
, st
);
870 print_metric(config
, ctxp
, NULL
, NULL
, "of all L1-dcache hits", 0);
872 evsel
->core
.attr
.type
== PERF_TYPE_HW_CACHE
&&
873 evsel
->core
.attr
.config
== ( PERF_COUNT_HW_CACHE_L1I
|
874 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
875 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
877 if (runtime_stat_n(st
, STAT_L1_ICACHE
, ctx
, cpu
) != 0)
878 print_l1_icache_misses(config
, cpu
, evsel
, avg
, out
, st
);
880 print_metric(config
, ctxp
, NULL
, NULL
, "of all L1-icache hits", 0);
882 evsel
->core
.attr
.type
== PERF_TYPE_HW_CACHE
&&
883 evsel
->core
.attr
.config
== ( PERF_COUNT_HW_CACHE_DTLB
|
884 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
885 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
887 if (runtime_stat_n(st
, STAT_DTLB_CACHE
, ctx
, cpu
) != 0)
888 print_dtlb_cache_misses(config
, cpu
, evsel
, avg
, out
, st
);
890 print_metric(config
, ctxp
, NULL
, NULL
, "of all dTLB cache hits", 0);
892 evsel
->core
.attr
.type
== PERF_TYPE_HW_CACHE
&&
893 evsel
->core
.attr
.config
== ( PERF_COUNT_HW_CACHE_ITLB
|
894 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
895 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
897 if (runtime_stat_n(st
, STAT_ITLB_CACHE
, ctx
, cpu
) != 0)
898 print_itlb_cache_misses(config
, cpu
, evsel
, avg
, out
, st
);
900 print_metric(config
, ctxp
, NULL
, NULL
, "of all iTLB cache hits", 0);
902 evsel
->core
.attr
.type
== PERF_TYPE_HW_CACHE
&&
903 evsel
->core
.attr
.config
== ( PERF_COUNT_HW_CACHE_LL
|
904 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
905 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
907 if (runtime_stat_n(st
, STAT_LL_CACHE
, ctx
, cpu
) != 0)
908 print_ll_cache_misses(config
, cpu
, evsel
, avg
, out
, st
);
910 print_metric(config
, ctxp
, NULL
, NULL
, "of all LL-cache hits", 0);
911 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CACHE_MISSES
)) {
912 total
= runtime_stat_avg(st
, STAT_CACHEREFS
, ctx
, cpu
);
915 ratio
= avg
* 100 / total
;
917 if (runtime_stat_n(st
, STAT_CACHEREFS
, ctx
, cpu
) != 0)
918 print_metric(config
, ctxp
, NULL
, "%8.3f %%",
919 "of all cache refs", ratio
);
921 print_metric(config
, ctxp
, NULL
, NULL
, "of all cache refs", 0);
922 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
)) {
923 print_stalled_cycles_frontend(config
, cpu
, evsel
, avg
, out
, st
);
924 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
)) {
925 print_stalled_cycles_backend(config
, cpu
, evsel
, avg
, out
, st
);
926 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CPU_CYCLES
)) {
927 total
= runtime_stat_avg(st
, STAT_NSECS
, 0, cpu
);
931 print_metric(config
, ctxp
, NULL
, "%8.3f", "GHz", ratio
);
933 print_metric(config
, ctxp
, NULL
, NULL
, "Ghz", 0);
935 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX
)) {
936 total
= runtime_stat_avg(st
, STAT_CYCLES
, ctx
, cpu
);
939 print_metric(config
, ctxp
, NULL
,
940 "%7.2f%%", "transactional cycles",
941 100.0 * (avg
/ total
));
943 print_metric(config
, ctxp
, NULL
, NULL
, "transactional cycles",
945 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX_CP
)) {
946 total
= runtime_stat_avg(st
, STAT_CYCLES
, ctx
, cpu
);
947 total2
= runtime_stat_avg(st
, STAT_CYCLES_IN_TX
, ctx
, cpu
);
952 print_metric(config
, ctxp
, NULL
, "%7.2f%%", "aborted cycles",
953 100.0 * ((total2
-avg
) / total
));
955 print_metric(config
, ctxp
, NULL
, NULL
, "aborted cycles", 0);
956 } else if (perf_stat_evsel__is(evsel
, TRANSACTION_START
)) {
957 total
= runtime_stat_avg(st
, STAT_CYCLES_IN_TX
,
963 if (runtime_stat_n(st
, STAT_CYCLES_IN_TX
, ctx
, cpu
) != 0)
964 print_metric(config
, ctxp
, NULL
, "%8.0f",
965 "cycles / transaction", ratio
);
967 print_metric(config
, ctxp
, NULL
, NULL
, "cycles / transaction",
969 } else if (perf_stat_evsel__is(evsel
, ELISION_START
)) {
970 total
= runtime_stat_avg(st
, STAT_CYCLES_IN_TX
,
976 print_metric(config
, ctxp
, NULL
, "%8.0f", "cycles / elision", ratio
);
977 } else if (perf_evsel__is_clock(evsel
)) {
978 if ((ratio
= avg_stats(&walltime_nsecs_stats
)) != 0)
979 print_metric(config
, ctxp
, NULL
, "%8.3f", "CPUs utilized",
980 avg
/ (ratio
* evsel
->scale
));
982 print_metric(config
, ctxp
, NULL
, NULL
, "CPUs utilized", 0);
983 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_FETCH_BUBBLES
)) {
984 double fe_bound
= td_fe_bound(ctx
, cpu
, st
);
987 color
= PERF_COLOR_RED
;
988 print_metric(config
, ctxp
, color
, "%8.1f%%", "frontend bound",
990 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_SLOTS_RETIRED
)) {
991 double retiring
= td_retiring(ctx
, cpu
, st
);
994 color
= PERF_COLOR_GREEN
;
995 print_metric(config
, ctxp
, color
, "%8.1f%%", "retiring",
997 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_RECOVERY_BUBBLES
)) {
998 double bad_spec
= td_bad_spec(ctx
, cpu
, st
);
1001 color
= PERF_COLOR_RED
;
1002 print_metric(config
, ctxp
, color
, "%8.1f%%", "bad speculation",
1004 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_SLOTS_ISSUED
)) {
1005 double be_bound
= td_be_bound(ctx
, cpu
, st
);
1006 const char *name
= "backend bound";
1007 static int have_recovery_bubbles
= -1;
1009 /* In case the CPU does not support topdown-recovery-bubbles */
1010 if (have_recovery_bubbles
< 0)
1011 have_recovery_bubbles
= pmu_have_event("cpu",
1012 "topdown-recovery-bubbles");
1013 if (!have_recovery_bubbles
)
1014 name
= "backend bound/bad spec";
1017 color
= PERF_COLOR_RED
;
1018 if (td_total_slots(ctx
, cpu
, st
) > 0)
1019 print_metric(config
, ctxp
, color
, "%8.1f%%", name
,
1022 print_metric(config
, ctxp
, NULL
, NULL
, name
, 0);
1023 } else if (evsel
->metric_expr
) {
1024 generic_metric(config
, evsel
->metric_expr
, evsel
->metric_events
, evsel
->name
,
1025 evsel
->metric_name
, NULL
, avg
, cpu
, out
, st
);
1026 } else if (runtime_stat_n(st
, STAT_NSECS
, 0, cpu
) != 0) {
1030 total
= runtime_stat_avg(st
, STAT_NSECS
, 0, cpu
);
1033 ratio
= 1000.0 * avg
/ total
;
1034 if (ratio
< 0.001) {
1038 snprintf(unit_buf
, sizeof(unit_buf
), "%c/sec", unit
);
1039 print_metric(config
, ctxp
, NULL
, "%8.3f", unit_buf
, ratio
);
1040 } else if (perf_stat_evsel__is(evsel
, SMI_NUM
)) {
1041 print_smi_cost(config
, cpu
, evsel
, out
, st
);
1046 if ((me
= metricgroup__lookup(metric_events
, evsel
, false)) != NULL
) {
1047 struct metric_expr
*mexp
;
1049 list_for_each_entry (mexp
, &me
->head
, nd
) {
1051 out
->new_line(config
, ctxp
);
1052 generic_metric(config
, mexp
->metric_expr
, mexp
->metric_events
,
1053 evsel
->name
, mexp
->metric_name
,
1054 mexp
->metric_unit
, avg
, cpu
, out
, st
);
1058 print_metric(config
, ctxp
, NULL
, NULL
, NULL
, 0);