From: Namhyung Kim
Date: Tue, 7 Jan 2025 22:43:50 +0000 (-0800)
Subject: perf ftrace: Display latency statistics at the end
X-Git-Tag: v6.14-rc1~120^2~68
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=86a12b92a97326e17a621a8f759089a3beab8f3c;p=thirdparty%2Flinux.git

perf ftrace: Display latency statistics at the end

Sometimes users also want to see the average latency as well as the
histogram.  Display latency statistics such as avg, max and min at the
end.

  $ sudo ./perf ftrace latency -ab -T synchronize_rcu -- ...
  #   DURATION     |      COUNT | GRAPH                                      |
       0 - 1    us |          0 |                                            |
       1 - 2    us |          0 |                                            |
       2 - 4    us |          0 |                                            |
       4 - 8    us |          0 |                                            |
       8 - 16   us |          0 |                                            |
      16 - 32   us |          0 |                                            |
      32 - 64   us |          0 |                                            |
      64 - 128  us |          0 |                                            |
     128 - 256  us |          0 |                                            |
     256 - 512  us |          0 |                                            |
     512 - 1024 us |          0 |                                            |
       1 - 2    ms |          0 |                                            |
       2 - 4    ms |          0 |                                            |
       4 - 8    ms |          0 |                                            |
       8 - 16   ms |          1 | #####                                      |
      16 - 32   ms |          7 | ########################################   |
      32 - 64   ms |          0 |                                            |
      64 - 128  ms |          0 |                                            |
     128 - 256  ms |          0 |                                            |
     256 - 512  ms |          0 |                                            |
     512 - 1024 ms |          0 |                                            |
       1 - ...   s |          0 |                                            |

  # statistics (in usec)
    total time:               171832
      avg time:                21479
      max time:                30906
      min time:                15869
         count:                    8

Committer testing:

root@number:~# perf ftrace latency -nab --bucket-range 100 --max-latency 512 -T switch_mm_irqs_off sleep 1
#   DURATION     |      COUNT | GRAPH                                      |
     0 - 100  ns |        314 | ##                                         |
   100 - 200  ns |       1843 | #############                              |
   200 - 300  ns |       1390 | ##########                                 |
   300 - 400  ns |        844 | ######                                     |
   400 - 500  ns |        480 | ###                                        |
   500 - 512  ns |        315 | ##                                         |
   512 - ...  ns |         16 |                                            |

# statistics (in nsec)
  total time:              2448936
    avg time:                  387
    max time:                 3285
    min time:                   82
       count:                 6328
root@number:~#

Reviewed-by: James Clark
Signed-off-by: Namhyung Kim
Tested-by: Arnaldo Carvalho de Melo
Cc: Adrian Hunter
Cc: Ian Rogers
Cc: Ingo Molnar
Cc: Jiri Olsa
Cc: Kan Liang
Cc: Peter Zijlstra
Link: https://lore.kernel.org/r/20250107224352.1128669-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 834074162a63b..2a786047ba32b 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -43,6 +43,8 @@ static volatile sig_atomic_t workload_exec_errno;
 
 static volatile sig_atomic_t done;
 
+static struct stats latency_stats; /* for tracepoints */
+
 static void sig_handler(int sig __maybe_unused)
 {
 	done = true;
@@ -798,8 +800,10 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
 		if (i >= NUM_BUCKET || num >= max_latency - min_latency)
 			i = NUM_BUCKET - 1;
 
+		num += min_latency;
 do_inc:
 		buckets[i]++;
+		update_stats(&latency_stats, num);
 
 next:
 		/* empty the line buffer for the next output */
@@ -894,6 +898,12 @@ print_bucket_info:
 	printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1],
 	       bar_len, bar, bar_total - bar_len, "");
 
+	printf("\n# statistics (in %s)\n", ftrace->use_nsec ? "nsec" : "usec");
+	printf("  total time: %20.0f\n", latency_stats.mean * latency_stats.n);
+	printf("    avg time: %20.0f\n", latency_stats.mean);
+	printf("    max time: %20"PRIu64"\n", latency_stats.max);
+	printf("    min time: %20"PRIu64"\n", latency_stats.min);
+	printf("       count: %20.0f\n", latency_stats.n);
 }
 
 static int prepare_func_latency(struct perf_ftrace *ftrace)
@@ -932,6 +942,8 @@ static int prepare_func_latency(struct perf_ftrace *ftrace)
 	if (fd < 0)
 		pr_err("failed to open trace_pipe\n");
 
+	init_stats(&latency_stats);
+
 	put_tracing_file(trace_file);
 	return fd;
 }
@@ -961,7 +973,7 @@ static int stop_func_latency(struct perf_ftrace *ftrace)
 static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
 {
 	if (ftrace->target.use_bpf)
-		return perf_ftrace__latency_read_bpf(ftrace, buckets);
+		return perf_ftrace__latency_read_bpf(ftrace, buckets, &latency_stats);
 
 	return 0;
 }
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index bc484e65fb8f6..25fc280e414ac 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -11,6 +11,7 @@
 #include "util/debug.h"
 #include "util/evlist.h"
 #include "util/bpf_counter.h"
+#include "util/stat.h"
 
 #include "util/bpf_skel/func_latency.skel.h"
 
@@ -86,6 +87,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
 		}
 	}
 
+	skel->bss->min = INT64_MAX;
+
 	skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
							    false, func->name);
 	if (IS_ERR(skel->links.func_begin)) {
@@ -122,7 +125,7 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
 }
 
 int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
-				  int buckets[])
+				  int buckets[], struct stats *stats)
 {
 	int i, fd, err;
 	u32 idx;
@@ -146,6 +149,13 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
 			buckets[idx] += hist[i];
 	}
 
+	if (skel->bss->count) {
+		stats->mean = skel->bss->total / skel->bss->count;
+		stats->n = skel->bss->count;
+		stats->max = skel->bss->max;
+		stats->min = skel->bss->min;
+	}
+
 	free(hist);
 	return 0;
 }
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index 4df54e1b1411b..fb144811b34fc 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -38,6 +38,12 @@ struct {
 
 int enabled = 0;
 
+// stats
+__s64 total;
+__s64 count;
+__s64 max;
+__s64 min;
+
 const volatile int has_cpu = 0;
 const volatile int has_task = 0;
 const volatile int use_nsec = 0;
@@ -122,6 +128,8 @@ int BPF_PROG(func_end)
 				    delta >= max_latency - min_latency)
 					key = NUM_BUCKET - 1;
 			}
+
+			delta += min_latency;
 			goto do_lookup;
 		}
 		// calculate index using delta
@@ -136,6 +144,17 @@ do_lookup:
 			return 0;
 
 		*hist += 1;
+
+		if (bucket_range == 0)
+			delta /= cmp_base;
+
+		__sync_fetch_and_add(&total, delta);
+		__sync_fetch_and_add(&count, 1);
+
+		if (delta > max)
+			max = delta;
+		if (delta < min)
+			min = delta;
 	}
 
 	return 0;
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index f218703063f74..5dee2caba0fe4 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -7,6 +7,7 @@
 
 struct evlist;
 struct hashamp;
+struct stats;
 
 struct perf_ftrace {
 	struct evlist		*evlist;
@@ -46,7 +47,7 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace);
 int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace);
 int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace);
 int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
-				  int buckets[]);
+				  int buckets[], struct stats *stats);
 int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace);
 
 #else /* !HAVE_BPF_SKEL */
@@ -71,7 +72,8 @@ perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
 
 static inline int
 perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
-			      int buckets[] __maybe_unused)
+			      int buckets[] __maybe_unused,
+			      struct stats *stats __maybe_unused)
 {
 	return -1;
 }