git.ipfire.org Git - thirdparty/linux.git/commitdiff
perf record: Add --call-graph fp,defer option for deferred callchains
author: Namhyung Kim <namhyung@kernel.org>
Thu, 20 Nov 2025 23:48:01 +0000 (15:48 -0800)
committer: Namhyung Kim <namhyung@kernel.org>
Wed, 3 Dec 2025 05:59:13 +0000 (21:59 -0800)
Add a new callchain record mode option for deferred callchains.  For now
it only works with FP (frame-pointer) mode.

Also add the missing feature detection logic so that the defer_callchain
and defer_output flags are cleared on old kernels.

  $ perf record --call-graph fp,defer -vv true
  ...
  ------------------------------------------------------------
  perf_event_attr:
    type                             0 (PERF_TYPE_HARDWARE)
    size                             136
    config                           0 (PERF_COUNT_HW_CPU_CYCLES)
    { sample_period, sample_freq }   4000
    sample_type                      IP|TID|TIME|CALLCHAIN|PERIOD
    read_format                      ID|LOST
    disabled                         1
    inherit                          1
    mmap                             1
    comm                             1
    freq                             1
    enable_on_exec                   1
    task                             1
    sample_id_all                    1
    mmap2                            1
    comm_exec                        1
    ksymbol                          1
    bpf_event                        1
    defer_callchain                  1
    defer_output                     1
  ------------------------------------------------------------
  sys_perf_event_open: pid 162755  cpu 0  group_fd -1  flags 0x8
  sys_perf_event_open failed, error -22
  switching off deferred callchain support

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
tools/perf/Documentation/perf-config.txt
tools/perf/Documentation/perf-record.txt
tools/perf/util/callchain.c
tools/perf/util/callchain.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h

index c6f33565966735feca1b74d79c6d3127d1d2bdcf..642d1c490d9e3bcd7509d949b766add78004e147 100644 (file)
@@ -452,6 +452,9 @@ call-graph.*::
                kernel space is controlled not by this option but by the
                kernel config (CONFIG_UNWINDER_*).
 
+               The 'defer' mode can be used with 'fp' mode to enable deferred
+               user callchains (like 'fp,defer').
+
        call-graph.dump-size::
                The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
                When using dwarf into record-mode, the default size will be used if omitted.
index 067891bd7da6edc8771507b08c5a66d96f5650b6..e8b9aadbbfa5057411f0fdcda7aae7186b795fc2 100644 (file)
@@ -325,6 +325,10 @@ OPTIONS
        by default.  User can change the number by passing it after comma
        like "--call-graph fp,32".
 
+       Also "defer" can be used with "fp" (like "--call-graph fp,defer") to
+       enable deferred user callchain which will collect user-space callchains
+       when the thread returns to the user space.
+
 -q::
 --quiet::
        Don't print any warnings or messages, useful for scripting.
index d7b7eef740b9d6edf725c8a32f65f98d48ae9b0f..2884187ccbbecfdc024bf7579bee7d4af346a1ce 100644 (file)
@@ -275,9 +275,13 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
                        if (tok) {
                                unsigned long size;
 
-                               size = strtoul(tok, &name, 0);
-                               if (size < (unsigned) sysctl__max_stack())
-                                       param->max_stack = size;
+                               if (!strncmp(tok, "defer", sizeof("defer"))) {
+                                       param->defer = true;
+                               } else {
+                                       size = strtoul(tok, &name, 0);
+                                       if (size < (unsigned) sysctl__max_stack())
+                                               param->max_stack = size;
+                               }
                        }
                        break;
 
@@ -314,6 +318,12 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
        } while (0);
 
        free(buf);
+
+       if (param->defer && param->record_mode != CALLCHAIN_FP) {
+               pr_err("callchain: deferred callchain only works with FP\n");
+               return -EINVAL;
+       }
+
        return ret;
 }
 
index 86ed9e4d04f9ee7b019efbf7e8f22bc82db2d752..d5ae4fbb7ce5fa4433625a9f982453450fa6fe3a 100644 (file)
@@ -98,6 +98,7 @@ extern bool dwarf_callchain_users;
 
 struct callchain_param {
        bool                    enabled;
+       bool                    defer;
        enum perf_call_graph_mode record_mode;
        u32                     dump_size;
        enum chain_mode         mode;
index df5351fde33987c8c6e7833c9ac279573ced8dc5..9cd706f6279313c2cc53eba1adbd45f352090f75 100644 (file)
@@ -1066,6 +1066,9 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
                pr_info("Disabling user space callchains for function trace event.\n");
                attr->exclude_callchain_user = 1;
        }
+
+       if (param->defer && !attr->exclude_callchain_user)
+               attr->defer_callchain = 1;
 }
 
 void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
@@ -1512,6 +1515,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
        attr->mmap2    = track && !perf_missing_features.mmap2;
        attr->comm     = track;
        attr->build_id = track && opts->build_id;
+       attr->defer_output = track && callchain && callchain->defer;
 
        /*
         * ksymbol is tracked separately with text poke because it needs to be
@@ -2200,6 +2204,10 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
 
 static void evsel__disable_missing_features(struct evsel *evsel)
 {
+       if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain)
+               evsel->core.attr.defer_callchain = 0;
+       if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output)
+               evsel->core.attr.defer_output = 0;
        if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit &&
            (evsel->core.attr.sample_type & PERF_SAMPLE_READ))
                evsel->core.attr.inherit = 0;
@@ -2474,6 +2482,13 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
 
        /* Please add new feature detection here. */
 
+       attr.defer_callchain = true;
+       if (has_attr_feature(&attr, /*flags=*/0))
+               goto found;
+       perf_missing_features.defer_callchain = true;
+       pr_debug2("switching off deferred callchain support\n");
+       attr.defer_callchain = false;
+
        attr.inherit = true;
        attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID;
        if (has_attr_feature(&attr, /*flags=*/0))
@@ -2585,6 +2600,10 @@ found:
        errno = old_errno;
 
 check:
+       if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) &&
+           perf_missing_features.defer_callchain)
+               return true;
+
        if (evsel->core.attr.inherit &&
            (evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
            perf_missing_features.inherit_sample_read)
index 3ae4ac8f9a37e00901947f917fbbe08c2939e69d..a08130ff2e47a887b19f6c47bfa9f51e0c40d226 100644 (file)
@@ -221,6 +221,7 @@ struct perf_missing_features {
        bool branch_counters;
        bool aux_action;
        bool inherit_sample_read;
+       bool defer_callchain;
 };
 
 extern struct perf_missing_features perf_missing_features;