]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
perf tools: Minimal DEFERRED_CALLCHAIN support
author Namhyung Kim <namhyung@kernel.org>
Thu, 20 Nov 2025 23:48:00 +0000 (15:48 -0800)
committer Namhyung Kim <namhyung@kernel.org>
Wed, 3 Dec 2025 00:13:32 +0000 (16:13 -0800)
Add a new event type for deferred callchains and a new callback for the
struct perf_tool.  For now it doesn't actually handle the deferred
callchains; it just marks the sample if it has the
PERF_CONTEXT_USER_DEFERRED marker in the callchain array.

With this change, perf report can at least dump the raw data.  Recording
such data requires the next commit, which enables attr.defer_callchain,
but if you already have a data file, it will show the following result.

  $ perf report -D
  ...
  0x2158@perf.data [0x40]: event: 22
  .
  . ... raw event: size 64 bytes
  .  0000:  16 00 00 00 02 00 40 00 06 00 00 00 0b 00 00 00  ......@.........
  .  0010:  03 00 00 00 00 00 00 00 a7 7f 33 fe 18 7f 00 00  ..........3.....
  .  0020:  0f 0e 33 fe 18 7f 00 00 48 14 33 fe 18 7f 00 00  ..3.....H.3.....
  .  0030:  08 09 00 00 08 09 00 00 e6 7a e7 35 1c 00 00 00  .........z.5....

  121163447014 0x2158 [0x40]: PERF_RECORD_CALLCHAIN_DEFERRED(IP, 0x2): 2312/2312: 0xb00000006
  ... FP chain: nr:3
  .....  0: 00007f18fe337fa7
  .....  1: 00007f18fe330e0f
  .....  2: 00007f18fe331448
  : unhandled!

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
tools/lib/perf/include/perf/event.h
tools/perf/util/event.c
tools/perf/util/evsel.c
tools/perf/util/machine.c
tools/perf/util/perf_event_attr_fprintf.c
tools/perf/util/sample.h
tools/perf/util/session.c
tools/perf/util/tool.c
tools/perf/util/tool.h

index aa1e91c97a226e1ab16774bc7440d60aa8dd4f59..43a8cb04994fa033f9ae8d74753a984ffb15ea9e 100644 (file)
@@ -151,6 +151,18 @@ struct perf_record_switch {
        __u32                    next_prev_tid;
 };
 
+struct perf_record_callchain_deferred {
+       struct perf_event_header header;
+       /*
+        * This is to match kernel and (deferred) user stacks together.
+        * The kernel part will be in the sample callchain array after
+        * the PERF_CONTEXT_USER_DEFERRED entry.
+        */
+       __u64                    cookie;
+       __u64                    nr;
+       __u64                    ips[];
+};
+
 struct perf_record_header_attr {
        struct perf_event_header header;
        struct perf_event_attr   attr;
@@ -523,6 +535,7 @@ union perf_event {
        struct perf_record_read                 read;
        struct perf_record_throttle             throttle;
        struct perf_record_sample               sample;
+       struct perf_record_callchain_deferred   callchain_deferred;
        struct perf_record_bpf_event            bpf;
        struct perf_record_ksymbol              ksymbol;
        struct perf_record_text_poke_event      text_poke;
index fcf44149feb20c356cbc6a66f2b71e9767b4dcf0..4c92cc1a952c1d9f3ef9e6be9161e1ea0a8ae20b 100644 (file)
@@ -61,6 +61,7 @@ static const char *perf_event__names[] = {
        [PERF_RECORD_CGROUP]                    = "CGROUP",
        [PERF_RECORD_TEXT_POKE]                 = "TEXT_POKE",
        [PERF_RECORD_AUX_OUTPUT_HW_ID]          = "AUX_OUTPUT_HW_ID",
+       [PERF_RECORD_CALLCHAIN_DEFERRED]        = "CALLCHAIN_DEFERRED",
        [PERF_RECORD_HEADER_ATTR]               = "ATTR",
        [PERF_RECORD_HEADER_EVENT_TYPE]         = "EVENT_TYPE",
        [PERF_RECORD_HEADER_TRACING_DATA]       = "TRACING_DATA",
index 5aae7f791bc27088c487336af964e43441b9d7e5..df5351fde33987c8c6e7833c9ac279573ced8dc5 100644 (file)
@@ -3090,6 +3090,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
        data->data_src = PERF_MEM_DATA_SRC_NONE;
        data->vcpu = -1;
 
+       if (event->header.type == PERF_RECORD_CALLCHAIN_DEFERRED) {
+               const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
+
+               data->callchain = (struct ip_callchain *)&event->callchain_deferred.nr;
+               if (data->callchain->nr > max_callchain_nr)
+                       return -EFAULT;
+
+               data->deferred_cookie = event->callchain_deferred.cookie;
+
+               if (evsel->core.attr.sample_id_all)
+                       perf_evsel__parse_id_sample(evsel, event, data);
+               return 0;
+       }
+
        if (event->header.type != PERF_RECORD_SAMPLE) {
                if (!evsel->core.attr.sample_id_all)
                        return 0;
@@ -3214,12 +3228,25 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 
        if (type & PERF_SAMPLE_CALLCHAIN) {
                const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
+               u64 callchain_nr;
 
                OVERFLOW_CHECK_u64(array);
                data->callchain = (struct ip_callchain *)array++;
-               if (data->callchain->nr > max_callchain_nr)
+               callchain_nr = data->callchain->nr;
+               if (callchain_nr > max_callchain_nr)
                        return -EFAULT;
-               sz = data->callchain->nr * sizeof(u64);
+               sz = callchain_nr * sizeof(u64);
+               /*
+                * Save the cookie for the deferred user callchain.  The last 2
+                * entries in the callchain should be the context marker and the
+                * cookie.  The cookie will be used to match PERF_RECORD_
+                * CALLCHAIN_DEFERRED later.
+                */
+               if (evsel->core.attr.defer_callchain && callchain_nr >= 2 &&
+                   data->callchain->ips[callchain_nr - 2] == PERF_CONTEXT_USER_DEFERRED) {
+                       data->deferred_cookie = data->callchain->ips[callchain_nr - 1];
+                       data->deferred_callchain = true;
+               }
                OVERFLOW_CHECK(array, sz, max_size);
                array = (void *)array + sz;
        }
index b5dd42588c916d9195ef0301314cbb08b0bc8dd6..841b711d970e9457b14297116dc7be5a42a38fdb 100644 (file)
@@ -2124,6 +2124,7 @@ static int add_callchain_ip(struct thread *thread,
                                *cpumode = PERF_RECORD_MISC_KERNEL;
                                break;
                        case PERF_CONTEXT_USER:
+                       case PERF_CONTEXT_USER_DEFERRED:
                                *cpumode = PERF_RECORD_MISC_USER;
                                break;
                        default:
index 66b666d9ce649dd77ab9469e15073bddf04b5509..741c3d657a8b6ae7d455e19e73fee367413f7ed5 100644 (file)
@@ -343,6 +343,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
        PRINT_ATTRf(inherit_thread, p_unsigned);
        PRINT_ATTRf(remove_on_exec, p_unsigned);
        PRINT_ATTRf(sigtrap, p_unsigned);
+       PRINT_ATTRf(defer_callchain, p_unsigned);
+       PRINT_ATTRf(defer_output, p_unsigned);
 
        PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false);
        PRINT_ATTRf(bp_type, p_unsigned);
index fae834144ef42105d08a59704ee75cd4852bbc5a..a8307b20a9ea80668deecf65e316ab6036afbfeb 100644 (file)
@@ -107,6 +107,8 @@ struct perf_sample {
        /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */
        u16 weight3;
        bool no_hw_idx;         /* No hw_idx collected in branch_stack */
+       bool deferred_callchain;        /* Has deferred user callchains */
+       u64 deferred_cookie;
        char insn[MAX_INSN];
        void *raw_data;
        struct ip_callchain *callchain;
index 4b0236b2df2913e14ce6de2b9f5ea35d254c495e..361e15c1f26a96d0c8e0fef13f1e7b8444afc7e6 100644 (file)
@@ -720,6 +720,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
        [PERF_RECORD_CGROUP]              = perf_event__cgroup_swap,
        [PERF_RECORD_TEXT_POKE]           = perf_event__text_poke_swap,
        [PERF_RECORD_AUX_OUTPUT_HW_ID]    = perf_event__all64_swap,
+       [PERF_RECORD_CALLCHAIN_DEFERRED]  = perf_event__all64_swap,
        [PERF_RECORD_HEADER_ATTR]         = perf_event__hdr_attr_swap,
        [PERF_RECORD_HEADER_EVENT_TYPE]   = perf_event__event_type_swap,
        [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -854,6 +855,9 @@ static void callchain__printf(struct evsel *evsel,
        for (i = 0; i < callchain->nr; i++)
                printf("..... %2d: %016" PRIx64 "\n",
                       i, callchain->ips[i]);
+
+       if (sample->deferred_callchain)
+               printf("...... (deferred)\n");
 }
 
 static void branch_stack__printf(struct perf_sample *sample,
@@ -1123,6 +1127,19 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
                sample_read__printf(sample, evsel->core.attr.read_format);
 }
 
+static void dump_deferred_callchain(struct evsel *evsel, union perf_event *event,
+                                   struct perf_sample *sample)
+{
+       if (!dump_trace)
+               return;
+
+       printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n",
+              event->header.misc, sample->pid, sample->tid, sample->deferred_cookie);
+
+       if (evsel__has_callchain(evsel))
+               callchain__printf(evsel, sample);
+}
+
 static void dump_read(struct evsel *evsel, union perf_event *event)
 {
        struct perf_record_read *read_event = &event->read;
@@ -1353,6 +1370,9 @@ static int machines__deliver_event(struct machines *machines,
                return tool->text_poke(tool, event, sample, machine);
        case PERF_RECORD_AUX_OUTPUT_HW_ID:
                return tool->aux_output_hw_id(tool, event, sample, machine);
+       case PERF_RECORD_CALLCHAIN_DEFERRED:
+               dump_deferred_callchain(evsel, event, sample);
+               return tool->callchain_deferred(tool, event, sample, evsel, machine);
        default:
                ++evlist->stats.nr_unknown_events;
                return -1;
index 22a8a4ffe05f778e4251028ec09391e3f92e86b2..e77f0e2ecc1f79dba1bbdf4f7967166c02deb850 100644 (file)
@@ -287,6 +287,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
        tool->read = process_event_sample_stub;
        tool->throttle = process_event_stub;
        tool->unthrottle = process_event_stub;
+       tool->callchain_deferred = process_event_sample_stub;
        tool->attr = process_event_synth_attr_stub;
        tool->event_update = process_event_synth_event_update_stub;
        tool->tracing_data = process_event_synth_tracing_data_stub;
@@ -335,6 +336,7 @@ bool perf_tool__compressed_is_stub(const struct perf_tool *tool)
        }
 CREATE_DELEGATE_SAMPLE(read);
 CREATE_DELEGATE_SAMPLE(sample);
+CREATE_DELEGATE_SAMPLE(callchain_deferred);
 
 #define CREATE_DELEGATE_ATTR(name)                                     \
        static int delegate_ ## name(const struct perf_tool *tool,      \
@@ -468,6 +470,7 @@ void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate)
        tool->tool.ksymbol = delegate_ksymbol;
        tool->tool.bpf = delegate_bpf;
        tool->tool.text_poke = delegate_text_poke;
+       tool->tool.callchain_deferred = delegate_callchain_deferred;
 
        tool->tool.attr = delegate_attr;
        tool->tool.event_update = delegate_event_update;
index 88337cee1e3e2be3a2ee65047839dfa60bfabb29..9b9f0a8cbf3de4b5ac9c29e45f2610fa76a50f2e 100644 (file)
@@ -44,7 +44,8 @@ enum show_feature_header {
 
 struct perf_tool {
        event_sample    sample,
-                       read;
+                       read,
+                       callchain_deferred;
        event_op        mmap,
                        mmap2,
                        comm,