git.ipfire.org Git - thirdparty/linux.git/commitdiff
perf event: Fix size of synthesized sample with branch stacks
author Ian Rogers <irogers@google.com>
Mon, 18 May 2026 22:43:24 +0000 (15:43 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Wed, 20 May 2026 19:11:30 +0000 (16:11 -0300)
Synthesizing branch stacks for Intel-PT highlighted an issue where
PERF_SAMPLE_BRANCH_HW_INDEX was assumed to always be set in the
perf_event_attr branch_sample_type. This caused an incorrect size
calculation.

Fix the writing of the nr and hw_idx values during sample event
synthesis by passing the branch_sample_type into the sample size
and synthesis functions. Also update hardware tracers (Intel PT,
ARM SPE, CS-ETM) to retrieve and pass their branch_sample_type
dynamically to prevent payload misalignment.

Fixes: d3f85437ad6a5511 ("perf evsel: Support PERF_SAMPLE_BRANCH_HW_INDEX")
Assisted-by: Gemini:gemini-3.1-pro-preview
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Dapeng Mi <dapeng1.mi@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linux.dev>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/bench/inject-buildid.c
tools/perf/builtin-inject.c
tools/perf/tests/dlfilter-test.c
tools/perf/tests/sample-parsing.c
tools/perf/util/arm-spe.c
tools/perf/util/cs-etm.c
tools/perf/util/intel-bts.c
tools/perf/util/intel-pt.c
tools/perf/util/synthetic-events.c
tools/perf/util/synthetic-events.h

index aad572a78d7fcfdf5091d1d0be09f2e3e0365e08..bfd2c5ec9488e02119295926ce3fb84e8a136ff0 100644 (file)
@@ -228,9 +228,12 @@ static ssize_t synthesize_sample(struct bench_data *data, struct bench_dso *dso,
 
        event.header.type = PERF_RECORD_SAMPLE;
        event.header.misc = PERF_RECORD_MISC_USER;
-       event.header.size = perf_event__sample_event_size(&sample, bench_sample_type, 0);
-
-       perf_event__synthesize_sample(&event, bench_sample_type, 0, &sample);
+       event.header.size = perf_event__sample_event_size(&sample, bench_sample_type,
+                                                          /*read_format=*/0,
+                                                          /*branch_sample_type=*/0);
+       perf_event__synthesize_sample(&event, bench_sample_type,
+                                     /*read_format=*/0,
+                                     /*branch_sample_type=*/0, &sample);
 
        return writen(data->input_pipe[1], &event, event.header.size);
 }
index a2493f1097df97f5bacd20962b6dbb9b1deb9182..2f20e782c7f2726eea4ee50e4f3d4f1a03883853 100644 (file)
@@ -465,8 +465,13 @@ out:
        /* remove sample_type {STACK,REGS}_USER for synthesize */
        sample_type &= ~(PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER);
 
-       perf_event__synthesize_sample(event_copy, sample_type,
-                                     evsel->core.attr.read_format, sample);
+       ret = perf_event__synthesize_sample(event_copy, sample_type,
+                                           evsel->core.attr.read_format,
+                                           evsel->core.attr.branch_sample_type, sample);
+       if (ret) {
+               pr_err("Failed to synthesize sample\n");
+               return ret;
+       }
        return perf_event__repipe_synth(tool, event_copy);
 }
 
@@ -1102,7 +1107,8 @@ found:
        sample_sw.period = sample->period;
        sample_sw.time   = sample->time;
        perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
-                                     evsel->core.attr.read_format, &sample_sw);
+                                     evsel->core.attr.read_format,
+                                     evsel->core.attr.branch_sample_type, &sample_sw);
        build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
        ret = perf_event__repipe(tool, event_sw, &sample_sw, machine);
        perf_sample__exit(&sample_sw);
index e63790c61d53a014d226d6beef3e779fee4765a1..204663571943cb5dd521335789894400a4f31c68 100644 (file)
@@ -188,8 +188,12 @@ static int write_sample(struct test_data *td, u64 sample_type, u64 id, pid_t pid
 
        event->header.type = PERF_RECORD_SAMPLE;
        event->header.misc = PERF_RECORD_MISC_USER;
-       event->header.size = perf_event__sample_event_size(&sample, sample_type, 0);
-       err = perf_event__synthesize_sample(event, sample_type, 0, &sample);
+       event->header.size = perf_event__sample_event_size(&sample, sample_type,
+                                                          /*read_format=*/0,
+                                                          /*branch_sample_type=*/0);
+       err = perf_event__synthesize_sample(event, sample_type,
+                                           /*read_format=*/0,
+                                           /*branch_sample_type=*/0, &sample);
        if (err)
                return test_result("perf_event__synthesize_sample() failed", TEST_FAIL);
 
index a7327c942ca209ccf26be47af5ece753b9488ba2..55f0b73ca20e05dec9a9a643ba2f76d1dd06a0a2 100644 (file)
@@ -310,7 +310,8 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
                sample.read.one.lost  = 1;
        }
 
-       sz = perf_event__sample_event_size(&sample, sample_type, read_format);
+       sz = perf_event__sample_event_size(&sample, sample_type, read_format,
+                                          evsel.core.attr.branch_sample_type);
        bufsz = sz + 4096; /* Add a bit for overrun checking */
        event = malloc(bufsz);
        if (!event) {
@@ -324,7 +325,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
        event->header.size = sz;
 
        err = perf_event__synthesize_sample(event, sample_type, read_format,
-                                           &sample);
+                                           evsel.core.attr.branch_sample_type, &sample);
        if (err) {
                pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
                         "perf_event__synthesize_sample", sample_type, err);
index 2b31da231ef3ec84a37b5a314ab1fc1151dca907..31f05f46781092c16fa925475f3e55dd45b483de 100644 (file)
@@ -487,10 +487,30 @@ static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq)
        bstack->hw_idx = -1ULL;
 }
 
-static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
+static int arm_spe__inject_event(struct arm_spe *spe, union perf_event *event,
+                                struct perf_sample *sample, u64 type)
 {
-       event->header.size = perf_event__sample_event_size(sample, type, 0);
-       return perf_event__synthesize_sample(event, type, 0, sample);
+       struct evsel *evsel = sample->evsel;
+       u64 branch_sample_type = 0;
+       size_t sz;
+
+       if (!evsel && spe->session && spe->session->evlist)
+               evsel = evlist__id2evsel(spe->session->evlist, sample->id);
+
+       if (evsel)
+               branch_sample_type = evsel->core.attr.branch_sample_type;
+
+       event->header.type = PERF_RECORD_SAMPLE;
+       sz = perf_event__sample_event_size(sample, type, /*read_format=*/0,
+                                          branch_sample_type);
+       if (sz >= PERF_SAMPLE_MAX_SIZE) {
+               pr_err("Sample size %zu exceeds max size %d\n", sz, PERF_SAMPLE_MAX_SIZE);
+               return -EFAULT;
+       }
+       event->header.size = sz;
+
+       return perf_event__synthesize_sample(event, type, /*read_format=*/0,
+                                            branch_sample_type, sample);
 }
 
 static inline int
@@ -502,7 +522,7 @@ arm_spe_deliver_synth_event(struct arm_spe *spe,
        int ret;
 
        if (spe->synth_opts.inject) {
-               ret = arm_spe__inject_event(event, sample, spe->sample_type);
+               ret = arm_spe__inject_event(spe, event, sample, spe->sample_type);
                if (ret)
                        return ret;
        }
index 8a639d2e51a4c5bfdb0e4c42702061a790a3d01d..6ec48de29441012f3d827d50616349c6c0d1f037 100644 (file)
@@ -1422,11 +1422,29 @@ static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
                bs->nr += 1;
 }
 
-static int cs_etm__inject_event(union perf_event *event,
+static int cs_etm__inject_event(struct cs_etm_auxtrace *etm, union perf_event *event,
                               struct perf_sample *sample, u64 type)
 {
-       event->header.size = perf_event__sample_event_size(sample, type, 0);
-       return perf_event__synthesize_sample(event, type, 0, sample);
+       struct evsel *evsel = sample->evsel;
+       u64 branch_sample_type = 0;
+       size_t sz;
+
+       if (!evsel && etm->session && etm->session->evlist)
+               evsel = evlist__id2evsel(etm->session->evlist, sample->id);
+
+       if (evsel)
+               branch_sample_type = evsel->core.attr.branch_sample_type;
+
+       sz = perf_event__sample_event_size(sample, type, /*read_format=*/0,
+                                          branch_sample_type);
+       if (sz >= PERF_SAMPLE_MAX_SIZE) {
+               pr_err("Sample size %zu exceeds max size %d\n", sz, PERF_SAMPLE_MAX_SIZE);
+               return -EFAULT;
+       }
+       event->header.size = sz;
+
+       return perf_event__synthesize_sample(event, type, /*read_format=*/0,
+                                            branch_sample_type, sample);
 }
 
 
@@ -1592,7 +1610,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
                sample.branch_stack = tidq->last_branch;
 
        if (etm->synth_opts.inject) {
-               ret = cs_etm__inject_event(event, &sample,
+               ret = cs_etm__inject_event(etm, event, &sample,
                                           etm->instructions_sample_type);
                if (ret)
                        return ret;
@@ -1667,7 +1685,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
        }
 
        if (etm->synth_opts.inject) {
-               ret = cs_etm__inject_event(event, &sample,
+               ret = cs_etm__inject_event(etm, event, &sample,
                                           etm->branches_sample_type);
                if (ret)
                        return ret;
index 382255393fb3bf6b934c3485d5beab229eb36ad9..0b18ebd13f7c8456ddc3521a45750051902b5930 100644 (file)
@@ -303,7 +303,8 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
                event.sample.header.size = bts->branches_event_size;
                ret = perf_event__synthesize_sample(&event,
                                                    bts->branches_sample_type,
-                                                   0, &sample);
+                                                   /*read_format=*/0, /*branch_sample_type=*/0,
+                                                   &sample);
                if (ret)
                        return ret;
        }
index fc9eec8b54b824b9804e34f6dd9d10087d40acd9..dd2637678b405c08f37faaf5ee49d7c9b8c82e6a 100644 (file)
@@ -1728,11 +1728,30 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
        event->sample.header.misc = sample->cpumode;
 }
 
-static int intel_pt_inject_event(union perf_event *event,
+static int intel_pt_inject_event(struct intel_pt *pt, union perf_event *event,
                                 struct perf_sample *sample, u64 type)
 {
-       event->header.size = perf_event__sample_event_size(sample, type, 0);
-       return perf_event__synthesize_sample(event, type, 0, sample);
+       struct evsel *evsel = sample->evsel;
+       u64 branch_sample_type = 0;
+       size_t sz;
+
+       if (!evsel && pt->session && pt->session->evlist)
+               evsel = evlist__id2evsel(pt->session->evlist, sample->id);
+
+       if (evsel)
+               branch_sample_type = evsel->core.attr.branch_sample_type;
+
+       event->header.type = PERF_RECORD_SAMPLE;
+       sz = perf_event__sample_event_size(sample, type, /*read_format=*/0,
+                                          branch_sample_type);
+       if (sz >= PERF_SAMPLE_MAX_SIZE) {
+               pr_err("Sample size %zu exceeds max size %d\n", sz, PERF_SAMPLE_MAX_SIZE);
+               return -EFAULT;
+       }
+       event->header.size = sz;
+
+       return perf_event__synthesize_sample(event, type, /*read_format=*/0,
+                                            branch_sample_type, sample);
 }
 
 static inline int intel_pt_opt_inject(struct intel_pt *pt,
@@ -1742,7 +1761,7 @@ static inline int intel_pt_opt_inject(struct intel_pt *pt,
        if (!pt->synth_opts.inject)
                return 0;
 
-       return intel_pt_inject_event(event, sample, type);
+       return intel_pt_inject_event(pt, event, sample, type);
 }
 
 static int intel_pt_deliver_synth_event(struct intel_pt *pt,
index 85bee747f4cd2a73c9133093c5bef0140a60c7bc..2461f25a4d7dc2d72d63ea320fb5c9597d15dc90 100644 (file)
@@ -1455,7 +1455,8 @@ int perf_event__synthesize_stat_round(const struct perf_tool *tool,
        return process(tool, (union perf_event *) &event, NULL, machine);
 }
 
-size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format)
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format,
+                                    u64 branch_sample_type)
 {
        size_t sz, result = sizeof(struct perf_record_sample);
 
@@ -1515,8 +1516,10 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
 
        if (type & PERF_SAMPLE_BRANCH_STACK) {
                sz = sample->branch_stack->nr * sizeof(struct branch_entry);
-               /* nr, hw_idx */
-               sz += 2 * sizeof(u64);
+               /* nr */
+               sz += sizeof(u64);
+               if (branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)
+                       sz += sizeof(u64);
                result += sz;
        }
 
@@ -1605,7 +1608,7 @@ static __u64 *copy_read_group_values(__u64 *array, __u64 read_format,
 }
 
 int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format,
-                                 const struct perf_sample *sample)
+                                 u64 branch_sample_type, const struct perf_sample *sample)
 {
        __u64 *array;
        size_t sz;
@@ -1719,9 +1722,17 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
 
        if (type & PERF_SAMPLE_BRANCH_STACK) {
                sz = sample->branch_stack->nr * sizeof(struct branch_entry);
-               /* nr, hw_idx */
-               sz += 2 * sizeof(u64);
-               memcpy(array, sample->branch_stack, sz);
+
+               *array++ = sample->branch_stack->nr;
+
+               if (branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) {
+                       if (sample->no_hw_idx)
+                               *array++ = 0;
+                       else
+                               *array++ = sample->branch_stack->hw_idx;
+               }
+
+               memcpy(array, perf_sample__branch_entries((struct perf_sample *)sample), sz);
                array = (void *)array + sz;
        }
 
index b0edad0c310010b7029ae1a6e396106969645fd7..8c7f49f9ccf54a69952dc0b7cfe16c3c774301d0 100644 (file)
@@ -81,7 +81,8 @@ int perf_event__synthesize_mmap_events(const struct perf_tool *tool, union perf_
 int perf_event__synthesize_modules(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
 int perf_event__synthesize_namespaces(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine);
 int perf_event__synthesize_cgroups(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample);
+int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format,
+                                 u64 branch_sample_type, const struct perf_sample *sample);
 int perf_event__synthesize_stat_config(const struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
 int perf_event__synthesize_stat_events(struct perf_stat_config *config, const struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
 int perf_event__synthesize_stat_round(const struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
@@ -97,7 +98,8 @@ void perf_event__synthesize_final_bpf_metadata(struct perf_session *session,
 
 int perf_tool__process_synth_event(const struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process);
 
-size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format);
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
+                                    u64 read_format, u64 branch_sample_type);
 
 int __machine__synthesize_threads(struct machine *machine, const struct perf_tool *tool,
                                  struct target *target, struct perf_thread_map *threads,