From: Howard Chu Date: Thu, 1 May 2025 02:28:06 +0000 (-0700) Subject: perf record --off-cpu: Dump the remaining PERF_SAMPLE_ in sample_type from BPF's... X-Git-Tag: v6.16-rc1~57^2~95 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=74069a01609ef0f499fa6bf89bb1b017eba16b0d;p=thirdparty%2Flinux.git perf record --off-cpu: Dump the remaining PERF_SAMPLE_ in sample_type from BPF's stack trace map Dump the remaining PERF_SAMPLE_ data, as if it is dumping a direct sample. Put the stack trace, tid, off-cpu time and cgroup id into the raw_data section, just like a direct off-cpu sample coming from BPF's bpf_perf_event_output(). This ensures that evsel__parse_sample() correctly parses both direct samples and accumulated samples. Suggested-by: Namhyung Kim Reviewed-by: Ian Rogers Signed-off-by: Howard Chu Tested-by: Arnaldo Carvalho de Melo Tested-by: Gautam Menghani Tested-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20241108204137.2444151-10-howardchu95@gmail.com Link: https://lore.kernel.org/r/20250501022809.449767-9-howardchu95@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c index 1975a02c16830..c7fde66bb8f95 100644 --- a/tools/perf/util/bpf_off_cpu.c +++ b/tools/perf/util/bpf_off_cpu.c @@ -37,6 +37,8 @@ union off_cpu_data { u64 array[1024 / sizeof(u64)]; }; +u64 off_cpu_raw[MAX_STACKS + 5]; + static int off_cpu_config(struct evlist *evlist) { char off_cpu_event[64]; @@ -313,6 +315,7 @@ int off_cpu_write(struct perf_session *session) { int bytes = 0, size; int fd, stack; + u32 raw_size; u64 sample_type, val, sid = 0; struct evsel *evsel; struct perf_data_file *file = &session->data->file; @@ -352,46 +355,54 @@ int off_cpu_write(struct perf_session *session) while (!bpf_map_get_next_key(fd, &prev, &key)) { int n = 1; /* start from perf_event_header */ - int ip_pos = -1; bpf_map_lookup_elem(fd, &key, &val); + /* zero-fill some of the fields, will be overwritten by raw_data when parsing */ if (sample_type & PERF_SAMPLE_IDENTIFIER) data.array[n++] = sid; - if (sample_type & PERF_SAMPLE_IP) { - ip_pos = n; + if (sample_type & PERF_SAMPLE_IP) data.array[n++] = 0; /* will be updated */ - } if (sample_type & PERF_SAMPLE_TID) - data.array[n++] = (u64)key.pid << 32 | key.tgid; + data.array[n++] = 0; if (sample_type & PERF_SAMPLE_TIME) data.array[n++] = tstamp; - if (sample_type & PERF_SAMPLE_ID) - data.array[n++] = sid; if (sample_type & PERF_SAMPLE_CPU) data.array[n++] = 0; if (sample_type & PERF_SAMPLE_PERIOD) - data.array[n++] = val; - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - int len = 0; - - /* data.array[n] is callchain->nr (updated later) */ - data.array[n + 1] = PERF_CONTEXT_USER; - data.array[n + 2] = 0; - - bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]); - while (data.array[n + 2 + len]) + data.array[n++] = 0; + if (sample_type & PERF_SAMPLE_RAW) { + /* + * [ size ][ data ] + * [ data ] + * [ data ] + * [ data ] + * [ data ][ empty] + */ + int len = 0, i = 0; + void *raw_data = (void *)data.array + n * sizeof(u64); + + off_cpu_raw[i++] = (u64)key.pid << 32 | key.tgid; + off_cpu_raw[i++] = val; + + /* off_cpu_raw[i] is callchain->nr (updated later) */ + off_cpu_raw[i + 1] = PERF_CONTEXT_USER; + off_cpu_raw[i + 2] = 0; + + bpf_map_lookup_elem(stack, &key.stack_id, &off_cpu_raw[i + 2]); + while (off_cpu_raw[i + 2 + len]) len++; - /* update length of callchain */ - data.array[n] = len + 1; + off_cpu_raw[i] = len + 1; + i += len + 2; + + off_cpu_raw[i++] = key.cgroup_id; - /* update sample ip with the first callchain entry */ - if (ip_pos >= 0) - data.array[ip_pos] = data.array[n + 2]; + raw_size = i * sizeof(u64) + sizeof(u32); /* 4 bytes for alignment */ + memcpy(raw_data, &raw_size, sizeof(raw_size)); + memcpy(raw_data + sizeof(u32), off_cpu_raw, i * sizeof(u64)); - /* calculate sample callchain data array length */ - n += len + 2; + n += i + 1; } if (sample_type & PERF_SAMPLE_CGROUP) data.array[n++] = key.cgroup_id;