git.ipfire.org Git - thirdparty/linux.git/commitdiff
perf session: Bound nr_cpus_avail and validate sample CPU
author: Arnaldo Carvalho de Melo <acme@redhat.com>
Sat, 2 May 2026 17:55:43 +0000 (14:55 -0300)
committer: Arnaldo Carvalho de Melo <acme@redhat.com>
Fri, 29 May 2026 14:44:35 +0000 (11:44 -0300)
Several downstream consumers (timechart, kwork, sched) use fixed-size
arrays indexed by CPU.  A crafted perf.data can supply arbitrary CPU
values that index past these arrays, causing out-of-bounds access.

Validate sample.cpu against min(nr_cpus_avail, MAX_NR_CPUS) in
perf_session__deliver_event() before any tool callback runs.  The
cap at MAX_NR_CPUS protects fixed-size downstream arrays; the true
nr_cpus_avail is preserved in env for header parsing (e.g.
process_cpu_topology) which needs the real count.

Reject HEADER_NRCPUS values above 1M CPUs when parsing the header, and
fall back to MAX_NR_CPUS when HEADER_NRCPUS is missing (truncated
files, pipe mode, pre-2017 perf).

Only validate when sample.cpu was actually populated from event data:
PERF_SAMPLE_CPU is set in sample_type and the event is either a
PERF_RECORD_SAMPLE or sample_id_all is enabled.  Otherwise
evsel__parse_sample() leaves sample.cpu as (u32)-1, a sentinel that
downstream tools (script, inject) check to identify events without CPU
info.  Clamping it to 0 would break those checks.

Inline evlist__parse_sample() into perf_session__deliver_event()
so the evsel lookup needed for sample_type checking reuses the same
evsel that parsed the sample, avoiding a second evlist__event2evsel()
call on every event.

For pipe-mode streams where HEADER_NRCPUS may arrive late or not at
all, the MAX_NR_CPUS fallback ensures the bounds check is still
effective against the fixed-size downstream arrays.

Reported-by: sashiko-bot@kernel.org # Running on a local machine
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Assisted-by: Claude:claude-opus-4.6-1m
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/header.c
tools/perf/util/session.c

index f771a76321c10a0286dc7382b0679def4062627f..5b1fa1653d2a48ccacd154e241083269f66ca40e 100644 (file)
@@ -48,6 +48,7 @@
 #include <api/io_dir.h>
 #include "asm/bug.h"
 #include "tool.h"
+#include "../perf.h"
 #include "time-utils.h"
 #include "units.h"
 #include "util/util.h" // perf_exe()
@@ -2895,6 +2896,17 @@ static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused)
        if (ret)
                return ret;
 
+       /*
+        * Cap at 1M CPUs — generous for any real system but prevents
+        * stack overflow from VLA allocations sized by nr_cpus_avail
+        * (e.g. DECLARE_BITMAP in builtin-c2c.c node_entry()).
+        */
+       if (nr_cpus_avail > (1U << 20)) {
+               pr_err("Invalid HEADER_NRCPUS: nr_cpus_avail (%u) exceeds maximum (%u)\n",
+                      nr_cpus_avail, 1U << 20);
+               return -1;
+       }
+
        if (nr_cpus_online > nr_cpus_avail) {
                pr_err("Invalid HEADER_NRCPUS: nr_cpus_online (%u) > nr_cpus_avail (%u)\n",
                       nr_cpus_online, nr_cpus_avail);
@@ -5250,6 +5262,24 @@ int perf_session__read_header(struct perf_session *session)
 #endif
        }
 
+       /*
+        * Without nr_cpus_avail the sample CPU bounds check in
+        * perf_session__deliver_event() is bypassed, allowing crafted
+        * CPU IDs to reach downstream consumers that index fixed-size
+        * arrays (timechart, kwork, sched — all sized MAX_NR_CPUS).
+        *
+        * This can happen with truncated files (interrupted recording
+        * loses all feature sections), very old files that predate
+        * HEADER_NRCPUS, or crafted files that omit it.  Fall back to
+        * MAX_NR_CPUS so the bounds check is still effective — any
+        * CPU ID below that limit is safe for all downstream arrays.
+        */
+       if (header->env.nr_cpus_avail == 0) {
+               header->env.nr_cpus_avail = MAX_NR_CPUS;
+               pr_warning("WARNING: perf.data is missing HEADER_NRCPUS, using MAX_NR_CPUS (%d) as CPU bound\n",
+                          MAX_NR_CPUS);
+       }
+
        return 0;
 out_errno:
        return -errno;
index 9271885e3920f897c075c34209c868cc41177bcc..6de665d3c9054179ed1e2a763618b16bbb5251f9 100644 (file)
@@ -2110,14 +2110,100 @@ static int perf_session__deliver_event(struct perf_session *session,
                                       const char *file_path)
 {
        struct perf_sample sample;
+       struct evsel *evsel;
        int ret;
 
        perf_sample__init(&sample, /*all=*/false);
-       ret = evlist__parse_sample(session->evlist, event, &sample);
+       evsel = evlist__event2evsel(session->evlist, event);
+       if (!evsel) {
+               pr_err("No evsel found for event type %u\n",
+                      event->header.type);
+               ret = -EFAULT;
+               goto out;
+       }
+       ret = evsel__parse_sample(evsel, event, &sample);
        if (ret) {
                pr_err("Can't parse sample, err = %d\n", ret);
                goto out;
        }
+       /*
+        * evsel__parse_sample() doesn't populate machine_pid/vcpu,
+        * which are needed by machines__find_for_cpumode() to
+        * attribute samples to guest VMs.  The SID table maps
+        * sample IDs to the guest that owns the event.
+        */
+       if (perf_guest && sample.id) {
+               struct perf_sample_id *sid = evlist__id2sid(session->evlist, sample.id);
+
+               if (sid) {
+                       sample.machine_pid = sid->machine_pid;
+                       sample.vcpu = sid->vcpu.cpu;
+               }
+       }
+
+       /*
+        * Validate sample.cpu before any callback can use it as an
+        * array index (kwork cpus_runtime, timechart cpus_cstate_*,
+        * sched cpu_last_switched).
+        *
+        * When PERF_SAMPLE_CPU is absent, evsel__parse_sample() leaves
+        * sample.cpu as (u32)-1 — a sentinel that downstream tools
+        * (script, inject) check to identify events without CPU info.
+        * Only check when sample.cpu was actually populated from event
+        * data: PERF_RECORD_SAMPLE always has it when PERF_SAMPLE_CPU
+        * is set; non-sample events only have it when sample_id_all is
+        * enabled.  Otherwise sample.cpu is the (u32)-1 sentinel from
+        * evsel__parse_sample() and must not be validated or clamped.
+        */
+       if ((evsel->core.attr.sample_type & PERF_SAMPLE_CPU) &&
+           (event->header.type == PERF_RECORD_SAMPLE ||
+            evsel->core.attr.sample_id_all)) {
+               int nr_cpus_avail = perf_session__env(session)->nr_cpus_avail;
+
+               /*
+                * For perf.data files the MAX_NR_CPUS fallback in
+                * perf_session__read_header() guarantees this is set.
+                * For pipe mode, HEADER_NRCPUS may arrive late or not
+                * at all (pre-2017 perf, third-party tools).  Fall
+                * back to MAX_NR_CPUS so the bounds check still works
+                * against fixed-size downstream arrays.
+                *
+                * Do NOT write back to env: this function runs during
+                * recording (synthesized events) when nr_cpus_avail is
+                * legitimately 0.  Writing MAX_NR_CPUS would cause
+                * write_cpu_topology() to emit 4096 core_id/socket_id
+                * pairs instead of the real CPU count, corrupting the
+                * topology section in the generated perf.data.
+                */
+               if (nr_cpus_avail <= 0)
+                       nr_cpus_avail = MAX_NR_CPUS;
+               /*
+                * Cap at MAX_NR_CPUS for the bounds check — downstream
+                * consumers use fixed-size arrays of that size.  Keep
+                * the true nr_cpus_avail in env for header parsing
+                * (e.g. process_cpu_topology) which needs the real count.
+                */
+               if (nr_cpus_avail > MAX_NR_CPUS)
+                       nr_cpus_avail = MAX_NR_CPUS;
+               if (sample.cpu >= (u32)nr_cpus_avail &&
+                   sample.cpu != (u32)-1) {
+                       /*
+                        * Warn rather than abort: synthesized events
+                        * (MMAP, COMM) lack sample_id_all data, so
+                        * parse_id_sample reads garbage from the event
+                        * payload.  Clamping to 0 protects downstream
+                        * array indexing while keeping the session alive.
+                        *
+                        * Preserve (u32)-1: perf script and perf inject
+                        * use it as a sentinel for "CPU not applicable."
+                        * Downstream array users (timechart, kwork) have
+                        * their own per-callback bounds checks.
+                        */
+                       pr_warning_once("WARNING: sample CPU %u >= nr_cpus_avail %u, clamping to 0\n",
+                                       sample.cpu, nr_cpus_avail);
+                       sample.cpu = 0;
+               }
+       }
 
        ret = auxtrace__process_event(session, event, &sample, tool);
        if (ret < 0)