]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
perf header: Support CPU DOMAIN relation info
author: Swapnil Sapkal <swapnil.sapkal@amd.com>
Mon, 19 Jan 2026 17:58:24 +0000 (17:58 +0000)
committer: Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 22 Jan 2026 00:55:09 +0000 (21:55 -0300)
The '/proc/schedstat' file gives info about load balancing statistics
within a given domain.

Starting with schedstat version 17, each domain line also carries the
domain name in addition to the cpumask describing the sibling CPUs.

Storing this information in the perf header lets tools such as `perf sched
stats` provide better analysis.

Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
Tested-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Anubhav Shelat <ashelat@redhat.com>
Cc: Ben Gainey <ben.gainey@arm.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Chun-Tse Shao <ctshao@google.com>
Cc: David Vernet <void@manifault.com>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Dr. David Alan Gilbert <linux@treblig.org>
Cc: Gautham Shenoy <gautham.shenoy@amd.com>
Cc: Graham Woodward <graham.woodward@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Madadi Vineeth Reddy <vineethr@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Santosh Shukla <santosh.shukla@amd.com>
Cc: Shrikanth Hegde <sshegde@linux.ibm.com>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Falcon <thomas.falcon@intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Yang Jihong <yangjihong@bytedance.com>
Cc: Yujie Liu <yujie.liu@intel.com>
Cc: Zhongqiu Han <quic_zhonhan@quicinc.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf.data-file-format.txt
tools/perf/builtin-inject.c
tools/perf/util/env.c
tools/perf/util/env.h
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/util.c
tools/perf/util/util.h

index c9d4dec6534409b3ce4d0df56e70a9e3ba0942d5..0e4d0ecc9e12511d73ec1d087b8975abf5068998 100644 (file)
@@ -447,6 +447,23 @@ struct {
        } [nr_pmu];
 };
 
+       HEADER_CPU_DOMAIN_INFO = 32,
+
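+List of cpu-domain relation info. The section starts with two u32 values,
+the schedstat version and the number of sched domain levels (highest domain
+index plus one), followed by one cpu_domain_info record per CPU. The dname
+string is only present when the schedstat version is 17 or later. The format
+of the data is as below.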
+
+struct domain_info {
+       int domain;
+       char dname[];
+       char cpumask[];
+       char cpulist[];
+};
+
+struct cpu_domain_info {
+       int cpu;
+       int nr_domains;
+       struct domain_info domains[];
+};
+
        other bits are reserved and should be ignored for now
        HEADER_FEAT_BITS        = 256,
 
index e2a653280e1b83610497e12aef601b86c714189d..c89ac85ec112cd987047f97ef50118b9e1c82012 100644 (file)
@@ -2133,6 +2133,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
        case HEADER_CLOCK_DATA:
        case HEADER_HYBRID_TOPOLOGY:
        case HEADER_PMU_CAPS:
+       case HEADER_CPU_DOMAIN_INFO:
                return true;
        /* Information that can be updated */
        case HEADER_BUILD_ID:
index f1626d2032cdedd6810b332a6d2ad95ad6fe0839..93d475a80f14deb2989cd1b1b28536ccba0a1549 100644 (file)
@@ -216,6 +216,34 @@ static void perf_env__purge_bpf(struct perf_env *env __maybe_unused)
 }
 #endif // HAVE_LIBBPF_SUPPORT
 
+/*
+ * Release a cpu -> sched domain table and everything hanging off it.
+ *
+ * @cd_map: table of @nr per-CPU entries; NULL is accepted, as are NULL slots.
+ * @schedstat_version: domain names are only released for versions >= 17.
+ * @nr: number of slots in @cd_map.
+ *
+ * Only the local copy of @cd_map is cleared; the caller's own pointer is left
+ * untouched and must not be used afterwards.
+ */
+void free_cpu_domain_info(struct cpu_domain_map **cd_map, u32 schedstat_version, u32 nr)
+{
+       u32 cpu_idx, dom_idx;
+
+       if (cd_map == NULL)
+               return;
+
+       for (cpu_idx = 0; cpu_idx < nr; cpu_idx++) {
+               struct cpu_domain_map *entry = cd_map[cpu_idx];
+
+               if (entry == NULL)
+                       continue;
+
+               for (dom_idx = 0; dom_idx < entry->nr_domains; dom_idx++) {
+                       struct domain_info *dom = entry->domains[dom_idx];
+
+                       if (dom == NULL)
+                               continue;
+
+                       if (schedstat_version >= 17)
+                               zfree(&dom->dname);
+
+                       zfree(&dom->cpumask);
+                       zfree(&dom->cpulist);
+                       zfree(&dom);
+               }
+               zfree(&entry->domains);
+               zfree(&cd_map[cpu_idx]);
+       }
+       zfree(&cd_map);
+}
+
 void perf_env__exit(struct perf_env *env)
 {
        int i, j;
@@ -265,6 +293,7 @@ void perf_env__exit(struct perf_env *env)
                zfree(&env->pmu_caps[i].pmu_name);
        }
        zfree(&env->pmu_caps);
+       free_cpu_domain_info(env->cpu_domain, env->schedstat_version, env->nr_cpus_avail);
 }
 
 void perf_env__init(struct perf_env *env)
index 9977b85523a8c30a1c79008ce6263f822b8b6463..76ba1a36e9ff62432a458d1de95d55fb8b785d27 100644 (file)
@@ -54,6 +54,19 @@ struct pmu_caps {
        char            *pmu_name;
 };
 
+/*
+ * One scheduler domain of a CPU, as taken from a "domain<N>" line of
+ * /proc/schedstat or read back from the perf.data header.
+ *
+ * domain:  the domain index <N>.
+ * dname:   domain name; only filled in when the schedstat version is >= 17.
+ * cpumask: cpumask string as it appears in the schedstat line.
+ * cpulist: cpu list string produced from the cpumask by cpumask_to_cpulist().
+ *
+ * The strings are heap allocated and released by free_cpu_domain_info().
+ */
+struct domain_info {
+       u32     domain;
+       char    *dname;
+       char    *cpumask;
+       char    *cpulist;
+};
+
+/*
+ * Sched domains of a single CPU.
+ *
+ * cpu:        CPU number this entry describes.
+ * nr_domains: number of domains recorded for the CPU.
+ * domains:    array of pointers to the per-domain descriptions.
+ */
+struct cpu_domain_map {
+       u32                     cpu;
+       u32                     nr_domains;
+       struct domain_info      **domains;
+};
+
 typedef const char *(arch_syscalls__strerrno_t)(int err);
 
 struct perf_env {
@@ -70,6 +83,8 @@ struct perf_env {
        unsigned int            max_branches;
        unsigned int            br_cntr_nr;
        unsigned int            br_cntr_width;
+       unsigned int            schedstat_version;
+       unsigned int            max_sched_domains;
        int                     kernel_is_64_bit;
 
        int                     nr_cmdline;
@@ -92,6 +107,7 @@ struct perf_env {
        char                    **cpu_pmu_caps;
        struct cpu_topology_map *cpu;
        struct cpu_cache_level  *caches;
+       struct cpu_domain_map   **cpu_domain;
        int                      caches_cnt;
        u32                     comp_ratio;
        u32                     comp_ver;
@@ -151,6 +167,7 @@ struct bpf_prog_info_node;
 struct btf_node;
 
 int perf_env__read_core_pmu_caps(struct perf_env *env);
+void free_cpu_domain_info(struct cpu_domain_map **cd_map, u32 schedstat_version, u32 nr);
 void perf_env__exit(struct perf_env *env);
 
 int perf_env__kernel_is_64_bit(struct perf_env *env);
index f5cad377c99ef7f49f8c4476d8590947dd673fd5..673d53bb2a2c59d8c08508b76dd69bd261ed95a0 100644 (file)
@@ -1614,6 +1614,162 @@ static int write_pmu_caps(struct feat_fd *ff,
        return 0;
 }
 
+/*
+ * Parse /proc/schedstat into a per-CPU table of scheduler domains.
+ *
+ * @schedstat_version: updated from the "version <N>" line when one is parsed.
+ * @max_sched_domains: raised to the largest "domain<N>" index seen.
+ * @nr: number of slots in the returned table. "cpu<N>" lines with N >= nr are
+ *      skipped together with the domain lines that follow them.
+ *
+ * Returns an array of @nr pointers indexed by CPU number (slots of CPUs that
+ * are not listed stay NULL), to be released with free_cpu_domain_info().
+ * Returns NULL when the file cannot be opened or an allocation fails; nothing
+ * is leaked and no partially filled table is handed out in that case.
+ */
+static struct cpu_domain_map **build_cpu_domain_map(u32 *schedstat_version, u32 *max_sched_domains,
+                                                   u32 nr)
+{
+       struct cpu_domain_map *cur = NULL;
+       struct cpu_domain_map **cd_map;
+       char dname[16], cpumask[256];
+       char cpulist[1024];
+       char *line = NULL;
+       size_t len = 0;
+       FILE *fp;
+
+       fp = fopen("/proc/schedstat", "r");
+       if (!fp) {
+               pr_err("Failed to open /proc/schedstat\n");
+               return NULL;
+       }
+
+       cd_map = zalloc(sizeof(*cd_map) * nr);
+       if (!cd_map)
+               goto out;
+
+       while (getline(&line, &len, fp) > 0) {
+               if (strncmp(line, "version", 7) == 0) {
+                       /* A malformed version line leaves the caller's value alone. */
+                       if (sscanf(line, "version %u", schedstat_version) != 1)
+                               continue;
+               } else if (strncmp(line, "cpu", 3) == 0) {
+                       u32 cpu;
+
+                       /*
+                        * Forget the previous CPU first, so that domain lines
+                        * following an unusable cpu line are not attributed to
+                        * the wrong entry.
+                        */
+                       cur = NULL;
+                       if (sscanf(line, "cpu%u %*s", &cpu) != 1 || cpu >= nr)
+                               continue;
+
+                       cur = zalloc(sizeof(*cur));
+                       if (!cur)
+                               goto out_free_map;
+
+                       cur->cpu = cpu;
+                       cd_map[cpu] = cur;
+               } else if (strncmp(line, "domain", 6) == 0) {
+                       struct domain_info **temp_domains;
+                       struct domain_info *d_info;
+                       u32 domain;
+
+                       if (!cur)
+                               continue;
+
+                       /*
+                        * Parse before allocating so that a malformed line does
+                        * not leave an entry without strings behind. The field
+                        * widths match the size of dname[] and cpumask[].
+                        */
+                       if (*schedstat_version >= 17) {
+                               if (sscanf(line, "domain%u %15s %255s %*s", &domain, dname,
+                                          cpumask) != 3)
+                                       continue;
+                       } else {
+                               if (sscanf(line, "domain%u %255s %*s", &domain, cpumask) != 2)
+                                       continue;
+                       }
+
+                       temp_domains = realloc(cur->domains,
+                                              (cur->nr_domains + 1) * sizeof(*temp_domains));
+                       if (!temp_domains)
+                               goto out_free_map;
+                       cur->domains = temp_domains;
+
+                       d_info = zalloc(sizeof(*d_info));
+                       if (!d_info)
+                               goto out_free_map;
+
+                       /*
+                        * Publish the entry right away: from here on
+                        * free_cpu_domain_info() releases it, including on the
+                        * error paths below.
+                        */
+                       cur->domains[cur->nr_domains++] = d_info;
+
+                       d_info->domain = domain;
+                       if (domain > *max_sched_domains)
+                               *max_sched_domains = domain;
+
+                       if (*schedstat_version >= 17) {
+                               d_info->dname = strdup(dname);
+                               if (!d_info->dname)
+                                       goto out_free_map;
+                       }
+
+                       d_info->cpumask = strdup(cpumask);
+                       if (!d_info->cpumask)
+                               goto out_free_map;
+
+                       /* cpumask_to_cpulist() may return without writing its output. */
+                       cpulist[0] = '\0';
+                       cpumask_to_cpulist(cpumask, cpulist);
+                       d_info->cpulist = strdup(cpulist);
+                       if (!d_info->cpulist)
+                               goto out_free_map;
+               }
+       }
+
+       free(line);
+       fclose(fp);
+       return cd_map;
+
+out_free_map:
+       free_cpu_domain_info(cd_map, *schedstat_version, nr);
+out:
+       free(line);
+       fclose(fp);
+       return NULL;
+}
+
+/*
+ * Write the HEADER_CPU_DOMAIN_INFO feature section.
+ *
+ * Layout: u32 schedstat version, u32 number of domain levels (highest domain
+ * index seen plus one), then for every CPU found in /proc/schedstat:
+ * u32 cpu, u32 nr_domains and, per domain, u32 domain index, the domain name
+ * string (schedstat version >= 17 only), the cpumask string and the cpu list
+ * string.
+ *
+ * Returns -1 if the domain map cannot be built, otherwise the result of the
+ * last do_write()/do_write_string() call, which is negative on failure.
+ */
+static int write_cpu_domain_info(struct feat_fd *ff,
+                                struct evlist *evlist __maybe_unused)
+{
+       u32 max_sched_domains = 0, schedstat_version = 0;
+       struct cpu_domain_map **cd_map;
+       u32 i, j, nr;
+       /* Must be signed: with an unsigned type the "ret < 0" checks never fire. */
+       int ret;
+
+       nr = cpu__max_present_cpu().cpu;
+
+       cd_map = build_cpu_domain_map(&schedstat_version, &max_sched_domains, nr);
+       if (!cd_map)
+               return -1;
+
+       ret = do_write(ff, &schedstat_version, sizeof(u32));
+       if (ret < 0)
+               goto out;
+
+       /* Turn the highest domain index into a count of domain levels. */
+       max_sched_domains += 1;
+       ret = do_write(ff, &max_sched_domains, sizeof(u32));
+       if (ret < 0)
+               goto out;
+
+       for (i = 0; i < nr; i++) {
+               if (!cd_map[i])
+                       continue;
+
+               ret = do_write(ff, &cd_map[i]->cpu, sizeof(u32));
+               if (ret < 0)
+                       goto out;
+
+               ret = do_write(ff, &cd_map[i]->nr_domains, sizeof(u32));
+               if (ret < 0)
+                       goto out;
+
+               for (j = 0; j < cd_map[i]->nr_domains; j++) {
+                       ret = do_write(ff, &cd_map[i]->domains[j]->domain, sizeof(u32));
+                       if (ret < 0)
+                               goto out;
+                       if (schedstat_version >= 17) {
+                               ret = do_write_string(ff, cd_map[i]->domains[j]->dname);
+                               if (ret < 0)
+                                       goto out;
+                       }
+
+                       ret = do_write_string(ff, cd_map[i]->domains[j]->cpumask);
+                       if (ret < 0)
+                               goto out;
+
+                       ret = do_write_string(ff, cd_map[i]->domains[j]->cpulist);
+                       if (ret < 0)
+                               goto out;
+               }
+       }
+
+out:
+       free_cpu_domain_info(cd_map, schedstat_version, nr);
+       return ret;
+}
+
 static void print_hostname(struct feat_fd *ff, FILE *fp)
 {
        fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -2247,6 +2403,39 @@ static void print_mem_topology(struct feat_fd *ff, FILE *fp)
        }
 }
 
+/*
+ * Dump the cpu -> sched domain relation stored in the header environment to
+ * @fp: schedstat version and domain count first, then every CPU that has an
+ * entry, with each of its domains.
+ */
+static void print_cpu_domain_info(struct feat_fd *ff, FILE *fp)
+{
+       struct perf_env *env = &ff->ph->env;
+       struct cpu_domain_map **map = env->cpu_domain;
+       u32 nr_cpus = env->nr_cpus_avail;
+       u32 c, d;
+
+       fprintf(fp, "# schedstat version        : %u\n", env->schedstat_version);
+       fprintf(fp, "# Maximum sched domains    : %u\n", env->max_sched_domains);
+
+       for (c = 0; c < nr_cpus; c++) {
+               struct cpu_domain_map *entry = map[c];
+
+               /* CPUs without recorded domain info have no entry. */
+               if (entry == NULL)
+                       continue;
+
+               fprintf(fp, "# cpu              : %u\n", entry->cpu);
+               fprintf(fp, "# nr_domains       : %u\n", entry->nr_domains);
+
+               for (d = 0; d < entry->nr_domains; d++) {
+                       struct domain_info *dom = entry->domains[d];
+
+                       if (dom == NULL)
+                               continue;
+
+                       fprintf(fp, "# Domain           : %u\n", dom->domain);
+
+                       /* Domain names are only recorded from version 17 on. */
+                       if (env->schedstat_version >= 17)
+                               fprintf(fp, "# Domain name      : %s\n", dom->dname);
+
+                       fprintf(fp, "# Domain cpu map   : %s\n", dom->cpumask);
+                       fprintf(fp, "# Domain cpu list  : %s\n", dom->cpulist);
+               }
+       }
+}
+
 static int __event_process_build_id(struct perf_record_header_build_id *bev,
                                    char *filename,
                                    struct perf_session *session)
@@ -3388,6 +3577,102 @@ err:
        return ret;
 }
 
+/*
+ * Read the HEADER_CPU_DOMAIN_INFO feature section into the header environment.
+ *
+ * Fills env->schedstat_version, env->max_sched_domains and env->cpu_domain, a
+ * table of env->nr_cpus_avail slots indexed by CPU number. One record is read
+ * for each of the env->nr_cpus_online CPUs. Within a CPU entry the domains
+ * array has max_sched_domains slots and is indexed by domain number.
+ *
+ * All indices and counts come from the file, so they are checked against the
+ * table sizes before being used.
+ *
+ * Returns 0 on success and non-zero on a read error, an allocation failure or
+ * inconsistent data. On failure the partially built table stays attached to
+ * env->cpu_domain in a state free_cpu_domain_info() can release.
+ */
+static int process_cpu_domain_info(struct feat_fd *ff, void *data __maybe_unused)
+{
+       u32 schedstat_version, max_sched_domains;
+       struct perf_env *env = &ff->ph->env;
+       struct cpu_domain_map **cd_map;
+       u32 nra, nr, i, j;
+       int ret;
+
+       nra = env->nr_cpus_avail;
+       nr = env->nr_cpus_online;
+
+       cd_map = zalloc(sizeof(*cd_map) * nra);
+       if (!cd_map)
+               return -1;
+
+       env->cpu_domain = cd_map;
+
+       ret = do_read_u32(ff, &schedstat_version);
+       if (ret)
+               return ret;
+
+       env->schedstat_version = schedstat_version;
+
+       ret = do_read_u32(ff, &max_sched_domains);
+       if (ret)
+               return ret;
+
+       env->max_sched_domains = max_sched_domains;
+
+       for (i = 0; i < nr; i++) {
+               struct cpu_domain_map *cd;
+               u32 cpu, nr_domains;
+
+               if (do_read_u32(ff, &cpu))
+                       return -1;
+
+               /* Reject CPUs outside the table and duplicated records. */
+               if (cpu >= nra || cd_map[cpu])
+                       return -1;
+
+               if (do_read_u32(ff, &nr_domains))
+                       return -1;
+
+               /* Users of the table walk nr_domains slots of domains[]. */
+               if (nr_domains > max_sched_domains)
+                       return -1;
+
+               cd = zalloc(sizeof(*cd));
+               if (!cd)
+                       return -1;
+
+               cd->cpu = cpu;
+               cd_map[cpu] = cd;
+
+               cd->domains = calloc(max_sched_domains, sizeof(*cd->domains));
+               if (!cd->domains)
+                       return -1;
+
+               /* Only publish the count once the array it refers to exists. */
+               cd->nr_domains = nr_domains;
+
+               for (j = 0; j < nr_domains; j++) {
+                       struct domain_info *d_info;
+                       u32 domain;
+
+                       if (do_read_u32(ff, &domain))
+                               return -1;
+
+                       if (domain >= max_sched_domains || cd->domains[domain])
+                               return -1;
+
+                       d_info = zalloc(sizeof(*d_info));
+                       if (!d_info)
+                               return -1;
+
+                       cd->domains[domain] = d_info;
+                       d_info->domain = domain;
+
+                       /*
+                        * Keep the buffers returned by do_read_string() instead
+                        * of copying them; the copies used to leak both the
+                        * original and a scratch allocation.
+                        * NOTE(review): relies on do_read_string() returning
+                        * memory owned by the caller and releasable with
+                        * free() - confirm against its definition.
+                        */
+                       if (schedstat_version >= 17) {
+                               d_info->dname = do_read_string(ff);
+                               if (!d_info->dname)
+                                       return -1;
+                       }
+
+                       d_info->cpumask = do_read_string(ff);
+                       if (!d_info->cpumask)
+                               return -1;
+
+                       d_info->cpulist = do_read_string(ff);
+                       if (!d_info->cpulist)
+                               return -1;
+               }
+       }
+
+       return 0;
+}
+
 #define FEAT_OPR(n, func, __full_only) \
        [HEADER_##n] = {                                        \
                .name       = __stringify(n),                   \
@@ -3453,6 +3738,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
        FEAT_OPR(CLOCK_DATA,    clock_data,     false),
        FEAT_OPN(HYBRID_TOPOLOGY,       hybrid_topology,        true),
        FEAT_OPR(PMU_CAPS,      pmu_caps,       false),
+       FEAT_OPR(CPU_DOMAIN_INFO,       cpu_domain_info,        true),
 };
 
 struct header_print_data {
index c058021c3150b3d500d4f9beda2a0eb247d29dbb..c62f3275a80f629308bc398ed2f865fda9f94fee 100644 (file)
@@ -53,6 +53,7 @@ enum {
        HEADER_CLOCK_DATA,
        HEADER_HYBRID_TOPOLOGY,
        HEADER_PMU_CAPS,
+       HEADER_CPU_DOMAIN_INFO,
        HEADER_LAST_FEATURE,
        HEADER_FEAT_BITS        = 256,
 };
index 0f031eb80b4c5b816e01fa34f92bc4449ae8d665..b87ff96a9f45e8b27e236e2368476d8c0b953e57 100644 (file)
@@ -257,6 +257,48 @@ static int rm_rf_kcore_dir(const char *path)
        return 0;
 }
 
+/*
+ * Convert a hexadecimal cpumask string into a cpu list string.
+ *
+ * @cpumask: NUL terminated hex mask, optionally with ',' group separators.
+ *           It is modified in place: the separators are removed.
+ * @cpulist: output buffer, which must have room for at least 1024 bytes. It
+ *           always holds a valid string on return and is left empty when the
+ *           mask is empty or memory cannot be allocated.
+ */
+void cpumask_to_cpulist(char *cpumask, char *cpulist)
+{
+       /* Number of hex digits that fit into one bitmap word. */
+       const size_t digits_per_word = sizeof(unsigned long) * 2;
+       size_t len, bm_size, i;
+       unsigned long *bm;
+       char cpus[1024];
+       char *src, *dst;
+
+       cpulist[0] = '\0';
+
+       /* Drop the ',' separators, compacting the string in a single pass. */
+       for (src = dst = cpumask; *src; src++) {
+               if (*src != ',')
+                       *dst++ = *src;
+       }
+       *dst = '\0';
+
+       len = dst - cpumask;
+       if (len == 0)
+               return;
+
+       bm_size = (len + digits_per_word - 1) / digits_per_word;
+       bm = calloc(bm_size, sizeof(unsigned long));
+       if (!bm)
+               return;
+
+       /*
+        * The least significant digits are at the end of the string, so fill
+        * the bitmap words from the tail towards the head.
+        */
+       for (i = 0; i < bm_size; i++) {
+               char blk[sizeof(unsigned long) * 2 + 1];
+               size_t blklen = len > digits_per_word ? digits_per_word : len;
+
+               memcpy(blk, cpumask + len - blklen, blklen);
+               blk[blklen] = '\0';
+               bm[i] = strtoul(blk, NULL, 16);
+               len -= blklen;
+       }
+
+       bitmap_scnprintf(bm, bm_size * sizeof(unsigned long) * 8, cpus, sizeof(cpus));
+       strcpy(cpulist, cpus);
+
+       free(bm);
+}
+
 int rm_rf_perf_data(const char *path)
 {
        const char *pat[] = {
index 3423778e39a568ac70e128e944aa48b1f7065ac3..1572c8cf04e59a88974aa5d8070ce123b8c3930d 100644 (file)
@@ -11,6 +11,7 @@
 #include <stdbool.h>
 #include <stddef.h>
 #include <linux/compiler.h>
+#include <linux/bitmap.h>
 #include <sys/types.h>
 #ifndef __cplusplus
 #include <internal/cpumap.h>
@@ -48,6 +49,8 @@ bool sysctl__nmi_watchdog_enabled(void);
 
 int perf_tip(char **strp, const char *dirpath);
 
+void cpumask_to_cpulist(char *cpumask, char *cpulist);
+
 #ifndef HAVE_SCHED_GETCPU_SUPPORT
 int sched_getcpu(void);
 #endif