From: Swapnil Sapkal
Date: Mon, 19 Jan 2026 17:58:24 +0000 (+0000)
Subject: perf header: Support CPU DOMAIN relation info
X-Git-Tag: v7.0-rc1~16^2~152
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d40c68a49f69c9bdb4ca14b3e6a0422bbaeb5d8f;p=thirdparty%2Flinux.git

perf header: Support CPU DOMAIN relation info

The '/proc/schedstat' file gives info about load balancing statistics
within a given domain. It also contains the cpu_mask giving information
about the sibling cpus and domain names after schedstat version 17. Storing
this information in perf header will help tools like `perf sched stats` for
better analysis.

Signed-off-by: Swapnil Sapkal
Tested-by: Chen Yu
Acked-by: Ian Rogers
Acked-by: Namhyung Kim
Acked-by: Peter Zijlstra
Cc: Adrian Hunter
Cc: Alexander Shishkin
Cc: Andi Kleen
Cc: Anubhav Shelat
Cc: Ben Gainey
Cc: Blake Jones
Cc: Chun-Tse Shao
Cc: David Vernet
Cc: Dmitriy Vyukov
Cc: Dr. David Alan Gilbert
Cc: Gautham Shenoy
Cc: Graham Woodward
Cc: Ingo Molnar
Cc: James Clark
Cc: Jiri Olsa
Cc: Juri Lelli
Cc: K Prateek Nayak
Cc: Kan Liang
Cc: Leo Yan
Cc: Madadi Vineeth Reddy
Cc: Mark Rutland
Cc: Ravi Bangoria
Cc: Sandipan Das
Cc: Santosh Shukla
Cc: Shrikanth Hegde
Cc: Steven Rostedt (VMware)
Cc: Tejun Heo
Cc: Thomas Falcon
Cc: Tim Chen
Cc: Vincent Guittot
Cc: Yang Jihong
Cc: Yujie Liu
Cc: Zhongqiu Han
Signed-off-by: Arnaldo Carvalho de Melo
---

diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index c9d4dec65344..0e4d0ecc9e12 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -447,6 +447,23 @@ struct {
 	} [nr_pmu];
 };
 
+	HEADER_CPU_DOMAIN_INFO = 32,
+
+List of cpu-domain relation info. The format of the data is as below.
+
+struct domain_info {
+	int domain;
+	char dname[];
+	char cpumask[];
+	char cpulist[];
+};
+
+struct cpu_domain_info {
+	int cpu;
+	int nr_domains;
+	struct domain_info domains[];
+};
+
 	other bits are reserved and should ignored for now
 	HEADER_FEAT_BITS = 256,
 
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index e2a653280e1b..c89ac85ec112 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2133,6 +2133,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
 	case HEADER_CLOCK_DATA:
 	case HEADER_HYBRID_TOPOLOGY:
 	case HEADER_PMU_CAPS:
+	case HEADER_CPU_DOMAIN_INFO:
 		return true;
 	/* Information that can be updated */
 	case HEADER_BUILD_ID:
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index f1626d2032cd..93d475a80f14 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -216,6 +216,47 @@ static void perf_env__purge_bpf(struct perf_env *env __maybe_unused)
 }
 #endif // HAVE_LIBBPF_SUPPORT
 
+/*
+ * Free a cpu -> sched-domain map and everything hanging off it.
+ *
+ * @cd_map: array of @nr per-CPU slots; NULL, empty slots and entries
+ *          without a domains array are all tolerated.
+ * @schedstat_version: domain names are released only for versions >= 17.
+ * @nr: number of slots in @cd_map.
+ *
+ * @cd_map is passed by value: the caller's own pointer is stale afterwards.
+ */
+void free_cpu_domain_info(struct cpu_domain_map **cd_map, u32 schedstat_version, u32 nr)
+{
+	if (!cd_map)
+		return;
+
+	for (u32 i = 0; i < nr; i++) {
+		struct cpu_domain_map *entry = cd_map[i];
+
+		if (!entry)
+			continue;
+
+		/* Tolerate an entry whose domains array was never allocated. */
+		for (u32 j = 0; entry->domains && j < entry->nr_domains; j++) {
+			struct domain_info *d_info = entry->domains[j];
+
+			if (!d_info)
+				continue;
+
+			if (schedstat_version >= 17)
+				zfree(&d_info->dname);
+
+			zfree(&d_info->cpumask);
+			zfree(&d_info->cpulist);
+			zfree(&d_info);
+		}
+		zfree(&entry->domains);
+		zfree(&cd_map[i]);
+	}
+	zfree(&cd_map);
+}
+
 void perf_env__exit(struct perf_env *env)
 {
 	int i, j;
@@ -265,6 +306,8 @@ void perf_env__exit(struct perf_env *env)
 		zfree(&env->pmu_caps[i].pmu_name);
 	}
 	zfree(&env->pmu_caps);
+	free_cpu_domain_info(env->cpu_domain, env->schedstat_version, env->nr_cpus_avail);
+	env->cpu_domain = NULL;
 }
 
 void perf_env__init(struct perf_env *env)
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 9977b85523a8..76ba1a36e9ff 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -54,6 +54,19 @@
struct pmu_caps {
 	char *pmu_name;
 };
 
+struct domain_info {
+	u32 domain;
+	char *dname;
+	char *cpumask;
+	char *cpulist;
+};
+
+struct cpu_domain_map {
+	u32 cpu;
+	u32 nr_domains;
+	struct domain_info **domains;
+};
+
 typedef const char *(arch_syscalls__strerrno_t)(int err);
 
 struct perf_env {
@@ -70,6 +83,8 @@ struct perf_env {
 	unsigned int max_branches;
 	unsigned int br_cntr_nr;
 	unsigned int br_cntr_width;
+	unsigned int schedstat_version;
+	unsigned int max_sched_domains;
 	int kernel_is_64_bit;
 
 	int nr_cmdline;
@@ -92,6 +107,7 @@ struct perf_env {
 	char **cpu_pmu_caps;
 	struct cpu_topology_map *cpu;
 	struct cpu_cache_level *caches;
+	struct cpu_domain_map **cpu_domain;
 	int caches_cnt;
 	u32 comp_ratio;
 	u32 comp_ver;
@@ -151,6 +167,7 @@ struct bpf_prog_info_node;
 struct btf_node;
 
 int perf_env__read_core_pmu_caps(struct perf_env *env);
+void free_cpu_domain_info(struct cpu_domain_map **cd_map, u32 schedstat_version, u32 nr);
 void perf_env__exit(struct perf_env *env);
 int perf_env__kernel_is_64_bit(struct perf_env *env);
 
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f5cad377c99e..673d53bb2a2c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1614,6 +1614,213 @@ static int write_pmu_caps(struct feat_fd *ff,
 	return 0;
 }
 
+/* True when a bounded %s conversion stopped at a token boundary, not at its width. */
+static bool schedstat_token_complete(const char *line, int end)
+{
+	return line[end] == '\0' || line[end] == ' ' || line[end] == '\t' || line[end] == '\n';
+}
+
+/*
+ * Parse /proc/schedstat into an array of per-CPU scheduler domain records.
+ *
+ * @schedstat_version: set from the "version" line of the file.
+ * @max_sched_domains: raised to the highest domain number that was parsed.
+ * @nr: number of slots in the returned array, which is indexed by CPU number.
+ *
+ * Domain names are parsed only when the version is >= 17.  Unparsable lines,
+ * overlong tokens, "cpu" lines outside [0, @nr) and "domain" lines without a
+ * preceding accepted "cpu" line are skipped.
+ *
+ * Returns the map, to be released with free_cpu_domain_info(), or NULL when
+ * the file cannot be opened or an allocation fails.
+ */
+static struct cpu_domain_map **build_cpu_domain_map(u32 *schedstat_version, u32 *max_sched_domains,
+						    u32 nr)
+{
+	struct cpu_domain_map **cd_map, *cur = NULL;
+	char dname[16], cpumask[256];
+	char cpulist[1024];
+	char *line = NULL;
+	size_t len = 0;
+	FILE *fp;
+
+	fp = fopen("/proc/schedstat", "r");
+	if (!fp) {
+		pr_err("Failed to open /proc/schedstat\n");
+		return NULL;
+	}
+
+	cd_map = zalloc(sizeof(*cd_map) * nr);
+	if (!cd_map)
+		goto out;
+
+	while (getline(&line, &len, fp) > 0) {
+		if (strncmp(line, "version", 7) == 0) {
+			/* %u matches the u32 target; a mismatch leaves it untouched. */
+			if (sscanf(line, "version %u", schedstat_version) != 1)
+				continue;
+		} else if (strncmp(line, "cpu", 3) == 0) {
+			u32 cpu;
+
+			/* Domain lines are dropped until a CPU line is accepted. */
+			cur = NULL;
+			if (sscanf(line, "cpu%u", &cpu) != 1)
+				continue;
+
+			/* cpu indexes cd_map; a repeated CPU would leak its entry. */
+			if (cpu >= nr || cd_map[cpu])
+				continue;
+
+			cur = zalloc(sizeof(*cur));
+			if (!cur)
+				goto err_free;
+
+			cur->cpu = cpu;
+			cd_map[cpu] = cur;
+		} else if (strncmp(line, "domain", 6) == 0) {
+			struct domain_info **domains, *d_info;
+			int name_end = 0, mask_end = 0;
+			u32 domain;
+
+			if (!cur)
+				continue;
+
+			/* Field widths keep the scans inside dname[] and cpumask[]. */
+			if (*schedstat_version >= 17) {
+				if (sscanf(line, "domain%u %15s%n %255s%n", &domain, dname,
+					   &name_end, cpumask, &mask_end) != 3 ||
+				    !schedstat_token_complete(line, name_end))
+					continue;
+			} else if (sscanf(line, "domain%u %255s%n", &domain, cpumask,
+					  &mask_end) != 2) {
+				continue;
+			}
+
+			if (!schedstat_token_complete(line, mask_end))
+				continue;
+
+			/* Allocate only once the line is known to be usable. */
+			domains = realloc(cur->domains, (cur->nr_domains + 1) * sizeof(*domains));
+			if (!domains)
+				goto err_free;
+			cur->domains = domains;
+
+			d_info = zalloc(sizeof(*d_info));
+			if (!d_info)
+				goto err_free;
+
+			/* Linked in before it is filled so that err_free releases it too. */
+			cur->domains[cur->nr_domains++] = d_info;
+
+			d_info->domain = domain;
+			if (domain > *max_sched_domains)
+				*max_sched_domains = domain;
+
+			if (*schedstat_version >= 17) {
+				d_info->dname = strdup(dname);
+				if (!d_info->dname)
+					goto err_free;
+			}
+
+			d_info->cpumask = strdup(cpumask);
+			if (!d_info->cpumask)
+				goto err_free;
+
+			/* cpumask_to_cpulist() can return without writing its output. */
+			cpulist[0] = '\0';
+			cpumask_to_cpulist(cpumask, cpulist);
+			d_info->cpulist = strdup(cpulist);
+			if (!d_info->cpulist)
+				goto err_free;
+		}
+	}
+
+	goto out_free_line;
+
+err_free:
+	free_cpu_domain_info(cd_map, *schedstat_version, nr);
+	cd_map = NULL;
+out_free_line:
+	free(line);
+out:
+	fclose(fp);
+	return cd_map;
+}
+
+/*
+ * Write the HEADER_CPU_DOMAIN_INFO section: the schedstat version, the number
+ * of domain levels (highest domain number + 1), then for each CPU present in
+ * the map its number and domain count followed, per domain, by the domain
+ * number, the name (version >= 17 only), the cpumask and the cpulist.
+ *
+ * Returns -1 when the map cannot be built, otherwise the result of the last
+ * do_write()/do_write_string() call made.
+ */
+static int write_cpu_domain_info(struct feat_fd *ff,
+				 struct evlist *evlist __maybe_unused)
+{
+	u32 max_sched_domains = 0, schedstat_version = 0;
+	struct cpu_domain_map **cd_map;
+	u32 i, j, nr;
+	int ret;	/* signed, so that the "ret < 0" checks can fire */
+
+	nr = cpu__max_present_cpu().cpu;
+
+	cd_map = build_cpu_domain_map(&schedstat_version, &max_sched_domains, nr);
+	if (!cd_map)
+		return -1;
+
+	ret = do_write(ff, &schedstat_version, sizeof(u32));
+	if (ret < 0)
+		goto out;
+
+	max_sched_domains += 1;
+	ret = do_write(ff, &max_sched_domains, sizeof(u32));
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < nr; i++) {
+		struct cpu_domain_map *cd = cd_map[i];
+
+		if (!cd)
+			continue;
+
+		ret = do_write(ff, &cd->cpu, sizeof(u32));
+		if (ret < 0)
+			goto out;
+
+		ret = do_write(ff, &cd->nr_domains, sizeof(u32));
+		if (ret < 0)
+			goto out;
+
+		for (j = 0; j < cd->nr_domains; j++) {
+			struct domain_info *d_info = cd->domains[j];
+
+			ret = do_write(ff, &d_info->domain, sizeof(u32));
+			if (ret < 0)
+				goto out;
+
+			if (schedstat_version >= 17) {
+				ret = do_write_string(ff, d_info->dname);
+				if (ret < 0)
+					goto out;
+			}
+
+			ret = do_write_string(ff, d_info->cpumask);
+			if (ret < 0)
+				goto out;
+
+			ret = do_write_string(ff, d_info->cpulist);
+			if (ret < 0)
+				goto out;
+		}
+	}
+
+out:
+	free_cpu_domain_info(cd_map, schedstat_version, nr);
+	return ret;
+}
+
 static void print_hostname(struct feat_fd *ff, FILE *fp)
 {
 	fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -2247,6 +2454,50 @@ static void print_mem_topology(struct feat_fd *ff, FILE *fp)
 	}
 }
 
+/*
+ * Print the schedstat version, the number of domain levels and, for every CPU
+ * that has an entry in env->cpu_domain, its scheduler domains.
+ */
+static void print_cpu_domain_info(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_env *env = &ff->ph->env;
+	struct cpu_domain_map **cd_map = env->cpu_domain;
+	u32 nr = env->nr_cpus_avail;
+	u32 i, j;
+
+	fprintf(fp, "# schedstat version : %u\n", env->schedstat_version);
+	fprintf(fp, "# Maximum sched domains : %u\n", env->max_sched_domains);
+
+	/* No map is attached to the env, so there is nothing per-CPU to show. */
+	if (!cd_map)
+		return;
+
+	for (i = 0; i < nr; i++) {
+		struct cpu_domain_map *cd = cd_map[i];
+
+		if (!cd)
+			continue;
+
+		fprintf(fp, "# cpu : %u\n", cd->cpu);
+		fprintf(fp, "# nr_domains : %u\n", cd->nr_domains);
+
+		for (j = 0; cd->domains && j < cd->nr_domains; j++) {
+			struct domain_info *d_info = cd->domains[j];
+
+			if (!d_info)
+				continue;
+
+			fprintf(fp, "# Domain : %u\n", d_info->domain);
+
+			if (env->schedstat_version >= 17)
+				fprintf(fp, "# Domain name : %s\n", d_info->dname);
+
+			fprintf(fp, "# Domain cpu map : %s\n", d_info->cpumask);
+			fprintf(fp, "# Domain cpu list : %s\n", d_info->cpulist);
+		}
+	}
+}
+
 static int __event_process_build_id(struct perf_record_header_build_id *bev,
 				    char *filename,
 				    struct perf_session *session)
@@ -3388,6 +3639,119 @@ err:
 	return ret;
 }
 
+/*
+ * Read the HEADER_CPU_DOMAIN_INFO section into the perf_env.
+ *
+ * The map is attached to env->cpu_domain before anything is read into it, so
+ * whatever was built before a failure is released by perf_env__exit().
+ *
+ * Returns 0 on success; non-zero on a failed read or allocation, or when a
+ * CPU or domain number does not fit the arrays it is used to index.
+ */
+static int process_cpu_domain_info(struct feat_fd *ff, void *data __maybe_unused)
+{
+	u32 schedstat_version, max_sched_domains, cpu, domain, nr_domains;
+	struct perf_env *env = &ff->ph->env;
+	struct cpu_domain_map **cd_map;
+	struct domain_info *d_info;
+	u32 nra, nr, i, j;
+	int ret;
+
+	nra = env->nr_cpus_avail;
+	nr = env->nr_cpus_online;
+
+	cd_map = zalloc(sizeof(*cd_map) * nra);
+	if (!cd_map)
+		return -1;
+
+	env->cpu_domain = cd_map;
+
+	ret = do_read_u32(ff, &schedstat_version);
+	if (ret)
+		return ret;
+
+	env->schedstat_version = schedstat_version;
+
+	ret = do_read_u32(ff, &max_sched_domains);
+	if (ret)
+		return ret;
+
+	env->max_sched_domains = max_sched_domains;
+
+	for (i = 0; i < nr; i++) {
+		struct cpu_domain_map *cd;
+
+		if (do_read_u32(ff, &cpu))
+			return -1;
+
+		/* cpu indexes cd_map; a repeated CPU would leak its first entry. */
+		if (cpu >= nra || cd_map[cpu])
+			return -1;
+
+		if (do_read_u32(ff, &nr_domains))
+			return -1;
+
+		/* The free and print paths walk nr_domains slots of the array below. */
+		if (nr_domains > max_sched_domains)
+			return -1;
+
+		cd = zalloc(sizeof(*cd));
+		if (!cd)
+			return -1;
+
+		cd->cpu = cpu;
+		cd_map[cpu] = cd;
+
+		/* An array of pointers, one slot per domain level. */
+		cd->domains = calloc(max_sched_domains, sizeof(*cd->domains));
+		if (!cd->domains)
+			return -1;
+
+		/* Set only now, so a NULL array is never paired with a count. */
+		cd->nr_domains = nr_domains;
+
+		for (j = 0; j < nr_domains; j++) {
+			if (do_read_u32(ff, &domain))
+				return -1;
+
+			/*
+			 * domain is the slot index.  Slots at or beyond nr_domains
+			 * are never visited when the map is printed or freed.
+			 */
+			if (domain >= nr_domains || cd->domains[domain])
+				return -1;
+
+			d_info = zalloc(sizeof(*d_info));
+			if (!d_info)
+				return -1;
+
+			cd->domains[domain] = d_info;
+			d_info->domain = domain;
+
+			/*
+			 * NOTE(review): the strings are stored as returned, which
+			 * assumes do_read_string() hands over a heap buffer owned by
+			 * the caller - confirm against its definition.
+			 */
+			if (schedstat_version >= 17) {
+				d_info->dname = do_read_string(ff);
+				if (!d_info->dname)
+					return -1;
+			}
+
+			d_info->cpumask = do_read_string(ff);
+			if (!d_info->cpumask)
+				return -1;
+
+			d_info->cpulist = do_read_string(ff);
+			if (!d_info->cpulist)
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
 #define FEAT_OPR(n, func, __full_only) \
 	[HEADER_##n] = { \
 		.name = __stringify(n), \
@@ -3453,6 +3817,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPR(CLOCK_DATA, clock_data, false),
 	FEAT_OPN(HYBRID_TOPOLOGY, hybrid_topology, true),
 	FEAT_OPR(PMU_CAPS, pmu_caps, false),
+	FEAT_OPR(CPU_DOMAIN_INFO, cpu_domain_info, true),
 };
 
 struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index c058021c3150..c62f3275a80f 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -53,6 +53,7 @@ enum {
 	HEADER_CLOCK_DATA,
 	HEADER_HYBRID_TOPOLOGY,
 	HEADER_PMU_CAPS,
+	HEADER_CPU_DOMAIN_INFO,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS = 256,
 };
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 0f031eb80b4c..b87ff96a9f45 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -257,6 +257,62 @@ static int rm_rf_kcore_dir(const char *path)
 	return 0;
 }
 
+/*
+ * Convert a hexadecimal cpumask string, with or without ',' separators,
+ * into the text that bitmap_scnprintf() produces for the same bits.
+ *
+ * @cpumask: NUL-terminated mask string; it is left unmodified.
+ * @cpulist: output buffer of at least 1024 bytes.  It is set to the empty
+ *           string when the mask has no characters or memory runs out.
+ */
+void cpumask_to_cpulist(char *cpumask, char *cpulist)
+{
+	/* Hex digits per bitmap word, derived rather than assuming 64-bit longs. */
+	const int word_digits = 2 * sizeof(unsigned long);
+	int len = 0, nwords, nbits, i;
+	unsigned long *bm = NULL;
+	char *digits, *src;
+	char cpus[1024];
+
+	cpulist[0] = '\0';
+
+	/* Strip the separators into a private copy of the mask. */
+	digits = malloc(strlen(cpumask) + 1);
+	if (!digits)
+		return;
+
+	for (src = cpumask; *src; src++) {
+		if (*src != ',')
+			digits[len++] = *src;
+	}
+	digits[len] = '\0';
+
+	nwords = (len + word_digits - 1) / word_digits;
+	nbits = nwords * word_digits * 4;
+	if (nbits <= 0)
+		goto out;
+
+	bm = calloc(nwords, sizeof(*bm));
+	if (!bm)
+		goto out;
+
+	/* Word 0 takes the last digits of the string, the least significant bits. */
+	for (i = 0; i < nwords; i++) {
+		int blklen = len > word_digits ? word_digits : len;
+
+		len -= blklen;
+		bm[i] = strtoul(digits + len, NULL, 16);
+		/* Cut the parsed digits off so the next word ends here. */
+		digits[len] = '\0';
+	}
+
+	bitmap_scnprintf(bm, nbits, cpus, sizeof(cpus));
+	strcpy(cpulist, cpus);
+out:
+	free(bm);
+	free(digits);
+}
+
 int rm_rf_perf_data(const char *path)
 {
 	const char *pat[] = {
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 3423778e39a5..1572c8cf04e5 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 #include
 #ifndef __cplusplus
 #include
@@ -48,6 +49,8 @@ bool sysctl__nmi_watchdog_enabled(void);
 
 int perf_tip(char **strp, const char *dirpath);
 
+void cpumask_to_cpulist(char *cpumask, char *cpulist);
+
 #ifndef HAVE_SCHED_GETCPU_SUPPORT
 int sched_getcpu(void);
 #endif