]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
cgroup/rstat: Tracking cgroup-level niced CPU time
authorJoshua Hahn <joshua.hahn6@gmail.com>
Wed, 2 Oct 2024 18:47:16 +0000 (11:47 -0700)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 10 Apr 2025 12:39:35 +0000 (14:39 +0200)
[ Upstream commit aefa398d93d5db7c555be78a605ff015357f127d ]

Cgroup-level CPU statistics currently include time spent on
user/system processes, but do not include niced CPU time (despite
already being tracked). This patch exposes niced CPU time to the
userspace, allowing users to get a better understanding of their
hardware limits and can facilitate more informed workload distribution.

A new field 'ntime' is added to struct cgroup_base_stat as opposed to
struct task_cputime to minimize footprint.

Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Stable-dep-of: c4af66a95aa3 ("cgroup/rstat: Fix forceidle time in cpu.stat")
Signed-off-by: Sasha Levin <sashal@kernel.org>
include/linux/cgroup-defs.h
kernel/cgroup/rstat.c

index a32eebcd23da470c1ea2aa11305c63141fd73a14..38b2af336e4a01e91657f074de7084ac62580137 100644 (file)
@@ -324,6 +324,7 @@ struct cgroup_base_stat {
 #ifdef CONFIG_SCHED_CORE
        u64 forceidle_sum;
 #endif
+       u64 ntime;
 };
 
 /*
index ce295b73c0a3669ac39f6d81191a3943fa02a700..aac91466279f17f76a8a0e56cb47b9f9ceaa0bbf 100644 (file)
@@ -444,6 +444,7 @@ static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat,
 #ifdef CONFIG_SCHED_CORE
        dst_bstat->forceidle_sum += src_bstat->forceidle_sum;
 #endif
+       dst_bstat->ntime += src_bstat->ntime;
 }
 
 static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
@@ -455,6 +456,7 @@ static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
 #ifdef CONFIG_SCHED_CORE
        dst_bstat->forceidle_sum -= src_bstat->forceidle_sum;
 #endif
+       dst_bstat->ntime -= src_bstat->ntime;
 }
 
 static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
@@ -534,8 +536,10 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
        rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
 
        switch (index) {
-       case CPUTIME_USER:
        case CPUTIME_NICE:
+               rstatc->bstat.ntime += delta_exec;
+               fallthrough;
+       case CPUTIME_USER:
                rstatc->bstat.cputime.utime += delta_exec;
                break;
        case CPUTIME_SYSTEM:
@@ -590,6 +594,7 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
 #ifdef CONFIG_SCHED_CORE
                bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE];
 #endif
+               bstat->ntime += cpustat[CPUTIME_NICE];
        }
 }
 
@@ -607,13 +612,14 @@ static void cgroup_force_idle_show(struct seq_file *seq, struct cgroup_base_stat
 void cgroup_base_stat_cputime_show(struct seq_file *seq)
 {
        struct cgroup *cgrp = seq_css(seq)->cgroup;
-       u64 usage, utime, stime;
+       u64 usage, utime, stime, ntime;
 
        if (cgroup_parent(cgrp)) {
                cgroup_rstat_flush_hold(cgrp);
                usage = cgrp->bstat.cputime.sum_exec_runtime;
                cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
                               &utime, &stime);
+               ntime = cgrp->bstat.ntime;
                cgroup_rstat_flush_release(cgrp);
        } else {
                /* cgrp->bstat of root is not actually used, reuse it */
@@ -621,16 +627,19 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
                usage = cgrp->bstat.cputime.sum_exec_runtime;
                utime = cgrp->bstat.cputime.utime;
                stime = cgrp->bstat.cputime.stime;
+               ntime = cgrp->bstat.ntime;
        }
 
        do_div(usage, NSEC_PER_USEC);
        do_div(utime, NSEC_PER_USEC);
        do_div(stime, NSEC_PER_USEC);
+       do_div(ntime, NSEC_PER_USEC);
 
        seq_printf(seq, "usage_usec %llu\n"
-                  "user_usec %llu\n"
-                  "system_usec %llu\n",
-                  usage, utime, stime);
+                       "user_usec %llu\n"
+                       "system_usec %llu\n"
+                       "nice_usec %llu\n",
+                       usage, utime, stime, ntime);
 
        cgroup_force_idle_show(seq, &cgrp->bstat);
 }