cgroup: Show # of subsystem CSSes in cgroup.stat

author Waiman Long <longman@redhat.com>

Mon, 15 Jul 2024 15:00:34 +0000 (11:00 -0400)

committer Tejun Heo <tj@kernel.org>

Wed, 31 Jul 2024 17:00:02 +0000 (07:00 -1000)
author Waiman Long <longman@redhat.com>
Mon, 15 Jul 2024 15:00:34 +0000 (11:00 -0400)
committer Tejun Heo <tj@kernel.org>
Wed, 31 Jul 2024 17:00:02 +0000 (07:00 -1000)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst

index 86311c2907cd3afdc54e47e34fa0f1536aa65cd8..70cefccd07cee5803a1ddd28768126a579b76eae 100644 (file)
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -981,6 +981,14 @@ All cgroup core files are prefixed with "cgroup."
                 A dying cgroup can consume system resources not exceeding
                 limits, which were active at the moment of cgroup deletion.
  
+         nr_subsys_<cgroup_subsys>
+               Total number of live cgroup subsystems (e.g. memory
+               cgroup) at and beneath the current cgroup.
+
+         nr_dying_subsys_<cgroup_subsys>
+               Total number of dying cgroup subsystems (e.g. memory
+               cgroup) at and beneath the current cgroup.
+
    cgroup.freeze
         A read-write single value file which exists on non-root cgroups.
         Allowed values are "0" and "1". The default is "0".
@@ -2939,8 +2947,8 @@ Deprecated v1 Core Features
  
  - "cgroup.clone_children" is removed.
  
-- /proc/cgroups is meaningless for v2.  Use "cgroup.controllers" file
-  at the root instead.
+- /proc/cgroups is meaningless for v2.  Use "cgroup.controllers" or
+  "cgroup.stat" files at the root instead.
  
  
  Issues with v1 and Rationales for v2
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h

index ae04035b6cbe58453760fb175fe2be0f87a37ff0..eb0f6f349496f56ce28f57b93b260161d43b670d 100644 (file)
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -210,6 +210,14 @@ struct cgroup_subsys_state {
          * fields of the containing structure.
          */
         struct cgroup_subsys_state *parent;
+
+       /*
+        * Keep track of the total number of visible descendant CSSes.
+        * The total number of dying CSSes is tracked in
+        * css->cgroup->nr_dying_subsys[ssid].
+        * Protected by cgroup_mutex.
+        */
+       int nr_descendants;
  };
  
  /*
@@ -470,6 +478,12 @@ struct cgroup {
         /* Private pointers for each registered subsystem */
         struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
  
+       /*
+        * Keep track of total number of dying CSSes at and below this cgroup.
+        * Protected by cgroup_mutex.
+        */
+       int nr_dying_subsys[CGROUP_SUBSYS_COUNT];
+
         struct cgroup_root *root;
  
         /*
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c

index c8e4b62b436a48ae74d6242236b4f29a7f43c159..601600afdd2024d8f18a161707f3b3a09cffb722 100644 (file)
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3669,12 +3669,40 @@ static int cgroup_events_show(struct seq_file *seq, void *v)
  static int cgroup_stat_show(struct seq_file *seq, void *v)
  {
         struct cgroup *cgroup = seq_css(seq)->cgroup;
+       struct cgroup_subsys_state *css;
+       int dying_cnt[CGROUP_SUBSYS_COUNT];
+       int ssid;
  
         seq_printf(seq, "nr_descendants %d\n",
                    cgroup->nr_descendants);
+
+       /*
+        * Show the number of live and dying csses associated with each
+        * non-inhibited cgroup subsystem that is bound to cgroup v2.
+        *
+        * Without proper lock protection, racing is possible. So the
+        * numbers may not be consistent when that happens.
+        */
+       rcu_read_lock();
+       for (ssid = 0; ssid < CGROUP_SUBSYS_COUNT; ssid++) {
+               dying_cnt[ssid] = -1;
+               if ((BIT(ssid) & cgrp_dfl_inhibit_ss_mask) ||
+                   (cgroup_subsys[ssid]->root !=  &cgrp_dfl_root))
+                       continue;
+               css = rcu_dereference_raw(cgroup->subsys[ssid]);
+               dying_cnt[ssid] = cgroup->nr_dying_subsys[ssid];
+               seq_printf(seq, "nr_subsys_%s %d\n", cgroup_subsys[ssid]->name,
+                          css ? (css->nr_descendants + 1) : 0);
+       }
+
         seq_printf(seq, "nr_dying_descendants %d\n",
                    cgroup->nr_dying_descendants);
-
+       for (ssid = 0; ssid < CGROUP_SUBSYS_COUNT; ssid++) {
+               if (dying_cnt[ssid] >= 0)
+                       seq_printf(seq, "nr_dying_subsys_%s %d\n",
+                                  cgroup_subsys[ssid]->name, dying_cnt[ssid]);
+       }
+       rcu_read_unlock();
         return 0;
  }
  
@@ -5424,6 +5452,8 @@ static void css_release_work_fn(struct work_struct *work)
         list_del_rcu(&css->sibling);
  
         if (ss) {
+               struct cgroup *parent_cgrp;
+
                 /* css release path */
                 if (!list_empty(&css->rstat_css_node)) {
                         cgroup_rstat_flush(cgrp);
@@ -5433,6 +5463,14 @@ static void css_release_work_fn(struct work_struct *work)
                 cgroup_idr_replace(&ss->css_idr, NULL, css->id);
                 if (ss->css_released)
                         ss->css_released(css);
+
+               cgrp->nr_dying_subsys[ss->id]--;
+               WARN_ON_ONCE(css->nr_descendants || cgrp->nr_dying_subsys[ss->id]);
+               parent_cgrp = cgroup_parent(cgrp);
+               while (parent_cgrp) {
+                       parent_cgrp->nr_dying_subsys[ss->id]--;
+                       parent_cgrp = cgroup_parent(parent_cgrp);
+               }
         } else {
                 struct cgroup *tcgrp;
  
@@ -5517,8 +5555,11 @@ static int online_css(struct cgroup_subsys_state *css)
                 rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
  
                 atomic_inc(&css->online_cnt);
-               if (css->parent)
+               if (css->parent) {
                         atomic_inc(&css->parent->online_cnt);
+                       while ((css = css->parent))
+                               css->nr_descendants++;
+               }
         }
         return ret;
  }
@@ -5540,6 +5581,16 @@ static void offline_css(struct cgroup_subsys_state *css)
         RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
  
         wake_up_all(&css->cgroup->offline_waitq);
+
+       css->cgroup->nr_dying_subsys[ss->id]++;
+       /*
+        * Parent css and cgroup cannot be freed until after the freeing
+        * of child css, see css_free_rwork_fn().
+        */
+       while ((css = css->parent)) {
+               css->nr_descendants--;
+               css->cgroup->nr_dying_subsys[ss->id]++;
+       }
  }
  
  /**
author	Waiman Long <longman@redhat.com>
	Mon, 15 Jul 2024 15:00:34 +0000 (11:00 -0400)
committer	Tejun Heo <tj@kernel.org>
	Wed, 31 Jul 2024 17:00:02 +0000 (07:00 -1000)
Documentation/admin-guide/cgroup-v2.rst		patch \| blob \| blame \| history
include/linux/cgroup-defs.h		patch \| blob \| blame \| history
kernel/cgroup/cgroup.c		patch \| blob \| blame \| history