A dying cgroup can consume system resources not exceeding
limits, which were active at the moment of cgroup deletion.
+ nr_subsys_<cgroup_subsys>
+ Total number of live cgroup subsystems (e.g memory
+ cgroup) at and beneath the current cgroup.
+
+ nr_dying_subsys_<cgroup_subsys>
+ Total number of dying cgroup subsystems (e.g. memory
+ cgroup) at and beneath the current cgroup.
+
cgroup.freeze
A read-write single value file which exists on non-root cgroups.
Allowed values are "0" and "1". The default is "0".
- "cgroup.clone_children" is removed.
-- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" file
- at the root instead.
+- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" or
+ "cgroup.stat" files at the root instead.
Issues with v1 and Rationales for v2
* fields of the containing structure.
*/
struct cgroup_subsys_state *parent;
+
+ /*
+ * Keep track of total numbers of visible descendant CSSes.
+ * The total number of dying CSSes is tracked in
+ * css->cgroup->nr_dying_subsys[ssid].
+ * Protected by cgroup_mutex.
+ */
+ int nr_descendants;
};
/*
/* Private pointers for each registered subsystem */
struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
+ /*
+ * Keep track of total number of dying CSSes at and below this cgroup.
+ * Protected by cgroup_mutex.
+ */
+ int nr_dying_subsys[CGROUP_SUBSYS_COUNT];
+
struct cgroup_root *root;
/*
static int cgroup_stat_show(struct seq_file *seq, void *v)
{
struct cgroup *cgroup = seq_css(seq)->cgroup;
+ struct cgroup_subsys_state *css;
+ int dying_cnt[CGROUP_SUBSYS_COUNT];
+ int ssid;
seq_printf(seq, "nr_descendants %d\n",
cgroup->nr_descendants);
+
+ /*
+ * Show the number of live and dying csses associated with each of
+ * non-inhibited cgroup subsystems that is bound to cgroup v2.
+ *
+ * Without proper lock protection, racing is possible. So the
+ * numbers may not be consistent when that happens.
+ */
+ rcu_read_lock();
+ for (ssid = 0; ssid < CGROUP_SUBSYS_COUNT; ssid++) {
+ dying_cnt[ssid] = -1;
+ if ((BIT(ssid) & cgrp_dfl_inhibit_ss_mask) ||
+ (cgroup_subsys[ssid]->root != &cgrp_dfl_root))
+ continue;
+ css = rcu_dereference_raw(cgroup->subsys[ssid]);
+ dying_cnt[ssid] = cgroup->nr_dying_subsys[ssid];
+ seq_printf(seq, "nr_subsys_%s %d\n", cgroup_subsys[ssid]->name,
+ css ? (css->nr_descendants + 1) : 0);
+ }
+
seq_printf(seq, "nr_dying_descendants %d\n",
cgroup->nr_dying_descendants);
-
+ for (ssid = 0; ssid < CGROUP_SUBSYS_COUNT; ssid++) {
+ if (dying_cnt[ssid] >= 0)
+ seq_printf(seq, "nr_dying_subsys_%s %d\n",
+ cgroup_subsys[ssid]->name, dying_cnt[ssid]);
+ }
+ rcu_read_unlock();
return 0;
}
list_del_rcu(&css->sibling);
if (ss) {
+ struct cgroup *parent_cgrp;
+
/* css release path */
if (!list_empty(&css->rstat_css_node)) {
cgroup_rstat_flush(cgrp);
cgroup_idr_replace(&ss->css_idr, NULL, css->id);
if (ss->css_released)
ss->css_released(css);
+
+ cgrp->nr_dying_subsys[ss->id]--;
+ WARN_ON_ONCE(css->nr_descendants || cgrp->nr_dying_subsys[ss->id]);
+ parent_cgrp = cgroup_parent(cgrp);
+ while (parent_cgrp) {
+ parent_cgrp->nr_dying_subsys[ss->id]--;
+ parent_cgrp = cgroup_parent(parent_cgrp);
+ }
} else {
struct cgroup *tcgrp;
rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
atomic_inc(&css->online_cnt);
- if (css->parent)
+ if (css->parent) {
atomic_inc(&css->parent->online_cnt);
+ while ((css = css->parent))
+ css->nr_descendants++;
+ }
}
return ret;
}
RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
wake_up_all(&css->cgroup->offline_waitq);
+
+ css->cgroup->nr_dying_subsys[ss->id]++;
+ /*
+ * Parent css and cgroup cannot be freed until after the freeing
+ * of child css, see css_free_rwork_fn().
+ */
+ while ((css = css->parent)) {
+ css->nr_descendants--;
+ css->cgroup->nr_dying_subsys[ss->id]++;
+ }
}
/**