will not be tracked by the memory controller (even if cgroup
v2 is remounted later on).
+ pids_localevents
+ Make pids.events:max count fork failures inside the cgroup itself
+ (v1 behavior) rather than hits of the cgroup's limit (v2 behavior).
+
Organizing Processes and Threads
--------------------------------
descendants has ever reached.
pids.events
- A read-only flat-keyed file which exists on non-root cgroups. The
- following entries are defined. Unless specified otherwise, a value
- change in this file generates a file modified event.
+ A read-only flat-keyed file which exists on non-root cgroups. Unless
+ specified otherwise, a value change in this file generates a file
+ modified event. The following entries are defined.
max
- Number of times fork failed because limit was hit.
+ The number of times the cgroup's total number of processes hit the
+ pids.max limit (see also pids_localevents).
Organisational operations are not blocked by cgroup policies, so it is
possible to have pids.current > pids.max. This can be done by either
Opt_memory_localevents,
Opt_memory_recursiveprot,
Opt_memory_hugetlb_accounting,
+ Opt_pids_localevents,
nr__cgroup2_params
};
fsparam_flag("memory_localevents", Opt_memory_localevents),
fsparam_flag("memory_recursiveprot", Opt_memory_recursiveprot),
fsparam_flag("memory_hugetlb_accounting", Opt_memory_hugetlb_accounting),
+ fsparam_flag("pids_localevents", Opt_pids_localevents),
{}
};
case Opt_memory_hugetlb_accounting:
ctx->flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
return 0;
+ case Opt_pids_localevents:
+ ctx->flags |= CGRP_ROOT_PIDS_LOCAL_EVENTS;
+ return 0;
}
return -EINVAL;
}
cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
else
cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
+
+ if (root_flags & CGRP_ROOT_PIDS_LOCAL_EVENTS)
+ cgrp_dfl_root.flags |= CGRP_ROOT_PIDS_LOCAL_EVENTS;
+ else
+ cgrp_dfl_root.flags &= ~CGRP_ROOT_PIDS_LOCAL_EVENTS;
}
}
seq_puts(seq, ",memory_recursiveprot");
if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING)
seq_puts(seq, ",memory_hugetlb_accounting");
+ if (cgrp_dfl_root.flags & CGRP_ROOT_PIDS_LOCAL_EVENTS)
+ seq_puts(seq, ",pids_localevents");
return 0;
}
"favordynmods\n"
"memory_localevents\n"
"memory_recursiveprot\n"
- "memory_hugetlb_accounting\n");
+ "memory_hugetlb_accounting\n"
+ "pids_localevents\n");
}
static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
#define PIDS_MAX (PID_MAX_LIMIT + 1ULL)
#define PIDS_MAX_STR "max"
+enum pidcg_event { /* indices into pids_cgroup.events[] */
+ /* A fork in this pids_cgroup's subtree failed because this cgroup's limit was hit. */
+ PIDCG_MAX,
+ /* A fork in this pids_cgroup failed because its own or an ancestor's limit was hit. */
+ PIDCG_FORKFAIL,
+ NR_PIDCG_EVENTS, /* number of counters; sizes the events[] array */
+};
+
struct pids_cgroup {
struct cgroup_subsys_state css;
/* Handle for "pids.events" */
struct cgroup_file events_file;
- /* Number of times fork failed because limit was hit. */
- atomic64_t events_limit;
+ atomic64_t events[NR_PIDCG_EVENTS]; /* event counters, indexed by enum pidcg_event */
};
static struct pids_cgroup *css_pids(struct cgroup_subsys_state *css)
* pids_try_charge - hierarchically try to charge the pid count
* @pids: the pid cgroup state
* @num: the number of pids to charge
+ * @fail: on failure, set to the pids cgroup whose limit was exceeded
*
* This function follows the set limit. It will fail if the charge would cause
* the new value to exceed the hierarchical limit. Returns 0 if the charge
* succeeded, otherwise -EAGAIN.
*/
-static int pids_try_charge(struct pids_cgroup *pids, int num)
+static int pids_try_charge(struct pids_cgroup *pids, int num, struct pids_cgroup **fail)
{
struct pids_cgroup *p, *q;
* p->limit is %PIDS_MAX then we know that this test will never
* fail.
*/
- if (new > limit)
+ if (new > limit) {
+ *fail = p;
goto revert;
-
+ }
/*
* Not technically accurate if we go over limit somewhere up
* the hierarchy, but that's tolerable for the watermark.
static int pids_can_fork(struct task_struct *task, struct css_set *cset)
{
struct cgroup_subsys_state *css;
- struct pids_cgroup *pids;
+ struct pids_cgroup *pids, *pids_over_limit;
int err;
if (cset)
else
css = task_css_check(current, pids_cgrp_id, true);
pids = css_pids(css);
- err = pids_try_charge(pids, 1);
+ err = pids_try_charge(pids, 1, &pids_over_limit); /* on failure, names the cgroup whose limit was exceeded */
if (err) {
- /* Only log the first time events_limit is incremented. */
- if (atomic64_inc_return(&pids->events_limit) == 1) {
+ /* Compatibility with v1, where events are reported in the forking cgroup. */
+ if (!cgroup_subsys_on_dfl(pids_cgrp_subsys))
+ pids_over_limit = pids;
+
+ /* Only log the first fork failure recorded for this cgroup. */
+ if (atomic64_inc_return(&pids->events[PIDCG_FORKFAIL]) == 1) {
pr_info("cgroup: fork rejected by pids controller in ");
- pr_cont_cgroup_path(css->cgroup);
+ pr_cont_cgroup_path(pids->css.cgroup);
pr_cont("\n");
}
+ atomic64_inc(&pids_over_limit->events[PIDCG_MAX]); /* count the hit in the cgroup whose limit was exceeded */
+
cgroup_file_notify(&pids->events_file);
+ if (pids_over_limit != pids) /* avoid notifying the same events file twice */
+ cgroup_file_notify(&pids_over_limit->events_file);
}
return err;
}
static int pids_events_show(struct seq_file *sf, void *v)
{
struct pids_cgroup *pids = css_pids(seq_css(sf));
+ enum pidcg_event pe = PIDCG_MAX; /* default: report hits of this cgroup's limit */
+
+ if (!cgroup_subsys_on_dfl(pids_cgrp_subsys) ||
+ cgrp_dfl_root.flags & CGRP_ROOT_PIDS_LOCAL_EVENTS)
+ pe = PIDCG_FORKFAIL; /* v1 hierarchy or pids_localevents: report local fork failures */
- seq_printf(sf, "max %lld\n", (s64)atomic64_read(&pids->events_limit));
+ seq_printf(sf, "max %lld\n", (s64)atomic64_read(&pids->events[pe]));
return 0;
}