git.ipfire.org Git - people/arne_f/kernel.git/commitdiff
cgroup/pids: turn cgroup_subsys->free() into cgroup_subsys->release() to fix the accounting
author: Oleg Nesterov <oleg@redhat.com>
Mon, 28 Jan 2019 16:00:13 +0000 (17:00 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 5 Apr 2019 20:31:37 +0000 (22:31 +0200)
[ Upstream commit 51bee5abeab2058ea5813c5615d6197a23dbf041 ]

The only user of cgroup_subsys->free() callback is pids_cgrp_subsys which
needs pids_free() to uncharge the pid.

However, ->free() is called from __put_task_struct()->cgroup_free() and this
is too late. Even the trivial program which does

	for (;;) {
		int pid = fork();
		assert(pid >= 0);
		if (pid)
			wait(NULL);
		else
			exit(0);
	}

can run out of limits because release_task()->call_rcu(delayed_put_task_struct)
implies an RCU gp after the task/pid goes away and before the final put().

Test-case:

	mkdir -p /tmp/CG
	mount -t cgroup2 none /tmp/CG
	echo '+pids' > /tmp/CG/cgroup.subtree_control

	mkdir /tmp/CG/PID
	echo 2 > /tmp/CG/PID/pids.max

	perl -e 'while ($p = fork) { wait; } $p // die "fork failed: $!\n"' &
	echo $! > /tmp/CG/PID/cgroup.procs

Without this patch the forking process fails soon after migration.

Rename cgroup_subsys->free() to cgroup_subsys->release() and move the callsite
into the new helper, cgroup_release(), called by release_task() which actually
frees the pid(s).

Reported-by: Herton R. Krzesinski <hkrzesin@redhat.com>
Reported-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
include/linux/cgroup-defs.h
include/linux/cgroup.h
kernel/cgroup/cgroup.c
kernel/cgroup/pids.c
kernel/exit.c

index e7905d9353e89466c0e2a2909d70d4b752ce7b36..93a2469a9130c201e7e8970da37d64cbc5bae597 100644 (file)
@@ -523,7 +523,7 @@ struct cgroup_subsys {
        void (*cancel_fork)(struct task_struct *task);
        void (*fork)(struct task_struct *task);
        void (*exit)(struct task_struct *task);
-       void (*free)(struct task_struct *task);
+       void (*release)(struct task_struct *task);
        void (*bind)(struct cgroup_subsys_state *root_css);
 
        bool early_init:1;
index dddbc29e20098e0a9f0377f64b0945c87691d315..8e83c9055ccb586da9357b19eb8bba71212c4751 100644 (file)
@@ -118,6 +118,7 @@ extern int cgroup_can_fork(struct task_struct *p);
 extern void cgroup_cancel_fork(struct task_struct *p);
 extern void cgroup_post_fork(struct task_struct *p);
 void cgroup_exit(struct task_struct *p);
+void cgroup_release(struct task_struct *p);
 void cgroup_free(struct task_struct *p);
 
 int cgroup_init_early(void);
@@ -668,6 +669,7 @@ static inline int cgroup_can_fork(struct task_struct *p) { return 0; }
 static inline void cgroup_cancel_fork(struct task_struct *p) {}
 static inline void cgroup_post_fork(struct task_struct *p) {}
 static inline void cgroup_exit(struct task_struct *p) {}
+static inline void cgroup_release(struct task_struct *p) {}
 static inline void cgroup_free(struct task_struct *p) {}
 
 static inline int cgroup_init_early(void) { return 0; }
index 7e79358b447305ce6d1438cf9bd46c000774dfb2..694b1cc8d144e9514e1efe0e3afa3558b3fc750b 100644 (file)
@@ -187,7 +187,7 @@ static u64 css_serial_nr_next = 1;
  */
 static u16 have_fork_callback __read_mostly;
 static u16 have_exit_callback __read_mostly;
-static u16 have_free_callback __read_mostly;
+static u16 have_release_callback __read_mostly;
 static u16 have_canfork_callback __read_mostly;
 
 /* cgroup namespace for init task */
@@ -5112,7 +5112,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
 
        have_fork_callback |= (bool)ss->fork << ss->id;
        have_exit_callback |= (bool)ss->exit << ss->id;
-       have_free_callback |= (bool)ss->free << ss->id;
+       have_release_callback |= (bool)ss->release << ss->id;
        have_canfork_callback |= (bool)ss->can_fork << ss->id;
 
        /* At system boot, before all subsystems have been
@@ -5546,16 +5546,19 @@ void cgroup_exit(struct task_struct *tsk)
        } while_each_subsys_mask();
 }
 
-void cgroup_free(struct task_struct *task)
+void cgroup_release(struct task_struct *task)
 {
-       struct css_set *cset = task_css_set(task);
        struct cgroup_subsys *ss;
        int ssid;
 
-       do_each_subsys_mask(ss, ssid, have_free_callback) {
-               ss->free(task);
+       do_each_subsys_mask(ss, ssid, have_release_callback) {
+               ss->release(task);
        } while_each_subsys_mask();
+}
 
+void cgroup_free(struct task_struct *task)
+{
+       struct css_set *cset = task_css_set(task);
        put_css_set(cset);
 }
 
index 9829c67ebc0ae0236af31913b2ce858b0a2c33b2..c9960baaa14f23f7b85a47b8f6f067da083c21cc 100644 (file)
@@ -247,7 +247,7 @@ static void pids_cancel_fork(struct task_struct *task)
        pids_uncharge(pids, 1);
 }
 
-static void pids_free(struct task_struct *task)
+static void pids_release(struct task_struct *task)
 {
        struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id));
 
@@ -342,7 +342,7 @@ struct cgroup_subsys pids_cgrp_subsys = {
        .cancel_attach  = pids_cancel_attach,
        .can_fork       = pids_can_fork,
        .cancel_fork    = pids_cancel_fork,
-       .free           = pids_free,
+       .release        = pids_release,
        .legacy_cftypes = pids_files,
        .dfl_cftypes    = pids_files,
        .threaded       = true,
index 5523fb0c20c8c1db584b4f2636419ca5c7f17193..95ce231ff5e24c7774f1da96dee249b69e2a4dfd 100644 (file)
@@ -218,6 +218,7 @@ repeat:
        }
 
        write_unlock_irq(&tasklist_lock);
+       cgroup_release(p);
        release_thread(p);
        call_rcu(&p->rcu, delayed_put_task_struct);