git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/4.19.34/cgroup-pids-turn-cgroup_subsys-free-into-cgroup_subs.patch
Linux 4.19.34
[thirdparty/kernel/stable-queue.git] / releases / 4.19.34 / cgroup-pids-turn-cgroup_subsys-free-into-cgroup_subs.patch
1 From 086f65b78b6b847d942d872ae98f6d5919bb88db Mon Sep 17 00:00:00 2001
2 From: Oleg Nesterov <oleg@redhat.com>
3 Date: Mon, 28 Jan 2019 17:00:13 +0100
4 Subject: cgroup/pids: turn cgroup_subsys->free() into cgroup_subsys->release()
5 to fix the accounting
6
7 [ Upstream commit 51bee5abeab2058ea5813c5615d6197a23dbf041 ]
8
9 The only user of cgroup_subsys->free() callback is pids_cgrp_subsys which
10 needs pids_free() to uncharge the pid.
11
12 However, ->free() is called from __put_task_struct()->cgroup_free(), which
13 is too late. Even a trivial program which does
14
15 for (;;) {
16 int pid = fork();
17 assert(pid >= 0);
18 if (pid)
19 wait(NULL);
20 else
21 exit(0);
22 }
23
24 can hit the pids limit because release_task()->call_rcu(delayed_put_task_struct)
25 implies an RCU grace period after the task/pid goes away and before the final put(), so the pid stays charged until that grace period has elapsed.
26
27 Test-case:
28
29 mkdir -p /tmp/CG
30 mount -t cgroup2 none /tmp/CG
31 echo '+pids' > /tmp/CG/cgroup.subtree_control
32
33 mkdir /tmp/CG/PID
34 echo 2 > /tmp/CG/PID/pids.max
35
36 perl -e 'while ($p = fork) { wait; } $p // die "fork failed: $!\n"' &
37 echo $! > /tmp/CG/PID/cgroup.procs
38
39 Without this patch the forking process fails soon after migration.
40
41 Rename cgroup_subsys->free() to cgroup_subsys->release() and move the callsite
42 into the new helper, cgroup_release(), called by release_task() which actually
43 frees the pid(s).
44
45 Reported-by: Herton R. Krzesinski <hkrzesin@redhat.com>
46 Reported-by: Jan Stancek <jstancek@redhat.com>
47 Signed-off-by: Oleg Nesterov <oleg@redhat.com>
48 Signed-off-by: Tejun Heo <tj@kernel.org>
49 Signed-off-by: Sasha Levin <sashal@kernel.org>
50 ---
51 include/linux/cgroup-defs.h | 2 +-
52 include/linux/cgroup.h | 2 ++
53 kernel/cgroup/cgroup.c | 15 +++++++++------
54 kernel/cgroup/pids.c | 4 ++--
55 kernel/exit.c | 1 +
56 5 files changed, 15 insertions(+), 9 deletions(-)
57
58 diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
59 index 22254c1fe1c5..6002275937f5 100644
60 --- a/include/linux/cgroup-defs.h
61 +++ b/include/linux/cgroup-defs.h
62 @@ -597,7 +597,7 @@ struct cgroup_subsys {
63 void (*cancel_fork)(struct task_struct *task);
64 void (*fork)(struct task_struct *task);
65 void (*exit)(struct task_struct *task);
66 - void (*free)(struct task_struct *task);
67 + void (*release)(struct task_struct *task);
68 void (*bind)(struct cgroup_subsys_state *root_css);
69
70 bool early_init:1;
71 diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
72 index 32c553556bbd..ca51b2c15bcc 100644
73 --- a/include/linux/cgroup.h
74 +++ b/include/linux/cgroup.h
75 @@ -119,6 +119,7 @@ extern int cgroup_can_fork(struct task_struct *p);
76 extern void cgroup_cancel_fork(struct task_struct *p);
77 extern void cgroup_post_fork(struct task_struct *p);
78 void cgroup_exit(struct task_struct *p);
79 +void cgroup_release(struct task_struct *p);
80 void cgroup_free(struct task_struct *p);
81
82 int cgroup_init_early(void);
83 @@ -699,6 +700,7 @@ static inline int cgroup_can_fork(struct task_struct *p) { return 0; }
84 static inline void cgroup_cancel_fork(struct task_struct *p) {}
85 static inline void cgroup_post_fork(struct task_struct *p) {}
86 static inline void cgroup_exit(struct task_struct *p) {}
87 +static inline void cgroup_release(struct task_struct *p) {}
88 static inline void cgroup_free(struct task_struct *p) {}
89
90 static inline int cgroup_init_early(void) { return 0; }
91 diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
92 index e710ac7fbbbf..63dae7e0ccae 100644
93 --- a/kernel/cgroup/cgroup.c
94 +++ b/kernel/cgroup/cgroup.c
95 @@ -195,7 +195,7 @@ static u64 css_serial_nr_next = 1;
96 */
97 static u16 have_fork_callback __read_mostly;
98 static u16 have_exit_callback __read_mostly;
99 -static u16 have_free_callback __read_mostly;
100 +static u16 have_release_callback __read_mostly;
101 static u16 have_canfork_callback __read_mostly;
102
103 /* cgroup namespace for init task */
104 @@ -5240,7 +5240,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
105
106 have_fork_callback |= (bool)ss->fork << ss->id;
107 have_exit_callback |= (bool)ss->exit << ss->id;
108 - have_free_callback |= (bool)ss->free << ss->id;
109 + have_release_callback |= (bool)ss->release << ss->id;
110 have_canfork_callback |= (bool)ss->can_fork << ss->id;
111
112 /* At system boot, before all subsystems have been
113 @@ -5676,16 +5676,19 @@ void cgroup_exit(struct task_struct *tsk)
114 } while_each_subsys_mask();
115 }
116
117 -void cgroup_free(struct task_struct *task)
118 +void cgroup_release(struct task_struct *task)
119 {
120 - struct css_set *cset = task_css_set(task);
121 struct cgroup_subsys *ss;
122 int ssid;
123
124 - do_each_subsys_mask(ss, ssid, have_free_callback) {
125 - ss->free(task);
126 + do_each_subsys_mask(ss, ssid, have_release_callback) {
127 + ss->release(task);
128 } while_each_subsys_mask();
129 +}
130
131 +void cgroup_free(struct task_struct *task)
132 +{
133 + struct css_set *cset = task_css_set(task);
134 put_css_set(cset);
135 }
136
137 diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c
138 index 9829c67ebc0a..c9960baaa14f 100644
139 --- a/kernel/cgroup/pids.c
140 +++ b/kernel/cgroup/pids.c
141 @@ -247,7 +247,7 @@ static void pids_cancel_fork(struct task_struct *task)
142 pids_uncharge(pids, 1);
143 }
144
145 -static void pids_free(struct task_struct *task)
146 +static void pids_release(struct task_struct *task)
147 {
148 struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id));
149
150 @@ -342,7 +342,7 @@ struct cgroup_subsys pids_cgrp_subsys = {
151 .cancel_attach = pids_cancel_attach,
152 .can_fork = pids_can_fork,
153 .cancel_fork = pids_cancel_fork,
154 - .free = pids_free,
155 + .release = pids_release,
156 .legacy_cftypes = pids_files,
157 .dfl_cftypes = pids_files,
158 .threaded = true,
159 diff --git a/kernel/exit.c b/kernel/exit.c
160 index d607e23fd0c3..5c0964dc805a 100644
161 --- a/kernel/exit.c
162 +++ b/kernel/exit.c
163 @@ -219,6 +219,7 @@ repeat:
164 }
165
166 write_unlock_irq(&tasklist_lock);
167 + cgroup_release(p);
168 release_thread(p);
169 call_rcu(&p->rcu, delayed_put_task_struct);
170
171 --
172 2.19.1
173