]>
Commit | Line | Data |
---|---|---|
3f487e65 GKH |
1 | From 3fd37226216620c1a468afa999739d5016fbc349 Mon Sep 17 00:00:00 2001 |
2 | From: Kirill Tkhai <ktkhai@virtuozzo.com> | |
3 | Date: Fri, 12 May 2017 19:11:31 +0300 | |
4 | Subject: pid_ns: Fix race between setns'ed fork() and zap_pid_ns_processes() | |
5 | ||
6 | From: Kirill Tkhai <ktkhai@virtuozzo.com> | |
7 | ||
8 | commit 3fd37226216620c1a468afa999739d5016fbc349 upstream. | |
9 | ||
10 | Imagine we have a pid namespace and a task from its parent's pid_ns, | |
11 | which made setns() to the pid namespace. The task is doing fork(), | |
12 | while the pid namespace's child reaper is dying. We have the race | |
13 | between them: | |
14 | ||
15 | Task from parent pid_ns Child reaper | |
16 | copy_process() .. | |
17 | alloc_pid() .. | |
18 | .. zap_pid_ns_processes() | |
19 | .. disable_pid_allocation() | |
20 | .. read_lock(&tasklist_lock) | |
21 | .. iterate over pids in pid_ns | |
22 | .. kill tasks linked to pids | |
23 | .. read_unlock(&tasklist_lock) | |
24 | write_lock_irq(&tasklist_lock); .. | |
25 | attach_pid(p, PIDTYPE_PID); .. | |
26 | .. .. | |
27 | ||
28 | So, just created task p won't receive SIGKILL signal, | |
29 | and the pid namespace will be in contradictory state. | |
30 | Only manual kill will help there, but does the userspace | |
31 | care about this? I suppose, the most users just inject | |
32 | a task into a pid namespace and wait a SIGCHLD from it. | |
33 | ||
34 | The patch fixes the problem. It simply checks for | |
35 | (pid_ns->nr_hashed & PIDNS_HASH_ADDING) in copy_process(). | |
36 | We do it under the tasklist_lock, and can't skip | |
37 | PIDNS_HASH_ADDING as noted by Oleg: | |
38 | ||
39 | "zap_pid_ns_processes() does disable_pid_allocation() | |
40 | and then takes tasklist_lock to kill the whole namespace. | |
41 | Given that copy_process() checks PIDNS_HASH_ADDING | |
42 | under write_lock(tasklist) they can't race; | |
43 | if copy_process() takes this lock first, the new child will | |
44 | be killed, otherwise copy_process() can't miss | |
45 | the change in ->nr_hashed." | |
46 | ||
47 | If allocation is disabled, we just return -ENOMEM | |
48 | like it's made for such cases in alloc_pid(). | |
49 | ||
50 | v2: Do not move disable_pid_allocation(), do not | |
51 | introduce a new variable in copy_process() and simplify | |
52 | the patch as suggested by Oleg Nesterov. | |
53 | Account the problem with double irq enabling | |
54 | found by Eric W. Biederman. | |
55 | ||
56 | Fixes: c876ad768215 ("pidns: Stop pid allocation when init dies") | |
57 | Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com> | |
58 | CC: Andrew Morton <akpm@linux-foundation.org> | |
59 | CC: Ingo Molnar <mingo@kernel.org> | |
60 | CC: Peter Zijlstra <peterz@infradead.org> | |
61 | CC: Oleg Nesterov <oleg@redhat.com> | |
62 | CC: Mike Rapoport <rppt@linux.vnet.ibm.com> | |
63 | CC: Michal Hocko <mhocko@suse.com> | |
64 | CC: Andy Lutomirski <luto@kernel.org> | |
65 | CC: "Eric W. Biederman" <ebiederm@xmission.com> | |
66 | CC: Andrei Vagin <avagin@openvz.org> | |
67 | CC: Cyrill Gorcunov <gorcunov@openvz.org> | |
68 | CC: Serge Hallyn <serge@hallyn.com> | |
69 | Acked-by: Oleg Nesterov <oleg@redhat.com> | |
70 | Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> | |
71 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
72 | ||
73 | --- | |
74 | kernel/fork.c | 8 ++++++-- | |
75 | 1 file changed, 6 insertions(+), 2 deletions(-) | |
76 | ||
77 | --- a/kernel/fork.c | |
78 | +++ b/kernel/fork.c | |
79 | @@ -1773,11 +1773,13 @@ static __latent_entropy struct task_stru | |
80 | */ | |
81 | recalc_sigpending(); | |
82 | if (signal_pending(current)) { | |
83 | - spin_unlock(¤t->sighand->siglock); | |
84 | - write_unlock_irq(&tasklist_lock); | |
85 | retval = -ERESTARTNOINTR; | |
86 | goto bad_fork_cancel_cgroup; | |
87 | } | |
88 | + if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) { | |
89 | + retval = -ENOMEM; | |
90 | + goto bad_fork_cancel_cgroup; | |
91 | + } | |
92 | ||
93 | if (likely(p->pid)) { | |
94 | ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); | |
95 | @@ -1828,6 +1830,8 @@ static __latent_entropy struct task_stru | |
96 | return p; | |
97 | ||
98 | bad_fork_cancel_cgroup: | |
99 | + spin_unlock(¤t->sighand->siglock); | |
100 | + write_unlock_irq(&tasklist_lock); | |
101 | cgroup_cancel_fork(p); | |
102 | bad_fork_free_pid: | |
103 | threadgroup_change_end(current); |