1 From c3f3ce049f7d97cc7ec9c01cb51d9ec74e0f37c2 Mon Sep 17 00:00:00 2001
2 From: Andrea Arcangeli <aarcange@redhat.com>
3 Date: Tue, 14 May 2019 15:40:46 -0700
4 Subject: userfaultfd: use RCU to free the task struct when fork fails
5
6 From: Andrea Arcangeli <aarcange@redhat.com>
7
8 commit c3f3ce049f7d97cc7ec9c01cb51d9ec74e0f37c2 upstream.
9
10 The task structure is freed while get_mem_cgroup_from_mm() holds
11 rcu_read_lock() and dereferences mm->owner.
12
13 get_mem_cgroup_from_mm() failing fork()
14 ---- ---
15 task = mm->owner
16 mm->owner = NULL;
17 free(task)
18 if (task) *task; /* use after free */
19
20 The fix consists in freeing the task with RCU also in the fork failure
21 case, exactly like it always happens for the regular exit(2) path. That
22 is enough to make the rcu_read_lock hold in get_mem_cgroup_from_mm()
23 (left side above) effective to avoid a use after free when dereferencing
24 the task structure.
25
26 An alternate possible fix would be to defer the delivery of the
27 userfaultfd contexts to the monitor until after fork() is guaranteed to
28 succeed. Such a change would require more changes because it would
29 create a strict ordering dependency where the uffd methods would need to
30 be called beyond the last potentially failing branch in order to be
31 safe. This solution as opposed only adds the dependency to common code
32 to set mm->owner to NULL and to free the task struct that was pointed by
33 mm->owner with RCU, if fork ends up failing. The userfaultfd methods
34 can still be called anywhere during the fork runtime and the monitor
35 will keep discarding orphaned "mm" coming from failed forks in userland.
36
37 This race condition couldn't trigger if CONFIG_MEMCG was set =n at build
38 time.
39
40 [aarcange@redhat.com: improve changelog, reduce #ifdefs per Michal]
41 Link: http://lkml.kernel.org/r/20190429035752.4508-1-aarcange@redhat.com
42 Link: http://lkml.kernel.org/r/20190325225636.11635-2-aarcange@redhat.com
43 Fixes: 893e26e61d04 ("userfaultfd: non-cooperative: Add fork() event")
44 Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
45 Tested-by: zhong jiang <zhongjiang@huawei.com>
46 Reported-by: syzbot+cbb52e396df3e565ab02@syzkaller.appspotmail.com
47 Cc: Oleg Nesterov <oleg@redhat.com>
48 Cc: Jann Horn <jannh@google.com>
49 Cc: Hugh Dickins <hughd@google.com>
50 Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
51 Cc: Mike Kravetz <mike.kravetz@oracle.com>
52 Cc: Peter Xu <peterx@redhat.com>
53 Cc: Jason Gunthorpe <jgg@mellanox.com>
54 Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
55 Cc: Michal Hocko <mhocko@suse.com>
56 Cc: zhong jiang <zhongjiang@huawei.com>
57 Cc: syzbot+cbb52e396df3e565ab02@syzkaller.appspotmail.com
58 Cc: <stable@vger.kernel.org>
59 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
60 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
61 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
62
63 ---
64 kernel/fork.c | 31 +++++++++++++++++++++++++++++--
65 1 file changed, 29 insertions(+), 2 deletions(-)
66
67 --- a/kernel/fork.c
68 +++ b/kernel/fork.c
69 @@ -907,6 +907,15 @@ static void mm_init_aio(struct mm_struct
70 #endif
71 }
72
73 +static __always_inline void mm_clear_owner(struct mm_struct *mm,
74 + struct task_struct *p)
75 +{
76 +#ifdef CONFIG_MEMCG
77 + if (mm->owner == p)
78 + WRITE_ONCE(mm->owner, NULL);
79 +#endif
80 +}
81 +
82 static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
83 {
84 #ifdef CONFIG_MEMCG
85 @@ -1286,6 +1295,7 @@ static struct mm_struct *dup_mm(struct t
86 free_pt:
87 /* don't put binfmt in mmput, we haven't got module yet */
88 mm->binfmt = NULL;
89 + mm_init_owner(mm, NULL);
90 mmput(mm);
91
92 fail_nomem:
93 @@ -1617,6 +1627,21 @@ static inline void rcu_copy_process(stru
94 #endif /* #ifdef CONFIG_TASKS_RCU */
95 }
96
97 +static void __delayed_free_task(struct rcu_head *rhp)
98 +{
99 + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
100 +
101 + free_task(tsk);
102 +}
103 +
104 +static __always_inline void delayed_free_task(struct task_struct *tsk)
105 +{
106 + if (IS_ENABLED(CONFIG_MEMCG))
107 + call_rcu(&tsk->rcu, __delayed_free_task);
108 + else
109 + free_task(tsk);
110 +}
111 +
112 /*
113 * This creates a new process as a copy of the old one,
114 * but does not actually start it yet.
115 @@ -2072,8 +2097,10 @@ bad_fork_cleanup_io:
116 bad_fork_cleanup_namespaces:
117 exit_task_namespaces(p);
118 bad_fork_cleanup_mm:
119 - if (p->mm)
120 + if (p->mm) {
121 + mm_clear_owner(p->mm, p);
122 mmput(p->mm);
123 + }
124 bad_fork_cleanup_signal:
125 if (!(clone_flags & CLONE_THREAD))
126 free_signal_struct(p->signal);
127 @@ -2104,7 +2131,7 @@ bad_fork_cleanup_count:
128 bad_fork_free:
129 p->state = TASK_DEAD;
130 put_task_stack(p);
131 - free_task(p);
132 + delayed_free_task(p);
133 fork_out:
134 spin_lock_irq(&current->sighand->siglock);
135 hlist_del_init(&delayed.node);