git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/4.4.181/userfaultfd-don-t-pin-the-user-memory-in-userfaultfd_file_create.patch
Linux 4.4.181
[thirdparty/kernel/stable-queue.git] / releases / 4.4.181 / userfaultfd-don-t-pin-the-user-memory-in-userfaultfd_file_create.patch
1 From foo@baz Tue 04 Jun 2019 04:46:27 PM CEST
2 From: Oleg Nesterov <oleg@redhat.com>
3 Date: Fri, 20 May 2016 16:58:36 -0700
4 Subject: userfaultfd: don't pin the user memory in userfaultfd_file_create()
5
6 From: Oleg Nesterov <oleg@redhat.com>
7
8 commit d2005e3f41d4f9299e2df6a967c8beb5086967a9 upstream.
9
10 userfaultfd_file_create() increments mm->mm_users; this means that the
11 memory won't be unmapped/freed if the mm owner exits/execs, and a later
12 UFFDIO_COPY can populate the orphaned mm even further.
13
14 Change userfaultfd_file_create() and userfaultfd_ctx_put() to use
15 mm->mm_count to pin mm_struct. This means that
16 atomic_inc_not_zero(mm->mm_users) is needed when we are going to
17 actually play with this memory. The handle_userfault() path is the
18 exception: it doesn't need this, as its caller must already hold a reference.
19
20 The patch adds a new trivial helper, mmget_not_zero(), which may gain
21 more users later.
22
23 Link: http://lkml.kernel.org/r/20160516172254.GA8595@redhat.com
24 Signed-off-by: Oleg Nesterov <oleg@redhat.com>
25 Cc: Andrea Arcangeli <aarcange@redhat.com>
26 Cc: Michal Hocko <mhocko@kernel.org>
27 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
28 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
29 Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
30 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
31 ---
32 fs/userfaultfd.c | 41 ++++++++++++++++++++++++++++-------------
33 include/linux/sched.h | 7 ++++++-
34 2 files changed, 34 insertions(+), 14 deletions(-)
35
36 --- a/fs/userfaultfd.c
37 +++ b/fs/userfaultfd.c
38 @@ -137,7 +137,7 @@ static void userfaultfd_ctx_put(struct u
39 VM_BUG_ON(waitqueue_active(&ctx->fault_wqh));
40 VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
41 VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));
42 - mmput(ctx->mm);
43 + mmdrop(ctx->mm);
44 kmem_cache_free(userfaultfd_ctx_cachep, ctx);
45 }
46 }
47 @@ -434,6 +434,9 @@ static int userfaultfd_release(struct in
48
49 ACCESS_ONCE(ctx->released) = true;
50
51 + if (!mmget_not_zero(mm))
52 + goto wakeup;
53 +
54 /*
55 * Flush page faults out of all CPUs. NOTE: all page faults
56 * must be retried without returning VM_FAULT_SIGBUS if
57 @@ -466,7 +469,8 @@ static int userfaultfd_release(struct in
58 vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
59 }
60 up_write(&mm->mmap_sem);
61 -
62 + mmput(mm);
63 +wakeup:
64 /*
65 * After no new page faults can wait on this fault_*wqh, flush
66 * the last page faults that may have been already waiting on
67 @@ -760,10 +764,12 @@ static int userfaultfd_register(struct u
68 start = uffdio_register.range.start;
69 end = start + uffdio_register.range.len;
70
71 + ret = -ENOMEM;
72 + if (!mmget_not_zero(mm))
73 + goto out;
74 +
75 down_write(&mm->mmap_sem);
76 vma = find_vma_prev(mm, start, &prev);
77 -
78 - ret = -ENOMEM;
79 if (!vma)
80 goto out_unlock;
81
82 @@ -864,6 +870,7 @@ static int userfaultfd_register(struct u
83 } while (vma && vma->vm_start < end);
84 out_unlock:
85 up_write(&mm->mmap_sem);
86 + mmput(mm);
87 if (!ret) {
88 /*
89 * Now that we scanned all vmas we can already tell
90 @@ -902,10 +909,12 @@ static int userfaultfd_unregister(struct
91 start = uffdio_unregister.start;
92 end = start + uffdio_unregister.len;
93
94 + ret = -ENOMEM;
95 + if (!mmget_not_zero(mm))
96 + goto out;
97 +
98 down_write(&mm->mmap_sem);
99 vma = find_vma_prev(mm, start, &prev);
100 -
101 - ret = -ENOMEM;
102 if (!vma)
103 goto out_unlock;
104
105 @@ -998,6 +1007,7 @@ static int userfaultfd_unregister(struct
106 } while (vma && vma->vm_start < end);
107 out_unlock:
108 up_write(&mm->mmap_sem);
109 + mmput(mm);
110 out:
111 return ret;
112 }
113 @@ -1067,9 +1077,11 @@ static int userfaultfd_copy(struct userf
114 goto out;
115 if (uffdio_copy.mode & ~UFFDIO_COPY_MODE_DONTWAKE)
116 goto out;
117 -
118 - ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
119 - uffdio_copy.len);
120 + if (mmget_not_zero(ctx->mm)) {
121 + ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
122 + uffdio_copy.len);
123 + mmput(ctx->mm);
124 + }
125 if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
126 return -EFAULT;
127 if (ret < 0)
128 @@ -1110,8 +1122,11 @@ static int userfaultfd_zeropage(struct u
129 if (uffdio_zeropage.mode & ~UFFDIO_ZEROPAGE_MODE_DONTWAKE)
130 goto out;
131
132 - ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start,
133 - uffdio_zeropage.range.len);
134 + if (mmget_not_zero(ctx->mm)) {
135 + ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start,
136 + uffdio_zeropage.range.len);
137 + mmput(ctx->mm);
138 + }
139 if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage)))
140 return -EFAULT;
141 if (ret < 0)
142 @@ -1289,12 +1304,12 @@ static struct file *userfaultfd_file_cre
143 ctx->released = false;
144 ctx->mm = current->mm;
145 /* prevent the mm struct to be freed */
146 - atomic_inc(&ctx->mm->mm_users);
147 + atomic_inc(&ctx->mm->mm_count);
148
149 file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
150 O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));
151 if (IS_ERR(file)) {
152 - mmput(ctx->mm);
153 + mmdrop(ctx->mm);
154 kmem_cache_free(userfaultfd_ctx_cachep, ctx);
155 }
156 out:
157 --- a/include/linux/sched.h
158 +++ b/include/linux/sched.h
159 @@ -2614,12 +2614,17 @@ extern struct mm_struct * mm_alloc(void)
160
161 /* mmdrop drops the mm and the page tables */
162 extern void __mmdrop(struct mm_struct *);
163 -static inline void mmdrop(struct mm_struct * mm)
164 +static inline void mmdrop(struct mm_struct *mm)
165 {
166 if (unlikely(atomic_dec_and_test(&mm->mm_count)))
167 __mmdrop(mm);
168 }
169
170 +static inline bool mmget_not_zero(struct mm_struct *mm)
171 +{
172 + return atomic_inc_not_zero(&mm->mm_users);
173 +}
174 +
175 /* mmput gets rid of the mappings and all user-space */
176 extern void mmput(struct mm_struct *);
177 /* Grab a reference to a task's mm, if it is not already going away */