From 6b4f4bc9cb22875f97023984a625386f0c7cc1c0 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 28 Feb 2019 11:58:08 +0000
Subject: locking/futex: Allow low-level atomic operations to return -EAGAIN

From: Will Deacon <will.deacon@arm.com>

commit 6b4f4bc9cb22875f97023984a625386f0c7cc1c0 upstream.

Some futex() operations, including FUTEX_WAKE_OP, require the kernel to
perform an atomic read-modify-write of the futex word via the userspace
mapping. These operations are implemented by each architecture in
arch_futex_atomic_op_inuser() and futex_atomic_cmpxchg_inatomic(), which
are called in atomic context with the relevant hash bucket locks held.

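For reference, the two per-architecture hooks have prototypes along
these lines (as in the 4.19-era asm-generic/futex.h; the exact type of
oparg varies slightly between architectures):

  int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
                                  u32 __user *uaddr);
  int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
                                    u32 oldval, u32 newval);

A zero return means the read-modify-write completed, with the old value
of the futex word reported back through *oval / *uval.
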
Although these routines may return -EFAULT in response to a page fault
generated when accessing userspace, they are expected to succeed (i.e.
return 0) in all other cases. This poses a problem for architectures
that do not provide bounded forward progress guarantees or fairness of
contended atomic operations and can lead to starvation in some cases.

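To make the failure mode concrete, here is a user-space sketch of the
pattern (illustrative only; the GCC/Clang __atomic builtins stand in
for a load-linked/store-conditional loop, and a weak compare-exchange,
like a store-conditional, is allowed to fail spuriously):

  #include <stdint.h>

  /* Mirrors the futex_atomic_cmpxchg_inatomic() contract: return 0 and
   * report the old value through *uval. Nothing bounds the retry loop,
   * so contending writers that keep failing the weak compare-exchange
   * can starve this caller indefinitely. */
  static int cmpxchg_futex_word(uint32_t *uval, uint32_t *uaddr,
                                uint32_t oldval, uint32_t newval)
  {
          uint32_t expected;

          do {
                  expected = oldval;
                  if (__atomic_compare_exchange_n(uaddr, &expected, newval,
                                                  1 /* weak */,
                                                  __ATOMIC_SEQ_CST,
                                                  __ATOMIC_SEQ_CST))
                          break;
          } while (expected == oldval); /* spurious failure: try again */

          *uval = expected;
          return 0;
  }

Because the kernel's versions run with the hash bucket locks held, a
task stuck in such a loop cannot be preempted to let the contending
writers drain.
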
In these problematic scenarios, we must return to the core futex code
so that we can drop the hash bucket locks and reschedule if necessary,
much as we do in the case of a page fault.

Allow architectures to return -EAGAIN from their implementations of
arch_futex_atomic_op_inuser() and futex_atomic_cmpxchg_inatomic(), which
will cause the core futex code to reschedule if necessary and return to
the architecture code later on.

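Continuing the sketch above, an implementation taking advantage of this
would bound its retry loop and fall back to -EAGAIN (the bound is
arbitrary; 128 happens to be what the companion arm64 fix uses for its
LDXR/STXR loops):

  #include <errno.h>
  #include <stdint.h>

  #define FUTEX_MAX_LOOPS 128 /* illustrative bound */

  static int cmpxchg_futex_word_bounded(uint32_t *uval, uint32_t *uaddr,
                                        uint32_t oldval, uint32_t newval)
  {
          uint32_t expected = oldval;
          int loops = FUTEX_MAX_LOOPS;

          while (loops-- > 0) {
                  if (__atomic_compare_exchange_n(uaddr, &expected, newval,
                                                  1 /* weak */,
                                                  __ATOMIC_SEQ_CST,
                                                  __ATOMIC_SEQ_CST) ||
                      expected != oldval) {
                          /* Succeeded, or genuinely lost the race. */
                          *uval = expected;
                          return 0;
                  }
                  /* Spurious failure leaves expected == oldval: retry. */
          }

          /* No forward progress: tell the core futex code to drop the
           * hash bucket locks, reschedule and retry. */
          return -EAGAIN;
  }

On seeing -EAGAIN, the core code below drops the hash bucket locks,
calls cond_resched() where it is safe to do so, and retries the
operation from the top.
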
Cc: <stable@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 kernel/futex.c | 188 +++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 117 insertions(+), 71 deletions(-)

--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1306,13 +1306,15 @@ static int lookup_pi_state(u32 __user *u
 
 static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
 {
+        int err;
         u32 uninitialized_var(curval);
 
         if (unlikely(should_fail_futex(true)))
                 return -EFAULT;
 
-        if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
-                return -EFAULT;
+        err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
+        if (unlikely(err))
+                return err;
 
         /* If user space value changed, let the caller retry */
         return curval != uval ? -EAGAIN : 0;
@@ -1498,10 +1500,8 @@ static int wake_futex_pi(u32 __user *uad
         if (unlikely(should_fail_futex(true)))
                 ret = -EFAULT;
 
-        if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
-                ret = -EFAULT;
-
-        } else if (curval != uval) {
+        ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
+        if (!ret && (curval != uval)) {
                 /*
                  * If a unconditional UNLOCK_PI operation (user space did not
                  * try the TID->0 transition) raced with a waiter setting the
@@ -1696,32 +1696,32 @@ retry_private:
         double_lock_hb(hb1, hb2);
         op_ret = futex_atomic_op_inuser(op, uaddr2);
         if (unlikely(op_ret < 0)) {
-
                 double_unlock_hb(hb1, hb2);
 
-#ifndef CONFIG_MMU
-                /*
-                 * we don't get EFAULT from MMU faults if we don't have an MMU,
-                 * but we might get them from range checking
-                 */
-                ret = op_ret;
-                goto out_put_keys;
-#endif
-
-                if (unlikely(op_ret != -EFAULT)) {
+                if (!IS_ENABLED(CONFIG_MMU) ||
+                    unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
+                        /*
+                         * we don't get EFAULT from MMU faults if we don't have
+                         * an MMU, but we might get them from range checking
+                         */
                         ret = op_ret;
                         goto out_put_keys;
                 }
 
-                ret = fault_in_user_writeable(uaddr2);
-                if (ret)
-                        goto out_put_keys;
+                if (op_ret == -EFAULT) {
+                        ret = fault_in_user_writeable(uaddr2);
+                        if (ret)
+                                goto out_put_keys;
+                }
 
-                if (!(flags & FLAGS_SHARED))
+                if (!(flags & FLAGS_SHARED)) {
+                        cond_resched();
                         goto retry_private;
+                }
 
                 put_futex_key(&key2);
                 put_futex_key(&key1);
+                cond_resched();
                 goto retry;
         }
 
@@ -2346,7 +2346,7 @@ static int fixup_pi_state_owner(u32 __us
         u32 uval, uninitialized_var(curval), newval;
         struct task_struct *oldowner, *newowner;
         u32 newtid;
-        int ret;
+        int ret, err = 0;
 
         lockdep_assert_held(q->lock_ptr);
 
@@ -2417,14 +2417,17 @@ retry:
         if (!pi_state->owner)
                 newtid |= FUTEX_OWNER_DIED;
 
-        if (get_futex_value_locked(&uval, uaddr))
-                goto handle_fault;
+        err = get_futex_value_locked(&uval, uaddr);
+        if (err)
+                goto handle_err;
 
         for (;;) {
                 newval = (uval & FUTEX_OWNER_DIED) | newtid;
 
-                if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
-                        goto handle_fault;
+                err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
+                if (err)
+                        goto handle_err;
+
                 if (curval == uval)
                         break;
                 uval = curval;
@@ -2452,23 +2455,37 @@ retry:
         return 0;
 
         /*
-         * To handle the page fault we need to drop the locks here. That gives
-         * the other task (either the highest priority waiter itself or the
-         * task which stole the rtmutex) the chance to try the fixup of the
-         * pi_state. So once we are back from handling the fault we need to
-         * check the pi_state after reacquiring the locks and before trying to
-         * do another fixup. When the fixup has been done already we simply
-         * return.
+         * In order to reschedule or handle a page fault, we need to drop the
+         * locks here. In the case of a fault, this gives the other task
+         * (either the highest priority waiter itself or the task which stole
+         * the rtmutex) the chance to try the fixup of the pi_state. So once we
+         * are back from handling the fault we need to check the pi_state after
+         * reacquiring the locks and before trying to do another fixup. When
+         * the fixup has been done already we simply return.
          *
          * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
         * drop hb->lock since the caller owns the hb -> futex_q relation.
         * Dropping the pi_mutex->wait_lock requires the state revalidate.
         */
-handle_fault:
+handle_err:
         raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
         spin_unlock(q->lock_ptr);
 
-        ret = fault_in_user_writeable(uaddr);
+        switch (err) {
+        case -EFAULT:
+                ret = fault_in_user_writeable(uaddr);
+                break;
+
+        case -EAGAIN:
+                cond_resched();
+                ret = 0;
+                break;
+
+        default:
+                WARN_ON_ONCE(1);
+                ret = err;
+                break;
+        }
 
         spin_lock(q->lock_ptr);
         raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
@@ -3037,10 +3054,8 @@ retry:
                  * A unconditional UNLOCK_PI op raced against a waiter
                  * setting the FUTEX_WAITERS bit. Try again.
                  */
-                if (ret == -EAGAIN) {
-                        put_futex_key(&key);
-                        goto retry;
-                }
+                if (ret == -EAGAIN)
+                        goto pi_retry;
                 /*
                  * wake_futex_pi has detected invalid state. Tell user
                  * space.
@@ -3055,9 +3070,19 @@ retry:
          * preserve the WAITERS bit not the OWNER_DIED one. We are the
          * owner.
          */
-        if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) {
+        if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
                 spin_unlock(&hb->lock);
-                goto pi_faulted;
+                switch (ret) {
+                case -EFAULT:
+                        goto pi_faulted;
+
+                case -EAGAIN:
+                        goto pi_retry;
+
+                default:
+                        WARN_ON_ONCE(1);
+                        goto out_putkey;
+                }
         }
 
         /*
@@ -3071,6 +3096,11 @@ out_putkey:
         put_futex_key(&key);
         return ret;
 
+pi_retry:
+        put_futex_key(&key);
+        cond_resched();
+        goto retry;
+
 pi_faulted:
         put_futex_key(&key);
 
@@ -3431,6 +3461,7 @@ err_unlock:
 int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
 {
         u32 uval, uninitialized_var(nval), mval;
+        int err;
 
         /* Futex address must be 32bit aligned */
         if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
@@ -3440,42 +3471,57 @@ retry:
         if (get_user(uval, uaddr))
                 return -1;
 
-        if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
-                /*
-                 * Ok, this dying thread is truly holding a futex
-                 * of interest. Set the OWNER_DIED bit atomically
-                 * via cmpxchg, and if the value had FUTEX_WAITERS
-                 * set, wake up a waiter (if any). (We have to do a
-                 * futex_wake() even if OWNER_DIED is already set -
-                 * to handle the rare but possible case of recursive
-                 * thread-death.) The rest of the cleanup is done in
-                 * userspace.
-                 */
-                mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
-                /*
-                 * We are not holding a lock here, but we want to have
-                 * the pagefault_disable/enable() protection because
-                 * we want to handle the fault gracefully. If the
-                 * access fails we try to fault in the futex with R/W
-                 * verification via get_user_pages. get_user() above
-                 * does not guarantee R/W access. If that fails we
-                 * give up and leave the futex locked.
-                 */
-                if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
+        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
+                return 0;
+
+        /*
+         * Ok, this dying thread is truly holding a futex
+         * of interest. Set the OWNER_DIED bit atomically
+         * via cmpxchg, and if the value had FUTEX_WAITERS
+         * set, wake up a waiter (if any). (We have to do a
+         * futex_wake() even if OWNER_DIED is already set -
+         * to handle the rare but possible case of recursive
+         * thread-death.) The rest of the cleanup is done in
+         * userspace.
+         */
+        mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+
+        /*
+         * We are not holding a lock here, but we want to have
+         * the pagefault_disable/enable() protection because
+         * we want to handle the fault gracefully. If the
+         * access fails we try to fault in the futex with R/W
+         * verification via get_user_pages. get_user() above
+         * does not guarantee R/W access. If that fails we
+         * give up and leave the futex locked.
+         */
+        if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) {
+                switch (err) {
+                case -EFAULT:
                         if (fault_in_user_writeable(uaddr))
                                 return -1;
                         goto retry;
-                }
-                if (nval != uval)
+
+                case -EAGAIN:
+                        cond_resched();
                         goto retry;
 
-                /*
-                 * Wake robust non-PI futexes here. The wakeup of
-                 * PI futexes happens in exit_pi_state():
-                 */
-                if (!pi && (uval & FUTEX_WAITERS))
-                        futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+                default:
+                        WARN_ON_ONCE(1);
+                        return err;
+                }
         }
+
+        if (nval != uval)
+                goto retry;
+
+        /*
+         * Wake robust non-PI futexes here. The wakeup of
+         * PI futexes happens in exit_pi_state():
+         */
+        if (!pi && (uval & FUTEX_WAITERS))
+                futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+
         return 0;
 }
 