From: Greg Kroah-Hartman Date: Mon, 18 Feb 2019 12:49:01 +0000 (+0100) Subject: 4.14-stable patches X-Git-Tag: v3.18.135~10 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7c4e1ebb3614e30ecaf3e99ba9246d553d2c0c5e;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: futex-cure-exit-race.patch sched-trace-fix-prev_state-output-in-sched_switch-tracepoint.patch --- diff --git a/queue-4.14/futex-cure-exit-race.patch b/queue-4.14/futex-cure-exit-race.patch new file mode 100644 index 00000000000..7b0947614cb --- /dev/null +++ b/queue-4.14/futex-cure-exit-race.patch @@ -0,0 +1,178 @@ +From da791a667536bf8322042e38ca85d55a78d3c273 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Mon, 10 Dec 2018 14:35:14 +0100 +Subject: futex: Cure exit race + +From: Thomas Gleixner + +commit da791a667536bf8322042e38ca85d55a78d3c273 upstream. + +Stefan reported, that the glibc tst-robustpi4 test case fails +occasionally. That case creates the following race between +sys_exit() and sys_futex_lock_pi(): + + CPU0 CPU1 + + sys_exit() sys_futex() + do_exit() futex_lock_pi() + exit_signals(tsk) No waiters: + tsk->flags |= PF_EXITING; *uaddr == 0x00000PID + mm_release(tsk) Set waiter bit + exit_robust_list(tsk) { *uaddr = 0x80000PID; + Set owner died attach_to_pi_owner() { + *uaddr = 0xC0000000; tsk = get_task(PID); + } if (!tsk->flags & PF_EXITING) { + ... attach(); + tsk->flags |= PF_EXITPIDONE; } else { + if (!(tsk->flags & PF_EXITPIDONE)) + return -EAGAIN; + return -ESRCH; <--- FAIL + } + +ESRCH is returned all the way to user space, which triggers the glibc test +case assert. Returning ESRCH unconditionally is wrong here because the user +space value has been changed by the exiting task to 0xC0000000, i.e. the +FUTEX_OWNER_DIED bit is set and the futex PID value has been cleared. This +is a valid state and the kernel has to handle it, i.e. taking the futex. + +Cure it by rereading the user space value when PF_EXITING and PF_EXITPIDONE +is set in the task which 'owns' the futex. If the value has changed, let +the kernel retry the operation, which includes all regular sanity checks +and correctly handles the FUTEX_OWNER_DIED case. + +If it hasn't changed, then return ESRCH as there is no way to distinguish +this case from malfunctioning user space. This happens when the exiting +task did not have a robust list, the robust list was corrupted or the user +space value in the futex was simply bogus. + +Reported-by: Stefan Liebler +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra +Cc: Heiko Carstens +Cc: Darren Hart +Cc: Ingo Molnar +Cc: Sasha Levin +Cc: stable@vger.kernel.org +Link: https://bugzilla.kernel.org/show_bug.cgi?id=200467 +Link: https://lkml.kernel.org/r/20181210152311.986181245@linutronix.de +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/futex.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 63 insertions(+), 6 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1166,11 +1166,65 @@ out_error: + return ret; + } + ++static int handle_exit_race(u32 __user *uaddr, u32 uval, ++ struct task_struct *tsk) ++{ ++ u32 uval2; ++ ++ /* ++ * If PF_EXITPIDONE is not yet set, then try again. ++ */ ++ if (tsk && !(tsk->flags & PF_EXITPIDONE)) ++ return -EAGAIN; ++ ++ /* ++ * Reread the user space value to handle the following situation: ++ * ++ * CPU0 CPU1 ++ * ++ * sys_exit() sys_futex() ++ * do_exit() futex_lock_pi() ++ * futex_lock_pi_atomic() ++ * exit_signals(tsk) No waiters: ++ * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID ++ * mm_release(tsk) Set waiter bit ++ * exit_robust_list(tsk) { *uaddr = 0x80000PID; ++ * Set owner died attach_to_pi_owner() { ++ * *uaddr = 0xC0000000; tsk = get_task(PID); ++ * } if (!tsk->flags & PF_EXITING) { ++ * ... attach(); ++ * tsk->flags |= PF_EXITPIDONE; } else { ++ * if (!(tsk->flags & PF_EXITPIDONE)) ++ * return -EAGAIN; ++ * return -ESRCH; <--- FAIL ++ * } ++ * ++ * Returning ESRCH unconditionally is wrong here because the ++ * user space value has been changed by the exiting task. ++ * ++ * The same logic applies to the case where the exiting task is ++ * already gone. ++ */ ++ if (get_futex_value_locked(&uval2, uaddr)) ++ return -EFAULT; ++ ++ /* If the user space value has changed, try again. */ ++ if (uval2 != uval) ++ return -EAGAIN; ++ ++ /* ++ * The exiting task did not have a robust list, the robust list was ++ * corrupted or the user space value in *uaddr is simply bogus. ++ * Give up and tell user space. ++ */ ++ return -ESRCH; ++} ++ + /* + * Lookup the task for the TID provided from user space and attach to + * it after doing proper sanity checks. + */ +-static int attach_to_pi_owner(u32 uval, union futex_key *key, ++static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, + struct futex_pi_state **ps) + { + pid_t pid = uval & FUTEX_TID_MASK; +@@ -1180,12 +1234,15 @@ static int attach_to_pi_owner(u32 uval, + /* + * We are the first waiter - try to look up the real owner and attach + * the new pi_state to it, but bail out when TID = 0 [1] ++ * ++ * The !pid check is paranoid. None of the call sites should end up ++ * with pid == 0, but better safe than sorry. Let the caller retry + */ + if (!pid) +- return -ESRCH; ++ return -EAGAIN; + p = futex_find_get_task(pid); + if (!p) +- return -ESRCH; ++ return handle_exit_race(uaddr, uval, NULL); + + if (unlikely(p->flags & PF_KTHREAD)) { + put_task_struct(p); +@@ -1205,7 +1262,7 @@ static int attach_to_pi_owner(u32 uval, + * set, we know that the task has finished the + * cleanup: + */ +- int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN; ++ int ret = handle_exit_race(uaddr, uval, p); + + raw_spin_unlock_irq(&p->pi_lock); + put_task_struct(p); +@@ -1262,7 +1319,7 @@ static int lookup_pi_state(u32 __user *u + * We are the first waiter - try to look up the owner based on + * @uval and attach to it. + */ +- return attach_to_pi_owner(uval, key, ps); ++ return attach_to_pi_owner(uaddr, uval, key, ps); + } + + static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) +@@ -1370,7 +1427,7 @@ static int futex_lock_pi_atomic(u32 __us + * attach to the owner. If that fails, no harm done, we only + * set the FUTEX_WAITERS bit in the user space variable. + */ +- return attach_to_pi_owner(uval, key, ps); ++ return attach_to_pi_owner(uaddr, newval, key, ps); + } + + /** diff --git a/queue-4.14/sched-trace-fix-prev_state-output-in-sched_switch-tracepoint.patch b/queue-4.14/sched-trace-fix-prev_state-output-in-sched_switch-tracepoint.patch new file mode 100644 index 00000000000..db6952592c1 --- /dev/null +++ b/queue-4.14/sched-trace-fix-prev_state-output-in-sched_switch-tracepoint.patch @@ -0,0 +1,60 @@ +From 3054426dc68e5d63aa6a6e9b91ac4ec78e3f3805 Mon Sep 17 00:00:00 2001 +From: Pavankumar Kondeti +Date: Tue, 30 Oct 2018 12:24:33 +0530 +Subject: sched, trace: Fix prev_state output in sched_switch tracepoint + +From: Pavankumar Kondeti + +commit 3054426dc68e5d63aa6a6e9b91ac4ec78e3f3805 upstream. + +commit 3f5fe9fef5b2 ("sched/debug: Fix task state recording/printout") +tried to fix the problem introduced by a previous commit efb40f588b43 +("sched/tracing: Fix trace_sched_switch task-state printing"). However +the prev_state output in sched_switch is still broken. + +task_state_index() uses fls() which considers the LSB as 1. Left +shifting 1 by this value gives an incorrect mapping to the task state. +Fix this by decrementing the value returned by __get_task_state() +before shifting. + +Link: http://lkml.kernel.org/r/1540882473-1103-1-git-send-email-pkondeti@codeaurora.org + +Cc: stable@vger.kernel.org +Fixes: 3f5fe9fef5b2 ("sched/debug: Fix task state recording/printout") +Signed-off-by: Pavankumar Kondeti +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman + +--- + include/trace/events/sched.h | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +--- a/include/trace/events/sched.h ++++ b/include/trace/events/sched.h +@@ -107,6 +107,8 @@ DEFINE_EVENT(sched_wakeup_template, sche + #ifdef CREATE_TRACE_POINTS + static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p) + { ++ unsigned int state; ++ + #ifdef CONFIG_SCHED_DEBUG + BUG_ON(p != current); + #endif /* CONFIG_SCHED_DEBUG */ +@@ -118,7 +120,15 @@ static inline long __trace_sched_switch_ + if (preempt) + return TASK_REPORT_MAX; + +- return 1 << __get_task_state(p); ++ /* ++ * task_state_index() uses fls() and returns a value from 0-8 range. ++ * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using ++ * it for left shift operation to get the correct task->state ++ * mapping. ++ */ ++ state = __get_task_state(p); ++ ++ return state ? (1 << (state - 1)) : state; + } + #endif /* CREATE_TRACE_POINTS */ + diff --git a/queue-4.14/series b/queue-4.14/series index 80a8ed25d11..1482022f469 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -57,3 +57,5 @@ x86-a.out-clear-the-dump-structure-initially.patch dm-crypt-don-t-overallocate-the-integrity-tag-space.patch dm-thin-fix-bug-where-bio-that-overwrites-thin-block-ignores-fua.patch drm-i915-prevent-a-race-during-i915_gem_mmap-ioctl-with-wc-set.patch +sched-trace-fix-prev_state-output-in-sched_switch-tracepoint.patch +futex-cure-exit-race.patch