From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 18 Feb 2019 12:49:01 +0000 (+0100)
Subject: 4.14-stable patches
X-Git-Tag: v3.18.135~10
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7c4e1ebb3614e30ecaf3e99ba9246d553d2c0c5e;p=thirdparty%2Fkernel%2Fstable-queue.git

4.14-stable patches

added patches:
	futex-cure-exit-race.patch
	sched-trace-fix-prev_state-output-in-sched_switch-tracepoint.patch
---

diff --git a/queue-4.14/futex-cure-exit-race.patch b/queue-4.14/futex-cure-exit-race.patch
new file mode 100644
index 00000000000..7b0947614cb
--- /dev/null
+++ b/queue-4.14/futex-cure-exit-race.patch
@@ -0,0 +1,178 @@
+From da791a667536bf8322042e38ca85d55a78d3c273 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 10 Dec 2018 14:35:14 +0100
+Subject: futex: Cure exit race
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit da791a667536bf8322042e38ca85d55a78d3c273 upstream.
+
+Stefan reported that the glibc tst-robustpi4 test case fails
+occasionally. That case creates the following race between
+sys_exit() and sys_futex_lock_pi():
+
+ CPU0				CPU1
+
+ sys_exit()			sys_futex()
+  do_exit()			 futex_lock_pi()
+   exit_signals(tsk)		  No waiters:
+    tsk->flags |= PF_EXITING;	  *uaddr == 0x00000PID
+  mm_release(tsk)		  Set waiter bit
+   exit_robust_list(tsk) {	  *uaddr = 0x80000PID;
+      Set owner died		  attach_to_pi_owner() {
+    *uaddr = 0xC0000000;	   tsk = get_task(PID);
+   }				   if (!(tsk->flags & PF_EXITING)) {
+  ...				     attach();
+  tsk->flags |= PF_EXITPIDONE;	   } else {
+				     if (!(tsk->flags & PF_EXITPIDONE))
+				       return -EAGAIN;
+				     return -ESRCH; <--- FAIL
+				   }
+
+ESRCH is returned all the way to user space, which triggers the glibc test
+case assert. Returning ESRCH unconditionally is wrong here because the user
+space value has been changed by the exiting task to 0xC0000000, i.e. the
+FUTEX_OWNER_DIED bit is set and the futex PID value has been cleared. This
+is a valid state and the kernel has to handle it, i.e. taking the futex.
+
+Cure it by rereading the user space value when PF_EXITING and PF_EXITPIDONE
+are set in the task which 'owns' the futex. If the value has changed, let
+the kernel retry the operation, which includes all regular sanity checks
+and correctly handles the FUTEX_OWNER_DIED case.
+
+If it hasn't changed, then return ESRCH as there is no way to distinguish
+this case from malfunctioning user space. This happens when the exiting
+task did not have a robust list, the robust list was corrupted or the user
+space value in the futex was simply bogus.
+
+Reported-by: Stefan Liebler <stli@linux.ibm.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: Darren Hart <dvhart@infradead.org>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Sasha Levin <sashal@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=200467
+Link: https://lkml.kernel.org/r/20181210152311.986181245@linutronix.de
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/futex.c |   69 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 63 insertions(+), 6 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1166,11 +1166,65 @@ out_error:
+ 	return ret;
+ }
+ 
++static int handle_exit_race(u32 __user *uaddr, u32 uval,
++			    struct task_struct *tsk)
++{
++	u32 uval2;
++
++	/*
++	 * If PF_EXITPIDONE is not yet set, then try again.
++	 */
++	if (tsk && !(tsk->flags & PF_EXITPIDONE))
++		return -EAGAIN;
++
++	/*
++	 * Reread the user space value to handle the following situation:
++	 *
++	 * CPU0				CPU1
++	 *
++	 * sys_exit()			sys_futex()
++	 *  do_exit()			 futex_lock_pi()
++	 *                                futex_lock_pi_atomic()
++	 *   exit_signals(tsk)		    No waiters:
++	 *    tsk->flags |= PF_EXITING;	    *uaddr == 0x00000PID
++	 *  mm_release(tsk)		    Set waiter bit
++	 *   exit_robust_list(tsk) {	    *uaddr = 0x80000PID;
++	 *      Set owner died		    attach_to_pi_owner() {
++	 *    *uaddr = 0xC0000000;	     tsk = get_task(PID);
++	 *   }				     if (!(tsk->flags & PF_EXITING)) {
++	 *  ...				       attach();
++	 *  tsk->flags |= PF_EXITPIDONE;     } else {
++	 *				       if (!(tsk->flags & PF_EXITPIDONE))
++	 *				         return -EAGAIN;
++	 *				       return -ESRCH; <--- FAIL
++	 *				     }
++	 *
++	 * Returning ESRCH unconditionally is wrong here because the
++	 * user space value has been changed by the exiting task.
++	 *
++	 * The same logic applies to the case where the exiting task is
++	 * already gone.
++	 */
++	if (get_futex_value_locked(&uval2, uaddr))
++		return -EFAULT;
++
++	/* If the user space value has changed, try again. */
++	if (uval2 != uval)
++		return -EAGAIN;
++
++	/*
++	 * The exiting task did not have a robust list, the robust list was
++	 * corrupted or the user space value in *uaddr is simply bogus.
++	 * Give up and tell user space.
++	 */
++	return -ESRCH;
++}
++
+ /*
+  * Lookup the task for the TID provided from user space and attach to
+  * it after doing proper sanity checks.
+  */
+-static int attach_to_pi_owner(u32 uval, union futex_key *key,
++static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
+ 			      struct futex_pi_state **ps)
+ {
+ 	pid_t pid = uval & FUTEX_TID_MASK;
+@@ -1180,12 +1234,15 @@ static int attach_to_pi_owner(u32 uval,
+ 	/*
+ 	 * We are the first waiter - try to look up the real owner and attach
+ 	 * the new pi_state to it, but bail out when TID = 0 [1]
++	 *
++	 * The !pid check is paranoid. None of the call sites should end up
++	 * with pid == 0, but better safe than sorry. Let the caller retry.
+ 	 */
+ 	if (!pid)
+-		return -ESRCH;
++		return -EAGAIN;
+ 	p = futex_find_get_task(pid);
+ 	if (!p)
+-		return -ESRCH;
++		return handle_exit_race(uaddr, uval, NULL);
+ 
+ 	if (unlikely(p->flags & PF_KTHREAD)) {
+ 		put_task_struct(p);
+@@ -1205,7 +1262,7 @@ static int attach_to_pi_owner(u32 uval,
+ 		 * set, we know that the task has finished the
+ 		 * cleanup:
+ 		 */
+-		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
++		int ret = handle_exit_race(uaddr, uval, p);
+ 
+ 		raw_spin_unlock_irq(&p->pi_lock);
+ 		put_task_struct(p);
+@@ -1262,7 +1319,7 @@ static int lookup_pi_state(u32 __user *u
+ 	 * We are the first waiter - try to look up the owner based on
+ 	 * @uval and attach to it.
+ 	 */
+-	return attach_to_pi_owner(uval, key, ps);
++	return attach_to_pi_owner(uaddr, uval, key, ps);
+ }
+ 
+ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
+@@ -1370,7 +1427,7 @@ static int futex_lock_pi_atomic(u32 __us
+ 	 * attach to the owner. If that fails, no harm done, we only
+ 	 * set the FUTEX_WAITERS bit in the user space variable.
+ 	 */
+-	return attach_to_pi_owner(uval, key, ps);
++	return attach_to_pi_owner(uaddr, newval, key, ps);
+ }
+ 
+ /**
diff --git a/queue-4.14/sched-trace-fix-prev_state-output-in-sched_switch-tracepoint.patch b/queue-4.14/sched-trace-fix-prev_state-output-in-sched_switch-tracepoint.patch
new file mode 100644
index 00000000000..db6952592c1
--- /dev/null
+++ b/queue-4.14/sched-trace-fix-prev_state-output-in-sched_switch-tracepoint.patch
@@ -0,0 +1,60 @@
+From 3054426dc68e5d63aa6a6e9b91ac4ec78e3f3805 Mon Sep 17 00:00:00 2001
+From: Pavankumar Kondeti <pkondeti@codeaurora.org>
+Date: Tue, 30 Oct 2018 12:24:33 +0530
+Subject: sched, trace: Fix prev_state output in sched_switch tracepoint
+
+From: Pavankumar Kondeti <pkondeti@codeaurora.org>
+
+commit 3054426dc68e5d63aa6a6e9b91ac4ec78e3f3805 upstream.
+
+commit 3f5fe9fef5b2 ("sched/debug: Fix task state recording/printout")
+tried to fix the problem introduced by a previous commit efb40f588b43
+("sched/tracing: Fix trace_sched_switch task-state printing"). However
+the prev_state output in sched_switch is still broken.
+
+task_state_index() uses fls() which considers the LSB as 1. Left
+shifting 1 by this value gives an incorrect mapping to the task state.
+Fix this by decrementing the value returned by __get_task_state()
+before shifting.
+
+Link: http://lkml.kernel.org/r/1540882473-1103-1-git-send-email-pkondeti@codeaurora.org
+
+Cc: stable@vger.kernel.org
+Fixes: 3f5fe9fef5b2 ("sched/debug: Fix task state recording/printout")
+Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/trace/events/sched.h |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/include/trace/events/sched.h
++++ b/include/trace/events/sched.h
+@@ -107,6 +107,8 @@ DEFINE_EVENT(sched_wakeup_template, sche
+ #ifdef CREATE_TRACE_POINTS
+ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
+ {
++	unsigned int state;
++
+ #ifdef CONFIG_SCHED_DEBUG
+ 	BUG_ON(p != current);
+ #endif /* CONFIG_SCHED_DEBUG */
+@@ -118,7 +120,15 @@ static inline long __trace_sched_switch_
+ 	if (preempt)
+ 		return TASK_REPORT_MAX;
+ 
+-	return 1 << __get_task_state(p);
++	/*
++	 * __get_task_state() uses fls() and returns a value in the 0-8 range.
++	 * Decrement it by 1 (except TASK_RUNNING state, i.e. 0) before using
++	 * it for left shift operation to get the correct task->state
++	 * mapping.
++	 */
++	state = __get_task_state(p);
++
++	return state ? (1 << (state - 1)) : state;
+ }
+ #endif /* CREATE_TRACE_POINTS */
+ 
diff --git a/queue-4.14/series b/queue-4.14/series
index 80a8ed25d11..1482022f469 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -57,3 +57,5 @@ x86-a.out-clear-the-dump-structure-initially.patch
 dm-crypt-don-t-overallocate-the-integrity-tag-space.patch
 dm-thin-fix-bug-where-bio-that-overwrites-thin-block-ignores-fua.patch
 drm-i915-prevent-a-race-during-i915_gem_mmap-ioctl-with-wc-set.patch
+sched-trace-fix-prev_state-output-in-sched_switch-tracepoint.patch
+futex-cure-exit-race.patch