+++ /dev/null
-Subject: exit/exec: Seperate mm_release()
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed Nov 6 22:55:38 2019 +0100
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit 4610ba7ad877fafc0a25a30c6c82015304120426 upstream
-
-mm_release() contains the futex exit handling. mm_release() is called from
-do_exit()->exit_mm() and from exec()->exec_mm().
-
-In the exit_mm() case PF_EXITING and the futex state are updated. In the
-exec_mm() case these states are not touched.
-
-As the futex exit code needs further protections against exit races, this
-needs to be split into two functions.
-
-Preparatory only, no functional change.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Ingo Molnar <mingo@kernel.org>
-Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lkml.kernel.org/r/20191106224556.240518241@linutronix.de
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- fs/exec.c | 2 +-
- include/linux/sched/mm.h | 6 ++++--
- kernel/exit.c | 2 +-
- kernel/fork.c | 12 +++++++++++-
- 4 files changed, 17 insertions(+), 5 deletions(-)
-
---- a/fs/exec.c
-+++ b/fs/exec.c
-@@ -1011,7 +1011,7 @@ static int exec_mmap(struct mm_struct *m
- /* Notify parent that we're no longer interested in the old VM */
- tsk = current;
- old_mm = current->mm;
-- mm_release(tsk, old_mm);
-+ exec_mm_release(tsk, old_mm);
-
- if (old_mm) {
- sync_mm_rss(old_mm);
---- a/include/linux/sched/mm.h
-+++ b/include/linux/sched/mm.h
-@@ -119,8 +119,10 @@ extern struct mm_struct *get_task_mm(str
- * succeeds.
- */
- extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
--/* Remove the current tasks stale references to the old mm_struct */
--extern void mm_release(struct task_struct *, struct mm_struct *);
-+/* Remove the current tasks stale references to the old mm_struct on exit() */
-+extern void exit_mm_release(struct task_struct *, struct mm_struct *);
-+/* Remove the current tasks stale references to the old mm_struct on exec() */
-+extern void exec_mm_release(struct task_struct *, struct mm_struct *);
-
- #ifdef CONFIG_MEMCG
- extern void mm_update_next_owner(struct mm_struct *mm);
---- a/kernel/exit.c
-+++ b/kernel/exit.c
-@@ -498,7 +498,7 @@ static void exit_mm(void)
- struct mm_struct *mm = current->mm;
- struct core_state *core_state;
-
-- mm_release(current, mm);
-+ exit_mm_release(current, mm);
- if (!mm)
- return;
- sync_mm_rss(mm);
---- a/kernel/fork.c
-+++ b/kernel/fork.c
-@@ -1217,7 +1217,7 @@ static int wait_for_vfork_done(struct ta
- * restoring the old one. . .
- * Eric Biederman 10 January 1998
- */
--void mm_release(struct task_struct *tsk, struct mm_struct *mm)
-+static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
- {
- /* Get rid of any futexes when releasing the mm */
- futex_mm_release(tsk);
-@@ -1254,6 +1254,16 @@ void mm_release(struct task_struct *tsk,
- complete_vfork_done(tsk);
- }
-
-+void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
-+{
-+ mm_release(tsk, mm);
-+}
-+
-+void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
-+{
-+ mm_release(tsk, mm);
-+}
-+
- /*
- * Allocate a new mm structure and copy contents from the
- * mm structure of the passed in task structure.
+++ /dev/null
-Subject: futex: Add mutex around futex exit
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed Nov 6 22:55:44 2019 +0100
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit 3f186d974826847a07bc7964d79ec4eded475ad9 upstream
-
-The mutex will be used in subsequent changes to replace the busy looping of
-a waiter when the futex owner is currently executing the exit cleanup to
-prevent a potential live lock.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Ingo Molnar <mingo@kernel.org>
-Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lkml.kernel.org/r/20191106224556.845798895@linutronix.de
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- include/linux/futex.h | 1 +
- include/linux/sched.h | 1 +
- kernel/futex.c | 16 ++++++++++++++++
- 3 files changed, 18 insertions(+)
-
---- a/include/linux/futex.h
-+++ b/include/linux/futex.h
-@@ -68,6 +68,7 @@ static inline void futex_init_task(struc
- INIT_LIST_HEAD(&tsk->pi_state_list);
- tsk->pi_state_cache = NULL;
- tsk->futex_state = FUTEX_STATE_OK;
-+ mutex_init(&tsk->futex_exit_mutex);
- }
-
- void futex_exit_recursive(struct task_struct *tsk);
---- a/include/linux/sched.h
-+++ b/include/linux/sched.h
-@@ -996,6 +996,7 @@ struct task_struct {
- #endif
- struct list_head pi_state_list;
- struct futex_pi_state *pi_state_cache;
-+ struct mutex futex_exit_mutex;
- unsigned int futex_state;
- #endif
- #ifdef CONFIG_PERF_EVENTS
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -3735,12 +3735,23 @@ static void futex_cleanup(struct task_st
- */
- void futex_exit_recursive(struct task_struct *tsk)
- {
-+ /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
-+ if (tsk->futex_state == FUTEX_STATE_EXITING)
-+ mutex_unlock(&tsk->futex_exit_mutex);
- tsk->futex_state = FUTEX_STATE_DEAD;
- }
-
- static void futex_cleanup_begin(struct task_struct *tsk)
- {
- /*
-+ * Prevent various race issues against a concurrent incoming waiter
-+ * including live locks by forcing the waiter to block on
-+ * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
-+ * attach_to_pi_owner().
-+ */
-+ mutex_lock(&tsk->futex_exit_mutex);
-+
-+ /*
- * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
- *
- * This ensures that all subsequent checks of tsk->futex_state in
-@@ -3763,6 +3774,11 @@ static void futex_cleanup_end(struct tas
- * take another loop until it becomes visible.
- */
- tsk->futex_state = state;
-+ /*
-+ * Drop the exit protection. This unblocks waiters which observed
-+ * FUTEX_STATE_EXITING to reevaluate the state.
-+ */
-+ mutex_unlock(&tsk->futex_exit_mutex);
- }
-
- void futex_exec_release(struct task_struct *tsk)
+++ /dev/null
-Subject: futex: Mark the begin of futex exit explicitly
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed Nov 6 22:55:41 2019 +0100
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit 18f694385c4fd77a09851fd301236746ca83f3cb upstream
-
-Instead of relying on PF_EXITING use an explicit state for the futex exit
-and set it in the futex exit function. This moves the smp barrier and the
-lock/unlock serialization into the futex code.
-
-As with the DEAD state this is restricted to the exit path as exec
-continues to use the same task struct.
-
-This allows simplifying that logic in the next step.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Ingo Molnar <mingo@kernel.org>
-Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lkml.kernel.org/r/20191106224556.539409004@linutronix.de
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- include/linux/futex.h | 31 +++----------------------------
- kernel/exit.c | 13 +------------
- kernel/futex.c | 37 ++++++++++++++++++++++++++++++++++++-
- 3 files changed, 40 insertions(+), 41 deletions(-)
-
---- a/include/linux/futex.h
-+++ b/include/linux/futex.h
-@@ -55,6 +55,7 @@ union futex_key {
- #ifdef CONFIG_FUTEX
- enum {
- FUTEX_STATE_OK,
-+ FUTEX_STATE_EXITING,
- FUTEX_STATE_DEAD,
- };
-
-@@ -69,33 +70,7 @@ static inline void futex_init_task(struc
- tsk->futex_state = FUTEX_STATE_OK;
- }
-
--/**
-- * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD
-- * @tsk: task to set the state on
-- *
-- * Set the futex exit state of the task lockless. The futex waiter code
-- * observes that state when a task is exiting and loops until the task has
-- * actually finished the futex cleanup. The worst case for this is that the
-- * waiter runs through the wait loop until the state becomes visible.
-- *
-- * This has two callers:
-- *
-- * - futex_mm_release() after the futex exit cleanup has been done
-- *
-- * - do_exit() from the recursive fault handling path.
-- *
-- * In case of a recursive fault this is best effort. Either the futex exit
-- * code has run already or not. If the OWNER_DIED bit has been set on the
-- * futex then the waiter can take it over. If not, the problem is pushed
-- * back to user space. If the futex exit code did not run yet, then an
-- * already queued waiter might block forever, but there is nothing which
-- * can be done about that.
-- */
--static inline void futex_exit_done(struct task_struct *tsk)
--{
-- tsk->futex_state = FUTEX_STATE_DEAD;
--}
--
-+void futex_exit_recursive(struct task_struct *tsk);
- void futex_exit_release(struct task_struct *tsk);
- void futex_exec_release(struct task_struct *tsk);
-
-@@ -103,7 +78,7 @@ long do_futex(u32 __user *uaddr, int op,
- u32 __user *uaddr2, u32 val2, u32 val3);
- #else
- static inline void futex_init_task(struct task_struct *tsk) { }
--static inline void futex_exit_done(struct task_struct *tsk) { }
-+static inline void futex_exit_recursive(struct task_struct *tsk) { }
- static inline void futex_exit_release(struct task_struct *tsk) { }
- static inline void futex_exec_release(struct task_struct *tsk) { }
- static inline long do_futex(u32 __user *uaddr, int op, u32 val,
---- a/kernel/exit.c
-+++ b/kernel/exit.c
-@@ -818,23 +818,12 @@ void __noreturn do_exit(long code)
- */
- if (unlikely(tsk->flags & PF_EXITING)) {
- pr_alert("Fixing recursive fault but reboot is needed!\n");
-- futex_exit_done(tsk);
-+ futex_exit_recursive(tsk);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule();
- }
-
- exit_signals(tsk); /* sets PF_EXITING */
-- /*
-- * Ensure that all new tsk->pi_lock acquisitions must observe
-- * PF_EXITING. Serializes against futex.c:attach_to_pi_owner().
-- */
-- smp_mb();
-- /*
-- * Ensure that we must observe the pi_state in exit_mm() ->
-- * mm_release() -> exit_pi_state_list().
-- */
-- raw_spin_lock_irq(&tsk->pi_lock);
-- raw_spin_unlock_irq(&tsk->pi_lock);
-
- /* sync mm's RSS info before statistics gathering */
- if (tsk->mm)
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -3716,10 +3716,45 @@ void futex_exec_release(struct task_stru
- exit_pi_state_list(tsk);
- }
-
-+/**
-+ * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
-+ * @tsk: task to set the state on
-+ *
-+ * Set the futex exit state of the task lockless. The futex waiter code
-+ * observes that state when a task is exiting and loops until the task has
-+ * actually finished the futex cleanup. The worst case for this is that the
-+ * waiter runs through the wait loop until the state becomes visible.
-+ *
-+ * This is called from the recursive fault handling path in do_exit().
-+ *
-+ * This is best effort. Either the futex exit code has run already or
-+ * not. If the OWNER_DIED bit has been set on the futex then the waiter can
-+ * take it over. If not, the problem is pushed back to user space. If the
-+ * futex exit code did not run yet, then an already queued waiter might
-+ * block forever, but there is nothing which can be done about that.
-+ */
-+void futex_exit_recursive(struct task_struct *tsk)
-+{
-+ tsk->futex_state = FUTEX_STATE_DEAD;
-+}
-+
- void futex_exit_release(struct task_struct *tsk)
- {
-+ tsk->futex_state = FUTEX_STATE_EXITING;
-+ /*
-+ * Ensure that all new tsk->pi_lock acquisitions must observe
-+ * FUTEX_STATE_EXITING. Serializes against attach_to_pi_owner().
-+ */
-+ smp_mb();
-+ /*
-+ * Ensure that we must observe the pi_state in exit_pi_state_list().
-+ */
-+ raw_spin_lock_irq(&tsk->pi_lock);
-+ raw_spin_unlock_irq(&tsk->pi_lock);
-+
- futex_exec_release(tsk);
-- futex_exit_done(tsk);
-+
-+ tsk->futex_state = FUTEX_STATE_DEAD;
- }
-
- long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+++ /dev/null
-Subject: futex: Move futex exit handling into futex code
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed Nov 6 22:55:36 2019 +0100
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit ba31c1a48538992316cc71ce94fa9cd3e7b427c0 upstream
-
-The futex exit handling is #ifdeffed into mm_release() which is not pretty
-to begin with. But upcoming changes to address futex exit races need to add
-more functionality to this exit code.
-
-Split it out into a function, move it into futex code and make the various
-futex exit functions static.
-
-Preparatory only and no functional change.
-
-Folded build fix from Borislav.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Ingo Molnar <mingo@kernel.org>
-Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lkml.kernel.org/r/20191106224556.049705556@linutronix.de
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- include/linux/compat.h | 2 --
- include/linux/futex.h | 29 ++++++++++++++++-------------
- kernel/fork.c | 25 +++----------------------
- kernel/futex.c | 33 +++++++++++++++++++++++++++++----
- 4 files changed, 48 insertions(+), 41 deletions(-)
-
---- a/include/linux/compat.h
-+++ b/include/linux/compat.h
-@@ -445,8 +445,6 @@ struct compat_kexec_segment;
- struct compat_mq_attr;
- struct compat_msgbuf;
-
--extern void compat_exit_robust_list(struct task_struct *curr);
--
- #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t))
-
- #define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG)
---- a/include/linux/futex.h
-+++ b/include/linux/futex.h
-@@ -2,7 +2,9 @@
- #ifndef _LINUX_FUTEX_H
- #define _LINUX_FUTEX_H
-
-+#include <linux/sched.h>
- #include <linux/ktime.h>
-+
- #include <uapi/linux/futex.h>
-
- struct inode;
-@@ -51,15 +53,24 @@ union futex_key {
- #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
-
- #ifdef CONFIG_FUTEX
--extern void exit_robust_list(struct task_struct *curr);
-
--long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
-- u32 __user *uaddr2, u32 val2, u32 val3);
--#else
--static inline void exit_robust_list(struct task_struct *curr)
-+static inline void futex_init_task(struct task_struct *tsk)
- {
-+ tsk->robust_list = NULL;
-+#ifdef CONFIG_COMPAT
-+ tsk->compat_robust_list = NULL;
-+#endif
-+ INIT_LIST_HEAD(&tsk->pi_state_list);
-+ tsk->pi_state_cache = NULL;
- }
-
-+void futex_mm_release(struct task_struct *tsk);
-+
-+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
-+ u32 __user *uaddr2, u32 val2, u32 val3);
-+#else
-+static inline void futex_init_task(struct task_struct *tsk) { }
-+static inline void futex_mm_release(struct task_struct *tsk) { }
- static inline long do_futex(u32 __user *uaddr, int op, u32 val,
- ktime_t *timeout, u32 __user *uaddr2,
- u32 val2, u32 val3)
-@@ -68,12 +79,4 @@ static inline long do_futex(u32 __user *
- }
- #endif
-
--#ifdef CONFIG_FUTEX_PI
--extern void exit_pi_state_list(struct task_struct *curr);
--#else
--static inline void exit_pi_state_list(struct task_struct *curr)
--{
--}
--#endif
--
- #endif
---- a/kernel/fork.c
-+++ b/kernel/fork.c
-@@ -1220,20 +1220,7 @@ static int wait_for_vfork_done(struct ta
- void mm_release(struct task_struct *tsk, struct mm_struct *mm)
- {
- /* Get rid of any futexes when releasing the mm */
--#ifdef CONFIG_FUTEX
-- if (unlikely(tsk->robust_list)) {
-- exit_robust_list(tsk);
-- tsk->robust_list = NULL;
-- }
--#ifdef CONFIG_COMPAT
-- if (unlikely(tsk->compat_robust_list)) {
-- compat_exit_robust_list(tsk);
-- tsk->compat_robust_list = NULL;
-- }
--#endif
-- if (unlikely(!list_empty(&tsk->pi_state_list)))
-- exit_pi_state_list(tsk);
--#endif
-+ futex_mm_release(tsk);
-
- uprobe_free_utask(tsk);
-
-@@ -1937,14 +1924,8 @@ static __latent_entropy struct task_stru
- #ifdef CONFIG_BLOCK
- p->plug = NULL;
- #endif
--#ifdef CONFIG_FUTEX
-- p->robust_list = NULL;
--#ifdef CONFIG_COMPAT
-- p->compat_robust_list = NULL;
--#endif
-- INIT_LIST_HEAD(&p->pi_state_list);
-- p->pi_state_cache = NULL;
--#endif
-+ futex_init_task(p);
-+
- /*
- * sigaltstack should be cleared when sharing the same VM
- */
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -341,6 +341,12 @@ static inline bool should_fail_futex(boo
- }
- #endif /* CONFIG_FAIL_FUTEX */
-
-+#ifdef CONFIG_COMPAT
-+static void compat_exit_robust_list(struct task_struct *curr);
-+#else
-+static inline void compat_exit_robust_list(struct task_struct *curr) { }
-+#endif
-+
- static inline void futex_get_mm(union futex_key *key)
- {
- mmgrab(key->private.mm);
-@@ -890,7 +896,7 @@ static void put_pi_state(struct futex_pi
- * Kernel cleans up PI-state, but userspace is likely hosed.
- * (Robust-futex cleanup is separate and might save the day for userspace.)
- */
--void exit_pi_state_list(struct task_struct *curr)
-+static void exit_pi_state_list(struct task_struct *curr)
- {
- struct list_head *next, *head = &curr->pi_state_list;
- struct futex_pi_state *pi_state;
-@@ -960,7 +966,8 @@ void exit_pi_state_list(struct task_stru
- }
- raw_spin_unlock_irq(&curr->pi_lock);
- }
--
-+#else
-+static inline void exit_pi_state_list(struct task_struct *curr) { }
- #endif
-
- /*
-@@ -3625,7 +3632,7 @@ static inline int fetch_robust_entry(str
- *
- * We silently return on any sign of list-walking problem.
- */
--void exit_robust_list(struct task_struct *curr)
-+static void exit_robust_list(struct task_struct *curr)
- {
- struct robust_list_head __user *head = curr->robust_list;
- struct robust_list __user *entry, *next_entry, *pending;
-@@ -3690,6 +3697,24 @@ void exit_robust_list(struct task_struct
- }
- }
-
-+void futex_mm_release(struct task_struct *tsk)
-+{
-+ if (unlikely(tsk->robust_list)) {
-+ exit_robust_list(tsk);
-+ tsk->robust_list = NULL;
-+ }
-+
-+#ifdef CONFIG_COMPAT
-+ if (unlikely(tsk->compat_robust_list)) {
-+ compat_exit_robust_list(tsk);
-+ tsk->compat_robust_list = NULL;
-+ }
-+#endif
-+
-+ if (unlikely(!list_empty(&tsk->pi_state_list)))
-+ exit_pi_state_list(tsk);
-+}
-+
- long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
- u32 __user *uaddr2, u32 val2, u32 val3)
- {
-@@ -3817,7 +3842,7 @@ static void __user *futex_uaddr(struct r
- *
- * We silently return on any sign of list-walking problem.
- */
--void compat_exit_robust_list(struct task_struct *curr)
-+static void compat_exit_robust_list(struct task_struct *curr)
- {
- struct compat_robust_list_head __user *head = curr->compat_robust_list;
- struct robust_list __user *entry, *next_entry, *pending;
+++ /dev/null
-Subject: futex: Prevent exit livelock
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed Nov 6 22:55:46 2019 +0100
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit 3ef240eaff36b8119ac9e2ea17cbf41179c930ba upstream
-
-Oleg provided the following test case:
-
-int main(void)
-{
- struct sched_param sp = {};
-
- sp.sched_priority = 2;
- assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0);
-
- int lock = vfork();
- if (!lock) {
- sp.sched_priority = 1;
- assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0);
- _exit(0);
- }
-
- syscall(__NR_futex, &lock, FUTEX_LOCK_PI, 0,0,0);
- return 0;
-}
-
-This creates an unkillable RT process spinning in futex_lock_pi() on a UP
-machine or if the process is affine to a single CPU. The reason is:
-
- parent child
-
- set FIFO prio 2
-
- vfork() -> set FIFO prio 1
- implies wait_for_child() sched_setscheduler(...)
- exit()
- do_exit()
- ....
- mm_release()
- tsk->futex_state = FUTEX_STATE_EXITING;
- exit_futex(); (NOOP in this case)
- complete() --> wakes parent
- sys_futex()
- loop infinite because
- tsk->futex_state == FUTEX_STATE_EXITING
-
-The same problem can happen just by regular preemption as well:
-
- task holds futex
- ...
- do_exit()
- tsk->futex_state = FUTEX_STATE_EXITING;
-
- --> preemption (unrelated wakeup of some other higher prio task, e.g. timer)
-
- switch_to(other_task)
-
- return to user
- sys_futex()
- loop infinite as above
-
-Just for the fun of it the futex exit cleanup could trigger the wakeup
-itself before the task sets its futex state to DEAD.
-
-To cure this, the handling of the exiting owner is changed so:
-
- - A refcount is held on the task
-
- - The task pointer is stored in a caller visible location
-
- - The caller drops all locks (hash bucket, mmap_sem) and blocks
- on task::futex_exit_mutex. When the mutex is acquired then
- the exiting task has completed the cleanup and the state
- is consistent and can be reevaluated.
-
-This is not a pretty solution, but there is no choice other than returning
-an error code to user space, which would break the state consistency
-guarantee and open another can of problems including regressions.
-
-For stable backports the preparatory commits ac31c7ff8624 .. ba31c1a48538
-are required as well, but for anything older than 5.3.y the backports are
-going to be provided when this hits mainline as the other dependencies for
-those kernels are definitely not stable material.
-
-Fixes: 778e9a9c3e71 ("pi-futex: fix exit races and locking problems")
-Reported-by: Oleg Nesterov <oleg@redhat.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Ingo Molnar <mingo@kernel.org>
-Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Cc: Stable Team <stable@vger.kernel.org>
-Link: https://lkml.kernel.org/r/20191106224557.041676471@linutronix.de
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/futex.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++---------
- 1 file changed, 91 insertions(+), 15 deletions(-)
-
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -1176,6 +1176,36 @@ out_error:
- return ret;
- }
-
-+/**
-+ * wait_for_owner_exiting - Block until the owner has exited
-+ * @exiting: Pointer to the exiting task
-+ *
-+ * Caller must hold a refcount on @exiting.
-+ */
-+static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
-+{
-+ if (ret != -EBUSY) {
-+ WARN_ON_ONCE(exiting);
-+ return;
-+ }
-+
-+ if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
-+ return;
-+
-+ mutex_lock(&exiting->futex_exit_mutex);
-+ /*
-+ * No point in doing state checking here. If the waiter got here
-+ * while the task was in exec()->exec_futex_release() then it can
-+ * have any FUTEX_STATE_* value when the waiter has acquired the
-+ * mutex. OK, if running, EXITING or DEAD if it reached exit()
-+ * already. Highly unlikely and not a problem. Just one more round
-+ * through the futex maze.
-+ */
-+ mutex_unlock(&exiting->futex_exit_mutex);
-+
-+ put_task_struct(exiting);
-+}
-+
- static int handle_exit_race(u32 __user *uaddr, u32 uval,
- struct task_struct *tsk)
- {
-@@ -1237,7 +1267,8 @@ static int handle_exit_race(u32 __user *
- * it after doing proper sanity checks.
- */
- static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
-- struct futex_pi_state **ps)
-+ struct futex_pi_state **ps,
-+ struct task_struct **exiting)
- {
- pid_t pid = uval & FUTEX_TID_MASK;
- struct futex_pi_state *pi_state;
-@@ -1276,7 +1307,19 @@ static int attach_to_pi_owner(u32 __user
- int ret = handle_exit_race(uaddr, uval, p);
-
- raw_spin_unlock_irq(&p->pi_lock);
-- put_task_struct(p);
-+ /*
-+ * If the owner task is between FUTEX_STATE_EXITING and
-+ * FUTEX_STATE_DEAD then store the task pointer and keep
-+ * the reference on the task struct. The calling code will
-+ * drop all locks, wait for the task to reach
-+ * FUTEX_STATE_DEAD and then drop the refcount. This is
-+ * required to prevent a live lock when the current task
-+ * preempted the exiting task between the two states.
-+ */
-+ if (ret == -EBUSY)
-+ *exiting = p;
-+ else
-+ put_task_struct(p);
- return ret;
- }
-
-@@ -1315,7 +1358,8 @@ static int attach_to_pi_owner(u32 __user
-
- static int lookup_pi_state(u32 __user *uaddr, u32 uval,
- struct futex_hash_bucket *hb,
-- union futex_key *key, struct futex_pi_state **ps)
-+ union futex_key *key, struct futex_pi_state **ps,
-+ struct task_struct **exiting)
- {
- struct futex_q *top_waiter = futex_top_waiter(hb, key);
-
-@@ -1330,7 +1374,7 @@ static int lookup_pi_state(u32 __user *u
- * We are the first waiter - try to look up the owner based on
- * @uval and attach to it.
- */
-- return attach_to_pi_owner(uaddr, uval, key, ps);
-+ return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
- }
-
- static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
-@@ -1358,6 +1402,8 @@ static int lock_pi_update_atomic(u32 __u
- * lookup
- * @task: the task to perform the atomic lock work for. This will
- * be "current" except in the case of requeue pi.
-+ * @exiting: Pointer to store the task pointer of the owner task
-+ * which is in the middle of exiting
- * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
- *
- * Return:
-@@ -1366,11 +1412,17 @@ static int lock_pi_update_atomic(u32 __u
- * - <0 - error
- *
- * The hb->lock and futex_key refs shall be held by the caller.
-+ *
-+ * @exiting is only set when the return value is -EBUSY. If so, this holds
-+ * a refcount on the exiting task on return and the caller needs to drop it
-+ * after waiting for the exit to complete.
- */
- static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
- union futex_key *key,
- struct futex_pi_state **ps,
-- struct task_struct *task, int set_waiters)
-+ struct task_struct *task,
-+ struct task_struct **exiting,
-+ int set_waiters)
- {
- u32 uval, newval, vpid = task_pid_vnr(task);
- struct futex_q *top_waiter;
-@@ -1440,7 +1492,7 @@ static int futex_lock_pi_atomic(u32 __us
- * attach to the owner. If that fails, no harm done, we only
- * set the FUTEX_WAITERS bit in the user space variable.
- */
-- return attach_to_pi_owner(uaddr, newval, key, ps);
-+ return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
- }
-
- /**
-@@ -1861,6 +1913,8 @@ void requeue_pi_wake_futex(struct futex_
- * @key1: the from futex key
- * @key2: the to futex key
- * @ps: address to store the pi_state pointer
-+ * @exiting: Pointer to store the task pointer of the owner task
-+ * which is in the middle of exiting
- * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
- *
- * Try and get the lock on behalf of the top waiter if we can do it atomically.
-@@ -1868,16 +1922,20 @@ void requeue_pi_wake_futex(struct futex_
- * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
- * hb1 and hb2 must be held by the caller.
- *
-+ * @exiting is only set when the return value is -EBUSY. If so, this holds
-+ * a refcount on the exiting task on return and the caller needs to drop it
-+ * after waiting for the exit to complete.
-+ *
- * Return:
- * - 0 - failed to acquire the lock atomically;
- * - >0 - acquired the lock, return value is vpid of the top_waiter
- * - <0 - error
- */
--static int futex_proxy_trylock_atomic(u32 __user *pifutex,
-- struct futex_hash_bucket *hb1,
-- struct futex_hash_bucket *hb2,
-- union futex_key *key1, union futex_key *key2,
-- struct futex_pi_state **ps, int set_waiters)
-+static int
-+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
-+ struct futex_hash_bucket *hb2, union futex_key *key1,
-+ union futex_key *key2, struct futex_pi_state **ps,
-+ struct task_struct **exiting, int set_waiters)
- {
- struct futex_q *top_waiter = NULL;
- u32 curval;
-@@ -1914,7 +1972,7 @@ static int futex_proxy_trylock_atomic(u3
- */
- vpid = task_pid_vnr(top_waiter->task);
- ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
-- set_waiters);
-+ exiting, set_waiters);
- if (ret == 1) {
- requeue_pi_wake_futex(top_waiter, key2, hb2);
- return vpid;
-@@ -2043,6 +2101,8 @@ retry_private:
- }
-
- if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
-+ struct task_struct *exiting = NULL;
-+
- /*
- * Attempt to acquire uaddr2 and wake the top waiter. If we
- * intend to requeue waiters, force setting the FUTEX_WAITERS
-@@ -2050,7 +2110,8 @@ retry_private:
- * faults rather in the requeue loop below.
- */
- ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
-- &key2, &pi_state, nr_requeue);
-+ &key2, &pi_state,
-+ &exiting, nr_requeue);
-
- /*
- * At this point the top_waiter has either taken uaddr2 or is
-@@ -2077,7 +2138,8 @@ retry_private:
- * If that call succeeds then we have pi_state and an
- * initial refcount on it.
- */
-- ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state);
-+ ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
-+ &pi_state, &exiting);
- }
-
- switch (ret) {
-@@ -2107,6 +2169,12 @@ retry_private:
- hb_waiters_dec(hb2);
- put_futex_key(&key2);
- put_futex_key(&key1);
-+ /*
-+ * Handle the case where the owner is in the middle of
-+ * exiting. Wait for the exit to complete otherwise
-+ * this task might loop forever, aka. live lock.
-+ */
-+ wait_for_owner_exiting(ret, exiting);
- cond_resched();
- goto retry;
- default:
-@@ -2834,6 +2902,7 @@ static int futex_lock_pi(u32 __user *uad
- {
- struct hrtimer_sleeper timeout, *to = NULL;
- struct futex_pi_state *pi_state = NULL;
-+ struct task_struct *exiting = NULL;
- struct rt_mutex_waiter rt_waiter;
- struct futex_hash_bucket *hb;
- struct futex_q q = futex_q_init;
-@@ -2861,7 +2930,8 @@ retry:
- retry_private:
- hb = queue_lock(&q);
-
-- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
-+ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
-+ &exiting, 0);
- if (unlikely(ret)) {
- /*
- * Atomic work succeeded and we got the lock,
-@@ -2884,6 +2954,12 @@ retry_private:
- */
- queue_unlock(hb);
- put_futex_key(&q.key);
-+ /*
-+ * Handle the case where the owner is in the middle of
-+ * exiting. Wait for the exit to complete otherwise
-+ * this task might loop forever, aka. live lock.
-+ */
-+ wait_for_owner_exiting(ret, exiting);
- cond_resched();
- goto retry;
- default:
+++ /dev/null
-Subject: futex: Provide distinct return value when owner is exiting
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed Nov 6 22:55:45 2019 +0100
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit ac31c7ff8624409ba3c4901df9237a616c187a5d upstream
-
-attach_to_pi_owner() returns -EAGAIN for various cases:
-
- - Owner task is exiting
- - Futex value has changed
-
-The caller drops the held locks (hash bucket, mmap_sem) and retries the
-operation. In case of the owner task exiting this can result in a live
-lock.
-
-As a preparatory step for separating those cases, provide a distinct return
-value (EBUSY) for the owner exiting case.
-
-No functional change.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Ingo Molnar <mingo@kernel.org>
-Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lkml.kernel.org/r/20191106224556.935606117@linutronix.de
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/futex.c | 16 +++++++++-------
- 1 file changed, 9 insertions(+), 7 deletions(-)
-
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -1182,11 +1182,11 @@ static int handle_exit_race(u32 __user *
- u32 uval2;
-
- /*
-- * If the futex exit state is not yet FUTEX_STATE_DEAD, wait
-- * for it to finish.
-+ * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
-+ * caller that the alleged owner is busy.
- */
- if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
-- return -EAGAIN;
-+ return -EBUSY;
-
- /*
- * Reread the user space value to handle the following situation:
-@@ -2095,12 +2095,13 @@ retry_private:
- if (!ret)
- goto retry;
- goto out;
-+ case -EBUSY:
- case -EAGAIN:
- /*
- * Two reasons for this:
-- * - Owner is exiting and we just wait for the
-+ * - EBUSY: Owner is exiting and we just wait for the
- * exit to complete.
-- * - The user space value changed.
-+ * - EAGAIN: The user space value changed.
- */
- double_unlock_hb(hb1, hb2);
- hb_waiters_dec(hb2);
-@@ -2873,12 +2874,13 @@ retry_private:
- goto out_unlock_put_key;
- case -EFAULT:
- goto uaddr_faulted;
-+ case -EBUSY:
- case -EAGAIN:
- /*
- * Two reasons for this:
-- * - Task is exiting and we just wait for the
-+ * - EBUSY: Task is exiting and we just wait for the
- * exit to complete.
-- * - The user space value changed.
-+ * - EAGAIN: The user space value changed.
- */
- queue_unlock(hb);
- put_futex_key(&q.key);
+++ /dev/null
-Subject: futex: Provide state handling for exec() as well
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed Nov 6 22:55:43 2019 +0100
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit af8cbda2cfcaa5515d61ec500498d46e9a8247e2 upstream
-
-exec() attempts to handle potentially held futexes gracefully by running
-the futex exit handling code like exit() does.
-
-The current implementation has no protection against concurrent incoming
-waiters. The reason is that the futex state cannot be set to
-FUTEX_STATE_DEAD after the cleanup because the task struct is still active
-and just about to execute the new binary.
-
-While it's arguably buggy when a task holds a futex over exec(), for
-consistency's sake the state handling can at least cover the actual futex
-exit cleanup section. This provides state consistency protection across
-the cleanup. As the futex state of the task becomes FUTEX_STATE_OK after
-the cleanup has been finished, this cannot prevent subsequent attempts to
-attach to the task in case the cleanup was not successful in mopping up
-all leftovers.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Ingo Molnar <mingo@kernel.org>
-Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lkml.kernel.org/r/20191106224556.753355618@linutronix.de
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/futex.c | 38 ++++++++++++++++++++++++++++++++++----
- 1 file changed, 34 insertions(+), 4 deletions(-)
-
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -3698,7 +3698,7 @@ static void exit_robust_list(struct task
- }
- }
-
--void futex_exec_release(struct task_struct *tsk)
-+static void futex_cleanup(struct task_struct *tsk)
- {
- if (unlikely(tsk->robust_list)) {
- exit_robust_list(tsk);
-@@ -3738,7 +3738,7 @@ void futex_exit_recursive(struct task_st
- tsk->futex_state = FUTEX_STATE_DEAD;
- }
-
--void futex_exit_release(struct task_struct *tsk)
-+static void futex_cleanup_begin(struct task_struct *tsk)
- {
- /*
- * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
-@@ -3754,10 +3754,40 @@ void futex_exit_release(struct task_stru
- raw_spin_lock_irq(&tsk->pi_lock);
- tsk->futex_state = FUTEX_STATE_EXITING;
- raw_spin_unlock_irq(&tsk->pi_lock);
-+}
-
-- futex_exec_release(tsk);
-+static void futex_cleanup_end(struct task_struct *tsk, int state)
-+{
-+ /*
-+ * Lockless store. The only side effect is that an observer might
-+ * take another loop until it becomes visible.
-+ */
-+ tsk->futex_state = state;
-+}
-
-- tsk->futex_state = FUTEX_STATE_DEAD;
-+void futex_exec_release(struct task_struct *tsk)
-+{
-+ /*
-+ * The state handling is done for consistency, but in the case of
-+ * exec() there is no way to prevent futher damage as the PID stays
-+ * the same. But for the unlikely and arguably buggy case that a
-+ * futex is held on exec(), this provides at least as much state
-+ * consistency protection which is possible.
-+ */
-+ futex_cleanup_begin(tsk);
-+ futex_cleanup(tsk);
-+ /*
-+ * Reset the state to FUTEX_STATE_OK. The task is alive and about
-+ * exec a new binary.
-+ */
-+ futex_cleanup_end(tsk, FUTEX_STATE_OK);
-+}
-+
-+void futex_exit_release(struct task_struct *tsk)
-+{
-+ futex_cleanup_begin(tsk);
-+ futex_cleanup(tsk);
-+ futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
- }
-
- long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+++ /dev/null
-Subject: futex: Replace PF_EXITPIDONE with a state
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed Nov 6 22:55:37 2019 +0100
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit 3d4775df0a89240f671861c6ab6e8d59af8e9e41 upstream
-
-The futex exit handling relies on PF_ flags. That's suboptimal as it
-requires a smp_mb() and an ugly lock/unlock of the exiting task's pi_lock in
-the middle of do_exit() to enforce the observability of PF_EXITING in the
-futex code.
-
-Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic
-over to the new state. The PF_EXITING dependency will be cleaned up in a
-later step.
-
-This prepares for handling various futex exit issues later.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Ingo Molnar <mingo@kernel.org>
-Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- include/linux/futex.h | 33 +++++++++++++++++++++++++++++++++
- include/linux/sched.h | 2 +-
- kernel/exit.c | 18 ++----------------
- kernel/futex.c | 25 +++++++++++++------------
- 4 files changed, 49 insertions(+), 29 deletions(-)
-
---- a/include/linux/futex.h
-+++ b/include/linux/futex.h
-@@ -53,6 +53,10 @@ union futex_key {
- #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
-
- #ifdef CONFIG_FUTEX
-+enum {
-+ FUTEX_STATE_OK,
-+ FUTEX_STATE_DEAD,
-+};
-
- static inline void futex_init_task(struct task_struct *tsk)
- {
-@@ -62,6 +66,34 @@ static inline void futex_init_task(struc
- #endif
- INIT_LIST_HEAD(&tsk->pi_state_list);
- tsk->pi_state_cache = NULL;
-+ tsk->futex_state = FUTEX_STATE_OK;
-+}
-+
-+/**
-+ * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD
-+ * @tsk: task to set the state on
-+ *
-+ * Set the futex exit state of the task lockless. The futex waiter code
-+ * observes that state when a task is exiting and loops until the task has
-+ * actually finished the futex cleanup. The worst case for this is that the
-+ * waiter runs through the wait loop until the state becomes visible.
-+ *
-+ * This has two callers:
-+ *
-+ * - futex_mm_release() after the futex exit cleanup has been done
-+ *
-+ * - do_exit() from the recursive fault handling path.
-+ *
-+ * In case of a recursive fault this is best effort. Either the futex exit
-+ * code has run already or not. If the OWNER_DIED bit has been set on the
-+ * futex then the waiter can take it over. If not, the problem is pushed
-+ * back to user space. If the futex exit code did not run yet, then an
-+ * already queued waiter might block forever, but there is nothing which
-+ * can be done about that.
-+ */
-+static inline void futex_exit_done(struct task_struct *tsk)
-+{
-+ tsk->futex_state = FUTEX_STATE_DEAD;
- }
-
- void futex_mm_release(struct task_struct *tsk);
-@@ -71,6 +103,7 @@ long do_futex(u32 __user *uaddr, int op,
- #else
- static inline void futex_init_task(struct task_struct *tsk) { }
- static inline void futex_mm_release(struct task_struct *tsk) { }
-+static inline void futex_exit_done(struct task_struct *tsk) { }
- static inline long do_futex(u32 __user *uaddr, int op, u32 val,
- ktime_t *timeout, u32 __user *uaddr2,
- u32 val2, u32 val3)
---- a/include/linux/sched.h
-+++ b/include/linux/sched.h
-@@ -996,6 +996,7 @@ struct task_struct {
- #endif
- struct list_head pi_state_list;
- struct futex_pi_state *pi_state_cache;
-+ unsigned int futex_state;
- #endif
- #ifdef CONFIG_PERF_EVENTS
- struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
-@@ -1377,7 +1378,6 @@ extern struct pid *cad_pid;
- */
- #define PF_IDLE 0x00000002 /* I am an IDLE thread */
- #define PF_EXITING 0x00000004 /* Getting shut down */
--#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */
- #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
- #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
- #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */
---- a/kernel/exit.c
-+++ b/kernel/exit.c
-@@ -818,16 +818,7 @@ void __noreturn do_exit(long code)
- */
- if (unlikely(tsk->flags & PF_EXITING)) {
- pr_alert("Fixing recursive fault but reboot is needed!\n");
-- /*
-- * We can do this unlocked here. The futex code uses
-- * this flag just to verify whether the pi state
-- * cleanup has been done or not. In the worst case it
-- * loops once more. We pretend that the cleanup was
-- * done as there is no way to return. Either the
-- * OWNER_DIED bit is set by now or we push the blocked
-- * task into the wait for ever nirwana as well.
-- */
-- tsk->flags |= PF_EXITPIDONE;
-+ futex_exit_done(tsk);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule();
- }
-@@ -918,12 +909,7 @@ void __noreturn do_exit(long code)
- * Make sure we are holding no locks:
- */
- debug_check_no_locks_held();
-- /*
-- * We can do this unlocked here. The futex code uses this flag
-- * just to verify whether the pi state cleanup has been done
-- * or not. In the worst case it loops once more.
-- */
-- tsk->flags |= PF_EXITPIDONE;
-+ futex_exit_done(tsk);
-
- if (tsk->io_context)
- exit_io_context(tsk);
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -1182,9 +1182,10 @@ static int handle_exit_race(u32 __user *
- u32 uval2;
-
- /*
-- * If PF_EXITPIDONE is not yet set, then try again.
-+ * If the futex exit state is not yet FUTEX_STATE_DEAD, wait
-+ * for it to finish.
- */
-- if (tsk && !(tsk->flags & PF_EXITPIDONE))
-+ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
- return -EAGAIN;
-
- /*
-@@ -1203,8 +1204,9 @@ static int handle_exit_race(u32 __user *
- * *uaddr = 0xC0000000; tsk = get_task(PID);
- * } if (!tsk->flags & PF_EXITING) {
- * ... attach();
-- * tsk->flags |= PF_EXITPIDONE; } else {
-- * if (!(tsk->flags & PF_EXITPIDONE))
-+ * tsk->futex_state = } else {
-+ * FUTEX_STATE_DEAD; if (tsk->futex_state !=
-+ * FUTEX_STATE_DEAD)
- * return -EAGAIN;
- * return -ESRCH; <--- FAIL
- * }
-@@ -1260,17 +1262,16 @@ static int attach_to_pi_owner(u32 __user
- }
-
- /*
-- * We need to look at the task state flags to figure out,
-- * whether the task is exiting. To protect against the do_exit
-- * change of the task flags, we do this protected by
-- * p->pi_lock:
-+ * We need to look at the task state to figure out, whether the
-+ * task is exiting. To protect against the change of the task state
-+ * in futex_exit_release(), we do this protected by p->pi_lock:
- */
- raw_spin_lock_irq(&p->pi_lock);
-- if (unlikely(p->flags & PF_EXITING)) {
-+ if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
- /*
-- * The task is on the way out. When PF_EXITPIDONE is
-- * set, we know that the task has finished the
-- * cleanup:
-+ * The task is on the way out. When the futex state is
-+ * FUTEX_STATE_DEAD, we know that the task has finished
-+ * the cleanup:
- */
- int ret = handle_exit_race(uaddr, uval, p);
-
+++ /dev/null
-Subject: futex: Sanitize exit state handling
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed Nov 6 22:55:42 2019 +0100
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit 4a8e991b91aca9e20705d434677ac013974e0e30 upstream
-
-Instead of having a smp_mb() and an empty lock/unlock of task::pi_lock, move
-the state setting into the lock section.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Ingo Molnar <mingo@kernel.org>
-Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lkml.kernel.org/r/20191106224556.645603214@linutronix.de
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/futex.c | 17 ++++++++++-------
- 1 file changed, 10 insertions(+), 7 deletions(-)
-
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -3740,16 +3740,19 @@ void futex_exit_recursive(struct task_st
-
- void futex_exit_release(struct task_struct *tsk)
- {
-- tsk->futex_state = FUTEX_STATE_EXITING;
-- /*
-- * Ensure that all new tsk->pi_lock acquisitions must observe
-- * FUTEX_STATE_EXITING. Serializes against attach_to_pi_owner().
-- */
-- smp_mb();
- /*
-- * Ensure that we must observe the pi_state in exit_pi_state_list().
-+ * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
-+ *
-+ * This ensures that all subsequent checks of tsk->futex_state in
-+ * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
-+ * tsk->pi_lock held.
-+ *
-+ * It guarantees also that a pi_state which was queued right before
-+ * the state change under tsk->pi_lock by a concurrent waiter must
-+ * be observed in exit_pi_state_list().
- */
- raw_spin_lock_irq(&tsk->pi_lock);
-+ tsk->futex_state = FUTEX_STATE_EXITING;
- raw_spin_unlock_irq(&tsk->pi_lock);
-
- futex_exec_release(tsk);
+++ /dev/null
-Subject: futex: Set task::futex_state to DEAD right after handling futex exit
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed Nov 6 22:55:40 2019 +0100
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit f24f22435dcc11389acc87e5586239c1819d217c upstream
-
-Setting task::futex_state in do_exit() is rather arbitrarily placed for no
-reason. Move it into the futex code.
-
-Note, this is only done for the exit cleanup as the exec cleanup cannot set
-the state to FUTEX_STATE_DEAD because the task struct is still in active
-use.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Ingo Molnar <mingo@kernel.org>
-Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lkml.kernel.org/r/20191106224556.439511191@linutronix.de
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/exit.c | 1 -
- kernel/futex.c | 1 +
- 2 files changed, 1 insertion(+), 1 deletion(-)
-
---- a/kernel/exit.c
-+++ b/kernel/exit.c
-@@ -909,7 +909,6 @@ void __noreturn do_exit(long code)
- * Make sure we are holding no locks:
- */
- debug_check_no_locks_held();
-- futex_exit_done(tsk);
-
- if (tsk->io_context)
- exit_io_context(tsk);
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -3719,6 +3719,7 @@ void futex_exec_release(struct task_stru
- void futex_exit_release(struct task_struct *tsk)
- {
- futex_exec_release(tsk);
-+ futex_exit_done(tsk);
- }
-
- long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+++ /dev/null
-Subject: futex: Split futex_mm_release() for exit/exec
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed Nov 6 22:55:39 2019 +0100
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit 150d71584b12809144b8145b817e83b81158ae5f upstream
-
-To allow separate handling of the futex exit state in the futex exit code
-for exit and exec, split futex_mm_release() into two functions and invoke
-them from the corresponding exit/exec_mm_release() callsites.
-
-Preparatory only, no functional change.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Ingo Molnar <mingo@kernel.org>
-Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lkml.kernel.org/r/20191106224556.332094221@linutronix.de
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- include/linux/futex.h | 6 ++++--
- kernel/fork.c | 5 ++---
- kernel/futex.c | 7 ++++++-
- 3 files changed, 12 insertions(+), 6 deletions(-)
-
---- a/include/linux/futex.h
-+++ b/include/linux/futex.h
-@@ -96,14 +96,16 @@ static inline void futex_exit_done(struc
- tsk->futex_state = FUTEX_STATE_DEAD;
- }
-
--void futex_mm_release(struct task_struct *tsk);
-+void futex_exit_release(struct task_struct *tsk);
-+void futex_exec_release(struct task_struct *tsk);
-
- long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
- u32 __user *uaddr2, u32 val2, u32 val3);
- #else
- static inline void futex_init_task(struct task_struct *tsk) { }
--static inline void futex_mm_release(struct task_struct *tsk) { }
- static inline void futex_exit_done(struct task_struct *tsk) { }
-+static inline void futex_exit_release(struct task_struct *tsk) { }
-+static inline void futex_exec_release(struct task_struct *tsk) { }
- static inline long do_futex(u32 __user *uaddr, int op, u32 val,
- ktime_t *timeout, u32 __user *uaddr2,
- u32 val2, u32 val3)
---- a/kernel/fork.c
-+++ b/kernel/fork.c
-@@ -1219,9 +1219,6 @@ static int wait_for_vfork_done(struct ta
- */
- static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
- {
-- /* Get rid of any futexes when releasing the mm */
-- futex_mm_release(tsk);
--
- uprobe_free_utask(tsk);
-
- /* Get rid of any cached register state */
-@@ -1256,11 +1253,13 @@ static void mm_release(struct task_struc
-
- void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
- {
-+ futex_exit_release(tsk);
- mm_release(tsk, mm);
- }
-
- void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
- {
-+ futex_exec_release(tsk);
- mm_release(tsk, mm);
- }
-
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -3698,7 +3698,7 @@ static void exit_robust_list(struct task
- }
- }
-
--void futex_mm_release(struct task_struct *tsk)
-+void futex_exec_release(struct task_struct *tsk)
- {
- if (unlikely(tsk->robust_list)) {
- exit_robust_list(tsk);
-@@ -3716,6 +3716,11 @@ void futex_mm_release(struct task_struct
- exit_pi_state_list(tsk);
- }
-
-+void futex_exit_release(struct task_struct *tsk)
-+{
-+ futex_exec_release(tsk);
-+}
-+
- long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
- u32 __user *uaddr2, u32 val2, u32 val3)
- {
+++ /dev/null
-futex_Move_futex_exit_handling_into_futex_code.patch
-futex_Replace_PF_EXITPIDONE_with_a_state.patch
-exitexec_Seperate_mm_release().patch
-futex_Split_futex_mm_release()_for_exitexec.patch
-futex_Set_taskfutex_state_to_DEAD_right_after_handling_futex_exit.patch
-futex_Mark_the_begin_of_futex_exit_explicitly.patch
-futex_Sanitize_exit_state_handling.patch
-futex_Provide_state_handling_for_exec()_as_well.patch
-futex_Add_mutex_around_futex_exit.patch
-futex_Provide_distinct_return_value_when_owner_is_exiting.patch
-futex_Prevent_exit_livelock.patch