From ec02d46e5c0f76d5e1be2736e80348463a2a5aec Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 Jan 2021 12:54:42 +0100 Subject: [PATCH] futex patches now in 4.19 queue --- .../exitexec_Seperate_mm_release().patch | 97 ----- .../futex_Add_mutex_around_futex_exit.patch | 82 ----- ...k_the_begin_of_futex_exit_explicitly.patch | 159 -------- ..._futex_exit_handling_into_futex_code.patch | 216 ----------- .../futex_Prevent_exit_livelock.patch | 342 ------------------ ...t_return_value_when_owner_is_exiting.patch | 81 ----- ...de_state_handling_for_exec()_as_well.patch | 97 ----- ...x_Replace_PF_EXITPIDONE_with_a_state.patch | 192 ---------- .../futex_Sanitize_exit_state_handling.patch | 50 --- ...DEAD_right_after_handling_futex_exit.patch | 46 --- ...plit_futex_mm_release()_for_exitexec.patch | 96 ----- pending/futex-4.19/series | 11 - 12 files changed, 1469 deletions(-) delete mode 100644 pending/futex-4.19/exitexec_Seperate_mm_release().patch delete mode 100644 pending/futex-4.19/futex_Add_mutex_around_futex_exit.patch delete mode 100644 pending/futex-4.19/futex_Mark_the_begin_of_futex_exit_explicitly.patch delete mode 100644 pending/futex-4.19/futex_Move_futex_exit_handling_into_futex_code.patch delete mode 100644 pending/futex-4.19/futex_Prevent_exit_livelock.patch delete mode 100644 pending/futex-4.19/futex_Provide_distinct_return_value_when_owner_is_exiting.patch delete mode 100644 pending/futex-4.19/futex_Provide_state_handling_for_exec()_as_well.patch delete mode 100644 pending/futex-4.19/futex_Replace_PF_EXITPIDONE_with_a_state.patch delete mode 100644 pending/futex-4.19/futex_Sanitize_exit_state_handling.patch delete mode 100644 pending/futex-4.19/futex_Set_taskfutex_state_to_DEAD_right_after_handling_futex_exit.patch delete mode 100644 pending/futex-4.19/futex_Split_futex_mm_release()_for_exitexec.patch delete mode 100644 pending/futex-4.19/series diff --git a/pending/futex-4.19/exitexec_Seperate_mm_release().patch b/pending/futex-4.19/exitexec_Seperate_mm_release().patch deleted file mode 100644 index 6f1eca6d78a..00000000000 --- a/pending/futex-4.19/exitexec_Seperate_mm_release().patch +++ /dev/null @@ -1,97 +0,0 @@ -Subject: exit/exec: Seperate mm_release() -From: Thomas Gleixner -Date: Wed Nov 6 22:55:38 2019 +0100 - -From: Thomas Gleixner - -commit 4610ba7ad877fafc0a25a30c6c82015304120426 upstream - -mm_release() contains the futex exit handling. mm_release() is called from -do_exit()->exit_mm() and from exec()->exec_mm(). - -In the exit_mm() case PF_EXITING and the futex state is updated. In the -exec_mm() case these states are not touched. - -As the futex exit code needs further protections against exit races, this -needs to be split into two functions. - -Preparatory only, no functional change. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Thomas Gleixner -Reviewed-by: Ingo Molnar -Acked-by: Peter Zijlstra (Intel) -Link: https://lkml.kernel.org/r/20191106224556.240518241@linutronix.de -Signed-off-by: Greg Kroah-Hartman ---- - fs/exec.c | 2 +- - include/linux/sched/mm.h | 6 ++++-- - kernel/exit.c | 2 +- - kernel/fork.c | 12 +++++++++++- - 4 files changed, 17 insertions(+), 5 deletions(-) - ---- a/fs/exec.c -+++ b/fs/exec.c -@@ -1011,7 +1011,7 @@ static int exec_mmap(struct mm_struct *m - /* Notify parent that we're no longer interested in the old VM */ - tsk = current; - old_mm = current->mm; -- mm_release(tsk, old_mm); -+ exec_mm_release(tsk, old_mm); - - if (old_mm) { - sync_mm_rss(old_mm); ---- a/include/linux/sched/mm.h -+++ b/include/linux/sched/mm.h -@@ -119,8 +119,10 @@ extern struct mm_struct *get_task_mm(str - * succeeds. - */ - extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); --/* Remove the current tasks stale references to the old mm_struct */ --extern void mm_release(struct task_struct *, struct mm_struct *); -+/* Remove the current tasks stale references to the old mm_struct on exit() */ -+extern void exit_mm_release(struct task_struct *, struct mm_struct *); -+/* Remove the current tasks stale references to the old mm_struct on exec() */ -+extern void exec_mm_release(struct task_struct *, struct mm_struct *); - - #ifdef CONFIG_MEMCG - extern void mm_update_next_owner(struct mm_struct *mm); ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -498,7 +498,7 @@ static void exit_mm(void) - struct mm_struct *mm = current->mm; - struct core_state *core_state; - -- mm_release(current, mm); -+ exit_mm_release(current, mm); - if (!mm) - return; - sync_mm_rss(mm); ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -1217,7 +1217,7 @@ static int wait_for_vfork_done(struct ta - * restoring the old one. . . - * Eric Biederman 10 January 1998 - */ --void mm_release(struct task_struct *tsk, struct mm_struct *mm) -+static void mm_release(struct task_struct *tsk, struct mm_struct *mm) - { - /* Get rid of any futexes when releasing the mm */ - futex_mm_release(tsk); -@@ -1254,6 +1254,16 @@ void mm_release(struct task_struct *tsk, - complete_vfork_done(tsk); - } - -+void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm) -+{ -+ mm_release(tsk, mm); -+} -+ -+void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm) -+{ -+ mm_release(tsk, mm); -+} -+ - /* - * Allocate a new mm structure and copy contents from the - * mm structure of the passed in task structure. diff --git a/pending/futex-4.19/futex_Add_mutex_around_futex_exit.patch b/pending/futex-4.19/futex_Add_mutex_around_futex_exit.patch deleted file mode 100644 index ef9d6d5ac26..00000000000 --- a/pending/futex-4.19/futex_Add_mutex_around_futex_exit.patch +++ /dev/null @@ -1,82 +0,0 @@ -Subject: futex: Add mutex around futex exit -From: Thomas Gleixner -Date: Wed Nov 6 22:55:44 2019 +0100 - -From: Thomas Gleixner - -commit 3f186d974826847a07bc7964d79ec4eded475ad9 upstream - -The mutex will be used in subsequent changes to replace the busy looping of -a waiter when the futex owner is currently executing the exit cleanup to -prevent a potential live lock. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Thomas Gleixner -Reviewed-by: Ingo Molnar -Acked-by: Peter Zijlstra (Intel) -Link: https://lkml.kernel.org/r/20191106224556.845798895@linutronix.de -Signed-off-by: Greg Kroah-Hartman ---- - include/linux/futex.h | 1 + - include/linux/sched.h | 1 + - kernel/futex.c | 16 ++++++++++++++++ - 3 files changed, 18 insertions(+) - ---- a/include/linux/futex.h -+++ b/include/linux/futex.h -@@ -68,6 +68,7 @@ static inline void futex_init_task(struc - INIT_LIST_HEAD(&tsk->pi_state_list); - tsk->pi_state_cache = NULL; - tsk->futex_state = FUTEX_STATE_OK; -+ mutex_init(&tsk->futex_exit_mutex); - } - - void futex_exit_recursive(struct task_struct *tsk); ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -996,6 +996,7 @@ struct task_struct { - #endif - struct list_head pi_state_list; - struct futex_pi_state *pi_state_cache; -+ struct mutex futex_exit_mutex; - unsigned int futex_state; - #endif - #ifdef CONFIG_PERF_EVENTS ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -3735,12 +3735,23 @@ static void futex_cleanup(struct task_st - */ - void futex_exit_recursive(struct task_struct *tsk) - { -+ /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */ -+ if (tsk->futex_state == FUTEX_STATE_EXITING) -+ mutex_unlock(&tsk->futex_exit_mutex); - tsk->futex_state = FUTEX_STATE_DEAD; - } - - static void futex_cleanup_begin(struct task_struct *tsk) - { - /* -+ * Prevent various race issues against a concurrent incoming waiter -+ * including live locks by forcing the waiter to block on -+ * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in -+ * attach_to_pi_owner(). -+ */ -+ mutex_lock(&tsk->futex_exit_mutex); -+ -+ /* - * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. - * - * This ensures that all subsequent checks of tsk->futex_state in -@@ -3763,6 +3774,11 @@ static void futex_cleanup_end(struct tas - * take another loop until it becomes visible. - */ - tsk->futex_state = state; -+ /* -+ * Drop the exit protection. This unblocks waiters which observed -+ * FUTEX_STATE_EXITING to reevaluate the state. -+ */ -+ mutex_unlock(&tsk->futex_exit_mutex); - } - - void futex_exec_release(struct task_struct *tsk) diff --git a/pending/futex-4.19/futex_Mark_the_begin_of_futex_exit_explicitly.patch b/pending/futex-4.19/futex_Mark_the_begin_of_futex_exit_explicitly.patch deleted file mode 100644 index 973da35c0ca..00000000000 --- a/pending/futex-4.19/futex_Mark_the_begin_of_futex_exit_explicitly.patch +++ /dev/null @@ -1,159 +0,0 @@ -Subject: futex: Mark the begin of futex exit explicitly -From: Thomas Gleixner -Date: Wed Nov 6 22:55:41 2019 +0100 - -From: Thomas Gleixner - -commit 18f694385c4fd77a09851fd301236746ca83f3cb upstream - -Instead of relying on PF_EXITING use an explicit state for the futex exit -and set it in the futex exit function. This moves the smp barrier and the -lock/unlock serialization into the futex code. - -As with the DEAD state this is restricted to the exit path as exec -continues to use the same task struct. - -This allows to simplify that logic in a next step. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Thomas Gleixner -Reviewed-by: Ingo Molnar -Acked-by: Peter Zijlstra (Intel) -Link: https://lkml.kernel.org/r/20191106224556.539409004@linutronix.de -Signed-off-by: Greg Kroah-Hartman ---- - include/linux/futex.h | 31 +++---------------------------- - kernel/exit.c | 13 +------------ - kernel/futex.c | 37 ++++++++++++++++++++++++++++++++++++- - 3 files changed, 40 insertions(+), 41 deletions(-) - ---- a/include/linux/futex.h -+++ b/include/linux/futex.h -@@ -55,6 +55,7 @@ union futex_key { - #ifdef CONFIG_FUTEX - enum { - FUTEX_STATE_OK, -+ FUTEX_STATE_EXITING, - FUTEX_STATE_DEAD, - }; - -@@ -69,33 +70,7 @@ static inline void futex_init_task(struc - tsk->futex_state = FUTEX_STATE_OK; - } - --/** -- * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD -- * @tsk: task to set the state on -- * -- * Set the futex exit state of the task lockless. The futex waiter code -- * observes that state when a task is exiting and loops until the task has -- * actually finished the futex cleanup. The worst case for this is that the -- * waiter runs through the wait loop until the state becomes visible. -- * -- * This has two callers: -- * -- * - futex_mm_release() after the futex exit cleanup has been done -- * -- * - do_exit() from the recursive fault handling path. -- * -- * In case of a recursive fault this is best effort. Either the futex exit -- * code has run already or not. If the OWNER_DIED bit has been set on the -- * futex then the waiter can take it over. If not, the problem is pushed -- * back to user space. If the futex exit code did not run yet, then an -- * already queued waiter might block forever, but there is nothing which -- * can be done about that. -- */ --static inline void futex_exit_done(struct task_struct *tsk) --{ -- tsk->futex_state = FUTEX_STATE_DEAD; --} -- -+void futex_exit_recursive(struct task_struct *tsk); - void futex_exit_release(struct task_struct *tsk); - void futex_exec_release(struct task_struct *tsk); - -@@ -103,7 +78,7 @@ long do_futex(u32 __user *uaddr, int op, - u32 __user *uaddr2, u32 val2, u32 val3); - #else - static inline void futex_init_task(struct task_struct *tsk) { } --static inline void futex_exit_done(struct task_struct *tsk) { } -+static inline void futex_exit_recursive(struct task_struct *tsk) { } - static inline void futex_exit_release(struct task_struct *tsk) { } - static inline void futex_exec_release(struct task_struct *tsk) { } - static inline long do_futex(u32 __user *uaddr, int op, u32 val, ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -818,23 +818,12 @@ void __noreturn do_exit(long code) - */ - if (unlikely(tsk->flags & PF_EXITING)) { - pr_alert("Fixing recursive fault but reboot is needed!\n"); -- futex_exit_done(tsk); -+ futex_exit_recursive(tsk); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule(); - } - - exit_signals(tsk); /* sets PF_EXITING */ -- /* -- * Ensure that all new tsk->pi_lock acquisitions must observe -- * PF_EXITING. Serializes against futex.c:attach_to_pi_owner(). -- */ -- smp_mb(); -- /* -- * Ensure that we must observe the pi_state in exit_mm() -> -- * mm_release() -> exit_pi_state_list(). -- */ -- raw_spin_lock_irq(&tsk->pi_lock); -- raw_spin_unlock_irq(&tsk->pi_lock); - - /* sync mm's RSS info before statistics gathering */ - if (tsk->mm) ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -3716,10 +3716,45 @@ void futex_exec_release(struct task_stru - exit_pi_state_list(tsk); - } - -+/** -+ * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD -+ * @tsk: task to set the state on -+ * -+ * Set the futex exit state of the task lockless. The futex waiter code -+ * observes that state when a task is exiting and loops until the task has -+ * actually finished the futex cleanup. The worst case for this is that the -+ * waiter runs through the wait loop until the state becomes visible. -+ * -+ * This is called from the recursive fault handling path in do_exit(). -+ * -+ * This is best effort. Either the futex exit code has run already or -+ * not. If the OWNER_DIED bit has been set on the futex then the waiter can -+ * take it over. If not, the problem is pushed back to user space. If the -+ * futex exit code did not run yet, then an already queued waiter might -+ * block forever, but there is nothing which can be done about that. -+ */ -+void futex_exit_recursive(struct task_struct *tsk) -+{ -+ tsk->futex_state = FUTEX_STATE_DEAD; -+} -+ - void futex_exit_release(struct task_struct *tsk) - { -+ tsk->futex_state = FUTEX_STATE_EXITING; -+ /* -+ * Ensure that all new tsk->pi_lock acquisitions must observe -+ * FUTEX_STATE_EXITING. Serializes against attach_to_pi_owner(). -+ */ -+ smp_mb(); -+ /* -+ * Ensure that we must observe the pi_state in exit_pi_state_list(). -+ */ -+ raw_spin_lock_irq(&tsk->pi_lock); -+ raw_spin_unlock_irq(&tsk->pi_lock); -+ - futex_exec_release(tsk); -- futex_exit_done(tsk); -+ -+ tsk->futex_state = FUTEX_STATE_DEAD; - } - - long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, diff --git a/pending/futex-4.19/futex_Move_futex_exit_handling_into_futex_code.patch b/pending/futex-4.19/futex_Move_futex_exit_handling_into_futex_code.patch deleted file mode 100644 index d08d9ed4a18..00000000000 --- a/pending/futex-4.19/futex_Move_futex_exit_handling_into_futex_code.patch +++ /dev/null @@ -1,216 +0,0 @@ -Subject: futex: Move futex exit handling into futex code -From: Thomas Gleixner -Date: Wed Nov 6 22:55:36 2019 +0100 - -From: Thomas Gleixner - -commit ba31c1a48538992316cc71ce94fa9cd3e7b427c0 upstream - -The futex exit handling is #ifdeffed into mm_release() which is not pretty -to begin with. But upcoming changes to address futex exit races need to add -more functionality to this exit code. - -Split it out into a function, move it into futex code and make the various -futex exit functions static. - -Preparatory only and no functional change. - -Folded build fix from Borislav. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Thomas Gleixner -Reviewed-by: Ingo Molnar -Acked-by: Peter Zijlstra (Intel) -Link: https://lkml.kernel.org/r/20191106224556.049705556@linutronix.de -Signed-off-by: Greg Kroah-Hartman ---- - include/linux/compat.h | 2 -- - include/linux/futex.h | 29 ++++++++++++++++------------- - kernel/fork.c | 25 +++---------------------- - kernel/futex.c | 33 +++++++++++++++++++++++++++++---- - 4 files changed, 48 insertions(+), 41 deletions(-) - ---- a/include/linux/compat.h -+++ b/include/linux/compat.h -@@ -445,8 +445,6 @@ struct compat_kexec_segment; - struct compat_mq_attr; - struct compat_msgbuf; - --extern void compat_exit_robust_list(struct task_struct *curr); -- - #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t)) - - #define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG) ---- a/include/linux/futex.h -+++ b/include/linux/futex.h -@@ -2,7 +2,9 @@ - #ifndef _LINUX_FUTEX_H - #define _LINUX_FUTEX_H - -+#include - #include -+ - #include - - struct inode; -@@ -51,15 +53,24 @@ union futex_key { - #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } } - - #ifdef CONFIG_FUTEX --extern void exit_robust_list(struct task_struct *curr); - --long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, -- u32 __user *uaddr2, u32 val2, u32 val3); --#else --static inline void exit_robust_list(struct task_struct *curr) -+static inline void futex_init_task(struct task_struct *tsk) - { -+ tsk->robust_list = NULL; -+#ifdef CONFIG_COMPAT -+ tsk->compat_robust_list = NULL; -+#endif -+ INIT_LIST_HEAD(&tsk->pi_state_list); -+ tsk->pi_state_cache = NULL; - } - -+void futex_mm_release(struct task_struct *tsk); -+ -+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, -+ u32 __user *uaddr2, u32 val2, u32 val3); -+#else -+static inline void futex_init_task(struct task_struct *tsk) { } -+static inline void futex_mm_release(struct task_struct *tsk) { } - static inline long do_futex(u32 __user *uaddr, int op, u32 val, - ktime_t *timeout, u32 __user *uaddr2, - u32 val2, u32 val3) -@@ -68,12 +79,4 @@ static inline long do_futex(u32 __user * - } - #endif - --#ifdef CONFIG_FUTEX_PI --extern void exit_pi_state_list(struct task_struct *curr); --#else --static inline void exit_pi_state_list(struct task_struct *curr) --{ --} --#endif -- - #endif ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -1220,20 +1220,7 @@ static int wait_for_vfork_done(struct ta - void mm_release(struct task_struct *tsk, struct mm_struct *mm) - { - /* Get rid of any futexes when releasing the mm */ --#ifdef CONFIG_FUTEX -- if (unlikely(tsk->robust_list)) { -- exit_robust_list(tsk); -- tsk->robust_list = NULL; -- } --#ifdef CONFIG_COMPAT -- if (unlikely(tsk->compat_robust_list)) { -- compat_exit_robust_list(tsk); -- tsk->compat_robust_list = NULL; -- } --#endif -- if (unlikely(!list_empty(&tsk->pi_state_list))) -- exit_pi_state_list(tsk); --#endif -+ futex_mm_release(tsk); - - uprobe_free_utask(tsk); - -@@ -1937,14 +1924,8 @@ static __latent_entropy struct task_stru - #ifdef CONFIG_BLOCK - p->plug = NULL; - #endif --#ifdef CONFIG_FUTEX -- p->robust_list = NULL; --#ifdef CONFIG_COMPAT -- p->compat_robust_list = NULL; --#endif -- INIT_LIST_HEAD(&p->pi_state_list); -- p->pi_state_cache = NULL; --#endif -+ futex_init_task(p); -+ - /* - * sigaltstack should be cleared when sharing the same VM - */ ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -341,6 +341,12 @@ static inline bool should_fail_futex(boo - } - #endif /* CONFIG_FAIL_FUTEX */ - -+#ifdef CONFIG_COMPAT -+static void compat_exit_robust_list(struct task_struct *curr); -+#else -+static inline void compat_exit_robust_list(struct task_struct *curr) { } -+#endif -+ - static inline void futex_get_mm(union futex_key *key) - { - mmgrab(key->private.mm); -@@ -890,7 +896,7 @@ static void put_pi_state(struct futex_pi - * Kernel cleans up PI-state, but userspace is likely hosed. - * (Robust-futex cleanup is separate and might save the day for userspace.) - */ --void exit_pi_state_list(struct task_struct *curr) -+static void exit_pi_state_list(struct task_struct *curr) - { - struct list_head *next, *head = &curr->pi_state_list; - struct futex_pi_state *pi_state; -@@ -960,7 +966,8 @@ void exit_pi_state_list(struct task_stru - } - raw_spin_unlock_irq(&curr->pi_lock); - } -- -+#else -+static inline void exit_pi_state_list(struct task_struct *curr) { } - #endif - - /* -@@ -3625,7 +3632,7 @@ static inline int fetch_robust_entry(str - * - * We silently return on any sign of list-walking problem. - */ --void exit_robust_list(struct task_struct *curr) -+static void exit_robust_list(struct task_struct *curr) - { - struct robust_list_head __user *head = curr->robust_list; - struct robust_list __user *entry, *next_entry, *pending; -@@ -3690,6 +3697,24 @@ void exit_robust_list(struct task_struct - } - } - -+void futex_mm_release(struct task_struct *tsk) -+{ -+ if (unlikely(tsk->robust_list)) { -+ exit_robust_list(tsk); -+ tsk->robust_list = NULL; -+ } -+ -+#ifdef CONFIG_COMPAT -+ if (unlikely(tsk->compat_robust_list)) { -+ compat_exit_robust_list(tsk); -+ tsk->compat_robust_list = NULL; -+ } -+#endif -+ -+ if (unlikely(!list_empty(&tsk->pi_state_list))) -+ exit_pi_state_list(tsk); -+} -+ - long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - u32 __user *uaddr2, u32 val2, u32 val3) - { -@@ -3817,7 +3842,7 @@ static void __user *futex_uaddr(struct r - * - * We silently return on any sign of list-walking problem. - */ --void compat_exit_robust_list(struct task_struct *curr) -+static void compat_exit_robust_list(struct task_struct *curr) - { - struct compat_robust_list_head __user *head = curr->compat_robust_list; - struct robust_list __user *entry, *next_entry, *pending; diff --git a/pending/futex-4.19/futex_Prevent_exit_livelock.patch b/pending/futex-4.19/futex_Prevent_exit_livelock.patch deleted file mode 100644 index 99b8d8c8fe7..00000000000 --- a/pending/futex-4.19/futex_Prevent_exit_livelock.patch +++ /dev/null @@ -1,342 +0,0 @@ -Subject: futex: Prevent exit livelock -From: Thomas Gleixner -Date: Wed Nov 6 22:55:46 2019 +0100 - -From: Thomas Gleixner - -commit 3ef240eaff36b8119ac9e2ea17cbf41179c930ba upstream - -Oleg provided the following test case: - -int main(void) -{ - struct sched_param sp = {}; - - sp.sched_priority = 2; - assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0); - - int lock = vfork(); - if (!lock) { - sp.sched_priority = 1; - assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0); - _exit(0); - } - - syscall(__NR_futex, &lock, FUTEX_LOCK_PI, 0,0,0); - return 0; -} - -This creates an unkillable RT process spinning in futex_lock_pi() on a UP -machine or if the process is affine to a single CPU. The reason is: - - parent child - - set FIFO prio 2 - - vfork() -> set FIFO prio 1 - implies wait_for_child() sched_setscheduler(...) - exit() - do_exit() - .... - mm_release() - tsk->futex_state = FUTEX_STATE_EXITING; - exit_futex(); (NOOP in this case) - complete() --> wakes parent - sys_futex() - loop infinite because - tsk->futex_state == FUTEX_STATE_EXITING - -The same problem can happen just by regular preemption as well: - - task holds futex - ... - do_exit() - tsk->futex_state = FUTEX_STATE_EXITING; - - --> preemption (unrelated wakeup of some other higher prio task, e.g. timer) - - switch_to(other_task) - - return to user - sys_futex() - loop infinite as above - -Just for the fun of it the futex exit cleanup could trigger the wakeup -itself before the task sets its futex state to DEAD. - -To cure this, the handling of the exiting owner is changed so: - - - A refcount is held on the task - - - The task pointer is stored in a caller visible location - - - The caller drops all locks (hash bucket, mmap_sem) and blocks - on task::futex_exit_mutex. When the mutex is acquired then - the exiting task has completed the cleanup and the state - is consistent and can be reevaluated. - -This is not a pretty solution, but there is no choice other than returning -an error code to user space, which would break the state consistency -guarantee and open another can of problems including regressions. - -For stable backports the preparatory commits ac31c7ff8624 .. ba31c1a48538 -are required as well, but for anything older than 5.3.y the backports are -going to be provided when this hits mainline as the other dependencies for -those kernels are definitely not stable material. - -Fixes: 778e9a9c3e71 ("pi-futex: fix exit races and locking problems") -Reported-by: Oleg Nesterov -Signed-off-by: Thomas Gleixner -Signed-off-by: Thomas Gleixner -Reviewed-by: Ingo Molnar -Acked-by: Peter Zijlstra (Intel) -Cc: Stable Team -Link: https://lkml.kernel.org/r/20191106224557.041676471@linutronix.de -Signed-off-by: Greg Kroah-Hartman ---- - kernel/futex.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++--------- - 1 file changed, 91 insertions(+), 15 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1176,6 +1176,36 @@ out_error: - return ret; - } - -+/** -+ * wait_for_owner_exiting - Block until the owner has exited -+ * @exiting: Pointer to the exiting task -+ * -+ * Caller must hold a refcount on @exiting. -+ */ -+static void wait_for_owner_exiting(int ret, struct task_struct *exiting) -+{ -+ if (ret != -EBUSY) { -+ WARN_ON_ONCE(exiting); -+ return; -+ } -+ -+ if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) -+ return; -+ -+ mutex_lock(&exiting->futex_exit_mutex); -+ /* -+ * No point in doing state checking here. If the waiter got here -+ * while the task was in exec()->exec_futex_release() then it can -+ * have any FUTEX_STATE_* value when the waiter has acquired the -+ * mutex. OK, if running, EXITING or DEAD if it reached exit() -+ * already. Highly unlikely and not a problem. Just one more round -+ * through the futex maze. -+ */ -+ mutex_unlock(&exiting->futex_exit_mutex); -+ -+ put_task_struct(exiting); -+} -+ - static int handle_exit_race(u32 __user *uaddr, u32 uval, - struct task_struct *tsk) - { -@@ -1237,7 +1267,8 @@ static int handle_exit_race(u32 __user * - * it after doing proper sanity checks. - */ - static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, -- struct futex_pi_state **ps) -+ struct futex_pi_state **ps, -+ struct task_struct **exiting) - { - pid_t pid = uval & FUTEX_TID_MASK; - struct futex_pi_state *pi_state; -@@ -1276,7 +1307,19 @@ static int attach_to_pi_owner(u32 __user - int ret = handle_exit_race(uaddr, uval, p); - - raw_spin_unlock_irq(&p->pi_lock); -- put_task_struct(p); -+ /* -+ * If the owner task is between FUTEX_STATE_EXITING and -+ * FUTEX_STATE_DEAD then store the task pointer and keep -+ * the reference on the task struct. The calling code will -+ * drop all locks, wait for the task to reach -+ * FUTEX_STATE_DEAD and then drop the refcount. This is -+ * required to prevent a live lock when the current task -+ * preempted the exiting task between the two states. -+ */ -+ if (ret == -EBUSY) -+ *exiting = p; -+ else -+ put_task_struct(p); - return ret; - } - -@@ -1315,7 +1358,8 @@ static int attach_to_pi_owner(u32 __user - - static int lookup_pi_state(u32 __user *uaddr, u32 uval, - struct futex_hash_bucket *hb, -- union futex_key *key, struct futex_pi_state **ps) -+ union futex_key *key, struct futex_pi_state **ps, -+ struct task_struct **exiting) - { - struct futex_q *top_waiter = futex_top_waiter(hb, key); - -@@ -1330,7 +1374,7 @@ static int lookup_pi_state(u32 __user *u - * We are the first waiter - try to look up the owner based on - * @uval and attach to it. - */ -- return attach_to_pi_owner(uaddr, uval, key, ps); -+ return attach_to_pi_owner(uaddr, uval, key, ps, exiting); - } - - static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) -@@ -1358,6 +1402,8 @@ static int lock_pi_update_atomic(u32 __u - * lookup - * @task: the task to perform the atomic lock work for. This will - * be "current" except in the case of requeue pi. -+ * @exiting: Pointer to store the task pointer of the owner task -+ * which is in the middle of exiting - * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) - * - * Return: -@@ -1366,11 +1412,17 @@ static int lock_pi_update_atomic(u32 __u - * - <0 - error - * - * The hb->lock and futex_key refs shall be held by the caller. -+ * -+ * @exiting is only set when the return value is -EBUSY. If so, this holds -+ * a refcount on the exiting task on return and the caller needs to drop it -+ * after waiting for the exit to complete. - */ - static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, - union futex_key *key, - struct futex_pi_state **ps, -- struct task_struct *task, int set_waiters) -+ struct task_struct *task, -+ struct task_struct **exiting, -+ int set_waiters) - { - u32 uval, newval, vpid = task_pid_vnr(task); - struct futex_q *top_waiter; -@@ -1440,7 +1492,7 @@ static int futex_lock_pi_atomic(u32 __us - * attach to the owner. If that fails, no harm done, we only - * set the FUTEX_WAITERS bit in the user space variable. - */ -- return attach_to_pi_owner(uaddr, newval, key, ps); -+ return attach_to_pi_owner(uaddr, newval, key, ps, exiting); - } - - /** -@@ -1861,6 +1913,8 @@ void requeue_pi_wake_futex(struct futex_ - * @key1: the from futex key - * @key2: the to futex key - * @ps: address to store the pi_state pointer -+ * @exiting: Pointer to store the task pointer of the owner task -+ * which is in the middle of exiting - * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) - * - * Try and get the lock on behalf of the top waiter if we can do it atomically. -@@ -1868,16 +1922,20 @@ void requeue_pi_wake_futex(struct futex_ - * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. - * hb1 and hb2 must be held by the caller. - * -+ * @exiting is only set when the return value is -EBUSY. If so, this holds -+ * a refcount on the exiting task on return and the caller needs to drop it -+ * after waiting for the exit to complete. -+ * - * Return: - * - 0 - failed to acquire the lock atomically; - * - >0 - acquired the lock, return value is vpid of the top_waiter - * - <0 - error - */ --static int futex_proxy_trylock_atomic(u32 __user *pifutex, -- struct futex_hash_bucket *hb1, -- struct futex_hash_bucket *hb2, -- union futex_key *key1, union futex_key *key2, -- struct futex_pi_state **ps, int set_waiters) -+static int -+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, -+ struct futex_hash_bucket *hb2, union futex_key *key1, -+ union futex_key *key2, struct futex_pi_state **ps, -+ struct task_struct **exiting, int set_waiters) - { - struct futex_q *top_waiter = NULL; - u32 curval; -@@ -1914,7 +1972,7 @@ static int futex_proxy_trylock_atomic(u3 - */ - vpid = task_pid_vnr(top_waiter->task); - ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, -- set_waiters); -+ exiting, set_waiters); - if (ret == 1) { - requeue_pi_wake_futex(top_waiter, key2, hb2); - return vpid; -@@ -2043,6 +2101,8 @@ retry_private: - } - - if (requeue_pi && (task_count - nr_wake < nr_requeue)) { -+ struct task_struct *exiting = NULL; -+ - /* - * Attempt to acquire uaddr2 and wake the top waiter. If we - * intend to requeue waiters, force setting the FUTEX_WAITERS -@@ -2050,7 +2110,8 @@ retry_private: - * faults rather in the requeue loop below. - */ - ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, -- &key2, &pi_state, nr_requeue); -+ &key2, &pi_state, -+ &exiting, nr_requeue); - - /* - * At this point the top_waiter has either taken uaddr2 or is -@@ -2077,7 +2138,8 @@ retry_private: - * If that call succeeds then we have pi_state and an - * initial refcount on it. - */ -- ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state); -+ ret = lookup_pi_state(uaddr2, ret, hb2, &key2, -+ &pi_state, &exiting); - } - - switch (ret) { -@@ -2107,6 +2169,12 @@ retry_private: - hb_waiters_dec(hb2); - put_futex_key(&key2); - put_futex_key(&key1); -+ /* -+ * Handle the case where the owner is in the middle of -+ * exiting. Wait for the exit to complete otherwise -+ * this task might loop forever, aka. live lock. -+ */ -+ wait_for_owner_exiting(ret, exiting); - cond_resched(); - goto retry; - default: -@@ -2834,6 +2902,7 @@ static int futex_lock_pi(u32 __user *uad - { - struct hrtimer_sleeper timeout, *to = NULL; - struct futex_pi_state *pi_state = NULL; -+ struct task_struct *exiting = NULL; - struct rt_mutex_waiter rt_waiter; - struct futex_hash_bucket *hb; - struct futex_q q = futex_q_init; -@@ -2861,7 +2930,8 @@ retry: - retry_private: - hb = queue_lock(&q); - -- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); -+ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, -+ &exiting, 0); - if (unlikely(ret)) { - /* - * Atomic work succeeded and we got the lock, -@@ -2884,6 +2954,12 @@ retry_private: - */ - queue_unlock(hb); - put_futex_key(&q.key); -+ /* -+ * Handle the case where the owner is in the middle of -+ * exiting. Wait for the exit to complete otherwise -+ * this task might loop forever, aka. live lock. -+ */ -+ wait_for_owner_exiting(ret, exiting); - cond_resched(); - goto retry; - default: diff --git a/pending/futex-4.19/futex_Provide_distinct_return_value_when_owner_is_exiting.patch b/pending/futex-4.19/futex_Provide_distinct_return_value_when_owner_is_exiting.patch deleted file mode 100644 index 6da55da7582..00000000000 --- a/pending/futex-4.19/futex_Provide_distinct_return_value_when_owner_is_exiting.patch +++ /dev/null @@ -1,81 +0,0 @@ -Subject: futex: Provide distinct return value when owner is exiting -From: Thomas Gleixner -Date: Wed Nov 6 22:55:45 2019 +0100 - -From: Thomas Gleixner -` -commit ac31c7ff8624409ba3c4901df9237a616c187a5d upstream - -attach_to_pi_owner() returns -EAGAIN for various cases: - - - Owner task is exiting - - Futex value has changed - -The caller drops the held locks (hash bucket, mmap_sem) and retries the -operation. In case of the owner task exiting this can result in a live -lock. - -As a preparatory step for seperating those cases, provide a distinct return -value (EBUSY) for the owner exiting case. - -No functional change. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Thomas Gleixner -Reviewed-by: Ingo Molnar -Acked-by: Peter Zijlstra (Intel) -Link: https://lkml.kernel.org/r/20191106224556.935606117@linutronix.de -Signed-off-by: Greg Kroah-Hartman ---- - kernel/futex.c | 16 +++++++++------- - 1 file changed, 9 insertions(+), 7 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1182,11 +1182,11 @@ static int handle_exit_race(u32 __user * - u32 uval2; - - /* -- * If the futex exit state is not yet FUTEX_STATE_DEAD, wait -- * for it to finish. -+ * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the -+ * caller that the alleged owner is busy. - */ - if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) -- return -EAGAIN; -+ return -EBUSY; - - /* - * Reread the user space value to handle the following situation: -@@ -2095,12 +2095,13 @@ retry_private: - if (!ret) - goto retry; - goto out; -+ case -EBUSY: - case -EAGAIN: - /* - * Two reasons for this: -- * - Owner is exiting and we just wait for the -+ * - EBUSY: Owner is exiting and we just wait for the - * exit to complete. -- * - The user space value changed. -+ * - EAGAIN: The user space value changed. - */ - double_unlock_hb(hb1, hb2); - hb_waiters_dec(hb2); -@@ -2873,12 +2874,13 @@ retry_private: - goto out_unlock_put_key; - case -EFAULT: - goto uaddr_faulted; -+ case -EBUSY: - case -EAGAIN: - /* - * Two reasons for this: -- * - Task is exiting and we just wait for the -+ * - EBUSY: Task is exiting and we just wait for the - * exit to complete. -- * - The user space value changed. -+ * - EAGAIN: The user space value changed. - */ - queue_unlock(hb); - put_futex_key(&q.key); diff --git a/pending/futex-4.19/futex_Provide_state_handling_for_exec()_as_well.patch b/pending/futex-4.19/futex_Provide_state_handling_for_exec()_as_well.patch deleted file mode 100644 index 782f5a73616..00000000000 --- a/pending/futex-4.19/futex_Provide_state_handling_for_exec()_as_well.patch +++ /dev/null @@ -1,97 +0,0 @@ -Subject: futex: Provide state handling for exec() as well -From: Thomas Gleixner -Date: Wed Nov 6 22:55:43 2019 +0100 - -From: Thomas Gleixner - -commit af8cbda2cfcaa5515d61ec500498d46e9a8247e2 upstream - -exec() attempts to handle potentially held futexes gracefully by running -the futex exit handling code like exit() does. - -The current implementation has no protection against concurrent incoming -waiters. The reason is that the futex state cannot be set to -FUTEX_STATE_DEAD after the cleanup because the task struct is still active -and just about to execute the new binary. - -While its arguably buggy when a task holds a futex over exec(), for -consistency sake the state handling can at least cover the actual futex -exit cleanup section. This provides state consistency protection accross -the cleanup. As the futex state of the task becomes FUTEX_STATE_OK after the -cleanup has been finished, this cannot prevent subsequent attempts to -attach to the task in case that the cleanup was not successfull in mopping -up all leftovers. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Thomas Gleixner -Reviewed-by: Ingo Molnar -Acked-by: Peter Zijlstra (Intel) -Link: https://lkml.kernel.org/r/20191106224556.753355618@linutronix.de -Signed-off-by: Greg Kroah-Hartman ---- - kernel/futex.c | 38 ++++++++++++++++++++++++++++++++++---- - 1 file changed, 34 insertions(+), 4 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -3698,7 +3698,7 @@ static void exit_robust_list(struct task - } - } - --void futex_exec_release(struct task_struct *tsk) -+static void futex_cleanup(struct task_struct *tsk) - { - if (unlikely(tsk->robust_list)) { - exit_robust_list(tsk); -@@ -3738,7 +3738,7 @@ void futex_exit_recursive(struct task_st - tsk->futex_state = FUTEX_STATE_DEAD; - } - --void futex_exit_release(struct task_struct *tsk) -+static void futex_cleanup_begin(struct task_struct *tsk) - { - /* - * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. -@@ -3754,10 +3754,40 @@ void futex_exit_release(struct task_stru - raw_spin_lock_irq(&tsk->pi_lock); - tsk->futex_state = FUTEX_STATE_EXITING; - raw_spin_unlock_irq(&tsk->pi_lock); -+} - -- futex_exec_release(tsk); -+static void futex_cleanup_end(struct task_struct *tsk, int state) -+{ -+ /* -+ * Lockless store. The only side effect is that an observer might -+ * take another loop until it becomes visible. -+ */ -+ tsk->futex_state = state; -+} - -- tsk->futex_state = FUTEX_STATE_DEAD; -+void futex_exec_release(struct task_struct *tsk) -+{ -+ /* -+ * The state handling is done for consistency, but in the case of -+ * exec() there is no way to prevent futher damage as the PID stays -+ * the same. But for the unlikely and arguably buggy case that a -+ * futex is held on exec(), this provides at least as much state -+ * consistency protection which is possible. -+ */ -+ futex_cleanup_begin(tsk); -+ futex_cleanup(tsk); -+ /* -+ * Reset the state to FUTEX_STATE_OK. The task is alive and about -+ * exec a new binary. -+ */ -+ futex_cleanup_end(tsk, FUTEX_STATE_OK); -+} -+ -+void futex_exit_release(struct task_struct *tsk) -+{ -+ futex_cleanup_begin(tsk); -+ futex_cleanup(tsk); -+ futex_cleanup_end(tsk, FUTEX_STATE_DEAD); - } - - long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, diff --git a/pending/futex-4.19/futex_Replace_PF_EXITPIDONE_with_a_state.patch b/pending/futex-4.19/futex_Replace_PF_EXITPIDONE_with_a_state.patch deleted file mode 100644 index 11303d7f009..00000000000 --- a/pending/futex-4.19/futex_Replace_PF_EXITPIDONE_with_a_state.patch +++ /dev/null @@ -1,192 +0,0 @@ -Subject: futex: Replace PF_EXITPIDONE with a state -From: Thomas Gleixner -Date: Wed Nov 6 22:55:37 2019 +0100 - -From: Thomas Gleixner - -commit 3d4775df0a89240f671861c6ab6e8d59af8e9e41 upstream - -The futex exit handling relies on PF_ flags. That's suboptimal as it -requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in -the middle of do_exit() to enforce the observability of PF_EXITING in the -futex code. - -Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic -over to the new state. The PF_EXITING dependency will be cleaned up in a -later step. - -This prepares for handling various futex exit issues later. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Thomas Gleixner -Reviewed-by: Ingo Molnar -Acked-by: Peter Zijlstra (Intel) -Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de -Signed-off-by: Greg Kroah-Hartman ---- - include/linux/futex.h | 33 +++++++++++++++++++++++++++++++++ - include/linux/sched.h | 2 +- - kernel/exit.c | 18 ++---------------- - kernel/futex.c | 25 +++++++++++++------------ - 4 files changed, 49 insertions(+), 29 deletions(-) - ---- a/include/linux/futex.h -+++ b/include/linux/futex.h -@@ -53,6 +53,10 @@ union futex_key { - #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } } - - #ifdef CONFIG_FUTEX -+enum { -+ FUTEX_STATE_OK, -+ FUTEX_STATE_DEAD, -+}; - - static inline void futex_init_task(struct task_struct *tsk) - { -@@ -62,6 +66,34 @@ static inline void futex_init_task(struc - #endif - INIT_LIST_HEAD(&tsk->pi_state_list); - tsk->pi_state_cache = NULL; -+ tsk->futex_state = FUTEX_STATE_OK; -+} -+ -+/** -+ * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD -+ * @tsk: task to set the state on -+ * -+ * Set the futex exit state of the task lockless. The futex waiter code -+ * observes that state when a task is exiting and loops until the task has -+ * actually finished the futex cleanup. The worst case for this is that the -+ * waiter runs through the wait loop until the state becomes visible. -+ * -+ * This has two callers: -+ * -+ * - futex_mm_release() after the futex exit cleanup has been done -+ * -+ * - do_exit() from the recursive fault handling path. -+ * -+ * In case of a recursive fault this is best effort. Either the futex exit -+ * code has run already or not. If the OWNER_DIED bit has been set on the -+ * futex then the waiter can take it over. If not, the problem is pushed -+ * back to user space. If the futex exit code did not run yet, then an -+ * already queued waiter might block forever, but there is nothing which -+ * can be done about that. -+ */ -+static inline void futex_exit_done(struct task_struct *tsk) -+{ -+ tsk->futex_state = FUTEX_STATE_DEAD; - } - - void futex_mm_release(struct task_struct *tsk); -@@ -71,6 +103,7 @@ long do_futex(u32 __user *uaddr, int op, - #else - static inline void futex_init_task(struct task_struct *tsk) { } - static inline void futex_mm_release(struct task_struct *tsk) { } -+static inline void futex_exit_done(struct task_struct *tsk) { } - static inline long do_futex(u32 __user *uaddr, int op, u32 val, - ktime_t *timeout, u32 __user *uaddr2, - u32 val2, u32 val3) ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -996,6 +996,7 @@ struct task_struct { - #endif - struct list_head pi_state_list; - struct futex_pi_state *pi_state_cache; -+ unsigned int futex_state; - #endif - #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; -@@ -1377,7 +1378,6 @@ extern struct pid *cad_pid; - */ - #define PF_IDLE 0x00000002 /* I am an IDLE thread */ - #define PF_EXITING 0x00000004 /* Getting shut down */ --#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ - #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ - #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ - #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -818,16 +818,7 @@ void __noreturn do_exit(long code) - */ - if (unlikely(tsk->flags & PF_EXITING)) { - pr_alert("Fixing recursive fault but reboot is needed!\n"); -- /* -- * We can do this unlocked here. The futex code uses -- * this flag just to verify whether the pi state -- * cleanup has been done or not. In the worst case it -- * loops once more. We pretend that the cleanup was -- * done as there is no way to return. Either the -- * OWNER_DIED bit is set by now or we push the blocked -- * task into the wait for ever nirwana as well. -- */ -- tsk->flags |= PF_EXITPIDONE; -+ futex_exit_done(tsk); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule(); - } -@@ -918,12 +909,7 @@ void __noreturn do_exit(long code) - * Make sure we are holding no locks: - */ - debug_check_no_locks_held(); -- /* -- * We can do this unlocked here. The futex code uses this flag -- * just to verify whether the pi state cleanup has been done -- * or not. In the worst case it loops once more. -- */ -- tsk->flags |= PF_EXITPIDONE; -+ futex_exit_done(tsk); - - if (tsk->io_context) - exit_io_context(tsk); ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1182,9 +1182,10 @@ static int handle_exit_race(u32 __user * - u32 uval2; - - /* -- * If PF_EXITPIDONE is not yet set, then try again. -+ * If the futex exit state is not yet FUTEX_STATE_DEAD, wait -+ * for it to finish. - */ -- if (tsk && !(tsk->flags & PF_EXITPIDONE)) -+ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) - return -EAGAIN; - - /* -@@ -1203,8 +1204,9 @@ static int handle_exit_race(u32 __user * - * *uaddr = 0xC0000000; tsk = get_task(PID); - * } if (!tsk->flags & PF_EXITING) { - * ... attach(); -- * tsk->flags |= PF_EXITPIDONE; } else { -- * if (!(tsk->flags & PF_EXITPIDONE)) -+ * tsk->futex_state = } else { -+ * FUTEX_STATE_DEAD; if (tsk->futex_state != -+ * FUTEX_STATE_DEAD) - * return -EAGAIN; - * return -ESRCH; <--- FAIL - * } -@@ -1260,17 +1262,16 @@ static int attach_to_pi_owner(u32 __user - } - - /* -- * We need to look at the task state flags to figure out, -- * whether the task is exiting. To protect against the do_exit -- * change of the task flags, we do this protected by -- * p->pi_lock: -+ * We need to look at the task state to figure out, whether the -+ * task is exiting. To protect against the change of the task state -+ * in futex_exit_release(), we do this protected by p->pi_lock: - */ - raw_spin_lock_irq(&p->pi_lock); -- if (unlikely(p->flags & PF_EXITING)) { -+ if (unlikely(p->futex_state != FUTEX_STATE_OK)) { - /* -- * The task is on the way out. When PF_EXITPIDONE is -- * set, we know that the task has finished the -- * cleanup: -+ * The task is on the way out. When the futex state is -+ * FUTEX_STATE_DEAD, we know that the task has finished -+ * the cleanup: - */ - int ret = handle_exit_race(uaddr, uval, p); - diff --git a/pending/futex-4.19/futex_Sanitize_exit_state_handling.patch b/pending/futex-4.19/futex_Sanitize_exit_state_handling.patch deleted file mode 100644 index ea0c362f2bc..00000000000 --- a/pending/futex-4.19/futex_Sanitize_exit_state_handling.patch +++ /dev/null @@ -1,50 +0,0 @@ -Subject: futex: Sanitize exit state handling -From: Thomas Gleixner -Date: Wed Nov 6 22:55:42 2019 +0100 - -From: Thomas Gleixner - -commit 4a8e991b91aca9e20705d434677ac013974e0e30 upstream - -Instead of having a smp_mb() and an empty lock/unlock of task::pi_lock move -the state setting into to the lock section. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Thomas Gleixner -Reviewed-by: Ingo Molnar -Acked-by: Peter Zijlstra (Intel) -Link: https://lkml.kernel.org/r/20191106224556.645603214@linutronix.de -Signed-off-by: Greg Kroah-Hartman ---- - kernel/futex.c | 17 ++++++++++------- - 1 file changed, 10 insertions(+), 7 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -3740,16 +3740,19 @@ void futex_exit_recursive(struct task_st - - void futex_exit_release(struct task_struct *tsk) - { -- tsk->futex_state = FUTEX_STATE_EXITING; -- /* -- * Ensure that all new tsk->pi_lock acquisitions must observe -- * FUTEX_STATE_EXITING. Serializes against attach_to_pi_owner(). -- */ -- smp_mb(); - /* -- * Ensure that we must observe the pi_state in exit_pi_state_list(). -+ * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. -+ * -+ * This ensures that all subsequent checks of tsk->futex_state in -+ * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with -+ * tsk->pi_lock held. -+ * -+ * It guarantees also that a pi_state which was queued right before -+ * the state change under tsk->pi_lock by a concurrent waiter must -+ * be observed in exit_pi_state_list(). - */ - raw_spin_lock_irq(&tsk->pi_lock); -+ tsk->futex_state = FUTEX_STATE_EXITING; - raw_spin_unlock_irq(&tsk->pi_lock); - - futex_exec_release(tsk); diff --git a/pending/futex-4.19/futex_Set_taskfutex_state_to_DEAD_right_after_handling_futex_exit.patch b/pending/futex-4.19/futex_Set_taskfutex_state_to_DEAD_right_after_handling_futex_exit.patch deleted file mode 100644 index 8ea9cbbb411..00000000000 --- a/pending/futex-4.19/futex_Set_taskfutex_state_to_DEAD_right_after_handling_futex_exit.patch +++ /dev/null @@ -1,46 +0,0 @@ -Subject: futex: Set task::futex_state to DEAD right after handling futex exit -From: Thomas Gleixner -Date: Wed Nov 6 22:55:40 2019 +0100 - -From: Thomas Gleixner - -commit f24f22435dcc11389acc87e5586239c1819d217c upstream - -Setting task::futex_state in do_exit() is rather arbitrarily placed for no -reason. Move it into the futex code. - -Note, this is only done for the exit cleanup as the exec cleanup cannot set -the state to FUTEX_STATE_DEAD because the task struct is still in active -use. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Thomas Gleixner -Reviewed-by: Ingo Molnar -Acked-by: Peter Zijlstra (Intel) -Link: https://lkml.kernel.org/r/20191106224556.439511191@linutronix.de -Signed-off-by: Greg Kroah-Hartman ---- - kernel/exit.c | 1 - - kernel/futex.c | 1 + - 2 files changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -909,7 +909,6 @@ void __noreturn do_exit(long code) - * Make sure we are holding no locks: - */ - debug_check_no_locks_held(); -- futex_exit_done(tsk); - - if (tsk->io_context) - exit_io_context(tsk); ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -3719,6 +3719,7 @@ void futex_exec_release(struct task_stru - void futex_exit_release(struct task_struct *tsk) - { - futex_exec_release(tsk); -+ futex_exit_done(tsk); - } - - long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, diff --git a/pending/futex-4.19/futex_Split_futex_mm_release()_for_exitexec.patch b/pending/futex-4.19/futex_Split_futex_mm_release()_for_exitexec.patch deleted file mode 100644 index ef555df81fd..00000000000 --- a/pending/futex-4.19/futex_Split_futex_mm_release()_for_exitexec.patch +++ /dev/null @@ -1,96 +0,0 @@ -Subject: futex: Split futex_mm_release() for exit/exec -From: Thomas Gleixner -Date: Wed Nov 6 22:55:39 2019 +0100 - -From: Thomas Gleixner - -commit 150d71584b12809144b8145b817e83b81158ae5f upstream - -To allow separate handling of the futex exit state in the futex exit code -for exit and exec, split futex_mm_release() into two functions and invoke -them from the corresponding exit/exec_mm_release() callsites. - -Preparatory only, no functional change. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Thomas Gleixner -Reviewed-by: Ingo Molnar -Acked-by: Peter Zijlstra (Intel) -Link: https://lkml.kernel.org/r/20191106224556.332094221@linutronix.de -Signed-off-by: Greg Kroah-Hartman ---- - include/linux/futex.h | 6 ++++-- - kernel/fork.c | 5 ++--- - kernel/futex.c | 7 ++++++- - 3 files changed, 12 insertions(+), 6 deletions(-) - ---- a/include/linux/futex.h -+++ b/include/linux/futex.h -@@ -96,14 +96,16 @@ static inline void futex_exit_done(struc - tsk->futex_state = FUTEX_STATE_DEAD; - } - --void futex_mm_release(struct task_struct *tsk); -+void futex_exit_release(struct task_struct *tsk); -+void futex_exec_release(struct task_struct *tsk); - - long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - u32 __user *uaddr2, u32 val2, u32 val3); - #else - static inline void futex_init_task(struct task_struct *tsk) { } --static inline void futex_mm_release(struct task_struct *tsk) { } - static inline void futex_exit_done(struct task_struct *tsk) { } -+static inline void futex_exit_release(struct task_struct *tsk) { } -+static inline void futex_exec_release(struct task_struct *tsk) { } - static inline long do_futex(u32 __user *uaddr, int op, u32 val, - ktime_t *timeout, u32 __user *uaddr2, - u32 val2, u32 val3) ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -1219,9 +1219,6 @@ static int wait_for_vfork_done(struct ta - */ - static void mm_release(struct task_struct *tsk, struct mm_struct *mm) - { -- /* Get rid of any futexes when releasing the mm */ -- futex_mm_release(tsk); -- - uprobe_free_utask(tsk); - - /* Get rid of any cached register state */ -@@ -1256,11 +1253,13 @@ static void mm_release(struct task_struc - - void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm) - { -+ futex_exit_release(tsk); - mm_release(tsk, mm); - } - - void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm) - { -+ futex_exec_release(tsk); - mm_release(tsk, mm); - } - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -3698,7 +3698,7 @@ static void exit_robust_list(struct task - } - } - --void futex_mm_release(struct task_struct *tsk) -+void futex_exec_release(struct task_struct *tsk) - { - if (unlikely(tsk->robust_list)) { - exit_robust_list(tsk); -@@ -3716,6 +3716,11 @@ void futex_mm_release(struct task_struct - exit_pi_state_list(tsk); - } - -+void futex_exit_release(struct task_struct *tsk) -+{ -+ futex_exec_release(tsk); -+} -+ - long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - u32 __user *uaddr2, u32 val2, u32 val3) - { diff --git a/pending/futex-4.19/series b/pending/futex-4.19/series deleted file mode 100644 index df259c9b485..00000000000 --- a/pending/futex-4.19/series +++ /dev/null @@ -1,11 +0,0 @@ -futex_Move_futex_exit_handling_into_futex_code.patch -futex_Replace_PF_EXITPIDONE_with_a_state.patch -exitexec_Seperate_mm_release().patch -futex_Split_futex_mm_release()_for_exitexec.patch -futex_Set_taskfutex_state_to_DEAD_right_after_handling_futex_exit.patch -futex_Mark_the_begin_of_futex_exit_explicitly.patch -futex_Sanitize_exit_state_handling.patch -futex_Provide_state_handling_for_exec()_as_well.patch -futex_Add_mutex_around_futex_exit.patch -futex_Provide_distinct_return_value_when_owner_is_exiting.patch -futex_Prevent_exit_livelock.patch -- 2.47.3