From 9ad34100ec040953234287c9d51fa8cd8bb7a331 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 1 Feb 2021 16:22:37 +0100 Subject: [PATCH] 4.4-stable patches added patches: exit-exec-seperate-mm_release.patch futex-add-mutex-around-futex-exit.patch futex-mark-the-begin-of-futex-exit-explicitly.patch futex-move-futex-exit-handling-into-futex-code.patch futex-prevent-exit-livelock.patch futex-provide-distinct-return-value-when-owner-is-exiting.patch futex-provide-state-handling-for-exec-as-well.patch futex-replace-pf_exitpidone-with-a-state.patch futex-sanitize-exit-state-handling.patch futex-set-task-futex_state-to-dead-right-after-handling-futex-exit.patch futex-split-futex_mm_release-for-exit-exec.patch y2038-futex-move-compat-implementation-into-futex.c.patch --- queue-4.4/exit-exec-seperate-mm_release.patch | 102 ++++ .../futex-add-mutex-around-futex-exit.patch | 87 +++ ...k-the-begin-of-futex-exit-explicitly.patch | 159 ++++++ ...-futex-exit-handling-into-futex-code.patch | 188 +++++++ queue-4.4/futex-prevent-exit-livelock.patch | 345 ++++++++++++ ...t-return-value-when-owner-is-exiting.patch | 71 +++ ...vide-state-handling-for-exec-as-well.patch | 102 ++++ ...x-replace-pf_exitpidone-with-a-state.patch | 175 ++++++ .../futex-sanitize-exit-state-handling.patch | 55 ++ ...dead-right-after-handling-futex-exit.patch | 51 ++ ...split-futex_mm_release-for-exit-exec.patch | 100 ++++ queue-4.4/series | 12 + ...e-compat-implementation-into-futex.c.patch | 506 ++++++++++++++++++ 13 files changed, 1953 insertions(+) create mode 100644 queue-4.4/exit-exec-seperate-mm_release.patch create mode 100644 queue-4.4/futex-add-mutex-around-futex-exit.patch create mode 100644 queue-4.4/futex-mark-the-begin-of-futex-exit-explicitly.patch create mode 100644 queue-4.4/futex-move-futex-exit-handling-into-futex-code.patch create mode 100644 queue-4.4/futex-prevent-exit-livelock.patch create mode 100644 queue-4.4/futex-provide-distinct-return-value-when-owner-is-exiting.patch create mode 100644 queue-4.4/futex-provide-state-handling-for-exec-as-well.patch create mode 100644 queue-4.4/futex-replace-pf_exitpidone-with-a-state.patch create mode 100644 queue-4.4/futex-sanitize-exit-state-handling.patch create mode 100644 queue-4.4/futex-set-task-futex_state-to-dead-right-after-handling-futex-exit.patch create mode 100644 queue-4.4/futex-split-futex_mm_release-for-exit-exec.patch create mode 100644 queue-4.4/y2038-futex-move-compat-implementation-into-futex.c.patch diff --git a/queue-4.4/exit-exec-seperate-mm_release.patch b/queue-4.4/exit-exec-seperate-mm_release.patch new file mode 100644 index 00000000000..477f6dff699 --- /dev/null +++ b/queue-4.4/exit-exec-seperate-mm_release.patch @@ -0,0 +1,102 @@ +From foo@baz Mon Feb 1 04:21:37 PM CET 2021 +From: Lee Jones +Date: Mon, 1 Feb 2021 15:12:06 +0000 +Subject: exit/exec: Seperate mm_release() +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Greg Kroah-Hartman , Lee Jones +Message-ID: <20210201151214.2193508-5-lee.jones@linaro.org> + +From: Thomas Gleixner + +commit 4610ba7ad877fafc0a25a30c6c82015304120426 upstream. + +mm_release() contains the futex exit handling. mm_release() is called from +do_exit()->exit_mm() and from exec()->exec_mm(). + +In the exit_mm() case PF_EXITING and the futex state is updated. In the +exec_mm() case these states are not touched. + +As the futex exit code needs further protections against exit races, this +needs to be split into two functions. + +Preparatory only, no functional change. 
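+
+For illustration, a minimal user-space demo of why the exec() side matters
+as well (not part of this patch; error handling is omitted and /bin/true is
+assumed to exist): the child registers a robust futex in shared memory and
+then exec()s, and the cleanup run from the exec path flags the lock with
+FUTEX_OWNER_DIED, which the parent can observe:
+
+#include <linux/futex.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static struct robust_list_head head;
+
+int main(void)
+{
+	uint32_t *futex = mmap(NULL, sizeof(*futex), PROT_READ | PROT_WRITE,
+			       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+
+	if (fork() == 0) {
+		/* Flag the lock as "acquisition in progress" ... */
+		head.list.next = &head.list;
+		head.futex_offset = 0;
+		head.list_op_pending = (struct robust_list *)futex;
+		syscall(SYS_set_robust_list, &head, sizeof(head));
+		/* ... take it by storing the own TID, then exec() */
+		*futex = (uint32_t)syscall(SYS_gettid);
+		execl("/bin/true", "true", (char *)NULL);
+		_exit(1);
+	}
+	wait(NULL);
+	printf("FUTEX_OWNER_DIED is %s\n",
+	       (*futex & FUTEX_OWNER_DIED) ? "set" : "clear");
+	return 0;
+}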
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Ingo Molnar +Acked-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20191106224556.240518241@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + fs/exec.c | 2 +- + include/linux/sched.h | 6 ++++-- + kernel/exit.c | 2 +- + kernel/fork.c | 12 +++++++++++- + 4 files changed, 17 insertions(+), 5 deletions(-) + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -875,7 +875,7 @@ static int exec_mmap(struct mm_struct *m + /* Notify parent that we're no longer interested in the old VM */ + tsk = current; + old_mm = current->mm; +- mm_release(tsk, old_mm); ++ exec_mm_release(tsk, old_mm); + + if (old_mm) { + sync_mm_rss(old_mm); +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -2647,8 +2647,10 @@ extern struct mm_struct *get_task_mm(str + * succeeds. + */ + extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); +-/* Remove the current tasks stale references to the old mm_struct */ +-extern void mm_release(struct task_struct *, struct mm_struct *); ++/* Remove the current tasks stale references to the old mm_struct on exit() */ ++extern void exit_mm_release(struct task_struct *, struct mm_struct *); ++/* Remove the current tasks stale references to the old mm_struct on exec() */ ++extern void exec_mm_release(struct task_struct *, struct mm_struct *); + + #ifdef CONFIG_HAVE_COPY_THREAD_TLS + extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -389,7 +389,7 @@ static void exit_mm(struct task_struct * + struct mm_struct *mm = tsk->mm; + struct core_state *core_state; + +- mm_release(tsk, mm); ++ exit_mm_release(tsk, mm); + if (!mm) + return; + sync_mm_rss(mm); +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -887,7 +887,7 @@ static int wait_for_vfork_done(struct ta + * restoring the old one. . . + * Eric Biederman 10 January 1998 + */ +-void mm_release(struct task_struct *tsk, struct mm_struct *mm) ++static void mm_release(struct task_struct *tsk, struct mm_struct *mm) + { + /* Get rid of any futexes when releasing the mm */ + futex_mm_release(tsk); +@@ -924,6 +924,16 @@ void mm_release(struct task_struct *tsk, + complete_vfork_done(tsk); + } + ++void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm) ++{ ++ mm_release(tsk, mm); ++} ++ ++void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm) ++{ ++ mm_release(tsk, mm); ++} ++ + /* + * Allocate a new mm structure and copy contents from the + * mm structure of the passed in task structure. diff --git a/queue-4.4/futex-add-mutex-around-futex-exit.patch b/queue-4.4/futex-add-mutex-around-futex-exit.patch new file mode 100644 index 00000000000..05070b0808a --- /dev/null +++ b/queue-4.4/futex-add-mutex-around-futex-exit.patch @@ -0,0 +1,87 @@ +From foo@baz Mon Feb 1 04:21:37 PM CET 2021 +From: Lee Jones +Date: Mon, 1 Feb 2021 15:12:12 +0000 +Subject: futex: Add mutex around futex exit +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Greg Kroah-Hartman , Lee Jones +Message-ID: <20210201151214.2193508-11-lee.jones@linaro.org> + +From: Thomas Gleixner + +commit 3f186d974826847a07bc7964d79ec4eded475ad9 upstream. + +The mutex will be used in subsequent changes to replace the busy looping of +a waiter when the futex owner is currently executing the exit cleanup to +prevent a potential live lock. 
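+
+For illustration, a user-space model of the scheme (not part of this patch;
+all names are made up): the exiting side holds an exit mutex across the
+cleanup, and a waiter which observes the EXITING state blocks on that mutex
+instead of busy looping, then reevaluates the state. Compile with -pthread:
+
+#include <pthread.h>
+#include <stdatomic.h>
+#include <stdio.h>
+
+enum { STATE_OK, STATE_EXITING, STATE_DEAD };
+
+static pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
+static atomic_int state = STATE_OK;
+
+static void *exiter(void *arg)
+{
+	pthread_mutex_lock(&exit_mutex);	/* futex_cleanup_begin() */
+	atomic_store(&state, STATE_EXITING);
+	/* ... robust list and PI state cleanup would run here ... */
+	atomic_store(&state, STATE_DEAD);	/* futex_cleanup_end() */
+	pthread_mutex_unlock(&exit_mutex);
+	return NULL;
+}
+
+static void *waiter(void *arg)
+{
+	while (atomic_load(&state) == STATE_EXITING) {
+		/* Block until the cleanup is complete, then recheck */
+		pthread_mutex_lock(&exit_mutex);
+		pthread_mutex_unlock(&exit_mutex);
+	}
+	printf("observed state %d\n", atomic_load(&state));
+	return NULL;
+}
+
+int main(void)
+{
+	pthread_t a, b;
+
+	pthread_create(&a, NULL, exiter, NULL);
+	pthread_create(&b, NULL, waiter, NULL);
+	pthread_join(a, NULL);
+	pthread_join(b, NULL);
+	return 0;
+}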
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Ingo Molnar +Acked-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20191106224556.845798895@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/futex.h | 1 + + include/linux/sched.h | 1 + + kernel/futex.c | 16 ++++++++++++++++ + 3 files changed, 18 insertions(+) + +--- a/include/linux/futex.h ++++ b/include/linux/futex.h +@@ -70,6 +70,7 @@ static inline void futex_init_task(struc + INIT_LIST_HEAD(&tsk->pi_state_list); + tsk->pi_state_cache = NULL; + tsk->futex_state = FUTEX_STATE_OK; ++ mutex_init(&tsk->futex_exit_mutex); + } + + void futex_exit_recursive(struct task_struct *tsk); +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1704,6 +1704,7 @@ struct task_struct { + #endif + struct list_head pi_state_list; + struct futex_pi_state *pi_state_cache; ++ struct mutex futex_exit_mutex; + unsigned int futex_state; + #endif + #ifdef CONFIG_PERF_EVENTS +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -3271,12 +3271,23 @@ static void futex_cleanup(struct task_st + */ + void futex_exit_recursive(struct task_struct *tsk) + { ++ /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */ ++ if (tsk->futex_state == FUTEX_STATE_EXITING) ++ mutex_unlock(&tsk->futex_exit_mutex); + tsk->futex_state = FUTEX_STATE_DEAD; + } + + static void futex_cleanup_begin(struct task_struct *tsk) + { + /* ++ * Prevent various race issues against a concurrent incoming waiter ++ * including live locks by forcing the waiter to block on ++ * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in ++ * attach_to_pi_owner(). ++ */ ++ mutex_lock(&tsk->futex_exit_mutex); ++ ++ /* + * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. + * + * This ensures that all subsequent checks of tsk->futex_state in +@@ -3299,6 +3310,11 @@ static void futex_cleanup_end(struct tas + * take another loop until it becomes visible. + */ + tsk->futex_state = state; ++ /* ++ * Drop the exit protection. This unblocks waiters which observed ++ * FUTEX_STATE_EXITING to reevaluate the state. ++ */ ++ mutex_unlock(&tsk->futex_exit_mutex); + } + + void futex_exec_release(struct task_struct *tsk) diff --git a/queue-4.4/futex-mark-the-begin-of-futex-exit-explicitly.patch b/queue-4.4/futex-mark-the-begin-of-futex-exit-explicitly.patch new file mode 100644 index 00000000000..8812f7a749f --- /dev/null +++ b/queue-4.4/futex-mark-the-begin-of-futex-exit-explicitly.patch @@ -0,0 +1,159 @@ +From foo@baz Mon Feb 1 04:21:37 PM CET 2021 +From: Lee Jones +Date: Mon, 1 Feb 2021 15:12:09 +0000 +Subject: futex: Mark the begin of futex exit explicitly +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Greg Kroah-Hartman , Lee Jones +Message-ID: <20210201151214.2193508-8-lee.jones@linaro.org> + +From: Thomas Gleixner + +commit 18f694385c4fd77a09851fd301236746ca83f3cb upstream. + +Instead of relying on PF_EXITING use an explicit state for the futex exit +and set it in the futex exit function. This moves the smp barrier and the +lock/unlock serialization into the futex code. + +As with the DEAD state this is restricted to the exit path as exec +continues to use the same task struct. + +This allows to simplify that logic in a next step. 
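+
+For illustration, how a waiter maps the now explicit exit state to an
+action (a stand-alone sketch modelled on attach_to_pi_owner(), not kernel
+code):
+
+#include <errno.h>
+#include <stdio.h>
+
+enum futex_state { FUTEX_STATE_OK, FUTEX_STATE_EXITING, FUTEX_STATE_DEAD };
+
+static int attach_to_owner(enum futex_state state)
+{
+	switch (state) {
+	case FUTEX_STATE_OK:
+		return 0;		/* owner alive: attach to it */
+	case FUTEX_STATE_EXITING:
+		return -EAGAIN;		/* cleanup in flight: retry */
+	case FUTEX_STATE_DEAD:
+		return -ESRCH;		/* cleanup done: owner is gone */
+	}
+	return -EINVAL;
+}
+
+int main(void)
+{
+	enum futex_state s;
+
+	for (s = FUTEX_STATE_OK; s <= FUTEX_STATE_DEAD; s++)
+		printf("state %d -> %d\n", (int)s, attach_to_owner(s));
+	return 0;
+}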
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Ingo Molnar +Acked-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20191106224556.539409004@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/futex.h | 31 +++---------------------------- + kernel/exit.c | 8 +------- + kernel/futex.c | 37 ++++++++++++++++++++++++++++++++++++- + 3 files changed, 40 insertions(+), 36 deletions(-) + +--- a/include/linux/futex.h ++++ b/include/linux/futex.h +@@ -57,6 +57,7 @@ union futex_key { + #ifdef CONFIG_FUTEX + enum { + FUTEX_STATE_OK, ++ FUTEX_STATE_EXITING, + FUTEX_STATE_DEAD, + }; + +@@ -71,33 +72,7 @@ static inline void futex_init_task(struc + tsk->futex_state = FUTEX_STATE_OK; + } + +-/** +- * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD +- * @tsk: task to set the state on +- * +- * Set the futex exit state of the task lockless. The futex waiter code +- * observes that state when a task is exiting and loops until the task has +- * actually finished the futex cleanup. The worst case for this is that the +- * waiter runs through the wait loop until the state becomes visible. +- * +- * This has two callers: +- * +- * - futex_mm_release() after the futex exit cleanup has been done +- * +- * - do_exit() from the recursive fault handling path. +- * +- * In case of a recursive fault this is best effort. Either the futex exit +- * code has run already or not. If the OWNER_DIED bit has been set on the +- * futex then the waiter can take it over. If not, the problem is pushed +- * back to user space. If the futex exit code did not run yet, then an +- * already queued waiter might block forever, but there is nothing which +- * can be done about that. +- */ +-static inline void futex_exit_done(struct task_struct *tsk) +-{ +- tsk->futex_state = FUTEX_STATE_DEAD; +-} +- ++void futex_exit_recursive(struct task_struct *tsk); + void futex_exit_release(struct task_struct *tsk); + void futex_exec_release(struct task_struct *tsk); + +@@ -105,7 +80,7 @@ long do_futex(u32 __user *uaddr, int op, + u32 __user *uaddr2, u32 val2, u32 val3); + #else + static inline void futex_init_task(struct task_struct *tsk) { } +-static inline void futex_exit_done(struct task_struct *tsk) { } ++static inline void futex_exit_recursive(struct task_struct *tsk) { } + static inline void futex_exit_release(struct task_struct *tsk) { } + static inline void futex_exec_release(struct task_struct *tsk) { } + #endif +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -695,18 +695,12 @@ void do_exit(long code) + */ + if (unlikely(tsk->flags & PF_EXITING)) { + pr_alert("Fixing recursive fault but reboot is needed!\n"); +- futex_exit_done(tsk); ++ futex_exit_recursive(tsk); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + } + + exit_signals(tsk); /* sets PF_EXITING */ +- /* +- * tsk->flags are checked in the futex code to protect against +- * an exiting task cleaning up the robust pi futexes. +- */ +- smp_mb(); +- raw_spin_unlock_wait(&tsk->pi_lock); + + if (unlikely(in_atomic())) { + pr_info("note: %s[%d] exited with preempt_count %d\n", +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -3252,10 +3252,45 @@ void futex_exec_release(struct task_stru + exit_pi_state_list(tsk); + } + ++/** ++ * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD ++ * @tsk: task to set the state on ++ * ++ * Set the futex exit state of the task lockless. 
The futex waiter code ++ * observes that state when a task is exiting and loops until the task has ++ * actually finished the futex cleanup. The worst case for this is that the ++ * waiter runs through the wait loop until the state becomes visible. ++ * ++ * This is called from the recursive fault handling path in do_exit(). ++ * ++ * This is best effort. Either the futex exit code has run already or ++ * not. If the OWNER_DIED bit has been set on the futex then the waiter can ++ * take it over. If not, the problem is pushed back to user space. If the ++ * futex exit code did not run yet, then an already queued waiter might ++ * block forever, but there is nothing which can be done about that. ++ */ ++void futex_exit_recursive(struct task_struct *tsk) ++{ ++ tsk->futex_state = FUTEX_STATE_DEAD; ++} ++ + void futex_exit_release(struct task_struct *tsk) + { ++ tsk->futex_state = FUTEX_STATE_EXITING; ++ /* ++ * Ensure that all new tsk->pi_lock acquisitions must observe ++ * FUTEX_STATE_EXITING. Serializes against attach_to_pi_owner(). ++ */ ++ smp_mb(); ++ /* ++ * Ensure that we must observe the pi_state in exit_pi_state_list(). ++ */ ++ raw_spin_lock_irq(&tsk->pi_lock); ++ raw_spin_unlock_irq(&tsk->pi_lock); ++ + futex_exec_release(tsk); +- futex_exit_done(tsk); ++ ++ tsk->futex_state = FUTEX_STATE_DEAD; + } + + long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, diff --git a/queue-4.4/futex-move-futex-exit-handling-into-futex-code.patch b/queue-4.4/futex-move-futex-exit-handling-into-futex-code.patch new file mode 100644 index 00000000000..1093a2fa173 --- /dev/null +++ b/queue-4.4/futex-move-futex-exit-handling-into-futex-code.patch @@ -0,0 +1,188 @@ +From foo@baz Mon Feb 1 04:21:37 PM CET 2021 +From: Lee Jones +Date: Mon, 1 Feb 2021 15:12:04 +0000 +Subject: futex: Move futex exit handling into futex code +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Greg Kroah-Hartman , Lee Jones +Message-ID: <20210201151214.2193508-3-lee.jones@linaro.org> + +From: Thomas Gleixner + +commit ba31c1a48538992316cc71ce94fa9cd3e7b427c0 upstream. + +The futex exit handling is #ifdeffed into mm_release() which is not pretty +to begin with. But upcoming changes to address futex exit races need to add +more functionality to this exit code. + +Split it out into a function, move it into futex code and make the various +futex exit functions static. + +Preparatory only and no functional change. + +Folded build fix from Borislav. 
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Ingo Molnar +Acked-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20191106224556.049705556@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/compat.h | 2 -- + include/linux/futex.h | 24 +++++++++++++++++------- + kernel/fork.c | 25 +++---------------------- + kernel/futex.c | 28 ++++++++++++++++++++++++++-- + 4 files changed, 46 insertions(+), 33 deletions(-) + +--- a/include/linux/compat.h ++++ b/include/linux/compat.h +@@ -306,8 +306,6 @@ struct compat_kexec_segment; + struct compat_mq_attr; + struct compat_msgbuf; + +-extern void compat_exit_robust_list(struct task_struct *curr); +- + asmlinkage long + compat_sys_set_robust_list(struct compat_robust_list_head __user *head, + compat_size_t len); +--- a/include/linux/futex.h ++++ b/include/linux/futex.h +@@ -1,6 +1,8 @@ + #ifndef _LINUX_FUTEX_H + #define _LINUX_FUTEX_H + ++#include ++ + #include + + struct inode; +@@ -53,14 +55,22 @@ union futex_key { + #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } } + + #ifdef CONFIG_FUTEX +-extern void exit_robust_list(struct task_struct *curr); +-extern void exit_pi_state_list(struct task_struct *curr); +-#else +-static inline void exit_robust_list(struct task_struct *curr) +-{ +-} +-static inline void exit_pi_state_list(struct task_struct *curr) ++static inline void futex_init_task(struct task_struct *tsk) + { ++ tsk->robust_list = NULL; ++#ifdef CONFIG_COMPAT ++ tsk->compat_robust_list = NULL; ++#endif ++ INIT_LIST_HEAD(&tsk->pi_state_list); ++ tsk->pi_state_cache = NULL; + } ++ ++void futex_mm_release(struct task_struct *tsk); ++ ++long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ++ u32 __user *uaddr2, u32 val2, u32 val3); ++#else ++static inline void futex_init_task(struct task_struct *tsk) { } ++static inline void futex_mm_release(struct task_struct *tsk) { } + #endif + #endif +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -890,20 +890,7 @@ static int wait_for_vfork_done(struct ta + void mm_release(struct task_struct *tsk, struct mm_struct *mm) + { + /* Get rid of any futexes when releasing the mm */ +-#ifdef CONFIG_FUTEX +- if (unlikely(tsk->robust_list)) { +- exit_robust_list(tsk); +- tsk->robust_list = NULL; +- } +-#ifdef CONFIG_COMPAT +- if (unlikely(tsk->compat_robust_list)) { +- compat_exit_robust_list(tsk); +- tsk->compat_robust_list = NULL; +- } +-#endif +- if (unlikely(!list_empty(&tsk->pi_state_list))) +- exit_pi_state_list(tsk); +-#endif ++ futex_mm_release(tsk); + + uprobe_free_utask(tsk); + +@@ -1511,14 +1498,8 @@ static struct task_struct *copy_process( + #ifdef CONFIG_BLOCK + p->plug = NULL; + #endif +-#ifdef CONFIG_FUTEX +- p->robust_list = NULL; +-#ifdef CONFIG_COMPAT +- p->compat_robust_list = NULL; +-#endif +- INIT_LIST_HEAD(&p->pi_state_list); +- p->pi_state_cache = NULL; +-#endif ++ futex_init_task(p); ++ + /* + * sigaltstack should be cleared when sharing the same VM + */ +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -331,6 +331,12 @@ static inline bool should_fail_futex(boo + } + #endif /* CONFIG_FAIL_FUTEX */ + ++#ifdef CONFIG_COMPAT ++static void compat_exit_robust_list(struct task_struct *curr); ++#else ++static inline void compat_exit_robust_list(struct task_struct *curr) { } ++#endif ++ + static inline void futex_get_mm(union futex_key *key) + { + atomic_inc(&key->private.mm->mm_count); +@@ -889,7 +895,7 @@ static struct task_struct * futex_find_g + * Kernel cleans up PI-state, but 
userspace is likely hosed. + * (Robust-futex cleanup is separate and might save the day for userspace.) + */ +-void exit_pi_state_list(struct task_struct *curr) ++static void exit_pi_state_list(struct task_struct *curr) + { + struct list_head *next, *head = &curr->pi_state_list; + struct futex_pi_state *pi_state; +@@ -3166,7 +3172,7 @@ static inline int fetch_robust_entry(str + * + * We silently return on any sign of list-walking problem. + */ +-void exit_robust_list(struct task_struct *curr) ++static void exit_robust_list(struct task_struct *curr) + { + struct robust_list_head __user *head = curr->robust_list; + struct robust_list __user *entry, *next_entry, *pending; +@@ -3229,6 +3235,24 @@ void exit_robust_list(struct task_struct + curr, pip); + } + ++void futex_mm_release(struct task_struct *tsk) ++{ ++ if (unlikely(tsk->robust_list)) { ++ exit_robust_list(tsk); ++ tsk->robust_list = NULL; ++ } ++ ++#ifdef CONFIG_COMPAT ++ if (unlikely(tsk->compat_robust_list)) { ++ compat_exit_robust_list(tsk); ++ tsk->compat_robust_list = NULL; ++ } ++#endif ++ ++ if (unlikely(!list_empty(&tsk->pi_state_list))) ++ exit_pi_state_list(tsk); ++} ++ + long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, + u32 __user *uaddr2, u32 val2, u32 val3) + { diff --git a/queue-4.4/futex-prevent-exit-livelock.patch b/queue-4.4/futex-prevent-exit-livelock.patch new file mode 100644 index 00000000000..e61ef84cd7a --- /dev/null +++ b/queue-4.4/futex-prevent-exit-livelock.patch @@ -0,0 +1,345 @@ +From foo@baz Mon Feb 1 04:21:37 PM CET 2021 +From: Lee Jones +Date: Mon, 1 Feb 2021 15:12:14 +0000 +Subject: futex: Prevent exit livelock +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Oleg Nesterov , Ingo Molnar , Peter Zijlstra , Greg Kroah-Hartman , Lee Jones +Message-ID: <20210201151214.2193508-13-lee.jones@linaro.org> + +From: Thomas Gleixner + +commit 3ef240eaff36b8119ac9e2ea17cbf41179c930ba upstream. + +Oleg provided the following test case: + +int main(void) +{ + struct sched_param sp = {}; + + sp.sched_priority = 2; + assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0); + + int lock = vfork(); + if (!lock) { + sp.sched_priority = 1; + assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0); + _exit(0); + } + + syscall(__NR_futex, &lock, FUTEX_LOCK_PI, 0,0,0); + return 0; +} + +This creates an unkillable RT process spinning in futex_lock_pi() on a UP +machine or if the process is affine to a single CPU. The reason is: + + parent child + + set FIFO prio 2 + + vfork() -> set FIFO prio 1 + implies wait_for_child() sched_setscheduler(...) + exit() + do_exit() + .... + mm_release() + tsk->futex_state = FUTEX_STATE_EXITING; + exit_futex(); (NOOP in this case) + complete() --> wakes parent + sys_futex() + loop infinite because + tsk->futex_state == FUTEX_STATE_EXITING + +The same problem can happen just by regular preemption as well: + + task holds futex + ... + do_exit() + tsk->futex_state = FUTEX_STATE_EXITING; + + --> preemption (unrelated wakeup of some other higher prio task, e.g. timer) + + switch_to(other_task) + + return to user + sys_futex() + loop infinite as above + +Just for the fun of it the futex exit cleanup could trigger the wakeup +itself before the task sets its futex state to DEAD. + +To cure this, the handling of the exiting owner is changed so: + + - A refcount is held on the task + + - The task pointer is stored in a caller visible location + + - The caller drops all locks (hash bucket, mmap_sem) and blocks + on task::futex_exit_mutex. 
When the mutex is acquired then
+   the exiting task has completed the cleanup and the state
+   is consistent and can be reevaluated.
+
+This is not a pretty solution, but there is no choice other than returning
+an error code to user space, which would break the state consistency
+guarantee and open another can of problems including regressions.
+
+For stable backports the preparatory commits ac31c7ff8624 .. ba31c1a48538
+are required as well, but for anything older than 5.3.y the backports are
+going to be provided when this hits mainline as the other dependencies for
+those kernels are definitely not stable material.
+
+Fixes: 778e9a9c3e71 ("pi-futex: fix exit races and locking problems")
+Reported-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Stable Team <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20191106224557.041676471@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c |  106 ++++++++++++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 91 insertions(+), 15 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1067,12 +1067,43 @@ out_state:
+ 	return 0;
+ }
+ 
++/**
++ * wait_for_owner_exiting - Block until the owner has exited
++ * @exiting: Pointer to the exiting task
++ *
++ * Caller must hold a refcount on @exiting.
++ */
++static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
++{
++	if (ret != -EBUSY) {
++		WARN_ON_ONCE(exiting);
++		return;
++	}
++
++	if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
++		return;
++
++	mutex_lock(&exiting->futex_exit_mutex);
++	/*
++	 * No point in doing state checking here. If the waiter got here
++	 * while the task was in exec()->exec_futex_release() then it can
++	 * have any FUTEX_STATE_* value when the waiter has acquired the
++	 * mutex. OK, if running, EXITING or DEAD if it reached exit()
++	 * already. Highly unlikely and not a problem. Just one more round
++	 * through the futex maze.
++	 */
++	mutex_unlock(&exiting->futex_exit_mutex);
++
++	put_task_struct(exiting);
++}
++
+ /*
+  * Lookup the task for the TID provided from user space and attach to
+  * it after doing proper sanity checks.
+  */
+ static int attach_to_pi_owner(u32 uval, union futex_key *key,
+-			      struct futex_pi_state **ps)
++			      struct futex_pi_state **ps,
++			      struct task_struct **exiting)
+ {
+ 	pid_t pid = uval & FUTEX_TID_MASK;
+ 	struct futex_pi_state *pi_state;
+@@ -1108,7 +1139,19 @@ static int attach_to_pi_owner(u32 uval,
+ 		int ret = (p->futex_state == FUTEX_STATE_DEAD) ? -ESRCH : -EAGAIN;
+ 
+ 		raw_spin_unlock_irq(&p->pi_lock);
+-		put_task_struct(p);
++		/*
++		 * If the owner task is between FUTEX_STATE_EXITING and
++		 * FUTEX_STATE_DEAD then store the task pointer and keep
++		 * the reference on the task struct. The calling code will
++		 * drop all locks, wait for the task to reach
++		 * FUTEX_STATE_DEAD and then drop the refcount. This is
++		 * required to prevent a live lock when the current task
++		 * preempted the exiting task between the two states. 
++ */ ++ if (ret == -EBUSY) ++ *exiting = p; ++ else ++ put_task_struct(p); + return ret; + } + +@@ -1139,7 +1182,8 @@ static int attach_to_pi_owner(u32 uval, + } + + static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, +- union futex_key *key, struct futex_pi_state **ps) ++ union futex_key *key, struct futex_pi_state **ps, ++ struct task_struct **exiting) + { + struct futex_q *match = futex_top_waiter(hb, key); + +@@ -1154,7 +1198,7 @@ static int lookup_pi_state(u32 uval, str + * We are the first waiter - try to look up the owner based on + * @uval and attach to it. + */ +- return attach_to_pi_owner(uval, key, ps); ++ return attach_to_pi_owner(uval, key, ps, exiting); + } + + static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) +@@ -1180,6 +1224,8 @@ static int lock_pi_update_atomic(u32 __u + * lookup + * @task: the task to perform the atomic lock work for. This will + * be "current" except in the case of requeue pi. ++ * @exiting: Pointer to store the task pointer of the owner task ++ * which is in the middle of exiting + * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) + * + * Return: +@@ -1188,11 +1234,17 @@ static int lock_pi_update_atomic(u32 __u + * <0 - error + * + * The hb->lock and futex_key refs shall be held by the caller. ++ * ++ * @exiting is only set when the return value is -EBUSY. If so, this holds ++ * a refcount on the exiting task on return and the caller needs to drop it ++ * after waiting for the exit to complete. + */ + static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, + union futex_key *key, + struct futex_pi_state **ps, +- struct task_struct *task, int set_waiters) ++ struct task_struct *task, ++ struct task_struct **exiting, ++ int set_waiters) + { + u32 uval, newval, vpid = task_pid_vnr(task); + struct futex_q *match; +@@ -1262,7 +1314,7 @@ static int futex_lock_pi_atomic(u32 __us + * attach to the owner. If that fails, no harm done, we only + * set the FUTEX_WAITERS bit in the user space variable. + */ +- return attach_to_pi_owner(uval, key, ps); ++ return attach_to_pi_owner(uval, key, ps, exiting); + } + + /** +@@ -1688,6 +1740,8 @@ void requeue_pi_wake_futex(struct futex_ + * @key1: the from futex key + * @key2: the to futex key + * @ps: address to store the pi_state pointer ++ * @exiting: Pointer to store the task pointer of the owner task ++ * which is in the middle of exiting + * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) + * + * Try and get the lock on behalf of the top waiter if we can do it atomically. +@@ -1695,16 +1749,20 @@ void requeue_pi_wake_futex(struct futex_ + * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. + * hb1 and hb2 must be held by the caller. + * ++ * @exiting is only set when the return value is -EBUSY. If so, this holds ++ * a refcount on the exiting task on return and the caller needs to drop it ++ * after waiting for the exit to complete. 
++ * + * Return: + * 0 - failed to acquire the lock atomically; + * >0 - acquired the lock, return value is vpid of the top_waiter + * <0 - error + */ +-static int futex_proxy_trylock_atomic(u32 __user *pifutex, +- struct futex_hash_bucket *hb1, +- struct futex_hash_bucket *hb2, +- union futex_key *key1, union futex_key *key2, +- struct futex_pi_state **ps, int set_waiters) ++static int ++futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, ++ struct futex_hash_bucket *hb2, union futex_key *key1, ++ union futex_key *key2, struct futex_pi_state **ps, ++ struct task_struct **exiting, int set_waiters) + { + struct futex_q *top_waiter = NULL; + u32 curval; +@@ -1741,7 +1799,7 @@ static int futex_proxy_trylock_atomic(u3 + */ + vpid = task_pid_vnr(top_waiter->task); + ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, +- set_waiters); ++ exiting, set_waiters); + if (ret == 1) { + requeue_pi_wake_futex(top_waiter, key2, hb2); + return vpid; +@@ -1861,6 +1919,8 @@ retry_private: + } + + if (requeue_pi && (task_count - nr_wake < nr_requeue)) { ++ struct task_struct *exiting = NULL; ++ + /* + * Attempt to acquire uaddr2 and wake the top waiter. If we + * intend to requeue waiters, force setting the FUTEX_WAITERS +@@ -1868,7 +1928,8 @@ retry_private: + * faults rather in the requeue loop below. + */ + ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, +- &key2, &pi_state, nr_requeue); ++ &key2, &pi_state, ++ &exiting, nr_requeue); + + /* + * At this point the top_waiter has either taken uaddr2 or is +@@ -1892,7 +1953,8 @@ retry_private: + * rereading and handing potential crap to + * lookup_pi_state. + */ +- ret = lookup_pi_state(ret, hb2, &key2, &pi_state); ++ ret = lookup_pi_state(ret, hb2, &key2, ++ &pi_state, &exiting); + } + + switch (ret) { +@@ -1923,6 +1985,12 @@ retry_private: + hb_waiters_dec(hb2); + put_futex_key(&key2); + put_futex_key(&key1); ++ /* ++ * Handle the case where the owner is in the middle of ++ * exiting. Wait for the exit to complete otherwise ++ * this task might loop forever, aka. live lock. ++ */ ++ wait_for_owner_exiting(ret, exiting); + cond_resched(); + goto retry; + default: +@@ -2545,6 +2613,7 @@ static int futex_lock_pi(u32 __user *uad + ktime_t *time, int trylock) + { + struct hrtimer_sleeper timeout, *to = NULL; ++ struct task_struct *exiting = NULL; + struct futex_hash_bucket *hb; + struct futex_q q = futex_q_init; + int res, ret; +@@ -2568,7 +2637,8 @@ retry: + retry_private: + hb = queue_lock(&q); + +- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); ++ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, ++ &exiting, 0); + if (unlikely(ret)) { + /* + * Atomic work succeeded and we got the lock, +@@ -2591,6 +2661,12 @@ retry_private: + */ + queue_unlock(hb); + put_futex_key(&q.key); ++ /* ++ * Handle the case where the owner is in the middle of ++ * exiting. Wait for the exit to complete otherwise ++ * this task might loop forever, aka. live lock. 
++ */ ++ wait_for_owner_exiting(ret, exiting); + cond_resched(); + goto retry; + default: diff --git a/queue-4.4/futex-provide-distinct-return-value-when-owner-is-exiting.patch b/queue-4.4/futex-provide-distinct-return-value-when-owner-is-exiting.patch new file mode 100644 index 00000000000..ba0d564c195 --- /dev/null +++ b/queue-4.4/futex-provide-distinct-return-value-when-owner-is-exiting.patch @@ -0,0 +1,71 @@ +From foo@baz Mon Feb 1 04:21:37 PM CET 2021 +From: Lee Jones +Date: Mon, 1 Feb 2021 15:12:13 +0000 +Subject: futex: Provide distinct return value when owner is exiting +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Greg Kroah-Hartman , Lee Jones +Message-ID: <20210201151214.2193508-12-lee.jones@linaro.org> + +From: Thomas Gleixner + +commit ac31c7ff8624409ba3c4901df9237a616c187a5d upstream. + +attach_to_pi_owner() returns -EAGAIN for various cases: + + - Owner task is exiting + - Futex value has changed + +The caller drops the held locks (hash bucket, mmap_sem) and retries the +operation. In case of the owner task exiting this can result in a live +lock. + +As a preparatory step for seperating those cases, provide a distinct return +value (EBUSY) for the owner exiting case. + +No functional change. + +Signed-off-by: Thomas Gleixner +Reviewed-by: Ingo Molnar +Acked-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20191106224556.935606117@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1909,12 +1909,13 @@ retry_private: + if (!ret) + goto retry; + goto out; ++ case -EBUSY: + case -EAGAIN: + /* + * Two reasons for this: +- * - Owner is exiting and we just wait for the ++ * - EBUSY: Owner is exiting and we just wait for the + * exit to complete. +- * - The user space value changed. ++ * - EAGAIN: The user space value changed. + */ + free_pi_state(pi_state); + pi_state = NULL; +@@ -2580,12 +2581,13 @@ retry_private: + goto out_unlock_put_key; + case -EFAULT: + goto uaddr_faulted; ++ case -EBUSY: + case -EAGAIN: + /* + * Two reasons for this: +- * - Task is exiting and we just wait for the ++ * - EBUSY: Task is exiting and we just wait for the + * exit to complete. +- * - The user space value changed. ++ * - EAGAIN: The user space value changed. + */ + queue_unlock(hb); + put_futex_key(&q.key); diff --git a/queue-4.4/futex-provide-state-handling-for-exec-as-well.patch b/queue-4.4/futex-provide-state-handling-for-exec-as-well.patch new file mode 100644 index 00000000000..3dd692ce150 --- /dev/null +++ b/queue-4.4/futex-provide-state-handling-for-exec-as-well.patch @@ -0,0 +1,102 @@ +From foo@baz Mon Feb 1 04:21:37 PM CET 2021 +From: Lee Jones +Date: Mon, 1 Feb 2021 15:12:11 +0000 +Subject: futex: Provide state handling for exec() as well +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Greg Kroah-Hartman , Lee Jones +Message-ID: <20210201151214.2193508-10-lee.jones@linaro.org> + +From: Thomas Gleixner + +commit af8cbda2cfcaa5515d61ec500498d46e9a8247e2 upstream. + +exec() attempts to handle potentially held futexes gracefully by running +the futex exit handling code like exit() does. + +The current implementation has no protection against concurrent incoming +waiters. 
The reason is that the futex state cannot be set to
+FUTEX_STATE_DEAD after the cleanup because the task struct is still active
+and just about to execute the new binary.
+
+While it's arguably buggy when a task holds a futex over exec(), for
+consistency sake the state handling can at least cover the actual futex
+exit cleanup section. This provides state consistency protection across
+the cleanup. As the futex state of the task becomes FUTEX_STATE_OK after the
+cleanup has been finished, this cannot prevent subsequent attempts to
+attach to the task in case that the cleanup was not successful in mopping
+up all leftovers.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20191106224556.753355618@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c |   38 ++++++++++++++++++++++++++++++++++----
+ 1 file changed, 34 insertions(+), 4 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -3234,7 +3234,7 @@ static void exit_robust_list(struct task
+ 						   curr, pip);
+ }
+ 
+-void futex_exec_release(struct task_struct *tsk)
++static void futex_cleanup(struct task_struct *tsk)
+ {
+ 	if (unlikely(tsk->robust_list)) {
+ 		exit_robust_list(tsk);
+@@ -3274,7 +3274,7 @@ void futex_exit_recursive(struct task_st
+ 	tsk->futex_state = FUTEX_STATE_DEAD;
+ }
+ 
+-void futex_exit_release(struct task_struct *tsk)
++static void futex_cleanup_begin(struct task_struct *tsk)
+ {
+ 	/*
+ 	 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
+@@ -3290,10 +3290,40 @@ void futex_exit_release(struct task_stru
+ 	raw_spin_lock_irq(&tsk->pi_lock);
+ 	tsk->futex_state = FUTEX_STATE_EXITING;
+ 	raw_spin_unlock_irq(&tsk->pi_lock);
++}
+ 
+-	futex_exec_release(tsk);
++static void futex_cleanup_end(struct task_struct *tsk, int state)
++{
++	/*
++	 * Lockless store. The only side effect is that an observer might
++	 * take another loop until it becomes visible.
++	 */
++	tsk->futex_state = state;
++}
+ 
+-	tsk->futex_state = FUTEX_STATE_DEAD;
++void futex_exec_release(struct task_struct *tsk)
++{
++	/*
++	 * The state handling is done for consistency, but in the case of
++	 * exec() there is no way to prevent further damage as the PID stays
++	 * the same. But for the unlikely and arguably buggy case that a
++	 * futex is held on exec(), this provides at least as much state
++	 * consistency protection which is possible.
++	 */
++	futex_cleanup_begin(tsk);
++	futex_cleanup(tsk);
++	/*
++	 * Reset the state to FUTEX_STATE_OK. The task is alive and about to
++	 * exec a new binary. 
++ */ ++ futex_cleanup_end(tsk, FUTEX_STATE_OK); ++} ++ ++void futex_exit_release(struct task_struct *tsk) ++{ ++ futex_cleanup_begin(tsk); ++ futex_cleanup(tsk); ++ futex_cleanup_end(tsk, FUTEX_STATE_DEAD); + } + + long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, diff --git a/queue-4.4/futex-replace-pf_exitpidone-with-a-state.patch b/queue-4.4/futex-replace-pf_exitpidone-with-a-state.patch new file mode 100644 index 00000000000..9494d65deab --- /dev/null +++ b/queue-4.4/futex-replace-pf_exitpidone-with-a-state.patch @@ -0,0 +1,175 @@ +From foo@baz Mon Feb 1 04:21:37 PM CET 2021 +From: Lee Jones +Date: Mon, 1 Feb 2021 15:12:05 +0000 +Subject: futex: Replace PF_EXITPIDONE with a state +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Greg Kroah-Hartman , Lee Jones +Message-ID: <20210201151214.2193508-4-lee.jones@linaro.org> + +From: Thomas Gleixner + +commit 3d4775df0a89240f671861c6ab6e8d59af8e9e41 upstream. + +The futex exit handling relies on PF_ flags. That's suboptimal as it +requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in +the middle of do_exit() to enforce the observability of PF_EXITING in the +futex code. + +Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic +over to the new state. The PF_EXITING dependency will be cleaned up in a +later step. + +This prepares for handling various futex exit issues later. + +Signed-off-by: Thomas Gleixner +Reviewed-by: Ingo Molnar +Acked-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/futex.h | 34 ++++++++++++++++++++++++++++++++++ + include/linux/sched.h | 2 +- + kernel/exit.c | 18 ++---------------- + kernel/futex.c | 17 ++++++++--------- + 4 files changed, 45 insertions(+), 26 deletions(-) + +--- a/include/linux/futex.h ++++ b/include/linux/futex.h +@@ -55,6 +55,11 @@ union futex_key { + #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } } + + #ifdef CONFIG_FUTEX ++enum { ++ FUTEX_STATE_OK, ++ FUTEX_STATE_DEAD, ++}; ++ + static inline void futex_init_task(struct task_struct *tsk) + { + tsk->robust_list = NULL; +@@ -63,6 +68,34 @@ static inline void futex_init_task(struc + #endif + INIT_LIST_HEAD(&tsk->pi_state_list); + tsk->pi_state_cache = NULL; ++ tsk->futex_state = FUTEX_STATE_OK; ++} ++ ++/** ++ * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD ++ * @tsk: task to set the state on ++ * ++ * Set the futex exit state of the task lockless. The futex waiter code ++ * observes that state when a task is exiting and loops until the task has ++ * actually finished the futex cleanup. The worst case for this is that the ++ * waiter runs through the wait loop until the state becomes visible. ++ * ++ * This has two callers: ++ * ++ * - futex_mm_release() after the futex exit cleanup has been done ++ * ++ * - do_exit() from the recursive fault handling path. ++ * ++ * In case of a recursive fault this is best effort. Either the futex exit ++ * code has run already or not. If the OWNER_DIED bit has been set on the ++ * futex then the waiter can take it over. If not, the problem is pushed ++ * back to user space. If the futex exit code did not run yet, then an ++ * already queued waiter might block forever, but there is nothing which ++ * can be done about that. 
++ */
++static inline void futex_exit_done(struct task_struct *tsk)
++{
++	tsk->futex_state = FUTEX_STATE_DEAD;
+ }
+ 
+ void futex_mm_release(struct task_struct *tsk);
+@@ -72,5 +105,6 @@ long do_futex(u32 __user *uaddr, int op,
+ #else
+ static inline void futex_init_task(struct task_struct *tsk) { }
+ static inline void futex_mm_release(struct task_struct *tsk) { }
++static inline void futex_exit_done(struct task_struct *tsk) { }
+ #endif
+ #endif
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1704,6 +1704,7 @@ struct task_struct {
+ #endif
+ 	struct list_head pi_state_list;
+ 	struct futex_pi_state *pi_state_cache;
++	unsigned int futex_state;
+ #endif
+ #ifdef CONFIG_PERF_EVENTS
+ 	struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
+@@ -2099,7 +2100,6 @@ extern void thread_group_cputime_adjuste
+  * Per process flags
+  */
+ #define PF_EXITING	0x00000004	/* getting shut down */
+-#define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
+ #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
+ #define PF_WQ_WORKER	0x00000020	/* I'm a workqueue worker */
+ #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -695,16 +695,7 @@ void do_exit(long code)
+ 	 */
+ 	if (unlikely(tsk->flags & PF_EXITING)) {
+ 		pr_alert("Fixing recursive fault but reboot is needed!\n");
+-		/*
+-		 * We can do this unlocked here. The futex code uses
+-		 * this flag just to verify whether the pi state
+-		 * cleanup has been done or not. In the worst case it
+-		 * loops once more. We pretend that the cleanup was
+-		 * done as there is no way to return. Either the
+-		 * OWNER_DIED bit is set by now or we push the blocked
+-		 * task into the wait for ever nirwana as well.
+-		 */
+-		tsk->flags |= PF_EXITPIDONE;
++		futex_exit_done(tsk);
+ 		set_current_state(TASK_UNINTERRUPTIBLE);
+ 		schedule();
+ 	}
+@@ -793,12 +784,7 @@ void do_exit(long code)
+ 	 * Make sure we are holding no locks:
+ 	 */
+ 	debug_check_no_locks_held();
+-	/*
+-	 * We can do this unlocked here. The futex code uses this flag
+-	 * just to verify whether the pi state cleanup has been done
+-	 * or not. In the worst case it loops once more.
+-	 */
+-	tsk->flags |= PF_EXITPIDONE;
++	futex_exit_done(tsk);
+ 
+ 	if (tsk->io_context)
+ 		exit_io_context(tsk);
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1094,19 +1094,18 @@ static int attach_to_pi_owner(u32 uval,
+ 	}
+ 
+ 	/*
+-	 * We need to look at the task state flags to figure out,
+-	 * whether the task is exiting. To protect against the do_exit
+-	 * change of the task flags, we do this protected by
+-	 * p->pi_lock:
++	 * We need to look at the task state to figure out, whether the
++	 * task is exiting. To protect against the change of the task state
++	 * in futex_exit_release(), we do this protected by p->pi_lock:
+ 	 */
+ 	raw_spin_lock_irq(&p->pi_lock);
+-	if (unlikely(p->flags & PF_EXITING)) {
++	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
+ 		/*
+-		 * The task is on the way out. When PF_EXITPIDONE is
+-		 * set, we know that the task has finished the
+-		 * cleanup:
++		 * The task is on the way out. When the futex state is
++		 * FUTEX_STATE_DEAD, we know that the task has finished
++		 * the cleanup:
+ 		 */
+-		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
++		int ret = (p->futex_state == FUTEX_STATE_DEAD) ? 
-ESRCH : -EAGAIN; + + raw_spin_unlock_irq(&p->pi_lock); + put_task_struct(p); diff --git a/queue-4.4/futex-sanitize-exit-state-handling.patch b/queue-4.4/futex-sanitize-exit-state-handling.patch new file mode 100644 index 00000000000..8c8b42d9870 --- /dev/null +++ b/queue-4.4/futex-sanitize-exit-state-handling.patch @@ -0,0 +1,55 @@ +From foo@baz Mon Feb 1 04:21:37 PM CET 2021 +From: Lee Jones +Date: Mon, 1 Feb 2021 15:12:10 +0000 +Subject: futex: Sanitize exit state handling +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Greg Kroah-Hartman , Lee Jones +Message-ID: <20210201151214.2193508-9-lee.jones@linaro.org> + +From: Thomas Gleixner + +commit 4a8e991b91aca9e20705d434677ac013974e0e30 upstream. + +Instead of having a smp_mb() and an empty lock/unlock of task::pi_lock move +the state setting into to the lock section. + +Signed-off-by: Thomas Gleixner +Reviewed-by: Ingo Molnar +Acked-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20191106224556.645603214@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 17 ++++++++++------- + 1 file changed, 10 insertions(+), 7 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -3276,16 +3276,19 @@ void futex_exit_recursive(struct task_st + + void futex_exit_release(struct task_struct *tsk) + { +- tsk->futex_state = FUTEX_STATE_EXITING; +- /* +- * Ensure that all new tsk->pi_lock acquisitions must observe +- * FUTEX_STATE_EXITING. Serializes against attach_to_pi_owner(). +- */ +- smp_mb(); + /* +- * Ensure that we must observe the pi_state in exit_pi_state_list(). ++ * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. ++ * ++ * This ensures that all subsequent checks of tsk->futex_state in ++ * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with ++ * tsk->pi_lock held. ++ * ++ * It guarantees also that a pi_state which was queued right before ++ * the state change under tsk->pi_lock by a concurrent waiter must ++ * be observed in exit_pi_state_list(). + */ + raw_spin_lock_irq(&tsk->pi_lock); ++ tsk->futex_state = FUTEX_STATE_EXITING; + raw_spin_unlock_irq(&tsk->pi_lock); + + futex_exec_release(tsk); diff --git a/queue-4.4/futex-set-task-futex_state-to-dead-right-after-handling-futex-exit.patch b/queue-4.4/futex-set-task-futex_state-to-dead-right-after-handling-futex-exit.patch new file mode 100644 index 00000000000..cb37cbd4f76 --- /dev/null +++ b/queue-4.4/futex-set-task-futex_state-to-dead-right-after-handling-futex-exit.patch @@ -0,0 +1,51 @@ +From foo@baz Mon Feb 1 04:21:37 PM CET 2021 +From: Lee Jones +Date: Mon, 1 Feb 2021 15:12:08 +0000 +Subject: futex: Set task::futex_state to DEAD right after handling futex exit +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Greg Kroah-Hartman , Lee Jones +Message-ID: <20210201151214.2193508-7-lee.jones@linaro.org> + +From: Thomas Gleixner + +commit f24f22435dcc11389acc87e5586239c1819d217c upstream. + +Setting task::futex_state in do_exit() is rather arbitrarily placed for no +reason. Move it into the futex code. + +Note, this is only done for the exit cleanup as the exec cleanup cannot set +the state to FUTEX_STATE_DEAD because the task struct is still in active +use. 
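+
+For illustration, the waiter-visible effect of FUTEX_STATE_DEAD in a
+minimal user-space demo (not part of this patch; error handling omitted):
+the child takes an uncontended PI futex purely in user space by storing
+its TID and dies holding it; since the lock is not robust, a later
+FUTEX_LOCK_PI gets ESRCH instead of looping:
+
+#include <errno.h>
+#include <linux/futex.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+int main(void)
+{
+	uint32_t *futex = mmap(NULL, sizeof(*futex), PROT_READ | PROT_WRITE,
+			       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+
+	if (fork() == 0) {
+		/* Uncontended PI lock: store the own TID, then die */
+		*futex = (uint32_t)syscall(SYS_gettid);
+		_exit(0);
+	}
+	wait(NULL);	/* owner is dead and reaped by now */
+
+	if (syscall(SYS_futex, futex, FUTEX_LOCK_PI, 0, NULL, NULL, 0) == -1)
+		printf("FUTEX_LOCK_PI failed: %s\n", strerror(errno));
+	return 0;
+}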
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Ingo Molnar +Acked-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20191106224556.439511191@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + kernel/exit.c | 1 - + kernel/futex.c | 1 + + 2 files changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -784,7 +784,6 @@ void do_exit(long code) + * Make sure we are holding no locks: + */ + debug_check_no_locks_held(); +- futex_exit_done(tsk); + + if (tsk->io_context) + exit_io_context(tsk); +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -3255,6 +3255,7 @@ void futex_exec_release(struct task_stru + void futex_exit_release(struct task_struct *tsk) + { + futex_exec_release(tsk); ++ futex_exit_done(tsk); + } + + long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, diff --git a/queue-4.4/futex-split-futex_mm_release-for-exit-exec.patch b/queue-4.4/futex-split-futex_mm_release-for-exit-exec.patch new file mode 100644 index 00000000000..bb6b2f3847e --- /dev/null +++ b/queue-4.4/futex-split-futex_mm_release-for-exit-exec.patch @@ -0,0 +1,100 @@ +From foo@baz Mon Feb 1 04:21:37 PM CET 2021 +From: Lee Jones +Date: Mon, 1 Feb 2021 15:12:07 +0000 +Subject: futex: Split futex_mm_release() for exit/exec +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Greg Kroah-Hartman , Lee Jones +Message-ID: <20210201151214.2193508-6-lee.jones@linaro.org> + +From: Thomas Gleixner + +commit 150d71584b12809144b8145b817e83b81158ae5f upstream. + +To allow separate handling of the futex exit state in the futex exit code +for exit and exec, split futex_mm_release() into two functions and invoke +them from the corresponding exit/exec_mm_release() callsites. + +Preparatory only, no functional change. 
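+
+For illustration, the exit half of this split is what glibc's robust
+mutexes build on. A minimal demo (not part of this patch; compile with
+-pthread): a thread dies holding a robust mutex, the futex exit code marks
+the lock on the exit path, and the next locker gets EOWNERDEAD:
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+
+static pthread_mutex_t lock;
+
+static void *owner(void *arg)
+{
+	pthread_mutex_lock(&lock);
+	return NULL;	/* thread exits while holding the lock */
+}
+
+int main(void)
+{
+	pthread_mutexattr_t attr;
+	pthread_t t;
+
+	pthread_mutexattr_init(&attr);
+	pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
+	pthread_mutex_init(&lock, &attr);
+
+	pthread_create(&t, NULL, owner, NULL);
+	pthread_join(t, NULL);
+
+	if (pthread_mutex_lock(&lock) == EOWNERDEAD) {
+		printf("owner died, making the lock consistent\n");
+		pthread_mutex_consistent(&lock);
+	}
+	pthread_mutex_unlock(&lock);
+	return 0;
+}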
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Ingo Molnar +Acked-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20191106224556.332094221@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/futex.h | 6 ++++-- + kernel/fork.c | 5 ++--- + kernel/futex.c | 7 ++++++- + 3 files changed, 12 insertions(+), 6 deletions(-) + +--- a/include/linux/futex.h ++++ b/include/linux/futex.h +@@ -98,13 +98,15 @@ static inline void futex_exit_done(struc + tsk->futex_state = FUTEX_STATE_DEAD; + } + +-void futex_mm_release(struct task_struct *tsk); ++void futex_exit_release(struct task_struct *tsk); ++void futex_exec_release(struct task_struct *tsk); + + long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, + u32 __user *uaddr2, u32 val2, u32 val3); + #else + static inline void futex_init_task(struct task_struct *tsk) { } +-static inline void futex_mm_release(struct task_struct *tsk) { } + static inline void futex_exit_done(struct task_struct *tsk) { } ++static inline void futex_exit_release(struct task_struct *tsk) { } ++static inline void futex_exec_release(struct task_struct *tsk) { } + #endif + #endif +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -889,9 +889,6 @@ static int wait_for_vfork_done(struct ta + */ + static void mm_release(struct task_struct *tsk, struct mm_struct *mm) + { +- /* Get rid of any futexes when releasing the mm */ +- futex_mm_release(tsk); +- + uprobe_free_utask(tsk); + + /* Get rid of any cached register state */ +@@ -926,11 +923,13 @@ static void mm_release(struct task_struc + + void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm) + { ++ futex_exit_release(tsk); + mm_release(tsk, mm); + } + + void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm) + { ++ futex_exec_release(tsk); + mm_release(tsk, mm); + } + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -3234,7 +3234,7 @@ static void exit_robust_list(struct task + curr, pip); + } + +-void futex_mm_release(struct task_struct *tsk) ++void futex_exec_release(struct task_struct *tsk) + { + if (unlikely(tsk->robust_list)) { + exit_robust_list(tsk); +@@ -3252,6 +3252,11 @@ void futex_mm_release(struct task_struct + exit_pi_state_list(tsk); + } + ++void futex_exit_release(struct task_struct *tsk) ++{ ++ futex_exec_release(tsk); ++} ++ + long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, + u32 __user *uaddr2, u32 val2, u32 val3) + { diff --git a/queue-4.4/series b/queue-4.4/series index 61f87165588..0a4d0bae019 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -4,3 +4,15 @@ net-usb-qmi_wwan-added-support-for-thales-cinterion-plsx3-modem-family.patch kvm-x86-pmu-fix-hw_ref_cpu_cycles-event-pseudo-encoding-in-intel_arch_events.patch mt7601u-fix-kernel-crash-unplugging-the-device.patch mt7601u-fix-rx-buffer-refcounting.patch +y2038-futex-move-compat-implementation-into-futex.c.patch +futex-move-futex-exit-handling-into-futex-code.patch +futex-replace-pf_exitpidone-with-a-state.patch +exit-exec-seperate-mm_release.patch +futex-split-futex_mm_release-for-exit-exec.patch +futex-set-task-futex_state-to-dead-right-after-handling-futex-exit.patch +futex-mark-the-begin-of-futex-exit-explicitly.patch +futex-sanitize-exit-state-handling.patch +futex-provide-state-handling-for-exec-as-well.patch +futex-add-mutex-around-futex-exit.patch +futex-provide-distinct-return-value-when-owner-is-exiting.patch +futex-prevent-exit-livelock.patch diff --git 
a/queue-4.4/y2038-futex-move-compat-implementation-into-futex.c.patch b/queue-4.4/y2038-futex-move-compat-implementation-into-futex.c.patch new file mode 100644 index 00000000000..4b1d62c54fb --- /dev/null +++ b/queue-4.4/y2038-futex-move-compat-implementation-into-futex.c.patch @@ -0,0 +1,506 @@ +From foo@baz Mon Feb 1 04:21:37 PM CET 2021 +From: Lee Jones +Date: Mon, 1 Feb 2021 15:12:03 +0000 +Subject: y2038: futex: Move compat implementation into futex.c +To: stable@vger.kernel.org +Cc: Arnd Bergmann , Greg Kroah-Hartman , Lee Jones +Message-ID: <20210201151214.2193508-2-lee.jones@linaro.org> + +From: Arnd Bergmann + +commit 04e7712f4460585e5eed5b853fd8b82a9943958f upstream. + +We are going to share the compat_sys_futex() handler between 64-bit +architectures and 32-bit architectures that need to deal with both 32-bit +and 64-bit time_t, and this is easier if both entry points are in the +same file. + +In fact, most other system call handlers do the same thing these days, so +let's follow the trend here and merge all of futex_compat.c into futex.c. + +In the process, a few minor changes have to be done to make sure everything +still makes sense: handle_futex_death() and futex_cmpxchg_enabled() become +local symbol, and the compat version of the fetch_robust_entry() function +gets renamed to compat_fetch_robust_entry() to avoid a symbol clash. + +This is intended as a purely cosmetic patch, no behavior should +change. + +Signed-off-by: Arnd Bergmann +Signed-off-by: Greg Kroah-Hartman +[Lee: Back-ported to satisfy a build dependency] +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/futex.h | 8 - + kernel/Makefile | 3 + kernel/futex.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++- + kernel/futex_compat.c | 201 -------------------------------------------------- + 4 files changed, 192 insertions(+), 215 deletions(-) + delete mode 100644 kernel/futex_compat.c + +--- a/include/linux/futex.h ++++ b/include/linux/futex.h +@@ -11,9 +11,6 @@ union ktime; + long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout, + u32 __user *uaddr2, u32 val2, u32 val3); + +-extern int +-handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); +- + /* + * Futexes are matched on equal values of this key. + * The key type depends on whether it's a shared or private mapping. +@@ -58,11 +55,6 @@ union futex_key { + #ifdef CONFIG_FUTEX + extern void exit_robust_list(struct task_struct *curr); + extern void exit_pi_state_list(struct task_struct *curr); +-#ifdef CONFIG_HAVE_FUTEX_CMPXCHG +-#define futex_cmpxchg_enabled 1 +-#else +-extern int futex_cmpxchg_enabled; +-#endif + #else + static inline void exit_robust_list(struct task_struct *curr) + { +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -36,9 +36,6 @@ obj-$(CONFIG_PROFILING) += profile.o + obj-$(CONFIG_STACKTRACE) += stacktrace.o + obj-y += time/ + obj-$(CONFIG_FUTEX) += futex.o +-ifeq ($(CONFIG_COMPAT),y) +-obj-$(CONFIG_FUTEX) += futex_compat.o +-endif + obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o + obj-$(CONFIG_SMP) += smp.o + ifneq ($(CONFIG_SMP),y) +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -44,6 +44,7 @@ + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ ++#include + #include + #include + #include +@@ -171,8 +172,10 @@ + * double_lock_hb() and double_unlock_hb(), respectively. 
+ */ + +-#ifndef CONFIG_HAVE_FUTEX_CMPXCHG +-int __read_mostly futex_cmpxchg_enabled; ++#ifdef CONFIG_HAVE_FUTEX_CMPXCHG ++#define futex_cmpxchg_enabled 1 ++#else ++static int __read_mostly futex_cmpxchg_enabled; + #endif + + /* +@@ -3088,7 +3091,7 @@ err_unlock: + * Process a futex-list entry, check whether it's owned by the + * dying task, and do notification if so: + */ +-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) ++static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) + { + u32 uval, uninitialized_var(nval), mval; + +@@ -3318,6 +3321,192 @@ SYSCALL_DEFINE6(futex, u32 __user *, uad + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); + } + ++#ifdef CONFIG_COMPAT ++/* ++ * Fetch a robust-list pointer. Bit 0 signals PI futexes: ++ */ ++static inline int ++compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, ++ compat_uptr_t __user *head, unsigned int *pi) ++{ ++ if (get_user(*uentry, head)) ++ return -EFAULT; ++ ++ *entry = compat_ptr((*uentry) & ~1); ++ *pi = (unsigned int)(*uentry) & 1; ++ ++ return 0; ++} ++ ++static void __user *futex_uaddr(struct robust_list __user *entry, ++ compat_long_t futex_offset) ++{ ++ compat_uptr_t base = ptr_to_compat(entry); ++ void __user *uaddr = compat_ptr(base + futex_offset); ++ ++ return uaddr; ++} ++ ++/* ++ * Walk curr->robust_list (very carefully, it's a userspace list!) ++ * and mark any locks found there dead, and notify any waiters. ++ * ++ * We silently return on any sign of list-walking problem. ++ */ ++void compat_exit_robust_list(struct task_struct *curr) ++{ ++ struct compat_robust_list_head __user *head = curr->compat_robust_list; ++ struct robust_list __user *entry, *next_entry, *pending; ++ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; ++ unsigned int uninitialized_var(next_pi); ++ compat_uptr_t uentry, next_uentry, upending; ++ compat_long_t futex_offset; ++ int rc; ++ ++ if (!futex_cmpxchg_enabled) ++ return; ++ ++ /* ++ * Fetch the list head (which was registered earlier, via ++ * sys_set_robust_list()): ++ */ ++ if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) ++ return; ++ /* ++ * Fetch the relative futex offset: ++ */ ++ if (get_user(futex_offset, &head->futex_offset)) ++ return; ++ /* ++ * Fetch any possibly pending lock-add first, and handle it ++ * if it exists: ++ */ ++ if (compat_fetch_robust_entry(&upending, &pending, ++ &head->list_op_pending, &pip)) ++ return; ++ ++ next_entry = NULL; /* avoid warning with gcc */ ++ while (entry != (struct robust_list __user *) &head->list) { ++ /* ++ * Fetch the next entry in the list before calling ++ * handle_futex_death: ++ */ ++ rc = compat_fetch_robust_entry(&next_uentry, &next_entry, ++ (compat_uptr_t __user *)&entry->next, &next_pi); ++ /* ++ * A pending lock might already be on the list, so ++ * dont process it twice: ++ */ ++ if (entry != pending) { ++ void __user *uaddr = futex_uaddr(entry, futex_offset); ++ ++ if (handle_futex_death(uaddr, curr, pi)) ++ return; ++ } ++ if (rc) ++ return; ++ uentry = next_uentry; ++ entry = next_entry; ++ pi = next_pi; ++ /* ++ * Avoid excessively long or circular lists: ++ */ ++ if (!--limit) ++ break; ++ ++ cond_resched(); ++ } ++ if (pending) { ++ void __user *uaddr = futex_uaddr(pending, futex_offset); ++ ++ handle_futex_death(uaddr, curr, pip); ++ } ++} ++ ++COMPAT_SYSCALL_DEFINE2(set_robust_list, ++ struct compat_robust_list_head __user *, head, ++ compat_size_t, len) ++{ ++ if (!futex_cmpxchg_enabled) ++ return 
-ENOSYS;
++
++	if (unlikely(len != sizeof(*head)))
++		return -EINVAL;
++
++	current->compat_robust_list = head;
++
++	return 0;
++}
++
++COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
++			compat_uptr_t __user *, head_ptr,
++			compat_size_t __user *, len_ptr)
++{
++	struct compat_robust_list_head __user *head;
++	unsigned long ret;
++	struct task_struct *p;
++
++	if (!futex_cmpxchg_enabled)
++		return -ENOSYS;
++
++	rcu_read_lock();
++
++	ret = -ESRCH;
++	if (!pid)
++		p = current;
++	else {
++		p = find_task_by_vpid(pid);
++		if (!p)
++			goto err_unlock;
++	}
++
++	ret = -EPERM;
++	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
++		goto err_unlock;
++
++	head = p->compat_robust_list;
++	rcu_read_unlock();
++
++	if (put_user(sizeof(*head), len_ptr))
++		return -EFAULT;
++	return put_user(ptr_to_compat(head), head_ptr);
++
++err_unlock:
++	rcu_read_unlock();
++
++	return ret;
++}
++
++COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
++		struct compat_timespec __user *, utime, u32 __user *, uaddr2,
++		u32, val3)
++{
++	struct timespec ts;
++	ktime_t t, *tp = NULL;
++	int val2 = 0;
++	int cmd = op & FUTEX_CMD_MASK;
++
++	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
++		      cmd == FUTEX_WAIT_BITSET ||
++		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
++		if (compat_get_timespec(&ts, utime))
++			return -EFAULT;
++		if (!timespec_valid(&ts))
++			return -EINVAL;
++
++		t = timespec_to_ktime(ts);
++		if (cmd == FUTEX_WAIT)
++			t = ktime_add_safe(ktime_get(), t);
++		tp = &t;
++	}
++	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
++	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
++		val2 = (int) (unsigned long) utime;
++
++	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
++}
++#endif /* CONFIG_COMPAT */
++
+ static void __init futex_detect_cmpxchg(void)
+ {
+ #ifndef CONFIG_HAVE_FUTEX_CMPXCHG
+--- a/kernel/futex_compat.c
++++ /dev/null
+@@ -1,201 +0,0 @@
+-/*
+- * linux/kernel/futex_compat.c
+- *
+- * Futex compatibililty routines.
+- *
+- * Copyright 2006, Red Hat, Inc., Ingo Molnar
+- */
+-
+-#include <linux/linkage.h>
+-#include <linux/compat.h>
+-#include <linux/nsproxy.h>
+-#include <linux/futex.h>
+-#include <linux/ptrace.h>
+-#include <linux/syscalls.h>
+-
+-#include <asm/uaccess.h>
+-
+-
+-/*
+- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+- */
+-static inline int
+-fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+-		   compat_uptr_t __user *head, unsigned int *pi)
+-{
+-	if (get_user(*uentry, head))
+-		return -EFAULT;
+-
+-	*entry = compat_ptr((*uentry) & ~1);
+-	*pi = (unsigned int)(*uentry) & 1;
+-
+-	return 0;
+-}
+-
+-static void __user *futex_uaddr(struct robust_list __user *entry,
+-				compat_long_t futex_offset)
+-{
+-	compat_uptr_t base = ptr_to_compat(entry);
+-	void __user *uaddr = compat_ptr(base + futex_offset);
+-
+-	return uaddr;
+-}
+-
+-/*
+- * Walk curr->robust_list (very carefully, it's a userspace list!)
+- * and mark any locks found there dead, and notify any waiters.
+- *
+- * We silently return on any sign of list-walking problem.
+- */ +-void compat_exit_robust_list(struct task_struct *curr) +-{ +- struct compat_robust_list_head __user *head = curr->compat_robust_list; +- struct robust_list __user *entry, *next_entry, *pending; +- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; +- unsigned int uninitialized_var(next_pi); +- compat_uptr_t uentry, next_uentry, upending; +- compat_long_t futex_offset; +- int rc; +- +- if (!futex_cmpxchg_enabled) +- return; +- +- /* +- * Fetch the list head (which was registered earlier, via +- * sys_set_robust_list()): +- */ +- if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) +- return; +- /* +- * Fetch the relative futex offset: +- */ +- if (get_user(futex_offset, &head->futex_offset)) +- return; +- /* +- * Fetch any possibly pending lock-add first, and handle it +- * if it exists: +- */ +- if (fetch_robust_entry(&upending, &pending, +- &head->list_op_pending, &pip)) +- return; +- +- next_entry = NULL; /* avoid warning with gcc */ +- while (entry != (struct robust_list __user *) &head->list) { +- /* +- * Fetch the next entry in the list before calling +- * handle_futex_death: +- */ +- rc = fetch_robust_entry(&next_uentry, &next_entry, +- (compat_uptr_t __user *)&entry->next, &next_pi); +- /* +- * A pending lock might already be on the list, so +- * dont process it twice: +- */ +- if (entry != pending) { +- void __user *uaddr = futex_uaddr(entry, futex_offset); +- +- if (handle_futex_death(uaddr, curr, pi)) +- return; +- } +- if (rc) +- return; +- uentry = next_uentry; +- entry = next_entry; +- pi = next_pi; +- /* +- * Avoid excessively long or circular lists: +- */ +- if (!--limit) +- break; +- +- cond_resched(); +- } +- if (pending) { +- void __user *uaddr = futex_uaddr(pending, futex_offset); +- +- handle_futex_death(uaddr, curr, pip); +- } +-} +- +-COMPAT_SYSCALL_DEFINE2(set_robust_list, +- struct compat_robust_list_head __user *, head, +- compat_size_t, len) +-{ +- if (!futex_cmpxchg_enabled) +- return -ENOSYS; +- +- if (unlikely(len != sizeof(*head))) +- return -EINVAL; +- +- current->compat_robust_list = head; +- +- return 0; +-} +- +-COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, +- compat_uptr_t __user *, head_ptr, +- compat_size_t __user *, len_ptr) +-{ +- struct compat_robust_list_head __user *head; +- unsigned long ret; +- struct task_struct *p; +- +- if (!futex_cmpxchg_enabled) +- return -ENOSYS; +- +- rcu_read_lock(); +- +- ret = -ESRCH; +- if (!pid) +- p = current; +- else { +- p = find_task_by_vpid(pid); +- if (!p) +- goto err_unlock; +- } +- +- ret = -EPERM; +- if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) +- goto err_unlock; +- +- head = p->compat_robust_list; +- rcu_read_unlock(); +- +- if (put_user(sizeof(*head), len_ptr)) +- return -EFAULT; +- return put_user(ptr_to_compat(head), head_ptr); +- +-err_unlock: +- rcu_read_unlock(); +- +- return ret; +-} +- +-COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, +- struct compat_timespec __user *, utime, u32 __user *, uaddr2, +- u32, val3) +-{ +- struct timespec ts; +- ktime_t t, *tp = NULL; +- int val2 = 0; +- int cmd = op & FUTEX_CMD_MASK; +- +- if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || +- cmd == FUTEX_WAIT_BITSET || +- cmd == FUTEX_WAIT_REQUEUE_PI)) { +- if (compat_get_timespec(&ts, utime)) +- return -EFAULT; +- if (!timespec_valid(&ts)) +- return -EINVAL; +- +- t = timespec_to_ktime(ts); +- if (cmd == FUTEX_WAIT) +- t = ktime_add_safe(ktime_get(), t); +- tp = &t; +- } +- if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || +- cmd == 
FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
+-		val2 = (int) (unsigned long) utime;
+-
+-	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+-}
-- 
2.47.3
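
Editor's note: the twelve backports above spread one mechanism across many
small diffs, which makes the resulting behaviour hard to see in patch form.
Below is a minimal userspace model of that end state: an exiting futex owner
moves through OK -> EXITING -> DEAD around its robust/PI cleanup, and a
contending waiter that observes EXITING blocks on the owner's exit mutex
instead of busy-looping (the livelock this series fixes). This is an
illustrative sketch only, not kernel code: FUTEX_STATE_* and futex_exit_mutex
mirror names introduced by the patches, but struct task, owner_exit(),
attach_to_owner() and the main() driver are inventions of this note.

/* Build with: cc -pthread sketch.c */
#include <pthread.h>
#include <stdio.h>

enum futex_state { FUTEX_STATE_OK, FUTEX_STATE_EXITING, FUTEX_STATE_DEAD };

struct task {
	enum futex_state futex_state;
	pthread_mutex_t futex_exit_mutex;	/* models tsk->futex_exit_mutex */
};

/* Exiting task: mark EXITING, do the cleanup, mark DEAD -- all while
 * holding the exit mutex, so waiters have something to sleep on. */
static void *owner_exit(void *arg)
{
	struct task *tsk = arg;

	pthread_mutex_lock(&tsk->futex_exit_mutex);
	tsk->futex_state = FUTEX_STATE_EXITING;	/* waiters must back off now */
	/* ... robust-list / PI-state cleanup would run here ... */
	tsk->futex_state = FUTEX_STATE_DEAD;	/* cleanup finished */
	pthread_mutex_unlock(&tsk->futex_exit_mutex);
	return NULL;
}

/* Waiter side: instead of retrying sys_futex() in a tight loop when the
 * owner is exiting (the old PF_EXITPIDONE scheme), block on the exit mutex;
 * once it is released the owner is guaranteed to have reached DEAD. */
static int attach_to_owner(struct task *owner)
{
	if (owner->futex_state == FUTEX_STATE_EXITING) {
		pthread_mutex_lock(&owner->futex_exit_mutex);
		pthread_mutex_unlock(&owner->futex_exit_mutex);
	}
	return owner->futex_state == FUTEX_STATE_DEAD ? -1 /* -ESRCH */ : 0;
}

int main(void)
{
	struct task tsk = { FUTEX_STATE_OK, PTHREAD_MUTEX_INITIALIZER };
	pthread_t exiting;

	pthread_create(&exiting, NULL, owner_exit, &tsk);
	pthread_join(exiting, NULL);
	printf("attach after exit: %d\n", attach_to_owner(&tsk));	/* -1 */
	return 0;
}

The point of the mutex (futex-add-mutex-around-futex-exit.patch, completed
by futex-prevent-exit-livelock.patch) is exactly the lock/unlock pair in
attach_to_owner(): a sleeping wait replaces the retry storm against a
possibly preempted, exiting owner.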