git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 1 Feb 2021 15:22:37 +0000 (16:22 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 1 Feb 2021 15:22:37 +0000 (16:22 +0100)
added patches:
exit-exec-seperate-mm_release.patch
futex-add-mutex-around-futex-exit.patch
futex-mark-the-begin-of-futex-exit-explicitly.patch
futex-move-futex-exit-handling-into-futex-code.patch
futex-prevent-exit-livelock.patch
futex-provide-distinct-return-value-when-owner-is-exiting.patch
futex-provide-state-handling-for-exec-as-well.patch
futex-replace-pf_exitpidone-with-a-state.patch
futex-sanitize-exit-state-handling.patch
futex-set-task-futex_state-to-dead-right-after-handling-futex-exit.patch
futex-split-futex_mm_release-for-exit-exec.patch
y2038-futex-move-compat-implementation-into-futex.c.patch

13 files changed:
queue-4.4/exit-exec-seperate-mm_release.patch [new file with mode: 0644]
queue-4.4/futex-add-mutex-around-futex-exit.patch [new file with mode: 0644]
queue-4.4/futex-mark-the-begin-of-futex-exit-explicitly.patch [new file with mode: 0644]
queue-4.4/futex-move-futex-exit-handling-into-futex-code.patch [new file with mode: 0644]
queue-4.4/futex-prevent-exit-livelock.patch [new file with mode: 0644]
queue-4.4/futex-provide-distinct-return-value-when-owner-is-exiting.patch [new file with mode: 0644]
queue-4.4/futex-provide-state-handling-for-exec-as-well.patch [new file with mode: 0644]
queue-4.4/futex-replace-pf_exitpidone-with-a-state.patch [new file with mode: 0644]
queue-4.4/futex-sanitize-exit-state-handling.patch [new file with mode: 0644]
queue-4.4/futex-set-task-futex_state-to-dead-right-after-handling-futex-exit.patch [new file with mode: 0644]
queue-4.4/futex-split-futex_mm_release-for-exit-exec.patch [new file with mode: 0644]
queue-4.4/series
queue-4.4/y2038-futex-move-compat-implementation-into-futex.c.patch [new file with mode: 0644]

diff --git a/queue-4.4/exit-exec-seperate-mm_release.patch b/queue-4.4/exit-exec-seperate-mm_release.patch
new file mode 100644 (file)
index 0000000..477f6df
--- /dev/null
@@ -0,0 +1,102 @@
+From foo@baz Mon Feb  1 04:21:37 PM CET 2021
+From: Lee Jones <lee.jones@linaro.org>
+Date: Mon,  1 Feb 2021 15:12:06 +0000
+Subject: exit/exec: Seperate mm_release()
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
+Message-ID: <20210201151214.2193508-5-lee.jones@linaro.org>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 4610ba7ad877fafc0a25a30c6c82015304120426 upstream.
+
+mm_release() contains the futex exit handling. mm_release() is called from
+do_exit()->exit_mm() and from exec()->exec_mm().
+
+In the exit_mm() case PF_EXITING and the futex state is updated. In the
+exec_mm() case these states are not touched.
+
+As the futex exit code needs further protections against exit races, this
+needs to be split into two functions.
+
+Preparatory only, no functional change.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20191106224556.240518241@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/exec.c             |    2 +-
+ include/linux/sched.h |    6 ++++--
+ kernel/exit.c         |    2 +-
+ kernel/fork.c         |   12 +++++++++++-
+ 4 files changed, 17 insertions(+), 5 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -875,7 +875,7 @@ static int exec_mmap(struct mm_struct *m
+       /* Notify parent that we're no longer interested in the old VM */
+       tsk = current;
+       old_mm = current->mm;
+-      mm_release(tsk, old_mm);
++      exec_mm_release(tsk, old_mm);
+       if (old_mm) {
+               sync_mm_rss(old_mm);
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2647,8 +2647,10 @@ extern struct mm_struct *get_task_mm(str
+  * succeeds.
+  */
+ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
+-/* Remove the current tasks stale references to the old mm_struct */
+-extern void mm_release(struct task_struct *, struct mm_struct *);
++/* Remove the current tasks stale references to the old mm_struct on exit() */
++extern void exit_mm_release(struct task_struct *, struct mm_struct *);
++/* Remove the current tasks stale references to the old mm_struct on exec() */
++extern void exec_mm_release(struct task_struct *, struct mm_struct *);
+ #ifdef CONFIG_HAVE_COPY_THREAD_TLS
+ extern int copy_thread_tls(unsigned long, unsigned long, unsigned long,
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -389,7 +389,7 @@ static void exit_mm(struct task_struct *
+       struct mm_struct *mm = tsk->mm;
+       struct core_state *core_state;
+-      mm_release(tsk, mm);
++      exit_mm_release(tsk, mm);
+       if (!mm)
+               return;
+       sync_mm_rss(mm);
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -887,7 +887,7 @@ static int wait_for_vfork_done(struct ta
+  * restoring the old one. . .
+  * Eric Biederman 10 January 1998
+  */
+-void mm_release(struct task_struct *tsk, struct mm_struct *mm)
++static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+ {
+       /* Get rid of any futexes when releasing the mm */
+       futex_mm_release(tsk);
+@@ -924,6 +924,16 @@ void mm_release(struct task_struct *tsk,
+               complete_vfork_done(tsk);
+ }
++void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
++{
++      mm_release(tsk, mm);
++}
++
++void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
++{
++      mm_release(tsk, mm);
++}
++
+ /*
+  * Allocate a new mm structure and copy contents from the
+  * mm structure of the passed in task structure.
diff --git a/queue-4.4/futex-add-mutex-around-futex-exit.patch b/queue-4.4/futex-add-mutex-around-futex-exit.patch
new file mode 100644 (file)
index 0000000..05070b0
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Mon Feb  1 04:21:37 PM CET 2021
+From: Lee Jones <lee.jones@linaro.org>
+Date: Mon,  1 Feb 2021 15:12:12 +0000
+Subject: futex: Add mutex around futex exit
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
+Message-ID: <20210201151214.2193508-11-lee.jones@linaro.org>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 3f186d974826847a07bc7964d79ec4eded475ad9 upstream.
+
+The mutex will be used in subsequent changes to replace the busy looping of
+a waiter when the futex owner is currently executing the exit cleanup to
+prevent a potential live lock.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20191106224556.845798895@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/futex.h |    1 +
+ include/linux/sched.h |    1 +
+ kernel/futex.c        |   16 ++++++++++++++++
+ 3 files changed, 18 insertions(+)
+
+--- a/include/linux/futex.h
++++ b/include/linux/futex.h
+@@ -70,6 +70,7 @@ static inline void futex_init_task(struc
+       INIT_LIST_HEAD(&tsk->pi_state_list);
+       tsk->pi_state_cache = NULL;
+       tsk->futex_state = FUTEX_STATE_OK;
++      mutex_init(&tsk->futex_exit_mutex);
+ }
+ void futex_exit_recursive(struct task_struct *tsk);
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1704,6 +1704,7 @@ struct task_struct {
+ #endif
+       struct list_head pi_state_list;
+       struct futex_pi_state *pi_state_cache;
++      struct mutex futex_exit_mutex;
+       unsigned int futex_state;
+ #endif
+ #ifdef CONFIG_PERF_EVENTS
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -3271,12 +3271,23 @@ static void futex_cleanup(struct task_st
+  */
+ void futex_exit_recursive(struct task_struct *tsk)
+ {
++      /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
++      if (tsk->futex_state == FUTEX_STATE_EXITING)
++              mutex_unlock(&tsk->futex_exit_mutex);
+       tsk->futex_state = FUTEX_STATE_DEAD;
+ }
+ static void futex_cleanup_begin(struct task_struct *tsk)
+ {
+       /*
++       * Prevent various race issues against a concurrent incoming waiter
++       * including live locks by forcing the waiter to block on
++       * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
++       * attach_to_pi_owner().
++       */
++      mutex_lock(&tsk->futex_exit_mutex);
++
++      /*
+        * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
+        *
+        * This ensures that all subsequent checks of tsk->futex_state in
+@@ -3299,6 +3310,11 @@ static void futex_cleanup_end(struct tas
+        * take another loop until it becomes visible.
+        */
+       tsk->futex_state = state;
++      /*
++       * Drop the exit protection. This unblocks waiters which observed
++       * FUTEX_STATE_EXITING to reevaluate the state.
++       */
++      mutex_unlock(&tsk->futex_exit_mutex);
+ }
+ void futex_exec_release(struct task_struct *tsk)
diff --git a/queue-4.4/futex-mark-the-begin-of-futex-exit-explicitly.patch b/queue-4.4/futex-mark-the-begin-of-futex-exit-explicitly.patch
new file mode 100644 (file)
index 0000000..8812f7a
--- /dev/null
@@ -0,0 +1,159 @@
+From foo@baz Mon Feb  1 04:21:37 PM CET 2021
+From: Lee Jones <lee.jones@linaro.org>
+Date: Mon,  1 Feb 2021 15:12:09 +0000
+Subject: futex: Mark the begin of futex exit explicitly
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
+Message-ID: <20210201151214.2193508-8-lee.jones@linaro.org>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 18f694385c4fd77a09851fd301236746ca83f3cb upstream.
+
+Instead of relying on PF_EXITING use an explicit state for the futex exit
+and set it in the futex exit function. This moves the smp barrier and the
+lock/unlock serialization into the futex code.
+
+As with the DEAD state this is restricted to the exit path as exec
+continues to use the same task struct.
+
+This allows to simplify that logic in a next step.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20191106224556.539409004@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/futex.h |   31 +++----------------------------
+ kernel/exit.c         |    8 +-------
+ kernel/futex.c        |   37 ++++++++++++++++++++++++++++++++++++-
+ 3 files changed, 40 insertions(+), 36 deletions(-)
+
+--- a/include/linux/futex.h
++++ b/include/linux/futex.h
+@@ -57,6 +57,7 @@ union futex_key {
+ #ifdef CONFIG_FUTEX
+ enum {
+       FUTEX_STATE_OK,
++      FUTEX_STATE_EXITING,
+       FUTEX_STATE_DEAD,
+ };
+@@ -71,33 +72,7 @@ static inline void futex_init_task(struc
+       tsk->futex_state = FUTEX_STATE_OK;
+ }
+-/**
+- * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD
+- * @tsk:      task to set the state on
+- *
+- * Set the futex exit state of the task lockless. The futex waiter code
+- * observes that state when a task is exiting and loops until the task has
+- * actually finished the futex cleanup. The worst case for this is that the
+- * waiter runs through the wait loop until the state becomes visible.
+- *
+- * This has two callers:
+- *
+- * - futex_mm_release() after the futex exit cleanup has been done
+- *
+- * - do_exit() from the recursive fault handling path.
+- *
+- * In case of a recursive fault this is best effort. Either the futex exit
+- * code has run already or not. If the OWNER_DIED bit has been set on the
+- * futex then the waiter can take it over. If not, the problem is pushed
+- * back to user space. If the futex exit code did not run yet, then an
+- * already queued waiter might block forever, but there is nothing which
+- * can be done about that.
+- */
+-static inline void futex_exit_done(struct task_struct *tsk)
+-{
+-      tsk->futex_state = FUTEX_STATE_DEAD;
+-}
+-
++void futex_exit_recursive(struct task_struct *tsk);
+ void futex_exit_release(struct task_struct *tsk);
+ void futex_exec_release(struct task_struct *tsk);
+@@ -105,7 +80,7 @@ long do_futex(u32 __user *uaddr, int op,
+             u32 __user *uaddr2, u32 val2, u32 val3);
+ #else
+ static inline void futex_init_task(struct task_struct *tsk) { }
+-static inline void futex_exit_done(struct task_struct *tsk) { }
++static inline void futex_exit_recursive(struct task_struct *tsk) { }
+ static inline void futex_exit_release(struct task_struct *tsk) { }
+ static inline void futex_exec_release(struct task_struct *tsk) { }
+ #endif
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -695,18 +695,12 @@ void do_exit(long code)
+        */
+       if (unlikely(tsk->flags & PF_EXITING)) {
+               pr_alert("Fixing recursive fault but reboot is needed!\n");
+-              futex_exit_done(tsk);
++              futex_exit_recursive(tsk);
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               schedule();
+       }
+       exit_signals(tsk);  /* sets PF_EXITING */
+-      /*
+-       * tsk->flags are checked in the futex code to protect against
+-       * an exiting task cleaning up the robust pi futexes.
+-       */
+-      smp_mb();
+-      raw_spin_unlock_wait(&tsk->pi_lock);
+       if (unlikely(in_atomic())) {
+               pr_info("note: %s[%d] exited with preempt_count %d\n",
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -3252,10 +3252,45 @@ void futex_exec_release(struct task_stru
+               exit_pi_state_list(tsk);
+ }
++/**
++ * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
++ * @tsk:      task to set the state on
++ *
++ * Set the futex exit state of the task lockless. The futex waiter code
++ * observes that state when a task is exiting and loops until the task has
++ * actually finished the futex cleanup. The worst case for this is that the
++ * waiter runs through the wait loop until the state becomes visible.
++ *
++ * This is called from the recursive fault handling path in do_exit().
++ *
++ * This is best effort. Either the futex exit code has run already or
++ * not. If the OWNER_DIED bit has been set on the futex then the waiter can
++ * take it over. If not, the problem is pushed back to user space. If the
++ * futex exit code did not run yet, then an already queued waiter might
++ * block forever, but there is nothing which can be done about that.
++ */
++void futex_exit_recursive(struct task_struct *tsk)
++{
++      tsk->futex_state = FUTEX_STATE_DEAD;
++}
++
+ void futex_exit_release(struct task_struct *tsk)
+ {
++      tsk->futex_state = FUTEX_STATE_EXITING;
++      /*
++       * Ensure that all new tsk->pi_lock acquisitions must observe
++       * FUTEX_STATE_EXITING. Serializes against attach_to_pi_owner().
++       */
++      smp_mb();
++      /*
++       * Ensure that we must observe the pi_state in exit_pi_state_list().
++       */
++      raw_spin_lock_irq(&tsk->pi_lock);
++      raw_spin_unlock_irq(&tsk->pi_lock);
++
+       futex_exec_release(tsk);
+-      futex_exit_done(tsk);
++
++      tsk->futex_state = FUTEX_STATE_DEAD;
+ }
+ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
diff --git a/queue-4.4/futex-move-futex-exit-handling-into-futex-code.patch b/queue-4.4/futex-move-futex-exit-handling-into-futex-code.patch
new file mode 100644 (file)
index 0000000..1093a2f
--- /dev/null
@@ -0,0 +1,188 @@
+From foo@baz Mon Feb  1 04:21:37 PM CET 2021
+From: Lee Jones <lee.jones@linaro.org>
+Date: Mon,  1 Feb 2021 15:12:04 +0000
+Subject: futex: Move futex exit handling into futex code
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
+Message-ID: <20210201151214.2193508-3-lee.jones@linaro.org>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit ba31c1a48538992316cc71ce94fa9cd3e7b427c0 upstream.
+
+The futex exit handling is #ifdeffed into mm_release() which is not pretty
+to begin with. But upcoming changes to address futex exit races need to add
+more functionality to this exit code.
+
+Split it out into a function, move it into futex code and make the various
+futex exit functions static.
+
+Preparatory only and no functional change.
+
+Folded build fix from Borislav.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20191106224556.049705556@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/compat.h |    2 --
+ include/linux/futex.h  |   24 +++++++++++++++++-------
+ kernel/fork.c          |   25 +++----------------------
+ kernel/futex.c         |   28 ++++++++++++++++++++++++++--
+ 4 files changed, 46 insertions(+), 33 deletions(-)
+
+--- a/include/linux/compat.h
++++ b/include/linux/compat.h
+@@ -306,8 +306,6 @@ struct compat_kexec_segment;
+ struct compat_mq_attr;
+ struct compat_msgbuf;
+-extern void compat_exit_robust_list(struct task_struct *curr);
+-
+ asmlinkage long
+ compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
+                          compat_size_t len);
+--- a/include/linux/futex.h
++++ b/include/linux/futex.h
+@@ -1,6 +1,8 @@
+ #ifndef _LINUX_FUTEX_H
+ #define _LINUX_FUTEX_H
++#include <linux/sched.h>
++
+ #include <uapi/linux/futex.h>
+ struct inode;
+@@ -53,14 +55,22 @@ union futex_key {
+ #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
+ #ifdef CONFIG_FUTEX
+-extern void exit_robust_list(struct task_struct *curr);
+-extern void exit_pi_state_list(struct task_struct *curr);
+-#else
+-static inline void exit_robust_list(struct task_struct *curr)
+-{
+-}
+-static inline void exit_pi_state_list(struct task_struct *curr)
++static inline void futex_init_task(struct task_struct *tsk)
+ {
++      tsk->robust_list = NULL;
++#ifdef CONFIG_COMPAT
++      tsk->compat_robust_list = NULL;
++#endif
++      INIT_LIST_HEAD(&tsk->pi_state_list);
++      tsk->pi_state_cache = NULL;
+ }
++
++void futex_mm_release(struct task_struct *tsk);
++
++long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
++            u32 __user *uaddr2, u32 val2, u32 val3);
++#else
++static inline void futex_init_task(struct task_struct *tsk) { }
++static inline void futex_mm_release(struct task_struct *tsk) { }
+ #endif
+ #endif
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -890,20 +890,7 @@ static int wait_for_vfork_done(struct ta
+ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+ {
+       /* Get rid of any futexes when releasing the mm */
+-#ifdef CONFIG_FUTEX
+-      if (unlikely(tsk->robust_list)) {
+-              exit_robust_list(tsk);
+-              tsk->robust_list = NULL;
+-      }
+-#ifdef CONFIG_COMPAT
+-      if (unlikely(tsk->compat_robust_list)) {
+-              compat_exit_robust_list(tsk);
+-              tsk->compat_robust_list = NULL;
+-      }
+-#endif
+-      if (unlikely(!list_empty(&tsk->pi_state_list)))
+-              exit_pi_state_list(tsk);
+-#endif
++      futex_mm_release(tsk);
+       uprobe_free_utask(tsk);
+@@ -1511,14 +1498,8 @@ static struct task_struct *copy_process(
+ #ifdef CONFIG_BLOCK
+       p->plug = NULL;
+ #endif
+-#ifdef CONFIG_FUTEX
+-      p->robust_list = NULL;
+-#ifdef CONFIG_COMPAT
+-      p->compat_robust_list = NULL;
+-#endif
+-      INIT_LIST_HEAD(&p->pi_state_list);
+-      p->pi_state_cache = NULL;
+-#endif
++      futex_init_task(p);
++
+       /*
+        * sigaltstack should be cleared when sharing the same VM
+        */
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -331,6 +331,12 @@ static inline bool should_fail_futex(boo
+ }
+ #endif /* CONFIG_FAIL_FUTEX */
++#ifdef CONFIG_COMPAT
++static void compat_exit_robust_list(struct task_struct *curr);
++#else
++static inline void compat_exit_robust_list(struct task_struct *curr) { }
++#endif
++
+ static inline void futex_get_mm(union futex_key *key)
+ {
+       atomic_inc(&key->private.mm->mm_count);
+@@ -889,7 +895,7 @@ static struct task_struct * futex_find_g
+  * Kernel cleans up PI-state, but userspace is likely hosed.
+  * (Robust-futex cleanup is separate and might save the day for userspace.)
+  */
+-void exit_pi_state_list(struct task_struct *curr)
++static void exit_pi_state_list(struct task_struct *curr)
+ {
+       struct list_head *next, *head = &curr->pi_state_list;
+       struct futex_pi_state *pi_state;
+@@ -3166,7 +3172,7 @@ static inline int fetch_robust_entry(str
+  *
+  * We silently return on any sign of list-walking problem.
+  */
+-void exit_robust_list(struct task_struct *curr)
++static void exit_robust_list(struct task_struct *curr)
+ {
+       struct robust_list_head __user *head = curr->robust_list;
+       struct robust_list __user *entry, *next_entry, *pending;
+@@ -3229,6 +3235,24 @@ void exit_robust_list(struct task_struct
+                                  curr, pip);
+ }
++void futex_mm_release(struct task_struct *tsk)
++{
++      if (unlikely(tsk->robust_list)) {
++              exit_robust_list(tsk);
++              tsk->robust_list = NULL;
++      }
++
++#ifdef CONFIG_COMPAT
++      if (unlikely(tsk->compat_robust_list)) {
++              compat_exit_robust_list(tsk);
++              tsk->compat_robust_list = NULL;
++      }
++#endif
++
++      if (unlikely(!list_empty(&tsk->pi_state_list)))
++              exit_pi_state_list(tsk);
++}
++
+ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+               u32 __user *uaddr2, u32 val2, u32 val3)
+ {
diff --git a/queue-4.4/futex-prevent-exit-livelock.patch b/queue-4.4/futex-prevent-exit-livelock.patch
new file mode 100644 (file)
index 0000000..e61ef84
--- /dev/null
@@ -0,0 +1,345 @@
+From foo@baz Mon Feb  1 04:21:37 PM CET 2021
+From: Lee Jones <lee.jones@linaro.org>
+Date: Mon,  1 Feb 2021 15:12:14 +0000
+Subject: futex: Prevent exit livelock
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Oleg Nesterov <oleg@redhat.com>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
+Message-ID: <20210201151214.2193508-13-lee.jones@linaro.org>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 3ef240eaff36b8119ac9e2ea17cbf41179c930ba upstream.
+
+Oleg provided the following test case:
+
+int main(void)
+{
+       struct sched_param sp = {};
+
+       sp.sched_priority = 2;
+       assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0);
+
+       int lock = vfork();
+       if (!lock) {
+               sp.sched_priority = 1;
+               assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0);
+               _exit(0);
+       }
+
+       syscall(__NR_futex, &lock, FUTEX_LOCK_PI, 0,0,0);
+       return 0;
+}
+
+This creates an unkillable RT process spinning in futex_lock_pi() on a UP
+machine or if the process is affine to a single CPU. The reason is:
+
+ parent                                        child
+
+  set FIFO prio 2
+
+  vfork()                      ->      set FIFO prio 1
+   implies wait_for_child()            sched_setscheduler(...)
+                                       exit()
+                                       do_exit()
+                                       ....
+                                       mm_release()
+                                         tsk->futex_state = FUTEX_STATE_EXITING;
+                                         exit_futex(); (NOOP in this case)
+                                         complete() --> wakes parent
+  sys_futex()
+    loop infinite because
+    tsk->futex_state == FUTEX_STATE_EXITING
+
+The same problem can happen just by regular preemption as well:
+
+  task holds futex
+  ...
+  do_exit()
+    tsk->futex_state = FUTEX_STATE_EXITING;
+
+  --> preemption (unrelated wakeup of some other higher prio task, e.g. timer)
+
+  switch_to(other_task)
+
+  return to user
+  sys_futex()
+       loop infinite as above
+
+Just for the fun of it the futex exit cleanup could trigger the wakeup
+itself before the task sets its futex state to DEAD.
+
+To cure this, the handling of the exiting owner is changed so:
+
+   - A refcount is held on the task
+
+   - The task pointer is stored in a caller visible location
+
+   - The caller drops all locks (hash bucket, mmap_sem) and blocks
+     on task::futex_exit_mutex. When the mutex is acquired then
+     the exiting task has completed the cleanup and the state
+     is consistent and can be reevaluated.
+
+This is not a pretty solution, but there is no choice other than returning
+an error code to user space, which would break the state consistency
+guarantee and open another can of problems including regressions.
+
+For stable backports the preparatory commits ac31c7ff8624 .. ba31c1a48538
+are required as well, but for anything older than 5.3.y the backports are
+going to be provided when this hits mainline as the other dependencies for
+those kernels are definitely not stable material.
+
+Fixes: 778e9a9c3e71 ("pi-futex: fix exit races and locking problems")
+Reported-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Stable Team <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20191106224557.041676471@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c |  106 ++++++++++++++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 91 insertions(+), 15 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1067,12 +1067,43 @@ out_state:
+       return 0;
+ }
++/**
++ * wait_for_owner_exiting - Block until the owner has exited
++ * @exiting:  Pointer to the exiting task
++ *
++ * Caller must hold a refcount on @exiting.
++ */
++static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
++{
++      if (ret != -EBUSY) {
++              WARN_ON_ONCE(exiting);
++              return;
++      }
++
++      if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
++              return;
++
++      mutex_lock(&exiting->futex_exit_mutex);
++      /*
++       * No point in doing state checking here. If the waiter got here
++       * while the task was in exec()->exec_futex_release() then it can
++       * have any FUTEX_STATE_* value when the waiter has acquired the
++       * mutex. OK, if running, EXITING or DEAD if it reached exit()
++       * already. Highly unlikely and not a problem. Just one more round
++       * through the futex maze.
++       */
++      mutex_unlock(&exiting->futex_exit_mutex);
++
++      put_task_struct(exiting);
++}
++
+ /*
+  * Lookup the task for the TID provided from user space and attach to
+  * it after doing proper sanity checks.
+  */
+ static int attach_to_pi_owner(u32 uval, union futex_key *key,
+-                            struct futex_pi_state **ps)
++                            struct futex_pi_state **ps,
++                            struct task_struct **exiting)
+ {
+       pid_t pid = uval & FUTEX_TID_MASK;
+       struct futex_pi_state *pi_state;
+@@ -1108,7 +1139,19 @@ static int attach_to_pi_owner(u32 uval,
+               int ret = (p->futex_state = FUTEX_STATE_DEAD) ? -ESRCH : -EAGAIN;
+               raw_spin_unlock_irq(&p->pi_lock);
+-              put_task_struct(p);
++              /*
++               * If the owner task is between FUTEX_STATE_EXITING and
++               * FUTEX_STATE_DEAD then store the task pointer and keep
++               * the reference on the task struct. The calling code will
++               * drop all locks, wait for the task to reach
++               * FUTEX_STATE_DEAD and then drop the refcount. This is
++               * required to prevent a live lock when the current task
++               * preempted the exiting task between the two states.
++               */
++              if (ret == -EBUSY)
++                      *exiting = p;
++              else
++                      put_task_struct(p);
+               return ret;
+       }
+@@ -1139,7 +1182,8 @@ static int attach_to_pi_owner(u32 uval,
+ }
+ static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
+-                         union futex_key *key, struct futex_pi_state **ps)
++                         union futex_key *key, struct futex_pi_state **ps,
++                         struct task_struct **exiting)
+ {
+       struct futex_q *match = futex_top_waiter(hb, key);
+@@ -1154,7 +1198,7 @@ static int lookup_pi_state(u32 uval, str
+        * We are the first waiter - try to look up the owner based on
+        * @uval and attach to it.
+        */
+-      return attach_to_pi_owner(uval, key, ps);
++      return attach_to_pi_owner(uval, key, ps, exiting);
+ }
+ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
+@@ -1180,6 +1224,8 @@ static int lock_pi_update_atomic(u32 __u
+  *                    lookup
+  * @task:             the task to perform the atomic lock work for.  This will
+  *                    be "current" except in the case of requeue pi.
++ * @exiting:          Pointer to store the task pointer of the owner task
++ *                    which is in the middle of exiting
+  * @set_waiters:      force setting the FUTEX_WAITERS bit (1) or not (0)
+  *
+  * Return:
+@@ -1188,11 +1234,17 @@ static int lock_pi_update_atomic(u32 __u
+  * <0 - error
+  *
+  * The hb->lock and futex_key refs shall be held by the caller.
++ *
++ * @exiting is only set when the return value is -EBUSY. If so, this holds
++ * a refcount on the exiting task on return and the caller needs to drop it
++ * after waiting for the exit to complete.
+  */
+ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
+                               union futex_key *key,
+                               struct futex_pi_state **ps,
+-                              struct task_struct *task, int set_waiters)
++                              struct task_struct *task,
++                              struct task_struct **exiting,
++                              int set_waiters)
+ {
+       u32 uval, newval, vpid = task_pid_vnr(task);
+       struct futex_q *match;
+@@ -1262,7 +1314,7 @@ static int futex_lock_pi_atomic(u32 __us
+        * attach to the owner. If that fails, no harm done, we only
+        * set the FUTEX_WAITERS bit in the user space variable.
+        */
+-      return attach_to_pi_owner(uval, key, ps);
++      return attach_to_pi_owner(uval, key, ps, exiting);
+ }
+ /**
+@@ -1688,6 +1740,8 @@ void requeue_pi_wake_futex(struct futex_
+  * @key1:             the from futex key
+  * @key2:             the to futex key
+  * @ps:                       address to store the pi_state pointer
++ * @exiting:          Pointer to store the task pointer of the owner task
++ *                    which is in the middle of exiting
+  * @set_waiters:      force setting the FUTEX_WAITERS bit (1) or not (0)
+  *
+  * Try and get the lock on behalf of the top waiter if we can do it atomically.
+@@ -1695,16 +1749,20 @@ void requeue_pi_wake_futex(struct futex_
+  * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
+  * hb1 and hb2 must be held by the caller.
+  *
++ * @exiting is only set when the return value is -EBUSY. If so, this holds
++ * a refcount on the exiting task on return and the caller needs to drop it
++ * after waiting for the exit to complete.
++ *
+  * Return:
+  *  0 - failed to acquire the lock atomically;
+  * >0 - acquired the lock, return value is vpid of the top_waiter
+  * <0 - error
+  */
+-static int futex_proxy_trylock_atomic(u32 __user *pifutex,
+-                               struct futex_hash_bucket *hb1,
+-                               struct futex_hash_bucket *hb2,
+-                               union futex_key *key1, union futex_key *key2,
+-                               struct futex_pi_state **ps, int set_waiters)
++static int
++futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
++                         struct futex_hash_bucket *hb2, union futex_key *key1,
++                         union futex_key *key2, struct futex_pi_state **ps,
++                         struct task_struct **exiting, int set_waiters)
+ {
+       struct futex_q *top_waiter = NULL;
+       u32 curval;
+@@ -1741,7 +1799,7 @@ static int futex_proxy_trylock_atomic(u3
+        */
+       vpid = task_pid_vnr(top_waiter->task);
+       ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
+-                                 set_waiters);
++                                 exiting, set_waiters);
+       if (ret == 1) {
+               requeue_pi_wake_futex(top_waiter, key2, hb2);
+               return vpid;
+@@ -1861,6 +1919,8 @@ retry_private:
+       }
+       if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
++              struct task_struct *exiting = NULL;
++
+               /*
+                * Attempt to acquire uaddr2 and wake the top waiter. If we
+                * intend to requeue waiters, force setting the FUTEX_WAITERS
+@@ -1868,7 +1928,8 @@ retry_private:
+                * faults rather in the requeue loop below.
+                */
+               ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
+-                                               &key2, &pi_state, nr_requeue);
++                                               &key2, &pi_state,
++                                               &exiting, nr_requeue);
+               /*
+                * At this point the top_waiter has either taken uaddr2 or is
+@@ -1892,7 +1953,8 @@ retry_private:
+                        * rereading and handing potential crap to
+                        * lookup_pi_state.
+                        */
+-                      ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
++                      ret = lookup_pi_state(ret, hb2, &key2,
++                                            &pi_state, &exiting);
+               }
+               switch (ret) {
+@@ -1923,6 +1985,12 @@ retry_private:
+                       hb_waiters_dec(hb2);
+                       put_futex_key(&key2);
+                       put_futex_key(&key1);
++                      /*
++                       * Handle the case where the owner is in the middle of
++                       * exiting. Wait for the exit to complete otherwise
++                       * this task might loop forever, aka. live lock.
++                       */
++                      wait_for_owner_exiting(ret, exiting);
+                       cond_resched();
+                       goto retry;
+               default:
+@@ -2545,6 +2613,7 @@ static int futex_lock_pi(u32 __user *uad
+                        ktime_t *time, int trylock)
+ {
+       struct hrtimer_sleeper timeout, *to = NULL;
++      struct task_struct *exiting = NULL;
+       struct futex_hash_bucket *hb;
+       struct futex_q q = futex_q_init;
+       int res, ret;
+@@ -2568,7 +2637,8 @@ retry:
+ retry_private:
+       hb = queue_lock(&q);
+-      ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
++      ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
++                                 &exiting, 0);
+       if (unlikely(ret)) {
+               /*
+                * Atomic work succeeded and we got the lock,
+@@ -2591,6 +2661,12 @@ retry_private:
+                        */
+                       queue_unlock(hb);
+                       put_futex_key(&q.key);
++                      /*
++                       * Handle the case where the owner is in the middle of
++                       * exiting. Wait for the exit to complete otherwise
++                       * this task might loop forever, aka. live lock.
++                       */
++                      wait_for_owner_exiting(ret, exiting);
+                       cond_resched();
+                       goto retry;
+               default:
diff --git a/queue-4.4/futex-provide-distinct-return-value-when-owner-is-exiting.patch b/queue-4.4/futex-provide-distinct-return-value-when-owner-is-exiting.patch
new file mode 100644 (file)
index 0000000..ba0d564
--- /dev/null
@@ -0,0 +1,71 @@
+From foo@baz Mon Feb  1 04:21:37 PM CET 2021
+From: Lee Jones <lee.jones@linaro.org>
+Date: Mon,  1 Feb 2021 15:12:13 +0000
+Subject: futex: Provide distinct return value when owner is exiting
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
+Message-ID: <20210201151214.2193508-12-lee.jones@linaro.org>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit ac31c7ff8624409ba3c4901df9237a616c187a5d upstream.
+
+attach_to_pi_owner() returns -EAGAIN for various cases:
+
+ - Owner task is exiting
+ - Futex value has changed
+
+The caller drops the held locks (hash bucket, mmap_sem) and retries the
+operation. In case of the owner task exiting this can result in a live
+lock.
+
+As a preparatory step for seperating those cases, provide a distinct return
+value (EBUSY) for the owner exiting case.
+
+No functional change.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20191106224556.935606117@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1909,12 +1909,13 @@ retry_private:
+                       if (!ret)
+                               goto retry;
+                       goto out;
++              case -EBUSY:
+               case -EAGAIN:
+                       /*
+                        * Two reasons for this:
+-                       * - Owner is exiting and we just wait for the
++                       * - EBUSY: Owner is exiting and we just wait for the
+                        *   exit to complete.
+-                       * - The user space value changed.
++                       * - EAGAIN: The user space value changed.
+                        */
+                       free_pi_state(pi_state);
+                       pi_state = NULL;
+@@ -2580,12 +2581,13 @@ retry_private:
+                       goto out_unlock_put_key;
+               case -EFAULT:
+                       goto uaddr_faulted;
++              case -EBUSY:
+               case -EAGAIN:
+                       /*
+                        * Two reasons for this:
+-                       * - Task is exiting and we just wait for the
++                       * - EBUSY: Task is exiting and we just wait for the
+                        *   exit to complete.
+-                       * - The user space value changed.
++                       * - EAGAIN: The user space value changed.
+                        */
+                       queue_unlock(hb);
+                       put_futex_key(&q.key);
diff --git a/queue-4.4/futex-provide-state-handling-for-exec-as-well.patch b/queue-4.4/futex-provide-state-handling-for-exec-as-well.patch
new file mode 100644 (file)
index 0000000..3dd692c
--- /dev/null
@@ -0,0 +1,102 @@
+From foo@baz Mon Feb  1 04:21:37 PM CET 2021
+From: Lee Jones <lee.jones@linaro.org>
+Date: Mon,  1 Feb 2021 15:12:11 +0000
+Subject: futex: Provide state handling for exec() as well
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
+Message-ID: <20210201151214.2193508-10-lee.jones@linaro.org>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit af8cbda2cfcaa5515d61ec500498d46e9a8247e2 upstream.
+
+exec() attempts to handle potentially held futexes gracefully by running
+the futex exit handling code like exit() does.
+
+The current implementation has no protection against concurrent incoming
+waiters. The reason is that the futex state cannot be set to
+FUTEX_STATE_DEAD after the cleanup because the task struct is still active
+and just about to execute the new binary.
+
+While its arguably buggy when a task holds a futex over exec(), for
+consistency sake the state handling can at least cover the actual futex
+exit cleanup section. This provides state consistency protection accross
+the cleanup. As the futex state of the task becomes FUTEX_STATE_OK after the
+cleanup has been finished, this cannot prevent subsequent attempts to
+attach to the task in case that the cleanup was not successfull in mopping
+up all leftovers.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20191106224556.753355618@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c |   38 ++++++++++++++++++++++++++++++++++----
+ 1 file changed, 34 insertions(+), 4 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -3234,7 +3234,7 @@ static void exit_robust_list(struct task
+                                  curr, pip);
+ }
+-void futex_exec_release(struct task_struct *tsk)
++static void futex_cleanup(struct task_struct *tsk)
+ {
+       if (unlikely(tsk->robust_list)) {
+               exit_robust_list(tsk);
+@@ -3274,7 +3274,7 @@ void futex_exit_recursive(struct task_st
+       tsk->futex_state = FUTEX_STATE_DEAD;
+ }
+-void futex_exit_release(struct task_struct *tsk)
++static void futex_cleanup_begin(struct task_struct *tsk)
+ {
+       /*
+        * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
+@@ -3290,10 +3290,40 @@ void futex_exit_release(struct task_stru
+       raw_spin_lock_irq(&tsk->pi_lock);
+       tsk->futex_state = FUTEX_STATE_EXITING;
+       raw_spin_unlock_irq(&tsk->pi_lock);
++}
+-      futex_exec_release(tsk);
++static void futex_cleanup_end(struct task_struct *tsk, int state)
++{
++      /*
++       * Lockless store. The only side effect is that an observer might
++       * take another loop until it becomes visible.
++       */
++      tsk->futex_state = state;
++}
+-      tsk->futex_state = FUTEX_STATE_DEAD;
++void futex_exec_release(struct task_struct *tsk)
++{
++      /*
++       * The state handling is done for consistency, but in the case of
++       * exec() there is no way to prevent further damage as the PID stays
++       * the same. But for the unlikely and arguably buggy case that a
++       * futex is held on exec(), this provides at least as much state
++       * consistency protection which is possible.
++       */
++      futex_cleanup_begin(tsk);
++      futex_cleanup(tsk);
++      /*
++       * Reset the state to FUTEX_STATE_OK. The task is alive and about
++       * exec a new binary.
++       */
++      futex_cleanup_end(tsk, FUTEX_STATE_OK);
++}
++
++void futex_exit_release(struct task_struct *tsk)
++{
++      futex_cleanup_begin(tsk);
++      futex_cleanup(tsk);
++      futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
+ }
+ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
diff --git a/queue-4.4/futex-replace-pf_exitpidone-with-a-state.patch b/queue-4.4/futex-replace-pf_exitpidone-with-a-state.patch
new file mode 100644 (file)
index 0000000..9494d65
--- /dev/null
@@ -0,0 +1,175 @@
+From foo@baz Mon Feb  1 04:21:37 PM CET 2021
+From: Lee Jones <lee.jones@linaro.org>
+Date: Mon,  1 Feb 2021 15:12:05 +0000
+Subject: futex: Replace PF_EXITPIDONE with a state
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
+Message-ID: <20210201151214.2193508-4-lee.jones@linaro.org>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 3d4775df0a89240f671861c6ab6e8d59af8e9e41 upstream.
+
+The futex exit handling relies on PF_ flags. That's suboptimal as it
+requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in
+the middle of do_exit() to enforce the observability of PF_EXITING in the
+futex code.
+
+Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic
+over to the new state. The PF_EXITING dependency will be cleaned up in a
+later step.
+
+This prepares for handling various futex exit issues later.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/futex.h |   34 ++++++++++++++++++++++++++++++++++
+ include/linux/sched.h |    2 +-
+ kernel/exit.c         |   18 ++----------------
+ kernel/futex.c        |   17 ++++++++---------
+ 4 files changed, 45 insertions(+), 26 deletions(-)
+
+--- a/include/linux/futex.h
++++ b/include/linux/futex.h
+@@ -55,6 +55,11 @@ union futex_key {
+ #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
+ #ifdef CONFIG_FUTEX
++enum {
++      FUTEX_STATE_OK,
++      FUTEX_STATE_DEAD,
++};
++
+ static inline void futex_init_task(struct task_struct *tsk)
+ {
+       tsk->robust_list = NULL;
+@@ -63,6 +68,34 @@ static inline void futex_init_task(struc
+ #endif
+       INIT_LIST_HEAD(&tsk->pi_state_list);
+       tsk->pi_state_cache = NULL;
++      tsk->futex_state = FUTEX_STATE_OK;
++}
++
++/**
++ * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD
++ * @tsk:      task to set the state on
++ *
++ * Set the futex exit state of the task lockless. The futex waiter code
++ * observes that state when a task is exiting and loops until the task has
++ * actually finished the futex cleanup. The worst case for this is that the
++ * waiter runs through the wait loop until the state becomes visible.
++ *
++ * This has two callers:
++ *
++ * - futex_mm_release() after the futex exit cleanup has been done
++ *
++ * - do_exit() from the recursive fault handling path.
++ *
++ * In case of a recursive fault this is best effort. Either the futex exit
++ * code has run already or not. If the OWNER_DIED bit has been set on the
++ * futex then the waiter can take it over. If not, the problem is pushed
++ * back to user space. If the futex exit code did not run yet, then an
++ * already queued waiter might block forever, but there is nothing which
++ * can be done about that.
++ */
++static inline void futex_exit_done(struct task_struct *tsk)
++{
++      tsk->futex_state = FUTEX_STATE_DEAD;
+ }
+ void futex_mm_release(struct task_struct *tsk);
+@@ -72,5 +105,6 @@ long do_futex(u32 __user *uaddr, int op,
+ #else
+ static inline void futex_init_task(struct task_struct *tsk) { }
+ static inline void futex_mm_release(struct task_struct *tsk) { }
++static inline void futex_exit_done(struct task_struct *tsk) { }
+ #endif
+ #endif
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1704,6 +1704,7 @@ struct task_struct {
+ #endif
+       struct list_head pi_state_list;
+       struct futex_pi_state *pi_state_cache;
++      unsigned int futex_state;
+ #endif
+ #ifdef CONFIG_PERF_EVENTS
+       struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
+@@ -2099,7 +2100,6 @@ extern void thread_group_cputime_adjuste
+  * Per process flags
+  */
+ #define PF_EXITING    0x00000004      /* getting shut down */
+-#define PF_EXITPIDONE 0x00000008      /* pi exit done on shut down */
+ #define PF_VCPU               0x00000010      /* I'm a virtual CPU */
+ #define PF_WQ_WORKER  0x00000020      /* I'm a workqueue worker */
+ #define PF_FORKNOEXEC 0x00000040      /* forked but didn't exec */
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -695,16 +695,7 @@ void do_exit(long code)
+        */
+       if (unlikely(tsk->flags & PF_EXITING)) {
+               pr_alert("Fixing recursive fault but reboot is needed!\n");
+-              /*
+-               * We can do this unlocked here. The futex code uses
+-               * this flag just to verify whether the pi state
+-               * cleanup has been done or not. In the worst case it
+-               * loops once more. We pretend that the cleanup was
+-               * done as there is no way to return. Either the
+-               * OWNER_DIED bit is set by now or we push the blocked
+-               * task into the wait for ever nirwana as well.
+-               */
+-              tsk->flags |= PF_EXITPIDONE;
++              futex_exit_done(tsk);
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               schedule();
+       }
+@@ -793,12 +784,7 @@ void do_exit(long code)
+        * Make sure we are holding no locks:
+        */
+       debug_check_no_locks_held();
+-      /*
+-       * We can do this unlocked here. The futex code uses this flag
+-       * just to verify whether the pi state cleanup has been done
+-       * or not. In the worst case it loops once more.
+-       */
+-      tsk->flags |= PF_EXITPIDONE;
++      futex_exit_done(tsk);
+       if (tsk->io_context)
+               exit_io_context(tsk);
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1094,19 +1094,18 @@ static int attach_to_pi_owner(u32 uval,
+       }
+       /*
+-       * We need to look at the task state flags to figure out,
+-       * whether the task is exiting. To protect against the do_exit
+-       * change of the task flags, we do this protected by
+-       * p->pi_lock:
++       * We need to look at the task state to figure out, whether the
++       * task is exiting. To protect against the change of the task state
++       * in futex_exit_release(), we do this protected by p->pi_lock:
+        */
+       raw_spin_lock_irq(&p->pi_lock);
+-      if (unlikely(p->flags & PF_EXITING)) {
++      if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
+               /*
+-               * The task is on the way out. When PF_EXITPIDONE is
+-               * set, we know that the task has finished the
+-               * cleanup:
++               * The task is on the way out. When the futex state is
++               * FUTEX_STATE_DEAD, we know that the task has finished
++               * the cleanup:
+                */
+-              int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
++              int ret = (p->futex_state == FUTEX_STATE_DEAD) ? -ESRCH : -EAGAIN;
+               raw_spin_unlock_irq(&p->pi_lock);
+               put_task_struct(p);
diff --git a/queue-4.4/futex-sanitize-exit-state-handling.patch b/queue-4.4/futex-sanitize-exit-state-handling.patch
new file mode 100644 (file)
index 0000000..8c8b42d
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Mon Feb  1 04:21:37 PM CET 2021
+From: Lee Jones <lee.jones@linaro.org>
+Date: Mon,  1 Feb 2021 15:12:10 +0000
+Subject: futex: Sanitize exit state handling
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
+Message-ID: <20210201151214.2193508-9-lee.jones@linaro.org>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 4a8e991b91aca9e20705d434677ac013974e0e30 upstream.
+
+Instead of having a smp_mb() and an empty lock/unlock of task::pi_lock move
+the state setting into to the lock section.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20191106224556.645603214@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c |   17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -3276,16 +3276,19 @@ void futex_exit_recursive(struct task_st
+ void futex_exit_release(struct task_struct *tsk)
+ {
+-      tsk->futex_state = FUTEX_STATE_EXITING;
+-      /*
+-       * Ensure that all new tsk->pi_lock acquisitions must observe
+-       * FUTEX_STATE_EXITING. Serializes against attach_to_pi_owner().
+-       */
+-      smp_mb();
+       /*
+-       * Ensure that we must observe the pi_state in exit_pi_state_list().
++       * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
++       *
++       * This ensures that all subsequent checks of tsk->futex_state in
++       * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
++       * tsk->pi_lock held.
++       *
++       * It guarantees also that a pi_state which was queued right before
++       * the state change under tsk->pi_lock by a concurrent waiter must
++       * be observed in exit_pi_state_list().
+        */
+       raw_spin_lock_irq(&tsk->pi_lock);
++      tsk->futex_state = FUTEX_STATE_EXITING;
+       raw_spin_unlock_irq(&tsk->pi_lock);
+       futex_exec_release(tsk);
diff --git a/queue-4.4/futex-set-task-futex_state-to-dead-right-after-handling-futex-exit.patch b/queue-4.4/futex-set-task-futex_state-to-dead-right-after-handling-futex-exit.patch
new file mode 100644 (file)
index 0000000..cb37cbd
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Mon Feb  1 04:21:37 PM CET 2021
+From: Lee Jones <lee.jones@linaro.org>
+Date: Mon,  1 Feb 2021 15:12:08 +0000
+Subject: futex: Set task::futex_state to DEAD right after handling futex exit
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
+Message-ID: <20210201151214.2193508-7-lee.jones@linaro.org>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit f24f22435dcc11389acc87e5586239c1819d217c upstream.
+
+Setting task::futex_state in do_exit() is rather arbitrarily placed for no
+reason. Move it into the futex code.
+
+Note, this is only done for the exit cleanup as the exec cleanup cannot set
+the state to FUTEX_STATE_DEAD because the task struct is still in active
+use.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20191106224556.439511191@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/exit.c  |    1 -
+ kernel/futex.c |    1 +
+ 2 files changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -784,7 +784,6 @@ void do_exit(long code)
+        * Make sure we are holding no locks:
+        */
+       debug_check_no_locks_held();
+-      futex_exit_done(tsk);
+       if (tsk->io_context)
+               exit_io_context(tsk);
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -3255,6 +3255,7 @@ void futex_exec_release(struct task_stru
+ void futex_exit_release(struct task_struct *tsk)
+ {
+       futex_exec_release(tsk);
++      futex_exit_done(tsk);
+ }
+ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
diff --git a/queue-4.4/futex-split-futex_mm_release-for-exit-exec.patch b/queue-4.4/futex-split-futex_mm_release-for-exit-exec.patch
new file mode 100644 (file)
index 0000000..bb6b2f3
--- /dev/null
@@ -0,0 +1,100 @@
+From foo@baz Mon Feb  1 04:21:37 PM CET 2021
+From: Lee Jones <lee.jones@linaro.org>
+Date: Mon,  1 Feb 2021 15:12:07 +0000
+Subject: futex: Split futex_mm_release() for exit/exec
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
+Message-ID: <20210201151214.2193508-6-lee.jones@linaro.org>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 150d71584b12809144b8145b817e83b81158ae5f upstream.
+
+To allow separate handling of the futex exit state in the futex exit code
+for exit and exec, split futex_mm_release() into two functions and invoke
+them from the corresponding exit/exec_mm_release() callsites.
+
+Preparatory only, no functional change.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20191106224556.332094221@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/futex.h |    6 ++++--
+ kernel/fork.c         |    5 ++---
+ kernel/futex.c        |    7 ++++++-
+ 3 files changed, 12 insertions(+), 6 deletions(-)
+
+--- a/include/linux/futex.h
++++ b/include/linux/futex.h
+@@ -98,13 +98,15 @@ static inline void futex_exit_done(struc
+       tsk->futex_state = FUTEX_STATE_DEAD;
+ }
+-void futex_mm_release(struct task_struct *tsk);
++void futex_exit_release(struct task_struct *tsk);
++void futex_exec_release(struct task_struct *tsk);
+ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+             u32 __user *uaddr2, u32 val2, u32 val3);
+ #else
+ static inline void futex_init_task(struct task_struct *tsk) { }
+-static inline void futex_mm_release(struct task_struct *tsk) { }
+ static inline void futex_exit_done(struct task_struct *tsk) { }
++static inline void futex_exit_release(struct task_struct *tsk) { }
++static inline void futex_exec_release(struct task_struct *tsk) { }
+ #endif
+ #endif
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -889,9 +889,6 @@ static int wait_for_vfork_done(struct ta
+  */
+ static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+ {
+-      /* Get rid of any futexes when releasing the mm */
+-      futex_mm_release(tsk);
+-
+       uprobe_free_utask(tsk);
+       /* Get rid of any cached register state */
+@@ -926,11 +923,13 @@ static void mm_release(struct task_struc
+ void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+ {
++      futex_exit_release(tsk);
+       mm_release(tsk, mm);
+ }
+ void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+ {
++      futex_exec_release(tsk);
+       mm_release(tsk, mm);
+ }
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -3234,7 +3234,7 @@ static void exit_robust_list(struct task
+                                  curr, pip);
+ }
+-void futex_mm_release(struct task_struct *tsk)
++void futex_exec_release(struct task_struct *tsk)
+ {
+       if (unlikely(tsk->robust_list)) {
+               exit_robust_list(tsk);
+@@ -3252,6 +3252,11 @@ void futex_mm_release(struct task_struct
+               exit_pi_state_list(tsk);
+ }
++void futex_exit_release(struct task_struct *tsk)
++{
++      futex_exec_release(tsk);
++}
++
+ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+               u32 __user *uaddr2, u32 val2, u32 val3)
+ {
index 61f871655889731ba4a3a42d8a49de47aaa33c04..0a4d0bae01938bf7577317f8e45c261339de490f 100644 (file)
@@ -4,3 +4,15 @@ net-usb-qmi_wwan-added-support-for-thales-cinterion-plsx3-modem-family.patch
 kvm-x86-pmu-fix-hw_ref_cpu_cycles-event-pseudo-encoding-in-intel_arch_events.patch
 mt7601u-fix-kernel-crash-unplugging-the-device.patch
 mt7601u-fix-rx-buffer-refcounting.patch
+y2038-futex-move-compat-implementation-into-futex.c.patch
+futex-move-futex-exit-handling-into-futex-code.patch
+futex-replace-pf_exitpidone-with-a-state.patch
+exit-exec-seperate-mm_release.patch
+futex-split-futex_mm_release-for-exit-exec.patch
+futex-set-task-futex_state-to-dead-right-after-handling-futex-exit.patch
+futex-mark-the-begin-of-futex-exit-explicitly.patch
+futex-sanitize-exit-state-handling.patch
+futex-provide-state-handling-for-exec-as-well.patch
+futex-add-mutex-around-futex-exit.patch
+futex-provide-distinct-return-value-when-owner-is-exiting.patch
+futex-prevent-exit-livelock.patch
diff --git a/queue-4.4/y2038-futex-move-compat-implementation-into-futex.c.patch b/queue-4.4/y2038-futex-move-compat-implementation-into-futex.c.patch
new file mode 100644 (file)
index 0000000..4b1d62c
--- /dev/null
@@ -0,0 +1,506 @@
+From foo@baz Mon Feb  1 04:21:37 PM CET 2021
+From: Lee Jones <lee.jones@linaro.org>
+Date: Mon,  1 Feb 2021 15:12:03 +0000
+Subject: y2038: futex: Move compat implementation into futex.c
+To: stable@vger.kernel.org
+Cc: Arnd Bergmann <arnd@arndb.de>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
+Message-ID: <20210201151214.2193508-2-lee.jones@linaro.org>
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit 04e7712f4460585e5eed5b853fd8b82a9943958f upstream.
+
+We are going to share the compat_sys_futex() handler between 64-bit
+architectures and 32-bit architectures that need to deal with both 32-bit
+and 64-bit time_t, and this is easier if both entry points are in the
+same file.
+
+In fact, most other system call handlers do the same thing these days, so
+let's follow the trend here and merge all of futex_compat.c into futex.c.
+
+In the process, a few minor changes have to be done to make sure everything
+still makes sense: handle_futex_death() and futex_cmpxchg_enabled() become
+local symbol, and the compat version of the fetch_robust_entry() function
+gets renamed to compat_fetch_robust_entry() to avoid a symbol clash.
+
+This is intended as a purely cosmetic patch, no behavior should
+change.
+
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[Lee: Back-ported to satisfy a build dependency]
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/futex.h |    8 -
+ kernel/Makefile       |    3 
+ kernel/futex.c        |  195 +++++++++++++++++++++++++++++++++++++++++++++++-
+ kernel/futex_compat.c |  201 --------------------------------------------------
+ 4 files changed, 192 insertions(+), 215 deletions(-)
+ delete mode 100644 kernel/futex_compat.c
+
+--- a/include/linux/futex.h
++++ b/include/linux/futex.h
+@@ -11,9 +11,6 @@ union ktime;
+ long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
+             u32 __user *uaddr2, u32 val2, u32 val3);
+-extern int
+-handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
+-
+ /*
+  * Futexes are matched on equal values of this key.
+  * The key type depends on whether it's a shared or private mapping.
+@@ -58,11 +55,6 @@ union futex_key {
+ #ifdef CONFIG_FUTEX
+ extern void exit_robust_list(struct task_struct *curr);
+ extern void exit_pi_state_list(struct task_struct *curr);
+-#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
+-#define futex_cmpxchg_enabled 1
+-#else
+-extern int futex_cmpxchg_enabled;
+-#endif
+ #else
+ static inline void exit_robust_list(struct task_struct *curr)
+ {
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -36,9 +36,6 @@ obj-$(CONFIG_PROFILING) += profile.o
+ obj-$(CONFIG_STACKTRACE) += stacktrace.o
+ obj-y += time/
+ obj-$(CONFIG_FUTEX) += futex.o
+-ifeq ($(CONFIG_COMPAT),y)
+-obj-$(CONFIG_FUTEX) += futex_compat.o
+-endif
+ obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
+ obj-$(CONFIG_SMP) += smp.o
+ ifneq ($(CONFIG_SMP),y)
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -44,6 +44,7 @@
+  *  along with this program; if not, write to the Free Software
+  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+  */
++#include <linux/compat.h>
+ #include <linux/slab.h>
+ #include <linux/poll.h>
+ #include <linux/fs.h>
+@@ -171,8 +172,10 @@
+  * double_lock_hb() and double_unlock_hb(), respectively.
+  */
+-#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
+-int __read_mostly futex_cmpxchg_enabled;
++#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
++#define futex_cmpxchg_enabled 1
++#else
++static int  __read_mostly futex_cmpxchg_enabled;
+ #endif
+ /*
+@@ -3088,7 +3091,7 @@ err_unlock:
+  * Process a futex-list entry, check whether it's owned by the
+  * dying task, and do notification if so:
+  */
+-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
++static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
+ {
+       u32 uval, uninitialized_var(nval), mval;
+@@ -3318,6 +3321,192 @@ SYSCALL_DEFINE6(futex, u32 __user *, uad
+       return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+ }
++#ifdef CONFIG_COMPAT
++/*
++ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
++ */
++static inline int
++compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
++                 compat_uptr_t __user *head, unsigned int *pi)
++{
++      if (get_user(*uentry, head))
++              return -EFAULT;
++
++      *entry = compat_ptr((*uentry) & ~1);
++      *pi = (unsigned int)(*uentry) & 1;
++
++      return 0;
++}
++
++static void __user *futex_uaddr(struct robust_list __user *entry,
++                              compat_long_t futex_offset)
++{
++      compat_uptr_t base = ptr_to_compat(entry);
++      void __user *uaddr = compat_ptr(base + futex_offset);
++
++      return uaddr;
++}
++
++/*
++ * Walk curr->robust_list (very carefully, it's a userspace list!)
++ * and mark any locks found there dead, and notify any waiters.
++ *
++ * We silently return on any sign of list-walking problem.
++ */
++void compat_exit_robust_list(struct task_struct *curr)
++{
++      struct compat_robust_list_head __user *head = curr->compat_robust_list;
++      struct robust_list __user *entry, *next_entry, *pending;
++      unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
++      unsigned int uninitialized_var(next_pi);
++      compat_uptr_t uentry, next_uentry, upending;
++      compat_long_t futex_offset;
++      int rc;
++
++      if (!futex_cmpxchg_enabled)
++              return;
++
++      /*
++       * Fetch the list head (which was registered earlier, via
++       * sys_set_robust_list()):
++       */
++      if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
++              return;
++      /*
++       * Fetch the relative futex offset:
++       */
++      if (get_user(futex_offset, &head->futex_offset))
++              return;
++      /*
++       * Fetch any possibly pending lock-add first, and handle it
++       * if it exists:
++       */
++      if (compat_fetch_robust_entry(&upending, &pending,
++                             &head->list_op_pending, &pip))
++              return;
++
++      next_entry = NULL;      /* avoid warning with gcc */
++      while (entry != (struct robust_list __user *) &head->list) {
++              /*
++               * Fetch the next entry in the list before calling
++               * handle_futex_death:
++               */
++              rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
++                      (compat_uptr_t __user *)&entry->next, &next_pi);
++              /*
++               * A pending lock might already be on the list, so
++               * dont process it twice:
++               */
++              if (entry != pending) {
++                      void __user *uaddr = futex_uaddr(entry, futex_offset);
++
++                      if (handle_futex_death(uaddr, curr, pi))
++                              return;
++              }
++              if (rc)
++                      return;
++              uentry = next_uentry;
++              entry = next_entry;
++              pi = next_pi;
++              /*
++               * Avoid excessively long or circular lists:
++               */
++              if (!--limit)
++                      break;
++
++              cond_resched();
++      }
++      if (pending) {
++              void __user *uaddr = futex_uaddr(pending, futex_offset);
++
++              handle_futex_death(uaddr, curr, pip);
++      }
++}
++
++COMPAT_SYSCALL_DEFINE2(set_robust_list,
++              struct compat_robust_list_head __user *, head,
++              compat_size_t, len)
++{
++      if (!futex_cmpxchg_enabled)
++              return -ENOSYS;
++
++      if (unlikely(len != sizeof(*head)))
++              return -EINVAL;
++
++      current->compat_robust_list = head;
++
++      return 0;
++}
++
++COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
++                      compat_uptr_t __user *, head_ptr,
++                      compat_size_t __user *, len_ptr)
++{
++      struct compat_robust_list_head __user *head;
++      unsigned long ret;
++      struct task_struct *p;
++
++      if (!futex_cmpxchg_enabled)
++              return -ENOSYS;
++
++      rcu_read_lock();
++
++      ret = -ESRCH;
++      if (!pid)
++              p = current;
++      else {
++              p = find_task_by_vpid(pid);
++              if (!p)
++                      goto err_unlock;
++      }
++
++      ret = -EPERM;
++      if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
++              goto err_unlock;
++
++      head = p->compat_robust_list;
++      rcu_read_unlock();
++
++      if (put_user(sizeof(*head), len_ptr))
++              return -EFAULT;
++      return put_user(ptr_to_compat(head), head_ptr);
++
++err_unlock:
++      rcu_read_unlock();
++
++      return ret;
++}
++
++COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
++              struct compat_timespec __user *, utime, u32 __user *, uaddr2,
++              u32, val3)
++{
++      struct timespec ts;
++      ktime_t t, *tp = NULL;
++      int val2 = 0;
++      int cmd = op & FUTEX_CMD_MASK;
++
++      if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
++                    cmd == FUTEX_WAIT_BITSET ||
++                    cmd == FUTEX_WAIT_REQUEUE_PI)) {
++              if (compat_get_timespec(&ts, utime))
++                      return -EFAULT;
++              if (!timespec_valid(&ts))
++                      return -EINVAL;
++
++              t = timespec_to_ktime(ts);
++              if (cmd == FUTEX_WAIT)
++                      t = ktime_add_safe(ktime_get(), t);
++              tp = &t;
++      }
++      if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
++          cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
++              val2 = (int) (unsigned long) utime;
++
++      return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
++}
++#endif /* CONFIG_COMPAT */
++
+ static void __init futex_detect_cmpxchg(void)
+ {
+ #ifndef CONFIG_HAVE_FUTEX_CMPXCHG
+--- a/kernel/futex_compat.c
++++ /dev/null
+@@ -1,201 +0,0 @@
+-/*
+- * linux/kernel/futex_compat.c
+- *
+- * Futex compatibililty routines.
+- *
+- * Copyright 2006, Red Hat, Inc., Ingo Molnar
+- */
+-
+-#include <linux/linkage.h>
+-#include <linux/compat.h>
+-#include <linux/nsproxy.h>
+-#include <linux/futex.h>
+-#include <linux/ptrace.h>
+-#include <linux/syscalls.h>
+-
+-#include <asm/uaccess.h>
+-
+-
+-/*
+- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+- */
+-static inline int
+-fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+-                 compat_uptr_t __user *head, unsigned int *pi)
+-{
+-      if (get_user(*uentry, head))
+-              return -EFAULT;
+-
+-      *entry = compat_ptr((*uentry) & ~1);
+-      *pi = (unsigned int)(*uentry) & 1;
+-
+-      return 0;
+-}
+-
+-static void __user *futex_uaddr(struct robust_list __user *entry,
+-                              compat_long_t futex_offset)
+-{
+-      compat_uptr_t base = ptr_to_compat(entry);
+-      void __user *uaddr = compat_ptr(base + futex_offset);
+-
+-      return uaddr;
+-}
+-
+-/*
+- * Walk curr->robust_list (very carefully, it's a userspace list!)
+- * and mark any locks found there dead, and notify any waiters.
+- *
+- * We silently return on any sign of list-walking problem.
+- */
+-void compat_exit_robust_list(struct task_struct *curr)
+-{
+-      struct compat_robust_list_head __user *head = curr->compat_robust_list;
+-      struct robust_list __user *entry, *next_entry, *pending;
+-      unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+-      unsigned int uninitialized_var(next_pi);
+-      compat_uptr_t uentry, next_uentry, upending;
+-      compat_long_t futex_offset;
+-      int rc;
+-
+-      if (!futex_cmpxchg_enabled)
+-              return;
+-
+-      /*
+-       * Fetch the list head (which was registered earlier, via
+-       * sys_set_robust_list()):
+-       */
+-      if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
+-              return;
+-      /*
+-       * Fetch the relative futex offset:
+-       */
+-      if (get_user(futex_offset, &head->futex_offset))
+-              return;
+-      /*
+-       * Fetch any possibly pending lock-add first, and handle it
+-       * if it exists:
+-       */
+-      if (fetch_robust_entry(&upending, &pending,
+-                             &head->list_op_pending, &pip))
+-              return;
+-
+-      next_entry = NULL;      /* avoid warning with gcc */
+-      while (entry != (struct robust_list __user *) &head->list) {
+-              /*
+-               * Fetch the next entry in the list before calling
+-               * handle_futex_death:
+-               */
+-              rc = fetch_robust_entry(&next_uentry, &next_entry,
+-                      (compat_uptr_t __user *)&entry->next, &next_pi);
+-              /*
+-               * A pending lock might already be on the list, so
+-               * dont process it twice:
+-               */
+-              if (entry != pending) {
+-                      void __user *uaddr = futex_uaddr(entry, futex_offset);
+-
+-                      if (handle_futex_death(uaddr, curr, pi))
+-                              return;
+-              }
+-              if (rc)
+-                      return;
+-              uentry = next_uentry;
+-              entry = next_entry;
+-              pi = next_pi;
+-              /*
+-               * Avoid excessively long or circular lists:
+-               */
+-              if (!--limit)
+-                      break;
+-
+-              cond_resched();
+-      }
+-      if (pending) {
+-              void __user *uaddr = futex_uaddr(pending, futex_offset);
+-
+-              handle_futex_death(uaddr, curr, pip);
+-      }
+-}
+-
+-COMPAT_SYSCALL_DEFINE2(set_robust_list,
+-              struct compat_robust_list_head __user *, head,
+-              compat_size_t, len)
+-{
+-      if (!futex_cmpxchg_enabled)
+-              return -ENOSYS;
+-
+-      if (unlikely(len != sizeof(*head)))
+-              return -EINVAL;
+-
+-      current->compat_robust_list = head;
+-
+-      return 0;
+-}
+-
+-COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
+-                      compat_uptr_t __user *, head_ptr,
+-                      compat_size_t __user *, len_ptr)
+-{
+-      struct compat_robust_list_head __user *head;
+-      unsigned long ret;
+-      struct task_struct *p;
+-
+-      if (!futex_cmpxchg_enabled)
+-              return -ENOSYS;
+-
+-      rcu_read_lock();
+-
+-      ret = -ESRCH;
+-      if (!pid)
+-              p = current;
+-      else {
+-              p = find_task_by_vpid(pid);
+-              if (!p)
+-                      goto err_unlock;
+-      }
+-
+-      ret = -EPERM;
+-      if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+-              goto err_unlock;
+-
+-      head = p->compat_robust_list;
+-      rcu_read_unlock();
+-
+-      if (put_user(sizeof(*head), len_ptr))
+-              return -EFAULT;
+-      return put_user(ptr_to_compat(head), head_ptr);
+-
+-err_unlock:
+-      rcu_read_unlock();
+-
+-      return ret;
+-}
+-
+-COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
+-              struct compat_timespec __user *, utime, u32 __user *, uaddr2,
+-              u32, val3)
+-{
+-      struct timespec ts;
+-      ktime_t t, *tp = NULL;
+-      int val2 = 0;
+-      int cmd = op & FUTEX_CMD_MASK;
+-
+-      if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
+-                    cmd == FUTEX_WAIT_BITSET ||
+-                    cmd == FUTEX_WAIT_REQUEUE_PI)) {
+-              if (compat_get_timespec(&ts, utime))
+-                      return -EFAULT;
+-              if (!timespec_valid(&ts))
+-                      return -EINVAL;
+-
+-              t = timespec_to_ktime(ts);
+-              if (cmd == FUTEX_WAIT)
+-                      t = ktime_add_safe(ktime_get(), t);
+-              tp = &t;
+-      }
+-      if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+-          cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
+-              val2 = (int) (unsigned long) utime;
+-
+-      return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+-}