+++ /dev/null
-From 3ef240eaff36b8119ac9e2ea17cbf41179c930ba Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 6 Nov 2019 22:55:46 +0100
-Subject: futex: Prevent exit livelock
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit 3ef240eaff36b8119ac9e2ea17cbf41179c930ba upstream.
-
-Oleg provided the following test case:
-
-int main(void)
-{
- struct sched_param sp = {};
-
- sp.sched_priority = 2;
- assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0);
-
- int lock = vfork();
- if (!lock) {
- sp.sched_priority = 1;
- assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0);
- _exit(0);
- }
-
- syscall(__NR_futex, &lock, FUTEX_LOCK_PI, 0,0,0);
- return 0;
-}
-
-This creates an unkillable RT process spinning in futex_lock_pi() on a UP
-machine or if the process is affine to a single CPU. The reason is:
-
- parent child
-
- set FIFO prio 2
-
- vfork() -> set FIFO prio 1
- implies wait_for_child() sched_setscheduler(...)
- exit()
- do_exit()
- ....
- mm_release()
- tsk->futex_state = FUTEX_STATE_EXITING;
- exit_futex(); (NOOP in this case)
- complete() --> wakes parent
- sys_futex()
- loop infinite because
- tsk->futex_state == FUTEX_STATE_EXITING
-
-The same problem can happen just by regular preemption as well:
-
- task holds futex
- ...
- do_exit()
- tsk->futex_state = FUTEX_STATE_EXITING;
-
- --> preemption (unrelated wakeup of some other higher prio task, e.g. timer)
-
- switch_to(other_task)
-
- return to user
- sys_futex()
- loop infinite as above
-
-Just for the fun of it the futex exit cleanup could trigger the wakeup
-itself before the task sets its futex state to DEAD.
-
-To cure this, the handling of the exiting owner is changed so:
-
- - A refcount is held on the task
-
- - The task pointer is stored in a caller visible location
-
- - The caller drops all locks (hash bucket, mmap_sem) and blocks
- on task::futex_exit_mutex. When the mutex is acquired then
- the exiting task has completed the cleanup and the state
- is consistent and can be reevaluated.
-
-This is not a pretty solution, but there is no choice other than returning
-an error code to user space, which would break the state consistency
-guarantee and open another can of problems including regressions.
-
-For stable backports the preparatory commits ac31c7ff8624 .. ba31c1a48538
-are required as well, but for anything older than 5.3.y the backports are
-going to be provided when this hits mainline as the other dependencies for
-those kernels are definitely not stable material.
-
-Fixes: 778e9a9c3e71 ("pi-futex: fix exit races and locking problems")
-Reported-by: Oleg Nesterov <oleg@redhat.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Ingo Molnar <mingo@kernel.org>
-Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Cc: Stable Team <stable@vger.kernel.org>
-Link: https://lkml.kernel.org/r/20191106224557.041676471@linutronix.de
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-
----
- kernel/futex.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++---------
- 1 file changed, 91 insertions(+), 15 deletions(-)
-
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -1148,6 +1148,36 @@ out_error:
- return ret;
- }
-
-+/**
-+ * wait_for_owner_exiting - Block until the owner has exited
-+ * @exiting: Pointer to the exiting task
-+ *
-+ * Caller must hold a refcount on @exiting.
-+ */
-+static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
-+{
-+ if (ret != -EBUSY) {
-+ WARN_ON_ONCE(exiting);
-+ return;
-+ }
-+
-+ if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
-+ return;
-+
-+ mutex_lock(&exiting->futex_exit_mutex);
-+ /*
-+ * No point in doing state checking here. If the waiter got here
-+ * while the task was in exec()->exec_futex_release() then it can
-+ * have any FUTEX_STATE_* value when the waiter has acquired the
-+ * mutex. OK, if running, EXITING or DEAD if it reached exit()
-+ * already. Highly unlikely and not a problem. Just one more round
-+ * through the futex maze.
-+ */
-+ mutex_unlock(&exiting->futex_exit_mutex);
-+
-+ put_task_struct(exiting);
-+}
-+
- static int handle_exit_race(u32 __user *uaddr, u32 uval,
- struct task_struct *tsk)
- {
-@@ -1207,7 +1237,8 @@ static int handle_exit_race(u32 __user *
- * it after doing proper sanity checks.
- */
- static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
-- struct futex_pi_state **ps)
-+ struct futex_pi_state **ps,
-+ struct task_struct **exiting)
- {
- pid_t pid = uval & FUTEX_TID_MASK;
- struct futex_pi_state *pi_state;
-@@ -1247,7 +1278,19 @@ static int attach_to_pi_owner(u32 __user
- int ret = handle_exit_race(uaddr, uval, p);
-
- raw_spin_unlock_irq(&p->pi_lock);
-- put_task_struct(p);
-+ /*
-+ * If the owner task is between FUTEX_STATE_EXITING and
-+ * FUTEX_STATE_DEAD then store the task pointer and keep
-+ * the reference on the task struct. The calling code will
-+ * drop all locks, wait for the task to reach
-+ * FUTEX_STATE_DEAD and then drop the refcount. This is
-+ * required to prevent a live lock when the current task
-+ * preempted the exiting task between the two states.
-+ */
-+ if (ret == -EBUSY)
-+ *exiting = p;
-+ else
-+ put_task_struct(p);
- return ret;
- }
-
-@@ -1286,7 +1329,8 @@ static int attach_to_pi_owner(u32 __user
-
- static int lookup_pi_state(u32 __user *uaddr, u32 uval,
- struct futex_hash_bucket *hb,
-- union futex_key *key, struct futex_pi_state **ps)
-+ union futex_key *key, struct futex_pi_state **ps,
-+ struct task_struct **exiting)
- {
- struct futex_q *top_waiter = futex_top_waiter(hb, key);
-
-@@ -1301,7 +1345,7 @@ static int lookup_pi_state(u32 __user *u
- * We are the first waiter - try to look up the owner based on
- * @uval and attach to it.
- */
-- return attach_to_pi_owner(uaddr, uval, key, ps);
-+ return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
- }
-
- static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
-@@ -1329,6 +1373,8 @@ static int lock_pi_update_atomic(u32 __u
- * lookup
- * @task: the task to perform the atomic lock work for. This will
- * be "current" except in the case of requeue pi.
-+ * @exiting: Pointer to store the task pointer of the owner task
-+ * which is in the middle of exiting
- * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
- *
- * Return:
-@@ -1337,11 +1383,17 @@ static int lock_pi_update_atomic(u32 __u
- * - <0 - error
- *
- * The hb->lock and futex_key refs shall be held by the caller.
-+ *
-+ * @exiting is only set when the return value is -EBUSY. If so, this holds
-+ * a refcount on the exiting task on return and the caller needs to drop it
-+ * after waiting for the exit to complete.
- */
- static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
- union futex_key *key,
- struct futex_pi_state **ps,
-- struct task_struct *task, int set_waiters)
-+ struct task_struct *task,
-+ struct task_struct **exiting,
-+ int set_waiters)
- {
- u32 uval, newval, vpid = task_pid_vnr(task);
- struct futex_q *top_waiter;
-@@ -1411,7 +1463,7 @@ static int futex_lock_pi_atomic(u32 __us
- * attach to the owner. If that fails, no harm done, we only
- * set the FUTEX_WAITERS bit in the user space variable.
- */
-- return attach_to_pi_owner(uaddr, newval, key, ps);
-+ return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
- }
-
- /**
-@@ -1830,6 +1882,8 @@ void requeue_pi_wake_futex(struct futex_
- * @key1: the from futex key
- * @key2: the to futex key
- * @ps: address to store the pi_state pointer
-+ * @exiting: Pointer to store the task pointer of the owner task
-+ * which is in the middle of exiting
- * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
- *
- * Try and get the lock on behalf of the top waiter if we can do it atomically.
-@@ -1837,16 +1891,20 @@ void requeue_pi_wake_futex(struct futex_
- * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
- * hb1 and hb2 must be held by the caller.
- *
-+ * @exiting is only set when the return value is -EBUSY. If so, this holds
-+ * a refcount on the exiting task on return and the caller needs to drop it
-+ * after waiting for the exit to complete.
-+ *
- * Return:
- * - 0 - failed to acquire the lock atomically;
- * - >0 - acquired the lock, return value is vpid of the top_waiter
- * - <0 - error
- */
--static int futex_proxy_trylock_atomic(u32 __user *pifutex,
-- struct futex_hash_bucket *hb1,
-- struct futex_hash_bucket *hb2,
-- union futex_key *key1, union futex_key *key2,
-- struct futex_pi_state **ps, int set_waiters)
-+static int
-+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
-+ struct futex_hash_bucket *hb2, union futex_key *key1,
-+ union futex_key *key2, struct futex_pi_state **ps,
-+ struct task_struct **exiting, int set_waiters)
- {
- struct futex_q *top_waiter = NULL;
- u32 curval;
-@@ -1883,7 +1941,7 @@ static int futex_proxy_trylock_atomic(u3
- */
- vpid = task_pid_vnr(top_waiter->task);
- ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
-- set_waiters);
-+ exiting, set_waiters);
- if (ret == 1) {
- requeue_pi_wake_futex(top_waiter, key2, hb2);
- return vpid;
-@@ -2012,6 +2070,8 @@ retry_private:
- }
-
- if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
-+ struct task_struct *exiting = NULL;
-+
- /*
- * Attempt to acquire uaddr2 and wake the top waiter. If we
- * intend to requeue waiters, force setting the FUTEX_WAITERS
-@@ -2019,7 +2079,8 @@ retry_private:
- * faults rather in the requeue loop below.
- */
- ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
-- &key2, &pi_state, nr_requeue);
-+ &key2, &pi_state,
-+ &exiting, nr_requeue);
-
- /*
- * At this point the top_waiter has either taken uaddr2 or is
-@@ -2046,7 +2107,8 @@ retry_private:
- * If that call succeeds then we have pi_state and an
- * initial refcount on it.
- */
-- ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state);
-+ ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
-+ &pi_state, &exiting);
- }
-
- switch (ret) {
-@@ -2075,6 +2137,12 @@ retry_private:
- hb_waiters_dec(hb2);
- put_futex_key(&key2);
- put_futex_key(&key1);
-+ /*
-+ * Handle the case where the owner is in the middle of
-+ * exiting. Wait for the exit to complete otherwise
-+ * this task might loop forever, aka. live lock.
-+ */
-+ wait_for_owner_exiting(ret, exiting);
- cond_resched();
- goto retry;
- default:
-@@ -2790,6 +2858,7 @@ static int futex_lock_pi(u32 __user *uad
- {
- struct hrtimer_sleeper timeout, *to = NULL;
- struct futex_pi_state *pi_state = NULL;
-+ struct task_struct *exiting = NULL;
- struct rt_mutex_waiter rt_waiter;
- struct futex_hash_bucket *hb;
- struct futex_q q = futex_q_init;
-@@ -2817,7 +2886,8 @@ retry:
- retry_private:
- hb = queue_lock(&q);
-
-- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
-+ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
-+ &exiting, 0);
- if (unlikely(ret)) {
- /*
- * Atomic work succeeded and we got the lock,
-@@ -2839,6 +2909,12 @@ retry_private:
- */
- queue_unlock(hb);
- put_futex_key(&q.key);
-+ /*
-+ * Handle the case where the owner is in the middle of
-+ * exiting. Wait for the exit to complete otherwise
-+ * this task might loop forever, aka. live lock.
-+ */
-+ wait_for_owner_exiting(ret, exiting);
- cond_resched();
- goto retry;
- default:
--- /dev/null
+From ca16d5bee59807bf04deaab0a8eccecd5061528c Mon Sep 17 00:00:00 2001
+From: Yang Tao <yang.tao172@zte.com.cn>
+Date: Wed, 6 Nov 2019 22:55:35 +0100
+Subject: futex: Prevent robust futex exit race
+
+From: Yang Tao <yang.tao172@zte.com.cn>
+
+commit ca16d5bee59807bf04deaab0a8eccecd5061528c upstream.
+
+Robust futexes utilize the robust_list mechanism to allow the kernel to
+release futexes which are held when a task exits. The exit can be voluntary
+or caused by a signal or fault. This prevents waiters from blocking forever.
+
+The futex operations in user space store a pointer to the futex they are
+either locking or unlocking in the op_pending member of the per task robust
+list.
+
+After a lock operation has succeeded the futex is queued in the robust list
+linked list and the op_pending pointer is cleared.
+
+After an unlock operation has succeeded the futex is removed from the
+robust list linked list and the op_pending pointer is cleared.
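+
+As an illustration, a minimal sketch of the user space side (hypothetical
+and heavily simplified; glibc's real implementation differs in detail and
+the robust list head is per thread):
+
+    #include <linux/futex.h>      /* struct robust_list{,_head} */
+    #include <sys/syscall.h>
+    #include <stdatomic.h>
+    #include <stddef.h>
+    #include <unistd.h>
+
+    struct robust_mutex {                 /* hypothetical layout */
+        _Atomic unsigned int lock;        /* futex word, holds owner TID */
+        struct robust_list list;          /* linkage into the robust list */
+    };
+
+    static struct robust_list_head head = {
+        .list         = { .next = &head.list },  /* empty circular list */
+        .futex_offset = (long)offsetof(struct robust_mutex, lock) -
+                        (long)offsetof(struct robust_mutex, list),
+    };
+
+    static void robust_init(void)
+    {
+        /* Tell the kernel where this thread's robust list lives */
+        syscall(SYS_set_robust_list, &head, sizeof(head));
+    }
+
+    static void robust_lock(struct robust_mutex *m, unsigned int tid)
+    {
+        unsigned int expected = 0;
+
+        /* 1) announce the operation before touching the futex word */
+        head.list_op_pending = &m->list;
+        /* 2) acquire: store the TID in the futex word; a real
+         *    implementation would FUTEX_WAIT instead of spinning */
+        while (!atomic_compare_exchange_weak(&m->lock, &expected, tid))
+            expected = 0;
+        /* 3) lock succeeded: queue the futex in the robust list ... */
+        m->list.next = head.list.next;
+        head.list.next = &m->list;
+        /* 4) ... and clear the pending pointer */
+        head.list_op_pending = NULL;
+    }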
+
+The robust list exit code checks for the pending operation and any futex
+which is queued in the linked list. It carefully checks whether the futex
+value is the TID of the exiting task. If so, it sets the OWNER_DIED bit and
+tries to wake up a potential waiter.
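+
+Condensed, the relevant logic in handle_futex_death() so far reads like
+this (fault handling and the retry loop omitted):
+
+    if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
+        return 0;
+
+    /* Keep WAITERS, set OWNER_DIED, clear the TID: */
+    mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+    cmpxchg_futex_value_locked(&nval, uaddr, uval, mval);
+
+    /* Wake one waiter on a non PI futex: */
+    if (!pi && (uval & FUTEX_WAITERS))
+        futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);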
+
+This is race-free for the lock operation, but unlock has two race scenarios
+where waiters might not be woken up. These issues can be observed with
+regular robust pthread mutexes. PI-aware pthread mutexes are not affected.
+
+(1) Unlocking task is killed after unlocking the futex value in user space
+ before being able to wake a waiter.
+
+ pthread_mutex_unlock()
+ |
+ V
+ atomic_exchange_rel (&mutex->__data.__lock, 0)
+ <------------------------killed
+ lll_futex_wake () |
+ |
+ |(__lock = 0)
+ |(enter kernel)
+ |
+ V
+ do_exit()
+ exit_mm()
+ mm_release()
+ exit_robust_list()
+ handle_futex_death()
+ |
+ |(__lock = 0)
+ |(uval = 0)
+ |
+ V
+ if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
+ return 0;
+
+ The sanity check which ensures that the user space futex is owned by
+ the exiting task prevents the wakeup of waiters, which in consequence
+ block infinitely.
+
+(2) Waiting task is killed after a wakeup and before it can acquire the
+ futex in user space.
+
+ OWNER WAITER
+ futex_wait()
+ pthread_mutex_unlock() |
+ | |
+ |(__lock = 0) |
+ | |
+ V |
+ futex_wake() ------------> wakeup()
+ |
+ |(return to userspace)
+ |(__lock = 0)
+ |
+ V
+ oldval = mutex->__data.__lock
+ <-----------------killed
+ atomic_compare_and_exchange_val_acq (&mutex->__data.__lock, |
+ id | assume_other_futex_waiters, 0) |
+ |
+ |
+ (enter kernel)|
+ |
+ V
+ do_exit()
+ |
+ |
+ V
+ handle_futex_death()
+ |
+ |(__lock = 0)
+ |(uval = 0)
+ |
+ V
+ if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
+ return 0;
+
+ The sanity check which ensures that the user space futex is owned
+ by the exiting task prevents the wakeup of waiters, which seems to
+ be correct as the exiting task does not own the futex value, but
+ the consequence is that other waiters won't be woken up and block
+ infinitely.
+
+In both scenarios the following conditions are true:
+
+ - task->robust_list->list_op_pending != NULL
+ - user space futex value == 0
+ - Regular futex (not PI)
+
+If these conditions are met then it is reasonably safe to wake up a
+potential waiter in order to prevent the above problems.
+
+As this might be a false positive, it can cause spurious wakeups, but the
+waiter side has to handle other types of unrelated wakeups, e.g. signals,
+gracefully anyway. So such a spurious wakeup will not affect the
+correctness of these operations.
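+
+A waiter side which copes with this can be sketched as follows (continuing
+the hypothetical user space example above; glibc's lowlevellock code is
+more involved):
+
+    for (;;) {
+        unsigned int expected = 0;
+
+        if (atomic_compare_exchange_strong(&m->lock, &expected, tid))
+            break;    /* took over the uncontended futex */
+        /*
+         * FUTEX_WAIT returns on a real wakeup, on a signal or
+         * spuriously. In all cases the loop re-reads the futex
+         * word and retries, so an extra wakeup is harmless.
+         */
+        syscall(SYS_futex, &m->lock, FUTEX_WAIT, expected,
+                NULL, NULL, 0);
+    }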
+
+This workaround must not touch the user space futex value and cannot set
+the OWNER_DIED bit because the lock value is 0, i.e. uncontended. Setting
+OWNER_DIED in this case would result in inconsistent state and subsequently
+in malfunction of the owner died handling in user space.
+
+The rest of the user space state is still consistent as no other task can
+observe the list_op_pending entry in the exiting task's robust list.
+
+A waiter which is eventually woken up will observe the uncontended lock
+value and take it over.
+
+[ tglx: Massaged changelog and comment. Made the return explicit and not
+ depend on the subsequent check and added constants to hand into
+ handle_futex_death() instead of plain numbers. Fixed a few coding
+ style issues. ]
+
+Fixes: 0771dfefc9e5 ("[PATCH] lightweight robust futexes: core")
+Signed-off-by: Yang Tao <yang.tao172@zte.com.cn>
+Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/1573010582-35297-1-git-send-email-wang.yi59@zte.com.cn
+Link: https://lkml.kernel.org/r/20191106224555.943191378@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/futex.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 51 insertions(+), 7 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -3457,11 +3457,16 @@ err_unlock:
+ return ret;
+ }
+
++/* Constants for the pending_op argument of handle_futex_death */
++#define HANDLE_DEATH_PENDING true
++#define HANDLE_DEATH_LIST false
++
+ /*
+ * Process a futex-list entry, check whether it's owned by the
+ * dying task, and do notification if so:
+ */
+-static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
++static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
++ bool pi, bool pending_op)
+ {
+ u32 uval, uninitialized_var(nval), mval;
+ int err;
+@@ -3474,6 +3479,42 @@ retry:
+ if (get_user(uval, uaddr))
+ return -1;
+
++ /*
++ * Special case for regular (non PI) futexes. The unlock path in
++ * user space has two race scenarios:
++ *
++ * 1. The unlock path releases the user space futex value and
++ * before it can execute the futex() syscall to wake up
++ * waiters it is killed.
++ *
++ * 2. A woken up waiter is killed before it can acquire the
++ * futex in user space.
++ *
++ * In both cases the TID validation below prevents a wakeup of
++ * potential waiters which can cause these waiters to block
++ * forever.
++ *
++ * In both cases the following conditions are met:
++ *
++ * 1) task->robust_list->list_op_pending != NULL
++ * @pending_op == true
++ * 2) User space futex value == 0
++ * 3) Regular futex: @pi == false
++ *
++ * If these conditions are met, it is safe to attempt waking up a
++ * potential waiter without touching the user space futex value and
++ * trying to set the OWNER_DIED bit. The user space futex value is
++ * uncontended and the rest of the user space mutex state is
++ * consistent, so a woken waiter will just take over the
++ * uncontended futex. Setting the OWNER_DIED bit would create
++ * inconsistent state and malfunction of the user space owner died
++ * handling.
++ */
++ if (pending_op && !pi && !uval) {
++ futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
++ return 0;
++ }
++
+ if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
+ return 0;
+
+@@ -3593,10 +3634,11 @@ void exit_robust_list(struct task_struct
+ * A pending lock might already be on the list, so
+ * don't process it twice:
+ */
+- if (entry != pending)
++ if (entry != pending) {
+ if (handle_futex_death((void __user *)entry + futex_offset,
+- curr, pi))
++ curr, pi, HANDLE_DEATH_LIST))
+ return;
++ }
+ if (rc)
+ return;
+ entry = next_entry;
+@@ -3610,9 +3652,10 @@ void exit_robust_list(struct task_struct
+ cond_resched();
+ }
+
+- if (pending)
++ if (pending) {
+ handle_futex_death((void __user *)pending + futex_offset,
+- curr, pip);
++ curr, pip, HANDLE_DEATH_PENDING);
++ }
+ }
+
+ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+@@ -3789,7 +3832,8 @@ void compat_exit_robust_list(struct task
+ if (entry != pending) {
+ void __user *uaddr = futex_uaddr(entry, futex_offset);
+
+- if (handle_futex_death(uaddr, curr, pi))
++ if (handle_futex_death(uaddr, curr, pi,
++ HANDLE_DEATH_LIST))
+ return;
+ }
+ if (rc)
+@@ -3808,7 +3852,7 @@ void compat_exit_robust_list(struct task
+ if (pending) {
+ void __user *uaddr = futex_uaddr(pending, futex_offset);
+
+- handle_futex_death(uaddr, curr, pip);
++ handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
+ }
+ }
+
selftests-x86-sigreturn-32-invalidate-ds-and-es-when-abusing-the-kernel.patch
x86-pti-32-calculate-the-various-pti-cpu_entry_area-sizes-correctly-make-the-cpu_entry_area_pages-assert-precise.patch
x86-entry-32-fix-fixup_espfix_stack-with-user-cr3.patch
-futex-prevent-exit-livelock.patch
+y2038-futex-move-compat-implementation-into-futex.c.patch
+futex-prevent-robust-futex-exit-race.patch
alsa-usb-audio-fix-null-dereference-at-parsing-badd.patch
nfc-port100-handle-command-failure-cleanly.patch
net-sysfs-fix-reference-count-leak-in-rx-netdev_queue_add_kobject.patch
--- /dev/null
+From 04e7712f4460585e5eed5b853fd8b82a9943958f Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Tue, 17 Apr 2018 16:31:07 +0200
+Subject: y2038: futex: Move compat implementation into futex.c
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit 04e7712f4460585e5eed5b853fd8b82a9943958f upstream.
+
+We are going to share the compat_sys_futex() handler between 64-bit
+architectures and 32-bit architectures that need to deal with both 32-bit
+and 64-bit time_t, and this is easier if both entry points are in the
+same file.
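+
+For reference, the two timespec layouts involved look roughly like this
+(per the kernel's time headers; shown here only as an illustration):
+
+    /* 32-bit time_t, as passed to the compat entry point */
+    struct old_timespec32 {
+        old_time32_t    tv_sec;
+        s32             tv_nsec;
+    };
+
+    /* 64-bit time_t, as passed to the native entry point */
+    struct __kernel_timespec {
+        __kernel_time64_t    tv_sec;
+        long long            tv_nsec;
+    };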
+
+In fact, most other system call handlers do the same thing these days, so
+let's follow the trend here and merge all of futex_compat.c into futex.c.
+
+In the process, a few minor changes have to be made to make sure everything
+still makes sense: handle_futex_death() and futex_cmpxchg_enabled() become
+local symbols, and the compat version of the fetch_robust_entry() function
+gets renamed to compat_fetch_robust_entry() to avoid a symbol clash.
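+
+The clash is simply two static helpers of the same name ending up in one
+translation unit (signatures as in the kernel sources):
+
+    /* native version, already living in futex.c */
+    static inline int fetch_robust_entry(struct robust_list __user **entry,
+                                         struct robust_list __user * __user *head,
+                                         unsigned int *pi);
+
+    /* compat version, moved over from futex_compat.c, hence renamed */
+    static inline int compat_fetch_robust_entry(compat_uptr_t *uentry,
+                                                struct robust_list __user **entry,
+                                                compat_uptr_t __user *head,
+                                                unsigned int *pi);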
+
+This is intended as a purely cosmetic patch; no behavior should
+change.
+
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/futex.h | 8 -
+ kernel/Makefile | 3
+ kernel/futex.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++-
+ kernel/futex_compat.c | 202 --------------------------------------------------
+ 4 files changed, 192 insertions(+), 216 deletions(-)
+
+--- a/include/linux/futex.h
++++ b/include/linux/futex.h
+@@ -9,9 +9,6 @@ struct inode;
+ struct mm_struct;
+ struct task_struct;
+
+-extern int
+-handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
+-
+ /*
+ * Futexes are matched on equal values of this key.
+ * The key type depends on whether it's a shared or private mapping.
+@@ -55,11 +52,6 @@ extern void exit_robust_list(struct task
+
+ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+ u32 __user *uaddr2, u32 val2, u32 val3);
+-#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
+-#define futex_cmpxchg_enabled 1
+-#else
+-extern int futex_cmpxchg_enabled;
+-#endif
+ #else
+ static inline void exit_robust_list(struct task_struct *curr)
+ {
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -50,9 +50,6 @@ obj-$(CONFIG_PROFILING) += profile.o
+ obj-$(CONFIG_STACKTRACE) += stacktrace.o
+ obj-y += time/
+ obj-$(CONFIG_FUTEX) += futex.o
+-ifeq ($(CONFIG_COMPAT),y)
+-obj-$(CONFIG_FUTEX) += futex_compat.o
+-endif
+ obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
+ obj-$(CONFIG_SMP) += smp.o
+ ifneq ($(CONFIG_SMP),y)
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -44,6 +44,7 @@
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
++#include <linux/compat.h>
+ #include <linux/slab.h>
+ #include <linux/poll.h>
+ #include <linux/fs.h>
+@@ -173,8 +174,10 @@
+ * double_lock_hb() and double_unlock_hb(), respectively.
+ */
+
+-#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
+-int __read_mostly futex_cmpxchg_enabled;
++#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
++#define futex_cmpxchg_enabled 1
++#else
++static int __read_mostly futex_cmpxchg_enabled;
+ #endif
+
+ /*
+@@ -3458,7 +3461,7 @@ err_unlock:
+ * Process a futex-list entry, check whether it's owned by the
+ * dying task, and do notification if so:
+ */
+-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
++static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
+ {
+ u32 uval, uninitialized_var(nval), mval;
+ int err;
+@@ -3707,6 +3710,192 @@ SYSCALL_DEFINE6(futex, u32 __user *, uad
+ return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+ }
+
++#ifdef CONFIG_COMPAT
++/*
++ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
++ */
++static inline int
++compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
++ compat_uptr_t __user *head, unsigned int *pi)
++{
++ if (get_user(*uentry, head))
++ return -EFAULT;
++
++ *entry = compat_ptr((*uentry) & ~1);
++ *pi = (unsigned int)(*uentry) & 1;
++
++ return 0;
++}
++
++static void __user *futex_uaddr(struct robust_list __user *entry,
++ compat_long_t futex_offset)
++{
++ compat_uptr_t base = ptr_to_compat(entry);
++ void __user *uaddr = compat_ptr(base + futex_offset);
++
++ return uaddr;
++}
++
++/*
++ * Walk curr->robust_list (very carefully, it's a userspace list!)
++ * and mark any locks found there dead, and notify any waiters.
++ *
++ * We silently return on any sign of list-walking problem.
++ */
++void compat_exit_robust_list(struct task_struct *curr)
++{
++ struct compat_robust_list_head __user *head = curr->compat_robust_list;
++ struct robust_list __user *entry, *next_entry, *pending;
++ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
++ unsigned int uninitialized_var(next_pi);
++ compat_uptr_t uentry, next_uentry, upending;
++ compat_long_t futex_offset;
++ int rc;
++
++ if (!futex_cmpxchg_enabled)
++ return;
++
++ /*
++ * Fetch the list head (which was registered earlier, via
++ * sys_set_robust_list()):
++ */
++ if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
++ return;
++ /*
++ * Fetch the relative futex offset:
++ */
++ if (get_user(futex_offset, &head->futex_offset))
++ return;
++ /*
++ * Fetch any possibly pending lock-add first, and handle it
++ * if it exists:
++ */
++ if (compat_fetch_robust_entry(&upending, &pending,
++ &head->list_op_pending, &pip))
++ return;
++
++ next_entry = NULL; /* avoid warning with gcc */
++ while (entry != (struct robust_list __user *) &head->list) {
++ /*
++ * Fetch the next entry in the list before calling
++ * handle_futex_death:
++ */
++ rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
++ (compat_uptr_t __user *)&entry->next, &next_pi);
++ /*
++ * A pending lock might already be on the list, so
++ * dont process it twice:
++ */
++ if (entry != pending) {
++ void __user *uaddr = futex_uaddr(entry, futex_offset);
++
++ if (handle_futex_death(uaddr, curr, pi))
++ return;
++ }
++ if (rc)
++ return;
++ uentry = next_uentry;
++ entry = next_entry;
++ pi = next_pi;
++ /*
++ * Avoid excessively long or circular lists:
++ */
++ if (!--limit)
++ break;
++
++ cond_resched();
++ }
++ if (pending) {
++ void __user *uaddr = futex_uaddr(pending, futex_offset);
++
++ handle_futex_death(uaddr, curr, pip);
++ }
++}
++
++COMPAT_SYSCALL_DEFINE2(set_robust_list,
++ struct compat_robust_list_head __user *, head,
++ compat_size_t, len)
++{
++ if (!futex_cmpxchg_enabled)
++ return -ENOSYS;
++
++ if (unlikely(len != sizeof(*head)))
++ return -EINVAL;
++
++ current->compat_robust_list = head;
++
++ return 0;
++}
++
++COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
++ compat_uptr_t __user *, head_ptr,
++ compat_size_t __user *, len_ptr)
++{
++ struct compat_robust_list_head __user *head;
++ unsigned long ret;
++ struct task_struct *p;
++
++ if (!futex_cmpxchg_enabled)
++ return -ENOSYS;
++
++ rcu_read_lock();
++
++ ret = -ESRCH;
++ if (!pid)
++ p = current;
++ else {
++ p = find_task_by_vpid(pid);
++ if (!p)
++ goto err_unlock;
++ }
++
++ ret = -EPERM;
++ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
++ goto err_unlock;
++
++ head = p->compat_robust_list;
++ rcu_read_unlock();
++
++ if (put_user(sizeof(*head), len_ptr))
++ return -EFAULT;
++ return put_user(ptr_to_compat(head), head_ptr);
++
++err_unlock:
++ rcu_read_unlock();
++
++ return ret;
++}
++
++COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
++ struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
++ u32, val3)
++{
++ struct timespec ts;
++ ktime_t t, *tp = NULL;
++ int val2 = 0;
++ int cmd = op & FUTEX_CMD_MASK;
++
++ if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
++ cmd == FUTEX_WAIT_BITSET ||
++ cmd == FUTEX_WAIT_REQUEUE_PI)) {
++ if (compat_get_timespec(&ts, utime))
++ return -EFAULT;
++ if (!timespec_valid(&ts))
++ return -EINVAL;
++
++ t = timespec_to_ktime(ts);
++ if (cmd == FUTEX_WAIT)
++ t = ktime_add_safe(ktime_get(), t);
++ tp = &t;
++ }
++ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
++ cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
++ val2 = (int) (unsigned long) utime;
++
++ return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
++}
++#endif /* CONFIG_COMPAT */
++
+ static void __init futex_detect_cmpxchg(void)
+ {
+ #ifndef CONFIG_HAVE_FUTEX_CMPXCHG
+--- a/kernel/futex_compat.c
++++ /dev/null
+@@ -1,202 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * linux/kernel/futex_compat.c
+- *
+- * Futex compatibililty routines.
+- *
+- * Copyright 2006, Red Hat, Inc., Ingo Molnar
+- */
+-
+-#include <linux/linkage.h>
+-#include <linux/compat.h>
+-#include <linux/nsproxy.h>
+-#include <linux/futex.h>
+-#include <linux/ptrace.h>
+-#include <linux/syscalls.h>
+-
+-#include <linux/uaccess.h>
+-
+-
+-/*
+- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+- */
+-static inline int
+-fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+- compat_uptr_t __user *head, unsigned int *pi)
+-{
+- if (get_user(*uentry, head))
+- return -EFAULT;
+-
+- *entry = compat_ptr((*uentry) & ~1);
+- *pi = (unsigned int)(*uentry) & 1;
+-
+- return 0;
+-}
+-
+-static void __user *futex_uaddr(struct robust_list __user *entry,
+- compat_long_t futex_offset)
+-{
+- compat_uptr_t base = ptr_to_compat(entry);
+- void __user *uaddr = compat_ptr(base + futex_offset);
+-
+- return uaddr;
+-}
+-
+-/*
+- * Walk curr->robust_list (very carefully, it's a userspace list!)
+- * and mark any locks found there dead, and notify any waiters.
+- *
+- * We silently return on any sign of list-walking problem.
+- */
+-void compat_exit_robust_list(struct task_struct *curr)
+-{
+- struct compat_robust_list_head __user *head = curr->compat_robust_list;
+- struct robust_list __user *entry, *next_entry, *pending;
+- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+- unsigned int uninitialized_var(next_pi);
+- compat_uptr_t uentry, next_uentry, upending;
+- compat_long_t futex_offset;
+- int rc;
+-
+- if (!futex_cmpxchg_enabled)
+- return;
+-
+- /*
+- * Fetch the list head (which was registered earlier, via
+- * sys_set_robust_list()):
+- */
+- if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
+- return;
+- /*
+- * Fetch the relative futex offset:
+- */
+- if (get_user(futex_offset, &head->futex_offset))
+- return;
+- /*
+- * Fetch any possibly pending lock-add first, and handle it
+- * if it exists:
+- */
+- if (fetch_robust_entry(&upending, &pending,
+- &head->list_op_pending, &pip))
+- return;
+-
+- next_entry = NULL; /* avoid warning with gcc */
+- while (entry != (struct robust_list __user *) &head->list) {
+- /*
+- * Fetch the next entry in the list before calling
+- * handle_futex_death:
+- */
+- rc = fetch_robust_entry(&next_uentry, &next_entry,
+- (compat_uptr_t __user *)&entry->next, &next_pi);
+- /*
+- * A pending lock might already be on the list, so
+- * dont process it twice:
+- */
+- if (entry != pending) {
+- void __user *uaddr = futex_uaddr(entry, futex_offset);
+-
+- if (handle_futex_death(uaddr, curr, pi))
+- return;
+- }
+- if (rc)
+- return;
+- uentry = next_uentry;
+- entry = next_entry;
+- pi = next_pi;
+- /*
+- * Avoid excessively long or circular lists:
+- */
+- if (!--limit)
+- break;
+-
+- cond_resched();
+- }
+- if (pending) {
+- void __user *uaddr = futex_uaddr(pending, futex_offset);
+-
+- handle_futex_death(uaddr, curr, pip);
+- }
+-}
+-
+-COMPAT_SYSCALL_DEFINE2(set_robust_list,
+- struct compat_robust_list_head __user *, head,
+- compat_size_t, len)
+-{
+- if (!futex_cmpxchg_enabled)
+- return -ENOSYS;
+-
+- if (unlikely(len != sizeof(*head)))
+- return -EINVAL;
+-
+- current->compat_robust_list = head;
+-
+- return 0;
+-}
+-
+-COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
+- compat_uptr_t __user *, head_ptr,
+- compat_size_t __user *, len_ptr)
+-{
+- struct compat_robust_list_head __user *head;
+- unsigned long ret;
+- struct task_struct *p;
+-
+- if (!futex_cmpxchg_enabled)
+- return -ENOSYS;
+-
+- rcu_read_lock();
+-
+- ret = -ESRCH;
+- if (!pid)
+- p = current;
+- else {
+- p = find_task_by_vpid(pid);
+- if (!p)
+- goto err_unlock;
+- }
+-
+- ret = -EPERM;
+- if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+- goto err_unlock;
+-
+- head = p->compat_robust_list;
+- rcu_read_unlock();
+-
+- if (put_user(sizeof(*head), len_ptr))
+- return -EFAULT;
+- return put_user(ptr_to_compat(head), head_ptr);
+-
+-err_unlock:
+- rcu_read_unlock();
+-
+- return ret;
+-}
+-
+-COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
+- struct compat_timespec __user *, utime, u32 __user *, uaddr2,
+- u32, val3)
+-{
+- struct timespec ts;
+- ktime_t t, *tp = NULL;
+- int val2 = 0;
+- int cmd = op & FUTEX_CMD_MASK;
+-
+- if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
+- cmd == FUTEX_WAIT_BITSET ||
+- cmd == FUTEX_WAIT_REQUEUE_PI)) {
+- if (compat_get_timespec(&ts, utime))
+- return -EFAULT;
+- if (!timespec_valid(&ts))
+- return -EINVAL;
+-
+- t = timespec_to_ktime(ts);
+- if (cmd == FUTEX_WAIT)
+- t = ktime_add_safe(ktime_get(), t);
+- tp = &t;
+- }
+- if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+- cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
+- val2 = (int) (unsigned long) utime;
+-
+- return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+-}