4.19-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
date      Wed, 27 Nov 2019 12:33:44 +0000 (13:33 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
date      Wed, 27 Nov 2019 12:33:44 +0000 (13:33 +0100)
added patches:
futex-prevent-robust-futex-exit-race.patch
y2038-futex-move-compat-implementation-into-futex.c.patch

queue-4.19/futex-prevent-exit-livelock.patch [deleted file]
queue-4.19/futex-prevent-robust-futex-exit-race.patch [new file with mode: 0644]
queue-4.19/series
queue-4.19/y2038-futex-move-compat-implementation-into-futex.c.patch [new file with mode: 0644]

diff --git a/queue-4.19/futex-prevent-exit-livelock.patch b/queue-4.19/futex-prevent-exit-livelock.patch
deleted file mode 100644
index 45d6ad3..0000000
--- a/queue-4.19/futex-prevent-exit-livelock.patch
+++ /dev/null
@@ -1,343 +0,0 @@
-From 3ef240eaff36b8119ac9e2ea17cbf41179c930ba Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 6 Nov 2019 22:55:46 +0100
-Subject: futex: Prevent exit livelock
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit 3ef240eaff36b8119ac9e2ea17cbf41179c930ba upstream.
-
-Oleg provided the following test case:
-
-int main(void)
-{
-       struct sched_param sp = {};
-
-       sp.sched_priority = 2;
-       assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0);
-
-       int lock = vfork();
-       if (!lock) {
-               sp.sched_priority = 1;
-               assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0);
-               _exit(0);
-       }
-
-       syscall(__NR_futex, &lock, FUTEX_LOCK_PI, 0,0,0);
-       return 0;
-}
-
-This creates an unkillable RT process spinning in futex_lock_pi() on a UP
-machine or if the process is affine to a single CPU. The reason is:
-
- parent                                        child
-
-  set FIFO prio 2
-
-  vfork()                      ->      set FIFO prio 1
-   implies wait_for_child()            sched_setscheduler(...)
-                                       exit()
-                                       do_exit()
-                                       ....
-                                       mm_release()
-                                         tsk->futex_state = FUTEX_STATE_EXITING;
-                                         exit_futex(); (NOOP in this case)
-                                         complete() --> wakes parent
-  sys_futex()
-    loop infinite because
-    tsk->futex_state == FUTEX_STATE_EXITING
-
-The same problem can happen just by regular preemption as well:
-
-  task holds futex
-  ...
-  do_exit()
-    tsk->futex_state = FUTEX_STATE_EXITING;
-
-  --> preemption (unrelated wakeup of some other higher prio task, e.g. timer)
-
-  switch_to(other_task)
-
-  return to user
-  sys_futex()
-       loop infinite as above
-
-Just for the fun of it the futex exit cleanup could trigger the wakeup
-itself before the task sets its futex state to DEAD.
-
-To cure this, the handling of the exiting owner is changed so that:
-
-   - A refcount is held on the task
-
-   - The task pointer is stored in a caller visible location
-
-   - The caller drops all locks (hash bucket, mmap_sem) and blocks
-     on task::futex_exit_mutex. When the mutex is acquired then
-     the exiting task has completed the cleanup and the state
-     is consistent and can be reevaluated.
-
-This is not a pretty solution, but there is no choice other than returning
-an error code to user space, which would break the state consistency
-guarantee and open another can of worms including regressions.
-
-For stable backports the preparatory commits ac31c7ff8624 .. ba31c1a48538
-are required as well, but for anything older than 5.3.y the backports are
-going to be provided when this hits mainline as the other dependencies for
-those kernels are definitely not stable material.
-
-Fixes: 778e9a9c3e71 ("pi-futex: fix exit races and locking problems")
-Reported-by: Oleg Nesterov <oleg@redhat.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Ingo Molnar <mingo@kernel.org>
-Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Cc: Stable Team <stable@vger.kernel.org>
-Link: https://lkml.kernel.org/r/20191106224557.041676471@linutronix.de
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-
----
- kernel/futex.c |  106 ++++++++++++++++++++++++++++++++++++++++++++++++---------
- 1 file changed, 91 insertions(+), 15 deletions(-)
-
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -1148,6 +1148,36 @@ out_error:
-       return ret;
- }
-+/**
-+ * wait_for_owner_exiting - Block until the owner has exited
-+ * @exiting:  Pointer to the exiting task
-+ *
-+ * Caller must hold a refcount on @exiting.
-+ */
-+static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
-+{
-+      if (ret != -EBUSY) {
-+              WARN_ON_ONCE(exiting);
-+              return;
-+      }
-+
-+      if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
-+              return;
-+
-+      mutex_lock(&exiting->futex_exit_mutex);
-+      /*
-+       * No point in doing state checking here. If the waiter got here
-+       * while the task was in exec()->exec_futex_release() then it can
-+       * have any FUTEX_STATE_* value when the waiter has acquired the
-+       * mutex. OK, if running, EXITING or DEAD if it reached exit()
-+       * already. Highly unlikely and not a problem. Just one more round
-+       * through the futex maze.
-+       */
-+      mutex_unlock(&exiting->futex_exit_mutex);
-+
-+      put_task_struct(exiting);
-+}
-+
- static int handle_exit_race(u32 __user *uaddr, u32 uval,
-                           struct task_struct *tsk)
- {
-@@ -1207,7 +1237,8 @@ static int handle_exit_race(u32 __user *
-  * it after doing proper sanity checks.
-  */
- static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
--                            struct futex_pi_state **ps)
-+                            struct futex_pi_state **ps,
-+                            struct task_struct **exiting)
- {
-       pid_t pid = uval & FUTEX_TID_MASK;
-       struct futex_pi_state *pi_state;
-@@ -1247,7 +1278,19 @@ static int attach_to_pi_owner(u32 __user
-               int ret = handle_exit_race(uaddr, uval, p);
-               raw_spin_unlock_irq(&p->pi_lock);
--              put_task_struct(p);
-+              /*
-+               * If the owner task is between FUTEX_STATE_EXITING and
-+               * FUTEX_STATE_DEAD then store the task pointer and keep
-+               * the reference on the task struct. The calling code will
-+               * drop all locks, wait for the task to reach
-+               * FUTEX_STATE_DEAD and then drop the refcount. This is
-+               * required to prevent a live lock when the current task
-+               * preempted the exiting task between the two states.
-+               */
-+              if (ret == -EBUSY)
-+                      *exiting = p;
-+              else
-+                      put_task_struct(p);
-               return ret;
-       }
-@@ -1286,7 +1329,8 @@ static int attach_to_pi_owner(u32 __user
- static int lookup_pi_state(u32 __user *uaddr, u32 uval,
-                          struct futex_hash_bucket *hb,
--                         union futex_key *key, struct futex_pi_state **ps)
-+                         union futex_key *key, struct futex_pi_state **ps,
-+                         struct task_struct **exiting)
- {
-       struct futex_q *top_waiter = futex_top_waiter(hb, key);
-@@ -1301,7 +1345,7 @@ static int lookup_pi_state(u32 __user *u
-        * We are the first waiter - try to look up the owner based on
-        * @uval and attach to it.
-        */
--      return attach_to_pi_owner(uaddr, uval, key, ps);
-+      return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
- }
- static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
-@@ -1329,6 +1373,8 @@ static int lock_pi_update_atomic(u32 __u
-  *                    lookup
-  * @task:             the task to perform the atomic lock work for.  This will
-  *                    be "current" except in the case of requeue pi.
-+ * @exiting:          Pointer to store the task pointer of the owner task
-+ *                    which is in the middle of exiting
-  * @set_waiters:      force setting the FUTEX_WAITERS bit (1) or not (0)
-  *
-  * Return:
-@@ -1337,11 +1383,17 @@ static int lock_pi_update_atomic(u32 __u
-  *  - <0 - error
-  *
-  * The hb->lock and futex_key refs shall be held by the caller.
-+ *
-+ * @exiting is only set when the return value is -EBUSY. If so, this holds
-+ * a refcount on the exiting task on return and the caller needs to drop it
-+ * after waiting for the exit to complete.
-  */
- static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
-                               union futex_key *key,
-                               struct futex_pi_state **ps,
--                              struct task_struct *task, int set_waiters)
-+                              struct task_struct *task,
-+                              struct task_struct **exiting,
-+                              int set_waiters)
- {
-       u32 uval, newval, vpid = task_pid_vnr(task);
-       struct futex_q *top_waiter;
-@@ -1411,7 +1463,7 @@ static int futex_lock_pi_atomic(u32 __us
-        * attach to the owner. If that fails, no harm done, we only
-        * set the FUTEX_WAITERS bit in the user space variable.
-        */
--      return attach_to_pi_owner(uaddr, newval, key, ps);
-+      return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
- }
- /**
-@@ -1830,6 +1882,8 @@ void requeue_pi_wake_futex(struct futex_
-  * @key1:             the from futex key
-  * @key2:             the to futex key
-  * @ps:                       address to store the pi_state pointer
-+ * @exiting:          Pointer to store the task pointer of the owner task
-+ *                    which is in the middle of exiting
-  * @set_waiters:      force setting the FUTEX_WAITERS bit (1) or not (0)
-  *
-  * Try and get the lock on behalf of the top waiter if we can do it atomically.
-@@ -1837,16 +1891,20 @@ void requeue_pi_wake_futex(struct futex_
-  * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
-  * hb1 and hb2 must be held by the caller.
-  *
-+ * @exiting is only set when the return value is -EBUSY. If so, this holds
-+ * a refcount on the exiting task on return and the caller needs to drop it
-+ * after waiting for the exit to complete.
-+ *
-  * Return:
-  *  -  0 - failed to acquire the lock atomically;
-  *  - >0 - acquired the lock, return value is vpid of the top_waiter
-  *  - <0 - error
-  */
--static int futex_proxy_trylock_atomic(u32 __user *pifutex,
--                               struct futex_hash_bucket *hb1,
--                               struct futex_hash_bucket *hb2,
--                               union futex_key *key1, union futex_key *key2,
--                               struct futex_pi_state **ps, int set_waiters)
-+static int
-+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
-+                         struct futex_hash_bucket *hb2, union futex_key *key1,
-+                         union futex_key *key2, struct futex_pi_state **ps,
-+                         struct task_struct **exiting, int set_waiters)
- {
-       struct futex_q *top_waiter = NULL;
-       u32 curval;
-@@ -1883,7 +1941,7 @@ static int futex_proxy_trylock_atomic(u3
-        */
-       vpid = task_pid_vnr(top_waiter->task);
-       ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
--                                 set_waiters);
-+                                 exiting, set_waiters);
-       if (ret == 1) {
-               requeue_pi_wake_futex(top_waiter, key2, hb2);
-               return vpid;
-@@ -2012,6 +2070,8 @@ retry_private:
-       }
-       if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
-+              struct task_struct *exiting = NULL;
-+
-               /*
-                * Attempt to acquire uaddr2 and wake the top waiter. If we
-                * intend to requeue waiters, force setting the FUTEX_WAITERS
-@@ -2019,7 +2079,8 @@ retry_private:
-                * faults rather in the requeue loop below.
-                */
-               ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
--                                               &key2, &pi_state, nr_requeue);
-+                                               &key2, &pi_state,
-+                                               &exiting, nr_requeue);
-               /*
-                * At this point the top_waiter has either taken uaddr2 or is
-@@ -2046,7 +2107,8 @@ retry_private:
-                        * If that call succeeds then we have pi_state and an
-                        * initial refcount on it.
-                        */
--                      ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state);
-+                      ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
-+                                            &pi_state, &exiting);
-               }
-               switch (ret) {
-@@ -2075,6 +2137,12 @@ retry_private:
-                       hb_waiters_dec(hb2);
-                       put_futex_key(&key2);
-                       put_futex_key(&key1);
-+                      /*
-+                       * Handle the case where the owner is in the middle of
-+                       * exiting. Wait for the exit to complete otherwise
-+                       * this task might loop forever, aka. live lock.
-+                       */
-+                      wait_for_owner_exiting(ret, exiting);
-                       cond_resched();
-                       goto retry;
-               default:
-@@ -2790,6 +2858,7 @@ static int futex_lock_pi(u32 __user *uad
- {
-       struct hrtimer_sleeper timeout, *to = NULL;
-       struct futex_pi_state *pi_state = NULL;
-+      struct task_struct *exiting = NULL;
-       struct rt_mutex_waiter rt_waiter;
-       struct futex_hash_bucket *hb;
-       struct futex_q q = futex_q_init;
-@@ -2817,7 +2886,8 @@ retry:
- retry_private:
-       hb = queue_lock(&q);
--      ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
-+      ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
-+                                 &exiting, 0);
-       if (unlikely(ret)) {
-               /*
-                * Atomic work succeeded and we got the lock,
-@@ -2839,6 +2909,12 @@ retry_private:
-                        */
-                       queue_unlock(hb);
-                       put_futex_key(&q.key);
-+                      /*
-+                       * Handle the case where the owner is in the middle of
-+                       * exiting. Wait for the exit to complete otherwise
-+                       * this task might loop forever, aka. live lock.
-+                       */
-+                      wait_for_owner_exiting(ret, exiting);
-                       cond_resched();
-                       goto retry;
-               default:
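
For illustration, here is a minimal user space model of the blocking scheme
the changelog above describes. This is not kernel code: the pthread-based
owner/try_attach()/wait_for_owner_exiting() simulation below is invented for
this sketch; only the pattern (block on a mutex held across the whole exit
cleanup instead of spinning on -EBUSY) mirrors the patch.

/* livelock_model.c - build with: cc -pthread livelock_model.c */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

enum { ST_RUNNING, ST_EXITING, ST_DEAD };	/* models FUTEX_STATE_* */

struct owner {
	pthread_mutex_t exit_mutex;	/* models task::futex_exit_mutex */
	int state;			/* models tsk->futex_state */
};

static struct owner owner = { PTHREAD_MUTEX_INITIALIZER, ST_RUNNING };

/* Models attach_to_pi_owner(): -EBUSY while the owner is exiting. */
static int try_attach(struct owner *o, struct owner **exiting)
{
	switch (__atomic_load_n(&o->state, __ATOMIC_ACQUIRE)) {
	case ST_EXITING:
		*exiting = o;		/* caller keeps the reference */
		return -EBUSY;
	case ST_DEAD:
		return -ESRCH;
	default:
		return 0;
	}
}

/* Models wait_for_owner_exiting(): sleep on the mutex, don't spin. */
static void wait_for_owner_exiting(int ret, struct owner *exiting)
{
	if (ret != -EBUSY)
		return;
	pthread_mutex_lock(&exiting->exit_mutex);	/* blocks until exit done */
	pthread_mutex_unlock(&exiting->exit_mutex);
}

static void *owner_exit(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&owner.exit_mutex);		/* exit cleanup begins */
	__atomic_store_n(&owner.state, ST_EXITING, __ATOMIC_RELEASE);
	sleep(1);					/* window where waiters see -EBUSY */
	__atomic_store_n(&owner.state, ST_DEAD, __ATOMIC_RELEASE);
	pthread_mutex_unlock(&owner.exit_mutex);	/* exit cleanup done */
	return NULL;
}

int main(void)
{
	struct owner *exiting = NULL;
	pthread_t t;
	int ret;

	pthread_create(&t, NULL, owner_exit, NULL);
	usleep(100 * 1000);				/* let the owner start exiting */

	for (;;) {					/* models the futex retry loop */
		ret = try_attach(&owner, &exiting);
		if (ret != -EBUSY)
			break;
		/* Without this call an RT waiter would spin here forever. */
		wait_for_owner_exiting(ret, exiting);
	}
	printf("attach result: %d (0 = ok, %d = owner dead)\n", ret, -ESRCH);
	pthread_join(t, NULL);
	return 0;
}
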
diff --git a/queue-4.19/futex-prevent-robust-futex-exit-race.patch b/queue-4.19/futex-prevent-robust-futex-exit-race.patch
new file mode 100644
index 0000000..c551328
--- /dev/null
+++ b/queue-4.19/futex-prevent-robust-futex-exit-race.patch
@@ -0,0 +1,261 @@
+From ca16d5bee59807bf04deaab0a8eccecd5061528c Mon Sep 17 00:00:00 2001
+From: Yang Tao <yang.tao172@zte.com.cn>
+Date: Wed, 6 Nov 2019 22:55:35 +0100
+Subject: futex: Prevent robust futex exit race
+
+From: Yang Tao <yang.tao172@zte.com.cn>
+
+commit ca16d5bee59807bf04deaab0a8eccecd5061528c upstream.
+
+Robust futexes utilize the robust_list mechanism to allow the kernel to
+release futexes which are held when a task exits. The exit can be voluntary
+or caused by a signal or fault. This prevents waiters from blocking forever.
+
+The futex operations in user space store a pointer to the futex they are
+either locking or unlocking in the op_pending member of the per task robust
+list.
+
+After a lock operation has succeeded the futex is queued in the robust list
+linked list and the op_pending pointer is cleared.
+
+After an unlock operation has succeeded the futex is removed from the
+robust list linked list and the op_pending pointer is cleared.
+
+The robust list exit code checks for the pending operation and any futex
+which is queued in the linked list. It carefully checks whether the futex
+value is the TID of the exiting task. If so, it sets the OWNER_DIED bit and
+tries to wake up a potential waiter.
+
+This is race free for the lock operation but unlock has two race scenarios
+where waiters might not be woken up. These issues can be observed with
+regular robust pthread mutexes. PI aware pthread mutexes are not affected.
+
+(1) Unlocking task is killed after unlocking the futex value in user space
+    before being able to wake a waiter.
+
+        pthread_mutex_unlock()
+                |
+                V
+        atomic_exchange_rel (&mutex->__data.__lock, 0)
+                        <------------------------killed
+            lll_futex_wake ()                   |
+                                                |
+                                                |(__lock = 0)
+                                                |(enter kernel)
+                                                |
+                                                V
+                                            do_exit()
+                                            exit_mm()
+                                          mm_release()
+                                        exit_robust_list()
+                                        handle_futex_death()
+                                                |
+                                                |(__lock = 0)
+                                                |(uval = 0)
+                                                |
+                                                V
+        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
+                return 0;
+
+    The sanity check which ensures that the user space futex is owned by
+    the exiting task prevents the wakeup of waiters which in consequence
+    block infinitely.
+
+(2) Waiting task is killed after a wakeup and before it can acquire the
+    futex in user space.
+
+        OWNER                         WAITER
+                               futex_wait()
+   pthread_mutex_unlock()               |
+                |                       |
+                |(__lock = 0)           |
+                |                       |
+                V                       |
+         futex_wake() ------------>  wakeup()
+                                        |
+                                        |(return to userspace)
+                                        |(__lock = 0)
+                                        |
+                                        V
+                        oldval = mutex->__data.__lock
+                                          <-----------------killed
+    atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,  |
+                        id | assume_other_futex_waiters, 0)      |
+                                                                 |
+                                                                 |
+                                                   (enter kernel)|
+                                                                 |
+                                                                 V
+                                                         do_exit()
+                                                        |
+                                                        |
+                                                        V
+                                        handle_futex_death()
+                                        |
+                                        |(__lock = 0)
+                                        |(uval = 0)
+                                        |
+                                        V
+        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
+                return 0;
+
+    The sanity check which ensures that the user space futex is owned
+    by the exiting task prevents the wakeup of waiters, which seems to
+    be correct as the exiting task does not own the futex value, but
+    the consequence is that other waiters won't be woken up and will block
+    infinitely.
+
+In both scenarios the following conditions are true:
+
+   - task->robust_list->list_op_pending != NULL
+   - user space futex value == 0
+   - Regular futex (not PI)
+
+If these conditions are met then it is reasonably safe to wake up a
+potential waiter in order to prevent the above problems.
+
+As this might be a false positive, it can cause spurious wakeups, but the
+waiter side has to handle other types of unrelated wakeups, e.g. signals,
+gracefully anyway. So such a spurious wakeup will not affect the
+correctness of these operations.
+
+This workaround must not touch the user space futex value and cannot set
+the OWNER_DIED bit because the lock value is 0, i.e. uncontended. Setting
+OWNER_DIED in this case would result in inconsistent state and subsequently
+in malfunction of the owner died handling in user space.
+
+The rest of the user space state is still consistent as no other task can
+observe the list_op_pending entry in the exiting task's robust list.
+
+The eventually woken up waiter will observe the uncontended lock value and
+take it over.
+
+[ tglx: Massaged changelog and comment. Made the return explicit and not
+       depend on the subsequent check and added constants to hand into
+       handle_futex_death() instead of plain numbers. Fixed a few coding
+       style issues. ]
+
+Fixes: 0771dfefc9e5 ("[PATCH] lightweight robust futexes: core")
+Signed-off-by: Yang Tao <yang.tao172@zte.com.cn>
+Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/1573010582-35297-1-git-send-email-wang.yi59@zte.com.cn
+Link: https://lkml.kernel.org/r/20191106224555.943191378@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/futex.c |   58 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 51 insertions(+), 7 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -3457,11 +3457,16 @@ err_unlock:
+       return ret;
+ }
++/* Constants for the pending_op argument of handle_futex_death */
++#define HANDLE_DEATH_PENDING  true
++#define HANDLE_DEATH_LIST     false
++
+ /*
+  * Process a futex-list entry, check whether it's owned by the
+  * dying task, and do notification if so:
+  */
+-static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
++static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
++                            bool pi, bool pending_op)
+ {
+       u32 uval, uninitialized_var(nval), mval;
+       int err;
+@@ -3474,6 +3479,42 @@ retry:
+       if (get_user(uval, uaddr))
+               return -1;
++      /*
++       * Special case for regular (non PI) futexes. The unlock path in
++       * user space has two race scenarios:
++       *
++       * 1. The unlock path releases the user space futex value and
++       *    before it can execute the futex() syscall to wake up
++       *    waiters it is killed.
++       *
++       * 2. A woken up waiter is killed before it can acquire the
++       *    futex in user space.
++       *
++       * In both cases the TID validation below prevents a wakeup of
++       * potential waiters which can cause these waiters to block
++       * forever.
++       *
++       * In both cases the following conditions are met:
++       *
++       *      1) task->robust_list->list_op_pending != NULL
++       *         @pending_op == true
++       *      2) User space futex value == 0
++       *      3) Regular futex: @pi == false
++       *
++       * If these conditions are met, it is safe to attempt waking up a
++       * potential waiter without touching the user space futex value and
++       * trying to set the OWNER_DIED bit. The user space futex value is
++       * uncontended and the rest of the user space mutex state is
++       * consistent, so a woken waiter will just take over the
++       * uncontended futex. Setting the OWNER_DIED bit would create
++       * inconsistent state and malfunction of the user space owner died
++       * handling.
++       */
++      if (pending_op && !pi && !uval) {
++              futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
++              return 0;
++      }
++
+       if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
+               return 0;
+@@ -3593,10 +3634,11 @@ void exit_robust_list(struct task_struct
+                * A pending lock might already be on the list, so
+                * don't process it twice:
+                */
+-              if (entry != pending)
++              if (entry != pending) {
+                       if (handle_futex_death((void __user *)entry + futex_offset,
+-                                              curr, pi))
++                                              curr, pi, HANDLE_DEATH_LIST))
+                               return;
++              }
+               if (rc)
+                       return;
+               entry = next_entry;
+@@ -3610,9 +3652,10 @@ void exit_robust_list(struct task_struct
+               cond_resched();
+       }
+-      if (pending)
++      if (pending) {
+               handle_futex_death((void __user *)pending + futex_offset,
+-                                 curr, pip);
++                                 curr, pip, HANDLE_DEATH_PENDING);
++      }
+ }
+ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+@@ -3789,7 +3832,8 @@ void compat_exit_robust_list(struct task
+               if (entry != pending) {
+                       void __user *uaddr = futex_uaddr(entry, futex_offset);
+-                      if (handle_futex_death(uaddr, curr, pi))
++                      if (handle_futex_death(uaddr, curr, pi,
++                                             HANDLE_DEATH_LIST))
+                               return;
+               }
+               if (rc)
+@@ -3808,7 +3852,7 @@ void compat_exit_robust_list(struct task
+       if (pending) {
+               void __user *uaddr = futex_uaddr(pending, futex_offset);
+-              handle_futex_death(uaddr, curr, pip);
++              handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
+       }
+ }
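
To make the user space side of the protocol concrete, below is a stripped-down
sketch of how a robust lock registers itself with the kernel. The
set_robust_list() syscall and the robust_list_head layout are real (from
<linux/futex.h>); the robust_mutex type, the lock_robust()/unlock_robust()
helpers and the single-entry list handling are simplifications invented for
this sketch -- a production implementation (glibc's pthread mutexes)
additionally manages the FUTEX_WAITERS bit and a doubly linked list.

/* robust_model.c - build with: cc robust_model.c */
#define _GNU_SOURCE
#include <linux/futex.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

struct robust_mutex {			/* hypothetical mutex layout */
	struct robust_list list;	/* link in the per-task robust list */
	uint32_t futex;			/* 0 = unlocked, else owner TID */
};

static struct robust_list_head head;
static struct robust_mutex m;

static void lock_robust(struct robust_mutex *mu)
{
	uint32_t tid = syscall(SYS_gettid);
	uint32_t expected = 0;

	/* Announce the pending operation BEFORE touching the futex word,
	 * so exit_robust_list() can find it if this task dies right here. */
	head.list_op_pending = &mu->list;

	while (!__atomic_compare_exchange_n(&mu->futex, &expected, tid, 0,
					    __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
		/* Contended: sleep until the owner -- or its death -- wakes us. */
		syscall(SYS_futex, &mu->futex, FUTEX_WAIT, expected, NULL, NULL, 0);
		expected = 0;
	}

	/* Acquired: queue the lock in the robust list, then clear the
	 * pending pointer -- the order the changelog above describes. */
	mu->list.next = head.list.next;
	head.list.next = &mu->list;
	head.list_op_pending = NULL;
}

static void unlock_robust(struct robust_mutex *mu)
{
	head.list_op_pending = &mu->list;
	head.list.next = &head.list;	/* dequeue (single-entry list) */
	__atomic_store_n(&mu->futex, 0, __ATOMIC_RELEASE);
	/* Race (1) above: dying between the store above and this wake used
	 * to strand waiters; the pending_op handling now covers it. */
	syscall(SYS_futex, &mu->futex, FUTEX_WAKE, 1, NULL, NULL, 0);
	head.list_op_pending = NULL;
}

int main(void)
{
	head.list.next = &head.list;	/* empty circular list */
	head.futex_offset = offsetof(struct robust_mutex, futex);
	head.list_op_pending = NULL;
	syscall(SYS_set_robust_list, &head, sizeof(head));

	lock_robust(&m);
	printf("locked, owner tid %u\n", m.futex);
	unlock_robust(&m);
	return 0;
}

The list_op_pending store ahead of the atomic operation is exactly what lets
handle_futex_death() find a futex whose lock or unlock was interrupted by
death -- and, with this patch, wake a waiter even when the futex value is
already 0.
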
diff --git a/queue-4.19/series b/queue-4.19/series
index 35122dc811abd181ad0119c6bf62e30d849ea9bb..f6ed71565b993502b05aa26860d8019ee4065a07 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -275,7 +275,8 @@ selftests-x86-mov_ss_trap-fix-the-sysenter-test.patch
 selftests-x86-sigreturn-32-invalidate-ds-and-es-when-abusing-the-kernel.patch
 x86-pti-32-calculate-the-various-pti-cpu_entry_area-sizes-correctly-make-the-cpu_entry_area_pages-assert-precise.patch
 x86-entry-32-fix-fixup_espfix_stack-with-user-cr3.patch
-futex-prevent-exit-livelock.patch
+y2038-futex-move-compat-implementation-into-futex.c.patch
+futex-prevent-robust-futex-exit-race.patch
 alsa-usb-audio-fix-null-dereference-at-parsing-badd.patch
 nfc-port100-handle-command-failure-cleanly.patch
 net-sysfs-fix-reference-count-leak-in-rx-netdev_queue_add_kobject.patch
diff --git a/queue-4.19/y2038-futex-move-compat-implementation-into-futex.c.patch b/queue-4.19/y2038-futex-move-compat-implementation-into-futex.c.patch
new file mode 100644
index 0000000..a2c847f
--- /dev/null
+++ b/queue-4.19/y2038-futex-move-compat-implementation-into-futex.c.patch
@@ -0,0 +1,501 @@
+From 04e7712f4460585e5eed5b853fd8b82a9943958f Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Tue, 17 Apr 2018 16:31:07 +0200
+Subject: y2038: futex: Move compat implementation into futex.c
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit 04e7712f4460585e5eed5b853fd8b82a9943958f upstream.
+
+We are going to share the compat_sys_futex() handler between 64-bit
+architectures and 32-bit architectures that need to deal with both 32-bit
+and 64-bit time_t, and this is easier if both entry points are in the
+same file.
+
+In fact, most other system call handlers do the same thing these days, so
+let's follow the trend here and merge all of futex_compat.c into futex.c.
+
+In the process, a few minor changes have to be done to make sure everything
+still makes sense: handle_futex_death() and futex_cmpxchg_enabled() become
+local symbol, and the compat version of the fetch_robust_entry() function
+gets renamed to compat_fetch_robust_entry() to avoid a symbol clash.
+
+This is intended as a purely cosmetic patch; no behavior should
+change.
+
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/futex.h |    8 -
+ kernel/Makefile       |    3 
+ kernel/futex.c        |  195 +++++++++++++++++++++++++++++++++++++++++++++++-
+ kernel/futex_compat.c |  202 --------------------------------------------------
+ 4 files changed, 192 insertions(+), 216 deletions(-)
+
+--- a/include/linux/futex.h
++++ b/include/linux/futex.h
+@@ -9,9 +9,6 @@ struct inode;
+ struct mm_struct;
+ struct task_struct;
+-extern int
+-handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
+-
+ /*
+  * Futexes are matched on equal values of this key.
+  * The key type depends on whether it's a shared or private mapping.
+@@ -55,11 +52,6 @@ extern void exit_robust_list(struct task
+ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+             u32 __user *uaddr2, u32 val2, u32 val3);
+-#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
+-#define futex_cmpxchg_enabled 1
+-#else
+-extern int futex_cmpxchg_enabled;
+-#endif
+ #else
+ static inline void exit_robust_list(struct task_struct *curr)
+ {
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -50,9 +50,6 @@ obj-$(CONFIG_PROFILING) += profile.o
+ obj-$(CONFIG_STACKTRACE) += stacktrace.o
+ obj-y += time/
+ obj-$(CONFIG_FUTEX) += futex.o
+-ifeq ($(CONFIG_COMPAT),y)
+-obj-$(CONFIG_FUTEX) += futex_compat.o
+-endif
+ obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
+ obj-$(CONFIG_SMP) += smp.o
+ ifneq ($(CONFIG_SMP),y)
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -44,6 +44,7 @@
+  *  along with this program; if not, write to the Free Software
+  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+  */
++#include <linux/compat.h>
+ #include <linux/slab.h>
+ #include <linux/poll.h>
+ #include <linux/fs.h>
+@@ -173,8 +174,10 @@
+  * double_lock_hb() and double_unlock_hb(), respectively.
+  */
+-#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
+-int __read_mostly futex_cmpxchg_enabled;
++#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
++#define futex_cmpxchg_enabled 1
++#else
++static int  __read_mostly futex_cmpxchg_enabled;
+ #endif
+ /*
+@@ -3458,7 +3461,7 @@ err_unlock:
+  * Process a futex-list entry, check whether it's owned by the
+  * dying task, and do notification if so:
+  */
+-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
++static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
+ {
+       u32 uval, uninitialized_var(nval), mval;
+       int err;
+@@ -3707,6 +3710,192 @@ SYSCALL_DEFINE6(futex, u32 __user *, uad
+       return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+ }
++#ifdef CONFIG_COMPAT
++/*
++ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
++ */
++static inline int
++compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
++                 compat_uptr_t __user *head, unsigned int *pi)
++{
++      if (get_user(*uentry, head))
++              return -EFAULT;
++
++      *entry = compat_ptr((*uentry) & ~1);
++      *pi = (unsigned int)(*uentry) & 1;
++
++      return 0;
++}
++
++static void __user *futex_uaddr(struct robust_list __user *entry,
++                              compat_long_t futex_offset)
++{
++      compat_uptr_t base = ptr_to_compat(entry);
++      void __user *uaddr = compat_ptr(base + futex_offset);
++
++      return uaddr;
++}
++
++/*
++ * Walk curr->robust_list (very carefully, it's a userspace list!)
++ * and mark any locks found there dead, and notify any waiters.
++ *
++ * We silently return on any sign of list-walking problem.
++ */
++void compat_exit_robust_list(struct task_struct *curr)
++{
++      struct compat_robust_list_head __user *head = curr->compat_robust_list;
++      struct robust_list __user *entry, *next_entry, *pending;
++      unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
++      unsigned int uninitialized_var(next_pi);
++      compat_uptr_t uentry, next_uentry, upending;
++      compat_long_t futex_offset;
++      int rc;
++
++      if (!futex_cmpxchg_enabled)
++              return;
++
++      /*
++       * Fetch the list head (which was registered earlier, via
++       * sys_set_robust_list()):
++       */
++      if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
++              return;
++      /*
++       * Fetch the relative futex offset:
++       */
++      if (get_user(futex_offset, &head->futex_offset))
++              return;
++      /*
++       * Fetch any possibly pending lock-add first, and handle it
++       * if it exists:
++       */
++      if (compat_fetch_robust_entry(&upending, &pending,
++                             &head->list_op_pending, &pip))
++              return;
++
++      next_entry = NULL;      /* avoid warning with gcc */
++      while (entry != (struct robust_list __user *) &head->list) {
++              /*
++               * Fetch the next entry in the list before calling
++               * handle_futex_death:
++               */
++              rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
++                      (compat_uptr_t __user *)&entry->next, &next_pi);
++              /*
++               * A pending lock might already be on the list, so
++               * dont process it twice:
++               */
++              if (entry != pending) {
++                      void __user *uaddr = futex_uaddr(entry, futex_offset);
++
++                      if (handle_futex_death(uaddr, curr, pi))
++                              return;
++              }
++              if (rc)
++                      return;
++              uentry = next_uentry;
++              entry = next_entry;
++              pi = next_pi;
++              /*
++               * Avoid excessively long or circular lists:
++               */
++              if (!--limit)
++                      break;
++
++              cond_resched();
++      }
++      if (pending) {
++              void __user *uaddr = futex_uaddr(pending, futex_offset);
++
++              handle_futex_death(uaddr, curr, pip);
++      }
++}
++
++COMPAT_SYSCALL_DEFINE2(set_robust_list,
++              struct compat_robust_list_head __user *, head,
++              compat_size_t, len)
++{
++      if (!futex_cmpxchg_enabled)
++              return -ENOSYS;
++
++      if (unlikely(len != sizeof(*head)))
++              return -EINVAL;
++
++      current->compat_robust_list = head;
++
++      return 0;
++}
++
++COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
++                      compat_uptr_t __user *, head_ptr,
++                      compat_size_t __user *, len_ptr)
++{
++      struct compat_robust_list_head __user *head;
++      unsigned long ret;
++      struct task_struct *p;
++
++      if (!futex_cmpxchg_enabled)
++              return -ENOSYS;
++
++      rcu_read_lock();
++
++      ret = -ESRCH;
++      if (!pid)
++              p = current;
++      else {
++              p = find_task_by_vpid(pid);
++              if (!p)
++                      goto err_unlock;
++      }
++
++      ret = -EPERM;
++      if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
++              goto err_unlock;
++
++      head = p->compat_robust_list;
++      rcu_read_unlock();
++
++      if (put_user(sizeof(*head), len_ptr))
++              return -EFAULT;
++      return put_user(ptr_to_compat(head), head_ptr);
++
++err_unlock:
++      rcu_read_unlock();
++
++      return ret;
++}
++
++COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
++              struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
++              u32, val3)
++{
++      struct timespec ts;
++      ktime_t t, *tp = NULL;
++      int val2 = 0;
++      int cmd = op & FUTEX_CMD_MASK;
++
++      if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
++                    cmd == FUTEX_WAIT_BITSET ||
++                    cmd == FUTEX_WAIT_REQUEUE_PI)) {
++              if (compat_get_timespec(&ts, utime))
++                      return -EFAULT;
++              if (!timespec_valid(&ts))
++                      return -EINVAL;
++
++              t = timespec_to_ktime(ts);
++              if (cmd == FUTEX_WAIT)
++                      t = ktime_add_safe(ktime_get(), t);
++              tp = &t;
++      }
++      if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
++          cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
++              val2 = (int) (unsigned long) utime;
++
++      return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
++}
++#endif /* CONFIG_COMPAT */
++
+ static void __init futex_detect_cmpxchg(void)
+ {
+ #ifndef CONFIG_HAVE_FUTEX_CMPXCHG
+--- a/kernel/futex_compat.c
++++ /dev/null
+@@ -1,202 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * linux/kernel/futex_compat.c
+- *
+- * Futex compatibililty routines.
+- *
+- * Copyright 2006, Red Hat, Inc., Ingo Molnar
+- */
+-
+-#include <linux/linkage.h>
+-#include <linux/compat.h>
+-#include <linux/nsproxy.h>
+-#include <linux/futex.h>
+-#include <linux/ptrace.h>
+-#include <linux/syscalls.h>
+-
+-#include <linux/uaccess.h>
+-
+-
+-/*
+- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+- */
+-static inline int
+-fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+-                 compat_uptr_t __user *head, unsigned int *pi)
+-{
+-      if (get_user(*uentry, head))
+-              return -EFAULT;
+-
+-      *entry = compat_ptr((*uentry) & ~1);
+-      *pi = (unsigned int)(*uentry) & 1;
+-
+-      return 0;
+-}
+-
+-static void __user *futex_uaddr(struct robust_list __user *entry,
+-                              compat_long_t futex_offset)
+-{
+-      compat_uptr_t base = ptr_to_compat(entry);
+-      void __user *uaddr = compat_ptr(base + futex_offset);
+-
+-      return uaddr;
+-}
+-
+-/*
+- * Walk curr->robust_list (very carefully, it's a userspace list!)
+- * and mark any locks found there dead, and notify any waiters.
+- *
+- * We silently return on any sign of list-walking problem.
+- */
+-void compat_exit_robust_list(struct task_struct *curr)
+-{
+-      struct compat_robust_list_head __user *head = curr->compat_robust_list;
+-      struct robust_list __user *entry, *next_entry, *pending;
+-      unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+-      unsigned int uninitialized_var(next_pi);
+-      compat_uptr_t uentry, next_uentry, upending;
+-      compat_long_t futex_offset;
+-      int rc;
+-
+-      if (!futex_cmpxchg_enabled)
+-              return;
+-
+-      /*
+-       * Fetch the list head (which was registered earlier, via
+-       * sys_set_robust_list()):
+-       */
+-      if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
+-              return;
+-      /*
+-       * Fetch the relative futex offset:
+-       */
+-      if (get_user(futex_offset, &head->futex_offset))
+-              return;
+-      /*
+-       * Fetch any possibly pending lock-add first, and handle it
+-       * if it exists:
+-       */
+-      if (fetch_robust_entry(&upending, &pending,
+-                             &head->list_op_pending, &pip))
+-              return;
+-
+-      next_entry = NULL;      /* avoid warning with gcc */
+-      while (entry != (struct robust_list __user *) &head->list) {
+-              /*
+-               * Fetch the next entry in the list before calling
+-               * handle_futex_death:
+-               */
+-              rc = fetch_robust_entry(&next_uentry, &next_entry,
+-                      (compat_uptr_t __user *)&entry->next, &next_pi);
+-              /*
+-               * A pending lock might already be on the list, so
+-               * dont process it twice:
+-               */
+-              if (entry != pending) {
+-                      void __user *uaddr = futex_uaddr(entry, futex_offset);
+-
+-                      if (handle_futex_death(uaddr, curr, pi))
+-                              return;
+-              }
+-              if (rc)
+-                      return;
+-              uentry = next_uentry;
+-              entry = next_entry;
+-              pi = next_pi;
+-              /*
+-               * Avoid excessively long or circular lists:
+-               */
+-              if (!--limit)
+-                      break;
+-
+-              cond_resched();
+-      }
+-      if (pending) {
+-              void __user *uaddr = futex_uaddr(pending, futex_offset);
+-
+-              handle_futex_death(uaddr, curr, pip);
+-      }
+-}
+-
+-COMPAT_SYSCALL_DEFINE2(set_robust_list,
+-              struct compat_robust_list_head __user *, head,
+-              compat_size_t, len)
+-{
+-      if (!futex_cmpxchg_enabled)
+-              return -ENOSYS;
+-
+-      if (unlikely(len != sizeof(*head)))
+-              return -EINVAL;
+-
+-      current->compat_robust_list = head;
+-
+-      return 0;
+-}
+-
+-COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
+-                      compat_uptr_t __user *, head_ptr,
+-                      compat_size_t __user *, len_ptr)
+-{
+-      struct compat_robust_list_head __user *head;
+-      unsigned long ret;
+-      struct task_struct *p;
+-
+-      if (!futex_cmpxchg_enabled)
+-              return -ENOSYS;
+-
+-      rcu_read_lock();
+-
+-      ret = -ESRCH;
+-      if (!pid)
+-              p = current;
+-      else {
+-              p = find_task_by_vpid(pid);
+-              if (!p)
+-                      goto err_unlock;
+-      }
+-
+-      ret = -EPERM;
+-      if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+-              goto err_unlock;
+-
+-      head = p->compat_robust_list;
+-      rcu_read_unlock();
+-
+-      if (put_user(sizeof(*head), len_ptr))
+-              return -EFAULT;
+-      return put_user(ptr_to_compat(head), head_ptr);
+-
+-err_unlock:
+-      rcu_read_unlock();
+-
+-      return ret;
+-}
+-
+-COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
+-              struct compat_timespec __user *, utime, u32 __user *, uaddr2,
+-              u32, val3)
+-{
+-      struct timespec ts;
+-      ktime_t t, *tp = NULL;
+-      int val2 = 0;
+-      int cmd = op & FUTEX_CMD_MASK;
+-
+-      if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
+-                    cmd == FUTEX_WAIT_BITSET ||
+-                    cmd == FUTEX_WAIT_REQUEUE_PI)) {
+-              if (compat_get_timespec(&ts, utime))
+-                      return -EFAULT;
+-              if (!timespec_valid(&ts))
+-                      return -EINVAL;
+-
+-              t = timespec_to_ktime(ts);
+-              if (cmd == FUTEX_WAIT)
+-                      t = ktime_add_safe(ktime_get(), t);
+-              tp = &t;
+-      }
+-      if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+-          cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
+-              val2 = (int) (unsigned long) utime;
+-
+-      return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+-}
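
As a user space reference for the (unchanged) timeout semantics the handler
above implements: for FUTEX_WAIT the fourth argument is a RELATIVE timespec,
which the kernel converts to an absolute ktime via
ktime_add_safe(ktime_get(), t), while for the requeue operations the very
same argument slot carries a plain integer (val2). The sketch below exercises
both; it is illustrative only, with error handling trimmed.

/* futex_timeout.c - build with: cc futex_timeout.c */
#define _GNU_SOURCE
#include <errno.h>
#include <linux/futex.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

static uint32_t word;			/* stays 0, so the wait times out */
static uint32_t word2;

int main(void)
{
	/* Relative 100ms timeout; expect ret = -1 and errno = ETIMEDOUT. */
	struct timespec rel = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 };
	long ret = syscall(SYS_futex, &word, FUTEX_WAIT, 0, &rel, NULL, 0);

	printf("FUTEX_WAIT: ret=%ld errno=%s\n", ret, strerror(errno));

	/* For FUTEX_CMP_REQUEUE the timeout slot is not a pointer at all:
	 * val2 (nr_requeue) is smuggled through it as an integer, matching
	 * the "val2 = (int) (unsigned long) utime" line in the handler. */
	ret = syscall(SYS_futex, &word, FUTEX_CMP_REQUEUE, 0 /* nr_wake */,
		      (void *)1 /* val2 = nr_requeue */, &word2,
		      0 /* val3: expected value of word */);
	printf("FUTEX_CMP_REQUEUE: ret=%ld (0 = nothing to requeue)\n", ret);
	return 0;
}
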