From a32215f8237ea61d3ed6584cf374b743677ff5b3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 4 Feb 2021 16:16:40 +0100 Subject: [PATCH] 4.9-stable patches added patches: futex-avoid-violating-the-10th-rule-of-futex.patch futex-handle-faults-correctly-for-pi-futexes.patch futex-provide-and-use-pi_state_update_owner.patch futex-remove-rt_mutex_deadlock_account_.patch futex-replace-pointless-printk-in-fixup_owner.patch futex-rework-inconsistent-rt_mutex-futex_q-state.patch futex-rt_mutex-provide-futex-specific-rt_mutex-api.patch futex-simplify-fixup_pi_state_owner.patch futex-use-pi_state_update_owner-in-put_pi_state.patch ibmvnic-ensure-that-crq-entry-read-are-correctly-ordered.patch net-dsa-bcm_sf2-put-device-node-before-return.patch net_sched-reject-silly-cell_log-in-qdisc_get_rtab.patch rtmutex-remove-unused-argument-from-rt_mutex_proxy_unlock.patch --- queue-4.14/series | 4 + queue-4.19/series | 3 + ...oid-violating-the-10th-rule-of-futex.patch | 292 ++++++++++++++++++ ...ndle-faults-correctly-for-pi-futexes.patch | 124 ++++++++ ...rovide-and-use-pi_state_update_owner.patch | 118 +++++++ ...ex-remove-rt_mutex_deadlock_account_.patch | 177 +++++++++++ ...lace-pointless-printk-in-fixup_owner.patch | 45 +++ ...-inconsistent-rt_mutex-futex_q-state.patch | 147 +++++++++ ...-provide-futex-specific-rt_mutex-api.patch | 227 ++++++++++++++ .../futex-simplify-fixup_pi_state_owner.patch | 109 +++++++ ...i_state_update_owner-in-put_pi_state.patch | 37 +++ ...crq-entry-read-are-correctly-ordered.patch | 38 +++ ...cm_sf2-put-device-node-before-return.patch | 44 +++ ...ect-silly-cell_log-in-qdisc_get_rtab.patch | 65 ++++ ...-argument-from-rt_mutex_proxy_unlock.patch | 60 ++++ queue-4.9/series | 13 + 16 files changed, 1503 insertions(+) create mode 100644 queue-4.14/series create mode 100644 queue-4.19/series create mode 100644 queue-4.9/futex-avoid-violating-the-10th-rule-of-futex.patch create mode 100644 
queue-4.9/futex-handle-faults-correctly-for-pi-futexes.patch create mode 100644 queue-4.9/futex-provide-and-use-pi_state_update_owner.patch create mode 100644 queue-4.9/futex-remove-rt_mutex_deadlock_account_.patch create mode 100644 queue-4.9/futex-replace-pointless-printk-in-fixup_owner.patch create mode 100644 queue-4.9/futex-rework-inconsistent-rt_mutex-futex_q-state.patch create mode 100644 queue-4.9/futex-rt_mutex-provide-futex-specific-rt_mutex-api.patch create mode 100644 queue-4.9/futex-simplify-fixup_pi_state_owner.patch create mode 100644 queue-4.9/futex-use-pi_state_update_owner-in-put_pi_state.patch create mode 100644 queue-4.9/ibmvnic-ensure-that-crq-entry-read-are-correctly-ordered.patch create mode 100644 queue-4.9/net-dsa-bcm_sf2-put-device-node-before-return.patch create mode 100644 queue-4.9/net_sched-reject-silly-cell_log-in-qdisc_get_rtab.patch create mode 100644 queue-4.9/rtmutex-remove-unused-argument-from-rt_mutex_proxy_unlock.patch create mode 100644 queue-4.9/series diff --git a/queue-4.14/series b/queue-4.14/series new file mode 100644 index 00000000000..15720c95a14 --- /dev/null +++ b/queue-4.14/series @@ -0,0 +1,4 @@ +net-dsa-bcm_sf2-put-device-node-before-return.patch +ibmvnic-ensure-that-crq-entry-read-are-correctly-ordered.patch +acpi-thermal-do-not-call-acpi_thermal_check-directly.patch +net_sched-reject-silly-cell_log-in-qdisc_get_rtab.patch diff --git a/queue-4.19/series b/queue-4.19/series new file mode 100644 index 00000000000..279c89515fa --- /dev/null +++ b/queue-4.19/series @@ -0,0 +1,3 @@ +net-dsa-bcm_sf2-put-device-node-before-return.patch +ibmvnic-ensure-that-crq-entry-read-are-correctly-ordered.patch +acpi-thermal-do-not-call-acpi_thermal_check-directly.patch diff --git a/queue-4.9/futex-avoid-violating-the-10th-rule-of-futex.patch b/queue-4.9/futex-avoid-violating-the-10th-rule-of-futex.patch new file mode 100644 index 00000000000..eda43d92259 --- /dev/null +++ 
b/queue-4.9/futex-avoid-violating-the-10th-rule-of-futex.patch @@ -0,0 +1,292 @@ +From foo@baz Thu Feb 4 04:13:47 PM CET 2021 +From: Lee Jones +Date: Wed, 3 Feb 2021 13:45:33 +0000 +Subject: futex: Avoid violating the 10th rule of futex +To: stable@vger.kernel.org +Cc: Peter Zijlstra , Julia Cartwright , Gratian Crisan , Thomas Gleixner , Darren Hart , Greg Kroah-Hartman , Lee Jones +Message-ID: <20210203134539.2583943-5-lee.jones@linaro.org> + +From: Peter Zijlstra + +commit c1e2f0eaf015fb7076d51a339011f2383e6dd389 upstream. + +Julia reported futex state corruption in the following scenario: + + waiter waker stealer (prio > waiter) + + futex(WAIT_REQUEUE_PI, uaddr, uaddr2, + timeout=[N ms]) + futex_wait_requeue_pi() + futex_wait_queue_me() + freezable_schedule() + + futex(LOCK_PI, uaddr2) + futex(CMP_REQUEUE_PI, uaddr, + uaddr2, 1, 0) + /* requeues waiter to uaddr2 */ + futex(UNLOCK_PI, uaddr2) + wake_futex_pi() + cmp_futex_value_locked(uaddr2, waiter) + wake_up_q() + + task> + futex(LOCK_PI, uaddr2) + __rt_mutex_start_proxy_lock() + try_to_take_rt_mutex() /* steals lock */ + rt_mutex_set_owner(lock, stealer) + + + rt_mutex_wait_proxy_lock() + __rt_mutex_slowlock() + try_to_take_rt_mutex() /* fails, lock held by stealer */ + if (timeout && !timeout->task) + return -ETIMEDOUT; + fixup_owner() + /* lock wasn't acquired, so, + fixup_pi_state_owner skipped */ + + return -ETIMEDOUT; + + /* At this point, we've returned -ETIMEDOUT to userspace, but the + * futex word shows waiter to be the owner, and the pi_mutex has + * stealer as the owner */ + + futex_lock(LOCK_PI, uaddr2) + -> bails with EDEADLK, futex word says we're owner. + +And suggested that what commit: + + 73d786bd043e ("futex: Rework inconsistent rt_mutex/futex_q state") + +removes from fixup_owner() looks to be just what is needed. And indeed +it is -- I completely missed that requeue_pi could also result in this +case. 
So we need to restore that, except that subsequent patches, like +commit: + + 16ffa12d7425 ("futex: Pull rt_mutex_futex_unlock() out from under hb->lock") + +changed all the locking rules. Even without that, the sequence: + +- if (rt_mutex_futex_trylock(&q->pi_state->pi_mutex)) { +- locked = 1; +- goto out; +- } + +- raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock); +- owner = rt_mutex_owner(&q->pi_state->pi_mutex); +- if (!owner) +- owner = rt_mutex_next_owner(&q->pi_state->pi_mutex); +- raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock); +- ret = fixup_pi_state_owner(uaddr, q, owner); + +already suggests there were races; otherwise we'd never have to look +at next_owner. + +So instead of doing 3 consecutive wait_lock sections with who knows +what races, we do it all in a single section. Additionally, the usage +of pi_state->owner in fixup_owner() was only safe because only the +rt_mutex owner would modify it, which this additional case wrecks. + +Luckily the values can only change away and not to the value we're +testing, this means we can do a speculative test and double check once +we have the wait_lock. 
+ +Fixes: 73d786bd043e ("futex: Rework inconsistent rt_mutex/futex_q state") +Reported-by: Julia Cartwright +Reported-by: Gratian Crisan +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Thomas Gleixner +Tested-by: Julia Cartwright +Tested-by: Gratian Crisan +Cc: Darren Hart +Link: https://lkml.kernel.org/r/20171208124939.7livp7no2ov65rrc@hirez.programming.kicks-ass.net +Signed-off-by: Greg Kroah-Hartman +[Lee: Back-ported to solve a dependency] +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 80 +++++++++++++++++++++++++++++++++------- + kernel/locking/rtmutex.c | 26 +++++++++---- + kernel/locking/rtmutex_common.h | 1 + 3 files changed, 87 insertions(+), 20 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -2262,30 +2262,34 @@ static void unqueue_me_pi(struct futex_q + spin_unlock(q->lock_ptr); + } + +-/* +- * Fixup the pi_state owner with the new owner. +- * +- * Must be called with hash bucket lock held and mm->sem held for non +- * private futexes. +- */ + static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, +- struct task_struct *newowner) ++ struct task_struct *argowner) + { +- u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; + struct futex_pi_state *pi_state = q->pi_state; +- struct task_struct *oldowner = pi_state->owner; + u32 uval, uninitialized_var(curval), newval; ++ struct task_struct *oldowner, *newowner; ++ u32 newtid; + int ret; + ++ lockdep_assert_held(q->lock_ptr); ++ ++ oldowner = pi_state->owner; + /* Owner died? */ + if (!pi_state->owner) + newtid |= FUTEX_OWNER_DIED; + + /* +- * We are here either because we stole the rtmutex from the +- * previous highest priority waiter or we are the highest priority +- * waiter but failed to get the rtmutex the first time. +- * We have to replace the newowner TID in the user space variable. 
++ * We are here because either: ++ * ++ * - we stole the lock and pi_state->owner needs updating to reflect ++ * that (@argowner == current), ++ * ++ * or: ++ * ++ * - someone stole our lock and we need to fix things to point to the ++ * new owner (@argowner == NULL). ++ * ++ * Either way, we have to replace the TID in the user space variable. + * This must be atomic as we have to preserve the owner died bit here. + * + * Note: We write the user space value _before_ changing the pi_state +@@ -2299,6 +2303,39 @@ static int fixup_pi_state_owner(u32 __us + * in lookup_pi_state. + */ + retry: ++ if (!argowner) { ++ if (oldowner != current) { ++ /* ++ * We raced against a concurrent self; things are ++ * already fixed up. Nothing to do. ++ */ ++ return 0; ++ } ++ ++ if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { ++ /* We got the lock after all, nothing to fix. */ ++ return 0; ++ } ++ ++ /* ++ * Since we just failed the trylock; there must be an owner. ++ */ ++ newowner = rt_mutex_owner(&pi_state->pi_mutex); ++ BUG_ON(!newowner); ++ } else { ++ WARN_ON_ONCE(argowner != current); ++ if (oldowner == current) { ++ /* ++ * We raced against a concurrent self; things are ++ * already fixed up. Nothing to do. ++ */ ++ return 0; ++ } ++ newowner = argowner; ++ } ++ ++ newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; ++ + if (get_futex_value_locked(&uval, uaddr)) + goto handle_fault; + +@@ -2385,12 +2422,29 @@ static int fixup_owner(u32 __user *uaddr + /* + * Got the lock. We might not be the anticipated owner if we + * did a lock-steal - fix up the PI-state in that case: ++ * ++ * Speculative pi_state->owner read (we don't hold wait_lock); ++ * since we own the lock pi_state->owner == current is the ++ * stable state, anything else needs more attention. + */ + if (q->pi_state->owner != current) + ret = fixup_pi_state_owner(uaddr, q, current); + goto out; + } + ++ /* ++ * If we didn't get the lock; check if anybody stole it from us. 
In ++ * that case, we need to fix up the uval to point to them instead of ++ * us, otherwise bad things happen. [10] ++ * ++ * Another speculative read; pi_state->owner == current is unstable ++ * but needs our attention. ++ */ ++ if (q->pi_state->owner == current) { ++ ret = fixup_pi_state_owner(uaddr, q, NULL); ++ goto out; ++ } ++ + /* + * Paranoia check. If we did not take the lock, then we should not be + * the owner of the rt_mutex. +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1314,6 +1314,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, + return ret; + } + ++static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock) ++{ ++ int ret = try_to_take_rt_mutex(lock, current, NULL); ++ ++ /* ++ * try_to_take_rt_mutex() sets the lock waiters bit ++ * unconditionally. Clean this up. ++ */ ++ fixup_rt_mutex_waiters(lock); ++ ++ return ret; ++} ++ + /* + * Slow path try-lock function: + */ +@@ -1336,13 +1349,7 @@ static inline int rt_mutex_slowtrylock(s + */ + raw_spin_lock_irqsave(&lock->wait_lock, flags); + +- ret = try_to_take_rt_mutex(lock, current, NULL); +- +- /* +- * try_to_take_rt_mutex() sets the lock waiters bit +- * unconditionally. Clean this up. 
+- */ +- fixup_rt_mutex_waiters(lock); ++ ret = __rt_mutex_slowtrylock(lock); + + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + +@@ -1530,6 +1537,11 @@ int __sched rt_mutex_futex_trylock(struc + return rt_mutex_slowtrylock(lock); + } + ++int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) ++{ ++ return __rt_mutex_slowtrylock(lock); ++} ++ + /** + * rt_mutex_timed_lock - lock a rt_mutex interruptible + * the timeout structure is provided +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -114,6 +114,7 @@ extern bool rt_mutex_cleanup_proxy_lock( + struct rt_mutex_waiter *waiter); + extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); + extern int rt_mutex_futex_trylock(struct rt_mutex *l); ++extern int __rt_mutex_futex_trylock(struct rt_mutex *l); + + extern void rt_mutex_futex_unlock(struct rt_mutex *lock); + extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, diff --git a/queue-4.9/futex-handle-faults-correctly-for-pi-futexes.patch b/queue-4.9/futex-handle-faults-correctly-for-pi-futexes.patch new file mode 100644 index 00000000000..60edaf8bad0 --- /dev/null +++ b/queue-4.9/futex-handle-faults-correctly-for-pi-futexes.patch @@ -0,0 +1,124 @@ +From foo@baz Thu Feb 4 04:13:47 PM CET 2021 +From: Lee Jones +Date: Wed, 3 Feb 2021 13:45:39 +0000 +Subject: futex: Handle faults correctly for PI futexes +To: stable@vger.kernel.org +Cc: Thomas Gleixner , gzobqq@gmail.com, Peter Zijlstra , Lee Jones +Message-ID: <20210203134539.2583943-11-lee.jones@linaro.org> + +From: Thomas Gleixner + +fixup_pi_state_owner() tries to ensure that the state of the rtmutex, +pi_state and the user space value related to the PI futex are consistent +before returning to user space. 
In case that the user space value update +faults and the fault cannot be resolved by faulting the page in via +fault_in_user_writeable() the function returns with -EFAULT and leaves +the rtmutex and pi_state owner state inconsistent. + +A subsequent futex_unlock_pi() operates on the inconsistent pi_state and +releases the rtmutex despite not owning it which can corrupt the RB tree of +the rtmutex and cause a subsequent kernel stack use after free. + +It was suggested to loop forever in fixup_pi_state_owner() if the fault +cannot be resolved, but that results in runaway tasks which is especially +undesired when the problem happens due to a programming error and not due +to malice. + +As the user space value cannot be fixed up, the proper solution is to make +the rtmutex and the pi_state consistent so both have the same owner. This +leaves the user space value out of sync. Any subsequent operation on the +futex will fail because the 10th rule of PI futexes (pi_state owner and +user space value are consistent) has been violated. + +As a consequence this removes the inept attempts of 'fixing' the situation +in case that the current task owns the rtmutex when returning with an +unresolvable fault by unlocking the rtmutex which left pi_state::owner and +rtmutex::owner out of sync in a different and only slightly less dangerous +way. + +Fixes: 1b7558e457ed ("futexes: fix fault handling in futex_lock_pi") +Reported-by: gzobqq@gmail.com +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 38 ++++++++++++++++++++------------------ + 1 file changed, 20 insertions(+), 18 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1017,7 +1017,8 @@ static void exit_pi_state_list(struct ta + * FUTEX_OWNER_DIED bit. See [4] + * + * [10] There is no transient state which leaves owner and user space +- * TID out of sync. ++ * TID out of sync. 
Except one error case where the kernel is denied ++ * write access to the user address, see fixup_pi_state_owner(). + */ + + /* +@@ -2392,6 +2393,24 @@ handle_fault: + if (!err) + goto retry; + ++ /* ++ * fault_in_user_writeable() failed so user state is immutable. At ++ * best we can make the kernel state consistent but user state will ++ * be most likely hosed and any subsequent unlock operation will be ++ * rejected due to PI futex rule [10]. ++ * ++ * Ensure that the rtmutex owner is also the pi_state owner despite ++ * the user space value claiming something different. There is no ++ * point in unlocking the rtmutex if current is the owner as it ++ * would need to wait until the next waiter has taken the rtmutex ++ * to guarantee consistent state. Keep it simple. Userspace asked ++ * for this wreckaged state. ++ * ++ * The rtmutex has an owner - either current or some other ++ * task. See the EAGAIN loop above. ++ */ ++ pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex)); ++ + return err; + } + +@@ -2777,13 +2796,6 @@ retry_private: + if (res) + ret = (res < 0) ? res : 0; + +- /* +- * If fixup_owner() faulted and was unable to handle the fault, unlock +- * it and return the fault to userspace. +- */ +- if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) +- rt_mutex_futex_unlock(&q.pi_state->pi_mutex); +- + /* Unqueue and drop the lock */ + unqueue_me_pi(&q); + +@@ -3088,8 +3100,6 @@ static int futex_wait_requeue_pi(u32 __u + if (q.pi_state && (q.pi_state->owner != current)) { + spin_lock(q.lock_ptr); + ret = fixup_pi_state_owner(uaddr2, &q, current); +- if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) +- rt_mutex_futex_unlock(&q.pi_state->pi_mutex); + /* + * Drop the reference to the pi state which + * the requeue_pi() code acquired for us. +@@ -3126,14 +3136,6 @@ static int futex_wait_requeue_pi(u32 __u + if (res) + ret = (res < 0) ? 
res : 0; + +- /* +- * If fixup_pi_state_owner() faulted and was unable to handle +- * the fault, unlock the rt_mutex and return the fault to +- * userspace. +- */ +- if (ret && rt_mutex_owner(pi_mutex) == current) +- rt_mutex_futex_unlock(pi_mutex); +- + /* Unqueue and drop the lock. */ + unqueue_me_pi(&q); + } diff --git a/queue-4.9/futex-provide-and-use-pi_state_update_owner.patch b/queue-4.9/futex-provide-and-use-pi_state_update_owner.patch new file mode 100644 index 00000000000..98ab9adad22 --- /dev/null +++ b/queue-4.9/futex-provide-and-use-pi_state_update_owner.patch @@ -0,0 +1,118 @@ +From foo@baz Thu Feb 4 04:13:47 PM CET 2021 +From: Lee Jones +Date: Wed, 3 Feb 2021 13:45:35 +0000 +Subject: futex: Provide and use pi_state_update_owner() +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Peter Zijlstra , Lee Jones +Message-ID: <20210203134539.2583943-7-lee.jones@linaro.org> + +From: Thomas Gleixner + +[ Upstream commit c5cade200ab9a2a3be9e7f32a752c8d86b502ec7 ] + +Updating pi_state::owner is done at several places with the same +code. Provide a function for it and use that at the obvious places. + +This is also a preparation for a bug fix to avoid yet another copy of the +same code or alternatively introducing a completely unpenetratable mess of +gotos. 
+ +Originally-by: Peter Zijlstra +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 64 +++++++++++++++++++++++++++++---------------------------- + 1 file changed, 33 insertions(+), 31 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -837,6 +837,29 @@ static struct futex_pi_state * alloc_pi_ + return pi_state; + } + ++static void pi_state_update_owner(struct futex_pi_state *pi_state, ++ struct task_struct *new_owner) ++{ ++ struct task_struct *old_owner = pi_state->owner; ++ ++ lockdep_assert_held(&pi_state->pi_mutex.wait_lock); ++ ++ if (old_owner) { ++ raw_spin_lock(&old_owner->pi_lock); ++ WARN_ON(list_empty(&pi_state->list)); ++ list_del_init(&pi_state->list); ++ raw_spin_unlock(&old_owner->pi_lock); ++ } ++ ++ if (new_owner) { ++ raw_spin_lock(&new_owner->pi_lock); ++ WARN_ON(!list_empty(&pi_state->list)); ++ list_add(&pi_state->list, &new_owner->pi_state_list); ++ pi_state->owner = new_owner; ++ raw_spin_unlock(&new_owner->pi_lock); ++ } ++} ++ + /* + * Drops a reference to the pi_state object and frees or caches it + * when the last reference is gone. +@@ -1432,26 +1455,16 @@ static int wake_futex_pi(u32 __user *uad + else + ret = -EINVAL; + } +- if (ret) { +- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); +- return ret; +- } +- +- raw_spin_lock(&pi_state->owner->pi_lock); +- WARN_ON(list_empty(&pi_state->list)); +- list_del_init(&pi_state->list); +- raw_spin_unlock(&pi_state->owner->pi_lock); + +- raw_spin_lock(&new_owner->pi_lock); +- WARN_ON(!list_empty(&pi_state->list)); +- list_add(&pi_state->list, &new_owner->pi_state_list); +- pi_state->owner = new_owner; +- raw_spin_unlock(&new_owner->pi_lock); +- +- /* +- * We've updated the uservalue, this unlock cannot fail. 
+- */ +- deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); ++ if (!ret) { ++ /* ++ * This is a point of no return; once we modified the uval ++ * there is no going back and subsequent operations must ++ * not fail. ++ */ ++ pi_state_update_owner(pi_state, new_owner); ++ deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); ++ } + + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + spin_unlock(&hb->lock); +@@ -2353,19 +2366,8 @@ retry: + * We fixed up user space. Now we need to fix the pi_state + * itself. + */ +- if (pi_state->owner != NULL) { +- raw_spin_lock_irq(&pi_state->owner->pi_lock); +- WARN_ON(list_empty(&pi_state->list)); +- list_del_init(&pi_state->list); +- raw_spin_unlock_irq(&pi_state->owner->pi_lock); +- } +- +- pi_state->owner = newowner; ++ pi_state_update_owner(pi_state, newowner); + +- raw_spin_lock_irq(&newowner->pi_lock); +- WARN_ON(!list_empty(&pi_state->list)); +- list_add(&pi_state->list, &newowner->pi_state_list); +- raw_spin_unlock_irq(&newowner->pi_lock); + return 0; + + /* diff --git a/queue-4.9/futex-remove-rt_mutex_deadlock_account_.patch b/queue-4.9/futex-remove-rt_mutex_deadlock_account_.patch new file mode 100644 index 00000000000..2a91366cdf0 --- /dev/null +++ b/queue-4.9/futex-remove-rt_mutex_deadlock_account_.patch @@ -0,0 +1,177 @@ +From foo@baz Thu Feb 4 04:13:47 PM CET 2021 +From: Lee Jones +Date: Wed, 3 Feb 2021 13:45:31 +0000 +Subject: futex: Remove rt_mutex_deadlock_account_*() +To: stable@vger.kernel.org +Cc: Peter Zijlstra , juri.lelli@arm.com, bigeasy@linutronix.de, xlpang@redhat.com, rostedt@goodmis.org, mathieu.desnoyers@efficios.com, jdesfossez@efficios.com, dvhart@infradead.org, bristot@redhat.com, Thomas Gleixner , Lee Jones +Message-ID: <20210203134539.2583943-3-lee.jones@linaro.org> + +From: Peter Zijlstra + +These are unused and clutter up the code. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Cc: juri.lelli@arm.com +Cc: bigeasy@linutronix.de +Cc: xlpang@redhat.com +Cc: rostedt@goodmis.org +Cc: mathieu.desnoyers@efficios.com +Cc: jdesfossez@efficios.com +Cc: dvhart@infradead.org +Cc: bristot@redhat.com +Link: http://lkml.kernel.org/r/20170322104151.652692478@infradead.org +Signed-off-by: Thomas Gleixner +[Lee: Back-ported to solve a dependency] +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + kernel/locking/rtmutex-debug.c | 9 -------- + kernel/locking/rtmutex-debug.h | 3 -- + kernel/locking/rtmutex.c | 42 +++++++++++++++-------------------------- + kernel/locking/rtmutex.h | 2 - + 4 files changed, 16 insertions(+), 40 deletions(-) + +--- a/kernel/locking/rtmutex-debug.c ++++ b/kernel/locking/rtmutex-debug.c +@@ -173,12 +173,3 @@ void debug_rt_mutex_init(struct rt_mutex + lock->name = name; + } + +-void +-rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task) +-{ +-} +- +-void rt_mutex_deadlock_account_unlock(struct task_struct *task) +-{ +-} +- +--- a/kernel/locking/rtmutex-debug.h ++++ b/kernel/locking/rtmutex-debug.h +@@ -9,9 +9,6 @@ + * This file contains macros used solely by rtmutex.c. Debug version. 
+ */ + +-extern void +-rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task); +-extern void rt_mutex_deadlock_account_unlock(struct task_struct *task); + extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); + extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); + extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -956,8 +956,6 @@ takeit: + */ + rt_mutex_set_owner(lock, task); + +- rt_mutex_deadlock_account_lock(lock, task); +- + return 1; + } + +@@ -1365,8 +1363,6 @@ static bool __sched rt_mutex_slowunlock( + + debug_rt_mutex_unlock(lock); + +- rt_mutex_deadlock_account_unlock(current); +- + /* + * We must be careful here if the fast path is enabled. If we + * have no waiters queued we cannot set owner to NULL here +@@ -1432,11 +1428,10 @@ rt_mutex_fastlock(struct rt_mutex *lock, + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk)) + { +- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { +- rt_mutex_deadlock_account_lock(lock, current); ++ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) + return 0; +- } else +- return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); ++ ++ return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); + } + + static inline int +@@ -1448,21 +1443,19 @@ rt_mutex_timed_fastlock(struct rt_mutex + enum rtmutex_chainwalk chwalk)) + { + if (chwalk == RT_MUTEX_MIN_CHAINWALK && +- likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { +- rt_mutex_deadlock_account_lock(lock, current); ++ likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) + return 0; +- } else +- return slowfn(lock, state, timeout, chwalk); ++ ++ return slowfn(lock, state, timeout, chwalk); + } + + static inline int + rt_mutex_fasttrylock(struct rt_mutex *lock, + int (*slowfn)(struct rt_mutex *lock)) + { +- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { +- 
rt_mutex_deadlock_account_lock(lock, current); ++ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) + return 1; +- } ++ + return slowfn(lock); + } + +@@ -1472,19 +1465,18 @@ rt_mutex_fastunlock(struct rt_mutex *loc + struct wake_q_head *wqh)) + { + WAKE_Q(wake_q); ++ bool deboost; + +- if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { +- rt_mutex_deadlock_account_unlock(current); ++ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) ++ return; + +- } else { +- bool deboost = slowfn(lock, &wake_q); ++ deboost = slowfn(lock, &wake_q); + +- wake_up_q(&wake_q); ++ wake_up_q(&wake_q); + +- /* Undo pi boosting if necessary: */ +- if (deboost) +- rt_mutex_adjust_prio(current); +- } ++ /* Undo pi boosting if necessary: */ ++ if (deboost) ++ rt_mutex_adjust_prio(current); + } + + /** +@@ -1682,7 +1674,6 @@ void rt_mutex_init_proxy_locked(struct r + __rt_mutex_init(lock, NULL); + debug_rt_mutex_proxy_lock(lock, proxy_owner); + rt_mutex_set_owner(lock, proxy_owner); +- rt_mutex_deadlock_account_lock(lock, proxy_owner); + } + + /** +@@ -1698,7 +1689,6 @@ void rt_mutex_proxy_unlock(struct rt_mut + { + debug_rt_mutex_proxy_unlock(lock); + rt_mutex_set_owner(lock, NULL); +- rt_mutex_deadlock_account_unlock(proxy_owner); + } + + /** +--- a/kernel/locking/rtmutex.h ++++ b/kernel/locking/rtmutex.h +@@ -11,8 +11,6 @@ + */ + + #define rt_mutex_deadlock_check(l) (0) +-#define rt_mutex_deadlock_account_lock(m, t) do { } while (0) +-#define rt_mutex_deadlock_account_unlock(l) do { } while (0) + #define debug_rt_mutex_init_waiter(w) do { } while (0) + #define debug_rt_mutex_free_waiter(w) do { } while (0) + #define debug_rt_mutex_lock(l) do { } while (0) diff --git a/queue-4.9/futex-replace-pointless-printk-in-fixup_owner.patch b/queue-4.9/futex-replace-pointless-printk-in-fixup_owner.patch new file mode 100644 index 00000000000..8909d8e7ba0 --- /dev/null +++ b/queue-4.9/futex-replace-pointless-printk-in-fixup_owner.patch @@ -0,0 +1,45 @@ +From foo@baz Thu Feb 
4 04:13:47 PM CET 2021 +From: Lee Jones +Date: Wed, 3 Feb 2021 13:45:34 +0000 +Subject: futex: Replace pointless printk in fixup_owner() +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Peter Zijlstra , Lee Jones +Message-ID: <20210203134539.2583943-6-lee.jones@linaro.org> + +From: Thomas Gleixner + +[ Upstream commit 04b79c55201f02ffd675e1231d731365e335c307 ] + +If that unexpected case of inconsistent arguments ever happens then the +futex state is left completely inconsistent and the printk is not really +helpful. Replace it with a warning and make the state consistent. + +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -2447,14 +2447,10 @@ static int fixup_owner(u32 __user *uaddr + + /* + * Paranoia check. If we did not take the lock, then we should not be +- * the owner of the rt_mutex. ++ * the owner of the rt_mutex. Warn and establish consistent state. + */ +- if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) { +- printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " +- "pi-state %p\n", ret, +- q->pi_state->pi_mutex.owner, +- q->pi_state->owner); +- } ++ if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current)) ++ return fixup_pi_state_owner(uaddr, q, current); + + out: + return ret ? 
ret : locked; diff --git a/queue-4.9/futex-rework-inconsistent-rt_mutex-futex_q-state.patch b/queue-4.9/futex-rework-inconsistent-rt_mutex-futex_q-state.patch new file mode 100644 index 00000000000..1193510223c --- /dev/null +++ b/queue-4.9/futex-rework-inconsistent-rt_mutex-futex_q-state.patch @@ -0,0 +1,147 @@ +From foo@baz Thu Feb 4 04:13:47 PM CET 2021 +From: Lee Jones +Date: Wed, 3 Feb 2021 13:45:32 +0000 +Subject: futex: Rework inconsistent rt_mutex/futex_q state +To: stable@vger.kernel.org +Cc: Peter Zijlstra , juri.lelli@arm.com, bigeasy@linutronix.de, xlpang@redhat.com, rostedt@goodmis.org, mathieu.desnoyers@efficios.com, jdesfossez@efficios.com, dvhart@infradead.org, bristot@redhat.com, Thomas Gleixner , Lee Jones +Message-ID: <20210203134539.2583943-4-lee.jones@linaro.org> + +From: Peter Zijlstra + +[Upstream commit 73d786bd043ebc855f349c81ea805f6b11cbf2aa ] + +There is a weird state in the futex_unlock_pi() path when it interleaves +with a concurrent futex_lock_pi() at the point where it drops hb->lock. + +In this case, it can happen that the rt_mutex wait_list and the futex_q +disagree on pending waiters, in particular rt_mutex will find no pending +waiters where futex_q thinks there are. In this case the rt_mutex unlock +code cannot assign an owner. + +The futex side fixup code has to cleanup the inconsistencies with quite a +bunch of interesting corner cases. + +Simplify all this by changing wake_futex_pi() to return -EAGAIN when this +situation occurs. This then gives the futex_lock_pi() code the opportunity +to continue and the retried futex_unlock_pi() will now observe a coherent +state. + +The only problem is that this breaks RT timeliness guarantees. That +is, consider the following scenario: + + T1 and T2 are both pinned to CPU0. prio(T2) > prio(T1) + + CPU0 + + T1 + lock_pi() + queue_me() <- Waiter is visible + + preemption + + T2 + unlock_pi() + loops with -EAGAIN forever + +Which is undesirable for PI primitives. 
Future patches will rectify +this. + +Signed-off-by: Peter Zijlstra (Intel) +Cc: juri.lelli@arm.com +Cc: bigeasy@linutronix.de +Cc: xlpang@redhat.com +Cc: rostedt@goodmis.org +Cc: mathieu.desnoyers@efficios.com +Cc: jdesfossez@efficios.com +Cc: dvhart@infradead.org +Cc: bristot@redhat.com +Link: http://lkml.kernel.org/r/20170322104151.850383690@infradead.org +Signed-off-by: Thomas Gleixner +[Lee: Back-ported to solve a dependency] +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 50 ++++++++++++++------------------------------------ + 1 file changed, 14 insertions(+), 36 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1394,12 +1394,19 @@ static int wake_futex_pi(u32 __user *uad + new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); + + /* +- * It is possible that the next waiter (the one that brought +- * this owner to the kernel) timed out and is no longer +- * waiting on the lock. ++ * When we interleave with futex_lock_pi() where it does ++ * rt_mutex_timed_futex_lock(), we might observe @this futex_q waiter, ++ * but the rt_mutex's wait_list can be empty (either still, or again, ++ * depending on which side we land). ++ * ++ * When this happens, give up our locks and try again, giving the ++ * futex_lock_pi() instance time to complete, either by waiting on the ++ * rtmutex or removing itself from the futex queue. + */ +- if (!new_owner) +- new_owner = this->task; ++ if (!new_owner) { ++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); ++ return -EAGAIN; ++ } + + /* + * We pass it to the next owner. 
The WAITERS bit is always +@@ -2372,7 +2379,6 @@ static long futex_wait_restart(struct re + */ + static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) + { +- struct task_struct *owner; + int ret = 0; + + if (locked) { +@@ -2386,43 +2392,15 @@ static int fixup_owner(u32 __user *uaddr + } + + /* +- * Catch the rare case, where the lock was released when we were on the +- * way back before we locked the hash bucket. +- */ +- if (q->pi_state->owner == current) { +- /* +- * Try to get the rt_mutex now. This might fail as some other +- * task acquired the rt_mutex after we removed ourself from the +- * rt_mutex waiters list. +- */ +- if (rt_mutex_futex_trylock(&q->pi_state->pi_mutex)) { +- locked = 1; +- goto out; +- } +- +- /* +- * pi_state is incorrect, some other task did a lock steal and +- * we returned due to timeout or signal without taking the +- * rt_mutex. Too late. +- */ +- raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock); +- owner = rt_mutex_owner(&q->pi_state->pi_mutex); +- if (!owner) +- owner = rt_mutex_next_owner(&q->pi_state->pi_mutex); +- raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock); +- ret = fixup_pi_state_owner(uaddr, q, owner); +- goto out; +- } +- +- /* + * Paranoia check. If we did not take the lock, then we should not be + * the owner of the rt_mutex. + */ +- if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) ++ if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) { + printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " + "pi-state %p\n", ret, + q->pi_state->pi_mutex.owner, + q->pi_state->owner); ++ } + + out: + return ret ? 
ret : locked; diff --git a/queue-4.9/futex-rt_mutex-provide-futex-specific-rt_mutex-api.patch b/queue-4.9/futex-rt_mutex-provide-futex-specific-rt_mutex-api.patch new file mode 100644 index 00000000000..ac2ded19758 --- /dev/null +++ b/queue-4.9/futex-rt_mutex-provide-futex-specific-rt_mutex-api.patch @@ -0,0 +1,227 @@ +From foo@baz Thu Feb 4 04:13:47 PM CET 2021 +From: Lee Jones +Date: Wed, 3 Feb 2021 13:45:30 +0000 +Subject: futex,rt_mutex: Provide futex specific rt_mutex API +To: stable@vger.kernel.org +Cc: Peter Zijlstra , juri.lelli@arm.com, bigeasy@linutronix.de, xlpang@redhat.com, rostedt@goodmis.org, mathieu.desnoyers@efficios.com, jdesfossez@efficios.com, dvhart@infradead.org, bristot@redhat.com, Thomas Gleixner , Lee Jones +Message-ID: <20210203134539.2583943-2-lee.jones@linaro.org> + +From: Peter Zijlstra + +[ Upstream commit 5293c2efda37775346885c7e924d4ef7018ea60b ] + +Part of what makes futex_unlock_pi() intricate is that +rt_mutex_futex_unlock() -> rt_mutex_slowunlock() can drop +rt_mutex::wait_lock. + +This means it cannot rely on the atomicity of wait_lock, which would be +preferred in order to not rely on hb->lock so much. + +The reason rt_mutex_slowunlock() needs to drop wait_lock is because it can +race with the rt_mutex fastpath, however futexes have their own fast path. + +Since futexes already have a bunch of separate rt_mutex accessors, complete +that set and implement a rt_mutex variant without fastpath for them.
+ +Signed-off-by: Peter Zijlstra (Intel) +Cc: juri.lelli@arm.com +Cc: bigeasy@linutronix.de +Cc: xlpang@redhat.com +Cc: rostedt@goodmis.org +Cc: mathieu.desnoyers@efficios.com +Cc: jdesfossez@efficios.com +Cc: dvhart@infradead.org +Cc: bristot@redhat.com +Link: http://lkml.kernel.org/r/20170322104151.702962446@infradead.org +Signed-off-by: Thomas Gleixner +[Lee: Back-ported to solve a dependency] +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 30 ++++++++++----------- + kernel/locking/rtmutex.c | 56 +++++++++++++++++++++++++++++----------- + kernel/locking/rtmutex_common.h | 8 ++++- + 3 files changed, 61 insertions(+), 33 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -941,7 +941,7 @@ static void exit_pi_state_list(struct ta + pi_state->owner = NULL; + raw_spin_unlock_irq(&curr->pi_lock); + +- rt_mutex_unlock(&pi_state->pi_mutex); ++ rt_mutex_futex_unlock(&pi_state->pi_mutex); + + spin_unlock(&hb->lock); + +@@ -1441,20 +1441,18 @@ static int wake_futex_pi(u32 __user *uad + pi_state->owner = new_owner; + raw_spin_unlock(&new_owner->pi_lock); + +- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); +- +- deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); +- + /* +- * First unlock HB so the waiter does not spin on it once he got woken +- * up. Second wake up the waiter before the priority is adjusted. If we +- * deboost first (and lose our higher priority), then the task might get +- * scheduled away before the wake up can take place. ++ * We've updated the uservalue, this unlock cannot fail. 
+ */ ++ deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); ++ ++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + spin_unlock(&hb->lock); +- wake_up_q(&wake_q); +- if (deboost) ++ ++ if (deboost) { ++ wake_up_q(&wake_q); + rt_mutex_adjust_prio(current); ++ } + + return 0; + } +@@ -2397,7 +2395,7 @@ static int fixup_owner(u32 __user *uaddr + * task acquired the rt_mutex after we removed ourself from the + * rt_mutex waiters list. + */ +- if (rt_mutex_trylock(&q->pi_state->pi_mutex)) { ++ if (rt_mutex_futex_trylock(&q->pi_state->pi_mutex)) { + locked = 1; + goto out; + } +@@ -2721,7 +2719,7 @@ retry_private: + if (!trylock) { + ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to); + } else { +- ret = rt_mutex_trylock(&q.pi_state->pi_mutex); ++ ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); + /* Fixup the trylock return value: */ + ret = ret ? 0 : -EWOULDBLOCK; + } +@@ -2744,7 +2742,7 @@ retry_private: + * it and return the fault to userspace. + */ + if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) +- rt_mutex_unlock(&q.pi_state->pi_mutex); ++ rt_mutex_futex_unlock(&q.pi_state->pi_mutex); + + /* Unqueue and drop the lock */ + unqueue_me_pi(&q); +@@ -3051,7 +3049,7 @@ static int futex_wait_requeue_pi(u32 __u + spin_lock(q.lock_ptr); + ret = fixup_pi_state_owner(uaddr2, &q, current); + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) +- rt_mutex_unlock(&q.pi_state->pi_mutex); ++ rt_mutex_futex_unlock(&q.pi_state->pi_mutex); + /* + * Drop the reference to the pi state which + * the requeue_pi() code acquired for us. +@@ -3094,7 +3092,7 @@ static int futex_wait_requeue_pi(u32 __u + * userspace. + */ + if (ret && rt_mutex_owner(pi_mutex) == current) +- rt_mutex_unlock(pi_mutex); ++ rt_mutex_futex_unlock(pi_mutex); + + /* Unqueue and drop the lock. 
*/ + unqueue_me_pi(&q); +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1519,15 +1519,23 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_interrup + + /* + * Futex variant with full deadlock detection. ++ * Futex variants must not use the fast-path, see __rt_mutex_futex_unlock(). + */ +-int rt_mutex_timed_futex_lock(struct rt_mutex *lock, ++int __sched rt_mutex_timed_futex_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *timeout) + { + might_sleep(); + +- return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, +- RT_MUTEX_FULL_CHAINWALK, +- rt_mutex_slowlock); ++ return rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, ++ timeout, RT_MUTEX_FULL_CHAINWALK); ++} ++ ++/* ++ * Futex variant, must not use fastpath. ++ */ ++int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) ++{ ++ return rt_mutex_slowtrylock(lock); + } + + /** +@@ -1586,20 +1594,38 @@ void __sched rt_mutex_unlock(struct rt_m + EXPORT_SYMBOL_GPL(rt_mutex_unlock); + + /** +- * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock +- * @lock: the rt_mutex to be unlocked +- * +- * Returns: true/false indicating whether priority adjustment is +- * required or not. ++ * Futex variant, that since futex variants do not use the fast-path, can be ++ * simple and will not need to retry. 
+ */ +-bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock, +- struct wake_q_head *wqh) ++bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, ++ struct wake_q_head *wake_q) + { +- if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { +- rt_mutex_deadlock_account_unlock(current); +- return false; ++ lockdep_assert_held(&lock->wait_lock); ++ ++ debug_rt_mutex_unlock(lock); ++ ++ if (!rt_mutex_has_waiters(lock)) { ++ lock->owner = NULL; ++ return false; /* done */ ++ } ++ ++ mark_wakeup_next_waiter(wake_q, lock); ++ return true; /* deboost and wakeups */ ++} ++ ++void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) ++{ ++ WAKE_Q(wake_q); ++ bool deboost; ++ ++ raw_spin_lock_irq(&lock->wait_lock); ++ deboost = __rt_mutex_futex_unlock(lock, &wake_q); ++ raw_spin_unlock_irq(&lock->wait_lock); ++ ++ if (deboost) { ++ wake_up_q(&wake_q); ++ rt_mutex_adjust_prio(current); + } +- return rt_mutex_slowunlock(lock, wqh); + } + + /** +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -113,8 +113,12 @@ extern int rt_mutex_wait_proxy_lock(stru + extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter); + extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); +-extern bool rt_mutex_futex_unlock(struct rt_mutex *lock, +- struct wake_q_head *wqh); ++extern int rt_mutex_futex_trylock(struct rt_mutex *l); ++ ++extern void rt_mutex_futex_unlock(struct rt_mutex *lock); ++extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, ++ struct wake_q_head *wqh); ++ + extern void rt_mutex_adjust_prio(struct task_struct *task); + + #ifdef CONFIG_DEBUG_RT_MUTEXES diff --git a/queue-4.9/futex-simplify-fixup_pi_state_owner.patch b/queue-4.9/futex-simplify-fixup_pi_state_owner.patch new file mode 100644 index 00000000000..7aaf48fba6f --- /dev/null +++ b/queue-4.9/futex-simplify-fixup_pi_state_owner.patch @@ -0,0 +1,109 @@ +From foo@baz Thu Feb 4 04:13:47 PM CET 2021 +From: 
Lee Jones +Date: Wed, 3 Feb 2021 13:45:38 +0000 +Subject: futex: Simplify fixup_pi_state_owner() +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Peter Zijlstra , Lee Jones +Message-ID: <20210203134539.2583943-10-lee.jones@linaro.org> + +From: Thomas Gleixner + +[ Upstream commit f2dac39d93987f7de1e20b3988c8685523247ae2 ] + +Too many gotos already and an upcoming fix would make it even more +unreadable. + +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 41 +++++++++++++++++++++++++++-------------- + 1 file changed, 27 insertions(+), 14 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -2272,18 +2272,16 @@ static void unqueue_me_pi(struct futex_q + spin_unlock(q->lock_ptr); + } + +-static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, +- struct task_struct *argowner) ++static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, ++ struct task_struct *argowner) + { + struct futex_pi_state *pi_state = q->pi_state; +- u32 uval, uninitialized_var(curval), newval; + struct task_struct *oldowner, *newowner; +- u32 newtid; +- int ret; +- +- lockdep_assert_held(q->lock_ptr); ++ u32 uval, curval, newval, newtid; ++ int err = 0; + + oldowner = pi_state->owner; ++ + /* Owner died? */ + if (!pi_state->owner) + newtid |= FUTEX_OWNER_DIED; +@@ -2324,7 +2322,7 @@ retry: + + if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { + /* We got the lock after all, nothing to fix. */ +- return 0; ++ return 1; + } + + /* +@@ -2339,7 +2337,7 @@ retry: + * We raced against a concurrent self; things are + * already fixed up. Nothing to do. 
+ */ +- return 0; ++ return 1; + } + newowner = argowner; + } +@@ -2380,7 +2378,7 @@ retry: + handle_fault: + spin_unlock(q->lock_ptr); + +- ret = fault_in_user_writeable(uaddr); ++ err = fault_in_user_writeable(uaddr); + + spin_lock(q->lock_ptr); + +@@ -2388,12 +2386,27 @@ handle_fault: + * Check if someone else fixed it for us: + */ + if (pi_state->owner != oldowner) +- return 0; ++ return argowner == current; ++ ++ /* Retry if err was -EAGAIN or the fault in succeeded */ ++ if (!err) ++ goto retry; + +- if (ret) +- return ret; ++ return err; ++} ++ ++static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, ++ struct task_struct *argowner) ++{ ++ struct futex_pi_state *pi_state = q->pi_state; ++ int ret; ++ ++ lockdep_assert_held(q->lock_ptr); + +- goto retry; ++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); ++ ret = __fixup_pi_state_owner(uaddr, q, argowner); ++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); ++ return ret; + } + + static long futex_wait_restart(struct restart_block *restart); diff --git a/queue-4.9/futex-use-pi_state_update_owner-in-put_pi_state.patch b/queue-4.9/futex-use-pi_state_update_owner-in-put_pi_state.patch new file mode 100644 index 00000000000..b4cb4ae5d7b --- /dev/null +++ b/queue-4.9/futex-use-pi_state_update_owner-in-put_pi_state.patch @@ -0,0 +1,37 @@ +From foo@baz Thu Feb 4 04:13:47 PM CET 2021 +From: Lee Jones +Date: Wed, 3 Feb 2021 13:45:37 +0000 +Subject: futex: Use pi_state_update_owner() in put_pi_state() +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Peter Zijlstra , Lee Jones +Message-ID: <20210203134539.2583943-9-lee.jones@linaro.org> + +From: Thomas Gleixner + +[ Upstream commit 6ccc84f917d33312eb2846bd7b567639f585ad6d ] + +No point in open coding it. This way it gains the extra sanity checks. 
+ +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -879,10 +879,7 @@ static void put_pi_state(struct futex_pi + * and has cleaned up the pi_state already + */ + if (pi_state->owner) { +- raw_spin_lock_irq(&pi_state->owner->pi_lock); +- list_del_init(&pi_state->list); +- raw_spin_unlock_irq(&pi_state->owner->pi_lock); +- ++ pi_state_update_owner(pi_state, NULL); + rt_mutex_proxy_unlock(&pi_state->pi_mutex); + } + diff --git a/queue-4.9/ibmvnic-ensure-that-crq-entry-read-are-correctly-ordered.patch b/queue-4.9/ibmvnic-ensure-that-crq-entry-read-are-correctly-ordered.patch new file mode 100644 index 00000000000..80f9a0168b0 --- /dev/null +++ b/queue-4.9/ibmvnic-ensure-that-crq-entry-read-are-correctly-ordered.patch @@ -0,0 +1,38 @@ +From e41aec79e62fa50f940cf222d1e9577f14e149dc Mon Sep 17 00:00:00 2001 +From: Lijun Pan +Date: Wed, 27 Jan 2021 19:34:42 -0600 +Subject: ibmvnic: Ensure that CRQ entry read are correctly ordered + +From: Lijun Pan + +commit e41aec79e62fa50f940cf222d1e9577f14e149dc upstream. + +Ensure that received Command-Response Queue (CRQ) entries are +properly read in order by the driver. dma_rmb barrier has +been added before accessing the CRQ descriptor to ensure +the entire descriptor is read before processing. 
+ +Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") +Signed-off-by: Lijun Pan +Link: https://lore.kernel.org/r/20210128013442.88319-1-ljp@linux.ibm.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ibm/ibmvnic.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/net/ethernet/ibm/ibmvnic.c ++++ b/drivers/net/ethernet/ibm/ibmvnic.c +@@ -3496,6 +3496,12 @@ static irqreturn_t ibmvnic_interrupt(int + while (!done) { + /* Pull all the valid messages off the CRQ */ + while ((crq = ibmvnic_next_crq(adapter)) != NULL) { ++ /* This barrier makes sure ibmvnic_next_crq()'s ++ * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded ++ * before ibmvnic_handle_crq()'s ++ * switch(gen_crq->first) and switch(gen_crq->cmd). ++ */ ++ dma_rmb(); + ibmvnic_handle_crq(crq, adapter); + crq->generic.first = 0; + } diff --git a/queue-4.9/net-dsa-bcm_sf2-put-device-node-before-return.patch b/queue-4.9/net-dsa-bcm_sf2-put-device-node-before-return.patch new file mode 100644 index 00000000000..0925d1861b4 --- /dev/null +++ b/queue-4.9/net-dsa-bcm_sf2-put-device-node-before-return.patch @@ -0,0 +1,44 @@ +From cf3c46631e1637582f517a574c77cd6c05793817 Mon Sep 17 00:00:00 2001 +From: Pan Bian +Date: Thu, 21 Jan 2021 04:33:43 -0800 +Subject: net: dsa: bcm_sf2: put device node before return + +From: Pan Bian + +commit cf3c46631e1637582f517a574c77cd6c05793817 upstream. + +Put the device node dn before return error code on failure path. 
+ +Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus") +Signed-off-by: Pan Bian +Link: https://lore.kernel.org/r/20210121123343.26330-1-bianpan2016@163.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -515,15 +515,19 @@ static int bcm_sf2_mdio_register(struct + /* Find our integrated MDIO bus node */ + dn = of_find_compatible_node(NULL, NULL, "brcm,unimac-mdio"); + priv->master_mii_bus = of_mdio_find_bus(dn); +- if (!priv->master_mii_bus) ++ if (!priv->master_mii_bus) { ++ of_node_put(dn); + return -EPROBE_DEFER; ++ } + + get_device(&priv->master_mii_bus->dev); + priv->master_mii_dn = dn; + + priv->slave_mii_bus = devm_mdiobus_alloc(ds->dev); +- if (!priv->slave_mii_bus) ++ if (!priv->slave_mii_bus) { ++ of_node_put(dn); + return -ENOMEM; ++ } + + priv->slave_mii_bus->priv = priv; + priv->slave_mii_bus->name = "sf2 slave mii"; diff --git a/queue-4.9/net_sched-reject-silly-cell_log-in-qdisc_get_rtab.patch b/queue-4.9/net_sched-reject-silly-cell_log-in-qdisc_get_rtab.patch new file mode 100644 index 00000000000..c432dbfa7f9 --- /dev/null +++ b/queue-4.9/net_sched-reject-silly-cell_log-in-qdisc_get_rtab.patch @@ -0,0 +1,65 @@ +From foo@baz Thu Feb 4 04:10:09 PM CET 2021 +From: Eric Dumazet +Date: Thu, 14 Jan 2021 08:06:37 -0800 +Subject: net_sched: reject silly cell_log in qdisc_get_rtab() + +From: Eric Dumazet + +commit e4bedf48aaa5552bc1f49703abd17606e7e6e82a upstream + +iproute2 probably never goes beyond 8 for the cell exponent, +but stick to the max shift exponent for signed 32bit. 
+ +UBSAN reported: +UBSAN: shift-out-of-bounds in net/sched/sch_api.c:389:22 +shift exponent 130 is too large for 32-bit type 'int' +CPU: 1 PID: 8450 Comm: syz-executor586 Not tainted 5.11.0-rc3-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:79 [inline] + dump_stack+0x183/0x22e lib/dump_stack.c:120 + ubsan_epilogue lib/ubsan.c:148 [inline] + __ubsan_handle_shift_out_of_bounds+0x432/0x4d0 lib/ubsan.c:395 + __detect_linklayer+0x2a9/0x330 net/sched/sch_api.c:389 + qdisc_get_rtab+0x2b5/0x410 net/sched/sch_api.c:435 + cbq_init+0x28f/0x12c0 net/sched/sch_cbq.c:1180 + qdisc_create+0x801/0x1470 net/sched/sch_api.c:1246 + tc_modify_qdisc+0x9e3/0x1fc0 net/sched/sch_api.c:1662 + rtnetlink_rcv_msg+0xb1d/0xe60 net/core/rtnetlink.c:5564 + netlink_rcv_skb+0x1f0/0x460 net/netlink/af_netlink.c:2494 + netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] + netlink_unicast+0x7de/0x9b0 net/netlink/af_netlink.c:1330 + netlink_sendmsg+0xaa6/0xe90 net/netlink/af_netlink.c:1919 + sock_sendmsg_nosec net/socket.c:652 [inline] + sock_sendmsg net/socket.c:672 [inline] + ____sys_sendmsg+0x5a2/0x900 net/socket.c:2345 + ___sys_sendmsg net/socket.c:2399 [inline] + __sys_sendmsg+0x319/0x400 net/socket.c:2432 + do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Acked-by: Cong Wang +Link: https://lore.kernel.org/r/20210114160637.1660597-1-eric.dumazet@gmail.com +Signed-off-by: Jakub Kicinski +[sudip: adjust context] +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_api.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -393,7 +393,8 @@ struct qdisc_rate_table *qdisc_get_rtab( + { + struct qdisc_rate_table *rtab; + +- if (tab == NULL || r->rate == 0 || 
r->cell_log == 0 || ++ if (tab == NULL || r->rate == 0 || ++ r->cell_log == 0 || r->cell_log >= 32 || + nla_len(tab) != TC_RTAB_SIZE) + return NULL; + diff --git a/queue-4.9/rtmutex-remove-unused-argument-from-rt_mutex_proxy_unlock.patch b/queue-4.9/rtmutex-remove-unused-argument-from-rt_mutex_proxy_unlock.patch new file mode 100644 index 00000000000..9641171d955 --- /dev/null +++ b/queue-4.9/rtmutex-remove-unused-argument-from-rt_mutex_proxy_unlock.patch @@ -0,0 +1,60 @@ +From foo@baz Thu Feb 4 04:13:47 PM CET 2021 +From: Lee Jones +Date: Wed, 3 Feb 2021 13:45:36 +0000 +Subject: rtmutex: Remove unused argument from rt_mutex_proxy_unlock() +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Peter Zijlstra , Lee Jones +Message-ID: <20210203134539.2583943-8-lee.jones@linaro.org> + +From: Thomas Gleixner + +[ Upstream commit 2156ac1934166d6deb6cd0f6ffc4c1076ec63697 ] +Nothing uses the argument. Remove it as preparation to use +pi_state_update_owner(). + +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 2 +- + kernel/locking/rtmutex.c | 3 +-- + kernel/locking/rtmutex_common.h | 3 +-- + 3 files changed, 3 insertions(+), 5 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -883,7 +883,7 @@ static void put_pi_state(struct futex_pi + list_del_init(&pi_state->list); + raw_spin_unlock_irq(&pi_state->owner->pi_lock); + +- rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner); ++ rt_mutex_proxy_unlock(&pi_state->pi_mutex); + } + + if (current->pi_state_cache) +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1696,8 +1696,7 @@ void rt_mutex_init_proxy_locked(struct r + * No locking. 
Caller has to do serializing itself + * Special API call for PI-futex support + */ +-void rt_mutex_proxy_unlock(struct rt_mutex *lock, +- struct task_struct *proxy_owner) ++void rt_mutex_proxy_unlock(struct rt_mutex *lock) + { + debug_rt_mutex_proxy_unlock(lock); + rt_mutex_set_owner(lock, NULL); +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -102,8 +102,7 @@ enum rtmutex_chainwalk { + extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); + extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, + struct task_struct *proxy_owner); +-extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, +- struct task_struct *proxy_owner); ++extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); + extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task); diff --git a/queue-4.9/series b/queue-4.9/series new file mode 100644 index 00000000000..4eea0483ac9 --- /dev/null +++ b/queue-4.9/series @@ -0,0 +1,13 @@ +net-dsa-bcm_sf2-put-device-node-before-return.patch +ibmvnic-ensure-that-crq-entry-read-are-correctly-ordered.patch +net_sched-reject-silly-cell_log-in-qdisc_get_rtab.patch +futex-rt_mutex-provide-futex-specific-rt_mutex-api.patch +futex-remove-rt_mutex_deadlock_account_.patch +futex-rework-inconsistent-rt_mutex-futex_q-state.patch +futex-avoid-violating-the-10th-rule-of-futex.patch +futex-replace-pointless-printk-in-fixup_owner.patch +futex-provide-and-use-pi_state_update_owner.patch +rtmutex-remove-unused-argument-from-rt_mutex_proxy_unlock.patch +futex-use-pi_state_update_owner-in-put_pi_state.patch +futex-simplify-fixup_pi_state_owner.patch +futex-handle-faults-correctly-for-pi-futexes.patch -- 2.47.3