--- /dev/null
+From foo@baz Sun Aug 8 08:43:25 AM CEST 2021
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Mon, 2 Aug 2021 21:46:22 +0800
+Subject: futex: Avoid freeing an active timer
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable <stable@vger.kernel.org>
+Cc: Zhen Lei <thunder.leizhen@huawei.com>, Anna-Maria Gleixner <anna-maria@linutronix.de>, Mike Galbraith <efault@gmx.de>, Sasha Levin <sasha.levin@oracle.com>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, linux-kernel <linux-kernel@vger.kernel.org>
+Message-ID: <20210802134624.1934-10-thunder.leizhen@huawei.com>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit 97181f9bd57405b879403763284537e27d46963d ]
+
+Alexander reported a hrtimer debug_object splat:
+
+ ODEBUG: free active (active state 0) object type: hrtimer hint: hrtimer_wakeup (kernel/time/hrtimer.c:1423)
+
+ debug_object_free (lib/debugobjects.c:603)
+ destroy_hrtimer_on_stack (kernel/time/hrtimer.c:427)
+ futex_lock_pi (kernel/futex.c:2740)
+ do_futex (kernel/futex.c:3399)
+ SyS_futex (kernel/futex.c:3447 kernel/futex.c:3415)
+ do_syscall_64 (arch/x86/entry/common.c:284)
+ entry_SYSCALL64_slow_path (arch/x86/entry/entry_64.S:249)
+
+Which was caused by commit:
+
+ cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()")
+
+... losing the hrtimer_cancel() in the shuffle. Where previously the
+hrtimer_cancel() was done by rt_mutex_slowlock(), we now need to do it
+manually.
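+
+For illustration only (not part of the patch), a minimal user-space caller
+that enters futex_lock_pi() with a timeout, i.e. the path which sets up the
+on-stack hrtimer sleeper seen in the trace above. FUTEX_LOCK_PI takes an
+absolute CLOCK_REALTIME deadline; names and error handling are just a sketch:
+
+  #define _GNU_SOURCE
+  #include <linux/futex.h>
+  #include <sys/syscall.h>
+  #include <unistd.h>
+  #include <stdint.h>
+  #include <stdio.h>
+  #include <time.h>
+
+  int main(void)
+  {
+          uint32_t futex_word = 0;        /* 0 == unlocked */
+          struct timespec deadline;
+
+          clock_gettime(CLOCK_REALTIME, &deadline);
+          deadline.tv_sec += 1;
+
+          /* val, uaddr2 and val3 are ignored for FUTEX_LOCK_PI. */
+          if (syscall(SYS_futex, &futex_word, FUTEX_LOCK_PI, 0,
+                      &deadline, NULL, 0) == 0)
+                  printf("acquired, futex word = %#x (our TID)\n", futex_word);
+          else
+                  perror("FUTEX_LOCK_PI");
+
+          syscall(SYS_futex, &futex_word, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
+          return 0;
+  }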
+
+Reported-by: Alexander Levin <alexander.levin@verizon.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Fixes: cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()")
+Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704101802370.2906@nanos
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Joe Korty <joe.korty@concurrent-rt.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -2960,8 +2960,10 @@ out_unlock_put_key:
+ out_put_key:
+ put_futex_key(&q.key);
+ out:
+- if (to)
++ if (to) {
++ hrtimer_cancel(&to->timer);
+ destroy_hrtimer_on_stack(&to->timer);
++ }
+ return ret != -EINTR ? ret : -ERESTARTNOINTR;
+
+ uaddr_faulted:
--- /dev/null
+From foo@baz Sun Aug 8 08:43:25 AM CEST 2021
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Mon, 2 Aug 2021 21:46:15 +0800
+Subject: futex: Cleanup refcounting
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable <stable@vger.kernel.org>
+Cc: Zhen Lei <thunder.leizhen@huawei.com>, Anna-Maria Gleixner <anna-maria@linutronix.de>, Mike Galbraith <efault@gmx.de>, Sasha Levin <sasha.levin@oracle.com>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, linux-kernel <linux-kernel@vger.kernel.org>
+Message-ID: <20210802134624.1934-3-thunder.leizhen@huawei.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit bf92cf3a5100f5a0d5f9834787b130159397cb22 ]
+
+Add get_pi_state() as a counterpart for put_pi_state() so the refcounting
+becomes consistent.
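+
+As an illustrative aside (not part of the patch), a user-space analogue of
+that pairing in C11 atomics; a "get" is only legal while a reference is
+already held, which is why the kernel helper can WARN on a zero refcount:
+
+  #include <stdatomic.h>
+  #include <assert.h>
+  #include <stdlib.h>
+
+  struct obj {
+          atomic_int refcount;
+  };
+
+  static struct obj *obj_alloc(void)
+  {
+          struct obj *o = malloc(sizeof(*o));
+
+          atomic_init(&o->refcount, 1);   /* caller starts with one reference */
+          return o;
+  }
+
+  static void obj_get(struct obj *o)
+  {
+          int old = atomic_fetch_add(&o->refcount, 1);
+
+          assert(old > 0);                /* must already hold a reference */
+  }
+
+  static void obj_put(struct obj *o)
+  {
+          if (atomic_fetch_sub(&o->refcount, 1) == 1)
+                  free(o);                /* last reference is gone */
+  }
+
+  int main(void)
+  {
+          struct obj *o = obj_alloc();
+
+          obj_get(o);     /* hand a reference to another user */
+          obj_put(o);     /* that user drops it again */
+          obj_put(o);     /* initial reference, frees the object */
+          return 0;
+  }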
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: dvhart@infradead.org
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170322104151.801778516@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Joe Korty <joe.korty@concurrent-rt.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -825,7 +825,7 @@ static int refill_pi_state_cache(void)
+ return 0;
+ }
+
+-static struct futex_pi_state * alloc_pi_state(void)
++static struct futex_pi_state *alloc_pi_state(void)
+ {
+ struct futex_pi_state *pi_state = current->pi_state_cache;
+
+@@ -858,6 +858,11 @@ static void pi_state_update_owner(struct
+ }
+ }
+
++static void get_pi_state(struct futex_pi_state *pi_state)
++{
++ WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount));
++}
++
+ /*
+ * Drops a reference to the pi_state object and frees or caches it
+ * when the last reference is gone.
+@@ -901,7 +906,7 @@ static void put_pi_state(struct futex_pi
+ * Look up the task based on what TID userspace gave us.
+ * We dont trust it.
+ */
+-static struct task_struct * futex_find_get_task(pid_t pid)
++static struct task_struct *futex_find_get_task(pid_t pid)
+ {
+ struct task_struct *p;
+
+@@ -1149,7 +1154,7 @@ static int attach_to_pi_state(u32 __user
+ goto out_einval;
+
+ out_attach:
+- atomic_inc(&pi_state->refcount);
++ get_pi_state(pi_state);
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+ *ps = pi_state;
+ return 0;
+@@ -2204,7 +2209,7 @@ retry_private:
+ */
+ if (requeue_pi) {
+ /* Prepare the waiter to take the rt_mutex. */
+- atomic_inc(&pi_state->refcount);
++ get_pi_state(pi_state);
+ this->pi_state = pi_state;
+ ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
+ this->rt_waiter,
--- /dev/null
+From foo@baz Sun Aug 8 08:43:25 AM CEST 2021
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Mon, 2 Aug 2021 21:46:19 +0800
+Subject: futex: Futex_unlock_pi() determinism
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable <stable@vger.kernel.org>
+Cc: Zhen Lei <thunder.leizhen@huawei.com>, Anna-Maria Gleixner <anna-maria@linutronix.de>, Mike Galbraith <efault@gmx.de>, Sasha Levin <sasha.levin@oracle.com>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, linux-kernel <linux-kernel@vger.kernel.org>
+Message-ID: <20210802134624.1934-7-thunder.leizhen@huawei.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit bebe5b514345f09be2c15e414d076b02ecb9cce8 ]
+
+The problem with returning -EAGAIN when the waiter state mismatches is that
+it becomes very hard to prove a bounded execution time for the operation,
+and seeing that this is an RT operation, that is somewhat important.
+
+While in practice (given the previous patch) it is very unlikely to ever
+really take more than one or two rounds, proving so remains rather hard.
+
+However, now that modifying the wait_list is done while holding both hb->lock
+and wait_lock, the scenario can be avoided entirely by acquiring wait_lock
+while still holding hb->lock, doing the hand-over without leaving a hole.
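+
+For illustration (not from the patch), the shape of that hand-over in
+user-space terms, with two pthread mutexes standing in for hb->lock and
+rt_mutex::wait_lock; the point is that there is no window in which neither
+lock is held:
+
+  #include <pthread.h>
+
+  static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER; /* "hb->lock"  */
+  static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER; /* "wait_lock" */
+  static int shared_state;
+
+  static void unlock_path(void)
+  {
+          pthread_mutex_lock(&outer);
+          /* ... observe state under the outer lock ... */
+
+          /* Hand-over: take the inner lock before dropping the outer one. */
+          pthread_mutex_lock(&inner);
+          pthread_mutex_unlock(&outer);
+
+          /*
+           * Anything observed above is still valid here, because a writer
+           * must hold both locks to change it.
+           */
+          shared_state++;
+          pthread_mutex_unlock(&inner);
+  }
+
+  int main(void)
+  {
+          unlock_path();
+          return shared_state == 1 ? 0 : 1;
+  }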
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: dvhart@infradead.org
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170322104152.112378812@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Joe Korty <joe.korty@concurrent-rt.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c | 24 +++++++++++-------------
+ 1 file changed, 11 insertions(+), 13 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1555,15 +1555,10 @@ static int wake_futex_pi(u32 __user *uad
+ WAKE_Q(wake_q);
+ int ret = 0;
+
+- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
+- if (!new_owner) {
++ if (WARN_ON_ONCE(!new_owner)) {
+ /*
+- * Since we held neither hb->lock nor wait_lock when coming
+- * into this function, we could have raced with futex_lock_pi()
+- * such that we might observe @this futex_q waiter, but the
+- * rt_mutex's wait_list can be empty (either still, or again,
+- * depending on which side we land).
++ * As per the comment in futex_unlock_pi() this should not happen.
+ *
+ * When this happens, give up our locks and try again, giving
+ * the futex_lock_pi() instance time to complete, either by
+@@ -3020,15 +3015,18 @@ retry:
+ if (pi_state->owner != current)
+ goto out_unlock;
+
++ get_pi_state(pi_state);
+ /*
+- * Grab a reference on the pi_state and drop hb->lock.
++ * Since modifying the wait_list is done while holding both
++ * hb->lock and wait_lock, holding either is sufficient to
++ * observe it.
+ *
+- * The reference ensures pi_state lives, dropping the hb->lock
+- * is tricky.. wake_futex_pi() will take rt_mutex::wait_lock to
+- * close the races against futex_lock_pi(), but in case of
+- * _any_ fail we'll abort and retry the whole deal.
++ * By taking wait_lock while still holding hb->lock, we ensure
++ * there is no point where we hold neither; and therefore
++ * wake_futex_pi() must observe a state consistent with what we
++ * observed.
+ */
+- get_pi_state(pi_state);
++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ spin_unlock(&hb->lock);
+
+ ret = wake_futex_pi(uaddr, uval, pi_state);
--- /dev/null
+From foo@baz Sun Aug 8 08:43:25 AM CEST 2021
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Mon, 2 Aug 2021 21:46:21 +0800
+Subject: futex: Handle transient "ownerless" rtmutex state correctly
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable <stable@vger.kernel.org>
+Cc: Zhen Lei <thunder.leizhen@huawei.com>, Anna-Maria Gleixner <anna-maria@linutronix.de>, Mike Galbraith <efault@gmx.de>, Sasha Levin <sasha.levin@oracle.com>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, linux-kernel <linux-kernel@vger.kernel.org>
+Message-ID: <20210802134624.1934-9-thunder.leizhen@huawei.com>
+
+From: Mike Galbraith <efault@gmx.de>
+
+[ Upstream commit 9f5d1c336a10c0d24e83e40b4c1b9539f7dba627 ]
+
+Gratian managed to trigger the BUG_ON(!newowner) in fixup_pi_state_owner().
+This is one possible chain of events leading to this:
+
+Task Prio Operation
+T1 120 lock(F)
+T2 120 lock(F) -> blocks (top waiter)
+T3 50 (RT) lock(F) -> boosts T1 and blocks (new top waiter)
+XX timeout/ -> wakes T2
+ signal
+T1 50 unlock(F) -> wakes T3 (rtmutex->owner == NULL, waiter bit is set)
+T2 120 cleanup -> try_to_take_rt_mutex() fails because T3 is the top waiter
+ and the lower priority T2 cannot steal the lock.
+ -> fixup_pi_state_owner() sees newowner == NULL -> BUG_ON()
+
+The comment states that this is invalid and that rt_mutex_real_owner() must
+return a non-NULL owner when the trylock failed, but in case of a queued
+and woken up waiter, rt_mutex_real_owner() == NULL is a valid transient
+state. The higher priority waiter has simply not yet managed to take over
+the rtmutex.
+
+The BUG_ON() is therefore wrong and this is just another retry condition in
+fixup_pi_state_owner().
+
+Drop the locks, so that T3 can make progress, and then try the fixup again.
+
+Gratian provided a great analysis, traces and a reproducer. The analysis is
+to the point, but it confused the hell out of that tglx dude who had to
+page in all the futex horrors again. Condensed version is above.
+
+[ tglx: Wrote comment and changelog ]
+
+Fixes: c1e2f0eaf015 ("futex: Avoid violating the 10th rule of futex")
+Reported-by: Gratian Crisan <gratian.crisan@ni.com>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/87a6w6x7bb.fsf@ni.com
+Link: https://lore.kernel.org/r/87sg9pkvf7.fsf@nanos.tec.linutronix.de
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Joe Korty <joe.korty@concurrent-rt.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -2437,10 +2437,22 @@ retry:
+ }
+
+ /*
+- * Since we just failed the trylock; there must be an owner.
++ * The trylock just failed, so either there is an owner or
++ * there is a higher priority waiter than this one.
+ */
+ newowner = rt_mutex_owner(&pi_state->pi_mutex);
+- BUG_ON(!newowner);
++ /*
++ * If the higher priority waiter has not yet taken over the
++ * rtmutex then newowner is NULL. We can't return here with
++ * that state because it's inconsistent vs. the user space
++ * state. So drop the locks and try again. It's a valid
++ * situation and not any different from the other retry
++ * conditions.
++ */
++ if (unlikely(!newowner)) {
++ err = -EAGAIN;
++ goto handle_fault;
++ }
+ } else {
+ WARN_ON_ONCE(argowner != current);
+ if (oldowner == current) {
--- /dev/null
+From foo@baz Sun Aug 8 08:43:25 AM CEST 2021
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Mon, 2 Aug 2021 21:46:17 +0800
+Subject: futex: Pull rt_mutex_futex_unlock() out from under hb->lock
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable <stable@vger.kernel.org>
+Cc: Zhen Lei <thunder.leizhen@huawei.com>, Anna-Maria Gleixner <anna-maria@linutronix.de>, Mike Galbraith <efault@gmx.de>, Sasha Levin <sasha.levin@oracle.com>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, linux-kernel <linux-kernel@vger.kernel.org>
+Message-ID: <20210802134624.1934-5-thunder.leizhen@huawei.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 16ffa12d742534d4ff73e8b3a4e81c1de39196f0 ]
+
+There are a number of 'interesting' problems, all caused by holding
+hb->lock while doing the rt_mutex_unlock() equivalent.
+
+Notably:
+
+ - a PI inversion on hb->lock; and,
+
+ - a SCHED_DEADLINE crash because of pointer instability.
+
+The previous changes:
+
+ - changed the locking rules to cover {uval,pi_state} with wait_lock.
+
+ - allowed rt_mutex_futex_unlock() to be done without dropping wait_lock,
+   which in turn allows relying on wait_lock atomicity completely.
+
+ - simplified the waiter conundrum.
+
+It's now sufficient to hold rtmutex::wait_lock and a reference on the
+pi_state to protect the state consistency, so hb->lock can be dropped
+before calling rt_mutex_futex_unlock().
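+
+As a sketch only (names invented, not the kernel code), the same protocol in
+user-space terms: pin the object with a reference, drop the bucket lock, and
+only then do the unlock work under the object's own lock:
+
+  #include <pthread.h>
+  #include <stdatomic.h>
+  #include <stdlib.h>
+
+  struct pi_obj {
+          atomic_int refcount;
+          pthread_mutex_t inner;          /* plays rt_mutex::wait_lock */
+          int state;
+  };
+
+  static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER; /* hb->lock */
+  static struct pi_obj *bucket_obj;
+
+  static void obj_put(struct pi_obj *obj)
+  {
+          if (atomic_fetch_sub(&obj->refcount, 1) == 1)
+                  free(obj);
+  }
+
+  static void unlock_path(void)
+  {
+          pthread_mutex_lock(&bucket_lock);
+          struct pi_obj *obj = bucket_obj;
+
+          atomic_fetch_add(&obj->refcount, 1);    /* pin it ...             */
+          pthread_mutex_unlock(&bucket_lock);     /* ... then drop the lock */
+
+          pthread_mutex_lock(&obj->inner);        /* state stays consistent */
+          obj->state = 0;
+          pthread_mutex_unlock(&obj->inner);
+
+          obj_put(obj);                           /* may be the last reference */
+  }
+
+  int main(void)
+  {
+          bucket_obj = calloc(1, sizeof(*bucket_obj));
+          atomic_init(&bucket_obj->refcount, 1);
+          pthread_mutex_init(&bucket_obj->inner, NULL);
+
+          unlock_path();
+          obj_put(bucket_obj);
+          return 0;
+  }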
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: dvhart@infradead.org
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170322104151.900002056@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Joe Korty <joe.korty@concurrent-rt.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c | 111 ++++++++++++++++++++++++++++++++++-----------------------
+ 1 file changed, 68 insertions(+), 43 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -966,10 +966,12 @@ static void exit_pi_state_list(struct ta
+ pi_state->owner = NULL;
+ raw_spin_unlock_irq(&curr->pi_lock);
+
+- rt_mutex_futex_unlock(&pi_state->pi_mutex);
+-
++ get_pi_state(pi_state);
+ spin_unlock(&hb->lock);
+
++ rt_mutex_futex_unlock(&pi_state->pi_mutex);
++ put_pi_state(pi_state);
++
+ raw_spin_lock_irq(&curr->pi_lock);
+ }
+ raw_spin_unlock_irq(&curr->pi_lock);
+@@ -1083,6 +1085,11 @@ static int attach_to_pi_state(u32 __user
+ * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
+ * which in turn means that futex_lock_pi() still has a reference on
+ * our pi_state.
++ *
++ * The waiter holding a reference on @pi_state also protects against
++ * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
++ * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
++ * free pi_state before we can take a reference ourselves.
+ */
+ WARN_ON(!atomic_read(&pi_state->refcount));
+
+@@ -1537,48 +1544,40 @@ static void mark_wake_futex(struct wake_
+ q->lock_ptr = NULL;
+ }
+
+-static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
+- struct futex_hash_bucket *hb)
++/*
++ * Caller must hold a reference on @pi_state.
++ */
++static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
+ {
+- struct task_struct *new_owner;
+- struct futex_pi_state *pi_state = this->pi_state;
+ u32 uninitialized_var(curval), newval;
++ struct task_struct *new_owner;
++ bool deboost = false;
+ WAKE_Q(wake_q);
+- bool deboost;
+ int ret = 0;
+
+- if (!pi_state)
+- return -EINVAL;
+-
+- /*
+- * If current does not own the pi_state then the futex is
+- * inconsistent and user space fiddled with the futex value.
+- */
+- if (pi_state->owner != current)
+- return -EINVAL;
+-
+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
+-
+- /*
+- * When we interleave with futex_lock_pi() where it does
+- * rt_mutex_timed_futex_lock(), we might observe @this futex_q waiter,
+- * but the rt_mutex's wait_list can be empty (either still, or again,
+- * depending on which side we land).
+- *
+- * When this happens, give up our locks and try again, giving the
+- * futex_lock_pi() instance time to complete, either by waiting on the
+- * rtmutex or removing itself from the futex queue.
+- */
+ if (!new_owner) {
+- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+- return -EAGAIN;
++ /*
++ * Since we held neither hb->lock nor wait_lock when coming
++ * into this function, we could have raced with futex_lock_pi()
++ * such that we might observe @this futex_q waiter, but the
++ * rt_mutex's wait_list can be empty (either still, or again,
++ * depending on which side we land).
++ *
++ * When this happens, give up our locks and try again, giving
++ * the futex_lock_pi() instance time to complete, either by
++ * waiting on the rtmutex or removing itself from the futex
++ * queue.
++ */
++ ret = -EAGAIN;
++ goto out_unlock;
+ }
+
+ /*
+- * We pass it to the next owner. The WAITERS bit is always
+- * kept enabled while there is PI state around. We cleanup the
+- * owner died bit, because we are the owner.
++ * We pass it to the next owner. The WAITERS bit is always kept
++ * enabled while there is PI state around. We cleanup the owner
++ * died bit, because we are the owner.
+ */
+ newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
+
+@@ -1611,15 +1610,15 @@ static int wake_futex_pi(u32 __user *uad
+ deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+ }
+
++out_unlock:
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+- spin_unlock(&hb->lock);
+
+ if (deboost) {
+ wake_up_q(&wake_q);
+ rt_mutex_adjust_prio(current);
+ }
+
+- return 0;
++ return ret;
+ }
+
+ /*
+@@ -2462,7 +2461,7 @@ retry:
+ if (get_futex_value_locked(&uval, uaddr))
+ goto handle_fault;
+
+- while (1) {
++ for (;;) {
+ newval = (uval & FUTEX_OWNER_DIED) | newtid;
+
+ if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
+@@ -2975,10 +2974,36 @@ retry:
+ */
+ match = futex_top_waiter(hb, &key);
+ if (match) {
+- ret = wake_futex_pi(uaddr, uval, match, hb);
++ struct futex_pi_state *pi_state = match->pi_state;
++
++ ret = -EINVAL;
++ if (!pi_state)
++ goto out_unlock;
++
+ /*
+- * In case of success wake_futex_pi dropped the hash
+- * bucket lock.
++ * If current does not own the pi_state then the futex is
++ * inconsistent and user space fiddled with the futex value.
++ */
++ if (pi_state->owner != current)
++ goto out_unlock;
++
++ /*
++ * Grab a reference on the pi_state and drop hb->lock.
++ *
++ * The reference ensures pi_state lives, dropping the hb->lock
++ * is tricky.. wake_futex_pi() will take rt_mutex::wait_lock to
++ * close the races against futex_lock_pi(), but in case of
++ * _any_ fail we'll abort and retry the whole deal.
++ */
++ get_pi_state(pi_state);
++ spin_unlock(&hb->lock);
++
++ ret = wake_futex_pi(uaddr, uval, pi_state);
++
++ put_pi_state(pi_state);
++
++ /*
++ * Success, we're done! No tricky corner cases.
+ */
+ if (!ret)
+ goto out_putkey;
+@@ -2993,7 +3018,6 @@ retry:
+ * setting the FUTEX_WAITERS bit. Try again.
+ */
+ if (ret == -EAGAIN) {
+- spin_unlock(&hb->lock);
+ put_futex_key(&key);
+ goto retry;
+ }
+@@ -3001,7 +3025,7 @@ retry:
+ * wake_futex_pi has detected invalid state. Tell user
+ * space.
+ */
+- goto out_unlock;
++ goto out_putkey;
+ }
+
+ /*
+@@ -3011,8 +3035,10 @@ retry:
+ * preserve the WAITERS bit not the OWNER_DIED one. We are the
+ * owner.
+ */
+- if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))
++ if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) {
++ spin_unlock(&hb->lock);
+ goto pi_faulted;
++ }
+
+ /*
+ * If uval has changed, let user space handle it.
+@@ -3026,7 +3052,6 @@ out_putkey:
+ return ret;
+
+ pi_faulted:
+- spin_unlock(&hb->lock);
+ put_futex_key(&key);
+
+ ret = fault_in_user_writeable(uaddr);
--- /dev/null
+From foo@baz Sun Aug 8 08:43:25 AM CEST 2021
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Mon, 2 Aug 2021 21:46:14 +0800
+Subject: futex: Rename free_pi_state() to put_pi_state()
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable <stable@vger.kernel.org>
+Cc: Zhen Lei <thunder.leizhen@huawei.com>, Anna-Maria Gleixner <anna-maria@linutronix.de>, Mike Galbraith <efault@gmx.de>, Sasha Levin <sasha.levin@oracle.com>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, linux-kernel <linux-kernel@vger.kernel.org>
+Message-ID: <20210802134624.1934-2-thunder.leizhen@huawei.com>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit 29e9ee5d48c35d6cf8afe09bdf03f77125c9ac11 ]
+
+free_pi_state() is confusing as it is in fact only freeing/caching the
+pi state when the last reference is gone. Rename it to put_pi_state(),
+which better reflects what it is doing.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Darren Hart <darren@dvhart.com>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Bhuvanesh_Surachari@mentor.com
+Cc: Andy Lowe <Andy_Lowe@mentor.com>
+Link: http://lkml.kernel.org/r/20151219200607.259636467@linutronix.de
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Joe Korty <joe.korty@concurrent-rt.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c | 17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -859,9 +859,12 @@ static void pi_state_update_owner(struct
+ }
+
+ /*
++ * Drops a reference to the pi_state object and frees or caches it
++ * when the last reference is gone.
++ *
+ * Must be called with the hb lock held.
+ */
+-static void free_pi_state(struct futex_pi_state *pi_state)
++static void put_pi_state(struct futex_pi_state *pi_state)
+ {
+ if (!pi_state)
+ return;
+@@ -2121,7 +2124,7 @@ retry_private:
+ case 0:
+ break;
+ case -EFAULT:
+- free_pi_state(pi_state);
++ put_pi_state(pi_state);
+ pi_state = NULL;
+ double_unlock_hb(hb1, hb2);
+ hb_waiters_dec(hb2);
+@@ -2139,7 +2142,7 @@ retry_private:
+ * exit to complete.
+ * - EAGAIN: The user space value changed.
+ */
+- free_pi_state(pi_state);
++ put_pi_state(pi_state);
+ pi_state = NULL;
+ double_unlock_hb(hb1, hb2);
+ hb_waiters_dec(hb2);
+@@ -2214,7 +2217,7 @@ retry_private:
+ } else if (ret) {
+ /* -EDEADLK */
+ this->pi_state = NULL;
+- free_pi_state(pi_state);
++ put_pi_state(pi_state);
+ goto out_unlock;
+ }
+ }
+@@ -2223,7 +2226,7 @@ retry_private:
+ }
+
+ out_unlock:
+- free_pi_state(pi_state);
++ put_pi_state(pi_state);
+ double_unlock_hb(hb1, hb2);
+ wake_up_q(&wake_q);
+ hb_waiters_dec(hb2);
+@@ -2376,7 +2379,7 @@ static void unqueue_me_pi(struct futex_q
+ __unqueue_futex(q);
+
+ BUG_ON(!q->pi_state);
+- free_pi_state(q->pi_state);
++ put_pi_state(q->pi_state);
+ q->pi_state = NULL;
+
+ spin_unlock(q->lock_ptr);
+@@ -3210,7 +3213,7 @@ static int futex_wait_requeue_pi(u32 __u
+ * Drop the reference to the pi state which
+ * the requeue_pi() code acquired for us.
+ */
+- free_pi_state(q.pi_state);
++ put_pi_state(q.pi_state);
+ spin_unlock(q.lock_ptr);
+ /*
+ * Adjust the return value. It's either -EFAULT or
--- /dev/null
+From foo@baz Sun Aug 8 08:43:25 AM CEST 2021
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Mon, 2 Aug 2021 21:46:18 +0800
+Subject: futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable <stable@vger.kernel.org>
+Cc: Zhen Lei <thunder.leizhen@huawei.com>, Anna-Maria Gleixner <anna-maria@linutronix.de>, Mike Galbraith <efault@gmx.de>, Sasha Levin <sasha.levin@oracle.com>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, linux-kernel <linux-kernel@vger.kernel.org>
+Message-ID: <20210802134624.1934-6-thunder.leizhen@huawei.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit cfafcd117da0216520568c195cb2f6cd1980c4bb ]
+
+By changing futex_lock_pi() to use rt_mutex_*_proxy_lock() all wait_list
+modifications are done under both hb->lock and wait_lock.
+
+This closes the obvious interleave pattern between futex_lock_pi() and
+futex_unlock_pi(), but not entirely so. See below:
+
+Before:
+
+futex_lock_pi() futex_unlock_pi()
+ unlock hb->lock
+
+ lock hb->lock
+ unlock hb->lock
+
+ lock rt_mutex->wait_lock
+ unlock rt_mutex_wait_lock
+ -EAGAIN
+
+ lock rt_mutex->wait_lock
+ list_add
+ unlock rt_mutex->wait_lock
+
+ schedule()
+
+ lock rt_mutex->wait_lock
+ list_del
+ unlock rt_mutex->wait_lock
+
+ <idem>
+ -EAGAIN
+
+ lock hb->lock
+
+After:
+
+futex_lock_pi() futex_unlock_pi()
+
+ lock hb->lock
+ lock rt_mutex->wait_lock
+ list_add
+ unlock rt_mutex->wait_lock
+ unlock hb->lock
+
+ schedule()
+ lock hb->lock
+ unlock hb->lock
+ lock hb->lock
+ lock rt_mutex->wait_lock
+ list_del
+ unlock rt_mutex->wait_lock
+
+ lock rt_mutex->wait_lock
+ unlock rt_mutex_wait_lock
+ -EAGAIN
+
+ unlock hb->lock
+
+It does, however, solve the earlier starvation/live-lock scenario which got
+introduced with the -EAGAIN: unlike the before scenario, where the -EAGAIN
+happens while futex_unlock_pi() does not hold any locks, in the after
+scenario it happens while futex_unlock_pi() actually holds a lock, and it is
+then serialized on that lock.
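+
+For reference (hedged, not part of the patch): glibc implements
+PTHREAD_PRIO_INHERIT mutexes with FUTEX_LOCK_PI/FUTEX_UNLOCK_PI, so a
+contended PI mutex in a plain pthread program exercises exactly the
+futex_lock_pi()/futex_unlock_pi() interleave shown above:
+
+  #include <pthread.h>
+  #include <stdio.h>
+
+  static pthread_mutex_t lock;
+  static long counter;
+
+  static void *worker(void *arg)
+  {
+          (void)arg;
+          for (int i = 0; i < 100000; i++) {
+                  pthread_mutex_lock(&lock);    /* FUTEX_LOCK_PI if contended */
+                  counter++;
+                  pthread_mutex_unlock(&lock);  /* FUTEX_UNLOCK_PI if waiters */
+          }
+          return NULL;
+  }
+
+  int main(void)
+  {
+          pthread_mutexattr_t attr;
+          pthread_t t1, t2;
+
+          pthread_mutexattr_init(&attr);
+          pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
+          pthread_mutex_init(&lock, &attr);
+
+          pthread_create(&t1, NULL, worker, NULL);
+          pthread_create(&t2, NULL, worker, NULL);
+          pthread_join(t1, NULL);
+          pthread_join(t2, NULL);
+
+          printf("counter = %ld\n", counter);
+          return 0;
+  }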
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: dvhart@infradead.org
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170322104152.062785528@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Joe Korty <joe.korty@concurrent-rt.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c | 77 ++++++++++++++++++++++++++++------------
+ kernel/locking/rtmutex.c | 26 +++----------
+ kernel/locking/rtmutex_common.h | 1
+ 3 files changed, 62 insertions(+), 42 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -2284,20 +2284,7 @@ queue_unlock(struct futex_hash_bucket *h
+ hb_waiters_dec(hb);
+ }
+
+-/**
+- * queue_me() - Enqueue the futex_q on the futex_hash_bucket
+- * @q: The futex_q to enqueue
+- * @hb: The destination hash bucket
+- *
+- * The hb->lock must be held by the caller, and is released here. A call to
+- * queue_me() is typically paired with exactly one call to unqueue_me(). The
+- * exceptions involve the PI related operations, which may use unqueue_me_pi()
+- * or nothing if the unqueue is done as part of the wake process and the unqueue
+- * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
+- * an example).
+- */
+-static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
+- __releases(&hb->lock)
++static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
+ {
+ int prio;
+
+@@ -2314,6 +2301,24 @@ static inline void queue_me(struct futex
+ plist_node_init(&q->list, prio);
+ plist_add(&q->list, &hb->chain);
+ q->task = current;
++}
++
++/**
++ * queue_me() - Enqueue the futex_q on the futex_hash_bucket
++ * @q: The futex_q to enqueue
++ * @hb: The destination hash bucket
++ *
++ * The hb->lock must be held by the caller, and is released here. A call to
++ * queue_me() is typically paired with exactly one call to unqueue_me(). The
++ * exceptions involve the PI related operations, which may use unqueue_me_pi()
++ * or nothing if the unqueue is done as part of the wake process and the unqueue
++ * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
++ * an example).
++ */
++static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
++ __releases(&hb->lock)
++{
++ __queue_me(q, hb);
+ spin_unlock(&hb->lock);
+ }
+
+@@ -2819,6 +2824,7 @@ static int futex_lock_pi(u32 __user *uad
+ {
+ struct hrtimer_sleeper timeout, *to = NULL;
+ struct task_struct *exiting = NULL;
++ struct rt_mutex_waiter rt_waiter;
+ struct futex_hash_bucket *hb;
+ struct futex_q q = futex_q_init;
+ int res, ret;
+@@ -2879,25 +2885,52 @@ retry_private:
+ }
+ }
+
++ WARN_ON(!q.pi_state);
++
+ /*
+ * Only actually queue now that the atomic ops are done:
+ */
+- queue_me(&q, hb);
++ __queue_me(&q, hb);
+
+- WARN_ON(!q.pi_state);
+- /*
+- * Block on the PI mutex:
+- */
+- if (!trylock) {
+- ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to);
+- } else {
++ if (trylock) {
+ ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
+ /* Fixup the trylock return value: */
+ ret = ret ? 0 : -EWOULDBLOCK;
++ goto no_block;
+ }
+
++ /*
++ * We must add ourselves to the rt_mutex waitlist while holding hb->lock
++ * such that the hb and rt_mutex wait lists match.
++ */
++ rt_mutex_init_waiter(&rt_waiter);
++ ret = rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
++ if (ret) {
++ if (ret == 1)
++ ret = 0;
++
++ goto no_block;
++ }
++
++ spin_unlock(q.lock_ptr);
++
++ if (unlikely(to))
++ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
++
++ ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
++
+ spin_lock(q.lock_ptr);
+ /*
++ * If we failed to acquire the lock (signal/timeout), we must
++ * first acquire the hb->lock before removing the lock from the
++ * rt_mutex waitqueue, such that we can keep the hb and rt_mutex
++ * wait lists consistent.
++ */
++ if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
++ ret = 0;
++
++no_block:
++ /*
+ * Fixup the pi_state owner and possibly acquire the lock if we
+ * haven't already.
+ */
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1489,19 +1489,6 @@ int __sched rt_mutex_lock_interruptible(
+ EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
+
+ /*
+- * Futex variant with full deadlock detection.
+- * Futex variants must not use the fast-path, see __rt_mutex_futex_unlock().
+- */
+-int __sched rt_mutex_timed_futex_lock(struct rt_mutex *lock,
+- struct hrtimer_sleeper *timeout)
+-{
+- might_sleep();
+-
+- return rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE,
+- timeout, RT_MUTEX_FULL_CHAINWALK);
+-}
+-
+-/*
+ * Futex variant, must not use fastpath.
+ */
+ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
+@@ -1774,12 +1761,6 @@ int rt_mutex_wait_proxy_lock(struct rt_m
+ /* sleep on the mutex */
+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
+
+- /*
+- * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+- * have to fix that up.
+- */
+- fixup_rt_mutex_waiters(lock);
+-
+ raw_spin_unlock(&lock->wait_lock);
+
+ return ret;
+@@ -1819,6 +1800,13 @@ bool rt_mutex_cleanup_proxy_lock(struct
+ fixup_rt_mutex_waiters(lock);
+ cleanup = true;
+ }
++
++ /*
++ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
++ * have to fix that up.
++ */
++ fixup_rt_mutex_waiters(lock);
++
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ return cleanup;
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -111,7 +111,6 @@ extern int rt_mutex_wait_proxy_lock(stru
+ struct rt_mutex_waiter *waiter);
+ extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter);
+-extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
+ extern int rt_mutex_futex_trylock(struct rt_mutex *l);
+ extern int __rt_mutex_futex_trylock(struct rt_mutex *l);
+
--- /dev/null
+From foo@baz Sun Aug 8 08:43:25 AM CEST 2021
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Mon, 2 Aug 2021 21:46:23 +0800
+Subject: futex,rt_mutex: Fix rt_mutex_cleanup_proxy_lock()
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable <stable@vger.kernel.org>
+Cc: Zhen Lei <thunder.leizhen@huawei.com>, Anna-Maria Gleixner <anna-maria@linutronix.de>, Mike Galbraith <efault@gmx.de>, Sasha Levin <sasha.levin@oracle.com>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, linux-kernel <linux-kernel@vger.kernel.org>
+Message-ID: <20210802134624.1934-11-thunder.leizhen@huawei.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 04dc1b2fff4e96cb4142227fbdc63c8871ad4ed9 ]
+
+Markus reported that the glibc/nptl/tst-robustpi8 test was failing after
+commit:
+
+ cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()")
+
+The following trace shows the problem:
+
+ ld-linux-x86-64-2161 [019] .... 410.760971: SyS_futex: 00007ffbeb76b028: 80000875 op=FUTEX_LOCK_PI
+ ld-linux-x86-64-2161 [019] ...1 410.760972: lock_pi_update_atomic: 00007ffbeb76b028: curval=80000875 uval=80000875 newval=80000875 ret=0
+ ld-linux-x86-64-2165 [011] .... 410.760978: SyS_futex: 00007ffbeb76b028: 80000875 op=FUTEX_UNLOCK_PI
+ ld-linux-x86-64-2165 [011] d..1 410.760979: do_futex: 00007ffbeb76b028: curval=80000875 uval=80000875 newval=80000871 ret=0
+ ld-linux-x86-64-2165 [011] .... 410.760980: SyS_futex: 00007ffbeb76b028: 80000871 ret=0000
+ ld-linux-x86-64-2161 [019] .... 410.760980: SyS_futex: 00007ffbeb76b028: 80000871 ret=ETIMEDOUT
+
+Task 2165 does an UNLOCK_PI, assigning the lock to the waiter task 2161
+which then returns with -ETIMEDOUT. That wrecks the lock state, because now
+the owner isn't aware it acquired the lock and removes the pending robust
+list entry.
+
+If 2161 is killed, the robust list will not clear out this futex and the
+subsequent acquire on this futex will then (correctly) result in -ESRCH
+which is unexpected by glibc, triggers an internal assertion and dies.
+
+Task 2161 Task 2165
+
+rt_mutex_wait_proxy_lock()
+ timeout();
+ /* T2161 is still queued in the waiter list */
+ return -ETIMEDOUT;
+
+ futex_unlock_pi()
+ spin_lock(hb->lock);
+ rtmutex_unlock()
+ remove_rtmutex_waiter(T2161);
+ mark_lock_available();
+ /* Make the next waiter owner of the user space side */
+ futex_uval = 2161;
+ spin_unlock(hb->lock);
+spin_lock(hb->lock);
+rt_mutex_cleanup_proxy_lock()
+   if (rtmutex_owner() != current)
+ ...
+ return FAIL;
+....
+return -ETIMEDOUT;
+
+This means that rt_mutex_cleanup_proxy_lock() needs to call
+try_to_take_rt_mutex() so it can correctly take over the rtmutex that was
+assigned by the waker. If the rtmutex is owned by some other task then this
+call is harmless and just confirms that the waiter is not able to acquire
+it.
+
+While there, fix what looks like a merge error which resulted in
+rt_mutex_cleanup_proxy_lock() having two calls to
+fixup_rt_mutex_waiters() and rt_mutex_wait_proxy_lock() not having any.
+Both should have one, since both potentially touch the waiter list.
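+
+For illustration only (not a reproducer), the user-space shape of this race:
+glibc maps pthread_mutex_timedlock() on a PTHREAD_PRIO_INHERIT mutex to
+FUTEX_LOCK_PI with a timeout, so the timeout below can race with the owner's
+unlock in exactly the window described above:
+
+  #include <pthread.h>
+  #include <stdio.h>
+  #include <time.h>
+  #include <errno.h>
+  #include <unistd.h>
+
+  static pthread_mutex_t lock;
+
+  static void *owner(void *arg)
+  {
+          (void)arg;
+          pthread_mutex_lock(&lock);
+          usleep(50 * 1000);              /* hold the lock for a while */
+          pthread_mutex_unlock(&lock);    /* may race with the waiter's timeout */
+          return NULL;
+  }
+
+  int main(void)
+  {
+          pthread_mutexattr_t attr;
+          pthread_t t;
+          struct timespec deadline;
+          int ret;
+
+          pthread_mutexattr_init(&attr);
+          pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
+          pthread_mutex_init(&lock, &attr);
+
+          pthread_create(&t, NULL, owner, NULL);
+          usleep(10 * 1000);              /* let the owner take the lock */
+
+          clock_gettime(CLOCK_REALTIME, &deadline);
+          deadline.tv_nsec += 50 * 1000 * 1000;
+          if (deadline.tv_nsec >= 1000000000L) {
+                  deadline.tv_sec++;
+                  deadline.tv_nsec -= 1000000000L;
+          }
+
+          ret = pthread_mutex_timedlock(&lock, &deadline);
+          if (ret == ETIMEDOUT)
+                  printf("timed out\n");
+          else if (ret == 0)
+                  pthread_mutex_unlock(&lock);
+
+          pthread_join(t, NULL);
+          return 0;
+  }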
+
+Fixes: 38d589f2fd08 ("futex,rt_mutex: Restructure rt_mutex_finish_proxy_lock()")
+Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
+Bug-Spotted-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Florian Weimer <fweimer@redhat.com>
+Cc: Darren Hart <dvhart@infradead.org>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Markus Trippelsdorf <markus@trippelsdorf.de>
+Link: http://lkml.kernel.org/r/20170519154850.mlomgdsd26drq5j6@hirez.programming.kicks-ass.net
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Joe Korty <joe.korty@concurrent-rt.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/locking/rtmutex.c | 24 ++++++++++++++++++------
+ 1 file changed, 18 insertions(+), 6 deletions(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1764,12 +1764,14 @@ int rt_mutex_wait_proxy_lock(struct rt_m
+ int ret;
+
+ raw_spin_lock_irq(&lock->wait_lock);
+-
+- set_current_state(TASK_INTERRUPTIBLE);
+-
+ /* sleep on the mutex */
++ set_current_state(TASK_INTERRUPTIBLE);
+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
+-
++ /*
++ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
++ * have to fix that up.
++ */
++ fixup_rt_mutex_waiters(lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ return ret;
+@@ -1801,15 +1803,25 @@ bool rt_mutex_cleanup_proxy_lock(struct
+
+ raw_spin_lock_irq(&lock->wait_lock);
+ /*
++ * Do an unconditional try-lock, this deals with the lock stealing
++ * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
++ * sets a NULL owner.
++ *
++ * We're not interested in the return value, because the subsequent
++ * test on rt_mutex_owner() will infer that. If the trylock succeeded,
++ * we will own the lock and it will have removed the waiter. If we
++ * failed the trylock, we're still not owner and we need to remove
++ * ourselves.
++ */
++ try_to_take_rt_mutex(lock, current, waiter);
++ /*
+ * Unless we're the owner; we're still enqueued on the wait_list.
+ * So check if we became owner, if not, take us off the wait_list.
+ */
+ if (rt_mutex_owner(lock) != current) {
+ remove_waiter(lock, waiter);
+- fixup_rt_mutex_waiters(lock);
+ cleanup = true;
+ }
+-
+ /*
+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+ * have to fix that up.
--- /dev/null
+From foo@baz Sun Aug 8 08:43:25 AM CEST 2021
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Mon, 2 Aug 2021 21:46:16 +0800
+Subject: futex,rt_mutex: Introduce rt_mutex_init_waiter()
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable <stable@vger.kernel.org>
+Cc: Zhen Lei <thunder.leizhen@huawei.com>, Anna-Maria Gleixner <anna-maria@linutronix.de>, Mike Galbraith <efault@gmx.de>, Sasha Levin <sasha.levin@oracle.com>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, linux-kernel <linux-kernel@vger.kernel.org>
+Message-ID: <20210802134624.1934-4-thunder.leizhen@huawei.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 50809358dd7199aa7ce232f6877dd09ec30ef374 ]
+
+Since there are already two copies of this code, introduce a helper now
+before adding a third one.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: dvhart@infradead.org
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170322104151.950039479@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Joe Korty <joe.korty@concurrent-rt.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex.c | 5 +----
+ kernel/locking/rtmutex.c | 12 +++++++++---
+ kernel/locking/rtmutex_common.h | 1 +
+ 3 files changed, 11 insertions(+), 7 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -3156,10 +3156,7 @@ static int futex_wait_requeue_pi(u32 __u
+ * The waiter is allocated on our stack, manipulated by the requeue
+ * code while we sleep on uaddr.
+ */
+- debug_rt_mutex_init_waiter(&rt_waiter);
+- RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
+- RB_CLEAR_NODE(&rt_waiter.tree_entry);
+- rt_waiter.task = NULL;
++ rt_mutex_init_waiter(&rt_waiter);
+
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
+ if (unlikely(ret != 0))
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1155,6 +1155,14 @@ void rt_mutex_adjust_pi(struct task_stru
+ next_lock, NULL, task);
+ }
+
++void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
++{
++ debug_rt_mutex_init_waiter(waiter);
++ RB_CLEAR_NODE(&waiter->pi_tree_entry);
++ RB_CLEAR_NODE(&waiter->tree_entry);
++ waiter->task = NULL;
++}
++
+ /**
+ * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
+ * @lock: the rt_mutex to take
+@@ -1236,9 +1244,7 @@ rt_mutex_slowlock(struct rt_mutex *lock,
+ struct rt_mutex_waiter waiter;
+ int ret = 0;
+
+- debug_rt_mutex_init_waiter(&waiter);
+- RB_CLEAR_NODE(&waiter.pi_tree_entry);
+- RB_CLEAR_NODE(&waiter.tree_entry);
++ rt_mutex_init_waiter(&waiter);
+
+ raw_spin_lock(&lock->wait_lock);
+
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -102,6 +102,7 @@ extern struct task_struct *rt_mutex_next
+ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
+ struct task_struct *proxy_owner);
+ extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
++extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
+ extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *task);
--- /dev/null
+From foo@baz Sun Aug 8 08:43:25 AM CEST 2021
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Mon, 2 Aug 2021 21:46:24 +0800
+Subject: rcu: Update documentation of rcu_read_unlock()
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable <stable@vger.kernel.org>
+Cc: Zhen Lei <thunder.leizhen@huawei.com>, Anna-Maria Gleixner <anna-maria@linutronix.de>, Mike Galbraith <efault@gmx.de>, Sasha Levin <sasha.levin@oracle.com>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, linux-kernel <linux-kernel@vger.kernel.org>
+Message-ID: <20210802134624.1934-12-thunder.leizhen@huawei.com>
+
+From: Anna-Maria Gleixner <anna-maria@linutronix.de>
+
+[ Upstream commit ec84b27f9b3b569f9235413d1945a2006b97b0aa ]
+
+Since commit b4abf91047cf ("rtmutex: Make wait_lock irq safe") the
+explanation in rcu_read_unlock() documentation about irq unsafe rtmutex
+wait_lock is no longer valid.
+
+Remove it to prevent kernel developers who read the documentation from
+relying on it.
+
+Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Cc: bigeasy@linutronix.de
+Link: https://lkml.kernel.org/r/20180525090507.22248-2-anna-maria@linutronix.de
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Joe Korty <joe.korty@concurrent-rt.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/rcupdate.h | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -880,9 +880,7 @@ static __always_inline void rcu_read_loc
+ * Unfortunately, this function acquires the scheduler's runqueue and
+ * priority-inheritance spinlocks. This means that deadlock could result
+ * if the caller of rcu_read_unlock() already holds one of these locks or
+- * any lock that is ever acquired while holding them; or any lock which
+- * can be taken from interrupt context because rcu_boost()->rt_mutex_lock()
+- * does not disable irqs while taking ->wait_lock.
++ * any lock that is ever acquired while holding them.
+ *
+ * That said, RCU readers are never priority boosted unless they were
+ * preempted. Therefore, one way to avoid deadlock is to make sure
--- /dev/null
+From foo@baz Sun Aug 8 08:43:25 AM CEST 2021
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Mon, 2 Aug 2021 21:46:20 +0800
+Subject: rtmutex: Make wait_lock irq safe
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable <stable@vger.kernel.org>
+Cc: Zhen Lei <thunder.leizhen@huawei.com>, Anna-Maria Gleixner <anna-maria@linutronix.de>, Mike Galbraith <efault@gmx.de>, Sasha Levin <sasha.levin@oracle.com>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, linux-kernel <linux-kernel@vger.kernel.org>
+Message-ID: <20210802134624.1934-8-thunder.leizhen@huawei.com>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit b4abf91047cf054f203dcfac97e1038388826937 ]
+
+Sasha reported a lockdep splat about a potential deadlock between RCU boosting
+rtmutex and the posix timer it_lock.
+
+CPU0 CPU1
+
+rtmutex_lock(&rcu->rt_mutex)
+ spin_lock(&rcu->rt_mutex.wait_lock)
+ local_irq_disable()
+ spin_lock(&timer->it_lock)
+ spin_lock(&rcu->mutex.wait_lock)
+--> Interrupt
+ spin_lock(&timer->it_lock)
+
+This is caused by the following code sequence on CPU1
+
+ rcu_read_lock()
+ x = lookup();
+ if (x)
+ spin_lock_irqsave(&x->it_lock);
+ rcu_read_unlock();
+ return x;
+
+We could fix that in the posix timer code by keeping the RCU read lock held
+across the spinlocked and irq-disabled section, but the above sequence is
+common and there is no reason not to support it.
+
+Making rt_mutex.wait_lock irq safe prevents the deadlock.
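+
+A user-space analogue of the same idea (illustrative only): block the signal
+that plays the part of the interrupt around the critical section, just like
+raw_spin_lock_irqsave()/raw_spin_unlock_irqrestore() do for wait_lock:
+
+  #include <signal.h>
+  #include <stdio.h>
+  #include <unistd.h>
+
+  static volatile sig_atomic_t counter;
+
+  static void on_alarm(int sig)
+  {
+          (void)sig;
+          counter++;              /* the "interrupt context" user */
+  }
+
+  int main(void)
+  {
+          struct sigaction sa;
+          sigset_t block, old;
+
+          sa.sa_handler = on_alarm;
+          sa.sa_flags = 0;
+          sigemptyset(&sa.sa_mask);
+          sigaction(SIGALRM, &sa, NULL);
+          alarm(1);
+
+          sigemptyset(&block);
+          sigaddset(&block, SIGALRM);
+
+          for (int i = 0; i < 3; i++) {
+                  /* like raw_spin_lock_irqsave(&lock->wait_lock, flags) */
+                  sigprocmask(SIG_BLOCK, &block, &old);
+                  counter += 10;  /* cannot interleave with the handler */
+                  /* like raw_spin_unlock_irqrestore(&lock->wait_lock, flags) */
+                  sigprocmask(SIG_SETMASK, &old, NULL);
+                  sleep(1);
+          }
+          printf("counter = %d\n", (int)counter);
+          return 0;
+  }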
+
+Reported-by: Sasha Levin <sasha.levin@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Joe Korty <joe.korty@concurrent-rt.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/locking/rtmutex.c | 135 +++++++++++++++++++++++++----------------------
+ 1 file changed, 72 insertions(+), 63 deletions(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -163,13 +163,14 @@ static inline void mark_rt_mutex_waiters
+ * 2) Drop lock->wait_lock
+ * 3) Try to unlock the lock with cmpxchg
+ */
+-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
++static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
++ unsigned long flags)
+ __releases(lock->wait_lock)
+ {
+ struct task_struct *owner = rt_mutex_owner(lock);
+
+ clear_rt_mutex_waiters(lock);
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ /*
+ * If a new waiter comes in between the unlock and the cmpxchg
+ * we have two situations:
+@@ -211,11 +212,12 @@ static inline void mark_rt_mutex_waiters
+ /*
+ * Simple slow path only version: lock->owner is protected by lock->wait_lock.
+ */
+-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
++static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
++ unsigned long flags)
+ __releases(lock->wait_lock)
+ {
+ lock->owner = NULL;
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ return true;
+ }
+ #endif
+@@ -497,7 +499,6 @@ static int rt_mutex_adjust_prio_chain(st
+ int ret = 0, depth = 0;
+ struct rt_mutex *lock;
+ bool detect_deadlock;
+- unsigned long flags;
+ bool requeue = true;
+
+ detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
+@@ -540,7 +541,7 @@ static int rt_mutex_adjust_prio_chain(st
+ /*
+ * [1] Task cannot go away as we did a get_task() before !
+ */
+- raw_spin_lock_irqsave(&task->pi_lock, flags);
++ raw_spin_lock_irq(&task->pi_lock);
+
+ /*
+ * [2] Get the waiter on which @task is blocked on.
+@@ -624,7 +625,7 @@ static int rt_mutex_adjust_prio_chain(st
+ * operations.
+ */
+ if (!raw_spin_trylock(&lock->wait_lock)) {
+- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++ raw_spin_unlock_irq(&task->pi_lock);
+ cpu_relax();
+ goto retry;
+ }
+@@ -655,7 +656,7 @@ static int rt_mutex_adjust_prio_chain(st
+ /*
+ * No requeue[7] here. Just release @task [8]
+ */
+- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++ raw_spin_unlock(&task->pi_lock);
+ put_task_struct(task);
+
+ /*
+@@ -663,14 +664,14 @@ static int rt_mutex_adjust_prio_chain(st
+ * If there is no owner of the lock, end of chain.
+ */
+ if (!rt_mutex_owner(lock)) {
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irq(&lock->wait_lock);
+ return 0;
+ }
+
+ /* [10] Grab the next task, i.e. owner of @lock */
+ task = rt_mutex_owner(lock);
+ get_task_struct(task);
+- raw_spin_lock_irqsave(&task->pi_lock, flags);
++ raw_spin_lock(&task->pi_lock);
+
+ /*
+ * No requeue [11] here. We just do deadlock detection.
+@@ -685,8 +686,8 @@ static int rt_mutex_adjust_prio_chain(st
+ top_waiter = rt_mutex_top_waiter(lock);
+
+ /* [13] Drop locks */
+- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock(&task->pi_lock);
++ raw_spin_unlock_irq(&lock->wait_lock);
+
+ /* If owner is not blocked, end of chain. */
+ if (!next_lock)
+@@ -707,7 +708,7 @@ static int rt_mutex_adjust_prio_chain(st
+ rt_mutex_enqueue(lock, waiter);
+
+ /* [8] Release the task */
+- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++ raw_spin_unlock(&task->pi_lock);
+ put_task_struct(task);
+
+ /*
+@@ -725,14 +726,14 @@ static int rt_mutex_adjust_prio_chain(st
+ */
+ if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
+ wake_up_process(rt_mutex_top_waiter(lock)->task);
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irq(&lock->wait_lock);
+ return 0;
+ }
+
+ /* [10] Grab the next task, i.e. the owner of @lock */
+ task = rt_mutex_owner(lock);
+ get_task_struct(task);
+- raw_spin_lock_irqsave(&task->pi_lock, flags);
++ raw_spin_lock(&task->pi_lock);
+
+ /* [11] requeue the pi waiters if necessary */
+ if (waiter == rt_mutex_top_waiter(lock)) {
+@@ -786,8 +787,8 @@ static int rt_mutex_adjust_prio_chain(st
+ top_waiter = rt_mutex_top_waiter(lock);
+
+ /* [13] Drop the locks */
+- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock(&task->pi_lock);
++ raw_spin_unlock_irq(&lock->wait_lock);
+
+ /*
+ * Make the actual exit decisions [12], based on the stored
+@@ -810,7 +811,7 @@ static int rt_mutex_adjust_prio_chain(st
+ goto again;
+
+ out_unlock_pi:
+- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++ raw_spin_unlock_irq(&task->pi_lock);
+ out_put_task:
+ put_task_struct(task);
+
+@@ -820,7 +821,7 @@ static int rt_mutex_adjust_prio_chain(st
+ /*
+ * Try to take an rt-mutex
+ *
+- * Must be called with lock->wait_lock held.
++ * Must be called with lock->wait_lock held and interrupts disabled
+ *
+ * @lock: The lock to be acquired.
+ * @task: The task which wants to acquire the lock
+@@ -830,8 +831,6 @@ static int rt_mutex_adjust_prio_chain(st
+ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+ struct rt_mutex_waiter *waiter)
+ {
+- unsigned long flags;
+-
+ /*
+ * Before testing whether we can acquire @lock, we set the
+ * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
+@@ -916,7 +915,7 @@ static int try_to_take_rt_mutex(struct r
+ * case, but conditionals are more expensive than a redundant
+ * store.
+ */
+- raw_spin_lock_irqsave(&task->pi_lock, flags);
++ raw_spin_lock(&task->pi_lock);
+ task->pi_blocked_on = NULL;
+ /*
+ * Finish the lock acquisition. @task is the new owner. If
+@@ -925,7 +924,7 @@ static int try_to_take_rt_mutex(struct r
+ */
+ if (rt_mutex_has_waiters(lock))
+ rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
+- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++ raw_spin_unlock(&task->pi_lock);
+
+ takeit:
+ /* We got the lock. */
+@@ -945,7 +944,7 @@ takeit:
+ *
+ * Prepare waiter and propagate pi chain
+ *
+- * This must be called with lock->wait_lock held.
++ * This must be called with lock->wait_lock held and interrupts disabled
+ */
+ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+@@ -956,7 +955,6 @@ static int task_blocks_on_rt_mutex(struc
+ struct rt_mutex_waiter *top_waiter = waiter;
+ struct rt_mutex *next_lock;
+ int chain_walk = 0, res;
+- unsigned long flags;
+
+ /*
+ * Early deadlock detection. We really don't want the task to
+@@ -970,7 +968,7 @@ static int task_blocks_on_rt_mutex(struc
+ if (owner == task)
+ return -EDEADLK;
+
+- raw_spin_lock_irqsave(&task->pi_lock, flags);
++ raw_spin_lock(&task->pi_lock);
+ __rt_mutex_adjust_prio(task);
+ waiter->task = task;
+ waiter->lock = lock;
+@@ -983,12 +981,12 @@ static int task_blocks_on_rt_mutex(struc
+
+ task->pi_blocked_on = waiter;
+
+- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++ raw_spin_unlock(&task->pi_lock);
+
+ if (!owner)
+ return 0;
+
+- raw_spin_lock_irqsave(&owner->pi_lock, flags);
++ raw_spin_lock(&owner->pi_lock);
+ if (waiter == rt_mutex_top_waiter(lock)) {
+ rt_mutex_dequeue_pi(owner, top_waiter);
+ rt_mutex_enqueue_pi(owner, waiter);
+@@ -1003,7 +1001,7 @@ static int task_blocks_on_rt_mutex(struc
+ /* Store the lock on which owner is blocked or NULL */
+ next_lock = task_blocked_on_lock(owner);
+
+- raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
++ raw_spin_unlock(&owner->pi_lock);
+ /*
+ * Even if full deadlock detection is on, if the owner is not
+ * blocked itself, we can avoid finding this out in the chain
+@@ -1019,12 +1017,12 @@ static int task_blocks_on_rt_mutex(struc
+ */
+ get_task_struct(owner);
+
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irq(&lock->wait_lock);
+
+ res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
+ next_lock, waiter, task);
+
+- raw_spin_lock(&lock->wait_lock);
++ raw_spin_lock_irq(&lock->wait_lock);
+
+ return res;
+ }
+@@ -1033,15 +1031,14 @@ static int task_blocks_on_rt_mutex(struc
+ * Remove the top waiter from the current tasks pi waiter tree and
+ * queue it up.
+ *
+- * Called with lock->wait_lock held.
++ * Called with lock->wait_lock held and interrupts disabled.
+ */
+ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
+ struct rt_mutex *lock)
+ {
+ struct rt_mutex_waiter *waiter;
+- unsigned long flags;
+
+- raw_spin_lock_irqsave(¤t->pi_lock, flags);
++ raw_spin_lock(¤t->pi_lock);
+
+ waiter = rt_mutex_top_waiter(lock);
+
+@@ -1063,7 +1060,7 @@ static void mark_wakeup_next_waiter(stru
+ */
+ lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
+
+- raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
++ raw_spin_unlock(¤t->pi_lock);
+
+ wake_q_add(wake_q, waiter->task);
+ }
+@@ -1071,7 +1068,7 @@ static void mark_wakeup_next_waiter(stru
+ /*
+ * Remove a waiter from a lock and give up
+ *
+- * Must be called with lock->wait_lock held and
++ * Must be called with lock->wait_lock held and interrupts disabled. I must
+ * have just failed to try_to_take_rt_mutex().
+ */
+ static void remove_waiter(struct rt_mutex *lock,
+@@ -1080,12 +1077,11 @@ static void remove_waiter(struct rt_mute
+ bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
+ struct task_struct *owner = rt_mutex_owner(lock);
+ struct rt_mutex *next_lock;
+- unsigned long flags;
+
+- raw_spin_lock_irqsave(¤t->pi_lock, flags);
++ raw_spin_lock(¤t->pi_lock);
+ rt_mutex_dequeue(lock, waiter);
+ current->pi_blocked_on = NULL;
+- raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
++ raw_spin_unlock(¤t->pi_lock);
+
+ /*
+ * Only update priority if the waiter was the highest priority
+@@ -1094,7 +1090,7 @@ static void remove_waiter(struct rt_mute
+ if (!owner || !is_top_waiter)
+ return;
+
+- raw_spin_lock_irqsave(&owner->pi_lock, flags);
++ raw_spin_lock(&owner->pi_lock);
+
+ rt_mutex_dequeue_pi(owner, waiter);
+
+@@ -1106,7 +1102,7 @@ static void remove_waiter(struct rt_mute
+ /* Store the lock on which owner is blocked or NULL */
+ next_lock = task_blocked_on_lock(owner);
+
+- raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
++ raw_spin_unlock(&owner->pi_lock);
+
+ /*
+ * Don't walk the chain, if the owner task is not blocked
+@@ -1118,12 +1114,12 @@ static void remove_waiter(struct rt_mute
+ /* gets dropped in rt_mutex_adjust_prio_chain()! */
+ get_task_struct(owner);
+
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irq(&lock->wait_lock);
+
+ rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
+ next_lock, NULL, current);
+
+- raw_spin_lock(&lock->wait_lock);
++ raw_spin_lock_irq(&lock->wait_lock);
+ }
+
+ /*
+@@ -1167,11 +1163,11 @@ void rt_mutex_init_waiter(struct rt_mute
+ * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
+ * @lock: the rt_mutex to take
+ * @state: the state the task should block in (TASK_INTERRUPTIBLE
+- * or TASK_UNINTERRUPTIBLE)
++ * or TASK_UNINTERRUPTIBLE)
+ * @timeout: the pre-initialized and started timer, or NULL for none
+ * @waiter: the pre-initialized rt_mutex_waiter
+ *
+- * lock->wait_lock must be held by the caller.
++ * Must be called with lock->wait_lock held and interrupts disabled
+ */
+ static int __sched
+ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
+@@ -1199,13 +1195,13 @@ __rt_mutex_slowlock(struct rt_mutex *loc
+ break;
+ }
+
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irq(&lock->wait_lock);
+
+ debug_rt_mutex_print_deadlock(waiter);
+
+ schedule();
+
+- raw_spin_lock(&lock->wait_lock);
++ raw_spin_lock_irq(&lock->wait_lock);
+ set_current_state(state);
+ }
+
+@@ -1242,15 +1238,24 @@ rt_mutex_slowlock(struct rt_mutex *lock,
+ enum rtmutex_chainwalk chwalk)
+ {
+ struct rt_mutex_waiter waiter;
++ unsigned long flags;
+ int ret = 0;
+
+ rt_mutex_init_waiter(&waiter);
+
+- raw_spin_lock(&lock->wait_lock);
++ /*
++ * Technically we could use raw_spin_[un]lock_irq() here, but this can
++ * be called in early boot if the cmpxchg() fast path is disabled
++ * (debug, no architecture support). In this case we will acquire the
++ * rtmutex with lock->wait_lock held. But we cannot unconditionally
++ * enable interrupts in that early boot case. So we need to use the
++ * irqsave/restore variants.
++ */
++ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+
+ /* Try to acquire the lock again: */
+ if (try_to_take_rt_mutex(lock, current, NULL)) {
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ return 0;
+ }
+
+@@ -1279,7 +1284,7 @@ rt_mutex_slowlock(struct rt_mutex *lock,
+ */
+ fixup_rt_mutex_waiters(lock);
+
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+ /* Remove pending timer: */
+ if (unlikely(timeout))
+@@ -1308,6 +1313,7 @@ static inline int __rt_mutex_slowtrylock
+ */
+ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
+ {
++ unsigned long flags;
+ int ret;
+
+ /*
+@@ -1319,14 +1325,14 @@ static inline int rt_mutex_slowtrylock(s
+ return 0;
+
+ /*
+- * The mutex has currently no owner. Lock the wait lock and
+- * try to acquire the lock.
++ * The mutex has currently no owner. Lock the wait lock and try to
++ * acquire the lock. We use irqsave here to support early boot calls.
+ */
+- raw_spin_lock(&lock->wait_lock);
++ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+
+ ret = __rt_mutex_slowtrylock(lock);
+
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+ return ret;
+ }
+@@ -1338,7 +1344,10 @@ static inline int rt_mutex_slowtrylock(s
+ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
+ struct wake_q_head *wake_q)
+ {
+- raw_spin_lock(&lock->wait_lock);
++ unsigned long flags;
++
++ /* irqsave required to support early boot calls */
++ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+
+ debug_rt_mutex_unlock(lock);
+
+@@ -1375,10 +1384,10 @@ static bool __sched rt_mutex_slowunlock(
+ */
+ while (!rt_mutex_has_waiters(lock)) {
+ /* Drops lock->wait_lock ! */
+- if (unlock_rt_mutex_safe(lock) == true)
++ if (unlock_rt_mutex_safe(lock, flags) == true)
+ return false;
+ /* Relock the rtmutex and try again */
+- raw_spin_lock(&lock->wait_lock);
++ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ }
+
+ /*
+@@ -1389,7 +1398,7 @@ static bool __sched rt_mutex_slowunlock(
+ */
+ mark_wakeup_next_waiter(wake_q, lock);
+
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+ /* check PI boosting */
+ return true;
+@@ -1680,10 +1689,10 @@ int rt_mutex_start_proxy_lock(struct rt_
+ {
+ int ret;
+
+- raw_spin_lock(&lock->wait_lock);
++ raw_spin_lock_irq(&lock->wait_lock);
+
+ if (try_to_take_rt_mutex(lock, task, NULL)) {
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irq(&lock->wait_lock);
+ return 1;
+ }
+
+@@ -1704,7 +1713,7 @@ int rt_mutex_start_proxy_lock(struct rt_
+ if (unlikely(ret))
+ remove_waiter(lock, waiter);
+
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irq(&lock->wait_lock);
+
+ debug_rt_mutex_print_deadlock(waiter);
+
+@@ -1754,14 +1763,14 @@ int rt_mutex_wait_proxy_lock(struct rt_m
+ {
+ int ret;
+
+- raw_spin_lock(&lock->wait_lock);
++ raw_spin_lock_irq(&lock->wait_lock);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ /* sleep on the mutex */
+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
+
+- raw_spin_unlock(&lock->wait_lock);
++ raw_spin_unlock_irq(&lock->wait_lock);
+
+ return ret;
+ }
--- /dev/null
+futex-rename-free_pi_state-to-put_pi_state.patch
+futex-cleanup-refcounting.patch
+futex-rt_mutex-introduce-rt_mutex_init_waiter.patch
+futex-pull-rt_mutex_futex_unlock-out-from-under-hb-lock.patch
+futex-rework-futex_lock_pi-to-use-rt_mutex_-_proxy_lock.patch
+futex-futex_unlock_pi-determinism.patch
+rtmutex-make-wait_lock-irq-safe.patch
+futex-handle-transient-ownerless-rtmutex-state-correctly.patch
+futex-avoid-freeing-an-active-timer.patch
+futex-rt_mutex-fix-rt_mutex_cleanup_proxy_lock.patch
+rcu-update-documentation-of-rcu_read_unlock.patch