+++ /dev/null
-From 008604345f50eeebd31e85e7b1d1be2a9ec5f5e8 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Fri, 2 Dec 2022 10:02:23 +0000
-Subject: rtmutex: Add acquire semantics for rtmutex lock acquisition slow path
-
-From: Mel Gorman <mgorman@techsingularity.net>
-
-[ Upstream commit 1c0908d8e441631f5b8ba433523cf39339ee2ba0 ]
-
-Jan Kara reported the following bug triggering on 6.0.5-rt14 running dbench
-on XFS on arm64.
-
- kernel BUG at fs/inode.c:625!
- Internal error: Oops - BUG: 0 [#1] PREEMPT_RT SMP
- CPU: 11 PID: 6611 Comm: dbench Tainted: G E 6.0.0-rt14-rt+ #1
- pc : clear_inode+0xa0/0xc0
- lr : clear_inode+0x38/0xc0
- Call trace:
- clear_inode+0xa0/0xc0
- evict+0x160/0x180
- iput+0x154/0x240
- do_unlinkat+0x184/0x300
- __arm64_sys_unlinkat+0x48/0xc0
- el0_svc_common.constprop.4+0xe4/0x2c0
- do_el0_svc+0xac/0x100
- el0_svc+0x78/0x200
- el0t_64_sync_handler+0x9c/0xc0
- el0t_64_sync+0x19c/0x1a0
-
-It also affects 6.1-rc7-rt5 and affects a preempt-rt fork of 5.14 so this
-is likely a bug that existed forever and only became visible when ARM
-support was added to preempt-rt. The same problem does not occur on x86-64
-and he also reported that converting sb->s_inode_wblist_lock to
-raw_spinlock_t makes the problem disappear indicating that the RT spinlock
-variant is the problem.
-
-This in turn means that RT mutexes on ARM64 and any other weakly ordered
-architecture are affected by this, independent of PREEMPT_RT.
-
-Will Deacon observed:
-
- "I'd be more inclined to be suspicious of the slowpath tbh, as we need to
- make sure that we have acquire semantics on all paths where the lock can
- be taken. Looking at the rtmutex code, this really isn't obvious to me
- -- for example, try_to_take_rt_mutex() appears to be able to return via
- the 'takeit' label without acquire semantics and it looks like we might
- be relying on the caller's subsequent _unlock_ of the wait_lock for
- ordering, but that will give us release semantics which aren't correct."
-
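-As a rough illustration of the ordering Will Deacon describes, consider the
-standalone C11 sketch below. It is an editorial illustration, not kernel
-code, and all names in it (lock_owner, protected_data, prev_owner,
-new_owner) are invented for the example. The previous owner publishes its
-critical-section data with a RELEASE store, which is the half the unlock
-of wait_lock already provides, while the new owner must claim the lock
-with ACQUIRE ordering; without it, the new owner's reads of the protected
-data are not ordered against the lock handover and may observe stale
-values on a weakly ordered CPU such as arm64:
-
- #include <assert.h>
- #include <pthread.h>
- #include <stdatomic.h>
- #include <stdio.h>
-
- static atomic_int lock_owner;   /* 0 == free, otherwise an owner id */
- static int protected_data;      /* data guarded by the "lock"       */
-
- static void *prev_owner(void *arg)
- {
-         protected_data = 42;
-         /*
-          * RELEASE: the store above is ordered before the lock
-          * becoming visible as free.
-          */
-         atomic_store_explicit(&lock_owner, 0, memory_order_release);
-         return NULL;
- }
-
- static void *new_owner(void *arg)
- {
-         int expected = 0;
-
-         /*
-          * ACQUIRE on the successful claim: the read of protected_data
-          * below cannot be satisfied before the lock is observed free.
-          * With a relaxed or plain owner update, as in the pre-fix slow
-          * path, a weakly ordered CPU could legally return 0 here.
-          */
-         while (!atomic_compare_exchange_weak_explicit(&lock_owner,
-                         &expected, 2, memory_order_acquire,
-                         memory_order_relaxed))
-                 expected = 0;
-
-         assert(protected_data == 42);
-         printf("new owner sees protected_data = %d\n", protected_data);
-         return NULL;
- }
-
- int main(void)
- {
-         pthread_t a, b;
-
-         atomic_store(&lock_owner, 1);   /* start with the lock held */
-         pthread_create(&a, NULL, prev_owner, NULL);
-         pthread_create(&b, NULL, new_owner, NULL);
-         pthread_join(a, NULL);
-         pthread_join(b, NULL);
-         return 0;
- }
-
-Built with "cc -pthread", the assertion can never fire because of the
-ACQUIRE on the claim, which is precisely the guarantee the rtmutex slow
-path was missing.
-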
-Based on that comment, Sebastian Andrzej Siewior prototyped a fix that does
-work, but it was somewhat overkill and added some fences that should not be
-necessary.
-
-The lock owner is updated with an IRQ-safe raw spinlock held, but the
-spin_unlock does not provide the acquire semantics that are needed when
-acquiring a mutex.
-
-Add the necessary acquire semantics for lock owner updates in the slow path
-acquisition and in the waiter bit logic.
-
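-In terms of bare ordering primitives, the change boils down to the pattern
-sketched below as a userspace C11 analogue. This is an illustration only,
-not the patch itself; the kernel primitives each piece stands in for are
-noted in the comments, the names (owner, set_owner, clear_owner,
-mark_waiters) are made up, and the waiters-bit bookkeeping of the real
-owner word is reduced to bit 0:
-
- #include <stdatomic.h>
-
- static _Atomic(unsigned long) owner;
-
- static void set_owner(unsigned long new_owner)
- {
-         /*
-          * Lock acquisition slow path; kernel: xchg_acquire().  The
-          * owner update itself now carries the ACQUIRE that the new
-          * lock holder needs.
-          */
-         atomic_exchange_explicit(&owner, new_owner, memory_order_acquire);
- }
-
- static void clear_owner(void)
- {
-         /*
-          * Unlock; kernel: WRITE_ONCE().  The later raw_spin_unlock()
-          * of ->wait_lock provides the RELEASE, so a plain store is
-          * enough here.
-          */
-         atomic_store_explicit(&owner, 0, memory_order_relaxed);
- }
-
- static void mark_waiters(void)
- {
-         unsigned long old = atomic_load_explicit(&owner,
-                                                  memory_order_relaxed);
-
-         /* Kernel: the relaxed cmpxchg loop in mark_rt_mutex_waiters(). */
-         while (!atomic_compare_exchange_weak_explicit(&owner, &old,
-                         old | 1UL, memory_order_relaxed,
-                         memory_order_relaxed))
-                 ;
-         /* Kernel: smp_mb__after_atomic(), roughly a full fence here. */
-         atomic_thread_fence(memory_order_seq_cst);
- }
-
- int main(void)
- {
-         mark_waiters();     /* a contending waiter sets the waiters bit */
-         set_owner(1);       /* slow path acquisition                    */
-         clear_owner();      /* unlock                                   */
-         return 0;
- }
-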
-The patched kernel successfully completed 10 iterations of the dbench
-workload, while the vanilla kernel failed on the first iteration.
-
-[ bigeasy@linutronix.de: Initial prototype fix ]
-
-Fixes: 700318d1d7b38 ("locking/rtmutex: Use acquire/release semantics")
-Fixes: 23f78d4a03c5 ("[PATCH] pi-futex: rt mutex core")
-Reported-by: Jan Kara <jack@suse.cz>
-Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: stable@vger.kernel.org
-Link: https://lore.kernel.org/r/20221202100223.6mevpbl7i6x5udfd@techsingularity.net
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- kernel/locking/rtmutex.c | 56 ++++++++++++++++++++++++++++++------
- kernel/locking/rtmutex_api.c | 6 ++--
- 2 files changed, 50 insertions(+), 12 deletions(-)
-
-diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
-index ea5a701ab240..3060cb6bd504 100644
---- a/kernel/locking/rtmutex.c
-+++ b/kernel/locking/rtmutex.c
-@@ -87,15 +87,31 @@ static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
- * set this bit before looking at the lock.
- */
-
--static __always_inline void
--rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
-+static __always_inline struct task_struct *
-+rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
- {
- unsigned long val = (unsigned long)owner;
-
- if (rt_mutex_has_waiters(lock))
- val |= RT_MUTEX_HAS_WAITERS;
-
-- WRITE_ONCE(lock->owner, (struct task_struct *)val);
-+ return (struct task_struct *)val;
-+}
-+
-+static __always_inline void
-+rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
-+{
-+ /*
-+ * lock->wait_lock is held but explicit acquire semantics are needed
-+ * for a new lock owner so WRITE_ONCE is insufficient.
-+ */
-+ xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
-+}
-+
-+static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
-+{
-+ /* lock->wait_lock is held so the unlock provides release semantics. */
-+ WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
- }
-
- static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
-@@ -104,7 +120,8 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
- ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
- }
-
--static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
-+static __always_inline void
-+fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
- {
- unsigned long owner, *p = (unsigned long *) &lock->owner;
-
-@@ -170,8 +187,21 @@ static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
- * still set.
- */
- owner = READ_ONCE(*p);
-- if (owner & RT_MUTEX_HAS_WAITERS)
-- WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
-+ if (owner & RT_MUTEX_HAS_WAITERS) {
-+ /*
-+ * See rt_mutex_set_owner() and rt_mutex_clear_owner() on
-+ * why xchg_acquire() is used for updating owner for
-+ * locking and WRITE_ONCE() for unlocking.
-+ *
-+ * WRITE_ONCE() would work for the acquire case too, but
-+ * in case that the lock acquisition failed it might
-+ * force other lockers into the slow path unnecessarily.
-+ */
-+ if (acquire_lock)
-+ xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
-+ else
-+ WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
-+ }
- }
-
- /*
-@@ -206,6 +236,13 @@ static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
- owner = *p;
- } while (cmpxchg_relaxed(p, owner,
- owner | RT_MUTEX_HAS_WAITERS) != owner);
-+
-+ /*
-+ * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
-+ * operations in the event of contention. Ensure the successful
-+ * cmpxchg is visible.
-+ */
-+ smp_mb__after_atomic();
- }
-
- /*
-@@ -1231,7 +1268,7 @@ static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
- * try_to_take_rt_mutex() sets the lock waiters bit
- * unconditionally. Clean this up.
- */
-- fixup_rt_mutex_waiters(lock);
-+ fixup_rt_mutex_waiters(lock, true);
-
- return ret;
- }
-@@ -1591,7 +1628,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
- * try_to_take_rt_mutex() sets the waiter bit
- * unconditionally. We might have to fix that up.
- */
-- fixup_rt_mutex_waiters(lock);
-+ fixup_rt_mutex_waiters(lock, true);
-+
- return ret;
- }
-
-@@ -1701,7 +1739,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
- * try_to_take_rt_mutex() sets the waiter bit unconditionally.
- * We might have to fix that up:
- */
-- fixup_rt_mutex_waiters(lock);
-+ fixup_rt_mutex_waiters(lock, true);
- debug_rt_mutex_free_waiter(&waiter);
- }
-
-diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
-index 5c9299aaabae..a461be2f873d 100644
---- a/kernel/locking/rtmutex_api.c
-+++ b/kernel/locking/rtmutex_api.c
-@@ -245,7 +245,7 @@ void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
- void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
- {
- debug_rt_mutex_proxy_unlock(lock);
-- rt_mutex_set_owner(lock, NULL);
-+ rt_mutex_clear_owner(lock);
- }
-
- /**
-@@ -360,7 +360,7 @@ int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
- * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
- * have to fix that up.
- */
-- fixup_rt_mutex_waiters(lock);
-+ fixup_rt_mutex_waiters(lock, true);
- raw_spin_unlock_irq(&lock->wait_lock);
-
- return ret;
-@@ -416,7 +416,7 @@ bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock,
- * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
- * have to fix that up.
- */
-- fixup_rt_mutex_waiters(lock);
-+ fixup_rt_mutex_waiters(lock, false);
-
- raw_spin_unlock_irq(&lock->wait_lock);
-
---
-2.35.1
-