Add the contended_release trace event. This tracepoint fires on the
holder side when a contended lock is released, complementing the
existing contention_begin/contention_end tracepoints which fire on the
waiter side.
This enables correlating lock hold time under contention with waiter
events by lock address.
Add trace_contended_release()/trace_call__contended_release() calls to
the slowpath unlock paths of sleepable locks: mutex, rtmutex, semaphore,
rwsem, percpu-rwsem, and RT-specific rwbase locks.
Where possible, trace_contended_release() fires before the lock is
released and before the waiter is woken. For some lock types, the
tracepoint fires after the release but before the wake. Making the
placement consistent across all lock types is not worth the added
complexity.
For reader/writer locks, the tracepoint fires for every reader releasing
while a writer is waiting, not only for the last reader.
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Acked-by: Usama Arif <usama.arif@linux.dev>
Link: https://patch.msgid.link/02f4f6c5ce6761e7f6587cf0ff2289d962ecddd4.1780506267.git.d@ilvokhin.com
TP_printk("%p (ret=%d)", __entry->lock_addr, __entry->ret)
);
+TRACE_EVENT(contended_release,
+
+ TP_PROTO(void *lock),
+
+ TP_ARGS(lock),
+
+ TP_STRUCT__entry(
+ __field(void *, lock_addr)
+ ),
+
+ TP_fast_assign(
+ __entry->lock_addr = lock;
+ ),
+
+ TP_printk("%p", __entry->lock_addr)
+);
+
#endif /* _TRACE_LOCK_H */
/* This part must be outside protection */
wake_q_add(&wake_q, next);
}
+ if (trace_contended_release_enabled() && waiter)
+ trace_call__contended_release(lock);
+
if (owner & MUTEX_FLAG_HANDOFF)
__mutex_handoff(lock, next);
EXPORT_TRACEPOINT_SYMBOL_GPL(contention_begin);
EXPORT_TRACEPOINT_SYMBOL_GPL(contention_end);
+EXPORT_TRACEPOINT_SYMBOL_GPL(contended_release);
/**
* atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
{
rwsem_release(&sem->dep_map, _RET_IP_);
+ if (trace_contended_release_enabled() && wq_has_sleeper(&sem->waiters))
+ trace_call__contended_release(sem);
+
/*
* Signal the writer is done, no fast path yet.
*
void __percpu_up_read(struct percpu_rw_semaphore *sem)
{
lockdep_assert_preemption_disabled();
+ /*
+ * After percpu_up_write() completes, rcu_sync_is_idle() can still
+ * return false during the grace period, forcing readers into this
+ * slowpath. Only trace when a writer is actually waiting for
+ * readers to drain.
+ */
+ if (trace_contended_release_enabled() && rcuwait_active(&sem->writer))
+ trace_call__contended_release(sem);
/*
* slowpath; reader will only ever wake a single blocked
* writer.
raw_spin_lock_irqsave(&lock->wait_lock, flags);
}
+ trace_contended_release(lock);
/*
* The wakeup next waiter path does not suffer from the above
* race. See the comments there.
static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb,
unsigned int state)
{
+ if (trace_contended_release_enabled() && rt_mutex_owner(&rwb->rtmutex))
+ trace_call__contended_release(rwb);
/*
* rwb->readers can only hit 0 when a writer is waiting for the
* active readers to leave the critical section.
unsigned long flags;
raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
+ trace_call__contended_release(rwb);
__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
}
unsigned long flags;
raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
+ trace_call__contended_release(rwb);
/* Release it and account current as reader */
__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
}
rwsem_clear_reader_owned(sem);
tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
+ if (trace_contended_release_enabled() && (tmp & RWSEM_FLAG_WAITERS))
+ trace_call__contended_release(sem);
if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
RWSEM_FLAG_WAITERS)) {
clear_nonspinnable(sem);
preempt_disable();
rwsem_clear_owner(sem);
tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
- if (unlikely(tmp & RWSEM_FLAG_WAITERS))
+ if (unlikely(tmp & RWSEM_FLAG_WAITERS)) {
+ trace_contended_release(sem);
rwsem_wake(sem);
+ }
preempt_enable();
}
tmp = atomic_long_fetch_add_release(
-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
rwsem_set_reader_owned(sem);
- if (tmp & RWSEM_FLAG_WAITERS)
+ if (tmp & RWSEM_FLAG_WAITERS) {
+ trace_contended_release(sem);
rwsem_downgrade_wake(sem);
+ }
preempt_enable();
}
sem->count++;
else
__up(sem, &wake_q);
+
+ if (trace_contended_release_enabled() && !wake_q_empty(&wake_q))
+ trace_call__contended_release(sem);
+
raw_spin_unlock_irqrestore(&sem->lock, flags);
if (!wake_q_empty(&wake_q))
wake_up_q(&wake_q);