From 96d1610e0b20b5a627773874b4514ae922ad98f6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 24 Feb 2026 17:35:52 +0100 Subject: [PATCH] sched: Optimize hrtimer handling schedule() provides several mechanisms to update the hrtick timer: 1) When the next task is picked 2) When the balance callbacks are invoked before rq::lock is released Each of them can result in a first expiring timer and cause a reprogram of the clock event device. Solve this by deferring the rearm to the end of schedule() right before releasing rq::lock by setting a flag on entry which tells hrtick_start() to cache the runtime constraint in rq::hrtick_delay without touching the timer itself. Right before releasing rq::lock evaluate the flags and either rearm or cancel the hrtick timer. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260224163429.273068659@kernel.org --- kernel/sched/core.c | 57 +++++++++++++++++++++++++++++++++++++------- kernel/sched/sched.h | 2 ++ 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a716cc61d42d2..a868f0a4159e7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -872,6 +872,12 @@ void update_rq_clock(struct rq *rq) * Use HR-timers to deliver accurate preemption points. */ +enum { + HRTICK_SCHED_NONE = 0, + HRTICK_SCHED_DEFER = BIT(1), + HRTICK_SCHED_START = BIT(2), +}; + static void hrtick_clear(struct rq *rq) { if (hrtimer_active(&rq->hrtick_timer)) @@ -932,6 +938,17 @@ void hrtick_start(struct rq *rq, u64 delay) * doesn't make sense and can cause timer DoS. */ delta = max_t(s64, delay, 10000LL); + + /* + * If this is in the middle of schedule() only note the delay + * and let hrtick_schedule_exit() deal with it. 
+ */ + if (rq->hrtick_sched) { + rq->hrtick_sched |= HRTICK_SCHED_START; + rq->hrtick_delay = delta; + return; + } + rq->hrtick_time = ktime_add_ns(ktime_get(), delta); if (rq == this_rq()) @@ -940,19 +957,40 @@ void hrtick_start(struct rq *rq, u64 delay) smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); } -static void hrtick_rq_init(struct rq *rq) +static inline void hrtick_schedule_enter(struct rq *rq) { - INIT_CSD(&rq->hrtick_csd, __hrtick_start, rq); - hrtimer_setup(&rq->hrtick_timer, hrtick, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); + rq->hrtick_sched = HRTICK_SCHED_DEFER; } -#else /* !CONFIG_SCHED_HRTICK: */ -static inline void hrtick_clear(struct rq *rq) + +static inline void hrtick_schedule_exit(struct rq *rq) { + if (rq->hrtick_sched & HRTICK_SCHED_START) { + rq->hrtick_time = ktime_add_ns(ktime_get(), rq->hrtick_delay); + __hrtick_restart(rq); + } else if (idle_rq(rq)) { + /* + * No need for using hrtimer_active(). The timer is CPU local + * and interrupts are disabled, so the callback cannot be + * running and the queued state is valid. 
+ */ + if (hrtimer_is_queued(&rq->hrtick_timer)) + hrtimer_cancel(&rq->hrtick_timer); + } + + rq->hrtick_sched = HRTICK_SCHED_NONE; } -static inline void hrtick_rq_init(struct rq *rq) +static void hrtick_rq_init(struct rq *rq) { + INIT_CSD(&rq->hrtick_csd, __hrtick_start, rq); + rq->hrtick_sched = HRTICK_SCHED_NONE; + hrtimer_setup(&rq->hrtick_timer, hrtick, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); } +#else /* !CONFIG_SCHED_HRTICK: */ +static inline void hrtick_clear(struct rq *rq) { } +static inline void hrtick_rq_init(struct rq *rq) { } +static inline void hrtick_schedule_enter(struct rq *rq) { } +static inline void hrtick_schedule_exit(struct rq *rq) { } #endif /* !CONFIG_SCHED_HRTICK */ /* @@ -5028,6 +5066,7 @@ static inline void finish_lock_switch(struct rq *rq) */ spin_acquire(&__rq_lockp(rq)->dep_map, 0, 0, _THIS_IP_); __balance_callbacks(rq, NULL); + hrtick_schedule_exit(rq); raw_spin_rq_unlock_irq(rq); } @@ -6781,9 +6820,6 @@ static void __sched notrace __schedule(int sched_mode) schedule_debug(prev, preempt); - if (sched_feat(HRTICK) || sched_feat(HRTICK_DL)) - hrtick_clear(rq); - klp_sched_try_switch(prev); local_irq_disable(); @@ -6810,6 +6846,8 @@ static void __sched notrace __schedule(int sched_mode) rq_lock(rq, &rf); smp_mb__after_spinlock(); + hrtick_schedule_enter(rq); + /* Promote REQ to ACT */ rq->clock_update_flags <<= 1; update_rq_clock(rq); @@ -6911,6 +6949,7 @@ keep_resched: rq_unpin_lock(rq, &rf); __balance_callbacks(rq, NULL); + hrtick_schedule_exit(rq); raw_spin_rq_unlock_irq(rq); } trace_sched_exit_tp(is_switch); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0aa089dfaaa4b..6774fb5ac7adb 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1285,6 +1285,8 @@ struct rq { call_single_data_t hrtick_csd; struct hrtimer hrtick_timer; ktime_t hrtick_time; + ktime_t hrtick_delay; + unsigned int hrtick_sched; #endif #ifdef CONFIG_SCHEDSTATS -- 2.47.3