From: Thomas Gleixner Date: Tue, 24 Feb 2026 16:38:33 +0000 (+0100) Subject: hrtimer: Rework next event evaluation X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2bd1cc24fafc84be844c9ef66aa819d7dec285bf;p=thirdparty%2Fkernel%2Flinux.git hrtimer: Rework next event evaluation The per clock base cached expiry time allows to do a more efficient evaluation of the next expiry on a CPU. Separate the reprogramming evaluation from the NOHZ idle evaluation which needs to exclude the NOHZ timer to keep the reprogramming path lean and clean. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260224163431.468186893@kernel.org --- diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d70899a9ddc12..aa1cb4f8a4735 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -546,49 +546,67 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) #define for_each_active_base(base, cpu_base, active) \ while ((base = __next_base((cpu_base), &(active)))) -static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, - const struct hrtimer *exclude, - unsigned int active, ktime_t expires_next) +#if defined(CONFIG_NO_HZ_COMMON) +/* + * Same as hrtimer_bases_next_event() below, but skips the excluded timer and + * does not update cpu_base->next_timer/expires. 
+ */ +static ktime_t hrtimer_bases_next_event_without(struct hrtimer_cpu_base *cpu_base, + const struct hrtimer *exclude, + unsigned int active, ktime_t expires_next) { struct hrtimer_clock_base *base; ktime_t expires; + lockdep_assert_held(&cpu_base->lock); + for_each_active_base(base, cpu_base, active) { - struct timerqueue_node *next; - struct hrtimer *timer; + expires = ktime_sub(base->expires_next, base->offset); + if (expires >= expires_next) + continue; - next = timerqueue_getnext(&base->active); - timer = container_of(next, struct hrtimer, node); - if (timer == exclude) { - /* Get to the next timer in the queue. */ - next = timerqueue_iterate_next(next); - if (!next) - continue; + /* + * If the excluded timer is the first on this base evaluate the + * next timer. + */ + struct timerqueue_node *node = timerqueue_getnext(&base->active); - timer = container_of(next, struct hrtimer, node); + if (unlikely(&exclude->node == node)) { + node = timerqueue_iterate_next(node); + if (!node) + continue; + expires = ktime_sub(node->expires, base->offset); + if (expires >= expires_next) + continue; } - expires = ktime_sub(hrtimer_get_expires(timer), base->offset); - if (expires < expires_next) { - expires_next = expires; + expires_next = expires; + } + /* If base->offset changed, the result might be negative */ + return max(expires_next, 0); +} +#endif - /* Skip cpu_base update if a timer is being excluded. */ - if (exclude) - continue; +static __always_inline struct hrtimer *clock_base_next_timer(struct hrtimer_clock_base *base) +{ + struct timerqueue_node *next = timerqueue_getnext(&base->active); + + return container_of(next, struct hrtimer, node); +} - if (timer->is_soft) - cpu_base->softirq_next_timer = timer; - else - cpu_base->next_timer = timer; +/* Find the base with the earliest expiry */ +static void hrtimer_bases_first(struct hrtimer_cpu_base *cpu_base,unsigned int active, + ktime_t *expires_next, struct hrtimer **next_timer) +{ + struct hrtimer_clock_base *base; + ktime_t expires; + + for_each_active_base(base, cpu_base, active) { + expires = ktime_sub(base->expires_next, base->offset); + if (expires < *expires_next) { + *expires_next = expires; + *next_timer = clock_base_next_timer(base); } } - /* - * clock_was_set() might have changed base->offset of any of - * the clock bases so the result might be negative. Fix it up - * to prevent a false positive in clockevents_program_event().
- */ - if (expires_next < 0) - expires_next = 0; - return expires_next; } /* @@ -617,19 +635,22 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsig ktime_t expires_next = KTIME_MAX; unsigned int active; + lockdep_assert_held(&cpu_base->lock); + if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) { active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; - cpu_base->softirq_next_timer = NULL; - expires_next = __hrtimer_next_event_base(cpu_base, NULL, active, KTIME_MAX); - next_timer = cpu_base->softirq_next_timer; + if (active) + hrtimer_bases_first(cpu_base, active, &expires_next, &next_timer); + cpu_base->softirq_next_timer = next_timer; } if (active_mask & HRTIMER_ACTIVE_HARD) { active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; + if (active) + hrtimer_bases_first(cpu_base, active, &expires_next, &next_timer); cpu_base->next_timer = next_timer; - expires_next = __hrtimer_next_event_base(cpu_base, NULL, active, expires_next); } - return expires_next; + return max(expires_next, 0); } static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base) @@ -724,11 +745,7 @@ static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, struct hrtime hrtimer_rearm_event(expires_next, false); } -/* - * Reprogram the event source with checking both queues for the - * next event - * Called with interrupts disabled and base->lock held - */ +/* Reprogram the event source with an evaluation of all clock bases */ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, bool skip_equal) { ktime_t expires_next = hrtimer_update_next_event(cpu_base); @@ -1662,19 +1679,20 @@ u64 hrtimer_next_event_without(const struct hrtimer *exclude) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); u64 expires = KTIME_MAX; + unsigned int active; guard(raw_spinlock_irqsave)(&cpu_base->lock); - if (hrtimer_hres_active(cpu_base)) { - unsigned int active; + if (!hrtimer_hres_active(cpu_base)) + return expires; - if (!cpu_base->softirq_activated) { - active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; - expires = __hrtimer_next_event_base(cpu_base, exclude, active, KTIME_MAX); - } - active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; - expires = __hrtimer_next_event_base(cpu_base, exclude, active, expires); - } - return expires; + active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; + if (active && !cpu_base->softirq_activated) + expires = hrtimer_bases_next_event_without(cpu_base, exclude, active, KTIME_MAX); + + active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; + if (!active) + return expires; + return hrtimer_bases_next_event_without(cpu_base, exclude, active, expires); } #endif