--- /dev/null
+From 76031d9536a076bf023bedbdb1b4317fc801dd67 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 3 Dec 2024 11:16:30 +0100
+Subject: clocksource: Make negative motion detection more robust
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 76031d9536a076bf023bedbdb1b4317fc801dd67 upstream.
+
+Guenter reported boot stalls on an emulated ARM 32-bit platform, which has a
+24-bit wide clocksource.
+
+It turns out that the calculated maximum idle time, which limits idle
+sleeps to prevent clocksource wraparounds, is close to the point where the
+negative motion detection triggers.
+
+ max_idle_ns: 597268854 ns
+ negative motion tripping point: 671088640 ns
+
+If the idle wakeup is delayed beyond that point, the clocksource
+advances far enough to trigger the negative motion detection. This
+prevents the clock from advancing, and in the worst case the system stalls
+completely if the subsequent sleeps based on the stale clock are
+delayed as well.
+
+Cure this by calculating a more robust cut-off value for negative motion,
+which covers 87.5% of the actual clocksource counter width, and comparing
+the delta against it. This specifically matters for clocksources with a
+small counter width, as their wraparound time is close to half the counter
+width. For clocksources with wide counters this is not a problem, because
+the maximum idle time stays far below half the counter width due to the
+math overflow protection constraints.
+
+For the case at hand this results in a tripping point of 1174405120 ns.
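+
+For illustration, a minimal user-space sketch of the cut-off arithmetic
+(not kernel code; the 80 ns cycle time, i.e. a 12.5 MHz clocksource, is
+inferred from the numbers above rather than taken from the hardware):
+
+  #include <stdio.h>
+  #include <stdint.h>
+
+  int main(void)
+  {
+          uint64_t mask = (1ULL << 24) - 1; /* 24-bit wide counter */
+
+          /* 0.875 of the counter width: 1/2 + 1/4 + 1/8 */
+          uint64_t max_raw_delta = (mask >> 1) + (mask >> 2) + (mask >> 3);
+
+          /* Prints ~1174404880 ns, matching the tripping point above
+             up to mult/shift rounding */
+          printf("cut-off: %llu cycles, ~%llu ns\n",
+                 (unsigned long long)max_raw_delta,
+                 (unsigned long long)(max_raw_delta * 80));
+          return 0;
+  }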
+
+Note that this cannot prevent issues when the delay exceeds the 87.5%
+margin, but that's no different from the previous unchecked version, which
+allowed arbitrary time jumps.
+
+Systems with a small counter width are prone to invalid results, but this
+problem is unlikely to be seen on real hardware. If such a system
+completely stalls for more than half a second, then there are more
+urgent problems than the counter wrapping around.
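+
+For reference, the old and new checks side by side, as standalone
+functions mirroring the clocksource_delta() variants in the diff below
+(illustrative only, not the kernel implementation itself):
+
+  #include <stdint.h>
+
+  /* Old: trip when any bit at or above the MSB of mask/2 is set */
+  static inline uint64_t delta_old(uint64_t now, uint64_t last, uint64_t mask)
+  {
+          uint64_t ret = (now - last) & mask;
+
+          return ret & ~(mask >> 1) ? 0 : ret;
+  }
+
+  /* New: trip only past the precomputed 87.5% threshold */
+  static inline uint64_t delta_new(uint64_t now, uint64_t last, uint64_t mask,
+                                   uint64_t max_delta)
+  {
+          uint64_t ret = (now - last) & mask;
+
+          return ret > max_delta ? 0 : ret;
+  }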
+
+Fixes: c163e40af9b2 ("timekeeping: Always check for negative motion")
+Reported-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Guenter Roeck <linux@roeck-us.net>
+Link: https://lore.kernel.org/all/8734j5ul4x.ffs@tglx
+Closes: https://lore.kernel.org/all/387b120b-d68a-45e8-b6ab-768cd95d11c2@roeck-us.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/clocksource.h | 2 ++
+ kernel/time/clocksource.c | 11 ++++++++++-
+ kernel/time/timekeeping.c | 6 ++++--
+ kernel/time/timekeeping_internal.h | 8 ++++----
+ 4 files changed, 20 insertions(+), 7 deletions(-)
+
+--- a/include/linux/clocksource.h
++++ b/include/linux/clocksource.h
+@@ -49,6 +49,7 @@ struct module;
+ * @archdata: Optional arch-specific data
+ * @max_cycles: Maximum safe cycle value which won't overflow on
+ * multiplication
++ * @max_raw_delta: Maximum safe delta value for negative motion detection
+ * @name: Pointer to clocksource name
+ * @list: List head for registration (internal)
+ * @freq_khz: Clocksource frequency in khz.
+@@ -109,6 +110,7 @@ struct clocksource {
+ struct arch_clocksource_data archdata;
+ #endif
+ u64 max_cycles;
++ u64 max_raw_delta;
+ const char *name;
+ struct list_head list;
+ u32 freq_khz;
+--- a/kernel/time/clocksource.c
++++ b/kernel/time/clocksource.c
+@@ -22,7 +22,7 @@
+
+ static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end)
+ {
+- u64 delta = clocksource_delta(end, start, cs->mask);
++ u64 delta = clocksource_delta(end, start, cs->mask, cs->max_raw_delta);
+
+ if (likely(delta < cs->max_cycles))
+ return clocksource_cyc2ns(delta, cs->mult, cs->shift);
+@@ -985,6 +985,15 @@ static inline void clocksource_update_ma
+ cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
+ cs->maxadj, cs->mask,
+ &cs->max_cycles);
++
++ /*
++ * Threshold for detecting negative motion in clocksource_delta().
++ *
++ * Allow for 0.875 of the counter width so that overly long idle
++ * sleeps, which go slightly over mask/2, do not trigger the
++ * negative motion detection.
++ */
++ cs->max_raw_delta = (cs->mask >> 1) + (cs->mask >> 2) + (cs->mask >> 3);
+ }
+
+ static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -694,7 +694,8 @@ static void timekeeping_forward_now(stru
+ u64 cycle_now, delta;
+
+ cycle_now = tk_clock_read(&tk->tkr_mono);
+- delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
++ delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
++ tk->tkr_mono.clock->max_raw_delta);
+ tk->tkr_mono.cycle_last = cycle_now;
+ tk->tkr_raw.cycle_last = cycle_now;
+
+@@ -2193,7 +2194,8 @@ static bool timekeeping_advance(enum tim
+ goto out;
+
+ offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
+- tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
++ tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
++ tk->tkr_mono.clock->max_raw_delta);
+
+ /* Check if there's really nothing to do */
+ if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
+--- a/kernel/time/timekeeping_internal.h
++++ b/kernel/time/timekeeping_internal.h
+@@ -15,15 +15,15 @@ extern void tk_debug_account_sleep_time(
+ #define tk_debug_account_sleep_time(x)
+ #endif
+
+-static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
++static inline u64 clocksource_delta(u64 now, u64 last, u64 mask, u64 max_delta)
+ {
+ u64 ret = (now - last) & mask;
+
+ /*
+- * Prevent time going backwards by checking the MSB of mask in
+- * the result. If set, return 0.
++ * Prevent time going backwards by checking the result against
++ * @max_delta. If greater, return 0.
+ */
+- return ret & ~(mask >> 1) ? 0 : ret;
++ return ret > max_delta ? 0 : ret;
+ }
+
+ /* Semi public for serialization of non timekeeper VDSO updates. */
--- /dev/null
+From d44d26987bb3df6d76556827097fc9ce17565cb8 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 31 Oct 2024 13:04:07 +0100
+Subject: timekeeping: Remove CONFIG_DEBUG_TIMEKEEPING
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit d44d26987bb3df6d76556827097fc9ce17565cb8 upstream.
+
+Since commit 135225a363ae, timekeeping_cycles_to_ns() correctly handles
+large offsets which would otherwise lead to 64-bit multiplication
+overflows. It is also unconditionally protected against negative motion
+of the clocksource, which was previously exclusive to x86.
+
+timekeeping_advance() already handles large offsets correctly.
+
+That means the value of CONFIG_DEBUG_TIMEKEEPING, which analyzed exactly
+these cases, is very close to zero. Remove all of it.
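+
+For context, the overflow condition the removed debug code watched for
+(offset > max_cycles) is the point where the cycles-to-nanoseconds
+multiplication no longer fits in 64 bit. A minimal sketch of that bound,
+ignoring the maxadj adjustment the kernel applies (illustrative names,
+mirroring clocksource_cyc2ns()):
+
+  #include <stdint.h>
+
+  /* (cycles * mult) >> shift, as done by clocksource_cyc2ns() */
+  static inline uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
+  {
+          return (cycles * mult) >> shift;
+  }
+
+  /* The multiplication overflows u64 once cycles exceeds this bound */
+  static inline uint64_t max_safe_cycles(uint32_t mult)
+  {
+          return UINT64_MAX / mult;
+  }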
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: John Stultz <jstultz@google.com>
+Link: https://lore.kernel.org/all/20241031120328.536010148@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/configs/defconfig | 1
+ include/linux/timekeeper_internal.h | 15 --
+ kernel/time/timekeeping.c | 108 --------------------
+ lib/Kconfig.debug | 13 --
+ tools/testing/selftests/wireguard/qemu/debug.config | 1
+ 5 files changed, 3 insertions(+), 135 deletions(-)
+
+--- a/arch/riscv/configs/defconfig
++++ b/arch/riscv/configs/defconfig
+@@ -301,7 +301,6 @@ CONFIG_DEBUG_MEMORY_INIT=y
+ CONFIG_DEBUG_PER_CPU_MAPS=y
+ CONFIG_SOFTLOCKUP_DETECTOR=y
+ CONFIG_WQ_WATCHDOG=y
+-CONFIG_DEBUG_TIMEKEEPING=y
+ CONFIG_DEBUG_RT_MUTEXES=y
+ CONFIG_DEBUG_SPINLOCK=y
+ CONFIG_DEBUG_MUTEXES=y
+--- a/include/linux/timekeeper_internal.h
++++ b/include/linux/timekeeper_internal.h
+@@ -68,9 +68,6 @@ struct tk_read_base {
+ * shifted nano seconds.
+ * @ntp_error_shift: Shift conversion between clock shifted nano seconds and
+ * ntp shifted nano seconds.
+- * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING)
+- * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING)
+- * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING)
+ *
+ * Note: For timespec(64) based interfaces wall_to_monotonic is what
+ * we need to add to xtime (or xtime corrected for sub jiffy times)
+@@ -124,18 +121,6 @@ struct timekeeper {
+ u32 ntp_err_mult;
+ /* Flag used to avoid updating NTP twice with same second */
+ u32 skip_second_overflow;
+-#ifdef CONFIG_DEBUG_TIMEKEEPING
+- long last_warning;
+- /*
+- * These simple flag variables are managed
+- * without locks, which is racy, but they are
+- * ok since we don't really care about being
+- * super precise about how many events were
+- * seen, just that a problem was observed.
+- */
+- int underflow_seen;
+- int overflow_seen;
+-#endif
+ };
+
+ #ifdef CONFIG_GENERIC_TIME_VSYSCALL
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -195,97 +195,6 @@ static inline u64 tk_clock_read(const st
+ return clock->read(clock);
+ }
+
+-#ifdef CONFIG_DEBUG_TIMEKEEPING
+-#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
+-
+-static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
+-{
+-
+- u64 max_cycles = tk->tkr_mono.clock->max_cycles;
+- const char *name = tk->tkr_mono.clock->name;
+-
+- if (offset > max_cycles) {
+- printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
+- offset, name, max_cycles);
+- printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
+- } else {
+- if (offset > (max_cycles >> 1)) {
+- printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
+- offset, name, max_cycles >> 1);
+- printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
+- }
+- }
+-
+- if (tk->underflow_seen) {
+- if (jiffies - tk->last_warning > WARNING_FREQ) {
+- printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
+- printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
+- printk_deferred(" Your kernel is probably still fine.\n");
+- tk->last_warning = jiffies;
+- }
+- tk->underflow_seen = 0;
+- }
+-
+- if (tk->overflow_seen) {
+- if (jiffies - tk->last_warning > WARNING_FREQ) {
+- printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
+- printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
+- printk_deferred(" Your kernel is probably still fine.\n");
+- tk->last_warning = jiffies;
+- }
+- tk->overflow_seen = 0;
+- }
+-}
+-
+-static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles);
+-
+-static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr)
+-{
+- struct timekeeper *tk = &tk_core.timekeeper;
+- u64 now, last, mask, max, delta;
+- unsigned int seq;
+-
+- /*
+- * Since we're called holding a seqcount, the data may shift
+- * under us while we're doing the calculation. This can cause
+- * false positives, since we'd note a problem but throw the
+- * results away. So nest another seqcount here to atomically
+- * grab the points we are checking with.
+- */
+- do {
+- seq = read_seqcount_begin(&tk_core.seq);
+- now = tk_clock_read(tkr);
+- last = tkr->cycle_last;
+- mask = tkr->mask;
+- max = tkr->clock->max_cycles;
+- } while (read_seqcount_retry(&tk_core.seq, seq));
+-
+- delta = clocksource_delta(now, last, mask);
+-
+- /*
+- * Try to catch underflows by checking if we are seeing small
+- * mask-relative negative values.
+- */
+- if (unlikely((~delta & mask) < (mask >> 3)))
+- tk->underflow_seen = 1;
+-
+- /* Check for multiplication overflows */
+- if (unlikely(delta > max))
+- tk->overflow_seen = 1;
+-
+- /* timekeeping_cycles_to_ns() handles both under and overflow */
+- return timekeeping_cycles_to_ns(tkr, now);
+-}
+-#else
+-static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
+-{
+-}
+-static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr)
+-{
+- BUG();
+-}
+-#endif
+-
+ /**
+ * tk_setup_internals - Set up internals to use clocksource clock.
+ *
+@@ -390,19 +299,11 @@ static inline u64 timekeeping_cycles_to_
+ return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
+ }
+
+-static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr)
++static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
+ {
+ return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr));
+ }
+
+-static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
+-{
+- if (IS_ENABLED(CONFIG_DEBUG_TIMEKEEPING))
+- return timekeeping_debug_get_ns(tkr);
+-
+- return __timekeeping_get_ns(tkr);
+-}
+-
+ /**
+ * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
+ * @tkr: Timekeeping readout base from which we take the update
+@@ -446,7 +347,7 @@ static __always_inline u64 __ktime_get_f
+ seq = raw_read_seqcount_latch(&tkf->seq);
+ tkr = tkf->base + (seq & 0x01);
+ now = ktime_to_ns(tkr->base);
+- now += __timekeeping_get_ns(tkr);
++ now += timekeeping_get_ns(tkr);
+ } while (raw_read_seqcount_latch_retry(&tkf->seq, seq));
+
+ return now;
+@@ -562,7 +463,7 @@ static __always_inline u64 __ktime_get_r
+ tkr = tkf->base + (seq & 0x01);
+ basem = ktime_to_ns(tkr->base);
+ baser = ktime_to_ns(tkr->base_real);
+- delta = __timekeeping_get_ns(tkr);
++ delta = timekeeping_get_ns(tkr);
+ } while (raw_read_seqcount_latch_retry(&tkf->seq, seq));
+
+ if (mono)
+@@ -2298,9 +2199,6 @@ static bool timekeeping_advance(enum tim
+ if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
+ goto out;
+
+- /* Do some additional sanity checking */
+- timekeeping_check_update(tk, offset);
+-
+ /*
+ * With NO_HZ we may have to accumulate many cycle_intervals
+ * (think "ticks") worth of time at once. To do this efficiently,
+--- a/lib/Kconfig.debug
++++ b/lib/Kconfig.debug
+@@ -1328,19 +1328,6 @@ config SCHEDSTATS
+
+ endmenu
+
+-config DEBUG_TIMEKEEPING
+- bool "Enable extra timekeeping sanity checking"
+- help
+- This option will enable additional timekeeping sanity checks
+- which may be helpful when diagnosing issues where timekeeping
+- problems are suspected.
+-
+- This may include checks in the timekeeping hotpaths, so this
+- option may have a (very small) performance impact to some
+- workloads.
+-
+- If unsure, say N.
+-
+ config DEBUG_PREEMPT
+ bool "Debug preemptible kernel"
+ depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT
+--- a/tools/testing/selftests/wireguard/qemu/debug.config
++++ b/tools/testing/selftests/wireguard/qemu/debug.config
+@@ -31,7 +31,6 @@ CONFIG_SCHED_DEBUG=y
+ CONFIG_SCHED_INFO=y
+ CONFIG_SCHEDSTATS=y
+ CONFIG_SCHED_STACK_END_CHECK=y
+-CONFIG_DEBUG_TIMEKEEPING=y
+ CONFIG_DEBUG_PREEMPT=y
+ CONFIG_DEBUG_RT_MUTEXES=y
+ CONFIG_DEBUG_SPINLOCK=y