From: Greg Kroah-Hartman
Date: Fri, 13 Dec 2024 11:32:50 +0000 (+0100)
Subject: 6.12-stable patches
X-Git-Tag: v5.4.287~8
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4ac7f8961660a0c45f5e0f5135e11a7c781b0df7;p=thirdparty%2Fkernel%2Fstable-queue.git

6.12-stable patches

added patches:
	clocksource-make-negative-motion-detection-more-robust.patch
	timekeeping-remove-config_debug_timekeeping.patch
---

diff --git a/queue-6.12/clocksource-make-negative-motion-detection-more-robust.patch b/queue-6.12/clocksource-make-negative-motion-detection-more-robust.patch
new file mode 100644
index 00000000000..54c0d2862ff
--- /dev/null
+++ b/queue-6.12/clocksource-make-negative-motion-detection-more-robust.patch
@@ -0,0 +1,147 @@
+From 76031d9536a076bf023bedbdb1b4317fc801dd67 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner
+Date: Tue, 3 Dec 2024 11:16:30 +0100
+Subject: clocksource: Make negative motion detection more robust
+
+From: Thomas Gleixner
+
+commit 76031d9536a076bf023bedbdb1b4317fc801dd67 upstream.
+
+Guenter reported boot stalls on an emulated ARM 32-bit platform, which has a
+24-bit wide clocksource.
+
+It turns out that the calculated maximal idle time, which limits idle
+sleeps to prevent clocksource wrap arounds, is close to the point where the
+negative motion detection triggers.
+
+ max_idle_ns: 597268854 ns
+ negative motion tripping point: 671088640 ns
+
+If the idle wakeup is delayed beyond that point, the clocksource
+advances far enough to trigger the negative motion detection. This
+prevents the clock from advancing, and in the worst case the system stalls
+completely if the consecutive sleeps based on the stale clock are
+delayed as well.
+
+Cure this by calculating a more robust cut-off value for negative motion,
+which covers 87.5% of the actual clocksource counter width. Compare the
+delta against this value to catch negative motion. This is specifically for
+clock sources with a small counter width as their wrap around time is close
+to the half counter width. For clock sources with wide counters this is not
+a problem because the maximum idle time is far from the half counter width
+due to the math overflow protection constraints.
+
+For the case at hand this results in a tripping point of 1174405120ns.
+
+Note that this cannot prevent issues when the delay exceeds the 87.5%
+margin, but that's not different from the previous unchecked version which
+allowed arbitrary time jumps.
+
+Systems with small counter width are prone to invalid results, but this
+problem is unlikely to be seen on real hardware. If such a system
+completely stalls for more than half a second, then there are other more
+urgent problems than the counter wrapping around.
+ +Fixes: c163e40af9b2 ("timekeeping: Always check for negative motion") +Reported-by: Guenter Roeck +Signed-off-by: Thomas Gleixner +Tested-by: Guenter Roeck +Link: https://lore.kernel.org/all/8734j5ul4x.ffs@tglx +Closes: https://lore.kernel.org/all/387b120b-d68a-45e8-b6ab-768cd95d11c2@roeck-us.net +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/clocksource.h | 2 ++ + kernel/time/clocksource.c | 11 ++++++++++- + kernel/time/timekeeping.c | 6 ++++-- + kernel/time/timekeeping_internal.h | 8 ++++---- + 4 files changed, 20 insertions(+), 7 deletions(-) + +--- a/include/linux/clocksource.h ++++ b/include/linux/clocksource.h +@@ -49,6 +49,7 @@ struct module; + * @archdata: Optional arch-specific data + * @max_cycles: Maximum safe cycle value which won't overflow on + * multiplication ++ * @max_raw_delta: Maximum safe delta value for negative motion detection + * @name: Pointer to clocksource name + * @list: List head for registration (internal) + * @freq_khz: Clocksource frequency in khz. +@@ -109,6 +110,7 @@ struct clocksource { + struct arch_clocksource_data archdata; + #endif + u64 max_cycles; ++ u64 max_raw_delta; + const char *name; + struct list_head list; + u32 freq_khz; +--- a/kernel/time/clocksource.c ++++ b/kernel/time/clocksource.c +@@ -22,7 +22,7 @@ + + static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end) + { +- u64 delta = clocksource_delta(end, start, cs->mask); ++ u64 delta = clocksource_delta(end, start, cs->mask, cs->max_raw_delta); + + if (likely(delta < cs->max_cycles)) + return clocksource_cyc2ns(delta, cs->mult, cs->shift); +@@ -985,6 +985,15 @@ static inline void clocksource_update_ma + cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift, + cs->maxadj, cs->mask, + &cs->max_cycles); ++ ++ /* ++ * Threshold for detecting negative motion in clocksource_delta(). ++ * ++ * Allow for 0.875 of the counter width so that overly long idle ++ * sleeps, which go slightly over mask/2, do not trigger the ++ * negative motion detection. ++ */ ++ cs->max_raw_delta = (cs->mask >> 1) + (cs->mask >> 2) + (cs->mask >> 3); + } + + static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur) +--- a/kernel/time/timekeeping.c ++++ b/kernel/time/timekeeping.c +@@ -694,7 +694,8 @@ static void timekeeping_forward_now(stru + u64 cycle_now, delta; + + cycle_now = tk_clock_read(&tk->tkr_mono); +- delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); ++ delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask, ++ tk->tkr_mono.clock->max_raw_delta); + tk->tkr_mono.cycle_last = cycle_now; + tk->tkr_raw.cycle_last = cycle_now; + +@@ -2193,7 +2194,8 @@ static bool timekeeping_advance(enum tim + goto out; + + offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), +- tk->tkr_mono.cycle_last, tk->tkr_mono.mask); ++ tk->tkr_mono.cycle_last, tk->tkr_mono.mask, ++ tk->tkr_mono.clock->max_raw_delta); + + /* Check if there's really nothing to do */ + if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) +--- a/kernel/time/timekeeping_internal.h ++++ b/kernel/time/timekeeping_internal.h +@@ -15,15 +15,15 @@ extern void tk_debug_account_sleep_time( + #define tk_debug_account_sleep_time(x) + #endif + +-static inline u64 clocksource_delta(u64 now, u64 last, u64 mask) ++static inline u64 clocksource_delta(u64 now, u64 last, u64 mask, u64 max_delta) + { + u64 ret = (now - last) & mask; + + /* +- * Prevent time going backwards by checking the MSB of mask in +- * the result. If set, return 0. 
++ * Prevent time going backwards by checking the result against ++ * @max_delta. If greater, return 0. + */ +- return ret & ~(mask >> 1) ? 0 : ret; ++ return ret > max_delta ? 0 : ret; + } + + /* Semi public for serialization of non timekeeper VDSO updates. */ diff --git a/queue-6.12/series b/queue-6.12/series index 7659133c049..5199b7d6664 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -463,3 +463,5 @@ alsa-hda-fix-build-error-without-config_snd_debug.patch revert-drm-amd-display-parse-umc_info-or-vram_info-based-on-asic.patch s390-pci-fix-leak-of-struct-zpci_dev-when-zpci_add_device-fails.patch alsa-hda-realtek-fix-spelling-mistake-firelfy-firefly.patch +timekeeping-remove-config_debug_timekeeping.patch +clocksource-make-negative-motion-detection-more-robust.patch diff --git a/queue-6.12/timekeeping-remove-config_debug_timekeeping.patch b/queue-6.12/timekeeping-remove-config_debug_timekeeping.patch new file mode 100644 index 00000000000..7a255e57184 --- /dev/null +++ b/queue-6.12/timekeeping-remove-config_debug_timekeeping.patch @@ -0,0 +1,253 @@ +From d44d26987bb3df6d76556827097fc9ce17565cb8 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 31 Oct 2024 13:04:07 +0100 +Subject: timekeeping: Remove CONFIG_DEBUG_TIMEKEEPING + +From: Thomas Gleixner + +commit d44d26987bb3df6d76556827097fc9ce17565cb8 upstream. + +Since 135225a363ae timekeeping_cycles_to_ns() handles large offsets which +would lead to 64bit multiplication overflows correctly. It's also protected +against negative motion of the clocksource unconditionally, which was +exclusive to x86 before. + +timekeeping_advance() handles large offsets already correctly. + +That means the value of CONFIG_DEBUG_TIMEKEEPING which analyzed these cases +is very close to zero. Remove all of it. + +Signed-off-by: Thomas Gleixner +Acked-by: John Stultz +Link: https://lore.kernel.org/all/20241031120328.536010148@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/configs/defconfig | 1 + include/linux/timekeeper_internal.h | 15 -- + kernel/time/timekeeping.c | 108 -------------------- + lib/Kconfig.debug | 13 -- + tools/testing/selftests/wireguard/qemu/debug.config | 1 + 5 files changed, 3 insertions(+), 135 deletions(-) + +--- a/arch/riscv/configs/defconfig ++++ b/arch/riscv/configs/defconfig +@@ -301,7 +301,6 @@ CONFIG_DEBUG_MEMORY_INIT=y + CONFIG_DEBUG_PER_CPU_MAPS=y + CONFIG_SOFTLOCKUP_DETECTOR=y + CONFIG_WQ_WATCHDOG=y +-CONFIG_DEBUG_TIMEKEEPING=y + CONFIG_DEBUG_RT_MUTEXES=y + CONFIG_DEBUG_SPINLOCK=y + CONFIG_DEBUG_MUTEXES=y +--- a/include/linux/timekeeper_internal.h ++++ b/include/linux/timekeeper_internal.h +@@ -68,9 +68,6 @@ struct tk_read_base { + * shifted nano seconds. + * @ntp_error_shift: Shift conversion between clock shifted nano seconds and + * ntp shifted nano seconds. 
+- * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) +- * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) +- * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) + * + * Note: For timespec(64) based interfaces wall_to_monotonic is what + * we need to add to xtime (or xtime corrected for sub jiffy times) +@@ -124,18 +121,6 @@ struct timekeeper { + u32 ntp_err_mult; + /* Flag used to avoid updating NTP twice with same second */ + u32 skip_second_overflow; +-#ifdef CONFIG_DEBUG_TIMEKEEPING +- long last_warning; +- /* +- * These simple flag variables are managed +- * without locks, which is racy, but they are +- * ok since we don't really care about being +- * super precise about how many events were +- * seen, just that a problem was observed. +- */ +- int underflow_seen; +- int overflow_seen; +-#endif + }; + + #ifdef CONFIG_GENERIC_TIME_VSYSCALL +--- a/kernel/time/timekeeping.c ++++ b/kernel/time/timekeeping.c +@@ -195,97 +195,6 @@ static inline u64 tk_clock_read(const st + return clock->read(clock); + } + +-#ifdef CONFIG_DEBUG_TIMEKEEPING +-#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ +- +-static void timekeeping_check_update(struct timekeeper *tk, u64 offset) +-{ +- +- u64 max_cycles = tk->tkr_mono.clock->max_cycles; +- const char *name = tk->tkr_mono.clock->name; +- +- if (offset > max_cycles) { +- printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", +- offset, name, max_cycles); +- printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n"); +- } else { +- if (offset > (max_cycles >> 1)) { +- printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n", +- offset, name, max_cycles >> 1); +- printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n"); +- } +- } +- +- if (tk->underflow_seen) { +- if (jiffies - tk->last_warning > WARNING_FREQ) { +- printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name); +- printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); +- printk_deferred(" Your kernel is probably still fine.\n"); +- tk->last_warning = jiffies; +- } +- tk->underflow_seen = 0; +- } +- +- if (tk->overflow_seen) { +- if (jiffies - tk->last_warning > WARNING_FREQ) { +- printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name); +- printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); +- printk_deferred(" Your kernel is probably still fine.\n"); +- tk->last_warning = jiffies; +- } +- tk->overflow_seen = 0; +- } +-} +- +-static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles); +- +-static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr) +-{ +- struct timekeeper *tk = &tk_core.timekeeper; +- u64 now, last, mask, max, delta; +- unsigned int seq; +- +- /* +- * Since we're called holding a seqcount, the data may shift +- * under us while we're doing the calculation. This can cause +- * false positives, since we'd note a problem but throw the +- * results away. So nest another seqcount here to atomically +- * grab the points we are checking with. 
+- */ +- do { +- seq = read_seqcount_begin(&tk_core.seq); +- now = tk_clock_read(tkr); +- last = tkr->cycle_last; +- mask = tkr->mask; +- max = tkr->clock->max_cycles; +- } while (read_seqcount_retry(&tk_core.seq, seq)); +- +- delta = clocksource_delta(now, last, mask); +- +- /* +- * Try to catch underflows by checking if we are seeing small +- * mask-relative negative values. +- */ +- if (unlikely((~delta & mask) < (mask >> 3))) +- tk->underflow_seen = 1; +- +- /* Check for multiplication overflows */ +- if (unlikely(delta > max)) +- tk->overflow_seen = 1; +- +- /* timekeeping_cycles_to_ns() handles both under and overflow */ +- return timekeeping_cycles_to_ns(tkr, now); +-} +-#else +-static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset) +-{ +-} +-static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr) +-{ +- BUG(); +-} +-#endif +- + /** + * tk_setup_internals - Set up internals to use clocksource clock. + * +@@ -390,19 +299,11 @@ static inline u64 timekeeping_cycles_to_ + return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift; + } + +-static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr) ++static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) + { + return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr)); + } + +-static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) +-{ +- if (IS_ENABLED(CONFIG_DEBUG_TIMEKEEPING)) +- return timekeeping_debug_get_ns(tkr); +- +- return __timekeeping_get_ns(tkr); +-} +- + /** + * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. + * @tkr: Timekeeping readout base from which we take the update +@@ -446,7 +347,7 @@ static __always_inline u64 __ktime_get_f + seq = raw_read_seqcount_latch(&tkf->seq); + tkr = tkf->base + (seq & 0x01); + now = ktime_to_ns(tkr->base); +- now += __timekeeping_get_ns(tkr); ++ now += timekeeping_get_ns(tkr); + } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); + + return now; +@@ -562,7 +463,7 @@ static __always_inline u64 __ktime_get_r + tkr = tkf->base + (seq & 0x01); + basem = ktime_to_ns(tkr->base); + baser = ktime_to_ns(tkr->base_real); +- delta = __timekeeping_get_ns(tkr); ++ delta = timekeeping_get_ns(tkr); + } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); + + if (mono) +@@ -2298,9 +2199,6 @@ static bool timekeeping_advance(enum tim + if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) + goto out; + +- /* Do some additional sanity checking */ +- timekeeping_check_update(tk, offset); +- + /* + * With NO_HZ we may have to accumulate many cycle_intervals + * (think "ticks") worth of time at once. To do this efficiently, +--- a/lib/Kconfig.debug ++++ b/lib/Kconfig.debug +@@ -1328,19 +1328,6 @@ config SCHEDSTATS + + endmenu + +-config DEBUG_TIMEKEEPING +- bool "Enable extra timekeeping sanity checking" +- help +- This option will enable additional timekeeping sanity checks +- which may be helpful when diagnosing issues where timekeeping +- problems are suspected. +- +- This may include checks in the timekeeping hotpaths, so this +- option may have a (very small) performance impact to some +- workloads. +- +- If unsure, say N. 
+- + config DEBUG_PREEMPT + bool "Debug preemptible kernel" + depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT +--- a/tools/testing/selftests/wireguard/qemu/debug.config ++++ b/tools/testing/selftests/wireguard/qemu/debug.config +@@ -31,7 +31,6 @@ CONFIG_SCHED_DEBUG=y + CONFIG_SCHED_INFO=y + CONFIG_SCHEDSTATS=y + CONFIG_SCHED_STACK_END_CHECK=y +-CONFIG_DEBUG_TIMEKEEPING=y + CONFIG_DEBUG_PREEMPT=y + CONFIG_DEBUG_RT_MUTEXES=y + CONFIG_DEBUG_SPINLOCK=y
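
For reference, and not part of either patch above, the following is a minimal user-space sketch of the threshold arithmetic described in the first commit message: the 24-bit mask, the old MSB-based cut-off, the new 87.5% cut-off, and the patched clocksource_delta() check. The 80 ns cycle period is an assumption inferred from the quoted 671088640 ns figure (roughly half of a 24-bit counter), so the printed numbers are approximate rather than values taken from the patch itself.

/*
 * Illustrative sketch only; assumes a 24-bit counter with an ~80 ns
 * cycle period, inferred from the figures in the commit message.
 */
#include <stdint.h>
#include <stdio.h>

/* Mirrors the patched clocksource_delta(): deltas above max_delta read as 0 */
static uint64_t clocksource_delta(uint64_t now, uint64_t last,
				  uint64_t mask, uint64_t max_delta)
{
	uint64_t ret = (now - last) & mask;

	return ret > max_delta ? 0 : ret;
}

int main(void)
{
	uint64_t mask = (1ULL << 24) - 1;	/* 24-bit wide clocksource */
	uint64_t ns_per_cycle = 80;		/* assumed: ~12.5 MHz counter */

	/* Old cut-off: MSB of mask set, i.e. anything above mask/2 cycles */
	uint64_t old_cutoff = (mask >> 1) + 1;

	/* New cut-off: 0.875 of the counter width, as computed in the patch */
	uint64_t max_raw_delta = (mask >> 1) + (mask >> 2) + (mask >> 3);

	printf("old tripping point: ~%llu ns\n",
	       (unsigned long long)(old_cutoff * ns_per_cycle));
	printf("new tripping point: ~%llu ns\n",
	       (unsigned long long)(max_raw_delta * ns_per_cycle));

	/* A wakeup delayed just past the old cut-off no longer reads as 0 */
	printf("delta at old cut-off: %llu cycles\n",
	       (unsigned long long)clocksource_delta(old_cutoff, 0, mask,
						      max_raw_delta));
	return 0;
}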