From: Greg Kroah-Hartman
Date: Fri, 13 Dec 2024 11:32:50 +0000 (+0100)
Subject: 6.12-stable patches
X-Git-Tag: v5.4.287~8
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4ac7f8961660a0c45f5e0f5135e11a7c781b0df7;p=thirdparty%2Fkernel%2Fstable-queue.git

6.12-stable patches

added patches:
	clocksource-make-negative-motion-detection-more-robust.patch
	timekeeping-remove-config_debug_timekeeping.patch
---

diff --git a/queue-6.12/clocksource-make-negative-motion-detection-more-robust.patch b/queue-6.12/clocksource-make-negative-motion-detection-more-robust.patch
new file mode 100644
index 00000000000..54c0d2862ff
--- /dev/null
+++ b/queue-6.12/clocksource-make-negative-motion-detection-more-robust.patch
@@ -0,0 +1,147 @@
+From 76031d9536a076bf023bedbdb1b4317fc801dd67 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner
+Date: Tue, 3 Dec 2024 11:16:30 +0100
+Subject: clocksource: Make negative motion detection more robust
+
+From: Thomas Gleixner
+
+commit 76031d9536a076bf023bedbdb1b4317fc801dd67 upstream.
+
+Guenter reported boot stalls on an emulated ARM 32-bit platform, which has a
+24-bit wide clocksource.
+
+It turns out that the calculated maximal idle time, which limits idle
+sleeps to prevent clocksource wrap arounds, is close to the point where the
+negative motion detection triggers.
+
+ max_idle_ns: 597268854 ns
+ negative motion tripping point: 671088640 ns
+
+If the idle wakeup is delayed beyond that point, the clocksource
+advances far enough to trigger the negative motion detection. This
+prevents the clock from advancing, and in the worst case the system stalls
+completely if the consecutive sleeps based on the stale clock are
+delayed as well.
+
+Cure this by calculating a more robust cut-off value for negative motion,
+which covers 87.5% of the actual clocksource counter width. Compare the
+delta against this value to catch negative motion. This is specifically for
+clock sources with a small counter width as their wrap around time is close
+to the half counter width. For clock sources with wide counters this is not
+a problem because the maximum idle time is far from the half counter width
+due to the math overflow protection constraints.
+
+For the case at hand this results in a tripping point of 1174405120ns.
+
+Note that this cannot prevent issues when the delay exceeds the 87.5%
+margin, but that's not different from the previous unchecked version which
+allowed arbitrary time jumps.
+
+Systems with small counter width are prone to invalid results, but this
+problem is unlikely to be seen on real hardware. If such a system
+completely stalls for more than half a second, then there are other more
+urgent problems than the counter wrapping around.
+ +Fixes: c163e40af9b2 ("timekeeping: Always check for negative motion") +Reported-by: Guenter Roeck +Signed-off-by: Thomas Gleixner +Tested-by: Guenter Roeck +Link: https://lore.kernel.org/all/8734j5ul4x.ffs@tglx +Closes: https://lore.kernel.org/all/387b120b-d68a-45e8-b6ab-768cd95d11c2@roeck-us.net +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/clocksource.h | 2 ++ + kernel/time/clocksource.c | 11 ++++++++++- + kernel/time/timekeeping.c | 6 ++++-- + kernel/time/timekeeping_internal.h | 8 ++++---- + 4 files changed, 20 insertions(+), 7 deletions(-) + +--- a/include/linux/clocksource.h ++++ b/include/linux/clocksource.h +@@ -49,6 +49,7 @@ struct module; + * @archdata: Optional arch-specific data + * @max_cycles: Maximum safe cycle value which won't overflow on + * multiplication ++ * @max_raw_delta: Maximum safe delta value for negative motion detection + * @name: Pointer to clocksource name + * @list: List head for registration (internal) + * @freq_khz: Clocksource frequency in khz. +@@ -109,6 +110,7 @@ struct clocksource { + struct arch_clocksource_data archdata; + #endif + u64 max_cycles; ++ u64 max_raw_delta; + const char *name; + struct list_head list; + u32 freq_khz; +--- a/kernel/time/clocksource.c ++++ b/kernel/time/clocksource.c +@@ -22,7 +22,7 @@ + + static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end) + { +- u64 delta = clocksource_delta(end, start, cs->mask); ++ u64 delta = clocksource_delta(end, start, cs->mask, cs->max_raw_delta); + + if (likely(delta < cs->max_cycles)) + return clocksource_cyc2ns(delta, cs->mult, cs->shift); +@@ -985,6 +985,15 @@ static inline void clocksource_update_ma + cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift, + cs->maxadj, cs->mask, + &cs->max_cycles); ++ ++ /* ++ * Threshold for detecting negative motion in clocksource_delta(). ++ * ++ * Allow for 0.875 of the counter width so that overly long idle ++ * sleeps, which go slightly over mask/2, do not trigger the ++ * negative motion detection. ++ */ ++ cs->max_raw_delta = (cs->mask >> 1) + (cs->mask >> 2) + (cs->mask >> 3); + } + + static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur) +--- a/kernel/time/timekeeping.c ++++ b/kernel/time/timekeeping.c +@@ -694,7 +694,8 @@ static void timekeeping_forward_now(stru + u64 cycle_now, delta; + + cycle_now = tk_clock_read(&tk->tkr_mono); +- delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); ++ delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask, ++ tk->tkr_mono.clock->max_raw_delta); + tk->tkr_mono.cycle_last = cycle_now; + tk->tkr_raw.cycle_last = cycle_now; + +@@ -2193,7 +2194,8 @@ static bool timekeeping_advance(enum tim + goto out; + + offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), +- tk->tkr_mono.cycle_last, tk->tkr_mono.mask); ++ tk->tkr_mono.cycle_last, tk->tkr_mono.mask, ++ tk->tkr_mono.clock->max_raw_delta); + + /* Check if there's really nothing to do */ + if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) +--- a/kernel/time/timekeeping_internal.h ++++ b/kernel/time/timekeeping_internal.h +@@ -15,15 +15,15 @@ extern void tk_debug_account_sleep_time( + #define tk_debug_account_sleep_time(x) + #endif + +-static inline u64 clocksource_delta(u64 now, u64 last, u64 mask) ++static inline u64 clocksource_delta(u64 now, u64 last, u64 mask, u64 max_delta) + { + u64 ret = (now - last) & mask; + + /* +- * Prevent time going backwards by checking the MSB of mask in +- * the result. If set, return 0. 
++ * Prevent time going backwards by checking the result against ++ * @max_delta. If greater, return 0. + */ +- return ret & ~(mask >> 1) ? 0 : ret; ++ return ret > max_delta ? 0 : ret; + } + + /* Semi public for serialization of non timekeeper VDSO updates. */ diff --git a/queue-6.12/series b/queue-6.12/series index 7659133c049..5199b7d6664 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -463,3 +463,5 @@ alsa-hda-fix-build-error-without-config_snd_debug.patch revert-drm-amd-display-parse-umc_info-or-vram_info-based-on-asic.patch s390-pci-fix-leak-of-struct-zpci_dev-when-zpci_add_device-fails.patch alsa-hda-realtek-fix-spelling-mistake-firelfy-firefly.patch +timekeeping-remove-config_debug_timekeeping.patch +clocksource-make-negative-motion-detection-more-robust.patch diff --git a/queue-6.12/timekeeping-remove-config_debug_timekeeping.patch b/queue-6.12/timekeeping-remove-config_debug_timekeeping.patch new file mode 100644 index 00000000000..7a255e57184 --- /dev/null +++ b/queue-6.12/timekeeping-remove-config_debug_timekeeping.patch @@ -0,0 +1,253 @@ +From d44d26987bb3df6d76556827097fc9ce17565cb8 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 31 Oct 2024 13:04:07 +0100 +Subject: timekeeping: Remove CONFIG_DEBUG_TIMEKEEPING + +From: Thomas Gleixner + +commit d44d26987bb3df6d76556827097fc9ce17565cb8 upstream. + +Since 135225a363ae timekeeping_cycles_to_ns() handles large offsets which +would lead to 64bit multiplication overflows correctly. It's also protected +against negative motion of the clocksource unconditionally, which was +exclusive to x86 before. + +timekeeping_advance() handles large offsets already correctly. + +That means the value of CONFIG_DEBUG_TIMEKEEPING which analyzed these cases +is very close to zero. Remove all of it. + +Signed-off-by: Thomas Gleixner +Acked-by: John Stultz +Link: https://lore.kernel.org/all/20241031120328.536010148@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/configs/defconfig | 1 + include/linux/timekeeper_internal.h | 15 -- + kernel/time/timekeeping.c | 108 -------------------- + lib/Kconfig.debug | 13 -- + tools/testing/selftests/wireguard/qemu/debug.config | 1 + 5 files changed, 3 insertions(+), 135 deletions(-) + +--- a/arch/riscv/configs/defconfig ++++ b/arch/riscv/configs/defconfig +@@ -301,7 +301,6 @@ CONFIG_DEBUG_MEMORY_INIT=y + CONFIG_DEBUG_PER_CPU_MAPS=y + CONFIG_SOFTLOCKUP_DETECTOR=y + CONFIG_WQ_WATCHDOG=y +-CONFIG_DEBUG_TIMEKEEPING=y + CONFIG_DEBUG_RT_MUTEXES=y + CONFIG_DEBUG_SPINLOCK=y + CONFIG_DEBUG_MUTEXES=y +--- a/include/linux/timekeeper_internal.h ++++ b/include/linux/timekeeper_internal.h +@@ -68,9 +68,6 @@ struct tk_read_base { + * shifted nano seconds. + * @ntp_error_shift: Shift conversion between clock shifted nano seconds and + * ntp shifted nano seconds. 
+- * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) +- * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) +- * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) + * + * Note: For timespec(64) based interfaces wall_to_monotonic is what + * we need to add to xtime (or xtime corrected for sub jiffy times) +@@ -124,18 +121,6 @@ struct timekeeper { + u32 ntp_err_mult; + /* Flag used to avoid updating NTP twice with same second */ + u32 skip_second_overflow; +-#ifdef CONFIG_DEBUG_TIMEKEEPING +- long last_warning; +- /* +- * These simple flag variables are managed +- * without locks, which is racy, but they are +- * ok since we don't really care about being +- * super precise about how many events were +- * seen, just that a problem was observed. +- */ +- int underflow_seen; +- int overflow_seen; +-#endif + }; + + #ifdef CONFIG_GENERIC_TIME_VSYSCALL +--- a/kernel/time/timekeeping.c ++++ b/kernel/time/timekeeping.c +@@ -195,97 +195,6 @@ static inline u64 tk_clock_read(const st + return clock->read(clock); + } + +-#ifdef CONFIG_DEBUG_TIMEKEEPING +-#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ +- +-static void timekeeping_check_update(struct timekeeper *tk, u64 offset) +-{ +- +- u64 max_cycles = tk->tkr_mono.clock->max_cycles; +- const char *name = tk->tkr_mono.clock->name; +- +- if (offset > max_cycles) { +- printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", +- offset, name, max_cycles); +- printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n"); +- } else { +- if (offset > (max_cycles >> 1)) { +- printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n", +- offset, name, max_cycles >> 1); +- printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n"); +- } +- } +- +- if (tk->underflow_seen) { +- if (jiffies - tk->last_warning > WARNING_FREQ) { +- printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name); +- printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); +- printk_deferred(" Your kernel is probably still fine.\n"); +- tk->last_warning = jiffies; +- } +- tk->underflow_seen = 0; +- } +- +- if (tk->overflow_seen) { +- if (jiffies - tk->last_warning > WARNING_FREQ) { +- printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name); +- printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); +- printk_deferred(" Your kernel is probably still fine.\n"); +- tk->last_warning = jiffies; +- } +- tk->overflow_seen = 0; +- } +-} +- +-static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles); +- +-static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr) +-{ +- struct timekeeper *tk = &tk_core.timekeeper; +- u64 now, last, mask, max, delta; +- unsigned int seq; +- +- /* +- * Since we're called holding a seqcount, the data may shift +- * under us while we're doing the calculation. This can cause +- * false positives, since we'd note a problem but throw the +- * results away. So nest another seqcount here to atomically +- * grab the points we are checking with. 
+- */ +- do { +- seq = read_seqcount_begin(&tk_core.seq); +- now = tk_clock_read(tkr); +- last = tkr->cycle_last; +- mask = tkr->mask; +- max = tkr->clock->max_cycles; +- } while (read_seqcount_retry(&tk_core.seq, seq)); +- +- delta = clocksource_delta(now, last, mask); +- +- /* +- * Try to catch underflows by checking if we are seeing small +- * mask-relative negative values. +- */ +- if (unlikely((~delta & mask) < (mask >> 3))) +- tk->underflow_seen = 1; +- +- /* Check for multiplication overflows */ +- if (unlikely(delta > max)) +- tk->overflow_seen = 1; +- +- /* timekeeping_cycles_to_ns() handles both under and overflow */ +- return timekeeping_cycles_to_ns(tkr, now); +-} +-#else +-static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset) +-{ +-} +-static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr) +-{ +- BUG(); +-} +-#endif +- + /** + * tk_setup_internals - Set up internals to use clocksource clock. + * +@@ -390,19 +299,11 @@ static inline u64 timekeeping_cycles_to_ + return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift; + } + +-static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr) ++static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) + { + return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr)); + } + +-static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) +-{ +- if (IS_ENABLED(CONFIG_DEBUG_TIMEKEEPING)) +- return timekeeping_debug_get_ns(tkr); +- +- return __timekeeping_get_ns(tkr); +-} +- + /** + * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. + * @tkr: Timekeeping readout base from which we take the update +@@ -446,7 +347,7 @@ static __always_inline u64 __ktime_get_f + seq = raw_read_seqcount_latch(&tkf->seq); + tkr = tkf->base + (seq & 0x01); + now = ktime_to_ns(tkr->base); +- now += __timekeeping_get_ns(tkr); ++ now += timekeeping_get_ns(tkr); + } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); + + return now; +@@ -562,7 +463,7 @@ static __always_inline u64 __ktime_get_r + tkr = tkf->base + (seq & 0x01); + basem = ktime_to_ns(tkr->base); + baser = ktime_to_ns(tkr->base_real); +- delta = __timekeeping_get_ns(tkr); ++ delta = timekeeping_get_ns(tkr); + } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); + + if (mono) +@@ -2298,9 +2199,6 @@ static bool timekeeping_advance(enum tim + if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) + goto out; + +- /* Do some additional sanity checking */ +- timekeeping_check_update(tk, offset); +- + /* + * With NO_HZ we may have to accumulate many cycle_intervals + * (think "ticks") worth of time at once. To do this efficiently, +--- a/lib/Kconfig.debug ++++ b/lib/Kconfig.debug +@@ -1328,19 +1328,6 @@ config SCHEDSTATS + + endmenu + +-config DEBUG_TIMEKEEPING +- bool "Enable extra timekeeping sanity checking" +- help +- This option will enable additional timekeeping sanity checks +- which may be helpful when diagnosing issues where timekeeping +- problems are suspected. +- +- This may include checks in the timekeeping hotpaths, so this +- option may have a (very small) performance impact to some +- workloads. +- +- If unsure, say N. 
+- + config DEBUG_PREEMPT + bool "Debug preemptible kernel" + depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT +--- a/tools/testing/selftests/wireguard/qemu/debug.config ++++ b/tools/testing/selftests/wireguard/qemu/debug.config +@@ -31,7 +31,6 @@ CONFIG_SCHED_DEBUG=y + CONFIG_SCHED_INFO=y + CONFIG_SCHEDSTATS=y + CONFIG_SCHED_STACK_END_CHECK=y +-CONFIG_DEBUG_TIMEKEEPING=y + CONFIG_DEBUG_PREEMPT=y + CONFIG_DEBUG_RT_MUTEXES=y + CONFIG_DEBUG_SPINLOCK=y
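
For reference, and not part of either patch above, the following is a minimal user-space sketch of the threshold arithmetic described in the first commit message: the 24-bit mask, the old MSB-based cut-off, the new 87.5% cut-off, and the patched clocksource_delta() check. The 80 ns cycle period is an assumption inferred from the quoted 671088640 ns figure (roughly half of a 24-bit counter), so the printed numbers are approximate rather than values taken from the patch itself.

/*
 * Illustrative sketch only; assumes a 24-bit counter with an ~80 ns
 * cycle period, inferred from the figures in the commit message.
 */
#include <stdint.h>
#include <stdio.h>

/* Mirrors the patched clocksource_delta(): deltas above max_delta read as 0 */
static uint64_t clocksource_delta(uint64_t now, uint64_t last,
				  uint64_t mask, uint64_t max_delta)
{
	uint64_t ret = (now - last) & mask;

	return ret > max_delta ? 0 : ret;
}

int main(void)
{
	uint64_t mask = (1ULL << 24) - 1;	/* 24-bit wide clocksource */
	uint64_t ns_per_cycle = 80;		/* assumed: ~12.5 MHz counter */

	/* Old cut-off: MSB of mask set, i.e. anything above mask/2 cycles */
	uint64_t old_cutoff = (mask >> 1) + 1;

	/* New cut-off: 0.875 of the counter width, as computed in the patch */
	uint64_t max_raw_delta = (mask >> 1) + (mask >> 2) + (mask >> 3);

	printf("old tripping point: ~%llu ns\n",
	       (unsigned long long)(old_cutoff * ns_per_cycle));
	printf("new tripping point: ~%llu ns\n",
	       (unsigned long long)(max_raw_delta * ns_per_cycle));

	/* A wakeup delayed just past the old cut-off no longer reads as 0 */
	printf("delta at old cut-off: %llu cycles\n",
	       (unsigned long long)clocksource_delta(old_cutoff, 0, mask,
						      max_raw_delta));
	return 0;
}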