6.12-stable patches
author		Greg Kroah-Hartman <gregkh@linuxfoundation.org>
		Fri, 13 Dec 2024 11:32:50 +0000 (12:32 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
		Fri, 13 Dec 2024 11:32:50 +0000 (12:32 +0100)
added patches:
clocksource-make-negative-motion-detection-more-robust.patch
timekeeping-remove-config_debug_timekeeping.patch

queue-6.12/clocksource-make-negative-motion-detection-more-robust.patch [new file with mode: 0644]
queue-6.12/series
queue-6.12/timekeeping-remove-config_debug_timekeeping.patch [new file with mode: 0644]

diff --git a/queue-6.12/clocksource-make-negative-motion-detection-more-robust.patch b/queue-6.12/clocksource-make-negative-motion-detection-more-robust.patch
new file mode 100644 (file)
index 0000000..54c0d28
--- /dev/null
@@ -0,0 +1,147 @@
+From 76031d9536a076bf023bedbdb1b4317fc801dd67 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 3 Dec 2024 11:16:30 +0100
+Subject: clocksource: Make negative motion detection more robust
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 76031d9536a076bf023bedbdb1b4317fc801dd67 upstream.
+
+Guenter reported boot stalls on an emulated ARM 32-bit platform, which has a
+24-bit wide clocksource.
+
+It turns out that the calculated maximum idle time, which limits idle
+sleeps to prevent clocksource wraparounds, is close to the point where the
+negative motion detection triggers.
+
+  max_idle_ns:                    597268854 ns
+  negative motion tripping point: 671088640 ns
+
+If the idle wakeup is delayed beyond that point, the clocksource
+advances far enough to trigger the negative motion detection. This
+prevents the clock from advancing, and in the worst case the system stalls
+completely if the consecutive sleeps based on the stale clock are
+delayed as well.
+
+Cure this by calculating a more robust cut-off value for negative motion,
+which covers 87.5% of the actual clocksource counter width. Compare the
+delta against this value to catch negative motion. This specifically matters
+for clocksources with a small counter width, as their wraparound time is
+close to half the counter width. For clocksources with wide counters this is
+not a problem, because the maximum idle time is far from the half counter
+width due to the math overflow protection constraints.
+
+For the case at hand this results in a tripping point of 1174405120 ns.
+
+Note that this cannot prevent issues when the delay exceeds the 87.5%
+margin, but that's not different from the previous unchecked version which
+allowed arbitrary time jumps.
+
+Systems with a small counter width are prone to invalid results, but this
+problem is unlikely to be seen on real hardware. If such a system
+completely stalls for more than half a second, then there are other more
+urgent problems than the counter wrapping around.
+
+Fixes: c163e40af9b2 ("timekeeping: Always check for negative motion")
+Reported-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Guenter Roeck <linux@roeck-us.net>
+Link: https://lore.kernel.org/all/8734j5ul4x.ffs@tglx
+Closes: https://lore.kernel.org/all/387b120b-d68a-45e8-b6ab-768cd95d11c2@roeck-us.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
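+
+As a side note, here is a minimal userspace sketch of the cut-off arithmetic
+(it sits below the '---' marker, so git-am ignores it). The 80 ns cycle
+period is an assumption derived from the quoted 671088640 ns half-width of
+the 24-bit counter; this is an illustration, not part of the upstream change:
+
+	#include <stdint.h>
+	#include <stdio.h>
+
+	int main(void)
+	{
+		uint64_t mask = (1ULL << 24) - 1;	/* 24-bit counter */
+		uint64_t ns_per_cycle = 80;		/* assumed 12.5 MHz */
+
+		/* Old cut-off: MSB of the mask, i.e. half the counter width */
+		uint64_t old_cut = (mask >> 1) + 1;
+
+		/* New cut-off: 0.5 + 0.25 + 0.125 = 87.5% of the counter width */
+		uint64_t new_cut = (mask >> 1) + (mask >> 2) + (mask >> 3);
+
+		/* Prints 671088640 ns and 1174404880 ns, matching the changelog
+		 * values up to the rounding of the mask shifts. */
+		printf("old tripping point: %llu ns\n",
+		       (unsigned long long)(old_cut * ns_per_cycle));
+		printf("new tripping point: %llu ns\n",
+		       (unsigned long long)(new_cut * ns_per_cycle));
+		return 0;
+	}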
+ include/linux/clocksource.h        |    2 ++
+ kernel/time/clocksource.c          |   11 ++++++++++-
+ kernel/time/timekeeping.c          |    6 ++++--
+ kernel/time/timekeeping_internal.h |    8 ++++----
+ 4 files changed, 20 insertions(+), 7 deletions(-)
+
+--- a/include/linux/clocksource.h
++++ b/include/linux/clocksource.h
+@@ -49,6 +49,7 @@ struct module;
+  * @archdata:         Optional arch-specific data
+  * @max_cycles:               Maximum safe cycle value which won't overflow on
+  *                    multiplication
++ * @max_raw_delta:    Maximum safe delta value for negative motion detection
+  * @name:             Pointer to clocksource name
+  * @list:             List head for registration (internal)
+  * @freq_khz:         Clocksource frequency in khz.
+@@ -109,6 +110,7 @@ struct clocksource {
+       struct arch_clocksource_data archdata;
+ #endif
+       u64                     max_cycles;
++      u64                     max_raw_delta;
+       const char              *name;
+       struct list_head        list;
+       u32                     freq_khz;
+--- a/kernel/time/clocksource.c
++++ b/kernel/time/clocksource.c
+@@ -22,7 +22,7 @@
+ static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end)
+ {
+-      u64 delta = clocksource_delta(end, start, cs->mask);
++      u64 delta = clocksource_delta(end, start, cs->mask, cs->max_raw_delta);
+       if (likely(delta < cs->max_cycles))
+               return clocksource_cyc2ns(delta, cs->mult, cs->shift);
+@@ -985,6 +985,15 @@ static inline void clocksource_update_ma
+       cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
+                                               cs->maxadj, cs->mask,
+                                               &cs->max_cycles);
++
++      /*
++       * Threshold for detecting negative motion in clocksource_delta().
++       *
++       * Allow for 0.875 of the counter width so that overly long idle
++       * sleeps, which go slightly over mask/2, do not trigger the
++       * negative motion detection.
++       */
++      cs->max_raw_delta = (cs->mask >> 1) + (cs->mask >> 2) + (cs->mask >> 3);
+ }
+ static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -694,7 +694,8 @@ static void timekeeping_forward_now(stru
+       u64 cycle_now, delta;
+       cycle_now = tk_clock_read(&tk->tkr_mono);
+-      delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
++      delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
++                                tk->tkr_mono.clock->max_raw_delta);
+       tk->tkr_mono.cycle_last = cycle_now;
+       tk->tkr_raw.cycle_last  = cycle_now;
+@@ -2193,7 +2194,8 @@ static bool timekeeping_advance(enum tim
+               goto out;
+       offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
+-                                 tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
++                                 tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
++                                 tk->tkr_mono.clock->max_raw_delta);
+       /* Check if there's really nothing to do */
+       if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
+--- a/kernel/time/timekeeping_internal.h
++++ b/kernel/time/timekeeping_internal.h
+@@ -15,15 +15,15 @@ extern void tk_debug_account_sleep_time(
+ #define tk_debug_account_sleep_time(x)
+ #endif
+-static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
++static inline u64 clocksource_delta(u64 now, u64 last, u64 mask, u64 max_delta)
+ {
+       u64 ret = (now - last) & mask;
+       /*
+-       * Prevent time going backwards by checking the MSB of mask in
+-       * the result. If set, return 0.
++       * Prevent time going backwards by checking the result against
++       * @max_delta. If greater, return 0.
+        */
+-      return ret & ~(mask >> 1) ? 0 : ret;
++      return ret > max_delta ? 0 : ret;
+ }
+ /* Semi public for serialization of non timekeeper VDSO updates. */
diff --git a/queue-6.12/series b/queue-6.12/series
index 7659133c049035b5e96b4f4523ba833af6dc1066..5199b7d666445dbda4015857fe2599aaab3b825a 100644 (file)
--- a/queue-6.12/series
@@ -463,3 +463,5 @@ alsa-hda-fix-build-error-without-config_snd_debug.patch
 revert-drm-amd-display-parse-umc_info-or-vram_info-based-on-asic.patch
 s390-pci-fix-leak-of-struct-zpci_dev-when-zpci_add_device-fails.patch
 alsa-hda-realtek-fix-spelling-mistake-firelfy-firefly.patch
+timekeeping-remove-config_debug_timekeeping.patch
+clocksource-make-negative-motion-detection-more-robust.patch
diff --git a/queue-6.12/timekeeping-remove-config_debug_timekeeping.patch b/queue-6.12/timekeeping-remove-config_debug_timekeeping.patch
new file mode 100644 (file)
index 0000000..7a255e5
--- /dev/null
@@ -0,0 +1,253 @@
+From d44d26987bb3df6d76556827097fc9ce17565cb8 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 31 Oct 2024 13:04:07 +0100
+Subject: timekeeping: Remove CONFIG_DEBUG_TIMEKEEPING
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit d44d26987bb3df6d76556827097fc9ce17565cb8 upstream.
+
+Since commit 135225a363ae, timekeeping_cycles_to_ns() correctly handles large
+offsets which would lead to 64-bit multiplication overflows. It is also
+unconditionally protected against negative motion of the clocksource, which
+was previously exclusive to x86.
+
+timekeeping_advance() handles large offsets already correctly.
+
+That means the value of CONFIG_DEBUG_TIMEKEEPING, which analyzed these cases,
+is very close to zero. Remove all of it.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: John Stultz <jstultz@google.com>
+Link: https://lore.kernel.org/all/20241031120328.536010148@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
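+
+As a side note, here is a small standalone sketch of the underflow heuristic
+used by the debug code removed below (placed under the '---' marker, so
+git-am ignores it; the 24-bit mask is just an example value). A small
+backwards step of the counter wraps to a masked delta just under the mask,
+which the check '(~delta & mask) < (mask >> 3)' flagged as negative motion:
+
+	#include <assert.h>
+	#include <stdint.h>
+
+	int main(void)
+	{
+		uint64_t mask = (1ULL << 24) - 1;	/* example counter width */
+		uint64_t last = 1000;
+		uint64_t now  = last - 5;	/* 5 cycles of negative motion */
+
+		/* The masked delta wraps to mask - 4, just below the wrap point */
+		uint64_t delta = (now - last) & mask;
+		assert(delta == mask - 4);
+
+		/* The removed heuristic: deltas within mask/8 of the wrap point
+		 * are treated as underflows, not as huge forward steps. */
+		assert((~delta & mask) < (mask >> 3));
+		return 0;
+	}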
+ arch/riscv/configs/defconfig                        |    1 
+ include/linux/timekeeper_internal.h                 |   15 --
+ kernel/time/timekeeping.c                           |  108 --------------------
+ lib/Kconfig.debug                                   |   13 --
+ tools/testing/selftests/wireguard/qemu/debug.config |    1 
+ 5 files changed, 3 insertions(+), 135 deletions(-)
+
+--- a/arch/riscv/configs/defconfig
++++ b/arch/riscv/configs/defconfig
+@@ -301,7 +301,6 @@ CONFIG_DEBUG_MEMORY_INIT=y
+ CONFIG_DEBUG_PER_CPU_MAPS=y
+ CONFIG_SOFTLOCKUP_DETECTOR=y
+ CONFIG_WQ_WATCHDOG=y
+-CONFIG_DEBUG_TIMEKEEPING=y
+ CONFIG_DEBUG_RT_MUTEXES=y
+ CONFIG_DEBUG_SPINLOCK=y
+ CONFIG_DEBUG_MUTEXES=y
+--- a/include/linux/timekeeper_internal.h
++++ b/include/linux/timekeeper_internal.h
+@@ -68,9 +68,6 @@ struct tk_read_base {
+  *                    shifted nano seconds.
+  * @ntp_error_shift:  Shift conversion between clock shifted nano seconds and
+  *                    ntp shifted nano seconds.
+- * @last_warning:     Warning ratelimiter (DEBUG_TIMEKEEPING)
+- * @underflow_seen:   Underflow warning flag (DEBUG_TIMEKEEPING)
+- * @overflow_seen:    Overflow warning flag (DEBUG_TIMEKEEPING)
+  *
+  * Note: For timespec(64) based interfaces wall_to_monotonic is what
+  * we need to add to xtime (or xtime corrected for sub jiffy times)
+@@ -124,18 +121,6 @@ struct timekeeper {
+       u32                     ntp_err_mult;
+       /* Flag used to avoid updating NTP twice with same second */
+       u32                     skip_second_overflow;
+-#ifdef CONFIG_DEBUG_TIMEKEEPING
+-      long                    last_warning;
+-      /*
+-       * These simple flag variables are managed
+-       * without locks, which is racy, but they are
+-       * ok since we don't really care about being
+-       * super precise about how many events were
+-       * seen, just that a problem was observed.
+-       */
+-      int                     underflow_seen;
+-      int                     overflow_seen;
+-#endif
+ };
+ #ifdef CONFIG_GENERIC_TIME_VSYSCALL
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -195,97 +195,6 @@ static inline u64 tk_clock_read(const st
+       return clock->read(clock);
+ }
+-#ifdef CONFIG_DEBUG_TIMEKEEPING
+-#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
+-
+-static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
+-{
+-
+-      u64 max_cycles = tk->tkr_mono.clock->max_cycles;
+-      const char *name = tk->tkr_mono.clock->name;
+-
+-      if (offset > max_cycles) {
+-              printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
+-                              offset, name, max_cycles);
+-              printk_deferred("         timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
+-      } else {
+-              if (offset > (max_cycles >> 1)) {
+-                      printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
+-                                      offset, name, max_cycles >> 1);
+-                      printk_deferred("      timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
+-              }
+-      }
+-
+-      if (tk->underflow_seen) {
+-              if (jiffies - tk->last_warning > WARNING_FREQ) {
+-                      printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
+-                      printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
+-                      printk_deferred("         Your kernel is probably still fine.\n");
+-                      tk->last_warning = jiffies;
+-              }
+-              tk->underflow_seen = 0;
+-      }
+-
+-      if (tk->overflow_seen) {
+-              if (jiffies - tk->last_warning > WARNING_FREQ) {
+-                      printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
+-                      printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
+-                      printk_deferred("         Your kernel is probably still fine.\n");
+-                      tk->last_warning = jiffies;
+-              }
+-              tk->overflow_seen = 0;
+-      }
+-}
+-
+-static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles);
+-
+-static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr)
+-{
+-      struct timekeeper *tk = &tk_core.timekeeper;
+-      u64 now, last, mask, max, delta;
+-      unsigned int seq;
+-
+-      /*
+-       * Since we're called holding a seqcount, the data may shift
+-       * under us while we're doing the calculation. This can cause
+-       * false positives, since we'd note a problem but throw the
+-       * results away. So nest another seqcount here to atomically
+-       * grab the points we are checking with.
+-       */
+-      do {
+-              seq = read_seqcount_begin(&tk_core.seq);
+-              now = tk_clock_read(tkr);
+-              last = tkr->cycle_last;
+-              mask = tkr->mask;
+-              max = tkr->clock->max_cycles;
+-      } while (read_seqcount_retry(&tk_core.seq, seq));
+-
+-      delta = clocksource_delta(now, last, mask);
+-
+-      /*
+-       * Try to catch underflows by checking if we are seeing small
+-       * mask-relative negative values.
+-       */
+-      if (unlikely((~delta & mask) < (mask >> 3)))
+-              tk->underflow_seen = 1;
+-
+-      /* Check for multiplication overflows */
+-      if (unlikely(delta > max))
+-              tk->overflow_seen = 1;
+-
+-      /* timekeeping_cycles_to_ns() handles both under and overflow */
+-      return timekeeping_cycles_to_ns(tkr, now);
+-}
+-#else
+-static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
+-{
+-}
+-static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr)
+-{
+-      BUG();
+-}
+-#endif
+-
+ /**
+  * tk_setup_internals - Set up internals to use clocksource clock.
+  *
+@@ -390,19 +299,11 @@ static inline u64 timekeeping_cycles_to_
+       return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
+ }
+-static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr)
++static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
+ {
+       return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr));
+ }
+-static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
+-{
+-      if (IS_ENABLED(CONFIG_DEBUG_TIMEKEEPING))
+-              return timekeeping_debug_get_ns(tkr);
+-
+-      return __timekeeping_get_ns(tkr);
+-}
+-
+ /**
+  * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
+  * @tkr: Timekeeping readout base from which we take the update
+@@ -446,7 +347,7 @@ static __always_inline u64 __ktime_get_f
+               seq = raw_read_seqcount_latch(&tkf->seq);
+               tkr = tkf->base + (seq & 0x01);
+               now = ktime_to_ns(tkr->base);
+-              now += __timekeeping_get_ns(tkr);
++              now += timekeeping_get_ns(tkr);
+       } while (raw_read_seqcount_latch_retry(&tkf->seq, seq));
+       return now;
+@@ -562,7 +463,7 @@ static __always_inline u64 __ktime_get_r
+               tkr = tkf->base + (seq & 0x01);
+               basem = ktime_to_ns(tkr->base);
+               baser = ktime_to_ns(tkr->base_real);
+-              delta = __timekeeping_get_ns(tkr);
++              delta = timekeeping_get_ns(tkr);
+       } while (raw_read_seqcount_latch_retry(&tkf->seq, seq));
+       if (mono)
+@@ -2298,9 +2199,6 @@ static bool timekeeping_advance(enum tim
+       if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
+               goto out;
+-      /* Do some additional sanity checking */
+-      timekeeping_check_update(tk, offset);
+-
+       /*
+        * With NO_HZ we may have to accumulate many cycle_intervals
+        * (think "ticks") worth of time at once. To do this efficiently,
+--- a/lib/Kconfig.debug
++++ b/lib/Kconfig.debug
+@@ -1328,19 +1328,6 @@ config SCHEDSTATS
+ endmenu
+-config DEBUG_TIMEKEEPING
+-      bool "Enable extra timekeeping sanity checking"
+-      help
+-        This option will enable additional timekeeping sanity checks
+-        which may be helpful when diagnosing issues where timekeeping
+-        problems are suspected.
+-
+-        This may include checks in the timekeeping hotpaths, so this
+-        option may have a (very small) performance impact to some
+-        workloads.
+-
+-        If unsure, say N.
+-
+ config DEBUG_PREEMPT
+       bool "Debug preemptible kernel"
+       depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT
+--- a/tools/testing/selftests/wireguard/qemu/debug.config
++++ b/tools/testing/selftests/wireguard/qemu/debug.config
+@@ -31,7 +31,6 @@ CONFIG_SCHED_DEBUG=y
+ CONFIG_SCHED_INFO=y
+ CONFIG_SCHEDSTATS=y
+ CONFIG_SCHED_STACK_END_CHECK=y
+-CONFIG_DEBUG_TIMEKEEPING=y
+ CONFIG_DEBUG_PREEMPT=y
+ CONFIG_DEBUG_RT_MUTEXES=y
+ CONFIG_DEBUG_SPINLOCK=y