From 707a2030442c79c914b749251eb1e5db33231d33 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 12 Aug 2024 06:01:52 -0400 Subject: [PATCH] Fixes for 6.6 Signed-off-by: Sasha Levin --- ...brown-bag-boolean-thinko-in-cs_watch.patch | 43 ++++ ...e-the-watchdog-read-retries-automati.patch | 194 ++++++++++++++++++ ...rror-and-esterror-to-operating-range.patch | 74 +++++++ queue-6.6/series | 3 + 4 files changed, 314 insertions(+) create mode 100644 queue-6.6/clocksource-fix-brown-bag-boolean-thinko-in-cs_watch.patch create mode 100644 queue-6.6/clocksource-scale-the-watchdog-read-retries-automati.patch create mode 100644 queue-6.6/ntp-clamp-maxerror-and-esterror-to-operating-range.patch diff --git a/queue-6.6/clocksource-fix-brown-bag-boolean-thinko-in-cs_watch.patch b/queue-6.6/clocksource-fix-brown-bag-boolean-thinko-in-cs_watch.patch new file mode 100644 index 00000000000..c70d112106e --- /dev/null +++ b/queue-6.6/clocksource-fix-brown-bag-boolean-thinko-in-cs_watch.patch @@ -0,0 +1,43 @@ +From 619a99f4c228359ee2df08389fc413c96f6397bd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 2 Aug 2024 08:46:15 -0700 +Subject: clocksource: Fix brown-bag boolean thinko in cs_watchdog_read() + +From: Paul E. McKenney + +[ Upstream commit f2655ac2c06a15558e51ed6529de280e1553c86e ] + +The current "nretries > 1 || nretries >= max_retries" check in +cs_watchdog_read() will always evaluate to true, and thus pr_warn(), if +nretries is greater than 1. The intent is instead to never warn on the +first try, but otherwise warn if the successful retry was the last retry. + +Therefore, change that "||" to "&&". + +Fixes: db3a34e17433 ("clocksource: Retry clock read if long delays detected") +Reported-by: Borislav Petkov +Signed-off-by: Paul E. 
McKenney +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20240802154618.4149953-2-paulmck@kernel.org +Signed-off-by: Sasha Levin +--- + kernel/time/clocksource.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c +index c95080f005dd4..3260bbe98894b 100644 +--- a/kernel/time/clocksource.c ++++ b/kernel/time/clocksource.c +@@ -238,7 +238,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, + wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, + watchdog->shift); + if (wd_delay <= WATCHDOG_MAX_SKEW) { +- if (nretries > 1 || nretries >= max_retries) { ++ if (nretries > 1 && nretries >= max_retries) { + pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n", + smp_processor_id(), watchdog->name, nretries); + } +-- +2.43.0 + diff --git a/queue-6.6/clocksource-scale-the-watchdog-read-retries-automati.patch b/queue-6.6/clocksource-scale-the-watchdog-read-retries-automati.patch new file mode 100644 index 00000000000..0da0a387e96 --- /dev/null +++ b/queue-6.6/clocksource-scale-the-watchdog-read-retries-automati.patch @@ -0,0 +1,194 @@ +From 3cf0c03c8f2e815543a2478727d1777a40cc46e9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Feb 2024 14:08:59 +0800 +Subject: clocksource: Scale the watchdog read retries automatically + +From: Feng Tang + +[ Upstream commit 2ed08e4bc53298db3f87b528cd804cb0cce066a9 ] + +On a 8-socket server the TSC is wrongly marked as 'unstable' and disabled +during boot time on about one out of 120 boot attempts: + + clocksource: timekeeping watchdog on CPU227: wd-tsc-wd excessive read-back delay of 153560ns vs. limit of 125000ns, + wd-wd read-back delay only 11440ns, attempt 3, marking tsc unstable + tsc: Marking TSC unstable due to clocksource watchdog + TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'. 
+ sched_clock: Marking unstable (119294969739, 159204297)<-(125446229205, -5992055152) + clocksource: Checking clocksource tsc synchronization from CPU 319 to CPUs 0,99,136,180,210,542,601,896. + clocksource: Switched to clocksource hpet + +The reason is that for platform with a large number of CPUs, there are +sporadic big or huge read latencies while reading the watchdog/clocksource +during boot or when system is under stress work load, and the frequency and +maximum value of the latency goes up with the number of online CPUs. + +The current code already has logic to detect and filter such high latency +case by reading the watchdog twice and checking the two deltas. Due to the +randomness of the latency, there is a low probability that the first delta +(latency) is big, but the second delta is small and looks valid. The +watchdog code retries the readouts by default twice, which is not +necessarily sufficient for systems with a large number of CPUs. + +There is a command line parameter 'max_cswd_read_retries' which allows to +increase the number of retries, but that's not user friendly as it needs to +be tweaked per system. As the number of required retries is proportional to +the number of online CPUs, this parameter can be calculated at runtime. + +Scale and enlarge the number of retries according to the number of online +CPUs and remove the command line parameter completely. + +[ tglx: Massaged change log and comments ] + +Signed-off-by: Feng Tang +Signed-off-by: Thomas Gleixner +Tested-by: Jin Wang +Tested-by: Paul E. 
McKenney +Link: https://lore.kernel.org/r/20240221060859.1027450-1-feng.tang@intel.com +Stable-dep-of: f2655ac2c06a ("clocksource: Fix brown-bag boolean thinko in cs_watchdog_read()") +Signed-off-by: Sasha Levin +--- + Documentation/admin-guide/kernel-parameters.txt | 6 ------ + include/linux/clocksource.h | 14 +++++++++++++- + kernel/time/clocksource-wdtest.c | 13 +++++++------ + kernel/time/clocksource.c | 10 ++++------ + tools/testing/selftests/rcutorture/bin/torture.sh | 2 +- + 5 files changed, 25 insertions(+), 20 deletions(-) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 83b1795335e53..a7fe113897361 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -664,12 +664,6 @@ + loops can be debugged more effectively on production + systems. + +- clocksource.max_cswd_read_retries= [KNL] +- Number of clocksource_watchdog() retries due to +- external delays before the clock will be marked +- unstable. Defaults to two retries, that is, +- three attempts to read the clock under test. +- + clocksource.verify_n_cpus= [KNL] + Limit the number of CPUs checked for clocksources + marked with CLOCK_SOURCE_VERIFY_PERCPU that +diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h +index 1d42d4b173271..0ad8b550bb4b4 100644 +--- a/include/linux/clocksource.h ++++ b/include/linux/clocksource.h +@@ -291,7 +291,19 @@ static inline void timer_probe(void) {} + #define TIMER_ACPI_DECLARE(name, table_id, fn) \ + ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn) + +-extern ulong max_cswd_read_retries; ++static inline unsigned int clocksource_get_max_watchdog_retry(void) ++{ ++ /* ++ * When system is in the boot phase or under heavy workload, there ++ * can be random big latencies during the clocksource/watchdog ++ * read, so allow retries to filter the noise latency. 
As the ++ * latency's frequency and maximum value goes up with the number of ++ * CPUs, scale the number of retries with the number of online ++ * CPUs. ++ */ ++ return (ilog2(num_online_cpus()) / 2) + 1; ++} ++ + void clocksource_verify_percpu(struct clocksource *cs); + + #endif /* _LINUX_CLOCKSOURCE_H */ +diff --git a/kernel/time/clocksource-wdtest.c b/kernel/time/clocksource-wdtest.c +index df922f49d171b..d06185e054ea2 100644 +--- a/kernel/time/clocksource-wdtest.c ++++ b/kernel/time/clocksource-wdtest.c +@@ -104,8 +104,8 @@ static void wdtest_ktime_clocksource_reset(void) + static int wdtest_func(void *arg) + { + unsigned long j1, j2; ++ int i, max_retries; + char *s; +- int i; + + schedule_timeout_uninterruptible(holdoff * HZ); + +@@ -139,18 +139,19 @@ static int wdtest_func(void *arg) + WARN_ON_ONCE(time_before(j2, j1 + NSEC_PER_USEC)); + + /* Verify tsc-like stability with various numbers of errors injected. */ +- for (i = 0; i <= max_cswd_read_retries + 1; i++) { +- if (i <= 1 && i < max_cswd_read_retries) ++ max_retries = clocksource_get_max_watchdog_retry(); ++ for (i = 0; i <= max_retries + 1; i++) { ++ if (i <= 1 && i < max_retries) + s = ""; +- else if (i <= max_cswd_read_retries) ++ else if (i <= max_retries) + s = ", expect message"; + else + s = ", expect clock skew"; +- pr_info("--- Watchdog with %dx error injection, %lu retries%s.\n", i, max_cswd_read_retries, s); ++ pr_info("--- Watchdog with %dx error injection, %d retries%s.\n", i, max_retries, s); + WRITE_ONCE(wdtest_ktime_read_ndelays, i); + schedule_timeout_uninterruptible(2 * HZ); + WARN_ON_ONCE(READ_ONCE(wdtest_ktime_read_ndelays)); +- WARN_ON_ONCE((i <= max_cswd_read_retries) != ++ WARN_ON_ONCE((i <= max_retries) != + !(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE)); + wdtest_ktime_clocksource_reset(); + } +diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c +index 3052b1f1168e2..c95080f005dd4 100644 +--- a/kernel/time/clocksource.c ++++ 
b/kernel/time/clocksource.c +@@ -210,9 +210,6 @@ void clocksource_mark_unstable(struct clocksource *cs) + spin_unlock_irqrestore(&watchdog_lock, flags); + } + +-ulong max_cswd_read_retries = 2; +-module_param(max_cswd_read_retries, ulong, 0644); +-EXPORT_SYMBOL_GPL(max_cswd_read_retries); + static int verify_n_cpus = 8; + module_param(verify_n_cpus, int, 0644); + +@@ -224,11 +221,12 @@ enum wd_read_status { + + static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow) + { +- unsigned int nretries; ++ unsigned int nretries, max_retries; + u64 wd_end, wd_end2, wd_delta; + int64_t wd_delay, wd_seq_delay; + +- for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) { ++ max_retries = clocksource_get_max_watchdog_retry(); ++ for (nretries = 0; nretries <= max_retries; nretries++) { + local_irq_disable(); + *wdnow = watchdog->read(watchdog); + *csnow = cs->read(cs); +@@ -240,7 +238,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, + wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, + watchdog->shift); + if (wd_delay <= WATCHDOG_MAX_SKEW) { +- if (nretries > 1 || nretries >= max_cswd_read_retries) { ++ if (nretries > 1 || nretries >= max_retries) { + pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n", + smp_processor_id(), watchdog->name, nretries); + } +diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh +index 12b50a4a881ac..89a82f6f140ef 100755 +--- a/tools/testing/selftests/rcutorture/bin/torture.sh ++++ b/tools/testing/selftests/rcutorture/bin/torture.sh +@@ -567,7 +567,7 @@ then + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 tsc=watchdog" + torture_set "clocksourcewd-1" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" 
--trust-make + +- torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 clocksource.max_cswd_read_retries=1 tsc=watchdog" ++ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 tsc=watchdog" + torture_set "clocksourcewd-2" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make + + # In case our work is already done... +-- +2.43.0 + diff --git a/queue-6.6/ntp-clamp-maxerror-and-esterror-to-operating-range.patch b/queue-6.6/ntp-clamp-maxerror-and-esterror-to-operating-range.patch new file mode 100644 index 00000000000..e6a2f893bf2 --- /dev/null +++ b/queue-6.6/ntp-clamp-maxerror-and-esterror-to-operating-range.patch @@ -0,0 +1,74 @@ +From 238bcb463244a5614d61dc73a0ba5928bdc57c4e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 May 2024 20:22:44 +0000 +Subject: ntp: Clamp maxerror and esterror to operating range + +From: Justin Stitt + +[ Upstream commit 87d571d6fb77ec342a985afa8744bb9bb75b3622 ] + +Using syzkaller alongside the newly reintroduced signed integer overflow +sanitizer spits out this report: + +UBSAN: signed-integer-overflow in ../kernel/time/ntp.c:461:16 +9223372036854775807 + 500 cannot be represented in type 'long' +Call Trace: + handle_overflow+0x171/0x1b0 + second_overflow+0x2d6/0x500 + accumulate_nsecs_to_secs+0x60/0x160 + timekeeping_advance+0x1fe/0x890 + update_wall_time+0x10/0x30 + +time_maxerror is unconditionally incremented and the result is checked +against NTP_PHASE_LIMIT, but the increment itself can overflow, resulting +in wrap-around to negative space. + +Before commit eea83d896e31 ("ntp: NTP4 user space bits update") the user +supplied value was sanity checked to be in the operating range. 
That change +removed the sanity check and relied on clamping in handle_overflow() which +does not work correctly when the user supplied value is in the overflow +zone of the '+ 500' operation. + +The operation requires CAP_SYS_TIME and the side effect of the overflow is +NTP getting out of sync. + +Miroslav confirmed that the input value should be clamped to the operating +range and the same applies to time_esterror. The latter is not used by the +kernel, but the value still should be in the operating range as it was +before the sanity check got removed. + +Clamp them to the operating range. + +[ tglx: Changed it to clamping and included time_esterror ] + +Fixes: eea83d896e31 ("ntp: NTP4 user space bits update") +Signed-off-by: Justin Stitt +Signed-off-by: Thomas Gleixner +Cc: Miroslav Lichvar +Link: https://lore.kernel.org/all/20240517-b4-sio-ntp-usec-v2-1-d539180f2b79@google.com +Closes: https://github.com/KSPP/linux/issues/354 +Signed-off-by: Sasha Levin +--- + kernel/time/ntp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c +index 406dccb79c2b6..502e1e5b7f7f6 100644 +--- a/kernel/time/ntp.c ++++ b/kernel/time/ntp.c +@@ -727,10 +727,10 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc, + } + + if (txc->modes & ADJ_MAXERROR) +- time_maxerror = txc->maxerror; ++ time_maxerror = clamp(txc->maxerror, 0, NTP_PHASE_LIMIT); + + if (txc->modes & ADJ_ESTERROR) +- time_esterror = txc->esterror; ++ time_esterror = clamp(txc->esterror, 0, NTP_PHASE_LIMIT); + + if (txc->modes & ADJ_TIMECONST) { + time_constant = txc->constant; +-- +2.43.0 + diff --git a/queue-6.6/series b/queue-6.6/series index dd8e042c661..8a1ede12018 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -116,3 +116,6 @@ alsa-hda-realtek-add-framework-laptop-13-intel-core-ultra-to-quirks.patch alsa-hda-hdmi-yet-more-pin-fix-for-hp-elitedesk-800-g4.patch 
usb-vhci-hcd-do-not-drop-references-before-new-references-are-gained.patch usb-serial-debug-do-not-echo-input-by-default.patch +ntp-clamp-maxerror-and-esterror-to-operating-range.patch +clocksource-scale-the-watchdog-read-retries-automati.patch +clocksource-fix-brown-bag-boolean-thinko-in-cs_watch.patch -- 2.47.3