From 707a2030442c79c914b749251eb1e5db33231d33 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sashal@kernel.org>
Date: Mon, 12 Aug 2024 06:01:52 -0400
Subject: [PATCH] Fixes for 6.6

Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 ...brown-bag-boolean-thinko-in-cs_watch.patch |  43 ++++
 ...e-the-watchdog-read-retries-automati.patch | 194 ++++++++++++++++++
 ...rror-and-esterror-to-operating-range.patch |  74 +++++++
 queue-6.6/series                              |   3 +
 4 files changed, 314 insertions(+)
 create mode 100644 queue-6.6/clocksource-fix-brown-bag-boolean-thinko-in-cs_watch.patch
 create mode 100644 queue-6.6/clocksource-scale-the-watchdog-read-retries-automati.patch
 create mode 100644 queue-6.6/ntp-clamp-maxerror-and-esterror-to-operating-range.patch

diff --git a/queue-6.6/clocksource-fix-brown-bag-boolean-thinko-in-cs_watch.patch b/queue-6.6/clocksource-fix-brown-bag-boolean-thinko-in-cs_watch.patch
new file mode 100644
index 00000000000..c70d112106e
--- /dev/null
+++ b/queue-6.6/clocksource-fix-brown-bag-boolean-thinko-in-cs_watch.patch
@@ -0,0 +1,43 @@
+From 619a99f4c228359ee2df08389fc413c96f6397bd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 2 Aug 2024 08:46:15 -0700
+Subject: clocksource: Fix brown-bag boolean thinko in cs_watchdog_read()
+
+From: Paul E. McKenney <paulmck@kernel.org>
+
+[ Upstream commit f2655ac2c06a15558e51ed6529de280e1553c86e ]
+
+The current "nretries > 1 || nretries >= max_retries" check in
+cs_watchdog_read() will always evaluate to true, and thus pr_warn(), if
+nretries is greater than 1.  The intent is instead to never warn on the
+first try, but otherwise warn if the successful retry was the last retry.
+
+Therefore, change that "||" to "&&".
+
+Fixes: db3a34e17433 ("clocksource: Retry clock read if long delays detected")
+Reported-by: Borislav Petkov <bp@alien8.de>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20240802154618.4149953-2-paulmck@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/time/clocksource.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
+index c95080f005dd4..3260bbe98894b 100644
+--- a/kernel/time/clocksource.c
++++ b/kernel/time/clocksource.c
+@@ -238,7 +238,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow,
+ 		wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult,
+ 					      watchdog->shift);
+ 		if (wd_delay <= WATCHDOG_MAX_SKEW) {
+-			if (nretries > 1 || nretries >= max_retries) {
++			if (nretries > 1 && nretries >= max_retries) {
+ 				pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
+ 					smp_processor_id(), watchdog->name, nretries);
+ 			}
+-- 
+2.43.0
+
diff --git a/queue-6.6/clocksource-scale-the-watchdog-read-retries-automati.patch b/queue-6.6/clocksource-scale-the-watchdog-read-retries-automati.patch
new file mode 100644
index 00000000000..0da0a387e96
--- /dev/null
+++ b/queue-6.6/clocksource-scale-the-watchdog-read-retries-automati.patch
@@ -0,0 +1,194 @@
+From 3cf0c03c8f2e815543a2478727d1777a40cc46e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Feb 2024 14:08:59 +0800
+Subject: clocksource: Scale the watchdog read retries automatically
+
+From: Feng Tang <feng.tang@intel.com>
+
+[ Upstream commit 2ed08e4bc53298db3f87b528cd804cb0cce066a9 ]
+
+On an 8-socket server the TSC is wrongly marked as 'unstable' and disabled
+during boot time on about one out of 120 boot attempts:
+
+    clocksource: timekeeping watchdog on CPU227: wd-tsc-wd excessive read-back delay of 153560ns vs. limit of 125000ns,
+    wd-wd read-back delay only 11440ns, attempt 3, marking tsc unstable
+    tsc: Marking TSC unstable due to clocksource watchdog
+    TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.
+    sched_clock: Marking unstable (119294969739, 159204297)<-(125446229205, -5992055152)
+    clocksource: Checking clocksource tsc synchronization from CPU 319 to CPUs 0,99,136,180,210,542,601,896.
+    clocksource: Switched to clocksource hpet
+
+The reason is that for platforms with a large number of CPUs, there are
+sporadic big or huge read latencies while reading the watchdog/clocksource
+during boot or when the system is under stress workload, and the frequency
+and maximum value of the latency go up with the number of online CPUs.
+
+The current code already has logic to detect and filter such high latency
+cases by reading the watchdog twice and checking the two deltas. Due to the
+randomness of the latency, there is a low probability that the first delta
+(latency) is big, but the second delta is small and looks valid. The
+watchdog code retries the readouts by default twice, which is not
+necessarily sufficient for systems with a large number of CPUs.
+
+There is a command line parameter 'max_cswd_read_retries' which allows to
+increase the number of retries, but that's not user friendly as it needs to
+be tweaked per system. As the number of required retries is proportional to
+the number of online CPUs, this parameter can be calculated at runtime.
+
+Scale and enlarge the number of retries according to the number of online
+CPUs and remove the command line parameter completely.
+
+[ tglx: Massaged change log and comments ]
+
+Signed-off-by: Feng Tang <feng.tang@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Jin Wang <jin1.wang@intel.com>
+Tested-by: Paul E. McKenney <paulmck@kernel.org>
+Reviewed-by: Waiman Long <longman@redhat.com>
+Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
+Link: https://lore.kernel.org/r/20240221060859.1027450-1-feng.tang@intel.com
+Stable-dep-of: f2655ac2c06a ("clocksource: Fix brown-bag boolean thinko in cs_watchdog_read()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt   |  6 ------
+ include/linux/clocksource.h                       | 14 +++++++++++++-
+ kernel/time/clocksource-wdtest.c                  | 13 +++++++------
+ kernel/time/clocksource.c                         | 10 ++++------
+ tools/testing/selftests/rcutorture/bin/torture.sh |  2 +-
+ 5 files changed, 25 insertions(+), 20 deletions(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 83b1795335e53..a7fe113897361 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -664,12 +664,6 @@
+ 			loops can be debugged more effectively on production
+ 			systems.
+ 
+-	clocksource.max_cswd_read_retries= [KNL]
+-			Number of clocksource_watchdog() retries due to
+-			external delays before the clock will be marked
+-			unstable.  Defaults to two retries, that is,
+-			three attempts to read the clock under test.
+-
+ 	clocksource.verify_n_cpus= [KNL]
+ 			Limit the number of CPUs checked for clocksources
+ 			marked with CLOCK_SOURCE_VERIFY_PERCPU that
+diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
+index 1d42d4b173271..0ad8b550bb4b4 100644
+--- a/include/linux/clocksource.h
++++ b/include/linux/clocksource.h
+@@ -291,7 +291,19 @@ static inline void timer_probe(void) {}
+ #define TIMER_ACPI_DECLARE(name, table_id, fn)		\
+ 	ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn)
+ 
+-extern ulong max_cswd_read_retries;
++static inline unsigned int clocksource_get_max_watchdog_retry(void)
++{
++	/*
++	 * When system is in the boot phase or under heavy workload, there
++	 * can be random big latencies during the clocksource/watchdog
++	 * read, so allow retries to filter the noise latency. As the
++	 * latency's frequency and maximum value goes up with the number of
++	 * CPUs, scale the number of retries with the number of online
++	 * CPUs.
++	 */
++	return (ilog2(num_online_cpus()) / 2) + 1;
++}
++
+ void clocksource_verify_percpu(struct clocksource *cs);
+ 
+ #endif /* _LINUX_CLOCKSOURCE_H */
+diff --git a/kernel/time/clocksource-wdtest.c b/kernel/time/clocksource-wdtest.c
+index df922f49d171b..d06185e054ea2 100644
+--- a/kernel/time/clocksource-wdtest.c
++++ b/kernel/time/clocksource-wdtest.c
+@@ -104,8 +104,8 @@ static void wdtest_ktime_clocksource_reset(void)
+ static int wdtest_func(void *arg)
+ {
+ 	unsigned long j1, j2;
++	int i, max_retries;
+ 	char *s;
+-	int i;
+ 
+ 	schedule_timeout_uninterruptible(holdoff * HZ);
+ 
+@@ -139,18 +139,19 @@ static int wdtest_func(void *arg)
+ 	WARN_ON_ONCE(time_before(j2, j1 + NSEC_PER_USEC));
+ 
+ 	/* Verify tsc-like stability with various numbers of errors injected. */
+-	for (i = 0; i <= max_cswd_read_retries + 1; i++) {
+-		if (i <= 1 && i < max_cswd_read_retries)
++	max_retries = clocksource_get_max_watchdog_retry();
++	for (i = 0; i <= max_retries + 1; i++) {
++		if (i <= 1 && i < max_retries)
+ 			s = "";
+-		else if (i <= max_cswd_read_retries)
++		else if (i <= max_retries)
+ 			s = ", expect message";
+ 		else
+ 			s = ", expect clock skew";
+-		pr_info("--- Watchdog with %dx error injection, %lu retries%s.\n", i, max_cswd_read_retries, s);
++		pr_info("--- Watchdog with %dx error injection, %d retries%s.\n", i, max_retries, s);
+ 		WRITE_ONCE(wdtest_ktime_read_ndelays, i);
+ 		schedule_timeout_uninterruptible(2 * HZ);
+ 		WARN_ON_ONCE(READ_ONCE(wdtest_ktime_read_ndelays));
+-		WARN_ON_ONCE((i <= max_cswd_read_retries) !=
++		WARN_ON_ONCE((i <= max_retries) !=
+ 			     !(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE));
+ 		wdtest_ktime_clocksource_reset();
+ 	}
+diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
+index 3052b1f1168e2..c95080f005dd4 100644
+--- a/kernel/time/clocksource.c
++++ b/kernel/time/clocksource.c
+@@ -210,9 +210,6 @@ void clocksource_mark_unstable(struct clocksource *cs)
+ 	spin_unlock_irqrestore(&watchdog_lock, flags);
+ }
+ 
+-ulong max_cswd_read_retries = 2;
+-module_param(max_cswd_read_retries, ulong, 0644);
+-EXPORT_SYMBOL_GPL(max_cswd_read_retries);
+ static int verify_n_cpus = 8;
+ module_param(verify_n_cpus, int, 0644);
+ 
+@@ -224,11 +221,12 @@ enum wd_read_status {
+ 
+ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
+ {
+-	unsigned int nretries;
++	unsigned int nretries, max_retries;
+ 	u64 wd_end, wd_end2, wd_delta;
+ 	int64_t wd_delay, wd_seq_delay;
+ 
+-	for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
++	max_retries = clocksource_get_max_watchdog_retry();
++	for (nretries = 0; nretries <= max_retries; nretries++) {
+ 		local_irq_disable();
+ 		*wdnow = watchdog->read(watchdog);
+ 		*csnow = cs->read(cs);
+@@ -240,7 +238,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow,
+ 		wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult,
+ 					      watchdog->shift);
+ 		if (wd_delay <= WATCHDOG_MAX_SKEW) {
+-			if (nretries > 1 || nretries >= max_cswd_read_retries) {
++			if (nretries > 1 || nretries >= max_retries) {
+ 				pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
+ 					smp_processor_id(), watchdog->name, nretries);
+ 			}
+diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
+index 12b50a4a881ac..89a82f6f140ef 100755
+--- a/tools/testing/selftests/rcutorture/bin/torture.sh
++++ b/tools/testing/selftests/rcutorture/bin/torture.sh
+@@ -567,7 +567,7 @@ then
+ 	torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 tsc=watchdog"
+ 	torture_set "clocksourcewd-1" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
+ 
+-	torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 clocksource.max_cswd_read_retries=1 tsc=watchdog"
++	torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 tsc=watchdog"
+ 	torture_set "clocksourcewd-2" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
+ 
+ 	# In case our work is already done...
+-- 
+2.43.0
+
diff --git a/queue-6.6/ntp-clamp-maxerror-and-esterror-to-operating-range.patch b/queue-6.6/ntp-clamp-maxerror-and-esterror-to-operating-range.patch
new file mode 100644
index 00000000000..e6a2f893bf2
--- /dev/null
+++ b/queue-6.6/ntp-clamp-maxerror-and-esterror-to-operating-range.patch
@@ -0,0 +1,74 @@
+From 238bcb463244a5614d61dc73a0ba5928bdc57c4e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 May 2024 20:22:44 +0000
+Subject: ntp: Clamp maxerror and esterror to operating range
+
+From: Justin Stitt <justinstitt@google.com>
+
+[ Upstream commit 87d571d6fb77ec342a985afa8744bb9bb75b3622 ]
+
+Using syzkaller alongside the newly reintroduced signed integer overflow
+sanitizer spits out this report:
+
+UBSAN: signed-integer-overflow in ../kernel/time/ntp.c:461:16
+9223372036854775807 + 500 cannot be represented in type 'long'
+Call Trace:
+ handle_overflow+0x171/0x1b0
+ second_overflow+0x2d6/0x500
+ accumulate_nsecs_to_secs+0x60/0x160
+ timekeeping_advance+0x1fe/0x890
+ update_wall_time+0x10/0x30
+
+time_maxerror is unconditionally incremented and the result is checked
+against NTP_PHASE_LIMIT, but the increment itself can overflow, resulting
+in wrap-around to negative space.
+
+Before commit eea83d896e31 ("ntp: NTP4 user space bits update") the user
+supplied value was sanity checked to be in the operating range. That change
+removed the sanity check and relied on clamping in handle_overflow() which
+does not work correctly when the user supplied value is in the overflow
+zone of the '+ 500' operation.
+
+The operation requires CAP_SYS_TIME and the side effect of the overflow is
+NTP getting out of sync.
+
+Miroslav confirmed that the input value should be clamped to the operating
+range and the same applies to time_esterror. The latter is not used by the
+kernel, but the value still should be in the operating range as it was
+before the sanity check got removed.
+
+Clamp them to the operating range.
+
+[ tglx: Changed it to clamping and included time_esterror ]
+
+Fixes: eea83d896e31 ("ntp: NTP4 user space bits update")
+Signed-off-by: Justin Stitt <justinstitt@google.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Miroslav Lichvar <mlichvar@redhat.com>
+Link: https://lore.kernel.org/all/20240517-b4-sio-ntp-usec-v2-1-d539180f2b79@google.com
+Closes: https://github.com/KSPP/linux/issues/354
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/time/ntp.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
+index 406dccb79c2b6..502e1e5b7f7f6 100644
+--- a/kernel/time/ntp.c
++++ b/kernel/time/ntp.c
+@@ -727,10 +727,10 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc,
+ 	}
+ 
+ 	if (txc->modes & ADJ_MAXERROR)
+-		time_maxerror = txc->maxerror;
++		time_maxerror = clamp(txc->maxerror, 0, NTP_PHASE_LIMIT);
+ 
+ 	if (txc->modes & ADJ_ESTERROR)
+-		time_esterror = txc->esterror;
++		time_esterror = clamp(txc->esterror, 0, NTP_PHASE_LIMIT);
+ 
+ 	if (txc->modes & ADJ_TIMECONST) {
+ 		time_constant = txc->constant;
+-- 
+2.43.0
+
diff --git a/queue-6.6/series b/queue-6.6/series
index dd8e042c661..8a1ede12018 100644
--- a/queue-6.6/series
+++ b/queue-6.6/series
@@ -116,3 +116,6 @@ alsa-hda-realtek-add-framework-laptop-13-intel-core-ultra-to-quirks.patch
 alsa-hda-hdmi-yet-more-pin-fix-for-hp-elitedesk-800-g4.patch
 usb-vhci-hcd-do-not-drop-references-before-new-references-are-gained.patch
 usb-serial-debug-do-not-echo-input-by-default.patch
+ntp-clamp-maxerror-and-esterror-to-operating-range.patch
+clocksource-scale-the-watchdog-read-retries-automati.patch
+clocksource-fix-brown-bag-boolean-thinko-in-cs_watch.patch
-- 
2.47.3