From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 13 Mar 2025 16:02:41 +0000 (+0100)
Subject: 5.4-stable patches
X-Git-Tag: v6.6.84~69
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=84a4fe139cc46cbf364d77c60b60f7fe68ce34da;p=thirdparty%2Fkernel%2Fstable-queue.git

5.4-stable patches

added patches:
	clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch
	sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch
---

diff --git a/queue-5.4/clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch b/queue-5.4/clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch
new file mode 100644
index 0000000000..f95a5bd577
--- /dev/null
+++ b/queue-5.4/clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch
@@ -0,0 +1,237 @@
+From 531b2ca0a940ac9db03f246c8b77c4201de72b00 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Fri, 2 Aug 2024 14:55:55 +0100
+Subject: clockevents/drivers/i8253: Fix stop sequence for timer 0
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 531b2ca0a940ac9db03f246c8b77c4201de72b00 upstream.
+
+According to the data sheet, writing the MODE register should stop the
+counter (and thus the interrupts). This appears to work on real hardware,
+at least modern Intel and AMD systems. It should also work on Hyper-V.
+
+However, on some buggy virtual machines the mode change doesn't have any
+effect until the counter is subsequently loaded (or perhaps when the IRQ
+next fires).
+
+So, set MODE 0 and then load the counter, to ensure that those buggy VMs
+do the right thing and the interrupts stop. And then write MODE 0 *again*
+to stop the counter on compliant implementations too.
+
+Apparently, Hyper-V keeps firing the IRQ *repeatedly* even in mode zero
+when it should only happen once, but the second MODE write stops that too.
+
+Userspace test program (mostly written by tglx):
+=====
+ #include <stdio.h>
+ #include <unistd.h>
+ #include <stdlib.h>
+ #include <stdint.h>
+ #include <sys/io.h>
+ #define BUILDIO(bwl, bw, type)						\
+static __always_inline void __out##bwl(type value, uint16_t port)	\
+{									\
+	asm volatile("out" #bwl " %" #bw "0, %w1"			\
+		     : : "a"(value), "Nd"(port));			\
+}									\
+									\
+static __always_inline type __in##bwl(uint16_t port)			\
+{									\
+	type value;							\
+	asm volatile("in" #bwl " %w1, %" #bw "0"			\
+		     : "=a"(value) : "Nd"(port));			\
+	return value;							\
+}
+
+BUILDIO(b, b, uint8_t)
+
+ #define inb __inb
+ #define outb __outb
+
+ #define PIT_MODE	0x43
+ #define PIT_CH0	0x40
+ #define PIT_CH2	0x42
+
+static int is8254;
+
+static void dump_pit(void)
+{
+	if (is8254) {
+		// Latch and output counter and status
+		outb(0xC2, PIT_MODE);
+		printf("%02x %02x %02x\n", inb(PIT_CH0), inb(PIT_CH0), inb(PIT_CH0));
+	} else {
+		// Latch and output counter
+		outb(0x0, PIT_MODE);
+		printf("%02x %02x\n", inb(PIT_CH0), inb(PIT_CH0));
+	}
+}
+
+int main(int argc, char* argv[])
+{
+	int nr_counts = 2;
+
+	if (argc > 1)
+		nr_counts = atoi(argv[1]);
+
+	if (argc > 2)
+		is8254 = 1;
+
+	if (ioperm(0x40, 4, 1) != 0)
+		return 1;
+
+	dump_pit();
+
+	printf("Set oneshot\n");
+	outb(0x38, PIT_MODE);
+	outb(0x00, PIT_CH0);
+	outb(0x0F, PIT_CH0);
+
+	dump_pit();
+	usleep(1000);
+	dump_pit();
+
+	printf("Set periodic\n");
+	outb(0x34, PIT_MODE);
+	outb(0x00, PIT_CH0);
+	outb(0x0F, PIT_CH0);
+
+	dump_pit();
+	usleep(1000);
+	dump_pit();
+	dump_pit();
+	usleep(100000);
+	dump_pit();
+	usleep(100000);
+	dump_pit();
+
+	printf("Set stop (%d counter writes)\n", nr_counts);
+	outb(0x30, PIT_MODE);
+	while (nr_counts--)
+		outb(0xFF, PIT_CH0);
+
+	dump_pit();
+	usleep(100000);
+	dump_pit();
+	usleep(100000);
+	dump_pit();
+
+	printf("Set MODE 0\n");
+	outb(0x30, PIT_MODE);
+
+	dump_pit();
+	usleep(100000);
+	dump_pit();
+	usleep(100000);
+	dump_pit();
+
+	return 0;
+}
+=====
+
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Co-developed-by: Li RongQing <lirongqing@baidu.com>
+Signed-off-by: Li RongQing <lirongqing@baidu.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Michael Kelley <mhkelley@outlook.com>
+Link: https://lore.kernel.org/all/20240802135555.564941-2-dwmw2@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/mshyperv.c |   11 -----------
+ drivers/clocksource/i8253.c    |   36 +++++++++++++++++++++++++-----------
+ include/linux/i8253.h          |    1 -
+ 3 files changed, 25 insertions(+), 23 deletions(-)
+
+--- a/arch/x86/kernel/cpu/mshyperv.c
++++ b/arch/x86/kernel/cpu/mshyperv.c
+@@ -16,7 +16,6 @@
+ #include <linux/interrupt.h>
+ #include <linux/irq.h>
+ #include <linux/kexec.h>
+-#include <linux/i8253.h>
+ #include <linux/random.h>
+ #include <asm/processor.h>
+ #include <asm/hypervisor.h>
+@@ -310,16 +309,6 @@ static void __init ms_hyperv_init_platfo
+ 	if (efi_enabled(EFI_BOOT))
+ 		x86_platform.get_nmi_reason = hv_get_nmi_reason;
+ 
+-	/*
+-	 * Hyper-V VMs have a PIT emulation quirk such that zeroing the
+-	 * counter register during PIT shutdown restarts the PIT. So it
+-	 * continues to interrupt @18.2 HZ. Setting i8253_clear_counter
+-	 * to false tells pit_shutdown() not to zero the counter so that
+-	 * the PIT really is shutdown. Generation 2 VMs don't have a PIT,
+-	 * and setting this value has no effect.
+-	 */
+-	i8253_clear_counter_on_shutdown = false;
+-
+ #if IS_ENABLED(CONFIG_HYPERV)
+ 	/*
+ 	 * Setup the hook to get control post apic initialization.
+--- a/drivers/clocksource/i8253.c
++++ b/drivers/clocksource/i8253.c
+@@ -20,13 +20,6 @@
+ DEFINE_RAW_SPINLOCK(i8253_lock);
+ EXPORT_SYMBOL(i8253_lock);
+ 
+-/*
+- * Handle PIT quirk in pit_shutdown() where zeroing the counter register
+- * restarts the PIT, negating the shutdown. On platforms with the quirk,
+- * platform specific code can set this to false.
+- */
+-bool i8253_clear_counter_on_shutdown __ro_after_init = true;
+-
+ #ifdef CONFIG_CLKSRC_I8253
+ /*
+  * Since the PIT overflows every tick, its not very useful
+@@ -112,12 +105,33 @@ void clockevent_i8253_disable(void)
+ {
+ 	raw_spin_lock(&i8253_lock);
+ 
++	/*
++	 * Writing the MODE register should stop the counter, according to
++	 * the datasheet. This appears to work on real hardware (well, on
++	 * modern Intel and AMD boxes; I didn't dig the Pegasos out of the
++	 * shed).
++	 *
++	 * However, some virtual implementations differ, and the MODE change
++	 * doesn't have any effect until either the counter is written (KVM
++	 * in-kernel PIT) or the next interrupt (QEMU). And in those cases,
++	 * it may not stop the *count*, only the interrupts. Although in
++	 * the virt case, that probably doesn't matter, as the value of the
++	 * counter will only be calculated on demand if the guest reads it;
++	 * it's the interrupts which cause steal time.
++	 *
++	 * Hyper-V apparently has a bug where even in mode 0, the IRQ keeps
++	 * firing repeatedly if the counter is running. But it *does* do the
++	 * right thing when the MODE register is written.
++	 *
++	 * So: write the MODE and then load the counter, which ensures that
++	 * the IRQ is stopped on those buggy virt implementations. And then
++	 * write the MODE again, which is the right way to stop it.
++	 */
+ 	outb_p(0x30, PIT_MODE);
++	outb_p(0, PIT_CH0);
++	outb_p(0, PIT_CH0);
+ 
+-	if (i8253_clear_counter_on_shutdown) {
+-		outb_p(0, PIT_CH0);
+-		outb_p(0, PIT_CH0);
+-	}
++	outb_p(0x30, PIT_MODE);
+ 
+ 	raw_spin_unlock(&i8253_lock);
+ }
+--- a/include/linux/i8253.h
++++ b/include/linux/i8253.h
+@@ -21,7 +21,6 @@
+ #define PIT_LATCH	((PIT_TICK_RATE + HZ/2) / HZ)
+ 
+ extern raw_spinlock_t i8253_lock;
+-extern bool i8253_clear_counter_on_shutdown;
+ extern struct clock_event_device i8253_clockevent;
+ extern void clockevent_i8253_init(bool oneshot);
+ extern void clockevent_i8253_disable(void);
diff --git a/queue-5.4/sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch b/queue-5.4/sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch
new file mode 100644
index 0000000000..883e8a925f
--- /dev/null
+++ b/queue-5.4/sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch
@@ -0,0 +1,67 @@
+From 5097cbcb38e6e0d2627c9dde1985e91d2c9f880e Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Thu, 11 Apr 2024 16:39:05 +0200
+Subject: sched/isolation: Prevent boot crash when the boot CPU is nohz_full
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+Documentation/timers/no_hz.rst states that the "nohz_full=" mask must not
+include the boot CPU, which is no longer true after:
+
+  08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full").
+
+However after:
+
+  aae17ebb53cd ("workqueue: Avoid using isolated cpus' timers on queue_delayed_work")
+
+the kernel will crash at boot time in this case; housekeeping_any_cpu()
+returns an invalid CPU number until smp_init() brings the first
+housekeeping CPU up.
+
+Change housekeeping_any_cpu() to check the result of cpumask_any_and() and
+return smp_processor_id() in this case.
+
+This is just the simple and backportable workaround which fixes the
+symptom, but smp_processor_id() at boot time should be safe at least for
+type == HK_TYPE_TIMER; this more or less matches the tick_do_timer_boot_cpu
+logic.
+
+There is no worry about cpu_down(); tick_nohz_cpu_down() will not allow
+tick_do_timer_cpu (the 1st online housekeeping CPU) to be offlined.
+
+[ Apply only the documentation changes, as the commit which causes the
+  boot crash when the boot CPU is nohz_full is not backported to stable
+  kernels - Krishanth ]
+
+Reported-by: Chris von Recklinghausen <crecklin@redhat.com>
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Phil Auld <pauld@redhat.com>
+Acked-by: Frederic Weisbecker <frederic@kernel.org>
+Link: https://lore.kernel.org/r/20240411143905.GA19288@redhat.com
+Closes: https://lore.kernel.org/all/20240402105847.GA24832@redhat.com/
+Signed-off-by: Krishanth Jagaduri <Krishanth.Jagaduri@sony.com>
+[ strip out upstream commit and Fixes: so tools don't get confused that
+  this commit actually does anything real - gregkh]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/timers/no_hz.rst |    7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/Documentation/timers/no_hz.rst
++++ b/Documentation/timers/no_hz.rst
+@@ -129,11 +129,8 @@ adaptive-tick CPUs:  At least one non-ad
+ online to handle timekeeping tasks in order to ensure that system
+ calls like gettimeofday() returns accurate values on adaptive-tick CPUs.
+ (This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running
+-user processes to observe slight drifts in clock rate.)  Therefore, the
+-boot CPU is prohibited from entering adaptive-ticks mode.  Specifying a
+-"nohz_full=" mask that includes the boot CPU will result in a boot-time
+-error message, and the boot CPU will be removed from the mask.  Note that
+-this means that your system must have at least two CPUs in order for
++user processes to observe slight drifts in clock rate.) Note that this
++means that your system must have at least two CPUs in order for
+ CONFIG_NO_HZ_FULL=y to do anything for you.
+ 
+ Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
diff --git a/queue-5.4/series b/queue-5.4/series
index 90efacb7ec..038082ecb7 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -1 +1,3 @@
 vlan-fix-memory-leak-in-vlan_newlink.patch
+clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch
+sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch