From: Greg Kroah-Hartman Date: Thu, 13 Mar 2025 16:02:41 +0000 (+0100) Subject: 5.4-stable patches X-Git-Tag: v6.6.84~69 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=84a4fe139cc46cbf364d77c60b60f7fe68ce34da;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch --- diff --git a/queue-5.4/clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch b/queue-5.4/clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch new file mode 100644 index 0000000000..f95a5bd577 --- /dev/null +++ b/queue-5.4/clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch @@ -0,0 +1,237 @@ +From 531b2ca0a940ac9db03f246c8b77c4201de72b00 Mon Sep 17 00:00:00 2001 +From: David Woodhouse +Date: Fri, 2 Aug 2024 14:55:55 +0100 +Subject: clockevents/drivers/i8253: Fix stop sequence for timer 0 + +From: David Woodhouse + +commit 531b2ca0a940ac9db03f246c8b77c4201de72b00 upstream. + +According to the data sheet, writing the MODE register should stop the +counter (and thus the interrupts). This appears to work on real hardware, +at least modern Intel and AMD systems. It should also work on Hyper-V. + +However, on some buggy virtual machines the mode change doesn't have any +effect until the counter is subsequently loaded (or perhaps when the IRQ +next fires). + +So, set MODE 0 and then load the counter, to ensure that those buggy VMs +do the right thing and the interrupts stop. And then write MODE 0 *again* +to stop the counter on compliant implementations too. + +Apparently, Hyper-V keeps firing the IRQ *repeatedly* even in mode zero +when it should only happen once, but the second MODE write stops that too. 
+ +Userspace test program (mostly written by tglx): +===== + #include <stdio.h> + #include <unistd.h> + #include <stdlib.h> + #include <stdint.h> + #include <sys/io.h> + +static __always_inline void __out##bwl(type value, uint16_t port) \ +{ \ + asm volatile("out" #bwl " %" #bw "0, %w1" \ + : : "a"(value), "Nd"(port)); \ +} \ + \ +static __always_inline type __in##bwl(uint16_t port) \ +{ \ + type value; \ + asm volatile("in" #bwl " %w1, %" #bw "0" \ + : "=a"(value) : "Nd"(port)); \ + return value; \ +} + +BUILDIO(b, b, uint8_t) + + #define inb __inb + #define outb __outb + + #define PIT_MODE 0x43 + #define PIT_CH0 0x40 + #define PIT_CH2 0x42 + +static int is8254; + +static void dump_pit(void) +{ + if (is8254) { + // Latch and output counter and status + outb(0xC2, PIT_MODE); + printf("%02x %02x %02x\n", inb(PIT_CH0), inb(PIT_CH0), inb(PIT_CH0)); + } else { + // Latch and output counter + outb(0x0, PIT_MODE); + printf("%02x %02x\n", inb(PIT_CH0), inb(PIT_CH0)); + } +} + +int main(int argc, char* argv[]) +{ + int nr_counts = 2; + + if (argc > 1) + nr_counts = atoi(argv[1]); + + if (argc > 2) + is8254 = 1; + + if (ioperm(0x40, 4, 1) != 0) + return 1; + + dump_pit(); + + printf("Set oneshot\n"); + outb(0x38, PIT_MODE); + outb(0x00, PIT_CH0); + outb(0x0F, PIT_CH0); + + dump_pit(); + usleep(1000); + dump_pit(); + + printf("Set periodic\n"); + outb(0x34, PIT_MODE); + outb(0x00, PIT_CH0); + outb(0x0F, PIT_CH0); + + dump_pit(); + usleep(1000); + dump_pit(); + dump_pit(); + usleep(100000); + dump_pit(); + usleep(100000); + dump_pit(); + + printf("Set stop (%d counter writes)\n", nr_counts); + outb(0x30, PIT_MODE); + while (nr_counts--) + outb(0xFF, PIT_CH0); + + dump_pit(); + usleep(100000); + dump_pit(); + usleep(100000); + dump_pit(); + + printf("Set MODE 0\n"); + outb(0x30, PIT_MODE); + + dump_pit(); + usleep(100000); + dump_pit(); + usleep(100000); + dump_pit(); + + return 0; +} +===== + +Suggested-by: Sean Christopherson +Co-developed-by: Li RongQing +Signed-off-by: Li RongQing +Signed-off-by: David Woodhouse 
+Signed-off-by: Thomas Gleixner +Tested-by: Michael Kelley +Link: https://lore.kernel.org/all/20240802135555.564941-2-dwmw2@infradead.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/mshyperv.c | 11 ----------- + drivers/clocksource/i8253.c | 36 +++++++++++++++++++++++++----------- + include/linux/i8253.h | 1 - + 3 files changed, 25 insertions(+), 23 deletions(-) + +--- a/arch/x86/kernel/cpu/mshyperv.c ++++ b/arch/x86/kernel/cpu/mshyperv.c +@@ -16,7 +16,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -310,16 +309,6 @@ static void __init ms_hyperv_init_platfo + if (efi_enabled(EFI_BOOT)) + x86_platform.get_nmi_reason = hv_get_nmi_reason; + +- /* +- * Hyper-V VMs have a PIT emulation quirk such that zeroing the +- * counter register during PIT shutdown restarts the PIT. So it +- * continues to interrupt @18.2 HZ. Setting i8253_clear_counter +- * to false tells pit_shutdown() not to zero the counter so that +- * the PIT really is shutdown. Generation 2 VMs don't have a PIT, +- * and setting this value has no effect. +- */ +- i8253_clear_counter_on_shutdown = false; +- + #if IS_ENABLED(CONFIG_HYPERV) + /* + * Setup the hook to get control post apic initialization. +--- a/drivers/clocksource/i8253.c ++++ b/drivers/clocksource/i8253.c +@@ -20,13 +20,6 @@ + DEFINE_RAW_SPINLOCK(i8253_lock); + EXPORT_SYMBOL(i8253_lock); + +-/* +- * Handle PIT quirk in pit_shutdown() where zeroing the counter register +- * restarts the PIT, negating the shutdown. On platforms with the quirk, +- * platform specific code can set this to false. +- */ +-bool i8253_clear_counter_on_shutdown __ro_after_init = true; +- + #ifdef CONFIG_CLKSRC_I8253 + /* + * Since the PIT overflows every tick, its not very useful +@@ -112,12 +105,33 @@ void clockevent_i8253_disable(void) + { + raw_spin_lock(&i8253_lock); + ++ /* ++ * Writing the MODE register should stop the counter, according to ++ * the datasheet. 
This appears to work on real hardware (well, on ++ * modern Intel and AMD boxes; I didn't dig the Pegasos out of the ++ * shed). ++ * ++ * However, some virtual implementations differ, and the MODE change ++ * doesn't have any effect until either the counter is written (KVM ++ * in-kernel PIT) or the next interrupt (QEMU). And in those cases, ++ * it may not stop the *count*, only the interrupts. Although in ++ * the virt case, that probably doesn't matter, as the value of the ++ * counter will only be calculated on demand if the guest reads it; ++ * it's the interrupts which cause steal time. ++ * ++ * Hyper-V apparently has a bug where even in mode 0, the IRQ keeps ++ * firing repeatedly if the counter is running. But it *does* do the ++ * right thing when the MODE register is written. ++ * ++ * So: write the MODE and then load the counter, which ensures that ++ * the IRQ is stopped on those buggy virt implementations. And then ++ * write the MODE again, which is the right way to stop it. 
++ */ + outb_p(0x30, PIT_MODE); ++ outb_p(0, PIT_CH0); ++ outb_p(0, PIT_CH0); + +- if (i8253_clear_counter_on_shutdown) { +- outb_p(0, PIT_CH0); +- outb_p(0, PIT_CH0); +- } ++ outb_p(0x30, PIT_MODE); + + raw_spin_unlock(&i8253_lock); + } +--- a/include/linux/i8253.h ++++ b/include/linux/i8253.h +@@ -21,7 +21,6 @@ + #define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ) + + extern raw_spinlock_t i8253_lock; +-extern bool i8253_clear_counter_on_shutdown; + extern struct clock_event_device i8253_clockevent; + extern void clockevent_i8253_init(bool oneshot); + extern void clockevent_i8253_disable(void); diff --git a/queue-5.4/sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch b/queue-5.4/sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch new file mode 100644 index 0000000000..883e8a925f --- /dev/null +++ b/queue-5.4/sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch @@ -0,0 +1,67 @@ +From 5097cbcb38e6e0d2627c9dde1985e91d2c9f880e Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Thu, 11 Apr 2024 16:39:05 +0200 +Subject: sched/isolation: Prevent boot crash when the boot CPU is nohz_full + +From: Oleg Nesterov + +Documentation/timers/no_hz.rst states that the "nohz_full=" mask must not +include the boot CPU, which is no longer true after: + + 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full"). + +However after: + + aae17ebb53cd ("workqueue: Avoid using isolated cpus' timers on queue_delayed_work") + +the kernel will crash at boot time in this case; housekeeping_any_cpu() +returns an invalid CPU number until smp_init() brings the first +housekeeping CPU up. + +Change housekeeping_any_cpu() to check the result of cpumask_any_and() and +return smp_processor_id() in this case. + +This is just the simple and backportable workaround which fixes the +symptom, but smp_processor_id() at boot time should be safe at least for +type == HK_TYPE_TIMER, this more or less matches the tick_do_timer_boot_cpu +logic. 
+ +There is no worry about cpu_down(); tick_nohz_cpu_down() will not allow to +offline tick_do_timer_cpu (the 1st online housekeeping CPU). + +[ Apply only documentation changes as commit which causes boot + crash when boot CPU is nohz_full is not backported to stable + kernels - Krishanth ] + +Reported-by: Chris von Recklinghausen +Signed-off-by: Oleg Nesterov +Signed-off-by: Thomas Gleixner +Signed-off-by: Ingo Molnar +Reviewed-by: Phil Auld +Acked-by: Frederic Weisbecker +Link: https://lore.kernel.org/r/20240411143905.GA19288@redhat.com +Closes: https://lore.kernel.org/all/20240402105847.GA24832@redhat.com/ +Signed-off-by: Krishanth Jagaduri +[ strip out upstream commit and Fixes: so tools don't get confused that + this commit actually does anything real - gregkh] +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/timers/no_hz.rst | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +--- a/Documentation/timers/no_hz.rst ++++ b/Documentation/timers/no_hz.rst +@@ -129,11 +129,8 @@ adaptive-tick CPUs: At least one non-ad + online to handle timekeeping tasks in order to ensure that system + calls like gettimeofday() returns accurate values on adaptive-tick CPUs. + (This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running +-user processes to observe slight drifts in clock rate.) Therefore, the +-boot CPU is prohibited from entering adaptive-ticks mode. Specifying a +-"nohz_full=" mask that includes the boot CPU will result in a boot-time +-error message, and the boot CPU will be removed from the mask. Note that +-this means that your system must have at least two CPUs in order for ++user processes to observe slight drifts in clock rate.) Note that this ++means that your system must have at least two CPUs in order for + CONFIG_NO_HZ_FULL=y to do anything for you. + + Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded. 
diff --git a/queue-5.4/series b/queue-5.4/series index 90efacb7ec..038082ecb7 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -1 +1,3 @@ vlan-fix-memory-leak-in-vlan_newlink.patch +clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch +sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch