git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Mar 2025 16:02:53 +0000 (17:02 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Mar 2025 16:02:53 +0000 (17:02 +0100)
added patches:
clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch
sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch

queue-5.10/clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch [new file with mode: 0644]
queue-5.10/sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch b/queue-5.10/clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch
new file mode 100644 (file)
index 0000000..510791e
--- /dev/null
@@ -0,0 +1,237 @@
+From 531b2ca0a940ac9db03f246c8b77c4201de72b00 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Fri, 2 Aug 2024 14:55:55 +0100
+Subject: clockevents/drivers/i8253: Fix stop sequence for timer 0
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 531b2ca0a940ac9db03f246c8b77c4201de72b00 upstream.
+
+According to the data sheet, writing the MODE register should stop the
+counter (and thus the interrupts). This appears to work on real hardware,
+at least modern Intel and AMD systems. It should also work on Hyper-V.
+
+However, on some buggy virtual machines the mode change doesn't have any
+effect until the counter is subsequently loaded (or perhaps when the IRQ
+next fires).
+
+So, set MODE 0 and then load the counter, to ensure that those buggy VMs
+do the right thing and the interrupts stop. And then write MODE 0 *again*
+to stop the counter on compliant implementations too.
+
+Apparently, Hyper-V keeps firing the IRQ *repeatedly* even in mode zero
+when it should only happen once, but the second MODE write stops that too.
+
+Userspace test program (mostly written by tglx):
+=====
+ #include <stdio.h>
+ #include <unistd.h>
+ #include <stdlib.h>
+ #include <stdint.h>
+ #include <sys/io.h>
+
+ #define BUILDIO(bwl, bw, type)                                        \
+static __always_inline void __out##bwl(type value, uint16_t port)      \
+{                                                                      \
+       asm volatile("out" #bwl " %" #bw "0, %w1"                       \
+                    : : "a"(value), "Nd"(port));                       \
+}                                                                      \
+                                                                       \
+static __always_inline type __in##bwl(uint16_t port)                   \
+{                                                                      \
+       type value;                                                     \
+       asm volatile("in" #bwl " %w1, %" #bw "0"                        \
+                    : "=a"(value) : "Nd"(port));                       \
+       return value;                                                   \
+}
+
+BUILDIO(b, b, uint8_t)
+
+ #define inb __inb
+ #define outb __outb
+
+ #define PIT_MODE      0x43
+ #define PIT_CH0       0x40
+ #define PIT_CH2       0x42
+
+static int is8254;
+
+static void dump_pit(void)
+{
+       if (is8254) {
+               // Latch and output counter and status
+               outb(0xC2, PIT_MODE);
+               printf("%02x %02x %02x\n", inb(PIT_CH0), inb(PIT_CH0), inb(PIT_CH0));
+       } else {
+               // Latch and output counter
+               outb(0x0, PIT_MODE);
+               printf("%02x %02x\n", inb(PIT_CH0), inb(PIT_CH0));
+       }
+}
+
+int main(int argc, char* argv[])
+{
+       int nr_counts = 2;
+
+       if (argc > 1)
+               nr_counts = atoi(argv[1]);
+
+       if (argc > 2)
+               is8254 = 1;
+
+       if (ioperm(0x40, 4, 1) != 0)
+               return 1;
+
+       dump_pit();
+
+       printf("Set oneshot\n");
+       outb(0x38, PIT_MODE);
+       outb(0x00, PIT_CH0);
+       outb(0x0F, PIT_CH0);
+
+       dump_pit();
+       usleep(1000);
+       dump_pit();
+
+       printf("Set periodic\n");
+       outb(0x34, PIT_MODE);
+       outb(0x00, PIT_CH0);
+       outb(0x0F, PIT_CH0);
+
+       dump_pit();
+       usleep(1000);
+       dump_pit();
+       dump_pit();
+       usleep(100000);
+       dump_pit();
+       usleep(100000);
+       dump_pit();
+
+       printf("Set stop (%d counter writes)\n", nr_counts);
+       outb(0x30, PIT_MODE);
+       while (nr_counts--)
+               outb(0xFF, PIT_CH0);
+
+       dump_pit();
+       usleep(100000);
+       dump_pit();
+       usleep(100000);
+       dump_pit();
+
+       printf("Set MODE 0\n");
+       outb(0x30, PIT_MODE);
+
+       dump_pit();
+       usleep(100000);
+       dump_pit();
+       usleep(100000);
+       dump_pit();
+
+       return 0;
+}
+=====
+
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Co-developed-by: Li RongQing <lirongqing@baidu.com>
+Signed-off-by: Li RongQing <lirongqing@baidu.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Michael Kelley <mhkelley@outlook.com>
+Link: https://lore.kernel.org/all/20240802135555.564941-2-dwmw2@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/mshyperv.c |   11 -----------
+ drivers/clocksource/i8253.c    |   36 +++++++++++++++++++++++++-----------
+ include/linux/i8253.h          |    1 -
+ 3 files changed, 25 insertions(+), 23 deletions(-)
+
+--- a/arch/x86/kernel/cpu/mshyperv.c
++++ b/arch/x86/kernel/cpu/mshyperv.c
+@@ -16,7 +16,6 @@
+ #include <linux/interrupt.h>
+ #include <linux/irq.h>
+ #include <linux/kexec.h>
+-#include <linux/i8253.h>
+ #include <linux/random.h>
+ #include <asm/processor.h>
+ #include <asm/hypervisor.h>
+@@ -389,16 +388,6 @@ static void __init ms_hyperv_init_platfo
+       if (efi_enabled(EFI_BOOT))
+               x86_platform.get_nmi_reason = hv_get_nmi_reason;
+-      /*
+-       * Hyper-V VMs have a PIT emulation quirk such that zeroing the
+-       * counter register during PIT shutdown restarts the PIT. So it
+-       * continues to interrupt @18.2 HZ. Setting i8253_clear_counter
+-       * to false tells pit_shutdown() not to zero the counter so that
+-       * the PIT really is shutdown. Generation 2 VMs don't have a PIT,
+-       * and setting this value has no effect.
+-       */
+-      i8253_clear_counter_on_shutdown = false;
+-
+ #if IS_ENABLED(CONFIG_HYPERV)
+       /*
+        * Setup the hook to get control post apic initialization.
+--- a/drivers/clocksource/i8253.c
++++ b/drivers/clocksource/i8253.c
+@@ -20,13 +20,6 @@
+ DEFINE_RAW_SPINLOCK(i8253_lock);
+ EXPORT_SYMBOL(i8253_lock);
+-/*
+- * Handle PIT quirk in pit_shutdown() where zeroing the counter register
+- * restarts the PIT, negating the shutdown. On platforms with the quirk,
+- * platform specific code can set this to false.
+- */
+-bool i8253_clear_counter_on_shutdown __ro_after_init = true;
+-
+ #ifdef CONFIG_CLKSRC_I8253
+ /*
+  * Since the PIT overflows every tick, its not very useful
+@@ -112,12 +105,33 @@ void clockevent_i8253_disable(void)
+ {
+       raw_spin_lock(&i8253_lock);
++      /*
++       * Writing the MODE register should stop the counter, according to
++       * the datasheet. This appears to work on real hardware (well, on
++       * modern Intel and AMD boxes; I didn't dig the Pegasos out of the
++       * shed).
++       *
++       * However, some virtual implementations differ, and the MODE change
++       * doesn't have any effect until either the counter is written (KVM
++       * in-kernel PIT) or the next interrupt (QEMU). And in those cases,
++       * it may not stop the *count*, only the interrupts. Although in
++       * the virt case, that probably doesn't matter, as the value of the
++       * counter will only be calculated on demand if the guest reads it;
++       * it's the interrupts which cause steal time.
++       *
++       * Hyper-V apparently has a bug where even in mode 0, the IRQ keeps
++       * firing repeatedly if the counter is running. But it *does* do the
++       * right thing when the MODE register is written.
++       *
++       * So: write the MODE and then load the counter, which ensures that
++       * the IRQ is stopped on those buggy virt implementations. And then
++       * write the MODE again, which is the right way to stop it.
++       */
+       outb_p(0x30, PIT_MODE);
++      outb_p(0, PIT_CH0);
++      outb_p(0, PIT_CH0);
+-      if (i8253_clear_counter_on_shutdown) {
+-              outb_p(0, PIT_CH0);
+-              outb_p(0, PIT_CH0);
+-      }
++      outb_p(0x30, PIT_MODE);
+       raw_spin_unlock(&i8253_lock);
+ }
+--- a/include/linux/i8253.h
++++ b/include/linux/i8253.h
+@@ -21,7 +21,6 @@
+ #define PIT_LATCH     ((PIT_TICK_RATE + HZ/2) / HZ)
+ extern raw_spinlock_t i8253_lock;
+-extern bool i8253_clear_counter_on_shutdown;
+ extern struct clock_event_device i8253_clockevent;
+ extern void clockevent_i8253_init(bool oneshot);
+ extern void clockevent_i8253_disable(void);
diff --git a/queue-5.10/sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch b/queue-5.10/sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch
new file mode 100644 (file)
index 0000000..883e8a9
--- /dev/null
@@ -0,0 +1,67 @@
+From 5097cbcb38e6e0d2627c9dde1985e91d2c9f880e Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Thu, 11 Apr 2024 16:39:05 +0200
+Subject: sched/isolation: Prevent boot crash when the boot CPU is nohz_full
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+Documentation/timers/no_hz.rst states that the "nohz_full=" mask must not
+include the boot CPU, which is no longer true after:
+
+  08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full").
+
+However after:
+
+  aae17ebb53cd ("workqueue: Avoid using isolated cpus' timers on queue_delayed_work")
+
+the kernel will crash at boot time in this case; housekeeping_any_cpu()
+returns an invalid CPU number until smp_init() brings the first
+housekeeping CPU up.
+
+Change housekeeping_any_cpu() to check the result of cpumask_any_and() and
+return smp_processor_id() in this case.
+
+This is just the simple and backportable workaround which fixes the
+symptom, but smp_processor_id() at boot time should be safe at least for
+type == HK_TYPE_TIMER, this more or less matches the tick_do_timer_boot_cpu
+logic.
+
+There is no worry about cpu_down(); tick_nohz_cpu_down() will not allow to
+offline tick_do_timer_cpu (the 1st online housekeeping CPU).
+
+[ Apply only documentation changes as commit which causes boot
+  crash when boot CPU is nohz_full is not backported to stable
+  kernels - Krishanth ]
+
+Reported-by: Chris von Recklinghausen <crecklin@redhat.com>
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Phil Auld <pauld@redhat.com>
+Acked-by: Frederic Weisbecker <frederic@kernel.org>
+Link: https://lore.kernel.org/r/20240411143905.GA19288@redhat.com
+Closes: https://lore.kernel.org/all/20240402105847.GA24832@redhat.com/
+Signed-off-by: Krishanth Jagaduri <Krishanth.Jagaduri@sony.com>
+[ strip out upstream commit and Fixes: so tools don't get confused that
+  this commit actually does anything real - gregkh]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/timers/no_hz.rst |    7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/Documentation/timers/no_hz.rst
++++ b/Documentation/timers/no_hz.rst
+@@ -129,11 +129,8 @@ adaptive-tick CPUs:  At least one non-ad
+ online to handle timekeeping tasks in order to ensure that system
+ calls like gettimeofday() returns accurate values on adaptive-tick CPUs.
+ (This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running
+-user processes to observe slight drifts in clock rate.)  Therefore, the
+-boot CPU is prohibited from entering adaptive-ticks mode.  Specifying a
+-"nohz_full=" mask that includes the boot CPU will result in a boot-time
+-error message, and the boot CPU will be removed from the mask.  Note that
+-this means that your system must have at least two CPUs in order for
++user processes to observe slight drifts in clock rate.) Note that this
++means that your system must have at least two CPUs in order for
+ CONFIG_NO_HZ_FULL=y to do anything for you.
+ Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
diff --git a/queue-5.10/series b/queue-5.10/series
index 90efacb7ecef64f8297dc95dc7d90f3e7be6e0f9..038082ecb73b200deee52496015a316d86eaf018 100644 (file)
@@ -1 +1,3 @@
 vlan-fix-memory-leak-in-vlan_newlink.patch
+clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch
+sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch