x86/nmi: Add an emergency handler in nmi_desc & use it in nmi_shootdown_cpus()
author    Waiman Long <longman@redhat.com>
          Thu, 6 Feb 2025 19:18:44 +0000 (14:18 -0500)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Wed, 4 Jun 2025 12:38:00 +0000 (14:38 +0200)
[ Upstream commit fe37c699ae3eed6e02ee55fbf5cb9ceb7fcfd76c ]

Depending on the type of panic, it was found that the
__register_nmi_handler() function can be called in NMI context from
nmi_shootdown_cpus(), leading to a lockdep splat:

  WARNING: inconsistent lock state
  inconsistent {INITIAL USE} -> {IN-NMI} usage.

   lock(&nmi_desc[0].lock);
   <Interrupt>
     lock(&nmi_desc[0].lock);

  Call Trace:
    _raw_spin_lock_irqsave
    __register_nmi_handler
    nmi_shootdown_cpus
    kdump_nmi_shootdown_cpus
    native_machine_crash_shutdown
    __crash_kexec

In this particular case, the following panic message was printed right
before the splat:

  Kernel panic - not syncing: Fatal hardware error!

The message appears to have come from __ghes_panic() running in NMI
context.

The __register_nmi_handler() function, which takes the nmi_desc lock
with interrupts disabled, shouldn't be called from NMI context as this
can lead to deadlock.
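
To make the hazard concrete (this sketch is not part of the commit):
raw_spin_lock_irqsave() masks ordinary interrupts but not NMIs, so an
NMI landing on a CPU that already holds nmi_desc[0].lock and then
trying to re-acquire it would spin forever. A minimal userspace model
of that situation, with invented names (nmi_desc_lock, simulated_nmi),
could look like this:

  /*
   * Model only: pthread_spin_trylock() stands in for the blocking
   * raw_spin_lock_irqsave() that would spin forever in the real case.
   */
  #include <pthread.h>
  #include <stdio.h>

  static pthread_spinlock_t nmi_desc_lock;  /* models nmi_desc[0].lock */

  /* Models the crash path registering a handler from NMI context. */
  static void simulated_nmi(void)
  {
          if (pthread_spin_trylock(&nmi_desc_lock) != 0) {
                  puts("NMI: lock already held on this CPU -> would deadlock");
                  return;
          }
          pthread_spin_unlock(&nmi_desc_lock);
  }

  int main(void)
  {
          pthread_spin_init(&nmi_desc_lock, PTHREAD_PROCESS_PRIVATE);

          pthread_spin_lock(&nmi_desc_lock);  /* lock held, as in __register_nmi_handler() */
          simulated_nmi();                    /* NMI hits the same CPU */
          pthread_spin_unlock(&nmi_desc_lock);

          pthread_spin_destroy(&nmi_desc_lock);
          return 0;
  }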

The nmi_shootdown_cpus() function can only be invoked once. After the
first invocation, all other CPUs should be stuck in the newly added
crash_nmi_callback() and cannot respond to a second NMI.

Fix it by adding a new emergency NMI handler to the nmi_desc structure
and providing a new set_emergency_nmi_handler() helper to set
crash_nmi_callback() in any context. The new emergency handler preempts
the other handlers in the linked list, which eliminates the need to take
any lock and serves the panic-in-NMI use case.
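
For illustration only (again, not part of the commit), the fix boils
down to a single-writer publish of a function pointer that the NMI path
reads without any locking. A standalone C model of that pattern, with
invented names (emerg_handler, set_emergency_handler_model,
nmi_handle_model), could look like this:

  /*
   * Model only: a writer publishes an "emergency handler" pointer once;
   * the "NMI path" reads it and, if set, calls it instead of walking a
   * handler list.  The kernel side uses smp_wmb() and a plain read for
   * the same purpose; C11 release/acquire is used here to keep the model
   * self-contained.
   */
  #include <stdatomic.h>
  #include <stdio.h>

  typedef int (*handler_t)(void);

  static _Atomic(handler_t) emerg_handler;  /* models nmi_desc::emerg_handler */

  static int crash_handler_model(void)
  {
          puts("emergency handler ran, regular list skipped");
          return 1;
  }

  /* Models set_emergency_nmi_handler(): publish the handler pointer. */
  static void set_emergency_handler_model(handler_t h)
  {
          atomic_store_explicit(&emerg_handler, h, memory_order_release);
  }

  /* Models the hot path in nmi_handle(): check the emergency pointer first. */
  static int nmi_handle_model(void)
  {
          handler_t h = atomic_load_explicit(&emerg_handler, memory_order_acquire);

          if (h)
                  return h();

          /* ... otherwise walk the regular handler list under RCU ... */
          puts("no emergency handler, walking the regular list");
          return 0;
  }

  int main(void)
  {
          nmi_handle_model();                               /* list path */
          set_emergency_handler_model(crash_handler_model);
          nmi_handle_model();                               /* emergency path */
          return 0;
  }

The kernel side achieves the required ordering with the smp_wmb() in
set_emergency_nmi_handler(), as shown in the diff below.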

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Rik van Riel <riel@surriel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20250206191844.131700-1-longman@redhat.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
arch/x86/include/asm/nmi.h
arch/x86/kernel/nmi.c
arch/x86/kernel/reboot.c

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 1cb9c17a4cb4b1fba49646749ee2a6400ab6fb93..affe5522961aed6a1960a0ce76da910a525e959d 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -58,6 +58,8 @@ int __register_nmi_handler(unsigned int, struct nmiaction *);
 
 void unregister_nmi_handler(unsigned int, const char *);
 
+void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler);
+
 void stop_nmi(void);
 void restart_nmi(void);
 void local_touch_nmi(void);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index b892fe7035db5f61929a46f76f943b9b749b06b2..a858d8e5d610416948866028f7a6596e098309a1 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
 #define CREATE_TRACE_POINTS
 #include <trace/events/nmi.h>
 
+/*
+ * An emergency handler can be set in any context including NMI
+ */
 struct nmi_desc {
        raw_spinlock_t lock;
+       nmi_handler_t emerg_handler;
        struct list_head head;
 };
 
@@ -121,9 +125,22 @@ static void nmi_check_duration(struct nmiaction *action, u64 duration)
 static int nmi_handle(unsigned int type, struct pt_regs *regs)
 {
        struct nmi_desc *desc = nmi_to_desc(type);
+       nmi_handler_t ehandler;
        struct nmiaction *a;
        int handled=0;
 
+       /*
+        * Call the emergency handler, if set
+        *
+        * In the case of crash_nmi_callback() emergency handler, it will
+        * return in the case of the crashing CPU to enable it to complete
+        * other necessary crashing actions ASAP. Other handlers in the
+        * linked list won't need to be run.
+        */
+       ehandler = desc->emerg_handler;
+       if (ehandler)
+               return ehandler(type, regs);
+
        rcu_read_lock();
 
        /*
@@ -209,6 +226,31 @@ void unregister_nmi_handler(unsigned int type, const char *name)
 }
 EXPORT_SYMBOL_GPL(unregister_nmi_handler);
 
+/**
+ * set_emergency_nmi_handler - Set emergency handler
+ * @type:    NMI type
+ * @handler: the emergency handler to be stored
+ *
+ * Set an emergency NMI handler which, if set, will preempt all the other
+ * handlers in the linked list. If a NULL handler is passed in, it will clear
+ * it. It is expected that concurrent calls to this function will not happen
+ * or the system is screwed beyond repair.
+ */
+void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler)
+{
+       struct nmi_desc *desc = nmi_to_desc(type);
+
+       if (WARN_ON_ONCE(desc->emerg_handler == handler))
+               return;
+       desc->emerg_handler = handler;
+
+       /*
+        * Ensure the emergency handler is visible to other CPUs before
+        * function return
+        */
+       smp_wmb();
+}
+
 static void
 pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index deedd77c7593f0a89a5e4b30eba6fa1404d4abbd..d8f7f8e43e19984a5c84d3487f46587e232e8eb8 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -874,15 +874,11 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
        shootdown_callback = callback;
 
        atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
-       /* Would it be better to replace the trap vector here? */
-       if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
-                                NMI_FLAG_FIRST, "crash"))
-               return;         /* Return what? */
+
        /*
-        * Ensure the new callback function is set before sending
-        * out the NMI
+        * Set emergency handler to preempt other handlers.
         */
-       wmb();
+       set_emergency_nmi_handler(NMI_LOCAL, crash_nmi_callback);
 
        apic_send_IPI_allbutself(NMI_VECTOR);