x86/mce: Carve out the crashing_cpu check

author Borislav Petkov <bp@suse.de>

Fri, 22 Jun 2018 09:54:24 +0000 (11:54 +0200)

committer Thomas Gleixner <tglx@linutronix.de>

Fri, 22 Jun 2018 12:37:22 +0000 (14:37 +0200)
author Borislav Petkov <bp@suse.de>
Fri, 22 Jun 2018 09:54:24 +0000 (11:54 +0200)
committer Thomas Gleixner <tglx@linutronix.de>
Fri, 22 Jun 2018 12:37:22 +0000 (14:37 +0200)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c

index d62201e40027d9e177d267b53683277d042b0dd8..18804834dbc057bc4bb4abbf2055872c1d88e537 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1104,6 +1104,34 @@ static void mce_unmap_kpfn(unsigned long pfn)
  }
  #endif
  
+
+/*
+ * Cases where we avoid rendezvous handler timeout:
+ * 1) If this CPU is offline.
+ *
+ * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
+ *  skip those CPUs which remain looping in the 1st kernel - see
+ *  crash_nmi_callback().
+ *
+ * Note: there is still a small window between kexec-ing and the new kdump
+ * kernel establishing its own #MC handler, during which a broadcast MCE
+ * might not get handled properly.
+ */
+static bool __mc_check_crashing_cpu(int cpu)
+{
+       /* CPU is online and no other CPU is crashing: nothing to skip. */
+       if (!cpu_is_offline(cpu) &&
+           (crashing_cpu == -1 || crashing_cpu == cpu))
+               return false;
+
+       /* Only bail out when RIPV is set, clearing MCG_STATUS on the way. */
+       if (!(mce_rdmsrl(MSR_IA32_MCG_STATUS) & MCG_STATUS_RIPV))
+               return false;
+
+       mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+       return true;
+}
+
  /*
   * The actual machine check handler. This only handles real
   * exceptions when something got corrupted coming in through int 18.
@@ -1118,60 +1146,42 @@ static void mce_unmap_kpfn(unsigned long pfn)
   */
  void do_machine_check(struct pt_regs *regs, long error_code)
  {
+       DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
+       DECLARE_BITMAP(toclear, MAX_NR_BANKS);
         struct mca_config *cfg = &mca_cfg;
+       int cpu = smp_processor_id();
+       char *msg = "Unknown";
         struct mce m, *final;
-       int i;
         int worst = 0;
         int severity;
+       int i;
  
         /*
          * Establish sequential order between the CPUs entering the machine
          * check handler.
          */
         int order = -1;
+
         /*
          * If no_way_out gets set, there is no safe way to recover from this
          * MCE.  If mca_cfg.tolerant is cranked up, we'll try anyway.
          */
         int no_way_out = 0;
+
         /*
          * If kill_it gets set, there might be a way to recover from this
          * error.
          */
         int kill_it = 0;
-       DECLARE_BITMAP(toclear, MAX_NR_BANKS);
-       DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
-       char *msg = "Unknown";
  
         /*
          * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
          * on Intel.
          */
         int lmce = 1;
-       int cpu = smp_processor_id();
-
-       /*
-        * Cases where we avoid rendezvous handler timeout:
-        * 1) If this CPU is offline.
-        *
-        * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
-        *  skip those CPUs which remain looping in the 1st kernel - see
-        *  crash_nmi_callback().
-        *
-        * Note: there still is a small window between kexec-ing and the new,
-        * kdump kernel establishing a new #MC handler where a broadcasted MCE
-        * might not get handled properly.
-        */
-       if (cpu_is_offline(cpu) ||
-           (crashing_cpu != -1 && crashing_cpu != cpu)) {
-               u64 mcgstatus;
  
-               mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
-               if (mcgstatus & MCG_STATUS_RIPV) {
-                       mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
-                       return;
-               }
-       }
+       if (__mc_check_crashing_cpu(cpu))
+               return;
  
         ist_enter(regs);
author	Borislav Petkov <bp@suse.de>
	Fri, 22 Jun 2018 09:54:24 +0000 (11:54 +0200)
committer	Thomas Gleixner <tglx@linutronix.de>
	Fri, 22 Jun 2018 12:37:22 +0000 (14:37 +0200)