]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 28 Jun 2018 02:09:16 +0000 (11:09 +0900)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 28 Jun 2018 02:09:16 +0000 (11:09 +0900)
added patches:
x86-call-fixup_exception-before-notify_die-in-math_error.patch
x86-mce-check-for-alternate-indication-of-machine-check-recovery-on-skylake.patch
x86-mce-do-not-overwrite-mci_status-in-mce_no_way_out.patch
x86-mce-fix-incorrect-machine-check-from-unknown-source-message.patch
x86-mce-improve-error-message-when-kernel-cannot-recover.patch
x86-spectre_v1-disable-compiler-optimizations-over-array_index_mask_nospec.patch

queue-4.9/x86-call-fixup_exception-before-notify_die-in-math_error.patch [new file with mode: 0644]
queue-4.9/x86-mce-check-for-alternate-indication-of-machine-check-recovery-on-skylake.patch [new file with mode: 0644]
queue-4.9/x86-mce-do-not-overwrite-mci_status-in-mce_no_way_out.patch [new file with mode: 0644]
queue-4.9/x86-mce-fix-incorrect-machine-check-from-unknown-source-message.patch [new file with mode: 0644]
queue-4.9/x86-mce-improve-error-message-when-kernel-cannot-recover.patch [new file with mode: 0644]
queue-4.9/x86-spectre_v1-disable-compiler-optimizations-over-array_index_mask_nospec.patch [new file with mode: 0644]

diff --git a/queue-4.9/x86-call-fixup_exception-before-notify_die-in-math_error.patch b/queue-4.9/x86-call-fixup_exception-before-notify_die-in-math_error.patch
new file mode 100644 (file)
index 0000000..e1cf159
--- /dev/null
@@ -0,0 +1,58 @@
+From 3ae6295ccb7cf6d344908209701badbbbb503e40 Mon Sep 17 00:00:00 2001
+From: Siarhei Liakh <Siarhei.Liakh@concurrent-rt.com>
+Date: Thu, 14 Jun 2018 19:36:07 +0000
+Subject: x86: Call fixup_exception() before notify_die() in math_error()
+
+From: Siarhei Liakh <Siarhei.Liakh@concurrent-rt.com>
+
+commit 3ae6295ccb7cf6d344908209701badbbbb503e40 upstream.
+
+fpu__drop() has an explicit fwait which under some conditions can trigger a
+fixable FPU exception while in kernel. Thus, we should attempt to fixup the
+exception first, and only call notify_die() if the fixup failed just like
+in do_general_protection(). The original call sequence incorrectly triggers
+KDB entry on debug kernels under particular FPU-intensive workloads.
+
+Andy noted that this makes the whole conditional irq enable thing even
+more inconsistent, but fixing that is outside the scope of this.
+
+Signed-off-by: Siarhei Liakh <siarhei.liakh@concurrent-rt.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Andy Lutomirski <luto@kernel.org>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: "Borislav  Petkov" <bpetkov@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/DM5PR11MB201156F1CAB2592B07C79A03B17D0@DM5PR11MB2011.namprd11.prod.outlook.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/traps.c |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -799,16 +799,18 @@ static void math_error(struct pt_regs *r
+       char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
+                                               "simd exception";
+-      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
+-              return;
+       cond_local_irq_enable(regs);
+       if (!user_mode(regs)) {
+-              if (!fixup_exception(regs, trapnr)) {
+-                      task->thread.error_code = error_code;
+-                      task->thread.trap_nr = trapnr;
++              if (fixup_exception(regs, trapnr))
++                      return;
++
++              task->thread.error_code = error_code;
++              task->thread.trap_nr = trapnr;
++
++              if (notify_die(DIE_TRAP, str, regs, error_code,
++                                      trapnr, SIGFPE) != NOTIFY_STOP)
+                       die(str, regs, error_code);
+-              }
+               return;
+       }
diff --git a/queue-4.9/x86-mce-check-for-alternate-indication-of-machine-check-recovery-on-skylake.patch b/queue-4.9/x86-mce-check-for-alternate-indication-of-machine-check-recovery-on-skylake.patch
new file mode 100644 (file)
index 0000000..eef4316
--- /dev/null
@@ -0,0 +1,57 @@
+From 4c5717da1d021cf368eabb3cb1adcaead56c0d1e Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Fri, 25 May 2018 14:42:09 -0700
+Subject: x86/mce: Check for alternate indication of machine check recovery on Skylake
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit 4c5717da1d021cf368eabb3cb1adcaead56c0d1e upstream.
+
+Currently we just check the "CAPID0" register to see whether the CPU
+can recover from machine checks.
+
+But there are also some special SKUs which do not have all advanced
+RAS features, but do enable machine check recovery for use with NVDIMMs.
+
+Add a check for any of bits {8:5} in the "CAPID5" register (each
+reports some NVDIMM mode available, if any of them are set, then
+the system supports memory machine check recovery).
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: stable@vger.kernel.org # 4.9
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/03cbed6e99ddafb51c2eadf9a3b7c8d7a0cc204e.1527283897.git.tony.luck@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/quirks.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/quirks.c
++++ b/arch/x86/kernel/quirks.c
+@@ -643,12 +643,19 @@ static void quirk_intel_brickland_xeon_r
+ /* Skylake */
+ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
+ {
+-      u32 capid0;
++      u32 capid0, capid5;
+       pci_read_config_dword(pdev, 0x84, &capid0);
++      pci_read_config_dword(pdev, 0x98, &capid5);
+-      if ((capid0 & 0xc0) == 0xc0)
++      /*
++       * CAPID0{7:6} indicate whether this is an advanced RAS SKU
++       * CAPID5{8:5} indicate that various NVDIMM usage modes are
++       * enabled, so memory machine check recovery is also enabled.
++       */
++      if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
+               static_branch_inc(&mcsafe_key);
++
+ }
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);
diff --git a/queue-4.9/x86-mce-do-not-overwrite-mci_status-in-mce_no_way_out.patch b/queue-4.9/x86-mce-do-not-overwrite-mci_status-in-mce_no_way_out.patch
new file mode 100644 (file)
index 0000000..ba0190e
--- /dev/null
@@ -0,0 +1,78 @@
+From 1f74c8a64798e2c488f86efc97e308b85fb7d7aa Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Fri, 22 Jun 2018 11:54:28 +0200
+Subject: x86/mce: Do not overwrite MCi_STATUS in mce_no_way_out()
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 1f74c8a64798e2c488f86efc97e308b85fb7d7aa upstream.
+
+mce_no_way_out() does a quick check during #MC to see whether some of
+the MCEs logged would require the kernel to panic immediately. And it
+passes a struct mce where MCi_STATUS gets written.
+
+However, after having saved a valid status value, the next iteration
+of the loop which goes over the MCA banks on the CPU, overwrites the
+valid status value because we're using struct mce as storage instead of
+a temporary variable.
+
+Which leads to MCE records with an empty status value:
+
+  mce: [Hardware Error]: CPU 0: Machine Check Exception: 6 Bank 0: 0000000000000000
+  mce: [Hardware Error]: RIP 10:<ffffffffbd42fbd7> {trigger_mce+0x7/0x10}
+
+In order to prevent the loss of the status register value, return
+immediately when severity is a panic one so that we can panic
+immediately with the first fatal MCE logged. This is also the intention
+of this function and not to noodle over the banks while a fatal MCE is
+already logged.
+
+Tony: read the rest of the MCA bank to populate the struct mce fully.
+
+Suggested-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20180622095428.626-8-bp@alien8.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/mcheck/mce.c |   18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/cpu/mcheck/mce.c
++++ b/arch/x86/kernel/cpu/mcheck/mce.c
+@@ -738,23 +738,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
+ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
+                         struct pt_regs *regs)
+ {
+-      int i, ret = 0;
+       char *tmp;
++      int i;
+       for (i = 0; i < mca_cfg.banks; i++) {
+               m->status = mce_rdmsrl(msr_ops.status(i));
+-              if (m->status & MCI_STATUS_VAL) {
+-                      __set_bit(i, validp);
+-                      if (quirk_no_way_out)
+-                              quirk_no_way_out(i, m, regs);
+-              }
++              if (!(m->status & MCI_STATUS_VAL))
++                      continue;
++
++              __set_bit(i, validp);
++              if (quirk_no_way_out)
++                      quirk_no_way_out(i, m, regs);
+               if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
++                      mce_read_aux(m, i);
+                       *msg = tmp;
+-                      ret = 1;
++                      return 1;
+               }
+       }
+-      return ret;
++      return 0;
+ }
+ /*
diff --git a/queue-4.9/x86-mce-fix-incorrect-machine-check-from-unknown-source-message.patch b/queue-4.9/x86-mce-fix-incorrect-machine-check-from-unknown-source-message.patch
new file mode 100644 (file)
index 0000000..aca43f1
--- /dev/null
@@ -0,0 +1,100 @@
+From 40c36e2741d7fe1e66d6ec55477ba5fd19c9c5d2 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Fri, 22 Jun 2018 11:54:23 +0200
+Subject: x86/mce: Fix incorrect "Machine check from unknown source" message
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit 40c36e2741d7fe1e66d6ec55477ba5fd19c9c5d2 upstream.
+
+Some injection testing resulted in the following console log:
+
+  mce: [Hardware Error]: CPU 22: Machine Check Exception: f Bank 1: bd80000000100134
+  mce: [Hardware Error]: RIP 10:<ffffffffc05292dd> {pmem_do_bvec+0x11d/0x330 [nd_pmem]}
+  mce: [Hardware Error]: TSC c51a63035d52 ADDR 3234bc4000 MISC 88
+  mce: [Hardware Error]: PROCESSOR 0:50654 TIME 1526502199 SOCKET 0 APIC 38 microcode 2000043
+  mce: [Hardware Error]: Run the above through 'mcelog --ascii'
+  Kernel panic - not syncing: Machine check from unknown source
+
+This confused everybody because the first line quite clearly shows
+that we found a logged error in "Bank 1", while the last line says
+"unknown source".
+
+The problem is that the Linux code doesn't do the right thing
+for a local machine check that results in a fatal error.
+
+It turns out that we know very early in the handler whether the
+machine check is fatal. The call to mce_no_way_out() has checked
+all the banks for the CPU that took the local machine check. If
+it says we must crash, we can do so right away with the right
+messages.
+
+We do scan all the banks again. This means that we might initially
+not see a problem, but during the second scan find something fatal.
+If this happens we print a slightly different message (so I can
+see if it actually ever happens).
+
+[ bp: Remove unneeded severity assignment. ]
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+Cc: linux-edac <linux-edac@vger.kernel.org>
+Cc: stable@vger.kernel.org # 4.2
+Link: http://lkml.kernel.org/r/52e049a497e86fd0b71c529651def8871c804df0.1527283897.git.tony.luck@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/mcheck/mce.c |   26 ++++++++++++++++++--------
+ 1 file changed, 18 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/cpu/mcheck/mce.c
++++ b/arch/x86/kernel/cpu/mcheck/mce.c
+@@ -1140,13 +1140,18 @@ void do_machine_check(struct pt_regs *re
+               lmce = m.mcgstatus & MCG_STATUS_LMCES;
+       /*
++       * Local machine check may already know that we have to panic.
++       * Broadcast machine check begins rendezvous in mce_start()
+        * Go through all banks in exclusion of the other CPUs. This way we
+        * don't report duplicated events on shared banks because the first one
+-       * to see it will clear it. If this is a Local MCE, then no need to
+-       * perform rendezvous.
++       * to see it will clear it.
+        */
+-      if (!lmce)
++      if (lmce) {
++              if (no_way_out)
++                      mce_panic("Fatal local machine check", &m, msg);
++      } else {
+               order = mce_start(&no_way_out);
++      }
+       for (i = 0; i < cfg->banks; i++) {
+               __clear_bit(i, toclear);
+@@ -1222,12 +1227,17 @@ void do_machine_check(struct pt_regs *re
+                       no_way_out = worst >= MCE_PANIC_SEVERITY;
+       } else {
+               /*
+-               * Local MCE skipped calling mce_reign()
+-               * If we found a fatal error, we need to panic here.
++               * If there was a fatal machine check we should have
++               * already called mce_panic earlier in this function.
++               * Since we re-read the banks, we might have found
++               * something new. Check again to see if we found a
++               * fatal error. We call "mce_severity()" again to
++               * make sure we have the right "msg".
+                */
+-               if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
+-                      mce_panic("Machine check from unknown source",
+-                              NULL, NULL);
++              if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
++                      mce_severity(&m, cfg->tolerant, &msg, true);
++                      mce_panic("Local fatal machine check!", &m, msg);
++              }
+       }
+       /*
diff --git a/queue-4.9/x86-mce-improve-error-message-when-kernel-cannot-recover.patch b/queue-4.9/x86-mce-improve-error-message-when-kernel-cannot-recover.patch
new file mode 100644 (file)
index 0000000..d2e6074
--- /dev/null
@@ -0,0 +1,56 @@
+From c7d606f560e4c698884697fef503e4abacdd8c25 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Fri, 25 May 2018 14:41:39 -0700
+Subject: x86/mce: Improve error message when kernel cannot recover
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit c7d606f560e4c698884697fef503e4abacdd8c25 upstream.
+
+Since we added support to add recovery from some errors inside the kernel in:
+
+commit b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries")
+
+we have done a less than stellar job at reporting the cause of recoverable
+machine checks that occur in other parts of the kernel. The user just gets
+the unhelpful message:
+
+       mce: [Hardware Error]: Machine check: Action required: unknown MCACOD
+
+doubly unhelpful when they check the manual for the reported IA32_MSR_STATUS.MCACOD
+and see that it is listed as one of the standard recoverable values.
+
+Add an extra rule to the MCE severity table to catch this case and report it
+as:
+
+       mce: [Hardware Error]: Machine check: Data load in unrecoverable area of kernel
+
+Fixes: b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries")
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: stable@vger.kernel.org # 4.6+
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/4cc7c465150a9a48b8b9f45d0b840278e77eb9b5.1527283897.git.tony.luck@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/mcheck/mce-severity.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
++++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
+@@ -143,6 +143,11 @@ static struct severity {
+               SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
+               USER
+               ),
++      MCESEV(
++              PANIC, "Data load in unrecoverable area of kernel",
++              SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
++              KERNEL
++              ),
+ #endif
+       MCESEV(
+               PANIC, "Action required: unknown MCACOD",
diff --git a/queue-4.9/x86-spectre_v1-disable-compiler-optimizations-over-array_index_mask_nospec.patch b/queue-4.9/x86-spectre_v1-disable-compiler-optimizations-over-array_index_mask_nospec.patch
new file mode 100644 (file)
index 0000000..187ca9e
--- /dev/null
@@ -0,0 +1,81 @@
+From eab6870fee877258122a042bfd99ee7908c40280 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Thu, 7 Jun 2018 09:13:48 -0700
+Subject: x86/spectre_v1: Disable compiler optimizations over array_index_mask_nospec()
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit eab6870fee877258122a042bfd99ee7908c40280 upstream.
+
+Mark Rutland noticed that GCC optimization passes have the potential to elide
+necessary invocations of the array_index_mask_nospec() instruction sequence,
+so mark the asm() volatile.
+
+Mark explains:
+
+"The volatile will inhibit *some* cases where the compiler could lift the
+ array_index_nospec() call out of a branch, e.g. where there are multiple
+ invocations of array_index_nospec() with the same arguments:
+
+        if (idx < foo) {
+                idx1 = array_idx_nospec(idx, foo)
+                do_something(idx1);
+        }
+
+        < some other code >
+
+        if (idx < foo) {
+                idx2 = array_idx_nospec(idx, foo);
+                do_something_else(idx2);
+        }
+
+ ... since the compiler can determine that the two invocations yield the same
+ result, and reuse the first result (likely the same register as idx was in
+ originally) for the second branch, effectively re-writing the above as:
+
+        if (idx < foo) {
+                idx = array_idx_nospec(idx, foo);
+                do_something(idx);
+        }
+
+        < some other code >
+
+        if (idx < foo) {
+                do_something_else(idx);
+        }
+
+ ... if we don't take the first branch, then speculatively take the second, we
+ lose the nospec protection.
+
+ There's more info on volatile asm in the GCC docs:
+
+   https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Volatile
+ "
+
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: <stable@vger.kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Fixes: babdde2698d4 ("x86: Implement array_index_mask_nospec")
+Link: https://lkml.kernel.org/lkml/152838798950.14521.4893346294059739135.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/barrier.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -37,7 +37,7 @@ static inline unsigned long array_index_
+ {
+       unsigned long mask;
+-      asm ("cmp %1,%2; sbb %0,%0;"
++      asm volatile ("cmp %1,%2; sbb %0,%0;"
+                       :"=r" (mask)
+                       :"g"(size),"r" (index)
+                       :"cc");