4.14-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 28 Jun 2018 02:08:29 +0000 (11:08 +0900)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 28 Jun 2018 02:08:29 +0000 (11:08 +0900)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 28 Jun 2018 02:08:29 +0000 (11:08 +0900)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 28 Jun 2018 02:08:29 +0000 (11:08 +0900)
diff --git a/queue-4.14/series b/queue-4.14/series

new file mode 100644 (file)

index 0000000..c9693dd
--- /dev/null
+++ b/queue-4.14/series
@@ -0,0 +1,9 @@
+x86-spectre_v1-disable-compiler-optimizations-over-array_index_mask_nospec.patch
+x86-xen-add-call-of-speculative_store_bypass_ht_init-to-pv-paths.patch
+x86-platform-uv-use-new-set-memory-block-size-function.patch
+x86-platform-uv-add-kernel-parameter-to-set-memory-block-size.patch
+x86-mce-improve-error-message-when-kernel-cannot-recover.patch
+x86-mce-check-for-alternate-indication-of-machine-check-recovery-on-skylake.patch
+x86-mce-fix-incorrect-machine-check-from-unknown-source-message.patch
+x86-mce-do-not-overwrite-mci_status-in-mce_no_way_out.patch
+x86-call-fixup_exception-before-notify_die-in-math_error.patch
diff --git a/queue-4.14/x86-call-fixup_exception-before-notify_die-in-math_error.patch b/queue-4.14/x86-call-fixup_exception-before-notify_die-in-math_error.patch

new file mode 100644 (file)

index 0000000..6a81d6a
--- /dev/null
+++ b/queue-4.14/x86-call-fixup_exception-before-notify_die-in-math_error.patch
@@ -0,0 +1,58 @@
+From 3ae6295ccb7cf6d344908209701badbbbb503e40 Mon Sep 17 00:00:00 2001
+From: Siarhei Liakh <Siarhei.Liakh@concurrent-rt.com>
+Date: Thu, 14 Jun 2018 19:36:07 +0000
+Subject: x86: Call fixup_exception() before notify_die() in math_error()
+
+From: Siarhei Liakh <Siarhei.Liakh@concurrent-rt.com>
+
+commit 3ae6295ccb7cf6d344908209701badbbbb503e40 upstream.
+
+fpu__drop() has an explicit fwait which under some conditions can trigger a
+fixable FPU exception while in kernel. Thus, we should attempt to fixup the
+exception first, and only call notify_die() if the fixup failed just like
+in do_general_protection(). The original call sequence incorrectly triggers
+KDB entry on debug kernels under particular FPU-intensive workloads.
+
+Andy noted, that this makes the whole conditional irq enable thing even
+more inconsistent, but fixing that is outside the scope of this.
+
+Signed-off-by: Siarhei Liakh <siarhei.liakh@concurrent-rt.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Andy Lutomirski <luto@kernel.org>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: "Borislav  Petkov" <bpetkov@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/DM5PR11MB201156F1CAB2592B07C79A03B17D0@DM5PR11MB2011.namprd11.prod.outlook.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/traps.c |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -828,16 +828,18 @@ static void math_error(struct pt_regs *r
+       char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
+                                               "simd exception";
+ 
+-      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
+-              return;
+       cond_local_irq_enable(regs);
+ 
+       if (!user_mode(regs)) {
+-              if (!fixup_exception(regs, trapnr)) {
+-                      task->thread.error_code = error_code;
+-                      task->thread.trap_nr = trapnr;
++              if (fixup_exception(regs, trapnr))
++                      return;
++
++              task->thread.error_code = error_code;
++              task->thread.trap_nr = trapnr;
++
++              if (notify_die(DIE_TRAP, str, regs, error_code,
++                                      trapnr, SIGFPE) != NOTIFY_STOP)
+                       die(str, regs, error_code);
+-              }
+               return;
+       }
+ 
diff --git a/queue-4.14/x86-mce-check-for-alternate-indication-of-machine-check-recovery-on-skylake.patch b/queue-4.14/x86-mce-check-for-alternate-indication-of-machine-check-recovery-on-skylake.patch

new file mode 100644 (file)

index 0000000..569f55e
--- /dev/null
+++ b/queue-4.14/x86-mce-check-for-alternate-indication-of-machine-check-recovery-on-skylake.patch
@@ -0,0 +1,57 @@
+From 4c5717da1d021cf368eabb3cb1adcaead56c0d1e Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Fri, 25 May 2018 14:42:09 -0700
+Subject: x86/mce: Check for alternate indication of machine check recovery on Skylake
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit 4c5717da1d021cf368eabb3cb1adcaead56c0d1e upstream.
+
+Currently we just check the "CAPID0" register to see whether the CPU
+can recover from machine checks.
+
+But there are also some special SKUs which do not have all advanced
+RAS features, but do enable machine check recovery for use with NVDIMMs.
+
+Add a check for any of bits {8:5} in the "CAPID5" register (each
+reports some NVDIMM mode available, if any of them are set, then
+the system supports memory machine check recovery).
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: stable@vger.kernel.org # 4.9
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/03cbed6e99ddafb51c2eadf9a3b7c8d7a0cc204e.1527283897.git.tony.luck@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/quirks.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/quirks.c
++++ b/arch/x86/kernel/quirks.c
+@@ -645,12 +645,19 @@ static void quirk_intel_brickland_xeon_r
+ /* Skylake */
+ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
+ {
+-      u32 capid0;
++      u32 capid0, capid5;
+ 
+       pci_read_config_dword(pdev, 0x84, &capid0);
++      pci_read_config_dword(pdev, 0x98, &capid5);
+ 
+-      if ((capid0 & 0xc0) == 0xc0)
++      /*
++       * CAPID0{7:6} indicate whether this is an advanced RAS SKU
++       * CAPID5{8:5} indicate that various NVDIMM usage modes are
++       * enabled, so memory machine check recovery is also enabled.
++       */
++      if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
+               static_branch_inc(&mcsafe_key);
++
+ }
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);
diff --git a/queue-4.14/x86-mce-do-not-overwrite-mci_status-in-mce_no_way_out.patch b/queue-4.14/x86-mce-do-not-overwrite-mci_status-in-mce_no_way_out.patch

new file mode 100644 (file)

index 0000000..7b179b9
--- /dev/null
+++ b/queue-4.14/x86-mce-do-not-overwrite-mci_status-in-mce_no_way_out.patch
@@ -0,0 +1,78 @@
+From 1f74c8a64798e2c488f86efc97e308b85fb7d7aa Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Fri, 22 Jun 2018 11:54:28 +0200
+Subject: x86/mce: Do not overwrite MCi_STATUS in mce_no_way_out()
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 1f74c8a64798e2c488f86efc97e308b85fb7d7aa upstream.
+
+mce_no_way_out() does a quick check during #MC to see whether some of
+the MCEs logged would require the kernel to panic immediately. And it
+passes a struct mce where MCi_STATUS gets written.
+
+However, after having saved a valid status value, the next iteration
+of the loop which goes over the MCA banks on the CPU, overwrites the
+valid status value because we're using struct mce as storage instead of
+a temporary variable.
+
+Which leads to MCE records with an empty status value:
+
+  mce: [Hardware Error]: CPU 0: Machine Check Exception: 6 Bank 0: 0000000000000000
+  mce: [Hardware Error]: RIP 10:<ffffffffbd42fbd7> {trigger_mce+0x7/0x10}
+
+In order to prevent the loss of the status register value, return
+immediately when severity is a panic one so that we can panic
+immediately with the first fatal MCE logged. This is also the intention
+of this function and not to noodle over the banks while a fatal MCE is
+already logged.
+
+Tony: read the rest of the MCA bank to populate the struct mce fully.
+
+Suggested-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20180622095428.626-8-bp@alien8.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/mcheck/mce.c |   18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/cpu/mcheck/mce.c
++++ b/arch/x86/kernel/cpu/mcheck/mce.c
+@@ -760,23 +760,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
+ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
+                         struct pt_regs *regs)
+ {
+-      int i, ret = 0;
+       char *tmp;
++      int i;
+ 
+       for (i = 0; i < mca_cfg.banks; i++) {
+               m->status = mce_rdmsrl(msr_ops.status(i));
+-              if (m->status & MCI_STATUS_VAL) {
+-                      __set_bit(i, validp);
+-                      if (quirk_no_way_out)
+-                              quirk_no_way_out(i, m, regs);
+-              }
++              if (!(m->status & MCI_STATUS_VAL))
++                      continue;
++
++              __set_bit(i, validp);
++              if (quirk_no_way_out)
++                      quirk_no_way_out(i, m, regs);
+ 
+               if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
++                      mce_read_aux(m, i);
+                       *msg = tmp;
+-                      ret = 1;
++                      return 1;
+               }
+       }
+-      return ret;
++      return 0;
+ }
+ 
+ /*
diff --git a/queue-4.14/x86-mce-fix-incorrect-machine-check-from-unknown-source-message.patch b/queue-4.14/x86-mce-fix-incorrect-machine-check-from-unknown-source-message.patch

new file mode 100644 (file)

index 0000000..7627f7c
--- /dev/null
+++ b/queue-4.14/x86-mce-fix-incorrect-machine-check-from-unknown-source-message.patch
@@ -0,0 +1,100 @@
+From 40c36e2741d7fe1e66d6ec55477ba5fd19c9c5d2 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Fri, 22 Jun 2018 11:54:23 +0200
+Subject: x86/mce: Fix incorrect "Machine check from unknown source" message
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit 40c36e2741d7fe1e66d6ec55477ba5fd19c9c5d2 upstream.
+
+Some injection testing resulted in the following console log:
+
+  mce: [Hardware Error]: CPU 22: Machine Check Exception: f Bank 1: bd80000000100134
+  mce: [Hardware Error]: RIP 10:<ffffffffc05292dd> {pmem_do_bvec+0x11d/0x330 [nd_pmem]}
+  mce: [Hardware Error]: TSC c51a63035d52 ADDR 3234bc4000 MISC 88
+  mce: [Hardware Error]: PROCESSOR 0:50654 TIME 1526502199 SOCKET 0 APIC 38 microcode 2000043
+  mce: [Hardware Error]: Run the above through 'mcelog --ascii'
+  Kernel panic - not syncing: Machine check from unknown source
+
+This confused everybody because the first line quite clearly shows
+that we found a logged error in "Bank 1", while the last line says
+"unknown source".
+
+The problem is that the Linux code doesn't do the right thing
+for a local machine check that results in a fatal error.
+
+It turns out that we know very early in the handler whether the
+machine check is fatal. The call to mce_no_way_out() has checked
+all the banks for the CPU that took the local machine check. If
+it says we must crash, we can do so right away with the right
+messages.
+
+We do scan all the banks again. This means that we might initially
+not see a problem, but during the second scan find something fatal.
+If this happens we print a slightly different message (so I can
+see if it actually ever happens).
+
+[ bp: Remove unneeded severity assignment. ]
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+Cc: linux-edac <linux-edac@vger.kernel.org>
+Cc: stable@vger.kernel.org # 4.2
+Link: http://lkml.kernel.org/r/52e049a497e86fd0b71c529651def8871c804df0.1527283897.git.tony.luck@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/mcheck/mce.c |   26 ++++++++++++++++++--------
+ 1 file changed, 18 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/cpu/mcheck/mce.c
++++ b/arch/x86/kernel/cpu/mcheck/mce.c
+@@ -1205,13 +1205,18 @@ void do_machine_check(struct pt_regs *re
+               lmce = m.mcgstatus & MCG_STATUS_LMCES;
+ 
+       /*
++       * Local machine check may already know that we have to panic.
++       * Broadcast machine check begins rendezvous in mce_start()
+        * Go through all banks in exclusion of the other CPUs. This way we
+        * don't report duplicated events on shared banks because the first one
+-       * to see it will clear it. If this is a Local MCE, then no need to
+-       * perform rendezvous.
++       * to see it will clear it.
+        */
+-      if (!lmce)
++      if (lmce) {
++              if (no_way_out)
++                      mce_panic("Fatal local machine check", &m, msg);
++      } else {
+               order = mce_start(&no_way_out);
++      }
+ 
+       for (i = 0; i < cfg->banks; i++) {
+               __clear_bit(i, toclear);
+@@ -1287,12 +1292,17 @@ void do_machine_check(struct pt_regs *re
+                       no_way_out = worst >= MCE_PANIC_SEVERITY;
+       } else {
+               /*
+-               * Local MCE skipped calling mce_reign()
+-               * If we found a fatal error, we need to panic here.
++               * If there was a fatal machine check we should have
++               * already called mce_panic earlier in this function.
++               * Since we re-read the banks, we might have found
++               * something new. Check again to see if we found a
++               * fatal error. We call "mce_severity()" again to
++               * make sure we have the right "msg".
+                */
+-               if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
+-                      mce_panic("Machine check from unknown source",
+-                              NULL, NULL);
++              if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
++                      mce_severity(&m, cfg->tolerant, &msg, true);
++                      mce_panic("Local fatal machine check!", &m, msg);
++              }
+       }
+ 
+       /*
diff --git a/queue-4.14/x86-mce-improve-error-message-when-kernel-cannot-recover.patch b/queue-4.14/x86-mce-improve-error-message-when-kernel-cannot-recover.patch

new file mode 100644 (file)

index 0000000..d2e6074
--- /dev/null
+++ b/queue-4.14/x86-mce-improve-error-message-when-kernel-cannot-recover.patch
@@ -0,0 +1,56 @@
+From c7d606f560e4c698884697fef503e4abacdd8c25 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Fri, 25 May 2018 14:41:39 -0700
+Subject: x86/mce: Improve error message when kernel cannot recover
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit c7d606f560e4c698884697fef503e4abacdd8c25 upstream.
+
+Since we added support to add recovery from some errors inside the kernel in:
+
+commit b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries")
+
+we have done a less than stellar job at reporting the cause of recoverable
+machine checks that occur in other parts of the kernel. The user just gets
+the unhelpful message:
+
+       mce: [Hardware Error]: Machine check: Action required: unknown MCACOD
+
+doubly unhelpful when they check the manual for the reported IA32_MSR_STATUS.MCACOD
+and see that it is listed as one of the standard recoverable values.
+
+Add an extra rule to the MCE severity table to catch this case and report it
+as:
+
+       mce: [Hardware Error]: Machine check: Data load in unrecoverable area of kernel
+
+Fixes: b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries")
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: stable@vger.kernel.org # 4.6+
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/4cc7c465150a9a48b8b9f45d0b840278e77eb9b5.1527283897.git.tony.luck@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/mcheck/mce-severity.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
++++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
+@@ -143,6 +143,11 @@ static struct severity {
+               SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
+               USER
+               ),
++      MCESEV(
++              PANIC, "Data load in unrecoverable area of kernel",
++              SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
++              KERNEL
++              ),
+ #endif
+       MCESEV(
+               PANIC, "Action required: unknown MCACOD",
diff --git a/queue-4.14/x86-platform-uv-add-kernel-parameter-to-set-memory-block-size.patch b/queue-4.14/x86-platform-uv-add-kernel-parameter-to-set-memory-block-size.patch

new file mode 100644 (file)

index 0000000..d436832
--- /dev/null
+++ b/queue-4.14/x86-platform-uv-add-kernel-parameter-to-set-memory-block-size.patch
@@ -0,0 +1,57 @@
+From d7609f4210cb716c11abfe2bfb5997191095d00b Mon Sep 17 00:00:00 2001
+From: "mike.travis@hpe.com" <mike.travis@hpe.com>
+Date: Thu, 24 May 2018 15:17:14 -0500
+Subject: x86/platform/UV: Add kernel parameter to set memory block size
+
+From: mike.travis@hpe.com <mike.travis@hpe.com>
+
+commit d7609f4210cb716c11abfe2bfb5997191095d00b upstream.
+
+Add a kernel parameter that allows setting UV memory block size.  This
+is to provide an adjustment for new forms of PMEM and other DIMM memory
+that might require alignment restrictions other than scanning the global
+address table for the required minimum alignment.  The value set will be
+further adjusted by both the GAM range table scan as well as restrictions
+imposed by set_memory_block_size_order().
+
+Signed-off-by: Mike Travis <mike.travis@hpe.com>
+Reviewed-by: Andrew Banman <andrew.banman@hpe.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Russ Anderson <russ.anderson@hpe.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: dan.j.williams@intel.com
+Cc: jgross@suse.com
+Cc: kirill.shutemov@linux.intel.com
+Cc: mhocko@suse.com
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/lkml/20180524201711.854849120@stormcage.americas.sgi.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/apic/x2apic_uv_x.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/arch/x86/kernel/apic/x2apic_uv_x.c
++++ b/arch/x86/kernel/apic/x2apic_uv_x.c
+@@ -350,6 +350,17 @@ EXPORT_SYMBOL(uv_hub_info_version);
+ /* Default UV memory block size is 2GB */
+ static unsigned long mem_block_size = (2UL << 30);
+ 
++/* Kernel parameter to specify UV mem block size */
++static int parse_mem_block_size(char *ptr)
++{
++      unsigned long size = memparse(ptr, NULL);
++
++      /* Size will be rounded down by set_block_size() below */
++      mem_block_size = size;
++      return 0;
++}
++early_param("uv_memblksize", parse_mem_block_size);
++
+ static __init int adj_blksize(u32 lgre)
+ {
+       unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT;
diff --git a/queue-4.14/x86-platform-uv-use-new-set-memory-block-size-function.patch b/queue-4.14/x86-platform-uv-use-new-set-memory-block-size-function.patch

new file mode 100644 (file)

index 0000000..51ea29a
--- /dev/null
+++ b/queue-4.14/x86-platform-uv-use-new-set-memory-block-size-function.patch
@@ -0,0 +1,139 @@
+From bbbd2b51a2aa0d76b3676271e216cf3647773397 Mon Sep 17 00:00:00 2001
+From: "mike.travis@hpe.com" <mike.travis@hpe.com>
+Date: Thu, 24 May 2018 15:17:13 -0500
+Subject: x86/platform/UV: Use new set memory block size function
+
+From: mike.travis@hpe.com <mike.travis@hpe.com>
+
+commit bbbd2b51a2aa0d76b3676271e216cf3647773397 upstream.
+
+Add a call to the new function to "adjust" the current fixed UV memory
+block size of 2GB so it can be changed to a different physical boundary.
+This accommodates changes in the Intel BIOS, and therefore UV BIOS,
+which now can align boundaries different than the previous UV standard
+of 2GB.  It also flags any UV Global Address boundaries from BIOS that
+cause a change in the mem block size (boundary).
+
+The current boundary of 2GB has been used on UV since the first system
+release in 2009 with Linux 2.6 and has worked fine.  But the new NVDIMM
+persistent memory modules (PMEM), along with the Intel BIOS changes to
+support these modules caused the memory block size boundary to be set
+to a lower limit.  Intel only guarantees this minimum boundary at
+64MB, though the current Linux limit is 128MB.
+
+Note that the default remains 2GB if no changes occur.
+
+Signed-off-by: Mike Travis <mike.travis@hpe.com>
+Reviewed-by: Andrew Banman <andrew.banman@hpe.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Russ Anderson <russ.anderson@hpe.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: dan.j.williams@intel.com
+Cc: jgross@suse.com
+Cc: kirill.shutemov@linux.intel.com
+Cc: mhocko@suse.com
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/lkml/20180524201711.732785782@stormcage.americas.sgi.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/apic/x2apic_uv_x.c |   49 ++++++++++++++++++++++++++++++++++---
+ 1 file changed, 46 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/apic/x2apic_uv_x.c
++++ b/arch/x86/kernel/apic/x2apic_uv_x.c
+@@ -26,6 +26,7 @@
+ #include <linux/delay.h>
+ #include <linux/crash_dump.h>
+ #include <linux/reboot.h>
++#include <linux/memory.h>
+ 
+ #include <asm/uv/uv_mmrs.h>
+ #include <asm/uv/uv_hub.h>
+@@ -346,6 +347,40 @@ extern int uv_hub_info_version(void)
+ }
+ EXPORT_SYMBOL(uv_hub_info_version);
+ 
++/* Default UV memory block size is 2GB */
++static unsigned long mem_block_size = (2UL << 30);
++
++static __init int adj_blksize(u32 lgre)
++{
++      unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT;
++      unsigned long size;
++
++      for (size = mem_block_size; size > MIN_MEMORY_BLOCK_SIZE; size >>= 1)
++              if (IS_ALIGNED(base, size))
++                      break;
++
++      if (size >= mem_block_size)
++              return 0;
++
++      mem_block_size = size;
++      return 1;
++}
++
++static __init void set_block_size(void)
++{
++      unsigned int order = ffs(mem_block_size);
++
++      if (order) {
++              /* adjust for ffs return of 1..64 */
++              set_memory_block_size_order(order - 1);
++              pr_info("UV: mem_block_size set to 0x%lx\n", mem_block_size);
++      } else {
++              /* bad or zero value, default to 1UL << 31 (2GB) */
++              pr_err("UV: mem_block_size error with 0x%lx\n", mem_block_size);
++              set_memory_block_size_order(31);
++      }
++}
++
+ /* Build GAM range lookup table: */
+ static __init void build_uv_gr_table(void)
+ {
+@@ -1144,23 +1179,30 @@ static void __init decode_gam_rng_tbl(un
+                                       << UV_GAM_RANGE_SHFT);
+               int order = 0;
+               char suffix[] = " KMGTPE";
++              int flag = ' ';
+ 
+               while (size > 9999 && order < sizeof(suffix)) {
+                       size /= 1024;
+                       order++;
+               }
+ 
++              /* adjust max block size to current range start */
++              if (gre->type == 1 || gre->type == 2)
++                      if (adj_blksize(lgre))
++                              flag = '*';
++
+               if (!index) {
+                       pr_info("UV: GAM Range Table...\n");
+-                      pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
++                      pr_info("UV:  # %20s %14s %6s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
+               }
+-              pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d   %04x  %02x %02x\n",
++              pr_info("UV: %2d: 0x%014lx-0x%014lx%c %5lu%c %3d   %04x  %02x %02x\n",
+                       index++,
+                       (unsigned long)lgre << UV_GAM_RANGE_SHFT,
+                       (unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
+-                      size, suffix[order],
++                      flag, size, suffix[order],
+                       gre->type, gre->nasid, gre->sockid, gre->pnode);
+ 
++              /* update to next range start */
+               lgre = gre->limit;
+               if (sock_min > gre->sockid)
+                       sock_min = gre->sockid;
+@@ -1391,6 +1433,7 @@ static void __init uv_system_init_hub(vo
+ 
+       build_socket_tables();
+       build_uv_gr_table();
++      set_block_size();
+       uv_init_hub_info(&hub_info);
+       uv_possible_blades = num_possible_nodes();
+       if (!_node_to_pnode)
diff --git a/queue-4.14/x86-spectre_v1-disable-compiler-optimizations-over-array_index_mask_nospec.patch b/queue-4.14/x86-spectre_v1-disable-compiler-optimizations-over-array_index_mask_nospec.patch

new file mode 100644 (file)

index 0000000..a8a48be
--- /dev/null
+++ b/queue-4.14/x86-spectre_v1-disable-compiler-optimizations-over-array_index_mask_nospec.patch
@@ -0,0 +1,81 @@
+From eab6870fee877258122a042bfd99ee7908c40280 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Thu, 7 Jun 2018 09:13:48 -0700
+Subject: x86/spectre_v1: Disable compiler optimizations over array_index_mask_nospec()
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit eab6870fee877258122a042bfd99ee7908c40280 upstream.
+
+Mark Rutland noticed that GCC optimization passes have the potential to elide
+necessary invocations of the array_index_mask_nospec() instruction sequence,
+so mark the asm() volatile.
+
+Mark explains:
+
+"The volatile will inhibit *some* cases where the compiler could lift the
+ array_index_nospec() call out of a branch, e.g. where there are multiple
+ invocations of array_index_nospec() with the same arguments:
+
+        if (idx < foo) {
+                idx1 = array_idx_nospec(idx, foo)
+                do_something(idx1);
+        }
+
+        < some other code >
+
+        if (idx < foo) {
+                idx2 = array_idx_nospec(idx, foo);
+                do_something_else(idx2);
+        }
+
+ ... since the compiler can determine that the two invocations yield the same
+ result, and reuse the first result (likely the same register as idx was in
+ originally) for the second branch, effectively re-writing the above as:
+
+        if (idx < foo) {
+                idx = array_idx_nospec(idx, foo);
+                do_something(idx);
+        }
+
+        < some other code >
+
+        if (idx < foo) {
+                do_something_else(idx);
+        }
+
+ ... if we don't take the first branch, then speculatively take the second, we
+ lose the nospec protection.
+
+ There's more info on volatile asm in the GCC docs:
+
+   https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Volatile
+ "
+
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: <stable@vger.kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Fixes: babdde2698d4 ("x86: Implement array_index_mask_nospec")
+Link: https://lkml.kernel.org/lkml/152838798950.14521.4893346294059739135.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/barrier.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -38,7 +38,7 @@ static inline unsigned long array_index_
+ {
+       unsigned long mask;
+ 
+-      asm ("cmp %1,%2; sbb %0,%0;"
++      asm volatile ("cmp %1,%2; sbb %0,%0;"
+                       :"=r" (mask)
+                       :"g"(size),"r" (index)
+                       :"cc");
diff --git a/queue-4.14/x86-xen-add-call-of-speculative_store_bypass_ht_init-to-pv-paths.patch b/queue-4.14/x86-xen-add-call-of-speculative_store_bypass_ht_init-to-pv-paths.patch

new file mode 100644 (file)

index 0000000..4162ff7
--- /dev/null
+++ b/queue-4.14/x86-xen-add-call-of-speculative_store_bypass_ht_init-to-pv-paths.patch
@@ -0,0 +1,64 @@
+From 74899d92e66663dc7671a8017b3146dcd4735f3b Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 21 Jun 2018 10:43:31 +0200
+Subject: x86/xen: Add call of speculative_store_bypass_ht_init() to PV paths
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 74899d92e66663dc7671a8017b3146dcd4735f3b upstream.
+
+Commit:
+
+  1f50ddb4f418 ("x86/speculation: Handle HT correctly on AMD")
+
+... added speculative_store_bypass_ht_init() to the per-CPU initialization sequence.
+
+speculative_store_bypass_ht_init() needs to be called on each CPU for
+PV guests, too.
+
+Reported-by: Brian Woods <brian.woods@amd.com>
+Tested-by: Brian Woods <brian.woods@amd.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Cc: <stable@vger.kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: boris.ostrovsky@oracle.com
+Cc: xen-devel@lists.xenproject.org
+Fixes: 1f50ddb4f4189243c05926b842dc1a0332195f31 ("x86/speculation: Handle HT correctly on AMD")
+Link: https://lore.kernel.org/lkml/20180621084331.21228-1-jgross@suse.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/xen/smp_pv.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/xen/smp_pv.c
++++ b/arch/x86/xen/smp_pv.c
+@@ -32,6 +32,7 @@
+ #include <xen/interface/vcpu.h>
+ #include <xen/interface/xenpmu.h>
+ 
++#include <asm/spec-ctrl.h>
+ #include <asm/xen/interface.h>
+ #include <asm/xen/hypercall.h>
+ 
+@@ -70,6 +71,8 @@ static void cpu_bringup(void)
+       cpu_data(cpu).x86_max_cores = 1;
+       set_cpu_sibling_map(cpu);
+ 
++      speculative_store_bypass_ht_init();
++
+       xen_setup_cpu_clockevents();
+ 
+       notify_cpu_starting(cpu);
+@@ -250,6 +253,8 @@ static void __init xen_pv_smp_prepare_cp
+       }
+       set_cpu_sibling_map(0);
+ 
++      speculative_store_bypass_ht_init();
++
+       xen_pmu_init(0);
+ 
+       if (xen_smp_intr_init(0) || xen_smp_intr_init_pv(0))
diff --git a/queue-4.17/series b/queue-4.17/series

new file mode 100644 (file)

index 0000000..0593302
--- /dev/null
+++ b/queue-4.17/series
@@ -0,0 +1,10 @@
+x86-spectre_v1-disable-compiler-optimizations-over-array_index_mask_nospec.patch
+x86-xen-add-call-of-speculative_store_bypass_ht_init-to-pv-paths.patch
+x86-platform-uv-add-adjustable-set-memory-block-size-function.patch
+x86-platform-uv-use-new-set-memory-block-size-function.patch
+x86-platform-uv-add-kernel-parameter-to-set-memory-block-size.patch
+x86-mce-improve-error-message-when-kernel-cannot-recover.patch
+x86-mce-check-for-alternate-indication-of-machine-check-recovery-on-skylake.patch
+x86-mce-fix-incorrect-machine-check-from-unknown-source-message.patch
+x86-mce-do-not-overwrite-mci_status-in-mce_no_way_out.patch
+x86-call-fixup_exception-before-notify_die-in-math_error.patch
diff --git a/queue-4.4/series b/queue-4.4/series

index ab7aad6ceaca92323a39cfff26a3895901f120af..feeb755aac995002c1fd6a822dae03022f14fb34 100644 (file)
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -23,3 +23,4 @@ libata-zpodd-small-read-overflow-in-eject_tray.patch
  libata-drop-sandisk-sd7ub3q-g1001-nolpm-quirk.patch
  w1-mxc_w1-enable-clock-before-calling-clk_get_rate-on-it.patch
  fs-binfmt_misc.c-do-not-allow-offset-overflow.patch
+x86-spectre_v1-disable-compiler-optimizations-over-array_index_mask_nospec.patch
diff --git a/queue-4.9/series b/queue-4.9/series

new file mode 100644 (file)

index 0000000..83a1197
--- /dev/null
+++ b/queue-4.9/series
@@ -0,0 +1,6 @@
+x86-spectre_v1-disable-compiler-optimizations-over-array_index_mask_nospec.patch
+x86-mce-improve-error-message-when-kernel-cannot-recover.patch
+x86-mce-check-for-alternate-indication-of-machine-check-recovery-on-skylake.patch
+x86-mce-fix-incorrect-machine-check-from-unknown-source-message.patch
+x86-mce-do-not-overwrite-mci_status-in-mce_no_way_out.patch
+x86-call-fixup_exception-before-notify_die-in-math_error.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 28 Jun 2018 02:08:29 +0000 (11:08 +0900)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 28 Jun 2018 02:08:29 +0000 (11:08 +0900)
queue-4.14/series	[new file with mode: 0644]	patch \| blob
queue-4.14/x86-call-fixup_exception-before-notify_die-in-math_error.patch	[new file with mode: 0644]	patch \| blob
queue-4.14/x86-mce-check-for-alternate-indication-of-machine-check-recovery-on-skylake.patch	[new file with mode: 0644]	patch \| blob
queue-4.14/x86-mce-do-not-overwrite-mci_status-in-mce_no_way_out.patch	[new file with mode: 0644]	patch \| blob
queue-4.14/x86-mce-fix-incorrect-machine-check-from-unknown-source-message.patch	[new file with mode: 0644]	patch \| blob
queue-4.14/x86-mce-improve-error-message-when-kernel-cannot-recover.patch	[new file with mode: 0644]	patch \| blob
queue-4.14/x86-platform-uv-add-kernel-parameter-to-set-memory-block-size.patch	[new file with mode: 0644]	patch \| blob
queue-4.14/x86-platform-uv-use-new-set-memory-block-size-function.patch	[new file with mode: 0644]	patch \| blob
queue-4.14/x86-spectre_v1-disable-compiler-optimizations-over-array_index_mask_nospec.patch	[new file with mode: 0644]	patch \| blob
queue-4.14/x86-xen-add-call-of-speculative_store_bypass_ht_init-to-pv-paths.patch	[new file with mode: 0644]	patch \| blob
queue-4.17/series	[new file with mode: 0644]	patch \| blob
queue-4.4/series		patch \| blob \| blame \| history
queue-4.9/series	[new file with mode: 0644]	patch \| blob