git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Fri, 9 Aug 2019 12:39:39 +0000 (14:39 +0200)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Fri, 9 Aug 2019 12:39:39 +0000 (14:39 +0200)
added patches:
x86-cpufeatures-sort-feature-word-7.patch
x86-entry-64-fix-context-tracking-state-warning-when-load_gs_index-fails.patch
x86-entry-64-use-jmp-instead-of-jmpq.patch
x86-speculation-enable-spectre-v1-swapgs-mitigations.patch
x86-speculation-prepare-entry-code-for-spectre-v1-swapgs-mitigations.patch
x86-speculation-swapgs-exclude-atoms-from-speculation-through-swapgs.patch

queue-4.4/series
queue-4.4/x86-cpufeatures-sort-feature-word-7.patch [new file with mode: 0644]
queue-4.4/x86-entry-64-fix-context-tracking-state-warning-when-load_gs_index-fails.patch [new file with mode: 0644]
queue-4.4/x86-entry-64-use-jmp-instead-of-jmpq.patch [new file with mode: 0644]
queue-4.4/x86-speculation-enable-spectre-v1-swapgs-mitigations.patch [new file with mode: 0644]
queue-4.4/x86-speculation-prepare-entry-code-for-spectre-v1-swapgs-mitigations.patch [new file with mode: 0644]
queue-4.4/x86-speculation-swapgs-exclude-atoms-from-speculation-through-swapgs.patch [new file with mode: 0644]

diff --git a/queue-4.4/series b/queue-4.4/series
index 54220d091aa3067f886a7e25b60150826a8edef7..aa4ca1fe98d55aca21c187929a3f7c341fba9cce 100644
@@ -14,3 +14,9 @@ bnx2x-disable-multi-cos-feature.patch
 compat_ioctl-pppoe-fix-pppoeiocsfwd-handling.patch
 block-blk_init_allocated_queue-set-q-fq-as-null-in-the-fail-case.patch
 spi-bcm2835-fix-3-wire-mode-if-dma-is-enabled.patch
+x86-cpufeatures-sort-feature-word-7.patch
+x86-entry-64-fix-context-tracking-state-warning-when-load_gs_index-fails.patch
+x86-speculation-prepare-entry-code-for-spectre-v1-swapgs-mitigations.patch
+x86-speculation-enable-spectre-v1-swapgs-mitigations.patch
+x86-entry-64-use-jmp-instead-of-jmpq.patch
+x86-speculation-swapgs-exclude-atoms-from-speculation-through-swapgs.patch
diff --git a/queue-4.4/x86-cpufeatures-sort-feature-word-7.patch b/queue-4.4/x86-cpufeatures-sort-feature-word-7.patch
new file mode 100644
index 0000000..e8f8d3a
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Fri 09 Aug 2019 02:39:17 PM CEST
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Thu, 8 Aug 2019 20:03:32 +0100
+Subject: x86: cpufeatures: Sort feature word 7
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+This will make it clearer which bits are allocated, in case we need to
+assign more feature bits for later backports.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -196,13 +196,10 @@
+ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_INTEL_PT  ( 7*32+15) /* Intel Processor Trace */
+-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
+-
+ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+ #define X86_FEATURE_SSBD      ( 7*32+17) /* Speculative Store Bypass Disable */
+-/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+-#define X86_FEATURE_KAISER    ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
++#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
+ #define X86_FEATURE_USE_IBPB  ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/
+ #define X86_FEATURE_USE_IBRS_FW       ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
+@@ -215,6 +212,7 @@
+ #define X86_FEATURE_ZEN               ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
+ #define X86_FEATURE_L1TF_PTEINV       ( 7*32+29) /* "" L1TF workaround PTE inversion */
+ #define X86_FEATURE_IBRS_ENHANCED     ( 7*32+30) /* Enhanced IBRS */
++#define X86_FEATURE_KAISER    ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/queue-4.4/x86-entry-64-fix-context-tracking-state-warning-when-load_gs_index-fails.patch b/queue-4.4/x86-entry-64-fix-context-tracking-state-warning-when-load_gs_index-fails.patch
new file mode 100644
index 0000000..d905a1f
--- /dev/null
@@ -0,0 +1,78 @@
+From foo@baz Fri 09 Aug 2019 02:39:17 PM CEST
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Fri, 30 Sep 2016 09:01:06 +0800
+Subject: x86/entry/64: Fix context tracking state warning when load_gs_index fails
+
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+
+commit 2fa5f04f85730d0c4f49f984b7efeb4f8d5bd1fc upstream.
+
+This warning:
+
+ WARNING: CPU: 0 PID: 3331 at arch/x86/entry/common.c:45 enter_from_user_mode+0x32/0x50
+ CPU: 0 PID: 3331 Comm: ldt_gdt_64 Not tainted 4.8.0-rc7+ #13
+ Call Trace:
+  dump_stack+0x99/0xd0
+  __warn+0xd1/0xf0
+  warn_slowpath_null+0x1d/0x20
+  enter_from_user_mode+0x32/0x50
+  error_entry+0x6d/0xc0
+  ? general_protection+0x12/0x30
+  ? native_load_gs_index+0xd/0x20
+  ? do_set_thread_area+0x19c/0x1f0
+  SyS_set_thread_area+0x24/0x30
+  do_int80_syscall_32+0x7c/0x220
+  entry_INT80_compat+0x38/0x50
+
+... can be reproduced by running the GS testcase of the ldt_gdt test unit in
+the x86 selftests.
+
+do_int80_syscall_32() will call enter_from_user_mode() to convert the
+context tracking state from user state to kernel state. The load_gs_index()
+call can fail with a user gsbase; if this happens, gsbase is fixed up and
+execution proceeds.
+
+However, enter_from_user_mode() will be called again in the fixed-up path
+even though context tracking is already in kernel state.
+
+This patch fixes it by just fixing up gsbase and telling lockdep that IRQs
+are off once load_gs_index() failed with user gsbase.
+
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Acked-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1475197266-3440-1-git-send-email-wanpeng.li@hotmail.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1133,7 +1133,6 @@ ENTRY(error_entry)
+       testb   $3, CS+8(%rsp)
+       jz      .Lerror_kernelspace
+-.Lerror_entry_from_usermode_swapgs:
+       /*
+        * We entered from user mode or we're pretending to have entered
+        * from user mode due to an IRET fault.
+@@ -1177,7 +1176,8 @@ ENTRY(error_entry)
+        * gsbase and proceed.  We'll fix up the exception and land in
+        * gs_change's error handler with kernel gsbase.
+        */
+-      jmp     .Lerror_entry_from_usermode_swapgs
++      SWAPGS
++      jmp .Lerror_entry_done
+ .Lbstep_iret:
+       /* Fix truncated RIP */
diff --git a/queue-4.4/x86-entry-64-use-jmp-instead-of-jmpq.patch b/queue-4.4/x86-entry-64-use-jmp-instead-of-jmpq.patch
new file mode 100644
index 0000000..40bb512
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Fri 09 Aug 2019 02:39:17 PM CEST
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 15 Jul 2019 11:51:39 -0500
+Subject: x86/entry/64: Use JMP instead of JMPQ
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 64dbc122b20f75183d8822618c24f85144a5a94d upstream.
+
+Somehow the swapgs mitigation entry code patch ended up with a JMPQ
+instruction instead of JMP, where only the short jump is needed.  Some
+assembler versions apparently fail to optimize JMPQ into a two-byte JMP
+when possible, instead always using a 7-byte JMP with relocation.  For
+some reason that makes the entry code explode with a #GP during boot.
+
+Change it back to "JMP" as originally intended.
+
+Fixes: 18ec54fdd6d1 ("x86/speculation: Prepare entry code for Spectre v1 swapgs mitigations")
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bwh: Backported to 4.4: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -567,7 +567,7 @@ END(irq_entries_start)
+ #ifdef CONFIG_CONTEXT_TRACKING
+       call enter_from_user_mode
+ #endif
+-      jmpq    2f
++      jmp     2f
+ 1:
+       FENCE_SWAPGS_KERNEL_ENTRY
+ 2:
diff --git a/queue-4.4/x86-speculation-enable-spectre-v1-swapgs-mitigations.patch b/queue-4.4/x86-speculation-enable-spectre-v1-swapgs-mitigations.patch
new file mode 100644
index 0000000..fa44b5a
--- /dev/null
@@ -0,0 +1,268 @@
+From foo@baz Fri 09 Aug 2019 02:39:17 PM CEST
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 8 Jul 2019 11:52:26 -0500
+Subject: x86/speculation: Enable Spectre v1 swapgs mitigations
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit a2059825986a1c8143fd6698774fa9d83733bb11 upstream.
+
+The previous commit added macro calls in the entry code which mitigate the
+Spectre v1 swapgs issue if the X86_FEATURE_FENCE_SWAPGS_* features are
+enabled.  Enable those features where applicable.
+
+The mitigations may be disabled with "nospectre_v1" or "mitigations=off".
+
+There are different features which can affect the risk of attack:
+
+- When FSGSBASE is enabled, unprivileged users are able to place any
+  value in GS, using the wrgsbase instruction.  This means they can
+  write a GS value which points to any value in kernel space, which can
+  be useful with the following gadget in an interrupt/exception/NMI
+  handler:
+
+       if (coming from user space)
+               swapgs
+       mov %gs:<percpu_offset>, %reg1
+       // dependent load or store based on the value of %reg1
+       // for example: mov %(reg1), %reg2
+
+  If an interrupt is coming from user space, and the entry code
+  speculatively skips the swapgs (due to user branch mistraining), it
+  may speculatively execute the GS-based load and a subsequent dependent
+  load or store, exposing the kernel data to an L1 side channel leak.
+
+  Note that, on Intel, a similar attack exists in the above gadget when
+  coming from kernel space, if the swapgs gets speculatively executed to
+  switch back to the user GS.  On AMD, this variant isn't possible
+  because swapgs is serializing with respect to future GS-based
+  accesses.
+
+  NOTE: The FSGSBASE patch set hasn't been merged yet, so the above case
+       doesn't exist quite yet.
+
+- When FSGSBASE is disabled, the issue is mitigated somewhat because
+  unprivileged users must use prctl(ARCH_SET_GS) to set GS, which
+  restricts GS values to user space addresses only.  That means the
+  gadget would need an additional step, since the target kernel address
+  needs to be read from user space first.  Something like:
+
+       if (coming from user space)
+               swapgs
+       mov %gs:<percpu_offset>, %reg1
+       mov (%reg1), %reg2
+       // dependent load or store based on the value of %reg2
+       // for example: mov %(reg2), %reg3
+
+  It's difficult to audit for this gadget in all the handlers, so while
+  there are no known instances of it, it's entirely possible that it
+  exists somewhere (or could be introduced in the future).  Without
+  tooling to analyze all such code paths, consider it vulnerable.
+
+  Effects of SMAP on the !FSGSBASE case:
+
+  - If SMAP is enabled, and the CPU reports RDCL_NO (i.e., not
+    susceptible to Meltdown), the kernel is prevented from speculatively
+    reading user space memory, even L1 cached values.  This effectively
+    disables the !FSGSBASE attack vector.
+
+  - If SMAP is enabled, but the CPU *is* susceptible to Meltdown, SMAP
+    still prevents the kernel from speculatively reading user space
+    memory.  But it does *not* prevent the kernel from reading the
+    user value from L1, if it has already been cached.  This is probably
+    only a small hurdle for an attacker to overcome.
+
+Thanks to Dave Hansen for contributing the speculative_smap() function.
+
+Thanks to Andrew Cooper for providing the inside scoop on whether swapgs
+is serializing on AMD.
+
+[ tglx: Fixed the USER fence decision and polished the comment as suggested
+       by Dave Hansen ]
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Dave Hansen <dave.hansen@intel.com>
+[bwh: Backported to 4.4:
+ - Check for X86_FEATURE_KAISER instead of X86_FEATURE_PTI
+ - mitigations= parameter is x86-only here
+ - Don't use __ro_after_init
+ - Adjust filename, context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt |    7 +-
+ arch/x86/kernel/cpu/bugs.c          |  115 +++++++++++++++++++++++++++++++++---
+ 2 files changed, 110 insertions(+), 12 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2184,6 +2184,7 @@ bytes respectively. Such letter suffixes
+                               improves system performance, but it may also
+                               expose users to several CPU vulnerabilities.
+                               Equivalent to: nopti [X86]
++                                             nospectre_v1 [X86]
+                                              nospectre_v2 [X86]
+                                              spectre_v2_user=off [X86]
+                                              spec_store_bypass_disable=off [X86]
+@@ -2498,9 +2499,9 @@ bytes respectively. Such letter suffixes
+       nohugeiomap     [KNL,x86] Disable kernel huge I/O mappings.
+-      nospectre_v1    [PPC] Disable mitigations for Spectre Variant 1 (bounds
+-                      check bypass). With this option data leaks are possible
+-                      in the system.
++      nospectre_v1    [X86,PPC] Disable mitigations for Spectre Variant 1
++                      (bounds check bypass). With this option data leaks are
++                      possible in the system.
+       nospectre_v2    [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2
+                       (indirect branch prediction) vulnerability. System may
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -30,6 +30,7 @@
+ #include <asm/intel-family.h>
+ #include <asm/e820.h>
++static void __init spectre_v1_select_mitigation(void);
+ static void __init spectre_v2_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+@@ -87,17 +88,11 @@ void __init check_bugs(void)
+       if (boot_cpu_has(X86_FEATURE_STIBP))
+               x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
+-      /* Select the proper spectre mitigation before patching alternatives */
++      /* Select the proper CPU mitigations before patching alternatives: */
++      spectre_v1_select_mitigation();
+       spectre_v2_select_mitigation();
+-
+-      /*
+-       * Select proper mitigation for any exposure to the Speculative Store
+-       * Bypass vulnerability.
+-       */
+       ssb_select_mitigation();
+-
+       l1tf_select_mitigation();
+-
+       mds_select_mitigation();
+       arch_smt_update();
+@@ -252,6 +247,108 @@ static int __init mds_cmdline(char *str)
+ early_param("mds", mds_cmdline);
+ #undef pr_fmt
++#define pr_fmt(fmt)     "Spectre V1 : " fmt
++
++enum spectre_v1_mitigation {
++      SPECTRE_V1_MITIGATION_NONE,
++      SPECTRE_V1_MITIGATION_AUTO,
++};
++
++static enum spectre_v1_mitigation spectre_v1_mitigation =
++      SPECTRE_V1_MITIGATION_AUTO;
++
++static const char * const spectre_v1_strings[] = {
++      [SPECTRE_V1_MITIGATION_NONE] = "Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers",
++      [SPECTRE_V1_MITIGATION_AUTO] = "Mitigation: usercopy/swapgs barriers and __user pointer sanitization",
++};
++
++static bool is_swapgs_serializing(void)
++{
++      /*
++       * Technically, swapgs isn't serializing on AMD (despite it previously
++       * being documented as such in the APM).  But according to AMD, %gs is
++       * updated non-speculatively, and the issuing of %gs-relative memory
++       * operands will be blocked until the %gs update completes, which is
++       * good enough for our purposes.
++       */
++      return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
++}
++
++/*
++ * Does SMAP provide full mitigation against speculative kernel access to
++ * userspace?
++ */
++static bool smap_works_speculatively(void)
++{
++      if (!boot_cpu_has(X86_FEATURE_SMAP))
++              return false;
++
++      /*
++       * On CPUs which are vulnerable to Meltdown, SMAP does not
++       * prevent speculative access to user data in the L1 cache.
++       * Consider SMAP to be non-functional as a mitigation on these
++       * CPUs.
++       */
++      if (boot_cpu_has(X86_BUG_CPU_MELTDOWN))
++              return false;
++
++      return true;
++}
++
++static void __init spectre_v1_select_mitigation(void)
++{
++      if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) {
++              spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
++              return;
++      }
++
++      if (spectre_v1_mitigation == SPECTRE_V1_MITIGATION_AUTO) {
++              /*
++               * With Spectre v1, a user can speculatively control either
++               * path of a conditional swapgs with a user-controlled GS
++               * value.  The mitigation is to add lfences to both code paths.
++               *
++               * If FSGSBASE is enabled, the user can put a kernel address in
++               * GS, in which case SMAP provides no protection.
++               *
++               * [ NOTE: Don't check for X86_FEATURE_FSGSBASE until the
++               *         FSGSBASE enablement patches have been merged. ]
++               *
++               * If FSGSBASE is disabled, the user can only put a user space
++               * address in GS.  That makes an attack harder, but still
++               * possible if there's no SMAP protection.
++               */
++              if (!smap_works_speculatively()) {
++                      /*
++                       * Mitigation can be provided from SWAPGS itself or
++                       * PTI as the CR3 write in the Meltdown mitigation
++                       * is serializing.
++                       *
++                       * If neither is there, mitigate with an LFENCE.
++                       */
++                      if (!is_swapgs_serializing() && !boot_cpu_has(X86_FEATURE_KAISER))
++                              setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_USER);
++
++                      /*
++                       * Enable lfences in the kernel entry (non-swapgs)
++                       * paths, to prevent user entry from speculatively
++                       * skipping swapgs.
++                       */
++                      setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_KERNEL);
++              }
++      }
++
++      pr_info("%s\n", spectre_v1_strings[spectre_v1_mitigation]);
++}
++
++static int __init nospectre_v1_cmdline(char *str)
++{
++      spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
++      return 0;
++}
++early_param("nospectre_v1", nospectre_v1_cmdline);
++
++#undef pr_fmt
+ #define pr_fmt(fmt)     "Spectre V2 : " fmt
+ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+@@ -1154,7 +1251,7 @@ static ssize_t cpu_show_common(struct de
+               break;
+       case X86_BUG_SPECTRE_V1:
+-              return sprintf(buf, "Mitigation: __user pointer sanitization\n");
++              return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
+       case X86_BUG_SPECTRE_V2:
+               return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
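[Not part of the queued patch: the mitigation string selected above is exported through the standard sysfs vulnerabilities interface backed by cpu_show_common(), so the effect of this backport can be checked on a running kernel. The following is a minimal illustrative sketch only; the sysfs path is the usual /sys/devices/system/cpu/vulnerabilities/spectre_v1 file, and the program is not from the patch.]

/*
 * Illustrative sketch: read back the Spectre v1 mitigation string that
 * cpu_show_common() above produces.  With the default mitigation this
 * prints "Mitigation: usercopy/swapgs barriers and __user pointer
 * sanitization".
 */
#include <stdio.h>

int main(void)
{
	char buf[256];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v1", "r");

	if (!f) {
		perror("spectre_v1");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("spectre_v1: %s", buf);	/* sysfs string is newline-terminated */
	fclose(f);
	return 0;
}

With the mitigation disabled ("nospectre_v1" or "mitigations=off" on the command line) the output should instead match the SPECTRE_V1_MITIGATION_NONE string above.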
diff --git a/queue-4.4/x86-speculation-prepare-entry-code-for-spectre-v1-swapgs-mitigations.patch b/queue-4.4/x86-speculation-prepare-entry-code-for-spectre-v1-swapgs-mitigations.patch
new file mode 100644
index 0000000..073ecb8
--- /dev/null
@@ -0,0 +1,213 @@
+From foo@baz Fri 09 Aug 2019 02:39:17 PM CEST
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 8 Jul 2019 11:52:25 -0500
+Subject: x86/speculation: Prepare entry code for Spectre v1 swapgs mitigations
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 18ec54fdd6d18d92025af097cd042a75cf0ea24c upstream.
+
+Spectre v1 isn't only about array bounds checks.  It can affect any
+conditional checks.  The kernel entry code interrupt, exception, and NMI
+handlers all have conditional swapgs checks.  Those may be problematic in
+the context of Spectre v1, as kernel code can speculatively run with a user
+GS.
+
+For example:
+
+       if (coming from user space)
+               swapgs
+       mov %gs:<percpu_offset>, %reg
+       mov (%reg), %reg1
+
+When coming from user space, the CPU can speculatively skip the swapgs, and
+then do a speculative percpu load using the user GS value.  So the user can
+speculatively force a read of any kernel value.  If a gadget exists which
+uses the percpu value as an address in another load/store, then the
+contents of the kernel value may become visible via an L1 side channel
+attack.
+
+A similar attack exists when coming from kernel space.  The CPU can
+speculatively do the swapgs, causing the user GS to get used for the rest
+of the speculative window.
+
+The mitigation is similar to a traditional Spectre v1 mitigation, except:
+
+  a) index masking isn't possible because the index (percpu offset)
+     isn't user-controlled; and
+
+  b) an lfence is needed in both the "from user" swapgs path and the
+     "from kernel" non-swapgs path (because of the two attacks described
+     above).
+
+The user entry swapgs paths already have SWITCH_TO_KERNEL_CR3, which has a
+CR3 write when PTI is enabled.  Since CR3 writes are serializing, the
+lfences can be skipped in those cases.
+
+On the other hand, the kernel entry swapgs paths don't depend on PTI.
+
+To avoid unnecessary lfences for the user entry case, create two separate
+features for alternative patching:
+
+  X86_FEATURE_FENCE_SWAPGS_USER
+  X86_FEATURE_FENCE_SWAPGS_KERNEL
+
+Use these features in entry code to patch in lfences where needed.
+
+The features aren't enabled yet, so there's no functional change.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Dave Hansen <dave.hansen@intel.com>
+[bwh: Backported to 4.4:
+ - Assign the CPU feature bits from word 7
+ - Add FENCE_SWAPGS_KERNEL_ENTRY to NMI entry, since it does not
+   use paranoid_entry
+ - Include <asm/cpufeatures.h> in calling.h
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/calling.h           |   19 +++++++++++++++++++
+ arch/x86/entry/entry_64.S          |   21 +++++++++++++++++++--
+ arch/x86/include/asm/cpufeatures.h |    3 +++
+ 3 files changed, 41 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -1,3 +1,5 @@
++#include <asm/cpufeatures.h>
++
+ /*
+  x86 function call convention, 64-bit:
+@@ -199,6 +201,23 @@ For 32-bit we have the following convent
+       .byte 0xf1
+       .endm
++/*
++ * Mitigate Spectre v1 for conditional swapgs code paths.
++ *
++ * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
++ * prevent a speculative swapgs when coming from kernel space.
++ *
++ * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path,
++ * to prevent the swapgs from getting speculatively skipped when coming from
++ * user space.
++ */
++.macro FENCE_SWAPGS_USER_ENTRY
++      ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER
++.endm
++.macro FENCE_SWAPGS_KERNEL_ENTRY
++      ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL
++.endm
++
+ #else /* CONFIG_X86_64 */
+ /*
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -551,6 +551,7 @@ END(irq_entries_start)
+        * tracking that we're in kernel mode.
+        */
+       SWAPGS
++      FENCE_SWAPGS_USER_ENTRY
+       SWITCH_KERNEL_CR3
+       /*
+@@ -566,8 +567,10 @@ END(irq_entries_start)
+ #ifdef CONFIG_CONTEXT_TRACKING
+       call enter_from_user_mode
+ #endif
+-
++      jmpq    2f
+ 1:
++      FENCE_SWAPGS_KERNEL_ENTRY
++2:
+       /*
+        * Save previous stack pointer, optionally switch to interrupt stack.
+        * irq_count is used to check if a CPU is already on an interrupt stack
+@@ -1077,6 +1080,13 @@ ENTRY(paranoid_entry)
+       movq    %rax, %cr3
+ 2:
+ #endif
++      /*
++       * The above doesn't do an unconditional CR3 write, even in the PTI
++       * case.  So do an lfence to prevent GS speculation, regardless of
++       * whether PTI is enabled.
++       */
++      FENCE_SWAPGS_KERNEL_ENTRY
++
+       ret
+ END(paranoid_entry)
+@@ -1138,6 +1148,7 @@ ENTRY(error_entry)
+        * from user mode due to an IRET fault.
+        */
+       SWAPGS
++      FENCE_SWAPGS_USER_ENTRY
+ .Lerror_entry_from_usermode_after_swapgs:
+       /*
+@@ -1151,6 +1162,8 @@ ENTRY(error_entry)
+ #endif
+       ret
++.Lerror_entry_done_lfence:
++      FENCE_SWAPGS_KERNEL_ENTRY
+ .Lerror_entry_done:
+       TRACE_IRQS_OFF
+       ret
+@@ -1169,7 +1182,7 @@ ENTRY(error_entry)
+       cmpq    %rax, RIP+8(%rsp)
+       je      .Lbstep_iret
+       cmpq    $gs_change, RIP+8(%rsp)
+-      jne     .Lerror_entry_done
++      jne     .Lerror_entry_done_lfence
+       /*
+        * hack: gs_change can fail with user gsbase.  If this happens, fix up
+@@ -1177,6 +1190,7 @@ ENTRY(error_entry)
+        * gs_change's error handler with kernel gsbase.
+        */
+       SWAPGS
++      FENCE_SWAPGS_USER_ENTRY
+       jmp .Lerror_entry_done
+ .Lbstep_iret:
+@@ -1190,6 +1204,7 @@ ENTRY(error_entry)
+        * Switch to kernel gsbase:
+        */
+       SWAPGS
++      FENCE_SWAPGS_USER_ENTRY
+       /*
+        * Pretend that the exception came from user mode: set up pt_regs
+@@ -1286,6 +1301,7 @@ ENTRY(nmi)
+        * to switch CR3 here.
+        */
+       cld
++      FENCE_SWAPGS_USER_ENTRY
+       movq    %rsp, %rdx
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+       pushq   5*8(%rdx)       /* pt_regs->ss */
+@@ -1574,6 +1590,7 @@ end_repeat_nmi:
+       movq    %rax, %cr3
+ 2:
+ #endif
++      FENCE_SWAPGS_KERNEL_ENTRY
+       /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+       call    do_nmi
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -192,6 +192,9 @@
+ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
++#define X86_FEATURE_FENCE_SWAPGS_USER ( 7*32+10) /* "" LFENCE in user entry SWAPGS path */
++#define X86_FEATURE_FENCE_SWAPGS_KERNEL       ( 7*32+11) /* "" LFENCE in kernel entry SWAPGS path */
++
+ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
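[Not part of the patch: conceptually, FENCE_SWAPGS_USER_ENTRY and FENCE_SWAPGS_KERNEL_ENTRY assemble to nothing and are patched into a single LFENCE at boot only when the matching X86_FEATURE_FENCE_SWAPGS_* bit is forced on (the enablement logic is in x86-speculation-enable-spectre-v1-swapgs-mitigations.patch earlier in this commit). Below is a rough user-space sketch of that effect, with a plain boolean standing in for the feature bit; the real mechanism is ALTERNATIVE patching of the entry assembly, not a run-time branch.]

/*
 * Illustrative user-space sketch only -- the kernel makes this choice by
 * patching the entry assembly via ALTERNATIVE at boot.  The boolean below
 * is a hypothetical stand-in for X86_FEATURE_FENCE_SWAPGS_USER being set.
 */
#include <stdbool.h>
#include <stdio.h>

static bool fence_swapgs_user;

static void fence_swapgs_user_entry(void)
{
	/* The patched-in LFENCE blocks speculation past the conditional swapgs. */
	if (fence_swapgs_user)
		__asm__ volatile("lfence" ::: "memory");
}

int main(void)
{
	fence_swapgs_user = true;	/* pretend the mitigation was enabled at boot */
	fence_swapgs_user_entry();
	puts("lfence executed on the simulated user-entry path");
	return 0;
}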
diff --git a/queue-4.4/x86-speculation-swapgs-exclude-atoms-from-speculation-through-swapgs.patch b/queue-4.4/x86-speculation-swapgs-exclude-atoms-from-speculation-through-swapgs.patch
new file mode 100644
index 0000000..72378eb
--- /dev/null
@@ -0,0 +1,155 @@
+From foo@baz Fri 09 Aug 2019 02:39:17 PM CEST
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 17 Jul 2019 21:18:59 +0200
+Subject: x86/speculation/swapgs: Exclude ATOMs from speculation through SWAPGS
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit f36cf386e3fec258a341d446915862eded3e13d8 upstream.
+
+Intel provided the following information:
+
+ On all current Atom processors, instructions that use a segment register
+ value (e.g. a load or store) will not speculatively execute before the
+ last writer of that segment retires. Thus they will not use a
+ speculatively written segment value.
+
+That means on ATOMs there is no speculation through SWAPGS, so the SWAPGS
+entry paths can be excluded from the extra LFENCE if PTI is disabled.
+
+Create a separate bug flag for the through SWAPGS speculation and mark all
+out-of-order ATOMs and AMD/HYGON CPUs as not affected. The in-order ATOMs
+are excluded from the whole mitigation mess anyway.
+
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+[bwh: Backported to 4.4:
+ - There's no whitelist entry (or any support) for Hygon CPUs
+ - Adjust context, indentation]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    1 
+ arch/x86/kernel/cpu/bugs.c         |   18 +++------------
+ arch/x86/kernel/cpu/common.c       |   42 +++++++++++++++++++++++--------------
+ 3 files changed, 32 insertions(+), 29 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -339,5 +339,6 @@
+ #define X86_BUG_L1TF          X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
+ #define X86_BUG_MDS           X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
+ #define X86_BUG_MSBDS_ONLY    X86_BUG(20) /* CPU is only affected by the  MSDBS variant of BUG_MDS */
++#define X86_BUG_SWAPGS                X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -262,18 +262,6 @@ static const char * const spectre_v1_str
+       [SPECTRE_V1_MITIGATION_AUTO] = "Mitigation: usercopy/swapgs barriers and __user pointer sanitization",
+ };
+-static bool is_swapgs_serializing(void)
+-{
+-      /*
+-       * Technically, swapgs isn't serializing on AMD (despite it previously
+-       * being documented as such in the APM).  But according to AMD, %gs is
+-       * updated non-speculatively, and the issuing of %gs-relative memory
+-       * operands will be blocked until the %gs update completes, which is
+-       * good enough for our purposes.
+-       */
+-      return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
+-}
+-
+ /*
+  * Does SMAP provide full mitigation against speculative kernel access to
+  * userspace?
+@@ -324,9 +312,11 @@ static void __init spectre_v1_select_mit
+                        * PTI as the CR3 write in the Meltdown mitigation
+                        * is serializing.
+                        *
+-                       * If neither is there, mitigate with an LFENCE.
++                       * If neither is there, mitigate with an LFENCE to
++                       * stop speculation through swapgs.
+                        */
+-                      if (!is_swapgs_serializing() && !boot_cpu_has(X86_FEATURE_KAISER))
++                      if (boot_cpu_has_bug(X86_BUG_SWAPGS) &&
++                          !boot_cpu_has(X86_FEATURE_KAISER))
+                               setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_USER);
+                       /*
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -853,6 +853,7 @@ static void identify_cpu_without_cpuid(s
+ #define NO_L1TF               BIT(3)
+ #define NO_MDS                BIT(4)
+ #define MSBDS_ONLY    BIT(5)
++#define NO_SWAPGS     BIT(6)
+ #define VULNWL(_vendor, _family, _model, _whitelist)  \
+       { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
+@@ -876,29 +877,37 @@ static const __initconst struct x86_cpu_
+       VULNWL_INTEL(ATOM_BONNELL,              NO_SPECULATION),
+       VULNWL_INTEL(ATOM_BONNELL_MID,          NO_SPECULATION),
+-      VULNWL_INTEL(ATOM_SILVERMONT,           NO_SSB | NO_L1TF | MSBDS_ONLY),
+-      VULNWL_INTEL(ATOM_SILVERMONT_X,         NO_SSB | NO_L1TF | MSBDS_ONLY),
+-      VULNWL_INTEL(ATOM_SILVERMONT_MID,       NO_SSB | NO_L1TF | MSBDS_ONLY),
+-      VULNWL_INTEL(ATOM_AIRMONT,              NO_SSB | NO_L1TF | MSBDS_ONLY),
+-      VULNWL_INTEL(XEON_PHI_KNL,              NO_SSB | NO_L1TF | MSBDS_ONLY),
+-      VULNWL_INTEL(XEON_PHI_KNM,              NO_SSB | NO_L1TF | MSBDS_ONLY),
++      VULNWL_INTEL(ATOM_SILVERMONT,           NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++      VULNWL_INTEL(ATOM_SILVERMONT_X,         NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++      VULNWL_INTEL(ATOM_SILVERMONT_MID,       NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++      VULNWL_INTEL(ATOM_AIRMONT,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++      VULNWL_INTEL(XEON_PHI_KNL,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++      VULNWL_INTEL(XEON_PHI_KNM,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+       VULNWL_INTEL(CORE_YONAH,                NO_SSB),
+-      VULNWL_INTEL(ATOM_AIRMONT_MID,          NO_L1TF | MSBDS_ONLY),
++      VULNWL_INTEL(ATOM_AIRMONT_MID,          NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+-      VULNWL_INTEL(ATOM_GOLDMONT,             NO_MDS | NO_L1TF),
+-      VULNWL_INTEL(ATOM_GOLDMONT_X,           NO_MDS | NO_L1TF),
+-      VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF),
++      VULNWL_INTEL(ATOM_GOLDMONT,             NO_MDS | NO_L1TF | NO_SWAPGS),
++      VULNWL_INTEL(ATOM_GOLDMONT_X,           NO_MDS | NO_L1TF | NO_SWAPGS),
++      VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF | NO_SWAPGS),
++
++      /*
++       * Technically, swapgs isn't serializing on AMD (despite it previously
++       * being documented as such in the APM).  But according to AMD, %gs is
++       * updated non-speculatively, and the issuing of %gs-relative memory
++       * operands will be blocked until the %gs update completes, which is
++       * good enough for our purposes.
++       */
+       /* AMD Family 0xf - 0x12 */
+-      VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
+-      VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
+-      VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
+-      VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
++      VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
++      VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
++      VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
++      VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+       /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
+-      VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS),
++      VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
+       {}
+ };
+@@ -935,6 +944,9 @@ static void __init cpu_set_bug_bits(stru
+                       setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
+       }
++      if (!cpu_matches(NO_SWAPGS))
++              setup_force_cpu_bug(X86_BUG_SWAPGS);
++
+       if (cpu_matches(NO_MELTDOWN))
+               return;
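[Not part of the queued patch: once cpu_set_bug_bits() forces X86_BUG_SWAPGS, the bug bit should become visible from user space, since bug flags whose comment is not hidden with "" are normally listed in the "bugs" line of /proc/cpuinfo, here presumably as "swapgs". A small illustrative sketch for checking whether the running CPU landed in the NO_SWAPGS whitelist; the exported name is an assumption based on the usual flag-name generation.]

/*
 * Illustrative only: report whether the running CPU carries X86_BUG_SWAPGS,
 * assuming the bug is exported as "swapgs" in /proc/cpuinfo's "bugs" line.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[1024];
	FILE *f = fopen("/proc/cpuinfo", "r");

	if (!f) {
		perror("/proc/cpuinfo");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "bugs", 4)) {
			printf("speculates through SWAPGS: %s\n",
			       strstr(line, "swapgs") ? "yes" : "no");
			break;
		}
	}
	fclose(f);
	return 0;
}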