git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 9 Aug 2022 17:18:02 +0000 (19:18 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 9 Aug 2022 17:18:02 +0000 (19:18 +0200)
added patches:
x86-speculation-add-lfence-to-rsb-fill-sequence.patch
x86-speculation-add-rsb-vm-exit-protections.patch

queue-5.4/series
queue-5.4/x86-speculation-add-lfence-to-rsb-fill-sequence.patch [new file with mode: 0644]
queue-5.4/x86-speculation-add-rsb-vm-exit-protections.patch [new file with mode: 0644]

diff --git a/queue-5.4/series b/queue-5.4/series
index 0b71806cfd0fa9a2aba5d83114ae1341ef69039d..fd8f85a70067f854ed9b18db49d55135afc1299c 100644
@@ -11,3 +11,5 @@ kvm-don-t-null-dereference-ops-destroy.patch
 selftests-kvm-handle-compiler-optimizations-in-ucall.patch
 media-v4l2-mem2mem-apply-dst_queue_off_base-on-mmap-buffers-across-ioctls.patch
 macintosh-adb-fix-oob-read-in-do_adb_query-function.patch
+x86-speculation-add-rsb-vm-exit-protections.patch
+x86-speculation-add-lfence-to-rsb-fill-sequence.patch
diff --git a/queue-5.4/x86-speculation-add-lfence-to-rsb-fill-sequence.patch b/queue-5.4/x86-speculation-add-lfence-to-rsb-fill-sequence.patch
new file mode 100644
index 0000000..29960df
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Tue Aug  9 07:16:28 PM CEST 2022
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Tue, 2 Aug 2022 15:47:02 -0700
+Subject: x86/speculation: Add LFENCE to RSB fill sequence
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit ba6e31af2be96c4d0536f2152ed6f7b6c11bca47 upstream.
+
+The RSB fill sequence has no protection against misprediction of the
+conditional branch at the end of the sequence. The CPU can speculatively
+execute code immediately after the sequence, before RSB filling has
+completed.
+
+  #define __FILL_RETURN_BUFFER(reg, nr, sp)    \
+       mov     $(nr/2), reg;                   \
+  771:                                         \
+       call    772f;                           \
+  773: /* speculation trap */                  \
+       pause;                                  \
+       lfence;                                 \
+       jmp     773b;                           \
+  772:                                         \
+       call    774f;                           \
+  775: /* speculation trap */                  \
+       pause;                                  \
+       lfence;                                 \
+       jmp     775b;                           \
+  774:                                         \
+       dec     reg;                            \
+       jnz     771b;  <----- CPU can mispredict here.                          \
+       add     $(BITS_PER_LONG/8) * nr, sp;
+
+Before the RSB is filled, RETs that come in program order after this macro
+can be executed speculatively, making them vulnerable to RSB-based
+attacks.
+
+Mitigate it by adding an LFENCE after the conditional branch to prevent
+speculation while RSB is being filled.
+
+Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -61,7 +61,9 @@
+ 774:                                          \
+       dec     reg;                            \
+       jnz     771b;                           \
+-      add     $(BITS_PER_LONG/8) * nr, sp;
++      add     $(BITS_PER_LONG/8) * nr, sp;    \
++      /* barrier for jnz misprediction */     \
++      lfence;
+ #define __ISSUE_UNBALANCED_RET_GUARD(sp)      \
+       call    881f;                           \
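
For reference only (not part of the patch): the patched __FILL_RETURN_BUFFER
sequence shown in the commit message above, expanded by hand as user-space
inline asm so the new trailing LFENCE can be seen in context. The register
choice, nr = 32 and sp = %rsp are assumptions for x86-64; if actually built
and run, use -mno-red-zone, since the CALLs write below %rsp.

#include <stdio.h>

/*
 * Sketch of the patched RSB fill sequence for nr = 32, sp = %rsp
 * (BITS_PER_LONG/8 == 8 on x86-64).  The lfence after the jnz is the
 * barrier added by this patch.
 */
static void fill_return_buffer(void)
{
        unsigned long reg;

        asm volatile(
                "       mov     $16, %0\n"      /* $(nr/2) */
                "771:   call    772f\n"
                "773:   pause; lfence\n"        /* speculation trap */
                "       jmp     773b\n"
                "772:   call    774f\n"
                "775:   pause; lfence\n"        /* speculation trap */
                "       jmp     775b\n"
                "774:   dec     %0\n"
                "       jnz     771b\n"         /* can be mispredicted */
                "       add     $256, %%rsp\n"  /* (BITS_PER_LONG/8) * nr */
                "       lfence\n"               /* new: barrier for jnz misprediction */
                : "=r" (reg) : : "memory");
}

int main(void)
{
        fill_return_buffer();   /* architecturally a no-op: just fills the RSB */
        puts("RSB fill sequence executed");
        return 0;
}
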
diff --git a/queue-5.4/x86-speculation-add-rsb-vm-exit-protections.patch b/queue-5.4/x86-speculation-add-rsb-vm-exit-protections.patch
new file mode 100644
index 0000000..4915bc3
--- /dev/null
@@ -0,0 +1,356 @@
+From foo@baz Tue Aug  9 07:16:28 PM CEST 2022
+From: Daniel Sneddon <daniel.sneddon@linux.intel.com>
+Date: Tue, 2 Aug 2022 15:47:01 -0700
+Subject: x86/speculation: Add RSB VM Exit protections
+
+From: Daniel Sneddon <daniel.sneddon@linux.intel.com>
+
+commit 2b1299322016731d56807aa49254a5ea3080b6b3 upstream.
+
+tl;dr: The Enhanced IBRS mitigation for Spectre v2 does not work as
+documented for RET instructions after VM exits. Mitigate it with a new
+one-entry RSB stuffing mechanism and a new LFENCE.
+
+== Background ==
+
+Indirect Branch Restricted Speculation (IBRS) was designed to help
+mitigate Branch Target Injection and Speculative Store Bypass, i.e.
+Spectre, attacks. IBRS prevents software run in less privileged modes
+from affecting branch prediction in more privileged modes. IBRS requires
+the MSR to be written on every privilege level change.
+
+To overcome some of the performance issues of IBRS, Enhanced IBRS was
+introduced.  eIBRS is an "always on" IBRS, in other words, just turn
+it on once instead of writing the MSR on every privilege level change.
+When eIBRS is enabled, more privileged modes should be protected from
+less privileged modes, including protecting VMMs from guests.
+
+== Problem ==
+
+Here's a simplification of how guests are run on Linux' KVM:
+
+void run_kvm_guest(void)
+{
+       // Prepare to run guest
+       VMRESUME();
+       // Clean up after guest runs
+}
+
+The execution flow for that would look something like this to the
+processor:
+
+1. Host-side: call run_kvm_guest()
+2. Host-side: VMRESUME
+3. Guest runs, does "CALL guest_function"
+4. VM exit, host runs again
+5. Host might make some "cleanup" function calls
+6. Host-side: RET from run_kvm_guest()
+
+Now, when back on the host, there are a couple of possible scenarios of
+post-guest activity the host needs to do before executing host code:
+
+* on pre-eIBRS hardware (legacy IBRS, or nothing at all), the RSB is not
+touched and Linux has to do a 32-entry stuffing.
+
+* on eIBRS hardware, VM exit with IBRS enabled, or restoring the host
+IBRS=1 shortly after VM exit, has a documented side effect of flushing
+the RSB except in this PBRSB situation where the software needs to stuff
+the last RSB entry "by hand".
+
+IOW, with eIBRS supported, host RET instructions should no longer be
+influenced by guest behavior after the host retires a single CALL
+instruction.
+
+However, if the RET instructions are "unbalanced" with CALLs after a VM
+exit as is the RET in #6, it might speculatively use the address for the
+instruction after the CALL in #3 as an RSB prediction. This is a problem
+since the (untrusted) guest controls this address.
+
+Balanced CALL/RET instruction pairs such as in step #5 are not affected.
+
+== Solution ==
+
+The PBRSB issue affects a wide variety of Intel processors which
+support eIBRS. But not all of them need mitigation. Today,
+X86_FEATURE_RETPOLINE triggers an RSB filling sequence that mitigates
+PBRSB. Systems setting RETPOLINE need no further mitigation - i.e.,
+eIBRS systems which enable retpoline explicitly.
+
+However, eIBRS-only systems (X86_FEATURE_IBRS_ENHANCED without retpoline)
+do not set RETPOLINE, and most of them need a new mitigation.
+
+Therefore, introduce a new feature flag X86_FEATURE_RSB_VMEXIT_LITE
+which triggers a lighter-weight PBRSB mitigation versus RSB Filling at
+vmexit.
+
+The lighter-weight mitigation performs a CALL instruction which is
+immediately followed by a speculative execution barrier (INT3). This
+steers speculative execution to the barrier -- just like a retpoline
+-- which ensures that speculation can never reach an unbalanced RET.
+Then, ensure this CALL is retired before continuing execution with an
+LFENCE.
+
+In other words, the window of exposure opens at VM exit, where RET
+behavior is troublesome. While the window is open, force any RET target
+sampled from the RSB to a dead end at the INT3. Close the window with
+the LFENCE.
+
+There is a subset of eIBRS systems which are not vulnerable to PBRSB.
+Add these systems to the cpu_vuln_whitelist[] as NO_EIBRS_PBRSB.
+Future systems that aren't vulnerable will set ARCH_CAP_PBRSB_NO.
+
+  [ bp: Massage, incorporate review comments from Andy Cooper. ]
+  [ Pawan: Update commit message to replace RSB_VMEXIT with RETPOLINE ]
+
+Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
+Co-developed-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/hw-vuln/spectre.rst |    8 +++
+ arch/x86/include/asm/cpufeatures.h            |    2 
+ arch/x86/include/asm/msr-index.h              |    4 +
+ arch/x86/include/asm/nospec-branch.h          |   15 ++++++
+ arch/x86/kernel/cpu/bugs.c                    |   61 +++++++++++++++++++++++++-
+ arch/x86/kernel/cpu/common.c                  |   12 ++++-
+ arch/x86/kvm/vmx/vmenter.S                    |    1 
+ tools/arch/x86/include/asm/cpufeatures.h      |    1 
+ 8 files changed, 101 insertions(+), 3 deletions(-)
+
+--- a/Documentation/admin-guide/hw-vuln/spectre.rst
++++ b/Documentation/admin-guide/hw-vuln/spectre.rst
+@@ -422,6 +422,14 @@ The possible values in this file are:
+   'RSB filling'   Protection of RSB on context switch enabled
+   =============   ===========================================
++  - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status:
++
++  ===========================  =======================================================
++  'PBRSB-eIBRS: SW sequence'   CPU is affected and protection of RSB on VMEXIT enabled
++  'PBRSB-eIBRS: Vulnerable'    CPU is vulnerable
++  'PBRSB-eIBRS: Not affected'  CPU is not affected by PBRSB
++  ===========================  =======================================================
++
+ Full mitigation might require a microcode update from the CPU
+ vendor. When the necessary microcode is not available, the kernel will
+ report vulnerability.
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -286,6 +286,7 @@
+ #define X86_FEATURE_CQM_MBM_LOCAL     (11*32+ 3) /* LLC Local MBM monitoring */
+ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
+ #define X86_FEATURE_FENCE_SWAPGS_KERNEL       (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
++#define X86_FEATURE_RSB_VMEXIT_LITE   (11*32+ 6) /* "" Fill RSB on VM exit when EIBRS is enabled */
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX512_BF16               (12*32+ 5) /* AVX512 BFLOAT16 instructions */
+@@ -406,5 +407,6 @@
+ #define X86_BUG_ITLB_MULTIHIT         X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+ #define X86_BUG_SRBDS                 X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+ #define X86_BUG_MMIO_STALE_DATA               X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
++#define X86_BUG_EIBRS_PBRSB           X86_BUG(26) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -129,6 +129,10 @@
+                                                * bit available to control VERW
+                                                * behavior.
+                                                */
++#define ARCH_CAP_PBRSB_NO             BIT(24) /*
++                                               * Not susceptible to Post-Barrier
++                                               * Return Stack Buffer Predictions.
++                                               */
+ #define MSR_IA32_FLUSH_CMD            0x0000010b
+ #define L1D_FLUSH                     BIT(0)  /*
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -63,6 +63,13 @@
+       jnz     771b;                           \
+       add     $(BITS_PER_LONG/8) * nr, sp;
++#define __ISSUE_UNBALANCED_RET_GUARD(sp)      \
++      call    881f;                           \
++      int3;                                   \
++881:                                          \
++      add     $(BITS_PER_LONG/8), sp;         \
++      lfence;
++
+ #ifdef __ASSEMBLY__
+ /*
+@@ -132,6 +139,14 @@
+ #endif
+ .endm
++.macro ISSUE_UNBALANCED_RET_GUARD ftr:req
++      ANNOTATE_NOSPEC_ALTERNATIVE
++      ALTERNATIVE "jmp .Lskip_pbrsb_\@",                              \
++              __stringify(__ISSUE_UNBALANCED_RET_GUARD(%_ASM_SP))     \
++              \ftr
++.Lskip_pbrsb_\@:
++.endm
++
+  /*
+   * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+   * monstrosity above, manually.
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1043,6 +1043,49 @@ static enum spectre_v2_mitigation __init
+       return SPECTRE_V2_RETPOLINE;
+ }
++static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
++{
++      /*
++       * Similar to context switches, there are two types of RSB attacks
++       * after VM exit:
++       *
++       * 1) RSB underflow
++       *
++       * 2) Poisoned RSB entry
++       *
++       * When retpoline is enabled, both are mitigated by filling/clearing
++       * the RSB.
++       *
++       * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
++       * prediction isolation protections, RSB still needs to be cleared
++       * because of #2.  Note that SMEP provides no protection here, unlike
++       * user-space-poisoned RSB entries.
++       *
++       * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB
++       * bug is present then a LITE version of RSB protection is required,
++       * just a single call needs to retire before a RET is executed.
++       */
++      switch (mode) {
++      case SPECTRE_V2_NONE:
++      /* These modes already fill RSB at vmexit */
++      case SPECTRE_V2_LFENCE:
++      case SPECTRE_V2_RETPOLINE:
++      case SPECTRE_V2_EIBRS_RETPOLINE:
++              return;
++
++      case SPECTRE_V2_EIBRS_LFENCE:
++      case SPECTRE_V2_EIBRS:
++              if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
++                      setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
++                      pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
++              }
++              return;
++      }
++
++      pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
++      dump_stack();
++}
++
+ static void __init spectre_v2_select_mitigation(void)
+ {
+       enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+@@ -1135,6 +1178,8 @@ static void __init spectre_v2_select_mit
+       setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+       pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
++      spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
++
+       /*
+        * Retpoline means the kernel is safe because it has no indirect
+        * branches. Enhanced IBRS protects firmware too, so, enable restricted
+@@ -1879,6 +1924,19 @@ static char *ibpb_state(void)
+       return "";
+ }
++static char *pbrsb_eibrs_state(void)
++{
++      if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
++              if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
++                  boot_cpu_has(X86_FEATURE_RETPOLINE))
++                      return ", PBRSB-eIBRS: SW sequence";
++              else
++                      return ", PBRSB-eIBRS: Vulnerable";
++      } else {
++              return ", PBRSB-eIBRS: Not affected";
++      }
++}
++
+ static ssize_t spectre_v2_show_state(char *buf)
+ {
+       if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+@@ -1891,12 +1949,13 @@ static ssize_t spectre_v2_show_state(cha
+           spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+               return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
+-      return sprintf(buf, "%s%s%s%s%s%s\n",
++      return sprintf(buf, "%s%s%s%s%s%s%s\n",
+                      spectre_v2_strings[spectre_v2_enabled],
+                      ibpb_state(),
+                      boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+                      stibp_state(),
+                      boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
++                     pbrsb_eibrs_state(),
+                      spectre_v2_module_string());
+ }
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1025,6 +1025,7 @@ static void identify_cpu_without_cpuid(s
+ #define NO_SWAPGS             BIT(6)
+ #define NO_ITLB_MULTIHIT      BIT(7)
+ #define NO_SPECTRE_V2         BIT(8)
++#define NO_EIBRS_PBRSB                BIT(9)
+ #define VULNWL(_vendor, _family, _model, _whitelist)  \
+       { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
+@@ -1065,7 +1066,7 @@ static const __initconst struct x86_cpu_
+       VULNWL_INTEL(ATOM_GOLDMONT,             NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_GOLDMONT_D,           NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+       /*
+        * Technically, swapgs isn't serializing on AMD (despite it previously
+@@ -1075,7 +1076,9 @@ static const __initconst struct x86_cpu_
+        * good enough for our purposes.
+        */
+-      VULNWL_INTEL(ATOM_TREMONT_D,            NO_ITLB_MULTIHIT),
++      VULNWL_INTEL(ATOM_TREMONT,              NO_EIBRS_PBRSB),
++      VULNWL_INTEL(ATOM_TREMONT_L,            NO_EIBRS_PBRSB),
++      VULNWL_INTEL(ATOM_TREMONT_D,            NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+       /* AMD Family 0xf - 0x12 */
+       VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+@@ -1236,6 +1239,11 @@ static void __init cpu_set_bug_bits(stru
+           !arch_cap_mmio_immune(ia32_cap))
+               setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
++      if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
++          !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
++          !(ia32_cap & ARCH_CAP_PBRSB_NO))
++              setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
++
+       if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
+               return;
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -92,6 +92,7 @@ ENTRY(vmx_vmexit)
+       pop %_ASM_AX
+ .Lvmexit_skip_rsb:
+ #endif
++      ISSUE_UNBALANCED_RET_GUARD X86_FEATURE_RSB_VMEXIT_LITE
+       ret
+ ENDPROC(vmx_vmexit)
+--- a/tools/arch/x86/include/asm/cpufeatures.h
++++ b/tools/arch/x86/include/asm/cpufeatures.h
+@@ -284,6 +284,7 @@
+ #define X86_FEATURE_CQM_MBM_LOCAL     (11*32+ 3) /* LLC Local MBM monitoring */
+ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
+ #define X86_FEATURE_FENCE_SWAPGS_KERNEL       (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
++#define X86_FEATURE_RSB_VMEXIT_LITE   (11*32+ 6) /* "" Fill RSB on VM-Exit when EIBRS is enabled */
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX512_BF16               (12*32+ 5) /* AVX512 BFLOAT16 instructions */
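
After both patches (and any required microcode) are applied, the PBRSB status
described in the spectre.rst hunk above is appended to the existing Spectre v2
line in sysfs. A minimal sketch for checking it from user space, assuming the
standard path that spectre.rst documents:

#include <stdio.h>

/*
 * Print the Spectre v2 mitigation line.  On affected eIBRS parts running a
 * kernel with these patches it should include ", PBRSB-eIBRS: SW sequence";
 * unaffected parts report ", PBRSB-eIBRS: Not affected".
 */
int main(void)
{
        const char *path = "/sys/devices/system/cpu/vulnerabilities/spectre_v2";
        char line[256];
        FILE *f = fopen(path, "r");

        if (!f) {
                perror(path);
                return 1;
        }
        if (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);
        return 0;
}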