From 11954bf0eac761c0699e1ff74401b0da37456add Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Tue, 9 Aug 2022 19:18:02 +0200
Subject: [PATCH] 5.4-stable patches

added patches:
	x86-speculation-add-lfence-to-rsb-fill-sequence.patch
	x86-speculation-add-rsb-vm-exit-protections.patch

---
 queue-5.4/series                              |   2 +
 ...tion-add-lfence-to-rsb-fill-sequence.patch |  61 +++
 ...culation-add-rsb-vm-exit-protections.patch | 356 ++++++++++++++++++
 3 files changed, 419 insertions(+)
 create mode 100644 queue-5.4/x86-speculation-add-lfence-to-rsb-fill-sequence.patch
 create mode 100644 queue-5.4/x86-speculation-add-rsb-vm-exit-protections.patch

diff --git a/queue-5.4/series b/queue-5.4/series
index 0b71806cfd0..fd8f85a7006 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -11,3 +11,5 @@ kvm-don-t-null-dereference-ops-destroy.patch
 selftests-kvm-handle-compiler-optimizations-in-ucall.patch
 media-v4l2-mem2mem-apply-dst_queue_off_base-on-mmap-buffers-across-ioctls.patch
 macintosh-adb-fix-oob-read-in-do_adb_query-function.patch
+x86-speculation-add-rsb-vm-exit-protections.patch
+x86-speculation-add-lfence-to-rsb-fill-sequence.patch
diff --git a/queue-5.4/x86-speculation-add-lfence-to-rsb-fill-sequence.patch b/queue-5.4/x86-speculation-add-lfence-to-rsb-fill-sequence.patch
new file mode 100644
index 00000000000..29960df345b
--- /dev/null
+++ b/queue-5.4/x86-speculation-add-lfence-to-rsb-fill-sequence.patch
@@ -0,0 +1,61 @@
+From foo@baz Tue Aug 9 07:16:28 PM CEST 2022
+From: Pawan Gupta
+Date: Tue, 2 Aug 2022 15:47:02 -0700
+Subject: x86/speculation: Add LFENCE to RSB fill sequence
+
+From: Pawan Gupta
+
+commit ba6e31af2be96c4d0536f2152ed6f7b6c11bca47 upstream.
+
+RSB fill sequence does not have any protection for miss-prediction of
+conditional branch at the end of the sequence. CPU can speculatively
+execute code immediately after the sequence, while RSB filling hasn't
+completed yet.
+
+ #define __FILL_RETURN_BUFFER(reg, nr, sp)	\
+	mov	$(nr/2), reg;			\
+771:						\
+	call	772f;				\
+773:	/* speculation trap */			\
+	pause;					\
+	lfence;					\
+	jmp	773b;				\
+772:						\
+	call	774f;				\
+775:	/* speculation trap */			\
+	pause;					\
+	lfence;					\
+	jmp	775b;				\
+774:						\
+	dec	reg;				\
+	jnz	771b;  <----- CPU can miss-predict here.	\
+	add	$(BITS_PER_LONG/8) * nr, sp;
+
+Before RSB is filled, RETs that come in program order after this macro
+can be executed speculatively, making them vulnerable to RSB-based
+attacks.
+
+Mitigate it by adding an LFENCE after the conditional branch to prevent
+speculation while RSB is being filled.
+
+Suggested-by: Andrew Cooper
+Signed-off-by: Pawan Gupta
+Signed-off-by: Borislav Petkov
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/include/asm/nospec-branch.h |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -61,7 +61,9 @@
+ 774:						\
+ 	dec	reg;				\
+ 	jnz	771b;				\
+-	add	$(BITS_PER_LONG/8) * nr, sp;
++	add	$(BITS_PER_LONG/8) * nr, sp;	\
++	/* barrier for jnz misprediction */	\
++	lfence;
+ 
+ #define __ISSUE_UNBALANCED_RET_GUARD(sp)	\
+ 	call	881f;				\
diff --git a/queue-5.4/x86-speculation-add-rsb-vm-exit-protections.patch b/queue-5.4/x86-speculation-add-rsb-vm-exit-protections.patch
new file mode 100644
index 00000000000..4915bc3633b
--- /dev/null
+++ b/queue-5.4/x86-speculation-add-rsb-vm-exit-protections.patch
@@ -0,0 +1,356 @@
+From foo@baz Tue Aug 9 07:16:28 PM CEST 2022
+From: Daniel Sneddon
+Date: Tue, 2 Aug 2022 15:47:01 -0700
+Subject: x86/speculation: Add RSB VM Exit protections
+
+From: Daniel Sneddon
+
+commit 2b1299322016731d56807aa49254a5ea3080b6b3 upstream.
+
+tl;dr: The Enhanced IBRS mitigation for Spectre v2 does not work as
+documented for RET instructions after VM exits. Mitigate it with a new
+one-entry RSB stuffing mechanism and a new LFENCE.
+
+== Background ==
+
+Indirect Branch Restricted Speculation (IBRS) was designed to help
+mitigate Branch Target Injection and Speculative Store Bypass, i.e.
+Spectre, attacks. IBRS prevents software run in less privileged modes
+from affecting branch prediction in more privileged modes. IBRS requires
+the MSR to be written on every privilege level change.
+
+To overcome some of the performance issues of IBRS, Enhanced IBRS was
+introduced. eIBRS is an "always on" IBRS, in other words, just turn
+it on once instead of writing the MSR on every privilege level change.
+When eIBRS is enabled, more privileged modes should be protected from
+less privileged modes, including protecting VMMs from guests.
+
+== Problem ==
+
+Here's a simplification of how guests are run on Linux' KVM:
+
+void run_kvm_guest(void)
+{
+	// Prepare to run guest
+	VMRESUME();
+	// Clean up after guest runs
+}
+
+The execution flow for that would look something like this to the
+processor:
+
+1. Host-side: call run_kvm_guest()
+2. Host-side: VMRESUME
+3. Guest runs, does "CALL guest_function"
+4. VM exit, host runs again
+5. Host might make some "cleanup" function calls
+6. Host-side: RET from run_kvm_guest()
+
+Now, when back on the host, there are a couple of possible scenarios of
+post-guest activity the host needs to do before executing host code:
+
+* on pre-eIBRS hardware (legacy IBRS, or nothing at all), the RSB is not
+touched and Linux has to do a 32-entry stuffing.
+
+* on eIBRS hardware, VM exit with IBRS enabled, or restoring the host
+IBRS=1 shortly after VM exit, has a documented side effect of flushing
+the RSB except in this PBRSB situation where the software needs to stuff
+the last RSB entry "by hand".
+
+IOW, with eIBRS supported, host RET instructions should no longer be
+influenced by guest behavior after the host retires a single CALL
+instruction.
+
+However, if the RET instructions are "unbalanced" with CALLs after a VM
+exit as is the RET in #6, it might speculatively use the address for the
+instruction after the CALL in #3 as an RSB prediction. This is a problem
+since the (untrusted) guest controls this address.
+
+Balanced CALL/RET instruction pairs such as in step #5 are not affected.
+
+== Solution ==
+
+The PBRSB issue affects a wide variety of Intel processors which
+support eIBRS. But not all of them need mitigation. Today,
+X86_FEATURE_RETPOLINE triggers an RSB filling sequence that mitigates
+PBRSB. Systems setting RETPOLINE need no further mitigation - i.e.,
+eIBRS systems which enable retpoline explicitly.
+
+However, such systems (X86_FEATURE_IBRS_ENHANCED) do not set RETPOLINE
+and most of them need a new mitigation.
+
+Therefore, introduce a new feature flag X86_FEATURE_RSB_VMEXIT_LITE
+which triggers a lighter-weight PBRSB mitigation versus RSB Filling at
+vmexit.
+
+The lighter-weight mitigation performs a CALL instruction which is
+immediately followed by a speculative execution barrier (INT3). This
+steers speculative execution to the barrier -- just like a retpoline
+-- which ensures that speculation can never reach an unbalanced RET.
+Then, ensure this CALL is retired before continuing execution with an
+LFENCE.
+
+In other words, the window of exposure is opened at VM exit where RET
+behavior is troublesome. While the window is open, force RSB predictions
+sampling for RET targets to a dead end at the INT3. Close the window
+with the LFENCE.
+
+There is a subset of eIBRS systems which are not vulnerable to PBRSB.
+Add these systems to the cpu_vuln_whitelist[] as NO_EIBRS_PBRSB.
+Future systems that aren't vulnerable will set ARCH_CAP_PBRSB_NO.
+
+ [ bp: Massage, incorporate review comments from Andy Cooper. ]
+ [ Pawan: Update commit message to replace RSB_VMEXIT with RETPOLINE ]
+
+Signed-off-by: Daniel Sneddon
+Co-developed-by: Pawan Gupta
+Signed-off-by: Pawan Gupta
+Signed-off-by: Borislav Petkov
+Signed-off-by: Greg Kroah-Hartman
+---
+ Documentation/admin-guide/hw-vuln/spectre.rst |    8 +++
+ arch/x86/include/asm/cpufeatures.h            |    2 
+ arch/x86/include/asm/msr-index.h              |    4 +
+ arch/x86/include/asm/nospec-branch.h          |   15 ++++++
+ arch/x86/kernel/cpu/bugs.c                    |   61 +++++++++++++++++++++++++-
+ arch/x86/kernel/cpu/common.c                  |   12 ++++-
+ arch/x86/kvm/vmx/vmenter.S                    |    1 
+ tools/arch/x86/include/asm/cpufeatures.h      |    1 
+ 8 files changed, 101 insertions(+), 3 deletions(-)
+
+--- a/Documentation/admin-guide/hw-vuln/spectre.rst
++++ b/Documentation/admin-guide/hw-vuln/spectre.rst
+@@ -422,6 +422,14 @@ The possible values in this file are:
+   'RSB filling'   Protection of RSB on context switch enabled
+   =============   ===========================================
+ 
++  - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status:
++
++  ===========================  =======================================================
++  'PBRSB-eIBRS: SW sequence'   CPU is affected and protection of RSB on VMEXIT enabled
++  'PBRSB-eIBRS: Vulnerable'    CPU is vulnerable
++  'PBRSB-eIBRS: Not affected'  CPU is not affected by PBRSB
++  ===========================  =======================================================
++
+ Full mitigation might require a microcode update from the CPU
+ vendor. When the necessary microcode is not available, the kernel will
+ report vulnerability.
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -286,6 +286,7 @@
+ #define X86_FEATURE_CQM_MBM_LOCAL	(11*32+ 3) /* LLC Local MBM monitoring */
+ #define X86_FEATURE_FENCE_SWAPGS_USER	(11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
+ #define X86_FEATURE_FENCE_SWAPGS_KERNEL	(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
++#define X86_FEATURE_RSB_VMEXIT_LITE	(11*32+ 6) /* "" Fill RSB on VM exit when EIBRS is enabled */
+ 
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
+@@ -406,5 +407,6 @@
+ #define X86_BUG_ITLB_MULTIHIT		X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+ #define X86_BUG_SRBDS			X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+ #define X86_BUG_MMIO_STALE_DATA		X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
++#define X86_BUG_EIBRS_PBRSB		X86_BUG(26) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+ 
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -129,6 +129,10 @@
+ 						 * bit available to control VERW
+ 						 * behavior.
+ 						 */
++#define ARCH_CAP_PBRSB_NO		BIT(24)	/*
++						 * Not susceptible to Post-Barrier
++						 * Return Stack Buffer Predictions.
++						 */
+ 
+ #define MSR_IA32_FLUSH_CMD		0x0000010b
+ #define L1D_FLUSH			BIT(0)	/*
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -63,6 +63,13 @@
+ 	jnz	771b;				\
+ 	add	$(BITS_PER_LONG/8) * nr, sp;
+ 
++#define __ISSUE_UNBALANCED_RET_GUARD(sp)	\
++	call	881f;				\
++	int3;					\
++881:						\
++	add	$(BITS_PER_LONG/8), sp;		\
++	lfence;
++
+ #ifdef __ASSEMBLY__
+ 
+ /*
+@@ -132,6 +139,14 @@
+ #endif
+ .endm
+ 
++.macro ISSUE_UNBALANCED_RET_GUARD ftr:req
++	ANNOTATE_NOSPEC_ALTERNATIVE
++	ALTERNATIVE "jmp .Lskip_pbrsb_\@", \
++		__stringify(__ISSUE_UNBALANCED_RET_GUARD(%_ASM_SP)) \
++		\ftr
++.Lskip_pbrsb_\@:
++.endm
++
+ /*
+  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+  * monstrosity above, manually.
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1043,6 +1043,49 @@ static enum spectre_v2_mitigation __init
+ 	return SPECTRE_V2_RETPOLINE;
+ }
+ 
++static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
++{
++	/*
++	 * Similar to context switches, there are two types of RSB attacks
++	 * after VM exit:
++	 *
++	 * 1) RSB underflow
++	 *
++	 * 2) Poisoned RSB entry
++	 *
++	 * When retpoline is enabled, both are mitigated by filling/clearing
++	 * the RSB.
++	 *
++	 * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
++	 * prediction isolation protections, RSB still needs to be cleared
++	 * because of #2. Note that SMEP provides no protection here, unlike
++	 * user-space-poisoned RSB entries.
++	 *
++	 * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB
++	 * bug is present then a LITE version of RSB protection is required,
++	 * just a single call needs to retire before a RET is executed.
++	 */
++	switch (mode) {
++	case SPECTRE_V2_NONE:
++	/* These modes already fill RSB at vmexit */
++	case SPECTRE_V2_LFENCE:
++	case SPECTRE_V2_RETPOLINE:
++	case SPECTRE_V2_EIBRS_RETPOLINE:
++		return;
++
++	case SPECTRE_V2_EIBRS_LFENCE:
++	case SPECTRE_V2_EIBRS:
++		if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
++			setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
++			pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
++		}
++		return;
++	}
++
++	pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
++	dump_stack();
++}
++
+ static void __init spectre_v2_select_mitigation(void)
+ {
+ 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+@@ -1135,6 +1178,8 @@ static void __init spectre_v2_select_mit
+ 	setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ 	pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+ 
++	spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
++
+ 	/*
+ 	 * Retpoline means the kernel is safe because it has no indirect
+ 	 * branches. Enhanced IBRS protects firmware too, so, enable restricted
+@@ -1879,6 +1924,19 @@ static char *ibpb_state(void)
+ 	return "";
+ }
+ 
++static char *pbrsb_eibrs_state(void)
++{
++	if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
++		if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
++		    boot_cpu_has(X86_FEATURE_RETPOLINE))
++			return ", PBRSB-eIBRS: SW sequence";
++		else
++			return ", PBRSB-eIBRS: Vulnerable";
++	} else {
++		return ", PBRSB-eIBRS: Not affected";
++	}
++}
++
+ static ssize_t spectre_v2_show_state(char *buf)
+ {
+ 	if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+@@ -1891,12 +1949,13 @@ static ssize_t spectre_v2_show_state(cha
+ 	    spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+ 		return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
+ 
+-	return sprintf(buf, "%s%s%s%s%s%s\n",
++	return sprintf(buf, "%s%s%s%s%s%s%s\n",
+ 		       spectre_v2_strings[spectre_v2_enabled],
+ 		       ibpb_state(),
+ 		       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ 		       stibp_state(),
+ 		       boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
++		       pbrsb_eibrs_state(),
+ 		       spectre_v2_module_string());
+ }
+ 
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1025,6 +1025,7 @@ static void identify_cpu_without_cpuid(s
+ #define NO_SWAPGS		BIT(6)
+ #define NO_ITLB_MULTIHIT	BIT(7)
+ #define NO_SPECTRE_V2		BIT(8)
++#define NO_EIBRS_PBRSB		BIT(9)
+ 
+ #define VULNWL(_vendor, _family, _model, _whitelist)	\
+ 	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
+@@ -1065,7 +1066,7 @@ static const __initconst struct x86_cpu_
+ 
+ 	VULNWL_INTEL(ATOM_GOLDMONT,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ 	VULNWL_INTEL(ATOM_GOLDMONT_D,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
++	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+ 
+ 	/*
+ 	 * Technically, swapgs isn't serializing on AMD (despite it previously
+@@ -1075,7 +1076,9 @@ static const __initconst struct x86_cpu_
+ 	 * good enough for our purposes.
+ 	 */
+ 
+-	VULNWL_INTEL(ATOM_TREMONT_D,		NO_ITLB_MULTIHIT),
++	VULNWL_INTEL(ATOM_TREMONT,		NO_EIBRS_PBRSB),
++	VULNWL_INTEL(ATOM_TREMONT_L,		NO_EIBRS_PBRSB),
++	VULNWL_INTEL(ATOM_TREMONT_D,		NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+ 
+ 	/* AMD Family 0xf - 0x12 */
+ 	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+@@ -1236,6 +1239,11 @@ static void __init cpu_set_bug_bits(stru
+ 	    !arch_cap_mmio_immune(ia32_cap))
+ 		setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+ 
++	if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
++	    !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
++	    !(ia32_cap & ARCH_CAP_PBRSB_NO))
++		setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
++
+ 	if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
+ 		return;
+ 
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -92,6 +92,7 @@ ENTRY(vmx_vmexit)
+ 	pop %_ASM_AX
+ .Lvmexit_skip_rsb:
+ #endif
++	ISSUE_UNBALANCED_RET_GUARD X86_FEATURE_RSB_VMEXIT_LITE
+ 	ret
+ ENDPROC(vmx_vmexit)
+ 
+--- a/tools/arch/x86/include/asm/cpufeatures.h
++++ b/tools/arch/x86/include/asm/cpufeatures.h
+@@ -284,6 +284,7 @@
+ #define X86_FEATURE_CQM_MBM_LOCAL	(11*32+ 3) /* LLC Local MBM monitoring */
+ #define X86_FEATURE_FENCE_SWAPGS_USER	(11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
+ #define X86_FEATURE_FENCE_SWAPGS_KERNEL	(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
++#define X86_FEATURE_RSB_VMEXIT_LITE	(11*32+ 6) /* "" Fill RSB on VM-Exit when EIBRS is enabled */
+ 
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
-- 
2.47.3
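
Once a kernel carrying the two patches above is running, the reporting side of
the change can be checked from userspace: pbrsb_eibrs_state() appends one of
the "PBRSB-eIBRS: ..." strings documented in spectre.rst to the spectre_v2
status line in sysfs. The small program below is only an illustrative sketch,
not part of the patches; the sysfs path is the standard vulnerabilities
interface, and the matched substrings are the ones added by this series.

/* check_pbrsb.c - print the spectre_v2 status line and the PBRSB-eIBRS state */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v2", "r");

	if (!f) {
		perror("spectre_v2");
		return 1;
	}
	if (fgets(line, sizeof(line), f)) {
		fputs(line, stdout);	/* full mitigation string from the kernel */
		if (strstr(line, "PBRSB-eIBRS: SW sequence"))
			puts("PBRSB: mitigated by the one-entry RSB stuffing sequence");
		else if (strstr(line, "PBRSB-eIBRS: Vulnerable"))
			puts("PBRSB: reported vulnerable");
		else if (strstr(line, "PBRSB-eIBRS: Not affected"))
			puts("PBRSB: CPU not affected");
		else
			puts("PBRSB: no PBRSB-eIBRS field (kernel without these patches?)");
	}
	fclose(f);
	return 0;
}

On an affected eIBRS system where X86_FEATURE_RSB_VMEXIT_LITE (or retpoline)
is in use, the status line should carry the ", PBRSB-eIBRS: SW sequence"
suffix added by pbrsb_eibrs_state().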