From db05bf19c3841c2327a755afeee046a53ef23897 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 12 Jul 2022 18:08:11 +0200 Subject: [PATCH] 5.15-stable patches added patches: bpf-x86-respect-x86_feature_retpoline.patch bpf-x86-simplify-computing-label-offsets.patch intel_idle-disable-ibrs-during-long-idle.patch kvm-vmx-convert-launched-argument-to-flags.patch kvm-vmx-fix-ibrs-handling-after-vmexit.patch kvm-vmx-flatten-__vmx_vcpu_run.patch kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch objtool-add-entry-unret-validation.patch objtool-classify-symbols.patch objtool-default-ignore-int3-for-unreachable.patch objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch objtool-introduce-cfi-hash.patch objtool-re-add-unwind_hint_-save_restore.patch objtool-shrink-struct-instruction.patch objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch objtool-treat-.text.__x86.-as-noinstr.patch objtool-update-retpoline-validation.patch objtool-x86-replace-alternatives-with-.retpoline_sites.patch x86-add-magic-amd-return-thunk.patch x86-alternative-add-debug-prints-to-apply_retpolines.patch x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch x86-alternative-implement-.retpoline_sites-support.patch x86-alternative-try-inline-spectre_v2-retpoline-amd.patch x86-asm-fix-register-order.patch x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch x86-bpf-use-alternative-ret-encoding.patch x86-bugs-add-amd-retbleed-boot-parameter.patch x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch x86-bugs-add-retbleed-ibpb.patch x86-bugs-do-ibpb-fallback-check-only-once.patch x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch x86-bugs-enable-stibp-for-jmp2ret.patch x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch x86-bugs-optimize-spec_ctrl-msr-writes.patch x86-bugs-report-amd-retbleed-vulnerability.patch x86-bugs-report-intel-retbleed-vulnerability.patch 
x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch x86-common-stamp-out-the-stepping-madness.patch x86-cpu-amd-add-spectral-chicken.patch x86-cpu-amd-enumerate-btc_no.patch x86-cpufeatures-move-retpoline-flags-to-word-11.patch x86-entry-add-kernel-ibrs-implementation.patch x86-entry-avoid-very-early-ret.patch x86-entry-don-t-call-error_entry-for-xenpv.patch x86-entry-move-push_and_clear_regs-back-into-error_entry.patch x86-entry-move-push_and_clear_regs-out-of-error_entry.patch x86-entry-remove-skip_r11rcx.patch x86-entry-switch-the-stack-after-error_entry-returns.patch x86-ftrace-use-alternative-ret-encoding.patch x86-kexec-disable-ret-on-kexec.patch x86-kvm-fix-setcc-emulation-for-return-thunks.patch x86-kvm-vmx-make-noinstr-clean.patch x86-objtool-create-.return_sites.patch x86-realmode-build-with-d__disable_exports.patch x86-retbleed-add-fine-grained-kconfig-knobs.patch x86-retpoline-cleanup-some-ifdefery.patch x86-retpoline-create-a-retpoline-thunk-array.patch x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch x86-retpoline-remove-unused-replacement-symbols.patch x86-retpoline-swizzle-retpoline-thunk.patch x86-retpoline-use-mfunction-return.patch x86-sev-avoid-using-__x86_return_thunk.patch x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch x86-speculation-disable-rrsba-behavior.patch x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch x86-speculation-remove-x86_spec_ctrl_mask.patch x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch x86-static_call-serialize-__static_call_fixup-properly.patch x86-static_call-use-alternative-ret-encoding.patch x86-traps-use-pt_regs-directly-in-fixup_bad_iret.patch x86-undo-return-thunk-damage.patch x86-use-return-thunk-in-asm-code.patch 
x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch x86-xen-add-untrain_ret.patch x86-xen-rename-sys-entry-points.patch --- ...pf-x86-respect-x86_feature_retpoline.patch | 247 ++++++++ ...x86-simplify-computing-label-offsets.patch | 256 ++++++++ ...l_idle-disable-ibrs-during-long-idle.patch | 182 ++++++ ...x-convert-launched-argument-to-flags.patch | 170 +++++ ...m-vmx-fix-ibrs-handling-after-vmexit.patch | 38 ++ .../kvm-vmx-flatten-__vmx_vcpu_run.patch | 196 ++++++ ...est-rsb-poisoning-attacks-with-eibrs.patch | 240 +++++++ .../objtool-add-entry-unret-validation.patch | 528 ++++++++++++++++ queue-5.15/objtool-classify-symbols.patch | 135 ++++ ...-default-ignore-int3-for-unreachable.patch | 56 ++ ...ifying-code-in-.altinstr_replacement.patch | 112 ++++ queue-5.15/objtool-introduce-cfi-hash.patch | 458 ++++++++++++++ ...ool-re-add-unwind_hint_-save_restore.patch | 184 ++++++ .../objtool-shrink-struct-instruction.patch | 66 ++ ...tions-when-adding-return-thunk-sites.patch | 36 ++ ...bjtool-treat-.text.__x86.-as-noinstr.patch | 36 ++ .../objtool-update-retpoline-validation.patch | 111 ++++ ...e-alternatives-with-.retpoline_sites.patch | 493 +++++++++++++++ queue-5.15/series | 78 +++ .../x86-add-magic-amd-return-thunk.patch | 361 +++++++++++ ...add-debug-prints-to-apply_retpolines.patch | 48 ++ ...handle-jcc-__x86_indirect_thunk_-reg.patch | 96 +++ ...e-implement-.retpoline_sites-support.patch | 284 +++++++++ ...-try-inline-spectre_v2-retpoline-amd.patch | 96 +++ queue-5.15/x86-asm-fix-register-order.patch | 58 ++ ...m-fixup-odd-gen-for-each-reg.h-usage.patch | 52 ++ ...x86-bpf-use-alternative-ret-encoding.patch | 63 ++ ...bugs-add-amd-retbleed-boot-parameter.patch | 207 ++++++ ...n-lake-to-retbleed-affected-cpu-list.patch | 30 + queue-5.15/x86-bugs-add-retbleed-ibpb.patch | 253 ++++++++ ...ugs-do-ibpb-fallback-check-only-once.patch | 48 ++ ...-on-entry-when-ibpb-is-not-supported.patch | 48 ++ .../x86-bugs-enable-stibp-for-jmp2ret.patch | 142 +++++ 
...-keep-a-per-cpu-ia32_spec_ctrl-value.patch | 118 ++++ ...6-bugs-optimize-spec_ctrl-msr-writes.patch | 108 ++++ ...gs-report-amd-retbleed-vulnerability.patch | 169 +++++ ...-report-intel-retbleed-vulnerability.patch | 174 ++++++ ...nd-spectre_v2_user_select_mitigation.patch | 102 +++ ...ommon-stamp-out-the-stepping-madness.patch | 77 +++ .../x86-cpu-amd-add-spectral-chicken.patch | 107 ++++ queue-5.15/x86-cpu-amd-enumerate-btc_no.patch | 85 +++ ...ures-move-retpoline-flags-to-word-11.patch | 47 ++ ...entry-add-kernel-ibrs-implementation.patch | 353 +++++++++++ .../x86-entry-avoid-very-early-ret.patch | 63 ++ ...try-don-t-call-error_entry-for-xenpv.patch | 49 ++ ...and_clear_regs-back-into-error_entry.patch | 72 +++ ...sh_and_clear_regs-out-of-error_entry.patch | 78 +++ queue-5.15/x86-entry-remove-skip_r11rcx.patch | 68 ++ ...-the-stack-after-error_entry-returns.patch | 81 +++ ...-ftrace-use-alternative-ret-encoding.patch | 46 ++ .../x86-kexec-disable-ret-on-kexec.patch | 173 ++++++ ...ix-setcc-emulation-for-return-thunks.patch | 97 +++ .../x86-kvm-vmx-make-noinstr-clean.patch | 74 +++ .../x86-objtool-create-.return_sites.patch | 198 ++++++ ...almode-build-with-d__disable_exports.patch | 28 + ...bleed-add-fine-grained-kconfig-knobs.patch | 587 ++++++++++++++++++ .../x86-retpoline-cleanup-some-ifdefery.patch | 50 ++ ...oline-create-a-retpoline-thunk-array.patch | 104 ++++ ...hunk-declarations-to-nospec-branch.h.patch | 72 +++ ...ne-remove-unused-replacement-symbols.patch | 96 +++ ...86-retpoline-swizzle-retpoline-thunk.patch | 40 ++ .../x86-retpoline-use-mfunction-return.patch | 78 +++ ...6-sev-avoid-using-__x86_return_thunk.patch | 47 ++ ...2-ibrs-option-to-support-kernel-ibrs.patch | 208 +++++++ ...6-speculation-disable-rrsba-behavior.patch | 154 +++++ ...culation-fill-rsb-on-vmexit-for-ibrs.patch | 134 ++++ ...ix-firmware-entry-spec_ctrl-handling.patch | 45 ++ ...-rsb-filling-with-config_retpoline-n.patch | 77 +++ ...-spec_ctrl-write-on-smt-state-change.patch | 
33 + ...peculation-remove-x86_spec_ctrl_mask.patch | 87 +++ ...spec_ctrl-value-for-guest-entry-exit.patch | 56 ++ ...rialize-__static_call_fixup-properly.patch | 73 +++ ...ic_call-use-alternative-ret-encoding.patch | 182 ++++++ ...e-pt_regs-directly-in-fixup_bad_iret.patch | 100 +++ queue-5.15/x86-undo-return-thunk-damage.patch | 195 ++++++ .../x86-use-return-thunk-in-asm-code.patch | 93 +++ ...-don-t-use-ret-in-vsyscall-emulation.patch | 47 ++ queue-5.15/x86-xen-add-untrain_ret.patch | 45 ++ .../x86-xen-rename-sys-entry-points.patch | 133 ++++ 79 files changed, 10511 insertions(+) create mode 100644 queue-5.15/bpf-x86-respect-x86_feature_retpoline.patch create mode 100644 queue-5.15/bpf-x86-simplify-computing-label-offsets.patch create mode 100644 queue-5.15/intel_idle-disable-ibrs-during-long-idle.patch create mode 100644 queue-5.15/kvm-vmx-convert-launched-argument-to-flags.patch create mode 100644 queue-5.15/kvm-vmx-fix-ibrs-handling-after-vmexit.patch create mode 100644 queue-5.15/kvm-vmx-flatten-__vmx_vcpu_run.patch create mode 100644 queue-5.15/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch create mode 100644 queue-5.15/objtool-add-entry-unret-validation.patch create mode 100644 queue-5.15/objtool-classify-symbols.patch create mode 100644 queue-5.15/objtool-default-ignore-int3-for-unreachable.patch create mode 100644 queue-5.15/objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch create mode 100644 queue-5.15/objtool-introduce-cfi-hash.patch create mode 100644 queue-5.15/objtool-re-add-unwind_hint_-save_restore.patch create mode 100644 queue-5.15/objtool-shrink-struct-instruction.patch create mode 100644 queue-5.15/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch create mode 100644 queue-5.15/objtool-treat-.text.__x86.-as-noinstr.patch create mode 100644 queue-5.15/objtool-update-retpoline-validation.patch create mode 100644 queue-5.15/objtool-x86-replace-alternatives-with-.retpoline_sites.patch create 
mode 100644 queue-5.15/series create mode 100644 queue-5.15/x86-add-magic-amd-return-thunk.patch create mode 100644 queue-5.15/x86-alternative-add-debug-prints-to-apply_retpolines.patch create mode 100644 queue-5.15/x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch create mode 100644 queue-5.15/x86-alternative-implement-.retpoline_sites-support.patch create mode 100644 queue-5.15/x86-alternative-try-inline-spectre_v2-retpoline-amd.patch create mode 100644 queue-5.15/x86-asm-fix-register-order.patch create mode 100644 queue-5.15/x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch create mode 100644 queue-5.15/x86-bpf-use-alternative-ret-encoding.patch create mode 100644 queue-5.15/x86-bugs-add-amd-retbleed-boot-parameter.patch create mode 100644 queue-5.15/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch create mode 100644 queue-5.15/x86-bugs-add-retbleed-ibpb.patch create mode 100644 queue-5.15/x86-bugs-do-ibpb-fallback-check-only-once.patch create mode 100644 queue-5.15/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch create mode 100644 queue-5.15/x86-bugs-enable-stibp-for-jmp2ret.patch create mode 100644 queue-5.15/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch create mode 100644 queue-5.15/x86-bugs-optimize-spec_ctrl-msr-writes.patch create mode 100644 queue-5.15/x86-bugs-report-amd-retbleed-vulnerability.patch create mode 100644 queue-5.15/x86-bugs-report-intel-retbleed-vulnerability.patch create mode 100644 queue-5.15/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch create mode 100644 queue-5.15/x86-common-stamp-out-the-stepping-madness.patch create mode 100644 queue-5.15/x86-cpu-amd-add-spectral-chicken.patch create mode 100644 queue-5.15/x86-cpu-amd-enumerate-btc_no.patch create mode 100644 queue-5.15/x86-cpufeatures-move-retpoline-flags-to-word-11.patch create mode 100644 queue-5.15/x86-entry-add-kernel-ibrs-implementation.patch create mode 100644 
queue-5.15/x86-entry-avoid-very-early-ret.patch create mode 100644 queue-5.15/x86-entry-don-t-call-error_entry-for-xenpv.patch create mode 100644 queue-5.15/x86-entry-move-push_and_clear_regs-back-into-error_entry.patch create mode 100644 queue-5.15/x86-entry-move-push_and_clear_regs-out-of-error_entry.patch create mode 100644 queue-5.15/x86-entry-remove-skip_r11rcx.patch create mode 100644 queue-5.15/x86-entry-switch-the-stack-after-error_entry-returns.patch create mode 100644 queue-5.15/x86-ftrace-use-alternative-ret-encoding.patch create mode 100644 queue-5.15/x86-kexec-disable-ret-on-kexec.patch create mode 100644 queue-5.15/x86-kvm-fix-setcc-emulation-for-return-thunks.patch create mode 100644 queue-5.15/x86-kvm-vmx-make-noinstr-clean.patch create mode 100644 queue-5.15/x86-objtool-create-.return_sites.patch create mode 100644 queue-5.15/x86-realmode-build-with-d__disable_exports.patch create mode 100644 queue-5.15/x86-retbleed-add-fine-grained-kconfig-knobs.patch create mode 100644 queue-5.15/x86-retpoline-cleanup-some-ifdefery.patch create mode 100644 queue-5.15/x86-retpoline-create-a-retpoline-thunk-array.patch create mode 100644 queue-5.15/x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch create mode 100644 queue-5.15/x86-retpoline-remove-unused-replacement-symbols.patch create mode 100644 queue-5.15/x86-retpoline-swizzle-retpoline-thunk.patch create mode 100644 queue-5.15/x86-retpoline-use-mfunction-return.patch create mode 100644 queue-5.15/x86-sev-avoid-using-__x86_return_thunk.patch create mode 100644 queue-5.15/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch create mode 100644 queue-5.15/x86-speculation-disable-rrsba-behavior.patch create mode 100644 queue-5.15/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch create mode 100644 queue-5.15/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch create mode 100644 queue-5.15/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch create mode 
100644 queue-5.15/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch create mode 100644 queue-5.15/x86-speculation-remove-x86_spec_ctrl_mask.patch create mode 100644 queue-5.15/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch create mode 100644 queue-5.15/x86-static_call-serialize-__static_call_fixup-properly.patch create mode 100644 queue-5.15/x86-static_call-use-alternative-ret-encoding.patch create mode 100644 queue-5.15/x86-traps-use-pt_regs-directly-in-fixup_bad_iret.patch create mode 100644 queue-5.15/x86-undo-return-thunk-damage.patch create mode 100644 queue-5.15/x86-use-return-thunk-in-asm-code.patch create mode 100644 queue-5.15/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch create mode 100644 queue-5.15/x86-xen-add-untrain_ret.patch create mode 100644 queue-5.15/x86-xen-rename-sys-entry-points.patch diff --git a/queue-5.15/bpf-x86-respect-x86_feature_retpoline.patch b/queue-5.15/bpf-x86-respect-x86_feature_retpoline.patch new file mode 100644 index 00000000000..6e7394b9179 --- /dev/null +++ b/queue-5.15/bpf-x86-respect-x86_feature_retpoline.patch @@ -0,0 +1,247 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:48 +0200 +Subject: bpf,x86: Respect X86_FEATURE_RETPOLINE* + +From: Peter Zijlstra + +commit 87c87ecd00c54ecd677798cb49ef27329e0fab41 upstream. + +Current BPF codegen doesn't respect X86_FEATURE_RETPOLINE* flags and +unconditionally emits a thunk call, this is sub-optimal and doesn't +match the regular, compiler generated, code. + +Update the i386 JIT to emit code equal to what the compiler emits for +the regular kernel text (IOW. a plain THUNK call). + +Update the x86_64 JIT to emit code similar to the result of compiler +and kernel rewrites as according to X86_FEATURE_RETPOLINE* flags. +Inlining RETPOLINE_AMD (lfence; jmp *%reg) and !RETPOLINE (jmp *%reg), +while doing a THUNK call for RETPOLINE. 
+ +This removes the hard-coded retpoline thunks and shrinks the generated +code. Leaving a single retpoline thunk definition in the kernel. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Alexei Starovoitov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.614772675@infradead.org +[cascardo: RETPOLINE_AMD was renamed to RETPOLINE_LFENCE] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 59 ----------------------------------- + arch/x86/net/bpf_jit_comp.c | 46 +++++++++++++-------------- + arch/x86/net/bpf_jit_comp32.c | 22 +++++++++++-- + 3 files changed, 41 insertions(+), 86 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -320,63 +320,4 @@ static inline void mds_idle_clear_cpu_bu + + #endif /* __ASSEMBLY__ */ + +-/* +- * Below is used in the eBPF JIT compiler and emits the byte sequence +- * for the following assembly: +- * +- * With retpolines configured: +- * +- * callq do_rop +- * spec_trap: +- * pause +- * lfence +- * jmp spec_trap +- * do_rop: +- * mov %rcx,(%rsp) for x86_64 +- * mov %edx,(%esp) for x86_32 +- * retq +- * +- * Without retpolines configured: +- * +- * jmp *%rcx for x86_64 +- * jmp *%edx for x86_32 +- */ +-#ifdef CONFIG_RETPOLINE +-# ifdef CONFIG_X86_64 +-# define RETPOLINE_RCX_BPF_JIT_SIZE 17 +-# define RETPOLINE_RCX_BPF_JIT() \ +-do { \ +- EMIT1_off32(0xE8, 7); /* callq do_rop */ \ +- /* spec_trap: */ \ +- EMIT2(0xF3, 0x90); /* pause */ \ +- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ +- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ +- /* do_rop: */ \ +- EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \ +- EMIT1(0xC3); /* retq */ \ +-} while (0) +-# else /* !CONFIG_X86_64 */ +-# define RETPOLINE_EDX_BPF_JIT() \ +-do { \ +- EMIT1_off32(0xE8, 7); /* call do_rop */ \ +- /* spec_trap: */ \ +- EMIT2(0xF3, 0x90); /* pause */ \ +- 
EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ +- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ +- /* do_rop: */ \ +- EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \ +- EMIT1(0xC3); /* ret */ \ +-} while (0) +-# endif +-#else /* !CONFIG_RETPOLINE */ +-# ifdef CONFIG_X86_64 +-# define RETPOLINE_RCX_BPF_JIT_SIZE 2 +-# define RETPOLINE_RCX_BPF_JIT() \ +- EMIT2(0xFF, 0xE1); /* jmp *%rcx */ +-# else /* !CONFIG_X86_64 */ +-# define RETPOLINE_EDX_BPF_JIT() \ +- EMIT2(0xFF, 0xE2) /* jmp *%edx */ +-# endif +-#endif +- + #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -387,6 +387,25 @@ int bpf_arch_text_poke(void *ip, enum bp + return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true); + } + ++#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8) ++ ++static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) ++{ ++ u8 *prog = *pprog; ++ ++#ifdef CONFIG_RETPOLINE ++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { ++ EMIT_LFENCE(); ++ EMIT2(0xFF, 0xE0 + reg); ++ } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { ++ emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); ++ } else ++#endif ++ EMIT2(0xFF, 0xE0 + reg); ++ ++ *pprog = prog; ++} ++ + /* + * Generate the following code: + * +@@ -468,7 +487,7 @@ static void emit_bpf_tail_call_indirect( + * rdi == ctx (1st arg) + * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET + */ +- RETPOLINE_RCX_BPF_JIT(); ++ emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start)); + + /* out: */ + ctx->tail_call_indirect_label = prog - start; +@@ -1185,8 +1204,7 @@ static int do_jit(struct bpf_prog *bpf_p + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) +- /* Emit 'lfence' */ +- EMIT3(0x0F, 0xAE, 0xE8); ++ EMIT_LFENCE(); + break; + + /* ST: *(u8*)(dst_reg + off) = imm */ +@@ -2122,24 +2140,6 @@ cleanup: + return ret; + } + +-static int emit_fallback_jump(u8 **pprog) +-{ +- u8 *prog = *pprog; +- int err = 0; +- +-#ifdef CONFIG_RETPOLINE 
+- /* Note that this assumes the the compiler uses external +- * thunks for indirect calls. Both clang and GCC use the same +- * naming convention for external thunks. +- */ +- err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog); +-#else +- EMIT2(0xFF, 0xE2); /* jmp rdx */ +-#endif +- *pprog = prog; +- return err; +-} +- + static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) + { + u8 *jg_reloc, *prog = *pprog; +@@ -2161,9 +2161,7 @@ static int emit_bpf_dispatcher(u8 **ppro + if (err) + return err; + +- err = emit_fallback_jump(&prog); /* jmp thunk/indirect */ +- if (err) +- return err; ++ emit_indirect_jump(&prog, 2 /* rdx */, prog); + + *pprog = prog; + return 0; +--- a/arch/x86/net/bpf_jit_comp32.c ++++ b/arch/x86/net/bpf_jit_comp32.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -1267,6 +1268,21 @@ static void emit_epilogue(u8 **pprog, u3 + *pprog = prog; + } + ++static int emit_jmp_edx(u8 **pprog, u8 *ip) ++{ ++ u8 *prog = *pprog; ++ int cnt = 0; ++ ++#ifdef CONFIG_RETPOLINE ++ EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5)); ++#else ++ EMIT2(0xFF, 0xE2); ++#endif ++ *pprog = prog; ++ ++ return cnt; ++} ++ + /* + * Generate the following code: + * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... 
+@@ -1280,7 +1296,7 @@ static void emit_epilogue(u8 **pprog, u3 + * goto *(prog->bpf_func + prologue_size); + * out: + */ +-static void emit_bpf_tail_call(u8 **pprog) ++static void emit_bpf_tail_call(u8 **pprog, u8 *ip) + { + u8 *prog = *pprog; + int cnt = 0; +@@ -1362,7 +1378,7 @@ static void emit_bpf_tail_call(u8 **ppro + * eax == ctx (1st arg) + * edx == prog->bpf_func + prologue_size + */ +- RETPOLINE_EDX_BPF_JIT(); ++ cnt += emit_jmp_edx(&prog, ip + cnt); + + if (jmp_label1 == -1) + jmp_label1 = cnt; +@@ -2122,7 +2138,7 @@ static int do_jit(struct bpf_prog *bpf_p + break; + } + case BPF_JMP | BPF_TAIL_CALL: +- emit_bpf_tail_call(&prog); ++ emit_bpf_tail_call(&prog, image + addrs[i - 1]); + break; + + /* cond jump */ diff --git a/queue-5.15/bpf-x86-simplify-computing-label-offsets.patch b/queue-5.15/bpf-x86-simplify-computing-label-offsets.patch new file mode 100644 index 00000000000..0b346659c37 --- /dev/null +++ b/queue-5.15/bpf-x86-simplify-computing-label-offsets.patch @@ -0,0 +1,256 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:47 +0200 +Subject: bpf,x86: Simplify computing label offsets + +From: Peter Zijlstra + +commit dceba0817ca329868a15e2e1dd46eb6340b69206 upstream. + +Take an idea from the 32bit JIT, which uses the multi-pass nature of +the JIT to compute the instruction offsets on a prior pass in order to +compute the relative jump offsets on a later pass. + +Application to the x86_64 JIT is slightly more involved because the +offsets depend on program variables (such as callee_regs_used and +stack_depth) and hence the computed offsets need to be kept in the +context of the JIT. + +This removes, IMO quite fragile, code that hard-codes the offsets and +tries to compute the length of variable parts of it. + +Convert both emit_bpf_tail_call_*() functions which have an out: label +at the end. 
Additionally emit_bpf_tail_call_direct() also has a poke +table entry, for which it computes the offset from the end (and thus +already relies on the previous pass to have computed addrs[i]), also +convert this to be a forward based offset. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Alexei Starovoitov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.552304864@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/net/bpf_jit_comp.c | 123 +++++++++++++++----------------------------- + 1 file changed, 42 insertions(+), 81 deletions(-) + +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -224,6 +224,14 @@ static void jit_fill_hole(void *area, un + + struct jit_context { + int cleanup_addr; /* Epilogue code offset */ ++ ++ /* ++ * Program specific offsets of labels in the code; these rely on the ++ * JIT doing at least 2 passes, recording the position on the first ++ * pass, only to generate the correct offset on the second pass. 
++ */ ++ int tail_call_direct_label; ++ int tail_call_indirect_label; + }; + + /* Maximum number of bytes emitted while JITing one eBPF insn */ +@@ -379,22 +387,6 @@ int bpf_arch_text_poke(void *ip, enum bp + return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true); + } + +-static int get_pop_bytes(bool *callee_regs_used) +-{ +- int bytes = 0; +- +- if (callee_regs_used[3]) +- bytes += 2; +- if (callee_regs_used[2]) +- bytes += 2; +- if (callee_regs_used[1]) +- bytes += 2; +- if (callee_regs_used[0]) +- bytes += 1; +- +- return bytes; +-} +- + /* + * Generate the following code: + * +@@ -410,29 +402,12 @@ static int get_pop_bytes(bool *callee_re + * out: + */ + static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, +- u32 stack_depth) ++ u32 stack_depth, u8 *ip, ++ struct jit_context *ctx) + { + int tcc_off = -4 - round_up(stack_depth, 8); +- u8 *prog = *pprog; +- int pop_bytes = 0; +- int off1 = 42; +- int off2 = 31; +- int off3 = 9; +- +- /* count the additional bytes used for popping callee regs from stack +- * that need to be taken into account for each of the offsets that +- * are used for bailing out of the tail call +- */ +- pop_bytes = get_pop_bytes(callee_regs_used); +- off1 += pop_bytes; +- off2 += pop_bytes; +- off3 += pop_bytes; +- +- if (stack_depth) { +- off1 += 7; +- off2 += 7; +- off3 += 7; +- } ++ u8 *prog = *pprog, *start = *pprog; ++ int offset; + + /* + * rdi - pointer to ctx +@@ -447,8 +422,9 @@ static void emit_bpf_tail_call_indirect( + EMIT2(0x89, 0xD2); /* mov edx, edx */ + EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ + offsetof(struct bpf_array, map.max_entries)); +-#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */ +- EMIT2(X86_JBE, OFFSET1); /* jbe out */ ++ ++ offset = ctx->tail_call_indirect_label - (prog + 2 - start); ++ EMIT2(X86_JBE, offset); /* jbe out */ + + /* + * if (tail_call_cnt > MAX_TAIL_CALL_CNT) +@@ -456,8 +432,9 @@ static void emit_bpf_tail_call_indirect( + 
*/ + EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ + EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ +-#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE) +- EMIT2(X86_JA, OFFSET2); /* ja out */ ++ ++ offset = ctx->tail_call_indirect_label - (prog + 2 - start); ++ EMIT2(X86_JA, offset); /* ja out */ + EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ + EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ + +@@ -470,12 +447,11 @@ static void emit_bpf_tail_call_indirect( + * goto out; + */ + EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */ +-#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE) +- EMIT2(X86_JE, OFFSET3); /* je out */ + +- *pprog = prog; +- pop_callee_regs(pprog, callee_regs_used); +- prog = *pprog; ++ offset = ctx->tail_call_indirect_label - (prog + 2 - start); ++ EMIT2(X86_JE, offset); /* je out */ ++ ++ pop_callee_regs(&prog, callee_regs_used); + + EMIT1(0x58); /* pop rax */ + if (stack_depth) +@@ -495,38 +471,18 @@ static void emit_bpf_tail_call_indirect( + RETPOLINE_RCX_BPF_JIT(); + + /* out: */ ++ ctx->tail_call_indirect_label = prog - start; + *pprog = prog; + } + + static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, +- u8 **pprog, int addr, u8 *image, +- bool *callee_regs_used, u32 stack_depth) ++ u8 **pprog, u8 *ip, ++ bool *callee_regs_used, u32 stack_depth, ++ struct jit_context *ctx) + { + int tcc_off = -4 - round_up(stack_depth, 8); +- u8 *prog = *pprog; +- int pop_bytes = 0; +- int off1 = 20; +- int poke_off; +- +- /* count the additional bytes used for popping callee regs to stack +- * that need to be taken into account for jump offset that is used for +- * bailing out from of the tail call when limit is reached +- */ +- pop_bytes = get_pop_bytes(callee_regs_used); +- off1 += pop_bytes; +- +- /* +- * total bytes for: +- * - nop5/ jmpq $off +- * - pop callee regs +- * - sub rsp, $val if depth > 0 +- * - pop rax +- */ +- poke_off = X86_PATCH_SIZE + 
pop_bytes + 1; +- if (stack_depth) { +- poke_off += 7; +- off1 += 7; +- } ++ u8 *prog = *pprog, *start = *pprog; ++ int offset; + + /* + * if (tail_call_cnt > MAX_TAIL_CALL_CNT) +@@ -534,28 +490,30 @@ static void emit_bpf_tail_call_direct(st + */ + EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ + EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ +- EMIT2(X86_JA, off1); /* ja out */ ++ ++ offset = ctx->tail_call_direct_label - (prog + 2 - start); ++ EMIT2(X86_JA, offset); /* ja out */ + EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ + EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ + +- poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE); ++ poke->tailcall_bypass = ip + (prog - start); + poke->adj_off = X86_TAIL_CALL_OFFSET; +- poke->tailcall_target = image + (addr - X86_PATCH_SIZE); ++ poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE; + poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE; + + emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE, + poke->tailcall_bypass); + +- *pprog = prog; +- pop_callee_regs(pprog, callee_regs_used); +- prog = *pprog; ++ pop_callee_regs(&prog, callee_regs_used); + EMIT1(0x58); /* pop rax */ + if (stack_depth) + EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); + + memcpy(prog, x86_nops[5], X86_PATCH_SIZE); + prog += X86_PATCH_SIZE; ++ + /* out: */ ++ ctx->tail_call_direct_label = prog - start; + + *pprog = prog; + } +@@ -1453,13 +1411,16 @@ st: if (is_imm8(insn->off)) + case BPF_JMP | BPF_TAIL_CALL: + if (imm32) + emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1], +- &prog, addrs[i], image, ++ &prog, image + addrs[i - 1], + callee_regs_used, +- bpf_prog->aux->stack_depth); ++ bpf_prog->aux->stack_depth, ++ ctx); + else + emit_bpf_tail_call_indirect(&prog, + callee_regs_used, +- bpf_prog->aux->stack_depth); ++ bpf_prog->aux->stack_depth, ++ image + addrs[i - 1], ++ ctx); + break; + 
+ /* cond jump */ diff --git a/queue-5.15/intel_idle-disable-ibrs-during-long-idle.patch b/queue-5.15/intel_idle-disable-ibrs-during-long-idle.patch new file mode 100644 index 00000000000..bd277b08b8c --- /dev/null +++ b/queue-5.15/intel_idle-disable-ibrs-during-long-idle.patch @@ -0,0 +1,182 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:58 +0200 +Subject: intel_idle: Disable IBRS during long idle + +From: Peter Zijlstra + +commit bf5835bcdb9635c97f85120dba9bfa21e111130f upstream. + +Having IBRS enabled while the SMT sibling is idle unnecessarily slows +down the running sibling. OTOH, disabling IBRS around idle takes two +MSR writes, which will increase the idle latency. + +Therefore, only disable IBRS around deeper idle states. Shallow idle +states are bounded by the tick in duration, since NOHZ is not allowed +for them by virtue of their short target residency. + +Only do this for mwait-driven idle, since that keeps interrupts disabled +across idle, which makes disabling IBRS vs IRQ-entry a non-issue. + +Note: C6 is a random threshold, most importantly C1 probably shouldn't +disable IBRS, benchmarking needed. 
+ +Suggested-by: Tim Chen +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: no CPUIDLE_FLAG_IRQ_ENABLE] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 6 ++++ + drivers/idle/intel_idle.c | 43 ++++++++++++++++++++++++++++++----- + 3 files changed, 44 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -256,6 +256,7 @@ static inline void indirect_branch_predi + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; + extern void write_spec_ctrl_current(u64 val, bool force); ++extern u64 spec_ctrl_current(void); + + /* + * With retpoline, we must use IBRS to restrict branch prediction +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -79,6 +79,12 @@ void write_spec_ctrl_current(u64 val, bo + wrmsrl(MSR_IA32_SPEC_CTRL, val); + } + ++u64 spec_ctrl_current(void) ++{ ++ return this_cpu_read(x86_spec_ctrl_current); ++} ++EXPORT_SYMBOL_GPL(spec_ctrl_current); ++ + /* + * The vendor and possibly platform specific bits which can be modified in + * x86_spec_ctrl_base. +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -47,11 +47,13 @@ + #include + #include + #include ++#include + #include + #include + #include + #include + #include ++#include + #include + #include + +@@ -94,6 +96,12 @@ static unsigned int mwait_substates __in + #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) + + /* ++ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE ++ * above. ++ */ ++#define CPUIDLE_FLAG_IBRS BIT(16) ++ ++/* + * MWAIT takes an 8-bit "hint" in EAX "suggesting" + * the C-state (top nibble) and sub-state (bottom nibble) + * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 
+@@ -132,6 +140,24 @@ static __cpuidle int intel_idle(struct c + return index; + } + ++static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, ++ struct cpuidle_driver *drv, int index) ++{ ++ bool smt_active = sched_smt_active(); ++ u64 spec_ctrl = spec_ctrl_current(); ++ int ret; ++ ++ if (smt_active) ++ wrmsrl(MSR_IA32_SPEC_CTRL, 0); ++ ++ ret = intel_idle(dev, drv, index); ++ ++ if (smt_active) ++ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); ++ ++ return ret; ++} ++ + /** + * intel_idle_s2idle - Ask the processor to enter the given idle state. + * @dev: cpuidle device of the target CPU. +@@ -653,7 +679,7 @@ static struct cpuidle_state skl_cstates[ + { + .name = "C6", + .desc = "MWAIT 0x20", +- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 85, + .target_residency = 200, + .enter = &intel_idle, +@@ -661,7 +687,7 @@ static struct cpuidle_state skl_cstates[ + { + .name = "C7s", + .desc = "MWAIT 0x33", +- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 124, + .target_residency = 800, + .enter = &intel_idle, +@@ -669,7 +695,7 @@ static struct cpuidle_state skl_cstates[ + { + .name = "C8", + .desc = "MWAIT 0x40", +- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 200, + .target_residency = 800, + .enter = &intel_idle, +@@ -677,7 +703,7 @@ static struct cpuidle_state skl_cstates[ + { + .name = "C9", + .desc = "MWAIT 0x50", +- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 480, + .target_residency = 5000, + .enter = &intel_idle, +@@ -685,7 +711,7 @@ static struct cpuidle_state skl_cstates[ + { + .name = "C10", + .desc = "MWAIT 0x60", +- .flags = MWAIT2flg(0x60) | 
CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 890, + .target_residency = 5000, + .enter = &intel_idle, +@@ -714,7 +740,7 @@ static struct cpuidle_state skx_cstates[ + { + .name = "C6", + .desc = "MWAIT 0x20", +- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 133, + .target_residency = 600, + .enter = &intel_idle, +@@ -1574,6 +1600,11 @@ static void __init intel_idle_init_cstat + /* Structure copy. */ + drv->states[drv->state_count] = cpuidle_state_table[cstate]; + ++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && ++ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) { ++ drv->states[drv->state_count].enter = intel_idle_ibrs; ++ } ++ + if ((disabled_states_mask & BIT(drv->state_count)) || + ((icpu->use_acpi || force_use_acpi) && + intel_idle_off_by_default(mwait_hint) && diff --git a/queue-5.15/kvm-vmx-convert-launched-argument-to-flags.patch b/queue-5.15/kvm-vmx-convert-launched-argument-to-flags.patch new file mode 100644 index 00000000000..89506e263f5 --- /dev/null +++ b/queue-5.15/kvm-vmx-convert-launched-argument-to-flags.patch @@ -0,0 +1,170 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:12 +0200 +Subject: KVM: VMX: Convert launched argument to flags + +From: Josh Poimboeuf + +commit bb06650634d3552c0f8557e9d16aa1a408040e28 upstream. + +Convert __vmx_vcpu_run()'s 'launched' argument to 'flags', in +preparation for doing SPEC_CTRL handling immediately after vmexit, which +will need another flag. + +This is much easier than adding a fourth argument, because this code +supports both 32-bit and 64-bit, and the fourth argument on 32-bit would +have to be pushed on the stack. + +Note that __vmx_vcpu_run_flags() is called outside of the noinstr +critical section because it will soon start calling potentially +traceable functions. 
+ +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 2 +- + arch/x86/kvm/vmx/run_flags.h | 7 +++++++ + arch/x86/kvm/vmx/vmenter.S | 9 +++++---- + arch/x86/kvm/vmx/vmx.c | 17 ++++++++++++++--- + arch/x86/kvm/vmx/vmx.h | 5 ++++- + 5 files changed, 31 insertions(+), 9 deletions(-) + create mode 100644 arch/x86/kvm/vmx/run_flags.h + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -3077,7 +3077,7 @@ static int nested_vmx_check_vmentry_hw(s + } + + vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, +- vmx->loaded_vmcs->launched); ++ __vmx_vcpu_run_flags(vmx)); + + if (vmx->msr_autoload.host.nr) + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); +--- /dev/null ++++ b/arch/x86/kvm/vmx/run_flags.h +@@ -0,0 +1,7 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __KVM_X86_VMX_RUN_FLAGS_H ++#define __KVM_X86_VMX_RUN_FLAGS_H ++ ++#define VMX_RUN_VMRESUME (1 << 0) ++ ++#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include "run_flags.h" + + #define WORD_SIZE (BITS_PER_LONG / 8) + +@@ -34,7 +35,7 @@ + * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode + * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) + * @regs: unsigned long * (to guest registers) +- * @launched: %true if the VMCS has been launched ++ * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH + * + * Returns: + * 0 on VM-Exit, 1 on VM-Fail +@@ -59,7 +60,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + */ + push %_ASM_ARG2 + +- /* Copy @launched to BL, _ASM_ARG3 is volatile. */ ++ /* Copy @flags to BL, _ASM_ARG3 is volatile. 
*/ + mov %_ASM_ARG3B, %bl + + lea (%_ASM_SP), %_ASM_ARG2 +@@ -69,7 +70,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + mov (%_ASM_SP), %_ASM_AX + + /* Check if vmlaunch or vmresume is needed */ +- testb %bl, %bl ++ testb $VMX_RUN_VMRESUME, %bl + + /* Load guest registers. Don't clobber flags. */ + mov VCPU_RCX(%_ASM_AX), %_ASM_CX +@@ -92,7 +93,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + mov VCPU_RAX(%_ASM_AX), %_ASM_AX + + /* Check EFLAGS.ZF from 'testb' above */ +- je .Lvmlaunch ++ jz .Lvmlaunch + + /* + * After a successful VMRESUME/VMLAUNCH, control flow "magically" +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -835,6 +835,16 @@ static bool msr_write_intercepted(struct + MSR_IA32_SPEC_CTRL); + } + ++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) ++{ ++ unsigned int flags = 0; ++ ++ if (vmx->loaded_vmcs->launched) ++ flags |= VMX_RUN_VMRESUME; ++ ++ return flags; ++} ++ + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, + unsigned long entry, unsigned long exit) + { +@@ -6667,7 +6677,8 @@ static fastpath_t vmx_exit_handlers_fast + } + + static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, +- struct vcpu_vmx *vmx) ++ struct vcpu_vmx *vmx, ++ unsigned long flags) + { + kvm_guest_enter_irqoff(); + +@@ -6686,7 +6697,7 @@ static noinstr void vmx_vcpu_enter_exit( + native_write_cr2(vcpu->arch.cr2); + + vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, +- vmx->loaded_vmcs->launched); ++ flags); + + vcpu->arch.cr2 = native_read_cr2(); + +@@ -6786,7 +6797,7 @@ static fastpath_t vmx_vcpu_run(struct kv + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); + + /* The actual VMENTER/EXIT is in the .noinstr.text section. */ +- vmx_vcpu_enter_exit(vcpu, vmx); ++ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); + + /* + * We do not use IBRS in the kernel. 
If this vCPU has used the +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -13,6 +13,7 @@ + #include "vmcs.h" + #include "vmx_ops.h" + #include "cpuid.h" ++#include "run_flags.h" + + #define MSR_TYPE_R 1 + #define MSR_TYPE_W 2 +@@ -382,7 +383,9 @@ void vmx_set_virtual_apic_mode(struct kv + struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); + void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); + void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); +-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); ++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); ++bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, ++ unsigned int flags); + int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); + void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); + diff --git a/queue-5.15/kvm-vmx-fix-ibrs-handling-after-vmexit.patch b/queue-5.15/kvm-vmx-fix-ibrs-handling-after-vmexit.patch new file mode 100644 index 00000000000..68a94f4e233 --- /dev/null +++ b/queue-5.15/kvm-vmx-fix-ibrs-handling-after-vmexit.patch @@ -0,0 +1,38 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:14 +0200 +Subject: KVM: VMX: Fix IBRS handling after vmexit + +From: Josh Poimboeuf + +commit bea7e31a5caccb6fe8ed989c065072354f0ecb52 upstream. + +For legacy IBRS to work, the IBRS bit needs to be always re-written +after vmexit, even if it's already on. + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6685,8 +6685,13 @@ void noinstr vmx_spec_ctrl_restore_host( + + /* + * If the guest/host SPEC_CTRL values differ, restore the host value. 
++ * ++ * For legacy IBRS, the IBRS bit always needs to be written after ++ * transitioning from a less privileged predictor mode, regardless of ++ * whether the guest/host values differ. + */ +- if (vmx->spec_ctrl != hostval) ++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || ++ vmx->spec_ctrl != hostval) + native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); + + barrier_nospec(); diff --git a/queue-5.15/kvm-vmx-flatten-__vmx_vcpu_run.patch b/queue-5.15/kvm-vmx-flatten-__vmx_vcpu_run.patch new file mode 100644 index 00000000000..5d72a48171b --- /dev/null +++ b/queue-5.15/kvm-vmx-flatten-__vmx_vcpu_run.patch @@ -0,0 +1,196 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:11 +0200 +Subject: KVM: VMX: Flatten __vmx_vcpu_run() + +From: Josh Poimboeuf + +commit 8bd200d23ec42d66ccd517a72dd0b9cc6132d2fd upstream. + +Move the vmx_vm{enter,exit}() functionality into __vmx_vcpu_run(). This +will make it easier to do the spec_ctrl handling before the first RET. + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +[cascardo: remove ENDBR] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmenter.S | 118 +++++++++++++++++---------------------------- + 1 file changed, 45 insertions(+), 73 deletions(-) + +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -31,68 +31,6 @@ + .section .noinstr.text, "ax" + + /** +- * vmx_vmenter - VM-Enter the current loaded VMCS +- * +- * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME +- * +- * Returns: +- * %RFLAGS.CF is set on VM-Fail Invalid +- * %RFLAGS.ZF is set on VM-Fail Valid +- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit +- * +- * Note that VMRESUME/VMLAUNCH fall-through and return directly if +- * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump +- * to vmx_vmexit. 
+- */ +-SYM_FUNC_START_LOCAL(vmx_vmenter) +- /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */ +- je 2f +- +-1: vmresume +- RET +- +-2: vmlaunch +- RET +- +-3: cmpb $0, kvm_rebooting +- je 4f +- RET +-4: ud2 +- +- _ASM_EXTABLE(1b, 3b) +- _ASM_EXTABLE(2b, 3b) +- +-SYM_FUNC_END(vmx_vmenter) +- +-/** +- * vmx_vmexit - Handle a VMX VM-Exit +- * +- * Returns: +- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit +- * +- * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump +- * here after hardware loads the host's state, i.e. this is the destination +- * referred to by VMCS.HOST_RIP. +- */ +-SYM_FUNC_START(vmx_vmexit) +-#ifdef CONFIG_RETPOLINE +- ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE +- /* Preserve guest's RAX, it's used to stuff the RSB. */ +- push %_ASM_AX +- +- /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ +- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +- +- /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ +- or $1, %_ASM_AX +- +- pop %_ASM_AX +-.Lvmexit_skip_rsb: +-#endif +- RET +-SYM_FUNC_END(vmx_vmexit) +- +-/** + * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode + * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) + * @regs: unsigned long * (to guest registers) +@@ -124,8 +62,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + /* Copy @launched to BL, _ASM_ARG3 is volatile. */ + mov %_ASM_ARG3B, %bl + +- /* Adjust RSP to account for the CALL to vmx_vmenter(). */ +- lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 ++ lea (%_ASM_SP), %_ASM_ARG2 + call vmx_update_host_rsp + + /* Load @regs to RAX. */ +@@ -154,11 +91,36 @@ SYM_FUNC_START(__vmx_vcpu_run) + /* Load guest RAX. This kills the @regs pointer! */ + mov VCPU_RAX(%_ASM_AX), %_ASM_AX + +- /* Enter guest mode */ +- call vmx_vmenter ++ /* Check EFLAGS.ZF from 'testb' above */ ++ je .Lvmlaunch + +- /* Jump on VM-Fail. 
*/ +- jbe 2f ++ /* ++ * After a successful VMRESUME/VMLAUNCH, control flow "magically" ++ * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting. ++ * So this isn't a typical function and objtool needs to be told to ++ * save the unwind state here and restore it below. ++ */ ++ UNWIND_HINT_SAVE ++ ++/* ++ * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at ++ * the 'vmx_vmexit' label below. ++ */ ++.Lvmresume: ++ vmresume ++ jmp .Lvmfail ++ ++.Lvmlaunch: ++ vmlaunch ++ jmp .Lvmfail ++ ++ _ASM_EXTABLE(.Lvmresume, .Lfixup) ++ _ASM_EXTABLE(.Lvmlaunch, .Lfixup) ++ ++SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) ++ ++ /* Restore unwind state from before the VMRESUME/VMLAUNCH. */ ++ UNWIND_HINT_RESTORE + + /* Temporarily save guest's RAX. */ + push %_ASM_AX +@@ -185,9 +147,13 @@ SYM_FUNC_START(__vmx_vcpu_run) + mov %r15, VCPU_R15(%_ASM_AX) + #endif + ++ /* IMPORTANT: RSB must be stuffed before the first return. */ ++ FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE ++ + /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ + xor %eax, %eax + ++.Lclear_regs: + /* + * Clear all general purpose registers except RSP and RAX to prevent + * speculative use of the guest's values, even those that are reloaded +@@ -197,7 +163,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + * free. RSP and RAX are exempt as RSP is restored by hardware during + * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. + */ +-1: xor %ecx, %ecx ++ xor %ecx, %ecx + xor %edx, %edx + xor %ebx, %ebx + xor %ebp, %ebp +@@ -216,8 +182,8 @@ SYM_FUNC_START(__vmx_vcpu_run) + + /* "POP" @regs. */ + add $WORD_SIZE, %_ASM_SP +- pop %_ASM_BX + ++ pop %_ASM_BX + #ifdef CONFIG_X86_64 + pop %r12 + pop %r13 +@@ -230,9 +196,15 @@ SYM_FUNC_START(__vmx_vcpu_run) + pop %_ASM_BP + RET + +- /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. 
*/ +-2: mov $1, %eax +- jmp 1b ++.Lfixup: ++ cmpb $0, kvm_rebooting ++ jne .Lvmfail ++ ud2 ++.Lvmfail: ++ /* VM-Fail: set return value to 1 */ ++ mov $1, %eax ++ jmp .Lclear_regs ++ + SYM_FUNC_END(__vmx_vcpu_run) + + diff --git a/queue-5.15/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch b/queue-5.15/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch new file mode 100644 index 00000000000..298f7542587 --- /dev/null +++ b/queue-5.15/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch @@ -0,0 +1,240 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:13 +0200 +Subject: KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS + +From: Josh Poimboeuf + +commit fc02735b14fff8c6678b521d324ade27b1a3d4cf upstream. + +On eIBRS systems, the returns in the vmexit return path from +__vmx_vcpu_run() to vmx_vcpu_run() are exposed to RSB poisoning attacks. + +Fix that by moving the post-vmexit spec_ctrl handling to immediately +after the vmexit. 
+ +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 4 ++ + arch/x86/kvm/vmx/run_flags.h | 1 + arch/x86/kvm/vmx/vmenter.S | 49 +++++++++++++++++++++++++++-------- + arch/x86/kvm/vmx/vmx.c | 48 ++++++++++++++++++++-------------- + arch/x86/kvm/vmx/vmx.h | 1 + 6 files changed, 73 insertions(+), 31 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -275,6 +275,7 @@ static inline void indirect_branch_predi + + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; ++extern u64 x86_spec_ctrl_current; + extern void write_spec_ctrl_current(u64 val, bool force); + extern u64 spec_ctrl_current(void); + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -195,6 +195,10 @@ void __init check_bugs(void) + #endif + } + ++/* ++ * NOTE: For VMX, this function is not called in the vmexit path. ++ * It uses vmx_spec_ctrl_restore_host() instead. 
++ */ + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +--- a/arch/x86/kvm/vmx/run_flags.h ++++ b/arch/x86/kvm/vmx/run_flags.h +@@ -3,5 +3,6 @@ + #define __KVM_X86_VMX_RUN_FLAGS_H + + #define VMX_RUN_VMRESUME (1 << 0) ++#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) + + #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -33,9 +33,10 @@ + + /** + * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode +- * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) ++ * @vmx: struct vcpu_vmx * + * @regs: unsigned long * (to guest registers) +- * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH ++ * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH ++ * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl + * + * Returns: + * 0 on VM-Exit, 1 on VM-Fail +@@ -54,6 +55,12 @@ SYM_FUNC_START(__vmx_vcpu_run) + #endif + push %_ASM_BX + ++ /* Save @vmx for SPEC_CTRL handling */ ++ push %_ASM_ARG1 ++ ++ /* Save @flags for SPEC_CTRL handling */ ++ push %_ASM_ARG3 ++ + /* + * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and + * @regs is needed after VM-Exit to save the guest's register values. +@@ -148,25 +155,23 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL + mov %r15, VCPU_R15(%_ASM_AX) + #endif + +- /* IMPORTANT: RSB must be stuffed before the first return. */ +- FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +- +- /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ +- xor %eax, %eax ++ /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */ ++ xor %ebx, %ebx + + .Lclear_regs: + /* +- * Clear all general purpose registers except RSP and RAX to prevent ++ * Clear all general purpose registers except RSP and RBX to prevent + * speculative use of the guest's values, even those that are reloaded + * via the stack. 
In theory, an L1 cache miss when restoring registers + * could lead to speculative execution with the guest's values. + * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially + * free. RSP and RAX are exempt as RSP is restored by hardware during +- * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. ++ * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return ++ * value. + */ ++ xor %eax, %eax + xor %ecx, %ecx + xor %edx, %edx +- xor %ebx, %ebx + xor %ebp, %ebp + xor %esi, %esi + xor %edi, %edi +@@ -184,6 +189,28 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL + /* "POP" @regs. */ + add $WORD_SIZE, %_ASM_SP + ++ /* ++ * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before ++ * the first unbalanced RET after vmexit! ++ * ++ * For retpoline, RSB filling is needed to prevent poisoned RSB entries ++ * and (in some cases) RSB underflow. ++ * ++ * eIBRS has its own protection against poisoned RSB, so it doesn't ++ * need the RSB filling sequence. But it does need to be enabled ++ * before the first unbalanced RET. ++ */ ++ ++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE ++ ++ pop %_ASM_ARG2 /* @flags */ ++ pop %_ASM_ARG1 /* @vmx */ ++ ++ call vmx_spec_ctrl_restore_host ++ ++ /* Put return value in AX */ ++ mov %_ASM_BX, %_ASM_AX ++ + pop %_ASM_BX + #ifdef CONFIG_X86_64 + pop %r12 +@@ -203,7 +230,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL + ud2 + .Lvmfail: + /* VM-Fail: set return value to 1 */ +- mov $1, %eax ++ mov $1, %_ASM_BX + jmp .Lclear_regs + + SYM_FUNC_END(__vmx_vcpu_run) +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -842,6 +842,14 @@ unsigned int __vmx_vcpu_run_flags(struct + if (vmx->loaded_vmcs->launched) + flags |= VMX_RUN_VMRESUME; + ++ /* ++ * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free ++ * to change it directly without causing a vmexit. In that case read ++ * it after vmexit and store it in vmx->spec_ctrl. 
++ */ ++ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) ++ flags |= VMX_RUN_SAVE_SPEC_CTRL; ++ + return flags; + } + +@@ -6664,6 +6672,26 @@ void noinstr vmx_update_host_rsp(struct + } + } + ++void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, ++ unsigned int flags) ++{ ++ u64 hostval = this_cpu_read(x86_spec_ctrl_current); ++ ++ if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) ++ return; ++ ++ if (flags & VMX_RUN_SAVE_SPEC_CTRL) ++ vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL); ++ ++ /* ++ * If the guest/host SPEC_CTRL values differ, restore the host value. ++ */ ++ if (vmx->spec_ctrl != hostval) ++ native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); ++ ++ barrier_nospec(); ++} ++ + static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) + { + switch (to_vmx(vcpu)->exit_reason.basic) { +@@ -6799,26 +6827,6 @@ static fastpath_t vmx_vcpu_run(struct kv + /* The actual VMENTER/EXIT is in the .noinstr.text section. */ + vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); + +- /* +- * We do not use IBRS in the kernel. If this vCPU has used the +- * SPEC_CTRL MSR it may have left it on; save the value and +- * turn it off. This is much more efficient than blindly adding +- * it to the atomic save/restore list. Especially as the former +- * (Saving guest MSRs on vmexit) doesn't even exist in KVM. +- * +- * For non-nested case: +- * If the L01 MSR bitmap does not intercept the MSR, then we need to +- * save it. +- * +- * For nested case: +- * If the L02 MSR bitmap does not intercept the MSR, then we need to +- * save it. 
+- */ +- if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) +- vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); +- +- x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); +- + /* All fields are clean at this point */ + if (static_branch_unlikely(&enable_evmcs)) { + current_evmcs->hv_clean_fields |= +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -383,6 +383,7 @@ void vmx_set_virtual_apic_mode(struct kv + struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); + void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); + void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); ++void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); + unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); + bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, + unsigned int flags); diff --git a/queue-5.15/objtool-add-entry-unret-validation.patch b/queue-5.15/objtool-add-entry-unret-validation.patch new file mode 100644 index 00000000000..ed8dc41bc71 --- /dev/null +++ b/queue-5.15/objtool-add-entry-unret-validation.patch @@ -0,0 +1,528 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:16:03 +0200 +Subject: objtool: Add entry UNRET validation + +From: Peter Zijlstra + +commit a09a6e2399ba0595c3042b3164f3ca68a3cff33e upstream. + +Since entry asm is tricky, add a validation pass that ensures the +retbleed mitigation has been done before the first actual RET +instruction. + +Entry points are those that either have UNWIND_HINT_ENTRY, which acts +as UNWIND_HINT_EMPTY but marks the instruction as an entry point, or +those that have UNWIND_HINT_IRET_REGS at +0. + +This is basically a variant of validate_branch() that is +intra-function and it will simply follow all branches from marked +entry points and ensures that all paths lead to ANNOTATE_UNRET_END. + +If a path hits RET or an indirection the path is a fail and will be +reported. 
+ +There are 3 ANNOTATE_UNRET_END instances: + + - UNTRAIN_RET itself + - exception from-kernel; this path doesn't need UNTRAIN_RET + - all early exceptions; these also don't need UNTRAIN_RET + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: tools/objtool/builtin-check.c no link option validation] +[cascardo: tools/objtool/check.c opts.ibt is ibt] +[cascardo: tools/objtool/include/objtool/builtin.h leave unret option as bool, no struct opts] +[cascardo: objtool is still called from scripts/link-vmlinux.sh] +[cascardo: no IBT support] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_64.S | 3 + arch/x86/entry/entry_64_compat.S | 6 - + arch/x86/include/asm/nospec-branch.h | 12 ++ + arch/x86/include/asm/unwind_hints.h | 4 + arch/x86/kernel/head_64.S | 5 + arch/x86/xen/xen-asm.S | 10 - + include/linux/objtool.h | 3 + scripts/link-vmlinux.sh | 3 + tools/include/linux/objtool.h | 3 + tools/objtool/builtin-check.c | 3 + tools/objtool/check.c | 172 +++++++++++++++++++++++++++++++- + tools/objtool/include/objtool/builtin.h | 2 + tools/objtool/include/objtool/check.h | 6 + + 13 files changed, 217 insertions(+), 15 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -85,7 +85,7 @@ + */ + + SYM_CODE_START(entry_SYSCALL_64) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + + swapgs + /* tss.sp2 is scratch space. */ +@@ -1067,6 +1067,7 @@ SYM_CODE_START_LOCAL(error_entry) + .Lerror_entry_done_lfence: + FENCE_SWAPGS_KERNEL_ENTRY + leaq 8(%rsp), %rax /* return pt_regs pointer */ ++ ANNOTATE_UNRET_END + RET + + .Lbstep_iret: +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -49,7 +49,7 @@ + * 0(%ebp) arg6 + */ + SYM_CODE_START(entry_SYSENTER_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + /* Interrupts are off on entry. 
*/ + SWAPGS + +@@ -202,7 +202,7 @@ SYM_CODE_END(entry_SYSENTER_compat) + * 0(%esp) arg6 + */ + SYM_CODE_START(entry_SYSCALL_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + /* Interrupts are off on entry. */ + swapgs + +@@ -349,7 +349,7 @@ SYM_CODE_END(entry_SYSCALL_compat) + * ebp arg6 + */ + SYM_CODE_START(entry_INT80_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + /* + * Interrupts are off on entry. + */ +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -82,6 +82,17 @@ + #define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE + + /* ++ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should ++ * eventually turn into it's own annotation. ++ */ ++.macro ANNOTATE_UNRET_END ++#ifdef CONFIG_DEBUG_ENTRY ++ ANNOTATE_RETPOLINE_SAFE ++ nop ++#endif ++.endm ++ ++/* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 + * attack. +@@ -131,6 +142,7 @@ + */ + .macro UNTRAIN_RET + #ifdef CONFIG_RETPOLINE ++ ANNOTATE_UNRET_END + ALTERNATIVE_2 "", \ + "call zen_untrain_ret", X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB +--- a/arch/x86/include/asm/unwind_hints.h ++++ b/arch/x86/include/asm/unwind_hints.h +@@ -11,6 +11,10 @@ + UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 + .endm + ++.macro UNWIND_HINT_ENTRY ++ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1 ++.endm ++ + .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 + .if \base == %rsp + .if \indirect +--- a/arch/x86/kernel/head_64.S ++++ b/arch/x86/kernel/head_64.S +@@ -312,6 +312,8 @@ SYM_CODE_END(start_cpu0) + SYM_CODE_START_NOALIGN(vc_boot_ghcb) + UNWIND_HINT_IRET_REGS offset=8 + ++ ANNOTATE_UNRET_END ++ + /* Build pt_regs */ + PUSH_AND_CLEAR_REGS + +@@ -369,6 +371,7 @@ SYM_CODE_START(early_idt_handler_array) + SYM_CODE_END(early_idt_handler_array) + + 
SYM_CODE_START_LOCAL(early_idt_handler_common) ++ ANNOTATE_UNRET_END + /* + * The stack is the hardware frame, an error code or zero, and the + * vector number. +@@ -415,6 +418,8 @@ SYM_CODE_END(early_idt_handler_common) + SYM_CODE_START_NOALIGN(vc_no_ghcb) + UNWIND_HINT_IRET_REGS offset=8 + ++ ANNOTATE_UNRET_END ++ + /* Build pt_regs */ + PUSH_AND_CLEAR_REGS + +--- a/arch/x86/xen/xen-asm.S ++++ b/arch/x86/xen/xen-asm.S +@@ -120,7 +120,7 @@ SYM_FUNC_END(xen_read_cr2_direct); + + .macro xen_pv_trap name + SYM_CODE_START(xen_\name) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + pop %rcx + pop %r11 + jmp \name +@@ -228,7 +228,7 @@ SYM_CODE_END(xenpv_restore_regs_and_retu + + /* Normal 64-bit system call target */ + SYM_CODE_START(xen_entry_SYSCALL_64) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + popq %rcx + popq %r11 + +@@ -247,7 +247,7 @@ SYM_CODE_END(xen_entry_SYSCALL_64) + + /* 32-bit compat syscall target */ + SYM_CODE_START(xen_entry_SYSCALL_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + popq %rcx + popq %r11 + +@@ -264,7 +264,7 @@ SYM_CODE_END(xen_entry_SYSCALL_compat) + + /* 32-bit compat sysenter target */ + SYM_CODE_START(xen_entry_SYSENTER_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + /* + * NB: Xen is polite and clears TF from EFLAGS for us. This means + * that we don't need to guard against single step exceptions here. +@@ -287,7 +287,7 @@ SYM_CODE_END(xen_entry_SYSENTER_compat) + + SYM_CODE_START(xen_entry_SYSCALL_compat) + SYM_CODE_START(xen_entry_SYSENTER_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ + mov $-ENOSYS, %rax + pushq $0 +--- a/include/linux/objtool.h ++++ b/include/linux/objtool.h +@@ -32,11 +32,14 @@ struct unwind_hint { + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. ++ * ++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. 
+ */ + #define UNWIND_HINT_TYPE_CALL 0 + #define UNWIND_HINT_TYPE_REGS 1 + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 ++#define UNWIND_HINT_TYPE_ENTRY 4 + + #ifdef CONFIG_STACK_VALIDATION + +--- a/scripts/link-vmlinux.sh ++++ b/scripts/link-vmlinux.sh +@@ -120,6 +120,9 @@ objtool_link() + + if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then + objtoolopt="${objtoolopt} --noinstr" ++ if is_enabled CONFIG_RETPOLINE; then ++ objtoolopt="${objtoolopt} --unret" ++ fi + fi + + if [ -n "${objtoolopt}" ]; then +--- a/tools/include/linux/objtool.h ++++ b/tools/include/linux/objtool.h +@@ -32,11 +32,14 @@ struct unwind_hint { + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. ++ * ++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. + */ + #define UNWIND_HINT_TYPE_CALL 0 + #define UNWIND_HINT_TYPE_REGS 1 + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 ++#define UNWIND_HINT_TYPE_ENTRY 4 + + #ifdef CONFIG_STACK_VALIDATION + +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -20,7 +20,7 @@ + #include + + bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, +- validate_dup, vmlinux, mcount, noinstr, backup, sls; ++ validate_dup, vmlinux, mcount, noinstr, backup, sls, unret; + + static const char * const check_usage[] = { + "objtool check [] file.o", +@@ -36,6 +36,7 @@ const struct option check_options[] = { + OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), + OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), + OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), ++ OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"), + OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), + OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind 
on error"), + OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"), +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1847,6 +1847,19 @@ static int read_unwind_hints(struct objt + + insn->hint = true; + ++ if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { ++ struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); ++ ++ if (sym && sym->bind == STB_GLOBAL) { ++ insn->entry = 1; ++ } ++ } ++ ++ if (hint->type == UNWIND_HINT_TYPE_ENTRY) { ++ hint->type = UNWIND_HINT_TYPE_CALL; ++ insn->entry = 1; ++ } ++ + if (hint->type == UNWIND_HINT_TYPE_FUNC) { + insn->cfi = &func_cfi; + continue; +@@ -1895,8 +1908,9 @@ static int read_retpoline_hints(struct o + + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC && +- insn->type != INSN_RETURN) { +- WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret", ++ insn->type != INSN_RETURN && ++ insn->type != INSN_NOP) { ++ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop", + insn->sec, insn->offset); + return -1; + } +@@ -2996,8 +3010,8 @@ static int validate_branch(struct objtoo + return 1; + } + +- visited = 1 << state.uaccess; +- if (insn->visited) { ++ visited = VISITED_BRANCH << state.uaccess; ++ if (insn->visited & VISITED_BRANCH_MASK) { + if (!insn->hint && !insn_cfi_match(insn, &state.cfi)) + return 1; + +@@ -3223,6 +3237,145 @@ static int validate_unwind_hints(struct + return warnings; + } + ++/* ++ * Validate rethunk entry constraint: must untrain RET before the first RET. ++ * ++ * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes ++ * before an actual RET instruction. 
++ */ ++static int validate_entry(struct objtool_file *file, struct instruction *insn) ++{ ++ struct instruction *next, *dest; ++ int ret, warnings = 0; ++ ++ for (;;) { ++ next = next_insn_to_validate(file, insn); ++ ++ if (insn->visited & VISITED_ENTRY) ++ return 0; ++ ++ insn->visited |= VISITED_ENTRY; ++ ++ if (!insn->ignore_alts && !list_empty(&insn->alts)) { ++ struct alternative *alt; ++ bool skip_orig = false; ++ ++ list_for_each_entry(alt, &insn->alts, list) { ++ if (alt->skip_orig) ++ skip_orig = true; ++ ++ ret = validate_entry(file, alt->insn); ++ if (ret) { ++ if (backtrace) ++ BT_FUNC("(alt)", insn); ++ return ret; ++ } ++ } ++ ++ if (skip_orig) ++ return 0; ++ } ++ ++ switch (insn->type) { ++ ++ case INSN_CALL_DYNAMIC: ++ case INSN_JUMP_DYNAMIC: ++ case INSN_JUMP_DYNAMIC_CONDITIONAL: ++ WARN_FUNC("early indirect call", insn->sec, insn->offset); ++ return 1; ++ ++ case INSN_JUMP_UNCONDITIONAL: ++ case INSN_JUMP_CONDITIONAL: ++ if (!is_sibling_call(insn)) { ++ if (!insn->jump_dest) { ++ WARN_FUNC("unresolved jump target after linking?!?", ++ insn->sec, insn->offset); ++ return -1; ++ } ++ ret = validate_entry(file, insn->jump_dest); ++ if (ret) { ++ if (backtrace) { ++ BT_FUNC("(branch%s)", insn, ++ insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : ""); ++ } ++ return ret; ++ } ++ ++ if (insn->type == INSN_JUMP_UNCONDITIONAL) ++ return 0; ++ ++ break; ++ } ++ ++ /* fallthrough */ ++ case INSN_CALL: ++ dest = find_insn(file, insn->call_dest->sec, ++ insn->call_dest->offset); ++ if (!dest) { ++ WARN("Unresolved function after linking!?: %s", ++ insn->call_dest->name); ++ return -1; ++ } ++ ++ ret = validate_entry(file, dest); ++ if (ret) { ++ if (backtrace) ++ BT_FUNC("(call)", insn); ++ return ret; ++ } ++ /* ++ * If a call returns without error, it must have seen UNTRAIN_RET. ++ * Therefore any non-error return is a success. 
++ */ ++ return 0; ++ ++ case INSN_RETURN: ++ WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset); ++ return 1; ++ ++ case INSN_NOP: ++ if (insn->retpoline_safe) ++ return 0; ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (!next) { ++ WARN_FUNC("teh end!", insn->sec, insn->offset); ++ return -1; ++ } ++ insn = next; ++ } ++ ++ return warnings; ++} ++ ++/* ++ * Validate that all branches starting at 'insn->entry' encounter UNRET_END ++ * before RET. ++ */ ++static int validate_unret(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ int ret, warnings = 0; ++ ++ for_each_insn(file, insn) { ++ if (!insn->entry) ++ continue; ++ ++ ret = validate_entry(file, insn); ++ if (ret < 0) { ++ WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset); ++ return ret; ++ } ++ warnings += ret; ++ } ++ ++ return warnings; ++} ++ + static int validate_retpoline(struct objtool_file *file) + { + struct instruction *insn; +@@ -3490,6 +3643,17 @@ int check(struct objtool_file *file) + goto out; + warnings += ret; + ++ if (unret) { ++ /* ++ * Must be after validate_branch() and friends, it plays ++ * further games with insn->visited. 
++ */ ++ ret = validate_unret(file); ++ if (ret < 0) ++ return ret; ++ warnings += ret; ++ } ++ + if (!warnings) { + ret = validate_reachable_instructions(file); + if (ret < 0) +--- a/tools/objtool/include/objtool/builtin.h ++++ b/tools/objtool/include/objtool/builtin.h +@@ -9,7 +9,7 @@ + + extern const struct option check_options[]; + extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, +- validate_dup, vmlinux, mcount, noinstr, backup, sls; ++ validate_dup, vmlinux, mcount, noinstr, backup, sls, unret; + + extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); + +--- a/tools/objtool/include/objtool/check.h ++++ b/tools/objtool/include/objtool/check.h +@@ -48,6 +48,7 @@ struct instruction { + bool dead_end, ignore, ignore_alts; + bool hint; + bool retpoline_safe; ++ bool entry; + s8 instr; + u8 visited; + struct alt_group *alt_group; +@@ -62,6 +63,11 @@ struct instruction { + struct cfi_state *cfi; + }; + ++#define VISITED_BRANCH 0x01 ++#define VISITED_BRANCH_UACCESS 0x02 ++#define VISITED_BRANCH_MASK 0x03 ++#define VISITED_ENTRY 0x04 ++ + static inline bool is_static_jump(struct instruction *insn) + { + return insn->type == INSN_JUMP_CONDITIONAL || diff --git a/queue-5.15/objtool-classify-symbols.patch b/queue-5.15/objtool-classify-symbols.patch new file mode 100644 index 00000000000..19d6de201e0 --- /dev/null +++ b/queue-5.15/objtool-classify-symbols.patch @@ -0,0 +1,135 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:33 +0200 +Subject: objtool: Classify symbols + +From: Peter Zijlstra + +commit 1739c66eb7bd5f27f1b69a5a26e10e8327d1e136 upstream. + +In order to avoid calling str*cmp() on symbol names, over and over, do +them all once upfront and store the result. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120309.658539311@infradead.org +[cascardo: no pv_target on struct symbol, because of missing + db2b0c5d7b6f19b3c2cab08c531b65342eb5252b] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 34 ++++++++++++++++++++++------------ + tools/objtool/include/objtool/elf.h | 7 +++++-- + 2 files changed, 27 insertions(+), 14 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -859,8 +859,7 @@ static void add_call_dest(struct objtool + * so they need a little help, NOP out any KCOV calls from noinstr + * text. + */ +- if (insn->sec->noinstr && +- !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) { ++ if (insn->sec->noinstr && insn->call_dest->kcov) { + if (reloc) { + reloc->type = R_NONE; + elf_write_reloc(file->elf, reloc); +@@ -884,7 +883,7 @@ static void add_call_dest(struct objtool + } + } + +- if (mcount && !strcmp(insn->call_dest->name, "__fentry__")) { ++ if (mcount && insn->call_dest->fentry) { + if (sibling) + WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset); + +@@ -934,7 +933,7 @@ static int add_jump_destinations(struct + } else if (reloc->sym->type == STT_SECTION) { + dest_sec = reloc->sym->sec; + dest_off = arch_dest_reloc_offset(reloc->addend); +- } else if (arch_is_retpoline(reloc->sym)) { ++ } else if (reloc->sym->retpoline_thunk) { + /* + * Retpoline jumps are really dynamic jumps in + * disguise, so convert them accordingly. +@@ -1075,7 +1074,7 @@ static int add_call_destinations(struct + + add_call_dest(file, insn, dest, false); + +- } else if (arch_is_retpoline(reloc->sym)) { ++ } else if (reloc->sym->retpoline_thunk) { + /* + * Retpoline calls are really dynamic calls in + * disguise, so convert them accordingly. 
+@@ -1759,17 +1758,28 @@ static int read_intra_function_calls(str + return 0; + } + +-static int read_static_call_tramps(struct objtool_file *file) ++static int classify_symbols(struct objtool_file *file) + { + struct section *sec; + struct symbol *func; + + for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { +- if (func->bind == STB_GLOBAL && +- !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, ++ if (func->bind != STB_GLOBAL) ++ continue; ++ ++ if (!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, + strlen(STATIC_CALL_TRAMP_PREFIX_STR))) + func->static_call_tramp = true; ++ ++ if (arch_is_retpoline(func)) ++ func->retpoline_thunk = true; ++ ++ if (!strcmp(func->name, "__fentry__")) ++ func->fentry = true; ++ ++ if (!strncmp(func->name, "__sanitizer_cov_", 16)) ++ func->kcov = true; + } + } + +@@ -1831,7 +1841,7 @@ static int decode_sections(struct objtoo + /* + * Must be before add_{jump_call}_destination. + */ +- ret = read_static_call_tramps(file); ++ ret = classify_symbols(file); + if (ret) + return ret; + +@@ -1889,9 +1899,9 @@ static int decode_sections(struct objtoo + + static bool is_fentry_call(struct instruction *insn) + { +- if (insn->type == INSN_CALL && insn->call_dest && +- insn->call_dest->type == STT_NOTYPE && +- !strcmp(insn->call_dest->name, "__fentry__")) ++ if (insn->type == INSN_CALL && ++ insn->call_dest && ++ insn->call_dest->fentry) + return true; + + return false; +--- a/tools/objtool/include/objtool/elf.h ++++ b/tools/objtool/include/objtool/elf.h +@@ -54,8 +54,11 @@ struct symbol { + unsigned long offset; + unsigned int len; + struct symbol *pfunc, *cfunc, *alias; +- bool uaccess_safe; +- bool static_call_tramp; ++ u8 uaccess_safe : 1; ++ u8 static_call_tramp : 1; ++ u8 retpoline_thunk : 1; ++ u8 fentry : 1; ++ u8 kcov : 1; + }; + + struct reloc { diff --git a/queue-5.15/objtool-default-ignore-int3-for-unreachable.patch b/queue-5.15/objtool-default-ignore-int3-for-unreachable.patch new file mode 100644 
index 00000000000..628b9ed8d6e --- /dev/null +++ b/queue-5.15/objtool-default-ignore-int3-for-unreachable.patch @@ -0,0 +1,56 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 8 Mar 2022 16:30:14 +0100 +Subject: objtool: Default ignore INT3 for unreachable + +From: Peter Zijlstra + +commit 1ffbe4e935f9b7308615c75be990aec07464d1e7 upstream. + +Ignore all INT3 instructions for unreachable code warnings, similar to NOP. +This allows using INT3 for various paddings instead of NOPs. + +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Josh Poimboeuf +Link: https://lore.kernel.org/r/20220308154317.343312938@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -2965,9 +2965,8 @@ static int validate_branch(struct objtoo + switch (insn->type) { + + case INSN_RETURN: +- if (next_insn && next_insn->type == INSN_TRAP) { +- next_insn->ignore = true; +- } else if (sls && !insn->retpoline_safe) { ++ if (sls && !insn->retpoline_safe && ++ next_insn && next_insn->type != INSN_TRAP) { + WARN_FUNC("missing int3 after ret", + insn->sec, insn->offset); + } +@@ -3014,9 +3013,8 @@ static int validate_branch(struct objtoo + break; + + case INSN_JUMP_DYNAMIC: +- if (next_insn && next_insn->type == INSN_TRAP) { +- next_insn->ignore = true; +- } else if (sls && !insn->retpoline_safe) { ++ if (sls && !insn->retpoline_safe && ++ next_insn && next_insn->type != INSN_TRAP) { + WARN_FUNC("missing int3 after indirect jump", + insn->sec, insn->offset); + } +@@ -3187,7 +3185,7 @@ static bool ignore_unreachable_insn(stru + int i; + struct instruction *prev_insn; + +- if (insn->ignore || insn->type == INSN_NOP) ++ if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP) + return true; + + /* diff --git 
a/queue-5.15/objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch b/queue-5.15/objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch new file mode 100644 index 00000000000..e4e91466614 --- /dev/null +++ b/queue-5.15/objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch @@ -0,0 +1,112 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:34 +0200 +Subject: objtool: Explicitly avoid self modifying code in .altinstr_replacement + +From: Peter Zijlstra + +commit dd003edeffa3cb87bc9862582004f405d77d7670 upstream. + +Assume ALTERNATIVE()s know what they're doing and do not change, or +cause to change, instructions in .altinstr_replacement sections. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120309.722511775@infradead.org +[cascardo: context adjustment] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 43 ++++++++++++++++++++++++++++++++----------- + 1 file changed, 32 insertions(+), 11 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -840,18 +840,27 @@ static void remove_insn_ops(struct instr + } + } + +-static void add_call_dest(struct objtool_file *file, struct instruction *insn, +- struct symbol *dest, bool sibling) ++static void annotate_call_site(struct objtool_file *file, ++ struct instruction *insn, bool sibling) + { + struct reloc *reloc = insn_reloc(file, insn); ++ struct symbol *sym = insn->call_dest; + +- insn->call_dest = dest; +- if (!dest) ++ if (!sym) ++ sym = reloc->sym; ++ ++ /* ++ * Alternative replacement code is just template code which is ++ * sometimes copied to the original instruction. For now, don't ++ * annotate it. 
(In the future we might consider annotating the ++ * original instruction if/when it ever makes sense to do so.) ++ */ ++ if (!strcmp(insn->sec->name, ".altinstr_replacement")) + return; + +- if (insn->call_dest->static_call_tramp) { +- list_add_tail(&insn->call_node, +- &file->static_call_list); ++ if (sym->static_call_tramp) { ++ list_add_tail(&insn->call_node, &file->static_call_list); ++ return; + } + + /* +@@ -859,7 +868,7 @@ static void add_call_dest(struct objtool + * so they need a little help, NOP out any KCOV calls from noinstr + * text. + */ +- if (insn->sec->noinstr && insn->call_dest->kcov) { ++ if (insn->sec->noinstr && sym->kcov) { + if (reloc) { + reloc->type = R_NONE; + elf_write_reloc(file->elf, reloc); +@@ -881,9 +890,11 @@ static void add_call_dest(struct objtool + */ + insn->retpoline_safe = true; + } ++ ++ return; + } + +- if (mcount && insn->call_dest->fentry) { ++ if (mcount && sym->fentry) { + if (sibling) + WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset); + +@@ -898,9 +909,17 @@ static void add_call_dest(struct objtool + + insn->type = INSN_NOP; + +- list_add_tail(&insn->mcount_loc_node, +- &file->mcount_loc_list); ++ list_add_tail(&insn->mcount_loc_node, &file->mcount_loc_list); ++ return; + } ++} ++ ++static void add_call_dest(struct objtool_file *file, struct instruction *insn, ++ struct symbol *dest, bool sibling) ++{ ++ insn->call_dest = dest; ++ if (!dest) ++ return; + + /* + * Whatever stack impact regular CALLs have, should be undone +@@ -910,6 +929,8 @@ static void add_call_dest(struct objtool + * are converted to JUMP, see read_intra_function_calls(). 
+ */ + remove_insn_ops(insn); ++ ++ annotate_call_site(file, insn, sibling); + } + + /* diff --git a/queue-5.15/objtool-introduce-cfi-hash.patch b/queue-5.15/objtool-introduce-cfi-hash.patch new file mode 100644 index 00000000000..2ba54e766e0 --- /dev/null +++ b/queue-5.15/objtool-introduce-cfi-hash.patch @@ -0,0 +1,458 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Thu, 24 Jun 2021 11:41:01 +0200 +Subject: objtool: Introduce CFI hash + +From: Peter Zijlstra + +commit 8b946cc38e063f0f7bb67789478c38f6d7d457c9 upstream. + +Andi reported that objtool on vmlinux.o consumes more memory than his +system has, leading to horrific performance. + +This is in part because we keep a struct instruction for every +instruction in the file in-memory. Shrink struct instruction by +removing the CFI state (which includes full register state) from it +and demand allocating it. + +Given most instructions don't actually change CFI state, there's lots +of repetition there, so add a hash table to find previous CFI +instances. 
+ +Reduces memory consumption (and runtime) for processing an +x86_64-allyesconfig: + + pre: 4:40.84 real, 143.99 user, 44.18 sys, 30624988 mem + post: 2:14.61 real, 108.58 user, 25.04 sys, 16396184 mem + +Suggested-by: Andi Kleen +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20210624095147.756759107@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/arch/x86/decode.c | 20 +--- + tools/objtool/check.c | 154 ++++++++++++++++++++++++++++++---- + tools/objtool/include/objtool/arch.h | 2 + tools/objtool/include/objtool/cfi.h | 2 + tools/objtool/include/objtool/check.h | 2 + tools/objtool/orc_gen.c | 15 ++- + 6 files changed, 160 insertions(+), 35 deletions(-) + +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -684,34 +684,32 @@ const char *arch_ret_insn(int len) + return ret[len-1]; + } + +-int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) ++int arch_decode_hint_reg(u8 sp_reg, int *base) + { +- struct cfi_reg *cfa = &insn->cfi.cfa; +- + switch (sp_reg) { + case ORC_REG_UNDEFINED: +- cfa->base = CFI_UNDEFINED; ++ *base = CFI_UNDEFINED; + break; + case ORC_REG_SP: +- cfa->base = CFI_SP; ++ *base = CFI_SP; + break; + case ORC_REG_BP: +- cfa->base = CFI_BP; ++ *base = CFI_BP; + break; + case ORC_REG_SP_INDIRECT: +- cfa->base = CFI_SP_INDIRECT; ++ *base = CFI_SP_INDIRECT; + break; + case ORC_REG_R10: +- cfa->base = CFI_R10; ++ *base = CFI_R10; + break; + case ORC_REG_R13: +- cfa->base = CFI_R13; ++ *base = CFI_R13; + break; + case ORC_REG_DI: +- cfa->base = CFI_DI; ++ *base = CFI_DI; + break; + case ORC_REG_DX: +- cfa->base = CFI_DX; ++ *base = CFI_DX; + break; + default: + return -1; +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -27,7 +28,11 @@ struct alternative { + bool skip_orig; + }; + +-struct cfi_init_state initial_func_cfi; ++static 
unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache; ++ ++static struct cfi_init_state initial_func_cfi; ++static struct cfi_state init_cfi; ++static struct cfi_state func_cfi; + + struct instruction *find_insn(struct objtool_file *file, + struct section *sec, unsigned long offset) +@@ -267,6 +272,78 @@ static void init_insn_state(struct insn_ + state->noinstr = sec->noinstr; + } + ++static struct cfi_state *cfi_alloc(void) ++{ ++ struct cfi_state *cfi = calloc(sizeof(struct cfi_state), 1); ++ if (!cfi) { ++ WARN("calloc failed"); ++ exit(1); ++ } ++ nr_cfi++; ++ return cfi; ++} ++ ++static int cfi_bits; ++static struct hlist_head *cfi_hash; ++ ++static inline bool cficmp(struct cfi_state *cfi1, struct cfi_state *cfi2) ++{ ++ return memcmp((void *)cfi1 + sizeof(cfi1->hash), ++ (void *)cfi2 + sizeof(cfi2->hash), ++ sizeof(struct cfi_state) - sizeof(struct hlist_node)); ++} ++ ++static inline u32 cfi_key(struct cfi_state *cfi) ++{ ++ return jhash((void *)cfi + sizeof(cfi->hash), ++ sizeof(*cfi) - sizeof(cfi->hash), 0); ++} ++ ++static struct cfi_state *cfi_hash_find_or_add(struct cfi_state *cfi) ++{ ++ struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)]; ++ struct cfi_state *obj; ++ ++ hlist_for_each_entry(obj, head, hash) { ++ if (!cficmp(cfi, obj)) { ++ nr_cfi_cache++; ++ return obj; ++ } ++ } ++ ++ obj = cfi_alloc(); ++ *obj = *cfi; ++ hlist_add_head(&obj->hash, head); ++ ++ return obj; ++} ++ ++static void cfi_hash_add(struct cfi_state *cfi) ++{ ++ struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)]; ++ ++ hlist_add_head(&cfi->hash, head); ++} ++ ++static void *cfi_hash_alloc(unsigned long size) ++{ ++ cfi_bits = max(10, ilog2(size)); ++ cfi_hash = mmap(NULL, sizeof(struct hlist_head) << cfi_bits, ++ PROT_READ|PROT_WRITE, ++ MAP_PRIVATE|MAP_ANON, -1, 0); ++ if (cfi_hash == (void *)-1L) { ++ WARN("mmap fail cfi_hash"); ++ cfi_hash = NULL; ++ } else if (stats) { ++ printf("cfi_bits: %d\n", cfi_bits); ++ } ++ ++ return cfi_hash; ++} 
++ ++static unsigned long nr_insns; ++static unsigned long nr_insns_visited; ++ + /* + * Call the arch-specific instruction decoder for all the instructions and add + * them to the global instruction list. +@@ -277,7 +354,6 @@ static int decode_instructions(struct ob + struct symbol *func; + unsigned long offset; + struct instruction *insn; +- unsigned long nr_insns = 0; + int ret; + + for_each_sec(file, sec) { +@@ -303,7 +379,6 @@ static int decode_instructions(struct ob + memset(insn, 0, sizeof(*insn)); + INIT_LIST_HEAD(&insn->alts); + INIT_LIST_HEAD(&insn->stack_ops); +- init_cfi_state(&insn->cfi); + + insn->sec = sec; + insn->offset = offset; +@@ -1239,7 +1314,6 @@ static int handle_group_alt(struct objto + memset(nop, 0, sizeof(*nop)); + INIT_LIST_HEAD(&nop->alts); + INIT_LIST_HEAD(&nop->stack_ops); +- init_cfi_state(&nop->cfi); + + nop->sec = special_alt->new_sec; + nop->offset = special_alt->new_off + special_alt->new_len; +@@ -1648,10 +1722,11 @@ static void set_func_state(struct cfi_st + + static int read_unwind_hints(struct objtool_file *file) + { ++ struct cfi_state cfi = init_cfi; + struct section *sec, *relocsec; +- struct reloc *reloc; + struct unwind_hint *hint; + struct instruction *insn; ++ struct reloc *reloc; + int i; + + sec = find_section_by_name(file->elf, ".discard.unwind_hints"); +@@ -1689,19 +1764,24 @@ static int read_unwind_hints(struct objt + insn->hint = true; + + if (hint->type == UNWIND_HINT_TYPE_FUNC) { +- set_func_state(&insn->cfi); ++ insn->cfi = &func_cfi; + continue; + } + +- if (arch_decode_hint_reg(insn, hint->sp_reg)) { ++ if (insn->cfi) ++ cfi = *(insn->cfi); ++ ++ if (arch_decode_hint_reg(hint->sp_reg, &cfi.cfa.base)) { + WARN_FUNC("unsupported unwind_hint sp base reg %d", + insn->sec, insn->offset, hint->sp_reg); + return -1; + } + +- insn->cfi.cfa.offset = bswap_if_needed(hint->sp_offset); +- insn->cfi.type = hint->type; +- insn->cfi.end = hint->end; ++ cfi.cfa.offset = bswap_if_needed(hint->sp_offset); ++ cfi.type = 
hint->type; ++ cfi.end = hint->end; ++ ++ insn->cfi = cfi_hash_find_or_add(&cfi); + } + + return 0; +@@ -2552,13 +2632,18 @@ static int propagate_alt_cfi(struct objt + if (!insn->alt_group) + return 0; + ++ if (!insn->cfi) { ++ WARN("CFI missing"); ++ return -1; ++ } ++ + alt_cfi = insn->alt_group->cfi; + group_off = insn->offset - insn->alt_group->first_insn->offset; + + if (!alt_cfi[group_off]) { +- alt_cfi[group_off] = &insn->cfi; ++ alt_cfi[group_off] = insn->cfi; + } else { +- if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) { ++ if (cficmp(alt_cfi[group_off], insn->cfi)) { + WARN_FUNC("stack layout conflict in alternatives", + insn->sec, insn->offset); + return -1; +@@ -2609,9 +2694,14 @@ static int handle_insn_ops(struct instru + + static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2) + { +- struct cfi_state *cfi1 = &insn->cfi; ++ struct cfi_state *cfi1 = insn->cfi; + int i; + ++ if (!cfi1) { ++ WARN("CFI missing"); ++ return false; ++ } ++ + if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) { + + WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d", +@@ -2796,7 +2886,7 @@ static int validate_branch(struct objtoo + struct instruction *insn, struct insn_state state) + { + struct alternative *alt; +- struct instruction *next_insn; ++ struct instruction *next_insn, *prev_insn = NULL; + struct section *sec; + u8 visited; + int ret; +@@ -2825,15 +2915,25 @@ static int validate_branch(struct objtoo + + if (insn->visited & visited) + return 0; ++ } else { ++ nr_insns_visited++; + } + + if (state.noinstr) + state.instr += insn->instr; + +- if (insn->hint) +- state.cfi = insn->cfi; +- else +- insn->cfi = state.cfi; ++ if (insn->hint) { ++ state.cfi = *insn->cfi; ++ } else { ++ /* XXX track if we actually changed state.cfi */ ++ ++ if (prev_insn && !cficmp(prev_insn->cfi, &state.cfi)) { ++ insn->cfi = prev_insn->cfi; ++ nr_cfi_reused++; ++ } else { ++ insn->cfi = cfi_hash_find_or_add(&state.cfi); ++ } ++ } + + 
insn->visited |= visited; + +@@ -2997,6 +3097,7 @@ static int validate_branch(struct objtoo + return 1; + } + ++ prev_insn = insn; + insn = next_insn; + } + +@@ -3252,10 +3353,20 @@ int check(struct objtool_file *file) + int ret, warnings = 0; + + arch_initial_func_cfi_state(&initial_func_cfi); ++ init_cfi_state(&init_cfi); ++ init_cfi_state(&func_cfi); ++ set_func_state(&func_cfi); ++ ++ if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3))) ++ goto out; ++ ++ cfi_hash_add(&init_cfi); ++ cfi_hash_add(&func_cfi); + + ret = decode_sections(file); + if (ret < 0) + goto out; ++ + warnings += ret; + + if (list_empty(&file->insn_list)) +@@ -3313,6 +3424,13 @@ int check(struct objtool_file *file) + warnings += ret; + } + ++ if (stats) { ++ printf("nr_insns_visited: %ld\n", nr_insns_visited); ++ printf("nr_cfi: %ld\n", nr_cfi); ++ printf("nr_cfi_reused: %ld\n", nr_cfi_reused); ++ printf("nr_cfi_cache: %ld\n", nr_cfi_cache); ++ } ++ + out: + /* + * For now, don't fail the kernel build on fatal warnings. 
These +--- a/tools/objtool/include/objtool/arch.h ++++ b/tools/objtool/include/objtool/arch.h +@@ -85,7 +85,7 @@ unsigned long arch_dest_reloc_offset(int + const char *arch_nop_insn(int len); + const char *arch_ret_insn(int len); + +-int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); ++int arch_decode_hint_reg(u8 sp_reg, int *base); + + bool arch_is_retpoline(struct symbol *sym); + +--- a/tools/objtool/include/objtool/cfi.h ++++ b/tools/objtool/include/objtool/cfi.h +@@ -7,6 +7,7 @@ + #define _OBJTOOL_CFI_H + + #include ++#include + + #define CFI_UNDEFINED -1 + #define CFI_CFA -2 +@@ -24,6 +25,7 @@ struct cfi_init_state { + }; + + struct cfi_state { ++ struct hlist_node hash; /* must be first, cficmp() */ + struct cfi_reg regs[CFI_NUM_REGS]; + struct cfi_reg vals[CFI_NUM_REGS]; + struct cfi_reg cfa; +--- a/tools/objtool/include/objtool/check.h ++++ b/tools/objtool/include/objtool/check.h +@@ -59,7 +59,7 @@ struct instruction { + struct list_head alts; + struct symbol *func; + struct list_head stack_ops; +- struct cfi_state cfi; ++ struct cfi_state *cfi; + }; + + static inline bool is_static_jump(struct instruction *insn) +--- a/tools/objtool/orc_gen.c ++++ b/tools/objtool/orc_gen.c +@@ -13,13 +13,19 @@ + #include + #include + +-static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi) ++static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, ++ struct instruction *insn) + { +- struct instruction *insn = container_of(cfi, struct instruction, cfi); + struct cfi_reg *bp = &cfi->regs[CFI_BP]; + + memset(orc, 0, sizeof(*orc)); + ++ if (!cfi) { ++ orc->end = 0; ++ orc->sp_reg = ORC_REG_UNDEFINED; ++ return 0; ++ } ++ + orc->end = cfi->end; + + if (cfi->cfa.base == CFI_UNDEFINED) { +@@ -162,7 +168,7 @@ int orc_create(struct objtool_file *file + int i; + + if (!alt_group) { +- if (init_orc_entry(&orc, &insn->cfi)) ++ if (init_orc_entry(&orc, insn->cfi, insn)) + return -1; + if (!memcmp(&prev_orc, &orc, sizeof(orc))) + continue; +@@ 
-186,7 +192,8 @@ int orc_create(struct objtool_file *file + struct cfi_state *cfi = alt_group->cfi[i]; + if (!cfi) + continue; +- if (init_orc_entry(&orc, cfi)) ++ /* errors are reported on the original insn */ ++ if (init_orc_entry(&orc, cfi, insn)) + return -1; + if (!memcmp(&prev_orc, &orc, sizeof(orc))) + continue; diff --git a/queue-5.15/objtool-re-add-unwind_hint_-save_restore.patch b/queue-5.15/objtool-re-add-unwind_hint_-save_restore.patch new file mode 100644 index 00000000000..aea4ecf7994 --- /dev/null +++ b/queue-5.15/objtool-re-add-unwind_hint_-save_restore.patch @@ -0,0 +1,184 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Josh Poimboeuf +Date: Fri, 24 Jun 2022 12:52:40 +0200 +Subject: objtool: Re-add UNWIND_HINT_{SAVE_RESTORE} + +From: Josh Poimboeuf + +commit 8faea26e611189e933ea2281975ff4dc7c1106b6 upstream. + +Commit + + c536ed2fffd5 ("objtool: Remove SAVE/RESTORE hints") + +removed the save/restore unwind hints because they were no longer +needed. Now they're going to be needed again so re-add them. 
+ +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/unwind_hints.h | 12 ++++++++-- + include/linux/objtool.h | 6 +++-- + tools/include/linux/objtool.h | 6 +++-- + tools/objtool/check.c | 40 ++++++++++++++++++++++++++++++++++ + tools/objtool/include/objtool/check.h | 1 + 5 files changed, 59 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/unwind_hints.h ++++ b/arch/x86/include/asm/unwind_hints.h +@@ -8,11 +8,11 @@ + #ifdef __ASSEMBLY__ + + .macro UNWIND_HINT_EMPTY +- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 ++ UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1 + .endm + + .macro UNWIND_HINT_ENTRY +- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1 ++ UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1 + .endm + + .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 +@@ -56,6 +56,14 @@ + UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC + .endm + ++.macro UNWIND_HINT_SAVE ++ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE ++.endm ++ ++.macro UNWIND_HINT_RESTORE ++ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE ++.endm ++ + #endif /* __ASSEMBLY__ */ + + #endif /* _ASM_X86_UNWIND_HINTS_H */ +--- a/include/linux/objtool.h ++++ b/include/linux/objtool.h +@@ -40,6 +40,8 @@ struct unwind_hint { + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 + #define UNWIND_HINT_TYPE_ENTRY 4 ++#define UNWIND_HINT_TYPE_SAVE 5 ++#define UNWIND_HINT_TYPE_RESTORE 6 + + #ifdef CONFIG_STACK_VALIDATION + +@@ -102,7 +104,7 @@ struct unwind_hint { + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. 
+ */ +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ +@@ -132,7 +134,7 @@ struct unwind_hint { + #define STACK_FRAME_NON_STANDARD(func) + #else + #define ANNOTATE_INTRA_FUNCTION_CALL +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .endm + .macro STACK_FRAME_NON_STANDARD func:req + .endm +--- a/tools/include/linux/objtool.h ++++ b/tools/include/linux/objtool.h +@@ -40,6 +40,8 @@ struct unwind_hint { + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 + #define UNWIND_HINT_TYPE_ENTRY 4 ++#define UNWIND_HINT_TYPE_SAVE 5 ++#define UNWIND_HINT_TYPE_RESTORE 6 + + #ifdef CONFIG_STACK_VALIDATION + +@@ -102,7 +104,7 @@ struct unwind_hint { + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. + */ +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ +@@ -132,7 +134,7 @@ struct unwind_hint { + #define STACK_FRAME_NON_STANDARD(func) + #else + #define ANNOTATE_INTRA_FUNCTION_CALL +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .endm + .macro STACK_FRAME_NON_STANDARD func:req + .endm +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1847,6 +1847,17 @@ static int read_unwind_hints(struct objt + + insn->hint = true; + ++ if (hint->type == UNWIND_HINT_TYPE_SAVE) { ++ insn->hint = false; ++ insn->save = true; ++ continue; ++ } ++ ++ if (hint->type == UNWIND_HINT_TYPE_RESTORE) { ++ insn->restore = true; ++ continue; ++ } ++ + if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { + struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); + +@@ -3025,6 +3036,35 @@ static 
int validate_branch(struct objtoo + state.instr += insn->instr; + + if (insn->hint) { ++ if (insn->restore) { ++ struct instruction *save_insn, *i; ++ ++ i = insn; ++ save_insn = NULL; ++ ++ sym_for_each_insn_continue_reverse(file, func, i) { ++ if (i->save) { ++ save_insn = i; ++ break; ++ } ++ } ++ ++ if (!save_insn) { ++ WARN_FUNC("no corresponding CFI save for CFI restore", ++ sec, insn->offset); ++ return 1; ++ } ++ ++ if (!save_insn->visited) { ++ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo", ++ sec, insn->offset); ++ return 1; ++ } ++ ++ insn->cfi = save_insn->cfi; ++ nr_cfi_reused++; ++ } ++ + state.cfi = *insn->cfi; + } else { + /* XXX track if we actually changed state.cfi */ +--- a/tools/objtool/include/objtool/check.h ++++ b/tools/objtool/include/objtool/check.h +@@ -47,6 +47,7 @@ struct instruction { + unsigned long immediate; + bool dead_end, ignore, ignore_alts; + bool hint; ++ bool save, restore; + bool retpoline_safe; + bool entry; + s8 instr; diff --git a/queue-5.15/objtool-shrink-struct-instruction.patch b/queue-5.15/objtool-shrink-struct-instruction.patch new file mode 100644 index 00000000000..9b7284cb6cd --- /dev/null +++ b/queue-5.15/objtool-shrink-struct-instruction.patch @@ -0,0 +1,66 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:35 +0200 +Subject: objtool: Shrink struct instruction + +From: Peter Zijlstra + +commit c509331b41b7365e17396c246e8c5797bccc8074 upstream. + +Any one instruction can only ever call a single function, therefore +insn->mcount_loc_node is superfluous and can use insn->call_node. + +This shrinks struct instruction, which is by far the most numerous +structure objtool creates. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120309.785456706@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 6 +++--- + tools/objtool/include/objtool/check.h | 1 - + 2 files changed, 3 insertions(+), 4 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -551,7 +551,7 @@ static int create_mcount_loc_sections(st + return 0; + + idx = 0; +- list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) ++ list_for_each_entry(insn, &file->mcount_loc_list, call_node) + idx++; + + sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx); +@@ -559,7 +559,7 @@ static int create_mcount_loc_sections(st + return -1; + + idx = 0; +- list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) { ++ list_for_each_entry(insn, &file->mcount_loc_list, call_node) { + + loc = (unsigned long *)sec->data->d_buf + idx; + memset(loc, 0, sizeof(unsigned long)); +@@ -909,7 +909,7 @@ static void annotate_call_site(struct ob + + insn->type = INSN_NOP; + +- list_add_tail(&insn->mcount_loc_node, &file->mcount_loc_list); ++ list_add_tail(&insn->call_node, &file->mcount_loc_list); + return; + } + } +--- a/tools/objtool/include/objtool/check.h ++++ b/tools/objtool/include/objtool/check.h +@@ -40,7 +40,6 @@ struct instruction { + struct list_head list; + struct hlist_node hash; + struct list_head call_node; +- struct list_head mcount_loc_node; + struct section *sec; + unsigned long offset; + unsigned int len; diff --git a/queue-5.15/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch b/queue-5.15/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch new file mode 100644 index 00000000000..8e220de9542 --- /dev/null +++ b/queue-5.15/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch @@ -0,0 
+1,36 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Thadeu Lima de Souza Cascardo +Date: Fri, 1 Jul 2022 09:00:45 -0300 +Subject: objtool: skip non-text sections when adding return-thunk sites + +From: Thadeu Lima de Souza Cascardo + +The .discard.text section is added in order to reserve BRK, with a +temporary function just so it can give it a size. This adds a relocation to +the return thunk, which objtool will add to the .return_sites section. +Linking will then fail as there are references to the .discard.text +section. + +Do not add instructions from non-text sections to the list of return thunk +calls, avoiding the reference to .discard.text. + +Signed-off-by: Thadeu Lima de Souza Cascardo +Acked-by: Josh Poimboeuf +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1153,7 +1153,9 @@ static void add_return_call(struct objto + insn->type = INSN_RETURN; + insn->retpoline_safe = true; + +- list_add_tail(&insn->call_node, &file->return_thunk_list); ++ /* Skip the non-text sections, specially .discard ones */ ++ if (insn->sec->text) ++ list_add_tail(&insn->call_node, &file->return_thunk_list); + } + + /* diff --git a/queue-5.15/objtool-treat-.text.__x86.-as-noinstr.patch b/queue-5.15/objtool-treat-.text.__x86.-as-noinstr.patch new file mode 100644 index 00000000000..6a3b908a0a2 --- /dev/null +++ b/queue-5.15/objtool-treat-.text.__x86.-as-noinstr.patch @@ -0,0 +1,36 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:47 +0200 +Subject: objtool: Treat .text.__x86.* as noinstr + +From: Peter Zijlstra + +commit 951ddecf435659553ed15a9214e153a3af43a9a1 upstream. + +Needed because zen_untrain_ret() will be called from noinstr code. + +Also makes sense since the thunks MUST NOT contain instrumentation nor +be poked with dynamic instrumentation. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -367,7 +367,8 @@ static int decode_instructions(struct ob + sec->text = true; + + if (!strcmp(sec->name, ".noinstr.text") || +- !strcmp(sec->name, ".entry.text")) ++ !strcmp(sec->name, ".entry.text") || ++ !strncmp(sec->name, ".text.__x86.", 12)) + sec->noinstr = true; + + for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { diff --git a/queue-5.15/objtool-update-retpoline-validation.patch b/queue-5.15/objtool-update-retpoline-validation.patch new file mode 100644 index 00000000000..a5bfeb22633 --- /dev/null +++ b/queue-5.15/objtool-update-retpoline-validation.patch @@ -0,0 +1,111 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:59 +0200 +Subject: objtool: Update Retpoline validation + +From: Peter Zijlstra + +commit 9bb2ec608a209018080ca262f771e6a9ff203b6f upstream. + +Update retpoline validation with the new CONFIG_RETPOLINE requirement of +not having bare naked RET instructions. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: conflict fixup at arch/x86/xen/xen-head.S] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 6 ++++++ + arch/x86/mm/mem_encrypt_boot.S | 2 ++ + arch/x86/xen/xen-head.S | 1 + + tools/objtool/check.c | 19 +++++++++++++------ + 4 files changed, 22 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -76,6 +76,12 @@ + .endm + + /* ++ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions ++ * vs RETBleed validation. ++ */ ++#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE ++ ++/* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 + * attack. +--- a/arch/x86/mm/mem_encrypt_boot.S ++++ b/arch/x86/mm/mem_encrypt_boot.S +@@ -66,6 +66,7 @@ SYM_FUNC_START(sme_encrypt_execute) + pop %rbp + + /* Offset to __x86_return_thunk would be wrong here */ ++ ANNOTATE_UNRET_SAFE + ret + int3 + SYM_FUNC_END(sme_encrypt_execute) +@@ -154,6 +155,7 @@ SYM_FUNC_START(__enc_copy) + pop %r15 + + /* Offset to __x86_return_thunk would be wrong here */ ++ ANNOTATE_UNRET_SAFE + ret + int3 + .L__enc_copy_end: +--- a/arch/x86/xen/xen-head.S ++++ b/arch/x86/xen/xen-head.S +@@ -70,6 +70,7 @@ SYM_CODE_START(hypercall_page) + .rept (PAGE_SIZE / 32) + UNWIND_HINT_FUNC + .skip 31, 0x90 ++ ANNOTATE_UNRET_SAFE + RET + .endr + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1894,8 +1894,9 @@ static int read_retpoline_hints(struct o + } + + if (insn->type != INSN_JUMP_DYNAMIC && +- insn->type != INSN_CALL_DYNAMIC) { +- WARN_FUNC("retpoline_safe hint not an indirect jump/call", ++ insn->type != INSN_CALL_DYNAMIC && ++ insn->type != INSN_RETURN) { ++ WARN_FUNC("retpoline_safe hint not an indirect 
jump/call/ret", + insn->sec, insn->offset); + return -1; + } +@@ -3229,7 +3230,8 @@ static int validate_retpoline(struct obj + + for_each_insn(file, insn) { + if (insn->type != INSN_JUMP_DYNAMIC && +- insn->type != INSN_CALL_DYNAMIC) ++ insn->type != INSN_CALL_DYNAMIC && ++ insn->type != INSN_RETURN) + continue; + + if (insn->retpoline_safe) +@@ -3244,9 +3246,14 @@ static int validate_retpoline(struct obj + if (!strcmp(insn->sec->name, ".init.text") && !module) + continue; + +- WARN_FUNC("indirect %s found in RETPOLINE build", +- insn->sec, insn->offset, +- insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); ++ if (insn->type == INSN_RETURN) { ++ WARN_FUNC("'naked' return found in RETPOLINE build", ++ insn->sec, insn->offset); ++ } else { ++ WARN_FUNC("indirect %s found in RETPOLINE build", ++ insn->sec, insn->offset, ++ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); ++ } + + warnings++; + } diff --git a/queue-5.15/objtool-x86-replace-alternatives-with-.retpoline_sites.patch b/queue-5.15/objtool-x86-replace-alternatives-with-.retpoline_sites.patch new file mode 100644 index 00000000000..0b01268f243 --- /dev/null +++ b/queue-5.15/objtool-x86-replace-alternatives-with-.retpoline_sites.patch @@ -0,0 +1,493 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:36 +0200 +Subject: objtool,x86: Replace alternatives with .retpoline_sites + +From: Peter Zijlstra + +commit 134ab5bd1883312d7a4b3033b05c6b5a1bb8889b upstream. + +Instead of writing complete alternatives, simply provide a list of all +the retpoline thunk calls. Then the kernel is free to do with them as +it pleases. Simpler code all-round. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120309.850007165@infradead.org +[cascardo: fixed conflict because of missing + 8b946cc38e063f0f7bb67789478c38f6d7d457c9] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/vmlinux.lds.S | 14 +++ + tools/objtool/arch/x86/decode.c | 120 -------------------------------- + tools/objtool/check.c | 132 +++++++++++++++++++++++++----------- + tools/objtool/elf.c | 84 ---------------------- + tools/objtool/include/objtool/elf.h | 1 + tools/objtool/special.c | 8 -- + 6 files changed, 107 insertions(+), 252 deletions(-) + +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -272,6 +272,20 @@ SECTIONS + __parainstructions_end = .; + } + ++#ifdef CONFIG_RETPOLINE ++ /* ++ * List of instructions that call/jmp/jcc to retpoline thunks ++ * __x86_indirect_thunk_*(). These instructions can be patched along ++ * with alternatives, after which the section can be freed. ++ */ ++ . = ALIGN(8); ++ .retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) { ++ __retpoline_sites = .; ++ *(.retpoline_sites) ++ __retpoline_sites_end = .; ++ } ++#endif ++ + /* + * struct alt_inst entries. From the header (alternative.h): + * "Alternative instructions for different CPU types or capabilities" +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -684,126 +684,6 @@ const char *arch_ret_insn(int len) + return ret[len-1]; + } + +-/* asm/alternative.h ? 
*/ +- +-#define ALTINSTR_FLAG_INV (1 << 15) +-#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) +- +-struct alt_instr { +- s32 instr_offset; /* original instruction */ +- s32 repl_offset; /* offset to replacement instruction */ +- u16 cpuid; /* cpuid bit set for replacement */ +- u8 instrlen; /* length of original instruction */ +- u8 replacementlen; /* length of new instruction */ +-} __packed; +- +-static int elf_add_alternative(struct elf *elf, +- struct instruction *orig, struct symbol *sym, +- int cpuid, u8 orig_len, u8 repl_len) +-{ +- const int size = sizeof(struct alt_instr); +- struct alt_instr *alt; +- struct section *sec; +- Elf_Scn *s; +- +- sec = find_section_by_name(elf, ".altinstructions"); +- if (!sec) { +- sec = elf_create_section(elf, ".altinstructions", +- SHF_ALLOC, 0, 0); +- +- if (!sec) { +- WARN_ELF("elf_create_section"); +- return -1; +- } +- } +- +- s = elf_getscn(elf->elf, sec->idx); +- if (!s) { +- WARN_ELF("elf_getscn"); +- return -1; +- } +- +- sec->data = elf_newdata(s); +- if (!sec->data) { +- WARN_ELF("elf_newdata"); +- return -1; +- } +- +- sec->data->d_size = size; +- sec->data->d_align = 1; +- +- alt = sec->data->d_buf = malloc(size); +- if (!sec->data->d_buf) { +- perror("malloc"); +- return -1; +- } +- memset(sec->data->d_buf, 0, size); +- +- if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size, +- R_X86_64_PC32, orig->sec, orig->offset)) { +- WARN("elf_create_reloc: alt_instr::instr_offset"); +- return -1; +- } +- +- if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4, +- R_X86_64_PC32, sym, 0)) { +- WARN("elf_create_reloc: alt_instr::repl_offset"); +- return -1; +- } +- +- alt->cpuid = bswap_if_needed(cpuid); +- alt->instrlen = orig_len; +- alt->replacementlen = repl_len; +- +- sec->sh.sh_size += size; +- sec->changed = true; +- +- return 0; +-} +- +-#define X86_FEATURE_RETPOLINE ( 7*32+12) +- +-int arch_rewrite_retpolines(struct objtool_file *file) +-{ +- struct instruction *insn; +- struct reloc *reloc; +- struct symbol *sym; +- 
char name[32] = ""; +- +- list_for_each_entry(insn, &file->retpoline_call_list, call_node) { +- +- if (insn->type != INSN_JUMP_DYNAMIC && +- insn->type != INSN_CALL_DYNAMIC) +- continue; +- +- if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk")) +- continue; +- +- reloc = insn->reloc; +- +- sprintf(name, "__x86_indirect_alt_%s_%s", +- insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call", +- reloc->sym->name + 21); +- +- sym = find_symbol_by_name(file->elf, name); +- if (!sym) { +- sym = elf_create_undef_symbol(file->elf, name); +- if (!sym) { +- WARN("elf_create_undef_symbol"); +- return -1; +- } +- } +- +- if (elf_add_alternative(file->elf, insn, sym, +- ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) { +- WARN("elf_add_alternative"); +- return -1; +- } +- } +- +- return 0; +-} +- + int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) + { + struct cfi_reg *cfa = &insn->cfi.cfa; +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -533,6 +533,52 @@ static int create_static_call_sections(s + return 0; + } + ++static int create_retpoline_sites_sections(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ struct section *sec; ++ int idx; ++ ++ sec = find_section_by_name(file->elf, ".retpoline_sites"); ++ if (sec) { ++ WARN("file already has .retpoline_sites, skipping"); ++ return 0; ++ } ++ ++ idx = 0; ++ list_for_each_entry(insn, &file->retpoline_call_list, call_node) ++ idx++; ++ ++ if (!idx) ++ return 0; ++ ++ sec = elf_create_section(file->elf, ".retpoline_sites", 0, ++ sizeof(int), idx); ++ if (!sec) { ++ WARN("elf_create_section: .retpoline_sites"); ++ return -1; ++ } ++ ++ idx = 0; ++ list_for_each_entry(insn, &file->retpoline_call_list, call_node) { ++ ++ int *site = (int *)sec->data->d_buf + idx; ++ *site = 0; ++ ++ if (elf_add_reloc_to_insn(file->elf, sec, ++ idx * sizeof(int), ++ R_X86_64_PC32, ++ insn->sec, insn->offset)) { ++ WARN("elf_add_reloc_to_insn: .retpoline_sites"); ++ return -1; ++ } ++ ++ idx++; ++ } ++ ++ return 0; ++} ++ 
+ static int create_mcount_loc_sections(struct objtool_file *file) + { + struct section *sec; +@@ -863,6 +909,11 @@ static void annotate_call_site(struct ob + return; + } + ++ if (sym->retpoline_thunk) { ++ list_add_tail(&insn->call_node, &file->retpoline_call_list); ++ return; ++ } ++ + /* + * Many compilers cannot disable KCOV with a function attribute + * so they need a little help, NOP out any KCOV calls from noinstr +@@ -933,6 +984,39 @@ static void add_call_dest(struct objtool + annotate_call_site(file, insn, sibling); + } + ++static void add_retpoline_call(struct objtool_file *file, struct instruction *insn) ++{ ++ /* ++ * Retpoline calls/jumps are really dynamic calls/jumps in disguise, ++ * so convert them accordingly. ++ */ ++ switch (insn->type) { ++ case INSN_CALL: ++ insn->type = INSN_CALL_DYNAMIC; ++ break; ++ case INSN_JUMP_UNCONDITIONAL: ++ insn->type = INSN_JUMP_DYNAMIC; ++ break; ++ case INSN_JUMP_CONDITIONAL: ++ insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; ++ break; ++ default: ++ return; ++ } ++ ++ insn->retpoline_safe = true; ++ ++ /* ++ * Whatever stack impact regular CALLs have, should be undone ++ * by the RETURN of the called function. ++ * ++ * Annotated intra-function calls retain the stack_ops but ++ * are converted to JUMP, see read_intra_function_calls(). ++ */ ++ remove_insn_ops(insn); ++ ++ annotate_call_site(file, insn, false); ++} + /* + * Find the destination instructions for all jumps. + */ +@@ -955,19 +1039,7 @@ static int add_jump_destinations(struct + dest_sec = reloc->sym->sec; + dest_off = arch_dest_reloc_offset(reloc->addend); + } else if (reloc->sym->retpoline_thunk) { +- /* +- * Retpoline jumps are really dynamic jumps in +- * disguise, so convert them accordingly. 
+- */ +- if (insn->type == INSN_JUMP_UNCONDITIONAL) +- insn->type = INSN_JUMP_DYNAMIC; +- else +- insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; +- +- list_add_tail(&insn->call_node, +- &file->retpoline_call_list); +- +- insn->retpoline_safe = true; ++ add_retpoline_call(file, insn); + continue; + } else if (insn->func) { + /* internal or external sibling call (with reloc) */ +@@ -1096,18 +1168,7 @@ static int add_call_destinations(struct + add_call_dest(file, insn, dest, false); + + } else if (reloc->sym->retpoline_thunk) { +- /* +- * Retpoline calls are really dynamic calls in +- * disguise, so convert them accordingly. +- */ +- insn->type = INSN_CALL_DYNAMIC; +- insn->retpoline_safe = true; +- +- list_add_tail(&insn->call_node, +- &file->retpoline_call_list); +- +- remove_insn_ops(insn); +- continue; ++ add_retpoline_call(file, insn); + + } else + add_call_dest(file, insn, reloc->sym, false); +@@ -1833,11 +1894,6 @@ static void mark_rodata(struct objtool_f + file->rodata = found; + } + +-__weak int arch_rewrite_retpolines(struct objtool_file *file) +-{ +- return 0; +-} +- + static int decode_sections(struct objtool_file *file) + { + int ret; +@@ -1906,15 +1962,6 @@ static int decode_sections(struct objtoo + if (ret) + return ret; + +- /* +- * Must be after add_special_section_alts(), since this will emit +- * alternatives. Must be after add_{jump,call}_destination(), since +- * those create the call insn lists. 
+- */ +- ret = arch_rewrite_retpolines(file); +- if (ret) +- return ret; +- + return 0; + } + +@@ -3252,6 +3299,13 @@ int check(struct objtool_file *file) + goto out; + warnings += ret; + ++ if (retpoline) { ++ ret = create_retpoline_sites_sections(file); ++ if (ret < 0) ++ goto out; ++ warnings += ret; ++ } ++ + if (mcount) { + ret = create_mcount_loc_sections(file); + if (ret < 0) +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -944,90 +944,6 @@ static int elf_add_string(struct elf *el + return len; + } + +-struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) +-{ +- struct section *symtab, *symtab_shndx; +- struct symbol *sym; +- Elf_Data *data; +- Elf_Scn *s; +- +- sym = malloc(sizeof(*sym)); +- if (!sym) { +- perror("malloc"); +- return NULL; +- } +- memset(sym, 0, sizeof(*sym)); +- +- sym->name = strdup(name); +- +- sym->sym.st_name = elf_add_string(elf, NULL, sym->name); +- if (sym->sym.st_name == -1) +- return NULL; +- +- sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); +- // st_other 0 +- // st_shndx 0 +- // st_value 0 +- // st_size 0 +- +- symtab = find_section_by_name(elf, ".symtab"); +- if (!symtab) { +- WARN("can't find .symtab"); +- return NULL; +- } +- +- s = elf_getscn(elf->elf, symtab->idx); +- if (!s) { +- WARN_ELF("elf_getscn"); +- return NULL; +- } +- +- data = elf_newdata(s); +- if (!data) { +- WARN_ELF("elf_newdata"); +- return NULL; +- } +- +- data->d_buf = &sym->sym; +- data->d_size = sizeof(sym->sym); +- data->d_align = 1; +- data->d_type = ELF_T_SYM; +- +- sym->idx = symtab->sh.sh_size / sizeof(sym->sym); +- +- symtab->sh.sh_size += data->d_size; +- symtab->changed = true; +- +- symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); +- if (symtab_shndx) { +- s = elf_getscn(elf->elf, symtab_shndx->idx); +- if (!s) { +- WARN_ELF("elf_getscn"); +- return NULL; +- } +- +- data = elf_newdata(s); +- if (!data) { +- WARN_ELF("elf_newdata"); +- return NULL; +- } +- +- data->d_buf = &sym->sym.st_size; /* 
conveniently 0 */ +- data->d_size = sizeof(Elf32_Word); +- data->d_align = 4; +- data->d_type = ELF_T_WORD; +- +- symtab_shndx->sh.sh_size += 4; +- symtab_shndx->changed = true; +- } +- +- sym->sec = find_section_by_index(elf, 0); +- +- elf_add_symbol(elf, sym); +- +- return sym; +-} +- + struct section *elf_create_section(struct elf *elf, const char *name, + unsigned int sh_flags, size_t entsize, int nr) + { +--- a/tools/objtool/include/objtool/elf.h ++++ b/tools/objtool/include/objtool/elf.h +@@ -143,7 +143,6 @@ int elf_write_insn(struct elf *elf, stru + unsigned long offset, unsigned int len, + const char *insn); + int elf_write_reloc(struct elf *elf, struct reloc *reloc); +-struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name); + int elf_write(struct elf *elf); + void elf_close(struct elf *elf); + +--- a/tools/objtool/special.c ++++ b/tools/objtool/special.c +@@ -109,14 +109,6 @@ static int get_alt_entry(struct elf *elf + return -1; + } + +- /* +- * Skip retpoline .altinstr_replacement... we already rewrite the +- * instructions for retpolines anyway, see arch_is_retpoline() +- * usage in add_{call,jump}_destinations(). 
+- */ +- if (arch_is_retpoline(new_reloc->sym)) +- return 1; +- + reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off); + + /* _ASM_EXTABLE_EX hack */ diff --git a/queue-5.15/series b/queue-5.15/series new file mode 100644 index 00000000000..30e3b622968 --- /dev/null +++ b/queue-5.15/series @@ -0,0 +1,78 @@ +x86-traps-use-pt_regs-directly-in-fixup_bad_iret.patch +x86-entry-switch-the-stack-after-error_entry-returns.patch +x86-entry-move-push_and_clear_regs-out-of-error_entry.patch +x86-entry-don-t-call-error_entry-for-xenpv.patch +objtool-classify-symbols.patch +objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch +objtool-shrink-struct-instruction.patch +objtool-x86-replace-alternatives-with-.retpoline_sites.patch +objtool-introduce-cfi-hash.patch +x86-retpoline-remove-unused-replacement-symbols.patch +x86-asm-fix-register-order.patch +x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch +x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch +x86-retpoline-create-a-retpoline-thunk-array.patch +x86-alternative-implement-.retpoline_sites-support.patch +x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch +x86-alternative-try-inline-spectre_v2-retpoline-amd.patch +x86-alternative-add-debug-prints-to-apply_retpolines.patch +bpf-x86-simplify-computing-label-offsets.patch +bpf-x86-respect-x86_feature_retpoline.patch +objtool-default-ignore-int3-for-unreachable.patch +x86-entry-remove-skip_r11rcx.patch +x86-realmode-build-with-d__disable_exports.patch +x86-kvm-vmx-make-noinstr-clean.patch +x86-cpufeatures-move-retpoline-flags-to-word-11.patch +x86-retpoline-cleanup-some-ifdefery.patch +x86-retpoline-swizzle-retpoline-thunk.patch +x86-retpoline-use-mfunction-return.patch +x86-undo-return-thunk-damage.patch +x86-objtool-create-.return_sites.patch +objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch +x86-static_call-use-alternative-ret-encoding.patch +x86-ftrace-use-alternative-ret-encoding.patch 
+x86-bpf-use-alternative-ret-encoding.patch +x86-kvm-fix-setcc-emulation-for-return-thunks.patch +x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch +x86-sev-avoid-using-__x86_return_thunk.patch +x86-use-return-thunk-in-asm-code.patch +x86-entry-avoid-very-early-ret.patch +objtool-treat-.text.__x86.-as-noinstr.patch +x86-add-magic-amd-return-thunk.patch +x86-bugs-report-amd-retbleed-vulnerability.patch +x86-bugs-add-amd-retbleed-boot-parameter.patch +x86-bugs-enable-stibp-for-jmp2ret.patch +x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch +x86-entry-add-kernel-ibrs-implementation.patch +x86-bugs-optimize-spec_ctrl-msr-writes.patch +x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch +x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch +x86-bugs-report-intel-retbleed-vulnerability.patch +intel_idle-disable-ibrs-during-long-idle.patch +objtool-update-retpoline-validation.patch +x86-xen-rename-sys-entry-points.patch +x86-xen-add-untrain_ret.patch +x86-bugs-add-retbleed-ibpb.patch +x86-bugs-do-ibpb-fallback-check-only-once.patch +objtool-add-entry-unret-validation.patch +x86-cpu-amd-add-spectral-chicken.patch +x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch +x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch +x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch +x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch +x86-speculation-remove-x86_spec_ctrl_mask.patch +objtool-re-add-unwind_hint_-save_restore.patch +kvm-vmx-flatten-__vmx_vcpu_run.patch +kvm-vmx-convert-launched-argument-to-flags.patch +kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch +kvm-vmx-fix-ibrs-handling-after-vmexit.patch +x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch +x86-common-stamp-out-the-stepping-madness.patch +x86-cpu-amd-enumerate-btc_no.patch +x86-retbleed-add-fine-grained-kconfig-knobs.patch +x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch 
+x86-entry-move-push_and_clear_regs-back-into-error_entry.patch +x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch +x86-kexec-disable-ret-on-kexec.patch +x86-speculation-disable-rrsba-behavior.patch +x86-static_call-serialize-__static_call_fixup-properly.patch diff --git a/queue-5.15/x86-add-magic-amd-return-thunk.patch b/queue-5.15/x86-add-magic-amd-return-thunk.patch new file mode 100644 index 00000000000..6e594ec2494 --- /dev/null +++ b/queue-5.15/x86-add-magic-amd-return-thunk.patch @@ -0,0 +1,361 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:48 +0200 +Subject: x86: Add magic AMD return-thunk + +From: Peter Zijlstra + +commit a149180fbcf336e97ce4eb2cdc13672727feb94d upstream. + +Note: needs to be in a section distinct from Retpolines such that the +Retpoline RET substitution cannot possibly use immediate jumps. + +ORC unwinding for zen_untrain_ret() and __x86_return_thunk() is a +little tricky but works due to the fact that zen_untrain_ret() doesn't +have any stack ops and as such will emit a single ORC entry at the +start (+0x3f). + +Meanwhile, unwinding an IP, including the __x86_return_thunk() one +(+0x40) will search for the largest ORC entry smaller or equal to the +IP, these will find the one ORC entry (+0x3f) and all works. + + [ Alexandre: SVM part. ] + [ bp: Build fix, massages. 
] + +Suggested-by: Andrew Cooper +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: conflicts at arch/x86/entry/entry_64_compat.S] +[cascardo: there is no ANNOTATE_NOENDBR] +[cascardo: objtool commit 34c861e806478ac2ea4032721defbf1d6967df08 missing] +[cascardo: conflict fixup] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_64.S | 6 ++ + arch/x86/entry/entry_64_compat.S | 4 + + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/disabled-features.h | 3 - + arch/x86/include/asm/nospec-branch.h | 17 ++++++++ + arch/x86/kernel/vmlinux.lds.S | 2 + arch/x86/kvm/svm/vmenter.S | 18 ++++++++ + arch/x86/lib/retpoline.S | 63 +++++++++++++++++++++++++++++-- + tools/objtool/check.c | 20 ++++++++- + 9 files changed, 126 insertions(+), 8 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -94,6 +94,7 @@ SYM_CODE_START(entry_SYSCALL_64) + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) ++ UNTRAIN_RET + + /* Construct struct pt_regs on stack */ + pushq $__USER_DS /* pt_regs->ss */ +@@ -688,6 +689,7 @@ native_irq_return_ldt: + pushq %rdi /* Stash user RDI */ + swapgs /* to kernel GS */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ ++ UNTRAIN_RET + + movq PER_CPU_VAR(espfix_waddr), %rdi + movq %rax, (0*8)(%rdi) /* user RAX */ +@@ -882,6 +884,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) + * be retrieved from a kernel internal table. + */ + SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 ++ UNTRAIN_RET + + /* + * Handling GSBASE depends on the availability of FSGSBASE. +@@ -992,6 +995,7 @@ SYM_CODE_START_LOCAL(error_entry) + FENCE_SWAPGS_USER_ENTRY + /* We have user CR3. Change to kernel CR3. 
*/ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ UNTRAIN_RET + + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ + .Lerror_entry_from_usermode_after_swapgs: +@@ -1044,6 +1048,7 @@ SYM_CODE_START_LOCAL(error_entry) + SWAPGS + FENCE_SWAPGS_USER_ENTRY + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ UNTRAIN_RET + + /* + * Pretend that the exception came from user mode: set up pt_regs +@@ -1138,6 +1143,7 @@ SYM_CODE_START(asm_exc_nmi) + movq %rsp, %rdx + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT_IRET_REGS base=%rdx offset=8 ++ UNTRAIN_RET + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -71,6 +72,7 @@ SYM_CODE_START(entry_SYSENTER_compat) + pushq $__USER32_CS /* pt_regs->cs */ + pushq $0 /* pt_regs->ip = 0 (placeholder) */ + SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) ++ UNTRAIN_RET + + /* + * User tracing code (ptrace or signal handlers) might assume that +@@ -211,6 +213,7 @@ SYM_CODE_START(entry_SYSCALL_compat) + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) ++ UNTRAIN_RET + + /* Construct struct pt_regs on stack */ + pushq $__USER32_DS /* pt_regs->ss */ +@@ -377,6 +380,7 @@ SYM_CODE_START(entry_INT80_compat) + pushq (%rdi) /* pt_regs->di */ + .Lint80_keep_stack: + ++ UNTRAIN_RET + pushq %rsi /* pt_regs->si */ + xorl %esi, %esi /* nospec si */ + pushq %rdx /* pt_regs->dx */ +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -299,6 +299,7 @@ + #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ + #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ 
++#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -61,7 +61,8 @@ + #else + # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ +- (1 << (X86_FEATURE_RETHUNK & 31))) ++ (1 << (X86_FEATURE_RETHUNK & 31)) | \ ++ (1 << (X86_FEATURE_UNRET & 31))) + #endif + + /* Force disable because it's broken beyond repair */ +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -112,6 +112,22 @@ + #endif + .endm + ++/* ++ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the ++ * return thunk isn't mapped into the userspace tables (then again, AMD ++ * typically has NO_MELTDOWN). ++ * ++ * Doesn't clobber any registers but does require a stable stack. ++ * ++ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point ++ * where we have a stack but before any RET instruction. ++ */ ++.macro UNTRAIN_RET ++#ifdef CONFIG_RETPOLINE ++ ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET ++#endif ++.endm ++ + #else /* __ASSEMBLY__ */ + + #define ANNOTATE_RETPOLINE_SAFE \ +@@ -121,6 +137,7 @@ + ".popsection\n\t" + + extern void __x86_return_thunk(void); ++extern void zen_untrain_ret(void); + + #ifdef CONFIG_RETPOLINE + +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -142,7 +142,7 @@ SECTIONS + + #ifdef CONFIG_RETPOLINE + __indirect_thunk_start = .; +- *(.text.__x86.indirect_thunk) ++ *(.text.__x86.*) + __indirect_thunk_end = .; + #endif + } :text =0xcccc +--- a/arch/x86/kvm/svm/vmenter.S ++++ b/arch/x86/kvm/svm/vmenter.S +@@ -111,6 +111,15 @@ SYM_FUNC_START(__svm_vcpu_run) + #endif + + /* ++ * Mitigate RETBleed for AMD/Hygon Zen uarch. 
RET should be ++ * untrained as soon as we exit the VM and are back to the ++ * kernel. This should be done before re-enabling interrupts ++ * because interrupt handlers won't sanitize 'ret' if the return is ++ * from the kernel. ++ */ ++ UNTRAIN_RET ++ ++ /* + * Clear all general purpose registers except RSP and RAX to prevent + * speculative use of the guest's values, even those that are reloaded + * via the stack. In theory, an L1 cache miss when restoring registers +@@ -190,6 +199,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + #endif + ++ /* ++ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be ++ * untrained as soon as we exit the VM and are back to the ++ * kernel. This should be done before re-enabling interrupts ++ * because interrupt handlers won't sanitize RET if the return is ++ * from the kernel. ++ */ ++ UNTRAIN_RET ++ + pop %_ASM_BX + + #ifdef CONFIG_X86_64 +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -71,10 +71,67 @@ SYM_CODE_END(__x86_indirect_thunk_array) + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. + */ +-SYM_CODE_START(__x86_return_thunk) +- UNWIND_HINT_EMPTY ++ .section .text.__x86.return_thunk ++ ++/* ++ * Safety details here pertain to the AMD Zen{1,2} microarchitecture: ++ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for ++ * alignment within the BTB. ++ * 2) The instruction at zen_untrain_ret must contain, and not ++ * end with, the 0xc3 byte of the RET. ++ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread ++ * from re-poisioning the BTB prediction. 
++ */ ++ .align 64 ++ .skip 63, 0xcc ++SYM_FUNC_START_NOALIGN(zen_untrain_ret); ++ ++ /* ++ * As executed from zen_untrain_ret, this is: ++ * ++ * TEST $0xcc, %bl ++ * LFENCE ++ * JMP __x86_return_thunk ++ * ++ * Executing the TEST instruction has a side effect of evicting any BTB ++ * prediction (potentially attacker controlled) attached to the RET, as ++ * __x86_return_thunk + 1 isn't an instruction boundary at the moment. ++ */ ++ .byte 0xf6 ++ ++ /* ++ * As executed from __x86_return_thunk, this is a plain RET. ++ * ++ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. ++ * ++ * We subsequently jump backwards and architecturally execute the RET. ++ * This creates a correct BTB prediction (type=ret), but in the ++ * meantime we suffer Straight Line Speculation (because the type was ++ * no branch) which is halted by the INT3. ++ * ++ * With SMT enabled and STIBP active, a sibling thread cannot poison ++ * RET's prediction to a type of its choice, but can evict the ++ * prediction due to competitive sharing. If the prediction is ++ * evicted, __x86_return_thunk will suffer Straight Line Speculation ++ * which will be contained safely by the INT3. ++ */ ++SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) + ret + int3 + SYM_CODE_END(__x86_return_thunk) + +-__EXPORT_THUNK(__x86_return_thunk) ++ /* ++ * Ensure the TEST decoding / BTB invalidation is complete. ++ */ ++ lfence ++ ++ /* ++ * Jump back and execute the RET in the middle of the TEST instruction. ++ * INT3 is for SLS protection. 
++ */ ++ jmp __x86_return_thunk ++ int3 ++SYM_FUNC_END(zen_untrain_ret) ++__EXPORT_THUNK(zen_untrain_ret) ++ ++EXPORT_SYMBOL(__x86_return_thunk) +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1145,7 +1145,7 @@ static void add_retpoline_call(struct ob + annotate_call_site(file, insn, false); + } + +-static void add_return_call(struct objtool_file *file, struct instruction *insn) ++static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add) + { + /* + * Return thunk tail calls are really just returns in disguise, +@@ -1155,7 +1155,7 @@ static void add_return_call(struct objto + insn->retpoline_safe = true; + + /* Skip the non-text sections, specially .discard ones */ +- if (insn->sec->text) ++ if (add && insn->sec->text) + list_add_tail(&insn->call_node, &file->return_thunk_list); + } + +@@ -1184,7 +1184,7 @@ static int add_jump_destinations(struct + add_retpoline_call(file, insn); + continue; + } else if (reloc->sym->return_thunk) { +- add_return_call(file, insn); ++ add_return_call(file, insn, true); + continue; + } else if (insn->func) { + /* internal or external sibling call (with reloc) */ +@@ -1201,6 +1201,7 @@ static int add_jump_destinations(struct + + insn->jump_dest = find_insn(file, dest_sec, dest_off); + if (!insn->jump_dest) { ++ struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off); + + /* + * This is a special case where an alt instruction +@@ -1210,6 +1211,19 @@ static int add_jump_destinations(struct + if (!strcmp(insn->sec->name, ".altinstr_replacement")) + continue; + ++ /* ++ * This is a special case for zen_untrain_ret(). ++ * It jumps to __x86_return_thunk(), but objtool ++ * can't find the thunk's starting RET ++ * instruction, because the RET is also in the ++ * middle of another instruction. Objtool only ++ * knows about the outer instruction. 
++ */ ++ if (sym && sym->return_thunk) { ++ add_return_call(file, insn, false); ++ continue; ++ } ++ + WARN_FUNC("can't find jump dest instruction at %s+0x%lx", + insn->sec, insn->offset, dest_sec->name, + dest_off); diff --git a/queue-5.15/x86-alternative-add-debug-prints-to-apply_retpolines.patch b/queue-5.15/x86-alternative-add-debug-prints-to-apply_retpolines.patch new file mode 100644 index 00000000000..e64bc544aca --- /dev/null +++ b/queue-5.15/x86-alternative-add-debug-prints-to-apply_retpolines.patch @@ -0,0 +1,48 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:45 +0200 +Subject: x86/alternative: Add debug prints to apply_retpolines() + +From: Peter Zijlstra + +commit d4b5a5c993009ffeb5febe3b701da3faab6adb96 upstream. + +Make sure we can see the text changes when booting with +'debug-alternative'. + +Example output: + + [ ] SMP alternatives: retpoline at: __traceiter_initcall_level+0x1f/0x30 (ffffffff8100066f) len: 5 to: __x86_indirect_thunk_rax+0x0/0x20 + [ ] SMP alternatives: ffffffff82603e58: [2:5) optimized NOPs: ff d0 0f 1f 00 + [ ] SMP alternatives: ffffffff8100066f: orig: e8 cc 30 00 01 + [ ] SMP alternatives: ffffffff8100066f: repl: ff d0 0f 1f 00 + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.422273830@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/alternative.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -492,9 +492,15 @@ void __init_or_module noinline apply_ret + continue; + } + ++ DPRINTK("retpoline at: %pS (%px) len: %d to: %pS", ++ addr, addr, insn.length, ++ addr + insn.length + insn.immediate.value); ++ + len = patch_retpoline(addr, &insn, bytes); + if (len == insn.length) { + optimize_nops(bytes, len); ++ 
DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); ++ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + text_poke_early(addr, bytes, len); + } + } diff --git a/queue-5.15/x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch b/queue-5.15/x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch new file mode 100644 index 00000000000..19602f26385 --- /dev/null +++ b/queue-5.15/x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch @@ -0,0 +1,96 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:43 +0200 +Subject: x86/alternative: Handle Jcc __x86_indirect_thunk_\reg + +From: Peter Zijlstra + +commit 2f0cbb2a8e5bbf101e9de118fc0eb168111a5e1e upstream. + +Handle the rare cases where the compiler (clang) does an indirect +conditional tail-call using: + + Jcc __x86_indirect_thunk_\reg + +For the !RETPOLINE case this can be rewritten to fit the original (6 +byte) instruction like: + + Jncc.d8 1f + JMP *%\reg + NOP +1: + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.296470217@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/alternative.c | 40 ++++++++++++++++++++++++++++++++++++---- + 1 file changed, 36 insertions(+), 4 deletions(-) + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -393,7 +393,8 @@ static int emit_indirect(int op, int reg + static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) + { + retpoline_thunk_t *target; +- int reg, i = 0; ++ int reg, ret, i = 0; ++ u8 op, cc; + + target = addr + insn->length + insn->immediate.value; + reg = target - __x86_indirect_thunk_array; +@@ -407,9 +408,36 @@ static int patch_retpoline(void *addr, s + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) + return -1; + +- i = emit_indirect(insn->opcode.bytes[0], reg, bytes); +- if 
(i < 0) +- return i; ++ op = insn->opcode.bytes[0]; ++ ++ /* ++ * Convert: ++ * ++ * Jcc.d32 __x86_indirect_thunk_\reg ++ * ++ * into: ++ * ++ * Jncc.d8 1f ++ * JMP *%\reg ++ * NOP ++ * 1: ++ */ ++ /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ ++ if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) { ++ cc = insn->opcode.bytes[1] & 0xf; ++ cc ^= 1; /* invert condition */ ++ ++ bytes[i++] = 0x70 + cc; /* Jcc.d8 */ ++ bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */ ++ ++ /* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */ ++ op = JMP32_INSN_OPCODE; ++ } ++ ++ ret = emit_indirect(op, reg, bytes + i); ++ if (ret < 0) ++ return ret; ++ i += ret; + + for (; i < insn->length;) + bytes[i++] = BYTES_NOP1; +@@ -443,6 +471,10 @@ void __init_or_module noinline apply_ret + case JMP32_INSN_OPCODE: + break; + ++ case 0x0f: /* escape */ ++ if (op2 >= 0x80 && op2 <= 0x8f) ++ break; ++ fallthrough; + default: + WARN_ON_ONCE(1); + continue; diff --git a/queue-5.15/x86-alternative-implement-.retpoline_sites-support.patch b/queue-5.15/x86-alternative-implement-.retpoline_sites-support.patch new file mode 100644 index 00000000000..493c649e9cc --- /dev/null +++ b/queue-5.15/x86-alternative-implement-.retpoline_sites-support.patch @@ -0,0 +1,284 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:42 +0200 +Subject: x86/alternative: Implement .retpoline_sites support + +From: Peter Zijlstra + +commit 7508500900814d14e2e085cdc4e28142721abbdf upstream. + +Rewrite retpoline thunk call sites to be indirect calls for +spectre_v2=off. This ensures spectre_v2=off is as near to a +RETPOLINE=n build as possible. + +This is the replacement for objtool writing alternative entries to +ensure the same and achieves feature-parity with the previous +approach. + +One noteworthy feature is that it relies on the thunks to be in +machine order to compute the register index. 
+ +Specifically, this does not yet address the Jcc __x86_indirect_thunk_* +calls generated by clang, a future patch will add this. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.232495794@infradead.org +[cascardo: small conflict fixup at arch/x86/kernel/module.c] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/um/kernel/um_arch.c | 4 + + arch/x86/include/asm/alternative.h | 1 + arch/x86/kernel/alternative.c | 141 +++++++++++++++++++++++++++++++++++-- + arch/x86/kernel/module.c | 9 ++ + 4 files changed, 150 insertions(+), 5 deletions(-) + +--- a/arch/um/kernel/um_arch.c ++++ b/arch/um/kernel/um_arch.c +@@ -421,6 +421,10 @@ void __init check_bugs(void) + os_check_bugs(); + } + ++void apply_retpolines(s32 *start, s32 *end) ++{ ++} ++ + void apply_alternatives(struct alt_instr *start, struct alt_instr *end) + { + } +--- a/arch/x86/include/asm/alternative.h ++++ b/arch/x86/include/asm/alternative.h +@@ -75,6 +75,7 @@ extern int alternatives_patched; + + extern void alternative_instructions(void); + extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); ++extern void apply_retpolines(s32 *start, s32 *end); + + struct module; + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + int __read_mostly alternatives_patched; + +@@ -113,6 +114,7 @@ static void __init_or_module add_nops(vo + } + } + ++extern s32 __retpoline_sites[], __retpoline_sites_end[]; + extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + extern s32 __smp_locks[], __smp_locks_end[]; + void text_poke_early(void *addr, const void *opcode, size_t len); +@@ -221,7 +223,7 @@ static __always_inline int optimize_nops + * "noinline" to cause control flow change and thus invalidate I$ and + * cause refetch after 
modification. + */ +-static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) ++static void __init_or_module noinline optimize_nops(u8 *instr, size_t len) + { + struct insn insn; + int i = 0; +@@ -239,11 +241,11 @@ static void __init_or_module noinline op + * optimized. + */ + if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) +- i += optimize_nops_range(instr, a->instrlen, i); ++ i += optimize_nops_range(instr, len, i); + else + i += insn.length; + +- if (i >= a->instrlen) ++ if (i >= len) + return; + } + } +@@ -331,10 +333,135 @@ void __init_or_module noinline apply_alt + text_poke_early(instr, insn_buff, insn_buff_sz); + + next: +- optimize_nops(a, instr); ++ optimize_nops(instr, a->instrlen); + } + } + ++#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION) ++ ++/* ++ * CALL/JMP *%\reg ++ */ ++static int emit_indirect(int op, int reg, u8 *bytes) ++{ ++ int i = 0; ++ u8 modrm; ++ ++ switch (op) { ++ case CALL_INSN_OPCODE: ++ modrm = 0x10; /* Reg = 2; CALL r/m */ ++ break; ++ ++ case JMP32_INSN_OPCODE: ++ modrm = 0x20; /* Reg = 4; JMP r/m */ ++ break; ++ ++ default: ++ WARN_ON_ONCE(1); ++ return -1; ++ } ++ ++ if (reg >= 8) { ++ bytes[i++] = 0x41; /* REX.B prefix */ ++ reg -= 8; ++ } ++ ++ modrm |= 0xc0; /* Mod = 3 */ ++ modrm += reg; ++ ++ bytes[i++] = 0xff; /* opcode */ ++ bytes[i++] = modrm; ++ ++ return i; ++} ++ ++/* ++ * Rewrite the compiler generated retpoline thunk calls. ++ * ++ * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate ++ * indirect instructions, avoiding the extra indirection. 
++ * ++ * For example, convert: ++ * ++ * CALL __x86_indirect_thunk_\reg ++ * ++ * into: ++ * ++ * CALL *%\reg ++ * ++ */ ++static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) ++{ ++ retpoline_thunk_t *target; ++ int reg, i = 0; ++ ++ target = addr + insn->length + insn->immediate.value; ++ reg = target - __x86_indirect_thunk_array; ++ ++ if (WARN_ON_ONCE(reg & ~0xf)) ++ return -1; ++ ++ /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */ ++ BUG_ON(reg == 4); ++ ++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) ++ return -1; ++ ++ i = emit_indirect(insn->opcode.bytes[0], reg, bytes); ++ if (i < 0) ++ return i; ++ ++ for (; i < insn->length;) ++ bytes[i++] = BYTES_NOP1; ++ ++ return i; ++} ++ ++/* ++ * Generated by 'objtool --retpoline'. ++ */ ++void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) ++{ ++ s32 *s; ++ ++ for (s = start; s < end; s++) { ++ void *addr = (void *)s + *s; ++ struct insn insn; ++ int len, ret; ++ u8 bytes[16]; ++ u8 op1, op2; ++ ++ ret = insn_decode_kernel(&insn, addr); ++ if (WARN_ON_ONCE(ret < 0)) ++ continue; ++ ++ op1 = insn.opcode.bytes[0]; ++ op2 = insn.opcode.bytes[1]; ++ ++ switch (op1) { ++ case CALL_INSN_OPCODE: ++ case JMP32_INSN_OPCODE: ++ break; ++ ++ default: ++ WARN_ON_ONCE(1); ++ continue; ++ } ++ ++ len = patch_retpoline(addr, &insn, bytes); ++ if (len == insn.length) { ++ optimize_nops(bytes, len); ++ text_poke_early(addr, bytes, len); ++ } ++ } ++} ++ ++#else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ ++ ++void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } ++ ++#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ ++ + #ifdef CONFIG_SMP + static void alternatives_smp_lock(const s32 *start, const s32 *end, + u8 *text, u8 *text_end) +@@ -643,6 +770,12 @@ void __init alternative_instructions(voi + apply_paravirt(__parainstructions, __parainstructions_end); + + /* ++ * Rewrite the retpolines, must be done before alternatives since ++ * those can 
rewrite the retpoline thunks. ++ */ ++ apply_retpolines(__retpoline_sites, __retpoline_sites_end); ++ ++ /* + * Then patch alternatives, such that those paravirt calls that are in + * alternatives can be overwritten by their immediate fragments. + */ +--- a/arch/x86/kernel/module.c ++++ b/arch/x86/kernel/module.c +@@ -252,7 +252,8 @@ int module_finalize(const Elf_Ehdr *hdr, + struct module *me) + { + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, +- *para = NULL, *orc = NULL, *orc_ip = NULL; ++ *para = NULL, *orc = NULL, *orc_ip = NULL, ++ *retpolines = NULL; + char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { +@@ -268,6 +269,8 @@ int module_finalize(const Elf_Ehdr *hdr, + orc = s; + if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) + orc_ip = s; ++ if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) ++ retpolines = s; + } + + /* +@@ -278,6 +281,10 @@ int module_finalize(const Elf_Ehdr *hdr, + void *pseg = (void *)para->sh_addr; + apply_paravirt(pseg, pseg + para->sh_size); + } ++ if (retpolines) { ++ void *rseg = (void *)retpolines->sh_addr; ++ apply_retpolines(rseg, rseg + retpolines->sh_size); ++ } + if (alt) { + /* patch .altinstructions */ + void *aseg = (void *)alt->sh_addr; diff --git a/queue-5.15/x86-alternative-try-inline-spectre_v2-retpoline-amd.patch b/queue-5.15/x86-alternative-try-inline-spectre_v2-retpoline-amd.patch new file mode 100644 index 00000000000..a7a33418914 --- /dev/null +++ b/queue-5.15/x86-alternative-try-inline-spectre_v2-retpoline-amd.patch @@ -0,0 +1,96 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:44 +0200 +Subject: x86/alternative: Try inline spectre_v2=retpoline,amd + +From: Peter Zijlstra + +commit bbe2df3f6b6da7848398d55b1311d58a16ec21e4 upstream. + +Try and replace retpoline thunk calls with: + + LFENCE + CALL *%\reg + +for spectre_v2=retpoline,amd. 
+ +Specifically, the sequence above is 5 bytes for the low 8 registers, +but 6 bytes for the high 8 registers. This means that unless the +compilers prefix stuff the call with higher registers this replacement +will fail. + +Luckily GCC strongly favours RAX for the indirect calls and most (95%+ +for defconfig-x86_64) will be converted. OTOH clang strongly favours +R11 and almost nothing gets converted. + +Note: it will also generate a correct replacement for the Jcc.d32 +case, except unless the compilers start to prefix stuff that, it'll +never fit. Specifically: + + Jncc.d8 1f + LFENCE + JMP *%\reg +1: + +is 7-8 bytes long, where the original instruction in unpadded form is +only 6 bytes. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.359986601@infradead.org +[cascardo: RETPOLINE_AMD was renamed to RETPOLINE_LFENCE] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/alternative.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -389,6 +389,7 @@ static int emit_indirect(int op, int reg + * + * CALL *%\reg + * ++ * It also tries to inline spectre_v2=retpoline,amd when size permits. + */ + static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) + { +@@ -405,7 +406,8 @@ static int patch_retpoline(void *addr, s + /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. 
*/ + BUG_ON(reg == 4); + +- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) ++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && ++ !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) + return -1; + + op = insn->opcode.bytes[0]; +@@ -418,8 +420,9 @@ static int patch_retpoline(void *addr, s + * into: + * + * Jncc.d8 1f ++ * [ LFENCE ] + * JMP *%\reg +- * NOP ++ * [ NOP ] + * 1: + */ + /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ +@@ -434,6 +437,15 @@ static int patch_retpoline(void *addr, s + op = JMP32_INSN_OPCODE; + } + ++ /* ++ * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE. ++ */ ++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { ++ bytes[i++] = 0x0f; ++ bytes[i++] = 0xae; ++ bytes[i++] = 0xe8; /* LFENCE */ ++ } ++ + ret = emit_indirect(op, reg, bytes + i); + if (ret < 0) + return ret; diff --git a/queue-5.15/x86-asm-fix-register-order.patch b/queue-5.15/x86-asm-fix-register-order.patch new file mode 100644 index 00000000000..54da871281b --- /dev/null +++ b/queue-5.15/x86-asm-fix-register-order.patch @@ -0,0 +1,58 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:38 +0200 +Subject: x86/asm: Fix register order + +From: Peter Zijlstra + +commit a92ede2d584a2e070def59c7e47e6b6f6341c55c upstream. + +Ensure the register order is correct; this allows for easy translation +between register number and trampoline and vice-versa. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120309.978573921@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/GEN-for-each-reg.h | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/arch/x86/include/asm/GEN-for-each-reg.h ++++ b/arch/x86/include/asm/GEN-for-each-reg.h +@@ -1,11 +1,16 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * These are in machine order; things rely on that. ++ */ + #ifdef CONFIG_64BIT + GEN(rax) +-GEN(rbx) + GEN(rcx) + GEN(rdx) ++GEN(rbx) ++GEN(rsp) ++GEN(rbp) + GEN(rsi) + GEN(rdi) +-GEN(rbp) + GEN(r8) + GEN(r9) + GEN(r10) +@@ -16,10 +21,11 @@ GEN(r14) + GEN(r15) + #else + GEN(eax) +-GEN(ebx) + GEN(ecx) + GEN(edx) ++GEN(ebx) ++GEN(esp) ++GEN(ebp) + GEN(esi) + GEN(edi) +-GEN(ebp) + #endif diff --git a/queue-5.15/x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch b/queue-5.15/x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch new file mode 100644 index 00000000000..ed89ac30eac --- /dev/null +++ b/queue-5.15/x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch @@ -0,0 +1,52 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:39 +0200 +Subject: x86/asm: Fixup odd GEN-for-each-reg.h usage + +From: Peter Zijlstra + +commit b6d3d9944bd7c9e8c06994ead3c9952f673f2a66 upstream. + +Currently GEN-for-each-reg.h usage leaves GEN defined, relying on any +subsequent usage to start with #undef, which is rude. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.041792350@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/asm-prototypes.h | 2 +- + arch/x86/lib/retpoline.S | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -19,9 +19,9 @@ extern void cmpxchg8b_emu(void); + + #ifdef CONFIG_RETPOLINE + +-#undef GEN + #define GEN(reg) \ + extern asmlinkage void __x86_indirect_thunk_ ## reg (void); + #include ++#undef GEN + + #endif /* CONFIG_RETPOLINE */ +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -55,10 +55,10 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) + #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) + #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) + +-#undef GEN + #define GEN(reg) THUNK reg + #include +- + #undef GEN ++ + #define GEN(reg) EXPORT_THUNK(reg) + #include ++#undef GEN diff --git a/queue-5.15/x86-bpf-use-alternative-ret-encoding.patch b/queue-5.15/x86-bpf-use-alternative-ret-encoding.patch new file mode 100644 index 00000000000..943edfc85e1 --- /dev/null +++ b/queue-5.15/x86-bpf-use-alternative-ret-encoding.patch @@ -0,0 +1,63 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:41 +0200 +Subject: x86/bpf: Use alternative RET encoding + +From: Peter Zijlstra + +commit d77cfe594ad50e0bf95d457e02ccd578791b2a15 upstream. + +Use the return thunk in eBPF generated code, if needed. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/net/bpf_jit_comp.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -406,6 +406,21 @@ static void emit_indirect_jump(u8 **ppro + *pprog = prog; + } + ++static void emit_return(u8 **pprog, u8 *ip) ++{ ++ u8 *prog = *pprog; ++ ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { ++ emit_jump(&prog, &__x86_return_thunk, ip); ++ } else { ++ EMIT1(0xC3); /* ret */ ++ if (IS_ENABLED(CONFIG_SLS)) ++ EMIT1(0xCC); /* int3 */ ++ } ++ ++ *pprog = prog; ++} ++ + /* + * Generate the following code: + * +@@ -1681,7 +1696,7 @@ emit_jmp: + ctx->cleanup_addr = proglen; + pop_callee_regs(&prog, callee_regs_used); + EMIT1(0xC9); /* leave */ +- EMIT1(0xC3); /* ret */ ++ emit_return(&prog, image + addrs[i - 1] + (prog - temp)); + break; + + default: +@@ -2127,7 +2142,7 @@ int arch_prepare_bpf_trampoline(struct b + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* skip our return address and return to parent */ + EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ +- EMIT1(0xC3); /* ret */ ++ emit_return(&prog, prog); + /* Make sure the trampoline generation logic doesn't overflow */ + if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { + ret = -EFAULT; diff --git a/queue-5.15/x86-bugs-add-amd-retbleed-boot-parameter.patch b/queue-5.15/x86-bugs-add-amd-retbleed-boot-parameter.patch new file mode 100644 index 00000000000..f34c6a7b646 --- /dev/null +++ b/queue-5.15/x86-bugs-add-amd-retbleed-boot-parameter.patch @@ -0,0 +1,207 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Alexandre Chartre +Date: Tue, 14 Jun 2022 23:15:50 +0200 +Subject: x86/bugs: Add AMD retbleed= boot parameter + +From: Alexandre Chartre + +commit 7fbf47c7ce50b38a64576b150e7011ae73d54669 
upstream. + +Add the "retbleed=" boot parameter to select a mitigation for +RETBleed. Possible values are "off", "auto" and "unret" +(JMP2RET mitigation). The default value is "auto". + +Currently, "retbleed=auto" will select the unret mitigation on +AMD and Hygon and no mitigation on Intel (JMP2RET is not effective on +Intel). + + [peterz: rebase; add hygon] + [jpoimboe: cleanups] + +Signed-off-by: Alexandre Chartre +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/kernel-parameters.txt | 15 +++ + arch/x86/Kconfig | 3 + arch/x86/kernel/cpu/bugs.c | 108 +++++++++++++++++++++++- + 3 files changed, 125 insertions(+), 1 deletion(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4968,6 +4968,21 @@ + + retain_initrd [RAM] Keep initrd memory after extraction + ++ retbleed= [X86] Control mitigation of RETBleed (Arbitrary ++ Speculative Code Execution with Return Instructions) ++ vulnerability. ++ ++ off - unconditionally disable ++ auto - automatically select a migitation ++ unret - force enable untrained return thunks, ++ only effective on AMD Zen {1,2} ++ based systems. ++ ++ Selecting 'auto' will choose a mitigation method at run ++ time according to the CPU. ++ ++ Not specifying this option is equivalent to retbleed=auto. ++ + rfkill.default_state= + 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm, + etc. communication is blocked by default. 
+--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -471,6 +471,9 @@ config RETPOLINE + config CC_HAS_SLS + def_bool $(cc-option,-mharden-sls=all) + ++config CC_HAS_RETURN_THUNK ++ def_bool $(cc-option,-mfunction-return=thunk-extern) ++ + config SLS + bool "Mitigate Straight-Line-Speculation" + depends on CC_HAS_SLS && X86_64 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -37,6 +37,7 @@ + #include "cpu.h" + + static void __init spectre_v1_select_mitigation(void); ++static void __init retbleed_select_mitigation(void); + static void __init spectre_v2_select_mitigation(void); + static void __init ssb_select_mitigation(void); + static void __init l1tf_select_mitigation(void); +@@ -120,6 +121,12 @@ void __init check_bugs(void) + + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); ++ retbleed_select_mitigation(); ++ /* ++ * spectre_v2_select_mitigation() relies on the state set by ++ * retbleed_select_mitigation(); specifically the STIBP selection is ++ * forced for UNRET. 
++ */ + spectre_v2_select_mitigation(); + ssb_select_mitigation(); + l1tf_select_mitigation(); +@@ -746,6 +753,100 @@ static int __init nospectre_v1_cmdline(c + early_param("nospectre_v1", nospectre_v1_cmdline); + + #undef pr_fmt ++#define pr_fmt(fmt) "RETBleed: " fmt ++ ++enum retbleed_mitigation { ++ RETBLEED_MITIGATION_NONE, ++ RETBLEED_MITIGATION_UNRET, ++}; ++ ++enum retbleed_mitigation_cmd { ++ RETBLEED_CMD_OFF, ++ RETBLEED_CMD_AUTO, ++ RETBLEED_CMD_UNRET, ++}; ++ ++const char * const retbleed_strings[] = { ++ [RETBLEED_MITIGATION_NONE] = "Vulnerable", ++ [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", ++}; ++ ++static enum retbleed_mitigation retbleed_mitigation __ro_after_init = ++ RETBLEED_MITIGATION_NONE; ++static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = ++ RETBLEED_CMD_AUTO; ++ ++static int __init retbleed_parse_cmdline(char *str) ++{ ++ if (!str) ++ return -EINVAL; ++ ++ if (!strcmp(str, "off")) ++ retbleed_cmd = RETBLEED_CMD_OFF; ++ else if (!strcmp(str, "auto")) ++ retbleed_cmd = RETBLEED_CMD_AUTO; ++ else if (!strcmp(str, "unret")) ++ retbleed_cmd = RETBLEED_CMD_UNRET; ++ else ++ pr_err("Unknown retbleed option (%s). 
Defaulting to 'auto'\n", str); ++ ++ return 0; ++} ++early_param("retbleed", retbleed_parse_cmdline); ++ ++#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" ++#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" ++ ++static void __init retbleed_select_mitigation(void) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) ++ return; ++ ++ switch (retbleed_cmd) { ++ case RETBLEED_CMD_OFF: ++ return; ++ ++ case RETBLEED_CMD_UNRET: ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ break; ++ ++ case RETBLEED_CMD_AUTO: ++ default: ++ if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) ++ break; ++ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ break; ++ } ++ ++ switch (retbleed_mitigation) { ++ case RETBLEED_MITIGATION_UNRET: ++ ++ if (!IS_ENABLED(CONFIG_RETPOLINE) || ++ !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { ++ pr_err(RETBLEED_COMPILER_MSG); ++ retbleed_mitigation = RETBLEED_MITIGATION_NONE; ++ break; ++ } ++ ++ setup_force_cpu_cap(X86_FEATURE_RETHUNK); ++ setup_force_cpu_cap(X86_FEATURE_UNRET); ++ ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && ++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) ++ pr_err(RETBLEED_UNTRAIN_MSG); ++ break; ++ ++ default: ++ break; ++ } ++ ++ pr_info("%s\n", retbleed_strings[retbleed_mitigation]); ++} ++ ++#undef pr_fmt + #define pr_fmt(fmt) "Spectre V2 : " fmt + + static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = +@@ -1989,7 +2090,12 @@ static ssize_t srbds_show_state(char *bu + + static ssize_t retbleed_show_state(char *buf) + { +- return sprintf(buf, "Vulnerable\n"); ++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET && ++ (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && ++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)) ++ return sprintf(buf, "Vulnerable: 
untrained return thunk on non-Zen uarch\n"); ++ ++ return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); + } + + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, diff --git a/queue-5.15/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch b/queue-5.15/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch new file mode 100644 index 00000000000..9d7b30faeb2 --- /dev/null +++ b/queue-5.15/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch @@ -0,0 +1,30 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Pawan Gupta +Date: Wed, 6 Jul 2022 15:01:15 -0700 +Subject: x86/bugs: Add Cannon lake to RETBleed affected CPU list + +From: Pawan Gupta + +commit f54d45372c6ac9c993451de5e51312485f7d10bc upstream. + +Cannon lake is also affected by RETBleed, add it to the list. + +Fixes: 6ad0ad2bf8a6 ("x86/bugs: Report Intel retbleed vulnerability") +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/common.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1132,6 +1132,7 @@ static const struct x86_cpu_id cpu_vuln_ + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), diff --git a/queue-5.15/x86-bugs-add-retbleed-ibpb.patch b/queue-5.15/x86-bugs-add-retbleed-ibpb.patch new file mode 100644 index 00000000000..f16231c4222 --- /dev/null +++ 
b/queue-5.15/x86-bugs-add-retbleed-ibpb.patch @@ -0,0 +1,253 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:16:02 +0200 +Subject: x86/bugs: Add retbleed=ibpb + +From: Peter Zijlstra + +commit 3ebc170068885b6fc7bedda6c667bb2c4d533159 upstream. + +jmp2ret mitigates the easy-to-attack case at relatively low overhead. +It mitigates the long speculation windows after a mispredicted RET, but +it does not mitigate the short speculation window from arbitrary +instruction boundaries. + +On Zen2, there is a chicken bit which needs setting, which mitigates +"arbitrary instruction boundaries" down to just "basic block boundaries". + +But there is no fix for the short speculation window on basic block +boundaries, other than to flush the entire BTB to evict all attacker +predictions. + +On the spectrum of "fast & blurry" -> "safe", there is (on top of STIBP +or no-SMT): + + 1) Nothing System wide open + 2) jmp2ret May stop a script kiddy + 3) jmp2ret+chickenbit Raises the bar rather further + 4) IBPB Only thing which can count as "safe". + +Tentative numbers put IBPB-on-entry at a 2.5x hit on Zen2, and a 10x hit +on Zen1 according to lmbench. + + [ bp: Fixup feature bit comments, document option, 32-bit build fix. 
] + +Suggested-by: Andrew Cooper +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/kernel-parameters.txt | 3 + + arch/x86/entry/Makefile | 2 - + arch/x86/entry/entry.S | 22 ++++++++++++ + arch/x86/include/asm/cpufeatures.h | 2 - + arch/x86/include/asm/nospec-branch.h | 8 +++- + arch/x86/kernel/cpu/bugs.c | 43 ++++++++++++++++++------ + 6 files changed, 67 insertions(+), 13 deletions(-) + create mode 100644 arch/x86/entry/entry.S + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4978,6 +4978,9 @@ + disabling SMT if necessary for + the full mitigation (only on Zen1 + and older without STIBP). ++ ibpb - mitigate short speculation windows on ++ basic block boundaries too. Safe, highest ++ perf impact. + unret - force enable untrained return thunks, + only effective on AMD f15h-f17h + based systems. +--- a/arch/x86/entry/Makefile ++++ b/arch/x86/entry/Makefile +@@ -11,7 +11,7 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTR + + CFLAGS_common.o += -fno-stack-protector + +-obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o ++obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o + obj-y += common.o + + obj-y += vdso/ +--- /dev/null ++++ b/arch/x86/entry/entry.S +@@ -0,0 +1,22 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Common place for both 32- and 64-bit entry routines. 
++ */ ++ ++#include ++#include ++#include ++ ++.pushsection .noinstr.text, "ax" ++ ++SYM_FUNC_START(entry_ibpb) ++ movl $MSR_IA32_PRED_CMD, %ecx ++ movl $PRED_CMD_IBPB, %eax ++ xorl %edx, %edx ++ wrmsr ++ RET ++SYM_FUNC_END(entry_ibpb) ++/* For KVM */ ++EXPORT_SYMBOL_GPL(entry_ibpb); ++ ++.popsection +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -294,7 +294,7 @@ + #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ + #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ + #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ +-/* FREE! (11*32+10) */ ++#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ + /* FREE! (11*32+11) */ + #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -123,14 +123,17 @@ + * return thunk isn't mapped into the userspace tables (then again, AMD + * typically has NO_MELTDOWN). + * +- * Doesn't clobber any registers but does require a stable stack. ++ * While zen_untrain_ret() doesn't clobber anything but requires stack, ++ * entry_ibpb() will clobber AX, CX, DX. + * + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. 
+ */ + .macro UNTRAIN_RET + #ifdef CONFIG_RETPOLINE +- ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET ++ ALTERNATIVE_2 "", \ ++ "call zen_untrain_ret", X86_FEATURE_UNRET, \ ++ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + #endif + .endm + +@@ -144,6 +147,7 @@ + + extern void __x86_return_thunk(void); + extern void zen_untrain_ret(void); ++extern void entry_ibpb(void); + + #ifdef CONFIG_RETPOLINE + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -798,6 +798,7 @@ static enum spectre_v2_mitigation spectr + enum retbleed_mitigation { + RETBLEED_MITIGATION_NONE, + RETBLEED_MITIGATION_UNRET, ++ RETBLEED_MITIGATION_IBPB, + RETBLEED_MITIGATION_IBRS, + RETBLEED_MITIGATION_EIBRS, + }; +@@ -806,11 +807,13 @@ enum retbleed_mitigation_cmd { + RETBLEED_CMD_OFF, + RETBLEED_CMD_AUTO, + RETBLEED_CMD_UNRET, ++ RETBLEED_CMD_IBPB, + }; + + const char * const retbleed_strings[] = { + [RETBLEED_MITIGATION_NONE] = "Vulnerable", + [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", ++ [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", + [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", + [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", + }; +@@ -840,6 +843,8 @@ static int __init retbleed_parse_cmdline + retbleed_cmd = RETBLEED_CMD_AUTO; + } else if (!strcmp(str, "unret")) { + retbleed_cmd = RETBLEED_CMD_UNRET; ++ } else if (!strcmp(str, "ibpb")) { ++ retbleed_cmd = RETBLEED_CMD_IBPB; + } else if (!strcmp(str, "nosmt")) { + retbleed_nosmt = true; + } else { +@@ -854,11 +859,13 @@ static int __init retbleed_parse_cmdline + early_param("retbleed", retbleed_parse_cmdline); + + #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" +-#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" ++#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to 
IBPB!\n" + #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + + static void __init retbleed_select_mitigation(void) + { ++ bool mitigate_smt = false; ++ + if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) + return; + +@@ -870,11 +877,21 @@ static void __init retbleed_select_mitig + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + break; + ++ case RETBLEED_CMD_IBPB: ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ break; ++ + case RETBLEED_CMD_AUTO: + default: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || +- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) +- retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { ++ ++ if (IS_ENABLED(CONFIG_RETPOLINE) && ++ IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ else ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ } + + /* + * The Intel mitigation (IBRS) was already selected in +@@ -890,26 +907,34 @@ static void __init retbleed_select_mitig + if (!IS_ENABLED(CONFIG_RETPOLINE) || + !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { + pr_err(RETBLEED_COMPILER_MSG); +- retbleed_mitigation = RETBLEED_MITIGATION_NONE; +- break; ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ goto retbleed_force_ibpb; + } + + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + +- if (!boot_cpu_has(X86_FEATURE_STIBP) && +- (retbleed_nosmt || cpu_mitigations_auto_nosmt())) +- cpu_smt_disable(false); +- + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + pr_err(RETBLEED_UNTRAIN_MSG); ++ ++ mitigate_smt = true; ++ break; ++ ++ case RETBLEED_MITIGATION_IBPB: ++retbleed_force_ibpb: ++ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); ++ mitigate_smt = true; + break; + + default: + break; + } + ++ if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) && ++ (retbleed_nosmt || 
cpu_mitigations_auto_nosmt())) ++ cpu_smt_disable(false); ++ + /* + * Let IBRS trump all on Intel without affecting the effects of the + * retbleed= cmdline option. diff --git a/queue-5.15/x86-bugs-do-ibpb-fallback-check-only-once.patch b/queue-5.15/x86-bugs-do-ibpb-fallback-check-only-once.patch new file mode 100644 index 00000000000..ce0185da39f --- /dev/null +++ b/queue-5.15/x86-bugs-do-ibpb-fallback-check-only-once.patch @@ -0,0 +1,48 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 15:07:19 -0700 +Subject: x86/bugs: Do IBPB fallback check only once + +From: Josh Poimboeuf + +commit 0fe4aeea9c01baabecc8c3afc7889c809d939bc2 upstream. + +When booting with retbleed=auto, if the kernel wasn't built with +CONFIG_CC_HAS_RETURN_THUNK, the mitigation falls back to IBPB. Make +sure a warning is printed in that case. The IBPB fallback check is done +twice, but it really only needs to be done once. + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/bugs.c | 15 +++++---------- + 1 file changed, 5 insertions(+), 10 deletions(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -884,18 +884,13 @@ static void __init retbleed_select_mitig + case RETBLEED_CMD_AUTO: + default: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || +- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { +- +- if (IS_ENABLED(CONFIG_RETPOLINE) && +- IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) +- retbleed_mitigation = RETBLEED_MITIGATION_UNRET; +- else +- retbleed_mitigation = RETBLEED_MITIGATION_IBPB; +- } ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + + /* +- * The Intel mitigation (IBRS) was already selected in +- * spectre_v2_select_mitigation(). ++ * The Intel mitigation (IBRS or eIBRS) was already selected in ++ * spectre_v2_select_mitigation(). 
'retbleed_mitigation' will ++ * be set accordingly below. + */ + + break; diff --git a/queue-5.15/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch b/queue-5.15/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch new file mode 100644 index 00000000000..539a5adb44b --- /dev/null +++ b/queue-5.15/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch @@ -0,0 +1,48 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Thadeu Lima de Souza Cascardo +Date: Thu, 7 Jul 2022 13:41:52 -0300 +Subject: x86/bugs: Do not enable IBPB-on-entry when IBPB is not supported + +From: Thadeu Lima de Souza Cascardo + +commit 2259da159fbe5dba8ac00b560cf00b6a6537fa18 upstream. + +There are some VM configurations which have Skylake model but do not +support IBPB. In those cases, when using retbleed=ibpb, userspace is going +to be killed and kernel is going to panic. + +If the CPU does not support IBPB, warn and proceed with the auto option. Also, +do not fall back to IBPB on AMD/Hygon systems if it is not supported.
+ +Fixes: 3ebc17006888 ("x86/bugs: Add retbleed=ibpb") +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/bugs.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -858,7 +858,10 @@ static void __init retbleed_select_mitig + break; + + case RETBLEED_CMD_IBPB: +- if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { ++ if (!boot_cpu_has(X86_FEATURE_IBPB)) { ++ pr_err("WARNING: CPU does not support IBPB.\n"); ++ goto do_cmd_auto; ++ } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } else { + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); +@@ -873,7 +876,7 @@ do_cmd_auto: + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; +- else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) ++ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB)) + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } + diff --git a/queue-5.15/x86-bugs-enable-stibp-for-jmp2ret.patch b/queue-5.15/x86-bugs-enable-stibp-for-jmp2ret.patch new file mode 100644 index 00000000000..91a01a968ab --- /dev/null +++ b/queue-5.15/x86-bugs-enable-stibp-for-jmp2ret.patch @@ -0,0 +1,142 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Kim Phillips +Date: Tue, 14 Jun 2022 23:15:51 +0200 +Subject: x86/bugs: Enable STIBP for JMP2RET + +From: Kim Phillips + +commit e8ec1b6e08a2102d8755ccb06fa26d540f26a2fa upstream. + +For untrained return thunks to be fully effective, STIBP must be enabled +or SMT disabled. 
+ +Co-developed-by: Josh Poimboeuf +Signed-off-by: Josh Poimboeuf +Signed-off-by: Kim Phillips +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/kernel-parameters.txt | 16 ++++-- + arch/x86/kernel/cpu/bugs.c | 58 +++++++++++++++++++----- + 2 files changed, 57 insertions(+), 17 deletions(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4972,11 +4972,17 @@ + Speculative Code Execution with Return Instructions) + vulnerability. + +- off - unconditionally disable +- auto - automatically select a migitation +- unret - force enable untrained return thunks, +- only effective on AMD Zen {1,2} +- based systems. ++ off - no mitigation ++ auto - automatically select a migitation ++ auto,nosmt - automatically select a mitigation, ++ disabling SMT if necessary for ++ the full mitigation (only on Zen1 ++ and older without STIBP). ++ unret - force enable untrained return thunks, ++ only effective on AMD f15h-f17h ++ based systems. ++ unret,nosmt - like unret, will disable SMT when STIBP ++ is not available. + + Selecting 'auto' will choose a mitigation method at run + time according to the CPU. +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -776,19 +776,34 @@ static enum retbleed_mitigation retbleed + static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = + RETBLEED_CMD_AUTO; + ++static int __ro_after_init retbleed_nosmt = false; ++ + static int __init retbleed_parse_cmdline(char *str) + { + if (!str) + return -EINVAL; + +- if (!strcmp(str, "off")) +- retbleed_cmd = RETBLEED_CMD_OFF; +- else if (!strcmp(str, "auto")) +- retbleed_cmd = RETBLEED_CMD_AUTO; +- else if (!strcmp(str, "unret")) +- retbleed_cmd = RETBLEED_CMD_UNRET; +- else +- pr_err("Unknown retbleed option (%s). 
Defaulting to 'auto'\n", str); ++ while (str) { ++ char *next = strchr(str, ','); ++ if (next) { ++ *next = 0; ++ next++; ++ } ++ ++ if (!strcmp(str, "off")) { ++ retbleed_cmd = RETBLEED_CMD_OFF; ++ } else if (!strcmp(str, "auto")) { ++ retbleed_cmd = RETBLEED_CMD_AUTO; ++ } else if (!strcmp(str, "unret")) { ++ retbleed_cmd = RETBLEED_CMD_UNRET; ++ } else if (!strcmp(str, "nosmt")) { ++ retbleed_nosmt = true; ++ } else { ++ pr_err("Ignoring unknown retbleed option (%s).", str); ++ } ++ ++ str = next; ++ } + + return 0; + } +@@ -834,6 +849,10 @@ static void __init retbleed_select_mitig + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + ++ if (!boot_cpu_has(X86_FEATURE_STIBP) && ++ (retbleed_nosmt || cpu_mitigations_auto_nosmt())) ++ cpu_smt_disable(false); ++ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + pr_err(RETBLEED_UNTRAIN_MSG); +@@ -1080,6 +1099,13 @@ spectre_v2_user_select_mitigation(enum s + boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + ++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { ++ if (mode != SPECTRE_V2_USER_STRICT && ++ mode != SPECTRE_V2_USER_STRICT_PREFERRED) ++ pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n"); ++ mode = SPECTRE_V2_USER_STRICT_PREFERRED; ++ } ++ + spectre_v2_user_stibp = mode; + + set_mode: +@@ -2090,10 +2116,18 @@ static ssize_t srbds_show_state(char *bu + + static ssize_t retbleed_show_state(char *buf) + { +- if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET && +- (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && +- boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)) +- return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); ++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && ++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) ++ return sprintf(buf, "Vulnerable: untrained return 
thunk on non-Zen uarch\n"); ++ ++ return sprintf(buf, "%s; SMT %s\n", ++ retbleed_strings[retbleed_mitigation], ++ !sched_smt_active() ? "disabled" : ++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || ++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? ++ "enabled with STIBP protection" : "vulnerable"); ++ } + + return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); + } diff --git a/queue-5.15/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch b/queue-5.15/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch new file mode 100644 index 00000000000..8bd894758dc --- /dev/null +++ b/queue-5.15/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch @@ -0,0 +1,118 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:52 +0200 +Subject: x86/bugs: Keep a per-CPU IA32_SPEC_CTRL value + +From: Peter Zijlstra + +commit caa0ff24d5d0e02abce5e65c3d2b7f20a6617be5 upstream. + +Due to TIF_SSBD and TIF_SPEC_IB the actual IA32_SPEC_CTRL value can +differ from x86_spec_ctrl_base. As such, keep a per-CPU value +reflecting the current task's MSR content. 
+ + [jpoimboe: rename] + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 1 + + arch/x86/kernel/cpu/bugs.c | 28 +++++++++++++++++++++++----- + arch/x86/kernel/process.c | 2 +- + 3 files changed, 25 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -254,6 +254,7 @@ static inline void indirect_branch_predi + + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; ++extern void write_spec_ctrl_current(u64 val); + + /* + * With retpoline, we must use IBRS to restrict branch prediction +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -49,12 +49,30 @@ static void __init mmio_select_mitigatio + static void __init srbds_select_mitigation(void); + static void __init l1d_flush_select_mitigation(void); + +-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ ++/* The base value of the SPEC_CTRL MSR without task-specific bits set */ + u64 x86_spec_ctrl_base; + EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); ++ ++/* The current value of the SPEC_CTRL MSR with task-specific bits set */ ++DEFINE_PER_CPU(u64, x86_spec_ctrl_current); ++EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); ++ + static DEFINE_MUTEX(spec_ctrl_mutex); + + /* ++ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ ++ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). ++ */ ++void write_spec_ctrl_current(u64 val) ++{ ++ if (this_cpu_read(x86_spec_ctrl_current) == val) ++ return; ++ ++ this_cpu_write(x86_spec_ctrl_current, val); ++ wrmsrl(MSR_IA32_SPEC_CTRL, val); ++} ++ ++/* + * The vendor and possibly platform specific bits which can be modified in + * x86_spec_ctrl_base. 
+ */ +@@ -1272,7 +1290,7 @@ static void __init spectre_v2_select_mit + if (spectre_v2_in_eibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base); + } + + switch (mode) { +@@ -1327,7 +1345,7 @@ static void __init spectre_v2_select_mit + + static void update_stibp_msr(void * __unused) + { +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base); + } + + /* Update x86_spec_ctrl_base in case SMT state changed. */ +@@ -1570,7 +1588,7 @@ static enum ssb_mitigation __init __ssb_ + x86_amd_ssb_disable(); + } else { + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base); + } + } + +@@ -1821,7 +1839,7 @@ int arch_prctl_spec_ctrl_get(struct task + void x86_spec_ctrl_setup_ap(void) + { + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_ssb_disable(); +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -584,7 +584,7 @@ static __always_inline void __speculatio + } + + if (updmsr) +- wrmsrl(MSR_IA32_SPEC_CTRL, msr); ++ write_spec_ctrl_current(msr); + } + + static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/queue-5.15/x86-bugs-optimize-spec_ctrl-msr-writes.patch b/queue-5.15/x86-bugs-optimize-spec_ctrl-msr-writes.patch new file mode 100644 index 00000000000..dfbc21bf3d2 --- /dev/null +++ b/queue-5.15/x86-bugs-optimize-spec_ctrl-msr-writes.patch @@ -0,0 +1,108 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:54 +0200 +Subject: x86/bugs: Optimize SPEC_CTRL MSR writes + +From: Peter Zijlstra + +commit c779bc1a9002fa474175b80e72b85c9bf628abb0 upstream. 
+ +When changing SPEC_CTRL for user control, the WRMSR can be delayed +until return-to-user when KERNEL_IBRS has been enabled. + +This avoids an MSR write during context switch. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 2 +- + arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++------ + arch/x86/kernel/process.c | 2 +- + 3 files changed, 14 insertions(+), 8 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -254,7 +254,7 @@ static inline void indirect_branch_predi + + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; +-extern void write_spec_ctrl_current(u64 val); ++extern void write_spec_ctrl_current(u64 val, bool force); + + /* + * With retpoline, we must use IBRS to restrict branch prediction +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -63,13 +63,19 @@ static DEFINE_MUTEX(spec_ctrl_mutex); + * Keep track of the SPEC_CTRL MSR value for the current task, which may differ + * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). + */ +-void write_spec_ctrl_current(u64 val) ++void write_spec_ctrl_current(u64 val, bool force) + { + if (this_cpu_read(x86_spec_ctrl_current) == val) + return; + + this_cpu_write(x86_spec_ctrl_current, val); +- wrmsrl(MSR_IA32_SPEC_CTRL, val); ++ ++ /* ++ * When KERNEL_IBRS this MSR is written on return-to-user, unless ++ * forced the update can be delayed until that time. 
++ */ ++ if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) ++ wrmsrl(MSR_IA32_SPEC_CTRL, val); + } + + /* +@@ -1290,7 +1296,7 @@ static void __init spectre_v2_select_mit + if (spectre_v2_in_eibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; +- write_spec_ctrl_current(x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + } + + switch (mode) { +@@ -1345,7 +1351,7 @@ static void __init spectre_v2_select_mit + + static void update_stibp_msr(void * __unused) + { +- write_spec_ctrl_current(x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + } + + /* Update x86_spec_ctrl_base in case SMT state changed. */ +@@ -1588,7 +1594,7 @@ static enum ssb_mitigation __init __ssb_ + x86_amd_ssb_disable(); + } else { + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; +- write_spec_ctrl_current(x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + } + } + +@@ -1839,7 +1845,7 @@ int arch_prctl_spec_ctrl_get(struct task + void x86_spec_ctrl_setup_ap(void) + { + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) +- write_spec_ctrl_current(x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_ssb_disable(); +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -584,7 +584,7 @@ static __always_inline void __speculatio + } + + if (updmsr) +- write_spec_ctrl_current(msr); ++ write_spec_ctrl_current(msr, false); + } + + static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/queue-5.15/x86-bugs-report-amd-retbleed-vulnerability.patch b/queue-5.15/x86-bugs-report-amd-retbleed-vulnerability.patch new file mode 100644 index 00000000000..1c385a92291 --- /dev/null +++ b/queue-5.15/x86-bugs-report-amd-retbleed-vulnerability.patch @@ -0,0 +1,169 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Alexandre Chartre +Date: Tue, 14 Jun 2022 23:15:49 +0200 
+Subject: x86/bugs: Report AMD retbleed vulnerability + +From: Alexandre Chartre + +commit 6b80b59b3555706508008f1f127b5412c89c7fd8 upstream. + +Report that AMD x86 CPUs are vulnerable to the RETBleed (Arbitrary +Speculative Code Execution with Return Instructions) attack. + + [peterz: add hygon] + [kim: invert parity; fam15h] + +Co-developed-by: Kim Phillips +Signed-off-by: Kim Phillips +Signed-off-by: Alexandre Chartre +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/kernel/cpu/bugs.c | 13 +++++++++++++ + arch/x86/kernel/cpu/common.c | 19 +++++++++++++++++++ + drivers/base/cpu.c | 8 ++++++++ + include/linux/cpu.h | 2 ++ + 5 files changed, 43 insertions(+) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -443,5 +443,6 @@ + #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ + #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ + #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ ++#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1987,6 +1987,11 @@ static ssize_t srbds_show_state(char *bu + return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); + } + ++static ssize_t retbleed_show_state(char *buf) ++{ ++ return sprintf(buf, "Vulnerable\n"); ++} ++ + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) + { +@@ -2032,6 +2037,9 @@ static ssize_t cpu_show_common(struct de + case X86_BUG_MMIO_STALE_DATA: + return mmio_stale_data_show_state(buf); + ++ case X86_BUG_RETBLEED: ++ 
return retbleed_show_state(buf); ++ + default: + break; + } +@@ -2088,4 +2096,9 @@ ssize_t cpu_show_mmio_stale_data(struct + { + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); + } ++ ++ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) ++{ ++ return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); ++} + #endif +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1095,16 +1095,27 @@ static const __initconst struct x86_cpu_ + {} + }; + ++#define VULNBL(vendor, family, model, blacklist) \ ++ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) ++ + #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ + INTEL_FAM6_##model, steppings, \ + X86_FEATURE_ANY, issues) + ++#define VULNBL_AMD(family, blacklist) \ ++ VULNBL(AMD, family, X86_MODEL_ANY, blacklist) ++ ++#define VULNBL_HYGON(family, blacklist) \ ++ VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) ++ + #define SRBDS BIT(0) + /* CPU is affected by X86_BUG_MMIO_STALE_DATA */ + #define MMIO BIT(1) + /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ + #define MMIO_SBDS BIT(2) ++/* CPU is affected by RETbleed, speculating where you would not expect it */ ++#define RETBLEED BIT(3) + + static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { + VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), +@@ -1137,6 +1148,11 @@ static const struct x86_cpu_id cpu_vuln_ + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), ++ ++ VULNBL_AMD(0x15, RETBLEED), ++ VULNBL_AMD(0x16, RETBLEED), ++ VULNBL_AMD(0x17, RETBLEED), ++ VULNBL_HYGON(0x18, RETBLEED), + {} + }; + +@@ -1238,6 +1254,9 @@ static void __init cpu_set_bug_bits(stru + 
!arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + ++ if (cpu_matches(cpu_vuln_blacklist, RETBLEED)) ++ setup_force_cpu_bug(X86_BUG_RETBLEED); ++ + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) + return; + +--- a/drivers/base/cpu.c ++++ b/drivers/base/cpu.c +@@ -570,6 +570,12 @@ ssize_t __weak cpu_show_mmio_stale_data( + return sysfs_emit(buf, "Not affected\n"); + } + ++ssize_t __weak cpu_show_retbleed(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ return sysfs_emit(buf, "Not affected\n"); ++} ++ + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); + static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); + static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); +@@ -580,6 +586,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444 + static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); + static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); + static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); ++static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); + + static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, +@@ -592,6 +599,7 @@ static struct attribute *cpu_root_vulner + &dev_attr_itlb_multihit.attr, + &dev_attr_srbds.attr, + &dev_attr_mmio_stale_data.attr, ++ &dev_attr_retbleed.attr, + NULL + }; + +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct dev + extern ssize_t cpu_show_mmio_stale_data(struct device *dev, + struct device_attribute *attr, + char *buf); ++extern ssize_t cpu_show_retbleed(struct device *dev, ++ struct device_attribute *attr, char *buf); + + extern __printf(4, 5) + struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/queue-5.15/x86-bugs-report-intel-retbleed-vulnerability.patch b/queue-5.15/x86-bugs-report-intel-retbleed-vulnerability.patch new file mode 100644 index 00000000000..5ff3e197167 --- 
/dev/null +++ b/queue-5.15/x86-bugs-report-intel-retbleed-vulnerability.patch @@ -0,0 +1,174 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 24 Jun 2022 13:48:58 +0200 +Subject: x86/bugs: Report Intel retbleed vulnerability + +From: Peter Zijlstra + +commit 6ad0ad2bf8a67e27d1f9d006a1dabb0e1c360cc3 upstream. + +Skylake suffers from RSB underflow speculation issues; report this +vulnerability and its mitigation (spectre_v2=ibrs). + + [jpoimboe: cleanups, eibrs] + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/msr-index.h | 1 + + arch/x86/kernel/cpu/bugs.c | 39 +++++++++++++++++++++++++++++++++------ + arch/x86/kernel/cpu/common.c | 24 ++++++++++++------------ + 3 files changed, 46 insertions(+), 18 deletions(-) + +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -91,6 +91,7 @@ + #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a + #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ + #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ ++#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ + #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ + #define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -783,12 +783,17 @@ static int __init nospectre_v1_cmdline(c + } + early_param("nospectre_v1", nospectre_v1_cmdline); + ++static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = ++ SPECTRE_V2_NONE; ++ + #undef pr_fmt + #define pr_fmt(fmt) "RETBleed: " fmt + + enum retbleed_mitigation { + RETBLEED_MITIGATION_NONE, + RETBLEED_MITIGATION_UNRET, ++ RETBLEED_MITIGATION_IBRS, ++ RETBLEED_MITIGATION_EIBRS, + }; + + enum retbleed_mitigation_cmd
{ +@@ -800,6 +805,8 @@ enum retbleed_mitigation_cmd { + const char * const retbleed_strings[] = { + [RETBLEED_MITIGATION_NONE] = "Vulnerable", + [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", ++ [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", ++ [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", + }; + + static enum retbleed_mitigation retbleed_mitigation __ro_after_init = +@@ -842,6 +849,7 @@ early_param("retbleed", retbleed_parse_c + + #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" + #define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" ++#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + + static void __init retbleed_select_mitigation(void) + { +@@ -858,12 +866,15 @@ static void __init retbleed_select_mitig + + case RETBLEED_CMD_AUTO: + default: +- if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) +- break; +- + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ ++ /* ++ * The Intel mitigation (IBRS) was already selected in ++ * spectre_v2_select_mitigation(). ++ */ ++ + break; + } + +@@ -893,15 +904,31 @@ static void __init retbleed_select_mitig + break; + } + ++ /* ++ * Let IBRS trump all on Intel without affecting the effects of the ++ * retbleed= cmdline option. 
++ */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { ++ switch (spectre_v2_enabled) { ++ case SPECTRE_V2_IBRS: ++ retbleed_mitigation = RETBLEED_MITIGATION_IBRS; ++ break; ++ case SPECTRE_V2_EIBRS: ++ case SPECTRE_V2_EIBRS_RETPOLINE: ++ case SPECTRE_V2_EIBRS_LFENCE: ++ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; ++ break; ++ default: ++ pr_err(RETBLEED_INTEL_MSG); ++ } ++ } ++ + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); + } + + #undef pr_fmt + #define pr_fmt(fmt) "Spectre V2 : " fmt + +-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = +- SPECTRE_V2_NONE; +- + static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = + SPECTRE_V2_USER_NONE; + static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1127,24 +1127,24 @@ static const struct x86_cpu_id cpu_vuln_ + VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | +- BIT(7) | BIT(0xB), MMIO), +- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), ++ BIT(7) | BIT(0xB), MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 
0xD), SRBDS | MMIO), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), +- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS), +- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), +- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO), +- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), +- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), +@@ -1254,7 +1254,7 @@ static void __init cpu_set_bug_bits(stru + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + +- if (cpu_matches(cpu_vuln_blacklist, RETBLEED)) ++ if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))) + setup_force_cpu_bug(X86_BUG_RETBLEED); + + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) diff --git 
a/queue-5.15/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch b/queue-5.15/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch new file mode 100644 index 00000000000..284f5f2fb9d --- /dev/null +++ b/queue-5.15/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch @@ -0,0 +1,102 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:56 +0200 +Subject: x86/bugs: Split spectre_v2_select_mitigation() and spectre_v2_user_select_mitigation() + +From: Peter Zijlstra + +commit 166115c08a9b0b846b783088808a27d739be6e8d upstream. + +retbleed will depend on spectre_v2, while spectre_v2_user depends on +retbleed. Break this cycle. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/bugs.c | 25 +++++++++++++++++-------- + 1 file changed, 17 insertions(+), 8 deletions(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -37,8 +37,9 @@ + #include "cpu.h" + + static void __init spectre_v1_select_mitigation(void); +-static void __init retbleed_select_mitigation(void); + static void __init spectre_v2_select_mitigation(void); ++static void __init retbleed_select_mitigation(void); ++static void __init spectre_v2_user_select_mitigation(void); + static void __init ssb_select_mitigation(void); + static void __init l1tf_select_mitigation(void); + static void __init mds_select_mitigation(void); +@@ -145,13 +146,19 @@ void __init check_bugs(void) + + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); ++ spectre_v2_select_mitigation(); ++ /* ++ * retbleed_select_mitigation() relies on the state set by ++ * spectre_v2_select_mitigation(); specifically it wants to know about ++ * 
spectre_v2=ibrs. ++ */ + retbleed_select_mitigation(); + /* +- * spectre_v2_select_mitigation() relies on the state set by ++ * spectre_v2_user_select_mitigation() relies on the state set by + * retbleed_select_mitigation(); specifically the STIBP selection is + * forced for UNRET. + */ +- spectre_v2_select_mitigation(); ++ spectre_v2_user_select_mitigation(); + ssb_select_mitigation(); + l1tf_select_mitigation(); + md_clear_select_mitigation(); +@@ -1006,13 +1013,15 @@ static void __init spec_v2_user_print_co + pr_info("spectre_v2_user=%s forced on command line.\n", reason); + } + ++static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd; ++ + static enum spectre_v2_user_cmd __init +-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) ++spectre_v2_parse_user_cmdline(void) + { + char arg[20]; + int ret, i; + +- switch (v2_cmd) { ++ switch (spectre_v2_cmd) { + case SPECTRE_V2_CMD_NONE: + return SPECTRE_V2_USER_CMD_NONE; + case SPECTRE_V2_CMD_FORCE: +@@ -1047,7 +1056,7 @@ static inline bool spectre_v2_in_ibrs_mo + } + + static void __init +-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) ++spectre_v2_user_select_mitigation(void) + { + enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; + bool smt_possible = IS_ENABLED(CONFIG_SMP); +@@ -1060,7 +1069,7 @@ spectre_v2_user_select_mitigation(enum s + cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + smt_possible = false; + +- cmd = spectre_v2_parse_user_cmdline(v2_cmd); ++ cmd = spectre_v2_parse_user_cmdline(); + switch (cmd) { + case SPECTRE_V2_USER_CMD_NONE: + goto set_mode; +@@ -1384,7 +1393,7 @@ static void __init spectre_v2_select_mit + } + + /* Set up IBPB and STIBP depending on the general spectre V2 command */ +- spectre_v2_user_select_mitigation(cmd); ++ spectre_v2_cmd = cmd; + } + + static void update_stibp_msr(void * __unused) diff --git a/queue-5.15/x86-common-stamp-out-the-stepping-madness.patch 
b/queue-5.15/x86-common-stamp-out-the-stepping-madness.patch new file mode 100644 index 00000000000..9013b42cd78 --- /dev/null +++ b/queue-5.15/x86-common-stamp-out-the-stepping-madness.patch @@ -0,0 +1,77 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 24 Jun 2022 14:03:25 +0200 +Subject: x86/common: Stamp out the stepping madness + +From: Peter Zijlstra + +commit 7a05bc95ed1c5a59e47aaade9fb4083c27de9e62 upstream. + +The whole MMIO/RETBLEED enumeration went overboard on steppings. Get +rid of all that and simply use ANY. + +If a future stepping of these models would not be affected, it had +better set the relevant ARCH_CAP_$FOO_NO bit in +IA32_ARCH_CAPABILITIES. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Acked-by: Dave Hansen +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/common.c | 37 ++++++++++++++++--------------------- + 1 file changed, 16 insertions(+), 21 deletions(-) + +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1122,32 +1122,27 @@ static const struct x86_cpu_id cpu_vuln_ + VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO), +- VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO), ++ VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), ++ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), +- 
VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | +- BIT(7) | BIT(0xB), MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS | RETBLEED), +- VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), +- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), +- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS | RETBLEED), +- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), +- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | 
MMIO_SBDS), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), +- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), ++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + + VULNBL_AMD(0x15, RETBLEED), + VULNBL_AMD(0x16, RETBLEED), diff --git a/queue-5.15/x86-cpu-amd-add-spectral-chicken.patch b/queue-5.15/x86-cpu-amd-add-spectral-chicken.patch new file mode 100644 index 00000000000..846ea8ace7c --- /dev/null +++ b/queue-5.15/x86-cpu-amd-add-spectral-chicken.patch @@ -0,0 +1,107 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:16:04 +0200 +Subject: x86/cpu/amd: Add Spectral Chicken + +From: Peter Zijlstra + +commit d7caac991feeef1b871ee6988fd2c9725df09039 upstream. + +Zen2 uarchs have an undocumented, unnamed, MSR that contains a chicken +bit for some speculation behaviour. It needs setting. + +Note: very belatedly AMD released naming; it's now officially called + MSR_AMD64_DE_CFG2 and MSR_AMD64_DE_CFG2_SUPPRESS_NOBR_PRED_BIT + but shall remain the SPECTRAL CHICKEN. 
+ +Suggested-by: Andrew Cooper +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/msr-index.h | 3 +++ + arch/x86/kernel/cpu/amd.c | 23 ++++++++++++++++++++++- + arch/x86/kernel/cpu/cpu.h | 2 ++ + arch/x86/kernel/cpu/hygon.c | 6 ++++++ + 4 files changed, 33 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -515,6 +515,9 @@ + /* Fam 17h MSRs */ + #define MSR_F17H_IRPERF 0xc00000e9 + ++#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 ++#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) ++ + /* Fam 16h MSRs */ + #define MSR_F16H_L2I_PERF_CTL 0xc0010230 + #define MSR_F16H_L2I_PERF_CTR 0xc0010231 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -886,6 +886,26 @@ static void init_amd_bd(struct cpuinfo_x + clear_rdrand_cpuid_bit(c); + } + ++void init_spectral_chicken(struct cpuinfo_x86 *c) ++{ ++ u64 value; ++ ++ /* ++ * On Zen2 we offer this chicken (bit) on the altar of Speculation. ++ * ++ * This suppresses speculation from the middle of a basic block, i.e. it ++ * suppresses non-branch predictions. 
++ * ++ * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H ++ */ ++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) { ++ if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) { ++ value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT; ++ wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); ++ } ++ } ++} ++ + static void init_amd_zn(struct cpuinfo_x86 *c) + { + set_cpu_cap(c, X86_FEATURE_ZEN); +@@ -931,7 +951,8 @@ static void init_amd(struct cpuinfo_x86 + case 0x12: init_amd_ln(c); break; + case 0x15: init_amd_bd(c); break; + case 0x16: init_amd_jg(c); break; +- case 0x17: fallthrough; ++ case 0x17: init_spectral_chicken(c); ++ fallthrough; + case 0x19: init_amd_zn(c); break; + } + +--- a/arch/x86/kernel/cpu/cpu.h ++++ b/arch/x86/kernel/cpu/cpu.h +@@ -61,6 +61,8 @@ static inline void tsx_init(void) { } + static inline void tsx_ap_init(void) { } + #endif /* CONFIG_CPU_SUP_INTEL */ + ++extern void init_spectral_chicken(struct cpuinfo_x86 *c); ++ + extern void get_cpu_cap(struct cpuinfo_x86 *c); + extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); + extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); +--- a/arch/x86/kernel/cpu/hygon.c ++++ b/arch/x86/kernel/cpu/hygon.c +@@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x8 + /* get apicid instead of initial apic id from cpuid */ + c->apicid = hard_smp_processor_id(); + ++ /* ++ * XXX someone from Hygon needs to confirm this DTRT ++ * ++ init_spectral_chicken(c); ++ */ ++ + set_cpu_cap(c, X86_FEATURE_ZEN); + set_cpu_cap(c, X86_FEATURE_CPB); + diff --git a/queue-5.15/x86-cpu-amd-enumerate-btc_no.patch b/queue-5.15/x86-cpu-amd-enumerate-btc_no.patch new file mode 100644 index 00000000000..c66082987f7 --- /dev/null +++ b/queue-5.15/x86-cpu-amd-enumerate-btc_no.patch @@ -0,0 +1,85 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Andrew Cooper +Date: Fri, 24 Jun 2022 14:41:21 +0100 +Subject: x86/cpu/amd: Enumerate BTC_NO + +From: Andrew Cooper + +commit 
26aae8ccbc1972233afd08fb3f368947c0314265 upstream. + +BTC_NO indicates that hardware is not susceptible to Branch Type Confusion. + +Zen3 CPUs don't suffer BTC. + +Hypervisors are expected to synthesise BTC_NO when it is appropriate +given the migration pool, to prevent kernels using heuristics. + + [ bp: Massage. ] + +Signed-off-by: Andrew Cooper +Signed-off-by: Borislav Petkov +[cascardo: no X86_FEATURE_BRS] +[cascardo: no X86_FEATURE_CPPC] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/kernel/cpu/amd.c | 21 +++++++++++++++------ + arch/x86/kernel/cpu/common.c | 6 ++++-- + 3 files changed, 20 insertions(+), 8 deletions(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -319,6 +319,7 @@ + #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ + #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ + #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ ++#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ + + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -914,12 +914,21 @@ static void init_amd_zn(struct cpuinfo_x + node_reclaim_distance = 32; + #endif + +- /* +- * Fix erratum 1076: CPB feature bit not being set in CPUID. +- * Always set it, except when running under a hypervisor. +- */ +- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) +- set_cpu_cap(c, X86_FEATURE_CPB); ++ /* Fix up CPUID bits, but only if not virtualised. */ ++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { ++ ++ /* Erratum 1076: CPB feature bit not being set in CPUID. 
*/ ++ if (!cpu_has(c, X86_FEATURE_CPB)) ++ set_cpu_cap(c, X86_FEATURE_CPB); ++ ++ /* ++ * Zen3 (Fam19 model < 0x10) parts are not susceptible to ++ * Branch Type Confusion, but predate the allocation of the ++ * BTC_NO bit. ++ */ ++ if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) ++ set_cpu_cap(c, X86_FEATURE_BTC_NO); ++ } + } + + static void init_amd(struct cpuinfo_x86 *c) +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1249,8 +1249,10 @@ static void __init cpu_set_bug_bits(stru + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + +- if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))) +- setup_force_cpu_bug(X86_BUG_RETBLEED); ++ if (!cpu_has(c, X86_FEATURE_BTC_NO)) { ++ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) ++ setup_force_cpu_bug(X86_BUG_RETBLEED); ++ } + + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) + return; diff --git a/queue-5.15/x86-cpufeatures-move-retpoline-flags-to-word-11.patch b/queue-5.15/x86-cpufeatures-move-retpoline-flags-to-word-11.patch new file mode 100644 index 00000000000..7e1b18b513a --- /dev/null +++ b/queue-5.15/x86-cpufeatures-move-retpoline-flags-to-word-11.patch @@ -0,0 +1,47 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:33 +0200 +Subject: x86/cpufeatures: Move RETPOLINE flags to word 11 + +From: Peter Zijlstra + +commit a883d624aed463c84c22596006e5a96f5b44db31 upstream. + +In order to extend the RETPOLINE features to 4, move them to word 11 +where there is still room. This mostly keeps DISABLE_RETPOLINE +simple. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/cpufeatures.h | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -203,8 +203,8 @@ + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + /* FREE! ( 7*32+10) */ + #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ +-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ ++/* FREE! ( 7*32+12) */ ++/* FREE! ( 7*32+13) */ + #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ + #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ + #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +@@ -294,6 +294,10 @@ + #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ + #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ + #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ ++/* FREE! (11*32+10) */ ++/* FREE! 
(11*32+11) */ ++#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ ++#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/queue-5.15/x86-entry-add-kernel-ibrs-implementation.patch b/queue-5.15/x86-entry-add-kernel-ibrs-implementation.patch new file mode 100644 index 00000000000..9da675e21f1 --- /dev/null +++ b/queue-5.15/x86-entry-add-kernel-ibrs-implementation.patch @@ -0,0 +1,353 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Thadeu Lima de Souza Cascardo +Date: Sat, 9 Jul 2022 23:42:53 -0300 +Subject: x86/entry: Add kernel IBRS implementation + +From: Thadeu Lima de Souza Cascardo + +commit 2dbb887e875b1de3ca8f40ddf26bcfe55798c609 upstream. + +Implement Kernel IBRS - currently the only known option to mitigate RSB +underflow speculation issues on Skylake hardware. + +Note: since IBRS_ENTER requires fuller context established than +UNTRAIN_RET, it must be placed after it. However, since UNTRAIN_RET +itself implies a RET, it must come after IBRS_ENTER. This means +IBRS_ENTER needs to also move UNTRAIN_RET. + +Note 2: KERNEL_IBRS is sub-optimal for XenPV. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: conflict at arch/x86/entry/entry_64_compat.S] +[cascardo: conflict fixups, no ANNOTATE_NOENDBR] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/calling.h | 58 +++++++++++++++++++++++++++++++++++++ + arch/x86/entry/entry_64.S | 44 ++++++++++++++++++++++++---- + arch/x86/entry/entry_64_compat.S | 17 ++++++++-- + arch/x86/include/asm/cpufeatures.h | 2 - + 4 files changed, 111 insertions(+), 10 deletions(-) + +--- a/arch/x86/entry/calling.h ++++ b/arch/x86/entry/calling.h +@@ -7,6 +7,8 @@ + #include + #include + #include ++#include ++#include + + /* + +@@ -282,6 +284,62 @@ For 32-bit we have the following convent + #endif + + /* ++ * IBRS kernel mitigation for Spectre_v2. ++ * ++ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers ++ * the regs it uses (AX, CX, DX). Must be called before the first RET ++ * instruction (NOTE! UNTRAIN_RET includes a RET instruction) ++ * ++ * The optional argument is used to save/restore the current value, ++ * which is used on the paranoid paths. ++ * ++ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. ++ */ ++.macro IBRS_ENTER save_reg ++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS ++ movl $MSR_IA32_SPEC_CTRL, %ecx ++ ++.ifnb \save_reg ++ rdmsr ++ shl $32, %rdx ++ or %rdx, %rax ++ mov %rax, \save_reg ++ test $SPEC_CTRL_IBRS, %eax ++ jz .Ldo_wrmsr_\@ ++ lfence ++ jmp .Lend_\@ ++.Ldo_wrmsr_\@: ++.endif ++ ++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx ++ movl %edx, %eax ++ shr $32, %rdx ++ wrmsr ++.Lend_\@: ++.endm ++ ++/* ++ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) ++ * regs. Must be called after the last RET. 
++ */ ++.macro IBRS_EXIT save_reg ++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS ++ movl $MSR_IA32_SPEC_CTRL, %ecx ++ ++.ifnb \save_reg ++ mov \save_reg, %rdx ++.else ++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx ++ andl $(~SPEC_CTRL_IBRS), %edx ++.endif ++ ++ movl %edx, %eax ++ shr $32, %rdx ++ wrmsr ++.Lend_\@: ++.endm ++ ++/* + * Mitigate Spectre v1 for conditional swapgs code paths. + * + * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -94,7 +94,6 @@ SYM_CODE_START(entry_SYSCALL_64) + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) +- UNTRAIN_RET + + /* Construct struct pt_regs on stack */ + pushq $__USER_DS /* pt_regs->ss */ +@@ -111,6 +110,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h + movq %rsp, %rdi + /* Sign extend the lower 32bit as syscall numbers are treated as int */ + movslq %eax, %rsi ++ ++ /* clobbers %rax, make sure it is after saving the syscall nr */ ++ IBRS_ENTER ++ UNTRAIN_RET ++ + call do_syscall_64 /* returns with IRQs disabled */ + + /* +@@ -190,6 +194,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h + * perf profiles. Nothing jumps here. + */ + syscall_return_via_sysret: ++ IBRS_EXIT + POP_REGS pop_rdi=0 + + /* +@@ -582,6 +587,7 @@ __irqentry_text_end: + + SYM_CODE_START_LOCAL(common_interrupt_return) + SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) ++ IBRS_EXIT + #ifdef CONFIG_DEBUG_ENTRY + /* Assert that pt_regs indicates user mode. */ + testb $3, CS(%rsp) +@@ -861,6 +867,9 @@ SYM_CODE_END(xen_failsafe_callback) + * 1 -> no SWAPGS on exit + * + * Y GSBASE value at entry, must be restored in paranoid_exit ++ * ++ * R14 - old CR3 ++ * R15 - old SPEC_CTRL + */ + SYM_CODE_START_LOCAL(paranoid_entry) + UNWIND_HINT_FUNC +@@ -884,7 +893,6 @@ SYM_CODE_START_LOCAL(paranoid_entry) + * be retrieved from a kernel internal table. 
+ */ + SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 +- UNTRAIN_RET + + /* + * Handling GSBASE depends on the availability of FSGSBASE. +@@ -906,7 +914,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) + * is needed here. + */ + SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx +- RET ++ jmp .Lparanoid_gsbase_done + + .Lparanoid_entry_checkgs: + /* EBX = 1 -> kernel GSBASE active, no restore required */ +@@ -925,8 +933,16 @@ SYM_CODE_START_LOCAL(paranoid_entry) + xorl %ebx, %ebx + swapgs + .Lparanoid_kernel_gsbase: +- + FENCE_SWAPGS_KERNEL_ENTRY ++.Lparanoid_gsbase_done: ++ ++ /* ++ * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like ++ * CR3 above, keep the old value in a callee saved register. ++ */ ++ IBRS_ENTER save_reg=%r15 ++ UNTRAIN_RET ++ + RET + SYM_CODE_END(paranoid_entry) + +@@ -948,9 +964,19 @@ SYM_CODE_END(paranoid_entry) + * 1 -> no SWAPGS on exit + * + * Y User space GSBASE, must be restored unconditionally ++ * ++ * R14 - old CR3 ++ * R15 - old SPEC_CTRL + */ + SYM_CODE_START_LOCAL(paranoid_exit) + UNWIND_HINT_REGS ++ ++ /* ++ * Must restore IBRS state before both CR3 and %GS since we need access ++ * to the per-CPU x86_spec_ctrl_shadow variable. ++ */ ++ IBRS_EXIT save_reg=%r15 ++ + /* + * The order of operations is important. RESTORE_CR3 requires + * kernel GSBASE. +@@ -995,10 +1021,12 @@ SYM_CODE_START_LOCAL(error_entry) + FENCE_SWAPGS_USER_ENTRY + /* We have user CR3. Change to kernel CR3. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ IBRS_ENTER + UNTRAIN_RET + + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ + .Lerror_entry_from_usermode_after_swapgs: ++ + /* Put us onto the real thread stack. 
*/ + call sync_regs + RET +@@ -1048,6 +1076,7 @@ SYM_CODE_START_LOCAL(error_entry) + SWAPGS + FENCE_SWAPGS_USER_ENTRY + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ IBRS_ENTER + UNTRAIN_RET + + /* +@@ -1143,7 +1172,6 @@ SYM_CODE_START(asm_exc_nmi) + movq %rsp, %rdx + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT_IRET_REGS base=%rdx offset=8 +- UNTRAIN_RET + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ +@@ -1154,6 +1182,9 @@ SYM_CODE_START(asm_exc_nmi) + PUSH_AND_CLEAR_REGS rdx=(%rdx) + ENCODE_FRAME_POINTER + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + /* + * At this point we no longer need to worry about stack damage + * due to nesting -- we're on the normal thread stack and we're +@@ -1376,6 +1407,9 @@ end_repeat_nmi: + movq $-1, %rsi + call exc_nmi + ++ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ ++ IBRS_EXIT save_reg=%r15 ++ + /* Always restore stashed CR3 value (see paranoid_entry) */ + RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 + +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -4,7 +4,6 @@ + * + * Copyright 2000-2002 Andi Kleen, SuSE Labs. + */ +-#include "calling.h" + #include + #include + #include +@@ -18,6 +17,8 @@ + #include + #include + ++#include "calling.h" ++ + .section .entry.text, "ax" + + /* +@@ -72,7 +73,6 @@ SYM_CODE_START(entry_SYSENTER_compat) + pushq $__USER32_CS /* pt_regs->cs */ + pushq $0 /* pt_regs->ip = 0 (placeholder) */ + SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) +- UNTRAIN_RET + + /* + * User tracing code (ptrace or signal handlers) might assume that +@@ -114,6 +114,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_af + + cld + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + /* + * SYSENTER doesn't filter flags, so we need to clear NT and AC + * ourselves. 
To save a few cycles, we can check whether +@@ -213,7 +216,6 @@ SYM_CODE_START(entry_SYSCALL_compat) + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) +- UNTRAIN_RET + + /* Construct struct pt_regs on stack */ + pushq $__USER32_DS /* pt_regs->ss */ +@@ -255,6 +257,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_aft + + UNWIND_HINT_REGS + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + movq %rsp, %rdi + call do_fast_syscall_32 + /* XEN PV guests always use IRET path */ +@@ -269,6 +274,8 @@ sysret32_from_system_call: + */ + STACKLEAK_ERASE + ++ IBRS_EXIT ++ + movq RBX(%rsp), %rbx /* pt_regs->rbx */ + movq RBP(%rsp), %rbp /* pt_regs->rbp */ + movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ +@@ -380,7 +387,6 @@ SYM_CODE_START(entry_INT80_compat) + pushq (%rdi) /* pt_regs->di */ + .Lint80_keep_stack: + +- UNTRAIN_RET + pushq %rsi /* pt_regs->si */ + xorl %esi, %esi /* nospec si */ + pushq %rdx /* pt_regs->dx */ +@@ -413,6 +419,9 @@ SYM_CODE_START(entry_INT80_compat) + + cld + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + movq %rsp, %rdi + call do_int80_syscall_32 + jmp swapgs_restore_regs_and_return_to_usermode +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -203,7 +203,7 @@ + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + /* FREE! ( 7*32+10) */ + #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ +-/* FREE! ( 7*32+12) */ ++#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ + /* FREE! 
( 7*32+13) */ + #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ + #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ diff --git a/queue-5.15/x86-entry-avoid-very-early-ret.patch b/queue-5.15/x86-entry-avoid-very-early-ret.patch new file mode 100644 index 00000000000..40115649195 --- /dev/null +++ b/queue-5.15/x86-entry-avoid-very-early-ret.patch @@ -0,0 +1,63 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:46 +0200 +Subject: x86/entry: Avoid very early RET + +From: Peter Zijlstra + +commit 7c81c0c9210c9bfab2bae76aab2999de5bad27db upstream. + +Commit + + ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()") + +manages to introduce a CALL/RET pair that is before SWITCH_TO_KERNEL_CR3, +which means it is before RETBleed can be mitigated. + +Revert to an earlier version of the commit in Fixes. Down side is that +this will bloat .text size somewhat. The alternative is fully reverting +it. + +The purpose of this patch was to allow migrating error_entry() to C, +including the whole of kPTI. Much care needs to be taken moving that +forward to not re-introduce this problem of early RETs. 
+ +Fixes: ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()") +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_64.S | 12 ++---------- + 1 file changed, 2 insertions(+), 10 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -314,14 +314,6 @@ SYM_CODE_END(ret_from_fork) + #endif + .endm + +-/* Save all registers in pt_regs */ +-SYM_CODE_START_LOCAL(push_and_clear_regs) +- UNWIND_HINT_FUNC +- PUSH_AND_CLEAR_REGS save_ret=1 +- ENCODE_FRAME_POINTER 8 +- RET +-SYM_CODE_END(push_and_clear_regs) +- + /** + * idtentry_body - Macro to emit code calling the C function + * @cfunc: C function to be called +@@ -329,8 +321,8 @@ SYM_CODE_END(push_and_clear_regs) + */ + .macro idtentry_body cfunc has_error_code:req + +- call push_and_clear_regs +- UNWIND_HINT_REGS ++ PUSH_AND_CLEAR_REGS ++ ENCODE_FRAME_POINTER + + /* + * Call error_entry() and switch to the task stack if from userspace. diff --git a/queue-5.15/x86-entry-don-t-call-error_entry-for-xenpv.patch b/queue-5.15/x86-entry-don-t-call-error_entry-for-xenpv.patch new file mode 100644 index 00000000000..ba07d43522a --- /dev/null +++ b/queue-5.15/x86-entry-don-t-call-error_entry-for-xenpv.patch @@ -0,0 +1,49 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Lai Jiangshan +Date: Tue, 3 May 2022 11:21:06 +0800 +Subject: x86/entry: Don't call error_entry() for XENPV + +From: Lai Jiangshan + +commit 64cbd0acb58203fb769ed2f4eab526d43e243847 upstream. + +XENPV guests enter already on the task stack and they can't fault for +native_iret() nor native_load_gs_index() since they use their own pvop +for IRET and load_gs_index(). A CR3 switch is not needed either. + +So there is no reason to call error_entry() in XENPV. + + [ bp: Massage commit message. 
] + +Signed-off-by: Lai Jiangshan +Signed-off-by: Borislav Petkov +Reviewed-by: Juergen Gross +Link: https://lore.kernel.org/r/20220503032107.680190-6-jiangshanlai@gmail.com +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_64.S | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -333,8 +333,17 @@ SYM_CODE_END(push_and_clear_regs) + call push_and_clear_regs + UNWIND_HINT_REGS + +- call error_entry +- movq %rax, %rsp /* switch to the task stack if from userspace */ ++ /* ++ * Call error_entry() and switch to the task stack if from userspace. ++ * ++ * When in XENPV, it is already in the task stack, and it can't fault ++ * for native_iret() nor native_load_gs_index() since XENPV uses its ++ * own pvops for IRET and load_gs_index(). And it doesn't need to ++ * switch the CR3. So it can skip invoking error_entry(). ++ */ ++ ALTERNATIVE "call error_entry; movq %rax, %rsp", \ ++ "", X86_FEATURE_XENPV ++ + ENCODE_FRAME_POINTER + UNWIND_HINT_REGS + diff --git a/queue-5.15/x86-entry-move-push_and_clear_regs-back-into-error_entry.patch b/queue-5.15/x86-entry-move-push_and_clear_regs-back-into-error_entry.patch new file mode 100644 index 00000000000..4f429d58053 --- /dev/null +++ b/queue-5.15/x86-entry-move-push_and_clear_regs-back-into-error_entry.patch @@ -0,0 +1,72 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Wed, 6 Jul 2022 15:33:30 +0200 +Subject: x86/entry: Move PUSH_AND_CLEAR_REGS() back into error_entry + +From: Peter Zijlstra + +commit 2c08b9b38f5b0f4a6c2d29be22b695e4ec4a556b upstream. + +Commit + + ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()") + +moved PUSH_AND_CLEAR_REGS out of error_entry, into its own function, in +part to avoid calling error_entry() for XenPV. 
+ +However, commit + + 7c81c0c9210c ("x86/entry: Avoid very early RET") + +had to change that because the 'ret' was too early and moved it into +idtentry, bloating the text size, since idtentry is expanded for every +exception vector. + +However, with the advent of xen_error_entry() in commit + + d147553b64bad ("x86/xen: Add UNTRAIN_RET") + +it became possible to remove PUSH_AND_CLEAR_REGS from idtentry, back +into *error_entry(). + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +[cascardo: error_entry still does cld] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_64.S | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -320,6 +320,8 @@ SYM_CODE_END(ret_from_fork) + + SYM_CODE_START_LOCAL(xen_error_entry) + UNWIND_HINT_FUNC ++ PUSH_AND_CLEAR_REGS save_ret=1 ++ ENCODE_FRAME_POINTER 8 + UNTRAIN_RET + RET + SYM_CODE_END(xen_error_entry) +@@ -331,9 +333,6 @@ SYM_CODE_END(xen_error_entry) + */ + .macro idtentry_body cfunc has_error_code:req + +- PUSH_AND_CLEAR_REGS +- ENCODE_FRAME_POINTER +- + /* + * Call error_entry() and switch to the task stack if from userspace. 
+ * +@@ -1014,6 +1013,10 @@ SYM_CODE_END(paranoid_exit) + SYM_CODE_START_LOCAL(error_entry) + UNWIND_HINT_FUNC + cld ++ ++ PUSH_AND_CLEAR_REGS save_ret=1 ++ ENCODE_FRAME_POINTER 8 ++ + testb $3, CS+8(%rsp) + jz .Lerror_kernelspace + diff --git a/queue-5.15/x86-entry-move-push_and_clear_regs-out-of-error_entry.patch b/queue-5.15/x86-entry-move-push_and_clear_regs-out-of-error_entry.patch new file mode 100644 index 00000000000..e7ded69f3ee --- /dev/null +++ b/queue-5.15/x86-entry-move-push_and_clear_regs-out-of-error_entry.patch @@ -0,0 +1,78 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Lai Jiangshan +Date: Thu, 21 Apr 2022 22:10:50 +0800 +Subject: x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry() + +From: Lai Jiangshan + +commit ee774dac0da1543376a69fd90840af6aa86879b3 upstream. + +The macro idtentry() (through idtentry_body()) calls error_entry() +unconditionally even on XENPV. But XENPV needs to only push and clear +regs. + +PUSH_AND_CLEAR_REGS in error_entry() makes the stack not return to its +original place when the function returns, which means it is not possible +to convert it to a C function. + +Carve out PUSH_AND_CLEAR_REGS out of error_entry() and into a separate +function and call it before error_entry() in order to avoid calling +error_entry() on XENPV. + +It will also allow for error_entry() to be converted to C code that can +use inlined sync_regs() and save a function call. + + [ bp: Massage commit message. 
] + +Signed-off-by: Lai Jiangshan +Signed-off-by: Borislav Petkov +Reviewed-by: Juergen Gross +Link: https://lore.kernel.org/r/20220503032107.680190-4-jiangshanlai@gmail.com +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_64.S | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -315,6 +315,14 @@ SYM_CODE_END(ret_from_fork) + #endif + .endm + ++/* Save all registers in pt_regs */ ++SYM_CODE_START_LOCAL(push_and_clear_regs) ++ UNWIND_HINT_FUNC ++ PUSH_AND_CLEAR_REGS save_ret=1 ++ ENCODE_FRAME_POINTER 8 ++ RET ++SYM_CODE_END(push_and_clear_regs) ++ + /** + * idtentry_body - Macro to emit code calling the C function + * @cfunc: C function to be called +@@ -322,6 +330,9 @@ SYM_CODE_END(ret_from_fork) + */ + .macro idtentry_body cfunc has_error_code:req + ++ call push_and_clear_regs ++ UNWIND_HINT_REGS ++ + call error_entry + movq %rax, %rsp /* switch to the task stack if from userspace */ + ENCODE_FRAME_POINTER +@@ -965,13 +976,11 @@ SYM_CODE_START_LOCAL(paranoid_exit) + SYM_CODE_END(paranoid_exit) + + /* +- * Save all registers in pt_regs, and switch GS if needed. ++ * Switch GS and CR3 if needed. + */ + SYM_CODE_START_LOCAL(error_entry) + UNWIND_HINT_FUNC + cld +- PUSH_AND_CLEAR_REGS save_ret=1 +- ENCODE_FRAME_POINTER 8 + testb $3, CS+8(%rsp) + jz .Lerror_kernelspace + diff --git a/queue-5.15/x86-entry-remove-skip_r11rcx.patch b/queue-5.15/x86-entry-remove-skip_r11rcx.patch new file mode 100644 index 00000000000..000d0809e4b --- /dev/null +++ b/queue-5.15/x86-entry-remove-skip_r11rcx.patch @@ -0,0 +1,68 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 6 May 2022 14:14:35 +0200 +Subject: x86/entry: Remove skip_r11rcx + +From: Peter Zijlstra + +commit 1b331eeea7b8676fc5dbdf80d0a07e41be226177 upstream. 
+ +Yes, r11 and rcx have been restored previously, but since they're being +popped anyway (into rsi) might as well pop them into their own regs -- +setting them to the value they already are. + +Less magical code. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Link: https://lore.kernel.org/r/20220506121631.365070674@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/calling.h | 10 +--------- + arch/x86/entry/entry_64.S | 3 +-- + 2 files changed, 2 insertions(+), 11 deletions(-) + +--- a/arch/x86/entry/calling.h ++++ b/arch/x86/entry/calling.h +@@ -119,27 +119,19 @@ For 32-bit we have the following convent + CLEAR_REGS + .endm + +-.macro POP_REGS pop_rdi=1 skip_r11rcx=0 ++.macro POP_REGS pop_rdi=1 + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx +- .if \skip_r11rcx +- popq %rsi +- .else + popq %r11 +- .endif + popq %r10 + popq %r9 + popq %r8 + popq %rax +- .if \skip_r11rcx +- popq %rsi +- .else + popq %rcx +- .endif + popq %rdx + popq %rsi + .if \pop_rdi +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -189,8 +189,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h + * perf profiles. Nothing jumps here. + */ + syscall_return_via_sysret: +- /* rcx and r11 are already restored (see code above) */ +- POP_REGS pop_rdi=0 skip_r11rcx=1 ++ POP_REGS pop_rdi=0 + + /* + * Now all regs are restored except RSP and RDI. 
diff --git a/queue-5.15/x86-entry-switch-the-stack-after-error_entry-returns.patch b/queue-5.15/x86-entry-switch-the-stack-after-error_entry-returns.patch new file mode 100644 index 00000000000..8c4194d94d9 --- /dev/null +++ b/queue-5.15/x86-entry-switch-the-stack-after-error_entry-returns.patch @@ -0,0 +1,81 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Lai Jiangshan +Date: Thu, 21 Apr 2022 22:10:49 +0800 +Subject: x86/entry: Switch the stack after error_entry() returns + +From: Lai Jiangshan + +commit 520a7e80c96d655fbe4650d9cc985bd9d0443389 upstream. + +error_entry() calls fixup_bad_iret() before sync_regs() if it is a fault +from a bad IRET, to copy pt_regs to the kernel stack. It switches to the +kernel stack directly after sync_regs(). + +But error_entry() itself is also a function call, so it has to stash +the address it is going to return to, in %r12 which is unnecessarily +complicated. + +Move the stack switching after error_entry() and get rid of the need to +handle the return address. + + [ bp: Massage commit message. ] + +Signed-off-by: Lai Jiangshan +Signed-off-by: Borislav Petkov +Link: https://lore.kernel.org/r/20220503032107.680190-3-jiangshanlai@gmail.com +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_64.S | 16 ++++++---------- + 1 file changed, 6 insertions(+), 10 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -323,6 +323,8 @@ SYM_CODE_END(ret_from_fork) + .macro idtentry_body cfunc has_error_code:req + + call error_entry ++ movq %rax, %rsp /* switch to the task stack if from userspace */ ++ ENCODE_FRAME_POINTER + UNWIND_HINT_REGS + + movq %rsp, %rdi /* pt_regs pointer into 1st argument*/ +@@ -982,14 +984,10 @@ SYM_CODE_START_LOCAL(error_entry) + /* We have user CR3. Change to kernel CR3. 
*/ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + ++ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ + .Lerror_entry_from_usermode_after_swapgs: + /* Put us onto the real thread stack. */ +- popq %r12 /* save return addr in %12 */ +- movq %rsp, %rdi /* arg0 = pt_regs pointer */ + call sync_regs +- movq %rax, %rsp /* switch stack */ +- ENCODE_FRAME_POINTER +- pushq %r12 + RET + + /* +@@ -1021,6 +1019,7 @@ SYM_CODE_START_LOCAL(error_entry) + */ + .Lerror_entry_done_lfence: + FENCE_SWAPGS_KERNEL_ENTRY ++ leaq 8(%rsp), %rax /* return pt_regs pointer */ + RET + + .Lbstep_iret: +@@ -1041,12 +1040,9 @@ SYM_CODE_START_LOCAL(error_entry) + * Pretend that the exception came from user mode: set up pt_regs + * as if we faulted immediately after IRET. + */ +- popq %r12 /* save return addr in %12 */ +- movq %rsp, %rdi /* arg0 = pt_regs pointer */ ++ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ + call fixup_bad_iret +- mov %rax, %rsp +- ENCODE_FRAME_POINTER +- pushq %r12 ++ mov %rax, %rdi + jmp .Lerror_entry_from_usermode_after_swapgs + SYM_CODE_END(error_entry) + diff --git a/queue-5.15/x86-ftrace-use-alternative-ret-encoding.patch b/queue-5.15/x86-ftrace-use-alternative-ret-encoding.patch new file mode 100644 index 00000000000..415fe33ae6c --- /dev/null +++ b/queue-5.15/x86-ftrace-use-alternative-ret-encoding.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:40 +0200 +Subject: x86/ftrace: Use alternative RET encoding + +From: Peter Zijlstra + +commit 1f001e9da6bbf482311e45e48f53c2bd2179e59c upstream. + +Use the return thunk in ftrace trampolines, if needed. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: still copy return from ftrace_stub] +[cascardo: use memcpy(text_gen_insn) as there is no __text_gen_insn] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/ftrace.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/ftrace.c ++++ b/arch/x86/kernel/ftrace.c +@@ -308,7 +308,7 @@ union ftrace_op_code_union { + } __attribute__((packed)); + }; + +-#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS) ++#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS)) + + static unsigned long + create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) +@@ -367,7 +367,10 @@ create_trampoline(struct ftrace_ops *ops + + /* The trampoline ends with ret(q) */ + retq = (unsigned long)ftrace_stub; +- ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) ++ memcpy(ip, text_gen_insn(JMP32_INSN_OPCODE, ip, &__x86_return_thunk), JMP32_INSN_SIZE); ++ else ++ ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); + if (WARN_ON(ret < 0)) + goto fail; + diff --git a/queue-5.15/x86-kexec-disable-ret-on-kexec.patch b/queue-5.15/x86-kexec-disable-ret-on-kexec.patch new file mode 100644 index 00000000000..8dca2e776a2 --- /dev/null +++ b/queue-5.15/x86-kexec-disable-ret-on-kexec.patch @@ -0,0 +1,173 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Konrad Rzeszutek Wilk +Date: Fri, 8 Jul 2022 19:10:11 +0200 +Subject: x86/kexec: Disable RET on kexec + +From: Konrad Rzeszutek Wilk + +commit 697977d8415d61f3acbc4ee6d564c9dcf0309507 upstream. + +All the invocations unroll to __x86_return_thunk and this file +must be PIC independent. + +This fixes kexec on 64-bit AMD boxes. + + [ bp: Fix 32-bit build. 
] + +Reported-by: Edward Tran +Reported-by: Awais Tanveer +Suggested-by: Ankur Arora +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Alexandre Chartre +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/relocate_kernel_32.S | 25 +++++++++++++++++++------ + arch/x86/kernel/relocate_kernel_64.S | 23 +++++++++++++++++------ + 2 files changed, 36 insertions(+), 12 deletions(-) + +--- a/arch/x86/kernel/relocate_kernel_32.S ++++ b/arch/x86/kernel/relocate_kernel_32.S +@@ -7,10 +7,12 @@ + #include + #include + #include ++#include + #include + + /* +- * Must be relocatable PIC code callable as a C function ++ * Must be relocatable PIC code callable as a C function, in particular ++ * there must be a plain RET and not jump to return thunk. + */ + + #define PTR(x) (x << 2) +@@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) + movl %edi, %eax + addl $(identity_mapped - relocate_kernel), %eax + pushl %eax +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(relocate_kernel) + + SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) +@@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma + xorl %edx, %edx + xorl %esi, %esi + xorl %ebp, %ebp +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + 1: + popl %edx + movl CP_PA_SWAP_PAGE(%edi), %esp + addl $PAGE_SIZE, %esp + 2: ++ ANNOTATE_RETPOLINE_SAFE + call *%edx + + /* get the re-entry point of the peer system */ +@@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma + movl %edi, %eax + addl $(virtual_mapped - relocate_kernel), %eax + pushl %eax +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(identity_mapped) + + SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) +@@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map + popl %edi + popl %esi + popl %ebx +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(virtual_mapped) + + /* Do the copies */ +@@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) + 
popl %edi + popl %ebx + popl %ebp +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(swap_pages) + + .globl kexec_control_code_size +--- a/arch/x86/kernel/relocate_kernel_64.S ++++ b/arch/x86/kernel/relocate_kernel_64.S +@@ -13,7 +13,8 @@ + #include + + /* +- * Must be relocatable PIC code callable as a C function ++ * Must be relocatable PIC code callable as a C function, in particular ++ * there must be a plain RET and not jump to return thunk. + */ + + #define PTR(x) (x << 3) +@@ -104,7 +105,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) + /* jump to identity mapped page */ + addq $(identity_mapped - relocate_kernel), %r8 + pushq %r8 +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(relocate_kernel) + + SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) +@@ -191,7 +194,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma + xorl %r14d, %r14d + xorl %r15d, %r15d + +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + + 1: + popq %rdx +@@ -210,7 +215,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma + call swap_pages + movq $virtual_mapped, %rax + pushq %rax +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(identity_mapped) + + SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) +@@ -231,7 +238,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map + popq %r12 + popq %rbp + popq %rbx +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(virtual_mapped) + + /* Do the copies */ +@@ -288,7 +297,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) + lea PAGE_SIZE(%rax), %rsi + jmp 0b + 3: +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(swap_pages) + + .globl kexec_control_code_size diff --git a/queue-5.15/x86-kvm-fix-setcc-emulation-for-return-thunks.patch b/queue-5.15/x86-kvm-fix-setcc-emulation-for-return-thunks.patch new file mode 100644 index 00000000000..1663ec316f2 --- /dev/null +++ b/queue-5.15/x86-kvm-fix-setcc-emulation-for-return-thunks.patch @@ -0,0 +1,97 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:42 
+0200 +Subject: x86/kvm: Fix SETcc emulation for return thunks + +From: Peter Zijlstra + +commit af2e140f34208a5dfb6b7a8ad2d56bda88f0524d upstream. + +Prepare the SETcc fastop stuff for when RET can be larger still. + +The tricky bit here is that the expressions should not only be +constant C expressions, but also absolute GAS expressions. This means +no ?: and 'true' is ~0. + +Also ensure em_setcc() has the same alignment as the actual FOP_SETCC() +ops, this ensures there cannot be an alignment hole between em_setcc() +and the first op. + +Additionally, add a .skip directive to the FOP_SETCC() macro to fill +any remaining space with INT3 traps; however the primary purpose of +this directive is to generate AS warnings when the remaining space +goes negative. Which is a very good indication the alignment magic +went side-ways. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: ignore ENDBR when computing SETCC_LENGTH] +[cascardo: conflict fixup] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/emulate.c | 26 ++++++++++++++------------ + 1 file changed, 14 insertions(+), 12 deletions(-) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -321,13 +321,15 @@ static int fastop(struct x86_emulate_ctx + #define FOP_RET(name) \ + __FOP_RET(#name) + +-#define FOP_START(op) \ ++#define __FOP_START(op, align) \ + extern void em_##op(struct fastop *fake); \ + asm(".pushsection .text, \"ax\" \n\t" \ + ".global em_" #op " \n\t" \ +- ".align " __stringify(FASTOP_SIZE) " \n\t" \ ++ ".align " __stringify(align) " \n\t" \ + "em_" #op ":\n\t" + ++#define FOP_START(op) __FOP_START(op, FASTOP_SIZE) ++ + #define FOP_END \ + ".popsection") + +@@ -431,15 +433,14 @@ static int fastop(struct x86_emulate_ctx + /* + * Depending on .config the SETcc functions look like: + * +- * SETcc %al [3 bytes] +- * RET [1 byte] +- * INT3 [1 byte; 
CONFIG_SLS] +- * +- * Which gives possible sizes 4 or 5. When rounded up to the +- * next power-of-two alignment they become 4 or 8. ++ * SETcc %al [3 bytes] ++ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETPOLINE] ++ * INT3 [1 byte; CONFIG_SLS] + */ +-#define SETCC_LENGTH (4 + IS_ENABLED(CONFIG_SLS)) +-#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS)) ++#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \ ++ IS_ENABLED(CONFIG_SLS)) ++#define SETCC_LENGTH (3 + RET_LENGTH) ++#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) + static_assert(SETCC_LENGTH <= SETCC_ALIGN); + + #define FOP_SETCC(op) \ +@@ -447,13 +448,14 @@ static_assert(SETCC_LENGTH <= SETCC_ALIG + ".type " #op ", @function \n\t" \ + #op ": \n\t" \ + #op " %al \n\t" \ +- __FOP_RET(#op) ++ __FOP_RET(#op) \ ++ ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" + + asm(".pushsection .fixup, \"ax\"\n" + "kvm_fastop_exception: xor %esi, %esi; " ASM_RET + ".popsection"); + +-FOP_START(setcc) ++__FOP_START(setcc, SETCC_ALIGN) + FOP_SETCC(seto) + FOP_SETCC(setno) + FOP_SETCC(setc) diff --git a/queue-5.15/x86-kvm-vmx-make-noinstr-clean.patch b/queue-5.15/x86-kvm-vmx-make-noinstr-clean.patch new file mode 100644 index 00000000000..77d2db7fc3b --- /dev/null +++ b/queue-5.15/x86-kvm-vmx-make-noinstr-clean.patch @@ -0,0 +1,74 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:32 +0200 +Subject: x86/kvm/vmx: Make noinstr clean + +From: Peter Zijlstra + +commit 742ab6df974ae8384a2dd213db1a3a06cf6d8936 upstream. + +The recent mmio_stale_data fixes broke the noinstr constraints: + + vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x15b: call to wrmsrl.constprop.0() leaves .noinstr.text section + vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x1bf: call to kvm_arch_has_assigned_device() leaves .noinstr.text section + +make it all happy again. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 6 +++--- + arch/x86/kvm/x86.c | 4 ++-- + include/linux/kvm_host.h | 2 +- + 3 files changed, 6 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -380,9 +380,9 @@ static __always_inline void vmx_disable_ + if (!vmx->disable_fb_clear) + return; + +- rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr); ++ msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL); + msr |= FB_CLEAR_DIS; +- wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); ++ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + /* Cache the MSR value to avoid reading it later */ + vmx->msr_ia32_mcu_opt_ctrl = msr; + } +@@ -393,7 +393,7 @@ static __always_inline void vmx_enable_f + return; + + vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; +- wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); ++ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); + } + + static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -12177,9 +12177,9 @@ void kvm_arch_end_assignment(struct kvm + } + EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); + +-bool kvm_arch_has_assigned_device(struct kvm *kvm) ++bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm) + { +- return atomic_read(&kvm->arch.assigned_device_count); ++ return arch_atomic_read(&kvm->arch.assigned_device_count); + } + EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); + +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -1233,7 +1233,7 @@ static inline void kvm_arch_end_assignme + { + } + +-static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) ++static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) + { + return false; + } diff --git a/queue-5.15/x86-objtool-create-.return_sites.patch b/queue-5.15/x86-objtool-create-.return_sites.patch new file mode 
100644 index 00000000000..b25030ed18b --- /dev/null +++ b/queue-5.15/x86-objtool-create-.return_sites.patch @@ -0,0 +1,198 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:38 +0200 +Subject: x86,objtool: Create .return_sites + +From: Peter Zijlstra + +commit d9e9d2300681d68a775c28de6aa6e5290ae17796 upstream. + +Find all the return-thunk sites and record them in a .return_sites +section such that the kernel can undo this. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: conflict fixup because of functions added to support IBT] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/arch/x86/decode.c | 5 ++ + tools/objtool/check.c | 75 ++++++++++++++++++++++++++++++++ + tools/objtool/include/objtool/arch.h | 1 + tools/objtool/include/objtool/elf.h | 1 + tools/objtool/include/objtool/objtool.h | 1 + tools/objtool/objtool.c | 1 + 6 files changed, 84 insertions(+) + +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -722,3 +722,8 @@ bool arch_is_retpoline(struct symbol *sy + { + return !strncmp(sym->name, "__x86_indirect_", 15); + } ++ ++bool arch_is_rethunk(struct symbol *sym) ++{ ++ return !strcmp(sym->name, "__x86_return_thunk"); ++} +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -654,6 +654,52 @@ static int create_retpoline_sites_sectio + return 0; + } + ++static int create_return_sites_sections(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ struct section *sec; ++ int idx; ++ ++ sec = find_section_by_name(file->elf, ".return_sites"); ++ if (sec) { ++ WARN("file already has .return_sites, skipping"); ++ return 0; ++ } ++ ++ idx = 0; ++ list_for_each_entry(insn, &file->return_thunk_list, call_node) ++ idx++; ++ ++ if (!idx) ++ return 0; ++ ++ sec = elf_create_section(file->elf, ".return_sites", 0, ++ sizeof(int), 
idx); ++ if (!sec) { ++ WARN("elf_create_section: .return_sites"); ++ return -1; ++ } ++ ++ idx = 0; ++ list_for_each_entry(insn, &file->return_thunk_list, call_node) { ++ ++ int *site = (int *)sec->data->d_buf + idx; ++ *site = 0; ++ ++ if (elf_add_reloc_to_insn(file->elf, sec, ++ idx * sizeof(int), ++ R_X86_64_PC32, ++ insn->sec, insn->offset)) { ++ WARN("elf_add_reloc_to_insn: .return_sites"); ++ return -1; ++ } ++ ++ idx++; ++ } ++ ++ return 0; ++} ++ + static int create_mcount_loc_sections(struct objtool_file *file) + { + struct section *sec; +@@ -932,6 +978,11 @@ __weak bool arch_is_retpoline(struct sym + return false; + } + ++__weak bool arch_is_rethunk(struct symbol *sym) ++{ ++ return false; ++} ++ + #define NEGATIVE_RELOC ((void *)-1L) + + static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) +@@ -1092,6 +1143,19 @@ static void add_retpoline_call(struct ob + + annotate_call_site(file, insn, false); + } ++ ++static void add_return_call(struct objtool_file *file, struct instruction *insn) ++{ ++ /* ++ * Return thunk tail calls are really just returns in disguise, ++ * so convert them accordingly. ++ */ ++ insn->type = INSN_RETURN; ++ insn->retpoline_safe = true; ++ ++ list_add_tail(&insn->call_node, &file->return_thunk_list); ++} ++ + /* + * Find the destination instructions for all jumps. 
+ */ +@@ -1116,6 +1180,9 @@ static int add_jump_destinations(struct + } else if (reloc->sym->retpoline_thunk) { + add_retpoline_call(file, insn); + continue; ++ } else if (reloc->sym->return_thunk) { ++ add_return_call(file, insn); ++ continue; + } else if (insn->func) { + /* internal or external sibling call (with reloc) */ + add_call_dest(file, insn, reloc->sym, true); +@@ -1937,6 +2004,9 @@ static int classify_symbols(struct objto + if (arch_is_retpoline(func)) + func->retpoline_thunk = true; + ++ if (arch_is_rethunk(func)) ++ func->return_thunk = true; ++ + if (!strcmp(func->name, "__fentry__")) + func->fentry = true; + +@@ -3413,6 +3483,11 @@ int check(struct objtool_file *file) + if (ret < 0) + goto out; + warnings += ret; ++ ++ ret = create_return_sites_sections(file); ++ if (ret < 0) ++ goto out; ++ warnings += ret; + } + + if (mcount) { +--- a/tools/objtool/include/objtool/arch.h ++++ b/tools/objtool/include/objtool/arch.h +@@ -88,6 +88,7 @@ const char *arch_ret_insn(int len); + int arch_decode_hint_reg(u8 sp_reg, int *base); + + bool arch_is_retpoline(struct symbol *sym); ++bool arch_is_rethunk(struct symbol *sym); + + int arch_rewrite_retpolines(struct objtool_file *file); + +--- a/tools/objtool/include/objtool/elf.h ++++ b/tools/objtool/include/objtool/elf.h +@@ -57,6 +57,7 @@ struct symbol { + u8 uaccess_safe : 1; + u8 static_call_tramp : 1; + u8 retpoline_thunk : 1; ++ u8 return_thunk : 1; + u8 fentry : 1; + u8 kcov : 1; + }; +--- a/tools/objtool/include/objtool/objtool.h ++++ b/tools/objtool/include/objtool/objtool.h +@@ -19,6 +19,7 @@ struct objtool_file { + struct list_head insn_list; + DECLARE_HASHTABLE(insn_hash, 20); + struct list_head retpoline_call_list; ++ struct list_head return_thunk_list; + struct list_head static_call_list; + struct list_head mcount_loc_list; + bool ignore_unreachables, c_file, hints, rodata; +--- a/tools/objtool/objtool.c ++++ b/tools/objtool/objtool.c +@@ -126,6 +126,7 @@ struct objtool_file *objtool_open_read(c + 
INIT_LIST_HEAD(&file.insn_list); + hash_init(file.insn_hash); + INIT_LIST_HEAD(&file.retpoline_call_list); ++ INIT_LIST_HEAD(&file.return_thunk_list); + INIT_LIST_HEAD(&file.static_call_list); + INIT_LIST_HEAD(&file.mcount_loc_list); + file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); diff --git a/queue-5.15/x86-realmode-build-with-d__disable_exports.patch b/queue-5.15/x86-realmode-build-with-d__disable_exports.patch new file mode 100644 index 00000000000..772cb3a70b4 --- /dev/null +++ b/queue-5.15/x86-realmode-build-with-d__disable_exports.patch @@ -0,0 +1,28 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Thadeu Lima de Souza Cascardo +Date: Fri, 1 Jul 2022 11:21:20 -0300 +Subject: x86/realmode: build with -D__DISABLE_EXPORTS + +From: Thadeu Lima de Souza Cascardo + +Commit 156ff4a544ae ("x86/ibt: Base IBT bits") added this option when +building realmode in order to disable IBT there. This is also needed in +order to disable return thunks. + +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/Makefile ++++ b/arch/x86/Makefile +@@ -24,7 +24,7 @@ endif + + # How to compile the 16-bit code. Note we always compile for -march=i386; + # that way we can complain to the user if the CPU is insufficient. 
+-REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING \ ++REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \ + -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ + -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ + -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) diff --git a/queue-5.15/x86-retbleed-add-fine-grained-kconfig-knobs.patch b/queue-5.15/x86-retbleed-add-fine-grained-kconfig-knobs.patch new file mode 100644 index 00000000000..e1ad1a743d4 --- /dev/null +++ b/queue-5.15/x86-retbleed-add-fine-grained-kconfig-knobs.patch @@ -0,0 +1,587 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Mon, 27 Jun 2022 22:21:17 +0000 +Subject: x86/retbleed: Add fine grained Kconfig knobs + +From: Peter Zijlstra + +commit f43b9876e857c739d407bc56df288b0ebe1a9164 upstream. + +Do fine-grained Kconfig for all the various retbleed parts. + +NOTE: if your compiler doesn't support return thunks this will +silently 'upgrade' your mitigation to IBPB, you might not like this. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +[cascardo: there is no CONFIG_OBJTOOL] +[cascardo: objtool calling and option parsing has changed] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + Makefile | 8 +- + arch/x86/Kconfig | 106 +++++++++++++++++++++++-------- + arch/x86/entry/calling.h | 4 + + arch/x86/include/asm/disabled-features.h | 18 ++++- + arch/x86/include/asm/linkage.h | 4 - + arch/x86/include/asm/nospec-branch.h | 10 ++ + arch/x86/include/asm/static_call.h | 2 + arch/x86/kernel/alternative.c | 5 + + arch/x86/kernel/cpu/amd.c | 2 + arch/x86/kernel/cpu/bugs.c | 42 +++++++----- + arch/x86/kernel/static_call.c | 2 + arch/x86/kvm/emulate.c | 4 - + arch/x86/lib/retpoline.S | 4 + + scripts/Makefile.build | 1 + scripts/link-vmlinux.sh | 2 + security/Kconfig | 11 --- + tools/objtool/builtin-check.c | 3 + tools/objtool/check.c | 9 ++ + tools/objtool/include/objtool/builtin.h | 2 + 19 files changed, 170 insertions(+), 69 deletions(-) + +--- a/Makefile ++++ b/Makefile +@@ -687,14 +687,18 @@ endif + + ifdef CONFIG_CC_IS_GCC + RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) +-RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) + RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) + endif + ifdef CONFIG_CC_IS_CLANG + RETPOLINE_CFLAGS := -mretpoline-external-thunk + RETPOLINE_VDSO_CFLAGS := -mretpoline +-RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) + endif ++ ++ifdef CONFIG_RETHUNK ++RETHUNK_CFLAGS := -mfunction-return=thunk-extern ++RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) ++endif ++ + export RETPOLINE_CFLAGS + export RETPOLINE_VDSO_CFLAGS + +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -459,30 +459,6 @@ config GOLDFISH + def_bool y + depends on X86_GOLDFISH + +-config RETPOLINE +- bool "Avoid speculative indirect branches in kernel" +- default y +- help +- 
Compile kernel with the retpoline compiler options to guard against +- kernel-to-user data leaks by avoiding speculative indirect +- branches. Requires a compiler with -mindirect-branch=thunk-extern +- support for full protection. The kernel may run slower. +- +-config CC_HAS_SLS +- def_bool $(cc-option,-mharden-sls=all) +- +-config CC_HAS_RETURN_THUNK +- def_bool $(cc-option,-mfunction-return=thunk-extern) +- +-config SLS +- bool "Mitigate Straight-Line-Speculation" +- depends on CC_HAS_SLS && X86_64 +- default n +- help +- Compile the kernel with straight-line-speculation options to guard +- against straight line speculation. The kernel image might be slightly +- larger. +- + config X86_CPU_RESCTRL + bool "x86 CPU resource control support" + depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) +@@ -2410,6 +2386,88 @@ source "kernel/livepatch/Kconfig" + + endmenu + ++config CC_HAS_SLS ++ def_bool $(cc-option,-mharden-sls=all) ++ ++config CC_HAS_RETURN_THUNK ++ def_bool $(cc-option,-mfunction-return=thunk-extern) ++ ++menuconfig SPECULATION_MITIGATIONS ++ bool "Mitigations for speculative execution vulnerabilities" ++ default y ++ help ++ Say Y here to enable options which enable mitigations for ++ speculative execution hardware vulnerabilities. ++ ++ If you say N, all mitigations will be disabled. You really ++ should know what you are doing to say so. ++ ++if SPECULATION_MITIGATIONS ++ ++config PAGE_TABLE_ISOLATION ++ bool "Remove the kernel mapping in user mode" ++ default y ++ depends on (X86_64 || X86_PAE) ++ help ++ This feature reduces the number of hardware side channels by ++ ensuring that the majority of kernel addresses are not mapped ++ into userspace. ++ ++ See Documentation/x86/pti.rst for more details. ++ ++config RETPOLINE ++ bool "Avoid speculative indirect branches in kernel" ++ default y ++ help ++ Compile kernel with the retpoline compiler options to guard against ++ kernel-to-user data leaks by avoiding speculative indirect ++ branches. 
Requires a compiler with -mindirect-branch=thunk-extern ++ support for full protection. The kernel may run slower. ++ ++config RETHUNK ++ bool "Enable return-thunks" ++ depends on RETPOLINE && CC_HAS_RETURN_THUNK ++ default y ++ help ++ Compile the kernel with the return-thunks compiler option to guard ++ against kernel-to-user data leaks by avoiding return speculation. ++ Requires a compiler with -mfunction-return=thunk-extern ++ support for full protection. The kernel may run slower. ++ ++config CPU_UNRET_ENTRY ++ bool "Enable UNRET on kernel entry" ++ depends on CPU_SUP_AMD && RETHUNK ++ default y ++ help ++ Compile the kernel with support for the retbleed=unret mitigation. ++ ++config CPU_IBPB_ENTRY ++ bool "Enable IBPB on kernel entry" ++ depends on CPU_SUP_AMD ++ default y ++ help ++ Compile the kernel with support for the retbleed=ibpb mitigation. ++ ++config CPU_IBRS_ENTRY ++ bool "Enable IBRS on kernel entry" ++ depends on CPU_SUP_INTEL ++ default y ++ help ++ Compile the kernel with support for the spectre_v2=ibrs mitigation. ++ This mitigates both spectre_v2 and retbleed at great cost to ++ performance. ++ ++config SLS ++ bool "Mitigate Straight-Line-Speculation" ++ depends on CC_HAS_SLS && X86_64 ++ default n ++ help ++ Compile the kernel with straight-line-speculation options to guard ++ against straight line speculation. The kernel image might be slightly ++ larger. ++ ++endif ++ + config ARCH_HAS_ADD_PAGES + def_bool y + depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG +--- a/arch/x86/entry/calling.h ++++ b/arch/x86/entry/calling.h +@@ -296,6 +296,7 @@ For 32-bit we have the following convent + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. 
+ */ + .macro IBRS_ENTER save_reg ++#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +@@ -316,6 +317,7 @@ For 32-bit we have the following convent + shr $32, %rdx + wrmsr + .Lend_\@: ++#endif + .endm + + /* +@@ -323,6 +325,7 @@ For 32-bit we have the following convent + * regs. Must be called after the last RET. + */ + .macro IBRS_EXIT save_reg ++#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +@@ -337,6 +340,7 @@ For 32-bit we have the following convent + shr $32, %rdx + wrmsr + .Lend_\@: ++#endif + .endm + + /* +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -60,9 +60,19 @@ + # define DISABLE_RETPOLINE 0 + #else + # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ +- (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ +- (1 << (X86_FEATURE_RETHUNK & 31)) | \ +- (1 << (X86_FEATURE_UNRET & 31))) ++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) ++#endif ++ ++#ifdef CONFIG_RETHUNK ++# define DISABLE_RETHUNK 0 ++#else ++# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) ++#endif ++ ++#ifdef CONFIG_CPU_UNRET_ENTRY ++# define DISABLE_UNRET 0 ++#else ++# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) + #endif + + /* Force disable because it's broken beyond repair */ +@@ -88,7 +98,7 @@ + #define DISABLED_MASK8 0 + #define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) + #define DISABLED_MASK10 0 +-#define DISABLED_MASK11 (DISABLE_RETPOLINE) ++#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) + #define DISABLED_MASK12 0 + #define DISABLED_MASK13 0 + #define DISABLED_MASK14 0 +--- a/arch/x86/include/asm/linkage.h ++++ b/arch/x86/include/asm/linkage.h +@@ -18,7 +18,7 @@ + #define __ALIGN_STR __stringify(__ALIGN) + #endif + +-#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#if defined(CONFIG_RETHUNK) 
&& !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) + #define RET jmp __x86_return_thunk + #else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS +@@ -30,7 +30,7 @@ + + #else /* __ASSEMBLY__ */ + +-#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) + #define ASM_RET "jmp __x86_return_thunk\n\t" + #else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -127,6 +127,12 @@ + .Lskip_rsb_\@: + .endm + ++#ifdef CONFIG_CPU_UNRET_ENTRY ++#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" ++#else ++#define CALL_ZEN_UNTRAIN_RET "" ++#endif ++ + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the + * return thunk isn't mapped into the userspace tables (then again, AMD +@@ -139,10 +145,10 @@ + * where we have a stack but before any RET instruction. + */ + .macro UNTRAIN_RET +-#ifdef CONFIG_RETPOLINE ++#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) + ANNOTATE_UNRET_END + ALTERNATIVE_2 "", \ +- "call zen_untrain_ret", X86_FEATURE_UNRET, \ ++ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + #endif + .endm +--- a/arch/x86/include/asm/static_call.h ++++ b/arch/x86/include/asm/static_call.h +@@ -44,7 +44,7 @@ + #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") + +-#ifdef CONFIG_RETPOLINE ++#ifdef CONFIG_RETHUNK + #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") + #else +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -507,6 +507,7 @@ void __init_or_module noinline apply_ret + } + } + ++#ifdef CONFIG_RETHUNK + /* + * Rewrite the compiler generated return thunk tail-calls. 
+ * +@@ -568,6 +569,10 @@ void __init_or_module noinline apply_ret + } + } + } ++#else ++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } ++#endif /* CONFIG_RETHUNK */ ++ + #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ + + void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -888,6 +888,7 @@ static void init_amd_bd(struct cpuinfo_x + + void init_spectral_chicken(struct cpuinfo_x86 *c) + { ++#ifdef CONFIG_CPU_UNRET_ENTRY + u64 value; + + /* +@@ -904,6 +905,7 @@ void init_spectral_chicken(struct cpuinf + wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); + } + } ++#endif + } + + static void init_amd_zn(struct cpuinfo_x86 *c) +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -835,7 +835,6 @@ static int __init retbleed_parse_cmdline + early_param("retbleed", retbleed_parse_cmdline); + + #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" +-#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n" + #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + + static void __init retbleed_select_mitigation(void) +@@ -850,18 +849,33 @@ static void __init retbleed_select_mitig + return; + + case RETBLEED_CMD_UNRET: +- retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) { ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ } else { ++ pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n"); ++ goto do_cmd_auto; ++ } + break; + + case RETBLEED_CMD_IBPB: +- retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ } else { ++ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); ++ goto 
do_cmd_auto; ++ } + break; + ++do_cmd_auto: + case RETBLEED_CMD_AUTO: + default: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || +- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) +- retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { ++ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ } + + /* + * The Intel mitigation (IBRS or eIBRS) was already selected in +@@ -874,14 +888,6 @@ static void __init retbleed_select_mitig + + switch (retbleed_mitigation) { + case RETBLEED_MITIGATION_UNRET: +- +- if (!IS_ENABLED(CONFIG_RETPOLINE) || +- !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { +- pr_err(RETBLEED_COMPILER_MSG); +- retbleed_mitigation = RETBLEED_MITIGATION_IBPB; +- goto retbleed_force_ibpb; +- } +- + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + +@@ -893,7 +899,6 @@ static void __init retbleed_select_mitig + break; + + case RETBLEED_MITIGATION_IBPB: +-retbleed_force_ibpb: + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + mitigate_smt = true; + break; +@@ -1264,6 +1269,12 @@ static enum spectre_v2_mitigation_cmd __ + return SPECTRE_V2_CMD_AUTO; + } + ++ if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) { ++ pr_err("%s selected but not compiled in. Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { + pr_err("%s selected but not Intel CPU. 
Switching to AUTO select\n", + mitigation_options[i].option); +@@ -1321,7 +1332,8 @@ static void __init spectre_v2_select_mit + break; + } + +- if (boot_cpu_has_bug(X86_BUG_RETBLEED) && ++ if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && ++ boot_cpu_has_bug(X86_BUG_RETBLEED) && + retbleed_cmd != RETBLEED_CMD_OFF && + boot_cpu_has(X86_FEATURE_IBRS) && + boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { +--- a/arch/x86/kernel/static_call.c ++++ b/arch/x86/kernel/static_call.c +@@ -120,7 +120,7 @@ void arch_static_call_transform(void *si + } + EXPORT_SYMBOL_GPL(arch_static_call_transform); + +-#ifdef CONFIG_RETPOLINE ++#ifdef CONFIG_RETHUNK + /* + * This is called by apply_returns() to fix up static call trampolines, + * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -434,10 +434,10 @@ static int fastop(struct x86_emulate_ctx + * Depending on .config the SETcc functions look like: + * + * SETcc %al [3 bytes] +- * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETPOLINE] ++ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] + * INT3 [1 byte; CONFIG_SLS] + */ +-#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \ ++#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ + IS_ENABLED(CONFIG_SLS)) + #define SETCC_LENGTH (3 + RET_LENGTH) + #define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -71,6 +71,8 @@ SYM_CODE_END(__x86_indirect_thunk_array) + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. 
+ */ ++#ifdef CONFIG_RETHUNK ++ + .section .text.__x86.return_thunk + + /* +@@ -135,3 +137,5 @@ SYM_FUNC_END(zen_untrain_ret) + __EXPORT_THUNK(zen_untrain_ret) + + EXPORT_SYMBOL(__x86_return_thunk) ++ ++#endif /* CONFIG_RETHUNK */ +--- a/scripts/Makefile.build ++++ b/scripts/Makefile.build +@@ -231,6 +231,7 @@ objtool_args = \ + $(if $(CONFIG_FRAME_POINTER),, --no-fp) \ + $(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\ + $(if $(CONFIG_RETPOLINE), --retpoline) \ ++ $(if $(CONFIG_RETHUNK), --rethunk) \ + $(if $(CONFIG_X86_SMAP), --uaccess) \ + $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \ + $(if $(CONFIG_SLS), --sls) +--- a/scripts/link-vmlinux.sh ++++ b/scripts/link-vmlinux.sh +@@ -120,7 +120,7 @@ objtool_link() + + if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then + objtoolopt="${objtoolopt} --noinstr" +- if is_enabled CONFIG_RETPOLINE; then ++ if is_enabled CONFIG_CPU_UNRET_ENTRY; then + objtoolopt="${objtoolopt} --unret" + fi + fi +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -54,17 +54,6 @@ config SECURITY_NETWORK + implement socket and networking access controls. + If you are unsure how to answer this question, answer N. + +-config PAGE_TABLE_ISOLATION +- bool "Remove the kernel mapping in user mode" +- default y +- depends on (X86_64 || X86_PAE) && !UML +- help +- This feature reduces the number of hardware side channels by +- ensuring that the majority of kernel addresses are not mapped +- into userspace. +- +- See Documentation/x86/pti.rst for more details. 
+- + config SECURITY_INFINIBAND + bool "Infiniband Security Hooks" + depends on SECURITY && INFINIBAND +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -20,7 +20,7 @@ + #include + + bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, +- validate_dup, vmlinux, mcount, noinstr, backup, sls, unret; ++ validate_dup, vmlinux, mcount, noinstr, backup, sls, unret, rethunk; + + static const char * const check_usage[] = { + "objtool check [] file.o", +@@ -36,6 +36,7 @@ const struct option check_options[] = { + OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), + OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), + OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), ++ OPT_BOOLEAN(0, "rethunk", &rethunk, "validate and annotate rethunk usage"), + OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"), + OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), + OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -3440,8 +3440,11 @@ static int validate_retpoline(struct obj + continue; + + if (insn->type == INSN_RETURN) { +- WARN_FUNC("'naked' return found in RETPOLINE build", +- insn->sec, insn->offset); ++ if (rethunk) { ++ WARN_FUNC("'naked' return found in RETHUNK build", ++ insn->sec, insn->offset); ++ } else ++ continue; + } else { + WARN_FUNC("indirect %s found in RETPOLINE build", + insn->sec, insn->offset, +@@ -3711,7 +3714,9 @@ int check(struct objtool_file *file) + if (ret < 0) + goto out; + warnings += ret; ++ } + ++ if (rethunk) { + ret = create_return_sites_sections(file); + if (ret < 0) + goto out; +--- a/tools/objtool/include/objtool/builtin.h ++++ b/tools/objtool/include/objtool/builtin.h +@@ -9,7 +9,7 @@ + + extern const struct option check_options[]; + extern bool no_fp, no_unreachable, retpoline, module, 
backtrace, uaccess, stats, +- validate_dup, vmlinux, mcount, noinstr, backup, sls, unret; ++ validate_dup, vmlinux, mcount, noinstr, backup, sls, unret, rethunk; + + extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); + diff --git a/queue-5.15/x86-retpoline-cleanup-some-ifdefery.patch b/queue-5.15/x86-retpoline-cleanup-some-ifdefery.patch new file mode 100644 index 00000000000..e0b1b6c0ff1 --- /dev/null +++ b/queue-5.15/x86-retpoline-cleanup-some-ifdefery.patch @@ -0,0 +1,50 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:34 +0200 +Subject: x86/retpoline: Cleanup some #ifdefery + +From: Peter Zijlstra + +commit 369ae6ffc41a3c1137cab697635a84d0cc7cdcea upstream. + +On its own not much of a cleanup but it prepares for more/similar +code. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: conflict fixup because of DISABLE_ENQCMD] +[cascardo: no changes at nospec-branch.h and bpf_jit_comp.c] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/disabled-features.h | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -56,6 +56,13 @@ + # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) + #endif + ++#ifdef CONFIG_RETPOLINE ++# define DISABLE_RETPOLINE 0 ++#else ++# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ ++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) ++#endif ++ + /* Force disable because it's broken beyond repair */ + #define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) + +@@ -79,7 +86,7 @@ + #define DISABLED_MASK8 0 + #define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) + #define DISABLED_MASK10 0 +-#define DISABLED_MASK11 0 ++#define DISABLED_MASK11 (DISABLE_RETPOLINE) 
+ #define DISABLED_MASK12 0 + #define DISABLED_MASK13 0 + #define DISABLED_MASK14 0 diff --git a/queue-5.15/x86-retpoline-create-a-retpoline-thunk-array.patch b/queue-5.15/x86-retpoline-create-a-retpoline-thunk-array.patch new file mode 100644 index 00000000000..4ef470de18c --- /dev/null +++ b/queue-5.15/x86-retpoline-create-a-retpoline-thunk-array.patch @@ -0,0 +1,104 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:41 +0200 +Subject: x86/retpoline: Create a retpoline thunk array + +From: Peter Zijlstra + +commit 1a6f74429c42a3854980359a758e222005712aee upstream. + +Stick all the retpolines in a single symbol and have the individual +thunks as inner labels, this should guarantee thunk order and layout. + +Previously there were 16 (or rather 15 without rsp) separate symbols and +a toolchain might reasonably expect it could displace them however it +liked, with disregard for their relative position. + +However, now they're part of a larger symbol. Any change to their +relative position would disrupt this larger _array symbol and thus not +be sound. + +This is the same reasoning used for data symbols. On their own there +is no guarantee about their relative position with respect to one another, but +we're still able to do arrays because an array as a whole is a single +larger symbol. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.169659320@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 8 +++++++- + arch/x86/lib/retpoline.S | 14 +++++++++----- + 2 files changed, 16 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -12,6 +12,8 @@ + #include + #include + ++#define RETPOLINE_THUNK_SIZE 32 ++ + /* + * Fill the CPU return stack buffer. 
+ * +@@ -120,11 +122,15 @@ + + #ifdef CONFIG_RETPOLINE + ++typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; ++ + #define GEN(reg) \ +- extern asmlinkage void __x86_indirect_thunk_ ## reg (void); ++ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; + #include + #undef GEN + ++extern retpoline_thunk_t __x86_indirect_thunk_array[]; ++ + #ifdef CONFIG_X86_64 + + /* +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -28,16 +28,14 @@ + + .macro THUNK reg + +- .align 32 +- +-SYM_FUNC_START(__x86_indirect_thunk_\reg) ++ .align RETPOLINE_THUNK_SIZE ++SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) ++ UNWIND_HINT_EMPTY + + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ + __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE + +-SYM_FUNC_END(__x86_indirect_thunk_\reg) +- + .endm + + /* +@@ -55,10 +53,16 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) + #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) + #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) + ++ .align RETPOLINE_THUNK_SIZE ++SYM_CODE_START(__x86_indirect_thunk_array) ++ + #define GEN(reg) THUNK reg + #include + #undef GEN + ++ .align RETPOLINE_THUNK_SIZE ++SYM_CODE_END(__x86_indirect_thunk_array) ++ + #define GEN(reg) EXPORT_THUNK(reg) + #include + #undef GEN diff --git a/queue-5.15/x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch b/queue-5.15/x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch new file mode 100644 index 00000000000..c6cb46f7a09 --- /dev/null +++ b/queue-5.15/x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch @@ -0,0 +1,72 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:40 +0200 +Subject: x86/retpoline: Move the retpoline thunk declarations to nospec-branch.h + +From: Peter Zijlstra + 
+commit 6fda8a38865607db739be3e567a2387376222dbd upstream. + +Because it makes no sense to split the retpoline gunk over multiple +headers. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.106290934@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/asm-prototypes.h | 8 -------- + arch/x86/include/asm/nospec-branch.h | 7 +++++++ + arch/x86/net/bpf_jit_comp.c | 1 - + 3 files changed, 7 insertions(+), 9 deletions(-) + +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -17,11 +17,3 @@ + extern void cmpxchg8b_emu(void); + #endif + +-#ifdef CONFIG_RETPOLINE +- +-#define GEN(reg) \ +- extern asmlinkage void __x86_indirect_thunk_ ## reg (void); +-#include +-#undef GEN +- +-#endif /* CONFIG_RETPOLINE */ +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -5,6 +5,7 @@ + + #include + #include ++#include + + #include + #include +@@ -118,6 +119,12 @@ + ".popsection\n\t" + + #ifdef CONFIG_RETPOLINE ++ ++#define GEN(reg) \ ++ extern asmlinkage void __x86_indirect_thunk_ ## reg (void); ++#include ++#undef GEN ++ + #ifdef CONFIG_X86_64 + + /* +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -15,7 +15,6 @@ + #include + #include + #include +-#include + + static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) + { diff --git a/queue-5.15/x86-retpoline-remove-unused-replacement-symbols.patch b/queue-5.15/x86-retpoline-remove-unused-replacement-symbols.patch new file mode 100644 index 00000000000..169917fbcb1 --- /dev/null +++ b/queue-5.15/x86-retpoline-remove-unused-replacement-symbols.patch @@ -0,0 +1,96 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:37 +0200 +Subject: x86/retpoline: Remove unused replacement symbols + +From: 
Peter Zijlstra + +commit 4fe79e710d9574a14993f8b4e16b7252da72d5e8 upstream. + +Now that objtool no longer creates alternatives, these replacement +symbols are no longer needed, remove them. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120309.915051744@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/asm-prototypes.h | 10 -------- + arch/x86/lib/retpoline.S | 42 ---------------------------------- + 2 files changed, 52 deletions(-) + +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -24,14 +24,4 @@ extern void cmpxchg8b_emu(void); + extern asmlinkage void __x86_indirect_thunk_ ## reg (void); + #include + +-#undef GEN +-#define GEN(reg) \ +- extern asmlinkage void __x86_indirect_alt_call_ ## reg (void); +-#include +- +-#undef GEN +-#define GEN(reg) \ +- extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void); +-#include +- + #endif /* CONFIG_RETPOLINE */ +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -41,36 +41,6 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) + .endm + + /* +- * This generates .altinstr_replacement symbols for use by objtool. They, +- * however, must not actually live in .altinstr_replacement since that will be +- * discarded after init, but module alternatives will also reference these +- * symbols. +- * +- * Their names matches the "__x86_indirect_" prefix to mark them as retpolines. 
+- */ +-.macro ALT_THUNK reg +- +- .align 1 +- +-SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg) +- ANNOTATE_RETPOLINE_SAFE +-1: call *%\reg +-2: .skip 5-(2b-1b), 0x90 +-SYM_FUNC_END(__x86_indirect_alt_call_\reg) +- +-STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg) +- +-SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg) +- ANNOTATE_RETPOLINE_SAFE +-1: jmp *%\reg +-2: .skip 5-(2b-1b), 0x90 +-SYM_FUNC_END(__x86_indirect_alt_jmp_\reg) +- +-STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg) +- +-.endm +- +-/* + * Despite being an assembler file we can't just use .irp here + * because __KSYM_DEPS__ only uses the C preprocessor and would + * only see one instance of "__x86_indirect_thunk_\reg" rather +@@ -92,15 +62,3 @@ STACK_FRAME_NON_STANDARD(__x86_indirect_ + #undef GEN + #define GEN(reg) EXPORT_THUNK(reg) + #include +- +-#undef GEN +-#define GEN(reg) ALT_THUNK reg +-#include +- +-#undef GEN +-#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg) +-#include +- +-#undef GEN +-#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg) +-#include diff --git a/queue-5.15/x86-retpoline-swizzle-retpoline-thunk.patch b/queue-5.15/x86-retpoline-swizzle-retpoline-thunk.patch new file mode 100644 index 00000000000..493bd9e5c54 --- /dev/null +++ b/queue-5.15/x86-retpoline-swizzle-retpoline-thunk.patch @@ -0,0 +1,40 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:35 +0200 +Subject: x86/retpoline: Swizzle retpoline thunk + +From: Peter Zijlstra + +commit 00e1533325fd1fb5459229fe37f235462649f668 upstream. + +Put the actual retpoline thunk as the original code so that it can +become more complicated. Specifically, it allows RET to be a JMP, +which can't be .altinstr_replacement since that doesn't do relocations +(except for the very first instruction). 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/lib/retpoline.S | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -32,9 +32,9 @@ + SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_EMPTY + +- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ +- __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ +- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE ++ ALTERNATIVE_2 __stringify(RETPOLINE \reg), \ ++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \ ++ __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE) + + .endm + diff --git a/queue-5.15/x86-retpoline-use-mfunction-return.patch b/queue-5.15/x86-retpoline-use-mfunction-return.patch new file mode 100644 index 00000000000..5e112a5e569 --- /dev/null +++ b/queue-5.15/x86-retpoline-use-mfunction-return.patch @@ -0,0 +1,78 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:36 +0200 +Subject: x86/retpoline: Use -mfunction-return + +From: Peter Zijlstra + +commit 0b53c374b9eff2255a386f1f1cfb9a928e52a5ae upstream. + +Utilize -mfunction-return=thunk-extern when available to have the +compiler replace RET instructions with direct JMPs to the symbol +__x86_return_thunk. This does not affect assembler (.S) sources, only C +sources. + +-mfunction-return=thunk-extern has been available since gcc 7.3 and +clang 15. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Nick Desaulniers +Reviewed-by: Josh Poimboeuf +Tested-by: Nick Desaulniers +Signed-off-by: Borislav Petkov +[cascardo: RETPOLINE_CFLAGS is at Makefile] +[cascardo: remove ANNOTATE_NOENDBR from __x86_return_thunk] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + Makefile | 2 ++ + arch/x86/include/asm/nospec-branch.h | 2 ++ + arch/x86/lib/retpoline.S | 12 ++++++++++++ + 3 files changed, 16 insertions(+) + +--- a/Makefile ++++ b/Makefile +@@ -687,11 +687,13 @@ endif + + ifdef CONFIG_CC_IS_GCC + RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) ++RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) + RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) + endif + ifdef CONFIG_CC_IS_CLANG + RETPOLINE_CFLAGS := -mretpoline-external-thunk + RETPOLINE_VDSO_CFLAGS := -mretpoline ++RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) + endif + export RETPOLINE_CFLAGS + export RETPOLINE_VDSO_CFLAGS +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -120,6 +120,8 @@ + _ASM_PTR " 999b\n\t" \ + ".popsection\n\t" + ++extern void __x86_return_thunk(void); ++ + #ifdef CONFIG_RETPOLINE + + typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -66,3 +66,15 @@ SYM_CODE_END(__x86_indirect_thunk_array) + #define GEN(reg) EXPORT_THUNK(reg) + #include <asm/GEN-for-each-reg.h> + #undef GEN ++ ++/* ++ * This function name is magical and is used by -mfunction-return=thunk-extern ++ * for the compiler to generate JMPs to it.
++ */ ++SYM_CODE_START(__x86_return_thunk) ++ UNWIND_HINT_EMPTY ++ ret ++ int3 ++SYM_CODE_END(__x86_return_thunk) ++ ++__EXPORT_THUNK(__x86_return_thunk) diff --git a/queue-5.15/x86-sev-avoid-using-__x86_return_thunk.patch b/queue-5.15/x86-sev-avoid-using-__x86_return_thunk.patch new file mode 100644 index 00000000000..cd21dc55c2a --- /dev/null +++ b/queue-5.15/x86-sev-avoid-using-__x86_return_thunk.patch @@ -0,0 +1,47 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Kim Phillips +Date: Tue, 14 Jun 2022 23:15:44 +0200 +Subject: x86/sev: Avoid using __x86_return_thunk + +From: Kim Phillips + +commit 0ee9073000e8791f8b134a8ded31bcc767f7f232 upstream. + +Specifically, it's because __enc_copy() encrypts the kernel after +being relocated outside the kernel in sme_encrypt_execute(), and the +RET macro's jmp offset isn't amended prior to execution. + +Signed-off-by: Kim Phillips +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/mm/mem_encrypt_boot.S | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/x86/mm/mem_encrypt_boot.S ++++ b/arch/x86/mm/mem_encrypt_boot.S +@@ -65,7 +65,9 @@ SYM_FUNC_START(sme_encrypt_execute) + movq %rbp, %rsp /* Restore original stack pointer */ + pop %rbp + +- RET ++ /* Offset to __x86_return_thunk would be wrong here */ ++ ret ++ int3 + SYM_FUNC_END(sme_encrypt_execute) + + SYM_FUNC_START(__enc_copy) +@@ -151,6 +153,8 @@ SYM_FUNC_START(__enc_copy) + pop %r12 + pop %r15 + +- RET ++ /* Offset to __x86_return_thunk would be wrong here */ ++ ret ++ int3 + .L__enc_copy_end: + SYM_FUNC_END(__enc_copy) diff --git a/queue-5.15/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch b/queue-5.15/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch new file mode 100644 index 00000000000..2e59e6e7964 --- 
/dev/null +++ b/queue-5.15/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch @@ -0,0 +1,208 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Pawan Gupta +Date: Tue, 14 Jun 2022 23:15:55 +0200 +Subject: x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS + +From: Pawan Gupta + +commit 7c693f54c873691a4b7da05c7e0f74e67745d144 upstream. + +Extend spectre_v2= boot option with Kernel IBRS. + + [jpoimboe: no STIBP with IBRS] + +Signed-off-by: Pawan Gupta +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/kernel-parameters.txt | 1 + arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 66 ++++++++++++++++++------ + 3 files changed, 54 insertions(+), 14 deletions(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5335,6 +5335,7 @@ + eibrs - enhanced IBRS + eibrs,retpoline - enhanced IBRS + Retpolines + eibrs,lfence - enhanced IBRS + LFENCE ++ ibrs - use IBRS to protect kernel + + Not specifying this option is equivalent to + spectre_v2=auto. 
+--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -212,6 +212,7 @@ enum spectre_v2_mitigation { + SPECTRE_V2_EIBRS, + SPECTRE_V2_EIBRS_RETPOLINE, + SPECTRE_V2_EIBRS_LFENCE, ++ SPECTRE_V2_IBRS, + }; + + /* The indirect branch speculation control variants */ +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -965,6 +965,7 @@ enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_EIBRS, + SPECTRE_V2_CMD_EIBRS_RETPOLINE, + SPECTRE_V2_CMD_EIBRS_LFENCE, ++ SPECTRE_V2_CMD_IBRS, + }; + + enum spectre_v2_user_cmd { +@@ -1037,11 +1038,12 @@ spectre_v2_parse_user_cmdline(enum spect + return SPECTRE_V2_USER_CMD_AUTO; + } + +-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) ++static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) + { +- return (mode == SPECTRE_V2_EIBRS || +- mode == SPECTRE_V2_EIBRS_RETPOLINE || +- mode == SPECTRE_V2_EIBRS_LFENCE); ++ return mode == SPECTRE_V2_IBRS || ++ mode == SPECTRE_V2_EIBRS || ++ mode == SPECTRE_V2_EIBRS_RETPOLINE || ++ mode == SPECTRE_V2_EIBRS_LFENCE; + } + + static void __init +@@ -1106,12 +1108,12 @@ spectre_v2_user_select_mitigation(enum s + } + + /* +- * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not +- * required. ++ * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, ++ * STIBP is not required. 
+ */ + if (!boot_cpu_has(X86_FEATURE_STIBP) || + !smt_possible || +- spectre_v2_in_eibrs_mode(spectre_v2_enabled)) ++ spectre_v2_in_ibrs_mode(spectre_v2_enabled)) + return; + + /* +@@ -1143,6 +1145,7 @@ static const char * const spectre_v2_str + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", ++ [SPECTRE_V2_IBRS] = "Mitigation: IBRS", + }; + + static const struct { +@@ -1160,6 +1163,7 @@ static const struct { + { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, + { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, ++ { "ibrs", SPECTRE_V2_CMD_IBRS, false }, + }; + + static void __init spec_v2_print_cond(const char *reason, bool secure) +@@ -1222,6 +1226,24 @@ static enum spectre_v2_mitigation_cmd __ + return SPECTRE_V2_CMD_AUTO; + } + ++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { ++ pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ ++ if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) { ++ pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ ++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) { ++ pr_err("%s selected but running as XenPV guest. 
Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ + spec_v2_print_cond(mitigation_options[i].option, + mitigation_options[i].secure); + return cmd; +@@ -1261,6 +1283,14 @@ static void __init spectre_v2_select_mit + break; + } + ++ if (boot_cpu_has_bug(X86_BUG_RETBLEED) && ++ retbleed_cmd != RETBLEED_CMD_OFF && ++ boot_cpu_has(X86_FEATURE_IBRS) && ++ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { ++ mode = SPECTRE_V2_IBRS; ++ break; ++ } ++ + mode = spectre_v2_select_retpoline(); + break; + +@@ -1277,6 +1307,10 @@ static void __init spectre_v2_select_mit + mode = spectre_v2_select_retpoline(); + break; + ++ case SPECTRE_V2_CMD_IBRS: ++ mode = SPECTRE_V2_IBRS; ++ break; ++ + case SPECTRE_V2_CMD_EIBRS: + mode = SPECTRE_V2_EIBRS; + break; +@@ -1293,7 +1327,7 @@ static void __init spectre_v2_select_mit + if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + +- if (spectre_v2_in_eibrs_mode(mode)) { ++ if (spectre_v2_in_ibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + write_spec_ctrl_current(x86_spec_ctrl_base, true); +@@ -1304,6 +1338,10 @@ static void __init spectre_v2_select_mit + case SPECTRE_V2_EIBRS: + break; + ++ case SPECTRE_V2_IBRS: ++ setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); ++ break; ++ + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_EIBRS_LFENCE: + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); +@@ -1330,17 +1368,17 @@ static void __init spectre_v2_select_mit + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + + /* +- * Retpoline means the kernel is safe because it has no indirect +- * branches. Enhanced IBRS protects firmware too, so, enable restricted +- * speculation around firmware calls only when Enhanced IBRS isn't +- * supported. ++ * Retpoline protects the kernel, but doesn't protect firmware. 
IBRS ++ * and Enhanced IBRS protect firmware too, so enable IBRS around ++ * firmware calls only when IBRS / Enhanced IBRS aren't otherwise ++ * enabled. + * + * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because + * the user might select retpoline on the kernel command line and if + * the CPU supports Enhanced IBRS, kernel might un-intentionally not + * enable IBRS around firmware calls. + */ +- if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { ++ if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); + } +@@ -2082,7 +2120,7 @@ static ssize_t mmio_stale_data_show_stat + + static char *stibp_state(void) + { +- if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) ++ if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) + return ""; + + switch (spectre_v2_user_stibp) { diff --git a/queue-5.15/x86-speculation-disable-rrsba-behavior.patch b/queue-5.15/x86-speculation-disable-rrsba-behavior.patch new file mode 100644 index 00000000000..c48263ef8d6 --- /dev/null +++ b/queue-5.15/x86-speculation-disable-rrsba-behavior.patch @@ -0,0 +1,154 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Pawan Gupta +Date: Fri, 8 Jul 2022 13:36:09 -0700 +Subject: x86/speculation: Disable RRSBA behavior + +From: Pawan Gupta + +commit 4ad3278df6fe2b0852b00d5757fc2ccd8e92c26e upstream. + +Some Intel processors may use alternate predictors for RETs on +RSB-underflow. This condition may be vulnerable to Branch History +Injection (BHI) and intramode-BTI. + +Kernel earlier added spectre_v2 mitigation modes (eIBRS+Retpolines, +eIBRS+LFENCE, Retpolines) which protect indirect CALLs and JMPs against +such attacks. However, on RSB-underflow, RET target prediction may +fallback to alternate predictors. As a result, RET's predicted target +may get influenced by branch history. 
+ +A new MSR_IA32_SPEC_CTRL bit (RRSBA_DIS_S) controls this fallback +behavior when in kernel mode. When set, RETs will not take predictions +from alternate predictors, hence mitigating RETs as well. Support for +this is enumerated by CPUID.7.2.EDX[RRSBA_CTRL] (bit2). + +For spectre v2 mitigation, when a user selects a mitigation that +protects indirect CALLs and JMPs against BHI and intramode-BTI, set +RRSBA_DIS_S also to protect RETs for RSB-underflow case. + +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +[cascardo: no X86_FEATURE_INTEL_PPIN] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/cpufeatures.h | 2 +- + arch/x86/include/asm/msr-index.h | 9 +++++++++ + arch/x86/kernel/cpu/bugs.c | 26 ++++++++++++++++++++++++++ + arch/x86/kernel/cpu/scattered.c | 1 + + tools/arch/x86/include/asm/msr-index.h | 9 +++++++++ + 5 files changed, 46 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -295,7 +295,7 @@ + #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ + #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ + #define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ +-/* FREE! 
(11*32+11) */ ++#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ + #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ + #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -51,6 +51,8 @@ + #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ + #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ + #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ ++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ ++#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) + + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ + #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +@@ -139,6 +141,13 @@ + * bit available to control VERW + * behavior. + */ ++#define ARCH_CAP_RRSBA BIT(19) /* ++ * Indicates RET may use predictors ++ * other than the RSB. With eIBRS ++ * enabled predictions in kernel mode ++ * are restricted to targets in ++ * kernel. 
++ */ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1311,6 +1311,22 @@ static enum spectre_v2_mitigation __init + return SPECTRE_V2_RETPOLINE; + } + ++/* Disable in-kernel use of non-RSB RET predictors */ ++static void __init spec_ctrl_disable_kernel_rrsba(void) ++{ ++ u64 ia32_cap; ++ ++ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) ++ return; ++ ++ ia32_cap = x86_read_arch_cap_msr(); ++ ++ if (ia32_cap & ARCH_CAP_RRSBA) { ++ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); ++ } ++} ++ + static void __init spectre_v2_select_mitigation(void) + { + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); +@@ -1405,6 +1421,16 @@ static void __init spectre_v2_select_mit + break; + } + ++ /* ++ * Disable alternate RSB predictions in kernel when indirect CALLs and ++ * JMPs gets protection against BHI and Intramode-BTI, but RET ++ * prediction from a non-RSB predictor is still a risk. 
++ */ ++ if (mode == SPECTRE_V2_EIBRS_LFENCE || ++ mode == SPECTRE_V2_EIBRS_RETPOLINE || ++ mode == SPECTRE_V2_RETPOLINE) ++ spec_ctrl_disable_kernel_rrsba(); ++ + spectre_v2_enabled = mode; + pr_info("%s\n", spectre_v2_strings[mode]); + +--- a/arch/x86/kernel/cpu/scattered.c ++++ b/arch/x86/kernel/cpu/scattered.c +@@ -26,6 +26,7 @@ struct cpuid_bit { + static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, ++ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, + { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, + { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, +--- a/tools/arch/x86/include/asm/msr-index.h ++++ b/tools/arch/x86/include/asm/msr-index.h +@@ -51,6 +51,8 @@ + #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ + #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ + #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ ++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ ++#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) + + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ + #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +@@ -138,6 +140,13 @@ + * bit available to control VERW + * behavior. + */ ++#define ARCH_CAP_RRSBA BIT(19) /* ++ * Indicates RET may use predictors ++ * other than the RSB. With eIBRS ++ * enabled predictions in kernel mode ++ * are restricted to targets in ++ * kernel. 
++ */ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* diff --git a/queue-5.15/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch b/queue-5.15/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch new file mode 100644 index 00000000000..3958fd7c516 --- /dev/null +++ b/queue-5.15/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch @@ -0,0 +1,134 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:15 +0200 +Subject: x86/speculation: Fill RSB on vmexit for IBRS + +From: Josh Poimboeuf + +commit 9756bba28470722dacb79ffce554336dd1f6a6cd upstream. + +Prevent RSB underflow/poisoning attacks with RSB. While at it, add a +bunch of comments to attempt to document the current state of tribal +knowledge about RSB attacks and what exactly is being mitigated. + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/cpufeatures.h | 2 - + arch/x86/kernel/cpu/bugs.c | 63 ++++++++++++++++++++++++++++++++++--- + arch/x86/kvm/vmx/vmenter.S | 6 +-- + 3 files changed, 62 insertions(+), 9 deletions(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -204,7 +204,7 @@ + /* FREE! ( 7*32+10) */ + #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ + #define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ +-/* FREE! 
( 7*32+13) */ ++#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ + #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ + #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ + #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1394,17 +1394,70 @@ static void __init spectre_v2_select_mit + pr_info("%s\n", spectre_v2_strings[mode]); + + /* +- * If spectre v2 protection has been enabled, unconditionally fill +- * RSB during a context switch; this protects against two independent +- * issues: ++ * If Spectre v2 protection has been enabled, fill the RSB during a ++ * context switch. In general there are two types of RSB attacks ++ * across context switches, for which the CALLs/RETs may be unbalanced. + * +- * - RSB underflow (and switch to BTB) on Skylake+ +- * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs ++ * 1) RSB underflow ++ * ++ * Some Intel parts have "bottomless RSB". When the RSB is empty, ++ * speculated return targets may come from the branch predictor, ++ * which could have a user-poisoned BTB or BHB entry. ++ * ++ * AMD has it even worse: *all* returns are speculated from the BTB, ++ * regardless of the state of the RSB. ++ * ++ * When IBRS or eIBRS is enabled, the "user -> kernel" attack ++ * scenario is mitigated by the IBRS branch prediction isolation ++ * properties, so the RSB buffer filling wouldn't be necessary to ++ * protect against this type of attack. ++ * ++ * The "user -> user" attack scenario is mitigated by RSB filling. ++ * ++ * 2) Poisoned RSB entry ++ * ++ * If the 'next' in-kernel return stack is shorter than 'prev', ++ * 'next' could be tricked into speculating with a user-poisoned RSB ++ * entry. ++ * ++ * The "user -> kernel" attack scenario is mitigated by SMEP and ++ * eIBRS. 
++ * ++ * The "user -> user" scenario, also known as SpectreBHB, requires ++ * RSB clearing. ++ * ++ * So to mitigate all cases, unconditionally fill RSB on context ++ * switches. ++ * ++ * FIXME: Is this pointless for retbleed-affected AMD? + */ + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + + /* ++ * Similar to context switches, there are two types of RSB attacks ++ * after vmexit: ++ * ++ * 1) RSB underflow ++ * ++ * 2) Poisoned RSB entry ++ * ++ * When retpoline is enabled, both are mitigated by filling/clearing ++ * the RSB. ++ * ++ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch ++ * prediction isolation protections, RSB still needs to be cleared ++ * because of #2. Note that SMEP provides no protection here, unlike ++ * user-space-poisoned RSB entries. ++ * ++ * eIBRS, on the other hand, has RSB-poisoning protections, so it ++ * doesn't need RSB clearing after vmexit. ++ */ ++ if (boot_cpu_has(X86_FEATURE_RETPOLINE) || ++ boot_cpu_has(X86_FEATURE_KERNEL_IBRS)) ++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); ++ ++ /* + * Retpoline protects the kernel, but doesn't protect firmware. IBRS + * and Enhanced IBRS protect firmware too, so enable IBRS around + * firmware calls only when IBRS / Enhanced IBRS aren't otherwise +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -193,15 +193,15 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL + * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before + * the first unbalanced RET after vmexit! + * +- * For retpoline, RSB filling is needed to prevent poisoned RSB entries +- * and (in some cases) RSB underflow. ++ * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB ++ * entries and (in some cases) RSB underflow. + * + * eIBRS has its own protection against poisoned RSB, so it doesn't + * need the RSB filling sequence. 
But it does need to be enabled + * before the first unbalanced RET. + */ + +- FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE ++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT + + pop %_ASM_ARG2 /* @flags */ + pop %_ASM_ARG1 /* @vmx */ diff --git a/queue-5.15/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch b/queue-5.15/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch new file mode 100644 index 00000000000..2013f9f9bb8 --- /dev/null +++ b/queue-5.15/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch @@ -0,0 +1,45 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:06 +0200 +Subject: x86/speculation: Fix firmware entry SPEC_CTRL handling + +From: Josh Poimboeuf + +commit e6aa13622ea8283cc699cac5d018cc40a2ba2010 upstream. + +The firmware entry code may accidentally clear STIBP or SSBD. Fix that. + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -286,18 +286,16 @@ extern u64 spec_ctrl_current(void); + */ + #define firmware_restrict_branch_speculation_start() \ + do { \ +- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ +- \ + preempt_disable(); \ +- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ ++ spec_ctrl_current() | SPEC_CTRL_IBRS, \ + X86_FEATURE_USE_IBRS_FW); \ + } while (0) + + #define firmware_restrict_branch_speculation_end() \ + do { \ +- u64 val = x86_spec_ctrl_base; \ +- \ +- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ ++ spec_ctrl_current(), \ + X86_FEATURE_USE_IBRS_FW); \ + preempt_enable(); \ + } 
while (0) diff --git a/queue-5.15/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch b/queue-5.15/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch new file mode 100644 index 00000000000..b8b1bb064f9 --- /dev/null +++ b/queue-5.15/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch @@ -0,0 +1,77 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:05 +0200 +Subject: x86/speculation: Fix RSB filling with CONFIG_RETPOLINE=n + +From: Josh Poimboeuf + +commit b2620facef4889fefcbf2e87284f34dcd4189bce upstream. + +If a kernel is built with CONFIG_RETPOLINE=n, but the user still wants +to mitigate Spectre v2 using IBRS or eIBRS, the RSB filling will be +silently disabled. + +There's nothing retpoline-specific about RSB buffer filling. Remove the +CONFIG_RETPOLINE guards around it. + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_32.S | 2 -- + arch/x86/entry/entry_64.S | 2 -- + arch/x86/include/asm/nospec-branch.h | 2 -- + 3 files changed, 6 deletions(-) + +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -701,7 +701,6 @@ SYM_CODE_START(__switch_to_asm) + movl %ebx, PER_CPU_VAR(__stack_chk_guard) + #endif + +-#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated +@@ -710,7 +709,6 @@ SYM_CODE_START(__switch_to_asm) + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +-#endif + + /* Restore flags or the incoming task to restore AC state. 
*/ + popfl +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -248,7 +248,6 @@ SYM_FUNC_START(__switch_to_asm) + movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset + #endif + +-#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated +@@ -257,7 +256,6 @@ SYM_FUNC_START(__switch_to_asm) + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +-#endif + + /* restore callee-saved registers */ + popq %r15 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -122,11 +122,9 @@ + * monstrosity above, manually. + */ + .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +-#ifdef CONFIG_RETPOLINE + ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr + __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) + .Lskip_rsb_\@: +-#endif + .endm + + /* diff --git a/queue-5.15/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch b/queue-5.15/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch new file mode 100644 index 00000000000..630d9cdec7f --- /dev/null +++ b/queue-5.15/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch @@ -0,0 +1,33 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:07 +0200 +Subject: x86/speculation: Fix SPEC_CTRL write on SMT state change + +From: Josh Poimboeuf + +commit 56aa4d221f1ee2c3a49b45b800778ec6e0ab73c5 upstream. + +If the SMT state changes, SSBD might get accidentally disabled. Fix +that. 
+ +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/bugs.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1451,7 +1451,8 @@ static void __init spectre_v2_select_mit + + static void update_stibp_msr(void * __unused) + { +- write_spec_ctrl_current(x86_spec_ctrl_base, true); ++ u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); ++ write_spec_ctrl_current(val, true); + } + + /* Update x86_spec_ctrl_base in case SMT state changed. */ diff --git a/queue-5.15/x86-speculation-remove-x86_spec_ctrl_mask.patch b/queue-5.15/x86-speculation-remove-x86_spec_ctrl_mask.patch new file mode 100644 index 00000000000..c57ffdfd9c3 --- /dev/null +++ b/queue-5.15/x86-speculation-remove-x86_spec_ctrl_mask.patch @@ -0,0 +1,87 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Josh Poimboeuf +Date: Fri, 17 Jun 2022 12:12:48 -0700 +Subject: x86/speculation: Remove x86_spec_ctrl_mask + +From: Josh Poimboeuf + +commit acac5e98ef8d638a411cfa2ee676c87e1973f126 upstream. + +This mask has been made redundant by kvm_spec_ctrl_test_value(). And it +doesn't even work when MSR interception is disabled, as the guest can +just write to SPEC_CTRL directly. 
+ +Signed-off-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Reviewed-by: Paolo Bonzini +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/bugs.c | 31 +------------------------------ + 1 file changed, 1 insertion(+), 30 deletions(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -86,12 +86,6 @@ u64 spec_ctrl_current(void) + EXPORT_SYMBOL_GPL(spec_ctrl_current); + + /* +- * The vendor and possibly platform specific bits which can be modified in +- * x86_spec_ctrl_base. +- */ +-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; +- +-/* + * AMD specific MSR info for Speculative Store Bypass control. + * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu(). + */ +@@ -146,10 +140,6 @@ void __init check_bugs(void) + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + +- /* Allow STIBP in MSR_SPEC_CTRL if supported */ +- if (boot_cpu_has(X86_FEATURE_STIBP)) +- x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; +- + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); + spectre_v2_select_mitigation(); +@@ -208,19 +198,10 @@ void __init check_bugs(void) + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +- u64 msrval, guestval, hostval = spec_ctrl_current(); ++ u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); + struct thread_info *ti = current_thread_info(); + +- /* Is MSR_SPEC_CTRL implemented ? */ + if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { +- /* +- * Restrict guest_spec_ctrl to supported values. Clear the +- * modifiable bits in the host base value and or the +- * modifiable bits from the guest value. +- */ +- guestval = hostval & ~x86_spec_ctrl_mask; +- guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; +- + if (hostval != guestval) { + msrval = setguest ? 
guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); +@@ -1659,16 +1640,6 @@ static enum ssb_mitigation __init __ssb_ + } + + /* +- * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper +- * bit in the mask to allow guests to use the mitigation even in the +- * case where the host does not enable it. +- */ +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || +- static_cpu_has(X86_FEATURE_AMD_SSBD)) { +- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; +- } +- +- /* + * We have three CPU feature flags that are in play here: + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. + * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass diff --git a/queue-5.15/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch b/queue-5.15/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch new file mode 100644 index 00000000000..5592e432708 --- /dev/null +++ b/queue-5.15/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch @@ -0,0 +1,56 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:08 +0200 +Subject: x86/speculation: Use cached host SPEC_CTRL value for guest entry/exit + +From: Josh Poimboeuf + +commit bbb69e8bee1bd882784947095ffb2bfe0f7c9470 upstream. + +There's no need to recalculate the host value for every entry/exit. +Just use the cached value in spec_ctrl_current(). 
+ +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/bugs.c | 12 +----------- + 1 file changed, 1 insertion(+), 11 deletions(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -208,7 +208,7 @@ void __init check_bugs(void) + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +- u64 msrval, guestval, hostval = x86_spec_ctrl_base; ++ u64 msrval, guestval, hostval = spec_ctrl_current(); + struct thread_info *ti = current_thread_info(); + + /* Is MSR_SPEC_CTRL implemented ? */ +@@ -221,15 +221,6 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, + guestval = hostval & ~x86_spec_ctrl_mask; + guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; + +- /* SSBD controlled in MSR_SPEC_CTRL */ +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || +- static_cpu_has(X86_FEATURE_AMD_SSBD)) +- hostval |= ssbd_tif_to_spec_ctrl(ti->flags); +- +- /* Conditional STIBP enabled? */ +- if (static_branch_unlikely(&switch_to_cond_stibp)) +- hostval |= stibp_tif_to_spec_ctrl(ti->flags); +- + if (hostval != guestval) { + msrval = setguest ? 
guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); +@@ -1390,7 +1381,6 @@ static void __init spectre_v2_select_mit + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + + if (spectre_v2_in_ibrs_mode(mode)) { +- /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + write_spec_ctrl_current(x86_spec_ctrl_base, true); + } diff --git a/queue-5.15/x86-static_call-serialize-__static_call_fixup-properly.patch b/queue-5.15/x86-static_call-serialize-__static_call_fixup-properly.patch new file mode 100644 index 00000000000..aaa7c4da4d2 --- /dev/null +++ b/queue-5.15/x86-static_call-serialize-__static_call_fixup-properly.patch @@ -0,0 +1,73 @@ +From c27c753ea6fd1237f4f96abf8b623d7bab505513 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 12 Jul 2022 14:01:06 +0200 +Subject: x86/static_call: Serialize __static_call_fixup() properly + +From: Thomas Gleixner + +commit c27c753ea6fd1237f4f96abf8b623d7bab505513 upstream. + +__static_call_fixup() invokes __static_call_transform() without holding +text_mutex, which causes lockdep to complain in text_poke_bp(). + +Adding the proper locking cures that, but as this is either used during +early boot or during module finalizing, it's not required to use +text_poke_bp(). Add an argument to __static_call_transform() which tells +it to use text_poke_early() for it. 
+ +Fixes: ee88d363d156 ("x86,static_call: Use alternative RET encoding") +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/static_call.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/arch/x86/kernel/static_call.c ++++ b/arch/x86/kernel/static_call.c +@@ -25,7 +25,8 @@ static const u8 xor5rax[] = { 0x2e, 0x2e + + static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; + +-static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) ++static void __ref __static_call_transform(void *insn, enum insn_type type, ++ void *func, bool modinit) + { + const void *emulate = NULL; + int size = CALL_INSN_SIZE; +@@ -60,7 +61,7 @@ static void __ref __static_call_transfor + if (memcmp(insn, code, size) == 0) + return; + +- if (unlikely(system_state == SYSTEM_BOOTING)) ++ if (system_state == SYSTEM_BOOTING || modinit) + return text_poke_early(insn, code, size); + + text_poke_bp(insn, code, size, emulate); +@@ -108,12 +109,12 @@ void arch_static_call_transform(void *si + + if (tramp) { + __static_call_validate(tramp, true); +- __static_call_transform(tramp, __sc_insn(!func, true), func); ++ __static_call_transform(tramp, __sc_insn(!func, true), func, false); + } + + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { + __static_call_validate(site, tail); +- __static_call_transform(site, __sc_insn(!func, tail), func); ++ __static_call_transform(site, __sc_insn(!func, tail), func, false); + } + + mutex_unlock(&text_mutex); +@@ -139,8 +140,10 @@ bool __static_call_fixup(void *tramp, u8 + return false; + } + ++ mutex_lock(&text_mutex); + if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) +- __static_call_transform(tramp, RET, NULL); ++ __static_call_transform(tramp, RET, NULL, true); ++ mutex_unlock(&text_mutex); + + return true; + } diff --git a/queue-5.15/x86-static_call-use-alternative-ret-encoding.patch 
b/queue-5.15/x86-static_call-use-alternative-ret-encoding.patch new file mode 100644 index 00000000000..f452dba0c4d --- /dev/null +++ b/queue-5.15/x86-static_call-use-alternative-ret-encoding.patch @@ -0,0 +1,182 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:39 +0200 +Subject: x86,static_call: Use alternative RET encoding + +From: Peter Zijlstra + +commit ee88d363d15617ff50ac24fab0ffec11113b2aeb upstream. + +In addition to teaching static_call about the new way to spell 'RET', +there is an added complication in that static_call() is allowed to +rewrite text before it is known which particular spelling is required. + +In order to deal with this, have a static_call specific fixup in the +apply_returns() 'alternative' patching routine that will rewrite the +static_call trampoline to match the definite sequence. + +This in turn creates the problem of uniquely identifying static call +trampolines. Currently trampolines are 8 bytes, the first 5 being the +jmp.d32/ret sequence and the final 3 a byte sequence that spells out +'SCT'. + +This sequence is used in __static_call_validate() to ensure it is +patching a trampoline and not a random other jmp.d32. That is, +false-positives shouldn't be plentiful, but aren't a big concern. + +OTOH the new __static_call_fixup() must not have false-positives, and +'SCT' decodes to the somewhat weird but semi plausible sequence: + + push %rbx + rex.XB push %r12 + +Additionally, there are SLS concerns with immediate jumps. Combined it +seems like a good moment to change the signature to a single 3 byte +trap instruction that is unique to this usage and will not ever get +generated by accident. 
+ +As such, change the signature to: '0x0f, 0xb9, 0xcc', which decodes +to: + + ud1 %esp, %ecx + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: skip validation as introduced by 2105a92748e8 ("static_call,x86: Robustify trampoline patching")] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/static_call.h | 17 ++++++++++++++++ + arch/x86/kernel/alternative.c | 12 +++++++---- + arch/x86/kernel/static_call.c | 38 ++++++++++++++++++++++++++++++++++++- + 3 files changed, 62 insertions(+), 5 deletions(-) + +--- a/arch/x86/include/asm/static_call.h ++++ b/arch/x86/include/asm/static_call.h +@@ -21,6 +21,16 @@ + * relative displacement across sections. + */ + ++/* ++ * The trampoline is 8 bytes and of the general form: ++ * ++ * jmp.d32 \func ++ * ud1 %esp, %ecx ++ * ++ * That trailing #UD provides both a speculation stop and serves as a unique ++ * 3 byte signature identifying static call trampolines. Also see tramp_ud[] ++ * and __static_call_fixup(). ++ */ + #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ + asm(".pushsection .static_call.text, \"ax\" \n" \ + ".align 4 \n" \ +@@ -34,8 +44,13 @@ + #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") + ++#ifdef CONFIG_RETPOLINE ++#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ ++ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") ++#else + #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") ++#endif + + + #define ARCH_ADD_TRAMP_KEY(name) \ +@@ -44,4 +59,6 @@ + ".long " STATIC_CALL_KEY_STR(name) " - . 
\n" \ + ".popsection \n") + ++extern bool __static_call_fixup(void *tramp, u8 op, void *dest); ++ + #endif /* _ASM_STATIC_CALL_H */ +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -538,18 +538,22 @@ void __init_or_module noinline apply_ret + s32 *s; + + for (s = start; s < end; s++) { +- void *addr = (void *)s + *s; ++ void *dest = NULL, *addr = (void *)s + *s; + struct insn insn; + int len, ret; + u8 bytes[16]; +- u8 op1; ++ u8 op; + + ret = insn_decode_kernel(&insn, addr); + if (WARN_ON_ONCE(ret < 0)) + continue; + +- op1 = insn.opcode.bytes[0]; +- if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE)) ++ op = insn.opcode.bytes[0]; ++ if (op == JMP32_INSN_OPCODE) ++ dest = addr + insn.length + insn.immediate.value; ++ ++ if (__static_call_fixup(addr, op, dest) || ++ WARN_ON_ONCE(dest != &__x86_return_thunk)) + continue; + + DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", +--- a/arch/x86/kernel/static_call.c ++++ b/arch/x86/kernel/static_call.c +@@ -12,6 +12,13 @@ enum insn_type { + }; + + /* ++ * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such ++ * that there is no false-positive trampoline identification while also being a ++ * speculation stop. 
++ */ ++static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc }; ++ ++/* + * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax + */ + static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; +@@ -43,7 +50,10 @@ static void __ref __static_call_transfor + break; + + case RET: +- code = &retinsn; ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) ++ code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); ++ else ++ code = &retinsn; + break; + } + +@@ -109,3 +119,29 @@ void arch_static_call_transform(void *si + mutex_unlock(&text_mutex); + } + EXPORT_SYMBOL_GPL(arch_static_call_transform); ++ ++#ifdef CONFIG_RETPOLINE ++/* ++ * This is called by apply_returns() to fix up static call trampolines, ++ * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as ++ * having a return trampoline. ++ * ++ * The problem is that static_call() is available before determining ++ * X86_FEATURE_RETHUNK and, by implication, running alternatives. ++ * ++ * This means that __static_call_transform() above can have overwritten the ++ * return trampoline and we now need to fix things up to be consistent. ++ */ ++bool __static_call_fixup(void *tramp, u8 op, void *dest) ++{ ++ if (memcmp(tramp+5, tramp_ud, 3)) { ++ /* Not a trampoline site, not our problem. 
*/ ++ return false; ++ } ++ ++ if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) ++ __static_call_transform(tramp, RET, NULL); ++ ++ return true; ++} ++#endif diff --git a/queue-5.15/x86-traps-use-pt_regs-directly-in-fixup_bad_iret.patch b/queue-5.15/x86-traps-use-pt_regs-directly-in-fixup_bad_iret.patch new file mode 100644 index 00000000000..b379f5e38c7 --- /dev/null +++ b/queue-5.15/x86-traps-use-pt_regs-directly-in-fixup_bad_iret.patch @@ -0,0 +1,100 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Lai Jiangshan +Date: Thu, 21 Apr 2022 22:10:48 +0800 +Subject: x86/traps: Use pt_regs directly in fixup_bad_iret() + +From: Lai Jiangshan + +commit 0aca53c6b522f8d6e2681ca875acbbe105f5fdcf upstream. + +Always stash the address error_entry() is going to return to, in %r12 +and get rid of the void *error_entry_ret; slot in struct bad_iret_stack +which was supposed to account for it and pt_regs pushed on the stack. + +After this, both fixup_bad_iret() and sync_regs() can work on a struct +pt_regs pointer directly. + + [ bp: Rewrite commit message, touch ups. ] + +Signed-off-by: Lai Jiangshan +Signed-off-by: Borislav Petkov +Link: https://lore.kernel.org/r/20220503032107.680190-2-jiangshanlai@gmail.com +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_64.S | 5 ++++- + arch/x86/include/asm/traps.h | 2 +- + arch/x86/kernel/traps.c | 19 +++++++------------ + 3 files changed, 12 insertions(+), 14 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1041,9 +1041,12 @@ SYM_CODE_START_LOCAL(error_entry) + * Pretend that the exception came from user mode: set up pt_regs + * as if we faulted immediately after IRET. 
+ */ +- mov %rsp, %rdi ++ popq %r12 /* save return addr in %r12 */ ++ movq %rsp, %rdi /* arg0 = pt_regs pointer */ + call fixup_bad_iret + mov %rax, %rsp ++ ENCODE_FRAME_POINTER ++ pushq %r12 + jmp .Lerror_entry_from_usermode_after_swapgs + SYM_CODE_END(error_entry) + +--- a/arch/x86/include/asm/traps.h ++++ b/arch/x86/include/asm/traps.h +@@ -13,7 +13,7 @@ + #ifdef CONFIG_X86_64 + asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); + asmlinkage __visible notrace +-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); ++struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs); + void __init trap_init(void); + asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs); + #endif +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -762,14 +762,10 @@ sync: + } + #endif + +-struct bad_iret_stack { +- void *error_entry_ret; +- struct pt_regs regs; +-}; +- +-asmlinkage __visible noinstr +-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) ++asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs) + { ++ struct pt_regs tmp, *new_stack; ++ + /* + * This is called from entry_64.S early in handling a fault + * caused by a bad iret to user mode. To handle the fault +@@ -778,19 +774,18 @@ struct bad_iret_stack *fixup_bad_iret(st + * just below the IRET frame) and we want to pretend that the + * exception came from the IRET target. + */ +- struct bad_iret_stack tmp, *new_stack = +- (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; ++ new_stack = (struct pt_regs *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; + + /* Copy the IRET target to the temporary storage. */ +- __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8); ++ __memcpy(&tmp.ip, (void *)bad_regs->sp, 5*8); + + /* Copy the remainder of the stack from the current stack. 
*/ +- __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip)); ++ __memcpy(&tmp, bad_regs, offsetof(struct pt_regs, ip)); + + /* Update the entry stack */ + __memcpy(new_stack, &tmp, sizeof(tmp)); + +- BUG_ON(!user_mode(&new_stack->regs)); ++ BUG_ON(!user_mode(new_stack)); + return new_stack; + } + #endif diff --git a/queue-5.15/x86-undo-return-thunk-damage.patch b/queue-5.15/x86-undo-return-thunk-damage.patch new file mode 100644 index 00000000000..c474af33024 --- /dev/null +++ b/queue-5.15/x86-undo-return-thunk-damage.patch @@ -0,0 +1,195 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:37 +0200 +Subject: x86: Undo return-thunk damage + +From: Peter Zijlstra + +commit 15e67227c49a57837108acfe1c80570e1bd9f962 upstream. + +Introduce X86_FEATURE_RETHUNK for those afflicted with needing this. + + [ bp: Do only INT3 padding - simpler. ] + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: CONFIG_STACK_VALIDATION vs CONFIG_OBJTOOL] +[cascardo: no IBT support] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/alternative.h | 1 + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/disabled-features.h | 3 + + arch/x86/kernel/alternative.c | 60 +++++++++++++++++++++++++++++++ + arch/x86/kernel/module.c | 8 +++- + arch/x86/kernel/vmlinux.lds.S | 7 +++ + 6 files changed, 78 insertions(+), 2 deletions(-) + +--- a/arch/x86/include/asm/alternative.h ++++ b/arch/x86/include/asm/alternative.h +@@ -76,6 +76,7 @@ extern int alternatives_patched; + extern void alternative_instructions(void); + extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + extern void apply_retpolines(s32 *start, s32 *end); ++extern void apply_returns(s32 *start, s32 *end); + + struct module; + +--- a/arch/x86/include/asm/cpufeatures.h ++++ 
b/arch/x86/include/asm/cpufeatures.h +@@ -298,6 +298,7 @@ + /* FREE! (11*32+11) */ + #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ ++#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -60,7 +60,8 @@ + # define DISABLE_RETPOLINE 0 + #else + # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ +- (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) ++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ ++ (1 << (X86_FEATURE_RETHUNK & 31))) + #endif + + /* Force disable because it's broken beyond repair */ +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -115,6 +115,7 @@ static void __init_or_module add_nops(vo + } + + extern s32 __retpoline_sites[], __retpoline_sites_end[]; ++extern s32 __return_sites[], __return_sites_end[]; + extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + extern s32 __smp_locks[], __smp_locks_end[]; + void text_poke_early(void *addr, const void *opcode, size_t len); +@@ -506,9 +507,67 @@ void __init_or_module noinline apply_ret + } + } + ++/* ++ * Rewrite the compiler generated return thunk tail-calls. 
++ * ++ * For example, convert: ++ * ++ * JMP __x86_return_thunk ++ * ++ * into: ++ * ++ * RET ++ */ ++static int patch_return(void *addr, struct insn *insn, u8 *bytes) ++{ ++ int i = 0; ++ ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) ++ return -1; ++ ++ bytes[i++] = RET_INSN_OPCODE; ++ ++ for (; i < insn->length;) ++ bytes[i++] = INT3_INSN_OPCODE; ++ ++ return i; ++} ++ ++void __init_or_module noinline apply_returns(s32 *start, s32 *end) ++{ ++ s32 *s; ++ ++ for (s = start; s < end; s++) { ++ void *addr = (void *)s + *s; ++ struct insn insn; ++ int len, ret; ++ u8 bytes[16]; ++ u8 op1; ++ ++ ret = insn_decode_kernel(&insn, addr); ++ if (WARN_ON_ONCE(ret < 0)) ++ continue; ++ ++ op1 = insn.opcode.bytes[0]; ++ if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE)) ++ continue; ++ ++ DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", ++ addr, addr, insn.length, ++ addr + insn.length + insn.immediate.value); ++ ++ len = patch_return(addr, &insn, bytes); ++ if (len == insn.length) { ++ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); ++ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); ++ text_poke_early(addr, bytes, len); ++ } ++ } ++} + #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ + + void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } ++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } + + #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ + +@@ -824,6 +883,7 @@ void __init alternative_instructions(voi + * those can rewrite the retpoline thunks. 
+ */ + apply_retpolines(__retpoline_sites, __retpoline_sites_end); ++ apply_returns(__return_sites, __return_sites_end); + + /* + * Then patch alternatives, such that those paravirt calls that are in +--- a/arch/x86/kernel/module.c ++++ b/arch/x86/kernel/module.c +@@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr, + { + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, + *para = NULL, *orc = NULL, *orc_ip = NULL, +- *retpolines = NULL; ++ *retpolines = NULL, *returns = NULL; + char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { +@@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr, + orc_ip = s; + if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) + retpolines = s; ++ if (!strcmp(".return_sites", secstrings + s->sh_name)) ++ returns = s; + } + + /* +@@ -285,6 +287,10 @@ int module_finalize(const Elf_Ehdr *hdr, + void *rseg = (void *)retpolines->sh_addr; + apply_retpolines(rseg, rseg + retpolines->sh_size); + } ++ if (returns) { ++ void *rseg = (void *)returns->sh_addr; ++ apply_returns(rseg, rseg + returns->sh_size); ++ } + if (alt) { + /* patch .altinstructions */ + void *aseg = (void *)alt->sh_addr; +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -284,6 +284,13 @@ SECTIONS + *(.retpoline_sites) + __retpoline_sites_end = .; + } ++ ++ . 
= ALIGN(8); ++ .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) { ++ __return_sites = .; ++ *(.return_sites) ++ __return_sites_end = .; ++ } + #endif + + /* diff --git a/queue-5.15/x86-use-return-thunk-in-asm-code.patch b/queue-5.15/x86-use-return-thunk-in-asm-code.patch new file mode 100644 index 00000000000..367952e9f58 --- /dev/null +++ b/queue-5.15/x86-use-return-thunk-in-asm-code.patch @@ -0,0 +1,93 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:45 +0200 +Subject: x86: Use return-thunk in asm code + +From: Peter Zijlstra + +commit aa3d480315ba6c3025a60958e1981072ea37c3df upstream. + +Use the return thunk in asm code. If the thunk isn't needed, it will +get patched into a RET instruction during boot by apply_returns(). + +Since alternatives can't handle relocations outside of the first +instruction, putting a 'jmp __x86_return_thunk' in one is not valid, +therefore carve out the memmove ERMS path into a separate label and jump +to it. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: no RANDSTRUCT_CFLAGS] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/vdso/Makefile | 1 + + arch/x86/include/asm/linkage.h | 8 ++++++++ + arch/x86/lib/memmove_64.S | 7 ++++++- + 3 files changed, 15 insertions(+), 1 deletion(-) + +--- a/arch/x86/entry/vdso/Makefile ++++ b/arch/x86/entry/vdso/Makefile +@@ -92,6 +92,7 @@ endif + endif + + $(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) ++$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO + + # + # vDSO code runs in userspace and -pg doesn't help with profiling anyway. 
+--- a/arch/x86/include/asm/linkage.h ++++ b/arch/x86/include/asm/linkage.h +@@ -18,19 +18,27 @@ + #define __ALIGN_STR __stringify(__ALIGN) + #endif + ++#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#define RET jmp __x86_return_thunk ++#else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS + #define RET ret; int3 + #else + #define RET ret + #endif ++#endif /* CONFIG_RETPOLINE */ + + #else /* __ASSEMBLY__ */ + ++#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#define ASM_RET "jmp __x86_return_thunk\n\t" ++#else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS + #define ASM_RET "ret; int3\n\t" + #else + #define ASM_RET "ret\n\t" + #endif ++#endif /* CONFIG_RETPOLINE */ + + #endif /* __ASSEMBLY__ */ + +--- a/arch/x86/lib/memmove_64.S ++++ b/arch/x86/lib/memmove_64.S +@@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove) + /* FSRM implies ERMS => no length checks, do the copy directly */ + .Lmemmove_begin_forward: + ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM +- ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS ++ ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS + + /* + * movsq instruction have many startup latency +@@ -206,6 +206,11 @@ SYM_FUNC_START(__memmove) + movb %r11b, (%rdi) + 13: + RET ++ ++.Lmemmove_erms: ++ movq %rdx, %rcx ++ rep movsb ++ RET + SYM_FUNC_END(__memmove) + SYM_FUNC_END_ALIAS(memmove) + EXPORT_SYMBOL(__memmove) diff --git a/queue-5.15/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch b/queue-5.15/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch new file mode 100644 index 00000000000..078dd9c3a4e --- /dev/null +++ b/queue-5.15/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch @@ -0,0 +1,47 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:43 +0200 +Subject: x86/vsyscall_emu/64: Don't use RET in vsyscall emulation + +From: Peter Zijlstra + +commit 
15583e514eb16744b80be85dea0774ece153177d upstream. + +This is userspace code and doesn't play by the normal kernel rules. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/vsyscall/vsyscall_emu_64.S | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S ++++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S +@@ -19,17 +19,20 @@ __vsyscall_page: + + mov $__NR_gettimeofday, %rax + syscall +- RET ++ ret ++ int3 + + .balign 1024, 0xcc + mov $__NR_time, %rax + syscall +- RET ++ ret ++ int3 + + .balign 1024, 0xcc + mov $__NR_getcpu, %rax + syscall +- RET ++ ret ++ int3 + + .balign 4096, 0xcc + diff --git a/queue-5.15/x86-xen-add-untrain_ret.patch b/queue-5.15/x86-xen-add-untrain_ret.patch new file mode 100644 index 00000000000..53f51adfd90 --- /dev/null +++ b/queue-5.15/x86-xen-add-untrain_ret.patch @@ -0,0 +1,45 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:16:01 +0200 +Subject: x86/xen: Add UNTRAIN_RET + +From: Peter Zijlstra + +commit d147553b64bad34d2f92cb7d8ba454ae95c3baac upstream. + +Ensure the Xen entry also passes through UNTRAIN_RET. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_64.S | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -320,6 +320,12 @@ SYM_CODE_END(ret_from_fork) + #endif + .endm + ++SYM_CODE_START_LOCAL(xen_error_entry) ++ UNWIND_HINT_FUNC ++ UNTRAIN_RET ++ RET ++SYM_CODE_END(xen_error_entry) ++ + /** + * idtentry_body - Macro to emit code calling the C function + * @cfunc: C function to be called +@@ -339,7 +345,7 @@ SYM_CODE_END(ret_from_fork) + * switch the CR3. So it can skip invoking error_entry(). + */ + ALTERNATIVE "call error_entry; movq %rax, %rsp", \ +- "", X86_FEATURE_XENPV ++ "call xen_error_entry", X86_FEATURE_XENPV + + ENCODE_FRAME_POINTER + UNWIND_HINT_REGS diff --git a/queue-5.15/x86-xen-rename-sys-entry-points.patch b/queue-5.15/x86-xen-rename-sys-entry-points.patch new file mode 100644 index 00000000000..8ee6c728005 --- /dev/null +++ b/queue-5.15/x86-xen-rename-sys-entry-points.patch @@ -0,0 +1,133 @@ +From foo@baz Tue Jul 12 05:06:57 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:16:00 +0200 +Subject: x86/xen: Rename SYS* entry points + +From: Peter Zijlstra + +commit b75b7f8ef1148be1b9321ffc2f6c19238904b438 upstream. + +Native SYS{CALL,ENTER} entry points are called +entry_SYS{CALL,ENTER}_{64,compat}, make sure the Xen versions are +named consistently. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/xen/setup.c | 6 +++--- + arch/x86/xen/xen-asm.S | 20 ++++++++++---------- + arch/x86/xen/xen-ops.h | 6 +++--- + 3 files changed, 16 insertions(+), 16 deletions(-) + +--- a/arch/x86/xen/setup.c ++++ b/arch/x86/xen/setup.c +@@ -922,7 +922,7 @@ void xen_enable_sysenter(void) + if (!boot_cpu_has(sysenter_feature)) + return; + +- ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); ++ ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat); + if(ret != 0) + setup_clear_cpu_cap(sysenter_feature); + } +@@ -931,7 +931,7 @@ void xen_enable_syscall(void) + { + int ret; + +- ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); ++ ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64); + if (ret != 0) { + printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); + /* Pretty fatal; 64-bit userspace has no other +@@ -940,7 +940,7 @@ void xen_enable_syscall(void) + + if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { + ret = register_callback(CALLBACKTYPE_syscall32, +- xen_syscall32_target); ++ xen_entry_SYSCALL_compat); + if (ret != 0) + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); + } +--- a/arch/x86/xen/xen-asm.S ++++ b/arch/x86/xen/xen-asm.S +@@ -227,7 +227,7 @@ SYM_CODE_END(xenpv_restore_regs_and_retu + */ + + /* Normal 64-bit system call target */ +-SYM_CODE_START(xen_syscall_target) ++SYM_CODE_START(xen_entry_SYSCALL_64) + UNWIND_HINT_EMPTY + popq %rcx + popq %r11 +@@ -241,12 +241,12 @@ SYM_CODE_START(xen_syscall_target) + movq $__USER_CS, 1*8(%rsp) + + jmp entry_SYSCALL_64_after_hwframe +-SYM_CODE_END(xen_syscall_target) ++SYM_CODE_END(xen_entry_SYSCALL_64) + + #ifdef CONFIG_IA32_EMULATION + + /* 32-bit compat syscall target */ +-SYM_CODE_START(xen_syscall32_target) 
++SYM_CODE_START(xen_entry_SYSCALL_compat) + UNWIND_HINT_EMPTY + popq %rcx + popq %r11 +@@ -260,10 +260,10 @@ SYM_CODE_START(xen_syscall32_target) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSCALL_compat_after_hwframe +-SYM_CODE_END(xen_syscall32_target) ++SYM_CODE_END(xen_entry_SYSCALL_compat) + + /* 32-bit compat sysenter target */ +-SYM_CODE_START(xen_sysenter_target) ++SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_EMPTY + /* + * NB: Xen is polite and clears TF from EFLAGS for us. This means +@@ -281,18 +281,18 @@ SYM_CODE_START(xen_sysenter_target) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSENTER_compat_after_hwframe +-SYM_CODE_END(xen_sysenter_target) ++SYM_CODE_END(xen_entry_SYSENTER_compat) + + #else /* !CONFIG_IA32_EMULATION */ + +-SYM_CODE_START(xen_syscall32_target) +-SYM_CODE_START(xen_sysenter_target) ++SYM_CODE_START(xen_entry_SYSCALL_compat) ++SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_EMPTY + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ + mov $-ENOSYS, %rax + pushq $0 + jmp hypercall_iret +-SYM_CODE_END(xen_sysenter_target) +-SYM_CODE_END(xen_syscall32_target) ++SYM_CODE_END(xen_entry_SYSENTER_compat) ++SYM_CODE_END(xen_entry_SYSCALL_compat) + + #endif /* CONFIG_IA32_EMULATION */ +--- a/arch/x86/xen/xen-ops.h ++++ b/arch/x86/xen/xen-ops.h +@@ -10,10 +10,10 @@ + /* These are code, but not functions. Defined in entry.S */ + extern const char xen_failsafe_callback[]; + +-void xen_sysenter_target(void); ++void xen_entry_SYSENTER_compat(void); + #ifdef CONFIG_X86_64 +-void xen_syscall_target(void); +-void xen_syscall32_target(void); ++void xen_entry_SYSCALL_64(void); ++void xen_entry_SYSCALL_compat(void); + #endif + + extern void *xen_initial_gdt; -- 2.47.3