5.18-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 12 Jul 2022 16:09:07 +0000 (18:09 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 12 Jul 2022 16:09:07 +0000 (18:09 +0200)
added patches:
intel_idle-disable-ibrs-during-long-idle.patch
kvm-vmx-convert-launched-argument-to-flags.patch
kvm-vmx-fix-ibrs-handling-after-vmexit.patch
kvm-vmx-flatten-__vmx_vcpu_run.patch
kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch
kvm-vmx-prevent-rsb-underflow-before-vmenter.patch
objtool-add-entry-unret-validation.patch
objtool-re-add-unwind_hint_-save_restore.patch
objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch
objtool-treat-.text.__x86.-as-noinstr.patch
objtool-update-retpoline-validation.patch
x86-add-magic-amd-return-thunk.patch
x86-bpf-use-alternative-ret-encoding.patch
x86-bugs-add-amd-retbleed-boot-parameter.patch
x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch
x86-bugs-add-retbleed-ibpb.patch
x86-bugs-do-ibpb-fallback-check-only-once.patch
x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch
x86-bugs-enable-stibp-for-jmp2ret.patch
x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch
x86-bugs-optimize-spec_ctrl-msr-writes.patch
x86-bugs-report-amd-retbleed-vulnerability.patch
x86-bugs-report-intel-retbleed-vulnerability.patch
x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch
x86-common-stamp-out-the-stepping-madness.patch
x86-cpu-amd-add-spectral-chicken.patch
x86-cpu-amd-enumerate-btc_no.patch
x86-cpufeatures-move-retpoline-flags-to-word-11.patch
x86-entry-add-kernel-ibrs-implementation.patch
x86-entry-avoid-very-early-ret.patch
x86-entry-don-t-call-error_entry-for-xenpv.patch
x86-entry-move-push_and_clear_regs-back-into-error_entry.patch
x86-entry-move-push_and_clear_regs-out-of-error_entry.patch
x86-entry-remove-skip_r11rcx.patch
x86-entry-switch-the-stack-after-error_entry-returns.patch
x86-ftrace-use-alternative-ret-encoding.patch
x86-kexec-disable-ret-on-kexec.patch
x86-kvm-fix-setcc-emulation-for-return-thunks.patch
x86-kvm-vmx-make-noinstr-clean.patch
x86-objtool-create-.return_sites.patch
x86-retbleed-add-fine-grained-kconfig-knobs.patch
x86-retpoline-cleanup-some-ifdefery.patch
x86-retpoline-swizzle-retpoline-thunk.patch
x86-retpoline-use-mfunction-return.patch
x86-sev-avoid-using-__x86_return_thunk.patch
x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch
x86-speculation-disable-rrsba-behavior.patch
x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch
x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch
x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch
x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch
x86-speculation-remove-x86_spec_ctrl_mask.patch
x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch
x86-static_call-serialize-__static_call_fixup-properly.patch
x86-static_call-use-alternative-ret-encoding.patch
x86-traps-use-pt_regs-directly-in-fixup_bad_iret.patch
x86-undo-return-thunk-damage.patch
x86-use-return-thunk-in-asm-code.patch
x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch
x86-xen-add-untrain_ret.patch
x86-xen-rename-sys-entry-points.patch

62 files changed:
queue-5.18/intel_idle-disable-ibrs-during-long-idle.patch [new file with mode: 0644]
queue-5.18/kvm-vmx-convert-launched-argument-to-flags.patch [new file with mode: 0644]
queue-5.18/kvm-vmx-fix-ibrs-handling-after-vmexit.patch [new file with mode: 0644]
queue-5.18/kvm-vmx-flatten-__vmx_vcpu_run.patch [new file with mode: 0644]
queue-5.18/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch [new file with mode: 0644]
queue-5.18/kvm-vmx-prevent-rsb-underflow-before-vmenter.patch [new file with mode: 0644]
queue-5.18/objtool-add-entry-unret-validation.patch [new file with mode: 0644]
queue-5.18/objtool-re-add-unwind_hint_-save_restore.patch [new file with mode: 0644]
queue-5.18/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch [new file with mode: 0644]
queue-5.18/objtool-treat-.text.__x86.-as-noinstr.patch [new file with mode: 0644]
queue-5.18/objtool-update-retpoline-validation.patch [new file with mode: 0644]
queue-5.18/series [new file with mode: 0644]
queue-5.18/x86-add-magic-amd-return-thunk.patch [new file with mode: 0644]
queue-5.18/x86-bpf-use-alternative-ret-encoding.patch [new file with mode: 0644]
queue-5.18/x86-bugs-add-amd-retbleed-boot-parameter.patch [new file with mode: 0644]
queue-5.18/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch [new file with mode: 0644]
queue-5.18/x86-bugs-add-retbleed-ibpb.patch [new file with mode: 0644]
queue-5.18/x86-bugs-do-ibpb-fallback-check-only-once.patch [new file with mode: 0644]
queue-5.18/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch [new file with mode: 0644]
queue-5.18/x86-bugs-enable-stibp-for-jmp2ret.patch [new file with mode: 0644]
queue-5.18/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch [new file with mode: 0644]
queue-5.18/x86-bugs-optimize-spec_ctrl-msr-writes.patch [new file with mode: 0644]
queue-5.18/x86-bugs-report-amd-retbleed-vulnerability.patch [new file with mode: 0644]
queue-5.18/x86-bugs-report-intel-retbleed-vulnerability.patch [new file with mode: 0644]
queue-5.18/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch [new file with mode: 0644]
queue-5.18/x86-common-stamp-out-the-stepping-madness.patch [new file with mode: 0644]
queue-5.18/x86-cpu-amd-add-spectral-chicken.patch [new file with mode: 0644]
queue-5.18/x86-cpu-amd-enumerate-btc_no.patch [new file with mode: 0644]
queue-5.18/x86-cpufeatures-move-retpoline-flags-to-word-11.patch [new file with mode: 0644]
queue-5.18/x86-entry-add-kernel-ibrs-implementation.patch [new file with mode: 0644]
queue-5.18/x86-entry-avoid-very-early-ret.patch [new file with mode: 0644]
queue-5.18/x86-entry-don-t-call-error_entry-for-xenpv.patch [new file with mode: 0644]
queue-5.18/x86-entry-move-push_and_clear_regs-back-into-error_entry.patch [new file with mode: 0644]
queue-5.18/x86-entry-move-push_and_clear_regs-out-of-error_entry.patch [new file with mode: 0644]
queue-5.18/x86-entry-remove-skip_r11rcx.patch [new file with mode: 0644]
queue-5.18/x86-entry-switch-the-stack-after-error_entry-returns.patch [new file with mode: 0644]
queue-5.18/x86-ftrace-use-alternative-ret-encoding.patch [new file with mode: 0644]
queue-5.18/x86-kexec-disable-ret-on-kexec.patch [new file with mode: 0644]
queue-5.18/x86-kvm-fix-setcc-emulation-for-return-thunks.patch [new file with mode: 0644]
queue-5.18/x86-kvm-vmx-make-noinstr-clean.patch [new file with mode: 0644]
queue-5.18/x86-objtool-create-.return_sites.patch [new file with mode: 0644]
queue-5.18/x86-retbleed-add-fine-grained-kconfig-knobs.patch [new file with mode: 0644]
queue-5.18/x86-retpoline-cleanup-some-ifdefery.patch [new file with mode: 0644]
queue-5.18/x86-retpoline-swizzle-retpoline-thunk.patch [new file with mode: 0644]
queue-5.18/x86-retpoline-use-mfunction-return.patch [new file with mode: 0644]
queue-5.18/x86-sev-avoid-using-__x86_return_thunk.patch [new file with mode: 0644]
queue-5.18/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch [new file with mode: 0644]
queue-5.18/x86-speculation-disable-rrsba-behavior.patch [new file with mode: 0644]
queue-5.18/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch [new file with mode: 0644]
queue-5.18/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch [new file with mode: 0644]
queue-5.18/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch [new file with mode: 0644]
queue-5.18/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch [new file with mode: 0644]
queue-5.18/x86-speculation-remove-x86_spec_ctrl_mask.patch [new file with mode: 0644]
queue-5.18/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch [new file with mode: 0644]
queue-5.18/x86-static_call-serialize-__static_call_fixup-properly.patch [new file with mode: 0644]
queue-5.18/x86-static_call-use-alternative-ret-encoding.patch [new file with mode: 0644]
queue-5.18/x86-traps-use-pt_regs-directly-in-fixup_bad_iret.patch [new file with mode: 0644]
queue-5.18/x86-undo-return-thunk-damage.patch [new file with mode: 0644]
queue-5.18/x86-use-return-thunk-in-asm-code.patch [new file with mode: 0644]
queue-5.18/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch [new file with mode: 0644]
queue-5.18/x86-xen-add-untrain_ret.patch [new file with mode: 0644]
queue-5.18/x86-xen-rename-sys-entry-points.patch [new file with mode: 0644]

diff --git a/queue-5.18/intel_idle-disable-ibrs-during-long-idle.patch b/queue-5.18/intel_idle-disable-ibrs-during-long-idle.patch
new file mode 100644 (file)
index 0000000..dc11888
--- /dev/null
@@ -0,0 +1,182 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:58 +0200
+Subject: intel_idle: Disable IBRS during long idle
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit bf5835bcdb9635c97f85120dba9bfa21e111130f upstream.
+
+Having IBRS enabled while the SMT sibling is idle unnecessarily slows
+down the running sibling. OTOH, disabling IBRS around idle takes two
+MSR writes, which will increase the idle latency.
+
+Therefore, only disable IBRS around deeper idle states. Shallow idle
+states are bounded by the tick in duration, since NOHZ is not allowed
+for them by virtue of their short target residency.
+
+Only do this for mwait-driven idle, since that keeps interrupts disabled
+across idle, which makes disabling IBRS vs IRQ-entry a non-issue.
+
+Note: C6 is a random threshold, most importantly C1 probably shouldn't
+disable IBRS, benchmarking needed.
+
+Suggested-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |    1 
+ arch/x86/kernel/cpu/bugs.c           |    6 ++++
+ drivers/idle/intel_idle.c            |   44 ++++++++++++++++++++++++++++++-----
+ 3 files changed, 45 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -255,6 +255,7 @@ static inline void indirect_branch_predi
+ /* The Intel SPEC CTRL MSR base value cache */
+ extern u64 x86_spec_ctrl_base;
+ extern void write_spec_ctrl_current(u64 val, bool force);
++extern u64 spec_ctrl_current(void);
+ /*
+  * With retpoline, we must use IBRS to restrict branch prediction
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -79,6 +79,12 @@ void write_spec_ctrl_current(u64 val, bo
+               wrmsrl(MSR_IA32_SPEC_CTRL, val);
+ }
++u64 spec_ctrl_current(void)
++{
++      return this_cpu_read(x86_spec_ctrl_current);
++}
++EXPORT_SYMBOL_GPL(spec_ctrl_current);
++
+ /*
+  * The vendor and possibly platform specific bits which can be modified in
+  * x86_spec_ctrl_base.
+--- a/drivers/idle/intel_idle.c
++++ b/drivers/idle/intel_idle.c
+@@ -47,11 +47,13 @@
+ #include <linux/tick.h>
+ #include <trace/events/power.h>
+ #include <linux/sched.h>
++#include <linux/sched/smt.h>
+ #include <linux/notifier.h>
+ #include <linux/cpu.h>
+ #include <linux/moduleparam.h>
+ #include <asm/cpu_device_id.h>
+ #include <asm/intel-family.h>
++#include <asm/nospec-branch.h>
+ #include <asm/mwait.h>
+ #include <asm/msr.h>
+@@ -106,6 +108,12 @@ static unsigned int mwait_substates __in
+ #define CPUIDLE_FLAG_ALWAYS_ENABLE    BIT(15)
+ /*
++ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
++ * above.
++ */
++#define CPUIDLE_FLAG_IBRS             BIT(16)
++
++/*
+  * MWAIT takes an 8-bit "hint" in EAX "suggesting"
+  * the C-state (top nibble) and sub-state (bottom nibble)
+  * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
+@@ -159,6 +167,24 @@ static __cpuidle int intel_idle_irq(stru
+       return ret;
+ }
++static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
++                                   struct cpuidle_driver *drv, int index)
++{
++      bool smt_active = sched_smt_active();
++      u64 spec_ctrl = spec_ctrl_current();
++      int ret;
++
++      if (smt_active)
++              wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++
++      ret = __intel_idle(dev, drv, index);
++
++      if (smt_active)
++              wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
++
++      return ret;
++}
++
+ /**
+  * intel_idle_s2idle - Ask the processor to enter the given idle state.
+  * @dev: cpuidle device of the target CPU.
+@@ -680,7 +706,7 @@ static struct cpuidle_state skl_cstates[
+       {
+               .name = "C6",
+               .desc = "MWAIT 0x20",
+-              .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+               .exit_latency = 85,
+               .target_residency = 200,
+               .enter = &intel_idle,
+@@ -688,7 +714,7 @@ static struct cpuidle_state skl_cstates[
+       {
+               .name = "C7s",
+               .desc = "MWAIT 0x33",
+-              .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+               .exit_latency = 124,
+               .target_residency = 800,
+               .enter = &intel_idle,
+@@ -696,7 +722,7 @@ static struct cpuidle_state skl_cstates[
+       {
+               .name = "C8",
+               .desc = "MWAIT 0x40",
+-              .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+               .exit_latency = 200,
+               .target_residency = 800,
+               .enter = &intel_idle,
+@@ -704,7 +730,7 @@ static struct cpuidle_state skl_cstates[
+       {
+               .name = "C9",
+               .desc = "MWAIT 0x50",
+-              .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+               .exit_latency = 480,
+               .target_residency = 5000,
+               .enter = &intel_idle,
+@@ -712,7 +738,7 @@ static struct cpuidle_state skl_cstates[
+       {
+               .name = "C10",
+               .desc = "MWAIT 0x60",
+-              .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+               .exit_latency = 890,
+               .target_residency = 5000,
+               .enter = &intel_idle,
+@@ -741,7 +767,7 @@ static struct cpuidle_state skx_cstates[
+       {
+               .name = "C6",
+               .desc = "MWAIT 0x20",
+-              .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+               .exit_latency = 133,
+               .target_residency = 600,
+               .enter = &intel_idle,
+@@ -1686,6 +1712,12 @@ static void __init intel_idle_init_cstat
+               if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
+                       drv->states[drv->state_count].enter = intel_idle_irq;
++              if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
++                  cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
++                      WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
++                      drv->states[drv->state_count].enter = intel_idle_ibrs;
++              }
++
+               if ((disabled_states_mask & BIT(drv->state_count)) ||
+                   ((icpu->use_acpi || force_use_acpi) &&
+                    intel_idle_off_by_default(mwait_hint) &&
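
The idle-path change above boils down to the following C sketch, offered purely as a reading aid: IBRS is dropped only while an SMT sibling could benefit, and only for states deep enough to carry CPUIDLE_FLAG_IBRS. The helper names (smt_sibling_active, cached_spec_ctrl, wrmsr_spec_ctrl, do_mwait_idle) are stand-ins for sched_smt_active(), spec_ctrl_current(), wrmsrl() and __intel_idle(), not real kernel APIs.

#include <stdint.h>
#include <stdbool.h>

extern bool smt_sibling_active(void);        /* stand-in for sched_smt_active() */
extern uint64_t cached_spec_ctrl(void);      /* stand-in for spec_ctrl_current() */
extern void wrmsr_spec_ctrl(uint64_t val);   /* stand-in for wrmsrl(MSR_IA32_SPEC_CTRL, v) */
extern int do_mwait_idle(int state);         /* stand-in for __intel_idle() */

/* Enter a deep idle state with IBRS dropped while the SMT sibling runs. */
static int idle_enter_ibrs(int state)
{
        bool smt = smt_sibling_active();
        uint64_t saved = cached_spec_ctrl();
        int ret;

        if (smt)
                wrmsr_spec_ctrl(0);          /* stop slowing down the running sibling */

        ret = do_mwait_idle(state);          /* interrupts stay off across mwait idle */

        if (smt)
                wrmsr_spec_ctrl(saved);      /* restore the cached value on wakeup */

        return ret;
}

Shallow states never take this path: the driver only switches the ->enter callback when the state has CPUIDLE_FLAG_IBRS set and the CPU uses kernel IBRS, so the two extra MSR writes are confined to C6 and deeper.
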
diff --git a/queue-5.18/kvm-vmx-convert-launched-argument-to-flags.patch b/queue-5.18/kvm-vmx-convert-launched-argument-to-flags.patch
new file mode 100644 (file)
index 0000000..3c6abe8
--- /dev/null
@@ -0,0 +1,170 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:12 +0200
+Subject: KVM: VMX: Convert launched argument to flags
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit bb06650634d3552c0f8557e9d16aa1a408040e28 upstream.
+
+Convert __vmx_vcpu_run()'s 'launched' argument to 'flags', in
+preparation for doing SPEC_CTRL handling immediately after vmexit, which
+will need another flag.
+
+This is much easier than adding a fourth argument, because this code
+supports both 32-bit and 64-bit, and the fourth argument on 32-bit would
+have to be pushed on the stack.
+
+Note that __vmx_vcpu_run_flags() is called outside of the noinstr
+critical section because it will soon start calling potentially
+traceable functions.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c    |    2 +-
+ arch/x86/kvm/vmx/run_flags.h |    7 +++++++
+ arch/x86/kvm/vmx/vmenter.S   |    9 +++++----
+ arch/x86/kvm/vmx/vmx.c       |   17 ++++++++++++++---
+ arch/x86/kvm/vmx/vmx.h       |    5 ++++-
+ 5 files changed, 31 insertions(+), 9 deletions(-)
+ create mode 100644 arch/x86/kvm/vmx/run_flags.h
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -3091,7 +3091,7 @@ static int nested_vmx_check_vmentry_hw(s
+       }
+       vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
+-                               vmx->loaded_vmcs->launched);
++                               __vmx_vcpu_run_flags(vmx));
+       if (vmx->msr_autoload.host.nr)
+               vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+--- /dev/null
++++ b/arch/x86/kvm/vmx/run_flags.h
+@@ -0,0 +1,7 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef __KVM_X86_VMX_RUN_FLAGS_H
++#define __KVM_X86_VMX_RUN_FLAGS_H
++
++#define VMX_RUN_VMRESUME      (1 << 0)
++
++#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -5,6 +5,7 @@
+ #include <asm/kvm_vcpu_regs.h>
+ #include <asm/nospec-branch.h>
+ #include <asm/segment.h>
++#include "run_flags.h"
+ #define WORD_SIZE (BITS_PER_LONG / 8)
+@@ -34,7 +35,7 @@
+  * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
+  * @vmx:      struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
+  * @regs:     unsigned long * (to guest registers)
+- * @launched: %true if the VMCS has been launched
++ * @flags:    VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
+  *
+  * Returns:
+  *    0 on VM-Exit, 1 on VM-Fail
+@@ -59,7 +60,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+        */
+       push %_ASM_ARG2
+-      /* Copy @launched to BL, _ASM_ARG3 is volatile. */
++      /* Copy @flags to BL, _ASM_ARG3 is volatile. */
+       mov %_ASM_ARG3B, %bl
+       lea (%_ASM_SP), %_ASM_ARG2
+@@ -69,7 +70,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       mov (%_ASM_SP), %_ASM_AX
+       /* Check if vmlaunch or vmresume is needed */
+-      testb %bl, %bl
++      testb $VMX_RUN_VMRESUME, %bl
+       /* Load guest registers.  Don't clobber flags. */
+       mov VCPU_RCX(%_ASM_AX), %_ASM_CX
+@@ -92,7 +93,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       mov VCPU_RAX(%_ASM_AX), %_ASM_AX
+       /* Check EFLAGS.ZF from 'testb' above */
+-      je .Lvmlaunch
++      jz .Lvmlaunch
+       /*
+        * After a successful VMRESUME/VMLAUNCH, control flow "magically"
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -839,6 +839,16 @@ static bool msr_write_intercepted(struct
+                                        MSR_IA32_SPEC_CTRL);
+ }
++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
++{
++      unsigned int flags = 0;
++
++      if (vmx->loaded_vmcs->launched)
++              flags |= VMX_RUN_VMRESUME;
++
++      return flags;
++}
++
+ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+               unsigned long entry, unsigned long exit)
+ {
+@@ -6827,7 +6837,8 @@ static fastpath_t vmx_exit_handlers_fast
+ }
+ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+-                                      struct vcpu_vmx *vmx)
++                                      struct vcpu_vmx *vmx,
++                                      unsigned long flags)
+ {
+       guest_state_enter_irqoff();
+@@ -6846,7 +6857,7 @@ static noinstr void vmx_vcpu_enter_exit(
+               native_write_cr2(vcpu->arch.cr2);
+       vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
+-                                 vmx->loaded_vmcs->launched);
++                                 flags);
+       vcpu->arch.cr2 = native_read_cr2();
+@@ -6954,7 +6965,7 @@ static fastpath_t vmx_vcpu_run(struct kv
+       x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
+       /* The actual VMENTER/EXIT is in the .noinstr.text section. */
+-      vmx_vcpu_enter_exit(vcpu, vmx);
++      vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
+       /*
+        * We do not use IBRS in the kernel. If this vCPU has used the
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -13,6 +13,7 @@
+ #include "vmcs.h"
+ #include "vmx_ops.h"
+ #include "cpuid.h"
++#include "run_flags.h"
+ #define MSR_TYPE_R    1
+ #define MSR_TYPE_W    2
+@@ -404,7 +405,9 @@ void vmx_set_virtual_apic_mode(struct kv
+ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
+ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
+ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
+-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
++bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
++                  unsigned int flags);
+ int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
+ void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
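
The conversion above amounts to packing the old boolean into bit 0 of a flags word so that later patches can add more run-time conditions without touching the assembly calling convention. A minimal stand-alone sketch, where struct vcpu_state and build_run_flags are illustrative names rather than KVM code:

#include <stdbool.h>

#define RUN_VMRESUME (1u << 0)          /* mirrors VMX_RUN_VMRESUME */

struct vcpu_state {
        bool launched;                  /* has the VMCS been launched before? */
};

static unsigned int build_run_flags(const struct vcpu_state *v)
{
        unsigned int flags = 0;

        if (v->launched)
                flags |= RUN_VMRESUME;  /* use VMRESUME instead of VMLAUNCH */

        return flags;
}

On the assembly side the only visible difference is that the byte is now tested as a bitmask ("testb $VMX_RUN_VMRESUME, %bl" followed by "jz .Lvmlaunch") rather than compared against zero as a whole.
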
diff --git a/queue-5.18/kvm-vmx-fix-ibrs-handling-after-vmexit.patch b/queue-5.18/kvm-vmx-fix-ibrs-handling-after-vmexit.patch
new file mode 100644 (file)
index 0000000..9319a93
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:14 +0200
+Subject: KVM: VMX: Fix IBRS handling after vmexit
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit bea7e31a5caccb6fe8ed989c065072354f0ecb52 upstream.
+
+For legacy IBRS to work, the IBRS bit needs to be always re-written
+after vmexit, even if it's already on.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6845,8 +6845,13 @@ void noinstr vmx_spec_ctrl_restore_host(
+       /*
+        * If the guest/host SPEC_CTRL values differ, restore the host value.
++       *
++       * For legacy IBRS, the IBRS bit always needs to be written after
++       * transitioning from a less privileged predictor mode, regardless of
++       * whether the guest/host values differ.
+        */
+-      if (vmx->spec_ctrl != hostval)
++      if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
++          vmx->spec_ctrl != hostval)
+               native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
+       barrier_nospec();
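
The one-line change can be read as the sketch below, where kernel_ibrs_enabled() and wrmsr_spec_ctrl() are placeholders for the X86_FEATURE_KERNEL_IBRS check and the MSR_IA32_SPEC_CTRL write used in the patch: with legacy kernel IBRS the write becomes unconditional, because returning from the guest is a transition from a less privileged predictor mode and the IBRS bit must be re-asserted even if its value did not change.

#include <stdbool.h>
#include <stdint.h>

extern bool kernel_ibrs_enabled(void);      /* placeholder for the feature check */
extern void wrmsr_spec_ctrl(uint64_t val);  /* placeholder for native_wrmsrl() */

static void restore_host_spec_ctrl(uint64_t guest_val, uint64_t host_val)
{
        /* Legacy IBRS: always rewrite; otherwise skip when the values match. */
        if (kernel_ibrs_enabled() || guest_val != host_val)
                wrmsr_spec_ctrl(host_val);
}
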
diff --git a/queue-5.18/kvm-vmx-flatten-__vmx_vcpu_run.patch b/queue-5.18/kvm-vmx-flatten-__vmx_vcpu_run.patch
new file mode 100644 (file)
index 0000000..4ef2486
--- /dev/null
@@ -0,0 +1,196 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:11 +0200
+Subject: KVM: VMX: Flatten __vmx_vcpu_run()
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 8bd200d23ec42d66ccd517a72dd0b9cc6132d2fd upstream.
+
+Move the vmx_vm{enter,exit}() functionality into __vmx_vcpu_run().  This
+will make it easier to do the spec_ctrl handling before the first RET.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmenter.S |  119 +++++++++++++++++----------------------------
+ 1 file changed, 46 insertions(+), 73 deletions(-)
+
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -31,68 +31,6 @@
+ .section .noinstr.text, "ax"
+ /**
+- * vmx_vmenter - VM-Enter the current loaded VMCS
+- *
+- * %RFLAGS.ZF:        !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME
+- *
+- * Returns:
+- *    %RFLAGS.CF is set on VM-Fail Invalid
+- *    %RFLAGS.ZF is set on VM-Fail Valid
+- *    %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
+- *
+- * Note that VMRESUME/VMLAUNCH fall-through and return directly if
+- * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
+- * to vmx_vmexit.
+- */
+-SYM_FUNC_START_LOCAL(vmx_vmenter)
+-      /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
+-      je 2f
+-
+-1:    vmresume
+-      RET
+-
+-2:    vmlaunch
+-      RET
+-
+-3:    cmpb $0, kvm_rebooting
+-      je 4f
+-      RET
+-4:    ud2
+-
+-      _ASM_EXTABLE(1b, 3b)
+-      _ASM_EXTABLE(2b, 3b)
+-
+-SYM_FUNC_END(vmx_vmenter)
+-
+-/**
+- * vmx_vmexit - Handle a VMX VM-Exit
+- *
+- * Returns:
+- *    %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
+- *
+- * This is vmx_vmenter's partner in crime.  On a VM-Exit, control will jump
+- * here after hardware loads the host's state, i.e. this is the destination
+- * referred to by VMCS.HOST_RIP.
+- */
+-SYM_FUNC_START(vmx_vmexit)
+-#ifdef CONFIG_RETPOLINE
+-      ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
+-      /* Preserve guest's RAX, it's used to stuff the RSB. */
+-      push %_ASM_AX
+-
+-      /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+-      FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+-
+-      /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
+-      or $1, %_ASM_AX
+-
+-      pop %_ASM_AX
+-.Lvmexit_skip_rsb:
+-#endif
+-      RET
+-SYM_FUNC_END(vmx_vmexit)
+-
+-/**
+  * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
+  * @vmx:      struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
+  * @regs:     unsigned long * (to guest registers)
+@@ -124,8 +62,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       /* Copy @launched to BL, _ASM_ARG3 is volatile. */
+       mov %_ASM_ARG3B, %bl
+-      /* Adjust RSP to account for the CALL to vmx_vmenter(). */
+-      lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2
++      lea (%_ASM_SP), %_ASM_ARG2
+       call vmx_update_host_rsp
+       /* Load @regs to RAX. */
+@@ -154,11 +91,37 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       /* Load guest RAX.  This kills the @regs pointer! */
+       mov VCPU_RAX(%_ASM_AX), %_ASM_AX
+-      /* Enter guest mode */
+-      call vmx_vmenter
++      /* Check EFLAGS.ZF from 'testb' above */
++      je .Lvmlaunch
+-      /* Jump on VM-Fail. */
+-      jbe 2f
++      /*
++       * After a successful VMRESUME/VMLAUNCH, control flow "magically"
++       * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
++       * So this isn't a typical function and objtool needs to be told to
++       * save the unwind state here and restore it below.
++       */
++      UNWIND_HINT_SAVE
++
++/*
++ * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at
++ * the 'vmx_vmexit' label below.
++ */
++.Lvmresume:
++      vmresume
++      jmp .Lvmfail
++
++.Lvmlaunch:
++      vmlaunch
++      jmp .Lvmfail
++
++      _ASM_EXTABLE(.Lvmresume, .Lfixup)
++      _ASM_EXTABLE(.Lvmlaunch, .Lfixup)
++
++SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
++
++      /* Restore unwind state from before the VMRESUME/VMLAUNCH. */
++      UNWIND_HINT_RESTORE
++      ENDBR
+       /* Temporarily save guest's RAX. */
+       push %_ASM_AX
+@@ -185,9 +148,13 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       mov %r15, VCPU_R15(%_ASM_AX)
+ #endif
++      /* IMPORTANT: RSB must be stuffed before the first return. */
++      FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
++
+       /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
+       xor %eax, %eax
++.Lclear_regs:
+       /*
+        * Clear all general purpose registers except RSP and RAX to prevent
+        * speculative use of the guest's values, even those that are reloaded
+@@ -197,7 +164,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+        * free.  RSP and RAX are exempt as RSP is restored by hardware during
+        * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
+        */
+-1:    xor %ecx, %ecx
++      xor %ecx, %ecx
+       xor %edx, %edx
+       xor %ebx, %ebx
+       xor %ebp, %ebp
+@@ -216,8 +183,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       /* "POP" @regs. */
+       add $WORD_SIZE, %_ASM_SP
+-      pop %_ASM_BX
++      pop %_ASM_BX
+ #ifdef CONFIG_X86_64
+       pop %r12
+       pop %r13
+@@ -230,9 +197,15 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       pop %_ASM_BP
+       RET
+-      /* VM-Fail.  Out-of-line to avoid a taken Jcc after VM-Exit. */
+-2:    mov $1, %eax
+-      jmp 1b
++.Lfixup:
++      cmpb $0, kvm_rebooting
++      jne .Lvmfail
++      ud2
++.Lvmfail:
++      /* VM-Fail: set return value to 1 */
++      mov $1, %eax
++      jmp .Lclear_regs
++
+ SYM_FUNC_END(__vmx_vcpu_run)
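
Because a successful VM-Enter does not return through a call/ret pair but resumes at the vmx_vmexit label via VMCS.HOST_RIP, the real code has to stay in assembly; still, the control flow now held in one function can be pictured with this C sketch, in which try_vmenter, save_guest_regs, stuff_rsb and clear_gprs are illustrative stand-ins, not kernel functions:

#include <stdbool.h>

extern bool try_vmenter(bool resume);   /* false = VM-Exit occurred, true = VM-Fail */
extern void save_guest_regs(void);
extern void stuff_rsb(void);            /* FILL_RETURN_BUFFER before the first RET */
extern void clear_gprs(void);

static int vcpu_run_flattened(bool launched)
{
        int fail = 0;

        if (try_vmenter(launched)) {    /* VMLAUNCH or VMRESUME */
                fail = 1;               /* VM-Fail: report 1 to the caller */
        } else {
                save_guest_regs();      /* VM-Exit: hardware resumed execution here */
                stuff_rsb();
        }

        clear_gprs();                   /* scrub GPRs on both paths */
        return fail;
}

Flattening matters for the following patches in this series: with everything in a single function there is a well-defined "first RET after vmexit" in front of which the SPEC_CTRL and RSB handling can be placed.
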
diff --git a/queue-5.18/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch b/queue-5.18/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch
new file mode 100644 (file)
index 0000000..30b5997
--- /dev/null
@@ -0,0 +1,240 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:13 +0200
+Subject: KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit fc02735b14fff8c6678b521d324ade27b1a3d4cf upstream.
+
+On eIBRS systems, the returns in the vmexit return path from
+__vmx_vcpu_run() to vmx_vcpu_run() are exposed to RSB poisoning attacks.
+
+Fix that by moving the post-vmexit spec_ctrl handling to immediately
+after the vmexit.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |    1 
+ arch/x86/kernel/cpu/bugs.c           |    4 ++
+ arch/x86/kvm/vmx/run_flags.h         |    1 
+ arch/x86/kvm/vmx/vmenter.S           |   49 +++++++++++++++++++++++++++--------
+ arch/x86/kvm/vmx/vmx.c               |   48 ++++++++++++++++++++--------------
+ arch/x86/kvm/vmx/vmx.h               |    1 
+ 6 files changed, 73 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -274,6 +274,7 @@ static inline void indirect_branch_predi
+ /* The Intel SPEC CTRL MSR base value cache */
+ extern u64 x86_spec_ctrl_base;
++extern u64 x86_spec_ctrl_current;
+ extern void write_spec_ctrl_current(u64 val, bool force);
+ extern u64 spec_ctrl_current(void);
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -195,6 +195,10 @@ void __init check_bugs(void)
+ #endif
+ }
++/*
++ * NOTE: For VMX, this function is not called in the vmexit path.
++ * It uses vmx_spec_ctrl_restore_host() instead.
++ */
+ void
+ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ {
+--- a/arch/x86/kvm/vmx/run_flags.h
++++ b/arch/x86/kvm/vmx/run_flags.h
+@@ -3,5 +3,6 @@
+ #define __KVM_X86_VMX_RUN_FLAGS_H
+ #define VMX_RUN_VMRESUME      (1 << 0)
++#define VMX_RUN_SAVE_SPEC_CTRL        (1 << 1)
+ #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -33,9 +33,10 @@
+ /**
+  * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
+- * @vmx:      struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
++ * @vmx:      struct vcpu_vmx *
+  * @regs:     unsigned long * (to guest registers)
+- * @flags:    VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
++ * @flags:    VMX_RUN_VMRESUME:       use VMRESUME instead of VMLAUNCH
++ *            VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
+  *
+  * Returns:
+  *    0 on VM-Exit, 1 on VM-Fail
+@@ -54,6 +55,12 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ #endif
+       push %_ASM_BX
++      /* Save @vmx for SPEC_CTRL handling */
++      push %_ASM_ARG1
++
++      /* Save @flags for SPEC_CTRL handling */
++      push %_ASM_ARG3
++
+       /*
+        * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
+        * @regs is needed after VM-Exit to save the guest's register values.
+@@ -149,25 +156,23 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL
+       mov %r15, VCPU_R15(%_ASM_AX)
+ #endif
+-      /* IMPORTANT: RSB must be stuffed before the first return. */
+-      FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+-
+-      /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
+-      xor %eax, %eax
++      /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
++      xor %ebx, %ebx
+ .Lclear_regs:
+       /*
+-       * Clear all general purpose registers except RSP and RAX to prevent
++       * Clear all general purpose registers except RSP and RBX to prevent
+        * speculative use of the guest's values, even those that are reloaded
+        * via the stack.  In theory, an L1 cache miss when restoring registers
+        * could lead to speculative execution with the guest's values.
+        * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
+        * free.  RSP and RAX are exempt as RSP is restored by hardware during
+-       * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
++       * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
++       * value.
+        */
++      xor %eax, %eax
+       xor %ecx, %ecx
+       xor %edx, %edx
+-      xor %ebx, %ebx
+       xor %ebp, %ebp
+       xor %esi, %esi
+       xor %edi, %edi
+@@ -185,6 +190,28 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL
+       /* "POP" @regs. */
+       add $WORD_SIZE, %_ASM_SP
++      /*
++       * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
++       * the first unbalanced RET after vmexit!
++       *
++       * For retpoline, RSB filling is needed to prevent poisoned RSB entries
++       * and (in some cases) RSB underflow.
++       *
++       * eIBRS has its own protection against poisoned RSB, so it doesn't
++       * need the RSB filling sequence.  But it does need to be enabled
++       * before the first unbalanced RET.
++         */
++
++      FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
++
++      pop %_ASM_ARG2  /* @flags */
++      pop %_ASM_ARG1  /* @vmx */
++
++      call vmx_spec_ctrl_restore_host
++
++      /* Put return value in AX */
++      mov %_ASM_BX, %_ASM_AX
++
+       pop %_ASM_BX
+ #ifdef CONFIG_X86_64
+       pop %r12
+@@ -204,7 +231,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL
+       ud2
+ .Lvmfail:
+       /* VM-Fail: set return value to 1 */
+-      mov $1, %eax
++      mov $1, %_ASM_BX
+       jmp .Lclear_regs
+ SYM_FUNC_END(__vmx_vcpu_run)
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -846,6 +846,14 @@ unsigned int __vmx_vcpu_run_flags(struct
+       if (vmx->loaded_vmcs->launched)
+               flags |= VMX_RUN_VMRESUME;
++      /*
++       * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
++       * to change it directly without causing a vmexit.  In that case read
++       * it after vmexit and store it in vmx->spec_ctrl.
++       */
++      if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
++              flags |= VMX_RUN_SAVE_SPEC_CTRL;
++
+       return flags;
+ }
+@@ -6824,6 +6832,26 @@ void noinstr vmx_update_host_rsp(struct
+       }
+ }
++void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
++                                      unsigned int flags)
++{
++      u64 hostval = this_cpu_read(x86_spec_ctrl_current);
++
++      if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
++              return;
++
++      if (flags & VMX_RUN_SAVE_SPEC_CTRL)
++              vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
++
++      /*
++       * If the guest/host SPEC_CTRL values differ, restore the host value.
++       */
++      if (vmx->spec_ctrl != hostval)
++              native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
++
++      barrier_nospec();
++}
++
+ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+ {
+       switch (to_vmx(vcpu)->exit_reason.basic) {
+@@ -6967,26 +6995,6 @@ static fastpath_t vmx_vcpu_run(struct kv
+       /* The actual VMENTER/EXIT is in the .noinstr.text section. */
+       vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
+-      /*
+-       * We do not use IBRS in the kernel. If this vCPU has used the
+-       * SPEC_CTRL MSR it may have left it on; save the value and
+-       * turn it off. This is much more efficient than blindly adding
+-       * it to the atomic save/restore list. Especially as the former
+-       * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
+-       *
+-       * For non-nested case:
+-       * If the L01 MSR bitmap does not intercept the MSR, then we need to
+-       * save it.
+-       *
+-       * For nested case:
+-       * If the L02 MSR bitmap does not intercept the MSR, then we need to
+-       * save it.
+-       */
+-      if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
+-              vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+-
+-      x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
+-
+       /* All fields are clean at this point */
+       if (static_branch_unlikely(&enable_evmcs)) {
+               current_evmcs->hv_clean_fields |=
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -405,6 +405,7 @@ void vmx_set_virtual_apic_mode(struct kv
+ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
+ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
+ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
++void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
+ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
+ bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
+                   unsigned int flags);
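
The ordering this patch enforces after vmexit, stuff the RSB first, then restore the host SPEC_CTRL, and only then allow the first unbalanced RET, can be summarized with the sketch below. The helpers are stand-ins for the X86_FEATURE_MSR_SPEC_CTRL check, __rdmsr()/native_wrmsrl() and the per-CPU x86_spec_ctrl_current read; in the real patch the work is split between vmenter.S and vmx_spec_ctrl_restore_host().

#include <stdbool.h>
#include <stdint.h>

#define RUN_SAVE_SPEC_CTRL (1u << 1)            /* mirrors VMX_RUN_SAVE_SPEC_CTRL */

extern bool have_spec_ctrl_msr(void);           /* stand-in feature check */
extern uint64_t rdmsr_spec_ctrl(void);          /* stand-in for __rdmsr() */
extern void wrmsr_spec_ctrl(uint64_t val);      /* stand-in for native_wrmsrl() */
extern uint64_t host_spec_ctrl_cached(void);    /* per-CPU x86_spec_ctrl_current */

static void after_vmexit(uint64_t *guest_spec_ctrl, unsigned int flags)
{
        uint64_t hostval = host_spec_ctrl_cached();

        /* Step 1 (done in asm): FILL_RETURN_BUFFER has already stuffed the RSB. */

        if (!have_spec_ctrl_msr())
                return;

        /* Step 2: if the guest can write the MSR directly, snapshot its value. */
        if (flags & RUN_SAVE_SPEC_CTRL)
                *guest_spec_ctrl = rdmsr_spec_ctrl();

        /* Step 3: restore the host value so IBRS covers the returns that follow. */
        if (*guest_spec_ctrl != hostval)
                wrmsr_spec_ctrl(hostval);
}
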
diff --git a/queue-5.18/kvm-vmx-prevent-rsb-underflow-before-vmenter.patch b/queue-5.18/kvm-vmx-prevent-rsb-underflow-before-vmenter.patch
new file mode 100644 (file)
index 0000000..1898c80
--- /dev/null
@@ -0,0 +1,175 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:16 +0200
+Subject: KVM: VMX: Prevent RSB underflow before vmenter
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 07853adc29a058c5fd143c14e5ac528448a72ed9 upstream.
+
+On VMX, there are some balanced returns between the time the guest's
+SPEC_CTRL value is written, and the vmenter.
+
+Balanced returns (matched by a preceding call) are usually ok, but it's
+at least theoretically possible an NMI with a deep call stack could
+empty the RSB before one of the returns.
+
+For maximum paranoia, don't allow *any* returns (balanced or otherwise)
+between the SPEC_CTRL write and the vmenter.
+
+  [ bp: Fix 32-bit build. ]
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: header conflict fixup at arch/x86/kernel/asm-offsets.c]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/asm-offsets.c   |    6 ++++++
+ arch/x86/kernel/cpu/bugs.c      |    4 ++--
+ arch/x86/kvm/vmx/capabilities.h |    4 ++--
+ arch/x86/kvm/vmx/vmenter.S      |   29 +++++++++++++++++++++++++++++
+ arch/x86/kvm/vmx/vmx.c          |    8 --------
+ arch/x86/kvm/vmx/vmx.h          |    4 ++--
+ arch/x86/kvm/vmx/vmx_ops.h      |    2 +-
+ 7 files changed, 42 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -18,6 +18,7 @@
+ #include <asm/bootparam.h>
+ #include <asm/suspend.h>
+ #include <asm/tlbflush.h>
++#include "../kvm/vmx/vmx.h"
+ #ifdef CONFIG_XEN
+ #include <xen/interface/xen.h>
+@@ -90,4 +91,9 @@ static void __used common(void)
+       OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
+       OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
+       OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
++
++      if (IS_ENABLED(CONFIG_KVM_INTEL)) {
++              BLANK();
++              OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl);
++      }
+ }
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -196,8 +196,8 @@ void __init check_bugs(void)
+ }
+ /*
+- * NOTE: For VMX, this function is not called in the vmexit path.
+- * It uses vmx_spec_ctrl_restore_host() instead.
++ * NOTE: This function is *only* called for SVM.  VMX spec_ctrl handling is
++ * done in vmenter.S.
+  */
+ void
+ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+--- a/arch/x86/kvm/vmx/capabilities.h
++++ b/arch/x86/kvm/vmx/capabilities.h
+@@ -4,8 +4,8 @@
+ #include <asm/vmx.h>
+-#include "lapic.h"
+-#include "x86.h"
++#include "../lapic.h"
++#include "../x86.h"
+ extern bool __read_mostly enable_vpid;
+ extern bool __read_mostly flexpriority_enabled;
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -1,9 +1,11 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+ #include <linux/linkage.h>
+ #include <asm/asm.h>
++#include <asm/asm-offsets.h>
+ #include <asm/bitsperlong.h>
+ #include <asm/kvm_vcpu_regs.h>
+ #include <asm/nospec-branch.h>
++#include <asm/percpu.h>
+ #include <asm/segment.h>
+ #include "run_flags.h"
+@@ -73,6 +75,33 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       lea (%_ASM_SP), %_ASM_ARG2
+       call vmx_update_host_rsp
++      ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL
++
++      /*
++       * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
++       * host's, write the MSR.
++       *
++       * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
++       * there must not be any returns or indirect branches between this code
++       * and vmentry.
++       */
++      mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI
++      movl VMX_spec_ctrl(%_ASM_DI), %edi
++      movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
++      cmp %edi, %esi
++      je .Lspec_ctrl_done
++      mov $MSR_IA32_SPEC_CTRL, %ecx
++      xor %edx, %edx
++      mov %edi, %eax
++      wrmsr
++
++.Lspec_ctrl_done:
++
++      /*
++       * Since vmentry is serializing on affected CPUs, there's no need for
++       * an LFENCE to stop speculation from skipping the wrmsr.
++       */
++
+       /* Load @regs to RAX. */
+       mov (%_ASM_SP), %_ASM_AX
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6989,14 +6989,6 @@ static fastpath_t vmx_vcpu_run(struct kv
+       kvm_wait_lapic_expire(vcpu);
+-      /*
+-       * If this vCPU has touched SPEC_CTRL, restore the guest's value if
+-       * it's non-zero. Since vmentry is serialising on affected CPUs, there
+-       * is no need to worry about the conditional branch over the wrmsr
+-       * being speculatively taken.
+-       */
+-      x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
+-
+       /* The actual VMENTER/EXIT is in the .noinstr.text section. */
+       vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -8,11 +8,11 @@
+ #include <asm/intel_pt.h>
+ #include "capabilities.h"
+-#include "kvm_cache_regs.h"
++#include "../kvm_cache_regs.h"
+ #include "posted_intr.h"
+ #include "vmcs.h"
+ #include "vmx_ops.h"
+-#include "cpuid.h"
++#include "../cpuid.h"
+ #include "run_flags.h"
+ #define MSR_TYPE_R    1
+--- a/arch/x86/kvm/vmx/vmx_ops.h
++++ b/arch/x86/kvm/vmx/vmx_ops.h
+@@ -8,7 +8,7 @@
+ #include "evmcs.h"
+ #include "vmcs.h"
+-#include "x86.h"
++#include "../x86.h"
+ asmlinkage void vmread_error(unsigned long field, bool fault);
+ __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
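
The guest-side SPEC_CTRL load that moves into vmenter.S has the shape sketched below; load_guest_spec_ctrl and its helpers are illustrative only, because the real sequence must be assembly so that no return or indirect branch sits between the wrmsr and the VMLAUNCH/VMRESUME.

#include <stdint.h>

extern uint64_t host_spec_ctrl_cached(void);    /* per-CPU x86_spec_ctrl_current */
extern void wrmsr_spec_ctrl(uint64_t val);      /* wrmsr to MSR_IA32_SPEC_CTRL */

static void load_guest_spec_ctrl(uint64_t guest_val)
{
        /* Skip the expensive wrmsr when guest and host values already match. */
        if (guest_val != host_spec_ctrl_cached())
                wrmsr_spec_ctrl(guest_val);

        /*
         * vmentry is serializing on affected CPUs, so no LFENCE is needed to
         * keep speculation from jumping over the wrmsr.
         */
}
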
diff --git a/queue-5.18/objtool-add-entry-unret-validation.patch b/queue-5.18/objtool-add-entry-unret-validation.patch
new file mode 100644 (file)
index 0000000..b11d790
--- /dev/null
@@ -0,0 +1,542 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Mon, 11 Jul 2022 10:51:17 -0300
+Subject: objtool: Add entry UNRET validation
+
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+
+commit a09a6e2399ba0595c3042b3164f3ca68a3cff33e upstream.
+
+Since entry asm is tricky, add a validation pass that ensures the
+retbleed mitigation has been done before the first actual RET
+instruction.
+
+Entry points are those that either have UNWIND_HINT_ENTRY, which acts
+as UNWIND_HINT_EMPTY but marks the instruction as an entry point, or
+those that have UNWIND_HINT_IRET_REGS at +0.
+
+This is basically a variant of validate_branch() that is
+intra-function and it will simply follow all branches from marked
+entry points and ensures that all paths lead to ANNOTATE_UNRET_END.
+
+If a path hits RET or an indirection the path is a fail and will be
+reported.
+
+There are 3 ANNOTATE_UNRET_END instances:
+
+ - UNTRAIN_RET itself
+ - exception from-kernel; this path doesn't need UNTRAIN_RET
+ - all early exceptions; these also don't need UNTRAIN_RET
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: tools/objtool/builtin-check.c no link option validation]
+[cascardo: tools/objtool/check.c opts.ibt is ibt]
+[cascardo: tools/objtool/include/objtool/builtin.h leave unret option as bool, no struct opts]
+[cascardo: objtool is still called from scripts/link-vmlinux.sh]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S               |    3 
+ arch/x86/entry/entry_64_compat.S        |    6 -
+ arch/x86/include/asm/nospec-branch.h    |   12 ++
+ arch/x86/include/asm/unwind_hints.h     |    4 
+ arch/x86/kernel/head_64.S               |    5 
+ arch/x86/xen/xen-asm.S                  |   10 -
+ include/linux/objtool.h                 |    3 
+ scripts/link-vmlinux.sh                 |    3 
+ tools/include/linux/objtool.h           |    3 
+ tools/objtool/builtin-check.c           |    3 
+ tools/objtool/check.c                   |  177 ++++++++++++++++++++++++++++++--
+ tools/objtool/include/objtool/builtin.h |    2 
+ tools/objtool/include/objtool/check.h   |   11 +
+ 13 files changed, 220 insertions(+), 22 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -85,7 +85,7 @@
+  */
+ SYM_CODE_START(entry_SYSCALL_64)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       ENDBR
+       swapgs
+@@ -1088,6 +1088,7 @@ SYM_CODE_START_LOCAL(error_entry)
+ .Lerror_entry_done_lfence:
+       FENCE_SWAPGS_KERNEL_ENTRY
+       leaq    8(%rsp), %rax                   /* return pt_regs pointer */
++      ANNOTATE_UNRET_END
+       RET
+ .Lbstep_iret:
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -49,7 +49,7 @@
+  * 0(%ebp) arg6
+  */
+ SYM_CODE_START(entry_SYSENTER_compat)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       ENDBR
+       /* Interrupts are off on entry. */
+       SWAPGS
+@@ -204,7 +204,7 @@ SYM_CODE_END(entry_SYSENTER_compat)
+  * 0(%esp) arg6
+  */
+ SYM_CODE_START(entry_SYSCALL_compat)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       ENDBR
+       /* Interrupts are off on entry. */
+       swapgs
+@@ -353,7 +353,7 @@ SYM_CODE_END(entry_SYSCALL_compat)
+  * ebp  arg6
+  */
+ SYM_CODE_START(entry_INT80_compat)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       ENDBR
+       /*
+        * Interrupts are off on entry.
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -82,6 +82,17 @@
+ #define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
+ /*
++ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
++ * eventually turn into its own annotation.
++ */
++.macro ANNOTATE_UNRET_END
++#ifdef CONFIG_DEBUG_ENTRY
++      ANNOTATE_RETPOLINE_SAFE
++      nop
++#endif
++.endm
++
++/*
+  * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+  * indirect jmp/call which may be susceptible to the Spectre variant 2
+  * attack.
+@@ -131,6 +142,7 @@
+  */
+ .macro UNTRAIN_RET
+ #ifdef CONFIG_RETPOLINE
++      ANNOTATE_UNRET_END
+       ALTERNATIVE_2 "",                                               \
+                     "call zen_untrain_ret", X86_FEATURE_UNRET,        \
+                     "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
+--- a/arch/x86/include/asm/unwind_hints.h
++++ b/arch/x86/include/asm/unwind_hints.h
+@@ -11,6 +11,10 @@
+       UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1
+ .endm
++.macro UNWIND_HINT_ENTRY
++      UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1
++.endm
++
+ .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
+       .if \base == %rsp
+               .if \indirect
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -334,6 +334,8 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb)
+       UNWIND_HINT_IRET_REGS offset=8
+       ENDBR
++      ANNOTATE_UNRET_END
++
+       /* Build pt_regs */
+       PUSH_AND_CLEAR_REGS
+@@ -393,6 +395,7 @@ SYM_CODE_END(early_idt_handler_array)
+ SYM_CODE_START_LOCAL(early_idt_handler_common)
+       UNWIND_HINT_IRET_REGS offset=16
++      ANNOTATE_UNRET_END
+       /*
+        * The stack is the hardware frame, an error code or zero, and the
+        * vector number.
+@@ -442,6 +445,8 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb)
+       UNWIND_HINT_IRET_REGS offset=8
+       ENDBR
++      ANNOTATE_UNRET_END
++
+       /* Build pt_regs */
+       PUSH_AND_CLEAR_REGS
+--- a/arch/x86/xen/xen-asm.S
++++ b/arch/x86/xen/xen-asm.S
+@@ -121,7 +121,7 @@ SYM_FUNC_END(xen_read_cr2_direct);
+ .macro xen_pv_trap name
+ SYM_CODE_START(xen_\name)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       ENDBR
+       pop %rcx
+       pop %r11
+@@ -235,7 +235,7 @@ SYM_CODE_END(xenpv_restore_regs_and_retu
+ /* Normal 64-bit system call target */
+ SYM_CODE_START(xen_entry_SYSCALL_64)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       ENDBR
+       popq %rcx
+       popq %r11
+@@ -255,7 +255,7 @@ SYM_CODE_END(xen_entry_SYSCALL_64)
+ /* 32-bit compat syscall target */
+ SYM_CODE_START(xen_entry_SYSCALL_compat)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       ENDBR
+       popq %rcx
+       popq %r11
+@@ -273,7 +273,7 @@ SYM_CODE_END(xen_entry_SYSCALL_compat)
+ /* 32-bit compat sysenter target */
+ SYM_CODE_START(xen_entry_SYSENTER_compat)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       ENDBR
+       /*
+        * NB: Xen is polite and clears TF from EFLAGS for us.  This means
+@@ -297,7 +297,7 @@ SYM_CODE_END(xen_entry_SYSENTER_compat)
+ SYM_CODE_START(xen_entry_SYSCALL_compat)
+ SYM_CODE_START(xen_entry_SYSENTER_compat)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       ENDBR
+       lea 16(%rsp), %rsp      /* strip %rcx, %r11 */
+       mov $-ENOSYS, %rax
+--- a/include/linux/objtool.h
++++ b/include/linux/objtool.h
+@@ -32,11 +32,14 @@ struct unwind_hint {
+  *
+  * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
+  * Useful for code which doesn't have an ELF function annotation.
++ *
++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
+  */
+ #define UNWIND_HINT_TYPE_CALL         0
+ #define UNWIND_HINT_TYPE_REGS         1
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC         3
++#define UNWIND_HINT_TYPE_ENTRY                4
+ #ifdef CONFIG_STACK_VALIDATION
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -130,6 +130,9 @@ objtool_link()
+       if is_enabled CONFIG_VMLINUX_VALIDATION; then
+               objtoolopt="${objtoolopt} --noinstr"
++              if is_enabled CONFIG_RETPOLINE; then
++                      objtoolopt="${objtoolopt} --unret"
++              fi
+       fi
+       if [ -n "${objtoolopt}" ]; then
+--- a/tools/include/linux/objtool.h
++++ b/tools/include/linux/objtool.h
+@@ -32,11 +32,14 @@ struct unwind_hint {
+  *
+  * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
+  * Useful for code which doesn't have an ELF function annotation.
++ *
++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
+  */
+ #define UNWIND_HINT_TYPE_CALL         0
+ #define UNWIND_HINT_TYPE_REGS         1
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC         3
++#define UNWIND_HINT_TYPE_ENTRY                4
+ #ifdef CONFIG_STACK_VALIDATION
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -21,7 +21,7 @@
+ bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+      lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
+-     ibt;
++     ibt, unret;
+ static const char * const check_usage[] = {
+       "objtool check [<options>] file.o",
+@@ -37,6 +37,7 @@ const struct option check_options[] = {
+       OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
+       OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
+       OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
++      OPT_BOOLEAN(0,   "unret", &unret, "validate entry unret placement"),
+       OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
+       OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
+       OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -2031,16 +2031,24 @@ static int read_unwind_hints(struct objt
+               insn->hint = true;
+-              if (ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
++              if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
+                       struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
+-                      if (sym && sym->bind == STB_GLOBAL &&
+-                          insn->type != INSN_ENDBR && !insn->noendbr) {
+-                              WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR",
+-                                        insn->sec, insn->offset);
++                      if (sym && sym->bind == STB_GLOBAL) {
++                              if (ibt && insn->type != INSN_ENDBR && !insn->noendbr) {
++                                      WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR",
++                                                insn->sec, insn->offset);
++                              }
++
++                              insn->entry = 1;
+                       }
+               }
++              if (hint->type == UNWIND_HINT_TYPE_ENTRY) {
++                      hint->type = UNWIND_HINT_TYPE_CALL;
++                      insn->entry = 1;
++              }
++
+               if (hint->type == UNWIND_HINT_TYPE_FUNC) {
+                       insn->cfi = &func_cfi;
+                       continue;
+@@ -2115,8 +2123,9 @@ static int read_retpoline_hints(struct o
+               if (insn->type != INSN_JUMP_DYNAMIC &&
+                   insn->type != INSN_CALL_DYNAMIC &&
+-                  insn->type != INSN_RETURN) {
+-                      WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret",
++                  insn->type != INSN_RETURN &&
++                  insn->type != INSN_NOP) {
++                      WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop",
+                                 insn->sec, insn->offset);
+                       return -1;
+               }
+@@ -3412,8 +3421,8 @@ static int validate_branch(struct objtoo
+                       return 1;
+               }
+-              visited = 1 << state.uaccess;
+-              if (insn->visited) {
++              visited = VISITED_BRANCH << state.uaccess;
++              if (insn->visited & VISITED_BRANCH_MASK) {
+                       if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
+                               return 1;
+@@ -3642,6 +3651,145 @@ static int validate_unwind_hints(struct
+       return warnings;
+ }
++/*
++ * Validate rethunk entry constraint: must untrain RET before the first RET.
++ *
++ * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes
++ * before an actual RET instruction.
++ */
++static int validate_entry(struct objtool_file *file, struct instruction *insn)
++{
++      struct instruction *next, *dest;
++      int ret, warnings = 0;
++
++      for (;;) {
++              next = next_insn_to_validate(file, insn);
++
++              if (insn->visited & VISITED_ENTRY)
++                      return 0;
++
++              insn->visited |= VISITED_ENTRY;
++
++              if (!insn->ignore_alts && !list_empty(&insn->alts)) {
++                      struct alternative *alt;
++                      bool skip_orig = false;
++
++                      list_for_each_entry(alt, &insn->alts, list) {
++                              if (alt->skip_orig)
++                                      skip_orig = true;
++
++                              ret = validate_entry(file, alt->insn);
++                              if (ret) {
++                                      if (backtrace)
++                                              BT_FUNC("(alt)", insn);
++                                      return ret;
++                              }
++                      }
++
++                      if (skip_orig)
++                              return 0;
++              }
++
++              switch (insn->type) {
++
++              case INSN_CALL_DYNAMIC:
++              case INSN_JUMP_DYNAMIC:
++              case INSN_JUMP_DYNAMIC_CONDITIONAL:
++                      WARN_FUNC("early indirect call", insn->sec, insn->offset);
++                      return 1;
++
++              case INSN_JUMP_UNCONDITIONAL:
++              case INSN_JUMP_CONDITIONAL:
++                      if (!is_sibling_call(insn)) {
++                              if (!insn->jump_dest) {
++                                      WARN_FUNC("unresolved jump target after linking?!?",
++                                                insn->sec, insn->offset);
++                                      return -1;
++                              }
++                              ret = validate_entry(file, insn->jump_dest);
++                              if (ret) {
++                                      if (backtrace) {
++                                              BT_FUNC("(branch%s)", insn,
++                                                      insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : "");
++                                      }
++                                      return ret;
++                              }
++
++                              if (insn->type == INSN_JUMP_UNCONDITIONAL)
++                                      return 0;
++
++                              break;
++                      }
++
++                      /* fallthrough */
++              case INSN_CALL:
++                      dest = find_insn(file, insn->call_dest->sec,
++                                       insn->call_dest->offset);
++                      if (!dest) {
++                              WARN("Unresolved function after linking!?: %s",
++                                   insn->call_dest->name);
++                              return -1;
++                      }
++
++                      ret = validate_entry(file, dest);
++                      if (ret) {
++                              if (backtrace)
++                                      BT_FUNC("(call)", insn);
++                              return ret;
++                      }
++                      /*
++                       * If a call returns without error, it must have seen UNTRAIN_RET.
++                       * Therefore any non-error return is a success.
++                       */
++                      return 0;
++
++              case INSN_RETURN:
++                      WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset);
++                      return 1;
++
++              case INSN_NOP:
++                      if (insn->retpoline_safe)
++                              return 0;
++                      break;
++
++              default:
++                      break;
++              }
++
++              if (!next) {
++                      WARN_FUNC("teh end!", insn->sec, insn->offset);
++                      return -1;
++              }
++              insn = next;
++      }
++
++      return warnings;
++}
++
++/*
++ * Validate that all branches starting at 'insn->entry' encounter UNRET_END
++ * before RET.
++ */
++static int validate_unret(struct objtool_file *file)
++{
++      struct instruction *insn;
++      int ret, warnings = 0;
++
++      for_each_insn(file, insn) {
++              if (!insn->entry)
++                      continue;
++
++              ret = validate_entry(file, insn);
++              if (ret < 0) {
++                      WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset);
++                      return ret;
++              }
++              warnings += ret;
++      }
++
++      return warnings;
++}
++
+ static int validate_retpoline(struct objtool_file *file)
+ {
+       struct instruction *insn;
+@@ -4005,6 +4153,17 @@ int check(struct objtool_file *file)
+               goto out;
+       warnings += ret;
++      if (unret) {
++              /*
++               * Must be after validate_branch() and friends, it plays
++               * further games with insn->visited.
++               */
++              ret = validate_unret(file);
++              if (ret < 0)
++                      return ret;
++              warnings += ret;
++      }
++
+       if (ibt) {
+               ret = validate_ibt(file);
+               if (ret < 0)
+--- a/tools/objtool/include/objtool/builtin.h
++++ b/tools/objtool/include/objtool/builtin.h
+@@ -10,7 +10,7 @@
+ extern const struct option check_options[];
+ extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+           lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
+-          ibt;
++          ibt, unret;
+ extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
+--- a/tools/objtool/include/objtool/check.h
++++ b/tools/objtool/include/objtool/check.h
+@@ -51,8 +51,10 @@ struct instruction {
+          ignore_alts  : 1,
+          hint         : 1,
+          retpoline_safe : 1,
+-         noendbr      : 1;
+-              /* 2 bit hole */
++         noendbr      : 1,
++         entry        : 1;
++              /* 1 bit hole */
++
+       s8 instr;
+       u8 visited;
+       /* u8 hole */
+@@ -69,6 +71,11 @@ struct instruction {
+       struct cfi_state *cfi;
+ };
++#define VISITED_BRANCH                0x01
++#define VISITED_BRANCH_UACCESS        0x02
++#define VISITED_BRANCH_MASK   0x03
++#define VISITED_ENTRY         0x04
++
+ static inline bool is_static_jump(struct instruction *insn)
+ {
+       return insn->type == INSN_JUMP_CONDITIONAL ||
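As a minimal toy model of the rule the new validate_entry() pass above enforces: every path reachable from an entry point must hit the untraining annotation (the ANNOTATE_UNRET_END / retpoline_safe NOP the pass looks for) before its first RET. The sketch below is not objtool code; the instruction stream and names are invented for illustration, and unlike the real pass it does not follow branches, calls or alternatives.

#include <stdio.h>

enum toy_insn { TOY_OTHER, TOY_UNTRAIN_NOP, TOY_RET };

/* Return 0 if the path is fine, 1 if a RET is reached before untraining. */
static int toy_validate_entry(const enum toy_insn *path, size_t len)
{
	for (size_t i = 0; i < len; i++) {
		if (path[i] == TOY_UNTRAIN_NOP)
			return 0;		/* untrained state cleared, done */
		if (path[i] == TOY_RET)
			return 1;		/* "RET before UNTRAIN" */
	}
	return 0;				/* no RET on this path at all */
}

int main(void)
{
	enum toy_insn good[] = { TOY_OTHER, TOY_UNTRAIN_NOP, TOY_RET };
	enum toy_insn bad[]  = { TOY_OTHER, TOY_RET };

	printf("good path: %s\n", toy_validate_entry(good, sizeof(good) / sizeof(good[0])) ? "FAIL" : "ok");
	printf("bad path:  %s\n", toy_validate_entry(bad, sizeof(bad) / sizeof(bad[0])) ? "FAIL" : "ok");
	return 0;
}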
diff --git a/queue-5.18/objtool-re-add-unwind_hint_-save_restore.patch b/queue-5.18/objtool-re-add-unwind_hint_-save_restore.patch
new file mode 100644
index 0000000..0342a84
--- /dev/null
@@ -0,0 +1,205 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Fri, 24 Jun 2022 12:52:40 +0200
+Subject: objtool: Re-add UNWIND_HINT_{SAVE_RESTORE}
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 8faea26e611189e933ea2281975ff4dc7c1106b6 upstream.
+
+Commit
+
+  c536ed2fffd5 ("objtool: Remove SAVE/RESTORE hints")
+
+removed the save/restore unwind hints because they were no longer
+needed. Now they're going to be needed again so re-add them.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/unwind_hints.h   |   12 ++++++++--
+ include/linux/objtool.h               |    6 +++--
+ tools/include/linux/objtool.h         |    6 +++--
+ tools/objtool/check.c                 |   40 ++++++++++++++++++++++++++++++++++
+ tools/objtool/include/objtool/check.h |   19 ++++++++--------
+ 5 files changed, 68 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/include/asm/unwind_hints.h
++++ b/arch/x86/include/asm/unwind_hints.h
+@@ -8,11 +8,11 @@
+ #ifdef __ASSEMBLY__
+ .macro UNWIND_HINT_EMPTY
+-      UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1
++      UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1
+ .endm
+ .macro UNWIND_HINT_ENTRY
+-      UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1
++      UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1
+ .endm
+ .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
+@@ -56,6 +56,14 @@
+       UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
+ .endm
++.macro UNWIND_HINT_SAVE
++      UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
++.endm
++
++.macro UNWIND_HINT_RESTORE
++      UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
++.endm
++
+ #else
+ #define UNWIND_HINT_FUNC \
+--- a/include/linux/objtool.h
++++ b/include/linux/objtool.h
+@@ -40,6 +40,8 @@ struct unwind_hint {
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC         3
+ #define UNWIND_HINT_TYPE_ENTRY                4
++#define UNWIND_HINT_TYPE_SAVE         5
++#define UNWIND_HINT_TYPE_RESTORE      6
+ #ifdef CONFIG_STACK_VALIDATION
+@@ -125,7 +127,7 @@ struct unwind_hint {
+  * the debuginfo as necessary.  It will also warn if it sees any
+  * inconsistencies.
+  */
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .Lunwind_hint_ip_\@:
+       .pushsection .discard.unwind_hints
+               /* struct unwind_hint */
+@@ -178,7 +180,7 @@ struct unwind_hint {
+ #define ASM_REACHABLE
+ #else
+ #define ANNOTATE_INTRA_FUNCTION_CALL
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .endm
+ .macro STACK_FRAME_NON_STANDARD func:req
+ .endm
+--- a/tools/include/linux/objtool.h
++++ b/tools/include/linux/objtool.h
+@@ -40,6 +40,8 @@ struct unwind_hint {
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC         3
+ #define UNWIND_HINT_TYPE_ENTRY                4
++#define UNWIND_HINT_TYPE_SAVE         5
++#define UNWIND_HINT_TYPE_RESTORE      6
+ #ifdef CONFIG_STACK_VALIDATION
+@@ -125,7 +127,7 @@ struct unwind_hint {
+  * the debuginfo as necessary.  It will also warn if it sees any
+  * inconsistencies.
+  */
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .Lunwind_hint_ip_\@:
+       .pushsection .discard.unwind_hints
+               /* struct unwind_hint */
+@@ -178,7 +180,7 @@ struct unwind_hint {
+ #define ASM_REACHABLE
+ #else
+ #define ANNOTATE_INTRA_FUNCTION_CALL
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .endm
+ .macro STACK_FRAME_NON_STANDARD func:req
+ .endm
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -2031,6 +2031,17 @@ static int read_unwind_hints(struct objt
+               insn->hint = true;
++              if (hint->type == UNWIND_HINT_TYPE_SAVE) {
++                      insn->hint = false;
++                      insn->save = true;
++                      continue;
++              }
++
++              if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
++                      insn->restore = true;
++                      continue;
++              }
++
+               if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
+                       struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
+@@ -3436,6 +3447,35 @@ static int validate_branch(struct objtoo
+                       state.instr += insn->instr;
+               if (insn->hint) {
++                      if (insn->restore) {
++                              struct instruction *save_insn, *i;
++
++                              i = insn;
++                              save_insn = NULL;
++
++                              sym_for_each_insn_continue_reverse(file, func, i) {
++                                      if (i->save) {
++                                              save_insn = i;
++                                              break;
++                                      }
++                              }
++
++                              if (!save_insn) {
++                                      WARN_FUNC("no corresponding CFI save for CFI restore",
++                                                sec, insn->offset);
++                                      return 1;
++                              }
++
++                              if (!save_insn->visited) {
++                                      WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
++                                                sec, insn->offset);
++                                      return 1;
++                              }
++
++                              insn->cfi = save_insn->cfi;
++                              nr_cfi_reused++;
++                      }
++
+                       state.cfi = *insn->cfi;
+               } else {
+                       /* XXX track if we actually changed state.cfi */
+--- a/tools/objtool/include/objtool/check.h
++++ b/tools/objtool/include/objtool/check.h
+@@ -46,18 +46,19 @@ struct instruction {
+       enum insn_type type;
+       unsigned long immediate;
+-      u8 dead_end     : 1,
+-         ignore       : 1,
+-         ignore_alts  : 1,
+-         hint         : 1,
+-         retpoline_safe : 1,
+-         noendbr      : 1,
+-         entry        : 1;
+-              /* 1 bit hole */
++      u16 dead_end            : 1,
++         ignore               : 1,
++         ignore_alts          : 1,
++         hint                 : 1,
++         save                 : 1,
++         restore              : 1,
++         retpoline_safe       : 1,
++         noendbr              : 1,
++         entry                : 1;
++              /* 7 bit hole */
+       s8 instr;
+       u8 visited;
+-      /* u8 hole */
+       struct alt_group *alt_group;
+       struct symbol *call_dest;
diff --git a/queue-5.18/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch b/queue-5.18/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch
new file mode 100644
index 0000000..2114807
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Fri, 8 Jul 2022 14:00:07 -0300
+Subject: objtool: skip non-text sections when adding return-thunk sites
+
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+
+The .discard.text section is added in order to reserve BRK, with a
+temporary function included just so the reservation can be given a size.
+That function's return adds a relocation to the return thunk, which objtool
+will then record in the .return_sites section. Linking will then fail
+because of the references to the discarded .discard.text section.
+
+Do not add instructions from non-text sections to the list of return thunk
+calls, avoiding the reference to .discard.text.
+
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1308,7 +1308,9 @@ static void add_return_call(struct objto
+       insn->type = INSN_RETURN;
+       insn->retpoline_safe = true;
+-      list_add_tail(&insn->call_node, &file->return_thunk_list);
++      /* Skip the non-text sections, especially .discard ones */
++      if (insn->sec->text)
++              list_add_tail(&insn->call_node, &file->return_thunk_list);
+ }
+ static bool same_function(struct instruction *insn1, struct instruction *insn2)
diff --git a/queue-5.18/objtool-treat-.text.__x86.-as-noinstr.patch b/queue-5.18/objtool-treat-.text.__x86.-as-noinstr.patch
new file mode 100644
index 0000000..c59bb4a
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:47 +0200
+Subject: objtool: Treat .text.__x86.* as noinstr
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 951ddecf435659553ed15a9214e153a3af43a9a1 upstream.
+
+Needed because zen_untrain_ret() will be called from noinstr code.
+
+Also makes sense since the thunks MUST NOT contain instrumentation nor
+be poked with dynamic instrumentation.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -374,7 +374,8 @@ static int decode_instructions(struct ob
+                       sec->text = true;
+               if (!strcmp(sec->name, ".noinstr.text") ||
+-                  !strcmp(sec->name, ".entry.text"))
++                  !strcmp(sec->name, ".entry.text") ||
++                  !strncmp(sec->name, ".text.__x86.", 12))
+                       sec->noinstr = true;
+               for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
diff --git a/queue-5.18/objtool-update-retpoline-validation.patch b/queue-5.18/objtool-update-retpoline-validation.patch
new file mode 100644 (file)
index 0000000..359ce5d
--- /dev/null
@@ -0,0 +1,110 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:59 +0200
+Subject: objtool: Update Retpoline validation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 9bb2ec608a209018080ca262f771e6a9ff203b6f upstream.
+
+Update retpoline validation with the new CONFIG_RETPOLINE requirement of
+not having bare naked RET instructions.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |    6 ++++++
+ arch/x86/mm/mem_encrypt_boot.S       |    2 ++
+ arch/x86/xen/xen-head.S              |    1 +
+ tools/objtool/check.c                |   19 +++++++++++++------
+ 4 files changed, 22 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -76,6 +76,12 @@
+ .endm
+ /*
++ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
++ * vs RETBleed validation.
++ */
++#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
++
++/*
+  * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+  * indirect jmp/call which may be susceptible to the Spectre variant 2
+  * attack.
+--- a/arch/x86/mm/mem_encrypt_boot.S
++++ b/arch/x86/mm/mem_encrypt_boot.S
+@@ -66,6 +66,7 @@ SYM_FUNC_START(sme_encrypt_execute)
+       pop     %rbp
+       /* Offset to __x86_return_thunk would be wrong here */
++      ANNOTATE_UNRET_SAFE
+       ret
+       int3
+ SYM_FUNC_END(sme_encrypt_execute)
+@@ -154,6 +155,7 @@ SYM_FUNC_START(__enc_copy)
+       pop     %r15
+       /* Offset to __x86_return_thunk would be wrong here */
++      ANNOTATE_UNRET_SAFE
+       ret
+       int3
+ .L__enc_copy_end:
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -26,6 +26,7 @@ SYM_CODE_START(hypercall_page)
+       .rept (PAGE_SIZE / 32)
+               UNWIND_HINT_FUNC
+               ANNOTATE_NOENDBR
++              ANNOTATE_UNRET_SAFE
+               ret
+               /*
+                * Xen will write the hypercall page, and sort out ENDBR.
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -2114,8 +2114,9 @@ static int read_retpoline_hints(struct o
+               }
+               if (insn->type != INSN_JUMP_DYNAMIC &&
+-                  insn->type != INSN_CALL_DYNAMIC) {
+-                      WARN_FUNC("retpoline_safe hint not an indirect jump/call",
++                  insn->type != INSN_CALL_DYNAMIC &&
++                  insn->type != INSN_RETURN) {
++                      WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret",
+                                 insn->sec, insn->offset);
+                       return -1;
+               }
+@@ -3648,7 +3649,8 @@ static int validate_retpoline(struct obj
+       for_each_insn(file, insn) {
+               if (insn->type != INSN_JUMP_DYNAMIC &&
+-                  insn->type != INSN_CALL_DYNAMIC)
++                  insn->type != INSN_CALL_DYNAMIC &&
++                  insn->type != INSN_RETURN)
+                       continue;
+               if (insn->retpoline_safe)
+@@ -3663,9 +3665,14 @@ static int validate_retpoline(struct obj
+               if (!strcmp(insn->sec->name, ".init.text") && !module)
+                       continue;
+-              WARN_FUNC("indirect %s found in RETPOLINE build",
+-                        insn->sec, insn->offset,
+-                        insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
++              if (insn->type == INSN_RETURN) {
++                      WARN_FUNC("'naked' return found in RETPOLINE build",
++                                insn->sec, insn->offset);
++              } else {
++                      WARN_FUNC("indirect %s found in RETPOLINE build",
++                                insn->sec, insn->offset,
++                                insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
++              }
+               warnings++;
+       }
diff --git a/queue-5.18/series b/queue-5.18/series
new file mode 100644
index 0000000..4e855eb
--- /dev/null
@@ -0,0 +1,61 @@
+x86-traps-use-pt_regs-directly-in-fixup_bad_iret.patch
+x86-entry-switch-the-stack-after-error_entry-returns.patch
+x86-entry-move-push_and_clear_regs-out-of-error_entry.patch
+x86-entry-don-t-call-error_entry-for-xenpv.patch
+x86-entry-remove-skip_r11rcx.patch
+x86-kvm-vmx-make-noinstr-clean.patch
+x86-cpufeatures-move-retpoline-flags-to-word-11.patch
+x86-retpoline-cleanup-some-ifdefery.patch
+x86-retpoline-swizzle-retpoline-thunk.patch
+x86-retpoline-use-mfunction-return.patch
+x86-undo-return-thunk-damage.patch
+x86-objtool-create-.return_sites.patch
+objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch
+x86-static_call-use-alternative-ret-encoding.patch
+x86-ftrace-use-alternative-ret-encoding.patch
+x86-bpf-use-alternative-ret-encoding.patch
+x86-kvm-fix-setcc-emulation-for-return-thunks.patch
+x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch
+x86-sev-avoid-using-__x86_return_thunk.patch
+x86-use-return-thunk-in-asm-code.patch
+x86-entry-avoid-very-early-ret.patch
+objtool-treat-.text.__x86.-as-noinstr.patch
+x86-add-magic-amd-return-thunk.patch
+x86-bugs-report-amd-retbleed-vulnerability.patch
+x86-bugs-add-amd-retbleed-boot-parameter.patch
+x86-bugs-enable-stibp-for-jmp2ret.patch
+x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch
+x86-entry-add-kernel-ibrs-implementation.patch
+x86-bugs-optimize-spec_ctrl-msr-writes.patch
+x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch
+x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch
+x86-bugs-report-intel-retbleed-vulnerability.patch
+intel_idle-disable-ibrs-during-long-idle.patch
+objtool-update-retpoline-validation.patch
+x86-xen-rename-sys-entry-points.patch
+x86-xen-add-untrain_ret.patch
+x86-bugs-add-retbleed-ibpb.patch
+x86-bugs-do-ibpb-fallback-check-only-once.patch
+objtool-add-entry-unret-validation.patch
+x86-cpu-amd-add-spectral-chicken.patch
+x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch
+x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch
+x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch
+x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch
+x86-speculation-remove-x86_spec_ctrl_mask.patch
+objtool-re-add-unwind_hint_-save_restore.patch
+kvm-vmx-flatten-__vmx_vcpu_run.patch
+kvm-vmx-convert-launched-argument-to-flags.patch
+kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch
+kvm-vmx-fix-ibrs-handling-after-vmexit.patch
+x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch
+kvm-vmx-prevent-rsb-underflow-before-vmenter.patch
+x86-common-stamp-out-the-stepping-madness.patch
+x86-cpu-amd-enumerate-btc_no.patch
+x86-retbleed-add-fine-grained-kconfig-knobs.patch
+x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch
+x86-entry-move-push_and_clear_regs-back-into-error_entry.patch
+x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch
+x86-kexec-disable-ret-on-kexec.patch
+x86-speculation-disable-rrsba-behavior.patch
+x86-static_call-serialize-__static_call_fixup-properly.patch
diff --git a/queue-5.18/x86-add-magic-amd-return-thunk.patch b/queue-5.18/x86-add-magic-amd-return-thunk.patch
new file mode 100644
index 0000000..4345926
--- /dev/null
@@ -0,0 +1,353 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Fri, 8 Jul 2022 14:00:36 -0300
+Subject: x86: Add magic AMD return-thunk
+
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+
+commit a149180fbcf336e97ce4eb2cdc13672727feb94d upstream.
+
+Note: needs to be in a section distinct from Retpolines such that the
+Retpoline RET substitution cannot possibly use immediate jumps.
+
+ORC unwinding for zen_untrain_ret() and __x86_return_thunk() is a
+little tricky but works due to the fact that zen_untrain_ret() doesn't
+have any stack ops and as such will emit a single ORC entry at the
+start (+0x3f).
+
+Meanwhile, unwinding an IP, including the __x86_return_thunk() one
+(+0x40) will search for the largest ORC entry smaller or equal to the
+IP, these will find the one ORC entry (+0x3f) and all works.
+
+  [ Alexandre: SVM part. ]
+  [ bp: Build fix, massages. ]
+
+Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: conflicts at arch/x86/entry/entry_64_compat.S]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S                |    6 ++
+ arch/x86/entry/entry_64_compat.S         |    4 +
+ arch/x86/include/asm/cpufeatures.h       |    1 
+ arch/x86/include/asm/disabled-features.h |    3 -
+ arch/x86/include/asm/nospec-branch.h     |   17 ++++++++
+ arch/x86/kernel/vmlinux.lds.S            |    2 
+ arch/x86/kvm/svm/vmenter.S               |   18 ++++++++
+ arch/x86/lib/retpoline.S                 |   64 +++++++++++++++++++++++++++++--
+ tools/objtool/check.c                    |   21 ++++++++--
+ 9 files changed, 127 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -96,6 +96,7 @@ SYM_CODE_START(entry_SYSCALL_64)
+ SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
+       ANNOTATE_NOENDBR
++      UNTRAIN_RET
+       /* Construct struct pt_regs on stack */
+       pushq   $__USER_DS                              /* pt_regs->ss */
+@@ -708,6 +709,7 @@ native_irq_return_ldt:
+       pushq   %rdi                            /* Stash user RDI */
+       swapgs                                  /* to kernel GS */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi   /* to kernel CR3 */
++      UNTRAIN_RET
+       movq    PER_CPU_VAR(espfix_waddr), %rdi
+       movq    %rax, (0*8)(%rdi)               /* user RAX */
+@@ -903,6 +905,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+        * be retrieved from a kernel internal table.
+        */
+       SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
++      UNTRAIN_RET
+       /*
+        * Handling GSBASE depends on the availability of FSGSBASE.
+@@ -1013,6 +1016,7 @@ SYM_CODE_START_LOCAL(error_entry)
+       FENCE_SWAPGS_USER_ENTRY
+       /* We have user CR3.  Change to kernel CR3. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++      UNTRAIN_RET
+       leaq    8(%rsp), %rdi                   /* arg0 = pt_regs pointer */
+ .Lerror_entry_from_usermode_after_swapgs:
+@@ -1065,6 +1069,7 @@ SYM_CODE_START_LOCAL(error_entry)
+       SWAPGS
+       FENCE_SWAPGS_USER_ENTRY
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++      UNTRAIN_RET
+       /*
+        * Pretend that the exception came from user mode: set up pt_regs
+@@ -1160,6 +1165,7 @@ SYM_CODE_START(asm_exc_nmi)
+       movq    %rsp, %rdx
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+       UNWIND_HINT_IRET_REGS base=%rdx offset=8
++      UNTRAIN_RET
+       pushq   5*8(%rdx)       /* pt_regs->ss */
+       pushq   4*8(%rdx)       /* pt_regs->rsp */
+       pushq   3*8(%rdx)       /* pt_regs->flags */
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -14,6 +14,7 @@
+ #include <asm/irqflags.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/nospec-branch.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+@@ -72,6 +73,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
+       pushq   $__USER32_CS            /* pt_regs->cs */
+       pushq   $0                      /* pt_regs->ip = 0 (placeholder) */
+ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
++      UNTRAIN_RET
+       /*
+        * User tracing code (ptrace or signal handlers) might assume that
+@@ -215,6 +217,7 @@ SYM_CODE_START(entry_SYSCALL_compat)
+ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
+       ANNOTATE_NOENDBR
++      UNTRAIN_RET
+       /* Construct struct pt_regs on stack */
+       pushq   $__USER32_DS            /* pt_regs->ss */
+@@ -382,6 +385,7 @@ SYM_CODE_START(entry_INT80_compat)
+       pushq   (%rdi)                  /* pt_regs->di */
+ .Lint80_keep_stack:
++      UNTRAIN_RET
+       pushq   %rsi                    /* pt_regs->si */
+       xorl    %esi, %esi              /* nospec   si */
+       pushq   %rdx                    /* pt_regs->dx */
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -300,6 +300,7 @@
+ #define X86_FEATURE_RETPOLINE         (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_LFENCE  (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+ #define X86_FEATURE_RETHUNK           (11*32+14) /* "" Use REturn THUNK */
++#define X86_FEATURE_UNRET             (11*32+15) /* "" AMD BTB untrain return */
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX_VNNI          (12*32+ 4) /* AVX VNNI instructions */
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -61,7 +61,8 @@
+ #else
+ # define DISABLE_RETPOLINE    ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+                                (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \
+-                               (1 << (X86_FEATURE_RETHUNK & 31)))
++                               (1 << (X86_FEATURE_RETHUNK & 31)) | \
++                               (1 << (X86_FEATURE_UNRET & 31)))
+ #endif
+ #ifdef CONFIG_INTEL_IOMMU_SVM
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -112,6 +112,22 @@
+ #endif
+ .endm
++/*
++ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
++ * return thunk isn't mapped into the userspace tables (then again, AMD
++ * typically has NO_MELTDOWN).
++ *
++ * Doesn't clobber any registers but does require a stable stack.
++ *
++ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
++ * where we have a stack but before any RET instruction.
++ */
++.macro UNTRAIN_RET
++#ifdef CONFIG_RETPOLINE
++      ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET
++#endif
++.endm
++
+ #else /* __ASSEMBLY__ */
+ #define ANNOTATE_RETPOLINE_SAFE                                       \
+@@ -124,6 +140,7 @@ typedef u8 retpoline_thunk_t[RETPOLINE_T
+ extern retpoline_thunk_t __x86_indirect_thunk_array[];
+ extern void __x86_return_thunk(void);
++extern void zen_untrain_ret(void);
+ #ifdef CONFIG_RETPOLINE
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -141,7 +141,7 @@ SECTIONS
+ #ifdef CONFIG_RETPOLINE
+               __indirect_thunk_start = .;
+-              *(.text.__x86.indirect_thunk)
++              *(.text.__x86.*)
+               __indirect_thunk_end = .;
+ #endif
+       } :text =0xcccc
+--- a/arch/x86/kvm/svm/vmenter.S
++++ b/arch/x86/kvm/svm/vmenter.S
+@@ -111,6 +111,15 @@ SYM_FUNC_START(__svm_vcpu_run)
+ #endif
+       /*
++       * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
++       * untrained as soon as we exit the VM and are back to the
++       * kernel. This should be done before re-enabling interrupts
++       * because interrupt handlers won't sanitize 'ret' if the return is
++       * from the kernel.
++       */
++      UNTRAIN_RET
++
++      /*
+        * Clear all general purpose registers except RSP and RAX to prevent
+        * speculative use of the guest's values, even those that are reloaded
+        * via the stack.  In theory, an L1 cache miss when restoring registers
+@@ -190,6 +199,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
+       FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+ #endif
++      /*
++       * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
++       * untrained as soon as we exit the VM and are back to the
++       * kernel. This should be done before re-enabling interrupts
++       * because interrupt handlers won't sanitize RET if the return is
++       * from the kernel.
++       */
++      UNTRAIN_RET
++
+       pop %_ASM_BX
+ #ifdef CONFIG_X86_64
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -72,11 +72,67 @@ SYM_CODE_END(__x86_indirect_thunk_array)
+  * This function name is magical and is used by -mfunction-return=thunk-extern
+  * for the compiler to generate JMPs to it.
+  */
+-SYM_CODE_START(__x86_return_thunk)
+-      UNWIND_HINT_EMPTY
+-      ANNOTATE_NOENDBR
++      .section .text.__x86.return_thunk
++
++/*
++ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
++ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
++ *    alignment within the BTB.
++ * 2) The instruction at zen_untrain_ret must contain, and not
++ *    end with, the 0xc3 byte of the RET.
++ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
++ *    from re-poisoning the BTB prediction.
++ */
++      .align 64
++      .skip 63, 0xcc
++SYM_FUNC_START_NOALIGN(zen_untrain_ret);
++
++      /*
++       * As executed from zen_untrain_ret, this is:
++       *
++       *   TEST $0xcc, %bl
++       *   LFENCE
++       *   JMP __x86_return_thunk
++       *
++       * Executing the TEST instruction has a side effect of evicting any BTB
++       * prediction (potentially attacker controlled) attached to the RET, as
++       * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
++       */
++      .byte   0xf6
++
++      /*
++       * As executed from __x86_return_thunk, this is a plain RET.
++       *
++       * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
++       *
++       * We subsequently jump backwards and architecturally execute the RET.
++       * This creates a correct BTB prediction (type=ret), but in the
++       * meantime we suffer Straight Line Speculation (because the type was
++       * no branch) which is halted by the INT3.
++       *
++       * With SMT enabled and STIBP active, a sibling thread cannot poison
++       * RET's prediction to a type of its choice, but can evict the
++       * prediction due to competitive sharing. If the prediction is
++       * evicted, __x86_return_thunk will suffer Straight Line Speculation
++       * which will be contained safely by the INT3.
++       */
++SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
+       ret
+       int3
+ SYM_CODE_END(__x86_return_thunk)
+-__EXPORT_THUNK(__x86_return_thunk)
++      /*
++       * Ensure the TEST decoding / BTB invalidation is complete.
++       */
++      lfence
++
++      /*
++       * Jump back and execute the RET in the middle of the TEST instruction.
++       * INT3 is for SLS protection.
++       */
++      jmp __x86_return_thunk
++      int3
++SYM_FUNC_END(zen_untrain_ret)
++__EXPORT_THUNK(zen_untrain_ret)
++
++EXPORT_SYMBOL(__x86_return_thunk)
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1300,7 +1300,7 @@ static void add_retpoline_call(struct ob
+       annotate_call_site(file, insn, false);
+ }
+-static void add_return_call(struct objtool_file *file, struct instruction *insn)
++static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add)
+ {
+       /*
+        * Return thunk tail calls are really just returns in disguise,
+@@ -1310,7 +1310,7 @@ static void add_return_call(struct objto
+       insn->retpoline_safe = true;
+       /* Skip the non-text sections, especially .discard ones */
+-      if (insn->sec->text)
++      if (add && insn->sec->text)
+               list_add_tail(&insn->call_node, &file->return_thunk_list);
+ }
+@@ -1367,7 +1367,7 @@ static int add_jump_destinations(struct
+                       add_retpoline_call(file, insn);
+                       continue;
+               } else if (reloc->sym->return_thunk) {
+-                      add_return_call(file, insn);
++                      add_return_call(file, insn, true);
+                       continue;
+               } else if (insn->func) {
+                       /*
+@@ -1387,6 +1387,21 @@ static int add_jump_destinations(struct
+               jump_dest = find_insn(file, dest_sec, dest_off);
+               if (!jump_dest) {
++                      struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
++
++                      /*
++                       * This is a special case for zen_untrain_ret().
++                       * It jumps to __x86_return_thunk(), but objtool
++                       * can't find the thunk's starting RET
++                       * instruction, because the RET is also in the
++                       * middle of another instruction.  Objtool only
++                       * knows about the outer instruction.
++                       */
++                      if (sym && sym->return_thunk) {
++                              add_return_call(file, insn, false);
++                              continue;
++                      }
++
+                       WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
+                                 insn->sec, insn->offset, dest_sec->name,
+                                 dest_off);
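The layout that the retpoline.S hunk above builds is easier to see with the raw bytes written out. The sketch below is illustration only, not kernel code; the offsets simply restate the .align 64 / .skip 63 placement, with zen_untrain_ret at +0x3f and __x86_return_thunk at +0x40 sharing bytes.

#include <stdio.h>

/* The three overlapping bytes, as emitted by the .byte / ret / int3 sequence. */
static const unsigned char thunk_bytes[] = {
	0xf6,	/* +0x3f  zen_untrain_ret: opcode byte of TEST r/m8, imm8    */
	0xc3,	/* +0x40  __x86_return_thunk: RET (doubles as TEST's ModRM)  */
	0xcc,	/* +0x41  INT3 for SLS (doubles as TEST's imm8, hence 0xcc)  */
};

int main(void)
{
	/*
	 * Entered at +0x3f, the CPU decodes all three bytes as a single
	 * TEST $0xcc,%bl, then falls through to the LFENCE and the JMP back
	 * to +0x40.  Entered at +0x40, the same memory decodes as RET; INT3.
	 */
	for (unsigned int i = 0; i < sizeof(thunk_bytes); i++)
		printf("+0x%02x: 0x%02x\n", 0x3f + i, thunk_bytes[i]);
	return 0;
}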
diff --git a/queue-5.18/x86-bpf-use-alternative-ret-encoding.patch b/queue-5.18/x86-bpf-use-alternative-ret-encoding.patch
new file mode 100644
index 0000000..a1f1209
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:41 +0200
+Subject: x86/bpf: Use alternative RET encoding
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d77cfe594ad50e0bf95d457e02ccd578791b2a15 upstream.
+
+Use the return thunk in eBPF generated code, if needed.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/net/bpf_jit_comp.c |   19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -420,6 +420,21 @@ static void emit_indirect_jump(u8 **ppro
+       *pprog = prog;
+ }
++static void emit_return(u8 **pprog, u8 *ip)
++{
++      u8 *prog = *pprog;
++
++      if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
++              emit_jump(&prog, &__x86_return_thunk, ip);
++      } else {
++              EMIT1(0xC3);            /* ret */
++              if (IS_ENABLED(CONFIG_SLS))
++                      EMIT1(0xCC);    /* int3 */
++      }
++
++      *pprog = prog;
++}
++
+ /*
+  * Generate the following code:
+  *
+@@ -1680,7 +1695,7 @@ emit_jmp:
+                       ctx->cleanup_addr = proglen;
+                       pop_callee_regs(&prog, callee_regs_used);
+                       EMIT1(0xC9);         /* leave */
+-                      EMIT1(0xC3);         /* ret */
++                      emit_return(&prog, image + addrs[i - 1] + (prog - temp));
+                       break;
+               default:
+@@ -2157,7 +2172,7 @@ int arch_prepare_bpf_trampoline(struct b
+       if (flags & BPF_TRAMP_F_SKIP_FRAME)
+               /* skip our return address and return to parent */
+               EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+-      EMIT1(0xC3); /* ret */
++      emit_return(&prog, prog);
+       /* Make sure the trampoline generation logic doesn't overflow */
+       if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
+               ret = -EFAULT;
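A sketch of the two encodings emit_return() above now chooses between, pulled out of the JIT for clarity. The helper names and buffer handling here are invented for this example; only the byte values and the rel32 arithmetic mirror what the JIT's emit_jump()/EMIT1() helpers produce.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Legacy case: a bare 1-byte RET, plus an INT3 when SLS padding is wanted. */
static size_t emit_plain_ret(uint8_t *buf, int sls)
{
	size_t len = 0;

	buf[len++] = 0xc3;		/* ret  */
	if (sls)
		buf[len++] = 0xcc;	/* int3 */
	return len;
}

/* Rethunk case: a 5-byte "jmp rel32" aimed at __x86_return_thunk. */
static size_t emit_thunk_jmp(uint8_t *buf, uint64_t ip, uint64_t thunk)
{
	/* rel32 is measured from the end of the 5-byte jmp instruction. */
	int32_t rel = (int32_t)(thunk - (ip + 5));

	buf[0] = 0xe9;			/* jmp rel32 */
	memcpy(&buf[1], &rel, sizeof(rel));	/* little-endian host assumed */
	return 5;
}

int main(void)
{
	uint8_t buf[8];
	size_t i, n;

	n = emit_thunk_jmp(buf, 0xffffffffa0000100ull, 0xffffffff81e00000ull);
	for (i = 0; i < n; i++)
		printf("%02x ", buf[i]);
	printf("\n");

	n = emit_plain_ret(buf, 1);
	for (i = 0; i < n; i++)
		printf("%02x ", buf[i]);
	printf("\n");
	return 0;
}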
diff --git a/queue-5.18/x86-bugs-add-amd-retbleed-boot-parameter.patch b/queue-5.18/x86-bugs-add-amd-retbleed-boot-parameter.patch
new file mode 100644
index 0000000..a972905
--- /dev/null
@@ -0,0 +1,207 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Alexandre Chartre <alexandre.chartre@oracle.com>
+Date: Tue, 14 Jun 2022 23:15:50 +0200
+Subject: x86/bugs: Add AMD retbleed= boot parameter
+
+From: Alexandre Chartre <alexandre.chartre@oracle.com>
+
+commit 7fbf47c7ce50b38a64576b150e7011ae73d54669 upstream.
+
+Add the "retbleed=<value>" boot parameter to select a mitigation for
+RETBleed. Possible values are "off", "auto" and "unret"
+(JMP2RET mitigation). The default value is "auto".
+
+Currently, "retbleed=auto" will select the unret mitigation on
+AMD and Hygon and no mitigation on Intel (JMP2RET is not effective on
+Intel).
+
+  [peterz: rebase; add hygon]
+  [jpoimboe: cleanups]
+
+Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |   15 +++
+ arch/x86/Kconfig                                |    3 
+ arch/x86/kernel/cpu/bugs.c                      |  108 +++++++++++++++++++++++-
+ 3 files changed, 125 insertions(+), 1 deletion(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -5124,6 +5124,21 @@
+       retain_initrd   [RAM] Keep initrd memory after extraction
++      retbleed=       [X86] Control mitigation of RETBleed (Arbitrary
++                      Speculative Code Execution with Return Instructions)
++                      vulnerability.
++
++                      off         - unconditionally disable
+                      auto        - automatically select a mitigation
++                      unret       - force enable untrained return thunks,
++                                    only effective on AMD Zen {1,2}
++                                    based systems.
++
++                      Selecting 'auto' will choose a mitigation method at run
++                      time according to the CPU.
++
++                      Not specifying this option is equivalent to retbleed=auto.
++
+       rfkill.default_state=
+               0       "airplane mode".  All wifi, bluetooth, wimax, gps, fm,
+                       etc. communication is blocked by default.
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -469,6 +469,9 @@ config RETPOLINE
+ config CC_HAS_SLS
+       def_bool $(cc-option,-mharden-sls=all)
++config CC_HAS_RETURN_THUNK
++      def_bool $(cc-option,-mfunction-return=thunk-extern)
++
+ config SLS
+       bool "Mitigate Straight-Line-Speculation"
+       depends on CC_HAS_SLS && X86_64
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -37,6 +37,7 @@
+ #include "cpu.h"
+ static void __init spectre_v1_select_mitigation(void);
++static void __init retbleed_select_mitigation(void);
+ static void __init spectre_v2_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+@@ -120,6 +121,12 @@ void __init check_bugs(void)
+       /* Select the proper CPU mitigations before patching alternatives: */
+       spectre_v1_select_mitigation();
++      retbleed_select_mitigation();
++      /*
++       * spectre_v2_select_mitigation() relies on the state set by
++       * retbleed_select_mitigation(); specifically the STIBP selection is
++       * forced for UNRET.
++       */
+       spectre_v2_select_mitigation();
+       ssb_select_mitigation();
+       l1tf_select_mitigation();
+@@ -746,6 +753,100 @@ static int __init nospectre_v1_cmdline(c
+ early_param("nospectre_v1", nospectre_v1_cmdline);
+ #undef pr_fmt
++#define pr_fmt(fmt)     "RETBleed: " fmt
++
++enum retbleed_mitigation {
++      RETBLEED_MITIGATION_NONE,
++      RETBLEED_MITIGATION_UNRET,
++};
++
++enum retbleed_mitigation_cmd {
++      RETBLEED_CMD_OFF,
++      RETBLEED_CMD_AUTO,
++      RETBLEED_CMD_UNRET,
++};
++
++const char * const retbleed_strings[] = {
++      [RETBLEED_MITIGATION_NONE]      = "Vulnerable",
++      [RETBLEED_MITIGATION_UNRET]     = "Mitigation: untrained return thunk",
++};
++
++static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
++      RETBLEED_MITIGATION_NONE;
++static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
++      RETBLEED_CMD_AUTO;
++
++static int __init retbleed_parse_cmdline(char *str)
++{
++      if (!str)
++              return -EINVAL;
++
++      if (!strcmp(str, "off"))
++              retbleed_cmd = RETBLEED_CMD_OFF;
++      else if (!strcmp(str, "auto"))
++              retbleed_cmd = RETBLEED_CMD_AUTO;
++      else if (!strcmp(str, "unret"))
++              retbleed_cmd = RETBLEED_CMD_UNRET;
++      else
++              pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str);
++
++      return 0;
++}
++early_param("retbleed", retbleed_parse_cmdline);
++
++#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
++#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n"
++
++static void __init retbleed_select_mitigation(void)
++{
++      if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
++              return;
++
++      switch (retbleed_cmd) {
++      case RETBLEED_CMD_OFF:
++              return;
++
++      case RETBLEED_CMD_UNRET:
++              retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++              break;
++
++      case RETBLEED_CMD_AUTO:
++      default:
++              if (!boot_cpu_has_bug(X86_BUG_RETBLEED))
++                      break;
++
++              if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
++                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
++                      retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++              break;
++      }
++
++      switch (retbleed_mitigation) {
++      case RETBLEED_MITIGATION_UNRET:
++
++              if (!IS_ENABLED(CONFIG_RETPOLINE) ||
++                  !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) {
++                      pr_err(RETBLEED_COMPILER_MSG);
++                      retbleed_mitigation = RETBLEED_MITIGATION_NONE;
++                      break;
++              }
++
++              setup_force_cpu_cap(X86_FEATURE_RETHUNK);
++              setup_force_cpu_cap(X86_FEATURE_UNRET);
++
++              if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
++                  boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
++                      pr_err(RETBLEED_UNTRAIN_MSG);
++              break;
++
++      default:
++              break;
++      }
++
++      pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
++}
++
++#undef pr_fmt
+ #define pr_fmt(fmt)     "Spectre V2 : " fmt
+ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+@@ -1989,7 +2090,12 @@ static ssize_t srbds_show_state(char *bu
+ static ssize_t retbleed_show_state(char *buf)
+ {
+-      return sprintf(buf, "Vulnerable\n");
++      if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET &&
++          (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
++           boot_cpu_data.x86_vendor != X86_VENDOR_HYGON))
++              return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
++
++      return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
+ }
+ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
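The string retbleed_show_state() builds above ends up in sysfs once the x86-bugs-report-*-retbleed-vulnerability patches earlier in this series are applied. A small userspace sketch, assuming that sysfs path, to read it back:

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/devices/system/cpu/vulnerabilities/retbleed";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	/* e.g. "Mitigation: untrained return thunk" or "Vulnerable" */
	if (fgets(line, sizeof(line), f))
		printf("retbleed: %s", line);
	fclose(f);
	return 0;
}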
diff --git a/queue-5.18/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch b/queue-5.18/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch
new file mode 100644
index 0000000..5964a5e
--- /dev/null
@@ -0,0 +1,30 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 6 Jul 2022 15:01:15 -0700
+Subject: x86/bugs: Add Cannon lake to RETBleed affected CPU list
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit f54d45372c6ac9c993451de5e51312485f7d10bc upstream.
+
+Cannon lake is also affected by RETBleed, add it to the list.
+
+Fixes: 6ad0ad2bf8a6 ("x86/bugs: Report Intel retbleed vulnerability")
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1268,6 +1268,7 @@ static const struct x86_cpu_id cpu_vuln_
+       VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(CANNONLAKE_L,    X86_STEPPING_ANY,               RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPING_ANY,               MMIO),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPING_ANY,               MMIO),
diff --git a/queue-5.18/x86-bugs-add-retbleed-ibpb.patch b/queue-5.18/x86-bugs-add-retbleed-ibpb.patch
new file mode 100644
index 0000000..ebc584d
--- /dev/null
@@ -0,0 +1,253 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:16:02 +0200
+Subject: x86/bugs: Add retbleed=ibpb
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 3ebc170068885b6fc7bedda6c667bb2c4d533159 upstream.
+
+jmp2ret mitigates the easy-to-attack case at relatively low overhead.
+It mitigates the long speculation windows after a mispredicted RET, but
+it does not mitigate the short speculation window from arbitrary
+instruction boundaries.
+
+On Zen2, there is a chicken bit which needs setting, which mitigates
+"arbitrary instruction boundaries" down to just "basic block boundaries".
+
+But there is no fix for the short speculation window on basic block
+boundaries, other than to flush the entire BTB to evict all attacker
+predictions.
+
+On the spectrum of "fast & blurry" -> "safe", there is (on top of STIBP
+or no-SMT):
+
+  1) Nothing           System wide open
+  2) jmp2ret           May stop a script kiddy
+  3) jmp2ret+chickenbit  Raises the bar rather further
+  4) IBPB              Only thing which can count as "safe".
+
+Tentative numbers put IBPB-on-entry at a 2.5x hit on Zen2, and a 10x hit
+on Zen1 according to lmbench.
+
+  [ bp: Fixup feature bit comments, document option, 32-bit build fix. ]
+
+Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |    3 +
+ arch/x86/entry/Makefile                         |    2 -
+ arch/x86/entry/entry.S                          |   22 ++++++++++++
+ arch/x86/include/asm/cpufeatures.h              |    2 -
+ arch/x86/include/asm/nospec-branch.h            |    8 +++-
+ arch/x86/kernel/cpu/bugs.c                      |   43 ++++++++++++++++++------
+ 6 files changed, 67 insertions(+), 13 deletions(-)
+ create mode 100644 arch/x86/entry/entry.S
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -5134,6 +5134,9 @@
+                                      disabling SMT if necessary for
+                                      the full mitigation (only on Zen1
+                                      and older without STIBP).
++                      ibpb         - mitigate short speculation windows on
++                                     basic block boundaries too. Safe, highest
++                                     perf impact.
+                       unret        - force enable untrained return thunks,
+                                      only effective on AMD f15h-f17h
+                                      based systems.
+--- a/arch/x86/entry/Makefile
++++ b/arch/x86/entry/Makefile
+@@ -11,7 +11,7 @@ CFLAGS_REMOVE_common.o               = $(CC_FLAGS_FTR
+ CFLAGS_common.o                       += -fno-stack-protector
+-obj-y                         := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
++obj-y                         := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
+ obj-y                         += common.o
+ obj-y                         += vdso/
+--- /dev/null
++++ b/arch/x86/entry/entry.S
+@@ -0,0 +1,22 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Common place for both 32- and 64-bit entry routines.
++ */
++
++#include <linux/linkage.h>
++#include <asm/export.h>
++#include <asm/msr-index.h>
++
++.pushsection .noinstr.text, "ax"
++
++SYM_FUNC_START(entry_ibpb)
++      movl    $MSR_IA32_PRED_CMD, %ecx
++      movl    $PRED_CMD_IBPB, %eax
++      xorl    %edx, %edx
++      wrmsr
++      RET
++SYM_FUNC_END(entry_ibpb)
++/* For KVM */
++EXPORT_SYMBOL_GPL(entry_ibpb);
++
++.popsection
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -295,7 +295,7 @@
+ #define X86_FEATURE_PER_THREAD_MBA    (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+ #define X86_FEATURE_SGX1              (11*32+ 8) /* "" Basic SGX */
+ #define X86_FEATURE_SGX2              (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
+-/* FREE!                              (11*32+10) */
++#define X86_FEATURE_ENTRY_IBPB                (11*32+10) /* "" Issue an IBPB on kernel entry */
+ /* FREE!                              (11*32+11) */
+ #define X86_FEATURE_RETPOLINE         (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_LFENCE  (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -123,14 +123,17 @@
+  * return thunk isn't mapped into the userspace tables (then again, AMD
+  * typically has NO_MELTDOWN).
+  *
+- * Doesn't clobber any registers but does require a stable stack.
++ * While zen_untrain_ret() doesn't clobber anything but requires stack,
++ * entry_ibpb() will clobber AX, CX, DX.
+  *
+  * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+  * where we have a stack but before any RET instruction.
+  */
+ .macro UNTRAIN_RET
+ #ifdef CONFIG_RETPOLINE
+-      ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET
++      ALTERNATIVE_2 "",                                               \
++                    "call zen_untrain_ret", X86_FEATURE_UNRET,        \
++                    "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
+ #endif
+ .endm
+@@ -147,6 +150,7 @@ extern retpoline_thunk_t __x86_indirect_
+ extern void __x86_return_thunk(void);
+ extern void zen_untrain_ret(void);
++extern void entry_ibpb(void);
+ #ifdef CONFIG_RETPOLINE
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -798,6 +798,7 @@ static enum spectre_v2_mitigation spectr
+ enum retbleed_mitigation {
+       RETBLEED_MITIGATION_NONE,
+       RETBLEED_MITIGATION_UNRET,
++      RETBLEED_MITIGATION_IBPB,
+       RETBLEED_MITIGATION_IBRS,
+       RETBLEED_MITIGATION_EIBRS,
+ };
+@@ -806,11 +807,13 @@ enum retbleed_mitigation_cmd {
+       RETBLEED_CMD_OFF,
+       RETBLEED_CMD_AUTO,
+       RETBLEED_CMD_UNRET,
++      RETBLEED_CMD_IBPB,
+ };
+ const char * const retbleed_strings[] = {
+       [RETBLEED_MITIGATION_NONE]      = "Vulnerable",
+       [RETBLEED_MITIGATION_UNRET]     = "Mitigation: untrained return thunk",
++      [RETBLEED_MITIGATION_IBPB]      = "Mitigation: IBPB",
+       [RETBLEED_MITIGATION_IBRS]      = "Mitigation: IBRS",
+       [RETBLEED_MITIGATION_EIBRS]     = "Mitigation: Enhanced IBRS",
+ };
+@@ -840,6 +843,8 @@ static int __init retbleed_parse_cmdline
+                       retbleed_cmd = RETBLEED_CMD_AUTO;
+               } else if (!strcmp(str, "unret")) {
+                       retbleed_cmd = RETBLEED_CMD_UNRET;
++              } else if (!strcmp(str, "ibpb")) {
++                      retbleed_cmd = RETBLEED_CMD_IBPB;
+               } else if (!strcmp(str, "nosmt")) {
+                       retbleed_nosmt = true;
+               } else {
+@@ -854,11 +859,13 @@ static int __init retbleed_parse_cmdline
+ early_param("retbleed", retbleed_parse_cmdline);
+ #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
+-#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n"
++#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n"
+ #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
+ static void __init retbleed_select_mitigation(void)
+ {
++      bool mitigate_smt = false;
++
+       if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
+               return;
+@@ -870,11 +877,21 @@ static void __init retbleed_select_mitig
+               retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+               break;
++      case RETBLEED_CMD_IBPB:
++              retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++              break;
++
+       case RETBLEED_CMD_AUTO:
+       default:
+               if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+-                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+-                      retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
++
++                      if (IS_ENABLED(CONFIG_RETPOLINE) &&
++                          IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK))
++                              retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++                      else
++                              retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++              }
+               /*
+                * The Intel mitigation (IBRS) was already selected in
+@@ -890,26 +907,34 @@ static void __init retbleed_select_mitig
+               if (!IS_ENABLED(CONFIG_RETPOLINE) ||
+                   !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) {
+                       pr_err(RETBLEED_COMPILER_MSG);
+-                      retbleed_mitigation = RETBLEED_MITIGATION_NONE;
+-                      break;
++                      retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++                      goto retbleed_force_ibpb;
+               }
+               setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+               setup_force_cpu_cap(X86_FEATURE_UNRET);
+-              if (!boot_cpu_has(X86_FEATURE_STIBP) &&
+-                  (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
+-                      cpu_smt_disable(false);
+-
+               if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+                   boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+                       pr_err(RETBLEED_UNTRAIN_MSG);
++
++              mitigate_smt = true;
++              break;
++
++      case RETBLEED_MITIGATION_IBPB:
++retbleed_force_ibpb:
++              setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
++              mitigate_smt = true;
+               break;
+       default:
+               break;
+       }
++      if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) &&
++          (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
++              cpu_smt_disable(false);
++
+       /*
+        * Let IBRS trump all on Intel without affecting the effects of the
+        * retbleed= cmdline option.
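For reference, the entry_ibpb stub added in entry.S above is the assembly form
of a single MSR write. A minimal C sketch of the same operation (the helper
name here is made up for illustration; the MSR and command bit are the existing
definitions from msr-index.h):

  #include <asm/msr.h>
  #include <asm/msr-index.h>

  /*
   * Writing PRED_CMD_IBPB to the PRED_CMD MSR invalidates all prior
   * indirect branch predictions, including BTB entries planted by an
   * attacker, on the current logical CPU.
   */
  static inline void issue_ibpb(void)
  {
          wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
  }

The asm variant is what the UNTRAIN_RET alternative calls because it must run
on the entry path before any RET and, as the nospec-branch.h comment notes,
clobbers only AX, CX and DX.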
diff --git a/queue-5.18/x86-bugs-do-ibpb-fallback-check-only-once.patch b/queue-5.18/x86-bugs-do-ibpb-fallback-check-only-once.patch
new file mode 100644 (file)
index 0000000..2667f72
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 15:07:19 -0700
+Subject: x86/bugs: Do IBPB fallback check only once
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 0fe4aeea9c01baabecc8c3afc7889c809d939bc2 upstream.
+
+When booting with retbleed=auto, if the kernel wasn't built with
+CONFIG_CC_HAS_RETURN_THUNK, the mitigation falls back to IBPB.  Make
+sure a warning is printed in that case.  The IBPB fallback check is done
+twice, but it really only needs to be done once.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |   15 +++++----------
+ 1 file changed, 5 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -884,18 +884,13 @@ static void __init retbleed_select_mitig
+       case RETBLEED_CMD_AUTO:
+       default:
+               if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+-                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+-
+-                      if (IS_ENABLED(CONFIG_RETPOLINE) &&
+-                          IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK))
+-                              retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+-                      else
+-                              retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+-              }
++                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
++                      retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+               /*
+-               * The Intel mitigation (IBRS) was already selected in
+-               * spectre_v2_select_mitigation().
++               * The Intel mitigation (IBRS or eIBRS) was already selected in
++               * spectre_v2_select_mitigation().  'retbleed_mitigation' will
++               * be set accordingly below.
+                */
+               break;
diff --git a/queue-5.18/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch b/queue-5.18/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch
new file mode 100644 (file)
index 0000000..41b4a18
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Thu, 7 Jul 2022 13:41:52 -0300
+Subject: x86/bugs: Do not enable IBPB-on-entry when IBPB is not supported
+
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+
+commit 2259da159fbe5dba8ac00b560cf00b6a6537fa18 upstream.
+
+There are some VM configurations which have a Skylake model but do not
+support IBPB. In those cases, when using retbleed=ibpb, userspace is going
+to be killed and the kernel is going to panic.
+
+If the CPU does not support IBPB, warn and proceed with the auto option. Also,
+do not fall back to IBPB on AMD/Hygon systems if it is not supported.
+
+Fixes: 3ebc17006888 ("x86/bugs: Add retbleed=ibpb")
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -858,7 +858,10 @@ static void __init retbleed_select_mitig
+               break;
+       case RETBLEED_CMD_IBPB:
+-              if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
++              if (!boot_cpu_has(X86_FEATURE_IBPB)) {
++                      pr_err("WARNING: CPU does not support IBPB.\n");
++                      goto do_cmd_auto;
++              } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+                       retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+               } else {
+                       pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+@@ -873,7 +876,7 @@ do_cmd_auto:
+                   boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+                       if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY))
+                               retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+-                      else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY))
++                      else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB))
+                               retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+               }
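The guard matters because entry_ibpb, added earlier in this series, issues a
plain WRMSR to PRED_CMD; on a CPU that does not enumerate IBPB that write would
#GP on kernel entry, which matches the userspace kills and panics described
above. With both hunks applied, the selection boils down to this condensed
sketch (not the literal function body):

  if (!boot_cpu_has(X86_FEATURE_IBPB)) {
          /* e.g. a guest whose hypervisor hides IBPB */
          pr_err("WARNING: CPU does not support IBPB.\n");
          /* fall back to the retbleed=auto selection */
  } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
          retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
  }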
diff --git a/queue-5.18/x86-bugs-enable-stibp-for-jmp2ret.patch b/queue-5.18/x86-bugs-enable-stibp-for-jmp2ret.patch
new file mode 100644 (file)
index 0000000..8e69450
--- /dev/null
@@ -0,0 +1,142 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Kim Phillips <kim.phillips@amd.com>
+Date: Tue, 14 Jun 2022 23:15:51 +0200
+Subject: x86/bugs: Enable STIBP for JMP2RET
+
+From: Kim Phillips <kim.phillips@amd.com>
+
+commit e8ec1b6e08a2102d8755ccb06fa26d540f26a2fa upstream.
+
+For untrained return thunks to be fully effective, STIBP must be enabled
+or SMT disabled.
+
+Co-developed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Kim Phillips <kim.phillips@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |   16 ++++--
+ arch/x86/kernel/cpu/bugs.c                      |   58 +++++++++++++++++++-----
+ 2 files changed, 57 insertions(+), 17 deletions(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -5128,11 +5128,17 @@
+                       Speculative Code Execution with Return Instructions)
+                       vulnerability.
+-                      off         - unconditionally disable
+-                      auto        - automatically select a migitation
+-                      unret       - force enable untrained return thunks,
+-                                    only effective on AMD Zen {1,2}
+-                                    based systems.
++                      off          - no mitigation
++                      auto         - automatically select a migitation
++                      auto,nosmt   - automatically select a mitigation,
++                                     disabling SMT if necessary for
++                                     the full mitigation (only on Zen1
++                                     and older without STIBP).
++                      unret        - force enable untrained return thunks,
++                                     only effective on AMD f15h-f17h
++                                     based systems.
++                      unret,nosmt  - like unret, will disable SMT when STIBP
++                                     is not available.
+                       Selecting 'auto' will choose a mitigation method at run
+                       time according to the CPU.
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -776,19 +776,34 @@ static enum retbleed_mitigation retbleed
+ static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
+       RETBLEED_CMD_AUTO;
++static int __ro_after_init retbleed_nosmt = false;
++
+ static int __init retbleed_parse_cmdline(char *str)
+ {
+       if (!str)
+               return -EINVAL;
+-      if (!strcmp(str, "off"))
+-              retbleed_cmd = RETBLEED_CMD_OFF;
+-      else if (!strcmp(str, "auto"))
+-              retbleed_cmd = RETBLEED_CMD_AUTO;
+-      else if (!strcmp(str, "unret"))
+-              retbleed_cmd = RETBLEED_CMD_UNRET;
+-      else
+-              pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str);
++      while (str) {
++              char *next = strchr(str, ',');
++              if (next) {
++                      *next = 0;
++                      next++;
++              }
++
++              if (!strcmp(str, "off")) {
++                      retbleed_cmd = RETBLEED_CMD_OFF;
++              } else if (!strcmp(str, "auto")) {
++                      retbleed_cmd = RETBLEED_CMD_AUTO;
++              } else if (!strcmp(str, "unret")) {
++                      retbleed_cmd = RETBLEED_CMD_UNRET;
++              } else if (!strcmp(str, "nosmt")) {
++                      retbleed_nosmt = true;
++              } else {
++                      pr_err("Ignoring unknown retbleed option (%s).", str);
++              }
++
++              str = next;
++      }
+       return 0;
+ }
+@@ -834,6 +849,10 @@ static void __init retbleed_select_mitig
+               setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+               setup_force_cpu_cap(X86_FEATURE_UNRET);
++              if (!boot_cpu_has(X86_FEATURE_STIBP) &&
++                  (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
++                      cpu_smt_disable(false);
++
+               if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+                   boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+                       pr_err(RETBLEED_UNTRAIN_MSG);
+@@ -1080,6 +1099,13 @@ spectre_v2_user_select_mitigation(enum s
+           boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
+               mode = SPECTRE_V2_USER_STRICT_PREFERRED;
++      if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
++              if (mode != SPECTRE_V2_USER_STRICT &&
++                  mode != SPECTRE_V2_USER_STRICT_PREFERRED)
++                      pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n");
++              mode = SPECTRE_V2_USER_STRICT_PREFERRED;
++      }
++
+       spectre_v2_user_stibp = mode;
+ set_mode:
+@@ -2090,10 +2116,18 @@ static ssize_t srbds_show_state(char *bu
+ static ssize_t retbleed_show_state(char *buf)
+ {
+-      if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET &&
+-          (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+-           boot_cpu_data.x86_vendor != X86_VENDOR_HYGON))
+-              return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
++      if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
++          if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
++              boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
++                  return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
++
++          return sprintf(buf, "%s; SMT %s\n",
++                         retbleed_strings[retbleed_mitigation],
++                         !sched_smt_active() ? "disabled" :
++                         spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
++                         spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ?
++                         "enabled with STIBP protection" : "vulnerable");
++      }
+       return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
+ }
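The behaviour the two bugs.c hunks above implement can be summarised as "STIBP
always-on, or no SMT"; the sketch below is an explanatory condensation, not the
literal code:

  /*
   * The untrained return thunk only protects the thread executing it;
   * the SMT sibling can still train the return predictor.
   */
  if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
          if (boot_cpu_has(X86_FEATURE_STIBP))
                  spectre_v2_user_stibp = SPECTRE_V2_USER_STRICT_PREFERRED;
          else if (retbleed_nosmt || cpu_mitigations_auto_nosmt())
                  cpu_smt_disable(false);
  }

This is also why the documentation hunk adds the 'unret,nosmt' variant: on
parts without STIBP (Zen1 and older), giving up SMT is the only way to get the
full effect.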
diff --git a/queue-5.18/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch b/queue-5.18/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch
new file mode 100644 (file)
index 0000000..3b79552
--- /dev/null
@@ -0,0 +1,118 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:52 +0200
+Subject: x86/bugs: Keep a per-CPU IA32_SPEC_CTRL value
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit caa0ff24d5d0e02abce5e65c3d2b7f20a6617be5 upstream.
+
+Due to TIF_SSBD and TIF_SPEC_IB the actual IA32_SPEC_CTRL value can
+differ from x86_spec_ctrl_base. As such, keep a per-CPU value
+reflecting the current task's MSR content.
+
+  [jpoimboe: rename]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |    1 +
+ arch/x86/kernel/cpu/bugs.c           |   28 +++++++++++++++++++++++-----
+ arch/x86/kernel/process.c            |    2 +-
+ 3 files changed, 25 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -253,6 +253,7 @@ static inline void indirect_branch_predi
+ /* The Intel SPEC CTRL MSR base value cache */
+ extern u64 x86_spec_ctrl_base;
++extern void write_spec_ctrl_current(u64 val);
+ /*
+  * With retpoline, we must use IBRS to restrict branch prediction
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -49,12 +49,30 @@ static void __init mmio_select_mitigatio
+ static void __init srbds_select_mitigation(void);
+ static void __init l1d_flush_select_mitigation(void);
+-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
++/* The base value of the SPEC_CTRL MSR without task-specific bits set */
+ u64 x86_spec_ctrl_base;
+ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
++
++/* The current value of the SPEC_CTRL MSR with task-specific bits set */
++DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
++EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
++
+ static DEFINE_MUTEX(spec_ctrl_mutex);
+ /*
++ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
++ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
++ */
++void write_spec_ctrl_current(u64 val)
++{
++      if (this_cpu_read(x86_spec_ctrl_current) == val)
++              return;
++
++      this_cpu_write(x86_spec_ctrl_current, val);
++      wrmsrl(MSR_IA32_SPEC_CTRL, val);
++}
++
++/*
+  * The vendor and possibly platform specific bits which can be modified in
+  * x86_spec_ctrl_base.
+  */
+@@ -1272,7 +1290,7 @@ static void __init spectre_v2_select_mit
+       if (spectre_v2_in_eibrs_mode(mode)) {
+               /* Force it so VMEXIT will restore correctly */
+               x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+-              wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++              write_spec_ctrl_current(x86_spec_ctrl_base);
+       }
+       switch (mode) {
+@@ -1327,7 +1345,7 @@ static void __init spectre_v2_select_mit
+ static void update_stibp_msr(void * __unused)
+ {
+-      wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++      write_spec_ctrl_current(x86_spec_ctrl_base);
+ }
+ /* Update x86_spec_ctrl_base in case SMT state changed. */
+@@ -1570,7 +1588,7 @@ static enum ssb_mitigation __init __ssb_
+                       x86_amd_ssb_disable();
+               } else {
+                       x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+-                      wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++                      write_spec_ctrl_current(x86_spec_ctrl_base);
+               }
+       }
+@@ -1821,7 +1839,7 @@ int arch_prctl_spec_ctrl_get(struct task
+ void x86_spec_ctrl_setup_ap(void)
+ {
+       if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+-              wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++              write_spec_ctrl_current(x86_spec_ctrl_base);
+       if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+               x86_amd_ssb_disable();
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -600,7 +600,7 @@ static __always_inline void __speculatio
+       }
+       if (updmsr)
+-              wrmsrl(MSR_IA32_SPEC_CTRL, msr);
++              write_spec_ctrl_current(msr);
+ }
+ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
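To see why the per-CPU shadow is needed, consider how the context-switch path
composes the value it hands to write_spec_ctrl_current(). The sketch below is
simplified; the real __speculation_ctrl_update() works on the TIF bit deltas
via the *_tif_to_spec_ctrl() helpers and handles the AMD virtualized-SSBD case
separately:

  u64 msr = x86_spec_ctrl_base;
  unsigned long tif = task_thread_info(tsk)->flags;

  /* Fold in the task-specific bits: */
  if (tif & _TIF_SPEC_IB)
          msr |= SPEC_CTRL_STIBP;
  if (tif & _TIF_SSBD)
          msr |= SPEC_CTRL_SSBD;

  /* Cached per CPU so redundant WRMSRs can be skipped: */
  write_spec_ctrl_current(msr);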
diff --git a/queue-5.18/x86-bugs-optimize-spec_ctrl-msr-writes.patch b/queue-5.18/x86-bugs-optimize-spec_ctrl-msr-writes.patch
new file mode 100644 (file)
index 0000000..4eb5f84
--- /dev/null
@@ -0,0 +1,108 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:54 +0200
+Subject: x86/bugs: Optimize SPEC_CTRL MSR writes
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit c779bc1a9002fa474175b80e72b85c9bf628abb0 upstream.
+
+When changing SPEC_CTRL for user control, the WRMSR can be delayed
+until return-to-user when KERNEL_IBRS has been enabled.
+
+This avoids an MSR write during context switch.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |    2 +-
+ arch/x86/kernel/cpu/bugs.c           |   18 ++++++++++++------
+ arch/x86/kernel/process.c            |    2 +-
+ 3 files changed, 14 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -253,7 +253,7 @@ static inline void indirect_branch_predi
+ /* The Intel SPEC CTRL MSR base value cache */
+ extern u64 x86_spec_ctrl_base;
+-extern void write_spec_ctrl_current(u64 val);
++extern void write_spec_ctrl_current(u64 val, bool force);
+ /*
+  * With retpoline, we must use IBRS to restrict branch prediction
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -63,13 +63,19 @@ static DEFINE_MUTEX(spec_ctrl_mutex);
+  * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
+  * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
+  */
+-void write_spec_ctrl_current(u64 val)
++void write_spec_ctrl_current(u64 val, bool force)
+ {
+       if (this_cpu_read(x86_spec_ctrl_current) == val)
+               return;
+       this_cpu_write(x86_spec_ctrl_current, val);
+-      wrmsrl(MSR_IA32_SPEC_CTRL, val);
++
++      /*
++       * When KERNEL_IBRS this MSR is written on return-to-user, unless
++       * forced the update can be delayed until that time.
++       */
++      if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
++              wrmsrl(MSR_IA32_SPEC_CTRL, val);
+ }
+ /*
+@@ -1290,7 +1296,7 @@ static void __init spectre_v2_select_mit
+       if (spectre_v2_in_eibrs_mode(mode)) {
+               /* Force it so VMEXIT will restore correctly */
+               x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+-              write_spec_ctrl_current(x86_spec_ctrl_base);
++              write_spec_ctrl_current(x86_spec_ctrl_base, true);
+       }
+       switch (mode) {
+@@ -1345,7 +1351,7 @@ static void __init spectre_v2_select_mit
+ static void update_stibp_msr(void * __unused)
+ {
+-      write_spec_ctrl_current(x86_spec_ctrl_base);
++      write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ }
+ /* Update x86_spec_ctrl_base in case SMT state changed. */
+@@ -1588,7 +1594,7 @@ static enum ssb_mitigation __init __ssb_
+                       x86_amd_ssb_disable();
+               } else {
+                       x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+-                      write_spec_ctrl_current(x86_spec_ctrl_base);
++                      write_spec_ctrl_current(x86_spec_ctrl_base, true);
+               }
+       }
+@@ -1839,7 +1845,7 @@ int arch_prctl_spec_ctrl_get(struct task
+ void x86_spec_ctrl_setup_ap(void)
+ {
+       if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+-              write_spec_ctrl_current(x86_spec_ctrl_base);
++              write_spec_ctrl_current(x86_spec_ctrl_base, true);
+       if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+               x86_amd_ssb_disable();
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -600,7 +600,7 @@ static __always_inline void __speculatio
+       }
+       if (updmsr)
+-              write_spec_ctrl_current(msr);
++              write_spec_ctrl_current(msr, false);
+ }
+ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
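The new 'force' argument splits the callers into two classes, which is the
whole optimization; a usage contrast for illustration:

  /* Mitigation setup and AP bring-up: the MSR must be written now. */
  write_spec_ctrl_current(x86_spec_ctrl_base, true);

  /*
   * Context switch: under KERNEL_IBRS the entry code rewrites
   * SPEC_CTRL on the next return to user space anyway, so only the
   * per-CPU shadow needs updating here.
   */
  write_spec_ctrl_current(msr, false);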
diff --git a/queue-5.18/x86-bugs-report-amd-retbleed-vulnerability.patch b/queue-5.18/x86-bugs-report-amd-retbleed-vulnerability.patch
new file mode 100644 (file)
index 0000000..620baa4
--- /dev/null
@@ -0,0 +1,169 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Alexandre Chartre <alexandre.chartre@oracle.com>
+Date: Tue, 14 Jun 2022 23:15:49 +0200
+Subject: x86/bugs: Report AMD retbleed vulnerability
+
+From: Alexandre Chartre <alexandre.chartre@oracle.com>
+
+commit 6b80b59b3555706508008f1f127b5412c89c7fd8 upstream.
+
+Report that AMD x86 CPUs are vulnerable to the RETBleed (Arbitrary
+Speculative Code Execution with Return Instructions) attack.
+
+  [peterz: add hygon]
+  [kim: invert parity; fam15h]
+
+Co-developed-by: Kim Phillips <kim.phillips@amd.com>
+Signed-off-by: Kim Phillips <kim.phillips@amd.com>
+Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    1 +
+ arch/x86/kernel/cpu/bugs.c         |   13 +++++++++++++
+ arch/x86/kernel/cpu/common.c       |   19 +++++++++++++++++++
+ drivers/base/cpu.c                 |    8 ++++++++
+ include/linux/cpu.h                |    2 ++
+ 5 files changed, 43 insertions(+)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -450,5 +450,6 @@
+ #define X86_BUG_ITLB_MULTIHIT         X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+ #define X86_BUG_SRBDS                 X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+ #define X86_BUG_MMIO_STALE_DATA               X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
++#define X86_BUG_RETBLEED              X86_BUG(26) /* CPU is affected by RETBleed */
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1987,6 +1987,11 @@ static ssize_t srbds_show_state(char *bu
+       return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
+ }
++static ssize_t retbleed_show_state(char *buf)
++{
++      return sprintf(buf, "Vulnerable\n");
++}
++
+ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+                              char *buf, unsigned int bug)
+ {
+@@ -2032,6 +2037,9 @@ static ssize_t cpu_show_common(struct de
+       case X86_BUG_MMIO_STALE_DATA:
+               return mmio_stale_data_show_state(buf);
++      case X86_BUG_RETBLEED:
++              return retbleed_show_state(buf);
++
+       default:
+               break;
+       }
+@@ -2088,4 +2096,9 @@ ssize_t cpu_show_mmio_stale_data(struct
+ {
+       return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+ }
++
++ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
++{
++      return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
++}
+ #endif
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1231,16 +1231,27 @@ static const __initconst struct x86_cpu_
+       {}
+ };
++#define VULNBL(vendor, family, model, blacklist)      \
++      X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
++
+ #define VULNBL_INTEL_STEPPINGS(model, steppings, issues)                 \
+       X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6,             \
+                                           INTEL_FAM6_##model, steppings, \
+                                           X86_FEATURE_ANY, issues)
++#define VULNBL_AMD(family, blacklist)         \
++      VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
++
++#define VULNBL_HYGON(family, blacklist)               \
++      VULNBL(HYGON, family, X86_MODEL_ANY, blacklist)
++
+ #define SRBDS         BIT(0)
+ /* CPU is affected by X86_BUG_MMIO_STALE_DATA */
+ #define MMIO          BIT(1)
+ /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
+ #define MMIO_SBDS     BIT(2)
++/* CPU is affected by RETbleed, speculating where you would not expect it */
++#define RETBLEED      BIT(3)
+ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
+       VULNBL_INTEL_STEPPINGS(IVYBRIDGE,       X86_STEPPING_ANY,               SRBDS),
+@@ -1273,6 +1284,11 @@ static const struct x86_cpu_id cpu_vuln_
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT,    X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS),
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D,  X86_STEPPING_ANY,               MMIO),
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L,  X86_STEPPINGS(0x0, 0x0),        MMIO | MMIO_SBDS),
++
++      VULNBL_AMD(0x15, RETBLEED),
++      VULNBL_AMD(0x16, RETBLEED),
++      VULNBL_AMD(0x17, RETBLEED),
++      VULNBL_HYGON(0x18, RETBLEED),
+       {}
+ };
+@@ -1374,6 +1390,9 @@ static void __init cpu_set_bug_bits(stru
+           !arch_cap_mmio_immune(ia32_cap))
+               setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
++      if (cpu_matches(cpu_vuln_blacklist, RETBLEED))
++              setup_force_cpu_bug(X86_BUG_RETBLEED);
++
+       if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
+               return;
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -570,6 +570,12 @@ ssize_t __weak cpu_show_mmio_stale_data(
+       return sysfs_emit(buf, "Not affected\n");
+ }
++ssize_t __weak cpu_show_retbleed(struct device *dev,
++                               struct device_attribute *attr, char *buf)
++{
++      return sysfs_emit(buf, "Not affected\n");
++}
++
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+@@ -580,6 +586,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444
+ static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
+ static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
+ static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
++static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+       &dev_attr_meltdown.attr,
+@@ -592,6 +599,7 @@ static struct attribute *cpu_root_vulner
+       &dev_attr_itlb_multihit.attr,
+       &dev_attr_srbds.attr,
+       &dev_attr_mmio_stale_data.attr,
++      &dev_attr_retbleed.attr,
+       NULL
+ };
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct dev
+ extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *buf);
++extern ssize_t cpu_show_retbleed(struct device *dev,
++                               struct device_attribute *attr, char *buf);
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
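With the attribute wired up above, the state is visible to userspace as
/sys/devices/system/cpu/vulnerabilities/retbleed. A minimal reader, included
purely as a usage illustration:

  #include <stdio.h>

  int main(void)
  {
          char line[128];
          FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/retbleed", "r");

          if (f && fgets(line, sizeof(line), f))
                  fputs(line, stdout);    /* "Vulnerable" with only this patch applied */
          if (f)
                  fclose(f);
          return 0;
  }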
diff --git a/queue-5.18/x86-bugs-report-intel-retbleed-vulnerability.patch b/queue-5.18/x86-bugs-report-intel-retbleed-vulnerability.patch
new file mode 100644 (file)
index 0000000..0c50d64
--- /dev/null
@@ -0,0 +1,174 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 24 Jun 2022 13:48:58 +0200
+Subject: x86/bugs: Report Intel retbleed vulnerability
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 6ad0ad2bf8a67e27d1f9d006a1dabb0e1c360cc3 upstream.
+
+Skylake suffers from RSB underflow speculation issues; report this
+vulnerability and its mitigation (spectre_v2=ibrs).
+
+  [jpoimboe: cleanups, eibrs]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h |    1 +
+ arch/x86/kernel/cpu/bugs.c       |   39 +++++++++++++++++++++++++++++++++------
+ arch/x86/kernel/cpu/common.c     |   24 ++++++++++++------------
+ 3 files changed, 46 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -91,6 +91,7 @@
+ #define MSR_IA32_ARCH_CAPABILITIES    0x0000010a
+ #define ARCH_CAP_RDCL_NO              BIT(0)  /* Not susceptible to Meltdown */
+ #define ARCH_CAP_IBRS_ALL             BIT(1)  /* Enhanced IBRS support */
++#define ARCH_CAP_RSBA                 BIT(2)  /* RET may use alternative branch predictors */
+ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH        BIT(3)  /* Skip L1D flush on vmentry */
+ #define ARCH_CAP_SSB_NO                       BIT(4)  /*
+                                                * Not susceptible to Speculative Store Bypass
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -783,12 +783,17 @@ static int __init nospectre_v1_cmdline(c
+ }
+ early_param("nospectre_v1", nospectre_v1_cmdline);
++static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
++      SPECTRE_V2_NONE;
++
+ #undef pr_fmt
+ #define pr_fmt(fmt)     "RETBleed: " fmt
+ enum retbleed_mitigation {
+       RETBLEED_MITIGATION_NONE,
+       RETBLEED_MITIGATION_UNRET,
++      RETBLEED_MITIGATION_IBRS,
++      RETBLEED_MITIGATION_EIBRS,
+ };
+ enum retbleed_mitigation_cmd {
+@@ -800,6 +805,8 @@ enum retbleed_mitigation_cmd {
+ const char * const retbleed_strings[] = {
+       [RETBLEED_MITIGATION_NONE]      = "Vulnerable",
+       [RETBLEED_MITIGATION_UNRET]     = "Mitigation: untrained return thunk",
++      [RETBLEED_MITIGATION_IBRS]      = "Mitigation: IBRS",
++      [RETBLEED_MITIGATION_EIBRS]     = "Mitigation: Enhanced IBRS",
+ };
+ static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
+@@ -842,6 +849,7 @@ early_param("retbleed", retbleed_parse_c
+ #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
+ #define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n"
++#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
+ static void __init retbleed_select_mitigation(void)
+ {
+@@ -858,12 +866,15 @@ static void __init retbleed_select_mitig
+       case RETBLEED_CMD_AUTO:
+       default:
+-              if (!boot_cpu_has_bug(X86_BUG_RETBLEED))
+-                      break;
+-
+               if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+                   boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+                       retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++
++              /*
++               * The Intel mitigation (IBRS) was already selected in
++               * spectre_v2_select_mitigation().
++               */
++
+               break;
+       }
+@@ -893,15 +904,31 @@ static void __init retbleed_select_mitig
+               break;
+       }
++      /*
++       * Let IBRS trump all on Intel without affecting the effects of the
++       * retbleed= cmdline option.
++       */
++      if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
++              switch (spectre_v2_enabled) {
++              case SPECTRE_V2_IBRS:
++                      retbleed_mitigation = RETBLEED_MITIGATION_IBRS;
++                      break;
++              case SPECTRE_V2_EIBRS:
++              case SPECTRE_V2_EIBRS_RETPOLINE:
++              case SPECTRE_V2_EIBRS_LFENCE:
++                      retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
++                      break;
++              default:
++                      pr_err(RETBLEED_INTEL_MSG);
++              }
++      }
++
+       pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
+ }
+ #undef pr_fmt
+ #define pr_fmt(fmt)     "Spectre V2 : " fmt
+-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+-      SPECTRE_V2_NONE;
+-
+ static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
+       SPECTRE_V2_USER_NONE;
+ static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1263,24 +1263,24 @@ static const struct x86_cpu_id cpu_vuln_
+       VULNBL_INTEL_STEPPINGS(BROADWELL_G,     X86_STEPPING_ANY,               SRBDS),
+       VULNBL_INTEL_STEPPINGS(BROADWELL_X,     X86_STEPPING_ANY,               MMIO),
+       VULNBL_INTEL_STEPPINGS(BROADWELL,       X86_STEPPING_ANY,               SRBDS),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPINGS(0x3, 0x3),        SRBDS | MMIO),
++      VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPINGS(0x3, 0x3),        SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPING_ANY,               SRBDS),
+       VULNBL_INTEL_STEPPINGS(SKYLAKE_X,       BIT(3) | BIT(4) | BIT(6) |
+-                                              BIT(7) | BIT(0xB),              MMIO),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPINGS(0x3, 0x3),        SRBDS | MMIO),
++                                              BIT(7) | BIT(0xB),              MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPINGS(0x3, 0x3),        SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPING_ANY,               SRBDS),
+-      VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPINGS(0x9, 0xC),        SRBDS | MMIO),
++      VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPINGS(0x9, 0xC),        SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPINGS(0x0, 0x8),        SRBDS),
+-      VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPINGS(0x9, 0xD),        SRBDS | MMIO),
++      VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPINGS(0x9, 0xD),        SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPINGS(0x0, 0x8),        SRBDS),
+-      VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPINGS(0x5, 0x5),        MMIO | MMIO_SBDS),
++      VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPINGS(0x5, 0x5),        MMIO | MMIO_SBDS | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPINGS(0x1, 0x1),        MMIO),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPINGS(0x4, 0x6),        MMIO),
+-      VULNBL_INTEL_STEPPINGS(COMETLAKE,       BIT(2) | BIT(3) | BIT(5),       MMIO | MMIO_SBDS),
+-      VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS),
+-      VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x0, 0x0),        MMIO),
+-      VULNBL_INTEL_STEPPINGS(LAKEFIELD,       X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS),
+-      VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPINGS(0x1, 0x1),        MMIO),
++      VULNBL_INTEL_STEPPINGS(COMETLAKE,       BIT(2) | BIT(3) | BIT(5),       MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x0, 0x0),        MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(LAKEFIELD,       X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPINGS(0x1, 0x1),        MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT,    X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS),
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D,  X86_STEPPING_ANY,               MMIO),
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L,  X86_STEPPINGS(0x0, 0x0),        MMIO | MMIO_SBDS),
+@@ -1390,7 +1390,7 @@ static void __init cpu_set_bug_bits(stru
+           !arch_cap_mmio_immune(ia32_cap))
+               setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+-      if (cpu_matches(cpu_vuln_blacklist, RETBLEED))
++      if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)))
+               setup_force_cpu_bug(X86_BUG_RETBLEED);
+       if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
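Besides the model blacklist, the common.c hunk also honours the RSBA capability
bit defined above. Conceptually the enumeration is the following; this is a
simplified sketch, since the MSR is actually read once via
x86_read_arch_cap_msr():

  u64 ia32_cap = 0;

  if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
          rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);

  /*
   * Either a known-affected model, or the CPU itself admits that RET
   * may use alternative (non-RSB) predictors:
   */
  if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
          setup_force_cpu_bug(X86_BUG_RETBLEED);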
diff --git a/queue-5.18/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch b/queue-5.18/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch
new file mode 100644 (file)
index 0000000..f73413a
--- /dev/null
@@ -0,0 +1,102 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:56 +0200
+Subject: x86/bugs: Split spectre_v2_select_mitigation() and spectre_v2_user_select_mitigation()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 166115c08a9b0b846b783088808a27d739be6e8d upstream.
+
+retbleed will depend on spectre_v2, while spectre_v2_user depends on
+retbleed. Break this cycle.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |   25 +++++++++++++++++--------
+ 1 file changed, 17 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -37,8 +37,9 @@
+ #include "cpu.h"
+ static void __init spectre_v1_select_mitigation(void);
+-static void __init retbleed_select_mitigation(void);
+ static void __init spectre_v2_select_mitigation(void);
++static void __init retbleed_select_mitigation(void);
++static void __init spectre_v2_user_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+ static void __init mds_select_mitigation(void);
+@@ -145,13 +146,19 @@ void __init check_bugs(void)
+       /* Select the proper CPU mitigations before patching alternatives: */
+       spectre_v1_select_mitigation();
++      spectre_v2_select_mitigation();
++      /*
++       * retbleed_select_mitigation() relies on the state set by
++       * spectre_v2_select_mitigation(); specifically it wants to know about
++       * spectre_v2=ibrs.
++       */
+       retbleed_select_mitigation();
+       /*
+-       * spectre_v2_select_mitigation() relies on the state set by
++       * spectre_v2_user_select_mitigation() relies on the state set by
+        * retbleed_select_mitigation(); specifically the STIBP selection is
+        * forced for UNRET.
+        */
+-      spectre_v2_select_mitigation();
++      spectre_v2_user_select_mitigation();
+       ssb_select_mitigation();
+       l1tf_select_mitigation();
+       md_clear_select_mitigation();
+@@ -1006,13 +1013,15 @@ static void __init spec_v2_user_print_co
+               pr_info("spectre_v2_user=%s forced on command line.\n", reason);
+ }
++static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
++
+ static enum spectre_v2_user_cmd __init
+-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
++spectre_v2_parse_user_cmdline(void)
+ {
+       char arg[20];
+       int ret, i;
+-      switch (v2_cmd) {
++      switch (spectre_v2_cmd) {
+       case SPECTRE_V2_CMD_NONE:
+               return SPECTRE_V2_USER_CMD_NONE;
+       case SPECTRE_V2_CMD_FORCE:
+@@ -1047,7 +1056,7 @@ static inline bool spectre_v2_in_ibrs_mo
+ }
+ static void __init
+-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
++spectre_v2_user_select_mitigation(void)
+ {
+       enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
+       bool smt_possible = IS_ENABLED(CONFIG_SMP);
+@@ -1060,7 +1069,7 @@ spectre_v2_user_select_mitigation(enum s
+           cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+               smt_possible = false;
+-      cmd = spectre_v2_parse_user_cmdline(v2_cmd);
++      cmd = spectre_v2_parse_user_cmdline();
+       switch (cmd) {
+       case SPECTRE_V2_USER_CMD_NONE:
+               goto set_mode;
+@@ -1384,7 +1393,7 @@ static void __init spectre_v2_select_mit
+       }
+       /* Set up IBPB and STIBP depending on the general spectre V2 command */
+-      spectre_v2_user_select_mitigation(cmd);
++      spectre_v2_cmd = cmd;
+ }
+ static void update_stibp_msr(void * __unused)
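After the split, the selection in check_bugs() runs as a straight pipeline in
which each step only consumes state produced by the steps before it; restated
as a sketch:

  spectre_v2_select_mitigation();         /* may settle on IBRS/eIBRS     */
  retbleed_select_mitigation();           /* reads spectre_v2_enabled     */
  spectre_v2_user_select_mitigation();    /* reads retbleed_mitigation
                                             (the forced-STIBP case)      */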
diff --git a/queue-5.18/x86-common-stamp-out-the-stepping-madness.patch b/queue-5.18/x86-common-stamp-out-the-stepping-madness.patch
new file mode 100644 (file)
index 0000000..725a3f7
--- /dev/null
@@ -0,0 +1,77 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 24 Jun 2022 14:03:25 +0200
+Subject: x86/common: Stamp out the stepping madness
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 7a05bc95ed1c5a59e47aaade9fb4083c27de9e62 upstream.
+
+The whole MMIO/RETBLEED enumeration went overboard on steppings. Get
+rid of all that and simply use ANY.
+
+If a future stepping of these models would not be affected, it had
+better set the relevant ARCH_CAP_$FOO_NO bit in
+IA32_ARCH_CAPABILITIES.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c |   37 ++++++++++++++++---------------------
+ 1 file changed, 16 insertions(+), 21 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1258,32 +1258,27 @@ static const struct x86_cpu_id cpu_vuln_
+       VULNBL_INTEL_STEPPINGS(HASWELL,         X86_STEPPING_ANY,               SRBDS),
+       VULNBL_INTEL_STEPPINGS(HASWELL_L,       X86_STEPPING_ANY,               SRBDS),
+       VULNBL_INTEL_STEPPINGS(HASWELL_G,       X86_STEPPING_ANY,               SRBDS),
+-      VULNBL_INTEL_STEPPINGS(HASWELL_X,       BIT(2) | BIT(4),                MMIO),
+-      VULNBL_INTEL_STEPPINGS(BROADWELL_D,     X86_STEPPINGS(0x3, 0x5),        MMIO),
++      VULNBL_INTEL_STEPPINGS(HASWELL_X,       X86_STEPPING_ANY,               MMIO),
++      VULNBL_INTEL_STEPPINGS(BROADWELL_D,     X86_STEPPING_ANY,               MMIO),
+       VULNBL_INTEL_STEPPINGS(BROADWELL_G,     X86_STEPPING_ANY,               SRBDS),
+       VULNBL_INTEL_STEPPINGS(BROADWELL_X,     X86_STEPPING_ANY,               MMIO),
+       VULNBL_INTEL_STEPPINGS(BROADWELL,       X86_STEPPING_ANY,               SRBDS),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPINGS(0x3, 0x3),        SRBDS | MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPING_ANY,               SRBDS),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE_X,       BIT(3) | BIT(4) | BIT(6) |
+-                                              BIT(7) | BIT(0xB),              MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPINGS(0x3, 0x3),        SRBDS | MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPING_ANY,               SRBDS),
+-      VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPINGS(0x9, 0xC),        SRBDS | MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPINGS(0x0, 0x8),        SRBDS),
+-      VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPINGS(0x9, 0xD),        SRBDS | MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPINGS(0x0, 0x8),        SRBDS),
+-      VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPINGS(0x5, 0x5),        MMIO | MMIO_SBDS | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPINGS(0x1, 0x1),        MMIO),
+-      VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPINGS(0x4, 0x6),        MMIO),
+-      VULNBL_INTEL_STEPPINGS(COMETLAKE,       BIT(2) | BIT(3) | BIT(5),       MMIO | MMIO_SBDS | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(SKYLAKE_X,       X86_STEPPING_ANY,               MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPING_ANY,               MMIO),
++      VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPING_ANY,               MMIO),
++      VULNBL_INTEL_STEPPINGS(COMETLAKE,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x0, 0x0),        MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(LAKEFIELD,       X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPINGS(0x1, 0x1),        MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(ATOM_TREMONT,    X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS),
++      VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(LAKEFIELD,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPING_ANY,               MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(ATOM_TREMONT,    X86_STEPPING_ANY,               MMIO | MMIO_SBDS),
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D,  X86_STEPPING_ANY,               MMIO),
+-      VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L,  X86_STEPPINGS(0x0, 0x0),        MMIO | MMIO_SBDS),
++      VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L,  X86_STEPPING_ANY,               MMIO | MMIO_SBDS),
+       VULNBL_AMD(0x15, RETBLEED),
+       VULNBL_AMD(0x16, RETBLEED),
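The opt-out path the message refers to already exists for the MMIO bug and
works the same way for any future _NO capability: a blacklist match is only
promoted to a bug bit when IA32_ARCH_CAPABILITIES does not wave it off.
Sketch, mirroring the existing cpu_set_bug_bits() logic:

  /*
   * A fixed future stepping sets the relevant *_NO bits and is then
   * skipped despite still matching the model table:
   */
  if (cpu_matches(cpu_vuln_blacklist, MMIO) &&
      !arch_cap_mmio_immune(ia32_cap))
          setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);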
diff --git a/queue-5.18/x86-cpu-amd-add-spectral-chicken.patch b/queue-5.18/x86-cpu-amd-add-spectral-chicken.patch
new file mode 100644 (file)
index 0000000..c72f7bf
--- /dev/null
@@ -0,0 +1,107 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:16:04 +0200
+Subject: x86/cpu/amd: Add Spectral Chicken
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d7caac991feeef1b871ee6988fd2c9725df09039 upstream.
+
+Zen2 uarchs have an undocumented, unnamed, MSR that contains a chicken
+bit for some speculation behaviour. It needs setting.
+
+Note: very belatedly AMD released naming; it's now officially called
+      MSR_AMD64_DE_CFG2 and MSR_AMD64_DE_CFG2_SUPPRESS_NOBR_PRED_BIT
+      but shall remain the SPECTRAL CHICKEN.
+
+Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h |    3 +++
+ arch/x86/kernel/cpu/amd.c        |   23 ++++++++++++++++++++++-
+ arch/x86/kernel/cpu/cpu.h        |    2 ++
+ arch/x86/kernel/cpu/hygon.c      |    6 ++++++
+ 4 files changed, 33 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -553,6 +553,9 @@
+ /* Fam 17h MSRs */
+ #define MSR_F17H_IRPERF                       0xc00000e9
++#define MSR_ZEN2_SPECTRAL_CHICKEN     0xc00110e3
++#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
++
+ /* Fam 16h MSRs */
+ #define MSR_F16H_L2I_PERF_CTL         0xc0010230
+ #define MSR_F16H_L2I_PERF_CTR         0xc0010231
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -862,6 +862,26 @@ static void init_amd_bd(struct cpuinfo_x
+       clear_rdrand_cpuid_bit(c);
+ }
++void init_spectral_chicken(struct cpuinfo_x86 *c)
++{
++      u64 value;
++
++      /*
++       * On Zen2 we offer this chicken (bit) on the altar of Speculation.
++       *
++       * This suppresses speculation from the middle of a basic block, i.e. it
++       * suppresses non-branch predictions.
++       *
++       * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H
++       */
++      if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) {
++              if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) {
++                      value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT;
++                      wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
++              }
++      }
++}
++
+ static void init_amd_zn(struct cpuinfo_x86 *c)
+ {
+       set_cpu_cap(c, X86_FEATURE_ZEN);
+@@ -907,7 +927,8 @@ static void init_amd(struct cpuinfo_x86
+       case 0x12: init_amd_ln(c); break;
+       case 0x15: init_amd_bd(c); break;
+       case 0x16: init_amd_jg(c); break;
+-      case 0x17: fallthrough;
++      case 0x17: init_spectral_chicken(c);
++                 fallthrough;
+       case 0x19: init_amd_zn(c); break;
+       }
+--- a/arch/x86/kernel/cpu/cpu.h
++++ b/arch/x86/kernel/cpu/cpu.h
+@@ -61,6 +61,8 @@ static inline void tsx_init(void) { }
+ static inline void tsx_ap_init(void) { }
+ #endif /* CONFIG_CPU_SUP_INTEL */
++extern void init_spectral_chicken(struct cpuinfo_x86 *c);
++
+ extern void get_cpu_cap(struct cpuinfo_x86 *c);
+ extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
+ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+--- a/arch/x86/kernel/cpu/hygon.c
++++ b/arch/x86/kernel/cpu/hygon.c
+@@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x8
+       /* get apicid instead of initial apic id from cpuid */
+       c->apicid = hard_smp_processor_id();
++      /*
++       * XXX someone from Hygon needs to confirm this DTRT
++       *
++      init_spectral_chicken(c);
++       */
++
+       set_cpu_cap(c, X86_FEATURE_ZEN);
+       set_cpu_cap(c, X86_FEATURE_CPB);
diff --git a/queue-5.18/x86-cpu-amd-enumerate-btc_no.patch b/queue-5.18/x86-cpu-amd-enumerate-btc_no.patch
new file mode 100644 (file)
index 0000000..0f5492b
--- /dev/null
@@ -0,0 +1,84 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 24 Jun 2022 14:41:21 +0100
+Subject: x86/cpu/amd: Enumerate BTC_NO
+
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+
+commit 26aae8ccbc1972233afd08fb3f368947c0314265 upstream.
+
+BTC_NO indicates that hardware is not susceptible to Branch Type Confusion.
+
+Zen3 CPUs don't suffer BTC.
+
+Hypervisors are expected to synthesise BTC_NO when it is appropriate
+given the migration pool, to prevent kernels using heuristics.
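+
+As a hedged illustration (not part of the patch): feature word 13 maps to
+CPUID leaf 0x80000008 EBX, so the new bit can be queried from userspace
+roughly as below; the leaf and bit follow from the definition added here,
+the rest is example code.
+
+  #include <cpuid.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          unsigned int eax, ebx, ecx, edx;
+
+          if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
+                  return 1;
+          /* BTC_NO is EBX bit 29, i.e. 13*32+29 in cpufeatures.h terms */
+          printf("BTC_NO: %s\n", (ebx & (1u << 29)) ? "yes" : "no");
+          return 0;
+  }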
+
+  [ bp: Massage. ]
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: no X86_FEATURE_BRS]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    1 +
+ arch/x86/kernel/cpu/amd.c          |   21 +++++++++++++++------
+ arch/x86/kernel/cpu/common.c       |    6 ++++--
+ 3 files changed, 20 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -321,6 +321,7 @@
+ #define X86_FEATURE_VIRT_SSBD         (13*32+25) /* Virtualized Speculative Store Bypass Disable */
+ #define X86_FEATURE_AMD_SSB_NO                (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
+ #define X86_FEATURE_CPPC              (13*32+27) /* Collaborative Processor Performance Control */
++#define X86_FEATURE_BTC_NO            (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+ #define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -890,12 +890,21 @@ static void init_amd_zn(struct cpuinfo_x
+       node_reclaim_distance = 32;
+ #endif
+-      /*
+-       * Fix erratum 1076: CPB feature bit not being set in CPUID.
+-       * Always set it, except when running under a hypervisor.
+-       */
+-      if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
+-              set_cpu_cap(c, X86_FEATURE_CPB);
++      /* Fix up CPUID bits, but only if not virtualised. */
++      if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
++
++              /* Erratum 1076: CPB feature bit not being set in CPUID. */
++              if (!cpu_has(c, X86_FEATURE_CPB))
++                      set_cpu_cap(c, X86_FEATURE_CPB);
++
++              /*
++               * Zen3 (Fam19 model < 0x10) parts are not susceptible to
++               * Branch Type Confusion, but predate the allocation of the
++               * BTC_NO bit.
++               */
++              if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
++                      set_cpu_cap(c, X86_FEATURE_BTC_NO);
++      }
+ }
+ static void init_amd(struct cpuinfo_x86 *c)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1385,8 +1385,10 @@ static void __init cpu_set_bug_bits(stru
+           !arch_cap_mmio_immune(ia32_cap))
+               setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+-      if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)))
+-              setup_force_cpu_bug(X86_BUG_RETBLEED);
++      if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
++              if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
++                      setup_force_cpu_bug(X86_BUG_RETBLEED);
++      }
+       if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
+               return;
diff --git a/queue-5.18/x86-cpufeatures-move-retpoline-flags-to-word-11.patch b/queue-5.18/x86-cpufeatures-move-retpoline-flags-to-word-11.patch
new file mode 100644 (file)
index 0000000..fad00ce
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:33 +0200
+Subject: x86/cpufeatures: Move RETPOLINE flags to word 11
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit a883d624aed463c84c22596006e5a96f5b44db31 upstream.
+
+In order to extend the RETPOLINE features to 4, move them to word 11
+where there is still room. This mostly keeps DISABLE_RETPOLINE
+simple.
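+
+For reference (worked out from the numbers below): a feature's flat bit
+is word*32 + bit, so X86_FEATURE_RETPOLINE moves from 7*32+12 = 236 to
+11*32+12 = 364 and X86_FEATURE_RETPOLINE_LFENCE from 7*32+13 = 237 to
+11*32+13 = 365, leaving two more FREE! slots in word 7.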
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -203,8 +203,8 @@
+ #define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ /* FREE!                                ( 7*32+10) */
+ #define X86_FEATURE_PTI                       ( 7*32+11) /* Kernel Page Table Isolation enabled */
+-#define X86_FEATURE_RETPOLINE         ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+-#define X86_FEATURE_RETPOLINE_LFENCE  ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
++/* FREE!                              ( 7*32+12) */
++/* FREE!                              ( 7*32+13) */
+ #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_CDP_L2            ( 7*32+15) /* Code and Data Prioritization L2 */
+ #define X86_FEATURE_MSR_SPEC_CTRL     ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+@@ -295,6 +295,10 @@
+ #define X86_FEATURE_PER_THREAD_MBA    (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+ #define X86_FEATURE_SGX1              (11*32+ 8) /* "" Basic SGX */
+ #define X86_FEATURE_SGX2              (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
++/* FREE!                              (11*32+10) */
++/* FREE!                              (11*32+11) */
++#define X86_FEATURE_RETPOLINE         (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_LFENCE  (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX_VNNI          (12*32+ 4) /* AVX VNNI instructions */
diff --git a/queue-5.18/x86-entry-add-kernel-ibrs-implementation.patch b/queue-5.18/x86-entry-add-kernel-ibrs-implementation.patch
new file mode 100644 (file)
index 0000000..5bc389c
--- /dev/null
@@ -0,0 +1,352 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Fri, 8 Jul 2022 13:50:38 -0300
+Subject: x86/entry: Add kernel IBRS implementation
+
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+
+commit 2dbb887e875b1de3ca8f40ddf26bcfe55798c609 upstream.
+
+Implement Kernel IBRS - currently the only known option to mitigate RSB
+underflow speculation issues on Skylake hardware.
+
+Note: IBRS_ENTER requires fuller context to be established than
+UNTRAIN_RET does, so it must sit later in the entry path than where
+UNTRAIN_RET used to be. However, since UNTRAIN_RET itself implies a RET,
+UNTRAIN_RET must still come after IBRS_ENTER. This means moving
+IBRS_ENTER also means moving UNTRAIN_RET.
+
+Note 2: KERNEL_IBRS is sub-optimal for XenPV.
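+
+A rough C rendering of what the two asm macros do, for orientation only
+(it ignores the ALTERNATIVE gating on X86_FEATURE_KERNEL_IBRS, the LFENCE
+taken when IBRS is already set on the paranoid path, and the EDX:EAX
+split that WRMSR requires; the function names are made up for the sketch):
+
+  u64 ibrs_enter(void)                  /* IBRS_ENTER save_reg=%r15 */
+  {
+          u64 prev = __rdmsr(MSR_IA32_SPEC_CTRL);
+
+          /* enable IBRS for kernel execution */
+          native_wrmsrl(MSR_IA32_SPEC_CTRL,
+                        this_cpu_read(x86_spec_ctrl_current));
+          return prev;                  /* stashed in a callee-saved reg */
+  }
+
+  void ibrs_exit(void)                  /* IBRS_EXIT, return-to-user path */
+  {
+          /* drop IBRS again before heading back to userspace */
+          native_wrmsrl(MSR_IA32_SPEC_CTRL,
+                        this_cpu_read(x86_spec_ctrl_current) & ~SPEC_CTRL_IBRS);
+  }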
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: conflict at arch/x86/entry/entry_64_compat.S]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/calling.h           |   58 +++++++++++++++++++++++++++++++++++++
+ arch/x86/entry/entry_64.S          |   44 ++++++++++++++++++++++++----
+ arch/x86/entry/entry_64_compat.S   |   17 ++++++++--
+ arch/x86/include/asm/cpufeatures.h |    2 -
+ 4 files changed, 111 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -7,6 +7,8 @@
+ #include <asm/asm-offsets.h>
+ #include <asm/processor-flags.h>
+ #include <asm/ptrace-abi.h>
++#include <asm/msr.h>
++#include <asm/nospec-branch.h>
+ /*
+@@ -282,6 +284,62 @@ For 32-bit we have the following convent
+ #endif
+ /*
++ * IBRS kernel mitigation for Spectre_v2.
++ *
++ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
++ * the regs it uses (AX, CX, DX). Must be called before the first RET
++ * instruction (NOTE! UNTRAIN_RET includes a RET instruction)
++ *
++ * The optional argument is used to save/restore the current value,
++ * which is used on the paranoid paths.
++ *
++ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
++ */
++.macro IBRS_ENTER save_reg
++      ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
++      movl    $MSR_IA32_SPEC_CTRL, %ecx
++
++.ifnb \save_reg
++      rdmsr
++      shl     $32, %rdx
++      or      %rdx, %rax
++      mov     %rax, \save_reg
++      test    $SPEC_CTRL_IBRS, %eax
++      jz      .Ldo_wrmsr_\@
++      lfence
++      jmp     .Lend_\@
++.Ldo_wrmsr_\@:
++.endif
++
++      movq    PER_CPU_VAR(x86_spec_ctrl_current), %rdx
++      movl    %edx, %eax
++      shr     $32, %rdx
++      wrmsr
++.Lend_\@:
++.endm
++
++/*
++ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
++ * regs. Must be called after the last RET.
++ */
++.macro IBRS_EXIT save_reg
++      ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
++      movl    $MSR_IA32_SPEC_CTRL, %ecx
++
++.ifnb \save_reg
++      mov     \save_reg, %rdx
++.else
++      movq    PER_CPU_VAR(x86_spec_ctrl_current), %rdx
++      andl    $(~SPEC_CTRL_IBRS), %edx
++.endif
++
++      movl    %edx, %eax
++      shr     $32, %rdx
++      wrmsr
++.Lend_\@:
++.endm
++
++/*
+  * Mitigate Spectre v1 for conditional swapgs code paths.
+  *
+  * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -96,7 +96,6 @@ SYM_CODE_START(entry_SYSCALL_64)
+ SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
+       ANNOTATE_NOENDBR
+-      UNTRAIN_RET
+       /* Construct struct pt_regs on stack */
+       pushq   $__USER_DS                              /* pt_regs->ss */
+@@ -113,6 +112,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h
+       movq    %rsp, %rdi
+       /* Sign extend the lower 32bit as syscall numbers are treated as int */
+       movslq  %eax, %rsi
++
++      /* clobbers %rax, make sure it is after saving the syscall nr */
++      IBRS_ENTER
++      UNTRAIN_RET
++
+       call    do_syscall_64           /* returns with IRQs disabled */
+       /*
+@@ -192,6 +196,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h
+        * perf profiles. Nothing jumps here.
+        */
+ syscall_return_via_sysret:
++      IBRS_EXIT
+       POP_REGS pop_rdi=0
+       /*
+@@ -596,6 +601,7 @@ __irqentry_text_end:
+ SYM_CODE_START_LOCAL(common_interrupt_return)
+ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
++      IBRS_EXIT
+ #ifdef CONFIG_DEBUG_ENTRY
+       /* Assert that pt_regs indicates user mode. */
+       testb   $3, CS(%rsp)
+@@ -882,6 +888,9 @@ SYM_CODE_END(xen_failsafe_callback)
+  *              1 -> no SWAPGS on exit
+  *
+  *     Y        GSBASE value at entry, must be restored in paranoid_exit
++ *
++ * R14 - old CR3
++ * R15 - old SPEC_CTRL
+  */
+ SYM_CODE_START_LOCAL(paranoid_entry)
+       UNWIND_HINT_FUNC
+@@ -905,7 +914,6 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+        * be retrieved from a kernel internal table.
+        */
+       SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+-      UNTRAIN_RET
+       /*
+        * Handling GSBASE depends on the availability of FSGSBASE.
+@@ -927,7 +935,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+        * is needed here.
+        */
+       SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
+-      RET
++      jmp .Lparanoid_gsbase_done
+ .Lparanoid_entry_checkgs:
+       /* EBX = 1 -> kernel GSBASE active, no restore required */
+@@ -946,8 +954,16 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+       xorl    %ebx, %ebx
+       swapgs
+ .Lparanoid_kernel_gsbase:
+-
+       FENCE_SWAPGS_KERNEL_ENTRY
++.Lparanoid_gsbase_done:
++
++      /*
++       * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like
++       * CR3 above, keep the old value in a callee saved register.
++       */
++      IBRS_ENTER save_reg=%r15
++      UNTRAIN_RET
++
+       RET
+ SYM_CODE_END(paranoid_entry)
+@@ -969,9 +985,19 @@ SYM_CODE_END(paranoid_entry)
+  *              1 -> no SWAPGS on exit
+  *
+  *     Y        User space GSBASE, must be restored unconditionally
++ *
++ * R14 - old CR3
++ * R15 - old SPEC_CTRL
+  */
+ SYM_CODE_START_LOCAL(paranoid_exit)
+       UNWIND_HINT_REGS
++
++      /*
++       * Must restore IBRS state before both CR3 and %GS since we need access
++       * to the per-CPU x86_spec_ctrl_shadow variable.
++       */
++      IBRS_EXIT save_reg=%r15
++
+       /*
+        * The order of operations is important. RESTORE_CR3 requires
+        * kernel GSBASE.
+@@ -1016,10 +1042,12 @@ SYM_CODE_START_LOCAL(error_entry)
+       FENCE_SWAPGS_USER_ENTRY
+       /* We have user CR3.  Change to kernel CR3. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++      IBRS_ENTER
+       UNTRAIN_RET
+       leaq    8(%rsp), %rdi                   /* arg0 = pt_regs pointer */
+ .Lerror_entry_from_usermode_after_swapgs:
++
+       /* Put us onto the real thread stack. */
+       call    sync_regs
+       RET
+@@ -1069,6 +1097,7 @@ SYM_CODE_START_LOCAL(error_entry)
+       SWAPGS
+       FENCE_SWAPGS_USER_ENTRY
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++      IBRS_ENTER
+       UNTRAIN_RET
+       /*
+@@ -1165,7 +1194,6 @@ SYM_CODE_START(asm_exc_nmi)
+       movq    %rsp, %rdx
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+       UNWIND_HINT_IRET_REGS base=%rdx offset=8
+-      UNTRAIN_RET
+       pushq   5*8(%rdx)       /* pt_regs->ss */
+       pushq   4*8(%rdx)       /* pt_regs->rsp */
+       pushq   3*8(%rdx)       /* pt_regs->flags */
+@@ -1176,6 +1204,9 @@ SYM_CODE_START(asm_exc_nmi)
+       PUSH_AND_CLEAR_REGS rdx=(%rdx)
+       ENCODE_FRAME_POINTER
++      IBRS_ENTER
++      UNTRAIN_RET
++
+       /*
+        * At this point we no longer need to worry about stack damage
+        * due to nesting -- we're on the normal thread stack and we're
+@@ -1400,6 +1431,9 @@ end_repeat_nmi:
+       movq    $-1, %rsi
+       call    exc_nmi
++      /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
++      IBRS_EXIT save_reg=%r15
++
+       /* Always restore stashed CR3 value (see paranoid_entry) */
+       RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -4,7 +4,6 @@
+  *
+  * Copyright 2000-2002 Andi Kleen, SuSE Labs.
+  */
+-#include "calling.h"
+ #include <asm/asm-offsets.h>
+ #include <asm/current.h>
+ #include <asm/errno.h>
+@@ -18,6 +17,8 @@
+ #include <linux/linkage.h>
+ #include <linux/err.h>
++#include "calling.h"
++
+       .section .entry.text, "ax"
+ /*
+@@ -73,7 +74,6 @@ SYM_CODE_START(entry_SYSENTER_compat)
+       pushq   $__USER32_CS            /* pt_regs->cs */
+       pushq   $0                      /* pt_regs->ip = 0 (placeholder) */
+ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
+-      UNTRAIN_RET
+       /*
+        * User tracing code (ptrace or signal handlers) might assume that
+@@ -115,6 +115,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_af
+       cld
++      IBRS_ENTER
++      UNTRAIN_RET
++
+       /*
+        * SYSENTER doesn't filter flags, so we need to clear NT and AC
+        * ourselves.  To save a few cycles, we can check whether
+@@ -217,7 +220,6 @@ SYM_CODE_START(entry_SYSCALL_compat)
+ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
+       ANNOTATE_NOENDBR
+-      UNTRAIN_RET
+       /* Construct struct pt_regs on stack */
+       pushq   $__USER32_DS            /* pt_regs->ss */
+@@ -259,6 +261,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_aft
+       UNWIND_HINT_REGS
++      IBRS_ENTER
++      UNTRAIN_RET
++
+       movq    %rsp, %rdi
+       call    do_fast_syscall_32
+       /* XEN PV guests always use IRET path */
+@@ -273,6 +278,8 @@ sysret32_from_system_call:
+        */
+       STACKLEAK_ERASE
++      IBRS_EXIT
++
+       movq    RBX(%rsp), %rbx         /* pt_regs->rbx */
+       movq    RBP(%rsp), %rbp         /* pt_regs->rbp */
+       movq    EFLAGS(%rsp), %r11      /* pt_regs->flags (in r11) */
+@@ -385,7 +392,6 @@ SYM_CODE_START(entry_INT80_compat)
+       pushq   (%rdi)                  /* pt_regs->di */
+ .Lint80_keep_stack:
+-      UNTRAIN_RET
+       pushq   %rsi                    /* pt_regs->si */
+       xorl    %esi, %esi              /* nospec   si */
+       pushq   %rdx                    /* pt_regs->dx */
+@@ -418,6 +424,9 @@ SYM_CODE_START(entry_INT80_compat)
+       cld
++      IBRS_ENTER
++      UNTRAIN_RET
++
+       movq    %rsp, %rdi
+       call    do_int80_syscall_32
+       jmp     swapgs_restore_regs_and_return_to_usermode
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -203,7 +203,7 @@
+ #define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ /* FREE!                                ( 7*32+10) */
+ #define X86_FEATURE_PTI                       ( 7*32+11) /* Kernel Page Table Isolation enabled */
+-/* FREE!                              ( 7*32+12) */
++#define X86_FEATURE_KERNEL_IBRS               ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+ /* FREE!                              ( 7*32+13) */
+ #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_CDP_L2            ( 7*32+15) /* Code and Data Prioritization L2 */
diff --git a/queue-5.18/x86-entry-avoid-very-early-ret.patch b/queue-5.18/x86-entry-avoid-very-early-ret.patch
new file mode 100644 (file)
index 0000000..44917f7
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:46 +0200
+Subject: x86/entry: Avoid very early RET
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 7c81c0c9210c9bfab2bae76aab2999de5bad27db upstream.
+
+Commit
+
+  ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()")
+
+manages to introduce a CALL/RET pair that is before SWITCH_TO_KERNEL_CR3,
+which means it is before RETBleed can be mitigated.
+
+Revert to an earlier version of the commit in Fixes. The downside is that
+this will bloat .text size somewhat. The alternative is fully reverting
+it.
+
+The purpose of the commit in Fixes was to allow migrating error_entry()
+to C, including the whole of kPTI. Much care needs to be taken when moving
+that forward so as not to re-introduce this problem of early RETs.
+
+Fixes: ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S |   12 ++----------
+ 1 file changed, 2 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -317,14 +317,6 @@ SYM_CODE_END(ret_from_fork)
+ #endif
+ .endm
+-/* Save all registers in pt_regs */
+-SYM_CODE_START_LOCAL(push_and_clear_regs)
+-      UNWIND_HINT_FUNC
+-      PUSH_AND_CLEAR_REGS save_ret=1
+-      ENCODE_FRAME_POINTER 8
+-      RET
+-SYM_CODE_END(push_and_clear_regs)
+-
+ /**
+  * idtentry_body - Macro to emit code calling the C function
+  * @cfunc:            C function to be called
+@@ -332,8 +324,8 @@ SYM_CODE_END(push_and_clear_regs)
+  */
+ .macro idtentry_body cfunc has_error_code:req
+-      call push_and_clear_regs
+-      UNWIND_HINT_REGS
++      PUSH_AND_CLEAR_REGS
++      ENCODE_FRAME_POINTER
+       /*
+        * Call error_entry() and switch to the task stack if from userspace.
diff --git a/queue-5.18/x86-entry-don-t-call-error_entry-for-xenpv.patch b/queue-5.18/x86-entry-don-t-call-error_entry-for-xenpv.patch
new file mode 100644 (file)
index 0000000..05cc20f
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
+Date: Tue, 3 May 2022 11:21:06 +0800
+Subject: x86/entry: Don't call error_entry() for XENPV
+
+From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
+
+commit 64cbd0acb58203fb769ed2f4eab526d43e243847 upstream.
+
+XENPV guests already enter on the task stack, and they can't fault in
+native_iret() nor native_load_gs_index() since they use their own pvops
+for IRET and load_gs_index(). A CR3 switch is not needed either.
+
+So there is no reason to call error_entry() in XENPV.
+
+  [ bp: Massage commit message. ]
+
+Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Link: https://lore.kernel.org/r/20220503032107.680190-6-jiangshanlai@gmail.com
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -336,8 +336,17 @@ SYM_CODE_END(push_and_clear_regs)
+       call push_and_clear_regs
+       UNWIND_HINT_REGS
+-      call    error_entry
+-      movq    %rax, %rsp                      /* switch to the task stack if from userspace */
++      /*
++       * Call error_entry() and switch to the task stack if from userspace.
++       *
++       * When in XENPV, it is already in the task stack, and it can't fault
++       * for native_iret() nor native_load_gs_index() since XENPV uses its
++       * own pvops for IRET and load_gs_index().  And it doesn't need to
++       * switch the CR3.  So it can skip invoking error_entry().
++       */
++      ALTERNATIVE "call error_entry; movq %rax, %rsp", \
++              "", X86_FEATURE_XENPV
++
+       ENCODE_FRAME_POINTER
+       UNWIND_HINT_REGS
diff --git a/queue-5.18/x86-entry-move-push_and_clear_regs-back-into-error_entry.patch b/queue-5.18/x86-entry-move-push_and_clear_regs-back-into-error_entry.patch
new file mode 100644 (file)
index 0000000..7d81700
--- /dev/null
@@ -0,0 +1,72 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Wed, 6 Jul 2022 15:33:30 +0200
+Subject: x86/entry: Move PUSH_AND_CLEAR_REGS() back into error_entry
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 2c08b9b38f5b0f4a6c2d29be22b695e4ec4a556b upstream.
+
+Commit
+
+  ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()")
+
+moved PUSH_AND_CLEAR_REGS out of error_entry, into its own function, in
+part to avoid calling error_entry() for XenPV.
+
+However, commit
+
+  7c81c0c9210c ("x86/entry: Avoid very early RET")
+
+had to change that because the 'ret' was too early and moved it into
+idtentry, bloating the text size, since idtentry is expanded for every
+exception vector.
+
+However, with the advent of xen_error_entry() in commit
+
+  d147553b64bad ("x86/xen: Add UNTRAIN_RET")
+
+it became possible to move PUSH_AND_CLEAR_REGS out of idtentry and back
+into *error_entry().
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: error_entry still does cld]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -323,6 +323,8 @@ SYM_CODE_END(ret_from_fork)
+ SYM_CODE_START_LOCAL(xen_error_entry)
+       UNWIND_HINT_FUNC
++      PUSH_AND_CLEAR_REGS save_ret=1
++      ENCODE_FRAME_POINTER 8
+       UNTRAIN_RET
+       RET
+ SYM_CODE_END(xen_error_entry)
+@@ -334,9 +336,6 @@ SYM_CODE_END(xen_error_entry)
+  */
+ .macro idtentry_body cfunc has_error_code:req
+-      PUSH_AND_CLEAR_REGS
+-      ENCODE_FRAME_POINTER
+-
+       /*
+        * Call error_entry() and switch to the task stack if from userspace.
+        *
+@@ -1035,6 +1034,10 @@ SYM_CODE_END(paranoid_exit)
+ SYM_CODE_START_LOCAL(error_entry)
+       UNWIND_HINT_FUNC
+       cld
++
++      PUSH_AND_CLEAR_REGS save_ret=1
++      ENCODE_FRAME_POINTER 8
++
+       testb   $3, CS+8(%rsp)
+       jz      .Lerror_kernelspace
diff --git a/queue-5.18/x86-entry-move-push_and_clear_regs-out-of-error_entry.patch b/queue-5.18/x86-entry-move-push_and_clear_regs-out-of-error_entry.patch
new file mode 100644 (file)
index 0000000..7110ad1
--- /dev/null
@@ -0,0 +1,78 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
+Date: Thu, 21 Apr 2022 22:10:50 +0800
+Subject: x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()
+
+From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
+
+commit ee774dac0da1543376a69fd90840af6aa86879b3 upstream.
+
+The macro idtentry() (through idtentry_body()) calls error_entry()
+unconditionally even on XENPV. But XENPV needs to only push and clear
+regs.
+
+PUSH_AND_CLEAR_REGS in error_entry() makes the stack not return to its
+original place when the function returns, which means it is not possible
+to convert it to a C function.
+
+Carve PUSH_AND_CLEAR_REGS out of error_entry() and into a separate
+function and call it before error_entry() in order to avoid calling
+error_entry() on XENPV.
+
+It will also allow for error_entry() to be converted to C code that can
+use inlined sync_regs() and save a function call.
+
+  [ bp: Massage commit message. ]
+
+Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Link: https://lore.kernel.org/r/20220503032107.680190-4-jiangshanlai@gmail.com
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S |   15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -318,6 +318,14 @@ SYM_CODE_END(ret_from_fork)
+ #endif
+ .endm
++/* Save all registers in pt_regs */
++SYM_CODE_START_LOCAL(push_and_clear_regs)
++      UNWIND_HINT_FUNC
++      PUSH_AND_CLEAR_REGS save_ret=1
++      ENCODE_FRAME_POINTER 8
++      RET
++SYM_CODE_END(push_and_clear_regs)
++
+ /**
+  * idtentry_body - Macro to emit code calling the C function
+  * @cfunc:            C function to be called
+@@ -325,6 +333,9 @@ SYM_CODE_END(ret_from_fork)
+  */
+ .macro idtentry_body cfunc has_error_code:req
++      call push_and_clear_regs
++      UNWIND_HINT_REGS
++
+       call    error_entry
+       movq    %rax, %rsp                      /* switch to the task stack if from userspace */
+       ENCODE_FRAME_POINTER
+@@ -986,13 +997,11 @@ SYM_CODE_START_LOCAL(paranoid_exit)
+ SYM_CODE_END(paranoid_exit)
+ /*
+- * Save all registers in pt_regs, and switch GS if needed.
++ * Switch GS and CR3 if needed.
+  */
+ SYM_CODE_START_LOCAL(error_entry)
+       UNWIND_HINT_FUNC
+       cld
+-      PUSH_AND_CLEAR_REGS save_ret=1
+-      ENCODE_FRAME_POINTER 8
+       testb   $3, CS+8(%rsp)
+       jz      .Lerror_kernelspace
diff --git a/queue-5.18/x86-entry-remove-skip_r11rcx.patch b/queue-5.18/x86-entry-remove-skip_r11rcx.patch
new file mode 100644 (file)
index 0000000..563f180
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 6 May 2022 14:14:35 +0200
+Subject: x86/entry: Remove skip_r11rcx
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1b331eeea7b8676fc5dbdf80d0a07e41be226177 upstream.
+
+Yes, r11 and rcx have been restored previously, but since they're being
+popped anyway (into rsi), we might as well pop them into their own regs --
+setting them to the value they already are.
+
+Less magical code.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20220506121631.365070674@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/calling.h  |   10 +---------
+ arch/x86/entry/entry_64.S |    3 +--
+ 2 files changed, 2 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -119,27 +119,19 @@ For 32-bit we have the following convent
+       CLEAR_REGS
+ .endm
+-.macro POP_REGS pop_rdi=1 skip_r11rcx=0
++.macro POP_REGS pop_rdi=1
+       popq %r15
+       popq %r14
+       popq %r13
+       popq %r12
+       popq %rbp
+       popq %rbx
+-      .if \skip_r11rcx
+-      popq %rsi
+-      .else
+       popq %r11
+-      .endif
+       popq %r10
+       popq %r9
+       popq %r8
+       popq %rax
+-      .if \skip_r11rcx
+-      popq %rsi
+-      .else
+       popq %rcx
+-      .endif
+       popq %rdx
+       popq %rsi
+       .if \pop_rdi
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -191,8 +191,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h
+        * perf profiles. Nothing jumps here.
+        */
+ syscall_return_via_sysret:
+-      /* rcx and r11 are already restored (see code above) */
+-      POP_REGS pop_rdi=0 skip_r11rcx=1
++      POP_REGS pop_rdi=0
+       /*
+        * Now all regs are restored except RSP and RDI.
diff --git a/queue-5.18/x86-entry-switch-the-stack-after-error_entry-returns.patch b/queue-5.18/x86-entry-switch-the-stack-after-error_entry-returns.patch
new file mode 100644 (file)
index 0000000..8666458
--- /dev/null
@@ -0,0 +1,81 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
+Date: Thu, 21 Apr 2022 22:10:49 +0800
+Subject: x86/entry: Switch the stack after error_entry() returns
+
+From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
+
+commit 520a7e80c96d655fbe4650d9cc985bd9d0443389 upstream.
+
+error_entry() calls fixup_bad_iret() before sync_regs() if it is a fault
+from a bad IRET, to copy pt_regs to the kernel stack. It switches to the
+kernel stack directly after sync_regs().
+
+But error_entry() itself is also a function call, so it has to stash
+the address it is going to return to, in %r12 which is unnecessarily
+complicated.
+
+Move the stack switching after error_entry() and get rid of the need to
+handle the return address.
+
+  [ bp: Massage commit message. ]
+
+Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20220503032107.680190-3-jiangshanlai@gmail.com
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S |   16 ++++++----------
+ 1 file changed, 6 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -326,6 +326,8 @@ SYM_CODE_END(ret_from_fork)
+ .macro idtentry_body cfunc has_error_code:req
+       call    error_entry
++      movq    %rax, %rsp                      /* switch to the task stack if from userspace */
++      ENCODE_FRAME_POINTER
+       UNWIND_HINT_REGS
+       movq    %rsp, %rdi                      /* pt_regs pointer into 1st argument*/
+@@ -1003,14 +1005,10 @@ SYM_CODE_START_LOCAL(error_entry)
+       /* We have user CR3.  Change to kernel CR3. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++      leaq    8(%rsp), %rdi                   /* arg0 = pt_regs pointer */
+ .Lerror_entry_from_usermode_after_swapgs:
+       /* Put us onto the real thread stack. */
+-      popq    %r12                            /* save return addr in %12 */
+-      movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
+       call    sync_regs
+-      movq    %rax, %rsp                      /* switch stack */
+-      ENCODE_FRAME_POINTER
+-      pushq   %r12
+       RET
+       /*
+@@ -1042,6 +1040,7 @@ SYM_CODE_START_LOCAL(error_entry)
+        */
+ .Lerror_entry_done_lfence:
+       FENCE_SWAPGS_KERNEL_ENTRY
++      leaq    8(%rsp), %rax                   /* return pt_regs pointer */
+       RET
+ .Lbstep_iret:
+@@ -1062,12 +1061,9 @@ SYM_CODE_START_LOCAL(error_entry)
+        * Pretend that the exception came from user mode: set up pt_regs
+        * as if we faulted immediately after IRET.
+        */
+-      popq    %r12                            /* save return addr in %12 */
+-      movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
++      leaq    8(%rsp), %rdi                   /* arg0 = pt_regs pointer */
+       call    fixup_bad_iret
+-      mov     %rax, %rsp
+-      ENCODE_FRAME_POINTER
+-      pushq   %r12
++      mov     %rax, %rdi
+       jmp     .Lerror_entry_from_usermode_after_swapgs
+ SYM_CODE_END(error_entry)
diff --git a/queue-5.18/x86-ftrace-use-alternative-ret-encoding.patch b/queue-5.18/x86-ftrace-use-alternative-ret-encoding.patch
new file mode 100644 (file)
index 0000000..f1b3a8a
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:40 +0200
+Subject: x86/ftrace: Use alternative RET encoding
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1f001e9da6bbf482311e45e48f53c2bd2179e59c upstream.
+
+Use the return thunk in ftrace trampolines, if needed.
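+
+For reference (byte sizes, not from this patch): a bare RET is the single
+byte c3, optionally followed by an INT3 (cc) when CONFIG_SLS is enabled,
+while "jmp __x86_return_thunk" is a five-byte e9 rel32 jump; that is why
+RET_SIZE becomes 5 under CONFIG_RETPOLINE and stays
+1 + IS_ENABLED(CONFIG_SLS) otherwise.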
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ftrace.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -303,7 +303,7 @@ union ftrace_op_code_union {
+       } __attribute__((packed));
+ };
+-#define RET_SIZE              1 + IS_ENABLED(CONFIG_SLS)
++#define RET_SIZE              (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
+ static unsigned long
+ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
+@@ -359,7 +359,10 @@ create_trampoline(struct ftrace_ops *ops
+               goto fail;
+       ip = trampoline + size;
+-      memcpy(ip, retq, RET_SIZE);
++      if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++              __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE);
++      else
++              memcpy(ip, retq, sizeof(retq));
+       /* No need to test direct calls on created trampolines */
+       if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
diff --git a/queue-5.18/x86-kexec-disable-ret-on-kexec.patch b/queue-5.18/x86-kexec-disable-ret-on-kexec.patch
new file mode 100644 (file)
index 0000000..0358d47
--- /dev/null
@@ -0,0 +1,173 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Fri, 8 Jul 2022 19:10:11 +0200
+Subject: x86/kexec: Disable RET on kexec
+
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+
+commit 697977d8415d61f3acbc4ee6d564c9dcf0309507 upstream.
+
+All the invocations unroll to a jump to __x86_return_thunk, but this file
+must be position-independent (PIC): once relocated, a PC-relative jump to
+the return thunk would point at the wrong address, so a plain RET (plus
+INT3) is needed instead.
+
+This fixes kexec on 64-bit AMD boxes.
+
+  [ bp: Fix 32-bit build. ]
+
+Reported-by: Edward Tran <edward.tran@oracle.com>
+Reported-by: Awais Tanveer <awais.tanveer@oracle.com>
+Suggested-by: Ankur Arora <ankur.a.arora@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/relocate_kernel_32.S |   25 +++++++++++++++++++------
+ arch/x86/kernel/relocate_kernel_64.S |   23 +++++++++++++++++------
+ 2 files changed, 36 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/kernel/relocate_kernel_32.S
++++ b/arch/x86/kernel/relocate_kernel_32.S
+@@ -7,10 +7,12 @@
+ #include <linux/linkage.h>
+ #include <asm/page_types.h>
+ #include <asm/kexec.h>
++#include <asm/nospec-branch.h>
+ #include <asm/processor-flags.h>
+ /*
+- * Must be relocatable PIC code callable as a C function
++ * Must be relocatable PIC code callable as a C function, in particular
++ * there must be a plain RET and not jump to return thunk.
+  */
+ #define PTR(x) (x << 2)
+@@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
+       movl    %edi, %eax
+       addl    $(identity_mapped - relocate_kernel), %eax
+       pushl   %eax
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(relocate_kernel)
+ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+@@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+       xorl    %edx, %edx
+       xorl    %esi, %esi
+       xorl    %ebp, %ebp
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ 1:
+       popl    %edx
+       movl    CP_PA_SWAP_PAGE(%edi), %esp
+       addl    $PAGE_SIZE, %esp
+ 2:
++      ANNOTATE_RETPOLINE_SAFE
+       call    *%edx
+       /* get the re-entry point of the peer system */
+@@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+       movl    %edi, %eax
+       addl    $(virtual_mapped - relocate_kernel), %eax
+       pushl   %eax
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(identity_mapped)
+ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+@@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(virtual_mapped)
+       /* Do the copies */
+@@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
+       popl    %edi
+       popl    %ebx
+       popl    %ebp
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(swap_pages)
+       .globl kexec_control_code_size
+--- a/arch/x86/kernel/relocate_kernel_64.S
++++ b/arch/x86/kernel/relocate_kernel_64.S
+@@ -13,7 +13,8 @@
+ #include <asm/unwind_hints.h>
+ /*
+- * Must be relocatable PIC code callable as a C function
++ * Must be relocatable PIC code callable as a C function, in particular
++ * there must be a plain RET and not jump to return thunk.
+  */
+ #define PTR(x) (x << 3)
+@@ -105,7 +106,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
+       /* jump to identity mapped page */
+       addq    $(identity_mapped - relocate_kernel), %r8
+       pushq   %r8
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(relocate_kernel)
+ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+@@ -200,7 +203,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+       xorl    %r14d, %r14d
+       xorl    %r15d, %r15d
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ 1:
+       popq    %rdx
+@@ -219,7 +224,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+       call    swap_pages
+       movq    $virtual_mapped, %rax
+       pushq   %rax
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(identity_mapped)
+ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+@@ -241,7 +248,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map
+       popq    %r12
+       popq    %rbp
+       popq    %rbx
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(virtual_mapped)
+       /* Do the copies */
+@@ -298,7 +307,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
+       lea     PAGE_SIZE(%rax), %rsi
+       jmp     0b
+ 3:
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(swap_pages)
+       .globl kexec_control_code_size
diff --git a/queue-5.18/x86-kvm-fix-setcc-emulation-for-return-thunks.patch b/queue-5.18/x86-kvm-fix-setcc-emulation-for-return-thunks.patch
new file mode 100644 (file)
index 0000000..7741852
--- /dev/null
@@ -0,0 +1,93 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:42 +0200
+Subject: x86/kvm: Fix SETcc emulation for return thunks
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit af2e140f34208a5dfb6b7a8ad2d56bda88f0524d upstream.
+
+Prepare the SETcc fastop stuff for when RET can be larger still.
+
+The tricky bit here is that the expressions should not only be
+constant C expressions, but also absolute GAS expressions. This means
+no ?: and 'true' is ~0.
+
+Also ensure em_setcc() has the same alignment as the actual FOP_SETCC()
+ops, this ensures there cannot be an alignment hole between em_setcc()
+and the first op.
+
+Additionally, add a .skip directive to the FOP_SETCC() macro to fill
+any remaining space with INT3 traps; however the primary purpose of
+this directive is to generate AS warnings when the remaining space
+goes negative, which is a very good indication that the alignment magic
+went sideways.
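+
+A worked example of the new arithmetic (illustrative): with
+CONFIG_RETPOLINE=y, RET_LENGTH = 1 + 4 + IS_ENABLED(CONFIG_SLS). Without
+IBT and SLS that gives SETCC_LENGTH = 0 + 3 + 5 = 8 and SETCC_ALIGN =
+4 << 1 << 0 = 8 (no padding); with IBT and SLS it gives SETCC_LENGTH =
+4 + 3 + 6 = 13 and SETCC_ALIGN = 4 << 1 << 1 = 16, so .skip pads the
+remaining 3 bytes with INT3.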
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c |   28 +++++++++++++++-------------
+ 1 file changed, 15 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -325,13 +325,15 @@ static int fastop(struct x86_emulate_ctx
+ #define FOP_RET(name) \
+       __FOP_RET(#name)
+-#define FOP_START(op) \
++#define __FOP_START(op, align) \
+       extern void em_##op(struct fastop *fake); \
+       asm(".pushsection .text, \"ax\" \n\t" \
+           ".global em_" #op " \n\t" \
+-          ".align " __stringify(FASTOP_SIZE) " \n\t" \
++          ".align " __stringify(align) " \n\t" \
+           "em_" #op ":\n\t"
++#define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
++
+ #define FOP_END \
+           ".popsection")
+@@ -435,16 +437,15 @@ static int fastop(struct x86_emulate_ctx
+ /*
+  * Depending on .config the SETcc functions look like:
+  *
+- * ENDBR       [4 bytes; CONFIG_X86_KERNEL_IBT]
+- * SETcc %al   [3 bytes]
+- * RET         [1 byte]
+- * INT3        [1 byte; CONFIG_SLS]
+- *
+- * Which gives possible sizes 4, 5, 8 or 9.  When rounded up to the
+- * next power-of-two alignment they become 4, 8 or 16 resp.
++ * ENDBR                      [4 bytes; CONFIG_X86_KERNEL_IBT]
++ * SETcc %al                  [3 bytes]
++ * RET | JMP __x86_return_thunk       [1,5 bytes; CONFIG_RETPOLINE]
++ * INT3                               [1 byte; CONFIG_SLS]
+  */
+-#define SETCC_LENGTH  (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS))
+-#define SETCC_ALIGN   (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT)
++#define RET_LENGTH    (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \
++                       IS_ENABLED(CONFIG_SLS))
++#define SETCC_LENGTH  (ENDBR_INSN_SIZE + 3 + RET_LENGTH)
++#define SETCC_ALIGN   (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1))
+ static_assert(SETCC_LENGTH <= SETCC_ALIGN);
+ #define FOP_SETCC(op) \
+@@ -453,9 +454,10 @@ static_assert(SETCC_LENGTH <= SETCC_ALIG
+       #op ": \n\t" \
+       ASM_ENDBR \
+       #op " %al \n\t" \
+-      __FOP_RET(#op)
++      __FOP_RET(#op) \
++      ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t"
+-FOP_START(setcc)
++__FOP_START(setcc, SETCC_ALIGN)
+ FOP_SETCC(seto)
+ FOP_SETCC(setno)
+ FOP_SETCC(setc)
diff --git a/queue-5.18/x86-kvm-vmx-make-noinstr-clean.patch b/queue-5.18/x86-kvm-vmx-make-noinstr-clean.patch
new file mode 100644 (file)
index 0000000..961aa8d
--- /dev/null
@@ -0,0 +1,74 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:32 +0200
+Subject: x86/kvm/vmx: Make noinstr clean
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 742ab6df974ae8384a2dd213db1a3a06cf6d8936 upstream.
+
+The recent mmio_stale_data fixes broke the noinstr constraints:
+
+  vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x15b: call to wrmsrl.constprop.0() leaves .noinstr.text section
+  vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x1bf: call to kvm_arch_has_assigned_device() leaves .noinstr.text section
+
+Make it all happy again.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c   |    6 +++---
+ arch/x86/kvm/x86.c       |    4 ++--
+ include/linux/kvm_host.h |    2 +-
+ 3 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -383,9 +383,9 @@ static __always_inline void vmx_disable_
+       if (!vmx->disable_fb_clear)
+               return;
+-      rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
++      msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
+       msr |= FB_CLEAR_DIS;
+-      wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
++      native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+       /* Cache the MSR value to avoid reading it later */
+       vmx->msr_ia32_mcu_opt_ctrl = msr;
+ }
+@@ -396,7 +396,7 @@ static __always_inline void vmx_enable_f
+               return;
+       vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
+-      wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
++      native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
+ }
+ static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -12531,9 +12531,9 @@ void kvm_arch_end_assignment(struct kvm
+ }
+ EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
+-bool kvm_arch_has_assigned_device(struct kvm *kvm)
++bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
+ {
+-      return atomic_read(&kvm->arch.assigned_device_count);
++      return arch_atomic_read(&kvm->arch.assigned_device_count);
+ }
+ EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -1511,7 +1511,7 @@ static inline void kvm_arch_end_assignme
+ {
+ }
+-static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
++static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
+ {
+       return false;
+ }
diff --git a/queue-5.18/x86-objtool-create-.return_sites.patch b/queue-5.18/x86-objtool-create-.return_sites.patch
new file mode 100644 (file)
index 0000000..edca0ba
--- /dev/null
@@ -0,0 +1,196 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:38 +0200
+Subject: x86,objtool: Create .return_sites
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d9e9d2300681d68a775c28de6aa6e5290ae17796 upstream.
+
+Find all the return-thunk sites and record them in a .return_sites
+section such that the kernel can undo this.
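+
+For context, the section ends up as an array of 32-bit PC-relative
+offsets (one R_X86_64_PC32 relocation per site). A kernel-side consumer
+would walk it roughly like the sketch below; the symbol and function
+names are illustrative and not defined by this patch:
+
+  extern const s32 __return_sites[], __return_sites_end[];
+
+  static void walk_return_sites(void)
+  {
+          const s32 *s;
+
+          for (s = __return_sites; s < __return_sites_end; s++) {
+                  /* each entry is relative to its own location */
+                  void *addr = (void *)s + *s;
+
+                  /* addr points at a 'jmp __x86_return_thunk' to rewrite */
+          }
+  }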
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch/x86/decode.c         |    5 ++
+ tools/objtool/check.c                   |   74 ++++++++++++++++++++++++++++++++
+ tools/objtool/include/objtool/arch.h    |    1 
+ tools/objtool/include/objtool/elf.h     |    1 
+ tools/objtool/include/objtool/objtool.h |    1 
+ tools/objtool/objtool.c                 |    1 
+ 6 files changed, 83 insertions(+)
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -787,3 +787,8 @@ bool arch_is_retpoline(struct symbol *sy
+ {
+       return !strncmp(sym->name, "__x86_indirect_", 15);
+ }
++
++bool arch_is_rethunk(struct symbol *sym)
++{
++      return !strcmp(sym->name, "__x86_return_thunk");
++}
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -747,6 +747,52 @@ static int create_retpoline_sites_sectio
+       return 0;
+ }
++static int create_return_sites_sections(struct objtool_file *file)
++{
++      struct instruction *insn;
++      struct section *sec;
++      int idx;
++
++      sec = find_section_by_name(file->elf, ".return_sites");
++      if (sec) {
++              WARN("file already has .return_sites, skipping");
++              return 0;
++      }
++
++      idx = 0;
++      list_for_each_entry(insn, &file->return_thunk_list, call_node)
++              idx++;
++
++      if (!idx)
++              return 0;
++
++      sec = elf_create_section(file->elf, ".return_sites", 0,
++                               sizeof(int), idx);
++      if (!sec) {
++              WARN("elf_create_section: .return_sites");
++              return -1;
++      }
++
++      idx = 0;
++      list_for_each_entry(insn, &file->return_thunk_list, call_node) {
++
++              int *site = (int *)sec->data->d_buf + idx;
++              *site = 0;
++
++              if (elf_add_reloc_to_insn(file->elf, sec,
++                                        idx * sizeof(int),
++                                        R_X86_64_PC32,
++                                        insn->sec, insn->offset)) {
++                      WARN("elf_add_reloc_to_insn: .return_sites");
++                      return -1;
++              }
++
++              idx++;
++      }
++
++      return 0;
++}
++
+ static int create_ibt_endbr_seal_sections(struct objtool_file *file)
+ {
+       struct instruction *insn;
+@@ -1081,6 +1127,11 @@ __weak bool arch_is_retpoline(struct sym
+       return false;
+ }
++__weak bool arch_is_rethunk(struct symbol *sym)
++{
++      return false;
++}
++
+ #define NEGATIVE_RELOC        ((void *)-1L)
+ static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
+@@ -1248,6 +1299,18 @@ static void add_retpoline_call(struct ob
+       annotate_call_site(file, insn, false);
+ }
++static void add_return_call(struct objtool_file *file, struct instruction *insn)
++{
++      /*
++       * Return thunk tail calls are really just returns in disguise,
++       * so convert them accordingly.
++       */
++      insn->type = INSN_RETURN;
++      insn->retpoline_safe = true;
++
++      list_add_tail(&insn->call_node, &file->return_thunk_list);
++}
++
+ static bool same_function(struct instruction *insn1, struct instruction *insn2)
+ {
+       return insn1->func->pfunc == insn2->func->pfunc;
+@@ -1300,6 +1363,9 @@ static int add_jump_destinations(struct
+               } else if (reloc->sym->retpoline_thunk) {
+                       add_retpoline_call(file, insn);
+                       continue;
++              } else if (reloc->sym->return_thunk) {
++                      add_return_call(file, insn);
++                      continue;
+               } else if (insn->func) {
+                       /*
+                        * External sibling call or internal sibling call with
+@@ -2182,6 +2248,9 @@ static int classify_symbols(struct objto
+                       if (arch_is_retpoline(func))
+                               func->retpoline_thunk = true;
++                      if (arch_is_rethunk(func))
++                              func->return_thunk = true;
++
+                       if (!strcmp(func->name, "__fentry__"))
+                               func->fentry = true;
+@@ -3935,6 +4004,11 @@ int check(struct objtool_file *file)
+               if (ret < 0)
+                       goto out;
+               warnings += ret;
++
++              ret = create_return_sites_sections(file);
++              if (ret < 0)
++                      goto out;
++              warnings += ret;
+       }
+       if (mcount) {
+--- a/tools/objtool/include/objtool/arch.h
++++ b/tools/objtool/include/objtool/arch.h
+@@ -89,6 +89,7 @@ const char *arch_ret_insn(int len);
+ int arch_decode_hint_reg(u8 sp_reg, int *base);
+ bool arch_is_retpoline(struct symbol *sym);
++bool arch_is_rethunk(struct symbol *sym);
+ int arch_rewrite_retpolines(struct objtool_file *file);
+--- a/tools/objtool/include/objtool/elf.h
++++ b/tools/objtool/include/objtool/elf.h
+@@ -57,6 +57,7 @@ struct symbol {
+       u8 uaccess_safe      : 1;
+       u8 static_call_tramp : 1;
+       u8 retpoline_thunk   : 1;
++      u8 return_thunk      : 1;
+       u8 fentry            : 1;
+       u8 profiling_func    : 1;
+       struct list_head pv_target;
+--- a/tools/objtool/include/objtool/objtool.h
++++ b/tools/objtool/include/objtool/objtool.h
+@@ -24,6 +24,7 @@ struct objtool_file {
+       struct list_head insn_list;
+       DECLARE_HASHTABLE(insn_hash, 20);
+       struct list_head retpoline_call_list;
++      struct list_head return_thunk_list;
+       struct list_head static_call_list;
+       struct list_head mcount_loc_list;
+       struct list_head endbr_list;
+--- a/tools/objtool/objtool.c
++++ b/tools/objtool/objtool.c
+@@ -126,6 +126,7 @@ struct objtool_file *objtool_open_read(c
+       INIT_LIST_HEAD(&file.insn_list);
+       hash_init(file.insn_hash);
+       INIT_LIST_HEAD(&file.retpoline_call_list);
++      INIT_LIST_HEAD(&file.return_thunk_list);
+       INIT_LIST_HEAD(&file.static_call_list);
+       INIT_LIST_HEAD(&file.mcount_loc_list);
+       INIT_LIST_HEAD(&file.endbr_list);
diff --git a/queue-5.18/x86-retbleed-add-fine-grained-kconfig-knobs.patch b/queue-5.18/x86-retbleed-add-fine-grained-kconfig-knobs.patch
new file mode 100644 (file)
index 0000000..7e8d13e
--- /dev/null
@@ -0,0 +1,587 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 27 Jun 2022 22:21:17 +0000
+Subject: x86/retbleed: Add fine grained Kconfig knobs
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit f43b9876e857c739d407bc56df288b0ebe1a9164 upstream.
+
+Do fine-grained Kconfig for all the various retbleed parts.
+
+NOTE: if your compiler doesn't support return thunks, this will
+silently 'upgrade' your mitigation to IBPB; you might not like this.
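+
+As a quick reference (illustrative, not part of the patch), a config
+fragment selecting everything introduced below would look like:
+
+  CONFIG_SPECULATION_MITIGATIONS=y
+  CONFIG_PAGE_TABLE_ISOLATION=y
+  CONFIG_RETPOLINE=y
+  CONFIG_RETHUNK=y
+  CONFIG_CPU_UNRET_ENTRY=y
+  CONFIG_CPU_IBPB_ENTRY=y
+  CONFIG_CPU_IBRS_ENTRY=y
+
+with RETHUNK additionally requiring a compiler that has
+-mfunction-return=thunk-extern (CC_HAS_RETURN_THUNK).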
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: there is no CONFIG_OBJTOOL]
+[cascardo: objtool calling and option parsing has changed]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Kconfig                         |  106 +++++++++++++++++++++++--------
+ arch/x86/Makefile                        |    8 +-
+ arch/x86/entry/calling.h                 |    4 +
+ arch/x86/include/asm/disabled-features.h |   18 ++++-
+ arch/x86/include/asm/linkage.h           |    4 -
+ arch/x86/include/asm/nospec-branch.h     |   10 ++
+ arch/x86/include/asm/static_call.h       |    2 
+ arch/x86/kernel/alternative.c            |    5 +
+ arch/x86/kernel/cpu/amd.c                |    2 
+ arch/x86/kernel/cpu/bugs.c               |   42 +++++++-----
+ arch/x86/kernel/static_call.c            |    2 
+ arch/x86/kvm/emulate.c                   |    4 -
+ arch/x86/lib/retpoline.S                 |    4 +
+ scripts/Makefile.build                   |    1 
+ scripts/link-vmlinux.sh                  |    2 
+ security/Kconfig                         |   11 ---
+ tools/objtool/builtin-check.c            |    3 
+ tools/objtool/check.c                    |    9 ++
+ tools/objtool/include/objtool/builtin.h  |    2 
+ 19 files changed, 170 insertions(+), 69 deletions(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -457,30 +457,6 @@ config GOLDFISH
+       def_bool y
+       depends on X86_GOLDFISH
+-config RETPOLINE
+-      bool "Avoid speculative indirect branches in kernel"
+-      default y
+-      help
+-        Compile kernel with the retpoline compiler options to guard against
+-        kernel-to-user data leaks by avoiding speculative indirect
+-        branches. Requires a compiler with -mindirect-branch=thunk-extern
+-        support for full protection. The kernel may run slower.
+-
+-config CC_HAS_SLS
+-      def_bool $(cc-option,-mharden-sls=all)
+-
+-config CC_HAS_RETURN_THUNK
+-      def_bool $(cc-option,-mfunction-return=thunk-extern)
+-
+-config SLS
+-      bool "Mitigate Straight-Line-Speculation"
+-      depends on CC_HAS_SLS && X86_64
+-      default n
+-      help
+-        Compile the kernel with straight-line-speculation options to guard
+-        against straight line speculation. The kernel image might be slightly
+-        larger.
+-
+ config X86_CPU_RESCTRL
+       bool "x86 CPU resource control support"
+       depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
+@@ -2452,6 +2428,88 @@ source "kernel/livepatch/Kconfig"
+ endmenu
++config CC_HAS_SLS
++      def_bool $(cc-option,-mharden-sls=all)
++
++config CC_HAS_RETURN_THUNK
++      def_bool $(cc-option,-mfunction-return=thunk-extern)
++
++menuconfig SPECULATION_MITIGATIONS
++      bool "Mitigations for speculative execution vulnerabilities"
++      default y
++      help
++        Say Y here to enable options which enable mitigations for
++        speculative execution hardware vulnerabilities.
++
++        If you say N, all mitigations will be disabled. You really
++        should know what you are doing to say so.
++
++if SPECULATION_MITIGATIONS
++
++config PAGE_TABLE_ISOLATION
++      bool "Remove the kernel mapping in user mode"
++      default y
++      depends on (X86_64 || X86_PAE)
++      help
++        This feature reduces the number of hardware side channels by
++        ensuring that the majority of kernel addresses are not mapped
++        into userspace.
++
++        See Documentation/x86/pti.rst for more details.
++
++config RETPOLINE
++      bool "Avoid speculative indirect branches in kernel"
++      default y
++      help
++        Compile kernel with the retpoline compiler options to guard against
++        kernel-to-user data leaks by avoiding speculative indirect
++        branches. Requires a compiler with -mindirect-branch=thunk-extern
++        support for full protection. The kernel may run slower.
++
++config RETHUNK
++      bool "Enable return-thunks"
++      depends on RETPOLINE && CC_HAS_RETURN_THUNK
++      default y
++      help
++        Compile the kernel with the return-thunks compiler option to guard
++        against kernel-to-user data leaks by avoiding return speculation.
++        Requires a compiler with -mfunction-return=thunk-extern
++        support for full protection. The kernel may run slower.
++
++config CPU_UNRET_ENTRY
++      bool "Enable UNRET on kernel entry"
++      depends on CPU_SUP_AMD && RETHUNK
++      default y
++      help
++        Compile the kernel with support for the retbleed=unret mitigation.
++
++config CPU_IBPB_ENTRY
++      bool "Enable IBPB on kernel entry"
++      depends on CPU_SUP_AMD
++      default y
++      help
++        Compile the kernel with support for the retbleed=ibpb mitigation.
++
++config CPU_IBRS_ENTRY
++      bool "Enable IBRS on kernel entry"
++      depends on CPU_SUP_INTEL
++      default y
++      help
++        Compile the kernel with support for the spectre_v2=ibrs mitigation.
++        This mitigates both spectre_v2 and retbleed at great cost to
++        performance.
++
++config SLS
++      bool "Mitigate Straight-Line-Speculation"
++      depends on CC_HAS_SLS && X86_64
++      default n
++      help
++        Compile the kernel with straight-line-speculation options to guard
++        against straight line speculation. The kernel image might be slightly
++        larger.
++
++endif
++
+ config ARCH_HAS_ADD_PAGES
+       def_bool y
+       depends on ARCH_ENABLE_MEMORY_HOTPLUG
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -15,14 +15,18 @@ endif
+ ifdef CONFIG_CC_IS_GCC
+ RETPOLINE_CFLAGS      := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ RETPOLINE_CFLAGS      += $(call cc-option,-mindirect-branch-cs-prefix)
+-RETPOLINE_CFLAGS      += $(call cc-option,-mfunction-return=thunk-extern)
+ RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register)
+ endif
+ ifdef CONFIG_CC_IS_CLANG
+ RETPOLINE_CFLAGS      := -mretpoline-external-thunk
+ RETPOLINE_VDSO_CFLAGS := -mretpoline
+-RETPOLINE_CFLAGS      += $(call cc-option,-mfunction-return=thunk-extern)
+ endif
++
++ifdef CONFIG_RETHUNK
++RETHUNK_CFLAGS                := -mfunction-return=thunk-extern
++RETPOLINE_CFLAGS      += $(RETHUNK_CFLAGS)
++endif
++
+ export RETPOLINE_CFLAGS
+ export RETPOLINE_VDSO_CFLAGS
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -296,6 +296,7 @@ For 32-bit we have the following convent
+  * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
+  */
+ .macro IBRS_ENTER save_reg
++#ifdef CONFIG_CPU_IBRS_ENTRY
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+       movl    $MSR_IA32_SPEC_CTRL, %ecx
+@@ -316,6 +317,7 @@ For 32-bit we have the following convent
+       shr     $32, %rdx
+       wrmsr
+ .Lend_\@:
++#endif
+ .endm
+ /*
+@@ -323,6 +325,7 @@ For 32-bit we have the following convent
+  * regs. Must be called after the last RET.
+  */
+ .macro IBRS_EXIT save_reg
++#ifdef CONFIG_CPU_IBRS_ENTRY
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+       movl    $MSR_IA32_SPEC_CTRL, %ecx
+@@ -337,6 +340,7 @@ For 32-bit we have the following convent
+       shr     $32, %rdx
+       wrmsr
+ .Lend_\@:
++#endif
+ .endm
+ /*
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -60,9 +60,19 @@
+ # define DISABLE_RETPOLINE    0
+ #else
+ # define DISABLE_RETPOLINE    ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+-                               (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \
+-                               (1 << (X86_FEATURE_RETHUNK & 31)) | \
+-                               (1 << (X86_FEATURE_UNRET & 31)))
++                               (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
++#endif
++
++#ifdef CONFIG_RETHUNK
++# define DISABLE_RETHUNK      0
++#else
++# define DISABLE_RETHUNK      (1 << (X86_FEATURE_RETHUNK & 31))
++#endif
++
++#ifdef CONFIG_CPU_UNRET_ENTRY
++# define DISABLE_UNRET                0
++#else
++# define DISABLE_UNRET                (1 << (X86_FEATURE_UNRET & 31))
+ #endif
+ #ifdef CONFIG_INTEL_IOMMU_SVM
+@@ -91,7 +101,7 @@
+ #define DISABLED_MASK8        0
+ #define DISABLED_MASK9        (DISABLE_SMAP|DISABLE_SGX)
+ #define DISABLED_MASK10       0
+-#define DISABLED_MASK11       (DISABLE_RETPOLINE)
++#define DISABLED_MASK11       (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
+ #define DISABLED_MASK12       0
+ #define DISABLED_MASK13       0
+ #define DISABLED_MASK14       0
+--- a/arch/x86/include/asm/linkage.h
++++ b/arch/x86/include/asm/linkage.h
+@@ -19,7 +19,7 @@
+ #define __ALIGN_STR   __stringify(__ALIGN)
+ #endif
+-#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+ #define RET   jmp __x86_return_thunk
+ #else /* CONFIG_RETPOLINE */
+ #ifdef CONFIG_SLS
+@@ -31,7 +31,7 @@
+ #else /* __ASSEMBLY__ */
+-#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+ #define ASM_RET       "jmp __x86_return_thunk\n\t"
+ #else /* CONFIG_RETPOLINE */
+ #ifdef CONFIG_SLS
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -127,6 +127,12 @@
+ .Lskip_rsb_\@:
+ .endm
++#ifdef CONFIG_CPU_UNRET_ENTRY
++#define CALL_ZEN_UNTRAIN_RET  "call zen_untrain_ret"
++#else
++#define CALL_ZEN_UNTRAIN_RET  ""
++#endif
++
+ /*
+  * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+  * return thunk isn't mapped into the userspace tables (then again, AMD
+@@ -139,10 +145,10 @@
+  * where we have a stack but before any RET instruction.
+  */
+ .macro UNTRAIN_RET
+-#ifdef CONFIG_RETPOLINE
++#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
+       ANNOTATE_UNRET_END
+       ALTERNATIVE_2 "",                                               \
+-                    "call zen_untrain_ret", X86_FEATURE_UNRET,        \
++                    CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET,          \
+                     "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
+ #endif
+ .endm
+--- a/arch/x86/include/asm/static_call.h
++++ b/arch/x86/include/asm/static_call.h
+@@ -46,7 +46,7 @@
+ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)                     \
+       __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+-#ifdef CONFIG_RETPOLINE
++#ifdef CONFIG_RETHUNK
+ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)                      \
+       __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
+ #else
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -508,6 +508,7 @@ void __init_or_module noinline apply_ret
+       }
+ }
++#ifdef CONFIG_RETHUNK
+ /*
+  * Rewrite the compiler generated return thunk tail-calls.
+  *
+@@ -569,6 +570,10 @@ void __init_or_module noinline apply_ret
+               }
+       }
+ }
++#else
++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
++#endif /* CONFIG_RETHUNK */
++
+ #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
+ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -864,6 +864,7 @@ static void init_amd_bd(struct cpuinfo_x
+ void init_spectral_chicken(struct cpuinfo_x86 *c)
+ {
++#ifdef CONFIG_CPU_UNRET_ENTRY
+       u64 value;
+       /*
+@@ -880,6 +881,7 @@ void init_spectral_chicken(struct cpuinf
+                       wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
+               }
+       }
++#endif
+ }
+ static void init_amd_zn(struct cpuinfo_x86 *c)
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -835,7 +835,6 @@ static int __init retbleed_parse_cmdline
+ early_param("retbleed", retbleed_parse_cmdline);
+ #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
+-#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n"
+ #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
+ static void __init retbleed_select_mitigation(void)
+@@ -850,18 +849,33 @@ static void __init retbleed_select_mitig
+               return;
+       case RETBLEED_CMD_UNRET:
+-              retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++              if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) {
++                      retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++              } else {
++                      pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n");
++                      goto do_cmd_auto;
++              }
+               break;
+       case RETBLEED_CMD_IBPB:
+-              retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++              if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
++                      retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++              } else {
++                      pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
++                      goto do_cmd_auto;
++              }
+               break;
++do_cmd_auto:
+       case RETBLEED_CMD_AUTO:
+       default:
+               if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+-                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+-                      retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
++                      if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY))
++                              retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++                      else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY))
++                              retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++              }
+               /*
+                * The Intel mitigation (IBRS or eIBRS) was already selected in
+@@ -874,14 +888,6 @@ static void __init retbleed_select_mitig
+       switch (retbleed_mitigation) {
+       case RETBLEED_MITIGATION_UNRET:
+-
+-              if (!IS_ENABLED(CONFIG_RETPOLINE) ||
+-                  !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) {
+-                      pr_err(RETBLEED_COMPILER_MSG);
+-                      retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+-                      goto retbleed_force_ibpb;
+-              }
+-
+               setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+               setup_force_cpu_cap(X86_FEATURE_UNRET);
+@@ -893,7 +899,6 @@ static void __init retbleed_select_mitig
+               break;
+       case RETBLEED_MITIGATION_IBPB:
+-retbleed_force_ibpb:
+               setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+               mitigate_smt = true;
+               break;
+@@ -1264,6 +1269,12 @@ static enum spectre_v2_mitigation_cmd __
+               return SPECTRE_V2_CMD_AUTO;
+       }
++      if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) {
++              pr_err("%s selected but not compiled in. Switching to AUTO select\n",
++                     mitigation_options[i].option);
++              return SPECTRE_V2_CMD_AUTO;
++      }
++
+       if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
+               pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
+                      mitigation_options[i].option);
+@@ -1321,7 +1332,8 @@ static void __init spectre_v2_select_mit
+                       break;
+               }
+-              if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
++              if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) &&
++                  boot_cpu_has_bug(X86_BUG_RETBLEED) &&
+                   retbleed_cmd != RETBLEED_CMD_OFF &&
+                   boot_cpu_has(X86_FEATURE_IBRS) &&
+                   boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -126,7 +126,7 @@ void arch_static_call_transform(void *si
+ }
+ EXPORT_SYMBOL_GPL(arch_static_call_transform);
+-#ifdef CONFIG_RETPOLINE
++#ifdef CONFIG_RETHUNK
+ /*
+  * This is called by apply_returns() to fix up static call trampolines,
+  * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -439,10 +439,10 @@ static int fastop(struct x86_emulate_ctx
+  *
+  * ENDBR                      [4 bytes; CONFIG_X86_KERNEL_IBT]
+  * SETcc %al                  [3 bytes]
+- * RET | JMP __x86_return_thunk       [1,5 bytes; CONFIG_RETPOLINE]
++ * RET | JMP __x86_return_thunk       [1,5 bytes; CONFIG_RETHUNK]
+  * INT3                               [1 byte; CONFIG_SLS]
+  */
+-#define RET_LENGTH    (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \
++#define RET_LENGTH    (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \
+                        IS_ENABLED(CONFIG_SLS))
+ #define SETCC_LENGTH  (ENDBR_INSN_SIZE + 3 + RET_LENGTH)
+ #define SETCC_ALIGN   (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1))
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -72,6 +72,8 @@ SYM_CODE_END(__x86_indirect_thunk_array)
+  * This function name is magical and is used by -mfunction-return=thunk-extern
+  * for the compiler to generate JMPs to it.
+  */
++#ifdef CONFIG_RETHUNK
++
+       .section .text.__x86.return_thunk
+ /*
+@@ -136,3 +138,5 @@ SYM_FUNC_END(zen_untrain_ret)
+ __EXPORT_THUNK(zen_untrain_ret)
+ EXPORT_SYMBOL(__x86_return_thunk)
++
++#endif /* CONFIG_RETHUNK */
+--- a/scripts/Makefile.build
++++ b/scripts/Makefile.build
+@@ -233,6 +233,7 @@ objtool_args =                                                             \
+       $(if $(CONFIG_FRAME_POINTER),, --no-fp)                         \
+       $(if $(CONFIG_GCOV_KERNEL), --no-unreachable)                   \
+       $(if $(CONFIG_RETPOLINE), --retpoline)                          \
++      $(if $(CONFIG_RETHUNK), --rethunk)                              \
+       $(if $(CONFIG_X86_SMAP), --uaccess)                             \
+       $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount)             \
+       $(if $(CONFIG_SLS), --sls)
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -130,7 +130,7 @@ objtool_link()
+       if is_enabled CONFIG_VMLINUX_VALIDATION; then
+               objtoolopt="${objtoolopt} --noinstr"
+-              if is_enabled CONFIG_RETPOLINE; then
++              if is_enabled CONFIG_CPU_UNRET_ENTRY; then
+                       objtoolopt="${objtoolopt} --unret"
+               fi
+       fi
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -54,17 +54,6 @@ config SECURITY_NETWORK
+         implement socket and networking access controls.
+         If you are unsure how to answer this question, answer N.
+-config PAGE_TABLE_ISOLATION
+-      bool "Remove the kernel mapping in user mode"
+-      default y
+-      depends on (X86_64 || X86_PAE) && !UML
+-      help
+-        This feature reduces the number of hardware side channels by
+-        ensuring that the majority of kernel addresses are not mapped
+-        into userspace.
+-
+-        See Documentation/x86/pti.rst for more details.
+-
+ config SECURITY_INFINIBAND
+       bool "Infiniband Security Hooks"
+       depends on SECURITY && INFINIBAND
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -21,7 +21,7 @@
+ bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+      lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
+-     ibt, unret;
++     ibt, unret, rethunk;
+ static const char * const check_usage[] = {
+       "objtool check [<options>] file.o",
+@@ -37,6 +37,7 @@ const struct option check_options[] = {
+       OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
+       OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
+       OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
++      OPT_BOOLEAN(0,   "rethunk", &rethunk, "validate and annotate rethunk usage"),
+       OPT_BOOLEAN(0,   "unret", &unret, "validate entry unret placement"),
+       OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
+       OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -3854,8 +3854,11 @@ static int validate_retpoline(struct obj
+                       continue;
+               if (insn->type == INSN_RETURN) {
+-                      WARN_FUNC("'naked' return found in RETPOLINE build",
+-                                insn->sec, insn->offset);
++                      if (rethunk) {
++                              WARN_FUNC("'naked' return found in RETHUNK build",
++                                        insn->sec, insn->offset);
++                      } else
++                              continue;
+               } else {
+                       WARN_FUNC("indirect %s found in RETPOLINE build",
+                                 insn->sec, insn->offset,
+@@ -4228,7 +4231,9 @@ int check(struct objtool_file *file)
+               if (ret < 0)
+                       goto out;
+               warnings += ret;
++      }
++      if (rethunk) {
+               ret = create_return_sites_sections(file);
+               if (ret < 0)
+                       goto out;
+--- a/tools/objtool/include/objtool/builtin.h
++++ b/tools/objtool/include/objtool/builtin.h
+@@ -10,7 +10,7 @@
+ extern const struct option check_options[];
+ extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+           lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
+-          ibt, unret;
++          ibt, unret, rethunk;
+ extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
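For illustration, a minimal user-space C sketch of the Kconfig-gated fallback this
patch adds to retbleed_select_mitigation(): a requested mitigation that is not
compiled in falls back to the auto selection, and auto only picks UNRET or IBPB on
AMD/Hygon. The HAVE_* macros and enum names below are stand-ins, not kernel
identifiers.

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for CONFIG_CPU_UNRET_ENTRY / CONFIG_CPU_IBPB_ENTRY (both =y here). */
#define HAVE_UNRET_ENTRY 1
#define HAVE_IBPB_ENTRY  1

enum cmd   { CMD_AUTO, CMD_UNRET, CMD_IBPB };
enum mitig { MITIG_NONE, MITIG_UNRET, MITIG_IBPB };

static enum mitig select_retbleed(enum cmd cmd, bool amd_or_hygon)
{
	switch (cmd) {
	case CMD_UNRET:
		if (HAVE_UNRET_ENTRY)
			return MITIG_UNRET;
		break;		/* not compiled in: fall back to the auto path below */
	case CMD_IBPB:
		if (HAVE_IBPB_ENTRY)
			return MITIG_IBPB;
		break;
	default:
		break;
	}

	/* CMD_AUTO (and the fallback paths): only AMD/Hygon get UNRET/IBPB */
	if (amd_or_hygon) {
		if (HAVE_UNRET_ENTRY)
			return MITIG_UNRET;
		if (HAVE_IBPB_ENTRY)
			return MITIG_IBPB;
	}
	return MITIG_NONE;
}

int main(void)
{
	printf("%d\n", select_retbleed(CMD_UNRET, true));	/* prints 1 (MITIG_UNRET) */
	return 0;
}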
diff --git a/queue-5.18/x86-retpoline-cleanup-some-ifdefery.patch b/queue-5.18/x86-retpoline-cleanup-some-ifdefery.patch
new file mode 100644 (file)
index 0000000..505aa72
--- /dev/null
@@ -0,0 +1,95 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:34 +0200
+Subject: x86/retpoline: Cleanup some #ifdefery
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 369ae6ffc41a3c1137cab697635a84d0cc7cdcea upstream.
+
+On its own not much of a cleanup, but it prepares for more/similar
+code.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/disabled-features.h |    9 ++++++++-
+ arch/x86/include/asm/nospec-branch.h     |    7 +++----
+ arch/x86/net/bpf_jit_comp.c              |    7 +++----
+ 3 files changed, 14 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -56,6 +56,13 @@
+ # define DISABLE_PTI          (1 << (X86_FEATURE_PTI & 31))
+ #endif
++#ifdef CONFIG_RETPOLINE
++# define DISABLE_RETPOLINE    0
++#else
++# define DISABLE_RETPOLINE    ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
++                               (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
++#endif
++
+ #ifdef CONFIG_INTEL_IOMMU_SVM
+ # define DISABLE_ENQCMD               0
+ #else
+@@ -82,7 +89,7 @@
+ #define DISABLED_MASK8        0
+ #define DISABLED_MASK9        (DISABLE_SMAP|DISABLE_SGX)
+ #define DISABLED_MASK10       0
+-#define DISABLED_MASK11       0
++#define DISABLED_MASK11       (DISABLE_RETPOLINE)
+ #define DISABLED_MASK12       0
+ #define DISABLED_MASK13       0
+ #define DISABLED_MASK14       0
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -120,17 +120,16 @@
+       _ASM_PTR " 999b\n\t"                                    \
+       ".popsection\n\t"
+-#ifdef CONFIG_RETPOLINE
+-
+ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
++extern retpoline_thunk_t __x86_indirect_thunk_array[];
++
++#ifdef CONFIG_RETPOLINE
+ #define GEN(reg) \
+       extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
+ #include <asm/GEN-for-each-reg.h>
+ #undef GEN
+-extern retpoline_thunk_t __x86_indirect_thunk_array[];
+-
+ #ifdef CONFIG_X86_64
+ /*
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -407,16 +407,15 @@ static void emit_indirect_jump(u8 **ppro
+ {
+       u8 *prog = *pprog;
+-#ifdef CONFIG_RETPOLINE
+       if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
+               EMIT_LFENCE();
+               EMIT2(0xFF, 0xE0 + reg);
+       } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
+               OPTIMIZER_HIDE_VAR(reg);
+               emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
+-      } else
+-#endif
+-      EMIT2(0xFF, 0xE0 + reg);
++      } else {
++              EMIT2(0xFF, 0xE0 + reg);
++      }
+       *pprog = prog;
+ }
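A compact sketch of the emit_indirect_jump() change above: the choice between the
LFENCE retpoline, the thunk retpoline and a plain indirect jump is now made at run
time from CPU feature flags instead of behind #ifdef CONFIG_RETPOLINE. The booleans
stand in for cpu_feature_enabled() and the printf strings for the emitted
instructions.

#include <stdbool.h>
#include <stdio.h>

static void emit_indirect_jump(bool lfence_retpoline, bool retpoline)
{
	if (lfence_retpoline)
		printf("lfence; jmp *%%reg\n");			/* LFENCE variant */
	else if (retpoline)
		printf("jmp __x86_indirect_thunk_reg\n");	/* thunk variant */
	else
		printf("jmp *%%reg\n");				/* plain indirect jump */
}

int main(void)
{
	emit_indirect_jump(false, true);
	return 0;
}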
diff --git a/queue-5.18/x86-retpoline-swizzle-retpoline-thunk.patch b/queue-5.18/x86-retpoline-swizzle-retpoline-thunk.patch
new file mode 100644 (file)
index 0000000..2d871e2
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:35 +0200
+Subject: x86/retpoline: Swizzle retpoline thunk
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 00e1533325fd1fb5459229fe37f235462649f668 upstream.
+
+Put the actual retpoline thunk as the original code so that it can
+become more complicated. Specifically, it allows RET to be a JMP,
+which can't be .altinstr_replacement since that doesn't do relocations
+(except for the very first instruction).
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/retpoline.S |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -33,9 +33,9 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\re
+       UNWIND_HINT_EMPTY
+       ANNOTATE_NOENDBR
+-      ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+-                    __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
+-                    __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE
++      ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
++                    __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
++                    __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)
+ .endm
diff --git a/queue-5.18/x86-retpoline-use-mfunction-return.patch b/queue-5.18/x86-retpoline-use-mfunction-return.patch
new file mode 100644 (file)
index 0000000..f91c68a
--- /dev/null
@@ -0,0 +1,77 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:36 +0200
+Subject: x86/retpoline: Use -mfunction-return
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 0b53c374b9eff2255a386f1f1cfb9a928e52a5ae upstream.
+
+Utilize -mfunction-return=thunk-extern when available to have the
+compiler replace RET instructions with direct JMPs to the symbol
+__x86_return_thunk. This does not affect assembler (.S) sources, only C
+sources.
+
+-mfunction-return=thunk-extern has been available since gcc 7.3 and
+clang 15.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Tested-by: Nick Desaulniers <ndesaulniers@google.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Makefile                    |    2 ++
+ arch/x86/include/asm/nospec-branch.h |    2 ++
+ arch/x86/lib/retpoline.S             |   13 +++++++++++++
+ 3 files changed, 17 insertions(+)
+
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -15,11 +15,13 @@ endif
+ ifdef CONFIG_CC_IS_GCC
+ RETPOLINE_CFLAGS      := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ RETPOLINE_CFLAGS      += $(call cc-option,-mindirect-branch-cs-prefix)
++RETPOLINE_CFLAGS      += $(call cc-option,-mfunction-return=thunk-extern)
+ RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register)
+ endif
+ ifdef CONFIG_CC_IS_CLANG
+ RETPOLINE_CFLAGS      := -mretpoline-external-thunk
+ RETPOLINE_VDSO_CFLAGS := -mretpoline
++RETPOLINE_CFLAGS      += $(call cc-option,-mfunction-return=thunk-extern)
+ endif
+ export RETPOLINE_CFLAGS
+ export RETPOLINE_VDSO_CFLAGS
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -123,6 +123,8 @@
+ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+ extern retpoline_thunk_t __x86_indirect_thunk_array[];
++extern void __x86_return_thunk(void);
++
+ #ifdef CONFIG_RETPOLINE
+ #define GEN(reg) \
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -67,3 +67,16 @@ SYM_CODE_END(__x86_indirect_thunk_array)
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
+ #undef GEN
++
++/*
++ * This function name is magical and is used by -mfunction-return=thunk-extern
++ * for the compiler to generate JMPs to it.
++ */
++SYM_CODE_START(__x86_return_thunk)
++      UNWIND_HINT_EMPTY
++      ANNOTATE_NOENDBR
++      ret
++      int3
++SYM_CODE_END(__x86_return_thunk)
++
++__EXPORT_THUNK(__x86_return_thunk)
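To illustrate what -mfunction-return=thunk-extern means for ordinary C code, here
is a tiny non-kernel example; the asm in the comment is approximate and can be
checked by building the file with that flag and disassembling the object.

/* Declaration the compiler expects; the kernel provides the symbol in
 * arch/x86/lib/retpoline.S as added above. */
void __x86_return_thunk(void);

int add_one(int x)
{
	return x + 1;
	/*
	 * Normal build:                        ...; ret
	 * With -mfunction-return=thunk-extern: ...; jmp __x86_return_thunk
	 * so every C-level return funnels through one patchable symbol.
	 */
}

The object cannot link into a standalone program because __x86_return_thunk is
external; in the kernel it resolves to the thunk above.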
diff --git a/queue-5.18/x86-sev-avoid-using-__x86_return_thunk.patch b/queue-5.18/x86-sev-avoid-using-__x86_return_thunk.patch
new file mode 100644 (file)
index 0000000..c632af7
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Kim Phillips <kim.phillips@amd.com>
+Date: Tue, 14 Jun 2022 23:15:44 +0200
+Subject: x86/sev: Avoid using __x86_return_thunk
+
+From: Kim Phillips <kim.phillips@amd.com>
+
+commit 0ee9073000e8791f8b134a8ded31bcc767f7f232 upstream.
+
+Specifically, it's because __enc_copy() encrypts the kernel after
+being relocated outside the kernel in sme_encrypt_execute(), and the
+RET macro's jmp offset isn't amended prior to execution.
+
+Signed-off-by: Kim Phillips <kim.phillips@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/mem_encrypt_boot.S |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/mm/mem_encrypt_boot.S
++++ b/arch/x86/mm/mem_encrypt_boot.S
+@@ -65,7 +65,9 @@ SYM_FUNC_START(sme_encrypt_execute)
+       movq    %rbp, %rsp              /* Restore original stack pointer */
+       pop     %rbp
+-      RET
++      /* Offset to __x86_return_thunk would be wrong here */
++      ret
++      int3
+ SYM_FUNC_END(sme_encrypt_execute)
+ SYM_FUNC_START(__enc_copy)
+@@ -151,6 +153,8 @@ SYM_FUNC_START(__enc_copy)
+       pop     %r12
+       pop     %r15
+-      RET
++      /* Offset to __x86_return_thunk would be wrong here */
++      ret
++      int3
+ .L__enc_copy_end:
+ SYM_FUNC_END(__enc_copy)
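The need for a bare ret here comes down to displacement arithmetic: a rel32 JMP
encodes target minus next-instruction address, so once __enc_copy() has been copied
away from its link-time address the unchanged displacement no longer lands on
__x86_return_thunk. A self-contained sketch, with made-up addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t thunk   = 0xffffffff82000000ULL; /* made-up __x86_return_thunk address */
	uint64_t next_ip = 0xffffffff81234005ULL; /* made-up address just after the jmp */
	int32_t  disp    = (int32_t)(thunk - next_ip);

	/* sme_encrypt_execute() runs the copied code at a different address */
	uint64_t moved_next_ip = next_ip + 0x200000ULL;

	printf("target at link address: %#llx\n", (unsigned long long)(next_ip + disp));
	printf("target after the copy:  %#llx\n", (unsigned long long)(moved_next_ip + disp));
	return 0;
}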
diff --git a/queue-5.18/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch b/queue-5.18/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch
new file mode 100644 (file)
index 0000000..d23695a
--- /dev/null
@@ -0,0 +1,208 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Tue, 14 Jun 2022 23:15:55 +0200
+Subject: x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 7c693f54c873691a4b7da05c7e0f74e67745d144 upstream.
+
+Extend spectre_v2= boot option with Kernel IBRS.
+
+  [jpoimboe: no STIBP with IBRS]
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |    1 
+ arch/x86/include/asm/nospec-branch.h            |    1 
+ arch/x86/kernel/cpu/bugs.c                      |   66 ++++++++++++++++++------
+ 3 files changed, 54 insertions(+), 14 deletions(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -5503,6 +5503,7 @@
+                       eibrs             - enhanced IBRS
+                       eibrs,retpoline   - enhanced IBRS + Retpolines
+                       eibrs,lfence      - enhanced IBRS + LFENCE
++                      ibrs              - use IBRS to protect kernel
+                       Not specifying this option is equivalent to
+                       spectre_v2=auto.
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -211,6 +211,7 @@ enum spectre_v2_mitigation {
+       SPECTRE_V2_EIBRS,
+       SPECTRE_V2_EIBRS_RETPOLINE,
+       SPECTRE_V2_EIBRS_LFENCE,
++      SPECTRE_V2_IBRS,
+ };
+ /* The indirect branch speculation control variants */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -965,6 +965,7 @@ enum spectre_v2_mitigation_cmd {
+       SPECTRE_V2_CMD_EIBRS,
+       SPECTRE_V2_CMD_EIBRS_RETPOLINE,
+       SPECTRE_V2_CMD_EIBRS_LFENCE,
++      SPECTRE_V2_CMD_IBRS,
+ };
+ enum spectre_v2_user_cmd {
+@@ -1037,11 +1038,12 @@ spectre_v2_parse_user_cmdline(enum spect
+       return SPECTRE_V2_USER_CMD_AUTO;
+ }
+-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
++static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
+ {
+-      return (mode == SPECTRE_V2_EIBRS ||
+-              mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+-              mode == SPECTRE_V2_EIBRS_LFENCE);
++      return mode == SPECTRE_V2_IBRS ||
++             mode == SPECTRE_V2_EIBRS ||
++             mode == SPECTRE_V2_EIBRS_RETPOLINE ||
++             mode == SPECTRE_V2_EIBRS_LFENCE;
+ }
+ static void __init
+@@ -1106,12 +1108,12 @@ spectre_v2_user_select_mitigation(enum s
+       }
+       /*
+-       * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not
+-       * required.
++       * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
++       * STIBP is not required.
+        */
+       if (!boot_cpu_has(X86_FEATURE_STIBP) ||
+           !smt_possible ||
+-          spectre_v2_in_eibrs_mode(spectre_v2_enabled))
++          spectre_v2_in_ibrs_mode(spectre_v2_enabled))
+               return;
+       /*
+@@ -1143,6 +1145,7 @@ static const char * const spectre_v2_str
+       [SPECTRE_V2_EIBRS]                      = "Mitigation: Enhanced IBRS",
+       [SPECTRE_V2_EIBRS_LFENCE]               = "Mitigation: Enhanced IBRS + LFENCE",
+       [SPECTRE_V2_EIBRS_RETPOLINE]            = "Mitigation: Enhanced IBRS + Retpolines",
++      [SPECTRE_V2_IBRS]                       = "Mitigation: IBRS",
+ };
+ static const struct {
+@@ -1160,6 +1163,7 @@ static const struct {
+       { "eibrs,lfence",       SPECTRE_V2_CMD_EIBRS_LFENCE,      false },
+       { "eibrs,retpoline",    SPECTRE_V2_CMD_EIBRS_RETPOLINE,   false },
+       { "auto",               SPECTRE_V2_CMD_AUTO,              false },
++      { "ibrs",               SPECTRE_V2_CMD_IBRS,              false },
+ };
+ static void __init spec_v2_print_cond(const char *reason, bool secure)
+@@ -1222,6 +1226,24 @@ static enum spectre_v2_mitigation_cmd __
+               return SPECTRE_V2_CMD_AUTO;
+       }
++      if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
++              pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
++                     mitigation_options[i].option);
++              return SPECTRE_V2_CMD_AUTO;
++      }
++
++      if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) {
++              pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n",
++                     mitigation_options[i].option);
++              return SPECTRE_V2_CMD_AUTO;
++      }
++
++      if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) {
++              pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n",
++                     mitigation_options[i].option);
++              return SPECTRE_V2_CMD_AUTO;
++      }
++
+       spec_v2_print_cond(mitigation_options[i].option,
+                          mitigation_options[i].secure);
+       return cmd;
+@@ -1261,6 +1283,14 @@ static void __init spectre_v2_select_mit
+                       break;
+               }
++              if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
++                  retbleed_cmd != RETBLEED_CMD_OFF &&
++                  boot_cpu_has(X86_FEATURE_IBRS) &&
++                  boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
++                      mode = SPECTRE_V2_IBRS;
++                      break;
++              }
++
+               mode = spectre_v2_select_retpoline();
+               break;
+@@ -1277,6 +1307,10 @@ static void __init spectre_v2_select_mit
+               mode = spectre_v2_select_retpoline();
+               break;
++      case SPECTRE_V2_CMD_IBRS:
++              mode = SPECTRE_V2_IBRS;
++              break;
++
+       case SPECTRE_V2_CMD_EIBRS:
+               mode = SPECTRE_V2_EIBRS;
+               break;
+@@ -1293,7 +1327,7 @@ static void __init spectre_v2_select_mit
+       if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+               pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+-      if (spectre_v2_in_eibrs_mode(mode)) {
++      if (spectre_v2_in_ibrs_mode(mode)) {
+               /* Force it so VMEXIT will restore correctly */
+               x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+               write_spec_ctrl_current(x86_spec_ctrl_base, true);
+@@ -1304,6 +1338,10 @@ static void __init spectre_v2_select_mit
+       case SPECTRE_V2_EIBRS:
+               break;
++      case SPECTRE_V2_IBRS:
++              setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
++              break;
++
+       case SPECTRE_V2_LFENCE:
+       case SPECTRE_V2_EIBRS_LFENCE:
+               setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
+@@ -1330,17 +1368,17 @@ static void __init spectre_v2_select_mit
+       pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+       /*
+-       * Retpoline means the kernel is safe because it has no indirect
+-       * branches. Enhanced IBRS protects firmware too, so, enable restricted
+-       * speculation around firmware calls only when Enhanced IBRS isn't
+-       * supported.
++       * Retpoline protects the kernel, but doesn't protect firmware.  IBRS
++       * and Enhanced IBRS protect firmware too, so enable IBRS around
++       * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
++       * enabled.
+        *
+        * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
+        * the user might select retpoline on the kernel command line and if
+        * the CPU supports Enhanced IBRS, kernel might un-intentionally not
+        * enable IBRS around firmware calls.
+        */
+-      if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
++      if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
+               setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+               pr_info("Enabling Restricted Speculation for firmware calls\n");
+       }
+@@ -2082,7 +2120,7 @@ static ssize_t mmio_stale_data_show_stat
+ static char *stibp_state(void)
+ {
+-      if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
++      if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
+               return "";
+       switch (spectre_v2_user_stibp) {
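Usage-wise, the new mode is requested with the spectre_v2=ibrs boot parameter. The
sketch below is illustrative only and condenses the new spectre_v2_parse_cmdline()
checks: the request is honoured only on an Intel CPU with IBRS that is not running
as a XenPV guest, otherwise it is demoted to auto. The booleans stand in for the
boot_cpu_data / boot_cpu_has() checks.

#include <stdbool.h>
#include <stdio.h>

enum v2_cmd { V2_CMD_AUTO, V2_CMD_IBRS };

static enum v2_cmd parse_ibrs_request(bool intel_cpu, bool has_ibrs, bool xenpv_guest)
{
	if (!intel_cpu || !has_ibrs || xenpv_guest)
		return V2_CMD_AUTO;	/* demoted, with a pr_err() in the real code */
	return V2_CMD_IBRS;
}

int main(void)
{
	printf("%d\n", parse_ibrs_request(true, true, false));	/* 1: IBRS honoured */
	printf("%d\n", parse_ibrs_request(true, true, true));	/* 0: XenPV falls back */
	return 0;
}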
diff --git a/queue-5.18/x86-speculation-disable-rrsba-behavior.patch b/queue-5.18/x86-speculation-disable-rrsba-behavior.patch
new file mode 100644 (file)
index 0000000..2b73268
--- /dev/null
@@ -0,0 +1,153 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Fri, 8 Jul 2022 13:36:09 -0700
+Subject: x86/speculation: Disable RRSBA behavior
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 4ad3278df6fe2b0852b00d5757fc2ccd8e92c26e upstream.
+
+Some Intel processors may use alternate predictors for RETs on
+RSB-underflow. This condition may be vulnerable to Branch History
+Injection (BHI) and intramode-BTI.
+
+The kernel earlier added spectre_v2 mitigation modes (eIBRS+Retpolines,
+eIBRS+LFENCE, Retpolines) which protect indirect CALLs and JMPs against
+such attacks. However, on RSB-underflow, RET target prediction may
+fall back to alternate predictors. As a result, RET's predicted target
+may get influenced by branch history.
+
+A new MSR_IA32_SPEC_CTRL bit (RRSBA_DIS_S) controls this fallback
+behavior when in kernel mode. When set, RETs will not take predictions
+from alternate predictors, hence mitigating RETs as well. Support for
+this is enumerated by CPUID.7.2.EDX[RRSBA_CTRL] (bit2).
+
+For spectre v2 mitigation, when a user selects a mitigation that
+protects indirect CALLs and JMPs against BHI and intramode-BTI, set
+RRSBA_DIS_S also to protect RETs for RSB-underflow case.
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h     |    2 +-
+ arch/x86/include/asm/msr-index.h       |    9 +++++++++
+ arch/x86/kernel/cpu/bugs.c             |   26 ++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/scattered.c        |    1 +
+ tools/arch/x86/include/asm/msr-index.h |    9 +++++++++
+ 5 files changed, 46 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -296,7 +296,7 @@
+ #define X86_FEATURE_SGX1              (11*32+ 8) /* "" Basic SGX */
+ #define X86_FEATURE_SGX2              (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
+ #define X86_FEATURE_ENTRY_IBPB                (11*32+10) /* "" Issue an IBPB on kernel entry */
+-/* FREE!                              (11*32+11) */
++#define X86_FEATURE_RRSBA_CTRL                (11*32+11) /* "" RET prediction control */
+ #define X86_FEATURE_RETPOLINE         (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_LFENCE  (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+ #define X86_FEATURE_RETHUNK           (11*32+14) /* "" Use REturn THUNK */
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -51,6 +51,8 @@
+ #define SPEC_CTRL_STIBP                       BIT(SPEC_CTRL_STIBP_SHIFT)      /* STIBP mask */
+ #define SPEC_CTRL_SSBD_SHIFT          2          /* Speculative Store Bypass Disable bit */
+ #define SPEC_CTRL_SSBD                        BIT(SPEC_CTRL_SSBD_SHIFT)       /* Speculative Store Bypass Disable */
++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT   6          /* Disable RRSBA behavior */
++#define SPEC_CTRL_RRSBA_DIS_S         BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+ #define MSR_IA32_PRED_CMD             0x00000049 /* Prediction Command */
+ #define PRED_CMD_IBPB                 BIT(0)     /* Indirect Branch Prediction Barrier */
+@@ -139,6 +141,13 @@
+                                                * bit available to control VERW
+                                                * behavior.
+                                                */
++#define ARCH_CAP_RRSBA                        BIT(19) /*
++                                               * Indicates RET may use predictors
++                                               * other than the RSB. With eIBRS
++                                               * enabled predictions in kernel mode
++                                               * are restricted to targets in
++                                               * kernel.
++                                               */
+ #define MSR_IA32_FLUSH_CMD            0x0000010b
+ #define L1D_FLUSH                     BIT(0)  /*
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1311,6 +1311,22 @@ static enum spectre_v2_mitigation __init
+       return SPECTRE_V2_RETPOLINE;
+ }
++/* Disable in-kernel use of non-RSB RET predictors */
++static void __init spec_ctrl_disable_kernel_rrsba(void)
++{
++      u64 ia32_cap;
++
++      if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
++              return;
++
++      ia32_cap = x86_read_arch_cap_msr();
++
++      if (ia32_cap & ARCH_CAP_RRSBA) {
++              x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
++              write_spec_ctrl_current(x86_spec_ctrl_base, true);
++      }
++}
++
+ static void __init spectre_v2_select_mitigation(void)
+ {
+       enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+@@ -1405,6 +1421,16 @@ static void __init spectre_v2_select_mit
+               break;
+       }
++      /*
++       * Disable alternate RSB predictions in kernel when indirect CALLs and
++       * JMPs gets protection against BHI and Intramode-BTI, but RET
++       * prediction from a non-RSB predictor is still a risk.
++       */
++      if (mode == SPECTRE_V2_EIBRS_LFENCE ||
++          mode == SPECTRE_V2_EIBRS_RETPOLINE ||
++          mode == SPECTRE_V2_RETPOLINE)
++              spec_ctrl_disable_kernel_rrsba();
++
+       spectre_v2_enabled = mode;
+       pr_info("%s\n", spectre_v2_strings[mode]);
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits
+       { X86_FEATURE_APERFMPERF,       CPUID_ECX,  0, 0x00000006, 0 },
+       { X86_FEATURE_EPB,              CPUID_ECX,  3, 0x00000006, 0 },
+       { X86_FEATURE_INTEL_PPIN,       CPUID_EBX,  0, 0x00000007, 1 },
++      { X86_FEATURE_RRSBA_CTRL,       CPUID_EDX,  2, 0x00000007, 2 },
+       { X86_FEATURE_CQM_LLC,          CPUID_EDX,  1, 0x0000000f, 0 },
+       { X86_FEATURE_CQM_OCCUP_LLC,    CPUID_EDX,  0, 0x0000000f, 1 },
+       { X86_FEATURE_CQM_MBM_TOTAL,    CPUID_EDX,  1, 0x0000000f, 1 },
+--- a/tools/arch/x86/include/asm/msr-index.h
++++ b/tools/arch/x86/include/asm/msr-index.h
+@@ -51,6 +51,8 @@
+ #define SPEC_CTRL_STIBP                       BIT(SPEC_CTRL_STIBP_SHIFT)      /* STIBP mask */
+ #define SPEC_CTRL_SSBD_SHIFT          2          /* Speculative Store Bypass Disable bit */
+ #define SPEC_CTRL_SSBD                        BIT(SPEC_CTRL_SSBD_SHIFT)       /* Speculative Store Bypass Disable */
++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT   6          /* Disable RRSBA behavior */
++#define SPEC_CTRL_RRSBA_DIS_S         BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+ #define MSR_IA32_PRED_CMD             0x00000049 /* Prediction Command */
+ #define PRED_CMD_IBPB                 BIT(0)     /* Indirect Branch Prediction Barrier */
+@@ -138,6 +140,13 @@
+                                                * bit available to control VERW
+                                                * behavior.
+                                                */
++#define ARCH_CAP_RRSBA                        BIT(19) /*
++                                               * Indicates RET may use predictors
++                                               * other than the RSB. With eIBRS
++                                               * enabled predictions in kernel mode
++                                               * are restricted to targets in
++                                               * kernel.
++                                               */
+ #define MSR_IA32_FLUSH_CMD            0x0000010b
+ #define L1D_FLUSH                     BIT(0)  /*
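Condensed, the new logic is: if the CPU enumerates RRSBA_CTRL and ARCH_CAPABILITIES
reports RRSBA, set RRSBA_DIS_S in the SPEC_CTRL base value. A minimal sketch with
bit positions taken from the hunks above; the boolean and the arch_cap argument
stand in for the feature flag and the MSR read:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ARCH_CAP_RRSBA		(1ULL << 19)	/* RETs may use non-RSB predictors */
#define SPEC_CTRL_RRSBA_DIS_S	(1ULL << 6)	/* suppress that behavior in kernel mode */

static uint64_t spec_ctrl_base;

/* has_rrsba_ctrl stands in for X86_FEATURE_RRSBA_CTRL (CPUID.7.2:EDX bit 2). */
static void disable_kernel_rrsba(bool has_rrsba_ctrl, uint64_t arch_cap)
{
	if (!has_rrsba_ctrl)
		return;
	if (arch_cap & ARCH_CAP_RRSBA)
		spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
}

int main(void)
{
	disable_kernel_rrsba(true, ARCH_CAP_RRSBA);
	printf("%#llx\n", (unsigned long long)spec_ctrl_base);	/* 0x40 */
	return 0;
}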
diff --git a/queue-5.18/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch b/queue-5.18/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch
new file mode 100644 (file)
index 0000000..bc68ab4
--- /dev/null
@@ -0,0 +1,134 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:15 +0200
+Subject: x86/speculation: Fill RSB on vmexit for IBRS
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 9756bba28470722dacb79ffce554336dd1f6a6cd upstream.
+
+Prevent RSB underflow/poisoning attacks with RSB filling.  While at it, add a
+bunch of comments to attempt to document the current state of tribal
+knowledge about RSB attacks and what exactly is being mitigated.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    2 -
+ arch/x86/kernel/cpu/bugs.c         |   63 ++++++++++++++++++++++++++++++++++---
+ arch/x86/kvm/vmx/vmenter.S         |    6 +--
+ 3 files changed, 62 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -204,7 +204,7 @@
+ /* FREE!                                ( 7*32+10) */
+ #define X86_FEATURE_PTI                       ( 7*32+11) /* Kernel Page Table Isolation enabled */
+ #define X86_FEATURE_KERNEL_IBRS               ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+-/* FREE!                              ( 7*32+13) */
++#define X86_FEATURE_RSB_VMEXIT                ( 7*32+13) /* "" Fill RSB on VM-Exit */
+ #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_CDP_L2            ( 7*32+15) /* Code and Data Prioritization L2 */
+ #define X86_FEATURE_MSR_SPEC_CTRL     ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1394,17 +1394,70 @@ static void __init spectre_v2_select_mit
+       pr_info("%s\n", spectre_v2_strings[mode]);
+       /*
+-       * If spectre v2 protection has been enabled, unconditionally fill
+-       * RSB during a context switch; this protects against two independent
+-       * issues:
++       * If Spectre v2 protection has been enabled, fill the RSB during a
++       * context switch.  In general there are two types of RSB attacks
++       * across context switches, for which the CALLs/RETs may be unbalanced.
+        *
+-       *      - RSB underflow (and switch to BTB) on Skylake+
+-       *      - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
++       * 1) RSB underflow
++       *
++       *    Some Intel parts have "bottomless RSB".  When the RSB is empty,
++       *    speculated return targets may come from the branch predictor,
++       *    which could have a user-poisoned BTB or BHB entry.
++       *
++       *    AMD has it even worse: *all* returns are speculated from the BTB,
++       *    regardless of the state of the RSB.
++       *
++       *    When IBRS or eIBRS is enabled, the "user -> kernel" attack
++       *    scenario is mitigated by the IBRS branch prediction isolation
++       *    properties, so the RSB buffer filling wouldn't be necessary to
++       *    protect against this type of attack.
++       *
++       *    The "user -> user" attack scenario is mitigated by RSB filling.
++       *
++       * 2) Poisoned RSB entry
++       *
++       *    If the 'next' in-kernel return stack is shorter than 'prev',
++       *    'next' could be tricked into speculating with a user-poisoned RSB
++       *    entry.
++       *
++       *    The "user -> kernel" attack scenario is mitigated by SMEP and
++       *    eIBRS.
++       *
++       *    The "user -> user" scenario, also known as SpectreBHB, requires
++       *    RSB clearing.
++       *
++       * So to mitigate all cases, unconditionally fill RSB on context
++       * switches.
++       *
++       * FIXME: Is this pointless for retbleed-affected AMD?
+        */
+       setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+       pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+       /*
++       * Similar to context switches, there are two types of RSB attacks
++       * after vmexit:
++       *
++       * 1) RSB underflow
++       *
++       * 2) Poisoned RSB entry
++       *
++       * When retpoline is enabled, both are mitigated by filling/clearing
++       * the RSB.
++       *
++       * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
++       * prediction isolation protections, RSB still needs to be cleared
++       * because of #2.  Note that SMEP provides no protection here, unlike
++       * user-space-poisoned RSB entries.
++       *
++       * eIBRS, on the other hand, has RSB-poisoning protections, so it
++       * doesn't need RSB clearing after vmexit.
++       */
++      if (boot_cpu_has(X86_FEATURE_RETPOLINE) ||
++          boot_cpu_has(X86_FEATURE_KERNEL_IBRS))
++              setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
++
++      /*
+        * Retpoline protects the kernel, but doesn't protect firmware.  IBRS
+        * and Enhanced IBRS protect firmware too, so enable IBRS around
+        * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -194,15 +194,15 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL
+        * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
+        * the first unbalanced RET after vmexit!
+        *
+-       * For retpoline, RSB filling is needed to prevent poisoned RSB entries
+-       * and (in some cases) RSB underflow.
++       * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
++       * entries and (in some cases) RSB underflow.
+        *
+        * eIBRS has its own protection against poisoned RSB, so it doesn't
+        * need the RSB filling sequence.  But it does need to be enabled
+        * before the first unbalanced RET.
+          */
+-      FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
++      FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
+       pop %_ASM_ARG2  /* @flags */
+       pop %_ASM_ARG1  /* @vmx */
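The decision the patch encodes reduces to one predicate: refill the RSB after
vmexit whenever retpoline or legacy kernel IBRS is in use, since per the comments
above only eIBRS brings its own RSB-poisoning protection. A one-function sketch,
with booleans standing in for boot_cpu_has():

#include <stdbool.h>
#include <stdio.h>

static bool rsb_fill_on_vmexit(bool retpoline, bool kernel_ibrs)
{
	/*
	 * Plain eIBRS sets neither flag and needs no fill; retpoline and
	 * legacy IBRS both leave poisoned-RSB / underflow windows open.
	 */
	return retpoline || kernel_ibrs;
}

int main(void)
{
	printf("%d\n", rsb_fill_on_vmexit(true, false));	/* 1 */
	printf("%d\n", rsb_fill_on_vmexit(false, false));	/* 0: eIBRS-only case */
	return 0;
}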
diff --git a/queue-5.18/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch b/queue-5.18/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch
new file mode 100644 (file)
index 0000000..4a4a2d8
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:06 +0200
+Subject: x86/speculation: Fix firmware entry SPEC_CTRL handling
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit e6aa13622ea8283cc699cac5d018cc40a2ba2010 upstream.
+
+The firmware entry code may accidentally clear STIBP or SSBD. Fix that.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |   10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -285,18 +285,16 @@ extern u64 spec_ctrl_current(void);
+  */
+ #define firmware_restrict_branch_speculation_start()                  \
+ do {                                                                  \
+-      u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS;                  \
+-                                                                      \
+       preempt_disable();                                              \
+-      alternative_msr_write(MSR_IA32_SPEC_CTRL, val,                  \
++      alternative_msr_write(MSR_IA32_SPEC_CTRL,                       \
++                            spec_ctrl_current() | SPEC_CTRL_IBRS,     \
+                             X86_FEATURE_USE_IBRS_FW);                 \
+ } while (0)
+ #define firmware_restrict_branch_speculation_end()                    \
+ do {                                                                  \
+-      u64 val = x86_spec_ctrl_base;                                   \
+-                                                                      \
+-      alternative_msr_write(MSR_IA32_SPEC_CTRL, val,                  \
++      alternative_msr_write(MSR_IA32_SPEC_CTRL,                       \
++                            spec_ctrl_current(),                      \
+                             X86_FEATURE_USE_IBRS_FW);                 \
+       preempt_enable();                                               \
+ } while (0)
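The bug class is easy to show with a few bit operations: writing MSR_IA32_SPEC_CTRL
from the boot-time base instead of the current runtime value silently clears bits
such as STIBP or SSBD that were enabled later. A tiny sketch with made-up state,
using the standard SPEC_CTRL bit layout:

#include <stdint.h>
#include <stdio.h>

#define SPEC_CTRL_IBRS	(1u << 0)
#define SPEC_CTRL_STIBP	(1u << 1)
#define SPEC_CTRL_SSBD	(1u << 2)

int main(void)
{
	uint32_t base    = 0;					/* boot-time x86_spec_ctrl_base */
	uint32_t current = SPEC_CTRL_STIBP | SPEC_CTRL_SSBD;	/* bits enabled at runtime */

	/* Around a firmware call, the old code used 'base', the fix uses 'current'. */
	uint32_t old_write = base | SPEC_CTRL_IBRS;
	uint32_t new_write = current | SPEC_CTRL_IBRS;

	printf("old write drops %#x\n", current & ~old_write);	/* 0x6: STIBP|SSBD cleared */
	printf("new write drops %#x\n", current & ~new_write);	/* 0 */
	return 0;
}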
diff --git a/queue-5.18/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch b/queue-5.18/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch
new file mode 100644 (file)
index 0000000..3cf787b
--- /dev/null
@@ -0,0 +1,77 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:05 +0200
+Subject: x86/speculation: Fix RSB filling with CONFIG_RETPOLINE=n
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit b2620facef4889fefcbf2e87284f34dcd4189bce upstream.
+
+If a kernel is built with CONFIG_RETPOLINE=n, but the user still wants
+to mitigate Spectre v2 using IBRS or eIBRS, the RSB filling will be
+silently disabled.
+
+There's nothing retpoline-specific about RSB buffer filling.  Remove the
+CONFIG_RETPOLINE guards around it.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_32.S            |    2 --
+ arch/x86/entry/entry_64.S            |    2 --
+ arch/x86/include/asm/nospec-branch.h |    2 --
+ 3 files changed, 6 deletions(-)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -698,7 +698,6 @@ SYM_CODE_START(__switch_to_asm)
+       movl    %ebx, PER_CPU_VAR(__stack_chk_guard)
+ #endif
+-#ifdef CONFIG_RETPOLINE
+       /*
+        * When switching from a shallower to a deeper call stack
+        * the RSB may either underflow or use entries populated
+@@ -707,7 +706,6 @@ SYM_CODE_START(__switch_to_asm)
+        * speculative execution to prevent attack.
+        */
+       FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+-#endif
+       /* Restore flags or the incoming task to restore AC state. */
+       popfl
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -250,7 +250,6 @@ SYM_FUNC_START(__switch_to_asm)
+       movq    %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
+ #endif
+-#ifdef CONFIG_RETPOLINE
+       /*
+        * When switching from a shallower to a deeper call stack
+        * the RSB may either underflow or use entries populated
+@@ -259,7 +258,6 @@ SYM_FUNC_START(__switch_to_asm)
+        * speculative execution to prevent attack.
+        */
+       FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+-#endif
+       /* restore callee-saved registers */
+       popq    %r15
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -122,11 +122,9 @@
+   * monstrosity above, manually.
+   */
+ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+-#ifdef CONFIG_RETPOLINE
+       ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+       __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
+ .Lskip_rsb_\@:
+-#endif
+ .endm
+ /*
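
A rough illustration only (not kernel code) of the problem the commit describes: a compile-time #ifdef removed the RSB fill entirely when CONFIG_RETPOLINE=n, so the run-time feature check never got a chance to enable it. The function name and messages below are illustrative.

#include <stdbool.h>
#include <stdio.h>

/* #define CONFIG_RETPOLINE 1 */   /* define this to see the post-fix style behaviour */

static void fill_return_buffer(bool have_rsb_ctxsw)
{
#ifdef CONFIG_RETPOLINE
    if (have_rsb_ctxsw)
        puts("RSB filled");
    else
        puts("RSB fill skipped by the run-time ALTERNATIVE");
#else
    /* pre-fix: silently compiled out, nothing happens at all */
    (void)have_rsb_ctxsw;
    puts("RSB fill not even built in");
#endif
}

int main(void)
{
    /* IBRS/eIBRS users may set the feature bit yet build with CONFIG_RETPOLINE=n */
    fill_return_buffer(true);
    return 0;
}
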
diff --git a/queue-5.18/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch b/queue-5.18/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch
new file mode 100644 (file)
index 0000000..3058818
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:07 +0200
+Subject: x86/speculation: Fix SPEC_CTRL write on SMT state change
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 56aa4d221f1ee2c3a49b45b800778ec6e0ab73c5 upstream.
+
+If the SMT state changes, SSBD might get accidentally disabled.  Fix
+that.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1451,7 +1451,8 @@ static void __init spectre_v2_select_mit
+ static void update_stibp_msr(void * __unused)
+ {
+-      write_spec_ctrl_current(x86_spec_ctrl_base, true);
++      u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
++      write_spec_ctrl_current(val, true);
+ }
+ /* Update x86_spec_ctrl_base in case SMT state changed. */
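
A small self-checking sketch (not kernel code) of the merge the fixed update_stibp_msr() performs: keep whatever is already in the live value and take only the STIBP decision from the base. Bit positions match MSR_IA32_SPEC_CTRL; the sample values are invented.

#include <assert.h>
#include <stdint.h>

#define SPEC_CTRL_IBRS  (1ULL << 0)
#define SPEC_CTRL_STIBP (1ULL << 1)
#define SPEC_CTRL_SSBD  (1ULL << 2)

/* post-fix: keep everything already in the live value, take STIBP from the base */
static uint64_t stibp_update(uint64_t current, uint64_t base)
{
    return current | (base & SPEC_CTRL_STIBP);
}

int main(void)
{
    uint64_t base    = SPEC_CTRL_IBRS | SPEC_CTRL_STIBP;  /* SMT on: STIBP wanted */
    uint64_t current = SPEC_CTRL_IBRS | SPEC_CTRL_SSBD;   /* task has SSBD enabled */

    /* pre-fix wrote 'base' directly, which carries no SSBD bit */
    assert((base & SPEC_CTRL_SSBD) == 0);
    /* post-fix keeps SSBD and still applies STIBP */
    assert(stibp_update(current, base) ==
           (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD));
    return 0;
}
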
diff --git a/queue-5.18/x86-speculation-remove-x86_spec_ctrl_mask.patch b/queue-5.18/x86-speculation-remove-x86_spec_ctrl_mask.patch
new file mode 100644 (file)
index 0000000..f3bf85a
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Fri, 17 Jun 2022 12:12:48 -0700
+Subject: x86/speculation: Remove x86_spec_ctrl_mask
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit acac5e98ef8d638a411cfa2ee676c87e1973f126 upstream.
+
+This mask has been made redundant by kvm_spec_ctrl_test_value().  And it
+doesn't even work when MSR interception is disabled, as the guest can
+just write to SPEC_CTRL directly.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |   31 +------------------------------
+ 1 file changed, 1 insertion(+), 30 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -86,12 +86,6 @@ u64 spec_ctrl_current(void)
+ EXPORT_SYMBOL_GPL(spec_ctrl_current);
+ /*
+- * The vendor and possibly platform specific bits which can be modified in
+- * x86_spec_ctrl_base.
+- */
+-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+-
+-/*
+  * AMD specific MSR info for Speculative Store Bypass control.
+  * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
+  */
+@@ -146,10 +140,6 @@ void __init check_bugs(void)
+       if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+               rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+-      /* Allow STIBP in MSR_SPEC_CTRL if supported */
+-      if (boot_cpu_has(X86_FEATURE_STIBP))
+-              x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
+-
+       /* Select the proper CPU mitigations before patching alternatives: */
+       spectre_v1_select_mitigation();
+       spectre_v2_select_mitigation();
+@@ -208,19 +198,10 @@ void __init check_bugs(void)
+ void
+ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ {
+-      u64 msrval, guestval, hostval = spec_ctrl_current();
++      u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
+       struct thread_info *ti = current_thread_info();
+-      /* Is MSR_SPEC_CTRL implemented ? */
+       if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
+-              /*
+-               * Restrict guest_spec_ctrl to supported values. Clear the
+-               * modifiable bits in the host base value and or the
+-               * modifiable bits from the guest value.
+-               */
+-              guestval = hostval & ~x86_spec_ctrl_mask;
+-              guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+-
+               if (hostval != guestval) {
+                       msrval = setguest ? guestval : hostval;
+                       wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+@@ -1659,16 +1640,6 @@ static enum ssb_mitigation __init __ssb_
+       }
+       /*
+-       * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
+-       * bit in the mask to allow guests to use the mitigation even in the
+-       * case where the host does not enable it.
+-       */
+-      if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+-          static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+-              x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+-      }
+-
+-      /*
+        * We have three CPU feature flags that are in play here:
+        *  - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
+        *  - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
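
A user-space sketch (not kernel code) of the two guest-value computations: the old clamp against x86_spec_ctrl_mask versus simply passing the guest value through. The sample mask deliberately omits SSBD to show how a bit missing from the mask silently drops a guest request; after this patch the remaining sanity checking is left to kvm_spec_ctrl_test_value().

#include <stdint.h>
#include <stdio.h>

#define SPEC_CTRL_IBRS (1ULL << 0)
#define SPEC_CTRL_SSBD (1ULL << 2)

int main(void)
{
    uint64_t hostval         = SPEC_CTRL_IBRS;
    uint64_t guest_spec_ctrl = SPEC_CTRL_SSBD;   /* what the guest asked for */
    uint64_t mask            = SPEC_CTRL_IBRS;   /* illustrative mask without SSBD */

    /* old: unmodifiable bits from the host, only masked bits from the guest */
    uint64_t old_guestval = (hostval & ~mask) | (guest_spec_ctrl & mask);
    /* new: take the guest value as-is */
    uint64_t new_guestval = guest_spec_ctrl;

    printf("old guestval: %#llx (guest SSBD dropped)\n", (unsigned long long)old_guestval);
    printf("new guestval: %#llx\n", (unsigned long long)new_guestval);
    return 0;
}
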
diff --git a/queue-5.18/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch b/queue-5.18/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch
new file mode 100644 (file)
index 0000000..6c319ec
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:08 +0200
+Subject: x86/speculation: Use cached host SPEC_CTRL value for guest entry/exit
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit bbb69e8bee1bd882784947095ffb2bfe0f7c9470 upstream.
+
+There's no need to recalculate the host value for every entry/exit.
+Just use the cached value in spec_ctrl_current().
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |   12 +-----------
+ 1 file changed, 1 insertion(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -208,7 +208,7 @@ void __init check_bugs(void)
+ void
+ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ {
+-      u64 msrval, guestval, hostval = x86_spec_ctrl_base;
++      u64 msrval, guestval, hostval = spec_ctrl_current();
+       struct thread_info *ti = current_thread_info();
+       /* Is MSR_SPEC_CTRL implemented ? */
+@@ -221,15 +221,6 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl,
+               guestval = hostval & ~x86_spec_ctrl_mask;
+               guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+-              /* SSBD controlled in MSR_SPEC_CTRL */
+-              if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+-                  static_cpu_has(X86_FEATURE_AMD_SSBD))
+-                      hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
+-
+-              /* Conditional STIBP enabled? */
+-              if (static_branch_unlikely(&switch_to_cond_stibp))
+-                      hostval |= stibp_tif_to_spec_ctrl(ti->flags);
+-
+               if (hostval != guestval) {
+                       msrval = setguest ? guestval : hostval;
+                       wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+@@ -1390,7 +1381,6 @@ static void __init spectre_v2_select_mit
+               pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+       if (spectre_v2_in_ibrs_mode(mode)) {
+-              /* Force it so VMEXIT will restore correctly */
+               x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+               write_spec_ctrl_current(x86_spec_ctrl_base, true);
+       }
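
A toy model (not kernel code) of the caching idea: compute the MSR value once when it is written, keep it in a per-CPU style variable, and have the guest entry/exit path read the cached copy instead of re-deriving it from the base value plus per-task flags. The names mirror the kernel helpers but the bodies are stand-ins.

#include <stdint.h>
#include <stdio.h>

static uint64_t spec_ctrl_cached;   /* stands in for the per-CPU x86_spec_ctrl_current */

static void write_spec_ctrl_current(uint64_t val)
{
    spec_ctrl_cached = val;
    /* the real helper would also wrmsrl(MSR_IA32_SPEC_CTRL, val) here */
}

static uint64_t spec_ctrl_current(void)
{
    return spec_ctrl_cached;
}

int main(void)
{
    write_spec_ctrl_current(0x5);   /* IBRS | SSBD, illustrative */
    /* guest entry/exit path: just read the cache */
    printf("hostval = %#llx\n", (unsigned long long)spec_ctrl_current());
    return 0;
}
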
diff --git a/queue-5.18/x86-static_call-serialize-__static_call_fixup-properly.patch b/queue-5.18/x86-static_call-serialize-__static_call_fixup-properly.patch
new file mode 100644 (file)
index 0000000..e837d72
--- /dev/null
@@ -0,0 +1,73 @@
+From c27c753ea6fd1237f4f96abf8b623d7bab505513 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 12 Jul 2022 14:01:06 +0200
+Subject: x86/static_call: Serialize __static_call_fixup() properly
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit c27c753ea6fd1237f4f96abf8b623d7bab505513 upstream.
+
+__static_call_fixup() invokes __static_call_transform() without holding
+text_mutex, which causes lockdep to complain in text_poke_bp().
+
+Adding the proper locking cures that, but as this is either used during
+early boot or during module finalizing, it's not required to use
+text_poke_bp(). Add an argument to __static_call_transform() which tells
+it to use text_poke_early() for it.
+
+Fixes: ee88d363d156 ("x86,static_call: Use alternative RET encoding")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/static_call.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -25,7 +25,8 @@ static const u8 xor5rax[] = { 0x2e, 0x2e
+ static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
+-static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
++static void __ref __static_call_transform(void *insn, enum insn_type type,
++                                        void *func, bool modinit)
+ {
+       const void *emulate = NULL;
+       int size = CALL_INSN_SIZE;
+@@ -60,7 +61,7 @@ static void __ref __static_call_transfor
+       if (memcmp(insn, code, size) == 0)
+               return;
+-      if (unlikely(system_state == SYSTEM_BOOTING))
++      if (system_state == SYSTEM_BOOTING || modinit)
+               return text_poke_early(insn, code, size);
+       text_poke_bp(insn, code, size, emulate);
+@@ -114,12 +115,12 @@ void arch_static_call_transform(void *si
+       if (tramp) {
+               __static_call_validate(tramp, true, true);
+-              __static_call_transform(tramp, __sc_insn(!func, true), func);
++              __static_call_transform(tramp, __sc_insn(!func, true), func, false);
+       }
+       if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
+               __static_call_validate(site, tail, false);
+-              __static_call_transform(site, __sc_insn(!func, tail), func);
++              __static_call_transform(site, __sc_insn(!func, tail), func, false);
+       }
+       mutex_unlock(&text_mutex);
+@@ -145,8 +146,10 @@ bool __static_call_fixup(void *tramp, u8
+               return false;
+       }
++      mutex_lock(&text_mutex);
+       if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk)
+-              __static_call_transform(tramp, RET, NULL);
++              __static_call_transform(tramp, RET, NULL, true);
++      mutex_unlock(&text_mutex);
+       return true;
+ }
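
A minimal user-space sketch (not kernel code) of the pattern this fix introduces: take the text mutex around the transform and pass a flag that routes boot/module-init callers to the early patching path. A pthread mutex stands in for text_mutex; all names and messages are illustrative.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t text_mutex = PTHREAD_MUTEX_INITIALIZER;

static void transform(void *insn, bool modinit)
{
    /* in the real kernel, lockdep expects text_mutex to be held here */
    if (modinit)
        printf("early patch of %p (no live-patching machinery needed)\n", insn);
    else
        printf("breakpoint-based patch of %p\n", insn);
}

static bool fixup(void *tramp)
{
    pthread_mutex_lock(&text_mutex);
    transform(tramp, true);          /* early boot or module finalize path */
    pthread_mutex_unlock(&text_mutex);
    return true;
}

int main(void)
{
    int dummy;
    return fixup(&dummy) ? 0 : 1;
}
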
diff --git a/queue-5.18/x86-static_call-use-alternative-ret-encoding.patch b/queue-5.18/x86-static_call-use-alternative-ret-encoding.patch
new file mode 100644 (file)
index 0000000..b381729
--- /dev/null
@@ -0,0 +1,199 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:39 +0200
+Subject: x86,static_call: Use alternative RET encoding
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit ee88d363d15617ff50ac24fab0ffec11113b2aeb upstream.
+
+In addition to teaching static_call about the new way to spell 'RET',
+there is an added complication in that static_call() is allowed to
+rewrite text before it is known which particular spelling is required.
+
+In order to deal with this; have a static_call specific fixup in the
+apply_return() 'alternative' patching routine that will rewrite the
+static_call trampoline to match the definite sequence.
+
+This in turn creates the problem of uniquely identifying static call
+trampolines. Currently trampolines are 8 bytes, the first 5 being the
+jmp.d32/ret sequence and the final 3 a byte sequence that spells out
+'SCT'.
+
+This sequence is used in __static_call_validate() to ensure it is
+patching a trampoline and not a random other jmp.d32. That is,
+false-positives should be rare, and aren't a big concern anyway.
+
+OTOH the new __static_call_fixup() must not have false-positives, and
+'SCT' decodes to the somewhat weird but semi plausible sequence:
+
+  push %rbx
+  rex.XB push %r12
+
+Additionally, there are SLS concerns with immediate jumps. Combined it
+seems like a good moment to change the signature to a single 3 byte
+trap instruction that is unique to this usage and will not ever get
+generated by accident.
+
+As such, change the signature to: '0x0f, 0xb9, 0xcc', which decodes
+to:
+
+  ud1 %esp, %ecx
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/static_call.h |   19 ++++++++++++++++-
+ arch/x86/kernel/alternative.c      |   12 +++++++----
+ arch/x86/kernel/static_call.c      |   40 +++++++++++++++++++++++++++++++++++--
+ 3 files changed, 64 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/static_call.h
++++ b/arch/x86/include/asm/static_call.h
+@@ -21,6 +21,16 @@
+  * relative displacement across sections.
+  */
++/*
++ * The trampoline is 8 bytes and of the general form:
++ *
++ *   jmp.d32 \func
++ *   ud1 %esp, %ecx
++ *
++ * That trailing #UD provides both a speculation stop and serves as a unique
++ * 3 byte signature identifying static call trampolines. Also see tramp_ud[]
++ * and __static_call_fixup().
++ */
+ #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns)                  \
+       asm(".pushsection .static_call.text, \"ax\"             \n"     \
+           ".align 4                                           \n"     \
+@@ -28,7 +38,7 @@
+           STATIC_CALL_TRAMP_STR(name) ":                      \n"     \
+           ANNOTATE_NOENDBR                                            \
+           insns "                                             \n"     \
+-          ".byte 0x53, 0x43, 0x54                             \n"     \
++          ".byte 0x0f, 0xb9, 0xcc                             \n"     \
+           ".type " STATIC_CALL_TRAMP_STR(name) ", @function   \n"     \
+           ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+           ".popsection                                        \n")
+@@ -36,8 +46,13 @@
+ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)                     \
+       __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
++#ifdef CONFIG_RETPOLINE
++#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)                      \
++      __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
++#else
+ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)                      \
+       __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
++#endif
+ #define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)                      \
+       ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)
+@@ -48,4 +63,6 @@
+           ".long " STATIC_CALL_KEY_STR(name) " - .            \n"     \
+           ".popsection                                        \n")
++extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
++
+ #endif /* _ASM_STATIC_CALL_H */
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -539,18 +539,22 @@ void __init_or_module noinline apply_ret
+       s32 *s;
+       for (s = start; s < end; s++) {
+-              void *addr = (void *)s + *s;
++              void *dest = NULL, *addr = (void *)s + *s;
+               struct insn insn;
+               int len, ret;
+               u8 bytes[16];
+-              u8 op1;
++              u8 op;
+               ret = insn_decode_kernel(&insn, addr);
+               if (WARN_ON_ONCE(ret < 0))
+                       continue;
+-              op1 = insn.opcode.bytes[0];
+-              if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE))
++              op = insn.opcode.bytes[0];
++              if (op == JMP32_INSN_OPCODE)
++                      dest = addr + insn.length + insn.immediate.value;
++
++              if (__static_call_fixup(addr, op, dest) ||
++                  WARN_ON_ONCE(dest != &__x86_return_thunk))
+                       continue;
+               DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -12,6 +12,13 @@ enum insn_type {
+ };
+ /*
++ * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such
++ * that there is no false-positive trampoline identification while also being a
++ * speculation stop.
++ */
++static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc };
++
++/*
+  * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax
+  */
+ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 };
+@@ -43,7 +50,10 @@ static void __ref __static_call_transfor
+               break;
+       case RET:
+-              code = &retinsn;
++              if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++                      code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk);
++              else
++                      code = &retinsn;
+               break;
+       }
+@@ -60,7 +70,7 @@ static void __static_call_validate(void
+ {
+       u8 opcode = *(u8 *)insn;
+-      if (tramp && memcmp(insn+5, "SCT", 3)) {
++      if (tramp && memcmp(insn+5, tramp_ud, 3)) {
+               pr_err("trampoline signature fail");
+               BUG();
+       }
+@@ -115,3 +125,29 @@ void arch_static_call_transform(void *si
+       mutex_unlock(&text_mutex);
+ }
+ EXPORT_SYMBOL_GPL(arch_static_call_transform);
++
++#ifdef CONFIG_RETPOLINE
++/*
++ * This is called by apply_returns() to fix up static call trampolines,
++ * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
++ * having a return trampoline.
++ *
++ * The problem is that static_call() is available before determining
++ * X86_FEATURE_RETHUNK and, by implication, running alternatives.
++ *
++ * This means that __static_call_transform() above can have overwritten the
++ * return trampoline and we now need to fix things up to be consistent.
++ */
++bool __static_call_fixup(void *tramp, u8 op, void *dest)
++{
++      if (memcmp(tramp+5, tramp_ud, 3)) {
++              /* Not a trampoline site, not our problem. */
++              return false;
++      }
++
++      if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk)
++              __static_call_transform(tramp, RET, NULL);
++
++      return true;
++}
++#endif
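
A small user-space sketch (not kernel code) of how the new 3-byte signature is used: a trampoline is recognized purely by comparing the bytes at offset 5 against the 'ud1 %esp, %ecx' encoding, mirroring the memcmp() in the patch. The relative jump displacement below is a dummy value.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static const uint8_t tramp_ud[] = { 0x0f, 0xb9, 0xcc };   /* ud1 %esp, %ecx */

static bool looks_like_tramp(const uint8_t *insn)
{
    /* mirrors memcmp(insn + 5, tramp_ud, 3) in __static_call_fixup() */
    return memcmp(insn + 5, tramp_ud, sizeof(tramp_ud)) == 0;
}

int main(void)
{
    /* jmp.d32 <dummy rel32> followed by the signature */
    uint8_t tramp[8] = { 0xe9, 0x00, 0x00, 0x00, 0x00, 0x0f, 0xb9, 0xcc };
    /* a random other jmp.d32 without the signature */
    uint8_t other[8] = { 0xe9, 0x00, 0x00, 0x00, 0x00, 0x90, 0x90, 0x90 };

    printf("tramp: %d, other: %d\n", looks_like_tramp(tramp), looks_like_tramp(other));
    return 0;
}
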
diff --git a/queue-5.18/x86-traps-use-pt_regs-directly-in-fixup_bad_iret.patch b/queue-5.18/x86-traps-use-pt_regs-directly-in-fixup_bad_iret.patch
new file mode 100644 (file)
index 0000000..5e2e48d
--- /dev/null
@@ -0,0 +1,100 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
+Date: Thu, 21 Apr 2022 22:10:48 +0800
+Subject: x86/traps: Use pt_regs directly in fixup_bad_iret()
+
+From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
+
+commit 0aca53c6b522f8d6e2681ca875acbbe105f5fdcf upstream.
+
+Always stash the address error_entry() is going to return to, in %r12
+and get rid of the void *error_entry_ret; slot in struct bad_iret_stack
+which was supposed to account for it and pt_regs pushed on the stack.
+
+After this, both fixup_bad_iret() and sync_regs() can work on a struct
+pt_regs pointer directly.
+
+  [ bp: Rewrite commit message, touch ups. ]
+
+Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20220503032107.680190-2-jiangshanlai@gmail.com
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S    |    5 ++++-
+ arch/x86/include/asm/traps.h |    2 +-
+ arch/x86/kernel/traps.c      |   19 +++++++------------
+ 3 files changed, 12 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1062,9 +1062,12 @@ SYM_CODE_START_LOCAL(error_entry)
+        * Pretend that the exception came from user mode: set up pt_regs
+        * as if we faulted immediately after IRET.
+        */
+-      mov     %rsp, %rdi
++      popq    %r12                            /* save return addr in %12 */
++      movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
+       call    fixup_bad_iret
+       mov     %rax, %rsp
++      ENCODE_FRAME_POINTER
++      pushq   %r12
+       jmp     .Lerror_entry_from_usermode_after_swapgs
+ SYM_CODE_END(error_entry)
+--- a/arch/x86/include/asm/traps.h
++++ b/arch/x86/include/asm/traps.h
+@@ -13,7 +13,7 @@
+ #ifdef CONFIG_X86_64
+ asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
+ asmlinkage __visible notrace
+-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
++struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
+ void __init trap_init(void);
+ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
+ #endif
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -892,14 +892,10 @@ sync:
+ }
+ #endif
+-struct bad_iret_stack {
+-      void *error_entry_ret;
+-      struct pt_regs regs;
+-};
+-
+-asmlinkage __visible noinstr
+-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
++asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs)
+ {
++      struct pt_regs tmp, *new_stack;
++
+       /*
+        * This is called from entry_64.S early in handling a fault
+        * caused by a bad iret to user mode.  To handle the fault
+@@ -908,19 +904,18 @@ struct bad_iret_stack *fixup_bad_iret(st
+        * just below the IRET frame) and we want to pretend that the
+        * exception came from the IRET target.
+        */
+-      struct bad_iret_stack tmp, *new_stack =
+-              (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
++      new_stack = (struct pt_regs *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
+       /* Copy the IRET target to the temporary storage. */
+-      __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);
++      __memcpy(&tmp.ip, (void *)bad_regs->sp, 5*8);
+       /* Copy the remainder of the stack from the current stack. */
+-      __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));
++      __memcpy(&tmp, bad_regs, offsetof(struct pt_regs, ip));
+       /* Update the entry stack */
+       __memcpy(new_stack, &tmp, sizeof(tmp));
+-      BUG_ON(!user_mode(&new_stack->regs));
++      BUG_ON(!user_mode(new_stack));
+       return new_stack;
+ }
+ #endif
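
A toy illustration (not kernel code) of the copy split fixup_bad_iret() performs, using a heavily abbreviated stand-in for pt_regs: the hardware IRET frame (ip through ss) is copied from the faulting IRET's target frame, everything below it from the bad stack.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct toy_pt_regs {
    unsigned long r15, r14, bx, di, si;    /* "general" part, abbreviated */
    unsigned long ip, cs, flags, sp, ss;   /* hardware IRET frame */
};

int main(void)
{
    struct toy_pt_regs bad = { .r15 = 1, .ip = 0xdead };
    struct toy_pt_regs iret_target = { .ip = 0xbeef, .ss = 7 };
    struct toy_pt_regs tmp;

    /* copy the 5-register IRET frame from the IRET target ... */
    memcpy(&tmp.ip, &iret_target.ip, 5 * sizeof(unsigned long));
    /* ... and the remainder, up to 'ip', from the bad stack */
    memcpy(&tmp, &bad, offsetof(struct toy_pt_regs, ip));

    printf("tmp.r15=%lu tmp.ip=%#lx tmp.ss=%lu\n", tmp.r15, tmp.ip, tmp.ss);
    return 0;
}
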
diff --git a/queue-5.18/x86-undo-return-thunk-damage.patch b/queue-5.18/x86-undo-return-thunk-damage.patch
new file mode 100644 (file)
index 0000000..9a0db25
--- /dev/null
@@ -0,0 +1,194 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:37 +0200
+Subject: x86: Undo return-thunk damage
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 15e67227c49a57837108acfe1c80570e1bd9f962 upstream.
+
+Introduce X86_FEATURE_RETHUNK for those afflicted with needing this.
+
+  [ bp: Do only INT3 padding - simpler. ]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: CONFIG_STACK_VALIDATION vs CONFIG_OBJTOOL]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/alternative.h       |    1 
+ arch/x86/include/asm/cpufeatures.h       |    1 
+ arch/x86/include/asm/disabled-features.h |    3 +
+ arch/x86/kernel/alternative.c            |   60 +++++++++++++++++++++++++++++++
+ arch/x86/kernel/module.c                 |    8 +++-
+ arch/x86/kernel/vmlinux.lds.S            |    7 +++
+ 6 files changed, 78 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -76,6 +76,7 @@ extern int alternatives_patched;
+ extern void alternative_instructions(void);
+ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+ extern void apply_retpolines(s32 *start, s32 *end);
++extern void apply_returns(s32 *start, s32 *end);
+ extern void apply_ibt_endbr(s32 *start, s32 *end);
+ struct module;
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -299,6 +299,7 @@
+ /* FREE!                              (11*32+11) */
+ #define X86_FEATURE_RETPOLINE         (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_LFENCE  (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
++#define X86_FEATURE_RETHUNK           (11*32+14) /* "" Use REturn THUNK */
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX_VNNI          (12*32+ 4) /* AVX VNNI instructions */
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -60,7 +60,8 @@
+ # define DISABLE_RETPOLINE    0
+ #else
+ # define DISABLE_RETPOLINE    ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+-                               (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
++                               (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \
++                               (1 << (X86_FEATURE_RETHUNK & 31)))
+ #endif
+ #ifdef CONFIG_INTEL_IOMMU_SVM
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -115,6 +115,7 @@ static void __init_or_module add_nops(vo
+ }
+ extern s32 __retpoline_sites[], __retpoline_sites_end[];
++extern s32 __return_sites[], __return_sites_end[];
+ extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
+ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+ extern s32 __smp_locks[], __smp_locks_end[];
+@@ -507,9 +508,67 @@ void __init_or_module noinline apply_ret
+       }
+ }
++/*
++ * Rewrite the compiler generated return thunk tail-calls.
++ *
++ * For example, convert:
++ *
++ *   JMP __x86_return_thunk
++ *
++ * into:
++ *
++ *   RET
++ */
++static int patch_return(void *addr, struct insn *insn, u8 *bytes)
++{
++      int i = 0;
++
++      if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++              return -1;
++
++      bytes[i++] = RET_INSN_OPCODE;
++
++      for (; i < insn->length;)
++              bytes[i++] = INT3_INSN_OPCODE;
++
++      return i;
++}
++
++void __init_or_module noinline apply_returns(s32 *start, s32 *end)
++{
++      s32 *s;
++
++      for (s = start; s < end; s++) {
++              void *addr = (void *)s + *s;
++              struct insn insn;
++              int len, ret;
++              u8 bytes[16];
++              u8 op1;
++
++              ret = insn_decode_kernel(&insn, addr);
++              if (WARN_ON_ONCE(ret < 0))
++                      continue;
++
++              op1 = insn.opcode.bytes[0];
++              if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE))
++                      continue;
++
++              DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
++                      addr, addr, insn.length,
++                      addr + insn.length + insn.immediate.value);
++
++              len = patch_return(addr, &insn, bytes);
++              if (len == insn.length) {
++                      DUMP_BYTES(((u8*)addr),  len, "%px: orig: ", addr);
++                      DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
++                      text_poke_early(addr, bytes, len);
++              }
++      }
++}
+ #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
+ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
+ #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
+@@ -860,6 +919,7 @@ void __init alternative_instructions(voi
+        * those can rewrite the retpoline thunks.
+        */
+       apply_retpolines(__retpoline_sites, __retpoline_sites_end);
++      apply_returns(__return_sites, __return_sites_end);
+       /*
+        * Then patch alternatives, such that those paravirt calls that are in
+--- a/arch/x86/kernel/module.c
++++ b/arch/x86/kernel/module.c
+@@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr,
+ {
+       const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+               *para = NULL, *orc = NULL, *orc_ip = NULL,
+-              *retpolines = NULL, *ibt_endbr = NULL;
++              *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL;
+       char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+       for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+@@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr,
+                       orc_ip = s;
+               if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
+                       retpolines = s;
++              if (!strcmp(".return_sites", secstrings + s->sh_name))
++                      returns = s;
+               if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name))
+                       ibt_endbr = s;
+       }
+@@ -287,6 +289,10 @@ int module_finalize(const Elf_Ehdr *hdr,
+               void *rseg = (void *)retpolines->sh_addr;
+               apply_retpolines(rseg, rseg + retpolines->sh_size);
+       }
++      if (returns) {
++              void *rseg = (void *)returns->sh_addr;
++              apply_returns(rseg, rseg + returns->sh_size);
++      }
+       if (alt) {
+               /* patch .altinstructions */
+               void *aseg = (void *)alt->sh_addr;
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -283,6 +283,13 @@ SECTIONS
+               *(.retpoline_sites)
+               __retpoline_sites_end = .;
+       }
++
++      . = ALIGN(8);
++      .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) {
++              __return_sites = .;
++              *(.return_sites)
++              __return_sites_end = .;
++      }
+ #endif
+ #ifdef CONFIG_X86_KERNEL_IBT
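
A user-space sketch (not kernel code) of what patch_return() writes: when the CPU does not need the return thunk, the 5-byte tail-call to __x86_return_thunk at every function return site is rewritten to a one-byte RET padded with INT3.

#include <stdint.h>
#include <stdio.h>

#define RET_INSN_OPCODE  0xc3
#define INT3_INSN_OPCODE 0xcc

static int patch_return(uint8_t *bytes, int insn_len, int need_rethunk)
{
    int i = 0;

    if (need_rethunk)
        return -1;   /* keep the jmp to __x86_return_thunk */

    bytes[i++] = RET_INSN_OPCODE;
    while (i < insn_len)
        bytes[i++] = INT3_INSN_OPCODE;
    return i;
}

int main(void)
{
    uint8_t site[5] = { 0xe9, 0x00, 0x00, 0x00, 0x00 };   /* jmp.d32 <thunk>, dummy rel32 */
    int len = patch_return(site, sizeof(site), 0);

    for (int i = 0; i < len; i++)
        printf("%02x ", site[i]);
    printf("\n");   /* prints: c3 cc cc cc cc */
    return 0;
}
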
diff --git a/queue-5.18/x86-use-return-thunk-in-asm-code.patch b/queue-5.18/x86-use-return-thunk-in-asm-code.patch
new file mode 100644 (file)
index 0000000..57ab157
--- /dev/null
@@ -0,0 +1,93 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:45 +0200
+Subject: x86: Use return-thunk in asm code
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit aa3d480315ba6c3025a60958e1981072ea37c3df upstream.
+
+Use the return thunk in asm code. If the thunk isn't needed, it will
+get patched into a RET instruction during boot by apply_returns().
+
+Since alternatives can't handle relocations outside of the first
+instruction, putting a 'jmp __x86_return_thunk' in one is not valid,
+therefore carve out the memmove ERMS path into a separate label and jump
+to it.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: no RANDSTRUCT_CFLAGS]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/vdso/Makefile   |    1 +
+ arch/x86/include/asm/linkage.h |    8 ++++++++
+ arch/x86/lib/memmove_64.S      |    7 ++++++-
+ 3 files changed, 15 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/entry/vdso/Makefile
++++ b/arch/x86/entry/vdso/Makefile
+@@ -92,6 +92,7 @@ endif
+ endif
+ $(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
++$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO
+ #
+ # vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+--- a/arch/x86/include/asm/linkage.h
++++ b/arch/x86/include/asm/linkage.h
+@@ -19,19 +19,27 @@
+ #define __ALIGN_STR   __stringify(__ALIGN)
+ #endif
++#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#define RET   jmp __x86_return_thunk
++#else /* CONFIG_RETPOLINE */
+ #ifdef CONFIG_SLS
+ #define RET   ret; int3
+ #else
+ #define RET   ret
+ #endif
++#endif /* CONFIG_RETPOLINE */
+ #else /* __ASSEMBLY__ */
++#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#define ASM_RET       "jmp __x86_return_thunk\n\t"
++#else /* CONFIG_RETPOLINE */
+ #ifdef CONFIG_SLS
+ #define ASM_RET       "ret; int3\n\t"
+ #else
+ #define ASM_RET       "ret\n\t"
+ #endif
++#endif /* CONFIG_RETPOLINE */
+ #endif /* __ASSEMBLY__ */
+--- a/arch/x86/lib/memmove_64.S
++++ b/arch/x86/lib/memmove_64.S
+@@ -39,7 +39,7 @@ SYM_FUNC_START(__memmove)
+       /* FSRM implies ERMS => no length checks, do the copy directly */
+ .Lmemmove_begin_forward:
+       ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
+-      ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS
++      ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS
+       /*
+        * movsq instruction have many startup latency
+@@ -205,6 +205,11 @@ SYM_FUNC_START(__memmove)
+       movb %r11b, (%rdi)
+ 13:
+       RET
++
++.Lmemmove_erms:
++      movq %rdx, %rcx
++      rep movsb
++      RET
+ SYM_FUNC_END(__memmove)
+ EXPORT_SYMBOL(__memmove)
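
The RET selection in the linkage.h hunk is pure preprocessor logic; the sketch below (not kernel code) mirrors the same #if chain so the chosen spelling can be printed for a given set of -D flags (try -DCONFIG_RETPOLINE, -DBUILD_VDSO or -DCONFIG_SLS).

#include <stdio.h>

#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
#define ASM_RET "jmp __x86_return_thunk\n"
#else
#ifdef CONFIG_SLS
#define ASM_RET "ret; int3\n"
#else
#define ASM_RET "ret\n"
#endif
#endif

int main(void)
{
    printf("RET expands to: %s", ASM_RET);
    return 0;
}
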
diff --git a/queue-5.18/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch b/queue-5.18/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch
new file mode 100644 (file)
index 0000000..9d87435
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:43 +0200
+Subject: x86/vsyscall_emu/64: Don't use RET in vsyscall emulation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 15583e514eb16744b80be85dea0774ece153177d upstream.
+
+This is userspace code and doesn't play by the normal kernel rules.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/vsyscall/vsyscall_emu_64.S |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S
++++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
+@@ -19,17 +19,20 @@ __vsyscall_page:
+       mov $__NR_gettimeofday, %rax
+       syscall
+-      RET
++      ret
++      int3
+       .balign 1024, 0xcc
+       mov $__NR_time, %rax
+       syscall
+-      RET
++      ret
++      int3
+       .balign 1024, 0xcc
+       mov $__NR_getcpu, %rax
+       syscall
+-      RET
++      ret
++      int3
+       .balign 4096, 0xcc
diff --git a/queue-5.18/x86-xen-add-untrain_ret.patch b/queue-5.18/x86-xen-add-untrain_ret.patch
new file mode 100644 (file)
index 0000000..ca884a1
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:16:01 +0200
+Subject: x86/xen: Add UNTRAIN_RET
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d147553b64bad34d2f92cb7d8ba454ae95c3baac upstream.
+
+Ensure the Xen entry also passes through UNTRAIN_RET.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -323,6 +323,12 @@ SYM_CODE_END(ret_from_fork)
+ #endif
+ .endm
++SYM_CODE_START_LOCAL(xen_error_entry)
++      UNWIND_HINT_FUNC
++      UNTRAIN_RET
++      RET
++SYM_CODE_END(xen_error_entry)
++
+ /**
+  * idtentry_body - Macro to emit code calling the C function
+  * @cfunc:            C function to be called
+@@ -342,7 +348,7 @@ SYM_CODE_END(ret_from_fork)
+        * switch the CR3.  So it can skip invoking error_entry().
+        */
+       ALTERNATIVE "call error_entry; movq %rax, %rsp", \
+-              "", X86_FEATURE_XENPV
++                  "call xen_error_entry", X86_FEATURE_XENPV
+       ENCODE_FRAME_POINTER
+       UNWIND_HINT_REGS
diff --git a/queue-5.18/x86-xen-rename-sys-entry-points.patch b/queue-5.18/x86-xen-rename-sys-entry-points.patch
new file mode 100644 (file)
index 0000000..7d40572
--- /dev/null
@@ -0,0 +1,134 @@
+From foo@baz Tue Jul 12 05:03:58 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:16:00 +0200
+Subject: x86/xen: Rename SYS* entry points
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit b75b7f8ef1148be1b9321ffc2f6c19238904b438 upstream.
+
+Native SYS{CALL,ENTER} entry points are called
+entry_SYS{CALL,ENTER}_{64,compat}, make sure the Xen versions are
+named consistently.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/xen/setup.c   |    6 +++---
+ arch/x86/xen/xen-asm.S |   20 ++++++++++----------
+ arch/x86/xen/xen-ops.h |    6 +++---
+ 3 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/xen/setup.c
++++ b/arch/x86/xen/setup.c
+@@ -918,7 +918,7 @@ void xen_enable_sysenter(void)
+       if (!boot_cpu_has(sysenter_feature))
+               return;
+-      ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
++      ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat);
+       if(ret != 0)
+               setup_clear_cpu_cap(sysenter_feature);
+ }
+@@ -927,7 +927,7 @@ void xen_enable_syscall(void)
+ {
+       int ret;
+-      ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
++      ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
+       if (ret != 0) {
+               printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
+               /* Pretty fatal; 64-bit userspace has no other
+@@ -936,7 +936,7 @@ void xen_enable_syscall(void)
+       if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
+               ret = register_callback(CALLBACKTYPE_syscall32,
+-                                      xen_syscall32_target);
++                                      xen_entry_SYSCALL_compat);
+               if (ret != 0)
+                       setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
+       }
+--- a/arch/x86/xen/xen-asm.S
++++ b/arch/x86/xen/xen-asm.S
+@@ -234,7 +234,7 @@ SYM_CODE_END(xenpv_restore_regs_and_retu
+  */
+ /* Normal 64-bit system call target */
+-SYM_CODE_START(xen_syscall_target)
++SYM_CODE_START(xen_entry_SYSCALL_64)
+       UNWIND_HINT_EMPTY
+       ENDBR
+       popq %rcx
+@@ -249,12 +249,12 @@ SYM_CODE_START(xen_syscall_target)
+       movq $__USER_CS, 1*8(%rsp)
+       jmp entry_SYSCALL_64_after_hwframe
+-SYM_CODE_END(xen_syscall_target)
++SYM_CODE_END(xen_entry_SYSCALL_64)
+ #ifdef CONFIG_IA32_EMULATION
+ /* 32-bit compat syscall target */
+-SYM_CODE_START(xen_syscall32_target)
++SYM_CODE_START(xen_entry_SYSCALL_compat)
+       UNWIND_HINT_EMPTY
+       ENDBR
+       popq %rcx
+@@ -269,10 +269,10 @@ SYM_CODE_START(xen_syscall32_target)
+       movq $__USER32_CS, 1*8(%rsp)
+       jmp entry_SYSCALL_compat_after_hwframe
+-SYM_CODE_END(xen_syscall32_target)
++SYM_CODE_END(xen_entry_SYSCALL_compat)
+ /* 32-bit compat sysenter target */
+-SYM_CODE_START(xen_sysenter_target)
++SYM_CODE_START(xen_entry_SYSENTER_compat)
+       UNWIND_HINT_EMPTY
+       ENDBR
+       /*
+@@ -291,19 +291,19 @@ SYM_CODE_START(xen_sysenter_target)
+       movq $__USER32_CS, 1*8(%rsp)
+       jmp entry_SYSENTER_compat_after_hwframe
+-SYM_CODE_END(xen_sysenter_target)
++SYM_CODE_END(xen_entry_SYSENTER_compat)
+ #else /* !CONFIG_IA32_EMULATION */
+-SYM_CODE_START(xen_syscall32_target)
+-SYM_CODE_START(xen_sysenter_target)
++SYM_CODE_START(xen_entry_SYSCALL_compat)
++SYM_CODE_START(xen_entry_SYSENTER_compat)
+       UNWIND_HINT_EMPTY
+       ENDBR
+       lea 16(%rsp), %rsp      /* strip %rcx, %r11 */
+       mov $-ENOSYS, %rax
+       pushq $0
+       jmp hypercall_iret
+-SYM_CODE_END(xen_sysenter_target)
+-SYM_CODE_END(xen_syscall32_target)
++SYM_CODE_END(xen_entry_SYSENTER_compat)
++SYM_CODE_END(xen_entry_SYSCALL_compat)
+ #endif        /* CONFIG_IA32_EMULATION */
+--- a/arch/x86/xen/xen-ops.h
++++ b/arch/x86/xen/xen-ops.h
+@@ -10,10 +10,10 @@
+ /* These are code, but not functions.  Defined in entry.S */
+ extern const char xen_failsafe_callback[];
+-void xen_sysenter_target(void);
++void xen_entry_SYSENTER_compat(void);
+ #ifdef CONFIG_X86_64
+-void xen_syscall_target(void);
+-void xen_syscall32_target(void);
++void xen_entry_SYSCALL_64(void);
++void xen_entry_SYSCALL_compat(void);
+ #endif
+ extern void *xen_initial_gdt;