]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 12 Jul 2022 16:07:36 +0000 (18:07 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 12 Jul 2022 16:07:36 +0000 (18:07 +0200)
added patches:
bpf-x86-respect-x86_feature_retpoline.patch
bpf-x86-simplify-computing-label-offsets.patch
crypto-x86-poly1305-fixup-sls.patch
intel_idle-disable-ibrs-during-long-idle.patch
kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch
kvm-nvmx-use-__vmx_vcpu_run-in-nested_vmx_check_vmentry_hw.patch
kvm-vmx-convert-launched-argument-to-flags.patch
kvm-vmx-fix-ibrs-handling-after-vmexit.patch
kvm-vmx-flatten-__vmx_vcpu_run.patch
kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch
kvm-vmx-use-test-reg-reg-instead-of-cmp-0-reg-in-vmenter.s.patch
makefile-set-retpoline-cflags-based-on-config_cc_is_-clang-gcc.patch
objtool-add-alt_group-struct.patch
objtool-add-elf_create_reloc-helper.patch
objtool-add-elf_create_undef_symbol.patch
objtool-add-entry-unret-validation.patch
objtool-add-straight-line-speculation-validation.patch
objtool-assume-only-elf-functions-do-sibling-calls.patch
objtool-cache-instruction-relocs.patch
objtool-classify-symbols.patch
objtool-combine-unwind_hint_ret_offset-and-unwind_hint_func.patch
objtool-correctly-handle-retpoline-thunk-calls.patch
objtool-create-reloc-sections-implicitly.patch
objtool-default-ignore-int3-for-unreachable.patch
objtool-don-t-make-.altinstructions-writable.patch
objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch
objtool-extract-elf_strtab_concat.patch
objtool-extract-elf_symbol_add.patch
objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch
objtool-fix-code-relocs-vs-weak-symbols.patch
objtool-fix-objtool-regression-on-x32-systems.patch
objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch
objtool-fix-symbol-creation.patch
objtool-fix-type-of-reloc-addend.patch
objtool-handle-__sanitize_cov-tail-calls.patch
objtool-handle-per-arch-retpoline-naming.patch
objtool-introduce-cfi-hash.patch
objtool-keep-track-of-retpoline-call-sites.patch
objtool-make-.altinstructions-section-entry-size-consistent.patch
objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch
objtool-print-out-the-symbol-type-when-complaining-about-it.patch
objtool-re-add-unwind_hint_-save_restore.patch
objtool-refactor-orc-section-generation.patch
objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch
objtool-rework-the-elf_rebuild_reloc_section-logic.patch
objtool-skip-magical-retpoline-.altinstr_replacement.patch
objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch
objtool-support-asm-jump-tables.patch
objtool-support-retpoline-jump-detection-for-vmlinux.o.patch
objtool-support-stack-layout-changes-in-alternatives.patch
objtool-teach-get_alt_entry-about-more-relocation-types.patch
objtool-treat-.text.__x86.-as-noinstr.patch
objtool-update-retpoline-validation.patch
objtool-x86-ignore-__x86_indirect_alt_-symbols.patch
objtool-x86-replace-alternatives-with-.retpoline_sites.patch
objtool-x86-rewrite-retpoline-thunk-calls.patch
tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch
x86-add-insn_decode_kernel.patch
x86-add-magic-amd-return-thunk.patch
x86-add-straight-line-speculation-mitigation.patch
x86-alternative-add-debug-prints-to-apply_retpolines.patch
x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch
x86-alternative-implement-.retpoline_sites-support.patch
x86-alternative-merge-include-files.patch
x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch
x86-alternative-relax-text_poke_bp-constraint.patch
x86-alternative-support-alternative_ternary.patch
x86-alternative-support-not-feature.patch
x86-alternative-try-inline-spectre_v2-retpoline-amd.patch
x86-alternative-use-alternative_ternary-in-_static_cpu_has.patch
x86-alternative-use-insn_decode.patch
x86-alternatives-optimize-optimize_nops.patch
x86-asm-fix-register-order.patch
x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch
x86-bpf-use-alternative-ret-encoding.patch
x86-bugs-add-amd-retbleed-boot-parameter.patch
x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch
x86-bugs-add-retbleed-ibpb.patch
x86-bugs-do-ibpb-fallback-check-only-once.patch
x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch
x86-bugs-enable-stibp-for-jmp2ret.patch
x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch
x86-bugs-optimize-spec_ctrl-msr-writes.patch
x86-bugs-report-amd-retbleed-vulnerability.patch
x86-bugs-report-intel-retbleed-vulnerability.patch
x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch
x86-common-stamp-out-the-stepping-madness.patch
x86-cpu-amd-add-spectral-chicken.patch
x86-cpu-amd-enumerate-btc_no.patch
x86-cpufeatures-move-retpoline-flags-to-word-11.patch
x86-entry-add-kernel-ibrs-implementation.patch
x86-entry-remove-skip_r11rcx.patch
x86-ftrace-use-alternative-ret-encoding.patch
x86-insn-add-a-__ignore_sync_check__-marker.patch
x86-insn-add-an-insn_decode-api.patch
x86-insn-eval-handle-return-values-from-the-decoder.patch
x86-insn-rename-insn_decode-to-insn_decode_from_regs.patch
x86-kexec-disable-ret-on-kexec.patch
x86-kvm-fix-setcc-emulation-for-return-thunks.patch
x86-kvm-vmx-make-noinstr-clean.patch
x86-lib-atomic64_386_32-rename-things.patch
x86-objtool-create-.return_sites.patch
x86-prepare-asm-files-for-straight-line-speculation.patch
x86-prepare-inline-asm-for-straight-line-speculation.patch
x86-realmode-build-with-d__disable_exports.patch
x86-retbleed-add-fine-grained-kconfig-knobs.patch
x86-retpoline-cleanup-some-ifdefery.patch
x86-retpoline-create-a-retpoline-thunk-array.patch
x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch
x86-retpoline-remove-unused-replacement-symbols.patch
x86-retpoline-simplify-retpolines.patch
x86-retpoline-swizzle-retpoline-thunk.patch
x86-retpoline-use-mfunction-return.patch
x86-sev-avoid-using-__x86_return_thunk.patch
x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch
x86-speculation-disable-rrsba-behavior.patch
x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch
x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch
x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch
x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch
x86-speculation-remove-x86_spec_ctrl_mask.patch
x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch
x86-static_call-serialize-__static_call_fixup-properly.patch
x86-static_call-use-alternative-ret-encoding.patch
x86-undo-return-thunk-damage.patch
x86-use-return-thunk-in-asm-code.patch
x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch
x86-xen-rename-sys-entry-points.patch
x86-xen-support-objtool-validation-in-xen-asm.s.patch
x86-xen-support-objtool-vmlinux.o-validation-in-xen-head.s.patch

131 files changed:
queue-5.10/bpf-x86-respect-x86_feature_retpoline.patch [new file with mode: 0644]
queue-5.10/bpf-x86-simplify-computing-label-offsets.patch [new file with mode: 0644]
queue-5.10/crypto-x86-poly1305-fixup-sls.patch [new file with mode: 0644]
queue-5.10/intel_idle-disable-ibrs-during-long-idle.patch [new file with mode: 0644]
queue-5.10/kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch [new file with mode: 0644]
queue-5.10/kvm-nvmx-use-__vmx_vcpu_run-in-nested_vmx_check_vmentry_hw.patch [new file with mode: 0644]
queue-5.10/kvm-vmx-convert-launched-argument-to-flags.patch [new file with mode: 0644]
queue-5.10/kvm-vmx-fix-ibrs-handling-after-vmexit.patch [new file with mode: 0644]
queue-5.10/kvm-vmx-flatten-__vmx_vcpu_run.patch [new file with mode: 0644]
queue-5.10/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch [new file with mode: 0644]
queue-5.10/kvm-vmx-use-test-reg-reg-instead-of-cmp-0-reg-in-vmenter.s.patch [new file with mode: 0644]
queue-5.10/makefile-set-retpoline-cflags-based-on-config_cc_is_-clang-gcc.patch [new file with mode: 0644]
queue-5.10/objtool-add-alt_group-struct.patch [new file with mode: 0644]
queue-5.10/objtool-add-elf_create_reloc-helper.patch [new file with mode: 0644]
queue-5.10/objtool-add-elf_create_undef_symbol.patch [new file with mode: 0644]
queue-5.10/objtool-add-entry-unret-validation.patch [new file with mode: 0644]
queue-5.10/objtool-add-straight-line-speculation-validation.patch [new file with mode: 0644]
queue-5.10/objtool-assume-only-elf-functions-do-sibling-calls.patch [new file with mode: 0644]
queue-5.10/objtool-cache-instruction-relocs.patch [new file with mode: 0644]
queue-5.10/objtool-classify-symbols.patch [new file with mode: 0644]
queue-5.10/objtool-combine-unwind_hint_ret_offset-and-unwind_hint_func.patch [new file with mode: 0644]
queue-5.10/objtool-correctly-handle-retpoline-thunk-calls.patch [new file with mode: 0644]
queue-5.10/objtool-create-reloc-sections-implicitly.patch [new file with mode: 0644]
queue-5.10/objtool-default-ignore-int3-for-unreachable.patch [new file with mode: 0644]
queue-5.10/objtool-don-t-make-.altinstructions-writable.patch [new file with mode: 0644]
queue-5.10/objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch [new file with mode: 0644]
queue-5.10/objtool-extract-elf_strtab_concat.patch [new file with mode: 0644]
queue-5.10/objtool-extract-elf_symbol_add.patch [new file with mode: 0644]
queue-5.10/objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch [new file with mode: 0644]
queue-5.10/objtool-fix-code-relocs-vs-weak-symbols.patch [new file with mode: 0644]
queue-5.10/objtool-fix-objtool-regression-on-x32-systems.patch [new file with mode: 0644]
queue-5.10/objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch [new file with mode: 0644]
queue-5.10/objtool-fix-symbol-creation.patch [new file with mode: 0644]
queue-5.10/objtool-fix-type-of-reloc-addend.patch [new file with mode: 0644]
queue-5.10/objtool-handle-__sanitize_cov-tail-calls.patch [new file with mode: 0644]
queue-5.10/objtool-handle-per-arch-retpoline-naming.patch [new file with mode: 0644]
queue-5.10/objtool-introduce-cfi-hash.patch [new file with mode: 0644]
queue-5.10/objtool-keep-track-of-retpoline-call-sites.patch [new file with mode: 0644]
queue-5.10/objtool-make-.altinstructions-section-entry-size-consistent.patch [new file with mode: 0644]
queue-5.10/objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch [new file with mode: 0644]
queue-5.10/objtool-print-out-the-symbol-type-when-complaining-about-it.patch [new file with mode: 0644]
queue-5.10/objtool-re-add-unwind_hint_-save_restore.patch [new file with mode: 0644]
queue-5.10/objtool-refactor-orc-section-generation.patch [new file with mode: 0644]
queue-5.10/objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch [new file with mode: 0644]
queue-5.10/objtool-rework-the-elf_rebuild_reloc_section-logic.patch [new file with mode: 0644]
queue-5.10/objtool-skip-magical-retpoline-.altinstr_replacement.patch [new file with mode: 0644]
queue-5.10/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch [new file with mode: 0644]
queue-5.10/objtool-support-asm-jump-tables.patch [new file with mode: 0644]
queue-5.10/objtool-support-retpoline-jump-detection-for-vmlinux.o.patch [new file with mode: 0644]
queue-5.10/objtool-support-stack-layout-changes-in-alternatives.patch [new file with mode: 0644]
queue-5.10/objtool-teach-get_alt_entry-about-more-relocation-types.patch [new file with mode: 0644]
queue-5.10/objtool-treat-.text.__x86.-as-noinstr.patch [new file with mode: 0644]
queue-5.10/objtool-update-retpoline-validation.patch [new file with mode: 0644]
queue-5.10/objtool-x86-ignore-__x86_indirect_alt_-symbols.patch [new file with mode: 0644]
queue-5.10/objtool-x86-replace-alternatives-with-.retpoline_sites.patch [new file with mode: 0644]
queue-5.10/objtool-x86-rewrite-retpoline-thunk-calls.patch [new file with mode: 0644]
queue-5.10/series [new file with mode: 0644]
queue-5.10/tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch [new file with mode: 0644]
queue-5.10/x86-add-insn_decode_kernel.patch [new file with mode: 0644]
queue-5.10/x86-add-magic-amd-return-thunk.patch [new file with mode: 0644]
queue-5.10/x86-add-straight-line-speculation-mitigation.patch [new file with mode: 0644]
queue-5.10/x86-alternative-add-debug-prints-to-apply_retpolines.patch [new file with mode: 0644]
queue-5.10/x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch [new file with mode: 0644]
queue-5.10/x86-alternative-implement-.retpoline_sites-support.patch [new file with mode: 0644]
queue-5.10/x86-alternative-merge-include-files.patch [new file with mode: 0644]
queue-5.10/x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch [new file with mode: 0644]
queue-5.10/x86-alternative-relax-text_poke_bp-constraint.patch [new file with mode: 0644]
queue-5.10/x86-alternative-support-alternative_ternary.patch [new file with mode: 0644]
queue-5.10/x86-alternative-support-not-feature.patch [new file with mode: 0644]
queue-5.10/x86-alternative-try-inline-spectre_v2-retpoline-amd.patch [new file with mode: 0644]
queue-5.10/x86-alternative-use-alternative_ternary-in-_static_cpu_has.patch [new file with mode: 0644]
queue-5.10/x86-alternative-use-insn_decode.patch [new file with mode: 0644]
queue-5.10/x86-alternatives-optimize-optimize_nops.patch [new file with mode: 0644]
queue-5.10/x86-asm-fix-register-order.patch [new file with mode: 0644]
queue-5.10/x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch [new file with mode: 0644]
queue-5.10/x86-bpf-use-alternative-ret-encoding.patch [new file with mode: 0644]
queue-5.10/x86-bugs-add-amd-retbleed-boot-parameter.patch [new file with mode: 0644]
queue-5.10/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch [new file with mode: 0644]
queue-5.10/x86-bugs-add-retbleed-ibpb.patch [new file with mode: 0644]
queue-5.10/x86-bugs-do-ibpb-fallback-check-only-once.patch [new file with mode: 0644]
queue-5.10/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch [new file with mode: 0644]
queue-5.10/x86-bugs-enable-stibp-for-jmp2ret.patch [new file with mode: 0644]
queue-5.10/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch [new file with mode: 0644]
queue-5.10/x86-bugs-optimize-spec_ctrl-msr-writes.patch [new file with mode: 0644]
queue-5.10/x86-bugs-report-amd-retbleed-vulnerability.patch [new file with mode: 0644]
queue-5.10/x86-bugs-report-intel-retbleed-vulnerability.patch [new file with mode: 0644]
queue-5.10/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch [new file with mode: 0644]
queue-5.10/x86-common-stamp-out-the-stepping-madness.patch [new file with mode: 0644]
queue-5.10/x86-cpu-amd-add-spectral-chicken.patch [new file with mode: 0644]
queue-5.10/x86-cpu-amd-enumerate-btc_no.patch [new file with mode: 0644]
queue-5.10/x86-cpufeatures-move-retpoline-flags-to-word-11.patch [new file with mode: 0644]
queue-5.10/x86-entry-add-kernel-ibrs-implementation.patch [new file with mode: 0644]
queue-5.10/x86-entry-remove-skip_r11rcx.patch [new file with mode: 0644]
queue-5.10/x86-ftrace-use-alternative-ret-encoding.patch [new file with mode: 0644]
queue-5.10/x86-insn-add-a-__ignore_sync_check__-marker.patch [new file with mode: 0644]
queue-5.10/x86-insn-add-an-insn_decode-api.patch [new file with mode: 0644]
queue-5.10/x86-insn-eval-handle-return-values-from-the-decoder.patch [new file with mode: 0644]
queue-5.10/x86-insn-rename-insn_decode-to-insn_decode_from_regs.patch [new file with mode: 0644]
queue-5.10/x86-kexec-disable-ret-on-kexec.patch [new file with mode: 0644]
queue-5.10/x86-kvm-fix-setcc-emulation-for-return-thunks.patch [new file with mode: 0644]
queue-5.10/x86-kvm-vmx-make-noinstr-clean.patch [new file with mode: 0644]
queue-5.10/x86-lib-atomic64_386_32-rename-things.patch [new file with mode: 0644]
queue-5.10/x86-objtool-create-.return_sites.patch [new file with mode: 0644]
queue-5.10/x86-prepare-asm-files-for-straight-line-speculation.patch [new file with mode: 0644]
queue-5.10/x86-prepare-inline-asm-for-straight-line-speculation.patch [new file with mode: 0644]
queue-5.10/x86-realmode-build-with-d__disable_exports.patch [new file with mode: 0644]
queue-5.10/x86-retbleed-add-fine-grained-kconfig-knobs.patch [new file with mode: 0644]
queue-5.10/x86-retpoline-cleanup-some-ifdefery.patch [new file with mode: 0644]
queue-5.10/x86-retpoline-create-a-retpoline-thunk-array.patch [new file with mode: 0644]
queue-5.10/x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch [new file with mode: 0644]
queue-5.10/x86-retpoline-remove-unused-replacement-symbols.patch [new file with mode: 0644]
queue-5.10/x86-retpoline-simplify-retpolines.patch [new file with mode: 0644]
queue-5.10/x86-retpoline-swizzle-retpoline-thunk.patch [new file with mode: 0644]
queue-5.10/x86-retpoline-use-mfunction-return.patch [new file with mode: 0644]
queue-5.10/x86-sev-avoid-using-__x86_return_thunk.patch [new file with mode: 0644]
queue-5.10/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch [new file with mode: 0644]
queue-5.10/x86-speculation-disable-rrsba-behavior.patch [new file with mode: 0644]
queue-5.10/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch [new file with mode: 0644]
queue-5.10/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch [new file with mode: 0644]
queue-5.10/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch [new file with mode: 0644]
queue-5.10/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch [new file with mode: 0644]
queue-5.10/x86-speculation-remove-x86_spec_ctrl_mask.patch [new file with mode: 0644]
queue-5.10/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch [new file with mode: 0644]
queue-5.10/x86-static_call-serialize-__static_call_fixup-properly.patch [new file with mode: 0644]
queue-5.10/x86-static_call-use-alternative-ret-encoding.patch [new file with mode: 0644]
queue-5.10/x86-undo-return-thunk-damage.patch [new file with mode: 0644]
queue-5.10/x86-use-return-thunk-in-asm-code.patch [new file with mode: 0644]
queue-5.10/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch [new file with mode: 0644]
queue-5.10/x86-xen-rename-sys-entry-points.patch [new file with mode: 0644]
queue-5.10/x86-xen-support-objtool-validation-in-xen-asm.s.patch [new file with mode: 0644]
queue-5.10/x86-xen-support-objtool-vmlinux.o-validation-in-xen-head.s.patch [new file with mode: 0644]

diff --git a/queue-5.10/bpf-x86-respect-x86_feature_retpoline.patch b/queue-5.10/bpf-x86-respect-x86_feature_retpoline.patch
new file mode 100644 (file)
index 0000000..64ff53e
--- /dev/null
@@ -0,0 +1,253 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:48 +0200
+Subject: bpf,x86: Respect X86_FEATURE_RETPOLINE*
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 87c87ecd00c54ecd677798cb49ef27329e0fab41 upstream.
+
+Current BPF codegen doesn't respect X86_FEATURE_RETPOLINE* flags and
+unconditionally emits a thunk call, this is sub-optimal and doesn't
+match the regular, compiler generated, code.
+
+Update the i386 JIT to emit code equal to what the compiler emits for
+the regular kernel text (IOW. a plain THUNK call).
+
+Update the x86_64 JIT to emit code similar to the result of compiler
+and kernel rewrites as according to X86_FEATURE_RETPOLINE* flags.
+Inlining RETPOLINE_AMD (lfence; jmp *%reg) and !RETPOLINE (jmp *%reg),
+while doing a THUNK call for RETPOLINE.
+
+This removes the hard-coded retpoline thunks and shrinks the generated
+code. Leaving a single retpoline thunk definition in the kernel.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.614772675@infradead.org
+[cascardo: RETPOLINE_AMD was renamed to RETPOLINE_LFENCE]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: add the necessary cnt variable to
+ emit_indirect_jump()]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |   59 -----------------------------------
+ arch/x86/net/bpf_jit_comp.c          |   49 +++++++++++++----------------
+ arch/x86/net/bpf_jit_comp32.c        |   22 +++++++++++--
+ 3 files changed, 42 insertions(+), 88 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -318,63 +318,4 @@ static inline void mds_idle_clear_cpu_bu
+ #endif /* __ASSEMBLY__ */
+-/*
+- * Below is used in the eBPF JIT compiler and emits the byte sequence
+- * for the following assembly:
+- *
+- * With retpolines configured:
+- *
+- *    callq do_rop
+- *  spec_trap:
+- *    pause
+- *    lfence
+- *    jmp spec_trap
+- *  do_rop:
+- *    mov %rcx,(%rsp) for x86_64
+- *    mov %edx,(%esp) for x86_32
+- *    retq
+- *
+- * Without retpolines configured:
+- *
+- *    jmp *%rcx for x86_64
+- *    jmp *%edx for x86_32
+- */
+-#ifdef CONFIG_RETPOLINE
+-# ifdef CONFIG_X86_64
+-#  define RETPOLINE_RCX_BPF_JIT_SIZE  17
+-#  define RETPOLINE_RCX_BPF_JIT()                             \
+-do {                                                          \
+-      EMIT1_off32(0xE8, 7);    /* callq do_rop */             \
+-      /* spec_trap: */                                        \
+-      EMIT2(0xF3, 0x90);       /* pause */                    \
+-      EMIT3(0x0F, 0xAE, 0xE8); /* lfence */                   \
+-      EMIT2(0xEB, 0xF9);       /* jmp spec_trap */            \
+-      /* do_rop: */                                           \
+-      EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */    \
+-      EMIT1(0xC3);             /* retq */                     \
+-} while (0)
+-# else /* !CONFIG_X86_64 */
+-#  define RETPOLINE_EDX_BPF_JIT()                             \
+-do {                                                          \
+-      EMIT1_off32(0xE8, 7);    /* call do_rop */              \
+-      /* spec_trap: */                                        \
+-      EMIT2(0xF3, 0x90);       /* pause */                    \
+-      EMIT3(0x0F, 0xAE, 0xE8); /* lfence */                   \
+-      EMIT2(0xEB, 0xF9);       /* jmp spec_trap */            \
+-      /* do_rop: */                                           \
+-      EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */          \
+-      EMIT1(0xC3);             /* ret */                      \
+-} while (0)
+-# endif
+-#else /* !CONFIG_RETPOLINE */
+-# ifdef CONFIG_X86_64
+-#  define RETPOLINE_RCX_BPF_JIT_SIZE  2
+-#  define RETPOLINE_RCX_BPF_JIT()                             \
+-      EMIT2(0xFF, 0xE1);       /* jmp *%rcx */
+-# else /* !CONFIG_X86_64 */
+-#  define RETPOLINE_EDX_BPF_JIT()                             \
+-      EMIT2(0xFF, 0xE2)        /* jmp *%edx */
+-# endif
+-#endif
+-
+ #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -379,6 +379,26 @@ int bpf_arch_text_poke(void *ip, enum bp
+       return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
+ }
++#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
++
++static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
++{
++      u8 *prog = *pprog;
++      int cnt = 0;
++
++#ifdef CONFIG_RETPOLINE
++      if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
++              EMIT_LFENCE();
++              EMIT2(0xFF, 0xE0 + reg);
++      } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
++              emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
++      } else
++#endif
++      EMIT2(0xFF, 0xE0 + reg);
++
++      *pprog = prog;
++}
++
+ /*
+  * Generate the following code:
+  *
+@@ -460,7 +480,7 @@ static void emit_bpf_tail_call_indirect(
+        * rdi == ctx (1st arg)
+        * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
+        */
+-      RETPOLINE_RCX_BPF_JIT();
++      emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
+       /* out: */
+       ctx->tail_call_indirect_label = prog - start;
+@@ -1099,8 +1119,7 @@ static int do_jit(struct bpf_prog *bpf_p
+                       /* speculation barrier */
+               case BPF_ST | BPF_NOSPEC:
+                       if (boot_cpu_has(X86_FEATURE_XMM2))
+-                              /* Emit 'lfence' */
+-                              EMIT3(0x0F, 0xAE, 0xE8);
++                              EMIT_LFENCE();
+                       break;
+                       /* ST: *(u8*)(dst_reg + off) = imm */
+@@ -1878,26 +1897,6 @@ cleanup:
+       return ret;
+ }
+-static int emit_fallback_jump(u8 **pprog)
+-{
+-      u8 *prog = *pprog;
+-      int err = 0;
+-
+-#ifdef CONFIG_RETPOLINE
+-      /* Note that this assumes the the compiler uses external
+-       * thunks for indirect calls. Both clang and GCC use the same
+-       * naming convention for external thunks.
+-       */
+-      err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
+-#else
+-      int cnt = 0;
+-
+-      EMIT2(0xFF, 0xE2);      /* jmp rdx */
+-#endif
+-      *pprog = prog;
+-      return err;
+-}
+-
+ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
+ {
+       u8 *jg_reloc, *prog = *pprog;
+@@ -1919,9 +1918,7 @@ static int emit_bpf_dispatcher(u8 **ppro
+               if (err)
+                       return err;
+-              err = emit_fallback_jump(&prog);        /* jmp thunk/indirect */
+-              if (err)
+-                      return err;
++              emit_indirect_jump(&prog, 2 /* rdx */, prog);
+               *pprog = prog;
+               return 0;
+--- a/arch/x86/net/bpf_jit_comp32.c
++++ b/arch/x86/net/bpf_jit_comp32.c
+@@ -15,6 +15,7 @@
+ #include <asm/cacheflush.h>
+ #include <asm/set_memory.h>
+ #include <asm/nospec-branch.h>
++#include <asm/asm-prototypes.h>
+ #include <linux/bpf.h>
+ /*
+@@ -1267,6 +1268,21 @@ static void emit_epilogue(u8 **pprog, u3
+       *pprog = prog;
+ }
++static int emit_jmp_edx(u8 **pprog, u8 *ip)
++{
++      u8 *prog = *pprog;
++      int cnt = 0;
++
++#ifdef CONFIG_RETPOLINE
++      EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
++#else
++      EMIT2(0xFF, 0xE2);
++#endif
++      *pprog = prog;
++
++      return cnt;
++}
++
+ /*
+  * Generate the following code:
+  * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
+@@ -1280,7 +1296,7 @@ static void emit_epilogue(u8 **pprog, u3
+  *   goto *(prog->bpf_func + prologue_size);
+  * out:
+  */
+-static void emit_bpf_tail_call(u8 **pprog)
++static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
+ {
+       u8 *prog = *pprog;
+       int cnt = 0;
+@@ -1362,7 +1378,7 @@ static void emit_bpf_tail_call(u8 **ppro
+        * eax == ctx (1st arg)
+        * edx == prog->bpf_func + prologue_size
+        */
+-      RETPOLINE_EDX_BPF_JIT();
++      cnt += emit_jmp_edx(&prog, ip + cnt);
+       if (jmp_label1 == -1)
+               jmp_label1 = cnt;
+@@ -1929,7 +1945,7 @@ static int do_jit(struct bpf_prog *bpf_p
+                       break;
+               }
+               case BPF_JMP | BPF_TAIL_CALL:
+-                      emit_bpf_tail_call(&prog);
++                      emit_bpf_tail_call(&prog, image + addrs[i - 1]);
+                       break;
+               /* cond jump */
diff --git a/queue-5.10/bpf-x86-simplify-computing-label-offsets.patch b/queue-5.10/bpf-x86-simplify-computing-label-offsets.patch
new file mode 100644 (file)
index 0000000..7f05f0f
--- /dev/null
@@ -0,0 +1,261 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:47 +0200
+Subject: bpf,x86: Simplify computing label offsets
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit dceba0817ca329868a15e2e1dd46eb6340b69206 upstream.
+
+Take an idea from the 32bit JIT, which uses the multi-pass nature of
+the JIT to compute the instruction offsets on a prior pass in order to
+compute the relative jump offsets on a later pass.
+
+Application to the x86_64 JIT is slightly more involved because the
+offsets depend on program variables (such as callee_regs_used and
+stack_depth) and hence the computed offsets need to be kept in the
+context of the JIT.
+
+This removes, IMO quite fragile, code that hard-codes the offsets and
+tries to compute the length of variable parts of it.
+
+Convert both emit_bpf_tail_call_*() functions which have an out: label
+at the end. Additionally emit_bpt_tail_call_direct() also has a poke
+table entry, for which it computes the offset from the end (and thus
+already relies on the previous pass to have computed addrs[i]), also
+convert this to be a forward based offset.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.552304864@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: keep the cnt variable in
+ emit_bpf_tail_call_{,in}direct()]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/net/bpf_jit_comp.c |  125 ++++++++++++++------------------------------
+ 1 file changed, 42 insertions(+), 83 deletions(-)
+
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -212,6 +212,14 @@ static void jit_fill_hole(void *area, un
+ struct jit_context {
+       int cleanup_addr; /* Epilogue code offset */
++
++      /*
++       * Program specific offsets of labels in the code; these rely on the
++       * JIT doing at least 2 passes, recording the position on the first
++       * pass, only to generate the correct offset on the second pass.
++       */
++      int tail_call_direct_label;
++      int tail_call_indirect_label;
+ };
+ /* Maximum number of bytes emitted while JITing one eBPF insn */
+@@ -371,22 +379,6 @@ int bpf_arch_text_poke(void *ip, enum bp
+       return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
+ }
+-static int get_pop_bytes(bool *callee_regs_used)
+-{
+-      int bytes = 0;
+-
+-      if (callee_regs_used[3])
+-              bytes += 2;
+-      if (callee_regs_used[2])
+-              bytes += 2;
+-      if (callee_regs_used[1])
+-              bytes += 2;
+-      if (callee_regs_used[0])
+-              bytes += 1;
+-
+-      return bytes;
+-}
+-
+ /*
+  * Generate the following code:
+  *
+@@ -402,30 +394,12 @@ static int get_pop_bytes(bool *callee_re
+  * out:
+  */
+ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
+-                                      u32 stack_depth)
++                                      u32 stack_depth, u8 *ip,
++                                      struct jit_context *ctx)
+ {
+       int tcc_off = -4 - round_up(stack_depth, 8);
+-      u8 *prog = *pprog;
+-      int pop_bytes = 0;
+-      int off1 = 42;
+-      int off2 = 31;
+-      int off3 = 9;
+-      int cnt = 0;
+-
+-      /* count the additional bytes used for popping callee regs from stack
+-       * that need to be taken into account for each of the offsets that
+-       * are used for bailing out of the tail call
+-       */
+-      pop_bytes = get_pop_bytes(callee_regs_used);
+-      off1 += pop_bytes;
+-      off2 += pop_bytes;
+-      off3 += pop_bytes;
+-
+-      if (stack_depth) {
+-              off1 += 7;
+-              off2 += 7;
+-              off3 += 7;
+-      }
++      u8 *prog = *pprog, *start = *pprog;
++      int cnt = 0, offset;
+       /*
+        * rdi - pointer to ctx
+@@ -440,8 +414,9 @@ static void emit_bpf_tail_call_indirect(
+       EMIT2(0x89, 0xD2);                        /* mov edx, edx */
+       EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
+             offsetof(struct bpf_array, map.max_entries));
+-#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
+-      EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
++
++      offset = ctx->tail_call_indirect_label - (prog + 2 - start);
++      EMIT2(X86_JBE, offset);                   /* jbe out */
+       /*
+        * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+@@ -449,8 +424,9 @@ static void emit_bpf_tail_call_indirect(
+        */
+       EMIT2_off32(0x8B, 0x85, tcc_off);         /* mov eax, dword ptr [rbp - tcc_off] */
+       EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
+-#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
+-      EMIT2(X86_JA, OFFSET2);                   /* ja out */
++
++      offset = ctx->tail_call_indirect_label - (prog + 2 - start);
++      EMIT2(X86_JA, offset);                    /* ja out */
+       EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
+       EMIT2_off32(0x89, 0x85, tcc_off);         /* mov dword ptr [rbp - tcc_off], eax */
+@@ -463,12 +439,11 @@ static void emit_bpf_tail_call_indirect(
+        *      goto out;
+        */
+       EMIT3(0x48, 0x85, 0xC9);                  /* test rcx,rcx */
+-#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
+-      EMIT2(X86_JE, OFFSET3);                   /* je out */
+-      *pprog = prog;
+-      pop_callee_regs(pprog, callee_regs_used);
+-      prog = *pprog;
++      offset = ctx->tail_call_indirect_label - (prog + 2 - start);
++      EMIT2(X86_JE, offset);                    /* je out */
++
++      pop_callee_regs(&prog, callee_regs_used);
+       EMIT1(0x58);                              /* pop rax */
+       if (stack_depth)
+@@ -488,39 +463,18 @@ static void emit_bpf_tail_call_indirect(
+       RETPOLINE_RCX_BPF_JIT();
+       /* out: */
++      ctx->tail_call_indirect_label = prog - start;
+       *pprog = prog;
+ }
+ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
+-                                    u8 **pprog, int addr, u8 *image,
+-                                    bool *callee_regs_used, u32 stack_depth)
++                                    u8 **pprog, u8 *ip,
++                                    bool *callee_regs_used, u32 stack_depth,
++                                    struct jit_context *ctx)
+ {
+       int tcc_off = -4 - round_up(stack_depth, 8);
+-      u8 *prog = *pprog;
+-      int pop_bytes = 0;
+-      int off1 = 20;
+-      int poke_off;
+-      int cnt = 0;
+-
+-      /* count the additional bytes used for popping callee regs to stack
+-       * that need to be taken into account for jump offset that is used for
+-       * bailing out from of the tail call when limit is reached
+-       */
+-      pop_bytes = get_pop_bytes(callee_regs_used);
+-      off1 += pop_bytes;
+-
+-      /*
+-       * total bytes for:
+-       * - nop5/ jmpq $off
+-       * - pop callee regs
+-       * - sub rsp, $val if depth > 0
+-       * - pop rax
+-       */
+-      poke_off = X86_PATCH_SIZE + pop_bytes + 1;
+-      if (stack_depth) {
+-              poke_off += 7;
+-              off1 += 7;
+-      }
++      u8 *prog = *pprog, *start = *pprog;
++      int cnt = 0, offset;
+       /*
+        * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+@@ -528,28 +482,30 @@ static void emit_bpf_tail_call_direct(st
+        */
+       EMIT2_off32(0x8B, 0x85, tcc_off);             /* mov eax, dword ptr [rbp - tcc_off] */
+       EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);         /* cmp eax, MAX_TAIL_CALL_CNT */
+-      EMIT2(X86_JA, off1);                          /* ja out */
++
++      offset = ctx->tail_call_direct_label - (prog + 2 - start);
++      EMIT2(X86_JA, offset);                        /* ja out */
+       EMIT3(0x83, 0xC0, 0x01);                      /* add eax, 1 */
+       EMIT2_off32(0x89, 0x85, tcc_off);             /* mov dword ptr [rbp - tcc_off], eax */
+-      poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
++      poke->tailcall_bypass = ip + (prog - start);
+       poke->adj_off = X86_TAIL_CALL_OFFSET;
+-      poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
++      poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
+       poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;
+       emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
+                 poke->tailcall_bypass);
+-      *pprog = prog;
+-      pop_callee_regs(pprog, callee_regs_used);
+-      prog = *pprog;
++      pop_callee_regs(&prog, callee_regs_used);
+       EMIT1(0x58);                                  /* pop rax */
+       if (stack_depth)
+               EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
+       memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
+       prog += X86_PATCH_SIZE;
++
+       /* out: */
++      ctx->tail_call_direct_label = prog - start;
+       *pprog = prog;
+ }
+@@ -1274,13 +1230,16 @@ xadd:                  if (is_imm8(insn->off))
+               case BPF_JMP | BPF_TAIL_CALL:
+                       if (imm32)
+                               emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
+-                                                        &prog, addrs[i], image,
++                                                        &prog, image + addrs[i - 1],
+                                                         callee_regs_used,
+-                                                        bpf_prog->aux->stack_depth);
++                                                        bpf_prog->aux->stack_depth,
++                                                        ctx);
+                       else
+                               emit_bpf_tail_call_indirect(&prog,
+                                                           callee_regs_used,
+-                                                          bpf_prog->aux->stack_depth);
++                                                          bpf_prog->aux->stack_depth,
++                                                          image + addrs[i - 1],
++                                                          ctx);
+                       break;
+                       /* cond jump */
diff --git a/queue-5.10/crypto-x86-poly1305-fixup-sls.patch b/queue-5.10/crypto-x86-poly1305-fixup-sls.patch
new file mode 100644 (file)
index 0000000..f902493
--- /dev/null
@@ -0,0 +1,210 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 24 Mar 2022 00:05:55 +0100
+Subject: crypto: x86/poly1305 - Fixup SLS
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 7ed7aa4de9421229be6d331ed52d5cd09c99f409 upstream.
+
+Due to being a perl generated asm file, it got missed by the mass
+convertion script.
+
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_init_x86_64()+0x3a: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_x86_64()+0xf2: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_emit_x86_64()+0x37: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: __poly1305_block()+0x6d: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: __poly1305_init_avx()+0x1e8: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx()+0x18a: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx()+0xaf8: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_emit_avx()+0x99: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx2()+0x18a: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx2()+0x776: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx512()+0x18a: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx512()+0x796: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx512()+0x10bd: missing int3 after ret
+
+Fixes: f94909ceb1ed ("x86: Prepare asm files for straight-line-speculation")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/crypto/poly1305-x86_64-cryptogams.pl |   38 +++++++++++++-------------
+ 1 file changed, 19 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
++++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+@@ -297,7 +297,7 @@ ___
+ $code.=<<___;
+       mov     \$1,%eax
+ .Lno_key:
+-      ret
++      RET
+ ___
+ &end_function("poly1305_init_x86_64");
+@@ -373,7 +373,7 @@ $code.=<<___;
+ .cfi_adjust_cfa_offset        -48
+ .Lno_data:
+ .Lblocks_epilogue:
+-      ret
++      RET
+ .cfi_endproc
+ ___
+ &end_function("poly1305_blocks_x86_64");
+@@ -399,7 +399,7 @@ $code.=<<___;
+       mov     %rax,0($mac)    # write result
+       mov     %rcx,8($mac)
+-      ret
++      RET
+ ___
+ &end_function("poly1305_emit_x86_64");
+ if ($avx) {
+@@ -429,7 +429,7 @@ ___
+       &poly1305_iteration();
+ $code.=<<___;
+       pop $ctx
+-      ret
++      RET
+ .size __poly1305_block,.-__poly1305_block
+ .type __poly1305_init_avx,\@abi-omnipotent
+@@ -594,7 +594,7 @@ __poly1305_init_avx:
+       lea     -48-64($ctx),$ctx       # size [de-]optimization
+       pop %rbp
+-      ret
++      RET
+ .size __poly1305_init_avx,.-__poly1305_init_avx
+ ___
+@@ -747,7 +747,7 @@ $code.=<<___;
+ .cfi_restore  %rbp
+ .Lno_data_avx:
+ .Lblocks_avx_epilogue:
+-      ret
++      RET
+ .cfi_endproc
+ .align        32
+@@ -1452,7 +1452,7 @@ $code.=<<___     if (!$win64);
+ ___
+ $code.=<<___;
+       vzeroupper
+-      ret
++      RET
+ .cfi_endproc
+ ___
+ &end_function("poly1305_blocks_avx");
+@@ -1508,7 +1508,7 @@ $code.=<<___;
+       mov     %rax,0($mac)    # write result
+       mov     %rcx,8($mac)
+-      ret
++      RET
+ ___
+ &end_function("poly1305_emit_avx");
+@@ -1675,7 +1675,7 @@ $code.=<<___;
+ .cfi_restore  %rbp
+ .Lno_data_avx2$suffix:
+ .Lblocks_avx2_epilogue$suffix:
+-      ret
++      RET
+ .cfi_endproc
+ .align        32
+@@ -2201,7 +2201,7 @@ $code.=<<___     if (!$win64);
+ ___
+ $code.=<<___;
+       vzeroupper
+-      ret
++      RET
+ .cfi_endproc
+ ___
+ if($avx > 2 && $avx512) {
+@@ -2792,7 +2792,7 @@ $code.=<<___     if (!$win64);
+ .cfi_def_cfa_register %rsp
+ ___
+ $code.=<<___;
+-      ret
++      RET
+ .cfi_endproc
+ ___
+@@ -2893,7 +2893,7 @@ $code.=<<___     if ($flavour =~ /elf32/);
+ ___
+ $code.=<<___;
+       mov     \$1,%eax
+-      ret
++      RET
+ .size poly1305_init_base2_44,.-poly1305_init_base2_44
+ ___
+ {
+@@ -3010,7 +3010,7 @@ poly1305_blocks_vpmadd52:
+       jnz             .Lblocks_vpmadd52_4x
+ .Lno_data_vpmadd52:
+-      ret
++      RET
+ .size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52
+ ___
+ }
+@@ -3451,7 +3451,7 @@ poly1305_blocks_vpmadd52_4x:
+       vzeroall
+ .Lno_data_vpmadd52_4x:
+-      ret
++      RET
+ .size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x
+ ___
+ }
+@@ -3824,7 +3824,7 @@ $code.=<<___;
+       vzeroall
+ .Lno_data_vpmadd52_8x:
+-      ret
++      RET
+ .size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x
+ ___
+ }
+@@ -3861,7 +3861,7 @@ poly1305_emit_base2_44:
+       mov     %rax,0($mac)    # write result
+       mov     %rcx,8($mac)
+-      ret
++      RET
+ .size poly1305_emit_base2_44,.-poly1305_emit_base2_44
+ ___
+ }     }       }
+@@ -3916,7 +3916,7 @@ xor128_encrypt_n_pad:
+ .Ldone_enc:
+       mov     $otp,%rax
+-      ret
++      RET
+ .size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad
+ .globl        xor128_decrypt_n_pad
+@@ -3967,7 +3967,7 @@ xor128_decrypt_n_pad:
+ .Ldone_dec:
+       mov     $otp,%rax
+-      ret
++      RET
+ .size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad
+ ___
+ }
+@@ -4109,7 +4109,7 @@ avx_handler:
+       pop     %rbx
+       pop     %rdi
+       pop     %rsi
+-      ret
++      RET
+ .size avx_handler,.-avx_handler
+ .section      .pdata
diff --git a/queue-5.10/intel_idle-disable-ibrs-during-long-idle.patch b/queue-5.10/intel_idle-disable-ibrs-during-long-idle.patch
new file mode 100644 (file)
index 0000000..836e6b9
--- /dev/null
@@ -0,0 +1,183 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:58 +0200
+Subject: intel_idle: Disable IBRS during long idle
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit bf5835bcdb9635c97f85120dba9bfa21e111130f upstream.
+
+Having IBRS enabled while the SMT sibling is idle unnecessarily slows
+down the running sibling. OTOH, disabling IBRS around idle takes two
+MSR writes, which will increase the idle latency.
+
+Therefore, only disable IBRS around deeper idle states. Shallow idle
+states are bounded by the tick in duration, since NOHZ is not allowed
+for them by virtue of their short target residency.
+
+Only do this for mwait-driven idle, since that keeps interrupts disabled
+across idle, which makes disabling IBRS vs IRQ-entry a non-issue.
+
+Note: C6 is a random threshold, most importantly C1 probably shouldn't
+disable IBRS, benchmarking needed.
+
+Suggested-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: no CPUIDLE_FLAG_IRQ_ENABLE]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |    1 
+ arch/x86/kernel/cpu/bugs.c           |    6 ++++
+ drivers/idle/intel_idle.c            |   43 ++++++++++++++++++++++++++++++-----
+ 3 files changed, 44 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -256,6 +256,7 @@ static inline void indirect_branch_predi
+ /* The Intel SPEC CTRL MSR base value cache */
+ extern u64 x86_spec_ctrl_base;
+ extern void write_spec_ctrl_current(u64 val, bool force);
++extern u64 spec_ctrl_current(void);
+ /*
+  * With retpoline, we must use IBRS to restrict branch prediction
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -78,6 +78,12 @@ void write_spec_ctrl_current(u64 val, bo
+               wrmsrl(MSR_IA32_SPEC_CTRL, val);
+ }
++u64 spec_ctrl_current(void)
++{
++      return this_cpu_read(x86_spec_ctrl_current);
++}
++EXPORT_SYMBOL_GPL(spec_ctrl_current);
++
+ /*
+  * The vendor and possibly platform specific bits which can be modified in
+  * x86_spec_ctrl_base.
+--- a/drivers/idle/intel_idle.c
++++ b/drivers/idle/intel_idle.c
+@@ -47,11 +47,13 @@
+ #include <linux/tick.h>
+ #include <trace/events/power.h>
+ #include <linux/sched.h>
++#include <linux/sched/smt.h>
+ #include <linux/notifier.h>
+ #include <linux/cpu.h>
+ #include <linux/moduleparam.h>
+ #include <asm/cpu_device_id.h>
+ #include <asm/intel-family.h>
++#include <asm/nospec-branch.h>
+ #include <asm/mwait.h>
+ #include <asm/msr.h>
+@@ -94,6 +96,12 @@ static unsigned int mwait_substates __in
+ #define CPUIDLE_FLAG_ALWAYS_ENABLE    BIT(15)
+ /*
++ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
++ * above.
++ */
++#define CPUIDLE_FLAG_IBRS             BIT(16)
++
++/*
+  * MWAIT takes an 8-bit "hint" in EAX "suggesting"
+  * the C-state (top nibble) and sub-state (bottom nibble)
+  * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
+@@ -132,6 +140,24 @@ static __cpuidle int intel_idle(struct c
+       return index;
+ }
++static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
++                                   struct cpuidle_driver *drv, int index)
++{
++      bool smt_active = sched_smt_active();
++      u64 spec_ctrl = spec_ctrl_current();
++      int ret;
++
++      if (smt_active)
++              wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++
++      ret = intel_idle(dev, drv, index);
++
++      if (smt_active)
++              wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
++
++      return ret;
++}
++
+ /**
+  * intel_idle_s2idle - Ask the processor to enter the given idle state.
+  * @dev: cpuidle device of the target CPU.
+@@ -653,7 +679,7 @@ static struct cpuidle_state skl_cstates[
+       {
+               .name = "C6",
+               .desc = "MWAIT 0x20",
+-              .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+               .exit_latency = 85,
+               .target_residency = 200,
+               .enter = &intel_idle,
+@@ -661,7 +687,7 @@ static struct cpuidle_state skl_cstates[
+       {
+               .name = "C7s",
+               .desc = "MWAIT 0x33",
+-              .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+               .exit_latency = 124,
+               .target_residency = 800,
+               .enter = &intel_idle,
+@@ -669,7 +695,7 @@ static struct cpuidle_state skl_cstates[
+       {
+               .name = "C8",
+               .desc = "MWAIT 0x40",
+-              .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+               .exit_latency = 200,
+               .target_residency = 800,
+               .enter = &intel_idle,
+@@ -677,7 +703,7 @@ static struct cpuidle_state skl_cstates[
+       {
+               .name = "C9",
+               .desc = "MWAIT 0x50",
+-              .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+               .exit_latency = 480,
+               .target_residency = 5000,
+               .enter = &intel_idle,
+@@ -685,7 +711,7 @@ static struct cpuidle_state skl_cstates[
+       {
+               .name = "C10",
+               .desc = "MWAIT 0x60",
+-              .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+               .exit_latency = 890,
+               .target_residency = 5000,
+               .enter = &intel_idle,
+@@ -714,7 +740,7 @@ static struct cpuidle_state skx_cstates[
+       {
+               .name = "C6",
+               .desc = "MWAIT 0x20",
+-              .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+               .exit_latency = 133,
+               .target_residency = 600,
+               .enter = &intel_idle,
+@@ -1501,6 +1527,11 @@ static void __init intel_idle_init_cstat
+               /* Structure copy. */
+               drv->states[drv->state_count] = cpuidle_state_table[cstate];
++              if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
++                  cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
++                      drv->states[drv->state_count].enter = intel_idle_ibrs;
++              }
++
+               if ((disabled_states_mask & BIT(drv->state_count)) ||
+                   ((icpu->use_acpi || force_use_acpi) &&
+                    intel_idle_off_by_default(mwait_hint) &&
diff --git a/queue-5.10/kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch b/queue-5.10/kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch
new file mode 100644 (file)
index 0000000..b505df4
--- /dev/null
@@ -0,0 +1,110 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Wed, 16 Mar 2022 22:05:52 +0100
+Subject: kvm/emulate: Fix SETcc emulation function offsets with SLS
+
+From: Borislav Petkov <bp@suse.de>
+
+commit fe83f5eae432ccc8e90082d6ed506d5233547473 upstream.
+
+The commit in Fixes started adding INT3 after RETs as a mitigation
+against straight-line speculation.
+
+The fastop SETcc implementation in kvm's insn emulator uses macro magic
+to generate all possible SETcc functions and to jump to them when
+emulating the respective instruction.
+
+However, it hardcodes the size and alignment of those functions to 4: a
+three-byte SETcc insn and a single-byte RET. BUT, with SLS, there's an
+INT3 that gets slapped after the RET, which brings the whole scheme out
+of alignment:
+
+  15:   0f 90 c0                seto   %al
+  18:   c3                      ret
+  19:   cc                      int3
+  1a:   0f 1f 00                nopl   (%rax)
+  1d:   0f 91 c0                setno  %al
+  20:   c3                      ret
+  21:   cc                      int3
+  22:   0f 1f 00                nopl   (%rax)
+  25:   0f 92 c0                setb   %al
+  28:   c3                      ret
+  29:   cc                      int3
+
+and this explodes like this:
+
+  int3: 0000 [#1] PREEMPT SMP PTI
+  CPU: 0 PID: 2435 Comm: qemu-system-x86 Not tainted 5.17.0-rc8-sls #1
+  Hardware name: Dell Inc. Precision WorkStation T3400  /0TP412, BIOS A14 04/30/2012
+  RIP: 0010:setc+0x5/0x8 [kvm]
+  Code: 00 00 0f 1f 00 0f b6 05 43 24 06 00 c3 cc 0f 1f 80 00 00 00 00 0f 90 c0 c3 cc 0f \
+         1f 00 0f 91 c0 c3 cc 0f 1f 00 0f 92 c0 c3 cc <0f> 1f 00 0f 93 c0 c3 cc 0f 1f 00 \
+         0f 94 c0 c3 cc 0f 1f 00 0f 95 c0
+  Call Trace:
+   <TASK>
+   ? x86_emulate_insn [kvm]
+   ? x86_emulate_instruction [kvm]
+   ? vmx_handle_exit [kvm_intel]
+   ? kvm_arch_vcpu_ioctl_run [kvm]
+   ? kvm_vcpu_ioctl [kvm]
+   ? __x64_sys_ioctl
+   ? do_syscall_64
+   ? entry_SYSCALL_64_after_hwframe
+   </TASK>
+
+Raise the alignment value when SLS is enabled and use a macro for that
+instead of hard-coding naked numbers.
+
+Fixes: e463a09af2f0 ("x86: Add straight-line-speculation mitigation")
+Reported-by: Jamie Heilman <jamie@audible.transient.net>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Jamie Heilman <jamie@audible.transient.net>
+Link: https://lore.kernel.org/r/YjGzJwjrvxg5YZ0Z@audible.transient.net
+[Add a comment and a bit of safety checking, since this is going to be changed
+ again for IBT support. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c |   19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -428,8 +428,23 @@ static int fastop(struct x86_emulate_ctx
+       FOP_END
+ /* Special case for SETcc - 1 instruction per cc */
++
++/*
++ * Depending on .config the SETcc functions look like:
++ *
++ * SETcc %al   [3 bytes]
++ * RET         [1 byte]
++ * INT3        [1 byte; CONFIG_SLS]
++ *
++ * Which gives possible sizes 4 or 5.  When rounded up to the
++ * next power-of-two alignment they become 4 or 8.
++ */
++#define SETCC_LENGTH  (4 + IS_ENABLED(CONFIG_SLS))
++#define SETCC_ALIGN   (4 << IS_ENABLED(CONFIG_SLS))
++static_assert(SETCC_LENGTH <= SETCC_ALIGN);
++
+ #define FOP_SETCC(op) \
+-      ".align 4 \n\t" \
++      ".align " __stringify(SETCC_ALIGN) " \n\t" \
+       ".type " #op ", @function \n\t" \
+       #op ": \n\t" \
+       #op " %al \n\t" \
+@@ -1055,7 +1070,7 @@ static int em_bsr_c(struct x86_emulate_c
+ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
+ {
+       u8 rc;
+-      void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
++      void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf);
+       flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
+       asm("push %[flags]; popf; " CALL_NOSPEC
diff --git a/queue-5.10/kvm-nvmx-use-__vmx_vcpu_run-in-nested_vmx_check_vmentry_hw.patch b/queue-5.10/kvm-nvmx-use-__vmx_vcpu_run-in-nested_vmx_check_vmentry_hw.patch
new file mode 100644 (file)
index 0000000..2a859f5
--- /dev/null
@@ -0,0 +1,112 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Uros Bizjak <ubizjak@gmail.com>
+Date: Wed, 30 Dec 2020 16:26:57 -0800
+Subject: KVM/nVMX: Use __vmx_vcpu_run in nested_vmx_check_vmentry_hw
+
+From: Uros Bizjak <ubizjak@gmail.com>
+
+commit 150f17bfab37e981ba03b37440638138ff2aa9ec upstream.
+
+Replace inline assembly in nested_vmx_check_vmentry_hw
+with a call to __vmx_vcpu_run.  The function is not
+performance critical, so (double) GPR save/restore
+in __vmx_vcpu_run can be tolerated, as far as performance
+effects are concerned.
+
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Sean Christopherson <seanjc@google.com>
+Reviewed-and-tested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
+[sean: dropped versioning info from changelog]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20201231002702.2223707-5-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c  |   32 +++-----------------------------
+ arch/x86/kvm/vmx/vmenter.S |    2 +-
+ arch/x86/kvm/vmx/vmx.c     |    2 --
+ arch/x86/kvm/vmx/vmx.h     |    1 +
+ 4 files changed, 5 insertions(+), 32 deletions(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -12,6 +12,7 @@
+ #include "nested.h"
+ #include "pmu.h"
+ #include "trace.h"
++#include "vmx.h"
+ #include "x86.h"
+ static bool __read_mostly enable_shadow_vmcs = 1;
+@@ -3075,35 +3076,8 @@ static int nested_vmx_check_vmentry_hw(s
+               vmx->loaded_vmcs->host_state.cr4 = cr4;
+       }
+-      asm(
+-              "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
+-              "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
+-              "je 1f \n\t"
+-              __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t"
+-              "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
+-              "1: \n\t"
+-              "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */
+-
+-              /* Check if vmlaunch or vmresume is needed */
+-              "cmpb $0, %c[launched](%[loaded_vmcs])\n\t"
+-
+-              /*
+-               * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set
+-               * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail
+-               * Valid.  vmx_vmenter() directly "returns" RFLAGS, and so the
+-               * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail.
+-               */
+-              "call vmx_vmenter\n\t"
+-
+-              CC_SET(be)
+-            : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail)
+-            : [HOST_RSP]"r"((unsigned long)HOST_RSP),
+-              [loaded_vmcs]"r"(vmx->loaded_vmcs),
+-              [launched]"i"(offsetof(struct loaded_vmcs, launched)),
+-              [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
+-              [wordsize]"i"(sizeof(ulong))
+-            : "memory"
+-      );
++      vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
++                               vmx->loaded_vmcs->launched);
+       if (vmx->msr_autoload.host.nr)
+               vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -44,7 +44,7 @@
+  * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
+  * to vmx_vmexit.
+  */
+-SYM_FUNC_START(vmx_vmenter)
++SYM_FUNC_START_LOCAL(vmx_vmenter)
+       /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
+       je 2f
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6687,8 +6687,6 @@ static fastpath_t vmx_exit_handlers_fast
+       }
+ }
+-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
+-
+ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+                                       struct vcpu_vmx *vmx)
+ {
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -365,6 +365,7 @@ void vmx_set_virtual_apic_mode(struct kv
+ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
+ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
+ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
++bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
+ int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
+ void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
diff --git a/queue-5.10/kvm-vmx-convert-launched-argument-to-flags.patch b/queue-5.10/kvm-vmx-convert-launched-argument-to-flags.patch
new file mode 100644 (file)
index 0000000..8ba2501
--- /dev/null
@@ -0,0 +1,171 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:12 +0200
+Subject: KVM: VMX: Convert launched argument to flags
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit bb06650634d3552c0f8557e9d16aa1a408040e28 upstream.
+
+Convert __vmx_vcpu_run()'s 'launched' argument to 'flags', in
+preparation for doing SPEC_CTRL handling immediately after vmexit, which
+will need another flag.
+
+This is much easier than adding a fourth argument, because this code
+supports both 32-bit and 64-bit, and the fourth argument on 32-bit would
+have to be pushed on the stack.
+
+Note that __vmx_vcpu_run_flags() is called outside of the noinstr
+critical section because it will soon start calling potentially
+traceable functions.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c    |    2 +-
+ arch/x86/kvm/vmx/run_flags.h |    7 +++++++
+ arch/x86/kvm/vmx/vmenter.S   |    9 +++++----
+ arch/x86/kvm/vmx/vmx.c       |   17 ++++++++++++++---
+ arch/x86/kvm/vmx/vmx.h       |    5 ++++-
+ 5 files changed, 31 insertions(+), 9 deletions(-)
+ create mode 100644 arch/x86/kvm/vmx/run_flags.h
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -3077,7 +3077,7 @@ static int nested_vmx_check_vmentry_hw(s
+       }
+       vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
+-                               vmx->loaded_vmcs->launched);
++                               __vmx_vcpu_run_flags(vmx));
+       if (vmx->msr_autoload.host.nr)
+               vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+--- /dev/null
++++ b/arch/x86/kvm/vmx/run_flags.h
+@@ -0,0 +1,7 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef __KVM_X86_VMX_RUN_FLAGS_H
++#define __KVM_X86_VMX_RUN_FLAGS_H
++
++#define VMX_RUN_VMRESUME      (1 << 0)
++
++#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -5,6 +5,7 @@
+ #include <asm/kvm_vcpu_regs.h>
+ #include <asm/nospec-branch.h>
+ #include <asm/segment.h>
++#include "run_flags.h"
+ #define WORD_SIZE (BITS_PER_LONG / 8)
+@@ -34,7 +35,7 @@
+  * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
+  * @vmx:      struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
+  * @regs:     unsigned long * (to guest registers)
+- * @launched: %true if the VMCS has been launched
++ * @flags:    VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
+  *
+  * Returns:
+  *    0 on VM-Exit, 1 on VM-Fail
+@@ -59,7 +60,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+        */
+       push %_ASM_ARG2
+-      /* Copy @launched to BL, _ASM_ARG3 is volatile. */
++      /* Copy @flags to BL, _ASM_ARG3 is volatile. */
+       mov %_ASM_ARG3B, %bl
+       lea (%_ASM_SP), %_ASM_ARG2
+@@ -69,7 +70,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       mov (%_ASM_SP), %_ASM_AX
+       /* Check if vmlaunch or vmresume is needed */
+-      testb %bl, %bl
++      testb $VMX_RUN_VMRESUME, %bl
+       /* Load guest registers.  Don't clobber flags. */
+       mov VCPU_RCX(%_ASM_AX), %_ASM_CX
+@@ -92,7 +93,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       mov VCPU_RAX(%_ASM_AX), %_ASM_AX
+       /* Check EFLAGS.ZF from 'testb' above */
+-      je .Lvmlaunch
++      jz .Lvmlaunch
+       /*
+        * After a successful VMRESUME/VMLAUNCH, control flow "magically"
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -936,6 +936,16 @@ static bool msr_write_intercepted(struct
+       return true;
+ }
++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
++{
++      unsigned int flags = 0;
++
++      if (vmx->loaded_vmcs->launched)
++              flags |= VMX_RUN_VMRESUME;
++
++      return flags;
++}
++
+ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+               unsigned long entry, unsigned long exit)
+ {
+@@ -6688,7 +6698,8 @@ static fastpath_t vmx_exit_handlers_fast
+ }
+ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+-                                      struct vcpu_vmx *vmx)
++                                      struct vcpu_vmx *vmx,
++                                      unsigned long flags)
+ {
+       /*
+        * VMENTER enables interrupts (host state), but the kernel state is
+@@ -6725,7 +6736,7 @@ static noinstr void vmx_vcpu_enter_exit(
+               native_write_cr2(vcpu->arch.cr2);
+       vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
+-                                 vmx->loaded_vmcs->launched);
++                                 flags);
+       vcpu->arch.cr2 = native_read_cr2();
+@@ -6824,7 +6835,7 @@ reenter_guest:
+       x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
+       /* The actual VMENTER/EXIT is in the .noinstr.text section. */
+-      vmx_vcpu_enter_exit(vcpu, vmx);
++      vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
+       /*
+        * We do not use IBRS in the kernel. If this vCPU has used the
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -13,6 +13,7 @@
+ #include "vmcs.h"
+ #include "vmx_ops.h"
+ #include "cpuid.h"
++#include "run_flags.h"
+ extern const u32 vmx_msr_index[];
+@@ -365,7 +366,9 @@ void vmx_set_virtual_apic_mode(struct kv
+ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
+ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
+ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
+-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
++bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
++                  unsigned int flags);
+ int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
+ void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
diff --git a/queue-5.10/kvm-vmx-fix-ibrs-handling-after-vmexit.patch b/queue-5.10/kvm-vmx-fix-ibrs-handling-after-vmexit.patch
new file mode 100644 (file)
index 0000000..1ece0ea
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:14 +0200
+Subject: KVM: VMX: Fix IBRS handling after vmexit
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit bea7e31a5caccb6fe8ed989c065072354f0ecb52 upstream.
+
+For legacy IBRS to work, the IBRS bit needs to be always re-written
+after vmexit, even if it's already on.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6706,8 +6706,13 @@ void noinstr vmx_spec_ctrl_restore_host(
+       /*
+        * If the guest/host SPEC_CTRL values differ, restore the host value.
++       *
++       * For legacy IBRS, the IBRS bit always needs to be written after
++       * transitioning from a less privileged predictor mode, regardless of
++       * whether the guest/host values differ.
+        */
+-      if (vmx->spec_ctrl != hostval)
++      if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
++          vmx->spec_ctrl != hostval)
+               native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
+       barrier_nospec();
diff --git a/queue-5.10/kvm-vmx-flatten-__vmx_vcpu_run.patch b/queue-5.10/kvm-vmx-flatten-__vmx_vcpu_run.patch
new file mode 100644 (file)
index 0000000..add91ea
--- /dev/null
@@ -0,0 +1,197 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:11 +0200
+Subject: KVM: VMX: Flatten __vmx_vcpu_run()
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 8bd200d23ec42d66ccd517a72dd0b9cc6132d2fd upstream.
+
+Move the vmx_vm{enter,exit}() functionality into __vmx_vcpu_run().  This
+will make it easier to do the spec_ctrl handling before the first RET.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: remove ENDBR]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmenter.S |  118 +++++++++++++++++----------------------------
+ 1 file changed, 45 insertions(+), 73 deletions(-)
+
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -31,68 +31,6 @@
+ .section .noinstr.text, "ax"
+ /**
+- * vmx_vmenter - VM-Enter the current loaded VMCS
+- *
+- * %RFLAGS.ZF:        !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME
+- *
+- * Returns:
+- *    %RFLAGS.CF is set on VM-Fail Invalid
+- *    %RFLAGS.ZF is set on VM-Fail Valid
+- *    %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
+- *
+- * Note that VMRESUME/VMLAUNCH fall-through and return directly if
+- * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
+- * to vmx_vmexit.
+- */
+-SYM_FUNC_START_LOCAL(vmx_vmenter)
+-      /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
+-      je 2f
+-
+-1:    vmresume
+-      RET
+-
+-2:    vmlaunch
+-      RET
+-
+-3:    cmpb $0, kvm_rebooting
+-      je 4f
+-      RET
+-4:    ud2
+-
+-      _ASM_EXTABLE(1b, 3b)
+-      _ASM_EXTABLE(2b, 3b)
+-
+-SYM_FUNC_END(vmx_vmenter)
+-
+-/**
+- * vmx_vmexit - Handle a VMX VM-Exit
+- *
+- * Returns:
+- *    %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
+- *
+- * This is vmx_vmenter's partner in crime.  On a VM-Exit, control will jump
+- * here after hardware loads the host's state, i.e. this is the destination
+- * referred to by VMCS.HOST_RIP.
+- */
+-SYM_FUNC_START(vmx_vmexit)
+-#ifdef CONFIG_RETPOLINE
+-      ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
+-      /* Preserve guest's RAX, it's used to stuff the RSB. */
+-      push %_ASM_AX
+-
+-      /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+-      FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+-
+-      /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
+-      or $1, %_ASM_AX
+-
+-      pop %_ASM_AX
+-.Lvmexit_skip_rsb:
+-#endif
+-      RET
+-SYM_FUNC_END(vmx_vmexit)
+-
+-/**
+  * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
+  * @vmx:      struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
+  * @regs:     unsigned long * (to guest registers)
+@@ -124,8 +62,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       /* Copy @launched to BL, _ASM_ARG3 is volatile. */
+       mov %_ASM_ARG3B, %bl
+-      /* Adjust RSP to account for the CALL to vmx_vmenter(). */
+-      lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2
++      lea (%_ASM_SP), %_ASM_ARG2
+       call vmx_update_host_rsp
+       /* Load @regs to RAX. */
+@@ -154,11 +91,36 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       /* Load guest RAX.  This kills the @regs pointer! */
+       mov VCPU_RAX(%_ASM_AX), %_ASM_AX
+-      /* Enter guest mode */
+-      call vmx_vmenter
++      /* Check EFLAGS.ZF from 'testb' above */
++      je .Lvmlaunch
+-      /* Jump on VM-Fail. */
+-      jbe 2f
++      /*
++       * After a successful VMRESUME/VMLAUNCH, control flow "magically"
++       * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
++       * So this isn't a typical function and objtool needs to be told to
++       * save the unwind state here and restore it below.
++       */
++      UNWIND_HINT_SAVE
++
++/*
++ * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at
++ * the 'vmx_vmexit' label below.
++ */
++.Lvmresume:
++      vmresume
++      jmp .Lvmfail
++
++.Lvmlaunch:
++      vmlaunch
++      jmp .Lvmfail
++
++      _ASM_EXTABLE(.Lvmresume, .Lfixup)
++      _ASM_EXTABLE(.Lvmlaunch, .Lfixup)
++
++SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
++
++      /* Restore unwind state from before the VMRESUME/VMLAUNCH. */
++      UNWIND_HINT_RESTORE
+       /* Temporarily save guest's RAX. */
+       push %_ASM_AX
+@@ -185,9 +147,13 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       mov %r15, VCPU_R15(%_ASM_AX)
+ #endif
++      /* IMPORTANT: RSB must be stuffed before the first return. */
++      FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
++
+       /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
+       xor %eax, %eax
++.Lclear_regs:
+       /*
+        * Clear all general purpose registers except RSP and RAX to prevent
+        * speculative use of the guest's values, even those that are reloaded
+@@ -197,7 +163,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+        * free.  RSP and RAX are exempt as RSP is restored by hardware during
+        * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
+        */
+-1:    xor %ecx, %ecx
++      xor %ecx, %ecx
+       xor %edx, %edx
+       xor %ebx, %ebx
+       xor %ebp, %ebp
+@@ -216,8 +182,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       /* "POP" @regs. */
+       add $WORD_SIZE, %_ASM_SP
+-      pop %_ASM_BX
++      pop %_ASM_BX
+ #ifdef CONFIG_X86_64
+       pop %r12
+       pop %r13
+@@ -230,9 +196,15 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       pop %_ASM_BP
+       RET
+-      /* VM-Fail.  Out-of-line to avoid a taken Jcc after VM-Exit. */
+-2:    mov $1, %eax
+-      jmp 1b
++.Lfixup:
++      cmpb $0, kvm_rebooting
++      jne .Lvmfail
++      ud2
++.Lvmfail:
++      /* VM-Fail: set return value to 1 */
++      mov $1, %eax
++      jmp .Lclear_regs
++
+ SYM_FUNC_END(__vmx_vcpu_run)
diff --git a/queue-5.10/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch b/queue-5.10/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch
new file mode 100644 (file)
index 0000000..cface60
--- /dev/null
@@ -0,0 +1,241 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:13 +0200
+Subject: KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit fc02735b14fff8c6678b521d324ade27b1a3d4cf upstream.
+
+On eIBRS systems, the returns in the vmexit return path from
+__vmx_vcpu_run() to vmx_vcpu_run() are exposed to RSB poisoning attacks.
+
+Fix that by moving the post-vmexit spec_ctrl handling to immediately
+after the vmexit.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |    1 
+ arch/x86/kernel/cpu/bugs.c           |    4 ++
+ arch/x86/kvm/vmx/run_flags.h         |    1 
+ arch/x86/kvm/vmx/vmenter.S           |   49 +++++++++++++++++++++++++++--------
+ arch/x86/kvm/vmx/vmx.c               |   48 ++++++++++++++++++++--------------
+ arch/x86/kvm/vmx/vmx.h               |    1 
+ 6 files changed, 73 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -275,6 +275,7 @@ static inline void indirect_branch_predi
+ /* The Intel SPEC CTRL MSR base value cache */
+ extern u64 x86_spec_ctrl_base;
++extern u64 x86_spec_ctrl_current;
+ extern void write_spec_ctrl_current(u64 val, bool force);
+ extern u64 spec_ctrl_current(void);
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -186,6 +186,10 @@ void __init check_bugs(void)
+ #endif
+ }
++/*
++ * NOTE: For VMX, this function is not called in the vmexit path.
++ * It uses vmx_spec_ctrl_restore_host() instead.
++ */
+ void
+ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ {
+--- a/arch/x86/kvm/vmx/run_flags.h
++++ b/arch/x86/kvm/vmx/run_flags.h
+@@ -3,5 +3,6 @@
+ #define __KVM_X86_VMX_RUN_FLAGS_H
+ #define VMX_RUN_VMRESUME      (1 << 0)
++#define VMX_RUN_SAVE_SPEC_CTRL        (1 << 1)
+ #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -33,9 +33,10 @@
+ /**
+  * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
+- * @vmx:      struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
++ * @vmx:      struct vcpu_vmx *
+  * @regs:     unsigned long * (to guest registers)
+- * @flags:    VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
++ * @flags:    VMX_RUN_VMRESUME:       use VMRESUME instead of VMLAUNCH
++ *            VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
+  *
+  * Returns:
+  *    0 on VM-Exit, 1 on VM-Fail
+@@ -54,6 +55,12 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ #endif
+       push %_ASM_BX
++      /* Save @vmx for SPEC_CTRL handling */
++      push %_ASM_ARG1
++
++      /* Save @flags for SPEC_CTRL handling */
++      push %_ASM_ARG3
++
+       /*
+        * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
+        * @regs is needed after VM-Exit to save the guest's register values.
+@@ -148,25 +155,23 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL
+       mov %r15, VCPU_R15(%_ASM_AX)
+ #endif
+-      /* IMPORTANT: RSB must be stuffed before the first return. */
+-      FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+-
+-      /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
+-      xor %eax, %eax
++      /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
++      xor %ebx, %ebx
+ .Lclear_regs:
+       /*
+-       * Clear all general purpose registers except RSP and RAX to prevent
++       * Clear all general purpose registers except RSP and RBX to prevent
+        * speculative use of the guest's values, even those that are reloaded
+        * via the stack.  In theory, an L1 cache miss when restoring registers
+        * could lead to speculative execution with the guest's values.
+        * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
+        * free.  RSP and RAX are exempt as RSP is restored by hardware during
+-       * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
++       * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
++       * value.
+        */
++      xor %eax, %eax
+       xor %ecx, %ecx
+       xor %edx, %edx
+-      xor %ebx, %ebx
+       xor %ebp, %ebp
+       xor %esi, %esi
+       xor %edi, %edi
+@@ -184,6 +189,28 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL
+       /* "POP" @regs. */
+       add $WORD_SIZE, %_ASM_SP
++      /*
++       * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
++       * the first unbalanced RET after vmexit!
++       *
++       * For retpoline, RSB filling is needed to prevent poisoned RSB entries
++       * and (in some cases) RSB underflow.
++       *
++       * eIBRS has its own protection against poisoned RSB, so it doesn't
++       * need the RSB filling sequence.  But it does need to be enabled
++       * before the first unbalanced RET.
++         */
++
++      FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
++
++      pop %_ASM_ARG2  /* @flags */
++      pop %_ASM_ARG1  /* @vmx */
++
++      call vmx_spec_ctrl_restore_host
++
++      /* Put return value in AX */
++      mov %_ASM_BX, %_ASM_AX
++
+       pop %_ASM_BX
+ #ifdef CONFIG_X86_64
+       pop %r12
+@@ -203,7 +230,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL
+       ud2
+ .Lvmfail:
+       /* VM-Fail: set return value to 1 */
+-      mov $1, %eax
++      mov $1, %_ASM_BX
+       jmp .Lclear_regs
+ SYM_FUNC_END(__vmx_vcpu_run)
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -943,6 +943,14 @@ unsigned int __vmx_vcpu_run_flags(struct
+       if (vmx->loaded_vmcs->launched)
+               flags |= VMX_RUN_VMRESUME;
++      /*
++       * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
++       * to change it directly without causing a vmexit.  In that case read
++       * it after vmexit and store it in vmx->spec_ctrl.
++       */
++      if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
++              flags |= VMX_RUN_SAVE_SPEC_CTRL;
++
+       return flags;
+ }
+@@ -6685,6 +6693,26 @@ void noinstr vmx_update_host_rsp(struct
+       }
+ }
++void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
++                                      unsigned int flags)
++{
++      u64 hostval = this_cpu_read(x86_spec_ctrl_current);
++
++      if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
++              return;
++
++      if (flags & VMX_RUN_SAVE_SPEC_CTRL)
++              vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
++
++      /*
++       * If the guest/host SPEC_CTRL values differ, restore the host value.
++       */
++      if (vmx->spec_ctrl != hostval)
++              native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
++
++      barrier_nospec();
++}
++
+ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+ {
+       switch (to_vmx(vcpu)->exit_reason.basic) {
+@@ -6837,26 +6865,6 @@ reenter_guest:
+       /* The actual VMENTER/EXIT is in the .noinstr.text section. */
+       vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
+-      /*
+-       * We do not use IBRS in the kernel. If this vCPU has used the
+-       * SPEC_CTRL MSR it may have left it on; save the value and
+-       * turn it off. This is much more efficient than blindly adding
+-       * it to the atomic save/restore list. Especially as the former
+-       * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
+-       *
+-       * For non-nested case:
+-       * If the L01 MSR bitmap does not intercept the MSR, then we need to
+-       * save it.
+-       *
+-       * For nested case:
+-       * If the L02 MSR bitmap does not intercept the MSR, then we need to
+-       * save it.
+-       */
+-      if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
+-              vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+-
+-      x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
+-
+       /* All fields are clean at this point */
+       if (static_branch_unlikely(&enable_evmcs))
+               current_evmcs->hv_clean_fields |=
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -366,6 +366,7 @@ void vmx_set_virtual_apic_mode(struct kv
+ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
+ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
+ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
++void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
+ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
+ bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
+                   unsigned int flags);
diff --git a/queue-5.10/kvm-vmx-use-test-reg-reg-instead-of-cmp-0-reg-in-vmenter.s.patch b/queue-5.10/kvm-vmx-use-test-reg-reg-instead-of-cmp-0-reg-in-vmenter.s.patch
new file mode 100644 (file)
index 0000000..64171a4
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Uros Bizjak <ubizjak@gmail.com>
+Date: Thu, 29 Oct 2020 15:04:57 +0100
+Subject: KVM/VMX: Use TEST %REG,%REG instead of CMP $0,%REG in vmenter.S
+
+From: Uros Bizjak <ubizjak@gmail.com>
+
+commit 6c44221b05236cc65d76cb5dc2463f738edff39d upstream.
+
+Saves one byte in __vmx_vcpu_run for the same functionality.
+
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
+Message-Id: <20201029140457.126965-1-ubizjak@gmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmenter.S |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -132,7 +132,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       mov (%_ASM_SP), %_ASM_AX
+       /* Check if vmlaunch or vmresume is needed */
+-      cmpb $0, %bl
++      testb %bl, %bl
+       /* Load guest registers.  Don't clobber flags. */
+       mov VCPU_RCX(%_ASM_AX), %_ASM_CX
diff --git a/queue-5.10/makefile-set-retpoline-cflags-based-on-config_cc_is_-clang-gcc.patch b/queue-5.10/makefile-set-retpoline-cflags-based-on-config_cc_is_-clang-gcc.patch
new file mode 100644 (file)
index 0000000..130b635
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Mon, 11 Jul 2022 00:31:38 +0200
+Subject: Makefile: Set retpoline cflags based on CONFIG_CC_IS_{CLANG,GCC}
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+This was done as part of commit 7d73c3e9c51400d3e0e755488050804e4d44737a
+"Makefile: remove stale cc-option checks" upstream, and is needed to
+support backporting further retpoline changes.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Makefile |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -670,12 +670,14 @@ ifdef CONFIG_FUNCTION_TRACER
+   CC_FLAGS_FTRACE := -pg
+ endif
+-RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
+-RETPOLINE_VDSO_CFLAGS_GCC := -mindirect-branch=thunk-inline -mindirect-branch-register
+-RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
+-RETPOLINE_VDSO_CFLAGS_CLANG := -mretpoline
+-RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
+-RETPOLINE_VDSO_CFLAGS := $(call cc-option,$(RETPOLINE_VDSO_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_VDSO_CFLAGS_CLANG)))
++ifdef CONFIG_CC_IS_GCC
++RETPOLINE_CFLAGS      := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
++RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register)
++endif
++ifdef CONFIG_CC_IS_CLANG
++RETPOLINE_CFLAGS      := -mretpoline-external-thunk
++RETPOLINE_VDSO_CFLAGS := -mretpoline
++endif
+ export RETPOLINE_CFLAGS
+ export RETPOLINE_VDSO_CFLAGS
diff --git a/queue-5.10/objtool-add-alt_group-struct.patch b/queue-5.10/objtool-add-alt_group-struct.patch
new file mode 100644 (file)
index 0000000..96c7b98
--- /dev/null
@@ -0,0 +1,129 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Fri, 18 Dec 2020 14:19:32 -0600
+Subject: objtool: Add 'alt_group' struct
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit b23cc71c62747f2e4c3e56138872cf47e1294f8a upstream.
+
+Create a new struct associated with each group of alternatives
+instructions.  This will help with the removal of fake jumps, and more
+importantly with adding support for stack layout changes in
+alternatives.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |   29 +++++++++++++++++++++++------
+ tools/objtool/check.h |   13 ++++++++++++-
+ 2 files changed, 35 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1012,20 +1012,28 @@ static int handle_group_alt(struct objto
+                           struct instruction *orig_insn,
+                           struct instruction **new_insn)
+ {
+-      static unsigned int alt_group_next_index = 1;
+       struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
+-      unsigned int alt_group = alt_group_next_index++;
++      struct alt_group *orig_alt_group, *new_alt_group;
+       unsigned long dest_off;
++
++      orig_alt_group = malloc(sizeof(*orig_alt_group));
++      if (!orig_alt_group) {
++              WARN("malloc failed");
++              return -1;
++      }
+       last_orig_insn = NULL;
+       insn = orig_insn;
+       sec_for_each_insn_from(file, insn) {
+               if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
+                       break;
+-              insn->alt_group = alt_group;
++              insn->alt_group = orig_alt_group;
+               last_orig_insn = insn;
+       }
++      orig_alt_group->orig_group = NULL;
++      orig_alt_group->first_insn = orig_insn;
++      orig_alt_group->last_insn = last_orig_insn;
+       if (next_insn_same_sec(file, last_orig_insn)) {
+               fake_jump = malloc(sizeof(*fake_jump));
+@@ -1056,8 +1064,13 @@ static int handle_group_alt(struct objto
+               return 0;
+       }
++      new_alt_group = malloc(sizeof(*new_alt_group));
++      if (!new_alt_group) {
++              WARN("malloc failed");
++              return -1;
++      }
++
+       last_new_insn = NULL;
+-      alt_group = alt_group_next_index++;
+       insn = *new_insn;
+       sec_for_each_insn_from(file, insn) {
+               struct reloc *alt_reloc;
+@@ -1069,7 +1082,7 @@ static int handle_group_alt(struct objto
+               insn->ignore = orig_insn->ignore_alts;
+               insn->func = orig_insn->func;
+-              insn->alt_group = alt_group;
++              insn->alt_group = new_alt_group;
+               /*
+                * Since alternative replacement code is copy/pasted by the
+@@ -1118,6 +1131,10 @@ static int handle_group_alt(struct objto
+               return -1;
+       }
++      new_alt_group->orig_group = orig_alt_group;
++      new_alt_group->first_insn = *new_insn;
++      new_alt_group->last_insn = last_new_insn;
++
+       if (fake_jump)
+               list_add(&fake_jump->list, &last_new_insn->list);
+@@ -2440,7 +2457,7 @@ static int validate_return(struct symbol
+ static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
+ {
+       struct instruction *first_insn = insn;
+-      int alt_group = insn->alt_group;
++      struct alt_group *alt_group = insn->alt_group;
+       sec_for_each_insn_continue(file, insn) {
+               if (insn->alt_group != alt_group)
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -19,6 +19,17 @@ struct insn_state {
+       s8 instr;
+ };
++struct alt_group {
++      /*
++       * Pointer from a replacement group to the original group.  NULL if it
++       * *is* the original group.
++       */
++      struct alt_group *orig_group;
++
++      /* First and last instructions in the group */
++      struct instruction *first_insn, *last_insn;
++};
++
+ struct instruction {
+       struct list_head list;
+       struct hlist_node hash;
+@@ -34,7 +45,7 @@ struct instruction {
+       s8 instr;
+       u8 visited;
+       u8 ret_offset;
+-      int alt_group;
++      struct alt_group *alt_group;
+       struct symbol *call_dest;
+       struct instruction *jump_dest;
+       struct instruction *first_jump_src;
diff --git a/queue-5.10/objtool-add-elf_create_reloc-helper.patch b/queue-5.10/objtool-add-elf_create_reloc-helper.patch
new file mode 100644 (file)
index 0000000..74d92a2
--- /dev/null
@@ -0,0 +1,302 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:07 +0100
+Subject: objtool: Add elf_create_reloc() helper
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit ef47cc01cb4abcd760d8ac66b9361d6ade4d0846 upstream.
+
+We have 4 instances of adding a relocation. Create a common helper
+to avoid growing even more.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151259.817438847@infradead.org
+[bwh: Backported to 5.10: drop changes in create_mcount_loc_sections()]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c   |   43 +++++-------------------
+ tools/objtool/elf.c     |   86 +++++++++++++++++++++++++++++++-----------------
+ tools/objtool/elf.h     |   10 +++--
+ tools/objtool/orc_gen.c |   30 +++-------------
+ 4 files changed, 79 insertions(+), 90 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -433,8 +433,7 @@ reachable:
+ static int create_static_call_sections(struct objtool_file *file)
+ {
+-      struct section *sec, *reloc_sec;
+-      struct reloc *reloc;
++      struct section *sec;
+       struct static_call_site *site;
+       struct instruction *insn;
+       struct symbol *key_sym;
+@@ -460,8 +459,7 @@ static int create_static_call_sections(s
+       if (!sec)
+               return -1;
+-      reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+-      if (!reloc_sec)
++      if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
+               return -1;
+       idx = 0;
+@@ -471,25 +469,11 @@ static int create_static_call_sections(s
+               memset(site, 0, sizeof(struct static_call_site));
+               /* populate reloc for 'addr' */
+-              reloc = malloc(sizeof(*reloc));
+-
+-              if (!reloc) {
+-                      perror("malloc");
++              if (elf_add_reloc_to_insn(file->elf, sec,
++                                        idx * sizeof(struct static_call_site),
++                                        R_X86_64_PC32,
++                                        insn->sec, insn->offset))
+                       return -1;
+-              }
+-              memset(reloc, 0, sizeof(*reloc));
+-
+-              insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc);
+-              if (!reloc->sym) {
+-                      WARN_FUNC("static call tramp: missing containing symbol",
+-                                insn->sec, insn->offset);
+-                      return -1;
+-              }
+-
+-              reloc->type = R_X86_64_PC32;
+-              reloc->offset = idx * sizeof(struct static_call_site);
+-              reloc->sec = reloc_sec;
+-              elf_add_reloc(file->elf, reloc);
+               /* find key symbol */
+               key_name = strdup(insn->call_dest->name);
+@@ -526,18 +510,11 @@ static int create_static_call_sections(s
+               free(key_name);
+               /* populate reloc for 'key' */
+-              reloc = malloc(sizeof(*reloc));
+-              if (!reloc) {
+-                      perror("malloc");
++              if (elf_add_reloc(file->elf, sec,
++                                idx * sizeof(struct static_call_site) + 4,
++                                R_X86_64_PC32, key_sym,
++                                is_sibling_call(insn) * STATIC_CALL_SITE_TAIL))
+                       return -1;
+-              }
+-              memset(reloc, 0, sizeof(*reloc));
+-              reloc->sym = key_sym;
+-              reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
+-              reloc->type = R_X86_64_PC32;
+-              reloc->offset = idx * sizeof(struct static_call_site) + 4;
+-              reloc->sec = reloc_sec;
+-              elf_add_reloc(file->elf, reloc);
+               idx++;
+       }
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -262,32 +262,6 @@ struct reloc *find_reloc_by_dest(const s
+       return find_reloc_by_dest_range(elf, sec, offset, 1);
+ }
+-void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
+-                            struct reloc *reloc)
+-{
+-      if (sec->sym) {
+-              reloc->sym = sec->sym;
+-              reloc->addend = offset;
+-              return;
+-      }
+-
+-      /*
+-       * The Clang assembler strips section symbols, so we have to reference
+-       * the function symbol instead:
+-       */
+-      reloc->sym = find_symbol_containing(sec, offset);
+-      if (!reloc->sym) {
+-              /*
+-               * Hack alert.  This happens when we need to reference the NOP
+-               * pad insn immediately after the function.
+-               */
+-              reloc->sym = find_symbol_containing(sec, offset - 1);
+-      }
+-
+-      if (reloc->sym)
+-              reloc->addend = offset - reloc->sym->offset;
+-}
+-
+ static int read_sections(struct elf *elf)
+ {
+       Elf_Scn *s = NULL;
+@@ -524,14 +498,66 @@ err:
+       return -1;
+ }
+-void elf_add_reloc(struct elf *elf, struct reloc *reloc)
++int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
++                unsigned int type, struct symbol *sym, int addend)
+ {
+-      struct section *sec = reloc->sec;
++      struct reloc *reloc;
+-      list_add_tail(&reloc->list, &sec->reloc_list);
++      reloc = malloc(sizeof(*reloc));
++      if (!reloc) {
++              perror("malloc");
++              return -1;
++      }
++      memset(reloc, 0, sizeof(*reloc));
++
++      reloc->sec = sec->reloc;
++      reloc->offset = offset;
++      reloc->type = type;
++      reloc->sym = sym;
++      reloc->addend = addend;
++
++      list_add_tail(&reloc->list, &sec->reloc->reloc_list);
+       elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+-      sec->changed = true;
++      sec->reloc->changed = true;
++
++      return 0;
++}
++
++int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
++                        unsigned long offset, unsigned int type,
++                        struct section *insn_sec, unsigned long insn_off)
++{
++      struct symbol *sym;
++      int addend;
++
++      if (insn_sec->sym) {
++              sym = insn_sec->sym;
++              addend = insn_off;
++
++      } else {
++              /*
++               * The Clang assembler strips section symbols, so we have to
++               * reference the function symbol instead:
++               */
++              sym = find_symbol_containing(insn_sec, insn_off);
++              if (!sym) {
++                      /*
++                       * Hack alert.  This happens when we need to reference
++                       * the NOP pad insn immediately after the function.
++                       */
++                      sym = find_symbol_containing(insn_sec, insn_off - 1);
++              }
++
++              if (!sym) {
++                      WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off);
++                      return -1;
++              }
++
++              addend = insn_off - sym->offset;
++      }
++
++      return elf_add_reloc(elf, sec, offset, type, sym, addend);
+ }
+ static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -123,7 +123,13 @@ static inline u32 reloc_hash(struct relo
+ struct elf *elf_open_read(const char *name, int flags);
+ struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
+ struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
+-void elf_add_reloc(struct elf *elf, struct reloc *reloc);
++
++int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
++                unsigned int type, struct symbol *sym, int addend);
++int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
++                        unsigned long offset, unsigned int type,
++                        struct section *insn_sec, unsigned long insn_off);
++
+ int elf_write_insn(struct elf *elf, struct section *sec,
+                  unsigned long offset, unsigned int len,
+                  const char *insn);
+@@ -140,8 +146,6 @@ struct reloc *find_reloc_by_dest(const s
+ struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
+                                    unsigned long offset, unsigned int len);
+ struct symbol *find_func_containing(struct section *sec, unsigned long offset);
+-void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
+-                            struct reloc *reloc);
+ #define for_each_sec(file, sec)                                               \
+       list_for_each_entry(sec, &file->elf->sections, list)
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -81,37 +81,20 @@ static int init_orc_entry(struct orc_ent
+ }
+ static int write_orc_entry(struct elf *elf, struct section *orc_sec,
+-                         struct section *ip_rsec, unsigned int idx,
++                         struct section *ip_sec, unsigned int idx,
+                          struct section *insn_sec, unsigned long insn_off,
+                          struct orc_entry *o)
+ {
+       struct orc_entry *orc;
+-      struct reloc *reloc;
+       /* populate ORC data */
+       orc = (struct orc_entry *)orc_sec->data->d_buf + idx;
+       memcpy(orc, o, sizeof(*orc));
+       /* populate reloc for ip */
+-      reloc = malloc(sizeof(*reloc));
+-      if (!reloc) {
+-              perror("malloc");
++      if (elf_add_reloc_to_insn(elf, ip_sec, idx * sizeof(int), R_X86_64_PC32,
++                                insn_sec, insn_off))
+               return -1;
+-      }
+-      memset(reloc, 0, sizeof(*reloc));
+-
+-      insn_to_reloc_sym_addend(insn_sec, insn_off, reloc);
+-      if (!reloc->sym) {
+-              WARN("missing symbol for insn at offset 0x%lx",
+-                   insn_off);
+-              return -1;
+-      }
+-
+-      reloc->type = R_X86_64_PC32;
+-      reloc->offset = idx * sizeof(int);
+-      reloc->sec = ip_rsec;
+-
+-      elf_add_reloc(elf, reloc);
+       return 0;
+ }
+@@ -150,7 +133,7 @@ static unsigned long alt_group_len(struc
+ int orc_create(struct objtool_file *file)
+ {
+-      struct section *sec, *ip_rsec, *orc_sec;
++      struct section *sec, *orc_sec;
+       unsigned int nr = 0, idx = 0;
+       struct orc_list_entry *entry;
+       struct list_head orc_list;
+@@ -239,13 +222,12 @@ int orc_create(struct objtool_file *file
+       sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr);
+       if (!sec)
+               return -1;
+-      ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+-      if (!ip_rsec)
++      if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
+               return -1;
+       /* Write ORC entries to sections: */
+       list_for_each_entry(entry, &orc_list, list) {
+-              if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++,
++              if (write_orc_entry(file->elf, orc_sec, sec, idx++,
+                                   entry->insn_sec, entry->insn_off,
+                                   &entry->orc))
+                       return -1;
diff --git a/queue-5.10/objtool-add-elf_create_undef_symbol.patch b/queue-5.10/objtool-add-elf_create_undef_symbol.patch
new file mode 100644 (file)
index 0000000..55b3afc
--- /dev/null
@@ -0,0 +1,103 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:11 +0100
+Subject: objtool: Add elf_create_undef_symbol()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 2f2f7e47f0525cbaad5dd9675fd9d8aa8da12046 upstream.
+
+Allow objtool to create undefined symbols; this allows creating
+relocations to symbols not currently in the symbol table.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151300.064743095@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/elf.c |   60 ++++++++++++++++++++++++++++++++++++++++++++++++++++
+ tools/objtool/elf.h |    1 
+ 2 files changed, 61 insertions(+)
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -766,6 +766,66 @@ static int elf_add_string(struct elf *el
+       return len;
+ }
++struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
++{
++      struct section *symtab;
++      struct symbol *sym;
++      Elf_Data *data;
++      Elf_Scn *s;
++
++      sym = malloc(sizeof(*sym));
++      if (!sym) {
++              perror("malloc");
++              return NULL;
++      }
++      memset(sym, 0, sizeof(*sym));
++
++      sym->name = strdup(name);
++
++      sym->sym.st_name = elf_add_string(elf, NULL, sym->name);
++      if (sym->sym.st_name == -1)
++              return NULL;
++
++      sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE);
++      // st_other 0
++      // st_shndx 0
++      // st_value 0
++      // st_size 0
++
++      symtab = find_section_by_name(elf, ".symtab");
++      if (!symtab) {
++              WARN("can't find .symtab");
++              return NULL;
++      }
++
++      s = elf_getscn(elf->elf, symtab->idx);
++      if (!s) {
++              WARN_ELF("elf_getscn");
++              return NULL;
++      }
++
++      data = elf_newdata(s);
++      if (!data) {
++              WARN_ELF("elf_newdata");
++              return NULL;
++      }
++
++      data->d_buf = &sym->sym;
++      data->d_size = sizeof(sym->sym);
++      data->d_align = 1;
++
++      sym->idx = symtab->len / sizeof(sym->sym);
++
++      symtab->len += data->d_size;
++      symtab->changed = true;
++
++      sym->sec = find_section_by_index(elf, 0);
++
++      elf_add_symbol(elf, sym);
++
++      return sym;
++}
++
+ struct section *elf_create_section(struct elf *elf, const char *name,
+                                  unsigned int sh_flags, size_t entsize, int nr)
+ {
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -133,6 +133,7 @@ int elf_write_insn(struct elf *elf, stru
+                  unsigned long offset, unsigned int len,
+                  const char *insn);
+ int elf_write_reloc(struct elf *elf, struct reloc *reloc);
++struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name);
+ int elf_write(struct elf *elf);
+ void elf_close(struct elf *elf);
diff --git a/queue-5.10/objtool-add-entry-unret-validation.patch b/queue-5.10/objtool-add-entry-unret-validation.patch
new file mode 100644 (file)
index 0000000..f65919c
--- /dev/null
@@ -0,0 +1,533 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:16:03 +0200
+Subject: objtool: Add entry UNRET validation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit a09a6e2399ba0595c3042b3164f3ca68a3cff33e upstream.
+
+Since entry asm is tricky, add a validation pass that ensures the
+retbleed mitigation has been done before the first actual RET
+instruction.
+
+Entry points are those that either have UNWIND_HINT_ENTRY, which acts
+as UNWIND_HINT_EMPTY but marks the instruction as an entry point, or
+those that have UWIND_HINT_IRET_REGS at +0.
+
+This is basically a variant of validate_branch() that is
+intra-function and it will simply follow all branches from marked
+entry points and ensures that all paths lead to ANNOTATE_UNRET_END.
+
+If a path hits RET or an indirection the path is a fail and will be
+reported.
+
+There are 3 ANNOTATE_UNRET_END instances:
+
+ - UNTRAIN_RET itself
+ - exception from-kernel; this path doesn't need UNTRAIN_RET
+ - all early exceptions; these also don't need UNTRAIN_RET
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: arch/x86/entry/entry_64.S no pt_regs return at .Lerror_entry_done_lfence]
+[cascardo: tools/objtool/builtin-check.c no link option validation]
+[cascardo: tools/objtool/check.c opts.ibt is ibt]
+[cascardo: tools/objtool/include/objtool/builtin.h leave unret option as bool, no struct opts]
+[cascardo: objtool is still called from scripts/link-vmlinux.sh]
+[cascardo: no IBT support]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10:
+ - In scripts/link-vmlinux.sh, use "test -n" instead of is_enabled
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S            |    3 
+ arch/x86/entry/entry_64_compat.S     |    6 -
+ arch/x86/include/asm/nospec-branch.h |   12 ++
+ arch/x86/include/asm/unwind_hints.h  |    4 
+ arch/x86/kernel/head_64.S            |    5 +
+ arch/x86/xen/xen-asm.S               |   10 +-
+ include/linux/objtool.h              |    3 
+ scripts/link-vmlinux.sh              |    3 
+ tools/include/linux/objtool.h        |    3 
+ tools/objtool/builtin-check.c        |    3 
+ tools/objtool/builtin.h              |    2 
+ tools/objtool/check.c                |  172 ++++++++++++++++++++++++++++++++++-
+ tools/objtool/check.h                |    6 +
+ 13 files changed, 217 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -93,7 +93,7 @@ SYM_CODE_END(native_usergs_sysret64)
+  */
+ SYM_CODE_START(entry_SYSCALL_64)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       swapgs
+       /* tss.sp2 is scratch space. */
+@@ -1094,6 +1094,7 @@ SYM_CODE_START_LOCAL(error_entry)
+        */
+ .Lerror_entry_done_lfence:
+       FENCE_SWAPGS_KERNEL_ENTRY
++      ANNOTATE_UNRET_END
+       RET
+ .Lbstep_iret:
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -49,7 +49,7 @@
+  * 0(%ebp) arg6
+  */
+ SYM_CODE_START(entry_SYSENTER_compat)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       /* Interrupts are off on entry. */
+       SWAPGS
+@@ -202,7 +202,7 @@ SYM_CODE_END(entry_SYSENTER_compat)
+  * 0(%esp) arg6
+  */
+ SYM_CODE_START(entry_SYSCALL_compat)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       /* Interrupts are off on entry. */
+       swapgs
+@@ -349,7 +349,7 @@ SYM_CODE_END(entry_SYSCALL_compat)
+  * ebp  arg6
+  */
+ SYM_CODE_START(entry_INT80_compat)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       /*
+        * Interrupts are off on entry.
+        */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -82,6 +82,17 @@
+ #define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
+ /*
++ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
++ * eventually turn into it's own annotation.
++ */
++.macro ANNOTATE_UNRET_END
++#ifdef CONFIG_DEBUG_ENTRY
++      ANNOTATE_RETPOLINE_SAFE
++      nop
++#endif
++.endm
++
++/*
+  * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+  * indirect jmp/call which may be susceptible to the Spectre variant 2
+  * attack.
+@@ -131,6 +142,7 @@
+  */
+ .macro UNTRAIN_RET
+ #ifdef CONFIG_RETPOLINE
++      ANNOTATE_UNRET_END
+       ALTERNATIVE_2 "",                                               \
+                     "call zen_untrain_ret", X86_FEATURE_UNRET,        \
+                     "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
+--- a/arch/x86/include/asm/unwind_hints.h
++++ b/arch/x86/include/asm/unwind_hints.h
+@@ -11,6 +11,10 @@
+       UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1
+ .endm
++.macro UNWIND_HINT_ENTRY
++      UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1
++.endm
++
+ .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
+       .if \base == %rsp
+               .if \indirect
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -321,6 +321,8 @@ SYM_CODE_END(start_cpu0)
+ SYM_CODE_START_NOALIGN(vc_boot_ghcb)
+       UNWIND_HINT_IRET_REGS offset=8
++      ANNOTATE_UNRET_END
++
+       /* Build pt_regs */
+       PUSH_AND_CLEAR_REGS
+@@ -378,6 +380,7 @@ SYM_CODE_START(early_idt_handler_array)
+ SYM_CODE_END(early_idt_handler_array)
+ SYM_CODE_START_LOCAL(early_idt_handler_common)
++      ANNOTATE_UNRET_END
+       /*
+        * The stack is the hardware frame, an error code or zero, and the
+        * vector number.
+@@ -424,6 +427,8 @@ SYM_CODE_END(early_idt_handler_common)
+ SYM_CODE_START_NOALIGN(vc_no_ghcb)
+       UNWIND_HINT_IRET_REGS offset=8
++      ANNOTATE_UNRET_END
++
+       /* Build pt_regs */
+       PUSH_AND_CLEAR_REGS
+--- a/arch/x86/xen/xen-asm.S
++++ b/arch/x86/xen/xen-asm.S
+@@ -148,7 +148,7 @@ SYM_FUNC_END(xen_read_cr2_direct);
+ .macro xen_pv_trap name
+ SYM_CODE_START(xen_\name)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       pop %rcx
+       pop %r11
+       jmp  \name
+@@ -277,7 +277,7 @@ SYM_CODE_END(xenpv_restore_regs_and_retu
+ /* Normal 64-bit system call target */
+ SYM_CODE_START(xen_entry_SYSCALL_64)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       popq %rcx
+       popq %r11
+@@ -296,7 +296,7 @@ SYM_CODE_END(xen_entry_SYSCALL_64)
+ /* 32-bit compat syscall target */
+ SYM_CODE_START(xen_entry_SYSCALL_compat)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       popq %rcx
+       popq %r11
+@@ -313,7 +313,7 @@ SYM_CODE_END(xen_entry_SYSCALL_compat)
+ /* 32-bit compat sysenter target */
+ SYM_CODE_START(xen_entry_SYSENTER_compat)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       /*
+        * NB: Xen is polite and clears TF from EFLAGS for us.  This means
+        * that we don't need to guard against single step exceptions here.
+@@ -336,7 +336,7 @@ SYM_CODE_END(xen_entry_SYSENTER_compat)
+ SYM_CODE_START(xen_entry_SYSCALL_compat)
+ SYM_CODE_START(xen_entry_SYSENTER_compat)
+-      UNWIND_HINT_EMPTY
++      UNWIND_HINT_ENTRY
+       lea 16(%rsp), %rsp      /* strip %rcx, %r11 */
+       mov $-ENOSYS, %rax
+       pushq $0
+--- a/include/linux/objtool.h
++++ b/include/linux/objtool.h
+@@ -32,11 +32,14 @@ struct unwind_hint {
+  *
+  * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
+  * Useful for code which doesn't have an ELF function annotation.
++ *
++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
+  */
+ #define UNWIND_HINT_TYPE_CALL         0
+ #define UNWIND_HINT_TYPE_REGS         1
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC         3
++#define UNWIND_HINT_TYPE_ENTRY                4
+ #ifdef CONFIG_STACK_VALIDATION
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -65,6 +65,9 @@ objtool_link()
+       if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then
+               objtoolopt="check"
++              if [ -n "${CONFIG_RETPOLINE}" ]; then
++                      objtoolopt="${objtoolopt} --unret"
++              fi
+               if [ -z "${CONFIG_FRAME_POINTER}" ]; then
+                       objtoolopt="${objtoolopt} --no-fp"
+               fi
+--- a/tools/include/linux/objtool.h
++++ b/tools/include/linux/objtool.h
+@@ -32,11 +32,14 @@ struct unwind_hint {
+  *
+  * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
+  * Useful for code which doesn't have an ELF function annotation.
++ *
++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
+  */
+ #define UNWIND_HINT_TYPE_CALL         0
+ #define UNWIND_HINT_TYPE_REGS         1
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC         3
++#define UNWIND_HINT_TYPE_ENTRY                4
+ #ifdef CONFIG_STACK_VALIDATION
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -19,7 +19,7 @@
+ #include "objtool.h"
+ bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+-     validate_dup, vmlinux, sls;
++     validate_dup, vmlinux, sls, unret;
+ static const char * const check_usage[] = {
+       "objtool check [<options>] file.o",
+@@ -30,6 +30,7 @@ const struct option check_options[] = {
+       OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
+       OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
+       OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
++      OPT_BOOLEAN(0,   "unret", &unret, "validate entry unret placement"),
+       OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
+       OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
+       OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
+--- a/tools/objtool/builtin.h
++++ b/tools/objtool/builtin.h
+@@ -9,7 +9,7 @@
+ extern const struct option check_options[];
+ extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+-            validate_dup, vmlinux, sls;
++            validate_dup, vmlinux, sls, unret;
+ extern int cmd_check(int argc, const char **argv);
+ extern int cmd_orc(int argc, const char **argv);
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1752,6 +1752,19 @@ static int read_unwind_hints(struct objt
+               insn->hint = true;
++              if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
++                      struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
++
++                      if (sym && sym->bind == STB_GLOBAL) {
++                              insn->entry = 1;
++                      }
++              }
++
++              if (hint->type == UNWIND_HINT_TYPE_ENTRY) {
++                      hint->type = UNWIND_HINT_TYPE_CALL;
++                      insn->entry = 1;
++              }
++
+               if (hint->type == UNWIND_HINT_TYPE_FUNC) {
+                       insn->cfi = &func_cfi;
+                       continue;
+@@ -1800,8 +1813,9 @@ static int read_retpoline_hints(struct o
+               if (insn->type != INSN_JUMP_DYNAMIC &&
+                   insn->type != INSN_CALL_DYNAMIC &&
+-                  insn->type != INSN_RETURN) {
+-                      WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret",
++                  insn->type != INSN_RETURN &&
++                  insn->type != INSN_NOP) {
++                      WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop",
+                                 insn->sec, insn->offset);
+                       return -1;
+               }
+@@ -2818,8 +2832,8 @@ static int validate_branch(struct objtoo
+                       return 1;
+               }
+-              visited = 1 << state.uaccess;
+-              if (insn->visited) {
++              visited = VISITED_BRANCH << state.uaccess;
++              if (insn->visited & VISITED_BRANCH_MASK) {
+                       if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
+                               return 1;
+@@ -3045,6 +3059,145 @@ static int validate_unwind_hints(struct
+       return warnings;
+ }
++/*
++ * Validate rethunk entry constraint: must untrain RET before the first RET.
++ *
++ * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes
++ * before an actual RET instruction.
++ */
++static int validate_entry(struct objtool_file *file, struct instruction *insn)
++{
++      struct instruction *next, *dest;
++      int ret, warnings = 0;
++
++      for (;;) {
++              next = next_insn_to_validate(file, insn);
++
++              if (insn->visited & VISITED_ENTRY)
++                      return 0;
++
++              insn->visited |= VISITED_ENTRY;
++
++              if (!insn->ignore_alts && !list_empty(&insn->alts)) {
++                      struct alternative *alt;
++                      bool skip_orig = false;
++
++                      list_for_each_entry(alt, &insn->alts, list) {
++                              if (alt->skip_orig)
++                                      skip_orig = true;
++
++                              ret = validate_entry(file, alt->insn);
++                              if (ret) {
++                                      if (backtrace)
++                                              BT_FUNC("(alt)", insn);
++                                      return ret;
++                              }
++                      }
++
++                      if (skip_orig)
++                              return 0;
++              }
++
++              switch (insn->type) {
++
++              case INSN_CALL_DYNAMIC:
++              case INSN_JUMP_DYNAMIC:
++              case INSN_JUMP_DYNAMIC_CONDITIONAL:
++                      WARN_FUNC("early indirect call", insn->sec, insn->offset);
++                      return 1;
++
++              case INSN_JUMP_UNCONDITIONAL:
++              case INSN_JUMP_CONDITIONAL:
++                      if (!is_sibling_call(insn)) {
++                              if (!insn->jump_dest) {
++                                      WARN_FUNC("unresolved jump target after linking?!?",
++                                                insn->sec, insn->offset);
++                                      return -1;
++                              }
++                              ret = validate_entry(file, insn->jump_dest);
++                              if (ret) {
++                                      if (backtrace) {
++                                              BT_FUNC("(branch%s)", insn,
++                                                      insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : "");
++                                      }
++                                      return ret;
++                              }
++
++                              if (insn->type == INSN_JUMP_UNCONDITIONAL)
++                                      return 0;
++
++                              break;
++                      }
++
++                      /* fallthrough */
++              case INSN_CALL:
++                      dest = find_insn(file, insn->call_dest->sec,
++                                       insn->call_dest->offset);
++                      if (!dest) {
++                              WARN("Unresolved function after linking!?: %s",
++                                   insn->call_dest->name);
++                              return -1;
++                      }
++
++                      ret = validate_entry(file, dest);
++                      if (ret) {
++                              if (backtrace)
++                                      BT_FUNC("(call)", insn);
++                              return ret;
++                      }
++                      /*
++                       * If a call returns without error, it must have seen UNTRAIN_RET.
++                       * Therefore any non-error return is a success.
++                       */
++                      return 0;
++
++              case INSN_RETURN:
++                      WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset);
++                      return 1;
++
++              case INSN_NOP:
++                      if (insn->retpoline_safe)
++                              return 0;
++                      break;
++
++              default:
++                      break;
++              }
++
++              if (!next) {
++                      WARN_FUNC("teh end!", insn->sec, insn->offset);
++                      return -1;
++              }
++              insn = next;
++      }
++
++      return warnings;
++}
++
++/*
++ * Validate that all branches starting at 'insn->entry' encounter UNRET_END
++ * before RET.
++ */
++static int validate_unret(struct objtool_file *file)
++{
++      struct instruction *insn;
++      int ret, warnings = 0;
++
++      for_each_insn(file, insn) {
++              if (!insn->entry)
++                      continue;
++
++              ret = validate_entry(file, insn);
++              if (ret < 0) {
++                      WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset);
++                      return ret;
++              }
++              warnings += ret;
++      }
++
++      return warnings;
++}
++
+ static int validate_retpoline(struct objtool_file *file)
+ {
+       struct instruction *insn;
+@@ -3312,6 +3465,17 @@ int check(struct objtool_file *file)
+               goto out;
+       warnings += ret;
++      if (unret) {
++              /*
++               * Must be after validate_branch() and friends, it plays
++               * further games with insn->visited.
++               */
++              ret = validate_unret(file);
++              if (ret < 0)
++                      return ret;
++              warnings += ret;
++      }
++
+       if (!warnings) {
+               ret = validate_reachable_instructions(file);
+               if (ret < 0)
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -48,6 +48,7 @@ struct instruction {
+       bool dead_end, ignore, ignore_alts;
+       bool hint;
+       bool retpoline_safe;
++      bool entry;
+       s8 instr;
+       u8 visited;
+       struct alt_group *alt_group;
+@@ -62,6 +63,11 @@ struct instruction {
+       struct cfi_state *cfi;
+ };
++#define VISITED_BRANCH                0x01
++#define VISITED_BRANCH_UACCESS        0x02
++#define VISITED_BRANCH_MASK   0x03
++#define VISITED_ENTRY         0x04
++
+ static inline bool is_static_jump(struct instruction *insn)
+ {
+       return insn->type == INSN_JUMP_CONDITIONAL ||
diff --git a/queue-5.10/objtool-add-straight-line-speculation-validation.patch b/queue-5.10/objtool-add-straight-line-speculation-validation.patch
new file mode 100644 (file)
index 0000000..4ac4876
--- /dev/null
@@ -0,0 +1,135 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sat, 4 Dec 2021 14:43:42 +0100
+Subject: objtool: Add straight-line-speculation validation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1cc1e4c8aab4213bd4e6353dec2620476a233d6d upstream.
+
+Teach objtool to validate the straight-line-speculation constraints:
+
+ - speculation trap after indirect calls
+ - speculation trap after RET
+
+Notable: when an instruction is annotated RETPOLINE_SAFE, indicating
+  speculation isn't a problem, also don't care about sls for that
+  instruction.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20211204134908.023037659@infradead.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 5.10: adjust filenames, context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch.h            |    1 +
+ tools/objtool/arch/x86/decode.c |   13 +++++++++----
+ tools/objtool/builtin-check.c   |    4 +++-
+ tools/objtool/builtin.h         |    3 ++-
+ tools/objtool/check.c           |   14 ++++++++++++++
+ 5 files changed, 29 insertions(+), 6 deletions(-)
+
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -26,6 +26,7 @@ enum insn_type {
+       INSN_CLAC,
+       INSN_STD,
+       INSN_CLD,
++      INSN_TRAP,
+       INSN_OTHER,
+ };
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -456,6 +456,11 @@ int arch_decode_instruction(const struct
+               break;
++      case 0xcc:
++              /* int3 */
++              *type = INSN_TRAP;
++              break;
++
+       case 0xe3:
+               /* jecxz/jrcxz */
+               *type = INSN_JUMP_CONDITIONAL;
+@@ -592,10 +597,10 @@ const char *arch_ret_insn(int len)
+ {
+       static const char ret[5][5] = {
+               { BYTE_RET },
+-              { BYTE_RET, 0x90 },
+-              { BYTE_RET, 0x66, 0x90 },
+-              { BYTE_RET, 0x0f, 0x1f, 0x00 },
+-              { BYTE_RET, 0x0f, 0x1f, 0x40, 0x00 },
++              { BYTE_RET, 0xcc },
++              { BYTE_RET, 0xcc, 0x90 },
++              { BYTE_RET, 0xcc, 0x66, 0x90 },
++              { BYTE_RET, 0xcc, 0x0f, 0x1f, 0x00 },
+       };
+       if (len < 1 || len > 5) {
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -18,7 +18,8 @@
+ #include "builtin.h"
+ #include "objtool.h"
+-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
++bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
++     validate_dup, vmlinux, sls;
+ static const char * const check_usage[] = {
+       "objtool check [<options>] file.o",
+@@ -35,6 +36,7 @@ const struct option check_options[] = {
+       OPT_BOOLEAN('s', "stats", &stats, "print statistics"),
+       OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"),
+       OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
++      OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"),
+       OPT_END(),
+ };
+--- a/tools/objtool/builtin.h
++++ b/tools/objtool/builtin.h
+@@ -8,7 +8,8 @@
+ #include <subcmd/parse-options.h>
+ extern const struct option check_options[];
+-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
++extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
++            validate_dup, vmlinux, sls;
+ extern int cmd_check(int argc, const char **argv);
+ extern int cmd_orc(int argc, const char **argv);
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -2775,6 +2775,12 @@ static int validate_branch(struct objtoo
+               switch (insn->type) {
+               case INSN_RETURN:
++                      if (next_insn && next_insn->type == INSN_TRAP) {
++                              next_insn->ignore = true;
++                      } else if (sls && !insn->retpoline_safe) {
++                              WARN_FUNC("missing int3 after ret",
++                                        insn->sec, insn->offset);
++                      }
+                       return validate_return(func, insn, &state);
+               case INSN_CALL:
+@@ -2818,6 +2824,14 @@ static int validate_branch(struct objtoo
+                       break;
+               case INSN_JUMP_DYNAMIC:
++                      if (next_insn && next_insn->type == INSN_TRAP) {
++                              next_insn->ignore = true;
++                      } else if (sls && !insn->retpoline_safe) {
++                              WARN_FUNC("missing int3 after indirect jump",
++                                        insn->sec, insn->offset);
++                      }
++
++                      /* fallthrough */
+               case INSN_JUMP_DYNAMIC_CONDITIONAL:
+                       if (is_sibling_call(insn)) {
+                               ret = validate_sibling_call(insn, &state);
diff --git a/queue-5.10/objtool-assume-only-elf-functions-do-sibling-calls.patch b/queue-5.10/objtool-assume-only-elf-functions-do-sibling-calls.patch
new file mode 100644 (file)
index 0000000..373bc65
--- /dev/null
@@ -0,0 +1,121 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 21 Jan 2021 15:29:22 -0600
+Subject: objtool: Assume only ELF functions do sibling calls
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit ecf11ba4d066fe527586c6edd6ca68457ca55cf4 upstream.
+
+There's an inconsistency in how sibling calls are detected in
+non-function asm code, depending on the scope of the object.  If the
+target code is external to the object, objtool considers it a sibling
+call.  If the target code is internal but not a function, objtool
+*doesn't* consider it a sibling call.
+
+This can cause some inconsistencies between per-object and vmlinux.o
+validation.
+
+Instead, assume only ELF functions can do sibling calls.  This generally
+matches existing reality, and makes sibling call validation consistent
+between vmlinux.o and per-object.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/0e9ab6f3628cc7bf3bde7aa6762d54d7df19ad78.1611263461.git.jpoimboe@redhat.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |   36 ++++++++++++++++++++++--------------
+ 1 file changed, 22 insertions(+), 14 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -109,15 +109,20 @@ static struct instruction *prev_insn_sam
+ static bool is_sibling_call(struct instruction *insn)
+ {
++      /*
++       * Assume only ELF functions can make sibling calls.  This ensures
++       * sibling call detection consistency between vmlinux.o and individual
++       * objects.
++       */
++      if (!insn->func)
++              return false;
++
+       /* An indirect jump is either a sibling call or a jump to a table. */
+       if (insn->type == INSN_JUMP_DYNAMIC)
+               return list_empty(&insn->alts);
+-      if (!is_static_jump(insn))
+-              return false;
+-
+       /* add_jump_destinations() sets insn->call_dest for sibling calls. */
+-      return !!insn->call_dest;
++      return (is_static_jump(insn) && insn->call_dest);
+ }
+ /*
+@@ -788,7 +793,7 @@ static int add_jump_destinations(struct
+                       continue;
+               reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+-                                             insn->offset, insn->len);
++                                               insn->offset, insn->len);
+               if (!reloc) {
+                       dest_sec = insn->sec;
+                       dest_off = arch_jump_destination(insn);
+@@ -808,18 +813,21 @@ static int add_jump_destinations(struct
+                       insn->retpoline_safe = true;
+                       continue;
+-              } else if (reloc->sym->sec->idx) {
+-                      dest_sec = reloc->sym->sec;
+-                      dest_off = reloc->sym->sym.st_value +
+-                                 arch_dest_reloc_offset(reloc->addend);
+-              } else {
+-                      /* external sibling call */
++              } else if (insn->func) {
++                      /* internal or external sibling call (with reloc) */
+                       insn->call_dest = reloc->sym;
+                       if (insn->call_dest->static_call_tramp) {
+                               list_add_tail(&insn->static_call_node,
+                                             &file->static_call_list);
+                       }
+                       continue;
++              } else if (reloc->sym->sec->idx) {
++                      dest_sec = reloc->sym->sec;
++                      dest_off = reloc->sym->sym.st_value +
++                                 arch_dest_reloc_offset(reloc->addend);
++              } else {
++                      /* non-func asm code jumping to another file */
++                      continue;
+               }
+               insn->jump_dest = find_insn(file, dest_sec, dest_off);
+@@ -868,7 +876,7 @@ static int add_jump_destinations(struct
+                       } else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
+                                  insn->jump_dest->offset == insn->jump_dest->func->offset) {
+-                              /* internal sibling call */
++                              /* internal sibling call (without reloc) */
+                               insn->call_dest = insn->jump_dest->func;
+                               if (insn->call_dest->static_call_tramp) {
+                                       list_add_tail(&insn->static_call_node,
+@@ -2570,7 +2578,7 @@ static int validate_branch(struct objtoo
+               case INSN_JUMP_CONDITIONAL:
+               case INSN_JUMP_UNCONDITIONAL:
+-                      if (func && is_sibling_call(insn)) {
++                      if (is_sibling_call(insn)) {
+                               ret = validate_sibling_call(insn, &state);
+                               if (ret)
+                                       return ret;
+@@ -2592,7 +2600,7 @@ static int validate_branch(struct objtoo
+               case INSN_JUMP_DYNAMIC:
+               case INSN_JUMP_DYNAMIC_CONDITIONAL:
+-                      if (func && is_sibling_call(insn)) {
++                      if (is_sibling_call(insn)) {
+                               ret = validate_sibling_call(insn, &state);
+                               if (ret)
+                                       return ret;
diff --git a/queue-5.10/objtool-cache-instruction-relocs.patch b/queue-5.10/objtool-cache-instruction-relocs.patch
new file mode 100644 (file)
index 0000000..33986a8
--- /dev/null
@@ -0,0 +1,95 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:13 +0100
+Subject: objtool: Cache instruction relocs
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 7bd2a600f3e9d27286bbf23c83d599e9cc7cf245 upstream.
+
+Track the reloc of instructions in the new instruction->reloc field
+to avoid having to look them up again later.
+
+( Technically x86 instructions can have two relocations, but not jumps
+  and calls, for which we're using this. )
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151300.195441549@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |   28 ++++++++++++++++++++++------
+ tools/objtool/check.h |    1 +
+ 2 files changed, 23 insertions(+), 6 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -754,6 +754,25 @@ __weak bool arch_is_retpoline(struct sym
+       return false;
+ }
++#define NEGATIVE_RELOC        ((void *)-1L)
++
++static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
++{
++      if (insn->reloc == NEGATIVE_RELOC)
++              return NULL;
++
++      if (!insn->reloc) {
++              insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec,
++                                                     insn->offset, insn->len);
++              if (!insn->reloc) {
++                      insn->reloc = NEGATIVE_RELOC;
++                      return NULL;
++              }
++      }
++
++      return insn->reloc;
++}
++
+ /*
+  * Find the destination instructions for all jumps.
+  */
+@@ -768,8 +787,7 @@ static int add_jump_destinations(struct
+               if (!is_static_jump(insn))
+                       continue;
+-              reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+-                                               insn->offset, insn->len);
++              reloc = insn_reloc(file, insn);
+               if (!reloc) {
+                       dest_sec = insn->sec;
+                       dest_off = arch_jump_destination(insn);
+@@ -901,8 +919,7 @@ static int add_call_destinations(struct
+               if (insn->type != INSN_CALL)
+                       continue;
+-              reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+-                                             insn->offset, insn->len);
++              reloc = insn_reloc(file, insn);
+               if (!reloc) {
+                       dest_off = arch_jump_destination(insn);
+                       insn->call_dest = find_call_destination(insn->sec, dest_off);
+@@ -1085,8 +1102,7 @@ static int handle_group_alt(struct objto
+                * alternatives code can adjust the relative offsets
+                * accordingly.
+                */
+-              alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+-                                                 insn->offset, insn->len);
++              alt_reloc = insn_reloc(file, insn);
+               if (alt_reloc &&
+                   !arch_support_alt_relocation(special_alt, insn, alt_reloc)) {
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -55,6 +55,7 @@ struct instruction {
+       struct instruction *jump_dest;
+       struct instruction *first_jump_src;
+       struct reloc *jump_table;
++      struct reloc *reloc;
+       struct list_head alts;
+       struct symbol *func;
+       struct list_head stack_ops;
diff --git a/queue-5.10/objtool-classify-symbols.patch b/queue-5.10/objtool-classify-symbols.patch
new file mode 100644 (file)
index 0000000..b5957a5
--- /dev/null
@@ -0,0 +1,128 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:33 +0200
+Subject: objtool: Classify symbols
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1739c66eb7bd5f27f1b69a5a26e10e8327d1e136 upstream.
+
+In order to avoid calling str*cmp() on symbol names, over and over, do
+them all once upfront and store the result.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120309.658539311@infradead.org
+[cascardo: no pv_target on struct symbol, because of missing
+ db2b0c5d7b6f19b3c2cab08c531b65342eb5252b]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: objtool doesn't have any mcount handling]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |   32 +++++++++++++++++++++-----------
+ tools/objtool/elf.h   |    7 +++++--
+ 2 files changed, 26 insertions(+), 13 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -889,8 +889,7 @@ static void add_call_dest(struct objtool
+        * so they need a little help, NOP out any KCOV calls from noinstr
+        * text.
+        */
+-      if (insn->sec->noinstr &&
+-          !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) {
++      if (insn->sec->noinstr && insn->call_dest->kcov) {
+               if (reloc) {
+                       reloc->type = R_NONE;
+                       elf_write_reloc(file->elf, reloc);
+@@ -935,7 +934,7 @@ static int add_jump_destinations(struct
+               } else if (reloc->sym->type == STT_SECTION) {
+                       dest_sec = reloc->sym->sec;
+                       dest_off = arch_dest_reloc_offset(reloc->addend);
+-              } else if (arch_is_retpoline(reloc->sym)) {
++              } else if (reloc->sym->retpoline_thunk) {
+                       /*
+                        * Retpoline jumps are really dynamic jumps in
+                        * disguise, so convert them accordingly.
+@@ -1076,7 +1075,7 @@ static int add_call_destinations(struct
+                       add_call_dest(file, insn, dest, false);
+-              } else if (arch_is_retpoline(reloc->sym)) {
++              } else if (reloc->sym->retpoline_thunk) {
+                       /*
+                        * Retpoline calls are really dynamic calls in
+                        * disguise, so convert them accordingly.
+@@ -1733,17 +1732,28 @@ static int read_intra_function_calls(str
+       return 0;
+ }
+-static int read_static_call_tramps(struct objtool_file *file)
++static int classify_symbols(struct objtool_file *file)
+ {
+       struct section *sec;
+       struct symbol *func;
+       for_each_sec(file, sec) {
+               list_for_each_entry(func, &sec->symbol_list, list) {
+-                      if (func->bind == STB_GLOBAL &&
+-                          !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
++                      if (func->bind != STB_GLOBAL)
++                              continue;
++
++                      if (!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
+                                    strlen(STATIC_CALL_TRAMP_PREFIX_STR)))
+                               func->static_call_tramp = true;
++
++                      if (arch_is_retpoline(func))
++                              func->retpoline_thunk = true;
++
++                      if (!strcmp(func->name, "__fentry__"))
++                              func->fentry = true;
++
++                      if (!strncmp(func->name, "__sanitizer_cov_", 16))
++                              func->kcov = true;
+               }
+       }
+@@ -1805,7 +1815,7 @@ static int decode_sections(struct objtoo
+       /*
+        * Must be before add_{jump_call}_destination.
+        */
+-      ret = read_static_call_tramps(file);
++      ret = classify_symbols(file);
+       if (ret)
+               return ret;
+@@ -1863,9 +1873,9 @@ static int decode_sections(struct objtoo
+ static bool is_fentry_call(struct instruction *insn)
+ {
+-      if (insn->type == INSN_CALL && insn->call_dest &&
+-          insn->call_dest->type == STT_NOTYPE &&
+-          !strcmp(insn->call_dest->name, "__fentry__"))
++      if (insn->type == INSN_CALL &&
++          insn->call_dest &&
++          insn->call_dest->fentry)
+               return true;
+       return false;
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -55,8 +55,11 @@ struct symbol {
+       unsigned long offset;
+       unsigned int len;
+       struct symbol *pfunc, *cfunc, *alias;
+-      bool uaccess_safe;
+-      bool static_call_tramp;
++      u8 uaccess_safe      : 1;
++      u8 static_call_tramp : 1;
++      u8 retpoline_thunk   : 1;
++      u8 fentry            : 1;
++      u8 kcov              : 1;
+ };
+ struct reloc {
diff --git a/queue-5.10/objtool-combine-unwind_hint_ret_offset-and-unwind_hint_func.patch b/queue-5.10/objtool-combine-unwind_hint_ret_offset-and-unwind_hint_func.patch
new file mode 100644 (file)
index 0000000..2779762
--- /dev/null
@@ -0,0 +1,239 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 21 Jan 2021 15:29:24 -0600
+Subject: objtool: Combine UNWIND_HINT_RET_OFFSET and UNWIND_HINT_FUNC
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit b735bd3e68824316655252a931a3353a6ebc036f upstream.
+
+The ORC metadata generated for UNWIND_HINT_FUNC isn't actually very
+func-like.  With certain usages it can cause stack state mismatches
+because it doesn't set the return address (CFI_RA).
+
+Also, users of UNWIND_HINT_RET_OFFSET no longer need to set a custom
+return stack offset.  Instead they just need to specify a func-like
+situation, so the current ret_offset code is hacky for no good reason.
+
+Solve both problems by simplifying the RET_OFFSET handling and
+converting it into a more useful UNWIND_HINT_FUNC.
+
+If we end up needing the old 'ret_offset' functionality again in the
+future, we should be able to support it pretty easily with the addition
+of a custom 'sp_offset' in UNWIND_HINT_FUNC.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/db9d1f5d79dddfbb3725ef6d8ec3477ad199948d.1611263462.git.jpoimboe@redhat.com
+[bwh: Backported to 5.10:
+ - Don't use bswap_if_needed() since we don't have any of the other fixes
+   for mixed-endian cross-compilation
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/unwind_hints.h |   13 +-----------
+ arch/x86/kernel/ftrace_64.S         |    2 -
+ arch/x86/lib/retpoline.S            |    2 -
+ include/linux/objtool.h             |    5 +++-
+ tools/include/linux/objtool.h       |    5 +++-
+ tools/objtool/arch/x86/decode.c     |    4 +--
+ tools/objtool/check.c               |   37 ++++++++++++++----------------------
+ tools/objtool/check.h               |    1 
+ 8 files changed, 29 insertions(+), 40 deletions(-)
+
+--- a/arch/x86/include/asm/unwind_hints.h
++++ b/arch/x86/include/asm/unwind_hints.h
+@@ -48,17 +48,8 @@
+       UNWIND_HINT_REGS base=\base offset=\offset partial=1
+ .endm
+-.macro UNWIND_HINT_FUNC sp_offset=8
+-      UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=\sp_offset type=UNWIND_HINT_TYPE_CALL
+-.endm
+-
+-/*
+- * RET_OFFSET: Used on instructions that terminate a function; mostly RETURN
+- * and sibling calls. On these, sp_offset denotes the expected offset from
+- * initial_func_cfi.
+- */
+-.macro UNWIND_HINT_RET_OFFSET sp_offset=8
+-      UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset
++.macro UNWIND_HINT_FUNC
++      UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
+ .endm
+ #endif /* __ASSEMBLY__ */
+--- a/arch/x86/kernel/ftrace_64.S
++++ b/arch/x86/kernel/ftrace_64.S
+@@ -265,7 +265,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end,
+       restore_mcount_regs 8
+       /* Restore flags */
+       popfq
+-      UNWIND_HINT_RET_OFFSET
++      UNWIND_HINT_FUNC
+       jmp     ftrace_epilogue
+ SYM_FUNC_END(ftrace_regs_caller)
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -28,7 +28,7 @@ SYM_FUNC_START_NOALIGN(__x86_retpoline_\
+       jmp     .Lspec_trap_\@
+ .Ldo_rop_\@:
+       mov     %\reg, (%_ASM_SP)
+-      UNWIND_HINT_RET_OFFSET
++      UNWIND_HINT_FUNC
+       ret
+ SYM_FUNC_END(__x86_retpoline_\reg)
+--- a/include/linux/objtool.h
++++ b/include/linux/objtool.h
+@@ -29,11 +29,14 @@ struct unwind_hint {
+  *
+  * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
+  * sp_reg+sp_offset points to the iret return frame.
++ *
++ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
++ * Useful for code which doesn't have an ELF function annotation.
+  */
+ #define UNWIND_HINT_TYPE_CALL         0
+ #define UNWIND_HINT_TYPE_REGS         1
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+-#define UNWIND_HINT_TYPE_RET_OFFSET   3
++#define UNWIND_HINT_TYPE_FUNC         3
+ #ifdef CONFIG_STACK_VALIDATION
+--- a/tools/include/linux/objtool.h
++++ b/tools/include/linux/objtool.h
+@@ -29,11 +29,14 @@ struct unwind_hint {
+  *
+  * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
+  * sp_reg+sp_offset points to the iret return frame.
++ *
++ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
++ * Useful for code which doesn't have an ELF function annotation.
+  */
+ #define UNWIND_HINT_TYPE_CALL         0
+ #define UNWIND_HINT_TYPE_REGS         1
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+-#define UNWIND_HINT_TYPE_RET_OFFSET   3
++#define UNWIND_HINT_TYPE_FUNC         3
+ #ifdef CONFIG_STACK_VALIDATION
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -563,8 +563,8 @@ void arch_initial_func_cfi_state(struct
+       state->cfa.offset = 8;
+       /* initial RA (return address) */
+-      state->regs[16].base = CFI_CFA;
+-      state->regs[16].offset = -8;
++      state->regs[CFI_RA].base = CFI_CFA;
++      state->regs[CFI_RA].offset = -8;
+ }
+ const char *arch_nop_insn(int len)
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1423,13 +1423,20 @@ static int add_jump_table_alts(struct ob
+       return 0;
+ }
++static void set_func_state(struct cfi_state *state)
++{
++      state->cfa = initial_func_cfi.cfa;
++      memcpy(&state->regs, &initial_func_cfi.regs,
++             CFI_NUM_REGS * sizeof(struct cfi_reg));
++      state->stack_size = initial_func_cfi.cfa.offset;
++}
++
+ static int read_unwind_hints(struct objtool_file *file)
+ {
+       struct section *sec, *relocsec;
+       struct reloc *reloc;
+       struct unwind_hint *hint;
+       struct instruction *insn;
+-      struct cfi_reg *cfa;
+       int i;
+       sec = find_section_by_name(file->elf, ".discard.unwind_hints");
+@@ -1464,22 +1471,20 @@ static int read_unwind_hints(struct objt
+                       return -1;
+               }
+-              cfa = &insn->cfi.cfa;
++              insn->hint = true;
+-              if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) {
+-                      insn->ret_offset = hint->sp_offset;
++              if (hint->type == UNWIND_HINT_TYPE_FUNC) {
++                      set_func_state(&insn->cfi);
+                       continue;
+               }
+-              insn->hint = true;
+-
+               if (arch_decode_hint_reg(insn, hint->sp_reg)) {
+                       WARN_FUNC("unsupported unwind_hint sp base reg %d",
+                                 insn->sec, insn->offset, hint->sp_reg);
+                       return -1;
+               }
+-              cfa->offset = hint->sp_offset;
++              insn->cfi.cfa.offset = hint->sp_offset;
+               insn->cfi.type = hint->type;
+               insn->cfi.end = hint->end;
+       }
+@@ -1742,27 +1747,18 @@ static bool is_fentry_call(struct instru
+ static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state)
+ {
+-      u8 ret_offset = insn->ret_offset;
+       struct cfi_state *cfi = &state->cfi;
+       int i;
+       if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap)
+               return true;
+-      if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset)
++      if (cfi->cfa.offset != initial_func_cfi.cfa.offset)
+               return true;
+-      if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset)
++      if (cfi->stack_size != initial_func_cfi.cfa.offset)
+               return true;
+-      /*
+-       * If there is a ret offset hint then don't check registers
+-       * because a callee-saved register might have been pushed on
+-       * the stack.
+-       */
+-      if (ret_offset)
+-              return false;
+-
+       for (i = 0; i < CFI_NUM_REGS; i++) {
+               if (cfi->regs[i].base != initial_func_cfi.regs[i].base ||
+                   cfi->regs[i].offset != initial_func_cfi.regs[i].offset)
+@@ -2863,10 +2859,7 @@ static int validate_section(struct objto
+                       continue;
+               init_insn_state(&state, sec);
+-              state.cfi.cfa = initial_func_cfi.cfa;
+-              memcpy(&state.cfi.regs, &initial_func_cfi.regs,
+-                     CFI_NUM_REGS * sizeof(struct cfi_reg));
+-              state.cfi.stack_size = initial_func_cfi.cfa.offset;
++              set_func_state(&state.cfi);
+               warnings += validate_symbol(file, sec, func, &state);
+       }
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -50,7 +50,6 @@ struct instruction {
+       bool retpoline_safe;
+       s8 instr;
+       u8 visited;
+-      u8 ret_offset;
+       struct alt_group *alt_group;
+       struct symbol *call_dest;
+       struct instruction *jump_dest;
diff --git a/queue-5.10/objtool-correctly-handle-retpoline-thunk-calls.patch b/queue-5.10/objtool-correctly-handle-retpoline-thunk-calls.patch
new file mode 100644 (file)
index 0000000..f5e2804
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:03 +0100
+Subject: objtool: Correctly handle retpoline thunk calls
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit bcb1b6ff39da7e8a6a986eb08126fba2b5e13c32 upstream.
+
+Just like JMP handling, convert a direct CALL to a retpoline thunk
+into a retpoline safe indirect CALL.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151259.567568238@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -953,6 +953,18 @@ static int add_call_destinations(struct
+                                         dest_off);
+                               return -1;
+                       }
++
++              } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
++                      /*
++                       * Retpoline calls are really dynamic calls in
++                       * disguise, so convert them accordingly.
++                       */
++                      insn->type = INSN_CALL_DYNAMIC;
++                      insn->retpoline_safe = true;
++
++                      remove_insn_ops(insn);
++                      continue;
++
+               } else
+                       insn->call_dest = reloc->sym;
diff --git a/queue-5.10/objtool-create-reloc-sections-implicitly.patch b/queue-5.10/objtool-create-reloc-sections-implicitly.patch
new file mode 100644 (file)
index 0000000..029650d
--- /dev/null
@@ -0,0 +1,90 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:08 +0100
+Subject: objtool: Create reloc sections implicitly
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d0c5c4cc73da0b05b0d9e5f833f2d859e1b45f8e upstream.
+
+Have elf_add_reloc() create the relocation section implicitly.
+
+Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151259.880174448@infradead.org
+[bwh: Backported to 5.10: drop changes in create_mcount_loc_sections()]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c   |    3 ---
+ tools/objtool/elf.c     |    9 ++++++++-
+ tools/objtool/elf.h     |    1 -
+ tools/objtool/orc_gen.c |    2 --
+ 4 files changed, 8 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -459,9 +459,6 @@ static int create_static_call_sections(s
+       if (!sec)
+               return -1;
+-      if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
+-              return -1;
+-
+       idx = 0;
+       list_for_each_entry(insn, &file->static_call_list, static_call_node) {
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -498,11 +498,18 @@ err:
+       return -1;
+ }
++static struct section *elf_create_reloc_section(struct elf *elf,
++                                              struct section *base,
++                                              int reltype);
++
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+                 unsigned int type, struct symbol *sym, int addend)
+ {
+       struct reloc *reloc;
++      if (!sec->reloc && !elf_create_reloc_section(elf, sec, SHT_RELA))
++              return -1;
++
+       reloc = malloc(sizeof(*reloc));
+       if (!reloc) {
+               perror("malloc");
+@@ -880,7 +887,7 @@ static struct section *elf_create_rela_r
+       return sec;
+ }
+-struct section *elf_create_reloc_section(struct elf *elf,
++static struct section *elf_create_reloc_section(struct elf *elf,
+                                        struct section *base,
+                                        int reltype)
+ {
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -122,7 +122,6 @@ static inline u32 reloc_hash(struct relo
+ struct elf *elf_open_read(const char *name, int flags);
+ struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
+-struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+                 unsigned int type, struct symbol *sym, int addend);
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -222,8 +222,6 @@ int orc_create(struct objtool_file *file
+       sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr);
+       if (!sec)
+               return -1;
+-      if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
+-              return -1;
+       /* Write ORC entries to sections: */
+       list_for_each_entry(entry, &orc_list, list) {
diff --git a/queue-5.10/objtool-default-ignore-int3-for-unreachable.patch b/queue-5.10/objtool-default-ignore-int3-for-unreachable.patch
new file mode 100644 (file)
index 0000000..32cd3bc
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 8 Mar 2022 16:30:14 +0100
+Subject: objtool: Default ignore INT3 for unreachable
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1ffbe4e935f9b7308615c75be990aec07464d1e7 upstream.
+
+Ignore all INT3 instructions for unreachable code warnings, similar to NOP.
+This allows using INT3 for various paddings instead of NOPs.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/20220308154317.343312938@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |   12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -2775,9 +2775,8 @@ static int validate_branch(struct objtoo
+               switch (insn->type) {
+               case INSN_RETURN:
+-                      if (next_insn && next_insn->type == INSN_TRAP) {
+-                              next_insn->ignore = true;
+-                      } else if (sls && !insn->retpoline_safe) {
++                      if (sls && !insn->retpoline_safe &&
++                          next_insn && next_insn->type != INSN_TRAP) {
+                               WARN_FUNC("missing int3 after ret",
+                                         insn->sec, insn->offset);
+                       }
+@@ -2824,9 +2823,8 @@ static int validate_branch(struct objtoo
+                       break;
+               case INSN_JUMP_DYNAMIC:
+-                      if (next_insn && next_insn->type == INSN_TRAP) {
+-                              next_insn->ignore = true;
+-                      } else if (sls && !insn->retpoline_safe) {
++                      if (sls && !insn->retpoline_safe &&
++                          next_insn && next_insn->type != INSN_TRAP) {
+                               WARN_FUNC("missing int3 after indirect jump",
+                                         insn->sec, insn->offset);
+                       }
+@@ -2997,7 +2995,7 @@ static bool ignore_unreachable_insn(stru
+       int i;
+       struct instruction *prev_insn;
+-      if (insn->ignore || insn->type == INSN_NOP)
++      if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP)
+               return true;
+       /*
diff --git a/queue-5.10/objtool-don-t-make-.altinstructions-writable.patch b/queue-5.10/objtool-don-t-make-.altinstructions-writable.patch
new file mode 100644 (file)
index 0000000..2dd17b9
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Wed, 23 Jun 2021 10:42:28 -0500
+Subject: objtool: Don't make .altinstructions writable
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit e31694e0a7a709293319475d8001e05e31f2178c upstream.
+
+When objtool creates the .altinstructions section, it sets the SHF_WRITE
+flag to make the section writable -- unless the section had already been
+previously created by the kernel.  The mismatch between kernel-created
+and objtool-created section flags can cause failures with external
+tooling (kpatch-build).  And the section doesn't need to be writable
+anyway.
+
+Make the section flags consistent with the kernel's.
+
+Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
+Reported-by: Joe Lawrence <joe.lawrence@redhat.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lore.kernel.org/r/6c284ae89717889ea136f9f0064d914cd8329d31.1624462939.git.jpoimboe@redhat.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch/x86/decode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -611,7 +611,7 @@ static int elf_add_alternative(struct el
+       sec = find_section_by_name(elf, ".altinstructions");
+       if (!sec) {
+               sec = elf_create_section(elf, ".altinstructions",
+-                                       SHF_WRITE, size, 0);
++                                       SHF_ALLOC, size, 0);
+               if (!sec) {
+                       WARN_ELF("elf_create_section");
diff --git a/queue-5.10/objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch b/queue-5.10/objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch
new file mode 100644 (file)
index 0000000..088e69c
--- /dev/null
@@ -0,0 +1,98 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:34 +0200
+Subject: objtool: Explicitly avoid self modifying code in .altinstr_replacement
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit dd003edeffa3cb87bc9862582004f405d77d7670 upstream.
+
+Assume ALTERNATIVE()s know what they're doing and do not change, or
+cause to change, instructions in .altinstr_replacement sections.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120309.722511775@infradead.org
+[cascardo: context adjustment]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: objtool doesn't have any mcount handling]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |   36 ++++++++++++++++++++++++++++--------
+ 1 file changed, 28 insertions(+), 8 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -870,18 +870,27 @@ static void remove_insn_ops(struct instr
+       }
+ }
+-static void add_call_dest(struct objtool_file *file, struct instruction *insn,
+-                        struct symbol *dest, bool sibling)
++static void annotate_call_site(struct objtool_file *file,
++                             struct instruction *insn, bool sibling)
+ {
+       struct reloc *reloc = insn_reloc(file, insn);
++      struct symbol *sym = insn->call_dest;
+-      insn->call_dest = dest;
+-      if (!dest)
++      if (!sym)
++              sym = reloc->sym;
++
++      /*
++       * Alternative replacement code is just template code which is
++       * sometimes copied to the original instruction. For now, don't
++       * annotate it. (In the future we might consider annotating the
++       * original instruction if/when it ever makes sense to do so.)
++       */
++      if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+               return;
+-      if (insn->call_dest->static_call_tramp) {
+-              list_add_tail(&insn->call_node,
+-                            &file->static_call_list);
++      if (sym->static_call_tramp) {
++              list_add_tail(&insn->call_node, &file->static_call_list);
++              return;
+       }
+       /*
+@@ -889,7 +898,7 @@ static void add_call_dest(struct objtool
+        * so they need a little help, NOP out any KCOV calls from noinstr
+        * text.
+        */
+-      if (insn->sec->noinstr && insn->call_dest->kcov) {
++      if (insn->sec->noinstr && sym->kcov) {
+               if (reloc) {
+                       reloc->type = R_NONE;
+                       elf_write_reloc(file->elf, reloc);
+@@ -901,7 +910,16 @@ static void add_call_dest(struct objtool
+                                      : arch_nop_insn(insn->len));
+               insn->type = sibling ? INSN_RETURN : INSN_NOP;
++              return;
+       }
++}
++
++static void add_call_dest(struct objtool_file *file, struct instruction *insn,
++                        struct symbol *dest, bool sibling)
++{
++      insn->call_dest = dest;
++      if (!dest)
++              return;
+       /*
+        * Whatever stack impact regular CALLs have, should be undone
+@@ -911,6 +929,8 @@ static void add_call_dest(struct objtool
+        * are converted to JUMP, see read_intra_function_calls().
+        */
+       remove_insn_ops(insn);
++
++      annotate_call_site(file, insn, sibling);
+ }
+ /*
diff --git a/queue-5.10/objtool-extract-elf_strtab_concat.patch b/queue-5.10/objtool-extract-elf_strtab_concat.patch
new file mode 100644 (file)
index 0000000..5ad9294
--- /dev/null
@@ -0,0 +1,112 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:09 +0100
+Subject: objtool: Extract elf_strtab_concat()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 417a4dc91e559f92404c2544f785b02ce75784c3 upstream.
+
+Create a common helper to append strings to a strtab.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151259.941474004@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/elf.c |   60 ++++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 38 insertions(+), 22 deletions(-)
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -724,13 +724,48 @@ err:
+       return NULL;
+ }
++static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
++{
++      Elf_Data *data;
++      Elf_Scn *s;
++      int len;
++
++      if (!strtab)
++              strtab = find_section_by_name(elf, ".strtab");
++      if (!strtab) {
++              WARN("can't find .strtab section");
++              return -1;
++      }
++
++      s = elf_getscn(elf->elf, strtab->idx);
++      if (!s) {
++              WARN_ELF("elf_getscn");
++              return -1;
++      }
++
++      data = elf_newdata(s);
++      if (!data) {
++              WARN_ELF("elf_newdata");
++              return -1;
++      }
++
++      data->d_buf = str;
++      data->d_size = strlen(str) + 1;
++      data->d_align = 1;
++
++      len = strtab->len;
++      strtab->len += data->d_size;
++      strtab->changed = true;
++
++      return len;
++}
++
+ struct section *elf_create_section(struct elf *elf, const char *name,
+                                  unsigned int sh_flags, size_t entsize, int nr)
+ {
+       struct section *sec, *shstrtab;
+       size_t size = entsize * nr;
+       Elf_Scn *s;
+-      Elf_Data *data;
+       sec = malloc(sizeof(*sec));
+       if (!sec) {
+@@ -787,7 +822,6 @@ struct section *elf_create_section(struc
+       sec->sh.sh_addralign = 1;
+       sec->sh.sh_flags = SHF_ALLOC | sh_flags;
+-
+       /* Add section name to .shstrtab (or .strtab for Clang) */
+       shstrtab = find_section_by_name(elf, ".shstrtab");
+       if (!shstrtab)
+@@ -796,27 +830,9 @@ struct section *elf_create_section(struc
+               WARN("can't find .shstrtab or .strtab section");
+               return NULL;
+       }
+-
+-      s = elf_getscn(elf->elf, shstrtab->idx);
+-      if (!s) {
+-              WARN_ELF("elf_getscn");
+-              return NULL;
+-      }
+-
+-      data = elf_newdata(s);
+-      if (!data) {
+-              WARN_ELF("elf_newdata");
++      sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name);
++      if (sec->sh.sh_name == -1)
+               return NULL;
+-      }
+-
+-      data->d_buf = sec->name;
+-      data->d_size = strlen(name) + 1;
+-      data->d_align = 1;
+-
+-      sec->sh.sh_name = shstrtab->len;
+-
+-      shstrtab->len += strlen(name) + 1;
+-      shstrtab->changed = true;
+       list_add_tail(&sec->list, &elf->sections);
+       elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
diff --git a/queue-5.10/objtool-extract-elf_symbol_add.patch b/queue-5.10/objtool-extract-elf_symbol_add.patch
new file mode 100644 (file)
index 0000000..17d139b
--- /dev/null
@@ -0,0 +1,112 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:10 +0100
+Subject: objtool: Extract elf_symbol_add()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 9a7827b7789c630c1efdb121daa42c6e77dce97f upstream.
+
+Create a common helper to add symbols.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151300.003468981@infradead.org
+[bwh: Backported to 5.10: rb_add() parameter order is different]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/elf.c |   56 ++++++++++++++++++++++++++++------------------------
+ 1 file changed, 31 insertions(+), 25 deletions(-)
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -341,12 +341,39 @@ static int read_sections(struct elf *elf
+       return 0;
+ }
++static void elf_add_symbol(struct elf *elf, struct symbol *sym)
++{
++      struct list_head *entry;
++      struct rb_node *pnode;
++
++      sym->type = GELF_ST_TYPE(sym->sym.st_info);
++      sym->bind = GELF_ST_BIND(sym->sym.st_info);
++
++      sym->offset = sym->sym.st_value;
++      sym->len = sym->sym.st_size;
++
++      rb_add(&sym->sec->symbol_tree, &sym->node, symbol_to_offset);
++      pnode = rb_prev(&sym->node);
++      if (pnode)
++              entry = &rb_entry(pnode, struct symbol, node)->list;
++      else
++              entry = &sym->sec->symbol_list;
++      list_add(&sym->list, entry);
++      elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
++      elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
++
++      /*
++       * Don't store empty STT_NOTYPE symbols in the rbtree.  They
++       * can exist within a function, confusing the sorting.
++       */
++      if (!sym->len)
++              rb_erase(&sym->node, &sym->sec->symbol_tree);
++}
++
+ static int read_symbols(struct elf *elf)
+ {
+       struct section *symtab, *symtab_shndx, *sec;
+       struct symbol *sym, *pfunc;
+-      struct list_head *entry;
+-      struct rb_node *pnode;
+       int symbols_nr, i;
+       char *coldstr;
+       Elf_Data *shndx_data = NULL;
+@@ -391,9 +418,6 @@ static int read_symbols(struct elf *elf)
+                       goto err;
+               }
+-              sym->type = GELF_ST_TYPE(sym->sym.st_info);
+-              sym->bind = GELF_ST_BIND(sym->sym.st_info);
+-
+               if ((sym->sym.st_shndx > SHN_UNDEF &&
+                    sym->sym.st_shndx < SHN_LORESERVE) ||
+                   (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) {
+@@ -406,32 +430,14 @@ static int read_symbols(struct elf *elf)
+                                    sym->name);
+                               goto err;
+                       }
+-                      if (sym->type == STT_SECTION) {
++                      if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) {
+                               sym->name = sym->sec->name;
+                               sym->sec->sym = sym;
+                       }
+               } else
+                       sym->sec = find_section_by_index(elf, 0);
+-              sym->offset = sym->sym.st_value;
+-              sym->len = sym->sym.st_size;
+-
+-              rb_add(&sym->sec->symbol_tree, &sym->node, symbol_to_offset);
+-              pnode = rb_prev(&sym->node);
+-              if (pnode)
+-                      entry = &rb_entry(pnode, struct symbol, node)->list;
+-              else
+-                      entry = &sym->sec->symbol_list;
+-              list_add(&sym->list, entry);
+-              elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
+-              elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+-
+-              /*
+-               * Don't store empty STT_NOTYPE symbols in the rbtree.  They
+-               * can exist within a function, confusing the sorting.
+-               */
+-              if (!sym->len)
+-                      rb_erase(&sym->node, &sym->sec->symbol_tree);
++              elf_add_symbol(elf, sym);
+       }
+       if (stats)
diff --git a/queue-5.10/objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch b/queue-5.10/objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch
new file mode 100644 (file)
index 0000000..a7b01c1
--- /dev/null
@@ -0,0 +1,73 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 7 Jun 2021 11:45:58 +0200
+Subject: objtool: Fix .symtab_shndx handling for elf_create_undef_symbol()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 584fd3b31889852d0d6f3dd1e3d8e9619b660d2c upstream.
+
+When an ELF object uses extended symbol section indexes (IOW it has a
+.symtab_shndx section), these must be kept in sync with the regular
+symbol table (.symtab).
+
+So for every new symbol we emit, make sure to also emit a
+.symtab_shndx value to keep the arrays of equal size.
+
+Note: since we're writing an UNDEF symbol, most GElf_Sym fields will
+be 0 and we can repurpose one (st_size) to host the 0 for the xshndx
+value.
+
+Fixes: 2f2f7e47f052 ("objtool: Add elf_create_undef_symbol()")
+Reported-by: Nick Desaulniers <ndesaulniers@google.com>
+Suggested-by: Fangrui Song <maskray@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Nick Desaulniers <ndesaulniers@google.com>
+Link: https://lkml.kernel.org/r/YL3q1qFO9QIRL/BA@hirez.programming.kicks-ass.net
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/elf.c |   25 ++++++++++++++++++++++++-
+ 1 file changed, 24 insertions(+), 1 deletion(-)
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -768,7 +768,7 @@ static int elf_add_string(struct elf *el
+ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
+ {
+-      struct section *symtab;
++      struct section *symtab, *symtab_shndx;
+       struct symbol *sym;
+       Elf_Data *data;
+       Elf_Scn *s;
+@@ -819,6 +819,29 @@ struct symbol *elf_create_undef_symbol(s
+       symtab->len += data->d_size;
+       symtab->changed = true;
++      symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
++      if (symtab_shndx) {
++              s = elf_getscn(elf->elf, symtab_shndx->idx);
++              if (!s) {
++                      WARN_ELF("elf_getscn");
++                      return NULL;
++              }
++
++              data = elf_newdata(s);
++              if (!data) {
++                      WARN_ELF("elf_newdata");
++                      return NULL;
++              }
++
++              data->d_buf = &sym->sym.st_size; /* conveniently 0 */
++              data->d_size = sizeof(Elf32_Word);
++              data->d_align = 4;
++              data->d_type = ELF_T_WORD;
++
++              symtab_shndx->len += 4;
++              symtab_shndx->changed = true;
++      }
++
+       sym->sec = find_section_by_index(elf, 0);
+       elf_add_symbol(elf, sym);
diff --git a/queue-5.10/objtool-fix-code-relocs-vs-weak-symbols.patch b/queue-5.10/objtool-fix-code-relocs-vs-weak-symbols.patch
new file mode 100644 (file)
index 0000000..0c34450
--- /dev/null
@@ -0,0 +1,358 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sun, 17 Apr 2022 17:03:36 +0200
+Subject: objtool: Fix code relocs vs weak symbols
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 4abff6d48dbcea8200c7ea35ba70c242d128ebf3 upstream.
+
+Occasionally objtool driven code patching (think .static_call_sites
+.retpoline_sites etc..) goes sideways and it tries to patch an
+instruction that doesn't match.
+
+Much head-scatching and cursing later the problem is as outlined below
+and affects every section that objtool generates for us, very much
+including the ORC data. The below uses .static_call_sites because it's
+convenient for demonstration purposes, but as mentioned the ORC
+sections, .retpoline_sites and __mount_loc are all similarly affected.
+
+Consider:
+
+foo-weak.c:
+
+  extern void __SCT__foo(void);
+
+  __attribute__((weak)) void foo(void)
+  {
+         return __SCT__foo();
+  }
+
+foo.c:
+
+  extern void __SCT__foo(void);
+  extern void my_foo(void);
+
+  void foo(void)
+  {
+         my_foo();
+         return __SCT__foo();
+  }
+
+These generate the obvious code
+(gcc -O2 -fcf-protection=none -fno-asynchronous-unwind-tables -c foo*.c):
+
+foo-weak.o:
+0000000000000000 <foo>:
+   0:   e9 00 00 00 00          jmpq   5 <foo+0x5>      1: R_X86_64_PLT32       __SCT__foo-0x4
+
+foo.o:
+0000000000000000 <foo>:
+   0:   48 83 ec 08             sub    $0x8,%rsp
+   4:   e8 00 00 00 00          callq  9 <foo+0x9>      5: R_X86_64_PLT32       my_foo-0x4
+   9:   48 83 c4 08             add    $0x8,%rsp
+   d:   e9 00 00 00 00          jmpq   12 <foo+0x12>    e: R_X86_64_PLT32       __SCT__foo-0x4
+
+Now, when we link these two files together, you get something like
+(ld -r -o foos.o foo-weak.o foo.o):
+
+foos.o:
+0000000000000000 <foo-0x10>:
+   0:   e9 00 00 00 00          jmpq   5 <foo-0xb>      1: R_X86_64_PLT32       __SCT__foo-0x4
+   5:   66 2e 0f 1f 84 00 00 00 00 00   nopw   %cs:0x0(%rax,%rax,1)
+   f:   90                      nop
+
+0000000000000010 <foo>:
+  10:   48 83 ec 08             sub    $0x8,%rsp
+  14:   e8 00 00 00 00          callq  19 <foo+0x9>     15: R_X86_64_PLT32      my_foo-0x4
+  19:   48 83 c4 08             add    $0x8,%rsp
+  1d:   e9 00 00 00 00          jmpq   22 <foo+0x12>    1e: R_X86_64_PLT32      __SCT__foo-0x4
+
+Noting that ld preserves the weak function text, but strips the symbol
+off of it (hence objdump doing that funny negative offset thing). This
+does lead to 'interesting' unused code issues with objtool when ran on
+linked objects, but that seems to be working (fingers crossed).
+
+So far so good.. Now lets consider the objtool static_call output
+section (readelf output, old binutils):
+
+foo-weak.o:
+
+Relocation section '.rela.static_call_sites' at offset 0x2c8 contains 1 entry:
+    Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+0000000000000000  0000000200000002 R_X86_64_PC32          0000000000000000 .text + 0
+0000000000000004  0000000d00000002 R_X86_64_PC32          0000000000000000 __SCT__foo + 1
+
+foo.o:
+
+Relocation section '.rela.static_call_sites' at offset 0x310 contains 2 entries:
+    Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+0000000000000000  0000000200000002 R_X86_64_PC32          0000000000000000 .text + d
+0000000000000004  0000000d00000002 R_X86_64_PC32          0000000000000000 __SCT__foo + 1
+
+foos.o:
+
+Relocation section '.rela.static_call_sites' at offset 0x430 contains 4 entries:
+    Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+0000000000000000  0000000100000002 R_X86_64_PC32          0000000000000000 .text + 0
+0000000000000004  0000000d00000002 R_X86_64_PC32          0000000000000000 __SCT__foo + 1
+0000000000000008  0000000100000002 R_X86_64_PC32          0000000000000000 .text + 1d
+000000000000000c  0000000d00000002 R_X86_64_PC32          0000000000000000 __SCT__foo + 1
+
+So we have two patch sites, one in the dead code of the weak foo and one
+in the real foo. All is well.
+
+*HOWEVER*, when the toolchain strips unused section symbols it
+generates things like this (using new enough binutils):
+
+foo-weak.o:
+
+Relocation section '.rela.static_call_sites' at offset 0x2c8 contains 1 entry:
+    Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+0000000000000000  0000000200000002 R_X86_64_PC32          0000000000000000 foo + 0
+0000000000000004  0000000d00000002 R_X86_64_PC32          0000000000000000 __SCT__foo + 1
+
+foo.o:
+
+Relocation section '.rela.static_call_sites' at offset 0x310 contains 2 entries:
+    Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+0000000000000000  0000000200000002 R_X86_64_PC32          0000000000000000 foo + d
+0000000000000004  0000000d00000002 R_X86_64_PC32          0000000000000000 __SCT__foo + 1
+
+foos.o:
+
+Relocation section '.rela.static_call_sites' at offset 0x430 contains 4 entries:
+    Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+0000000000000000  0000000100000002 R_X86_64_PC32          0000000000000000 foo + 0
+0000000000000004  0000000d00000002 R_X86_64_PC32          0000000000000000 __SCT__foo + 1
+0000000000000008  0000000100000002 R_X86_64_PC32          0000000000000000 foo + d
+000000000000000c  0000000d00000002 R_X86_64_PC32          0000000000000000 __SCT__foo + 1
+
+And now we can see how that foos.o .static_call_sites goes side-ways, we
+now have _two_ patch sites in foo. One for the weak symbol at foo+0
+(which is no longer a static_call site!) and one at foo+d which is in
+fact the right location.
+
+This seems to happen when objtool cannot find a section symbol, in which
+case it falls back to any other symbol to key off of, however in this
+case that goes terribly wrong!
+
+As such, teach objtool to create a section symbol when there isn't
+one.
+
+Fixes: 44f6a7c0755d ("objtool: Fix seg fault with Clang non-section symbols")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lkml.kernel.org/r/20220419203807.655552918@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/elf.c |  187 +++++++++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 165 insertions(+), 22 deletions(-)
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -537,37 +537,180 @@ int elf_add_reloc(struct elf *elf, struc
+       return 0;
+ }
+-int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+-                        unsigned long offset, unsigned int type,
+-                        struct section *insn_sec, unsigned long insn_off)
++/*
++ * Ensure that any reloc section containing references to @sym is marked
++ * changed such that it will get re-generated in elf_rebuild_reloc_sections()
++ * with the new symbol index.
++ */
++static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym)
++{
++      struct section *sec;
++
++      list_for_each_entry(sec, &elf->sections, list) {
++              struct reloc *reloc;
++
++              if (sec->changed)
++                      continue;
++
++              list_for_each_entry(reloc, &sec->reloc_list, list) {
++                      if (reloc->sym == sym) {
++                              sec->changed = true;
++                              break;
++                      }
++              }
++      }
++}
++
++/*
++ * Move the first global symbol, as per sh_info, into a new, higher symbol
++ * index. This fees up the shndx for a new local symbol.
++ */
++static int elf_move_global_symbol(struct elf *elf, struct section *symtab,
++                                struct section *symtab_shndx)
+ {
++      Elf_Data *data, *shndx_data = NULL;
++      Elf32_Word first_non_local;
+       struct symbol *sym;
+-      int addend;
++      Elf_Scn *s;
+-      if (insn_sec->sym) {
+-              sym = insn_sec->sym;
+-              addend = insn_off;
++      first_non_local = symtab->sh.sh_info;
+-      } else {
+-              /*
+-               * The Clang assembler strips section symbols, so we have to
+-               * reference the function symbol instead:
+-               */
+-              sym = find_symbol_containing(insn_sec, insn_off);
+-              if (!sym) {
+-                      /*
+-                       * Hack alert.  This happens when we need to reference
+-                       * the NOP pad insn immediately after the function.
+-                       */
+-                      sym = find_symbol_containing(insn_sec, insn_off - 1);
++      sym = find_symbol_by_index(elf, first_non_local);
++      if (!sym) {
++              WARN("no non-local symbols !?");
++              return first_non_local;
++      }
++
++      s = elf_getscn(elf->elf, symtab->idx);
++      if (!s) {
++              WARN_ELF("elf_getscn");
++              return -1;
++      }
++
++      data = elf_newdata(s);
++      if (!data) {
++              WARN_ELF("elf_newdata");
++              return -1;
++      }
++
++      data->d_buf = &sym->sym;
++      data->d_size = sizeof(sym->sym);
++      data->d_align = 1;
++      data->d_type = ELF_T_SYM;
++
++      sym->idx = symtab->sh.sh_size / sizeof(sym->sym);
++      elf_dirty_reloc_sym(elf, sym);
++
++      symtab->sh.sh_info += 1;
++      symtab->sh.sh_size += data->d_size;
++      symtab->changed = true;
++
++      if (symtab_shndx) {
++              s = elf_getscn(elf->elf, symtab_shndx->idx);
++              if (!s) {
++                      WARN_ELF("elf_getscn");
++                      return -1;
+               }
+-              if (!sym) {
+-                      WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off);
++              shndx_data = elf_newdata(s);
++              if (!shndx_data) {
++                      WARN_ELF("elf_newshndx_data");
+                       return -1;
+               }
+-              addend = insn_off - sym->offset;
++              shndx_data->d_buf = &sym->sec->idx;
++              shndx_data->d_size = sizeof(Elf32_Word);
++              shndx_data->d_align = 4;
++              shndx_data->d_type = ELF_T_WORD;
++
++              symtab_shndx->sh.sh_size += 4;
++              symtab_shndx->changed = true;
++      }
++
++      return first_non_local;
++}
++
++static struct symbol *
++elf_create_section_symbol(struct elf *elf, struct section *sec)
++{
++      struct section *symtab, *symtab_shndx;
++      Elf_Data *shndx_data = NULL;
++      struct symbol *sym;
++      Elf32_Word shndx;
++
++      symtab = find_section_by_name(elf, ".symtab");
++      if (symtab) {
++              symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
++              if (symtab_shndx)
++                      shndx_data = symtab_shndx->data;
++      } else {
++              WARN("no .symtab");
++              return NULL;
++      }
++
++      sym = malloc(sizeof(*sym));
++      if (!sym) {
++              perror("malloc");
++              return NULL;
++      }
++      memset(sym, 0, sizeof(*sym));
++
++      sym->idx = elf_move_global_symbol(elf, symtab, symtab_shndx);
++      if (sym->idx < 0) {
++              WARN("elf_move_global_symbol");
++              return NULL;
++      }
++
++      sym->name = sec->name;
++      sym->sec = sec;
++
++      // st_name 0
++      sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
++      // st_other 0
++      // st_value 0
++      // st_size 0
++      shndx = sec->idx;
++      if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) {
++              sym->sym.st_shndx = shndx;
++              if (!shndx_data)
++                      shndx = 0;
++      } else {
++              sym->sym.st_shndx = SHN_XINDEX;
++              if (!shndx_data) {
++                      WARN("no .symtab_shndx");
++                      return NULL;
++              }
++      }
++
++      if (!gelf_update_symshndx(symtab->data, shndx_data, sym->idx, &sym->sym, shndx)) {
++              WARN_ELF("gelf_update_symshndx");
++              return NULL;
++      }
++
++      elf_add_symbol(elf, sym);
++
++      return sym;
++}
++
++int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
++                        unsigned long offset, unsigned int type,
++                        struct section *insn_sec, unsigned long insn_off)
++{
++      struct symbol *sym = insn_sec->sym;
++      int addend = insn_off;
++
++      if (!sym) {
++              /*
++               * Due to how weak functions work, we must use section based
++               * relocations. Symbol based relocations would result in the
++               * weak and non-weak function annotations being overlaid on the
++               * non-weak function after linking.
++               */
++              sym = elf_create_section_symbol(elf, insn_sec);
++              if (!sym)
++                      return -1;
++
++              insn_sec->sym = sym;
+       }
+       return elf_add_reloc(elf, sec, offset, type, sym, addend);
diff --git a/queue-5.10/objtool-fix-objtool-regression-on-x32-systems.patch b/queue-5.10/objtool-fix-objtool-regression-on-x32-systems.patch
new file mode 100644 (file)
index 0000000..3e3a60d
--- /dev/null
@@ -0,0 +1,103 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Mon, 16 May 2022 11:06:36 -0400
+Subject: objtool: Fix objtool regression on x32 systems
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 22682a07acc308ef78681572e19502ce8893c4d4 upstream.
+
+Commit c087c6e7b551 ("objtool: Fix type of reloc::addend") failed to
+appreciate cross building from ILP32 hosts, where 'int' == 'long' and
+the issue persists.
+
+As such, use s64/int64_t/Elf64_Sxword for this field and suffer the
+pain that is ISO C99 printf formats for it.
+
+Fixes: c087c6e7b551 ("objtool: Fix type of reloc::addend")
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+[peterz: reword changelog, s/long long/s64/]
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/alpine.LRH.2.02.2205161041260.11556@file01.intranet.prod.int.rdu2.redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |    9 +++++----
+ tools/objtool/elf.c   |    2 +-
+ tools/objtool/elf.h   |    4 ++--
+ 3 files changed, 8 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -5,6 +5,7 @@
+ #include <string.h>
+ #include <stdlib.h>
++#include <inttypes.h>
+ #include <sys/mman.h>
+ #include "builtin.h"
+@@ -467,12 +468,12 @@ static int add_dead_ends(struct objtool_
+               else if (reloc->addend == reloc->sym->sec->len) {
+                       insn = find_last_insn(file, reloc->sym->sec);
+                       if (!insn) {
+-                              WARN("can't find unreachable insn at %s+0x%lx",
++                              WARN("can't find unreachable insn at %s+0x%" PRIx64,
+                                    reloc->sym->sec->name, reloc->addend);
+                               return -1;
+                       }
+               } else {
+-                      WARN("can't find unreachable insn at %s+0x%lx",
++                      WARN("can't find unreachable insn at %s+0x%" PRIx64,
+                            reloc->sym->sec->name, reloc->addend);
+                       return -1;
+               }
+@@ -502,12 +503,12 @@ reachable:
+               else if (reloc->addend == reloc->sym->sec->len) {
+                       insn = find_last_insn(file, reloc->sym->sec);
+                       if (!insn) {
+-                              WARN("can't find reachable insn at %s+0x%lx",
++                              WARN("can't find reachable insn at %s+0x%" PRIx64,
+                                    reloc->sym->sec->name, reloc->addend);
+                               return -1;
+                       }
+               } else {
+-                      WARN("can't find reachable insn at %s+0x%lx",
++                      WARN("can't find reachable insn at %s+0x%" PRIx64,
+                            reloc->sym->sec->name, reloc->addend);
+                       return -1;
+               }
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -510,7 +510,7 @@ static struct section *elf_create_reloc_
+                                               int reltype);
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+-                unsigned int type, struct symbol *sym, long addend)
++                unsigned int type, struct symbol *sym, s64 addend)
+ {
+       struct reloc *reloc;
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -73,7 +73,7 @@ struct reloc {
+       struct symbol *sym;
+       unsigned long offset;
+       unsigned int type;
+-      long addend;
++      s64 addend;
+       int idx;
+       bool jump_table_start;
+ };
+@@ -127,7 +127,7 @@ struct elf *elf_open_read(const char *na
+ struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+-                unsigned int type, struct symbol *sym, long addend);
++                unsigned int type, struct symbol *sym, s64 addend);
+ int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+                         unsigned long offset, unsigned int type,
+                         struct section *insn_sec, unsigned long insn_off);
diff --git a/queue-5.10/objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch b/queue-5.10/objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch
new file mode 100644 (file)
index 0000000..bf49353
--- /dev/null
@@ -0,0 +1,62 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Wed, 23 Mar 2022 23:35:01 +0100
+Subject: objtool: Fix SLS validation for kcov tail-call replacement
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 7a53f408902d913cd541b4f8ad7dbcd4961f5b82 upstream.
+
+Since not all compilers have a function attribute to disable KCOV
+instrumentation, objtool can rewrite KCOV instrumentation in noinstr
+functions as per commit:
+
+  f56dae88a81f ("objtool: Handle __sanitize_cov*() tail calls")
+
+However, this has subtle interaction with the SLS validation from
+commit:
+
+  1cc1e4c8aab4 ("objtool: Add straight-line-speculation validation")
+
+In that when a tail-call instruction is replaced with a RET an
+additional INT3 instruction is also written, but is not represented in
+the decoded instruction stream.
+
+This then leads to false positive missing INT3 objtool warnings in
+noinstr code.
+
+Instead of adding additional struct instruction objects, mark the RET
+instruction with retpoline_safe to suppress the warning (since we know
+there really is an INT3).
+
+Fixes: 1cc1e4c8aab4 ("objtool: Add straight-line-speculation validation")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20220323230712.GA8939@worktop.programming.kicks-ass.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -961,6 +961,17 @@ static void annotate_call_site(struct ob
+                                      : arch_nop_insn(insn->len));
+               insn->type = sibling ? INSN_RETURN : INSN_NOP;
++
++              if (sibling) {
++                      /*
++                       * We've replaced the tail-call JMP insn by two new
++                       * insn: RET; INT3, except we only have a single struct
++                       * insn here. Mark it retpoline_safe to avoid the SLS
++                       * warning, instead of adding another insn.
++                       */
++                      insn->retpoline_safe = true;
++              }
++
+               return;
+       }
+ }
diff --git a/queue-5.10/objtool-fix-symbol-creation.patch b/queue-5.10/objtool-fix-symbol-creation.patch
new file mode 100644 (file)
index 0000000..fa16132
--- /dev/null
@@ -0,0 +1,350 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 17 May 2022 17:42:04 +0200
+Subject: objtool: Fix symbol creation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit ead165fa1042247b033afad7be4be9b815d04ade upstream.
+
+Nathan reported objtool failing with the following messages:
+
+  warning: objtool: no non-local symbols !?
+  warning: objtool: gelf_update_symshndx: invalid section index
+
+The problem is due to commit 4abff6d48dbc ("objtool: Fix code relocs
+vs weak symbols") failing to consider the case where an object would
+have no non-local symbols.
+
+The problem that commit tries to address is adding a STB_LOCAL symbol
+to the symbol table in light of the ELF spec's requirement that:
+
+  In each symbol table, all symbols with STB_LOCAL binding precede the
+  weak and global symbols.  As ``Sections'' above describes, a symbol
+  table section's sh_info section header member holds the symbol table
+  index for the first non-local symbol.
+
+The approach taken is to find this first non-local symbol, move that
+to the end and then re-use the freed spot to insert a new local symbol
+and increment sh_info.
+
+Except it never considered the case of object files without global
+symbols and got a whole bunch of details wrong -- so many in fact that
+it is a wonder it ever worked :/
+
+Specifically:
+
+ - It failed to re-hash the symbol on the new index, so a subsequent
+   find_symbol_by_index() would not find it at the new location and a
+   query for the old location would now return a non-deterministic
+   choice between the old and new symbol.
+
+ - It failed to appreciate that the GElf wrappers are not a valid disk
+   format (it works because GElf is basically Elf64 and we only
+   support x86_64 atm.)
+
+ - It failed to fully appreciate how horrible the libelf API really is
+   and got the gelf_update_symshndx() call pretty much completely
+   wrong; with the direct consequence that if inserting a second
+   STB_LOCAL symbol would require moving the same STB_GLOBAL symbol
+   again it would completely come unstuck.
+
+Write a new elf_update_symbol() function that wraps all the magic
+required to update or create a new symbol at a given index.
+
+Specifically, gelf_update_sym*() require an @ndx argument that is
+relative to the @data argument; this means you have to manually
+iterate the section data descriptor list and update @ndx.
+
+Fixes: 4abff6d48dbc ("objtool: Fix code relocs vs weak symbols")
+Reported-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Tested-by: Nathan Chancellor <nathan@kernel.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/YoPCTEYjoPqE4ZxB@hirez.programming.kicks-ass.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 5.10: elf_hash_add() takes a hash table pointer,
+ not just a name]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/elf.c |  196 +++++++++++++++++++++++++++++++++-------------------
+ 1 file changed, 128 insertions(+), 68 deletions(-)
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -346,6 +346,8 @@ static void elf_add_symbol(struct elf *e
+       struct list_head *entry;
+       struct rb_node *pnode;
++      sym->alias = sym;
++
+       sym->type = GELF_ST_TYPE(sym->sym.st_info);
+       sym->bind = GELF_ST_BIND(sym->sym.st_info);
+@@ -401,7 +403,6 @@ static int read_symbols(struct elf *elf)
+                       return -1;
+               }
+               memset(sym, 0, sizeof(*sym));
+-              sym->alias = sym;
+               sym->idx = i;
+@@ -562,24 +563,21 @@ static void elf_dirty_reloc_sym(struct e
+ }
+ /*
+- * Move the first global symbol, as per sh_info, into a new, higher symbol
+- * index. This fees up the shndx for a new local symbol.
++ * The libelf API is terrible; gelf_update_sym*() takes a data block relative
++ * index value, *NOT* the symbol index. As such, iterate the data blocks and
++ * adjust index until it fits.
++ *
++ * If no data block is found, allow adding a new data block provided the index
++ * is only one past the end.
+  */
+-static int elf_move_global_symbol(struct elf *elf, struct section *symtab,
+-                                struct section *symtab_shndx)
++static int elf_update_symbol(struct elf *elf, struct section *symtab,
++                           struct section *symtab_shndx, struct symbol *sym)
+ {
+-      Elf_Data *data, *shndx_data = NULL;
+-      Elf32_Word first_non_local;
+-      struct symbol *sym;
+-      Elf_Scn *s;
+-
+-      first_non_local = symtab->sh.sh_info;
+-
+-      sym = find_symbol_by_index(elf, first_non_local);
+-      if (!sym) {
+-              WARN("no non-local symbols !?");
+-              return first_non_local;
+-      }
++      Elf32_Word shndx = sym->sec ? sym->sec->idx : SHN_UNDEF;
++      Elf_Data *symtab_data = NULL, *shndx_data = NULL;
++      Elf64_Xword entsize = symtab->sh.sh_entsize;
++      int max_idx, idx = sym->idx;
++      Elf_Scn *s, *t = NULL;
+       s = elf_getscn(elf->elf, symtab->idx);
+       if (!s) {
+@@ -587,79 +585,124 @@ static int elf_move_global_symbol(struct
+               return -1;
+       }
+-      data = elf_newdata(s);
+-      if (!data) {
+-              WARN_ELF("elf_newdata");
+-              return -1;
++      if (symtab_shndx) {
++              t = elf_getscn(elf->elf, symtab_shndx->idx);
++              if (!t) {
++                      WARN_ELF("elf_getscn");
++                      return -1;
++              }
+       }
+-      data->d_buf = &sym->sym;
+-      data->d_size = sizeof(sym->sym);
+-      data->d_align = 1;
+-      data->d_type = ELF_T_SYM;
++      for (;;) {
++              /* get next data descriptor for the relevant sections */
++              symtab_data = elf_getdata(s, symtab_data);
++              if (t)
++                      shndx_data = elf_getdata(t, shndx_data);
++
++              /* end-of-list */
++              if (!symtab_data) {
++                      void *buf;
++
++                      if (idx) {
++                              /* we don't do holes in symbol tables */
++                              WARN("index out of range");
++                              return -1;
++                      }
+-      sym->idx = symtab->sh.sh_size / sizeof(sym->sym);
+-      elf_dirty_reloc_sym(elf, sym);
++                      /* if @idx == 0, it's the next contiguous entry, create it */
++                      symtab_data = elf_newdata(s);
++                      if (t)
++                              shndx_data = elf_newdata(t);
++
++                      buf = calloc(1, entsize);
++                      if (!buf) {
++                              WARN("malloc");
++                              return -1;
++                      }
+-      symtab->sh.sh_info += 1;
+-      symtab->sh.sh_size += data->d_size;
+-      symtab->changed = true;
++                      symtab_data->d_buf = buf;
++                      symtab_data->d_size = entsize;
++                      symtab_data->d_align = 1;
++                      symtab_data->d_type = ELF_T_SYM;
++
++                      symtab->sh.sh_size += entsize;
++                      symtab->changed = true;
++
++                      if (t) {
++                              shndx_data->d_buf = &sym->sec->idx;
++                              shndx_data->d_size = sizeof(Elf32_Word);
++                              shndx_data->d_align = sizeof(Elf32_Word);
++                              shndx_data->d_type = ELF_T_WORD;
+-      if (symtab_shndx) {
+-              s = elf_getscn(elf->elf, symtab_shndx->idx);
+-              if (!s) {
+-                      WARN_ELF("elf_getscn");
++                              symtab_shndx->sh.sh_size += sizeof(Elf32_Word);
++                              symtab_shndx->changed = true;
++                      }
++
++                      break;
++              }
++
++              /* empty blocks should not happen */
++              if (!symtab_data->d_size) {
++                      WARN("zero size data");
+                       return -1;
+               }
+-              shndx_data = elf_newdata(s);
++              /* is this the right block? */
++              max_idx = symtab_data->d_size / entsize;
++              if (idx < max_idx)
++                      break;
++
++              /* adjust index and try again */
++              idx -= max_idx;
++      }
++
++      /* something went side-ways */
++      if (idx < 0) {
++              WARN("negative index");
++              return -1;
++      }
++
++      /* setup extended section index magic and write the symbol */
++      if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) {
++              sym->sym.st_shndx = shndx;
++              if (!shndx_data)
++                      shndx = 0;
++      } else {
++              sym->sym.st_shndx = SHN_XINDEX;
+               if (!shndx_data) {
+-                      WARN_ELF("elf_newshndx_data");
++                      WARN("no .symtab_shndx");
+                       return -1;
+               }
++      }
+-              shndx_data->d_buf = &sym->sec->idx;
+-              shndx_data->d_size = sizeof(Elf32_Word);
+-              shndx_data->d_align = 4;
+-              shndx_data->d_type = ELF_T_WORD;
+-
+-              symtab_shndx->sh.sh_size += 4;
+-              symtab_shndx->changed = true;
++      if (!gelf_update_symshndx(symtab_data, shndx_data, idx, &sym->sym, shndx)) {
++              WARN_ELF("gelf_update_symshndx");
++              return -1;
+       }
+-      return first_non_local;
++      return 0;
+ }
+ static struct symbol *
+ elf_create_section_symbol(struct elf *elf, struct section *sec)
+ {
+       struct section *symtab, *symtab_shndx;
+-      Elf_Data *shndx_data = NULL;
+-      struct symbol *sym;
+-      Elf32_Word shndx;
++      Elf32_Word first_non_local, new_idx;
++      struct symbol *sym, *old;
+       symtab = find_section_by_name(elf, ".symtab");
+       if (symtab) {
+               symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+-              if (symtab_shndx)
+-                      shndx_data = symtab_shndx->data;
+       } else {
+               WARN("no .symtab");
+               return NULL;
+       }
+-      sym = malloc(sizeof(*sym));
++      sym = calloc(1, sizeof(*sym));
+       if (!sym) {
+               perror("malloc");
+               return NULL;
+       }
+-      memset(sym, 0, sizeof(*sym));
+-
+-      sym->idx = elf_move_global_symbol(elf, symtab, symtab_shndx);
+-      if (sym->idx < 0) {
+-              WARN("elf_move_global_symbol");
+-              return NULL;
+-      }
+       sym->name = sec->name;
+       sym->sec = sec;
+@@ -669,24 +712,41 @@ elf_create_section_symbol(struct elf *el
+       // st_other 0
+       // st_value 0
+       // st_size 0
+-      shndx = sec->idx;
+-      if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) {
+-              sym->sym.st_shndx = shndx;
+-              if (!shndx_data)
+-                      shndx = 0;
+-      } else {
+-              sym->sym.st_shndx = SHN_XINDEX;
+-              if (!shndx_data) {
+-                      WARN("no .symtab_shndx");
++
++      /*
++       * Move the first global symbol, as per sh_info, into a new, higher
++       * symbol index. This fees up a spot for a new local symbol.
++       */
++      first_non_local = symtab->sh.sh_info;
++      new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize;
++      old = find_symbol_by_index(elf, first_non_local);
++      if (old) {
++              old->idx = new_idx;
++
++              hlist_del(&old->hash);
++              elf_hash_add(elf->symbol_hash, &old->hash, old->idx);
++
++              elf_dirty_reloc_sym(elf, old);
++
++              if (elf_update_symbol(elf, symtab, symtab_shndx, old)) {
++                      WARN("elf_update_symbol move");
+                       return NULL;
+               }
++
++              new_idx = first_non_local;
+       }
+-      if (!gelf_update_symshndx(symtab->data, shndx_data, sym->idx, &sym->sym, shndx)) {
+-              WARN_ELF("gelf_update_symshndx");
++      sym->idx = new_idx;
++      if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) {
++              WARN("elf_update_symbol");
+               return NULL;
+       }
++      /*
++       * Either way, we added a LOCAL symbol.
++       */
++      symtab->sh.sh_info += 1;
++
+       elf_add_symbol(elf, sym);
+       return sym;
diff --git a/queue-5.10/objtool-fix-type-of-reloc-addend.patch b/queue-5.10/objtool-fix-type-of-reloc-addend.patch
new file mode 100644 (file)
index 0000000..4998d7e
--- /dev/null
@@ -0,0 +1,92 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sun, 17 Apr 2022 17:03:40 +0200
+Subject: objtool: Fix type of reloc::addend
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit c087c6e7b551b7f208c0b852304f044954cf2bb3 upstream.
+
+Elf{32,64}_Rela::r_addend is of type: Elf{32,64}_Sword, that means
+that our reloc::addend needs to be long or face tuncation issues when
+we do elf_rebuild_reloc_section():
+
+  - 107:  48 b8 00 00 00 00 00 00 00 00   movabs $0x0,%rax        109: R_X86_64_64        level4_kernel_pgt+0x80000067
+  + 107:  48 b8 00 00 00 00 00 00 00 00   movabs $0x0,%rax        109: R_X86_64_64        level4_kernel_pgt-0x7fffff99
+
+Fixes: 627fce14809b ("objtool: Add ORC unwind table generation")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lkml.kernel.org/r/20220419203807.596871927@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |    8 ++++----
+ tools/objtool/elf.c   |    2 +-
+ tools/objtool/elf.h   |    4 ++--
+ 3 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -467,12 +467,12 @@ static int add_dead_ends(struct objtool_
+               else if (reloc->addend == reloc->sym->sec->len) {
+                       insn = find_last_insn(file, reloc->sym->sec);
+                       if (!insn) {
+-                              WARN("can't find unreachable insn at %s+0x%x",
++                              WARN("can't find unreachable insn at %s+0x%lx",
+                                    reloc->sym->sec->name, reloc->addend);
+                               return -1;
+                       }
+               } else {
+-                      WARN("can't find unreachable insn at %s+0x%x",
++                      WARN("can't find unreachable insn at %s+0x%lx",
+                            reloc->sym->sec->name, reloc->addend);
+                       return -1;
+               }
+@@ -502,12 +502,12 @@ reachable:
+               else if (reloc->addend == reloc->sym->sec->len) {
+                       insn = find_last_insn(file, reloc->sym->sec);
+                       if (!insn) {
+-                              WARN("can't find reachable insn at %s+0x%x",
++                              WARN("can't find reachable insn at %s+0x%lx",
+                                    reloc->sym->sec->name, reloc->addend);
+                               return -1;
+                       }
+               } else {
+-                      WARN("can't find reachable insn at %s+0x%x",
++                      WARN("can't find reachable insn at %s+0x%lx",
+                            reloc->sym->sec->name, reloc->addend);
+                       return -1;
+               }
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -509,7 +509,7 @@ static struct section *elf_create_reloc_
+                                               int reltype);
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+-                unsigned int type, struct symbol *sym, int addend)
++                unsigned int type, struct symbol *sym, long addend)
+ {
+       struct reloc *reloc;
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -73,7 +73,7 @@ struct reloc {
+       struct symbol *sym;
+       unsigned long offset;
+       unsigned int type;
+-      int addend;
++      long addend;
+       int idx;
+       bool jump_table_start;
+ };
+@@ -127,7 +127,7 @@ struct elf *elf_open_read(const char *na
+ struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+-                unsigned int type, struct symbol *sym, int addend);
++                unsigned int type, struct symbol *sym, long addend);
+ int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+                         unsigned long offset, unsigned int type,
+                         struct section *insn_sec, unsigned long insn_off);
diff --git a/queue-5.10/objtool-handle-__sanitize_cov-tail-calls.patch b/queue-5.10/objtool-handle-__sanitize_cov-tail-calls.patch
new file mode 100644 (file)
index 0000000..d05e187
--- /dev/null
@@ -0,0 +1,256 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 24 Jun 2021 11:41:02 +0200
+Subject: objtool: Handle __sanitize_cov*() tail calls
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit f56dae88a81fded66adf2bea9922d1d98d1da14f upstream.
+
+Turns out the compilers also generate tail calls to __sanitize_cov*(),
+make sure to also patch those out in noinstr code.
+
+Fixes: 0f1441b44e82 ("objtool: Fix noinstr vs KCOV")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Marco Elver <elver@google.com>
+Link: https://lore.kernel.org/r/20210624095147.818783799@infradead.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+[bwh: Backported to 5.10:
+ - objtool doesn't have any mcount handling
+ - Write the NOPs as hex literals since we can't use <asm/nops.h>]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch.h            |    1 
+ tools/objtool/arch/x86/decode.c |   20 ++++++
+ tools/objtool/check.c           |  123 +++++++++++++++++++++-------------------
+ 3 files changed, 86 insertions(+), 58 deletions(-)
+
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -83,6 +83,7 @@ unsigned long arch_jump_destination(stru
+ unsigned long arch_dest_reloc_offset(int addend);
+ const char *arch_nop_insn(int len);
++const char *arch_ret_insn(int len);
+ int arch_decode_hint_reg(u8 sp_reg, int *base);
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -586,6 +586,26 @@ const char *arch_nop_insn(int len)
+       return nops[len-1];
+ }
++#define BYTE_RET      0xC3
++
++const char *arch_ret_insn(int len)
++{
++      static const char ret[5][5] = {
++              { BYTE_RET },
++              { BYTE_RET, 0x90 },
++              { BYTE_RET, 0x66, 0x90 },
++              { BYTE_RET, 0x0f, 0x1f, 0x00 },
++              { BYTE_RET, 0x0f, 0x1f, 0x40, 0x00 },
++      };
++
++      if (len < 1 || len > 5) {
++              WARN("invalid RET size: %d\n", len);
++              return NULL;
++      }
++
++      return ret[len-1];
++}
++
+ /* asm/alternative.h ? */
+ #define ALTINSTR_FLAG_INV     (1 << 15)
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -860,6 +860,60 @@ static struct reloc *insn_reloc(struct o
+       return insn->reloc;
+ }
++static void remove_insn_ops(struct instruction *insn)
++{
++      struct stack_op *op, *tmp;
++
++      list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
++              list_del(&op->list);
++              free(op);
++      }
++}
++
++static void add_call_dest(struct objtool_file *file, struct instruction *insn,
++                        struct symbol *dest, bool sibling)
++{
++      struct reloc *reloc = insn_reloc(file, insn);
++
++      insn->call_dest = dest;
++      if (!dest)
++              return;
++
++      if (insn->call_dest->static_call_tramp) {
++              list_add_tail(&insn->call_node,
++                            &file->static_call_list);
++      }
++
++      /*
++       * Many compilers cannot disable KCOV with a function attribute
++       * so they need a little help, NOP out any KCOV calls from noinstr
++       * text.
++       */
++      if (insn->sec->noinstr &&
++          !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) {
++              if (reloc) {
++                      reloc->type = R_NONE;
++                      elf_write_reloc(file->elf, reloc);
++              }
++
++              elf_write_insn(file->elf, insn->sec,
++                             insn->offset, insn->len,
++                             sibling ? arch_ret_insn(insn->len)
++                                     : arch_nop_insn(insn->len));
++
++              insn->type = sibling ? INSN_RETURN : INSN_NOP;
++      }
++
++      /*
++       * Whatever stack impact regular CALLs have, should be undone
++       * by the RETURN of the called function.
++       *
++       * Annotated intra-function calls retain the stack_ops but
++       * are converted to JUMP, see read_intra_function_calls().
++       */
++      remove_insn_ops(insn);
++}
++
+ /*
+  * Find the destination instructions for all jumps.
+  */
+@@ -898,11 +952,7 @@ static int add_jump_destinations(struct
+                       continue;
+               } else if (insn->func) {
+                       /* internal or external sibling call (with reloc) */
+-                      insn->call_dest = reloc->sym;
+-                      if (insn->call_dest->static_call_tramp) {
+-                              list_add_tail(&insn->call_node,
+-                                            &file->static_call_list);
+-                      }
++                      add_call_dest(file, insn, reloc->sym, true);
+                       continue;
+               } else if (reloc->sym->sec->idx) {
+                       dest_sec = reloc->sym->sec;
+@@ -958,13 +1008,8 @@ static int add_jump_destinations(struct
+                       } else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
+                                  insn->jump_dest->offset == insn->jump_dest->func->offset) {
+-
+                               /* internal sibling call (without reloc) */
+-                              insn->call_dest = insn->jump_dest->func;
+-                              if (insn->call_dest->static_call_tramp) {
+-                                      list_add_tail(&insn->call_node,
+-                                                    &file->static_call_list);
+-                              }
++                              add_call_dest(file, insn, insn->jump_dest->func, true);
+                       }
+               }
+       }
+@@ -972,16 +1017,6 @@ static int add_jump_destinations(struct
+       return 0;
+ }
+-static void remove_insn_ops(struct instruction *insn)
+-{
+-      struct stack_op *op, *tmp;
+-
+-      list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
+-              list_del(&op->list);
+-              free(op);
+-      }
+-}
+-
+ static struct symbol *find_call_destination(struct section *sec, unsigned long offset)
+ {
+       struct symbol *call_dest;
+@@ -1000,6 +1035,7 @@ static int add_call_destinations(struct
+ {
+       struct instruction *insn;
+       unsigned long dest_off;
++      struct symbol *dest;
+       struct reloc *reloc;
+       for_each_insn(file, insn) {
+@@ -1009,7 +1045,9 @@ static int add_call_destinations(struct
+               reloc = insn_reloc(file, insn);
+               if (!reloc) {
+                       dest_off = arch_jump_destination(insn);
+-                      insn->call_dest = find_call_destination(insn->sec, dest_off);
++                      dest = find_call_destination(insn->sec, dest_off);
++
++                      add_call_dest(file, insn, dest, false);
+                       if (insn->ignore)
+                               continue;
+@@ -1027,9 +1065,8 @@ static int add_call_destinations(struct
+               } else if (reloc->sym->type == STT_SECTION) {
+                       dest_off = arch_dest_reloc_offset(reloc->addend);
+-                      insn->call_dest = find_call_destination(reloc->sym->sec,
+-                                                              dest_off);
+-                      if (!insn->call_dest) {
++                      dest = find_call_destination(reloc->sym->sec, dest_off);
++                      if (!dest) {
+                               WARN_FUNC("can't find call dest symbol at %s+0x%lx",
+                                         insn->sec, insn->offset,
+                                         reloc->sym->sec->name,
+@@ -1037,6 +1074,8 @@ static int add_call_destinations(struct
+                               return -1;
+                       }
++                      add_call_dest(file, insn, dest, false);
++
+               } else if (arch_is_retpoline(reloc->sym)) {
+                       /*
+                        * Retpoline calls are really dynamic calls in
+@@ -1052,39 +1091,7 @@ static int add_call_destinations(struct
+                       continue;
+               } else
+-                      insn->call_dest = reloc->sym;
+-
+-              if (insn->call_dest && insn->call_dest->static_call_tramp) {
+-                      list_add_tail(&insn->call_node,
+-                                    &file->static_call_list);
+-              }
+-
+-              /*
+-               * Many compilers cannot disable KCOV with a function attribute
+-               * so they need a little help, NOP out any KCOV calls from noinstr
+-               * text.
+-               */
+-              if (insn->sec->noinstr &&
+-                  !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) {
+-                      if (reloc) {
+-                              reloc->type = R_NONE;
+-                              elf_write_reloc(file->elf, reloc);
+-                      }
+-
+-                      elf_write_insn(file->elf, insn->sec,
+-                                     insn->offset, insn->len,
+-                                     arch_nop_insn(insn->len));
+-                      insn->type = INSN_NOP;
+-              }
+-
+-              /*
+-               * Whatever stack impact regular CALLs have, should be undone
+-               * by the RETURN of the called function.
+-               *
+-               * Annotated intra-function calls retain the stack_ops but
+-               * are converted to JUMP, see read_intra_function_calls().
+-               */
+-              remove_insn_ops(insn);
++                      add_call_dest(file, insn, reloc->sym, false);
+       }
+       return 0;
diff --git a/queue-5.10/objtool-handle-per-arch-retpoline-naming.patch b/queue-5.10/objtool-handle-per-arch-retpoline-naming.patch
new file mode 100644 (file)
index 0000000..fd753c0
--- /dev/null
@@ -0,0 +1,77 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:04 +0100
+Subject: objtool: Handle per arch retpoline naming
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 530b4ddd9dd92b263081f5c7786d39a8129c8b2d upstream.
+
+The __x86_indirect_ naming is obviously not generic. Shorten to allow
+matching some additional magic names later.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151259.630296706@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch.h            |    2 ++
+ tools/objtool/arch/x86/decode.c |    5 +++++
+ tools/objtool/check.c           |    9 +++++++--
+ 3 files changed, 14 insertions(+), 2 deletions(-)
+
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -86,4 +86,6 @@ const char *arch_nop_insn(int len);
+ int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
++bool arch_is_retpoline(struct symbol *sym);
++
+ #endif /* _ARCH_H */
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -620,3 +620,8 @@ int arch_decode_hint_reg(struct instruct
+       return 0;
+ }
++
++bool arch_is_retpoline(struct symbol *sym)
++{
++      return !strncmp(sym->name, "__x86_indirect_", 15);
++}
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -778,6 +778,11 @@ static int add_ignore_alternatives(struc
+       return 0;
+ }
++__weak bool arch_is_retpoline(struct symbol *sym)
++{
++      return false;
++}
++
+ /*
+  * Find the destination instructions for all jumps.
+  */
+@@ -800,7 +805,7 @@ static int add_jump_destinations(struct
+               } else if (reloc->sym->type == STT_SECTION) {
+                       dest_sec = reloc->sym->sec;
+                       dest_off = arch_dest_reloc_offset(reloc->addend);
+-              } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
++              } else if (arch_is_retpoline(reloc->sym)) {
+                       /*
+                        * Retpoline jumps are really dynamic jumps in
+                        * disguise, so convert them accordingly.
+@@ -954,7 +959,7 @@ static int add_call_destinations(struct
+                               return -1;
+                       }
+-              } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
++              } else if (arch_is_retpoline(reloc->sym)) {
+                       /*
+                        * Retpoline calls are really dynamic calls in
+                        * disguise, so convert them accordingly.
diff --git a/queue-5.10/objtool-introduce-cfi-hash.patch b/queue-5.10/objtool-introduce-cfi-hash.patch
new file mode 100644 (file)
index 0000000..9fc42d5
--- /dev/null
@@ -0,0 +1,466 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 24 Jun 2021 11:41:01 +0200
+Subject: objtool: Introduce CFI hash
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 8b946cc38e063f0f7bb67789478c38f6d7d457c9 upstream.
+
+Andi reported that objtool on vmlinux.o consumes more memory than his
+system has, leading to horrific performance.
+
+This is in part because we keep a struct instruction for every
+instruction in the file in-memory. Shrink struct instruction by
+removing the CFI state (which includes full register state) from it
+and demand allocating it.
+
+Given most instructions don't actually change CFI state, there's lots
+of repetition there, so add a hash table to find previous CFI
+instances.
+
+Reduces memory consumption (and runtime) for processing an
+x86_64-allyesconfig:
+
+  pre:  4:40.84 real,   143.99 user,    44.18 sys,      30624988 mem
+  post: 2:14.61 real,   108.58 user,    25.04 sys,      16396184 mem
+
+Suggested-by: Andi Kleen <andi@firstfloor.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20210624095147.756759107@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10:
+ - Don't use bswap_if_needed() since we don't have any of the other fixes
+   for mixed-endian cross-compilation
+ - Since we don't have "objtool: Rewrite hashtable sizing", make
+   cfi_hash_alloc() set the number of bits similarly to elf_hash_bits()
+ - objtool doesn't have any mcount handling
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch.h            |    2 
+ tools/objtool/arch/x86/decode.c |   20 ++---
+ tools/objtool/cfi.h             |    2 
+ tools/objtool/check.c           |  154 +++++++++++++++++++++++++++++++++++-----
+ tools/objtool/check.h           |    2 
+ tools/objtool/orc_gen.c         |   15 ++-
+ 6 files changed, 160 insertions(+), 35 deletions(-)
+
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -84,7 +84,7 @@ unsigned long arch_dest_reloc_offset(int
+ const char *arch_nop_insn(int len);
+-int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
++int arch_decode_hint_reg(u8 sp_reg, int *base);
+ bool arch_is_retpoline(struct symbol *sym);
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -706,34 +706,32 @@ int arch_rewrite_retpolines(struct objto
+       return 0;
+ }
+-int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
++int arch_decode_hint_reg(u8 sp_reg, int *base)
+ {
+-      struct cfi_reg *cfa = &insn->cfi.cfa;
+-
+       switch (sp_reg) {
+       case ORC_REG_UNDEFINED:
+-              cfa->base = CFI_UNDEFINED;
++              *base = CFI_UNDEFINED;
+               break;
+       case ORC_REG_SP:
+-              cfa->base = CFI_SP;
++              *base = CFI_SP;
+               break;
+       case ORC_REG_BP:
+-              cfa->base = CFI_BP;
++              *base = CFI_BP;
+               break;
+       case ORC_REG_SP_INDIRECT:
+-              cfa->base = CFI_SP_INDIRECT;
++              *base = CFI_SP_INDIRECT;
+               break;
+       case ORC_REG_R10:
+-              cfa->base = CFI_R10;
++              *base = CFI_R10;
+               break;
+       case ORC_REG_R13:
+-              cfa->base = CFI_R13;
++              *base = CFI_R13;
+               break;
+       case ORC_REG_DI:
+-              cfa->base = CFI_DI;
++              *base = CFI_DI;
+               break;
+       case ORC_REG_DX:
+-              cfa->base = CFI_DX;
++              *base = CFI_DX;
+               break;
+       default:
+               return -1;
+--- a/tools/objtool/cfi.h
++++ b/tools/objtool/cfi.h
+@@ -7,6 +7,7 @@
+ #define _OBJTOOL_CFI_H
+ #include "cfi_regs.h"
++#include <linux/list.h>
+ #define CFI_UNDEFINED         -1
+ #define CFI_CFA                       -2
+@@ -24,6 +25,7 @@ struct cfi_init_state {
+ };
+ struct cfi_state {
++      struct hlist_node hash; /* must be first, cficmp() */
+       struct cfi_reg regs[CFI_NUM_REGS];
+       struct cfi_reg vals[CFI_NUM_REGS];
+       struct cfi_reg cfa;
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -5,6 +5,7 @@
+ #include <string.h>
+ #include <stdlib.h>
++#include <sys/mman.h>
+ #include "builtin.h"
+ #include "cfi.h"
+@@ -25,7 +26,11 @@ struct alternative {
+       bool skip_orig;
+ };
+-struct cfi_init_state initial_func_cfi;
++static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache;
++
++static struct cfi_init_state initial_func_cfi;
++static struct cfi_state init_cfi;
++static struct cfi_state func_cfi;
+ struct instruction *find_insn(struct objtool_file *file,
+                             struct section *sec, unsigned long offset)
+@@ -265,6 +270,78 @@ static void init_insn_state(struct insn_
+               state->noinstr = sec->noinstr;
+ }
++static struct cfi_state *cfi_alloc(void)
++{
++      struct cfi_state *cfi = calloc(sizeof(struct cfi_state), 1);
++      if (!cfi) {
++              WARN("calloc failed");
++              exit(1);
++      }
++      nr_cfi++;
++      return cfi;
++}
++
++static int cfi_bits;
++static struct hlist_head *cfi_hash;
++
++static inline bool cficmp(struct cfi_state *cfi1, struct cfi_state *cfi2)
++{
++      return memcmp((void *)cfi1 + sizeof(cfi1->hash),
++                    (void *)cfi2 + sizeof(cfi2->hash),
++                    sizeof(struct cfi_state) - sizeof(struct hlist_node));
++}
++
++static inline u32 cfi_key(struct cfi_state *cfi)
++{
++      return jhash((void *)cfi + sizeof(cfi->hash),
++                   sizeof(*cfi) - sizeof(cfi->hash), 0);
++}
++
++static struct cfi_state *cfi_hash_find_or_add(struct cfi_state *cfi)
++{
++      struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)];
++      struct cfi_state *obj;
++
++      hlist_for_each_entry(obj, head, hash) {
++              if (!cficmp(cfi, obj)) {
++                      nr_cfi_cache++;
++                      return obj;
++              }
++      }
++
++      obj = cfi_alloc();
++      *obj = *cfi;
++      hlist_add_head(&obj->hash, head);
++
++      return obj;
++}
++
++static void cfi_hash_add(struct cfi_state *cfi)
++{
++      struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)];
++
++      hlist_add_head(&cfi->hash, head);
++}
++
++static void *cfi_hash_alloc(void)
++{
++      cfi_bits = vmlinux ? ELF_HASH_BITS - 3 : 13;
++      cfi_hash = mmap(NULL, sizeof(struct hlist_head) << cfi_bits,
++                      PROT_READ|PROT_WRITE,
++                      MAP_PRIVATE|MAP_ANON, -1, 0);
++      if (cfi_hash == (void *)-1L) {
++              WARN("mmap fail cfi_hash");
++              cfi_hash = NULL;
++      }  else if (stats) {
++              printf("cfi_bits: %d\n", cfi_bits);
++      }
++
++      return cfi_hash;
++}
++
++static unsigned long nr_insns;
++static unsigned long nr_insns_visited;
++
+ /*
+  * Call the arch-specific instruction decoder for all the instructions and add
+  * them to the global instruction list.
+@@ -275,7 +352,6 @@ static int decode_instructions(struct ob
+       struct symbol *func;
+       unsigned long offset;
+       struct instruction *insn;
+-      unsigned long nr_insns = 0;
+       int ret;
+       for_each_sec(file, sec) {
+@@ -301,7 +377,6 @@ static int decode_instructions(struct ob
+                       memset(insn, 0, sizeof(*insn));
+                       INIT_LIST_HEAD(&insn->alts);
+                       INIT_LIST_HEAD(&insn->stack_ops);
+-                      init_cfi_state(&insn->cfi);
+                       insn->sec = sec;
+                       insn->offset = offset;
+@@ -1077,7 +1152,6 @@ static int handle_group_alt(struct objto
+               memset(nop, 0, sizeof(*nop));
+               INIT_LIST_HEAD(&nop->alts);
+               INIT_LIST_HEAD(&nop->stack_ops);
+-              init_cfi_state(&nop->cfi);
+               nop->sec = special_alt->new_sec;
+               nop->offset = special_alt->new_off + special_alt->new_len;
+@@ -1454,10 +1528,11 @@ static void set_func_state(struct cfi_st
+ static int read_unwind_hints(struct objtool_file *file)
+ {
++      struct cfi_state cfi = init_cfi;
+       struct section *sec, *relocsec;
+-      struct reloc *reloc;
+       struct unwind_hint *hint;
+       struct instruction *insn;
++      struct reloc *reloc;
+       int i;
+       sec = find_section_by_name(file->elf, ".discard.unwind_hints");
+@@ -1495,19 +1570,24 @@ static int read_unwind_hints(struct objt
+               insn->hint = true;
+               if (hint->type == UNWIND_HINT_TYPE_FUNC) {
+-                      set_func_state(&insn->cfi);
++                      insn->cfi = &func_cfi;
+                       continue;
+               }
+-              if (arch_decode_hint_reg(insn, hint->sp_reg)) {
++              if (insn->cfi)
++                      cfi = *(insn->cfi);
++
++              if (arch_decode_hint_reg(hint->sp_reg, &cfi.cfa.base)) {
+                       WARN_FUNC("unsupported unwind_hint sp base reg %d",
+                                 insn->sec, insn->offset, hint->sp_reg);
+                       return -1;
+               }
+-              insn->cfi.cfa.offset = hint->sp_offset;
+-              insn->cfi.type = hint->type;
+-              insn->cfi.end = hint->end;
++              cfi.cfa.offset = hint->sp_offset;
++              cfi.type = hint->type;
++              cfi.end = hint->end;
++
++              insn->cfi = cfi_hash_find_or_add(&cfi);
+       }
+       return 0;
+@@ -2283,13 +2363,18 @@ static int propagate_alt_cfi(struct objt
+       if (!insn->alt_group)
+               return 0;
++      if (!insn->cfi) {
++              WARN("CFI missing");
++              return -1;
++      }
++
+       alt_cfi = insn->alt_group->cfi;
+       group_off = insn->offset - insn->alt_group->first_insn->offset;
+       if (!alt_cfi[group_off]) {
+-              alt_cfi[group_off] = &insn->cfi;
++              alt_cfi[group_off] = insn->cfi;
+       } else {
+-              if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) {
++              if (cficmp(alt_cfi[group_off], insn->cfi)) {
+                       WARN_FUNC("stack layout conflict in alternatives",
+                                 insn->sec, insn->offset);
+                       return -1;
+@@ -2335,9 +2420,14 @@ static int handle_insn_ops(struct instru
+ static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2)
+ {
+-      struct cfi_state *cfi1 = &insn->cfi;
++      struct cfi_state *cfi1 = insn->cfi;
+       int i;
++      if (!cfi1) {
++              WARN("CFI missing");
++              return false;
++      }
++
+       if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) {
+               WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
+@@ -2522,7 +2612,7 @@ static int validate_branch(struct objtoo
+                          struct instruction *insn, struct insn_state state)
+ {
+       struct alternative *alt;
+-      struct instruction *next_insn;
++      struct instruction *next_insn, *prev_insn = NULL;
+       struct section *sec;
+       u8 visited;
+       int ret;
+@@ -2551,15 +2641,25 @@ static int validate_branch(struct objtoo
+                       if (insn->visited & visited)
+                               return 0;
++              } else {
++                      nr_insns_visited++;
+               }
+               if (state.noinstr)
+                       state.instr += insn->instr;
+-              if (insn->hint)
+-                      state.cfi = insn->cfi;
+-              else
+-                      insn->cfi = state.cfi;
++              if (insn->hint) {
++                      state.cfi = *insn->cfi;
++              } else {
++                      /* XXX track if we actually changed state.cfi */
++
++                      if (prev_insn && !cficmp(prev_insn->cfi, &state.cfi)) {
++                              insn->cfi = prev_insn->cfi;
++                              nr_cfi_reused++;
++                      } else {
++                              insn->cfi = cfi_hash_find_or_add(&state.cfi);
++                      }
++              }
+               insn->visited |= visited;
+@@ -2709,6 +2809,7 @@ static int validate_branch(struct objtoo
+                       return 1;
+               }
++              prev_insn = insn;
+               insn = next_insn;
+       }
+@@ -2964,10 +3065,20 @@ int check(struct objtool_file *file)
+       int ret, warnings = 0;
+       arch_initial_func_cfi_state(&initial_func_cfi);
++      init_cfi_state(&init_cfi);
++      init_cfi_state(&func_cfi);
++      set_func_state(&func_cfi);
++
++      if (!cfi_hash_alloc())
++              goto out;
++
++      cfi_hash_add(&init_cfi);
++      cfi_hash_add(&func_cfi);
+       ret = decode_sections(file);
+       if (ret < 0)
+               goto out;
++
+       warnings += ret;
+       if (list_empty(&file->insn_list))
+@@ -3011,6 +3122,13 @@ int check(struct objtool_file *file)
+               goto out;
+       warnings += ret;
++      if (stats) {
++              printf("nr_insns_visited: %ld\n", nr_insns_visited);
++              printf("nr_cfi: %ld\n", nr_cfi);
++              printf("nr_cfi_reused: %ld\n", nr_cfi_reused);
++              printf("nr_cfi_cache: %ld\n", nr_cfi_cache);
++      }
++
+ out:
+       /*
+        *  For now, don't fail the kernel build on fatal warnings.  These
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -59,7 +59,7 @@ struct instruction {
+       struct list_head alts;
+       struct symbol *func;
+       struct list_head stack_ops;
+-      struct cfi_state cfi;
++      struct cfi_state *cfi;
+ };
+ static inline bool is_static_jump(struct instruction *insn)
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -12,13 +12,19 @@
+ #include "check.h"
+ #include "warn.h"
+-static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi)
++static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi,
++                        struct instruction *insn)
+ {
+-      struct instruction *insn = container_of(cfi, struct instruction, cfi);
+       struct cfi_reg *bp = &cfi->regs[CFI_BP];
+       memset(orc, 0, sizeof(*orc));
++      if (!cfi) {
++              orc->end = 0;
++              orc->sp_reg = ORC_REG_UNDEFINED;
++              return 0;
++      }
++
+       orc->end = cfi->end;
+       if (cfi->cfa.base == CFI_UNDEFINED) {
+@@ -159,7 +165,7 @@ int orc_create(struct objtool_file *file
+                       int i;
+                       if (!alt_group) {
+-                              if (init_orc_entry(&orc, &insn->cfi))
++                              if (init_orc_entry(&orc, insn->cfi, insn))
+                                       return -1;
+                               if (!memcmp(&prev_orc, &orc, sizeof(orc)))
+                                       continue;
+@@ -183,7 +189,8 @@ int orc_create(struct objtool_file *file
+                               struct cfi_state *cfi = alt_group->cfi[i];
+                               if (!cfi)
+                                       continue;
+-                              if (init_orc_entry(&orc, cfi))
++                              /* errors are reported on the original insn */
++                              if (init_orc_entry(&orc, cfi, insn))
+                                       return -1;
+                               if (!memcmp(&prev_orc, &orc, sizeof(orc)))
+                                       continue;
diff --git a/queue-5.10/objtool-keep-track-of-retpoline-call-sites.patch b/queue-5.10/objtool-keep-track-of-retpoline-call-sites.patch
new file mode 100644 (file)
index 0000000..4621cc3
--- /dev/null
@@ -0,0 +1,177 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:12 +0100
+Subject: objtool: Keep track of retpoline call sites
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 43d5430ad74ef5156353af7aec352426ec7a8e57 upstream.
+
+Provide infrastructure for architectures to rewrite/augment compiler
+generated retpoline calls. Similar to what we do for static_call()s,
+keep track of the instructions that are retpoline calls.
+
+Use the same list_head, since a retpoline call cannot also be a
+static_call.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151300.130805730@infradead.org
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch.h    |    2 ++
+ tools/objtool/check.c   |   34 +++++++++++++++++++++++++++++-----
+ tools/objtool/check.h   |    2 +-
+ tools/objtool/objtool.c |    1 +
+ tools/objtool/objtool.h |    1 +
+ 5 files changed, 34 insertions(+), 6 deletions(-)
+
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -88,4 +88,6 @@ int arch_decode_hint_reg(struct instruct
+ bool arch_is_retpoline(struct symbol *sym);
++int arch_rewrite_retpolines(struct objtool_file *file);
++
+ #endif /* _ARCH_H */
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -451,7 +451,7 @@ static int create_static_call_sections(s
+               return 0;
+       idx = 0;
+-      list_for_each_entry(insn, &file->static_call_list, static_call_node)
++      list_for_each_entry(insn, &file->static_call_list, call_node)
+               idx++;
+       sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
+@@ -460,7 +460,7 @@ static int create_static_call_sections(s
+               return -1;
+       idx = 0;
+-      list_for_each_entry(insn, &file->static_call_list, static_call_node) {
++      list_for_each_entry(insn, &file->static_call_list, call_node) {
+               site = (struct static_call_site *)sec->data->d_buf + idx;
+               memset(site, 0, sizeof(struct static_call_site));
+@@ -786,13 +786,16 @@ static int add_jump_destinations(struct
+                       else
+                               insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
++                      list_add_tail(&insn->call_node,
++                                    &file->retpoline_call_list);
++
+                       insn->retpoline_safe = true;
+                       continue;
+               } else if (insn->func) {
+                       /* internal or external sibling call (with reloc) */
+                       insn->call_dest = reloc->sym;
+                       if (insn->call_dest->static_call_tramp) {
+-                              list_add_tail(&insn->static_call_node,
++                              list_add_tail(&insn->call_node,
+                                             &file->static_call_list);
+                       }
+                       continue;
+@@ -854,7 +857,7 @@ static int add_jump_destinations(struct
+                               /* internal sibling call (without reloc) */
+                               insn->call_dest = insn->jump_dest->func;
+                               if (insn->call_dest->static_call_tramp) {
+-                                      list_add_tail(&insn->static_call_node,
++                                      list_add_tail(&insn->call_node,
+                                                     &file->static_call_list);
+                               }
+                       }
+@@ -938,6 +941,9 @@ static int add_call_destinations(struct
+                       insn->type = INSN_CALL_DYNAMIC;
+                       insn->retpoline_safe = true;
++                      list_add_tail(&insn->call_node,
++                                    &file->retpoline_call_list);
++
+                       remove_insn_ops(insn);
+                       continue;
+@@ -945,7 +951,7 @@ static int add_call_destinations(struct
+                       insn->call_dest = reloc->sym;
+               if (insn->call_dest && insn->call_dest->static_call_tramp) {
+-                      list_add_tail(&insn->static_call_node,
++                      list_add_tail(&insn->call_node,
+                                     &file->static_call_list);
+               }
+@@ -1655,6 +1661,11 @@ static void mark_rodata(struct objtool_f
+       file->rodata = found;
+ }
++__weak int arch_rewrite_retpolines(struct objtool_file *file)
++{
++      return 0;
++}
++
+ static int decode_sections(struct objtool_file *file)
+ {
+       int ret;
+@@ -1683,6 +1694,10 @@ static int decode_sections(struct objtoo
+       if (ret)
+               return ret;
++      /*
++       * Must be before add_special_section_alts() as that depends on
++       * jump_dest being set.
++       */
+       ret = add_jump_destinations(file);
+       if (ret)
+               return ret;
+@@ -1719,6 +1734,15 @@ static int decode_sections(struct objtoo
+       if (ret)
+               return ret;
++      /*
++       * Must be after add_special_section_alts(), since this will emit
++       * alternatives. Must be after add_{jump,call}_destination(), since
++       * those create the call insn lists.
++       */
++      ret = arch_rewrite_retpolines(file);
++      if (ret)
++              return ret;
++
+       return 0;
+ }
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -39,7 +39,7 @@ struct alt_group {
+ struct instruction {
+       struct list_head list;
+       struct hlist_node hash;
+-      struct list_head static_call_node;
++      struct list_head call_node;
+       struct section *sec;
+       unsigned long offset;
+       unsigned int len;
+--- a/tools/objtool/objtool.c
++++ b/tools/objtool/objtool.c
+@@ -61,6 +61,7 @@ struct objtool_file *objtool_open_read(c
+       INIT_LIST_HEAD(&file.insn_list);
+       hash_init(file.insn_hash);
++      INIT_LIST_HEAD(&file.retpoline_call_list);
+       INIT_LIST_HEAD(&file.static_call_list);
+       file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
+       file.ignore_unreachables = no_unreachable;
+--- a/tools/objtool/objtool.h
++++ b/tools/objtool/objtool.h
+@@ -18,6 +18,7 @@ struct objtool_file {
+       struct elf *elf;
+       struct list_head insn_list;
+       DECLARE_HASHTABLE(insn_hash, 20);
++      struct list_head retpoline_call_list;
+       struct list_head static_call_list;
+       bool ignore_unreachables, c_file, hints, rodata;
+ };
diff --git a/queue-5.10/objtool-make-.altinstructions-section-entry-size-consistent.patch b/queue-5.10/objtool-make-.altinstructions-section-entry-size-consistent.patch
new file mode 100644 (file)
index 0000000..6d27abf
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Joe Lawrence <joe.lawrence@redhat.com>
+Date: Sun, 22 Aug 2021 18:50:36 -0400
+Subject: objtool: Make .altinstructions section entry size consistent
+
+From: Joe Lawrence <joe.lawrence@redhat.com>
+
+commit dc02368164bd0ec603e3f5b3dd8252744a667b8a upstream.
+
+Commit e31694e0a7a7 ("objtool: Don't make .altinstructions writable")
+aligned objtool-created and kernel-created .altinstructions section
+flags, but there remains a minor discrepancy in their use of a section
+entry size: objtool sets one while the kernel build does not.
+
+While sh_entsize of sizeof(struct alt_instr) seems intuitive, this small
+deviation can cause failures with external tooling (kpatch-build).
+
+Fix this by creating new .altinstructions sections with sh_entsize of 0
+and then later updating sec->sh_size as alternatives are added to the
+section.  An added benefit is avoiding the data descriptor and buffer
+created by elf_create_section(), but previously unused by
+elf_add_alternative().
+
+Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
+Signed-off-by: Joe Lawrence <joe.lawrence@redhat.com>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/20210822225037.54620-2-joe.lawrence@redhat.com
+Cc: Andy Lavr <andy.lavr@gmail.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch/x86/decode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -611,7 +611,7 @@ static int elf_add_alternative(struct el
+       sec = find_section_by_name(elf, ".altinstructions");
+       if (!sec) {
+               sec = elf_create_section(elf, ".altinstructions",
+-                                       SHF_ALLOC, size, 0);
++                                       SHF_ALLOC, 0, 0);
+               if (!sec) {
+                       WARN_ELF("elf_create_section");
diff --git a/queue-5.10/objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch b/queue-5.10/objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch
new file mode 100644 (file)
index 0000000..4ecdcc4
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 10 Jun 2021 09:04:29 +0200
+Subject: objtool: Only rewrite unconditional retpoline thunk calls
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 2d49b721dc18c113d5221f4cf5a6104eb66cb7f2 upstream.
+
+It turns out that the compilers generate conditional branches to the
+retpoline thunks like:
+
+  5d5:   0f 85 00 00 00 00       jne    5db <cpuidle_reflect+0x22>
+       5d7: R_X86_64_PLT32     __x86_indirect_thunk_r11-0x4
+
+while the rewrite can only handle JMP/CALL to the thunks. The result
+is the alternative wrecking the code. Make sure to skip writing the
+alternatives for conditional branches.
+
+Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
+Reported-by: Lukasz Majczak <lma@semihalf.com>
+Reported-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch/x86/decode.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -674,6 +674,10 @@ int arch_rewrite_retpolines(struct objto
+       list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
++              if (insn->type != INSN_JUMP_DYNAMIC &&
++                  insn->type != INSN_CALL_DYNAMIC)
++                      continue;
++
+               if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
+                       continue;
diff --git a/queue-5.10/objtool-print-out-the-symbol-type-when-complaining-about-it.patch b/queue-5.10/objtool-print-out-the-symbol-type-when-complaining-about-it.patch
new file mode 100644 (file)
index 0000000..786987f
--- /dev/null
@@ -0,0 +1,65 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Sun, 3 Oct 2021 13:45:48 -0700
+Subject: objtool: print out the symbol type when complaining about it
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 7fab1c12bde926c5a8c7d5984c551d0854d7e0b3 upstream.
+
+The objtool warning that the kvm instruction emulation code triggered
+wasn't very useful:
+
+    arch/x86/kvm/emulate.o: warning: objtool: __ex_table+0x4: don't know how to handle reloc symbol type: kvm_fastop_exception
+
+in that it helpfully tells you which symbol name it had trouble figuring
+out the relocation for, but it doesn't actually say what the unknown
+symbol type was that triggered it all.
+
+In this case it was because of missing type information (type 0, aka
+STT_NOTYPE), but on the whole it really should just have printed that
+out as part of the message.
+
+Because if this warning triggers, that's very much the first thing you
+want to know - why did reloc2sec_off() return failure for that symbol?
+
+So rather than just saying you can't handle some type of symbol without
+saying what the type _was_, just print out the type number too.
+
+Fixes: 24ff65257375 ("objtool: Teach get_alt_entry() about more relocation types")
+Link: https://lore.kernel.org/lkml/CAHk-=wiZwq-0LknKhXN4M+T8jbxn_2i9mcKpO+OaBSSq_Eh7tg@mail.gmail.com/
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/special.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/tools/objtool/special.c
++++ b/tools/objtool/special.c
+@@ -106,8 +106,10 @@ static int get_alt_entry(struct elf *elf
+               return -1;
+       }
+       if (!reloc2sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off)) {
+-              WARN_FUNC("don't know how to handle reloc symbol type: %s",
+-                         sec, offset + entry->orig, orig_reloc->sym->name);
++              WARN_FUNC("don't know how to handle reloc symbol type %d: %s",
++                         sec, offset + entry->orig,
++                         orig_reloc->sym->type,
++                         orig_reloc->sym->name);
+               return -1;
+       }
+@@ -128,8 +130,10 @@ static int get_alt_entry(struct elf *elf
+                       return 1;
+               if (!reloc2sec_off(new_reloc, &alt->new_sec, &alt->new_off)) {
+-                      WARN_FUNC("don't know how to handle reloc symbol type: %s",
+-                                sec, offset + entry->new, new_reloc->sym->name);
++                      WARN_FUNC("don't know how to handle reloc symbol type %d: %s",
++                                sec, offset + entry->new,
++                                new_reloc->sym->type,
++                                new_reloc->sym->name);
+                       return -1;
+               }
diff --git a/queue-5.10/objtool-re-add-unwind_hint_-save_restore.patch b/queue-5.10/objtool-re-add-unwind_hint_-save_restore.patch
new file mode 100644 (file)
index 0000000..3a789a5
--- /dev/null
@@ -0,0 +1,185 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Fri, 24 Jun 2022 12:52:40 +0200
+Subject: objtool: Re-add UNWIND_HINT_{SAVE_RESTORE}
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 8faea26e611189e933ea2281975ff4dc7c1106b6 upstream.
+
+Commit
+
+  c536ed2fffd5 ("objtool: Remove SAVE/RESTORE hints")
+
+removed the save/restore unwind hints because they were no longer
+needed. Now they're going to be needed again so re-add them.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/unwind_hints.h |   12 +++++++++-
+ include/linux/objtool.h             |    6 +++--
+ tools/include/linux/objtool.h       |    6 +++--
+ tools/objtool/check.c               |   40 ++++++++++++++++++++++++++++++++++++
+ tools/objtool/check.h               |    1 
+ 5 files changed, 59 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/unwind_hints.h
++++ b/arch/x86/include/asm/unwind_hints.h
+@@ -8,11 +8,11 @@
+ #ifdef __ASSEMBLY__
+ .macro UNWIND_HINT_EMPTY
+-      UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1
++      UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1
+ .endm
+ .macro UNWIND_HINT_ENTRY
+-      UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1
++      UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1
+ .endm
+ .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
+@@ -56,6 +56,14 @@
+       UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
+ .endm
++.macro UNWIND_HINT_SAVE
++      UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
++.endm
++
++.macro UNWIND_HINT_RESTORE
++      UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
++.endm
++
+ #endif /* __ASSEMBLY__ */
+ #endif /* _ASM_X86_UNWIND_HINTS_H */
+--- a/include/linux/objtool.h
++++ b/include/linux/objtool.h
+@@ -40,6 +40,8 @@ struct unwind_hint {
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC         3
+ #define UNWIND_HINT_TYPE_ENTRY                4
++#define UNWIND_HINT_TYPE_SAVE         5
++#define UNWIND_HINT_TYPE_RESTORE      6
+ #ifdef CONFIG_STACK_VALIDATION
+@@ -102,7 +104,7 @@ struct unwind_hint {
+  * the debuginfo as necessary.  It will also warn if it sees any
+  * inconsistencies.
+  */
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .Lunwind_hint_ip_\@:
+       .pushsection .discard.unwind_hints
+               /* struct unwind_hint */
+@@ -126,7 +128,7 @@ struct unwind_hint {
+ #define STACK_FRAME_NON_STANDARD(func)
+ #else
+ #define ANNOTATE_INTRA_FUNCTION_CALL
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .endm
+ #endif
+--- a/tools/include/linux/objtool.h
++++ b/tools/include/linux/objtool.h
+@@ -40,6 +40,8 @@ struct unwind_hint {
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC         3
+ #define UNWIND_HINT_TYPE_ENTRY                4
++#define UNWIND_HINT_TYPE_SAVE         5
++#define UNWIND_HINT_TYPE_RESTORE      6
+ #ifdef CONFIG_STACK_VALIDATION
+@@ -102,7 +104,7 @@ struct unwind_hint {
+  * the debuginfo as necessary.  It will also warn if it sees any
+  * inconsistencies.
+  */
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .Lunwind_hint_ip_\@:
+       .pushsection .discard.unwind_hints
+               /* struct unwind_hint */
+@@ -126,7 +128,7 @@ struct unwind_hint {
+ #define STACK_FRAME_NON_STANDARD(func)
+ #else
+ #define ANNOTATE_INTRA_FUNCTION_CALL
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .endm
+ #endif
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1752,6 +1752,17 @@ static int read_unwind_hints(struct objt
+               insn->hint = true;
++              if (hint->type == UNWIND_HINT_TYPE_SAVE) {
++                      insn->hint = false;
++                      insn->save = true;
++                      continue;
++              }
++
++              if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
++                      insn->restore = true;
++                      continue;
++              }
++
+               if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
+                       struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
+@@ -2847,6 +2858,35 @@ static int validate_branch(struct objtoo
+                       state.instr += insn->instr;
+               if (insn->hint) {
++                      if (insn->restore) {
++                              struct instruction *save_insn, *i;
++
++                              i = insn;
++                              save_insn = NULL;
++
++                              sym_for_each_insn_continue_reverse(file, func, i) {
++                                      if (i->save) {
++                                              save_insn = i;
++                                              break;
++                                      }
++                              }
++
++                              if (!save_insn) {
++                                      WARN_FUNC("no corresponding CFI save for CFI restore",
++                                                sec, insn->offset);
++                                      return 1;
++                              }
++
++                              if (!save_insn->visited) {
++                                      WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
++                                                sec, insn->offset);
++                                      return 1;
++                              }
++
++                              insn->cfi = save_insn->cfi;
++                              nr_cfi_reused++;
++                      }
++
+                       state.cfi = *insn->cfi;
+               } else {
+                       /* XXX track if we actually changed state.cfi */
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -47,6 +47,7 @@ struct instruction {
+       unsigned long immediate;
+       bool dead_end, ignore, ignore_alts;
+       bool hint;
++      bool save, restore;
+       bool retpoline_safe;
+       bool entry;
+       s8 instr;
diff --git a/queue-5.10/objtool-refactor-orc-section-generation.patch b/queue-5.10/objtool-refactor-orc-section-generation.patch
new file mode 100644 (file)
index 0000000..f7bde59
--- /dev/null
@@ -0,0 +1,439 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 17 Dec 2020 15:02:42 -0600
+Subject: objtool: Refactor ORC section generation
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit ab4e0744e99b87e1a223e89fc3c9ae44f727c9a6 upstream.
+
+Decouple ORC entries from instructions.  This simplifies the
+control/data flow, and is going to make it easier to support alternative
+instructions which change the stack layout.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/Makefile      |    4 
+ tools/objtool/arch.h        |    4 
+ tools/objtool/builtin-orc.c |    6 
+ tools/objtool/check.h       |    3 
+ tools/objtool/objtool.h     |    3 
+ tools/objtool/orc_gen.c     |  274 ++++++++++++++++++++++----------------------
+ tools/objtool/weak.c        |    7 -
+ 7 files changed, 141 insertions(+), 160 deletions(-)
+
+--- a/tools/objtool/Makefile
++++ b/tools/objtool/Makefile
+@@ -46,10 +46,6 @@ ifeq ($(SRCARCH),x86)
+       SUBCMD_ORC := y
+ endif
+-ifeq ($(SUBCMD_ORC),y)
+-      CFLAGS += -DINSN_USE_ORC
+-endif
+-
+ export SUBCMD_CHECK SUBCMD_ORC
+ export srctree OUTPUT CFLAGS SRCARCH AWK
+ include $(srctree)/tools/build/Makefile.include
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -11,10 +11,6 @@
+ #include "objtool.h"
+ #include "cfi.h"
+-#ifdef INSN_USE_ORC
+-#include <asm/orc_types.h>
+-#endif
+-
+ enum insn_type {
+       INSN_JUMP_CONDITIONAL,
+       INSN_JUMP_UNCONDITIONAL,
+--- a/tools/objtool/builtin-orc.c
++++ b/tools/objtool/builtin-orc.c
+@@ -51,11 +51,7 @@ int cmd_orc(int argc, const char **argv)
+               if (list_empty(&file->insn_list))
+                       return 0;
+-              ret = create_orc(file);
+-              if (ret)
+-                      return ret;
+-
+-              ret = create_orc_sections(file);
++              ret = orc_create(file);
+               if (ret)
+                       return ret;
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -43,9 +43,6 @@ struct instruction {
+       struct symbol *func;
+       struct list_head stack_ops;
+       struct cfi_state cfi;
+-#ifdef INSN_USE_ORC
+-      struct orc_entry orc;
+-#endif
+ };
+ static inline bool is_static_jump(struct instruction *insn)
+--- a/tools/objtool/objtool.h
++++ b/tools/objtool/objtool.h
+@@ -26,7 +26,6 @@ struct objtool_file *objtool_open_read(c
+ int check(struct objtool_file *file);
+ int orc_dump(const char *objname);
+-int create_orc(struct objtool_file *file);
+-int create_orc_sections(struct objtool_file *file);
++int orc_create(struct objtool_file *file);
+ #endif /* _OBJTOOL_H */
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -12,89 +12,84 @@
+ #include "check.h"
+ #include "warn.h"
+-int create_orc(struct objtool_file *file)
++static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi)
+ {
+-      struct instruction *insn;
++      struct instruction *insn = container_of(cfi, struct instruction, cfi);
++      struct cfi_reg *bp = &cfi->regs[CFI_BP];
+-      for_each_insn(file, insn) {
+-              struct orc_entry *orc = &insn->orc;
+-              struct cfi_reg *cfa = &insn->cfi.cfa;
+-              struct cfi_reg *bp = &insn->cfi.regs[CFI_BP];
++      memset(orc, 0, sizeof(*orc));
+-              if (!insn->sec->text)
+-                      continue;
+-
+-              orc->end = insn->cfi.end;
+-
+-              if (cfa->base == CFI_UNDEFINED) {
+-                      orc->sp_reg = ORC_REG_UNDEFINED;
+-                      continue;
+-              }
+-
+-              switch (cfa->base) {
+-              case CFI_SP:
+-                      orc->sp_reg = ORC_REG_SP;
+-                      break;
+-              case CFI_SP_INDIRECT:
+-                      orc->sp_reg = ORC_REG_SP_INDIRECT;
+-                      break;
+-              case CFI_BP:
+-                      orc->sp_reg = ORC_REG_BP;
+-                      break;
+-              case CFI_BP_INDIRECT:
+-                      orc->sp_reg = ORC_REG_BP_INDIRECT;
+-                      break;
+-              case CFI_R10:
+-                      orc->sp_reg = ORC_REG_R10;
+-                      break;
+-              case CFI_R13:
+-                      orc->sp_reg = ORC_REG_R13;
+-                      break;
+-              case CFI_DI:
+-                      orc->sp_reg = ORC_REG_DI;
+-                      break;
+-              case CFI_DX:
+-                      orc->sp_reg = ORC_REG_DX;
+-                      break;
+-              default:
+-                      WARN_FUNC("unknown CFA base reg %d",
+-                                insn->sec, insn->offset, cfa->base);
+-                      return -1;
+-              }
++      orc->end = cfi->end;
+-              switch(bp->base) {
+-              case CFI_UNDEFINED:
+-                      orc->bp_reg = ORC_REG_UNDEFINED;
+-                      break;
+-              case CFI_CFA:
+-                      orc->bp_reg = ORC_REG_PREV_SP;
+-                      break;
+-              case CFI_BP:
+-                      orc->bp_reg = ORC_REG_BP;
+-                      break;
+-              default:
+-                      WARN_FUNC("unknown BP base reg %d",
+-                                insn->sec, insn->offset, bp->base);
+-                      return -1;
+-              }
++      if (cfi->cfa.base == CFI_UNDEFINED) {
++              orc->sp_reg = ORC_REG_UNDEFINED;
++              return 0;
++      }
++
++      switch (cfi->cfa.base) {
++      case CFI_SP:
++              orc->sp_reg = ORC_REG_SP;
++              break;
++      case CFI_SP_INDIRECT:
++              orc->sp_reg = ORC_REG_SP_INDIRECT;
++              break;
++      case CFI_BP:
++              orc->sp_reg = ORC_REG_BP;
++              break;
++      case CFI_BP_INDIRECT:
++              orc->sp_reg = ORC_REG_BP_INDIRECT;
++              break;
++      case CFI_R10:
++              orc->sp_reg = ORC_REG_R10;
++              break;
++      case CFI_R13:
++              orc->sp_reg = ORC_REG_R13;
++              break;
++      case CFI_DI:
++              orc->sp_reg = ORC_REG_DI;
++              break;
++      case CFI_DX:
++              orc->sp_reg = ORC_REG_DX;
++              break;
++      default:
++              WARN_FUNC("unknown CFA base reg %d",
++                        insn->sec, insn->offset, cfi->cfa.base);
++              return -1;
++      }
+-              orc->sp_offset = cfa->offset;
+-              orc->bp_offset = bp->offset;
+-              orc->type = insn->cfi.type;
++      switch (bp->base) {
++      case CFI_UNDEFINED:
++              orc->bp_reg = ORC_REG_UNDEFINED;
++              break;
++      case CFI_CFA:
++              orc->bp_reg = ORC_REG_PREV_SP;
++              break;
++      case CFI_BP:
++              orc->bp_reg = ORC_REG_BP;
++              break;
++      default:
++              WARN_FUNC("unknown BP base reg %d",
++                        insn->sec, insn->offset, bp->base);
++              return -1;
+       }
++      orc->sp_offset = cfi->cfa.offset;
++      orc->bp_offset = bp->offset;
++      orc->type = cfi->type;
++
+       return 0;
+ }
+-static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec,
+-                              unsigned int idx, struct section *insn_sec,
+-                              unsigned long insn_off, struct orc_entry *o)
++static int write_orc_entry(struct elf *elf, struct section *orc_sec,
++                         struct section *ip_rsec, unsigned int idx,
++                         struct section *insn_sec, unsigned long insn_off,
++                         struct orc_entry *o)
+ {
+       struct orc_entry *orc;
+       struct reloc *reloc;
+       /* populate ORC data */
+-      orc = (struct orc_entry *)u_sec->data->d_buf + idx;
++      orc = (struct orc_entry *)orc_sec->data->d_buf + idx;
+       memcpy(orc, o, sizeof(*orc));
+       /* populate reloc for ip */
+@@ -114,102 +109,109 @@ static int create_orc_entry(struct elf *
+       reloc->type = R_X86_64_PC32;
+       reloc->offset = idx * sizeof(int);
+-      reloc->sec = ip_relocsec;
++      reloc->sec = ip_rsec;
+       elf_add_reloc(elf, reloc);
+       return 0;
+ }
+-int create_orc_sections(struct objtool_file *file)
++struct orc_list_entry {
++      struct list_head list;
++      struct orc_entry orc;
++      struct section *insn_sec;
++      unsigned long insn_off;
++};
++
++static int orc_list_add(struct list_head *orc_list, struct orc_entry *orc,
++                      struct section *sec, unsigned long offset)
++{
++      struct orc_list_entry *entry = malloc(sizeof(*entry));
++
++      if (!entry) {
++              WARN("malloc failed");
++              return -1;
++      }
++
++      entry->orc      = *orc;
++      entry->insn_sec = sec;
++      entry->insn_off = offset;
++
++      list_add_tail(&entry->list, orc_list);
++      return 0;
++}
++
++int orc_create(struct objtool_file *file)
+ {
+-      struct instruction *insn, *prev_insn;
+-      struct section *sec, *u_sec, *ip_relocsec;
+-      unsigned int idx;
++      struct section *sec, *ip_rsec, *orc_sec;
++      unsigned int nr = 0, idx = 0;
++      struct orc_list_entry *entry;
++      struct list_head orc_list;
+-      struct orc_entry empty = {
+-              .sp_reg = ORC_REG_UNDEFINED,
++      struct orc_entry null = {
++              .sp_reg  = ORC_REG_UNDEFINED,
+               .bp_reg  = ORC_REG_UNDEFINED,
+               .type    = UNWIND_HINT_TYPE_CALL,
+       };
+-      sec = find_section_by_name(file->elf, ".orc_unwind");
+-      if (sec) {
+-              WARN("file already has .orc_unwind section, skipping");
+-              return -1;
+-      }
+-
+-      /* count the number of needed orcs */
+-      idx = 0;
++      /* Build a deduplicated list of ORC entries: */
++      INIT_LIST_HEAD(&orc_list);
+       for_each_sec(file, sec) {
++              struct orc_entry orc, prev_orc = {0};
++              struct instruction *insn;
++              bool empty = true;
++
+               if (!sec->text)
+                       continue;
+-              prev_insn = NULL;
+               sec_for_each_insn(file, sec, insn) {
+-                      if (!prev_insn ||
+-                          memcmp(&insn->orc, &prev_insn->orc,
+-                                 sizeof(struct orc_entry))) {
+-                              idx++;
+-                      }
+-                      prev_insn = insn;
++                      if (init_orc_entry(&orc, &insn->cfi))
++                              return -1;
++                      if (!memcmp(&prev_orc, &orc, sizeof(orc)))
++                              continue;
++                      if (orc_list_add(&orc_list, &orc, sec, insn->offset))
++                              return -1;
++                      nr++;
++                      prev_orc = orc;
++                      empty = false;
+               }
+-              /* section terminator */
+-              if (prev_insn)
+-                      idx++;
++              /* Add a section terminator */
++              if (!empty) {
++                      orc_list_add(&orc_list, &null, sec, sec->len);
++                      nr++;
++              }
+       }
+-      if (!idx)
+-              return -1;
++      if (!nr)
++              return 0;
++      /* Create .orc_unwind, .orc_unwind_ip and .rela.orc_unwind_ip sections: */
++      sec = find_section_by_name(file->elf, ".orc_unwind");
++      if (sec) {
++              WARN("file already has .orc_unwind section, skipping");
++              return -1;
++      }
++      orc_sec = elf_create_section(file->elf, ".orc_unwind", 0,
++                                   sizeof(struct orc_entry), nr);
++      if (!orc_sec)
++              return -1;
+-      /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
+-      sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx);
++      sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr);
+       if (!sec)
+               return -1;
+-
+-      ip_relocsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+-      if (!ip_relocsec)
++      ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
++      if (!ip_rsec)
+               return -1;
+-      /* create .orc_unwind section */
+-      u_sec = elf_create_section(file->elf, ".orc_unwind", 0,
+-                                 sizeof(struct orc_entry), idx);
+-
+-      /* populate sections */
+-      idx = 0;
+-      for_each_sec(file, sec) {
+-              if (!sec->text)
+-                      continue;
+-
+-              prev_insn = NULL;
+-              sec_for_each_insn(file, sec, insn) {
+-                      if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc,
+-                                               sizeof(struct orc_entry))) {
+-
+-                              if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
+-                                                   insn->sec, insn->offset,
+-                                                   &insn->orc))
+-                                      return -1;
+-
+-                              idx++;
+-                      }
+-                      prev_insn = insn;
+-              }
+-
+-              /* section terminator */
+-              if (prev_insn) {
+-                      if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
+-                                           prev_insn->sec,
+-                                           prev_insn->offset + prev_insn->len,
+-                                           &empty))
+-                              return -1;
+-
+-                      idx++;
+-              }
++      /* Write ORC entries to sections: */
++      list_for_each_entry(entry, &orc_list, list) {
++              if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++,
++                                  entry->insn_sec, entry->insn_off,
++                                  &entry->orc))
++                      return -1;
+       }
+-      if (elf_rebuild_reloc_section(file->elf, ip_relocsec))
++      if (elf_rebuild_reloc_section(file->elf, ip_rsec))
+               return -1;
+       return 0;
+--- a/tools/objtool/weak.c
++++ b/tools/objtool/weak.c
+@@ -25,12 +25,7 @@ int __weak orc_dump(const char *_objname
+       UNSUPPORTED("orc");
+ }
+-int __weak create_orc(struct objtool_file *file)
+-{
+-      UNSUPPORTED("orc");
+-}
+-
+-int __weak create_orc_sections(struct objtool_file *file)
++int __weak orc_create(struct objtool_file *file)
+ {
+       UNSUPPORTED("orc");
+ }
diff --git a/queue-5.10/objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch b/queue-5.10/objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch
new file mode 100644 (file)
index 0000000..5b2fb24
--- /dev/null
@@ -0,0 +1,91 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 4 Oct 2021 10:07:50 -0700
+Subject: objtool: Remove reloc symbol type checks in get_alt_entry()
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 4d8b35968bbf9e42b6b202eedb510e2c82ad8b38 upstream.
+
+Converting a special section's relocation reference to a symbol is
+straightforward.  No need for objtool to complain that it doesn't know
+how to handle it.  Just handle it.
+
+This fixes the following warning:
+
+  arch/x86/kvm/emulate.o: warning: objtool: __ex_table+0x4: don't know how to handle reloc symbol type: kvm_fastop_exception
+
+Fixes: 24ff65257375 ("objtool: Teach get_alt_entry() about more relocation types")
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/feadbc3dfb3440d973580fad8d3db873cbfe1694.1633367242.git.jpoimboe@redhat.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: x86@kernel.org
+Cc: Miroslav Benes <mbenes@suse.cz>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/special.c |   36 +++++++-----------------------------
+ 1 file changed, 7 insertions(+), 29 deletions(-)
+
+--- a/tools/objtool/special.c
++++ b/tools/objtool/special.c
+@@ -55,22 +55,11 @@ void __weak arch_handle_alternative(unsi
+ {
+ }
+-static bool reloc2sec_off(struct reloc *reloc, struct section **sec, unsigned long *off)
++static void reloc_to_sec_off(struct reloc *reloc, struct section **sec,
++                           unsigned long *off)
+ {
+-      switch (reloc->sym->type) {
+-      case STT_FUNC:
+-              *sec = reloc->sym->sec;
+-              *off = reloc->sym->offset + reloc->addend;
+-              return true;
+-
+-      case STT_SECTION:
+-              *sec = reloc->sym->sec;
+-              *off = reloc->addend;
+-              return true;
+-
+-      default:
+-              return false;
+-      }
++      *sec = reloc->sym->sec;
++      *off = reloc->sym->offset + reloc->addend;
+ }
+ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
+@@ -105,13 +94,8 @@ static int get_alt_entry(struct elf *elf
+               WARN_FUNC("can't find orig reloc", sec, offset + entry->orig);
+               return -1;
+       }
+-      if (!reloc2sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off)) {
+-              WARN_FUNC("don't know how to handle reloc symbol type %d: %s",
+-                         sec, offset + entry->orig,
+-                         orig_reloc->sym->type,
+-                         orig_reloc->sym->name);
+-              return -1;
+-      }
++
++      reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off);
+       if (!entry->group || alt->new_len) {
+               new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new);
+@@ -129,13 +113,7 @@ static int get_alt_entry(struct elf *elf
+               if (arch_is_retpoline(new_reloc->sym))
+                       return 1;
+-              if (!reloc2sec_off(new_reloc, &alt->new_sec, &alt->new_off)) {
+-                      WARN_FUNC("don't know how to handle reloc symbol type %d: %s",
+-                                sec, offset + entry->new,
+-                                new_reloc->sym->type,
+-                                new_reloc->sym->name);
+-                      return -1;
+-              }
++              reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off);
+               /* _ASM_EXTABLE_EX hack */
+               if (alt->new_off >= 0x7ffffff0)
diff --git a/queue-5.10/objtool-rework-the-elf_rebuild_reloc_section-logic.patch b/queue-5.10/objtool-rework-the-elf_rebuild_reloc_section-logic.patch
new file mode 100644 (file)
index 0000000..90bc0fc
--- /dev/null
@@ -0,0 +1,126 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:06 +0100
+Subject: objtool: Rework the elf_rebuild_reloc_section() logic
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 3a647607b57ad8346e659ddd3b951ac292c83690 upstream.
+
+Instead of manually calling elf_rebuild_reloc_section() on sections
+we've called elf_add_reloc() on, have elf_write() DTRT.
+
+This makes it easier to add random relocations in places without
+carefully tracking when we're done and need to flush what section.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151259.754213408@infradead.org
+[bwh: Backported to 5.10: drop changes in create_mcount_loc_sections()]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c   |    3 ---
+ tools/objtool/elf.c     |   20 ++++++++++++++------
+ tools/objtool/elf.h     |    1 -
+ tools/objtool/orc_gen.c |    3 ---
+ 4 files changed, 14 insertions(+), 13 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -542,9 +542,6 @@ static int create_static_call_sections(s
+               idx++;
+       }
+-      if (elf_rebuild_reloc_section(file->elf, reloc_sec))
+-              return -1;
+-
+       return 0;
+ }
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -530,6 +530,8 @@ void elf_add_reloc(struct elf *elf, stru
+       list_add_tail(&reloc->list, &sec->reloc_list);
+       elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
++
++      sec->changed = true;
+ }
+ static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
+@@ -609,7 +611,9 @@ static int read_relocs(struct elf *elf)
+                               return -1;
+                       }
+-                      elf_add_reloc(elf, reloc);
++                      list_add_tail(&reloc->list, &sec->reloc_list);
++                      elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
++
+                       nr_reloc++;
+               }
+               max_reloc = max(max_reloc, nr_reloc);
+@@ -920,14 +924,11 @@ static int elf_rebuild_rela_reloc_sectio
+       return 0;
+ }
+-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
++static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
+ {
+       struct reloc *reloc;
+       int nr;
+-      sec->changed = true;
+-      elf->changed = true;
+-
+       nr = 0;
+       list_for_each_entry(reloc, &sec->reloc_list, list)
+               nr++;
+@@ -991,9 +992,15 @@ int elf_write(struct elf *elf)
+       struct section *sec;
+       Elf_Scn *s;
+-      /* Update section headers for changed sections: */
++      /* Update changed relocation sections and section headers: */
+       list_for_each_entry(sec, &elf->sections, list) {
+               if (sec->changed) {
++                      if (sec->base &&
++                          elf_rebuild_reloc_section(elf, sec)) {
++                              WARN("elf_rebuild_reloc_section");
++                              return -1;
++                      }
++
+                       s = elf_getscn(elf->elf, sec->idx);
+                       if (!s) {
+                               WARN_ELF("elf_getscn");
+@@ -1005,6 +1012,7 @@ int elf_write(struct elf *elf)
+                       }
+                       sec->changed = false;
++                      elf->changed = true;
+               }
+       }
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -142,7 +142,6 @@ struct reloc *find_reloc_by_dest_range(c
+ struct symbol *find_func_containing(struct section *sec, unsigned long offset);
+ void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
+                             struct reloc *reloc);
+-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec);
+ #define for_each_sec(file, sec)                                               \
+       list_for_each_entry(sec, &file->elf->sections, list)
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -251,8 +251,5 @@ int orc_create(struct objtool_file *file
+                       return -1;
+       }
+-      if (elf_rebuild_reloc_section(file->elf, ip_rsec))
+-              return -1;
+-
+       return 0;
+ }
diff --git a/queue-5.10/objtool-skip-magical-retpoline-.altinstr_replacement.patch b/queue-5.10/objtool-skip-magical-retpoline-.altinstr_replacement.patch
new file mode 100644 (file)
index 0000000..a1ca660
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:14 +0100
+Subject: objtool: Skip magical retpoline .altinstr_replacement
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 50e7b4a1a1b264fc7df0698f2defb93cadf19a7b upstream.
+
+When the .altinstr_replacement is a retpoline, skip the alternative.
+We already special case retpolines anyway.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151300.259429287@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/special.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/tools/objtool/special.c
++++ b/tools/objtool/special.c
+@@ -104,6 +104,14 @@ static int get_alt_entry(struct elf *elf
+                       return -1;
+               }
++              /*
++               * Skip retpoline .altinstr_replacement... we already rewrite the
++               * instructions for retpolines anyway, see arch_is_retpoline()
++               * usage in add_{call,jump}_destinations().
++               */
++              if (arch_is_retpoline(new_reloc->sym))
++                      return 1;
++
+               alt->new_sec = new_reloc->sym->sec;
+               alt->new_off = (unsigned int)new_reloc->addend;
+@@ -152,7 +160,9 @@ int special_get_alts(struct elf *elf, st
+                       memset(alt, 0, sizeof(*alt));
+                       ret = get_alt_entry(elf, entry, sec, idx, alt);
+-                      if (ret)
++                      if (ret > 0)
++                              continue;
++                      if (ret < 0)
+                               return ret;
+                       list_add_tail(&alt->list, alts);
diff --git a/queue-5.10/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch b/queue-5.10/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch
new file mode 100644 (file)
index 0000000..ee3474a
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Fri, 1 Jul 2022 09:00:45 -0300
+Subject: objtool: skip non-text sections when adding return-thunk sites
+
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+
+The .discard.text section is added in order to reserve BRK, with a
+temporary function just so it can give it a size. This adds a relocation to
+the return thunk, which objtool will add to the .return_sites section.
+Linking will then fail as there are references to the .discard.text
+section.
+
+Do not add instructions from non-text sections to the list of return thunk
+calls, avoiding the reference to .discard.text.
+
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1090,7 +1090,9 @@ static void add_return_call(struct objto
+       insn->type = INSN_RETURN;
+       insn->retpoline_safe = true;
+-      list_add_tail(&insn->call_node, &file->return_thunk_list);
++      /* Skip the non-text sections, specially .discard ones */
++      if (insn->sec->text)
++              list_add_tail(&insn->call_node, &file->return_thunk_list);
+ }
+ /*
diff --git a/queue-5.10/objtool-support-asm-jump-tables.patch b/queue-5.10/objtool-support-asm-jump-tables.patch
new file mode 100644 (file)
index 0000000..f3f0ad6
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Wed, 24 Feb 2021 10:29:14 -0600
+Subject: objtool: Support asm jump tables
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 99033461e685b48549ec77608b4bda75ddf772ce upstream.
+
+Objtool detection of asm jump tables would normally just work, except
+for the fact that asm retpolines use alternatives.  Objtool thinks the
+alternative code path (a jump to the retpoline) is a sibling call.
+
+Don't treat alternative indirect branches as sibling calls when the
+original instruction has a jump table.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Ard Biesheuvel <ardb@kernel.org>
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Tested-by: Sami Tolvanen <samitolvanen@google.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Link: https://lore.kernel.org/r/460cf4dc675d64e1124146562cabd2c05aa322e8.1614182415.git.jpoimboe@redhat.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |   14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -107,6 +107,18 @@ static struct instruction *prev_insn_sam
+       for (insn = next_insn_same_sec(file, insn); insn;               \
+            insn = next_insn_same_sec(file, insn))
++static bool is_jump_table_jump(struct instruction *insn)
++{
++      struct alt_group *alt_group = insn->alt_group;
++
++      if (insn->jump_table)
++              return true;
++
++      /* Retpoline alternative for a jump table? */
++      return alt_group && alt_group->orig_group &&
++             alt_group->orig_group->first_insn->jump_table;
++}
++
+ static bool is_sibling_call(struct instruction *insn)
+ {
+       /*
+@@ -119,7 +131,7 @@ static bool is_sibling_call(struct instr
+       /* An indirect jump is either a sibling call or a jump to a table. */
+       if (insn->type == INSN_JUMP_DYNAMIC)
+-              return list_empty(&insn->alts);
++              return !is_jump_table_jump(insn);
+       /* add_jump_destinations() sets insn->call_dest for sibling calls. */
+       return (is_static_jump(insn) && insn->call_dest);
diff --git a/queue-5.10/objtool-support-retpoline-jump-detection-for-vmlinux.o.patch b/queue-5.10/objtool-support-retpoline-jump-detection-for-vmlinux.o.patch
new file mode 100644 (file)
index 0000000..e4c8503
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 21 Jan 2021 15:29:20 -0600
+Subject: objtool: Support retpoline jump detection for vmlinux.o
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 31a7424bc58063a8e0466c3c10f31a52ec2be4f6 upstream.
+
+Objtool converts direct retpoline jumps to type INSN_JUMP_DYNAMIC, since
+that's what they are semantically.
+
+That conversion doesn't work in vmlinux.o validation because the
+indirect thunk function is present in the object, so the intra-object
+jump check succeeds before the retpoline jump check gets a chance.
+
+Rearrange the checks: check for a retpoline jump before checking for an
+intra-object jump.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/4302893513770dde68ddc22a9d6a2a04aca491dd.1611263461.git.jpoimboe@redhat.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -795,10 +795,6 @@ static int add_jump_destinations(struct
+               } else if (reloc->sym->type == STT_SECTION) {
+                       dest_sec = reloc->sym->sec;
+                       dest_off = arch_dest_reloc_offset(reloc->addend);
+-              } else if (reloc->sym->sec->idx) {
+-                      dest_sec = reloc->sym->sec;
+-                      dest_off = reloc->sym->sym.st_value +
+-                                 arch_dest_reloc_offset(reloc->addend);
+               } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) ||
+                          !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) {
+                       /*
+@@ -812,6 +808,10 @@ static int add_jump_destinations(struct
+                       insn->retpoline_safe = true;
+                       continue;
++              } else if (reloc->sym->sec->idx) {
++                      dest_sec = reloc->sym->sec;
++                      dest_off = reloc->sym->sym.st_value +
++                                 arch_dest_reloc_offset(reloc->addend);
+               } else {
+                       /* external sibling call */
+                       insn->call_dest = reloc->sym;
diff --git a/queue-5.10/objtool-support-stack-layout-changes-in-alternatives.patch b/queue-5.10/objtool-support-stack-layout-changes-in-alternatives.patch
new file mode 100644 (file)
index 0000000..0738bc3
--- /dev/null
@@ -0,0 +1,514 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Fri, 18 Dec 2020 14:26:21 -0600
+Subject: objtool: Support stack layout changes in alternatives
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit c9c324dc22aab1687da37001b321b6dfa93a0699 upstream.
+
+The ORC unwinder showed a warning [1] which revealed the stack layout
+didn't match what was expected.  The problem was that paravirt patching
+had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP".  That changed
+the stack layout between the PUSHF and the POP, so unwinding from an
+interrupt which occurred between those two instructions would fail.
+
+Part of the agreed upon solution was to rework the custom paravirt
+patching code to use alternatives instead, since objtool already knows
+how to read alternatives (and converging runtime patching infrastructure
+is always a good thing anyway).  But the main problem still remains,
+which is that runtime patching can change the stack layout.
+
+Making stack layout changes in alternatives was disallowed with commit
+7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
+is going to be doing it, it needs to be supported.
+
+One way to do so would be to modify the ORC table when the code gets
+patched.  But ORC is simple -- a good thing! -- and it's best to leave
+it alone.
+
+Instead, support stack layout changes by "flattening" all possible stack
+states (CFI) from parallel alternative code streams into a single set of
+linear states.  The only necessary limitation is that CFI conflicts are
+disallowed at all possible instruction boundaries.
+
+For example, this scenario is allowed:
+
+          Alt1                    Alt2                    Alt3
+
+   0x00   CALL *pv_ops.save_fl    CALL xen_save_fl        PUSHF
+   0x01                                                   POP %RAX
+   0x02                                                   NOP
+   ...
+   0x05                           NOP
+   ...
+   0x07   <insn>
+
+The unwind information for offset-0x00 is identical for all 3
+alternatives.  Similarly offset-0x05 and higher also are identical (and
+the same as 0x00).  However offset-0x01 has deviating CFI, but that is
+only relevant for Alt3, neither of the other alternative instruction
+streams will ever hit that offset.
+
+This scenario is NOT allowed:
+
+          Alt1                    Alt2
+
+   0x00   CALL *pv_ops.save_fl    PUSHF
+   0x01                           NOP6
+   ...
+   0x07   NOP                     POP %RAX
+
+The problem here is that offset-0x7, which is an instruction boundary in
+both possible instruction patch streams, has two conflicting stack
+layouts.
+
+[ The above examples were stolen from Peter Zijlstra. ]
+
+The new flattened CFI array is used both for the detection of conflicts
+(like the second example above) and the generation of linear ORC
+entries.
+
+BTW, another benefit of these changes is that, thanks to some related
+cleanups (new fake nops and alt_group struct) objtool can finally be rid
+of fake jumps, which were a constant source of headaches.
+
+[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
+
+Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/Documentation/stack-validation.txt |   14 -
+ tools/objtool/check.c                            |  196 +++++++++++------------
+ tools/objtool/check.h                            |    6 
+ tools/objtool/orc_gen.c                          |   56 +++++-
+ 4 files changed, 160 insertions(+), 112 deletions(-)
+
+--- a/tools/objtool/Documentation/stack-validation.txt
++++ b/tools/objtool/Documentation/stack-validation.txt
+@@ -315,13 +315,15 @@ they mean, and suggestions for how to fi
+       function tracing inserts additional calls, which is not obvious from the
+       sources).
+-10. file.o: warning: func()+0x5c: alternative modifies stack
++10. file.o: warning: func()+0x5c: stack layout conflict in alternatives
+-    This means that an alternative includes instructions that modify the
+-    stack. The problem is that there is only one ORC unwind table, this means
+-    that the ORC unwind entries must be valid for each of the alternatives.
+-    The easiest way to enforce this is to ensure alternatives do not contain
+-    any ORC entries, which in turn implies the above constraint.
++    This means that in the use of the alternative() or ALTERNATIVE()
++    macro, the code paths have conflicting modifications to the stack.
++    The problem is that there is only one ORC unwind table, which means
++    that the ORC unwind entries must be consistent for all possible
++    instruction boundaries regardless of which code has been patched.
++    This limitation can be overcome by massaging the alternatives with
++    NOPs to shift the stack changes around so they no longer conflict.
+ 11. file.o: warning: unannotated intra-function call
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -19,8 +19,6 @@
+ #include <linux/kernel.h>
+ #include <linux/static_call_types.h>
+-#define FAKE_JUMP_OFFSET -1
+-
+ struct alternative {
+       struct list_head list;
+       struct instruction *insn;
+@@ -789,9 +787,6 @@ static int add_jump_destinations(struct
+               if (!is_static_jump(insn))
+                       continue;
+-              if (insn->offset == FAKE_JUMP_OFFSET)
+-                      continue;
+-
+               reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+                                              insn->offset, insn->len);
+               if (!reloc) {
+@@ -991,28 +986,15 @@ static int add_call_destinations(struct
+ }
+ /*
+- * The .alternatives section requires some extra special care, over and above
+- * what other special sections require:
+- *
+- * 1. Because alternatives are patched in-place, we need to insert a fake jump
+- *    instruction at the end so that validate_branch() skips all the original
+- *    replaced instructions when validating the new instruction path.
+- *
+- * 2. An added wrinkle is that the new instruction length might be zero.  In
+- *    that case the old instructions are replaced with noops.  We simulate that
+- *    by creating a fake jump as the only new instruction.
+- *
+- * 3. In some cases, the alternative section includes an instruction which
+- *    conditionally jumps to the _end_ of the entry.  We have to modify these
+- *    jumps' destinations to point back to .text rather than the end of the
+- *    entry in .altinstr_replacement.
++ * The .alternatives section requires some extra special care over and above
++ * other special sections because alternatives are patched in place.
+  */
+ static int handle_group_alt(struct objtool_file *file,
+                           struct special_alt *special_alt,
+                           struct instruction *orig_insn,
+                           struct instruction **new_insn)
+ {
+-      struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
++      struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL;
+       struct alt_group *orig_alt_group, *new_alt_group;
+       unsigned long dest_off;
+@@ -1022,6 +1004,13 @@ static int handle_group_alt(struct objto
+               WARN("malloc failed");
+               return -1;
+       }
++      orig_alt_group->cfi = calloc(special_alt->orig_len,
++                                   sizeof(struct cfi_state *));
++      if (!orig_alt_group->cfi) {
++              WARN("calloc failed");
++              return -1;
++      }
++
+       last_orig_insn = NULL;
+       insn = orig_insn;
+       sec_for_each_insn_from(file, insn) {
+@@ -1035,42 +1024,45 @@ static int handle_group_alt(struct objto
+       orig_alt_group->first_insn = orig_insn;
+       orig_alt_group->last_insn = last_orig_insn;
+-      if (next_insn_same_sec(file, last_orig_insn)) {
+-              fake_jump = malloc(sizeof(*fake_jump));
+-              if (!fake_jump) {
+-                      WARN("malloc failed");
+-                      return -1;
+-              }
+-              memset(fake_jump, 0, sizeof(*fake_jump));
+-              INIT_LIST_HEAD(&fake_jump->alts);
+-              INIT_LIST_HEAD(&fake_jump->stack_ops);
+-              init_cfi_state(&fake_jump->cfi);
+-
+-              fake_jump->sec = special_alt->new_sec;
+-              fake_jump->offset = FAKE_JUMP_OFFSET;
+-              fake_jump->type = INSN_JUMP_UNCONDITIONAL;
+-              fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
+-              fake_jump->func = orig_insn->func;
++
++      new_alt_group = malloc(sizeof(*new_alt_group));
++      if (!new_alt_group) {
++              WARN("malloc failed");
++              return -1;
+       }
+-      if (!special_alt->new_len) {
+-              if (!fake_jump) {
+-                      WARN("%s: empty alternative at end of section",
+-                           special_alt->orig_sec->name);
++      if (special_alt->new_len < special_alt->orig_len) {
++              /*
++               * Insert a fake nop at the end to make the replacement
++               * alt_group the same size as the original.  This is needed to
++               * allow propagate_alt_cfi() to do its magic.  When the last
++               * instruction affects the stack, the instruction after it (the
++               * nop) will propagate the new state to the shared CFI array.
++               */
++              nop = malloc(sizeof(*nop));
++              if (!nop) {
++                      WARN("malloc failed");
+                       return -1;
+               }
+-
+-              *new_insn = fake_jump;
+-              return 0;
++              memset(nop, 0, sizeof(*nop));
++              INIT_LIST_HEAD(&nop->alts);
++              INIT_LIST_HEAD(&nop->stack_ops);
++              init_cfi_state(&nop->cfi);
++
++              nop->sec = special_alt->new_sec;
++              nop->offset = special_alt->new_off + special_alt->new_len;
++              nop->len = special_alt->orig_len - special_alt->new_len;
++              nop->type = INSN_NOP;
++              nop->func = orig_insn->func;
++              nop->alt_group = new_alt_group;
++              nop->ignore = orig_insn->ignore_alts;
+       }
+-      new_alt_group = malloc(sizeof(*new_alt_group));
+-      if (!new_alt_group) {
+-              WARN("malloc failed");
+-              return -1;
++      if (!special_alt->new_len) {
++              *new_insn = nop;
++              goto end;
+       }
+-      last_new_insn = NULL;
+       insn = *new_insn;
+       sec_for_each_insn_from(file, insn) {
+               struct reloc *alt_reloc;
+@@ -1109,14 +1101,8 @@ static int handle_group_alt(struct objto
+                       continue;
+               dest_off = arch_jump_destination(insn);
+-              if (dest_off == special_alt->new_off + special_alt->new_len) {
+-                      if (!fake_jump) {
+-                              WARN("%s: alternative jump to end of section",
+-                                   special_alt->orig_sec->name);
+-                              return -1;
+-                      }
+-                      insn->jump_dest = fake_jump;
+-              }
++              if (dest_off == special_alt->new_off + special_alt->new_len)
++                      insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
+               if (!insn->jump_dest) {
+                       WARN_FUNC("can't find alternative jump destination",
+@@ -1131,13 +1117,13 @@ static int handle_group_alt(struct objto
+               return -1;
+       }
++      if (nop)
++              list_add(&nop->list, &last_new_insn->list);
++end:
+       new_alt_group->orig_group = orig_alt_group;
+       new_alt_group->first_insn = *new_insn;
+-      new_alt_group->last_insn = last_new_insn;
+-
+-      if (fake_jump)
+-              list_add(&fake_jump->list, &last_new_insn->list);
+-
++      new_alt_group->last_insn = nop ? : last_new_insn;
++      new_alt_group->cfi = orig_alt_group->cfi;
+       return 0;
+ }
+@@ -2237,22 +2223,47 @@ static int update_cfi_state(struct instr
+       return 0;
+ }
+-static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
++/*
++ * The stack layouts of alternatives instructions can sometimes diverge when
++ * they have stack modifications.  That's fine as long as the potential stack
++ * layouts don't conflict at any given potential instruction boundary.
++ *
++ * Flatten the CFIs of the different alternative code streams (both original
++ * and replacement) into a single shared CFI array which can be used to detect
++ * conflicts and nicely feed a linear array of ORC entries to the unwinder.
++ */
++static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn)
+ {
+-      struct stack_op *op;
++      struct cfi_state **alt_cfi;
++      int group_off;
+-      list_for_each_entry(op, &insn->stack_ops, list) {
+-              struct cfi_state old_cfi = state->cfi;
+-              int res;
++      if (!insn->alt_group)
++              return 0;
+-              res = update_cfi_state(insn, &state->cfi, op);
+-              if (res)
+-                      return res;
++      alt_cfi = insn->alt_group->cfi;
++      group_off = insn->offset - insn->alt_group->first_insn->offset;
+-              if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) {
+-                      WARN_FUNC("alternative modifies stack", insn->sec, insn->offset);
++      if (!alt_cfi[group_off]) {
++              alt_cfi[group_off] = &insn->cfi;
++      } else {
++              if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) {
++                      WARN_FUNC("stack layout conflict in alternatives",
++                                insn->sec, insn->offset);
+                       return -1;
+               }
++      }
++
++      return 0;
++}
++
++static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
++{
++      struct stack_op *op;
++
++      list_for_each_entry(op, &insn->stack_ops, list) {
++
++              if (update_cfi_state(insn, &state->cfi, op))
++                      return 1;
+               if (op->dest.type == OP_DEST_PUSHF) {
+                       if (!state->uaccess_stack) {
+@@ -2442,28 +2453,20 @@ static int validate_return(struct symbol
+       return 0;
+ }
+-/*
+- * Alternatives should not contain any ORC entries, this in turn means they
+- * should not contain any CFI ops, which implies all instructions should have
+- * the same same CFI state.
+- *
+- * It is possible to constuct alternatives that have unreachable holes that go
+- * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED
+- * states which then results in ORC entries, which we just said we didn't want.
+- *
+- * Avoid them by copying the CFI entry of the first instruction into the whole
+- * alternative.
+- */
+-static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
++static struct instruction *next_insn_to_validate(struct objtool_file *file,
++                                               struct instruction *insn)
+ {
+-      struct instruction *first_insn = insn;
+       struct alt_group *alt_group = insn->alt_group;
+-      sec_for_each_insn_continue(file, insn) {
+-              if (insn->alt_group != alt_group)
+-                      break;
+-              insn->cfi = first_insn->cfi;
+-      }
++      /*
++       * Simulate the fact that alternatives are patched in-place.  When the
++       * end of a replacement alt_group is reached, redirect objtool flow to
++       * the end of the original alt_group.
++       */
++      if (alt_group && insn == alt_group->last_insn && alt_group->orig_group)
++              return next_insn_same_sec(file, alt_group->orig_group->last_insn);
++
++      return next_insn_same_sec(file, insn);
+ }
+ /*
+@@ -2484,7 +2487,7 @@ static int validate_branch(struct objtoo
+       sec = insn->sec;
+       while (1) {
+-              next_insn = next_insn_same_sec(file, insn);
++              next_insn = next_insn_to_validate(file, insn);
+               if (file->c_file && func && insn->func && func != insn->func->pfunc) {
+                       WARN("%s() falls through to next function %s()",
+@@ -2517,6 +2520,9 @@ static int validate_branch(struct objtoo
+               insn->visited |= visited;
++              if (propagate_alt_cfi(file, insn))
++                      return 1;
++
+               if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+                       bool skip_orig = false;
+@@ -2532,9 +2538,6 @@ static int validate_branch(struct objtoo
+                               }
+                       }
+-                      if (insn->alt_group)
+-                              fill_alternative_cfi(file, insn);
+-
+                       if (skip_orig)
+                               return 0;
+               }
+@@ -2767,9 +2770,6 @@ static bool ignore_unreachable_insn(stru
+           !strcmp(insn->sec->name, ".altinstr_aux"))
+               return true;
+-      if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET)
+-              return true;
+-
+       if (!insn->func)
+               return false;
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -28,6 +28,12 @@ struct alt_group {
+       /* First and last instructions in the group */
+       struct instruction *first_insn, *last_insn;
++
++      /*
++       * Byte-offset-addressed len-sized array of pointers to CFI structs.
++       * This is shared with the other alt_groups in the same alternative.
++       */
++      struct cfi_state **cfi;
+ };
+ struct instruction {
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -141,6 +141,13 @@ static int orc_list_add(struct list_head
+       return 0;
+ }
++static unsigned long alt_group_len(struct alt_group *alt_group)
++{
++      return alt_group->last_insn->offset +
++             alt_group->last_insn->len -
++             alt_group->first_insn->offset;
++}
++
+ int orc_create(struct objtool_file *file)
+ {
+       struct section *sec, *ip_rsec, *orc_sec;
+@@ -165,15 +172,48 @@ int orc_create(struct objtool_file *file
+                       continue;
+               sec_for_each_insn(file, sec, insn) {
+-                      if (init_orc_entry(&orc, &insn->cfi))
+-                              return -1;
+-                      if (!memcmp(&prev_orc, &orc, sizeof(orc)))
++                      struct alt_group *alt_group = insn->alt_group;
++                      int i;
++
++                      if (!alt_group) {
++                              if (init_orc_entry(&orc, &insn->cfi))
++                                      return -1;
++                              if (!memcmp(&prev_orc, &orc, sizeof(orc)))
++                                      continue;
++                              if (orc_list_add(&orc_list, &orc, sec,
++                                               insn->offset))
++                                      return -1;
++                              nr++;
++                              prev_orc = orc;
++                              empty = false;
+                               continue;
+-                      if (orc_list_add(&orc_list, &orc, sec, insn->offset))
+-                              return -1;
+-                      nr++;
+-                      prev_orc = orc;
+-                      empty = false;
++                      }
++
++                      /*
++                       * Alternatives can have different stack layout
++                       * possibilities (but they shouldn't conflict).
++                       * Instead of traversing the instructions, use the
++                       * alt_group's flattened byte-offset-addressed CFI
++                       * array.
++                       */
++                      for (i = 0; i < alt_group_len(alt_group); i++) {
++                              struct cfi_state *cfi = alt_group->cfi[i];
++                              if (!cfi)
++                                      continue;
++                              if (init_orc_entry(&orc, cfi))
++                                      return -1;
++                              if (!memcmp(&prev_orc, &orc, sizeof(orc)))
++                                      continue;
++                              if (orc_list_add(&orc_list, &orc, insn->sec,
++                                               insn->offset + i))
++                                      return -1;
++                              nr++;
++                              prev_orc = orc;
++                              empty = false;
++                      }
++
++                      /* Skip to the end of the alt_group */
++                      insn = alt_group->last_insn;
+               }
+               /* Add a section terminator */
diff --git a/queue-5.10/objtool-teach-get_alt_entry-about-more-relocation-types.patch b/queue-5.10/objtool-teach-get_alt_entry-about-more-relocation-types.patch
new file mode 100644 (file)
index 0000000..79fe95a
--- /dev/null
@@ -0,0 +1,94 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 30 Sep 2021 12:43:10 +0200
+Subject: objtool: Teach get_alt_entry() about more relocation types
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 24ff652573754fe4c03213ebd26b17e86842feb3 upstream.
+
+Occasionally objtool encounters symbol (as opposed to section)
+relocations in .altinstructions. Typically they are the alternatives
+written by elf_add_alternative() as encountered on a noinstr
+validation run on vmlinux after having already ran objtool on the
+individual .o files.
+
+Basically this is the counterpart of commit 44f6a7c0755d ("objtool:
+Fix seg fault with Clang non-section symbols"), because when these new
+assemblers (binutils now also does this) strip the section symbols,
+elf_add_reloc_to_insn() is forced to emit symbol based relocations.
+
+As such, teach get_alt_entry() about different relocation types.
+
+Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
+Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
+Reported-by: Borislav Petkov <bp@alien8.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Nathan Chancellor <nathan@kernel.org>
+Link: https://lore.kernel.org/r/YVWUvknIEVNkPvnP@hirez.programming.kicks-ass.net
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/special.c |   32 +++++++++++++++++++++++++-------
+ 1 file changed, 25 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/special.c
++++ b/tools/objtool/special.c
+@@ -55,6 +55,24 @@ void __weak arch_handle_alternative(unsi
+ {
+ }
++static bool reloc2sec_off(struct reloc *reloc, struct section **sec, unsigned long *off)
++{
++      switch (reloc->sym->type) {
++      case STT_FUNC:
++              *sec = reloc->sym->sec;
++              *off = reloc->sym->offset + reloc->addend;
++              return true;
++
++      case STT_SECTION:
++              *sec = reloc->sym->sec;
++              *off = reloc->addend;
++              return true;
++
++      default:
++              return false;
++      }
++}
++
+ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
+                        struct section *sec, int idx,
+                        struct special_alt *alt)
+@@ -87,15 +105,12 @@ static int get_alt_entry(struct elf *elf
+               WARN_FUNC("can't find orig reloc", sec, offset + entry->orig);
+               return -1;
+       }
+-      if (orig_reloc->sym->type != STT_SECTION) {
+-              WARN_FUNC("don't know how to handle non-section reloc symbol %s",
++      if (!reloc2sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off)) {
++              WARN_FUNC("don't know how to handle reloc symbol type: %s",
+                          sec, offset + entry->orig, orig_reloc->sym->name);
+               return -1;
+       }
+-      alt->orig_sec = orig_reloc->sym->sec;
+-      alt->orig_off = orig_reloc->addend;
+-
+       if (!entry->group || alt->new_len) {
+               new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new);
+               if (!new_reloc) {
+@@ -112,8 +127,11 @@ static int get_alt_entry(struct elf *elf
+               if (arch_is_retpoline(new_reloc->sym))
+                       return 1;
+-              alt->new_sec = new_reloc->sym->sec;
+-              alt->new_off = (unsigned int)new_reloc->addend;
++              if (!reloc2sec_off(new_reloc, &alt->new_sec, &alt->new_off)) {
++                      WARN_FUNC("don't know how to handle reloc symbol type: %s",
++                                sec, offset + entry->new, new_reloc->sym->name);
++                      return -1;
++              }
+               /* _ASM_EXTABLE_EX hack */
+               if (alt->new_off >= 0x7ffffff0)
diff --git a/queue-5.10/objtool-treat-.text.__x86.-as-noinstr.patch b/queue-5.10/objtool-treat-.text.__x86.-as-noinstr.patch
new file mode 100644 (file)
index 0000000..3762e41
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:47 +0200
+Subject: objtool: Treat .text.__x86.* as noinstr
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 951ddecf435659553ed15a9214e153a3af43a9a1 upstream.
+
+Needed because zen_untrain_ret() will be called from noinstr code.
+
+Also makes sense since the thunks MUST NOT contain instrumentation nor
+be poked with dynamic instrumentation.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -366,7 +366,8 @@ static int decode_instructions(struct ob
+                       sec->text = true;
+               if (!strcmp(sec->name, ".noinstr.text") ||
+-                  !strcmp(sec->name, ".entry.text"))
++                  !strcmp(sec->name, ".entry.text") ||
++                  !strncmp(sec->name, ".text.__x86.", 12))
+                       sec->noinstr = true;
+               for (offset = 0; offset < sec->len; offset += insn->len) {
diff --git a/queue-5.10/objtool-update-retpoline-validation.patch b/queue-5.10/objtool-update-retpoline-validation.patch
new file mode 100644 (file)
index 0000000..ddde9ca
--- /dev/null
@@ -0,0 +1,112 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:59 +0200
+Subject: objtool: Update Retpoline validation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 9bb2ec608a209018080ca262f771e6a9ff203b6f upstream.
+
+Update retpoline validation with the new CONFIG_RETPOLINE requirement of
+not having bare naked RET instructions.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: conflict fixup at arch/x86/xen/xen-head.S]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |    6 ++++++
+ arch/x86/mm/mem_encrypt_boot.S       |    2 ++
+ arch/x86/xen/xen-head.S              |    1 +
+ tools/objtool/check.c                |   19 +++++++++++++------
+ 4 files changed, 22 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -76,6 +76,12 @@
+ .endm
+ /*
++ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
++ * vs RETBleed validation.
++ */
++#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
++
++/*
+  * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+  * indirect jmp/call which may be susceptible to the Spectre variant 2
+  * attack.
+--- a/arch/x86/mm/mem_encrypt_boot.S
++++ b/arch/x86/mm/mem_encrypt_boot.S
+@@ -66,6 +66,7 @@ SYM_FUNC_START(sme_encrypt_execute)
+       pop     %rbp
+       /* Offset to __x86_return_thunk would be wrong here */
++      ANNOTATE_UNRET_SAFE
+       ret
+       int3
+ SYM_FUNC_END(sme_encrypt_execute)
+@@ -154,6 +155,7 @@ SYM_FUNC_START(__enc_copy)
+       pop     %r15
+       /* Offset to __x86_return_thunk would be wrong here */
++      ANNOTATE_UNRET_SAFE
+       ret
+       int3
+ .L__enc_copy_end:
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -70,6 +70,7 @@ SYM_CODE_START(hypercall_page)
+       .rept (PAGE_SIZE / 32)
+               UNWIND_HINT_FUNC
+               .skip 31, 0x90
++              ANNOTATE_UNRET_SAFE
+               RET
+       .endr
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1799,8 +1799,9 @@ static int read_retpoline_hints(struct o
+               }
+               if (insn->type != INSN_JUMP_DYNAMIC &&
+-                  insn->type != INSN_CALL_DYNAMIC) {
+-                      WARN_FUNC("retpoline_safe hint not an indirect jump/call",
++                  insn->type != INSN_CALL_DYNAMIC &&
++                  insn->type != INSN_RETURN) {
++                      WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret",
+                                 insn->sec, insn->offset);
+                       return -1;
+               }
+@@ -3051,7 +3052,8 @@ static int validate_retpoline(struct obj
+       for_each_insn(file, insn) {
+               if (insn->type != INSN_JUMP_DYNAMIC &&
+-                  insn->type != INSN_CALL_DYNAMIC)
++                  insn->type != INSN_CALL_DYNAMIC &&
++                  insn->type != INSN_RETURN)
+                       continue;
+               if (insn->retpoline_safe)
+@@ -3066,9 +3068,14 @@ static int validate_retpoline(struct obj
+               if (!strcmp(insn->sec->name, ".init.text") && !module)
+                       continue;
+-              WARN_FUNC("indirect %s found in RETPOLINE build",
+-                        insn->sec, insn->offset,
+-                        insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
++              if (insn->type == INSN_RETURN) {
++                      WARN_FUNC("'naked' return found in RETPOLINE build",
++                                insn->sec, insn->offset);
++              } else {
++                      WARN_FUNC("indirect %s found in RETPOLINE build",
++                                insn->sec, insn->offset,
++                                insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
++              }
+               warnings++;
+       }
diff --git a/queue-5.10/objtool-x86-ignore-__x86_indirect_alt_-symbols.patch b/queue-5.10/objtool-x86-ignore-__x86_indirect_alt_-symbols.patch
new file mode 100644 (file)
index 0000000..6c4585c
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 21 Jun 2021 16:13:55 +0200
+Subject: objtool/x86: Ignore __x86_indirect_alt_* symbols
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 31197d3a0f1caeb60fb01f6755e28347e4f44037 upstream.
+
+Because the __x86_indirect_alt* symbols are just that, objtool will
+try and validate them as regular symbols, instead of the alternative
+replacements that they are.
+
+This goes sideways for FRAME_POINTER=y builds; which generate a fair
+amount of warnings.
+
+Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lore.kernel.org/r/YNCgxwLBiK9wclYJ@hirez.programming.kicks-ass.net
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/retpoline.S |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -58,12 +58,16 @@ SYM_FUNC_START_NOALIGN(__x86_indirect_al
+ 2:    .skip   5-(2b-1b), 0x90
+ SYM_FUNC_END(__x86_indirect_alt_call_\reg)
++STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg)
++
+ SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
+       ANNOTATE_RETPOLINE_SAFE
+ 1:    jmp     *%\reg
+ 2:    .skip   5-(2b-1b), 0x90
+ SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
++STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg)
++
+ .endm
+ /*
diff --git a/queue-5.10/objtool-x86-replace-alternatives-with-.retpoline_sites.patch b/queue-5.10/objtool-x86-replace-alternatives-with-.retpoline_sites.patch
new file mode 100644 (file)
index 0000000..f815a74
--- /dev/null
@@ -0,0 +1,494 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:36 +0200
+Subject: objtool,x86: Replace alternatives with .retpoline_sites
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 134ab5bd1883312d7a4b3033b05c6b5a1bb8889b upstream.
+
+Instead of writing complete alternatives, simply provide a list of all
+the retpoline thunk calls. Then the kernel is free to do with them as
+it pleases. Simpler code all-round.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120309.850007165@infradead.org
+[cascardo: fixed conflict because of missing
+ 8b946cc38e063f0f7bb67789478c38f6d7d457c9]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: deleted functions had slightly different code]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/vmlinux.lds.S   |   14 ++++
+ tools/objtool/arch/x86/decode.c |  120 ------------------------------------
+ tools/objtool/check.c           |  132 ++++++++++++++++++++++++++++------------
+ tools/objtool/elf.c             |   83 -------------------------
+ tools/objtool/elf.h             |    1 
+ tools/objtool/special.c         |    8 --
+ 6 files changed, 107 insertions(+), 251 deletions(-)
+
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -272,6 +272,20 @@ SECTIONS
+               __parainstructions_end = .;
+       }
++#ifdef CONFIG_RETPOLINE
++      /*
++       * List of instructions that call/jmp/jcc to retpoline thunks
++       * __x86_indirect_thunk_*(). These instructions can be patched along
++       * with alternatives, after which the section can be freed.
++       */
++      . = ALIGN(8);
++      .retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) {
++              __retpoline_sites = .;
++              *(.retpoline_sites)
++              __retpoline_sites_end = .;
++      }
++#endif
++
+       /*
+        * struct alt_inst entries. From the header (alternative.h):
+        * "Alternative instructions for different CPU types or capabilities"
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -606,126 +606,6 @@ const char *arch_ret_insn(int len)
+       return ret[len-1];
+ }
+-/* asm/alternative.h ? */
+-
+-#define ALTINSTR_FLAG_INV     (1 << 15)
+-#define ALT_NOT(feat)         ((feat) | ALTINSTR_FLAG_INV)
+-
+-struct alt_instr {
+-      s32 instr_offset;       /* original instruction */
+-      s32 repl_offset;        /* offset to replacement instruction */
+-      u16 cpuid;              /* cpuid bit set for replacement */
+-      u8  instrlen;           /* length of original instruction */
+-      u8  replacementlen;     /* length of new instruction */
+-} __packed;
+-
+-static int elf_add_alternative(struct elf *elf,
+-                             struct instruction *orig, struct symbol *sym,
+-                             int cpuid, u8 orig_len, u8 repl_len)
+-{
+-      const int size = sizeof(struct alt_instr);
+-      struct alt_instr *alt;
+-      struct section *sec;
+-      Elf_Scn *s;
+-
+-      sec = find_section_by_name(elf, ".altinstructions");
+-      if (!sec) {
+-              sec = elf_create_section(elf, ".altinstructions",
+-                                       SHF_ALLOC, 0, 0);
+-
+-              if (!sec) {
+-                      WARN_ELF("elf_create_section");
+-                      return -1;
+-              }
+-      }
+-
+-      s = elf_getscn(elf->elf, sec->idx);
+-      if (!s) {
+-              WARN_ELF("elf_getscn");
+-              return -1;
+-      }
+-
+-      sec->data = elf_newdata(s);
+-      if (!sec->data) {
+-              WARN_ELF("elf_newdata");
+-              return -1;
+-      }
+-
+-      sec->data->d_size = size;
+-      sec->data->d_align = 1;
+-
+-      alt = sec->data->d_buf = malloc(size);
+-      if (!sec->data->d_buf) {
+-              perror("malloc");
+-              return -1;
+-      }
+-      memset(sec->data->d_buf, 0, size);
+-
+-      if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size,
+-                                R_X86_64_PC32, orig->sec, orig->offset)) {
+-              WARN("elf_create_reloc: alt_instr::instr_offset");
+-              return -1;
+-      }
+-
+-      if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4,
+-                        R_X86_64_PC32, sym, 0)) {
+-              WARN("elf_create_reloc: alt_instr::repl_offset");
+-              return -1;
+-      }
+-
+-      alt->cpuid = cpuid;
+-      alt->instrlen = orig_len;
+-      alt->replacementlen = repl_len;
+-
+-      sec->sh.sh_size += size;
+-      sec->changed = true;
+-
+-      return 0;
+-}
+-
+-#define X86_FEATURE_RETPOLINE                ( 7*32+12)
+-
+-int arch_rewrite_retpolines(struct objtool_file *file)
+-{
+-      struct instruction *insn;
+-      struct reloc *reloc;
+-      struct symbol *sym;
+-      char name[32] = "";
+-
+-      list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+-
+-              if (insn->type != INSN_JUMP_DYNAMIC &&
+-                  insn->type != INSN_CALL_DYNAMIC)
+-                      continue;
+-
+-              if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
+-                      continue;
+-
+-              reloc = insn->reloc;
+-
+-              sprintf(name, "__x86_indirect_alt_%s_%s",
+-                      insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call",
+-                      reloc->sym->name + 21);
+-
+-              sym = find_symbol_by_name(file->elf, name);
+-              if (!sym) {
+-                      sym = elf_create_undef_symbol(file->elf, name);
+-                      if (!sym) {
+-                              WARN("elf_create_undef_symbol");
+-                              return -1;
+-                      }
+-              }
+-
+-              if (elf_add_alternative(file->elf, insn, sym,
+-                                      ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) {
+-                      WARN("elf_add_alternative");
+-                      return -1;
+-              }
+-      }
+-
+-      return 0;
+-}
+-
+ int arch_decode_hint_reg(u8 sp_reg, int *base)
+ {
+       switch (sp_reg) {
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -606,6 +606,52 @@ static int create_static_call_sections(s
+       return 0;
+ }
++static int create_retpoline_sites_sections(struct objtool_file *file)
++{
++      struct instruction *insn;
++      struct section *sec;
++      int idx;
++
++      sec = find_section_by_name(file->elf, ".retpoline_sites");
++      if (sec) {
++              WARN("file already has .retpoline_sites, skipping");
++              return 0;
++      }
++
++      idx = 0;
++      list_for_each_entry(insn, &file->retpoline_call_list, call_node)
++              idx++;
++
++      if (!idx)
++              return 0;
++
++      sec = elf_create_section(file->elf, ".retpoline_sites", 0,
++                               sizeof(int), idx);
++      if (!sec) {
++              WARN("elf_create_section: .retpoline_sites");
++              return -1;
++      }
++
++      idx = 0;
++      list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
++
++              int *site = (int *)sec->data->d_buf + idx;
++              *site = 0;
++
++              if (elf_add_reloc_to_insn(file->elf, sec,
++                                        idx * sizeof(int),
++                                        R_X86_64_PC32,
++                                        insn->sec, insn->offset)) {
++                      WARN("elf_add_reloc_to_insn: .retpoline_sites");
++                      return -1;
++              }
++
++              idx++;
++      }
++
++      return 0;
++}
++
+ /*
+  * Warnings shouldn't be reported for ignored functions.
+  */
+@@ -893,6 +939,11 @@ static void annotate_call_site(struct ob
+               return;
+       }
++      if (sym->retpoline_thunk) {
++              list_add_tail(&insn->call_node, &file->retpoline_call_list);
++              return;
++      }
++
+       /*
+        * Many compilers cannot disable KCOV with a function attribute
+        * so they need a little help, NOP out any KCOV calls from noinstr
+@@ -933,6 +984,39 @@ static void add_call_dest(struct objtool
+       annotate_call_site(file, insn, sibling);
+ }
++static void add_retpoline_call(struct objtool_file *file, struct instruction *insn)
++{
++      /*
++       * Retpoline calls/jumps are really dynamic calls/jumps in disguise,
++       * so convert them accordingly.
++       */
++      switch (insn->type) {
++      case INSN_CALL:
++              insn->type = INSN_CALL_DYNAMIC;
++              break;
++      case INSN_JUMP_UNCONDITIONAL:
++              insn->type = INSN_JUMP_DYNAMIC;
++              break;
++      case INSN_JUMP_CONDITIONAL:
++              insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
++              break;
++      default:
++              return;
++      }
++
++      insn->retpoline_safe = true;
++
++      /*
++       * Whatever stack impact regular CALLs have, should be undone
++       * by the RETURN of the called function.
++       *
++       * Annotated intra-function calls retain the stack_ops but
++       * are converted to JUMP, see read_intra_function_calls().
++       */
++      remove_insn_ops(insn);
++
++      annotate_call_site(file, insn, false);
++}
+ /*
+  * Find the destination instructions for all jumps.
+  */
+@@ -955,19 +1039,7 @@ static int add_jump_destinations(struct
+                       dest_sec = reloc->sym->sec;
+                       dest_off = arch_dest_reloc_offset(reloc->addend);
+               } else if (reloc->sym->retpoline_thunk) {
+-                      /*
+-                       * Retpoline jumps are really dynamic jumps in
+-                       * disguise, so convert them accordingly.
+-                       */
+-                      if (insn->type == INSN_JUMP_UNCONDITIONAL)
+-                              insn->type = INSN_JUMP_DYNAMIC;
+-                      else
+-                              insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
+-
+-                      list_add_tail(&insn->call_node,
+-                                    &file->retpoline_call_list);
+-
+-                      insn->retpoline_safe = true;
++                      add_retpoline_call(file, insn);
+                       continue;
+               } else if (insn->func) {
+                       /* internal or external sibling call (with reloc) */
+@@ -1096,18 +1168,7 @@ static int add_call_destinations(struct
+                       add_call_dest(file, insn, dest, false);
+               } else if (reloc->sym->retpoline_thunk) {
+-                      /*
+-                       * Retpoline calls are really dynamic calls in
+-                       * disguise, so convert them accordingly.
+-                       */
+-                      insn->type = INSN_CALL_DYNAMIC;
+-                      insn->retpoline_safe = true;
+-
+-                      list_add_tail(&insn->call_node,
+-                                    &file->retpoline_call_list);
+-
+-                      remove_insn_ops(insn);
+-                      continue;
++                      add_retpoline_call(file, insn);
+               } else
+                       add_call_dest(file, insn, reloc->sym, false);
+@@ -1806,11 +1867,6 @@ static void mark_rodata(struct objtool_f
+       file->rodata = found;
+ }
+-__weak int arch_rewrite_retpolines(struct objtool_file *file)
+-{
+-      return 0;
+-}
+-
+ static int decode_sections(struct objtool_file *file)
+ {
+       int ret;
+@@ -1879,15 +1935,6 @@ static int decode_sections(struct objtoo
+       if (ret)
+               return ret;
+-      /*
+-       * Must be after add_special_section_alts(), since this will emit
+-       * alternatives. Must be after add_{jump,call}_destination(), since
+-       * those create the call insn lists.
+-       */
+-      ret = arch_rewrite_retpolines(file);
+-      if (ret)
+-              return ret;
+-
+       return 0;
+ }
+@@ -3159,6 +3206,13 @@ int check(struct objtool_file *file)
+               goto out;
+       warnings += ret;
++      if (retpoline) {
++              ret = create_retpoline_sites_sections(file);
++              if (ret < 0)
++                      goto out;
++              warnings += ret;
++      }
++
+       if (stats) {
+               printf("nr_insns_visited: %ld\n", nr_insns_visited);
+               printf("nr_cfi: %ld\n", nr_cfi);
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -766,89 +766,6 @@ static int elf_add_string(struct elf *el
+       return len;
+ }
+-struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
+-{
+-      struct section *symtab, *symtab_shndx;
+-      struct symbol *sym;
+-      Elf_Data *data;
+-      Elf_Scn *s;
+-
+-      sym = malloc(sizeof(*sym));
+-      if (!sym) {
+-              perror("malloc");
+-              return NULL;
+-      }
+-      memset(sym, 0, sizeof(*sym));
+-
+-      sym->name = strdup(name);
+-
+-      sym->sym.st_name = elf_add_string(elf, NULL, sym->name);
+-      if (sym->sym.st_name == -1)
+-              return NULL;
+-
+-      sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE);
+-      // st_other 0
+-      // st_shndx 0
+-      // st_value 0
+-      // st_size 0
+-
+-      symtab = find_section_by_name(elf, ".symtab");
+-      if (!symtab) {
+-              WARN("can't find .symtab");
+-              return NULL;
+-      }
+-
+-      s = elf_getscn(elf->elf, symtab->idx);
+-      if (!s) {
+-              WARN_ELF("elf_getscn");
+-              return NULL;
+-      }
+-
+-      data = elf_newdata(s);
+-      if (!data) {
+-              WARN_ELF("elf_newdata");
+-              return NULL;
+-      }
+-
+-      data->d_buf = &sym->sym;
+-      data->d_size = sizeof(sym->sym);
+-      data->d_align = 1;
+-
+-      sym->idx = symtab->len / sizeof(sym->sym);
+-
+-      symtab->len += data->d_size;
+-      symtab->changed = true;
+-
+-      symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+-      if (symtab_shndx) {
+-              s = elf_getscn(elf->elf, symtab_shndx->idx);
+-              if (!s) {
+-                      WARN_ELF("elf_getscn");
+-                      return NULL;
+-              }
+-
+-              data = elf_newdata(s);
+-              if (!data) {
+-                      WARN_ELF("elf_newdata");
+-                      return NULL;
+-              }
+-
+-              data->d_buf = &sym->sym.st_size; /* conveniently 0 */
+-              data->d_size = sizeof(Elf32_Word);
+-              data->d_align = 4;
+-              data->d_type = ELF_T_WORD;
+-
+-              symtab_shndx->len += 4;
+-              symtab_shndx->changed = true;
+-      }
+-
+-      sym->sec = find_section_by_index(elf, 0);
+-
+-      elf_add_symbol(elf, sym);
+-
+-      return sym;
+-}
+-
+ struct section *elf_create_section(struct elf *elf, const char *name,
+                                  unsigned int sh_flags, size_t entsize, int nr)
+ {
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -136,7 +136,6 @@ int elf_write_insn(struct elf *elf, stru
+                  unsigned long offset, unsigned int len,
+                  const char *insn);
+ int elf_write_reloc(struct elf *elf, struct reloc *reloc);
+-struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name);
+ int elf_write(struct elf *elf);
+ void elf_close(struct elf *elf);
+--- a/tools/objtool/special.c
++++ b/tools/objtool/special.c
+@@ -105,14 +105,6 @@ static int get_alt_entry(struct elf *elf
+                       return -1;
+               }
+-              /*
+-               * Skip retpoline .altinstr_replacement... we already rewrite the
+-               * instructions for retpolines anyway, see arch_is_retpoline()
+-               * usage in add_{call,jump}_destinations().
+-               */
+-              if (arch_is_retpoline(new_reloc->sym))
+-                      return 1;
+-
+               reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off);
+               /* _ASM_EXTABLE_EX hack */
diff --git a/queue-5.10/objtool-x86-rewrite-retpoline-thunk-calls.patch b/queue-5.10/objtool-x86-rewrite-retpoline-thunk-calls.patch
new file mode 100644 (file)
index 0000000..953817e
--- /dev/null
@@ -0,0 +1,262 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:15 +0100
+Subject: objtool/x86: Rewrite retpoline thunk calls
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 9bc0bb50727c8ac69fbb33fb937431cf3518ff37 upstream.
+
+When the compiler emits: "CALL __x86_indirect_thunk_\reg" for an
+indirect call, have objtool rewrite it to:
+
+       ALTERNATIVE "call __x86_indirect_thunk_\reg",
+                   "call *%reg", ALT_NOT(X86_FEATURE_RETPOLINE)
+
+Additionally, in order to not emit endless identical
+.altinst_replacement chunks, use a global symbol for them, see
+__x86_indirect_alt_*.
+
+This also avoids objtool from having to do code generation.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151300.320177914@infradead.org
+[bwh: Backported to 5.10: include "arch_elf.h" instead of "arch/elf.h"]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm-prototypes.h |   12 ++-
+ arch/x86/lib/retpoline.S              |   41 +++++++++++
+ tools/objtool/arch/x86/decode.c       |  117 ++++++++++++++++++++++++++++++++++
+ 3 files changed, 167 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -19,11 +19,19 @@ extern void cmpxchg8b_emu(void);
+ #ifdef CONFIG_RETPOLINE
+-#define DECL_INDIRECT_THUNK(reg) \
++#undef GEN
++#define GEN(reg) \
+       extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
++#include <asm/GEN-for-each-reg.h>
++
++#undef GEN
++#define GEN(reg) \
++      extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
++#include <asm/GEN-for-each-reg.h>
+ #undef GEN
+-#define GEN(reg) DECL_INDIRECT_THUNK(reg)
++#define GEN(reg) \
++      extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
+ #include <asm/GEN-for-each-reg.h>
+ #endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -10,6 +10,8 @@
+ #include <asm/unwind_hints.h>
+ #include <asm/frame.h>
++      .section .text.__x86.indirect_thunk
++
+ .macro RETPOLINE reg
+       ANNOTATE_INTRA_FUNCTION_CALL
+       call    .Ldo_rop_\@
+@@ -25,9 +27,9 @@
+ .endm
+ .macro THUNK reg
+-      .section .text.__x86.indirect_thunk
+       .align 32
++
+ SYM_FUNC_START(__x86_indirect_thunk_\reg)
+       ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+@@ -39,6 +41,32 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
+ .endm
+ /*
++ * This generates .altinstr_replacement symbols for use by objtool. They,
++ * however, must not actually live in .altinstr_replacement since that will be
++ * discarded after init, but module alternatives will also reference these
++ * symbols.
++ *
++ * Their names matches the "__x86_indirect_" prefix to mark them as retpolines.
++ */
++.macro ALT_THUNK reg
++
++      .align 1
++
++SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
++      ANNOTATE_RETPOLINE_SAFE
++1:    call    *%\reg
++2:    .skip   5-(2b-1b), 0x90
++SYM_FUNC_END(__x86_indirect_alt_call_\reg)
++
++SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
++      ANNOTATE_RETPOLINE_SAFE
++1:    jmp     *%\reg
++2:    .skip   5-(2b-1b), 0x90
++SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
++
++.endm
++
++/*
+  * Despite being an assembler file we can't just use .irp here
+  * because __KSYM_DEPS__ only uses the C preprocessor and would
+  * only see one instance of "__x86_indirect_thunk_\reg" rather
+@@ -61,3 +89,14 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
++#undef GEN
++#define GEN(reg) ALT_THUNK reg
++#include <asm/GEN-for-each-reg.h>
++
++#undef GEN
++#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
++#include <asm/GEN-for-each-reg.h>
++
++#undef GEN
++#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
++#include <asm/GEN-for-each-reg.h>
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -16,6 +16,7 @@
+ #include "../../arch.h"
+ #include "../../warn.h"
+ #include <asm/orc_types.h>
++#include "arch_elf.h"
+ static unsigned char op_to_cfi_reg[][2] = {
+       {CFI_AX, CFI_R8},
+@@ -585,6 +586,122 @@ const char *arch_nop_insn(int len)
+       return nops[len-1];
+ }
++/* asm/alternative.h ? */
++
++#define ALTINSTR_FLAG_INV     (1 << 15)
++#define ALT_NOT(feat)         ((feat) | ALTINSTR_FLAG_INV)
++
++struct alt_instr {
++      s32 instr_offset;       /* original instruction */
++      s32 repl_offset;        /* offset to replacement instruction */
++      u16 cpuid;              /* cpuid bit set for replacement */
++      u8  instrlen;           /* length of original instruction */
++      u8  replacementlen;     /* length of new instruction */
++} __packed;
++
++static int elf_add_alternative(struct elf *elf,
++                             struct instruction *orig, struct symbol *sym,
++                             int cpuid, u8 orig_len, u8 repl_len)
++{
++      const int size = sizeof(struct alt_instr);
++      struct alt_instr *alt;
++      struct section *sec;
++      Elf_Scn *s;
++
++      sec = find_section_by_name(elf, ".altinstructions");
++      if (!sec) {
++              sec = elf_create_section(elf, ".altinstructions",
++                                       SHF_WRITE, size, 0);
++
++              if (!sec) {
++                      WARN_ELF("elf_create_section");
++                      return -1;
++              }
++      }
++
++      s = elf_getscn(elf->elf, sec->idx);
++      if (!s) {
++              WARN_ELF("elf_getscn");
++              return -1;
++      }
++
++      sec->data = elf_newdata(s);
++      if (!sec->data) {
++              WARN_ELF("elf_newdata");
++              return -1;
++      }
++
++      sec->data->d_size = size;
++      sec->data->d_align = 1;
++
++      alt = sec->data->d_buf = malloc(size);
++      if (!sec->data->d_buf) {
++              perror("malloc");
++              return -1;
++      }
++      memset(sec->data->d_buf, 0, size);
++
++      if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size,
++                                R_X86_64_PC32, orig->sec, orig->offset)) {
++              WARN("elf_create_reloc: alt_instr::instr_offset");
++              return -1;
++      }
++
++      if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4,
++                        R_X86_64_PC32, sym, 0)) {
++              WARN("elf_create_reloc: alt_instr::repl_offset");
++              return -1;
++      }
++
++      alt->cpuid = cpuid;
++      alt->instrlen = orig_len;
++      alt->replacementlen = repl_len;
++
++      sec->sh.sh_size += size;
++      sec->changed = true;
++
++      return 0;
++}
++
++#define X86_FEATURE_RETPOLINE                ( 7*32+12)
++
++int arch_rewrite_retpolines(struct objtool_file *file)
++{
++      struct instruction *insn;
++      struct reloc *reloc;
++      struct symbol *sym;
++      char name[32] = "";
++
++      list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
++
++              if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
++                      continue;
++
++              reloc = insn->reloc;
++
++              sprintf(name, "__x86_indirect_alt_%s_%s",
++                      insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call",
++                      reloc->sym->name + 21);
++
++              sym = find_symbol_by_name(file->elf, name);
++              if (!sym) {
++                      sym = elf_create_undef_symbol(file->elf, name);
++                      if (!sym) {
++                              WARN("elf_create_undef_symbol");
++                              return -1;
++                      }
++              }
++
++              if (elf_add_alternative(file->elf, insn, sym,
++                                      ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) {
++                      WARN("elf_add_alternative");
++                      return -1;
++              }
++      }
++
++      return 0;
++}
++
+ int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
+ {
+       struct cfi_reg *cfa = &insn->cfi.cfa;
diff --git a/queue-5.10/series b/queue-5.10/series
new file mode 100644 (file)
index 0000000..91daa3e
--- /dev/null
@@ -0,0 +1,130 @@
+kvm-vmx-use-test-reg-reg-instead-of-cmp-0-reg-in-vmenter.s.patch
+kvm-nvmx-use-__vmx_vcpu_run-in-nested_vmx_check_vmentry_hw.patch
+objtool-refactor-orc-section-generation.patch
+objtool-add-alt_group-struct.patch
+objtool-support-stack-layout-changes-in-alternatives.patch
+objtool-support-retpoline-jump-detection-for-vmlinux.o.patch
+objtool-assume-only-elf-functions-do-sibling-calls.patch
+objtool-combine-unwind_hint_ret_offset-and-unwind_hint_func.patch
+x86-xen-support-objtool-validation-in-xen-asm.s.patch
+x86-xen-support-objtool-vmlinux.o-validation-in-xen-head.s.patch
+x86-alternative-merge-include-files.patch
+x86-alternative-support-not-feature.patch
+x86-alternative-support-alternative_ternary.patch
+x86-alternative-use-alternative_ternary-in-_static_cpu_has.patch
+x86-insn-rename-insn_decode-to-insn_decode_from_regs.patch
+x86-insn-add-a-__ignore_sync_check__-marker.patch
+x86-insn-add-an-insn_decode-api.patch
+x86-insn-eval-handle-return-values-from-the-decoder.patch
+x86-alternative-use-insn_decode.patch
+x86-add-insn_decode_kernel.patch
+x86-alternatives-optimize-optimize_nops.patch
+x86-retpoline-simplify-retpolines.patch
+objtool-correctly-handle-retpoline-thunk-calls.patch
+objtool-handle-per-arch-retpoline-naming.patch
+objtool-rework-the-elf_rebuild_reloc_section-logic.patch
+objtool-add-elf_create_reloc-helper.patch
+objtool-create-reloc-sections-implicitly.patch
+objtool-extract-elf_strtab_concat.patch
+objtool-extract-elf_symbol_add.patch
+objtool-add-elf_create_undef_symbol.patch
+objtool-keep-track-of-retpoline-call-sites.patch
+objtool-cache-instruction-relocs.patch
+objtool-skip-magical-retpoline-.altinstr_replacement.patch
+objtool-x86-rewrite-retpoline-thunk-calls.patch
+objtool-support-asm-jump-tables.patch
+x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch
+objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch
+objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch
+objtool-x86-ignore-__x86_indirect_alt_-symbols.patch
+objtool-don-t-make-.altinstructions-writable.patch
+objtool-teach-get_alt_entry-about-more-relocation-types.patch
+objtool-print-out-the-symbol-type-when-complaining-about-it.patch
+objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch
+objtool-make-.altinstructions-section-entry-size-consistent.patch
+objtool-introduce-cfi-hash.patch
+objtool-handle-__sanitize_cov-tail-calls.patch
+objtool-classify-symbols.patch
+objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch
+objtool-x86-replace-alternatives-with-.retpoline_sites.patch
+x86-retpoline-remove-unused-replacement-symbols.patch
+x86-asm-fix-register-order.patch
+x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch
+x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch
+x86-retpoline-create-a-retpoline-thunk-array.patch
+x86-alternative-implement-.retpoline_sites-support.patch
+x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch
+x86-alternative-try-inline-spectre_v2-retpoline-amd.patch
+x86-alternative-add-debug-prints-to-apply_retpolines.patch
+bpf-x86-simplify-computing-label-offsets.patch
+bpf-x86-respect-x86_feature_retpoline.patch
+x86-lib-atomic64_386_32-rename-things.patch
+x86-prepare-asm-files-for-straight-line-speculation.patch
+x86-prepare-inline-asm-for-straight-line-speculation.patch
+x86-alternative-relax-text_poke_bp-constraint.patch
+objtool-add-straight-line-speculation-validation.patch
+x86-add-straight-line-speculation-mitigation.patch
+tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch
+kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch
+objtool-default-ignore-int3-for-unreachable.patch
+crypto-x86-poly1305-fixup-sls.patch
+objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch
+objtool-fix-code-relocs-vs-weak-symbols.patch
+objtool-fix-type-of-reloc-addend.patch
+objtool-fix-symbol-creation.patch
+x86-entry-remove-skip_r11rcx.patch
+objtool-fix-objtool-regression-on-x32-systems.patch
+x86-realmode-build-with-d__disable_exports.patch
+x86-kvm-vmx-make-noinstr-clean.patch
+x86-cpufeatures-move-retpoline-flags-to-word-11.patch
+x86-retpoline-cleanup-some-ifdefery.patch
+x86-retpoline-swizzle-retpoline-thunk.patch
+makefile-set-retpoline-cflags-based-on-config_cc_is_-clang-gcc.patch
+x86-retpoline-use-mfunction-return.patch
+x86-undo-return-thunk-damage.patch
+x86-objtool-create-.return_sites.patch
+objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch
+x86-static_call-use-alternative-ret-encoding.patch
+x86-ftrace-use-alternative-ret-encoding.patch
+x86-bpf-use-alternative-ret-encoding.patch
+x86-kvm-fix-setcc-emulation-for-return-thunks.patch
+x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch
+x86-sev-avoid-using-__x86_return_thunk.patch
+x86-use-return-thunk-in-asm-code.patch
+objtool-treat-.text.__x86.-as-noinstr.patch
+x86-add-magic-amd-return-thunk.patch
+x86-bugs-report-amd-retbleed-vulnerability.patch
+x86-bugs-add-amd-retbleed-boot-parameter.patch
+x86-bugs-enable-stibp-for-jmp2ret.patch
+x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch
+x86-entry-add-kernel-ibrs-implementation.patch
+x86-bugs-optimize-spec_ctrl-msr-writes.patch
+x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch
+x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch
+x86-bugs-report-intel-retbleed-vulnerability.patch
+intel_idle-disable-ibrs-during-long-idle.patch
+objtool-update-retpoline-validation.patch
+x86-xen-rename-sys-entry-points.patch
+x86-bugs-add-retbleed-ibpb.patch
+x86-bugs-do-ibpb-fallback-check-only-once.patch
+objtool-add-entry-unret-validation.patch
+x86-cpu-amd-add-spectral-chicken.patch
+x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch
+x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch
+x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch
+x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch
+x86-speculation-remove-x86_spec_ctrl_mask.patch
+objtool-re-add-unwind_hint_-save_restore.patch
+kvm-vmx-flatten-__vmx_vcpu_run.patch
+kvm-vmx-convert-launched-argument-to-flags.patch
+kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch
+kvm-vmx-fix-ibrs-handling-after-vmexit.patch
+x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch
+x86-common-stamp-out-the-stepping-madness.patch
+x86-cpu-amd-enumerate-btc_no.patch
+x86-retbleed-add-fine-grained-kconfig-knobs.patch
+x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch
+x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch
+x86-kexec-disable-ret-on-kexec.patch
+x86-speculation-disable-rrsba-behavior.patch
+x86-static_call-serialize-__static_call_fixup-properly.patch
diff --git a/queue-5.10/tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch b/queue-5.10/tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch
new file mode 100644 (file)
index 0000000..ed1d040
--- /dev/null
@@ -0,0 +1,120 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Arnaldo Carvalho de Melo <acme@redhat.com>
+Date: Sun, 9 May 2021 10:19:37 -0300
+Subject: tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy'
+
+From: Arnaldo Carvalho de Melo <acme@redhat.com>
+
+commit 35cb8c713a496e8c114eed5e2a5a30b359876df2 upstream.
+
+To bring in the change made in this cset:
+
+  f94909ceb1ed4bfd ("x86: Prepare asm files for straight-line-speculation")
+
+It silences these perf tools build warnings, no change in the tools:
+
+  Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S'
+  diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S
+  Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S'
+  diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S
+
+The code generated was checked before and after using 'objdump -d /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o',
+no changes.
+
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/arch/x86/lib/memcpy_64.S |   12 ++++++------
+ tools/arch/x86/lib/memset_64.S |    6 +++---
+ 2 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/tools/arch/x86/lib/memcpy_64.S
++++ b/tools/arch/x86/lib/memcpy_64.S
+@@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy)
+       rep movsq
+       movl %edx, %ecx
+       rep movsb
+-      ret
++      RET
+ SYM_FUNC_END(memcpy)
+ SYM_FUNC_END_ALIAS(__memcpy)
+ EXPORT_SYMBOL(memcpy)
+@@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms)
+       movq %rdi, %rax
+       movq %rdx, %rcx
+       rep movsb
+-      ret
++      RET
+ SYM_FUNC_END(memcpy_erms)
+ SYM_FUNC_START_LOCAL(memcpy_orig)
+@@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+       movq %r9,       1*8(%rdi)
+       movq %r10,      -2*8(%rdi, %rdx)
+       movq %r11,      -1*8(%rdi, %rdx)
+-      retq
++      RET
+       .p2align 4
+ .Lless_16bytes:
+       cmpl $8,        %edx
+@@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+       movq -1*8(%rsi, %rdx),  %r9
+       movq %r8,       0*8(%rdi)
+       movq %r9,       -1*8(%rdi, %rdx)
+-      retq
++      RET
+       .p2align 4
+ .Lless_8bytes:
+       cmpl $4,        %edx
+@@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+       movl -4(%rsi, %rdx), %r8d
+       movl %ecx, (%rdi)
+       movl %r8d, -4(%rdi, %rdx)
+-      retq
++      RET
+       .p2align 4
+ .Lless_3bytes:
+       subl $1, %edx
+@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+       movb %cl, (%rdi)
+ .Lend:
+-      retq
++      RET
+ SYM_FUNC_END(memcpy_orig)
+ .popsection
+--- a/tools/arch/x86/lib/memset_64.S
++++ b/tools/arch/x86/lib/memset_64.S
+@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
+       movl %edx,%ecx
+       rep stosb
+       movq %r9,%rax
+-      ret
++      RET
+ SYM_FUNC_END(__memset)
+ SYM_FUNC_END_ALIAS(memset)
+ EXPORT_SYMBOL(memset)
+@@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms)
+       movq %rdx,%rcx
+       rep stosb
+       movq %r9,%rax
+-      ret
++      RET
+ SYM_FUNC_END(memset_erms)
+ SYM_FUNC_START_LOCAL(memset_orig)
+@@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig)
+ .Lende:
+       movq    %r10,%rax
+-      ret
++      RET
+ .Lbad_alignment:
+       cmpq $7,%rdx
diff --git a/queue-5.10/x86-add-insn_decode_kernel.patch b/queue-5.10/x86-add-insn_decode_kernel.patch
new file mode 100644 (file)
index 0000000..0c7b322
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Mon, 11 Jul 2022 00:43:31 +0200
+Subject: x86: Add insn_decode_kernel()
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+This was done by commit 52fa82c21f64e900a72437269a5cc9e0034b424e
+upstream, but this backport avoids changing all callers of the
+old decoder API.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/insn.h       |    2 ++
+ arch/x86/kernel/alternative.c     |    2 +-
+ tools/arch/x86/include/asm/insn.h |    2 ++
+ 3 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/insn.h
++++ b/arch/x86/include/asm/insn.h
+@@ -105,6 +105,8 @@ enum insn_mode {
+ extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
++#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN)
++
+ /* Attribute will be determined after getting ModRM (for opcode groups) */
+ static inline void insn_get_attribute(struct insn *insn)
+ {
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -1290,7 +1290,7 @@ static void text_poke_loc_init(struct te
+       if (!emulate)
+               emulate = opcode;
+-      ret = insn_decode(&insn, emulate, MAX_INSN_SIZE, INSN_MODE_KERN);
++      ret = insn_decode_kernel(&insn, emulate);
+       BUG_ON(ret < 0);
+       BUG_ON(len != insn.length);
+--- a/tools/arch/x86/include/asm/insn.h
++++ b/tools/arch/x86/include/asm/insn.h
+@@ -105,6 +105,8 @@ enum insn_mode {
+ extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
++#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN)
++
+ /* Attribute will be determined after getting ModRM (for opcode groups) */
+ static inline void insn_get_attribute(struct insn *insn)
+ {
diff --git a/queue-5.10/x86-add-magic-amd-return-thunk.patch b/queue-5.10/x86-add-magic-amd-return-thunk.patch
new file mode 100644 (file)
index 0000000..fb329bd
--- /dev/null
@@ -0,0 +1,348 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:48 +0200
+Subject: x86: Add magic AMD return-thunk
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit a149180fbcf336e97ce4eb2cdc13672727feb94d upstream.
+
+Note: needs to be in a section distinct from Retpolines such that the
+Retpoline RET substitution cannot possibly use immediate jumps.
+
+ORC unwinding for zen_untrain_ret() and __x86_return_thunk() is a
+little tricky but works due to the fact that zen_untrain_ret() doesn't
+have any stack ops and as such will emit a single ORC entry at the
+start (+0x3f).
+
+Meanwhile, unwinding an IP, including the __x86_return_thunk() one
+(+0x40) will search for the largest ORC entry smaller or equal to the
+IP, these will find the one ORC entry (+0x3f) and all works.
+
+  [ Alexandre: SVM part. ]
+  [ bp: Build fix, massages. ]
+
+Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: conflicts at arch/x86/entry/entry_64_compat.S]
+[cascardo: there is no ANNOTATE_NOENDBR]
+[cascardo: objtool commit 34c861e806478ac2ea4032721defbf1d6967df08 missing]
+[cascardo: conflict fixup]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: SEV-ES is not supported, so drop the change
+ in arch/x86/kvm/svm/vmenter.S]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S                |    6 ++
+ arch/x86/entry/entry_64_compat.S         |    4 +
+ arch/x86/include/asm/cpufeatures.h       |    1 
+ arch/x86/include/asm/disabled-features.h |    3 -
+ arch/x86/include/asm/nospec-branch.h     |   17 ++++++++
+ arch/x86/kernel/vmlinux.lds.S            |    2 
+ arch/x86/kvm/svm/vmenter.S               |    9 ++++
+ arch/x86/lib/retpoline.S                 |   63 +++++++++++++++++++++++++++++--
+ tools/objtool/check.c                    |   20 ++++++++-
+ 9 files changed, 117 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -102,6 +102,7 @@ SYM_CODE_START(entry_SYSCALL_64)
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
++      UNTRAIN_RET
+       /* Construct struct pt_regs on stack */
+       pushq   $__USER_DS                              /* pt_regs->ss */
+@@ -675,6 +676,7 @@ native_irq_return_ldt:
+       pushq   %rdi                            /* Stash user RDI */
+       swapgs                                  /* to kernel GS */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi   /* to kernel CR3 */
++      UNTRAIN_RET
+       movq    PER_CPU_VAR(espfix_waddr), %rdi
+       movq    %rax, (0*8)(%rdi)               /* user RAX */
+@@ -910,6 +912,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+        * be retrieved from a kernel internal table.
+        */
+       SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
++      UNTRAIN_RET
+       /*
+        * Handling GSBASE depends on the availability of FSGSBASE.
+@@ -1022,6 +1025,7 @@ SYM_CODE_START_LOCAL(error_entry)
+       FENCE_SWAPGS_USER_ENTRY
+       /* We have user CR3.  Change to kernel CR3. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++      UNTRAIN_RET
+ .Lerror_entry_from_usermode_after_swapgs:
+       /* Put us onto the real thread stack. */
+@@ -1077,6 +1081,7 @@ SYM_CODE_START_LOCAL(error_entry)
+       SWAPGS
+       FENCE_SWAPGS_USER_ENTRY
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++      UNTRAIN_RET
+       /*
+        * Pretend that the exception came from user mode: set up pt_regs
+@@ -1171,6 +1176,7 @@ SYM_CODE_START(asm_exc_nmi)
+       movq    %rsp, %rdx
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+       UNWIND_HINT_IRET_REGS base=%rdx offset=8
++      UNTRAIN_RET
+       pushq   5*8(%rdx)       /* pt_regs->ss */
+       pushq   4*8(%rdx)       /* pt_regs->rsp */
+       pushq   3*8(%rdx)       /* pt_regs->flags */
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -14,6 +14,7 @@
+ #include <asm/irqflags.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/nospec-branch.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+@@ -71,6 +72,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
+       pushq   $__USER32_CS            /* pt_regs->cs */
+       pushq   $0                      /* pt_regs->ip = 0 (placeholder) */
+ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
++      UNTRAIN_RET
+       /*
+        * User tracing code (ptrace or signal handlers) might assume that
+@@ -211,6 +213,7 @@ SYM_CODE_START(entry_SYSCALL_compat)
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
++      UNTRAIN_RET
+       /* Construct struct pt_regs on stack */
+       pushq   $__USER32_DS            /* pt_regs->ss */
+@@ -377,6 +380,7 @@ SYM_CODE_START(entry_INT80_compat)
+       pushq   (%rdi)                  /* pt_regs->di */
+ .Lint80_keep_stack:
++      UNTRAIN_RET
+       pushq   %rsi                    /* pt_regs->si */
+       xorl    %esi, %esi              /* nospec   si */
+       pushq   %rdx                    /* pt_regs->dx */
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -297,6 +297,7 @@
+ #define X86_FEATURE_RETPOLINE         (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_LFENCE  (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+ #define X86_FEATURE_RETHUNK           (11*32+14) /* "" Use REturn THUNK */
++#define X86_FEATURE_UNRET             (11*32+15) /* "" AMD BTB untrain return */
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX512_BF16               (12*32+ 5) /* AVX512 BFLOAT16 instructions */
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -61,7 +61,8 @@
+ #else
+ # define DISABLE_RETPOLINE    ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+                                (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \
+-                               (1 << (X86_FEATURE_RETHUNK & 31)))
++                               (1 << (X86_FEATURE_RETHUNK & 31)) | \
++                               (1 << (X86_FEATURE_UNRET & 31)))
+ #endif
+ /* Force disable because it's broken beyond repair */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -112,6 +112,22 @@
+ #endif
+ .endm
++/*
++ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
++ * return thunk isn't mapped into the userspace tables (then again, AMD
++ * typically has NO_MELTDOWN).
++ *
++ * Doesn't clobber any registers but does require a stable stack.
++ *
++ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
++ * where we have a stack but before any RET instruction.
++ */
++.macro UNTRAIN_RET
++#ifdef CONFIG_RETPOLINE
++      ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET
++#endif
++.endm
++
+ #else /* __ASSEMBLY__ */
+ #define ANNOTATE_RETPOLINE_SAFE                                       \
+@@ -121,6 +137,7 @@
+       ".popsection\n\t"
+ extern void __x86_return_thunk(void);
++extern void zen_untrain_ret(void);
+ #ifdef CONFIG_RETPOLINE
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -142,7 +142,7 @@ SECTIONS
+ #ifdef CONFIG_RETPOLINE
+               __indirect_thunk_start = .;
+-              *(.text.__x86.indirect_thunk)
++              *(.text.__x86.*)
+               __indirect_thunk_end = .;
+ #endif
+       } :text =0xcccc
+--- a/arch/x86/kvm/svm/vmenter.S
++++ b/arch/x86/kvm/svm/vmenter.S
+@@ -129,6 +129,15 @@ SYM_FUNC_START(__svm_vcpu_run)
+ #endif
+       /*
++       * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
++       * untrained as soon as we exit the VM and are back to the
++       * kernel. This should be done before re-enabling interrupts
++       * because interrupt handlers won't sanitize 'ret' if the return is
++       * from the kernel.
++       */
++      UNTRAIN_RET
++
++      /*
+        * Clear all general purpose registers except RSP and RAX to prevent
+        * speculative use of the guest's values, even those that are reloaded
+        * via the stack.  In theory, an L1 cache miss when restoring registers
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -71,10 +71,67 @@ SYM_CODE_END(__x86_indirect_thunk_array)
+  * This function name is magical and is used by -mfunction-return=thunk-extern
+  * for the compiler to generate JMPs to it.
+  */
+-SYM_CODE_START(__x86_return_thunk)
+-      UNWIND_HINT_EMPTY
++      .section .text.__x86.return_thunk
++
++/*
++ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
++ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
++ *    alignment within the BTB.
++ * 2) The instruction at zen_untrain_ret must contain, and not
++ *    end with, the 0xc3 byte of the RET.
++ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
++ *    from re-poisioning the BTB prediction.
++ */
++      .align 64
++      .skip 63, 0xcc
++SYM_FUNC_START_NOALIGN(zen_untrain_ret);
++
++      /*
++       * As executed from zen_untrain_ret, this is:
++       *
++       *   TEST $0xcc, %bl
++       *   LFENCE
++       *   JMP __x86_return_thunk
++       *
++       * Executing the TEST instruction has a side effect of evicting any BTB
++       * prediction (potentially attacker controlled) attached to the RET, as
++       * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
++       */
++      .byte   0xf6
++
++      /*
++       * As executed from __x86_return_thunk, this is a plain RET.
++       *
++       * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
++       *
++       * We subsequently jump backwards and architecturally execute the RET.
++       * This creates a correct BTB prediction (type=ret), but in the
++       * meantime we suffer Straight Line Speculation (because the type was
++       * no branch) which is halted by the INT3.
++       *
++       * With SMT enabled and STIBP active, a sibling thread cannot poison
++       * RET's prediction to a type of its choice, but can evict the
++       * prediction due to competitive sharing. If the prediction is
++       * evicted, __x86_return_thunk will suffer Straight Line Speculation
++       * which will be contained safely by the INT3.
++       */
++SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
+       ret
+       int3
+ SYM_CODE_END(__x86_return_thunk)
+-__EXPORT_THUNK(__x86_return_thunk)
++      /*
++       * Ensure the TEST decoding / BTB invalidation is complete.
++       */
++      lfence
++
++      /*
++       * Jump back and execute the RET in the middle of the TEST instruction.
++       * INT3 is for SLS protection.
++       */
++      jmp __x86_return_thunk
++      int3
++SYM_FUNC_END(zen_untrain_ret)
++__EXPORT_THUNK(zen_untrain_ret)
++
++EXPORT_SYMBOL(__x86_return_thunk)
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1082,7 +1082,7 @@ static void add_retpoline_call(struct ob
+       annotate_call_site(file, insn, false);
+ }
+-static void add_return_call(struct objtool_file *file, struct instruction *insn)
++static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add)
+ {
+       /*
+        * Return thunk tail calls are really just returns in disguise,
+@@ -1092,7 +1092,7 @@ static void add_return_call(struct objto
+       insn->retpoline_safe = true;
+       /* Skip the non-text sections, specially .discard ones */
+-      if (insn->sec->text)
++      if (add && insn->sec->text)
+               list_add_tail(&insn->call_node, &file->return_thunk_list);
+ }
+@@ -1121,7 +1121,7 @@ static int add_jump_destinations(struct
+                       add_retpoline_call(file, insn);
+                       continue;
+               } else if (reloc->sym->return_thunk) {
+-                      add_return_call(file, insn);
++                      add_return_call(file, insn, true);
+                       continue;
+               } else if (insn->func) {
+                       /* internal or external sibling call (with reloc) */
+@@ -1138,6 +1138,7 @@ static int add_jump_destinations(struct
+               insn->jump_dest = find_insn(file, dest_sec, dest_off);
+               if (!insn->jump_dest) {
++                      struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
+                       /*
+                        * This is a special case where an alt instruction
+@@ -1147,6 +1148,19 @@ static int add_jump_destinations(struct
+                       if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+                               continue;
++                      /*
++                       * This is a special case for zen_untrain_ret().
++                       * It jumps to __x86_return_thunk(), but objtool
++                       * can't find the thunk's starting RET
++                       * instruction, because the RET is also in the
++                       * middle of another instruction.  Objtool only
++                       * knows about the outer instruction.
++                       */
++                      if (sym && sym->return_thunk) {
++                              add_return_call(file, insn, false);
++                              continue;
++                      }
++
+                       WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
+                                 insn->sec, insn->offset, dest_sec->name,
+                                 dest_off);
diff --git a/queue-5.10/x86-add-straight-line-speculation-mitigation.patch b/queue-5.10/x86-add-straight-line-speculation-mitigation.patch
new file mode 100644 (file)
index 0000000..8ba8355
--- /dev/null
@@ -0,0 +1,200 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sat, 4 Dec 2021 14:43:44 +0100
+Subject: x86: Add straight-line-speculation mitigation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit e463a09af2f0677b9485a7e8e4e70b396b2ffb6f upstream.
+
+Make use of an upcoming GCC feature to mitigate
+straight-line-speculation for x86:
+
+  https://gcc.gnu.org/g:53a643f8568067d7700a9f2facc8ba39974973d3
+  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102952
+  https://bugs.llvm.org/show_bug.cgi?id=52323
+
+It's built tested on x86_64-allyesconfig using GCC-12 and GCC-11.
+
+Maintenance overhead of this should be fairly low due to objtool
+validation.
+
+Size overhead of all these additional int3 instructions comes to:
+
+     text         data     bss     dec     hex filename
+  22267751     6933356 2011368 31212475        1dc43bb defconfig-build/vmlinux
+  22804126     6933356 1470696 31208178        1dc32f2 defconfig-build/vmlinux.sls
+
+Or roughly 2.4% additional text.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20211204134908.140103474@infradead.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 5.10:
+ - In scripts/Makefile.build, add the objtool option with an ifdef
+   block, same as for other options
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Kconfig                   |   12 ++++++++++++
+ arch/x86/Makefile                  |    6 +++++-
+ arch/x86/include/asm/linkage.h     |   10 ++++++++++
+ arch/x86/include/asm/static_call.h |    2 +-
+ arch/x86/kernel/ftrace.c           |    2 +-
+ arch/x86/kernel/static_call.c      |    5 +++--
+ arch/x86/lib/memmove_64.S          |    2 +-
+ arch/x86/lib/retpoline.S           |    2 +-
+ scripts/Makefile.build             |    3 +++
+ scripts/link-vmlinux.sh            |    3 +++
+ 10 files changed, 40 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -462,6 +462,18 @@ config RETPOLINE
+         branches. Requires a compiler with -mindirect-branch=thunk-extern
+         support for full protection. The kernel may run slower.
++config CC_HAS_SLS
++      def_bool $(cc-option,-mharden-sls=all)
++
++config SLS
++      bool "Mitigate Straight-Line-Speculation"
++      depends on CC_HAS_SLS && X86_64
++      default n
++      help
++        Compile the kernel with straight-line-speculation options to guard
++        against straight line speculation. The kernel image might be slightly
++        larger.
++
+ config X86_CPU_RESCTRL
+       bool "x86 CPU resource control support"
+       depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -196,7 +196,11 @@ ifdef CONFIG_RETPOLINE
+   endif
+ endif
+-KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
++ifdef CONFIG_SLS
++  KBUILD_CFLAGS += -mharden-sls=all
++endif
++
++KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
+ ifdef CONFIG_X86_NEED_RELOCS
+ LDFLAGS_vmlinux := --emit-relocs --discard-none
+--- a/arch/x86/include/asm/linkage.h
++++ b/arch/x86/include/asm/linkage.h
+@@ -18,9 +18,19 @@
+ #define __ALIGN_STR   __stringify(__ALIGN)
+ #endif
++#ifdef CONFIG_SLS
++#define RET   ret; int3
++#else
++#define RET   ret
++#endif
++
+ #else /* __ASSEMBLY__ */
++#ifdef CONFIG_SLS
++#define ASM_RET       "ret; int3\n\t"
++#else
+ #define ASM_RET       "ret\n\t"
++#endif
+ #endif /* __ASSEMBLY__ */
+--- a/arch/x86/include/asm/static_call.h
++++ b/arch/x86/include/asm/static_call.h
+@@ -35,7 +35,7 @@
+       __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)                      \
+-      __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop")
++      __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
+ #define ARCH_ADD_TRAMP_KEY(name)                                      \
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -308,7 +308,7 @@ union ftrace_op_code_union {
+       } __attribute__((packed));
+ };
+-#define RET_SIZE              1
++#define RET_SIZE              1 + IS_ENABLED(CONFIG_SLS)
+ static unsigned long
+ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -11,6 +11,8 @@ enum insn_type {
+       RET = 3,  /* tramp / site cond-tail-call */
+ };
++static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
++
+ static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
+ {
+       int size = CALL_INSN_SIZE;
+@@ -30,8 +32,7 @@ static void __ref __static_call_transfor
+               break;
+       case RET:
+-              code = text_gen_insn(RET_INSN_OPCODE, insn, func);
+-              size = RET_INSN_SIZE;
++              code = &retinsn;
+               break;
+       }
+--- a/arch/x86/lib/memmove_64.S
++++ b/arch/x86/lib/memmove_64.S
+@@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove)
+       /* FSRM implies ERMS => no length checks, do the copy directly */
+ .Lmemmove_begin_forward:
+       ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
+-      ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; RET", X86_FEATURE_ERMS
++      ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS
+       /*
+        * movsq instruction have many startup latency
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -34,7 +34,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\re
+       ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+                     __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
+-                    __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
++                    __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE
+ .endm
+--- a/scripts/Makefile.build
++++ b/scripts/Makefile.build
+@@ -230,6 +230,9 @@ endif
+ ifdef CONFIG_X86_SMAP
+   objtool_args += --uaccess
+ endif
++ifdef CONFIG_SLS
++  objtool_args += --sls
++endif
+ # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
+ # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -77,6 +77,9 @@ objtool_link()
+               if [ -n "${CONFIG_X86_SMAP}" ]; then
+                       objtoolopt="${objtoolopt} --uaccess"
+               fi
++              if [ -n "${CONFIG_SLS}" ]; then
++                      objtoolopt="${objtoolopt} --sls"
++              fi
+               info OBJTOOL ${1}
+               tools/objtool/objtool ${objtoolopt} ${1}
+       fi
diff --git a/queue-5.10/x86-alternative-add-debug-prints-to-apply_retpolines.patch b/queue-5.10/x86-alternative-add-debug-prints-to-apply_retpolines.patch
new file mode 100644 (file)
index 0000000..2c19ce2
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:45 +0200
+Subject: x86/alternative: Add debug prints to apply_retpolines()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d4b5a5c993009ffeb5febe3b701da3faab6adb96 upstream.
+
+Make sure we can see the text changes when booting with
+'debug-alternative'.
+
+Example output:
+
+ [ ] SMP alternatives: retpoline at: __traceiter_initcall_level+0x1f/0x30 (ffffffff8100066f) len: 5 to: __x86_indirect_thunk_rax+0x0/0x20
+ [ ] SMP alternatives: ffffffff82603e58: [2:5) optimized NOPs: ff d0 0f 1f 00
+ [ ] SMP alternatives: ffffffff8100066f: orig: e8 cc 30 00 01
+ [ ] SMP alternatives: ffffffff8100066f: repl: ff d0 0f 1f 00
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.422273830@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -647,9 +647,15 @@ void __init_or_module noinline apply_ret
+                       continue;
+               }
++              DPRINTK("retpoline at: %pS (%px) len: %d to: %pS",
++                      addr, addr, insn.length,
++                      addr + insn.length + insn.immediate.value);
++
+               len = patch_retpoline(addr, &insn, bytes);
+               if (len == insn.length) {
+                       optimize_nops(bytes, len);
++                      DUMP_BYTES(((u8*)addr),  len, "%px: orig: ", addr);
++                      DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
+                       text_poke_early(addr, bytes, len);
+               }
+       }
diff --git a/queue-5.10/x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch b/queue-5.10/x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch
new file mode 100644 (file)
index 0000000..b424a27
--- /dev/null
@@ -0,0 +1,97 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:43 +0200
+Subject: x86/alternative: Handle Jcc __x86_indirect_thunk_\reg
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 2f0cbb2a8e5bbf101e9de118fc0eb168111a5e1e upstream.
+
+Handle the rare cases where the compiler (clang) does an indirect
+conditional tail-call using:
+
+  Jcc __x86_indirect_thunk_\reg
+
+For the !RETPOLINE case this can be rewritten to fit the original (6
+byte) instruction like:
+
+  Jncc.d8      1f
+  JMP          *%\reg
+  NOP
+1:
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.296470217@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c |   40 ++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 36 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -548,7 +548,8 @@ static int emit_indirect(int op, int reg
+ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
+ {
+       retpoline_thunk_t *target;
+-      int reg, i = 0;
++      int reg, ret, i = 0;
++      u8 op, cc;
+       target = addr + insn->length + insn->immediate.value;
+       reg = target - __x86_indirect_thunk_array;
+@@ -562,9 +563,36 @@ static int patch_retpoline(void *addr, s
+       if (cpu_feature_enabled(X86_FEATURE_RETPOLINE))
+               return -1;
+-      i = emit_indirect(insn->opcode.bytes[0], reg, bytes);
+-      if (i < 0)
+-              return i;
++      op = insn->opcode.bytes[0];
++
++      /*
++       * Convert:
++       *
++       *   Jcc.d32 __x86_indirect_thunk_\reg
++       *
++       * into:
++       *
++       *   Jncc.d8 1f
++       *   JMP *%\reg
++       *   NOP
++       * 1:
++       */
++      /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
++      if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
++              cc = insn->opcode.bytes[1] & 0xf;
++              cc ^= 1; /* invert condition */
++
++              bytes[i++] = 0x70 + cc;        /* Jcc.d8 */
++              bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */
++
++              /* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */
++              op = JMP32_INSN_OPCODE;
++      }
++
++      ret = emit_indirect(op, reg, bytes + i);
++      if (ret < 0)
++              return ret;
++      i += ret;
+       for (; i < insn->length;)
+               bytes[i++] = 0x90;
+@@ -598,6 +626,10 @@ void __init_or_module noinline apply_ret
+               case JMP32_INSN_OPCODE:
+                       break;
++              case 0x0f: /* escape */
++                      if (op2 >= 0x80 && op2 <= 0x8f)
++                              break;
++                      fallthrough;
+               default:
+                       WARN_ON_ONCE(1);
+                       continue;
diff --git a/queue-5.10/x86-alternative-implement-.retpoline_sites-support.patch b/queue-5.10/x86-alternative-implement-.retpoline_sites-support.patch
new file mode 100644 (file)
index 0000000..de3d909
--- /dev/null
@@ -0,0 +1,283 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:42 +0200
+Subject: x86/alternative: Implement .retpoline_sites support
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 7508500900814d14e2e085cdc4e28142721abbdf upstream.
+
+Rewrite retpoline thunk call sites to be indirect calls for
+spectre_v2=off. This ensures spectre_v2=off is as near to a
+RETPOLINE=n build as possible.
+
+This is the replacement for objtool writing alternative entries to
+ensure the same and achieves feature-parity with the previous
+approach.
+
+One noteworthy feature is that it relies on the thunks to be in
+machine order to compute the register index.
+
+Specifically, this does not yet address the Jcc __x86_indirect_thunk_*
+calls generated by clang, a future patch will add this.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.232495794@infradead.org
+[cascardo: small conflict fixup at arch/x86/kernel/module.c]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10:
+ - Use hex literal instead of BYTES_NOP1
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/um/kernel/um_arch.c           |    4 +
+ arch/x86/include/asm/alternative.h |    1 
+ arch/x86/kernel/alternative.c      |  141 +++++++++++++++++++++++++++++++++++--
+ arch/x86/kernel/module.c           |    9 ++
+ 4 files changed, 150 insertions(+), 5 deletions(-)
+
+--- a/arch/um/kernel/um_arch.c
++++ b/arch/um/kernel/um_arch.c
+@@ -358,6 +358,10 @@ void __init check_bugs(void)
+       os_check_bugs();
+ }
++void apply_retpolines(s32 *start, s32 *end)
++{
++}
++
+ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+ {
+ }
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -75,6 +75,7 @@ extern int alternatives_patched;
+ extern void alternative_instructions(void);
+ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
++extern void apply_retpolines(s32 *start, s32 *end);
+ struct module;
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -28,6 +28,7 @@
+ #include <asm/insn.h>
+ #include <asm/io.h>
+ #include <asm/fixmap.h>
++#include <asm/asm-prototypes.h>
+ int __read_mostly alternatives_patched;
+@@ -268,6 +269,7 @@ static void __init_or_module add_nops(vo
+       }
+ }
++extern s32 __retpoline_sites[], __retpoline_sites_end[];
+ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+ extern s32 __smp_locks[], __smp_locks_end[];
+ void text_poke_early(void *addr, const void *opcode, size_t len);
+@@ -376,7 +378,7 @@ static __always_inline int optimize_nops
+  * "noinline" to cause control flow change and thus invalidate I$ and
+  * cause refetch after modification.
+  */
+-static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
++static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
+ {
+       struct insn insn;
+       int i = 0;
+@@ -394,11 +396,11 @@ static void __init_or_module noinline op
+                * optimized.
+                */
+               if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
+-                      i += optimize_nops_range(instr, a->instrlen, i);
++                      i += optimize_nops_range(instr, len, i);
+               else
+                       i += insn.length;
+-              if (i >= a->instrlen)
++              if (i >= len)
+                       return;
+       }
+ }
+@@ -486,10 +488,135 @@ void __init_or_module noinline apply_alt
+               text_poke_early(instr, insn_buff, insn_buff_sz);
+ next:
+-              optimize_nops(a, instr);
++              optimize_nops(instr, a->instrlen);
+       }
+ }
++#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION)
++
++/*
++ * CALL/JMP *%\reg
++ */
++static int emit_indirect(int op, int reg, u8 *bytes)
++{
++      int i = 0;
++      u8 modrm;
++
++      switch (op) {
++      case CALL_INSN_OPCODE:
++              modrm = 0x10; /* Reg = 2; CALL r/m */
++              break;
++
++      case JMP32_INSN_OPCODE:
++              modrm = 0x20; /* Reg = 4; JMP r/m */
++              break;
++
++      default:
++              WARN_ON_ONCE(1);
++              return -1;
++      }
++
++      if (reg >= 8) {
++              bytes[i++] = 0x41; /* REX.B prefix */
++              reg -= 8;
++      }
++
++      modrm |= 0xc0; /* Mod = 3 */
++      modrm += reg;
++
++      bytes[i++] = 0xff; /* opcode */
++      bytes[i++] = modrm;
++
++      return i;
++}
++
++/*
++ * Rewrite the compiler generated retpoline thunk calls.
++ *
++ * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate
++ * indirect instructions, avoiding the extra indirection.
++ *
++ * For example, convert:
++ *
++ *   CALL __x86_indirect_thunk_\reg
++ *
++ * into:
++ *
++ *   CALL *%\reg
++ *
++ */
++static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
++{
++      retpoline_thunk_t *target;
++      int reg, i = 0;
++
++      target = addr + insn->length + insn->immediate.value;
++      reg = target - __x86_indirect_thunk_array;
++
++      if (WARN_ON_ONCE(reg & ~0xf))
++              return -1;
++
++      /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
++      BUG_ON(reg == 4);
++
++      if (cpu_feature_enabled(X86_FEATURE_RETPOLINE))
++              return -1;
++
++      i = emit_indirect(insn->opcode.bytes[0], reg, bytes);
++      if (i < 0)
++              return i;
++
++      for (; i < insn->length;)
++              bytes[i++] = 0x90;
++
++      return i;
++}
++
++/*
++ * Generated by 'objtool --retpoline'.
++ */
++void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
++{
++      s32 *s;
++
++      for (s = start; s < end; s++) {
++              void *addr = (void *)s + *s;
++              struct insn insn;
++              int len, ret;
++              u8 bytes[16];
++              u8 op1, op2;
++
++              ret = insn_decode_kernel(&insn, addr);
++              if (WARN_ON_ONCE(ret < 0))
++                      continue;
++
++              op1 = insn.opcode.bytes[0];
++              op2 = insn.opcode.bytes[1];
++
++              switch (op1) {
++              case CALL_INSN_OPCODE:
++              case JMP32_INSN_OPCODE:
++                      break;
++
++              default:
++                      WARN_ON_ONCE(1);
++                      continue;
++              }
++
++              len = patch_retpoline(addr, &insn, bytes);
++              if (len == insn.length) {
++                      optimize_nops(bytes, len);
++                      text_poke_early(addr, bytes, len);
++              }
++      }
++}
++
++#else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
++
++void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
++
++#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
++
+ #ifdef CONFIG_SMP
+ static void alternatives_smp_lock(const s32 *start, const s32 *end,
+                                 u8 *text, u8 *text_end)
+@@ -774,6 +901,12 @@ void __init alternative_instructions(voi
+        * patching.
+        */
++      /*
++       * Rewrite the retpolines, must be done before alternatives since
++       * those can rewrite the retpoline thunks.
++       */
++      apply_retpolines(__retpoline_sites, __retpoline_sites_end);
++
+       apply_alternatives(__alt_instructions, __alt_instructions_end);
+ #ifdef CONFIG_SMP
+--- a/arch/x86/kernel/module.c
++++ b/arch/x86/kernel/module.c
+@@ -251,7 +251,8 @@ int module_finalize(const Elf_Ehdr *hdr,
+                   struct module *me)
+ {
+       const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+-              *para = NULL, *orc = NULL, *orc_ip = NULL;
++              *para = NULL, *orc = NULL, *orc_ip = NULL,
++              *retpolines = NULL;
+       char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+       for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+@@ -267,8 +268,14 @@ int module_finalize(const Elf_Ehdr *hdr,
+                       orc = s;
+               if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
+                       orc_ip = s;
++              if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
++                      retpolines = s;
+       }
++      if (retpolines) {
++              void *rseg = (void *)retpolines->sh_addr;
++              apply_retpolines(rseg, rseg + retpolines->sh_size);
++      }
+       if (alt) {
+               /* patch .altinstructions */
+               void *aseg = (void *)alt->sh_addr;
diff --git a/queue-5.10/x86-alternative-merge-include-files.patch b/queue-5.10/x86-alternative-merge-include-files.patch
new file mode 100644 (file)
index 0000000..8a848cd
--- /dev/null
@@ -0,0 +1,433 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 11 Mar 2021 15:23:06 +0100
+Subject: x86/alternative: Merge include files
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 5e21a3ecad1500e35b46701e7f3f232e15d78e69 upstream.
+
+Merge arch/x86/include/asm/alternative-asm.h into
+arch/x86/include/asm/alternative.h in order to make it easier to use
+common definitions later.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20210311142319.4723-2-jgross@suse.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_32.S                |    2 
+ arch/x86/entry/vdso/vdso32/system_call.S |    2 
+ arch/x86/include/asm/alternative-asm.h   |  114 -------------------------------
+ arch/x86/include/asm/alternative.h       |  112 +++++++++++++++++++++++++++++-
+ arch/x86/include/asm/nospec-branch.h     |    1 
+ arch/x86/include/asm/smap.h              |    5 -
+ arch/x86/lib/atomic64_386_32.S           |    2 
+ arch/x86/lib/atomic64_cx8_32.S           |    2 
+ arch/x86/lib/copy_page_64.S              |    2 
+ arch/x86/lib/copy_user_64.S              |    2 
+ arch/x86/lib/memcpy_64.S                 |    2 
+ arch/x86/lib/memmove_64.S                |    2 
+ arch/x86/lib/memset_64.S                 |    2 
+ arch/x86/lib/retpoline.S                 |    2 
+ 14 files changed, 120 insertions(+), 132 deletions(-)
+ delete mode 100644 arch/x86/include/asm/alternative-asm.h
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -40,7 +40,7 @@
+ #include <asm/processor-flags.h>
+ #include <asm/irq_vectors.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
+ #include <asm/frame.h>
+--- a/arch/x86/entry/vdso/vdso32/system_call.S
++++ b/arch/x86/entry/vdso/vdso32/system_call.S
+@@ -6,7 +6,7 @@
+ #include <linux/linkage.h>
+ #include <asm/dwarf2.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+       .text
+       .globl __kernel_vsyscall
+--- a/arch/x86/include/asm/alternative-asm.h
++++ /dev/null
+@@ -1,114 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef _ASM_X86_ALTERNATIVE_ASM_H
+-#define _ASM_X86_ALTERNATIVE_ASM_H
+-
+-#ifdef __ASSEMBLY__
+-
+-#include <asm/asm.h>
+-
+-#ifdef CONFIG_SMP
+-      .macro LOCK_PREFIX
+-672:  lock
+-      .pushsection .smp_locks,"a"
+-      .balign 4
+-      .long 672b - .
+-      .popsection
+-      .endm
+-#else
+-      .macro LOCK_PREFIX
+-      .endm
+-#endif
+-
+-/*
+- * objtool annotation to ignore the alternatives and only consider the original
+- * instruction(s).
+- */
+-.macro ANNOTATE_IGNORE_ALTERNATIVE
+-      .Lannotate_\@:
+-      .pushsection .discard.ignore_alts
+-      .long .Lannotate_\@ - .
+-      .popsection
+-.endm
+-
+-/*
+- * Issue one struct alt_instr descriptor entry (need to put it into
+- * the section .altinstructions, see below). This entry contains
+- * enough information for the alternatives patching code to patch an
+- * instruction. See apply_alternatives().
+- */
+-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
+-      .long \orig - .
+-      .long \alt - .
+-      .word \feature
+-      .byte \orig_len
+-      .byte \alt_len
+-      .byte \pad_len
+-.endm
+-
+-/*
+- * Define an alternative between two instructions. If @feature is
+- * present, early code in apply_alternatives() replaces @oldinstr with
+- * @newinstr. ".skip" directive takes care of proper instruction padding
+- * in case @newinstr is longer than @oldinstr.
+- */
+-.macro ALTERNATIVE oldinstr, newinstr, feature
+-140:
+-      \oldinstr
+-141:
+-      .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
+-142:
+-
+-      .pushsection .altinstructions,"a"
+-      altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+-      .popsection
+-
+-      .pushsection .altinstr_replacement,"ax"
+-143:
+-      \newinstr
+-144:
+-      .popsection
+-.endm
+-
+-#define old_len                       141b-140b
+-#define new_len1              144f-143f
+-#define new_len2              145f-144f
+-
+-/*
+- * gas compatible max based on the idea from:
+- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+- *
+- * The additional "-" is needed because gas uses a "true" value of -1.
+- */
+-#define alt_max_short(a, b)   ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+-
+-
+-/*
+- * Same as ALTERNATIVE macro above but for two alternatives. If CPU
+- * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
+- * @feature2, it replaces @oldinstr with @feature2.
+- */
+-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+-140:
+-      \oldinstr
+-141:
+-      .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+-              (alt_max_short(new_len1, new_len2) - (old_len)),0x90
+-142:
+-
+-      .pushsection .altinstructions,"a"
+-      altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
+-      altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+-      .popsection
+-
+-      .pushsection .altinstr_replacement,"ax"
+-143:
+-      \newinstr1
+-144:
+-      \newinstr2
+-145:
+-      .popsection
+-.endm
+-
+-#endif  /*  __ASSEMBLY__  */
+-
+-#endif /* _ASM_X86_ALTERNATIVE_ASM_H */
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -2,13 +2,14 @@
+ #ifndef _ASM_X86_ALTERNATIVE_H
+ #define _ASM_X86_ALTERNATIVE_H
+-#ifndef __ASSEMBLY__
+-
+ #include <linux/types.h>
+-#include <linux/stddef.h>
+ #include <linux/stringify.h>
+ #include <asm/asm.h>
++#ifndef __ASSEMBLY__
++
++#include <linux/stddef.h>
++
+ /*
+  * Alternative inline assembly for SMP.
+  *
+@@ -271,6 +272,111 @@ static inline int alternatives_text_rese
+  */
+ #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
++#else /* __ASSEMBLY__ */
++
++#ifdef CONFIG_SMP
++      .macro LOCK_PREFIX
++672:  lock
++      .pushsection .smp_locks,"a"
++      .balign 4
++      .long 672b - .
++      .popsection
++      .endm
++#else
++      .macro LOCK_PREFIX
++      .endm
++#endif
++
++/*
++ * objtool annotation to ignore the alternatives and only consider the original
++ * instruction(s).
++ */
++.macro ANNOTATE_IGNORE_ALTERNATIVE
++      .Lannotate_\@:
++      .pushsection .discard.ignore_alts
++      .long .Lannotate_\@ - .
++      .popsection
++.endm
++
++/*
++ * Issue one struct alt_instr descriptor entry (need to put it into
++ * the section .altinstructions, see below). This entry contains
++ * enough information for the alternatives patching code to patch an
++ * instruction. See apply_alternatives().
++ */
++.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
++      .long \orig - .
++      .long \alt - .
++      .word \feature
++      .byte \orig_len
++      .byte \alt_len
++      .byte \pad_len
++.endm
++
++/*
++ * Define an alternative between two instructions. If @feature is
++ * present, early code in apply_alternatives() replaces @oldinstr with
++ * @newinstr. ".skip" directive takes care of proper instruction padding
++ * in case @newinstr is longer than @oldinstr.
++ */
++.macro ALTERNATIVE oldinstr, newinstr, feature
++140:
++      \oldinstr
++141:
++      .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
++142:
++
++      .pushsection .altinstructions,"a"
++      altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
++      .popsection
++
++      .pushsection .altinstr_replacement,"ax"
++143:
++      \newinstr
++144:
++      .popsection
++.endm
++
++#define old_len                       141b-140b
++#define new_len1              144f-143f
++#define new_len2              145f-144f
++
++/*
++ * gas compatible max based on the idea from:
++ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
++ *
++ * The additional "-" is needed because gas uses a "true" value of -1.
++ */
++#define alt_max_short(a, b)   ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
++
++
++/*
++ * Same as ALTERNATIVE macro above but for two alternatives. If CPU
++ * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
++ * @feature2, it replaces @oldinstr with @feature2.
++ */
++.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
++140:
++      \oldinstr
++141:
++      .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
++              (alt_max_short(new_len1, new_len2) - (old_len)),0x90
++142:
++
++      .pushsection .altinstructions,"a"
++      altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
++      altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
++      .popsection
++
++      .pushsection .altinstr_replacement,"ax"
++143:
++      \newinstr1
++144:
++      \newinstr2
++145:
++      .popsection
++.endm
++
+ #endif /* __ASSEMBLY__ */
+ #endif /* _ASM_X86_ALTERNATIVE_H */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -7,7 +7,6 @@
+ #include <linux/objtool.h>
+ #include <asm/alternative.h>
+-#include <asm/alternative-asm.h>
+ #include <asm/cpufeatures.h>
+ #include <asm/msr-index.h>
+ #include <asm/unwind_hints.h>
+--- a/arch/x86/include/asm/smap.h
++++ b/arch/x86/include/asm/smap.h
+@@ -11,6 +11,7 @@
+ #include <asm/nops.h>
+ #include <asm/cpufeatures.h>
++#include <asm/alternative.h>
+ /* "Raw" instruction opcodes */
+ #define __ASM_CLAC    ".byte 0x0f,0x01,0xca"
+@@ -18,8 +19,6 @@
+ #ifdef __ASSEMBLY__
+-#include <asm/alternative-asm.h>
+-
+ #ifdef CONFIG_X86_SMAP
+ #define ASM_CLAC \
+@@ -37,8 +36,6 @@
+ #else /* __ASSEMBLY__ */
+-#include <asm/alternative.h>
+-
+ #ifdef CONFIG_X86_SMAP
+ static __always_inline void clac(void)
+--- a/arch/x86/lib/atomic64_386_32.S
++++ b/arch/x86/lib/atomic64_386_32.S
+@@ -6,7 +6,7 @@
+  */
+ #include <linux/linkage.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ /* if you want SMP support, implement these with real spinlocks */
+ .macro LOCK reg
+--- a/arch/x86/lib/atomic64_cx8_32.S
++++ b/arch/x86/lib/atomic64_cx8_32.S
+@@ -6,7 +6,7 @@
+  */
+ #include <linux/linkage.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ .macro read64 reg
+       movl %ebx, %eax
+--- a/arch/x86/lib/copy_page_64.S
++++ b/arch/x86/lib/copy_page_64.S
+@@ -3,7 +3,7 @@
+ #include <linux/linkage.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/export.h>
+ /*
+--- a/arch/x86/lib/copy_user_64.S
++++ b/arch/x86/lib/copy_user_64.S
+@@ -11,7 +11,7 @@
+ #include <asm/asm-offsets.h>
+ #include <asm/thread_info.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
+ #include <asm/export.h>
+--- a/arch/x86/lib/memcpy_64.S
++++ b/arch/x86/lib/memcpy_64.S
+@@ -4,7 +4,7 @@
+ #include <linux/linkage.h>
+ #include <asm/errno.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/export.h>
+ .pushsection .noinstr.text, "ax"
+--- a/arch/x86/lib/memmove_64.S
++++ b/arch/x86/lib/memmove_64.S
+@@ -8,7 +8,7 @@
+  */
+ #include <linux/linkage.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/export.h>
+ #undef memmove
+--- a/arch/x86/lib/memset_64.S
++++ b/arch/x86/lib/memset_64.S
+@@ -3,7 +3,7 @@
+ #include <linux/linkage.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/export.h>
+ /*
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -4,7 +4,7 @@
+ #include <linux/linkage.h>
+ #include <asm/dwarf2.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/export.h>
+ #include <asm/nospec-branch.h>
+ #include <asm/unwind_hints.h>
diff --git a/queue-5.10/x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch b/queue-5.10/x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch
new file mode 100644 (file)
index 0000000..73995ad
--- /dev/null
@@ -0,0 +1,134 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Tue, 1 Jun 2021 17:51:22 +0200
+Subject: x86/alternative: Optimize single-byte NOPs at an arbitrary position
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 2b31e8ed96b260ce2c22bd62ecbb9458399e3b62 upstream.
+
+Up until now the assumption was that an alternative patching site would
+have some instructions at the beginning and trailing single-byte NOPs
+(0x90) padding. Therefore, the patching machinery would go and optimize
+those single-byte NOPs into longer ones.
+
+However, this assumption is broken on 32-bit when code like
+hv_do_hypercall() in hyperv_init() would use the ratpoline speculation
+killer CALL_NOSPEC. The 32-bit version of that macro would align certain
+insns to 16 bytes, leading to the compiler issuing a one or more
+single-byte NOPs, depending on the holes it needs to fill for alignment.
+
+That would lead to the warning in optimize_nops() to fire:
+
+  ------------[ cut here ]------------
+  Not a NOP at 0xc27fb598
+   WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:211 optimize_nops.isra.13
+
+due to that function verifying whether all of the following bytes really
+are single-byte NOPs.
+
+Therefore, carve out the NOP padding into a separate function and call
+it for each NOP range beginning with a single-byte NOP.
+
+Fixes: 23c1ad538f4f ("x86/alternatives: Optimize optimize_nops()")
+Reported-by: Richard Narron <richard@aaazen.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=213301
+Link: https://lkml.kernel.org/r/20210601212125.17145-1-bp@alien8.de
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c |   64 ++++++++++++++++++++++++++++++------------
+ 1 file changed, 46 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -338,41 +338,69 @@ done:
+ }
+ /*
++ * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
++ *
++ * @instr: instruction byte stream
++ * @instrlen: length of the above
++ * @off: offset within @instr where the first NOP has been detected
++ *
++ * Return: number of NOPs found (and replaced).
++ */
++static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
++{
++      unsigned long flags;
++      int i = off, nnops;
++
++      while (i < instrlen) {
++              if (instr[i] != 0x90)
++                      break;
++
++              i++;
++      }
++
++      nnops = i - off;
++
++      if (nnops <= 1)
++              return nnops;
++
++      local_irq_save(flags);
++      add_nops(instr + off, nnops);
++      local_irq_restore(flags);
++
++      DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
++
++      return nnops;
++}
++
++/*
+  * "noinline" to cause control flow change and thus invalidate I$ and
+  * cause refetch after modification.
+  */
+ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
+ {
+-      unsigned long flags;
+       struct insn insn;
+-      int nop, i = 0;
++      int i = 0;
+       /*
+-       * Jump over the non-NOP insns, the remaining bytes must be single-byte
+-       * NOPs, optimize them.
++       * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
++       * ones.
+        */
+       for (;;) {
+               if (insn_decode_kernel(&insn, &instr[i]))
+                       return;
++              /*
++               * See if this and any potentially following NOPs can be
++               * optimized.
++               */
+               if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
+-                      break;
++                      i += optimize_nops_range(instr, a->instrlen, i);
++              else
++                      i += insn.length;
+-              if ((i += insn.length) >= a->instrlen)
++              if (i >= a->instrlen)
+                       return;
+       }
+-
+-      for (nop = i; i < a->instrlen; i++) {
+-              if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+-                      return;
+-      }
+-
+-      local_irq_save(flags);
+-      add_nops(instr + nop, i - nop);
+-      local_irq_restore(flags);
+-
+-      DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
+-                 instr, nop, a->instrlen);
+ }
+ /*
diff --git a/queue-5.10/x86-alternative-relax-text_poke_bp-constraint.patch b/queue-5.10/x86-alternative-relax-text_poke_bp-constraint.patch
new file mode 100644 (file)
index 0000000..e1b4915
--- /dev/null
@@ -0,0 +1,172 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sat, 4 Dec 2021 14:43:43 +0100
+Subject: x86/alternative: Relax text_poke_bp() constraint
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 26c44b776dba4ac692a0bf5a3836feb8a63fea6b upstream.
+
+Currently, text_poke_bp() is very strict to only allow patching a
+single instruction; however with straight-line-speculation it will be
+required to patch: ret; int3, which is two instructions.
+
+As such, relax the constraints a little to allow int3 padding for all
+instructions that do not imply the execution of the next instruction,
+ie: RET, JMP.d8 and JMP.d32.
+
+While there, rename the text_poke_loc::rel32 field to ::disp.
+
+Note: this fills up the text_poke_loc structure which is now a round
+  16 bytes big.
+
+  [ bp: Put comments ontop instead of on the side. ]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20211204134908.082342723@infradead.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c |   49 +++++++++++++++++++++++++++++-------------
+ 1 file changed, 34 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -1243,10 +1243,13 @@ void text_poke_sync(void)
+ }
+ struct text_poke_loc {
+-      s32 rel_addr; /* addr := _stext + rel_addr */
+-      s32 rel32;
++      /* addr := _stext + rel_addr */
++      s32 rel_addr;
++      s32 disp;
++      u8 len;
+       u8 opcode;
+       const u8 text[POKE_MAX_OPCODE_SIZE];
++      /* see text_poke_bp_batch() */
+       u8 old;
+ };
+@@ -1261,7 +1264,8 @@ static struct bp_patching_desc *bp_desc;
+ static __always_inline
+ struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
+ {
+-      struct bp_patching_desc *desc = __READ_ONCE(*descp); /* rcu_dereference */
++      /* rcu_dereference */
++      struct bp_patching_desc *desc = __READ_ONCE(*descp);
+       if (!desc || !arch_atomic_inc_not_zero(&desc->refs))
+               return NULL;
+@@ -1295,7 +1299,7 @@ noinstr int poke_int3_handler(struct pt_
+ {
+       struct bp_patching_desc *desc;
+       struct text_poke_loc *tp;
+-      int len, ret = 0;
++      int ret = 0;
+       void *ip;
+       if (user_mode(regs))
+@@ -1335,8 +1339,7 @@ noinstr int poke_int3_handler(struct pt_
+                       goto out_put;
+       }
+-      len = text_opcode_size(tp->opcode);
+-      ip += len;
++      ip += tp->len;
+       switch (tp->opcode) {
+       case INT3_INSN_OPCODE:
+@@ -1351,12 +1354,12 @@ noinstr int poke_int3_handler(struct pt_
+               break;
+       case CALL_INSN_OPCODE:
+-              int3_emulate_call(regs, (long)ip + tp->rel32);
++              int3_emulate_call(regs, (long)ip + tp->disp);
+               break;
+       case JMP32_INSN_OPCODE:
+       case JMP8_INSN_OPCODE:
+-              int3_emulate_jmp(regs, (long)ip + tp->rel32);
++              int3_emulate_jmp(regs, (long)ip + tp->disp);
+               break;
+       default:
+@@ -1431,7 +1434,7 @@ static void text_poke_bp_batch(struct te
+        */
+       for (do_sync = 0, i = 0; i < nr_entries; i++) {
+               u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, };
+-              int len = text_opcode_size(tp[i].opcode);
++              int len = tp[i].len;
+               if (len - INT3_INSN_SIZE > 0) {
+                       memcpy(old + INT3_INSN_SIZE,
+@@ -1508,21 +1511,37 @@ static void text_poke_loc_init(struct te
+                              const void *opcode, size_t len, const void *emulate)
+ {
+       struct insn insn;
+-      int ret;
++      int ret, i;
+       memcpy((void *)tp->text, opcode, len);
+       if (!emulate)
+               emulate = opcode;
+       ret = insn_decode_kernel(&insn, emulate);
+-
+       BUG_ON(ret < 0);
+-      BUG_ON(len != insn.length);
+       tp->rel_addr = addr - (void *)_stext;
++      tp->len = len;
+       tp->opcode = insn.opcode.bytes[0];
+       switch (tp->opcode) {
++      case RET_INSN_OPCODE:
++      case JMP32_INSN_OPCODE:
++      case JMP8_INSN_OPCODE:
++              /*
++               * Control flow instructions without implied execution of the
++               * next instruction can be padded with INT3.
++               */
++              for (i = insn.length; i < len; i++)
++                      BUG_ON(tp->text[i] != INT3_INSN_OPCODE);
++              break;
++
++      default:
++              BUG_ON(len != insn.length);
++      };
++
++
++      switch (tp->opcode) {
+       case INT3_INSN_OPCODE:
+       case RET_INSN_OPCODE:
+               break;
+@@ -1530,7 +1549,7 @@ static void text_poke_loc_init(struct te
+       case CALL_INSN_OPCODE:
+       case JMP32_INSN_OPCODE:
+       case JMP8_INSN_OPCODE:
+-              tp->rel32 = insn.immediate.value;
++              tp->disp = insn.immediate.value;
+               break;
+       default: /* assume NOP */
+@@ -1538,13 +1557,13 @@ static void text_poke_loc_init(struct te
+               case 2: /* NOP2 -- emulate as JMP8+0 */
+                       BUG_ON(memcmp(emulate, ideal_nops[len], len));
+                       tp->opcode = JMP8_INSN_OPCODE;
+-                      tp->rel32 = 0;
++                      tp->disp = 0;
+                       break;
+               case 5: /* NOP5 -- emulate as JMP32+0 */
+                       BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len));
+                       tp->opcode = JMP32_INSN_OPCODE;
+-                      tp->rel32 = 0;
++                      tp->disp = 0;
+                       break;
+               default: /* unknown instruction */
diff --git a/queue-5.10/x86-alternative-support-alternative_ternary.patch b/queue-5.10/x86-alternative-support-alternative_ternary.patch
new file mode 100644 (file)
index 0000000..6e17377
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 11 Mar 2021 15:23:11 +0100
+Subject: x86/alternative: Support ALTERNATIVE_TERNARY
+
+From: Juergen Gross <jgross@suse.com>
+
+commit e208b3c4a9748b2c17aa09ba663b5096ccf82dce upstream.
+
+Add ALTERNATIVE_TERNARY support for replacing an initial instruction
+with either of two instructions depending on a feature:
+
+  ALTERNATIVE_TERNARY "default_instr", FEATURE_NR,
+                      "feature_on_instr", "feature_off_instr"
+
+which will start with "default_instr" and at patch time will,
+depending on FEATURE_NR being set or not, patch that with either
+"feature_on_instr" or "feature_off_instr".
+
+ [ bp: Add comment ontop. ]
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20210311142319.4723-7-jgross@suse.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/alternative.h |   13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -179,6 +179,11 @@ static inline int alternatives_text_rese
+       ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)                    \
+       ".popsection\n"
++/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
++#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
++      ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS,        \
++                    newinstr_yes, feature)
++
+ #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \
+       OLDINSTR_3(oldinsn, 1, 2, 3)                                            \
+       ".pushsection .altinstructions,\"a\"\n"                                 \
+@@ -210,6 +215,9 @@ static inline int alternatives_text_rese
+ #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
+       asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
++#define alternative_ternary(oldinstr, feature, newinstr_yes, newinstr_no) \
++      asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) ::: "memory")
++
+ /*
+  * Alternative inline assembly with input.
+  *
+@@ -380,6 +388,11 @@ static inline int alternatives_text_rese
+       .popsection
+ .endm
++/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
++#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
++      ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS,        \
++      newinstr_yes, feature
++
+ #endif /* __ASSEMBLY__ */
+ #endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/queue-5.10/x86-alternative-support-not-feature.patch b/queue-5.10/x86-alternative-support-not-feature.patch
new file mode 100644 (file)
index 0000000..1258bd6
--- /dev/null
@@ -0,0 +1,91 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 11 Mar 2021 15:23:10 +0100
+Subject: x86/alternative: Support not-feature
+
+From: Juergen Gross <jgross@suse.com>
+
+commit dda7bb76484978316bb412a353789ebc5901de36 upstream.
+
+Add support for alternative patching for the case a feature is not
+present on the current CPU. For users of ALTERNATIVE() and friends, an
+inverted feature is specified by applying the ALT_NOT() macro to it,
+e.g.:
+
+  ALTERNATIVE(old, new, ALT_NOT(feature));
+
+Committer note:
+
+The decision to encode the NOT-bit in the feature bit itself is because
+a future change which would make objtool generate such alternative
+calls, would keep the code in objtool itself fairly simple.
+
+Also, this allows for the alternative macros to support the NOT feature
+without having to change them.
+
+Finally, the u16 cpuid member encoding the X86_FEATURE_ flags is not an
+ABI so if more bits are needed, cpuid itself can be enlarged or a flags
+field can be added to struct alt_instr after having considered the size
+growth in either cases.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20210311142319.4723-6-jgross@suse.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/alternative.h |    3 +++
+ arch/x86/kernel/alternative.c      |   20 +++++++++++++++-----
+ 2 files changed, 18 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -6,6 +6,9 @@
+ #include <linux/stringify.h>
+ #include <asm/asm.h>
++#define ALTINSTR_FLAG_INV     (1 << 15)
++#define ALT_NOT(feat)         ((feat) | ALTINSTR_FLAG_INV)
++
+ #ifndef __ASSEMBLY__
+ #include <linux/stddef.h>
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -388,21 +388,31 @@ void __init_or_module noinline apply_alt
+        */
+       for (a = start; a < end; a++) {
+               int insn_buff_sz = 0;
++              /* Mask away "NOT" flag bit for feature to test. */
++              u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV;
+               instr = (u8 *)&a->instr_offset + a->instr_offset;
+               replacement = (u8 *)&a->repl_offset + a->repl_offset;
+               BUG_ON(a->instrlen > sizeof(insn_buff));
+-              BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
+-              if (!boot_cpu_has(a->cpuid)) {
++              BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32);
++
++              /*
++               * Patch if either:
++               * - feature is present
++               * - feature not present but ALTINSTR_FLAG_INV is set to mean,
++               *   patch if feature is *NOT* present.
++               */
++              if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
+                       if (a->padlen > 1)
+                               optimize_nops(a, instr);
+                       continue;
+               }
+-              DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
+-                      a->cpuid >> 5,
+-                      a->cpuid & 0x1f,
++              DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
++                      (a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
++                      feature >> 5,
++                      feature & 0x1f,
+                       instr, instr, a->instrlen,
+                       replacement, a->replacementlen, a->padlen);
diff --git a/queue-5.10/x86-alternative-try-inline-spectre_v2-retpoline-amd.patch b/queue-5.10/x86-alternative-try-inline-spectre_v2-retpoline-amd.patch
new file mode 100644 (file)
index 0000000..80fca85
--- /dev/null
@@ -0,0 +1,97 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:44 +0200
+Subject: x86/alternative: Try inline spectre_v2=retpoline,amd
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit bbe2df3f6b6da7848398d55b1311d58a16ec21e4 upstream.
+
+Try and replace retpoline thunk calls with:
+
+  LFENCE
+  CALL    *%\reg
+
+for spectre_v2=retpoline,amd.
+
+Specifically, the sequence above is 5 bytes for the low 8 registers,
+but 6 bytes for the high 8 registers. This means that unless the
+compilers prefix stuff the call with higher registers this replacement
+will fail.
+
+Luckily GCC strongly favours RAX for the indirect calls and most (95%+
+for defconfig-x86_64) will be converted. OTOH clang strongly favours
+R11 and almost nothing gets converted.
+
+Note: it will also generate a correct replacement for the Jcc.d32
+case, except unless the compilers start to prefix stuff that, it'll
+never fit. Specifically:
+
+  Jncc.d8 1f
+  LFENCE
+  JMP     *%\reg
+1:
+
+is 7-8 bytes long, where the original instruction in unpadded form is
+only 6 bytes.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.359986601@infradead.org
+[cascardo: RETPOLINE_AMD was renamed to RETPOLINE_LFENCE]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c |   16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -544,6 +544,7 @@ static int emit_indirect(int op, int reg
+  *
+  *   CALL *%\reg
+  *
++ * It also tries to inline spectre_v2=retpoline,amd when size permits.
+  */
+ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
+ {
+@@ -560,7 +561,8 @@ static int patch_retpoline(void *addr, s
+       /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
+       BUG_ON(reg == 4);
+-      if (cpu_feature_enabled(X86_FEATURE_RETPOLINE))
++      if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
++          !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE))
+               return -1;
+       op = insn->opcode.bytes[0];
+@@ -573,8 +575,9 @@ static int patch_retpoline(void *addr, s
+        * into:
+        *
+        *   Jncc.d8 1f
++       *   [ LFENCE ]
+        *   JMP *%\reg
+-       *   NOP
++       *   [ NOP ]
+        * 1:
+        */
+       /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
+@@ -589,6 +592,15 @@ static int patch_retpoline(void *addr, s
+               op = JMP32_INSN_OPCODE;
+       }
++      /*
++       * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE.
++       */
++      if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
++              bytes[i++] = 0x0f;
++              bytes[i++] = 0xae;
++              bytes[i++] = 0xe8; /* LFENCE */
++      }
++
+       ret = emit_indirect(op, reg, bytes + i);
+       if (ret < 0)
+               return ret;
diff --git a/queue-5.10/x86-alternative-use-alternative_ternary-in-_static_cpu_has.patch b/queue-5.10/x86-alternative-use-alternative_ternary-in-_static_cpu_has.patch
new file mode 100644 (file)
index 0000000..4464ee3
--- /dev/null
@@ -0,0 +1,79 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 11 Mar 2021 15:23:12 +0100
+Subject: x86/alternative: Use ALTERNATIVE_TERNARY() in _static_cpu_has()
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 2fe2a2c7a97c9bc32acc79154b75e754280f7867 upstream.
+
+_static_cpu_has() contains a completely open coded version of
+ALTERNATIVE_TERNARY(). Replace that with the macro instead.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20210311142319.4723-8-jgross@suse.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeature.h |   41 ++++++++------------------------------
+ 1 file changed, 9 insertions(+), 32 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -8,6 +8,7 @@
+ #include <asm/asm.h>
+ #include <linux/bitops.h>
++#include <asm/alternative.h>
+ enum cpuid_leafs
+ {
+@@ -172,39 +173,15 @@ extern void clear_cpu_cap(struct cpuinfo
+  */
+ static __always_inline bool _static_cpu_has(u16 bit)
+ {
+-      asm_volatile_goto("1: jmp 6f\n"
+-               "2:\n"
+-               ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+-                       "((5f-4f) - (2b-1b)),0x90\n"
+-               "3:\n"
+-               ".section .altinstructions,\"a\"\n"
+-               " .long 1b - .\n"              /* src offset */
+-               " .long 4f - .\n"              /* repl offset */
+-               " .word %P[always]\n"          /* always replace */
+-               " .byte 3b - 1b\n"             /* src len */
+-               " .byte 5f - 4f\n"             /* repl len */
+-               " .byte 3b - 2b\n"             /* pad len */
+-               ".previous\n"
+-               ".section .altinstr_replacement,\"ax\"\n"
+-               "4: jmp %l[t_no]\n"
+-               "5:\n"
+-               ".previous\n"
+-               ".section .altinstructions,\"a\"\n"
+-               " .long 1b - .\n"              /* src offset */
+-               " .long 0\n"                   /* no replacement */
+-               " .word %P[feature]\n"         /* feature bit */
+-               " .byte 3b - 1b\n"             /* src len */
+-               " .byte 0\n"                   /* repl len */
+-               " .byte 0\n"                   /* pad len */
+-               ".previous\n"
+-               ".section .altinstr_aux,\"ax\"\n"
+-               "6:\n"
+-               " testb %[bitnum],%[cap_byte]\n"
+-               " jnz %l[t_yes]\n"
+-               " jmp %l[t_no]\n"
+-               ".previous\n"
++      asm_volatile_goto(
++              ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
++              ".section .altinstr_aux,\"ax\"\n"
++              "6:\n"
++              " testb %[bitnum],%[cap_byte]\n"
++              " jnz %l[t_yes]\n"
++              " jmp %l[t_no]\n"
++              ".previous\n"
+                : : [feature]  "i" (bit),
+-                   [always]   "i" (X86_FEATURE_ALWAYS),
+                    [bitnum]   "i" (1 << (bit & 7)),
+                    [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+                : : t_yes, t_no);
diff --git a/queue-5.10/x86-alternative-use-insn_decode.patch b/queue-5.10/x86-alternative-use-insn_decode.patch
new file mode 100644 (file)
index 0000000..db84d54
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Fri, 6 Nov 2020 19:37:25 +0100
+Subject: x86/alternative: Use insn_decode()
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 63c66cde7bbcc79aac14b25861c5b2495eede57b upstream.
+
+No functional changes, just simplification.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20210304174237.31945-10-bp@alien8.de
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -1284,15 +1284,15 @@ static void text_poke_loc_init(struct te
+                              const void *opcode, size_t len, const void *emulate)
+ {
+       struct insn insn;
++      int ret;
+       memcpy((void *)tp->text, opcode, len);
+       if (!emulate)
+               emulate = opcode;
+-      kernel_insn_init(&insn, emulate, MAX_INSN_SIZE);
+-      insn_get_length(&insn);
++      ret = insn_decode(&insn, emulate, MAX_INSN_SIZE, INSN_MODE_KERN);
+-      BUG_ON(!insn_complete(&insn));
++      BUG_ON(ret < 0);
+       BUG_ON(len != insn.length);
+       tp->rel_addr = addr - (void *)_stext;
diff --git a/queue-5.10/x86-alternatives-optimize-optimize_nops.patch b/queue-5.10/x86-alternatives-optimize-optimize_nops.patch
new file mode 100644 (file)
index 0000000..4115ae4
--- /dev/null
@@ -0,0 +1,216 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:01 +0100
+Subject: x86/alternatives: Optimize optimize_nops()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 23c1ad538f4f371bdb67d8a112314842d5db7e5a upstream.
+
+Currently, optimize_nops() scans to see if the alternative starts with
+NOPs. However, the emit pattern is:
+
+  141: \oldinstr
+  142: .skip (len-(142b-141b)), 0x90
+
+That is, when 'oldinstr' is short, the tail is padded with NOPs. This case
+never gets optimized.
+
+Rewrite optimize_nops() to replace any trailing string of NOPs inside
+the alternative to larger NOPs. Also run it irrespective of patching,
+replacing NOPs in both the original and replaced code.
+
+A direct consequence is that 'padlen' becomes superfluous, so remove it.
+
+ [ bp:
+   - Adjust commit message
+   - remove a stale comment about needing to pad
+   - add a comment in optimize_nops()
+   - exit early if the NOP verif. loop catches a mismatch - function
+     should not not add NOPs in that case
+   - fix the "optimized NOPs" offsets output ]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lkml.kernel.org/r/20210326151259.442992235@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/alternative.h            |   17 ++-------
+ arch/x86/kernel/alternative.c                 |   49 ++++++++++++++++----------
+ tools/objtool/arch/x86/include/arch_special.h |    2 -
+ 3 files changed, 37 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -65,7 +65,6 @@ struct alt_instr {
+       u16 cpuid;              /* cpuid bit set for replacement */
+       u8  instrlen;           /* length of original instruction */
+       u8  replacementlen;     /* length of new instruction */
+-      u8  padlen;             /* length of build-time padding */
+ } __packed;
+ /*
+@@ -104,7 +103,6 @@ static inline int alternatives_text_rese
+ #define alt_end_marker                "663"
+ #define alt_slen              "662b-661b"
+-#define alt_pad_len           alt_end_marker"b-662b"
+ #define alt_total_slen                alt_end_marker"b-661b"
+ #define alt_rlen(num)         e_replacement(num)"f-"b_replacement(num)"f"
+@@ -151,8 +149,7 @@ static inline int alternatives_text_rese
+       " .long " b_replacement(num)"f - .\n"           /* new instruction */ \
+       " .word " __stringify(feature) "\n"             /* feature bit     */ \
+       " .byte " alt_total_slen "\n"                   /* source len      */ \
+-      " .byte " alt_rlen(num) "\n"                    /* replacement len */ \
+-      " .byte " alt_pad_len "\n"                      /* pad len */
++      " .byte " alt_rlen(num) "\n"                    /* replacement len */
+ #define ALTINSTR_REPLACEMENT(newinstr, feature, num)  /* replacement */       \
+       "# ALT: replacement " #num "\n"                                         \
+@@ -224,9 +221,6 @@ static inline int alternatives_text_rese
+  * Peculiarities:
+  * No memory clobber here.
+  * Argument numbers start with 1.
+- * Best is to use constraints that are fixed size (like (%1) ... "r")
+- * If you use variable sized constraints like "m" or "g" in the
+- * replacement make sure to pad to the worst case length.
+  * Leaving an unused argument 0 to keep API compatibility.
+  */
+ #define alternative_input(oldinstr, newinstr, feature, input...)      \
+@@ -315,13 +309,12 @@ static inline int alternatives_text_rese
+  * enough information for the alternatives patching code to patch an
+  * instruction. See apply_alternatives().
+  */
+-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
++.macro altinstruction_entry orig alt feature orig_len alt_len
+       .long \orig - .
+       .long \alt - .
+       .word \feature
+       .byte \orig_len
+       .byte \alt_len
+-      .byte \pad_len
+ .endm
+ /*
+@@ -338,7 +331,7 @@ static inline int alternatives_text_rese
+ 142:
+       .pushsection .altinstructions,"a"
+-      altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
++      altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f
+       .popsection
+       .pushsection .altinstr_replacement,"ax"
+@@ -375,8 +368,8 @@ static inline int alternatives_text_rese
+ 142:
+       .pushsection .altinstructions,"a"
+-      altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
+-      altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
++      altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
++      altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f
+       .popsection
+       .pushsection .altinstr_replacement,"ax"
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -344,19 +344,35 @@ done:
+ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
+ {
+       unsigned long flags;
+-      int i;
++      struct insn insn;
++      int nop, i = 0;
+-      for (i = 0; i < a->padlen; i++) {
+-              if (instr[i] != 0x90)
++      /*
++       * Jump over the non-NOP insns, the remaining bytes must be single-byte
++       * NOPs, optimize them.
++       */
++      for (;;) {
++              if (insn_decode_kernel(&insn, &instr[i]))
++                      return;
++
++              if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
++                      break;
++
++              if ((i += insn.length) >= a->instrlen)
++                      return;
++      }
++
++      for (nop = i; i < a->instrlen; i++) {
++              if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+                       return;
+       }
+       local_irq_save(flags);
+-      add_nops(instr + (a->instrlen - a->padlen), a->padlen);
++      add_nops(instr + nop, i - nop);
+       local_irq_restore(flags);
+       DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
+-                 instr, a->instrlen - a->padlen, a->padlen);
++                 instr, nop, a->instrlen);
+ }
+ /*
+@@ -402,19 +418,15 @@ void __init_or_module noinline apply_alt
+                * - feature not present but ALTINSTR_FLAG_INV is set to mean,
+                *   patch if feature is *NOT* present.
+                */
+-              if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
+-                      if (a->padlen > 1)
+-                              optimize_nops(a, instr);
+-
+-                      continue;
+-              }
++              if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV))
++                      goto next;
+-              DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
++              DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
+                       (a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
+                       feature >> 5,
+                       feature & 0x1f,
+                       instr, instr, a->instrlen,
+-                      replacement, a->replacementlen, a->padlen);
++                      replacement, a->replacementlen);
+               DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
+               DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
+@@ -438,14 +450,15 @@ void __init_or_module noinline apply_alt
+               if (a->replacementlen && is_jmp(replacement[0]))
+                       recompute_jump(a, instr, replacement, insn_buff);
+-              if (a->instrlen > a->replacementlen) {
+-                      add_nops(insn_buff + a->replacementlen,
+-                               a->instrlen - a->replacementlen);
+-                      insn_buff_sz += a->instrlen - a->replacementlen;
+-              }
++              for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
++                      insn_buff[insn_buff_sz] = 0x90;
++
+               DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
+               text_poke_early(instr, insn_buff, insn_buff_sz);
++
++next:
++              optimize_nops(a, instr);
+       }
+ }
+--- a/tools/objtool/arch/x86/include/arch_special.h
++++ b/tools/objtool/arch/x86/include/arch_special.h
+@@ -10,7 +10,7 @@
+ #define JUMP_ORIG_OFFSET      0
+ #define JUMP_NEW_OFFSET               4
+-#define ALT_ENTRY_SIZE                13
++#define ALT_ENTRY_SIZE                12
+ #define ALT_ORIG_OFFSET               0
+ #define ALT_NEW_OFFSET                4
+ #define ALT_FEATURE_OFFSET    8
diff --git a/queue-5.10/x86-asm-fix-register-order.patch b/queue-5.10/x86-asm-fix-register-order.patch
new file mode 100644 (file)
index 0000000..cbc6c1d
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:38 +0200
+Subject: x86/asm: Fix register order
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit a92ede2d584a2e070def59c7e47e6b6f6341c55c upstream.
+
+Ensure the register order is correct; this allows for easy translation
+between register number and trampoline and vice-versa.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120309.978573921@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/GEN-for-each-reg.h |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/GEN-for-each-reg.h
++++ b/arch/x86/include/asm/GEN-for-each-reg.h
+@@ -1,11 +1,16 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * These are in machine order; things rely on that.
++ */
+ #ifdef CONFIG_64BIT
+ GEN(rax)
+-GEN(rbx)
+ GEN(rcx)
+ GEN(rdx)
++GEN(rbx)
++GEN(rsp)
++GEN(rbp)
+ GEN(rsi)
+ GEN(rdi)
+-GEN(rbp)
+ GEN(r8)
+ GEN(r9)
+ GEN(r10)
+@@ -16,10 +21,11 @@ GEN(r14)
+ GEN(r15)
+ #else
+ GEN(eax)
+-GEN(ebx)
+ GEN(ecx)
+ GEN(edx)
++GEN(ebx)
++GEN(esp)
++GEN(ebp)
+ GEN(esi)
+ GEN(edi)
+-GEN(ebp)
+ #endif
diff --git a/queue-5.10/x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch b/queue-5.10/x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch
new file mode 100644 (file)
index 0000000..3f09f94
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:39 +0200
+Subject: x86/asm: Fixup odd GEN-for-each-reg.h usage
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit b6d3d9944bd7c9e8c06994ead3c9952f673f2a66 upstream.
+
+Currently GEN-for-each-reg.h usage leaves GEN defined, relying on any
+subsequent usage to start with #undef, which is rude.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.041792350@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm-prototypes.h |    2 +-
+ arch/x86/lib/retpoline.S              |    4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -19,9 +19,9 @@ extern void cmpxchg8b_emu(void);
+ #ifdef CONFIG_RETPOLINE
+-#undef GEN
+ #define GEN(reg) \
+       extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+ #include <asm/GEN-for-each-reg.h>
++#undef GEN
+ #endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -55,10 +55,10 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
+ #define __EXPORT_THUNK(sym)   _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+ #define EXPORT_THUNK(reg)     __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+-#undef GEN
+ #define GEN(reg) THUNK reg
+ #include <asm/GEN-for-each-reg.h>
+-
+ #undef GEN
++
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
++#undef GEN
diff --git a/queue-5.10/x86-bpf-use-alternative-ret-encoding.patch b/queue-5.10/x86-bpf-use-alternative-ret-encoding.patch
new file mode 100644 (file)
index 0000000..fc47d49
--- /dev/null
@@ -0,0 +1,66 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:41 +0200
+Subject: x86/bpf: Use alternative RET encoding
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d77cfe594ad50e0bf95d457e02ccd578791b2a15 upstream.
+
+Use the return thunk in eBPF generated code, if needed.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: add the necessary cnt variable to emit_return()]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/net/bpf_jit_comp.c |   20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -399,6 +399,22 @@ static void emit_indirect_jump(u8 **ppro
+       *pprog = prog;
+ }
++static void emit_return(u8 **pprog, u8 *ip)
++{
++      u8 *prog = *pprog;
++      int cnt = 0;
++
++      if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
++              emit_jump(&prog, &__x86_return_thunk, ip);
++      } else {
++              EMIT1(0xC3);            /* ret */
++              if (IS_ENABLED(CONFIG_SLS))
++                      EMIT1(0xCC);    /* int3 */
++      }
++
++      *pprog = prog;
++}
++
+ /*
+  * Generate the following code:
+  *
+@@ -1443,7 +1459,7 @@ emit_jmp:
+                       ctx->cleanup_addr = proglen;
+                       pop_callee_regs(&prog, callee_regs_used);
+                       EMIT1(0xC9);         /* leave */
+-                      EMIT1(0xC3);         /* ret */
++                      emit_return(&prog, image + addrs[i - 1] + (prog - temp));
+                       break;
+               default:
+@@ -1884,7 +1900,7 @@ int arch_prepare_bpf_trampoline(struct b
+       if (flags & BPF_TRAMP_F_SKIP_FRAME)
+               /* skip our return address and return to parent */
+               EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+-      EMIT1(0xC3); /* ret */
++      emit_return(&prog, prog);
+       /* Make sure the trampoline generation logic doesn't overflow */
+       if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
+               ret = -EFAULT;
diff --git a/queue-5.10/x86-bugs-add-amd-retbleed-boot-parameter.patch b/queue-5.10/x86-bugs-add-amd-retbleed-boot-parameter.patch
new file mode 100644 (file)
index 0000000..dec71dd
--- /dev/null
@@ -0,0 +1,208 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Alexandre Chartre <alexandre.chartre@oracle.com>
+Date: Tue, 14 Jun 2022 23:15:50 +0200
+Subject: x86/bugs: Add AMD retbleed= boot parameter
+
+From: Alexandre Chartre <alexandre.chartre@oracle.com>
+
+commit 7fbf47c7ce50b38a64576b150e7011ae73d54669 upstream.
+
+Add the "retbleed=<value>" boot parameter to select a mitigation for
+RETBleed. Possible values are "off", "auto" and "unret"
+(JMP2RET mitigation). The default value is "auto".
+
+Currently, "retbleed=auto" will select the unret mitigation on
+AMD and Hygon and no mitigation on Intel (JMP2RET is not effective on
+Intel).
+
+  [peterz: rebase; add hygon]
+  [jpoimboe: cleanups]
+
+Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |   15 +++
+ arch/x86/Kconfig                                |    3 
+ arch/x86/kernel/cpu/bugs.c                      |  108 +++++++++++++++++++++++-
+ 3 files changed, 125 insertions(+), 1 deletion(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4656,6 +4656,21 @@
+       retain_initrd   [RAM] Keep initrd memory after extraction
++      retbleed=       [X86] Control mitigation of RETBleed (Arbitrary
++                      Speculative Code Execution with Return Instructions)
++                      vulnerability.
++
++                      off         - unconditionally disable
++                      auto        - automatically select a migitation
++                      unret       - force enable untrained return thunks,
++                                    only effective on AMD Zen {1,2}
++                                    based systems.
++
++                      Selecting 'auto' will choose a mitigation method at run
++                      time according to the CPU.
++
++                      Not specifying this option is equivalent to retbleed=auto.
++
+       rfkill.default_state=
+               0       "airplane mode".  All wifi, bluetooth, wimax, gps, fm,
+                       etc. communication is blocked by default.
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -465,6 +465,9 @@ config RETPOLINE
+ config CC_HAS_SLS
+       def_bool $(cc-option,-mharden-sls=all)
++config CC_HAS_RETURN_THUNK
++      def_bool $(cc-option,-mfunction-return=thunk-extern)
++
+ config SLS
+       bool "Mitigate Straight-Line-Speculation"
+       depends on CC_HAS_SLS && X86_64
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -37,6 +37,7 @@
+ #include "cpu.h"
+ static void __init spectre_v1_select_mitigation(void);
++static void __init retbleed_select_mitigation(void);
+ static void __init spectre_v2_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+@@ -112,6 +113,12 @@ void __init check_bugs(void)
+       /* Select the proper CPU mitigations before patching alternatives: */
+       spectre_v1_select_mitigation();
++      retbleed_select_mitigation();
++      /*
++       * spectre_v2_select_mitigation() relies on the state set by
++       * retbleed_select_mitigation(); specifically the STIBP selection is
++       * forced for UNRET.
++       */
+       spectre_v2_select_mitigation();
+       ssb_select_mitigation();
+       l1tf_select_mitigation();
+@@ -709,6 +716,100 @@ static int __init nospectre_v1_cmdline(c
+ early_param("nospectre_v1", nospectre_v1_cmdline);
+ #undef pr_fmt
++#define pr_fmt(fmt)     "RETBleed: " fmt
++
++enum retbleed_mitigation {
++      RETBLEED_MITIGATION_NONE,
++      RETBLEED_MITIGATION_UNRET,
++};
++
++enum retbleed_mitigation_cmd {
++      RETBLEED_CMD_OFF,
++      RETBLEED_CMD_AUTO,
++      RETBLEED_CMD_UNRET,
++};
++
++const char * const retbleed_strings[] = {
++      [RETBLEED_MITIGATION_NONE]      = "Vulnerable",
++      [RETBLEED_MITIGATION_UNRET]     = "Mitigation: untrained return thunk",
++};
++
++static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
++      RETBLEED_MITIGATION_NONE;
++static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
++      RETBLEED_CMD_AUTO;
++
++static int __init retbleed_parse_cmdline(char *str)
++{
++      if (!str)
++              return -EINVAL;
++
++      if (!strcmp(str, "off"))
++              retbleed_cmd = RETBLEED_CMD_OFF;
++      else if (!strcmp(str, "auto"))
++              retbleed_cmd = RETBLEED_CMD_AUTO;
++      else if (!strcmp(str, "unret"))
++              retbleed_cmd = RETBLEED_CMD_UNRET;
++      else
++              pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str);
++
++      return 0;
++}
++early_param("retbleed", retbleed_parse_cmdline);
++
++#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
++#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n"
++
++static void __init retbleed_select_mitigation(void)
++{
++      if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
++              return;
++
++      switch (retbleed_cmd) {
++      case RETBLEED_CMD_OFF:
++              return;
++
++      case RETBLEED_CMD_UNRET:
++              retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++              break;
++
++      case RETBLEED_CMD_AUTO:
++      default:
++              if (!boot_cpu_has_bug(X86_BUG_RETBLEED))
++                      break;
++
++              if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
++                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
++                      retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++              break;
++      }
++
++      switch (retbleed_mitigation) {
++      case RETBLEED_MITIGATION_UNRET:
++
++              if (!IS_ENABLED(CONFIG_RETPOLINE) ||
++                  !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) {
++                      pr_err(RETBLEED_COMPILER_MSG);
++                      retbleed_mitigation = RETBLEED_MITIGATION_NONE;
++                      break;
++              }
++
++              setup_force_cpu_cap(X86_FEATURE_RETHUNK);
++              setup_force_cpu_cap(X86_FEATURE_UNRET);
++
++              if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
++                  boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
++                      pr_err(RETBLEED_UNTRAIN_MSG);
++              break;
++
++      default:
++              break;
++      }
++
++      pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
++}
++
++#undef pr_fmt
+ #define pr_fmt(fmt)     "Spectre V2 : " fmt
+ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+@@ -1919,7 +2020,12 @@ static ssize_t srbds_show_state(char *bu
+ static ssize_t retbleed_show_state(char *buf)
+ {
+-      return sprintf(buf, "Vulnerable\n");
++      if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET &&
++          (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
++           boot_cpu_data.x86_vendor != X86_VENDOR_HYGON))
++              return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
++
++      return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
+ }
+ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
diff --git a/queue-5.10/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch b/queue-5.10/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch
new file mode 100644 (file)
index 0000000..dd5a58f
--- /dev/null
@@ -0,0 +1,30 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 6 Jul 2022 15:01:15 -0700
+Subject: x86/bugs: Add Cannon lake to RETBleed affected CPU list
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit f54d45372c6ac9c993451de5e51312485f7d10bc upstream.
+
+Cannon lake is also affected by RETBleed, add it to the list.
+
+Fixes: 6ad0ad2bf8a6 ("x86/bugs: Report Intel retbleed vulnerability")
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1129,6 +1129,7 @@ static const struct x86_cpu_id cpu_vuln_
+       VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(CANNONLAKE_L,    X86_STEPPING_ANY,               RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPING_ANY,               MMIO),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPING_ANY,               MMIO),
diff --git a/queue-5.10/x86-bugs-add-retbleed-ibpb.patch b/queue-5.10/x86-bugs-add-retbleed-ibpb.patch
new file mode 100644 (file)
index 0000000..4640bf0
--- /dev/null
@@ -0,0 +1,255 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:16:02 +0200
+Subject: x86/bugs: Add retbleed=ibpb
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 3ebc170068885b6fc7bedda6c667bb2c4d533159 upstream.
+
+jmp2ret mitigates the easy-to-attack case at relatively low overhead.
+It mitigates the long speculation windows after a mispredicted RET, but
+it does not mitigate the short speculation window from arbitrary
+instruction boundaries.
+
+On Zen2, there is a chicken bit which needs setting, which mitigates
+"arbitrary instruction boundaries" down to just "basic block boundaries".
+
+But there is no fix for the short speculation window on basic block
+boundaries, other than to flush the entire BTB to evict all attacker
+predictions.
+
+On the spectrum of "fast & blurry" -> "safe", there is (on top of STIBP
+or no-SMT):
+
+  1) Nothing           System wide open
+  2) jmp2ret           May stop a script kiddy
+  3) jmp2ret+chickenbit  Raises the bar rather further
+  4) IBPB              Only thing which can count as "safe".
+
+Tentative numbers put IBPB-on-entry at a 2.5x hit on Zen2, and a 10x hit
+on Zen1 according to lmbench.
+
+  [ bp: Fixup feature bit comments, document option, 32-bit build fix. ]
+
+Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |    3 +
+ arch/x86/entry/Makefile                         |    2 -
+ arch/x86/entry/entry.S                          |   22 ++++++++++++
+ arch/x86/include/asm/cpufeatures.h              |    2 -
+ arch/x86/include/asm/nospec-branch.h            |    8 +++-
+ arch/x86/kernel/cpu/bugs.c                      |   43 ++++++++++++++++++------
+ 6 files changed, 67 insertions(+), 13 deletions(-)
+ create mode 100644 arch/x86/entry/entry.S
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4666,6 +4666,9 @@
+                                      disabling SMT if necessary for
+                                      the full mitigation (only on Zen1
+                                      and older without STIBP).
++                      ibpb         - mitigate short speculation windows on
++                                     basic block boundaries too. Safe, highest
++                                     perf impact.
+                       unret        - force enable untrained return thunks,
+                                      only effective on AMD f15h-f17h
+                                      based systems.
+--- a/arch/x86/entry/Makefile
++++ b/arch/x86/entry/Makefile
+@@ -21,7 +21,7 @@ CFLAGS_syscall_64.o          += $(call cc-option
+ CFLAGS_syscall_32.o           += $(call cc-option,-Wno-override-init,)
+ CFLAGS_syscall_x32.o          += $(call cc-option,-Wno-override-init,)
+-obj-y                         := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
++obj-y                         := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
+ obj-y                         += common.o
+ obj-y                         += vdso/
+--- /dev/null
++++ b/arch/x86/entry/entry.S
+@@ -0,0 +1,22 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Common place for both 32- and 64-bit entry routines.
++ */
++
++#include <linux/linkage.h>
++#include <asm/export.h>
++#include <asm/msr-index.h>
++
++.pushsection .noinstr.text, "ax"
++
++SYM_FUNC_START(entry_ibpb)
++      movl    $MSR_IA32_PRED_CMD, %ecx
++      movl    $PRED_CMD_IBPB, %eax
++      xorl    %edx, %edx
++      wrmsr
++      RET
++SYM_FUNC_END(entry_ibpb)
++/* For KVM */
++EXPORT_SYMBOL_GPL(entry_ibpb);
++
++.popsection
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -292,7 +292,7 @@
+ #define X86_FEATURE_PER_THREAD_MBA    (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+ /* FREE!                              (11*32+ 8) */
+ /* FREE!                              (11*32+ 9) */
+-/* FREE!                              (11*32+10) */
++#define X86_FEATURE_ENTRY_IBPB                (11*32+10) /* "" Issue an IBPB on kernel entry */
+ /* FREE!                              (11*32+11) */
+ #define X86_FEATURE_RETPOLINE         (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_LFENCE  (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -123,14 +123,17 @@
+  * return thunk isn't mapped into the userspace tables (then again, AMD
+  * typically has NO_MELTDOWN).
+  *
+- * Doesn't clobber any registers but does require a stable stack.
++ * While zen_untrain_ret() doesn't clobber anything but requires stack,
++ * entry_ibpb() will clobber AX, CX, DX.
+  *
+  * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+  * where we have a stack but before any RET instruction.
+  */
+ .macro UNTRAIN_RET
+ #ifdef CONFIG_RETPOLINE
+-      ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET
++      ALTERNATIVE_2 "",                                               \
++                    "call zen_untrain_ret", X86_FEATURE_UNRET,        \
++                    "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
+ #endif
+ .endm
+@@ -144,6 +147,7 @@
+ extern void __x86_return_thunk(void);
+ extern void zen_untrain_ret(void);
++extern void entry_ibpb(void);
+ #ifdef CONFIG_RETPOLINE
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -761,6 +761,7 @@ static enum spectre_v2_mitigation spectr
+ enum retbleed_mitigation {
+       RETBLEED_MITIGATION_NONE,
+       RETBLEED_MITIGATION_UNRET,
++      RETBLEED_MITIGATION_IBPB,
+       RETBLEED_MITIGATION_IBRS,
+       RETBLEED_MITIGATION_EIBRS,
+ };
+@@ -769,11 +770,13 @@ enum retbleed_mitigation_cmd {
+       RETBLEED_CMD_OFF,
+       RETBLEED_CMD_AUTO,
+       RETBLEED_CMD_UNRET,
++      RETBLEED_CMD_IBPB,
+ };
+ const char * const retbleed_strings[] = {
+       [RETBLEED_MITIGATION_NONE]      = "Vulnerable",
+       [RETBLEED_MITIGATION_UNRET]     = "Mitigation: untrained return thunk",
++      [RETBLEED_MITIGATION_IBPB]      = "Mitigation: IBPB",
+       [RETBLEED_MITIGATION_IBRS]      = "Mitigation: IBRS",
+       [RETBLEED_MITIGATION_EIBRS]     = "Mitigation: Enhanced IBRS",
+ };
+@@ -803,6 +806,8 @@ static int __init retbleed_parse_cmdline
+                       retbleed_cmd = RETBLEED_CMD_AUTO;
+               } else if (!strcmp(str, "unret")) {
+                       retbleed_cmd = RETBLEED_CMD_UNRET;
++              } else if (!strcmp(str, "ibpb")) {
++                      retbleed_cmd = RETBLEED_CMD_IBPB;
+               } else if (!strcmp(str, "nosmt")) {
+                       retbleed_nosmt = true;
+               } else {
+@@ -817,11 +822,13 @@ static int __init retbleed_parse_cmdline
+ early_param("retbleed", retbleed_parse_cmdline);
+ #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
+-#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n"
++#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n"
+ #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
+ static void __init retbleed_select_mitigation(void)
+ {
++      bool mitigate_smt = false;
++
+       if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
+               return;
+@@ -833,11 +840,21 @@ static void __init retbleed_select_mitig
+               retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+               break;
++      case RETBLEED_CMD_IBPB:
++              retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++              break;
++
+       case RETBLEED_CMD_AUTO:
+       default:
+               if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+-                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+-                      retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
++
++                      if (IS_ENABLED(CONFIG_RETPOLINE) &&
++                          IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK))
++                              retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++                      else
++                              retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++              }
+               /*
+                * The Intel mitigation (IBRS) was already selected in
+@@ -853,26 +870,34 @@ static void __init retbleed_select_mitig
+               if (!IS_ENABLED(CONFIG_RETPOLINE) ||
+                   !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) {
+                       pr_err(RETBLEED_COMPILER_MSG);
+-                      retbleed_mitigation = RETBLEED_MITIGATION_NONE;
+-                      break;
++                      retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++                      goto retbleed_force_ibpb;
+               }
+               setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+               setup_force_cpu_cap(X86_FEATURE_UNRET);
+-              if (!boot_cpu_has(X86_FEATURE_STIBP) &&
+-                  (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
+-                      cpu_smt_disable(false);
+-
+               if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+                   boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+                       pr_err(RETBLEED_UNTRAIN_MSG);
++
++              mitigate_smt = true;
++              break;
++
++      case RETBLEED_MITIGATION_IBPB:
++retbleed_force_ibpb:
++              setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
++              mitigate_smt = true;
+               break;
+       default:
+               break;
+       }
++      if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) &&
++          (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
++              cpu_smt_disable(false);
++
+       /*
+        * Let IBRS trump all on Intel without affecting the effects of the
+        * retbleed= cmdline option.
diff --git a/queue-5.10/x86-bugs-do-ibpb-fallback-check-only-once.patch b/queue-5.10/x86-bugs-do-ibpb-fallback-check-only-once.patch
new file mode 100644 (file)
index 0000000..6968a78
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 15:07:19 -0700
+Subject: x86/bugs: Do IBPB fallback check only once
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 0fe4aeea9c01baabecc8c3afc7889c809d939bc2 upstream.
+
+When booting with retbleed=auto, if the kernel wasn't built with
+CONFIG_CC_HAS_RETURN_THUNK, the mitigation falls back to IBPB.  Make
+sure a warning is printed in that case.  The IBPB fallback check is done
+twice, but it really only needs to be done once.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |   15 +++++----------
+ 1 file changed, 5 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -847,18 +847,13 @@ static void __init retbleed_select_mitig
+       case RETBLEED_CMD_AUTO:
+       default:
+               if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+-                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+-
+-                      if (IS_ENABLED(CONFIG_RETPOLINE) &&
+-                          IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK))
+-                              retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+-                      else
+-                              retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+-              }
++                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
++                      retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+               /*
+-               * The Intel mitigation (IBRS) was already selected in
+-               * spectre_v2_select_mitigation().
++               * The Intel mitigation (IBRS or eIBRS) was already selected in
++               * spectre_v2_select_mitigation().  'retbleed_mitigation' will
++               * be set accordingly below.
+                */
+               break;
diff --git a/queue-5.10/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch b/queue-5.10/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch
new file mode 100644 (file)
index 0000000..81b2029
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Thu, 7 Jul 2022 13:41:52 -0300
+Subject: x86/bugs: Do not enable IBPB-on-entry when IBPB is not supported
+
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+
+commit 2259da159fbe5dba8ac00b560cf00b6a6537fa18 upstream.
+
+There are some VM configurations which have Skylake model but do not
+support IBPB. In those cases, when using retbleed=ibpb, userspace is going
+to be killed and kernel is going to panic.
+
+If the CPU does not support IBPB, warn and proceed with the auto option. Also,
+do not fallback to IBPB on AMD/Hygon systems if it is not supported.
+
+Fixes: 3ebc17006888 ("x86/bugs: Add retbleed=ibpb")
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -821,7 +821,10 @@ static void __init retbleed_select_mitig
+               break;
+       case RETBLEED_CMD_IBPB:
+-              if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
++              if (!boot_cpu_has(X86_FEATURE_IBPB)) {
++                      pr_err("WARNING: CPU does not support IBPB.\n");
++                      goto do_cmd_auto;
++              } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+                       retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+               } else {
+                       pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+@@ -836,7 +839,7 @@ do_cmd_auto:
+                   boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+                       if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY))
+                               retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+-                      else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY))
++                      else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB))
+                               retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+               }
diff --git a/queue-5.10/x86-bugs-enable-stibp-for-jmp2ret.patch b/queue-5.10/x86-bugs-enable-stibp-for-jmp2ret.patch
new file mode 100644 (file)
index 0000000..b1cf04e
--- /dev/null
@@ -0,0 +1,143 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Kim Phillips <kim.phillips@amd.com>
+Date: Tue, 14 Jun 2022 23:15:51 +0200
+Subject: x86/bugs: Enable STIBP for JMP2RET
+
+From: Kim Phillips <kim.phillips@amd.com>
+
+commit e8ec1b6e08a2102d8755ccb06fa26d540f26a2fa upstream.
+
+For untrained return thunks to be fully effective, STIBP must be enabled
+or SMT disabled.
+
+Co-developed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Kim Phillips <kim.phillips@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |   16 ++++--
+ arch/x86/kernel/cpu/bugs.c                      |   58 +++++++++++++++++++-----
+ 2 files changed, 57 insertions(+), 17 deletions(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4660,11 +4660,17 @@
+                       Speculative Code Execution with Return Instructions)
+                       vulnerability.
+-                      off         - unconditionally disable
+-                      auto        - automatically select a migitation
+-                      unret       - force enable untrained return thunks,
+-                                    only effective on AMD Zen {1,2}
+-                                    based systems.
++                      off          - no mitigation
++                      auto         - automatically select a migitation
++                      auto,nosmt   - automatically select a mitigation,
++                                     disabling SMT if necessary for
++                                     the full mitigation (only on Zen1
++                                     and older without STIBP).
++                      unret        - force enable untrained return thunks,
++                                     only effective on AMD f15h-f17h
++                                     based systems.
++                      unret,nosmt  - like unret, will disable SMT when STIBP
++                                     is not available.
+                       Selecting 'auto' will choose a mitigation method at run
+                       time according to the CPU.
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -739,19 +739,34 @@ static enum retbleed_mitigation retbleed
+ static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
+       RETBLEED_CMD_AUTO;
++static int __ro_after_init retbleed_nosmt = false;
++
+ static int __init retbleed_parse_cmdline(char *str)
+ {
+       if (!str)
+               return -EINVAL;
+-      if (!strcmp(str, "off"))
+-              retbleed_cmd = RETBLEED_CMD_OFF;
+-      else if (!strcmp(str, "auto"))
+-              retbleed_cmd = RETBLEED_CMD_AUTO;
+-      else if (!strcmp(str, "unret"))
+-              retbleed_cmd = RETBLEED_CMD_UNRET;
+-      else
+-              pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str);
++      while (str) {
++              char *next = strchr(str, ',');
++              if (next) {
++                      *next = 0;
++                      next++;
++              }
++
++              if (!strcmp(str, "off")) {
++                      retbleed_cmd = RETBLEED_CMD_OFF;
++              } else if (!strcmp(str, "auto")) {
++                      retbleed_cmd = RETBLEED_CMD_AUTO;
++              } else if (!strcmp(str, "unret")) {
++                      retbleed_cmd = RETBLEED_CMD_UNRET;
++              } else if (!strcmp(str, "nosmt")) {
++                      retbleed_nosmt = true;
++              } else {
++                      pr_err("Ignoring unknown retbleed option (%s).", str);
++              }
++
++              str = next;
++      }
+       return 0;
+ }
+@@ -797,6 +812,10 @@ static void __init retbleed_select_mitig
+               setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+               setup_force_cpu_cap(X86_FEATURE_UNRET);
++              if (!boot_cpu_has(X86_FEATURE_STIBP) &&
++                  (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
++                      cpu_smt_disable(false);
++
+               if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+                   boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+                       pr_err(RETBLEED_UNTRAIN_MSG);
+@@ -1043,6 +1062,13 @@ spectre_v2_user_select_mitigation(enum s
+           boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
+               mode = SPECTRE_V2_USER_STRICT_PREFERRED;
++      if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
++              if (mode != SPECTRE_V2_USER_STRICT &&
++                  mode != SPECTRE_V2_USER_STRICT_PREFERRED)
++                      pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n");
++              mode = SPECTRE_V2_USER_STRICT_PREFERRED;
++      }
++
+       spectre_v2_user_stibp = mode;
+ set_mode:
+@@ -2020,10 +2046,18 @@ static ssize_t srbds_show_state(char *bu
+ static ssize_t retbleed_show_state(char *buf)
+ {
+-      if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET &&
+-          (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+-           boot_cpu_data.x86_vendor != X86_VENDOR_HYGON))
+-              return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
++      if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
++          if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
++              boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
++                  return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
++
++          return sprintf(buf, "%s; SMT %s\n",
++                         retbleed_strings[retbleed_mitigation],
++                         !sched_smt_active() ? "disabled" :
++                         spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
++                         spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ?
++                         "enabled with STIBP protection" : "vulnerable");
++      }
+       return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
+ }
diff --git a/queue-5.10/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch b/queue-5.10/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch
new file mode 100644 (file)
index 0000000..27c601d
--- /dev/null
@@ -0,0 +1,119 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:52 +0200
+Subject: x86/bugs: Keep a per-CPU IA32_SPEC_CTRL value
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit caa0ff24d5d0e02abce5e65c3d2b7f20a6617be5 upstream.
+
+Due to TIF_SSBD and TIF_SPEC_IB the actual IA32_SPEC_CTRL value can
+differ from x86_spec_ctrl_base. As such, keep a per-CPU value
+reflecting the current task's MSR content.
+
+  [jpoimboe: rename]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |    1 +
+ arch/x86/kernel/cpu/bugs.c           |   28 +++++++++++++++++++++++-----
+ arch/x86/kernel/process.c            |    2 +-
+ 3 files changed, 25 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -254,6 +254,7 @@ static inline void indirect_branch_predi
+ /* The Intel SPEC CTRL MSR base value cache */
+ extern u64 x86_spec_ctrl_base;
++extern void write_spec_ctrl_current(u64 val);
+ /*
+  * With retpoline, we must use IBRS to restrict branch prediction
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -48,12 +48,30 @@ static void __init taa_select_mitigation
+ static void __init mmio_select_mitigation(void);
+ static void __init srbds_select_mitigation(void);
+-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
++/* The base value of the SPEC_CTRL MSR without task-specific bits set */
+ u64 x86_spec_ctrl_base;
+ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
++
++/* The current value of the SPEC_CTRL MSR with task-specific bits set */
++DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
++EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
++
+ static DEFINE_MUTEX(spec_ctrl_mutex);
+ /*
++ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
++ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
++ */
++void write_spec_ctrl_current(u64 val)
++{
++      if (this_cpu_read(x86_spec_ctrl_current) == val)
++              return;
++
++      this_cpu_write(x86_spec_ctrl_current, val);
++      wrmsrl(MSR_IA32_SPEC_CTRL, val);
++}
++
++/*
+  * The vendor and possibly platform specific bits which can be modified in
+  * x86_spec_ctrl_base.
+  */
+@@ -1235,7 +1253,7 @@ static void __init spectre_v2_select_mit
+       if (spectre_v2_in_eibrs_mode(mode)) {
+               /* Force it so VMEXIT will restore correctly */
+               x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+-              wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++              write_spec_ctrl_current(x86_spec_ctrl_base);
+       }
+       switch (mode) {
+@@ -1290,7 +1308,7 @@ static void __init spectre_v2_select_mit
+ static void update_stibp_msr(void * __unused)
+ {
+-      wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++      write_spec_ctrl_current(x86_spec_ctrl_base);
+ }
+ /* Update x86_spec_ctrl_base in case SMT state changed. */
+@@ -1533,7 +1551,7 @@ static enum ssb_mitigation __init __ssb_
+                       x86_amd_ssb_disable();
+               } else {
+                       x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+-                      wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++                      write_spec_ctrl_current(x86_spec_ctrl_base);
+               }
+       }
+@@ -1751,7 +1769,7 @@ int arch_prctl_spec_ctrl_get(struct task
+ void x86_spec_ctrl_setup_ap(void)
+ {
+       if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+-              wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++              write_spec_ctrl_current(x86_spec_ctrl_base);
+       if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+               x86_amd_ssb_disable();
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -556,7 +556,7 @@ static __always_inline void __speculatio
+       }
+       if (updmsr)
+-              wrmsrl(MSR_IA32_SPEC_CTRL, msr);
++              write_spec_ctrl_current(msr);
+ }
+ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
diff --git a/queue-5.10/x86-bugs-optimize-spec_ctrl-msr-writes.patch b/queue-5.10/x86-bugs-optimize-spec_ctrl-msr-writes.patch
new file mode 100644 (file)
index 0000000..0176b4a
--- /dev/null
@@ -0,0 +1,109 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:54 +0200
+Subject: x86/bugs: Optimize SPEC_CTRL MSR writes
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit c779bc1a9002fa474175b80e72b85c9bf628abb0 upstream.
+
+When changing SPEC_CTRL for user control, the WRMSR can be delayed
+until return-to-user when KERNEL_IBRS has been enabled.
+
+This avoids an MSR write during context switch.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |    2 +-
+ arch/x86/kernel/cpu/bugs.c           |   18 ++++++++++++------
+ arch/x86/kernel/process.c            |    2 +-
+ 3 files changed, 14 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -254,7 +254,7 @@ static inline void indirect_branch_predi
+ /* The Intel SPEC CTRL MSR base value cache */
+ extern u64 x86_spec_ctrl_base;
+-extern void write_spec_ctrl_current(u64 val);
++extern void write_spec_ctrl_current(u64 val, bool force);
+ /*
+  * With retpoline, we must use IBRS to restrict branch prediction
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -62,13 +62,19 @@ static DEFINE_MUTEX(spec_ctrl_mutex);
+  * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
+  * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
+  */
+-void write_spec_ctrl_current(u64 val)
++void write_spec_ctrl_current(u64 val, bool force)
+ {
+       if (this_cpu_read(x86_spec_ctrl_current) == val)
+               return;
+       this_cpu_write(x86_spec_ctrl_current, val);
+-      wrmsrl(MSR_IA32_SPEC_CTRL, val);
++
++      /*
++       * When KERNEL_IBRS this MSR is written on return-to-user, unless
++       * forced the update can be delayed until that time.
++       */
++      if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
++              wrmsrl(MSR_IA32_SPEC_CTRL, val);
+ }
+ /*
+@@ -1253,7 +1259,7 @@ static void __init spectre_v2_select_mit
+       if (spectre_v2_in_eibrs_mode(mode)) {
+               /* Force it so VMEXIT will restore correctly */
+               x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+-              write_spec_ctrl_current(x86_spec_ctrl_base);
++              write_spec_ctrl_current(x86_spec_ctrl_base, true);
+       }
+       switch (mode) {
+@@ -1308,7 +1314,7 @@ static void __init spectre_v2_select_mit
+ static void update_stibp_msr(void * __unused)
+ {
+-      write_spec_ctrl_current(x86_spec_ctrl_base);
++      write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ }
+ /* Update x86_spec_ctrl_base in case SMT state changed. */
+@@ -1551,7 +1557,7 @@ static enum ssb_mitigation __init __ssb_
+                       x86_amd_ssb_disable();
+               } else {
+                       x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+-                      write_spec_ctrl_current(x86_spec_ctrl_base);
++                      write_spec_ctrl_current(x86_spec_ctrl_base, true);
+               }
+       }
+@@ -1769,7 +1775,7 @@ int arch_prctl_spec_ctrl_get(struct task
+ void x86_spec_ctrl_setup_ap(void)
+ {
+       if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+-              write_spec_ctrl_current(x86_spec_ctrl_base);
++              write_spec_ctrl_current(x86_spec_ctrl_base, true);
+       if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+               x86_amd_ssb_disable();
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -556,7 +556,7 @@ static __always_inline void __speculatio
+       }
+       if (updmsr)
+-              write_spec_ctrl_current(msr);
++              write_spec_ctrl_current(msr, false);
+ }
+ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
diff --git a/queue-5.10/x86-bugs-report-amd-retbleed-vulnerability.patch b/queue-5.10/x86-bugs-report-amd-retbleed-vulnerability.patch
new file mode 100644 (file)
index 0000000..5203763
--- /dev/null
@@ -0,0 +1,170 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Alexandre Chartre <alexandre.chartre@oracle.com>
+Date: Tue, 14 Jun 2022 23:15:49 +0200
+Subject: x86/bugs: Report AMD retbleed vulnerability
+
+From: Alexandre Chartre <alexandre.chartre@oracle.com>
+
+commit 6b80b59b3555706508008f1f127b5412c89c7fd8 upstream.
+
+Report that AMD x86 CPUs are vulnerable to the RETBleed (Arbitrary
+Speculative Code Execution with Return Instructions) attack.
+
+  [peterz: add hygon]
+  [kim: invert parity; fam15h]
+
+Co-developed-by: Kim Phillips <kim.phillips@amd.com>
+Signed-off-by: Kim Phillips <kim.phillips@amd.com>
+Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    1 +
+ arch/x86/kernel/cpu/bugs.c         |   13 +++++++++++++
+ arch/x86/kernel/cpu/common.c       |   19 +++++++++++++++++++
+ drivers/base/cpu.c                 |    8 ++++++++
+ include/linux/cpu.h                |    2 ++
+ 5 files changed, 43 insertions(+)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -426,5 +426,6 @@
+ #define X86_BUG_ITLB_MULTIHIT         X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+ #define X86_BUG_SRBDS                 X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+ #define X86_BUG_MMIO_STALE_DATA               X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
++#define X86_BUG_RETBLEED              X86_BUG(26) /* CPU is affected by RETBleed */
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1917,6 +1917,11 @@ static ssize_t srbds_show_state(char *bu
+       return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
+ }
++static ssize_t retbleed_show_state(char *buf)
++{
++      return sprintf(buf, "Vulnerable\n");
++}
++
+ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+                              char *buf, unsigned int bug)
+ {
+@@ -1962,6 +1967,9 @@ static ssize_t cpu_show_common(struct de
+       case X86_BUG_MMIO_STALE_DATA:
+               return mmio_stale_data_show_state(buf);
++      case X86_BUG_RETBLEED:
++              return retbleed_show_state(buf);
++
+       default:
+               break;
+       }
+@@ -2018,4 +2026,9 @@ ssize_t cpu_show_mmio_stale_data(struct
+ {
+       return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+ }
++
++ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
++{
++      return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
++}
+ #endif
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1092,16 +1092,27 @@ static const __initconst struct x86_cpu_
+       {}
+ };
++#define VULNBL(vendor, family, model, blacklist)      \
++      X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
++
+ #define VULNBL_INTEL_STEPPINGS(model, steppings, issues)                 \
+       X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6,             \
+                                           INTEL_FAM6_##model, steppings, \
+                                           X86_FEATURE_ANY, issues)
++#define VULNBL_AMD(family, blacklist)         \
++      VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
++
++#define VULNBL_HYGON(family, blacklist)               \
++      VULNBL(HYGON, family, X86_MODEL_ANY, blacklist)
++
+ #define SRBDS         BIT(0)
+ /* CPU is affected by X86_BUG_MMIO_STALE_DATA */
+ #define MMIO          BIT(1)
+ /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
+ #define MMIO_SBDS     BIT(2)
++/* CPU is affected by RETbleed, speculating where you would not expect it */
++#define RETBLEED      BIT(3)
+ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
+       VULNBL_INTEL_STEPPINGS(IVYBRIDGE,       X86_STEPPING_ANY,               SRBDS),
+@@ -1134,6 +1145,11 @@ static const struct x86_cpu_id cpu_vuln_
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT,    X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS),
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D,  X86_STEPPING_ANY,               MMIO),
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L,  X86_STEPPINGS(0x0, 0x0),        MMIO | MMIO_SBDS),
++
++      VULNBL_AMD(0x15, RETBLEED),
++      VULNBL_AMD(0x16, RETBLEED),
++      VULNBL_AMD(0x17, RETBLEED),
++      VULNBL_HYGON(0x18, RETBLEED),
+       {}
+ };
+@@ -1235,6 +1251,9 @@ static void __init cpu_set_bug_bits(stru
+           !arch_cap_mmio_immune(ia32_cap))
+               setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
++      if (cpu_matches(cpu_vuln_blacklist, RETBLEED))
++              setup_force_cpu_bug(X86_BUG_RETBLEED);
++
+       if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
+               return;
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -572,6 +572,12 @@ ssize_t __weak cpu_show_mmio_stale_data(
+       return sysfs_emit(buf, "Not affected\n");
+ }
++ssize_t __weak cpu_show_retbleed(struct device *dev,
++                               struct device_attribute *attr, char *buf)
++{
++      return sysfs_emit(buf, "Not affected\n");
++}
++
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+@@ -582,6 +588,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444
+ static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
+ static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
+ static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
++static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+       &dev_attr_meltdown.attr,
+@@ -594,6 +601,7 @@ static struct attribute *cpu_root_vulner
+       &dev_attr_itlb_multihit.attr,
+       &dev_attr_srbds.attr,
+       &dev_attr_mmio_stale_data.attr,
++      &dev_attr_retbleed.attr,
+       NULL
+ };
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct dev
+ extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *buf);
++extern ssize_t cpu_show_retbleed(struct device *dev,
++                               struct device_attribute *attr, char *buf);
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/queue-5.10/x86-bugs-report-intel-retbleed-vulnerability.patch b/queue-5.10/x86-bugs-report-intel-retbleed-vulnerability.patch
new file mode 100644 (file)
index 0000000..9ae6f43
--- /dev/null
@@ -0,0 +1,175 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 24 Jun 2022 13:48:58 +0200
+Subject: x86/bugs: Report Intel retbleed vulnerability
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 6ad0ad2bf8a67e27d1f9d006a1dabb0e1c360cc3 upstream.
+
+Skylake suffers from RSB underflow speculation issues; report this
+vulnerability and it's mitigation (spectre_v2=ibrs).
+
+  [jpoimboe: cleanups, eibrs]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h |    1 +
+ arch/x86/kernel/cpu/bugs.c       |   39 +++++++++++++++++++++++++++++++++------
+ arch/x86/kernel/cpu/common.c     |   24 ++++++++++++------------
+ 3 files changed, 46 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -91,6 +91,7 @@
+ #define MSR_IA32_ARCH_CAPABILITIES    0x0000010a
+ #define ARCH_CAP_RDCL_NO              BIT(0)  /* Not susceptible to Meltdown */
+ #define ARCH_CAP_IBRS_ALL             BIT(1)  /* Enhanced IBRS support */
++#define ARCH_CAP_RSBA                 BIT(2)  /* RET may use alternative branch predictors */
+ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH        BIT(3)  /* Skip L1D flush on vmentry */
+ #define ARCH_CAP_SSB_NO                       BIT(4)  /*
+                                                * Not susceptible to Speculative Store Bypass
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -746,12 +746,17 @@ static int __init nospectre_v1_cmdline(c
+ }
+ early_param("nospectre_v1", nospectre_v1_cmdline);
++static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
++      SPECTRE_V2_NONE;
++
+ #undef pr_fmt
+ #define pr_fmt(fmt)     "RETBleed: " fmt
+ enum retbleed_mitigation {
+       RETBLEED_MITIGATION_NONE,
+       RETBLEED_MITIGATION_UNRET,
++      RETBLEED_MITIGATION_IBRS,
++      RETBLEED_MITIGATION_EIBRS,
+ };
+ enum retbleed_mitigation_cmd {
+@@ -763,6 +768,8 @@ enum retbleed_mitigation_cmd {
+ const char * const retbleed_strings[] = {
+       [RETBLEED_MITIGATION_NONE]      = "Vulnerable",
+       [RETBLEED_MITIGATION_UNRET]     = "Mitigation: untrained return thunk",
++      [RETBLEED_MITIGATION_IBRS]      = "Mitigation: IBRS",
++      [RETBLEED_MITIGATION_EIBRS]     = "Mitigation: Enhanced IBRS",
+ };
+ static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
+@@ -805,6 +812,7 @@ early_param("retbleed", retbleed_parse_c
+ #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
+ #define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n"
++#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
+ static void __init retbleed_select_mitigation(void)
+ {
+@@ -821,12 +829,15 @@ static void __init retbleed_select_mitig
+       case RETBLEED_CMD_AUTO:
+       default:
+-              if (!boot_cpu_has_bug(X86_BUG_RETBLEED))
+-                      break;
+-
+               if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+                   boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+                       retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++
++              /*
++               * The Intel mitigation (IBRS) was already selected in
++               * spectre_v2_select_mitigation().
++               */
++
+               break;
+       }
+@@ -856,15 +867,31 @@ static void __init retbleed_select_mitig
+               break;
+       }
++      /*
++       * Let IBRS trump all on Intel without affecting the effects of the
++       * retbleed= cmdline option.
++       */
++      if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
++              switch (spectre_v2_enabled) {
++              case SPECTRE_V2_IBRS:
++                      retbleed_mitigation = RETBLEED_MITIGATION_IBRS;
++                      break;
++              case SPECTRE_V2_EIBRS:
++              case SPECTRE_V2_EIBRS_RETPOLINE:
++              case SPECTRE_V2_EIBRS_LFENCE:
++                      retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
++                      break;
++              default:
++                      pr_err(RETBLEED_INTEL_MSG);
++              }
++      }
++
+       pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
+ }
+ #undef pr_fmt
+ #define pr_fmt(fmt)     "Spectre V2 : " fmt
+-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+-      SPECTRE_V2_NONE;
+-
+ static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
+       SPECTRE_V2_USER_NONE;
+ static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1124,24 +1124,24 @@ static const struct x86_cpu_id cpu_vuln_
+       VULNBL_INTEL_STEPPINGS(BROADWELL_G,     X86_STEPPING_ANY,               SRBDS),
+       VULNBL_INTEL_STEPPINGS(BROADWELL_X,     X86_STEPPING_ANY,               MMIO),
+       VULNBL_INTEL_STEPPINGS(BROADWELL,       X86_STEPPING_ANY,               SRBDS),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPINGS(0x3, 0x3),        SRBDS | MMIO),
++      VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPINGS(0x3, 0x3),        SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPING_ANY,               SRBDS),
+       VULNBL_INTEL_STEPPINGS(SKYLAKE_X,       BIT(3) | BIT(4) | BIT(6) |
+-                                              BIT(7) | BIT(0xB),              MMIO),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPINGS(0x3, 0x3),        SRBDS | MMIO),
++                                              BIT(7) | BIT(0xB),              MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPINGS(0x3, 0x3),        SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPING_ANY,               SRBDS),
+-      VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPINGS(0x9, 0xC),        SRBDS | MMIO),
++      VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPINGS(0x9, 0xC),        SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPINGS(0x0, 0x8),        SRBDS),
+-      VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPINGS(0x9, 0xD),        SRBDS | MMIO),
++      VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPINGS(0x9, 0xD),        SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPINGS(0x0, 0x8),        SRBDS),
+-      VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPINGS(0x5, 0x5),        MMIO | MMIO_SBDS),
++      VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPINGS(0x5, 0x5),        MMIO | MMIO_SBDS | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPINGS(0x1, 0x1),        MMIO),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPINGS(0x4, 0x6),        MMIO),
+-      VULNBL_INTEL_STEPPINGS(COMETLAKE,       BIT(2) | BIT(3) | BIT(5),       MMIO | MMIO_SBDS),
+-      VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS),
+-      VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x0, 0x0),        MMIO),
+-      VULNBL_INTEL_STEPPINGS(LAKEFIELD,       X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS),
+-      VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPINGS(0x1, 0x1),        MMIO),
++      VULNBL_INTEL_STEPPINGS(COMETLAKE,       BIT(2) | BIT(3) | BIT(5),       MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x0, 0x0),        MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(LAKEFIELD,       X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPINGS(0x1, 0x1),        MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT,    X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS),
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D,  X86_STEPPING_ANY,               MMIO),
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L,  X86_STEPPINGS(0x0, 0x0),        MMIO | MMIO_SBDS),
+@@ -1251,7 +1251,7 @@ static void __init cpu_set_bug_bits(stru
+           !arch_cap_mmio_immune(ia32_cap))
+               setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+-      if (cpu_matches(cpu_vuln_blacklist, RETBLEED))
++      if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)))
+               setup_force_cpu_bug(X86_BUG_RETBLEED);
+       if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
diff --git a/queue-5.10/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch b/queue-5.10/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch
new file mode 100644 (file)
index 0000000..5299734
--- /dev/null
@@ -0,0 +1,103 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:56 +0200
+Subject: x86/bugs: Split spectre_v2_select_mitigation() and spectre_v2_user_select_mitigation()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 166115c08a9b0b846b783088808a27d739be6e8d upstream.
+
+retbleed will depend on spectre_v2, while spectre_v2_user depends on
+retbleed. Break this cycle.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |   25 +++++++++++++++++--------
+ 1 file changed, 17 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -37,8 +37,9 @@
+ #include "cpu.h"
+ static void __init spectre_v1_select_mitigation(void);
+-static void __init retbleed_select_mitigation(void);
+ static void __init spectre_v2_select_mitigation(void);
++static void __init retbleed_select_mitigation(void);
++static void __init spectre_v2_user_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+ static void __init mds_select_mitigation(void);
+@@ -137,13 +138,19 @@ void __init check_bugs(void)
+       /* Select the proper CPU mitigations before patching alternatives: */
+       spectre_v1_select_mitigation();
++      spectre_v2_select_mitigation();
++      /*
++       * retbleed_select_mitigation() relies on the state set by
++       * spectre_v2_select_mitigation(); specifically it wants to know about
++       * spectre_v2=ibrs.
++       */
+       retbleed_select_mitigation();
+       /*
+-       * spectre_v2_select_mitigation() relies on the state set by
++       * spectre_v2_user_select_mitigation() relies on the state set by
+        * retbleed_select_mitigation(); specifically the STIBP selection is
+        * forced for UNRET.
+        */
+-      spectre_v2_select_mitigation();
++      spectre_v2_user_select_mitigation();
+       ssb_select_mitigation();
+       l1tf_select_mitigation();
+       md_clear_select_mitigation();
+@@ -969,13 +976,15 @@ static void __init spec_v2_user_print_co
+               pr_info("spectre_v2_user=%s forced on command line.\n", reason);
+ }
++static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
++
+ static enum spectre_v2_user_cmd __init
+-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
++spectre_v2_parse_user_cmdline(void)
+ {
+       char arg[20];
+       int ret, i;
+-      switch (v2_cmd) {
++      switch (spectre_v2_cmd) {
+       case SPECTRE_V2_CMD_NONE:
+               return SPECTRE_V2_USER_CMD_NONE;
+       case SPECTRE_V2_CMD_FORCE:
+@@ -1010,7 +1019,7 @@ static inline bool spectre_v2_in_ibrs_mo
+ }
+ static void __init
+-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
++spectre_v2_user_select_mitigation(void)
+ {
+       enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
+       bool smt_possible = IS_ENABLED(CONFIG_SMP);
+@@ -1023,7 +1032,7 @@ spectre_v2_user_select_mitigation(enum s
+           cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+               smt_possible = false;
+-      cmd = spectre_v2_parse_user_cmdline(v2_cmd);
++      cmd = spectre_v2_parse_user_cmdline();
+       switch (cmd) {
+       case SPECTRE_V2_USER_CMD_NONE:
+               goto set_mode;
+@@ -1347,7 +1356,7 @@ static void __init spectre_v2_select_mit
+       }
+       /* Set up IBPB and STIBP depending on the general spectre V2 command */
+-      spectre_v2_user_select_mitigation(cmd);
++      spectre_v2_cmd = cmd;
+ }
+ static void update_stibp_msr(void * __unused)
diff --git a/queue-5.10/x86-common-stamp-out-the-stepping-madness.patch b/queue-5.10/x86-common-stamp-out-the-stepping-madness.patch
new file mode 100644 (file)
index 0000000..5128f8a
--- /dev/null
@@ -0,0 +1,78 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 24 Jun 2022 14:03:25 +0200
+Subject: x86/common: Stamp out the stepping madness
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 7a05bc95ed1c5a59e47aaade9fb4083c27de9e62 upstream.
+
+The whole MMIO/RETBLEED enumeration went overboard on steppings. Get
+rid of all that and simply use ANY.
+
+If a future stepping of these models would not be affected, it had
+better set the relevant ARCH_CAP_$FOO_NO bit in
+IA32_ARCH_CAPABILITIES.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c |   37 ++++++++++++++++---------------------
+ 1 file changed, 16 insertions(+), 21 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1119,32 +1119,27 @@ static const struct x86_cpu_id cpu_vuln_
+       VULNBL_INTEL_STEPPINGS(HASWELL,         X86_STEPPING_ANY,               SRBDS),
+       VULNBL_INTEL_STEPPINGS(HASWELL_L,       X86_STEPPING_ANY,               SRBDS),
+       VULNBL_INTEL_STEPPINGS(HASWELL_G,       X86_STEPPING_ANY,               SRBDS),
+-      VULNBL_INTEL_STEPPINGS(HASWELL_X,       BIT(2) | BIT(4),                MMIO),
+-      VULNBL_INTEL_STEPPINGS(BROADWELL_D,     X86_STEPPINGS(0x3, 0x5),        MMIO),
++      VULNBL_INTEL_STEPPINGS(HASWELL_X,       X86_STEPPING_ANY,               MMIO),
++      VULNBL_INTEL_STEPPINGS(BROADWELL_D,     X86_STEPPING_ANY,               MMIO),
+       VULNBL_INTEL_STEPPINGS(BROADWELL_G,     X86_STEPPING_ANY,               SRBDS),
+       VULNBL_INTEL_STEPPINGS(BROADWELL_X,     X86_STEPPING_ANY,               MMIO),
+       VULNBL_INTEL_STEPPINGS(BROADWELL,       X86_STEPPING_ANY,               SRBDS),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPINGS(0x3, 0x3),        SRBDS | MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPING_ANY,               SRBDS),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE_X,       BIT(3) | BIT(4) | BIT(6) |
+-                                              BIT(7) | BIT(0xB),              MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPINGS(0x3, 0x3),        SRBDS | MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPING_ANY,               SRBDS),
+-      VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPINGS(0x9, 0xC),        SRBDS | MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPINGS(0x0, 0x8),        SRBDS),
+-      VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPINGS(0x9, 0xD),        SRBDS | MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPINGS(0x0, 0x8),        SRBDS),
+-      VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPINGS(0x5, 0x5),        MMIO | MMIO_SBDS | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPINGS(0x1, 0x1),        MMIO),
+-      VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPINGS(0x4, 0x6),        MMIO),
+-      VULNBL_INTEL_STEPPINGS(COMETLAKE,       BIT(2) | BIT(3) | BIT(5),       MMIO | MMIO_SBDS | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(SKYLAKE_X,       X86_STEPPING_ANY,               MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPING_ANY,               MMIO),
++      VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPING_ANY,               MMIO),
++      VULNBL_INTEL_STEPPINGS(COMETLAKE,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x0, 0x0),        MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(LAKEFIELD,       X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPINGS(0x1, 0x1),        MMIO | RETBLEED),
+-      VULNBL_INTEL_STEPPINGS(ATOM_TREMONT,    X86_STEPPINGS(0x1, 0x1),        MMIO | MMIO_SBDS),
++      VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(LAKEFIELD,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPING_ANY,               MMIO | RETBLEED),
++      VULNBL_INTEL_STEPPINGS(ATOM_TREMONT,    X86_STEPPING_ANY,               MMIO | MMIO_SBDS),
+       VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D,  X86_STEPPING_ANY,               MMIO),
+-      VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L,  X86_STEPPINGS(0x0, 0x0),        MMIO | MMIO_SBDS),
++      VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L,  X86_STEPPING_ANY,               MMIO | MMIO_SBDS),
+       VULNBL_AMD(0x15, RETBLEED),
+       VULNBL_AMD(0x16, RETBLEED),
diff --git a/queue-5.10/x86-cpu-amd-add-spectral-chicken.patch b/queue-5.10/x86-cpu-amd-add-spectral-chicken.patch
new file mode 100644 (file)
index 0000000..3957fc0
--- /dev/null
@@ -0,0 +1,108 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:16:04 +0200
+Subject: x86/cpu/amd: Add Spectral Chicken
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d7caac991feeef1b871ee6988fd2c9725df09039 upstream.
+
+Zen2 uarchs have an undocumented, unnamed, MSR that contains a chicken
+bit for some speculation behaviour. It needs setting.
+
+Note: very belatedly AMD released naming; it's now officially called
+      MSR_AMD64_DE_CFG2 and MSR_AMD64_DE_CFG2_SUPPRESS_NOBR_PRED_BIT
+      but shall remain the SPECTRAL CHICKEN.
+
+Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h |    3 +++
+ arch/x86/kernel/cpu/amd.c        |   23 ++++++++++++++++++++++-
+ arch/x86/kernel/cpu/cpu.h        |    2 ++
+ arch/x86/kernel/cpu/hygon.c      |    6 ++++++
+ 4 files changed, 33 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -508,6 +508,9 @@
+ /* Fam 17h MSRs */
+ #define MSR_F17H_IRPERF                       0xc00000e9
++#define MSR_ZEN2_SPECTRAL_CHICKEN     0xc00110e3
++#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
++
+ /* Fam 16h MSRs */
+ #define MSR_F16H_L2I_PERF_CTL         0xc0010230
+ #define MSR_F16H_L2I_PERF_CTR         0xc0010231
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -914,6 +914,26 @@ static void init_amd_bd(struct cpuinfo_x
+       clear_rdrand_cpuid_bit(c);
+ }
++void init_spectral_chicken(struct cpuinfo_x86 *c)
++{
++      u64 value;
++
++      /*
++       * On Zen2 we offer this chicken (bit) on the altar of Speculation.
++       *
++       * This suppresses speculation from the middle of a basic block, i.e. it
++       * suppresses non-branch predictions.
++       *
++       * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H
++       */
++      if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) {
++              if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) {
++                      value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT;
++                      wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
++              }
++      }
++}
++
+ static void init_amd_zn(struct cpuinfo_x86 *c)
+ {
+       set_cpu_cap(c, X86_FEATURE_ZEN);
+@@ -959,7 +979,8 @@ static void init_amd(struct cpuinfo_x86
+       case 0x12: init_amd_ln(c); break;
+       case 0x15: init_amd_bd(c); break;
+       case 0x16: init_amd_jg(c); break;
+-      case 0x17: fallthrough;
++      case 0x17: init_spectral_chicken(c);
++                 fallthrough;
+       case 0x19: init_amd_zn(c); break;
+       }
+--- a/arch/x86/kernel/cpu/cpu.h
++++ b/arch/x86/kernel/cpu/cpu.h
+@@ -60,6 +60,8 @@ extern void tsx_disable(void);
+ static inline void tsx_init(void) { }
+ #endif /* CONFIG_CPU_SUP_INTEL */
++extern void init_spectral_chicken(struct cpuinfo_x86 *c);
++
+ extern void get_cpu_cap(struct cpuinfo_x86 *c);
+ extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
+ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+--- a/arch/x86/kernel/cpu/hygon.c
++++ b/arch/x86/kernel/cpu/hygon.c
+@@ -318,6 +318,12 @@ static void init_hygon(struct cpuinfo_x8
+       /* get apicid instead of initial apic id from cpuid */
+       c->apicid = hard_smp_processor_id();
++      /*
++       * XXX someone from Hygon needs to confirm this DTRT
++       *
++      init_spectral_chicken(c);
++       */
++
+       set_cpu_cap(c, X86_FEATURE_ZEN);
+       set_cpu_cap(c, X86_FEATURE_CPB);
diff --git a/queue-5.10/x86-cpu-amd-enumerate-btc_no.patch b/queue-5.10/x86-cpu-amd-enumerate-btc_no.patch
new file mode 100644 (file)
index 0000000..0eb427e
--- /dev/null
@@ -0,0 +1,86 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 24 Jun 2022 14:41:21 +0100
+Subject: x86/cpu/amd: Enumerate BTC_NO
+
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+
+commit 26aae8ccbc1972233afd08fb3f368947c0314265 upstream.
+
+BTC_NO indicates that hardware is not susceptible to Branch Type Confusion.
+
+Zen3 CPUs don't suffer BTC.
+
+Hypervisors are expected to synthesise BTC_NO when it is appropriate
+given the migration pool, to prevent kernels using heuristics.
+
+  [ bp: Massage. ]
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: no X86_FEATURE_BRS]
+[cascardo: no X86_FEATURE_CPPC]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    1 +
+ arch/x86/kernel/cpu/amd.c          |   21 +++++++++++++++------
+ arch/x86/kernel/cpu/common.c       |    6 ++++--
+ 3 files changed, 20 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -316,6 +316,7 @@
+ #define X86_FEATURE_AMD_SSBD          (13*32+24) /* "" Speculative Store Bypass Disable */
+ #define X86_FEATURE_VIRT_SSBD         (13*32+25) /* Virtualized Speculative Store Bypass Disable */
+ #define X86_FEATURE_AMD_SSB_NO                (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
++#define X86_FEATURE_BTC_NO            (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+ #define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -942,12 +942,21 @@ static void init_amd_zn(struct cpuinfo_x
+       node_reclaim_distance = 32;
+ #endif
+-      /*
+-       * Fix erratum 1076: CPB feature bit not being set in CPUID.
+-       * Always set it, except when running under a hypervisor.
+-       */
+-      if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
+-              set_cpu_cap(c, X86_FEATURE_CPB);
++      /* Fix up CPUID bits, but only if not virtualised. */
++      if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
++
++              /* Erratum 1076: CPB feature bit not being set in CPUID. */
++              if (!cpu_has(c, X86_FEATURE_CPB))
++                      set_cpu_cap(c, X86_FEATURE_CPB);
++
++              /*
++               * Zen3 (Fam19 model < 0x10) parts are not susceptible to
++               * Branch Type Confusion, but predate the allocation of the
++               * BTC_NO bit.
++               */
++              if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
++                      set_cpu_cap(c, X86_FEATURE_BTC_NO);
++      }
+ }
+ static void init_amd(struct cpuinfo_x86 *c)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1246,8 +1246,10 @@ static void __init cpu_set_bug_bits(stru
+           !arch_cap_mmio_immune(ia32_cap))
+               setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+-      if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)))
+-              setup_force_cpu_bug(X86_BUG_RETBLEED);
++      if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
++              if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
++                      setup_force_cpu_bug(X86_BUG_RETBLEED);
++      }
+       if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
+               return;
diff --git a/queue-5.10/x86-cpufeatures-move-retpoline-flags-to-word-11.patch b/queue-5.10/x86-cpufeatures-move-retpoline-flags-to-word-11.patch
new file mode 100644 (file)
index 0000000..8f3f163
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:33 +0200
+Subject: x86/cpufeatures: Move RETPOLINE flags to word 11
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit a883d624aed463c84c22596006e5a96f5b44db31 upstream.
+
+In order to extend the RETPOLINE features to 4, move them to word 11
+where there is still room. This mostly keeps DISABLE_RETPOLINE
+simple.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: bits 8 and 9 of word 11 are also free here,
+ so comment them accordingly]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -203,8 +203,8 @@
+ #define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ #define X86_FEATURE_SME                       ( 7*32+10) /* AMD Secure Memory Encryption */
+ #define X86_FEATURE_PTI                       ( 7*32+11) /* Kernel Page Table Isolation enabled */
+-#define X86_FEATURE_RETPOLINE         ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+-#define X86_FEATURE_RETPOLINE_LFENCE  ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
++/* FREE!                              ( 7*32+12) */
++/* FREE!                              ( 7*32+13) */
+ #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_CDP_L2            ( 7*32+15) /* Code and Data Prioritization L2 */
+ #define X86_FEATURE_MSR_SPEC_CTRL     ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+@@ -290,6 +290,12 @@
+ #define X86_FEATURE_FENCE_SWAPGS_KERNEL       (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
+ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
+ #define X86_FEATURE_PER_THREAD_MBA    (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
++/* FREE!                              (11*32+ 8) */
++/* FREE!                              (11*32+ 9) */
++/* FREE!                              (11*32+10) */
++/* FREE!                              (11*32+11) */
++#define X86_FEATURE_RETPOLINE         (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_LFENCE  (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX512_BF16               (12*32+ 5) /* AVX512 BFLOAT16 instructions */
diff --git a/queue-5.10/x86-entry-add-kernel-ibrs-implementation.patch b/queue-5.10/x86-entry-add-kernel-ibrs-implementation.patch
new file mode 100644 (file)
index 0000000..cee660a
--- /dev/null
@@ -0,0 +1,355 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:53 +0200
+Subject: x86/entry: Add kernel IBRS implementation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 2dbb887e875b1de3ca8f40ddf26bcfe55798c609 upstream.
+
+Implement Kernel IBRS - currently the only known option to mitigate RSB
+underflow speculation issues on Skylake hardware.
+
+Note: since IBRS_ENTER requires fuller context established than
+UNTRAIN_RET, it must be placed after it. However, since UNTRAIN_RET
+itself implies a RET, it must come after IBRS_ENTER. This means
+IBRS_ENTER needs to also move UNTRAIN_RET.
+
+Note 2: KERNEL_IBRS is sub-optimal for XenPV.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: conflict at arch/x86/entry/entry_64.S, skip_r11rcx]
+[cascardo: conflict at arch/x86/entry/entry_64_compat.S]
+[cascardo: conflict fixups, no ANNOTATE_NOENDBR]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/calling.h           |   58 +++++++++++++++++++++++++++++++++++++
+ arch/x86/entry/entry_64.S          |   44 ++++++++++++++++++++++++----
+ arch/x86/entry/entry_64_compat.S   |   17 ++++++++--
+ arch/x86/include/asm/cpufeatures.h |    2 -
+ 4 files changed, 111 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -6,6 +6,8 @@
+ #include <asm/percpu.h>
+ #include <asm/asm-offsets.h>
+ #include <asm/processor-flags.h>
++#include <asm/msr.h>
++#include <asm/nospec-branch.h>
+ /*
+@@ -309,6 +311,62 @@ For 32-bit we have the following convent
+ #endif
+ /*
++ * IBRS kernel mitigation for Spectre_v2.
++ *
++ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
++ * the regs it uses (AX, CX, DX). Must be called before the first RET
++ * instruction (NOTE! UNTRAIN_RET includes a RET instruction)
++ *
++ * The optional argument is used to save/restore the current value,
++ * which is used on the paranoid paths.
++ *
++ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
++ */
++.macro IBRS_ENTER save_reg
++      ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
++      movl    $MSR_IA32_SPEC_CTRL, %ecx
++
++.ifnb \save_reg
++      rdmsr
++      shl     $32, %rdx
++      or      %rdx, %rax
++      mov     %rax, \save_reg
++      test    $SPEC_CTRL_IBRS, %eax
++      jz      .Ldo_wrmsr_\@
++      lfence
++      jmp     .Lend_\@
++.Ldo_wrmsr_\@:
++.endif
++
++      movq    PER_CPU_VAR(x86_spec_ctrl_current), %rdx
++      movl    %edx, %eax
++      shr     $32, %rdx
++      wrmsr
++.Lend_\@:
++.endm
++
++/*
++ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
++ * regs. Must be called after the last RET.
++ */
++.macro IBRS_EXIT save_reg
++      ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
++      movl    $MSR_IA32_SPEC_CTRL, %ecx
++
++.ifnb \save_reg
++      mov     \save_reg, %rdx
++.else
++      movq    PER_CPU_VAR(x86_spec_ctrl_current), %rdx
++      andl    $(~SPEC_CTRL_IBRS), %edx
++.endif
++
++      movl    %edx, %eax
++      shr     $32, %rdx
++      wrmsr
++.Lend_\@:
++.endm
++
++/*
+  * Mitigate Spectre v1 for conditional swapgs code paths.
+  *
+  * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -102,7 +102,6 @@ SYM_CODE_START(entry_SYSCALL_64)
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
+-      UNTRAIN_RET
+       /* Construct struct pt_regs on stack */
+       pushq   $__USER_DS                              /* pt_regs->ss */
+@@ -118,6 +117,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h
+       /* IRQs are off. */
+       movq    %rax, %rdi
+       movq    %rsp, %rsi
++
++      /* clobbers %rax, make sure it is after saving the syscall nr */
++      IBRS_ENTER
++      UNTRAIN_RET
++
+       call    do_syscall_64           /* returns with IRQs disabled */
+       /*
+@@ -192,6 +196,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h
+        * perf profiles. Nothing jumps here.
+        */
+ syscall_return_via_sysret:
++      IBRS_EXIT
+       POP_REGS pop_rdi=0
+       /*
+@@ -569,6 +574,7 @@ __irqentry_text_end:
+ SYM_CODE_START_LOCAL(common_interrupt_return)
+ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
++      IBRS_EXIT
+ #ifdef CONFIG_DEBUG_ENTRY
+       /* Assert that pt_regs indicates user mode. */
+       testb   $3, CS(%rsp)
+@@ -889,6 +895,9 @@ SYM_CODE_END(xen_failsafe_callback)
+  *              1 -> no SWAPGS on exit
+  *
+  *     Y        GSBASE value at entry, must be restored in paranoid_exit
++ *
++ * R14 - old CR3
++ * R15 - old SPEC_CTRL
+  */
+ SYM_CODE_START_LOCAL(paranoid_entry)
+       UNWIND_HINT_FUNC
+@@ -912,7 +921,6 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+        * be retrieved from a kernel internal table.
+        */
+       SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+-      UNTRAIN_RET
+       /*
+        * Handling GSBASE depends on the availability of FSGSBASE.
+@@ -934,7 +942,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+        * is needed here.
+        */
+       SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
+-      RET
++      jmp .Lparanoid_gsbase_done
+ .Lparanoid_entry_checkgs:
+       /* EBX = 1 -> kernel GSBASE active, no restore required */
+@@ -953,8 +961,16 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+       xorl    %ebx, %ebx
+       swapgs
+ .Lparanoid_kernel_gsbase:
+-
+       FENCE_SWAPGS_KERNEL_ENTRY
++.Lparanoid_gsbase_done:
++
++      /*
++       * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like
++       * CR3 above, keep the old value in a callee saved register.
++       */
++      IBRS_ENTER save_reg=%r15
++      UNTRAIN_RET
++
+       RET
+ SYM_CODE_END(paranoid_entry)
+@@ -976,9 +992,19 @@ SYM_CODE_END(paranoid_entry)
+  *              1 -> no SWAPGS on exit
+  *
+  *     Y        User space GSBASE, must be restored unconditionally
++ *
++ * R14 - old CR3
++ * R15 - old SPEC_CTRL
+  */
+ SYM_CODE_START_LOCAL(paranoid_exit)
+       UNWIND_HINT_REGS
++
++      /*
++       * Must restore IBRS state before both CR3 and %GS since we need access
++       * to the per-CPU x86_spec_ctrl_shadow variable.
++       */
++      IBRS_EXIT save_reg=%r15
++
+       /*
+        * The order of operations is important. RESTORE_CR3 requires
+        * kernel GSBASE.
+@@ -1025,9 +1051,11 @@ SYM_CODE_START_LOCAL(error_entry)
+       FENCE_SWAPGS_USER_ENTRY
+       /* We have user CR3.  Change to kernel CR3. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++      IBRS_ENTER
+       UNTRAIN_RET
+ .Lerror_entry_from_usermode_after_swapgs:
++
+       /* Put us onto the real thread stack. */
+       popq    %r12                            /* save return addr in %12 */
+       movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
+@@ -1081,6 +1109,7 @@ SYM_CODE_START_LOCAL(error_entry)
+       SWAPGS
+       FENCE_SWAPGS_USER_ENTRY
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++      IBRS_ENTER
+       UNTRAIN_RET
+       /*
+@@ -1176,7 +1205,6 @@ SYM_CODE_START(asm_exc_nmi)
+       movq    %rsp, %rdx
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+       UNWIND_HINT_IRET_REGS base=%rdx offset=8
+-      UNTRAIN_RET
+       pushq   5*8(%rdx)       /* pt_regs->ss */
+       pushq   4*8(%rdx)       /* pt_regs->rsp */
+       pushq   3*8(%rdx)       /* pt_regs->flags */
+@@ -1187,6 +1215,9 @@ SYM_CODE_START(asm_exc_nmi)
+       PUSH_AND_CLEAR_REGS rdx=(%rdx)
+       ENCODE_FRAME_POINTER
++      IBRS_ENTER
++      UNTRAIN_RET
++
+       /*
+        * At this point we no longer need to worry about stack damage
+        * due to nesting -- we're on the normal thread stack and we're
+@@ -1409,6 +1440,9 @@ end_repeat_nmi:
+       movq    $-1, %rsi
+       call    exc_nmi
++      /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
++      IBRS_EXIT save_reg=%r15
++
+       /* Always restore stashed CR3 value (see paranoid_entry) */
+       RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -4,7 +4,6 @@
+  *
+  * Copyright 2000-2002 Andi Kleen, SuSE Labs.
+  */
+-#include "calling.h"
+ #include <asm/asm-offsets.h>
+ #include <asm/current.h>
+ #include <asm/errno.h>
+@@ -18,6 +17,8 @@
+ #include <linux/linkage.h>
+ #include <linux/err.h>
++#include "calling.h"
++
+       .section .entry.text, "ax"
+ /*
+@@ -72,7 +73,6 @@ SYM_CODE_START(entry_SYSENTER_compat)
+       pushq   $__USER32_CS            /* pt_regs->cs */
+       pushq   $0                      /* pt_regs->ip = 0 (placeholder) */
+ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
+-      UNTRAIN_RET
+       /*
+        * User tracing code (ptrace or signal handlers) might assume that
+@@ -114,6 +114,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_af
+       cld
++      IBRS_ENTER
++      UNTRAIN_RET
++
+       /*
+        * SYSENTER doesn't filter flags, so we need to clear NT and AC
+        * ourselves.  To save a few cycles, we can check whether
+@@ -213,7 +216,6 @@ SYM_CODE_START(entry_SYSCALL_compat)
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
+-      UNTRAIN_RET
+       /* Construct struct pt_regs on stack */
+       pushq   $__USER32_DS            /* pt_regs->ss */
+@@ -255,6 +257,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_aft
+       UNWIND_HINT_REGS
++      IBRS_ENTER
++      UNTRAIN_RET
++
+       movq    %rsp, %rdi
+       call    do_fast_syscall_32
+       /* XEN PV guests always use IRET path */
+@@ -269,6 +274,8 @@ sysret32_from_system_call:
+        */
+       STACKLEAK_ERASE
++      IBRS_EXIT
++
+       movq    RBX(%rsp), %rbx         /* pt_regs->rbx */
+       movq    RBP(%rsp), %rbp         /* pt_regs->rbp */
+       movq    EFLAGS(%rsp), %r11      /* pt_regs->flags (in r11) */
+@@ -380,7 +387,6 @@ SYM_CODE_START(entry_INT80_compat)
+       pushq   (%rdi)                  /* pt_regs->di */
+ .Lint80_keep_stack:
+-      UNTRAIN_RET
+       pushq   %rsi                    /* pt_regs->si */
+       xorl    %esi, %esi              /* nospec   si */
+       pushq   %rdx                    /* pt_regs->dx */
+@@ -413,6 +419,9 @@ SYM_CODE_START(entry_INT80_compat)
+       cld
++      IBRS_ENTER
++      UNTRAIN_RET
++
+       movq    %rsp, %rdi
+       call    do_int80_syscall_32
+       jmp     swapgs_restore_regs_and_return_to_usermode
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -203,7 +203,7 @@
+ #define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ #define X86_FEATURE_SME                       ( 7*32+10) /* AMD Secure Memory Encryption */
+ #define X86_FEATURE_PTI                       ( 7*32+11) /* Kernel Page Table Isolation enabled */
+-/* FREE!                              ( 7*32+12) */
++#define X86_FEATURE_KERNEL_IBRS               ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+ /* FREE!                              ( 7*32+13) */
+ #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_CDP_L2            ( 7*32+15) /* Code and Data Prioritization L2 */
diff --git a/queue-5.10/x86-entry-remove-skip_r11rcx.patch b/queue-5.10/x86-entry-remove-skip_r11rcx.patch
new file mode 100644 (file)
index 0000000..3a74d23
--- /dev/null
@@ -0,0 +1,69 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 6 May 2022 14:14:35 +0200
+Subject: x86/entry: Remove skip_r11rcx
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1b331eeea7b8676fc5dbdf80d0a07e41be226177 upstream.
+
+Yes, r11 and rcx have been restored previously, but since they're being
+popped anyway (into rsi) might as well pop them into their own regs --
+setting them to the value they already are.
+
+Less magical code.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20220506121631.365070674@infradead.org
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/calling.h  |   10 +---------
+ arch/x86/entry/entry_64.S |    3 +--
+ 2 files changed, 2 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -146,27 +146,19 @@ For 32-bit we have the following convent
+ .endm
+-.macro POP_REGS pop_rdi=1 skip_r11rcx=0
++.macro POP_REGS pop_rdi=1
+       popq %r15
+       popq %r14
+       popq %r13
+       popq %r12
+       popq %rbp
+       popq %rbx
+-      .if \skip_r11rcx
+-      popq %rsi
+-      .else
+       popq %r11
+-      .endif
+       popq %r10
+       popq %r9
+       popq %r8
+       popq %rax
+-      .if \skip_r11rcx
+-      popq %rsi
+-      .else
+       popq %rcx
+-      .endif
+       popq %rdx
+       popq %rsi
+       .if \pop_rdi
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -191,8 +191,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h
+        * perf profiles. Nothing jumps here.
+        */
+ syscall_return_via_sysret:
+-      /* rcx and r11 are already restored (see code above) */
+-      POP_REGS pop_rdi=0 skip_r11rcx=1
++      POP_REGS pop_rdi=0
+       /*
+        * Now all regs are restored except RSP and RDI.
diff --git a/queue-5.10/x86-ftrace-use-alternative-ret-encoding.patch b/queue-5.10/x86-ftrace-use-alternative-ret-encoding.patch
new file mode 100644 (file)
index 0000000..3f7ebbc
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:40 +0200
+Subject: x86/ftrace: Use alternative RET encoding
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1f001e9da6bbf482311e45e48f53c2bd2179e59c upstream.
+
+Use the return thunk in ftrace trampolines, if needed.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: still copy return from ftrace_stub]
+[cascardo: use memcpy(text_gen_insn) as there is no __text_gen_insn]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ftrace.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -308,7 +308,7 @@ union ftrace_op_code_union {
+       } __attribute__((packed));
+ };
+-#define RET_SIZE              1 + IS_ENABLED(CONFIG_SLS)
++#define RET_SIZE              (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
+ static unsigned long
+ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
+@@ -367,7 +367,10 @@ create_trampoline(struct ftrace_ops *ops
+       /* The trampoline ends with ret(q) */
+       retq = (unsigned long)ftrace_stub;
+-      ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE);
++      if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++              memcpy(ip, text_gen_insn(JMP32_INSN_OPCODE, ip, &__x86_return_thunk), JMP32_INSN_SIZE);
++      else
++              ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE);
+       if (WARN_ON(ret < 0))
+               goto fail;
diff --git a/queue-5.10/x86-insn-add-a-__ignore_sync_check__-marker.patch b/queue-5.10/x86-insn-add-a-__ignore_sync_check__-marker.patch
new file mode 100644 (file)
index 0000000..8db1794
--- /dev/null
@@ -0,0 +1,199 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Mon, 22 Feb 2021 13:34:40 +0100
+Subject: x86/insn: Add a __ignore_sync_check__ marker
+
+From: Borislav Petkov <bp@suse.de>
+
+commit d30c7b820be5c4777fe6c3b0c21f9d0064251e51 upstream.
+
+Add an explicit __ignore_sync_check__ marker which will be used to mark
+lines which are supposed to be ignored by file synchronization check
+scripts, its advantage being that it explicitly denotes such lines in
+the code.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
+Link: https://lkml.kernel.org/r/20210304174237.31945-4-bp@alien8.de
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/inat.h       |    2 +-
+ arch/x86/include/asm/insn.h       |    2 +-
+ arch/x86/lib/inat.c               |    2 +-
+ arch/x86/lib/insn.c               |    6 +++---
+ tools/arch/x86/include/asm/inat.h |    2 +-
+ tools/arch/x86/include/asm/insn.h |    2 +-
+ tools/arch/x86/lib/inat.c         |    2 +-
+ tools/arch/x86/lib/insn.c         |    6 +++---
+ tools/objtool/sync-check.sh       |   17 +++++++++++++----
+ tools/perf/check-headers.sh       |   15 +++++++++++----
+ 10 files changed, 36 insertions(+), 20 deletions(-)
+
+--- a/arch/x86/include/asm/inat.h
++++ b/arch/x86/include/asm/inat.h
+@@ -6,7 +6,7 @@
+  *
+  * Written by Masami Hiramatsu <mhiramat@redhat.com>
+  */
+-#include <asm/inat_types.h>
++#include <asm/inat_types.h> /* __ignore_sync_check__ */
+ /*
+  * Internal bits. Don't use bitmasks directly, because these bits are
+--- a/arch/x86/include/asm/insn.h
++++ b/arch/x86/include/asm/insn.h
+@@ -8,7 +8,7 @@
+  */
+ /* insn_attr_t is defined in inat.h */
+-#include <asm/inat.h>
++#include <asm/inat.h> /* __ignore_sync_check__ */
+ struct insn_field {
+       union {
+--- a/arch/x86/lib/inat.c
++++ b/arch/x86/lib/inat.c
+@@ -4,7 +4,7 @@
+  *
+  * Written by Masami Hiramatsu <mhiramat@redhat.com>
+  */
+-#include <asm/insn.h>
++#include <asm/insn.h> /* __ignore_sync_check__ */
+ /* Attribute tables are generated from opcode map */
+ #include "inat-tables.c"
+--- a/arch/x86/lib/insn.c
++++ b/arch/x86/lib/insn.c
+@@ -10,10 +10,10 @@
+ #else
+ #include <string.h>
+ #endif
+-#include <asm/inat.h>
+-#include <asm/insn.h>
++#include <asm/inat.h> /*__ignore_sync_check__ */
++#include <asm/insn.h> /* __ignore_sync_check__ */
+-#include <asm/emulate_prefix.h>
++#include <asm/emulate_prefix.h> /* __ignore_sync_check__ */
+ /* Verify next sizeof(t) bytes can be on the same instruction */
+ #define validate_next(t, insn, n)     \
+--- a/tools/arch/x86/include/asm/inat.h
++++ b/tools/arch/x86/include/asm/inat.h
+@@ -6,7 +6,7 @@
+  *
+  * Written by Masami Hiramatsu <mhiramat@redhat.com>
+  */
+-#include "inat_types.h"
++#include "inat_types.h" /* __ignore_sync_check__ */
+ /*
+  * Internal bits. Don't use bitmasks directly, because these bits are
+--- a/tools/arch/x86/include/asm/insn.h
++++ b/tools/arch/x86/include/asm/insn.h
+@@ -8,7 +8,7 @@
+  */
+ /* insn_attr_t is defined in inat.h */
+-#include "inat.h"
++#include "inat.h" /* __ignore_sync_check__ */
+ struct insn_field {
+       union {
+--- a/tools/arch/x86/lib/inat.c
++++ b/tools/arch/x86/lib/inat.c
+@@ -4,7 +4,7 @@
+  *
+  * Written by Masami Hiramatsu <mhiramat@redhat.com>
+  */
+-#include "../include/asm/insn.h"
++#include "../include/asm/insn.h" /* __ignore_sync_check__ */
+ /* Attribute tables are generated from opcode map */
+ #include "inat-tables.c"
+--- a/tools/arch/x86/lib/insn.c
++++ b/tools/arch/x86/lib/insn.c
+@@ -10,10 +10,10 @@
+ #else
+ #include <string.h>
+ #endif
+-#include "../include/asm/inat.h"
+-#include "../include/asm/insn.h"
++#include "../include/asm/inat.h" /* __ignore_sync_check__ */
++#include "../include/asm/insn.h" /* __ignore_sync_check__ */
+-#include "../include/asm/emulate_prefix.h"
++#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */
+ /* Verify next sizeof(t) bytes can be on the same instruction */
+ #define validate_next(t, insn, n)     \
+--- a/tools/objtool/sync-check.sh
++++ b/tools/objtool/sync-check.sh
+@@ -16,11 +16,14 @@ arch/x86/include/asm/emulate_prefix.h
+ arch/x86/lib/x86-opcode-map.txt
+ arch/x86/tools/gen-insn-attr-x86.awk
+ include/linux/static_call_types.h
+-arch/x86/include/asm/inat.h     -I '^#include [\"<]\(asm/\)*inat_types.h[\">]'
+-arch/x86/include/asm/insn.h     -I '^#include [\"<]\(asm/\)*inat.h[\">]'
+-arch/x86/lib/inat.c             -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]'
+-arch/x86/lib/insn.c             -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]'
+ "
++
++SYNC_CHECK_FILES='
++arch/x86/include/asm/inat.h
++arch/x86/include/asm/insn.h
++arch/x86/lib/inat.c
++arch/x86/lib/insn.c
++'
+ fi
+ check_2 () {
+@@ -63,3 +66,9 @@ while read -r file_entry; do
+ done <<EOF
+ $FILES
+ EOF
++
++if [ "$SRCARCH" = "x86" ]; then
++      for i in $SYNC_CHECK_FILES; do
++              check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"'
++      done
++fi
+--- a/tools/perf/check-headers.sh
++++ b/tools/perf/check-headers.sh
+@@ -75,6 +75,13 @@ include/uapi/asm-generic/mman-common.h
+ include/uapi/asm-generic/unistd.h
+ '
++SYNC_CHECK_FILES='
++arch/x86/include/asm/inat.h
++arch/x86/include/asm/insn.h
++arch/x86/lib/inat.c
++arch/x86/lib/insn.c
++'
++
+ # These copies are under tools/perf/trace/beauty/ as they are not used to in
+ # building object files only by scripts in tools/perf/trace/beauty/ to generate
+ # tables that then gets included in .c files for things like id->string syscall
+@@ -129,6 +136,10 @@ for i in $FILES; do
+   check $i -B
+ done
++for i in $SYNC_CHECK_FILES; do
++  check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"'
++done
++
+ # diff with extra ignore lines
+ check arch/x86/lib/memcpy_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"'
+ check arch/x86/lib/memset_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
+@@ -137,10 +148,6 @@ check include/uapi/linux/mman.h       '-
+ check include/linux/build_bug.h       '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
+ check include/linux/ctype.h         '-I "isdigit("'
+ check lib/ctype.c                   '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B'
+-check arch/x86/include/asm/inat.h     '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
+-check arch/x86/include/asm/insn.h     '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
+-check arch/x86/lib/inat.c           '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
+-check arch/x86/lib/insn.c             '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"'
+ # diff non-symmetric files
+ check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
diff --git a/queue-5.10/x86-insn-add-an-insn_decode-api.patch b/queue-5.10/x86-insn-add-an-insn_decode-api.patch
new file mode 100644 (file)
index 0000000..1e542ed
--- /dev/null
@@ -0,0 +1,965 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Tue, 3 Nov 2020 17:28:30 +0100
+Subject: x86/insn: Add an insn_decode() API
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 93281c4a96572a34504244969b938e035204778d upstream.
+
+Users of the instruction decoder should use this to decode instruction
+bytes. For that, have insn*() helpers return an int value to denote
+success/failure. When there's an error fetching the next insn byte and
+the insn falls short, return -ENODATA to denote that.
+
+While at it, make insn_get_opcode() more stricter as to whether what has
+seen so far is a valid insn and if not.
+
+Copy linux/kconfig.h for the tools-version of the decoder so that it can
+use IS_ENABLED().
+
+Also, cast the INSN_MODE_KERN dummy define value to (enum insn_mode)
+for tools use of the decoder because perf tool builds with -Werror and
+errors out with -Werror=sign-compare otherwise.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
+Link: https://lkml.kernel.org/r/20210304174237.31945-5-bp@alien8.de
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/insn.h       |   24 ++--
+ arch/x86/lib/insn.c               |  216 +++++++++++++++++++++++++++++-------
+ tools/arch/x86/include/asm/insn.h |   24 ++--
+ tools/arch/x86/lib/insn.c         |  222 +++++++++++++++++++++++++++++---------
+ tools/include/linux/kconfig.h     |   73 ++++++++++++
+ 5 files changed, 452 insertions(+), 107 deletions(-)
+ create mode 100644 tools/include/linux/kconfig.h
+
+--- a/arch/x86/include/asm/insn.h
++++ b/arch/x86/include/asm/insn.h
+@@ -87,13 +87,23 @@ struct insn {
+ #define X86_VEX_M_MAX 0x1f                    /* VEX3.M Maximum value */
+ extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
+-extern void insn_get_prefixes(struct insn *insn);
+-extern void insn_get_opcode(struct insn *insn);
+-extern void insn_get_modrm(struct insn *insn);
+-extern void insn_get_sib(struct insn *insn);
+-extern void insn_get_displacement(struct insn *insn);
+-extern void insn_get_immediate(struct insn *insn);
+-extern void insn_get_length(struct insn *insn);
++extern int insn_get_prefixes(struct insn *insn);
++extern int insn_get_opcode(struct insn *insn);
++extern int insn_get_modrm(struct insn *insn);
++extern int insn_get_sib(struct insn *insn);
++extern int insn_get_displacement(struct insn *insn);
++extern int insn_get_immediate(struct insn *insn);
++extern int insn_get_length(struct insn *insn);
++
++enum insn_mode {
++      INSN_MODE_32,
++      INSN_MODE_64,
++      /* Mode is determined by the current kernel build. */
++      INSN_MODE_KERN,
++      INSN_NUM_MODES,
++};
++
++extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
+ /* Attribute will be determined after getting ModRM (for opcode groups) */
+ static inline void insn_get_attribute(struct insn *insn)
+--- a/arch/x86/lib/insn.c
++++ b/arch/x86/lib/insn.c
+@@ -13,6 +13,9 @@
+ #include <asm/inat.h> /*__ignore_sync_check__ */
+ #include <asm/insn.h> /* __ignore_sync_check__ */
++#include <linux/errno.h>
++#include <linux/kconfig.h>
++
+ #include <asm/emulate_prefix.h> /* __ignore_sync_check__ */
+ /* Verify next sizeof(t) bytes can be on the same instruction */
+@@ -97,8 +100,12 @@ static void insn_get_emulate_prefix(stru
+  * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
+  * to point to the (first) opcode.  No effect if @insn->prefixes.got
+  * is already set.
++ *
++ * * Returns:
++ * 0:  on success
++ * < 0: on error
+  */
+-void insn_get_prefixes(struct insn *insn)
++int insn_get_prefixes(struct insn *insn)
+ {
+       struct insn_field *prefixes = &insn->prefixes;
+       insn_attr_t attr;
+@@ -106,7 +113,7 @@ void insn_get_prefixes(struct insn *insn
+       int i, nb;
+       if (prefixes->got)
+-              return;
++              return 0;
+       insn_get_emulate_prefix(insn);
+@@ -217,8 +224,10 @@ vex_end:
+       prefixes->got = 1;
++      return 0;
++
+ err_out:
+-      return;
++      return -ENODATA;
+ }
+ /**
+@@ -230,16 +239,25 @@ err_out:
+  * If necessary, first collects any preceding (prefix) bytes.
+  * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
+  * is already 1.
++ *
++ * Returns:
++ * 0:  on success
++ * < 0: on error
+  */
+-void insn_get_opcode(struct insn *insn)
++int insn_get_opcode(struct insn *insn)
+ {
+       struct insn_field *opcode = &insn->opcode;
++      int pfx_id, ret;
+       insn_byte_t op;
+-      int pfx_id;
++
+       if (opcode->got)
+-              return;
+-      if (!insn->prefixes.got)
+-              insn_get_prefixes(insn);
++              return 0;
++
++      if (!insn->prefixes.got) {
++              ret = insn_get_prefixes(insn);
++              if (ret)
++                      return ret;
++      }
+       /* Get first opcode */
+       op = get_next(insn_byte_t, insn);
+@@ -254,9 +272,13 @@ void insn_get_opcode(struct insn *insn)
+               insn->attr = inat_get_avx_attribute(op, m, p);
+               if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
+                   (!inat_accept_vex(insn->attr) &&
+-                   !inat_is_group(insn->attr)))
+-                      insn->attr = 0; /* This instruction is bad */
+-              goto end;       /* VEX has only 1 byte for opcode */
++                   !inat_is_group(insn->attr))) {
++                      /* This instruction is bad */
++                      insn->attr = 0;
++                      return -EINVAL;
++              }
++              /* VEX has only 1 byte for opcode */
++              goto end;
+       }
+       insn->attr = inat_get_opcode_attribute(op);
+@@ -267,13 +289,18 @@ void insn_get_opcode(struct insn *insn)
+               pfx_id = insn_last_prefix_id(insn);
+               insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
+       }
+-      if (inat_must_vex(insn->attr))
+-              insn->attr = 0; /* This instruction is bad */
++
++      if (inat_must_vex(insn->attr)) {
++              /* This instruction is bad */
++              insn->attr = 0;
++              return -EINVAL;
++      }
+ end:
+       opcode->got = 1;
++      return 0;
+ err_out:
+-      return;
++      return -ENODATA;
+ }
+ /**
+@@ -283,15 +310,25 @@ err_out:
+  * Populates @insn->modrm and updates @insn->next_byte to point past the
+  * ModRM byte, if any.  If necessary, first collects the preceding bytes
+  * (prefixes and opcode(s)).  No effect if @insn->modrm.got is already 1.
++ *
++ * Returns:
++ * 0:  on success
++ * < 0: on error
+  */
+-void insn_get_modrm(struct insn *insn)
++int insn_get_modrm(struct insn *insn)
+ {
+       struct insn_field *modrm = &insn->modrm;
+       insn_byte_t pfx_id, mod;
++      int ret;
++
+       if (modrm->got)
+-              return;
+-      if (!insn->opcode.got)
+-              insn_get_opcode(insn);
++              return 0;
++
++      if (!insn->opcode.got) {
++              ret = insn_get_opcode(insn);
++              if (ret)
++                      return ret;
++      }
+       if (inat_has_modrm(insn->attr)) {
+               mod = get_next(insn_byte_t, insn);
+@@ -301,17 +338,22 @@ void insn_get_modrm(struct insn *insn)
+                       pfx_id = insn_last_prefix_id(insn);
+                       insn->attr = inat_get_group_attribute(mod, pfx_id,
+                                                             insn->attr);
+-                      if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
+-                              insn->attr = 0; /* This is bad */
++                      if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
++                              /* Bad insn */
++                              insn->attr = 0;
++                              return -EINVAL;
++                      }
+               }
+       }
+       if (insn->x86_64 && inat_is_force64(insn->attr))
+               insn->opnd_bytes = 8;
++
+       modrm->got = 1;
++      return 0;
+ err_out:
+-      return;
++      return -ENODATA;
+ }
+@@ -325,11 +367,16 @@ err_out:
+ int insn_rip_relative(struct insn *insn)
+ {
+       struct insn_field *modrm = &insn->modrm;
++      int ret;
+       if (!insn->x86_64)
+               return 0;
+-      if (!modrm->got)
+-              insn_get_modrm(insn);
++
++      if (!modrm->got) {
++              ret = insn_get_modrm(insn);
++              if (ret)
++                      return 0;
++      }
+       /*
+        * For rip-relative instructions, the mod field (top 2 bits)
+        * is zero and the r/m field (bottom 3 bits) is 0x5.
+@@ -343,15 +390,25 @@ int insn_rip_relative(struct insn *insn)
+  *
+  * If necessary, first collects the instruction up to and including the
+  * ModRM byte.
++ *
++ * Returns:
++ * 0: if decoding succeeded
++ * < 0: otherwise.
+  */
+-void insn_get_sib(struct insn *insn)
++int insn_get_sib(struct insn *insn)
+ {
+       insn_byte_t modrm;
++      int ret;
+       if (insn->sib.got)
+-              return;
+-      if (!insn->modrm.got)
+-              insn_get_modrm(insn);
++              return 0;
++
++      if (!insn->modrm.got) {
++              ret = insn_get_modrm(insn);
++              if (ret)
++                      return ret;
++      }
++
+       if (insn->modrm.nbytes) {
+               modrm = (insn_byte_t)insn->modrm.value;
+               if (insn->addr_bytes != 2 &&
+@@ -362,8 +419,10 @@ void insn_get_sib(struct insn *insn)
+       }
+       insn->sib.got = 1;
++      return 0;
++
+ err_out:
+-      return;
++      return -ENODATA;
+ }
+@@ -374,15 +433,25 @@ err_out:
+  * If necessary, first collects the instruction up to and including the
+  * SIB byte.
+  * Displacement value is sign-expanded.
++ *
++ * * Returns:
++ * 0: if decoding succeeded
++ * < 0: otherwise.
+  */
+-void insn_get_displacement(struct insn *insn)
++int insn_get_displacement(struct insn *insn)
+ {
+       insn_byte_t mod, rm, base;
++      int ret;
+       if (insn->displacement.got)
+-              return;
+-      if (!insn->sib.got)
+-              insn_get_sib(insn);
++              return 0;
++
++      if (!insn->sib.got) {
++              ret = insn_get_sib(insn);
++              if (ret)
++                      return ret;
++      }
++
+       if (insn->modrm.nbytes) {
+               /*
+                * Interpreting the modrm byte:
+@@ -425,9 +494,10 @@ void insn_get_displacement(struct insn *
+       }
+ out:
+       insn->displacement.got = 1;
++      return 0;
+ err_out:
+-      return;
++      return -ENODATA;
+ }
+ /* Decode moffset16/32/64. Return 0 if failed */
+@@ -538,20 +608,30 @@ err_out:
+ }
+ /**
+- * insn_get_immediate() - Get the immediates of instruction
++ * insn_get_immediate() - Get the immediate in an instruction
+  * @insn:     &struct insn containing instruction
+  *
+  * If necessary, first collects the instruction up to and including the
+  * displacement bytes.
+  * Basically, most of immediates are sign-expanded. Unsigned-value can be
+- * get by bit masking with ((1 << (nbytes * 8)) - 1)
++ * computed by bit masking with ((1 << (nbytes * 8)) - 1)
++ *
++ * Returns:
++ * 0:  on success
++ * < 0: on error
+  */
+-void insn_get_immediate(struct insn *insn)
++int insn_get_immediate(struct insn *insn)
+ {
++      int ret;
++
+       if (insn->immediate.got)
+-              return;
+-      if (!insn->displacement.got)
+-              insn_get_displacement(insn);
++              return 0;
++
++      if (!insn->displacement.got) {
++              ret = insn_get_displacement(insn);
++              if (ret)
++                      return ret;
++      }
+       if (inat_has_moffset(insn->attr)) {
+               if (!__get_moffset(insn))
+@@ -604,9 +684,10 @@ void insn_get_immediate(struct insn *ins
+       }
+ done:
+       insn->immediate.got = 1;
++      return 0;
+ err_out:
+-      return;
++      return -ENODATA;
+ }
+ /**
+@@ -615,13 +696,58 @@ err_out:
+  *
+  * If necessary, first collects the instruction up to and including the
+  * immediates bytes.
+- */
+-void insn_get_length(struct insn *insn)
++ *
++ * Returns:
++ *  - 0 on success
++ *  - < 0 on error
++*/
++int insn_get_length(struct insn *insn)
+ {
++      int ret;
++
+       if (insn->length)
+-              return;
+-      if (!insn->immediate.got)
+-              insn_get_immediate(insn);
++              return 0;
++
++      if (!insn->immediate.got) {
++              ret = insn_get_immediate(insn);
++              if (ret)
++                      return ret;
++      }
++
+       insn->length = (unsigned char)((unsigned long)insn->next_byte
+                                    - (unsigned long)insn->kaddr);
++
++      return 0;
++}
++
++/**
++ * insn_decode() - Decode an x86 instruction
++ * @insn:     &struct insn to be initialized
++ * @kaddr:    address (in kernel memory) of instruction (or copy thereof)
++ * @buf_len:  length of the insn buffer at @kaddr
++ * @m:                insn mode, see enum insn_mode
++ *
++ * Returns:
++ * 0: if decoding succeeded
++ * < 0: otherwise.
++ */
++int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m)
++{
++      int ret;
++
++/* #define INSN_MODE_KERN     -1 __ignore_sync_check__ mode is only valid in the kernel */
++
++      if (m == INSN_MODE_KERN)
++              insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64));
++      else
++              insn_init(insn, kaddr, buf_len, m == INSN_MODE_64);
++
++      ret = insn_get_length(insn);
++      if (ret)
++              return ret;
++
++      if (insn_complete(insn))
++              return 0;
++
++      return -EINVAL;
+ }
+--- a/tools/arch/x86/include/asm/insn.h
++++ b/tools/arch/x86/include/asm/insn.h
+@@ -87,13 +87,23 @@ struct insn {
+ #define X86_VEX_M_MAX 0x1f                    /* VEX3.M Maximum value */
+ extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
+-extern void insn_get_prefixes(struct insn *insn);
+-extern void insn_get_opcode(struct insn *insn);
+-extern void insn_get_modrm(struct insn *insn);
+-extern void insn_get_sib(struct insn *insn);
+-extern void insn_get_displacement(struct insn *insn);
+-extern void insn_get_immediate(struct insn *insn);
+-extern void insn_get_length(struct insn *insn);
++extern int insn_get_prefixes(struct insn *insn);
++extern int insn_get_opcode(struct insn *insn);
++extern int insn_get_modrm(struct insn *insn);
++extern int insn_get_sib(struct insn *insn);
++extern int insn_get_displacement(struct insn *insn);
++extern int insn_get_immediate(struct insn *insn);
++extern int insn_get_length(struct insn *insn);
++
++enum insn_mode {
++      INSN_MODE_32,
++      INSN_MODE_64,
++      /* Mode is determined by the current kernel build. */
++      INSN_MODE_KERN,
++      INSN_NUM_MODES,
++};
++
++extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
+ /* Attribute will be determined after getting ModRM (for opcode groups) */
+ static inline void insn_get_attribute(struct insn *insn)
+--- a/tools/arch/x86/lib/insn.c
++++ b/tools/arch/x86/lib/insn.c
+@@ -10,10 +10,13 @@
+ #else
+ #include <string.h>
+ #endif
+-#include "../include/asm/inat.h" /* __ignore_sync_check__ */
+-#include "../include/asm/insn.h" /* __ignore_sync_check__ */
++#include <asm/inat.h> /* __ignore_sync_check__ */
++#include <asm/insn.h> /* __ignore_sync_check__ */
+-#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */
++#include <linux/errno.h>
++#include <linux/kconfig.h>
++
++#include <asm/emulate_prefix.h> /* __ignore_sync_check__ */
+ /* Verify next sizeof(t) bytes can be on the same instruction */
+ #define validate_next(t, insn, n)     \
+@@ -97,8 +100,12 @@ static void insn_get_emulate_prefix(stru
+  * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
+  * to point to the (first) opcode.  No effect if @insn->prefixes.got
+  * is already set.
++ *
++ * * Returns:
++ * 0:  on success
++ * < 0: on error
+  */
+-void insn_get_prefixes(struct insn *insn)
++int insn_get_prefixes(struct insn *insn)
+ {
+       struct insn_field *prefixes = &insn->prefixes;
+       insn_attr_t attr;
+@@ -106,7 +113,7 @@ void insn_get_prefixes(struct insn *insn
+       int i, nb;
+       if (prefixes->got)
+-              return;
++              return 0;
+       insn_get_emulate_prefix(insn);
+@@ -217,8 +224,10 @@ vex_end:
+       prefixes->got = 1;
++      return 0;
++
+ err_out:
+-      return;
++      return -ENODATA;
+ }
+ /**
+@@ -230,16 +239,25 @@ err_out:
+  * If necessary, first collects any preceding (prefix) bytes.
+  * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
+  * is already 1.
++ *
++ * Returns:
++ * 0:  on success
++ * < 0: on error
+  */
+-void insn_get_opcode(struct insn *insn)
++int insn_get_opcode(struct insn *insn)
+ {
+       struct insn_field *opcode = &insn->opcode;
++      int pfx_id, ret;
+       insn_byte_t op;
+-      int pfx_id;
++
+       if (opcode->got)
+-              return;
+-      if (!insn->prefixes.got)
+-              insn_get_prefixes(insn);
++              return 0;
++
++      if (!insn->prefixes.got) {
++              ret = insn_get_prefixes(insn);
++              if (ret)
++                      return ret;
++      }
+       /* Get first opcode */
+       op = get_next(insn_byte_t, insn);
+@@ -254,9 +272,13 @@ void insn_get_opcode(struct insn *insn)
+               insn->attr = inat_get_avx_attribute(op, m, p);
+               if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
+                   (!inat_accept_vex(insn->attr) &&
+-                   !inat_is_group(insn->attr)))
+-                      insn->attr = 0; /* This instruction is bad */
+-              goto end;       /* VEX has only 1 byte for opcode */
++                   !inat_is_group(insn->attr))) {
++                      /* This instruction is bad */
++                      insn->attr = 0;
++                      return -EINVAL;
++              }
++              /* VEX has only 1 byte for opcode */
++              goto end;
+       }
+       insn->attr = inat_get_opcode_attribute(op);
+@@ -267,13 +289,18 @@ void insn_get_opcode(struct insn *insn)
+               pfx_id = insn_last_prefix_id(insn);
+               insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
+       }
+-      if (inat_must_vex(insn->attr))
+-              insn->attr = 0; /* This instruction is bad */
++
++      if (inat_must_vex(insn->attr)) {
++              /* This instruction is bad */
++              insn->attr = 0;
++              return -EINVAL;
++      }
+ end:
+       opcode->got = 1;
++      return 0;
+ err_out:
+-      return;
++      return -ENODATA;
+ }
+ /**
+@@ -283,15 +310,25 @@ err_out:
+  * Populates @insn->modrm and updates @insn->next_byte to point past the
+  * ModRM byte, if any.  If necessary, first collects the preceding bytes
+  * (prefixes and opcode(s)).  No effect if @insn->modrm.got is already 1.
++ *
++ * Returns:
++ * 0:  on success
++ * < 0: on error
+  */
+-void insn_get_modrm(struct insn *insn)
++int insn_get_modrm(struct insn *insn)
+ {
+       struct insn_field *modrm = &insn->modrm;
+       insn_byte_t pfx_id, mod;
++      int ret;
++
+       if (modrm->got)
+-              return;
+-      if (!insn->opcode.got)
+-              insn_get_opcode(insn);
++              return 0;
++
++      if (!insn->opcode.got) {
++              ret = insn_get_opcode(insn);
++              if (ret)
++                      return ret;
++      }
+       if (inat_has_modrm(insn->attr)) {
+               mod = get_next(insn_byte_t, insn);
+@@ -301,17 +338,22 @@ void insn_get_modrm(struct insn *insn)
+                       pfx_id = insn_last_prefix_id(insn);
+                       insn->attr = inat_get_group_attribute(mod, pfx_id,
+                                                             insn->attr);
+-                      if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
+-                              insn->attr = 0; /* This is bad */
++                      if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
++                              /* Bad insn */
++                              insn->attr = 0;
++                              return -EINVAL;
++                      }
+               }
+       }
+       if (insn->x86_64 && inat_is_force64(insn->attr))
+               insn->opnd_bytes = 8;
++
+       modrm->got = 1;
++      return 0;
+ err_out:
+-      return;
++      return -ENODATA;
+ }
+@@ -325,11 +367,16 @@ err_out:
+ int insn_rip_relative(struct insn *insn)
+ {
+       struct insn_field *modrm = &insn->modrm;
++      int ret;
+       if (!insn->x86_64)
+               return 0;
+-      if (!modrm->got)
+-              insn_get_modrm(insn);
++
++      if (!modrm->got) {
++              ret = insn_get_modrm(insn);
++              if (ret)
++                      return 0;
++      }
+       /*
+        * For rip-relative instructions, the mod field (top 2 bits)
+        * is zero and the r/m field (bottom 3 bits) is 0x5.
+@@ -343,15 +390,25 @@ int insn_rip_relative(struct insn *insn)
+  *
+  * If necessary, first collects the instruction up to and including the
+  * ModRM byte.
++ *
++ * Returns:
++ * 0: if decoding succeeded
++ * < 0: otherwise.
+  */
+-void insn_get_sib(struct insn *insn)
++int insn_get_sib(struct insn *insn)
+ {
+       insn_byte_t modrm;
++      int ret;
+       if (insn->sib.got)
+-              return;
+-      if (!insn->modrm.got)
+-              insn_get_modrm(insn);
++              return 0;
++
++      if (!insn->modrm.got) {
++              ret = insn_get_modrm(insn);
++              if (ret)
++                      return ret;
++      }
++
+       if (insn->modrm.nbytes) {
+               modrm = (insn_byte_t)insn->modrm.value;
+               if (insn->addr_bytes != 2 &&
+@@ -362,8 +419,10 @@ void insn_get_sib(struct insn *insn)
+       }
+       insn->sib.got = 1;
++      return 0;
++
+ err_out:
+-      return;
++      return -ENODATA;
+ }
+@@ -374,15 +433,25 @@ err_out:
+  * If necessary, first collects the instruction up to and including the
+  * SIB byte.
+  * Displacement value is sign-expanded.
++ *
++ * * Returns:
++ * 0: if decoding succeeded
++ * < 0: otherwise.
+  */
+-void insn_get_displacement(struct insn *insn)
++int insn_get_displacement(struct insn *insn)
+ {
+       insn_byte_t mod, rm, base;
++      int ret;
+       if (insn->displacement.got)
+-              return;
+-      if (!insn->sib.got)
+-              insn_get_sib(insn);
++              return 0;
++
++      if (!insn->sib.got) {
++              ret = insn_get_sib(insn);
++              if (ret)
++                      return ret;
++      }
++
+       if (insn->modrm.nbytes) {
+               /*
+                * Interpreting the modrm byte:
+@@ -425,9 +494,10 @@ void insn_get_displacement(struct insn *
+       }
+ out:
+       insn->displacement.got = 1;
++      return 0;
+ err_out:
+-      return;
++      return -ENODATA;
+ }
+ /* Decode moffset16/32/64. Return 0 if failed */
+@@ -538,20 +608,30 @@ err_out:
+ }
+ /**
+- * insn_get_immediate() - Get the immediates of instruction
++ * insn_get_immediate() - Get the immediate in an instruction
+  * @insn:     &struct insn containing instruction
+  *
+  * If necessary, first collects the instruction up to and including the
+  * displacement bytes.
+  * Basically, most of immediates are sign-expanded. Unsigned-value can be
+- * get by bit masking with ((1 << (nbytes * 8)) - 1)
++ * computed by bit masking with ((1 << (nbytes * 8)) - 1)
++ *
++ * Returns:
++ * 0:  on success
++ * < 0: on error
+  */
+-void insn_get_immediate(struct insn *insn)
++int insn_get_immediate(struct insn *insn)
+ {
++      int ret;
++
+       if (insn->immediate.got)
+-              return;
+-      if (!insn->displacement.got)
+-              insn_get_displacement(insn);
++              return 0;
++
++      if (!insn->displacement.got) {
++              ret = insn_get_displacement(insn);
++              if (ret)
++                      return ret;
++      }
+       if (inat_has_moffset(insn->attr)) {
+               if (!__get_moffset(insn))
+@@ -604,9 +684,10 @@ void insn_get_immediate(struct insn *ins
+       }
+ done:
+       insn->immediate.got = 1;
++      return 0;
+ err_out:
+-      return;
++      return -ENODATA;
+ }
+ /**
+@@ -615,13 +696,58 @@ err_out:
+  *
+  * If necessary, first collects the instruction up to and including the
+  * immediates bytes.
+- */
+-void insn_get_length(struct insn *insn)
++ *
++ * Returns:
++ *  - 0 on success
++ *  - < 0 on error
++*/
++int insn_get_length(struct insn *insn)
+ {
++      int ret;
++
+       if (insn->length)
+-              return;
+-      if (!insn->immediate.got)
+-              insn_get_immediate(insn);
++              return 0;
++
++      if (!insn->immediate.got) {
++              ret = insn_get_immediate(insn);
++              if (ret)
++                      return ret;
++      }
++
+       insn->length = (unsigned char)((unsigned long)insn->next_byte
+                                    - (unsigned long)insn->kaddr);
++
++      return 0;
++}
++
++/**
++ * insn_decode() - Decode an x86 instruction
++ * @insn:     &struct insn to be initialized
++ * @kaddr:    address (in kernel memory) of instruction (or copy thereof)
++ * @buf_len:  length of the insn buffer at @kaddr
++ * @m:                insn mode, see enum insn_mode
++ *
++ * Returns:
++ * 0: if decoding succeeded
++ * < 0: otherwise.
++ */
++int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m)
++{
++      int ret;
++
++#define INSN_MODE_KERN (enum insn_mode)-1 /* __ignore_sync_check__ mode is only valid in the kernel */
++
++      if (m == INSN_MODE_KERN)
++              insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64));
++      else
++              insn_init(insn, kaddr, buf_len, m == INSN_MODE_64);
++
++      ret = insn_get_length(insn);
++      if (ret)
++              return ret;
++
++      if (insn_complete(insn))
++              return 0;
++
++      return -EINVAL;
+ }
+--- /dev/null
++++ b/tools/include/linux/kconfig.h
+@@ -0,0 +1,73 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _TOOLS_LINUX_KCONFIG_H
++#define _TOOLS_LINUX_KCONFIG_H
++
++/* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */
++
++#ifdef CONFIG_CPU_BIG_ENDIAN
++#define __BIG_ENDIAN 4321
++#else
++#define __LITTLE_ENDIAN 1234
++#endif
++
++#define __ARG_PLACEHOLDER_1 0,
++#define __take_second_arg(__ignored, val, ...) val
++
++/*
++ * The use of "&&" / "||" is limited in certain expressions.
++ * The following enable to calculate "and" / "or" with macro expansion only.
++ */
++#define __and(x, y)                   ___and(x, y)
++#define ___and(x, y)                  ____and(__ARG_PLACEHOLDER_##x, y)
++#define ____and(arg1_or_junk, y)      __take_second_arg(arg1_or_junk y, 0)
++
++#define __or(x, y)                    ___or(x, y)
++#define ___or(x, y)                   ____or(__ARG_PLACEHOLDER_##x, y)
++#define ____or(arg1_or_junk, y)               __take_second_arg(arg1_or_junk 1, y)
++
++/*
++ * Helper macros to use CONFIG_ options in C/CPP expressions. Note that
++ * these only work with boolean and tristate options.
++ */
++
++/*
++ * Getting something that works in C and CPP for an arg that may or may
++ * not be defined is tricky.  Here, if we have "#define CONFIG_BOOGER 1"
++ * we match on the placeholder define, insert the "0," for arg1 and generate
++ * the triplet (0, 1, 0).  Then the last step cherry picks the 2nd arg (a one).
++ * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when
++ * the last step cherry picks the 2nd arg, we get a zero.
++ */
++#define __is_defined(x)                       ___is_defined(x)
++#define ___is_defined(val)            ____is_defined(__ARG_PLACEHOLDER_##val)
++#define ____is_defined(arg1_or_junk)  __take_second_arg(arg1_or_junk 1, 0)
++
++/*
++ * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0
++ * otherwise. For boolean options, this is equivalent to
++ * IS_ENABLED(CONFIG_FOO).
++ */
++#define IS_BUILTIN(option) __is_defined(option)
++
++/*
++ * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0
++ * otherwise.
++ */
++#define IS_MODULE(option) __is_defined(option##_MODULE)
++
++/*
++ * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled
++ * code can call a function defined in code compiled based on CONFIG_FOO.
++ * This is similar to IS_ENABLED(), but returns false when invoked from
++ * built-in code when CONFIG_FOO is set to 'm'.
++ */
++#define IS_REACHABLE(option) __or(IS_BUILTIN(option), \
++                              __and(IS_MODULE(option), __is_defined(MODULE)))
++
++/*
++ * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm',
++ * 0 otherwise.
++ */
++#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option))
++
++#endif /* _TOOLS_LINUX_KCONFIG_H */
diff --git a/queue-5.10/x86-insn-eval-handle-return-values-from-the-decoder.patch b/queue-5.10/x86-insn-eval-handle-return-values-from-the-decoder.patch
new file mode 100644 (file)
index 0000000..49437d6
--- /dev/null
@@ -0,0 +1,117 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Thu, 19 Nov 2020 19:20:18 +0100
+Subject: x86/insn-eval: Handle return values from the decoder
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 6e8c83d2a3afbfd5ee019ec720b75a42df515caa upstream.
+
+Now that the different instruction-inspecting functions return a value,
+test that and return early from callers if error has been encountered.
+
+While at it, do not call insn_get_modrm() when calling
+insn_get_displacement() because latter will make sure to call
+insn_get_modrm() if ModRM hasn't been parsed yet.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20210304174237.31945-6-bp@alien8.de
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/insn-eval.c |   34 +++++++++++++++++++++-------------
+ 1 file changed, 21 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/lib/insn-eval.c
++++ b/arch/x86/lib/insn-eval.c
+@@ -928,10 +928,11 @@ static int get_seg_base_limit(struct ins
+ static int get_eff_addr_reg(struct insn *insn, struct pt_regs *regs,
+                           int *regoff, long *eff_addr)
+ {
+-      insn_get_modrm(insn);
++      int ret;
+-      if (!insn->modrm.nbytes)
+-              return -EINVAL;
++      ret = insn_get_modrm(insn);
++      if (ret)
++              return ret;
+       if (X86_MODRM_MOD(insn->modrm.value) != 3)
+               return -EINVAL;
+@@ -977,14 +978,14 @@ static int get_eff_addr_modrm(struct ins
+                             int *regoff, long *eff_addr)
+ {
+       long tmp;
++      int ret;
+       if (insn->addr_bytes != 8 && insn->addr_bytes != 4)
+               return -EINVAL;
+-      insn_get_modrm(insn);
+-
+-      if (!insn->modrm.nbytes)
+-              return -EINVAL;
++      ret = insn_get_modrm(insn);
++      if (ret)
++              return ret;
+       if (X86_MODRM_MOD(insn->modrm.value) > 2)
+               return -EINVAL;
+@@ -1106,18 +1107,21 @@ static int get_eff_addr_modrm_16(struct
+  * @base_offset will have a register, as an offset from the base of pt_regs,
+  * that can be used to resolve the associated segment.
+  *
+- * -EINVAL on error.
++ * Negative value on error.
+  */
+ static int get_eff_addr_sib(struct insn *insn, struct pt_regs *regs,
+                           int *base_offset, long *eff_addr)
+ {
+       long base, indx;
+       int indx_offset;
++      int ret;
+       if (insn->addr_bytes != 8 && insn->addr_bytes != 4)
+               return -EINVAL;
+-      insn_get_modrm(insn);
++      ret = insn_get_modrm(insn);
++      if (ret)
++              return ret;
+       if (!insn->modrm.nbytes)
+               return -EINVAL;
+@@ -1125,7 +1129,9 @@ static int get_eff_addr_sib(struct insn
+       if (X86_MODRM_MOD(insn->modrm.value) > 2)
+               return -EINVAL;
+-      insn_get_sib(insn);
++      ret = insn_get_sib(insn);
++      if (ret)
++              return ret;
+       if (!insn->sib.nbytes)
+               return -EINVAL;
+@@ -1194,8 +1200,8 @@ static void __user *get_addr_ref_16(stru
+       short eff_addr;
+       long tmp;
+-      insn_get_modrm(insn);
+-      insn_get_displacement(insn);
++      if (insn_get_displacement(insn))
++              goto out;
+       if (insn->addr_bytes != 2)
+               goto out;
+@@ -1529,7 +1535,9 @@ bool insn_decode_from_regs(struct insn *
+       insn->addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
+       insn->opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
+-      insn_get_length(insn);
++      if (insn_get_length(insn))
++              return false;
++
+       if (buf_size < insn->length)
+               return false;
diff --git a/queue-5.10/x86-insn-rename-insn_decode-to-insn_decode_from_regs.patch b/queue-5.10/x86-insn-rename-insn_decode-to-insn_decode_from_regs.patch
new file mode 100644 (file)
index 0000000..a5dde9d
--- /dev/null
@@ -0,0 +1,82 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Mon, 2 Nov 2020 18:47:34 +0100
+Subject: x86/insn: Rename insn_decode() to insn_decode_from_regs()
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 9e761296c52dcdb1aaa151b65bd39accb05740d9 upstream.
+
+Rename insn_decode() to insn_decode_from_regs() to denote that it
+receives regs as param and uses registers from there during decoding.
+Free the former name for a more generic version of the function.
+
+No functional changes.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20210304174237.31945-2-bp@alien8.de
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/insn-eval.h |    4 ++--
+ arch/x86/kernel/sev-es.c         |    2 +-
+ arch/x86/kernel/umip.c           |    2 +-
+ arch/x86/lib/insn-eval.c         |    6 +++---
+ 4 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/insn-eval.h
++++ b/arch/x86/include/asm/insn-eval.h
+@@ -26,7 +26,7 @@ int insn_fetch_from_user(struct pt_regs
+                        unsigned char buf[MAX_INSN_SIZE]);
+ int insn_fetch_from_user_inatomic(struct pt_regs *regs,
+                                 unsigned char buf[MAX_INSN_SIZE]);
+-bool insn_decode(struct insn *insn, struct pt_regs *regs,
+-               unsigned char buf[MAX_INSN_SIZE], int buf_size);
++bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
++                         unsigned char buf[MAX_INSN_SIZE], int buf_size);
+ #endif /* _ASM_X86_INSN_EVAL_H */
+--- a/arch/x86/kernel/sev-es.c
++++ b/arch/x86/kernel/sev-es.c
+@@ -236,7 +236,7 @@ static enum es_result vc_decode_insn(str
+                       return ES_EXCEPTION;
+               }
+-              if (!insn_decode(&ctxt->insn, ctxt->regs, buffer, res))
++              if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res))
+                       return ES_DECODE_FAILED;
+       } else {
+               res = vc_fetch_insn_kernel(ctxt, buffer);
+--- a/arch/x86/kernel/umip.c
++++ b/arch/x86/kernel/umip.c
+@@ -356,7 +356,7 @@ bool fixup_umip_exception(struct pt_regs
+       if (!nr_copied)
+               return false;
+-      if (!insn_decode(&insn, regs, buf, nr_copied))
++      if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
+               return false;
+       umip_inst = identify_insn(&insn);
+--- a/arch/x86/lib/insn-eval.c
++++ b/arch/x86/lib/insn-eval.c
+@@ -1492,7 +1492,7 @@ int insn_fetch_from_user_inatomic(struct
+ }
+ /**
+- * insn_decode() - Decode an instruction
++ * insn_decode_from_regs() - Decode an instruction
+  * @insn:     Structure to store decoded instruction
+  * @regs:     Structure with register values as seen when entering kernel mode
+  * @buf:      Buffer containing the instruction bytes
+@@ -1505,8 +1505,8 @@ int insn_fetch_from_user_inatomic(struct
+  *
+  * True if instruction was decoded, False otherwise.
+  */
+-bool insn_decode(struct insn *insn, struct pt_regs *regs,
+-               unsigned char buf[MAX_INSN_SIZE], int buf_size)
++bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
++                         unsigned char buf[MAX_INSN_SIZE], int buf_size)
+ {
+       int seg_defs;
diff --git a/queue-5.10/x86-kexec-disable-ret-on-kexec.patch b/queue-5.10/x86-kexec-disable-ret-on-kexec.patch
new file mode 100644 (file)
index 0000000..d8a1b04
--- /dev/null
@@ -0,0 +1,173 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Fri, 8 Jul 2022 19:10:11 +0200
+Subject: x86/kexec: Disable RET on kexec
+
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+
+commit 697977d8415d61f3acbc4ee6d564c9dcf0309507 upstream.
+
+All the invocations unroll to __x86_return_thunk and this file
+must be PIC independent.
+
+This fixes kexec on 64-bit AMD boxes.
+
+  [ bp: Fix 32-bit build. ]
+
+Reported-by: Edward Tran <edward.tran@oracle.com>
+Reported-by: Awais Tanveer <awais.tanveer@oracle.com>
+Suggested-by: Ankur Arora <ankur.a.arora@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/relocate_kernel_32.S |   25 +++++++++++++++++++------
+ arch/x86/kernel/relocate_kernel_64.S |   23 +++++++++++++++++------
+ 2 files changed, 36 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/kernel/relocate_kernel_32.S
++++ b/arch/x86/kernel/relocate_kernel_32.S
+@@ -7,10 +7,12 @@
+ #include <linux/linkage.h>
+ #include <asm/page_types.h>
+ #include <asm/kexec.h>
++#include <asm/nospec-branch.h>
+ #include <asm/processor-flags.h>
+ /*
+- * Must be relocatable PIC code callable as a C function
++ * Must be relocatable PIC code callable as a C function, in particular
++ * there must be a plain RET and not jump to return thunk.
+  */
+ #define PTR(x) (x << 2)
+@@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
+       movl    %edi, %eax
+       addl    $(identity_mapped - relocate_kernel), %eax
+       pushl   %eax
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(relocate_kernel)
+ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+@@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+       xorl    %edx, %edx
+       xorl    %esi, %esi
+       xorl    %ebp, %ebp
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ 1:
+       popl    %edx
+       movl    CP_PA_SWAP_PAGE(%edi), %esp
+       addl    $PAGE_SIZE, %esp
+ 2:
++      ANNOTATE_RETPOLINE_SAFE
+       call    *%edx
+       /* get the re-entry point of the peer system */
+@@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+       movl    %edi, %eax
+       addl    $(virtual_mapped - relocate_kernel), %eax
+       pushl   %eax
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(identity_mapped)
+ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+@@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(virtual_mapped)
+       /* Do the copies */
+@@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
+       popl    %edi
+       popl    %ebx
+       popl    %ebp
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(swap_pages)
+       .globl kexec_control_code_size
+--- a/arch/x86/kernel/relocate_kernel_64.S
++++ b/arch/x86/kernel/relocate_kernel_64.S
+@@ -13,7 +13,8 @@
+ #include <asm/unwind_hints.h>
+ /*
+- * Must be relocatable PIC code callable as a C function
++ * Must be relocatable PIC code callable as a C function, in particular
++ * there must be a plain RET and not jump to return thunk.
+  */
+ #define PTR(x) (x << 3)
+@@ -104,7 +105,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
+       /* jump to identity mapped page */
+       addq    $(identity_mapped - relocate_kernel), %r8
+       pushq   %r8
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(relocate_kernel)
+ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+@@ -191,7 +194,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+       xorl    %r14d, %r14d
+       xorl    %r15d, %r15d
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ 1:
+       popq    %rdx
+@@ -210,7 +215,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+       call    swap_pages
+       movq    $virtual_mapped, %rax
+       pushq   %rax
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(identity_mapped)
+ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+@@ -231,7 +238,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map
+       popq    %r12
+       popq    %rbp
+       popq    %rbx
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(virtual_mapped)
+       /* Do the copies */
+@@ -288,7 +297,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
+       lea     PAGE_SIZE(%rax), %rsi
+       jmp     0b
+ 3:
+-      RET
++      ANNOTATE_UNRET_SAFE
++      ret
++      int3
+ SYM_CODE_END(swap_pages)
+       .globl kexec_control_code_size
diff --git a/queue-5.10/x86-kvm-fix-setcc-emulation-for-return-thunks.patch b/queue-5.10/x86-kvm-fix-setcc-emulation-for-return-thunks.patch
new file mode 100644 (file)
index 0000000..a0c98f6
--- /dev/null
@@ -0,0 +1,99 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:42 +0200
+Subject: x86/kvm: Fix SETcc emulation for return thunks
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit af2e140f34208a5dfb6b7a8ad2d56bda88f0524d upstream.
+
+Prepare the SETcc fastop stuff for when RET can be larger still.
+
+The tricky bit here is that the expressions should not only be
+constant C expressions, but also absolute GAS expressions. This means
+no ?: and 'true' is ~0.
+
+Also ensure em_setcc() has the same alignment as the actual FOP_SETCC()
+ops, this ensures there cannot be an alignment hole between em_setcc()
+and the first op.
+
+Additionally, add a .skip directive to the FOP_SETCC() macro to fill
+any remaining space with INT3 traps; however the primary purpose of
+this directive is to generate AS warnings when the remaining space
+goes negative. Which is a very good indication the alignment magic
+went side-ways.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: ignore ENDBR when computing SETCC_LENGTH]
+[cascardo: conflict fixup]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c |   26 ++++++++++++++------------
+ 1 file changed, 14 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -322,13 +322,15 @@ static int fastop(struct x86_emulate_ctx
+ #define FOP_RET(name) \
+       __FOP_RET(#name)
+-#define FOP_START(op) \
++#define __FOP_START(op, align) \
+       extern void em_##op(struct fastop *fake); \
+       asm(".pushsection .text, \"ax\" \n\t" \
+           ".global em_" #op " \n\t" \
+-          ".align " __stringify(FASTOP_SIZE) " \n\t" \
++          ".align " __stringify(align) " \n\t" \
+           "em_" #op ":\n\t"
++#define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
++
+ #define FOP_END \
+           ".popsection")
+@@ -432,15 +434,14 @@ static int fastop(struct x86_emulate_ctx
+ /*
+  * Depending on .config the SETcc functions look like:
+  *
+- * SETcc %al   [3 bytes]
+- * RET         [1 byte]
+- * INT3        [1 byte; CONFIG_SLS]
+- *
+- * Which gives possible sizes 4 or 5.  When rounded up to the
+- * next power-of-two alignment they become 4 or 8.
++ * SETcc %al                  [3 bytes]
++ * RET | JMP __x86_return_thunk       [1,5 bytes; CONFIG_RETPOLINE]
++ * INT3                               [1 byte; CONFIG_SLS]
+  */
+-#define SETCC_LENGTH  (4 + IS_ENABLED(CONFIG_SLS))
+-#define SETCC_ALIGN   (4 << IS_ENABLED(CONFIG_SLS))
++#define RET_LENGTH    (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \
++                       IS_ENABLED(CONFIG_SLS))
++#define SETCC_LENGTH  (3 + RET_LENGTH)
++#define SETCC_ALIGN   (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1))
+ static_assert(SETCC_LENGTH <= SETCC_ALIGN);
+ #define FOP_SETCC(op) \
+@@ -448,14 +449,15 @@ static_assert(SETCC_LENGTH <= SETCC_ALIG
+       ".type " #op ", @function \n\t" \
+       #op ": \n\t" \
+       #op " %al \n\t" \
+-      __FOP_RET(#op)
++      __FOP_RET(#op) \
++      ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t"
+ asm(".pushsection .fixup, \"ax\"\n"
+     ".global kvm_fastop_exception \n"
+     "kvm_fastop_exception: xor %esi, %esi; " ASM_RET
+     ".popsection");
+-FOP_START(setcc)
++__FOP_START(setcc, SETCC_ALIGN)
+ FOP_SETCC(seto)
+ FOP_SETCC(setno)
+ FOP_SETCC(setc)
diff --git a/queue-5.10/x86-kvm-vmx-make-noinstr-clean.patch b/queue-5.10/x86-kvm-vmx-make-noinstr-clean.patch
new file mode 100644 (file)
index 0000000..bfe019c
--- /dev/null
@@ -0,0 +1,75 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:32 +0200
+Subject: x86/kvm/vmx: Make noinstr clean
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 742ab6df974ae8384a2dd213db1a3a06cf6d8936 upstream.
+
+The recent mmio_stale_data fixes broke the noinstr constraints:
+
+  vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x15b: call to wrmsrl.constprop.0() leaves .noinstr.text section
+  vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x1bf: call to kvm_arch_has_assigned_device() leaves .noinstr.text section
+
+make it all happy again.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c   |    6 +++---
+ arch/x86/kvm/x86.c       |    4 ++--
+ include/linux/kvm_host.h |    2 +-
+ 3 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -380,9 +380,9 @@ static __always_inline void vmx_disable_
+       if (!vmx->disable_fb_clear)
+               return;
+-      rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
++      msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
+       msr |= FB_CLEAR_DIS;
+-      wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
++      native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+       /* Cache the MSR value to avoid reading it later */
+       vmx->msr_ia32_mcu_opt_ctrl = msr;
+ }
+@@ -393,7 +393,7 @@ static __always_inline void vmx_enable_f
+               return;
+       vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
+-      wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
++      native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
+ }
+ static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -11171,9 +11171,9 @@ void kvm_arch_end_assignment(struct kvm
+ }
+ EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
+-bool kvm_arch_has_assigned_device(struct kvm *kvm)
++bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
+ {
+-      return atomic_read(&kvm->arch.assigned_device_count);
++      return arch_atomic_read(&kvm->arch.assigned_device_count);
+ }
+ EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -988,7 +988,7 @@ static inline void kvm_arch_end_assignme
+ {
+ }
+-static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
++static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
+ {
+       return false;
+ }
diff --git a/queue-5.10/x86-lib-atomic64_386_32-rename-things.patch b/queue-5.10/x86-lib-atomic64_386_32-rename-things.patch
new file mode 100644 (file)
index 0000000..6b6051a
--- /dev/null
@@ -0,0 +1,248 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sat, 4 Dec 2021 14:43:39 +0100
+Subject: x86/lib/atomic64_386_32: Rename things
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 22da5a07c75e1104caf6a42f189c97b83d070073 upstream.
+
+Principally, in order to get rid of #define RET in this code to make
+place for a new RET, but also to clarify the code, rename a bunch of
+things:
+
+  s/UNLOCK/IRQ_RESTORE/
+  s/LOCK/IRQ_SAVE/
+  s/BEGIN/BEGIN_IRQ_SAVE/
+  s/\<RET\>/RET_IRQ_RESTORE/
+  s/RET_ENDP/\tRET_IRQ_RESTORE\rENDP/
+
+which then leaves RET unused so it can be removed.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20211204134907.841623970@infradead.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/atomic64_386_32.S |   84 ++++++++++++++++++++++-------------------
+ 1 file changed, 46 insertions(+), 38 deletions(-)
+
+--- a/arch/x86/lib/atomic64_386_32.S
++++ b/arch/x86/lib/atomic64_386_32.S
+@@ -9,81 +9,83 @@
+ #include <asm/alternative.h>
+ /* if you want SMP support, implement these with real spinlocks */
+-.macro LOCK reg
++.macro IRQ_SAVE reg
+       pushfl
+       cli
+ .endm
+-.macro UNLOCK reg
++.macro IRQ_RESTORE reg
+       popfl
+ .endm
+-#define BEGIN(op) \
++#define BEGIN_IRQ_SAVE(op) \
+ .macro endp; \
+ SYM_FUNC_END(atomic64_##op##_386); \
+ .purgem endp; \
+ .endm; \
+ SYM_FUNC_START(atomic64_##op##_386); \
+-      LOCK v;
++      IRQ_SAVE v;
+ #define ENDP endp
+-#define RET \
+-      UNLOCK v; \
++#define RET_IRQ_RESTORE \
++      IRQ_RESTORE v; \
+       ret
+-#define RET_ENDP \
+-      RET; \
+-      ENDP
+-
+ #define v %ecx
+-BEGIN(read)
++BEGIN_IRQ_SAVE(read)
+       movl  (v), %eax
+       movl 4(v), %edx
+-RET_ENDP
++      RET_IRQ_RESTORE
++ENDP
+ #undef v
+ #define v %esi
+-BEGIN(set)
++BEGIN_IRQ_SAVE(set)
+       movl %ebx,  (v)
+       movl %ecx, 4(v)
+-RET_ENDP
++      RET_IRQ_RESTORE
++ENDP
+ #undef v
+ #define v  %esi
+-BEGIN(xchg)
++BEGIN_IRQ_SAVE(xchg)
+       movl  (v), %eax
+       movl 4(v), %edx
+       movl %ebx,  (v)
+       movl %ecx, 4(v)
+-RET_ENDP
++      RET_IRQ_RESTORE
++ENDP
+ #undef v
+ #define v %ecx
+-BEGIN(add)
++BEGIN_IRQ_SAVE(add)
+       addl %eax,  (v)
+       adcl %edx, 4(v)
+-RET_ENDP
++      RET_IRQ_RESTORE
++ENDP
+ #undef v
+ #define v %ecx
+-BEGIN(add_return)
++BEGIN_IRQ_SAVE(add_return)
+       addl  (v), %eax
+       adcl 4(v), %edx
+       movl %eax,  (v)
+       movl %edx, 4(v)
+-RET_ENDP
++      RET_IRQ_RESTORE
++ENDP
+ #undef v
+ #define v %ecx
+-BEGIN(sub)
++BEGIN_IRQ_SAVE(sub)
+       subl %eax,  (v)
+       sbbl %edx, 4(v)
+-RET_ENDP
++      RET_IRQ_RESTORE
++ENDP
+ #undef v
+ #define v %ecx
+-BEGIN(sub_return)
++BEGIN_IRQ_SAVE(sub_return)
+       negl %edx
+       negl %eax
+       sbbl $0, %edx
+@@ -91,47 +93,52 @@ BEGIN(sub_return)
+       adcl 4(v), %edx
+       movl %eax,  (v)
+       movl %edx, 4(v)
+-RET_ENDP
++      RET_IRQ_RESTORE
++ENDP
+ #undef v
+ #define v %esi
+-BEGIN(inc)
++BEGIN_IRQ_SAVE(inc)
+       addl $1,  (v)
+       adcl $0, 4(v)
+-RET_ENDP
++      RET_IRQ_RESTORE
++ENDP
+ #undef v
+ #define v %esi
+-BEGIN(inc_return)
++BEGIN_IRQ_SAVE(inc_return)
+       movl  (v), %eax
+       movl 4(v), %edx
+       addl $1, %eax
+       adcl $0, %edx
+       movl %eax,  (v)
+       movl %edx, 4(v)
+-RET_ENDP
++      RET_IRQ_RESTORE
++ENDP
+ #undef v
+ #define v %esi
+-BEGIN(dec)
++BEGIN_IRQ_SAVE(dec)
+       subl $1,  (v)
+       sbbl $0, 4(v)
+-RET_ENDP
++      RET_IRQ_RESTORE
++ENDP
+ #undef v
+ #define v %esi
+-BEGIN(dec_return)
++BEGIN_IRQ_SAVE(dec_return)
+       movl  (v), %eax
+       movl 4(v), %edx
+       subl $1, %eax
+       sbbl $0, %edx
+       movl %eax,  (v)
+       movl %edx, 4(v)
+-RET_ENDP
++      RET_IRQ_RESTORE
++ENDP
+ #undef v
+ #define v %esi
+-BEGIN(add_unless)
++BEGIN_IRQ_SAVE(add_unless)
+       addl %eax, %ecx
+       adcl %edx, %edi
+       addl  (v), %eax
+@@ -143,7 +150,7 @@ BEGIN(add_unless)
+       movl %edx, 4(v)
+       movl $1, %eax
+ 2:
+-      RET
++      RET_IRQ_RESTORE
+ 3:
+       cmpl %edx, %edi
+       jne 1b
+@@ -153,7 +160,7 @@ ENDP
+ #undef v
+ #define v %esi
+-BEGIN(inc_not_zero)
++BEGIN_IRQ_SAVE(inc_not_zero)
+       movl  (v), %eax
+       movl 4(v), %edx
+       testl %eax, %eax
+@@ -165,7 +172,7 @@ BEGIN(inc_not_zero)
+       movl %edx, 4(v)
+       movl $1, %eax
+ 2:
+-      RET
++      RET_IRQ_RESTORE
+ 3:
+       testl %edx, %edx
+       jne 1b
+@@ -174,7 +181,7 @@ ENDP
+ #undef v
+ #define v %esi
+-BEGIN(dec_if_positive)
++BEGIN_IRQ_SAVE(dec_if_positive)
+       movl  (v), %eax
+       movl 4(v), %edx
+       subl $1, %eax
+@@ -183,5 +190,6 @@ BEGIN(dec_if_positive)
+       movl %eax,  (v)
+       movl %edx, 4(v)
+ 1:
+-RET_ENDP
++      RET_IRQ_RESTORE
++ENDP
+ #undef v
diff --git a/queue-5.10/x86-objtool-create-.return_sites.patch b/queue-5.10/x86-objtool-create-.return_sites.patch
new file mode 100644 (file)
index 0000000..acce887
--- /dev/null
@@ -0,0 +1,200 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:38 +0200
+Subject: x86,objtool: Create .return_sites
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d9e9d2300681d68a775c28de6aa6e5290ae17796 upstream.
+
+Find all the return-thunk sites and record them in a .return_sites
+section such that the kernel can undo this.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: conflict fixup because of functions added to support IBT]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch.h            |    1 
+ tools/objtool/arch/x86/decode.c |    5 ++
+ tools/objtool/check.c           |   75 ++++++++++++++++++++++++++++++++++++++++
+ tools/objtool/elf.h             |    1 
+ tools/objtool/objtool.c         |    1 
+ tools/objtool/objtool.h         |    1 
+ 6 files changed, 84 insertions(+)
+
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -89,6 +89,7 @@ const char *arch_ret_insn(int len);
+ int arch_decode_hint_reg(u8 sp_reg, int *base);
+ bool arch_is_retpoline(struct symbol *sym);
++bool arch_is_rethunk(struct symbol *sym);
+ int arch_rewrite_retpolines(struct objtool_file *file);
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -649,3 +649,8 @@ bool arch_is_retpoline(struct symbol *sy
+ {
+       return !strncmp(sym->name, "__x86_indirect_", 15);
+ }
++
++bool arch_is_rethunk(struct symbol *sym)
++{
++      return !strcmp(sym->name, "__x86_return_thunk");
++}
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -653,6 +653,52 @@ static int create_retpoline_sites_sectio
+       return 0;
+ }
++static int create_return_sites_sections(struct objtool_file *file)
++{
++      struct instruction *insn;
++      struct section *sec;
++      int idx;
++
++      sec = find_section_by_name(file->elf, ".return_sites");
++      if (sec) {
++              WARN("file already has .return_sites, skipping");
++              return 0;
++      }
++
++      idx = 0;
++      list_for_each_entry(insn, &file->return_thunk_list, call_node)
++              idx++;
++
++      if (!idx)
++              return 0;
++
++      sec = elf_create_section(file->elf, ".return_sites", 0,
++                               sizeof(int), idx);
++      if (!sec) {
++              WARN("elf_create_section: .return_sites");
++              return -1;
++      }
++
++      idx = 0;
++      list_for_each_entry(insn, &file->return_thunk_list, call_node) {
++
++              int *site = (int *)sec->data->d_buf + idx;
++              *site = 0;
++
++              if (elf_add_reloc_to_insn(file->elf, sec,
++                                        idx * sizeof(int),
++                                        R_X86_64_PC32,
++                                        insn->sec, insn->offset)) {
++                      WARN("elf_add_reloc_to_insn: .return_sites");
++                      return -1;
++              }
++
++              idx++;
++      }
++
++      return 0;
++}
++
+ /*
+  * Warnings shouldn't be reported for ignored functions.
+  */
+@@ -888,6 +934,11 @@ __weak bool arch_is_retpoline(struct sym
+       return false;
+ }
++__weak bool arch_is_rethunk(struct symbol *sym)
++{
++      return false;
++}
++
+ #define NEGATIVE_RELOC        ((void *)-1L)
+ static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
+@@ -1029,6 +1080,19 @@ static void add_retpoline_call(struct ob
+       annotate_call_site(file, insn, false);
+ }
++
++static void add_return_call(struct objtool_file *file, struct instruction *insn)
++{
++      /*
++       * Return thunk tail calls are really just returns in disguise,
++       * so convert them accordingly.
++       */
++      insn->type = INSN_RETURN;
++      insn->retpoline_safe = true;
++
++      list_add_tail(&insn->call_node, &file->return_thunk_list);
++}
++
+ /*
+  * Find the destination instructions for all jumps.
+  */
+@@ -1053,6 +1117,9 @@ static int add_jump_destinations(struct
+               } else if (reloc->sym->retpoline_thunk) {
+                       add_retpoline_call(file, insn);
+                       continue;
++              } else if (reloc->sym->return_thunk) {
++                      add_return_call(file, insn);
++                      continue;
+               } else if (insn->func) {
+                       /* internal or external sibling call (with reloc) */
+                       add_call_dest(file, insn, reloc->sym, true);
+@@ -1842,6 +1909,9 @@ static int classify_symbols(struct objto
+                       if (arch_is_retpoline(func))
+                               func->retpoline_thunk = true;
++                      if (arch_is_rethunk(func))
++                              func->return_thunk = true;
++
+                       if (!strcmp(func->name, "__fentry__"))
+                               func->fentry = true;
+@@ -3235,6 +3305,11 @@ int check(struct objtool_file *file)
+               if (ret < 0)
+                       goto out;
+               warnings += ret;
++
++              ret = create_return_sites_sections(file);
++              if (ret < 0)
++                      goto out;
++              warnings += ret;
+       }
+       if (stats) {
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -58,6 +58,7 @@ struct symbol {
+       u8 uaccess_safe      : 1;
+       u8 static_call_tramp : 1;
+       u8 retpoline_thunk   : 1;
++      u8 return_thunk      : 1;
+       u8 fentry            : 1;
+       u8 kcov              : 1;
+ };
+--- a/tools/objtool/objtool.c
++++ b/tools/objtool/objtool.c
+@@ -62,6 +62,7 @@ struct objtool_file *objtool_open_read(c
+       INIT_LIST_HEAD(&file.insn_list);
+       hash_init(file.insn_hash);
+       INIT_LIST_HEAD(&file.retpoline_call_list);
++      INIT_LIST_HEAD(&file.return_thunk_list);
+       INIT_LIST_HEAD(&file.static_call_list);
+       file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
+       file.ignore_unreachables = no_unreachable;
+--- a/tools/objtool/objtool.h
++++ b/tools/objtool/objtool.h
+@@ -19,6 +19,7 @@ struct objtool_file {
+       struct list_head insn_list;
+       DECLARE_HASHTABLE(insn_hash, 20);
+       struct list_head retpoline_call_list;
++      struct list_head return_thunk_list;
+       struct list_head static_call_list;
+       bool ignore_unreachables, c_file, hints, rodata;
+ };
diff --git a/queue-5.10/x86-prepare-asm-files-for-straight-line-speculation.patch b/queue-5.10/x86-prepare-asm-files-for-straight-line-speculation.patch
new file mode 100644 (file)
index 0000000..e6a126e
--- /dev/null
@@ -0,0 +1,3331 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sat, 4 Dec 2021 14:43:40 +0100
+Subject: x86: Prepare asm files for straight-line-speculation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit f94909ceb1ed4bfdb2ada72f93236305e6d6951f upstream.
+
+Replace all ret/retq instructions with RET in preparation of making
+RET a macro. Since AS is case insensitive it's a big no-op without
+RET defined.
+
+  find arch/x86/ -name \*.S | while read file
+  do
+       sed -i 's/\<ret[q]*\>/RET/' $file
+  done
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20211204134907.905503893@infradead.org
+[bwh: Backported to 5.10: ran the above command]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/boot/compressed/efi_thunk_64.S      |    2 -
+ arch/x86/boot/compressed/head_64.S           |    4 +-
+ arch/x86/boot/compressed/mem_encrypt.S       |    4 +-
+ arch/x86/crypto/aegis128-aesni-asm.S         |   48 ++++++++++++------------
+ arch/x86/crypto/aes_ctrby8_avx-x86_64.S      |    2 -
+ arch/x86/crypto/aesni-intel_asm.S            |   52 +++++++++++++--------------
+ arch/x86/crypto/aesni-intel_avx-x86_64.S     |   40 ++++++++++----------
+ arch/x86/crypto/blake2s-core.S               |    4 +-
+ arch/x86/crypto/blowfish-x86_64-asm_64.S     |   12 +++---
+ arch/x86/crypto/camellia-aesni-avx-asm_64.S  |   18 ++++-----
+ arch/x86/crypto/camellia-aesni-avx2-asm_64.S |   18 ++++-----
+ arch/x86/crypto/camellia-x86_64-asm_64.S     |   12 +++---
+ arch/x86/crypto/cast5-avx-x86_64-asm_64.S    |   12 +++---
+ arch/x86/crypto/cast6-avx-x86_64-asm_64.S    |   16 ++++----
+ arch/x86/crypto/chacha-avx2-x86_64.S         |    6 +--
+ arch/x86/crypto/chacha-avx512vl-x86_64.S     |    6 +--
+ arch/x86/crypto/chacha-ssse3-x86_64.S        |    8 ++--
+ arch/x86/crypto/crc32-pclmul_asm.S           |    2 -
+ arch/x86/crypto/crc32c-pcl-intel-asm_64.S    |    2 -
+ arch/x86/crypto/crct10dif-pcl-asm_64.S       |    2 -
+ arch/x86/crypto/des3_ede-asm_64.S            |    4 +-
+ arch/x86/crypto/ghash-clmulni-intel_asm.S    |    6 +--
+ arch/x86/crypto/nh-avx2-x86_64.S             |    2 -
+ arch/x86/crypto/nh-sse2-x86_64.S             |    2 -
+ arch/x86/crypto/serpent-avx-x86_64-asm_64.S  |   16 ++++----
+ arch/x86/crypto/serpent-avx2-asm_64.S        |   16 ++++----
+ arch/x86/crypto/serpent-sse2-i586-asm_32.S   |    6 +--
+ arch/x86/crypto/serpent-sse2-x86_64-asm_64.S |    6 +--
+ arch/x86/crypto/sha1_avx2_x86_64_asm.S       |    2 -
+ arch/x86/crypto/sha1_ni_asm.S                |    2 -
+ arch/x86/crypto/sha1_ssse3_asm.S             |    2 -
+ arch/x86/crypto/sha256-avx-asm.S             |    2 -
+ arch/x86/crypto/sha256-avx2-asm.S            |    2 -
+ arch/x86/crypto/sha256-ssse3-asm.S           |    2 -
+ arch/x86/crypto/sha256_ni_asm.S              |    2 -
+ arch/x86/crypto/sha512-avx-asm.S             |    2 -
+ arch/x86/crypto/sha512-avx2-asm.S            |    2 -
+ arch/x86/crypto/sha512-ssse3-asm.S           |    2 -
+ arch/x86/crypto/twofish-avx-x86_64-asm_64.S  |   16 ++++----
+ arch/x86/crypto/twofish-i586-asm_32.S        |    4 +-
+ arch/x86/crypto/twofish-x86_64-asm_64-3way.S |    6 +--
+ arch/x86/crypto/twofish-x86_64-asm_64.S      |    4 +-
+ arch/x86/entry/entry_32.S                    |    2 -
+ arch/x86/entry/entry_64.S                    |   12 +++---
+ arch/x86/entry/thunk_32.S                    |    2 -
+ arch/x86/entry/thunk_64.S                    |    2 -
+ arch/x86/entry/vdso/vdso32/system_call.S     |    2 -
+ arch/x86/entry/vsyscall/vsyscall_emu_64.S    |    6 +--
+ arch/x86/kernel/acpi/wakeup_32.S             |    6 +--
+ arch/x86/kernel/ftrace_32.S                  |    6 +--
+ arch/x86/kernel/ftrace_64.S                  |   10 ++---
+ arch/x86/kernel/head_32.S                    |    2 -
+ arch/x86/kernel/irqflags.S                   |    4 +-
+ arch/x86/kernel/relocate_kernel_32.S         |   10 ++---
+ arch/x86/kernel/relocate_kernel_64.S         |   10 ++---
+ arch/x86/kernel/sev_verify_cbit.S            |    2 -
+ arch/x86/kernel/verify_cpu.S                 |    4 +-
+ arch/x86/kvm/svm/vmenter.S                   |    2 -
+ arch/x86/kvm/vmx/vmenter.S                   |   14 +++----
+ arch/x86/lib/atomic64_386_32.S               |    2 -
+ arch/x86/lib/atomic64_cx8_32.S               |   16 ++++----
+ arch/x86/lib/checksum_32.S                   |    8 ++--
+ arch/x86/lib/clear_page_64.S                 |    6 +--
+ arch/x86/lib/cmpxchg16b_emu.S                |    4 +-
+ arch/x86/lib/cmpxchg8b_emu.S                 |    4 +-
+ arch/x86/lib/copy_mc_64.S                    |    6 +--
+ arch/x86/lib/copy_page_64.S                  |    4 +-
+ arch/x86/lib/copy_user_64.S                  |   12 +++---
+ arch/x86/lib/csum-copy_64.S                  |    2 -
+ arch/x86/lib/getuser.S                       |   22 +++++------
+ arch/x86/lib/hweight.S                       |    6 +--
+ arch/x86/lib/iomap_copy_64.S                 |    2 -
+ arch/x86/lib/memcpy_64.S                     |   12 +++---
+ arch/x86/lib/memmove_64.S                    |    4 +-
+ arch/x86/lib/memset_64.S                     |    6 +--
+ arch/x86/lib/msr-reg.S                       |    4 +-
+ arch/x86/lib/putuser.S                       |    6 +--
+ arch/x86/lib/retpoline.S                     |    2 -
+ arch/x86/math-emu/div_Xsig.S                 |    2 -
+ arch/x86/math-emu/div_small.S                |    2 -
+ arch/x86/math-emu/mul_Xsig.S                 |    6 +--
+ arch/x86/math-emu/polynom_Xsig.S             |    2 -
+ arch/x86/math-emu/reg_norm.S                 |    6 +--
+ arch/x86/math-emu/reg_round.S                |    2 -
+ arch/x86/math-emu/reg_u_add.S                |    2 -
+ arch/x86/math-emu/reg_u_div.S                |    2 -
+ arch/x86/math-emu/reg_u_mul.S                |    2 -
+ arch/x86/math-emu/reg_u_sub.S                |    2 -
+ arch/x86/math-emu/round_Xsig.S               |    4 +-
+ arch/x86/math-emu/shr_Xsig.S                 |    8 ++--
+ arch/x86/math-emu/wm_shrx.S                  |   16 ++++----
+ arch/x86/mm/mem_encrypt_boot.S               |    4 +-
+ arch/x86/platform/efi/efi_stub_32.S          |    2 -
+ arch/x86/platform/efi/efi_stub_64.S          |    2 -
+ arch/x86/platform/efi/efi_thunk_64.S         |    2 -
+ arch/x86/platform/olpc/xo1-wakeup.S          |    6 +--
+ arch/x86/power/hibernate_asm_32.S            |    4 +-
+ arch/x86/power/hibernate_asm_64.S            |    4 +-
+ arch/x86/um/checksum_32.S                    |    4 +-
+ arch/x86/um/setjmp_32.S                      |    2 -
+ arch/x86/um/setjmp_64.S                      |    2 -
+ arch/x86/xen/xen-asm.S                       |   14 +++----
+ arch/x86/xen/xen-head.S                      |    2 -
+ 103 files changed, 353 insertions(+), 353 deletions(-)
+
+--- a/arch/x86/boot/compressed/efi_thunk_64.S
++++ b/arch/x86/boot/compressed/efi_thunk_64.S
+@@ -89,7 +89,7 @@ SYM_FUNC_START(__efi64_thunk)
+       pop     %rbx
+       pop     %rbp
+-      ret
++      RET
+ SYM_FUNC_END(__efi64_thunk)
+       .code32
+--- a/arch/x86/boot/compressed/head_64.S
++++ b/arch/x86/boot/compressed/head_64.S
+@@ -786,7 +786,7 @@ SYM_FUNC_START(efi32_pe_entry)
+ 2:    popl    %edi                            // restore callee-save registers
+       popl    %ebx
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(efi32_pe_entry)
+       .section ".rodata"
+@@ -868,7 +868,7 @@ SYM_FUNC_START(startup32_check_sev_cbit)
+       popl    %ebx
+       popl    %eax
+ #endif
+-      ret
++      RET
+ SYM_FUNC_END(startup32_check_sev_cbit)
+ /*
+--- a/arch/x86/boot/compressed/mem_encrypt.S
++++ b/arch/x86/boot/compressed/mem_encrypt.S
+@@ -58,7 +58,7 @@ SYM_FUNC_START(get_sev_encryption_bit)
+ #endif        /* CONFIG_AMD_MEM_ENCRYPT */
+-      ret
++      RET
+ SYM_FUNC_END(get_sev_encryption_bit)
+       .code64
+@@ -99,7 +99,7 @@ SYM_FUNC_START(set_sev_encryption_mask)
+ #endif
+       xor     %rax, %rax
+-      ret
++      RET
+ SYM_FUNC_END(set_sev_encryption_mask)
+       .data
+--- a/arch/x86/crypto/aegis128-aesni-asm.S
++++ b/arch/x86/crypto/aegis128-aesni-asm.S
+@@ -122,7 +122,7 @@ SYM_FUNC_START_LOCAL(__load_partial)
+       pxor T0, MSG
+ .Lld_partial_8:
+-      ret
++      RET
+ SYM_FUNC_END(__load_partial)
+ /*
+@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(__store_partial)
+       mov %r10b, (%r9)
+ .Lst_partial_1:
+-      ret
++      RET
+ SYM_FUNC_END(__store_partial)
+ /*
+@@ -225,7 +225,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ini
+       movdqu STATE4, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(crypto_aegis128_aesni_init)
+ /*
+@@ -337,7 +337,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+       movdqu STATE3, 0x30(STATEP)
+       movdqu STATE4, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Lad_out_1:
+       movdqu STATE4, 0x00(STATEP)
+@@ -346,7 +346,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+       movdqu STATE2, 0x30(STATEP)
+       movdqu STATE3, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Lad_out_2:
+       movdqu STATE3, 0x00(STATEP)
+@@ -355,7 +355,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+       movdqu STATE1, 0x30(STATEP)
+       movdqu STATE2, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Lad_out_3:
+       movdqu STATE2, 0x00(STATEP)
+@@ -364,7 +364,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+       movdqu STATE0, 0x30(STATEP)
+       movdqu STATE1, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Lad_out_4:
+       movdqu STATE1, 0x00(STATEP)
+@@ -373,11 +373,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+       movdqu STATE4, 0x30(STATEP)
+       movdqu STATE0, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Lad_out:
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(crypto_aegis128_aesni_ad)
+ .macro encrypt_block a s0 s1 s2 s3 s4 i
+@@ -452,7 +452,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc
+       movdqu STATE2, 0x30(STATEP)
+       movdqu STATE3, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Lenc_out_1:
+       movdqu STATE3, 0x00(STATEP)
+@@ -461,7 +461,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc
+       movdqu STATE1, 0x30(STATEP)
+       movdqu STATE2, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Lenc_out_2:
+       movdqu STATE2, 0x00(STATEP)
+@@ -470,7 +470,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc
+       movdqu STATE0, 0x30(STATEP)
+       movdqu STATE1, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Lenc_out_3:
+       movdqu STATE1, 0x00(STATEP)
+@@ -479,7 +479,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc
+       movdqu STATE4, 0x30(STATEP)
+       movdqu STATE0, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Lenc_out_4:
+       movdqu STATE0, 0x00(STATEP)
+@@ -488,11 +488,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc
+       movdqu STATE3, 0x30(STATEP)
+       movdqu STATE4, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Lenc_out:
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(crypto_aegis128_aesni_enc)
+ /*
+@@ -532,7 +532,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc
+       movdqu STATE3, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
+ .macro decrypt_block a s0 s1 s2 s3 s4 i
+@@ -606,7 +606,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec
+       movdqu STATE2, 0x30(STATEP)
+       movdqu STATE3, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Ldec_out_1:
+       movdqu STATE3, 0x00(STATEP)
+@@ -615,7 +615,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec
+       movdqu STATE1, 0x30(STATEP)
+       movdqu STATE2, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Ldec_out_2:
+       movdqu STATE2, 0x00(STATEP)
+@@ -624,7 +624,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec
+       movdqu STATE0, 0x30(STATEP)
+       movdqu STATE1, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Ldec_out_3:
+       movdqu STATE1, 0x00(STATEP)
+@@ -633,7 +633,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec
+       movdqu STATE4, 0x30(STATEP)
+       movdqu STATE0, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Ldec_out_4:
+       movdqu STATE0, 0x00(STATEP)
+@@ -642,11 +642,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec
+       movdqu STATE3, 0x30(STATEP)
+       movdqu STATE4, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ .Ldec_out:
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(crypto_aegis128_aesni_dec)
+ /*
+@@ -696,7 +696,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec
+       movdqu STATE3, 0x40(STATEP)
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
+ /*
+@@ -743,5 +743,5 @@ SYM_FUNC_START(crypto_aegis128_aesni_fin
+       movdqu MSG, (%rsi)
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(crypto_aegis128_aesni_final)
+--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
++++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+@@ -525,7 +525,7 @@ ddq_add_8:
+       /* return updated IV */
+       vpshufb xbyteswap, xcounter, xcounter
+       vmovdqu xcounter, (p_iv)
+-      ret
++      RET
+ .endm
+ /*
+--- a/arch/x86/crypto/aesni-intel_asm.S
++++ b/arch/x86/crypto/aesni-intel_asm.S
+@@ -1598,7 +1598,7 @@ SYM_FUNC_START(aesni_gcm_dec)
+       GCM_ENC_DEC dec
+       GCM_COMPLETE arg10, arg11
+       FUNC_RESTORE
+-      ret
++      RET
+ SYM_FUNC_END(aesni_gcm_dec)
+@@ -1687,7 +1687,7 @@ SYM_FUNC_START(aesni_gcm_enc)
+       GCM_COMPLETE arg10, arg11
+       FUNC_RESTORE
+-      ret
++      RET
+ SYM_FUNC_END(aesni_gcm_enc)
+ /*****************************************************************************
+@@ -1705,7 +1705,7 @@ SYM_FUNC_START(aesni_gcm_init)
+       FUNC_SAVE
+       GCM_INIT %arg3, %arg4,%arg5, %arg6
+       FUNC_RESTORE
+-      ret
++      RET
+ SYM_FUNC_END(aesni_gcm_init)
+ /*****************************************************************************
+@@ -1720,7 +1720,7 @@ SYM_FUNC_START(aesni_gcm_enc_update)
+       FUNC_SAVE
+       GCM_ENC_DEC enc
+       FUNC_RESTORE
+-      ret
++      RET
+ SYM_FUNC_END(aesni_gcm_enc_update)
+ /*****************************************************************************
+@@ -1735,7 +1735,7 @@ SYM_FUNC_START(aesni_gcm_dec_update)
+       FUNC_SAVE
+       GCM_ENC_DEC dec
+       FUNC_RESTORE
+-      ret
++      RET
+ SYM_FUNC_END(aesni_gcm_dec_update)
+ /*****************************************************************************
+@@ -1750,7 +1750,7 @@ SYM_FUNC_START(aesni_gcm_finalize)
+       FUNC_SAVE
+       GCM_COMPLETE %arg3 %arg4
+       FUNC_RESTORE
+-      ret
++      RET
+ SYM_FUNC_END(aesni_gcm_finalize)
+ #endif
+@@ -1766,7 +1766,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a
+       pxor %xmm1, %xmm0
+       movaps %xmm0, (TKEYP)
+       add $0x10, TKEYP
+-      ret
++      RET
+ SYM_FUNC_END(_key_expansion_256a)
+ SYM_FUNC_END_ALIAS(_key_expansion_128)
+@@ -1791,7 +1791,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192a
+       shufps $0b01001110, %xmm2, %xmm1
+       movaps %xmm1, 0x10(TKEYP)
+       add $0x20, TKEYP
+-      ret
++      RET
+ SYM_FUNC_END(_key_expansion_192a)
+ SYM_FUNC_START_LOCAL(_key_expansion_192b)
+@@ -1810,7 +1810,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192b
+       movaps %xmm0, (TKEYP)
+       add $0x10, TKEYP
+-      ret
++      RET
+ SYM_FUNC_END(_key_expansion_192b)
+ SYM_FUNC_START_LOCAL(_key_expansion_256b)
+@@ -1822,7 +1822,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256b
+       pxor %xmm1, %xmm2
+       movaps %xmm2, (TKEYP)
+       add $0x10, TKEYP
+-      ret
++      RET
+ SYM_FUNC_END(_key_expansion_256b)
+ /*
+@@ -1937,7 +1937,7 @@ SYM_FUNC_START(aesni_set_key)
+       popl KEYP
+ #endif
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(aesni_set_key)
+ /*
+@@ -1961,7 +1961,7 @@ SYM_FUNC_START(aesni_enc)
+       popl KEYP
+ #endif
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(aesni_enc)
+ /*
+@@ -2018,7 +2018,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc1)
+       aesenc KEY, STATE
+       movaps 0x70(TKEYP), KEY
+       aesenclast KEY, STATE
+-      ret
++      RET
+ SYM_FUNC_END(_aesni_enc1)
+ /*
+@@ -2126,7 +2126,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc4)
+       aesenclast KEY, STATE2
+       aesenclast KEY, STATE3
+       aesenclast KEY, STATE4
+-      ret
++      RET
+ SYM_FUNC_END(_aesni_enc4)
+ /*
+@@ -2151,7 +2151,7 @@ SYM_FUNC_START(aesni_dec)
+       popl KEYP
+ #endif
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(aesni_dec)
+ /*
+@@ -2208,7 +2208,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec1)
+       aesdec KEY, STATE
+       movaps 0x70(TKEYP), KEY
+       aesdeclast KEY, STATE
+-      ret
++      RET
+ SYM_FUNC_END(_aesni_dec1)
+ /*
+@@ -2316,7 +2316,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec4)
+       aesdeclast KEY, STATE2
+       aesdeclast KEY, STATE3
+       aesdeclast KEY, STATE4
+-      ret
++      RET
+ SYM_FUNC_END(_aesni_dec4)
+ /*
+@@ -2376,7 +2376,7 @@ SYM_FUNC_START(aesni_ecb_enc)
+       popl LEN
+ #endif
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(aesni_ecb_enc)
+ /*
+@@ -2437,7 +2437,7 @@ SYM_FUNC_START(aesni_ecb_dec)
+       popl LEN
+ #endif
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(aesni_ecb_dec)
+ /*
+@@ -2481,7 +2481,7 @@ SYM_FUNC_START(aesni_cbc_enc)
+       popl IVP
+ #endif
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(aesni_cbc_enc)
+ /*
+@@ -2574,7 +2574,7 @@ SYM_FUNC_START(aesni_cbc_dec)
+       popl IVP
+ #endif
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(aesni_cbc_dec)
+ #ifdef __x86_64__
+@@ -2602,7 +2602,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc_init)
+       mov $1, TCTR_LOW
+       movq TCTR_LOW, INC
+       movq CTR, TCTR_LOW
+-      ret
++      RET
+ SYM_FUNC_END(_aesni_inc_init)
+ /*
+@@ -2630,7 +2630,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc)
+ .Linc_low:
+       movaps CTR, IV
+       pshufb BSWAP_MASK, IV
+-      ret
++      RET
+ SYM_FUNC_END(_aesni_inc)
+ /*
+@@ -2693,7 +2693,7 @@ SYM_FUNC_START(aesni_ctr_enc)
+       movups IV, (IVP)
+ .Lctr_enc_just_ret:
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(aesni_ctr_enc)
+ /*
+@@ -2778,7 +2778,7 @@ SYM_FUNC_START(aesni_xts_encrypt)
+       movups IV, (IVP)
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(aesni_xts_encrypt)
+ /*
+@@ -2846,7 +2846,7 @@ SYM_FUNC_START(aesni_xts_decrypt)
+       movups IV, (IVP)
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(aesni_xts_decrypt)
+ #endif
+--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
++++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
+@@ -1777,7 +1777,7 @@ SYM_FUNC_START(aesni_gcm_init_avx_gen2)
+         FUNC_SAVE
+         INIT GHASH_MUL_AVX, PRECOMPUTE_AVX
+         FUNC_RESTORE
+-        ret
++        RET
+ SYM_FUNC_END(aesni_gcm_init_avx_gen2)
+ ###############################################################################
+@@ -1798,15 +1798,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_
+         # must be 192
+         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11
+         FUNC_RESTORE
+-        ret
++        RET
+ key_128_enc_update:
+         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9
+         FUNC_RESTORE
+-        ret
++        RET
+ key_256_enc_update:
+         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13
+         FUNC_RESTORE
+-        ret
++        RET
+ SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2)
+ ###############################################################################
+@@ -1827,15 +1827,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_
+         # must be 192
+         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11
+         FUNC_RESTORE
+-        ret
++        RET
+ key_128_dec_update:
+         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9
+         FUNC_RESTORE
+-        ret
++        RET
+ key_256_dec_update:
+         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13
+         FUNC_RESTORE
+-        ret
++        RET
+ SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2)
+ ###############################################################################
+@@ -1856,15 +1856,15 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_ge
+         # must be 192
+         GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4
+         FUNC_RESTORE
+-        ret
++        RET
+ key_128_finalize:
+         GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4
+         FUNC_RESTORE
+-        ret
++        RET
+ key_256_finalize:
+         GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4
+         FUNC_RESTORE
+-        ret
++        RET
+ SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)
+ ###############################################################################
+@@ -2745,7 +2745,7 @@ SYM_FUNC_START(aesni_gcm_init_avx_gen4)
+         FUNC_SAVE
+         INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2
+         FUNC_RESTORE
+-        ret
++        RET
+ SYM_FUNC_END(aesni_gcm_init_avx_gen4)
+ ###############################################################################
+@@ -2766,15 +2766,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_
+         # must be 192
+         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11
+         FUNC_RESTORE
+-      ret
++      RET
+ key_128_enc_update4:
+         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9
+         FUNC_RESTORE
+-      ret
++      RET
+ key_256_enc_update4:
+         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13
+         FUNC_RESTORE
+-      ret
++      RET
+ SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4)
+ ###############################################################################
+@@ -2795,15 +2795,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_
+         # must be 192
+         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11
+         FUNC_RESTORE
+-        ret
++        RET
+ key_128_dec_update4:
+         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9
+         FUNC_RESTORE
+-        ret
++        RET
+ key_256_dec_update4:
+         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13
+         FUNC_RESTORE
+-        ret
++        RET
+ SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4)
+ ###############################################################################
+@@ -2824,13 +2824,13 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_ge
+         # must be 192
+         GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4
+         FUNC_RESTORE
+-        ret
++        RET
+ key_128_finalize4:
+         GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4
+         FUNC_RESTORE
+-        ret
++        RET
+ key_256_finalize4:
+         GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4
+         FUNC_RESTORE
+-        ret
++        RET
+ SYM_FUNC_END(aesni_gcm_finalize_avx_gen4)
+--- a/arch/x86/crypto/blake2s-core.S
++++ b/arch/x86/crypto/blake2s-core.S
+@@ -171,7 +171,7 @@ SYM_FUNC_START(blake2s_compress_ssse3)
+       movdqu          %xmm1,0x10(%rdi)
+       movdqu          %xmm14,0x20(%rdi)
+ .Lendofloop:
+-      ret
++      RET
+ SYM_FUNC_END(blake2s_compress_ssse3)
+ #ifdef CONFIG_AS_AVX512
+@@ -251,6 +251,6 @@ SYM_FUNC_START(blake2s_compress_avx512)
+       vmovdqu         %xmm1,0x10(%rdi)
+       vmovdqu         %xmm4,0x20(%rdi)
+       vzeroupper
+-      retq
++      RET
+ SYM_FUNC_END(blake2s_compress_avx512)
+ #endif /* CONFIG_AS_AVX512 */
+--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
++++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
+@@ -135,10 +135,10 @@ SYM_FUNC_START(__blowfish_enc_blk)
+       jnz .L__enc_xor;
+       write_block();
+-      ret;
++      RET;
+ .L__enc_xor:
+       xor_block();
+-      ret;
++      RET;
+ SYM_FUNC_END(__blowfish_enc_blk)
+ SYM_FUNC_START(blowfish_dec_blk)
+@@ -170,7 +170,7 @@ SYM_FUNC_START(blowfish_dec_blk)
+       movq %r11, %r12;
+-      ret;
++      RET;
+ SYM_FUNC_END(blowfish_dec_blk)
+ /**********************************************************************
+@@ -322,14 +322,14 @@ SYM_FUNC_START(__blowfish_enc_blk_4way)
+       popq %rbx;
+       popq %r12;
+-      ret;
++      RET;
+ .L__enc_xor4:
+       xor_block4();
+       popq %rbx;
+       popq %r12;
+-      ret;
++      RET;
+ SYM_FUNC_END(__blowfish_enc_blk_4way)
+ SYM_FUNC_START(blowfish_dec_blk_4way)
+@@ -364,5 +364,5 @@ SYM_FUNC_START(blowfish_dec_blk_4way)
+       popq %rbx;
+       popq %r12;
+-      ret;
++      RET;
+ SYM_FUNC_END(blowfish_dec_blk_4way)
+--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
++++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+@@ -193,7 +193,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x0_x1_x2_
+       roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+                 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
+                 %rcx, (%r9));
+-      ret;
++      RET;
+ SYM_FUNC_END(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
+ .align 8
+@@ -201,7 +201,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x4_x5_x6_
+       roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3,
+                 %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
+                 %rax, (%r9));
+-      ret;
++      RET;
+ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
+ /*
+@@ -787,7 +787,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk1
+                   %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax));
+       FRAME_END
+-      ret;
++      RET;
+ .align 8
+ .Lenc_max32:
+@@ -874,7 +874,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk1
+                   %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax));
+       FRAME_END
+-      ret;
++      RET;
+ .align 8
+ .Ldec_max32:
+@@ -915,7 +915,7 @@ SYM_FUNC_START(camellia_ecb_enc_16way)
+                    %xmm8, %rsi);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(camellia_ecb_enc_16way)
+ SYM_FUNC_START(camellia_ecb_dec_16way)
+@@ -945,7 +945,7 @@ SYM_FUNC_START(camellia_ecb_dec_16way)
+                    %xmm8, %rsi);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(camellia_ecb_dec_16way)
+ SYM_FUNC_START(camellia_cbc_dec_16way)
+@@ -996,7 +996,7 @@ SYM_FUNC_START(camellia_cbc_dec_16way)
+                    %xmm8, %rsi);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(camellia_cbc_dec_16way)
+ #define inc_le128(x, minus_one, tmp) \
+@@ -1109,7 +1109,7 @@ SYM_FUNC_START(camellia_ctr_16way)
+                    %xmm8, %rsi);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(camellia_ctr_16way)
+ #define gf128mul_x_ble(iv, mask, tmp) \
+@@ -1253,7 +1253,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_
+                    %xmm8, %rsi);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(camellia_xts_crypt_16way)
+ SYM_FUNC_START(camellia_xts_enc_16way)
+--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
++++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+@@ -227,7 +227,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_
+       roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+                 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
+                 %rcx, (%r9));
+-      ret;
++      RET;
+ SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
+ .align 8
+@@ -235,7 +235,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_
+       roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
+                 %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
+                 %rax, (%r9));
+-      ret;
++      RET;
+ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
+ /*
+@@ -825,7 +825,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk3
+                   %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax));
+       FRAME_END
+-      ret;
++      RET;
+ .align 8
+ .Lenc_max32:
+@@ -912,7 +912,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk3
+                   %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax));
+       FRAME_END
+-      ret;
++      RET;
+ .align 8
+ .Ldec_max32:
+@@ -957,7 +957,7 @@ SYM_FUNC_START(camellia_ecb_enc_32way)
+       vzeroupper;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(camellia_ecb_enc_32way)
+ SYM_FUNC_START(camellia_ecb_dec_32way)
+@@ -991,7 +991,7 @@ SYM_FUNC_START(camellia_ecb_dec_32way)
+       vzeroupper;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(camellia_ecb_dec_32way)
+ SYM_FUNC_START(camellia_cbc_dec_32way)
+@@ -1059,7 +1059,7 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
+       vzeroupper;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(camellia_cbc_dec_32way)
+ #define inc_le128(x, minus_one, tmp) \
+@@ -1199,7 +1199,7 @@ SYM_FUNC_START(camellia_ctr_32way)
+       vzeroupper;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(camellia_ctr_32way)
+ #define gf128mul_x_ble(iv, mask, tmp) \
+@@ -1366,7 +1366,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_
+       vzeroupper;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(camellia_xts_crypt_32way)
+ SYM_FUNC_START(camellia_xts_enc_32way)
+--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
++++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
+@@ -213,13 +213,13 @@ SYM_FUNC_START(__camellia_enc_blk)
+       enc_outunpack(mov, RT1);
+       movq RR12, %r12;
+-      ret;
++      RET;
+ .L__enc_xor:
+       enc_outunpack(xor, RT1);
+       movq RR12, %r12;
+-      ret;
++      RET;
+ SYM_FUNC_END(__camellia_enc_blk)
+ SYM_FUNC_START(camellia_dec_blk)
+@@ -257,7 +257,7 @@ SYM_FUNC_START(camellia_dec_blk)
+       dec_outunpack();
+       movq RR12, %r12;
+-      ret;
++      RET;
+ SYM_FUNC_END(camellia_dec_blk)
+ /**********************************************************************
+@@ -448,14 +448,14 @@ SYM_FUNC_START(__camellia_enc_blk_2way)
+       movq RR12, %r12;
+       popq %rbx;
+-      ret;
++      RET;
+ .L__enc2_xor:
+       enc_outunpack2(xor, RT2);
+       movq RR12, %r12;
+       popq %rbx;
+-      ret;
++      RET;
+ SYM_FUNC_END(__camellia_enc_blk_2way)
+ SYM_FUNC_START(camellia_dec_blk_2way)
+@@ -495,5 +495,5 @@ SYM_FUNC_START(camellia_dec_blk_2way)
+       movq RR12, %r12;
+       movq RXOR, %rbx;
+-      ret;
++      RET;
+ SYM_FUNC_END(camellia_dec_blk_2way)
+--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
++++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+@@ -279,7 +279,7 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
+       outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
+       outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
+-      ret;
++      RET;
+ SYM_FUNC_END(__cast5_enc_blk16)
+ .align 16
+@@ -352,7 +352,7 @@ SYM_FUNC_START_LOCAL(__cast5_dec_blk16)
+       outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
+       outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
+-      ret;
++      RET;
+ .L__skip_dec:
+       vpsrldq $4, RKR, RKR;
+@@ -393,7 +393,7 @@ SYM_FUNC_START(cast5_ecb_enc_16way)
+       popq %r15;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(cast5_ecb_enc_16way)
+ SYM_FUNC_START(cast5_ecb_dec_16way)
+@@ -431,7 +431,7 @@ SYM_FUNC_START(cast5_ecb_dec_16way)
+       popq %r15;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(cast5_ecb_dec_16way)
+ SYM_FUNC_START(cast5_cbc_dec_16way)
+@@ -483,7 +483,7 @@ SYM_FUNC_START(cast5_cbc_dec_16way)
+       popq %r15;
+       popq %r12;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(cast5_cbc_dec_16way)
+ SYM_FUNC_START(cast5_ctr_16way)
+@@ -559,5 +559,5 @@ SYM_FUNC_START(cast5_ctr_16way)
+       popq %r15;
+       popq %r12;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(cast5_ctr_16way)
+--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
++++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+@@ -291,7 +291,7 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
+       outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+       outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+-      ret;
++      RET;
+ SYM_FUNC_END(__cast6_enc_blk8)
+ .align 8
+@@ -338,7 +338,7 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
+       outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+       outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+-      ret;
++      RET;
+ SYM_FUNC_END(__cast6_dec_blk8)
+ SYM_FUNC_START(cast6_ecb_enc_8way)
+@@ -361,7 +361,7 @@ SYM_FUNC_START(cast6_ecb_enc_8way)
+       popq %r15;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(cast6_ecb_enc_8way)
+ SYM_FUNC_START(cast6_ecb_dec_8way)
+@@ -384,7 +384,7 @@ SYM_FUNC_START(cast6_ecb_dec_8way)
+       popq %r15;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(cast6_ecb_dec_8way)
+ SYM_FUNC_START(cast6_cbc_dec_8way)
+@@ -410,7 +410,7 @@ SYM_FUNC_START(cast6_cbc_dec_8way)
+       popq %r15;
+       popq %r12;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(cast6_cbc_dec_8way)
+ SYM_FUNC_START(cast6_ctr_8way)
+@@ -438,7 +438,7 @@ SYM_FUNC_START(cast6_ctr_8way)
+       popq %r15;
+       popq %r12;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(cast6_ctr_8way)
+ SYM_FUNC_START(cast6_xts_enc_8way)
+@@ -465,7 +465,7 @@ SYM_FUNC_START(cast6_xts_enc_8way)
+       popq %r15;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(cast6_xts_enc_8way)
+ SYM_FUNC_START(cast6_xts_dec_8way)
+@@ -492,5 +492,5 @@ SYM_FUNC_START(cast6_xts_dec_8way)
+       popq %r15;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(cast6_xts_dec_8way)
+--- a/arch/x86/crypto/chacha-avx2-x86_64.S
++++ b/arch/x86/crypto/chacha-avx2-x86_64.S
+@@ -193,7 +193,7 @@ SYM_FUNC_START(chacha_2block_xor_avx2)
+ .Ldone2:
+       vzeroupper
+-      ret
++      RET
+ .Lxorpart2:
+       # xor remaining bytes from partial register into output
+@@ -498,7 +498,7 @@ SYM_FUNC_START(chacha_4block_xor_avx2)
+ .Ldone4:
+       vzeroupper
+-      ret
++      RET
+ .Lxorpart4:
+       # xor remaining bytes from partial register into output
+@@ -992,7 +992,7 @@ SYM_FUNC_START(chacha_8block_xor_avx2)
+ .Ldone8:
+       vzeroupper
+       lea             -8(%r10),%rsp
+-      ret
++      RET
+ .Lxorpart8:
+       # xor remaining bytes from partial register into output
+--- a/arch/x86/crypto/chacha-avx512vl-x86_64.S
++++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S
+@@ -166,7 +166,7 @@ SYM_FUNC_START(chacha_2block_xor_avx512v
+ .Ldone2:
+       vzeroupper
+-      ret
++      RET
+ .Lxorpart2:
+       # xor remaining bytes from partial register into output
+@@ -432,7 +432,7 @@ SYM_FUNC_START(chacha_4block_xor_avx512v
+ .Ldone4:
+       vzeroupper
+-      ret
++      RET
+ .Lxorpart4:
+       # xor remaining bytes from partial register into output
+@@ -812,7 +812,7 @@ SYM_FUNC_START(chacha_8block_xor_avx512v
+ .Ldone8:
+       vzeroupper
+-      ret
++      RET
+ .Lxorpart8:
+       # xor remaining bytes from partial register into output
+--- a/arch/x86/crypto/chacha-ssse3-x86_64.S
++++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
+@@ -108,7 +108,7 @@ SYM_FUNC_START_LOCAL(chacha_permute)
+       sub             $2,%r8d
+       jnz             .Ldoubleround
+-      ret
++      RET
+ SYM_FUNC_END(chacha_permute)
+ SYM_FUNC_START(chacha_block_xor_ssse3)
+@@ -166,7 +166,7 @@ SYM_FUNC_START(chacha_block_xor_ssse3)
+ .Ldone:
+       FRAME_END
+-      ret
++      RET
+ .Lxorpart:
+       # xor remaining bytes from partial register into output
+@@ -217,7 +217,7 @@ SYM_FUNC_START(hchacha_block_ssse3)
+       movdqu          %xmm3,0x10(%rsi)
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(hchacha_block_ssse3)
+ SYM_FUNC_START(chacha_4block_xor_ssse3)
+@@ -762,7 +762,7 @@ SYM_FUNC_START(chacha_4block_xor_ssse3)
+ .Ldone4:
+       lea             -8(%r10),%rsp
+-      ret
++      RET
+ .Lxorpart4:
+       # xor remaining bytes from partial register into output
+--- a/arch/x86/crypto/crc32-pclmul_asm.S
++++ b/arch/x86/crypto/crc32-pclmul_asm.S
+@@ -236,5 +236,5 @@ fold_64:
+       pxor    %xmm2, %xmm1
+       pextrd  $0x01, %xmm1, %eax
+-      ret
++      RET
+ SYM_FUNC_END(crc32_pclmul_le_16)
+--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
++++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+@@ -309,7 +309,7 @@ do_return:
+       popq    %rsi
+       popq    %rdi
+       popq    %rbx
+-        ret
++        RET
+ SYM_FUNC_END(crc_pcl)
+ .section      .rodata, "a", @progbits
+--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
++++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
+@@ -257,7 +257,7 @@ SYM_FUNC_START(crc_t10dif_pcl)
+       # Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0.
+       pextrw  $0, %xmm0, %eax
+-      ret
++      RET
+ .align 16
+ .Lless_than_256_bytes:
+--- a/arch/x86/crypto/des3_ede-asm_64.S
++++ b/arch/x86/crypto/des3_ede-asm_64.S
+@@ -243,7 +243,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk
+       popq %r12;
+       popq %rbx;
+-      ret;
++      RET;
+ SYM_FUNC_END(des3_ede_x86_64_crypt_blk)
+ /***********************************************************************
+@@ -528,7 +528,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk
+       popq %r12;
+       popq %rbx;
+-      ret;
++      RET;
+ SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)
+ .section      .rodata, "a", @progbits
+--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
++++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
+@@ -85,7 +85,7 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_bl
+       psrlq $1, T2
+       pxor T2, T1
+       pxor T1, DATA
+-      ret
++      RET
+ SYM_FUNC_END(__clmul_gf128mul_ble)
+ /* void clmul_ghash_mul(char *dst, const u128 *shash) */
+@@ -99,7 +99,7 @@ SYM_FUNC_START(clmul_ghash_mul)
+       pshufb BSWAP, DATA
+       movups DATA, (%rdi)
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(clmul_ghash_mul)
+ /*
+@@ -128,5 +128,5 @@ SYM_FUNC_START(clmul_ghash_update)
+       movups DATA, (%rdi)
+ .Lupdate_just_ret:
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(clmul_ghash_update)
+--- a/arch/x86/crypto/nh-avx2-x86_64.S
++++ b/arch/x86/crypto/nh-avx2-x86_64.S
+@@ -153,5 +153,5 @@ SYM_FUNC_START(nh_avx2)
+       vpaddq          T1, T0, T0
+       vpaddq          T4, T0, T0
+       vmovdqu         T0, (HASH)
+-      ret
++      RET
+ SYM_FUNC_END(nh_avx2)
+--- a/arch/x86/crypto/nh-sse2-x86_64.S
++++ b/arch/x86/crypto/nh-sse2-x86_64.S
+@@ -119,5 +119,5 @@ SYM_FUNC_START(nh_sse2)
+       paddq           PASS2_SUMS, T1
+       movdqu          T0, 0x00(HASH)
+       movdqu          T1, 0x10(HASH)
+-      ret
++      RET
+ SYM_FUNC_END(nh_sse2)
+--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
++++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+@@ -605,7 +605,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_
+       write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+       write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+-      ret;
++      RET;
+ SYM_FUNC_END(__serpent_enc_blk8_avx)
+ .align 8
+@@ -659,7 +659,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk8_
+       write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
+       write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
+-      ret;
++      RET;
+ SYM_FUNC_END(__serpent_dec_blk8_avx)
+ SYM_FUNC_START(serpent_ecb_enc_8way_avx)
+@@ -677,7 +677,7 @@ SYM_FUNC_START(serpent_ecb_enc_8way_avx)
+       store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_ecb_enc_8way_avx)
+ SYM_FUNC_START(serpent_ecb_dec_8way_avx)
+@@ -695,7 +695,7 @@ SYM_FUNC_START(serpent_ecb_dec_8way_avx)
+       store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_ecb_dec_8way_avx)
+ SYM_FUNC_START(serpent_cbc_dec_8way_avx)
+@@ -713,7 +713,7 @@ SYM_FUNC_START(serpent_cbc_dec_8way_avx)
+       store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_cbc_dec_8way_avx)
+ SYM_FUNC_START(serpent_ctr_8way_avx)
+@@ -733,7 +733,7 @@ SYM_FUNC_START(serpent_ctr_8way_avx)
+       store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_ctr_8way_avx)
+ SYM_FUNC_START(serpent_xts_enc_8way_avx)
+@@ -755,7 +755,7 @@ SYM_FUNC_START(serpent_xts_enc_8way_avx)
+       store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_xts_enc_8way_avx)
+ SYM_FUNC_START(serpent_xts_dec_8way_avx)
+@@ -777,5 +777,5 @@ SYM_FUNC_START(serpent_xts_dec_8way_avx)
+       store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_xts_dec_8way_avx)
+--- a/arch/x86/crypto/serpent-avx2-asm_64.S
++++ b/arch/x86/crypto/serpent-avx2-asm_64.S
+@@ -611,7 +611,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16
+       write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+       write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+-      ret;
++      RET;
+ SYM_FUNC_END(__serpent_enc_blk16)
+ .align 8
+@@ -665,7 +665,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk16
+       write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
+       write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
+-      ret;
++      RET;
+ SYM_FUNC_END(__serpent_dec_blk16)
+ SYM_FUNC_START(serpent_ecb_enc_16way)
+@@ -687,7 +687,7 @@ SYM_FUNC_START(serpent_ecb_enc_16way)
+       vzeroupper;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_ecb_enc_16way)
+ SYM_FUNC_START(serpent_ecb_dec_16way)
+@@ -709,7 +709,7 @@ SYM_FUNC_START(serpent_ecb_dec_16way)
+       vzeroupper;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_ecb_dec_16way)
+ SYM_FUNC_START(serpent_cbc_dec_16way)
+@@ -732,7 +732,7 @@ SYM_FUNC_START(serpent_cbc_dec_16way)
+       vzeroupper;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_cbc_dec_16way)
+ SYM_FUNC_START(serpent_ctr_16way)
+@@ -757,7 +757,7 @@ SYM_FUNC_START(serpent_ctr_16way)
+       vzeroupper;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_ctr_16way)
+ SYM_FUNC_START(serpent_xts_enc_16way)
+@@ -783,7 +783,7 @@ SYM_FUNC_START(serpent_xts_enc_16way)
+       vzeroupper;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_xts_enc_16way)
+ SYM_FUNC_START(serpent_xts_dec_16way)
+@@ -809,5 +809,5 @@ SYM_FUNC_START(serpent_xts_dec_16way)
+       vzeroupper;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_xts_dec_16way)
+--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
++++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+@@ -553,12 +553,12 @@ SYM_FUNC_START(__serpent_enc_blk_4way)
+       write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
+-      ret;
++      RET;
+ .L__enc_xor4:
+       xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
+-      ret;
++      RET;
+ SYM_FUNC_END(__serpent_enc_blk_4way)
+ SYM_FUNC_START(serpent_dec_blk_4way)
+@@ -612,5 +612,5 @@ SYM_FUNC_START(serpent_dec_blk_4way)
+       movl arg_dst(%esp), %eax;
+       write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_dec_blk_4way)
+--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
++++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+@@ -675,13 +675,13 @@ SYM_FUNC_START(__serpent_enc_blk_8way)
+       write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+       write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+-      ret;
++      RET;
+ .L__enc_xor8:
+       xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+       xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+-      ret;
++      RET;
+ SYM_FUNC_END(__serpent_enc_blk_8way)
+ SYM_FUNC_START(serpent_dec_blk_8way)
+@@ -735,5 +735,5 @@ SYM_FUNC_START(serpent_dec_blk_8way)
+       write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
+       write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
+-      ret;
++      RET;
+ SYM_FUNC_END(serpent_dec_blk_8way)
+--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
++++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+@@ -674,7 +674,7 @@ _loop3:
+       pop     %r12
+       pop     %rbx
+-      ret
++      RET
+       SYM_FUNC_END(\name)
+ .endm
+--- a/arch/x86/crypto/sha1_ni_asm.S
++++ b/arch/x86/crypto/sha1_ni_asm.S
+@@ -290,7 +290,7 @@ SYM_FUNC_START(sha1_ni_transform)
+ .Ldone_hash:
+       mov             RSPSAVE, %rsp
+-      ret
++      RET
+ SYM_FUNC_END(sha1_ni_transform)
+ .section      .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
+--- a/arch/x86/crypto/sha1_ssse3_asm.S
++++ b/arch/x86/crypto/sha1_ssse3_asm.S
+@@ -99,7 +99,7 @@
+       pop     %rbp
+       pop     %r12
+       pop     %rbx
+-      ret
++      RET
+       SYM_FUNC_END(\name)
+ .endm
+--- a/arch/x86/crypto/sha256-avx-asm.S
++++ b/arch/x86/crypto/sha256-avx-asm.S
+@@ -458,7 +458,7 @@ done_hash:
+       popq    %r13
+       popq    %r12
+       popq    %rbx
+-      ret
++      RET
+ SYM_FUNC_END(sha256_transform_avx)
+ .section      .rodata.cst256.K256, "aM", @progbits, 256
+--- a/arch/x86/crypto/sha256-avx2-asm.S
++++ b/arch/x86/crypto/sha256-avx2-asm.S
+@@ -711,7 +711,7 @@ done_hash:
+       popq    %r13
+       popq    %r12
+       popq    %rbx
+-      ret
++      RET
+ SYM_FUNC_END(sha256_transform_rorx)
+ .section      .rodata.cst512.K256, "aM", @progbits, 512
+--- a/arch/x86/crypto/sha256-ssse3-asm.S
++++ b/arch/x86/crypto/sha256-ssse3-asm.S
+@@ -472,7 +472,7 @@ done_hash:
+       popq    %r12
+       popq    %rbx
+-      ret
++      RET
+ SYM_FUNC_END(sha256_transform_ssse3)
+ .section      .rodata.cst256.K256, "aM", @progbits, 256
+--- a/arch/x86/crypto/sha256_ni_asm.S
++++ b/arch/x86/crypto/sha256_ni_asm.S
+@@ -326,7 +326,7 @@ SYM_FUNC_START(sha256_ni_transform)
+ .Ldone_hash:
+-      ret
++      RET
+ SYM_FUNC_END(sha256_ni_transform)
+ .section      .rodata.cst256.K256, "aM", @progbits, 256
+--- a/arch/x86/crypto/sha512-avx-asm.S
++++ b/arch/x86/crypto/sha512-avx-asm.S
+@@ -364,7 +364,7 @@ updateblock:
+       mov     frame_RSPSAVE(%rsp), %rsp
+ nowork:
+-      ret
++      RET
+ SYM_FUNC_END(sha512_transform_avx)
+ ########################################################################
+--- a/arch/x86/crypto/sha512-avx2-asm.S
++++ b/arch/x86/crypto/sha512-avx2-asm.S
+@@ -681,7 +681,7 @@ done_hash:
+       # Restore Stack Pointer
+       mov     frame_RSPSAVE(%rsp), %rsp
+-      ret
++      RET
+ SYM_FUNC_END(sha512_transform_rorx)
+ ########################################################################
+--- a/arch/x86/crypto/sha512-ssse3-asm.S
++++ b/arch/x86/crypto/sha512-ssse3-asm.S
+@@ -366,7 +366,7 @@ updateblock:
+       mov     frame_RSPSAVE(%rsp), %rsp
+ nowork:
+-      ret
++      RET
+ SYM_FUNC_END(sha512_transform_ssse3)
+ ########################################################################
+--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
++++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+@@ -272,7 +272,7 @@ SYM_FUNC_START_LOCAL(__twofish_enc_blk8)
+       outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+       outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
+-      ret;
++      RET;
+ SYM_FUNC_END(__twofish_enc_blk8)
+ .align 8
+@@ -312,7 +312,7 @@ SYM_FUNC_START_LOCAL(__twofish_dec_blk8)
+       outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
+       outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
+-      ret;
++      RET;
+ SYM_FUNC_END(__twofish_dec_blk8)
+ SYM_FUNC_START(twofish_ecb_enc_8way)
+@@ -332,7 +332,7 @@ SYM_FUNC_START(twofish_ecb_enc_8way)
+       store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(twofish_ecb_enc_8way)
+ SYM_FUNC_START(twofish_ecb_dec_8way)
+@@ -352,7 +352,7 @@ SYM_FUNC_START(twofish_ecb_dec_8way)
+       store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(twofish_ecb_dec_8way)
+ SYM_FUNC_START(twofish_cbc_dec_8way)
+@@ -377,7 +377,7 @@ SYM_FUNC_START(twofish_cbc_dec_8way)
+       popq %r12;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(twofish_cbc_dec_8way)
+ SYM_FUNC_START(twofish_ctr_8way)
+@@ -404,7 +404,7 @@ SYM_FUNC_START(twofish_ctr_8way)
+       popq %r12;
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(twofish_ctr_8way)
+ SYM_FUNC_START(twofish_xts_enc_8way)
+@@ -428,7 +428,7 @@ SYM_FUNC_START(twofish_xts_enc_8way)
+       store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(twofish_xts_enc_8way)
+ SYM_FUNC_START(twofish_xts_dec_8way)
+@@ -452,5 +452,5 @@ SYM_FUNC_START(twofish_xts_dec_8way)
+       store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+       FRAME_END
+-      ret;
++      RET;
+ SYM_FUNC_END(twofish_xts_dec_8way)
+--- a/arch/x86/crypto/twofish-i586-asm_32.S
++++ b/arch/x86/crypto/twofish-i586-asm_32.S
+@@ -260,7 +260,7 @@ SYM_FUNC_START(twofish_enc_blk)
+       pop     %ebx
+       pop     %ebp
+       mov     $1,     %eax
+-      ret
++      RET
+ SYM_FUNC_END(twofish_enc_blk)
+ SYM_FUNC_START(twofish_dec_blk)
+@@ -317,5 +317,5 @@ SYM_FUNC_START(twofish_dec_blk)
+       pop     %ebx
+       pop     %ebp
+       mov     $1,     %eax
+-      ret
++      RET
+ SYM_FUNC_END(twofish_dec_blk)
+--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
++++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+@@ -258,7 +258,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way)
+       popq %rbx;
+       popq %r12;
+       popq %r13;
+-      ret;
++      RET;
+ .L__enc_xor3:
+       outunpack_enc3(xor);
+@@ -266,7 +266,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way)
+       popq %rbx;
+       popq %r12;
+       popq %r13;
+-      ret;
++      RET;
+ SYM_FUNC_END(__twofish_enc_blk_3way)
+ SYM_FUNC_START(twofish_dec_blk_3way)
+@@ -301,5 +301,5 @@ SYM_FUNC_START(twofish_dec_blk_3way)
+       popq %rbx;
+       popq %r12;
+       popq %r13;
+-      ret;
++      RET;
+ SYM_FUNC_END(twofish_dec_blk_3way)
+--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
++++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
+@@ -252,7 +252,7 @@ SYM_FUNC_START(twofish_enc_blk)
+       popq    R1
+       movl    $1,%eax
+-      ret
++      RET
+ SYM_FUNC_END(twofish_enc_blk)
+ SYM_FUNC_START(twofish_dec_blk)
+@@ -304,5 +304,5 @@ SYM_FUNC_START(twofish_dec_blk)
+       popq    R1
+       movl    $1,%eax
+-      ret
++      RET
+ SYM_FUNC_END(twofish_dec_blk)
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -821,7 +821,7 @@ SYM_FUNC_START(schedule_tail_wrapper)
+       popl    %eax
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(schedule_tail_wrapper)
+ .popsection
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -740,7 +740,7 @@ SYM_FUNC_START(asm_load_gs_index)
+ 2:    ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
+       swapgs
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(asm_load_gs_index)
+ EXPORT_SYMBOL(asm_load_gs_index)
+@@ -799,7 +799,7 @@ SYM_INNER_LABEL(asm_call_irq_on_stack, S
+       /* Restore the previous stack pointer from RBP. */
+       leaveq
+-      ret
++      RET
+ SYM_FUNC_END(asm_call_on_stack)
+ #ifdef CONFIG_XEN_PV
+@@ -932,7 +932,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+        * is needed here.
+        */
+       SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
+-      ret
++      RET
+ .Lparanoid_entry_checkgs:
+       /* EBX = 1 -> kernel GSBASE active, no restore required */
+@@ -953,7 +953,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+ .Lparanoid_kernel_gsbase:
+       FENCE_SWAPGS_KERNEL_ENTRY
+-      ret
++      RET
+ SYM_CODE_END(paranoid_entry)
+ /*
+@@ -1032,7 +1032,7 @@ SYM_CODE_START_LOCAL(error_entry)
+       movq    %rax, %rsp                      /* switch stack */
+       ENCODE_FRAME_POINTER
+       pushq   %r12
+-      ret
++      RET
+       /*
+        * There are two places in the kernel that can potentially fault with
+@@ -1063,7 +1063,7 @@ SYM_CODE_START_LOCAL(error_entry)
+        */
+ .Lerror_entry_done_lfence:
+       FENCE_SWAPGS_KERNEL_ENTRY
+-      ret
++      RET
+ .Lbstep_iret:
+       /* Fix truncated RIP */
+--- a/arch/x86/entry/thunk_32.S
++++ b/arch/x86/entry/thunk_32.S
+@@ -24,7 +24,7 @@ SYM_CODE_START_NOALIGN(\name)
+       popl %edx
+       popl %ecx
+       popl %eax
+-      ret
++      RET
+       _ASM_NOKPROBE(\name)
+ SYM_CODE_END(\name)
+       .endm
+--- a/arch/x86/entry/thunk_64.S
++++ b/arch/x86/entry/thunk_64.S
+@@ -55,7 +55,7 @@ SYM_CODE_START_LOCAL_NOALIGN(__thunk_res
+       popq %rsi
+       popq %rdi
+       popq %rbp
+-      ret
++      RET
+       _ASM_NOKPROBE(__thunk_restore)
+ SYM_CODE_END(__thunk_restore)
+ #endif
+--- a/arch/x86/entry/vdso/vdso32/system_call.S
++++ b/arch/x86/entry/vdso/vdso32/system_call.S
+@@ -78,7 +78,7 @@ SYM_INNER_LABEL(int80_landing_pad, SYM_L
+       popl    %ecx
+       CFI_RESTORE             ecx
+       CFI_ADJUST_CFA_OFFSET   -4
+-      ret
++      RET
+       CFI_ENDPROC
+       .size __kernel_vsyscall,.-__kernel_vsyscall
+--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S
++++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
+@@ -19,17 +19,17 @@ __vsyscall_page:
+       mov $__NR_gettimeofday, %rax
+       syscall
+-      ret
++      RET
+       .balign 1024, 0xcc
+       mov $__NR_time, %rax
+       syscall
+-      ret
++      RET
+       .balign 1024, 0xcc
+       mov $__NR_getcpu, %rax
+       syscall
+-      ret
++      RET
+       .balign 4096, 0xcc
+--- a/arch/x86/kernel/acpi/wakeup_32.S
++++ b/arch/x86/kernel/acpi/wakeup_32.S
+@@ -60,7 +60,7 @@ save_registers:
+       popl    saved_context_eflags
+       movl    $ret_point, saved_eip
+-      ret
++      RET
+ restore_registers:
+@@ -70,7 +70,7 @@ restore_registers:
+       movl    saved_context_edi, %edi
+       pushl   saved_context_eflags
+       popfl
+-      ret
++      RET
+ SYM_CODE_START(do_suspend_lowlevel)
+       call    save_processor_state
+@@ -86,7 +86,7 @@ SYM_CODE_START(do_suspend_lowlevel)
+ ret_point:
+       call    restore_registers
+       call    restore_processor_state
+-      ret
++      RET
+ SYM_CODE_END(do_suspend_lowlevel)
+ .data
+--- a/arch/x86/kernel/ftrace_32.S
++++ b/arch/x86/kernel/ftrace_32.S
+@@ -19,7 +19,7 @@
+ #endif
+ SYM_FUNC_START(__fentry__)
+-      ret
++      RET
+ SYM_FUNC_END(__fentry__)
+ EXPORT_SYMBOL(__fentry__)
+@@ -84,7 +84,7 @@ ftrace_graph_call:
+ /* This is weak to keep gas from relaxing the jumps */
+ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
+-      ret
++      RET
+ SYM_CODE_END(ftrace_caller)
+ SYM_CODE_START(ftrace_regs_caller)
+@@ -177,7 +177,7 @@ SYM_CODE_START(ftrace_graph_caller)
+       popl    %edx
+       popl    %ecx
+       popl    %eax
+-      ret
++      RET
+ SYM_CODE_END(ftrace_graph_caller)
+ .globl return_to_handler
+--- a/arch/x86/kernel/ftrace_64.S
++++ b/arch/x86/kernel/ftrace_64.S
+@@ -132,7 +132,7 @@
+ #ifdef CONFIG_DYNAMIC_FTRACE
+ SYM_FUNC_START(__fentry__)
+-      retq
++      RET
+ SYM_FUNC_END(__fentry__)
+ EXPORT_SYMBOL(__fentry__)
+@@ -170,10 +170,10 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L
+ /*
+  * This is weak to keep gas from relaxing the jumps.
+- * It is also used to copy the retq for trampolines.
++ * It is also used to copy the RET for trampolines.
+  */
+ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
+-      retq
++      RET
+ SYM_FUNC_END(ftrace_epilogue)
+ SYM_FUNC_START(ftrace_regs_caller)
+@@ -287,7 +287,7 @@ fgraph_trace:
+ #endif
+ SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL)
+-      retq
++      RET
+ trace:
+       /* save_mcount_regs fills in first two parameters */
+@@ -319,7 +319,7 @@ SYM_FUNC_START(ftrace_graph_caller)
+       restore_mcount_regs
+-      retq
++      RET
+ SYM_FUNC_END(ftrace_graph_caller)
+ SYM_CODE_START(return_to_handler)
+--- a/arch/x86/kernel/head_32.S
++++ b/arch/x86/kernel/head_32.S
+@@ -354,7 +354,7 @@ setup_once:
+ #endif
+       andl $0,setup_once_ref  /* Once is enough, thanks */
+-      ret
++      RET
+ SYM_FUNC_START(early_idt_handler_array)
+       # 36(%esp) %eflags
+--- a/arch/x86/kernel/irqflags.S
++++ b/arch/x86/kernel/irqflags.S
+@@ -10,7 +10,7 @@
+ SYM_FUNC_START(native_save_fl)
+       pushf
+       pop %_ASM_AX
+-      ret
++      RET
+ SYM_FUNC_END(native_save_fl)
+ EXPORT_SYMBOL(native_save_fl)
+@@ -21,6 +21,6 @@ EXPORT_SYMBOL(native_save_fl)
+ SYM_FUNC_START(native_restore_fl)
+       push %_ASM_ARG1
+       popf
+-      ret
++      RET
+ SYM_FUNC_END(native_restore_fl)
+ EXPORT_SYMBOL(native_restore_fl)
+--- a/arch/x86/kernel/relocate_kernel_32.S
++++ b/arch/x86/kernel/relocate_kernel_32.S
+@@ -91,7 +91,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
+       movl    %edi, %eax
+       addl    $(identity_mapped - relocate_kernel), %eax
+       pushl   %eax
+-      ret
++      RET
+ SYM_CODE_END(relocate_kernel)
+ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+@@ -159,7 +159,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+       xorl    %edx, %edx
+       xorl    %esi, %esi
+       xorl    %ebp, %ebp
+-      ret
++      RET
+ 1:
+       popl    %edx
+       movl    CP_PA_SWAP_PAGE(%edi), %esp
+@@ -190,7 +190,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+       movl    %edi, %eax
+       addl    $(virtual_mapped - relocate_kernel), %eax
+       pushl   %eax
+-      ret
++      RET
+ SYM_CODE_END(identity_mapped)
+ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+@@ -208,7 +208,7 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+-      ret
++      RET
+ SYM_CODE_END(virtual_mapped)
+       /* Do the copies */
+@@ -271,7 +271,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
+       popl    %edi
+       popl    %ebx
+       popl    %ebp
+-      ret
++      RET
+ SYM_CODE_END(swap_pages)
+       .globl kexec_control_code_size
+--- a/arch/x86/kernel/relocate_kernel_64.S
++++ b/arch/x86/kernel/relocate_kernel_64.S
+@@ -104,7 +104,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
+       /* jump to identity mapped page */
+       addq    $(identity_mapped - relocate_kernel), %r8
+       pushq   %r8
+-      ret
++      RET
+ SYM_CODE_END(relocate_kernel)
+ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+@@ -191,7 +191,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+       xorl    %r14d, %r14d
+       xorl    %r15d, %r15d
+-      ret
++      RET
+ 1:
+       popq    %rdx
+@@ -210,7 +210,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+       call    swap_pages
+       movq    $virtual_mapped, %rax
+       pushq   %rax
+-      ret
++      RET
+ SYM_CODE_END(identity_mapped)
+ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+@@ -231,7 +231,7 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map
+       popq    %r12
+       popq    %rbp
+       popq    %rbx
+-      ret
++      RET
+ SYM_CODE_END(virtual_mapped)
+       /* Do the copies */
+@@ -288,7 +288,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
+       lea     PAGE_SIZE(%rax), %rsi
+       jmp     0b
+ 3:
+-      ret
++      RET
+ SYM_CODE_END(swap_pages)
+       .globl kexec_control_code_size
+--- a/arch/x86/kernel/sev_verify_cbit.S
++++ b/arch/x86/kernel/sev_verify_cbit.S
+@@ -85,5 +85,5 @@ SYM_FUNC_START(sev_verify_cbit)
+ #endif
+       /* Return page-table pointer */
+       movq    %rdi, %rax
+-      ret
++      RET
+ SYM_FUNC_END(sev_verify_cbit)
+--- a/arch/x86/kernel/verify_cpu.S
++++ b/arch/x86/kernel/verify_cpu.S
+@@ -132,9 +132,9 @@ SYM_FUNC_START_LOCAL(verify_cpu)
+ .Lverify_cpu_no_longmode:
+       popf                            # Restore caller passed flags
+       movl $1,%eax
+-      ret
++      RET
+ .Lverify_cpu_sse_ok:
+       popf                            # Restore caller passed flags
+       xorl %eax, %eax
+-      ret
++      RET
+ SYM_FUNC_END(verify_cpu)
+--- a/arch/x86/kvm/svm/vmenter.S
++++ b/arch/x86/kvm/svm/vmenter.S
+@@ -166,5 +166,5 @@ SYM_FUNC_START(__svm_vcpu_run)
+       pop %edi
+ #endif
+       pop %_ASM_BP
+-      ret
++      RET
+ SYM_FUNC_END(__svm_vcpu_run)
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -49,14 +49,14 @@ SYM_FUNC_START_LOCAL(vmx_vmenter)
+       je 2f
+ 1:    vmresume
+-      ret
++      RET
+ 2:    vmlaunch
+-      ret
++      RET
+ 3:    cmpb $0, kvm_rebooting
+       je 4f
+-      ret
++      RET
+ 4:    ud2
+       _ASM_EXTABLE(1b, 3b)
+@@ -89,7 +89,7 @@ SYM_FUNC_START(vmx_vmexit)
+       pop %_ASM_AX
+ .Lvmexit_skip_rsb:
+ #endif
+-      ret
++      RET
+ SYM_FUNC_END(vmx_vmexit)
+ /**
+@@ -228,7 +228,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+       pop %edi
+ #endif
+       pop %_ASM_BP
+-      ret
++      RET
+       /* VM-Fail.  Out-of-line to avoid a taken Jcc after VM-Exit. */
+ 2:    mov $1, %eax
+@@ -293,7 +293,7 @@ SYM_FUNC_START(vmread_error_trampoline)
+       pop %_ASM_AX
+       pop %_ASM_BP
+-      ret
++      RET
+ SYM_FUNC_END(vmread_error_trampoline)
+ SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff)
+@@ -326,5 +326,5 @@ SYM_FUNC_START(vmx_do_interrupt_nmi_irqo
+        */
+       mov %_ASM_BP, %_ASM_SP
+       pop %_ASM_BP
+-      ret
++      RET
+ SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff)
+--- a/arch/x86/lib/atomic64_386_32.S
++++ b/arch/x86/lib/atomic64_386_32.S
+@@ -30,7 +30,7 @@ SYM_FUNC_START(atomic64_##op##_386); \
+ #define RET_IRQ_RESTORE \
+       IRQ_RESTORE v; \
+-      ret
++      RET
+ #define v %ecx
+ BEGIN_IRQ_SAVE(read)
+--- a/arch/x86/lib/atomic64_cx8_32.S
++++ b/arch/x86/lib/atomic64_cx8_32.S
+@@ -18,7 +18,7 @@
+ SYM_FUNC_START(atomic64_read_cx8)
+       read64 %ecx
+-      ret
++      RET
+ SYM_FUNC_END(atomic64_read_cx8)
+ SYM_FUNC_START(atomic64_set_cx8)
+@@ -28,7 +28,7 @@ SYM_FUNC_START(atomic64_set_cx8)
+       cmpxchg8b (%esi)
+       jne 1b
+-      ret
++      RET
+ SYM_FUNC_END(atomic64_set_cx8)
+ SYM_FUNC_START(atomic64_xchg_cx8)
+@@ -37,7 +37,7 @@ SYM_FUNC_START(atomic64_xchg_cx8)
+       cmpxchg8b (%esi)
+       jne 1b
+-      ret
++      RET
+ SYM_FUNC_END(atomic64_xchg_cx8)
+ .macro addsub_return func ins insc
+@@ -68,7 +68,7 @@ SYM_FUNC_START(atomic64_\func\()_return_
+       popl %esi
+       popl %ebx
+       popl %ebp
+-      ret
++      RET
+ SYM_FUNC_END(atomic64_\func\()_return_cx8)
+ .endm
+@@ -93,7 +93,7 @@ SYM_FUNC_START(atomic64_\func\()_return_
+       movl %ebx, %eax
+       movl %ecx, %edx
+       popl %ebx
+-      ret
++      RET
+ SYM_FUNC_END(atomic64_\func\()_return_cx8)
+ .endm
+@@ -118,7 +118,7 @@ SYM_FUNC_START(atomic64_dec_if_positive_
+       movl %ebx, %eax
+       movl %ecx, %edx
+       popl %ebx
+-      ret
++      RET
+ SYM_FUNC_END(atomic64_dec_if_positive_cx8)
+ SYM_FUNC_START(atomic64_add_unless_cx8)
+@@ -149,7 +149,7 @@ SYM_FUNC_START(atomic64_add_unless_cx8)
+       addl $8, %esp
+       popl %ebx
+       popl %ebp
+-      ret
++      RET
+ 4:
+       cmpl %edx, 4(%esp)
+       jne 2b
+@@ -176,5 +176,5 @@ SYM_FUNC_START(atomic64_inc_not_zero_cx8
+       movl $1, %eax
+ 3:
+       popl %ebx
+-      ret
++      RET
+ SYM_FUNC_END(atomic64_inc_not_zero_cx8)
+--- a/arch/x86/lib/checksum_32.S
++++ b/arch/x86/lib/checksum_32.S
+@@ -127,7 +127,7 @@ SYM_FUNC_START(csum_partial)
+ 8:
+       popl %ebx
+       popl %esi
+-      ret
++      RET
+ SYM_FUNC_END(csum_partial)
+ #else
+@@ -245,7 +245,7 @@ SYM_FUNC_START(csum_partial)
+ 90: 
+       popl %ebx
+       popl %esi
+-      ret
++      RET
+ SYM_FUNC_END(csum_partial)
+                               
+ #endif
+@@ -371,7 +371,7 @@ EXC(       movb %cl, (%edi)        )
+       popl %esi
+       popl %edi
+       popl %ecx                       # equivalent to addl $4,%esp
+-      ret     
++      RET
+ SYM_FUNC_END(csum_partial_copy_generic)
+ #else
+@@ -447,7 +447,7 @@ EXC(       movb %dl, (%edi)         )
+       popl %esi
+       popl %edi
+       popl %ebx
+-      ret
++      RET
+ SYM_FUNC_END(csum_partial_copy_generic)
+                               
+ #undef ROUND
+--- a/arch/x86/lib/clear_page_64.S
++++ b/arch/x86/lib/clear_page_64.S
+@@ -17,7 +17,7 @@ SYM_FUNC_START(clear_page_rep)
+       movl $4096/8,%ecx
+       xorl %eax,%eax
+       rep stosq
+-      ret
++      RET
+ SYM_FUNC_END(clear_page_rep)
+ EXPORT_SYMBOL_GPL(clear_page_rep)
+@@ -39,7 +39,7 @@ SYM_FUNC_START(clear_page_orig)
+       leaq    64(%rdi),%rdi
+       jnz     .Lloop
+       nop
+-      ret
++      RET
+ SYM_FUNC_END(clear_page_orig)
+ EXPORT_SYMBOL_GPL(clear_page_orig)
+@@ -47,6 +47,6 @@ SYM_FUNC_START(clear_page_erms)
+       movl $4096,%ecx
+       xorl %eax,%eax
+       rep stosb
+-      ret
++      RET
+ SYM_FUNC_END(clear_page_erms)
+ EXPORT_SYMBOL_GPL(clear_page_erms)
+--- a/arch/x86/lib/cmpxchg16b_emu.S
++++ b/arch/x86/lib/cmpxchg16b_emu.S
+@@ -37,11 +37,11 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
+       popfq
+       mov $1, %al
+-      ret
++      RET
+ .Lnot_same:
+       popfq
+       xor %al,%al
+-      ret
++      RET
+ SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
+--- a/arch/x86/lib/cmpxchg8b_emu.S
++++ b/arch/x86/lib/cmpxchg8b_emu.S
+@@ -32,7 +32,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
+       movl %ecx, 4(%esi)
+       popfl
+-      ret
++      RET
+ .Lnot_same:
+       movl  (%esi), %eax
+@@ -40,7 +40,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
+       movl 4(%esi), %edx
+       popfl
+-      ret
++      RET
+ SYM_FUNC_END(cmpxchg8b_emu)
+ EXPORT_SYMBOL(cmpxchg8b_emu)
+--- a/arch/x86/lib/copy_mc_64.S
++++ b/arch/x86/lib/copy_mc_64.S
+@@ -86,7 +86,7 @@ SYM_FUNC_START(copy_mc_fragile)
+ .L_done_memcpy_trap:
+       xorl %eax, %eax
+ .L_done:
+-      ret
++      RET
+ SYM_FUNC_END(copy_mc_fragile)
+ EXPORT_SYMBOL_GPL(copy_mc_fragile)
+@@ -142,7 +142,7 @@ SYM_FUNC_START(copy_mc_enhanced_fast_str
+       rep movsb
+       /* Copy successful. Return zero */
+       xorl %eax, %eax
+-      ret
++      RET
+ SYM_FUNC_END(copy_mc_enhanced_fast_string)
+       .section .fixup, "ax"
+@@ -155,7 +155,7 @@ SYM_FUNC_END(copy_mc_enhanced_fast_strin
+        * user-copy routines.
+        */
+       movq %rcx, %rax
+-      ret
++      RET
+       .previous
+--- a/arch/x86/lib/copy_page_64.S
++++ b/arch/x86/lib/copy_page_64.S
+@@ -17,7 +17,7 @@ SYM_FUNC_START(copy_page)
+       ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
+       movl    $4096/8, %ecx
+       rep     movsq
+-      ret
++      RET
+ SYM_FUNC_END(copy_page)
+ EXPORT_SYMBOL(copy_page)
+@@ -85,5 +85,5 @@ SYM_FUNC_START_LOCAL(copy_page_regs)
+       movq    (%rsp), %rbx
+       movq    1*8(%rsp), %r12
+       addq    $2*8, %rsp
+-      ret
++      RET
+ SYM_FUNC_END(copy_page_regs)
+--- a/arch/x86/lib/copy_user_64.S
++++ b/arch/x86/lib/copy_user_64.S
+@@ -105,7 +105,7 @@ SYM_FUNC_START(copy_user_generic_unrolle
+       jnz 21b
+ 23:   xor %eax,%eax
+       ASM_CLAC
+-      ret
++      RET
+       .section .fixup,"ax"
+ 30:   shll $6,%ecx
+@@ -173,7 +173,7 @@ SYM_FUNC_START(copy_user_generic_string)
+       movsb
+       xorl %eax,%eax
+       ASM_CLAC
+-      ret
++      RET
+       .section .fixup,"ax"
+ 11:   leal (%rdx,%rcx,8),%ecx
+@@ -207,7 +207,7 @@ SYM_FUNC_START(copy_user_enhanced_fast_s
+       movsb
+       xorl %eax,%eax
+       ASM_CLAC
+-      ret
++      RET
+       .section .fixup,"ax"
+ 12:   movl %ecx,%edx          /* ecx is zerorest also */
+@@ -239,7 +239,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_
+ 1:    rep movsb
+ 2:    mov %ecx,%eax
+       ASM_CLAC
+-      ret
++      RET
+       /*
+        * Return zero to pretend that this copy succeeded. This
+@@ -250,7 +250,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_
+        */
+ 3:    xorl %eax,%eax
+       ASM_CLAC
+-      ret
++      RET
+       _ASM_EXTABLE_CPY(1b, 2b)
+ SYM_CODE_END(.Lcopy_user_handle_tail)
+@@ -361,7 +361,7 @@ SYM_FUNC_START(__copy_user_nocache)
+       xorl %eax,%eax
+       ASM_CLAC
+       sfence
+-      ret
++      RET
+       .section .fixup,"ax"
+ .L_fixup_4x8b_copy:
+--- a/arch/x86/lib/csum-copy_64.S
++++ b/arch/x86/lib/csum-copy_64.S
+@@ -201,7 +201,7 @@ SYM_FUNC_START(csum_partial_copy_generic
+       movq 3*8(%rsp), %r13
+       movq 4*8(%rsp), %r15
+       addq $5*8, %rsp
+-      ret
++      RET
+ .Lshort:
+       movl %ecx, %r10d
+       jmp  .L1
+--- a/arch/x86/lib/getuser.S
++++ b/arch/x86/lib/getuser.S
+@@ -57,7 +57,7 @@ SYM_FUNC_START(__get_user_1)
+ 1:    movzbl (%_ASM_AX),%edx
+       xor %eax,%eax
+       ASM_CLAC
+-      ret
++      RET
+ SYM_FUNC_END(__get_user_1)
+ EXPORT_SYMBOL(__get_user_1)
+@@ -71,7 +71,7 @@ SYM_FUNC_START(__get_user_2)
+ 2:    movzwl (%_ASM_AX),%edx
+       xor %eax,%eax
+       ASM_CLAC
+-      ret
++      RET
+ SYM_FUNC_END(__get_user_2)
+ EXPORT_SYMBOL(__get_user_2)
+@@ -85,7 +85,7 @@ SYM_FUNC_START(__get_user_4)
+ 3:    movl (%_ASM_AX),%edx
+       xor %eax,%eax
+       ASM_CLAC
+-      ret
++      RET
+ SYM_FUNC_END(__get_user_4)
+ EXPORT_SYMBOL(__get_user_4)
+@@ -100,7 +100,7 @@ SYM_FUNC_START(__get_user_8)
+ 4:    movq (%_ASM_AX),%rdx
+       xor %eax,%eax
+       ASM_CLAC
+-      ret
++      RET
+ #else
+       LOAD_TASK_SIZE_MINUS_N(7)
+       cmp %_ASM_DX,%_ASM_AX
+@@ -112,7 +112,7 @@ SYM_FUNC_START(__get_user_8)
+ 5:    movl 4(%_ASM_AX),%ecx
+       xor %eax,%eax
+       ASM_CLAC
+-      ret
++      RET
+ #endif
+ SYM_FUNC_END(__get_user_8)
+ EXPORT_SYMBOL(__get_user_8)
+@@ -124,7 +124,7 @@ SYM_FUNC_START(__get_user_nocheck_1)
+ 6:    movzbl (%_ASM_AX),%edx
+       xor %eax,%eax
+       ASM_CLAC
+-      ret
++      RET
+ SYM_FUNC_END(__get_user_nocheck_1)
+ EXPORT_SYMBOL(__get_user_nocheck_1)
+@@ -134,7 +134,7 @@ SYM_FUNC_START(__get_user_nocheck_2)
+ 7:    movzwl (%_ASM_AX),%edx
+       xor %eax,%eax
+       ASM_CLAC
+-      ret
++      RET
+ SYM_FUNC_END(__get_user_nocheck_2)
+ EXPORT_SYMBOL(__get_user_nocheck_2)
+@@ -144,7 +144,7 @@ SYM_FUNC_START(__get_user_nocheck_4)
+ 8:    movl (%_ASM_AX),%edx
+       xor %eax,%eax
+       ASM_CLAC
+-      ret
++      RET
+ SYM_FUNC_END(__get_user_nocheck_4)
+ EXPORT_SYMBOL(__get_user_nocheck_4)
+@@ -159,7 +159,7 @@ SYM_FUNC_START(__get_user_nocheck_8)
+ #endif
+       xor %eax,%eax
+       ASM_CLAC
+-      ret
++      RET
+ SYM_FUNC_END(__get_user_nocheck_8)
+ EXPORT_SYMBOL(__get_user_nocheck_8)
+@@ -169,7 +169,7 @@ SYM_CODE_START_LOCAL(.Lbad_get_user_clac
+ bad_get_user:
+       xor %edx,%edx
+       mov $(-EFAULT),%_ASM_AX
+-      ret
++      RET
+ SYM_CODE_END(.Lbad_get_user_clac)
+ #ifdef CONFIG_X86_32
+@@ -179,7 +179,7 @@ bad_get_user_8:
+       xor %edx,%edx
+       xor %ecx,%ecx
+       mov $(-EFAULT),%_ASM_AX
+-      ret
++      RET
+ SYM_CODE_END(.Lbad_get_user_8_clac)
+ #endif
+--- a/arch/x86/lib/hweight.S
++++ b/arch/x86/lib/hweight.S
+@@ -32,7 +32,7 @@ SYM_FUNC_START(__sw_hweight32)
+       imull $0x01010101, %eax, %eax           # w_tmp *= 0x01010101
+       shrl $24, %eax                          # w = w_tmp >> 24
+       __ASM_SIZE(pop,) %__ASM_REG(dx)
+-      ret
++      RET
+ SYM_FUNC_END(__sw_hweight32)
+ EXPORT_SYMBOL(__sw_hweight32)
+@@ -65,7 +65,7 @@ SYM_FUNC_START(__sw_hweight64)
+       popq    %rdx
+       popq    %rdi
+-      ret
++      RET
+ #else /* CONFIG_X86_32 */
+       /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
+       pushl   %ecx
+@@ -77,7 +77,7 @@ SYM_FUNC_START(__sw_hweight64)
+       addl    %ecx, %eax                      # result
+       popl    %ecx
+-      ret
++      RET
+ #endif
+ SYM_FUNC_END(__sw_hweight64)
+ EXPORT_SYMBOL(__sw_hweight64)
+--- a/arch/x86/lib/iomap_copy_64.S
++++ b/arch/x86/lib/iomap_copy_64.S
+@@ -11,5 +11,5 @@
+ SYM_FUNC_START(__iowrite32_copy)
+       movl %edx,%ecx
+       rep movsd
+-      ret
++      RET
+ SYM_FUNC_END(__iowrite32_copy)
+--- a/arch/x86/lib/memcpy_64.S
++++ b/arch/x86/lib/memcpy_64.S
+@@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy)
+       rep movsq
+       movl %edx, %ecx
+       rep movsb
+-      ret
++      RET
+ SYM_FUNC_END(memcpy)
+ SYM_FUNC_END_ALIAS(__memcpy)
+ EXPORT_SYMBOL(memcpy)
+@@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms)
+       movq %rdi, %rax
+       movq %rdx, %rcx
+       rep movsb
+-      ret
++      RET
+ SYM_FUNC_END(memcpy_erms)
+ SYM_FUNC_START_LOCAL(memcpy_orig)
+@@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+       movq %r9,       1*8(%rdi)
+       movq %r10,      -2*8(%rdi, %rdx)
+       movq %r11,      -1*8(%rdi, %rdx)
+-      retq
++      RET
+       .p2align 4
+ .Lless_16bytes:
+       cmpl $8,        %edx
+@@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+       movq -1*8(%rsi, %rdx),  %r9
+       movq %r8,       0*8(%rdi)
+       movq %r9,       -1*8(%rdi, %rdx)
+-      retq
++      RET
+       .p2align 4
+ .Lless_8bytes:
+       cmpl $4,        %edx
+@@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+       movl -4(%rsi, %rdx), %r8d
+       movl %ecx, (%rdi)
+       movl %r8d, -4(%rdi, %rdx)
+-      retq
++      RET
+       .p2align 4
+ .Lless_3bytes:
+       subl $1, %edx
+@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+       movb %cl, (%rdi)
+ .Lend:
+-      retq
++      RET
+ SYM_FUNC_END(memcpy_orig)
+ .popsection
+--- a/arch/x86/lib/memmove_64.S
++++ b/arch/x86/lib/memmove_64.S
+@@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove)
+       /* FSRM implies ERMS => no length checks, do the copy directly */
+ .Lmemmove_begin_forward:
+       ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
+-      ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
++      ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; RET", X86_FEATURE_ERMS
+       /*
+        * movsq instruction have many startup latency
+@@ -205,7 +205,7 @@ SYM_FUNC_START(__memmove)
+       movb (%rsi), %r11b
+       movb %r11b, (%rdi)
+ 13:
+-      retq
++      RET
+ SYM_FUNC_END(__memmove)
+ SYM_FUNC_END_ALIAS(memmove)
+ EXPORT_SYMBOL(__memmove)
+--- a/arch/x86/lib/memset_64.S
++++ b/arch/x86/lib/memset_64.S
+@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
+       movl %edx,%ecx
+       rep stosb
+       movq %r9,%rax
+-      ret
++      RET
+ SYM_FUNC_END(__memset)
+ SYM_FUNC_END_ALIAS(memset)
+ EXPORT_SYMBOL(memset)
+@@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms)
+       movq %rdx,%rcx
+       rep stosb
+       movq %r9,%rax
+-      ret
++      RET
+ SYM_FUNC_END(memset_erms)
+ SYM_FUNC_START_LOCAL(memset_orig)
+@@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig)
+ .Lende:
+       movq    %r10,%rax
+-      ret
++      RET
+ .Lbad_alignment:
+       cmpq $7,%rdx
+--- a/arch/x86/lib/msr-reg.S
++++ b/arch/x86/lib/msr-reg.S
+@@ -35,7 +35,7 @@ SYM_FUNC_START(\op\()_safe_regs)
+       movl    %edi, 28(%r10)
+       popq %r12
+       popq %rbx
+-      ret
++      RET
+ 3:
+       movl    $-EIO, %r11d
+       jmp     2b
+@@ -77,7 +77,7 @@ SYM_FUNC_START(\op\()_safe_regs)
+       popl %esi
+       popl %ebp
+       popl %ebx
+-      ret
++      RET
+ 3:
+       movl    $-EIO, 4(%esp)
+       jmp     2b
+--- a/arch/x86/lib/putuser.S
++++ b/arch/x86/lib/putuser.S
+@@ -52,7 +52,7 @@ SYM_INNER_LABEL(__put_user_nocheck_1, SY
+ 1:    movb %al,(%_ASM_CX)
+       xor %ecx,%ecx
+       ASM_CLAC
+-      ret
++      RET
+ SYM_FUNC_END(__put_user_1)
+ EXPORT_SYMBOL(__put_user_1)
+ EXPORT_SYMBOL(__put_user_nocheck_1)
+@@ -66,7 +66,7 @@ SYM_INNER_LABEL(__put_user_nocheck_2, SY
+ 2:    movw %ax,(%_ASM_CX)
+       xor %ecx,%ecx
+       ASM_CLAC
+-      ret
++      RET
+ SYM_FUNC_END(__put_user_2)
+ EXPORT_SYMBOL(__put_user_2)
+ EXPORT_SYMBOL(__put_user_nocheck_2)
+@@ -80,7 +80,7 @@ SYM_INNER_LABEL(__put_user_nocheck_4, SY
+ 3:    movl %eax,(%_ASM_CX)
+       xor %ecx,%ecx
+       ASM_CLAC
+-      ret
++      RET
+ SYM_FUNC_END(__put_user_4)
+ EXPORT_SYMBOL(__put_user_4)
+ EXPORT_SYMBOL(__put_user_nocheck_4)
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -23,7 +23,7 @@
+ .Ldo_rop_\@:
+       mov     %\reg, (%_ASM_SP)
+       UNWIND_HINT_FUNC
+-      ret
++      RET
+ .endm
+ .macro THUNK reg
+--- a/arch/x86/math-emu/div_Xsig.S
++++ b/arch/x86/math-emu/div_Xsig.S
+@@ -341,7 +341,7 @@ L_exit:
+       popl    %esi
+       leave
+-      ret
++      RET
+ #ifdef PARANOID
+--- a/arch/x86/math-emu/div_small.S
++++ b/arch/x86/math-emu/div_small.S
+@@ -44,5 +44,5 @@ SYM_FUNC_START(FPU_div_small)
+       popl    %esi
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(FPU_div_small)
+--- a/arch/x86/math-emu/mul_Xsig.S
++++ b/arch/x86/math-emu/mul_Xsig.S
+@@ -62,7 +62,7 @@ SYM_FUNC_START(mul32_Xsig)
+       popl %esi
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(mul32_Xsig)
+@@ -115,7 +115,7 @@ SYM_FUNC_START(mul64_Xsig)
+       popl %esi
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(mul64_Xsig)
+@@ -175,5 +175,5 @@ SYM_FUNC_START(mul_Xsig_Xsig)
+       popl %esi
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(mul_Xsig_Xsig)
+--- a/arch/x86/math-emu/polynom_Xsig.S
++++ b/arch/x86/math-emu/polynom_Xsig.S
+@@ -133,5 +133,5 @@ L_accum_done:
+       popl    %edi
+       popl    %esi
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(polynomial_Xsig)
+--- a/arch/x86/math-emu/reg_norm.S
++++ b/arch/x86/math-emu/reg_norm.S
+@@ -72,7 +72,7 @@ L_exit_valid:
+ L_exit:
+       popl    %ebx
+       leave
+-      ret
++      RET
+ L_zero:
+@@ -138,7 +138,7 @@ L_exit_nuo_valid:
+       popl    %ebx
+       leave
+-      ret
++      RET
+ L_exit_nuo_zero:
+       movl    TAG_Zero,%eax
+@@ -146,5 +146,5 @@ L_exit_nuo_zero:
+       popl    %ebx
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(FPU_normalize_nuo)
+--- a/arch/x86/math-emu/reg_round.S
++++ b/arch/x86/math-emu/reg_round.S
+@@ -437,7 +437,7 @@ fpu_Arith_exit:
+       popl    %edi
+       popl    %esi
+       leave
+-      ret
++      RET
+ /*
+--- a/arch/x86/math-emu/reg_u_add.S
++++ b/arch/x86/math-emu/reg_u_add.S
+@@ -164,6 +164,6 @@ L_exit:
+       popl    %edi
+       popl    %esi
+       leave
+-      ret
++      RET
+ #endif /* PARANOID */
+ SYM_FUNC_END(FPU_u_add)
+--- a/arch/x86/math-emu/reg_u_div.S
++++ b/arch/x86/math-emu/reg_u_div.S
+@@ -468,7 +468,7 @@ L_exit:
+       popl    %esi
+       leave
+-      ret
++      RET
+ #endif /* PARANOID */ 
+ SYM_FUNC_END(FPU_u_div)
+--- a/arch/x86/math-emu/reg_u_mul.S
++++ b/arch/x86/math-emu/reg_u_mul.S
+@@ -144,7 +144,7 @@ L_exit:
+       popl    %edi
+       popl    %esi
+       leave
+-      ret
++      RET
+ #endif /* PARANOID */ 
+ SYM_FUNC_END(FPU_u_mul)
+--- a/arch/x86/math-emu/reg_u_sub.S
++++ b/arch/x86/math-emu/reg_u_sub.S
+@@ -270,5 +270,5 @@ L_exit:
+       popl    %edi
+       popl    %esi
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(FPU_u_sub)
+--- a/arch/x86/math-emu/round_Xsig.S
++++ b/arch/x86/math-emu/round_Xsig.S
+@@ -78,7 +78,7 @@ L_exit:
+       popl    %esi
+       popl    %ebx
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(round_Xsig)
+@@ -138,5 +138,5 @@ L_n_exit:
+       popl    %esi
+       popl    %ebx
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(norm_Xsig)
+--- a/arch/x86/math-emu/shr_Xsig.S
++++ b/arch/x86/math-emu/shr_Xsig.S
+@@ -45,7 +45,7 @@ SYM_FUNC_START(shr_Xsig)
+       popl    %ebx
+       popl    %esi
+       leave
+-      ret
++      RET
+ L_more_than_31:
+       cmpl    $64,%ecx
+@@ -61,7 +61,7 @@ L_more_than_31:
+       movl    $0,8(%esi)
+       popl    %esi
+       leave
+-      ret
++      RET
+ L_more_than_63:
+       cmpl    $96,%ecx
+@@ -76,7 +76,7 @@ L_more_than_63:
+       movl    %edx,8(%esi)
+       popl    %esi
+       leave
+-      ret
++      RET
+ L_more_than_95:
+       xorl    %eax,%eax
+@@ -85,5 +85,5 @@ L_more_than_95:
+       movl    %eax,8(%esi)
+       popl    %esi
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(shr_Xsig)
+--- a/arch/x86/math-emu/wm_shrx.S
++++ b/arch/x86/math-emu/wm_shrx.S
+@@ -55,7 +55,7 @@ SYM_FUNC_START(FPU_shrx)
+       popl    %ebx
+       popl    %esi
+       leave
+-      ret
++      RET
+ L_more_than_31:
+       cmpl    $64,%ecx
+@@ -70,7 +70,7 @@ L_more_than_31:
+       movl    $0,4(%esi)
+       popl    %esi
+       leave
+-      ret
++      RET
+ L_more_than_63:
+       cmpl    $96,%ecx
+@@ -84,7 +84,7 @@ L_more_than_63:
+       movl    %edx,4(%esi)
+       popl    %esi
+       leave
+-      ret
++      RET
+ L_more_than_95:
+       xorl    %eax,%eax
+@@ -92,7 +92,7 @@ L_more_than_95:
+       movl    %eax,4(%esi)
+       popl    %esi
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(FPU_shrx)
+@@ -146,7 +146,7 @@ SYM_FUNC_START(FPU_shrxs)
+       popl    %ebx
+       popl    %esi
+       leave
+-      ret
++      RET
+ /* Shift by [0..31] bits */
+ Ls_less_than_32:
+@@ -163,7 +163,7 @@ Ls_less_than_32:
+       popl    %ebx
+       popl    %esi
+       leave
+-      ret
++      RET
+ /* Shift by [64..95] bits */
+ Ls_more_than_63:
+@@ -189,7 +189,7 @@ Ls_more_than_63:
+       popl    %ebx
+       popl    %esi
+       leave
+-      ret
++      RET
+ Ls_more_than_95:
+ /* Shift by [96..inf) bits */
+@@ -203,5 +203,5 @@ Ls_more_than_95:
+       popl    %ebx
+       popl    %esi
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(FPU_shrxs)
+--- a/arch/x86/mm/mem_encrypt_boot.S
++++ b/arch/x86/mm/mem_encrypt_boot.S
+@@ -65,7 +65,7 @@ SYM_FUNC_START(sme_encrypt_execute)
+       movq    %rbp, %rsp              /* Restore original stack pointer */
+       pop     %rbp
+-      ret
++      RET
+ SYM_FUNC_END(sme_encrypt_execute)
+ SYM_FUNC_START(__enc_copy)
+@@ -151,6 +151,6 @@ SYM_FUNC_START(__enc_copy)
+       pop     %r12
+       pop     %r15
+-      ret
++      RET
+ .L__enc_copy_end:
+ SYM_FUNC_END(__enc_copy)
+--- a/arch/x86/platform/efi/efi_stub_32.S
++++ b/arch/x86/platform/efi/efi_stub_32.S
+@@ -56,5 +56,5 @@ SYM_FUNC_START(efi_call_svam)
+       movl    16(%esp), %ebx
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(efi_call_svam)
+--- a/arch/x86/platform/efi/efi_stub_64.S
++++ b/arch/x86/platform/efi/efi_stub_64.S
+@@ -23,5 +23,5 @@ SYM_FUNC_START(__efi_call)
+       mov %rsi, %rcx
+       CALL_NOSPEC rdi
+       leave
+-      ret
++      RET
+ SYM_FUNC_END(__efi_call)
+--- a/arch/x86/platform/efi/efi_thunk_64.S
++++ b/arch/x86/platform/efi/efi_thunk_64.S
+@@ -63,7 +63,7 @@ SYM_CODE_START(__efi64_thunk)
+ 1:    movq    24(%rsp), %rsp
+       pop     %rbx
+       pop     %rbp
+-      retq
++      RET
+       .code32
+ 2:    pushl   $__KERNEL_CS
+--- a/arch/x86/platform/olpc/xo1-wakeup.S
++++ b/arch/x86/platform/olpc/xo1-wakeup.S
+@@ -77,7 +77,7 @@ save_registers:
+       pushfl
+       popl saved_context_eflags
+-      ret
++      RET
+ restore_registers:
+       movl saved_context_ebp, %ebp
+@@ -88,7 +88,7 @@ restore_registers:
+       pushl saved_context_eflags
+       popfl
+-      ret
++      RET
+ SYM_CODE_START(do_olpc_suspend_lowlevel)
+       call    save_processor_state
+@@ -109,7 +109,7 @@ ret_point:
+       call    restore_registers
+       call    restore_processor_state
+-      ret
++      RET
+ SYM_CODE_END(do_olpc_suspend_lowlevel)
+ .data
+--- a/arch/x86/power/hibernate_asm_32.S
++++ b/arch/x86/power/hibernate_asm_32.S
+@@ -32,7 +32,7 @@ SYM_FUNC_START(swsusp_arch_suspend)
+       FRAME_BEGIN
+       call swsusp_save
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(swsusp_arch_suspend)
+ SYM_CODE_START(restore_image)
+@@ -108,5 +108,5 @@ SYM_FUNC_START(restore_registers)
+       /* tell the hibernation core that we've just restored the memory */
+       movl    %eax, in_suspend
+-      ret
++      RET
+ SYM_FUNC_END(restore_registers)
+--- a/arch/x86/power/hibernate_asm_64.S
++++ b/arch/x86/power/hibernate_asm_64.S
+@@ -49,7 +49,7 @@ SYM_FUNC_START(swsusp_arch_suspend)
+       FRAME_BEGIN
+       call swsusp_save
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(swsusp_arch_suspend)
+ SYM_CODE_START(restore_image)
+@@ -143,5 +143,5 @@ SYM_FUNC_START(restore_registers)
+       /* tell the hibernation core that we've just restored the memory */
+       movq    %rax, in_suspend(%rip)
+-      ret
++      RET
+ SYM_FUNC_END(restore_registers)
+--- a/arch/x86/um/checksum_32.S
++++ b/arch/x86/um/checksum_32.S
+@@ -110,7 +110,7 @@ csum_partial:
+ 7:    
+       popl %ebx
+       popl %esi
+-      ret
++      RET
+ #else
+@@ -208,7 +208,7 @@ csum_partial:
+ 80: 
+       popl %ebx
+       popl %esi
+-      ret
++      RET
+                               
+ #endif
+       EXPORT_SYMBOL(csum_partial)
+--- a/arch/x86/um/setjmp_32.S
++++ b/arch/x86/um/setjmp_32.S
+@@ -34,7 +34,7 @@ kernel_setjmp:
+       movl %esi,12(%edx)
+       movl %edi,16(%edx)
+       movl %ecx,20(%edx)              # Return address
+-      ret
++      RET
+       .size kernel_setjmp,.-kernel_setjmp
+--- a/arch/x86/um/setjmp_64.S
++++ b/arch/x86/um/setjmp_64.S
+@@ -33,7 +33,7 @@ kernel_setjmp:
+       movq %r14,40(%rdi)
+       movq %r15,48(%rdi)
+       movq %rsi,56(%rdi)              # Return address
+-      ret
++      RET
+       .size kernel_setjmp,.-kernel_setjmp
+--- a/arch/x86/xen/xen-asm.S
++++ b/arch/x86/xen/xen-asm.S
+@@ -45,7 +45,7 @@ SYM_FUNC_START(xen_irq_enable_direct)
+       call check_events
+ 1:
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(xen_irq_enable_direct)
+@@ -55,7 +55,7 @@ SYM_FUNC_END(xen_irq_enable_direct)
+  */
+ SYM_FUNC_START(xen_irq_disable_direct)
+       movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+-      ret
++      RET
+ SYM_FUNC_END(xen_irq_disable_direct)
+ /*
+@@ -71,7 +71,7 @@ SYM_FUNC_START(xen_save_fl_direct)
+       testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+       setz %ah
+       addb %ah, %ah
+-      ret
++      RET
+ SYM_FUNC_END(xen_save_fl_direct)
+@@ -98,7 +98,7 @@ SYM_FUNC_START(xen_restore_fl_direct)
+       call check_events
+ 1:
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(xen_restore_fl_direct)
+@@ -128,7 +128,7 @@ SYM_FUNC_START(check_events)
+       pop %rcx
+       pop %rax
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(check_events)
+ SYM_FUNC_START(xen_read_cr2)
+@@ -136,14 +136,14 @@ SYM_FUNC_START(xen_read_cr2)
+       _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
+       _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(xen_read_cr2);
+ SYM_FUNC_START(xen_read_cr2_direct)
+       FRAME_BEGIN
+       _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+       FRAME_END
+-      ret
++      RET
+ SYM_FUNC_END(xen_read_cr2_direct);
+ .macro xen_pv_trap name
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -70,7 +70,7 @@ SYM_CODE_START(hypercall_page)
+       .rept (PAGE_SIZE / 32)
+               UNWIND_HINT_FUNC
+               .skip 31, 0x90
+-              ret
++              RET
+       .endr
+ #define HYPERCALL(n) \
diff --git a/queue-5.10/x86-prepare-inline-asm-for-straight-line-speculation.patch b/queue-5.10/x86-prepare-inline-asm-for-straight-line-speculation.patch
new file mode 100644 (file)
index 0000000..86245a8
--- /dev/null
@@ -0,0 +1,191 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sat, 4 Dec 2021 14:43:41 +0100
+Subject: x86: Prepare inline-asm for straight-line-speculation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit b17c2baa305cccbd16bafa289fd743cc2db77966 upstream.
+
+Replace all ret/retq instructions with ASM_RET in preparation of
+making it more than a single instruction.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20211204134907.964635458@infradead.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/linkage.h            |    4 ++++
+ arch/x86/include/asm/paravirt.h           |    2 +-
+ arch/x86/include/asm/qspinlock_paravirt.h |    4 ++--
+ arch/x86/kernel/alternative.c             |    2 +-
+ arch/x86/kernel/kprobes/core.c            |    2 +-
+ arch/x86/kernel/paravirt.c                |    2 +-
+ arch/x86/kvm/emulate.c                    |    4 ++--
+ arch/x86/lib/error-inject.c               |    3 ++-
+ samples/ftrace/ftrace-direct-modify.c     |    4 ++--
+ samples/ftrace/ftrace-direct-too.c        |    2 +-
+ samples/ftrace/ftrace-direct.c            |    2 +-
+ 11 files changed, 18 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/include/asm/linkage.h
++++ b/arch/x86/include/asm/linkage.h
+@@ -18,6 +18,10 @@
+ #define __ALIGN_STR   __stringify(__ALIGN)
+ #endif
++#else /* __ASSEMBLY__ */
++
++#define ASM_RET       "ret\n\t"
++
+ #endif /* __ASSEMBLY__ */
+ #endif /* _ASM_X86_LINKAGE_H */
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -630,7 +630,7 @@ bool __raw_callee_save___native_vcpu_is_
+           "call " #func ";"                                           \
+           PV_RESTORE_ALL_CALLER_REGS                                  \
+           FRAME_END                                                   \
+-          "ret;"                                                      \
++          ASM_RET                                                     \
+           ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \
+           ".popsection")
+--- a/arch/x86/include/asm/qspinlock_paravirt.h
++++ b/arch/x86/include/asm/qspinlock_paravirt.h
+@@ -48,7 +48,7 @@ asm    (".pushsection .text;"
+       "jne   .slowpath;"
+       "pop   %rdx;"
+       FRAME_END
+-      "ret;"
++      ASM_RET
+       ".slowpath: "
+       "push   %rsi;"
+       "movzbl %al,%esi;"
+@@ -56,7 +56,7 @@ asm    (".pushsection .text;"
+       "pop    %rsi;"
+       "pop    %rdx;"
+       FRAME_END
+-      "ret;"
++      ASM_RET
+       ".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
+       ".popsection");
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -869,7 +869,7 @@ asm (
+ "     .type           int3_magic, @function\n"
+ "int3_magic:\n"
+ "     movl    $1, (%" _ASM_ARG1 ")\n"
+-"     ret\n"
++      ASM_RET
+ "     .size           int3_magic, .-int3_magic\n"
+ "     .popsection\n"
+ );
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -768,7 +768,7 @@ asm(
+       RESTORE_REGS_STRING
+       "       popfl\n"
+ #endif
+-      "       ret\n"
++      ASM_RET
+       ".size kretprobe_trampoline, .-kretprobe_trampoline\n"
+ );
+ NOKPROBE_SYMBOL(kretprobe_trampoline);
+--- a/arch/x86/kernel/paravirt.c
++++ b/arch/x86/kernel/paravirt.c
+@@ -40,7 +40,7 @@ extern void _paravirt_nop(void);
+ asm (".pushsection .entry.text, \"ax\"\n"
+      ".global _paravirt_nop\n"
+      "_paravirt_nop:\n\t"
+-     "ret\n\t"
++     ASM_RET
+      ".size _paravirt_nop, . - _paravirt_nop\n\t"
+      ".type _paravirt_nop, @function\n\t"
+      ".popsection");
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -316,7 +316,7 @@ static int fastop(struct x86_emulate_ctx
+       __FOP_FUNC(#name)
+ #define __FOP_RET(name) \
+-      "ret \n\t" \
++      ASM_RET \
+       ".size " name ", .-" name "\n\t"
+ #define FOP_RET(name) \
+@@ -437,7 +437,7 @@ static int fastop(struct x86_emulate_ctx
+ asm(".pushsection .fixup, \"ax\"\n"
+     ".global kvm_fastop_exception \n"
+-    "kvm_fastop_exception: xor %esi, %esi; ret\n"
++    "kvm_fastop_exception: xor %esi, %esi; " ASM_RET
+     ".popsection");
+ FOP_START(setcc)
+--- a/arch/x86/lib/error-inject.c
++++ b/arch/x86/lib/error-inject.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0
++#include <linux/linkage.h>
+ #include <linux/error-injection.h>
+ #include <linux/kprobes.h>
+@@ -10,7 +11,7 @@ asm(
+       ".type just_return_func, @function\n"
+       ".globl just_return_func\n"
+       "just_return_func:\n"
+-      "       ret\n"
++              ASM_RET
+       ".size just_return_func, .-just_return_func\n"
+ );
+--- a/samples/ftrace/ftrace-direct-modify.c
++++ b/samples/ftrace/ftrace-direct-modify.c
+@@ -31,7 +31,7 @@ asm (
+ "     call my_direct_func1\n"
+ "     leave\n"
+ "     .size           my_tramp1, .-my_tramp1\n"
+-"     ret\n"
++      ASM_RET
+ "     .type           my_tramp2, @function\n"
+ "     .globl          my_tramp2\n"
+ "   my_tramp2:"
+@@ -39,7 +39,7 @@ asm (
+ "     movq %rsp, %rbp\n"
+ "     call my_direct_func2\n"
+ "     leave\n"
+-"     ret\n"
++      ASM_RET
+ "     .size           my_tramp2, .-my_tramp2\n"
+ "     .popsection\n"
+ );
+--- a/samples/ftrace/ftrace-direct-too.c
++++ b/samples/ftrace/ftrace-direct-too.c
+@@ -31,7 +31,7 @@ asm (
+ "     popq %rsi\n"
+ "     popq %rdi\n"
+ "     leave\n"
+-"     ret\n"
++      ASM_RET
+ "     .size           my_tramp, .-my_tramp\n"
+ "     .popsection\n"
+ );
+--- a/samples/ftrace/ftrace-direct.c
++++ b/samples/ftrace/ftrace-direct.c
+@@ -24,7 +24,7 @@ asm (
+ "     call my_direct_func\n"
+ "     popq %rdi\n"
+ "     leave\n"
+-"     ret\n"
++      ASM_RET
+ "     .size           my_tramp, .-my_tramp\n"
+ "     .popsection\n"
+ );
diff --git a/queue-5.10/x86-realmode-build-with-d__disable_exports.patch b/queue-5.10/x86-realmode-build-with-d__disable_exports.patch
new file mode 100644 (file)
index 0000000..a62e4dd
--- /dev/null
@@ -0,0 +1,29 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Fri, 1 Jul 2022 11:21:20 -0300
+Subject: x86/realmode: build with -D__DISABLE_EXPORTS
+
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+
+Commit 156ff4a544ae ("x86/ibt: Base IBT bits") added this option when
+building realmode in order to disable IBT there. This is also needed in
+order to disable return thunks.
+
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Makefile |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -31,7 +31,7 @@ endif
+ CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h
+ M16_CFLAGS     := $(call cc-option, -m16, $(CODE16GCC_CFLAGS))
+-REALMODE_CFLAGS       := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING \
++REALMODE_CFLAGS       := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \
+                  -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
+                  -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
+                  -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none)
diff --git a/queue-5.10/x86-retbleed-add-fine-grained-kconfig-knobs.patch b/queue-5.10/x86-retbleed-add-fine-grained-kconfig-knobs.patch
new file mode 100644 (file)
index 0000000..3023941
--- /dev/null
@@ -0,0 +1,594 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 27 Jun 2022 22:21:17 +0000
+Subject: x86/retbleed: Add fine grained Kconfig knobs
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit f43b9876e857c739d407bc56df288b0ebe1a9164 upstream.
+
+Do fine-grained Kconfig for all the various retbleed parts.
+
+NOTE: if your compiler doesn't support return thunks this will
+silently 'upgrade' your mitigation to IBPB, you might not like this.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: there is no CONFIG_OBJTOOL]
+[cascardo: objtool calling and option parsing has changed]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10:
+ - In scripts/Makefile.build, add the objtool option with an ifdef
+   block, same as for other options
+ - Adjust filename, context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Makefile                                 |    8 +-
+ arch/x86/Kconfig                         |  106 +++++++++++++++++++++++--------
+ arch/x86/entry/calling.h                 |    4 +
+ arch/x86/include/asm/disabled-features.h |   18 ++++-
+ arch/x86/include/asm/linkage.h           |    4 -
+ arch/x86/include/asm/nospec-branch.h     |   10 ++
+ arch/x86/include/asm/static_call.h       |    2 
+ arch/x86/kernel/alternative.c            |    5 +
+ arch/x86/kernel/cpu/amd.c                |    2 
+ arch/x86/kernel/cpu/bugs.c               |   42 +++++++-----
+ arch/x86/kernel/static_call.c            |    2 
+ arch/x86/kvm/emulate.c                   |    4 -
+ arch/x86/lib/retpoline.S                 |    4 +
+ scripts/Makefile.build                   |    3 
+ scripts/link-vmlinux.sh                  |    2 
+ security/Kconfig                         |   11 ---
+ tools/objtool/builtin-check.c            |    3 
+ tools/objtool/builtin.h                  |    2 
+ tools/objtool/check.c                    |    9 ++
+ 19 files changed, 172 insertions(+), 69 deletions(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -672,14 +672,18 @@ endif
+ ifdef CONFIG_CC_IS_GCC
+ RETPOLINE_CFLAGS      := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+-RETPOLINE_CFLAGS      += $(call cc-option,-mfunction-return=thunk-extern)
+ RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register)
+ endif
+ ifdef CONFIG_CC_IS_CLANG
+ RETPOLINE_CFLAGS      := -mretpoline-external-thunk
+ RETPOLINE_VDSO_CFLAGS := -mretpoline
+-RETPOLINE_CFLAGS      += $(call cc-option,-mfunction-return=thunk-extern)
+ endif
++
++ifdef CONFIG_RETHUNK
++RETHUNK_CFLAGS         := -mfunction-return=thunk-extern
++RETPOLINE_CFLAGS       += $(RETHUNK_CFLAGS)
++endif
++
+ export RETPOLINE_CFLAGS
+ export RETPOLINE_VDSO_CFLAGS
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -453,30 +453,6 @@ config GOLDFISH
+       def_bool y
+       depends on X86_GOLDFISH
+-config RETPOLINE
+-      bool "Avoid speculative indirect branches in kernel"
+-      default y
+-      help
+-        Compile kernel with the retpoline compiler options to guard against
+-        kernel-to-user data leaks by avoiding speculative indirect
+-        branches. Requires a compiler with -mindirect-branch=thunk-extern
+-        support for full protection. The kernel may run slower.
+-
+-config CC_HAS_SLS
+-      def_bool $(cc-option,-mharden-sls=all)
+-
+-config CC_HAS_RETURN_THUNK
+-      def_bool $(cc-option,-mfunction-return=thunk-extern)
+-
+-config SLS
+-      bool "Mitigate Straight-Line-Speculation"
+-      depends on CC_HAS_SLS && X86_64
+-      default n
+-      help
+-        Compile the kernel with straight-line-speculation options to guard
+-        against straight line speculation. The kernel image might be slightly
+-        larger.
+-
+ config X86_CPU_RESCTRL
+       bool "x86 CPU resource control support"
+       depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
+@@ -2430,6 +2406,88 @@ source "kernel/livepatch/Kconfig"
+ endmenu
++config CC_HAS_SLS
++      def_bool $(cc-option,-mharden-sls=all)
++
++config CC_HAS_RETURN_THUNK
++      def_bool $(cc-option,-mfunction-return=thunk-extern)
++
++menuconfig SPECULATION_MITIGATIONS
++      bool "Mitigations for speculative execution vulnerabilities"
++      default y
++      help
++        Say Y here to enable options which enable mitigations for
++        speculative execution hardware vulnerabilities.
++
++        If you say N, all mitigations will be disabled. You really
++        should know what you are doing to say so.
++
++if SPECULATION_MITIGATIONS
++
++config PAGE_TABLE_ISOLATION
++      bool "Remove the kernel mapping in user mode"
++      default y
++      depends on (X86_64 || X86_PAE)
++      help
++        This feature reduces the number of hardware side channels by
++        ensuring that the majority of kernel addresses are not mapped
++        into userspace.
++
++        See Documentation/x86/pti.rst for more details.
++
++config RETPOLINE
++      bool "Avoid speculative indirect branches in kernel"
++      default y
++      help
++        Compile kernel with the retpoline compiler options to guard against
++        kernel-to-user data leaks by avoiding speculative indirect
++        branches. Requires a compiler with -mindirect-branch=thunk-extern
++        support for full protection. The kernel may run slower.
++
++config RETHUNK
++      bool "Enable return-thunks"
++      depends on RETPOLINE && CC_HAS_RETURN_THUNK
++      default y
++      help
++        Compile the kernel with the return-thunks compiler option to guard
++        against kernel-to-user data leaks by avoiding return speculation.
++        Requires a compiler with -mfunction-return=thunk-extern
++        support for full protection. The kernel may run slower.
++
++config CPU_UNRET_ENTRY
++      bool "Enable UNRET on kernel entry"
++      depends on CPU_SUP_AMD && RETHUNK
++      default y
++      help
++        Compile the kernel with support for the retbleed=unret mitigation.
++
++config CPU_IBPB_ENTRY
++      bool "Enable IBPB on kernel entry"
++      depends on CPU_SUP_AMD
++      default y
++      help
++        Compile the kernel with support for the retbleed=ibpb mitigation.
++
++config CPU_IBRS_ENTRY
++      bool "Enable IBRS on kernel entry"
++      depends on CPU_SUP_INTEL
++      default y
++      help
++        Compile the kernel with support for the spectre_v2=ibrs mitigation.
++        This mitigates both spectre_v2 and retbleed at great cost to
++        performance.
++
++config SLS
++      bool "Mitigate Straight-Line-Speculation"
++      depends on CC_HAS_SLS && X86_64
++      default n
++      help
++        Compile the kernel with straight-line-speculation options to guard
++        against straight line speculation. The kernel image might be slightly
++        larger.
++
++endif
++
+ config ARCH_HAS_ADD_PAGES
+       def_bool y
+       depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -323,6 +323,7 @@ For 32-bit we have the following convent
+  * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
+  */
+ .macro IBRS_ENTER save_reg
++#ifdef CONFIG_CPU_IBRS_ENTRY
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+       movl    $MSR_IA32_SPEC_CTRL, %ecx
+@@ -343,6 +344,7 @@ For 32-bit we have the following convent
+       shr     $32, %rdx
+       wrmsr
+ .Lend_\@:
++#endif
+ .endm
+ /*
+@@ -350,6 +352,7 @@ For 32-bit we have the following convent
+  * regs. Must be called after the last RET.
+  */
+ .macro IBRS_EXIT save_reg
++#ifdef CONFIG_CPU_IBRS_ENTRY
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+       movl    $MSR_IA32_SPEC_CTRL, %ecx
+@@ -364,6 +367,7 @@ For 32-bit we have the following convent
+       shr     $32, %rdx
+       wrmsr
+ .Lend_\@:
++#endif
+ .endm
+ /*
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -60,9 +60,19 @@
+ # define DISABLE_RETPOLINE    0
+ #else
+ # define DISABLE_RETPOLINE    ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+-                               (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \
+-                               (1 << (X86_FEATURE_RETHUNK & 31)) | \
+-                               (1 << (X86_FEATURE_UNRET & 31)))
++                               (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
++#endif
++
++#ifdef CONFIG_RETHUNK
++# define DISABLE_RETHUNK      0
++#else
++# define DISABLE_RETHUNK      (1 << (X86_FEATURE_RETHUNK & 31))
++#endif
++
++#ifdef CONFIG_CPU_UNRET_ENTRY
++# define DISABLE_UNRET                0
++#else
++# define DISABLE_UNRET                (1 << (X86_FEATURE_UNRET & 31))
+ #endif
+ /* Force disable because it's broken beyond repair */
+@@ -82,7 +92,7 @@
+ #define DISABLED_MASK8        0
+ #define DISABLED_MASK9        (DISABLE_SMAP)
+ #define DISABLED_MASK10       0
+-#define DISABLED_MASK11       (DISABLE_RETPOLINE)
++#define DISABLED_MASK11       (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
+ #define DISABLED_MASK12       0
+ #define DISABLED_MASK13       0
+ #define DISABLED_MASK14       0
+--- a/arch/x86/include/asm/linkage.h
++++ b/arch/x86/include/asm/linkage.h
+@@ -18,7 +18,7 @@
+ #define __ALIGN_STR   __stringify(__ALIGN)
+ #endif
+-#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+ #define RET   jmp __x86_return_thunk
+ #else /* CONFIG_RETPOLINE */
+ #ifdef CONFIG_SLS
+@@ -30,7 +30,7 @@
+ #else /* __ASSEMBLY__ */
+-#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+ #define ASM_RET       "jmp __x86_return_thunk\n\t"
+ #else /* CONFIG_RETPOLINE */
+ #ifdef CONFIG_SLS
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -127,6 +127,12 @@
+ .Lskip_rsb_\@:
+ .endm
++#ifdef CONFIG_CPU_UNRET_ENTRY
++#define CALL_ZEN_UNTRAIN_RET  "call zen_untrain_ret"
++#else
++#define CALL_ZEN_UNTRAIN_RET  ""
++#endif
++
+ /*
+  * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+  * return thunk isn't mapped into the userspace tables (then again, AMD
+@@ -139,10 +145,10 @@
+  * where we have a stack but before any RET instruction.
+  */
+ .macro UNTRAIN_RET
+-#ifdef CONFIG_RETPOLINE
++#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
+       ANNOTATE_UNRET_END
+       ALTERNATIVE_2 "",                                               \
+-                    "call zen_untrain_ret", X86_FEATURE_UNRET,        \
++                    CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET,          \
+                     "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
+ #endif
+ .endm
+--- a/arch/x86/include/asm/static_call.h
++++ b/arch/x86/include/asm/static_call.h
+@@ -44,7 +44,7 @@
+ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)                     \
+       __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+-#ifdef CONFIG_RETPOLINE
++#ifdef CONFIG_RETHUNK
+ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)                      \
+       __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
+ #else
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -662,6 +662,7 @@ void __init_or_module noinline apply_ret
+       }
+ }
++#ifdef CONFIG_RETHUNK
+ /*
+  * Rewrite the compiler generated return thunk tail-calls.
+  *
+@@ -723,6 +724,10 @@ void __init_or_module noinline apply_ret
+               }
+       }
+ }
++#else
++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
++#endif /* CONFIG_RETHUNK */
++
+ #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
+ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -916,6 +916,7 @@ static void init_amd_bd(struct cpuinfo_x
+ void init_spectral_chicken(struct cpuinfo_x86 *c)
+ {
++#ifdef CONFIG_CPU_UNRET_ENTRY
+       u64 value;
+       /*
+@@ -932,6 +933,7 @@ void init_spectral_chicken(struct cpuinf
+                       wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
+               }
+       }
++#endif
+ }
+ static void init_amd_zn(struct cpuinfo_x86 *c)
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -798,7 +798,6 @@ static int __init retbleed_parse_cmdline
+ early_param("retbleed", retbleed_parse_cmdline);
+ #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
+-#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n"
+ #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
+ static void __init retbleed_select_mitigation(void)
+@@ -813,18 +812,33 @@ static void __init retbleed_select_mitig
+               return;
+       case RETBLEED_CMD_UNRET:
+-              retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++              if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) {
++                      retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++              } else {
++                      pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n");
++                      goto do_cmd_auto;
++              }
+               break;
+       case RETBLEED_CMD_IBPB:
+-              retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++              if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
++                      retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++              } else {
++                      pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
++                      goto do_cmd_auto;
++              }
+               break;
++do_cmd_auto:
+       case RETBLEED_CMD_AUTO:
+       default:
+               if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+-                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+-                      retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++                  boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
++                      if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY))
++                              retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++                      else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY))
++                              retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++              }
+               /*
+                * The Intel mitigation (IBRS or eIBRS) was already selected in
+@@ -837,14 +851,6 @@ static void __init retbleed_select_mitig
+       switch (retbleed_mitigation) {
+       case RETBLEED_MITIGATION_UNRET:
+-
+-              if (!IS_ENABLED(CONFIG_RETPOLINE) ||
+-                  !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) {
+-                      pr_err(RETBLEED_COMPILER_MSG);
+-                      retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+-                      goto retbleed_force_ibpb;
+-              }
+-
+               setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+               setup_force_cpu_cap(X86_FEATURE_UNRET);
+@@ -856,7 +862,6 @@ static void __init retbleed_select_mitig
+               break;
+       case RETBLEED_MITIGATION_IBPB:
+-retbleed_force_ibpb:
+               setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+               mitigate_smt = true;
+               break;
+@@ -1227,6 +1232,12 @@ static enum spectre_v2_mitigation_cmd __
+               return SPECTRE_V2_CMD_AUTO;
+       }
++      if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) {
++              pr_err("%s selected but not compiled in. Switching to AUTO select\n",
++                     mitigation_options[i].option);
++              return SPECTRE_V2_CMD_AUTO;
++      }
++
+       if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
+               pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
+                      mitigation_options[i].option);
+@@ -1284,7 +1295,8 @@ static void __init spectre_v2_select_mit
+                       break;
+               }
+-              if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
++              if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) &&
++                  boot_cpu_has_bug(X86_BUG_RETBLEED) &&
+                   retbleed_cmd != RETBLEED_CMD_OFF &&
+                   boot_cpu_has(X86_FEATURE_IBRS) &&
+                   boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -108,7 +108,7 @@ void arch_static_call_transform(void *si
+ }
+ EXPORT_SYMBOL_GPL(arch_static_call_transform);
+-#ifdef CONFIG_RETPOLINE
++#ifdef CONFIG_RETHUNK
+ /*
+  * This is called by apply_returns() to fix up static call trampolines,
+  * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -435,10 +435,10 @@ static int fastop(struct x86_emulate_ctx
+  * Depending on .config the SETcc functions look like:
+  *
+  * SETcc %al                  [3 bytes]
+- * RET | JMP __x86_return_thunk       [1,5 bytes; CONFIG_RETPOLINE]
++ * RET | JMP __x86_return_thunk       [1,5 bytes; CONFIG_RETHUNK]
+  * INT3                               [1 byte; CONFIG_SLS]
+  */
+-#define RET_LENGTH    (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \
++#define RET_LENGTH    (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \
+                        IS_ENABLED(CONFIG_SLS))
+ #define SETCC_LENGTH  (3 + RET_LENGTH)
+ #define SETCC_ALIGN   (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1))
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -71,6 +71,8 @@ SYM_CODE_END(__x86_indirect_thunk_array)
+  * This function name is magical and is used by -mfunction-return=thunk-extern
+  * for the compiler to generate JMPs to it.
+  */
++#ifdef CONFIG_RETHUNK
++
+       .section .text.__x86.return_thunk
+ /*
+@@ -135,3 +137,5 @@ SYM_FUNC_END(zen_untrain_ret)
+ __EXPORT_THUNK(zen_untrain_ret)
+ EXPORT_SYMBOL(__x86_return_thunk)
++
++#endif /* CONFIG_RETHUNK */
+--- a/scripts/Makefile.build
++++ b/scripts/Makefile.build
+@@ -227,6 +227,9 @@ endif
+ ifdef CONFIG_RETPOLINE
+   objtool_args += --retpoline
+ endif
++ifdef CONFIG_RETHUNK
++  objtool_args += --rethunk
++endif
+ ifdef CONFIG_X86_SMAP
+   objtool_args += --uaccess
+ endif
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -65,7 +65,7 @@ objtool_link()
+       if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then
+               objtoolopt="check"
+-              if [ -n "${CONFIG_RETPOLINE}" ]; then
++              if [ -n "${CONFIG_CPU_UNRET_ENTRY}" ]; then
+                       objtoolopt="${objtoolopt} --unret"
+               fi
+               if [ -z "${CONFIG_FRAME_POINTER}" ]; then
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -54,17 +54,6 @@ config SECURITY_NETWORK
+         implement socket and networking access controls.
+         If you are unsure how to answer this question, answer N.
+-config PAGE_TABLE_ISOLATION
+-      bool "Remove the kernel mapping in user mode"
+-      default y
+-      depends on (X86_64 || X86_PAE) && !UML
+-      help
+-        This feature reduces the number of hardware side channels by
+-        ensuring that the majority of kernel addresses are not mapped
+-        into userspace.
+-
+-        See Documentation/x86/pti.rst for more details.
+-
+ config SECURITY_INFINIBAND
+       bool "Infiniband Security Hooks"
+       depends on SECURITY && INFINIBAND
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -19,7 +19,7 @@
+ #include "objtool.h"
+ bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+-     validate_dup, vmlinux, sls, unret;
++     validate_dup, vmlinux, sls, unret, rethunk;
+ static const char * const check_usage[] = {
+       "objtool check [<options>] file.o",
+@@ -30,6 +30,7 @@ const struct option check_options[] = {
+       OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
+       OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
+       OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
++      OPT_BOOLEAN(0,   "rethunk", &rethunk, "validate and annotate rethunk usage"),
+       OPT_BOOLEAN(0,   "unret", &unret, "validate entry unret placement"),
+       OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
+       OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
+--- a/tools/objtool/builtin.h
++++ b/tools/objtool/builtin.h
+@@ -9,7 +9,7 @@
+ extern const struct option check_options[];
+ extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+-            validate_dup, vmlinux, sls, unret;
++            validate_dup, vmlinux, sls, unret, rethunk;
+ extern int cmd_check(int argc, const char **argv);
+ extern int cmd_orc(int argc, const char **argv);
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -3262,8 +3262,11 @@ static int validate_retpoline(struct obj
+                       continue;
+               if (insn->type == INSN_RETURN) {
+-                      WARN_FUNC("'naked' return found in RETPOLINE build",
+-                                insn->sec, insn->offset);
++                      if (rethunk) {
++                              WARN_FUNC("'naked' return found in RETHUNK build",
++                                        insn->sec, insn->offset);
++                      } else
++                              continue;
+               } else {
+                       WARN_FUNC("indirect %s found in RETPOLINE build",
+                                 insn->sec, insn->offset,
+@@ -3533,7 +3536,9 @@ int check(struct objtool_file *file)
+               if (ret < 0)
+                       goto out;
+               warnings += ret;
++      }
++      if (rethunk) {
+               ret = create_return_sites_sections(file);
+               if (ret < 0)
+                       goto out;
diff --git a/queue-5.10/x86-retpoline-cleanup-some-ifdefery.patch b/queue-5.10/x86-retpoline-cleanup-some-ifdefery.patch
new file mode 100644 (file)
index 0000000..fb6f6b6
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:34 +0200
+Subject: x86/retpoline: Cleanup some #ifdefery
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 369ae6ffc41a3c1137cab697635a84d0cc7cdcea upstream.
+
+On its own not much of a cleanup but it prepares for more/similar
+code.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: conflict fixup because of DISABLE_ENQCMD]
+[cascardo: no changes at nospec-branch.h and bpf_jit_comp.c]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/disabled-features.h |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -56,6 +56,13 @@
+ # define DISABLE_PTI          (1 << (X86_FEATURE_PTI & 31))
+ #endif
++#ifdef CONFIG_RETPOLINE
++# define DISABLE_RETPOLINE    0
++#else
++# define DISABLE_RETPOLINE    ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
++                               (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
++#endif
++
+ /* Force disable because it's broken beyond repair */
+ #define DISABLE_ENQCMD                (1 << (X86_FEATURE_ENQCMD & 31))
+@@ -73,7 +80,7 @@
+ #define DISABLED_MASK8        0
+ #define DISABLED_MASK9        (DISABLE_SMAP)
+ #define DISABLED_MASK10       0
+-#define DISABLED_MASK11       0
++#define DISABLED_MASK11       (DISABLE_RETPOLINE)
+ #define DISABLED_MASK12       0
+ #define DISABLED_MASK13       0
+ #define DISABLED_MASK14       0
diff --git a/queue-5.10/x86-retpoline-create-a-retpoline-thunk-array.patch b/queue-5.10/x86-retpoline-create-a-retpoline-thunk-array.patch
new file mode 100644 (file)
index 0000000..f53fd2b
--- /dev/null
@@ -0,0 +1,105 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:41 +0200
+Subject: x86/retpoline: Create a retpoline thunk array
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1a6f74429c42a3854980359a758e222005712aee upstream.
+
+Stick all the retpolines in a single symbol and have the individual
+thunks as inner labels, this should guarantee thunk order and layout.
+
+Previously there were 16 (or rather 15 without rsp) separate symbols and
+a toolchain might reasonably expect it could displace them however it
+liked, with disregard for their relative position.
+
+However, now they're part of a larger symbol. Any change to their
+relative position would disrupt this larger _array symbol and thus not
+be sound.
+
+This is the same reasoning used for data symbols. On their own there
+is no guarantee about their relative position wrt one another, but
+we're still able to do arrays because an array as a whole is a single
+larger symbol.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.169659320@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |    8 +++++++-
+ arch/x86/lib/retpoline.S             |   14 +++++++++-----
+ 2 files changed, 16 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -12,6 +12,8 @@
+ #include <asm/msr-index.h>
+ #include <asm/unwind_hints.h>
++#define RETPOLINE_THUNK_SIZE  32
++
+ /*
+  * Fill the CPU return stack buffer.
+  *
+@@ -120,11 +122,15 @@
+ #ifdef CONFIG_RETPOLINE
++typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
++
+ #define GEN(reg) \
+-      extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
++      extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
+ #include <asm/GEN-for-each-reg.h>
+ #undef GEN
++extern retpoline_thunk_t __x86_indirect_thunk_array[];
++
+ #ifdef CONFIG_X86_64
+ /*
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -28,16 +28,14 @@
+ .macro THUNK reg
+-      .align 32
+-
+-SYM_FUNC_START(__x86_indirect_thunk_\reg)
++      .align RETPOLINE_THUNK_SIZE
++SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
++      UNWIND_HINT_EMPTY
+       ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+                     __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
+                     __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
+-SYM_FUNC_END(__x86_indirect_thunk_\reg)
+-
+ .endm
+ /*
+@@ -55,10 +53,16 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
+ #define __EXPORT_THUNK(sym)   _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+ #define EXPORT_THUNK(reg)     __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
++      .align RETPOLINE_THUNK_SIZE
++SYM_CODE_START(__x86_indirect_thunk_array)
++
+ #define GEN(reg) THUNK reg
+ #include <asm/GEN-for-each-reg.h>
+ #undef GEN
++      .align RETPOLINE_THUNK_SIZE
++SYM_CODE_END(__x86_indirect_thunk_array)
++
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
+ #undef GEN
diff --git a/queue-5.10/x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch b/queue-5.10/x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch
new file mode 100644 (file)
index 0000000..b7a5f20
--- /dev/null
@@ -0,0 +1,73 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:40 +0200
+Subject: x86/retpoline: Move the retpoline thunk declarations to nospec-branch.h
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 6fda8a38865607db739be3e567a2387376222dbd upstream.
+
+Because it makes no sense to split the retpoline gunk over multiple
+headers.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.106290934@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm-prototypes.h |    8 --------
+ arch/x86/include/asm/nospec-branch.h  |    7 +++++++
+ arch/x86/net/bpf_jit_comp.c           |    1 -
+ 3 files changed, 7 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -17,11 +17,3 @@
+ extern void cmpxchg8b_emu(void);
+ #endif
+-#ifdef CONFIG_RETPOLINE
+-
+-#define GEN(reg) \
+-      extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+-#include <asm/GEN-for-each-reg.h>
+-#undef GEN
+-
+-#endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -5,6 +5,7 @@
+ #include <linux/static_key.h>
+ #include <linux/objtool.h>
++#include <linux/linkage.h>
+ #include <asm/alternative.h>
+ #include <asm/cpufeatures.h>
+@@ -118,6 +119,12 @@
+       ".popsection\n\t"
+ #ifdef CONFIG_RETPOLINE
++
++#define GEN(reg) \
++      extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
++#include <asm/GEN-for-each-reg.h>
++#undef GEN
++
+ #ifdef CONFIG_X86_64
+ /*
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -15,7 +15,6 @@
+ #include <asm/set_memory.h>
+ #include <asm/nospec-branch.h>
+ #include <asm/text-patching.h>
+-#include <asm/asm-prototypes.h>
+ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
+ {
diff --git a/queue-5.10/x86-retpoline-remove-unused-replacement-symbols.patch b/queue-5.10/x86-retpoline-remove-unused-replacement-symbols.patch
new file mode 100644 (file)
index 0000000..c7f1904
--- /dev/null
@@ -0,0 +1,97 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:37 +0200
+Subject: x86/retpoline: Remove unused replacement symbols
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 4fe79e710d9574a14993f8b4e16b7252da72d5e8 upstream.
+
+Now that objtool no longer creates alternatives, these replacement
+symbols are no longer needed, remove them.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120309.915051744@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm-prototypes.h |   10 --------
+ arch/x86/lib/retpoline.S              |   42 ----------------------------------
+ 2 files changed, 52 deletions(-)
+
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -24,14 +24,4 @@ extern void cmpxchg8b_emu(void);
+       extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+ #include <asm/GEN-for-each-reg.h>
+-#undef GEN
+-#define GEN(reg) \
+-      extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
+-#include <asm/GEN-for-each-reg.h>
+-
+-#undef GEN
+-#define GEN(reg) \
+-      extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
+-#include <asm/GEN-for-each-reg.h>
+-
+ #endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -41,36 +41,6 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
+ .endm
+ /*
+- * This generates .altinstr_replacement symbols for use by objtool. They,
+- * however, must not actually live in .altinstr_replacement since that will be
+- * discarded after init, but module alternatives will also reference these
+- * symbols.
+- *
+- * Their names matches the "__x86_indirect_" prefix to mark them as retpolines.
+- */
+-.macro ALT_THUNK reg
+-
+-      .align 1
+-
+-SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
+-      ANNOTATE_RETPOLINE_SAFE
+-1:    call    *%\reg
+-2:    .skip   5-(2b-1b), 0x90
+-SYM_FUNC_END(__x86_indirect_alt_call_\reg)
+-
+-STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg)
+-
+-SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
+-      ANNOTATE_RETPOLINE_SAFE
+-1:    jmp     *%\reg
+-2:    .skip   5-(2b-1b), 0x90
+-SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
+-
+-STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg)
+-
+-.endm
+-
+-/*
+  * Despite being an assembler file we can't just use .irp here
+  * because __KSYM_DEPS__ only uses the C preprocessor and would
+  * only see one instance of "__x86_indirect_thunk_\reg" rather
+@@ -92,15 +62,3 @@ STACK_FRAME_NON_STANDARD(__x86_indirect_
+ #undef GEN
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
+-
+-#undef GEN
+-#define GEN(reg) ALT_THUNK reg
+-#include <asm/GEN-for-each-reg.h>
+-
+-#undef GEN
+-#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
+-#include <asm/GEN-for-each-reg.h>
+-
+-#undef GEN
+-#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
+-#include <asm/GEN-for-each-reg.h>
diff --git a/queue-5.10/x86-retpoline-simplify-retpolines.patch b/queue-5.10/x86-retpoline-simplify-retpolines.patch
new file mode 100644 (file)
index 0000000..7ae493d
--- /dev/null
@@ -0,0 +1,217 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:02 +0100
+Subject: x86/retpoline: Simplify retpolines
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 119251855f9adf9421cb5eb409933092141ab2c7 upstream.
+
+Due to:
+
+  c9c324dc22aa ("objtool: Support stack layout changes in alternatives")
+
+it is now possible to simplify the retpolines.
+
+Currently our retpolines consist of 2 symbols:
+
+ - __x86_indirect_thunk_\reg: the compiler target
+ - __x86_retpoline_\reg:  the actual retpoline.
+
+Both are consecutive in code and aligned such that for any one register
+they both live in the same cacheline:
+
+  0000000000000000 <__x86_indirect_thunk_rax>:
+   0:   ff e0                   jmpq   *%rax
+   2:   90                      nop
+   3:   90                      nop
+   4:   90                      nop
+
+  0000000000000005 <__x86_retpoline_rax>:
+   5:   e8 07 00 00 00          callq  11 <__x86_retpoline_rax+0xc>
+   a:   f3 90                   pause
+   c:   0f ae e8                lfence
+   f:   eb f9                   jmp    a <__x86_retpoline_rax+0x5>
+  11:   48 89 04 24             mov    %rax,(%rsp)
+  15:   c3                      retq
+  16:   66 2e 0f 1f 84 00 00 00 00 00   nopw   %cs:0x0(%rax,%rax,1)
+
+The thunk is an alternative_2, where one option is a JMP to the
+retpoline. This was done so that objtool didn't need to deal with
+alternatives with stack ops. But that problem has been solved, so now
+it is possible to fold the entire retpoline into the alternative to
+simplify and consolidate unused bytes:
+
+  0000000000000000 <__x86_indirect_thunk_rax>:
+   0:   ff e0                   jmpq   *%rax
+   2:   90                      nop
+   3:   90                      nop
+   4:   90                      nop
+   5:   90                      nop
+   6:   90                      nop
+   7:   90                      nop
+   8:   90                      nop
+   9:   90                      nop
+   a:   90                      nop
+   b:   90                      nop
+   c:   90                      nop
+   d:   90                      nop
+   e:   90                      nop
+   f:   90                      nop
+  10:   90                      nop
+  11:   66 66 2e 0f 1f 84 00 00 00 00 00        data16 nopw %cs:0x0(%rax,%rax,1)
+  1c:   0f 1f 40 00             nopl   0x0(%rax)
+
+Notice that since the longest alternative sequence is now:
+
+   0:   e8 07 00 00 00          callq  c <.altinstr_replacement+0xc>
+   5:   f3 90                   pause
+   7:   0f ae e8                lfence
+   a:   eb f9                   jmp    5 <.altinstr_replacement+0x5>
+   c:   48 89 04 24             mov    %rax,(%rsp)
+  10:   c3                      retq
+
+17 bytes, we have 15 bytes NOP at the end of our 32 byte slot. (IOW, if
+we can shrink the retpoline by 1 byte we can pack it more densely).
+
+ [ bp: Massage commit message. ]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lkml.kernel.org/r/20210326151259.506071949@infradead.org
+[bwh: Backported to 5.10:
+ - Use X86_FEATRURE_RETPOLINE_LFENCE flag instead of
+   X86_FEATURE_RETPOLINE_AMD, since the later renaming of this flag
+   has already been applied
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm-prototypes.h |    7 -------
+ arch/x86/include/asm/nospec-branch.h  |    6 +++---
+ arch/x86/lib/retpoline.S              |   34 +++++++++++++++++-----------------
+ tools/objtool/check.c                 |    3 +--
+ 4 files changed, 21 insertions(+), 29 deletions(-)
+
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -22,15 +22,8 @@ extern void cmpxchg8b_emu(void);
+ #define DECL_INDIRECT_THUNK(reg) \
+       extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+-#define DECL_RETPOLINE(reg) \
+-      extern asmlinkage void __x86_retpoline_ ## reg (void);
+-
+ #undef GEN
+ #define GEN(reg) DECL_INDIRECT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
+-#undef GEN
+-#define GEN(reg) DECL_RETPOLINE(reg)
+-#include <asm/GEN-for-each-reg.h>
+-
+ #endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -80,7 +80,7 @@
+ .macro JMP_NOSPEC reg:req
+ #ifdef CONFIG_RETPOLINE
+       ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+-                    __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
++                    __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
+                     __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
+ #else
+       jmp     *%\reg
+@@ -90,7 +90,7 @@
+ .macro CALL_NOSPEC reg:req
+ #ifdef CONFIG_RETPOLINE
+       ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
+-                    __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
++                    __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
+                     __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE
+ #else
+       call    *%\reg
+@@ -128,7 +128,7 @@
+       ALTERNATIVE_2(                                          \
+       ANNOTATE_RETPOLINE_SAFE                                 \
+       "call *%[thunk_target]\n",                              \
+-      "call __x86_retpoline_%V[thunk_target]\n",              \
++      "call __x86_indirect_thunk_%V[thunk_target]\n",         \
+       X86_FEATURE_RETPOLINE,                                  \
+       "lfence;\n"                                             \
+       ANNOTATE_RETPOLINE_SAFE                                 \
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -10,27 +10,31 @@
+ #include <asm/unwind_hints.h>
+ #include <asm/frame.h>
+-.macro THUNK reg
+-      .section .text.__x86.indirect_thunk
+-
+-      .align 32
+-SYM_FUNC_START(__x86_indirect_thunk_\reg)
+-      JMP_NOSPEC \reg
+-SYM_FUNC_END(__x86_indirect_thunk_\reg)
+-
+-SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
++.macro RETPOLINE reg
+       ANNOTATE_INTRA_FUNCTION_CALL
+-      call    .Ldo_rop_\@
++      call    .Ldo_rop_\@
+ .Lspec_trap_\@:
+       UNWIND_HINT_EMPTY
+       pause
+       lfence
+-      jmp     .Lspec_trap_\@
++      jmp .Lspec_trap_\@
+ .Ldo_rop_\@:
+-      mov     %\reg, (%_ASM_SP)
++      mov     %\reg, (%_ASM_SP)
+       UNWIND_HINT_FUNC
+       ret
+-SYM_FUNC_END(__x86_retpoline_\reg)
++.endm
++
++.macro THUNK reg
++      .section .text.__x86.indirect_thunk
++
++      .align 32
++SYM_FUNC_START(__x86_indirect_thunk_\reg)
++
++      ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
++                    __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
++                    __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
++
++SYM_FUNC_END(__x86_indirect_thunk_\reg)
+ .endm
+@@ -48,7 +52,6 @@ SYM_FUNC_END(__x86_retpoline_\reg)
+ #define __EXPORT_THUNK(sym)   _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+ #define EXPORT_THUNK(reg)     __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+-#define EXPORT_RETPOLINE(reg)  __EXPORT_THUNK(__x86_retpoline_ ## reg)
+ #undef GEN
+ #define GEN(reg) THUNK reg
+@@ -58,6 +61,3 @@ SYM_FUNC_END(__x86_retpoline_\reg)
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
+-#undef GEN
+-#define GEN(reg) EXPORT_RETPOLINE(reg)
+-#include <asm/GEN-for-each-reg.h>
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -800,8 +800,7 @@ static int add_jump_destinations(struct
+               } else if (reloc->sym->type == STT_SECTION) {
+                       dest_sec = reloc->sym->sec;
+                       dest_off = arch_dest_reloc_offset(reloc->addend);
+-              } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) ||
+-                         !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) {
++              } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
+                       /*
+                        * Retpoline jumps are really dynamic jumps in
+                        * disguise, so convert them accordingly.
diff --git a/queue-5.10/x86-retpoline-swizzle-retpoline-thunk.patch b/queue-5.10/x86-retpoline-swizzle-retpoline-thunk.patch
new file mode 100644 (file)
index 0000000..3dd4e4a
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:35 +0200
+Subject: x86/retpoline: Swizzle retpoline thunk
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 00e1533325fd1fb5459229fe37f235462649f668 upstream.
+
+Put the actual retpoline thunk as the original code so that it can
+become more complicated. Specifically, it allows RET to be a JMP,
+which can't be .altinstr_replacement since that doesn't do relocations
+(except for the very first instruction).
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/retpoline.S |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -32,9 +32,9 @@
+ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
+       UNWIND_HINT_EMPTY
+-      ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+-                    __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
+-                    __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE
++      ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
++                    __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
++                    __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)
+ .endm
diff --git a/queue-5.10/x86-retpoline-use-mfunction-return.patch b/queue-5.10/x86-retpoline-use-mfunction-return.patch
new file mode 100644 (file)
index 0000000..3c14bbf
--- /dev/null
@@ -0,0 +1,79 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:36 +0200
+Subject: x86/retpoline: Use -mfunction-return
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 0b53c374b9eff2255a386f1f1cfb9a928e52a5ae upstream.
+
+Utilize -mfunction-return=thunk-extern when available to have the
+compiler replace RET instructions with direct JMPs to the symbol
+__x86_return_thunk. This does not affect assembler (.S) sources, only C
+sources.
+
+-mfunction-return=thunk-extern has been available since gcc 7.3 and
+clang 15.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Tested-by: Nick Desaulniers <ndesaulniers@google.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: RETPOLINE_CFLAGS is at Makefile]
+[cascardo: remove ANNOTATE_NOENDBR from __x86_return_thunk]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Makefile                             |    2 ++
+ arch/x86/include/asm/nospec-branch.h |    2 ++
+ arch/x86/lib/retpoline.S             |   12 ++++++++++++
+ 3 files changed, 16 insertions(+)
+
+--- a/Makefile
++++ b/Makefile
+@@ -672,11 +672,13 @@ endif
+ ifdef CONFIG_CC_IS_GCC
+ RETPOLINE_CFLAGS      := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
++RETPOLINE_CFLAGS      += $(call cc-option,-mfunction-return=thunk-extern)
+ RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register)
+ endif
+ ifdef CONFIG_CC_IS_CLANG
+ RETPOLINE_CFLAGS      := -mretpoline-external-thunk
+ RETPOLINE_VDSO_CFLAGS := -mretpoline
++RETPOLINE_CFLAGS      += $(call cc-option,-mfunction-return=thunk-extern)
+ endif
+ export RETPOLINE_CFLAGS
+ export RETPOLINE_VDSO_CFLAGS
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -120,6 +120,8 @@
+       _ASM_PTR " 999b\n\t"                                    \
+       ".popsection\n\t"
++extern void __x86_return_thunk(void);
++
+ #ifdef CONFIG_RETPOLINE
+ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -66,3 +66,15 @@ SYM_CODE_END(__x86_indirect_thunk_array)
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
+ #undef GEN
++
++/*
++ * This function name is magical and is used by -mfunction-return=thunk-extern
++ * for the compiler to generate JMPs to it.
++ */
++SYM_CODE_START(__x86_return_thunk)
++      UNWIND_HINT_EMPTY
++      ret
++      int3
++SYM_CODE_END(__x86_return_thunk)
++
++__EXPORT_THUNK(__x86_return_thunk)
diff --git a/queue-5.10/x86-sev-avoid-using-__x86_return_thunk.patch b/queue-5.10/x86-sev-avoid-using-__x86_return_thunk.patch
new file mode 100644 (file)
index 0000000..c3c5c34
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Kim Phillips <kim.phillips@amd.com>
+Date: Tue, 14 Jun 2022 23:15:44 +0200
+Subject: x86/sev: Avoid using __x86_return_thunk
+
+From: Kim Phillips <kim.phillips@amd.com>
+
+commit 0ee9073000e8791f8b134a8ded31bcc767f7f232 upstream.
+
+Specifically, it's because __enc_copy() encrypts the kernel after
+being relocated outside the kernel in sme_encrypt_execute(), and the
+RET macro's jmp offset isn't amended prior to execution.
+
+Signed-off-by: Kim Phillips <kim.phillips@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/mem_encrypt_boot.S |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/mm/mem_encrypt_boot.S
++++ b/arch/x86/mm/mem_encrypt_boot.S
+@@ -65,7 +65,9 @@ SYM_FUNC_START(sme_encrypt_execute)
+       movq    %rbp, %rsp              /* Restore original stack pointer */
+       pop     %rbp
+-      RET
++      /* Offset to __x86_return_thunk would be wrong here */
++      ret
++      int3
+ SYM_FUNC_END(sme_encrypt_execute)
+ SYM_FUNC_START(__enc_copy)
+@@ -151,6 +153,8 @@ SYM_FUNC_START(__enc_copy)
+       pop     %r12
+       pop     %r15
+-      RET
++      /* Offset to __x86_return_thunk would be wrong here */
++      ret
++      int3
+ .L__enc_copy_end:
+ SYM_FUNC_END(__enc_copy)
diff --git a/queue-5.10/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch b/queue-5.10/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch
new file mode 100644 (file)
index 0000000..850e41a
--- /dev/null
@@ -0,0 +1,209 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Tue, 14 Jun 2022 23:15:55 +0200
+Subject: x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 7c693f54c873691a4b7da05c7e0f74e67745d144 upstream.
+
+Extend spectre_v2= boot option with Kernel IBRS.
+
+  [jpoimboe: no STIBP with IBRS]
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |    1 
+ arch/x86/include/asm/nospec-branch.h            |    1 
+ arch/x86/kernel/cpu/bugs.c                      |   66 ++++++++++++++++++------
+ 3 files changed, 54 insertions(+), 14 deletions(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -5026,6 +5026,7 @@
+                       eibrs             - enhanced IBRS
+                       eibrs,retpoline   - enhanced IBRS + Retpolines
+                       eibrs,lfence      - enhanced IBRS + LFENCE
++                      ibrs              - use IBRS to protect kernel
+                       Not specifying this option is equivalent to
+                       spectre_v2=auto.
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -212,6 +212,7 @@ enum spectre_v2_mitigation {
+       SPECTRE_V2_EIBRS,
+       SPECTRE_V2_EIBRS_RETPOLINE,
+       SPECTRE_V2_EIBRS_LFENCE,
++      SPECTRE_V2_IBRS,
+ };
+ /* The indirect branch speculation control variants */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -928,6 +928,7 @@ enum spectre_v2_mitigation_cmd {
+       SPECTRE_V2_CMD_EIBRS,
+       SPECTRE_V2_CMD_EIBRS_RETPOLINE,
+       SPECTRE_V2_CMD_EIBRS_LFENCE,
++      SPECTRE_V2_CMD_IBRS,
+ };
+ enum spectre_v2_user_cmd {
+@@ -1000,11 +1001,12 @@ spectre_v2_parse_user_cmdline(enum spect
+       return SPECTRE_V2_USER_CMD_AUTO;
+ }
+-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
++static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
+ {
+-      return (mode == SPECTRE_V2_EIBRS ||
+-              mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+-              mode == SPECTRE_V2_EIBRS_LFENCE);
++      return mode == SPECTRE_V2_IBRS ||
++             mode == SPECTRE_V2_EIBRS ||
++             mode == SPECTRE_V2_EIBRS_RETPOLINE ||
++             mode == SPECTRE_V2_EIBRS_LFENCE;
+ }
+ static void __init
+@@ -1069,12 +1071,12 @@ spectre_v2_user_select_mitigation(enum s
+       }
+       /*
+-       * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not
+-       * required.
++       * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
++       * STIBP is not required.
+        */
+       if (!boot_cpu_has(X86_FEATURE_STIBP) ||
+           !smt_possible ||
+-          spectre_v2_in_eibrs_mode(spectre_v2_enabled))
++          spectre_v2_in_ibrs_mode(spectre_v2_enabled))
+               return;
+       /*
+@@ -1106,6 +1108,7 @@ static const char * const spectre_v2_str
+       [SPECTRE_V2_EIBRS]                      = "Mitigation: Enhanced IBRS",
+       [SPECTRE_V2_EIBRS_LFENCE]               = "Mitigation: Enhanced IBRS + LFENCE",
+       [SPECTRE_V2_EIBRS_RETPOLINE]            = "Mitigation: Enhanced IBRS + Retpolines",
++      [SPECTRE_V2_IBRS]                       = "Mitigation: IBRS",
+ };
+ static const struct {
+@@ -1123,6 +1126,7 @@ static const struct {
+       { "eibrs,lfence",       SPECTRE_V2_CMD_EIBRS_LFENCE,      false },
+       { "eibrs,retpoline",    SPECTRE_V2_CMD_EIBRS_RETPOLINE,   false },
+       { "auto",               SPECTRE_V2_CMD_AUTO,              false },
++      { "ibrs",               SPECTRE_V2_CMD_IBRS,              false },
+ };
+ static void __init spec_v2_print_cond(const char *reason, bool secure)
+@@ -1185,6 +1189,24 @@ static enum spectre_v2_mitigation_cmd __
+               return SPECTRE_V2_CMD_AUTO;
+       }
++      if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
++              pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
++                     mitigation_options[i].option);
++              return SPECTRE_V2_CMD_AUTO;
++      }
++
++      if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) {
++              pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n",
++                     mitigation_options[i].option);
++              return SPECTRE_V2_CMD_AUTO;
++      }
++
++      if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) {
++              pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n",
++                     mitigation_options[i].option);
++              return SPECTRE_V2_CMD_AUTO;
++      }
++
+       spec_v2_print_cond(mitigation_options[i].option,
+                          mitigation_options[i].secure);
+       return cmd;
+@@ -1224,6 +1246,14 @@ static void __init spectre_v2_select_mit
+                       break;
+               }
++              if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
++                  retbleed_cmd != RETBLEED_CMD_OFF &&
++                  boot_cpu_has(X86_FEATURE_IBRS) &&
++                  boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
++                      mode = SPECTRE_V2_IBRS;
++                      break;
++              }
++
+               mode = spectre_v2_select_retpoline();
+               break;
+@@ -1240,6 +1270,10 @@ static void __init spectre_v2_select_mit
+               mode = spectre_v2_select_retpoline();
+               break;
++      case SPECTRE_V2_CMD_IBRS:
++              mode = SPECTRE_V2_IBRS;
++              break;
++
+       case SPECTRE_V2_CMD_EIBRS:
+               mode = SPECTRE_V2_EIBRS;
+               break;
+@@ -1256,7 +1290,7 @@ static void __init spectre_v2_select_mit
+       if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+               pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+-      if (spectre_v2_in_eibrs_mode(mode)) {
++      if (spectre_v2_in_ibrs_mode(mode)) {
+               /* Force it so VMEXIT will restore correctly */
+               x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+               write_spec_ctrl_current(x86_spec_ctrl_base, true);
+@@ -1267,6 +1301,10 @@ static void __init spectre_v2_select_mit
+       case SPECTRE_V2_EIBRS:
+               break;
++      case SPECTRE_V2_IBRS:
++              setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
++              break;
++
+       case SPECTRE_V2_LFENCE:
+       case SPECTRE_V2_EIBRS_LFENCE:
+               setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
+@@ -1293,17 +1331,17 @@ static void __init spectre_v2_select_mit
+       pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+       /*
+-       * Retpoline means the kernel is safe because it has no indirect
+-       * branches. Enhanced IBRS protects firmware too, so, enable restricted
+-       * speculation around firmware calls only when Enhanced IBRS isn't
+-       * supported.
++       * Retpoline protects the kernel, but doesn't protect firmware.  IBRS
++       * and Enhanced IBRS protect firmware too, so enable IBRS around
++       * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
++       * enabled.
+        *
+        * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
+        * the user might select retpoline on the kernel command line and if
+        * the CPU supports Enhanced IBRS, kernel might un-intentionally not
+        * enable IBRS around firmware calls.
+        */
+-      if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
++      if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
+               setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+               pr_info("Enabling Restricted Speculation for firmware calls\n");
+       }
+@@ -2012,7 +2050,7 @@ static ssize_t mmio_stale_data_show_stat
+ static char *stibp_state(void)
+ {
+-      if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
++      if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
+               return "";
+       switch (spectre_v2_user_stibp) {
diff --git a/queue-5.10/x86-speculation-disable-rrsba-behavior.patch b/queue-5.10/x86-speculation-disable-rrsba-behavior.patch
new file mode 100644 (file)
index 0000000..73c0f3b
--- /dev/null
@@ -0,0 +1,154 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Fri, 8 Jul 2022 13:36:09 -0700
+Subject: x86/speculation: Disable RRSBA behavior
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 4ad3278df6fe2b0852b00d5757fc2ccd8e92c26e upstream.
+
+Some Intel processors may use alternate predictors for RETs on
+RSB-underflow. This condition may be vulnerable to Branch History
+Injection (BHI) and intramode-BTI.
+
+Kernel earlier added spectre_v2 mitigation modes (eIBRS+Retpolines,
+eIBRS+LFENCE, Retpolines) which protect indirect CALLs and JMPs against
+such attacks. However, on RSB-underflow, RET target prediction may
+fallback to alternate predictors. As a result, RET's predicted target
+may get influenced by branch history.
+
+A new MSR_IA32_SPEC_CTRL bit (RRSBA_DIS_S) controls this fallback
+behavior when in kernel mode. When set, RETs will not take predictions
+from alternate predictors, hence mitigating RETs as well. Support for
+this is enumerated by CPUID.7.2.EDX[RRSBA_CTRL] (bit2).
+
+For spectre v2 mitigation, when a user selects a mitigation that
+protects indirect CALLs and JMPs against BHI and intramode-BTI, set
+RRSBA_DIS_S also to protect RETs for RSB-underflow case.
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[bwh: Backported to 5.15: adjust context in scattered.c]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h     |    2 +-
+ arch/x86/include/asm/msr-index.h       |    9 +++++++++
+ arch/x86/kernel/cpu/bugs.c             |   26 ++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/scattered.c        |    1 +
+ tools/arch/x86/include/asm/msr-index.h |    9 +++++++++
+ 5 files changed, 46 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -293,7 +293,7 @@
+ /* FREE!                              (11*32+ 8) */
+ /* FREE!                              (11*32+ 9) */
+ #define X86_FEATURE_ENTRY_IBPB                (11*32+10) /* "" Issue an IBPB on kernel entry */
+-/* FREE!                              (11*32+11) */
++#define X86_FEATURE_RRSBA_CTRL                (11*32+11) /* "" RET prediction control */
+ #define X86_FEATURE_RETPOLINE         (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_LFENCE  (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+ #define X86_FEATURE_RETHUNK           (11*32+14) /* "" Use REturn THUNK */
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -51,6 +51,8 @@
+ #define SPEC_CTRL_STIBP                       BIT(SPEC_CTRL_STIBP_SHIFT)      /* STIBP mask */
+ #define SPEC_CTRL_SSBD_SHIFT          2          /* Speculative Store Bypass Disable bit */
+ #define SPEC_CTRL_SSBD                        BIT(SPEC_CTRL_SSBD_SHIFT)       /* Speculative Store Bypass Disable */
++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT   6          /* Disable RRSBA behavior */
++#define SPEC_CTRL_RRSBA_DIS_S         BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+ #define MSR_IA32_PRED_CMD             0x00000049 /* Prediction Command */
+ #define PRED_CMD_IBPB                 BIT(0)     /* Indirect Branch Prediction Barrier */
+@@ -139,6 +141,13 @@
+                                                * bit available to control VERW
+                                                * behavior.
+                                                */
++#define ARCH_CAP_RRSBA                        BIT(19) /*
++                                               * Indicates RET may use predictors
++                                               * other than the RSB. With eIBRS
++                                               * enabled predictions in kernel mode
++                                               * are restricted to targets in
++                                               * kernel.
++                                               */
+ #define MSR_IA32_FLUSH_CMD            0x0000010b
+ #define L1D_FLUSH                     BIT(0)  /*
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1274,6 +1274,22 @@ static enum spectre_v2_mitigation __init
+       return SPECTRE_V2_RETPOLINE;
+ }
++/* Disable in-kernel use of non-RSB RET predictors */
++static void __init spec_ctrl_disable_kernel_rrsba(void)
++{
++      u64 ia32_cap;
++
++      if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
++              return;
++
++      ia32_cap = x86_read_arch_cap_msr();
++
++      if (ia32_cap & ARCH_CAP_RRSBA) {
++              x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
++              write_spec_ctrl_current(x86_spec_ctrl_base, true);
++      }
++}
++
+ static void __init spectre_v2_select_mitigation(void)
+ {
+       enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+@@ -1368,6 +1384,16 @@ static void __init spectre_v2_select_mit
+               break;
+       }
++      /*
++       * Disable alternate RSB predictions in kernel when indirect CALLs and
++       * JMPs gets protection against BHI and Intramode-BTI, but RET
++       * prediction from a non-RSB predictor is still a risk.
++       */
++      if (mode == SPECTRE_V2_EIBRS_LFENCE ||
++          mode == SPECTRE_V2_EIBRS_RETPOLINE ||
++          mode == SPECTRE_V2_RETPOLINE)
++              spec_ctrl_disable_kernel_rrsba();
++
+       spectre_v2_enabled = mode;
+       pr_info("%s\n", spectre_v2_strings[mode]);
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -26,6 +26,7 @@ struct cpuid_bit {
+ static const struct cpuid_bit cpuid_bits[] = {
+       { X86_FEATURE_APERFMPERF,       CPUID_ECX,  0, 0x00000006, 0 },
+       { X86_FEATURE_EPB,              CPUID_ECX,  3, 0x00000006, 0 },
++      { X86_FEATURE_RRSBA_CTRL,       CPUID_EDX,  2, 0x00000007, 2 },
+       { X86_FEATURE_CQM_LLC,          CPUID_EDX,  1, 0x0000000f, 0 },
+       { X86_FEATURE_CQM_OCCUP_LLC,    CPUID_EDX,  0, 0x0000000f, 1 },
+       { X86_FEATURE_CQM_MBM_TOTAL,    CPUID_EDX,  1, 0x0000000f, 1 },
+--- a/tools/arch/x86/include/asm/msr-index.h
++++ b/tools/arch/x86/include/asm/msr-index.h
+@@ -51,6 +51,8 @@
+ #define SPEC_CTRL_STIBP                       BIT(SPEC_CTRL_STIBP_SHIFT)      /* STIBP mask */
+ #define SPEC_CTRL_SSBD_SHIFT          2          /* Speculative Store Bypass Disable bit */
+ #define SPEC_CTRL_SSBD                        BIT(SPEC_CTRL_SSBD_SHIFT)       /* Speculative Store Bypass Disable */
++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT   6          /* Disable RRSBA behavior */
++#define SPEC_CTRL_RRSBA_DIS_S         BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+ #define MSR_IA32_PRED_CMD             0x00000049 /* Prediction Command */
+ #define PRED_CMD_IBPB                 BIT(0)     /* Indirect Branch Prediction Barrier */
+@@ -138,6 +140,13 @@
+                                                * bit available to control VERW
+                                                * behavior.
+                                                */
++#define ARCH_CAP_RRSBA                        BIT(19) /*
++                                               * Indicates RET may use predictors
++                                               * other than the RSB. With eIBRS
++                                               * enabled predictions in kernel mode
++                                               * are restricted to targets in
++                                               * kernel.
++                                               */
+ #define MSR_IA32_FLUSH_CMD            0x0000010b
+ #define L1D_FLUSH                     BIT(0)  /*
diff --git a/queue-5.10/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch b/queue-5.10/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch
new file mode 100644 (file)
index 0000000..47e6b56
--- /dev/null
@@ -0,0 +1,135 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:15 +0200
+Subject: x86/speculation: Fill RSB on vmexit for IBRS
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 9756bba28470722dacb79ffce554336dd1f6a6cd upstream.
+
+Prevent RSB underflow/poisoning attacks with RSB.  While at it, add a
+bunch of comments to attempt to document the current state of tribal
+knowledge about RSB attacks and what exactly is being mitigated.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    2 -
+ arch/x86/kernel/cpu/bugs.c         |   63 ++++++++++++++++++++++++++++++++++---
+ arch/x86/kvm/vmx/vmenter.S         |    6 +--
+ 3 files changed, 62 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -204,7 +204,7 @@
+ #define X86_FEATURE_SME                       ( 7*32+10) /* AMD Secure Memory Encryption */
+ #define X86_FEATURE_PTI                       ( 7*32+11) /* Kernel Page Table Isolation enabled */
+ #define X86_FEATURE_KERNEL_IBRS               ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+-/* FREE!                              ( 7*32+13) */
++#define X86_FEATURE_RSB_VMEXIT                ( 7*32+13) /* "" Fill RSB on VM-Exit */
+ #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_CDP_L2            ( 7*32+15) /* Code and Data Prioritization L2 */
+ #define X86_FEATURE_MSR_SPEC_CTRL     ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1357,17 +1357,70 @@ static void __init spectre_v2_select_mit
+       pr_info("%s\n", spectre_v2_strings[mode]);
+       /*
+-       * If spectre v2 protection has been enabled, unconditionally fill
+-       * RSB during a context switch; this protects against two independent
+-       * issues:
++       * If Spectre v2 protection has been enabled, fill the RSB during a
++       * context switch.  In general there are two types of RSB attacks
++       * across context switches, for which the CALLs/RETs may be unbalanced.
+        *
+-       *      - RSB underflow (and switch to BTB) on Skylake+
+-       *      - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
++       * 1) RSB underflow
++       *
++       *    Some Intel parts have "bottomless RSB".  When the RSB is empty,
++       *    speculated return targets may come from the branch predictor,
++       *    which could have a user-poisoned BTB or BHB entry.
++       *
++       *    AMD has it even worse: *all* returns are speculated from the BTB,
++       *    regardless of the state of the RSB.
++       *
++       *    When IBRS or eIBRS is enabled, the "user -> kernel" attack
++       *    scenario is mitigated by the IBRS branch prediction isolation
++       *    properties, so the RSB buffer filling wouldn't be necessary to
++       *    protect against this type of attack.
++       *
++       *    The "user -> user" attack scenario is mitigated by RSB filling.
++       *
++       * 2) Poisoned RSB entry
++       *
++       *    If the 'next' in-kernel return stack is shorter than 'prev',
++       *    'next' could be tricked into speculating with a user-poisoned RSB
++       *    entry.
++       *
++       *    The "user -> kernel" attack scenario is mitigated by SMEP and
++       *    eIBRS.
++       *
++       *    The "user -> user" scenario, also known as SpectreBHB, requires
++       *    RSB clearing.
++       *
++       * So to mitigate all cases, unconditionally fill RSB on context
++       * switches.
++       *
++       * FIXME: Is this pointless for retbleed-affected AMD?
+        */
+       setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+       pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+       /*
++       * Similar to context switches, there are two types of RSB attacks
++       * after vmexit:
++       *
++       * 1) RSB underflow
++       *
++       * 2) Poisoned RSB entry
++       *
++       * When retpoline is enabled, both are mitigated by filling/clearing
++       * the RSB.
++       *
++       * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
++       * prediction isolation protections, RSB still needs to be cleared
++       * because of #2.  Note that SMEP provides no protection here, unlike
++       * user-space-poisoned RSB entries.
++       *
++       * eIBRS, on the other hand, has RSB-poisoning protections, so it
++       * doesn't need RSB clearing after vmexit.
++       */
++      if (boot_cpu_has(X86_FEATURE_RETPOLINE) ||
++          boot_cpu_has(X86_FEATURE_KERNEL_IBRS))
++              setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
++
++      /*
+        * Retpoline protects the kernel, but doesn't protect firmware.  IBRS
+        * and Enhanced IBRS protect firmware too, so enable IBRS around
+        * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -193,15 +193,15 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL
+        * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
+        * the first unbalanced RET after vmexit!
+        *
+-       * For retpoline, RSB filling is needed to prevent poisoned RSB entries
+-       * and (in some cases) RSB underflow.
++       * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
++       * entries and (in some cases) RSB underflow.
+        *
+        * eIBRS has its own protection against poisoned RSB, so it doesn't
+        * need the RSB filling sequence.  But it does need to be enabled
+        * before the first unbalanced RET.
+          */
+-      FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
++      FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
+       pop %_ASM_ARG2  /* @flags */
+       pop %_ASM_ARG1  /* @vmx */
diff --git a/queue-5.10/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch b/queue-5.10/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch
new file mode 100644 (file)
index 0000000..2f4a3d3
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:06 +0200
+Subject: x86/speculation: Fix firmware entry SPEC_CTRL handling
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit e6aa13622ea8283cc699cac5d018cc40a2ba2010 upstream.
+
+The firmware entry code may accidentally clear STIBP or SSBD. Fix that.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |   10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -286,18 +286,16 @@ extern u64 spec_ctrl_current(void);
+  */
+ #define firmware_restrict_branch_speculation_start()                  \
+ do {                                                                  \
+-      u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS;                  \
+-                                                                      \
+       preempt_disable();                                              \
+-      alternative_msr_write(MSR_IA32_SPEC_CTRL, val,                  \
++      alternative_msr_write(MSR_IA32_SPEC_CTRL,                       \
++                            spec_ctrl_current() | SPEC_CTRL_IBRS,     \
+                             X86_FEATURE_USE_IBRS_FW);                 \
+ } while (0)
+ #define firmware_restrict_branch_speculation_end()                    \
+ do {                                                                  \
+-      u64 val = x86_spec_ctrl_base;                                   \
+-                                                                      \
+-      alternative_msr_write(MSR_IA32_SPEC_CTRL, val,                  \
++      alternative_msr_write(MSR_IA32_SPEC_CTRL,                       \
++                            spec_ctrl_current(),                      \
+                             X86_FEATURE_USE_IBRS_FW);                 \
+       preempt_enable();                                               \
+ } while (0)
diff --git a/queue-5.10/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch b/queue-5.10/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch
new file mode 100644 (file)
index 0000000..896a4ff
--- /dev/null
@@ -0,0 +1,78 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:05 +0200
+Subject: x86/speculation: Fix RSB filling with CONFIG_RETPOLINE=n
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit b2620facef4889fefcbf2e87284f34dcd4189bce upstream.
+
+If a kernel is built with CONFIG_RETPOLINE=n, but the user still wants
+to mitigate Spectre v2 using IBRS or eIBRS, the RSB filling will be
+silently disabled.
+
+There's nothing retpoline-specific about RSB buffer filling.  Remove the
+CONFIG_RETPOLINE guards around it.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_32.S            |    2 --
+ arch/x86/entry/entry_64.S            |    2 --
+ arch/x86/include/asm/nospec-branch.h |    2 --
+ 3 files changed, 6 deletions(-)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -782,7 +782,6 @@ SYM_CODE_START(__switch_to_asm)
+       movl    %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+ #endif
+-#ifdef CONFIG_RETPOLINE
+       /*
+        * When switching from a shallower to a deeper call stack
+        * the RSB may either underflow or use entries populated
+@@ -791,7 +790,6 @@ SYM_CODE_START(__switch_to_asm)
+        * speculative execution to prevent attack.
+        */
+       FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+-#endif
+       /* Restore flags or the incoming task to restore AC state. */
+       popfl
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -249,7 +249,6 @@ SYM_FUNC_START(__switch_to_asm)
+       movq    %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
+ #endif
+-#ifdef CONFIG_RETPOLINE
+       /*
+        * When switching from a shallower to a deeper call stack
+        * the RSB may either underflow or use entries populated
+@@ -258,7 +257,6 @@ SYM_FUNC_START(__switch_to_asm)
+        * speculative execution to prevent attack.
+        */
+       FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+-#endif
+       /* restore callee-saved registers */
+       popq    %r15
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -122,11 +122,9 @@
+   * monstrosity above, manually.
+   */
+ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+-#ifdef CONFIG_RETPOLINE
+       ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+       __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
+ .Lskip_rsb_\@:
+-#endif
+ .endm
+ /*
diff --git a/queue-5.10/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch b/queue-5.10/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch
new file mode 100644 (file)
index 0000000..b38f24d
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:07 +0200
+Subject: x86/speculation: Fix SPEC_CTRL write on SMT state change
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 56aa4d221f1ee2c3a49b45b800778ec6e0ab73c5 upstream.
+
+If the SMT state changes, SSBD might get accidentally disabled.  Fix
+that.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1414,7 +1414,8 @@ static void __init spectre_v2_select_mit
+ static void update_stibp_msr(void * __unused)
+ {
+-      write_spec_ctrl_current(x86_spec_ctrl_base, true);
++      u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
++      write_spec_ctrl_current(val, true);
+ }
+ /* Update x86_spec_ctrl_base in case SMT state changed. */
diff --git a/queue-5.10/x86-speculation-remove-x86_spec_ctrl_mask.patch b/queue-5.10/x86-speculation-remove-x86_spec_ctrl_mask.patch
new file mode 100644 (file)
index 0000000..b4ac5bf
--- /dev/null
@@ -0,0 +1,88 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Fri, 17 Jun 2022 12:12:48 -0700
+Subject: x86/speculation: Remove x86_spec_ctrl_mask
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit acac5e98ef8d638a411cfa2ee676c87e1973f126 upstream.
+
+This mask has been made redundant by kvm_spec_ctrl_test_value().  And it
+doesn't even work when MSR interception is disabled, as the guest can
+just write to SPEC_CTRL directly.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |   31 +------------------------------
+ 1 file changed, 1 insertion(+), 30 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -85,12 +85,6 @@ u64 spec_ctrl_current(void)
+ EXPORT_SYMBOL_GPL(spec_ctrl_current);
+ /*
+- * The vendor and possibly platform specific bits which can be modified in
+- * x86_spec_ctrl_base.
+- */
+-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+-
+-/*
+  * AMD specific MSR info for Speculative Store Bypass control.
+  * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
+  */
+@@ -138,10 +132,6 @@ void __init check_bugs(void)
+       if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+               rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+-      /* Allow STIBP in MSR_SPEC_CTRL if supported */
+-      if (boot_cpu_has(X86_FEATURE_STIBP))
+-              x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
+-
+       /* Select the proper CPU mitigations before patching alternatives: */
+       spectre_v1_select_mitigation();
+       spectre_v2_select_mitigation();
+@@ -199,19 +189,10 @@ void __init check_bugs(void)
+ void
+ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ {
+-      u64 msrval, guestval, hostval = spec_ctrl_current();
++      u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
+       struct thread_info *ti = current_thread_info();
+-      /* Is MSR_SPEC_CTRL implemented ? */
+       if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
+-              /*
+-               * Restrict guest_spec_ctrl to supported values. Clear the
+-               * modifiable bits in the host base value and or the
+-               * modifiable bits from the guest value.
+-               */
+-              guestval = hostval & ~x86_spec_ctrl_mask;
+-              guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+-
+               if (hostval != guestval) {
+                       msrval = setguest ? guestval : hostval;
+                       wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+@@ -1622,16 +1603,6 @@ static enum ssb_mitigation __init __ssb_
+       }
+       /*
+-       * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
+-       * bit in the mask to allow guests to use the mitigation even in the
+-       * case where the host does not enable it.
+-       */
+-      if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+-          static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+-              x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+-      }
+-
+-      /*
+        * We have three CPU feature flags that are in play here:
+        *  - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
+        *  - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
diff --git a/queue-5.10/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch b/queue-5.10/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch
new file mode 100644 (file)
index 0000000..6304f78
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:08 +0200
+Subject: x86/speculation: Use cached host SPEC_CTRL value for guest entry/exit
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit bbb69e8bee1bd882784947095ffb2bfe0f7c9470 upstream.
+
+There's no need to recalculate the host value for every entry/exit.
+Just use the cached value in spec_ctrl_current().
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |   12 +-----------
+ 1 file changed, 1 insertion(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -199,7 +199,7 @@ void __init check_bugs(void)
+ void
+ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ {
+-      u64 msrval, guestval, hostval = x86_spec_ctrl_base;
++      u64 msrval, guestval, hostval = spec_ctrl_current();
+       struct thread_info *ti = current_thread_info();
+       /* Is MSR_SPEC_CTRL implemented ? */
+@@ -212,15 +212,6 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl,
+               guestval = hostval & ~x86_spec_ctrl_mask;
+               guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+-              /* SSBD controlled in MSR_SPEC_CTRL */
+-              if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+-                  static_cpu_has(X86_FEATURE_AMD_SSBD))
+-                      hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
+-
+-              /* Conditional STIBP enabled? */
+-              if (static_branch_unlikely(&switch_to_cond_stibp))
+-                      hostval |= stibp_tif_to_spec_ctrl(ti->flags);
+-
+               if (hostval != guestval) {
+                       msrval = setguest ? guestval : hostval;
+                       wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+@@ -1353,7 +1344,6 @@ static void __init spectre_v2_select_mit
+               pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+       if (spectre_v2_in_ibrs_mode(mode)) {
+-              /* Force it so VMEXIT will restore correctly */
+               x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+               write_spec_ctrl_current(x86_spec_ctrl_base, true);
+       }
diff --git a/queue-5.10/x86-static_call-serialize-__static_call_fixup-properly.patch b/queue-5.10/x86-static_call-serialize-__static_call_fixup-properly.patch
new file mode 100644 (file)
index 0000000..36183cb
--- /dev/null
@@ -0,0 +1,73 @@
+From c27c753ea6fd1237f4f96abf8b623d7bab505513 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 12 Jul 2022 14:01:06 +0200
+Subject: x86/static_call: Serialize __static_call_fixup() properly
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit c27c753ea6fd1237f4f96abf8b623d7bab505513 upstream.
+
+__static_call_fixup() invokes __static_call_transform() without holding
+text_mutex, which causes lockdep to complain in text_poke_bp().
+
+Adding the proper locking cures that, but as this is either used during
+early boot or during module finalizing, it's not required to use
+text_poke_bp(). Add an argument to __static_call_transform() which tells
+it to use text_poke_early() for it.
+
+Fixes: ee88d363d156 ("x86,static_call: Use alternative RET encoding")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/static_call.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -20,7 +20,8 @@ static const u8 tramp_ud[] = { 0x0f, 0xb
+ static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
+-static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
++static void __ref __static_call_transform(void *insn, enum insn_type type,
++                                        void *func, bool modinit)
+ {
+       int size = CALL_INSN_SIZE;
+       const void *code;
+@@ -49,7 +50,7 @@ static void __ref __static_call_transfor
+       if (memcmp(insn, code, size) == 0)
+               return;
+-      if (unlikely(system_state == SYSTEM_BOOTING))
++      if (system_state == SYSTEM_BOOTING || modinit)
+               return text_poke_early(insn, code, size);
+       text_poke_bp(insn, code, size, NULL);
+@@ -96,12 +97,12 @@ void arch_static_call_transform(void *si
+       if (tramp) {
+               __static_call_validate(tramp, true);
+-              __static_call_transform(tramp, __sc_insn(!func, true), func);
++              __static_call_transform(tramp, __sc_insn(!func, true), func, false);
+       }
+       if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
+               __static_call_validate(site, tail);
+-              __static_call_transform(site, __sc_insn(!func, tail), func);
++              __static_call_transform(site, __sc_insn(!func, tail), func, false);
+       }
+       mutex_unlock(&text_mutex);
+@@ -127,8 +128,10 @@ bool __static_call_fixup(void *tramp, u8
+               return false;
+       }
++      mutex_lock(&text_mutex);
+       if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk)
+-              __static_call_transform(tramp, RET, NULL);
++              __static_call_transform(tramp, RET, NULL, true);
++      mutex_unlock(&text_mutex);
+       return true;
+ }
diff --git a/queue-5.10/x86-static_call-use-alternative-ret-encoding.patch b/queue-5.10/x86-static_call-use-alternative-ret-encoding.patch
new file mode 100644 (file)
index 0000000..9eb51d2
--- /dev/null
@@ -0,0 +1,184 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:39 +0200
+Subject: x86,static_call: Use alternative RET encoding
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit ee88d363d15617ff50ac24fab0ffec11113b2aeb upstream.
+
+In addition to teaching static_call about the new way to spell 'RET',
+there is an added complication in that static_call() is allowed to
+rewrite text before it is known which particular spelling is required.
+
+In order to deal with this; have a static_call specific fixup in the
+apply_return() 'alternative' patching routine that will rewrite the
+static_call trampoline to match the definite sequence.
+
+This in turn creates the problem of uniquely identifying static call
+trampolines. Currently trampolines are 8 bytes, the first 5 being the
+jmp.d32/ret sequence and the final 3 a byte sequence that spells out
+'SCT'.
+
+This sequence is used in __static_call_validate() to ensure it is
+patching a trampoline and not a random other jmp.d32. That is,
+false-positives shouldn't be plenty, but aren't a big concern.
+
+OTOH the new __static_call_fixup() must not have false-positives, and
+'SCT' decodes to the somewhat weird but semi plausible sequence:
+
+  push %rbx
+  rex.XB push %r12
+
+Additionally, there are SLS concerns with immediate jumps. Combined it
+seems like a good moment to change the signature to a single 3 byte
+trap instruction that is unique to this usage and will not ever get
+generated by accident.
+
+As such, change the signature to: '0x0f, 0xb9, 0xcc', which decodes
+to:
+
+  ud1 %esp, %ecx
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: skip validation as introduced by 2105a92748e8 ("static_call,x86: Robustify trampoline patching")]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/static_call.h |   17 ++++++++++++++++
+ arch/x86/kernel/alternative.c      |   12 +++++++----
+ arch/x86/kernel/static_call.c      |   38 ++++++++++++++++++++++++++++++++++++-
+ 3 files changed, 62 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/static_call.h
++++ b/arch/x86/include/asm/static_call.h
+@@ -21,6 +21,16 @@
+  * relative displacement across sections.
+  */
++/*
++ * The trampoline is 8 bytes and of the general form:
++ *
++ *   jmp.d32 \func
++ *   ud1 %esp, %ecx
++ *
++ * That trailing #UD provides both a speculation stop and serves as a unique
++ * 3 byte signature identifying static call trampolines. Also see tramp_ud[]
++ * and __static_call_fixup().
++ */
+ #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns)                  \
+       asm(".pushsection .static_call.text, \"ax\"             \n"     \
+           ".align 4                                           \n"     \
+@@ -34,8 +44,13 @@
+ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)                     \
+       __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
++#ifdef CONFIG_RETPOLINE
++#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)                      \
++      __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
++#else
+ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)                      \
+       __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
++#endif
+ #define ARCH_ADD_TRAMP_KEY(name)                                      \
+@@ -44,4 +59,6 @@
+           ".long " STATIC_CALL_KEY_STR(name) " - .            \n"     \
+           ".popsection                                        \n")
++extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
++
+ #endif /* _ASM_STATIC_CALL_H */
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -693,18 +693,22 @@ void __init_or_module noinline apply_ret
+       s32 *s;
+       for (s = start; s < end; s++) {
+-              void *addr = (void *)s + *s;
++              void *dest = NULL, *addr = (void *)s + *s;
+               struct insn insn;
+               int len, ret;
+               u8 bytes[16];
+-              u8 op1;
++              u8 op;
+               ret = insn_decode_kernel(&insn, addr);
+               if (WARN_ON_ONCE(ret < 0))
+                       continue;
+-              op1 = insn.opcode.bytes[0];
+-              if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE))
++              op = insn.opcode.bytes[0];
++              if (op == JMP32_INSN_OPCODE)
++                      dest = addr + insn.length + insn.immediate.value;
++
++              if (__static_call_fixup(addr, op, dest) ||
++                  WARN_ON_ONCE(dest != &__x86_return_thunk))
+                       continue;
+               DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -11,6 +11,13 @@ enum insn_type {
+       RET = 3,  /* tramp / site cond-tail-call */
+ };
++/*
++ * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such
++ * that there is no false-positive trampoline identification while also being a
++ * speculation stop.
++ */
++static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc };
++
+ static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
+ static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
+@@ -32,7 +39,10 @@ static void __ref __static_call_transfor
+               break;
+       case RET:
+-              code = &retinsn;
++              if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++                      code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk);
++              else
++                      code = &retinsn;
+               break;
+       }
+@@ -97,3 +107,29 @@ void arch_static_call_transform(void *si
+       mutex_unlock(&text_mutex);
+ }
+ EXPORT_SYMBOL_GPL(arch_static_call_transform);
++
++#ifdef CONFIG_RETPOLINE
++/*
++ * This is called by apply_returns() to fix up static call trampolines,
++ * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
++ * having a return trampoline.
++ *
++ * The problem is that static_call() is available before determining
++ * X86_FEATURE_RETHUNK and, by implication, running alternatives.
++ *
++ * This means that __static_call_transform() above can have overwritten the
++ * return trampoline and we now need to fix things up to be consistent.
++ */
++bool __static_call_fixup(void *tramp, u8 op, void *dest)
++{
++      if (memcmp(tramp+5, tramp_ud, 3)) {
++              /* Not a trampoline site, not our problem. */
++              return false;
++      }
++
++      if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk)
++              __static_call_transform(tramp, RET, NULL);
++
++      return true;
++}
++#endif
diff --git a/queue-5.10/x86-undo-return-thunk-damage.patch b/queue-5.10/x86-undo-return-thunk-damage.patch
new file mode 100644 (file)
index 0000000..f2be9ea
--- /dev/null
@@ -0,0 +1,195 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:37 +0200
+Subject: x86: Undo return-thunk damage
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 15e67227c49a57837108acfe1c80570e1bd9f962 upstream.
+
+Introduce X86_FEATURE_RETHUNK for those afflicted with needing this.
+
+  [ bp: Do only INT3 padding - simpler. ]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: CONFIG_STACK_VALIDATION vs CONFIG_OBJTOOL]
+[cascardo: no IBT support]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/alternative.h       |    1 
+ arch/x86/include/asm/cpufeatures.h       |    1 
+ arch/x86/include/asm/disabled-features.h |    3 +
+ arch/x86/kernel/alternative.c            |   60 +++++++++++++++++++++++++++++++
+ arch/x86/kernel/module.c                 |    8 +++-
+ arch/x86/kernel/vmlinux.lds.S            |    7 +++
+ 6 files changed, 78 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -76,6 +76,7 @@ extern int alternatives_patched;
+ extern void alternative_instructions(void);
+ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+ extern void apply_retpolines(s32 *start, s32 *end);
++extern void apply_returns(s32 *start, s32 *end);
+ struct module;
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -296,6 +296,7 @@
+ /* FREE!                              (11*32+11) */
+ #define X86_FEATURE_RETPOLINE         (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_LFENCE  (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
++#define X86_FEATURE_RETHUNK           (11*32+14) /* "" Use REturn THUNK */
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX512_BF16               (12*32+ 5) /* AVX512 BFLOAT16 instructions */
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -60,7 +60,8 @@
+ # define DISABLE_RETPOLINE    0
+ #else
+ # define DISABLE_RETPOLINE    ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+-                               (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
++                               (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \
++                               (1 << (X86_FEATURE_RETHUNK & 31)))
+ #endif
+ /* Force disable because it's broken beyond repair */
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -270,6 +270,7 @@ static void __init_or_module add_nops(vo
+ }
+ extern s32 __retpoline_sites[], __retpoline_sites_end[];
++extern s32 __return_sites[], __return_sites_end[];
+ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+ extern s32 __smp_locks[], __smp_locks_end[];
+ void text_poke_early(void *addr, const void *opcode, size_t len);
+@@ -661,9 +662,67 @@ void __init_or_module noinline apply_ret
+       }
+ }
++/*
++ * Rewrite the compiler generated return thunk tail-calls.
++ *
++ * For example, convert:
++ *
++ *   JMP __x86_return_thunk
++ *
++ * into:
++ *
++ *   RET
++ */
++static int patch_return(void *addr, struct insn *insn, u8 *bytes)
++{
++      int i = 0;
++
++      if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++              return -1;
++
++      bytes[i++] = RET_INSN_OPCODE;
++
++      for (; i < insn->length;)
++              bytes[i++] = INT3_INSN_OPCODE;
++
++      return i;
++}
++
++void __init_or_module noinline apply_returns(s32 *start, s32 *end)
++{
++      s32 *s;
++
++      for (s = start; s < end; s++) {
++              void *addr = (void *)s + *s;
++              struct insn insn;
++              int len, ret;
++              u8 bytes[16];
++              u8 op1;
++
++              ret = insn_decode_kernel(&insn, addr);
++              if (WARN_ON_ONCE(ret < 0))
++                      continue;
++
++              op1 = insn.opcode.bytes[0];
++              if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE))
++                      continue;
++
++              DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
++                      addr, addr, insn.length,
++                      addr + insn.length + insn.immediate.value);
++
++              len = patch_return(addr, &insn, bytes);
++              if (len == insn.length) {
++                      DUMP_BYTES(((u8*)addr),  len, "%px: orig: ", addr);
++                      DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
++                      text_poke_early(addr, bytes, len);
++              }
++      }
++}
+ #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
+ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
+ #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
+@@ -956,6 +1015,7 @@ void __init alternative_instructions(voi
+        * those can rewrite the retpoline thunks.
+        */
+       apply_retpolines(__retpoline_sites, __retpoline_sites_end);
++      apply_returns(__return_sites, __return_sites_end);
+       apply_alternatives(__alt_instructions, __alt_instructions_end);
+--- a/arch/x86/kernel/module.c
++++ b/arch/x86/kernel/module.c
+@@ -252,7 +252,7 @@ int module_finalize(const Elf_Ehdr *hdr,
+ {
+       const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+               *para = NULL, *orc = NULL, *orc_ip = NULL,
+-              *retpolines = NULL;
++              *retpolines = NULL, *returns = NULL;
+       char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+       for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+@@ -270,12 +270,18 @@ int module_finalize(const Elf_Ehdr *hdr,
+                       orc_ip = s;
+               if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
+                       retpolines = s;
++              if (!strcmp(".return_sites", secstrings + s->sh_name))
++                      returns = s;
+       }
+       if (retpolines) {
+               void *rseg = (void *)retpolines->sh_addr;
+               apply_retpolines(rseg, rseg + retpolines->sh_size);
+       }
++      if (returns) {
++              void *rseg = (void *)returns->sh_addr;
++              apply_returns(rseg, rseg + returns->sh_size);
++      }
+       if (alt) {
+               /* patch .altinstructions */
+               void *aseg = (void *)alt->sh_addr;
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -284,6 +284,13 @@ SECTIONS
+               *(.retpoline_sites)
+               __retpoline_sites_end = .;
+       }
++
++      . = ALIGN(8);
++      .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) {
++              __return_sites = .;
++              *(.return_sites)
++              __return_sites_end = .;
++      }
+ #endif
+       /*
diff --git a/queue-5.10/x86-use-return-thunk-in-asm-code.patch b/queue-5.10/x86-use-return-thunk-in-asm-code.patch
new file mode 100644 (file)
index 0000000..b1dc6fc
--- /dev/null
@@ -0,0 +1,95 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:45 +0200
+Subject: x86: Use return-thunk in asm code
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit aa3d480315ba6c3025a60958e1981072ea37c3df upstream.
+
+Use the return thunk in asm code. If the thunk isn't needed, it will
+get patched into a RET instruction during boot by apply_returns().
+
+Since alternatives can't handle relocations outside of the first
+instruction, putting a 'jmp __x86_return_thunk' in one is not valid,
+therefore carve out the memmove ERMS path into a separate label and jump
+to it.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: no RANDSTRUCT_CFLAGS]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/vdso/Makefile   |    1 +
+ arch/x86/include/asm/linkage.h |    8 ++++++++
+ arch/x86/lib/memmove_64.S      |    7 ++++++-
+ 3 files changed, 15 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/entry/vdso/Makefile
++++ b/arch/x86/entry/vdso/Makefile
+@@ -91,6 +91,7 @@ endif
+ endif
+ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
++$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO
+ #
+ # vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+--- a/arch/x86/include/asm/linkage.h
++++ b/arch/x86/include/asm/linkage.h
+@@ -18,19 +18,27 @@
+ #define __ALIGN_STR   __stringify(__ALIGN)
+ #endif
++#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#define RET   jmp __x86_return_thunk
++#else /* CONFIG_RETPOLINE */
+ #ifdef CONFIG_SLS
+ #define RET   ret; int3
+ #else
+ #define RET   ret
+ #endif
++#endif /* CONFIG_RETPOLINE */
+ #else /* __ASSEMBLY__ */
++#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#define ASM_RET       "jmp __x86_return_thunk\n\t"
++#else /* CONFIG_RETPOLINE */
+ #ifdef CONFIG_SLS
+ #define ASM_RET       "ret; int3\n\t"
+ #else
+ #define ASM_RET       "ret\n\t"
+ #endif
++#endif /* CONFIG_RETPOLINE */
+ #endif /* __ASSEMBLY__ */
+--- a/arch/x86/lib/memmove_64.S
++++ b/arch/x86/lib/memmove_64.S
+@@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove)
+       /* FSRM implies ERMS => no length checks, do the copy directly */
+ .Lmemmove_begin_forward:
+       ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
+-      ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS
++      ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS
+       /*
+        * movsq instruction have many startup latency
+@@ -206,6 +206,11 @@ SYM_FUNC_START(__memmove)
+       movb %r11b, (%rdi)
+ 13:
+       RET
++
++.Lmemmove_erms:
++      movq %rdx, %rcx
++      rep movsb
++      RET
+ SYM_FUNC_END(__memmove)
+ SYM_FUNC_END_ALIAS(memmove)
+ EXPORT_SYMBOL(__memmove)
diff --git a/queue-5.10/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch b/queue-5.10/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch
new file mode 100644 (file)
index 0000000..3328db0
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:43 +0200
+Subject: x86/vsyscall_emu/64: Don't use RET in vsyscall emulation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 15583e514eb16744b80be85dea0774ece153177d upstream.
+
+This is userspace code and doesn't play by the normal kernel rules.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/vsyscall/vsyscall_emu_64.S |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S
++++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
+@@ -19,17 +19,20 @@ __vsyscall_page:
+       mov $__NR_gettimeofday, %rax
+       syscall
+-      RET
++      ret
++      int3
+       .balign 1024, 0xcc
+       mov $__NR_time, %rax
+       syscall
+-      RET
++      ret
++      int3
+       .balign 1024, 0xcc
+       mov $__NR_getcpu, %rax
+       syscall
+-      RET
++      ret
++      int3
+       .balign 4096, 0xcc
diff --git a/queue-5.10/x86-xen-rename-sys-entry-points.patch b/queue-5.10/x86-xen-rename-sys-entry-points.patch
new file mode 100644 (file)
index 0000000..b140c9a
--- /dev/null
@@ -0,0 +1,134 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:16:00 +0200
+Subject: x86/xen: Rename SYS* entry points
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit b75b7f8ef1148be1b9321ffc2f6c19238904b438 upstream.
+
+Native SYS{CALL,ENTER} entry points are called
+entry_SYS{CALL,ENTER}_{64,compat}, make sure the Xen versions are
+named consistently.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/xen/setup.c   |    6 +++---
+ arch/x86/xen/xen-asm.S |   20 ++++++++++----------
+ arch/x86/xen/xen-ops.h |    6 +++---
+ 3 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/xen/setup.c
++++ b/arch/x86/xen/setup.c
+@@ -922,7 +922,7 @@ void xen_enable_sysenter(void)
+       if (!boot_cpu_has(sysenter_feature))
+               return;
+-      ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
++      ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat);
+       if(ret != 0)
+               setup_clear_cpu_cap(sysenter_feature);
+ }
+@@ -931,7 +931,7 @@ void xen_enable_syscall(void)
+ {
+       int ret;
+-      ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
++      ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
+       if (ret != 0) {
+               printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
+               /* Pretty fatal; 64-bit userspace has no other
+@@ -940,7 +940,7 @@ void xen_enable_syscall(void)
+       if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
+               ret = register_callback(CALLBACKTYPE_syscall32,
+-                                      xen_syscall32_target);
++                                      xen_entry_SYSCALL_compat);
+               if (ret != 0)
+                       setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
+       }
+--- a/arch/x86/xen/xen-asm.S
++++ b/arch/x86/xen/xen-asm.S
+@@ -276,7 +276,7 @@ SYM_CODE_END(xenpv_restore_regs_and_retu
+  */
+ /* Normal 64-bit system call target */
+-SYM_CODE_START(xen_syscall_target)
++SYM_CODE_START(xen_entry_SYSCALL_64)
+       UNWIND_HINT_EMPTY
+       popq %rcx
+       popq %r11
+@@ -290,12 +290,12 @@ SYM_CODE_START(xen_syscall_target)
+       movq $__USER_CS, 1*8(%rsp)
+       jmp entry_SYSCALL_64_after_hwframe
+-SYM_CODE_END(xen_syscall_target)
++SYM_CODE_END(xen_entry_SYSCALL_64)
+ #ifdef CONFIG_IA32_EMULATION
+ /* 32-bit compat syscall target */
+-SYM_CODE_START(xen_syscall32_target)
++SYM_CODE_START(xen_entry_SYSCALL_compat)
+       UNWIND_HINT_EMPTY
+       popq %rcx
+       popq %r11
+@@ -309,10 +309,10 @@ SYM_CODE_START(xen_syscall32_target)
+       movq $__USER32_CS, 1*8(%rsp)
+       jmp entry_SYSCALL_compat_after_hwframe
+-SYM_CODE_END(xen_syscall32_target)
++SYM_CODE_END(xen_entry_SYSCALL_compat)
+ /* 32-bit compat sysenter target */
+-SYM_CODE_START(xen_sysenter_target)
++SYM_CODE_START(xen_entry_SYSENTER_compat)
+       UNWIND_HINT_EMPTY
+       /*
+        * NB: Xen is polite and clears TF from EFLAGS for us.  This means
+@@ -330,18 +330,18 @@ SYM_CODE_START(xen_sysenter_target)
+       movq $__USER32_CS, 1*8(%rsp)
+       jmp entry_SYSENTER_compat_after_hwframe
+-SYM_CODE_END(xen_sysenter_target)
++SYM_CODE_END(xen_entry_SYSENTER_compat)
+ #else /* !CONFIG_IA32_EMULATION */
+-SYM_CODE_START(xen_syscall32_target)
+-SYM_CODE_START(xen_sysenter_target)
++SYM_CODE_START(xen_entry_SYSCALL_compat)
++SYM_CODE_START(xen_entry_SYSENTER_compat)
+       UNWIND_HINT_EMPTY
+       lea 16(%rsp), %rsp      /* strip %rcx, %r11 */
+       mov $-ENOSYS, %rax
+       pushq $0
+       jmp hypercall_iret
+-SYM_CODE_END(xen_sysenter_target)
+-SYM_CODE_END(xen_syscall32_target)
++SYM_CODE_END(xen_entry_SYSENTER_compat)
++SYM_CODE_END(xen_entry_SYSCALL_compat)
+ #endif        /* CONFIG_IA32_EMULATION */
+--- a/arch/x86/xen/xen-ops.h
++++ b/arch/x86/xen/xen-ops.h
+@@ -10,10 +10,10 @@
+ /* These are code, but not functions.  Defined in entry.S */
+ extern const char xen_failsafe_callback[];
+-void xen_sysenter_target(void);
++void xen_entry_SYSENTER_compat(void);
+ #ifdef CONFIG_X86_64
+-void xen_syscall_target(void);
+-void xen_syscall32_target(void);
++void xen_entry_SYSCALL_64(void);
++void xen_entry_SYSCALL_compat(void);
+ #endif
+ extern void *xen_initial_gdt;
diff --git a/queue-5.10/x86-xen-support-objtool-validation-in-xen-asm.s.patch b/queue-5.10/x86-xen-support-objtool-validation-in-xen-asm.s.patch
new file mode 100644 (file)
index 0000000..19c84cd
--- /dev/null
@@ -0,0 +1,138 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 21 Jan 2021 15:29:28 -0600
+Subject: x86/xen: Support objtool validation in xen-asm.S
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit cde07a4e4434ddfb9b1616ac971edf6d66329804 upstream.
+
+The OBJECT_FILES_NON_STANDARD annotation is used to tell objtool to
+ignore a file.  File-level ignores won't work when validating vmlinux.o.
+
+Tweak the ELF metadata and unwind hints to allow objtool to follow the
+code.
+
+Cc: Juergen Gross <jgross@suse.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/8b042a09c69e8645f3b133ef6653ba28f896807d.1611263462.git.jpoimboe@redhat.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/xen/Makefile  |    1 -
+ arch/x86/xen/xen-asm.S |   29 +++++++++++++++++++----------
+ 2 files changed, 19 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/xen/Makefile
++++ b/arch/x86/xen/Makefile
+@@ -1,5 +1,4 @@
+ # SPDX-License-Identifier: GPL-2.0
+-OBJECT_FILES_NON_STANDARD_xen-asm.o := y
+ ifdef CONFIG_FUNCTION_TRACER
+ # Do not profile debug and lowlevel utilities
+--- a/arch/x86/xen/xen-asm.S
++++ b/arch/x86/xen/xen-asm.S
+@@ -14,6 +14,7 @@
+ #include <asm/thread_info.h>
+ #include <asm/asm.h>
+ #include <asm/frame.h>
++#include <asm/unwind_hints.h>
+ #include <xen/interface/xen.h>
+@@ -147,6 +148,7 @@ SYM_FUNC_END(xen_read_cr2_direct);
+ .macro xen_pv_trap name
+ SYM_CODE_START(xen_\name)
++      UNWIND_HINT_EMPTY
+       pop %rcx
+       pop %r11
+       jmp  \name
+@@ -186,6 +188,7 @@ xen_pv_trap asm_exc_xen_hypervisor_callb
+ SYM_CODE_START(xen_early_idt_handler_array)
+       i = 0
+       .rept NUM_EXCEPTION_VECTORS
++      UNWIND_HINT_EMPTY
+       pop %rcx
+       pop %r11
+       jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
+@@ -212,11 +215,13 @@ hypercall_iret = hypercall_page + __HYPE
+  * rsp->rax           }
+  */
+ SYM_CODE_START(xen_iret)
++      UNWIND_HINT_EMPTY
+       pushq $0
+       jmp hypercall_iret
+ SYM_CODE_END(xen_iret)
+ SYM_CODE_START(xen_sysret64)
++      UNWIND_HINT_EMPTY
+       /*
+        * We're already on the usermode stack at this point, but
+        * still with the kernel gs, so we can easily switch back.
+@@ -271,7 +276,8 @@ SYM_CODE_END(xenpv_restore_regs_and_retu
+  */
+ /* Normal 64-bit system call target */
+-SYM_FUNC_START(xen_syscall_target)
++SYM_CODE_START(xen_syscall_target)
++      UNWIND_HINT_EMPTY
+       popq %rcx
+       popq %r11
+@@ -284,12 +290,13 @@ SYM_FUNC_START(xen_syscall_target)
+       movq $__USER_CS, 1*8(%rsp)
+       jmp entry_SYSCALL_64_after_hwframe
+-SYM_FUNC_END(xen_syscall_target)
++SYM_CODE_END(xen_syscall_target)
+ #ifdef CONFIG_IA32_EMULATION
+ /* 32-bit compat syscall target */
+-SYM_FUNC_START(xen_syscall32_target)
++SYM_CODE_START(xen_syscall32_target)
++      UNWIND_HINT_EMPTY
+       popq %rcx
+       popq %r11
+@@ -302,10 +309,11 @@ SYM_FUNC_START(xen_syscall32_target)
+       movq $__USER32_CS, 1*8(%rsp)
+       jmp entry_SYSCALL_compat_after_hwframe
+-SYM_FUNC_END(xen_syscall32_target)
++SYM_CODE_END(xen_syscall32_target)
+ /* 32-bit compat sysenter target */
+-SYM_FUNC_START(xen_sysenter_target)
++SYM_CODE_START(xen_sysenter_target)
++      UNWIND_HINT_EMPTY
+       /*
+        * NB: Xen is polite and clears TF from EFLAGS for us.  This means
+        * that we don't need to guard against single step exceptions here.
+@@ -322,17 +330,18 @@ SYM_FUNC_START(xen_sysenter_target)
+       movq $__USER32_CS, 1*8(%rsp)
+       jmp entry_SYSENTER_compat_after_hwframe
+-SYM_FUNC_END(xen_sysenter_target)
++SYM_CODE_END(xen_sysenter_target)
+ #else /* !CONFIG_IA32_EMULATION */
+-SYM_FUNC_START_ALIAS(xen_syscall32_target)
+-SYM_FUNC_START(xen_sysenter_target)
++SYM_CODE_START(xen_syscall32_target)
++SYM_CODE_START(xen_sysenter_target)
++      UNWIND_HINT_EMPTY
+       lea 16(%rsp), %rsp      /* strip %rcx, %r11 */
+       mov $-ENOSYS, %rax
+       pushq $0
+       jmp hypercall_iret
+-SYM_FUNC_END(xen_sysenter_target)
+-SYM_FUNC_END_ALIAS(xen_syscall32_target)
++SYM_CODE_END(xen_sysenter_target)
++SYM_CODE_END(xen_syscall32_target)
+ #endif        /* CONFIG_IA32_EMULATION */
diff --git a/queue-5.10/x86-xen-support-objtool-vmlinux.o-validation-in-xen-head.s.patch b/queue-5.10/x86-xen-support-objtool-vmlinux.o-validation-in-xen-head.s.patch
new file mode 100644 (file)
index 0000000..a6d85db
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 21 Jan 2021 15:29:29 -0600
+Subject: x86/xen: Support objtool vmlinux.o validation in xen-head.S
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit f4b4bc10b0b85ec66f1a9bf5dddf475e6695b6d2 upstream.
+
+The Xen hypercall page is filled with zeros, causing objtool to fall
+through all the empty hypercall functions until it reaches a real
+function, resulting in a stack state mismatch.
+
+The build-time contents of the hypercall page don't matter because the
+page gets rewritten by the hypervisor.  Make it more palatable to
+objtool by making each hypervisor function a true empty function, with
+nops and a return.
+
+Cc: Juergen Gross <jgross@suse.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/0883bde1d7a1fb3b6a4c952bc0200e873752f609.1611263462.git.jpoimboe@redhat.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/xen/xen-head.S |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -68,8 +68,9 @@ SYM_CODE_END(asm_cpu_bringup_and_idle)
+       .balign PAGE_SIZE
+ SYM_CODE_START(hypercall_page)
+       .rept (PAGE_SIZE / 32)
+-              UNWIND_HINT_EMPTY
+-              .skip 32
++              UNWIND_HINT_FUNC
++              .skip 31, 0x90
++              ret
+       .endr
+ #define HYPERCALL(n) \